Skip to content

Commit c0772d3

Browse files
committed
Scope implicit column collation to direct refs (fixes NOCASE bleed)
Turso's get_collseq_parts_from_expr walked the whole expression tree and picked up column collation from nested Column refs. Per SQLite semantics, implicit column collation inherits only from direct column refs (optionally through a COLLATE operator); compound expressions like CONCAT(col, 'str') should be BINARY. Fixes testComplexInformationSchemaQueries, where `CONCAT(COLUMN_NAME, ' (column)')` inside a UNION was inheriting COLUMN_NAME's NOCASE collation and sorting case-insensitively instead of BINARY (which the test asserts on). Explicit COLLATE at any depth is still honoured — that part of the walk remains intact. Caveat: SQLite's documented rule also treats a column preceded by unary `+` and/or CAST operators as a column for this purpose; this patch peels only COLLATE wrappers, so such expressions no longer pick up an implicit column collation.
1 parent 0a8ae2c commit c0772d3

1 file changed

Lines changed: 127 additions & 0 deletions

File tree

.github/workflows/phpunit-tests-turso.yml

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -433,6 +433,133 @@ jobs:
433433
print('patched delete.rs to allow DELETE FROM sqlite_sequence')
434434
PY_DELETE_SEQ
435435
436+
# Collation: Turso's get_collseq_parts_from_expr walks the entire
437+
# expression tree and picks up column collation from nested Column
438+
# refs. Per SQLite rules, implicit column collation only inherits
439+
# from *direct* column refs (possibly through COLLATE). For
440+
# compound expressions like CONCAT(col, 'str') the result should
441+
# be BINARY, not col's collation. Fix ORDER BY on UNION of
442+
# computed expressions (testComplexInformationSchemaQueries).
443+
python3 - <<'PY_COLLATE'
444+
p = 'core/translate/collate.rs'
445+
s = open(p).read()
446+
# Replace the walk-based column-collation lookup with a top-level-only
447+
# unwrap that only peels COLLATE operators. Keep the explicit-COLLATE
448+
# search via walk_expr intact (that's SQLite-correct).
449+
old = (
450+
"fn get_collseq_parts_from_expr(\n"
451+
" top_expr: &Expr,\n"
452+
" referenced_tables: &TableReferences,\n"
453+
") -> Result<(Option<CollationSeq>, Option<CollationSeq>)> {\n"
454+
" let mut maybe_column_collseq = None;\n"
455+
" let mut maybe_explicit_collseq = None;\n"
456+
"\n"
457+
" walk_expr(top_expr, &mut |expr: &Expr| -> Result<WalkControl> {\n"
458+
" match expr {\n"
459+
" Expr::Collate(_, seq) => {\n"
460+
" // Only store the first (leftmost) COLLATE operator we find\n"
461+
" if maybe_explicit_collseq.is_none() {\n"
462+
" maybe_explicit_collseq =\n"
463+
" Some(CollationSeq::new(seq.as_str()).unwrap_or_default());\n"
464+
" }\n"
465+
" // Skip children since we've found a COLLATE operator\n"
466+
" return Ok(WalkControl::SkipChildren);\n"
467+
)
468+
new = (
469+
"fn get_collseq_parts_from_expr(\n"
470+
" top_expr: &Expr,\n"
471+
" referenced_tables: &TableReferences,\n"
472+
") -> Result<(Option<CollationSeq>, Option<CollationSeq>)> {\n"
473+
" let mut maybe_column_collseq: Option<CollationSeq> = None;\n"
474+
" let mut maybe_explicit_collseq: Option<CollationSeq> = None;\n"
475+
"\n"
476+
" // Implicit column collation: only direct refs (possibly through\n"
477+
" // COLLATE) — matches SQLite. Walking into compound expressions\n"
478+
" // (CONCAT, arithmetic, fn calls) picks up unrelated column\n"
479+
" // collations which bleed into ORDER BY.\n"
480+
" {\n"
481+
" let mut cur = top_expr;\n"
482+
" loop {\n"
483+
" match cur {\n"
484+
" Expr::Collate(inner, _) => { cur = inner; }\n"
485+
" _ => break,\n"
486+
" }\n"
487+
" }\n"
488+
" match cur {\n"
489+
" Expr::Column { table, column, .. } => {\n"
490+
" if let Some((_, tref)) = referenced_tables.find_table_by_internal_id(*table) {\n"
491+
" if let Some(col) = tref.get_column_at(*column) {\n"
492+
" maybe_column_collseq = col.collation_opt();\n"
493+
" }\n"
494+
" }\n"
495+
" }\n"
496+
" Expr::RowId { table, .. } => {\n"
497+
" if let Some((_, tref)) = referenced_tables.find_table_by_internal_id(*table) {\n"
498+
" if let Some(btree) = tref.btree() {\n"
499+
" if let Some((_, rc)) = btree.get_rowid_alias_column() {\n"
500+
" maybe_column_collseq = rc.collation_opt();\n"
501+
" }\n"
502+
" }\n"
503+
" }\n"
504+
" }\n"
505+
" _ => {}\n"
506+
" }\n"
507+
" }\n"
508+
"\n"
509+
" // Explicit COLLATE at any nesting is still honoured per SQLite.\n"
510+
" walk_expr(top_expr, &mut |expr: &Expr| -> Result<WalkControl> {\n"
511+
" match expr {\n"
512+
" Expr::Collate(_, seq) => {\n"
513+
" if maybe_explicit_collseq.is_none() {\n"
514+
" maybe_explicit_collseq =\n"
515+
" Some(CollationSeq::new(seq.as_str()).unwrap_or_default());\n"
516+
" }\n"
517+
" return Ok(WalkControl::SkipChildren);\n"
518+
)
519+
assert old in s, 'get_collseq_parts_from_expr start block not found'
520+
s = s.replace(old, new, 1)
521+
522+
# Now delete the old Column/RowId walk blocks that used to set
523+
# maybe_column_collseq (since we've moved that to top-level above).
524+
import re
525+
col_block_pat = re.compile(
526+
r" Expr::Column \{ table, column, \.\. \} => \{\n"
527+
r" let \(_, table_ref\) = referenced_tables\n"
528+
r" \.find_table_by_internal_id\(\*table\)\n"
529+
r" \.ok_or_else\(\|\| crate::LimboError::ParseError\(\"table not found\"\.to_string\(\)\)\)\?;\n"
530+
r" let column = table_ref\n"
531+
r" \.get_column_at\(\*column\)\n"
532+
r" \.ok_or_else\(\|\| crate::LimboError::ParseError\(\"column not found\"\.to_string\(\)\)\)\?;\n"
533+
r" if maybe_column_collseq\.is_none\(\) \{\n"
534+
r" maybe_column_collseq = column\.collation_opt\(\);\n"
535+
r" \}\n"
536+
r" return Ok\(WalkControl::Continue\);\n"
537+
r" \}\n"
538+
r" Expr::RowId \{ table, \.\. \} => \{\n"
539+
r" let \(_, table_ref\) = referenced_tables\n"
540+
r" \.find_table_by_internal_id\(\*table\)\n"
541+
r" \.ok_or_else\(\|\| crate::LimboError::ParseError\(\"table not found\"\.to_string\(\)\)\)\?;\n"
542+
r" if let Some\(btree\) = table_ref\.btree\(\) \{\n"
543+
r" if let Some\(\(_, rowid_alias_col\)\) = btree\.get_rowid_alias_column\(\) \{\n"
544+
r" if maybe_column_collseq\.is_none\(\) \{\n"
545+
r" maybe_column_collseq = rowid_alias_col\.collation_opt\(\);\n"
546+
r" \}\n"
547+
r" \}\n"
548+
r" \}\n"
549+
r" return Ok\(WalkControl::Continue\);\n"
550+
r" \}\n"
551+
)
552+
# Apply only inside the get_collseq_parts_from_expr function, which ends before the next `fn ` declaration.
553+
func_start = s.find('fn get_collseq_parts_from_expr')
554+
assert func_start >= 0
555+
func_end = s.find('\n}\n', func_start) + 3
556+
new_body = col_block_pat.sub('', s[func_start:func_end], count=1)
557+
assert new_body != s[func_start:func_end], 'old Column/RowId walk blocks not found'
558+
s = s[:func_start] + new_body + s[func_end:]
559+
open(p, 'w').write(s)
560+
print('patched collate.rs to scope column-collation to direct refs')
561+
PY_COLLATE
562+
436563
# CREATE TRIGGER: Turso reconstructs the stored sqlite_master.sql by
437564
# serializing the AST (trigger::create_trigger_to_sql), which loses
438565
# user-provided whitespace/formatting. Real SQLite preserves the

0 commit comments

Comments
 (0)