@@ -433,6 +433,133 @@ jobs:
433433 print('patched delete.rs to allow DELETE FROM sqlite_sequence')
434434 PY_DELETE_SEQ
435435
436+ # Collation: Turso's get_collseq_parts_from_expr walks the entire
437+ # expression tree and picks up column collation from nested Column
438+ # refs. Per SQLite rules, implicit column collation only inherits
439+ # from *direct* column refs (possibly through COLLATE). For
440+ # compound expressions like CONCAT(col, 'str') the result should
441+ # be BINARY, not col's collation. Fix ORDER BY on UNION of
442+ # computed expressions (testComplexInformationSchemaQueries).
443+ python3 - <<'PY_COLLATE'
444+ p = 'core/translate/collate.rs'
445+ s = open(p).read()
446+ # Step 1: swap the function's opening (signature through the Expr::Collate
447+ # walk arm) for a version that resolves column collation only at the top
448+ # level, peeling COLLATE wrappers; the explicit-COLLATE walk_expr search stays.
449+ old = (
450+ "fn get_collseq_parts_from_expr(\n"
451+ " top_expr: &Expr,\n"
452+ " referenced_tables: &TableReferences,\n"
453+ ") -> Result<(Option<CollationSeq>, Option<CollationSeq>)> {\n"
454+ " let mut maybe_column_collseq = None;\n"
455+ " let mut maybe_explicit_collseq = None;\n"
456+ "\n"
457+ " walk_expr(top_expr, &mut |expr: &Expr| -> Result<WalkControl> {\n"
458+ " match expr {\n"
459+ " Expr::Collate(_, seq) => {\n"
460+ " // Only store the first (leftmost) COLLATE operator we find\n"
461+ " if maybe_explicit_collseq.is_none() {\n"
462+ " maybe_explicit_collseq =\n"
463+ " Some(CollationSeq::new(seq.as_str()).unwrap_or_default());\n"
464+ " }\n"
465+ " // Skip children since we've found a COLLATE operator\n"
466+ " return Ok(WalkControl::SkipChildren);\n"
467+ )
468+ new = (
469+ "fn get_collseq_parts_from_expr(\n"
470+ " top_expr: &Expr,\n"
471+ " referenced_tables: &TableReferences,\n"
472+ ") -> Result<(Option<CollationSeq>, Option<CollationSeq>)> {\n"
473+ " let mut maybe_column_collseq: Option<CollationSeq> = None;\n"
474+ " let mut maybe_explicit_collseq: Option<CollationSeq> = None;\n"
475+ "\n"
476+ " // Implicit column collation: only direct refs (possibly through\n"
477+ " // COLLATE) — matches SQLite. Walking into compound expressions\n"
478+ " // (CONCAT, arithmetic, fn calls) picks up unrelated column\n"
479+ " // collations which bleed into ORDER BY.\n"
480+ " {\n"
481+ " let mut cur = top_expr;\n"
482+ " loop {\n"
483+ " match cur {\n"
484+ " Expr::Collate(inner, _) => { cur = inner; }\n"
485+ " _ => break,\n"
486+ " }\n"
487+ " }\n"
488+ " match cur {\n"
489+ " Expr::Column { table, column, .. } => {\n"
490+ " if let Some((_, tref)) = referenced_tables.find_table_by_internal_id(*table) {\n"
491+ " if let Some(col) = tref.get_column_at(*column) {\n"
492+ " maybe_column_collseq = col.collation_opt();\n"
493+ " }\n"
494+ " }\n"
495+ " }\n"
496+ " Expr::RowId { table, .. } => {\n"
497+ " if let Some((_, tref)) = referenced_tables.find_table_by_internal_id(*table) {\n"
498+ " if let Some(btree) = tref.btree() {\n"
499+ " if let Some((_, rc)) = btree.get_rowid_alias_column() {\n"
500+ " maybe_column_collseq = rc.collation_opt();\n"
501+ " }\n"
502+ " }\n"
503+ " }\n"
504+ " }\n"
505+ " _ => {}\n"
506+ " }\n"
507+ " }\n"
508+ "\n"
509+ " // Explicit COLLATE at any nesting is still honoured per SQLite.\n"
510+ " walk_expr(top_expr, &mut |expr: &Expr| -> Result<WalkControl> {\n"
511+ " match expr {\n"
512+ " Expr::Collate(_, seq) => {\n"
513+ " if maybe_explicit_collseq.is_none() {\n"
514+ " maybe_explicit_collseq =\n"
515+ " Some(CollationSeq::new(seq.as_str()).unwrap_or_default());\n"
516+ " }\n"
517+ " return Ok(WalkControl::SkipChildren);\n"
518+ )
519+ assert old in s, 'get_collseq_parts_from_expr start block not found'
520+ s = s.replace(old, new, 1)
521+
522+ # Step 2: remove the Expr::Column / Expr::RowId arms of the walk_expr closure,
523+ # which used to set maybe_column_collseq; the top-level lookup above replaces them.
524+ import re
525+ col_block_pat = re.compile(
526+ r" Expr::Column \{ table, column, \.\. \} => \{\n"
527+ r" let \(_, table_ref\) = referenced_tables\n"
528+ r" \.find_table_by_internal_id\(\*table\)\n"
529+ r" \.ok_or_else\(\|\| crate::LimboError::ParseError\(\"table not found\"\.to_string\(\)\)\)\?;\n"
530+ r" let column = table_ref\n"
531+ r" \.get_column_at\(\*column\)\n"
532+ r" \.ok_or_else\(\|\| crate::LimboError::ParseError\(\"column not found\"\.to_string\(\)\)\)\?;\n"
533+ r" if maybe_column_collseq\.is_none\(\) \{\n"
534+ r" maybe_column_collseq = column\.collation_opt\(\);\n"
535+ r" \}\n"
536+ r" return Ok\(WalkControl::Continue\);\n"
537+ r" \}\n"
538+ r" Expr::RowId \{ table, \.\. \} => \{\n"
539+ r" let \(_, table_ref\) = referenced_tables\n"
540+ r" \.find_table_by_internal_id\(\*table\)\n"
541+ r" \.ok_or_else\(\|\| crate::LimboError::ParseError\(\"table not found\"\.to_string\(\)\)\)\?;\n"
542+ r" if let Some\(btree\) = table_ref\.btree\(\) \{\n"
543+ r" if let Some\(\(_, rowid_alias_col\)\) = btree\.get_rowid_alias_column\(\) \{\n"
544+ r" if maybe_column_collseq\.is_none\(\) \{\n"
545+ r" maybe_column_collseq = rowid_alias_col\.collation_opt\(\);\n"
546+ r" \}\n"
547+ r" \}\n"
548+ r" \}\n"
549+ r" return Ok\(WalkControl::Continue\);\n"
550+ r" \}\n"
551+ )
552+ # Restrict the substitution to get_collseq_parts_from_expr: its end is taken as the first "\n}\n" (column-0 closing brace) after the signature. NOTE(review): a -1 from s.find() is not checked; the slice would then be wrong (likely empty) and the assert below would misreport the walk blocks as missing.
553+ func_start = s.find('fn get_collseq_parts_from_expr')
554+ assert func_start >= 0
555+ func_end = s.find('\n}\n', func_start) + 3
556+ new_body = col_block_pat.sub('', s[func_start:func_end], count=1)
557+ assert new_body != s[func_start:func_end], 'old Column/RowId walk blocks not found'
558+ s = s[:func_start] + new_body + s[func_end:]
559+ open(p, 'w').write(s)
561+ PY_COLLATE
562+
436563 # CREATE TRIGGER: Turso reconstructs the stored sqlite_master.sql by
437564 # serializing the AST (trigger::create_trigger_to_sql), which loses
438565 # user-provided whitespace/formatting. Real SQLite preserves the
0 commit comments