@@ -226,6 +226,38 @@ class WP_SQLite_Driver {
226226 '%y ' => '%y ' ,
227227 );
228228
229+ /**
230+ * A stack of maps for resolving ORDER BY unqualified names to select-item expressions.
231+ *
232+ * Motivation:
233+ * - MySQL allows ORDER BY to reference output columns by their column names (or aliases)
234+ * without qualification (e.g., ORDER BY name), and it resolves the reference against
235+ * the SELECT list when unambiguous. SQLite requires the term to be either a positional
236+ * index or a resolvable expression (e.g., t1.name) and will error on ambiguity. To provide
237+ * MySQL-compatible behavior, we keep track of SELECT output names and their corresponding
238+ * translated expressions so ORDER BY terms can be rewritten to safe expressions.
239+ *
240+ * Lifecycle:
241+ * - We push an empty frame at the start of translating a querySpecification (single SELECT).
242+ * - As each selectItem is translated, we record an output name → translated expression mapping:
243+ * - Explicit alias: SELECT expr AS alias → alias → expr
244+ * - Plain column ref: SELECT t1.name → name → t1.name
245+ * - Computed expression without alias: SELECT CONCAT('a','b') → 'CONCAT(…)'
246+ * - If the same output name appears more than once, the entry is marked ambiguous (null).
247+ * - ORDER BY translation consults the top stack frame to resolve simple unqualified terms.
248+ * - We pop any frames pushed during the current queryExpression after its children are translated.
249+ *
250+ * Semantics:
251+ * - Only simple unqualified ORDER BY tokens (name or `name`) are considered for rewrite.
252+ * - If the name uniquely matches an output column in the current SELECT list, we rewrite the
253+ * term to the recorded translated expression (e.g., `name` → `t1`.`name` or alias token).
254+ * - If the name is ambiguous or not found, we leave the ORDER BY term unchanged so SQLite can
255+ * raise a meaningful error consistent with MySQL’s ambiguity rules.
256+ *
257+ * @var array<int, array<string, string|null>> Stack of frames: name(lowercase) → expression|null
258+ */
259+ private $ select_output_name_to_ordinal_stack = array ();
260+
229261 /**
230262 * A map of MySQL data types to implicit default values for non-strict mode.
231263 *
@@ -2513,6 +2545,11 @@ private function translate( $node ): ?string {
25132545 $ rule_name = $ node ->rule_name ;
25142546 switch ( $ rule_name ) {
25152547 case 'querySpecification ' :
2548+ // Start a new SELECT-output frame so ORDER BY can resolve names against its related
2549+ // SELECT column list (and not, e.g., an unrelated subquery).
2550+ // We defer popping until the enclosing queryExpression finishes to ensure ORDER BY sees it.
2551+ $ this ->select_output_name_to_ordinal_stack [] = array ();
2552+
25162553 // Translate "HAVING ..." without "GROUP BY ..." to "GROUP BY 1 HAVING ...".
25172554 if ( $ node ->has_child_node ( 'havingClause ' ) && ! $ node ->has_child_node ( 'groupByClause ' ) ) {
25182555 $ parts = array ();
@@ -2528,6 +2565,17 @@ private function translate( $node ): ?string {
25282565 return implode ( ' ' , $ parts );
25292566 }
25302567 return $ this ->translate_sequence ( $ node ->get_children () );
2568+ case 'queryExpression ' :
2569+ // Pop the recorded SELECT column names to avoid using them when translating a higher-level
2570+ // ORDER BY clause.
2571+ $ depth_before = count ( $ this ->select_output_name_to_ordinal_stack );
2572+ $ result = $ this ->translate_sequence ( $ node ->get_children () );
2573+ while ( count ( $ this ->select_output_name_to_ordinal_stack ) > $ depth_before ) {
2574+ array_pop ( $ this ->select_output_name_to_ordinal_stack );
2575+ }
2576+ return $ result ;
2577+ case 'orderClause ' :
2578+ return $ this ->translate_order_clause_with_select_alias_resolution ( $ node );
25312579 case 'qualifiedIdentifier ' :
25322580 case 'tableRefWithWildcard ' :
25332581 $ parts = $ node ->get_descendant_nodes ( 'identifier ' );
@@ -2926,6 +2974,159 @@ private function translate_simple_expr( WP_Parser_Node $node ): string {
29262974 return $ this ->translate_sequence ( $ node ->get_children () );
29272975 }
29282976
/**
 * Record the output name of one SELECT item in the innermost SELECT-output frame.
 *
 * The recorded map is consulted when translating ORDER BY, so that a simple
 * unqualified term (e.g., ORDER BY name) can be rewritten to the expression
 * stored for the matching SELECT item (e.g., `t1`.`name`).
 *
 * Names are matched case-insensitively (the map key is lowercased). When the
 * same name is recorded more than once within a frame, its entry is set to
 * null to mark it as ambiguous, and it stays null for the rest of that frame.
 *
 * When no frame is active, the call is a no-op.
 *
 * @see https://github.com/WordPress/sqlite-database-integration/issues/228
 *
 * @param string      $output_name            Output column name (alias, inferred name), unquoted.
 * @param string|null $full_column_expression Translated expression that produces the column value.
 *                                            If null, defaults to the name quoted as an identifier.
 * @return void
 */
private function record_select_column_name_for_ambiguous_column_resolution( string $output_name, ?string $full_column_expression = null ): void {
	$frame_index = count( $this->select_output_name_to_ordinal_stack ) - 1;
	if ( $frame_index < 0 ) {
		// No SELECT is currently tracking its output names.
		return;
	}

	$normalized = strtolower( $output_name );
	if ( array_key_exists( $normalized, $this->select_output_name_to_ordinal_stack[ $frame_index ] ) ) {
		// The name was already recorded in this SELECT list (possibly already
		// marked as ambiguous): mark it as ambiguous.
		$this->select_output_name_to_ordinal_stack[ $frame_index ][ $normalized ] = null;
		return;
	}

	if ( null === $full_column_expression ) {
		// Fall back to the name quoted as an identifier. Embedded backticks
		// must be doubled, otherwise a name containing a backtick would end
		// the quoted identifier early and produce malformed SQL.
		$full_column_expression = '`' . str_replace( '`', '``', $output_name ) . '`';
	}

	$this->select_output_name_to_ordinal_stack[ $frame_index ][ $normalized ] = $full_column_expression;
}
3001+
3002+ /**
3003+ * Translate ORDER BY and resolve ambiguous unqualified names against the current SELECT list.
3004+ *
3005+ * ## Problem
3006+ *
3007+ * MySQL allows ORDER BY to reference output columns by their column names (or aliases)
3008+ * without qualification (e.g., ORDER BY name), and it resolves the reference against
3009+ * the SELECT list when unambiguous. SQLite requires the term to be either a positional
3010+ * index or a resolvable expression (e.g., t1.name) and will error on ambiguity.
3011+ *
3012+ * For example, given the following tables and data:
3013+ *
3014+ * ```sql
3015+ * CREATE TABLE t1 (id INT, name TEXT);
3016+ * CREATE TABLE t2 (t1_id INT, name TEXT);
3017+ *
3018+ * INSERT INTO t1 (id, name) VALUES (1, "T1 A");
3019+ * INSERT INTO t1 (id, name) VALUES (2, "T1 B");
3020+ * INSERT INTO t2 (t1_id, name) VALUES (1, "T2 B");
3021+ * INSERT INTO t2 (t1_id, name) VALUES (2, "T2 A");
3022+ * ```
3023+ *
3024+ * The following queries **error in SQLite but not in MySQL**:
3025+ *
3026+ * ```sql
3027+ * SELECT t1.name -- name column used for ORDER BY in MySQL
3028+ * FROM t1
3029+ * JOIN t2 ON t2.t1_id = t1.id
3030+ * ORDER BY name;
3031+ * -- [MySQL] T1 A, T1 B
3032+ * -- [SQLite] Query Error: ambiguous column name: name
3033+ *
3034+ * SELECT t2.name -- name column used for ORDER BY in MySQL
3035+ * FROM t1
3036+ * JOIN t2 ON t2.t1_id = t1.id
3037+ * ORDER BY name;
3038+ * -- [MySQL] T2 A, T2 B
3039+ * -- [SQLite] Query Error: ambiguous column name: name
3040+ * ```
3041+ *
3042+ * ## Solution
3043+ *
3044+ * When we see a non-fully qualified column in an `ORDER BY` clause and the same column
3045+ * is fully qualified and non-ambiguous in the `SELECT` list, we can use the same full
3046+ * qualifier in the `ORDER BY` clause.
3047+ *
3048+ * For example, the above queries are rewritten as:
3049+ *
3050+ * ```sql
3051+ * SELECT t1.name
3052+ * FROM t1
3053+ * JOIN t2 ON t2.t1_id = t1.id
3054+ * ORDER BY t1.name;
3055+ *
3056+ * SELECT t2.name
3057+ * FROM t1
3058+ * JOIN t2 ON t2.t1_id = t1.id
3059+ * ORDER BY t2.name;
3060+ * -- [MySQL] T2 A, T2 B
3061+ * -- [SQLite] T2 A, T2 B
3062+ * ```
3063+ *
3064+ * ## Limitations
3065+ *
3066+ * This solution is limited to simple unqualified ORDER BY terms. Complex computed
3067+ * expressions, wildcards, and other non-simple terms are not supported.
3068+ *
3069+ * @param WP_Parser_Node $order_clause The orderClause node.
3070+ * @return string The translated ORDER BY clause.
3071+ */
private function translate_order_clause_with_select_alias_resolution( WP_Parser_Node $order_clause ): string {
	$order_list = $order_clause->get_first_child_node( 'orderList' );
	if ( null === $order_list ) {
		// No order list to inspect; use the generic translation of the children.
		return $this->translate_sequence( $order_clause->get_children() );
	}

	$order_items = array();
	foreach ( $order_list->get_child_nodes( 'orderExpression' ) as $order_expr ) {
		// The first child is the ordering expression; the optional second
		// child is the direction.
		$expr_nodes = $order_expr->get_children();

		// translate() is declared to return ?string, while the resolver below
		// only accepts a string. Coerce a null translation to an empty string
		// so that it cannot raise a TypeError.
		$expr      = (string) $this->translate( $expr_nodes[0] );
		$direction = isset( $expr_nodes[1] ) ? (string) $this->translate( $expr_nodes[1] ) : '';

		// Replace a simple unqualified term with the expression recorded for
		// the uniquely matching SELECT item, when there is one.
		$resolved = $this->maybe_resolve_unqualified_order_term_to_select_expression( $expr );
		if ( null !== $resolved ) {
			$expr = $resolved;
		}

		if ( $direction ) {
			$expr .= ' ' . $direction;
		}
		$order_items[] = trim( $expr );
	}

	return 'ORDER BY ' . implode( ', ', $order_items );
}
3096+
/**
 * Try to resolve a simple ORDER BY term like `name` to the expression recorded
 * for the uniquely matching item of the current SELECT list.
 *
 * Only a single unqualified identifier is considered:
 * - A backtick-quoted term must be exactly one quoted identifier. A term with
 *   a lone (undoubled) backtick inside, such as `t1`.`name` or `a` + `b`, is
 *   a compound expression and is left alone.
 * - An unquoted term must not contain a dot, parentheses, or spaces, and must
 *   not be a numeric literal. A number in ORDER BY is a positional reference
 *   to a SELECT item and must never be rewritten to a same-named column.
 *
 * The lookup is case-insensitive and uses the innermost SELECT-output frame.
 *
 * @param string $translated_expr The already-translated expression string for the term.
 * @return string|null The recorded select-item expression if resolved; null when there is
 *                     no active frame, the term is not a simple identifier, the name is
 *                     not in the SELECT list, or the name is ambiguous.
 */
private function maybe_resolve_unqualified_order_term_to_select_expression( string $translated_expr ): ?string {
	if ( empty( $this->select_output_name_to_ordinal_stack ) ) {
		return null;
	}
	$frame = $this->select_output_name_to_ordinal_stack[ count( $this->select_output_name_to_ordinal_stack ) - 1 ];

	$term   = trim( $translated_expr );
	$length = strlen( $term );

	if ( $length >= 2 && '`' === $term[0] && '`' === $term[ $length - 1 ] ) {
		$inner = substr( $term, 1, -1 );

		// Inside a single quoted identifier, backticks only appear doubled.
		// Any backtick left after removing the doubled ones means the term
		// is made of several tokens (e.g., `t1`.`name`).
		if ( false !== strpos( str_replace( '``', '', $inner ), '`' ) ) {
			return null;
		}
		$name = str_replace( '``', '`', $inner );
	} else {
		// A dot, function call, parentheses, or spaces: not a simple name.
		if ( false !== strpbrk( $term, '.() ' ) ) {
			return null;
		}
		// Positional reference (ORDER BY 1) or another numeric literal.
		if ( is_numeric( $term ) ) {
			return null;
		}
		$name = $term;
	}

	// A missing entry and an ambiguous (null) entry both resolve to null.
	return $frame[ strtolower( $name ) ] ?? null;
}
3129+
29293130 /**
29303131 * Translate a MySQL LIKE expression to SQLite.
29313132 *
@@ -3219,6 +3420,11 @@ public function translate_select_item( WP_Parser_Node $node ): string {
32193420 // When an explicit alias is provided, we can use it as is.
32203421 $ alias = $ node ->get_first_child_node ( 'selectAlias ' );
32213422 if ( $ alias ) {
3423+ // Record explicit alias in the current select output map.
3424+ $ this ->record_select_column_name_for_ambiguous_column_resolution (
3425+ $ this ->unquote_sqlite_identifier ( $ this ->translate ( $ alias ->get_first_child () ) ),
3426+ $ item
3427+ );
32223428 return $ item ;
32233429 }
32243430
@@ -3233,7 +3439,17 @@ public function translate_select_item( WP_Parser_Node $node ): string {
32333439 * In this case, SQLite uses the same logic as MySQL, so using the value
32343440 * as is without adding an explicit alias will produce the correct result.
32353441 */
3236- $ column_ref = $ node ->get_first_descendant_node ( 'columnRef ' );
3442+ $ column_ref = $ node ->get_first_descendant_node ( 'columnRef ' );
3443+ if ( $ column_ref ) {
3444+ // Record inferred output name from column reference (the final column name part).
3445+ $ identifiers = $ column_ref ->get_descendant_nodes ( 'identifier ' );
3446+ if ( ! empty ( $ identifiers ) ) {
3447+ // For qualified references like t1.name, the last identifier is the column name.
3448+ $ last_identifier = end ( $ identifiers );
3449+ $ column_name = $ this ->unquote_sqlite_identifier ( $ this ->translate ( $ last_identifier ) );
3450+ $ this ->record_select_column_name_for_ambiguous_column_resolution ( $ column_name , $ item );
3451+ }
3452+ }
32373453 $ is_column_ref = $ column_ref && $ item === $ this ->translate ( $ column_ref );
32383454 if ( $ is_column_ref ) {
32393455 return $ item ;
@@ -3256,6 +3472,7 @@ public function translate_select_item( WP_Parser_Node $node ): string {
32563472 // let's avoid unnecessary aliases ("SELECT `id` AS `id` FROM t").
32573473 return $ item ;
32583474 }
3475+ $ this ->record_select_column_name_for_ambiguous_column_resolution ( $ raw_alias );
32593476 return sprintf ( '%s AS %s ' , $ item , $ alias );
32603477 }
32613478
0 commit comments