Skip to content

Commit b9cf8b9

Browse files
committed
Resolve ambiguous column names in ORDER BY
1 parent f6666fe commit b9cf8b9

2 files changed

Lines changed: 247 additions & 1 deletion

File tree

tests/WP_SQLite_Driver_Translation_Tests.php

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,35 @@ public function testSelect(): void {
9393
);
9494
}
9595

96+
/**
 * Verify MySQL-compatible resolution of unqualified ORDER BY names.
 *
 * MySQL accepts an unqualified ORDER BY name when it matches exactly one item of the
 * SELECT list, whereas SQLite reports an ambiguity if several joined tables expose a
 * column with that name. The translated query is therefore expected to order by the
 * expression (or alias) that the SELECT list uses for that output name.
 */
public function testSelectOrderByAmbiguousColumnResolution(): void {
	// Each entry holds the two assertQuery() arguments, in their original order.
	$cases = array(
		// An explicit alias stays an alias reference in ORDER BY.
		array(
			'SELECT `t1`.`name` AS `t1_name` FROM `t1` JOIN `t2` ON `t2`.`t1_id` = `t1`.`id` ORDER BY `t1_name`',
			'SELECT t1.name AS t1_name FROM t1 JOIN t2 ON t2.t1_id = t1.id ORDER BY `t1_name`',
		),

		// The core case: "name" exists in both tables, but only once in the SELECT list,
		// so it is ordered by the qualified SELECT expression.
		array(
			'SELECT `t1`.`name` FROM `t1` JOIN `t2` ON `t2`.`t1_id` = `t1`.`id` ORDER BY `t1`.`name`',
			'SELECT t1.name FROM t1 JOIN t2 ON t2.t1_id = t1.id ORDER BY name',
		),

		// Several unqualified ORDER BY terms are each resolved independently.
		array(
			'SELECT `t1`.`id` , `t1`.`name` FROM `t1` JOIN `t2` ON `t2`.`t1_id` = `t1`.`id` ORDER BY `t1`.`id`, `t1`.`name`',
			'SELECT t1.id, t1.name FROM t1 JOIN t2 ON t2.t1_id = t1.id ORDER BY id, name',
		),
	);

	foreach ( $cases as $case ) {
		$this->assertQuery( $case[0], $case[1] );
	}
}
124+
96125
public function testInsert(): void {
97126
$this->assertQuery(
98127
'INSERT INTO `t` ( `c` ) VALUES ( 1 )',

wp-includes/sqlite-ast/class-wp-sqlite-driver.php

Lines changed: 218 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,38 @@ class WP_SQLite_Driver {
226226
'%y' => '%y',
227227
);
228228

229+
/**
230+
* A stack of maps for resolving ORDER BY unqualified names to select-item expressions.
231+
*
232+
* Motivation:
233+
* - MySQL allows ORDER BY to reference output columns by their column names (or aliases)
234+
* without qualification (e.g., ORDER BY name), and it resolves the reference against
235+
* the SELECT list when unambiguous. SQLite requires the term to be either a positional
236+
* index or a resolvable expression (e.g., t1.name) and will error on ambiguity. To provide
237+
* MySQL-compatible behavior, we keep track of SELECT output names and their corresponding
238+
* translated expressions so ORDER BY terms can be rewritten to safe expressions.
239+
*
240+
* Lifecycle:
241+
* - We push an empty frame at the start of translating a querySpecification (single SELECT).
242+
* - As each selectItem is translated, we record an output name → translated expression mapping:
243+
* - Explicit alias: SELECT expr AS alias → alias → expr
244+
* - Plain column ref: SELECT t1.name → name → t1.name
245+
* - Computed expression without alias: SELECT CONCAT('a','b') → 'CONCAT(…)'
246+
* - If the same output name appears more than once, the entry is marked ambiguous (null).
247+
* - ORDER BY translation consults the top stack frame to resolve simple unqualified terms.
248+
* - We pop any frames pushed during the current queryExpression after its children are translated.
249+
*
250+
* Semantics:
251+
* - Only simple unqualified ORDER BY tokens (name or `name`) are considered for rewrite.
252+
* - If the name uniquely matches an output column in the current SELECT list, we rewrite the
253+
* term to the recorded translated expression (e.g., `name` → `t1`.`name` or alias token).
254+
* - If the name is ambiguous or not found, we leave the ORDER BY term unchanged so SQLite can
255+
* raise a meaningful error consistent with MySQL’s ambiguity rules.
256+
*
257+
* @var array<int, array<string, string|null>> Stack of frames: name(lowercase) → expression|null
258+
*/
259+
private $select_output_name_to_ordinal_stack = array();
260+
229261
/**
230262
* A map of MySQL data types to implicit default values for non-strict mode.
231263
*
@@ -2513,6 +2545,11 @@ private function translate( $node ): ?string {
25132545
$rule_name = $node->rule_name;
25142546
switch ( $rule_name ) {
25152547
case 'querySpecification':
2548+
// Start a new SELECT-output frame so ORDER BY can resolve names against its related
2549+
// SELECT column list (and not, e.g., an unrelated subquery).
2550+
// We defer popping until the enclosing queryExpression finishes to ensure ORDER BY sees it.
2551+
$this->select_output_name_to_ordinal_stack[] = array();
2552+
25162553
// Translate "HAVING ..." without "GROUP BY ..." to "GROUP BY 1 HAVING ...".
25172554
if ( $node->has_child_node( 'havingClause' ) && ! $node->has_child_node( 'groupByClause' ) ) {
25182555
$parts = array();
@@ -2528,6 +2565,17 @@ private function translate( $node ): ?string {
25282565
return implode( ' ', $parts );
25292566
}
25302567
return $this->translate_sequence( $node->get_children() );
2568+
case 'queryExpression':
2569+
// Pop the recorded SELECT column names to avoid using them when translating a higher-level
2570+
// ORDER BY clause.
2571+
$depth_before = count( $this->select_output_name_to_ordinal_stack );
2572+
$result = $this->translate_sequence( $node->get_children() );
2573+
while ( count( $this->select_output_name_to_ordinal_stack ) > $depth_before ) {
2574+
array_pop( $this->select_output_name_to_ordinal_stack );
2575+
}
2576+
return $result;
2577+
case 'orderClause':
2578+
return $this->translate_order_clause_with_select_alias_resolution( $node );
25312579
case 'qualifiedIdentifier':
25322580
case 'tableRefWithWildcard':
25332581
$parts = $node->get_descendant_nodes( 'identifier' );
@@ -2926,6 +2974,159 @@ private function translate_simple_expr( WP_Parser_Node $node ): string {
29262974
return $this->translate_sequence( $node->get_children() );
29272975
}
29282976

2977+
/**
 * Record a SELECT output column so ORDER BY terms can later be resolved against it.
 *
 * The entry is stored in the top frame of the select-output stack under the lowercased
 * output name. When the same name is recorded again within the frame, the entry is
 * replaced with null to mark it as ambiguous. Nothing is recorded when no frame is open.
 *
 * The stored expression is substituted verbatim for a matching ORDER BY term, so it
 * must be usable as a standalone ORDER BY expression (e.g. without an "AS alias" part).
 *
 * @see https://github.com/WordPress/sqlite-database-integration/issues/228
 *
 * @param string      $output_name            Output column name (alias, inferred name).
 * @param string|null $full_column_expression Translated expression that produces the column value.
 *                                            If null, defaults to the name quoted as an identifier.
 * @return void
 */
private function record_select_column_name_for_ambiguous_column_resolution( string $output_name, ?string $full_column_expression = null ): void {
	if ( empty( $this->select_output_name_to_ordinal_stack ) ) {
		return;
	}

	$frame_index = count( $this->select_output_name_to_ordinal_stack ) - 1;
	$normalized  = strtolower( $output_name );

	if ( array_key_exists( $normalized, $this->select_output_name_to_ordinal_stack[ $frame_index ] ) ) {
		// The name was already seen in this SELECT list; mark it as ambiguous.
		$this->select_output_name_to_ordinal_stack[ $frame_index ][ $normalized ] = null;
		return;
	}

	if ( null === $full_column_expression ) {
		// Fall back to the name itself as a quoted identifier. Embedded backticks must
		// be doubled, otherwise the generated identifier would be malformed.
		$full_column_expression = '`' . str_replace( '`', '``', $output_name ) . '`';
	}

	$this->select_output_name_to_ordinal_stack[ $frame_index ][ $normalized ] = $full_column_expression;
}
3001+
3002+
/**
3003+
* Translate ORDER BY and resolve ambiguous unqualified names against the current SELECT list.
3004+
*
3005+
* ## Problem
3006+
*
3007+
* MySQL allows ORDER BY to reference output columns by their column names (or aliases)
3008+
* without qualification (e.g., ORDER BY name), and it resolves the reference against
3009+
* the SELECT list when unambiguous. SQLite requires the term to be either a positional
3010+
* index or a resolvable expression (e.g., t1.name) and will error on ambiguity.
3011+
*
3012+
* For example, given the following tables and data:
3013+
*
3014+
* ```sql
3015+
* CREATE TABLE t1 (id INT, name TEXT);
3016+
* CREATE TABLE t2 (t1_id INT, name TEXT);
3017+
*
3018+
* INSERT INTO t1 (id, name) VALUES (1, "T1 A");
3019+
* INSERT INTO t1 (id, name) VALUES (2, "T1 B");
3020+
* INSERT INTO t2 (t1_id, name) VALUES (1, "T2 B");
3021+
* INSERT INTO t2 (t1_id, name) VALUES (2, "T2 A");
3022+
* ```
3023+
*
3024+
* The following queries **error in SQLite but not in MySQL**:
3025+
*
3026+
* ```sql
3027+
* SELECT t1.name -- name column used for ORDER BY in MySQL
3028+
* FROM t1
3029+
* JOIN t2 ON t2.t1_id = t1.id
3030+
* ORDER BY name;
3031+
* -- [MySQL] T1 A, T1 B
3032+
* -- [SQLite] Query Error: ambiguous column name: name
3033+
*
3034+
* SELECT t2.name -- name column used for ORDER BY in MySQL
3035+
* FROM t1
3036+
* JOIN t2 ON t2.t1_id = t1.id
3037+
* ORDER BY name;
3038+
* -- [MySQL] T2 A, T2 B
3039+
* -- [SQLite] Query Error: ambiguous column name: name
3040+
* ```
3041+
*
3042+
* ## Solution
3043+
*
3044+
* When we see a non-fully qualified column in an `ORDER BY` clause and the same column
3045+
* is fully qualified and non-ambiguous in the `SELECT` list, we can use the same full
3046+
* qualifier in the `ORDER BY` clause.
3047+
*
3048+
* For example, the above queries are rewritten as:
3049+
*
3050+
* ```sql
3051+
* SELECT t1.name
3052+
* FROM t1
3053+
* JOIN t2 ON t2.t1_id = t1.id
3054+
* ORDER BY t1.name;
3055+
*
3056+
* SELECT t2.name
3057+
* FROM t1
3058+
* JOIN t2 ON t2.t1_id = t1.id
3059+
* ORDER BY t2.name;
3060+
* -- [MySQL] T2 A, T2 B
3061+
* -- [SQLite] T2 A, T2 B
3062+
* ```
3063+
*
3064+
* ## Limitations
3065+
*
3066+
* This solution is limited to simple unqualified ORDER BY terms. Complex computed
3067+
* expressions, wildcards, and other non-simple terms are not supported.
3068+
*
3069+
* @param WP_Parser_Node $order_clause The orderClause node.
3070+
* @return string The translated ORDER BY clause.
3071+
*/
3072+
private function translate_order_clause_with_select_alias_resolution( WP_Parser_Node $order_clause ): string {
3073+
$order_list = $order_clause->get_first_child_node( 'orderList' );
3074+
if ( null === $order_list ) {
3075+
return $this->translate_sequence( $order_clause->get_children() );
3076+
}
3077+
$parts = array( 'ORDER BY' );
3078+
$order_items = array();
3079+
foreach ( $order_list->get_child_nodes( 'orderExpression' ) as $order_expr ) {
3080+
$expr_nodes = $order_expr->get_children();
3081+
$expr = $this->translate( $expr_nodes[0] );
3082+
$direction = null;
3083+
if ( isset( $expr_nodes[1] ) ) {
3084+
$direction = $this->translate( $expr_nodes[1] );
3085+
}
3086+
3087+
$resolved = $this->maybe_resolve_unqualified_order_term_to_select_expression( $expr );
3088+
if ( null !== $resolved ) {
3089+
$expr = (string) $resolved;
3090+
}
3091+
$order_items[] = trim( $expr . ( $direction ? ( ' ' . $direction ) : '' ) );
3092+
}
3093+
$parts[] = implode( ', ', $order_items );
3094+
return implode( ' ', $parts );
3095+
}
3096+
3097+
/**
3098+
* Try to resolve an ORDER BY term like `name` to a select-item ordinal when uniquely present.
3099+
*
3100+
* @param string $translated_expr The already-translated expression string for the term.
3101+
* @return int|null Ordinal (1-based) if resolved; null otherwise.
3102+
*/
3103+
private function maybe_resolve_unqualified_order_term_to_select_expression( string $translated_expr ): ?string {
3104+
if ( empty( $this->select_output_name_to_ordinal_stack ) ) {
3105+
return null;
3106+
}
3107+
$frame = $this->select_output_name_to_ordinal_stack[ count( $this->select_output_name_to_ordinal_stack ) - 1 ];
3108+
// Only consider simple unqualified identifiers: `name` or name
3109+
$trimmed = trim( $translated_expr );
3110+
// Remove backticks if present for lookup purposes.
3111+
if ( strlen( $trimmed ) >= 2 && $trimmed[0] === '`' && substr( $trimmed, -1 ) === '`' ) {
3112+
$key = strtolower( substr( $trimmed, 1, -1 ) );
3113+
} else {
3114+
$key = strtolower( $trimmed );
3115+
}
3116+
// If expression contains a dot, function call, parentheses, or spaces, do not resolve.
3117+
if ( strpbrk( $key, ".() ") !== false ) {
3118+
return null;
3119+
}
3120+
if ( ! array_key_exists( $key, $frame ) ) {
3121+
return null;
3122+
}
3123+
$select_expr = $frame[ $key ];
3124+
if ( null === $select_expr ) {
3125+
return null; // ambiguous
3126+
}
3127+
return (string) $select_expr;
3128+
}
3129+
29293130
/**
29303131
* Translate a MySQL LIKE expression to SQLite.
29313132
*
@@ -3219,6 +3420,11 @@ public function translate_select_item( WP_Parser_Node $node ): string {
32193420
// When an explicit alias is provided, we can use it as is.
32203421
$alias = $node->get_first_child_node( 'selectAlias' );
32213422
if ( $alias ) {
3423+
// Record explicit alias in the current select output map.
3424+
$this->record_select_column_name_for_ambiguous_column_resolution(
3425+
$this->unquote_sqlite_identifier( $this->translate( $alias->get_first_child() ) ),
3426+
$item
3427+
);
32223428
return $item;
32233429
}
32243430

@@ -3233,7 +3439,17 @@ public function translate_select_item( WP_Parser_Node $node ): string {
32333439
* In this case, SQLite uses the same logic as MySQL, so using the value
32343440
* as is without adding an explicit alias will produce the correct result.
32353441
*/
3236-
$column_ref = $node->get_first_descendant_node( 'columnRef' );
3442+
$column_ref = $node->get_first_descendant_node( 'columnRef' );
3443+
if ( $column_ref ) {
3444+
// Record inferred output name from column reference (the final column name part).
3445+
$identifiers = $column_ref->get_descendant_nodes( 'identifier' );
3446+
if ( ! empty( $identifiers ) ) {
3447+
// For qualified references like t1.name, the last identifier is the column name.
3448+
$last_identifier = end( $identifiers );
3449+
$column_name = $this->unquote_sqlite_identifier( $this->translate( $last_identifier ) );
3450+
$this->record_select_column_name_for_ambiguous_column_resolution( $column_name, $item );
3451+
}
3452+
}
32373453
$is_column_ref = $column_ref && $item === $this->translate( $column_ref );
32383454
if ( $is_column_ref ) {
32393455
return $item;
@@ -3256,6 +3472,7 @@ public function translate_select_item( WP_Parser_Node $node ): string {
32563472
// let's avoid unnecessary aliases ("SELECT `id` AS `id` FROM t").
32573473
return $item;
32583474
}
3475+
$this->record_select_column_name_for_ambiguous_column_resolution( $raw_alias );
32593476
return sprintf( '%s AS %s', $item, $alias );
32603477
}
32613478

0 commit comments

Comments
 (0)