From 6048e05a10988ce1be1d2426af7e7eb3f4c631be Mon Sep 17 00:00:00 2001 From: kould Date: Fri, 5 Jun 2026 00:20:49 +0800 Subject: [PATCH 1/3] Fix USING column binding for right joins --- src/binder/expr.rs | 27 +++++++----- src/binder/mod.rs | 96 ++++++++++++++++++++++++++++++++++++----- src/binder/select.rs | 24 +++++++++-- tests/slt/crdb/join.slt | 15 +++---- 4 files changed, 131 insertions(+), 31 deletions(-) diff --git a/src/binder/expr.rs b/src/binder/expr.rs index 51d04a72..85ef96d1 100644 --- a/src/binder/expr.rs +++ b/src/binder/expr.rs @@ -623,19 +623,26 @@ impl<'a, T: Transaction, A: AsRef<[(&'static str, DataValue)]>> Binder<'a, '_, T )) } else { // handle col syntax - let mut got_column = Self::find_column_in_scope( - &self.context, - &mut self.table_schema_buf, - full_name.1.as_str(), - ); + let mut find_visible_column = + |context: &BinderContext<'a, T>| -> Result, DatabaseError> { + Ok(context + .using + .get(full_name.1.as_str()) + .map(|using_column| using_column.visible_expr()) + .transpose()? + .or_else(|| { + Self::find_column_in_scope( + context, + &mut self.table_schema_buf, + full_name.1.as_str(), + ) + })) + }; + let mut got_column = find_visible_column(&self.context)?; if got_column.is_none() { if let Some(parent) = self.parent { self.context.mark_outer_ref(); - got_column = Self::find_column_in_scope( - &parent.context, - &mut self.table_schema_buf, - full_name.1.as_str(), - ); + got_column = find_visible_column(&parent.context)?; } } match got_column { diff --git a/src/binder/mod.rs b/src/binder/mod.rs index 92bb0e12..78ffdd09 100644 --- a/src/binder/mod.rs +++ b/src/binder/mod.rs @@ -39,7 +39,7 @@ use sqlparser::ast::{ Statement, TableObject, }; use sqlparser::tokenizer::Span; -use std::collections::{BTreeMap, HashMap, HashSet}; +use std::collections::{BTreeMap, HashMap}; use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::Arc; @@ -54,6 +54,7 @@ use crate::planner::{LogicalPlan, SchemaOutput}; use crate::storage::{TableCache, Transaction, ViewCache}; use crate::types::tuple::SchemaRef; use crate::types::value::DataValue; +use crate::types::LogicalType; pub enum InputRefType { AggCall, @@ -204,6 +205,69 @@ impl BoundSource<'_> { } } +#[derive(Clone)] +pub(crate) struct UsingColumn { + join_type: JoinType, + left_column: ColumnRef, + left_position: usize, + right_column: ColumnRef, + right_position: usize, +} + +impl UsingColumn { + fn new( + join_type: JoinType, + left_column: ColumnRef, + left_position: usize, + right_column: ColumnRef, + right_position: usize, + ) -> Self { + Self { + join_type, + left_column, + left_position, + right_column, + right_position, + } + } + + fn left_expr(&self) -> ScalarExpression { + ScalarExpression::column_expr(self.left_column.clone(), self.left_position) + } + + fn right_expr(&self) -> ScalarExpression { + ScalarExpression::column_expr(self.right_column.clone(), self.right_position) + } + + pub(crate) fn visible_expr(&self) -> Result { + match self.join_type { + JoinType::RightOuter => Ok(self.right_expr()), + JoinType::Full => { + let left_expr = self.left_expr(); + let right_expr = self.right_expr(); + let left_ty = left_expr.return_type(); + let right_ty = right_expr.return_type(); + let ty = LogicalType::max_logical_type(&left_ty, &right_ty)?.into_owned(); + + Ok(ScalarExpression::Coalesce { + exprs: vec![left_expr, right_expr], + ty, + }) + } + JoinType::Inner | JoinType::LeftOuter | JoinType::Cross => Ok(self.left_expr()), + } + } + + pub(crate) fn hides_column(&self, column: &ColumnRef) -> bool { + let hidden_column = if self.join_type.is_right() { + &self.left_column + } else { + &self.right_column + }; + hidden_column.same_column(column) + } +} + #[derive(Clone)] pub struct BinderContext<'a, T: Transaction> { pub(crate) scala_functions: &'a ScalaFunctions, @@ -221,7 +285,7 @@ pub struct BinderContext<'a, T: Transaction> { group_by_exprs: Vec, pub(crate) agg_calls: Vec, // join - using: HashSet, + using: HashMap, bind_step: QueryBindStep, sub_queries: HashMap>, @@ -471,15 +535,27 @@ impl<'a, T: Transaction> BinderContext<'a, T> { pub fn add_using( &mut self, + name: String, join_type: JoinType, - left_expr: &ColumnRef, - right_expr: &ColumnRef, - ) { - self.using.insert(if join_type.is_right() { - left_expr.clone() - } else { - right_expr.clone() - }); + left_column: &ColumnRef, + left_position: usize, + right_column: &ColumnRef, + right_position: usize, + ) -> Result<(), DatabaseError> { + if self.using.contains_key(&name) { + return Err(DatabaseError::UnsupportedStmt(format!( + "duplicate `USING({name})` across joins is not supported" + ))); + } + let using_column = UsingColumn::new( + join_type, + left_column.clone(), + left_position, + right_column.clone(), + right_position, + ); + self.using.insert(name, using_column); + Ok(()) } pub fn add_alias( diff --git a/src/binder/select.rs b/src/binder/select.rs index 2ef20df6..22a1a2d0 100644 --- a/src/binder/select.rs +++ b/src/binder/select.rs @@ -1152,7 +1152,11 @@ impl<'a: 'b, 'b, T: Transaction, A: AsRef<[(&'static str, DataValue)]>> Binder<' return Some(&table_name) == column.table_name(); } is_qualified_wildcard - || Some(&table_name) == column.table_name() && !context.using.contains(column) + || Some(&table_name) == column.table_name() + && !context + .using + .values() + .any(|using_column| using_column.hides_column(column)) }; let (schema_ref, position_offset) = @@ -1909,7 +1913,14 @@ impl<'a: 'b, 'b, T: Transaction, A: AsRef<[(&'static str, DataValue)]>> Binder<' ident, )); }; - self.context.add_using(join_type, left_column, right_column); + self.context.add_using( + name.clone(), + join_type, + left_column, + left_position, + right_column, + left_schema.len() + right_position, + )?; on_keys.push(( ScalarExpression::column_expr(left_column.clone(), left_position), ScalarExpression::column_expr( @@ -1951,7 +1962,14 @@ impl<'a: 'b, 'b, T: Transaction, A: AsRef<[(&'static str, DataValue)]>> Binder<' left_schema.len() + right_position, ); - self.context.add_using(join_type, left_column, right_column); + self.context.add_using( + name.to_string(), + join_type, + left_column, + left_position, + right_column, + left_schema.len() + right_position, + )?; on_keys.push((left_expr, right_expr)); } } diff --git a/tests/slt/crdb/join.slt b/tests/slt/crdb/join.slt index 9fd23d24..16b37a8f 100644 --- a/tests/slt/crdb/join.slt +++ b/tests/slt/crdb/join.slt @@ -705,9 +705,9 @@ query TTT SELECT s, str1.s, str2.s FROM str1 RIGHT OUTER JOIN str2 USING(s) order by str2.s ---- A A A -null null B -null null C -null null E +B null B +C null C +E null E query ITIT SELECT * FROM str1 LEFT OUTER JOIN str2 ON str1.s = str2.s order by str1.a @@ -877,11 +877,10 @@ SELECT * FROM l RIGHT OUTER JOIN r USING(a) WHERE a = 3 ---- 1 3 1 -# TODO: a= 4 means x on both sides -# query III -# SELECT * FROM l RIGHT OUTER JOIN r USING(a) WHERE a = 4 -# ---- -# NULL 4 1 +query III +SELECT * FROM l RIGHT OUTER JOIN r USING(a) WHERE a = 4 +---- +null 4 1 statement ok drop table if exists foo From b0023194a61ac3e72ce49a385d1c46496d079151 Mon Sep 17 00:00:00 2001 From: kould Date: Fri, 5 Jun 2026 00:41:28 +0800 Subject: [PATCH 2/3] Add USING binding coverage for join types --- tests/slt/crdb/join.slt | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/tests/slt/crdb/join.slt b/tests/slt/crdb/join.slt index 16b37a8f..6f42b2c1 100644 --- a/tests/slt/crdb/join.slt +++ b/tests/slt/crdb/join.slt @@ -862,6 +862,31 @@ INSERT INTO l VALUES (1, 1), (2, 1), (3, 1) statement ok INSERT INTO r VALUES (2, 1), (3, 1), (4, 1) +query III +SELECT a, l.a, r.a FROM l INNER JOIN r USING(a) WHERE a = 2 +---- +2 2 2 + +query III +SELECT a, l.a, r.a FROM l LEFT OUTER JOIN r USING(a) WHERE a = 1 +---- +1 1 null + +query III +SELECT a, l.a, r.a FROM l RIGHT OUTER JOIN r USING(a) WHERE a = 4 +---- +4 null 4 + +query III +SELECT a, l.a, r.a FROM l FULL OUTER JOIN r USING(a) WHERE a = 1 +---- +1 1 null + +query III +SELECT a, l.a, r.a FROM l FULL OUTER JOIN r USING(a) WHERE a = 4 +---- +4 null 4 + query III SELECT * FROM l LEFT OUTER JOIN r USING(a) WHERE a = 1 ---- From 7334a54f3a17e8e6416e1d30d51eabc7fafd9c76 Mon Sep 17 00:00:00 2001 From: kould Date: Fri, 5 Jun 2026 00:44:43 +0800 Subject: [PATCH 3/3] Update right join USING unit test --- src/execution/dql/join/nested_loop_join.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/execution/dql/join/nested_loop_join.rs b/src/execution/dql/join/nested_loop_join.rs index 35305e6d..50eaa5be 100644 --- a/src/execution/dql/join/nested_loop_join.rs +++ b/src/execution/dql/join/nested_loop_join.rs @@ -1178,7 +1178,7 @@ mod test { } #[test] - fn test_right_join_using_keeps_left_visible_column_binding() -> Result<(), DatabaseError> { + fn test_right_join_using_binds_visible_column_to_right_side() -> Result<(), DatabaseError> { let temp_dir = TempDir::new().expect("unable to create temporary working directory"); let db = DataBaseBuilder::path(temp_dir.path()).build_in_memory()?; @@ -1216,9 +1216,9 @@ mod test { Some("A".to_string()), Some("A".to_string()) ], - vec![None, None, Some("B".to_string())], - vec![None, None, Some("C".to_string())], - vec![None, None, Some("E".to_string())], + vec![Some("B".to_string()), None, Some("B".to_string())], + vec![Some("C".to_string()), None, Some("C".to_string())], + vec![Some("E".to_string()), None, Some("E".to_string())], ] );