Skip to content

Commit 1d08370

Browse files
committed
x
1 parent 99ae306 commit 1d08370

4 files changed

Lines changed: 190 additions & 55 deletions

File tree

src/query/expression/src/function/comparison.rs

Lines changed: 141 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -33,7 +33,7 @@ pub trait StatComparisonOp {
3333
const SELECT_LESS: bool;
3434
const INCLUDE_EQUAL: bool;
3535

36-
fn estimate_minmax_range_true_count(
36+
fn range_true_count(
3737
ndv: Ndv,
3838
cardinality: f64,
3939
cmp_min: Ordering,
@@ -138,7 +138,7 @@ pub trait ConstantComparisonAdapter {
138138
}
139139

140140
impl<'s, 'a, A: ConstantComparisonAdapter> ConstantComparison<'s, 'a, A> {
141-
pub fn from_constant_args(stat: &'s StatBinaryArg<'a>) -> Result<Option<(Self, bool)>, String> {
141+
pub fn from_args(stat: &'s StatBinaryArg<'a>) -> Result<Option<(Self, bool)>, String> {
142142
if let Some(input) =
143143
Self::new(&stat.args[0], &stat.args[1], stat.cardinality)?.map(|input| (input, false))
144144
{
@@ -196,30 +196,32 @@ impl<'s, 'a, A: ConstantComparisonAdapter> ConstantComparison<'s, 'a, A> {
196196
}
197197

198198
pub fn boolean_stat(&self, true_count: StatEstimate) -> ReturnStat {
199+
let has_true = true_count.upper > 0.0;
200+
let has_false = true_count.lower < self.non_null_cardinality;
201+
let boolean_domain = BooleanDomain {
202+
has_true,
203+
has_false,
204+
};
205+
let value_domain = (has_true || has_false || self.null_count == 0)
206+
.then(|| Box::new(Domain::Boolean(boolean_domain)));
199207
let domain = if self.nullable {
200208
Domain::Nullable(NullableDomain {
201209
has_null: self.null_count != 0,
202-
value: Some(Box::new(Domain::Boolean(BooleanDomain {
203-
has_true: true,
204-
has_false: true,
205-
}))),
210+
value: value_domain,
206211
})
207212
} else {
208-
Domain::Boolean(BooleanDomain {
209-
has_true: true,
210-
has_false: true,
211-
})
213+
Domain::Boolean(boolean_domain)
212214
};
213-
215+
let possible_values = has_true as u8 + has_false as u8;
214216
ReturnStat {
215217
domain,
216-
ndv: Ndv::Stat(2.0),
218+
ndv: Ndv::Stat(possible_values as f64),
217219
null_count: self.null_count,
218220
distribution: OwnedDistribution::Boolean(BooleanDistribution { true_count }),
219221
}
220222
}
221223

222-
pub fn constant_equality_true_count(
224+
pub fn equality_true_count(
223225
&self,
224226
minmax_cmp: Option<(Ordering, Ordering)>,
225227
not_eq: bool,
@@ -271,6 +273,131 @@ pub fn estimate_ndv_true_count(ndv: Ndv, not_eq: bool, cardinality: f64) -> Stat
271273
let expected = selectivity * cardinality;
272274
match ndv {
273275
Ndv::Stat(_) => StatEstimate::exact(expected),
274-
Ndv::Max(_) => StatEstimate::new(0.0, expected, cardinality),
276+
Ndv::Max(_) => {
277+
if not_eq {
278+
StatEstimate::new(0.0, expected, expected)
279+
} else {
280+
StatEstimate::new(expected, expected, cardinality)
281+
}
282+
}
283+
}
284+
}
285+
286+
#[cfg(test)]
287+
mod tests {
288+
use super::*;
289+
290+
struct TestAdapter;
291+
292+
impl ConstantComparisonAdapter for TestAdapter {
293+
type Value = ();
294+
type Domain = ();
295+
296+
fn constant(_scalar: Scalar) -> Result<Self::Value, String> {
297+
unimplemented!()
298+
}
299+
300+
fn domain(_domain: &Domain) -> Result<Self::Domain, String> {
301+
unimplemented!()
302+
}
303+
304+
fn compare(_left: &Self::Value, _right: &Self::Value) -> Ordering {
305+
unimplemented!()
306+
}
307+
}
308+
309+
fn test_comparison<'a>(
310+
stat: &'a ArgStat<'a>,
311+
non_null_cardinality: f64,
312+
null_count: u64,
313+
nullable: bool,
314+
) -> ConstantComparison<'a, 'a, TestAdapter> {
315+
ConstantComparison {
316+
stat,
317+
constant: (),
318+
domain: None,
319+
non_null_cardinality,
320+
null_count,
321+
nullable,
322+
_a: PhantomData,
323+
}
324+
}
325+
326+
#[test]
327+
fn test_boolean_stat_uses_true_count_domain_and_ndv() {
328+
let stat = ArgStat {
329+
domain: Domain::Boolean(BooleanDomain {
330+
has_true: true,
331+
has_false: true,
332+
}),
333+
ndv: Ndv::Stat(2.0),
334+
null_count: 0,
335+
distribution: crate::stat_distribution::BorrowedDistribution::Unknown,
336+
};
337+
let comparison = test_comparison(&stat, 100.0, 0, false);
338+
339+
let all_false = comparison.boolean_stat(StatEstimate::exact(0.0));
340+
assert_eq!(
341+
all_false.domain,
342+
Domain::Boolean(BooleanDomain {
343+
has_true: false,
344+
has_false: true,
345+
})
346+
);
347+
assert!(matches!(all_false.ndv, Ndv::Stat(1.0)));
348+
349+
let all_true = comparison.boolean_stat(StatEstimate::exact(100.0));
350+
assert_eq!(
351+
all_true.domain,
352+
Domain::Boolean(BooleanDomain {
353+
has_true: true,
354+
has_false: false,
355+
})
356+
);
357+
assert!(matches!(all_true.ndv, Ndv::Stat(1.0)));
358+
359+
let uncertain = comparison.boolean_stat(StatEstimate::new(10.0, 10.0, 100.0));
360+
assert_eq!(
361+
uncertain.domain,
362+
Domain::Boolean(BooleanDomain {
363+
has_true: true,
364+
has_false: true,
365+
})
366+
);
367+
assert!(matches!(uncertain.ndv, Ndv::Stat(2.0)));
368+
}
369+
370+
#[test]
371+
fn test_boolean_stat_omits_nullable_value_domain_without_non_null_values() {
372+
let stat = ArgStat {
373+
domain: Domain::Nullable(NullableDomain {
374+
has_null: true,
375+
value: None,
376+
}),
377+
ndv: Ndv::Stat(0.0),
378+
null_count: 10,
379+
distribution: crate::stat_distribution::BorrowedDistribution::Unknown,
380+
};
381+
let comparison = test_comparison(&stat, 0.0, 10, true);
382+
383+
let output = comparison.boolean_stat(StatEstimate::exact(0.0));
384+
assert_eq!(
385+
output.domain,
386+
Domain::Nullable(NullableDomain {
387+
has_null: true,
388+
value: None,
389+
})
390+
);
391+
assert!(matches!(output.ndv, Ndv::Stat(0.0)));
392+
output.check_consistency().unwrap();
393+
}
394+
395+
#[test]
396+
fn test_estimate_ndv_true_count_uses_max_ndv_bounds() {
397+
let eq_count = estimate_ndv_true_count(Ndv::Max(10.0), false, 100.0);
398+
assert_eq!(eq_count, StatEstimate::new(10.0, 10.0, 100.0));
399+
400+
let not_eq_count = estimate_ndv_true_count(Ndv::Max(10.0), true, 100.0);
401+
assert_eq!(not_eq_count, StatEstimate::new(0.0, 90.0, 90.0));
275402
}
276403
}

src/query/expression/src/function/stat_distribution.rs

Lines changed: 27 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -231,10 +231,18 @@ fn check_boolean_distribution<D>(
231231
stat: &StatDistribution<D>,
232232
distribution: &BooleanDistribution,
233233
) -> Result<(), String> {
234-
if !matches!(
235-
stat.domain,
236-
Domain::Nullable(NullableDomain { value: Some(box Domain::Boolean(_)), .. })|Domain::Boolean(_)
237-
) {
234+
let valid_domain = match &stat.domain {
235+
Domain::Boolean(_) => true,
236+
Domain::Nullable(NullableDomain {
237+
value: Some(box Domain::Boolean(_)),
238+
..
239+
}) => true,
240+
Domain::Nullable(NullableDomain { value: None, .. }) => {
241+
distribution.true_count.upper == 0.0
242+
}
243+
_ => false,
244+
};
245+
if !valid_domain {
238246
return Err(format!(
239247
"boolean distribution requires boolean non-null value domain, got {:?}",
240248
stat.domain
@@ -340,7 +348,7 @@ mod tests {
340348

341349
valid.check_consistency().unwrap();
342350

343-
let invalid = ReturnStat {
351+
let all_null = ReturnStat {
344352
domain: Domain::Nullable(NullableDomain {
345353
has_null: true,
346354
value: None,
@@ -352,6 +360,20 @@ mod tests {
352360
}),
353361
};
354362

363+
all_null.check_consistency().unwrap();
364+
365+
let invalid = ReturnStat {
366+
domain: Domain::Nullable(NullableDomain {
367+
has_null: true,
368+
value: None,
369+
}),
370+
ndv: Ndv::Max(1.0),
371+
null_count: 10,
372+
distribution: OwnedDistribution::Boolean(BooleanDistribution {
373+
true_count: StatEstimate::new(0.0, 0.5, 1.0),
374+
}),
375+
};
376+
355377
let err = invalid.check_consistency().unwrap_err();
356378
assert!(err.contains("boolean non-null value domain"));
357379
}

src/query/functions/src/scalars/comparison.rs

Lines changed: 17 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -265,12 +265,11 @@ where
265265
return Ok(Some(stat));
266266
}
267267

268-
let Some((input, _)) = ConstantComparison::<TypedComparisonStat<T>>::from_constant_args(&stat)?
269-
else {
268+
let Some((input, _)) = ConstantComparison::<TypedComparisonStat<T>>::from_args(&stat)? else {
270269
return Ok(None);
271270
};
272271

273-
let true_count = input.constant_equality_true_count(
272+
let true_count = input.equality_true_count(
274273
input
275274
.domain
276275
.as_ref()
@@ -298,8 +297,7 @@ where
298297
return Ok(Some(stat));
299298
}
300299

301-
let Some((input, reverse)) =
302-
ConstantComparison::<TypedComparisonStat<T>>::from_constant_args(&stat)?
300+
let Some((input, reverse)) = ConstantComparison::<TypedComparisonStat<T>>::from_args(&stat)?
303301
else {
304302
return Ok(None);
305303
};
@@ -351,7 +349,7 @@ where
351349
return Ok(None);
352350
};
353351

354-
Ok(Op::estimate_minmax_range_true_count(
352+
Ok(Op::range_true_count(
355353
input.stat.ndv,
356354
input.non_null_cardinality,
357355
cmp_min,
@@ -2169,8 +2167,10 @@ mod tests {
21692167
use databend_common_expression::FunctionContext;
21702168
use databend_common_expression::stat_distribution::BorrowedDistribution;
21712169
use databend_common_expression::stat_distribution::Ndv;
2170+
use databend_common_expression::types::Int64Type;
21722171
use databend_common_expression::types::NumberDomain;
21732172
use databend_common_expression::types::SimpleDomain;
2173+
use databend_common_expression::types::UInt8Type;
21742174
use databend_common_expression::types::nullable::NullableDomain;
21752175
use databend_common_expression::types::string::StringDomain;
21762176
use jsonb::OwnedJsonb;
@@ -2225,11 +2225,10 @@ mod tests {
22252225
cardinality: 10.0,
22262226
args: &args,
22272227
};
2228-
let input =
2229-
ConstantComparison::<TypedComparisonStat<NumberType<i64>>>::from_constant_args(&stat)
2230-
.unwrap()
2231-
.unwrap()
2232-
.0;
2228+
let input = ConstantComparison::<TypedComparisonStat<Int64Type>>::from_args(&stat)
2229+
.unwrap()
2230+
.unwrap()
2231+
.0;
22332232
let range = IntegerRangeComparison::from_input(&input).unwrap();
22342233

22352234
let lt_count = range.true_count::<LtOp>();
@@ -2263,7 +2262,7 @@ mod tests {
22632262
cardinality: 10.0,
22642263
args: &args,
22652264
};
2266-
let output = derive_comparison_stat::<NumberType<i64>, GtOp>(stat)
2265+
let output = derive_comparison_stat::<Int64Type, GtOp>(stat)
22672266
.unwrap()
22682267
.unwrap();
22692268
let true_count = output.boolean_distribution().unwrap().true_count;
@@ -2296,7 +2295,7 @@ mod tests {
22962295
args: &args,
22972296
};
22982297

2299-
let output = derive_comparison_stat::<NumberType<i64>, GtOp>(stat)
2298+
let output = derive_comparison_stat::<Int64Type, GtOp>(stat)
23002299
.unwrap()
23012300
.unwrap();
23022301

@@ -2335,14 +2334,13 @@ mod tests {
23352334
cardinality: 11.0,
23362335
args: &args,
23372336
};
2338-
let input =
2339-
ConstantComparison::<TypedComparisonStat<NumberType<u8>>>::from_constant_args(&stat)
2340-
.unwrap()
2341-
.unwrap()
2342-
.0;
2337+
let input = ConstantComparison::<TypedComparisonStat<UInt8Type>>::from_args(&stat)
2338+
.unwrap()
2339+
.unwrap()
2340+
.0;
23432341

23442342
assert_eq!(input.constant, 5_u8);
2345-
let true_count = ordered_comparison_true_count::<NumberType<u8>, GtOp>(&input)
2343+
let true_count = ordered_comparison_true_count::<UInt8Type, GtOp>(&input)
23462344
.unwrap()
23472345
.unwrap();
23482346
assert!((true_count.expected - 5.0).abs() < 1e-9);

src/query/functions/src/scalars/decimal/src/comparison.rs

Lines changed: 5 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -144,8 +144,7 @@ fn derive_decimal_equality_stat<Op: EqualityOp>(
144144
return Ok(Some(stat));
145145
}
146146

147-
let Some((input, _)) = ConstantComparison::<DecimalComparisonStat>::from_constant_args(&stat)?
148-
else {
147+
let Some((input, _)) = ConstantComparison::<DecimalComparisonStat>::from_args(&stat)? else {
149148
return Ok(None);
150149
};
151150

@@ -155,7 +154,7 @@ fn derive_decimal_equality_stat<Op: EqualityOp>(
155154
DecimalComparisonStat::compare(&input.constant, &max),
156155
)
157156
});
158-
let true_count = input.constant_equality_true_count(minmax_cmp, Op::NOT_EQ);
157+
let true_count = input.equality_true_count(minmax_cmp, Op::NOT_EQ);
159158
Ok(Some(input.boolean_stat(true_count)))
160159
}
161160

@@ -166,8 +165,7 @@ fn derive_decimal_range_stat<Op: StatComparisonOp>(
166165
return Ok(Some(stat));
167166
}
168167

169-
let Some((input, reverse)) =
170-
ConstantComparison::<DecimalComparisonStat>::from_constant_args(&stat)?
168+
let Some((input, reverse)) = ConstantComparison::<DecimalComparisonStat>::from_args(&stat)?
171169
else {
172170
return Ok(None);
173171
};
@@ -178,19 +176,9 @@ fn derive_decimal_range_stat<Op: StatComparisonOp>(
178176
let cmp_min = DecimalComparisonStat::compare(&input.constant, &min);
179177
let cmp_max = DecimalComparisonStat::compare(&input.constant, &max);
180178
Ok(if reverse {
181-
Op::Reverse::estimate_minmax_range_true_count(
182-
input.stat.ndv,
183-
input.non_null_cardinality,
184-
cmp_min,
185-
cmp_max,
186-
)
179+
Op::Reverse::range_true_count(input.stat.ndv, input.non_null_cardinality, cmp_min, cmp_max)
187180
} else {
188-
Op::estimate_minmax_range_true_count(
189-
input.stat.ndv,
190-
input.non_null_cardinality,
191-
cmp_min,
192-
cmp_max,
193-
)
181+
Op::range_true_count(input.stat.ndv, input.non_null_cardinality, cmp_min, cmp_max)
194182
}
195183
.map(|true_count| input.boolean_stat(true_count)))
196184
}

0 commit comments

Comments
 (0)