@@ -26,14 +26,12 @@ use databend_common_expression::stat_distribution::ArgStat;
2626use databend_common_expression:: types:: DataType ;
2727use databend_common_expression:: types:: NumberScalar ;
2828use databend_common_functions:: BUILTIN_FUNCTIONS ;
29- use databend_common_statistics:: DEFAULT_HISTOGRAM_BUCKETS ;
30- use databend_common_statistics:: Datum ;
3129
30+ use super :: constraint:: ValueConstraint ;
3231use crate :: ColumnBinding ;
3332use crate :: Symbol ;
3433use crate :: optimizer:: ir:: ColumnStat ;
3534use crate :: optimizer:: ir:: ColumnStatSet ;
36- use crate :: optimizer:: ir:: HistogramBuilder ;
3735use crate :: plans:: ComparisonOp ;
3836use crate :: plans:: FunctionCall ;
3937use crate :: plans:: ScalarExpr ;
@@ -237,7 +235,8 @@ impl SelectivityVisitor<'_> {
237235 let column_stat = self
238236 . ensure_column_stat ( column_index)
239237 . expect ( "checked above" ) ;
240- Self :: update_comparison_column_stat ( column_stat, op, const_datum, selectivity) ?;
238+ let constraint = ValueConstraint :: from_comparison ( op, const_datum) ;
239+ constraint. apply_to_column_stat ( column_stat, selectivity. as_n ( ) . copied ( ) ) ?;
241240 return Ok ( selectivity) ;
242241 }
243242 ( Expr :: FunctionCall ( func) , Expr :: Constant ( val) )
@@ -294,47 +293,6 @@ impl SelectivityVisitor<'_> {
294293 Ok ( Selectivity :: N ( distr. true_count . expected / self . cardinality ) )
295294 }
296295
297- fn update_comparison_column_stat (
298- column_stat : & mut ColumnStat ,
299- op : ComparisonOp ,
300- const_datum : Datum ,
301- selectivity : Selectivity ,
302- ) -> Result < ( ) > {
303- match op {
304- ComparisonOp :: Equal => {
305- * column_stat = ColumnStat :: from_const ( const_datum) ;
306- Ok ( ( ) )
307- }
308- ComparisonOp :: NotEqual => {
309- if let Selectivity :: N ( n) = selectivity {
310- update_statistic (
311- column_stat,
312- column_stat. min . clone ( ) ,
313- column_stat. max . clone ( ) ,
314- n,
315- ) ?;
316- }
317- Ok ( ( ) )
318- }
319- _ => {
320- match selectivity {
321- Selectivity :: N ( 0.0 ) => {
322- column_stat. ndv = column_stat. ndv . reduce_by_selectivity ( 0.0 ) ;
323- }
324- Selectivity :: N ( n) if n < 1.0 => {
325- if let Some ( ( new_min, new_max) ) =
326- comparison_range_bounds ( column_stat, & const_datum, op) ?
327- {
328- update_statistic ( column_stat, new_min, new_max, n) ?;
329- }
330- }
331- _ => { }
332- }
333- Ok ( ( ) )
334- }
335- }
336- }
337-
338296 // The method uses probability predication to compute like selectivity.
339297 // The core idea is from postgresql.
340298 fn compute_like ( & mut self , func : & ExprCall ) -> Result < Selectivity > {
@@ -532,78 +490,20 @@ fn is_true_constant_predicate(constant: &Constant) -> bool {
532490 }
533491}
534492
535- fn comparison_range_bounds (
536- column_stat : & ColumnStat ,
537- const_datum : & Datum ,
538- op : ComparisonOp ,
539- ) -> Result < Option < ( Datum , Datum ) > > {
540- let ( new_min, new_max) = match op {
541- ComparisonOp :: GT | ComparisonOp :: GTE => (
542- Datum :: max ( Some ( column_stat. min . clone ( ) ) , Some ( const_datum. clone ( ) ) ) ,
543- Some ( column_stat. max . clone ( ) ) ,
544- ) ,
545- ComparisonOp :: LT | ComparisonOp :: LTE => (
546- Some ( column_stat. min . clone ( ) ) ,
547- Datum :: min ( Some ( column_stat. max . clone ( ) ) , Some ( const_datum. clone ( ) ) ) ,
548- ) ,
549- _ => unreachable ! ( ) ,
550- } ;
551- let ( Some ( new_min) , Some ( new_max) ) = ( new_min, new_max) else {
552- return Ok ( None ) ;
553- } ;
554- if new_min. compare ( & new_max) ? == std:: cmp:: Ordering :: Greater {
555- return Ok ( None ) ;
556- }
557-
558- Ok ( Some ( ( new_min, new_max) ) )
559- }
560-
561- fn update_statistic (
562- column_stat : & mut ColumnStat ,
563- new_min : Datum ,
564- new_max : Datum ,
565- selectivity : f64 ,
566- ) -> Result < ( ) > {
567- column_stat. ndv = column_stat. ndv . reduce_by_selectivity ( selectivity) ;
568- column_stat. min = new_min. clone ( ) ;
569- column_stat. max = new_max. clone ( ) ;
570- column_stat. null_count = ( column_stat. null_count as f64 * selectivity) . ceil ( ) as u64 ;
571-
572- if let Some ( histogram) = & column_stat. histogram {
573- // If selectivity < 0.2, most buckets are invalid and
574- // the accuracy histogram can be discarded.
575- // Todo: support unfixed buckets number for histogram and prune the histogram.
576- if !histogram. accuracy ( ) || selectivity < 0.2 {
577- let num_values = histogram. num_values ( ) ;
578- let new_num_values = ( num_values * selectivity) . ceil ( ) as u64 ;
579- let new_ndv = column_stat. ndv . value ( ) as u64 ;
580- column_stat. histogram = if new_ndv <= 2 {
581- None
582- } else {
583- Some ( HistogramBuilder :: from_ndv (
584- new_ndv,
585- new_num_values. max ( new_ndv) ,
586- Some ( ( new_min, new_max) ) ,
587- DEFAULT_HISTOGRAM_BUCKETS ,
588- ) ?)
589- }
590- }
591- }
592-
593- Ok ( ( ) )
594- }
595-
596493#[ cfg( test) ]
597494mod tests {
598495 use databend_common_expression:: Scalar ;
599496 use databend_common_expression:: types:: NumberDataType ;
600497 use databend_common_expression:: types:: NumberScalar ;
601498 use databend_common_expression:: types:: decimal:: DecimalScalar ;
602499 use databend_common_expression:: types:: decimal:: DecimalSize ;
500+ use databend_common_statistics:: DEFAULT_HISTOGRAM_BUCKETS ;
501+ use databend_common_statistics:: Datum ;
603502
604503 use super :: * ;
605504 use crate :: ColumnBindingBuilder ;
606505 use crate :: Visibility ;
506+ use crate :: optimizer:: ir:: HistogramBuilder ;
607507 use crate :: optimizer:: ir:: Ndv ;
608508 use crate :: plans:: BoundColumnRef ;
609509 use crate :: plans:: ConstantExpr ;
0 commit comments