@@ -276,6 +276,7 @@ impl SelectivityVisitor<'_> {
276276 let column_stat = self
277277 . ensure_column_stat ( column_index)
278278 . expect ( "checked above" ) ;
279+
279280 return Self :: compute_comparison_with_stat (
280281 column_stat,
281282 constant,
@@ -366,6 +367,9 @@ impl SelectivityVisitor<'_> {
366367 Some ( histogram) => {
367368 let selectivity =
368369 Self :: compute_histogram_comparison ( histogram, op, & const_datum) ?;
370+
371+ let distorted = !histogram. accuracy && is_histogram_range_distorted ( histogram) ;
372+
369373 if let Selectivity :: N ( n) = selectivity {
370374 let ( new_min, new_max) = match op {
371375 ComparisonOp :: GT | ComparisonOp :: GTE => {
@@ -376,9 +380,20 @@ impl SelectivityVisitor<'_> {
376380 }
377381 _ => unreachable ! ( ) ,
378382 } ;
383+
379384 update_statistic ( column_stat, new_min, new_max, n) ?;
380385 }
381- Ok ( selectivity)
386+
387+ // For inaccurate histograms with a distorted range (e.g. outlier
388+ // sentinel values inflating min/max), the linear interpolation above
389+ // is unreliable. We still ran update_statistic so that min/max bounds
390+ // are narrowed correctly for subsequent predicates on the same column
391+ // (e.g. the `< 200` in `col > 100 AND col < 200`), but we override
392+ // the selectivity with LowerBound.
393+ match distorted {
394+ true => Ok ( Selectivity :: LowerBound ) ,
395+ false => Ok ( selectivity) ,
396+ }
382397 }
383398 None => {
384399 if column_is_integer {
@@ -806,3 +821,10 @@ impl Selectivity {
806821 }
807822 }
808823}
824+
825+ fn is_histogram_range_distorted ( histogram : & Histogram ) -> bool {
826+ const BUCKET_WIDTH_THRESHOLD : f64 = 1e12 ;
827+ histogram
828+ . avg_spacing
829+ . is_some_and ( |bw| bw > BUCKET_WIDTH_THRESHOLD )
830+ }
0 commit comments