Skip to content

Commit 4e2558b

Browse files
authored
fix(query): grace join and selectivity improvements (#19758)
* fix: grace join and selectivity improvements * z * z
1 parent 2266935 commit 4e2558b

5 files changed

Lines changed: 73 additions & 5 deletions

File tree

src/common/statistics/src/histogram.rs

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,11 +38,21 @@ pub const DEFAULT_HISTOGRAM_BUCKETS: usize = 100;
3838
pub struct Histogram {
3939
pub accuracy: bool,
4040
pub buckets: Vec<HistogramBucket>,
41+
/// Average bucket spacing (i.e. bucket width): (max - min) / num_buckets.
42+
/// Only set when accuracy == false (generated from NDV + min/max).
43+
/// Used to detect range distortion caused by outlier values inflating min/max.
44+
/// A very large bucket width (`avg_spacing`) means linear interpolation within buckets is unreliable.
45+
#[serde(default)]
46+
pub avg_spacing: Option<f64>,
4147
}
4248

4349
impl Histogram {
4450
pub fn new(buckets: Vec<HistogramBucket>, accuracy: bool) -> Self {
45-
Self { accuracy, buckets }
51+
Self {
52+
accuracy,
53+
buckets,
54+
avg_spacing: None,
55+
}
4656
}
4757

4858
/// Get number of buckets

src/query/service/src/pipelines/processors/transforms/new_hash_join/grace/grace_join.rs

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -303,7 +303,7 @@ impl<T: GraceMemoryJoin> GraceHashJoin<T> {
303303
)?;
304304

305305
for hash in hashes.iter_mut() {
306-
*hash = ((*hash << self.shift_bits) >> 60) & 0b1111;
306+
*hash = Self::get_partition_id(*hash, self.shift_bits);
307307
}
308308

309309
Ok(self.build_partition_stream.partition(hashes, data, true))
@@ -324,7 +324,7 @@ impl<T: GraceMemoryJoin> GraceHashJoin<T> {
324324
)?;
325325

326326
for hash in hashes.iter_mut() {
327-
*hash = ((*hash << self.shift_bits) >> 60) & 0b1111;
327+
*hash = Self::get_partition_id(*hash, self.shift_bits);
328328
}
329329

330330
Ok(self.probe_partition_stream.partition(hashes, data, true))
@@ -385,6 +385,20 @@ impl<T: GraceMemoryJoin> GraceHashJoin<T> {
385385

386386
Ok(())
387387
}
388+
389+
#[inline(always)]
390+
#[cfg(target_feature = "sse4.2")]
391+
fn get_partition_id(hash: u64, shift_bits: usize) -> u64 {
392+
// On SSE4.2, _mm_crc32_u64 only sets the low 32 bits; high 32 bits are always 0.
393+
// Extract partition bits from the low 32 bits to avoid all rows landing in partition 0.
394+
(hash << shift_bits >> 28) & 0b1111
395+
}
396+
397+
#[inline(always)]
398+
#[cfg(not(target_feature = "sse4.2"))]
399+
fn get_partition_id(hash: u64, shift_bits: usize) -> u64 {
400+
(hash << shift_bits >> 60) & 0b1111
401+
}
388402
}
389403

390404
pub enum RestoreStage {

src/query/service/src/pipelines/processors/transforms/new_hash_join/hybrid/hybrid_state.rs

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,8 +54,15 @@ impl HybridHashJoinState {
5454
level: usize,
5555
factory: Arc<HashJoinFactory>,
5656
) -> Result<Arc<HybridHashJoinState>> {
57+
// On SSE4.2, fast_hash (_mm_crc32_u64) only sets the low 32 bits, so fewer hash bits are available for partitioning; cap the spill level accordingly.
58+
#[cfg(target_feature = "sse4.2")]
59+
const HASH_JOIN_SPILL_MAX_LEVEL: usize = 7;
60+
#[cfg(not(target_feature = "sse4.2"))]
61+
const HASH_JOIN_SPILL_MAX_LEVEL: usize = 15;
62+
5763
let settings = ctx.get_settings();
58-
let max_level = settings.get_max_hash_join_spill_level()? as usize;
64+
let max_spill_level = settings.get_max_hash_join_spill_level()? as usize;
65+
let max_level = (max_spill_level).min(HASH_JOIN_SPILL_MAX_LEVEL);
5966

6067
Ok(Arc::new(HybridHashJoinState {
6168
ctx,

src/query/sql/src/planner/optimizer/ir/stats/histogram.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ impl HistogramBuilder {
5151
Ok(Histogram {
5252
buckets: vec![],
5353
accuracy: false,
54+
avg_spacing: None,
5455
})
5556
};
5657
}
@@ -69,6 +70,19 @@ impl HistogramBuilder {
6970
}
7071
};
7172

73+
// Compute avg_spacing before moving min/max into UniformSampleSet.
74+
// avg_spacing = (max - min) / num_buckets (bucket width, using the requested num_buckets rather than the NDV-adjusted count below); used later to detect range distortion.
75+
let avg_spacing = if let (Ok(min_f), Ok(max_f)) = (min.as_double(), max.as_double()) {
76+
let range = max_f - min_f;
77+
if range > 0.0 && num_buckets > 0 {
78+
Some(range / num_buckets as f64)
79+
} else {
80+
None
81+
}
82+
} else {
83+
None
84+
};
85+
7286
// Adjust number of buckets if needed
7387
let adjusted_num_buckets = if num_buckets > ndv as usize {
7488
ndv as usize
@@ -83,6 +97,7 @@ impl HistogramBuilder {
8397
Ok(Histogram {
8498
buckets,
8599
accuracy: false,
100+
avg_spacing,
86101
})
87102
}
88103

src/query/sql/src/planner/optimizer/ir/stats/selectivity.rs

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,7 @@ impl SelectivityVisitor<'_> {
276276
let column_stat = self
277277
.ensure_column_stat(column_index)
278278
.expect("checked above");
279+
279280
return Self::compute_comparison_with_stat(
280281
column_stat,
281282
constant,
@@ -366,6 +367,9 @@ impl SelectivityVisitor<'_> {
366367
Some(histogram) => {
367368
let selectivity =
368369
Self::compute_histogram_comparison(histogram, op, &const_datum)?;
370+
371+
let distorted = !histogram.accuracy && is_histogram_range_distorted(histogram);
372+
369373
if let Selectivity::N(n) = selectivity {
370374
let (new_min, new_max) = match op {
371375
ComparisonOp::GT | ComparisonOp::GTE => {
@@ -376,9 +380,20 @@ impl SelectivityVisitor<'_> {
376380
}
377381
_ => unreachable!(),
378382
};
383+
379384
update_statistic(column_stat, new_min, new_max, n)?;
380385
}
381-
Ok(selectivity)
386+
387+
// For inaccurate histograms with a distorted range (e.g. outlier
388+
// sentinel values inflating min/max), the linear interpolation above
389+
// is unreliable. We still ran update_statistic so that min/max bounds
390+
// are narrowed correctly for subsequent predicates on the same column
391+
// (e.g. the `< 200` in `col > 100 AND col < 200`), but we override
392+
// the selectivity with LowerBound.
393+
match distorted {
394+
true => Ok(Selectivity::LowerBound),
395+
false => Ok(selectivity),
396+
}
382397
}
383398
None => {
384399
if column_is_integer {
@@ -806,3 +821,10 @@ impl Selectivity {
806821
}
807822
}
808823
}
824+
825+
fn is_histogram_range_distorted(histogram: &Histogram) -> bool {
826+
const BUCKET_WIDTH_THRESHOLD: f64 = 1e12;
827+
histogram
828+
.avg_spacing
829+
.is_some_and(|bw| bw > BUCKET_WIDTH_THRESHOLD)
830+
}

0 commit comments

Comments
 (0)