Skip to content

Commit 7376ef3

Browse files
Restrict SQL top-k detection to OnlyTemporal queries
1 parent 7043753 commit 7376ef3

2 files changed

Lines changed: 49 additions & 6 deletions

File tree

asap-query-engine/src/engines/simple_engine/sql.rs

Lines changed: 28 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -606,12 +606,16 @@ impl SimpleEngine {
606606
}
607607
};
608608

609-
// Top-k detection takes precedence: `... ORDER BY <agg alias> DESC LIMIT k`
610-
// is served by CountMinSketchWithHeap (Statistic::Topk) rather than the
611-
// plain COUNT/SUM path, so the sketch heap drives the result set. Both
612-
// COUNT(col) and SUM(col) map to Topk; they differ only in how the
613-
// backing sketch is weighted (see TopkWeighting).
614-
let topk = detect_sql_topk(&query_data);
609+
// Top-k (CountMinSketchWithHeap) is defined only for flat temporal queries:
610+
// one window, one GROUP BY key, COUNT/SUM ... ORDER BY <agg alias> DESC LIMIT k.
611+
// Nested patterns attach ORDER BY / LIMIT to the outer SELECT; `query_data` from
612+
// parse is the outer layer, while the temporal aggregate lives in `inner_data`
613+
// for OneTemporalOneSpatial. Running detect_sql_topk on the outer layer would
614+
// mis-classify spatial rollups as top-k.
615+
let topk = match query_pattern_type {
616+
QueryPatternType::OnlyTemporal => detect_sql_topk(&query_data),
617+
QueryPatternType::OnlySpatial | QueryPatternType::OneTemporalOneSpatial => None,
618+
};
615619
let statistic_to_compute = if topk.is_some() {
616620
Statistic::Topk
617621
} else {
@@ -961,6 +965,24 @@ mod detect_topk_tests {
961965
let qd = parse(&sql).expect("query should parse");
962966
assert_eq!(detect_sql_topk(&qd), None);
963967
}
968+
969+
#[test]
970+
fn nested_outer_layer_would_match_detect_sql_topk() {
971+
// Spatial-over-temporal: ORDER BY / LIMIT sit on the outer SELECT, so the
972+
// parsed top-level `query_data` looks like SUM top-k even though the temporal
973+
// aggregate is in the subquery. The engine must not promote this to Topk.
974+
let sql = format!(
975+
"SELECT srcip, SUM(bytes) AS rollup FROM ( \
976+
SELECT srcip, dstip, SUM(pkt_len) AS bytes FROM netflow_table {WINDOW} \
977+
GROUP BY srcip, dstip \
978+
) sub GROUP BY srcip ORDER BY rollup DESC LIMIT 10"
979+
);
980+
let qd = parse(&sql).expect("nested query should parse");
981+
assert!(
982+
detect_sql_topk(&qd).is_some(),
983+
"outer SELECT alone matches the top-k shape (this is why OnlyTemporal is gated)",
984+
);
985+
}
964986
}
965987

966988
#[cfg(test)]

asap-query-engine/src/tests/sql_pattern_matching_tests.rs

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ mod tests {
1212
use crate::engines::simple_engine::SimpleEngine;
1313
use crate::stores::simple_map_store::SimpleMapStore;
1414
use promql_utilities::data_model::KeyByLabelNames;
15+
use promql_utilities::query_logics::enums::Statistic;
1516
use sql_utilities::sqlhelper::{SQLSchema, Table};
1617
use std::collections::{HashMap, HashSet};
1718
use std::sync::Arc;
@@ -176,6 +177,26 @@ mod tests {
176177
);
177178
}
178179

180+
#[test]
181+
fn nested_order_by_limit_is_not_topk() {
182+
// Outer ORDER BY + LIMIT on a spatial-over-temporal query must not be routed
183+
// through CountMinSketchWithHeap; the inner temporal SUM is not a flat top-k.
184+
let template = "SELECT L1, SUM(result) FROM (SELECT SUM(value) AS result FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2, L3, L4) sub GROUP BY L1";
185+
let engine = build_sql_engine(template, 1, 10);
186+
187+
let incoming = "SELECT L1, SUM(result) AS rollup FROM (SELECT SUM(value) AS result FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, '2025-10-01 00:00:10') AND '2025-10-01 00:00:10' GROUP BY L1, L2, L3, L4) sub GROUP BY L1 ORDER BY rollup DESC LIMIT 10";
188+
let query_time = 1727740810.0_f64;
189+
190+
let context = engine
191+
.build_query_execution_context_sql(incoming.to_string(), query_time)
192+
.expect("nested spatial-of-temporal query should build a context");
193+
assert_ne!(
194+
context.metadata.statistic_to_compute,
195+
Statistic::Topk,
196+
"top-k detection is OnlyTemporal-only; nested outer ORDER BY LIMIT must stay on the plain SUM path",
197+
);
198+
}
199+
179200
#[test]
180201
fn test_no_match_returns_none() {
181202
// Engine has a SUM template; incoming uses AVG — should never match

0 commit comments

Comments
 (0)