Skip to content

Commit 8140ea7

Browse files
feat(sql-utilities,query-engine): add SQL ORDER BY and LIMIT support
1 parent d7fced2 commit 8140ea7

5 files changed

Lines changed: 614 additions & 27 deletions

File tree

asap-common/dependencies/rs/sql_utilities/src/ast_matching/sqlhelper.rs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,10 +89,28 @@ impl SQLSchema {
8989
#[derive(Debug, Clone)]
9090
pub struct SQLQueryData {
9191
pub aggregation_info: AggregationInfo,
92+
/// Alias of the aggregate function in SELECT, e.g. `agg(v) AS p99` → `Some("p99")`.
93+
/// Captured separately from `aggregation_info` because it's presentational only:
94+
/// two queries that differ solely in alias must still match the same template.
95+
pub aggregation_alias: Option<String>,
9296
pub metric: String,
9397
pub labels: HashSet<String>,
9498
pub time_info: TimeInfo,
9599
pub subquery: Option<Box<SQLQueryData>>,
100+
/// `ORDER BY` items in source order. Empty when no ORDER BY is present.
101+
/// Excluded from `matches_sql_pattern` since ordering is post-aggregation.
102+
pub order_by: Vec<OrderByItem>,
103+
/// `LIMIT N`. None when no LIMIT is present. Excluded from `matches_sql_pattern`.
104+
pub limit: Option<u64>,
105+
}
106+
107+
/// Single `ORDER BY` clause item: a column reference plus sort direction.
108+
/// `column` is either a GROUP BY identifier or the aggregate alias.
109+
#[derive(Debug, Clone, PartialEq, Eq)]
110+
pub struct OrderByItem {
111+
pub column: String,
112+
/// `true` for ASC (the default when neither ASC nor DESC is specified), `false` for DESC.
113+
pub ascending: bool,
96114
}
97115

98116
#[derive(Debug, Clone)]

asap-common/dependencies/rs/sql_utilities/src/ast_matching/sqlparser_test.rs

Lines changed: 119 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -700,10 +700,17 @@ mod tests {
700700
}
701701

702702
#[test]
703-
fn test_order_by_is_rejected() {
704-
assert!(parse_sql_query(
705-
"SELECT AVG(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2 ORDER BY L1"
706-
).is_none());
703+
fn test_order_by_groupby_column_default_ascending() {
704+
// Bare `ORDER BY L1` (no ASC/DESC) defaults to ascending. The order_by item
705+
// must reflect that the column is a GROUP BY key.
706+
let q = parse_sql_query(
707+
"SELECT AVG(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2 ORDER BY L1",
708+
)
709+
.expect("ORDER BY group-by column should parse");
710+
assert_eq!(q.order_by.len(), 1);
711+
assert_eq!(q.order_by[0].column, "L1");
712+
assert!(q.order_by[0].ascending);
713+
assert_eq!(q.limit, None);
707714
}
708715

709716
// ── scrape_interval > 1s regression tests (issue #201) ───────────────────
@@ -919,4 +926,112 @@ mod tests {
919926
assert!(query.labels.contains("L2"));
920927
assert_eq!(query.aggregation_info.get_name(), "SUM");
921928
}
929+
930+
// ── ORDER BY / LIMIT support ─────────────────────────────────────────────
931+
//
932+
// The parser must accept ORDER BY (possibly multi-column, with optional ASC/DESC)
933+
// and LIMIT N, capturing them in SQLQueryData for the engine to apply post-aggregation.
934+
// ORDER BY columns must reference either the aggregate alias or a GROUP BY column.
935+
// The aggregate alias is captured separately so `ORDER BY <alias>` can resolve.
936+
937+
#[test]
938+
fn test_order_by_aggregate_alias_desc_limit_n() {
939+
// Top-N user case: ORDER BY <agg alias> DESC LIMIT 10.
940+
let q = parse_sql_query(
941+
"SELECT L1, L2, QUANTILE(0.99, value) AS p99 FROM cpu_usage \
942+
WHERE time BETWEEN DATEADD(s, -11, NOW()) AND DATEADD(s, -10, NOW()) \
943+
GROUP BY L1, L2 \
944+
ORDER BY p99 DESC LIMIT 10",
945+
)
946+
.expect("ORDER BY <agg alias> DESC LIMIT N should parse");
947+
assert_eq!(q.aggregation_alias.as_deref(), Some("p99"));
948+
assert_eq!(q.order_by.len(), 1);
949+
assert_eq!(q.order_by[0].column, "p99");
950+
assert!(!q.order_by[0].ascending);
951+
assert_eq!(q.limit, Some(10));
952+
}
953+
954+
#[test]
955+
fn test_order_by_multiple_columns_mixed_directions() {
956+
let q = parse_sql_query(
957+
"SELECT QUANTILE(0.99, value) AS p99 FROM cpu_usage \
958+
WHERE time BETWEEN DATEADD(s, -1, NOW()) AND NOW() \
959+
GROUP BY L1, L2 \
960+
ORDER BY L1 ASC, p99 DESC",
961+
)
962+
.expect("multi-column ORDER BY with mixed directions should parse");
963+
assert_eq!(q.order_by.len(), 2);
964+
assert_eq!(q.order_by[0].column, "L1");
965+
assert!(q.order_by[0].ascending);
966+
assert_eq!(q.order_by[1].column, "p99");
967+
assert!(!q.order_by[1].ascending);
968+
}
969+
970+
#[test]
971+
fn test_limit_only_no_orderby() {
972+
let q = parse_sql_query(
973+
"SELECT SUM(value) FROM cpu_usage \
974+
WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() \
975+
GROUP BY L1 \
976+
LIMIT 5",
977+
)
978+
.expect("LIMIT without ORDER BY should parse");
979+
assert!(q.order_by.is_empty());
980+
assert_eq!(q.limit, Some(5));
981+
}
982+
983+
#[test]
984+
fn test_order_by_unknown_column_rejected() {
985+
// mystery_col is neither the aggregate alias nor a GROUP BY column.
986+
assert!(parse_sql_query(
987+
"SELECT QUANTILE(0.99, value) AS p99 FROM cpu_usage \
988+
WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() \
989+
GROUP BY L1 \
990+
ORDER BY mystery_col",
991+
)
992+
.is_none());
993+
}
994+
995+
#[test]
996+
fn test_order_by_expression_rejected() {
997+
assert!(parse_sql_query(
998+
"SELECT QUANTILE(0.99, value) AS p99 FROM cpu_usage \
999+
WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() \
1000+
GROUP BY L1 \
1001+
ORDER BY p99 + 1",
1002+
)
1003+
.is_none());
1004+
}
1005+
1006+
#[test]
1007+
fn test_limit_with_offset_rejected() {
1008+
// OFFSET is not supported (no pagination semantics in the precompute model).
1009+
assert!(parse_sql_query(
1010+
"SELECT SUM(value) FROM cpu_usage \
1011+
WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() \
1012+
GROUP BY L1 \
1013+
LIMIT 5 OFFSET 3",
1014+
)
1015+
.is_none());
1016+
}
1017+
1018+
#[test]
1019+
fn test_matches_template_ignores_order_by_and_limit() {
1020+
// A registered template without ORDER BY / LIMIT must still match an incoming
1021+
// query that adds them — they're presentational, not structural.
1022+
let template = parse_sql_query(
1023+
"SELECT QUANTILE(0.99, value) AS p99 FROM cpu_usage \
1024+
WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() \
1025+
GROUP BY L1, L2",
1026+
)
1027+
.unwrap();
1028+
let incoming = parse_sql_query(
1029+
"SELECT QUANTILE(0.99, value) AS top FROM cpu_usage \
1030+
WHERE time BETWEEN DATEADD(s, -10, '2025-10-01 00:00:10') AND '2025-10-01 00:00:10' \
1031+
GROUP BY L1, L2 \
1032+
ORDER BY top DESC LIMIT 25",
1033+
)
1034+
.unwrap();
1035+
assert!(incoming.matches_sql_pattern(&template));
1036+
}
9221037
}

asap-common/dependencies/rs/sql_utilities/src/ast_matching/sqlpattern_matcher.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,10 +53,13 @@ impl SQLQuery {
5353

5454
let query_data = SQLQueryData {
5555
aggregation_info: aggregation,
56+
aggregation_alias: None,
5657
metric,
5758
labels,
5859
time_info: time,
5960
subquery: None,
61+
order_by: Vec::new(),
62+
limit: None,
6063
};
6164

6265
self.query_data.push(query_data);

asap-common/dependencies/rs/sql_utilities/src/ast_matching/sqlpattern_parser.rs

Lines changed: 114 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use crate::sqlhelper::SQLSchema;
2-
use crate::sqlhelper::{AggregationInfo, SQLQueryData, TimeInfo};
2+
use crate::sqlhelper::{AggregationInfo, OrderByItem, SQLQueryData, TimeInfo};
33
use sqlparser::ast::*;
44
use std::collections::HashSet;
55

@@ -36,20 +36,106 @@ impl SQLPatternParser {
3636
}
3737

3838
fn parse_query_node(&self, query: &Query) -> Option<SQLQueryData> {
39-
if query.order_by.is_some() {
40-
println!("ORDER BY is not supported");
41-
return None;
42-
}
39+
// Parse ORDER BY / LIMIT before walking into the SELECT body. Both are properties
40+
// of the outer Query, not of the inner Select. Any unsupported sub-shape (positional
41+
// refs, expressions, OFFSET, NULLS FIRST/LAST, ClickHouse `ORDER BY ALL`, etc.)
42+
// bails the whole query rather than silently dropping it.
43+
let order_by_items = self.parse_order_by_items(query)?;
44+
let limit = self.parse_limit_value(query)?;
4345

4446
// Convert CTE to subquery if present
4547
let query = self.cte_to_subquery(query);
4648

47-
match &query.body.as_ref() {
48-
SetExpr::Select(select) => self.parse_select(select),
49+
let mut data = match &query.body.as_ref() {
50+
SetExpr::Select(select) => self.parse_select(select)?,
4951
_ => {
5052
println!("Not a SELECT statement");
51-
None
53+
return None;
54+
}
55+
};
56+
57+
// ORDER BY columns must reference either the aggregate alias or a group-by key.
58+
// Anything else (e.g. `ORDER BY some_other_column`) is rejected to avoid the
59+
// engine returning an arbitrary order in cases where the user assumed the
60+
// column would resolve.
61+
for item in &order_by_items {
62+
let valid = data.aggregation_alias.as_deref() == Some(item.column.as_str())
63+
|| data.labels.contains(&item.column);
64+
if !valid {
65+
return None;
66+
}
67+
}
68+
69+
data.order_by = order_by_items;
70+
data.limit = limit;
71+
Some(data)
72+
}
73+
74+
/// Convert `query.order_by` into a flat `Vec<OrderByItem>`.
75+
/// Returns `Some(vec![])` when no ORDER BY is present.
76+
/// Returns `None` for any unsupported shape (positional refs, expressions,
77+
/// `WITH FILL`, `NULLS FIRST/LAST`, ClickHouse `ORDER BY ALL`, `INTERPOLATE`).
78+
fn parse_order_by_items(&self, query: &Query) -> Option<Vec<OrderByItem>> {
79+
let order_by = match &query.order_by {
80+
None => return Some(Vec::new()),
81+
Some(ob) => ob,
82+
};
83+
if order_by.interpolate.is_some() {
84+
return None;
85+
}
86+
let exprs = match &order_by.kind {
87+
OrderByKind::Expressions(e) => e,
88+
// `ORDER BY ALL` (DuckDB / ClickHouse extension) is not supported.
89+
OrderByKind::All(_) => return None,
90+
};
91+
let mut items = Vec::with_capacity(exprs.len());
92+
for ob in exprs {
93+
if ob.with_fill.is_some() || ob.options.nulls_first.is_some() {
94+
return None;
5295
}
96+
let column = match &ob.expr {
97+
Expr::Identifier(ident) => ident.value.clone(),
98+
_ => return None,
99+
};
100+
// Default direction is ASC when neither ASC nor DESC is written.
101+
let ascending = ob.options.asc.unwrap_or(true);
102+
items.push(OrderByItem { column, ascending });
103+
}
104+
Some(items)
105+
}
106+
107+
/// Convert `query.limit_clause` into an `Option<u64>`.
108+
/// Returns `Some(None)` when no LIMIT is present.
109+
/// Returns `None` for any unsupported shape (OFFSET, `LIMIT BY`, MySQL `LIMIT a, b`,
110+
/// non-literal expressions, `LIMIT ALL`).
111+
fn parse_limit_value(&self, query: &Query) -> Option<Option<u64>> {
112+
let clause = match &query.limit_clause {
113+
None => return Some(None),
114+
Some(c) => c,
115+
};
116+
let limit_expr = match clause {
117+
// MySQL-style `LIMIT a, b` (offset-comma-limit) is not supported.
118+
LimitClause::OffsetCommaLimit { .. } => return None,
119+
LimitClause::LimitOffset {
120+
limit,
121+
offset,
122+
limit_by,
123+
} => {
124+
if offset.is_some() || !limit_by.is_empty() {
125+
return None;
126+
}
127+
match limit {
128+
None => return Some(None), // `LIMIT ALL` or no LIMIT
129+
Some(e) => e,
130+
}
131+
}
132+
};
133+
match limit_expr {
134+
Expr::Value(ValueWithSpan {
135+
value: Value::Number(n, _),
136+
..
137+
}) => n.parse::<u64>().ok().map(Some),
138+
_ => None,
53139
}
54140
}
55141

@@ -88,7 +174,7 @@ impl SQLPatternParser {
88174
fn parse_select(&self, select: &Select) -> Option<SQLQueryData> {
89175
let (metric, has_subquery) = self.get_metric(select)?;
90176

91-
let aggregation = self.get_aggregation(select)?;
177+
let (aggregation, aggregation_alias) = self.get_aggregation(select)?;
92178

93179
let group_bys = self.get_groupbys(select)?;
94180

@@ -118,17 +204,20 @@ impl SQLPatternParser {
118204

119205
Some(SQLQueryData {
120206
aggregation_info: aggregation,
207+
aggregation_alias,
121208
metric,
122209
labels: group_bys,
123210
time_info,
124211
subquery: None,
212+
order_by: Vec::new(),
213+
limit: None,
125214
})
126215
} else {
127216
// Parse subquery
128217
let subquery = match &select.from[0].relation {
129218
TableFactor::Derived { subquery, .. } => match subquery.body.as_ref() {
130219
SetExpr::Select(inner_select) => {
131-
let inner_aggregation = self.get_aggregation(inner_select)?;
220+
let (inner_aggregation, inner_alias) = self.get_aggregation(inner_select)?;
132221
let inner_group_bys = self.get_groupbys(inner_select)?;
133222
if !self.select_identifiers_subset_of(inner_select, &inner_group_bys) {
134223
return None;
@@ -137,10 +226,13 @@ impl SQLPatternParser {
137226

138227
Some(Box::new(SQLQueryData {
139228
aggregation_info: inner_aggregation,
229+
aggregation_alias: inner_alias,
140230
metric: metric.clone(),
141231
labels: inner_group_bys,
142232
time_info,
143233
subquery: None,
234+
order_by: Vec::new(),
235+
limit: None,
144236
}))
145237
}
146238
_ => None,
@@ -150,10 +242,13 @@ impl SQLPatternParser {
150242

151243
Some(SQLQueryData {
152244
aggregation_info: aggregation,
245+
aggregation_alias,
153246
metric,
154247
labels: group_bys,
155248
time_info: TimeInfo::new("UNUSED".to_string(), -1.0, -1_f64),
156249
subquery: Some(subquery),
250+
order_by: Vec::new(),
251+
limit: None,
157252
})
158253
}
159254
}
@@ -238,17 +333,20 @@ impl SQLPatternParser {
238333
true
239334
}
240335

241-
fn get_aggregation(&self, select: &Select) -> Option<AggregationInfo> {
336+
fn get_aggregation(&self, select: &Select) -> Option<(AggregationInfo, Option<String>)> {
242337
// Find the (single) aggregate function in the projection list. Other
243338
// projection items must be plain column references — these are expected to
244339
// be group-by keys (e.g. `SELECT g1, g2, SUM(v) FROM t GROUP BY g1, g2`).
245340
// Anything else (multiple aggregates, computed expressions, literals, *)
246341
// is rejected since the structural pattern model only tracks one statistic.
342+
// Also captures the aggregate's alias if the SELECT writes `agg(v) AS <alias>`,
343+
// so `ORDER BY <alias>` can resolve later.
247344
let mut agg_func: Option<&Function> = None;
345+
let mut agg_alias: Option<String> = None;
248346
for item in &select.projection {
249-
let expr = match item {
250-
SelectItem::UnnamedExpr(expr) => expr,
251-
SelectItem::ExprWithAlias { expr, .. } => expr,
347+
let (expr, alias) = match item {
348+
SelectItem::UnnamedExpr(expr) => (expr, None),
349+
SelectItem::ExprWithAlias { expr, alias } => (expr, Some(alias.value.clone())),
252350
_ => return None,
253351
};
254352
match expr {
@@ -257,6 +355,7 @@ impl SQLPatternParser {
257355
return None;
258356
}
259357
agg_func = Some(f);
358+
agg_alias = alias;
260359
}
261360
Expr::Identifier(_) | Expr::CompoundIdentifier(_) => {}
262361
_ => return None,
@@ -332,7 +431,7 @@ impl SQLPatternParser {
332431
name
333432
};
334433

335-
Some(AggregationInfo::new(normalized_name, col, args))
434+
Some((AggregationInfo::new(normalized_name, col, args), agg_alias))
336435
}
337436

338437
fn get_metric(&self, select: &Select) -> Option<(String, bool)> {

0 commit comments

Comments
 (0)