@@ -595,73 +595,6 @@ mod tests {
595595 ) ;
596596 }
597597
598- /// Regression for three matcher-side gaps that surface together when the
599- /// simple_engine SQL path runs an HLL `COUNT(DISTINCT)` query end-to-end:
600- ///
601- /// 1. The parser correctly normalises `COUNT(DISTINCT col)` to the
602- /// aggregation name `"CARDINALITY"`, but
603- /// `SQLPatternMatcher::is_valid_aggregation` never gained `CARDINALITY`
604- /// in its `legal_aggregations` set, so the validator rejected the query
605- /// with `IllegalAggregationFn` before pattern matching ran.
606- ///
607- /// 2. After fixing (1), `flatten_query_info` validates the aggregation's
608- /// "value column" against `schema.is_valid_value_column`, which only
609- /// knows table value columns. `COUNT(DISTINCT col)` legitimately targets
610- /// metadata/label columns (e.g. `COUNT(DISTINCT dstip)`), so the
611- /// validator rejected it with `InvalidValueCol`. The fix accepts
612- /// metadata columns *only* for CARDINALITY.
613- ///
614- /// 3. With both fixed, the query classifies as `SpatioTemporal` because
615- /// `GROUP BY` only covers a subset of metadata columns — exactly the
616- /// shape of the user's real `COUNT(DISTINCT dstip) GROUP BY srcip`
617- /// query, which selects on `srcip` and aggregates over `dstip` (so
618- /// labels ⊊ metadata_columns).
619- ///
620- /// Observed log line that motivated this test:
621- /// error: Some(IllegalAggregationFn),
622- /// msg: Some("attempt to use illegal aggregation function CARDINALITY")
623- #[ test]
624- fn test_count_distinct_passes_aggregation_allowlist ( ) {
625- check_query (
626- "SELECT COUNT(DISTINCT L4) FROM cpu_usage \
627- WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() \
628- GROUP BY L1, L2, L3",
629- vec ! [ QueryType :: SpatioTemporal ] ,
630- None ,
631- ) ;
632- }
633-
634- /// Companion: when `GROUP BY` covers all metadata columns *except* the
635- /// distinct-target itself, the query is still SpatioTemporal — the
636- /// distinct-target is the value column, not a grouping label, so labels
637- /// always form a strict subset of metadata_columns. Guards against future
638- /// "treat L4 as both label and value" regressions in the classifier.
639- #[ test]
640- fn test_count_distinct_with_full_remaining_labels_is_spatiotemporal ( ) {
641- check_query (
642- "SELECT COUNT(DISTINCT L4) FROM cpu_usage \
643- WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() \
644- GROUP BY L1, L2, L3",
645- vec ! [ QueryType :: SpatioTemporal ] ,
646- None ,
647- ) ;
648- }
649-
650- /// Negative case: `COUNT(DISTINCT not_in_schema)` against a column that's
651- /// neither a value_column nor a metadata_column must still be rejected as
652- /// `InvalidValueCol`. The CARDINALITY relaxation widens what's *allowed*
653- /// (metadata columns) but doesn't disable the schema check entirely.
654- #[ test]
655- fn test_count_distinct_unknown_column_still_rejected ( ) {
656- check_query (
657- "SELECT COUNT(DISTINCT bogus_column) FROM cpu_usage \
658- WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() \
659- GROUP BY L1, L2, L3",
660- vec ! [ ] ,
661- Some ( QueryError :: InvalidValueCol ) ,
662- ) ;
663- }
664-
665598 #[ test]
666599 fn test_error_spatial_scrape_duration_too_small ( ) {
667600 check_query (
@@ -1107,8 +1040,7 @@ mod tests {
11071040 // `COUNT(DISTINCT col)` must be normalised to a cardinality aggregation
11081041 // (`AggregationInfo.name == "CARDINALITY"`) so the engine routes it to a
11091042 // distinct-tracking sketch (SetAggregator / HLL) instead of a plain Count
1110- // sketch. The parser today drops `DISTINCT` silently — a parser-level bug
1111- // that would dispatch streaming counts as totals.
1043+ // sketch.
11121044
11131045 #[ test]
11141046 fn test_count_distinct_single_column_maps_to_cardinality ( ) {
@@ -1240,4 +1172,49 @@ mod tests {
12401172 )
12411173 . is_none( ) ) ;
12421174 }
1175+
1176+ /// Matcher must accept parser-normalised `CARDINALITY` (not `IllegalAggregationFn`),
1177+ /// allow a metadata-column distinct target, and classify the single-label shape
1178+ /// `COUNT(DISTINCT dstip) GROUP BY srcip` (here `L4` / `L1`) as `SpatioTemporal`.
1179+ #[ test]
1180+ fn test_count_distinct_passes_aggregation_allowlist ( ) {
1181+ check_query (
1182+ "SELECT COUNT(DISTINCT L4) FROM cpu_usage \
1183+ WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() \
1184+ GROUP BY L1",
1185+ vec ! [ QueryType :: SpatioTemporal ] ,
1186+ None ,
1187+ ) ;
1188+ }
1189+
1190+ /// Companion: `GROUP BY L1, L2, L3` covers every metadata column *except* the
1191+ /// distinct-target `L4` (assumes `cpu_usage` labels are L1..L4 — TODO confirm).
1192+ /// The expected classification is still `SpatioTemporal`: the distinct-target
1193+ /// acts as the value column, not a grouping label, so the labels remain a strict
1194+ /// subset of metadata_columns. Guards against "L4 as both label and value" regressions.
1195+ #[ test]
1196+ fn test_count_distinct_with_full_remaining_labels_is_spatiotemporal ( ) {
1197+ check_query (
1198+ "SELECT COUNT(DISTINCT L4) FROM cpu_usage \
1199+ WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() \
1200+ GROUP BY L1, L2, L3",
1201+ vec ! [ QueryType :: SpatioTemporal ] ,
1202+ None ,
1203+ ) ;
1204+ }
1205+
1206+ /// Negative case: `COUNT(DISTINCT bogus_column)` targets a column that is
1207+ /// presumably neither a value_column nor a metadata_column of `cpu_usage`, so the
1208+ /// expected outcome is no query types and `QueryError::InvalidValueCol`. The CARDINALITY
1209+ /// relaxation widens what's *allowed* (metadata columns) without disabling the schema check.
1210+ #[ test]
1211+ fn test_count_distinct_unknown_column_still_rejected ( ) {
1212+ check_query (
1213+ "SELECT COUNT(DISTINCT bogus_column) FROM cpu_usage \
1214+ WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() \
1215+ GROUP BY L1, L2, L3",
1216+ vec ! [ ] ,
1217+ Some ( QueryError :: InvalidValueCol ) ,
1218+ ) ;
1219+ }
12431220}
0 commit comments