Skip to content

Commit ab6465d

Browse files
updated scripts
1 parent 39eb0d4 commit ab6465d

4 files changed

Lines changed: 128 additions & 102 deletions

File tree

asap-common/dependencies/rs/sql_utilities/src/ast_matching/sqlparser_test.rs

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -496,6 +496,65 @@ mod tests {
496496
);
497497
}
498498

499+
// ── ClickHouse parametric syntax + explicit BETWEEN timestamps ────────────
500+
// These verify that a fully ClickHouse-compatible query (no DATEADD, no NOW())
501+
// is parseable by ASAP: quantile(q)(col) + BETWEEN 'start' AND 'end'.
502+
503+
#[test]
504+
fn test_clickhouse_explicit_datetime_temporal_quantile() {
505+
check_query(
506+
"SELECT quantile(0.95)(value) FROM cpu_usage WHERE time BETWEEN '2025-10-01 00:00:00' AND '2025-10-01 00:00:10' GROUP BY L1, L2, L3, L4",
507+
vec![QueryType::TemporalQuantile],
508+
None,
509+
);
510+
}
511+
512+
#[test]
513+
// ASAP-only: parse_datetime accepts the Z suffix (interprets as UTC), but ClickHouse
514+
// rejects it with TYPE_MISMATCH when comparing against a DateTime column.
515+
// Do not use Z-suffix strings in queries intended for both systems.
516+
fn test_asap_only_iso_z_temporal_quantile() {
517+
check_query(
518+
"SELECT quantile(0.95)(value) FROM cpu_usage WHERE time BETWEEN '2025-10-01T00:00:00Z' AND '2025-10-01T00:00:10Z' GROUP BY L1, L2, L3, L4",
519+
vec![QueryType::TemporalQuantile],
520+
None,
521+
);
522+
}
523+
524+
#[test]
525+
// Both ASAP (parse_datetime) and ClickHouse treat ISO-without-Z as local server time.
526+
// They agree only when running in the same timezone; prefer 'YYYY-MM-DD HH:MM:SS'
527+
// (space format) to avoid this implicit dependency.
528+
fn test_iso_no_z_treated_as_local_time_temporal_quantile() {
529+
check_query(
530+
"SELECT quantile(0.95)(value) FROM cpu_usage WHERE time BETWEEN '2025-10-01T00:00:00' AND '2025-10-01T00:00:10' GROUP BY L1, L2, L3, L4",
531+
vec![QueryType::TemporalQuantile],
532+
None,
533+
);
534+
}
535+
536+
#[test]
537+
fn test_clickhouse_explicit_datetime_spatial_quantile() {
538+
check_query(
539+
"SELECT quantile(0.95)(value) FROM cpu_usage WHERE time BETWEEN '2025-10-01 00:00:00' AND '2025-10-01 00:00:01' GROUP BY L1",
540+
vec![QueryType::Spatial],
541+
None,
542+
);
543+
}
544+
545+
#[test]
546+
fn test_clickhouse_explicit_matches_now_template() {
547+
// A ClickHouse-style query (explicit timestamps, parametric quantile) must
548+
// match a stored DATEADD(NOW()) template of the same shape.
549+
let template = parse_sql_query(
550+
"SELECT quantile(0.95)(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2, L3, L4"
551+
).unwrap();
552+
let incoming = parse_sql_query(
553+
"SELECT quantile(0.95)(value) FROM cpu_usage WHERE time BETWEEN '2025-10-01 00:00:00' AND '2025-10-01 00:00:10' GROUP BY L1, L2, L3, L4"
554+
).unwrap();
555+
assert!(incoming.matches_sql_pattern(&template));
556+
}
557+
499558
// ── Error cases ──────────────────────────────────────────────────────────
500559

501560
#[test]

asap-common/dependencies/rs/sql_utilities/src/ast_matching/sqlpattern_parser.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -320,6 +320,11 @@ impl SQLPatternParser {
320320
}
321321

322322
fn get_timestamp_from_datetime_str(datetime_str: &str) -> Option<f64> {
323+
// parse_datetime treats timezone-naive strings (e.g. "2025-10-01 00:00:00",
324+
// "2025-10-01T00:00:00") as local server time, matching ClickHouse's behavior —
325+
// but only when both run in the same timezone. Z-suffix strings (e.g.
326+
// "2025-10-01T00:00:00Z") are interpreted as UTC here but rejected by ClickHouse.
327+
// Use space-format datetime strings ("YYYY-MM-DD HH:MM:SS") for portability.
323328
let parsed_datetime = parse_datetime(datetime_str).ok()?;
324329
Some(parsed_datetime.timestamp().as_second() as f64)
325330
}

asap-tools/execution-utilities/benchmark/README.md

Lines changed: 27 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,12 @@ pip3 install --user -r requirements.txt
3535
cd ~/ASAPQuery/asap-query-engine && cargo build --release
3636
```
3737

38+
> **UTC requirement:** Both ASAP and ClickHouse must run in UTC so that bare
39+
> datetime strings (`'YYYY-MM-DD HH:MM:SS'`) are interpreted identically by both
40+
> systems. Set `TZ=UTC` in the environment for ASAP processes and ensure
41+
> ClickHouse's `timezone` config is set to `UTC`. If the two systems run in
42+
> different timezones, queries will target different time windows on each side.
43+
3844
---
3945

4046
## ClickBench + ClickHouse End-to-End Example
@@ -115,8 +121,7 @@ python generate_queries.py \
115121
```
116122

117123
This writes:
118-
- `queries/clickbench_asap.sql` — ASAP queries (ISO timestamps)
119-
- `queries/clickbench_clickhouse.sql` — ClickHouse queries (datetime timestamps)
124+
- `queries/clickbench.sql` — shared query file for both ASAP and ClickHouse
120125
- `queries/clickbench_streaming.yaml` — Arroyo streaming config
121126
- `queries/clickbench_inference.yaml` — QueryEngineRust inference config
122127

@@ -166,8 +171,8 @@ Verify: `$INSTALL_DIR/clickhouse client --query "SELECT count(*) FROM hits"`
166171
```bash
167172
python run_benchmark.py \
168173
--mode both \
169-
--asap-sql-file ./queries/clickbench_asap.sql \
170-
--baseline-sql-file ./queries/clickbench_clickhouse.sql \
174+
--asap-sql-file ./queries/clickbench.sql \
175+
--baseline-sql-file ./queries/clickbench.sql \
171176
--asap-url "http://localhost:8088/api/v1/query" \
172177
--output-dir ./results \
173178
--output-prefix clickbench
@@ -258,8 +263,8 @@ python export_to_database.py \
258263
```bash
259264
python run_benchmark.py \
260265
--mode both \
261-
--asap-sql-file ./queries/h2o_asap.sql \
262-
--baseline-sql-file ./queries/h2o_clickhouse.sql \
266+
--asap-sql-file ./queries/h2o.sql \
267+
--baseline-sql-file ./queries/h2o.sql \
263268
--asap-url "http://localhost:8088/api/v1/query" \
264269
--output-dir ./results \
265270
--output-prefix h2o
@@ -290,7 +295,7 @@ python export_to_arroyo.py \
290295
cd ~/ASAPQuery/asap-query-engine
291296

292297
./target/release/query_engine_rust \
293-
--kafka-topic sketch_topic
298+
--kafka-topic sketch_topic \
294299
--input-format json \
295300
--config ~/ASAPQuery/asap-tools/execution-utilities/benchmark/configs/h2o_inference.yaml \
296301
--streaming-config ~/ASAPQuery/asap-tools/execution-utilities/benchmark/configs/h2o_streaming.yaml \
@@ -303,25 +308,25 @@ cd ~/ASAPQuery/asap-query-engine
303308
### Step 8 — Load data into Elasticsearch (baseline)
304309

305310
```bash
306-
python export_to_database.py
307-
--dataset h2o
308-
--file-path ./data/G1_1e7_1e2_0_0.csv
309-
--es-host localhost
310-
--es-port 9200
311-
--es-index h2o_groupby
311+
python export_to_database.py \
312+
--dataset h2o \
313+
--file-path ./data/G1_1e7_1e2_0_0.csv \
314+
--es-host localhost \
315+
--es-port 9200 \
316+
--es-index h2o_groupby \
312317
--es-api-key your-api-key \
313318
--es-bulk-size 5000
314319
```
315320

316321
### Step 9 — Run benchmark
317322

318323
```bash
319-
python run_benchmark.py
320-
--mode asap
321-
--asap-sql-file ./queries/h2o_asap.sql
322-
--baseline-sql-file ./queries/h2o_elasticsearch.sql
323-
--elastic-host localhost
324-
--elastic-port 9200
324+
python run_benchmark.py \
325+
--mode asap \
326+
--asap-sql-file ./queries/h2o.sql \
327+
--baseline-sql-file ./queries/h2o.sql \
328+
--elastic-host localhost \
329+
--elastic-port 9200 \
325330
--elastic-api-key your-api-key \
326331
--output-dir ./results --output-prefix h2o
327332
```
@@ -370,8 +375,8 @@ python export_to_database.py \
370375
# 6. Run benchmark
371376
python run_benchmark.py \
372377
--mode both \
373-
--asap-sql-file ./queries/my_dataset_asap.sql \
374-
--baseline-sql-file ./queries/my_dataset_clickhouse.sql \
378+
--asap-sql-file ./queries/my_dataset.sql \
379+
--baseline-sql-file ./queries/my_dataset.sql \
375380
--asap-url "http://localhost:8088/api/v1/query" \
376381
--output-dir ./results
377382
```
@@ -407,6 +412,6 @@ $INSTALL_DIR/clickhouse client --query "TRUNCATE TABLE hits"
407412
| `prepare_data.py` | Convert raw data to Arroyo file source format (RFC3339, string columns) |
408413
| `export_to_arroyo.py` | Launch Arroyo sketch pipeline (file or kafka source) |
409414
| `export_to_database.py` | Load data into the baseline database (ClickHouse or Elasticsearch) |
410-
| `generate_queries.py` | Generate paired ASAP + ClickHouse SQL query files and streaming/inference YAML configs |
415+
| `generate_queries.py` | Generate a shared SQL query file (ClickHouse-compatible syntax, used for both ASAP and ClickHouse) and optional streaming/inference YAML configs |
411416
| `run_benchmark.py` | Run queries and produce CSV results + plots |
412417
| `configs/` | ClickHouse init SQL (CREATE TABLE statements) |

0 commit comments

Comments
 (0)