Skip to content

Commit 7e1983b

Browse files
benjamib112
authored and committed
formatting
1 parent 80909dd commit 7e1983b

2 files changed

Lines changed: 21 additions & 30 deletions

File tree

asap-tools/execution-utilities/benchmark/download_dataset.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,6 @@
1616
import sys
1717
import urllib.request
1818

19-
2019
CLICKBENCH_URL = "https://datasets.clickhouse.com/hits_compatible/hits.json.gz"
2120
CLICKBENCH_FILENAME = "hits.json.gz"
2221

asap-tools/execution-utilities/benchmark/generate_queries.py

Lines changed: 21 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -64,7 +64,6 @@
6464
from typing import List, Optional
6565

6666

67-
6867
def _parse_timestamp(value: str) -> Optional[datetime]:
6968
"""Try to parse a timestamp string in common formats."""
7069
value = str(value).strip()
@@ -90,9 +89,7 @@ def _parse_timestamp(value: str) -> Optional[datetime]:
9089
return None
9190

9291

93-
def _scan_ts_range_json(
94-
file_path: str, ts_column: str, compressed: bool
95-
) -> tuple:
92+
def _scan_ts_range_json(file_path: str, ts_column: str, compressed: bool) -> tuple:
9693
"""Scan a JSON-lines file and return (min_ts, max_ts, count)."""
9794
min_ts = max_ts = None
9895
count = 0
@@ -119,11 +116,10 @@ def _scan_ts_range_json(
119116
return min_ts, max_ts, count
120117

121118

122-
def _scan_ts_range_csv(
123-
file_path: str, ts_column: str
124-
) -> tuple:
119+
def _scan_ts_range_csv(file_path: str, ts_column: str) -> tuple:
125120
"""Scan a CSV file and return (min_ts, max_ts, count)."""
126121
import csv
122+
127123
min_ts = max_ts = None
128124
count = 0
129125
with open(file_path, "r", newline="") as f:
@@ -145,25 +141,25 @@ def _scan_ts_range_csv(
145141
return min_ts, max_ts, count
146142

147143

148-
def detect_timestamps(
149-
data_file: str, data_file_format: str, ts_column: str
150-
) -> tuple:
144+
def detect_timestamps(data_file: str, data_file_format: str, ts_column: str) -> tuple:
151145
"""Return (min_ts, max_ts) by scanning the entire data file."""
152146
fmt = data_file_format.lower()
153147
if fmt in ("json.gz", "jsonl.gz"):
154-
min_ts, max_ts, count = _scan_ts_range_json(data_file, ts_column, compressed=True)
148+
min_ts, max_ts, count = _scan_ts_range_json(
149+
data_file, ts_column, compressed=True
150+
)
155151
elif fmt in ("json", "jsonl"):
156-
min_ts, max_ts, count = _scan_ts_range_json(data_file, ts_column, compressed=False)
152+
min_ts, max_ts, count = _scan_ts_range_json(
153+
data_file, ts_column, compressed=False
154+
)
157155
elif fmt == "csv":
158156
min_ts, max_ts, count = _scan_ts_range_csv(data_file, ts_column)
159157
else:
160158
print(f"ERROR: Unsupported data file format: {data_file_format}")
161159
sys.exit(1)
162160

163161
if min_ts is None:
164-
print(
165-
f"ERROR: No '{ts_column}' timestamps found in {data_file}"
166-
)
162+
print(f"ERROR: No '{ts_column}' timestamps found in {data_file}")
167163
sys.exit(1)
168164

169165
return min_ts, max_ts
@@ -250,19 +246,11 @@ def generate_sql_files(
250246
desc_db = f"quantile window ending at {db_end}"
251247

252248
if window_form == "dateadd":
253-
asap_where = (
254-
f"{ts_column} BETWEEN DATEADD(s, -{window_size}, '{asap_end}') AND '{asap_end}'"
255-
)
256-
db_where = (
257-
f"{ts_column} BETWEEN DATEADD(s, -{window_size}, '{db_end}') AND '{db_end}'"
258-
)
249+
asap_where = f"{ts_column} BETWEEN DATEADD(s, -{window_size}, '{asap_end}') AND '{asap_end}'"
250+
db_where = f"{ts_column} BETWEEN DATEADD(s, -{window_size}, '{db_end}') AND '{db_end}'"
259251
else:
260-
asap_where = (
261-
f"{ts_column} BETWEEN '{asap_start}' AND '{asap_end}'"
262-
)
263-
db_where = (
264-
f"{ts_column} BETWEEN '{db_start}' AND '{db_end}'"
265-
)
252+
asap_where = f"{ts_column} BETWEEN '{asap_start}' AND '{asap_end}'"
253+
db_where = f"{ts_column} BETWEEN '{db_start}' AND '{db_end}'"
266254

267255
asap_sql = (
268256
f"-- {label}: {desc_asap}\n"
@@ -379,15 +367,19 @@ def main():
379367
# Table/column config
380368
parser.add_argument("--table-name", required=True)
381369
parser.add_argument("--ts-column", required=True, help="Timestamp column name")
382-
parser.add_argument("--value-column", required=True, help="Column to compute quantile on")
370+
parser.add_argument(
371+
"--value-column", required=True, help="Column to compute quantile on"
372+
)
383373
parser.add_argument(
384374
"--group-by-columns",
385375
required=True,
386376
help="Comma-separated GROUP BY columns",
387377
)
388378
# Query parameters
389379
parser.add_argument("--quantile", type=float, default=0.95)
390-
parser.add_argument("--window-size", type=int, default=10, help="Window size in seconds")
380+
parser.add_argument(
381+
"--window-size", type=int, default=10, help="Window size in seconds"
382+
)
391383
parser.add_argument("--num-queries", type=int, default=50)
392384
parser.add_argument(
393385
"--ts-format-asap",

0 commit comments

Comments
 (0)