Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions storage/duckdb/cmake/duckdb.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,11 @@ ExternalProject_Add(duckdb_build
-DBUILD_TPCE=OFF
-DEXTENSION_STATIC_BUILD=1
"-DDUCKDB_EXTENSION_CONFIGS=${CMAKE_CURRENT_SOURCE_DIR}/cmake/duckdb_extensions.cmake"
# Upstream sets DUCKDB_EXTENSION_JEMALLOC_LINKED via add_extension_definitions(),
# which runs in extension/ but NOT in src/, so allocator.cpp (in duckdb_static)
# compiles the glibc malloc() path even though libjemalloc_extension.a is linked.
# Define it globally + add the jemalloc header dir so the USE_JEMALLOC branch is active.
"-DCMAKE_CXX_FLAGS=-DDUCKDB_EXTENSION_JEMALLOC_LINKED=1 -I${DUCKDB_SUBMODULE_DIR}/extension/jemalloc/include"
Comment thread
drrtuy marked this conversation as resolved.
-DENABLE_SANITIZER=FALSE
-DENABLE_UBSAN=OFF
-DOVERRIDE_GIT_DESCRIBE=v1.5.2-0-g0000000000
Expand Down
1 change: 1 addition & 0 deletions storage/duckdb/cmake/duckdb_extensions.cmake
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Extensions required by the DuckDB storage engine plugin for MariaDB.
# This config is passed to DuckDB via DUCKDB_EXTENSION_CONFIGS.

duckdb_extension_load(jemalloc)
duckdb_extension_load(core_functions)
duckdb_extension_load(icu)
duckdb_extension_load(json)
108 changes: 87 additions & 21 deletions storage/duckdb/runtime/duckdb_mysql_compat.cc
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@
#include "duckdb/main/connection.hpp"

#include "duckdb/common/types/string_type.hpp"
#include "duckdb/execution/expression_executor.hpp"
#include "duckdb/function/scalar/regexp.hpp"
#include "duckdb/planner/expression/bound_function_expression.hpp"
#include "re2/re2.h"

namespace myduck
Expand Down Expand Up @@ -982,6 +985,86 @@ static void locate_3arg_func(duckdb::DataChunk &args,
});
}

/* ================================================================
regexp_replace(VARCHAR, VARCHAR, VARCHAR) -> VARCHAR

MariaDB REGEXP_REPLACE replaces ALL matches (global), unlike DuckDB's
native 3-arg form which replaces only the first. We reuse DuckDB's
native bind-data / local-state: the bind step records a constant
pattern, and it is compiled once when the function-local state is
created (RegexInitLocalState) instead of once per row.

Invalid-pattern behavior mirrors MariaDB:
- constant pattern -> RegexLocalState ctor throws (query error);
- non-constant -> per-row NULL.
================================================================ */

/* Bind callback for the 3-argument regexp_replace overload.

   Builds a RegexpReplaceBindData configured for replace-all semantics
   with RE2 error logging switched off, and stores the outcome of
   TryParseConstantPattern() for the pattern argument (arguments[1]):
   the returned flag goes into constant_pattern and the pattern text is
   written to constant_string by that helper.

   @param context    client context forwarded to TryParseConstantPattern
   @param arguments  bound argument expressions; index 1 is the pattern
   @return the populated bind data, owned by the caller */
static duckdb::unique_ptr<duckdb::FunctionData>
regexp_replace_bind(duckdb::ClientContext &context,
                    duckdb::ScalarFunction &,
                    duckdb::vector<duckdb::unique_ptr<duckdb::Expression>>
                        &arguments)
{
  auto bind_data= duckdb::make_uniq<duckdb::RegexpReplaceBindData>();
  auto &pattern_expr= *arguments[1];

  /* Every match is replaced, not only the first one. */
  bind_data->global_replace= true;
  /* Invalid patterns are reported through ok(), not via RE2 logging. */
  bind_data->options.set_log_errors(false);

  bind_data->constant_pattern= duckdb::regexp_util::TryParseConstantPattern(
      context, pattern_expr, bind_data->constant_string);

  return duckdb::unique_ptr<duckdb::FunctionData>(std::move(bind_data));
}

/* Scalar implementation of regexp_replace(str, pattern, replacement)
   that replaces every match of the pattern.

   args.data[0] = input strings, args.data[1] = patterns,
   args.data[2] = replacement strings; output goes to `result`.

   Two execution paths, selected by the bind data:
   - constant pattern: the already-compiled RE2 stored in the
     function-local state is applied to each (input, replacement) pair;
   - otherwise: the pattern is compiled for each row with the bind-time
     options, and a row whose pattern does not compile yields NULL. */
static void regexp_replace_global_func(duckdb::DataChunk &args,
                                       duckdb::ExpressionState &state,
                                       duckdb::Vector &result)
{
  using duckdb::string_t;

  auto &bound_func= state.expr.Cast<duckdb::BoundFunctionExpression>();
  auto &bind_data=
      bound_func.bind_info->Cast<duckdb::RegexpReplaceBindData>();

  auto &input_vec= args.data[0];
  auto &pattern_vec= args.data[1];
  auto &replace_vec= args.data[2];
  const auto row_count= args.size();

  /* View a DuckDB string as an RE2 StringPiece without copying. */
  const auto as_piece= [](const string_t &str) {
    return duckdb_re2::StringPiece(str.GetData(), str.GetSize());
  };

  /* Copy the input, replace all matches in the copy, and store the
     outcome in the result vector's string heap. */
  const auto replace_all= [&result, &as_piece](const string_t &input,
                                               const duckdb_re2::RE2 &re,
                                               const string_t &replace) {
    std::string buffer= input.GetString();
    duckdb_re2::RE2::GlobalReplace(&buffer, re, as_piece(replace));
    return duckdb::StringVector::AddString(result, buffer);
  };

  if (info_is_constant: bind_data.constant_pattern)
  {
    /* The pattern column is not read here: the compiled regex comes
       from the local state. */
    auto &local_state=
        duckdb::ExecuteFunctionState::GetFunctionState(state)
            ->Cast<duckdb::RegexLocalState>();
    duckdb::BinaryExecutor::Execute<string_t, string_t, string_t>(
        input_vec, replace_vec, result, row_count,
        [&](string_t input, string_t replace) {
          return replace_all(input, local_state.constant_pattern, replace);
        });
    return;
  }

  duckdb::TernaryExecutor::ExecuteWithNulls<string_t, string_t, string_t,
                                            string_t>(
      input_vec, pattern_vec, replace_vec, result, row_count,
      [&](string_t input, string_t pattern, string_t replace,
          duckdb::ValidityMask &mask, duckdb::idx_t idx) -> string_t {
        duckdb_re2::RE2 compiled(as_piece(pattern), bind_data.options);
        if (compiled.ok())
          return replace_all(input, compiled, replace);

        /* Pattern failed to compile: this row becomes NULL. */
        mask.SetInvalid(idx);
        return string_t();
      });
}

/* ================================================================
Registration
================================================================ */
Expand Down Expand Up @@ -1207,32 +1290,15 @@ void register_mysql_compat_functions(duckdb::DatabaseInstance &db)
}

/* regexp_replace(VARCHAR, VARCHAR, VARCHAR) → VARCHAR
Replaces all occurrences of pattern in expr with replacement. */
Global (replace-all) MariaDB semantics with bind-time pattern
compilation for constant patterns. See regexp_replace_global_func. */
{
duckdb::ScalarFunctionSet set("regexp_replace");
set.AddFunction(duckdb::ScalarFunction(
{duckdb::LogicalType::VARCHAR, duckdb::LogicalType::VARCHAR,
duckdb::LogicalType::VARCHAR},
duckdb::LogicalType::VARCHAR,
[](duckdb::DataChunk &args, duckdb::ExpressionState &,
duckdb::Vector &result) {
duckdb::TernaryExecutor::Execute<duckdb::string_t, duckdb::string_t,
duckdb::string_t,
duckdb::string_t>(
args.data[0], args.data[1], args.data[2], result, args.size(),
[&](duckdb::string_t expr, duckdb::string_t pat,
duckdb::string_t repl) -> duckdb::string_t {
duckdb_re2::RE2 re(
duckdb_re2::StringPiece(pat.GetData(), pat.GetSize()));
if (!re.ok())
return expr;
std::string s(expr.GetData(), expr.GetSize());
duckdb_re2::RE2::GlobalReplace(
&s, re,
duckdb_re2::StringPiece(repl.GetData(), repl.GetSize()));
return duckdb::StringVector::AddString(result, s);
});
}));
duckdb::LogicalType::VARCHAR, regexp_replace_global_func,
regexp_replace_bind, nullptr, nullptr, duckdb::RegexInitLocalState));
duckdb::CreateScalarFunctionInfo info(std::move(set));
info.on_conflict= duckdb::OnCreateConflict::ALTER_ON_CONFLICT;
catalog.CreateFunction(transaction, info);
Expand Down
14 changes: 7 additions & 7 deletions storage/duckdb/tpch/02_generate.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env bash
# Generate TPC-H data (.tbl, pipe-delimited) at scale factor $SF into $DATA_DIR.
# Skips generation if all .tbl files already exist (set FORCE=1 to regenerate).
# Generate TPC-H data (Parquet) at scale factor $SF into $DATA_DIR.
# Skips generation if all .parquet files already exist (set FORCE=1 to regenerate).
set -euo pipefail
DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "$DIR/config.sh"
Expand All @@ -10,12 +10,12 @@ command -v tpchgen-cli >/dev/null 2>&1 || { echo "ERROR: run ./01_install.sh fir
mkdir -p "$DATA_DIR"

missing=0
for t in "${TABLES[@]}"; do [ -f "$DATA_DIR/$t.tbl" ] || missing=1; done
for t in "${TABLES[@]}"; do [ -f "$DATA_DIR/$t.parquet" ] || missing=1; done
if [ "$missing" = 0 ] && [ "${FORCE:-0}" != 1 ]; then
echo "All .tbl files already present in $DATA_DIR (set FORCE=1 to regenerate)."
echo "All .parquet files already present in $DATA_DIR (set FORCE=1 to regenerate)."
exit 0
fi

echo "Generating TPC-H SF$SF (.tbl) into $DATA_DIR ..."
tpchgen-cli -s "$SF" --output-dir "$DATA_DIR"
ls -la "$DATA_DIR"/*.tbl
echo "Generating TPC-H SF$SF (Parquet) into $DATA_DIR ..."
tpchgen-cli -s "$SF" --format=parquet --output-dir "$DATA_DIR"
ls -la "$DATA_DIR"/*.parquet
31 changes: 18 additions & 13 deletions storage/duckdb/tpch/03_schema.sh
Original file line number Diff line number Diff line change
@@ -1,18 +1,23 @@
#!/usr/bin/env bash
# Create schema $SCHEMA and the 8 TPC-H tables inside the embedded DuckDB,
# via run_in_duckdb. CREATE OR REPLACE makes this idempotent.
# Create database $SCHEMA and the 8 TPC-H tables as ENGINE=DUCKDB, directly
# through the mariadb client. DROP + CREATE makes this idempotent.
set -euo pipefail
DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "$DIR/config.sh"

echo "Creating schema '$SCHEMA' and tables ..."
duck "CREATE SCHEMA IF NOT EXISTS $SCHEMA" >/dev/null
duck "CREATE OR REPLACE TABLE $SCHEMA.region (r_regionkey INTEGER PRIMARY KEY, r_name VARCHAR, r_comment VARCHAR)" >/dev/null
duck "CREATE OR REPLACE TABLE $SCHEMA.nation (n_nationkey INTEGER PRIMARY KEY, n_name VARCHAR, n_regionkey INTEGER, n_comment VARCHAR)" >/dev/null
duck "CREATE OR REPLACE TABLE $SCHEMA.supplier (s_suppkey INTEGER PRIMARY KEY, s_name VARCHAR, s_address VARCHAR, s_nationkey INTEGER, s_phone VARCHAR, s_acctbal DECIMAL(15,2), s_comment VARCHAR)" >/dev/null
duck "CREATE OR REPLACE TABLE $SCHEMA.customer (c_custkey INTEGER PRIMARY KEY, c_name VARCHAR, c_address VARCHAR, c_nationkey INTEGER, c_phone VARCHAR, c_acctbal DECIMAL(15,2), c_mktsegment VARCHAR, c_comment VARCHAR)" >/dev/null
duck "CREATE OR REPLACE TABLE $SCHEMA.part (p_partkey INTEGER PRIMARY KEY, p_name VARCHAR, p_mfgr VARCHAR, p_brand VARCHAR, p_type VARCHAR, p_size INTEGER, p_container VARCHAR, p_retailprice DECIMAL(15,2), p_comment VARCHAR)" >/dev/null
duck "CREATE OR REPLACE TABLE $SCHEMA.partsupp (ps_partkey INTEGER, ps_suppkey INTEGER, ps_availqty INTEGER, ps_supplycost DECIMAL(15,2), ps_comment VARCHAR, PRIMARY KEY (ps_partkey, ps_suppkey))" >/dev/null
duck "CREATE OR REPLACE TABLE $SCHEMA.orders (o_orderkey BIGINT PRIMARY KEY, o_custkey INTEGER, o_orderstatus VARCHAR, o_totalprice DECIMAL(15,2), o_orderdate DATE, o_orderpriority VARCHAR, o_clerk VARCHAR, o_shippriority INTEGER, o_comment VARCHAR)" >/dev/null
duck "CREATE OR REPLACE TABLE $SCHEMA.lineitem (l_orderkey BIGINT, l_partkey INTEGER, l_suppkey INTEGER, l_linenumber INTEGER, l_quantity DECIMAL(15,2), l_extendedprice DECIMAL(15,2), l_discount DECIMAL(15,2), l_tax DECIMAL(15,2), l_returnflag VARCHAR, l_linestatus VARCHAR, l_shipdate DATE, l_commitdate DATE, l_receiptdate DATE, l_shipinstruct VARCHAR, l_shipmode VARCHAR, l_comment VARCHAR, PRIMARY KEY (l_orderkey, l_linenumber))" >/dev/null
echo "Schema '$SCHEMA' ready."
echo "Creating database '$SCHEMA' and ENGINE=DUCKDB tables ..."
mdb "CREATE DATABASE IF NOT EXISTS $SCHEMA"

for t in "${TABLES[@]}"; do
mdb_db "DROP TABLE IF EXISTS $t"
done

mdb_db "CREATE TABLE region (r_regionkey INTEGER PRIMARY KEY, r_name VARCHAR(25), r_comment VARCHAR(152)) ENGINE=DUCKDB DEFAULT CHARSET=$CHARSET"
mdb_db "CREATE TABLE nation (n_nationkey INTEGER PRIMARY KEY, n_name VARCHAR(25), n_regionkey INTEGER, n_comment VARCHAR(152)) ENGINE=DUCKDB DEFAULT CHARSET=$CHARSET"
mdb_db "CREATE TABLE supplier (s_suppkey INTEGER PRIMARY KEY, s_name VARCHAR(25), s_address VARCHAR(40), s_nationkey INTEGER, s_phone VARCHAR(15), s_acctbal DECIMAL(15,2), s_comment VARCHAR(101)) ENGINE=DUCKDB DEFAULT CHARSET=$CHARSET"
mdb_db "CREATE TABLE customer (c_custkey INTEGER PRIMARY KEY, c_name VARCHAR(25), c_address VARCHAR(40), c_nationkey INTEGER, c_phone VARCHAR(15), c_acctbal DECIMAL(15,2), c_mktsegment VARCHAR(10), c_comment VARCHAR(117)) ENGINE=DUCKDB DEFAULT CHARSET=$CHARSET"
mdb_db "CREATE TABLE part (p_partkey INTEGER PRIMARY KEY, p_name VARCHAR(55), p_mfgr VARCHAR(25), p_brand VARCHAR(10), p_type VARCHAR(25), p_size INTEGER, p_container VARCHAR(10), p_retailprice DECIMAL(15,2), p_comment VARCHAR(23)) ENGINE=DUCKDB DEFAULT CHARSET=$CHARSET"
mdb_db "CREATE TABLE partsupp (ps_partkey INTEGER, ps_suppkey INTEGER, ps_availqty INTEGER, ps_supplycost DECIMAL(15,2), ps_comment VARCHAR(199), PRIMARY KEY (ps_partkey, ps_suppkey)) ENGINE=DUCKDB DEFAULT CHARSET=$CHARSET"
mdb_db "CREATE TABLE orders (o_orderkey BIGINT PRIMARY KEY, o_custkey INTEGER, o_orderstatus CHAR(1), o_totalprice DECIMAL(15,2), o_orderdate DATE, o_orderpriority VARCHAR(15), o_clerk VARCHAR(15), o_shippriority INTEGER, o_comment VARCHAR(79)) ENGINE=DUCKDB DEFAULT CHARSET=$CHARSET"
mdb_db "CREATE TABLE lineitem (l_orderkey BIGINT, l_partkey INTEGER, l_suppkey INTEGER, l_linenumber INTEGER, l_quantity DECIMAL(15,2), l_extendedprice DECIMAL(15,2), l_discount DECIMAL(15,2), l_tax DECIMAL(15,2), l_returnflag CHAR(1), l_linestatus CHAR(1), l_shipdate DATE, l_commitdate DATE, l_receiptdate DATE, l_shipinstruct VARCHAR(25), l_shipmode VARCHAR(10), l_comment VARCHAR(44), PRIMARY KEY (l_orderkey, l_linenumber)) ENGINE=DUCKDB DEFAULT CHARSET=$CHARSET"
echo "Database '$SCHEMA' ready."
15 changes: 9 additions & 6 deletions storage/duckdb/tpch/04_load.sh
Original file line number Diff line number Diff line change
@@ -1,19 +1,22 @@
#!/usr/bin/env bash
# Populate $SCHEMA.* with COPY from the generated .tbl files (DuckDB reads the
# pipe-delimited, header-less, trailing-'|' tbl format with DELIMITER '|').
# Populate $SCHEMA.* from the generated Parquet files. Loading runs on the
# embedded DuckDB via run_in_duckdb (the duck helper) with read_parquet():
# the ENGINE=DUCKDB tables created in step 3 are addressable inside DuckDB as
# <database>.<table>, so INSERT ... SELECT * FROM read_parquet() fills them
# server-side without round-tripping the data through the MariaDB client.
# Times each table and prints row counts.
set -euo pipefail
DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "$DIR/config.sh"

echo "== COPY load from $DATA_DIR into schema '$SCHEMA' (wall clock incl. client) =="
echo "== read_parquet load from $DATA_DIR into database '$SCHEMA' (wall clock incl. client) =="
total=0
for t in "${TABLES[@]}"; do
f="$DATA_DIR/$t.tbl"
f="$DATA_DIR/$t.parquet"
[ -f "$f" ] || { echo "ERROR: missing $f (run ./02_generate.sh)" >&2; exit 1; }
duck "TRUNCATE $SCHEMA.$t" >/dev/null 2>&1 || true
start=$(date +%s.%N)
duck "COPY $SCHEMA.$t FROM '$f' (DELIMITER '|')" >/dev/null
duck "INSERT INTO $SCHEMA.$t SELECT * FROM read_parquet('$f')" >/dev/null
end=$(date +%s.%N)
total=$(awk -v a="$total" -v s="$start" -v e="$end" 'BEGIN{print a+(e-s)}')
awk -v s="$start" -v e="$end" -v t="$t" 'BEGIN{printf "%-10s %9.3f s\n", t, e-s}'
Expand All @@ -23,5 +26,5 @@ awk -v a="$total" 'BEGIN{printf "%-10s %9.3f s\n", "TOTAL", a}'
echo "== row counts =="
for t in "${TABLES[@]}"; do
printf "%-10s " "$t"
duck "SELECT count(*) FROM $SCHEMA.$t" | grep -Eo '^[0-9]+$' | tail -1
mdb_db "SELECT count(*) FROM $t" | grep -Eo '^[0-9]+$' | tail -1
done
12 changes: 5 additions & 7 deletions storage/duckdb/tpch/05_run_queries.sh
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
#!/usr/bin/env bash
# Run the 22 TPC-H queries from $TPCH_SQL against $SCHEMA via run_in_duckdb,
# timing each (wall clock, one client invocation). Writes a TSV of timings.
# Queries get the raw MariaDB-dialect text (no pushdown rewrites): any query
# using MariaDB-only syntax is reported as ERR.
# Run the 22 TPC-H queries from $TPCH_SQL against database $SCHEMA directly
# through the mariadb client (no run_in_duckdb), timing each (wall clock, one
# client invocation). Writes a TSV of timings. Any query that errors out
# (e.g. unsupported syntax) is reported as ERR.
set -uo pipefail
DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "$DIR/config.sh"
Expand All @@ -21,11 +21,9 @@ for i in $(seq 1 22); do
n=$(printf "%02d" "$i")
f="$MQ/q$n.sql"
[ -f "$f" ] || continue
combined="SET schema '$SCHEMA'; $(cat "$f")"
esc=$(printf '%s' "$combined" | sed "s/'/''/g")

start=$(date +%s.%N)
out=$("$MARIADB" -N -e "SELECT run_in_duckdb('$esc')" 2>&1)
out=$("$MARIADB" --default-character-set="$CHARSET" -N -D "$SCHEMA" < "$f" 2>&1)
end=$(date +%s.%N)

if printf '%s' "$out" | grep -qiE 'error'; then
Expand Down
44 changes: 23 additions & 21 deletions storage/duckdb/tpch/README.md
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
# TPC-H kit — DuckDB storage engine (MariaDB)

Reproducible TPC-H pipeline for the embedded DuckDB engine: install a generator,
generate data, create the schema, COPY-load it, and run the 22 queries — all
through MariaDB's `run_in_duckdb`.
Reproducible TPC-H pipeline for the DuckDB storage engine: install a generator,
generate Parquet data, create the `ENGINE=DUCKDB` tables, load them with
`read_parquet()`, and run the 22 queries directly through the `mariadb` client.

## Pipeline

| Step | Script | What it does |
|---|---|---|
| 1 | `01_install.sh` | Install `tpchgen-cli` (pip / uv / cargo). |
| 2 | `02_generate.sh` | Generate `.tbl` data at scale factor `$SF` into `$DATA_DIR`. |
| 3 | `03_schema.sh` | Create schema `$SCHEMA` + 8 tables in the embedded DuckDB. |
| 4 | `04_load.sh` | `COPY` each `.tbl` into `$SCHEMA.*`; prints per-table timings + row counts. |
| 2 | `02_generate.sh` | Generate Parquet data at scale factor `$SF` into `$DATA_DIR`. |
| 3 | `03_schema.sh` | Create database `$SCHEMA` + 8 `ENGINE=DUCKDB` tables. |
| 4 | `04_load.sh` | `INSERT ... SELECT * FROM read_parquet()` each `.parquet` into `$SCHEMA.*` (via `run_in_duckdb`); prints per-table timings + row counts. |
| 5 | `05_run_queries.sh` | Run the 22 queries from `$TPCH_SQL`; writes `query_timings.tsv`. |

Run everything: `./run_all.sh` (steps are idempotent; generation is skipped if data exists).
Expand All @@ -23,33 +23,35 @@ All knobs live in `config.sh` and are overridable via environment:
```bash
SF=1 ./run_all.sh # scale factor 1
DATA_DIR=/data/tpch SF=10 ./run_all.sh # custom data location
SCHEMA=tpch_bench ./03_schema.sh # custom DuckDB schema
SCHEMA=tpch_bench ./03_schema.sh # custom MariaDB database
```

| Var | Default | Meaning |
|---|---|---|
| `SF` | `10` | TPC-H scale factor |
| `DATA_DIR` | `/git/tpch/sf<SF>` | where `.tbl` files are generated/read |
| `SCHEMA` | `bench` | DuckDB schema populated via the UDF |
| `DATA_DIR` | `/git/tpch/sf<SF>` | where `.parquet` files are generated/read |
| `SCHEMA` | `bench` | MariaDB database holding the `ENGINE=DUCKDB` tables |
| `TPCH_SQL` | `/tpch.sql` | source of the 22 (MariaDB-dialect) queries |
| `MARIADB` | `mariadb` | client command |

## Prerequisites

- A running MariaDB server with the DuckDB engine loaded and the
`run_in_duckdb` function available.
- A running MariaDB server with the DuckDB storage engine loaded
(`ENGINE=DUCKDB` available) and the `run_in_duckdb` function installed
(used only for the Parquet load).
- `pip`, `uv`, or `cargo` to install the generator; `tpchgen-cli` on `PATH`
afterwards (pip user installs land in `~/.local/bin`).

## How it works / caveats

- **Generator:** `tpchgen-cli -s <SF> --output-dir <DATA_DIR>` emits classic
`.tbl` files (pipe-delimited, no header, trailing `|`). DuckDB loads these with
`COPY ... (DELIMITER '|')` — the trailing delimiter is tolerated.
- **Load target:** data goes into a DuckDB-native schema (`bench`) inside the
embedded instance via `run_in_duckdb`, not into `ENGINE=DUCKDB` MariaDB
tables (which can't be `COPY`-loaded).
- **Queries:** taken from `/tpch.sql` (MariaDB dialect) and executed as
`SET schema '<SCHEMA>'; <query>` through the UDF. The UDF receives the **raw**
text — the engine's dialect rewrites only apply on pushdown, so any
MariaDB-only syntax errors out and is reported as `ERR` in the timings.
- **Generator:** `tpchgen-cli -s <SF> --format=parquet --output-dir <DATA_DIR>`
emits one `.parquet` file per table.
- **Load target:** data goes into `ENGINE=DUCKDB` tables in a regular MariaDB
database (`bench`). The load runs server-side on the embedded DuckDB via
`run_in_duckdb`: `INSERT INTO <db>.<table> SELECT * FROM read_parquet(...)`.
ENGINE=DUCKDB tables are addressable inside DuckDB as `<db>.<table>`, so the
Parquet data never round-trips through the MariaDB client. The TPC-H Parquet
column order/types match the table definitions, so a plain `SELECT *` works.
- **Queries:** taken from `$TPCH_SQL` (MariaDB dialect) and executed directly
through the `mariadb` client with `$SCHEMA` as the default database. Any
query that errors out is reported as `ERR` in the timings.
Loading