MariaDB · drrtuy · Jun 18, 2026 · Jun 18, 2026 · Jun 22, 2026
diff --git a/storage/duckdb/cmake/duckdb.cmake b/storage/duckdb/cmake/duckdb.cmake
@@ -74,6 +74,11 @@ ExternalProject_Add(duckdb_build
     -DBUILD_TPCE=OFF
     -DEXTENSION_STATIC_BUILD=1
     "-DDUCKDB_EXTENSION_CONFIGS=${CMAKE_CURRENT_SOURCE_DIR}/cmake/duckdb_extensions.cmake"
+    # Upstream sets DUCKDB_EXTENSION_JEMALLOC_LINKED via add_extension_definitions(),
+    # which runs in extension/ but NOT in src/, so allocator.cpp (in duckdb_static)
+    # compiles the glibc malloc() path even though libjemalloc_extension.a is linked.
+    # Define it globally + add the jemalloc header dir so the USE_JEMALLOC branch is active.
+    "-DCMAKE_CXX_FLAGS=-DDUCKDB_EXTENSION_JEMALLOC_LINKED=1 -I${DUCKDB_SUBMODULE_DIR}/extension/jemalloc/include"
     -DENABLE_SANITIZER=FALSE
     -DENABLE_UBSAN=OFF
     -DOVERRIDE_GIT_DESCRIBE=v1.5.2-0-g0000000000

diff --git a/storage/duckdb/cmake/duckdb_extensions.cmake b/storage/duckdb/cmake/duckdb_extensions.cmake
@@ -1,6 +1,7 @@
 # Extensions required by the DuckDB storage engine plugin for MariaDB.
 # This config is passed to DuckDB via DUCKDB_EXTENSION_CONFIGS.
 
+duckdb_extension_load(jemalloc)
 duckdb_extension_load(core_functions)
 duckdb_extension_load(icu)
 duckdb_extension_load(json)
diff --git a/storage/duckdb/runtime/duckdb_mysql_compat.cc b/storage/duckdb/runtime/duckdb_mysql_compat.cc
@@ -54,6 +54,9 @@
 #include "duckdb/main/connection.hpp"
 
 #include "duckdb/common/types/string_type.hpp"
+#include "duckdb/execution/expression_executor.hpp"
+#include "duckdb/function/scalar/regexp.hpp"
+#include "duckdb/planner/expression/bound_function_expression.hpp"
 #include "re2/re2.h"
 
 namespace myduck
@@ -982,6 +985,86 @@ static void locate_3arg_func(duckdb::DataChunk &args,
       });
 }
 
+/* ================================================================
+   regexp_replace(VARCHAR, VARCHAR, VARCHAR) -> VARCHAR
+
+   MariaDB REGEXP_REPLACE replaces ALL matches (global), unlike DuckDB's
+   native 3-arg form which replaces only the first.  We reuse DuckDB's
+   native bind-data / local-state so a constant pattern is extracted at bind
+   time and compiled once per local state (RegexInitLocalState), not per row.
+
+   Invalid-pattern behavior mirrors MariaDB:
+     - constant pattern  -> RegexLocalState ctor throws (query error);
+     - non-constant       -> per-row NULL.
+   ================================================================ */
+
+static duckdb::unique_ptr<duckdb::FunctionData> /* bind step of regexp_replace */
+regexp_replace_bind(duckdb::ClientContext &context,
+                    duckdb::ScalarFunction &, /* unused */
+                    duckdb::vector<duckdb::unique_ptr<duckdb::Expression>>
+                        &arguments) /* [0] subject, [1] pattern, [2] replacement */
+{
+  auto data= duckdb::make_uniq<duckdb::RegexpReplaceBindData>(); /* read back by regexp_replace_global_func */
+  data->constant_pattern= duckdb::regexp_util::TryParseConstantPattern( /* flag: is the pattern a constant? */
+      context, *arguments[1], data->constant_string); /* presumably fills constant_string on success - TODO confirm */
+  data->global_replace= true; /* MariaDB semantics: replace every match */
+  data->options.set_log_errors(false); /* these options also build the per-row RE2 in regexp_replace_global_func */
+  return duckdb::unique_ptr<duckdb::FunctionData>(std::move(data)); /* hand ownership back as the base type */
+}
+
+static void regexp_replace_global_func(duckdb::DataChunk &args, /* executes regexp_replace for the rows in args */
+                                       duckdb::ExpressionState &state,
+                                       duckdb::Vector &result) /* output vector */
+{
+  auto &func_expr= state.expr.Cast<duckdb::BoundFunctionExpression>();
+  auto &info= func_expr.bind_info->Cast<duckdb::RegexpReplaceBindData>(); /* bind data built by regexp_replace_bind */
+
+  auto &strings= args.data[0]; /* subject strings */
+  auto &patterns= args.data[1]; /* regex patterns */
+  auto &replaces= args.data[2]; /* replacement templates */
+
+  if (info.constant_pattern) /* pattern was recognised as a constant at bind time */
+  {
+    auto &lstate= duckdb::ExecuteFunctionState::GetFunctionState(state) /* function-local state */
+                      ->Cast<duckdb::RegexLocalState>(); /* holds the already-compiled RE2 */
+    duckdb::BinaryExecutor::Execute<duckdb::string_t, duckdb::string_t,
+                                    duckdb::string_t>(
+        strings, replaces, result, args.size(), /* the patterns column is not consulted per row */
+        [&](duckdb::string_t input, duckdb::string_t replace) {
+          std::string s= input.GetString(); /* working copy, edited in place below */
+          duckdb_re2::RE2::GlobalReplace( /* replace all matches */
+              &s, lstate.constant_pattern,
+              duckdb_re2::StringPiece(replace.GetData(), replace.GetSize()));
+          return duckdb::StringVector::AddString(result, s);
+        });
+  }
+  else /* pattern may differ from row to row */
+  {
+    duckdb::TernaryExecutor::ExecuteWithNulls<duckdb::string_t, /* "WithNulls": the callback can null a row via mask */
+                                              duckdb::string_t,
+                                              duckdb::string_t,
+                                              duckdb::string_t>(
+        strings, patterns, replaces, result, args.size(),
+        [&](duckdb::string_t input, duckdb::string_t pattern,
+            duckdb::string_t replace, duckdb::ValidityMask &mask,
+            duckdb::idx_t idx) -> duckdb::string_t {
+          duckdb_re2::RE2 re( /* compile this row's pattern */
+              duckdb_re2::StringPiece(pattern.GetData(), pattern.GetSize()),
+              info.options);
+          if (!re.ok()) /* invalid pattern */
+          {
+            mask.SetInvalid(idx); /* this row's result becomes NULL */
+            return duckdb::string_t(); /* dummy value; the row was just marked invalid */
+          }
+          std::string s= input.GetString(); /* working copy, edited in place below */
+          duckdb_re2::RE2::GlobalReplace( /* replace all matches */
+              &s, re,
+              duckdb_re2::StringPiece(replace.GetData(), replace.GetSize()));
+          return duckdb::StringVector::AddString(result, s);
+        });
+  }
+}
+
 /* ================================================================
    Registration
    ================================================================ */
@@ -1207,32 +1290,15 @@ void register_mysql_compat_functions(duckdb::DatabaseInstance &db)
   }
 
   /* regexp_replace(VARCHAR, VARCHAR, VARCHAR) → VARCHAR
-     Replaces all occurrences of pattern in expr with replacement. */
+     Global (replace-all) MariaDB semantics; a constant pattern is compiled
+     once per local state, not per row.  See regexp_replace_global_func. */
   {
     duckdb::ScalarFunctionSet set("regexp_replace");
     set.AddFunction(duckdb::ScalarFunction(
         {duckdb::LogicalType::VARCHAR, duckdb::LogicalType::VARCHAR,
          duckdb::LogicalType::VARCHAR},
-        duckdb::LogicalType::VARCHAR,
-        [](duckdb::DataChunk &args, duckdb::ExpressionState &,
-           duckdb::Vector &result) {
-          duckdb::TernaryExecutor::Execute<duckdb::string_t, duckdb::string_t,
-                                           duckdb::string_t,
-                                           duckdb::string_t>(
-              args.data[0], args.data[1], args.data[2], result, args.size(),
-              [&](duckdb::string_t expr, duckdb::string_t pat,
-                  duckdb::string_t repl) -> duckdb::string_t {
-                duckdb_re2::RE2 re(
-                    duckdb_re2::StringPiece(pat.GetData(), pat.GetSize()));
-                if (!re.ok())
-                  return expr;
-                std::string s(expr.GetData(), expr.GetSize());
-                duckdb_re2::RE2::GlobalReplace(
-                    &s,  re,
-                    duckdb_re2::StringPiece(repl.GetData(), repl.GetSize()));
-                return duckdb::StringVector::AddString(result, s);
-              });
-        }));
+        duckdb::LogicalType::VARCHAR, regexp_replace_global_func,
+        regexp_replace_bind, nullptr, nullptr, duckdb::RegexInitLocalState));
     duckdb::CreateScalarFunctionInfo info(std::move(set));
     info.on_conflict= duckdb::OnCreateConflict::ALTER_ON_CONFLICT;
     catalog.CreateFunction(transaction, info);

diff --git a/storage/duckdb/tpch/02_generate.sh b/storage/duckdb/tpch/02_generate.sh
@@ -1,6 +1,6 @@
 #!/usr/bin/env bash
-# Generate TPC-H data (.tbl, pipe-delimited) at scale factor $SF into $DATA_DIR.
-# Skips generation if all .tbl files already exist (set FORCE=1 to regenerate).
+# Generate TPC-H data (Parquet) at scale factor $SF into $DATA_DIR.
+# Skips generation if all .parquet files already exist (set FORCE=1 to regenerate).
 set -euo pipefail
 DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 source "$DIR/config.sh"
@@ -10,12 +10,12 @@ command -v tpchgen-cli >/dev/null 2>&1 || { echo "ERROR: run ./01_install.sh fir
 mkdir -p "$DATA_DIR"
 
 missing=0
-for t in "${TABLES[@]}"; do [ -f "$DATA_DIR/$t.tbl" ] || missing=1; done
+for t in "${TABLES[@]}"; do [ -f "$DATA_DIR/$t.parquet" ] || missing=1; done
 if [ "$missing" = 0 ] && [ "${FORCE:-0}" != 1 ]; then
-  echo "All .tbl files already present in $DATA_DIR (set FORCE=1 to regenerate)."
+  echo "All .parquet files already present in $DATA_DIR (set FORCE=1 to regenerate)."
   exit 0
 fi
 
-echo "Generating TPC-H SF$SF (.tbl) into $DATA_DIR ..."
-tpchgen-cli -s "$SF" --output-dir "$DATA_DIR"
-ls -la "$DATA_DIR"/*.tbl
+echo "Generating TPC-H SF$SF (Parquet) into $DATA_DIR ..."
+tpchgen-cli -s "$SF" --format=parquet --output-dir "$DATA_DIR"
+ls -la "$DATA_DIR"/*.parquet
diff --git a/storage/duckdb/tpch/03_schema.sh b/storage/duckdb/tpch/03_schema.sh
@@ -1,18 +1,23 @@
 #!/usr/bin/env bash
-# Create schema $SCHEMA and the 8 TPC-H tables inside the embedded DuckDB,
-# via run_in_duckdb. CREATE OR REPLACE makes this idempotent.
+# Create database $SCHEMA and the 8 TPC-H tables as ENGINE=DUCKDB, directly
+# through the mariadb client. DROP + CREATE makes this idempotent.
 set -euo pipefail
 DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 source "$DIR/config.sh"
 
-echo "Creating schema '$SCHEMA' and tables ..."
-duck "CREATE SCHEMA IF NOT EXISTS $SCHEMA" >/dev/null
-duck "CREATE OR REPLACE TABLE $SCHEMA.region   (r_regionkey INTEGER PRIMARY KEY, r_name VARCHAR, r_comment VARCHAR)" >/dev/null
-duck "CREATE OR REPLACE TABLE $SCHEMA.nation   (n_nationkey INTEGER PRIMARY KEY, n_name VARCHAR, n_regionkey INTEGER, n_comment VARCHAR)" >/dev/null
-duck "CREATE OR REPLACE TABLE $SCHEMA.supplier (s_suppkey INTEGER PRIMARY KEY, s_name VARCHAR, s_address VARCHAR, s_nationkey INTEGER, s_phone VARCHAR, s_acctbal DECIMAL(15,2), s_comment VARCHAR)" >/dev/null
-duck "CREATE OR REPLACE TABLE $SCHEMA.customer (c_custkey INTEGER PRIMARY KEY, c_name VARCHAR, c_address VARCHAR, c_nationkey INTEGER, c_phone VARCHAR, c_acctbal DECIMAL(15,2), c_mktsegment VARCHAR, c_comment VARCHAR)" >/dev/null
-duck "CREATE OR REPLACE TABLE $SCHEMA.part     (p_partkey INTEGER PRIMARY KEY, p_name VARCHAR, p_mfgr VARCHAR, p_brand VARCHAR, p_type VARCHAR, p_size INTEGER, p_container VARCHAR, p_retailprice DECIMAL(15,2), p_comment VARCHAR)" >/dev/null
-duck "CREATE OR REPLACE TABLE $SCHEMA.partsupp (ps_partkey INTEGER, ps_suppkey INTEGER, ps_availqty INTEGER, ps_supplycost DECIMAL(15,2), ps_comment VARCHAR, PRIMARY KEY (ps_partkey, ps_suppkey))" >/dev/null
-duck "CREATE OR REPLACE TABLE $SCHEMA.orders   (o_orderkey BIGINT PRIMARY KEY, o_custkey INTEGER, o_orderstatus VARCHAR, o_totalprice DECIMAL(15,2), o_orderdate DATE, o_orderpriority VARCHAR, o_clerk VARCHAR, o_shippriority INTEGER, o_comment VARCHAR)" >/dev/null
-duck "CREATE OR REPLACE TABLE $SCHEMA.lineitem (l_orderkey BIGINT, l_partkey INTEGER, l_suppkey INTEGER, l_linenumber INTEGER, l_quantity DECIMAL(15,2), l_extendedprice DECIMAL(15,2), l_discount DECIMAL(15,2), l_tax DECIMAL(15,2), l_returnflag VARCHAR, l_linestatus VARCHAR, l_shipdate DATE, l_commitdate DATE, l_receiptdate DATE, l_shipinstruct VARCHAR, l_shipmode VARCHAR, l_comment VARCHAR, PRIMARY KEY (l_orderkey, l_linenumber))" >/dev/null
-echo "Schema '$SCHEMA' ready."
+echo "Creating database '$SCHEMA' and ENGINE=DUCKDB tables ..."
+mdb "CREATE DATABASE IF NOT EXISTS $SCHEMA"
+
+for t in "${TABLES[@]}"; do
+  mdb_db "DROP TABLE IF EXISTS $t"
+done
+
+mdb_db "CREATE TABLE region   (r_regionkey INTEGER PRIMARY KEY, r_name VARCHAR(25), r_comment VARCHAR(152)) ENGINE=DUCKDB DEFAULT CHARSET=$CHARSET"
+mdb_db "CREATE TABLE nation   (n_nationkey INTEGER PRIMARY KEY, n_name VARCHAR(25), n_regionkey INTEGER, n_comment VARCHAR(152)) ENGINE=DUCKDB DEFAULT CHARSET=$CHARSET"
+mdb_db "CREATE TABLE supplier (s_suppkey INTEGER PRIMARY KEY, s_name VARCHAR(25), s_address VARCHAR(40), s_nationkey INTEGER, s_phone VARCHAR(15), s_acctbal DECIMAL(15,2), s_comment VARCHAR(101)) ENGINE=DUCKDB DEFAULT CHARSET=$CHARSET"
+mdb_db "CREATE TABLE customer (c_custkey INTEGER PRIMARY KEY, c_name VARCHAR(25), c_address VARCHAR(40), c_nationkey INTEGER, c_phone VARCHAR(15), c_acctbal DECIMAL(15,2), c_mktsegment VARCHAR(10), c_comment VARCHAR(117)) ENGINE=DUCKDB DEFAULT CHARSET=$CHARSET"
+mdb_db "CREATE TABLE part     (p_partkey INTEGER PRIMARY KEY, p_name VARCHAR(55), p_mfgr VARCHAR(25), p_brand VARCHAR(10), p_type VARCHAR(25), p_size INTEGER, p_container VARCHAR(10), p_retailprice DECIMAL(15,2), p_comment VARCHAR(23)) ENGINE=DUCKDB DEFAULT CHARSET=$CHARSET"
+mdb_db "CREATE TABLE partsupp (ps_partkey INTEGER, ps_suppkey INTEGER, ps_availqty INTEGER, ps_supplycost DECIMAL(15,2), ps_comment VARCHAR(199), PRIMARY KEY (ps_partkey, ps_suppkey)) ENGINE=DUCKDB DEFAULT CHARSET=$CHARSET"
+mdb_db "CREATE TABLE orders   (o_orderkey BIGINT PRIMARY KEY, o_custkey INTEGER, o_orderstatus CHAR(1), o_totalprice DECIMAL(15,2), o_orderdate DATE, o_orderpriority VARCHAR(15), o_clerk VARCHAR(15), o_shippriority INTEGER, o_comment VARCHAR(79)) ENGINE=DUCKDB DEFAULT CHARSET=$CHARSET"
+mdb_db "CREATE TABLE lineitem (l_orderkey BIGINT, l_partkey INTEGER, l_suppkey INTEGER, l_linenumber INTEGER, l_quantity DECIMAL(15,2), l_extendedprice DECIMAL(15,2), l_discount DECIMAL(15,2), l_tax DECIMAL(15,2), l_returnflag CHAR(1), l_linestatus CHAR(1), l_shipdate DATE, l_commitdate DATE, l_receiptdate DATE, l_shipinstruct VARCHAR(25), l_shipmode VARCHAR(10), l_comment VARCHAR(44), PRIMARY KEY (l_orderkey, l_linenumber)) ENGINE=DUCKDB DEFAULT CHARSET=$CHARSET"
+echo "Database '$SCHEMA' ready."
diff --git a/storage/duckdb/tpch/04_load.sh b/storage/duckdb/tpch/04_load.sh
@@ -1,19 +1,22 @@
 #!/usr/bin/env bash
-# Populate $SCHEMA.* with COPY from the generated .tbl files (DuckDB reads the
-# pipe-delimited, header-less, trailing-'|' tbl format with DELIMITER '|').
+# Populate $SCHEMA.* from the generated Parquet files. Loading runs on the
+# embedded DuckDB via run_in_duckdb (the duck helper) with read_parquet():
+# the ENGINE=DUCKDB tables created in step 3 are addressable inside DuckDB as
+# <database>.<table>, so INSERT ... SELECT * FROM read_parquet() fills them
+# server-side without round-tripping the data through the MariaDB client.
 # Times each table and prints row counts.
 set -euo pipefail
 DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 source "$DIR/config.sh"
 
-echo "== COPY load from $DATA_DIR into schema '$SCHEMA' (wall clock incl. client) =="
+echo "== read_parquet load from $DATA_DIR into database '$SCHEMA' (wall clock incl. client) =="
 total=0
 for t in "${TABLES[@]}"; do
-  f="$DATA_DIR/$t.tbl"
+  f="$DATA_DIR/$t.parquet"
   [ -f "$f" ] || { echo "ERROR: missing $f (run ./02_generate.sh)" >&2; exit 1; }
   duck "TRUNCATE $SCHEMA.$t" >/dev/null 2>&1 || true
   start=$(date +%s.%N)
-  duck "COPY $SCHEMA.$t FROM '$f' (DELIMITER '|')" >/dev/null
+  duck "INSERT INTO $SCHEMA.$t SELECT * FROM read_parquet('$f')" >/dev/null
   end=$(date +%s.%N)
   total=$(awk -v a="$total" -v s="$start" -v e="$end" 'BEGIN{print a+(e-s)}')
   awk -v s="$start" -v e="$end" -v t="$t" 'BEGIN{printf "%-10s %9.3f s\n", t, e-s}'
@@ -23,5 +26,5 @@ awk -v a="$total" 'BEGIN{printf "%-10s %9.3f s\n", "TOTAL", a}'
 echo "== row counts =="
 for t in "${TABLES[@]}"; do
   printf "%-10s " "$t"
-  duck "SELECT count(*) FROM $SCHEMA.$t" | grep -Eo '^[0-9]+$' | tail -1
+  mdb_db "SELECT count(*) FROM $t" | grep -Eo '^[0-9]+$' | tail -1
 done
diff --git a/storage/duckdb/tpch/05_run_queries.sh b/storage/duckdb/tpch/05_run_queries.sh
@@ -1,8 +1,8 @@
 #!/usr/bin/env bash
-# Run the 22 TPC-H queries from $TPCH_SQL against $SCHEMA via run_in_duckdb,
-# timing each (wall clock, one client invocation). Writes a TSV of timings.
-# Queries get the raw MariaDB-dialect text (no pushdown rewrites): any query
-# using MariaDB-only syntax is reported as ERR.
+# Run the 22 TPC-H queries from $TPCH_SQL against database $SCHEMA directly
+# through the mariadb client (no run_in_duckdb), timing each (wall clock, one
+# client invocation). Writes a TSV of timings. Any query that errors out
+# (e.g. unsupported syntax) is reported as ERR.
 set -uo pipefail
 DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 source "$DIR/config.sh"
@@ -21,11 +21,9 @@ for i in $(seq 1 22); do
   n=$(printf "%02d" "$i")
   f="$MQ/q$n.sql"
   [ -f "$f" ] || continue
-  combined="SET schema '$SCHEMA'; $(cat "$f")"
-  esc=$(printf '%s' "$combined" | sed "s/'/''/g")
 
   start=$(date +%s.%N)
-  out=$("$MARIADB" -N -e "SELECT run_in_duckdb('$esc')" 2>&1)
+  out=$("$MARIADB" --default-character-set="$CHARSET" -N -D "$SCHEMA" < "$f" 2>&1)
   end=$(date +%s.%N)
 
   if printf '%s' "$out" | grep -qiE 'error'; then

diff --git a/storage/duckdb/tpch/README.md b/storage/duckdb/tpch/README.md
@@ -1,17 +1,17 @@
 # TPC-H kit — DuckDB storage engine (MariaDB)
 
-Reproducible TPC-H pipeline for the embedded DuckDB engine: install a generator,
-generate data, create the schema, COPY-load it, and run the 22 queries — all
-through MariaDB's `run_in_duckdb`.
+Reproducible TPC-H pipeline for the DuckDB storage engine: install a generator,
+generate Parquet data, create the `ENGINE=DUCKDB` tables, load them with
+`read_parquet()`, and run the 22 queries directly through the `mariadb` client.
 
 ## Pipeline
 
 | Step | Script | What it does |
 |---|---|---|
 | 1 | `01_install.sh` | Install `tpchgen-cli` (pip / uv / cargo). |
-| 2 | `02_generate.sh` | Generate `.tbl` data at scale factor `$SF` into `$DATA_DIR`. |
-| 3 | `03_schema.sh` | Create schema `$SCHEMA` + 8 tables in the embedded DuckDB. |
-| 4 | `04_load.sh` | `COPY` each `.tbl` into `$SCHEMA.*`; prints per-table timings + row counts. |
+| 2 | `02_generate.sh` | Generate Parquet data at scale factor `$SF` into `$DATA_DIR`. |
+| 3 | `03_schema.sh` | Create database `$SCHEMA` + 8 `ENGINE=DUCKDB` tables. |
+| 4 | `04_load.sh` | `INSERT ... SELECT * FROM read_parquet()` each `.parquet` into `$SCHEMA.*` (via `run_in_duckdb`); prints per-table timings + row counts. |
 | 5 | `05_run_queries.sh` | Run the 22 queries from `$TPCH_SQL`; writes `query_timings.tsv`. |
 
 Run everything: `./run_all.sh` (steps are idempotent; generation is skipped if data exists).
@@ -23,33 +23,35 @@ All knobs live in `config.sh` and are overridable via environment:
 ```bash
 SF=1 ./run_all.sh                      # scale factor 1
 DATA_DIR=/data/tpch SF=10 ./run_all.sh # custom data location
-SCHEMA=tpch_bench ./03_schema.sh       # custom DuckDB schema
+SCHEMA=tpch_bench ./03_schema.sh       # custom MariaDB database
 ```
 
 | Var | Default | Meaning |
 |---|---|---|
 | `SF` | `10` | TPC-H scale factor |
-| `DATA_DIR` | `/git/tpch/sf<SF>` | where `.tbl` files are generated/read |
-| `SCHEMA` | `bench` | DuckDB schema populated via the UDF |
+| `DATA_DIR` | `/git/tpch/sf<SF>` | where `.parquet` files are generated/read |
+| `SCHEMA` | `bench` | MariaDB database holding the `ENGINE=DUCKDB` tables |
 | `TPCH_SQL` | `/tpch.sql` | source of the 22 (MariaDB-dialect) queries |
 | `MARIADB` | `mariadb` | client command |
 
 ## Prerequisites
 
-- A running MariaDB server with the DuckDB engine loaded and the
-  `run_in_duckdb` function available.
+- A running MariaDB server with the DuckDB storage engine loaded
+  (`ENGINE=DUCKDB` available) and the `run_in_duckdb` function installed
+  (used only for the Parquet load).
 - `pip`, `uv`, or `cargo` to install the generator; `tpchgen-cli` on `PATH`
   afterwards (pip user installs land in `~/.local/bin`).
 
 ## How it works / caveats
 
-- **Generator:** `tpchgen-cli -s <SF> --output-dir <DATA_DIR>` emits classic
-  `.tbl` files (pipe-delimited, no header, trailing `|`). DuckDB loads these with
-  `COPY ... (DELIMITER '|')` — the trailing delimiter is tolerated.
-- **Load target:** data goes into a DuckDB-native schema (`bench`) inside the
-  embedded instance via `run_in_duckdb`, not into `ENGINE=DUCKDB` MariaDB
-  tables (which can't be `COPY`-loaded).
-- **Queries:** taken from `/tpch.sql` (MariaDB dialect) and executed as
-  `SET schema '<SCHEMA>'; <query>` through the UDF. The UDF receives the **raw**
-  text — the engine's dialect rewrites only apply on pushdown, so any
-  MariaDB-only syntax errors out and is reported as `ERR` in the timings.
+- **Generator:** `tpchgen-cli -s <SF> --format=parquet --output-dir <DATA_DIR>`
+  emits one `.parquet` file per table.
+- **Load target:** data goes into `ENGINE=DUCKDB` tables in a regular MariaDB
+  database (`bench`). The load runs server-side on the embedded DuckDB via
+  `run_in_duckdb`: `INSERT INTO <db>.<table> SELECT * FROM read_parquet(...)`.
+  ENGINE=DUCKDB tables are addressable inside DuckDB as `<db>.<table>`, so the
+  Parquet data never round-trips through the MariaDB client. The TPC-H Parquet
+  column order/types match the table definitions, so a plain `SELECT *` works.
+- **Queries:** taken from `$TPCH_SQL` (MariaDB dialect) and executed directly
+  through the `mariadb` client with `$SCHEMA` as the default database. Any
+  query that errors out is reported as `ERR` in the timings.