From 3a17789273e8d1d1fd0d3435687888d83cf46ae7 Mon Sep 17 00:00:00 2001 From: Dan Draper Date: Wed, 6 May 2026 14:43:55 +1000 Subject: [PATCH] perf(stack/drizzle): wrap like/ilike in eql_v2.bloom_filter(...) @> ... MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `eql_v2.like` and `eql_v2.ilike` are SQL functions whose bodies are already a single inlinable `SELECT eql_v2.bloom_filter(a) @> eql_v2.bloom_filter(b)` — but they're marked `VOLATILE`, so the planner won't inline them into an index match, and the documented `bench_text_bloom_idx` GIN functional index never engages. Customers on Supabase silently seq-scan every encrypted free-text search. Emit the inlined containment form directly so the GIN index engages on every install. Same shape as the hmac_256 wrap for eq/ne/inArray (#421). Verified via the bench: like and ilike both report `Bitmap Heap Scan` (over `bench_text_bloom_idx`) post-fix instead of `Seq Scan`. Note that `eql_v2.like` and `eql_v2.ilike` resolve to the same post-encryption SQL — case sensitivity is determined by the column's `freeTextSearch` token filters (e.g. `downcase`), not by which operator the user picked. The wrapped form preserves that behavior. The proper EQL-side fix (mark these functions IMMUTABLE so the planner can inline them and bypass the Drizzle workaround entirely) should be filed against cipherstash/encrypt-query-language as a follow-up. --- .changeset/drizzle-bloom-filter-like-ilike.md | 5 +++ .../drizzle/operators.explain.test.ts | 45 ++++++++++++++----- packages/stack/src/drizzle/operators.ts | 14 +++++- 3 files changed, 52 insertions(+), 12 deletions(-) create mode 100644 .changeset/drizzle-bloom-filter-like-ilike.md diff --git a/.changeset/drizzle-bloom-filter-like-ilike.md b/.changeset/drizzle-bloom-filter-like-ilike.md new file mode 100644 index 00000000..6fb4504a --- /dev/null +++ b/.changeset/drizzle-bloom-filter-like-ilike.md @@ -0,0 +1,5 @@ +--- +"@cipherstash/stack": patch +--- + +perf(drizzle): wrap `like` / `ilike` / `notIlike` in `eql_v2.bloom_filter(...) @> eql_v2.bloom_filter(...)` so encrypted free-text searches engage the bloom_filter functional GIN index on Supabase and any `--exclude-operator-family` install. Previously the operators emitted `eql_v2.like(col, value)` / `eql_v2.ilike(col, value)` — the function bodies contain the inlinable bloom-filter form, but they're marked `VOLATILE` so the planner can't inline them, and the documented `bench_text_bloom_idx` GIN index never engages. Drizzle now emits the inlined form directly. diff --git a/packages/bench/__tests__/drizzle/operators.explain.test.ts b/packages/bench/__tests__/drizzle/operators.explain.test.ts index 66392f9b..87244907 100644 --- a/packages/bench/__tests__/drizzle/operators.explain.test.ts +++ b/packages/bench/__tests__/drizzle/operators.explain.test.ts @@ -156,22 +156,45 @@ describe('#421: equality and array operators', () => { }) }) -// --- #422: investigation operators ---------------------------------------- +// --- like / ilike: bloom-filter containment ------------------------------ // -// We don't yet know which call-shaped forms the planner inlines. Record plan -// shape; assertions land in a follow-up once #422 closes. -describe('#422: call-shaped operators (recorded, not asserted)', () => { - it('records like / ilike plan shapes', async () => { - await tryExplainWhere( - 'like', - (await ops.like(benchTable.encText, '%value-00000%')) as SQL, +// `eql_v2.like` and `eql_v2.ilike` are SQL functions whose bodies are already +// `SELECT eql_v2.bloom_filter(a) @> eql_v2.bloom_filter(b)` — but they're +// marked VOLATILE, so the planner won't inline them into the index match. +// Drizzle now emits the inlined containment form directly so the bloom GIN +// index engages. The wildcard pattern is irrelevant at the SQL layer — +// bloom-filter match works on the encrypted token set, not LIKE syntax. +describe('like / ilike: engage bloom_filter functional index', () => { + // The pattern needs to be selective — `%value-0000042%` is unique to one + // seeded row, whereas a broad pattern like `%value-00000%` is shared by + // every row in the fixture (all seed values start with `value-`), and + // the planner correctly picks seq scan when the predicate matches every + // row. + it('like engages bench_text_bloom_idx', async () => { + const plan = await explainWhere( + (await ops.like(benchTable.encText, '%value-0000042%')) as SQL, ) - await tryExplainWhere( - 'ilike', - (await ops.ilike(benchTable.encText, '%VALUE-00000%')) as SQL, + recordObservation('like', plan) + expect(hasSeqScan(plan), summarize(plan)).toBe(false) + }) + + it('ilike engages bench_text_bloom_idx', async () => { + const plan = await explainWhere( + (await ops.ilike(benchTable.encText, '%VALUE-0000042%')) as SQL, ) + recordObservation('ilike', plan) + expect(hasSeqScan(plan), summarize(plan)).toBe(false) }) +}) +// --- #422: remaining call-shaped operators (recorded, not asserted) ------ +// +// gt/gte/lt/lte/between have no Supabase functional index path today (OPE +// work is still in flight in EQL). jsonb_path_* don't have an obvious +// containment form on ste_vec. order_by has no Supabase index path either. +// Record plan shape for the investigation log; assertions land in a +// follow-up once EQL ships the relevant index recipes. +describe('#422: remaining call-shaped operators (recorded, not asserted)', () => { it('records gt / gte / lt / lte plan shapes', async () => { for (const [name, build] of [ ['gt', () => ops.gt(benchTable.encInt, 5000)], diff --git a/packages/stack/src/drizzle/operators.ts b/packages/stack/src/drizzle/operators.ts index 46d73879..e9181cee 100644 --- a/packages/stack/src/drizzle/operators.ts +++ b/packages/stack/src/drizzle/operators.ts @@ -851,7 +851,19 @@ function createTextSearchOperator( ) } - const sqlFn = sql`eql_v2.${sql.raw(operator === 'notIlike' ? 'ilike' : operator)}(${left}, ${bindIfParam(encrypted, left)})` + // Emit the bloom-filter containment form directly. `eql_v2.like` / + // `eql_v2.ilike` are themselves a single-statement `SELECT + // eql_v2.bloom_filter(a) @> eql_v2.bloom_filter(b)` — but the functions + // are marked VOLATILE, so the planner won't inline them, and the + // documented `bench_text_bloom_idx` GIN functional index never engages. + // Inlining by hand here lets the planner match the index on every + // install, including Supabase. (Same shape as the hmac_256 wrap for + // eq/ne/inArray.) + // + // `like` and `ilike` resolve to the same SQL post-encryption — case + // sensitivity is determined by the column's `freeTextSearch` token + // filters, not by which operator the user picked. + const sqlFn = sql`eql_v2.bloom_filter(${left}) @> eql_v2.bloom_filter(${bindIfParam(encrypted, left)}::eql_v2_encrypted)` return operator === 'notIlike' ? sql`NOT (${sqlFn})` : sqlFn }