Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 1 addition & 28 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -111,32 +111,6 @@ rejected rather than silently returning wrong data. Each opened connection makes
one small request to determine the file size, then fetches frames on demand.
Frames are cached per connection, so repeated reads do not re-hit the network.

#### Coalescing reads for remote (S3/CDN) databases

By default each frame is fetched with its own Range GET, so on high-latency
stores like S3 a single query can issue many small requests. Enabling the
in-memory, page-aligned read cache coalesces the contiguous run of missing pages
needed by a read into a single GET (default page size 64 KiB) and serves
adjacent frames from cache:

```go
import sqlitezstd "github.com/jtarchie/sqlitezstd"

// Register a cache-enabled VFS once (e.g. at startup). DSN query params are
// stripped before the VFS sees the path, so configuration lives on the named
// VFS, not the URL.
err := sqlitezstd.Register("zstdcache",
sqlitezstd.WithHTTPCacheSize(64<<20), // ~64 MiB of coalesced pages per open
)

db, _ := sql.Open("sqlite3", "https://bucket.example.com/segment.sqlite.zst?vfs=zstdcache")
```

In practice this collapses a remote query's request count by an order of
magnitude — a full-table-scan test issues **125 Range GETs without the cache vs
9 with it (~14× fewer)**. The cache is per opened file and bounded by the
configured byte cap (LRU eviction), so memory stays bounded. Tune the page size
with `WithHTTPPageSize`.

For authenticated buckets, supply a signing transport with
`WithRoundTripper`/`WithHTTPClient`; the library still wraps it with timeout,
retry, and range-validation.
Expand All @@ -153,8 +127,7 @@ go build -tags fts5 ./...
### Configuration

Importing the package registers a `zstd` VFS with sensible defaults. To tune the
frame-cache size, HTTP timeout, retry count, HTTP read cache
(`WithHTTPCacheSize`/`WithHTTPPageSize`), transport
frame-cache size, HTTP timeout, retry count, transport
(`WithRoundTripper`/`WithHTTPClient`), or logger, register your own named VFS and
reference it via `?vfs=<name>`:

Expand Down
58 changes: 4 additions & 54 deletions benchmark_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,9 @@ import (
_ "github.com/mattn/go-sqlite3" // ensure you import the SQLite3 driver
)

// minCacheVFS registers (once) a VFS whose frame cache holds a single frame,
// approximating the pre-cache behavior where the upstream reader kept only one
// decompressed frame. Benchmarking against it on the same fixture isolates the
// effect of the frame cache.
// minCacheVFS registers (once) a VFS whose frame cache holds a single frame.
// Benchmarking against it on the same fixture isolates the effect of the
// default frame-cache size.
func minCacheVFS(b *testing.B) string {
b.Helper()

Expand All @@ -37,7 +36,7 @@ func minCacheVFS(b *testing.B) string {

// BenchmarkReadCompressedSQLiteFTS5PorterMinCache mirrors
// BenchmarkReadCompressedSQLiteFTS5Porter but with a single-frame cache, so the
// two together show the frame cache's impact (allocs/op and B/op in particular).
// two together show the frame cache's impact.
func BenchmarkReadCompressedSQLiteFTS5PorterMinCache(b *testing.B) {
_, zstPath := setupDB(b)

Expand Down Expand Up @@ -340,55 +339,6 @@ func BenchmarkReadCompressedHTTPSQLite(b *testing.B) {
})
}

// cacheHTTPVFS returns the name of the cache-enabled VFS used by the HTTP
// benchmark. It calls sqlitezstd.Register with WithHTTPCacheSize(64<<20) on
// every invocation; a registration error whose message contains "already" is
// tolerated (presumably a repeat registration of the same name), while any
// other error aborts the benchmark.
func cacheHTTPVFS(b *testing.B) string {
	b.Helper()

	const vfsName = "zstd-httpcache-bench"

	regErr := sqlitezstd.Register(vfsName, sqlitezstd.WithHTTPCacheSize(64<<20))
	if regErr == nil || strings.Contains(regErr.Error(), "already") {
		return vfsName
	}

	// Fatalf stops the benchmark goroutine, so the return below only satisfies
	// the compiler.
	b.Fatalf("Failed to register http-cache vfs: %v", regErr)

	return vfsName
}

// BenchmarkReadCompressedHTTPSQLiteCached mirrors BenchmarkReadCompressedHTTPSQLite
// but through the coalescing HTTP cache. Over a local httptest server the latency
// win is small; the real benefit (far fewer Range GETs) is asserted by
// TestHTTPCacheCoalescesGETs.
//
// The compressed fixture is served from an in-process file server and queried
// in parallel through the VFS returned by cacheHTTPVFS.
func BenchmarkReadCompressedHTTPSQLiteCached(b *testing.B) {
	_, zstPath := setupDB(b)

	zstDir := filepath.Dir(zstPath)

	server := httptest.NewServer(http.FileServer(http.Dir(zstDir)))
	defer server.Close()

	vfs := cacheHTTPVFS(b)

	client, err := sql.Open("sqlite3", fmt.Sprintf("%s/%s?vfs=%s", server.URL, filepath.Base(zstPath), vfs))
	if err != nil {
		b.Fatalf("Open failed: %v", err)
	}
	defer client.Close() //nolint: errcheck

	client.SetMaxOpenConns(max(4, runtime.NumCPU()))

	b.ResetTimer()

	b.RunParallel(func(pb *testing.PB) {
		var count int
		for pb.Next() {
			// Keep the error local to this worker: the RunParallel body runs on
			// several goroutines, so writing to a captured variable would race.
			queryErr := client.QueryRow("SELECT MAX(value) FROM entries").Scan(&count)
			if queryErr != nil {
				// Fatalf/FailNow may only be called from the goroutine running
				// the benchmark; report with Errorf and stop this worker instead.
				b.Errorf("Query failed: %v", queryErr)

				return
			}
		}
	})
}

func BenchmarkReadCompressedRtreeSQLite(b *testing.B) {
_, zstPath := setupDB(b)

Expand Down
129 changes: 0 additions & 129 deletions cache_internal_test.go

This file was deleted.

64 changes: 0 additions & 64 deletions decoder.go

This file was deleted.

4 changes: 2 additions & 2 deletions file.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,8 @@ func (z *ZstdFile) SectorSize() int64 {
// A whole zstd frame must be decompressed to serve any byte within it, but
// SQLite reads a read-only immutable database page-by-page regardless of the
// reported sector size — the intra-frame locality win is captured by the
// frame cache (see decoder.go / readerat.go), not by this value. Reporting 0
// keeps SQLite on its default behavior.
// frame cache, not by this value. Reporting 0 keeps SQLite on its default
// behavior.
return 0
}

Expand Down
3 changes: 1 addition & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,7 @@ go 1.25.0
require (
github.com/SaveTheRbtz/zstd-seekable-format-go/pkg v0.10.0
github.com/brianvoe/gofakeit/v7 v7.7.3
github.com/cespare/xxhash/v2 v2.3.0
github.com/georgysavva/scany/v2 v2.1.4
github.com/hashicorp/golang-lru/v2 v2.0.7
github.com/klauspost/compress v1.18.6
github.com/mattn/go-sqlite3 v1.14.32
github.com/onsi/ginkgo/v2 v2.26.0
Expand All @@ -18,6 +16,7 @@ require (

require (
github.com/Masterminds/semver/v3 v3.4.0 // indirect
github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/go-logr/logr v1.4.3 // indirect
github.com/go-task/slim-sprig/v3 v3.0.0 // indirect
github.com/google/go-cmp v0.7.0 // indirect
Expand Down
2 changes: 0 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,6 @@ github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
github.com/google/pprof v0.0.0-20251002213607-436353cc1ee6 h1:/WHh/1k4thM/w+PAZEIiZK9NwCMFahw5tUzKUCnUtds=
github.com/google/pprof v0.0.0-20251002213607-436353cc1ee6/go.mod h1:I6V7YzU0XDpsHqbsyrghnFZLO1gwK6NPTNvmetQIk9U=
github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k=
github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=
github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM=
github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg=
github.com/jackc/pgservicefile v0.0.0-20200714003250-2b9c44734f2b h1:C8S2+VttkHFdOOCXJe+YGfa4vHYwlt4Zx+IVXQ97jYg=
Expand Down
Loading