forked from jtarchie/sqlitezstd
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdecoder.go
More file actions
64 lines (53 loc) · 2.05 KB
/
decoder.go
File metadata and controls
64 lines (53 loc) · 2.05 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
package sqlitezstd
import (
"sync"
"github.com/cespare/xxhash/v2"
lru "github.com/hashicorp/golang-lru/v2"
"github.com/klauspost/compress/zstd"
)
// sharedDecoder lazily constructs the one zstd decoder that every opened file
// in the process shares. sync.OnceValues runs the constructor on the first
// call only; every later call returns that same decoder and error.
//
// The seekable reader only ever calls DecodeAll, which is safe for concurrent
// use, so a single decoder stands in for the per-Open decoder pool that used
// to be allocated (and never closed) for each connection. The decoder is
// deliberately never closed: it lives for the lifetime of the process.
//
// nolint: gochecknoglobals
var sharedDecoder = sync.OnceValues(func() (*zstd.Decoder, error) {
	decoder, err := zstd.NewReader(nil)

	return decoder, err
})
// zstdDecoder captures the single *zstd.Decoder method this file relies on,
// mirroring the seekable ZSTDDecoder interface. Keeping it an interface lets
// the cache be unit-tested against a stand-in decoder.
type zstdDecoder interface {
	// DecodeAll decompresses input and returns the result. With
	// *zstd.Decoder the output is appended to dst.
	DecodeAll(input []byte, dst []byte) ([]byte, error)
}
// cachingDecoder wraps a zstd decoder with an LRU of decompressed frames keyed
// by the 64-bit xxhash of the compressed input. The upstream seekable reader
// keeps only a single decompressed frame, so SQLite's scattered page reads
// otherwise force the same frames to be decompressed (and freshly allocated)
// over and over, which is the dominant cost in the FTS5/trigram benchmarks.
type cachingDecoder struct {
// dec performs the actual decompression when a frame is not in the cache.
dec zstdDecoder
// cache maps the hash of a compressed frame to its decompressed bytes.
cache *lru.Cache[uint64, []byte]
}
// newCachingDecoder returns a cachingDecoder that delegates decompression to
// dec and keeps decompressed frames in an LRU created with the given size.
// An error from constructing the LRU is returned unchanged, together with a
// nil decoder.
func newCachingDecoder(dec zstdDecoder, size int) (*cachingDecoder, error) {
	var (
		wrapped = &cachingDecoder{dec: dec}
		err     error
	)

	if wrapped.cache, err = lru.New[uint64, []byte](size); err != nil {
		return nil, err
	}

	return wrapped, nil
}
// DecodeAll implements the seekable ZSTDDecoder interface, serving repeated
// frames from the LRU instead of decompressing them again.
//
// A frame that is not cached is always decompressed into a fresh buffer (nil
// dst), so the cached slice never contains, or shares memory with, a buffer
// owned by the caller. The result is then handed back according to dst:
//
//   - dst == nil (what the seekable reader passes): the cached slice itself
//     is returned. It is shared between callers and must be treated as
//     read-only.
//   - dst != nil: the frame is appended to dst and that slice is returned,
//     on cache hits and misses alike, following the append-to-dst contract
//     of zstd's DecodeAll.
//
// An error from the wrapped decoder is returned unchanged and nothing is
// cached for that input.
//
// NOTE: the cache key is only the 64-bit xxhash of the compressed input, so
// two different inputs that hash to the same value would be served the same
// decompressed frame.
func (c *cachingDecoder) DecodeAll(input, dst []byte) ([]byte, error) {
	key := xxhash.Sum64(input)

	frame, ok := c.cache.Get(key)
	if !ok {
		var err error

		// Decode into a fresh buffer rather than dst so the cached slice
		// holds exactly the frame and cannot be clobbered by the caller
		// reusing its own buffer.
		frame, err = c.dec.DecodeAll(input, nil)
		if err != nil {
			return nil, err
		}

		// Add only reports whether an older entry was evicted; there is no
		// error to handle.
		_ = c.cache.Add(key, frame)
	}

	if dst == nil {
		return frame, nil
	}

	return append(dst, frame...), nil
}