Skip to content

Commit 51efda7

Browse files
authored
perf(enrich): --memprofile + --max-buffer-pool + --copy-threads flags (Phase C) (#147)
Phase C of the OOM fix plan. Surfaces the memory-budgeting knobs that Phase A baked into the codebase but kept as compile-time defaults.

Empirical finding: Phase A+B alone brought ~/projects/-scale enrich (49k files) from 9-15 GB peak RSS (OOM-killed, exit 137) to 3.12 GB — well under the 4 GiB acceptance bar. The full streaming refactor originally scoped for this phase is not load-bearing at current scale; it remains a worthwhile future investment for 10M+ node graphs but ships separately if/when that scale arrives.

Flags added to `codeiq enrich`:
- `--memprofile=<path>`: Write a heap profile after enrich completes. For OOM debugging — pair with `/usr/bin/time -v`.
- `--max-buffer-pool=N`: Cap Kuzu BufferPoolSize in bytes (default 2 GiB). For hosts where 2 GiB is still too much.
- `--copy-threads=N`: Cap Kuzu COPY FROM parallelism (default min(4, GOMAXPROCS)).

EnrichOptions struct extended with StoreBufferPoolBytes + StoreCopyThreads; analyzer.Enrich now routes through graph.OpenWithOptions with those values.

Plan: docs/superpowers/plans/2026-05-13-enrich-oom-fix.md, Phase C.

Verification:
- `go test ./... -count=1`: 877 pass.
- `/tmp/codeiq-c enrich ~/projects/polyglot-bench/airflow` recorded 1.27 GB peak RSS via `/usr/bin/time -v` (down from the pre-Phase-A 3.8 GB observed by the research pprof agent).
- `~/projects/` enrich peak RSS: 3.12 GB (below the 4 GiB acceptance bar).
1 parent 548a5ec commit 51efda7

2 files changed

Lines changed: 45 additions & 3 deletions

File tree

go/internal/analyzer/enrich.go

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,12 @@ type EnrichOptions struct {
2323
// GraphDir overrides the Kuzu output directory. When "", the default
2424
// `<root>/.codeiq/graph/codeiq.kuzu` is used.
2525
GraphDir string
26+
// StoreBufferPoolBytes caps Kuzu's buffer pool. Zero -> graph package
27+
// default (2 GiB).
28+
StoreBufferPoolBytes uint64
29+
// StoreCopyThreads caps Kuzu COPY FROM parallelism. Zero -> graph
30+
// package default (min(4, GOMAXPROCS)).
31+
StoreCopyThreads uint64
2632
}
2733

2834
// EnrichSummary reports per-run counters from a successful Enrich.
@@ -109,7 +115,10 @@ func Enrich(root string, c *cache.Cache, opts EnrichOptions) (EnrichSummary, err
109115

110116
// 6. Bulk-load Kuzu — schema + nodes + edges + indexes. The store is
111117
// closed when this function returns; read-side commands re-open it.
112-
store, err := graph.Open(opts.GraphDir)
118+
store, err := graph.OpenWithOptions(opts.GraphDir, graph.OpenOptions{
119+
BufferPoolBytes: opts.StoreBufferPoolBytes,
120+
MaxThreads: opts.StoreCopyThreads,
121+
})
113122
if err != nil {
114123
return EnrichSummary{}, fmt.Errorf("enrich: open graph: %w", err)
115124
}

go/internal/cli/enrich.go

Lines changed: 35 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,10 @@ package cli
22

33
import (
44
"fmt"
5+
"os"
56
"path/filepath"
7+
"runtime"
8+
"runtime/pprof"
69

710
"github.com/randomcodespace/codeiq/go/internal/analyzer"
811
"github.com/randomcodespace/codeiq/go/internal/cache"
@@ -11,7 +14,12 @@ import (
1114

1215
func init() {
1316
registerSubcommand(func() *cobra.Command {
14-
var graphDir string
17+
var (
18+
graphDir string
19+
memProfile string
20+
maxBufferPool int64
21+
copyThreads int
22+
)
1523
cmd := &cobra.Command{
1624
Use: "enrich [path]",
1725
Short: "Load the SQLite cache into Kuzu and run linkers, classifiers, intelligence.",
@@ -48,10 +56,29 @@ become available and the stdio MCP server can serve clients.`,
4856
return fmt.Errorf("open cache %s: %w", cachePath, err)
4957
}
5058
defer c.Close()
51-
summary, err := analyzer.Enrich(root, c, analyzer.EnrichOptions{GraphDir: graphDir})
59+
opts := analyzer.EnrichOptions{GraphDir: graphDir}
60+
if maxBufferPool > 0 {
61+
opts.StoreBufferPoolBytes = uint64(maxBufferPool)
62+
}
63+
if copyThreads > 0 {
64+
opts.StoreCopyThreads = uint64(copyThreads)
65+
}
66+
summary, err := analyzer.Enrich(root, c, opts)
5267
if err != nil {
5368
return err
5469
}
70+
if memProfile != "" {
71+
runtime.GC()
72+
f, ferr := os.Create(memProfile)
73+
if ferr != nil {
74+
return fmt.Errorf("create mem profile: %w", ferr)
75+
}
76+
defer f.Close()
77+
if perr := pprof.WriteHeapProfile(f); perr != nil {
78+
return fmt.Errorf("write mem profile: %w", perr)
79+
}
80+
fmt.Fprintf(cmd.ErrOrStderr(), "heap profile written to %s\n", memProfile)
81+
}
5582
fmt.Fprintf(cmd.OutOrStdout(),
5683
"enrich complete: %d nodes, %d edges, %d services\n",
5784
summary.Nodes, summary.Edges, summary.Services)
@@ -60,6 +87,12 @@ become available and the stdio MCP server can serve clients.`,
6087
}
6188
cmd.Flags().StringVar(&graphDir, "graph-dir", "",
6289
"Output directory for the Kuzu graph store (default: <path>/.codeiq/graph/codeiq.kuzu).")
90+
cmd.Flags().StringVar(&memProfile, "memprofile", "",
91+
"Write a heap profile to this path after enrich completes. For OOM debugging — use with /usr/bin/time -v.")
92+
cmd.Flags().Int64Var(&maxBufferPool, "max-buffer-pool", 0,
93+
"Cap Kuzu BufferPoolSize in bytes (default: 2 GiB; 0 means default).")
94+
cmd.Flags().IntVar(&copyThreads, "copy-threads", 0,
95+
"Cap Kuzu COPY FROM parallelism (default: min(4, GOMAXPROCS); 0 means default).")
6396
return cmd
6497
})
6598
}

0 commit comments

Comments
 (0)