Skip to content

Commit 0c945e0

Browse files
committed
Add cycle-collection tests for the Rust-side identity cache
These are the contract for the gc_handler that comes next: the Rust extension's node_cache forms a cycle (cache -> wrapper -> $native_ast property -> WpMySqlNativeAst -> cache) that PHP's cycle collector can't walk into without help. The tests will fail until the handler exposes the cached wrappers to PHP's GC. The tests are deliberately hostile — loops with explicit gc_collect_cycles between iterations, assertions on memory floors, mutation-before-drop, overlapping-AST lifetimes, and orphaned-child use-after-drop. Each one breaks in a different direction the leak can manifest. These tests fail on the current commit; the next commit makes them pass.
1 parent 30a38ae commit 0c945e0

1 file changed

Lines changed: 274 additions & 0 deletions

File tree

Lines changed: 274 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,274 @@
1+
<?php
2+
3+
use PHPUnit\Framework\TestCase;
4+
5+
/**
6+
* Cycle-collection / memory-bound tests for the Rust-side identity cache.
7+
*
8+
* The Rust extension stores cached wrappers in a HashMap that PHP's GC
9+
* cannot see by default. Each cached wrapper has a `$native_ast` property
10+
* pointing back at the AST, forming a cycle the cycle collector can't
11+
* walk into without help. These tests are the contract for the custom
12+
* `gc_handler` on `WP_MySQL_Native_Ast` that exposes the cached wrappers
13+
* to PHP's collector — they will fail until the handler is in place and
14+
* working correctly. They're written to break in every direction the
15+
* leak can manifest:
16+
*
17+
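* - Loops parsing many ASTs without explicit GC must not grow without
18+
* bound (ordinary mode of use). The loop tests below still call
* `gc_collect_cycles()` after every iteration so that collection is
* deterministic and the memory readings are comparable.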
19+
* - Walking, mutating, and dropping an AST must reclaim the wrapper
20+
* memory once `gc_collect_cycles()` runs.
21+
* - Holding a child wrapper after the parent AST goes out of scope must
22+
* not crash, must not corrupt memory, and the AST must stay alive as
23+
* long as that child is reachable.
24+
* - Nested ASTs with overlapping lifetimes must not interfere — dropping
25+
* one mustn't free another's cached wrappers.
26+
* - Mutating a cached wrapper before dropping the AST must still allow
27+
* collection.
28+
*
29+
* Skipped when the native extension is not loaded.
30+
*/
31+
class WP_MySQL_Native_Parser_Node_Cycle_Tests extends TestCase {
32+
33+
/**
 * Skips the test when the native parser class is not available; otherwise
 * runs a collection pass so that garbage left behind by earlier tests does
 * not skew the memory readings taken in this class.
 */
protected function setUp(): void {
	$native_parser_loaded = class_exists( 'WP_MySQL_Native_Parser', false );
	if ( false === $native_parser_loaded ) {
		$this->markTestSkipped( 'Native MySQL parser extension is not loaded.' );
	}

	// Start from a clean slate: cycles created by previous tests must not
	// show up in this test's memory measurements.
	gc_collect_cycles();
}
41+
42+
/**
 * Lexes and parses a SQL string and returns the root node of the tree.
 *
 * The grammar is built on the first call and kept in a static local, so
 * later calls reuse the same grammar instance.
 *
 * @param string $sql SQL text to parse.
 * @return WP_Parser_Node Root node; the test fails if the parser returns null.
 */
private function parse( string $sql ): WP_Parser_Node {
	static $grammar = null;
	if ( null === $grammar ) {
		$rules   = include __DIR__ . '/../../../src/mysql/mysql-grammar.php';
		$grammar = new WP_Parser_Grammar( $rules );
	}

	// Use the native token stream when the lexer is the native
	// implementation; otherwise fall back to the remaining-tokens API.
	$lexer = new WP_MySQL_Lexer( $sql );
	if ( $lexer instanceof WP_MySQL_Native_Lexer ) {
		$tokens = $lexer->native_token_stream();
	} else {
		$tokens = $lexer->remaining_tokens();
	}

	$root = ( new WP_MySQL_Parser( $grammar, $tokens ) )->parse();
	$this->assertNotNull( $root, 'Failed to parse SQL: ' . $sql );
	return $root;
}
56+
57+
/**
 * Parse, walk and drop the same query over and over, forcing a collection
 * pass after every round. The working set has to level off.
 *
 * If the cached wrappers are not reachable by the collector, every AST's
 * wrappers are retained and memory climbs with the number of rounds. With
 * a working gc_handler each dropped AST is collected and usage stays flat.
 */
public function test_repeated_parse_walk_drop_does_not_leak(): void {
	$query = 'SELECT a, b, c FROM t WHERE a + b * c IN (1, 2, 3) AND d = 4';

	// Warm-up rounds: let one-time allocations happen before the baseline
	// is sampled.
	$round = 0;
	while ( $round < 20 ) {
		$tree = $this->parse( $query );
		$tree->get_descendants();
		unset( $tree );
		gc_collect_cycles();
		++$round;
	}
	$before = memory_get_usage();

	// Measured rounds: far more iterations than the warm-up, same work.
	$round = 0;
	while ( $round < 500 ) {
		$tree = $this->parse( $query );
		$tree->get_descendants();
		unset( $tree );
		gc_collect_cycles();
		++$round;
	}
	$growth = memory_get_usage() - $before;

	// 4 MB of headroom is generous; per the original author a leaking
	// cache adds tens of MB over 500 rounds of this query.
	$limit   = 4 * 1024 * 1024;
	$message = sprintf(
		'Memory grew %.1f MB across 500 parse-walk-drop cycles; the per-AST cache is not being collected.',
		$growth / 1024 / 1024
	);
	$this->assertLessThan( $limit, $growth, $message );
}
101+
102+
/**
 * After dropping the AST and triggering GC, the entire wrapper graph must
 * be reclaimable. We hand out one descendant, drop the root, then drop the
 * descendant — the next gc cycle must reclaim the rest of the cached
 * wrappers.
 *
 * A throwaway parse runs before the memory floor is sampled. `parse()`
 * builds the grammar lazily on its first call and keeps it in a static, so
 * without the warm-up that one-time allocation would be charged to the AST
 * under test whenever this test is the first one in the process to call
 * `parse()` (for example when run on its own with `--filter`).
 */
public function test_drop_then_gc_reclaims_cached_wrappers(): void {
	$sql = 'SELECT a, b, c FROM t WHERE a + b * c IN (1, 2, 3) AND d = 4';

	// Warm-up: pay for the lazily-built grammar and any other first-use
	// allocations before the floor is taken, then drop everything.
	$warmup = $this->parse( $sql );
	$warmup->get_first_descendant_node();
	$warmup = null;

	// Establish a memory floor with no AST live.
	gc_collect_cycles();
	$floor = memory_get_usage();

	$ast        = $this->parse( $sql );
	$descendant = $ast->get_first_descendant_node();
	$this->assertNotNull( $descendant );

	// Drop the root first, then the handed-out descendant, then collect.
	$ast        = null;
	$descendant = null;
	gc_collect_cycles();

	$after = memory_get_usage();
	$delta = $after - $floor;
	// Generous bound — but tens of MB of leaked wrappers would blow it.
	$this->assertLessThan(
		1 * 1024 * 1024,
		$delta,
		sprintf(
			'After dropping the AST and the descendant and running gc, %.1f MB of cached wrappers remain.',
			$delta / 1024 / 1024
		)
	);
}
134+
135+
/**
 * A child wrapper that outlives the variable holding the root must stay
 * usable: reading it and walking further down must not crash. Once the
 * child is released as well, a collection pass must be able to run over
 * the whole graph without incident.
 */
public function test_orphaned_child_keeps_ast_alive_then_collects(): void {
	$query = 'SELECT a, b, c FROM t WHERE a + b * c IN (1, 2, 3)';

	// The root only ever lives inside this closure's scope; what escapes
	// is a single descendant.
	$orphan = ( function () use ( $query ) {
		$root = $this->parse( $query );
		return $root->get_first_descendant_node();
	} )();

	// With the root variable gone, the orphan is the only reference left
	// in userland. It must still behave like a normal node.
	$this->assertNotNull( $orphan );
	$this->assertIsString( $orphan->rule_name );

	// Descending from the orphan must resolve as well.
	$grandchild = $orphan->get_first_child();
	$this->assertNotNull( $grandchild );

	// Release everything and collect.
	unset( $orphan, $grandchild );
	gc_collect_cycles();

	// Nothing to compare here: the real signal is that neither the
	// collection above nor teardown crashes. Count one assertion so the
	// test is not reported as risky.
	$this->addToAssertionCount( 1 );
}
165+
166+
/**
 * Mutating a cached wrapper through `append_child` before dropping the AST
 * must not block collection. The mutated wrapper's `$children` array now
 * contains a non-cached node; that's an extra edge for the gc_handler to
 * traverse, not a reason to leak.
 *
 * A few warm-up rounds of the same cycle run before the floor is sampled,
 * matching the other loop tests in this class. `parse()` builds its
 * grammar lazily on first use, so without the warm-up that one-time
 * allocation would count against the bound whenever this test is the first
 * to call `parse()`.
 */
public function test_mutation_before_drop_does_not_block_collection(): void {
	$sql = 'SELECT 1 + 2';

	// Warm-up: identical work to the measured loop, so one-time
	// allocations are settled before the floor is taken.
	for ( $i = 0; $i < 10; $i++ ) {
		$ast      = $this->parse( $sql );
		$child    = $ast->get_first_child_node();
		$injected = new WP_Parser_Node( 0, 'synthetic-warmup-' . $i );
		$ast->append_child( $injected );
		$ast->get_descendants();
		$ast      = null;
		$child    = null;
		$injected = null;
		gc_collect_cycles();
	}
	$floor = memory_get_usage();

	for ( $i = 0; $i < 200; $i++ ) {
		$ast = $this->parse( $sql );
		// Hold a wrapper obtained before the mutation until the drop.
		$child    = $ast->get_first_child_node();
		$injected = new WP_Parser_Node( 0, 'synthetic-' . $i );
		$ast->append_child( $injected );
		// Touch the cache after mutation to keep wrappers live.
		$ast->get_descendants();
		$ast      = null;
		$child    = null;
		$injected = null;
		gc_collect_cycles();
	}
	$after = memory_get_usage();
	$delta = $after - $floor;
	$this->assertLessThan(
		4 * 1024 * 1024,
		$delta,
		sprintf(
			'Memory grew %.1f MB across 200 mutate-then-drop cycles.',
			$delta / 1024 / 1024
		)
	);
}
201+
202+
/**
 * Keeps two ASTs alive at once and releases them in an interleaved order.
 * Releasing the first must leave the second fully usable, and both must be
 * releasable without a crash once nothing references them.
 */
public function test_overlapping_asts_do_not_corrupt_each_other(): void {
	$first_root  = $this->parse( 'SELECT a FROM ta WHERE a > 1' );
	$second_root = $this->parse( 'SELECT b FROM tb WHERE b < 9' );

	$first_node  = $first_root->get_first_descendant_node();
	$second_node = $second_root->get_first_descendant_node();

	// Release everything belonging to the first AST and collect; the
	// second AST must be unaffected.
	unset( $first_root, $first_node );
	gc_collect_cycles();

	$this->assertNotNull( $second_node );
	$descendants = $second_root->get_descendants();
	$this->assertNotEmpty( $descendants );

	// Release the second root while $second_node (and the $descendants
	// array) still reference its wrappers; reading from the node must
	// keep working.
	unset( $second_root );
	$this->assertIsString( $second_node->rule_name );

	// Release the remaining references and collect. As elsewhere in this
	// class, surviving this without a crash is the signal.
	unset( $second_node, $descendants );
	gc_collect_cycles();
	$this->addToAssertionCount( 1 );
}
233+
234+
/**
 * Walks each AST several times before dropping it, across many rounds,
 * with a collection pass after every round. This mirrors a translator
 * that makes repeated passes over each query: repeated walks should
 * benefit from the cache without memory growing round over round.
 */
public function test_rewalk_loop_stays_bounded(): void {
	$query = 'SELECT a, b, c, d, e FROM t WHERE (a + b) * (c - d) > e AND f IN (1,2,3,4,5)';

	gc_collect_cycles();

	// Warm-up rounds, so the floor is taken after one-time allocations.
	$round = 0;
	while ( $round < 10 ) {
		$tree = $this->parse( $query );
		$walk = 0;
		while ( $walk < 10 ) {
			$tree->get_descendants();
			++$walk;
		}
		unset( $tree );
		gc_collect_cycles();
		++$round;
	}
	$before = memory_get_usage();

	// Measured rounds: same work, twenty times as many iterations.
	$round = 0;
	while ( $round < 200 ) {
		$tree = $this->parse( $query );
		$walk = 0;
		while ( $walk < 10 ) {
			$tree->get_descendants();
			++$walk;
		}
		unset( $tree );
		gc_collect_cycles();
		++$round;
	}
	$growth = memory_get_usage() - $before;

	$limit   = 4 * 1024 * 1024;
	$message = sprintf(
		'Rewalk loop grew memory by %.1f MB; cache likely uncollectable.',
		$growth / 1024 / 1024
	);
	$this->assertLessThan( $limit, $growth, $message );
}
274+
}

0 commit comments

Comments
 (0)