Add hit-heavy perf scenarios to compare cache vs baseline

adamziel · adamziel · commit 2c07dacdc540 · 2026-04-30T23:10:26.000+02:00
The walk benchmark we already had is cache-miss heavy (one walk per AST,
every node visited once), so the identity cache shows up there as a
small overhead rather than a win. The cache is supposed to pay back in
hit-heavy patterns: re-walks of the same tree, repeated child reads at
the root, and translator-style passes that re-enter visited subtrees.

Adds three modes (--mode=rewalk|reread|subtree) and runs each on both
the PR and the baseline so the comparison is apples-to-apples on the
same runner, same corpus.
diff --git a/.github/workflows/native-ast-perf.yml b/.github/workflows/native-ast-perf.yml
@@ -131,6 +131,48 @@ jobs:
           php -d extension="$BASE_EXT" tests/tools/run-native-ast-walk-benchmark.php --no-walk \
             | tee "$GITHUB_WORKSPACE/packages/mysql-on-sqlite/baseline-native-parse-only.txt"
 
+      # Hit-heavy scenarios, where the per-AST identity cache is supposed to win: the baseline
+      # reallocates wrappers on every accessor call, the PR reuses them. Run on both to show the gap.
+      # NOTE(review): baseline steps run the baseline checkout's copy of the script; confirm it honors --mode/--repeat.
+      - name: Benchmark — native rewalk x10 (this PR)
+        working-directory: packages/mysql-on-sqlite
+        run: |
+          php -d extension="$NATIVE_EXT" tests/tools/run-native-ast-walk-benchmark.php --mode=rewalk --repeat=10 \
+            | tee native-rewalk.txt
+
+      - name: Benchmark — baseline rewalk x10
+        working-directory: ../baseline/packages/mysql-on-sqlite
+        run: |
+          BASE_EXT="$(realpath ../../packages/php-ext-wp-mysql-parser/target/release/libwp_mysql_parser.so)"
+          php -d extension="$BASE_EXT" tests/tools/run-native-ast-walk-benchmark.php --mode=rewalk --repeat=10 \
+            | tee "$GITHUB_WORKSPACE/packages/mysql-on-sqlite/baseline-native-rewalk.txt"
+
+      - name: Benchmark — native reread x20 (this PR)
+        working-directory: packages/mysql-on-sqlite
+        run: |
+          php -d extension="$NATIVE_EXT" tests/tools/run-native-ast-walk-benchmark.php --mode=reread --repeat=20 \
+            | tee native-reread.txt
+
+      - name: Benchmark — baseline reread x20
+        working-directory: ../baseline/packages/mysql-on-sqlite
+        run: |
+          BASE_EXT="$(realpath ../../packages/php-ext-wp-mysql-parser/target/release/libwp_mysql_parser.so)"
+          php -d extension="$BASE_EXT" tests/tools/run-native-ast-walk-benchmark.php --mode=reread --repeat=20 \
+            | tee "$GITHUB_WORKSPACE/packages/mysql-on-sqlite/baseline-native-reread.txt"
+
+      - name: Benchmark — native subtree x5 (this PR)
+        working-directory: packages/mysql-on-sqlite
+        run: |
+          php -d extension="$NATIVE_EXT" tests/tools/run-native-ast-walk-benchmark.php --mode=subtree --repeat=5 \
+            | tee native-subtree.txt
+
+      - name: Benchmark — baseline subtree x5
+        working-directory: ../baseline/packages/mysql-on-sqlite
+        run: |
+          BASE_EXT="$(realpath ../../packages/php-ext-wp-mysql-parser/target/release/libwp_mysql_parser.so)"
+          php -d extension="$BASE_EXT" tests/tools/run-native-ast-walk-benchmark.php --mode=subtree --repeat=5 \
+            | tee "$GITHUB_WORKSPACE/packages/mysql-on-sqlite/baseline-native-subtree.txt"
+
       - name: Summarize
         if: always()
         working-directory: packages/mysql-on-sqlite
@@ -148,7 +190,7 @@ jobs:
             echo
             echo '| scenario | result |'
             echo '|---|---|'
-            for f in php-parse-only.txt php-walk.txt native-parse-only.txt native-walk.txt baseline-native-parse-only.txt baseline-native-walk.txt; do
+            for f in php-parse-only.txt php-walk.txt native-parse-only.txt native-walk.txt baseline-native-parse-only.txt baseline-native-walk.txt native-rewalk.txt baseline-native-rewalk.txt native-reread.txt baseline-native-reread.txt native-subtree.txt baseline-native-subtree.txt; do
               [ -f "$f" ] || continue
               line="$(cat "$f")"
               echo "| ${f%.txt} | \`$line\` |"
diff --git a/packages/mysql-on-sqlite/tests/tools/run-native-ast-walk-benchmark.php b/packages/mysql-on-sqlite/tests/tools/run-native-ast-walk-benchmark.php
@@ -3,15 +3,29 @@
 /**
  * Benchmark for the native AST walk path with the per-AST identity cache.
  *
- * Parses every query in the MySQL server suite, then walks each AST
- * exhaustively through `get_descendants()` and `get_first_child_node()`
- * loops to exercise the bridge accessors and the identity map. Reports
- * wall time, peak memory, and a basic identity-stability check so the
- * cache cost can be compared against the no-cache baseline.
+ * Parses every query in the MySQL server suite, then walks each AST through
+ * a configurable mode to exercise the bridge accessors and the identity map.
+ * Reports wall time, peak memory, and a basic identity-stability check so
+ * the cache cost can be compared against the no-cache baseline.
+ *
+ * Modes (select with --mode=<name>; N comes from --repeat=N, default 1):
+ *   walk       — single full descendant walk per AST (cache-miss heavy).
+ *   no-walk    — parse only (the legacy --no-walk flag is still accepted).
+ *   rewalk     — repeat the descendant walk N times per AST (1st pass is
+ *                misses, remaining passes are all hits — the scenario the
+ *                identity cache is supposed to win on).
+ *   reread     — read the root's first child node N times per AST to
+ *                exercise repeated-read hit paths.
+ *   subtree    — walk descendants once, then re-read each one's first child
+ *                N times — models translator/rewriter passes that re-enter
+ *                the same subtrees.
  *
  * Usage:
- *   php run-native-ast-walk-benchmark.php          # walks via accessors
- *   php run-native-ast-walk-benchmark.php --no-walk # parse only, baseline
+ *   php run-native-ast-walk-benchmark.php
+ *   php run-native-ast-walk-benchmark.php --mode=no-walk
+ *   php run-native-ast-walk-benchmark.php --mode=rewalk --repeat=10
+ *   php run-native-ast-walk-benchmark.php --mode=reread --repeat=10
+ *   php run-native-ast-walk-benchmark.php --mode=subtree --repeat=5
  *
  * The script auto-detects the native extension. Without it, the walk
  * exercises the pure-PHP WP_Parser_Node path, which is useful as the
@@ -26,7 +40,17 @@ function ( $severity, $message, $file, $line ) {
 
 require_once __DIR__ . '/../../src/load.php';
 
-$walk_tree = ! in_array( '--no-walk', $argv, true );
+$mode   = 'walk';
+$repeat = 1;
+foreach ( $argv as $arg ) {
+	if ( '--no-walk' === $arg ) {
+		$mode = 'no-walk';
+	} elseif ( 0 === strpos( $arg, '--mode=' ) ) {
+		$mode = substr( $arg, 7 );
+	} elseif ( 0 === strpos( $arg, '--repeat=' ) ) {
+		$repeat = max( 1, (int) substr( $arg, 9 ) );
+	}
+}
 
 $grammar_data = include __DIR__ . '/../../src/mysql/mysql-grammar.php';
 $grammar      = new WP_Parser_Grammar( $grammar_data );
@@ -68,24 +92,70 @@ function ( $severity, $message, $file, $line ) {
 		}
 		++$total;
 
-		if ( ! $walk_tree ) {
-			continue;
-		}
-
-		// Exhaustive descendant walk — exercises both the per-call accessor
-		// path and (when the native extension is loaded) the identity map.
-		$descendants = $ast->get_descendants();
-		$walked     += count( $descendants );
-
-		// Re-read the first child a few times and confirm identity is
-		// stable. With the cache, this must be the same instance every
-		// call; a regression would surface as a cheap, deterministic flag.
-		$first = $ast->get_first_child_node();
-		if ( null !== $first ) {
-			$again = $ast->get_first_child_node();
-			if ( $first !== $again ) {
-				$identity_ok = false;
-			}
+		switch ( $mode ) {
+			case 'no-walk':
+				break;
+
+			case 'walk':
+				$descendants = $ast->get_descendants();
+				$walked     += count( $descendants );
+
+				$first = $ast->get_first_child_node();
+				if ( null !== $first ) {
+					$again = $ast->get_first_child_node();
+					if ( $first !== $again ) {
+						$identity_ok = false;
+					}
+				}
+				break;
+
+			case 'rewalk':
+				// Repeated full-tree walks. After the first pass every wrapper
+				// the cache returns is a hit; without the cache, every pass
+				// re-allocates wrappers for the entire tree from scratch.
+				for ( $r = 0; $r < $repeat; $r++ ) {
+					$descendants = $ast->get_descendants();
+					$walked     += count( $descendants );
+				}
+				break;
+
+			case 'reread':
+				// Repeated top-level child reads (analysis passes poking at the root).
+				$prev = null; // Reset per AST so a stale or undefined wrapper is never compared.
+				for ( $r = 0; $r < $repeat; $r++ ) {
+					$child = $ast->get_first_child_node();
+					if ( null !== $child ) {
+						++$walked;
+						// Identity must hold across repeated reads of the same AST.
+						if ( null !== $prev && $child !== $prev ) {
+							$identity_ok = false;
+						}
+						$prev = $child;
+					}
+				}
+				break;
+
+			case 'subtree':
+				// Walk descendants once, then for each descendant re-read its
+				// first child N times. Models translator/rewriter passes that
+				// re-enter previously visited subtrees.
+				$descendants = $ast->get_descendants();
+				foreach ( $descendants as $d ) {
+					if ( ! $d instanceof WP_Parser_Node ) {
+						continue;
+					}
+					for ( $r = 0; $r < $repeat; $r++ ) {
+						$child = $d->get_first_child_node();
+						if ( null !== $child ) {
+							++$walked;
+						}
+					}
+				}
+				break;
+
+			default:
+				fwrite( STDERR, "Unknown mode: $mode\n" );
+				exit( 2 );
 		}
 	} catch ( Throwable $e ) {
 		++$failures;
@@ -97,9 +167,10 @@ function ( $severity, $message, $file, $line ) {
 $native   = class_exists( 'WP_MySQL_Native_Parser', false ) ? 'native' : 'php';
 
 printf(
-	"path=%s walk=%s parsed=%d walked_nodes=%d failures=%d duration=%.4fs qps=%d peak_mem=%.1fMB identity_ok=%s\n",
+	"path=%s mode=%s repeat=%d parsed=%d walked_nodes=%d failures=%d duration=%.4fs qps=%d peak_mem=%.1fMB identity_ok=%s\n",
 	$native,
-	$walk_tree ? 'yes' : 'no',
+	$mode,
+	$repeat,
 	$total,
 	$walked,
 	$failures,
@@ -110,6 +181,6 @@ function ( $severity, $message, $file, $line ) {
 );
 
 if ( ! $identity_ok ) {
-	fwrite( STDERR, "Identity check failed: get_first_child_node() returned different instances.\n" );
+	fwrite( STDERR, "Identity check failed: accessor returned different instances on repeat read.\n" );
 	exit( 1 );
 }