Skip to content

Commit 04786fb

Browse files
committed
Append end-of-input sentinel token to drop range checks
Add a sentinel WP_Parser_Token with id EMPTY_RULE_ID (0) to the end of the token array. Real MySQL tokens never have id 0 (WHITESPACE, the only token with id 0, is stripped by the lexer before tokens reach the parser), so the sentinel cannot match any real terminal. This lets the hot path drop the 'position < token_count' range check everywhere it reads the current token id: the selector lookup at method entry, the inline terminal match inside the branch loop, and the post-branch INTO negative lookahead for selectStatement. Any read past the last real token falls naturally into the nullable-fallback or branch-miss handling. Also drop a few dead locals ($token_count, $fragment_ids) that no longer appear in the hot path after the change. End-to-end parser benchmark: before, ~28,700 QPS (avg); after, ~29,800 QPS (+4%).
1 parent 7da1107 commit 04786fb

1 file changed

Lines changed: 29 additions & 12 deletions

File tree

packages/mysql-on-sqlite/src/parser/class-wp-parser.php

Lines changed: 29 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,28 @@ class WP_Parser {
2525

2626
public function __construct( WP_Parser_Grammar $grammar, array $tokens ) {
2727
$this->grammar = $grammar;
28-
$this->tokens = $tokens;
2928
$this->token_count = count( $tokens );
29+
// Append an end-of-input sentinel token whose id is EMPTY_RULE_ID
30+
// (0). The hot path can then read $tokens[$pos]->id unconditionally
31+
// when $pos is the current cursor, because the sentinel naturally
32+
// fails to match any real grammar terminal while feeding the
33+
// nullable-fallback branch of the selector check.
34+
//
35+
// Invariants the hot path relies on:
36+
// - The sentinel id (0) cannot match any grammar terminal.
37+
// strip_epsilon_markers() removes id 0 from every branch at
38+
// grammar build time, so no $subrule_id in the inner loop ever
39+
// equals 0 and ++$this->position can never advance past the
40+
// sentinel.
41+
// - The sentinel must never be appended to a node's children. It
42+
// is only inspected via $tokens[$pos]->id; tokens are pushed
43+
// into $children only on terminal-id equality, which the
44+
// sentinel cannot satisfy.
45+
// - WP_MySQL_Parser::next_query() bounds at $position < $token_count
46+
// (set above, before the append), so the sentinel sits at index
47+
// $token_count and is never fed into a parse round.
48+
$tokens[] = new WP_Parser_Token( WP_Parser_Grammar::EMPTY_RULE_ID, 0, 0, '' );
49+
$this->tokens = $tokens;
3050
$this->position = 0;
3151
$this->rules = $grammar->rules;
3252
$this->rule_names = $grammar->rule_names;
@@ -51,15 +71,14 @@ public function parse() {
5171
* round trip per consumed token.
5272
*/
5373
private function parse_recursive( $rule_id ) {
54-
$tokens = $this->tokens;
55-
$token_count = $this->token_count;
56-
$position = $this->position;
74+
$tokens = $this->tokens;
75+
$position = $this->position;
5776

5877
// Narrow the set of branches worth trying using the precomputed FIRST
5978
// sets. When no entry exists for the current token but the rule is
6079
// nullable, all candidate branches would match empty, so we return
6180
// immediately without entering any branch.
62-
$tid = $position < $token_count ? $tokens[ $position ]->id : WP_Parser_Grammar::EMPTY_RULE_ID;
81+
$tid = $tokens[ $position ]->id;
6382
if ( isset( $this->branches_for_token[ $rule_id ][ $tid ] ) ) {
6483
$candidate_branches = $this->branches_for_token[ $rule_id ][ $tid ];
6584
} elseif ( isset( $this->nullable_branches[ $rule_id ] ) ) {
@@ -70,9 +89,8 @@ private function parse_recursive( $rule_id ) {
7089

7190
$highest_terminal_id = $this->highest_terminal_id;
7291
$branches = $this->rules[ $rule_id ];
73-
$fragment_ids = $this->fragment_ids;
7492
$rule_name = $this->rule_names[ $rule_id ];
75-
$is_fragment = isset( $fragment_ids[ $rule_id ] );
93+
$is_fragment = isset( $this->fragment_ids[ $rule_id ] );
7694
$is_select_statement = 'selectStatement' === $rule_name;
7795
$branch_matches = false;
7896
$children = array();
@@ -83,10 +101,10 @@ private function parse_recursive( $rule_id ) {
83101
$branch_matches = true;
84102
foreach ( $branch as $subrule_id ) {
85103
if ( $subrule_id <= $highest_terminal_id ) {
86-
if (
87-
$this->position < $token_count
88-
&& $tokens[ $this->position ]->id === $subrule_id
89-
) {
104+
// The sentinel at $tokens[$token_count] has id 0 so it
105+
// cannot match any real terminal, making the range check
106+
// unnecessary here.
107+
if ( $tokens[ $this->position ]->id === $subrule_id ) {
90108
$children[] = $tokens[ $this->position ];
91109
++$this->position;
92110
continue;
@@ -125,7 +143,6 @@ private function parse_recursive( $rule_id ) {
125143
if (
126144
$branch_matches
127145
&& $is_select_statement
128-
&& $this->position < $token_count
129146
&& WP_MySQL_Lexer::INTO_SYMBOL === $tokens[ $this->position ]->id
130147
) {
131148
$branch_matches = false;

0 commit comments

Comments
 (0)