Skip to content

Commit b0646aa

Browse files
committed
Append end-of-input sentinel token to drop range checks
Add a sentinel WP_Parser_Token with id EMPTY_RULE_ID (0) to the end of the token array. Real MySQL tokens never have id 0 (WHITESPACE, the only token with id 0, is stripped by the lexer before tokens reach the parser), so the sentinel cannot match any real terminal.
This lets the hot path drop the 'position < token_count' range check everywhere it reads the current token id: the selector lookup at method entry, the inline terminal match inside the branch loop, and the post-branch INTO negative lookahead for selectStatement. Any read past the last real token falls naturally into the nullable-fallback or branch-miss handling.
Also drop a few dead locals ($token_count, $fragment_ids) that no longer appear in the hot path after the change.
End-to-end parser benchmark: before, ~28,700 QPS (avg); after, ~29,800 QPS (+4%).
1 parent 1dba087 commit b0646aa

1 file changed

Lines changed: 15 additions & 12 deletions

File tree

packages/mysql-on-sqlite/src/parser/class-wp-parser.php

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,14 @@ class WP_Parser {
2525

2626
public function __construct( WP_Parser_Grammar $grammar, array $tokens ) {
2727
$this->grammar = $grammar;
28-
$this->tokens = $tokens;
2928
$this->token_count = count( $tokens );
29+
// Append an end-of-input sentinel token whose id is EMPTY_RULE_ID
30+
// (0). The hot path can then read $tokens[$pos]->id unconditionally
31+
// when $pos is the current cursor, because the sentinel naturally
32+
// fails to match any real grammar terminal while feeding the
33+
// nullable-fallback branch of the selector check.
34+
$tokens[] = new WP_Parser_Token( WP_Parser_Grammar::EMPTY_RULE_ID, 0, 0, '' );
35+
$this->tokens = $tokens;
3036
$this->position = 0;
3137
$this->rules = $grammar->rules;
3238
$this->rule_names = $grammar->rule_names;
@@ -51,15 +57,14 @@ public function parse() {
5157
* round trip per consumed token.
5258
*/
5359
private function parse_recursive( $rule_id ) {
54-
$tokens = $this->tokens;
55-
$token_count = $this->token_count;
56-
$position = $this->position;
60+
$tokens = $this->tokens;
61+
$position = $this->position;
5762

5863
// Narrow the set of branches worth trying using the precomputed FIRST
5964
// sets. When no entry exists for the current token but the rule is
6065
// nullable, all candidate branches would match empty, so we return
6166
// immediately without entering any branch.
62-
$tid = $position < $token_count ? $tokens[ $position ]->id : WP_Parser_Grammar::EMPTY_RULE_ID;
67+
$tid = $tokens[ $position ]->id;
6368
if ( isset( $this->branches_for_token[ $rule_id ][ $tid ] ) ) {
6469
$candidate_branches = $this->branches_for_token[ $rule_id ][ $tid ];
6570
} elseif ( isset( $this->nullable_branches[ $rule_id ] ) ) {
@@ -70,9 +75,8 @@ private function parse_recursive( $rule_id ) {
7075

7176
$highest_terminal_id = $this->highest_terminal_id;
7277
$branches = $this->rules[ $rule_id ];
73-
$fragment_ids = $this->fragment_ids;
7478
$rule_name = $this->rule_names[ $rule_id ];
75-
$is_fragment = isset( $fragment_ids[ $rule_id ] );
79+
$is_fragment = isset( $this->fragment_ids[ $rule_id ] );
7680
$is_select_statement = 'selectStatement' === $rule_name;
7781
$branch_matches = false;
7882
$children = array();
@@ -83,10 +87,10 @@ private function parse_recursive( $rule_id ) {
8387
$branch_matches = true;
8488
foreach ( $branch as $subrule_id ) {
8589
if ( $subrule_id <= $highest_terminal_id ) {
86-
if (
87-
$this->position < $token_count
88-
&& $tokens[ $this->position ]->id === $subrule_id
89-
) {
90+
// The sentinel at $tokens[$this->token_count] has id 0 so it
91+
// cannot match any real terminal, making the range check
92+
// unnecessary here.
93+
if ( $tokens[ $this->position ]->id === $subrule_id ) {
9094
$children[] = $tokens[ $this->position ];
9195
++$this->position;
9296
continue;
@@ -125,7 +129,6 @@ private function parse_recursive( $rule_id ) {
125129
if (
126130
$branch_matches
127131
&& $is_select_statement
128-
&& $this->position < $token_count
129132
&& WP_MySQL_Lexer::INTO_SYMBOL === $tokens[ $this->position ]->id
130133
) {
131134
$branch_matches = false;

0 commit comments

Comments
 (0)