Skip to content

Commit 2a66d00

Browse files
committed
Add direct-return fast path for single-candidate rules
On the MySQL grammar, 1,290 of 1,916 rules have a selector where every (rule, token) entry points to exactly one branch. Those rules account for ~55% of parse_recursive() calls on the test corpus (722k of 1.3M per 10k queries). Flag those rules at grammar build time. In parse_recursive(), detect the flag and take the only candidate branch directly, skipping the candidate-iteration loop. On match failure, restore $position and return false directly instead of going through the multi-candidate $branch_matches/break sequence. End-to-end parser benchmark: without JIT, ~31.6K -> ~32.6K QPS avg (+3%); with tracing JIT, ~52.6K -> ~55.7K QPS avg (+6%).
1 parent ffda59d commit 2a66d00

2 files changed

Lines changed: 79 additions & 11 deletions

File tree

packages/mysql-on-sqlite/src/parser/class-wp-parser-grammar.php

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,15 @@ class WP_Parser_Grammar {
5353
*/
5454
public $nullable_branches = array();
5555

56+
/**
57+
* Per-rule flag indicating every (rule, token) selector entry points
58+
* to exactly one branch. The parser uses this to skip the outer
59+
* foreach when a single candidate is the only possibility.
60+
*
61+
* @var array<int,true>
62+
*/
63+
public $single_candidate_rules = array();
64+
5665
public $lowest_non_terminal_id;
5766
public $highest_terminal_id;
5867

@@ -365,8 +374,12 @@ private function build_branch_selectors() {
365374
// the per-attempt $rules[$rule_id][$idx] indirection in the
366375
// parser hot loop. The dedup itself is what keeps the cost at
367376
// ~+16 MiB; without it the embedded table would be ~40 MB.
368-
$by_signature = array();
377+
$by_signature = array();
378+
$all_single_candidates = true;
369379
foreach ( $selector as $tid => $idx_list ) {
380+
if ( 1 !== count( $idx_list ) ) {
381+
$all_single_candidates = false;
382+
}
370383
$sig = implode( ',', $idx_list );
371384
if ( isset( $by_signature[ $sig ] ) ) {
372385
$selector[ $tid ] = $by_signature[ $sig ];
@@ -380,6 +393,9 @@ private function build_branch_selectors() {
380393
}
381394
}
382395
$this->branches_for_token[ $rule_id ] = $selector;
396+
if ( $all_single_candidates ) {
397+
$this->single_candidate_rules[ $rule_id ] = true;
398+
}
383399
}
384400
}
385401
}

packages/mysql-on-sqlite/src/parser/class-wp-parser.php

Lines changed: 62 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ class WP_Parser {
2222
private $nullable_branches;
2323
private $highest_terminal_id;
2424
private $select_statement_rule_id;
25+
private $single_candidate_rules;
2526

2627
public function __construct( WP_Parser_Grammar $grammar, array $tokens ) {
2728
$this->grammar = $grammar;
@@ -45,14 +46,15 @@ public function __construct( WP_Parser_Grammar $grammar, array $tokens ) {
4546
// - WP_MySQL_Parser::next_query() bounds at $position < $token_count
4647
// (set above, before the append), so the sentinel sits at index
4748
// $token_count and is never fed into a parse round.
48-
$tokens[] = new WP_Parser_Token( WP_Parser_Grammar::EMPTY_RULE_ID, 0, 0, '' );
49-
$this->tokens = $tokens;
50-
$this->position = 0;
51-
$this->rule_names = $grammar->rule_names;
52-
$this->fragment_ids = $grammar->fragment_ids;
53-
$this->branches_for_token = $grammar->branches_for_token;
54-
$this->nullable_branches = $grammar->nullable_branches;
55-
$this->highest_terminal_id = $grammar->highest_terminal_id;
49+
$tokens[] = new WP_Parser_Token( WP_Parser_Grammar::EMPTY_RULE_ID, 0, 0, '' );
50+
$this->tokens = $tokens;
51+
$this->position = 0;
52+
$this->rule_names = $grammar->rule_names;
53+
$this->fragment_ids = $grammar->fragment_ids;
54+
$this->branches_for_token = $grammar->branches_for_token;
55+
$this->nullable_branches = $grammar->nullable_branches;
56+
$this->highest_terminal_id = $grammar->highest_terminal_id;
57+
$this->single_candidate_rules = $grammar->single_candidate_rules;
5658

5759
// The INTO negative-lookahead only fires for selectStatement. Cache
5860
// the rule id so the per-call check is an int compare instead of a
@@ -93,8 +95,58 @@ private function parse_recursive( $rule_id ) {
9395
$highest_terminal_id = $this->highest_terminal_id;
9496
$is_fragment = isset( $this->fragment_ids[ $rule_id ] );
9597
$is_select_statement = $rule_id === $this->select_statement_rule_id;
96-
$branch_matches = false;
97-
$children = array();
98+
99+
// Fast path for rules where every (rule, token) selector entry
100+
// points to exactly one branch - about 55% of nonterminal calls
101+
// on the MySQL corpus. Skip the outer foreach and the
102+
// $branch_matches bookkeeping; every failure path just rewinds
103+
// the position and returns false directly.
104+
if ( isset( $this->single_candidate_rules[ $rule_id ] ) ) {
105+
$branch = $candidate_branches[0];
106+
$children = array();
107+
foreach ( $branch as $subrule_id ) {
108+
if ( $subrule_id <= $highest_terminal_id ) {
109+
if ( $tokens[ $this->position ]->id === $subrule_id ) {
110+
$children[] = $tokens[ $this->position ];
111+
++$this->position;
112+
continue;
113+
}
114+
$this->position = $position;
115+
return false;
116+
}
117+
118+
$subnode = $this->parse_recursive( $subrule_id );
119+
if ( false === $subnode ) {
120+
$this->position = $position;
121+
return false;
122+
}
123+
if ( true === $subnode ) {
124+
continue;
125+
}
126+
if ( is_array( $subnode ) ) {
127+
foreach ( $subnode as $c ) {
128+
$children[] = $c;
129+
}
130+
} else {
131+
$children[] = $subnode;
132+
}
133+
}
134+
135+
if ( $is_select_statement && WP_MySQL_Lexer::INTO_SYMBOL === $tokens[ $this->position ]->id ) {
136+
$this->position = $position;
137+
return false;
138+
}
139+
if ( ! $children ) {
140+
return true;
141+
}
142+
if ( $is_fragment ) {
143+
return $children;
144+
}
145+
return new WP_Parser_Node( $rule_id, $this->rule_names[ $rule_id ], $children );
146+
}
147+
148+
$branch_matches = false;
149+
$children = array();
98150
foreach ( $candidate_branches as $branch ) {
99151
$this->position = $position;
100152
$children = array();

0 commit comments

Comments
 (0)