Skip to content

Commit ce75275

Browse files
committed
Add direct-return fast path for single-candidate rules
On the MySQL grammar, 1,290 of 1,916 rules have a selector where every (rule, token) entry points to exactly one branch. Those rules account for ~55% of parse_recursive calls on the test corpus (722k of 1.3M per 10k queries). Flag those rules at grammar build time. In parse_recursive, detect the flag and take the only candidate branch directly, skipping the candidate-iteration loop. On match failure, restore $position and return false directly instead of going through the multi-candidate branch_matches/break sequence. End-to-end parser benchmark — no JIT: ~31.6K -> ~32.6K QPS avg (+3%); tracing JIT: ~52.6K -> ~55.7K QPS avg (+6%).
1 parent 5f6bb6a commit ce75275

2 files changed

Lines changed: 82 additions & 11 deletions

File tree

packages/mysql-on-sqlite/src/parser/class-wp-parser-grammar.php

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,15 @@ class WP_Parser_Grammar {
5555
*/
5656
public $nullable_branches = array();
5757

58+
/**
59+
* Per-rule flag indicating every (rule, token) selector entry points
60+
* to exactly one branch. The parser uses this to skip the outer
61+
* foreach when a single candidate is the only possibility.
62+
*
63+
* @var array<int,true>
64+
*/
65+
public $single_candidate_rules = array();
66+
5867
public $lowest_non_terminal_id;
5968
public $highest_terminal_id;
6069

@@ -345,8 +354,12 @@ private function build_branch_selectors() {
345354
// copy-on-write share one sequences array across all of
346355
// them. Without this the nested table would be ~40 MB; with
347356
// it, ~1 MB.
348-
$by_signature = array();
357+
$by_signature = array();
358+
$all_single_candidates = true;
349359
foreach ( $selector as $tid => $idx_list ) {
360+
if ( 1 !== count( $idx_list ) ) {
361+
$all_single_candidates = false;
362+
}
350363
$sig = implode( ',', $idx_list );
351364
if ( isset( $by_signature[ $sig ] ) ) {
352365
$selector[ $tid ] = $by_signature[ $sig ];
@@ -360,6 +373,9 @@ private function build_branch_selectors() {
360373
}
361374
}
362375
$this->branches_for_token[ $rule_id ] = $selector;
376+
if ( $all_single_candidates ) {
377+
$this->single_candidate_rules[ $rule_id ] = true;
378+
}
363379
}
364380
}
365381
}

packages/mysql-on-sqlite/src/parser/class-wp-parser.php

Lines changed: 65 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ class WP_Parser {
2222
private $nullable_branches;
2323
private $highest_terminal_id;
2424
private $select_statement_rule_id;
25+
private $single_candidate_rules;
2526

2627
public function __construct( WP_Parser_Grammar $grammar, array $tokens ) {
2728
$this->grammar = $grammar;
@@ -31,14 +32,15 @@ public function __construct( WP_Parser_Grammar $grammar, array $tokens ) {
3132
// when $pos is the current cursor, because the sentinel naturally
3233
// fails to match any real grammar terminal while feeding the
3334
// nullable-fallback branch of the selector check.
34-
$tokens[] = new WP_Parser_Token( WP_Parser_Grammar::EMPTY_RULE_ID, 0, 0, '' );
35-
$this->tokens = $tokens;
36-
$this->position = 0;
37-
$this->rule_names = $grammar->rule_names;
38-
$this->fragment_ids = $grammar->fragment_ids ?? array();
39-
$this->branches_for_token = $grammar->branches_for_token;
40-
$this->nullable_branches = $grammar->nullable_branches;
41-
$this->highest_terminal_id = $grammar->highest_terminal_id;
35+
$tokens[] = new WP_Parser_Token( WP_Parser_Grammar::EMPTY_RULE_ID, 0, 0, '' );
36+
$this->tokens = $tokens;
37+
$this->position = 0;
38+
$this->rule_names = $grammar->rule_names;
39+
$this->fragment_ids = $grammar->fragment_ids ?? array();
40+
$this->branches_for_token = $grammar->branches_for_token;
41+
$this->nullable_branches = $grammar->nullable_branches;
42+
$this->highest_terminal_id = $grammar->highest_terminal_id;
43+
$this->single_candidate_rules = $grammar->single_candidate_rules ?? array();
4244

4345
// The INTO negative-lookahead only fires for selectStatement. Cache
4446
// the rule id so the per-call check is an int compare instead of a
@@ -89,8 +91,61 @@ private function parse_recursive( $rule_id ) {
8991
$highest_terminal_id = $this->highest_terminal_id;
9092
$is_fragment = isset( $this->fragment_ids[ $rule_id ] );
9193
$is_select_statement = $rule_id === $this->select_statement_rule_id;
92-
$branch_matches = false;
93-
$children = array();
94+
95+
// Fast path for rules where every (rule, token) selector entry
96+
// points to exactly one branch - about 55% of nonterminal calls
97+
// on the MySQL corpus. Skipping the outer foreach avoids the
98+
// foreach iterator setup for those calls.
99+
if ( isset( $this->single_candidate_rules[ $rule_id ] ) ) {
100+
// Single-candidate fast path: the rule has exactly one branch
101+
// to try for this token, so skip the outer foreach and the
102+
// $branch_matches bookkeeping - every failure path just
103+
// rewinds the position and returns false directly.
104+
$branch = $candidate_branches[0];
105+
$children = array();
106+
foreach ( $branch as $subrule_id ) {
107+
if ( $subrule_id <= $highest_terminal_id ) {
108+
if ( $tokens[ $this->position ]->id === $subrule_id ) {
109+
$children[] = $tokens[ $this->position ];
110+
++$this->position;
111+
continue;
112+
}
113+
$this->position = $position;
114+
return false;
115+
}
116+
117+
$subnode = $this->parse_recursive( $subrule_id );
118+
if ( false === $subnode ) {
119+
$this->position = $position;
120+
return false;
121+
}
122+
if ( true === $subnode ) {
123+
continue;
124+
}
125+
if ( is_array( $subnode ) ) {
126+
foreach ( $subnode as $c ) {
127+
$children[] = $c;
128+
}
129+
} else {
130+
$children[] = $subnode;
131+
}
132+
}
133+
134+
if ( $is_select_statement && WP_MySQL_Lexer::INTO_SYMBOL === $tokens[ $this->position ]->id ) {
135+
$this->position = $position;
136+
return false;
137+
}
138+
if ( ! $children ) {
139+
return true;
140+
}
141+
if ( $is_fragment ) {
142+
return $children;
143+
}
144+
return new WP_Parser_Node( $rule_id, $this->rule_names[ $rule_id ], $children );
145+
}
146+
147+
$branch_matches = false;
148+
$children = array();
94149
foreach ( $candidate_branches as $branch ) {
95150
$this->position = $position;
96151
$children = array();

0 commit comments

Comments
 (0)