@@ -22,6 +22,7 @@ class WP_Parser {
2222 private $ nullable_branches ;
2323 private $ highest_terminal_id ;
2424 private $ select_statement_rule_id ;
25+ private $ single_candidate_rules ;
2526
2627 public function __construct ( WP_Parser_Grammar $ grammar , array $ tokens ) {
2728 $ this ->grammar = $ grammar ;
@@ -31,14 +32,15 @@ public function __construct( WP_Parser_Grammar $grammar, array $tokens ) {
3132 // when $pos is the current cursor, because the sentinel naturally
3233 // fails to match any real grammar terminal while feeding the
3334 // nullable-fallback branch of the selector check.
34- $ tokens [] = new WP_Parser_Token ( WP_Parser_Grammar::EMPTY_RULE_ID , 0 , 0 , '' );
35- $ this ->tokens = $ tokens ;
36- $ this ->position = 0 ;
37- $ this ->rule_names = $ grammar ->rule_names ;
38- $ this ->fragment_ids = $ grammar ->fragment_ids ?? array ();
39- $ this ->branches_for_token = $ grammar ->branches_for_token ;
40- $ this ->nullable_branches = $ grammar ->nullable_branches ;
41- $ this ->highest_terminal_id = $ grammar ->highest_terminal_id ;
35+ $ tokens [] = new WP_Parser_Token ( WP_Parser_Grammar::EMPTY_RULE_ID , 0 , 0 , '' );
36+ $ this ->tokens = $ tokens ;
37+ $ this ->position = 0 ;
38+ $ this ->rule_names = $ grammar ->rule_names ;
39+ $ this ->fragment_ids = $ grammar ->fragment_ids ?? array ();
40+ $ this ->branches_for_token = $ grammar ->branches_for_token ;
41+ $ this ->nullable_branches = $ grammar ->nullable_branches ;
42+ $ this ->highest_terminal_id = $ grammar ->highest_terminal_id ;
43+ $ this ->single_candidate_rules = $ grammar ->single_candidate_rules ?? array ();
4244
4345 // The INTO negative-lookahead only fires for selectStatement. Cache
4446 // the rule id so the per-call check is an int compare instead of a
@@ -89,8 +91,61 @@ private function parse_recursive( $rule_id ) {
8991 $ highest_terminal_id = $ this ->highest_terminal_id ;
9092 $ is_fragment = isset ( $ this ->fragment_ids [ $ rule_id ] );
9193 $ is_select_statement = $ rule_id === $ this ->select_statement_rule_id ;
92- $ branch_matches = false ;
93- $ children = array ();
94+
95+ // Fast path for rules where every (rule, token) selector entry
96+ // points to exactly one branch - about 55% of nonterminal calls
97+ // on the MySQL corpus. Skipping the outer foreach avoids the
98+ // foreach iterator setup for those calls.
99+ if ( isset ( $ this ->single_candidate_rules [ $ rule_id ] ) ) {
100+ // Single-candidate fast path: the rule has exactly one branch
101+ // to try for this token, so skip the outer foreach and the
102+ // $branch_matches bookkeeping - every failure path just
103+ // rewinds the position and returns false directly.
104+ $ branch = $ candidate_branches [0 ];
105+ $ children = array ();
106+ foreach ( $ branch as $ subrule_id ) {
107+ if ( $ subrule_id <= $ highest_terminal_id ) {
108+ if ( $ tokens [ $ this ->position ]->id === $ subrule_id ) {
109+ $ children [] = $ tokens [ $ this ->position ];
110+ ++$ this ->position ;
111+ continue ;
112+ }
113+ $ this ->position = $ position ;
114+ return false ;
115+ }
116+
117+ $ subnode = $ this ->parse_recursive ( $ subrule_id );
118+ if ( false === $ subnode ) {
119+ $ this ->position = $ position ;
120+ return false ;
121+ }
122+ if ( true === $ subnode ) {
123+ continue ;
124+ }
125+ if ( is_array ( $ subnode ) ) {
126+ foreach ( $ subnode as $ c ) {
127+ $ children [] = $ c ;
128+ }
129+ } else {
130+ $ children [] = $ subnode ;
131+ }
132+ }
133+
134+ if ( $ is_select_statement && WP_MySQL_Lexer::INTO_SYMBOL === $ tokens [ $ this ->position ]->id ) {
135+ $ this ->position = $ position ;
136+ return false ;
137+ }
138+ if ( ! $ children ) {
139+ return true ;
140+ }
141+ if ( $ is_fragment ) {
142+ return $ children ;
143+ }
144+ return new WP_Parser_Node ( $ rule_id , $ this ->rule_names [ $ rule_id ], $ children );
145+ }
146+
147+ $ branch_matches = false ;
148+ $ children = array ();
94149 foreach ( $ candidate_branches as $ branch ) {
95150 $ this ->position = $ position ;
96151 $ children = array ();
0 commit comments