Skip to content

Commit f726c2e

Browse files
committed
Embed branch symbol sequences directly in the per-token selector
Previously the per-(rule, token) selector stored a list of branch indexes that the parser then had to look up in $rules[$rule_id] on every branch attempt. Store the branch symbol sequences themselves so the hot loop can iterate candidate branches directly.

PHP arrays are copy-on-write, so sharing the same branch sequence across selector entries for many tokens costs negligible extra memory. The nullable_branches map shrinks to a bool marker since the parser only uses it for existence checks.

Also cache the start rule id on the grammar so parse() skips its array_search() across rule_names on every call.

End-to-end parser benchmark: before ~29,800 QPS (avg); after ~31,700 QPS (+6%).
1 parent b0646aa commit f726c2e

2 files changed

Lines changed: 28 additions & 7 deletions

File tree

packages/mysql-on-sqlite/src/parser/class-wp-parser-grammar.php

Lines changed: 19 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -58,6 +58,13 @@ class WP_Parser_Grammar {
5858
public $lowest_non_terminal_id;
5959
public $highest_terminal_id;
6060

61+
/**
62+
* Cached id of the grammar's start rule, populated lazily on first parse.
63+
*
64+
* @var int|null
65+
*/
66+
public $start_rule_id;
67+
6168
public function __construct( array $rules ) {
6269
$this->inflate( $rules );
6370
}
@@ -319,10 +326,20 @@ private function build_branch_selectors() {
319326
foreach ( $selector as $tid => $idx_list ) {
320327
$merged[ $tid ] = self::merge_sorted( $idx_list, $nullable_branch_ids );
321328
}
322-
$selector = $merged;
323-
$this->nullable_branches[ $rule_id ] = $nullable_branch_ids;
329+
$selector = $merged;
330+
$this->nullable_branches[ $rule_id ] = true;
324331
}
325332
if ( $selector ) {
333+
// Store the candidate branch sequences directly so the parser
334+
// can foreach over them without an extra $branches[$idx]
335+
// indirection on every branch attempt.
336+
foreach ( $selector as $tid => $idx_list ) {
337+
$seqs = array();
338+
foreach ( $idx_list as $idx ) {
339+
$seqs[] = $branches[ $idx ];
340+
}
341+
$selector[ $tid ] = $seqs;
342+
}
326343
$this->branches_for_token[ $rule_id ] = $selector;
327344
}
328345
}

packages/mysql-on-sqlite/src/parser/class-wp-parser.php

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,14 @@ public function __construct( WP_Parser_Grammar $grammar, array $tokens ) {
4444

4545
public function parse() {
4646
// @TODO: Make the starting rule lookup non-grammar-specific.
47-
$query_rule_id = $this->grammar->get_rule_id( 'query' );
48-
$ast = $this->parse_recursive( $query_rule_id );
47+
// Cache the query rule id on the grammar - get_rule_id() does a
48+
// linear array_search over all rule names which, on the MySQL
49+
// grammar, costs a few microseconds per lookup.
50+
$grammar = $this->grammar;
51+
if ( null === $grammar->start_rule_id ) {
52+
$grammar->start_rule_id = $grammar->get_rule_id( 'query' );
53+
}
54+
$ast = $this->parse_recursive( $grammar->start_rule_id );
4955
return false === $ast ? null : $ast;
5056
}
5157

@@ -74,14 +80,12 @@ private function parse_recursive( $rule_id ) {
7480
}
7581

7682
$highest_terminal_id = $this->highest_terminal_id;
77-
$branches = $this->rules[ $rule_id ];
7883
$rule_name = $this->rule_names[ $rule_id ];
7984
$is_fragment = isset( $this->fragment_ids[ $rule_id ] );
8085
$is_select_statement = 'selectStatement' === $rule_name;
8186
$branch_matches = false;
8287
$children = array();
83-
foreach ( $candidate_branches as $idx ) {
84-
$branch = $branches[ $idx ];
88+
foreach ( $candidate_branches as $branch ) {
8589
$this->position = $position;
8690
$children = array();
8791
$branch_matches = true;

0 commit comments

Comments
 (0)