Skip to content

Commit 0fc8ff8

Browse files
committed
Refine parser dispatch tables
1 parent 9c2bd24 commit 0fc8ff8

2 files changed

Lines changed: 77 additions & 19 deletions

File tree

packages/mysql-on-sqlite/src/parser/class-wp-parser-grammar.php

Lines changed: 57 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -29,11 +29,11 @@ class WP_Parser_Grammar {
2929
public $rules;
3030
public $rule_names;
3131
public $fragment_ids;
32-
public $lookahead_is_match_possible = array();
3332
public $branch_candidates = array();
3433
public $lowest_non_terminal_id;
3534
public $highest_terminal_id;
3635
private $rule_ids_by_name = array();
36+
private static $lookahead_table_cache = array();
3737

3838
public function __construct( array $rules ) {
3939
$this->inflate( $rules );
@@ -88,13 +88,41 @@ private function inflate( $grammar ) {
8888
$this->rules[ $rule_id ] = $branches;
8989
}
9090

91-
$this->compute_lookahead_tables();
91+
$lookahead_cache_key = $this->get_lookahead_cache_key( $grammar );
92+
if ( isset( self::$lookahead_table_cache[ $lookahead_cache_key ] ) ) {
93+
$this->branch_candidates = self::$lookahead_table_cache[ $lookahead_cache_key ];
94+
} else {
95+
$this->branch_candidates = $this->compute_branch_candidates();
96+
self::$lookahead_table_cache[ $lookahead_cache_key ] = $this->branch_candidates;
97+
}
9298
}
9399

94100
/**
95-
* Compute FIRST-set lookahead tables for rules and individual branches.
101+
* Get a stable cache key for the compressed grammar.
96102
*/
97-
private function compute_lookahead_tables(): void {
103+
private function get_lookahead_cache_key( array $grammar ): string {
104+
$hash = hash_init( 'md5' );
105+
106+
hash_update( $hash, (string) $grammar['rules_offset'] );
107+
108+
foreach ( $grammar['rules_names'] as $rule_name ) {
109+
hash_update( $hash, "\0n" . $rule_name );
110+
}
111+
112+
foreach ( $grammar['grammar'] as $branches ) {
113+
hash_update( $hash, "\0r" );
114+
foreach ( $branches as $branch ) {
115+
hash_update( $hash, "\0b" . implode( ',', $branch ) );
116+
}
117+
}
118+
119+
return hash_final( $hash );
120+
}
121+
122+
/**
123+
* Compute, for each rule, which branches are candidates for each FIRST-set lookahead token.
124+
*/
125+
private function compute_branch_candidates(): array {
98126
$first_sets = array();
99127
foreach ( $this->rules as $rule_id => $_branches ) {
100128
$first_sets[ $rule_id ] = array();
@@ -115,32 +143,36 @@ private function compute_lookahead_tables(): void {
115143
}
116144
} while ( $changed );
117145

118-
$this->lookahead_is_match_possible = $first_sets;
119-
146+
$branch_candidates = array();
120147
foreach ( $this->rules as $rule_id => $branches ) {
121-
$this->branch_candidates[ $rule_id ] = array();
148+
$branch_candidates[ $rule_id ] = array();
122149
foreach ( $branches as $branch_index => $branch ) {
123150
$branch_first = $this->get_branch_first_set(
124151
$branch,
125152
$first_sets
126153
);
127154
foreach ( $branch_first as $token_id => $_ ) {
128-
$this->branch_candidates[ $rule_id ][ $token_id ][] = $branch_index;
155+
$branch_candidates[ $rule_id ][ $token_id ][] = $branch_index;
129156
}
130157
}
131-
if ( isset( $this->branch_candidates[ $rule_id ][ self::EMPTY_RULE_ID ] ) ) {
132-
$empty_branches = $this->branch_candidates[ $rule_id ][ self::EMPTY_RULE_ID ];
133-
foreach ( $this->branch_candidates[ $rule_id ] as $token_id => $branch_indexes ) {
158+
if ( isset( $branch_candidates[ $rule_id ][ self::EMPTY_RULE_ID ] ) ) {
159+
$empty_branches = $branch_candidates[ $rule_id ][ self::EMPTY_RULE_ID ];
160+
foreach ( $branch_candidates[ $rule_id ] as $token_id => $branch_indexes ) {
134161
if ( self::EMPTY_RULE_ID === $token_id ) {
135162
continue;
136163
}
137-
$this->branch_candidates[ $rule_id ][ $token_id ] = $this->merge_branch_indexes(
164+
$branch_candidates[ $rule_id ][ $token_id ] = $this->merge_branch_indexes(
138165
$branch_indexes,
139166
$empty_branches
140167
);
141168
}
142169
}
170+
foreach ( $branch_candidates[ $rule_id ] as $token_id => $branch_indexes ) {
171+
$branch_candidates[ $rule_id ][ $token_id ] = $this->compact_branch_indexes( $branch_indexes );
172+
}
143173
}
174+
175+
return $branch_candidates;
144176
}
145177

146178
/**
@@ -187,6 +219,19 @@ private function get_branch_first_set( array $branch, array $first_sets ): array
187219
return $branch_first;
188220
}
189221

222+
/**
223+
* Compact branch indexes when there is only one candidate.
224+
*
225+
* @param int[] $branch_indexes Branch indexes to pack.
226+
* @return int|int[] The sole branch index when there is exactly one candidate; otherwise the list of branch indexes unchanged (including an empty list).
227+
*/
228+
private function compact_branch_indexes( array $branch_indexes ) {
229+
if ( 1 === count( $branch_indexes ) ) {
230+
return $branch_indexes[0];
231+
}
232+
return $branch_indexes;
233+
}
234+
190235
/**
191236
* Merge two branch-index lists while preserving grammar order.
192237
*

packages/mysql-on-sqlite/src/parser/class-wp-parser.php

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ class WP_Parser {
1212
protected $grammar;
1313
protected $tokens;
1414
protected $position;
15+
private $token_ids;
1516
private $rules;
1617
private $rule_names;
1718
private $fragment_ids;
@@ -35,6 +36,10 @@ public function parse() {
3536
$this->highest_terminal_id = $this->grammar->highest_terminal_id;
3637
$this->select_statement_rule_id = $this->grammar->get_rule_id( 'selectStatement' );
3738
$this->token_count = count( $this->tokens );
39+
$this->token_ids = array();
40+
foreach ( $this->tokens as $token ) {
41+
$this->token_ids[] = $token->id;
42+
}
3843
$this->failed_matches = array();
3944

4045
// @TODO: Make the starting rule lookup non-grammar-specific.
@@ -54,7 +59,7 @@ private function parse_recursive( $rule_id ) {
5459
return true;
5560
}
5661

57-
if ( $this->tokens[ $this->position ]->id === $rule_id ) {
62+
if ( $this->token_ids[ $this->position ] === $rule_id ) {
5863
return $this->tokens[ $this->position++ ];
5964
}
6065
return false;
@@ -68,22 +73,28 @@ private function parse_recursive( $rule_id ) {
6873
$branches = $this->rules[ $rule_id ];
6974

7075
$token_id = $this->position < $this->token_count
71-
? $this->tokens[ $this->position ]->id
76+
? $this->token_ids[ $this->position ]
7277
: null;
7378
$rule_name = $this->rule_names[ $rule_id ];
7479
$branch_candidates = $this->branch_candidates[ $rule_id ];
7580
$branch_indexes = null !== $token_id && isset( $branch_candidates[ $token_id ] )
7681
? $branch_candidates[ $token_id ]
7782
: ( $branch_candidates[ WP_Parser_Grammar::EMPTY_RULE_ID ] ?? array() );
7883

79-
if ( ! count( $branch_indexes ) ) {
84+
$single_branch_index = is_int( $branch_indexes ) ? $branch_indexes : null;
85+
86+
if ( null === $single_branch_index && ! count( $branch_indexes ) ) {
8087
$this->failed_matches[ $starting_position ][ $rule_id ] = true;
8188
return false;
8289
}
8390

8491
$branch_matches = false;
8592
$node = null;
86-
foreach ( $branch_indexes as $branch_index ) {
93+
$branch_index_count = null === $single_branch_index ? count( $branch_indexes ) : 1;
94+
for ( $branch_index_offset = 0; $branch_index_offset < $branch_index_count; $branch_index_offset++ ) {
95+
$branch_index = null === $single_branch_index
96+
? $branch_indexes[ $branch_index_offset ]
97+
: $single_branch_index;
8798
$branch = $branches[ $branch_index ];
8899
$this->position = $starting_position;
89100
$node = new WP_Parser_Node( $rule_id, $rule_name );
@@ -116,11 +127,13 @@ private function parse_recursive( $rule_id ) {
116127
// for right-associative rules, which could solve this.
117128
// See: https://github.com/mysql/mysql-workbench/blob/8.0.38/library/parsers/grammars/MySQLParser.g4#L994
118129
// See: https://github.com/antlr/antlr4/issues/488
119-
$la = $this->tokens[ $this->position ] ?? null;
130+
$lookahead_id = $this->position < $this->token_count
131+
? $this->token_ids[ $this->position ]
132+
: null;
120133
if (
121-
$la
134+
null !== $lookahead_id
122135
&& $rule_id === $this->select_statement_rule_id
123-
&& WP_MySQL_Lexer::INTO_SYMBOL === $la->id
136+
&& WP_MySQL_Lexer::INTO_SYMBOL === $lookahead_id
124137
) {
125138
$branch_matches = false;
126139
}

0 commit comments

Comments
 (0)