Skip to content

Commit 090350f

Browse files
committed
Explore native parser branch dispatch
1 parent 5a9067a commit 090350f

1 file changed

Lines changed: 41 additions & 3 deletions

File tree

  • packages/mysql-on-sqlite/ext/wp-mysql-parser/src

packages/mysql-on-sqlite/ext/wp-mysql-parser/src/lib.rs

Lines changed: 41 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -927,12 +927,27 @@ struct Grammar {
927927
}
928928

929929
struct Rule {
930-
branches: Vec<Vec<i64>>,
930+
branches: Vec<Branch>,
931931
lookahead: Option<Vec<i64>>,
932932
rule_name: String,
933933
is_fragment: bool,
934934
}
935935

936+
/// One alternative (branch) of a grammar `Rule`, stored together with a
/// cached copy of its first symbol so callers can inspect it without
/// touching the `symbols` vector.
struct Branch {
937+
// Symbol ids that make up this branch, in the order they are parsed.
symbols: Vec<i64>,
938+
// Copy of `symbols[0]`; `None` when the branch has no symbols.
first_symbol: Option<i64>,
939+
}
940+
941+
impl Branch {
942+
/// Builds a `Branch` from its symbol list, taking ownership of `symbols`
/// and recording its first element (if any) in `first_symbol`.
fn new(symbols: Vec<i64>) -> Self {
943+
// Read the first element before `symbols` is moved into the struct below.
let first_symbol = symbols.first().copied();
944+
Self {
945+
symbols,
946+
first_symbol,
947+
}
948+
}
949+
}
950+
936951
impl Grammar {
937952
fn rule(&self, rule_id: i64) -> Option<&Rule> {
938953
usize::try_from(rule_id)
@@ -946,6 +961,21 @@ impl Grammar {
946961
.map(|rule| rule.is_fragment)
947962
.unwrap_or(false)
948963
}
964+
965+
/// Reports whether `symbol_id` may begin a match when the current token is
/// `token_id`.
///
/// * Ids at or below `highest_terminal_id` are compared directly with the
///   token; id `0` is always accepted.
/// * Larger ids are looked up as rules: an unknown rule yields `false`, a
///   rule without a lookahead list yields `true`, and otherwise the list
///   must contain `token_id` or `0`.
fn can_symbol_start(&self, symbol_id: i64, token_id: i64) -> bool {
966+
// Terminal range: accept an exact token match, or symbol 0.
// NOTE(review): 0 presumably denotes the empty (epsilon) symbol — confirm.
if symbol_id <= self.highest_terminal_id {
967+
return 0 == symbol_id || symbol_id == token_id;
968+
}
969+
970+
// No rule found for this id: nothing can start here.
let Some(rule) = self.rule(symbol_id) else {
971+
return false;
972+
};
973+
// Without a lookahead list there is nothing to prune on, so allow the attempt.
let Some(lookahead) = rule.lookahead.as_ref() else {
974+
return true;
975+
};
976+
977+
// NOTE(review): `binary_search` assumes `lookahead` is sorted ascending —
// confirm where the list is built. A `0` entry is accepted for any token.
lookahead.binary_search(&token_id).is_ok() || lookahead.binary_search(&0).is_ok()
978+
}
949979
}
950980

951981
static GRAMMAR_CACHE: OnceLock<Mutex<HashMap<u32, Arc<Grammar>>>> = OnceLock::new();
@@ -1106,14 +1136,22 @@ impl WpMySqlNativeParser {
11061136
}
11071137

11081138
let starting_position = self.position;
1139+
let starting_token_id = self.token_ids.get(starting_position).copied().unwrap_or(0);
11091140
let mut matched_node = None;
11101141

11111142
for branch in &rule.branches {
1143+
if branch
1144+
.first_symbol
1145+
.is_some_and(|symbol_id| !grammar.can_symbol_start(symbol_id, starting_token_id))
1146+
{
1147+
continue;
1148+
}
1149+
11121150
self.position = starting_position;
11131151
let mut children = Vec::new();
11141152
let mut branch_matches = true;
11151153

1116-
for &subrule_id in branch {
1154+
for &subrule_id in &branch.symbols {
11171155
match self.parse_recursive_inner(subrule_id)? {
11181156
ParseMatch::No => {
11191157
branch_matches = false;
@@ -1360,7 +1398,7 @@ fn build_rules(
13601398
}
13611399

13621400
dense_rules[index] = Some(Rule {
1363-
branches,
1401+
branches: branches.into_iter().map(Branch::new).collect(),
13641402
lookahead,
13651403
rule_name: rule_names.get(&rule_id).cloned().unwrap_or_default(),
13661404
is_fragment: fragment_ids.contains(&rule_id),

0 commit comments

Comments
 (0)