Skip to content

Commit 2baaa66

Browse files
committed
Speed up native parser token streaming
1 parent 91d2c3d commit 2baaa66

3 files changed

Lines changed: 145 additions & 54 deletions

File tree

packages/mysql-on-sqlite/ext/wp-mysql-parser/src/lib.rs

Lines changed: 135 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ use std::os::raw::c_char;
55
use std::ptr;
66
use std::sync::{Arc, Mutex, OnceLock};
77

8-
use ext_php_rs::convert::{IntoZval, IntoZvalDyn};
8+
use ext_php_rs::convert::{FromZval, IntoZval, IntoZvalDyn};
99
use ext_php_rs::exception::{PhpException, PhpResult};
1010
use ext_php_rs::ffi::{zend_class_entry, zend_object, zval};
1111
use ext_php_rs::flags::DataType;
@@ -110,6 +110,32 @@ fn update_object_property(
110110
Ok(())
111111
}
112112

113+
fn create_mysql_token(sql_zval: &Zval, token: TokenInfo, no_backslash: bool) -> PhpResult<Zval> {
114+
let id = token.id;
115+
let start = i64::try_from(token.start).map_err(php_error)?;
116+
let length = i64::try_from(token.end.saturating_sub(token.start)).map_err(php_error)?;
117+
let classes = php_classes()?;
118+
let mut object = classes.mysql_token.new();
119+
120+
update_object_property(&mut object, classes.parser_token, "id", id)?;
121+
update_object_property(&mut object, classes.parser_token, "start", start)?;
122+
update_object_property(&mut object, classes.parser_token, "length", length)?;
123+
update_object_property(
124+
&mut object,
125+
classes.parser_token,
126+
"input",
127+
sql_zval.shallow_clone(),
128+
)?;
129+
update_object_property(
130+
&mut object,
131+
classes.mysql_token,
132+
"sql_mode_no_backslash_escapes_enabled",
133+
no_backslash,
134+
)?;
135+
136+
object.into_zval(false).map_err(php_error)
137+
}
138+
113139
fn sql_modes_mask(sql_modes: &[String]) -> i64 {
114140
let mut mask = 0;
115141
for sql_mode in sql_modes {
@@ -182,6 +208,22 @@ struct TokenInfo {
182208
end: usize,
183209
}
184210

211+
#[php_class]
212+
#[php(name = "WP_MySQL_Native_Token_Stream")]
213+
pub struct WpMySqlNativeTokenStream {
214+
sql_zval: Zval,
215+
tokens: Vec<TokenInfo>,
216+
no_backslash: bool,
217+
}
218+
219+
#[php_impl]
220+
#[php(change_method_case = "snake_case")]
221+
impl WpMySqlNativeTokenStream {
222+
pub fn count(&self) -> usize {
223+
self.tokens.len()
224+
}
225+
}
226+
185227
#[php_class]
186228
#[php(name = "WP_MySQL_Native_Lexer", modifier = "register_lexer_constants")]
187229
pub struct WpMySqlNativeLexer {
@@ -266,6 +308,21 @@ impl WpMySqlNativeLexer {
266308
Ok(tokens)
267309
}
268310

311+
pub fn native_token_stream(&mut self) -> WpMySqlNativeTokenStream {
312+
let mut tokens = Vec::new();
313+
while self.next_token() {
314+
if let Some(token) = self.current_token_info() {
315+
tokens.push(token);
316+
}
317+
}
318+
319+
WpMySqlNativeTokenStream {
320+
sql_zval: self.sql_zval.shallow_clone(),
321+
tokens,
322+
no_backslash: self.is_sql_mode_active(SQL_MODE_NO_BACKSLASH_ESCAPES),
323+
}
324+
}
325+
269326
pub fn get_mysql_version(&self) -> i64 {
270327
self.mysql_version
271328
}
@@ -293,30 +350,11 @@ impl WpMySqlNativeLexer {
293350
}
294351

295352
fn create_token(&self, token: TokenInfo) -> PhpResult<Zval> {
296-
let id = token.id;
297-
let start = i64::try_from(token.start).map_err(php_error)?;
298-
let length = i64::try_from(token.end.saturating_sub(token.start)).map_err(php_error)?;
299-
let no_backslash = self.is_sql_mode_active(SQL_MODE_NO_BACKSLASH_ESCAPES);
300-
let classes = php_classes()?;
301-
let mut object = classes.mysql_token.new();
302-
303-
update_object_property(&mut object, classes.parser_token, "id", id)?;
304-
update_object_property(&mut object, classes.parser_token, "start", start)?;
305-
update_object_property(&mut object, classes.parser_token, "length", length)?;
306-
update_object_property(
307-
&mut object,
308-
classes.parser_token,
309-
"input",
310-
self.sql_zval.shallow_clone(),
311-
)?;
312-
update_object_property(
313-
&mut object,
314-
classes.mysql_token,
315-
"sql_mode_no_backslash_escapes_enabled",
316-
no_backslash,
317-
)?;
318-
319-
object.into_zval(false).map_err(php_error)
353+
create_mysql_token(
354+
&self.sql_zval,
355+
token,
356+
self.is_sql_mode_active(SQL_MODE_NO_BACKSLASH_ESCAPES),
357+
)
320358
}
321359

322360
fn read_next_token(&mut self) -> Option<i64> {
@@ -929,11 +967,42 @@ enum ParseMatch {
929967
Token(usize),
930968
}
931969

970+
enum ParserTokenSource {
971+
Php(Vec<Zval>),
972+
Native {
973+
sql_zval: Zval,
974+
tokens: Vec<TokenInfo>,
975+
no_backslash: bool,
976+
},
977+
}
978+
979+
impl ParserTokenSource {
980+
fn create_php_token(&self, index: usize) -> PhpResult<Zval> {
981+
match self {
982+
Self::Php(tokens) => tokens
983+
.get(index)
984+
.map(Zval::shallow_clone)
985+
.ok_or_else(|| php_error("Parser token index is out of range")),
986+
Self::Native {
987+
sql_zval,
988+
tokens,
989+
no_backslash,
990+
} => {
991+
let token = tokens
992+
.get(index)
993+
.copied()
994+
.ok_or_else(|| php_error("Parser token index is out of range"))?;
995+
create_mysql_token(sql_zval, token, *no_backslash)
996+
}
997+
}
998+
}
999+
}
1000+
9321001
#[php_class]
9331002
#[php(name = "WP_MySQL_Native_Parser")]
9341003
pub struct WpMySqlNativeParser {
9351004
grammar: Arc<Grammar>,
936-
tokens: Vec<Zval>,
1005+
token_source: ParserTokenSource,
9371006
token_ids: Vec<i64>,
9381007
position: usize,
9391008
current_ast: Option<ParseMatch>,
@@ -945,11 +1014,11 @@ pub struct WpMySqlNativeParser {
9451014
impl WpMySqlNativeParser {
9461015
pub fn __construct(grammar: &mut Zval, tokens: &mut Zval) -> PhpResult<Self> {
9471016
let grammar = export_grammar(grammar)?;
948-
let (tokens, token_ids) = export_tokens(tokens)?;
1017+
let (token_source, token_ids) = export_tokens(tokens)?;
9491018

9501019
Ok(Self {
9511020
grammar,
952-
tokens,
1021+
token_source,
9531022
token_ids,
9541023
position: 0,
9551024
current_ast: None,
@@ -959,21 +1028,13 @@ impl WpMySqlNativeParser {
9591028

9601029
pub fn parse(&mut self) -> PhpResult<Zval> {
9611030
stacker::maybe_grow(STACK_RED_ZONE, STACK_GROW_SIZE, || {
962-
let ast = self.parse_recursive(self.grammar.query_rule_id)?;
1031+
let ast = self.parse_recursive_inner(self.grammar.query_rule_id)?;
9631032
self.create_php_ast(&ast)
9641033
})
9651034
}
9661035

9671036
pub fn next_query(&mut self) -> PhpResult<bool> {
968-
if self.position >= self.tokens.len() {
969-
self.current_ast = None;
970-
self.current_php_ast = None;
971-
return Ok(false);
972-
}
973-
974-
self.current_ast = Some(self.parse_recursive(self.grammar.query_rule_id)?);
975-
self.current_php_ast = None;
976-
Ok(true)
1037+
stacker::maybe_grow(STACK_RED_ZONE, STACK_GROW_SIZE, || self.next_query_inner())
9771038
}
9781039

9791040
pub fn get_query_ast(&mut self) -> PhpResult<Zval> {
@@ -995,15 +1056,21 @@ impl WpMySqlNativeParser {
9951056
}
9961057

9971058
impl WpMySqlNativeParser {
998-
fn parse_recursive(&mut self, rule_id: i64) -> PhpResult<ParseMatch> {
999-
stacker::maybe_grow(STACK_RED_ZONE, STACK_GROW_SIZE, || {
1000-
self.parse_recursive_inner(rule_id)
1001-
})
1059+
fn next_query_inner(&mut self) -> PhpResult<bool> {
1060+
if self.position >= self.token_ids.len() {
1061+
self.current_ast = None;
1062+
self.current_php_ast = None;
1063+
return Ok(false);
1064+
}
1065+
1066+
self.current_ast = Some(self.parse_recursive_inner(self.grammar.query_rule_id)?);
1067+
self.current_php_ast = None;
1068+
Ok(true)
10021069
}
10031070

10041071
fn parse_recursive_inner(&mut self, rule_id: i64) -> PhpResult<ParseMatch> {
10051072
if rule_id <= self.grammar.highest_terminal_id {
1006-
if self.position >= self.tokens.len() {
1073+
if self.position >= self.token_ids.len() {
10071074
return Ok(ParseMatch::No);
10081075
}
10091076
if rule_id == 0 {
@@ -1047,7 +1114,7 @@ impl WpMySqlNativeParser {
10471114
let mut branch_matches = true;
10481115

10491116
for &subrule_id in branch {
1050-
match self.parse_recursive(subrule_id)? {
1117+
match self.parse_recursive_inner(subrule_id)? {
10511118
ParseMatch::No => {
10521119
branch_matches = false;
10531120
break;
@@ -1095,6 +1162,12 @@ impl WpMySqlNativeParser {
10951162
}
10961163

10971164
fn create_php_ast(&self, ast: &ParseMatch) -> PhpResult<Zval> {
1165+
stacker::maybe_grow(STACK_RED_ZONE, STACK_GROW_SIZE, || {
1166+
self.create_php_ast_inner(ast)
1167+
})
1168+
}
1169+
1170+
fn create_php_ast_inner(&self, ast: &ParseMatch) -> PhpResult<Zval> {
10981171
match ast {
10991172
ParseMatch::No => Ok(Zval::null()),
11001173
ParseMatch::Empty => {
@@ -1103,14 +1176,12 @@ impl WpMySqlNativeParser {
11031176
Ok(zval)
11041177
}
11051178
ParseMatch::Node(node) => self.create_php_node(node),
1106-
ParseMatch::Token(index) => Ok(self.tokens[*index].shallow_clone()),
1179+
ParseMatch::Token(index) => self.token_source.create_php_token(*index),
11071180
}
11081181
}
11091182

11101183
fn create_php_node(&self, ast_node: &AstNode) -> PhpResult<Zval> {
1111-
stacker::maybe_grow(STACK_RED_ZONE, STACK_GROW_SIZE, || {
1112-
self.create_php_node_inner(ast_node)
1113-
})
1184+
self.create_php_node_inner(ast_node)
11141185
}
11151186

11161187
fn create_php_node_inner(&self, ast_node: &AstNode) -> PhpResult<Zval> {
@@ -1126,7 +1197,7 @@ impl WpMySqlNativeParser {
11261197
for child in &ast_node.children {
11271198
let child_zval = match child {
11281199
AstChild::Node(child_node) => self.create_php_node(child_node)?,
1129-
AstChild::Token(index) => self.tokens[*index].shallow_clone(),
1200+
AstChild::Token(index) => self.token_source.create_php_token(*index)?,
11301201
};
11311202
children.push(child_zval);
11321203
}
@@ -1229,7 +1300,19 @@ fn export_grammar(grammar: &mut Zval) -> PhpResult<Arc<Grammar>> {
12291300
Ok(grammar)
12301301
}
12311302

1232-
fn export_tokens(tokens: &mut Zval) -> PhpResult<(Vec<Zval>, Vec<i64>)> {
1303+
fn export_tokens(tokens: &mut Zval) -> PhpResult<(ParserTokenSource, Vec<i64>)> {
1304+
if let Some(stream) = <&WpMySqlNativeTokenStream as FromZval>::from_zval(tokens) {
1305+
let token_ids = stream.tokens.iter().map(|token| token.id).collect();
1306+
return Ok((
1307+
ParserTokenSource::Native {
1308+
sql_zval: stream.sql_zval.shallow_clone(),
1309+
tokens: stream.tokens.clone(),
1310+
no_backslash: stream.no_backslash,
1311+
},
1312+
token_ids,
1313+
));
1314+
}
1315+
12331316
let array = tokens
12341317
.array()
12351318
.ok_or_else(|| php_error("Parser tokens must be an array"))?;
@@ -1245,7 +1328,7 @@ fn export_tokens(tokens: &mut Zval) -> PhpResult<(Vec<Zval>, Vec<i64>)> {
12451328
token_ids.push(id);
12461329
}
12471330

1248-
Ok((token_objects, token_ids))
1331+
Ok((ParserTokenSource::Php(token_objects), token_ids))
12491332
}
12501333

12511334
fn build_rules(
@@ -1368,6 +1451,7 @@ extern "C" fn php_module_info(_module: *mut ModuleEntry) {
13681451
#[php_module]
13691452
pub fn get_module(module: ModuleBuilder) -> ModuleBuilder {
13701453
module
1454+
.class::<WpMySqlNativeTokenStream>()
13711455
.class::<WpMySqlNativeLexer>()
13721456
.class::<WpMySqlNativeParser>()
13731457
.info_function(php_module_info)

packages/mysql-on-sqlite/src/sqlite/class-wp-pdo-mysql-on-sqlite.php

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1153,11 +1153,16 @@ public function get_insert_id() {
11531153
* @return WP_MySQL_Parser A parser initialized for the MySQL query.
11541154
*/
11551155
public function create_parser( string $query ): WP_MySQL_Parser {
1156-
$lexer = new WP_MySQL_Lexer(
1156+
$lexer = new WP_MySQL_Lexer(
11571157
$query,
11581158
80038,
11591159
$this->active_sql_modes
11601160
);
1161+
if ( method_exists( $lexer, 'native_token_stream' ) ) {
1162+
$tokens = $lexer->native_token_stream();
1163+
return new WP_MySQL_Parser( self::$mysql_grammar, $tokens );
1164+
}
1165+
11611166
$tokens = $lexer->remaining_tokens();
11621167
return new WP_MySQL_Parser( self::$mysql_grammar, $tokens );
11631168
}

packages/mysql-on-sqlite/tests/tools/run-parser-benchmark.php

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,10 @@ function get_stats( $total, $failures, $exceptions ) {
5656

5757
try {
5858
$lexer = new WP_MySQL_Lexer( $query );
59-
$tokens = $lexer->remaining_tokens();
60-
if ( count( $tokens ) === 0 ) {
59+
$tokens = method_exists( $lexer, 'native_token_stream' )
60+
? $lexer->native_token_stream()
61+
: $lexer->remaining_tokens();
62+
if ( ( is_array( $tokens ) ? count( $tokens ) : $tokens->count() ) === 0 ) {
6163
throw new Exception( 'Failed to tokenize query: ' . $query );
6264
}
6365

0 commit comments

Comments
 (0)