Skip to content

Commit 9c2bd24

Browse files
committed
Speed up MySQL lexer and parser
1 parent de8f9b0 commit 9c2bd24

5 files changed

Lines changed: 264 additions & 105 deletions

File tree

packages/mysql-on-sqlite/src/mysql/class-wp-mysql-lexer.php

Lines changed: 80 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -2111,6 +2111,13 @@ class WP_MySQL_Lexer {
21112111
*/
21122112
private $sql;
21132113

2114+
/**
2115+
* Byte length of the SQL payload.
2116+
*
2117+
* @var int
2118+
*/
2119+
private $sql_length;
2120+
21142121
/**
21152122
* The version of the MySQL server that the SQL payload is intended for.
21162123
*
@@ -2189,6 +2196,7 @@ public function __construct(
21892196
array $sql_modes = array()
21902197
) {
21912198
$this->sql = $sql;
2199+
$this->sql_length = strlen( $sql );
21922200
$this->mysql_version = $mysql_version;
21932201

21942202
foreach ( $sql_modes as $sql_mode ) {
@@ -2284,10 +2292,46 @@ public function get_token(): ?WP_MySQL_Token {
22842292
* @return WP_MySQL_Token[] An array of token objects representing the remaining tokens.
22852293
*/
22862294
public function remaining_tokens(): array {
2287-
$tokens = array();
2288-
while ( true === $this->next_token() ) {
2289-
$token = $this->get_token();
2290-
$tokens[] = $token;
2295+
$tokens = array();
2296+
$no_backslash_escapes_sql_mode_set = $this->is_sql_mode_active(
2297+
self::SQL_MODE_NO_BACKSLASH_ESCAPES
2298+
);
2299+
2300+
while ( true ) {
2301+
if (
2302+
self::EOF === $this->token_type
2303+
|| ( null === $this->token_type && $this->bytes_already_read > 0 )
2304+
) {
2305+
$this->token_type = null;
2306+
break;
2307+
}
2308+
2309+
do {
2310+
$this->token_starts_at = $this->bytes_already_read;
2311+
$this->token_type = $this->read_next_token();
2312+
} while (
2313+
self::WHITESPACE === $this->token_type
2314+
|| self::COMMENT === $this->token_type
2315+
|| self::MYSQL_COMMENT_START === $this->token_type
2316+
|| self::MYSQL_COMMENT_END === $this->token_type
2317+
);
2318+
2319+
if ( null === $this->token_type ) {
2320+
break;
2321+
}
2322+
2323+
$tokens[] = new WP_MySQL_Token(
2324+
$this->token_type,
2325+
$this->token_starts_at,
2326+
$this->bytes_already_read - $this->token_starts_at,
2327+
$this->sql,
2328+
$no_backslash_escapes_sql_mode_set
2329+
);
2330+
2331+
if ( self::EOF === $this->token_type ) {
2332+
$this->token_type = null;
2333+
break;
2334+
}
22912335
}
22922336
return $tokens;
22932337
}
@@ -2356,10 +2400,10 @@ private function read_next_token(): ?int {
23562400

23572401
if ( "'" === $byte || '"' === $byte || '`' === $byte ) {
23582402
$type = $this->read_quoted_text();
2359-
} elseif ( null !== $byte && strspn( $byte, self::DIGIT_MASK ) > 0 ) {
2403+
} elseif ( null !== $byte && $byte >= '0' && $byte <= '9' ) {
23602404
$type = $this->read_number();
23612405
} elseif ( '.' === $byte ) {
2362-
if ( null !== $next_byte && strspn( $next_byte, self::DIGIT_MASK ) > 0 ) {
2406+
if ( null !== $next_byte && $next_byte >= '0' && $next_byte <= '9' ) {
23632407
$type = $this->read_number();
23642408
} else {
23652409
$this->bytes_already_read += 1;
@@ -2420,8 +2464,8 @@ private function read_next_token(): ?int {
24202464
} elseif ( '-' === $byte ) {
24212465
if (
24222466
'-' === $next_byte
2423-
&& $this->bytes_already_read + 2 < strlen( $this->sql )
2424-
&& strspn( $this->sql[ $this->bytes_already_read + 2 ], self::WHITESPACE_MASK ) > 0
2467+
&& $this->bytes_already_read + 2 < $this->sql_length
2468+
&& false !== strpos( self::WHITESPACE_MASK, $this->sql[ $this->bytes_already_read + 2 ] )
24252469
) {
24262470
$type = $this->read_line_comment();
24272471
} elseif ( '>' === $next_byte ) {
@@ -2547,7 +2591,13 @@ private function read_next_token(): ?int {
25472591
}
25482592
} elseif ( '#' === $byte ) {
25492593
$type = $this->read_line_comment();
2550-
} elseif ( null !== $byte && strspn( $byte, self::WHITESPACE_MASK ) > 0 ) {
2594+
} elseif (
2595+
' ' === $byte
2596+
|| "\t" === $byte
2597+
|| "\n" === $byte
2598+
|| "\r" === $byte
2599+
|| "\f" === $byte
2600+
) {
25512601
$this->bytes_already_read += strspn( $this->sql, self::WHITESPACE_MASK, $this->bytes_already_read );
25522602
$type = self::WHITESPACE;
25532603
} elseif ( ( 'x' === $byte || 'X' === $byte || 'b' === $byte || 'B' === $byte ) && "'" === $next_byte ) {
@@ -2675,7 +2725,7 @@ private function read_number(): ?int {
26752725
'0' === $byte
26762726
&& 'x' === $next_byte
26772727
&& null !== $third_byte
2678-
&& strspn( $third_byte, self::HEX_DIGIT_MASK ) > 0
2728+
&& false !== strpos( self::HEX_DIGIT_MASK, $third_byte )
26792729
)
26802730
// HEX number in the form of x'N' or X'N'.
26812731
|| ( ( 'x' === $byte || 'X' === $byte ) && "'" === $next_byte )
@@ -2685,7 +2735,7 @@ private function read_number(): ?int {
26852735
$this->bytes_already_read += strspn( $this->sql, self::HEX_DIGIT_MASK, $this->bytes_already_read );
26862736
if ( $is_quoted ) {
26872737
if (
2688-
$this->bytes_already_read >= strlen( $this->sql )
2738+
$this->bytes_already_read >= $this->sql_length
26892739
|| "'" !== $this->sql[ $this->bytes_already_read ]
26902740
) {
26912741
return null; // Invalid input.
@@ -2708,7 +2758,7 @@ private function read_number(): ?int {
27082758
$this->bytes_already_read += strspn( $this->sql, '01', $this->bytes_already_read );
27092759
if ( $is_quoted ) {
27102760
if (
2711-
$this->bytes_already_read >= strlen( $this->sql )
2761+
$this->bytes_already_read >= $this->sql_length
27122762
|| "'" !== $this->sql[ $this->bytes_already_read ]
27132763
) {
27142764
return null; // Invalid input.
@@ -2737,11 +2787,12 @@ private function read_number(): ?int {
27372787
( 'e' === $byte || 'E' === $byte )
27382788
&& null !== $next_byte
27392789
&& (
2740-
strspn( $next_byte, self::DIGIT_MASK ) > 0
2790+
( $next_byte >= '0' && $next_byte <= '9' )
27412791
|| (
27422792
( '+' === $next_byte || '-' === $next_byte )
2743-
&& $this->bytes_already_read + 2 < strlen( $this->sql )
2744-
&& strspn( $this->sql[ $this->bytes_already_read + 2 ], self::DIGIT_MASK ) > 0
2793+
&& $this->bytes_already_read + 2 < $this->sql_length
2794+
&& $this->sql[ $this->bytes_already_read + 2 ] >= '0'
2795+
&& $this->sql[ $this->bytes_already_read + 2 ] <= '9'
27452796
)
27462797
);
27472798
if ( $has_exponent ) {
@@ -2840,7 +2891,11 @@ private function read_quoted_text(): ?int {
28402891
// in which case the escape sequence is consumed and the loop continues.
28412892
$at = $this->bytes_already_read;
28422893
while ( true ) {
2843-
$at += strcspn( $this->sql, $quote, $at );
2894+
$quote_at = strpos( $this->sql, $quote, $at );
2895+
if ( false === $quote_at ) {
2896+
return null; // Invalid input.
2897+
}
2898+
$at = $quote_at;
28442899

28452900
/*
28462901
* By default, quotes can be escaped with a "\".
@@ -2852,18 +2907,16 @@ private function read_quoted_text(): ?int {
28522907
* "\\\" is an escaped backslash and an escape sequence, and so on.
28532908
*/
28542909
if ( ! $no_backslash_escapes ) {
2855-
for ($i = 0; '\\' === $this->sql[ $at - $i - 1 ]; $i += 1);
2910+
$i = 0;
2911+
while ( '\\' === ( $this->sql[ $at - $i - 1 ] ?? null ) ) {
2912+
$i += 1;
2913+
}
28562914
if ( 1 === $i % 2 ) {
28572915
$at += 1;
28582916
continue;
28592917
}
28602918
}
28612919

2862-
// Unclosed string - unexpected EOF.
2863-
if ( ( $this->sql[ $at ] ?? null ) !== $quote ) {
2864-
return null; // Invalid input.
2865-
}
2866-
28672920
// Check if the quote is doubled.
28682921
if ( ( $this->sql[ $at + 1 ] ?? null ) === $quote ) {
28692922
$at += 2;
@@ -2922,17 +2975,11 @@ private function read_mysql_comment(): int {
29222975
}
29232976

29242977
private function read_comment_content(): void {
2925-
while ( true ) {
2926-
$this->bytes_already_read += strcspn( $this->sql, '*', $this->bytes_already_read );
2927-
$this->bytes_already_read += 1; // Consume the '*'.
2928-
$byte = $this->sql[ $this->bytes_already_read ] ?? null;
2929-
if ( null === $byte ) {
2930-
break;
2931-
}
2932-
if ( '/' === $byte ) {
2933-
$this->bytes_already_read += 1; // Consume the '/'.
2934-
break;
2935-
}
2978+
$comment_end = strpos( $this->sql, '*/', $this->bytes_already_read );
2979+
if ( false === $comment_end ) {
2980+
$this->bytes_already_read = $this->sql_length;
2981+
} else {
2982+
$this->bytes_already_read = $comment_end + 2;
29362983
}
29372984
}
29382985

packages/mysql-on-sqlite/src/mysql/class-wp-mysql-token.php

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,10 @@ public function __construct(
3030
string $input,
3131
bool $sql_mode_no_backslash_escapes_enabled
3232
) {
33-
parent::__construct( $id, $start, $length, $input );
33+
$this->id = $id;
34+
$this->start = $start;
35+
$this->length = $length;
36+
$this->input = $input;
3437
$this->sql_mode_no_backslash_escapes_enabled = $sql_mode_no_backslash_escapes_enabled;
3538
}
3639

0 commit comments

Comments
 (0)