Skip to content

Commit 5275791

Browse files
authored
Cache native grammar on parser grammar object (#387)
## Summary

- add one explicit `WP_Parser_Grammar::$native_grammar` cache slot
- store the compiled Rust grammar on the PHP grammar object instead of in a content-hash cache
- remove the full exported-grammar hash walk from native parser construction

## Why

The previous Rust-only content-key cache preserved a smaller PHP diff, but every parser construction still exported and recursively hashed the entire grammar before it could hit the cache. In the SQLite smoke benchmark that dropped the native path back to roughly 2x faster than PHP.

This restores the object-attached cache path we had before, but keeps the PHP diff explicit and minimal: one new public cache property on `WP_Parser_Grammar`.

## Measurements

Command:

```bash
TMP_TEST_NATIVE_QUERY_COUNT=250 ./tmp-test-native/run.sh
```

| Run | PHP parser | Rust parser | Speedup |
| ---: | ---: | ---: | ---: |
| 1 | 3.088s | 0.389s | 7.94x |
| 2 | 3.126s | 0.386s | 8.10x |
| 3 | 2.927s | 0.348s | 8.41x |

Default 2000-query smoke workload:

| Workload | PHP parser | Rust parser | Speedup |
| --- | ---: | ---: | ---: |
| 2000 generated queries, including 8 x 2000-row inserts | 24.082s | 3.008s | 8.01x |

## Testing

- `cargo fmt --check`
- `php -l packages/mysql-on-sqlite/src/parser/class-wp-parser-grammar.php`
- `git diff --check`
- `TMP_TEST_NATIVE_QUERY_COUNT=250 ./tmp-test-native/run.sh`
- `./tmp-test-native/run.sh`

## Notes

This assumes `WP_Parser_Grammar` is immutable after construction for native parsing purposes. That matches current use, and the tradeoff is isolated in this PR so it is visible in review.
1 parent 80d8a64 commit 5275791

2 files changed

Lines changed: 36 additions & 138 deletions

File tree

packages/mysql-on-sqlite/src/parser/class-wp-parser-grammar.php

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ class WP_Parser_Grammar {
3232
public $lookahead_is_match_possible = array();
3333
public $lowest_non_terminal_id;
3434
public $highest_terminal_id;
35+
public $native_grammar;
3536

3637
public function __construct( array $rules ) {
3738
$this->inflate( $rules );

packages/php-ext-wp-mysql-parser/src/lib.rs

Lines changed: 35 additions & 138 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
use std::collections::{HashMap, HashSet};
44
use std::os::raw::c_char;
55
use std::ptr;
6-
use std::sync::{Arc, Mutex, OnceLock};
6+
use std::sync::Arc;
77

88
use ext_php_rs::convert::{FromZval, IntoZval, IntoZvalDyn};
99
use ext_php_rs::exception::{PhpException, PhpResult};
@@ -915,12 +915,6 @@ struct Rule {
915915
is_fragment: bool,
916916
}
917917

918-
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
919-
struct GrammarCacheKey {
920-
low: u64,
921-
high: u64,
922-
}
923-
924918
impl Grammar {
925919
fn rule(&self, rule_id: i64) -> Option<&Rule> {
926920
usize::try_from(rule_id)
@@ -930,10 +924,11 @@ impl Grammar {
930924
}
931925
}
932926

933-
// Cache only Rust-owned grammar data, keyed by exported grammar content. Zend
934-
// object handles are request-local and can be reused, so they must not identify
935-
// cached entries.
936-
static GRAMMAR_CACHE: OnceLock<Mutex<HashMap<GrammarCacheKey, Arc<Grammar>>>> = OnceLock::new();
927+
#[php_class]
928+
#[php(name = "WP_MySQL_Native_Grammar")]
929+
pub struct WpMySqlNativeGrammar {
930+
grammar: Arc<Grammar>,
931+
}
937932

938933
enum ParserTokenSource {
939934
Php(Vec<Zval>),
@@ -1685,23 +1680,17 @@ impl WpMySqlNativeParser {
16851680
}
16861681

16871682
fn export_grammar(grammar_zval: &mut Zval) -> PhpResult<Arc<Grammar>> {
1683+
if let Some(cached) = cached_native_grammar(grammar_zval)? {
1684+
return Ok(cached);
1685+
}
1686+
16881687
let exported = php_function("wp_sqlite_mysql_native_export_grammar")?
16891688
.try_call(vec![&*grammar_zval as &dyn IntoZvalDyn])
16901689
.map_err(php_error)?;
16911690
let array = exported
16921691
.array()
16931692
.ok_or_else(|| php_error("Exported grammar must be an array"))?;
16941693

1695-
let cache_key = grammar_cache_key(array)?;
1696-
if let Some(cached) = GRAMMAR_CACHE
1697-
.get_or_init(|| Mutex::new(HashMap::new()))
1698-
.lock()
1699-
.map_err(|_| php_error("Grammar cache lock poisoned"))?
1700-
.get(&cache_key)
1701-
{
1702-
return Ok(Arc::clone(cached));
1703-
}
1704-
17051694
let highest_terminal_id = array
17061695
.get("highest_terminal_id")
17071696
.and_then(Zval::long)
@@ -1751,130 +1740,37 @@ fn export_grammar(grammar_zval: &mut Zval) -> PhpResult<Arc<Grammar>> {
17511740
select_statement_rule_id,
17521741
});
17531742

1754-
GRAMMAR_CACHE
1755-
.get_or_init(|| Mutex::new(HashMap::new()))
1756-
.lock()
1757-
.map_err(|_| php_error("Grammar cache lock poisoned"))?
1758-
.insert(cache_key, Arc::clone(&grammar));
1743+
cache_native_grammar(grammar_zval, Arc::clone(&grammar))?;
17591744

17601745
Ok(grammar)
17611746
}
17621747

1763-
fn grammar_cache_key(array: &ZendHashTable) -> PhpResult<GrammarCacheKey> {
1764-
let mut hasher = GrammarCacheHasher::new();
1765-
hash_grammar_array(&mut hasher, array)?;
1766-
Ok(hasher.finish())
1767-
}
1768-
1769-
struct GrammarCacheHasher {
1770-
low: u64,
1771-
high: u64,
1772-
}
1773-
1774-
impl GrammarCacheHasher {
1775-
fn new() -> Self {
1776-
Self {
1777-
low: 0xcbf29ce484222325,
1778-
high: 0x6c62272e07bb0142,
1779-
}
1780-
}
1781-
1782-
fn write_byte(&mut self, byte: u8) {
1783-
self.low ^= u64::from(byte);
1784-
self.low = self.low.wrapping_mul(0x100000001b3);
1785-
self.high ^= u64::from(byte).rotate_left(5);
1786-
self.high = self.high.wrapping_mul(0x100000001b3 ^ 0x9e3779b97f4a7c15);
1787-
}
1788-
1789-
fn write_bytes(&mut self, bytes: &[u8]) {
1790-
self.write_usize(bytes.len());
1791-
for byte in bytes {
1792-
self.write_byte(*byte);
1793-
}
1794-
}
1795-
1796-
fn write_i64(&mut self, value: i64) {
1797-
for byte in value.to_le_bytes() {
1798-
self.write_byte(byte);
1799-
}
1800-
}
1801-
1802-
fn write_u64(&mut self, value: u64) {
1803-
for byte in value.to_le_bytes() {
1804-
self.write_byte(byte);
1805-
}
1806-
}
1807-
1808-
fn write_usize(&mut self, value: usize) {
1809-
self.write_u64(value as u64);
1810-
}
1811-
1812-
fn finish(self) -> GrammarCacheKey {
1813-
GrammarCacheKey {
1814-
low: self.low,
1815-
high: self.high,
1816-
}
1817-
}
1818-
}
1819-
1820-
fn hash_grammar_array(hasher: &mut GrammarCacheHasher, array: &ZendHashTable) -> PhpResult<()> {
1821-
hasher.write_usize(array.len());
1822-
for (key, value) in array {
1823-
hash_grammar_array_key(hasher, key);
1824-
hash_grammar_zval(hasher, value)?;
1825-
}
1826-
Ok(())
1827-
}
1828-
1829-
fn hash_grammar_zval(hasher: &mut GrammarCacheHasher, zval: &Zval) -> PhpResult<()> {
1830-
let zval = zval.dereference();
1831-
match zval.get_type() {
1832-
DataType::Null => hasher.write_byte(0),
1833-
DataType::False => hasher.write_byte(1),
1834-
DataType::True => hasher.write_byte(2),
1835-
DataType::Long => {
1836-
hasher.write_byte(3);
1837-
hasher.write_i64(
1838-
zval.long()
1839-
.ok_or_else(|| php_error("Grammar integer value is invalid"))?,
1840-
);
1841-
}
1842-
DataType::String => {
1843-
hasher.write_byte(4);
1844-
hasher.write_bytes(
1845-
zval.str()
1846-
.ok_or_else(|| php_error("Grammar string value is invalid"))?
1847-
.as_bytes(),
1848-
);
1849-
}
1850-
DataType::Array => {
1851-
hasher.write_byte(5);
1852-
let array = zval
1853-
.array()
1854-
.ok_or_else(|| php_error("Grammar array value is invalid"))?;
1855-
hash_grammar_array(hasher, array)?;
1856-
}
1857-
_ => return Err(php_error("Unsupported grammar cache value")),
1858-
}
1748+
fn cached_native_grammar(grammar: &Zval) -> PhpResult<Option<Arc<Grammar>>> {
1749+
let object = grammar
1750+
.object()
1751+
.ok_or_else(|| php_error("Parser grammar must be an object"))?;
1752+
let properties = object.get_properties().map_err(php_error)?;
1753+
let Some(native_grammar) = properties.get("native_grammar") else {
1754+
return Ok(None);
1755+
};
1756+
let Some(native_grammar) = <&WpMySqlNativeGrammar as FromZval>::from_zval(native_grammar)
1757+
else {
1758+
return Ok(None);
1759+
};
18591760

1860-
Ok(())
1761+
Ok(Some(Arc::clone(&native_grammar.grammar)))
18611762
}
18621763

1863-
fn hash_grammar_array_key(hasher: &mut GrammarCacheHasher, key: ArrayKey<'_>) {
1864-
match key {
1865-
ArrayKey::Long(value) => {
1866-
hasher.write_byte(0);
1867-
hasher.write_i64(value);
1868-
}
1869-
ArrayKey::String(value) => {
1870-
hasher.write_byte(1);
1871-
hasher.write_bytes(value.as_bytes());
1872-
}
1873-
ArrayKey::Str(value) => {
1874-
hasher.write_byte(1);
1875-
hasher.write_bytes(value.as_bytes());
1876-
}
1877-
}
1764+
fn cache_native_grammar(grammar_zval: &mut Zval, grammar: Arc<Grammar>) -> PhpResult<()> {
1765+
let object = grammar_zval
1766+
.object_mut()
1767+
.ok_or_else(|| php_error("Parser grammar must be an object"))?;
1768+
let native_grammar = WpMySqlNativeGrammar { grammar }
1769+
.into_zval(false)
1770+
.map_err(php_error)?;
1771+
object
1772+
.set_property("native_grammar", native_grammar)
1773+
.map_err(php_error)
18781774
}
18791775

18801776
fn export_tokens(tokens: &mut Zval) -> PhpResult<(ParserTokenSource, Vec<i64>)> {
@@ -2028,6 +1924,7 @@ extern "C" fn php_module_info(_module: *mut ModuleEntry) {
20281924
#[php_module]
20291925
pub fn get_module(module: ModuleBuilder) -> ModuleBuilder {
20301926
module
1927+
.class::<WpMySqlNativeGrammar>()
20311928
.class::<WpMySqlNativeAst>()
20321929
.class::<WpMySqlNativeTokenStream>()
20331930
.class::<WpMySqlNativeLexer>()

0 commit comments

Comments
 (0)