Skip to content

Commit 2476729

Browse files
committed
Limit native parser PHP changes
1 parent 5c0d0e7 commit 2476729

4 files changed

Lines changed: 159 additions & 132 deletions

File tree

packages/mysql-on-sqlite/ext/wp-mysql-parser/src/lib.rs

Lines changed: 138 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
use std::collections::{HashMap, HashSet};
44
use std::os::raw::c_char;
55
use std::ptr;
6-
use std::sync::Arc;
6+
use std::sync::{Arc, Mutex, OnceLock};
77

88
use ext_php_rs::convert::{FromZval, IntoZval, IntoZvalDyn};
99
use ext_php_rs::exception::{PhpException, PhpResult};
@@ -915,6 +915,12 @@ struct Rule {
915915
is_fragment: bool,
916916
}
917917

918+
/// Content fingerprint that identifies an exported grammar in `GRAMMAR_CACHE`.
///
/// The two halves are the independent 64-bit lanes produced by
/// `GrammarCacheHasher::finish`. Two grammars are treated as identical when
/// both lanes match.
///
/// `Debug` is derived so keys can appear in diagnostics and `assert_eq!`.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
struct GrammarCacheKey {
    /// Final state of the hasher's `low` lane.
    low: u64,
    /// Final state of the hasher's `high` lane.
    high: u64,
}
923+
918924
impl Grammar {
919925
fn rule(&self, rule_id: i64) -> Option<&Rule> {
920926
usize::try_from(rule_id)
@@ -924,11 +930,10 @@ impl Grammar {
924930
}
925931
}
926932

927-
#[php_class]
928-
#[php(name = "WP_MySQL_Native_Grammar")]
929-
pub struct WpMySqlNativeGrammar {
930-
grammar: Arc<Grammar>,
931-
}
933+
// Cache only Rust-owned grammar data, keyed by exported grammar content. Zend
934+
// object handles are request-local and can be reused, so they must not identify
935+
// cached entries.
936+
static GRAMMAR_CACHE: OnceLock<Mutex<HashMap<GrammarCacheKey, Arc<Grammar>>>> = OnceLock::new();
932937

933938
enum ParserTokenSource {
934939
Php(Vec<Zval>),
@@ -1670,17 +1675,23 @@ impl WpMySqlNativeParser {
16701675
}
16711676

16721677
fn export_grammar(grammar_zval: &mut Zval) -> PhpResult<Arc<Grammar>> {
1673-
if let Some(cached) = cached_native_grammar(grammar_zval)? {
1674-
return Ok(cached);
1675-
}
1676-
16771678
let exported = php_function("wp_sqlite_mysql_native_export_grammar")?
16781679
.try_call(vec![&*grammar_zval as &dyn IntoZvalDyn])
16791680
.map_err(php_error)?;
16801681
let array = exported
16811682
.array()
16821683
.ok_or_else(|| php_error("Exported grammar must be an array"))?;
16831684

1685+
let cache_key = grammar_cache_key(array)?;
1686+
if let Some(cached) = GRAMMAR_CACHE
1687+
.get_or_init(|| Mutex::new(HashMap::new()))
1688+
.lock()
1689+
.map_err(|_| php_error("Grammar cache lock poisoned"))?
1690+
.get(&cache_key)
1691+
{
1692+
return Ok(Arc::clone(cached));
1693+
}
1694+
16841695
let highest_terminal_id = array
16851696
.get("highest_terminal_id")
16861697
.and_then(Zval::long)
@@ -1730,37 +1741,130 @@ fn export_grammar(grammar_zval: &mut Zval) -> PhpResult<Arc<Grammar>> {
17301741
select_statement_rule_id,
17311742
});
17321743

1733-
cache_native_grammar(grammar_zval, Arc::clone(&grammar))?;
1744+
GRAMMAR_CACHE
1745+
.get_or_init(|| Mutex::new(HashMap::new()))
1746+
.lock()
1747+
.map_err(|_| php_error("Grammar cache lock poisoned"))?
1748+
.insert(cache_key, Arc::clone(&grammar));
17341749

17351750
Ok(grammar)
17361751
}
17371752

1738-
fn cached_native_grammar(grammar: &Zval) -> PhpResult<Option<Arc<Grammar>>> {
1739-
let object = grammar
1740-
.object()
1741-
.ok_or_else(|| php_error("Parser grammar must be an object"))?;
1742-
let properties = object.get_properties().map_err(php_error)?;
1743-
let Some(native_grammar) = properties.get("native_grammar") else {
1744-
return Ok(None);
1745-
};
1746-
let Some(native_grammar) = <&WpMySqlNativeGrammar as FromZval>::from_zval(native_grammar)
1747-
else {
1748-
return Ok(None);
1749-
};
1753+
/// Derives the cache key for an exported grammar array.
///
/// The whole array is fed through a fresh `GrammarCacheHasher`; any error
/// raised while hashing (for example an unsupported value type) is returned
/// unchanged.
fn grammar_cache_key(array: &ZendHashTable) -> PhpResult<GrammarCacheKey> {
    let mut state = GrammarCacheHasher::new();
    hash_grammar_array(&mut state, array).map(|()| state.finish())
}
17501758

1751-
Ok(Some(Arc::clone(&native_grammar.grammar)))
1759+
/// Running state of the two-lane hash used to fingerprint exported grammars.
///
/// Both lanes are updated for every byte written and become the two halves
/// of the resulting `GrammarCacheKey`.
struct GrammarCacheHasher {
    // First 64-bit lane; copied into `GrammarCacheKey::low` on finish.
    low: u64,
    // Second 64-bit lane; copied into `GrammarCacheKey::high` on finish.
    high: u64,
}
17531763

1754-
fn cache_native_grammar(grammar_zval: &mut Zval, grammar: Arc<Grammar>) -> PhpResult<()> {
1755-
let object = grammar_zval
1756-
.object_mut()
1757-
.ok_or_else(|| php_error("Parser grammar must be an object"))?;
1758-
let native_grammar = WpMySqlNativeGrammar { grammar }
1759-
.into_zval(false)
1760-
.map_err(php_error)?;
1761-
object
1762-
.set_property("native_grammar", native_grammar)
1763-
.map_err(php_error)
1764+
impl GrammarCacheHasher {
1765+
fn new() -> Self {
1766+
Self {
1767+
low: 0xcbf29ce484222325,
1768+
high: 0x6c62272e07bb0142,
1769+
}
1770+
}
1771+
1772+
fn write_byte(&mut self, byte: u8) {
1773+
self.low ^= u64::from(byte);
1774+
self.low = self.low.wrapping_mul(0x100000001b3);
1775+
self.high ^= u64::from(byte).rotate_left(5);
1776+
self.high = self.high.wrapping_mul(0x100000001b3 ^ 0x9e3779b97f4a7c15);
1777+
}
1778+
1779+
fn write_bytes(&mut self, bytes: &[u8]) {
1780+
self.write_usize(bytes.len());
1781+
for byte in bytes {
1782+
self.write_byte(*byte);
1783+
}
1784+
}
1785+
1786+
fn write_i64(&mut self, value: i64) {
1787+
for byte in value.to_le_bytes() {
1788+
self.write_byte(byte);
1789+
}
1790+
}
1791+
1792+
fn write_u64(&mut self, value: u64) {
1793+
for byte in value.to_le_bytes() {
1794+
self.write_byte(byte);
1795+
}
1796+
}
1797+
1798+
fn write_usize(&mut self, value: usize) {
1799+
self.write_u64(value as u64);
1800+
}
1801+
1802+
fn finish(self) -> GrammarCacheKey {
1803+
GrammarCacheKey {
1804+
low: self.low,
1805+
high: self.high,
1806+
}
1807+
}
1808+
}
1809+
1810+
fn hash_grammar_array(hasher: &mut GrammarCacheHasher, array: &ZendHashTable) -> PhpResult<()> {
1811+
hasher.write_usize(array.len());
1812+
for (key, value) in array {
1813+
hash_grammar_array_key(hasher, key);
1814+
hash_grammar_zval(hasher, value)?;
1815+
}
1816+
Ok(())
1817+
}
1818+
1819+
fn hash_grammar_zval(hasher: &mut GrammarCacheHasher, zval: &Zval) -> PhpResult<()> {
1820+
let zval = zval.dereference();
1821+
match zval.get_type() {
1822+
DataType::Null => hasher.write_byte(0),
1823+
DataType::False => hasher.write_byte(1),
1824+
DataType::True => hasher.write_byte(2),
1825+
DataType::Long => {
1826+
hasher.write_byte(3);
1827+
hasher.write_i64(
1828+
zval.long()
1829+
.ok_or_else(|| php_error("Grammar integer value is invalid"))?,
1830+
);
1831+
}
1832+
DataType::String => {
1833+
hasher.write_byte(4);
1834+
hasher.write_bytes(
1835+
zval.str()
1836+
.ok_or_else(|| php_error("Grammar string value is invalid"))?
1837+
.as_bytes(),
1838+
);
1839+
}
1840+
DataType::Array => {
1841+
hasher.write_byte(5);
1842+
let array = zval
1843+
.array()
1844+
.ok_or_else(|| php_error("Grammar array value is invalid"))?;
1845+
hash_grammar_array(hasher, array)?;
1846+
}
1847+
_ => return Err(php_error("Unsupported grammar cache value")),
1848+
}
1849+
1850+
Ok(())
1851+
}
1852+
1853+
fn hash_grammar_array_key(hasher: &mut GrammarCacheHasher, key: ArrayKey<'_>) {
1854+
match key {
1855+
ArrayKey::Long(value) => {
1856+
hasher.write_byte(0);
1857+
hasher.write_i64(value);
1858+
}
1859+
ArrayKey::String(value) => {
1860+
hasher.write_byte(1);
1861+
hasher.write_bytes(value.as_bytes());
1862+
}
1863+
ArrayKey::Str(value) => {
1864+
hasher.write_byte(1);
1865+
hasher.write_bytes(value.as_bytes());
1866+
}
1867+
}
17641868
}
17651869

17661870
fn export_tokens(tokens: &mut Zval) -> PhpResult<(ParserTokenSource, Vec<i64>)> {
@@ -1914,7 +2018,6 @@ extern "C" fn php_module_info(_module: *mut ModuleEntry) {
19142018
#[php_module]
19152019
pub fn get_module(module: ModuleBuilder) -> ModuleBuilder {
19162020
module
1917-
.class::<WpMySqlNativeGrammar>()
19182021
.class::<WpMySqlNativeAst>()
19192022
.class::<WpMySqlNativeTokenStream>()
19202023
.class::<WpMySqlNativeLexer>()

packages/mysql-on-sqlite/src/mysql/mysql-rust-bridge.php

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,10 @@
1313
*/
1414
function wp_sqlite_mysql_native_export_grammar( WP_Parser_Grammar $grammar ): array {
1515
return array(
16-
'highest_terminal_id' => $grammar->get_highest_terminal_id(),
17-
'rules' => $grammar->get_rules(),
18-
'lookahead_is_match_possible' => $grammar->get_lookahead_is_match_possible(),
19-
'rule_names' => $grammar->get_rule_names(),
20-
'fragment_ids' => $grammar->get_fragment_ids(),
16+
'highest_terminal_id' => $grammar->highest_terminal_id,
17+
'rules' => $grammar->rules,
18+
'lookahead_is_match_possible' => $grammar->lookahead_is_match_possible,
19+
'rule_names' => $grammar->rule_names,
20+
'fragment_ids' => $grammar->fragment_ids,
2121
);
2222
}

packages/mysql-on-sqlite/src/parser/class-wp-parser-grammar.php

Lines changed: 6 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -26,48 +26,17 @@ class WP_Parser_Grammar {
2626
/**
2727
* @TODO: Review and document these properties and their visibility.
2828
*/
29-
private $rules = array();
30-
private $rule_names = array();
31-
private $fragment_ids = array();
32-
private $lookahead_is_match_possible = array();
33-
private $lowest_non_terminal_id;
34-
private $highest_terminal_id;
35-
public $native_grammar;
29+
public $rules;
30+
public $rule_names;
31+
public $fragment_ids;
32+
public $lookahead_is_match_possible = array();
33+
public $lowest_non_terminal_id;
34+
public $highest_terminal_id;
3635

3736
public function __construct( array $rules ) {
3837
$this->inflate( $rules );
3938
}
4039

41-
public function __get( $name ) {
42-
if ( $this->is_grammar_property( $name ) ) {
43-
return $this->$name;
44-
}
45-
46-
trigger_error( 'Undefined property: ' . __CLASS__ . '::$' . $name, E_USER_NOTICE );
47-
return null;
48-
}
49-
50-
public function __isset( $name ) {
51-
return $this->is_grammar_property( $name ) && isset( $this->$name );
52-
}
53-
54-
public function __set( $name, $value ) {
55-
if ( $this->is_grammar_property( $name ) ) {
56-
$this->$name = $value;
57-
$this->native_grammar = null;
58-
return;
59-
}
60-
61-
trigger_error( 'Undefined property: ' . __CLASS__ . '::$' . $name, E_USER_NOTICE );
62-
}
63-
64-
public function __unset( $name ) {
65-
if ( $this->is_grammar_property( $name ) ) {
66-
unset( $this->$name );
67-
$this->native_grammar = null;
68-
}
69-
}
70-
7140
public function get_rule_name( $rule_id ) {
7241
return $this->rule_names[ $rule_id ];
7342
}
@@ -76,41 +45,6 @@ public function get_rule_id( $rule_name ) {
7645
return array_search( $rule_name, $this->rule_names, true );
7746
}
7847

79-
public function get_rules() {
80-
return $this->rules;
81-
}
82-
83-
public function get_rule_names() {
84-
return $this->rule_names;
85-
}
86-
87-
public function get_fragment_ids() {
88-
return $this->fragment_ids;
89-
}
90-
91-
public function get_lookahead_is_match_possible() {
92-
return $this->lookahead_is_match_possible;
93-
}
94-
95-
public function get_highest_terminal_id() {
96-
return $this->highest_terminal_id;
97-
}
98-
99-
private function is_grammar_property( $name ) {
100-
return in_array(
101-
$name,
102-
array(
103-
'rules',
104-
'rule_names',
105-
'fragment_ids',
106-
'lookahead_is_match_possible',
107-
'lowest_non_terminal_id',
108-
'highest_terminal_id',
109-
),
110-
true
111-
);
112-
}
113-
11448
/**
11549
* Inflate the grammar to an internal representation optimized for parsing.
11650
*

0 commit comments

Comments
 (0)