Skip to content

Commit 2d34613

Browse files
committed
Use depth-first pre-order NLR traversal in node selector methods
This traversal produces a natural ordering that corresponds to the original parsed input. It also aligns with CSS selector behavior. This is useful in situations when we need to reconstruct or retrieve a fragment of the original input with tokens in the correct order.
1 parent e49c578 commit 2d34613

3 files changed

Lines changed: 167 additions & 71 deletions

File tree

phpunit.xml.dist

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,6 @@
1717
<directory suffix=".php">tests/</directory>
1818
<!-- Exclude test tools. -->
1919
<exclude>tests/tools</exclude>
20-
<!-- Exclude new parser tests for now. -->
21-
<exclude>tests/parser</exclude>
2220
</testsuite>
2321
</testsuites>
2422
</phpunit>

tests/parser/WP_Parser_Node_Tests.php

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -35,19 +35,21 @@ public function testEmptyChildren(): void {
3535
}
3636

3737
public function testNodeTree(): void {
38+
$input = 'SELECT 1 + 2, 2';
39+
3840
// Prepare nodes and tokens.
3941
$root = new WP_Parser_Node( 1, 'root' );
4042
$n_keyword = new WP_Parser_Node( 2, 'keyword' );
4143
$n_expr_a = new WP_Parser_Node( 3, 'expr' );
4244
$n_expr_b = new WP_Parser_Node( 3, 'expr' );
4345
$n_expr_c = new WP_Parser_Node( 3, 'expr' );
44-
$t_select = new WP_Parser_Token( 100, 'SELECT' );
45-
$t_comma = new WP_Parser_Token( 200, ',' );
46-
$t_plus = new WP_Parser_Token( 300, '+' );
47-
$t_one = new WP_Parser_Token( 400, '1' );
48-
$t_two_a = new WP_Parser_Token( 400, '2' );
49-
$t_two_b = new WP_Parser_Token( 400, '2' );
50-
$t_eof = new WP_Parser_Token( 500, '' );
46+
$t_select = new WP_Parser_Token( 100, 0, 6, $input );
47+
$t_comma = new WP_Parser_Token( 200, 12, 1, $input );
48+
$t_plus = new WP_Parser_Token( 300, 9, 1, $input );
49+
$t_one = new WP_Parser_Token( 400, 7, 1, $input );
50+
$t_two_a = new WP_Parser_Token( 400, 11, 1, $input );
51+
$t_two_b = new WP_Parser_Token( 400, 14, 1, $input );
52+
$t_eof = new WP_Parser_Token( 500, 15, 0, $input );
5153

5254
// Prepare a tree.
5355
//
@@ -102,38 +104,36 @@ public function testNodeTree(): void {
102104
$this->assertSame( array(), $root->get_child_tokens( 100 ) );
103105

104106
// Test single descendant methods.
105-
// @TODO: Consider breadth-first search vs depth-first search.
106107
$this->assertSame( $n_keyword, $root->get_first_descendant_node() );
107108
$this->assertSame( $n_expr_a, $root->get_first_descendant_node( 'expr' ) );
108109
$this->assertSame( null, $root->get_first_descendant_node( 'root' ) );
109-
$this->assertSame( $t_comma, $root->get_first_descendant_token() );
110+
$this->assertSame( $t_select, $root->get_first_descendant_token() );
110111
$this->assertSame( $t_one, $root->get_first_descendant_token( 400 ) );
111112
$this->assertSame( null, $root->get_first_descendant_token( 123 ) );
112113

113114
// Test multiple descendant methods.
114-
// @TODO: Consider breadth-first search vs depth-first search.
115115
$this->assertSame(
116-
array( $n_keyword, $n_expr_a, $t_comma, $n_expr_b, $t_eof, $t_select, $t_one, $t_plus, $n_expr_c, $t_two_a, $t_two_b ),
116+
array( $n_keyword, $t_select, $n_expr_a, $t_one, $t_plus, $n_expr_c, $t_two_b, $t_comma, $n_expr_b, $t_two_a, $t_eof ),
117117
$root->get_descendants()
118118
);
119119
$this->assertSame(
120-
array( $n_keyword, $n_expr_a, $n_expr_b, $n_expr_c ),
120+
array( $n_keyword, $n_expr_a, $n_expr_c, $n_expr_b ),
121121
$root->get_descendant_nodes()
122122
);
123123
$this->assertSame(
124-
array( $n_expr_a, $n_expr_b, $n_expr_c ),
124+
array( $n_expr_a, $n_expr_c, $n_expr_b ),
125125
$root->get_descendant_nodes( 'expr' )
126126
);
127127
$this->assertSame(
128128
array(),
129129
$root->get_descendant_nodes( 'root' )
130130
);
131131
$this->assertSame(
132-
array( $t_comma, $t_eof, $t_select, $t_one, $t_plus, $t_two_a, $t_two_b ),
132+
array( $t_select, $t_one, $t_plus, $t_two_b, $t_comma, $t_two_a, $t_eof ),
133133
$root->get_descendant_tokens()
134134
);
135135
$this->assertSame(
136-
array( $t_one, $t_two_a, $t_two_b ),
136+
array( $t_one, $t_two_b, $t_two_a ),
137137
$root->get_descendant_tokens( 400 )
138138
);
139139
$this->assertSame(

wp-includes/parser/class-wp-parser-node.php

Lines changed: 152 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -102,10 +102,21 @@ public function merge_fragment( $node ) {
102102
$this->children = array_merge( $this->children, $node->children );
103103
}
104104

105+
/**
106+
* Check if this node has any child nodes or tokens.
107+
*
108+
* @return bool True if this node has any child nodes or tokens, false otherwise.
109+
*/
105110
public function has_child(): bool {
106111
return count( $this->children ) > 0;
107112
}
108113

114+
/**
115+
* Check if this node has any child nodes.
116+
*
117+
* @param string|null $rule_name Optional. A node rule name to check for.
118+
* @return bool True if any child nodes are found, false otherwise.
119+
*/
109120
public function has_child_node( ?string $rule_name = null ): bool {
110121
foreach ( $this->children as $child ) {
111122
if (
@@ -118,6 +129,12 @@ public function has_child_node( ?string $rule_name = null ): bool {
118129
return false;
119130
}
120131

132+
/**
133+
* Check if this node has any child tokens.
134+
*
135+
* @param int|null $token_id Optional. A token ID to check for.
136+
* @return bool True if any child tokens are found, false otherwise.
137+
*/
121138
public function has_child_token( ?int $token_id = null ): bool {
122139
foreach ( $this->children as $child ) {
123140
if (
@@ -130,11 +147,22 @@ public function has_child_token( ?int $token_id = null ): bool {
130147
return false;
131148
}
132149

133-
150+
/**
151+
* Get the first child node or token of this node.
152+
*
153+
* @return WP_Parser_Node|WP_Parser_Token|null The first child node or token;
154+
* null when no children are found.
155+
*/
134156
public function get_first_child() {
135157
return $this->children[0] ?? null;
136158
}
137159

160+
/**
161+
* Get the first child node of this node.
162+
*
163+
* @param string|null $rule_name Optional. A node rule name to check for.
164+
* @return WP_Parser_Node|null The first matching child node; null when no children are found.
165+
*/
138166
public function get_first_child_node( ?string $rule_name = null ): ?WP_Parser_Node {
139167
foreach ( $this->children as $child ) {
140168
if (
@@ -147,6 +175,12 @@ public function get_first_child_node( ?string $rule_name = null ): ?WP_Parser_No
147175
return null;
148176
}
149177

178+
/**
179+
* Get the first child token of this node.
180+
*
181+
* @param int|null $token_id Optional. A token ID to check for.
182+
* @return WP_Parser_Token|null The first matching child token; null when no children are found.
183+
*/
150184
public function get_first_child_token( ?int $token_id = null ): ?WP_Parser_Token {
151185
foreach ( $this->children as $child ) {
152186
if (
@@ -159,42 +193,73 @@ public function get_first_child_token( ?int $token_id = null ): ?WP_Parser_Token
159193
return null;
160194
}
161195

196+
/**
197+
* Get the first descendant node of this node.
198+
*
199+
* The node children are traversed recursively in a depth-first order until
200+
* a matching descendant node is found, or the entire subtree is searched.
201+
*
202+
* @param string|null $rule_name Optional. A node rule name to check for.
203+
* @return WP_Parser_Node|null The first matching descendant node; null when no descendants are found.
204+
*/
162205
public function get_first_descendant_node( ?string $rule_name = null ): ?WP_Parser_Node {
163-
$nodes = array( $this );
164-
while ( count( $nodes ) ) {
165-
$node = array_shift( $nodes );
166-
$child = $node->get_first_child_node( $rule_name );
167-
if ( $child ) {
206+
for ( $i = 0; $i < count( $this->children ); $i++ ) {
207+
$child = $this->children[ $i ];
208+
if ( ! $child instanceof WP_Parser_Node ) {
209+
continue;
210+
}
211+
if ( null === $rule_name || $child->rule_name === $rule_name ) {
168212
return $child;
169213
}
170-
$children = $node->get_child_nodes();
171-
if ( count( $children ) > 0 ) {
172-
array_push( $nodes, ...$children );
214+
$node = $child->get_first_descendant_node( $rule_name );
215+
if ( $node ) {
216+
return $node;
173217
}
174218
}
175219
return null;
176220
}
177221

222+
/**
223+
* Get the first descendant token of this node.
224+
*
225+
* The node children are traversed recursively in a depth-first order until
226+
* a matching descendant token is found, or the entire subtree is searched.
227+
*
228+
* @param int|null $token_id Optional. A token ID to check for.
229+
* @return WP_Parser_Token|null The first matching descendant token; null when no descendants are found.
230+
*/
178231
public function get_first_descendant_token( ?int $token_id = null ): ?WP_Parser_Token {
179-
$nodes = array( $this );
180-
while ( count( $nodes ) ) {
181-
$node = array_shift( $nodes );
182-
$child = $node->get_first_child_token( $token_id );
183-
if ( $child ) {
184-
return $child;
185-
}
186-
$children = $node->get_child_nodes();
187-
if ( count( $children ) > 0 ) {
188-
array_push( $nodes, ...$children );
232+
for ( $i = 0; $i < count( $this->children ); $i++ ) {
233+
$child = $this->children[ $i ];
234+
if ( $child instanceof WP_Parser_Token ) {
235+
if ( null === $token_id || $child->id === $token_id ) {
236+
return $child;
237+
}
238+
} else {
239+
$token = $child->get_first_descendant_token( $token_id );
240+
if ( $token ) {
241+
return $token;
242+
}
189243
}
190244
}
191245
return null;
192246
}
193247

248+
/**
249+
* Get all children of this node.
250+
*
251+
* @return array<WP_Parser_Node|WP_Parser_Token> An array of all child nodes and tokens of this node.
252+
*/
194253
public function get_children(): array {
195254
return $this->children;
196255
}
197256

257+
/**
258+
* Get all child nodes of this node.
259+
*
260+
* @param string|null $rule_name Optional. A node rule name to check for.
261+
* @return WP_Parser_Node[] An array of all matching child nodes.
262+
*/
198263
public function get_child_nodes( ?string $rule_name = null ): array {
199264
$nodes = array();
200265
foreach ( $this->children as $child ) {
@@ -208,6 +273,12 @@ public function get_child_nodes( ?string $rule_name = null ): array {
208273
return $nodes;
209274
}
210275

276+
/**
277+
* Get all child tokens of this node.
278+
*
279+
* @param int|null $token_id Optional. A token ID to check for.
280+
* @return WP_Parser_Token[] An array of all matching child tokens.
281+
*/
211282
public function get_child_tokens( ?int $token_id = null ): array {
212283
$tokens = array();
213284
foreach ( $this->children as $child ) {
@@ -221,67 +292,94 @@ public function get_child_tokens( ?int $token_id = null ): array {
221292
return $tokens;
222293
}
223294

295+
/**
296+
* Get all descendants of this node.
297+
*
298+
* The descendants are collected using a depth-first pre-order NLR traversal.
299+
* This produces a natural ordering that corresponds to the original input.
300+
*
301+
* @return array<WP_Parser_Node|WP_Parser_Token> An array of all descendant nodes and tokens of this node.
302+
*/
224303
public function get_descendants(): array {
225-
$nodes = array( $this );
226-
$all_descendants = array();
227-
while ( count( $nodes ) ) {
228-
$node = array_shift( $nodes );
229-
$all_descendants = array_merge( $all_descendants, $node->get_children() );
230-
$children = $node->get_child_nodes();
231-
if ( count( $children ) > 0 ) {
232-
array_push( $nodes, ...$children );
304+
$descendants = array();
305+
foreach ( $this->children as $child ) {
306+
if ( $child instanceof WP_Parser_Node ) {
307+
$descendants[] = $child;
308+
$descendants = array_merge( $descendants, $child->get_descendants() );
309+
} else {
310+
$descendants[] = $child;
233311
}
234312
}
235-
return $all_descendants;
313+
return $descendants;
236314
}
237315

316+
/**
317+
* Get all descendant nodes of this node.
318+
*
319+
* The descendants are collected using a depth-first pre-order NLR traversal.
320+
* This produces a natural ordering that corresponds to the original input.
321+
* All matching nodes are collected during the traversal.
322+
*
323+
* @param string|null $rule_name Optional. A node rule name to check for.
324+
* @return WP_Parser_Node[] An array of all matching descendant nodes.
325+
*/
238326
public function get_descendant_nodes( ?string $rule_name = null ): array {
239-
$nodes = array( $this );
240-
$all_descendants = array();
241-
while ( count( $nodes ) ) {
242-
$node = array_shift( $nodes );
243-
$all_descendants = array_merge( $all_descendants, $node->get_child_nodes( $rule_name ) );
244-
$children = $node->get_child_nodes();
245-
if ( count( $children ) > 0 ) {
246-
array_push( $nodes, ...$children );
327+
$nodes = array();
328+
foreach ( $this->children as $child ) {
329+
if ( ! $child instanceof WP_Parser_Node ) {
330+
continue;
331+
}
332+
if ( null === $rule_name || $child->rule_name === $rule_name ) {
333+
$nodes[] = $child;
247334
}
335+
$nodes = array_merge( $nodes, $child->get_descendant_nodes( $rule_name ) );
248336
}
249-
return $all_descendants;
337+
return $nodes;
250338
}
251339

340+
/**
341+
* Get all descendant tokens of this node.
342+
*
343+
* The descendants are collected using a depth-first pre-order NLR traversal.
344+
* This produces a natural ordering that corresponds to the original input.
345+
* All matching tokens are collected during the traversal.
346+
*
347+
* @param int|null $token_id Optional. A token ID to check for.
348+
* @return WP_Parser_Token[] An array of all matching descendant tokens.
349+
*/
252350
public function get_descendant_tokens( ?int $token_id = null ): array {
253-
$nodes = array( $this );
254-
$all_descendants = array();
255-
while ( count( $nodes ) ) {
256-
$node = array_shift( $nodes );
257-
$all_descendants = array_merge( $all_descendants, $node->get_child_tokens( $token_id ) );
258-
$children = $node->get_child_nodes();
259-
if ( count( $children ) > 0 ) {
260-
array_push( $nodes, ...$children );
351+
$tokens = array();
352+
foreach ( $this->children as $child ) {
353+
if ( $child instanceof WP_Parser_Token ) {
354+
if ( null === $token_id || $child->id === $token_id ) {
355+
$tokens[] = $child;
356+
}
357+
} else {
358+
$tokens = array_merge( $tokens, $child->get_descendant_tokens( $token_id ) );
261359
}
262360
}
263-
return $all_descendants;
361+
return $tokens;
264362
}
265363

266364
/**
267-
* Get the byte offset in the input SQL string where this node begins.
365+
* Get the byte offset in the input string where this node begins.
268366
*
269-
* @return int
367+
* @return int The byte offset in the input string where this node begins.
270368
*/
271369
public function get_start(): int {
272370
return $this->get_first_descendant_token()->start;
273371
}
274372

275373
/**
276-
* Get the byte length of this node in the input SQL string.
374+
* Get the byte length of this node in the input string.
277375
*
278-
* @return int
376+
* @return int The byte length of this node in the input string.
279377
*/
280378
public function get_length(): int {
281-
$tokens = $this->get_descendant_tokens();
282-
$last_token = end( $tokens );
283-
$start = $this->get_start();
284-
return $last_token->start + $last_token->length - $start;
379+
$tokens = $this->get_descendant_tokens();
380+
$first_token = $tokens[0];
381+
$last_token = $tokens[ count( $tokens ) - 1 ];
382+
return $last_token->start + $last_token->length - $first_token->start;
285383
}
286384

287385
/*

0 commit comments

Comments
 (0)