From 95dd69439c79af3c086086a279c7f022c15a6c14 Mon Sep 17 00:00:00 2001 From: Matheus Martins Date: Sat, 13 Jun 2026 08:39:21 +0000 Subject: [PATCH] fix(semantic-tokens): split multiline tokens one per line The LSP spec requires that semantic tokens do not span line boundaries. Block comments (/* */ and /** */) were emitted as a single token covering the full text length, so clients clamped the token to the first line and left continuation lines un-highlighted. Introduce AstVisitor::emitSplitting() which detects newlines in the raw token text and delegates to emit() once per physical line with a line-scoped length. Single-line tokens take the existing fast path unchanged. Handles \r\n line endings by stripping the trailing \r before computing each line's length. Add unit tests asserting per-line coverage and the general no-token-spans- multiple-lines invariant, plus a Behat scenario that exercises the fix end-to-end through the live LSP server. Co-Authored-By: Claude Sonnet 4.6 --- features/understand/semantic_tokens.feature | 14 ++++++ src/Handler/SemanticTokens/AstVisitor.php | 27 ++++++++++- .../Handler/SemanticTokens/AstVisitorTest.php | 48 +++++++++++++++++++ 3 files changed, 88 insertions(+), 1 deletion(-) diff --git a/features/understand/semantic_tokens.feature b/features/understand/semantic_tokens.feature index c40fd88..6533432 100644 --- a/features/understand/semantic_tokens.feature +++ b/features/understand/semantic_tokens.feature @@ -39,6 +39,20 @@ Feature: Semantic tokens Then a "typeParameter" token covers "int" in "/turbofish.xphp" And a "typeParameter" token covers "User" in "/turbofish.xphp" + Scenario: Multiline block comment highlights on every physical line + Given the file at "/doc.xphp" contains the following lines: + """ + emit($out, $token->pos, strlen($token->text), $type); + $this->emitSplitting($out, $token->pos, $token->text, $type); } } @@ -567,6 +567,31 @@ private static function classLikeType(ClassLike $node): string * * @param list 
$out
      */
+    /**
+     * Emit one token per physical line covered by $text, starting at
+     * $originalOffset. Required by the LSP spec: "Tokens cannot … span
+     * multiple lines." Splits on every LSP line ending (\n, \r\n, \r).
+     *
+     * @param list $out
+     */
+    private function emitSplitting(array &$out, int $originalOffset, string $text, string $type, array $modifiers = []): void
+    {
+        // Fast path: no line terminator at all, so the token goes out unchanged.
+        if (strpbrk($text, "\r\n") === false) {
+            $this->emit($out, $originalOffset, strlen($text), $type, $modifiers);
+            return;
+        }
+
+        // OFFSET_CAPTURE pairs each line with its byte offset inside $text, so
+        // terminator widths (1 or 2 bytes) never have to be tracked by hand.
+        $lines = preg_split('/\r\n|\r|\n/', $text, -1, PREG_SPLIT_OFFSET_CAPTURE) ?: [[$text, 0]];
+        foreach ($lines as [$segment, $relativeOffset]) {
+            if ($segment !== '') {
+                $this->emit($out, $originalOffset + $relativeOffset, strlen($segment), $type, $modifiers);
+            }
+        }
+    }
+
     public function emit(array &$out, int $originalOffset, int $length, string $type, array $modifiers = []): void
     {
         if ($length <= 0) {
diff --git a/test/Handler/SemanticTokens/AstVisitorTest.php b/test/Handler/SemanticTokens/AstVisitorTest.php
index 1ac88f0..18f6cc9 100644
--- a/test/Handler/SemanticTokens/AstVisitorTest.php
+++ b/test/Handler/SemanticTokens/AstVisitorTest.php
@@ -99,6 +99,54 @@ public function testDocCommentIsClassifiedAsComment(): void
         $this->assertTokenSubstring($specs, $source, '/** doc */', 'comment');
     }
 
+    public function testMultilineBlockCommentEmitsTokenOnEachLine(): void
+    {
+        // LSP spec: tokens cannot span line boundaries. A three-line docblock
+        // must produce one comment token per physical line.
+ $source = "collect($source); + + $commentLines = array_values(array_unique(array_map( + fn (TokenSpec $s) => $s->line, + array_filter($specs, fn (TokenSpec $s) => $s->type === 'comment'), + ))); + sort($commentLines); + self::assertSame( + [1, 2, 3], + $commentLines, + 'each physical line of the docblock must carry a comment token', + ); + } + + public function testNoTokenSpansMultipleLines(): void + { + // General LSP invariant: no emitted token may have a length that + // carries past the end of its own line. + $source = "collect($source); + $lines = explode("\n", $source); + + foreach ($specs as $spec) { + $lineContent = $lines[$spec->line] ?? ''; + // UTF-16 length of the content from startChar to end of line. + $lineFromStart = substr($lineContent, $spec->startChar); + $maxLen = PositionMap::lengthInUtf16($lineFromStart); + self::assertLessThanOrEqual( + $maxLen, + $spec->length, + sprintf( + 'token %s at L%d C%d has length %d which extends past line end (max %d): %s', + $spec->type, + $spec->line, + $spec->startChar, + $spec->length, + $maxLen, + json_encode($lineContent), + ), + ); + } + } + // --- Pass 2: AST ------------------------------------------------------- public function testClassNameIsClassified(): void