From 82ee54e7265f0fe875c72cb32e1a0d26ab150821 Mon Sep 17 00:00:00 2001 From: "Flavio S. Glock" Date: Wed, 13 May 2026 15:23:36 +0200 Subject: [PATCH 1/6] =?UTF-8?q?fix:=20FindBin::libs=20=E2=80=94=20jar=20li?= =?UTF-8?q?b=20decoding,=20Symbol::qualify=5Fto=5Fref,=20ref-to-glob=20con?= =?UTF-8?q?text?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Load jar:PERL5LIB modules via raw bytes and FileUtils.decodePerlSourceBytes after CompilerOptions exists (matches filesystem BOM/CRLF handling). - Native Symbol::qualify_to_ref: qualify one-arg via caller()+two-arg qualify; return GlobalVariable.getGlobalIO-backed globs instead of orphan RuntimeGlob refs. - EmitOperator: evaluate \*{...} operand in scalar context (comma/ternary inside braces). - Parser: *{EXPR} as code; unary prototype indirect suppression; * { brace whitespace. - Test::More: use_ok string-eval visibility for \@imports under PerlOnJava. - Add unit test symbol_star_brace_qualify_to_ref.t. Verified: make; timeout 600 ./jcpan -t FindBin::libs. Generated with [Cursor](https://cursor.com/docs) Co-Authored-By: Cursor Co-authored-by: Cursor --- .../perlonjava/backend/jvm/EmitOperator.java | 4 ++ .../frontend/parser/ListParser.java | 15 +++- .../frontend/parser/PrototypeArgs.java | 62 +++++++++++++++++ .../frontend/parser/SubroutineParser.java | 68 ++++++++++++++++++- .../perlonjava/frontend/parser/Variable.java | 29 +++++++- .../runtime/operators/ModuleOperators.java | 19 +++--- .../perlonjava/runtime/perlmodule/Symbol.java | 24 ++++--- .../runtime/runtimetypes/FileUtils.java | 26 +++++-- src/main/perl/lib/Test/More.pm | 14 +++- .../unit/symbol_star_brace_qualify_to_ref.t | 35 ++++++++++ 10 files changed, 264 insertions(+), 32 deletions(-) create mode 100644 src/test/resources/unit/symbol_star_brace_qualify_to_ref.t diff --git a/src/main/java/org/perlonjava/backend/jvm/EmitOperator.java b/src/main/java/org/perlonjava/backend/jvm/EmitOperator.java index 2ead729e4..ec263bbf1 100644 --- a/src/main/java/org/perlonjava/backend/jvm/EmitOperator.java +++ b/src/main/java/org/perlonjava/backend/jvm/EmitOperator.java @@ -1767,6 +1767,10 @@ static void handleCreateReference(EmitterVisitor emitterVisitor, OperatorNode no } else if (node.operand instanceof OperatorNode op && op.operator.equals("$")) { // Scalar variable - use SCALAR context contextType = RuntimeContextType.SCALAR; + } else if (node.operand instanceof OperatorNode op && op.operator.equals("*")) { + // *{EXPR} — EXPR is evaluated in scalar context (e.g. Symbol::qualify_to_ref's + // \*{ qualify $_[0], ... }). LIST context breaks the comma/ternary inside braces. + contextType = RuntimeContextType.SCALAR; } node.operand.accept(emitterVisitor.with(contextType)); diff --git a/src/main/java/org/perlonjava/frontend/parser/ListParser.java b/src/main/java/org/perlonjava/frontend/parser/ListParser.java index 82b2180be..534a8fe3a 100644 --- a/src/main/java/org/perlonjava/frontend/parser/ListParser.java +++ b/src/main/java/org/perlonjava/frontend/parser/ListParser.java @@ -60,6 +60,10 @@ static ListNode parseZeroOrOneList(Parser parser, int minItems) { // No argument // 'isa' when enabled as a feature is an infix operator, not a bareword argument expr = new ListNode(parser.tokenIndex); + } else if (token.text.equals("?")) { + // `defined ? expr : expr` (zero-arg defined uses $_), `rand ? expr : expr`, etc. + // Do not parse the ternary `?` as the unary operator's optional operand. + expr = new ListNode(parser.tokenIndex); } else { // Argument without parentheses expr = ListNode.makeList(parser.parseExpression(parser.getPrecedence("isa") + 1)); @@ -328,7 +332,9 @@ public static boolean looksLikeEmptyList(Parser parser) { List savedHeredocNodes = ParseHeredoc.saveHeredocState(parser); LexerToken token = TokenUtils.consume(parser); - LexerToken token1 = parser.tokens.get(parser.tokenIndex); // Next token including spaces + LexerToken token1 = parser.tokenIndex < parser.tokens.size() + ? parser.tokens.get(parser.tokenIndex) + : new LexerToken(LexerTokenType.EOF, ""); LexerToken nextToken = TokenUtils.peek(parser); // After spaces // Check if this is a list terminator, but we need to restore position for the check @@ -385,7 +391,12 @@ public static boolean looksLikeEmptyList(Parser parser) { if (CompilerOptions.DEBUG_ENABLED) parser.ctx.logDebug("parseZeroOrMoreList looks like regex"); } else { // Subroutine call with zero arguments, followed by infix operator: `pos = 3` - if (CompilerOptions.DEBUG_ENABLED) parser.ctx.logDebug("parseZeroOrMoreList return zero at `" + parser.tokens.get(parser.tokenIndex) + "`"); + if (CompilerOptions.DEBUG_ENABLED) { + String dbgTok = parser.tokenIndex < parser.tokens.size() + ? String.valueOf(parser.tokens.get(parser.tokenIndex)) + : "EOF"; + parser.ctx.logDebug("parseZeroOrMoreList return zero at `" + dbgTok + "`"); + } // if (LVALUE_INFIX_OP.contains(token.text)) { // throw new PerlCompilerException(tokenIndex, "Can't modify non-lvalue subroutine call", ctx.errorUtil); // } diff --git a/src/main/java/org/perlonjava/frontend/parser/PrototypeArgs.java b/src/main/java/org/perlonjava/frontend/parser/PrototypeArgs.java index 4fa3a4302..c48f75af5 100644 --- a/src/main/java/org/perlonjava/frontend/parser/PrototypeArgs.java +++ b/src/main/java/org/perlonjava/frontend/parser/PrototypeArgs.java @@ -415,6 +415,12 @@ private static void parsePrototypeArguments(Parser parser, ListNode args, String parser.throwError("syntax error"); } + // Builtin Perl parsing (no parentheses): `symlink qw(a b)`, `atan2 qw(1 2)`, etc. + // One parenthesis-free list literal fills successive leading `$` / `_` prototype slots. + if (tryConsumeParenFreeWordListForLeadingScalars(parser, args, prototype, hasParentheses)) { + return; + } + // If prototype starts with ';' and we're at a terminator or single comma, all arguments are optional if (prototype.startsWith(";") && (isArgumentTerminator(parser) || isComma(TokenUtils.peek(parser)))) { return; @@ -470,6 +476,62 @@ private static void parsePrototypeArguments(Parser parser, ListNode args, String } } + /** + * Perl builtins accept {@code symlink qw(a b)} without commas: a single qw list fills successive + * leading scalar prototype slots (see {@code perl -MO=Deparse -e 'symlink qw(/x /y)'}). + */ + private static boolean tryConsumeParenFreeWordListForLeadingScalars( + Parser parser, ListNode args, String prototype, boolean hasParentheses) { + if (hasParentheses || prototype == null || prototype.isEmpty()) { + return false; + } + int slots = countLeadingScalarPrototypeSlots(prototype); + if (slots < 2) { + return false; + } + int saved = parser.tokenIndex; + Node expr = parser.parseExpression(parser.getPrecedence(",")); + if (expr instanceof ListNode ln + && ln.elements.size() == slots + && isPlainStringWordList(ln)) { + for (Node word : ln.elements) { + Node scalarArg = ParserNodeUtils.toScalarContext(word); + copyArgumentStartIndex(word, scalarArg); + scalarArg.setAnnotation("context", "SCALAR"); + args.elements.add(scalarArg); + } + return true; + } + parser.tokenIndex = saved; + return false; + } + + private static int countLeadingScalarPrototypeSlots(String prototype) { + int count = 0; + for (int i = 0; i < prototype.length(); i++) { + char c = prototype.charAt(i); + if (c == ' ' || c == '\t' || c == '\n' || c == '\r') { + continue; + } + if (c == '$' || c == '_') { + count++; + } else { + break; + } + } + return count; + } + + /** True if every element is a {@link StringNode} (parenthesis-free qw word list). */ + private static boolean isPlainStringWordList(ListNode ln) { + for (Node n : ln.elements) { + if (!(n instanceof StringNode)) { + return false; + } + } + return true; + } + /** * Parses an argument with optional comma handling. * diff --git a/src/main/java/org/perlonjava/frontend/parser/SubroutineParser.java b/src/main/java/org/perlonjava/frontend/parser/SubroutineParser.java index 4b0379889..c6cd334bf 100644 --- a/src/main/java/org/perlonjava/frontend/parser/SubroutineParser.java +++ b/src/main/java/org/perlonjava/frontend/parser/SubroutineParser.java @@ -212,7 +212,8 @@ static Node parseSubroutineCall(Parser parser, boolean isMethod) { // idiom is recognised (parses as `Error::Simple->catch(with {...})`). if (peek(parser).type == LexerTokenType.IDENTIFIER && isValidIndirectMethod(subName, parser) - && !prototypeHasGlob) { + && !prototypeHasGlob + && !(subExists && prototypeStartsWithScalarSlot(prototype))) { int currentIndex2 = parser.tokenIndex; String packageName = IdentifierParser.parseSubroutineIdentifier(parser); // System.out.println("maybe indirect object: " + packageName + "->" + subName); @@ -583,6 +584,71 @@ private static boolean isValidIndirectMethod(String subName, Parser parser) { return false; } + /** + * True when the subroutine prototype starts with a scalar (or {@code _}) slot after optional + * semicolons / grouping parens. Such subs take their first argument in scalar context; a bareword + * after the name is not indirect-object syntax ({@code ok Bin, "..."} vs {@code Bin->ok(...)}, + * {@code require_ok Foo::Bar} vs {@code Foo::Bar->require_ok}). Filehandle-first prototypes + * ({@code *}) still allow indirect notation ({@code print HANDLE LIST}). + */ + private static boolean prototypeStartsWithScalarSlot(String prototype) { + if (prototype == null || prototype.isEmpty()) { + return false; + } + int i = 0; + int n = prototype.length(); + while (i < n) { + char c = prototype.charAt(i); + if (c == ' ' || c == '\t') { + i++; + continue; + } + if (c == ';') { + i++; + continue; + } + if (c == '(' || c == ')') { + i++; + continue; + } + if (c == '[') { + int depth = 1; + i++; + while (i < n && depth > 0) { + char bc = prototype.charAt(i); + if (bc == '[') { + depth++; + } else if (bc == ']') { + depth--; + } + i++; + } + continue; + } + if (c == '\\') { + i++; + if (i < n) { + i++; + } + continue; + } + if (c == '*') { + return false; + } + if (c == '@') { + return false; + } + if (c == '%') { + return false; + } + if (c == '&') { + return false; + } + return c == '$' || c == '_'; + } + return false; + } + private static Node parseIndirectMethodCall(Parser parser, IdentifierNode nameNode) { // If the subroutine does not exist and there are no parentheses, it is not a subroutine call diff --git a/src/main/java/org/perlonjava/frontend/parser/Variable.java b/src/main/java/org/perlonjava/frontend/parser/Variable.java index a7329934c..f788be30a 100644 --- a/src/main/java/org/perlonjava/frontend/parser/Variable.java +++ b/src/main/java/org/perlonjava/frontend/parser/Variable.java @@ -99,8 +99,9 @@ public static boolean isFieldInClassHierarchy(Parser parser, String fieldName) { */ public static Node parseVariable(Parser parser, String sigil) { Node operand; - LexerToken nextToken = parser.tokenIndex < parser.tokens.size() - ? parser.tokens.get(parser.tokenIndex) + int nextTokIdx = Whitespace.skipWhitespace(parser, parser.tokenIndex, parser.tokens); + LexerToken nextToken = nextTokIdx < parser.tokens.size() + ? parser.tokens.get(nextTokIdx) : new LexerToken(LexerTokenType.EOF, ""); // Special case 1: $${...} - nested scalar dereference @@ -831,6 +832,30 @@ public static Node parseBracedVariable(Parser parser, String sigil, boolean isSt return new OperatorNode(sigil, new StringNode("", parser.tokenIndex), parser.tokenIndex); } + // *{EXPR} must parse EXPR as full Perl code (calls, commas, ternaries). + // The identifier fast-path below wrongly treats the start of `qualify $_[0], ...` + // as a bareword glob name `qualify`, breaking Symbol::qualify_to_ref's + // `return \*{ qualify $_[0], ... };` (bogus globs like *::bin). + if ("*".equals(sigil)) { + boolean savedInsideBracedDereference = parser.insideBracedDereference; + boolean savedParsingTakeReference = parser.parsingTakeReference; + parser.parsingTakeReference = false; + try { + BlockNode block = ParseBlock.parseBlock(parser); + if (!TokenUtils.peek(parser).text.equals("}")) { + throw new PerlCompilerException( + parser.tokenIndex, + "Missing closing brace in *{...} construct", + parser.ctx.errorUtil); + } + TokenUtils.consume(parser, LexerTokenType.OPERATOR, "}"); + return new OperatorNode(sigil, block, parser.tokenIndex); + } finally { + parser.insideBracedDereference = savedInsideBracedDereference; + parser.parsingTakeReference = savedParsingTakeReference; + } + } + // For string interpolation, preprocess \" sequences IN PLACE if (isStringInterpolation) { int startIndex = parser.tokenIndex; diff --git a/src/main/java/org/perlonjava/runtime/operators/ModuleOperators.java b/src/main/java/org/perlonjava/runtime/operators/ModuleOperators.java index 05e8ebc45..8d420e9f4 100644 --- a/src/main/java/org/perlonjava/runtime/operators/ModuleOperators.java +++ b/src/main/java/org/perlonjava/runtime/operators/ModuleOperators.java @@ -7,10 +7,8 @@ import org.perlonjava.runtime.perlmodule.BHooksEndOfScope; import org.perlonjava.runtime.runtimetypes.*; -import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; -import java.io.InputStreamReader; import java.net.URL; import java.nio.file.Files; import java.nio.file.Path; @@ -153,6 +151,9 @@ private static RuntimeBase doFile(RuntimeScalar runtimeScalar, boolean setINC, b String code = null; String actualFileName = null; + /** Raw bytes from {@code jar:PERL5LIB} before decoding — decoded once {@link CompilerOptions} exists. */ + byte[] jarPrefetchedBytes = null; + // Variables for handling array references with state RuntimeCode codeRef = null; RuntimeArray stateArgs = null; @@ -570,16 +571,11 @@ else if (code == null) { actualFileName = GlobalContext.JAR_PERLLIB + "/" + fileName; fullName = Paths.get(resourcePath); // Just for compatibility - try (InputStream is = resource.openStream(); - BufferedReader reader = new BufferedReader(new InputStreamReader(is))) { - StringBuilder content = new StringBuilder(); - String line = null; - while ((line = reader.readLine()) != null) { - content.append(line).append("\n"); - } - code = content.toString(); + try (InputStream is = resource.openStream()) { + jarPrefetchedBytes = is.readAllBytes(); break; } catch (IOException e1) { + jarPrefetchedBytes = null; // Continue to next directory } } @@ -635,6 +631,9 @@ else if (code == null) { parsedArgs.applySourceFilters = shouldApplyFilters; // Enable source filter preprocessing if needed parsedArgs.disassembleEnabled = RuntimeCode.DISASSEMBLE; parsedArgs.useInterpreter = RuntimeCode.USE_INTERPRETER; + if (jarPrefetchedBytes != null) { + code = FileUtils.decodePerlSourceBytes(jarPrefetchedBytes, parsedArgs); + } if (code == null) { try { // Use the absolute fullName for file I/O (parsedArgs.fileName may be relative) diff --git a/src/main/java/org/perlonjava/runtime/perlmodule/Symbol.java b/src/main/java/org/perlonjava/runtime/perlmodule/Symbol.java index 542e067cc..fe0a5700f 100644 --- a/src/main/java/org/perlonjava/runtime/perlmodule/Symbol.java +++ b/src/main/java/org/perlonjava/runtime/perlmodule/Symbol.java @@ -3,8 +3,6 @@ import org.perlonjava.backend.jvm.EmitterMethodCreator; import org.perlonjava.runtime.runtimetypes.*; -import static org.perlonjava.runtime.runtimetypes.RuntimeContextType.SCALAR; - /** * The Symbol class provides functionalities for symbol manipulation in a Perl-like environment. * It extends PerlModuleBase to leverage module initialization and method registration. @@ -134,7 +132,7 @@ public static RuntimeList qualify(RuntimeArray args, int ctx) { if (args.size() > 1) { packageName = args.get(1); } else { - RuntimeList callerList = RuntimeCode.caller(new RuntimeList(), SCALAR); + RuntimeList callerList = RuntimeCode.caller(new RuntimeList(), RuntimeContextType.SCALAR); packageName = callerList.scalar(); } RuntimeScalar result; @@ -161,16 +159,26 @@ public static RuntimeList qualify_to_ref(RuntimeArray args, int ctx) { if (args.size() < 1 || args.size() > 2) { throw new IllegalStateException("Bad number of arguments for qualify_to_ref()"); } - RuntimeScalar object = qualify(args, ctx).scalar(); + RuntimeScalar object; + if (args.size() == 1) { + RuntimeArray qa = new RuntimeArray(); + qa.push(args.get(0)); + // Prefer perl-compatible caller(); InterpreterState can diverge from caller inside + // closures invoked from another package (qualify_to_ref must match embedded qualify). + qa.push(RuntimeCode.caller(new RuntimeList(), RuntimeContextType.SCALAR).scalar()); + object = qualify(qa, ctx).scalar(); + } else { + object = qualify(args, ctx).scalar(); + } RuntimeScalar result; if (!object.isString()) { // Already a glob reference or similar — return as-is result = object; } else { - // Create a named RuntimeGlob and return a GLOBREFERENCE to it. - // This mirrors Perl's \*{name}: the caller gets a reference whose - // hash slot (and other slots) delegate to the global symbol table. - result = new RuntimeGlob(object.toString()).createReference(); + // Use the canonical stash glob (vivifying if needed), not a detached RuntimeGlob. + // new RuntimeGlob(name).createReference() pointed at an orphan glob — slots like + // ARRAY never saw @Pkg::name (Symbol::qualify_to_ref, FindBin::libs path). + result = GlobalVariable.getGlobalIO(object.toString()).createReference(); } // System.out.println("qualify_to_ref returns " + result.type); RuntimeList list = new RuntimeList(); diff --git a/src/main/java/org/perlonjava/runtime/runtimetypes/FileUtils.java b/src/main/java/org/perlonjava/runtime/runtimetypes/FileUtils.java index 601716945..1409d6045 100644 --- a/src/main/java/org/perlonjava/runtime/runtimetypes/FileUtils.java +++ b/src/main/java/org/perlonjava/runtime/runtimetypes/FileUtils.java @@ -15,15 +15,14 @@ public class FileUtils { /** - * Reads a file with automatic encoding detection based on BOM (Byte Order Mark). - * Supports UTF-8, UTF-16BE, UTF-16LE, and defaults to UTF-8 if no BOM is found. + * Decodes Perl module / script bytes (from disk or from a JAR resource) using the same rules as + * {@link #readFileWithEncodingDetection(Path, CompilerOptions)}: BOM detection, charset selection, + * {@link CompilerOptions#rawCodeBytes} for {@code __DATA__}, and newline normalization for the lexer. * - * @param filePath The path to the file to read - * @return The decoded string content of the file - * @throws IOException if the file cannot be read + *

Jar loading previously used {@code InputStreamReader} + {@code readLine()}, which could diverge + * from file reads (charset, raw bytes); keep one path so identical sources compile identically. */ - public static String readFileWithEncodingDetection(Path filePath, CompilerOptions parsedArgs) throws IOException { - byte[] bytes = Files.readAllBytes(filePath); + public static String decodePerlSourceBytes(byte[] bytes, CompilerOptions parsedArgs) { String content = detectEncodingAndDecode(bytes, parsedArgs); // Normalize line endings: \r\n → \n, bare \r → \n // This must happen for source files so the Lexer sees clean \n line endings. @@ -34,6 +33,19 @@ public static String readFileWithEncodingDetection(Path filePath, CompilerOption return content; } + /** + * Reads a file with automatic encoding detection based on BOM (Byte Order Mark). + * Supports UTF-8, UTF-16BE, UTF-16LE, and defaults to UTF-8 if no BOM is found. + * + * @param filePath The path to the file to read + * @return The decoded string content of the file + * @throws IOException if the file cannot be read + */ + public static String readFileWithEncodingDetection(Path filePath, CompilerOptions parsedArgs) throws IOException { + byte[] bytes = Files.readAllBytes(filePath); + return decodePerlSourceBytes(bytes, parsedArgs); + } + /** * Detects the encoding of file content based on BOM and heuristics, then decodes it. * diff --git a/src/main/perl/lib/Test/More.pm b/src/main/perl/lib/Test/More.pm index 29dda00e8..beb1651eb 100644 --- a/src/main/perl/lib/Test/More.pm +++ b/src/main/perl/lib/Test/More.pm @@ -1023,7 +1023,7 @@ USE package $pack; BEGIN { \${^WARNING_BITS} = \$args[-1] if defined \$args[-1] } #line $line $filename -use $module \@{\$args[0]}; +use $module \@Test::More::_USE_OK_IMPORTS; 1; USE } @@ -1053,7 +1053,17 @@ sub _eval { my( $sigdie, $eval_result, $eval_error ); { local( $@, $!, $SIG{__DIE__} ); # isolate eval - $eval_result = eval $code; ## no critic (BuiltinFunctions::ProhibitStringyEval) + # PerlOnJava string-eval does not close over the lexical @args from use_ok's + # caller the way perl(1) does, so \@{$args[0]} inside the eval never sees imports. + # Copy into a package-visible array for the duration of the eval (local element copy, + # not typeglob aliasing — aliases did not populate Exporter import lists). + if (@args && ref($args[0]) eq 'ARRAY') { + no warnings 'once'; + local @Test::More::_USE_OK_IMPORTS = @{ $args[0] }; + $eval_result = eval $code; + } else { + $eval_result = eval $code; + } $eval_error = $@; $sigdie = $SIG{__DIE__} || undef; } diff --git a/src/test/resources/unit/symbol_star_brace_qualify_to_ref.t b/src/test/resources/unit/symbol_star_brace_qualify_to_ref.t new file mode 100644 index 000000000..08e16d051 --- /dev/null +++ b/src/test/resources/unit/symbol_star_brace_qualify_to_ref.t @@ -0,0 +1,35 @@ +# Regression: *{ qualify EXPR } must parse EXPR as code (Symbol::qualify_to_ref). +# +# Run: perl src/test/resources/unit/symbol_star_brace_qualify_to_ref.t +# ./jperl src/test/resources/unit/symbol_star_brace_qualify_to_ref.t + +use strict; +use warnings; +use Test::More tests => 3; + +use Symbol qw( qualify qualify_to_ref ); + +package Testophile; + +no strict 'refs'; +*{ 'bin' } = [ 'one', 'two' ]; + +package main; + +is( + Symbol::qualify( 'bin', 'Testophile' ), + 'Testophile::bin', + 'qualify(bin, Testophile)', +); + +my $r = do { + package Testophile; + Symbol::qualify_to_ref('bin'); +}; +is( ref($r), 'GLOB', 'qualify_to_ref (1-arg, caller Testophile) returns GLOB ref' ); + +is( + scalar( @{ *{$r} } ), + 2, + '@{ *qualify_to_ref(...) } aliases @Testophile::bin', +); From c9511f8ca2add24a008b5438de8b0acbb81cfc84 Mon Sep 17 00:00:00 2001 From: "Flavio S. Glock" Date: Wed, 13 May 2026 16:29:01 +0200 Subject: [PATCH 2/6] =?UTF-8?q?fix(parser):=20*{NAME}=20vs=20*{EXPR}=20?= =?UTF-8?q?=E2=80=94=20restore=20qualified-glob=20fast=20path?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR727 parsed every *{...} as ParseBlock, so *{P2::ISA} became a useless statement and strict subs rejected the bareword (mro/basic.t, basic_utf8.t showed 0/0 in harness logs). Try IdentifierParser for a single qualified name through `}`; otherwise ParseBlock for Symbol::qualify_to_ref-style expressions. Generated with [Cursor](https://cursor.com/docs) Co-Authored-By: Cursor Co-authored-by: Cursor --- .../perlonjava/frontend/parser/Variable.java | 24 +++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/perlonjava/frontend/parser/Variable.java b/src/main/java/org/perlonjava/frontend/parser/Variable.java index f788be30a..9fc659dcc 100644 --- a/src/main/java/org/perlonjava/frontend/parser/Variable.java +++ b/src/main/java/org/perlonjava/frontend/parser/Variable.java @@ -832,11 +832,27 @@ public static Node parseBracedVariable(Parser parser, String sigil, boolean isSt return new OperatorNode(sigil, new StringNode("", parser.tokenIndex), parser.tokenIndex); } - // *{EXPR} must parse EXPR as full Perl code (calls, commas, ternaries). - // The identifier fast-path below wrongly treats the start of `qualify $_[0], ...` - // as a bareword glob name `qualify`, breaking Symbol::qualify_to_ref's - // `return \*{ qualify $_[0], ... };` (bogus globs like *::bin). + // *{EXPR}: either a glob NAME (`*{P2::ISA}`) or a full expression (`*{ qualify $_[0], ... }`). + // Never parse a lone qualified identifier with ParseBlock — strict subs rejects it as a + // useless statement (perl5_t/t/mro/basic.t). When `{` is not followed by a single + // identifier up to `}`, fall back to ParseBlock like Perl's expression-in-braces form. if ("*".equals(sigil)) { + int savedIdx = parser.tokenIndex; + parser.tokenIndex = Whitespace.skipWhitespace(parser, parser.tokenIndex, parser.tokens); + String globInnerName = IdentifierParser.parseComplexIdentifierInner(parser, true, true); + if (globInnerName != null && !globInnerName.isEmpty()) { + if (!isMaybeOperator(globInnerName, parser) + && !isBuiltinFunctionFollowedByArrow(globInnerName, parser)) { + parser.tokenIndex = Whitespace.skipWhitespace(parser, parser.tokenIndex, parser.tokens); + if (TokenUtils.peek(parser).text.equals("}")) { + TokenUtils.consume(parser, LexerTokenType.OPERATOR, "}"); + int idx = parser.tokenIndex; + return new OperatorNode(sigil, new IdentifierNode(globInnerName, idx), idx); + } + } + } + parser.tokenIndex = savedIdx; + boolean savedInsideBracedDereference = parser.insideBracedDereference; boolean savedParsingTakeReference = parser.parsingTakeReference; parser.parsingTakeReference = false; From 997cd882ac502550a36d285ef44c15b9de36745a Mon Sep 17 00:00:00 2001 From: "Flavio S. Glock" Date: Wed, 13 May 2026 16:50:11 +0200 Subject: [PATCH 3/6] fix: restore upstream Test::More use_ok/_eval (drop lexical-eval workaround) eval STRING already snapshots outer lexicals (EmitEval + EvalStringHandler / RuntimeCode.evalStringHelper BEGIN-alias path), so \@{$args[0]} inside use_ok's generated eval sees @_eval's @args like perl(1). The package-array shim was unnecessary. Add unit/eval_string_lexical_args_use.t covering the @{$args[0]} pattern. Generated with [Cursor](https://cursor.com/docs) Co-Authored-By: Cursor Co-authored-by: Cursor --- src/main/perl/lib/Test/More.pm | 14 ++----------- .../unit/eval_string_lexical_args_use.t | 21 +++++++++++++++++++ 2 files changed, 23 insertions(+), 12 deletions(-) create mode 100644 src/test/resources/unit/eval_string_lexical_args_use.t diff --git a/src/main/perl/lib/Test/More.pm b/src/main/perl/lib/Test/More.pm index beb1651eb..29dda00e8 100644 --- a/src/main/perl/lib/Test/More.pm +++ b/src/main/perl/lib/Test/More.pm @@ -1023,7 +1023,7 @@ USE package $pack; BEGIN { \${^WARNING_BITS} = \$args[-1] if defined \$args[-1] } #line $line $filename -use $module \@Test::More::_USE_OK_IMPORTS; +use $module \@{\$args[0]}; 1; USE } @@ -1053,17 +1053,7 @@ sub _eval { my( $sigdie, $eval_result, $eval_error ); { local( $@, $!, $SIG{__DIE__} ); # isolate eval - # PerlOnJava string-eval does not close over the lexical @args from use_ok's - # caller the way perl(1) does, so \@{$args[0]} inside the eval never sees imports. - # Copy into a package-visible array for the duration of the eval (local element copy, - # not typeglob aliasing — aliases did not populate Exporter import lists). - if (@args && ref($args[0]) eq 'ARRAY') { - no warnings 'once'; - local @Test::More::_USE_OK_IMPORTS = @{ $args[0] }; - $eval_result = eval $code; - } else { - $eval_result = eval $code; - } + $eval_result = eval $code; ## no critic (BuiltinFunctions::ProhibitStringyEval) $eval_error = $@; $sigdie = $SIG{__DIE__} || undef; } diff --git a/src/test/resources/unit/eval_string_lexical_args_use.t b/src/test/resources/unit/eval_string_lexical_args_use.t new file mode 100644 index 000000000..128470a3e --- /dev/null +++ b/src/test/resources/unit/eval_string_lexical_args_use.t @@ -0,0 +1,21 @@ +# Regression: eval STRING must close over outer lexicals like perl(1) +# (Test::More::use_ok passes \@imports via my @args and uses \@{$args[0]} in eval). + +use strict; +use warnings; +use Test::More tests => 1; + +sub run_eval { + my ( $code, @args ) = @_; + my $out = eval $code; + die $@ if $@; + return $out; +} + +my @want = qw(alpha beta gamma); +my $got = run_eval( + q{ join '|', @{$args[0]} }, + \@want, +); + +is( $got, join( '|', @want ), 'eval STRING sees outer @args in @{$args[0]}' ); From fc7f8cd8ee8d14cfc6d183d70fdb44d4ba61b26a Mon Sep 17 00:00:00 2001 From: "Flavio S. Glock" Date: Wed, 13 May 2026 17:44:42 +0200 Subject: [PATCH 4/6] fix: unblock harness parity for exec.t, bproto.t (qx/readpipe/unpack) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - qx/Heredoc-qx: disable ${\"…\"} braced preprocessing except in qq strings so `${\"hello"}` stays ref-to-string inside backticks; thread flag through ParseHeredoc/StringParser/StringDoubleQuoted and Parser state. - Runtime readpipe overrides: dispatch via InterpreterState package + RuntimeArray args to the package CV; empty exec() returns false and sets ENOENT (no throw). - Prototype parsing: qw-merge lookahead only for consecutive `$` slots so `$_` builtins like unpack no longer mis-parse as syntax errors. - Builtin arity messages: pass builtin names into parseZeroOrOneList for defined, undef, and unary builtins; empty pack reports Not enough arguments for pack. Verified: perl5_t/t/op/exec.t (41/41), perl5_t/t/comp/bproto.t (16/16), make. Generated with [Cursor](https://cursor.com/docs) Co-Authored-By: Cursor Co-authored-by: Cursor --- .../frontend/parser/ListParser.java | 15 +++- .../frontend/parser/OperatorParser.java | 15 ++-- .../frontend/parser/ParseHeredoc.java | 27 +++++-- .../perlonjava/frontend/parser/Parser.java | 8 ++ .../frontend/parser/PrototypeArgs.java | 74 ++++++++++++++----- .../frontend/parser/StringDoubleQuoted.java | 14 +++- .../frontend/parser/StringParser.java | 2 +- .../perlonjava/frontend/parser/Variable.java | 4 +- .../frontend/parser/Whitespace.java | 5 +- .../runtime/operators/SystemOperator.java | 26 ++++++- 10 files changed, 152 insertions(+), 38 deletions(-) diff --git a/src/main/java/org/perlonjava/frontend/parser/ListParser.java b/src/main/java/org/perlonjava/frontend/parser/ListParser.java index 534a8fe3a..77f3286ba 100644 --- a/src/main/java/org/perlonjava/frontend/parser/ListParser.java +++ b/src/main/java/org/perlonjava/frontend/parser/ListParser.java @@ -36,6 +36,15 @@ public class ListParser { * @throws PerlCompilerException If the syntax is incorrect or the minimum number of items is not met. */ static ListNode parseZeroOrOneList(Parser parser, int minItems) { + return parseZeroOrOneList(parser, minItems, null); + } + + /** + * @param tooManyArgsForBuiltin if non-null and more than one parenthesized argument is parsed, + * emit {@code Too many arguments for } (Perl builtin wording) + * instead of a generic syntax error. + */ + static ListNode parseZeroOrOneList(Parser parser, int minItems, String tooManyArgsForBuiltin) { if (looksLikeEmptyList(parser)) { // Return an empty list if it looks like an empty list if (minItems > 0) { @@ -52,7 +61,11 @@ static ListNode parseZeroOrOneList(Parser parser, int minItems) { TokenUtils.consume(parser); expr = new ListNode(parseList(parser, ")", 0), parser.tokenIndex); if (expr.elements.size() > 1) { - parser.throwError("syntax error"); + if (tooManyArgsForBuiltin != null) { + parser.throwError("Too many arguments for " + tooManyArgsForBuiltin); + } else { + parser.throwError("syntax error"); + } } } else if (token.type == LexerTokenType.EOF || isListTerminator(parser, token) || token.text.equals(",") || (token.text.equals("isa") && token.type == LexerTokenType.IDENTIFIER diff --git a/src/main/java/org/perlonjava/frontend/parser/OperatorParser.java b/src/main/java/org/perlonjava/frontend/parser/OperatorParser.java index b18f9e3df..cdd09961f 100644 --- a/src/main/java/org/perlonjava/frontend/parser/OperatorParser.java +++ b/src/main/java/org/perlonjava/frontend/parser/OperatorParser.java @@ -639,7 +639,7 @@ static OperatorNode parseOperatorWithOneOptionalArgument(Parser parser, LexerTok Node operand; // Handle operators with one optional argument String text = token.text; - operand = ListParser.parseZeroOrOneList(parser, 0); + operand = ListParser.parseZeroOrOneList(parser, 0, text); if (((ListNode) operand).elements.isEmpty()) { switch (text) { case "sleep": @@ -869,7 +869,7 @@ static OperatorNode parseDefined(Parser parser, LexerToken token, int currentInd // Handle 'defined' operator with special parsing context boolean parsingTakeReference = parser.parsingTakeReference; parser.parsingTakeReference = true; // don't call `&subr` while parsing "Take reference" - operand = ListParser.parseZeroOrOneList(parser, 0); + operand = ListParser.parseZeroOrOneList(parser, 0, "defined"); parser.parsingTakeReference = parsingTakeReference; if (operand.elements.isEmpty()) { // `defined` without arguments means `defined $_` @@ -890,7 +890,7 @@ static OperatorNode parseUndef(Parser parser, LexerToken token, int currentIndex // Similar to 'defined', we need to prevent &subr from being auto-called boolean parsingTakeReference = parser.parsingTakeReference; parser.parsingTakeReference = true; // don't call `&subr` while parsing "Take reference" - operand = ListParser.parseZeroOrOneList(parser, 0); + operand = ListParser.parseZeroOrOneList(parser, 0, "undef"); parser.parsingTakeReference = parsingTakeReference; if (operand.elements.isEmpty()) { // `undef` without arguments returns undef @@ -1291,7 +1291,7 @@ static BinaryOperatorNode parseSeek(Parser parser, LexerToken token, int current static OperatorNode parseReadpipe(Parser parser) { Node operand; // Handle 'readpipe' operator with one optional argument - operand = ListParser.parseZeroOrOneList(parser, 0); + operand = ListParser.parseZeroOrOneList(parser, 0, "readpipe"); if (((ListNode) operand).elements.isEmpty()) { // Create `$_` variable if no argument is provided operand = ParserNodeUtils.scalarUnderscore(parser); @@ -1300,9 +1300,10 @@ static OperatorNode parseReadpipe(Parser parser) { } static OperatorNode parsePack(Parser parser, LexerToken token, int currentIndex) { - Node operand; - // Handle 'pack' operator with one or more arguments - operand = ListParser.parseZeroOrMoreList(parser, 1, false, true, false, false); + ListNode operand = ListParser.parseZeroOrMoreList(parser, 0, false, true, false, false); + if (operand.elements.isEmpty()) { + parser.throwError("Not enough arguments for pack"); + } return new OperatorNode(token.text, operand, currentIndex); } diff --git a/src/main/java/org/perlonjava/frontend/parser/ParseHeredoc.java b/src/main/java/org/perlonjava/frontend/parser/ParseHeredoc.java index 41950ac80..321d8954a 100644 --- a/src/main/java/org/perlonjava/frontend/parser/ParseHeredoc.java +++ b/src/main/java/org/perlonjava/frontend/parser/ParseHeredoc.java @@ -47,7 +47,23 @@ static OperatorNode parseHeredoc(Parser parser, String tokenText) { } } else if (tokenText.length() == 1 && "'`\"".contains(tokenText)) { delimiter = tokenText; - } else if (token.type == LexerTokenType.IDENTIFIER) { + // Lexer tokenizes <<`LABEL` as: ` LABEL ` (opening/closing backticks are separate tokens). + // parseRawString("q") does not handle a NUMBER in the middle, so grab LABEL here and skip q(). + if ("`".equals(delimiter) + && token.type == LexerTokenType.OPERATOR + && parser.tokenIndex + 2 < parser.tokens.size()) { + LexerToken mid = parser.tokens.get(parser.tokenIndex + 1); + LexerToken end = parser.tokens.get(parser.tokenIndex + 2); + if ((mid.type == LexerTokenType.IDENTIFIER || mid.type == LexerTokenType.NUMBER) + && end.type == LexerTokenType.OPERATOR + && "`".equals(end.text)) { + TokenUtils.consume(parser); + identifier = mid.text; + TokenUtils.consume(parser); + TokenUtils.consume(parser); + } + } + } else if (token.type == LexerTokenType.IDENTIFIER || token.type == LexerTokenType.NUMBER) { delimiter = "\""; identifier = tokenText; TokenUtils.consume(parser); @@ -234,10 +250,10 @@ else if (currentIndex >= tokens.size() || operand = new StringNode(string, newlineIndex); break; case "\"": - operand = interpolateString(parser, string, newlineIndex); + operand = interpolateString(parser, string, newlineIndex, true); break; case "`": - Node interpolated = interpolateString(parser, string, newlineIndex); + Node interpolated = interpolateString(parser, string, newlineIndex, false); List elements = new ArrayList<>(); elements.add(interpolated); ListNode list = new ListNode(elements, newlineIndex); @@ -263,7 +279,7 @@ else if (currentIndex >= tokens.size() || parser.tokenIndex = newlineIndex; } - private static Node interpolateString(Parser parser, String string, int newlineIndex) { + private static Node interpolateString(Parser parser, String string, int newlineIndex, boolean preprocessBracedBackslashQuotes) { ArrayList buffers = new ArrayList<>(); buffers.add(string); StringParser.ParsedString rawStr = new StringParser.ParsedString(newlineIndex, newlineIndex, buffers, ' ', ' ', ' ', ' '); @@ -273,7 +289,8 @@ private static Node interpolateString(Parser parser, String string, int newlineI List heredocContext = new ArrayList<>(); // Parse the string with the new context, preserving the original parser context - Node result = StringDoubleQuoted.parseDoubleQuotedString(parser.ctx, rawStr, true, true, false, heredocContext, parser); + Node result = StringDoubleQuoted.parseDoubleQuotedString(parser.ctx, rawStr, true, true, false, heredocContext, parser, + preprocessBracedBackslashQuotes); // After parsing, any heredocs declared in this context need to be added to the parent parser.getHeredocNodes().addAll(heredocContext); diff --git a/src/main/java/org/perlonjava/frontend/parser/Parser.java b/src/main/java/org/perlonjava/frontend/parser/Parser.java index 85952b419..d1c0dfcfc 100644 --- a/src/main/java/org/perlonjava/frontend/parser/Parser.java +++ b/src/main/java/org/perlonjava/frontend/parser/Parser.java @@ -65,6 +65,14 @@ public class Parser { // re-tokenized string content and __LINE__ should use this as the base line, // counting newlines from the inner token list to offset from it. public int baseLineNumber = 0; + /** + * When {@code true} (qq and normal strings), {@link Variable#parseBracedVariable} may rewrite + * {@code \"} before {@code "} inside {@code ${...}} so patterns like {@code "${\"name\"}"} + * interpolate {@code $name}. When {@code false} (qx / command heredocs), keep the backslash so + * {@code ${\"hello"}} parses as a reference to the string {@code hello} and dereferences it, + * matching Perl 5. + */ + public boolean preprocessBracedBackslashQuotesInInterpolation = true; /** * Constructs a Parser with the given context and tokens. diff --git a/src/main/java/org/perlonjava/frontend/parser/PrototypeArgs.java b/src/main/java/org/perlonjava/frontend/parser/PrototypeArgs.java index c48f75af5..b838bcd8b 100644 --- a/src/main/java/org/perlonjava/frontend/parser/PrototypeArgs.java +++ b/src/main/java/org/perlonjava/frontend/parser/PrototypeArgs.java @@ -1,11 +1,14 @@ package org.perlonjava.frontend.parser; import org.perlonjava.frontend.astnode.*; +import org.perlonjava.frontend.lexer.Lexer; import org.perlonjava.frontend.lexer.LexerToken; import org.perlonjava.frontend.lexer.LexerTokenType; import org.perlonjava.runtime.runtimetypes.GlobalVariable; import org.perlonjava.runtime.runtimetypes.PerlCompilerException; +import java.util.List; + import static org.perlonjava.frontend.parser.ListParser.consumeCommas; import static org.perlonjava.frontend.parser.ListParser.isComma; import static org.perlonjava.frontend.parser.ParserNodeUtils.scalarUnderscore; @@ -245,21 +248,27 @@ static ListNode consumeArgsWithPrototype(Parser parser, String prototype, boolea // Check for too many arguments without parentheses only if prototype expects 2+ args if (!hasParentheses && countPrototypeArgs(prototype) >= 2) { - // If we see a comma after parsing all required args, check if it's a trailing comma - if (isComma(TokenUtils.peek(parser))) { - // Consume the comma and check what follows - int saveIndex = parser.tokenIndex; - consumeCommas(parser); - LexerToken nextToken = TokenUtils.peek(parser); - // If followed by a statement terminator, it's a trailing comma (allowed) - // Otherwise, it's too many arguments - if (!Parser.isExpressionTerminator(nextToken) && - nextToken.type != LexerTokenType.EOF && - !nextToken.text.equals(")")) { + // Do not use TokenUtils.peek here: it runs Whitespace.skipWhitespace(), which + // processes NEWLINE and may fill in a pending << heredoc before arguments are done. + List tokens = parser.tokens; + int i = skipHorizontalWhitespaceTokens(tokens, parser.tokenIndex); + if (i < tokens.size() && isComma(tokens.get(i))) { + int j = skipHorizontalWhitespaceTokens(tokens, i + 1); + LexerToken nextToken = tokenAtOrEof(tokens, j); + // Trailing comma before the newline that starts a pending << heredoc body is valid + // (see op/exec.t package o block). A newline with no pending heredoc is an extra arg. + boolean trailingCommaBeforeHeredoc = + nextToken.type == LexerTokenType.NEWLINE && !parser.getHeredocNodes().isEmpty(); + if (!trailingCommaBeforeHeredoc + && !Parser.isExpressionTerminator(nextToken) + && nextToken.type != LexerTokenType.EOF + && !nextToken.text.equals(")")) { throwTooManyArgumentsError(parser); } - // Restore position - the comma will be handled by the caller - parser.tokenIndex = saveIndex; + if (trailingCommaBeforeHeredoc) { + parser.tokenIndex = i; + consumeCommas(parser); + } } } } @@ -300,6 +309,26 @@ static ListNode consumeArgsWithPrototype(Parser parser, String prototype, boolea return args; } + /** Advance past SPACE/TAB-only whitespace tokens; never consume NEWLINE (heredoc triggers). */ + private static int skipHorizontalWhitespaceTokens(List tokens, int i) { + while (i < tokens.size()) { + LexerToken t = tokens.get(i); + if (t.type == LexerTokenType.WHITESPACE) { + i++; + continue; + } + break; + } + return i; + } + + private static LexerToken tokenAtOrEof(List tokens, int i) { + if (i >= tokens.size()) { + return new LexerToken(LexerTokenType.EOF, Lexer.EOF); + } + return tokens.get(i); + } + private static int firstNonCodeArgIndexAfterAmpersandPrototype(String prototype, ListNode args) { if (prototype == null || args.elements.size() < 2) { return -1; @@ -485,11 +514,12 @@ private static boolean tryConsumeParenFreeWordListForLeadingScalars( if (hasParentheses || prototype == null || prototype.isEmpty()) { return false; } - int slots = countLeadingScalarPrototypeSlots(prototype); + int slots = countLeadingConsecutiveDollarPrototypeSlots(prototype); if (slots < 2) { return false; } int saved = parser.tokenIndex; + List savedHeredocs = ParseHeredoc.saveHeredocState(parser); Node expr = parser.parseExpression(parser.getPrecedence(",")); if (expr instanceof ListNode ln && ln.elements.size() == slots @@ -503,21 +533,29 @@ && isPlainStringWordList(ln)) { return true; } parser.tokenIndex = saved; + parser.getHeredocNodes().clear(); + parser.getHeredocNodes().addAll(savedHeredocs); return false; } - private static int countLeadingScalarPrototypeSlots(String prototype) { + /** + * Counts leading {@code $} slots only (skipping whitespace). Used for the parenthesis-free + * {@code qw(...)} merge optimization: prototypes like {@code $_} (template + implicit {@code $_}) + * must not run that lookahead — it would invoke {@code parseExpression} at a terminator and yield a + * bogus syntax error instead of {@code Not enough arguments for unpack}. + */ + private static int countLeadingConsecutiveDollarPrototypeSlots(String prototype) { int count = 0; for (int i = 0; i < prototype.length(); i++) { char c = prototype.charAt(i); if (c == ' ' || c == '\t' || c == '\n' || c == '\r') { continue; } - if (c == '$' || c == '_') { + if (c == '$') { count++; - } else { - break; + continue; } + break; } return count; } diff --git a/src/main/java/org/perlonjava/frontend/parser/StringDoubleQuoted.java b/src/main/java/org/perlonjava/frontend/parser/StringDoubleQuoted.java index 8634ed1e5..ab84fc571 100644 --- a/src/main/java/org/perlonjava/frontend/parser/StringDoubleQuoted.java +++ b/src/main/java/org/perlonjava/frontend/parser/StringDoubleQuoted.java @@ -91,7 +91,7 @@ private StringDoubleQuoted(EmitterContext ctx, List tokens, Parser p * @return An AST node representing the parsed string (StringNode, BinaryOperatorNode for join, etc.) */ static Node parseDoubleQuotedString(EmitterContext ctx, StringParser.ParsedString rawStr, boolean parseEscapes, boolean interpolateVariable, boolean isRegexReplacement) { - return parseDoubleQuotedString(ctx, rawStr, parseEscapes, interpolateVariable, isRegexReplacement, null); + return parseDoubleQuotedString(ctx, rawStr, parseEscapes, interpolateVariable, isRegexReplacement, null, null, true); } /** @@ -109,7 +109,7 @@ static Node parseDoubleQuotedString(EmitterContext ctx, StringParser.ParsedStrin * @return An AST node representing the parsed string (StringNode, BinaryOperatorNode for join, etc.) */ static Node parseDoubleQuotedString(EmitterContext ctx, StringParser.ParsedString rawStr, boolean parseEscapes, boolean interpolateVariable, boolean isRegexReplacement, List sharedHeredocNodes) { - return parseDoubleQuotedString(ctx, rawStr, parseEscapes, interpolateVariable, isRegexReplacement, sharedHeredocNodes, null); + return parseDoubleQuotedString(ctx, rawStr, parseEscapes, interpolateVariable, isRegexReplacement, sharedHeredocNodes, null, true); } /** @@ -125,6 +125,14 @@ static Node parseDoubleQuotedString(EmitterContext ctx, StringParser.ParsedStrin * @return An AST node representing the parsed string */ static Node parseDoubleQuotedString(EmitterContext ctx, StringParser.ParsedString rawStr, boolean parseEscapes, boolean interpolateVariable, boolean isRegexReplacement, List sharedHeredocNodes, Parser originalParser) { + return parseDoubleQuotedString(ctx, rawStr, parseEscapes, interpolateVariable, isRegexReplacement, + sharedHeredocNodes, originalParser, true); + } + + /** + * @param preprocessBracedBackslashQuotes See {@link Parser#preprocessBracedBackslashQuotesInInterpolation}. + */ + static Node parseDoubleQuotedString(EmitterContext ctx, StringParser.ParsedString rawStr, boolean parseEscapes, boolean interpolateVariable, boolean isRegexReplacement, List sharedHeredocNodes, Parser originalParser, boolean preprocessBracedBackslashQuotes) { // Extract the first buffer (double-quoted strings don't have multiple parts like here-docs) var input = rawStr.buffers.getFirst(); var tokenIndex = rawStr.next; @@ -142,6 +150,8 @@ static Node parseDoubleQuotedString(EmitterContext ctx, StringParser.ParsedStrin new Parser(ctx, tokens, sharedHeredocNodes) : new Parser(ctx, tokens); + parser.preprocessBracedBackslashQuotesInInterpolation = preprocessBracedBackslashQuotes; + // Preserve context flags from original parser if provided if (originalParser != null) { parser.isInMethod = originalParser.isInMethod; diff --git a/src/main/java/org/perlonjava/frontend/parser/StringParser.java b/src/main/java/org/perlonjava/frontend/parser/StringParser.java index 01bcddf29..6daed2977 100644 --- a/src/main/java/org/perlonjava/frontend/parser/StringParser.java +++ b/src/main/java/org/perlonjava/frontend/parser/StringParser.java @@ -586,7 +586,7 @@ public static OperatorNode parseRegexMatch(EmitterContext ctx, String operator, public static OperatorNode parseSystemCommand(EmitterContext ctx, String operator, ParsedString rawStr) { operator = "qx"; // Parse as interpolated string (like double quotes) - Node parsed = StringDoubleQuoted.parseDoubleQuotedString(ctx, rawStr, true, true, false); + Node parsed = StringDoubleQuoted.parseDoubleQuotedString(ctx, rawStr, true, true, false, null, null, false); List elements = new ArrayList<>(); elements.add(parsed); ListNode list = new ListNode(elements, rawStr.index); diff --git a/src/main/java/org/perlonjava/frontend/parser/Variable.java b/src/main/java/org/perlonjava/frontend/parser/Variable.java index 9fc659dcc..7e8ec643a 100644 --- a/src/main/java/org/perlonjava/frontend/parser/Variable.java +++ b/src/main/java/org/perlonjava/frontend/parser/Variable.java @@ -872,8 +872,8 @@ public static Node parseBracedVariable(Parser parser, String sigil, boolean isSt } } - // For string interpolation, preprocess \" sequences IN PLACE - if (isStringInterpolation) { + // For qq-like interpolation, preprocess \" sequences IN PLACE (not qx/command strings). + if (isStringInterpolation && parser.preprocessBracedBackslashQuotesInInterpolation) { int startIndex = parser.tokenIndex; int braceLevel = 1; diff --git a/src/main/java/org/perlonjava/frontend/parser/Whitespace.java b/src/main/java/org/perlonjava/frontend/parser/Whitespace.java index e8959f77f..44c30b805 100644 --- a/src/main/java/org/perlonjava/frontend/parser/Whitespace.java +++ b/src/main/java/org/perlonjava/frontend/parser/Whitespace.java @@ -33,7 +33,10 @@ public static int skipWhitespace(Parser parser, int tokenIndex, List case NEWLINE: if (!parser.getHeredocNodes().isEmpty()) { - // Process heredocs before advancing past the NEWLINE + // parseHeredocAfterNewline reads parser.tokenIndex as the newline position. + // This loop advances local tokenIndex across WHITESPACE without syncing the parser, + // so align before processing pending heredocs. + parser.tokenIndex = tokenIndex; ParseHeredoc.parseHeredocAfterNewline(parser); tokenIndex = parser.tokenIndex; } else if (parser.heredocNewlineIndex == tokenIndex && parser.heredocSkipToIndex > tokenIndex) { diff --git a/src/main/java/org/perlonjava/runtime/operators/SystemOperator.java b/src/main/java/org/perlonjava/runtime/operators/SystemOperator.java index 600670f87..27516df15 100644 --- a/src/main/java/org/perlonjava/runtime/operators/SystemOperator.java +++ b/src/main/java/org/perlonjava/runtime/operators/SystemOperator.java @@ -1,5 +1,6 @@ package org.perlonjava.runtime.operators; +import org.perlonjava.backend.bytecode.InterpreterState; import org.perlonjava.runtime.ForkOpenCompleteException; import org.perlonjava.runtime.ForkOpenState; import org.perlonjava.runtime.mro.InheritanceResolver; @@ -17,9 +18,12 @@ import java.util.regex.Pattern; import static org.perlonjava.runtime.runtimetypes.GlobalContext.encodeSpecialVar; +import static org.perlonjava.runtime.runtimetypes.GlobalVariable.getGlobalCodeRef; import static org.perlonjava.runtime.runtimetypes.GlobalVariable.getGlobalVariable; +import static org.perlonjava.runtime.runtimetypes.GlobalVariable.isGlobalCodeRefDefined; import static org.perlonjava.runtime.runtimetypes.GlobalVariable.setGlobalVariable; import static org.perlonjava.runtime.runtimetypes.RuntimeIO.flushAllHandles; +import static org.perlonjava.runtime.runtimetypes.RuntimeScalarCache.scalarFalse; import static org.perlonjava.runtime.runtimetypes.RuntimeScalarCache.scalarUndef; /** @@ -44,6 +48,24 @@ public class SystemOperator { * @throws PerlCompilerException if an error occurs during command execution or stream handling. */ public static RuntimeBase systemCommand(RuntimeScalar command, int ctx) { + // Perl dispatches qx//, `` and readpipe() through the package readpipe CV when defined + // (use subs + sub readpipe), not straight to the shell. + // Use InterpreterState (updated by JVM `package` / scoped blocks), not caller() — + // caller() from inside this runtime helper resolves the wrong package and skips the override. + String pkg = InterpreterState.currentPackage.get().toString(); + if (pkg == null || pkg.isEmpty()) { + pkg = "main"; + } + String fqReadpipe = pkg.endsWith("::") ? pkg + "readpipe" : pkg + "::readpipe"; + if (isGlobalCodeRefDefined(fqReadpipe)) { + RuntimeScalar cv = getGlobalCodeRef(fqReadpipe); + if (cv.value instanceof RuntimeCode rc && rc.defined()) { + RuntimeArray argv = new RuntimeArray(); + argv.add(command); + return rc.apply(argv, RuntimeCode.effectiveCallContext(ctx)); + } + } + String cmd = command.toString(); CommandResult result; @@ -552,7 +574,9 @@ public static RuntimeScalar exec(RuntimeList args, boolean hasHandle, int ctx) { List flattenedArgs = flattenToStringList(args.elements); if (flattenedArgs.isEmpty()) { - throw new PerlCompilerException("exec: no command specified"); + // Perl returns false and sets errno (typically ENOENT) — does not die. + getGlobalVariable("main::!").set(2); + return scalarFalse; } // Check for pending fork-open emulation From 8d708bd49b6430f4dd52ee27b082d5456eb04a6c Mon Sep 17 00:00:00 2001 From: "Flavio S. Glock" Date: Wed, 13 May 2026 19:34:46 +0200 Subject: [PATCH 5/6] fix(parser): keep indirect-object `new Class ...` when package defines `sub new` `subExists && prototypeStartsWithScalarSlot` skipped the indirect-object probe; inside a package that defines `new ($...)`, `new Parse::RecDescent::Directive(...)` was parsed as a direct call to the enclosing `new`, breaking Parse::RecDescent / DBIx::Class). Always run the probe when the method bareword is `new`. Verified: require Parse::RecDescent from ~/.perlonjava/lib; make. Generated with [Cursor](https://cursor.com/docs) Co-Authored-By: Cursor Co-authored-by: Cursor --- .../org/perlonjava/frontend/parser/SubroutineParser.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/perlonjava/frontend/parser/SubroutineParser.java b/src/main/java/org/perlonjava/frontend/parser/SubroutineParser.java index c6cd334bf..30009fbcd 100644 --- a/src/main/java/org/perlonjava/frontend/parser/SubroutineParser.java +++ b/src/main/java/org/perlonjava/frontend/parser/SubroutineParser.java @@ -210,10 +210,15 @@ static Node parseSubroutineCall(Parser parser, boolean isMethod) { // this is how Error.pm's classic // try { ... } catch Error::Simple with { ... } // idiom is recognised (parses as `Error::Simple->catch(with {...})`). + // Perl indirect-object `new Class LIST` must stay available even when the *current* + // package defines `sub new ($...)` (Parse::RecDescent::Error::code uses + // `new Parse::RecDescent::Directive(...)`). Without this exception, `subExists && + // prototypeStartsWithScalarSlot` skips the probe and we mis-parse as a direct call to + // the enclosing package's `new`, yielding bogus arity errors. if (peek(parser).type == LexerTokenType.IDENTIFIER && isValidIndirectMethod(subName, parser) && !prototypeHasGlob - && !(subExists && prototypeStartsWithScalarSlot(prototype))) { + && !(subExists && prototypeStartsWithScalarSlot(prototype) && !subName.equals("new"))) { int currentIndex2 = parser.tokenIndex; String packageName = IdentifierParser.parseSubroutineIdentifier(parser); // System.out.println("maybe indirect object: " + packageName + "->" + subName); From fdd3d769eb95612df7614bebdd4e0f110a7f9730 Mon Sep 17 00:00:00 2001 From: "Flavio S. Glock" Date: Wed, 13 May 2026 19:58:03 +0200 Subject: [PATCH 6/6] fix(parser): drop prototype gate on indirect-object probe The scalar-leading-prototype heuristic skipped the indirect-object probe entirely, which mis-parsed `method Some::Long::Class LIST` as a direct call to an enclosing sub when that sub had a `$`-leading prototype (e.g. Parse::RecDescent). Perl does not single out `new`; always attempt the probe and rely on existing rejection/backtracking for ambiguous cases. Verified: make; `require Parse::RecDescent` under jperl. Generated with [Cursor](https://cursor.com/docs) Co-Authored-By: Cursor Co-authored-by: Cursor --- .../frontend/parser/SubroutineParser.java | 86 +++---------------- 1 file changed, 10 insertions(+), 76 deletions(-) diff --git a/src/main/java/org/perlonjava/frontend/parser/SubroutineParser.java b/src/main/java/org/perlonjava/frontend/parser/SubroutineParser.java index 30009fbcd..71ed9640b 100644 --- a/src/main/java/org/perlonjava/frontend/parser/SubroutineParser.java +++ b/src/main/java/org/perlonjava/frontend/parser/SubroutineParser.java @@ -201,24 +201,23 @@ static Node parseSubroutineCall(Parser parser, boolean isMethod) { boolean prototypeHasGlob = prototype != null && prototype.contains("*"); - // If a package name follows, then it looks like a indirect method - // Unless the subName looks like an operator - // Unless the subName has a prototype with `*` - // // Note: feature-gated core keywords (`try`, `catch`, `finally`) should // participate in indirect-object parsing when their feature is *off* — // this is how Error.pm's classic // try { ... } catch Error::Simple with { ... } // idiom is recognised (parses as `Error::Simple->catch(with {...})`). - // Perl indirect-object `new Class LIST` must stay available even when the *current* - // package defines `sub new ($...)` (Parse::RecDescent::Error::code uses - // `new Parse::RecDescent::Directive(...)`). Without this exception, `subExists && - // prototypeStartsWithScalarSlot` skips the probe and we mis-parse as a direct call to - // the enclosing package's `new`, yielding bogus arity errors. + + // If a package name follows, it may be indirect-object syntax (INVOCANT->METHOD(LIST)). + // + // Do not skip this probe just because the callee has a scalar-leading prototype; that + // heuristic mis-parsed `new Some::Long::Name LIST` inside a package that defines its own + // `sub new ($...)` as a direct arity-checked call to the wrong sub (Parse::RecDescent). + // Perl does not treat `new` specially here — the method bareword is arbitrary. + // + // Ambiguous parses rely on the rejection/backtracking logic inside this block. if (peek(parser).type == LexerTokenType.IDENTIFIER && isValidIndirectMethod(subName, parser) - && !prototypeHasGlob - && !(subExists && prototypeStartsWithScalarSlot(prototype) && !subName.equals("new"))) { + && !prototypeHasGlob) { int currentIndex2 = parser.tokenIndex; String packageName = IdentifierParser.parseSubroutineIdentifier(parser); // System.out.println("maybe indirect object: " + packageName + "->" + subName); @@ -589,71 +588,6 @@ private static boolean isValidIndirectMethod(String subName, Parser parser) { return false; } - /** - * True when the subroutine prototype starts with a scalar (or {@code _}) slot after optional - * semicolons / grouping parens. Such subs take their first argument in scalar context; a bareword - * after the name is not indirect-object syntax ({@code ok Bin, "..."} vs {@code Bin->ok(...)}, - * {@code require_ok Foo::Bar} vs {@code Foo::Bar->require_ok}). Filehandle-first prototypes - * ({@code *}) still allow indirect notation ({@code print HANDLE LIST}). - */ - private static boolean prototypeStartsWithScalarSlot(String prototype) { - if (prototype == null || prototype.isEmpty()) { - return false; - } - int i = 0; - int n = prototype.length(); - while (i < n) { - char c = prototype.charAt(i); - if (c == ' ' || c == '\t') { - i++; - continue; - } - if (c == ';') { - i++; - continue; - } - if (c == '(' || c == ')') { - i++; - continue; - } - if (c == '[') { - int depth = 1; - i++; - while (i < n && depth > 0) { - char bc = prototype.charAt(i); - if (bc == '[') { - depth++; - } else if (bc == ']') { - depth--; - } - i++; - } - continue; - } - if (c == '\\') { - i++; - if (i < n) { - i++; - } - continue; - } - if (c == '*') { - return false; - } - if (c == '@') { - return false; - } - if (c == '%') { - return false; - } - if (c == '&') { - return false; - } - return c == '$' || c == '_'; - } - return false; - } - private static Node parseIndirectMethodCall(Parser parser, IdentifierNode nameNode) { // If the subroutine does not exist and there are no parentheses, it is not a subroutine call