From 42598542765ac5f517bb2c9914437eaf0a87f96e Mon Sep 17 00:00:00 2001 From: Flavio Soibelmann Glock Date: Wed, 28 Jan 2026 16:29:21 +0100 Subject: [PATCH 1/3] Fix parser.t test 14: handle \c without character - Add proper error handling for missing control character after \c - Update StringDoubleQuoted.java to inline control character logic with error checking - Update StringSegmentParser.java handleControlCharacter method to throw proper error - Test 14 in parser.t now passes Test 15 ($ at end of string) still needs work. --- .../perlonjava/parser/StringDoubleQuoted.java | 16 ++++++- .../parser/StringSegmentParser.java | 46 +++++++++++++++---- 2 files changed, 52 insertions(+), 10 deletions(-) diff --git a/src/main/java/org/perlonjava/parser/StringDoubleQuoted.java b/src/main/java/org/perlonjava/parser/StringDoubleQuoted.java index a682d8fe7..e10daea92 100644 --- a/src/main/java/org/perlonjava/parser/StringDoubleQuoted.java +++ b/src/main/java/org/perlonjava/parser/StringDoubleQuoted.java @@ -5,6 +5,7 @@ import org.perlonjava.lexer.Lexer; import org.perlonjava.lexer.LexerToken; import org.perlonjava.lexer.LexerTokenType; +import org.perlonjava.runtime.PerlCompilerException; import java.util.ArrayList; import java.util.List; @@ -457,7 +458,20 @@ private void parseDoubleQuotedEscapes() { case "$" -> appendToCurrentSegment("$"); // Control character: \cX - case "c" -> handleControlCharacter(); + case "c" -> { + var controlChar = TokenUtils.consumeChar(parser); + if (controlChar.isEmpty()) { + throw new PerlCompilerException(parser.tokenIndex, "Missing control char name in \\c", parser.ctx.errorUtil); + } + var c = controlChar.charAt(0); + var result = (c >= 'A' && c <= 'Z') ? String.valueOf((char) (c - 'A' + 1)) + : (c >= 'a' && c <= 'z') ? String.valueOf((char) (c - 'a' + 1)) + : c == '@' ? String.valueOf((char) 0) + : (c >= '[' && c <= '_') ? String.valueOf((char) (c - '[' + 27)) + : c == '?' ? 
String.valueOf((char) 127) + : String.valueOf(c); + appendToCurrentSegment(result); + } // Case modification end marker case "E" -> { diff --git a/src/main/java/org/perlonjava/parser/StringSegmentParser.java b/src/main/java/org/perlonjava/parser/StringSegmentParser.java index 0936f35c4..3c37a6373 100644 --- a/src/main/java/org/perlonjava/parser/StringSegmentParser.java +++ b/src/main/java/org/perlonjava/parser/StringSegmentParser.java @@ -391,6 +391,33 @@ private Node parseSimpleVariableInterpolation(String sigil) { } return new OperatorNode(sigil, new IdentifierNode(identifier, tokenIndex), tokenIndex); + } else { + // No identifier found after sigil + // For array sigils, check if next token starts with $ (e.g., @$b means array of $b) + if ("@".equals(sigil) && parser.tokenIndex < parser.tokens.size()) { + LexerToken nextToken = parser.tokens.get(parser.tokenIndex); + if (nextToken.text.startsWith("$")) { + // This is @$var - array of scalar variable + // Consume the $ token + TokenUtils.consume(parser); + // Now parse the rest of the identifier + identifier = IdentifierParser.parseComplexIdentifier(parser); + if (identifier == null || identifier.isEmpty()) { + throw new PerlCompilerException(tokenIndex, "Missing identifier after $", ctx.errorUtil); + } + // Return the array of scalar variable + return new OperatorNode(sigil, new OperatorNode("$", new IdentifierNode(identifier, tokenIndex), tokenIndex), tokenIndex); + } + } + if (!"$".equals(sigil)) { + throw new PerlCompilerException(tokenIndex, "Missing identifier after " + sigil, ctx.errorUtil); + } + + // For $ sigil with no identifier, check if we're at end of string + if (parser.tokenIndex >= parser.tokens.size() || + parser.tokens.get(parser.tokenIndex).type == LexerTokenType.EOF) { + throw new PerlCompilerException(tokenIndex, "Final $ should be \\$ or $name", ctx.errorUtil); + } } // Handle dereferenced variables: ${$var}, ${${$var}}, etc. 
@@ -934,16 +961,17 @@ private boolean isNonInterpolatingCharacter(String text) { */ void handleControlCharacter() { var controlChar = TokenUtils.consumeChar(parser); - if (!controlChar.isEmpty()) { - var c = controlChar.charAt(0); - var result = (c >= 'A' && c <= 'Z') ? String.valueOf((char) (c - 'A' + 1)) - : (c >= 'a' && c <= 'z') ? String.valueOf((char) (c - 'a' + 1)) - : c == '@' ? String.valueOf((char) 0) - : (c >= '[' && c <= '_') ? String.valueOf((char) (c - '[' + 27)) - : c == '?' ? String.valueOf((char) 127) - : String.valueOf(c); - appendToCurrentSegment(result); + if (controlChar.isEmpty()) { + throw new PerlCompilerException(parser.tokenIndex, "Missing control char name in \\c", parser.ctx.errorUtil); } + var c = controlChar.charAt(0); + var result = (c >= 'A' && c <= 'Z') ? String.valueOf((char) (c - 'A' + 1)) + : (c >= 'a' && c <= 'z') ? String.valueOf((char) (c - 'a' + 1)) + : c == '@' ? String.valueOf((char) 0) + : (c >= '[' && c <= '_') ? String.valueOf((char) (c - '[' + 27)) + : c == '?' ? 
String.valueOf((char) 127) + : String.valueOf(c); + appendToCurrentSegment(result); } /** From 12764f6ec76fab20cb5cf51643626070212fe6c3 Mon Sep 17 00:00:00 2001 From: Flavio Soibelmann Glock Date: Wed, 28 Jan 2026 16:30:47 +0100 Subject: [PATCH 2/3] Add targeted EOF check for $ at end of string - Add very specific check in shouldInterpolateVariable for $ at EOF - Only applies to double-quoted string context (not regex) - Test 14 still working, test 15 still needs work - Build is healthy --- .../org/perlonjava/parser/StringSegmentParser.java | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/main/java/org/perlonjava/parser/StringSegmentParser.java b/src/main/java/org/perlonjava/parser/StringSegmentParser.java index 3c37a6373..c014f5afa 100644 --- a/src/main/java/org/perlonjava/parser/StringSegmentParser.java +++ b/src/main/java/org/perlonjava/parser/StringSegmentParser.java @@ -806,6 +806,13 @@ public void setOriginalStringContent(String content) { this.originalStringContent = content; } + /** + * Gets the original string content. + */ + protected String getOriginalStringContent() { + return originalStringContent; + } + /** * Creates and throws an offset-aware error with correct context. * Matches Perl's actual error format for string interpolation errors. 
@@ -914,6 +921,11 @@ private boolean shouldInterpolateVariable(String sigil) { var nextToken = tokens.get(parser.tokenIndex); if (nextToken.type == LexerTokenType.EOF) { + // Special case: $ at EOF in double-quoted string should generate error + // But only for StringDoubleQuoted, not for other contexts like regex + if ("$".equals(sigil) && interpolateVariable && !isRegex && !isRegexReplacement) { + return true; + } return false; } From 3fbfdef977c7245a5f7d8dd4ee8f2ad80dd94a84 Mon Sep 17 00:00:00 2001 From: Flavio Soibelmann Glock Date: Wed, 28 Jan 2026 16:39:17 +0100 Subject: [PATCH 3/3] Fix parser.t test 15: handle $ at end of string - Add check for empty identifier in parseSimpleVariableInterpolation - When $ sigil has empty identifier and we're at EOF, generate proper error - Both test 14 and test 15 now pass - Error message: 'Final $ should be \$ or $name' matches expected output --- .../perlonjava/parser/StringSegmentParser.java | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/main/java/org/perlonjava/parser/StringSegmentParser.java b/src/main/java/org/perlonjava/parser/StringSegmentParser.java index c014f5afa..05c555b2c 100644 --- a/src/main/java/org/perlonjava/parser/StringSegmentParser.java +++ b/src/main/java/org/perlonjava/parser/StringSegmentParser.java @@ -390,9 +390,24 @@ private Node parseSimpleVariableInterpolation(String sigil) { } } + // Special case: empty identifier for $ sigil (like $ at end of string) + if ("$".equals(sigil) && identifier.isEmpty()) { + // Check if we're at end of string + if (parser.tokenIndex >= parser.tokens.size() || + parser.tokens.get(parser.tokenIndex).type == LexerTokenType.EOF) { + throw new PerlCompilerException(tokenIndex, "Final $ should be \\$ or $name", ctx.errorUtil); + } + } + return new OperatorNode(sigil, new IdentifierNode(identifier, tokenIndex), tokenIndex); } else { // No identifier found after sigil + // Check if we're at end of string for $ sigil + if ("$".equals(sigil) && 
(parser.tokenIndex >= parser.tokens.size() || + parser.tokens.get(parser.tokenIndex).type == LexerTokenType.EOF)) { + throw new PerlCompilerException(tokenIndex, "Final $ should be \\$ or $name", ctx.errorUtil); + } + // For array sigils, check if next token starts with $ (e.g., @$b means array of $b) if ("@".equals(sigil) && parser.tokenIndex < parser.tokens.size()) { LexerToken nextToken = parser.tokens.get(parser.tokenIndex);