-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathParsePrimary.java
More file actions
512 lines (458 loc) · 25.6 KB
/
ParsePrimary.java
File metadata and controls
512 lines (458 loc) · 25.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
package org.perlonjava.parser;
import org.perlonjava.astnode.*;
import org.perlonjava.lexer.LexerToken;
import org.perlonjava.lexer.LexerTokenType;
import org.perlonjava.runtime.GlobalVariable;
import org.perlonjava.runtime.PerlCompilerException;
import org.perlonjava.runtime.RuntimeGlob;
import org.perlonjava.runtime.RuntimeScalar;
import org.perlonjava.runtime.RuntimeScalarType;
import org.perlonjava.symbols.SymbolTable;
import static org.perlonjava.parser.ParserNodeUtils.scalarUnderscore;
import static org.perlonjava.parser.TokenUtils.peek;
import static org.perlonjava.runtime.GlobalVariable.existsGlobalCodeRef;
/**
* The ParsePrimary class is responsible for parsing primary expressions in Perl source code.
*
* <p>Primary expressions are the basic building blocks of the language, including:
* <ul>
* <li>Identifiers (variables, subroutines, keywords)</li>
* <li>Literals (numbers, strings)</li>
* <li>Operators (unary and special operators)</li>
* <li>Grouping constructs (parentheses, brackets, braces)</li>
* </ul>
*
* <p>This class handles the complex logic of Perl's syntax, including:
* <ul>
* <li>Core function overriding via CORE::GLOBAL::</li>
* <li>Feature-based operator enabling (say, fc, state, etc.)</li>
* <li>Autoquoting with the fat comma (=>)</li>
* <li>Reference operators (\)</li>
* <li>File test operators (-f, -d, etc.)</li>
* </ul>
*
* @see Parser
* @see Node
*/
public class ParsePrimary {
/**
 * Entry point for parsing a single primary expression.
 *
 * <p>Exactly one token is consumed from the parser's stream and the work is
 * handed to the routine that matches the token's type: identifiers go to
 * {@link #parseIdentifier}, operators to {@link #parseOperator}, numbers to
 * {@code NumberParser}, and string tokens become a {@link StringNode} directly.
 *
 * @param parser The parser instance holding the token stream and parsing context
 * @return The node for the parsed primary expression, or {@code null} when the
 *         consumed token is EOF
 * @throws PerlCompilerException with "syntax error" for any other token type
 */
public static Node parsePrimary(Parser parser) {
    // Remember where this expression began so identifier parsing can backtrack.
    final int firstTokenIndex = parser.tokenIndex;
    final LexerToken current = TokenUtils.consume(parser);

    return switch (current.type) {
        // Variables, subroutine calls, keywords, barewords
        case IDENTIFIER -> parseIdentifier(parser, firstTokenIndex, current, current.text);
        // Numeric literals
        case NUMBER -> NumberParser.parseNumber(parser, current);
        // String tokens produced by the lexer
        case STRING -> new StringNode(current.text, parser.tokenIndex);
        // Sigils, grouping, unary operators and other special constructs
        case OPERATOR -> parseOperator(parser, current, current.text);
        // End of input: nothing to parse
        case EOF -> null;
        // Everything else cannot start a primary expression
        default -> throw new PerlCompilerException(parser.tokenIndex, "syntax error", parser.ctx.errorUtil);
    };
}
/**
 * Parses an identifier token, which could be a keyword, subroutine call, or variable.
 *
 * <p>The checks run in a fixed order, and the first one that matches wins:
 * <ol>
 * <li>Autoquoting: an identifier followed by {@code =>} becomes a {@link StringNode}</li>
 * <li>{@code CORE::} prefix: the two following tokens are consumed and the second one
 *     becomes the operator; feature checks and overrides are bypassed</li>
 * <li>Lexical subs: a symbol-table entry for {@code "&" + operator} whose AST is an
 *     {@link OperatorNode} is parsed as a subroutine call</li>
 * <li>Feature checking: some operators require specific features (say, fc, state, ...)</li>
 * <li>Function overriding: {@code use subs}-style package overrides and
 *     {@code CORE::GLOBAL::} overrides of operators listed in
 *     {@code ParserTables.OVERRIDABLE_OP}</li>
 * <li>Core operators: delegates to {@code CoreOperatorResolver.parseCoreOperator}</li>
 * <li>Fallback: backtracks to {@code startIndex} and parses a subroutine call</li>
 * </ol>
 *
 * <p>Side effects: {@code parser.tokenIndex} is moved (including backtracking to
 * {@code startIndex}), and the {@code CORE::GLOBAL::} path inserts four tokens into
 * {@code parser.tokens}.
 *
 * @param parser The parser instance
 * @param startIndex The token index where this identifier started; used as the backtrack target
 * @param token The identifier token being parsed
 * @param operator The text of the identifier (same as token.text)
 * @return A Node representing the parsed identifier construct
 * @throws PerlCompilerException if CORE:: was used and the core operator resolver
 *         returned no node for the name ("CORE::name is not a keyword")
 */
private static Node parseIdentifier(Parser parser, int startIndex, LexerToken token, String operator) {
// Two look-aheads, both captured BEFORE any CORE:: handling and never refreshed:
//  - nextTokenText: the raw token stored at parser.tokenIndex
//  - peekTokenText: whatever peek(parser) reports (presumably the next
//    non-whitespace token -- confirm against TokenUtils.peek)
// After a CORE:: prefix is consumed below, nextTokenText therefore still reads "::".
String nextTokenText = parser.tokens.get(parser.tokenIndex).text;
String peekTokenText = peek(parser).text;
// Check for autoquoting: bareword => is treated as "bareword"
if (peekTokenText.equals("=>")) {
// Autoquote: convert identifier to string literal
return new StringNode(token.text, parser.tokenIndex);
}
boolean operatorEnabled = false;
boolean calledWithCore = false;
// Check if this is an explicit CORE:: call (e.g., CORE::print)
// IMPORTANT: Check this BEFORE lexical subs, because "state sub CORE" shouldn't break CORE::uc
if (token.text.equals("CORE") && nextTokenText.equals("::")) {
calledWithCore = true;
operatorEnabled = true; // CORE:: functions are always enabled
TokenUtils.consume(parser); // consume "::"
token = TokenUtils.consume(parser); // consume the actual operator
operator = token.text;
}
// IMPORTANT: Check for lexical subs AFTER CORE::, but before checking for quote-like operators!
// This allows "my sub y" to shadow the "y///" transliteration operator
// But doesn't interfere with CORE:: prefix handling
// ALSO: Don't treat as lexical sub if :: follows - that's a qualified name like Encode::is_utf8
if (!calledWithCore && !nextTokenText.equals("::")) {
// Lexical subs are looked up in the symbol table under "&name"
String lexicalKey = "&" + operator;
SymbolTable.SymbolEntry lexicalEntry = parser.ctx.symbolTable.getSymbolEntry(lexicalKey);
if (lexicalEntry != null && lexicalEntry.ast() instanceof OperatorNode) {
// This is a lexical sub - parse it as a subroutine call
parser.tokenIndex = startIndex; // backtrack
return SubroutineParser.parseSubroutineCall(parser, false);
}
}
// Check for quote-like operators that should always be parsed as operators
if (isIsQuoteLikeOperator(operator)) {
operatorEnabled = true;
} else if (!nextTokenText.equals("::")) {
// Check if the operator is enabled in the current scope
// Some operators require specific features to be enabled.
// Because nextTokenText is stale, this branch is skipped for CORE:: calls
// (operatorEnabled is already true) and for qualified names such as
// Foo::bar (operatorEnabled stays false, so they fall through to the
// subroutine-call default at the end).
operatorEnabled = switch (operator) {
case "all" -> parser.ctx.symbolTable.isFeatureCategoryEnabled("keyword_all");
case "any" -> parser.ctx.symbolTable.isFeatureCategoryEnabled("keyword_any");
case "say", "fc", "state", "evalbytes" -> parser.ctx.symbolTable.isFeatureCategoryEnabled(operator);
case "__SUB__" -> parser.ctx.symbolTable.isFeatureCategoryEnabled("current_sub");
case "__CLASS__" -> parser.ctx.symbolTable.isFeatureCategoryEnabled("class");
case "method" -> parser.ctx.symbolTable.isFeatureCategoryEnabled("class");
default -> true; // Most operators are always enabled
};
}
// Check for overridable operators (unless explicitly called with CORE::)
if (!calledWithCore && operatorEnabled && ParserTables.OVERRIDABLE_OP.contains(operator)) {
// Core functions can be overridden in two ways:
// 1. By defining a subroutine in the current package
// 2. By defining a subroutine in CORE::GLOBAL::
// Special case: 'do' followed by '{' is a do-block, not a function call
if (operator.equals("do") && peekTokenText.equals("{")) {
// This is a do block, not a do function call - let CoreOperatorResolver handle it
} else {
// Check for local package override
String fullName = parser.ctx.symbolTable.getCurrentPackage() + "::" + operator;
if (GlobalVariable.isSubs.getOrDefault(fullName, false)) {
// Example: 'use subs "hex"; sub hex { 456 } print hex("123"), "\n"'
parser.tokenIndex = startIndex; // backtrack to reparse as subroutine
return SubroutineParser.parseSubroutineCall(parser, false);
}
// Check for CORE::GLOBAL:: override
String coreGlobalName = "CORE::GLOBAL::" + operator;
if (RuntimeGlob.isGlobAssigned(coreGlobalName) && existsGlobalCodeRef(coreGlobalName)) {
// Example: 'BEGIN { *CORE::GLOBAL::hex = sub { 456 } } print hex("123"), "\n"'
parser.tokenIndex = startIndex; // backtrack
// Rewrite the tokens to call CORE::GLOBAL::operator.
// The four tokens are inserted in front of the operator token, so the
// stream now reads CORE :: GLOBAL :: <operator>. The insertion is permanent.
// NOTE(review): a later re-parse starting at startIndex would see a leading
// CORE:: and take the CORE:: branch above with operator "GLOBAL" -- confirm
// that no caller backtracks across this point.
parser.tokens.add(startIndex, new LexerToken(LexerTokenType.IDENTIFIER, "CORE"));
parser.tokens.add(startIndex + 1, new LexerToken(LexerTokenType.OPERATOR, "::"));
parser.tokens.add(startIndex + 2, new LexerToken(LexerTokenType.IDENTIFIER, "GLOBAL"));
parser.tokens.add(startIndex + 3, new LexerToken(LexerTokenType.OPERATOR, "::"));
return SubroutineParser.parseSubroutineCall(parser, false);
}
}
}
// Try to parse as a core operator/keyword
if (operatorEnabled) {
// A null result means the resolver did not produce a node for this name
Node operation = CoreOperatorResolver.parseCoreOperator(parser, token, startIndex);
if (operation != null) {
return operation;
}
}
// If CORE:: was used but the operator wasn't recognized, it's an error
if (calledWithCore) {
throw new PerlCompilerException(parser.tokenIndex,
"CORE::" + operator + " is not a keyword", parser.ctx.errorUtil);
}
// Default: treat as a subroutine call or bareword
parser.tokenIndex = startIndex; // backtrack
return SubroutineParser.parseSubroutineCall(parser, false);
}
/**
 * Tells whether a word is one of the quote-like operators
 * ({@code q qq qw qx qr m s tr y}).
 *
 * @param operator The identifier text to classify; must not be null
 * @return {@code true} if the text is exactly one of the quote-like operator names
 */
static boolean isIsQuoteLikeOperator(String operator) {
    return switch (operator) {
        case "q", "qq", "qw", "qx", "qr", "m", "s", "tr", "y" -> true;
        default -> false;
    };
}
/**
 * Parses operator tokens and special constructs.
 *
 * <p>This method handles a wide variety of operators and special constructs:
 * <ul>
 * <li>Grouping operators: (), [], {}</li>
 * <li>Quote operators: ', ", /, `, etc.</li>
 * <li>Reference operator: \</li>
 * <li>Sigils: $, @, %, *, &amp;</li>
 * <li>Unary operators: !, +, -, ~, ++, --</li>
 * <li>File test operators: -f, -d, etc.</li>
 * <li>Special constructs: &lt;&lt;heredoc, diamond operator &lt;&gt;, leading ::</li>
 * </ul>
 *
 * <p>Side effects on the parser: the leading-{@code ::} branch inserts a
 * {@code main} token into {@code parser.tokens}; the {@code \} branch toggles
 * {@code parser.parsingTakeReference}; the {@code ~~} branch temporarily rewrites
 * the current token's text to {@code ~} and restores it once the operand is parsed.
 *
 * @param parser The parser instance
 * @param token The operator token being parsed (already consumed)
 * @param operator The operator text (same as token.text)
 * @return A Node representing the parsed operator construct
 * @throws PerlCompilerException if the operator is not recognized or used incorrectly
 */
static Node parseOperator(Parser parser, LexerToken token, String operator) {
    // Check for autoquoting: keyword operators before => should be treated as barewords
    // This handles cases like: and => {...}, or => {...}, xor => {...}
    if (operator.equals("and") || operator.equals("or") || operator.equals("xor")) {
        String peekTokenText = peek(parser).text;
        if (peekTokenText.equals("=>")) {
            // Autoquote: convert operator keyword to string literal
            return new StringNode(token.text, parser.tokenIndex);
        }
    }

    Node operand = null;
    switch (token.text) {
        case "(":
            // Parentheses create a list context and group expressions
            return new ListNode(ListParser.parseList(parser, ")", 0), parser.tokenIndex);
        case "{":
            // Curly braces create anonymous hash references
            return new HashLiteralNode(ListParser.parseList(parser, "}", 0), parser.tokenIndex);
        case "[":
            // Square brackets create anonymous array references
            return new ArrayLiteralNode(ListParser.parseList(parser, "]", 0), parser.tokenIndex);
        case ".":
            // Dot at the beginning of a primary expression is a fractional number (.5)
            return NumberParser.parseFractionalNumber(parser);
        case "<", "<<":
            // Diamond operator <> or heredoc <<EOF
            return OperatorParser.parseDiamondOperator(parser, token);
        case "'", "\"", "/", "//", "/=", "`":
            // Quote-like operators for strings, regexes, and command execution
            return StringParser.parseRawString(parser, token.text);
        case "::":
            // Leading :: means main:: (e.g., ::foo is main::foo)
            // This allows accessing global variables even when a lexical exists
            LexerToken nextToken2 = peek(parser);
            if (nextToken2.type == LexerTokenType.IDENTIFIER) {
                // Insert "main" before the :: to create main::identifier
                parser.tokens.add(parser.tokenIndex - 1, new LexerToken(LexerTokenType.IDENTIFIER, "main"));
                parser.tokenIndex--; // Go back to process "main"
                return parseIdentifier(parser, parser.tokenIndex,
                        new LexerToken(LexerTokenType.IDENTIFIER, "main"), "main");
            }
            throw new PerlCompilerException(parser.tokenIndex, "syntax error", parser.ctx.errorUtil);
        case "\\":
            // Reference operator: \$var, \@array, \%hash, \&sub
            // Set flag to prevent &sub from being called during parsing
            parser.parsingTakeReference = true;
            operand = parser.parseExpression(parser.getPrecedence(token.text) + 1);
            parser.parsingTakeReference = false;
            // Special case for \&{CORE::push} take a reference to an operator
            if (operand instanceof OperatorNode operatorNode && operatorNode.operator.equals("&")) {
                if (operatorNode.operand instanceof IdentifierNode identifierNode && identifierNode.name.startsWith("CORE::")) {
                    // TODO implement take reference to operator
                    //
                    // ./jperl -e ' BEGIN { *shove = sub (\@@) { CORE::push(@{$_[0]}, @_[1..$#_]) } } shove @array, 1,2,3; print "[@array]\n" '
                    // [1 2 3]
                    //
                    throw new PerlCompilerException("Not implemented: take reference of operator `\\&" + identifierNode.name + "`");
                }
            }
            return new OperatorNode(token.text, operand, parser.tokenIndex);
        case "$", "$#", "@", "%", "*":
            // Variable sigils: $scalar, @array, %hash, *glob, $#array
            return Variable.parseVariable(parser, token.text);
        case "&":
            // Code sigil: &subroutine
            return Variable.parseCoderefVariable(parser, token);
        case "!", "+":
            // Simple unary operators
            operand = parser.parseExpression(parser.getPrecedence(token.text) + 1);
            if (operand == null) {
                parser.throwError("syntax error");
            }
            return new OperatorNode(token.text, operand, parser.tokenIndex);
        case "~", "~.":
            // Bitwise complement operators.
            // With the "bitwise" feature, ~ is emitted as "binary~" and ~. is allowed;
            // without it, ~ is emitted unchanged and ~. is a syntax error.
            if (parser.ctx.symbolTable.isFeatureCategoryEnabled("bitwise")) {
                if (operator.equals("~")) {
                    operator = "binary" + operator; // Mark as binary bitwise
                }
            } else {
                if (operator.equals("~.")) {
                    // ~. requires bitwise feature
                    throw new PerlCompilerException(parser.tokenIndex, "syntax error", parser.ctx.errorUtil);
                }
            }
            operand = parser.parseExpression(parser.getPrecedence(token.text) + 1);
            return new OperatorNode(operator, operand, parser.tokenIndex);
        case "~~":
            // Handle ~~ as two separate ~ operators
            // First, handle it as a single ~ operator
            String firstOperator = "~";
            if (parser.ctx.symbolTable.isFeatureCategoryEnabled("bitwise")) {
                firstOperator = "binary~";
            }
            // Put back a single ~ token for the next parse
            parser.tokenIndex--; // Back up
            LexerToken currentToken = parser.tokens.get(parser.tokenIndex);
            String originalText = currentToken.text;
            currentToken.text = "~";
            try {
                // Parse the operand (which will start with the second ~)
                operand = parser.parseExpression(parser.getPrecedence("~") + 1);
            } finally {
                // The token object lives in the shared token list. Restore its text so
                // that a later backtrack and re-parse over this position still sees the
                // original double operator instead of a single ~.
                currentToken.text = originalText;
            }
            return new OperatorNode(firstOperator, operand, parser.tokenIndex);
        case "--":
        case "++":
            // Pre-increment/decrement operators
            operand = parser.parseExpression(parser.getPrecedence(token.text));
            return new OperatorNode(token.text, operand, parser.tokenIndex);
        case "-":
            // Unary minus or file test operator (-f, -d, etc.)
            // The raw next token is inspected (no whitespace skipping), so only a
            // single-letter identifier immediately after the minus is considered.
            LexerToken nextToken = parser.tokens.get(parser.tokenIndex);
            if (nextToken.type == LexerTokenType.IDENTIFIER && nextToken.text.length() == 1) {
                // Check if this is a valid file test operator
                String testOp = nextToken.text;
                if (isValidFileTestOperator(testOp)) {
                    return parseFileTestOperator(parser, nextToken, operand);
                } else {
                    // Not a valid filetest operator
                    // Check if there's a function with this name
                    String functionName = nextToken.text;
                    String fullName = parser.ctx.symbolTable.getCurrentPackage() + "::" + functionName;
                    // NOTE(review): getGlobalCodeRef may register an entry for fullName as a
                    // side effect of this lookup -- confirm against GlobalVariable.
                    RuntimeScalar codeRef = GlobalVariable.getGlobalCodeRef(fullName);
                    if (codeRef.getDefinedBoolean()) {
                        // There's a function with this name, treat as regular unary minus
                        // Don't do anything special here, just fall through to regular unary minus handling
                    } else {
                        // Not a valid filetest operator and no function with this name
                        // Check what comes after to decide how to handle
                        // Skip whitespace to find the next meaningful token
                        int afterIndex = parser.tokenIndex + 1;
                        while (afterIndex < parser.tokens.size() &&
                                parser.tokens.get(afterIndex).type == LexerTokenType.WHITESPACE) {
                            afterIndex++;
                        }
                        if (afterIndex < parser.tokens.size()) {
                            LexerToken afterNext = parser.tokens.get(afterIndex);
                            // If there's something after the identifier that looks like an operand,
                            // it's probably an attempt to use a filetest operator, so give an error
                            if (afterNext.type == LexerTokenType.NUMBER ||
                                    afterNext.type == LexerTokenType.STRING ||
                                    afterNext.type == LexerTokenType.IDENTIFIER ||
                                    afterNext.text.equals("$") || afterNext.text.equals("@")) {
                                // Report the token that actually follows, not a fixed sample operand
                                parser.throwError("syntax error - Invalid filetest operator near \""
                                        + testOp + " " + afterNext.text + "\"");
                            }
                        }
                        // Otherwise, fall through to regular unary minus handling (will treat as string)
                        // This handles cases like -a in strict mode where it should be "-a"
                    }
                }
            }
            // Regular unary minus
            operand = parser.parseExpression(parser.getPrecedence(token.text) + 1);
            // Check for missing operand - this is a syntax error
            if (operand == null) {
                parser.throwError("Syntax error: unary minus operator requires an operand");
            }
            if (operand instanceof IdentifierNode identifierNode) {
                // Special case: -bareword becomes "-bareword" (string)
                return new StringNode("-" + identifierNode.name, parser.tokenIndex);
            }
            return new OperatorNode("unaryMinus", operand, parser.tokenIndex);
        case "*=":
            // Special variable glob "="
            return new OperatorNode("*", new IdentifierNode("=", parser.tokenIndex), parser.tokenIndex);
        default:
            // Unknown operator
            throw new PerlCompilerException(parser.tokenIndex, "syntax error", parser.ctx.errorUtil);
    }
}
/**
 * Parses a file test operator ({@code -f}, {@code -d}, ...) and its argument.
 *
 * <p>On entry {@code parser.tokenIndex} points at the operator letter; it is
 * advanced past the letter here. The argument may be wrapped in parentheses and
 * is resolved in this order: an upper-case bareword filehandle, the special
 * {@code _} handle, or at most one parsed expression, defaulting to {@code $_}
 * when none is given. A letter followed by {@code =>} is autoquoted instead.
 *
 * @param parser    The parser instance
 * @param nextToken The identifier token holding the file test letter
 * @param operand   Initial operand value; returned unchanged only if no branch assigns it
 * @return An {@link OperatorNode} named {@code "-" + letter}, or a {@link StringNode}
 *         when autoquoted
 * @throws org.perlonjava.runtime.PerlParserException when the operand is a unary minus
 *         with no operand of its own (the ambiguous {@code -C-} form)
 */
private static Node parseFileTestOperator(Parser parser, LexerToken nextToken, Node operand) {
    final String fileTest = "-" + nextToken.text;

    // Capture the line number first; later peek() calls have side effects on it.
    final int lineAtOperator = parser.ctx.errorUtil.getLineNumber(parser.tokenIndex - 1);
    parser.tokenIndex++;

    LexerToken upcoming = peek(parser);
    if (upcoming.text.equals("=>")) {
        // autoquote ` -X => ... `
        return new StringNode(fileTest, parser.tokenIndex);
    }

    final boolean parenthesized = upcoming.text.equals("(");
    if (parenthesized) {
        TokenUtils.consume(parser);
        upcoming = peek(parser);
    }

    // Bareword filehandles are recognised up front so that generic expression
    // parsing cannot turn them into subroutine calls. A lone '_' is excluded:
    // it is handled further down as the last-stat-buffer handle.
    if (upcoming.type == LexerTokenType.IDENTIFIER) {
        final String handleName = upcoming.text;
        final boolean looksLikeHandle =
                !handleName.equals("_") && handleName.matches("^[A-Z_][A-Z0-9_]*$");
        if (looksLikeHandle) {
            TokenUtils.consume(parser);
            // autovivify filehandle and convert to globref
            GlobalVariable.getGlobalIO(FileHandle.normalizeBarewordHandle(parser, handleName));
            Node handleNode = FileHandle.parseBarewordHandle(parser, handleName);
            if (handleNode == null) {
                handleNode = new IdentifierNode(handleName, parser.tokenIndex);
            }
            operand = handleNode;
            if (parenthesized) {
                TokenUtils.consume(parser, LexerTokenType.OPERATOR, ")");
            }
            return new OperatorNode(fileTest, operand, parser.tokenIndex);
        }
    }

    if (upcoming.text.equals("_")) {
        // -f _ reuses the stat buffer of the previous file test
        TokenUtils.consume(parser);
        operand = new IdentifierNode("_", parser.tokenIndex);
    } else {
        // Zero arguments means $_, one argument is used as-is, more is an error
        final ListNode arguments = ListParser.parseZeroOrOneList(parser, 0);
        switch (arguments.elements.size()) {
            case 0 -> operand = scalarUnderscore(parser);
            case 1 -> operand = arguments.elements.getFirst();
            default -> parser.throwError("syntax error");
        }
    }

    if (parenthesized) {
        TokenUtils.consume(parser, LexerTokenType.OPERATOR, ")");
    }

    // Ambiguous form such as -C- : the argument is a unary minus with no operand
    final boolean danglingMinus = operand instanceof OperatorNode minusNode
            && minusNode.operand == null
            && "unaryMinus".equals(minusNode.operator);
    if (danglingMinus) {
        // The line number is the one saved before any peek() call
        final String fileName = parser.ctx.errorUtil.getFileName();
        final String message =
                "Warning: Use of \"" + fileTest + "-\" without parentheses is ambiguous at "
                        + fileName + " line " + lineAtOperator + ".\n"
                        + "syntax error at " + fileName + " line " + lineAtOperator + ", at EOF\n"
                        + "Execution of " + fileName + " aborted due to compilation errors.";
        throw new org.perlonjava.runtime.PerlParserException(message);
    }

    return new OperatorNode(fileTest, operand, parser.tokenIndex);
}
/**
 * Decides whether a string is a single-letter file test operator.
 *
 * <p>Accepted letters: r w x o R W X O e z s f d l p S b c t u g k T B M A C.
 * The letter 'a' is deliberately absent (it's used for the -a command line switch).
 *
 * @param op Candidate operator letter, without the leading minus; must not be null
 * @return {@code true} only for a one-character string drawn from the accepted letters
 */
private static boolean isValidFileTestOperator(String op) {
    if (op.length() != 1) {
        return false;
    }
    return "rwxoRWXOezsfdlpSbctugkTBMAC".contains(op);
}
}