Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 91 additions & 1 deletion fyi/semgrep-grammars/src/semgrep-python/grammar.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,19 @@

const base_grammar = require('tree-sitter-python/grammar');

// The base grammar keeps its own commaSep1 in its module scope, where this
// file cannot reach it, so a local copy is provided for the rules below that
// restate comma-separated lists.

/**
 * Builds a rule matching one or more occurrences of `rule` separated by commas.
 *
 * @param {*} rule - the grammar rule to repeat
 * @returns {*} `rule` followed by zero or more `',' rule` pairs
 */
function commaSep1(rule) {
  const moreItems = repeat(seq(',', rule));
  return seq(rule, moreItems);
}

module.exports = grammar(base_grammar, {
name: 'python',

conflicts: ($, previous) => previous.concat([
// '{ ... }' is ambiguous between a set whose element is the '...' ellipsis
// expression and a dictionary whose element is the '...' sgrep ellipsis.
[$.primary_expression, $.dictionary],
]),

/*
Expand All @@ -34,7 +43,13 @@ module.exports = grammar(base_grammar, {
// identifier is a terminal, and thus can't do
// the usual choice/previous shadowing definition.

identifier: $ => /\$?[_\p{XID_Start}][_\p{XID_Continue}]*/,
// The first alternative matches semgrep metavariable-ellipsis '$...NAME'
// (e.g. '$...ARGS'). Like the menhir lexer, we treat it as just another
// identifier (-> Name in the AST), so no extra conversion is needed.
// The second alternative is the base identifier plus a leading '$' for
// ordinary metavariables ('$FOO').
identifier: $ =>
/\$\.\.\.[A-Z_][A-Z_0-9]*|\$?[_\p{XID_Start}][_\p{XID_Continue}]*/,

// Allow '...' in the attribute position of a dot-access expression,
// so that patterns like `a. ... .d` work for matching call chains.
Expand All @@ -46,5 +61,80 @@ module.exports = grammar(base_grammar, {
field('attribute', choice($.identifier, '...')),
)),

// sgrep-ext: deep ellipsis '<... e ...>' (menhir AST: DeepEllipsis).
// Exposed as an expression, mirroring how the base grammar exposes
// 'ellipsis' inside primary_expression.
deep_ellipsis: $ => seq('<...', $.expression, '...>'),

// Spread `previous.members` so the new member is added to the *flat*
// choice and we automatically track any members added by future upstream
// grammar bumps. Plain `choice(previous, ...)` would instead wrap the
// original members under a single nested node and churn every site in the
// conversion that destructures a primary_expression.
primary_expression: ($, previous) => choice(
...previous.members,
$.deep_ellipsis,
),

// sgrep-ext: typed metavariable '$X: type' (menhir AST: TypedMetavar).
// The menhir parser only accepts this in argument position to avoid
// conflicts with annotations/slices/lambda elsewhere, so we do the same.
typed_metavariable: $ => seq($.identifier, ':', $.type),

// Copy of the base argument_list with $.typed_metavariable added to the
// set of permitted arguments. (tree-sitter rule overrides replace the rule
// wholesale, so the seq must be restated.)
argument_list: $ => seq(
'(',
optional(commaSep1(
choice(
$.expression,
$.list_splat,
$.dictionary_splat,
alias($.parenthesized_list_splat, $.parenthesized_expression),
$.keyword_argument,
$.typed_metavariable,
),
)),
optional(','),
')',
),

// sgrep-ext: bare '...' as a parameter, e.g. 'def f(...)'
// (menhir AST: ParamEllipsis). Spread previous.members (see
// primary_expression note).
parameter: ($, previous) => choice(
...previous.members,
$.ellipsis,
),

// sgrep-ext: a standalone decorator as a whole pattern, e.g. '@$NAME(...)'
// (menhir AST: the 'Decorator' any). A lone decorator is not a valid
// statement, so we let the module itself be a single decorator; the
// conversion's parse_pattern turns that into a Decorator. A real program
// (including a normal decorated definition) still uses the statement-list
// form, which consumes the whole input and therefore wins.
module: ($, previous) => choice(previous, $.decorator),

// sgrep-ext: '...' as a dict element, e.g. '{ ..., $K: $V, ... }'
// (menhir AST: Key (Ellipsis ...)). Restated wholesale to add $.ellipsis
// to the element choice. Sets/lists already accept '...' since their
// elements are plain expressions.
dictionary: $ => seq(
'{',
optional(commaSep1(choice($.pair, $.dictionary_splat, $.ellipsis))),
optional(','),
'}',
),

// sgrep-ext: '...' as an assignment / for-in target, e.g. the 'for ...'
// in '[... for ... in xs]'. The comprehension body already accepts '...'
// because it is a plain expression. Spread previous.members (see
// primary_expression note).
pattern: ($, previous) => choice(
...previous.members,
$.ellipsis,
),

}
});
37 changes: 14 additions & 23 deletions fyi/versions
Original file line number Diff line number Diff line change
Expand Up @@ -2,38 +2,29 @@ File: semgrep-grammars/src/tree-sitter-python/LICENSE
Git repo name: tree-sitter-python
Latest commit in repo: c5fca1a186e8e528115196178c28eefa8d86b0b0
Last change in file:
commit 409b5d671eb0ea4972eeacaaca24bbec1acf79b1
Author: Amaan Qureshi <amaanq12@gmail.com>
Date: Tue Jan 7 16:59:00 2025 -0500
commit c5fca1a186e8e528115196178c28eefa8d86b0b0
Author: Amaan Qureshi <contact@amaanq.com>
Date: Thu Sep 11 00:06:42 2025 -0400

fix(scanner): keep size of indents consistent
ci: clone examples after running tests
---
File: semgrep-grammars/src/tree-sitter-python/grammar.js
Git repo name: tree-sitter-python
Latest commit in repo: c5fca1a186e8e528115196178c28eefa8d86b0b0
Last change in file:
commit 409b5d671eb0ea4972eeacaaca24bbec1acf79b1
Author: Amaan Qureshi <amaanq12@gmail.com>
Date: Tue Jan 7 16:59:00 2025 -0500
commit c5fca1a186e8e528115196178c28eefa8d86b0b0
Author: Amaan Qureshi <contact@amaanq.com>
Date: Thu Sep 11 00:06:42 2025 -0400

fix(scanner): keep size of indents consistent
ci: clone examples after running tests
---
File: semgrep-grammars/src/semgrep-python/grammar.js
Git repo name: caracas
Latest commit in repo: f8a2e9ce7645454fbcb4aacb1887821aebd66f7f
Git repo name: athens
Latest commit in repo: c1251fd3669f676e7cd593f1a92bce523f25c261
Last change in file:
commit d68c1d87318808ec1b36ce89570ef6c0bc763f77
Author: Brandon Wu <49291449+brandonspark@users.noreply.github.com>
Date: Wed Mar 4 13:06:00 2026 -0800
commit c1251fd3669f676e7cd593f1a92bce523f25c261
Author: Marc-André Laverdière <marc-andre@semgrep.com>
Date: Fri Jun 19 15:02:08 2026 -0600

feat(python): support ellipsis in dot-access chain patterns (#567)

Override the `attribute` rule to accept `'...'` as an alternative to
`identifier` in the attribute field. This lets semgrep patterns like
`a. ... .d` parse correctly for matching call chains, matching the
existing Java `field_access` behavior.

Fixes https://github.com/semgrep/semgrep/issues/11545

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
simplifications
---
Loading