From 832d6fbca3584767cec914e550e19fd021d53df1 Mon Sep 17 00:00:00 2001 From: Taus Date: Fri, 9 Jan 2026 13:49:17 +0000 Subject: [PATCH 1/6] Prepare `LocalSourceNode` for locality Removes the dependence on the (global) `ModuleVariableNode.getARead()`, by adding a local version (that doesn't include `import *` reads) instead. --- .../python/dataflow/new/internal/DataFlowPublic.qll | 10 +++++++--- .../python/dataflow/new/internal/LocalSources.qll | 4 ++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll index ffecbcba57ac..1597553c7563 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll @@ -440,13 +440,17 @@ class ModuleVariableNode extends Node, TModuleVariableNode { /** Gets a node that reads this variable. */ Node getARead() { - result.asCfgNode() = var.getALoad().getAFlowNode() and - // Ignore reads that happen when the module is imported. These are only executed once. - not result.getScope() = mod + result = this.getALocalRead() or this = import_star_read(result) } + /** Gets a node that reads this variable, excluding reads that happen through `from ... import *`. */ + Node getALocalRead() { + result.asCfgNode() = var.getALoad().getAFlowNode() and + not result.getScope() = mod + } + /** Gets an `EssaNode` that corresponds to an assignment of this global variable. */ Node getAWrite() { any(EssaNodeDefinition def).definedBy(var, result.asCfgNode().(DefinitionNode)) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/LocalSources.qll b/python/ql/lib/semmle/python/dataflow/new/internal/LocalSources.qll index c43a111c9c8b..7752846ae1ff 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/LocalSources.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/LocalSources.qll @@ -67,7 +67,7 @@ class LocalSourceNode extends Node { or // We explicitly include any read of a global variable, as some of these may have local flow going // into them. - this = any(ModuleVariableNode mvn).getARead() + this = any(ModuleVariableNode v).getALocalRead() or // We include all scope entry definitions, as these act as the local source within the scope they // enter. @@ -248,7 +248,7 @@ private module Cached { pragma[nomagic] private predicate localSourceFlowStep(Node nodeFrom, Node nodeTo) { simpleLocalFlowStep(nodeFrom, nodeTo, _) and - not nodeTo = any(ModuleVariableNode v).getARead() + not nodeTo = any(ModuleVariableNode v).getALocalRead() } /** From fc43d222a2cb0557c4006033e79a9eb735738c33 Mon Sep 17 00:00:00 2001 From: Taus Date: Fri, 9 Jan 2026 17:01:11 +0000 Subject: [PATCH 2/6] Remove global restriction on `ModuleVariableNode` This may result in more nodes, but it should still be bounded by the number of global variables in the source code. --- .../python/dataflow/new/internal/DataFlowPublic.qll | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll index 1597553c7563..76782e1f6340 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll @@ -76,15 +76,7 @@ newtype TNode = node.getNode() = any(Comp c).getIterable() } or /** A node representing a global (module-level) variable in a specific module. */ - TModuleVariableNode(Module m, GlobalVariable v) { - v.getScope() = m and - ( - v.escapes() - or - isAccessedThroughImportStar(m) and - ImportStar::globalNameDefinedInModule(v.getId(), m) - ) - } or + TModuleVariableNode(Module m, GlobalVariable v) { v.getScope() = m } or /** * A synthetic node representing that an iterable sequence flows to consumer. */ From cd842447e8aa649146006801ef9d5623ff710c41 Mon Sep 17 00:00:00 2001 From: Taus Date: Fri, 9 Jan 2026 17:12:39 +0000 Subject: [PATCH 3/6] Get rid of unused predicate --- .../lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll | 2 -- 1 file changed, 2 deletions(-) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll index 76782e1f6340..b2832c8bdb44 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll @@ -462,8 +462,6 @@ class ModuleVariableNode extends Node, TModuleVariableNode { override Location getLocation() { result = mod.getLocation() } } -private predicate isAccessedThroughImportStar(Module m) { m = ImportStar::getStarImported(_) } - private ModuleVariableNode import_star_read(Node n) { resolved_import_star_module(result.getModule(), result.getVariable().getId(), n) } From b0e94e8544fda249c5f7690c755f27b8ca920374 Mon Sep 17 00:00:00 2001 From: Taus Date: Mon, 12 Jan 2026 15:04:14 +0000 Subject: [PATCH 4/6] Fix tests With `ModuleVariableNode`s now appearing for _all_ global variables (not just the ones that actually seem to be used), some of the tests changed a bit. Mostly this was in the form of new flow (because of new nodes that popped into existence). For some inline expectation tests, I opted to instead exclude these results, as there was no suitable location to annotate. For the normal tests, I just accepted the output (after having vetted it carefully, of course). --- .../utils/test/dataflow/MaximalFlowTest.qll | 4 ++- .../experimental/attrs/AttrWrites.expected | 1 + .../dataflow/basic/global.expected | 15 +++++++++++ .../dataflow/basic/globalStep.expected | 4 +++ .../dataflow/basic/local.expected | 5 ++++ .../dataflow/basic/maximalFlows.expected | 5 ++++ .../dataflow/basic/sinks.expected | 5 ++++ .../dataflow/basic/sources.expected | 5 ++++ .../dataflow/global-flow/test.py | 26 +++++++++---------- .../dataflow/typetracking/moduleattr.expected | 1 + .../PoorMansFunctionResolutionTest.ql | 4 ++- 11 files changed, 60 insertions(+), 15 deletions(-) diff --git a/python/ql/lib/utils/test/dataflow/MaximalFlowTest.qll b/python/ql/lib/utils/test/dataflow/MaximalFlowTest.qll index 7587584a269b..5e9831906a4a 100644 --- a/python/ql/lib/utils/test/dataflow/MaximalFlowTest.qll +++ b/python/ql/lib/utils/test/dataflow/MaximalFlowTest.qll @@ -8,7 +8,9 @@ module MaximalFlowTest implements FlowTestSig { predicate relevantFlow(DataFlow::Node source, DataFlow::Node sink) { source != sink and - MaximalFlows::flow(source, sink) + MaximalFlows::flow(source, sink) and + // exclude ModuleVariableNodes (which have location 0:0:0:0) + not sink instanceof DataFlow::ModuleVariableNode } } diff --git a/python/ql/test/experimental/attrs/AttrWrites.expected b/python/ql/test/experimental/attrs/AttrWrites.expected index f8a55824043f..d1fc30b34515 100644 --- a/python/ql/test/experimental/attrs/AttrWrites.expected +++ b/python/ql/test/experimental/attrs/AttrWrites.expected @@ -1,4 +1,5 @@ | test.py:5:9:5:16 | ControlFlowNode for __init__ | test.py:4:1:4:20 | ControlFlowNode for ClassExpr | __init__ | test.py:5:5:5:28 | ControlFlowNode for FunctionExpr | | test.py:6:9:6:16 | ControlFlowNode for Attribute | test.py:6:9:6:12 | ControlFlowNode for self | foo | test.py:6:20:6:22 | ControlFlowNode for foo | +| test.py:9:1:9:9 | ControlFlowNode for Attribute | test.py:0:0:0:0 | ModuleVariableNode in Module test for myobj | foo | test.py:9:13:9:17 | ControlFlowNode for StringLiteral | | test.py:9:1:9:9 | ControlFlowNode for Attribute | test.py:9:1:9:5 | ControlFlowNode for myobj | foo | test.py:9:13:9:17 | ControlFlowNode for StringLiteral | | test.py:12:1:12:25 | ControlFlowNode for setattr() | test.py:12:9:12:13 | ControlFlowNode for myobj | foo | test.py:12:23:12:24 | ControlFlowNode for IntegerLiteral | diff --git a/python/ql/test/library-tests/dataflow/basic/global.expected b/python/ql/test/library-tests/dataflow/basic/global.expected index 7d2c0cab9b92..9e0ef2e6751b 100644 --- a/python/ql/test/library-tests/dataflow/basic/global.expected +++ b/python/ql/test/library-tests/dataflow/basic/global.expected @@ -1,6 +1,9 @@ +| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:0:0:0:0 | ModuleVariableNode in Module test for obfuscated_id | | test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | ControlFlowNode for obfuscated_id | | test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id | +| test.py:1:5:1:17 | ControlFlowNode for obfuscated_id | test.py:0:0:0:0 | ModuleVariableNode in Module test for obfuscated_id | | test.py:1:5:1:17 | ControlFlowNode for obfuscated_id | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id | +| test.py:1:19:1:19 | ControlFlowNode for x | test.py:0:0:0:0 | ModuleVariableNode in Module test for b | | test.py:1:19:1:19 | ControlFlowNode for x | test.py:2:3:2:3 | ControlFlowNode for y | | test.py:1:19:1:19 | ControlFlowNode for x | test.py:2:7:2:7 | ControlFlowNode for x | | test.py:1:19:1:19 | ControlFlowNode for x | test.py:3:3:3:3 | ControlFlowNode for z | @@ -8,26 +11,33 @@ | test.py:1:19:1:19 | ControlFlowNode for x | test.py:4:10:4:10 | ControlFlowNode for z | | test.py:1:19:1:19 | ControlFlowNode for x | test.py:7:1:7:1 | ControlFlowNode for b | | test.py:1:19:1:19 | ControlFlowNode for x | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | +| test.py:2:3:2:3 | ControlFlowNode for y | test.py:0:0:0:0 | ModuleVariableNode in Module test for b | | test.py:2:3:2:3 | ControlFlowNode for y | test.py:3:3:3:3 | ControlFlowNode for z | | test.py:2:3:2:3 | ControlFlowNode for y | test.py:3:7:3:7 | ControlFlowNode for y | | test.py:2:3:2:3 | ControlFlowNode for y | test.py:4:10:4:10 | ControlFlowNode for z | | test.py:2:3:2:3 | ControlFlowNode for y | test.py:7:1:7:1 | ControlFlowNode for b | | test.py:2:3:2:3 | ControlFlowNode for y | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | +| test.py:2:7:2:7 | ControlFlowNode for x | test.py:0:0:0:0 | ModuleVariableNode in Module test for b | | test.py:2:7:2:7 | ControlFlowNode for x | test.py:2:3:2:3 | ControlFlowNode for y | | test.py:2:7:2:7 | ControlFlowNode for x | test.py:3:3:3:3 | ControlFlowNode for z | | test.py:2:7:2:7 | ControlFlowNode for x | test.py:3:7:3:7 | ControlFlowNode for y | | test.py:2:7:2:7 | ControlFlowNode for x | test.py:4:10:4:10 | ControlFlowNode for z | | test.py:2:7:2:7 | ControlFlowNode for x | test.py:7:1:7:1 | ControlFlowNode for b | | test.py:2:7:2:7 | ControlFlowNode for x | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | +| test.py:3:3:3:3 | ControlFlowNode for z | test.py:0:0:0:0 | ModuleVariableNode in Module test for b | | test.py:3:3:3:3 | ControlFlowNode for z | test.py:4:10:4:10 | ControlFlowNode for z | | test.py:3:3:3:3 | ControlFlowNode for z | test.py:7:1:7:1 | ControlFlowNode for b | | test.py:3:3:3:3 | ControlFlowNode for z | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | +| test.py:3:7:3:7 | ControlFlowNode for y | test.py:0:0:0:0 | ModuleVariableNode in Module test for b | | test.py:3:7:3:7 | ControlFlowNode for y | test.py:3:3:3:3 | ControlFlowNode for z | | test.py:3:7:3:7 | ControlFlowNode for y | test.py:4:10:4:10 | ControlFlowNode for z | | test.py:3:7:3:7 | ControlFlowNode for y | test.py:7:1:7:1 | ControlFlowNode for b | | test.py:3:7:3:7 | ControlFlowNode for y | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | +| test.py:4:10:4:10 | ControlFlowNode for z | test.py:0:0:0:0 | ModuleVariableNode in Module test for b | | test.py:4:10:4:10 | ControlFlowNode for z | test.py:7:1:7:1 | ControlFlowNode for b | | test.py:4:10:4:10 | ControlFlowNode for z | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | +| test.py:6:1:6:1 | ControlFlowNode for a | test.py:0:0:0:0 | ModuleVariableNode in Module test for a | +| test.py:6:1:6:1 | ControlFlowNode for a | test.py:0:0:0:0 | ModuleVariableNode in Module test for b | | test.py:6:1:6:1 | ControlFlowNode for a | test.py:1:19:1:19 | ControlFlowNode for x | | test.py:6:1:6:1 | ControlFlowNode for a | test.py:2:3:2:3 | ControlFlowNode for y | | test.py:6:1:6:1 | ControlFlowNode for a | test.py:2:7:2:7 | ControlFlowNode for x | @@ -37,6 +47,8 @@ | test.py:6:1:6:1 | ControlFlowNode for a | test.py:7:1:7:1 | ControlFlowNode for b | | test.py:6:1:6:1 | ControlFlowNode for a | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | | test.py:6:1:6:1 | ControlFlowNode for a | test.py:7:19:7:19 | ControlFlowNode for a | +| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:0:0:0:0 | ModuleVariableNode in Module test for a | +| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:0:0:0:0 | ModuleVariableNode in Module test for b | | test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:1:19:1:19 | ControlFlowNode for x | | test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:2:3:2:3 | ControlFlowNode for y | | test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:2:7:2:7 | ControlFlowNode for x | @@ -47,7 +59,10 @@ | test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:7:1:7:1 | ControlFlowNode for b | | test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | | test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:7:19:7:19 | ControlFlowNode for a | +| test.py:7:1:7:1 | ControlFlowNode for b | test.py:0:0:0:0 | ModuleVariableNode in Module test for b | +| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | test.py:0:0:0:0 | ModuleVariableNode in Module test for b | | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | test.py:7:1:7:1 | ControlFlowNode for b | +| test.py:7:19:7:19 | ControlFlowNode for a | test.py:0:0:0:0 | ModuleVariableNode in Module test for b | | test.py:7:19:7:19 | ControlFlowNode for a | test.py:1:19:1:19 | ControlFlowNode for x | | test.py:7:19:7:19 | ControlFlowNode for a | test.py:2:3:2:3 | ControlFlowNode for y | | test.py:7:19:7:19 | ControlFlowNode for a | test.py:2:7:2:7 | ControlFlowNode for x | diff --git a/python/ql/test/library-tests/dataflow/basic/globalStep.expected b/python/ql/test/library-tests/dataflow/basic/globalStep.expected index 00ee53dba003..26d8902e7bbe 100644 --- a/python/ql/test/library-tests/dataflow/basic/globalStep.expected +++ b/python/ql/test/library-tests/dataflow/basic/globalStep.expected @@ -1,5 +1,6 @@ | test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | ControlFlowNode for obfuscated_id | | test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | ControlFlowNode for obfuscated_id | +| test.py:1:5:1:17 | ControlFlowNode for obfuscated_id | test.py:0:0:0:0 | ModuleVariableNode in Module test for obfuscated_id | | test.py:1:5:1:17 | ControlFlowNode for obfuscated_id | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id | | test.py:1:19:1:19 | ControlFlowNode for x | test.py:2:3:2:3 | ControlFlowNode for y | | test.py:1:19:1:19 | ControlFlowNode for x | test.py:2:3:2:3 | ControlFlowNode for y | @@ -31,10 +32,13 @@ | test.py:3:7:3:7 | ControlFlowNode for y | test.py:3:3:3:3 | ControlFlowNode for z | | test.py:4:10:4:10 | ControlFlowNode for z | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | | test.py:4:10:4:10 | ControlFlowNode for z | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | +| test.py:6:1:6:1 | ControlFlowNode for a | test.py:0:0:0:0 | ModuleVariableNode in Module test for a | | test.py:6:1:6:1 | ControlFlowNode for a | test.py:7:19:7:19 | ControlFlowNode for a | | test.py:6:1:6:1 | ControlFlowNode for a | test.py:7:19:7:19 | ControlFlowNode for a | | test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:6:1:6:1 | ControlFlowNode for a | | test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:6:1:6:1 | ControlFlowNode for a | +| test.py:7:1:7:1 | ControlFlowNode for b | test.py:0:0:0:0 | ModuleVariableNode in Module test for b | +| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | test.py:7:1:7:1 | ControlFlowNode for b | | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | test.py:7:1:7:1 | ControlFlowNode for b | | test.py:7:19:7:19 | ControlFlowNode for a | test.py:1:19:1:19 | ControlFlowNode for x | | test.py:7:19:7:19 | ControlFlowNode for a | test.py:1:19:1:19 | ControlFlowNode for x | diff --git a/python/ql/test/library-tests/dataflow/basic/local.expected b/python/ql/test/library-tests/dataflow/basic/local.expected index 142c84015aeb..eb47f1308d42 100644 --- a/python/ql/test/library-tests/dataflow/basic/local.expected +++ b/python/ql/test/library-tests/dataflow/basic/local.expected @@ -1,3 +1,8 @@ +| test.py:0:0:0:0 | ModuleVariableNode in Module test for __name__ | test.py:0:0:0:0 | ModuleVariableNode in Module test for __name__ | +| test.py:0:0:0:0 | ModuleVariableNode in Module test for __package__ | test.py:0:0:0:0 | ModuleVariableNode in Module test for __package__ | +| test.py:0:0:0:0 | ModuleVariableNode in Module test for a | test.py:0:0:0:0 | ModuleVariableNode in Module test for a | +| test.py:0:0:0:0 | ModuleVariableNode in Module test for b | test.py:0:0:0:0 | ModuleVariableNode in Module test for b | +| test.py:0:0:0:0 | ModuleVariableNode in Module test for obfuscated_id | test.py:0:0:0:0 | ModuleVariableNode in Module test for obfuscated_id | | test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | | test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | ControlFlowNode for obfuscated_id | | test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id | diff --git a/python/ql/test/library-tests/dataflow/basic/maximalFlows.expected b/python/ql/test/library-tests/dataflow/basic/maximalFlows.expected index a9fa5d8da920..421918620455 100644 --- a/python/ql/test/library-tests/dataflow/basic/maximalFlows.expected +++ b/python/ql/test/library-tests/dataflow/basic/maximalFlows.expected @@ -1,7 +1,12 @@ +| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:0:0:0:0 | ModuleVariableNode in Module test for obfuscated_id | | test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id | +| test.py:1:19:1:19 | ControlFlowNode for x | test.py:0:0:0:0 | ModuleVariableNode in Module test for b | | test.py:1:19:1:19 | ControlFlowNode for x | test.py:4:10:4:10 | ControlFlowNode for z | | test.py:1:19:1:19 | ControlFlowNode for x | test.py:7:1:7:1 | ControlFlowNode for b | +| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:0:0:0:0 | ModuleVariableNode in Module test for a | +| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:0:0:0:0 | ModuleVariableNode in Module test for b | | test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:4:10:4:10 | ControlFlowNode for z | | test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:7:1:7:1 | ControlFlowNode for b | | test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:7:19:7:19 | ControlFlowNode for a | +| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | test.py:0:0:0:0 | ModuleVariableNode in Module test for b | | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | test.py:7:1:7:1 | ControlFlowNode for b | diff --git a/python/ql/test/library-tests/dataflow/basic/sinks.expected b/python/ql/test/library-tests/dataflow/basic/sinks.expected index bf5800e0b73f..34525d5043e1 100644 --- a/python/ql/test/library-tests/dataflow/basic/sinks.expected +++ b/python/ql/test/library-tests/dataflow/basic/sinks.expected @@ -1,3 +1,8 @@ +| test.py:0:0:0:0 | ModuleVariableNode in Module test for __name__ | +| test.py:0:0:0:0 | ModuleVariableNode in Module test for __package__ | +| test.py:0:0:0:0 | ModuleVariableNode in Module test for a | +| test.py:0:0:0:0 | ModuleVariableNode in Module test for b | +| test.py:0:0:0:0 | ModuleVariableNode in Module test for obfuscated_id | | test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | | test.py:1:1:1:21 | SynthDictSplatParameterNode | | test.py:1:5:1:17 | ControlFlowNode for obfuscated_id | diff --git a/python/ql/test/library-tests/dataflow/basic/sources.expected b/python/ql/test/library-tests/dataflow/basic/sources.expected index bf5800e0b73f..34525d5043e1 100644 --- a/python/ql/test/library-tests/dataflow/basic/sources.expected +++ b/python/ql/test/library-tests/dataflow/basic/sources.expected @@ -1,3 +1,8 @@ +| test.py:0:0:0:0 | ModuleVariableNode in Module test for __name__ | +| test.py:0:0:0:0 | ModuleVariableNode in Module test for __package__ | +| test.py:0:0:0:0 | ModuleVariableNode in Module test for a | +| test.py:0:0:0:0 | ModuleVariableNode in Module test for b | +| test.py:0:0:0:0 | ModuleVariableNode in Module test for obfuscated_id | | test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | | test.py:1:1:1:21 | SynthDictSplatParameterNode | | test.py:1:5:1:17 | ControlFlowNode for obfuscated_id | diff --git a/python/ql/test/library-tests/dataflow/global-flow/test.py b/python/ql/test/library-tests/dataflow/global-flow/test.py index ab2da1c2959d..0b62e47c71fb 100644 --- a/python/ql/test/library-tests/dataflow/global-flow/test.py +++ b/python/ql/test/library-tests/dataflow/global-flow/test.py @@ -8,9 +8,9 @@ g1, g2 = [6], [7] # $writes=g1 writes=g2 -# Assignment that's only referenced in this scope. This one will not give rise to a `ModuleVariableNode`. +# Assignment that's only referenced in this scope. -unreferenced_g = [8] +unreferenced_g = [8] # $writes=unreferenced_g print(unreferenced_g) # Testing modifications of globals @@ -34,7 +34,7 @@ # A global with multiple potential definitions -import unknown_module +import unknown_module # $writes=unknown_module if unknown_module.attr: g_mult = [200] # $writes=g_mult else: @@ -46,7 +46,7 @@ if unknown_module.attr: g_redef = [500] # $writes=g_redef -def global_access(): +def global_access(): # $writes=global_access l = 5 print(g) # $reads=g print(g1) # $reads=g1 @@ -59,12 +59,12 @@ def global_access(): def print_g_mod(): # $writes=print_g_mod print(g_mod) # $reads=g_mod -def global_mod(): +def global_mod(): # $writes=global_mod global g_mod g_mod += [150] # $reads,writes=g_mod print_g_mod() # $reads=print_g_mod -def global_inside_local_function(): +def global_inside_local_function(): # $writes=global_inside_local_function def local_function(): print(g) # $reads=g local_function() @@ -76,21 +76,21 @@ def local_function(): import foo_module # $writes=foo_module -def use_foo(): +def use_foo(): # $writes=use_foo print(foo_module.attr) # $reads=foo_module # Partial imports from bar import baz_attr, quux_attr # $writes=baz_attr writes=quux_attr -def use_partial_import(): +def use_partial_import(): # $writes=use_partial_import print(baz_attr, quux_attr) # $reads=baz_attr reads=quux_attr # Aliased imports from spam_module import ham_attr as eggs_attr # $writes=eggs_attr -def use_aliased_import(): +def use_aliased_import(): # $writes=use_aliased_import print(eggs_attr) # $reads=eggs_attr # Import star (unlikely to work unless we happen to extract/model the referenced module) @@ -99,23 +99,23 @@ def use_aliased_import(): from unknown import * -def secretly_use_unknown(): +def secretly_use_unknown(): # $writes=secretly_use_unknown print(unknown_attr) # $reads=unknown_attr # Known modules from known import * -def secretly_use_known(): +def secretly_use_known(): # $writes=secretly_use_known print(known_attr) # $reads=known_attr # Local import in function -def imports_locally(): +def imports_locally(): # $writes=imports_locally import mod1 # Global import hidden in function -def imports_stuff(): +def imports_stuff(): # $writes=imports_stuff global mod2 import mod2 # $writes=mod2 diff --git a/python/ql/test/library-tests/dataflow/typetracking/moduleattr.expected b/python/ql/test/library-tests/dataflow/typetracking/moduleattr.expected index ff9673aaaea8..06b560623929 100644 --- a/python/ql/test/library-tests/dataflow/typetracking/moduleattr.expected +++ b/python/ql/test/library-tests/dataflow/typetracking/moduleattr.expected @@ -2,6 +2,7 @@ module_tracker | import_as_attr.py:1:6:1:11 | ControlFlowNode for ImportExpr | module_attr_tracker | import_as_attr.py:0:0:0:0 | ModuleVariableNode in Module import_as_attr for attr_ref | +| import_as_attr.py:0:0:0:0 | ModuleVariableNode in Module import_as_attr for x | | import_as_attr.py:1:20:1:35 | ControlFlowNode for ImportMember | | import_as_attr.py:1:28:1:35 | ControlFlowNode for attr_ref | | import_as_attr.py:3:1:3:1 | ControlFlowNode for x | diff --git a/python/ql/test/library-tests/frameworks/internal-ql-helpers/PoorMansFunctionResolutionTest.ql b/python/ql/test/library-tests/frameworks/internal-ql-helpers/PoorMansFunctionResolutionTest.ql index b0325b027c3a..b9575e43493e 100644 --- a/python/ql/test/library-tests/frameworks/internal-ql-helpers/PoorMansFunctionResolutionTest.ql +++ b/python/ql/test/library-tests/frameworks/internal-ql-helpers/PoorMansFunctionResolutionTest.ql @@ -17,7 +17,9 @@ module InlinePoorMansFunctionResolutionTest implements TestSig { ) and // exclude decorator calls (which with our extractor rewrites does reference the // function) - not ref.asExpr() = func.getDefinition().(FunctionExpr).getADecoratorCall() + not ref.asExpr() = func.getDefinition().(FunctionExpr).getADecoratorCall() and + // exclude ModuleVariableNodes (which have location 0:0:0:0) + not ref instanceof DataFlow::ModuleVariableNode | value = func.getName() and tag = "resolved" and From a11646395e71abd39661c0ed50d367c74c66e945 Mon Sep 17 00:00:00 2001 From: Taus Date: Mon, 12 Jan 2026 15:53:08 +0000 Subject: [PATCH 5/6] Split out `ArgumentNode` inheritance Overriding both `argumentOf` and `toString` was causing sadness for the local annotations. Easiest fix was to split these out into their own subclasses. The names are perhaps a bit too verbose now (alternative suggestions are welcomed). --- .../dataflow/new/internal/DataFlowDispatch.qll | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll index c717cd7bc97b..b617d5eacffd 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll @@ -1722,7 +1722,7 @@ private class SummaryPostUpdateNode extends FlowSummaryNode, PostUpdateNodeImpl * `CallGraphConstruction::Make` in stead of * `CallGraphConstruction::Simple::Make` appropriately. */ -class CapturedVariablesArgumentNode extends CfgNode, ArgumentNode { +class CapturedVariablesArgumentNode extends CfgNode { CallNode callNode; CapturedVariablesArgumentNode() { @@ -1733,7 +1733,11 @@ class CapturedVariablesArgumentNode extends CfgNode, ArgumentNode { } override string toString() { result = "Capturing closure argument" } +} +class CapturedVariablesArgumentNodeAsArgumentNode extends CapturedVariablesArgumentNode, + ArgumentNode +{ override predicate argumentOf(DataFlowCall call, ArgumentPosition pos) { callNode = call.getNode() and pos.isLambdaSelf() @@ -1741,9 +1745,7 @@ class CapturedVariablesArgumentNode extends CfgNode, ArgumentNode { } /** A synthetic node representing the values of variables captured by a comprehension. */ -class SynthCompCapturedVariablesArgumentNode extends Node, TSynthCompCapturedVariablesArgumentNode, - ArgumentNode -{ +class SynthCompCapturedVariablesArgumentNode extends Node, TSynthCompCapturedVariablesArgumentNode { Comp comp; SynthCompCapturedVariablesArgumentNode() { this = TSynthCompCapturedVariablesArgumentNode(comp) } @@ -1755,7 +1757,11 @@ class SynthCompCapturedVariablesArgumentNode extends Node, TSynthCompCapturedVar override Location getLocation() { result = comp.getLocation() } Comp getComprehension() { result = comp } +} +class SynthCompCapturedVariablesArgumentNodeAsArgumentNode extends SynthCompCapturedVariablesArgumentNode, + ArgumentNode +{ override predicate argumentOf(DataFlowCall call, ArgumentPosition pos) { call.(ComprehensionCall).getComprehension() = comp and pos.isLambdaSelf() From 6c95109c8ed35fe0885394863d5df7d69f2ba639 Mon Sep 17 00:00:00 2001 From: Taus Date: Mon, 12 Jan 2026 16:53:54 +0000 Subject: [PATCH 6/6] Overapproximate `CapturedVariablesArgumentNode` We just create one for each call, not just the ones that capture variables. Of course, this causes a _lot_ of nodes to be instances of `CapturedVariablesArgumentNode`, which made the override of `toString` problematic, so that was removed as well. --- .../python/dataflow/new/internal/DataFlowDispatch.qll | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll index b617d5eacffd..a5388a9447aa 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll @@ -1725,14 +1725,7 @@ private class SummaryPostUpdateNode extends FlowSummaryNode, PostUpdateNodeImpl class CapturedVariablesArgumentNode extends CfgNode { CallNode callNode; - CapturedVariablesArgumentNode() { - node = callNode.getFunction() and - exists(Function target | resolveCall(callNode, target, _) | - target = any(VariableCapture::CapturedVariable v).getACapturingScope() - ) - } - - override string toString() { result = "Capturing closure argument" } + CapturedVariablesArgumentNode() { node = callNode.getFunction() } } class CapturedVariablesArgumentNodeAsArgumentNode extends CapturedVariablesArgumentNode,