From 2451f95e468e708720c53f7a30eca05199e35752 Mon Sep 17 00:00:00 2001
From: Ben OConnor <bengoconnor@gmail.com>
Date: Wed, 27 May 2026 16:35:10 -0400
Subject: [PATCH 1/5] jedi: emit col_offset and fix runner correctness bugs

The Jedi runner had four pre-existing bugs. The first made every
prediction fail the strict scorer (`is_same_element`, added in 2f7c6056);
the other three produced wrong names, keys, or types:

1. Predictions did not emit col_offset, which strict matching requires.
2. get_function_name only stripped the first dotted component, so paths
   deeper than two levels produced over-long names like
   "assignments.chained.main.func1" instead of "func1". The runner
   silently broke outside of Docker's shallow path layout.
3. Module-level variables received a spurious "function" key because the
   parent-is-module check compared parent.name to parent.module_name
   instead of checking parent.type == "module".
4. Function-reference assignments (e.g., `a = func1`) were typed as
   func1's return type instead of callable, because the runner ran
   find_types_by_execute on every inferred function without checking
   whether the position was the function's own definition site.

Result on micro-benchmark: the old scores of 5/850 (local) and 414/850
(Docker) both rise to 433/850, under the lenient and strict scorers alike.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../jedi/src/jedi_type_inference.py           | 97 ++++++++++++-------
 1 file changed, 63 insertions(+), 34 deletions(-)
diff --git a/src/target_tools/jedi/src/jedi_type_inference.py b/src/target_tools/jedi/src/jedi_type_inference.py
index 61a27775d..a72afed04 100644
--- a/src/target_tools/jedi/src/jedi_type_inference.py
+++ b/src/target_tools/jedi/src/jedi_type_inference.py
@@ -100,17 +100,24 @@ def find_types_by_execute(self, jedi_obj):
         return _type
 
     def get_function_name(self, jedi_obj):
+        """Return the qualified name of jedi_obj relative to its module,
+        walking up parent scopes so nested functions become 'outer.inner'."""
         try:
             if jedi_obj.name == "<lambda>":
-                func_name = "lambda"
-            else:
-                parts = jedi_obj.full_name.split(".", 1)
-                func_name = parts[-1] if len(parts) > 1 else jedi_obj.full_name
-        except Exception as e:
+                return "lambda"
+            parts = []
+            current = jedi_obj
+            while current is not None and current.type != "module":
+                name = "lambda" if current.name == "<lambda>" else current.name
+                parts.append(name)
+                try:
+                    current = current.parent()
+                except Exception:
+                    break
+            return ".".join(reversed(parts)) if parts else jedi_obj.name
+        except Exception:
             print("full_name not found in jedi_obj?")
-            func_name = jedi_obj.name
-
-        return func_name
+            return jedi_obj.name
 
     def infer_types(self):
         """
@@ -143,24 +150,46 @@ def infer_types(self):
                 if _infer:
                     for inferred in _infer:
                         if inferred.type == "function":
-                            # _type = self.parse_type_hint(inferred.get_type_hint())
-                            # if not _type:
-                            #     self.find_types_by_execute(inferred)
-
-                            _type = self.find_types_by_execute(inferred)
-
-                            _info = {
-                                "file": node.name,
-                                "line_number": pos["line"],
-                            }
-                            if inferred.name != "<lambda>":
-                                _info["function"] = self.get_function_name(inferred)
-                            _info["type"] = _type if _type else {"any"}
-
-                            variable_name = var.split(":")[0].strip()
-                            if variable_name != self.get_function_name(inferred):
-                                _info["variable"] = variable_name
-                            if _type:
+                            # Distinguish between the function's own definition
+                            # site (return-type is what's wanted, e.g. for `def
+                            # func1():`) and a reference to it (callable is
+                            # what's wanted, e.g. for `a = func1`).
+                            at_def_site = (
+                                pos["line"] == inferred.line
+                                and pos["column"] == inferred.column
+                            )
+
+                            if at_def_site:
+                                _type = self.find_types_by_execute(inferred)
+
+                                _info = {
+                                    "file": node.name,
+                                    "line_number": pos["line"],
+                                    "col_offset": pos["column"] + 1,
+                                }
+                                if inferred.name != "<lambda>":
+                                    _info["function"] = self.get_function_name(inferred)
+                                _info["type"] = _type if _type else {"any"}
+
+                                variable_name = var.split(":")[0].strip()
+                                if variable_name != self.get_function_name(inferred):
+                                    _info["variable"] = variable_name
+                                if _type:
+                                    output_inferred.append(_info)
+                            else:
+                                variable_name = var.split(":")[0].strip()
+                                _info = {
+                                    "file": node.name,
+                                    "line_number": pos["line"],
+                                    "col_offset": pos["column"] + 1,
+                                    "variable": variable_name,
+                                    "type": {"callable"},
+                                }
+                                parent = pos["jedi_obj"].parent()
+                                if parent and parent.type != "module":
+                                    parent_func = self.get_function_name(parent)
+                                    if parent_func:
+                                        _info["function"] = parent_func
                                 output_inferred.append(_info)
 
                         elif inferred.type == "instance":
@@ -187,17 +216,15 @@ def infer_types(self):
                             _info = {
                                 "file": node.name,
                                 "line_number": pos["line"],
+                                "col_offset": pos["column"] + 1,
                                 "variable": var.split(":")[0],
                                 "type": {_type},
                             }
-                            if (
-                                not pos["jedi_obj"].parent().name
-                                == pos["jedi_obj"].parent().module_name
-                            ):
-                                if self.get_function_name(pos["jedi_obj"].parent()):
-                                    _info["function"] = self.get_function_name(
-                                        pos["jedi_obj"].parent()
-                                    )
+                            parent = pos["jedi_obj"].parent()
+                            if parent and parent.type != "module":
+                                parent_func = self.get_function_name(parent)
+                                if parent_func:
+                                    _info["function"] = parent_func
                             if _type:
                                 output_inferred.append(_info)
 
@@ -206,6 +233,7 @@ def infer_types(self):
                             _info = {
                                 "file": node.name,
                                 "line_number": pos["line"],
+                                "col_offset": pos["column"] + 1,
                                 "variable": var.split(":")[0],
                                 "function": self.get_function_name(
                                     pos["jedi_obj"].parent()
@@ -225,6 +253,7 @@ def infer_types(self):
                         _info = {
                             "file": node.name,
                             "line_number": pos["line"],
+                            "col_offset": pos["column"] + 1,
                             "parameter": var.split(":")[0],
                             "function": self.get_function_name(
                                 pos["jedi_obj"].parent()

From 82b5ffb84e54a1cafd22994b187fd6b16179ee7f Mon Sep 17 00:00:00 2001
From: Ben OConnor <bengoconnor@gmail.com>
Date: Wed, 27 May 2026 16:35:21 -0400
Subject: [PATCH 2/5] headergen: emit col_offset via source-parsed enrichment

The HeaderGen server (headergen PyPI package) returns inference results
without col_offset, so every prediction failed the strict scorer
(`is_same_element`). HeaderGen's runner is a thin HTTP client and does
not build its own dicts, so the fix lives in translator.py.

Approach: after receiving the server response, parse the source with
Jedi to build a (name, line) -> col_offset map, then look up each
entry's position. Subscript and attribute expressions reported as
"a[0]" or "self.x" use the base name's column; nested functions
reported as "outer.inner" use the inner name's column.

Result on micro-benchmark: the strict score rises from 0/850 to 603/850.
Lenient is essentially unchanged (612 baseline -> 611). The 8-entry
gap between lenient (611) and strict (603) comes from line_number
mismatches between HeaderGen and the ground truth (GT), which the
lenient scorer silently accepts (line_number checks are commented out
in large_scale_analysis.py).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/target_tools/headergen/src/runner.py     |  1 +
 src/target_tools/headergen/src/translator.py | 53 ++++++++++++++++++++
 2 files changed, 54 insertions(+)

diff --git a/src/target_tools/headergen/src/runner.py b/src/target_tools/headergen/src/runner.py
index 69698e40e..5f3105b5b 100644
--- a/src/target_tools/headergen/src/runner.py
+++ b/src/target_tools/headergen/src/runner.py
@@ -62,6 +62,7 @@ def main_runner(args):
             logger.info(file)
 
             inferred = process_file(file)
+            inferred = translator.enrich_with_col_offsets(file, inferred)
 
             json_file_path = str(file).replace(".py", "_result.json")
 
diff --git a/src/target_tools/headergen/src/translator.py b/src/target_tools/headergen/src/translator.py
index 2014be66b..1f0d8acae 100644
--- a/src/target_tools/headergen/src/translator.py
+++ b/src/target_tools/headergen/src/translator.py
@@ -3,12 +3,65 @@
 import os
 from pathlib import Path
 
+import jedi
+
 
 def list_json_files(folder_path):
     python_files = sorted(Path(folder_path).rglob("*.json"))
     return python_files
 
 
+def build_position_map(source_path):
+    """Map (name, line_number) -> 1-indexed col_offset for every definition
+    in the source file. HeaderGen's server doesn't emit col_offset, so we
+    recover it by parsing the source with Jedi."""
+    positions = {}
+    try:
+        script = jedi.Script(path=str(source_path))
+        for n in script.get_names(all_scopes=True, definitions=True, references=True):
+            positions.setdefault((n.name, n.line), n.column + 1)
+    except Exception:
+        pass
+    return positions
+
+
+def _lookup_name(entry):
+    """Return the source-level name to look up for this entry's position."""
+    if "variable" in entry:
+        # Subscript/attribute accesses like 'h[0]' or 'self.child' are
+        # reported as the full expression; the col_offset GT expects is
+        # where the base name begins.
+        name = entry["variable"]
+        for sep in ("[", "."):
+            if sep in name:
+                name = name.split(sep, 1)[0]
+                break
+        return name
+    if "parameter" in entry:
+        return entry["parameter"]
+    if "function" in entry:
+        # Nested functions are reported as 'outer.inner'; the position
+        # we want is the inner name's own column.
+        return entry["function"].rsplit(".", 1)[-1]
+    return None
+
+
+def enrich_with_col_offsets(source_path, entries):
+    """Augment HeaderGen entries with col_offset by looking up the position
+    of each entry's identifying name in the source file."""
+    positions = build_position_map(source_path)
+    for entry in entries:
+        if "col_offset" in entry:
+            continue
+        name = _lookup_name(entry)
+        if name is None:
+            continue
+        col = positions.get((name, entry["line_number"]))
+        if col is not None:
+            entry["col_offset"] = col
+    return entries
+
+
 def translate_content(file_path):
     with open(file_path) as f:
         data = json.load(f)

From 4ce11983aa626c858e55db3e17df8b83432caf1b Mon Sep 17 00:00:00 2001
From: Ben OConnor <bengoconnor@gmail.com>
Date: Wed, 27 May 2026 16:35:29 -0400
Subject: [PATCH 3/5] scalpel: emit col_offset via source-parsed enrichment

Scalpel's runner builds dicts in-process but does not include col_offset,
so every prediction failed the strict scorer (`is_same_element`). The
fix mirrors the HeaderGen approach: parse the source with Jedi to build
a position map and look up each entry's column in translator.py, called
once after process_file in the runner.

Result on micro-benchmark: the strict score rises from 0/845 to 179/845.
Lenient is preserved at 182/845 (the Docker baseline is 183/850, close
enough to be explained by path/version drift). One file fails Scalpel
inference; that failure is not addressed here.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/target_tools/scalpel/src/runner.py     |  2 +
 src/target_tools/scalpel/src/translator.py | 48 ++++++++++++++++++++++
 2 files changed, 50 insertions(+)

diff --git a/src/target_tools/scalpel/src/runner.py b/src/target_tools/scalpel/src/runner.py
index c078d7f5b..20e1f7c62 100644
--- a/src/target_tools/scalpel/src/runner.py
+++ b/src/target_tools/scalpel/src/runner.py
@@ -4,6 +4,7 @@
 import os
 from pathlib import Path
 
+import translator
 import utils
 from scalpel.typeinfer.typeinfer import TypeInference
 
@@ -42,6 +43,7 @@ def main_runner(args):
         try:
             # logger.debug(file)
             inferred = process_file(file)
+            inferred = translator.enrich_with_col_offsets(file, inferred)
             json_file_path = str(file).replace(".py", "_result.json")
 
             with open(json_file_path, "w") as json_file:
diff --git a/src/target_tools/scalpel/src/translator.py b/src/target_tools/scalpel/src/translator.py
index 9f3c550a8..e0da9dac8 100644
--- a/src/target_tools/scalpel/src/translator.py
+++ b/src/target_tools/scalpel/src/translator.py
@@ -3,12 +3,60 @@
 import os
 from pathlib import Path
 
+import jedi
+
 
 def list_json_files(folder_path):
     python_files = sorted(Path(folder_path).rglob("*.json"))
     return python_files
 
 
+def build_position_map(source_path):
+    """Map (name, line_number) -> 1-indexed col_offset for every definition
+    and reference in the source. Scalpel's runner doesn't emit col_offset, so
+    we recover it by parsing the source with Jedi."""
+    positions = {}
+    try:
+        script = jedi.Script(path=str(source_path))
+        for n in script.get_names(all_scopes=True, definitions=True, references=True):
+            positions.setdefault((n.name, n.line), n.column + 1)
+    except Exception:
+        pass
+    return positions
+
+
+def _lookup_name(entry):
+    """Return the source-level name to look up for this entry's position."""
+    if "variable" in entry:
+        name = entry["variable"]
+        for sep in ("[", "."):
+            if sep in name:
+                name = name.split(sep, 1)[0]
+                break
+        return name
+    if "parameter" in entry:
+        return entry["parameter"]
+    if "function" in entry:
+        return entry["function"].rsplit(".", 1)[-1]
+    return None
+
+
+def enrich_with_col_offsets(source_path, entries):
+    """Augment entries with col_offset by looking up the position of each
+    entry's identifying name in the source file."""
+    positions = build_position_map(source_path)
+    for entry in entries:
+        if "col_offset" in entry:
+            continue
+        name = _lookup_name(entry)
+        if name is None:
+            continue
+        col = positions.get((name, entry["line_number"]))
+        if col is not None:
+            entry["col_offset"] = col
+    return entries
+
+
 def main_translator(args):
     json_files = list_json_files(args.bechmark_path)
     error_count = 0

From 5130d1a37733bb2ae6728a7669875aad7006422b Mon Sep 17 00:00:00 2001
From: Ben OConnor <bengoconnor@gmail.com>
Date: Wed, 27 May 2026 19:22:52 -0400
Subject: [PATCH 4/5] headergen, scalpel: use stdlib ast for col_offset
 recovery, skip ambiguous

The previous version of these translators used jedi for the (name, line)
-> col_offset position lookup. That has two issues:

1. Architectural smell: Scalpel's runner would have to depend on jedi (a
   sibling tool being evaluated). HeaderGen happened to have jedi as a
   transitive dep so the import worked, but the principle was wrong.

2. Synthesis vs recovery: when multiple names share the same (name, line)
   key (e.g., `x = lambda x: x` has three `x`s on one line), the previous
   code silently picked the first via setdefault. That risks attaching a
   col_offset the tool didn't actually intend.

This commit replaces jedi with stdlib `ast` (same lookup, no extra dep,
no cross-tool entanglement) and skips col_offset emission entirely when
the lookup is ambiguous. Empirically across the full micro-benchmark:

- HeaderGen: 805/853 entries have a unique position, 5 are ambiguous,
  43 are unfindable in source (the latter are HeaderGen's `ClassName.attr`
  style which differs from GT's `self.attr` convention; unmatchable
  regardless).
- Scalpel: 369/369 unique, zero ambiguous.

So the col_offset enrichment is recovery (the position is determined by
what the tool already emitted) for >94% of HeaderGen output and 100% of
Scalpel output. The remaining ambiguous entries are now correctly handled
by NOT attaching a position rather than guessing.

Docker results after this change:
- HeaderGen: 0 -> 580/850 strict (591 -> 601 lenient)
- Scalpel:   0 -> 180/850 strict (183 -> 187 lenient)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/target_tools/headergen/src/translator.py | 46 ++++++++++++++------
 src/target_tools/scalpel/src/translator.py   | 46 ++++++++++++++------
 2 files changed, 64 insertions(+), 28 deletions(-)

diff --git a/src/target_tools/headergen/src/translator.py b/src/target_tools/headergen/src/translator.py
index 1f0d8acae..d0d89de80 100644
--- a/src/target_tools/headergen/src/translator.py
+++ b/src/target_tools/headergen/src/translator.py
@@ -1,10 +1,10 @@
 import argparse
+import ast
 import json
 import os
+from collections import defaultdict
 from pathlib import Path
 
-import jedi
-
 
 def list_json_files(folder_path):
     python_files = sorted(Path(folder_path).rglob("*.json"))
@@ -12,16 +12,33 @@ def list_json_files(folder_path):
 
 
 def build_position_map(source_path):
-    """Map (name, line_number) -> 1-indexed col_offset for every definition
-    in the source file. HeaderGen's server doesn't emit col_offset, so we
-    recover it by parsing the source with Jedi."""
-    positions = {}
+    """Map (name, line_number) -> [1-indexed col_offsets] for every name
+    occurrence in the source. HeaderGen's server doesn't emit col_offset, but
+    for any (name, line) it gives us, the column is determined by the source.
+    We keep all candidates so the enrichment can skip ambiguous cases."""
+    positions = defaultdict(list)
     try:
-        script = jedi.Script(path=str(source_path))
-        for n in script.get_names(all_scopes=True, definitions=True, references=True):
-            positions.setdefault((n.name, n.line), n.column + 1)
+        with open(source_path) as f:
+            tree = ast.parse(f.read())
     except Exception:
-        pass
+        return positions
+
+    for node in ast.walk(tree):
+        if isinstance(node, ast.Name):
+            positions[(node.id, node.lineno)].append(node.col_offset + 1)
+        elif isinstance(node, ast.arg):
+            positions[(node.arg, node.lineno)].append(node.col_offset + 1)
+        elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
+            prefix = (
+                "async def " if isinstance(node, ast.AsyncFunctionDef) else "def "
+            )
+            positions[(node.name, node.lineno)].append(
+                node.col_offset + len(prefix) + 1
+            )
+        elif isinstance(node, ast.ClassDef):
+            positions[(node.name, node.lineno)].append(
+                node.col_offset + len("class ") + 1
+            )
     return positions
 
 
@@ -48,7 +65,8 @@ def _lookup_name(entry):
 
 def enrich_with_col_offsets(source_path, entries):
     """Augment HeaderGen entries with col_offset by looking up the position
-    of each entry's identifying name in the source file."""
+    of each entry's identifying name in the source file. Skip ambiguous
+    cases (multiple candidates) so we never guess a position."""
     positions = build_position_map(source_path)
     for entry in entries:
         if "col_offset" in entry:
@@ -56,9 +74,9 @@ def enrich_with_col_offsets(source_path, entries):
         name = _lookup_name(entry)
         if name is None:
             continue
-        col = positions.get((name, entry["line_number"]))
-        if col is not None:
-            entry["col_offset"] = col
+        cands = sorted(set(positions.get((name, entry["line_number"]), [])))
+        if len(cands) == 1:
+            entry["col_offset"] = cands[0]
     return entries
 
 
diff --git a/src/target_tools/scalpel/src/translator.py b/src/target_tools/scalpel/src/translator.py
index e0da9dac8..5541fc4d4 100644
--- a/src/target_tools/scalpel/src/translator.py
+++ b/src/target_tools/scalpel/src/translator.py
@@ -1,10 +1,10 @@
 import argparse
+import ast
 import json
 import os
+from collections import defaultdict
 from pathlib import Path
 
-import jedi
-
 
 def list_json_files(folder_path):
     python_files = sorted(Path(folder_path).rglob("*.json"))
@@ -12,16 +12,33 @@ def list_json_files(folder_path):
 
 
 def build_position_map(source_path):
-    """Map (name, line_number) -> 1-indexed col_offset for every definition
-    and reference in the source. Scalpel's runner doesn't emit col_offset, so
-    we recover it by parsing the source with Jedi."""
-    positions = {}
+    """Map (name, line_number) -> [1-indexed col_offsets] for every name
+    occurrence in the source. Scalpel's runner doesn't emit col_offset, but
+    for any (name, line) it gives us, the column is determined by the source.
+    We keep all candidates so the enrichment can skip ambiguous cases."""
+    positions = defaultdict(list)
     try:
-        script = jedi.Script(path=str(source_path))
-        for n in script.get_names(all_scopes=True, definitions=True, references=True):
-            positions.setdefault((n.name, n.line), n.column + 1)
+        with open(source_path) as f:
+            tree = ast.parse(f.read())
     except Exception:
-        pass
+        return positions
+
+    for node in ast.walk(tree):
+        if isinstance(node, ast.Name):
+            positions[(node.id, node.lineno)].append(node.col_offset + 1)
+        elif isinstance(node, ast.arg):
+            positions[(node.arg, node.lineno)].append(node.col_offset + 1)
+        elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
+            prefix = (
+                "async def " if isinstance(node, ast.AsyncFunctionDef) else "def "
+            )
+            positions[(node.name, node.lineno)].append(
+                node.col_offset + len(prefix) + 1
+            )
+        elif isinstance(node, ast.ClassDef):
+            positions[(node.name, node.lineno)].append(
+                node.col_offset + len("class ") + 1
+            )
     return positions
 
 
@@ -43,7 +60,8 @@ def _lookup_name(entry):
 
 def enrich_with_col_offsets(source_path, entries):
     """Augment entries with col_offset by looking up the position of each
-    entry's identifying name in the source file."""
+    entry's identifying name in the source file. Skip ambiguous cases
+    (multiple candidates) so we never guess a position."""
     positions = build_position_map(source_path)
     for entry in entries:
         if "col_offset" in entry:
@@ -51,9 +69,9 @@ def enrich_with_col_offsets(source_path, entries):
         name = _lookup_name(entry)
         if name is None:
             continue
-        col = positions.get((name, entry["line_number"]))
-        if col is not None:
-            entry["col_offset"] = col
+        cands = sorted(set(positions.get((name, entry["line_number"]), [])))
+        if len(cands) == 1:
+            entry["col_offset"] = cands[0]
     return entries
 
 

From e4b7b97f6ff6b93ef6ce1ba9957c7aac94106156 Mon Sep 17 00:00:00 2001
From: Ben OConnor <bengoconnor@gmail.com>
Date: Wed, 27 May 2026 19:23:11 -0400
Subject: [PATCH 5/5] headergen: install g++ in Dockerfile to enable
 line-profiler build

The HeaderGen Docker image previously installed only gcc, not g++.
HeaderGen's transitive dependency `line-profiler` has a Cython C++
extension; on platforms where no prebuilt wheel exists (e.g., arm64
Linux + Python 3.10), pip falls back to building from source, which
requires g++. The build then fails with:

    g++ -... -c line_profiler/_line_profiler.cpp -o ...
    error: command 'g++' failed: No such file or directory
    ERROR: Failed building wheel for line-profiler

This change adds g++ to the apt-get install line. The image now builds
cleanly on arm64 hosts as well as amd64.

(Builds may have appeared to succeed previously on hosts where a
line-profiler wheel was already cached from an earlier build with g++
present; fresh builds without the cache hit the source-compile path
and surface the issue.)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/target_tools/headergen/Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/target_tools/headergen/Dockerfile b/src/target_tools/headergen/Dockerfile
index d7dd8b834..df49b8d2f 100644
--- a/src/target_tools/headergen/Dockerfile
+++ b/src/target_tools/headergen/Dockerfile
@@ -10,7 +10,7 @@ WORKDIR /app
 
 # Install dependencies
 RUN apt-get update \
-    && apt-get -y install git gcc
+    && apt-get -y install git gcc g++
 
 
 COPY requirements.txt /app/requirements.txt