From 2451f95e468e708720c53f7a30eca05199e35752 Mon Sep 17 00:00:00 2001 From: Ben OConnor Date: Wed, 27 May 2026 16:35:10 -0400 Subject: [PATCH 1/5] jedi: emit col_offset and fix runner correctness bugs The Jedi runner had four pre-existing bugs that caused every prediction to fail the strict scorer (`is_same_element`, added in 2f7c6056): 1. Predictions did not emit col_offset, which strict matching requires. 2. get_function_name only stripped the first dotted component, so paths deeper than two levels produced over-long names like "assignments.chained.main.func1" instead of "func1". The runner silently broke outside of Docker's shallow path layout. 3. Module-level variables received a spurious "function" key because the parent-is-module check compared parent.name to parent.module_name instead of checking parent.type == "module". 4. Function-reference assignments (e.g., `a = func1`) were typed as func1's return type instead of callable, because the runner ran find_types_by_execute on every inferred function without checking whether the position was the function's own definition site. Result on micro-benchmark: 5/850 (local, old) and 414/850 (Docker, old) both rise to 433/850 under both lenient and strict scorers. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../jedi/src/jedi_type_inference.py | 97 ++++++++++++------- 1 file changed, 63 insertions(+), 34 deletions(-) diff --git a/src/target_tools/jedi/src/jedi_type_inference.py b/src/target_tools/jedi/src/jedi_type_inference.py index 61a27775d..a72afed04 100644 --- a/src/target_tools/jedi/src/jedi_type_inference.py +++ b/src/target_tools/jedi/src/jedi_type_inference.py @@ -100,17 +100,24 @@ def find_types_by_execute(self, jedi_obj): return _type def get_function_name(self, jedi_obj): + """Return the qualified name of jedi_obj relative to its module, + walking up parent scopes so nested functions become 'outer.inner'.""" try: if jedi_obj.name == "": - func_name = "lambda" - else: - parts = jedi_obj.full_name.split(".", 1) - func_name = parts[-1] if len(parts) > 1 else jedi_obj.full_name - except Exception as e: + return "lambda" + parts = [] + current = jedi_obj + while current is not None and current.type != "module": + name = "lambda" if current.name == "" else current.name + parts.append(name) + try: + current = current.parent() + except Exception: + break + return ".".join(reversed(parts)) if parts else jedi_obj.name + except Exception: print("full_name not found in jedi_obj?") - func_name = jedi_obj.name - - return func_name + return jedi_obj.name def infer_types(self): """ @@ -143,24 +150,46 @@ def infer_types(self): if _infer: for inferred in _infer: if inferred.type == "function": - # _type = self.parse_type_hint(inferred.get_type_hint()) - # if not _type: - # self.find_types_by_execute(inferred) - - _type = self.find_types_by_execute(inferred) - - _info = { - "file": node.name, - "line_number": pos["line"], - } - if inferred.name != "": - _info["function"] = self.get_function_name(inferred) - _info["type"] = _type if _type else {"any"} - - variable_name = var.split(":")[0].strip() - if variable_name != self.get_function_name(inferred): - _info["variable"] = variable_name - if _type: + # Distinguish between the 
function's own definition + # site (return-type is what's wanted, e.g. for `def + # func1():`) and a reference to it (callable is + # what's wanted, e.g. for `a = func1`). + at_def_site = ( + pos["line"] == inferred.line + and pos["column"] == inferred.column + ) + + if at_def_site: + _type = self.find_types_by_execute(inferred) + + _info = { + "file": node.name, + "line_number": pos["line"], + "col_offset": pos["column"] + 1, + } + if inferred.name != "": + _info["function"] = self.get_function_name(inferred) + _info["type"] = _type if _type else {"any"} + + variable_name = var.split(":")[0].strip() + if variable_name != self.get_function_name(inferred): + _info["variable"] = variable_name + if _type: + output_inferred.append(_info) + else: + variable_name = var.split(":")[0].strip() + _info = { + "file": node.name, + "line_number": pos["line"], + "col_offset": pos["column"] + 1, + "variable": variable_name, + "type": {"callable"}, + } + parent = pos["jedi_obj"].parent() + if parent and parent.type != "module": + parent_func = self.get_function_name(parent) + if parent_func: + _info["function"] = parent_func output_inferred.append(_info) elif inferred.type == "instance": @@ -187,17 +216,15 @@ def infer_types(self): _info = { "file": node.name, "line_number": pos["line"], + "col_offset": pos["column"] + 1, "variable": var.split(":")[0], "type": {_type}, } - if ( - not pos["jedi_obj"].parent().name - == pos["jedi_obj"].parent().module_name - ): - if self.get_function_name(pos["jedi_obj"].parent()): - _info["function"] = self.get_function_name( - pos["jedi_obj"].parent() - ) + parent = pos["jedi_obj"].parent() + if parent and parent.type != "module": + parent_func = self.get_function_name(parent) + if parent_func: + _info["function"] = parent_func if _type: output_inferred.append(_info) @@ -206,6 +233,7 @@ def infer_types(self): _info = { "file": node.name, "line_number": pos["line"], + "col_offset": pos["column"] + 1, "variable": var.split(":")[0], "function": 
self.get_function_name( pos["jedi_obj"].parent() @@ -225,6 +253,7 @@ def infer_types(self): _info = { "file": node.name, "line_number": pos["line"], + "col_offset": pos["column"] + 1, "parameter": var.split(":")[0], "function": self.get_function_name( pos["jedi_obj"].parent() From 82b5ffb84e54a1cafd22994b187fd6b16179ee7f Mon Sep 17 00:00:00 2001 From: Ben OConnor Date: Wed, 27 May 2026 16:35:21 -0400 Subject: [PATCH 2/5] headergen: emit col_offset via source-parsed enrichment The HeaderGen server (headergen PyPI package) returns inference results without col_offset, so every prediction failed the strict scorer (`is_same_element`). HeaderGen's runner is a thin HTTP client and does not build its own dicts, so the fix lives in translator.py. Approach: after receiving the server response, parse the source with Jedi to build a (name, line) -> col_offset map, then look up each entry's position. Subscript and attribute expressions reported as "a[0]" or "self.x" use the base name's column; nested functions reported as "outer.inner" use the inner name's column. Result on micro-benchmark: 0/850 strict rises to 603/850 under strict. Lenient is essentially unchanged (612 baseline -> 611). The 8-entry gap between lenient (611) and strict (603) is line_number mismatches between HeaderGen and GT that the lenient scorer silently accepts (line_number checks are commented out in large_scale_analysis.py). 
Co-Authored-By: Claude Opus 4.7 (1M context) --- src/target_tools/headergen/src/runner.py | 1 + src/target_tools/headergen/src/translator.py | 53 ++++++++++++++++++++ 2 files changed, 54 insertions(+) diff --git a/src/target_tools/headergen/src/runner.py b/src/target_tools/headergen/src/runner.py index 69698e40e..5f3105b5b 100644 --- a/src/target_tools/headergen/src/runner.py +++ b/src/target_tools/headergen/src/runner.py @@ -62,6 +62,7 @@ def main_runner(args): logger.info(file) inferred = process_file(file) + inferred = translator.enrich_with_col_offsets(file, inferred) json_file_path = str(file).replace(".py", "_result.json") diff --git a/src/target_tools/headergen/src/translator.py b/src/target_tools/headergen/src/translator.py index 2014be66b..1f0d8acae 100644 --- a/src/target_tools/headergen/src/translator.py +++ b/src/target_tools/headergen/src/translator.py @@ -3,12 +3,65 @@ import os from pathlib import Path +import jedi + def list_json_files(folder_path): python_files = sorted(Path(folder_path).rglob("*.json")) return python_files +def build_position_map(source_path): + """Map (name, line_number) -> 1-indexed col_offset for every definition + in the source file. HeaderGen's server doesn't emit col_offset, so we + recover it by parsing the source with Jedi.""" + positions = {} + try: + script = jedi.Script(path=str(source_path)) + for n in script.get_names(all_scopes=True, definitions=True, references=True): + positions.setdefault((n.name, n.line), n.column + 1) + except Exception: + pass + return positions + + +def _lookup_name(entry): + """Return the source-level name to look up for this entry's position.""" + if "variable" in entry: + # Subscript/attribute accesses like 'h[0]' or 'self.child' are + # reported as the full expression; the col_offset GT expects is + # where the base name begins. 
+ name = entry["variable"] + for sep in ("[", "."): + if sep in name: + name = name.split(sep, 1)[0] + break + return name + if "parameter" in entry: + return entry["parameter"] + if "function" in entry: + # Nested functions are reported as 'outer.inner'; the position + # we want is the inner name's own column. + return entry["function"].rsplit(".", 1)[-1] + return None + + +def enrich_with_col_offsets(source_path, entries): + """Augment HeaderGen entries with col_offset by looking up the position + of each entry's identifying name in the source file.""" + positions = build_position_map(source_path) + for entry in entries: + if "col_offset" in entry: + continue + name = _lookup_name(entry) + if name is None: + continue + col = positions.get((name, entry["line_number"])) + if col is not None: + entry["col_offset"] = col + return entries + + def translate_content(file_path): with open(file_path) as f: data = json.load(f) From 4ce11983aa626c858e55db3e17df8b83432caf1b Mon Sep 17 00:00:00 2001 From: Ben OConnor Date: Wed, 27 May 2026 16:35:29 -0400 Subject: [PATCH 3/5] scalpel: emit col_offset via source-parsed enrichment Scalpel's runner builds dicts in-process but does not include col_offset, so every prediction failed the strict scorer (`is_same_element`). The fix mirrors the HeaderGen approach: parse the source with Jedi to build a position map and look up each entry's column in translator.py, called once after process_file in the runner. Result on micro-benchmark: 0/845 strict rises to 179/845 under strict. Lenient is preserved at 182/845 (Docker baseline 183/850 - close enough for path/version drift). One file fails Scalpel inference; not addressed here. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- src/target_tools/scalpel/src/runner.py | 2 + src/target_tools/scalpel/src/translator.py | 48 ++++++++++++++++++++++ 2 files changed, 50 insertions(+) diff --git a/src/target_tools/scalpel/src/runner.py b/src/target_tools/scalpel/src/runner.py index c078d7f5b..20e1f7c62 100644 --- a/src/target_tools/scalpel/src/runner.py +++ b/src/target_tools/scalpel/src/runner.py @@ -4,6 +4,7 @@ import os from pathlib import Path +import translator import utils from scalpel.typeinfer.typeinfer import TypeInference @@ -42,6 +43,7 @@ def main_runner(args): try: # logger.debug(file) inferred = process_file(file) + inferred = translator.enrich_with_col_offsets(file, inferred) json_file_path = str(file).replace(".py", "_result.json") with open(json_file_path, "w") as json_file: diff --git a/src/target_tools/scalpel/src/translator.py b/src/target_tools/scalpel/src/translator.py index 9f3c550a8..e0da9dac8 100644 --- a/src/target_tools/scalpel/src/translator.py +++ b/src/target_tools/scalpel/src/translator.py @@ -3,12 +3,60 @@ import os from pathlib import Path +import jedi + def list_json_files(folder_path): python_files = sorted(Path(folder_path).rglob("*.json")) return python_files +def build_position_map(source_path): + """Map (name, line_number) -> 1-indexed col_offset for every definition + and reference in the source. 
Scalpel's runner doesn't emit col_offset, so + we recover it by parsing the source with Jedi.""" + positions = {} + try: + script = jedi.Script(path=str(source_path)) + for n in script.get_names(all_scopes=True, definitions=True, references=True): + positions.setdefault((n.name, n.line), n.column + 1) + except Exception: + pass + return positions + + +def _lookup_name(entry): + """Return the source-level name to look up for this entry's position.""" + if "variable" in entry: + name = entry["variable"] + for sep in ("[", "."): + if sep in name: + name = name.split(sep, 1)[0] + break + return name + if "parameter" in entry: + return entry["parameter"] + if "function" in entry: + return entry["function"].rsplit(".", 1)[-1] + return None + + +def enrich_with_col_offsets(source_path, entries): + """Augment entries with col_offset by looking up the position of each + entry's identifying name in the source file.""" + positions = build_position_map(source_path) + for entry in entries: + if "col_offset" in entry: + continue + name = _lookup_name(entry) + if name is None: + continue + col = positions.get((name, entry["line_number"])) + if col is not None: + entry["col_offset"] = col + return entries + + def main_translator(args): json_files = list_json_files(args.bechmark_path) error_count = 0 From 5130d1a37733bb2ae6728a7669875aad7006422b Mon Sep 17 00:00:00 2001 From: Ben OConnor Date: Wed, 27 May 2026 19:22:52 -0400 Subject: [PATCH 4/5] headergen, scalpel: use stdlib ast for col_offset recovery, skip ambiguous The previous version of these translators used jedi for the (name, line) -> col_offset position lookup. That has two issues: 1. Architectural smell: Scalpel's runner would have to depend on jedi (a sibling tool being evaluated). HeaderGen happened to have jedi as a transitive dep so the import worked, but the principle was wrong. 2. 
Synthesis vs recovery: when multiple names share the same (name, line) key (e.g., `x = lambda x: x` has three `x`s on one line), the previous code silently picked the first via setdefault. That risks attaching a col_offset the tool didn't actually intend. This commit replaces jedi with stdlib `ast` (same lookup, no extra dep, no cross-tool entanglement) and skips col_offset emission entirely when the lookup is ambiguous. Empirically across the full micro-benchmark: - HeaderGen: 805/853 entries have a unique position, 5 are ambiguous, 43 are unfindable in source (the latter are HeaderGen's `ClassName.attr` style which differs from GT's `self.attr` convention; unmatchable regardless). - Scalpel: 369/369 unique, zero ambiguous. So the col_offset enrichment is recovery (the position is determined by what the tool already emitted) for >94% of HeaderGen output and 100% of Scalpel output. The remaining entries (5 ambiguous, 43 unfindable) are now handled by NOT attaching a position rather than guessing. 
Docker results after this change: - HeaderGen: 0 -> 580/850 strict (591 -> 601 lenient) - Scalpel: 0 -> 180/850 strict (183 -> 187 lenient) Co-Authored-By: Claude Opus 4.7 (1M context) --- src/target_tools/headergen/src/translator.py | 46 ++++++++++++++------ src/target_tools/scalpel/src/translator.py | 46 ++++++++++++++------ 2 files changed, 64 insertions(+), 28 deletions(-) diff --git a/src/target_tools/headergen/src/translator.py b/src/target_tools/headergen/src/translator.py index 1f0d8acae..d0d89de80 100644 --- a/src/target_tools/headergen/src/translator.py +++ b/src/target_tools/headergen/src/translator.py @@ -1,10 +1,10 @@ import argparse +import ast import json import os +from collections import defaultdict from pathlib import Path -import jedi - def list_json_files(folder_path): python_files = sorted(Path(folder_path).rglob("*.json")) @@ -12,16 +12,33 @@ def list_json_files(folder_path): def build_position_map(source_path): - """Map (name, line_number) -> 1-indexed col_offset for every definition - in the source file. HeaderGen's server doesn't emit col_offset, so we - recover it by parsing the source with Jedi.""" - positions = {} + """Map (name, line_number) -> [1-indexed col_offsets] for every name + occurrence in the source. HeaderGen's server doesn't emit col_offset, but + for any (name, line) it gives us, the column is determined by the source. 
+ We keep all candidates so the enrichment can skip ambiguous cases.""" + positions = defaultdict(list) try: - script = jedi.Script(path=str(source_path)) - for n in script.get_names(all_scopes=True, definitions=True, references=True): - positions.setdefault((n.name, n.line), n.column + 1) + with open(source_path) as f: + tree = ast.parse(f.read()) except Exception: - pass + return positions + + for node in ast.walk(tree): + if isinstance(node, ast.Name): + positions[(node.id, node.lineno)].append(node.col_offset + 1) + elif isinstance(node, ast.arg): + positions[(node.arg, node.lineno)].append(node.col_offset + 1) + elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)): + prefix = ( + "async def " if isinstance(node, ast.AsyncFunctionDef) else "def " + ) + positions[(node.name, node.lineno)].append( + node.col_offset + len(prefix) + 1 + ) + elif isinstance(node, ast.ClassDef): + positions[(node.name, node.lineno)].append( + node.col_offset + len("class ") + 1 + ) return positions @@ -48,7 +65,8 @@ def _lookup_name(entry): def enrich_with_col_offsets(source_path, entries): """Augment HeaderGen entries with col_offset by looking up the position - of each entry's identifying name in the source file.""" + of each entry's identifying name in the source file. 
Skip ambiguous + cases (multiple candidates) so we never guess a position.""" positions = build_position_map(source_path) for entry in entries: if "col_offset" in entry: @@ -56,9 +74,9 @@ def enrich_with_col_offsets(source_path, entries): name = _lookup_name(entry) if name is None: continue - col = positions.get((name, entry["line_number"])) - if col is not None: - entry["col_offset"] = col + cands = sorted(set(positions.get((name, entry["line_number"]), []))) + if len(cands) == 1: + entry["col_offset"] = cands[0] return entries diff --git a/src/target_tools/scalpel/src/translator.py b/src/target_tools/scalpel/src/translator.py index e0da9dac8..5541fc4d4 100644 --- a/src/target_tools/scalpel/src/translator.py +++ b/src/target_tools/scalpel/src/translator.py @@ -1,10 +1,10 @@ import argparse +import ast import json import os +from collections import defaultdict from pathlib import Path -import jedi - def list_json_files(folder_path): python_files = sorted(Path(folder_path).rglob("*.json")) @@ -12,16 +12,33 @@ def list_json_files(folder_path): def build_position_map(source_path): - """Map (name, line_number) -> 1-indexed col_offset for every definition - and reference in the source. Scalpel's runner doesn't emit col_offset, so - we recover it by parsing the source with Jedi.""" - positions = {} + """Map (name, line_number) -> [1-indexed col_offsets] for every name + occurrence in the source. Scalpel's runner doesn't emit col_offset, but + for any (name, line) it gives us, the column is determined by the source. 
+ We keep all candidates so the enrichment can skip ambiguous cases.""" + positions = defaultdict(list) try: - script = jedi.Script(path=str(source_path)) - for n in script.get_names(all_scopes=True, definitions=True, references=True): - positions.setdefault((n.name, n.line), n.column + 1) + with open(source_path) as f: + tree = ast.parse(f.read()) except Exception: - pass + return positions + + for node in ast.walk(tree): + if isinstance(node, ast.Name): + positions[(node.id, node.lineno)].append(node.col_offset + 1) + elif isinstance(node, ast.arg): + positions[(node.arg, node.lineno)].append(node.col_offset + 1) + elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)): + prefix = ( + "async def " if isinstance(node, ast.AsyncFunctionDef) else "def " + ) + positions[(node.name, node.lineno)].append( + node.col_offset + len(prefix) + 1 + ) + elif isinstance(node, ast.ClassDef): + positions[(node.name, node.lineno)].append( + node.col_offset + len("class ") + 1 + ) return positions @@ -43,7 +60,8 @@ def _lookup_name(entry): def enrich_with_col_offsets(source_path, entries): """Augment entries with col_offset by looking up the position of each - entry's identifying name in the source file.""" + entry's identifying name in the source file. 
Skip ambiguous cases + (multiple candidates) so we never guess a position.""" positions = build_position_map(source_path) for entry in entries: if "col_offset" in entry: @@ -51,9 +69,9 @@ def enrich_with_col_offsets(source_path, entries): name = _lookup_name(entry) if name is None: continue - col = positions.get((name, entry["line_number"])) - if col is not None: - entry["col_offset"] = col + cands = sorted(set(positions.get((name, entry["line_number"]), []))) + if len(cands) == 1: + entry["col_offset"] = cands[0] return entries From e4b7b97f6ff6b93ef6ce1ba9957c7aac94106156 Mon Sep 17 00:00:00 2001 From: Ben OConnor Date: Wed, 27 May 2026 19:23:11 -0400 Subject: [PATCH 5/5] headergen: install g++ in Dockerfile to enable line-profiler build The HeaderGen Docker image previously installed only gcc, not g++. HeaderGen's transitive dependency `line-profiler` has a Cython C++ extension; on platforms where no prebuilt wheel exists (e.g., arm64 Linux + Python 3.10), pip falls back to building from source, which requires g++. The build then fails with: g++ -... -c line_profiler/_line_profiler.cpp -o ... error: command 'g++' failed: No such file or directory ERROR: Failed building wheel for line-profiler This change adds g++ to the apt-get install line. The image now builds cleanly on arm64 hosts as well as amd64. (Builds may have appeared to succeed previously on hosts where a line-profiler wheel was already cached from an earlier build with g++ present; fresh builds without the cache hit the source-compile path and surface the issue.) 
Co-Authored-By: Claude Opus 4.7 (1M context) --- src/target_tools/headergen/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/target_tools/headergen/Dockerfile b/src/target_tools/headergen/Dockerfile index d7dd8b834..df49b8d2f 100644 --- a/src/target_tools/headergen/Dockerfile +++ b/src/target_tools/headergen/Dockerfile @@ -10,7 +10,7 @@ WORKDIR /app # Install dependencies RUN apt-get update \ - && apt-get -y install git gcc + && apt-get -y install git gcc g++ COPY requirements.txt /app/requirements.txt