Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/target_tools/headergen/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ WORKDIR /app

# Install dependencies
RUN apt-get update \
&& apt-get -y install git gcc
&& apt-get -y install git gcc g++


COPY requirements.txt /app/requirements.txt
Expand Down
1 change: 1 addition & 0 deletions src/target_tools/headergen/src/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ def main_runner(args):
logger.info(file)

inferred = process_file(file)
inferred = translator.enrich_with_col_offsets(file, inferred)

json_file_path = str(file).replace(".py", "_result.json")

Expand Down
71 changes: 71 additions & 0 deletions src/target_tools/headergen/src/translator.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import argparse
import ast
import json
import os
from collections import defaultdict
from pathlib import Path


Expand All @@ -9,6 +11,75 @@ def list_json_files(folder_path):
return python_files


def _name_column(line, start, keywords, name):
    """Return the 0-based column where ``name`` starts in ``line``, or None.

    Beginning at ``start``, every keyword in ``keywords`` (for example
    ``("async", "def")``) must appear in order, each preceded by optional
    horizontal whitespace, and must then be followed by ``name``.  None is
    returned when the line does not have that shape (e.g. a backslash
    continuation pushes the name onto a later line) so the caller can fall
    back to a default layout.
    """
    pos = start
    name_start = None
    for token in (*keywords, name):
        while pos < len(line) and line[pos] in " \t\f":
            pos += 1
        if not line.startswith(token, pos):
            return None
        name_start = pos
        pos += len(token)
    return name_start


def build_position_map(source_path):
    """Map (name, line_number) -> [1-indexed col_offsets] for every name
    occurrence in the source. HeaderGen's server doesn't emit col_offset, but
    for any (name, line) it gives us, the column is determined by the source.
    We keep all candidates so the enrichment can skip ambiguous cases.

    Columns are character columns: ``ast`` reports UTF-8 byte offsets, which
    are converted here so non-ASCII text earlier on a line does not shift the
    result.

    Args:
        source_path: Path of the Python source file to index.

    Returns:
        A ``defaultdict(list)`` keyed by ``(name, lineno)``. It is empty when
        the file cannot be read or parsed.
    """
    # Function-scope import: the module header does not import tokenize.
    import tokenize

    positions = defaultdict(list)
    try:
        # tokenize.open decodes using the file's PEP 263 coding cookie / BOM
        # instead of the platform's locale encoding.
        with tokenize.open(source_path) as f:
            source = f.read()
        tree = ast.parse(source)
    except Exception:
        # Deliberately best-effort: an unreadable or unparsable source just
        # means no positions are available for enrichment.
        return positions

    # tokenize.open normalises line endings to "\n", which matches how the
    # parser numbers lines (form feeds do not start a new line).
    text_lines = source.split("\n")

    def line_text(lineno):
        """Return the text of 1-based line ``lineno`` ('' if out of range)."""
        if 0 < lineno <= len(text_lines):
            return text_lines[lineno - 1]
        return ""

    def char_col(lineno, byte_col):
        """Convert ast's UTF-8 byte offset into a 0-based character column."""
        try:
            prefix = line_text(lineno).encode("utf-8")[:byte_col]
            return len(prefix.decode("utf-8"))
        except UnicodeError:
            return byte_col

    for node in ast.walk(tree):
        if isinstance(node, ast.Name):
            col = char_col(node.lineno, node.col_offset)
            positions[(node.id, node.lineno)].append(col + 1)
        elif isinstance(node, ast.arg):
            col = char_col(node.lineno, node.col_offset)
            positions[(node.arg, node.lineno)].append(col + 1)
        elif isinstance(
            node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)
        ):
            if isinstance(node, ast.AsyncFunctionDef):
                keywords = ("async", "def")
            elif isinstance(node, ast.FunctionDef):
                keywords = ("def",)
            else:
                keywords = ("class",)
            # The node's own column points at the keyword, not the name, so
            # locate the name in the actual line text.
            start = char_col(node.lineno, node.col_offset)
            col = _name_column(
                line_text(node.lineno), start, keywords, node.name
            )
            if col is None:
                # Fall back to the conventional single-space layout.
                col = start + sum(len(kw) + 1 for kw in keywords)
            positions[(node.name, node.lineno)].append(col + 1)
    return positions


def _lookup_name(entry):
    """Return the source-level name to look up for this entry's position.

    Args:
        entry: A result dict. The first of the keys ``"variable"``,
            ``"parameter"`` or ``"function"`` that is present decides the
            name.

    Returns:
        The identifier to search for in the source, or None when the entry
        carries none of those keys.
    """
    if "variable" in entry:
        # Subscript/attribute accesses like 'h[0]' or 'self.child' are
        # reported as the full expression; the col_offset GT expects is
        # where the base name begins.  Cut at *every* separator so mixed
        # forms such as 'self.child[0]' reduce to 'self', not 'self.child'.
        name = entry["variable"]
        for sep in ("[", "."):
            name = name.split(sep, 1)[0]
        return name
    if "parameter" in entry:
        return entry["parameter"]
    if "function" in entry:
        # Nested functions are reported as 'outer.inner'; the position
        # we want is the inner name's own column.
        return entry["function"].rsplit(".", 1)[-1]
    return None


def enrich_with_col_offsets(source_path, entries):
    """Fill in a missing ``col_offset`` on HeaderGen entries from the source.

    Each entry's identifying name is looked up at the entry's line number in
    the position map built from ``source_path``. The column is written only
    when exactly one distinct candidate exists, so a position is never
    guessed. Entries that already have ``col_offset`` are left untouched.

    The entries are modified in place and the same ``entries`` object is
    returned.
    """
    position_map = build_position_map(source_path)
    for record in entries:
        if "col_offset" not in record:
            lookup = _lookup_name(record)
            if lookup is not None:
                key = (lookup, record["line_number"])
                distinct_cols = set(position_map.get(key, []))
                # Several distinct columns means the name occurs more than
                # once on the line: ambiguous, so leave the entry as is.
                if len(distinct_cols) == 1:
                    record["col_offset"] = next(iter(distinct_cols))
    return entries


def translate_content(file_path):
with open(file_path) as f:
data = json.load(f)
Expand Down
97 changes: 63 additions & 34 deletions src/target_tools/jedi/src/jedi_type_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,17 +100,24 @@ def find_types_by_execute(self, jedi_obj):
return _type

def get_function_name(self, jedi_obj):
"""Return the qualified name of jedi_obj relative to its module,
walking up parent scopes so nested functions become 'outer.inner'."""
try:
if jedi_obj.name == "<lambda>":
func_name = "lambda"
else:
parts = jedi_obj.full_name.split(".", 1)
func_name = parts[-1] if len(parts) > 1 else jedi_obj.full_name
except Exception as e:
return "lambda"
parts = []
current = jedi_obj
while current is not None and current.type != "module":
name = "lambda" if current.name == "<lambda>" else current.name
parts.append(name)
try:
current = current.parent()
except Exception:
break
return ".".join(reversed(parts)) if parts else jedi_obj.name
except Exception:
print("full_name not found in jedi_obj?")
func_name = jedi_obj.name

return func_name
return jedi_obj.name

def infer_types(self):
"""
Expand Down Expand Up @@ -143,24 +150,46 @@ def infer_types(self):
if _infer:
for inferred in _infer:
if inferred.type == "function":
# _type = self.parse_type_hint(inferred.get_type_hint())
# if not _type:
# self.find_types_by_execute(inferred)

_type = self.find_types_by_execute(inferred)

_info = {
"file": node.name,
"line_number": pos["line"],
}
if inferred.name != "<lambda>":
_info["function"] = self.get_function_name(inferred)
_info["type"] = _type if _type else {"any"}

variable_name = var.split(":")[0].strip()
if variable_name != self.get_function_name(inferred):
_info["variable"] = variable_name
if _type:
# Distinguish between the function's own definition
# site (return-type is what's wanted, e.g. for `def
# func1():`) and a reference to it (callable is
# what's wanted, e.g. for `a = func1`).
at_def_site = (
pos["line"] == inferred.line
and pos["column"] == inferred.column
)

if at_def_site:
_type = self.find_types_by_execute(inferred)

_info = {
"file": node.name,
"line_number": pos["line"],
"col_offset": pos["column"] + 1,
}
if inferred.name != "<lambda>":
_info["function"] = self.get_function_name(inferred)
_info["type"] = _type if _type else {"any"}

variable_name = var.split(":")[0].strip()
if variable_name != self.get_function_name(inferred):
_info["variable"] = variable_name
if _type:
output_inferred.append(_info)
else:
variable_name = var.split(":")[0].strip()
_info = {
"file": node.name,
"line_number": pos["line"],
"col_offset": pos["column"] + 1,
"variable": variable_name,
"type": {"callable"},
}
parent = pos["jedi_obj"].parent()
if parent and parent.type != "module":
parent_func = self.get_function_name(parent)
if parent_func:
_info["function"] = parent_func
output_inferred.append(_info)

elif inferred.type == "instance":
Expand All @@ -187,17 +216,15 @@ def infer_types(self):
_info = {
"file": node.name,
"line_number": pos["line"],
"col_offset": pos["column"] + 1,
"variable": var.split(":")[0],
"type": {_type},
}
if (
not pos["jedi_obj"].parent().name
== pos["jedi_obj"].parent().module_name
):
if self.get_function_name(pos["jedi_obj"].parent()):
_info["function"] = self.get_function_name(
pos["jedi_obj"].parent()
)
parent = pos["jedi_obj"].parent()
if parent and parent.type != "module":
parent_func = self.get_function_name(parent)
if parent_func:
_info["function"] = parent_func
if _type:
output_inferred.append(_info)

Expand All @@ -206,6 +233,7 @@ def infer_types(self):
_info = {
"file": node.name,
"line_number": pos["line"],
"col_offset": pos["column"] + 1,
"variable": var.split(":")[0],
"function": self.get_function_name(
pos["jedi_obj"].parent()
Expand All @@ -225,6 +253,7 @@ def infer_types(self):
_info = {
"file": node.name,
"line_number": pos["line"],
"col_offset": pos["column"] + 1,
"parameter": var.split(":")[0],
"function": self.get_function_name(
pos["jedi_obj"].parent()
Expand Down
2 changes: 2 additions & 0 deletions src/target_tools/scalpel/src/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import os
from pathlib import Path

import translator
import utils
from scalpel.typeinfer.typeinfer import TypeInference

Expand Down Expand Up @@ -42,6 +43,7 @@ def main_runner(args):
try:
# logger.debug(file)
inferred = process_file(file)
inferred = translator.enrich_with_col_offsets(file, inferred)
json_file_path = str(file).replace(".py", "_result.json")

with open(json_file_path, "w") as json_file:
Expand Down
66 changes: 66 additions & 0 deletions src/target_tools/scalpel/src/translator.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import argparse
import ast
import json
import os
from collections import defaultdict
from pathlib import Path


Expand All @@ -9,6 +11,70 @@ def list_json_files(folder_path):
return python_files


def _name_column(line, start, keywords, name):
    """Return the 0-based column where ``name`` starts in ``line``, or None.

    Beginning at ``start``, every keyword in ``keywords`` (for example
    ``("async", "def")``) must appear in order, each preceded by optional
    horizontal whitespace, and must then be followed by ``name``.  None is
    returned when the line does not have that shape (e.g. a backslash
    continuation pushes the name onto a later line) so the caller can fall
    back to a default layout.
    """
    pos = start
    name_start = None
    for token in (*keywords, name):
        while pos < len(line) and line[pos] in " \t\f":
            pos += 1
        if not line.startswith(token, pos):
            return None
        name_start = pos
        pos += len(token)
    return name_start


def build_position_map(source_path):
    """Map (name, line_number) -> [1-indexed col_offsets] for every name
    occurrence in the source. Scalpel's runner doesn't emit col_offset, but
    for any (name, line) it gives us, the column is determined by the source.
    We keep all candidates so the enrichment can skip ambiguous cases.

    Columns are character columns: ``ast`` reports UTF-8 byte offsets, which
    are converted here so non-ASCII text earlier on a line does not shift the
    result.

    Args:
        source_path: Path of the Python source file to index.

    Returns:
        A ``defaultdict(list)`` keyed by ``(name, lineno)``. It is empty when
        the file cannot be read or parsed.
    """
    # Function-scope import: the module header does not import tokenize.
    import tokenize

    positions = defaultdict(list)
    try:
        # tokenize.open decodes using the file's PEP 263 coding cookie / BOM
        # instead of the platform's locale encoding.
        with tokenize.open(source_path) as f:
            source = f.read()
        tree = ast.parse(source)
    except Exception:
        # Deliberately best-effort: an unreadable or unparsable source just
        # means no positions are available for enrichment.
        return positions

    # tokenize.open normalises line endings to "\n", which matches how the
    # parser numbers lines (form feeds do not start a new line).
    text_lines = source.split("\n")

    def line_text(lineno):
        """Return the text of 1-based line ``lineno`` ('' if out of range)."""
        if 0 < lineno <= len(text_lines):
            return text_lines[lineno - 1]
        return ""

    def char_col(lineno, byte_col):
        """Convert ast's UTF-8 byte offset into a 0-based character column."""
        try:
            prefix = line_text(lineno).encode("utf-8")[:byte_col]
            return len(prefix.decode("utf-8"))
        except UnicodeError:
            return byte_col

    for node in ast.walk(tree):
        if isinstance(node, ast.Name):
            col = char_col(node.lineno, node.col_offset)
            positions[(node.id, node.lineno)].append(col + 1)
        elif isinstance(node, ast.arg):
            col = char_col(node.lineno, node.col_offset)
            positions[(node.arg, node.lineno)].append(col + 1)
        elif isinstance(
            node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)
        ):
            if isinstance(node, ast.AsyncFunctionDef):
                keywords = ("async", "def")
            elif isinstance(node, ast.FunctionDef):
                keywords = ("def",)
            else:
                keywords = ("class",)
            # The node's own column points at the keyword, not the name, so
            # locate the name in the actual line text.
            start = char_col(node.lineno, node.col_offset)
            col = _name_column(
                line_text(node.lineno), start, keywords, node.name
            )
            if col is None:
                # Fall back to the conventional single-space layout.
                col = start + sum(len(kw) + 1 for kw in keywords)
            positions[(node.name, node.lineno)].append(col + 1)
    return positions


def _lookup_name(entry):
    """Return the source-level name to look up for this entry's position.

    Args:
        entry: A result dict. The first of the keys ``"variable"``,
            ``"parameter"`` or ``"function"`` that is present decides the
            name.

    Returns:
        The identifier to search for in the source, or None when the entry
        carries none of those keys.
    """
    if "variable" in entry:
        # Expressions such as 'h[0]' or 'self.child' are reduced to the base
        # name.  Cut at *every* separator so mixed forms like
        # 'self.child[0]' reduce to 'self', not 'self.child'.
        name = entry["variable"]
        for sep in ("[", "."):
            name = name.split(sep, 1)[0]
        return name
    if "parameter" in entry:
        return entry["parameter"]
    if "function" in entry:
        # For a dotted name like 'outer.inner', keep only the last component.
        return entry["function"].rsplit(".", 1)[-1]
    return None


def enrich_with_col_offsets(source_path, entries):
    """Fill in a missing ``col_offset`` on entries from the source file.

    Each entry's identifying name is looked up at the entry's line number in
    the position map built from ``source_path``. The column is written only
    when exactly one distinct candidate exists, so a position is never
    guessed. Entries that already have ``col_offset`` are left untouched.

    The entries are modified in place and the same ``entries`` object is
    returned.
    """
    position_map = build_position_map(source_path)
    for record in entries:
        if "col_offset" not in record:
            lookup = _lookup_name(record)
            if lookup is not None:
                key = (lookup, record["line_number"])
                distinct_cols = set(position_map.get(key, []))
                # Several distinct columns means the name occurs more than
                # once on the line: ambiguous, so leave the entry as is.
                if len(distinct_cols) == 1:
                    record["col_offset"] = next(iter(distinct_cols))
    return entries


def main_translator(args):
json_files = list_json_files(args.bechmark_path)
error_count = 0
Expand Down