Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
a085851
Rework code hallucination into a request-grounded agent-solution pipe…
adaamko Jun 7, 2026
f4d2a4e
Add grounding, Context7, format variety and audit tooling to code-age…
adaamko Jun 9, 2026
697c7a2
Reject invalid taxonomy labels at the boundary and stop caching trans…
adaamko Jun 9, 2026
3b260fb
Add in-place grounding repair for generated samples
adaamko Jun 9, 2026
9b65ee9
Add --hall-ids-file for explicit hallucination-target selection
adaamko Jun 9, 2026
d9401f2
Document the dataset's construction, audit, and repair provenance
adaamko Jun 9, 2026
7476692
Add in-place clean-to-hallucinated sample converter
adaamko Jun 9, 2026
8d66dea
Document the test-set verification protocol
adaamko Jun 11, 2026
f834f6e
Add fast HF-Trainer span-detector training path
adaamko Jun 11, 2026
31ea5ad
Allow merging multiple hub datasets in span-detector training
adaamko Jun 15, 2026
830ea3d
Add --limit to span trainer for smoke tests
adaamko Jun 15, 2026
2516814
Drop group_by_length (not a TrainingArguments kwarg in transformers 5.x)
adaamko Jun 15, 2026
e5102d8
Add per-source/per-language span-model eval harness
adaamko Jun 15, 2026
3dd1af2
Tokenize once on rank 0 under DDP (main_process_first); use all 4 GPUs
adaamko Jun 15, 2026
5be55b5
Use longest_first truncation (only_first crashes on answers > max_len…
adaamko Jun 15, 2026
01f7930
Resume only if a checkpoint exists (fresh start no longer errors)
adaamko Jun 15, 2026
3991c78
Add span-text SFT data builder for the generative detector
adaamko Jun 15, 2026
890d651
Single-pass span-model eval (predict once, ~4x faster)
adaamko Jun 16, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -182,4 +182,6 @@ temp/

# cache/
lettucedetect/cache/
testing/
testing/
tool-output-extractor/
reviews/
224 changes: 224 additions & 0 deletions demo/code_hallucination_viewer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,224 @@
"""Streamlit viewer for code-agent hallucination samples.

Browse generated samples — the developer request, the grounded context, and the
answer with hallucinated spans highlighted by category — to spot-check quality.

Run::

streamlit run demo/code_hallucination_viewer.py

Point it at a generated directory (``data/v2/code_agent``) or a single JSONL file
via the sidebar.
"""

from __future__ import annotations

import html
import json
from pathlib import Path

import streamlit as st

# Highlight colours per hallucination category, as (background, border) pairs.
# Categories missing from this table fall back to a neutral grey where the
# table is looked up with a default.
CATEGORY_STYLE = {
"contradiction": ("#ffd6d6", "#cc0000"),
"unsupported_addition": ("#ffe9c7", "#cc7700"),
"fabricated_reference": ("#e7d6ff", "#7a00cc"),
}
# Directory offered as the default sample source in the sidebar.
DEFAULT_DIR = "data/v2/code_agent"


@st.cache_data(show_spinner=False)
def load_samples(path: str) -> list[dict]:
    """Load samples from a JSONL file or a directory of ``*.jsonl`` splits.

    Args:
        path: Either a single JSONL file or a directory. For a directory, every
            ``*.jsonl`` file is read in sorted order, except files whose name
            ends in ``.failures.jsonl``.

    Returns:
        One dict per non-blank line. Each sample is normalised so that
        ``metadata`` is always a dict (a JSON-encoded string is decoded, a
        missing/empty value becomes ``{}``), ``labels`` is never missing or
        ``None``, and ``_file`` holds the name of the file it came from.
        Paths that do not exist yield no samples.

    Raises:
        json.JSONDecodeError: If a non-blank line (or a string ``metadata``
            value) is not valid JSON.
    """
    root = Path(path)
    if root.is_dir():
        files = [f for f in sorted(root.glob("*.jsonl")) if not f.name.endswith(".failures.jsonl")]
    else:
        files = [root]

    samples: list[dict] = []
    for f in files:
        if not f.exists():
            continue
        # Read as UTF-8 explicitly instead of relying on the platform default,
        # and iterate the file object rather than ``str.splitlines()``: the
        # latter also splits on U+2028/U+2029/U+0085, which may legally appear
        # unescaped inside a JSON string and would tear one record in two.
        with f.open(encoding="utf-8") as handle:
            for line in handle:
                line = line.strip()
                if not line:
                    continue
                s = json.loads(line)
                meta = s.get("metadata")
                s["metadata"] = json.loads(meta) if isinstance(meta, str) else (meta or {})
                # Samples without hallucinations may omit ``labels`` or store null.
                s["labels"] = s.get("labels") or []
                s["_file"] = f.name
                samples.append(s)
    return samples


def _highlight_line(text: str, line_start: int, labels: list[dict]) -> str:
"""Escape one line and wrap the parts overlapped by a label span."""
events = []
for label in labels:
a = max(label["start"], line_start) - line_start
b = min(label["end"], line_start + len(text)) - line_start
if a < b:
events.append((a, b, label))
if not events:
return html.escape(text)
events.sort()
out: list[str] = []
pos = 0
for a, b, label in events:
if a < pos:
continue
out.append(html.escape(text[pos:a]))
bg, border = CATEGORY_STYLE.get(label.get("category", ""), ("#eeeeee", "#888888"))
tip = html.escape(label.get("explanation", "") or label.get("category", ""))
out.append(
f'<span title="{tip}" style="background:{bg};color:#111;'
f'border-bottom:2px solid {border};border-radius:3px;">{html.escape(text[a:b])}</span>'
)
pos = b
out.append(html.escape(text[pos:]))
return "".join(out)


# Lower-case markers of edit-style answer headers.
# NOTE(review): not referenced anywhere in the visible code; the renderer
# spells out its own header checks inline.
_HEADER = ("in file ", "replace:", "with:", "add:")
# Inline CSS shared by every rendered answer row (monospace, wrapped, padded).
# It ends with ';' so callers can append further declarations directly.
_CODE = (
"font-family:ui-monospace,SFMono-Regular,Menlo,monospace;font-size:0.82rem;"
"white-space:pre-wrap;word-break:break-word;padding:2px 10px;margin:0;"
)


def render_answer(answer: str, labels: list[dict]) -> str:
    """Turn an answer into an HTML block with labelled spans highlighted.

    An answer counts as "edit style" when it contains a code fence and either
    ``replace:`` or ``, add:`` (case-insensitive). Such answers are drawn as a
    diff: fence lines are dropped, header lines (``In file …``, ``replace:``,
    ``with:``) are shown dimmed, and fenced lines get a ``-`` or ``+`` gutter
    depending on the most recent header. Any other answer is rendered line by
    line as a plain code block.

    Args:
        answer: The full answer text; label offsets index into this string.
        labels: Label dicts forwarded to the per-line highlighter.

    Returns:
        A single HTML ``<div>`` string containing one row per rendered line.
    """
    lowered_answer = answer.lower()
    is_edit = "```" in answer and ("replace:" in lowered_answer or ", add:" in lowered_answer)

    rendered: list[str] = []
    inside_fence = False
    side = "to"  # whether the current fenced block is removed ('from') or added ('to')
    offset = 0
    for raw in answer.split("\n"):
        # Remember where this line starts, then advance past it and its newline.
        line_offset = offset
        offset += len(raw) + 1
        trimmed = raw.strip()

        if is_edit:
            if trimmed.startswith("```"):
                inside_fence = not inside_fence
                continue
            folded = trimmed.lower()
            is_header = folded.startswith("in file ") or folded in ("replace:", "with:")
            if is_header and not inside_fence:
                side = "from" if "replace" in folded else "to"
                rendered.append(
                    f"<div style='{_CODE}color:#8b949e;font-weight:600;'>{html.escape(raw)}</div>"
                )
                continue

        highlighted = _highlight_line(raw, line_offset, labels)
        if not (is_edit and inside_fence):
            rendered.append(f"<div style='{_CODE}color:#e6edf3;'>{highlighted}</div>")
            continue

        if side == "from":
            bg, mark = "#3a1d1d", "-"
        else:
            bg, mark = "#16301c", "+"
        rendered.append(
            f"<div style='{_CODE}background:{bg};color:#e6edf3;'>"
            f"<span style='color:#6e7681'>{mark} </span>{highlighted}</div>"
        )

    body = "".join(rendered)
    return (
        "<div style='background:#0d1117;border-radius:8px;padding:10px 4px;overflow-x:auto;'>"
        + body
        + "</div>"
    )


def legend() -> str:
    """Build the HTML legend: one coloured chip per known category."""
    chip_markup = "".join(
        f'<span style="background:{bg};border-bottom:2px solid {border};'
        f'border-radius:3px;padding:1px 6px;margin-right:8px;">{cat}</span>'
        for cat, (bg, border) in CATEGORY_STYLE.items()
    )
    return f"<div style='margin:4px 0 12px'>{chip_markup}</div>"


def main() -> None:
    """Run the Streamlit viewer.

    Lets the user pick a sample source in the sidebar, filter the loaded
    samples (hallucinated/clean, mode, category, repo, free-text search) and
    inspect one sample at a time: request, highlighted answer with its labels,
    and the grounding context.
    """
    st.set_page_config(page_title="Code Hallucination Viewer", layout="wide")
    st.title("Code-Agent Hallucination Viewer")

    with st.sidebar:
        st.header("Source")
        default = DEFAULT_DIR if Path(DEFAULT_DIR).exists() else ""
        choices = sorted({str(f.parent) for f in Path("data/v2").glob("*/*.jsonl")})
        path = (
            st.selectbox(
                "Directory", choices, index=choices.index(default) if default in choices else 0
            )
            if choices
            else ""
        )
        # The free-text box wins; it is pre-filled with the selected directory.
        path = st.text_input("…or path", value=path or DEFAULT_DIR)

    samples = load_samples(path) if path else []
    if not samples:
        st.info(f"No samples found at `{path}`. Generate some, or point to a JSONL file/dir.")
        return

    def repo_of(sample: dict) -> str:
        # The repo is the part of the instance id before the first "__".
        return sample["metadata"].get("instance_id", "").split("__")[0]

    with st.sidebar:
        st.header("Filter")
        only = st.radio("Show", ["all", "hallucinated", "clean"], horizontal=True)
        modes = sorted(
            {s["metadata"].get("hallucination_mode") for s in samples if s["labels"]} - {None}
        )
        mode = st.selectbox("Mode", ["any", *modes])
        cats = sorted({label["category"] for s in samples for label in s["labels"]})
        cat = st.selectbox("Category", ["any", *cats])
        repos = sorted({repo_of(s) for s in samples})
        repo = st.selectbox("Repo", ["any", *repos])
        query = st.text_input("Search request/answer")

    def keep(s: dict) -> bool:
        if only == "hallucinated" and not s["labels"]:
            return False
        if only == "clean" and s["labels"]:
            return False
        if mode != "any" and s["metadata"].get("hallucination_mode") != mode:
            return False
        if cat != "any" and not any(label["category"] == cat for label in s["labels"]):
            return False
        # Compare the exact repo prefix: a plain ``startswith`` would make
        # "django" also match instance ids such as "django-cms__…".
        if repo != "any" and repo_of(s) != repo:
            return False
        if query and query.lower() not in (s.get("question", "") + s.get("answer", "")).lower():
            return False
        return True

    filtered = [s for s in samples if keep(s)]
    st.caption(
        f"{len(filtered)} / {len(samples)} samples · "
        f"{sum(1 for s in filtered if s['labels'])} hallucinated"
    )
    if not filtered:
        st.warning("No samples match the filters.")
        return

    idx = st.number_input("Sample", 0, len(filtered) - 1, 0, 1)
    s = filtered[int(idx)]
    meta = s["metadata"]
    answer = s.get("answer", "")

    chips = " · ".join(
        x
        for x in [
            f"**{meta.get('instance_id', '?')}**",
            f"`{s['_file']}`",
            f"mode: `{meta.get('hallucination_mode', '—')}`" if s["labels"] else "**clean**",
            f"style: `{meta.get('answer_style', '?')}`",
        ]
        if x
    )
    st.markdown(chips)

    st.subheader("Developer request")
    st.markdown(f"> {s.get('question', '')}")

    left, right = st.columns([1, 1])
    with left:
        st.subheader("Answer")
        st.markdown(legend(), unsafe_allow_html=True)
        st.markdown(render_answer(answer, s["labels"]), unsafe_allow_html=True)
        if s["labels"]:
            st.subheader("Labels")
            for label in sorted(s["labels"], key=lambda label: label["start"]):
                # Show at most the first 80 characters of the labelled text.
                seg = answer[label["start"]:label["end"]]
                st.markdown(
                    f"- **{label['category']}** / `{label.get('subcategory')}` — "
                    f"`{seg[:80]}`  \n  {label.get('explanation', '')}"
                )
    with right:
        st.subheader("Context")
        st.code(s.get("context", ""), language="python")


if __name__ == "__main__":
    main()
2 changes: 1 addition & 1 deletion demo/detection.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "lettucedetect",
"display_name": "base",
"language": "python",
"name": "python3"
},
Expand Down
2 changes: 1 addition & 1 deletion demo/streamlit_demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def main():
def load_detector():
return HallucinationDetector(
method="transformer",
model_path="KRLabsOrg/lettucedect-base-modernbert-en-v1",
model_path="output/hallucination_detection_ettin_17m",
)

detector = load_detector()
Expand Down
62 changes: 37 additions & 25 deletions docs/code-hallucination/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@ All pipeline configuration is centralized in `scripts/code_hallucination/config.
| `API_BASE_URL` | `https://api.groq.com/openai/v1` | OpenAI-compatible API endpoint |
| `MODEL` | `moonshotai/kimi-k2-instruct-0905` | Model name |
| `BATCH_SIZE` | `1` | Concurrent requests. Set >1 for local vLLM to saturate GPU |
| `CONTEXT7_API_KEY` | (none) | API key for Context7 documentation service |
| `GITHUB_TOKEN` | (none) | Raises the GitHub-raw rate limit for repo grounding |
| `CONTEXT7_API_KEY` | (none) | Enables Context7 third-party API signature grounding |
| `MAX_ANSWER_CHARS` | `10000` | Skip instances whose chosen answer exceeds this (keeps answers trainable) |

These can also be overridden via CLI flags (`--api-key`, `--base-url`, `--model`).

Expand All @@ -19,47 +21,57 @@ These can also be overridden via CLI flags (`--api-key`, `--base-url`, `--model`
| Parameter | Default | Description |
|-----------|---------|-------------|
| `HALLUCINATION_RATIO` | `0.4` | Fraction of instances that get hallucination injection |
| `DOCS_RATIO` | `0.2` | Fraction of instances that get Context7 documentation |
| `MAX_FILE_CHARS` | `12000` | Maximum characters per source file |
| `MAX_CONTEXT7_CHARS` | `4000` | Maximum characters per library doc |
| `MAX_CONTEXT7_CHARS` | `4000` | Maximum characters fetched per Context7 lookup |
| `LLM_TEMPERATURE` | `0.7` | Temperature for query rewriting |
| `HALLUCINATION_TEMPERATURE` | `0.8` | Temperature for hallucination injection (higher for variety) |
| `MAX_RETRIES` | `3` | API retry attempts |
| `RETRY_DELAY` | `2.0` | Base delay between retries (seconds) |

## Answer Format Weights
## Answer Source

| Format | Weight | Description |
|--------|--------|-------------|
| `code_with_explanation` | 0.40 | Natural AI assistant response with prose + code block (LLM-generated) |
| `complete_function` | 0.25 | Full patched function body via AST |
| `fragment` | 0.20 | Added/changed lines from diff |
| `edit_style` | 0.15 | "In file X, replace Y with Z" |
The answer is set by the generator's `--answer-source` flag:

| Value | Description |
|-------|-------------|
| `gold` (default) | the project's real fix, used verbatim — no model call. Rendered per-instance as `function` (largest patched function fitting the cap), `fragment` (the hunk), or `edit` (`In file X, replace Y with Z`) |
| `generated` | an LLM writes a coherent solution to the request |

## Grounding

Answer references missing from the context are grounded in four tiers
(`answer_grounding.py`): the patch's modified functions, the changed files,
modules the answer imports, and modules the changed files import (base mixins /
sibling modules → cross-module `self.method` calls). Structural fabrications on
third-party APIs additionally get a Context7 `Library signatures` block. Set
`GITHUB_TOKEN` to raise the GitHub-raw rate limit and `CONTEXT7_API_KEY` to enable Context7 lookups.

## Hallucination Types

Assigned round-robin across injected instances:
Injected per instance and mapped to the unified taxonomy:

- **wrong_implementation** → `contradiction` — wrong logic, condition, field, or value
- **unrequested_change** → `unsupported_addition` — an extra block or side effect the request never asked for
- **fabricated_api** → `fabricated_reference` — a method/attribute/keyword that does not exist on a real object

- **structural** — Non-existent APIs, wrong methods, invented parameters
- **behavioral** — Wrong values, logic errors, swapped conditions
- **semantic** — Code that looks correct but does something subtly different
The `--struct-ratio` flag sets the share of hallucinated samples that receive a
`fabricated_api` (structural) edit; the rest are intent edits.

## File Paths

All data is stored under `data/code_hallucination/`:
Cached preparation inputs live under `data/code_hallucination/`:

| Path | Description |
|------|-------------|
| `swebench_instances.json` | Phase 1: loaded instances |
| `repos/` | Phase 2: bare git clones |
| `source_cache/` | Phase 2: per-instance source data |
| `queries.jsonl` | Phase 3: rewritten queries |
| `documentation.jsonl` | Phase 4: library docs |
| `formats.jsonl` | Phase 5: assigned formats |
| `hallucinated_samples.jsonl` | Phase 6: injected hallucinations |
| `code_hallucination_data.json` | Phase 7: final dataset |
| `code_hallucination_metadata.json` | Phase 7: metadata |
| `validation_report.txt` | Phase 9: quality report |
| `swebench_instances.json` | loaded SWE-bench instances |
| `repos/` | bare git clones |
| `source_cache/` | per-instance source + gold edit |
| `queries.jsonl` | rewritten developer requests |
| `documentation.jsonl` | cached Context7 library docs (folded into context when present) |

Generated samples are written under the generator's `--out` (e.g.
`data/v2/code_agent/`) as `{train,dev,test}.jsonl` plus matching
`.failures.jsonl`.

## Data Sources

Expand Down
Loading
Loading