From e0ece3efa8e19d067a34c58fb574f28923134716 Mon Sep 17 00:00:00 2001 From: Mickael Farina Date: Mon, 4 May 2026 10:04:12 +0200 Subject: [PATCH] hotfix: plan-time retry on hallucinated skill names (PR #41) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit User repro 2026-05-04 09:58: Project: "Read all markdown files in ~/codec-repo/docs/ and create an index.md that lists each file with its first heading and a one-line description" Result: Plan failed: plan invalid: plan references unknown skills: ['file_read'] Same hallucination CLASS as PR #35 but at a different LAYER. PR #35 fixed retries during execution (codec_agent_runner). This is failing earlier — at plan validation, before the plan is even saved. The user never even got to the approve-or-reject step. Root cause: Qwen drafts plans naming skills that don't exist. `file_read` and `fetch_url` are the two we've seen. The actual file-reading skill is `file_ops` (which reads, writes, appends, lists). The actual URL fetch is `web_fetch`. The user-visible result was the same as PR #35 — project mode dies before doing anything useful. Fix (mirrors PR #35's pattern at planning layer): 1. After validate_plan_skills returns ok=False, instead of raising, build a corrective prompt listing the missing skills, the FULL allowed registry, and the three most common confusions (file_read→file_ops, fetch_url→web_fetch, read_file→file_ops). 2. Re-call _qwen_chat ONCE with the appended correction. 3. Re-validate the second draft. If valid, use it. If not, raise with BOTH attempts in the message so the user sees the model is consistently confused (vs a one-off transient miss). 4. If the retry call itself fails (Qwen flakes between attempts), raise with the ORIGINAL validation error — more diagnostic than "qwen flaked on retry". Also: - Strengthen _PLAN_SYSTEM_PROMPT with the same three confusion hints so the FIRST draft is more likely to succeed (cuts the retry rate). Tests (3 new in tests/test_agent_plan.py — all pass): - test_draft_plan_retries_on_hallucinated_skill_then_succeeds Reproduces the exact user case: file_read on attempt 1, file_ops on attempt 2, plan succeeds. - test_draft_plan_retry_also_fails_raises_with_both_attempts Both attempts hallucinate (file_read, then read_file): error message contains both for diagnostic value. - test_draft_plan_retry_qwen_unavailable_surfaces_original_error Retry call raises ConnectionError: original validation error surfaces with "retry failed" appended. All 3 existing draft_plan tests still pass — backward-compat preserved. The existing test_draft_plan_rejects_unknown_skill now exercises BOTH attempts (fake_qwen_chat returns same bad plan each time) and still raises with the missing skill in the message. Total: 35/35 file pass + 7 pre-existing pynput env failures (unchanged). Co-Authored-By: Claude Opus 4.7 (1M context) --- codec_agent_plan.py | 69 +++++++++++++++++++++++-- tests/test_agent_plan.py | 107 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 172 insertions(+), 4 deletions(-) diff --git a/codec_agent_plan.py b/codec_agent_plan.py index adccec5..cd19f46 100644 --- a/codec_agent_plan.py +++ b/codec_agent_plan.py @@ -326,6 +326,7 @@ def _qwen_chat(user_prompt: str, system_prompt: str = "", Rules: - Output ONLY valid JSON. No prose before or after. - skills_needed MUST be skill names from the user-supplied registry list. Never invent skill names. + Common confusions to avoid: there is NO `file_read` skill (use `file_ops`, which reads, writes, appends, lists). There is NO `fetch_url` (use `web_fetch`). There is NO `read_file` (use `file_ops`). If you can't find an exact match in the registry list, pick the closest match — never invent. - write_paths default to ~/.codec/agents/{agent_id}/artifacts/** unless the project explicitly requires writing elsewhere. - destructive_ops list any irreversible operations (deletes, payments, sending emails on user's behalf). They will require additional consent at runtime. - estimated_duration_minutes is your best honest guess. @@ -406,12 +407,72 @@ def draft_plan(agent_id: str, description: str, registry=None, raise PlanValidationError(f"plan schema invalid: {e}") ok, missing = validate_plan_skills(plan, registry=registry) - if not ok: + if ok: + return plan + + # PR #41: plan-time hallucination retry. Mirror of the execution-time + # retry shipped in PR #35 (codec_agent_runner._build_correction_nudge). + # Real-world Qwen drift hits both layers — at execution it picks + # `fetch_url` instead of `web_fetch`; at planning it picks `file_read` + # instead of `file_ops`. Same cure: re-prompt ONCE with an explicit + # closed-world correction, fail hard if the second draft still misses. + log.info("[%s] plan referenced unknown skills %s; retrying with correction nudge", + agent_id, missing) + correction = ( + f"\n\nYour previous draft referenced these skills which DO NOT EXIST " + f"in the registry: {sorted(missing)}.\n" + f"You MUST pick from this exact list — do not invent names, do not " + f"add suffixes (no _v2, no _read, no _write versions of unrelated " + f"skills). The full allowed set is:\n" + f" {', '.join(available_skills)}\n\n" + f"Common confusions:\n" + f" - Need to read a file? Use `file_ops` (it reads, writes, " + f"appends, lists). There is NO `file_read` skill.\n" + f" - Need to fetch a URL? Use `web_fetch`. There is NO `fetch_url`.\n" + f" - Need to search files? Use `file_search`.\n\n" + f"Re-emit the entire JSON plan with valid skill names only." + ) + retry_prompt = user_prompt + correction + try: + raw2 = _qwen_chat(retry_prompt, _PLAN_SYSTEM_PROMPT) + except (QwenUnavailableError, ConnectionError, OSError, RuntimeError) as e: + # If the retry call itself fails, surface the ORIGINAL validation + # error — that's more diagnostic than "qwen flaked on retry". raise PlanValidationError( - f"plan references unknown skills: {missing}" + f"plan references unknown skills: {missing} " + f"(retry failed: {e})" ) - - return plan + raw2 = raw2.strip() + if raw2.startswith("```"): + raw2 = re.sub(r"^```(?:json)?\s*", "", raw2) + raw2 = re.sub(r"\s*```\s*$", "", raw2) + try: + d2 = json.loads(raw2) + except json.JSONDecodeError as e: + raise PlanValidationError( + f"plan references unknown skills: {missing} " + f"(retry returned non-JSON: {e})" + ) + if d2.get("too_vague"): + raise PlanValidationError("too_vague: description needs clarification") + d2.setdefault("schema", PLAN_SCHEMA_VERSION) + d2.setdefault("agent_id", agent_id) + for cp in d2.get("checkpoints", []): + cp.setdefault("id", _stable_checkpoint_id(cp)) + try: + plan2 = plan_from_dict(d2) + except (KeyError, ValueError, TypeError) as e: + raise PlanValidationError(f"retry plan schema invalid: {e}") + ok2, missing2 = validate_plan_skills(plan2, registry=registry) + if not ok2: + # Second miss — give up. Surface BOTH attempts so the user can see + # the model is consistently confused (e.g. truly unfixable phrasing). + raise PlanValidationError( + f"plan references unknown skills after retry: " + f"first={sorted(missing)}, second={sorted(missing2)}" + ) + log.info("[%s] retry succeeded; using corrected plan", agent_id) + return plan2 def _stable_checkpoint_id(cp_dict: Dict[str, Any]) -> str: diff --git a/tests/test_agent_plan.py b/tests/test_agent_plan.py index bc33490..e04bf4d 100644 --- a/tests/test_agent_plan.py +++ b/tests/test_agent_plan.py @@ -287,6 +287,113 @@ def raise_connection(*a, **k): ) +# ───────────────────────────────────────────────────────────────────────────── +# PR #41 — Plan-time hallucination retry (3 tests) +# ───────────────────────────────────────────────────────────────────────────── + +def _make_plan_response(skill_name): + """Build a minimal valid-shape plan JSON using one skill name. Used by + the retry tests below.""" + return json.dumps({ + "goals": ["read markdown files"], + "checkpoints": [ + {"title": "scan", "description": "list and read", + "skills_needed": [skill_name], + "expected_output": "index.md written", "step_budget": 30}, + ], + "permission_manifest": { + "read_paths": ["~/codec-repo/docs/**"], + "write_paths": ["~/codec-repo/docs/index.md"], + "network_domains": [], + "skills": [skill_name], "destructive_ops": [], + }, + "estimated_duration_minutes": 5, "assumptions": [], + }) + + +def test_draft_plan_retries_on_hallucinated_skill_then_succeeds(monkeypatch): + """Real reproducer from 2026-05-04 09:58: user asked CODEC to read + markdown files; Qwen drafted a plan with skill `file_read` (does not + exist; correct skill is `file_ops`). PR #41 retries ONCE with a + correction nudge — second draft picks `file_ops` and succeeds.""" + import codec_agent_plan as cap + + calls = {"n": 0} + def fake_qwen(prompt, *a, **k): + calls["n"] += 1 + if calls["n"] == 1: + return _make_plan_response("file_read") # hallucinated + return _make_plan_response("file_ops") # corrected + monkeypatch.setattr(cap, "_qwen_chat", fake_qwen) + + fake_registry = MagicMock() + fake_registry.names.return_value = ["file_ops", "file_search"] + + plan = cap.draft_plan( + agent_id="agent_test", + description="Read markdown files in docs and create an index", + registry=fake_registry, + ) + # The corrected plan was used + assert calls["n"] == 2 + assert plan.checkpoints[0].skills_needed == ["file_ops"] + + +def test_draft_plan_retry_also_fails_raises_with_both_attempts(monkeypatch): + """If the second attempt ALSO hallucinates, raise with both attempts + in the error message so the user can see Qwen is consistently confused + (vs a one-off transient miss).""" + import codec_agent_plan as cap + + responses = iter([ + _make_plan_response("file_read"), # first miss + _make_plan_response("read_file"), # second miss (different bad name) + ]) + monkeypatch.setattr(cap, "_qwen_chat", lambda *a, **k: next(responses)) + + fake_registry = MagicMock() + fake_registry.names.return_value = ["file_ops"] + + with pytest.raises(cap.PlanValidationError) as exc_info: + cap.draft_plan( + agent_id="agent_test", + description="some project", + registry=fake_registry, + ) + msg = str(exc_info.value) + assert "after retry" in msg + assert "file_read" in msg # first attempt + assert "read_file" in msg # second attempt + + +def test_draft_plan_retry_qwen_unavailable_surfaces_original_error(monkeypatch): + """If the retry call itself fails (Qwen flakes between attempts), surface + the ORIGINAL validation error — that's more diagnostic than 'qwen flaked + on retry'.""" + import codec_agent_plan as cap + + calls = {"n": 0} + def flaky(*a, **k): + calls["n"] += 1 + if calls["n"] == 1: + return _make_plan_response("file_read") + raise ConnectionError("qwen died between attempts") + monkeypatch.setattr(cap, "_qwen_chat", flaky) + + fake_registry = MagicMock() + fake_registry.names.return_value = ["file_ops"] + + with pytest.raises(cap.PlanValidationError) as exc_info: + cap.draft_plan( + agent_id="agent_test", + description="x", + registry=fake_registry, + ) + msg = str(exc_info.value) + assert "file_read" in msg # original missing skill present + assert "retry failed" in msg # cause noted + + # ───────────────────────────────────────────────────────────────────────────── # Task 7 — Vague-description clarifying loop (Q3) (2 tests) # ─────────────────────────────────────────────────────────────────────────────