Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion capabilities/web-security/capability.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
schema: 1
name: web-security
version: "1.0.3"
version: "1.0.4"
description: >
Web application penetration testing with 30+ attack technique playbooks
covering request smuggling, cache poisoning, SSRF, SSTI, DOM
Expand All @@ -9,6 +9,9 @@ description: >
integration via MCP, credential management, DNS rebinding, phone
verification, and vulnerability verification.

hooks:
- hooks/interrupted_tool_result.py

mcp:
servers:
caido:
Expand Down
156 changes: 156 additions & 0 deletions capabilities/web-security/hooks/interrupted_tool_result.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
"""Recover from provider interruption sentinels after tool execution."""

from __future__ import annotations

import asyncio
import re
from dataclasses import dataclass

from dreadnode.agents.events import AgentEnd, GenerationStep, ToolEnd, ToolError
from dreadnode.agents.reactions import Continue
from dreadnode.core.hook import hook

# Pattern for the provider's interruption sentinel. The surrounding square
# brackets are optional, whitespace is tolerated just inside them, and letter
# case is ignored. It is applied with ``fullmatch`` in
# ``_is_interruption_sentinel``, so the whole text must be the sentinel.
_INTERRUPTION_SENTINEL = re.compile(
    r"^\[?\s*response interrupted by a tool call result\.\s*\]?$",
    re.IGNORECASE,
)
# Ceiling compared against ``_AgentState.recoveries`` before another recovery
# is issued for the same agent.
_MAX_RECOVERIES_PER_AGENT = 2
# Maximum length, in characters, of a summary returned by ``_normalize_text``
# (the trailing "..." of a truncated summary counts toward this limit).
_MAX_SUMMARY_CHARS = 600


@dataclass(slots=True)
class _ToolOutcome:
    """Name and one-line summary of the most recently finished tool call."""

    # Tool name, taken from ``event.tool_call.name`` by the recording hooks.
    tool_name: str
    # Human-readable description produced by ``_tool_end_summary`` or
    # ``_tool_error_summary``.
    summary: str


@dataclass(slots=True)
class _AgentState:
    """Per-agent bookkeeping shared by the interruption-recovery hooks."""

    # Outcome stored by the ToolEnd/ToolError hooks; None until one has fired.
    last_tool_outcome: _ToolOutcome | None = None
    # Number of recoveries issued since the last non-sentinel assistant text
    # (the GenerationStep hook resets it to 0 in that case).
    recoveries: int = 0


# Held by every hook in this module while it reads or mutates ``_AGENT_STATE``.
_STATE_LOCK = asyncio.Lock()
# Recovery state keyed by ``event.agent_id``; the AgentEnd hook removes entries.
_AGENT_STATE: dict[str, _AgentState] = {}


def _normalize_text(value: object | None) -> str | None:
    """Flatten arbitrary tool output into one bounded line of text.

    The value is stringified, every run of whitespace is collapsed to a single
    space, and the result is capped at ``_MAX_SUMMARY_CHARS`` characters with a
    trailing ``...`` when it had to be cut.

    Returns:
        The normalized text, or ``None`` when ``value`` is ``None`` or contains
        nothing but whitespace.
    """
    if value is None:
        return None

    # ``str.split()`` without a separator already discards leading/trailing
    # whitespace, so the re-joined text needs no extra stripping.
    collapsed = " ".join(str(value).split())
    if not collapsed:
        return None

    if len(collapsed) > _MAX_SUMMARY_CHARS:
        # Reserve three characters for the ellipsis so the cap is respected.
        head = collapsed[: _MAX_SUMMARY_CHARS - 3].rstrip()
        return f"{head}..."
    return collapsed


def _extract_assistant_text(event: GenerationStep) -> str | None:
    """Return the normalized text of a trailing plain-text assistant message.

    ``None`` is returned when the event carries no messages, when the final
    message's role is not ``"assistant"``, or when that message has tool calls.
    """
    messages = event.messages
    if not messages:
        return None

    final = messages[-1]
    # Only a pure text turn from the assistant is of interest here.
    if getattr(final, "role", None) != "assistant" or getattr(final, "tool_calls", None):
        return None

    content = getattr(final, "content", None)
    return _normalize_text(content)


def _is_interruption_sentinel(text: str | None) -> bool:
    """Report whether ``text`` is, in its entirety, the provider sentinel.

    A full match is required so that ordinary replies which merely mention the
    sentinel are not mistaken for it. ``None`` never matches.
    """
    return text is not None and _INTERRUPTION_SENTINEL.fullmatch(text) is not None


def _tool_end_summary(event: ToolEnd) -> str:
    """Summarize a finished tool call, preferring its error over its result."""
    if event.error:
        problem = _normalize_text(event.error)
        return (
            f"{event.tool_call.name} returned an error: {problem}"
            if problem
            else f"{event.tool_call.name} returned an error."
        )

    output = _normalize_text(event.result)
    return (
        f"{event.tool_call.name} returned: {output}"
        if output
        else f"{event.tool_call.name} completed without output."
    )


def _tool_error_summary(event: ToolError) -> str:
    """Summarize a tool call that ended by raising, including the error text if any."""
    problem = _normalize_text(event.error)
    return (
        f"{event.tool_call.name} raised an error: {problem}"
        if problem
        else f"{event.tool_call.name} raised an error."
    )


def _recovery_feedback(state: _AgentState) -> str:
    """Build the corrective prompt appended after the sentinel turn.

    Args:
        state: Recovery state for the agent. Only ``last_tool_outcome`` is
            read; nothing is mutated.

    Returns:
        A message telling the model to disregard the sentinel text. When a tool
        outcome has been recorded, its summary is restated as a complete
        sentence so the model can continue from that result.
    """
    base = (
        "Your last response was a transport artifact "
        "(`[Response interrupted by a tool call result.]`), not a valid assistant turn. "
        "Ignore it."
    )
    outcome = state.last_tool_outcome
    if outcome is None:
        return f"{base} Continue from the current conversation state and take the next best action."

    summary = outcome.summary.rstrip()
    # Summaries built from raw tool output usually carry no closing
    # punctuation; terminate them so the summary does not run straight into
    # the instruction that follows it.
    if not summary.endswith((".", "!", "?")):
        summary = f"{summary}."
    return (
        f"{base} The last tool outcome was: {summary} "
        "Continue from that result and take the next best action."
    )


@hook(ToolEnd)
async def remember_tool_end(event: ToolEnd) -> None:
    """Record the outcome of the tool call that just finished for this agent."""
    async with _STATE_LOCK:
        # Create the agent's entry on first use, then overwrite its outcome.
        entry = _AGENT_STATE.setdefault(event.agent_id, _AgentState())
        called = event.tool_call.name
        described = _tool_end_summary(event)
        entry.last_tool_outcome = _ToolOutcome(tool_name=called, summary=described)


@hook(ToolError)
async def remember_tool_error(event: ToolError) -> None:
    """Record a tool call that raised as this agent's most recent outcome."""
    async with _STATE_LOCK:
        # Create the agent's entry on first use, then overwrite its outcome.
        entry = _AGENT_STATE.setdefault(event.agent_id, _AgentState())
        called = event.tool_call.name
        described = _tool_error_summary(event)
        entry.last_tool_outcome = _ToolOutcome(tool_name=called, summary=described)


@hook(GenerationStep)
async def recover_interrupted_tool_result(event: GenerationStep) -> Continue | None:
    """Ask the agent to continue when its last turn was only the sentinel.

    Returns:
        A ``Continue`` reaction carrying corrective feedback when the final
        assistant text is the interruption sentinel and the agent has not yet
        used up ``_MAX_RECOVERIES_PER_AGENT`` recoveries; otherwise ``None``.
    """
    text = _extract_assistant_text(event)
    interrupted = _is_interruption_sentinel(text)

    async with _STATE_LOCK:
        entry = _AGENT_STATE.setdefault(event.agent_id, _AgentState())

        if interrupted and entry.recoveries < _MAX_RECOVERIES_PER_AGENT:
            entry.recoveries += 1
            message = _recovery_feedback(entry)
        else:
            # Real assistant text means the model is back on track, so the
            # recovery budget is restored for any later interruption.
            if not interrupted and text:
                entry.recoveries = 0
            return None

    return Continue(feedback=message)


@hook(AgentEnd)
async def clear_recovery_state(event: AgentEnd) -> None:
    """Forget the finished agent's recovery state, if any was recorded."""
    finished = event.agent_id
    async with _STATE_LOCK:
        # A default is supplied so an agent with no recorded state is a no-op.
        _AGENT_STATE.pop(finished, None)
31 changes: 25 additions & 6 deletions capabilities/web-security/tests/test_bbscope.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,13 @@
import httpx
import pytest

pytestmark = pytest.mark.asyncio

# Add tools directory to path for import
_REPO_ROOT = Path(__file__).resolve()
while _REPO_ROOT != _REPO_ROOT.parent:
if (_REPO_ROOT / "dreadnode" / "web-security" / "tools").is_dir():
if (_REPO_ROOT / "capabilities" / "web-security" / "tools").is_dir():
break
_REPO_ROOT = _REPO_ROOT.parent
sys.path.insert(0, str(_REPO_ROOT / "dreadnode" / "web-security" / "tools"))
sys.path.insert(0, str(_REPO_ROOT / "capabilities" / "web-security" / "tools"))

from bbscope import BBScope

Expand Down Expand Up @@ -56,12 +54,21 @@ def test_all_tools_have_catch(self, toolset: BBScope) -> None:


class TestFind:
@pytest.mark.asyncio
async def test_find_with_results(self, toolset: BBScope) -> None:
mock_data = {
"query": "example.com",
"programs": [
{"platform": "h1", "handle": "example", "url": "https://hackerone.com/example"},
{"platform": "bc", "handle": "example-bc", "url": "https://bugcrowd.com/example-bc"},
{
"platform": "h1",
"handle": "example",
"url": "https://hackerone.com/example",
},
{
"platform": "bc",
"handle": "example-bc",
"url": "https://bugcrowd.com/example-bc",
},
],
"total_count": 2,
}
Expand All @@ -76,6 +83,7 @@ async def test_find_with_results(self, toolset: BBScope) -> None:
assert "example" in result
assert "BC" in result

@pytest.mark.asyncio
async def test_find_no_results(self, toolset: BBScope) -> None:
mock_data = {"query": "nonexistent.invalid", "programs": [], "total_count": 0}
with patch.object(toolset, "_get_client") as mock_client:
Expand All @@ -86,6 +94,7 @@ async def test_find_no_results(self, toolset: BBScope) -> None:
result = await toolset.find(query="nonexistent.invalid")
assert "No bug bounty programs found" in result

@pytest.mark.asyncio
async def test_find_api_error(self, toolset: BBScope) -> None:
with patch.object(toolset, "_get_client") as mock_client:
client = AsyncMock()
Expand All @@ -98,6 +107,7 @@ async def test_find_api_error(self, toolset: BBScope) -> None:


class TestProgram:
@pytest.mark.asyncio
async def test_program_details(self, toolset: BBScope) -> None:
mock_data = {
"platform": "h1",
Expand All @@ -119,6 +129,7 @@ async def test_program_details(self, toolset: BBScope) -> None:
assert "*.example.com" in result
assert "In-scope targets: 5" in result

@pytest.mark.asyncio
async def test_program_vdp(self, toolset: BBScope) -> None:
mock_data = {
"platform": "bc",
Expand All @@ -138,11 +149,13 @@ async def test_program_vdp(self, toolset: BBScope) -> None:
result = await toolset.program(platform="bc", handle="test")
assert "VDP" in result

@pytest.mark.asyncio
async def test_program_invalid_platform(self, toolset: BBScope) -> None:
    """An unrecognised platform yields an 'Invalid platform' error string."""
    output = await toolset.program(platform="invalid", handle="test")
    assert "Error" in output
    assert "Invalid platform" in output

@pytest.mark.asyncio
async def test_program_not_found(self, toolset: BBScope) -> None:
with patch.object(toolset, "_get_client") as mock_client:
client = AsyncMock()
Expand All @@ -154,6 +167,7 @@ async def test_program_not_found(self, toolset: BBScope) -> None:


class TestTargets:
@pytest.mark.asyncio
async def test_targets_wildcards(self, toolset: BBScope) -> None:
mock_data = ["*.example.com", "*.test.org"]
with patch.object(toolset, "_get_client") as mock_client:
Expand All @@ -165,16 +179,19 @@ async def test_targets_wildcards(self, toolset: BBScope) -> None:
assert "*.example.com" in result
assert "2 wildcards" in result

@pytest.mark.asyncio
async def test_targets_invalid_type(self, toolset: BBScope) -> None:
    """An unrecognised target_type yields an 'Invalid target_type' error string."""
    output = await toolset.targets(target_type="invalid")
    assert "Error" in output
    assert "Invalid target_type" in output

@pytest.mark.asyncio
async def test_targets_invalid_platform(self, toolset: BBScope) -> None:
    """A valid target_type with an unrecognised platform yields an 'Invalid platform' error."""
    output = await toolset.targets(target_type="domains", platform="invalid")
    assert "Error" in output
    assert "Invalid platform" in output

@pytest.mark.asyncio
async def test_targets_with_limit(self, toolset: BBScope) -> None:
mock_data = [f"target{i}.com" for i in range(200)]
with patch.object(toolset, "_get_client") as mock_client:
Expand All @@ -188,6 +205,7 @@ async def test_targets_with_limit(self, toolset: BBScope) -> None:


class TestUpdates:
@pytest.mark.asyncio
async def test_updates_today(self, toolset: BBScope) -> None:
mock_data = {
"updates": [
Expand Down Expand Up @@ -215,6 +233,7 @@ async def test_updates_today(self, toolset: BBScope) -> None:
assert "new.example.com" in result
assert "added" in result

@pytest.mark.asyncio
async def test_updates_no_results(self, toolset: BBScope) -> None:
mock_data = {"updates": [], "total_count": 0}
with patch.object(toolset, "_get_client") as mock_client:
Expand Down
Loading
Loading