From d6dd9e313b1b792404a08887215e1c18ddc05758 Mon Sep 17 00:00:00 2001 From: openhands Date: Tue, 3 Mar 2026 11:28:10 +0000 Subject: [PATCH 1/3] Fix critic.mdx rendering issue caused by embedded triple backticks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Python example file (34_critic_example.py) contains triple backticks inside a multi-line string (used to show example file content in the task prompt). When synced to the MDX documentation, these embedded backticks were closing the markdown code block prematurely, causing: - The content after the first embedded ``` to render as regular markdown - Repeated content appearing multiple times on the page - Broken page structure This fix: 1. Updates sync_code_blocks.py to escape embedded triple backticks by inserting zero-width spaces between them (`​`​`), which: - Prevents markdown parser from treating them as code block delimiters - Preserves visual appearance in the rendered code block 2. Updates comparison logic to handle both escaped and unescaped content 3. 
Fixes the critic.mdx file with properly escaped content Co-authored-by: openhands --- .github/scripts/sync_code_blocks.py | 36 +- sdk/guides/critic.mdx | 501 +--------------------------- 2 files changed, 36 insertions(+), 501 deletions(-) diff --git a/.github/scripts/sync_code_blocks.py b/.github/scripts/sync_code_blocks.py index 9dfca3e2..a438a462 100755 --- a/.github/scripts/sync_code_blocks.py +++ b/.github/scripts/sync_code_blocks.py @@ -50,7 +50,9 @@ def extract_code_blocks(content: str) -> list[tuple[str, str, str, int, int]]: matches: list[tuple[str, str, str, int, int]] = [] # Pattern for Python files - python_pattern = r'```python[^\n]*\s+([^\s]+\.py)\n(.*?)```' + # The closing ``` must be at the start of a line (after newline) + # This prevents matching embedded ``` inside the code content + python_pattern = r'```python[^\n]*\s+([^\s]+\.py)\n(.*?)\n```(?=\n|$)' for match in re.finditer(python_pattern, content, re.DOTALL): file_ref = match.group(1) code_content = match.group(2) @@ -59,7 +61,7 @@ def extract_code_blocks(content: str) -> list[tuple[str, str, str, int, int]]: matches.append(('python', file_ref, code_content, start_pos, end_pos)) # Pattern for YAML files - yaml_pattern = r'```yaml[^\n]*\s+([^\s]+\.ya?ml)\n(.*?)```' + yaml_pattern = r'```yaml[^\n]*\s+([^\s]+\.ya?ml)\n(.*?)\n```(?=\n|$)' for match in re.finditer(yaml_pattern, content, re.DOTALL): file_ref = match.group(1) code_content = match.group(2) @@ -97,6 +99,21 @@ def normalize_content(content: str) -> str: return "\n".join(line.rstrip() for line in content.splitlines()) +def escape_embedded_backticks(content: str) -> str: + """ + Escape triple backticks inside source code to prevent breaking markdown code blocks. + + This handles the case where Python code contains triple backticks in strings + (e.g., in docstrings or multi-line strings with markdown examples). 
+ + Strategy: Replace ``` with a zero-width space between backticks: `​`​` + This preserves the visual appearance while preventing markdown parsing issues. + """ + # Use a zero-width space (U+200B) between backticks + # This makes ``` render correctly in the code block without closing it + return content.replace("```", "`\u200b`\u200b`") + + def resolve_paths() -> tuple[Path, Path]: """ Determine docs root and agent-sdk path robustly across CI and local layouts. @@ -164,10 +181,14 @@ def update_doc_file( if actual_content is None: continue + # When comparing, we need to account for backtick escaping + # The doc may already have escaped backticks, so we compare both versions old_normalized = normalize_content(old_code) actual_normalized = normalize_content(actual_content) + actual_escaped_normalized = normalize_content(escape_embedded_backticks(actual_content)) - if old_normalized != actual_normalized: + # Check if content differs (considering both escaped and unescaped versions) + if old_normalized != actual_normalized and old_normalized != actual_escaped_normalized: print(f"\nšŸ“ Found difference in {doc_path.name} for {file_ref}") print(" Updating code block...") @@ -181,11 +202,14 @@ def update_doc_file( ) if opening_line_match: opening_line = opening_line_match.group(0) + # Escape any embedded triple backticks in the source content + # to prevent them from closing the markdown code block + escaped_content = escape_embedded_backticks(actual_content) # Preserve trailing newline behavior - if actual_content.endswith("\n"): - new_block = f"{opening_line}\n{actual_content}```" + if escaped_content.endswith("\n"): + new_block = f"{opening_line}\n{escaped_content}```" else: - new_block = f"{opening_line}\n{actual_content}\n```" + new_block = f"{opening_line}\n{escaped_content}\n```" old_block = new_content[adj_start:adj_end] new_content = new_content[:adj_start] + new_block + new_content[adj_end:] diff --git a/sdk/guides/critic.mdx b/sdk/guides/critic.mdx index 
5c831cb4..9fbffccf 100644 --- a/sdk/guides/critic.mdx +++ b/sdk/guides/critic.mdx @@ -305,503 +305,13 @@ Write tests that verify: ## Verification Steps 1. Create a sample file `sample.txt` with this EXACT content (no trailing newline): -``` -Hello world! -This is a well-known test file. - -It has 5 lines, including empty ones. -Numbers like 42 and 3.14 don't count as words. -``` - -2. Run: `python wordstats/cli.py sample.txt` - Expected output: - - Lines: 5 - - Words: 21 - - Chars: 130 - - Unique words: 21 - -3. Run the tests: `python -m pytest wordstats/tests/ -v` - ALL tests must pass. - -The task is complete ONLY when: -- All files exist -- The CLI outputs the correct stats for sample.txt -- All 5+ tests pass -""" - - -llm_api_key = get_required_env("LLM_API_KEY") -llm = LLM( - # Use a weaker model to increase likelihood of needing multiple iterations - model="anthropic/claude-haiku-4-5", - api_key=llm_api_key, - top_p=0.95, - base_url=os.getenv("LLM_BASE_URL", None), -) - -# Setup critic with iterative refinement config -# The IterativeRefinementConfig tells Conversation.run() to automatically -# retry the task if the critic score is below the threshold -iterative_config = IterativeRefinementConfig( - success_threshold=SUCCESS_THRESHOLD, - max_iterations=MAX_ITERATIONS, -) - -# Auto-configure critic for All-Hands proxy or use explicit env vars -critic = get_default_critic(llm) -if critic is None: - print("āš ļø No All-Hands LLM proxy detected, trying explicit env vars...") - critic = APIBasedCritic( - server_url=get_required_env("CRITIC_SERVER_URL"), - api_key=get_required_env("CRITIC_API_KEY"), - model_name=get_required_env("CRITIC_MODEL_NAME"), - iterative_refinement=iterative_config, - ) -else: - # Add iterative refinement config to the auto-configured critic - critic = critic.model_copy(update={"iterative_refinement": iterative_config}) - -# Create agent with critic (iterative refinement is built into the critic) -agent = Agent( - llm=llm, - tools=[ - 
Tool(name=TerminalTool.name), - Tool(name=FileEditorTool.name), - Tool(name=TaskTrackerTool.name), - ], - critic=critic, -) - -# Create workspace -workspace = Path(tempfile.mkdtemp(prefix="critic_demo_")) -print(f"šŸ“ Created workspace: {workspace}") - -# Create conversation - iterative refinement is handled automatically -# by Conversation.run() based on the critic's config -conversation = Conversation( - agent=agent, - workspace=str(workspace), -) - -print("\n" + "=" * 70) -print("šŸš€ Starting Iterative Refinement with Critic Model") -print("=" * 70) -print(f"Success threshold: {SUCCESS_THRESHOLD:.0%}") -print(f"Max iterations: {MAX_ITERATIONS}") - -# Send the task and run - Conversation.run() handles retries automatically -conversation.send_message(INITIAL_TASK_PROMPT) -conversation.run() - -# Print additional info about created files -print("\nCreated files:") -for path in sorted(workspace.rglob("*")): - if path.is_file(): - relative = path.relative_to(workspace) - print(f" - {relative}") - -# Report cost -cost = llm.metrics.accumulated_cost -print(f"\nEXAMPLE_COST: {cost:.4f}") -``` +`​`​` Hello world! This is a well-known test file. It has 5 lines, including empty ones. Numbers like 42 and 3.14 don't count as words. -``` - -2. Run: `python wordstats/cli.py sample.txt` - Expected output: - - Lines: 5 - - Words: 21 - - Chars: 130 - - Unique words: 21 - -3. Run the tests: `python -m pytest wordstats/tests/ -v` - ALL tests must pass. 
- -The task is complete ONLY when: -- All files exist -- The CLI outputs the correct stats for sample.txt -- All 5+ tests pass -""" - - -llm_api_key = get_required_env("LLM_API_KEY") -llm = LLM( - # Use a weaker model to increase likelihood of needing multiple iterations - model="anthropic/claude-haiku-4-5", - api_key=llm_api_key, - top_p=0.95, - base_url=os.getenv("LLM_BASE_URL", None), -) - -# Setup critic with iterative refinement config -# The IterativeRefinementConfig tells Conversation.run() to automatically -# retry the task if the critic score is below the threshold -iterative_config = IterativeRefinementConfig( - success_threshold=SUCCESS_THRESHOLD, - max_iterations=MAX_ITERATIONS, -) - -# Auto-configure critic for All-Hands proxy or use explicit env vars -critic = get_default_critic(llm) -if critic is None: - print("āš ļø No All-Hands LLM proxy detected, trying explicit env vars...") - critic = APIBasedCritic( - server_url=get_required_env("CRITIC_SERVER_URL"), - api_key=get_required_env("CRITIC_API_KEY"), - model_name=get_required_env("CRITIC_MODEL_NAME"), - iterative_refinement=iterative_config, - ) -else: - # Add iterative refinement config to the auto-configured critic - critic = critic.model_copy(update={"iterative_refinement": iterative_config}) - -# Create agent with critic (iterative refinement is built into the critic) -agent = Agent( - llm=llm, - tools=[ - Tool(name=TerminalTool.name), - Tool(name=FileEditorTool.name), - Tool(name=TaskTrackerTool.name), - ], - critic=critic, -) - -# Create workspace -workspace = Path(tempfile.mkdtemp(prefix="critic_demo_")) -print(f"šŸ“ Created workspace: {workspace}") - -# Create conversation - iterative refinement is handled automatically -# by Conversation.run() based on the critic's config -conversation = Conversation( - agent=agent, - workspace=str(workspace), -) - -print("\n" + "=" * 70) -print("šŸš€ Starting Iterative Refinement with Critic Model") -print("=" * 70) -print(f"Success threshold: 
{SUCCESS_THRESHOLD:.0%}") -print(f"Max iterations: {MAX_ITERATIONS}") - -# Send the task and run - Conversation.run() handles retries automatically -conversation.send_message(INITIAL_TASK_PROMPT) -conversation.run() - -# Print additional info about created files -print("\nCreated files:") -for path in sorted(workspace.rglob("*")): - if path.is_file(): - relative = path.relative_to(workspace) - print(f" - {relative}") - -# Report cost -cost = llm.metrics.accumulated_cost -print(f"\nEXAMPLE_COST: {cost:.4f}") -``` -Hello world! -This is a well-known test file. - -It has 5 lines, including empty ones. -Numbers like 42 and 3.14 don't count as words. -``` - -2. Run: `python wordstats/cli.py sample.txt` - Expected output: - - Lines: 5 - - Words: 21 - - Chars: 130 - - Unique words: 21 - -3. Run the tests: `python -m pytest wordstats/tests/ -v` - ALL tests must pass. - -The task is complete ONLY when: -- All files exist -- The CLI outputs the correct stats for sample.txt -- All 5+ tests pass -""" - - -llm_api_key = get_required_env("LLM_API_KEY") -llm = LLM( - # Use a weaker model to increase likelihood of needing multiple iterations - model="anthropic/claude-haiku-4-5", - api_key=llm_api_key, - top_p=0.95, - base_url=os.getenv("LLM_BASE_URL", None), -) - -# Setup critic with iterative refinement config -# The IterativeRefinementConfig tells Conversation.run() to automatically -# retry the task if the critic score is below the threshold -iterative_config = IterativeRefinementConfig( - success_threshold=SUCCESS_THRESHOLD, - max_iterations=MAX_ITERATIONS, -) - -# Auto-configure critic for All-Hands proxy or use explicit env vars -critic = get_default_critic(llm) -if critic is None: - print("āš ļø No All-Hands LLM proxy detected, trying explicit env vars...") - critic = APIBasedCritic( - server_url=get_required_env("CRITIC_SERVER_URL"), - api_key=get_required_env("CRITIC_API_KEY"), - model_name=get_required_env("CRITIC_MODEL_NAME"), - iterative_refinement=iterative_config, - 
) -else: - # Add iterative refinement config to the auto-configured critic - critic = critic.model_copy(update={"iterative_refinement": iterative_config}) - -# Create agent with critic (iterative refinement is built into the critic) -agent = Agent( - llm=llm, - tools=[ - Tool(name=TerminalTool.name), - Tool(name=FileEditorTool.name), - Tool(name=TaskTrackerTool.name), - ], - critic=critic, -) - -# Create workspace -workspace = Path(tempfile.mkdtemp(prefix="critic_demo_")) -print(f"šŸ“ Created workspace: {workspace}") - -# Create conversation - iterative refinement is handled automatically -# by Conversation.run() based on the critic's config -conversation = Conversation( - agent=agent, - workspace=str(workspace), -) - -print("\n" + "=" * 70) -print("šŸš€ Starting Iterative Refinement with Critic Model") -print("=" * 70) -print(f"Success threshold: {SUCCESS_THRESHOLD:.0%}") -print(f"Max iterations: {MAX_ITERATIONS}") - -# Send the task and run - Conversation.run() handles retries automatically -conversation.send_message(INITIAL_TASK_PROMPT) -conversation.run() - -# Print additional info about created files -print("\nCreated files:") -for path in sorted(workspace.rglob("*")): - if path.is_file(): - relative = path.relative_to(workspace) - print(f" - {relative}") - -# Report cost -cost = llm.metrics.accumulated_cost -print(f"\nEXAMPLE_COST: {cost:.4f}") -``` -Hello world! -This is a well-known test file. - -It has 5 lines, including empty ones. -Numbers like 42 and 3.14 don't count as words. -``` - -2. Run: `python wordstats/cli.py sample.txt` - Expected output: - - Lines: 5 - - Words: 21 - - Chars: 130 - - Unique words: 21 - -3. Run the tests: `python -m pytest wordstats/tests/ -v` - ALL tests must pass. 
- -The task is complete ONLY when: -- All files exist -- The CLI outputs the correct stats for sample.txt -- All 5+ tests pass -""" - - -llm_api_key = get_required_env("LLM_API_KEY") -llm = LLM( - # Use a weaker model to increase likelihood of needing multiple iterations - model="anthropic/claude-haiku-4-5", - api_key=llm_api_key, - top_p=0.95, - base_url=os.getenv("LLM_BASE_URL", None), -) - -# Setup critic with iterative refinement config -# The IterativeRefinementConfig tells Conversation.run() to automatically -# retry the task if the critic score is below the threshold -iterative_config = IterativeRefinementConfig( - success_threshold=SUCCESS_THRESHOLD, - max_iterations=MAX_ITERATIONS, -) - -# Auto-configure critic for All-Hands proxy or use explicit env vars -critic = get_default_critic(llm) -if critic is None: - print("āš ļø No All-Hands LLM proxy detected, trying explicit env vars...") - critic = APIBasedCritic( - server_url=get_required_env("CRITIC_SERVER_URL"), - api_key=get_required_env("CRITIC_API_KEY"), - model_name=get_required_env("CRITIC_MODEL_NAME"), - iterative_refinement=iterative_config, - ) -else: - # Add iterative refinement config to the auto-configured critic - critic = critic.model_copy(update={"iterative_refinement": iterative_config}) - -# Create agent with critic (iterative refinement is built into the critic) -agent = Agent( - llm=llm, - tools=[ - Tool(name=TerminalTool.name), - Tool(name=FileEditorTool.name), - Tool(name=TaskTrackerTool.name), - ], - critic=critic, -) - -# Create workspace -workspace = Path(tempfile.mkdtemp(prefix="critic_demo_")) -print(f"šŸ“ Created workspace: {workspace}") - -# Create conversation - iterative refinement is handled automatically -# by Conversation.run() based on the critic's config -conversation = Conversation( - agent=agent, - workspace=str(workspace), -) - -print("\n" + "=" * 70) -print("šŸš€ Starting Iterative Refinement with Critic Model") -print("=" * 70) -print(f"Success threshold: 
{SUCCESS_THRESHOLD:.0%}") -print(f"Max iterations: {MAX_ITERATIONS}") - -# Send the task and run - Conversation.run() handles retries automatically -conversation.send_message(INITIAL_TASK_PROMPT) -conversation.run() - -# Print additional info about created files -print("\nCreated files:") -for path in sorted(workspace.rglob("*")): - if path.is_file(): - relative = path.relative_to(workspace) - print(f" - {relative}") - -# Report cost -cost = llm.metrics.accumulated_cost -print(f"\nEXAMPLE_COST: {cost:.4f}") -``` -Hello world! -This is a well-known test file. - -It has 5 lines, including empty ones. -Numbers like 42 and 3.14 don't count as words. -``` - -2. Run: `python wordstats/cli.py sample.txt` - Expected output: - - Lines: 5 - - Words: 21 - - Chars: 130 - - Unique words: 21 - -3. Run the tests: `python -m pytest wordstats/tests/ -v` - ALL tests must pass. - -The task is complete ONLY when: -- All files exist -- The CLI outputs the correct stats for sample.txt -- All 5+ tests pass -""" - - -llm_api_key = get_required_env("LLM_API_KEY") -llm = LLM( - # Use a weaker model to increase likelihood of needing multiple iterations - model="anthropic/claude-haiku-4-5", - api_key=llm_api_key, - top_p=0.95, - base_url=os.getenv("LLM_BASE_URL", None), -) - -# Setup critic with iterative refinement config -# The IterativeRefinementConfig tells Conversation.run() to automatically -# retry the task if the critic score is below the threshold -iterative_config = IterativeRefinementConfig( - success_threshold=SUCCESS_THRESHOLD, - max_iterations=MAX_ITERATIONS, -) - -# Auto-configure critic for All-Hands proxy or use explicit env vars -critic = get_default_critic(llm) -if critic is None: - print("āš ļø No All-Hands LLM proxy detected, trying explicit env vars...") - critic = APIBasedCritic( - server_url=get_required_env("CRITIC_SERVER_URL"), - api_key=get_required_env("CRITIC_API_KEY"), - model_name=get_required_env("CRITIC_MODEL_NAME"), - iterative_refinement=iterative_config, - 
) -else: - # Add iterative refinement config to the auto-configured critic - critic = critic.model_copy(update={"iterative_refinement": iterative_config}) - -# Create agent with critic (iterative refinement is built into the critic) -agent = Agent( - llm=llm, - tools=[ - Tool(name=TerminalTool.name), - Tool(name=FileEditorTool.name), - Tool(name=TaskTrackerTool.name), - ], - critic=critic, -) - -# Create workspace -workspace = Path(tempfile.mkdtemp(prefix="critic_demo_")) -print(f"šŸ“ Created workspace: {workspace}") - -# Create conversation - iterative refinement is handled automatically -# by Conversation.run() based on the critic's config -conversation = Conversation( - agent=agent, - workspace=str(workspace), -) - -print("\n" + "=" * 70) -print("šŸš€ Starting Iterative Refinement with Critic Model") -print("=" * 70) -print(f"Success threshold: {SUCCESS_THRESHOLD:.0%}") -print(f"Max iterations: {MAX_ITERATIONS}") - -# Send the task and run - Conversation.run() handles retries automatically -conversation.send_message(INITIAL_TASK_PROMPT) -conversation.run() - -# Print additional info about created files -print("\nCreated files:") -for path in sorted(workspace.rglob("*")): - if path.is_file(): - relative = path.relative_to(workspace) - print(f" - {relative}") - -# Report cost -cost = llm.metrics.accumulated_cost -print(f"\nEXAMPLE_COST: {cost:.4f}") -``` -Hello world! -This is a well-known test file. - -It has 5 lines, including empty ones. -Numbers like 42 and 3.14 don't count as words. -``` +`​`​` 2. 
Run: `python wordstats/cli.py sample.txt` Expected output: @@ -821,12 +331,13 @@ The task is complete ONLY when: llm_api_key = get_required_env("LLM_API_KEY") +# Use a weaker model to increase likelihood of needing multiple iterations +llm_model = os.getenv("LLM_MODEL", "anthropic/claude-haiku-4-5-20251001") llm = LLM( - # Use a weaker model to increase likelihood of needing multiple iterations - model="anthropic/claude-haiku-4-5", + model=llm_model, api_key=llm_api_key, top_p=0.95, - base_url=os.getenv("LLM_BASE_URL", None), + base_url=os.getenv("LLM_BASE_URL"), ) # Setup critic with iterative refinement config From 4439bc3a24bb216d074ff29226cad8092c358f5d Mon Sep 17 00:00:00 2001 From: openhands Date: Tue, 3 Mar 2026 11:28:16 +0000 Subject: [PATCH 2/3] Sync code blocks with latest agent-sdk examples Updates code examples in documentation files to match current source files in the agent-sdk repository. Co-authored-by: openhands --- sdk/guides/agent-acp.mdx | 14 +++- sdk/guides/agent-delegation.mdx | 80 ++++++++++++++++------- sdk/guides/agent-server/local-server.mdx | 5 +- sdk/guides/browser-session-recording.mdx | 2 +- sdk/guides/github-workflows/pr-review.mdx | 3 +- 5 files changed, 76 insertions(+), 28 deletions(-) diff --git a/sdk/guides/agent-acp.mdx b/sdk/guides/agent-acp.mdx index d5d479c9..58e999d0 100644 --- a/sdk/guides/agent-acp.mdx +++ b/sdk/guides/agent-acp.mdx @@ -106,7 +106,9 @@ This example is available on GitHub: [examples/01_standalone_sdk/40_acp_agent_ex """Example: Using ACPAgent with Claude Code ACP server. This example shows how to use an ACP-compatible server (claude-code-acp) -as the agent backend instead of direct LLM calls. +as the agent backend instead of direct LLM calls. It also demonstrates +``ask_agent()`` — a stateless side-question that forks the ACP session +and leaves the main conversation untouched. 
Prerequisites: - Node.js / npx available @@ -122,17 +124,25 @@ from openhands.sdk.agent import ACPAgent from openhands.sdk.conversation import Conversation -agent = ACPAgent(acp_command=["npx", "-y", "claude-code-acp"]) +agent = ACPAgent(acp_command=["npx", "-y", "@zed-industries/claude-code-acp"]) try: cwd = os.getcwd() conversation = Conversation(agent=agent, workspace=cwd) + # --- Main conversation turn --- conversation.send_message( "List the Python source files under openhands-sdk/openhands/sdk/agent/, " "then read the __init__.py and summarize what agent classes are exported." ) conversation.run() + + # --- ask_agent: stateless side-question via fork_session --- + print("\n--- ask_agent ---") + response = conversation.ask_agent( + "Based on what you just saw, which agent class is the newest addition?" + ) + print(f"ask_agent response: {response}") finally: # Clean up the ACP server subprocess agent.close() diff --git a/sdk/guides/agent-delegation.mdx b/sdk/guides/agent-delegation.mdx index c368a178..dac6a8f4 100644 --- a/sdk/guides/agent-delegation.mdx +++ b/sdk/guides/agent-delegation.mdx @@ -168,8 +168,6 @@ which then merges both analyses into a single consolidated report. 
import os -from pydantic import SecretStr - from openhands.sdk import ( LLM, Agent, @@ -179,13 +177,13 @@ from openhands.sdk import ( get_logger, ) from openhands.sdk.context import Skill +from openhands.sdk.subagent import register_agent from openhands.sdk.tool import register_tool from openhands.tools.delegate import ( DelegateTool, DelegationVisualizer, - register_agent, ) -from openhands.tools.preset.default import get_default_tools +from openhands.tools.preset.default import get_default_tools, register_builtins_agents ONLY_RUN_SIMPLE_DELEGATION = False @@ -193,22 +191,18 @@ ONLY_RUN_SIMPLE_DELEGATION = False logger = get_logger(__name__) # Configure LLM and agent -# You can get an API key from https://app.all-hands.dev/settings/api-keys -api_key = os.getenv("LLM_API_KEY") -assert api_key is not None, "LLM_API_KEY environment variable is not set." -model = os.getenv("LLM_MODEL", "anthropic/claude-sonnet-4-5-20250929") llm = LLM( - model=model, - api_key=SecretStr(api_key), + model=os.getenv("LLM_MODEL", "anthropic/claude-sonnet-4-5-20250929"), + api_key=os.getenv("LLM_API_KEY"), base_url=os.environ.get("LLM_BASE_URL", None), usage_id="agent", ) cwd = os.getcwd() -register_tool("DelegateTool", DelegateTool) -tools = get_default_tools(enable_browser=False) -tools.append(Tool(name="DelegateTool")) +tools = get_default_tools(enable_browser=True) +tools.append(Tool(name=DelegateTool.name)) +register_builtins_agents() main_agent = Agent( llm=llm, @@ -220,7 +214,7 @@ conversation = Conversation( visualizer=DelegationVisualizer(name="Delegator"), ) -task_message = ( +conversation.send_message( "Forget about coding. Let's switch to travel planning. " "Let's plan a trip to London. I have two issues I need to solve: " "Lodging: what are the best areas to stay at while keeping budget in mind? " @@ -231,7 +225,6 @@ task_message = ( "They should keep it short. 
After getting the results, merge both analyses " "into a single consolidated report.\n\n" ) -conversation.send_message(task_message) conversation.run() conversation.send_message( @@ -240,18 +233,57 @@ conversation.send_message( conversation.run() # Report cost for simple delegation example -cost_1 = conversation.conversation_stats.get_combined_metrics().accumulated_cost -print(f"EXAMPLE_COST (simple delegation): {cost_1}") +cost_simple = conversation.conversation_stats.get_combined_metrics().accumulated_cost +print(f"EXAMPLE_COST (simple delegation): {cost_simple}") print("Simple delegation example done!", "\n" * 20) - -# -------- Agent Delegation Second Part: User-Defined Agent Types -------- - if ONLY_RUN_SIMPLE_DELEGATION: + # For CI: always emit the EXAMPLE_COST marker before exiting. + print(f"EXAMPLE_COST: {cost_simple}") exit(0) +# -------- Agent Delegation Second Part: Built-in Agent Types (Explore + Bash) -------- + +main_agent = Agent( + llm=llm, + tools=[Tool(name=DelegateTool.name)], +) +conversation = Conversation( + agent=main_agent, + workspace=cwd, + visualizer=DelegationVisualizer(name="Delegator (builtins)"), +) + +builtin_task_message = ( + "Demonstrate SDK built-in sub-agent types. " + "1) Spawn an 'explore' sub-agent and ask it to list the markdown files in " + "openhands-sdk/openhands/sdk/subagent/builtins/ and summarize what each " + "built-in agent type is for (based on the file contents). " + "2) Spawn a 'bash' sub-agent and ask it to run `python --version` in the " + "terminal and return the exact output. " + "3) Merge both results into a short report. " + "Do not use internet access." 
+) + +print("=" * 100) +print("Demonstrating built-in agent delegation (explore + bash)...") +print("=" * 100) + +conversation.send_message(builtin_task_message) +conversation.run() + +# Report cost for builtin agent types example +cost_builtin = conversation.conversation_stats.get_combined_metrics().accumulated_cost +print(f"EXAMPLE_COST (builtin agents): {cost_builtin}") + +print("Built-in agent delegation example done!", "\n" * 20) + + +# -------- Agent Delegation Third Part: User-Defined Agent Types -------- + + def create_lodging_planner(llm: LLM) -> Agent: """Create a lodging planner focused on London stays.""" skills = [ @@ -349,13 +381,15 @@ conversation.send_message( conversation.run() # Report cost for user-defined agent types example -cost_2 = conversation.conversation_stats.get_combined_metrics().accumulated_cost -print(f"EXAMPLE_COST (user-defined agents): {cost_2}") +cost_user_defined = ( + conversation.conversation_stats.get_combined_metrics().accumulated_cost +) +print(f"EXAMPLE_COST (user-defined agents): {cost_user_defined}") print("All done!") # Full example cost report for CI workflow -print(f"EXAMPLE_COST: {cost_1 + cost_2}") +print(f"EXAMPLE_COST: {cost_simple + cost_builtin + cost_user_defined}") ``` diff --git a/sdk/guides/agent-server/local-server.mdx b/sdk/guides/agent-server/local-server.mdx index 4ef87103..541c5038 100644 --- a/sdk/guides/agent-server/local-server.mdx +++ b/sdk/guides/agent-server/local-server.mdx @@ -111,6 +111,7 @@ This example shows how to programmatically start a local agent server and intera import os import subprocess import sys +import tempfile import threading import time @@ -268,7 +269,9 @@ with ManagedAPIServer(port=8001) as server: # Create RemoteConversation with callbacks # NOTE: Workspace is required for RemoteConversation - workspace = Workspace(host=server.base_url) + # Use a temp directory that exists and is accessible in CI environments + temp_workspace_dir = 
tempfile.mkdtemp(prefix="agent_server_demo_") + workspace = Workspace(host=server.base_url, working_dir=temp_workspace_dir) result = workspace.execute_command("pwd") logger.info( f"Command '{result.command}' completed with exit code {result.exit_code}" diff --git a/sdk/guides/browser-session-recording.mdx b/sdk/guides/browser-session-recording.mdx index 7c2a9925..39a50f09 100644 --- a/sdk/guides/browser-session-recording.mdx +++ b/sdk/guides/browser-session-recording.mdx @@ -46,7 +46,7 @@ browsing session. The recording will be automatically saved to the persistence directory when browser_stop_recording is called. You can replay it with: - rrweb-player: https://github.com/rrweb-io/rrweb/tree/master/packages/rrweb-player - - Online viewer: https://www.rrweb.io/ + - Online viewer: https://www.rrweb.io/demo/ """ import json diff --git a/sdk/guides/github-workflows/pr-review.mdx b/sdk/guides/github-workflows/pr-review.mdx index f6816a23..00f43d0f 100644 --- a/sdk/guides/github-workflows/pr-review.mdx +++ b/sdk/guides/github-workflows/pr-review.mdx @@ -162,7 +162,8 @@ jobs: - name: Run PR Review uses: ./.github/actions/pr-review with: - # LLM configuration + # LLM model(s) to use. 
Can be comma-separated for A/B testing + # - one model will be randomly selected per review llm-model: anthropic/claude-sonnet-4-5-20250929 llm-base-url: '' # Review style: roasted (other option: standard) From f578e1bc5e613069003c511ac383e9572a0c5471 Mon Sep 17 00:00:00 2001 From: openhands Date: Tue, 3 Mar 2026 11:47:52 +0000 Subject: [PATCH 3/3] Address PR review comments: improve backtick escaping robustness - Add comprehensive docstring explaining zero-width space tradeoff - Handle empty/falsy content in escape_embedded_backticks - Fix regex pattern to handle code blocks without trailing newlines - Simplify comparison logic with clearer variable names - Add unit tests for escape_embedded_backticks and related functions Co-authored-by: openhands --- .github/scripts/sync_code_blocks.py | 40 +++++-- tests/test_sync_code_blocks.py | 173 ++++++++++++++++++++++++++++ 2 files changed, 201 insertions(+), 12 deletions(-) create mode 100644 tests/test_sync_code_blocks.py diff --git a/.github/scripts/sync_code_blocks.py b/.github/scripts/sync_code_blocks.py index a438a462..e57f6526 100755 --- a/.github/scripts/sync_code_blocks.py +++ b/.github/scripts/sync_code_blocks.py @@ -50,21 +50,26 @@ def extract_code_blocks(content: str) -> list[tuple[str, str, str, int, int]]: matches: list[tuple[str, str, str, int, int]] = [] # Pattern for Python files - # The closing ``` must be at the start of a line (after newline) - # This prevents matching embedded ``` inside the code content - python_pattern = r'```python[^\n]*\s+([^\s]+\.py)\n(.*?)\n```(?=\n|$)' + # The closing ``` must be at the start of a line (after newline) OR at the very end + # The \n? 
before ``` makes the trailing newline optional to handle edge cases + # where content doesn't have a trailing newline + python_pattern = r'```python[^\n]*\s+([^\s]+\.py)\n(.*?)\n?```(?=\n|$)' for match in re.finditer(python_pattern, content, re.DOTALL): file_ref = match.group(1) code_content = match.group(2) + # Strip trailing newline from code content if present (will be re-added during update) + code_content = code_content.rstrip('\n') start_pos = match.start() end_pos = match.end() matches.append(('python', file_ref, code_content, start_pos, end_pos)) # Pattern for YAML files - yaml_pattern = r'```yaml[^\n]*\s+([^\s]+\.ya?ml)\n(.*?)\n```(?=\n|$)' + yaml_pattern = r'```yaml[^\n]*\s+([^\s]+\.ya?ml)\n(.*?)\n?```(?=\n|$)' for match in re.finditer(yaml_pattern, content, re.DOTALL): file_ref = match.group(1) code_content = match.group(2) + # Strip trailing newline from code content if present (will be re-added during update) + code_content = code_content.rstrip('\n') start_pos = match.start() end_pos = match.end() matches.append(('yaml', file_ref, code_content, start_pos, end_pos)) @@ -108,7 +113,20 @@ def escape_embedded_backticks(content: str) -> str: Strategy: Replace ``` with a zero-width space between backticks: `​`​` This preserves the visual appearance while preventing markdown parsing issues. + + Tradeoff note: Zero-width spaces (U+200B) are invisible and will be copied when + users copy-paste code from the docs. This could cause subtle issues if users paste + code containing these characters. However, this is acceptable because: + 1. The affected code is primarily display content (example outputs), not executable + 2. Alternative approaches (like changing source files) aren't feasible since we + sync from an external repository (agent-sdk) + 3. 
Most modern editors will highlight invisible Unicode characters + + The function is close to idempotent: a second application usually finds no bare ``` left + to replace. Exception: a raw run of 3k+2 backticks (5, 8, 11, ...) leaves the escaped group's trailing backtick adjacent to the 2 leftover backticks, forming a new literal ``` that a repeat pass would also escape. """ + if not content: + return content # Use a zero-width space (U+200B) between backticks # This makes ``` render correctly in the code block without closing it return content.replace("```", "`\u200b`\u200b`") @@ -181,14 +199,12 @@ def update_doc_file( if actual_content is None: continue - # When comparing, we need to account for backtick escaping - # The doc may already have escaped backticks, so we compare both versions - old_normalized = normalize_content(old_code) - actual_normalized = normalize_content(actual_content) - actual_escaped_normalized = normalize_content(escape_embedded_backticks(actual_content)) - - # Check if content differs (considering both escaped and unescaped versions) - if old_normalized != actual_normalized and old_normalized != actual_escaped_normalized: + # Compare normalized versions: old doc content vs escaped actual content + # We always compare against escaped version since that's what will be written + old_display = normalize_content(old_code) + new_display = normalize_content(escape_embedded_backticks(actual_content)) + + if old_display != new_display: print(f"\nšŸ“ Found difference in {doc_path.name} for {file_ref}") print("   Updating code block...") diff --git a/tests/test_sync_code_blocks.py b/tests/test_sync_code_blocks.py new file mode 100644 index 00000000..057a8c11 --- /dev/null +++ b/tests/test_sync_code_blocks.py @@ -0,0 +1,173 @@ +""" +Tests for the sync_code_blocks.py script functionality. + +These tests ensure the backtick escaping logic works correctly to prevent +markdown rendering issues in documentation. 
+""" + +import sys +from pathlib import Path + +# Add the script directory to path for imports +sys.path.insert(0, str(Path(__file__).parent.parent / ".github" / "scripts")) + +from sync_code_blocks import escape_embedded_backticks, extract_code_blocks, normalize_content + + +class TestEscapeEmbeddedBackticks: + """Tests for the escape_embedded_backticks function.""" + + def test_basic_triple_backticks(self): + """Basic case: single triple backtick sequence gets escaped.""" + content = "Some code with ``` backticks" + result = escape_embedded_backticks(content) + # Should have zero-width spaces between backticks + assert "```" not in result + assert "`\u200b`\u200b`" in result + + def test_multiple_occurrences(self): + """Multiple triple backtick sequences all get escaped.""" + content = "```python\ncode\n```\nMore ```text```" + result = escape_embedded_backticks(content) + # Original has 4 triple backticks: opening, closing, and two more in "```text```" + assert result.count("`\u200b`\u200b`") == 4 + assert "```" not in result + + def test_idempotency(self): + """Applying the function multiple times produces same result.""" + content = "Hello ``` world ``` test" + once = escape_embedded_backticks(content) + twice = escape_embedded_backticks(once) + # Already escaped content should not be re-escaped + # Since we replace "```" and the escaped version is "`​`​`" (with ZWS), + # applying again should not find any more "```" to replace + assert once == twice + + def test_four_backticks(self): + """Four backticks should have three escaped and one regular.""" + content = "````" # 4 backticks + result = escape_embedded_backticks(content) + # "````" -> "`​`​``" (first 3 escaped, 4th remains) + assert "`\u200b`\u200b``" in result + + def test_five_backticks(self): + """Five backticks: only one group of 3 is replaced, leaving 2.""" + content = "`````" # 5 backticks + result = escape_embedded_backticks(content) + # str.replace() replaces non-overlapping occurrences from left 
to right + # "`````" -> "`​`​`" + "``" (first 3 replaced, last 2 remain as regular backticks) + assert result == "`\u200b`\u200b```" + # One escaped group + 2 regular backticks remaining + assert result.count("`\u200b`\u200b`") == 1 + + def test_six_backticks(self): + """Six backticks (two groups of three) both get escaped.""" + content = "``````" # 6 backticks + result = escape_embedded_backticks(content) + # Should become two escaped groups + assert "```" not in result + assert result.count("`\u200b`\u200b`") == 2 + + def test_empty_string(self): + """Empty string returns empty string.""" + result = escape_embedded_backticks("") + assert result == "" + + def test_none_like_empty(self): + """Empty/falsy content returns as-is.""" + result = escape_embedded_backticks("") + assert result == "" + + def test_no_backticks(self): + """Content without triple backticks is unchanged.""" + content = "Regular code without triple backticks: ` `` `" + result = escape_embedded_backticks(content) + assert result == content + + def test_mixed_content(self): + """Real-world example with markdown in Python string.""" + content = '''def example(): + """Example with markdown. 
+ + ```python + print("hello") + ``` + """ + pass''' + result = escape_embedded_backticks(content) + assert "```python" not in result + assert "`\u200b`\u200b`python" in result + assert "```\n \"\"\"" not in result + + def test_preserves_other_content(self): + """Escaping preserves all other content exactly.""" + content = "Hello ``` world" + result = escape_embedded_backticks(content) + assert result == "Hello `\u200b`\u200b` world" + + +class TestExtractCodeBlocks: + """Tests for the extract_code_blocks function.""" + + def test_basic_python_block(self): + """Extract a basic Python code block.""" + content = '''```python icon="python" expandable examples/test.py +print("hello") +``` +''' + blocks = extract_code_blocks(content) + assert len(blocks) == 1 + assert blocks[0][0] == 'python' # language + assert blocks[0][1] == 'examples/test.py' # file_ref + assert 'print("hello")' in blocks[0][2] # code_content + + def test_yaml_block(self): + """Extract a YAML code block.""" + content = '''```yaml icon="yaml" examples/config.yml +key: value +``` +''' + blocks = extract_code_blocks(content) + assert len(blocks) == 1 + assert blocks[0][0] == 'yaml' + assert blocks[0][1] == 'examples/config.yml' + + def test_block_without_trailing_newline(self): + """Handle code blocks without trailing newline before closing backticks.""" + content = '''```python icon="python" examples/test.py +code_without_trailing_newline``` +''' + blocks = extract_code_blocks(content) + assert len(blocks) == 1 + assert 'code_without_trailing_newline' in blocks[0][2] + + def test_multiple_blocks(self): + """Extract multiple code blocks.""" + content = '''```python examples/a.py +code a +``` + +```yaml examples/b.yaml +key: b +``` +''' + blocks = extract_code_blocks(content) + assert len(blocks) == 2 + + +class TestNormalizeContent: + """Tests for the normalize_content function.""" + + def test_removes_trailing_whitespace(self): + """Trailing whitespace on lines is removed.""" + content = "line1 
\nline2\t\n" + result = normalize_content(content) + assert result == "line1\nline2" + + def test_normalizes_line_endings(self): + """Different line endings are normalized.""" + content = "line1\r\nline2\rline3" + result = normalize_content(content) + # splitlines() handles all line ending types + lines = result.split('\n') + assert len(lines) == 3