From d6dd9e313b1b792404a08887215e1c18ddc05758 Mon Sep 17 00:00:00 2001 From: openhands Date: Tue, 3 Mar 2026 11:28:10 +0000 Subject: [PATCH 1/3] Fix critic.mdx rendering issue caused by embedded triple backticks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Python example file (34_critic_example.py) contains triple backticks inside a multi-line string (used to show example file content in the task prompt). When synced to the MDX documentation, these embedded backticks were closing the markdown code block prematurely, causing: - The content after the first embedded ``` to render as regular markdown - Repeated content appearing multiple times on the page - Broken page structure This fix: 1. Updates sync_code_blocks.py to escape embedded triple backticks by inserting zero-width spaces between them (`​`​`), which: - Prevents markdown parser from treating them as code block delimiters - Preserves visual appearance in the rendered code block 2. Updates comparison logic to handle both escaped and unescaped content 3. 
Fixes the critic.mdx file with properly escaped content Co-authored-by: openhands --- .github/scripts/sync_code_blocks.py | 36 +- sdk/guides/critic.mdx | 501 +--------------------------- 2 files changed, 36 insertions(+), 501 deletions(-) diff --git a/.github/scripts/sync_code_blocks.py b/.github/scripts/sync_code_blocks.py index 9dfca3e2..a438a462 100755 --- a/.github/scripts/sync_code_blocks.py +++ b/.github/scripts/sync_code_blocks.py @@ -50,7 +50,9 @@ def extract_code_blocks(content: str) -> list[tuple[str, str, str, int, int]]: matches: list[tuple[str, str, str, int, int]] = [] # Pattern for Python files - python_pattern = r'```python[^\n]*\s+([^\s]+\.py)\n(.*?)```' + # The closing ``` must be at the start of a line (after newline) + # This prevents matching embedded ``` inside the code content + python_pattern = r'```python[^\n]*\s+([^\s]+\.py)\n(.*?)\n```(?=\n|$)' for match in re.finditer(python_pattern, content, re.DOTALL): file_ref = match.group(1) code_content = match.group(2) @@ -59,7 +61,7 @@ def extract_code_blocks(content: str) -> list[tuple[str, str, str, int, int]]: matches.append(('python', file_ref, code_content, start_pos, end_pos)) # Pattern for YAML files - yaml_pattern = r'```yaml[^\n]*\s+([^\s]+\.ya?ml)\n(.*?)```' + yaml_pattern = r'```yaml[^\n]*\s+([^\s]+\.ya?ml)\n(.*?)\n```(?=\n|$)' for match in re.finditer(yaml_pattern, content, re.DOTALL): file_ref = match.group(1) code_content = match.group(2) @@ -97,6 +99,21 @@ def normalize_content(content: str) -> str: return "\n".join(line.rstrip() for line in content.splitlines()) +def escape_embedded_backticks(content: str) -> str: + """ + Escape triple backticks inside source code to prevent breaking markdown code blocks. + + This handles the case where Python code contains triple backticks in strings + (e.g., in docstrings or multi-line strings with markdown examples). 
+ + Strategy: Replace ``` with a zero-width space between backticks: `​`​` + This preserves the visual appearance while preventing markdown parsing issues. + """ + # Use a zero-width space (U+200B) between backticks + # This makes ``` render correctly in the code block without closing it + return content.replace("```", "`\u200b`\u200b`") + + def resolve_paths() -> tuple[Path, Path]: """ Determine docs root and agent-sdk path robustly across CI and local layouts. @@ -164,10 +181,14 @@ def update_doc_file( if actual_content is None: continue + # When comparing, we need to account for backtick escaping + # The doc may already have escaped backticks, so we compare both versions old_normalized = normalize_content(old_code) actual_normalized = normalize_content(actual_content) + actual_escaped_normalized = normalize_content(escape_embedded_backticks(actual_content)) - if old_normalized != actual_normalized: + # Check if content differs (considering both escaped and unescaped versions) + if old_normalized != actual_normalized and old_normalized != actual_escaped_normalized: print(f"\nšŸ“ Found difference in {doc_path.name} for {file_ref}") print(" Updating code block...") @@ -181,11 +202,14 @@ def update_doc_file( ) if opening_line_match: opening_line = opening_line_match.group(0) + # Escape any embedded triple backticks in the source content + # to prevent them from closing the markdown code block + escaped_content = escape_embedded_backticks(actual_content) # Preserve trailing newline behavior - if actual_content.endswith("\n"): - new_block = f"{opening_line}\n{actual_content}```" + if escaped_content.endswith("\n"): + new_block = f"{opening_line}\n{escaped_content}```" else: - new_block = f"{opening_line}\n{actual_content}\n```" + new_block = f"{opening_line}\n{escaped_content}\n```" old_block = new_content[adj_start:adj_end] new_content = new_content[:adj_start] + new_block + new_content[adj_end:] diff --git a/sdk/guides/critic.mdx b/sdk/guides/critic.mdx index 
5c831cb4..9fbffccf 100644 --- a/sdk/guides/critic.mdx +++ b/sdk/guides/critic.mdx @@ -305,503 +305,13 @@ Write tests that verify: ## Verification Steps 1. Create a sample file `sample.txt` with this EXACT content (no trailing newline): -``` -Hello world! -This is a well-known test file. - -It has 5 lines, including empty ones. -Numbers like 42 and 3.14 don't count as words. -``` - -2. Run: `python wordstats/cli.py sample.txt` - Expected output: - - Lines: 5 - - Words: 21 - - Chars: 130 - - Unique words: 21 - -3. Run the tests: `python -m pytest wordstats/tests/ -v` - ALL tests must pass. - -The task is complete ONLY when: -- All files exist -- The CLI outputs the correct stats for sample.txt -- All 5+ tests pass -""" - - -llm_api_key = get_required_env("LLM_API_KEY") -llm = LLM( - # Use a weaker model to increase likelihood of needing multiple iterations - model="anthropic/claude-haiku-4-5", - api_key=llm_api_key, - top_p=0.95, - base_url=os.getenv("LLM_BASE_URL", None), -) - -# Setup critic with iterative refinement config -# The IterativeRefinementConfig tells Conversation.run() to automatically -# retry the task if the critic score is below the threshold -iterative_config = IterativeRefinementConfig( - success_threshold=SUCCESS_THRESHOLD, - max_iterations=MAX_ITERATIONS, -) - -# Auto-configure critic for All-Hands proxy or use explicit env vars -critic = get_default_critic(llm) -if critic is None: - print("āš ļø No All-Hands LLM proxy detected, trying explicit env vars...") - critic = APIBasedCritic( - server_url=get_required_env("CRITIC_SERVER_URL"), - api_key=get_required_env("CRITIC_API_KEY"), - model_name=get_required_env("CRITIC_MODEL_NAME"), - iterative_refinement=iterative_config, - ) -else: - # Add iterative refinement config to the auto-configured critic - critic = critic.model_copy(update={"iterative_refinement": iterative_config}) - -# Create agent with critic (iterative refinement is built into the critic) -agent = Agent( - llm=llm, - tools=[ - 
Tool(name=TerminalTool.name), - Tool(name=FileEditorTool.name), - Tool(name=TaskTrackerTool.name), - ], - critic=critic, -) - -# Create workspace -workspace = Path(tempfile.mkdtemp(prefix="critic_demo_")) -print(f"šŸ“ Created workspace: {workspace}") - -# Create conversation - iterative refinement is handled automatically -# by Conversation.run() based on the critic's config -conversation = Conversation( - agent=agent, - workspace=str(workspace), -) - -print("\n" + "=" * 70) -print("šŸš€ Starting Iterative Refinement with Critic Model") -print("=" * 70) -print(f"Success threshold: {SUCCESS_THRESHOLD:.0%}") -print(f"Max iterations: {MAX_ITERATIONS}") - -# Send the task and run - Conversation.run() handles retries automatically -conversation.send_message(INITIAL_TASK_PROMPT) -conversation.run() - -# Print additional info about created files -print("\nCreated files:") -for path in sorted(workspace.rglob("*")): - if path.is_file(): - relative = path.relative_to(workspace) - print(f" - {relative}") - -# Report cost -cost = llm.metrics.accumulated_cost -print(f"\nEXAMPLE_COST: {cost:.4f}") -``` +`​`​` Hello world! This is a well-known test file. It has 5 lines, including empty ones. Numbers like 42 and 3.14 don't count as words. -``` - -2. Run: `python wordstats/cli.py sample.txt` - Expected output: - - Lines: 5 - - Words: 21 - - Chars: 130 - - Unique words: 21 - -3. Run the tests: `python -m pytest wordstats/tests/ -v` - ALL tests must pass. 
- -The task is complete ONLY when: -- All files exist -- The CLI outputs the correct stats for sample.txt -- All 5+ tests pass -""" - - -llm_api_key = get_required_env("LLM_API_KEY") -llm = LLM( - # Use a weaker model to increase likelihood of needing multiple iterations - model="anthropic/claude-haiku-4-5", - api_key=llm_api_key, - top_p=0.95, - base_url=os.getenv("LLM_BASE_URL", None), -) - -# Setup critic with iterative refinement config -# The IterativeRefinementConfig tells Conversation.run() to automatically -# retry the task if the critic score is below the threshold -iterative_config = IterativeRefinementConfig( - success_threshold=SUCCESS_THRESHOLD, - max_iterations=MAX_ITERATIONS, -) - -# Auto-configure critic for All-Hands proxy or use explicit env vars -critic = get_default_critic(llm) -if critic is None: - print("āš ļø No All-Hands LLM proxy detected, trying explicit env vars...") - critic = APIBasedCritic( - server_url=get_required_env("CRITIC_SERVER_URL"), - api_key=get_required_env("CRITIC_API_KEY"), - model_name=get_required_env("CRITIC_MODEL_NAME"), - iterative_refinement=iterative_config, - ) -else: - # Add iterative refinement config to the auto-configured critic - critic = critic.model_copy(update={"iterative_refinement": iterative_config}) - -# Create agent with critic (iterative refinement is built into the critic) -agent = Agent( - llm=llm, - tools=[ - Tool(name=TerminalTool.name), - Tool(name=FileEditorTool.name), - Tool(name=TaskTrackerTool.name), - ], - critic=critic, -) - -# Create workspace -workspace = Path(tempfile.mkdtemp(prefix="critic_demo_")) -print(f"šŸ“ Created workspace: {workspace}") - -# Create conversation - iterative refinement is handled automatically -# by Conversation.run() based on the critic's config -conversation = Conversation( - agent=agent, - workspace=str(workspace), -) - -print("\n" + "=" * 70) -print("šŸš€ Starting Iterative Refinement with Critic Model") -print("=" * 70) -print(f"Success threshold: 
{SUCCESS_THRESHOLD:.0%}") -print(f"Max iterations: {MAX_ITERATIONS}") - -# Send the task and run - Conversation.run() handles retries automatically -conversation.send_message(INITIAL_TASK_PROMPT) -conversation.run() - -# Print additional info about created files -print("\nCreated files:") -for path in sorted(workspace.rglob("*")): - if path.is_file(): - relative = path.relative_to(workspace) - print(f" - {relative}") - -# Report cost -cost = llm.metrics.accumulated_cost -print(f"\nEXAMPLE_COST: {cost:.4f}") -``` -Hello world! -This is a well-known test file. - -It has 5 lines, including empty ones. -Numbers like 42 and 3.14 don't count as words. -``` - -2. Run: `python wordstats/cli.py sample.txt` - Expected output: - - Lines: 5 - - Words: 21 - - Chars: 130 - - Unique words: 21 - -3. Run the tests: `python -m pytest wordstats/tests/ -v` - ALL tests must pass. - -The task is complete ONLY when: -- All files exist -- The CLI outputs the correct stats for sample.txt -- All 5+ tests pass -""" - - -llm_api_key = get_required_env("LLM_API_KEY") -llm = LLM( - # Use a weaker model to increase likelihood of needing multiple iterations - model="anthropic/claude-haiku-4-5", - api_key=llm_api_key, - top_p=0.95, - base_url=os.getenv("LLM_BASE_URL", None), -) - -# Setup critic with iterative refinement config -# The IterativeRefinementConfig tells Conversation.run() to automatically -# retry the task if the critic score is below the threshold -iterative_config = IterativeRefinementConfig( - success_threshold=SUCCESS_THRESHOLD, - max_iterations=MAX_ITERATIONS, -) - -# Auto-configure critic for All-Hands proxy or use explicit env vars -critic = get_default_critic(llm) -if critic is None: - print("āš ļø No All-Hands LLM proxy detected, trying explicit env vars...") - critic = APIBasedCritic( - server_url=get_required_env("CRITIC_SERVER_URL"), - api_key=get_required_env("CRITIC_API_KEY"), - model_name=get_required_env("CRITIC_MODEL_NAME"), - iterative_refinement=iterative_config, - 
) -else: - # Add iterative refinement config to the auto-configured critic - critic = critic.model_copy(update={"iterative_refinement": iterative_config}) - -# Create agent with critic (iterative refinement is built into the critic) -agent = Agent( - llm=llm, - tools=[ - Tool(name=TerminalTool.name), - Tool(name=FileEditorTool.name), - Tool(name=TaskTrackerTool.name), - ], - critic=critic, -) - -# Create workspace -workspace = Path(tempfile.mkdtemp(prefix="critic_demo_")) -print(f"šŸ“ Created workspace: {workspace}") - -# Create conversation - iterative refinement is handled automatically -# by Conversation.run() based on the critic's config -conversation = Conversation( - agent=agent, - workspace=str(workspace), -) - -print("\n" + "=" * 70) -print("šŸš€ Starting Iterative Refinement with Critic Model") -print("=" * 70) -print(f"Success threshold: {SUCCESS_THRESHOLD:.0%}") -print(f"Max iterations: {MAX_ITERATIONS}") - -# Send the task and run - Conversation.run() handles retries automatically -conversation.send_message(INITIAL_TASK_PROMPT) -conversation.run() - -# Print additional info about created files -print("\nCreated files:") -for path in sorted(workspace.rglob("*")): - if path.is_file(): - relative = path.relative_to(workspace) - print(f" - {relative}") - -# Report cost -cost = llm.metrics.accumulated_cost -print(f"\nEXAMPLE_COST: {cost:.4f}") -``` -Hello world! -This is a well-known test file. - -It has 5 lines, including empty ones. -Numbers like 42 and 3.14 don't count as words. -``` - -2. Run: `python wordstats/cli.py sample.txt` - Expected output: - - Lines: 5 - - Words: 21 - - Chars: 130 - - Unique words: 21 - -3. Run the tests: `python -m pytest wordstats/tests/ -v` - ALL tests must pass. 
- -The task is complete ONLY when: -- All files exist -- The CLI outputs the correct stats for sample.txt -- All 5+ tests pass -""" - - -llm_api_key = get_required_env("LLM_API_KEY") -llm = LLM( - # Use a weaker model to increase likelihood of needing multiple iterations - model="anthropic/claude-haiku-4-5", - api_key=llm_api_key, - top_p=0.95, - base_url=os.getenv("LLM_BASE_URL", None), -) - -# Setup critic with iterative refinement config -# The IterativeRefinementConfig tells Conversation.run() to automatically -# retry the task if the critic score is below the threshold -iterative_config = IterativeRefinementConfig( - success_threshold=SUCCESS_THRESHOLD, - max_iterations=MAX_ITERATIONS, -) - -# Auto-configure critic for All-Hands proxy or use explicit env vars -critic = get_default_critic(llm) -if critic is None: - print("āš ļø No All-Hands LLM proxy detected, trying explicit env vars...") - critic = APIBasedCritic( - server_url=get_required_env("CRITIC_SERVER_URL"), - api_key=get_required_env("CRITIC_API_KEY"), - model_name=get_required_env("CRITIC_MODEL_NAME"), - iterative_refinement=iterative_config, - ) -else: - # Add iterative refinement config to the auto-configured critic - critic = critic.model_copy(update={"iterative_refinement": iterative_config}) - -# Create agent with critic (iterative refinement is built into the critic) -agent = Agent( - llm=llm, - tools=[ - Tool(name=TerminalTool.name), - Tool(name=FileEditorTool.name), - Tool(name=TaskTrackerTool.name), - ], - critic=critic, -) - -# Create workspace -workspace = Path(tempfile.mkdtemp(prefix="critic_demo_")) -print(f"šŸ“ Created workspace: {workspace}") - -# Create conversation - iterative refinement is handled automatically -# by Conversation.run() based on the critic's config -conversation = Conversation( - agent=agent, - workspace=str(workspace), -) - -print("\n" + "=" * 70) -print("šŸš€ Starting Iterative Refinement with Critic Model") -print("=" * 70) -print(f"Success threshold: 
{SUCCESS_THRESHOLD:.0%}") -print(f"Max iterations: {MAX_ITERATIONS}") - -# Send the task and run - Conversation.run() handles retries automatically -conversation.send_message(INITIAL_TASK_PROMPT) -conversation.run() - -# Print additional info about created files -print("\nCreated files:") -for path in sorted(workspace.rglob("*")): - if path.is_file(): - relative = path.relative_to(workspace) - print(f" - {relative}") - -# Report cost -cost = llm.metrics.accumulated_cost -print(f"\nEXAMPLE_COST: {cost:.4f}") -``` -Hello world! -This is a well-known test file. - -It has 5 lines, including empty ones. -Numbers like 42 and 3.14 don't count as words. -``` - -2. Run: `python wordstats/cli.py sample.txt` - Expected output: - - Lines: 5 - - Words: 21 - - Chars: 130 - - Unique words: 21 - -3. Run the tests: `python -m pytest wordstats/tests/ -v` - ALL tests must pass. - -The task is complete ONLY when: -- All files exist -- The CLI outputs the correct stats for sample.txt -- All 5+ tests pass -""" - - -llm_api_key = get_required_env("LLM_API_KEY") -llm = LLM( - # Use a weaker model to increase likelihood of needing multiple iterations - model="anthropic/claude-haiku-4-5", - api_key=llm_api_key, - top_p=0.95, - base_url=os.getenv("LLM_BASE_URL", None), -) - -# Setup critic with iterative refinement config -# The IterativeRefinementConfig tells Conversation.run() to automatically -# retry the task if the critic score is below the threshold -iterative_config = IterativeRefinementConfig( - success_threshold=SUCCESS_THRESHOLD, - max_iterations=MAX_ITERATIONS, -) - -# Auto-configure critic for All-Hands proxy or use explicit env vars -critic = get_default_critic(llm) -if critic is None: - print("āš ļø No All-Hands LLM proxy detected, trying explicit env vars...") - critic = APIBasedCritic( - server_url=get_required_env("CRITIC_SERVER_URL"), - api_key=get_required_env("CRITIC_API_KEY"), - model_name=get_required_env("CRITIC_MODEL_NAME"), - iterative_refinement=iterative_config, - 
) -else: - # Add iterative refinement config to the auto-configured critic - critic = critic.model_copy(update={"iterative_refinement": iterative_config}) - -# Create agent with critic (iterative refinement is built into the critic) -agent = Agent( - llm=llm, - tools=[ - Tool(name=TerminalTool.name), - Tool(name=FileEditorTool.name), - Tool(name=TaskTrackerTool.name), - ], - critic=critic, -) - -# Create workspace -workspace = Path(tempfile.mkdtemp(prefix="critic_demo_")) -print(f"šŸ“ Created workspace: {workspace}") - -# Create conversation - iterative refinement is handled automatically -# by Conversation.run() based on the critic's config -conversation = Conversation( - agent=agent, - workspace=str(workspace), -) - -print("\n" + "=" * 70) -print("šŸš€ Starting Iterative Refinement with Critic Model") -print("=" * 70) -print(f"Success threshold: {SUCCESS_THRESHOLD:.0%}") -print(f"Max iterations: {MAX_ITERATIONS}") - -# Send the task and run - Conversation.run() handles retries automatically -conversation.send_message(INITIAL_TASK_PROMPT) -conversation.run() - -# Print additional info about created files -print("\nCreated files:") -for path in sorted(workspace.rglob("*")): - if path.is_file(): - relative = path.relative_to(workspace) - print(f" - {relative}") - -# Report cost -cost = llm.metrics.accumulated_cost -print(f"\nEXAMPLE_COST: {cost:.4f}") -``` -Hello world! -This is a well-known test file. - -It has 5 lines, including empty ones. -Numbers like 42 and 3.14 don't count as words. -``` +`​`​` 2. 
Run: `python wordstats/cli.py sample.txt` Expected output: @@ -821,12 +331,13 @@ The task is complete ONLY when: llm_api_key = get_required_env("LLM_API_KEY") +# Use a weaker model to increase likelihood of needing multiple iterations +llm_model = os.getenv("LLM_MODEL", "anthropic/claude-haiku-4-5-20251001") llm = LLM( - # Use a weaker model to increase likelihood of needing multiple iterations - model="anthropic/claude-haiku-4-5", + model=llm_model, api_key=llm_api_key, top_p=0.95, - base_url=os.getenv("LLM_BASE_URL", None), + base_url=os.getenv("LLM_BASE_URL"), ) # Setup critic with iterative refinement config From 4439bc3a24bb216d074ff29226cad8092c358f5d Mon Sep 17 00:00:00 2001 From: openhands Date: Tue, 3 Mar 2026 11:28:16 +0000 Subject: [PATCH 2/3] Sync code blocks with latest agent-sdk examples Updates code examples in documentation files to match current source files in the agent-sdk repository. Co-authored-by: openhands --- sdk/guides/agent-acp.mdx | 14 +++- sdk/guides/agent-delegation.mdx | 80 ++++++++++++++++------- sdk/guides/agent-server/local-server.mdx | 5 +- sdk/guides/browser-session-recording.mdx | 2 +- sdk/guides/github-workflows/pr-review.mdx | 3 +- 5 files changed, 76 insertions(+), 28 deletions(-) diff --git a/sdk/guides/agent-acp.mdx b/sdk/guides/agent-acp.mdx index d5d479c9..58e999d0 100644 --- a/sdk/guides/agent-acp.mdx +++ b/sdk/guides/agent-acp.mdx @@ -106,7 +106,9 @@ This example is available on GitHub: [examples/01_standalone_sdk/40_acp_agent_ex """Example: Using ACPAgent with Claude Code ACP server. This example shows how to use an ACP-compatible server (claude-code-acp) -as the agent backend instead of direct LLM calls. +as the agent backend instead of direct LLM calls. It also demonstrates +``ask_agent()`` — a stateless side-question that forks the ACP session +and leaves the main conversation untouched. 
Prerequisites: - Node.js / npx available @@ -122,17 +124,25 @@ from openhands.sdk.agent import ACPAgent from openhands.sdk.conversation import Conversation -agent = ACPAgent(acp_command=["npx", "-y", "claude-code-acp"]) +agent = ACPAgent(acp_command=["npx", "-y", "@zed-industries/claude-code-acp"]) try: cwd = os.getcwd() conversation = Conversation(agent=agent, workspace=cwd) + # --- Main conversation turn --- conversation.send_message( "List the Python source files under openhands-sdk/openhands/sdk/agent/, " "then read the __init__.py and summarize what agent classes are exported." ) conversation.run() + + # --- ask_agent: stateless side-question via fork_session --- + print("\n--- ask_agent ---") + response = conversation.ask_agent( + "Based on what you just saw, which agent class is the newest addition?" + ) + print(f"ask_agent response: {response}") finally: # Clean up the ACP server subprocess agent.close() diff --git a/sdk/guides/agent-delegation.mdx b/sdk/guides/agent-delegation.mdx index c368a178..dac6a8f4 100644 --- a/sdk/guides/agent-delegation.mdx +++ b/sdk/guides/agent-delegation.mdx @@ -168,8 +168,6 @@ which then merges both analyses into a single consolidated report. 
import os -from pydantic import SecretStr - from openhands.sdk import ( LLM, Agent, @@ -179,13 +177,13 @@ from openhands.sdk import ( get_logger, ) from openhands.sdk.context import Skill +from openhands.sdk.subagent import register_agent from openhands.sdk.tool import register_tool from openhands.tools.delegate import ( DelegateTool, DelegationVisualizer, - register_agent, ) -from openhands.tools.preset.default import get_default_tools +from openhands.tools.preset.default import get_default_tools, register_builtins_agents ONLY_RUN_SIMPLE_DELEGATION = False @@ -193,22 +191,18 @@ ONLY_RUN_SIMPLE_DELEGATION = False logger = get_logger(__name__) # Configure LLM and agent -# You can get an API key from https://app.all-hands.dev/settings/api-keys -api_key = os.getenv("LLM_API_KEY") -assert api_key is not None, "LLM_API_KEY environment variable is not set." -model = os.getenv("LLM_MODEL", "anthropic/claude-sonnet-4-5-20250929") llm = LLM( - model=model, - api_key=SecretStr(api_key), + model=os.getenv("LLM_MODEL", "anthropic/claude-sonnet-4-5-20250929"), + api_key=os.getenv("LLM_API_KEY"), base_url=os.environ.get("LLM_BASE_URL", None), usage_id="agent", ) cwd = os.getcwd() -register_tool("DelegateTool", DelegateTool) -tools = get_default_tools(enable_browser=False) -tools.append(Tool(name="DelegateTool")) +tools = get_default_tools(enable_browser=True) +tools.append(Tool(name=DelegateTool.name)) +register_builtins_agents() main_agent = Agent( llm=llm, @@ -220,7 +214,7 @@ conversation = Conversation( visualizer=DelegationVisualizer(name="Delegator"), ) -task_message = ( +conversation.send_message( "Forget about coding. Let's switch to travel planning. " "Let's plan a trip to London. I have two issues I need to solve: " "Lodging: what are the best areas to stay at while keeping budget in mind? " @@ -231,7 +225,6 @@ task_message = ( "They should keep it short. 
After getting the results, merge both analyses " "into a single consolidated report.\n\n" ) -conversation.send_message(task_message) conversation.run() conversation.send_message( @@ -240,18 +233,57 @@ conversation.send_message( conversation.run() # Report cost for simple delegation example -cost_1 = conversation.conversation_stats.get_combined_metrics().accumulated_cost -print(f"EXAMPLE_COST (simple delegation): {cost_1}") +cost_simple = conversation.conversation_stats.get_combined_metrics().accumulated_cost +print(f"EXAMPLE_COST (simple delegation): {cost_simple}") print("Simple delegation example done!", "\n" * 20) - -# -------- Agent Delegation Second Part: User-Defined Agent Types -------- - if ONLY_RUN_SIMPLE_DELEGATION: + # For CI: always emit the EXAMPLE_COST marker before exiting. + print(f"EXAMPLE_COST: {cost_simple}") exit(0) +# -------- Agent Delegation Second Part: Built-in Agent Types (Explore + Bash) -------- + +main_agent = Agent( + llm=llm, + tools=[Tool(name=DelegateTool.name)], +) +conversation = Conversation( + agent=main_agent, + workspace=cwd, + visualizer=DelegationVisualizer(name="Delegator (builtins)"), +) + +builtin_task_message = ( + "Demonstrate SDK built-in sub-agent types. " + "1) Spawn an 'explore' sub-agent and ask it to list the markdown files in " + "openhands-sdk/openhands/sdk/subagent/builtins/ and summarize what each " + "built-in agent type is for (based on the file contents). " + "2) Spawn a 'bash' sub-agent and ask it to run `python --version` in the " + "terminal and return the exact output. " + "3) Merge both results into a short report. " + "Do not use internet access." 
+) + +print("=" * 100) +print("Demonstrating built-in agent delegation (explore + bash)...") +print("=" * 100) + +conversation.send_message(builtin_task_message) +conversation.run() + +# Report cost for builtin agent types example +cost_builtin = conversation.conversation_stats.get_combined_metrics().accumulated_cost +print(f"EXAMPLE_COST (builtin agents): {cost_builtin}") + +print("Built-in agent delegation example done!", "\n" * 20) + + +# -------- Agent Delegation Third Part: User-Defined Agent Types -------- + + def create_lodging_planner(llm: LLM) -> Agent: """Create a lodging planner focused on London stays.""" skills = [ @@ -349,13 +381,15 @@ conversation.send_message( conversation.run() # Report cost for user-defined agent types example -cost_2 = conversation.conversation_stats.get_combined_metrics().accumulated_cost -print(f"EXAMPLE_COST (user-defined agents): {cost_2}") +cost_user_defined = ( + conversation.conversation_stats.get_combined_metrics().accumulated_cost +) +print(f"EXAMPLE_COST (user-defined agents): {cost_user_defined}") print("All done!") # Full example cost report for CI workflow -print(f"EXAMPLE_COST: {cost_1 + cost_2}") +print(f"EXAMPLE_COST: {cost_simple + cost_builtin + cost_user_defined}") ``` diff --git a/sdk/guides/agent-server/local-server.mdx b/sdk/guides/agent-server/local-server.mdx index 4ef87103..541c5038 100644 --- a/sdk/guides/agent-server/local-server.mdx +++ b/sdk/guides/agent-server/local-server.mdx @@ -111,6 +111,7 @@ This example shows how to programmatically start a local agent server and intera import os import subprocess import sys +import tempfile import threading import time @@ -268,7 +269,9 @@ with ManagedAPIServer(port=8001) as server: # Create RemoteConversation with callbacks # NOTE: Workspace is required for RemoteConversation - workspace = Workspace(host=server.base_url) + # Use a temp directory that exists and is accessible in CI environments + temp_workspace_dir = 
tempfile.mkdtemp(prefix="agent_server_demo_") + workspace = Workspace(host=server.base_url, working_dir=temp_workspace_dir) result = workspace.execute_command("pwd") logger.info( f"Command '{result.command}' completed with exit code {result.exit_code}" diff --git a/sdk/guides/browser-session-recording.mdx b/sdk/guides/browser-session-recording.mdx index 7c2a9925..39a50f09 100644 --- a/sdk/guides/browser-session-recording.mdx +++ b/sdk/guides/browser-session-recording.mdx @@ -46,7 +46,7 @@ browsing session. The recording will be automatically saved to the persistence directory when browser_stop_recording is called. You can replay it with: - rrweb-player: https://github.com/rrweb-io/rrweb/tree/master/packages/rrweb-player - - Online viewer: https://www.rrweb.io/ + - Online viewer: https://www.rrweb.io/demo/ """ import json diff --git a/sdk/guides/github-workflows/pr-review.mdx b/sdk/guides/github-workflows/pr-review.mdx index f6816a23..00f43d0f 100644 --- a/sdk/guides/github-workflows/pr-review.mdx +++ b/sdk/guides/github-workflows/pr-review.mdx @@ -162,7 +162,8 @@ jobs: - name: Run PR Review uses: ./.github/actions/pr-review with: - # LLM configuration + # LLM model(s) to use. 
Can be comma-separated for A/B testing + # - one model will be randomly selected per review llm-model: anthropic/claude-sonnet-4-5-20250929 llm-base-url: '' # Review style: roasted (other option: standard) From f578e1bc5e613069003c511ac383e9572a0c5471 Mon Sep 17 00:00:00 2001 From: openhands Date: Tue, 3 Mar 2026 11:47:52 +0000 Subject: [PATCH 3/3] Address PR review comments: improve backtick escaping robustness - Add comprehensive docstring explaining zero-width space tradeoff - Handle empty/falsy content in escape_embedded_backticks - Fix regex pattern to handle code blocks without trailing newlines - Simplify comparison logic with clearer variable names - Add unit tests for escape_embedded_backticks and related functions Co-authored-by: openhands --- .github/scripts/sync_code_blocks.py | 40 +++++-- tests/test_sync_code_blocks.py | 173 ++++++++++++++++++++++++++++ 2 files changed, 201 insertions(+), 12 deletions(-) create mode 100644 tests/test_sync_code_blocks.py diff --git a/.github/scripts/sync_code_blocks.py b/.github/scripts/sync_code_blocks.py index a438a462..e57f6526 100755 --- a/.github/scripts/sync_code_blocks.py +++ b/.github/scripts/sync_code_blocks.py @@ -50,21 +50,26 @@ def extract_code_blocks(content: str) -> list[tuple[str, str, str, int, int]]: matches: list[tuple[str, str, str, int, int]] = [] # Pattern for Python files - # The closing ``` must be at the start of a line (after newline) - # This prevents matching embedded ``` inside the code content - python_pattern = r'```python[^\n]*\s+([^\s]+\.py)\n(.*?)\n```(?=\n|$)' + # The closing ``` must be at the start of a line (after newline) OR at the very end + # The \n? 
before ``` makes the trailing newline optional to handle edge cases + # where content doesn't have a trailing newline + python_pattern = r'```python[^\n]*\s+([^\s]+\.py)\n(.*?)\n?```(?=\n|$)' for match in re.finditer(python_pattern, content, re.DOTALL): file_ref = match.group(1) code_content = match.group(2) + # Strip trailing newline from code content if present (will be re-added during update) + code_content = code_content.rstrip('\n') start_pos = match.start() end_pos = match.end() matches.append(('python', file_ref, code_content, start_pos, end_pos)) # Pattern for YAML files - yaml_pattern = r'```yaml[^\n]*\s+([^\s]+\.ya?ml)\n(.*?)\n```(?=\n|$)' + yaml_pattern = r'```yaml[^\n]*\s+([^\s]+\.ya?ml)\n(.*?)\n?```(?=\n|$)' for match in re.finditer(yaml_pattern, content, re.DOTALL): file_ref = match.group(1) code_content = match.group(2) + # Strip trailing newline from code content if present (will be re-added during update) + code_content = code_content.rstrip('\n') start_pos = match.start() end_pos = match.end() matches.append(('yaml', file_ref, code_content, start_pos, end_pos)) @@ -108,7 +113,20 @@ def escape_embedded_backticks(content: str) -> str: Strategy: Replace ``` with a zero-width space between backticks: `​`​` This preserves the visual appearance while preventing markdown parsing issues. + + Tradeoff note: Zero-width spaces (U+200B) are invisible and will be copied when + users copy-paste code from the docs. This could cause subtle issues if users paste + code containing these characters. However, this is acceptable because: + 1. The affected code is primarily display content (example outputs), not executable + 2. Alternative approaches (like changing source files) aren't feasible since we + sync from an external repository (agent-sdk) + 3. 
Most modern editors will highlight invisible Unicode characters + + The function is close to idempotent: a second application usually finds no bare ``` left + to replace. Exception: a raw run of 3k+2 backticks (5, 8, 11, ...) leaves the escaped group's trailing backtick adjacent to the 2 leftover backticks, forming a new literal ``` that a repeat pass would also escape. """ + if not content: + return content # Use a zero-width space (U+200B) between backticks # This makes ``` render correctly in the code block without closing it return content.replace("```", "`\u200b`\u200b`") @@ -181,14 +199,12 @@ def update_doc_file( if actual_content is None: continue - # When comparing, we need to account for backtick escaping - # The doc may already have escaped backticks, so we compare both versions - old_normalized = normalize_content(old_code) - actual_normalized = normalize_content(actual_content) - actual_escaped_normalized = normalize_content(escape_embedded_backticks(actual_content)) - - # Check if content differs (considering both escaped and unescaped versions) - if old_normalized != actual_normalized and old_normalized != actual_escaped_normalized: + # Compare normalized versions: old doc content vs escaped actual content + # We always compare against escaped version since that's what will be written + old_display = normalize_content(old_code) + new_display = normalize_content(escape_embedded_backticks(actual_content)) + + if old_display != new_display: print(f"\nšŸ“ Found difference in {doc_path.name} for {file_ref}") print("   Updating code block...") diff --git a/tests/test_sync_code_blocks.py b/tests/test_sync_code_blocks.py new file mode 100644 index 00000000..057a8c11 --- /dev/null +++ b/tests/test_sync_code_blocks.py @@ -0,0 +1,173 @@ +""" +Tests for the sync_code_blocks.py script functionality. + +These tests ensure the backtick escaping logic works correctly to prevent +markdown rendering issues in documentation. 
+""" + +import sys +from pathlib import Path + +# Add the script directory to path for imports +sys.path.insert(0, str(Path(__file__).parent.parent / ".github" / "scripts")) + +from sync_code_blocks import escape_embedded_backticks, extract_code_blocks, normalize_content + + +class TestEscapeEmbeddedBackticks: + """Tests for the escape_embedded_backticks function.""" + + def test_basic_triple_backticks(self): + """Basic case: single triple backtick sequence gets escaped.""" + content = "Some code with ``` backticks" + result = escape_embedded_backticks(content) + # Should have zero-width spaces between backticks + assert "```" not in result + assert "`\u200b`\u200b`" in result + + def test_multiple_occurrences(self): + """Multiple triple backtick sequences all get escaped.""" + content = "```python\ncode\n```\nMore ```text```" + result = escape_embedded_backticks(content) + # Original has 4 triple backticks: opening, closing, and two more in "```text```" + assert result.count("`\u200b`\u200b`") == 4 + assert "```" not in result + + def test_idempotency(self): + """Applying the function multiple times produces same result.""" + content = "Hello ``` world ``` test" + once = escape_embedded_backticks(content) + twice = escape_embedded_backticks(once) + # Already escaped content should not be re-escaped + # Since we replace "```" and the escaped version is "`​`​`" (with ZWS), + # applying again should not find any more "```" to replace + assert once == twice + + def test_four_backticks(self): + """Four backticks should have three escaped and one regular.""" + content = "````" # 4 backticks + result = escape_embedded_backticks(content) + # "````" -> "`​`​``" (first 3 escaped, 4th remains) + assert "`\u200b`\u200b``" in result + + def test_five_backticks(self): + """Five backticks: only one group of 3 is replaced, leaving 2.""" + content = "`````" # 5 backticks + result = escape_embedded_backticks(content) + # str.replace() replaces non-overlapping occurrences from left 
to right + # "`````" -> "`​`​`" + "``" (first 3 replaced, last 2 remain as regular backticks) + assert result == "`\u200b`\u200b```" + # One escaped group + 2 regular backticks remaining + assert result.count("`\u200b`\u200b`") == 1 + + def test_six_backticks(self): + """Six backticks (two groups of three) both get escaped.""" + content = "``````" # 6 backticks + result = escape_embedded_backticks(content) + # Should become two escaped groups + assert "```" not in result + assert result.count("`\u200b`\u200b`") == 2 + + def test_empty_string(self): + """Empty string returns empty string.""" + result = escape_embedded_backticks("") + assert result == "" + + def test_none_like_empty(self): + """Empty/falsy content returns as-is.""" + result = escape_embedded_backticks("") + assert result == "" + + def test_no_backticks(self): + """Content without triple backticks is unchanged.""" + content = "Regular code without triple backticks: ` `` `" + result = escape_embedded_backticks(content) + assert result == content + + def test_mixed_content(self): + """Real-world example with markdown in Python string.""" + content = '''def example(): + """Example with markdown. 
+ + ```python + print("hello") + ``` + """ + pass''' + result = escape_embedded_backticks(content) + assert "```python" not in result + assert "`\u200b`\u200b`python" in result + assert "```\n \"\"\"" not in result + + def test_preserves_other_content(self): + """Escaping preserves all other content exactly.""" + content = "Hello ``` world" + result = escape_embedded_backticks(content) + assert result == "Hello `\u200b`\u200b` world" + + +class TestExtractCodeBlocks: + """Tests for the extract_code_blocks function.""" + + def test_basic_python_block(self): + """Extract a basic Python code block.""" + content = '''```python icon="python" expandable examples/test.py +print("hello") +``` +''' + blocks = extract_code_blocks(content) + assert len(blocks) == 1 + assert blocks[0][0] == 'python' # language + assert blocks[0][1] == 'examples/test.py' # file_ref + assert 'print("hello")' in blocks[0][2] # code_content + + def test_yaml_block(self): + """Extract a YAML code block.""" + content = '''```yaml icon="yaml" examples/config.yml +key: value +``` +''' + blocks = extract_code_blocks(content) + assert len(blocks) == 1 + assert blocks[0][0] == 'yaml' + assert blocks[0][1] == 'examples/config.yml' + + def test_block_without_trailing_newline(self): + """Handle code blocks without trailing newline before closing backticks.""" + content = '''```python icon="python" examples/test.py +code_without_trailing_newline``` +''' + blocks = extract_code_blocks(content) + assert len(blocks) == 1 + assert 'code_without_trailing_newline' in blocks[0][2] + + def test_multiple_blocks(self): + """Extract multiple code blocks.""" + content = '''```python examples/a.py +code a +``` + +```yaml examples/b.yaml +key: b +``` +''' + blocks = extract_code_blocks(content) + assert len(blocks) == 2 + + +class TestNormalizeContent: + """Tests for the normalize_content function.""" + + def test_removes_trailing_whitespace(self): + """Trailing whitespace on lines is removed.""" + content = "line1 
\nline2\t\n" + result = normalize_content(content) + assert result == "line1\nline2" + + def test_normalizes_line_endings(self): + """Different line endings are normalized.""" + content = "line1\r\nline2\rline3" + result = normalize_content(content) + # splitlines() handles all line ending types + lines = result.split('\n') + assert len(lines) == 3