diff --git a/.gitignore b/.gitignore
index 7b64dc3e..5ae49a4a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -172,3 +172,4 @@ swarmexp
 swarmlog
 werewolves_swarm
 .claude
+jobs
diff --git a/ajet/task_reader/tracing_reader/filters/llm_evaluate_filter.py b/ajet/task_reader/tracing_reader/filters/llm_evaluate_filter.py
index b49c480a..0d2c6846 100644
--- a/ajet/task_reader/tracing_reader/filters/llm_evaluate_filter.py
+++ b/ajet/task_reader/tracing_reader/filters/llm_evaluate_filter.py
@@ -1,10 +1,6 @@
 import os
+from loguru import logger
 from typing import Iterable, List
-
-from agentscope.agent import ReActAgent
-from agentscope.formatter import DashScopeMultiAgentFormatter
-from agentscope.message import Msg
-from agentscope.model import DashScopeChatModel
 from pydantic import BaseModel, Field
 
 from ajet.schema.task import Task
@@ -61,6 +57,9 @@ def __init__(
         print_reason: bool = True,
     ) -> None:
         """Filter that evaluates the quality of tasks using LLM."""
+        from agentscope.agent import ReActAgent
+        from agentscope.formatter import DashScopeMultiAgentFormatter
+        from agentscope.model import DashScopeChatModel
 
         self._print_reason = print_reason
         self.external_llm_fn = create_external_llm_fn(
@@ -78,6 +77,8 @@ def __init__(
         )
 
     async def filter(self, tasks: Iterable[Task]) -> List[Task]:
+        from agentscope.message import Msg
+
         kept: List[Task] = []
         for task in tasks:
             payload = "query: " + task.main_query + "\n" "answer: " + task.metadata.get(
diff --git a/tutorial/opencode_build_skillbench_agent.prompt.md b/tutorial/opencode_build_skillbench_agent.prompt.md
index e69de29b..f21aed29 100644
--- a/tutorial/opencode_build_skillbench_agent.prompt.md
+++ b/tutorial/opencode_build_skillbench_agent.prompt.md
@@ -0,0 +1,32 @@
+
+
+
+# Train SkillBench with AgentJet Swarm with Vibe Coding
+
+result is generated by `claude sonnet 4.5`
+
+=============================
+
+你的任务是训练这个仓库中的智能体：https://github.com/benchflow-ai/skillsbench.git
+仓库你需要下载到 ./tmp/skillsbench_swarm_test
+这是在调试过程中你可以使用的模型（openrouter）
+  "url": "https://openrouter-openrouter-esyubhyrxv.ap-northeast-1.fcapp.run/api/v1",
+  "key": "sk-or-v1-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
+  "model": "qwen/qwen3-max"
+
+待训练模型是在GPU服务器上的路径是：
+  /mnt/data_cpfs/model_cache/modelscope/hub/Qwen/Qwen/Qwen2.5-7B-Instruct
+另外SkillBench 可以选择使用多种智能体，你需要选择 opencode
+
+
+
+
+你的skill（首先读取该SKILL文件，获取必要知识）：
+- ajet/copilot/train-complex-blackbox/SKILL.md
+
+
+You must test tutorial/opencode_build_skillsbench/run_episode.py
+这是在调试过程中你可以使用的模型（openrouter）
+  "url": "https://openrouter-openrouter-esyubhyrxv.ap-northeast-1.fcapp.run/api/v1",
+  "key": "sk-or-v1-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
+  "model": "qwen/qwen3-max"
\ No newline at end of file
diff --git a/tutorial/opencode_build_skillsbench/agent_roll.py b/tutorial/opencode_build_skillsbench/agent_roll.py
new file mode 100644
index 00000000..28925bd8
--- /dev/null
+++ b/tutorial/opencode_build_skillsbench/agent_roll.py
@@ -0,0 +1,134 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+AgentJet training script for SkillsBench with OpenCode agent.
+"""
+
+import os
+import sys
+from pathlib import Path
+from ajet.schema.task import Task, WorkflowOutput
+from ajet.copilot.job import AgentJetJob
+from ajet.task_reader import RouterTaskReader
+from ajet.utils.thread_executors import PeriodicDrainThreadPoolExecutor
+from ajet.tuner_lib.as_oai_baseurl_apikey import OpenaiBaseUrlAndApiKey
+from ajet.default_config.ajet_default import AjetTaskReader, JsonlDatasetFile, JsonlTrainingFp
+from ajet.tuner_lib.experimental.as_swarm_client import SwarmClient
+from tutorial.opencode_build_skillsbench.get_training_dataset_item_list import get_training_dataset_item_list
+from tutorial.opencode_build_skillsbench.run_episode import run_episode
+# tutorial/opencode_build_skillsbench
+
+
+# Training configuration; the swarm URL and model path may be overridden through environment variables.
+NUM_EPOCH = 10000  # number of passes main() makes over the task list
+AJET_SWARM_URL = os.getenv("AJET_SWARM_URL", "http://localhost:10086")
+REMOTE_MODEL_PATH = os.getenv("REMOTE_MODEL_PATH", "/mnt/data_cpfs/model_cache/modelscope/hub/Qwen/Qwen/Qwen2__5-14B-Instruct")  # NOTE(review): not referenced elsewhere in this script
+
+
+class SkillsBenchTaskReader:
+    """Loads the SkillsBench task list once and exposes it as AgentJet ``Task`` objects."""
+
+    def __init__(self):
+        # Discovery is delegated to get_training_dataset_item_list().
+        discovered = get_training_dataset_item_list()
+        self.tasks = discovered
+        print(f"Loaded {len(discovered)} SkillsBench tasks")
+
+    def generate_training_tasks(self):
+        """
+        Yield one ``Task`` per entry of ``self.tasks``, in list order.
+
+        Each task's metadata repeats the ``task_id`` and carries the ``task_path``.
+        """
+        for entry in self.tasks:
+            task_id = entry["task_id"]
+            task_metadata = {"task_id": task_id, "task_path": entry["task_path"]}
+            yield Task(task_id=task_id, metadata=task_metadata)
+
+
+def execute_agent(task: Task, api_baseurl_key: OpenaiBaseUrlAndApiKey) -> WorkflowOutput:
+    """
+    Run one SkillsBench episode for ``task`` and wrap the outcome as a WorkflowOutput.
+
+    Args:
+        task: Task whose metadata provides ``task_id`` and ``task_path``
+        api_baseurl_key: Supplies the ``api_key`` and ``base_url`` passed on to run_episode
+
+    Returns:
+        WorkflowOutput carrying the episode reward (as float) and its metadata.
+        If anything inside the episode raises, the reward is 0.0 and the metadata
+        holds the task identifiers, ``success=False`` and the error message.
+    """
+
+    info = task.metadata
+    task_id, task_path = info["task_id"], info["task_path"]
+
+    try:
+        # run_episode is documented to ignore ``model`` (it uses a hardcoded
+        # one), so only a placeholder string is passed.
+        reward, metadata = run_episode(
+            task_id=task_id,
+            task_path=task_path,
+            api_key=api_baseurl_key.api_key,
+            base_url=api_baseurl_key.base_url,
+            model="placeholder-model",
+        )
+        outcome = WorkflowOutput(reward=float(reward), metadata=metadata)
+
+    except Exception as e:
+        print(f"ERROR: Exception during task execution: {e}\n")
+        failure_info = {
+            "task_id": task_id,
+            "task_path": task_path,
+            "success": False,
+            "error": str(e),
+        }
+        outcome = WorkflowOutput(reward=0.0, metadata=failure_info)
+
+    return outcome
+
+
+def main():
+    """Connect to the swarm server and keep submitting rollouts for every SkillsBench task."""
+
+    task_source = SkillsBenchTaskReader()
+
+    job_config = dict(
+        base_yaml_config="tutorial/opencode_build_skillsbench/skillbench.yaml",
+        algorithm="grpo",
+        experiment_name="skillbench_swarm",
+        max_env_worker=128,
+    )
+    ajet_job = AgentJetJob(**job_config)
+
+    # Handshake with the remote swarm server.
+    swarm_worker = SwarmClient(AJET_SWARM_URL)
+    swarm_worker.auto_sync_train_config_and_start_engine(ajet_job)
+
+    # Repeat count per task and remote batch size both come from the job object.
+    repeats_per_task = ajet_job.num_repeat
+    remote_batch_size = ajet_job.batch_size
+
+    def rollout(task):
+        """One episode: open it on the swarm, run the agent, report the result back."""
+        episode_uuid, api_baseurl_key = swarm_worker.begin_episode(discard_episode_timeout=240)
+        # The reward travels inside the WorkflowOutput returned by execute_agent.
+        workflow_output = execute_agent(task, api_baseurl_key)
+        swarm_worker.end_episode(task, episode_uuid, workflow_output)
+
+    executor = PeriodicDrainThreadPoolExecutor(
+        workers=repeats_per_task * remote_batch_size,
+        max_parallel=4,
+        auto_retry=True,
+        block_first_run=False,
+    )
+    for _epoch in range(NUM_EPOCH):
+        # Every task is submitted ``repeats_per_task`` times per epoch.
+        for task in task_source.generate_training_tasks():
+            for _repeat in range(repeats_per_task):
+                executor.submit_with_periodic_drain(fn=rollout, task=task)
+
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tutorial/opencode_build_skillsbench/get_training_dataset_item_list.py b/tutorial/opencode_build_skillsbench/get_training_dataset_item_list.py
new file mode 100644
index 00000000..9f5dc9b4
--- /dev/null
+++ b/tutorial/opencode_build_skillsbench/get_training_dataset_item_list.py
@@ -0,0 +1,62 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Dataset collector for SkillsBench tasks.
+Returns a list of training task identifiers.
+"""
+
+import os
+from pathlib import Path
+from typing import List, Dict
+
+
+def get_training_dataset_item_list(skillsbench_root: "str | os.PathLike | None" = None) -> List[Dict[str, str]]:
+    """
+    Get list of SkillsBench tasks for training.
+
+    Args:
+        skillsbench_root: Root of the SkillsBench checkout (the directory that
+            contains ``tasks/``). When omitted, the ``SKILLSBENCH_ROOT``
+            environment variable is used, falling back to the previously
+            hard-coded ``/root/AgentJet/tmp/skillsbench_swarm_test``.
+
+    Returns:
+        List of dicts, ordered by directory name, each containing task metadata:
+        - task_id: unique identifier for the task (the directory name)
+        - task_path: full path to the task directory
+
+    Raises:
+        FileNotFoundError: if the ``tasks`` directory does not exist.
+    """
+    if skillsbench_root is None:
+        # Keep the old default so existing zero-argument callers are unaffected.
+        skillsbench_root = os.getenv("SKILLSBENCH_ROOT", "/root/AgentJet/tmp/skillsbench_swarm_test")
+    tasks_dir = Path(skillsbench_root) / "tasks"
+
+    if not tasks_dir.exists():
+        raise FileNotFoundError(f"Tasks directory not found: {tasks_dir}")
+
+    # A directory only counts as a task when all of these entries are present.
+    required_entries = ("instruction.md", "task.toml", "tests")
+    task_list = []
+
+    for task_path in sorted(tasks_dir.iterdir()):
+        if not task_path.is_dir():
+            continue
+
+        if not all((task_path / name).exists() for name in required_entries):
+            print(f"Warning: Skipping invalid task: {task_path.name}")
+            continue
+        task_list.append({"task_id": task_path.name, "task_path": str(task_path)})
+
+    print(f"Found {len(task_list)} valid tasks for training")
+    return task_list
+
+
+if __name__ == "__main__":
+    # Manual smoke test: list the discovered tasks and preview the first five.
+    found = get_training_dataset_item_list()
+    print(f"\nTotal tasks: {len(found)}")
+    print("\nFirst 5 tasks:")
+    for position, entry in enumerate(found[:5], start=1):
+        print(f"{position}. {entry['task_id']}")
diff --git a/tutorial/opencode_build_skillsbench/run_episode.py b/tutorial/opencode_build_skillsbench/run_episode.py
new file mode 100644
index 00000000..86124e2a
--- /dev/null
+++ b/tutorial/opencode_build_skillsbench/run_episode.py
@@ -0,0 +1,379 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Episode runner for SkillsBench tasks with OpenCode agent.
+Runs a single training episode and computes reward.
+"""
+
+import argparse
+import json
+import os
+import subprocess
+import sys
+import tempfile
+import uuid
+from pathlib import Path
+from typing import Tuple
+
+
+# Compose content that check_and_fix_docker_compose() writes over harbor's docker-compose-base.yaml: it forwards OPENCODE_CONFIG_CONTENT into the `main` service and sets host networking.
+SPECIAL_DOCKER_COMPOSE_CONTENT = """services:
+  main:
+    environment:
+      - OPENCODE_CONFIG_CONTENT=${OPENCODE_CONFIG_CONTENT}
+    volumes:
+      - ${HOST_VERIFIER_LOGS_PATH}:${ENV_VERIFIER_LOGS_PATH}
+      - ${HOST_AGENT_LOGS_PATH}:${ENV_AGENT_LOGS_PATH}
+    deploy:
+      resources:
+        limits:
+          cpus: ${CPUS}
+          memory: ${MEMORY}
+    network_mode: host
+"""
+
+
+def check_and_fix_docker_compose() -> bool:
+    """
+    Ensure harbor's docker-compose-base.yaml matches SPECIAL_DOCKER_COMPOSE_CONTENT.
+
+    The harbor package directory is located by asking harbor's own Python
+    interpreter; the compose file inside it is overwritten when it differs
+    (ignoring leading/trailing whitespace) from the special version.
+
+    Returns:
+        bool: True if the file was rewritten; False if it already matched,
+        could not be located, or any error occurred along the way
+    """
+    harbor_python = "/root/.local/share/uv/tools/harbor/bin/python"
+    locate_snippet = "import harbor; import os; print(os.path.dirname(harbor.__file__))"
+
+    try:
+        probe = subprocess.run(
+            [harbor_python, "-c", locate_snippet],
+            capture_output=True,
+            text=True,
+            timeout=10,
+        )
+        if probe.returncode != 0:
+            print("Warning: Could not locate harbor installation, skipping docker-compose check")
+            return False
+
+        package_dir = Path(probe.stdout.strip())
+        docker_compose_path = package_dir / "environments" / "docker" / "docker-compose-base.yaml"
+        if not docker_compose_path.exists():
+            print(f"Warning: docker-compose-base.yaml not found at {docker_compose_path}")
+            return False
+
+        # Whitespace at either end is not significant for the comparison.
+        on_disk = docker_compose_path.read_text().strip()
+        if on_disk == SPECIAL_DOCKER_COMPOSE_CONTENT.strip():
+            print("✓ docker-compose-base.yaml is already the special version")
+            return False
+
+        print("! docker-compose-base.yaml is NOT the special version")
+        print(f"  Updating {docker_compose_path} to special version...")
+        docker_compose_path.write_text(SPECIAL_DOCKER_COMPOSE_CONTENT)
+        print("✓ Updated docker-compose-base.yaml to special version")
+        return True
+
+    except Exception as e:
+        print(f"Warning: Error checking docker-compose-base.yaml: {e}")
+        return False
+
+
+def run_episode(task_id: str, task_path: str, api_key: str, base_url: str, model: str = "qwen/qwen3-max") -> Tuple[float, dict]:
+    """
+    Run a single episode for a SkillsBench task using OpenCode agent.
+
+    Args:
+        task_id: Unique identifier for the task
+        task_path: Full path to the task directory
+        api_key: API key for the model provider
+        base_url: Base URL for the model provider
+        model: Model identifier (ignored, hardcoded to use huggingface/Qwen25-14B)
+
+    Returns:
+        Tuple of (reward, metadata):
+        - reward: value from parse_harbor_results; 0.0 on timeout or any exception
+        - metadata: dict with execution details
+    """
+    # Hardcoded model - do not change
+    actual_model_with_provider = "huggingface/Qwen25-14B"
+    actual_model_name = "Qwen25-14B"
+
+    # Generate a unique job name to avoid concurrent conflicts
+    # Format: {task_id}_{uuid} to make it both human-readable and unique
+    job_name = f"{task_id}_{uuid.uuid4().hex[:12]}"
+    masked_api_key = f"{api_key[:10]}..." if len(api_key) > 10 else api_key  # only this form is ever logged
+
+    print(f"\n{'='*60}")
+    print(f"Running episode for task: {task_id}")
+    print(f"Job name: {job_name}")
+    print(f"Task path: {task_path}")
+    print(f"Model: {actual_model_with_provider}")
+    print(f"Base URL: {base_url}")
+    print(f"API Key: {masked_api_key}")
+    print(f"{'='*60}\n")
+
+    # Check and fix docker-compose-base.yaml before running
+    print("Checking harbor docker-compose-base.yaml configuration...")
+    check_and_fix_docker_compose()
+    print()
+
+    # Set up environment variables for the agent
+    env = os.environ.copy()
+
+    # Create OPENCODE_CONFIG_CONTENT with the provided api_key and base_url
+    opencode_config = {
+        "$schema": "https://opencode.ai/config.json",
+        "model": actual_model_with_provider,
+        "provider": {
+            "huggingface": {
+                "npm": "@ai-sdk/openai-compatible",
+                "name": "vLLM Provider",
+                "options": {
+                    "baseURL": base_url,
+                    "apiKey": api_key
+                },
+                "models": {
+                    actual_model_name: {
+                        "name": "vLLM Model",
+                        "limit": {
+                            "context": 20*1000,
+                            "output": 10*1000
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    env["OPENCODE_CONFIG_CONTENT"] = json.dumps(opencode_config)
+    print("Set OPENCODE_CONFIG_CONTENT environment variable")
+
+    # Log a deep copy with the key masked so the secret never reaches the logs.
+    printable_config = json.loads(json.dumps(opencode_config))
+    printable_config["provider"]["huggingface"]["options"]["apiKey"] = masked_api_key
+    print(f"Config: {json.dumps(printable_config, indent=2)}\n")
+
+    # Construct harbor run command: harbor run -p <task_path> -a opencode -m <model> --job-name <unique_job_name>
+    cmd = [
+        "harbor", "run",
+        "-p", task_path,
+        "-a", "opencode",
+        "-m", actual_model_with_provider,
+        "--job-name", job_name,
+    ]
+
+    print(f"Command: {' '.join(cmd)}")
+    print("\nStarting execution...\n")
+
+    metadata = {
+        "task_id": task_id,
+        "task_path": task_path,
+        "job_name": job_name,
+        "model": actual_model_with_provider,
+        "base_url": base_url,
+        "success": False,
+        "reward": 0.0,
+        "error": None,
+    }
+
+    try:
+        # Run the command and capture output
+        result = subprocess.run(
+            cmd,
+            env=env,
+            capture_output=True,
+            text=True,
+            timeout=3600,  # 1 hour timeout
+        )
+
+        print(f"STDOUT:\n{result.stdout}")
+        print(f"\nSTDERR:\n{result.stderr}")
+        print(f"\nReturn code: {result.returncode}")
+
+        # Parse the results
+        # Harbor writes results to jobs/<job_name>/<task_id>/verifier/reward.txt
+        # We use the job_name to locate the specific job directory
+        reward = parse_harbor_results(result.stdout, result.stderr, result.returncode, job_name)
+
+        metadata["success"] = (reward > 0)
+        metadata["reward"] = reward
+        metadata["stdout"] = result.stdout
+        metadata["stderr"] = result.stderr
+        metadata["returncode"] = result.returncode
+
+        print(f"\n{'='*60}")
+        print("Episode completed")
+        print(f"Reward: {reward}")
+        print(f"Success: {metadata['success']}")
+        print(f"{'='*60}\n")
+
+        return reward, metadata
+
+    except subprocess.TimeoutExpired:
+        print("ERROR: Episode timed out after 3600 seconds")
+        metadata["error"] = "timeout"
+        metadata["timeout"] = True
+        return 0.0, metadata
+
+    except Exception as e:
+        print(f"ERROR: Exception during episode execution: {e}")
+        metadata["error"] = str(e)
+        return 0.0, metadata
+
+
+def _read_reward_file(reward_file: Path) -> "float | None":
+    """Return the float stored in ``reward_file``, or None if it cannot be read or parsed."""
+    try:
+        reward_value = float(reward_file.read_text().strip())
+    except (OSError, ValueError) as e:
+        print(f"ERROR: Failed to read reward file {reward_file}: {e}")
+        return None
+    print(f"✓ Successfully read reward from {reward_file}: {reward_value}")
+    return reward_value
+
+
+def parse_harbor_results(stdout: str, stderr: str, returncode: int, job_name: str) -> float:
+    """
+    Parse Harbor execution results to extract reward.
+
+    Sources are tried in order; the first one that yields a parsable value wins:
+      1. ``jobs/<job_name>/**/reward.txt`` under a ``verifier`` directory (cwd-relative)
+      2. a ``jobs/...`` path mentioned on a "results" line of stdout
+      3. any ``reward.txt`` in the most recently modified job directory
+      4. PASSED / FAILED markers in the output when the return code is 0
+
+    Args:
+        stdout: Standard output from harbor run
+        stderr: Standard error from harbor run
+        returncode: Return code from harbor run
+        job_name: The unique job name used for this run
+
+    Returns:
+        float: the reward that was found, or 0.0 when none could be determined
+    """
+    jobs_dir = Path("jobs")
+    print(f"Searching for results in job directory: {job_name}")
+
+    # First priority: Use the job_name to directly locate the result directory
+    # Harbor creates directories as: jobs/<job_name>/<task_name>/verifier/reward.txt
+    job_dir = jobs_dir / job_name
+    if not jobs_dir.exists():
+        print(f"WARNING: Jobs directory does not exist: {jobs_dir}")
+    elif not job_dir.exists():
+        print(f"Looking for job directory: {job_dir}")
+        print(f"WARNING: Job directory does not exist: {job_dir}")
+        print("Available job directories:")
+        for d in sorted(jobs_dir.iterdir())[-5:]:  # Show last 5
+            print(f"  - {d.name}")
+    else:
+        print(f"Looking for job directory: {job_dir}")
+        print(f"Found job directory: {job_dir}")
+        reward_files = list(job_dir.rglob("reward.txt"))
+        print(f"Found {len(reward_files)} reward.txt file(s) in job directory")
+
+        for reward_file in reward_files:
+            print(f"Checking reward file: {reward_file}")
+            # Match "verifier" as a path component below the job directory. A plain
+            # substring test on the full path would also accept unrelated reward
+            # files whenever the job/task name itself contains "verifier".
+            if "verifier" not in reward_file.relative_to(job_dir).parts:
+                print(f"Skipping non-verifier reward file: {reward_file}")
+                continue
+            reward_value = _read_reward_file(reward_file)
+            if reward_value is not None:
+                return reward_value
+
+    # Second priority: Look for results directory in stdout
+    # Harbor typically prints something like "Results saved to: jobs/..."
+    for line in stdout.split('\n'):
+        # 'results' (case-insensitive) also covers the "Results saved to:" form.
+        if 'results' not in line.lower():
+            continue
+        for part in line.split():
+            if 'jobs/' not in part:
+                continue
+            reward_file = Path(part.strip()) / "verifier" / "reward.txt"
+            if reward_file.exists():
+                reward_value = _read_reward_file(reward_file)
+                if reward_value is not None:
+                    return reward_value
+
+    # Fallback: Look for any jobs directory and find the most recent one
+    # WARNING: This is unreliable in concurrent scenarios and should only be used as last resort
+    print("WARNING: Falling back to searching by modification time (unreliable in concurrent scenarios)")
+    if jobs_dir.exists():
+        job_dirs = [d for d in jobs_dir.iterdir() if d.is_dir()]
+        if job_dirs:
+            # Only check the most recent directory to avoid picking up concurrent runs
+            latest_job_dir = max(job_dirs, key=lambda x: x.stat().st_mtime)
+            print(f"Checking most recent job directory: {latest_job_dir.name}")
+            for reward_file in latest_job_dir.rglob("reward.txt"):
+                reward_value = _read_reward_file(reward_file)
+                if reward_value is not None:
+                    return reward_value
+
+    # If we can't find the reward file, check return code
+    # Harbor typically returns 0 on success
+    if returncode == 0:
+        # Check if there are any test failures mentioned in output
+        if "FAILED" in stdout or "FAILED" in stderr:
+            print("Tests FAILED according to output")
+            return 0.0
+        if "passed" in stdout.lower():  # also matches "PASSED"
+            print("Tests PASSED according to output")
+            return 1.0
+
+    # Default to 0 if we can't determine success
+    print("Could not determine reward, defaulting to 0.0")
+    return 0.0
+
+
+# Example usage:
+# python3 tutorial/opencode_build_skillsbench/run_episode.py \
+#   --task-id adaptive-cruise-control \
+#   --task-path /root/AgentJet/tmp/skillsbench_swarm_test/tasks/adaptive-cruise-control \
+#   --api-key "sk-123467" \
+#   --base-url "http://127.0.0.1:2888/v1" \
+#   --model "huggingface/Qwen/Qwen3-235B-A22B-Instruct-2507" \
+#   --output results.json
+
+
+def main():
+    """Command-line entry point: run one episode and exit 0 only when the reward is positive."""
+    cli = argparse.ArgumentParser(description="Run a single SkillsBench training episode")
+    required_options = (
+        ("--task-id", "Task identifier"),
+        ("--task-path", "Path to task directory"),
+        ("--api-key", "API key for the model provider"),
+        ("--base-url", "Base URL for the model provider"),
+    )
+    for flag, description in required_options:
+        cli.add_argument(flag, required=True, help=description)
+    cli.add_argument("--model", default="qwen/qwen3-max", help="Model identifier (ignored, hardcoded model will be used)")
+    cli.add_argument("--output", help="Output file for results (JSON)")
+    options = cli.parse_args()
+
+    reward, metadata = run_episode(
+        task_id=options.task_id,
+        task_path=options.task_path,
+        api_key=options.api_key,
+        base_url=options.base_url,
+        model=options.model,
+    )
+
+    # Optionally persist the reward and metadata as JSON.
+    destination = options.output
+    if destination:
+        with open(destination, 'w') as f:
+            json.dump({"reward": reward, "metadata": metadata}, f, indent=2)
+        print(f"\nResults saved to: {destination}")
+
+    sys.exit(1 if not reward > 0 else 0)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tutorial/opencode_build_skillsbench/skillbench.yaml b/tutorial/opencode_build_skillsbench/skillbench.yaml
new file mode 100644
index 00000000..e096e8f6
--- /dev/null
+++ b/tutorial/opencode_build_skillsbench/skillbench.yaml
@@ -0,0 +1,74 @@
+# ------------------ main config ------------------
+ajet:
+  project_name: example_werewolves_swarm
+  experiment_dir: "auto"  # {exp-dir}/{experiment_name}
+
+  model:
+    # ✨ select model to be trained
+    path: /mnt/data_cpfs/model_cache/modelscope/hub/Qwen/Qwen/Qwen2___5-7B-Instruct
+
+
+  rollout:
+    user_workflow: null
+    temperature: 0.7
+    max_env_worker: 64
+    num_repeat: 6
+    agent_madness_reward: 0.0
+    tensor_model_parallel_size: 1
+    # max_num_seqs: 40
+    # monitor LLM's abnormal behaviors during rollout
+    compute_madness_checklist:
+      - "nonsense"
+    max_response_length_in_one_turn: 1024
+    max_model_len: 22000
+
+  task_reader:
+    type: random_dummy # `env_service` or `jsonl_dataset_file` or `huggingface_dat_repo` or `data_generation` or `random_dummy`
+
+  task_judge:
+    # ✨ select evaluation function
+    judge_protocol: null
+
+  # the experimental ZeroMQ interchange server feature that allows `tuner.as_oai_baseurl_apikey` feature
+  enable_interchange_server: True
+  # train in cloud, run episode locally
+  enable_swarm_mode: True
+  # both swarm / oai share the same interchange server
+  interchange_server:
+    interchange_method: 'ipc' # options: 'tcp' (multi-nodes) or  'ipc' (1 node)
+    interchange_server_port: 10086
+    num_fastapi_process: 2  # 1, 2 or 4 is fine
+    max_fastapi_threads: 512  # 64 or 128 is fine
+    max_inference_tracker_threads: 64 # recommend to be equal to `ajet.rollout.max_env_worker`
+    already_started: False # do not edit, used by `swarm`
+
+  swarm_mode_sample_collection_method: "rollout_until_finish_enough_tasks"
+
+  debug:
+    debug_max_parallel: 1
+    debug_first_n_tasks: 1
+
+  data:
+    train_batch_size: 32
+    max_prompt_length: 4000
+    max_response_length: 18000
+
+  trainer_common:
+    save_freq: 5
+    test_freq: 9999999
+    total_epochs: 9999999
+    total_training_steps: 25
+    nnodes: 1
+    n_gpus_per_node: 8
+
+# ------------------ do not edit ------------------
+hydra:
+  searchpath:
+    - file://ajet/default_config
+    - file://ajet/default_config/verl
+
+# ------------------ do not edit ------------------
+defaults:
+  - verl_default
+  - ajet_default
+  - _self_