Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -172,3 +172,4 @@ swarmexp
swarmlog
werewolves_swarm
.claude
jobs
11 changes: 6 additions & 5 deletions ajet/task_reader/tracing_reader/filters/llm_evaluate_filter.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,6 @@
import os
from loguru import logger
from typing import Iterable, List

from agentscope.agent import ReActAgent
from agentscope.formatter import DashScopeMultiAgentFormatter
from agentscope.message import Msg
from agentscope.model import DashScopeChatModel
from pydantic import BaseModel, Field

from ajet.schema.task import Task
Expand Down Expand Up @@ -61,6 +57,9 @@ def __init__(
print_reason: bool = True,
) -> None:
"""Filter that evaluates the quality of tasks using LLM."""
from agentscope.agent import ReActAgent
from agentscope.formatter import DashScopeMultiAgentFormatter
from agentscope.model import DashScopeChatModel

self._print_reason = print_reason
self.external_llm_fn = create_external_llm_fn(
Expand All @@ -78,6 +77,8 @@ def __init__(
)

async def filter(self, tasks: Iterable[Task]) -> List[Task]:
from agentscope.message import Msg

kept: List[Task] = []
for task in tasks:
payload = "query: " + task.main_query + "\n" "answer: " + task.metadata.get(
Expand Down
32 changes: 32 additions & 0 deletions tutorial/opencode_build_skillbench_agent.prompt.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@



# Train SkillBench with AgentJet Swarm with Vibe Coding

This result was generated by `claude sonnet 4.5`.

=============================

你的任务是训练这个仓库中的智能体:https://github.com/benchflow-ai/skillsbench.git
仓库你需要下载到 ./tmp/skillsbench_swarm_test
这是在调试过程中你可以使用的模型(openrouter)
"url": "https://openrouter-openrouter-esyubhyrxv.ap-northeast-1.fcapp.run/api/v1",
"key": "sk-or-v1-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
"model": "qwen/qwen3-max"

待训练模型是在GPU服务器上的路径是:
/mnt/data_cpfs/model_cache/modelscope/hub/Qwen/Qwen/Qwen2.5-7B-Instruct
另外SkillBench 可以选择使用多种智能体,你需要选择 opencode




你的skill(首先读取该SKILL文件,获取必要知识):
- ajet/copilot/train-complex-blackbox/SKILL.md


You must test tutorial/opencode_build_skillsbench/run_episode.py
这是在调试过程中你可以使用的模型(openrouter)
"url": "https://openrouter-openrouter-esyubhyrxv.ap-northeast-1.fcapp.run/api/v1",
"key": "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"
"model": "qwen/qwen3-max"
134 changes: 134 additions & 0 deletions tutorial/opencode_build_skillsbench/agent_roll.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
AgentJet training script for SkillsBench with OpenCode agent.
"""

import os
import sys
from pathlib import Path
from ajet.schema.task import Task, WorkflowOutput
from ajet.copilot.job import AgentJetJob
from ajet.task_reader import RouterTaskReader
from ajet.utils.thread_executors import PeriodicDrainThreadPoolExecutor
from ajet.tuner_lib.as_oai_baseurl_apikey import OpenaiBaseUrlAndApiKey
from ajet.default_config.ajet_default import AjetTaskReader, JsonlDatasetFile, JsonlTrainingFp
from ajet.tuner_lib.experimental.as_swarm_client import SwarmClient
from tutorial.opencode_build_skillsbench.get_training_dataset_item_list import get_training_dataset_item_list
from tutorial.opencode_build_skillsbench.run_episode import run_episode
# tutorial/opencode_build_skillsbench


# Training configuration
# Number of passes over the full SkillsBench task list.
NUM_EPOCH = 10000
# URL of the remote AgentJet swarm server that orchestrates episodes.
AJET_SWARM_URL = os.getenv("AJET_SWARM_URL", "http://localhost:10086")
# Path (on the remote GPU server) of the model checkpoint to train.
# NOTE(review): "Qwen2__5-14B" looks like a typo for "Qwen2.5", and the
# tutorial prompt references a 7B checkpoint — confirm the intended default.
REMOTE_MODEL_PATH = os.getenv("REMOTE_MODEL_PATH", "/mnt/data_cpfs/model_cache/modelscope/hub/Qwen/Qwen/Qwen2__5-14B-Instruct")


class SkillsBenchTaskReader:
    """Custom task reader for SkillsBench dataset."""

    def __init__(self):
        # Eagerly load every SkillsBench task descriptor at construction time.
        self.tasks = get_training_dataset_item_list()
        print(f"Loaded {len(self.tasks)} SkillsBench tasks")

    def generate_training_tasks(self):
        """Yield one AgentJet ``Task`` per loaded SkillsBench task descriptor.

        Each ``Task`` carries the task id plus its on-disk path in
        ``metadata`` so the rollout worker can locate the task files.
        """
        for item in self.tasks:
            yield Task(
                task_id=item["task_id"],
                metadata={
                    "task_id": item["task_id"],
                    "task_path": item["task_path"],
                },
            )


def execute_agent(task: Task, api_baseurl_key: OpenaiBaseUrlAndApiKey) -> WorkflowOutput:
    """
    Execute the OpenCode agent on a SkillsBench task.

    Args:
        task: AgentJet Task object containing task_id and task_path
        api_baseurl_key: API credentials from swarm server

    Returns:
        WorkflowOutput with reward and metadata
    """
    task_id = task.metadata["task_id"]
    task_path = task.metadata["task_path"]

    # run_episode hard-codes its own model internally, so this value is ignored.
    model = "placeholder-model"

    try:
        reward, metadata = run_episode(
            task_id=task_id,
            task_path=task_path,
            api_key=api_baseurl_key.api_key,
            base_url=api_baseurl_key.base_url,
            model=model,
        )
    except Exception as e:
        # Best-effort policy: a crashed episode scores zero rather than
        # aborting the whole training run.
        print(f"ERROR: Exception during task execution: {e}\n")
        return WorkflowOutput(
            reward=0.0,
            metadata={
                "task_id": task_id,
                "task_path": task_path,
                "success": False,
                "error": str(e),
            },
        )

    return WorkflowOutput(reward=float(reward), metadata=metadata)


def main():
    """Drive the SkillsBench training loop against a remote AgentJet swarm."""
    # Custom task reader over the local SkillsBench checkout.
    dataset = SkillsBenchTaskReader()

    ajet_job = AgentJetJob(
        base_yaml_config="tutorial/opencode_build_skillsbench/skillbench.yaml",
        algorithm="grpo",
        experiment_name="skillbench_swarm",
        max_env_worker=128,
    )

    # Hand shake with remote swarm server
    swarm_worker = SwarmClient(AJET_SWARM_URL)
    swarm_worker.auto_sync_train_config_and_start_engine(
        ajet_job,
        # force_restart=True,
    )

    grpo_n = ajet_job.num_repeat
    remote_batch_size = ajet_job.batch_size

    def rollout(task):
        # One full episode: lease credentials from the swarm, run the agent
        # (base_url / api_key come from the leased credentials), then report
        # the workflow output (which carries the reward) back to the swarm.
        episode_uuid, api_baseurl_key = swarm_worker.begin_episode(discard_episode_timeout=240)
        workflow_output = execute_agent(task, api_baseurl_key)
        swarm_worker.end_episode(task, episode_uuid, workflow_output)

    executor = PeriodicDrainThreadPoolExecutor(
        workers=grpo_n * remote_batch_size,
        max_parallel=4,
        auto_retry=True,
        block_first_run=False,
    )
    # Submit GRPO_N rollouts per task, per epoch; the executor drains itself.
    for _epoch in range(NUM_EPOCH):
        for task in dataset.generate_training_tasks():
            for _repeat in range(grpo_n):
                executor.submit_with_periodic_drain(fn=rollout, task=task)



# Script entry point.
if __name__ == "__main__":
    main()
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Dataset collector for SkillsBench tasks.
Returns a list of training task identifiers.
"""

import os
from pathlib import Path
from typing import List, Dict


def get_training_dataset_item_list(skillsbench_root=None) -> List[Dict[str, str]]:
    """
    Get list of SkillsBench tasks for training.

    Args:
        skillsbench_root: Optional path to the SkillsBench repository
            checkout. When None, the ``SKILLSBENCH_ROOT`` environment
            variable is used, falling back to the original hard-coded
            location (kept for backward compatibility).

    Returns:
        List of dicts, each containing task metadata:
            - task_id: unique identifier for the task
            - task_path: full path to the task directory

    Raises:
        FileNotFoundError: if ``<root>/tasks`` does not exist.
    """
    # Resolve the repository root: explicit arg > env var > legacy default.
    if skillsbench_root is None:
        skillsbench_root = os.getenv(
            "SKILLSBENCH_ROOT", "/root/AgentJet/tmp/skillsbench_swarm_test"
        )
    tasks_dir = Path(skillsbench_root) / "tasks"

    if not tasks_dir.exists():
        raise FileNotFoundError(f"Tasks directory not found: {tasks_dir}")

    task_list = []

    # Iterate through all task directories (sorted for deterministic order).
    for task_path in sorted(tasks_dir.iterdir()):
        if not task_path.is_dir():
            continue

        task_id = task_path.name

        # A valid task ships an instruction file, a task.toml, and a tests/ dir.
        instruction_file = task_path / "instruction.md"
        task_toml = task_path / "task.toml"
        tests_dir = task_path / "tests"

        if not (instruction_file.exists() and task_toml.exists() and tests_dir.exists()):
            print(f"Warning: Skipping invalid task: {task_id}")
            continue

        task_list.append({
            "task_id": task_id,
            "task_path": str(task_path),
        })

    print(f"Found {len(task_list)} valid tasks for training")
    return task_list


if __name__ == "__main__":
    # Test the function
    # Smoke test: enumerate the dataset and preview the first few task ids.
    tasks = get_training_dataset_item_list()
    print(f"\nTotal tasks: {len(tasks)}")
    print("\nFirst 5 tasks:")
    for i, task in enumerate(tasks[:5]):
        print(f"{i+1}. {task['task_id']}")
Loading
Loading