diff --git a/.gitignore b/.gitignore index 7b64dc3e..5ae49a4a 100644 --- a/.gitignore +++ b/.gitignore @@ -172,3 +172,4 @@ swarmexp swarmlog werewolves_swarm .claude +jobs diff --git a/ajet/task_reader/tracing_reader/filters/llm_evaluate_filter.py b/ajet/task_reader/tracing_reader/filters/llm_evaluate_filter.py index b49c480a..0d2c6846 100644 --- a/ajet/task_reader/tracing_reader/filters/llm_evaluate_filter.py +++ b/ajet/task_reader/tracing_reader/filters/llm_evaluate_filter.py @@ -1,10 +1,6 @@ import os +from loguru import logger from typing import Iterable, List - -from agentscope.agent import ReActAgent -from agentscope.formatter import DashScopeMultiAgentFormatter -from agentscope.message import Msg -from agentscope.model import DashScopeChatModel from pydantic import BaseModel, Field from ajet.schema.task import Task @@ -61,6 +57,9 @@ def __init__( print_reason: bool = True, ) -> None: """Filter that evaluates the quality of tasks using LLM.""" + from agentscope.agent import ReActAgent + from agentscope.formatter import DashScopeMultiAgentFormatter + from agentscope.model import DashScopeChatModel self._print_reason = print_reason self.external_llm_fn = create_external_llm_fn( @@ -78,6 +77,8 @@ def __init__( ) async def filter(self, tasks: Iterable[Task]) -> List[Task]: + from agentscope.message import Msg + kept: List[Task] = [] for task in tasks: payload = "query: " + task.main_query + "\n" "answer: " + task.metadata.get( diff --git a/tutorial/opencode_build_skillbench_agent.prompt.md b/tutorial/opencode_build_skillbench_agent.prompt.md index e69de29b..f21aed29 100644 --- a/tutorial/opencode_build_skillbench_agent.prompt.md +++ b/tutorial/opencode_build_skillbench_agent.prompt.md @@ -0,0 +1,32 @@ + + + +# Train SkillBench with AgentJet Swarm with Vibe Coding + +result is generated by `claude sonnet 4.5` + +============================= + +你的任务是训练这个仓库中的智能体:https://github.com/benchflow-ai/skillsbench.git +仓库你需要下载到 ./tmp/skillsbench_swarm_test 
+这是在调试过程中你可以使用的模型(openrouter) + "url": "https://openrouter-openrouter-esyubhyrxv.ap-northeast-1.fcapp.run/api/v1", + "key": "sk-or-v1-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + "model": "qwen/qwen3-max" + +待训练模型是在GPU服务器上的路径是: + /mnt/data_cpfs/model_cache/modelscope/hub/Qwen/Qwen/Qwen2.5-7B-Instruct +另外SkillBench 可以选择使用多种智能体,你需要选择 opencode + + + + +你的skill(首先读取该SKILL文件,获取必要知识): +- ajet/copilot/train-complex-blackbox/SKILL.md + + +You must test tutorial/opencode_build_skillsbench/run_episode.py +这是在调试过程中你可以使用的模型(openrouter) + "url": "https://openrouter-openrouter-esyubhyrxv.ap-northeast-1.fcapp.run/api/v1", + "key": "sk-or-v1-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + "model": "qwen/qwen3-max" \ No newline at end of file diff --git a/tutorial/opencode_build_skillsbench/agent_roll.py b/tutorial/opencode_build_skillsbench/agent_roll.py new file mode 100644 index 00000000..28925bd8 --- /dev/null +++ b/tutorial/opencode_build_skillsbench/agent_roll.py @@ -0,0 +1,134 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +AgentJet training script for SkillsBench with OpenCode agent. 
class SkillsBenchTaskReader:
    """Task source backed by the discovered SkillsBench task list.

    The list is loaded once, at construction time, through
    ``get_training_dataset_item_list`` and re-emitted as ``Task`` objects
    every time :meth:`generate_training_tasks` is iterated.
    """

    def __init__(self):
        self.tasks = get_training_dataset_item_list()
        print(f"Loaded {len(self.tasks)} SkillsBench tasks")

    def generate_training_tasks(self):
        """Yield one ``Task`` per loaded entry, with its id and path as metadata."""
        for entry in self.tasks:
            task_id = entry["task_id"]
            task_metadata = {
                "task_id": task_id,
                "task_path": entry["task_path"],
            }
            yield Task(task_id=task_id, metadata=task_metadata)
def execute_agent(task: Task, api_baseurl_key: OpenaiBaseUrlAndApiKey) -> WorkflowOutput:
    """
    Run one SkillsBench episode for ``task`` and wrap the result.

    Args:
        task: Task whose ``metadata`` holds ``task_id`` and ``task_path``.
        api_baseurl_key: Object exposing ``api_key`` and ``base_url`` for
            the episode's model endpoint.

    Returns:
        WorkflowOutput carrying the episode reward and metadata. Any
        exception raised while running the episode is reported as a
        zero-reward output whose metadata records the error text.
    """
    task_meta = task.metadata
    task_id, task_path = task_meta["task_id"], task_meta["task_path"]

    try:
        # `model` is only a placeholder: run_episode documents that it
        # ignores the argument and uses its own hard-coded model.
        episode_kwargs = dict(
            task_id=task_id,
            task_path=task_path,
            api_key=api_baseurl_key.api_key,
            base_url=api_baseurl_key.base_url,
            model="placeholder-model",
        )
        reward, episode_metadata = run_episode(**episode_kwargs)
        return WorkflowOutput(reward=float(reward), metadata=episode_metadata)
    except Exception as exc:
        print(f"ERROR: Exception during task execution: {exc}\n")
        return WorkflowOutput(
            reward=0.0,
            metadata={
                "task_id": task_id,
                "task_path": task_path,
                "success": False,
                "error": str(exc),
            },
        )


def main():
    """Sync the training job with the swarm server and stream rollouts to it."""
    task_source = SkillsBenchTaskReader()

    job = AgentJetJob(
        base_yaml_config="tutorial/opencode_build_skillsbench/skillbench.yaml",
        algorithm="grpo",
        experiment_name="skillbench_swarm",
        max_env_worker=128,
    )

    # Handshake with the remote swarm server.
    client = SwarmClient(AJET_SWARM_URL)
    client.auto_sync_train_config_and_start_engine(job)

    repeats_per_task = job.num_repeat
    remote_batch_size = job.batch_size

    def rollout(task):
        # Open an episode, run the agent against the endpoint handed out
        # for it, then report the outcome (reward included) back.
        episode_uuid, api_baseurl_key = client.begin_episode(discard_episode_timeout=240)
        client.end_episode(task, episode_uuid, execute_agent(task, api_baseurl_key))

    pool = PeriodicDrainThreadPoolExecutor(
        workers=repeats_per_task * remote_batch_size,
        max_parallel=4,
        auto_retry=True,
        block_first_run=False,
    )
    for _epoch in range(NUM_EPOCH):
        for task in task_source.generate_training_tasks():
            for _repeat in range(repeats_per_task):
                pool.submit_with_periodic_drain(fn=rollout, task=task)


if __name__ == "__main__":
    main()
def get_training_dataset_item_list(
    skillsbench_root: "str | os.PathLike[str] | None" = None,
) -> List[Dict[str, str]]:
    """
    Get list of SkillsBench tasks for training.

    Args:
        skillsbench_root: Root of the SkillsBench checkout (the directory
            that contains ``tasks/``). When omitted, the ``SKILLSBENCH_ROOT``
            environment variable is used, falling back to the historical
            default ``/root/AgentJet/tmp/skillsbench_swarm_test``.

    Returns:
        List of dicts sorted by task directory name, each containing:
            - task_id: name of the task directory
            - task_path: full path to the task directory

    Raises:
        FileNotFoundError: if ``<root>/tasks`` is not an existing directory.
    """
    if skillsbench_root is None:
        skillsbench_root = os.environ.get(
            "SKILLSBENCH_ROOT", "/root/AgentJet/tmp/skillsbench_swarm_test"
        )
    tasks_dir = Path(skillsbench_root) / "tasks"

    if not tasks_dir.is_dir():
        raise FileNotFoundError(f"Tasks directory not found: {tasks_dir}")

    task_list = []

    # Sorted so the task order is stable across runs and machines.
    for task_path in sorted(tasks_dir.iterdir()):
        if not task_path.is_dir():
            continue

        task_id = task_path.name

        # A valid task ships an instruction, a task.toml and a tests directory.
        is_valid = (
            (task_path / "instruction.md").is_file()
            and (task_path / "task.toml").is_file()
            and (task_path / "tests").is_dir()
        )
        if not is_valid:
            print(f"Warning: Skipping invalid task: {task_id}")
            continue

        task_list.append({
            "task_id": task_id,
            "task_path": str(task_path),
        })

    print(f"Found {len(task_list)} valid tasks for training")
    return task_list


if __name__ == "__main__":
    # Test the function
    tasks = get_training_dataset_item_list()
    print(f"\nTotal tasks: {len(tasks)}")
    print("\nFirst 5 tasks:")
    for i, task in enumerate(tasks[:5]):
        print(f"{i+1}. {task['task_id']}")
# Special version of docker-compose-base.yaml that should be used
SPECIAL_DOCKER_COMPOSE_CONTENT = """services:
  main:
    environment:
      - OPENCODE_CONFIG_CONTENT=${OPENCODE_CONFIG_CONTENT}
    volumes:
      - ${HOST_VERIFIER_LOGS_PATH}:${ENV_VERIFIER_LOGS_PATH}
      - ${HOST_AGENT_LOGS_PATH}:${ENV_AGENT_LOGS_PATH}
    deploy:
      resources:
        limits:
          cpus: ${CPUS}
          memory: ${MEMORY}
    network_mode: host
"""

# Interpreter used to locate the installed harbor package when neither the
# `harbor_python` argument nor the HARBOR_PYTHON environment variable is given.
_DEFAULT_HARBOR_PYTHON = "/root/.local/share/uv/tools/harbor/bin/python"


def check_and_fix_docker_compose(harbor_python: "str | None" = None) -> bool:
    """
    Check if harbor's docker-compose-base.yaml is the special version.
    If not, update it to the special version.

    The harbor package directory is found by asking ``harbor_python`` to
    import ``harbor``; the file checked is
    ``<harbor>/environments/docker/docker-compose-base.yaml``. Note that this
    overwrites a file inside the installed package.

    Args:
        harbor_python: Python interpreter that can import ``harbor``.
            Defaults to ``$HARBOR_PYTHON`` and then to the previously
            hard-coded uv tool path.

    Returns:
        bool: True if file was modified, False if already correct or if the
        check could not be performed (best effort: problems are printed as
        warnings, never raised).
    """
    if harbor_python is None:
        harbor_python = os.environ.get("HARBOR_PYTHON", _DEFAULT_HARBOR_PYTHON)

    try:
        # Find harbor installation path
        result = subprocess.run(
            [harbor_python, "-c",
             "import harbor; import os; print(os.path.dirname(harbor.__file__))"],
            capture_output=True,
            text=True,
            timeout=10,
        )

        if result.returncode != 0:
            print("Warning: Could not locate harbor installation, skipping docker-compose check")
            return False

        harbor_path = result.stdout.strip()
        docker_compose_path = Path(harbor_path) / "environments" / "docker" / "docker-compose-base.yaml"

        if not docker_compose_path.exists():
            print(f"Warning: docker-compose-base.yaml not found at {docker_compose_path}")
            return False

        current_content = docker_compose_path.read_text(encoding="utf-8")

        # Compare with special version (strip whitespace for comparison)
        if current_content.strip() == SPECIAL_DOCKER_COMPOSE_CONTENT.strip():
            print("✓ docker-compose-base.yaml is already the special version")
            return False

        # Update to special version
        print("! docker-compose-base.yaml is NOT the special version")
        print(f" Updating {docker_compose_path} to special version...")
        docker_compose_path.write_text(SPECIAL_DOCKER_COMPOSE_CONTENT, encoding="utf-8")
        print("✓ Updated docker-compose-base.yaml to special version")
        return True

    except (OSError, subprocess.SubprocessError, ValueError) as e:
        # Missing interpreter, timeout, unreadable/unwritable file, bad
        # encoding: the check is advisory, so report and carry on.
        print(f"Warning: Error checking docker-compose-base.yaml: {e}")
        return False
def run_episode(task_id: str, task_path: str, api_key: str, base_url: str, model: str = "qwen/qwen3-max") -> Tuple[float, dict]:
    """
    Run a single episode for a SkillsBench task using OpenCode agent.

    Launches ``harbor run -p <task_path> -a opencode -m <model> --job-name
    <job_name>`` with the OpenCode provider configuration passed through the
    ``OPENCODE_CONFIG_CONTENT`` environment variable, then derives the reward
    from the harbor results via ``parse_harbor_results``.

    Args:
        task_id: Unique identifier for the task
        task_path: Full path to the task directory
        api_key: API key for the model provider
        base_url: Base URL for the model provider
        model: Model identifier (ignored; the model is hard-coded to
            ``huggingface/Qwen25-14B``)

    Returns:
        Tuple of (reward, metadata):
            - reward: value parsed from the harbor results; 0.0 on timeout
              or on any exception while running the command
            - metadata: dict with execution details (job name, model,
              stdout/stderr/return code when available, error text)
    """
    # Hardcoded model - do not change
    actual_model_with_provider = "huggingface/Qwen25-14B"
    actual_model_name = "Qwen25-14B"

    # Generate a unique job name to avoid concurrent conflicts
    # Format: {task_id}_{uuid} to make it both human-readable and unique
    job_name = f"{task_id}_{uuid.uuid4().hex[:12]}"

    # Never write the full credential to the log, not even a short one.
    masked_key = f"{api_key[:10]}..." if len(api_key) > 10 else "***"

    print(f"\n{'='*60}")
    print(f"Running episode for task: {task_id}")
    print(f"Job name: {job_name}")
    print(f"Task path: {task_path}")
    print(f"Model: {actual_model_with_provider}")
    print(f"Base URL: {base_url}")
    print(f"API Key: {masked_key}")
    print(f"{'='*60}\n")

    # Check and fix docker-compose-base.yaml before running
    print("Checking harbor docker-compose-base.yaml configuration...")
    check_and_fix_docker_compose()
    print()

    # Set up environment variables for the agent
    env = os.environ.copy()

    # Create OPENCODE_CONFIG_CONTENT with the provided api_key and base_url
    opencode_config = {
        "$schema": "https://opencode.ai/config.json",
        "model": actual_model_with_provider,
        "provider": {
            "huggingface": {
                "npm": "@ai-sdk/openai-compatible",
                "name": "vLLM Provider",
                "options": {
                    "baseURL": base_url,
                    "apiKey": api_key,
                },
                "models": {
                    actual_model_name: {
                        "name": "vLLM Model",
                        "limit": {
                            "context": 20*1000,
                            "output": 10*1000,
                        },
                    },
                },
            },
        },
    }

    # NOTE: a debugging `time.sleep(10000)` used to sit here and stalled every
    # episode for hours before harbor was even started; it has been removed
    # together with the print that dumped the unmasked config.
    config_json = json.dumps(opencode_config)
    env["OPENCODE_CONFIG_CONTENT"] = config_json
    print("Set OPENCODE_CONFIG_CONTENT environment variable")

    # Log a copy of the config with the credential masked.
    printable_config = json.loads(config_json)
    printable_config["provider"]["huggingface"]["options"]["apiKey"] = masked_key
    print(f"Config: {json.dumps(printable_config, indent=2)}\n")

    # Construct harbor run command
    # harbor run -p <task_path> -a opencode -m <model> --job-name <job_name>
    cmd = [
        "harbor", "run",
        "-p", task_path,
        "-a", "opencode",
        "-m", actual_model_with_provider,
        "--job-name", job_name,
    ]

    print(f"Command: {' '.join(cmd)}")
    print("\nStarting execution...\n")

    metadata = {
        "task_id": task_id,
        "task_path": task_path,
        "job_name": job_name,
        "model": actual_model_with_provider,
        "base_url": base_url,
        "success": False,
        "reward": 0.0,
        "error": None,
    }

    try:
        # Run the command and capture output
        result = subprocess.run(
            cmd,
            env=env,
            capture_output=True,
            text=True,
            timeout=3600,  # 1 hour timeout
        )

        print(f"STDOUT:\n{result.stdout}")
        print(f"\nSTDERR:\n{result.stderr}")
        print(f"\nReturn code: {result.returncode}")

        # The job_name is what ties the results on disk to this episode.
        reward = parse_harbor_results(result.stdout, result.stderr, result.returncode, job_name)

        metadata["success"] = (reward > 0)
        metadata["reward"] = reward
        metadata["stdout"] = result.stdout
        metadata["stderr"] = result.stderr
        metadata["returncode"] = result.returncode

        print(f"\n{'='*60}")
        print("Episode completed")
        print(f"Reward: {reward}")
        print(f"Success: {metadata['success']}")
        print(f"{'='*60}\n")

        return reward, metadata

    except subprocess.TimeoutExpired:
        print("ERROR: Episode timed out after 3600 seconds")
        metadata["error"] = "timeout"
        metadata["timeout"] = True
        return 0.0, metadata

    except Exception as e:
        # Episode boundary: a failed run is reported as a zero reward so
        # the caller can keep going.
        print(f"ERROR: Exception during episode execution: {e}")
        metadata["error"] = str(e)
        return 0.0, metadata
def _read_reward(reward_file: Path) -> "float | None":
    """Return the float stored in ``reward_file``, or None if unreadable or malformed."""
    try:
        return float(reward_file.read_text().strip())
    except (OSError, ValueError) as e:
        print(f"ERROR: Failed to read reward file {reward_file}: {e}")
        return None


def _verifier_reward_in(job_dir: Path) -> "float | None":
    """Return the first readable ``verifier/reward.txt`` value below ``job_dir``, if any."""
    reward_files = sorted(job_dir.rglob("reward.txt"))
    print(f"Found {len(reward_files)} reward.txt file(s) in job directory")

    for reward_file in reward_files:
        print(f"Checking reward file: {reward_file}")
        # Only directory components *below* the job directory count; a job
        # name that happens to contain "verifier" must not qualify every file.
        if "verifier" not in reward_file.relative_to(job_dir).parts[:-1]:
            print(f"Skipping non-verifier reward file: {reward_file}")
            continue
        reward_value = _read_reward(reward_file)
        if reward_value is not None:
            print(f"✓ Successfully read reward from {reward_file}: {reward_value}")
            return reward_value
    return None


def parse_harbor_results(stdout: str, stderr: str, returncode: int, job_name: str) -> float:
    """
    Parse Harbor execution results to extract reward.

    Lookup order:
        1. ``jobs/<job_name>/**/verifier/reward.txt`` (``jobs`` is resolved
           against the current working directory).
        2. Any ``jobs/...`` path mentioned on a stdout line containing
           "results", read as ``<path>/verifier/reward.txt``.
        3. Other directories under ``jobs/`` whose name contains
           ``job_name``, newest first. Directories of other jobs are never
           consulted, so a concurrent episode's reward cannot be picked up.
        4. If nothing was found and ``returncode`` is 0, a text heuristic on
           the output ("FAILED" -> 0.0, "passed" -> 1.0).

    Args:
        stdout: Standard output from harbor run
        stderr: Standard error from harbor run
        returncode: Return code from harbor run
        job_name: The unique job name used for this run

    Returns:
        float: the reward read from a reward file, or 0.0 / 1.0 from the
        output heuristic; 0.0 when nothing can be determined.
    """
    jobs_dir = Path("jobs")
    print(f"Searching for results in job directory: {job_name}")

    # First priority: the directory named exactly after this job.
    if not jobs_dir.is_dir():
        print(f"WARNING: Jobs directory does not exist: {jobs_dir}")
    elif job_name:
        job_dir = jobs_dir / job_name
        print(f"Looking for job directory: {job_dir}")

        if job_dir.is_dir():
            print(f"Found job directory: {job_dir}")
            reward_value = _verifier_reward_in(job_dir)
            if reward_value is not None:
                return reward_value
        else:
            print(f"WARNING: Job directory does not exist: {job_dir}")
            print("Available job directories:")
            for d in sorted(jobs_dir.iterdir())[-5:]:  # Show last 5
                print(f" - {d.name}")

    # Second priority: a results path printed by harbor itself.
    for line in stdout.split('\n'):
        if 'results' not in line.lower():
            continue
        for part in line.split():
            if 'jobs/' not in part:
                continue
            reward_file = Path(part.strip()) / "verifier" / "reward.txt"
            if reward_file.exists():
                reward_value = _read_reward(reward_file)
                if reward_value is not None:
                    print(f"Found reward in {reward_file}: {reward_value}")
                    return reward_value

    # Third priority: directories that carry this job's name with extra
    # decoration. The previous "most recently modified directory" fallback
    # was removed: with several episodes running in parallel it returned
    # another episode's reward.
    if job_name and jobs_dir.is_dir():
        candidates = sorted(
            (d for d in jobs_dir.iterdir()
             if d.is_dir() and job_name in d.name and d.name != job_name),
            key=lambda d: d.stat().st_mtime,
            reverse=True,
        )
        for job_dir in candidates:
            print(f"Checking job directory matching job name: {job_dir.name}")
            reward_value = _verifier_reward_in(job_dir)
            if reward_value is not None:
                return reward_value

    # Last resort: no reward file. Harbor typically returns 0 on success, so
    # only then is the output text consulted.
    if returncode == 0:
        if "FAILED" in stdout or "FAILED" in stderr:
            print("Tests FAILED according to output")
            return 0.0
        if "passed" in stdout.lower():
            print("Tests PASSED according to output")
            return 1.0

    # Default to 0 if we can't determine success
    print("Could not determine reward, defaulting to 0.0")
    return 0.0


# Example usage:
# python3 tutorial/opencode_build_skillsbench/run_episode.py \
#   --task-id adaptive-cruise-control \
#   --task-path /root/AgentJet/tmp/skillsbench_swarm_test/tasks/adaptive-cruise-control \
#   --api-key "sk-123467" \
#   --base-url "http://127.0.0.1:2888/v1" \
#   --model "huggingface/Qwen/Qwen3-235B-A22B-Instruct-2507" \
#   --output results.json


def main():
    """Command-line entry point: run one episode and optionally save the result as JSON."""
    parser = argparse.ArgumentParser(description="Run a single SkillsBench training episode")
    parser.add_argument("--task-id", required=True, help="Task identifier")
    parser.add_argument("--task-path", required=True, help="Path to task directory")
    parser.add_argument("--api-key", required=True, help="API key for the model provider")
    parser.add_argument("--base-url", required=True, help="Base URL for the model provider")
    parser.add_argument("--model", default="qwen/qwen3-max", help="Model identifier (ignored, hardcoded model will be used)")
    parser.add_argument("--output", help="Output file for results (JSON)")

    args = parser.parse_args()

    reward, metadata = run_episode(
        task_id=args.task_id,
        task_path=args.task_path,
        api_key=args.api_key,
        base_url=args.base_url,
        model=args.model,
    )

    # Save results if output file specified
    if args.output:
        output_data = {
            "reward": reward,
            "metadata": metadata,
        }
        with open(args.output, 'w', encoding="utf-8") as f:
            json.dump(output_data, f, indent=2)
        print(f"\nResults saved to: {args.output}")

    # Exit with code based on success
    sys.exit(0 if reward > 0 else 1)


if __name__ == "__main__":
    main()
experiment_dir: "auto" # {exp-dir}/{experiment_name} + + model: + # ✨ select model to be trained + path: /mnt/data_cpfs/model_cache/modelscope/hub/Qwen/Qwen/Qwen2___5-7B-Instruct + + + rollout: + user_workflow: null + temperature: 0.7 + max_env_worker: 64 + num_repeat: 6 + agent_madness_reward: 0.0 + tensor_model_parallel_size: 1 + # max_num_seqs: 40 + # monitor LLM's abormal behaviors during rollout + compute_madness_checklist: + - "nonsense" + max_response_length_in_one_turn: 1024 + max_model_len: 22000 + + task_reader: + type: random_dummy # `env_service` or `jsonl_dataset_file` or `huggingface_dat_repo` or `data_generation` or `random_dummy` + + task_judge: + # ✨ select evaluation function + judge_protocol: null + + # the experimental ZeroMQ interchange server feature that allows `tuner.as_oai_baseurl_apikey` feature + enable_interchange_server: True + # train in cloud, run episode locally + enable_swarm_mode: True + # both swarm / oai share the same interchange server + interchange_server: + interchange_method: 'ipc' # options: 'tcp' (multi-nodes) or 'ipc' (1 node) + interchange_server_port: 10086 + num_fastapi_process: 2 # 1, 2 or 4 is fine + max_fastapi_threads: 512 # 64 or 128 is fine + max_inference_tracker_threads: 64 # recommend to be equal to `ajet.rollout.max_env_worker` + already_started: False # do not edit, used by `swarm` + + swarm_mode_sample_collection_method: "rollout_until_finish_enough_tasks" + + debug: + debug_max_parallel: 1 + debug_first_n_tasks: 1 + + data: + train_batch_size: 32 + max_prompt_length: 4000 + max_response_length: 18000 + + trainer_common: + save_freq: 5 + test_freq: 9999999 + total_epochs: 9999999 + total_training_steps: 25 + nnodes: 1 + n_gpus_per_node: 8 + +# ------------------ do not edit ------------------ +hydra: + searchpath: + - file://ajet/default_config + - file://ajet/default_config/verl + +# ------------------ do not edit ------------------ +defaults: + - verl_default + - ajet_default + - _self_