modelscope · fanqiNO1 · Jul 3, 2026
diff --git a/ms_agent/tools/search/sirchmunk_search.py b/ms_agent/tools/search/sirchmunk_search.py
@@ -7,11 +7,14 @@
 
 import asyncio
 import json
-from loguru import logger
 from omegaconf import DictConfig
 from pathlib import Path
 from typing import Any, Callable, Dict, List, Optional
 
+from ms_agent.utils.logger import get_logger
+
+logger = get_logger()
+
 
 def _paths_from_block(block: Any) -> List[str]:
     if block is None:

diff --git a/projects/skill_evolution/README.md b/projects/skill_evolution/README.md
diff --git a/projects/skill_evolution/README_zh.md b/projects/skill_evolution/README_zh.md
diff --git a/projects/skill_evolution/agents/macro_skill_manager.yaml b/projects/skill_evolution/agents/macro_skill_manager.yaml
diff --git a/projects/skill_evolution/agents/micro_skill_manager.yaml b/projects/skill_evolution/agents/micro_skill_manager.yaml
@@ -0,0 +1,58 @@
+llm:
+  service: openai
+  model: qwen3.6-flash
+  openai_api_key:
+  openai_base_url: https://dashscope.aliyuncs.com/compatible-mode/v1
+
+
+generation_config:
+  max_tokens: 64000
+  extra_body:
+    enable_thinking: true
+
+
+prompt:
+  system: |
+    You are a micro skill manager who manages the skills, including creating and editing skills. Your responsibility is to manage and update the skills based on the provided skills update suggestions and current rejected updates. Your output will update the skills, and the updated skills will be evaluated on the validation set to verify whether the skills are improved, so you need to provide accurate, reliable and hallucination-free skills updates. You must follow these guidelines:
+
+    Your Steps:
+    1. Read ALL the provided skills update suggestions.
+    2. Analyze the suggestions and determine whether to create or edit the skills based on the suggestions.
+    3. Understand the recent rejected updates for the corresponding viewed skills, and avoid making the same mistakes that led to the rejection of the previous updates.
+    4. You can use `skill_view` to view the skill to be updated, and you can use the tool `skill_manage` to create or edit the skill. You can use the tool `skill_manage` multiple times to update the skill until you are satisfied with the updated skill.
+    5. You MUST update the skills via invoking the tool `skill_manage` instead of directly outputting the updated skills in plain text.
+    6. You MUST list the current skills via invoking the tool `skill_list` before updating the skills to ensure that you are aware of the current skills and avoid creating duplicate skills.
+    7. You CANNOT create or edit any skills that are not mentioned in the provided skills update suggestions. You can only create or edit the skills that are mentioned in the provided skills update suggestions.
+    8. You CANNOT delete any skills.
+
+    Your Optimization Goals:
+    1. [Priority] Ensure your updated skills are actually improved.
+    2. [Secondary] Reduce token usage while ensuring the quality of your updated skills.
+
+
+skills:
+  auto_discovery: false
+  enable_manage: true
+
+
+tools:
+  code_executor:
+    mcp: false
+    implementation: python_env
+    exclude:
+      - notebook_executor
+      - python_executor
+      - reset_executor
+      - get_executor_info
+  file_system:
+    mcp: false
+    include:
+      - read_file
+      - write_file
+      - edit_file
+      - grep
+      - glob
+
+
+max_chat_round: 10
+enable_snapshots: false
diff --git a/projects/skill_evolution/agents/reflector.yaml b/projects/skill_evolution/agents/reflector.yaml
@@ -0,0 +1,55 @@
+llm:
+  service: openai
+  model: qwen3.6-flash
+  openai_api_key:
+  openai_base_url: https://dashscope.aliyuncs.com/compatible-mode/v1
+
+
+generation_config:
+  max_tokens: 64000
+  extra_body:
+    enable_thinking: true
+
+
+prompt:
+  system: |
+    You are a pattern analyst who identifies COMMON success-or-failure patterns based on the provided trajectories. Your responsibility is to analyze the MOST IMPORTANT COMMON patterns across trajectories and provide suggestions to update the skills viewed in the trajectories. Your output will be used by a micro skill manager agent to update the skills, so you need to provide accurate, reliable, and hallucination-free information. You must follow these guidelines:
+
+    [Definition]
+    Trajectory: In this guideline, a trajectory is defined as a complete interaction process between the agent and the environment, which includes multiple steps such as viewing skills, agent actions, and environment feedback.
+      * Example: A QA trajectory. Agent responds to the question after viewing the skills.
+      * Example: An embodied trajectory. Agent views the skills, performs actions, and gets feedback from the environment.
+
+    [Definition]
+    Pattern: In this guideline, a pattern is defined as WHY the agent succeeds or fails in the trajectory. It is a common reason across multiple trajectories.
+      * Example: The viewed skills lack some relevant rules or the skills are misleading.
+      * Example: The agent finds the correct answer but gives a wrong format.
+
+    [Definition]
+    Suggestion: In this guideline, a suggestion is defined as HOW to update the skills based on the identified COMMON patterns. It is a common solution across multiple trajectories.
+      * Example: Add some relevant rules to the skills or fix the misleading content in the skills.
+      * Example: Update the answer format in the skills to avoid wrong format.
+
+    Your Steps:
+    1. Read ALL the provided trajectories.
+    2. Compare the agent's answer and the gold answer(s), refer to the detailed evaluation results, and understand WHY the agent succeeds or fails in the trajectory.
+    3. Identify the MOST IMPORTANT AND RELEVANT COMMON patterns across the trajectories.
+    4. Provide suggestions to update the skills viewed in the trajectories. 
+    5. Suggestions MUST be generalized and applicable to ALL the trajectories, not just a specific trajectory. You should NOT provide suggestions that are only applicable to a specific trajectory.
+    6. Suggestions MUST be actionable and specific, not vague or general. You should NOT provide suggestions that are too general or abstract, such as "improve the skills" or "fix the skills". You should provide specific suggestions that can be directly applied to update the skills.
+    7. Suggestions MUST be clear and MUST NOT contain risks of leaking test data or causing skills overfitting. You should NOT provide suggestions that are too specific to the test data or the trajectories, such as "add the answer to question X in the skills" or "fix the skills based on the feedback in trajectory Y". You should provide suggestions that are general and applicable to all possible test data and trajectories.
+
+    Output Example:
+    Patterns:
+    1. [identified pattern 1]
+    ...
+    Suggestions:
+    [suggestion content]
+
+    Your Optimization Goals:
+    1. [Priority] Ensure your suggestions are generalized, actionable, and clear.
+    2. [Secondary] Reduce token usage while ensuring the quality of your suggestions. 
+
+
+max_chat_round: 10
+enable_snapshots: false
diff --git a/projects/skill_evolution/agents/rollout.yaml b/projects/skill_evolution/agents/rollout.yaml
@@ -0,0 +1,39 @@
+llm:
+  service: openai
+  model: qwen3.6-flash
+  openai_api_key:
+  openai_base_url: https://dashscope.aliyuncs.com/compatible-mode/v1
+
+
+generation_config:
+  max_tokens: 64000
+  extra_body:
+    enable_thinking: false
+
+
+skills:
+  auto_discovery: false
+  enable_manage: false
+
+
+tools:
+  code_executor:
+    mcp: false
+    implementation: python_env
+    exclude:
+      - notebook_executor
+      - python_executor
+      - reset_executor
+      - get_executor_info
+  file_system:
+    mcp: false
+    include:
+      - read_file
+      - write_file
+      - edit_file
+      - grep
+      - glob
+
+
+max_chat_round: 10
+enable_snapshots: false
diff --git a/projects/skill_evolution/config.yaml b/projects/skill_evolution/config.yaml
@@ -0,0 +1,20 @@
+agents:
+  rollout:
+    agent_config: agents/rollout.yaml
+
+  reflector:
+    agent_config: agents/reflector.yaml
+
+  micro_skill_manager:
+    agent_config: agents/micro_skill_manager.yaml
+
+  macro_skill_manager:
+    agent_config: agents/macro_skill_manager.yaml
+
+train:
+  num_epochs: 1
+  batch_size: 40
+  max_workers: 10
+  reflection_trigger_size: 1
+  reflection_group_size: 4
+  max_rejected_update_buffer_size: 3
diff --git a/projects/skill_evolution/run.py b/projects/skill_evolution/run.py
@@ -0,0 +1,60 @@
+import asyncio
+
+from tasks.base import BaseDataset, BaseEvaluator, BaseRolloutEnv
+from skill_evolution_workflow import SkillEvolutionWorkflow
+
+
+async def run_workflow(
+    config_file: str,
+    init_skills_path: str,
+    workdir: str,
+    train_set: BaseDataset,
+    val_set: BaseDataset,
+    test_set: BaseDataset,
+    rollout_env: BaseRolloutEnv,
+    evaluator: BaseEvaluator,
+):
+    skill_evolution_workflow = SkillEvolutionWorkflow(
+        config_file=config_file,
+        init_skills_path=init_skills_path,
+        workdir=workdir,
+    )
+    await skill_evolution_workflow.run(
+        train_set=train_set,
+        val_set=val_set,
+        test_set=test_set,
+        rollout_env=rollout_env,
+        evaluator=evaluator,
+    )
+    # import os
+    # data_batch = train_set.get_batch(batch_size=10)
+    # await skill_evolution_workflow._train_step(
+    #     current_skills_path=init_skills_path,
+    #     data_batch=data_batch,
+    #     rollout_env=rollout_env,
+    #     evaluator=evaluator,
+    #     sub_workdir=os.path.join(workdir, "train_step")
+    # )
+
+if __name__ == "__main__":
+    from tasks.searchqa import SearchQADataset, SearchQAEvaluator, SearchQARolloutEnv
+
+    train_set = SearchQADataset(data_path="../../../data/minimal_searchqa_split/train/items.json", is_train=True)
+    val_set = SearchQADataset(data_path="../../../data/minimal_searchqa_split/val/items.json", is_train=False)
+    test_set = SearchQADataset(data_path="../../../data/minimal_searchqa_split/test/items.json", is_train=False)
+    rollout_env = SearchQARolloutEnv()
+    evaluator = SearchQAEvaluator()
+
+    config_file = "./config.yaml"
+
+    coroutine = run_workflow(
+        config_file=config_file,
+        init_skills_path="../../../results/msagent_searchqa_qwen36flash/init_skills",
+        workdir="../../../results/msagent_searchqa_qwen36flash/workdir",
+        train_set=train_set,
+        val_set=val_set,
+        test_set=test_set,
+        rollout_env=rollout_env,
+        evaluator=evaluator,
+    )
+    asyncio.run(coroutine)