diff --git a/ms_agent/agent/llm_agent.py b/ms_agent/agent/llm_agent.py
index 5f2ddf2e7..f3bf3184c 100644
--- a/ms_agent/agent/llm_agent.py
+++ b/ms_agent/agent/llm_agent.py
@@ -7,7 +7,7 @@
 import threading
 import uuid
 from contextlib import contextmanager
-from copy import deepcopy
+from copy import deepcopy, copy
 from typing import Any, AsyncGenerator, Dict, List, Optional, Tuple, Union
 
 import json
@@ -24,6 +24,9 @@
 from ms_agent.utils import async_retry, read_history, save_history
 from ms_agent.utils.constants import DEFAULT_TAG, DEFAULT_USER
 from ms_agent.utils.logger import get_logger
+from ms_agent.skill.catalog import SkillCatalog
+from ms_agent.skill.prompt_injector import SkillPromptInjector
+from ms_agent.skill.skill_tools import SkillToolSet
 from omegaconf import DictConfig, OmegaConf
 
 from ..config.config import Config, ConfigLifecycleHandler
@@ -35,17 +38,17 @@
 class LLMAgent(Agent):
     """
     An agent designed to run LLM-based tasks with support for tools, memory,
-    planning, callbacks, and automatic skill execution.
+    planning, callbacks, and skill integration.
 
     This class provides a full lifecycle for running an LLM agent, including:
     - Prompt preparation
     - Chat history management
     - External tool calling
     - Memory retrieval and updating
-    - Planning logic
    - Stream or non-stream response generation
     - Callback hooks at various stages of execution
-    - Automatic skill detection and execution (AutoSkills integration)
+    - Skill system: skill discovery (skills_list), viewing (skill_view),
+      and management (skill_manage) as standard tools
 
     Args:
        config (DictConfig): Pre-loaded configuration object.
@@ -54,28 +57,12 @@ class LLMAgent(Agent):
         **kwargs: Additional keyword arguments passed to the parent Agent
             constructor.
 
    Skills Configuration (in config.skills):
-        path: Path(s) to skill directories.
-        enable_retrieve: Whether to use retriever (None=auto based on skill count).
-        retrieve_args: Arguments for HybridRetriever (top_k, min_score).
-        max_candidate_skills: Maximum candidate skills to consider.
-        max_retries: Maximum retry attempts for skill execution.
-        work_dir: Working directory for skill execution.
-        use_sandbox: Whether to use Docker sandbox.
-        auto_execute: Whether to auto-execute skills after retrieval.
-
-    Example:
-        ```python
-        config = DictConfig({
-            'llm': {...},
-            'skills': {
-                'path': '/path/to/skills',
-                'auto_execute': True,
-                'work_dir': '/path/to/workspace'
-            }
-        })
-        agent = LLMAgent(config, tag='my-agent')
-        result = await agent.run('Generate a PDF report for Q4 sales of Apple')
-        ```
+        path: Path(s) to skill directories or ModelScope repo IDs.
+        sources: Structured source list (type, path, repo_id, url, etc.).
+        auto_discover: Automatically scan the skills/ directory under the CWD.
+        enable_manage: Enable the skill_manage tool for runtime skill CRUD.
+        whitelist: Skill ID whitelist (null=all, []=none, [ids]=specific).
+        disabled: List of disabled skill IDs.
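+
+    Example (a minimal sketch; the skills path, tag, and query are
+        illustrative, and only keys documented in Skills Configuration are set):
+        ```python
+        config = DictConfig({
+            'llm': {...},
+            'skills': {
+                'path': '/path/to/skills',
+                'enable_manage': True
+            }
+        })
+        agent = LLMAgent(config, tag='my-agent')
+        result = await agent.run('Generate a PDF report for Q4 sales')
+        ```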
""" AGENT_NAME = 'LLMAgent' @@ -107,7 +94,7 @@ def __init__( self.tool_manager: Optional[ToolManager] = None self.memory_tools: List[Memory] = [] self.rag: Optional[RAG] = None - self.knowledge_search: Optional[SirschmunkSearch] = None + self.knowledge_search: Optional[SirchmunkSearch] = None self.llm: Optional[LLM] = None self.runtime: Optional[Runtime] = None self.max_chat_round: int = 0 @@ -119,237 +106,74 @@ def __init__( self.mcp_client = kwargs.get('mcp_client', None) self.config_handler = self.register_config_handler() - # AutoSkills integration (lazy initialization) - self._auto_skills = None - self._auto_skills_initialized = False - self._last_skill_result = None - self._skill_mode_active = False - - def _get_skills_config(self) -> Optional[DictConfig]: - """Get skills configuration from agent config.""" - if hasattr(self.config, 'skills') and self.config.skills: - return self.config.skills - return None - - def _ensure_auto_skills(self) -> bool: - """ - Ensure AutoSkills is initialized (lazy initialization). - - Returns: - True if AutoSkills is available and initialized. - """ - if self._auto_skills_initialized: - return self._auto_skills is not None - - skills_config = self._get_skills_config() - if not skills_config: - self._auto_skills_initialized = True - return False - - skills_path = getattr(skills_config, 'path', None) - if not skills_path: - logger.debug('No skills path configured') - self._auto_skills_initialized = True - return False - - # Ensure LLM is initialized - if self.llm is None: - self.prepare_llm() - - try: - from ms_agent.skill.auto_skills import AutoSkills - - # Check sandbox requirements - use_sandbox = getattr(skills_config, 'use_sandbox', True) - if use_sandbox: - from ms_agent.utils.docker_utils import is_docker_daemon_running - - if not is_docker_daemon_running(): - logger.warning( - 'Docker not running, disabling sandbox for skills') - use_sandbox = False - - # Build retrieve args - retrieve_args = {} - if hasattr(skills_config, 'retrieve_args'): - retrieve_args = OmegaConf.to_container( - skills_config.retrieve_args) - - self._auto_skills = AutoSkills( - skills=skills_path, - llm=self.llm, - enable_retrieve=getattr(skills_config, 'enable_retrieve', - None), - retrieve_args=retrieve_args, - max_candidate_skills=getattr(skills_config, - 'max_candidate_skills', 10), - max_retries=getattr(skills_config, 'max_retries', 3), - work_dir=getattr(skills_config, 'work_dir', None), - use_sandbox=use_sandbox, - ) - logger.info( - f'AutoSkills initialized with {len(self._auto_skills.all_skills)} skills' - ) - self._auto_skills_initialized = True - return True - - except Exception as e: - logger.warning(f'Failed to initialize AutoSkills: {e}') - self._auto_skills_initialized = True - return False - - @property - def skills_available(self) -> bool: - """Check if AutoSkills is available.""" - return self._ensure_auto_skills() - - @property - def auto_skills(self): - """Get AutoSkills instance (maybe None if not configured).""" - self._ensure_auto_skills() - return self._auto_skills - - async def should_use_skills(self, query: str) -> bool: - """ - Determine if the query should use skills. + # Skill system (initialized in prepare_skills) + self._skill_catalog = None + self._skill_injector = None - Combines keyword detection with LLM-based analysis. - - Args: - query: User's query string. - - Returns: - True if skills should be used for this query. 
- """ - if not self._ensure_auto_skills(): - return False - - skills_config = self._get_skills_config() - if not skills_config: - return False - skills_path = getattr(skills_config, 'path', None) - if not skills_path: - return False - - # Use LLM analysis for ambiguous queries - try: - needs_skills, _, _, _ = self._auto_skills._analyze_query(query) - return needs_skills - except Exception as e: - logger.error(f'Skill analysis error: {e}') - return False - - async def get_skill_dag(self, query: str): - """ - Get skill DAG for a query without executing. - - Args: - query: User's query string. - - Returns: - SkillDAGResult containing the execution plan, or None if unavailable. - """ - if not self._ensure_auto_skills(): - return None - return await self._auto_skills.get_skill_dag(query) - - async def execute_skills(self, query: str, execution_input=None): - """ - Execute skills for a query. - - Args: - query: User's query string. - execution_input: Optional initial input for skills. + async def prepare_skills(self): + """Initialize the skill system from config.skills. - Returns: - SkillDAGResult with execution results, or None if unavailable. + Sets up SkillCatalog, SkillPromptInjector, and registers + SkillToolSet into ToolManager. """ - if not self._ensure_auto_skills(): - return None - - skills_config = self._get_skills_config() - stop_on_failure = ( - getattr(skills_config, 'stop_on_failure', True) - if skills_config else True) - - result = await self._auto_skills.run( - query=query, - execution_input=execution_input, - stop_on_failure=stop_on_failure, - ) - self._last_skill_result = result - return result + if not hasattr(self.config, 'skills') or not self.config.skills: + return - def _format_skill_result_as_messages(self, dag_result) -> List[Message]: - """ - Format skill execution result as messages for agent history. + skills_config = self.config.skills + self._skill_catalog = SkillCatalog(config=skills_config) + self._skill_catalog.load_from_config(skills_config) + + self._skill_injector = SkillPromptInjector(self._skill_catalog) + + enable_manage = getattr(skills_config, 'enable_manage', False) + skill_toolset = SkillToolSet( + self.config, self._skill_catalog, + enable_manage=enable_manage) + await skill_toolset.connect() + self.tool_manager.register_tool(skill_toolset) + + # Index the newly added tool into the live tool registry. + # We cannot call reindex_tool() because it would duplicate + # already-indexed tools; instead we index just this one. + tools = await skill_toolset.get_tools() + spliter = self.tool_manager.TOOL_SPLITER + for server_name, tool_list in tools.items(): + for tool in tool_list: + key = f"{server_name}{spliter}{tool['tool_name']}" + tool = copy(tool) + tool['tool_name'] = key + self.tool_manager._tool_index[key] = ( + skill_toolset, server_name, tool) + + self._check_skill_tool_dependencies() + + def _check_skill_tool_dependencies(self): + """Warn if skills are enabled but essential tools are missing.""" + if (not self._skill_catalog + or not self._skill_catalog.get_enabled_skills()): + return - Args: - dag_result: SkillDAGResult from skill execution. + has_tools = hasattr(self.config, 'tools') and self.config.tools + warnings = [] - Returns: - List of Message objects describing the result. 
- """ - messages = [] - - # Handle chat-only response - if dag_result.chat_response: - messages.append( - Message(role='assistant', content=dag_result.chat_response)) - return messages - - # Handle incomplete skills - if not dag_result.is_complete: - content = "I couldn't find suitable skills for this task." - if dag_result.clarification: - content += f'\n\n{dag_result.clarification}' - messages.append(Message(role='assistant', content=content)) - return messages - - # Format execution result - if dag_result.execution_result: - exec_result = dag_result.execution_result - skill_names = list(dag_result.selected_skills.keys()) - - if exec_result.success: - content = f"Successfully executed {len(skill_names)} skill(s): {', '.join(skill_names)}\n\n" - - # Add output summaries - for skill_id, result in exec_result.results.items(): - if result.success and result.output: - output = result.output - if output.stdout: - stdout_preview = output.stdout[:1000] - if len(output.stdout) > 1000: - stdout_preview += '...' - content += f'**{skill_id} output:**\n{stdout_preview}\n\n' - if output.output_files: - content += f'**Generated files:** {list(output.output_files.values())}\n\n' - - content += ( - f'Total execution time: {exec_result.total_duration_ms:.2f}ms' - ) - else: - content = 'Skill execution completed with errors.\n\n' - for skill_id, result in exec_result.results.items(): - if not result.success: - content += f'**{skill_id} failed:** {result.error}\n' + if not has_tools or not hasattr(self.config.tools, 'file_system'): + warnings.append( + "file_system (read_file, write_file) - needed for " + "reading skill scripts and writing outputs") - messages.append(Message(role='assistant', content=content)) - else: - # DAG only, no execution - skill_names = list(dag_result.selected_skills.keys()) - content = f'Found {len(skill_names)} relevant skill(s) for your task:\n' - for skill_id, skill in dag_result.selected_skills.items(): - desc_preview = skill.description[:100] - if len(skill.description) > 100: - desc_preview += '...' - content += f'- **{skill.name}** ({skill_id}): {desc_preview}\n' - content += f'\nExecution order: {dag_result.execution_order}' - - messages.append(Message(role='assistant', content=content)) + if not has_tools or not hasattr(self.config.tools, 'code_executor'): + warnings.append( + "code_executor (python, shell execution) - needed for " + "running skill scripts") - return messages + if warnings: + logger.warning( + "Skills are configured but the following recommended tools " + "are not enabled. Skills that depend on these tools may not " + "work correctly:\n" + + "\n".join(f" - {w}" for w in warnings) + + "\nAdd them to your agent config under 'tools:' to enable." + ) def register_callback(self, callback: Callback): """ @@ -633,6 +457,13 @@ async def create_messages( content=self.system or LLMAgent.DEFAULT_SYSTEM), Message(role='user', content=messages or self.query), ] + + # Inject skill prompt section into system message + if self._skill_injector: + skill_section = self._skill_injector.build_skill_prompt_section() + if skill_section: + messages[0].content += "\n\n" + skill_section + return messages async def do_rag(self, messages: List[Message]): @@ -672,64 +503,6 @@ async def do_rag(self, messages: List[Message]): f'Relevant context retrieved from codebase search:\n\n{context}\n\n' f'User question: {query}') - async def do_skill(self, - messages: List[Message]) -> Optional[List[Message]]: - """ - Process skill-related query if applicable. 
- - Analyzes the user query, determines if skills should be used, - and executes the skill pipeline if appropriate. - - Args: - messages: Normalized message list with system and user messages - - Returns: - Updated messages with skill results if successful and should return, - None if no skill processing or fallback to standard agent - """ - # Extract user query from normalized messages - query = ( - messages[1].content - if len(messages) > 1 and messages[1].role == 'user' else None) - - if not query: - return None - - # Check if skills should be used for this query - if not await self.should_use_skills(query): - return None - - logger.info('Query detected as skill-related, using skill processing.') - self._skill_mode_active = True - - try: - skills_config = self._get_skills_config() - auto_execute = ( - getattr(skills_config, 'auto_execute', True) - if skills_config else True) - - if auto_execute: - dag_result = await self.execute_skills(query) - else: - dag_result = await self.get_skill_dag(query) - - if dag_result: - skill_messages = self._format_skill_result_as_messages( - dag_result) - for msg in skill_messages: - messages.append(msg) - return messages - - # dag_result is None/empty, fallback to standard agent - self._skill_mode_active = False - return None - - except Exception as e: - logger.warning( - f'Skill execution failed: {e}, falling back to standard agent') - self._skill_mode_active = False - return None - async def load_memory(self): """Initialize and append memory tool instances based on the configuration provided in the global config. @@ -1091,16 +864,14 @@ def save_history(self, messages: List[Message], **kwargs): async def run_loop(self, messages: Union[List[Message], str], **kwargs) -> AsyncGenerator[Any, Any]: - """ - Run the agent, mainly contains a llm calling and tool calling loop. + """Run the agent loop (LLM generation + tool calling). - If skills are configured, skill-related queries will be automatically routed to skill execution. + Skills, when configured, are exposed as standard tools + (skills_list, skill_view, skill_manage) and injected into + the system prompt—no special routing needed. Args: - messages (Union[List[Message], str]): Input data for the agent. Can be a raw string prompt, - or a list of previous interaction messages. - Returns: - List[Message]: A list of message objects representing the agent's response or interaction history. + messages: Input prompt string or list of Message objects. 
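+
+        Example (a minimal sketch; assumes a prepared agent and that each
+        yield is the in-progress message list):
+            ```python
+            async for msgs in agent.run_loop('List the available skills'):
+                ...
+            ```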
""" try: self.max_chat_round = getattr(self.config, 'max_chat_round', @@ -1109,6 +880,7 @@ async def run_loop(self, messages: Union[List[Message], str], self.prepare_llm() self.prepare_runtime() await self.prepare_tools() + await self.prepare_skills() await self.load_memory() await self.prepare_rag() await self.prepare_knowledge_search() @@ -1121,19 +893,7 @@ async def run_loop(self, messages: Union[List[Message], str], self.config, self.runtime, messages = self.read_history(messages) if self.runtime.round == 0: - # New task: create standardized messages first messages = await self.create_messages(messages) - - # Try skill processing first - skill_result = await self.do_skill(messages) - if skill_result is not None: - await self.on_task_begin(skill_result) - yield skill_result - await self.on_task_end(skill_result) - await self.cleanup_tools() - return - - # Standard processing continues await self.do_rag(messages) await self.on_task_begin(messages) diff --git a/ms_agent/skill/__init__.py b/ms_agent/skill/__init__.py index 611082129..84046a936 100644 --- a/ms_agent/skill/__init__.py +++ b/ms_agent/skill/__init__.py @@ -1,8 +1,21 @@ # Copyright (c) ModelScope Contributors. All rights reserved. -from .auto_skills import AutoSkills, DAGExecutionResult, SkillDAGResult +from .catalog import SkillCatalog +from .loader import SkillLoader, load_skills +from .prompt_injector import SkillPromptInjector +from .schema import SkillFile, SkillSchema, SkillSchemaParser +from .skill_tools import SkillToolSet +from .sources import SkillSource, SkillSourceType, parse_skill_source __all__ = [ - 'AutoSkills', - 'SkillDAGResult', - 'DAGExecutionResult', + 'SkillSchema', + 'SkillSchemaParser', + 'SkillFile', + 'SkillLoader', + 'load_skills', + 'SkillSource', + 'SkillSourceType', + 'parse_skill_source', + 'SkillCatalog', + 'SkillPromptInjector', + 'SkillToolSet', ] diff --git a/ms_agent/skill/auto_skills.py b/ms_agent/skill/auto_skills.py deleted file mode 100644 index 170c49c91..000000000 --- a/ms_agent/skill/auto_skills.py +++ /dev/null @@ -1,1908 +0,0 @@ -# flake8: noqa -# isort: skip_file -# yapf: disable -import asyncio -import logging -import os -import re -from dataclasses import dataclass, field -from pathlib import Path -from typing import Any, Dict, List, Literal, Optional, Set, Tuple, Union - -import json -from ms_agent.llm import LLM -from ms_agent.llm.utils import Message -from ms_agent.retriever.hybrid_retriever import HybridRetriever -from ms_agent.skill.container import (ExecutionInput, ExecutionOutput, - ExecutorType, SkillContainer) -from ms_agent.skill.loader import load_skills -from ms_agent.skill.prompts import (PROMPT_ANALYZE_EXECUTION_ERROR, - PROMPT_ANALYZE_QUERY_FOR_SKILLS, - PROMPT_BUILD_SKILLS_DAG, - PROMPT_DIRECT_SELECT_SKILLS, - PROMPT_FILTER_SKILLS_DEEP, - PROMPT_FILTER_SKILLS_FAST, - PROMPT_SKILL_ANALYSIS_PLAN, - PROMPT_SKILL_EXECUTION_COMMAND) -from ms_agent.skill.schema import SkillContext, SkillExecutionPlan, SkillSchema -from ms_agent.utils.logger import get_logger - -logger = get_logger() - - -def _configure_logger_to_dir(log_dir: Path) -> None: - """ - Configure the logger to output to a specific directory. - - Args: - log_dir: Directory path for log files. 
- """ - log_dir.mkdir(parents=True, exist_ok=True) - log_file = log_dir / 'ms_agent.log' - - # Get current log level from environment - log_level_str = os.getenv('LOG_LEVEL', 'INFO').upper() - log_level = getattr(logging, log_level_str, logging.INFO) - - # Update logger level to respect current LOG_LEVEL env var - logger.setLevel(log_level) - for handler in logger.handlers: - handler.setLevel(log_level) - - # Check if file handler for this path already exists - for handler in logger.handlers: - if isinstance(handler, logging.FileHandler): - if Path(handler.baseFilename).resolve() == log_file.resolve(): - return # Already configured - - # Remove existing file handlers and add new one - for handler in logger.handlers[:]: - if isinstance(handler, logging.FileHandler): - logger.removeHandler(handler) - - file_handler = logging.FileHandler(str(log_file), mode='a') - file_handler.setFormatter(logging.Formatter('[%(levelname)s:%(name)s] %(message)s')) - file_handler.setLevel(log_level) - logger.addHandler(file_handler) - logger.info(f'Logger configured to output to: {log_file}') - - -@dataclass -class SkillExecutionResult: - """ - Result of executing a single skill. - - Attributes: - skill_id: Identifier of the executed skill. - success: Whether execution was successful. - output: ExecutionOutput from container. - error: Error message if execution failed. - """ - skill_id: str - success: bool = False - output: Optional[ExecutionOutput] = None - error: Optional[str] = None - - -@dataclass -class DAGExecutionResult: - """ - Result of executing the entire skill DAG. - - Attributes: - success: Whether all skills executed successfully. - results: Dict mapping skill_id to SkillExecutionResult. - execution_order: Actual execution order (with parallel groups). - total_duration_ms: Total execution duration in milliseconds. - """ - success: bool = False - results: Dict[str, SkillExecutionResult] = field(default_factory=dict) - execution_order: List[Union[str, List[str]]] = field(default_factory=list) - total_duration_ms: float = 0.0 - - def get_skill_output(self, skill_id: str) -> Optional[ExecutionOutput]: - """Get output from a specific skill execution.""" - result = self.results.get(skill_id) - return result.output if result else None - - -class SkillAnalyzer: - """ - Progressive skill analyzer for incremental context loading. - - Implements two-phase analysis: - 1. Plan Phase: Analyze skill metadata + content to create execution plan - 2. Load Phase: Load only required resources based on plan - """ - - def __init__(self, llm: 'LLM'): - """ - Initialize skill analyzer. - - Args: - llm: LLM instance for analysis. 
- """ - self.llm = llm - - def _llm_generate(self, prompt: str) -> str: - """Generate LLM response from prompt.""" - from ms_agent.llm.utils import Message - messages = [Message(role='user', content=prompt)] - logger.debug(f'Input msg to LLM in SkillAnalyzer: {messages}') - response = self.llm.generate(messages=messages) - res = response.content if hasattr(response, - 'content') else str(response) - logger.debug(f'LLM response in SkillAnalyzer: {res}') - return res - - def _parse_json_response(self, response: str) -> Dict[str, Any]: - """Parse JSON from LLM response with robust extraction.""" - # Remove markdown code blocks if present - response = re.sub(r'```json\s*', '', response) - response = re.sub(r'```\s*$', '', response) - response = response.strip() - - # Try direct parsing first - try: - return json.loads(response) - except json.JSONDecodeError: - pass - - # Try to extract JSON object from response - try: - # Find the outermost JSON object - start = response.find('{') - if start != -1: - # Find matching closing brace - depth = 0 - for i, char in enumerate(response[start:], start): - if char == '{': - depth += 1 - elif char == '}': - depth -= 1 - if depth == 0: - json_str = response[start:i + 1] - return json.loads(json_str) - except json.JSONDecodeError: - pass - - # Try regex extraction as fallback - try: - json_match = re.search(r'\{[\s\S]*\}', response) - if json_match: - return json.loads(json_match.group()) - except json.JSONDecodeError: - pass - - logger.warning(f'Failed to parse JSON: {response[:500]}...') - return {} - - def analyze_skill_plan(self, - skill: SkillSchema, - query: str, - root_path: Path = None) -> SkillContext: - """ - Phase 1: Analyze skill and create execution plan. - - Only loads skill metadata and content (SKILL.md), not scripts/resources. - - Args: - skill: SkillSchema to analyze. - query: User's query to fulfill. - root_path: Root path for skill context. - - Returns: - SkillContext with execution plan (resources not yet loaded). 
- """ - # Create context with lazy loading - context = SkillContext( - skill=skill, - query=query, - root_path=root_path or skill.skill_path.parent) - - # Build prompt with skill overview (not full content) - prompt = PROMPT_SKILL_ANALYSIS_PLAN.format( - query=query, - skill_id=skill.skill_id, - skill_name=skill.name, - skill_description=skill.description, - skill_content=skill.content[:4000] if skill.content else '', - scripts_list=', '.join(context.get_scripts_list()) or 'None', - references_list=', '.join(context.get_references_list()) or 'None', - resources_list=', '.join(context.get_resources_list()) or 'None') - - response = self._llm_generate(prompt) - parsed = self._parse_json_response(response) - - # Build execution plan - plan = SkillExecutionPlan( - can_handle=parsed.get('can_handle', False), - plan_summary=parsed.get('plan_summary', ''), - steps=parsed.get('steps', []), - required_scripts=parsed.get('required_scripts', []), - required_references=parsed.get('required_references', []), - required_resources=parsed.get('required_resources', []), - required_packages=parsed.get('required_packages', []), - parameters=parsed.get('parameters', {}), - reasoning=parsed.get('reasoning', '')) - - context.plan = plan - context.spec.plan = plan.plan_summary - - logger.info( - f'Skill analysis plan: can_handle={plan.can_handle}, ' - f'scripts={plan.required_scripts}, refs={plan.required_references}, ' - f'packages={plan.required_packages}' - ) - - return context - - def load_skill_resources(self, context: SkillContext) -> SkillContext: - """ - Phase 2: Load resources based on execution plan. - - Args: - context: SkillContext with plan from Phase 1. - - Returns: - SkillContext with loaded resources. - """ - if not context.plan or not context.plan.can_handle: - logger.warning('No valid plan, skipping resource loading') - return context - - context.load_from_plan() - logger.info( - f'Loaded resources: scripts={len(context.scripts)}, ' - f'refs={len(context.references)}, res={len(context.resources)}') - - return context - - def generate_execution_commands( - self, context: SkillContext) -> List[Dict[str, Any]]: - """ - Generate execution commands from loaded context. - - Args: - context: SkillContext with loaded resources. - - Returns: - List of execution command dictionaries. 
- """ - if not context.plan: - return [] - - prompt = PROMPT_SKILL_EXECUTION_COMMAND.format( - query=context.query, - skill_id=context.skill.skill_id, - execution_plan=json.dumps( - { - 'plan_summary': context.plan.plan_summary, - 'steps': context.plan.steps, - 'parameters': context.plan.parameters, - }, - indent=2), - scripts_content=context.get_loaded_scripts_content(), - references_content=context.get_loaded_references_content()[:2000], - resources_content=context.get_loaded_resources_content()[:2000]) - - response = self._llm_generate(prompt) - parsed = self._parse_json_response(response) - - commands = parsed.get('commands', []) - - # Fallback: if no commands generated, try to use loaded scripts directly - if not commands: - # If no scripts loaded yet, try to load all available scripts - if not context.scripts and context.skill.scripts: - logger.info( - f'Loading all scripts as fallback: {[s.name for s in context.skill.scripts]}') - context.load_scripts() # Load all scripts - - if context.scripts: - logger.warning( - f'No commands generated, using {len(context.scripts)} loaded scripts as fallback') - # context.scripts is List[Dict] with keys: name, file, path, abs_path, content - for script_info in context.scripts: - script_name = script_info.get('name', '') - script_content = script_info.get('content', '') - if script_name.endswith('.py') and script_content: - commands.append({ - 'type': 'python_code', - 'code': script_content, - 'requirements': context.plan.required_packages if context.plan else [] - }) - elif script_name.endswith('.sh') and script_content: - commands.append({ - 'type': 'shell', - 'code': script_content - }) - - context.spec.tasks = json.dumps(commands, indent=2) - - return commands - - async def analyze_and_prepare( - self, - skill: SkillSchema, - query: str, - root_path: Path = None - ) -> Tuple[SkillContext, List[Dict[str, Any]]]: - """ - Complete progressive analysis: plan -> load -> generate commands. - - Args: - skill: SkillSchema to analyze. - query: User's query. - root_path: Root path for context. - - Returns: - Tuple of (SkillContext, execution_commands). - """ - # Phase 1: Create plan - context = await asyncio.to_thread(self.analyze_skill_plan, skill, - query, root_path) - - if not context.plan or not context.plan.can_handle: - return context, [] - - # Phase 2: Load resources - await asyncio.to_thread(self.load_skill_resources, context) - - # Phase 3: Generate commands - commands = await asyncio.to_thread(self.generate_execution_commands, - context) - - return context, commands - - -@dataclass -class SkillDAGResult: - """ - Result of AutoSkills run containing the skill execution DAG. - - Attributes: - dag: Adjacency list representation of skill dependencies. - execution_order: Topologically sorted list of skill_ids (sublists = parallel). - selected_skills: Dict of selected SkillSchema objects. - is_complete: Whether the skills are sufficient for the task. - clarification: Optional clarification question if skills are insufficient. - chat_response: Direct response if no skills needed (chat-only mode). - execution_result: Result of DAG execution (populated after execute_dag). 
- """ - dag: Dict[str, List[str]] = field(default_factory=dict) - execution_order: List[Union[str, List[str]]] = field(default_factory=list) - selected_skills: Dict[str, SkillSchema] = field(default_factory=dict) - is_complete: bool = False - clarification: Optional[str] = None - chat_response: Optional[str] = None - execution_result: Optional[DAGExecutionResult] = None - - def to_dict(self) -> Dict[str, Any]: - """Convert SkillDAGResult to dictionary.""" - return { - 'dag': - self.dag, - 'execution_order': - self.execution_order, - 'selected_skills': - {k: v.__dict__ - for k, v in self.selected_skills.items()}, - 'is_complete': - self.is_complete, - 'clarification': - self.clarification, - 'chat_response': - self.chat_response, - 'execution_result': - self.execution_result.__dict__ if self.execution_result else None, - } - - -class DAGExecutor: - """ - Executor for skill DAG with dependency-aware parallel execution. - - Handles execution order parsing, input/output linking between skills, - and parallel execution of independent skills. - Supports progressive skill analysis for incremental context loading. - """ - - def __init__(self, - container: SkillContainer, - skills: Dict[str, SkillSchema], - workspace_dir: Optional[Path] = None, - llm: 'LLM' = None, - enable_progressive_analysis: bool = True, - enable_self_reflection: bool = True, - max_retries: int = 3): - """ - Initialize DAG executor. - - Args: - container: SkillContainer for executing skills. - skills: Dict of skill_id to SkillSchema. - workspace_dir: Optional workspace directory for skill execution. - llm: LLM instance for progressive skill analysis. - enable_progressive_analysis: Whether to use progressive analysis. - enable_self_reflection: Whether to analyze errors and retry on failure. - max_retries: Maximum retry attempts for failed executions. - """ - self.container = container - self.skills = skills - self.workspace_dir = workspace_dir or container.workspace_dir - self.llm = llm - self.enable_progressive_analysis = enable_progressive_analysis and llm is not None - self.enable_self_reflection = enable_self_reflection and llm is not None - self.max_retries = max_retries - - # Skill analyzer for progressive analysis - self._analyzer: Optional[SkillAnalyzer] = None - if self.enable_progressive_analysis: - self._analyzer = SkillAnalyzer(llm) - - # Execution state: stores outputs keyed by skill_id - self._outputs: Dict[str, ExecutionOutput] = {} - - # Skill contexts from progressive analysis - self._contexts: Dict[str, SkillContext] = {} - - # Track execution attempts for retry logging - self._execution_attempts: Dict[str, int] = {} - - def _get_skill_dependencies(self, skill_id: str, - dag: Dict[str, List[str]]) -> List[str]: - """ - Get direct dependencies of a skill from the DAG. - - Args: - skill_id: The skill to get dependencies for. - dag: Adjacency list where dag[A] = [B, C] means A depends on B, C. - - Returns: - List of skill_ids that this skill depends on. - """ - return dag.get(skill_id, []) - - def _build_execution_input( - self, - skill_id: str, - dag: Dict[str, List[str]], - execution_input: Optional[ExecutionInput] = None) -> ExecutionInput: - """ - Build execution input for a skill, linking outputs from dependencies. - - Args: - skill_id: The skill to build input for. - dag: Skill dependency DAG. - execution_input: Optional user-provided input. - - Returns: - ExecutionInput with linked dependency outputs. 
- """ - base_input = execution_input or ExecutionInput() - - # Get outputs from upstream dependencies - dependencies = self._get_skill_dependencies(skill_id, dag) - upstream_data: Dict[str, Any] = {} - - for dep_id in dependencies: - if dep_id in self._outputs: - dep_output = self._outputs[dep_id] - # Pass stdout/return_value as upstream data - upstream_data[dep_id] = { - 'stdout': dep_output.stdout, - 'stderr': dep_output.stderr, - 'return_value': dep_output.return_value, - 'exit_code': dep_output.exit_code, - 'output_files': - {k: str(v) - for k, v in dep_output.output_files.items()}, - } - - # Inject upstream data into environment variables as JSON - env_vars = base_input.env_vars.copy() - if upstream_data: - env_vars['UPSTREAM_OUTPUTS'] = json.dumps(upstream_data) - # Also provide individual upstream references - for dep_id, data in upstream_data.items(): - safe_key = dep_id.replace('-', '_').replace('.', '_').replace('@', '_').replace('/', '_').upper() - if data.get('stdout'): - env_vars[f'UPSTREAM_{safe_key}_STDOUT'] = data[ - 'stdout'][:4096] - - return ExecutionInput( - args=base_input.args, - kwargs=base_input.kwargs, - env_vars=env_vars, - input_files=base_input.input_files, - stdin=base_input.stdin, - working_dir=base_input.working_dir, - requirements=base_input.requirements, - ) - - def _determine_executor_type(self, skill: SkillSchema) -> ExecutorType: - """ - Determine the executor type based on skill scripts. - - Args: - skill: SkillSchema to analyze. - - Returns: - ExecutorType for the skill's primary script. - """ - if not skill.scripts: - return ExecutorType.PYTHON_CODE - - # Check first script's extension - primary_script = skill.scripts[0] - ext = primary_script.type.lower() - - if ext in ['.py']: - return ExecutorType.PYTHON_SCRIPT - elif ext in ['.sh', '.bash']: - return ExecutorType.SHELL - elif ext in ['.js', '.mjs']: - return ExecutorType.JAVASCRIPT - else: - return ExecutorType.PYTHON_CODE - - async def _execute_single_skill( - self, - skill_id: str, - dag: Dict[str, List[str]], - execution_input: Optional[ExecutionInput] = None, - query: str = '') -> SkillExecutionResult: - """ - Execute a single skill with dependency-linked input. - - Uses progressive analysis if enabled: - 1. Analyze skill to create execution plan - 2. Load only required resources - 3. Generate and execute commands - - Args: - skill_id: ID of the skill to execute. - dag: Skill dependency DAG. - execution_input: Optional user-provided input. - query: User query for progressive analysis. - - Returns: - SkillExecutionResult with execution outcome. 
- """ - skill = self.skills.get(skill_id) - if not skill: - return SkillExecutionResult( - skill_id=skill_id, - success=False, - error=f'Skill not found: {skill_id}') - - try: - # Build base input with upstream outputs - exec_input = self._build_execution_input(skill_id, dag, execution_input) - - # Use progressive analysis if enabled - if self.enable_progressive_analysis and self._analyzer: - return await self._execute_with_progressive_analysis( - skill, skill_id, exec_input, query) - - # Fallback: direct execution without progressive analysis - return await self._execute_direct(skill, skill_id, exec_input) - - except Exception as e: - logger.error(f'Skill execution failed for {skill_id}: {e}') - return SkillExecutionResult( - skill_id=skill_id, success=False, error=str(e)) - - async def _execute_with_progressive_analysis( - self, skill: SkillSchema, skill_id: str, - exec_input: ExecutionInput, query: str) -> SkillExecutionResult: - """ - Execute skill using progressive analysis. - - Args: - skill: SkillSchema to execute. - skill_id: Skill identifier. - exec_input: Execution input with upstream data. - query: User query for context. - - Returns: - SkillExecutionResult with execution outcome. - """ - # Phase 1 & 2: Analyze and load resources - # Use skill's directory as root_path for proper file resolution - context, commands = await self._analyzer.analyze_and_prepare( - skill, query, skill.skill_path) - - # Store context for reference - self._contexts[skill_id] = context - - # Mount skill directory in container for sandbox access - self.container.mount_skill_directory(skill_id, skill.skill_path) - - if not context.plan or not context.plan.can_handle: - return SkillExecutionResult( - skill_id=skill_id, - success=False, - error= - f'Skill cannot handle query: {context.plan.reasoning if context.plan else "No plan"}' - ) - - if not commands: - return SkillExecutionResult( - skill_id=skill_id, - success=False, - error='No execution commands generated') - - # Phase 3: Execute commands with retry support for all types - outputs: List[ExecutionOutput] = [] - for cmd in commands: - cmd_type = cmd.get('type', 'python_code') - - # Use retry mechanism for all command types - if self.enable_self_reflection: - output = await self._execute_command_with_retry( - cmd=cmd, - cmd_type=cmd_type, - skill_id=skill_id, - exec_input=exec_input, - context=context, - skill=skill, - query=query) - else: - # Self-reflection disabled - execute without retry - output = await self._execute_command(cmd, cmd_type, skill_id, - exec_input, context) - outputs.append(output) - - if output.exit_code != 0: - # Stop on first failure (after retries exhausted) - break - - # Merge outputs - final_output = self._merge_outputs(outputs) - - # Store output for downstream skills - self._outputs[skill_id] = final_output - self.container.spec.link_upstream(skill_id, final_output) - - return SkillExecutionResult( - skill_id=skill_id, - success=(final_output.exit_code == 0), - output=final_output, - error=final_output.stderr if final_output.exit_code != 0 else None) - - async def _execute_direct( - self, skill: SkillSchema, skill_id: str, - exec_input: ExecutionInput) -> SkillExecutionResult: - """ - Execute skill directly without progressive analysis. - - Args: - skill: SkillSchema to execute. - skill_id: Skill identifier. - exec_input: Execution input. - - Returns: - SkillExecutionResult with execution outcome. 
- """ - # Mount skill directory for sandbox access - self.container.mount_skill_directory(skill_id, skill.skill_path) - - executor_type = self._determine_executor_type(skill) - - if skill.scripts: - script_path = skill.scripts[0].path - output = await self.container.execute( - executor_type=executor_type, - skill_id=skill_id, - script_path=script_path, - input_spec=exec_input) - else: - output = await self.container.execute_python_code( - code=skill.content or '# No executable content', - skill_id=skill_id, - input_spec=exec_input) - - self._outputs[skill_id] = output - self.container.spec.link_upstream(skill_id, output) - - return SkillExecutionResult( - skill_id=skill_id, - success=(output.exit_code == 0), - output=output, - error=output.stderr if output.exit_code != 0 else None) - - async def _execute_command(self, cmd: Dict[str, Any], cmd_type: str, - skill_id: str, exec_input: ExecutionInput, - context: SkillContext) -> ExecutionOutput: - """ - Execute a single command from progressive analysis. - - Args: - cmd: Command dictionary. - cmd_type: Type of command (python_script, shell, etc.). - skill_id: Skill identifier. - exec_input: Base execution input. - context: SkillContext with loaded resources. - - Returns: - ExecutionOutput from command execution. - """ - # Merge parameters into input - params = cmd.get('parameters', {}) - # Use skill directory as working directory for proper file access - working_dir = exec_input.working_dir or context.skill_dir - - # Collect all requirements: from plan, command, and input - all_requirements = [] - if context.plan and context.plan.required_packages: - all_requirements.extend(context.plan.required_packages) - all_requirements.extend(cmd.get('requirements', [])) - all_requirements.extend(exec_input.requirements) - # Deduplicate while preserving order - seen = set() - unique_requirements = [] - for req in all_requirements: - if req not in seen: - seen.add(req) - unique_requirements.append(req) - - merged_input = ExecutionInput( - args=exec_input.args + list(params.values()), - kwargs={ - **exec_input.kwargs, - **params - }, - env_vars={ - **exec_input.env_vars, - 'SKILL_DIR': str(context.skill_dir), - **{k.upper(): str(v) - for k, v in params.items()} - }, - input_files=exec_input.input_files, - stdin=exec_input.stdin, - working_dir=working_dir, - requirements=unique_requirements) - - if cmd_type == 'python_script': - script_path = cmd.get('path') - if script_path: - # Resolve path relative to skill directory - full_path = context.skill_dir / script_path - if not full_path.exists(): - full_path = context.root_path / script_path - return await self.container.execute_python_script( - script_path=full_path, - skill_id=skill_id, - input_spec=merged_input) - else: - code = cmd.get('code', '') - return await self.container.execute_python_code( - code=code, skill_id=skill_id, input_spec=merged_input) - - elif cmd_type == 'python_code': - code = cmd.get('code', '') - return await self.container.execute_python_code( - code=code, skill_id=skill_id, input_spec=merged_input) - - elif cmd_type == 'shell': - command = cmd.get('code') or cmd.get('command', '') - return await self.container.execute_shell( - command=command, skill_id=skill_id, input_spec=merged_input) - - elif cmd_type == 'javascript': - code = cmd.get('code', '') - return await self.container.execute_javascript( - code=code, skill_id=skill_id, input_spec=merged_input) - - else: - # Default to python code - code = cmd.get('code', '') - return await self.container.execute_python_code( - code=code, 
skill_id=skill_id, input_spec=merged_input) - - async def _execute_command_with_retry( - self, cmd: Dict[str, Any], cmd_type: str, - skill_id: str, exec_input: ExecutionInput, - context: SkillContext, skill: SkillSchema, - query: str) -> ExecutionOutput: - """ - Execute a command with retry logic for all execution types. - - Always retries up to max_retries times. Uses LLM analysis to improve - the fix between retries when self-reflection is enabled. - - Args: - cmd: Command dictionary. - cmd_type: Type of command. - skill_id: Skill identifier. - exec_input: Base execution input. - context: SkillContext. - skill: SkillSchema for error analysis. - query: User query for context. - - Returns: - ExecutionOutput from command execution. - """ - current_cmd = cmd.copy() - last_output = None - - for attempt in range(1, self.max_retries + 1): - self._execution_attempts[skill_id] = attempt - logger.info(f'[{skill_id}] Execution attempt {attempt}/{self.max_retries}') - - # Execute the command - output = await self._execute_command( - current_cmd, cmd_type, skill_id, exec_input, context) - last_output = output - - # Check if successful - if output.exit_code == 0: - if attempt > 1: - logger.info( - f'[{skill_id}] Execution succeeded after {attempt} attempts') - return output - - # Collect error info - error_msg = output.stderr[:500] if output.stderr else 'Unknown error' - logger.warning(f'[{skill_id}] Attempt {attempt} failed: {error_msg[:200]}') - - # Last attempt - no need to analyze - if attempt >= self.max_retries: - logger.warning( - f'[{skill_id}] Max retries ({self.max_retries}) reached') - continue - - # Try to analyze and fix if self-reflection is enabled - if self.enable_self_reflection and cmd_type in ('python_code', 'python_script'): - code = current_cmd.get('code', '') - if code: - logger.info(f'[{skill_id}] Analyzing error for retry...') - analysis = self._analyze_execution_error( - skill=skill, - failed_code=code, - output=output, - query=query, - attempt=attempt) - - error_info = analysis.get('error_analysis', {}) - is_fixable = error_info.get('is_fixable', False) - fixed_code = analysis.get('fixed_code') - additional_reqs = analysis.get('additional_requirements', []) - - logger.info( - f'[{skill_id}] Error analysis: type={error_info.get("error_type")}, ' - f'fixable={is_fixable}') - - # Apply fix if available - if is_fixable and fixed_code: - current_cmd = current_cmd.copy() - current_cmd['code'] = fixed_code - logger.info(f'[{skill_id}] Applying fix') - - # Add additional requirements - if additional_reqs: - logger.info(f'[{skill_id}] Adding requirements: {additional_reqs}') - exec_input = ExecutionInput( - args=exec_input.args, - kwargs=exec_input.kwargs, - env_vars=exec_input.env_vars, - input_files=exec_input.input_files, - working_dir=exec_input.working_dir, - requirements=list(set(exec_input.requirements + additional_reqs))) - else: - logger.info(f'[{skill_id}] Retrying without code modification') - - logger.error(f'[{skill_id}] All {self.max_retries} attempts failed') - return last_output - - def _merge_outputs(self, - outputs: List[ExecutionOutput]) -> ExecutionOutput: - """Merge multiple execution outputs into one.""" - if not outputs: - return ExecutionOutput() - if len(outputs) == 1: - return outputs[0] - - # Merge all outputs - merged_stdout = '\n'.join(o.stdout for o in outputs if o.stdout) - merged_stderr = '\n'.join(o.stderr for o in outputs if o.stderr) - final_exit_code = next( - (o.exit_code for o in outputs if o.exit_code != 0), 0) - total_duration = 
sum(o.duration_ms for o in outputs) - - # Merge output files - merged_files = {} - for o in outputs: - merged_files.update(o.output_files) - - return ExecutionOutput( - stdout=merged_stdout, - stderr=merged_stderr, - exit_code=final_exit_code, - output_files=merged_files, - duration_ms=total_duration) - - def _analyze_execution_error( - self, - skill: SkillSchema, - failed_code: str, - output: ExecutionOutput, - query: str, - attempt: int) -> Dict[str, Any]: - """ - Analyze failed execution and generate a fix using LLM. - - Args: - skill: The skill that failed. - failed_code: The code that failed. - output: ExecutionOutput with error details. - query: Original user query. - attempt: Current retry attempt number. - - Returns: - Dict with error analysis and fixed code. - """ - if not self.llm: - return {'error_analysis': {'is_fixable': False}, - 'fixed_code': None} - - prompt = PROMPT_ANALYZE_EXECUTION_ERROR.format( - query=query, - skill_id=skill.skill_id, - skill_name=skill.name, - failed_code=failed_code[:8000], # Limit code length - stderr=output.stderr[:3000] if output.stderr else '', - stdout=output.stdout[:1000] if output.stdout else '', - attempt=attempt, - max_attempts=self.max_retries) - - try: - response = self.llm.generate( - messages=[Message(role='user', content=prompt)]) - # Parse JSON response - handle different response formats - response_text = (response.content if hasattr(response, 'content') - else str(response)).strip() - # Extract JSON from response - json_match = re.search(r'\{[\s\S]*\}', response_text) - if json_match: - return json.loads(json_match.group()) - except Exception as e: - logger.warning(f'Error analyzing execution failure: {e}') - - return {'error_analysis': {'is_fixable': False}, 'fixed_code': None} - - async def _execute_parallel_group( - self, - skill_ids: List[str], - dag: Dict[str, List[str]], - execution_input: Optional[ExecutionInput] = None, - query: str = '') -> List[SkillExecutionResult]: - """ - Execute a group of skills in parallel. - - Args: - skill_ids: List of skill_ids to execute concurrently. - dag: Skill dependency DAG. - execution_input: Optional user-provided input. - query: User query for progressive analysis. - - Returns: - List of SkillExecutionResult for each skill. - """ - tasks = [ - self._execute_single_skill(sid, dag, execution_input, query) - for sid in skill_ids - ] - return await asyncio.gather(*tasks) - - async def execute(self, - dag: Dict[str, List[str]], - execution_order: List[Union[str, List[str]]], - execution_input: Optional[ExecutionInput] = None, - stop_on_failure: bool = True, - query: str = '') -> DAGExecutionResult: - """ - Execute the skill DAG according to execution order. - - Execution order format: [skill1, skill2, [skill3, skill4], skill5, ...] - - Single string items are executed sequentially - - List items (sublists) are executed in parallel - - Args: - dag: Skill dependency DAG (adjacency list). - execution_order: Ordered list with parallel groups as sublists. - execution_input: Optional initial input for all skills. - stop_on_failure: Whether to stop execution on first failure. - query: User query for progressive skill analysis. - - Returns: - DAGExecutionResult with all execution outcomes. 
- """ - import time - start_time = time.time() - - results: Dict[str, SkillExecutionResult] = {} - actual_order: List[Union[str, List[str]]] = [] - all_success = True - - for item in execution_order: - if isinstance(item, list): - # Parallel execution group - group_results = await self._execute_parallel_group( - item, dag, execution_input, query) - for res in group_results: - results[res.skill_id] = res - if not res.success: - all_success = False - actual_order.append(item) - - if not all_success and stop_on_failure: - logger.warning( - f'Stopping DAG execution due to failure in parallel group: {item}' - ) - break - else: - # Sequential execution - result = await self._execute_single_skill( - item, dag, execution_input, query) - results[result.skill_id] = result - actual_order.append(item) - - if not result.success: - all_success = False - if stop_on_failure: - logger.warning( - f'Stopping DAG execution due to failure: {item}') - break - - total_duration = (time.time() - start_time) * 1000 - - return DAGExecutionResult( - success=all_success, - results=results, - execution_order=actual_order, - total_duration_ms=total_duration) - - def get_skill_context(self, skill_id: str) -> Optional[SkillContext]: - """Get the skill context from progressive analysis.""" - return self._contexts.get(skill_id) - - def get_all_contexts(self) -> Dict[str, SkillContext]: - """Get all skill contexts from progressive analysis.""" - return self._contexts.copy() - - def get_executed_skill_ids(self) -> List[str]: - """Get list of skill_ids that have been executed with contexts.""" - return list(self._contexts.keys()) - - -class AutoSkills: - """ - Automatic skill retrieval and DAG construction for user queries. - - Uses hybrid retrieval (dense + sparse) to find relevant skills, - with LLM-based analysis and reflection loop for completeness checking. - Supports DAG-based skill execution with dependency management. - """ - - def __init__(self, - skills: Union[str, List[str], List[SkillSchema]], - llm: LLM, - enable_retrieve: Union[bool, None] = None, - retrieve_args: Dict[str, Any] = None, - max_candidate_skills: int = 10, - max_retries: int = 3, - work_dir: Optional[Union[str, Path]] = None, - use_sandbox: bool = True, - **kwargs): - """ - Initialize AutoSkills with skills corpus and retriever. - - Args: - skills: Path(s) to skill directories or list of SkillSchema. - Alternatively, single repo_id or list of repo_ids from ModelScope. - e.g. skills='ms-agent/claude_skills', refer to `https://modelscope.cn/models/ms-agent/claude_skills` - llm: LLM instance for query analysis and evaluation. - enable_retrieve: If True, use HybridRetriever for skill search. - If False, put all skills into LLM context for direct selection. - If None, enable search only if skills > 10 automatically. - retrieve_args: Additional arguments for HybridRetriever. - Attributes: - top_k: Number of top results to retrieve per query. - min_score: Minimum score threshold for retrieval. - max_candidate_skills: Maximum number of candidate skills to consider. - max_retries: Maximum retry attempts for failed executions for each skill. - work_dir: Working directory for skill execution. - use_sandbox: Whether to use Docker sandbox for execution. 
- - Examples: - >>> from omegaconf import DictConfig - >>> from ms_agent.llm.openai_llm import OpenAI - >>> from ms_agent.skill.auto_skills import SkillDAGResult - >>> config = DictConfig( - { - 'llm': { - 'service': 'openai', - 'model': 'gpt-4', - 'openai_api_key': 'your-api-key', - 'openai_base_url': 'your-base-url' - } - } - >>> ) - >>> llm_instance = OpenAI.from_config(config) - >>> auto_skills = AutoSkills( - skills='/path/to/skills', - llm=llm_instance, - ) - >>> async def main(): - result: SkillDAGResult = await auto_skills.run(query='Analyze sales data and generate mock report for Nvidia Q4 2025 in PDF format.') - print(result.execution_result) - >>> import asyncio - >>> asyncio.run(main()) - """ - # Dict of - self.all_skills: Dict[str, SkillSchema] = load_skills(skills=skills) - logger.info(f'Loaded {len(self.all_skills)} skills from {skills}') - - self.llm = llm - self.enable_retrieve = len( - self.all_skills) > 10 if enable_retrieve is None else enable_retrieve - retrieve_args = retrieve_args or {} - self.top_k = retrieve_args.get('top_k', 3) - self.min_score = retrieve_args.get('min_score', 0.8) - self.max_candidate_skills = max_candidate_skills - self.max_retries = max_retries - self.work_dir = Path(work_dir) if work_dir else None - self.use_sandbox = use_sandbox - self.kwargs = kwargs - - if self.use_sandbox: - from ms_agent.utils.docker_utils import is_docker_daemon_running - if not is_docker_daemon_running(): - raise RuntimeError( - 'Docker daemon is not running. Please start Docker to use sandbox mode.' - ) - - # Configure logger to output to work_dir/logs if work_dir is specified - if self.work_dir: - _configure_logger_to_dir(self.work_dir / 'logs') - - # Build corpus and skill_id mapping - self.corpus: List[str] = [] - self.corpus_to_skill_id: Dict[str, str] = {} - self._build_corpus() - - # Initialize retriever only if search is enabled - self.retriever: Optional[HybridRetriever] = None - if self.enable_retrieve and self.corpus: - self.retriever = HybridRetriever(corpus=self.corpus, **kwargs) - - # Container and executor (lazy initialization) - self._container: Optional[SkillContainer] = None - self._executor: Optional[DAGExecutor] = None - - def _build_corpus(self): - """Build corpus from skills for retriever indexing.""" - for skill_id, skill in self.all_skills.items(): - # Concatenate skill_id, name, description as corpus document - doc = f'[{skill_id}] {skill.name}: {skill.description}' - self.corpus.append(doc) - self.corpus_to_skill_id[doc] = skill_id - - def _extract_skill_id_from_doc(self, doc: str) -> Optional[str]: - """Extract skill_id from corpus document string.""" - # First try direct lookup - if doc in self.corpus_to_skill_id: - return self.corpus_to_skill_id[doc] - # Fallback: extract from [skill_id] pattern - match = re.match(r'\[([^\]]+)\]', doc) - return match.group(1) if match else None - - def _parse_json_response(self, response: str) -> Dict[str, Any]: - """Parse JSON from LLM response with robust extraction.""" - # Remove markdown code blocks if present - response = re.sub(r'```json\s*', '', response) - response = re.sub(r'```\s*$', '', response) - response = response.strip() - - # Try direct parsing first - try: - return json.loads(response) - except json.JSONDecodeError: - pass - - # Try to extract JSON object from response - try: - # Find the outermost JSON object - start = response.find('{') - if start != -1: - # Find matching closing brace - depth = 0 - for i, char in enumerate(response[start:], start): - if char == '{': - depth += 1 - elif 
char == '}': - depth -= 1 - if depth == 0: - json_str = response[start:i + 1] - return json.loads(json_str) - except json.JSONDecodeError: - pass - - # Try regex extraction as fallback - try: - json_match = re.search(r'\{[\s\S]*\}', response) - if json_match: - return json.loads(json_match.group()) - except json.JSONDecodeError: - pass - - logger.warning(f'Failed to parse JSON response: {response[:300]}...') - return {} - - def _get_skills_overview(self, limit: int = 20) -> str: - """Generate a brief overview of all available skills.""" - lines = [] - for skill_id, skill in self.all_skills.items(): - lines.append( - f'- [{skill_id}] {skill.name}: {skill.description[:200]}') - return '\n'.join(lines[:limit]) # Limit to avoid token overflow - - def _get_all_skills_context(self) -> str: - """Generate full context of all skills for direct LLM selection.""" - lines = [] - for skill_id, skill in self.all_skills.items(): - lines.append(f'- [{skill_id}] {skill.name}\n {skill.description}') - return '\n'.join(lines) - - def _format_retrieved_skills(self, skill_ids: Set[str]) -> str: - """Format retrieved skills for LLM prompt.""" - lines = [] - for skill_id in skill_ids: - if skill_id in self.all_skills: - skill = self.all_skills[skill_id] - lines.append( - f'- [{skill_id}] {skill.name}\n {skill.description}\n Main Content: {skill.content[:3000]}') - return '\n'.join(lines) - - def _llm_generate(self, prompt: str) -> str: - """Generate LLM response from prompt.""" - messages = [Message(role='user', content=prompt)] - logger.debug(f'Input msg to LLM: {messages}') # set env `LOG_LEVEL=DEBUG` - response = self.llm.generate(messages=messages) - res = response.content if hasattr(response, - 'content') else str(response) - logger.debug('LLM response: {}'.format(res)) - return res - - async def _async_llm_generate(self, prompt: str) -> str: - """Async wrapper for LLM generation.""" - return await asyncio.to_thread(self._llm_generate, prompt) - - def _analyze_query( - self, - query: str, - ) -> Tuple[bool, str, List[str], Optional[str]]: - """ - Analyze user query to determine if skills are needed. - - Args: - query: User's original query. - - Returns: - Tuple of (needs_skills, intent_summary, skill_queries, chat_response). - """ - prompt = PROMPT_ANALYZE_QUERY_FOR_SKILLS.format( - query=query, skills_overview=self._get_skills_overview()) - response = self._llm_generate(prompt) - parsed = self._parse_json_response(response) - - needs_skills = parsed.get('needs_skills', True) - intent = parsed.get('intent_summary', query) - queries = parsed.get('skill_queries', [query]) - chat_response = parsed.get('chat_response') - return needs_skills, intent, queries if queries else [query - ], chat_response - - async def _async_retrieve_skills(self, queries: List[str]) -> Set[str]: - """ - Retrieve skills for multiple queries in parallel. - - Args: - queries: List of search queries. - - Returns: - Set of unique skill_ids from all queries. 
- """ - if not self.retriever: - return set() - - # Run parallel async searches - tasks = [ - self.retriever.async_search( - query=q, top_k=self.top_k, min_score=self.min_score) - for q in queries - ] - results = await asyncio.gather(*tasks) - - # Collect unique skill_ids - skill_ids = set() - for result_list in results: - for doc, score in result_list: - skill_id = self._extract_skill_id_from_doc(doc) - if skill_id: - skill_ids.add(skill_id) - return skill_ids - - def _filter_skills( - self, - query: str, - skill_ids: Set[str], - mode: Literal['fast', 'deep'] = 'fast' - ) -> Set[str]: - """ - Filter skills based on relevance to the query. - - Args: - query: User's query. - skill_ids: Set of candidate skill_ids. - mode: 'fast' for name+description only, 'deep' for full content analysis. - - Returns: - Set of filtered skill_ids that are relevant. - """ - if len(skill_ids) <= 1: - return skill_ids - - # Format candidate skills based on mode - if mode == 'deep': - # Include name, description, and content (truncated) - skill_entries = [] - for sid in skill_ids: - if sid not in self.all_skills: - continue - skill = self.all_skills[sid] - content = skill.content[:3000] if skill.content else '' - entry = ( - f'### [{sid}] {skill.name}\n' - f'**Description**: {skill.description}\n' - f'**Content**: {content}' - ) - skill_entries.append(entry) - candidate_skills_text = '\n\n'.join(skill_entries) - prompt = PROMPT_FILTER_SKILLS_DEEP.format( - query=query, - candidate_skills=candidate_skills_text) - else: - # Fast mode: name and description only - candidate_skills_text = '\n'.join([ - f'- [{sid}] {self.all_skills[sid].name}: {self.all_skills[sid].description}' - for sid in skill_ids if sid in self.all_skills - ]) - prompt = PROMPT_FILTER_SKILLS_FAST.format( - query=query, - candidate_skills=candidate_skills_text) - - response = self._llm_generate(prompt) - parsed = self._parse_json_response(response) - - filtered_ids = parsed.get('filtered_skill_ids', list(skill_ids)) - - # For deep mode, also check skill_analysis for can_execute - if mode == 'deep': - skill_analysis = parsed.get('skill_analysis', {}) - final_ids = [] - for sid in filtered_ids: - analysis = skill_analysis.get(sid, {}) - # Keep skill if can_execute is True or not specified - if analysis.get('can_execute', True): - final_ids.append(sid) - else: - logger.info( - f'Removing skill [{sid}]: cannot execute - ' - f'{analysis.get("reason", "")[:200]}' - ) - filtered_ids = final_ids - - logger.info( - f'Filter ({mode}): {len(skill_ids)} -> {len(filtered_ids)} skills. ' - f'Reason: {parsed.get("reasoning", "")[:1000]}' - ) - - return set(filtered_ids) - - def _build_dag(self, query: str, skill_ids: Set[str]) -> Dict[str, Any]: - """ - Filter skills and build execution DAG. - - Performs deep filtering and DAG construction in one LLM call. - - Args: - query: Original user query. - skill_ids: Set of candidate skill_ids. - - Returns: - Dict containing 'filtered_skill_ids', 'dag', and 'execution_order'. 
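-
-        Example (hypothetical skill IDs; shows the returned shape):
-            >>> self._build_dag('report task', {'pdf@latest', 'data@latest'})
-            {'filtered_skill_ids': {'pdf@latest', 'data@latest'},
-             'dag': {'data@latest': [], 'pdf@latest': ['data@latest']},
-             'execution_order': ['data@latest', 'pdf@latest']}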
- """ - skills_info = self._format_retrieved_skills(skill_ids) - prompt = PROMPT_BUILD_SKILLS_DAG.format( - query=query, selected_skills=skills_info) - response = self._llm_generate(prompt) - parsed = self._parse_json_response(response) - - # Get filtered skills and validate they exist in input - raw_filtered = parsed.get('filtered_skill_ids', list(skill_ids)) - filtered_ids = set(sid for sid in raw_filtered if sid in skill_ids) - - # If no valid IDs returned, keep all input skills - if not filtered_ids: - logger.warning('No valid skill IDs in LLM response, keeping all input skills') - filtered_ids = skill_ids - - logger.info(f'DAG filter: {len(skill_ids)} -> {len(filtered_ids)} skills') - - # Validate and clean DAG - only keep valid skill IDs - raw_dag = parsed.get('dag', {}) - dag = {} - for sid, deps in raw_dag.items(): - if sid in filtered_ids: - # Filter dependencies to only valid skill IDs - valid_deps = [d for d in deps if d in filtered_ids] - dag[sid] = valid_deps - - # Ensure all filtered skills are in DAG - for sid in filtered_ids: - if sid not in dag: - dag[sid] = [] - - # Validate execution_order - only keep valid skill IDs - raw_order = parsed.get('execution_order', []) - order = self._validate_execution_order(raw_order, filtered_ids) - - # Fallback: derive execution_order from DAG using topological sort - if not order and filtered_ids: - order = self._topological_sort_dag(dag) - logger.info(f'Derived execution_order from DAG: {order}') - - return { - 'filtered_skill_ids': filtered_ids, - 'dag': dag, - 'execution_order': order - } - - def _validate_execution_order( - self, - raw_order: List[Union[str, List[str]]], - valid_ids: Set[str] - ) -> List[Union[str, List[str]]]: - """ - Validate execution order, keeping only valid skill IDs. - - Args: - raw_order: Raw execution order from LLM. - valid_ids: Set of valid skill IDs. - - Returns: - Validated execution order with only valid skill IDs. - """ - result = [] - for item in raw_order: - if isinstance(item, list): - valid_group = [sid for sid in item if sid in valid_ids] - if valid_group: - if len(valid_group) == 1: - result.append(valid_group[0]) - else: - result.append(valid_group) - elif item in valid_ids: - result.append(item) - return result - - def _topological_sort_dag(self, dag: Dict[str, List[str]]) -> List[str]: - """ - Perform topological sort on DAG to get execution order. - - Args: - dag: Adjacency list where dag[A] = [B, C] means A depends on B, C. - - Returns: - Topologically sorted list of skill IDs (dependencies first). 
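-
-        Example:
-            >>> self._topological_sort_dag({'c': ['a', 'b'], 'b': ['a'], 'a': []})
-            ['a', 'b', 'c']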
-        """
-        if not dag:
-            return []
-
-        # In dag[A] = [B], A depends on B, so B must come before A.
-        # in_degree[node] = number of dependencies that must run first.
-        in_degree = {node: 0 for node in dag}
-        for dep in set(d for deps in dag.values() for d in deps):
-            if dep not in in_degree:
-                in_degree[dep] = 0
-
-        for node, deps in dag.items():
-            in_degree[node] = len(deps)
-
-        # Start with nodes that have no dependencies
-        queue = [node for node, degree in in_degree.items() if degree == 0]
-        result = []
-
-        while queue:
-            # Sort for deterministic order
-            queue.sort()
-            node = queue.pop(0)
-            result.append(node)
-
-            # Reduce in-degree for nodes that depend on this node
-            for other_node, deps in dag.items():
-                if node in deps and other_node in in_degree:
-                    in_degree[other_node] -= 1
-                    if in_degree[other_node] == 0:
-                        queue.append(other_node)
-
-        # If not all nodes processed, there might be a cycle or disconnected nodes
-        remaining = set(dag.keys()) - set(result)
-        if remaining:
-            logger.warning(f'Topological sort incomplete, adding remaining: {remaining}')
-            result.extend(sorted(remaining))
-
-        return result
-
-    def _filter_execution_order(
-            self,
-            execution_order: List[Union[str, List[str]]],
-            valid_skill_ids: Set[str]
-    ) -> List[Union[str, List[str]]]:
-        """
-        Filter execution order to only include valid skill_ids.
-
-        Args:
-            execution_order: Original execution order (may contain parallel groups).
-            valid_skill_ids: Set of skill_ids that should be kept.
-
-        Returns:
-            Filtered execution order with only valid skills.
-        """
-        filtered = []
-        for item in execution_order:
-            if isinstance(item, list):
-                # Parallel group: filter and keep if any remain
-                filtered_group = [sid for sid in item if sid in valid_skill_ids]
-                if filtered_group:
-                    if len(filtered_group) == 1:
-                        filtered.append(filtered_group[0])
-                    else:
-                        filtered.append(filtered_group)
-            elif item in valid_skill_ids:
-                filtered.append(item)
-        return filtered
-
-    def _direct_select_skills(self, query: str) -> SkillDAGResult:
-        """
-        Directly select skills using LLM with all skills in context.
-
-        Used when enable_retrieve=False. Puts all skills into LLM context
-        and lets LLM select relevant skills and build DAG in one call.
-
-        Args:
-            query: User's task query.
-
-        Returns:
-            SkillDAGResult containing the skill execution DAG.
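-
-        Example (sketch; assumes a matching skill is loaded):
-            >>> result = self._direct_select_skills('Render Q4 sales as a PDF')
-            >>> list(result.selected_skills)  # e.g. ['pdf@latest']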
- """ - prompt = PROMPT_DIRECT_SELECT_SKILLS.format( - query=query, all_skills=self._get_all_skills_context()) - response = self._llm_generate(prompt) - parsed = self._parse_json_response(response) - - # Handle chat-only response - needs_skills = parsed.get('needs_skills', True) - chat_response = parsed.get('chat_response') - - if not needs_skills: - logger.info('Chat-only query, no skills needed') - if chat_response: - print(f'\n[Chat Response]\n{chat_response}\n') - return SkillDAGResult( - is_complete=True, chat_response=chat_response) - - # Extract selected skills and DAG - selected_ids = parsed.get('selected_skill_ids', []) - dag = parsed.get('dag', {}) - order = parsed.get('execution_order', []) - - # Validate skill_ids exist - valid_ids = {sid for sid in selected_ids if sid in self.all_skills} - selected = {sid: self.all_skills[sid] for sid in valid_ids} - - logger.info(f'Direct selection: {valid_ids}') - - return SkillDAGResult( - dag=dag, - execution_order=order, - selected_skills=selected, - is_complete=bool(valid_ids), - clarification=None if valid_ids else 'No relevant skills found.') - - async def get_skill_dag(self, query: str) -> SkillDAGResult: - """ - Run the autonomous skill retrieval and DAG construction loop. - - Iteratively retrieves skills, evaluates completeness with reflection, - and builds execution DAG. Loop terminates when: - - Query is chat-only (no skills needed) - - Max iterations reached - - Skills are deemed complete for the task - - Clarification from user is needed - - Args: - query: User's task query. - - Returns: - SkillDAGResult containing the skill execution DAG. - """ - if not self.all_skills: - logger.warning('No skills loaded, returning empty result') - return SkillDAGResult() - - # Direct selection mode: put all skills into LLM context - if not self.enable_retrieve: - logger.info('Direct selection mode (enable_retrieve=False)') - return self._direct_select_skills(query) - - # Search mode: use HybridRetriever - if not self.retriever: - logger.warning('Retriever not initialized, returning empty result') - return SkillDAGResult() - - # Step 1: Analyze query to determine if skills are needed - needs_skills, intent, skill_queries, chat_response = self._analyze_query( - query) - logger.info(f'Needs skills: {needs_skills}, Intent: {intent}') - - # If chat-only, return empty DAG with chat response - if not needs_skills: - logger.info('Chat-only query, no skills needed') - if chat_response: - print(f'\n[Chat Response]\n{chat_response}\n') - return SkillDAGResult( - is_complete=True, chat_response=chat_response) - - clarification: Optional[str] = None - - # Step 2: Retrieve skills - collected_skills = await self._async_retrieve_skills(skill_queries) - logger.info(f'Retrieved skills: {collected_skills}') - - if not collected_skills: - clarification = 'No relevant skills found. Please provide more details.' 
- return SkillDAGResult( - is_complete=False, clarification=clarification) - - # Limit candidate skills to max_candidate_skills - if len(collected_skills) > self.max_candidate_skills: - logger.warning( - f'Too many candidate skills ({len(collected_skills)}), ' - f'limiting to {self.max_candidate_skills}' - ) - collected_skills = set(list(collected_skills)[:self.max_candidate_skills]) - - # Step 3: Fast filter by name/description - collected_skills = self._filter_skills(query, collected_skills, mode='fast') - logger.info(f'After fast filter: {collected_skills}') - - if len(collected_skills) > 1: - collected_skills = self._filter_skills(query, collected_skills, mode='deep') - logger.info(f'After deep filter: {collected_skills}') - - if not collected_skills: - clarification = 'No relevant skills found after filtering. Please refine your query.' - return SkillDAGResult( - is_complete=False, clarification=clarification) - - # Step 4: Build DAG with integrated deep filtering - dag_result = self._build_dag(query, collected_skills) - - filtered_ids = dag_result.get('filtered_skill_ids', collected_skills) - skills_dag: Dict[str, Any] = dag_result.get('dag', {}) - execution_order: List[str] = dag_result.get('execution_order', []) - - if not filtered_ids: - clarification = 'No relevant skills found after filtering. Please refine your query.' - return SkillDAGResult( - is_complete=False, clarification=clarification) - - # Build selected skills dict from filtered results - selected = { - sid: self.all_skills[sid] - for sid in filtered_ids if sid in self.all_skills - } - - logger.info( - f'Final DAG built with skills: {skills_dag}, execution order: {execution_order}' - ) - - return SkillDAGResult( - dag=skills_dag, - execution_order=execution_order, - selected_skills=selected, - is_complete=(clarification is None), - clarification=clarification) - - def _get_container(self) -> SkillContainer: - """Get or create SkillContainer instance.""" - if self._container is None: - self._container = SkillContainer( - workspace_dir=self.work_dir, - use_sandbox=self.use_sandbox, - **{ - k: v - for k, v in self.kwargs.items() if k in [ - 'timeout', 'image', 'memory_limit', - 'enable_security_check', 'network_enabled' - ] - }) - return self._container - - def _get_executor(self) -> DAGExecutor: - """Get or create DAGExecutor instance.""" - if self._executor is None: - container = self._get_container() - self._executor = DAGExecutor( - container=container, - skills=self.all_skills, - workspace_dir=self.work_dir, - llm=self.llm, - enable_progressive_analysis=True, - max_retries=self.max_retries) - return self._executor - - async def execute_dag(self, - dag_result: SkillDAGResult, - execution_input: Optional[ExecutionInput] = None, - stop_on_failure: bool = True, - query: str = '') -> DAGExecutionResult: - """ - Execute the skill DAG from a SkillDAGResult. - - Executes skills according to the execution_order, handling: - - Sequential execution for single skill items - - Parallel execution for skill groups (sublists) - - Input/output linking between dependent skills - - Progressive skill analysis (plan -> load -> execute) - - Args: - dag_result: SkillDAGResult containing DAG and execution order. - execution_input: Optional initial input for skills. - stop_on_failure: Whether to stop on first failure. - query: User query for progressive skill analysis. - - Returns: - DAGExecutionResult with all execution outcomes. 
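-
-        Example (sketch):
-            >>> dag_result = await auto_skills.get_skill_dag(query)
-            >>> exec_result = await auto_skills.execute_dag(dag_result, query=query)
-            >>> exec_result.success  # True if all executed skills succeeded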
- """ - if not dag_result.is_complete: - logger.warning('DAG is not complete, execution may fail') - - if not dag_result.execution_order: - logger.warning('Empty execution order, nothing to execute') - return DAGExecutionResult(success=True) - - executor = self._get_executor() - result = await executor.execute( - dag=dag_result.dag, - execution_order=dag_result.execution_order, - execution_input=execution_input, - stop_on_failure=stop_on_failure, - query=query) - - # Attach result to dag_result for convenience - dag_result.execution_result = result - - logger.info(f'DAG execution completed: success={result.success}, ' - f'duration={result.total_duration_ms:.2f}ms') - - return result - - def get_execution_spec(self) -> Optional[str]: - """Get the execution spec log as markdown string.""" - if self._container: - return self._container.get_spec_log() - return None - - def save_execution_spec(self, - output_path: Optional[Union[str, Path]] = None): - """Save the execution spec to a markdown file.""" - if self._container: - self._container.save_spec_log(output_path) - - def cleanup(self, keep_spec: bool = True): - """Clean up container workspace.""" - if self._container: - self._container.cleanup(keep_spec=keep_spec) - - def get_skill_context(self, skill_id: str) -> Optional[SkillContext]: - """ - Get the skill context for an executed skill. - - Args: - skill_id: The skill identifier (e.g., 'pdf@latest'). - - Returns: - SkillContext if the skill was executed, None otherwise. - """ - if self._executor: - return self._executor.get_skill_context(skill_id) - return None - - def get_all_skill_contexts(self) -> Dict[str, SkillContext]: - """ - Get all skill contexts from executed skills. - - Returns: - Dict mapping skill_id to SkillContext. - """ - if self._executor: - return self._executor.get_all_contexts() - return {} - - def get_executed_skill_ids(self) -> List[str]: - """ - Get list of skill_ids that were executed. - - Returns: - List of skill_ids with available contexts. - """ - if self._executor: - return self._executor.get_executed_skill_ids() - return [] - - async def run( - self, - query: str, - execution_input: Optional[ExecutionInput] = None, - stop_on_failure: bool = True - ) -> SkillDAGResult: - """ - Run skill retrieval and execute the resulting DAG in one call. - - Combines get_skill_dag() and execute_dag(). - Uses progressive skill analysis for each skill execution. - - Args: - query: User's task query. - execution_input: Optional initial input for skills. - stop_on_failure: Whether to stop on first failure. - - Returns: - SkillDAGResult with execution_result populated. - """ - dag_result = await self.get_skill_dag(query) - - # Skip execution for chat-only results - if dag_result.chat_response: - logger.info('Chat-only response, skipping execution') - return dag_result - - # Skip if skills are incomplete - if not dag_result.is_complete: - logger.warning(f'Skills incomplete: {dag_result.clarification}') - return dag_result - - # Execute the DAG - if dag_result.execution_order: - await self.execute_dag( - dag_result, execution_input, stop_on_failure, query=query) - - return dag_result diff --git a/ms_agent/skill/catalog.py b/ms_agent/skill/catalog.py new file mode 100644 index 000000000..75a040201 --- /dev/null +++ b/ms_agent/skill/catalog.py @@ -0,0 +1,302 @@ +# Copyright (c) ModelScope Contributors. All rights reserved. 
+import os
+import shutil
+import subprocess
+import tempfile
+import zipfile
+from pathlib import Path
+from typing import Dict, List, Optional, Set
+
+import requests
+
+from ms_agent.utils.logger import get_logger
+
+from .loader import SkillLoader
+from .schema import SkillSchema, SkillSchemaParser
+from .sources import SkillSource, SkillSourceType, parse_skill_source
+
+logger = get_logger()
+
+MODELSCOPE_SKILL_API = (
+    "https://www.modelscope.cn/api/v1/skills/{skill_id}/archive/zip/master")
+
+
+def _download_skill_zip(skill_id: str, local_dir: str) -> str:
+    """Download a skill archive from the ModelScope skill hub and extract it.
+
+    This is a pure-HTTP fallback that does not require ``modelscope>=1.35.2``.
+    The directory naming follows the SDK convention: ``<local_dir>/<skill_name>``.
+    """
+    url = MODELSCOPE_SKILL_API.format(skill_id=skill_id)
+    os.makedirs(local_dir, exist_ok=True)
+
+    _owner, name = skill_id.split("/", 1)
+    skill_dir = os.path.join(local_dir, name)
+
+    resp = requests.get(url, stream=True, timeout=120)
+    resp.raise_for_status()
+
+    zip_path = os.path.join(local_dir, f"{name}.zip")
+    try:
+        with open(zip_path, "wb") as fh:
+            for chunk in resp.iter_content(chunk_size=8192):
+                if chunk:
+                    fh.write(chunk)
+
+        if os.path.exists(skill_dir):
+            shutil.rmtree(skill_dir)
+        os.makedirs(skill_dir, exist_ok=True)
+
+        with zipfile.ZipFile(zip_path, "r") as zf:
+            zf.extractall(skill_dir)
+
+        # Flatten a single nested top-level directory from the archive
+        entries = os.listdir(skill_dir)
+        if len(entries) == 1:
+            nested = os.path.join(skill_dir, entries[0])
+            if os.path.isdir(nested):
+                for item in os.listdir(nested):
+                    shutil.move(
+                        os.path.join(nested, item),
+                        os.path.join(skill_dir, item))
+                os.rmdir(nested)
+    finally:
+        if os.path.exists(zip_path):
+            os.remove(zip_path)
+
+    logger.info(f"Skill {skill_id} downloaded to {skill_dir}")
+    return skill_dir
+
+
+BUILTIN_SKILLS_DIR = Path(__file__).parent.parent / "skills"
+if not BUILTIN_SKILLS_DIR.exists():
+    _repo_root = Path(__file__).parent.parent.parent
+    _candidate = _repo_root / "skills"
+    if _candidate.exists():
+        BUILTIN_SKILLS_DIR = _candidate
+
+USER_SKILLS_DIR = Path.home() / ".ms_agent" / "skills"
+
+
+class SkillCatalog:
+    """Unified skill catalog that loads, caches, and manages skills
+    from multiple sources with priority-based override semantics.
+    """
+
+    def __init__(self, config=None):
+        self._skills: Dict[str, SkillSchema] = {}
+        self._sources: List[SkillSource] = []
+        self._loader = SkillLoader()
+        self._config = config
+        self._disabled_skills: Set[str] = set()
+        self._whitelist: Optional[Set[str]] = None
+        self._cache_version: int = 0
+        self._summary_cache: Optional[str] = None
+        self._summary_cache_version: int = -1
+
+    # ------------------------------------------------------------------ #
+    # Loading
+    # ------------------------------------------------------------------ #
+
+    def load_from_config(self, skills_config) -> None:
+        """Load skills following the three-tier priority scan:
+        built-in -> user home -> workspace / config-specified.
+        """
+        sources: List[SkillSource] = []
+
+        # 1. Built-in skills (lowest priority)
+        if BUILTIN_SKILLS_DIR.exists():
+            sources.append(
+                SkillSource(type=SkillSourceType.LOCAL_DIR,
+                            path=str(BUILTIN_SKILLS_DIR)))
+
+        # 2. User home skills
+        for subdir in ("installed", "custom"):
+            d = USER_SKILLS_DIR / subdir
+            if d.exists():
+                sources.append(
+                    SkillSource(type=SkillSourceType.LOCAL_DIR,
+                                path=str(d)))
+
+        # 3a. 
Structured sources (higher priority) + if hasattr(skills_config, "sources") and skills_config.sources: + for src_cfg in skills_config.sources: + sources.append( + SkillSource( + type=SkillSourceType(src_cfg.type), + path=getattr(src_cfg, "path", None), + repo_id=getattr(src_cfg, "repo_id", None), + url=getattr(src_cfg, "url", None), + revision=getattr(src_cfg, "revision", None), + subdir=getattr(src_cfg, "subdir", None), + enabled=getattr(src_cfg, "enabled", True), + )) + # 3b. Simple path list (backward compat) + elif hasattr(skills_config, "path") and skills_config.path: + paths = skills_config.path + if isinstance(paths, str): + paths = [paths] + for p in paths: + sources.append(parse_skill_source(str(p))) + + # 4. Workspace auto-discover (highest priority) + if getattr(skills_config, "auto_discover", False): + workspace_skills = Path.cwd() / "skills" + if workspace_skills.exists(): + sources.append( + SkillSource(type=SkillSourceType.LOCAL_DIR, + path=str(workspace_skills))) + + self._sources = sources + self.load_from_sources(sources) + + # Apply whitelist / disabled filters + if hasattr(skills_config, "whitelist"): + wl = skills_config.whitelist + if wl is None: + self._whitelist = None + elif isinstance(wl, (list, tuple)): + self._whitelist = set(wl) if wl else set() + if hasattr(skills_config, "disabled") and skills_config.disabled: + self._disabled_skills = set(skills_config.disabled) + + def load_from_sources(self, sources: List[SkillSource]) -> None: + self._sources = sources + for source in sources: + if not source.enabled: + continue + try: + skills = self._materialize_and_load(source) + for skill in skills.values(): + self._register_skill(skill) + except Exception as e: + logger.warning(f"Failed to load skill source {source}: {e}") + + def _materialize_and_load( + self, source: SkillSource) -> Dict[str, SkillSchema]: + if source.type == SkillSourceType.LOCAL_DIR: + return self._loader.load_skills(source.path) + elif source.type == SkillSourceType.MODELSCOPE: + return self._load_from_modelscope(source) + elif source.type == SkillSourceType.GIT: + return self._load_from_git(source) + return {} + + def _load_from_modelscope( + self, source: SkillSource) -> Dict[str, SkillSchema]: + try: + from modelscope.hub.api import HubApi + api = HubApi() + local_dir = str(USER_SKILLS_DIR / "installed") + local_path = api.download_skill( + skill_id=source.repo_id, local_dir=local_dir) + except (ImportError, AttributeError): + local_path = _download_skill_zip( + source.repo_id, + str(USER_SKILLS_DIR / "installed")) + if source.subdir: + local_path = str(Path(local_path) / source.subdir) + return self._loader.load_skills(local_path) + + def _load_from_git(self, source: SkillSource) -> Dict[str, SkillSchema]: + dest = Path(tempfile.mkdtemp(prefix="ms_agent_skill_")) + cmd = ["git", "clone", "--depth", "1"] + if source.revision: + cmd += ["--branch", source.revision] + cmd += [source.url, str(dest)] + subprocess.run(cmd, check=True, capture_output=True) + local_path = str(dest / source.subdir) if source.subdir else str(dest) + return self._loader.load_skills(local_path) + + def _register_skill(self, skill: SkillSchema) -> None: + """Register a skill; later registrations override earlier ones.""" + self._skills[skill.skill_id] = skill + self._invalidate_cache() + + # ------------------------------------------------------------------ # + # Query + # ------------------------------------------------------------------ # + + def get_enabled_skills(self) -> Dict[str, SkillSchema]: + result = {} + for 
sid, skill in self._skills.items(): + if sid in self._disabled_skills: + continue + if self._whitelist is not None and sid not in self._whitelist: + continue + result[sid] = skill + return result + + def get_always_skills(self) -> Dict[str, SkillSchema]: + result = {} + for sid, skill in self.get_enabled_skills().items(): + frontmatter = SkillSchemaParser.parse_yaml_frontmatter( + skill.content) + if frontmatter and frontmatter.get("always", False): + result[sid] = skill + return result + + def get_skill(self, skill_id: str) -> Optional[SkillSchema]: + return self._skills.get(skill_id) + + # ------------------------------------------------------------------ # + # Hot reload + # ------------------------------------------------------------------ # + + def reload(self) -> None: + self._skills.clear() + self.load_from_sources(self._sources) + + def reload_skill(self, skill_id: str) -> Optional[SkillSchema]: + skill = self._skills.get(skill_id) + if skill and skill.skill_path.exists(): + reloaded = self._loader.reload_skill(str(skill.skill_path)) + if reloaded: + self._skills[skill_id] = reloaded + self._invalidate_cache() + return reloaded + return None + + def add_skill(self, skill_path: str) -> Optional[SkillSchema]: + skills = self._loader.load_skills(skill_path) + for skill in skills.values(): + self._register_skill(skill) + return skill + return None + + def remove_skill(self, skill_id: str) -> bool: + if skill_id in self._skills: + del self._skills[skill_id] + self._invalidate_cache() + return True + return False + + def enable_skill(self, skill_id: str) -> None: + self._disabled_skills.discard(skill_id) + self._invalidate_cache() + + def disable_skill(self, skill_id: str) -> None: + self._disabled_skills.add(skill_id) + self._invalidate_cache() + + # ------------------------------------------------------------------ # + # Summary cache + # ------------------------------------------------------------------ # + + def _invalidate_cache(self) -> None: + self._cache_version += 1 + + def get_skills_summary(self) -> str: + if self._summary_cache_version == self._cache_version: + return self._summary_cache or "" + self._summary_cache = self._build_summary() + self._summary_cache_version = self._cache_version + return self._summary_cache + + def _build_summary(self) -> str: + skills = self.get_enabled_skills() + if not skills: + return "" + lines = [] + for sid, skill in sorted(skills.items()): + lines.append( + f"- **{skill.name}** (`{sid}`): {skill.description}") + return "\n".join(lines) diff --git a/ms_agent/skill/container.py b/ms_agent/skill/container.py deleted file mode 100644 index 51d96f6f3..000000000 --- a/ms_agent/skill/container.py +++ /dev/null @@ -1,1443 +0,0 @@ -# Copyright (c) ModelScope Contributors. All rights reserved. -""" -Skill Execution Container - -Provides a unified, secure execution environment for skills using EnclaveSandbox. -Supports multiple languages (Python, Shell, JavaScript) with Docker-based isolation. -Cross-platform support (Mac/Linux/Windows) with RCE prevention. 
- -Execution modes: -- use_sandbox=True: Execute in Docker sandbox (default, recommended for untrusted code) -- use_sandbox=False: Execute locally with security checks (for trusted code or no Docker) -""" -import asyncio -import os -import platform -import re -import shutil -import subprocess -import sys -import tempfile -import uuid -from dataclasses import dataclass, field -from datetime import datetime -from enum import Enum -from pathlib import Path -from typing import Any, Callable, Dict, List, Optional, Union - -from ms_agent.utils.logger import get_logger - -logger = get_logger() - -# Security: Patterns to detect potentially dangerous code (sandbox mode) -# Note: These are checked only in sandbox mode for stricter isolation -DANGEROUS_PATTERNS = [ - r'os\.system\s*\(', # os.system - r'subprocess\.call\s*\([^)]*shell\s*=\s*True', # subprocess with shell=True - r'open\s*\([^)]*["\']\/etc', # Reading system files - r'rm\s+-rf\s+\/', # Dangerous rm commands - r'chmod\s+777', # Dangerous chmod - r'curl\s+.*\|\s*sh', # Piped curl execution - r'wget\s+.*\|\s*sh', # Piped wget execution -] - -# Additional patterns for local execution (stricter but reasonable) -# Note: eval/exec are allowed as they're commonly used in generated code -LOCAL_DANGEROUS_PATTERNS = DANGEROUS_PATTERNS + [ - r'shutil\.rmtree\s*\([^)]*["\']/', # Removing root paths - r'pathlib\.Path\s*\([^)]*["\']/', # Accessing root paths -] - -# Allowed file extensions for local script execution -ALLOWED_SCRIPT_EXTENSIONS = {'.py', '.sh', '.bash', '.js', '.mjs'} - - -class ExecutorType(Enum): - """Supported executor types for skill execution.""" - PYTHON_SCRIPT = 'python_script' - PYTHON_CODE = 'python_code' - PYTHON_FUNCTION = 'python_function' - SHELL = 'shell' - JAVASCRIPT = 'javascript' - - -class ExecutionStatus(Enum): - """Execution status codes.""" - PENDING = 'pending' - RUNNING = 'running' - SUCCESS = 'success' - FAILED = 'failed' - TIMEOUT = 'timeout' - CANCELLED = 'cancelled' - SECURITY_BLOCKED = 'security_blocked' - - -@dataclass -class ExecutionInput: - """ - Input specification for skill execution. - - Attributes: - args: Command line arguments or positional parameters. - kwargs: Keyword arguments for function calls. - env_vars: Environment variables to set during execution. - input_files: Dict of input files {name: path or content}. - stdin: Standard input content. - working_dir: Working directory for execution. - requirements: Python packages to install before execution. - """ - args: List[Any] = field(default_factory=list) - kwargs: Dict[str, Any] = field(default_factory=dict) - env_vars: Dict[str, str] = field(default_factory=dict) - input_files: Dict[str, Union[str, Path]] = field(default_factory=dict) - stdin: Optional[str] = None - working_dir: Optional[Path] = None - requirements: List[str] = field(default_factory=list) - - def to_dict(self) -> Dict[str, Any]: - return { - 'args': self.args, - 'kwargs': self.kwargs, - 'env_vars': self.env_vars, - 'input_files': {k: str(v) - for k, v in self.input_files.items()}, - 'stdin': self.stdin, - 'working_dir': str(self.working_dir) if self.working_dir else None, - 'requirements': self.requirements, - } - - -@dataclass -class ExecutionOutput: - """ - Output specification for skill execution. - - Attributes: - return_value: Return value from function execution. - stdout: Standard output content. - stderr: Standard error content. - exit_code: Process exit code. - output_files: Dict of output files {name: path}. - artifacts: Any generated artifacts (data, objects, etc.). 
- duration_ms: Execution duration in milliseconds. - """ - return_value: Any = None - stdout: str = '' - stderr: str = '' - exit_code: int = 0 - output_files: Dict[str, Path] = field(default_factory=dict) - artifacts: Dict[str, Any] = field(default_factory=dict) - duration_ms: float = 0.0 - - def to_dict(self) -> Dict[str, Any]: - return { - 'return_value': - str(self.return_value) if self.return_value else None, - 'stdout': self.stdout, - 'stderr': self.stderr, - 'exit_code': self.exit_code, - 'output_files': {k: str(v) - for k, v in self.output_files.items()}, - 'artifacts': list(self.artifacts.keys()), - 'duration_ms': self.duration_ms, - } - - -@dataclass -class ExecutionRecord: - """ - A single execution record in the spec log. - - Attributes: - execution_id: Unique identifier for this execution. - skill_id: The skill being executed. - executor_type: Type of executor used. - script_path: Path to the script (if applicable). - function_name: Name of the function (if applicable). - input_spec: Input specification. - output_spec: Output specification. - status: Execution status. - start_time: Execution start time. - end_time: Execution end time. - error_message: Error message if failed. - sandbox_used: Whether sandbox was used for execution. - """ - execution_id: str = field(default_factory=lambda: str(uuid.uuid4())[:8]) - skill_id: str = '' - executor_type: ExecutorType = ExecutorType.PYTHON_SCRIPT - script_path: Optional[str] = None - function_name: Optional[str] = None - input_spec: ExecutionInput = field(default_factory=ExecutionInput) - output_spec: ExecutionOutput = field(default_factory=ExecutionOutput) - status: ExecutionStatus = ExecutionStatus.PENDING - start_time: Optional[datetime] = None - end_time: Optional[datetime] = None - error_message: Optional[str] = None - sandbox_used: bool = True - - def to_markdown(self) -> str: - """Convert execution record to markdown format.""" - lines = [ - f'### Execution: `{self.execution_id}`', - '', - f'- **Skill ID**: `{self.skill_id}`', - f'- **Executor**: `{self.executor_type.value}`', - f'- **Status**: `{self.status.value}`', - f'- **Sandbox**: `{"Yes" if self.sandbox_used else "No"}`', - ] - - if self.script_path: - lines.append(f'- **Script**: `{self.script_path}`') - if self.function_name: - lines.append(f'- **Function**: `{self.function_name}`') - - if self.start_time: - lines.append(f'- **Start Time**: `{self.start_time.isoformat()}`') - if self.end_time: - lines.append(f'- **End Time**: `{self.end_time.isoformat()}`') - - lines.append(f'- **Duration**: `{self.output_spec.duration_ms:.2f}ms`') - - # Input section - lines.extend(['', '#### Input', '']) - if self.input_spec.args: - lines.append(f'- **Args**: `{self.input_spec.args}`') - if self.input_spec.kwargs: - lines.append(f'- **Kwargs**: `{self.input_spec.kwargs}`') - if self.input_spec.input_files: - lines.append('- **Input Files**:') - for name, path in self.input_spec.input_files.items(): - lines.append(f' - `{name}`: `{path}`') - if self.input_spec.requirements: - lines.append( - f'- **Requirements**: `{self.input_spec.requirements}`') - - # Output section - lines.extend(['', '#### Output', '']) - lines.append(f'- **Exit Code**: `{self.output_spec.exit_code}`') - - if self.output_spec.stdout: - stdout_preview = self.output_spec.stdout[:1000] - lines.extend(['', '**stdout**:', '```', stdout_preview, '```']) - if self.output_spec.stderr: - stderr_preview = self.output_spec.stderr[:1000] - lines.extend(['', '**stderr**:', '```', stderr_preview, '```']) - if 
self.output_spec.output_files: - lines.append('- **Output Files**:') - for name, path in self.output_spec.output_files.items(): - lines.append(f' - `{name}`: `{path}`') - - if self.error_message: - lines.extend( - ['', '#### Error', '', f'```\n{self.error_message}\n```']) - - lines.append('') - return '\n'.join(lines) - - -@dataclass -class ExecutionSpec: - """ - Specification log for tracking execution flow across skills. - - Attributes: - spec_id: Unique identifier for this spec. - title: Title of the execution spec. - description: Description of the execution flow. - records: List of execution records. - created_at: Creation timestamp. - upstream_outputs: Outputs from upstream skills available as inputs. - """ - spec_id: str = field(default_factory=lambda: str(uuid.uuid4())[:8]) - title: str = 'Skill Execution Spec' - description: str = '' - records: List[ExecutionRecord] = field(default_factory=list) - created_at: datetime = field(default_factory=datetime.now) - upstream_outputs: Dict[str, ExecutionOutput] = field(default_factory=dict) - - def add_record(self, record: ExecutionRecord): - """Add an execution record to the spec.""" - self.records.append(record) - - def get_output(self, execution_id: str) -> Optional[ExecutionOutput]: - """Get output from a specific execution by ID.""" - for record in self.records: - if record.execution_id == execution_id: - return record.output_spec - return None - - def link_upstream(self, skill_id: str, output: ExecutionOutput): - """Link upstream skill output for downstream consumption.""" - self.upstream_outputs[skill_id] = output - - def to_markdown(self) -> str: - """Convert entire spec to markdown format.""" - lines = [ - f'# {self.title}', - '', - f'**Spec ID**: `{self.spec_id}`', - f'**Created**: `{self.created_at.isoformat()}`', - '', - ] - - if self.description: - lines.extend([self.description, '']) - - # Summary - total = len(self.records) - success = sum(1 for r in self.records - if r.status == ExecutionStatus.SUCCESS) - failed = sum(1 for r in self.records - if r.status == ExecutionStatus.FAILED) - blocked = sum(1 for r in self.records - if r.status == ExecutionStatus.SECURITY_BLOCKED) - - lines.extend([ - '## Summary', - '', - f'- **Total Executions**: {total}', - f'- **Successful**: {success}', - f'- **Failed**: {failed}', - f'- **Security Blocked**: {blocked}', - '', - '---', - '', - '## Execution Records', - '', - ]) - - for record in self.records: - lines.append(record.to_markdown()) - lines.append('---') - lines.append('') - - return '\n'.join(lines) - - def save(self, output_path: Union[str, Path]): - """Save spec to markdown file.""" - output_path = Path(output_path) - output_path.parent.mkdir(parents=True, exist_ok=True) - with open(output_path, 'w', encoding='utf-8') as f: - f.write(self.to_markdown()) - logger.info(f'Execution spec saved to: {output_path}') - - -class SkillContainer: - """ - Secure container for executing skills. 
- - Supports two execution modes: - - use_sandbox=True: Execute in Docker sandbox via ms-enclave (recommended for untrusted code) - - use_sandbox=False: Execute locally with security checks (for trusted code or no Docker) - - Features: - - Docker-based isolation via ms-enclave - - Python scripts, Python code, shell commands, and JavaScript support - - Cross-platform support (Mac/Linux/Windows) - - RCE prevention and security checks - """ - - # Container paths for sandbox (following AgentSkill pattern) - SANDBOX_ROOT = '/sandbox' - SANDBOX_OUTPUT_DIR = '/sandbox/outputs' - SANDBOX_WORK_DIR = '/sandbox/scripts' - - def __init__(self, - workspace_dir: Optional[Union[str, Path]] = None, - timeout: int = 300, - image: str = 'python:3.11-slim', - memory_limit: str = '512m', - enable_security_check: bool = True, - network_enabled: bool = False, - use_sandbox: bool = True): - """ - Initialize the skill container. - - Args: - workspace_dir: Host working directory for I/O. Creates temp dir if None. - timeout: Default execution timeout in seconds. - image: Docker image for sandbox execution. - memory_limit: Memory limit for sandbox container. - enable_security_check: Whether to check code for dangerous patterns. - network_enabled: Whether to enable network in sandbox (disabled by default for security). - use_sandbox: Whether to use Docker sandbox (True) or local execution (False). - """ - # Ensure workspace_dir is an absolute path (required by Docker) - if workspace_dir: - self.workspace_dir = Path(workspace_dir).resolve() - else: - self.workspace_dir = Path( - tempfile.mkdtemp(prefix='skill_container_')).resolve() - self.workspace_dir.mkdir(parents=True, exist_ok=True) - - self.timeout = timeout - self.image = image - self.memory_limit = memory_limit - self.enable_security_check = enable_security_check - self.network_enabled = network_enabled - self.use_sandbox = use_sandbox - self.spec = ExecutionSpec() - - # Host directories for I/O management (only outputs, scripts, logs) - self.output_dir = self.workspace_dir / 'outputs' - self.scripts_dir = self.workspace_dir / 'scripts' - self.logs_dir = self.workspace_dir / 'logs' - self.output_dir.mkdir(exist_ok=True) - self.scripts_dir.mkdir(exist_ok=True) - self.logs_dir.mkdir(exist_ok=True) - - # Sandbox instance (lazy initialization) - self._sandbox = None - - # Skill directories to mount in sandbox - self._skill_dirs: Dict[str, str] = {} - - # Warn about local execution risks - if not self.use_sandbox: - logger.warning( - 'SkillContainer running in LOCAL mode (use_sandbox=False). ' - 'Scripts will execute directly on this machine. ' - 'Ensure you trust the code being executed!') - - logger.info(f'SkillContainer initialized at: {self.workspace_dir} ' - f'[mode: {"sandbox" if self.use_sandbox else "local"}]') - - def _get_sandbox(self): - """ - Get or create EnclaveSandbox instance with volume mounts. 
- - Volume mapping follows AgentSkill pattern: - - workspace_dir -> /sandbox (rw mode for full access) - - Additional skill directories are mounted to /sandbox/skills/ - """ - if self._sandbox is None: - from ms_agent.sandbox.sandbox import EnclaveSandbox - - # Mount entire workspace to /sandbox following AgentSkill pattern - # This allows scripts to access inputs/, outputs/, scripts/ subdirs - volumes = [ - (str(self.workspace_dir.resolve()), self.SANDBOX_ROOT, 'rw'), - ] - - # Add additional skill directory mounts - for skill_id, skill_dir in self._skill_dirs.items(): - safe_id = skill_id.replace('@', '_').replace('/', '_') - sandbox_path = f'{self.SANDBOX_ROOT}/skills/{safe_id}' - volumes.append( - (str(Path(skill_dir).resolve()), sandbox_path, 'ro')) - - self._sandbox = EnclaveSandbox( - image=self.image, - memory_limit=self.memory_limit, - volumes=volumes, - ) - return self._sandbox - - def mount_skill_directory(self, skill_id: str, skill_dir: Union[str, - Path]): - """ - Mount a skill directory for sandbox access. - - Args: - skill_id: Unique identifier for the skill. - skill_dir: Path to the skill directory. - """ - self._skill_dirs[skill_id] = str(Path(skill_dir).resolve()) - # Reset sandbox to recreate with new mount - self._sandbox = None - - def get_skill_sandbox_path(self, skill_id: str) -> str: - """ - Get the sandbox path for a mounted skill directory. - - Args: - skill_id: The skill identifier. - - Returns: - Path inside sandbox where skill is mounted. - """ - safe_id = skill_id.replace('@', '_').replace('/', '_') - return f'{self.SANDBOX_ROOT}/skills/{safe_id}' - - def _security_check(self, - code: str, - is_local: bool = False) -> tuple[bool, str]: - """ - Check code for potentially dangerous patterns. - - Args: - code: Code string to check. - is_local: If True, use stricter patterns for local execution. - - Returns: - Tuple of (is_safe, reason). - """ - if not self.enable_security_check: - return True, '' - - # Use stricter patterns for local execution - patterns = LOCAL_DANGEROUS_PATTERNS if is_local else DANGEROUS_PATTERNS - - for pattern in patterns: - if re.search(pattern, code, re.IGNORECASE): - return False, f'Dangerous pattern detected: {pattern}' - - return True, '' - - def _validate_path_in_workspace(self, path: Path) -> bool: - """ - Validate that a path is within the workspace directory. - - Security measure for local execution to prevent path traversal. - - Args: - path: Path to validate. - - Returns: - True if path is within workspace, False otherwise. - """ - try: - resolved = path.resolve() - return str(resolved).startswith(str(self.workspace_dir.resolve())) - except (OSError, ValueError): - return False - - def _validate_script_extension(self, script_path: Path) -> bool: - """ - Validate that script has an allowed extension. - - Args: - script_path: Path to the script file. - - Returns: - True if extension is allowed, False otherwise. 
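-
-        Example:
-            >>> self._validate_script_extension(Path('run.py'))
-            True
-            >>> self._validate_script_extension(Path('payload.exe'))
-            False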
- """ - return script_path.suffix.lower() in ALLOWED_SCRIPT_EXTENSIONS - - def _collect_output_files(self) -> Dict[str, Path]: - """Collect output files from output directory.""" - outputs = {} - if self.output_dir.exists(): - for f in self.output_dir.iterdir(): - if f.is_file(): - outputs[f.name] = f - return outputs - - def _create_record(self, - skill_id: str, - executor_type: ExecutorType, - input_spec: ExecutionInput, - script_path: str = None, - function_name: str = None, - sandbox_used: bool = None) -> ExecutionRecord: - """Create a new execution record.""" - return ExecutionRecord( - skill_id=skill_id, - executor_type=executor_type, - script_path=script_path, - function_name=function_name, - input_spec=input_spec, - status=ExecutionStatus.PENDING, - sandbox_used=sandbox_used - if sandbox_used is not None else self.use_sandbox) - - # ------------------------------------------------------------------------- - # Local Execution Helpers (for use_sandbox=False mode) - # ------------------------------------------------------------------------- - - def _local_run_subprocess(self, - cmd: List[str], - env: Dict[str, str] = None, - cwd: Path = None, - stdin_input: str = None) -> tuple[str, str, int]: - """ - Run subprocess locally with security restrictions. - - Cross-platform support with timeout and resource limits. - - Args: - cmd: Command list to execute. - env: Environment variables. - cwd: Working directory. - stdin_input: Input to pass to stdin. - - Returns: - Tuple of (stdout, stderr, exit_code). - """ - # Setup environment - run_env = os.environ.copy() - run_env['SKILL_OUTPUT_DIR'] = str(self.output_dir) - if env: - run_env.update(env) - - # Use workspace as default cwd - work_dir = cwd or self.workspace_dir - - try: - result = subprocess.run( - cmd, - capture_output=True, - text=True, - timeout=self.timeout, - cwd=str(work_dir), - env=run_env, - stdin=subprocess.PIPE if stdin_input else None, - input=stdin_input, - ) - return result.stdout, result.stderr, result.returncode - except subprocess.TimeoutExpired: - return '', f'Execution timed out after {self.timeout}s', -1 - except Exception as e: - return '', str(e), -1 - - def _get_python_executable(self) -> str: - """Get the Python executable for the current platform.""" - return sys.executable - - def _get_shell_executable(self) -> List[str]: - """Get the shell executable for the current platform.""" - if platform.system() == 'Windows': - return ['cmd', '/c'] - else: - return ['/bin/sh', '-c'] - - def _get_node_executable(self) -> str: - """Get the Node.js executable for the current platform.""" - if platform.system() == 'Windows': - return 'node.exe' - return 'node' - - async def _local_install_requirements( - self, requirements: List[str]) -> tuple[bool, str]: - """ - Install Python requirements locally using pip. - - Args: - requirements: List of packages to install. - - Returns: - Tuple of (success, error_message). 
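-
-        Example (sketch; the package name is illustrative):
-            >>> ok, err = await self._local_install_requirements(['requests'])
-            >>> ok, err  # (True, '') on success, (False, <pip stderr>) on failure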
- """ - if not requirements: - return True, '' - - try: - cmd = [ - self._get_python_executable(), '-m', 'pip', 'install', - '--quiet', '--disable-pip-version-check' - ] + requirements - - stdout, stderr, exit_code = self._local_run_subprocess(cmd) - - if exit_code != 0: - logger.warning(f'Failed to install requirements: {stderr}') - return False, stderr - - logger.info(f'Installed requirements: {requirements}') - return True, '' - except Exception as e: - logger.error(f'Error installing requirements: {e}') - return False, str(e) - - async def _local_execute_python_code( - self, code: str, - input_spec: ExecutionInput) -> tuple[str, str, int]: - """ - Execute Python code locally. - - Args: - code: Python code to execute. - input_spec: Input specification. - - Returns: - Tuple of (stdout, stderr, exit_code). - """ - # Install requirements first if any - if input_spec.requirements: - success, error = await self._local_install_requirements( - input_spec.requirements) - if not success: - return '', f'Failed to install requirements: {error}', -1 - - # Write code to temp file - script_file = self.scripts_dir / f'_temp_{uuid.uuid4().hex[:8]}.py' - try: - # Generate environment setup - env_setup = self._generate_local_env_setup(input_spec) - full_code = env_setup + '\n' + code - - with open(script_file, 'w', encoding='utf-8') as f: - f.write(full_code) - - # Build command - cmd = [self._get_python_executable(), str(script_file)] - cmd.extend([str(arg) for arg in input_spec.args]) - - # Use working_dir from input_spec for proper resource access - cwd = input_spec.working_dir if input_spec.working_dir else None - - stdout, stderr, exit_code = self._local_run_subprocess( - cmd, - env=input_spec.env_vars, - cwd=cwd, - stdin_input=input_spec.stdin) - - # Keep script in scripts folder for logging/debugging - return stdout, stderr, exit_code - except Exception as e: - logger.error(f'Local Python execution failed: {e}') - raise - - async def _local_execute_shell( - self, command: str, - input_spec: ExecutionInput) -> tuple[str, str, int]: - """ - Execute shell command locally. - - Args: - command: Shell command to execute. - input_spec: Input specification. - - Returns: - Tuple of (stdout, stderr, exit_code). - """ - shell_exec = self._get_shell_executable() - - # Build full command with environment exports - if platform.system() == 'Windows': - # Windows: use set for environment - env_cmds = [f'set {k}={v}' for k, v in input_spec.env_vars.items()] - full_cmd = ' && '.join(env_cmds - + [command]) if env_cmds else command - cmd = shell_exec + [full_cmd] - else: - # Unix: use export - env_cmds = [ - f"export {k}='{v}'" for k, v in input_spec.env_vars.items() - ] - full_cmd = ' && '.join(env_cmds - + [command]) if env_cmds else command - cmd = shell_exec + [full_cmd] - - # Use working_dir from input_spec for proper resource access - cwd = input_spec.working_dir if input_spec.working_dir else None - - return self._local_run_subprocess( - cmd, - env=input_spec.env_vars, - cwd=cwd, - stdin_input=input_spec.stdin) - - async def _local_execute_javascript( - self, js_code: str, - input_spec: ExecutionInput) -> tuple[str, str, int]: - """ - Execute JavaScript code locally via Node.js. - - Args: - js_code: JavaScript code to execute. - input_spec: Input specification. - - Returns: - Tuple of (stdout, stderr, exit_code). 
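-
-        Example (sketch; requires node on PATH):
-            >>> out, err, code = await self._local_execute_javascript(
-            ...     'console.log("hello")', ExecutionInput())
-            >>> out.strip(), code  # ('hello', 0)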
- """ - # Write code to temp file - script_file = self.scripts_dir / f'_temp_{uuid.uuid4().hex[:8]}.js' - try: - # Generate environment setup - env_setup = self._generate_local_js_env_setup(input_spec) - full_code = env_setup + '\n' + js_code - - with open(script_file, 'w', encoding='utf-8') as f: - f.write(full_code) - - # Build command - cmd = [self._get_node_executable(), str(script_file)] - cmd.extend([str(arg) for arg in input_spec.args]) - - # Use working_dir from input_spec for proper resource access - cwd = input_spec.working_dir if input_spec.working_dir else None - - # Keep script in scripts folder for logging/debugging - return self._local_run_subprocess( - cmd, - env=input_spec.env_vars, - cwd=cwd, - stdin_input=input_spec.stdin) - except Exception as e: - logger.error(f'Local JavaScript execution failed: {e}') - raise - - def _generate_local_env_setup(self, input_spec: ExecutionInput) -> str: - """Generate Python code to setup environment for local execution.""" - lines = [ - 'import os', - 'import sys', - '', - '# Setup environment for local execution', - f"os.environ['SKILL_OUTPUT_DIR'] = {repr(str(self.output_dir))}", - f"os.environ['SKILL_LOGS_DIR'] = {repr(str(self.logs_dir))}", - '', - '# Helper functions for I/O paths', - 'def get_output_path(filename):', - ' """Get the full path for an output file. ALL outputs should use this."""', - " return os.path.join(os.environ['SKILL_OUTPUT_DIR'], filename)", - '', - f'SKILL_OUTPUT_DIR = {repr(str(self.output_dir))}', - f'SKILL_LOGS_DIR = {repr(str(self.logs_dir))}', - ] - - # Add working directory to sys.path for imports and change to it - if input_spec.working_dir: - work_dir = str(input_spec.working_dir) - lines.extend([ - '', - '# Setup working directory for resource access (READ-ONLY for resources)', - f'_skill_dir = {repr(work_dir)}', - "os.environ['SKILL_DIR'] = _skill_dir", - 'SKILL_DIR = _skill_dir', - 'if _skill_dir not in sys.path:', - ' sys.path.insert(0, _skill_dir)', - 'os.chdir(_skill_dir)', - ]) - - # Add custom env vars - for key, value in input_spec.env_vars.items(): - lines.append(f'os.environ[{repr(key)}] = {repr(value)}') - - # Add args - if input_spec.args: - lines.append('') - lines.append('# Command line arguments') - args_str = repr(input_spec.args) - lines.append(f'ARGS = {args_str}') - lines.append('sys.argv = ["script.py"] + [str(a) for a in ARGS]') - - lines.append('') - return '\n'.join(lines) - - def _generate_local_js_env_setup(self, input_spec: ExecutionInput) -> str: - """Generate JavaScript code to setup environment for local execution.""" - lines = [ - '// Environment setup for local execution', - f'process.env.SKILL_OUTPUT_DIR = {repr(str(self.output_dir))};', - f'process.env.SKILL_LOGS_DIR = {repr(str(self.logs_dir))};', - ] - - for key, value in input_spec.env_vars.items(): - lines.append(f'process.env.{key} = {repr(value)};') - - lines.append('') - return '\n'.join(lines) - - def _parse_sandbox_result(self, - results: Dict[str, Any]) -> tuple[str, str, int]: - """Parse sandbox execution results into stdout, stderr, exit_code.""" - stdout_parts = [] - stderr_parts = [] - exit_code = 0 - - for executor_type in ['python_executor', 'shell_executor']: - if executor_type in results: - for result in results[executor_type]: - if result.get('output'): - stdout_parts.append(result['output']) - if result.get('error'): - stderr_parts.append(result['error']) - if result.get('status', 0) != 0: - exit_code = result.get('status', -1) - - return '\n'.join(stdout_parts), '\n'.join(stderr_parts), exit_code - 
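-    # Illustrative shape of the `results` dict consumed by
-    # _parse_sandbox_result above (values are hypothetical):
-    #     {'python_executor': [{'output': 'hi\n', 'error': '', 'status': 0}]}
-    # -> ('hi\n', '', 0)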
- async def _execute_in_sandbox( - self, - python_code: Union[str, List[str]] = None, - shell_command: Union[str, List[str]] = None, - requirements: List[str] = None) -> Dict[str, Any]: - """Execute code in EnclaveSandbox.""" - sandbox = self._get_sandbox() - return await sandbox.async_execute( - python_code=python_code, - shell_command=shell_command, - requirements=requirements) - - async def execute_python_script( - self, - script_path: Union[str, Path], - skill_id: str = 'unknown', - input_spec: ExecutionInput = None) -> ExecutionOutput: - """ - Execute a Python script file. - - Uses sandbox mode or local mode based on use_sandbox setting. - - Args: - script_path: Path to the Python script. - skill_id: Identifier of the skill being executed. - input_spec: Input specification. - - Returns: - ExecutionOutput with results. - """ - input_spec = input_spec or ExecutionInput() - script_path = Path(script_path) - - record = self._create_record( - skill_id=skill_id, - executor_type=ExecutorType.PYTHON_SCRIPT, - input_spec=input_spec, - script_path=str(script_path)) - - record.start_time = datetime.now() - record.status = ExecutionStatus.RUNNING - - try: - # Read script content - with open(script_path, 'r', encoding='utf-8') as f: - code = f.read() - - # Security check (stricter for local mode) - is_safe, reason = self._security_check( - code, is_local=not self.use_sandbox) - if not is_safe: - record.status = ExecutionStatus.SECURITY_BLOCKED - record.error_message = reason - output = ExecutionOutput( - stderr=f'Security check failed: {reason}', exit_code=-1) - record.end_time = datetime.now() - record.output_spec = output - self.spec.add_record(record) - return output - - start_time = datetime.now() - - if self.use_sandbox: - # Sandbox mode: inject environment and execute - env_setup = self._generate_env_setup(input_spec, {}) - full_code = env_setup + '\n' + code - - results = await self._execute_in_sandbox( - python_code=full_code, - requirements=input_spec.requirements) - stdout, stderr, exit_code = self._parse_sandbox_result(results) - else: - # Local mode: execute directly - stdout, stderr, exit_code = await self._local_execute_python_code( - code, input_spec) - - end_time = datetime.now() - - output = ExecutionOutput( - stdout=stdout, - stderr=stderr, - exit_code=exit_code, - output_files=self._collect_output_files(), - duration_ms=(end_time - start_time).total_seconds() * 1000) - - record.status = ( - ExecutionStatus.SUCCESS - if exit_code == 0 else ExecutionStatus.FAILED) - - except Exception as e: - output = ExecutionOutput(stderr=str(e), exit_code=-1) - record.status = ExecutionStatus.FAILED - record.error_message = str(e) - logger.error(f'Python script execution failed: {e}') - - record.end_time = datetime.now() - record.output_spec = output - self.spec.add_record(record) - return output - - async def execute_python_code( - self, - code: str, - skill_id: str = 'unknown', - input_spec: ExecutionInput = None) -> ExecutionOutput: - """ - Execute Python code string. - - Uses sandbox mode or local mode based on use_sandbox setting. - - Args: - code: Python code to execute. - skill_id: Identifier of the skill being executed. - input_spec: Input specification. - - Returns: - ExecutionOutput with results. 
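-
-        Example (sketch):
-            >>> out = await container.execute_python_code(
-            ...     'print("hi")', skill_id='demo@latest')
-            >>> out.exit_code, out.stdout.strip()  # (0, 'hi')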
- """ - input_spec = input_spec or ExecutionInput() - - record = self._create_record( - skill_id=skill_id, - executor_type=ExecutorType.PYTHON_CODE, - input_spec=input_spec, - script_path='') - - record.start_time = datetime.now() - record.status = ExecutionStatus.RUNNING - - try: - # Security check (stricter for local mode) - is_safe, reason = self._security_check( - code, is_local=not self.use_sandbox) - if not is_safe: - record.status = ExecutionStatus.SECURITY_BLOCKED - record.error_message = reason - output = ExecutionOutput( - stderr=f'Security check failed: {reason}', exit_code=-1) - record.end_time = datetime.now() - record.output_spec = output - self.spec.add_record(record) - return output - - start_time = datetime.now() - - if self.use_sandbox: - # Sandbox mode - env_setup = self._generate_env_setup(input_spec, {}) - full_code = env_setup + '\n' + code - - results = await self._execute_in_sandbox( - python_code=full_code, - requirements=input_spec.requirements) - stdout, stderr, exit_code = self._parse_sandbox_result(results) - else: - # Local mode - stdout, stderr, exit_code = await self._local_execute_python_code( - code, input_spec) - - end_time = datetime.now() - - output = ExecutionOutput( - stdout=stdout, - stderr=stderr, - exit_code=exit_code, - output_files=self._collect_output_files(), - duration_ms=(end_time - start_time).total_seconds() * 1000) - - record.status = ( - ExecutionStatus.SUCCESS - if exit_code == 0 else ExecutionStatus.FAILED) - - except Exception as e: - output = ExecutionOutput(stderr=str(e), exit_code=-1) - record.status = ExecutionStatus.FAILED - record.error_message = str(e) - logger.error(f'Python code execution failed: {e}') - - record.end_time = datetime.now() - record.output_spec = output - self.spec.add_record(record) - return output - - def _generate_env_setup(self, input_spec: ExecutionInput, - sandbox_files: Dict[str, str]) -> str: - """Generate Python code to setup environment variables and paths.""" - sandbox_logs_dir = f'{self.SANDBOX_ROOT}/logs' - lines = [ - 'import os', - 'import sys', - '', - '# Setup environment', - f"os.environ['SKILL_OUTPUT_DIR'] = '{self.SANDBOX_OUTPUT_DIR}'", - f"os.environ['SKILL_LOGS_DIR'] = '{sandbox_logs_dir}'", - '', - '# Helper functions for I/O paths', - 'def get_output_path(filename):', - ' """Get the full path for an output file. ALL outputs should use this."""', - " return os.path.join(os.environ['SKILL_OUTPUT_DIR'], filename)", - '', - f"SKILL_OUTPUT_DIR = '{self.SANDBOX_OUTPUT_DIR}'", - f"SKILL_LOGS_DIR = '{sandbox_logs_dir}'", - ] - - # Add custom env vars - for key, value in input_spec.env_vars.items(): - # Sanitize value to prevent injection - safe_value = value.replace("'", "\\'") - lines.append(f"os.environ['{key}'] = '{safe_value}'") - - # Add args - if input_spec.args: - lines.append('') - lines.append('# Command line arguments') - args_str = repr(input_spec.args) - lines.append(f'ARGS = {args_str}') - lines.append('sys.argv = ["script.py"] + [str(a) for a in ARGS]') - - lines.append('') - return '\n'.join(lines) - - def execute_python_function( - self, - func: Callable, - skill_id: str = 'unknown', - input_spec: ExecutionInput = None) -> ExecutionOutput: - """ - Execute a Python function directly (local execution, not sandboxed). - - Note: Function execution runs locally as it cannot be serialized to sandbox. - Use execute_python_code for sandboxed execution. - - Args: - func: Python callable to execute. - skill_id: Identifier of the skill being executed. 
- input_spec: Input specification with args and kwargs. - - Returns: - ExecutionOutput with results. - """ - input_spec = input_spec or ExecutionInput() - - record = self._create_record( - skill_id=skill_id, - executor_type=ExecutorType.PYTHON_FUNCTION, - input_spec=input_spec, - function_name=func.__name__) - record.sandbox_used = False # Local execution - - record.start_time = datetime.now() - record.status = ExecutionStatus.RUNNING - - try: - # Add helper paths to kwargs - kwargs = input_spec.kwargs.copy() - kwargs['_output_dir'] = self.output_dir - - start_time = datetime.now() - return_value = func(*input_spec.args, **kwargs) - end_time = datetime.now() - - output = ExecutionOutput( - return_value=return_value, - exit_code=0, - output_files=self._collect_output_files(), - duration_ms=(end_time - start_time).total_seconds() * 1000) - - record.status = ExecutionStatus.SUCCESS - - except Exception as e: - output = ExecutionOutput(stderr=str(e), exit_code=-1) - record.status = ExecutionStatus.FAILED - record.error_message = str(e) - logger.error(f'Python function execution failed: {e}') - - record.end_time = datetime.now() - record.output_spec = output - self.spec.add_record(record) - return output - - async def execute_shell( - self, - command: Union[str, List[str]], - skill_id: str = 'unknown', - input_spec: ExecutionInput = None) -> ExecutionOutput: - """ - Execute a shell command. - - Uses sandbox mode or local mode based on use_sandbox setting. - - Args: - command: Shell command string or list of commands. - skill_id: Identifier of the skill being executed. - input_spec: Input specification. - - Returns: - ExecutionOutput with results. - """ - input_spec = input_spec or ExecutionInput() - - cmd_str = command if isinstance(command, str) else ' && '.join(command) - - record = self._create_record( - skill_id=skill_id, - executor_type=ExecutorType.SHELL, - input_spec=input_spec, - script_path=cmd_str[:200]) - - record.start_time = datetime.now() - record.status = ExecutionStatus.RUNNING - - try: - # Security check (stricter for local mode) - is_safe, reason = self._security_check( - cmd_str, is_local=not self.use_sandbox) - if not is_safe: - record.status = ExecutionStatus.SECURITY_BLOCKED - record.error_message = reason - output = ExecutionOutput( - stderr=f'Security check failed: {reason}', exit_code=-1) - record.end_time = datetime.now() - record.output_spec = output - self.spec.add_record(record) - return output - - start_time = datetime.now() - - if self.use_sandbox: - # Sandbox mode: prepend environment setup - env_exports = [ - f"export SKILL_OUTPUT_DIR='{self.SANDBOX_OUTPUT_DIR}'", - ] - for key, value in input_spec.env_vars.items(): - safe_value = value.replace("'", "\\'") - env_exports.append(f"export {key}='{safe_value}'") - - full_cmd = ' && '.join(env_exports + [cmd_str]) - - results = await self._execute_in_sandbox(shell_command=full_cmd - ) - stdout, stderr, exit_code = self._parse_sandbox_result(results) - else: - # Local mode - stdout, stderr, exit_code = await self._local_execute_shell( - cmd_str, input_spec) - - end_time = datetime.now() - - output = ExecutionOutput( - stdout=stdout, - stderr=stderr, - exit_code=exit_code, - output_files=self._collect_output_files(), - duration_ms=(end_time - start_time).total_seconds() * 1000) - - record.status = ( - ExecutionStatus.SUCCESS - if exit_code == 0 else ExecutionStatus.FAILED) - - except Exception as e: - output = ExecutionOutput(stderr=str(e), exit_code=-1) - record.status = ExecutionStatus.FAILED - record.error_message = 
str(e) - logger.error(f'Shell execution failed: {e}') - - record.end_time = datetime.now() - record.output_spec = output - self.spec.add_record(record) - return output - - async def execute_javascript(self, - script_path: Union[str, Path] = None, - code: str = None, - skill_id: str = 'unknown', - input_spec: ExecutionInput = None, - runtime: str = 'node') -> ExecutionOutput: - """ - Execute JavaScript code via Node.js. - - Uses sandbox mode or local mode based on use_sandbox setting. - - Args: - script_path: Path to JavaScript file. - code: Inline JavaScript code (if no script_path). - skill_id: Identifier of the skill being executed. - input_spec: Input specification. - runtime: JavaScript runtime ('node' or 'deno'). - - Returns: - ExecutionOutput with results. - """ - input_spec = input_spec or ExecutionInput() - - record = self._create_record( - skill_id=skill_id, - executor_type=ExecutorType.JAVASCRIPT, - input_spec=input_spec, - script_path=str(script_path) if script_path else '') - - record.start_time = datetime.now() - record.status = ExecutionStatus.RUNNING - - try: - # Get JavaScript code - if script_path: - with open(script_path, 'r', encoding='utf-8') as f: - js_code = f.read() - elif code: - js_code = code - else: - raise ValueError('Either script_path or code must be provided') - - # Security check (stricter for local mode) - is_safe, reason = self._security_check( - js_code, is_local=not self.use_sandbox) - if not is_safe: - record.status = ExecutionStatus.SECURITY_BLOCKED - record.error_message = reason - output = ExecutionOutput( - stderr=f'Security check failed: {reason}', exit_code=-1) - record.end_time = datetime.now() - record.output_spec = output - self.spec.add_record(record) - return output - - start_time = datetime.now() - - if self.use_sandbox: - # Sandbox mode: write JS file and execute - js_filename = f'script_{uuid.uuid4().hex[:8]}.js' - js_path = self.scripts_dir / js_filename - sandbox_js_path = f'{self.SANDBOX_WORK_DIR}/{js_filename}' - - # Inject environment into JS code - env_inject = self._generate_js_env_setup(input_spec, {}) - full_js_code = env_inject + '\n' + js_code - - with open(js_path, 'w', encoding='utf-8') as f: - f.write(full_js_code) - - # Build shell command to run JS - args_str = ' '.join(f'"{arg}"' for arg in input_spec.args) - shell_cmd = f'{runtime} {sandbox_js_path} {args_str}' - - results = await self._execute_in_sandbox( - shell_command=shell_cmd) - stdout, stderr, exit_code = self._parse_sandbox_result(results) - else: - # Local mode - stdout, stderr, exit_code = await self._local_execute_javascript( - js_code, input_spec) - - end_time = datetime.now() - - output = ExecutionOutput( - stdout=stdout, - stderr=stderr, - exit_code=exit_code, - output_files=self._collect_output_files(), - duration_ms=(end_time - start_time).total_seconds() * 1000) - - record.status = ( - ExecutionStatus.SUCCESS - if exit_code == 0 else ExecutionStatus.FAILED) - - except Exception as e: - output = ExecutionOutput(stderr=str(e), exit_code=-1) - record.status = ExecutionStatus.FAILED - record.error_message = str(e) - logger.error(f'JavaScript execution failed: {e}') - - record.end_time = datetime.now() - record.output_spec = output - self.spec.add_record(record) - return output - - def _generate_js_env_setup(self, input_spec: ExecutionInput, - sandbox_files: Dict[str, str]) -> str: - """Generate JavaScript code to setup environment.""" - lines = [ - '// Environment setup', - f"process.env.SKILL_OUTPUT_DIR = '{self.SANDBOX_OUTPUT_DIR}';", - ] - - for key, value 
in input_spec.env_vars.items(): - safe_value = value.replace("'", "\\'") - lines.append(f"process.env.{key} = '{safe_value}';") - - lines.append('') - return '\n'.join(lines) - - async def execute(self, - executor_type: ExecutorType, - skill_id: str = 'unknown', - script_path: Union[str, Path] = None, - func: Callable = None, - command: Union[str, List[str]] = None, - code: str = None, - input_spec: ExecutionInput = None, - **kwargs) -> ExecutionOutput: - """ - Unified async execution interface. - - Args: - executor_type: Type of executor to use. - skill_id: Identifier of the skill. - script_path: Path to script file (for PYTHON_SCRIPT, JAVASCRIPT). - func: Callable function (for PYTHON_FUNCTION). - command: Shell command (for SHELL). - code: Inline code (for PYTHON_CODE, JAVASCRIPT). - input_spec: Input specification. - **kwargs: Additional executor-specific arguments. - - Returns: - ExecutionOutput with results. - """ - if executor_type == ExecutorType.PYTHON_SCRIPT: - return await self.execute_python_script( - script_path=script_path, - skill_id=skill_id, - input_spec=input_spec) - elif executor_type == ExecutorType.PYTHON_CODE: - return await self.execute_python_code( - code=code, skill_id=skill_id, input_spec=input_spec) - elif executor_type == ExecutorType.PYTHON_FUNCTION: - return self.execute_python_function( - func=func, skill_id=skill_id, input_spec=input_spec) - elif executor_type == ExecutorType.SHELL: - return await self.execute_shell( - command=command, skill_id=skill_id, input_spec=input_spec) - elif executor_type == ExecutorType.JAVASCRIPT: - return await self.execute_javascript( - script_path=script_path, - code=code, - skill_id=skill_id, - input_spec=input_spec, - **kwargs) - else: - raise ValueError(f'Unsupported executor type: {executor_type}') - - def execute_sync(self, - executor_type: ExecutorType, - skill_id: str = 'unknown', - **kwargs) -> ExecutionOutput: - """Synchronous wrapper for execute().""" - return asyncio.run(self.execute(executor_type, skill_id, **kwargs)) - - def link_skills(self, - upstream_skill_id: str, - downstream_input_key: str, - output_key: str = None) -> Optional[Any]: - """ - Link output from upstream skill to downstream skill input. - - Args: - upstream_skill_id: ID of the upstream skill. - downstream_input_key: Key to use in downstream input. - output_key: Specific output key to link (e.g., 'return_value', 'stdout'). - - Returns: - The linked value, or None if not found. - """ - if upstream_skill_id in self.spec.upstream_outputs: - output = self.spec.upstream_outputs[upstream_skill_id] - if output_key: - return getattr(output, output_key, None) - return output.return_value or output.stdout - return None - - def get_spec_log(self) -> str: - """Get the execution spec as markdown string.""" - return self.spec.to_markdown() - - def save_spec_log(self, output_path: Union[str, Path] = None): - """Save the execution spec to a markdown file in logs directory.""" - if output_path is None: - output_path = self.logs_dir / 'execution_spec.md' - self.spec.save(output_path) - logger.info(f'Saved execution spec to: {output_path}') - - def cleanup(self, keep_spec: bool = True): - """ - Clean up workspace directory. - - Args: - keep_spec: If True, saves spec before cleanup. 
- """ - if keep_spec: - self.save_spec_log() - if self.workspace_dir.exists(): - shutil.rmtree(self.workspace_dir) - logger.info(f'Cleaned up workspace: {self.workspace_dir}') diff --git a/ms_agent/skill/loader.py b/ms_agent/skill/loader.py index 1f5dca2a7..e2664c3d8 100644 --- a/ms_agent/skill/loader.py +++ b/ms_agent/skill/loader.py @@ -41,12 +41,7 @@ def load_skills( logger.warning('No skills provided to load.') return all_skills - def is_skill_id(s: str) -> bool: - return '/' in s and len(s.split('/')) == 2 and all( - s.split('/')) and not os.path.exists(s) - if isinstance(skills, str): - # Could be a single skill path, root path of skills, or skill ID on ModelScope hub skill_list = [skills] elif all(isinstance(s, str) for s in skills) or all( isinstance(s, SkillSchema) for s in skills): @@ -55,12 +50,6 @@ def is_skill_id(s: str) -> bool: raise ValueError('Invalid skills input type.') for skill in skill_list: - - if is_skill_id(skill): - from modelscope import snapshot_download - skill_path: str = snapshot_download(repo_id=skill) - skill = skill_path - if isinstance(skill, SkillSchema): skill_key = self._get_skill_key(skill=skill) all_skills[skill_key] = skill diff --git a/ms_agent/skill/prompt_injector.py b/ms_agent/skill/prompt_injector.py new file mode 100644 index 000000000..f916e307a --- /dev/null +++ b/ms_agent/skill/prompt_injector.py @@ -0,0 +1,56 @@ +# Copyright (c) ModelScope Contributors. All rights reserved. +import re + + +class SkillPromptInjector: + """Builds the skill section to inject into the system prompt.""" + + SKILL_SECTION_HEADER = """# Available Skills + +You have access to specialized skills that extend your capabilities. +Each skill is a set of instructions and resources for handling specific tasks. + +**How to use skills:** +1. Review the skill summaries below to find relevant skills. +2. Call `skill_view(skill_id)` to read the full instructions of a skill. +3. Follow the skill's instructions using your available tools (code execution, file operations, web search, etc.). +4. Do NOT call `skill_view` unless you actually need the skill's guidance. +""" + + ALWAYS_SKILLS_HEADER = ( + "# Active Skills\n\n" + "The following skills are always active. Follow their instructions.\n") + + def __init__(self, catalog): + self._catalog = catalog + + def build_skill_prompt_section(self) -> str: + """Build the skill section for system prompt injection. + + Returns empty string when no skills are available. 
+ """ + parts = [] + + # Part 1: always-active skills (full body injection) + always_skills = self._catalog.get_always_skills() + if always_skills: + parts.append(self.ALWAYS_SKILLS_HEADER) + for sid, skill in always_skills.items(): + content = self._strip_frontmatter(skill.content) + parts.append(f"## {skill.name}\n\n{content}\n") + + # Part 2: summary index of all enabled skills + summary = self._catalog.get_skills_summary() + if summary: + parts.append(self.SKILL_SECTION_HEADER) + parts.append(summary) + parts.append("") + + return "\n".join(parts) + + @staticmethod + def _strip_frontmatter(content: str) -> str: + """Remove YAML frontmatter from markdown content.""" + return re.sub( + r'^---\s*\n.*?\n---\s*\n', '', content, + flags=re.DOTALL).strip() diff --git a/ms_agent/skill/prompts.py b/ms_agent/skill/prompts.py deleted file mode 100644 index a91746f97..000000000 --- a/ms_agent/skill/prompts.py +++ /dev/null @@ -1,439 +0,0 @@ -# flake8: noqa -# yapf: disable - -DEFAULT_PLAN = """ - -""" - -DEFAULT_TASKS = """ - -""" - -DEFAULT_IMPLEMENTATION = """ - -""" - - -PROMPT_SKILL_PLAN = """ -According to the user's request:\n {query}\n, -analyze the following skill content and breakdown the necessary steps to complete the task step by step, considering any dependencies or prerequisites that may be required. -According to following sections: `SKILL_MD_CONTEXT`, `REFERENCE_CONTEXT`, `SCRIPT_CONTEXT` and `RESOURCE_CONTEXT`, you **MUST** identify the most relevant **FILES** (if any) and outline a detailed plan to accomplish the user's request. -{skill_md_context} {reference_context} {script_context} {resource_context} -\n\nThe format of your response:\n - -... The user's original query ... - - - - -... The concise and clear step-by-step plan to accomplish the user's request ... - - - - -... The most relevant SCRIPTS (if any) in JSON format ... - - - - -... The most relevant REFERENCES (if any) in JSON format ... - - - - -... The most relevant RESOURCES (if any) in JSON format ... - - -""" - - -PROMPT_SKILL_TASKS = """ -According to `SKILL PLAN CONTEXT`:\n\n{skill_plan_context}\n\n -Provide a concise and precise TODO-LIST of implementations required to execute the plan, **MUST** be as concise as possible. -Each task should be specific, actionable, and clearly defined to ensure successful completion of the overall plan. -The format of your response: \n - -... The user's original query ... - - - - -... A concise and clear TODO-LIST of implementations required to execute the plan ... - - -""" - - -SCRIPTS_IMPLEMENTATION_FORMAT = """[ - { - "script": "", - "parameters": { - "param1": "value1", - "param2": "value2" - } - }, - { - "script": "", - "parameters": { - "param1": "value1", - "param2": "value2" - } - } -]""" - -PROMPT_TASKS_IMPLEMENTATION = """ -According to relevant content of `SCRIPTS`, `REFERENCES` and `RESOURCES`:\n\n{script_contents}\n\n{reference_contents}\n\n{resource_contents}\n\n - -You **MUST** strictly implement the todo-list in `SKILL_TASKS_CONTEXT` step by step:\n\n{skill_tasks_context}\n\n - -There are 3 scenarios for response, your response **MUST** strictly follow one of the above scenarios, **MUST** be as concise as possible: - -Scenario-1: Execute Script(s) with Parameters, especially for python scripts, in the format of: - -{scripts_implementation_format} - - -Scenario-2: No Script Execution Needed, like JavaScript、HTML code generation, please output the final answer directly, in the format of: - -```html -``` -... 
-or -```javascript -``` - - -Scenario-3: Unable to Execute Any Script, Provide Reason, in the format of: - -... The reason why unable to execute any script ... - - -""" - - -PROMPT_SKILL_FINAL_SUMMARY = """ -Given the comprehensive context:\n\n{comprehensive_context}\n\n -Provide a concise summary of the entire process, highlighting key actions taken, decisions made, and the final outcome achieved. -Ensure the summary is clear and informative. -""" - - -# ============================================================ -# AutoSkills Prompts - for automatic skill retrieval and DAG -# ============================================================ - -PROMPT_ANALYZE_QUERY_FOR_SKILLS = """You are a skill analyzer. Given a user query, identify what types of skills/capabilities are needed, or just chatting is sufficient. - -User Query: {query} - -Available Skills Overview: -{skills_overview} - -Analyze the query and determine: -1. Whether this query requires specific skills/capabilities to fulfill -2. If skills are needed, what capabilities/functions are directly required -3. What prerequisites or dependencies might be required - -Output in JSON format: -{{ - "needs_skills": true/false, - "intent_summary": "Brief description of user intent", - "skill_queries": ["query1", "query2", ...], - "chat_response": "Direct response if no skills needed, null otherwise", - "reasoning": "Brief explanation" -}} - -Notes: -- Set `needs_skills` to false if the query is casual chat, greeting, or can be answered directly without special skills. -- If `needs_skills` is false, provide the `chat_response` with a helpful direct answer. -- If `needs_skills` is true, `skill_queries` should contain search queries for finding relevant skills. -""" - -PROMPT_FILTER_SKILLS_FAST = """Quickly filter candidate skills based on their name and description. - -User Query: {query} - -Candidate Skills: -{candidate_skills} - -For each skill, determine if it's POTENTIALLY relevant to the user's query based on: -1. Does the skill name suggest it can help with the task? -2. Does the skill description indicate capabilities matching the user's needs? - -Output in JSON format: -{{ - "filtered_skill_ids": ["skill_id_1", "skill_id_2", ...], - "reasoning": "Brief explanation of filtering" -}} - -Notes: -- Only include skills that are POTENTIALLY useful for the task. -- This is a quick filter - when in doubt, INCLUDE the skill for further analysis. -- Focus on the main task output format/type matching (e.g., PDF generation needs PDF skill). -""" - -PROMPT_FILTER_SKILLS_DEEP = """Analyze and filter candidate skills based on their full capabilities. - -User Query: {query} - -Candidate Skills (with detailed content): -{candidate_skills} - -For each skill, evaluate: -1. **Capability Match**: Can this skill actually PRODUCE the required output? -2. **Task Completeness**: Can this skill independently complete the task, or does it need other skills? -3. **Redundancy**: Are there overlapping skills that do the same thing? - -Output in JSON format: -{{ - "filtered_skill_ids": ["skill_id_1", "skill_id_2", ...], - "skill_analysis": {{ - "skill_id_1": {{ - "can_execute": true/false, - "reason": "Why this skill can/cannot execute the task" - }}, - ... - }}, - "reasoning": "Overall filtering explanation" -}} - -**CRITICAL**: -- Only include skills that can ACTUALLY execute and produce the required output. -- Remove redundant skills - keep only the most suitable one for each capability. 
-- The task specified by the user may require the collaboration of multiple skills to be successfully completed. -""" - -PROMPT_BUILD_SKILLS_DAG = """Filter candidate skills and build execution DAG. - -User Query: {query} - -Candidate Skills (USE THESE EXACT IDs in your response): -{selected_skills} - -**Tasks:** -1. **Filter**: Keep only skills that can ACTUALLY produce required output. Remove redundant/unnecessary skills. -2. **Build DAG**: Define dependencies and execution order using the EXACT skill IDs from above (e.g., `pdf@latest`, `pptx@latest`). - -**Output JSON:** -{{ - "filtered_skill_ids": ["exact_skill_id_from_list", ...], - "dag": {{ - "exact_skill_id_1": ["depends_on_skill_id"], - "exact_skill_id_2": [] - }}, - "execution_order": ["first_skill_id", "second_skill_id", ...], - "reasoning": "Brief explanation" -}} - -**CRITICAL RULES:** -- **ONLY use exact skill IDs from the Candidate Skills list** (e.g., `pdf@latest`, `pptx@latest`, NOT invented names like `create_pdf` or `generate_report`) -- Minimal sufficiency: smallest skill set that fully satisfies the query -- Deduplicate: keep only the most effective skill when overlapping -- `execution_order` MUST contain ALL skills from `filtered_skill_ids`, ordered by dependencies (parallel execution as nested lists) -- In `dag`, each skill maps to its dependencies (skills it depends on), empty list `[]` means no dependencies -""" - -PROMPT_DIRECT_SELECT_SKILLS = """You are a skill selector. Given a user query and all available skills, select the relevant skills and build an execution DAG. - -User Query: {query} - -All Available Skills (USE THESE EXACT IDs): -{all_skills} - -Tasks: -1. Determine if this query needs skills or is just casual chat -2. If skills are needed, select relevant skills using their EXACT IDs from the list above -3. Build a dependency DAG for the selected skills - -Output in JSON format: -{{ - "needs_skills": true/false, - "chat_response": "Direct response if no skills needed, null otherwise", - "selected_skill_ids": ["exact_skill_id_from_list", ...], - "dag": {{ - "exact_skill_id_1": ["depends_on_skill_id"], - "exact_skill_id_2": [], - ... - }}, - "execution_order": ["first_skill_id", "second_skill_id", ...], - "reasoning": "Brief explanation of skill selection and dependencies" -}} - -**CRITICAL:** -- **ONLY use exact skill IDs from the Available Skills list** (e.g., `pdf@latest`, `pptx@latest`, NOT invented names) -- Set `needs_skills` to false if the query is casual chat or can be answered directly -- `execution_order` MUST contain ALL skills from `selected_skill_ids`, ordered by dependencies -- In `dag`, each skill maps to its dependencies (skills it depends on), empty list `[]` means no dependencies -""" - -# ============================================================ -# Progressive Skill Analysis Prompts -# ============================================================ - -PROMPT_SKILL_ANALYSIS_PLAN = """You are analyzing a skill to create an execution plan. - -**IMPORTANT CONTEXT**: -This skill may be ONE OF SEVERAL skills in a execution chain. It does NOT need to fulfill -the ENTIRE user query - it only needs to handle its specific sub-task/capability. 
- -For example: -- If query is "Generate a PDF report with charts", a PDF skill only needs to create PDFs -- If query is "Analyze data and visualize results", a chart skill only needs visualization -- Each skill contributes its specialized capability to the overall task - -User Query: {query} - -Skill Information: -- Skill ID: {skill_id} -- Name: {skill_name} -- Description: {skill_description} - -Skill Content (SKILL.md): -{skill_content} - -Available Resources Overview: -- Scripts: {scripts_list} -- References: {references_list} -- Resources: {resources_list} - -Tasks: -1. Understand what this specific skill can do based on its description and content -2. Determine if this skill can contribute to the user's query (even partially) -3. Create a step-by-step execution plan for this skill's specific capability -4. Identify which scripts, references, and resources are needed - -Output in JSON format: -{{ - "can_handle": true/false, - "contribution": "What specific part of the query this skill handles", - "plan_summary": "Brief summary of the execution plan", - "steps": [ - {{"step": 1, "action": "description", "type": "script|reference|resource|code"}}, - ... - ], - "required_scripts": ["script_name1", "script_name2", ...], - "required_references": ["ref_name1", ...], - "required_resources": ["resource_name1", ...], - "required_packages": ["python_package1", "python_package2", ...], - "parameters": {{"param1": "value or ", ...}}, - "reasoning": "Why this plan will work" -}} - -**CRITICAL - When to set can_handle**: -- Set `can_handle: true` if this skill can CONTRIBUTE to the query, even if it only handles a sub-task -- Set `can_handle: true` if the skill's core capability is RELEVANT to any part of the query -- Set `can_handle: false` ONLY if the skill has ZERO relevance to the query -- DO NOT reject a skill just because it can't fulfill the ENTIRE query - -Notes: -- Only include resources that are actually needed for execution. -- Steps should be actionable and specific. -- Parameters should include any values extracted from the query. -- Extract Python package dependencies from skill content (e.g., reportlab, pandas, numpy). -""" - -PROMPT_SKILL_EXECUTION_COMMAND = """Based on the execution plan and loaded resources, generate the execution command(s). - -User Query: {query} -Skill ID: {skill_id} - -Execution Plan: -{execution_plan} - -Loaded Scripts: -{scripts_content} - -Loaded References: -{references_content} - -Loaded Resources: -{resources_content} - -**IMPORTANT Environment Variables:** -- `SKILL_OUTPUT_DIR`: Directory where ALL output files MUST be saved (e.g., PDFs, images, data files) -- `SKILL_DIR`: The skill's directory (for accessing resources like fonts, templates) -- `SKILL_LOGS_DIR`: Directory for logs and intermediate files - -Generate the specific execution command(s) needed. - -Output in JSON format: -{{ - "execution_type": "script|code|shell", - "commands": [ - {{ - "type": "python_script|python_code|shell|javascript", - "path": "script_path (if applicable)", - "code": "inline code (if applicable)", - "parameters": {{"param1": "value", ...}}, - "working_dir": "working directory (optional)", - "requirements": ["package1", "package2", ...] - }}, - ... - ], - "expected_output": "Description of expected output" -}} - -**CRITICAL OUTPUT RULE:** -- ALL generated files (PDFs, images, reports, etc.) 
MUST be saved to `os.environ['SKILL_OUTPUT_DIR']` -- Use `os.path.join(os.environ['SKILL_OUTPUT_DIR'], 'filename.pdf')` for output paths -- NEVER save output files to the current working directory or skill directory -- The skill directory should be READ-ONLY for resources, not for output -""" - -PROMPT_ANALYZE_EXECUTION_ERROR = """You are analyzing a failed code execution to diagnose and fix the error. - -**User Query**: {query} - -**Skill ID**: {skill_id} -**Skill Name**: {skill_name} - -**Failed Code**: -```python -{failed_code} -``` - -**Error Message (stderr)**: -``` -{stderr} -``` - -**stdout (if any)**: -``` -{stdout} -``` - -**Attempt**: {attempt}/{max_attempts} - -**Available Environment Variables**: -- SKILL_OUTPUT_DIR: Directory for output files -- SKILL_DIR: Skill's directory for resources (fonts, templates, etc.) -- SKILL_LOGS_DIR: Directory for logs - -**Helper Functions Available**: -- get_output_path(filename): Returns full path for output file - -Analyze the error and provide a fix: - -1. Identify the root cause of the error -2. Determine if it's fixable through code modification -3. Generate corrected code that addresses the issue - -Output in JSON format: -{{ - "error_analysis": {{ - "error_type": "ModuleNotFoundError|FileNotFoundError|SyntaxError|RuntimeError|etc", - "root_cause": "Brief description of what caused the error", - "is_fixable": true/false, - "fix_strategy": "Description of how to fix" - }}, - "fixed_code": "Complete fixed Python code (or null if unfixable)", - "additional_requirements": ["package1", "package2"], - "explanation": "What was changed and why" -}} - -**IMPORTANT**: -- Provide COMPLETE fixed code, not just the changed parts -- Ensure output paths use get_output_path() or os.environ['SKILL_OUTPUT_DIR'] -- If the error is about missing packages, add them to additional_requirements -- If the error cannot be fixed (e.g., requires user input), set is_fixable to false -""" diff --git a/ms_agent/skill/schema.py b/ms_agent/skill/schema.py index 722e0acc4..1f7f09f44 100644 --- a/ms_agent/skill/schema.py +++ b/ms_agent/skill/schema.py @@ -8,13 +8,11 @@ import re from dataclasses import dataclass, field from pathlib import Path -from typing import Any, Dict, List, Optional, Union +from typing import Any, Dict, List, Optional import yaml from ms_agent.utils.logger import logger -from .spec import Spec - SUPPORTED_SCRIPT_EXT = ('.py', '.sh', '.js') SUPPORTED_READ_EXT = ('.md', '.txt', '.py', '.json', '.yaml', '.yml', '.sh', '.js', '.html', '.xml') @@ -354,276 +352,3 @@ def validate_skill_schema(schema: SkillSchema) -> List[str]: return errors -@dataclass -class SkillExecutionPlan: - """ - Execution plan generated from progressive skill analysis. - - Attributes: - can_handle: Whether the skill can handle the user query. - plan_summary: Brief summary of the execution plan. - steps: List of execution steps. - required_scripts: Script names needed for execution. - required_references: Reference names needed. - required_resources: Resource names needed. - required_packages: Python packages needed for execution. - parameters: Parameters extracted from user query. - reasoning: Explanation of the plan. 
- """ - can_handle: bool = False - plan_summary: str = '' - steps: List[Dict[str, Any]] = field(default_factory=list) - required_scripts: List[str] = field(default_factory=list) - required_references: List[str] = field(default_factory=list) - required_resources: List[str] = field(default_factory=list) - required_packages: List[str] = field(default_factory=list) - parameters: Dict[str, Any] = field(default_factory=dict) - reasoning: str = '' - - -@dataclass -class SkillContext: - """ - Context information for executing a Skill. - - Supports progressive/lazy loading - resources are only loaded when needed. - """ - - # The target skill - skill: SkillSchema - - # User query that triggered this skill - query: str = '' - - # The working directory (absolute path to skills folder's parent directory) - root_path: Path = field( - default_factory=lambda: Path.cwd().parent.resolve()) - - # Execution plan from progressive analysis - plan: Optional[SkillExecutionPlan] = None - - # Loaded scripts (lazy loaded based on plan) - scripts: List[Dict[str, Any]] = field(default_factory=list) - - # Loaded references (lazy loaded based on plan) - references: List[Dict[str, Any]] = field(default_factory=list) - - # Loaded resources (lazy loaded based on plan) - resources: List[Dict[str, Any]] = field(default_factory=list) - - # The SPEC context for execution tracking - spec: Optional[Spec] = None - - # Whether resources have been loaded - _resources_loaded: bool = field(default=False, repr=False) - - @staticmethod - def _read_file_content(file_path: Union[str, Path]) -> str: - """ - Read the content of a file. - - Args: - file_path: Path to the file - - Returns: - Content of the file as a string - """ - file_path = Path(file_path) - - if not file_path.exists() or not file_path.is_file(): - return '' - - ext = file_path.suffix.lower() - if ext in SUPPORTED_READ_EXT: - try: - with open(file_path, 'r', encoding='utf-8') as f: - return f.read() - except Exception as e: - logger.error(f'Failed to read file {file_path}: {e}') - return '' - - return '' - - def __post_init__(self): - """Initialize SPEC context only, defer resource loading.""" - if self.spec is None: - self.spec = Spec(plan='', tasks='') - - @property - def skill_dir(self) -> Path: - """Get the skill's directory path.""" - return self.skill.skill_path - - def get_scripts_list(self) -> List[str]: - """Get list of available script names without loading content.""" - return [s.name for s in self.skill.scripts] - - def get_references_list(self) -> List[str]: - """Get list of available reference names without loading content.""" - return [r.name for r in self.skill.references] - - def get_resources_list(self) -> List[str]: - """Get list of available resource names without loading content.""" - return [ - r.name for r in self.skill.resources - if r.name not in ['SKILL.md', 'LICENSE.txt'] - ] - - def _get_resource_path(self, file_path: Path) -> str: - """ - Get path string for a resource file. - - Tries relative path first, falls back to absolute path. - - Args: - file_path: Path to the resource file. - - Returns: - Path string (relative if possible, absolute otherwise). - """ - resolved_path = file_path.resolve() - try: - return str(resolved_path.relative_to(self.root_path.resolve())) - except ValueError: - # Path is not under root_path, use absolute path - return str(resolved_path) - - def load_scripts(self, names: List[str] = None) -> List[Dict[str, Any]]: - """ - Load specific scripts by name, or all if names is None. 
- - Args: - names: List of script names to load, or None for all. - - Returns: - List of loaded script dictionaries with content. - """ - target_scripts = self.skill.scripts - if names: - target_scripts = [s for s in self.skill.scripts if s.name in names] - - loaded = [] - for script in target_scripts: - abs_path = script.path.resolve() - loaded.append({ - 'name': script.name, - 'file': script.to_dict(), - 'path': self._get_resource_path(script.path), - 'abs_path': str(abs_path), - 'content': self._read_file_content(abs_path), - }) - self.scripts.extend(loaded) - return loaded - - def load_references(self, names: List[str] = None) -> List[Dict[str, Any]]: - """ - Load specific references by name, or all if names is None. - - Args: - names: List of reference names to load, or None for all. - - Returns: - List of loaded reference dictionaries with content. - """ - target_refs = self.skill.references - if names: - target_refs = [r for r in self.skill.references if r.name in names] - - loaded = [] - for ref in target_refs: - abs_path = ref.path.resolve() - loaded.append({ - 'name': ref.name, - 'file': ref.to_dict(), - 'path': self._get_resource_path(ref.path), - 'abs_path': str(abs_path), - 'content': self._read_file_content(abs_path), - }) - self.references.extend(loaded) - return loaded - - def load_resources(self, names: List[str] = None) -> List[Dict[str, Any]]: - """ - Load specific resources by name, or all if names is None. - - Args: - names: List of resource names to load, or None for all. - - Returns: - List of loaded resource dictionaries with content. - """ - target_res = [ - r for r in self.skill.resources - if r.name not in ['SKILL.md', 'LICENSE.txt'] - ] - if names: - target_res = [r for r in target_res if r.name in names] - - loaded = [] - for res in target_res: - abs_path = res.path.resolve() - loaded.append({ - 'name': res.name, - 'file': res.to_dict(), - 'path': self._get_resource_path(res.path), - 'abs_path': str(abs_path), - 'content': self._read_file_content(abs_path), - }) - self.resources.extend(loaded) - return loaded - - def load_from_plan(self) -> None: - """ - Load resources based on the execution plan. - - Loads only the scripts, references, and resources specified in the plan. - """ - if self._resources_loaded or not self.plan: - return - - if self.plan.required_scripts: - self.load_scripts(self.plan.required_scripts) - - if self.plan.required_references: - self.load_references(self.plan.required_references) - - if self.plan.required_resources: - self.load_resources(self.plan.required_resources) - - self._resources_loaded = True - - def load_all(self) -> None: - """Load all available resources (scripts, references, resources).""" - if self._resources_loaded: - return - self.load_scripts() - self.load_references() - self.load_resources() - self._resources_loaded = True - - def get_loaded_scripts_content(self) -> str: - """Get formatted content of all loaded scripts.""" - if not self.scripts: - return 'No scripts loaded.' - parts = [] - for s in self.scripts: - parts.append(f"\n{s['content']}") - return '\n\n'.join(parts) - - def get_loaded_references_content(self) -> str: - """Get formatted content of all loaded references.""" - if not self.references: - return 'No references loaded.' - parts = [] - for r in self.references: - parts.append(f"\n{r['content']}") - return '\n\n'.join(parts) - - def get_loaded_resources_content(self) -> str: - """Get formatted content of all loaded resources.""" - if not self.resources: - return 'No resources loaded.' 
- parts = [] - for r in self.resources: - parts.append(f"\n{r['content']}") - return '\n\n'.join(parts) diff --git a/ms_agent/skill/skill_tools.py b/ms_agent/skill/skill_tools.py new file mode 100644 index 000000000..467f7f364 --- /dev/null +++ b/ms_agent/skill/skill_tools.py @@ -0,0 +1,345 @@ +# Copyright (c) ModelScope Contributors. All rights reserved. +import json +import os +import shutil +from pathlib import Path +from typing import Any, Dict, Optional + +from ms_agent.tools.base import ToolBase +from ms_agent.utils.logger import get_logger + +from .catalog import USER_SKILLS_DIR +from .schema import SkillSchemaParser + +logger = get_logger() + + +class SkillToolSet(ToolBase): + """Exposes skill discovery and management as standard tools + registered through ToolManager. + + Provided tools: + - skills_list: browse available skills + - skill_view: read full skill content or attached files + - skill_manage: create / edit / delete skills (optional) + """ + + TOOL_SERVER_NAME = "skills" + + def __init__(self, config, catalog, *, enable_manage: bool = False): + super().__init__(config) + self._catalog = catalog + self._enable_manage = enable_manage + + async def connect(self) -> None: + pass + + async def cleanup(self) -> None: + pass + + # ------------------------------------------------------------------ # + # Tool schema + # ------------------------------------------------------------------ # + + async def _get_tools_inner(self) -> Dict[str, Any]: + tools = [] + + tools.append({ + "tool_name": "skills_list", + "description": ( + "List all available skills with their names and descriptions. " + "Use this to discover what skills are available before viewing " + "their full content."), + "parameters": { + "type": "object", + "properties": { + "tag": { + "type": "string", + "description": + "Optional tag to filter skills by category", + } + }, + }, + }) + + tools.append({ + "tool_name": "skill_view", + "description": ( + "View the full content of a skill, including its instructions, " + "available scripts, references, and resources. " + "You can also view a specific file within the skill directory. " + "After reading a skill, follow its instructions using your " + "available tools."), + "parameters": { + "type": "object", + "properties": { + "skill_id": { + "type": "string", + "description": "The skill identifier", + }, + "file_path": { + "type": "string", + "description": ( + "Optional: relative path to a specific file " + "within the skill directory (e.g. " + "'scripts/search.py'). If omitted, returns " + "the main SKILL.md content."), + }, + }, + "required": ["skill_id"], + }, + }) + + if self._enable_manage: + tools.append({ + "tool_name": "skill_manage", + "description": ( + "Create, edit, or delete a skill. 
Use this to save "
+                    "reusable procedures that you learn during "
+                    "conversations."),
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "action": {
+                            "type": "string",
+                            "enum": ["create", "edit", "delete"],
+                            "description": "The action to perform",
+                        },
+                        "skill_id": {
+                            "type": "string",
+                            "description":
+                            "Skill identifier (hyphen-case)",
+                        },
+                        "content": {
+                            "type": "string",
+                            "description": (
+                                "For create/edit: full SKILL.md content "
+                                "including YAML frontmatter"),
+                        },
+                    },
+                    "required": ["action", "skill_id"],
+                },
+            })
+
+        return {self.TOOL_SERVER_NAME: tools}
+
+    # ------------------------------------------------------------------ #
+    # Dispatch
+    # ------------------------------------------------------------------ #
+
+    async def call_tool(self, server_name: str, *, tool_name: str,
+                        tool_args: dict) -> str:
+        if tool_name == "skills_list":
+            return self._handle_skills_list(tool_args)
+        elif tool_name == "skill_view":
+            return self._handle_skill_view(tool_args)
+        elif tool_name == "skill_manage" and self._enable_manage:
+            return self._handle_skill_manage(tool_args)
+        raise ValueError(f"Unknown skill tool: {tool_name}")
+
+    # ------------------------------------------------------------------ #
+    # skills_list
+    # ------------------------------------------------------------------ #
+
+    def _handle_skills_list(self, args: dict) -> str:
+        tag_filter = args.get("tag")
+        skills = self._catalog.get_enabled_skills()
+
+        if tag_filter:
+            skills = {
+                sid: s for sid, s in skills.items()
+                if tag_filter in (s.tags or [])
+            }
+
+        if not skills:
+            return "No skills available."
+
+        result = []
+        for sid, skill in sorted(skills.items()):
+            entry = {
+                "skill_id": sid,
+                "name": skill.name,
+                "description": skill.description,
+                "version": skill.version,
+                "tags": skill.tags or [],
+                "has_scripts": len(skill.scripts) > 0,
+                "has_references": len(skill.references) > 0,
+            }
+            result.append(entry)
+
+        return json.dumps(
+            {"skills": result, "total": len(result)},
+            ensure_ascii=False, indent=2)
+
+    # ------------------------------------------------------------------ #
+    # skill_view
+    # ------------------------------------------------------------------ #
+
+    def _handle_skill_view(self, args: dict) -> str:
+        skill_id = args.get("skill_id", "")
+        file_path = args.get("file_path")
+
+        skill = self._catalog.get_skill(skill_id)
+        if not skill:
+            return json.dumps({"error": f"Skill '{skill_id}' not found"})
+
+        if file_path:
+            return self._read_skill_file(skill, file_path)
+
+        result: Dict[str, Any] = {
+            "skill_id": skill.skill_id,
+            "name": skill.name,
+            "description": skill.description,
+            "skill_dir": str(skill.skill_path),
+            "content": skill.content,
+            "linked_files": {
+                "scripts": [s.name for s in skill.scripts],
+                "references": [r.name for r in skill.references],
+                "resources": [
+                    r.name for r in skill.resources
+                    if r.name not in ("SKILL.md", "LICENSE.txt")
+                ],
+            },
+        }
+
+        dep_status = self._check_requirements(skill)
+        if dep_status:
+            result["requirements_status"] = dep_status
+
+        return json.dumps(result, ensure_ascii=False, indent=2)
+
+    def _read_skill_file(self, skill, file_path: str) -> str:
+        """Read a file inside the skill directory with traversal protection."""
+        target = (skill.skill_path / file_path).resolve()
+        skill_root = skill.skill_path.resolve()
+
+        # relative_to() raises ValueError for anything that resolves outside
+        # skill_root, avoiding the sibling-directory pitfall of a plain
+        # string-prefix comparison.
+        try:
+            target.relative_to(skill_root)
+        except ValueError:
+            return json.dumps({"error": "Path traversal not allowed"})
+
+        if not target.exists():
+            return json.dumps({"error": f"File not found: {file_path}"})
+
+        try:
+            content = target.read_text(encoding="utf-8")
+            return json.dumps(
+                {"file_path": file_path, "content": content},
+                ensure_ascii=False)
+        except Exception as e:
+            return json.dumps({"error": f"Failed to read file: {e}"})
+
+    def _check_requirements(self, skill) -> Optional[dict]:
+        frontmatter = SkillSchemaParser.parse_yaml_frontmatter(skill.content)
+        if not frontmatter:
+            return None
+
+        requires = frontmatter.get("requires", {})
+        if not requires:
+            return None
+
+        status: Dict[str, Any] = {}
+        required_env = requires.get("env", [])
+        if required_env:
+            missing = [v for v in required_env if v not in os.environ]
+            if missing:
+                status["missing_env_vars"] = missing
+
+        required_tools = requires.get("tools", [])
+        if required_tools:
+            status["required_tools"] = required_tools
+
+        return status if status else None
+
+    # ------------------------------------------------------------------ #
+    # skill_manage
+    # ------------------------------------------------------------------ #
+
+    def _handle_skill_manage(self, args: dict) -> str:
+        action = args.get("action", "")
+        skill_id = args.get("skill_id", "")
+
+        if action == "create":
+            return self._create_skill(skill_id, args.get("content", ""))
+        elif action == "edit":
+            return self._edit_skill(skill_id, args.get("content", ""))
+        elif action == "delete":
+            return self._delete_skill(skill_id)
+        return json.dumps({"error": f"Unknown action: {action}"})
+
+    def _create_skill(self, skill_id: str, content: str) -> str:
+        custom_dir = self._get_custom_skills_dir()
+        skill_dir = custom_dir / skill_id
+
+        if skill_dir.exists():
+            return json.dumps(
+                {"error": f"Skill '{skill_id}' already exists"})
+
+        frontmatter = SkillSchemaParser.parse_yaml_frontmatter(content)
+        if (not frontmatter or "name" not in frontmatter
+                or "description" not in frontmatter):
+            return json.dumps({
+                "error":
+                "Invalid SKILL.md: must have YAML frontmatter "
+                "with 'name' and 'description'"
+            })
+
+        skill_dir.mkdir(parents=True, exist_ok=True)
+        (skill_dir / "SKILL.md").write_text(content, encoding="utf-8")
+
+        skill = self._catalog.add_skill(str(skill_dir))
+        if skill:
+            return json.dumps({
+                "success": True,
+                "skill_id": skill.skill_id,
+                "message": f"Skill '{skill.name}' created successfully",
+            })
+        return json.dumps({"error": "Failed to load created skill"})
+
+    def _edit_skill(self, skill_id: str, content: str) -> str:
+        skill = self._catalog.get_skill(skill_id)
+        if not skill:
+            return json.dumps(
+                {"error": f"Skill '{skill_id}' not found"})
+
+        frontmatter = SkillSchemaParser.parse_yaml_frontmatter(content)
+        if (not frontmatter or "name" not in frontmatter
+                or "description" not in frontmatter):
+            return json.dumps({
+                "error":
+                "Invalid content: must have YAML frontmatter "
+                "with 'name' and 'description'"
+            })
+
+        skill_md_path = skill.skill_path / "SKILL.md"
+        skill_md_path.write_text(content, encoding="utf-8")
+
+        reloaded = self._catalog.reload_skill(skill_id)
+        if reloaded:
+            return json.dumps({
+                "success": True,
+                "message": f"Skill '{skill_id}' updated successfully",
+            })
+        return json.dumps({"error": "Failed to reload updated skill"})
+
+    def _delete_skill(self, skill_id: str) -> str:
+        skill = self._catalog.get_skill(skill_id)
+        if not skill:
+            return json.dumps(
+                {"error": f"Skill '{skill_id}' not found"})
+
+        custom_dir = self._get_custom_skills_dir().resolve()
+        # Same containment test as skill_view: a plain startswith() would
+        # also accept sibling directories such as 'custom-extra'.
+        try:
+            skill.skill_path.resolve().relative_to(custom_dir)
+        except ValueError:
+            return json.dumps(
+                {"error": "Can only delete custom skills"})
+
+        shutil.rmtree(skill.skill_path)
+        self._catalog.remove_skill(skill_id)
+
+        return json.dumps({
+            "success": True,
+            "message": f"Skill '{skill_id}' deleted successfully",
+        })
+
+    def _get_custom_skills_dir(self) -> Path:
+        base = USER_SKILLS_DIR / "custom"
+        base.mkdir(parents=True, exist_ok=True)
+        return base
diff --git a/ms_agent/skill/sources.py b/ms_agent/skill/sources.py
new file mode 100644
index 000000000..23e9de67a
--- /dev/null
+++ b/ms_agent/skill/sources.py
@@ -0,0 +1,94 @@
+# Copyright (c) ModelScope Contributors. All rights reserved.
+import os
+import re
+from dataclasses import dataclass
+from enum import Enum
+from pathlib import Path
+from typing import Optional
+
+
+class SkillSourceType(Enum):
+    LOCAL_DIR = "local"
+    MODELSCOPE = "modelscope"
+    GIT = "git"
+
+
+@dataclass
+class SkillSource:
+    type: SkillSourceType
+    path: Optional[str] = None
+    repo_id: Optional[str] = None
+    url: Optional[str] = None
+    revision: Optional[str] = None
+    subdir: Optional[str] = None
+    enabled: bool = True
+
+
+_MODELSCOPE_URI_RE = re.compile(
+    r'^modelscope://(?P<repo>[^@#]+)(?:@(?P<rev>[^#]+))?(?:#(?P<sub>.+))?$')
+
+_MODELSCOPE_SKILL_URL_RE = re.compile(
+    r'^https?://(?:www\.)?modelscope\.(?:cn|ai)/skills/'
+    r'(?P<repo>[^/]+/[^/]+)(?:/.*)?$')
+
+_OWNER_REPO_RE = re.compile(r'^[A-Za-z0-9_.-]+/[A-Za-z0-9_.-]+$')
+
+_AT_PREFIX_RE = re.compile(
+    r'^@(?P<repo>[A-Za-z0-9_.-]+/[A-Za-z0-9_.-]+)$')
+
+
+def _looks_like_path(raw: str) -> bool:
+    """Return True when *raw* is clearly meant to be a local filesystem path
+    rather than a hub identifier, i.e. it starts with ``/``, ``./``, ``../``
+    or ``~``; bare ``owner/repo`` strings are disambiguated later in
+    ``parse_skill_source``."""
+    return raw.startswith(('/', './', '../', '~'))
+
+
+def parse_skill_source(raw: str) -> SkillSource:
+    """Parse a raw string into a SkillSource.
+
+    Supported formats (checked in order):
+    - /abs/path or ./rel/path or ~/path -> LOCAL_DIR
+    - modelscope://owner/repo[@rev][#subdir] -> MODELSCOPE
+    - https://modelscope.cn/skills/owner/repo -> MODELSCOPE
+    - @owner/repo (CLI shorthand) -> MODELSCOPE
+    - https://... or git://... -> GIT
+    - owner/repo (when path does not exist) -> MODELSCOPE
+    - anything else -> LOCAL_DIR
+    """
+    if _looks_like_path(raw):
+        resolved = str(Path(raw).expanduser().resolve())
+        return SkillSource(type=SkillSourceType.LOCAL_DIR, path=resolved)
+
+    m = _MODELSCOPE_URI_RE.match(raw)
+    if m:
+        return SkillSource(
+            type=SkillSourceType.MODELSCOPE,
+            repo_id=m.group('repo'),
+            revision=m.group('rev'),
+            subdir=m.group('sub'),
+        )
+
+    m = _MODELSCOPE_SKILL_URL_RE.match(raw)
+    if m:
+        return SkillSource(
+            type=SkillSourceType.MODELSCOPE,
+            repo_id=m.group('repo'),
+        )
+
+    m = _AT_PREFIX_RE.match(raw)
+    if m:
+        return SkillSource(
+            type=SkillSourceType.MODELSCOPE,
+            repo_id=m.group('repo'),
+        )
+
+    if raw.startswith(('https://', 'http://', 'git://')):
+        return SkillSource(type=SkillSourceType.GIT, url=raw)
+
+    if _OWNER_REPO_RE.match(raw) and not os.path.exists(raw):
+        return SkillSource(type=SkillSourceType.MODELSCOPE, repo_id=raw)
+
+    resolved = str(Path(raw).resolve()) if not os.path.isabs(raw) else raw
+    return SkillSource(type=SkillSourceType.LOCAL_DIR, path=resolved)
diff --git a/ms_agent/skill/spec.py b/ms_agent/skill/spec.py
deleted file mode 100644
index 3c666b8d4..000000000
--- a/ms_agent/skill/spec.py
+++ /dev/null
@@ -1,67 +0,0 @@
-# Copyright (c) ModelScope Contributors. All rights reserved.
-import os -from dataclasses import dataclass - -from .prompts import DEFAULT_IMPLEMENTATION, DEFAULT_PLAN, DEFAULT_TASKS - - -@dataclass -class Spec: - """ - Specification for an AI agent's task planning and execution. - """ - - plan: str - - tasks: str - - implementation: str = '' - - def __post_init__(self): - - if not self.plan: - self.plan = DEFAULT_PLAN - - if not self.tasks: - self.tasks = DEFAULT_TASKS - - if not self.implementation: - self.implementation = DEFAULT_IMPLEMENTATION - - def dump(self, output_dir: str) -> str: - """ - Dump the spec to the specified output directory. - - Args: - output_dir (str): The directory to dump the spec files. - - Returns: - str: The path to the dumped spec directory. - """ - output_path: str = os.path.join(output_dir, '.spec') - os.makedirs(output_path, exist_ok=True) - - with open( - os.path.join(output_path, 'plan.md'), 'w', - encoding='utf-8') as f: - f.write(self.plan) - - with open( - os.path.join(output_path, 'tasks.md'), 'w', - encoding='utf-8') as f: - f.write(self.tasks) - - with open( - os.path.join(output_path, 'implementation.md'), - 'w', - encoding='utf-8') as f: - f.write(self.implementation) - - return output_path - - -if __name__ == '__main__': - spec = Spec(plan='', tasks='') - print('Plan:', spec.plan) - print('Tasks:', spec.tasks) - print('Implementation:', spec.implementation) diff --git a/requirements/framework.txt b/requirements/framework.txt index 7b7c84fc1..2796900a0 100644 --- a/requirements/framework.txt +++ b/requirements/framework.txt @@ -6,7 +6,7 @@ json5 markdown matplotlib mcp -modelscope +modelscope>=1.35.2 moviepy numpy omegaconf diff --git a/tests/skills/test_claude_skills.py b/tests/skills/test_claude_skills.py deleted file mode 100644 index 8008ecc57..000000000 --- a/tests/skills/test_claude_skills.py +++ /dev/null @@ -1,812 +0,0 @@ -""" -Unit tests for Claude Skills using AutoSkills. - -These tests cover the 16 skills in projects/agent_skills/skills/claude_skills: -1. algorithmic-art - Generative art with p5.js -2. brand-guidelines - Anthropic brand styling -3. canvas-design - Visual art in PNG/PDF -4. doc-coauthoring - Documentation workflow -5. docx - Word document operations -6. frontend-design - Frontend UI design -7. internal-comms - Internal communications -8. mcp-builder - MCP server creation -9. pdf - PDF manipulation -10. pptx - PowerPoint operations -11. skill-creator - Skill creation guide -12. slack-gif-creator - Slack GIF creation -13. theme-factory - Theme styling -14. web-artifacts-builder - React/HTML artifacts -15. webapp-testing - Playwright testing -16. xlsx - Excel/spreadsheet operations - -Usage: - # Run all tests - python -m unittest tests.skills.test_claude_skills -v - - # Run specific test class - python -m unittest tests.skills.test_claude_skills.TestClaudeSkillsRetrieval -v - - # Run specific test method - python -m unittest tests.skills.test_claude_skills.TestClaudeSkillsRetrieval.test_pdf_skill -v -""" -import asyncio -import os -import shutil -import tempfile -import unittest -from pathlib import Path - -from ms_agent.llm.openai_llm import OpenAI -from ms_agent.skill.auto_skills import AutoSkills -from omegaconf import DictConfig - - -#### Prerequisites #### -# - ALL ENVs: # LLM_MODEL, OPENAI_API_KEY, OPENAI_BASE_URL, SKILLS_PATH, WORK_DIR, IS_REMOVE_WORK_DIR, USE_SANDBOX -# - Get SKILLS_PATH: git clone https://github.com/anthropics/skills.git and set the path `skills/skills` directory. 
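The prerequisites above can also be wired up programmatically before launching the suite. A minimal sketch — every value is a placeholder; only the variable names come from the comment above and the invocation mirrors the Usage notes in the module docstring:

```python
import os
import subprocess

# Placeholder values; only the environment variable names are taken from
# the prerequisites comment above.
os.environ.setdefault('LLM_MODEL', 'qwen3-max')
os.environ.setdefault('OPENAI_API_KEY', '<your-api-key>')
os.environ.setdefault(
    'OPENAI_BASE_URL',
    'https://dashscope.aliyuncs.com/compatible-mode/v1')
os.environ.setdefault('SKILLS_PATH', '/path/to/skills/skills')
os.environ.setdefault('WORK_DIR', '/tmp/ms_agent_skill_tests')
os.environ.setdefault('IS_REMOVE_WORK_DIR', 'true')
os.environ.setdefault('USE_SANDBOX', 'false')

# Same entry point as in the module docstring's Usage section.
subprocess.run(
    ['python', '-m', 'unittest', 'tests.skills.test_claude_skills', '-v'],
    check=True)
```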
- - -IS_REMOVE_WORK_DIR: bool = os.getenv('IS_REMOVE_WORK_DIR', - 'true').lower() == 'true' - -USE_SANDBOX: bool = os.getenv('USE_SANDBOX', - 'false').lower() == 'true' - - -def get_llm_config() -> DictConfig: - """Get LLM configuration from environment variables.""" - return DictConfig({ - 'llm': { - 'service': - 'openai', - 'model': - os.getenv('LLM_MODEL', 'qwen3-max'), - 'openai_api_key': - os.getenv('OPENAI_API_KEY'), - 'openai_base_url': - os.getenv('OPENAI_BASE_URL', - 'https://dashscope.aliyuncs.com/compatible-mode/v1') - } - }) - - -def get_skills_path() -> str: - """Get the path to claude_skills directory.""" - skills_path = os.getenv('SKILLS_PATH') - if skills_path: - return skills_path - # Default path relative to project root - return str( - Path(__file__).parent.parent.parent / 'projects' / 'agent_skills' - / 'skills' / 'claude_skills') - - -def get_work_dir() -> str: - """Get work directory from env or create temp directory.""" - work_dir = os.getenv('WORK_DIR') - if work_dir: - os.makedirs(work_dir, exist_ok=True) - return work_dir - return tempfile.mkdtemp(prefix='ms_agent_test_') - - -def run_async(coro): - """Helper to run async coroutines in sync context.""" - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - try: - return loop.run_until_complete(coro) - finally: - loop.close() - - -class TestClaudeSkillsRetrieval(unittest.TestCase): - """Test skill retrieval and DAG building for each skill category.""" - - def setUp(self): - """Setup test fixtures before each test.""" - self.config = get_llm_config() - self.skills_path = get_skills_path() - self.work_dir = get_work_dir() - - # Skip test if no API key - if not self.config.llm.openai_api_key: - self.skipTest('OPENAI_API_KEY not set') - - # Create AutoSkills instance for this test - self.auto_skills = AutoSkills( - skills=self.skills_path, - llm=OpenAI.from_config(self.config), - use_sandbox=USE_SANDBOX, - work_dir=self.work_dir, - ) - - def tearDown(self): - """Cleanup after each test.""" - # Clean up the temporary work directory (only if not from env) - if IS_REMOVE_WORK_DIR and hasattr(self, 'work_dir') and os.path.exists( - self.work_dir) and not os.getenv('WORK_DIR'): - try: - shutil.rmtree(self.work_dir) - except Exception as e: - print(f'Warning: Failed to clean up work_dir: {e}') - - # Clean up AutoSkills instance - if hasattr(self, 'auto_skills'): - self.auto_skills = None - - def _run_skill_retrieval_test(self, queries: list, skill_name: str): - """ - Helper method to run skill retrieval test. - - Args: - queries: List of user queries to test. - skill_name: Name of the skill being tested. - """ - for query in queries: - with self.subTest(query=query): - result = run_async(self.auto_skills.get_skill_dag(query)) - self.assertIsNotNone( - result, f'Result should not be None for: {query}') - - # Assert skills_dag and execution_order are not empty - self.assertTrue( - result.dag, - f'skills_dag should not be empty for: {query}') - self.assertTrue( - result.execution_order, - f'execution_order should not be empty for: {query}') - - if result.selected_skills: - skill_ids = list(result.selected_skills.keys()) - print(f'\n[{skill_name}] Query: {query}') - print(f'[{skill_name}] Retrieved skills: {skill_ids}') - print(f'[{skill_name}] Execution order: {result.execution_order}') - - def test_algorithmic_art_skill(self): - """ - Test algorithmic-art skill retrieval. - - Skill: Creates generative art using p5.js with seeded randomness. 
- Capabilities: Algorithmic philosophy creation, p5.js implementation, - flow fields, particle systems, interactive artifacts. - """ - queries = [ - 'Create a generative art piece with flowing particles that looks organic', - 'Make an algorithmic art using flow fields and Perlin noise', - 'I want to create interactive p5.js artwork with seeded randomness', - ] - self._run_skill_retrieval_test(queries, 'algorithmic-art') - - def test_brand_guidelines_skill(self): - """ - Test brand-guidelines skill retrieval. - - Skill: Applies Anthropic's brand colors and typography. - Capabilities: Brand color application, typography styling, - visual formatting, corporate identity. - """ - queries = [ - 'Apply Anthropic brand colors to my presentation', - 'Style this document with official brand guidelines', - 'Format this artifact using company design standards', - ] - self._run_skill_retrieval_test(queries, 'brand-guidelines') - - def test_canvas_design_skill(self): - """ - Test canvas-design skill retrieval. - - Skill: Creates visual art in PNG and PDF documents. - Capabilities: Design philosophy creation, poster design, - static visual art, composition, color theory. - """ - queries = [ - 'Create a beautiful minimalist poster design in PDF format', - 'Design an artistic visual piece using canvas with modern aesthetics', - 'Make a museum-quality art poster with geometric patterns', - ] - self._run_skill_retrieval_test(queries, 'canvas-design') - - def test_doc_coauthoring_skill(self): - """ - Test doc-coauthoring skill retrieval. - - Skill: Guides users through documentation co-authoring workflow. - Capabilities: Context gathering, section refinement, - reader testing, iterative document creation. - """ - queries = [ - 'Help me write a technical design document for a new API', - 'I need to create a product requirements document (PRD)', - 'Draft a decision doc for our architecture proposal', - ] - self._run_skill_retrieval_test(queries, 'doc-coauthoring') - - def test_docx_skill(self): - """ - Test docx skill retrieval. - - Skill: Comprehensive Word document creation, editing, and analysis. - Capabilities: Document creation, tracked changes, comments, - formatting preservation, text extraction. - """ - queries = [ - 'Create a professional Word document with headers and bullet points', - 'Edit this docx file and add tracked changes to section 3', - 'Extract text from this Word document and analyze its structure', - 'Add comments to this docx file for review', - ] - self._run_skill_retrieval_test(queries, 'docx') - - def test_frontend_design_skill(self): - """ - Test frontend-design skill retrieval. - - Skill: Creates distinctive, production-grade frontend interfaces. - Capabilities: Web components, landing pages, dashboards, - React components, HTML/CSS layouts, UI styling. - """ - queries = [ - 'Build a modern landing page with bold typography and animations', - 'Create a React dashboard component with distinctive styling', - 'Design a web interface that avoids generic AI aesthetics', - 'Make a beautiful HTML/CSS card component with hover effects', - ] - self._run_skill_retrieval_test(queries, 'frontend-design') - - def test_internal_comms_skill(self): - """ - Test internal-comms skill retrieval. - - Skill: Writes internal communications in company formats. - Capabilities: 3P updates (Progress/Plans/Problems), newsletters, - FAQs, status reports, incident reports. 
- """ - queries = [ - 'Write a 3P update for our weekly team meeting', - 'Draft a company newsletter about Q4 achievements', - 'Create FAQ responses for the new product launch', - 'Write an incident report for yesterday\'s outage', - ] - self._run_skill_retrieval_test(queries, 'internal-comms') - - def test_mcp_builder_skill(self): - """ - Test mcp-builder skill retrieval. - - Skill: Creates MCP servers for LLM-external service interaction. - Capabilities: MCP protocol implementation, tool design, - API integration, TypeScript/Python SDK usage. - """ - queries = [ - 'Build an MCP server to integrate with GitHub API', - 'Create an MCP tool that enables Claude to search databases', - 'Implement a Model Context Protocol server in TypeScript', - ] - self._run_skill_retrieval_test(queries, 'mcp-builder') - - def test_pdf_skill(self): - """ - Test pdf skill retrieval. - - Skill: Comprehensive PDF manipulation toolkit. - Capabilities: Text/table extraction, PDF creation, - merging/splitting, form filling, watermarks. - """ - queries = [ - 'Extract all tables from this PDF document', - 'Create a new PDF report with charts and formatted text', - 'Merge multiple PDF files into one document', - 'Fill out this PDF form with the provided data', - 'Split this large PDF into separate pages', - ] - self._run_skill_retrieval_test(queries, 'pdf') - - def test_pptx_skill(self): - """ - Test pptx skill retrieval. - - Skill: PowerPoint creation, editing, and analysis. - Capabilities: Presentation creation, template editing, - slide layouts, speaker notes, thumbnails. - """ - queries = [ - 'Create a PowerPoint presentation about machine learning', - 'Edit this pptx file to update the charts and styling', - 'Generate a slide deck using this template with new content', - 'Add speaker notes to all slides in this presentation', - ] - self._run_skill_retrieval_test(queries, 'pptx') - - def test_skill_creator_skill(self): - """ - Test skill-creator skill retrieval. - - Skill: Guide for creating effective skills. - Capabilities: Skill design, SKILL.md creation, - resource bundling, workflow definition. - """ - queries = [ - 'Create a new skill for image processing with Python', - 'Help me design a skill that extends Claude\'s capabilities', - 'Build a custom skill with scripts and reference documents', - ] - self._run_skill_retrieval_test(queries, 'skill-creator') - - def test_slack_gif_creator_skill(self): - """ - Test slack-gif-creator skill retrieval. - - Skill: Creates animated GIFs optimized for Slack. - Capabilities: GIF creation, animation (shake, pulse, bounce), - Slack emoji optimization, frame composition. - """ - queries = [ - 'Make a bouncing star GIF for Slack emoji', - 'Create an animated celebration GIF optimized for Slack', - 'Generate a pulsing heart animation for team chat', - ] - self._run_skill_retrieval_test(queries, 'slack-gif-creator') - - def test_theme_factory_skill(self): - """ - Test theme-factory skill retrieval. - - Skill: Styles artifacts with pre-set or custom themes. - Capabilities: Theme application, color palettes, - font pairings, visual consistency. - """ - queries = [ - 'Apply the Ocean Depths theme to my presentation', - 'Style this document with the Tech Innovation theme', - 'Create a custom theme with warm earth tones for my slides', - ] - self._run_skill_retrieval_test(queries, 'theme-factory') - - def test_web_artifacts_builder_skill(self): - """ - Test web-artifacts-builder skill retrieval. - - Skill: Builds elaborate HTML artifacts using React/Tailwind. 
- Capabilities: React components, shadcn/ui, Tailwind CSS, - single-file HTML bundling. - """ - queries = [ - 'Build a complex React dashboard with shadcn/ui components', - 'Create a multi-component HTML artifact with state management', - 'Develop an interactive web app with Tailwind CSS styling', - ] - self._run_skill_retrieval_test(queries, 'web-artifacts-builder') - - def test_webapp_testing_skill(self): - """ - Test webapp-testing skill retrieval. - - Skill: Tests local web applications using Playwright. - Capabilities: Browser automation, screenshot capture, - UI interaction, server lifecycle management. - """ - queries = [ - 'Test this web application using Playwright automation', - 'Capture screenshots of my local webapp running on port 3000', - 'Debug UI behavior by inspecting the rendered DOM', - 'Verify frontend functionality with automated browser tests', - ] - self._run_skill_retrieval_test(queries, 'webapp-testing') - - def test_xlsx_skill(self): - """ - Test xlsx skill retrieval. - - Skill: Comprehensive Excel/spreadsheet operations. - Capabilities: Spreadsheet creation, formulas, formatting, - data analysis, visualization, recalculation. - """ - queries = [ - 'Create an Excel financial model with formulas and formatting', - 'Analyze data in this spreadsheet and create summary charts', - 'Build a budget tracker spreadsheet with automatic calculations', - 'Modify this xlsx file to add new formulas and preserve formatting', - ] - self._run_skill_retrieval_test(queries, 'xlsx') - - -class TestSkillsCombination(unittest.TestCase): - """Test skill retrieval for queries requiring multiple skills.""" - - def setUp(self): - """Setup test fixtures before each test.""" - self.config = get_llm_config() - self.skills_path = get_skills_path() - self.work_dir = get_work_dir() - - if not self.config.llm.openai_api_key: - self.skipTest('OPENAI_API_KEY not set') - - self.auto_skills = AutoSkills( - skills=self.skills_path, - llm=OpenAI.from_config(self.config), - use_sandbox=USE_SANDBOX, - work_dir=self.work_dir, - ) - - def tearDown(self): - """Cleanup after each test.""" - if IS_REMOVE_WORK_DIR and hasattr(self, 'work_dir') and os.path.exists( - self.work_dir) and not os.getenv('WORK_DIR'): - try: - shutil.rmtree(self.work_dir) - except Exception as e: - print(f'Warning: Failed to clean up work_dir: {e}') - - if hasattr(self, 'auto_skills'): - self.auto_skills = None - - def _assert_dag_result(self, result, query: str): - """Assert common DAG result validations.""" - self.assertIsNotNone(result, f'Result should not be None for: {query}') - self.assertTrue( - result.dag, - f'skills_dag should not be empty for: {query}') - self.assertTrue( - result.execution_order, - f'execution_order should not be empty for: {query}') - - def test_document_with_theme(self): - """ - Test combining document creation with theme styling. - - Expected: pptx + theme-factory or docx + brand-guidelines - """ - query = 'Create a PowerPoint presentation about AI and apply Ocean Depths theme' - result = run_async(self.auto_skills.get_skill_dag(query)) - - self._assert_dag_result(result, query) - if result.selected_skills: - skill_ids = list(result.selected_skills.keys()) - print(f'\n[Combination] Query: {query}') - print(f'[Combination] Retrieved skills: {skill_ids}') - print(f'[Combination] Execution order: {result.execution_order}') - - def test_frontend_with_testing(self): - """ - Test combining frontend design with webapp testing. 
- - Expected: frontend-design + webapp-testing - """ - query = 'Build a React dashboard and test it with Playwright' - result = run_async(self.auto_skills.get_skill_dag(query)) - - self._assert_dag_result(result, query) - if result.selected_skills: - skill_ids = list(result.selected_skills.keys()) - print(f'\n[Combination] Query: {query}') - print(f'[Combination] Retrieved skills: {skill_ids}') - print(f'[Combination] Execution order: {result.execution_order}') - - def test_pdf_and_xlsx_data(self): - """ - Test combining PDF and Excel operations. - - Expected: pdf + xlsx for data extraction and reporting - """ - query = 'Extract data from PDF tables and create an Excel analysis report' - result = run_async(self.auto_skills.get_skill_dag(query)) - - self._assert_dag_result(result, query) - if result.selected_skills: - skill_ids = list(result.selected_skills.keys()) - print(f'\n[Combination] Query: {query}') - print(f'[Combination] Retrieved skills: {skill_ids}') - print(f'[Combination] Execution order: {result.execution_order}') - - def test_doc_with_brand_styling(self): - """ - Test combining document creation with brand guidelines. - - Expected: docx + brand-guidelines - """ - query = 'Create a Word document and apply Anthropic brand styling' - result = run_async(self.auto_skills.get_skill_dag(query)) - - self._assert_dag_result(result, query) - if result.selected_skills: - skill_ids = list(result.selected_skills.keys()) - print(f'\n[Combination] Query: {query}') - print(f'[Combination] Retrieved skills: {skill_ids}') - print(f'[Combination] Execution order: {result.execution_order}') - - -class TestSkillsExecution(unittest.TestCase): - """ - Test full skill execution pipeline. - - Note: These tests require actual LLM API access and may take longer. - """ - - def setUp(self): - """Setup test fixtures before each test.""" - self.config = get_llm_config() - self.skills_path = get_skills_path() - self.work_dir = get_work_dir() - - if not self.config.llm.openai_api_key: - self.skipTest('OPENAI_API_KEY not set') - - self.auto_skills = AutoSkills( - skills=self.skills_path, - llm=OpenAI.from_config(self.config), - use_sandbox=USE_SANDBOX, - work_dir=self.work_dir, - max_retries=3, - ) - - def tearDown(self): - """Cleanup after each test.""" - # Clean up any output files generated during execution - if IS_REMOVE_WORK_DIR and hasattr(self, 'work_dir') and os.path.exists( - self.work_dir) and not os.getenv('WORK_DIR'): - try: - shutil.rmtree(self.work_dir) - except Exception as e: - print(f'Warning: Failed to clean up work_dir: {e}') - - if IS_REMOVE_WORK_DIR and hasattr(self, 'auto_skills'): - # Clean up executor if exists - if hasattr(self.auto_skills, - '_executor') and self.auto_skills._executor: - try: - self.auto_skills.cleanup() - except Exception as e: - print(f'Warning: Failed to cleanup auto_skills: {e}') - self.auto_skills = None - - def test_execute_pdf_creation(self): - """ - Test full execution of PDF creation skill. - - This test verifies end-to-end skill execution. 
- """ - query = "Create a simple PDF report titled 'Test Report' with basic text content" - result = run_async(self.auto_skills.run(query)) - - self.assertIsNotNone(result, f'Result should not be None for: {query}') - print(f'\n[Execution] Query: {query}') - print(f'[Execution] Is complete: {result.is_complete}') - - # Assert execution_result even if None - if result.execution_result: - print(f'[Execution] Success: {result.execution_result.success}') - print( - f'[Execution] Skills executed: {list(result.execution_result.results.keys())}' - ) - self.assertTrue( - result.execution_result.success, - f'Execution should succeed for: {query}') - else: - self.fail(f'execution_result should not be None for: {query}') - - def test_execute_xlsx_creation(self): - """Test full execution of Excel creation skill.""" - query = 'Create an Excel spreadsheet with a simple budget table and SUM formula' - result = run_async(self.auto_skills.run(query)) - - self.assertIsNotNone(result, f'Result should not be None for: {query}') - print(f'\n[Execution] Query: {query}') - print(f'[Execution] Is complete: {result.is_complete}') - - if result.execution_result: - print(f'[Execution] Success: {result.execution_result.success}') - self.assertTrue( - result.execution_result.success, - f'Execution should succeed for: {query}') - else: - self.fail(f'execution_result should not be None for: {query}') - - def test_execute_slack_gif(self): - """Test full execution of Slack GIF creation skill.""" - query = 'Create a simple bouncing dot animation GIF for Slack emoji' - result = run_async(self.auto_skills.run(query)) - - self.assertIsNotNone(result, f'Result should not be None for: {query}') - print(f'\n[Execution] Query: {query}') - print(f'[Execution] Is complete: {result.is_complete}') - - if result.execution_result: - print(f'[Execution] Success: {result.execution_result.success}') - self.assertTrue( - result.execution_result.success, - f'Execution should succeed for: {query}') - else: - self.fail(f'execution_result should not be None for: {query}') - - -class TestChatOnlyQueries(unittest.TestCase): - """Test queries that should be handled as chat-only (no skill retrieval).""" - - def setUp(self): - """Setup test fixtures before each test.""" - self.config = get_llm_config() - self.skills_path = get_skills_path() - self.work_dir = get_work_dir() - - if not self.config.llm.openai_api_key: - self.skipTest('OPENAI_API_KEY not set') - - self.auto_skills = AutoSkills( - skills=self.skills_path, - llm=OpenAI.from_config(self.config), - use_sandbox=USE_SANDBOX, - work_dir=self.work_dir, - ) - - def tearDown(self): - """Cleanup after each test.""" - if IS_REMOVE_WORK_DIR and hasattr(self, 'work_dir') and os.path.exists( - self.work_dir) and not os.getenv('WORK_DIR'): - try: - shutil.rmtree(self.work_dir) - except Exception as e: - print(f'Warning: Failed to clean up work_dir: {e}') - - if hasattr(self, 'auto_skills'): - self.auto_skills = None - - def test_general_chat_queries(self): - """Test that general chat queries return chat-only response.""" - queries = [ - 'What is the capital of France?', - 'Tell me a joke about programming', - 'Explain what machine learning is', - ] - - for query in queries: - with self.subTest(query=query): - result = run_async(self.auto_skills.get_skill_dag(query)) - self.assertIsNotNone(result, f'Result should not be None for: {query}') - - print(f'\n[Chat] Query: {query}') - print( - f'[Chat] Chat response: {result.chat_response is not None}' - ) - print( - f'[Chat] Selected skills: 
{list(result.selected_skills.keys()) if result.selected_skills else "None"}' - ) - - # For chat-only queries, chat_response should be present - # OR it should have empty skills (no execution needed) - is_chat_only = (result.chat_response is not None or - not result.selected_skills) - self.assertTrue( - is_chat_only, - f'Query should be handled as chat-only: {query}') - - -class TestSkillDAGStructure(unittest.TestCase): - """Test the structure and validity of skill DAG results.""" - - def setUp(self): - """Setup test fixtures before each test.""" - self.config = get_llm_config() - self.skills_path = get_skills_path() - self.work_dir = get_work_dir() - - if not self.config.llm.openai_api_key: - self.skipTest('OPENAI_API_KEY not set') - - self.auto_skills = AutoSkills( - skills=self.skills_path, - llm=OpenAI.from_config(self.config), - use_sandbox=USE_SANDBOX, - work_dir=self.work_dir, - ) - - def tearDown(self): - """Cleanup after each test.""" - if IS_REMOVE_WORK_DIR and hasattr(self, 'work_dir') and os.path.exists( - self.work_dir) and not os.getenv('WORK_DIR'): - try: - shutil.rmtree(self.work_dir) - except Exception as e: - print(f'Warning: Failed to clean up work_dir: {e}') - - if hasattr(self, 'auto_skills'): - self.auto_skills = None - - def test_dag_result_has_required_fields(self): - """Test that DAG result contains all required fields.""" - query = 'Create a PDF document' - result = run_async(self.auto_skills.get_skill_dag(query)) - - self.assertIsNotNone(result, f'Result should not be None for: {query}') - - # Check required attributes exist - self.assertTrue(hasattr(result, 'is_complete')) - self.assertTrue(hasattr(result, 'selected_skills')) - self.assertTrue(hasattr(result, 'dag')) - self.assertTrue(hasattr(result, 'execution_order')) - self.assertTrue(hasattr(result, 'clarification')) - self.assertTrue(hasattr(result, 'chat_response')) - - # Assert skills_dag and execution_order are not empty - self.assertTrue( - result.dag, - f'skills_dag should not be empty for: {query}') - self.assertTrue( - result.execution_order, - f'execution_order should not be empty for: {query}') - - def test_execution_order_contains_valid_skills(self): - """Test that execution order only contains valid skill IDs.""" - query = 'Create a PowerPoint presentation and apply theme' - result = run_async(self.auto_skills.get_skill_dag(query)) - - self.assertIsNotNone(result, f'Result should not be None for: {query}') - self.assertTrue( - result.dag, - f'skills_dag should not be empty for: {query}') - self.assertTrue( - result.execution_order, - f'execution_order should not be empty for: {query}') - - if result.execution_order and result.selected_skills: - # Flatten execution order (may contain nested lists for parallel execution) - flat_order = [] - for item in result.execution_order: - if isinstance(item, list): - flat_order.extend(item) - else: - flat_order.append(item) - - # All skills in execution order should be in selected_skills - for skill_id in flat_order: - self.assertIn( - skill_id, result.selected_skills, - f'Skill {skill_id} in execution_order but not in selected_skills' - ) - - def test_skills_dag_structure(self): - """Test that skills DAG has valid adjacency list structure.""" - query = 'Extract PDF data and create Excel report' - result = run_async(self.auto_skills.get_skill_dag(query)) - - self.assertIsNotNone(result, f'Result should not be None for: {query}') - self.assertTrue( - result.dag, - f'skills_dag should not be empty for: {query}') - self.assertTrue( - result.execution_order, - 
f'execution_order should not be empty for: {query}')
-
-        if result.dag:
-            # DAG should be a dict
-            self.assertIsInstance(result.dag, dict)
-
-            # Each value should be a list of dependencies
-            for skill_id, deps in result.dag.items():
-                self.assertIsInstance(
-                    deps, list,
-                    f'Dependencies for {skill_id} should be a list')
-
-
-# Test suite for running all tests
-def suite():
-    """Create test suite with all test cases."""
-    loader = unittest.TestLoader()
-    test_suite = unittest.TestSuite()
-
-    test_suite.addTests(
-        loader.loadTestsFromTestCase(TestClaudeSkillsRetrieval))
-    test_suite.addTests(loader.loadTestsFromTestCase(TestSkillsCombination))
-    test_suite.addTests(loader.loadTestsFromTestCase(TestSkillsExecution))
-    test_suite.addTests(loader.loadTestsFromTestCase(TestChatOnlyQueries))
-    test_suite.addTests(loader.loadTestsFromTestCase(TestSkillDAGStructure))
-
-    return test_suite
-
-
-if __name__ == '__main__':
-    # Run tests with verbosity
-    runner = unittest.TextTestRunner(verbosity=2)
-    runner.run(suite())
diff --git a/tests/skills/test_dag_upstream_downstream.py b/tests/skills/test_dag_upstream_downstream.py
deleted file mode 100644
index ab130f876..000000000
--- a/tests/skills/test_dag_upstream_downstream.py
+++ /dev/null
@@ -1,900 +0,0 @@
-"""
-Unit tests for Skill DAG upstream-downstream data passing.
-
-=== Overview ===
-
-This test module validates the core DAG execution mechanism in AutoSkills:
-when multiple skills are chained in a Directed Acyclic Graph (DAG), the
-outputs (stdout, return_value, output_files, etc.) from upstream skills
-are correctly propagated to downstream skills via environment variables.
-
-=== Features Tested ===
-
-1. **Upstream output storage**: After a skill executes, its ExecutionOutput
-   is stored in DAGExecutor._outputs and linked via container.spec.link_upstream().
-
-2. **Environment variable injection**: DAGExecutor._build_execution_input()
-   reads upstream outputs and injects them as:
-   - UPSTREAM_OUTPUTS: Full JSON dict of all dependency outputs.
-   - UPSTREAM_<SKILL_ID>_STDOUT: Per-dependency stdout shortcut variable.
-
-3. **Sequential data flow**: A → B → C chain where each skill reads and
-   transforms data from its predecessor.
-
-4. **Full DAGExecutor.execute() pipeline**: End-to-end test through the
-   public execute() method, verifying internal wiring.
-
-5. **Mixed parallel + sequential DAG**: A → [B, C] → D pattern where B and
-   C run in parallel (both depending on A), then D merges both results.
-
-6. **container.link_skills() API**: Verifies the SkillContainer helper that
-   retrieves linked upstream outputs programmatically.
-
-7. **output_files propagation**: Upstream output files (written to
-   SKILL_OUTPUT_DIR) are captured and exposed in UPSTREAM_OUTPUTS JSON.
-
-=== Workflow ===
-
-Each test follows this pattern:
-    1. Create mock SkillSchema objects backed by temporary directories.
-    2. Instantiate SkillContainer (local mode, no sandbox) and DAGExecutor
-       (no LLM, no progressive analysis).
-    3. Execute Python code snippets as mock skill scripts.
-    4. Verify upstream data is available in downstream environment variables.
-    5. Assert correctness of data transformation across the DAG.
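To make the injected contract concrete: a downstream mock skill in these tests consumes its upstream data with plain stdlib calls. The sketch below uses the `skill_a@latest` fixture id and `revenue` field that appear later in this file; the JSON payload mirrors the ExecutionOutput fields (stdout, stderr, exit_code, output_files, duration_ms):

```python
import json
import os

# UPSTREAM_OUTPUTS is a JSON object keyed by dependency skill_id; each value
# carries that dependency's ExecutionOutput fields.
upstream = json.loads(os.environ.get('UPSTREAM_OUTPUTS', '{}'))

a_out = upstream.get('skill_a@latest', {})
if a_out.get('exit_code') == 0:
    data = json.loads(a_out['stdout'].strip())
    print('upstream revenue:', data.get('revenue'))

# Per-dependency shortcut: the skill_id is sanitized into the variable name,
# e.g. 'skill_a@latest'    -> UPSTREAM_SKILL_A_LATEST_STDOUT
#      'my-tool.v2@latest' -> UPSTREAM_MY_TOOL_V2_LATEST_STDOUT
raw_stdout = os.environ.get('UPSTREAM_SKILL_A_LATEST_STDOUT', '')
```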
- -=== Working Directory Structure === - -All intermediate results are stored under a temporary directory: - - / - ├── test_upstream_downstream/ - │ ├── skills/ # Mock skill definitions - │ │ ├── skill_a/SKILL.md - │ │ ├── skill_b/SKILL.md - │ │ └── skill_c/SKILL.md - │ └── workspace/ - │ ├── outputs/ # Skill output files (e.g., data.json) - │ ├── scripts/ # Generated temp execution scripts - │ └── logs/ # Execution spec logs - ├── test_full_pipeline/ - │ ├── skills/ - │ └── workspace/ - └── test_parallel_mixed/ - ├── skills/ - └── workspace/ - -=== Prerequisites === - -- Python >= 3.10 -- ms_agent package installed (editable mode: pip install -e .) -- No external LLM API key required (tests use mock code, no LLM calls). -- No sandbox/Docker required (tests run in local mode). - -=== Usage === - - # Run all tests in this module - python -m unittest tests.skills.test_dag_upstream_downstream -v - - # Run a specific test class - python -m unittest tests.skills.test_dag_upstream_downstream.TestDAGUpstreamDownstream -v - - # Run a specific test method - python -m unittest tests.skills.test_dag_upstream_downstream.TestDAGFullPipeline.test_sequential_pipeline -v - -=== Environment Variables === - - KEEP_TEST_ARTIFACTS=true|false (default: true) - Whether to keep intermediate results after tests finish. - Set to 'false' to auto-clean temp directories in tearDown. -""" -import asyncio -import json -import os -import shutil -import tempfile -import unittest -from pathlib import Path -from typing import Dict, List, Optional - -from ms_agent.skill.auto_skills import DAGExecutor, SkillExecutionResult -from ms_agent.skill.container import (ExecutionInput, ExecutionOutput, - SkillContainer) -from ms_agent.skill.schema import SkillFile, SkillSchema - -# --------------------------------------------------------------------------- -# Global control: whether to keep intermediate artifacts after tests. -# Set KEEP_TEST_ARTIFACTS=false to auto-clean. -# --------------------------------------------------------------------------- -KEEP_TEST_ARTIFACTS: bool = os.getenv('KEEP_TEST_ARTIFACTS', - 'true').lower() == 'true' - - -# --------------------------------------------------------------------------- -# Helpers -# --------------------------------------------------------------------------- - -def run_async(coro): - """Run an async coroutine in a new event loop (sync context helper).""" - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - try: - return loop.run_until_complete(coro) - finally: - loop.close() - - -def create_mock_skill(skill_id: str, name: str, description: str, - skill_dir: Path) -> SkillSchema: - """ - Create a minimal mock SkillSchema backed by a real directory. - - Args: - skill_id: Unique skill identifier (e.g., 'skill_a@latest'). - name: Human-readable skill name. - description: Short description of the skill. - skill_dir: Filesystem path for the skill directory. - - Returns: - A SkillSchema instance pointing to the created directory. 
- """ - skill_dir.mkdir(parents=True, exist_ok=True) - skill_md = skill_dir / 'SKILL.md' - skill_md.write_text( - f'---\nname: {name}\ndescription: {description}\n---\n' - f'# {name}\n{description}\n') - - return SkillSchema( - skill_id=skill_id, - name=name, - description=description, - content=f'# {name}\n{description}', - files=[SkillFile(name='SKILL.md', type='.md', path=skill_md)], - skill_path=skill_dir, - version='latest', - ) - - -# ============================================================================ -# Test 1: Direct upstream-downstream data flow -# ============================================================================ - -class TestDAGUpstreamDownstream(unittest.TestCase): - """ - Test upstream -> downstream data flow through DAGExecutor. - - Scenario: skill_a -> skill_b -> skill_c - - skill_a generates JSON data and writes an output file. - - skill_b reads skill_a's stdout via UPSTREAM_OUTPUTS env var. - - skill_c aggregates outputs from both skill_a and skill_b. - """ - - def setUp(self): - """Create temp directories, mock skills, container, and executor.""" - self.test_root = Path( - tempfile.mkdtemp(prefix='test_dag_upstream_downstream_')) - self.skills_dir = self.test_root / 'skills' - self.workspace_dir = self.test_root / 'workspace' - - # Create mock skills - self.skill_a = create_mock_skill( - 'skill_a@latest', 'Data Generator', - 'Generates data and outputs to stdout', - self.skills_dir / 'skill_a') - self.skill_b = create_mock_skill( - 'skill_b@latest', 'Data Processor', - 'Processes upstream data', - self.skills_dir / 'skill_b') - self.skill_c = create_mock_skill( - 'skill_c@latest', 'Report Builder', - 'Builds report from all upstream outputs', - self.skills_dir / 'skill_c') - - self.skills = { - 'skill_a@latest': self.skill_a, - 'skill_b@latest': self.skill_b, - 'skill_c@latest': self.skill_c, - } - - self.container = SkillContainer( - workspace_dir=self.workspace_dir, use_sandbox=False) - - self.executor = DAGExecutor( - container=self.container, - skills=self.skills, - workspace_dir=self.workspace_dir, - llm=None, - enable_progressive_analysis=False, - enable_self_reflection=False, - ) - - # DAG: skill_a -> skill_b -> skill_c - self.dag = { - 'skill_a@latest': [], - 'skill_b@latest': ['skill_a@latest'], - 'skill_c@latest': ['skill_a@latest', 'skill_b@latest'], - } - - def tearDown(self): - """Clean up temp directory unless KEEP_TEST_ARTIFACTS is set.""" - if not KEEP_TEST_ARTIFACTS and self.test_root.exists(): - try: - shutil.rmtree(self.test_root) - except Exception as e: - print(f'Warning: Failed to clean up {self.test_root}: {e}') - - self.executor = None - self.container = None - - def test_skill_a_output_stored(self): - """After executing skill_a, its output is stored in executor._outputs.""" - code_a = ( - 'import os, json\n' - 'output_dir = os.environ.get("SKILL_OUTPUT_DIR", "/tmp")\n' - 'data = {"revenue": 1000000, "quarter": "Q4", "year": 2024}\n' - 'print(json.dumps(data))\n' - 'output_file = os.path.join(output_dir, "data.json")\n' - 'with open(output_file, "w") as f:\n' - ' json.dump(data, f)\n' - 'print(f"Output file: {output_file}")\n' - ) - - exec_input = self.executor._build_execution_input( - 'skill_a@latest', self.dag) - output_a = run_async(self.container.execute_python_code( - code=code_a, skill_id='skill_a@latest', input_spec=exec_input)) - - self.executor._outputs['skill_a@latest'] = output_a - self.container.spec.link_upstream('skill_a@latest', output_a) - - self.assertEqual(output_a.exit_code, 0, - f'skill_a should succeed, 
stderr: {output_a.stderr}') - self.assertIn('revenue', output_a.stdout) - self.assertIn('skill_a@latest', self.executor._outputs) - - def test_upstream_env_vars_injected(self): - """skill_b's execution input contains UPSTREAM env vars from skill_a.""" - # Simulate skill_a output - output_a = ExecutionOutput( - stdout='{"revenue": 1000000}\n', - stderr='', - exit_code=0, - output_files={'data.json': Path('/tmp/data.json')}, - duration_ms=100.0, - ) - self.executor._outputs['skill_a@latest'] = output_a - - exec_input_b = self.executor._build_execution_input( - 'skill_b@latest', self.dag) - - # Verify UPSTREAM_OUTPUTS JSON - self.assertIn('UPSTREAM_OUTPUTS', exec_input_b.env_vars, - 'UPSTREAM_OUTPUTS should be set') - upstream_json = json.loads(exec_input_b.env_vars['UPSTREAM_OUTPUTS']) - self.assertIn('skill_a@latest', upstream_json) - self.assertEqual(upstream_json['skill_a@latest']['exit_code'], 0) - self.assertIn('revenue', upstream_json['skill_a@latest']['stdout']) - - # Verify individual upstream shortcut env var - self.assertIn('UPSTREAM_SKILL_A_LATEST_STDOUT', exec_input_b.env_vars, - 'Per-skill stdout shortcut should be set') - - def test_downstream_reads_upstream_data(self): - """skill_b can parse skill_a's stdout from UPSTREAM_OUTPUTS.""" - # Execute skill_a - code_a = ( - 'import json\n' - 'print(json.dumps({"revenue": 1000000, "quarter": "Q4"}))\n' - ) - exec_input_a = self.executor._build_execution_input( - 'skill_a@latest', self.dag) - output_a = run_async(self.container.execute_python_code( - code=code_a, skill_id='skill_a@latest', input_spec=exec_input_a)) - self.executor._outputs['skill_a@latest'] = output_a - self.container.spec.link_upstream('skill_a@latest', output_a) - - # Execute skill_b - code_b = ( - 'import os, json\n' - 'upstream = json.loads(os.environ.get("UPSTREAM_OUTPUTS", "{}"))\n' - 'data = json.loads(upstream["skill_a@latest"]["stdout"].strip())\n' - 'result = {"processed_revenue": data["revenue"] * 1.1}\n' - 'print(json.dumps(result))\n' - ) - exec_input_b = self.executor._build_execution_input( - 'skill_b@latest', self.dag) - output_b = run_async(self.container.execute_python_code( - code=code_b, skill_id='skill_b@latest', input_spec=exec_input_b)) - - self.assertEqual(output_b.exit_code, 0, - f'skill_b failed: {output_b.stderr}') - result_b = json.loads(output_b.stdout.strip()) - self.assertAlmostEqual(result_b['processed_revenue'], 1100000.0) - - def test_multi_upstream_aggregation(self): - """skill_c receives outputs from both skill_a and skill_b.""" - # Simulate skill_a and skill_b outputs - self.executor._outputs['skill_a@latest'] = ExecutionOutput( - stdout='A_DATA\n', stderr='', exit_code=0, duration_ms=10) - self.executor._outputs['skill_b@latest'] = ExecutionOutput( - stdout='B_DATA\n', stderr='', exit_code=0, duration_ms=10) - - exec_input_c = self.executor._build_execution_input( - 'skill_c@latest', self.dag) - upstream_json = json.loads(exec_input_c.env_vars['UPSTREAM_OUTPUTS']) - - self.assertIn('skill_a@latest', upstream_json, - 'skill_a should be in upstream data') - self.assertIn('skill_b@latest', upstream_json, - 'skill_b should be in upstream data') - self.assertEqual(len(upstream_json), 2, - 'skill_c should see exactly 2 upstream skills') - - def test_output_files_propagated(self): - """Upstream output_files paths are included in UPSTREAM_OUTPUTS JSON.""" - # Simulate skill_a with output files - self.executor._outputs['skill_a@latest'] = ExecutionOutput( - stdout='done\n', - stderr='', - exit_code=0, - output_files={ - 'report.pdf': 
Path('/workspace/outputs/report.pdf'), - 'data.csv': Path('/workspace/outputs/data.csv'), - }, - duration_ms=50, - ) - - exec_input_b = self.executor._build_execution_input( - 'skill_b@latest', self.dag) - upstream_json = json.loads(exec_input_b.env_vars['UPSTREAM_OUTPUTS']) - output_files = upstream_json['skill_a@latest']['output_files'] - - self.assertIn('report.pdf', output_files) - self.assertIn('data.csv', output_files) - - def test_link_skills_api(self): - """container.link_skills() returns correct upstream output.""" - output_a = ExecutionOutput( - stdout='hello from A\n', stderr='', exit_code=0, duration_ms=10) - self.container.spec.link_upstream('skill_a@latest', output_a) - - linked = self.container.link_skills( - 'skill_a@latest', 'input_data', 'stdout') - self.assertEqual(linked, 'hello from A\n') - - # Non-existent upstream returns None - missing = self.container.link_skills( - 'nonexistent@latest', 'input_data', 'stdout') - self.assertIsNone(missing) - - def test_full_three_skill_chain(self): - """End-to-end: skill_a -> skill_b -> skill_c with real execution.""" - # skill_a: generate data - code_a = ( - 'import os, json\n' - 'output_dir = os.environ.get("SKILL_OUTPUT_DIR", "/tmp")\n' - 'data = {"revenue": 1000000, "quarter": "Q4", "year": 2024}\n' - 'print(json.dumps(data))\n' - 'with open(os.path.join(output_dir, "data.json"), "w") as f:\n' - ' json.dump(data, f)\n' - ) - exec_input_a = self.executor._build_execution_input( - 'skill_a@latest', self.dag) - output_a = run_async(self.container.execute_python_code( - code=code_a, skill_id='skill_a@latest', input_spec=exec_input_a)) - self.executor._outputs['skill_a@latest'] = output_a - self.container.spec.link_upstream('skill_a@latest', output_a) - self.assertEqual(output_a.exit_code, 0) - - # skill_b: process skill_a output - code_b = ( - 'import os, json\n' - 'upstream = json.loads(os.environ.get("UPSTREAM_OUTPUTS", "{}"))\n' - 'a_stdout = upstream["skill_a@latest"]["stdout"].strip()\n' - 'data = json.loads(a_stdout)\n' - 'processed = {"processed_revenue": data["revenue"] * 1.1, "source": "skill_a"}\n' - 'print(json.dumps(processed))\n' - ) - exec_input_b = self.executor._build_execution_input( - 'skill_b@latest', self.dag) - output_b = run_async(self.container.execute_python_code( - code=code_b, skill_id='skill_b@latest', input_spec=exec_input_b)) - self.executor._outputs['skill_b@latest'] = output_b - self.container.spec.link_upstream('skill_b@latest', output_b) - self.assertEqual(output_b.exit_code, 0, - f'skill_b failed: {output_b.stderr}') - - # skill_c: aggregate both - code_c = ( - 'import os, json\n' - 'upstream = json.loads(os.environ.get("UPSTREAM_OUTPUTS", "{}"))\n' - 'print(f"Total upstream skills: {len(upstream)}")\n' - 'for sid, data in upstream.items():\n' - ' print(f"From {sid}: exit_code={data[\'exit_code\']}")\n' - ) - exec_input_c = self.executor._build_execution_input( - 'skill_c@latest', self.dag) - output_c = run_async(self.container.execute_python_code( - code=code_c, skill_id='skill_c@latest', input_spec=exec_input_c)) - - self.assertEqual(output_c.exit_code, 0, - f'skill_c failed: {output_c.stderr}') - self.assertIn('Total upstream skills: 2', output_c.stdout) - - -# ============================================================================ -# Test 2: Full DAGExecutor.execute() pipeline -# ============================================================================ - -class TestDAGFullPipeline(unittest.TestCase): - """ - Test the full DAGExecutor.execute() method with sequential skills. 
- - Scenario: adder (outputs 42) -> doubler (reads 42, outputs 84) - Verifies the complete internal wiring: execute() -> _execute_single_skill - -> _build_execution_input -> env_vars propagation. - """ - - def setUp(self): - """Create temp directories, mock skills, container, and executor.""" - self.test_root = Path( - tempfile.mkdtemp(prefix='test_dag_full_pipeline_')) - self.skills_dir = self.test_root / 'skills' - self.workspace_dir = self.test_root / 'workspace' - - self.skill_a = create_mock_skill( - 'adder@latest', 'Adder', 'Generates a number', - self.skills_dir / 'adder') - self.skill_b = create_mock_skill( - 'doubler@latest', 'Doubler', 'Doubles upstream number', - self.skills_dir / 'doubler') - - self.skills = { - 'adder@latest': self.skill_a, - 'doubler@latest': self.skill_b, - } - - self.container = SkillContainer( - workspace_dir=self.workspace_dir, use_sandbox=False) - - self.executor = DAGExecutor( - container=self.container, - skills=self.skills, - workspace_dir=self.workspace_dir, - llm=None, - enable_progressive_analysis=False, - enable_self_reflection=False, - ) - - self.dag = { - 'adder@latest': [], - 'doubler@latest': ['adder@latest'], - } - self.execution_order = ['adder@latest', 'doubler@latest'] - - def tearDown(self): - """Clean up temp directory unless KEEP_TEST_ARTIFACTS is set.""" - if not KEEP_TEST_ARTIFACTS and self.test_root.exists(): - try: - shutil.rmtree(self.test_root) - except Exception as e: - print(f'Warning: Failed to clean up {self.test_root}: {e}') - - self.executor = None - self.container = None - - def test_sequential_pipeline(self): - """adder outputs 42, doubler reads it and outputs 84.""" - container = self.container - executor = self.executor - - async def mock_execute_single( - skill_id, dag, execution_input=None, query=''): - exec_input = executor._build_execution_input( - skill_id, dag, execution_input) - - if skill_id == 'adder@latest': - code = 'print(42)' - elif skill_id == 'doubler@latest': - code = ( - 'import os, json\n' - 'upstream = json.loads(os.environ.get("UPSTREAM_OUTPUTS", "{}"))\n' - 'val = int(upstream["adder@latest"]["stdout"].strip())\n' - 'print(val * 2)\n' - ) - else: - return SkillExecutionResult( - skill_id=skill_id, success=False, error='Unknown') - - output = await container.execute_python_code( - code=code, skill_id=skill_id, input_spec=exec_input) - executor._outputs[skill_id] = output - container.spec.link_upstream(skill_id, output) - return SkillExecutionResult( - skill_id=skill_id, - success=(output.exit_code == 0), - output=output, - error=output.stderr if output.exit_code != 0 else None) - - executor._execute_single_skill = mock_execute_single - - result = run_async(executor.execute( - dag=self.dag, - execution_order=self.execution_order, - stop_on_failure=True, - query='test')) - - self.assertTrue(result.success, 'DAG execution should succeed') - - adder_out = result.results['adder@latest'].output.stdout.strip() - self.assertEqual(adder_out, '42', f'Expected 42, got: {adder_out}') - - doubler_out = result.results['doubler@latest'].output.stdout.strip() - self.assertEqual(doubler_out, '84', f'Expected 84, got: {doubler_out}') - - def test_failure_stops_pipeline(self): - """When upstream skill fails and stop_on_failure=True, pipeline stops.""" - container = self.container - executor = self.executor - - async def mock_execute_single( - skill_id, dag, execution_input=None, query=''): - exec_input = executor._build_execution_input( - skill_id, dag, execution_input) - - if skill_id == 'adder@latest': - code = 
'import sys; print("error", file=sys.stderr); sys.exit(1)' - else: - code = 'print("should not run")' - - output = await container.execute_python_code( - code=code, skill_id=skill_id, input_spec=exec_input) - executor._outputs[skill_id] = output - return SkillExecutionResult( - skill_id=skill_id, - success=(output.exit_code == 0), - output=output, - error=output.stderr if output.exit_code != 0 else None) - - executor._execute_single_skill = mock_execute_single - - result = run_async(executor.execute( - dag=self.dag, - execution_order=self.execution_order, - stop_on_failure=True, - query='test')) - - self.assertFalse(result.success, 'DAG should fail') - self.assertIn('adder@latest', result.results) - # doubler should not have been executed - self.assertNotIn('doubler@latest', result.results, - 'doubler should not run when adder fails') - - -# ============================================================================ -# Test 3: Parallel + Sequential mixed DAG -# ============================================================================ - -class TestDAGParallelMixed(unittest.TestCase): - """ - Test a mixed DAG with parallel and sequential execution. - - Scenario: gen -> [proc_x, proc_y] -> merge - - gen outputs BASE_VALUE=100 - - proc_x reads gen, outputs X_RESULT=110 (100+10) - - proc_y reads gen, outputs Y_RESULT=200 (100*2) - - merge reads both, outputs MERGED=310 (110+200) - proc_x and proc_y run in parallel. - """ - - def setUp(self): - """Create temp directories, mock skills, container, and executor.""" - self.test_root = Path( - tempfile.mkdtemp(prefix='test_dag_parallel_mixed_')) - self.skills_dir = self.test_root / 'skills' - self.workspace_dir = self.test_root / 'workspace' - - skill_names = ['gen', 'proc_x', 'proc_y', 'merge'] - self.skills = {} - for sname in skill_names: - sid = f'{sname}@latest' - sdir = self.skills_dir / sname - self.skills[sid] = create_mock_skill( - sid, sname, f'{sname} skill', sdir) - - self.container = SkillContainer( - workspace_dir=self.workspace_dir, use_sandbox=False) - - self.executor = DAGExecutor( - container=self.container, - skills=self.skills, - workspace_dir=self.workspace_dir, - llm=None, - enable_progressive_analysis=False, - enable_self_reflection=False, - ) - - self.dag = { - 'gen@latest': [], - 'proc_x@latest': ['gen@latest'], - 'proc_y@latest': ['gen@latest'], - 'merge@latest': ['proc_x@latest', 'proc_y@latest'], - } - self.execution_order = [ - 'gen@latest', - ['proc_x@latest', 'proc_y@latest'], - 'merge@latest', - ] - - def tearDown(self): - """Clean up temp directory unless KEEP_TEST_ARTIFACTS is set.""" - if not KEEP_TEST_ARTIFACTS and self.test_root.exists(): - try: - shutil.rmtree(self.test_root) - except Exception as e: - print(f'Warning: Failed to clean up {self.test_root}: {e}') - - self.executor = None - self.container = None - - def test_parallel_then_merge(self): - """gen=100 -> proc_x=110, proc_y=200 (parallel) -> merge=310.""" - container = self.container - executor = self.executor - - codes = { - 'gen@latest': 'print("BASE_VALUE=100")', - 'proc_x@latest': ( - 'import os, json\n' - 'upstream = json.loads(os.environ.get("UPSTREAM_OUTPUTS", "{}"))\n' - 'gen_stdout = upstream["gen@latest"]["stdout"].strip()\n' - 'val = int(gen_stdout.split("=")[1])\n' - 'print(f"X_RESULT={val + 10}")\n' - ), - 'proc_y@latest': ( - 'import os, json\n' - 'upstream = json.loads(os.environ.get("UPSTREAM_OUTPUTS", "{}"))\n' - 'gen_stdout = upstream["gen@latest"]["stdout"].strip()\n' - 'val = int(gen_stdout.split("=")[1])\n' - 
'print(f"Y_RESULT={val * 2}")\n' - ), - 'merge@latest': ( - 'import os, json\n' - 'upstream = json.loads(os.environ.get("UPSTREAM_OUTPUTS", "{}"))\n' - 'x_stdout = upstream["proc_x@latest"]["stdout"].strip()\n' - 'y_stdout = upstream["proc_y@latest"]["stdout"].strip()\n' - 'x_val = int(x_stdout.split("=")[1])\n' - 'y_val = int(y_stdout.split("=")[1])\n' - 'print(f"MERGED={x_val + y_val}")\n' - ), - } - - async def mock_execute_single( - skill_id, dag, execution_input=None, query=''): - exec_input = executor._build_execution_input( - skill_id, dag, execution_input) - code = codes.get(skill_id, 'print("unknown")') - output = await container.execute_python_code( - code=code, skill_id=skill_id, input_spec=exec_input) - executor._outputs[skill_id] = output - container.spec.link_upstream(skill_id, output) - return SkillExecutionResult( - skill_id=skill_id, - success=(output.exit_code == 0), - output=output, - error=output.stderr if output.exit_code != 0 else None) - - executor._execute_single_skill = mock_execute_single - - result = run_async(executor.execute( - dag=self.dag, - execution_order=self.execution_order, - stop_on_failure=True, - query='test parallel')) - - self.assertTrue(result.success, 'DAG should succeed') - - gen_out = result.results['gen@latest'].output.stdout.strip() - self.assertEqual(gen_out, 'BASE_VALUE=100') - - x_out = result.results['proc_x@latest'].output.stdout.strip() - self.assertEqual(x_out, 'X_RESULT=110', - f'proc_x should output 110, got: {x_out}') - - y_out = result.results['proc_y@latest'].output.stdout.strip() - self.assertEqual(y_out, 'Y_RESULT=200', - f'proc_y should output 200, got: {y_out}') - - merge_out = result.results['merge@latest'].output.stdout.strip() - self.assertEqual(merge_out, 'MERGED=310', - f'merge should output 310, got: {merge_out}') - - def test_parallel_skills_both_receive_upstream(self): - """Both proc_x and proc_y independently receive gen's output.""" - # Simulate gen output - self.executor._outputs['gen@latest'] = ExecutionOutput( - stdout='BASE_VALUE=100\n', stderr='', exit_code=0, duration_ms=10) - - input_x = self.executor._build_execution_input( - 'proc_x@latest', self.dag) - input_y = self.executor._build_execution_input( - 'proc_y@latest', self.dag) - - # Both should have UPSTREAM_OUTPUTS - for label, inp in [('proc_x', input_x), ('proc_y', input_y)]: - with self.subTest(skill=label): - self.assertIn('UPSTREAM_OUTPUTS', inp.env_vars) - upstream = json.loads(inp.env_vars['UPSTREAM_OUTPUTS']) - self.assertIn('gen@latest', upstream) - self.assertIn('BASE_VALUE=100', - upstream['gen@latest']['stdout']) - - def test_merge_receives_both_parallel_outputs(self): - """merge skill receives outputs from both proc_x and proc_y.""" - self.executor._outputs['proc_x@latest'] = ExecutionOutput( - stdout='X_RESULT=110\n', stderr='', exit_code=0, duration_ms=10) - self.executor._outputs['proc_y@latest'] = ExecutionOutput( - stdout='Y_RESULT=200\n', stderr='', exit_code=0, duration_ms=10) - - input_merge = self.executor._build_execution_input( - 'merge@latest', self.dag) - upstream = json.loads(input_merge.env_vars['UPSTREAM_OUTPUTS']) - - self.assertIn('proc_x@latest', upstream) - self.assertIn('proc_y@latest', upstream) - self.assertIn('X_RESULT=110', upstream['proc_x@latest']['stdout']) - self.assertIn('Y_RESULT=200', upstream['proc_y@latest']['stdout']) - - -# ============================================================================ -# Test 4: Edge cases and robustness -# 
============================================================================ - -class TestDAGEdgeCases(unittest.TestCase): - """Test edge cases in DAG upstream-downstream data passing.""" - - def setUp(self): - """Create temp directories and basic infrastructure.""" - self.test_root = Path( - tempfile.mkdtemp(prefix='test_dag_edge_cases_')) - self.skills_dir = self.test_root / 'skills' - self.workspace_dir = self.test_root / 'workspace' - - self.skill_a = create_mock_skill( - 'solo@latest', 'Solo', 'Standalone skill', - self.skills_dir / 'solo') - self.skills = {'solo@latest': self.skill_a} - - self.container = SkillContainer( - workspace_dir=self.workspace_dir, use_sandbox=False) - - self.executor = DAGExecutor( - container=self.container, - skills=self.skills, - workspace_dir=self.workspace_dir, - llm=None, - enable_progressive_analysis=False, - enable_self_reflection=False, - ) - - def tearDown(self): - """Clean up temp directory unless KEEP_TEST_ARTIFACTS is set.""" - if not KEEP_TEST_ARTIFACTS and self.test_root.exists(): - try: - shutil.rmtree(self.test_root) - except Exception as e: - print(f'Warning: Failed to clean up {self.test_root}: {e}') - - self.executor = None - self.container = None - - def test_no_upstream_no_env_vars(self): - """Skill with no dependencies has no UPSTREAM env vars.""" - dag = {'solo@latest': []} - exec_input = self.executor._build_execution_input( - 'solo@latest', dag) - - self.assertNotIn('UPSTREAM_OUTPUTS', exec_input.env_vars, - 'No UPSTREAM_OUTPUTS for skill without deps') - - def test_upstream_with_empty_stdout(self): - """Upstream with empty stdout still appears in UPSTREAM_OUTPUTS.""" - # Add a second skill that depends on solo - dep_skill = create_mock_skill( - 'dep@latest', 'Dep', 'Depends on solo', - self.skills_dir / 'dep') - self.skills['dep@latest'] = dep_skill - - self.executor._outputs['solo@latest'] = ExecutionOutput( - stdout='', stderr='', exit_code=0, duration_ms=10) - - dag = { - 'solo@latest': [], - 'dep@latest': ['solo@latest'], - } - exec_input = self.executor._build_execution_input( - 'dep@latest', dag) - upstream = json.loads(exec_input.env_vars['UPSTREAM_OUTPUTS']) - - self.assertIn('solo@latest', upstream) - self.assertEqual(upstream['solo@latest']['stdout'], '') - # No individual STDOUT shortcut since stdout is empty - self.assertNotIn('UPSTREAM_SOLO_LATEST_STDOUT', exec_input.env_vars) - - def test_upstream_with_failed_exit_code(self): - """Upstream failure data is still passed to downstream.""" - dep_skill = create_mock_skill( - 'dep@latest', 'Dep', 'Depends on solo', - self.skills_dir / 'dep') - self.skills['dep@latest'] = dep_skill - - self.executor._outputs['solo@latest'] = ExecutionOutput( - stdout='partial output\n', - stderr='something went wrong\n', - exit_code=1, - duration_ms=10, - ) - - dag = { - 'solo@latest': [], - 'dep@latest': ['solo@latest'], - } - exec_input = self.executor._build_execution_input( - 'dep@latest', dag) - upstream = json.loads(exec_input.env_vars['UPSTREAM_OUTPUTS']) - - self.assertEqual(upstream['solo@latest']['exit_code'], 1) - self.assertIn('something went wrong', - upstream['solo@latest']['stderr']) - - def test_safe_key_special_characters(self): - """Skill IDs with @, -, . 
are sanitized in env var names.""" - special_skill = create_mock_skill( - 'my-tool.v2@latest', 'MyTool', 'Tool with special chars', - self.skills_dir / 'my_tool') - self.skills['my-tool.v2@latest'] = special_skill - - dep_skill = create_mock_skill( - 'consumer@latest', 'Consumer', 'Depends on special', - self.skills_dir / 'consumer') - self.skills['consumer@latest'] = dep_skill - - self.executor._outputs['my-tool.v2@latest'] = ExecutionOutput( - stdout='special output\n', stderr='', exit_code=0, duration_ms=10) - - dag = { - 'my-tool.v2@latest': [], - 'consumer@latest': ['my-tool.v2@latest'], - } - exec_input = self.executor._build_execution_input( - 'consumer@latest', dag) - - # Safe key: my-tool.v2@latest -> MY_TOOL_V2_LATEST - expected_key = 'UPSTREAM_MY_TOOL_V2_LATEST_STDOUT' - self.assertIn(expected_key, exec_input.env_vars, - f'{expected_key} should be in env_vars, ' - f'got keys: {list(exec_input.env_vars.keys())}') - - -# ============================================================================ -# Test suite -# ============================================================================ - -def suite(): - """Create test suite with all test cases.""" - loader = unittest.TestLoader() - test_suite = unittest.TestSuite() - test_suite.addTests( - loader.loadTestsFromTestCase(TestDAGUpstreamDownstream)) - test_suite.addTests( - loader.loadTestsFromTestCase(TestDAGFullPipeline)) - test_suite.addTests( - loader.loadTestsFromTestCase(TestDAGParallelMixed)) - test_suite.addTests( - loader.loadTestsFromTestCase(TestDAGEdgeCases)) - return test_suite - - -if __name__ == '__main__': - runner = unittest.TextTestRunner(verbosity=2) - runner.run(suite()) diff --git a/tests/skills/test_skill.py b/tests/skills/test_skill.py new file mode 100644 index 000000000..9a881627b --- /dev/null +++ b/tests/skills/test_skill.py @@ -0,0 +1,1008 @@ +# Copyright (c) ModelScope Contributors. All rights reserved. +"""Tests for the Skill module. 
+ +Covers: + - SkillSource / parse_skill_source + - SkillCatalog (load, filter, cache, hot-reload) + - SkillPromptInjector + - SkillToolSet (skills_list, skill_view, skill_manage) + - LLMAgent integration (prepare_skills, create_messages) + - SkillLoader + - SkillSchema parsing / validation + - End-to-end pipeline + +Fixture skills: examples/skills/claude_skills (docx, pdf) +""" +import asyncio +import json +import os +import shutil +import tempfile +import unittest +from pathlib import Path +from unittest.mock import MagicMock, patch + +from omegaconf import DictConfig, OmegaConf + +CLAUDE_SKILLS_DIR = ( + Path(__file__).resolve().parent.parent.parent + / "examples" / "skills" / "claude_skills" +) + + +def _make_skill_dir(base: Path, skill_id: str, name: str, desc: str, + *, always: bool = False, tags=None, + requires=None, extra_body: str = "") -> Path: + """Create a minimal skill directory with SKILL.md.""" + d = base / skill_id + d.mkdir(parents=True, exist_ok=True) + lines = [ + "---", + f"name: {name}", + f'description: "{desc}"', + ] + if always: + lines.append("always: true") + if tags: + lines.append(f"tags: {tags}") + if requires: + lines.append("requires:") + if "tools" in requires: + lines.append(f" tools: {requires['tools']}") + if "env" in requires: + lines.append(f" env: {requires['env']}") + lines.append("---") + lines.append("") + lines.append(f"# {name}") + lines.append("") + lines.append(f"Instructions for {name}.") + if extra_body: + lines.append(extra_body) + (d / "SKILL.md").write_text("\n".join(lines), encoding="utf-8") + return d + + +# ============================================================ +# 1. SkillSource / parse_skill_source +# ============================================================ + +class TestSkillSource(unittest.TestCase): + + def test_local_absolute_path(self): + from ms_agent.skill.sources import parse_skill_source + src = parse_skill_source(str(CLAUDE_SKILLS_DIR)) + self.assertEqual(src.type.value, "local") + self.assertEqual(src.path, str(CLAUDE_SKILLS_DIR)) + + def test_local_relative_dot_path(self): + from ms_agent.skill.sources import parse_skill_source + src = parse_skill_source("./skills") + self.assertEqual(src.type.value, "local") + self.assertTrue(os.path.isabs(src.path)) + + def test_local_relative_dotdot_path(self): + from ms_agent.skill.sources import parse_skill_source + src = parse_skill_source("../some/path") + self.assertEqual(src.type.value, "local") + self.assertTrue(os.path.isabs(src.path)) + + def test_local_tilde_path(self): + from ms_agent.skill.sources import parse_skill_source + src = parse_skill_source("~/my_skills") + self.assertEqual(src.type.value, "local") + self.assertNotIn("~", src.path) + + def test_modelscope_uri(self): + from ms_agent.skill.sources import parse_skill_source + src = parse_skill_source("modelscope://owner/repo@v1.0#subdir") + self.assertEqual(src.type.value, "modelscope") + self.assertEqual(src.repo_id, "owner/repo") + self.assertEqual(src.revision, "v1.0") + self.assertEqual(src.subdir, "subdir") + + def test_modelscope_skill_url(self): + from ms_agent.skill.sources import parse_skill_source + src = parse_skill_source( + "https://modelscope.cn/skills/BaiduDrive/baidu-drive") + self.assertEqual(src.type.value, "modelscope") + self.assertEqual(src.repo_id, "BaiduDrive/baidu-drive") + + def test_modelscope_skill_url_with_files_suffix(self): + from ms_agent.skill.sources import parse_skill_source + src = parse_skill_source( + "https://www.modelscope.cn/skills/BaiduDrive/baidu-drive/files") + 
self.assertEqual(src.type.value, "modelscope") + self.assertEqual(src.repo_id, "BaiduDrive/baidu-drive") + + def test_at_prefix_shorthand(self): + from ms_agent.skill.sources import parse_skill_source + src = parse_skill_source("@MiniMax-AI/minimax-pdf") + self.assertEqual(src.type.value, "modelscope") + self.assertEqual(src.repo_id, "MiniMax-AI/minimax-pdf") + + def test_git_url(self): + from ms_agent.skill.sources import parse_skill_source + src = parse_skill_source("https://github.com/user/repo.git") + self.assertEqual(src.type.value, "git") + self.assertEqual(src.url, "https://github.com/user/repo.git") + + def test_owner_repo_pattern(self): + from ms_agent.skill.sources import parse_skill_source + src = parse_skill_source("ms-agent/research_skills") + self.assertEqual(src.type.value, "modelscope") + self.assertEqual(src.repo_id, "ms-agent/research_skills") + + def test_nonexistent_abs_path_becomes_local(self): + from ms_agent.skill.sources import parse_skill_source + src = parse_skill_source("/nonexistent/path/to/skills") + self.assertEqual(src.type.value, "local") + self.assertEqual(src.path, "/nonexistent/path/to/skills") + + +# ============================================================ +# 1b. Catalog download paths (ModelScope SDK / HTTP fallback / Git) +# ============================================================ + +class TestCatalogDownloadModelScopeSDK(unittest.TestCase): + """_load_from_modelscope via the real SDK HubApi.download_skill.""" + + def setUp(self): + self.tmp = Path(tempfile.mkdtemp()) + + def tearDown(self): + shutil.rmtree(self.tmp, ignore_errors=True) + + def test_sdk_download_produces_skill(self): + """HubApi.download_skill → directory with SKILL.md → SkillLoader OK.""" + from ms_agent.skill.catalog import SkillCatalog + from ms_agent.skill.sources import SkillSource, SkillSourceType + import ms_agent.skill.catalog as cat_mod + orig = cat_mod.USER_SKILLS_DIR + cat_mod.USER_SKILLS_DIR = self.tmp + try: + cat = SkillCatalog() + cat.load_from_sources([ + SkillSource(type=SkillSourceType.MODELSCOPE, + repo_id="BaiduDrive/baidu-drive"), + ]) + skills = cat.get_enabled_skills() + self.assertEqual(len(skills), 1) + skill = list(skills.values())[0] + self.assertEqual(skill.name, "baidu-drive") + self.assertTrue(len(skill.scripts) > 0) + finally: + cat_mod.USER_SKILLS_DIR = orig + + def test_sdk_download_skill_dir_name(self): + """SDK names the directory by element_name only (no owner prefix).""" + from modelscope.hub.api import HubApi + api = HubApi() + path = api.download_skill("BaiduDrive/baidu-drive", + local_dir=str(self.tmp)) + self.assertEqual(os.path.basename(path), "baidu-drive") + self.assertTrue(os.path.exists(os.path.join(path, "SKILL.md"))) + + +class TestCatalogDownloadHTTPFallback(unittest.TestCase): + """_download_skill_zip (pure-HTTP, no SDK dependency).""" + + def setUp(self): + self.tmp = Path(tempfile.mkdtemp()) + + def tearDown(self): + shutil.rmtree(self.tmp, ignore_errors=True) + + def test_http_fallback_downloads_and_extracts(self): + from ms_agent.skill.catalog import _download_skill_zip + path = _download_skill_zip("BaiduDrive/baidu-drive", str(self.tmp)) + self.assertEqual(os.path.basename(path), "baidu-drive") + self.assertTrue(os.path.exists(os.path.join(path, "SKILL.md"))) + + def test_http_fallback_naming_matches_sdk(self): + """Fallback and SDK produce the same directory basename.""" + from ms_agent.skill.catalog import _download_skill_zip + path = _download_skill_zip("BaiduDrive/baidu-drive", str(self.tmp)) + 
self.assertEqual(os.path.basename(path), "baidu-drive")
+
+    def test_http_fallback_used_when_sdk_missing(self):
+        """When HubApi import fails, we fall through to HTTP fallback."""
+        import builtins
+        from ms_agent.skill.catalog import SkillCatalog
+        from ms_agent.skill.sources import SkillSource, SkillSourceType
+        import ms_agent.skill.catalog as cat_mod
+        orig = cat_mod.USER_SKILLS_DIR
+        cat_mod.USER_SKILLS_DIR = self.tmp
+
+        # Capture the real import before patching so that every import
+        # other than the SDK module passes through untouched.
+        real_import = builtins.__import__
+
+        def mock_import(name, *args, **kwargs):
+            if name == "modelscope.hub.api":
+                raise ImportError("mocked SDK unavailable")
+            return real_import(name, *args, **kwargs)
+
+        try:
+            cat = SkillCatalog()
+            with patch("builtins.__import__", side_effect=mock_import):
+                cat.load_from_sources([
+                    SkillSource(type=SkillSourceType.MODELSCOPE,
+                                repo_id="BaiduDrive/baidu-drive"),
+                ])
+            skills = cat.get_enabled_skills()
+            self.assertEqual(len(skills), 1)
+        finally:
+            cat_mod.USER_SKILLS_DIR = orig
+
+    def test_http_fallback_invalid_skill_id_raises(self):
+        from ms_agent.skill.catalog import _download_skill_zip
+        with self.assertRaises(Exception):
+            _download_skill_zip("nonexistent/fake-skill-xyz",
+                                str(self.tmp))
+
+
+class TestCatalogDownloadGit(unittest.TestCase):
+    """_load_from_git via real git clone."""
+
+    def setUp(self):
+        self.tmp = Path(tempfile.mkdtemp())
+        self.skill_dir = self.tmp / "repo"
+        self.skill_dir.mkdir()
+        _make_skill_dir(self.skill_dir, "test-skill", "TestSkill",
+                        "A test skill")
+
+    def tearDown(self):
+        shutil.rmtree(self.tmp, ignore_errors=True)
+
+    def test_git_clone_loads_skills(self):
+        """Sanity check: the fixture repo loads as a plain local directory."""
+        from ms_agent.skill.catalog import SkillCatalog
+        from ms_agent.skill.sources import SkillSource, SkillSourceType
+
+        cat = SkillCatalog()
+        cat.load_from_sources([
+            SkillSource(type=SkillSourceType.LOCAL_DIR,
+                        path=str(self.skill_dir)),
+        ])
+        skills = cat.get_enabled_skills()
+        self.assertIn("test-skill", skills)
+
+    @patch("subprocess.run")
+    def test_git_clone_invocation(self, mock_run):
+        """Verify the git clone command is correctly constructed."""
+        from ms_agent.skill.catalog import SkillCatalog
+        from ms_agent.skill.sources import SkillSource, SkillSourceType
+
+        def side_effect(cmd, **kwargs):
+            # Simulate a successful clone by materializing a skill in the
+            # destination directory (the last element of the command).
+            dest_path = cmd[-1]
+            _make_skill_dir(Path(dest_path), "cloned", "Cloned",
+                            "A cloned skill")
+            return MagicMock(returncode=0)
+
+        mock_run.side_effect = side_effect
+
+        cat = SkillCatalog()
+        cat.load_from_sources([
+            SkillSource(type=SkillSourceType.GIT,
+                        url="https://github.com/user/skills-repo.git",
+                        revision="main"),
+        ])
+
+        call_args = mock_run.call_args[0][0]
+        self.assertIn("git", call_args)
+        self.assertIn("clone", call_args)
+        self.assertIn("--depth", call_args)
+        self.assertIn("--branch", call_args)
+        self.assertIn("main", call_args)
+        self.assertIn("https://github.com/user/skills-repo.git", call_args)
+
+    @patch("subprocess.run")
+    def test_git_clone_with_subdir(self, mock_run):
+        """Git source with subdir only loads from that subdirectory."""
+
+        def side_effect(cmd, **kwargs):
+            dest_path = Path(cmd[-1])
+            sub = dest_path / "sub"
+            _make_skill_dir(sub, "nested", "Nested", "Nested skill")
+            _make_skill_dir(dest_path, "root-skill", "Root", "Root skill")
+            return MagicMock(returncode=0)
+
+        mock_run.side_effect = side_effect
+
+        from ms_agent.skill.catalog import SkillCatalog
+        from ms_agent.skill.sources import SkillSource, 
SkillSourceType + + cat = SkillCatalog() + cat.load_from_sources([ + SkillSource(type=SkillSourceType.GIT, + url="https://github.com/user/repo.git", + subdir="sub"), + ]) + skills = cat.get_enabled_skills() + self.assertIn("nested", skills) + self.assertNotIn("root-skill", skills) + + +# ============================================================ +# 2. SkillCatalog +# ============================================================ + +class TestSkillCatalog(unittest.TestCase): + + def setUp(self): + self.tmp = Path(tempfile.mkdtemp()) + _make_skill_dir(self.tmp, "alpha", "Alpha", "Skill alpha", + tags="[demo]") + _make_skill_dir(self.tmp, "beta", "Beta", "Skill beta", + always=True, tags="[demo, test]") + + def tearDown(self): + shutil.rmtree(self.tmp, ignore_errors=True) + + def _make_catalog(self, path=None): + from ms_agent.skill.catalog import SkillCatalog + from ms_agent.skill.sources import SkillSource, SkillSourceType + catalog = SkillCatalog() + catalog.load_from_sources([ + SkillSource(type=SkillSourceType.LOCAL_DIR, + path=str(path or self.tmp)) + ]) + return catalog + + def test_load_local_skills(self): + catalog = self._make_catalog() + skills = catalog.get_enabled_skills() + self.assertIn("alpha", skills) + self.assertIn("beta", skills) + self.assertEqual(skills["alpha"].name, "Alpha") + + def test_load_claude_skills(self): + catalog = self._make_catalog(CLAUDE_SKILLS_DIR) + skills = catalog.get_enabled_skills() + self.assertIn("docx", skills) + self.assertIn("pdf", skills) + self.assertEqual(skills["docx"].name, "docx") + + def test_always_skills(self): + catalog = self._make_catalog() + always = catalog.get_always_skills() + self.assertIn("beta", always) + self.assertNotIn("alpha", always) + + def test_disable_skill(self): + catalog = self._make_catalog() + catalog.disable_skill("alpha") + skills = catalog.get_enabled_skills() + self.assertNotIn("alpha", skills) + self.assertIn("beta", skills) + + def test_enable_after_disable(self): + catalog = self._make_catalog() + catalog.disable_skill("alpha") + catalog.enable_skill("alpha") + self.assertIn("alpha", catalog.get_enabled_skills()) + + def test_whitelist_filters(self): + catalog = self._make_catalog() + catalog._whitelist = {"alpha"} + skills = catalog.get_enabled_skills() + self.assertIn("alpha", skills) + self.assertNotIn("beta", skills) + + def test_whitelist_empty_disables_all(self): + catalog = self._make_catalog() + catalog._whitelist = set() + self.assertEqual(len(catalog.get_enabled_skills()), 0) + + def test_whitelist_none_allows_all(self): + catalog = self._make_catalog() + catalog._whitelist = None + self.assertEqual(len(catalog.get_enabled_skills()), 2) + + def test_get_skill_by_id(self): + catalog = self._make_catalog() + skill = catalog.get_skill("alpha") + self.assertIsNotNone(skill) + self.assertEqual(skill.name, "Alpha") + + def test_get_nonexistent_skill(self): + catalog = self._make_catalog() + self.assertIsNone(catalog.get_skill("nonexistent")) + + def test_remove_skill(self): + catalog = self._make_catalog() + self.assertTrue(catalog.remove_skill("alpha")) + self.assertIsNone(catalog.get_skill("alpha")) + + def test_remove_nonexistent(self): + catalog = self._make_catalog() + self.assertFalse(catalog.remove_skill("nonexistent")) + + def test_add_skill_dynamically(self): + _make_skill_dir(self.tmp, "gamma", "Gamma", "Skill gamma") + catalog = self._make_catalog() + catalog.remove_skill("gamma") + self.assertIsNone(catalog.get_skill("gamma")) + skill = catalog.add_skill(str(self.tmp / "gamma")) + 
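# add_skill should register the new directory immediately, without
+        # requiring a full catalog.reload().
+        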
self.assertIsNotNone(skill) + self.assertEqual(skill.name, "Gamma") + + def test_summary_cache(self): + catalog = self._make_catalog() + s1 = catalog.get_skills_summary() + self.assertIn("Alpha", s1) + self.assertIn("Beta", s1) + s2 = catalog.get_skills_summary() + self.assertIs(s1, s2) + + def test_summary_invalidated_on_change(self): + catalog = self._make_catalog() + s1 = catalog.get_skills_summary() + catalog.disable_skill("alpha") + s2 = catalog.get_skills_summary() + self.assertNotEqual(s1, s2) + self.assertNotIn("Alpha", s2) + + def test_later_source_overrides_earlier(self): + tmp2 = Path(tempfile.mkdtemp()) + try: + _make_skill_dir(tmp2, "alpha", "Alpha Override", + "Overridden description") + from ms_agent.skill.catalog import SkillCatalog + from ms_agent.skill.sources import SkillSource, SkillSourceType + catalog = SkillCatalog() + catalog.load_from_sources([ + SkillSource(type=SkillSourceType.LOCAL_DIR, + path=str(self.tmp)), + SkillSource(type=SkillSourceType.LOCAL_DIR, + path=str(tmp2)), + ]) + self.assertEqual( + catalog.get_skill("alpha").name, "Alpha Override") + finally: + shutil.rmtree(tmp2, ignore_errors=True) + + def test_reload(self): + catalog = self._make_catalog() + (self.tmp / "alpha" / "SKILL.md").write_text( + '---\nname: Alpha\ndescription: "Updated"\n---\n# Alpha\n', + encoding="utf-8") + catalog.reload() + self.assertEqual(catalog.get_skill("alpha").description, "Updated") + + def test_load_from_config_path_string(self): + from ms_agent.skill.catalog import SkillCatalog + cfg = OmegaConf.create({"path": str(self.tmp)}) + catalog = SkillCatalog(config=cfg) + catalog.load_from_config(cfg) + self.assertIn("alpha", catalog.get_enabled_skills()) + + def test_load_from_config_path_list(self): + from ms_agent.skill.catalog import SkillCatalog + cfg = OmegaConf.create({"path": [str(self.tmp)]}) + catalog = SkillCatalog(config=cfg) + catalog.load_from_config(cfg) + self.assertIn("alpha", catalog.get_enabled_skills()) + + def test_load_from_config_with_disabled(self): + from ms_agent.skill.catalog import SkillCatalog + cfg = OmegaConf.create({ + "path": [str(self.tmp)], + "disabled": ["alpha"], + }) + catalog = SkillCatalog(config=cfg) + catalog.load_from_config(cfg) + self.assertNotIn("alpha", catalog.get_enabled_skills()) + self.assertIn("beta", catalog.get_enabled_skills()) + + +# ============================================================ +# 3. 
SkillPromptInjector +# ============================================================ + +class TestSkillPromptInjector(unittest.TestCase): + + def setUp(self): + self.tmp = Path(tempfile.mkdtemp()) + _make_skill_dir(self.tmp, "always-skill", "AlwaysSkill", + "Always active", always=True) + _make_skill_dir(self.tmp, "normal-skill", "NormalSkill", + "Normal skill") + + def tearDown(self): + shutil.rmtree(self.tmp, ignore_errors=True) + + def _make_injector(self): + from ms_agent.skill.catalog import SkillCatalog + from ms_agent.skill.prompt_injector import SkillPromptInjector + from ms_agent.skill.sources import SkillSource, SkillSourceType + catalog = SkillCatalog() + catalog.load_from_sources([ + SkillSource(type=SkillSourceType.LOCAL_DIR, + path=str(self.tmp)) + ]) + return SkillPromptInjector(catalog) + + def test_build_with_always_and_normal(self): + inj = self._make_injector() + section = inj.build_skill_prompt_section() + self.assertIn("Active Skills", section) + self.assertIn("AlwaysSkill", section) + self.assertIn("Available Skills", section) + self.assertIn("NormalSkill", section) + + def test_always_skill_body_injected(self): + inj = self._make_injector() + section = inj.build_skill_prompt_section() + self.assertIn("Instructions for AlwaysSkill", section) + + def test_frontmatter_stripped_from_always(self): + inj = self._make_injector() + section = inj.build_skill_prompt_section() + self.assertNotIn("always: true", section) + + def test_empty_catalog_returns_empty(self): + from ms_agent.skill.catalog import SkillCatalog + from ms_agent.skill.prompt_injector import SkillPromptInjector + catalog = SkillCatalog() + inj = SkillPromptInjector(catalog) + self.assertEqual(inj.build_skill_prompt_section(), "") + + def test_strip_frontmatter_static(self): + from ms_agent.skill.prompt_injector import SkillPromptInjector + content = "---\nname: Test\n---\n\nBody text." + result = SkillPromptInjector._strip_frontmatter(content) + self.assertEqual(result, "Body text.") + self.assertNotIn("---", result) + + def test_no_always_skills_omits_active_section(self): + """When no skills are marked always, only the Available section appears.""" + from ms_agent.skill.catalog import SkillCatalog + from ms_agent.skill.prompt_injector import SkillPromptInjector + from ms_agent.skill.sources import SkillSource, SkillSourceType + catalog = SkillCatalog() + catalog.load_from_sources([ + SkillSource(type=SkillSourceType.LOCAL_DIR, + path=str(CLAUDE_SKILLS_DIR)) + ]) + inj = SkillPromptInjector(catalog) + section = inj.build_skill_prompt_section() + self.assertNotIn("Active Skills", section) + self.assertIn("Available Skills", section) + self.assertIn("docx", section) + self.assertIn("pdf", section) + + +# ============================================================ +# 4. 
SkillToolSet +# ============================================================ + +class TestSkillToolSet(unittest.TestCase): + + def setUp(self): + from ms_agent.skill.catalog import SkillCatalog + from ms_agent.skill.skill_tools import SkillToolSet + from ms_agent.skill.sources import SkillSource, SkillSourceType + + self.tmp = Path(tempfile.mkdtemp()) + _make_skill_dir(self.tmp, "demo", "Demo Skill", "A demo skill", + tags="[demo, test]", + requires={"tools": "[web_search]", + "env": "[NONEXISTENT_VAR]"}) + scripts_dir = self.tmp / "demo" / "scripts" + scripts_dir.mkdir(exist_ok=True) + (scripts_dir / "helper.py").write_text( + "print('hello')", encoding="utf-8") + + self.catalog = SkillCatalog() + self.catalog.load_from_sources([ + SkillSource(type=SkillSourceType.LOCAL_DIR, + path=str(self.tmp)) + ]) + + config = DictConfig({}) + self.toolset = SkillToolSet(config, self.catalog, enable_manage=True) + + def tearDown(self): + shutil.rmtree(self.tmp, ignore_errors=True) + + def test_get_tools_includes_all(self): + tools = asyncio.get_event_loop().run_until_complete( + self.toolset._get_tools_inner()) + names = [t["tool_name"] for t in tools["skills"]] + self.assertIn("skills_list", names) + self.assertIn("skill_view", names) + self.assertIn("skill_manage", names) + + def test_get_tools_without_manage(self): + from ms_agent.skill.skill_tools import SkillToolSet + ts = SkillToolSet(DictConfig({}), self.catalog, enable_manage=False) + tools = asyncio.get_event_loop().run_until_complete( + ts._get_tools_inner()) + names = [t["tool_name"] for t in tools["skills"]] + self.assertNotIn("skill_manage", names) + + def test_skills_list(self): + result = self.toolset._handle_skills_list({}) + data = json.loads(result) + self.assertEqual(data["total"], 1) + self.assertEqual(data["skills"][0]["skill_id"], "demo") + self.assertEqual(data["skills"][0]["name"], "Demo Skill") + + def test_skills_list_with_tag_filter(self): + result = self.toolset._handle_skills_list({"tag": "demo"}) + data = json.loads(result) + self.assertEqual(data["total"], 1) + + def test_skills_list_with_nonexistent_tag(self): + result = self.toolset._handle_skills_list( + {"tag": "nonexistent"}) + self.assertEqual(result, "No skills available.") + + def test_skill_view_main_content(self): + result = self.toolset._handle_skill_view({"skill_id": "demo"}) + data = json.loads(result) + self.assertEqual(data["skill_id"], "demo") + self.assertIn("Demo Skill", data["content"]) + self.assertIn("scripts", data["linked_files"]) + + def test_skill_view_nonexistent(self): + result = self.toolset._handle_skill_view( + {"skill_id": "nonexistent"}) + data = json.loads(result) + self.assertIn("error", data) + + def test_skill_view_file(self): + result = self.toolset._handle_skill_view({ + "skill_id": "demo", + "file_path": "scripts/helper.py", + }) + data = json.loads(result) + self.assertIn("print('hello')", data["content"]) + + def test_skill_view_path_traversal_blocked(self): + result = self.toolset._handle_skill_view({ + "skill_id": "demo", + "file_path": "../../etc/passwd", + }) + data = json.loads(result) + self.assertIn("error", data) + + def test_skill_view_missing_file(self): + result = self.toolset._handle_skill_view({ + "skill_id": "demo", + "file_path": "scripts/nonexistent.py", + }) + data = json.loads(result) + self.assertIn("error", data) + + def test_skill_view_requirements_check(self): + result = self.toolset._handle_skill_view({"skill_id": "demo"}) + data = json.loads(result) + self.assertIn("requirements_status", data) + status = 
data["requirements_status"] + self.assertIn("NONEXISTENT_VAR", status["missing_env_vars"]) + + def test_skill_manage_create_and_delete(self): + content = ( + '---\nname: New Skill\ndescription: "A new skill"\n---\n' + '# New Skill\n\nInstructions.') + with patch.object(self.toolset, '_get_custom_skills_dir', + return_value=self.tmp / "_custom"): + result = self.toolset._handle_skill_manage({ + "action": "create", + "skill_id": "new-skill", + "content": content, + }) + data = json.loads(result) + self.assertTrue(data.get("success")) + + self.assertIsNotNone(self.catalog.get_skill("new-skill")) + + result = self.toolset._handle_skill_manage({ + "action": "delete", + "skill_id": "new-skill", + }) + data = json.loads(result) + self.assertTrue(data.get("success")) + self.assertIsNone(self.catalog.get_skill("new-skill")) + + def test_skill_manage_create_duplicate(self): + content = ( + '---\nname: Demo Dup\ndescription: "dup"\n---\n# Dup\n') + with patch.object(self.toolset, '_get_custom_skills_dir', + return_value=self.tmp): + result = self.toolset._handle_skill_manage({ + "action": "create", + "skill_id": "demo", + "content": content, + }) + data = json.loads(result) + self.assertIn("error", data) + + def test_skill_manage_create_invalid_frontmatter(self): + with patch.object(self.toolset, '_get_custom_skills_dir', + return_value=self.tmp / "_custom2"): + result = self.toolset._handle_skill_manage({ + "action": "create", + "skill_id": "bad-skill", + "content": "No frontmatter here.", + }) + data = json.loads(result) + self.assertIn("error", data) + + def test_skill_manage_edit(self): + new_content = ( + '---\nname: Demo Skill\ndescription: "Updated desc"\n---\n' + '# Demo Skill Updated\n') + result = self.toolset._handle_skill_manage({ + "action": "edit", + "skill_id": "demo", + "content": new_content, + }) + data = json.loads(result) + self.assertTrue(data.get("success")) + self.assertEqual( + self.catalog.get_skill("demo").description, "Updated desc") + + def test_call_tool_dispatch(self): + result = asyncio.get_event_loop().run_until_complete( + self.toolset.call_tool( + "skills", tool_name="skills_list", tool_args={})) + self.assertIn("demo", result) + + def test_skill_view_claude_skills(self): + """Verify skill_view works with real claude_skills fixtures.""" + from ms_agent.skill.catalog import SkillCatalog + from ms_agent.skill.skill_tools import SkillToolSet + from ms_agent.skill.sources import SkillSource, SkillSourceType + + catalog = SkillCatalog() + catalog.load_from_sources([ + SkillSource(type=SkillSourceType.LOCAL_DIR, + path=str(CLAUDE_SKILLS_DIR)) + ]) + ts = SkillToolSet(DictConfig({}), catalog, enable_manage=False) + + result = ts._handle_skill_view({"skill_id": "pdf"}) + data = json.loads(result) + self.assertEqual(data["name"], "pdf") + self.assertIn("PDF Processing Guide", data["content"]) + self.assertIn("scripts", data["linked_files"]) + + result = ts._handle_skill_view({"skill_id": "docx"}) + data = json.loads(result) + self.assertEqual(data["name"], "docx") + self.assertIn("DOCX creation", data["content"]) + + +# ============================================================ +# 5. 
SkillLoader +# ============================================================ + +class TestSkillLoader(unittest.TestCase): + + def test_load_claude_skills(self): + from ms_agent.skill.loader import SkillLoader + loader = SkillLoader() + skills = loader.load_skills(str(CLAUDE_SKILLS_DIR)) + ids = [s.skill_id for s in skills.values()] + self.assertIn("docx", ids) + self.assertIn("pdf", ids) + + def test_reload_skill(self): + from ms_agent.skill.loader import SkillLoader + loader = SkillLoader() + loader.load_skills(str(CLAUDE_SKILLS_DIR)) + reloaded = loader.reload_skill(str(CLAUDE_SKILLS_DIR / "pdf")) + self.assertIsNotNone(reloaded) + self.assertEqual(reloaded.name, "pdf") + + def test_skill_has_scripts(self): + from ms_agent.skill.loader import SkillLoader + loader = SkillLoader() + skills = loader.load_skills(str(CLAUDE_SKILLS_DIR)) + pdf_skill = skills.get("pdf") or skills.get("pdf@latest") + self.assertIsNotNone(pdf_skill) + script_names = [s.name for s in pdf_skill.scripts] + self.assertTrue( + len(script_names) > 0, + "pdf skill should have scripts") + + def test_skill_has_references(self): + from ms_agent.skill.loader import SkillLoader + loader = SkillLoader() + skills = loader.load_skills(str(CLAUDE_SKILLS_DIR)) + pdf_skill = skills.get("pdf") or skills.get("pdf@latest") + self.assertIsNotNone(pdf_skill) + ref_names = [r.name for r in pdf_skill.references] + self.assertIn("reference.md", ref_names) + self.assertIn("forms.md", ref_names) + + +# ============================================================ +# 6. Integration: LLMAgent.prepare_skills + create_messages +# ============================================================ + +class TestLLMAgentSkillIntegration(unittest.TestCase): + + def _make_agent(self, skills_path): + from ms_agent.agent.llm_agent import LLMAgent + config = OmegaConf.create({ + "llm": { + "model": "qwen-max", + "api_base": "https://dashscope.aliyuncs.com/compatible-mode/v1", + }, + "skills": { + "path": [str(skills_path)], + }, + "prompt": { + "system": "You are a test agent.", + }, + }) + return LLMAgent(config=config, tag="test-agent") + + def test_prepare_skills_loads_catalog(self): + agent = self._make_agent(CLAUDE_SKILLS_DIR) + agent.tool_manager = MagicMock() + asyncio.get_event_loop().run_until_complete( + agent.prepare_skills()) + self.assertIsNotNone(agent._skill_catalog) + self.assertIsNotNone(agent._skill_injector) + agent.tool_manager.register_tool.assert_called_once() + + def test_prepare_skills_noop_without_config(self): + from ms_agent.agent.llm_agent import LLMAgent + config = OmegaConf.create({ + "llm": { + "model": "qwen-max", + "api_base": "https://dashscope.aliyuncs.com/compatible-mode/v1", + }, + "prompt": {"system": "Test"}, + }) + agent = LLMAgent(config=config, tag="no-skill-agent") + asyncio.get_event_loop().run_until_complete( + agent.prepare_skills()) + self.assertIsNone(agent._skill_catalog) + self.assertIsNone(agent._skill_injector) + + def test_create_messages_injects_skill_section(self): + agent = self._make_agent(CLAUDE_SKILLS_DIR) + agent.tool_manager = MagicMock() + asyncio.get_event_loop().run_until_complete( + agent.prepare_skills()) + + msgs = asyncio.get_event_loop().run_until_complete( + agent.create_messages("Hello")) + + system_content = msgs[0].content + self.assertIn("Available Skills", system_content) + self.assertIn("docx", system_content) + self.assertIn("pdf", system_content) + self.assertIn("skill_view", system_content) + + def test_create_messages_no_injection_without_skills(self): + from 
ms_agent.agent.llm_agent import LLMAgent + config = OmegaConf.create({ + "llm": { + "model": "qwen-max", + "api_base": "https://dashscope.aliyuncs.com/compatible-mode/v1", + }, + "prompt": {"system": "You are a test agent."}, + }) + agent = LLMAgent(config=config, tag="no-skill") + msgs = asyncio.get_event_loop().run_until_complete( + agent.create_messages("Hello")) + self.assertNotIn("Available Skills", msgs[0].content) + + def test_create_messages_with_always_skill(self): + """Verify always-skill injection using an inline fixture.""" + from ms_agent.agent.llm_agent import LLMAgent + tmp = Path(tempfile.mkdtemp()) + try: + _make_skill_dir(tmp, "greeter", "Greeter", + "Auto-greet", always=True) + _make_skill_dir(tmp, "helper", "Helper", "A helper") + config = OmegaConf.create({ + "llm": {"model": "qwen-max"}, + "skills": {"path": [str(tmp)]}, + "prompt": {"system": "Test agent."}, + }) + agent = LLMAgent(config=config, tag="always-test") + agent.tool_manager = MagicMock() + asyncio.get_event_loop().run_until_complete( + agent.prepare_skills()) + msgs = asyncio.get_event_loop().run_until_complete( + agent.create_messages("Hi")) + content = msgs[0].content + self.assertIn("Active Skills", content) + self.assertIn("Greeter", content) + self.assertIn("Instructions for Greeter", content) + self.assertIn("Available Skills", content) + self.assertIn("helper", content) + finally: + shutil.rmtree(tmp, ignore_errors=True) + + +# ============================================================ +# 7. Schema parsing and validation +# ============================================================ + +class TestSchemaPreserved(unittest.TestCase): + + def test_skill_schema_parser_works(self): + from ms_agent.skill.schema import SkillSchemaParser + skill = SkillSchemaParser.parse_skill_directory( + CLAUDE_SKILLS_DIR / "pdf") + self.assertIsNotNone(skill) + self.assertEqual(skill.skill_id, "pdf") + self.assertEqual(skill.name, "pdf") + self.assertTrue(len(skill.scripts) > 0) + + def test_docx_skill_has_references(self): + from ms_agent.skill.schema import SkillSchemaParser + skill = SkillSchemaParser.parse_skill_directory( + CLAUDE_SKILLS_DIR / "docx") + self.assertIsNotNone(skill) + self.assertEqual(skill.skill_id, "docx") + ref_names = [r.name for r in skill.references] + self.assertIn("docx-js.md", ref_names) + self.assertIn("ooxml.md", ref_names) + + def test_frontmatter_parsing(self): + from ms_agent.skill.schema import SkillSchemaParser + content = '---\nname: Test\ndescription: "desc"\n---\nBody' + fm = SkillSchemaParser.parse_yaml_frontmatter(content) + self.assertEqual(fm["name"], "Test") + + def test_skill_schema_validation(self): + from ms_agent.skill.schema import SkillSchemaParser + skill = SkillSchemaParser.parse_skill_directory( + CLAUDE_SKILLS_DIR / "pdf") + errors = SkillSchemaParser.validate_skill_schema(skill) + self.assertEqual(len(errors), 0) + + +# ============================================================ +# 8. 
End-to-end pipeline +# ============================================================ + +class TestEndToEnd(unittest.TestCase): + + def test_full_pipeline_with_claude_skills(self): + from ms_agent.skill.catalog import SkillCatalog + from ms_agent.skill.prompt_injector import SkillPromptInjector + from ms_agent.skill.skill_tools import SkillToolSet + + cfg = OmegaConf.create({"path": [str(CLAUDE_SKILLS_DIR)]}) + catalog = SkillCatalog(config=cfg) + catalog.load_from_config(cfg) + + skills = catalog.get_enabled_skills() + self.assertIn("docx", skills) + self.assertIn("pdf", skills) + + injector = SkillPromptInjector(catalog) + section = injector.build_skill_prompt_section() + self.assertIn("Available Skills", section) + self.assertIn("docx", section) + self.assertIn("pdf", section) + + toolset = SkillToolSet( + DictConfig({}), catalog, enable_manage=False) + + list_result = toolset._handle_skills_list({}) + data = json.loads(list_result) + self.assertGreaterEqual(data["total"], 2) + + view_result = toolset._handle_skill_view( + {"skill_id": "pdf"}) + view_data = json.loads(view_result) + self.assertEqual(view_data["name"], "pdf") + self.assertIn("scripts", view_data["linked_files"]) + + +if __name__ == "__main__": + unittest.main()
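+
+
+# Note: the ModelScope and git download tests above reach the network.
+# To run only the hermetic groups, unittest's -k filter can help, e.g.
+# (the module path depends on where this file lives in the repo):
+#   python -m unittest -k TestSkillCatalog tests.<this_module>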