diff --git a/ms_agent/agent/llm_agent.py b/ms_agent/agent/llm_agent.py
index 5f2ddf2e7..f3bf3184c 100644
--- a/ms_agent/agent/llm_agent.py
+++ b/ms_agent/agent/llm_agent.py
@@ -7,7 +7,7 @@
 import threading
 import uuid
 from contextlib import contextmanager
-from copy import deepcopy
+from copy import deepcopy, copy
 from typing import Any, AsyncGenerator, Dict, List, Optional, Tuple, Union
 
 import json
@@ -24,6 +24,9 @@
 from ms_agent.utils import async_retry, read_history, save_history
 from ms_agent.utils.constants import DEFAULT_TAG, DEFAULT_USER
 from ms_agent.utils.logger import get_logger
+from ms_agent.skill.catalog import SkillCatalog
+from ms_agent.skill.prompt_injector import SkillPromptInjector
+from ms_agent.skill.skill_tools import SkillToolSet
 from omegaconf import DictConfig, OmegaConf
 
 from ..config.config import Config, ConfigLifecycleHandler
@@ -35,17 +38,17 @@
 class LLMAgent(Agent):
     """
     An agent designed to run LLM-based tasks with support for tools, memory,
-    planning, callbacks, and automatic skill execution.
+    planning, callbacks, and skill integration.
 
     This class provides a full lifecycle for running an LLM agent, including:
     - Prompt preparation
     - Chat history management
     - External tool calling
     - Memory retrieval and updating
-    - Planning logic
    - Stream or non-stream response generation
     - Callback hooks at various stages of execution
-    - Automatic skill detection and execution (AutoSkills integration)
+    - Skill system: skill discovery (skills_list), viewing (skill_view),
+      and management (skill_manage) as standard tools
 
     Args:
        config (DictConfig): Pre-loaded configuration object.
@@ -54,28 +57,12 @@ class LLMAgent(Agent):
         **kwargs: Additional keyword arguments passed to the parent Agent
             constructor.
 
    Skills Configuration (in config.skills):
-        path: Path(s) to skill directories.
-        enable_retrieve: Whether to use retriever (None=auto based on skill count).
-        retrieve_args: Arguments for HybridRetriever (top_k, min_score).
-        max_candidate_skills: Maximum candidate skills to consider.
-        max_retries: Maximum retry attempts for skill execution.
-        work_dir: Working directory for skill execution.
-        use_sandbox: Whether to use Docker sandbox.
-        auto_execute: Whether to auto-execute skills after retrieval.
-
-    Example:
-        ```python
-        config = DictConfig({
-            'llm': {...},
-            'skills': {
-                'path': '/path/to/skills',
-                'auto_execute': True,
-                'work_dir': '/path/to/workspace'
-            }
-        })
-        agent = LLMAgent(config, tag='my-agent')
-        result = await agent.run('Generate a PDF report for Q4 sales of Apple')
-        ```
+        path: Path(s) to skill directories or ModelScope repo IDs.
+        sources: Structured source list (type, path, repo_id, url, etc.).
+        auto_discover: Automatically scan the skills/ directory under the CWD.
+        enable_manage: Enable the skill_manage tool for runtime skill CRUD.
+        whitelist: Skill ID whitelist (null=all, []=none, [ids]=specific).
+        disabled: List of disabled skill IDs.
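+
+    Example (a minimal sketch; the skills path, tag, and query are
+        illustrative, and only keys documented in Skills Configuration are set):
+        ```python
+        config = DictConfig({
+            'llm': {...},
+            'skills': {
+                'path': '/path/to/skills',
+                'enable_manage': True
+            }
+        })
+        agent = LLMAgent(config, tag='my-agent')
+        result = await agent.run('Generate a PDF report for Q4 sales')
+        ```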
""" AGENT_NAME = 'LLMAgent' @@ -107,7 +94,7 @@ def __init__( self.tool_manager: Optional[ToolManager] = None self.memory_tools: List[Memory] = [] self.rag: Optional[RAG] = None - self.knowledge_search: Optional[SirschmunkSearch] = None + self.knowledge_search: Optional[SirchmunkSearch] = None self.llm: Optional[LLM] = None self.runtime: Optional[Runtime] = None self.max_chat_round: int = 0 @@ -119,237 +106,74 @@ def __init__( self.mcp_client = kwargs.get('mcp_client', None) self.config_handler = self.register_config_handler() - # AutoSkills integration (lazy initialization) - self._auto_skills = None - self._auto_skills_initialized = False - self._last_skill_result = None - self._skill_mode_active = False - - def _get_skills_config(self) -> Optional[DictConfig]: - """Get skills configuration from agent config.""" - if hasattr(self.config, 'skills') and self.config.skills: - return self.config.skills - return None - - def _ensure_auto_skills(self) -> bool: - """ - Ensure AutoSkills is initialized (lazy initialization). - - Returns: - True if AutoSkills is available and initialized. - """ - if self._auto_skills_initialized: - return self._auto_skills is not None - - skills_config = self._get_skills_config() - if not skills_config: - self._auto_skills_initialized = True - return False - - skills_path = getattr(skills_config, 'path', None) - if not skills_path: - logger.debug('No skills path configured') - self._auto_skills_initialized = True - return False - - # Ensure LLM is initialized - if self.llm is None: - self.prepare_llm() - - try: - from ms_agent.skill.auto_skills import AutoSkills - - # Check sandbox requirements - use_sandbox = getattr(skills_config, 'use_sandbox', True) - if use_sandbox: - from ms_agent.utils.docker_utils import is_docker_daemon_running - - if not is_docker_daemon_running(): - logger.warning( - 'Docker not running, disabling sandbox for skills') - use_sandbox = False - - # Build retrieve args - retrieve_args = {} - if hasattr(skills_config, 'retrieve_args'): - retrieve_args = OmegaConf.to_container( - skills_config.retrieve_args) - - self._auto_skills = AutoSkills( - skills=skills_path, - llm=self.llm, - enable_retrieve=getattr(skills_config, 'enable_retrieve', - None), - retrieve_args=retrieve_args, - max_candidate_skills=getattr(skills_config, - 'max_candidate_skills', 10), - max_retries=getattr(skills_config, 'max_retries', 3), - work_dir=getattr(skills_config, 'work_dir', None), - use_sandbox=use_sandbox, - ) - logger.info( - f'AutoSkills initialized with {len(self._auto_skills.all_skills)} skills' - ) - self._auto_skills_initialized = True - return True - - except Exception as e: - logger.warning(f'Failed to initialize AutoSkills: {e}') - self._auto_skills_initialized = True - return False - - @property - def skills_available(self) -> bool: - """Check if AutoSkills is available.""" - return self._ensure_auto_skills() - - @property - def auto_skills(self): - """Get AutoSkills instance (maybe None if not configured).""" - self._ensure_auto_skills() - return self._auto_skills - - async def should_use_skills(self, query: str) -> bool: - """ - Determine if the query should use skills. + # Skill system (initialized in prepare_skills) + self._skill_catalog = None + self._skill_injector = None - Combines keyword detection with LLM-based analysis. - - Args: - query: User's query string. - - Returns: - True if skills should be used for this query. 
- """ - if not self._ensure_auto_skills(): - return False - - skills_config = self._get_skills_config() - if not skills_config: - return False - skills_path = getattr(skills_config, 'path', None) - if not skills_path: - return False - - # Use LLM analysis for ambiguous queries - try: - needs_skills, _, _, _ = self._auto_skills._analyze_query(query) - return needs_skills - except Exception as e: - logger.error(f'Skill analysis error: {e}') - return False - - async def get_skill_dag(self, query: str): - """ - Get skill DAG for a query without executing. - - Args: - query: User's query string. - - Returns: - SkillDAGResult containing the execution plan, or None if unavailable. - """ - if not self._ensure_auto_skills(): - return None - return await self._auto_skills.get_skill_dag(query) - - async def execute_skills(self, query: str, execution_input=None): - """ - Execute skills for a query. - - Args: - query: User's query string. - execution_input: Optional initial input for skills. + async def prepare_skills(self): + """Initialize the skill system from config.skills. - Returns: - SkillDAGResult with execution results, or None if unavailable. + Sets up SkillCatalog, SkillPromptInjector, and registers + SkillToolSet into ToolManager. """ - if not self._ensure_auto_skills(): - return None - - skills_config = self._get_skills_config() - stop_on_failure = ( - getattr(skills_config, 'stop_on_failure', True) - if skills_config else True) - - result = await self._auto_skills.run( - query=query, - execution_input=execution_input, - stop_on_failure=stop_on_failure, - ) - self._last_skill_result = result - return result + if not hasattr(self.config, 'skills') or not self.config.skills: + return - def _format_skill_result_as_messages(self, dag_result) -> List[Message]: - """ - Format skill execution result as messages for agent history. + skills_config = self.config.skills + self._skill_catalog = SkillCatalog(config=skills_config) + self._skill_catalog.load_from_config(skills_config) + + self._skill_injector = SkillPromptInjector(self._skill_catalog) + + enable_manage = getattr(skills_config, 'enable_manage', False) + skill_toolset = SkillToolSet( + self.config, self._skill_catalog, + enable_manage=enable_manage) + await skill_toolset.connect() + self.tool_manager.register_tool(skill_toolset) + + # Index the newly added tool into the live tool registry. + # We cannot call reindex_tool() because it would duplicate + # already-indexed tools; instead we index just this one. + tools = await skill_toolset.get_tools() + spliter = self.tool_manager.TOOL_SPLITER + for server_name, tool_list in tools.items(): + for tool in tool_list: + key = f"{server_name}{spliter}{tool['tool_name']}" + tool = copy(tool) + tool['tool_name'] = key + self.tool_manager._tool_index[key] = ( + skill_toolset, server_name, tool) + + self._check_skill_tool_dependencies() + + def _check_skill_tool_dependencies(self): + """Warn if skills are enabled but essential tools are missing.""" + if (not self._skill_catalog + or not self._skill_catalog.get_enabled_skills()): + return - Args: - dag_result: SkillDAGResult from skill execution. + has_tools = hasattr(self.config, 'tools') and self.config.tools + warnings = [] - Returns: - List of Message objects describing the result. 
- """ - messages = [] - - # Handle chat-only response - if dag_result.chat_response: - messages.append( - Message(role='assistant', content=dag_result.chat_response)) - return messages - - # Handle incomplete skills - if not dag_result.is_complete: - content = "I couldn't find suitable skills for this task." - if dag_result.clarification: - content += f'\n\n{dag_result.clarification}' - messages.append(Message(role='assistant', content=content)) - return messages - - # Format execution result - if dag_result.execution_result: - exec_result = dag_result.execution_result - skill_names = list(dag_result.selected_skills.keys()) - - if exec_result.success: - content = f"Successfully executed {len(skill_names)} skill(s): {', '.join(skill_names)}\n\n" - - # Add output summaries - for skill_id, result in exec_result.results.items(): - if result.success and result.output: - output = result.output - if output.stdout: - stdout_preview = output.stdout[:1000] - if len(output.stdout) > 1000: - stdout_preview += '...' - content += f'**{skill_id} output:**\n{stdout_preview}\n\n' - if output.output_files: - content += f'**Generated files:** {list(output.output_files.values())}\n\n' - - content += ( - f'Total execution time: {exec_result.total_duration_ms:.2f}ms' - ) - else: - content = 'Skill execution completed with errors.\n\n' - for skill_id, result in exec_result.results.items(): - if not result.success: - content += f'**{skill_id} failed:** {result.error}\n' + if not has_tools or not hasattr(self.config.tools, 'file_system'): + warnings.append( + "file_system (read_file, write_file) - needed for " + "reading skill scripts and writing outputs") - messages.append(Message(role='assistant', content=content)) - else: - # DAG only, no execution - skill_names = list(dag_result.selected_skills.keys()) - content = f'Found {len(skill_names)} relevant skill(s) for your task:\n' - for skill_id, skill in dag_result.selected_skills.items(): - desc_preview = skill.description[:100] - if len(skill.description) > 100: - desc_preview += '...' - content += f'- **{skill.name}** ({skill_id}): {desc_preview}\n' - content += f'\nExecution order: {dag_result.execution_order}' - - messages.append(Message(role='assistant', content=content)) + if not has_tools or not hasattr(self.config.tools, 'code_executor'): + warnings.append( + "code_executor (python, shell execution) - needed for " + "running skill scripts") - return messages + if warnings: + logger.warning( + "Skills are configured but the following recommended tools " + "are not enabled. Skills that depend on these tools may not " + "work correctly:\n" + + "\n".join(f" - {w}" for w in warnings) + + "\nAdd them to your agent config under 'tools:' to enable." + ) def register_callback(self, callback: Callback): """ @@ -633,6 +457,13 @@ async def create_messages( content=self.system or LLMAgent.DEFAULT_SYSTEM), Message(role='user', content=messages or self.query), ] + + # Inject skill prompt section into system message + if self._skill_injector: + skill_section = self._skill_injector.build_skill_prompt_section() + if skill_section: + messages[0].content += "\n\n" + skill_section + return messages async def do_rag(self, messages: List[Message]): @@ -672,64 +503,6 @@ async def do_rag(self, messages: List[Message]): f'Relevant context retrieved from codebase search:\n\n{context}\n\n' f'User question: {query}') - async def do_skill(self, - messages: List[Message]) -> Optional[List[Message]]: - """ - Process skill-related query if applicable. 
- - Analyzes the user query, determines if skills should be used, - and executes the skill pipeline if appropriate. - - Args: - messages: Normalized message list with system and user messages - - Returns: - Updated messages with skill results if successful and should return, - None if no skill processing or fallback to standard agent - """ - # Extract user query from normalized messages - query = ( - messages[1].content - if len(messages) > 1 and messages[1].role == 'user' else None) - - if not query: - return None - - # Check if skills should be used for this query - if not await self.should_use_skills(query): - return None - - logger.info('Query detected as skill-related, using skill processing.') - self._skill_mode_active = True - - try: - skills_config = self._get_skills_config() - auto_execute = ( - getattr(skills_config, 'auto_execute', True) - if skills_config else True) - - if auto_execute: - dag_result = await self.execute_skills(query) - else: - dag_result = await self.get_skill_dag(query) - - if dag_result: - skill_messages = self._format_skill_result_as_messages( - dag_result) - for msg in skill_messages: - messages.append(msg) - return messages - - # dag_result is None/empty, fallback to standard agent - self._skill_mode_active = False - return None - - except Exception as e: - logger.warning( - f'Skill execution failed: {e}, falling back to standard agent') - self._skill_mode_active = False - return None - async def load_memory(self): """Initialize and append memory tool instances based on the configuration provided in the global config. @@ -1091,16 +864,14 @@ def save_history(self, messages: List[Message], **kwargs): async def run_loop(self, messages: Union[List[Message], str], **kwargs) -> AsyncGenerator[Any, Any]: - """ - Run the agent, mainly contains a llm calling and tool calling loop. + """Run the agent loop (LLM generation + tool calling). - If skills are configured, skill-related queries will be automatically routed to skill execution. + Skills, when configured, are exposed as standard tools + (skills_list, skill_view, skill_manage) and injected into + the system prompt—no special routing needed. Args: - messages (Union[List[Message], str]): Input data for the agent. Can be a raw string prompt, - or a list of previous interaction messages. - Returns: - List[Message]: A list of message objects representing the agent's response or interaction history. + messages: Input prompt string or list of Message objects. 
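+
+        Example (a minimal sketch; assumes a prepared agent and that each
+        yield is the in-progress message list):
+            ```python
+            async for msgs in agent.run_loop('List the available skills'):
+                ...
+            ```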
""" try: self.max_chat_round = getattr(self.config, 'max_chat_round', @@ -1109,6 +880,7 @@ async def run_loop(self, messages: Union[List[Message], str], self.prepare_llm() self.prepare_runtime() await self.prepare_tools() + await self.prepare_skills() await self.load_memory() await self.prepare_rag() await self.prepare_knowledge_search() @@ -1121,19 +893,7 @@ async def run_loop(self, messages: Union[List[Message], str], self.config, self.runtime, messages = self.read_history(messages) if self.runtime.round == 0: - # New task: create standardized messages first messages = await self.create_messages(messages) - - # Try skill processing first - skill_result = await self.do_skill(messages) - if skill_result is not None: - await self.on_task_begin(skill_result) - yield skill_result - await self.on_task_end(skill_result) - await self.cleanup_tools() - return - - # Standard processing continues await self.do_rag(messages) await self.on_task_begin(messages) diff --git a/ms_agent/skill/__init__.py b/ms_agent/skill/__init__.py index 611082129..84046a936 100644 --- a/ms_agent/skill/__init__.py +++ b/ms_agent/skill/__init__.py @@ -1,8 +1,21 @@ # Copyright (c) ModelScope Contributors. All rights reserved. -from .auto_skills import AutoSkills, DAGExecutionResult, SkillDAGResult +from .catalog import SkillCatalog +from .loader import SkillLoader, load_skills +from .prompt_injector import SkillPromptInjector +from .schema import SkillFile, SkillSchema, SkillSchemaParser +from .skill_tools import SkillToolSet +from .sources import SkillSource, SkillSourceType, parse_skill_source __all__ = [ - 'AutoSkills', - 'SkillDAGResult', - 'DAGExecutionResult', + 'SkillSchema', + 'SkillSchemaParser', + 'SkillFile', + 'SkillLoader', + 'load_skills', + 'SkillSource', + 'SkillSourceType', + 'parse_skill_source', + 'SkillCatalog', + 'SkillPromptInjector', + 'SkillToolSet', ] diff --git a/ms_agent/skill/auto_skills.py b/ms_agent/skill/auto_skills.py deleted file mode 100644 index 170c49c91..000000000 --- a/ms_agent/skill/auto_skills.py +++ /dev/null @@ -1,1908 +0,0 @@ -# flake8: noqa -# isort: skip_file -# yapf: disable -import asyncio -import logging -import os -import re -from dataclasses import dataclass, field -from pathlib import Path -from typing import Any, Dict, List, Literal, Optional, Set, Tuple, Union - -import json -from ms_agent.llm import LLM -from ms_agent.llm.utils import Message -from ms_agent.retriever.hybrid_retriever import HybridRetriever -from ms_agent.skill.container import (ExecutionInput, ExecutionOutput, - ExecutorType, SkillContainer) -from ms_agent.skill.loader import load_skills -from ms_agent.skill.prompts import (PROMPT_ANALYZE_EXECUTION_ERROR, - PROMPT_ANALYZE_QUERY_FOR_SKILLS, - PROMPT_BUILD_SKILLS_DAG, - PROMPT_DIRECT_SELECT_SKILLS, - PROMPT_FILTER_SKILLS_DEEP, - PROMPT_FILTER_SKILLS_FAST, - PROMPT_SKILL_ANALYSIS_PLAN, - PROMPT_SKILL_EXECUTION_COMMAND) -from ms_agent.skill.schema import SkillContext, SkillExecutionPlan, SkillSchema -from ms_agent.utils.logger import get_logger - -logger = get_logger() - - -def _configure_logger_to_dir(log_dir: Path) -> None: - """ - Configure the logger to output to a specific directory. - - Args: - log_dir: Directory path for log files. 
- """ - log_dir.mkdir(parents=True, exist_ok=True) - log_file = log_dir / 'ms_agent.log' - - # Get current log level from environment - log_level_str = os.getenv('LOG_LEVEL', 'INFO').upper() - log_level = getattr(logging, log_level_str, logging.INFO) - - # Update logger level to respect current LOG_LEVEL env var - logger.setLevel(log_level) - for handler in logger.handlers: - handler.setLevel(log_level) - - # Check if file handler for this path already exists - for handler in logger.handlers: - if isinstance(handler, logging.FileHandler): - if Path(handler.baseFilename).resolve() == log_file.resolve(): - return # Already configured - - # Remove existing file handlers and add new one - for handler in logger.handlers[:]: - if isinstance(handler, logging.FileHandler): - logger.removeHandler(handler) - - file_handler = logging.FileHandler(str(log_file), mode='a') - file_handler.setFormatter(logging.Formatter('[%(levelname)s:%(name)s] %(message)s')) - file_handler.setLevel(log_level) - logger.addHandler(file_handler) - logger.info(f'Logger configured to output to: {log_file}') - - -@dataclass -class SkillExecutionResult: - """ - Result of executing a single skill. - - Attributes: - skill_id: Identifier of the executed skill. - success: Whether execution was successful. - output: ExecutionOutput from container. - error: Error message if execution failed. - """ - skill_id: str - success: bool = False - output: Optional[ExecutionOutput] = None - error: Optional[str] = None - - -@dataclass -class DAGExecutionResult: - """ - Result of executing the entire skill DAG. - - Attributes: - success: Whether all skills executed successfully. - results: Dict mapping skill_id to SkillExecutionResult. - execution_order: Actual execution order (with parallel groups). - total_duration_ms: Total execution duration in milliseconds. - """ - success: bool = False - results: Dict[str, SkillExecutionResult] = field(default_factory=dict) - execution_order: List[Union[str, List[str]]] = field(default_factory=list) - total_duration_ms: float = 0.0 - - def get_skill_output(self, skill_id: str) -> Optional[ExecutionOutput]: - """Get output from a specific skill execution.""" - result = self.results.get(skill_id) - return result.output if result else None - - -class SkillAnalyzer: - """ - Progressive skill analyzer for incremental context loading. - - Implements two-phase analysis: - 1. Plan Phase: Analyze skill metadata + content to create execution plan - 2. Load Phase: Load only required resources based on plan - """ - - def __init__(self, llm: 'LLM'): - """ - Initialize skill analyzer. - - Args: - llm: LLM instance for analysis. 
- """ - self.llm = llm - - def _llm_generate(self, prompt: str) -> str: - """Generate LLM response from prompt.""" - from ms_agent.llm.utils import Message - messages = [Message(role='user', content=prompt)] - logger.debug(f'Input msg to LLM in SkillAnalyzer: {messages}') - response = self.llm.generate(messages=messages) - res = response.content if hasattr(response, - 'content') else str(response) - logger.debug(f'LLM response in SkillAnalyzer: {res}') - return res - - def _parse_json_response(self, response: str) -> Dict[str, Any]: - """Parse JSON from LLM response with robust extraction.""" - # Remove markdown code blocks if present - response = re.sub(r'```json\s*', '', response) - response = re.sub(r'```\s*$', '', response) - response = response.strip() - - # Try direct parsing first - try: - return json.loads(response) - except json.JSONDecodeError: - pass - - # Try to extract JSON object from response - try: - # Find the outermost JSON object - start = response.find('{') - if start != -1: - # Find matching closing brace - depth = 0 - for i, char in enumerate(response[start:], start): - if char == '{': - depth += 1 - elif char == '}': - depth -= 1 - if depth == 0: - json_str = response[start:i + 1] - return json.loads(json_str) - except json.JSONDecodeError: - pass - - # Try regex extraction as fallback - try: - json_match = re.search(r'\{[\s\S]*\}', response) - if json_match: - return json.loads(json_match.group()) - except json.JSONDecodeError: - pass - - logger.warning(f'Failed to parse JSON: {response[:500]}...') - return {} - - def analyze_skill_plan(self, - skill: SkillSchema, - query: str, - root_path: Path = None) -> SkillContext: - """ - Phase 1: Analyze skill and create execution plan. - - Only loads skill metadata and content (SKILL.md), not scripts/resources. - - Args: - skill: SkillSchema to analyze. - query: User's query to fulfill. - root_path: Root path for skill context. - - Returns: - SkillContext with execution plan (resources not yet loaded). 
- """ - # Create context with lazy loading - context = SkillContext( - skill=skill, - query=query, - root_path=root_path or skill.skill_path.parent) - - # Build prompt with skill overview (not full content) - prompt = PROMPT_SKILL_ANALYSIS_PLAN.format( - query=query, - skill_id=skill.skill_id, - skill_name=skill.name, - skill_description=skill.description, - skill_content=skill.content[:4000] if skill.content else '', - scripts_list=', '.join(context.get_scripts_list()) or 'None', - references_list=', '.join(context.get_references_list()) or 'None', - resources_list=', '.join(context.get_resources_list()) or 'None') - - response = self._llm_generate(prompt) - parsed = self._parse_json_response(response) - - # Build execution plan - plan = SkillExecutionPlan( - can_handle=parsed.get('can_handle', False), - plan_summary=parsed.get('plan_summary', ''), - steps=parsed.get('steps', []), - required_scripts=parsed.get('required_scripts', []), - required_references=parsed.get('required_references', []), - required_resources=parsed.get('required_resources', []), - required_packages=parsed.get('required_packages', []), - parameters=parsed.get('parameters', {}), - reasoning=parsed.get('reasoning', '')) - - context.plan = plan - context.spec.plan = plan.plan_summary - - logger.info( - f'Skill analysis plan: can_handle={plan.can_handle}, ' - f'scripts={plan.required_scripts}, refs={plan.required_references}, ' - f'packages={plan.required_packages}' - ) - - return context - - def load_skill_resources(self, context: SkillContext) -> SkillContext: - """ - Phase 2: Load resources based on execution plan. - - Args: - context: SkillContext with plan from Phase 1. - - Returns: - SkillContext with loaded resources. - """ - if not context.plan or not context.plan.can_handle: - logger.warning('No valid plan, skipping resource loading') - return context - - context.load_from_plan() - logger.info( - f'Loaded resources: scripts={len(context.scripts)}, ' - f'refs={len(context.references)}, res={len(context.resources)}') - - return context - - def generate_execution_commands( - self, context: SkillContext) -> List[Dict[str, Any]]: - """ - Generate execution commands from loaded context. - - Args: - context: SkillContext with loaded resources. - - Returns: - List of execution command dictionaries. 
- """ - if not context.plan: - return [] - - prompt = PROMPT_SKILL_EXECUTION_COMMAND.format( - query=context.query, - skill_id=context.skill.skill_id, - execution_plan=json.dumps( - { - 'plan_summary': context.plan.plan_summary, - 'steps': context.plan.steps, - 'parameters': context.plan.parameters, - }, - indent=2), - scripts_content=context.get_loaded_scripts_content(), - references_content=context.get_loaded_references_content()[:2000], - resources_content=context.get_loaded_resources_content()[:2000]) - - response = self._llm_generate(prompt) - parsed = self._parse_json_response(response) - - commands = parsed.get('commands', []) - - # Fallback: if no commands generated, try to use loaded scripts directly - if not commands: - # If no scripts loaded yet, try to load all available scripts - if not context.scripts and context.skill.scripts: - logger.info( - f'Loading all scripts as fallback: {[s.name for s in context.skill.scripts]}') - context.load_scripts() # Load all scripts - - if context.scripts: - logger.warning( - f'No commands generated, using {len(context.scripts)} loaded scripts as fallback') - # context.scripts is List[Dict] with keys: name, file, path, abs_path, content - for script_info in context.scripts: - script_name = script_info.get('name', '') - script_content = script_info.get('content', '') - if script_name.endswith('.py') and script_content: - commands.append({ - 'type': 'python_code', - 'code': script_content, - 'requirements': context.plan.required_packages if context.plan else [] - }) - elif script_name.endswith('.sh') and script_content: - commands.append({ - 'type': 'shell', - 'code': script_content - }) - - context.spec.tasks = json.dumps(commands, indent=2) - - return commands - - async def analyze_and_prepare( - self, - skill: SkillSchema, - query: str, - root_path: Path = None - ) -> Tuple[SkillContext, List[Dict[str, Any]]]: - """ - Complete progressive analysis: plan -> load -> generate commands. - - Args: - skill: SkillSchema to analyze. - query: User's query. - root_path: Root path for context. - - Returns: - Tuple of (SkillContext, execution_commands). - """ - # Phase 1: Create plan - context = await asyncio.to_thread(self.analyze_skill_plan, skill, - query, root_path) - - if not context.plan or not context.plan.can_handle: - return context, [] - - # Phase 2: Load resources - await asyncio.to_thread(self.load_skill_resources, context) - - # Phase 3: Generate commands - commands = await asyncio.to_thread(self.generate_execution_commands, - context) - - return context, commands - - -@dataclass -class SkillDAGResult: - """ - Result of AutoSkills run containing the skill execution DAG. - - Attributes: - dag: Adjacency list representation of skill dependencies. - execution_order: Topologically sorted list of skill_ids (sublists = parallel). - selected_skills: Dict of selected SkillSchema objects. - is_complete: Whether the skills are sufficient for the task. - clarification: Optional clarification question if skills are insufficient. - chat_response: Direct response if no skills needed (chat-only mode). - execution_result: Result of DAG execution (populated after execute_dag). 
- """ - dag: Dict[str, List[str]] = field(default_factory=dict) - execution_order: List[Union[str, List[str]]] = field(default_factory=list) - selected_skills: Dict[str, SkillSchema] = field(default_factory=dict) - is_complete: bool = False - clarification: Optional[str] = None - chat_response: Optional[str] = None - execution_result: Optional[DAGExecutionResult] = None - - def to_dict(self) -> Dict[str, Any]: - """Convert SkillDAGResult to dictionary.""" - return { - 'dag': - self.dag, - 'execution_order': - self.execution_order, - 'selected_skills': - {k: v.__dict__ - for k, v in self.selected_skills.items()}, - 'is_complete': - self.is_complete, - 'clarification': - self.clarification, - 'chat_response': - self.chat_response, - 'execution_result': - self.execution_result.__dict__ if self.execution_result else None, - } - - -class DAGExecutor: - """ - Executor for skill DAG with dependency-aware parallel execution. - - Handles execution order parsing, input/output linking between skills, - and parallel execution of independent skills. - Supports progressive skill analysis for incremental context loading. - """ - - def __init__(self, - container: SkillContainer, - skills: Dict[str, SkillSchema], - workspace_dir: Optional[Path] = None, - llm: 'LLM' = None, - enable_progressive_analysis: bool = True, - enable_self_reflection: bool = True, - max_retries: int = 3): - """ - Initialize DAG executor. - - Args: - container: SkillContainer for executing skills. - skills: Dict of skill_id to SkillSchema. - workspace_dir: Optional workspace directory for skill execution. - llm: LLM instance for progressive skill analysis. - enable_progressive_analysis: Whether to use progressive analysis. - enable_self_reflection: Whether to analyze errors and retry on failure. - max_retries: Maximum retry attempts for failed executions. - """ - self.container = container - self.skills = skills - self.workspace_dir = workspace_dir or container.workspace_dir - self.llm = llm - self.enable_progressive_analysis = enable_progressive_analysis and llm is not None - self.enable_self_reflection = enable_self_reflection and llm is not None - self.max_retries = max_retries - - # Skill analyzer for progressive analysis - self._analyzer: Optional[SkillAnalyzer] = None - if self.enable_progressive_analysis: - self._analyzer = SkillAnalyzer(llm) - - # Execution state: stores outputs keyed by skill_id - self._outputs: Dict[str, ExecutionOutput] = {} - - # Skill contexts from progressive analysis - self._contexts: Dict[str, SkillContext] = {} - - # Track execution attempts for retry logging - self._execution_attempts: Dict[str, int] = {} - - def _get_skill_dependencies(self, skill_id: str, - dag: Dict[str, List[str]]) -> List[str]: - """ - Get direct dependencies of a skill from the DAG. - - Args: - skill_id: The skill to get dependencies for. - dag: Adjacency list where dag[A] = [B, C] means A depends on B, C. - - Returns: - List of skill_ids that this skill depends on. - """ - return dag.get(skill_id, []) - - def _build_execution_input( - self, - skill_id: str, - dag: Dict[str, List[str]], - execution_input: Optional[ExecutionInput] = None) -> ExecutionInput: - """ - Build execution input for a skill, linking outputs from dependencies. - - Args: - skill_id: The skill to build input for. - dag: Skill dependency DAG. - execution_input: Optional user-provided input. - - Returns: - ExecutionInput with linked dependency outputs. 
- """ - base_input = execution_input or ExecutionInput() - - # Get outputs from upstream dependencies - dependencies = self._get_skill_dependencies(skill_id, dag) - upstream_data: Dict[str, Any] = {} - - for dep_id in dependencies: - if dep_id in self._outputs: - dep_output = self._outputs[dep_id] - # Pass stdout/return_value as upstream data - upstream_data[dep_id] = { - 'stdout': dep_output.stdout, - 'stderr': dep_output.stderr, - 'return_value': dep_output.return_value, - 'exit_code': dep_output.exit_code, - 'output_files': - {k: str(v) - for k, v in dep_output.output_files.items()}, - } - - # Inject upstream data into environment variables as JSON - env_vars = base_input.env_vars.copy() - if upstream_data: - env_vars['UPSTREAM_OUTPUTS'] = json.dumps(upstream_data) - # Also provide individual upstream references - for dep_id, data in upstream_data.items(): - safe_key = dep_id.replace('-', '_').replace('.', '_').replace('@', '_').replace('/', '_').upper() - if data.get('stdout'): - env_vars[f'UPSTREAM_{safe_key}_STDOUT'] = data[ - 'stdout'][:4096] - - return ExecutionInput( - args=base_input.args, - kwargs=base_input.kwargs, - env_vars=env_vars, - input_files=base_input.input_files, - stdin=base_input.stdin, - working_dir=base_input.working_dir, - requirements=base_input.requirements, - ) - - def _determine_executor_type(self, skill: SkillSchema) -> ExecutorType: - """ - Determine the executor type based on skill scripts. - - Args: - skill: SkillSchema to analyze. - - Returns: - ExecutorType for the skill's primary script. - """ - if not skill.scripts: - return ExecutorType.PYTHON_CODE - - # Check first script's extension - primary_script = skill.scripts[0] - ext = primary_script.type.lower() - - if ext in ['.py']: - return ExecutorType.PYTHON_SCRIPT - elif ext in ['.sh', '.bash']: - return ExecutorType.SHELL - elif ext in ['.js', '.mjs']: - return ExecutorType.JAVASCRIPT - else: - return ExecutorType.PYTHON_CODE - - async def _execute_single_skill( - self, - skill_id: str, - dag: Dict[str, List[str]], - execution_input: Optional[ExecutionInput] = None, - query: str = '') -> SkillExecutionResult: - """ - Execute a single skill with dependency-linked input. - - Uses progressive analysis if enabled: - 1. Analyze skill to create execution plan - 2. Load only required resources - 3. Generate and execute commands - - Args: - skill_id: ID of the skill to execute. - dag: Skill dependency DAG. - execution_input: Optional user-provided input. - query: User query for progressive analysis. - - Returns: - SkillExecutionResult with execution outcome. 
- """ - skill = self.skills.get(skill_id) - if not skill: - return SkillExecutionResult( - skill_id=skill_id, - success=False, - error=f'Skill not found: {skill_id}') - - try: - # Build base input with upstream outputs - exec_input = self._build_execution_input(skill_id, dag, execution_input) - - # Use progressive analysis if enabled - if self.enable_progressive_analysis and self._analyzer: - return await self._execute_with_progressive_analysis( - skill, skill_id, exec_input, query) - - # Fallback: direct execution without progressive analysis - return await self._execute_direct(skill, skill_id, exec_input) - - except Exception as e: - logger.error(f'Skill execution failed for {skill_id}: {e}') - return SkillExecutionResult( - skill_id=skill_id, success=False, error=str(e)) - - async def _execute_with_progressive_analysis( - self, skill: SkillSchema, skill_id: str, - exec_input: ExecutionInput, query: str) -> SkillExecutionResult: - """ - Execute skill using progressive analysis. - - Args: - skill: SkillSchema to execute. - skill_id: Skill identifier. - exec_input: Execution input with upstream data. - query: User query for context. - - Returns: - SkillExecutionResult with execution outcome. - """ - # Phase 1 & 2: Analyze and load resources - # Use skill's directory as root_path for proper file resolution - context, commands = await self._analyzer.analyze_and_prepare( - skill, query, skill.skill_path) - - # Store context for reference - self._contexts[skill_id] = context - - # Mount skill directory in container for sandbox access - self.container.mount_skill_directory(skill_id, skill.skill_path) - - if not context.plan or not context.plan.can_handle: - return SkillExecutionResult( - skill_id=skill_id, - success=False, - error= - f'Skill cannot handle query: {context.plan.reasoning if context.plan else "No plan"}' - ) - - if not commands: - return SkillExecutionResult( - skill_id=skill_id, - success=False, - error='No execution commands generated') - - # Phase 3: Execute commands with retry support for all types - outputs: List[ExecutionOutput] = [] - for cmd in commands: - cmd_type = cmd.get('type', 'python_code') - - # Use retry mechanism for all command types - if self.enable_self_reflection: - output = await self._execute_command_with_retry( - cmd=cmd, - cmd_type=cmd_type, - skill_id=skill_id, - exec_input=exec_input, - context=context, - skill=skill, - query=query) - else: - # Self-reflection disabled - execute without retry - output = await self._execute_command(cmd, cmd_type, skill_id, - exec_input, context) - outputs.append(output) - - if output.exit_code != 0: - # Stop on first failure (after retries exhausted) - break - - # Merge outputs - final_output = self._merge_outputs(outputs) - - # Store output for downstream skills - self._outputs[skill_id] = final_output - self.container.spec.link_upstream(skill_id, final_output) - - return SkillExecutionResult( - skill_id=skill_id, - success=(final_output.exit_code == 0), - output=final_output, - error=final_output.stderr if final_output.exit_code != 0 else None) - - async def _execute_direct( - self, skill: SkillSchema, skill_id: str, - exec_input: ExecutionInput) -> SkillExecutionResult: - """ - Execute skill directly without progressive analysis. - - Args: - skill: SkillSchema to execute. - skill_id: Skill identifier. - exec_input: Execution input. - - Returns: - SkillExecutionResult with execution outcome. 
- """ - # Mount skill directory for sandbox access - self.container.mount_skill_directory(skill_id, skill.skill_path) - - executor_type = self._determine_executor_type(skill) - - if skill.scripts: - script_path = skill.scripts[0].path - output = await self.container.execute( - executor_type=executor_type, - skill_id=skill_id, - script_path=script_path, - input_spec=exec_input) - else: - output = await self.container.execute_python_code( - code=skill.content or '# No executable content', - skill_id=skill_id, - input_spec=exec_input) - - self._outputs[skill_id] = output - self.container.spec.link_upstream(skill_id, output) - - return SkillExecutionResult( - skill_id=skill_id, - success=(output.exit_code == 0), - output=output, - error=output.stderr if output.exit_code != 0 else None) - - async def _execute_command(self, cmd: Dict[str, Any], cmd_type: str, - skill_id: str, exec_input: ExecutionInput, - context: SkillContext) -> ExecutionOutput: - """ - Execute a single command from progressive analysis. - - Args: - cmd: Command dictionary. - cmd_type: Type of command (python_script, shell, etc.). - skill_id: Skill identifier. - exec_input: Base execution input. - context: SkillContext with loaded resources. - - Returns: - ExecutionOutput from command execution. - """ - # Merge parameters into input - params = cmd.get('parameters', {}) - # Use skill directory as working directory for proper file access - working_dir = exec_input.working_dir or context.skill_dir - - # Collect all requirements: from plan, command, and input - all_requirements = [] - if context.plan and context.plan.required_packages: - all_requirements.extend(context.plan.required_packages) - all_requirements.extend(cmd.get('requirements', [])) - all_requirements.extend(exec_input.requirements) - # Deduplicate while preserving order - seen = set() - unique_requirements = [] - for req in all_requirements: - if req not in seen: - seen.add(req) - unique_requirements.append(req) - - merged_input = ExecutionInput( - args=exec_input.args + list(params.values()), - kwargs={ - **exec_input.kwargs, - **params - }, - env_vars={ - **exec_input.env_vars, - 'SKILL_DIR': str(context.skill_dir), - **{k.upper(): str(v) - for k, v in params.items()} - }, - input_files=exec_input.input_files, - stdin=exec_input.stdin, - working_dir=working_dir, - requirements=unique_requirements) - - if cmd_type == 'python_script': - script_path = cmd.get('path') - if script_path: - # Resolve path relative to skill directory - full_path = context.skill_dir / script_path - if not full_path.exists(): - full_path = context.root_path / script_path - return await self.container.execute_python_script( - script_path=full_path, - skill_id=skill_id, - input_spec=merged_input) - else: - code = cmd.get('code', '') - return await self.container.execute_python_code( - code=code, skill_id=skill_id, input_spec=merged_input) - - elif cmd_type == 'python_code': - code = cmd.get('code', '') - return await self.container.execute_python_code( - code=code, skill_id=skill_id, input_spec=merged_input) - - elif cmd_type == 'shell': - command = cmd.get('code') or cmd.get('command', '') - return await self.container.execute_shell( - command=command, skill_id=skill_id, input_spec=merged_input) - - elif cmd_type == 'javascript': - code = cmd.get('code', '') - return await self.container.execute_javascript( - code=code, skill_id=skill_id, input_spec=merged_input) - - else: - # Default to python code - code = cmd.get('code', '') - return await self.container.execute_python_code( - code=code, 
skill_id=skill_id, input_spec=merged_input) - - async def _execute_command_with_retry( - self, cmd: Dict[str, Any], cmd_type: str, - skill_id: str, exec_input: ExecutionInput, - context: SkillContext, skill: SkillSchema, - query: str) -> ExecutionOutput: - """ - Execute a command with retry logic for all execution types. - - Always retries up to max_retries times. Uses LLM analysis to improve - the fix between retries when self-reflection is enabled. - - Args: - cmd: Command dictionary. - cmd_type: Type of command. - skill_id: Skill identifier. - exec_input: Base execution input. - context: SkillContext. - skill: SkillSchema for error analysis. - query: User query for context. - - Returns: - ExecutionOutput from command execution. - """ - current_cmd = cmd.copy() - last_output = None - - for attempt in range(1, self.max_retries + 1): - self._execution_attempts[skill_id] = attempt - logger.info(f'[{skill_id}] Execution attempt {attempt}/{self.max_retries}') - - # Execute the command - output = await self._execute_command( - current_cmd, cmd_type, skill_id, exec_input, context) - last_output = output - - # Check if successful - if output.exit_code == 0: - if attempt > 1: - logger.info( - f'[{skill_id}] Execution succeeded after {attempt} attempts') - return output - - # Collect error info - error_msg = output.stderr[:500] if output.stderr else 'Unknown error' - logger.warning(f'[{skill_id}] Attempt {attempt} failed: {error_msg[:200]}') - - # Last attempt - no need to analyze - if attempt >= self.max_retries: - logger.warning( - f'[{skill_id}] Max retries ({self.max_retries}) reached') - continue - - # Try to analyze and fix if self-reflection is enabled - if self.enable_self_reflection and cmd_type in ('python_code', 'python_script'): - code = current_cmd.get('code', '') - if code: - logger.info(f'[{skill_id}] Analyzing error for retry...') - analysis = self._analyze_execution_error( - skill=skill, - failed_code=code, - output=output, - query=query, - attempt=attempt) - - error_info = analysis.get('error_analysis', {}) - is_fixable = error_info.get('is_fixable', False) - fixed_code = analysis.get('fixed_code') - additional_reqs = analysis.get('additional_requirements', []) - - logger.info( - f'[{skill_id}] Error analysis: type={error_info.get("error_type")}, ' - f'fixable={is_fixable}') - - # Apply fix if available - if is_fixable and fixed_code: - current_cmd = current_cmd.copy() - current_cmd['code'] = fixed_code - logger.info(f'[{skill_id}] Applying fix') - - # Add additional requirements - if additional_reqs: - logger.info(f'[{skill_id}] Adding requirements: {additional_reqs}') - exec_input = ExecutionInput( - args=exec_input.args, - kwargs=exec_input.kwargs, - env_vars=exec_input.env_vars, - input_files=exec_input.input_files, - working_dir=exec_input.working_dir, - requirements=list(set(exec_input.requirements + additional_reqs))) - else: - logger.info(f'[{skill_id}] Retrying without code modification') - - logger.error(f'[{skill_id}] All {self.max_retries} attempts failed') - return last_output - - def _merge_outputs(self, - outputs: List[ExecutionOutput]) -> ExecutionOutput: - """Merge multiple execution outputs into one.""" - if not outputs: - return ExecutionOutput() - if len(outputs) == 1: - return outputs[0] - - # Merge all outputs - merged_stdout = '\n'.join(o.stdout for o in outputs if o.stdout) - merged_stderr = '\n'.join(o.stderr for o in outputs if o.stderr) - final_exit_code = next( - (o.exit_code for o in outputs if o.exit_code != 0), 0) - total_duration = 
sum(o.duration_ms for o in outputs) - - # Merge output files - merged_files = {} - for o in outputs: - merged_files.update(o.output_files) - - return ExecutionOutput( - stdout=merged_stdout, - stderr=merged_stderr, - exit_code=final_exit_code, - output_files=merged_files, - duration_ms=total_duration) - - def _analyze_execution_error( - self, - skill: SkillSchema, - failed_code: str, - output: ExecutionOutput, - query: str, - attempt: int) -> Dict[str, Any]: - """ - Analyze failed execution and generate a fix using LLM. - - Args: - skill: The skill that failed. - failed_code: The code that failed. - output: ExecutionOutput with error details. - query: Original user query. - attempt: Current retry attempt number. - - Returns: - Dict with error analysis and fixed code. - """ - if not self.llm: - return {'error_analysis': {'is_fixable': False}, - 'fixed_code': None} - - prompt = PROMPT_ANALYZE_EXECUTION_ERROR.format( - query=query, - skill_id=skill.skill_id, - skill_name=skill.name, - failed_code=failed_code[:8000], # Limit code length - stderr=output.stderr[:3000] if output.stderr else '', - stdout=output.stdout[:1000] if output.stdout else '', - attempt=attempt, - max_attempts=self.max_retries) - - try: - response = self.llm.generate( - messages=[Message(role='user', content=prompt)]) - # Parse JSON response - handle different response formats - response_text = (response.content if hasattr(response, 'content') - else str(response)).strip() - # Extract JSON from response - json_match = re.search(r'\{[\s\S]*\}', response_text) - if json_match: - return json.loads(json_match.group()) - except Exception as e: - logger.warning(f'Error analyzing execution failure: {e}') - - return {'error_analysis': {'is_fixable': False}, 'fixed_code': None} - - async def _execute_parallel_group( - self, - skill_ids: List[str], - dag: Dict[str, List[str]], - execution_input: Optional[ExecutionInput] = None, - query: str = '') -> List[SkillExecutionResult]: - """ - Execute a group of skills in parallel. - - Args: - skill_ids: List of skill_ids to execute concurrently. - dag: Skill dependency DAG. - execution_input: Optional user-provided input. - query: User query for progressive analysis. - - Returns: - List of SkillExecutionResult for each skill. - """ - tasks = [ - self._execute_single_skill(sid, dag, execution_input, query) - for sid in skill_ids - ] - return await asyncio.gather(*tasks) - - async def execute(self, - dag: Dict[str, List[str]], - execution_order: List[Union[str, List[str]]], - execution_input: Optional[ExecutionInput] = None, - stop_on_failure: bool = True, - query: str = '') -> DAGExecutionResult: - """ - Execute the skill DAG according to execution order. - - Execution order format: [skill1, skill2, [skill3, skill4], skill5, ...] - - Single string items are executed sequentially - - List items (sublists) are executed in parallel - - Args: - dag: Skill dependency DAG (adjacency list). - execution_order: Ordered list with parallel groups as sublists. - execution_input: Optional initial input for all skills. - stop_on_failure: Whether to stop execution on first failure. - query: User query for progressive skill analysis. - - Returns: - DAGExecutionResult with all execution outcomes. 
- """ - import time - start_time = time.time() - - results: Dict[str, SkillExecutionResult] = {} - actual_order: List[Union[str, List[str]]] = [] - all_success = True - - for item in execution_order: - if isinstance(item, list): - # Parallel execution group - group_results = await self._execute_parallel_group( - item, dag, execution_input, query) - for res in group_results: - results[res.skill_id] = res - if not res.success: - all_success = False - actual_order.append(item) - - if not all_success and stop_on_failure: - logger.warning( - f'Stopping DAG execution due to failure in parallel group: {item}' - ) - break - else: - # Sequential execution - result = await self._execute_single_skill( - item, dag, execution_input, query) - results[result.skill_id] = result - actual_order.append(item) - - if not result.success: - all_success = False - if stop_on_failure: - logger.warning( - f'Stopping DAG execution due to failure: {item}') - break - - total_duration = (time.time() - start_time) * 1000 - - return DAGExecutionResult( - success=all_success, - results=results, - execution_order=actual_order, - total_duration_ms=total_duration) - - def get_skill_context(self, skill_id: str) -> Optional[SkillContext]: - """Get the skill context from progressive analysis.""" - return self._contexts.get(skill_id) - - def get_all_contexts(self) -> Dict[str, SkillContext]: - """Get all skill contexts from progressive analysis.""" - return self._contexts.copy() - - def get_executed_skill_ids(self) -> List[str]: - """Get list of skill_ids that have been executed with contexts.""" - return list(self._contexts.keys()) - - -class AutoSkills: - """ - Automatic skill retrieval and DAG construction for user queries. - - Uses hybrid retrieval (dense + sparse) to find relevant skills, - with LLM-based analysis and reflection loop for completeness checking. - Supports DAG-based skill execution with dependency management. - """ - - def __init__(self, - skills: Union[str, List[str], List[SkillSchema]], - llm: LLM, - enable_retrieve: Union[bool, None] = None, - retrieve_args: Dict[str, Any] = None, - max_candidate_skills: int = 10, - max_retries: int = 3, - work_dir: Optional[Union[str, Path]] = None, - use_sandbox: bool = True, - **kwargs): - """ - Initialize AutoSkills with skills corpus and retriever. - - Args: - skills: Path(s) to skill directories or list of SkillSchema. - Alternatively, single repo_id or list of repo_ids from ModelScope. - e.g. skills='ms-agent/claude_skills', refer to `https://modelscope.cn/models/ms-agent/claude_skills` - llm: LLM instance for query analysis and evaluation. - enable_retrieve: If True, use HybridRetriever for skill search. - If False, put all skills into LLM context for direct selection. - If None, enable search only if skills > 10 automatically. - retrieve_args: Additional arguments for HybridRetriever. - Attributes: - top_k: Number of top results to retrieve per query. - min_score: Minimum score threshold for retrieval. - max_candidate_skills: Maximum number of candidate skills to consider. - max_retries: Maximum retry attempts for failed executions for each skill. - work_dir: Working directory for skill execution. - use_sandbox: Whether to use Docker sandbox for execution. 
- - Examples: - >>> from omegaconf import DictConfig - >>> from ms_agent.llm.openai_llm import OpenAI - >>> from ms_agent.skill.auto_skills import SkillDAGResult - >>> config = DictConfig( - { - 'llm': { - 'service': 'openai', - 'model': 'gpt-4', - 'openai_api_key': 'your-api-key', - 'openai_base_url': 'your-base-url' - } - } - >>> ) - >>> llm_instance = OpenAI.from_config(config) - >>> auto_skills = AutoSkills( - skills='/path/to/skills', - llm=llm_instance, - ) - >>> async def main(): - result: SkillDAGResult = await auto_skills.run(query='Analyze sales data and generate mock report for Nvidia Q4 2025 in PDF format.') - print(result.execution_result) - >>> import asyncio - >>> asyncio.run(main()) - """ - # Dict of - self.all_skills: Dict[str, SkillSchema] = load_skills(skills=skills) - logger.info(f'Loaded {len(self.all_skills)} skills from {skills}') - - self.llm = llm - self.enable_retrieve = len( - self.all_skills) > 10 if enable_retrieve is None else enable_retrieve - retrieve_args = retrieve_args or {} - self.top_k = retrieve_args.get('top_k', 3) - self.min_score = retrieve_args.get('min_score', 0.8) - self.max_candidate_skills = max_candidate_skills - self.max_retries = max_retries - self.work_dir = Path(work_dir) if work_dir else None - self.use_sandbox = use_sandbox - self.kwargs = kwargs - - if self.use_sandbox: - from ms_agent.utils.docker_utils import is_docker_daemon_running - if not is_docker_daemon_running(): - raise RuntimeError( - 'Docker daemon is not running. Please start Docker to use sandbox mode.' - ) - - # Configure logger to output to work_dir/logs if work_dir is specified - if self.work_dir: - _configure_logger_to_dir(self.work_dir / 'logs') - - # Build corpus and skill_id mapping - self.corpus: List[str] = [] - self.corpus_to_skill_id: Dict[str, str] = {} - self._build_corpus() - - # Initialize retriever only if search is enabled - self.retriever: Optional[HybridRetriever] = None - if self.enable_retrieve and self.corpus: - self.retriever = HybridRetriever(corpus=self.corpus, **kwargs) - - # Container and executor (lazy initialization) - self._container: Optional[SkillContainer] = None - self._executor: Optional[DAGExecutor] = None - - def _build_corpus(self): - """Build corpus from skills for retriever indexing.""" - for skill_id, skill in self.all_skills.items(): - # Concatenate skill_id, name, description as corpus document - doc = f'[{skill_id}] {skill.name}: {skill.description}' - self.corpus.append(doc) - self.corpus_to_skill_id[doc] = skill_id - - def _extract_skill_id_from_doc(self, doc: str) -> Optional[str]: - """Extract skill_id from corpus document string.""" - # First try direct lookup - if doc in self.corpus_to_skill_id: - return self.corpus_to_skill_id[doc] - # Fallback: extract from [skill_id] pattern - match = re.match(r'\[([^\]]+)\]', doc) - return match.group(1) if match else None - - def _parse_json_response(self, response: str) -> Dict[str, Any]: - """Parse JSON from LLM response with robust extraction.""" - # Remove markdown code blocks if present - response = re.sub(r'```json\s*', '', response) - response = re.sub(r'```\s*$', '', response) - response = response.strip() - - # Try direct parsing first - try: - return json.loads(response) - except json.JSONDecodeError: - pass - - # Try to extract JSON object from response - try: - # Find the outermost JSON object - start = response.find('{') - if start != -1: - # Find matching closing brace - depth = 0 - for i, char in enumerate(response[start:], start): - if char == '{': - depth += 1 - elif 
char == '}': - depth -= 1 - if depth == 0: - json_str = response[start:i + 1] - return json.loads(json_str) - except json.JSONDecodeError: - pass - - # Try regex extraction as fallback - try: - json_match = re.search(r'\{[\s\S]*\}', response) - if json_match: - return json.loads(json_match.group()) - except json.JSONDecodeError: - pass - - logger.warning(f'Failed to parse JSON response: {response[:300]}...') - return {} - - def _get_skills_overview(self, limit: int = 20) -> str: - """Generate a brief overview of all available skills.""" - lines = [] - for skill_id, skill in self.all_skills.items(): - lines.append( - f'- [{skill_id}] {skill.name}: {skill.description[:200]}') - return '\n'.join(lines[:limit]) # Limit to avoid token overflow - - def _get_all_skills_context(self) -> str: - """Generate full context of all skills for direct LLM selection.""" - lines = [] - for skill_id, skill in self.all_skills.items(): - lines.append(f'- [{skill_id}] {skill.name}\n {skill.description}') - return '\n'.join(lines) - - def _format_retrieved_skills(self, skill_ids: Set[str]) -> str: - """Format retrieved skills for LLM prompt.""" - lines = [] - for skill_id in skill_ids: - if skill_id in self.all_skills: - skill = self.all_skills[skill_id] - lines.append( - f'- [{skill_id}] {skill.name}\n {skill.description}\n Main Content: {skill.content[:3000]}') - return '\n'.join(lines) - - def _llm_generate(self, prompt: str) -> str: - """Generate LLM response from prompt.""" - messages = [Message(role='user', content=prompt)] - logger.debug(f'Input msg to LLM: {messages}') # set env `LOG_LEVEL=DEBUG` - response = self.llm.generate(messages=messages) - res = response.content if hasattr(response, - 'content') else str(response) - logger.debug('LLM response: {}'.format(res)) - return res - - async def _async_llm_generate(self, prompt: str) -> str: - """Async wrapper for LLM generation.""" - return await asyncio.to_thread(self._llm_generate, prompt) - - def _analyze_query( - self, - query: str, - ) -> Tuple[bool, str, List[str], Optional[str]]: - """ - Analyze user query to determine if skills are needed. - - Args: - query: User's original query. - - Returns: - Tuple of (needs_skills, intent_summary, skill_queries, chat_response). - """ - prompt = PROMPT_ANALYZE_QUERY_FOR_SKILLS.format( - query=query, skills_overview=self._get_skills_overview()) - response = self._llm_generate(prompt) - parsed = self._parse_json_response(response) - - needs_skills = parsed.get('needs_skills', True) - intent = parsed.get('intent_summary', query) - queries = parsed.get('skill_queries', [query]) - chat_response = parsed.get('chat_response') - return needs_skills, intent, queries if queries else [query - ], chat_response - - async def _async_retrieve_skills(self, queries: List[str]) -> Set[str]: - """ - Retrieve skills for multiple queries in parallel. - - Args: - queries: List of search queries. - - Returns: - Set of unique skill_ids from all queries. 
- """ - if not self.retriever: - return set() - - # Run parallel async searches - tasks = [ - self.retriever.async_search( - query=q, top_k=self.top_k, min_score=self.min_score) - for q in queries - ] - results = await asyncio.gather(*tasks) - - # Collect unique skill_ids - skill_ids = set() - for result_list in results: - for doc, score in result_list: - skill_id = self._extract_skill_id_from_doc(doc) - if skill_id: - skill_ids.add(skill_id) - return skill_ids - - def _filter_skills( - self, - query: str, - skill_ids: Set[str], - mode: Literal['fast', 'deep'] = 'fast' - ) -> Set[str]: - """ - Filter skills based on relevance to the query. - - Args: - query: User's query. - skill_ids: Set of candidate skill_ids. - mode: 'fast' for name+description only, 'deep' for full content analysis. - - Returns: - Set of filtered skill_ids that are relevant. - """ - if len(skill_ids) <= 1: - return skill_ids - - # Format candidate skills based on mode - if mode == 'deep': - # Include name, description, and content (truncated) - skill_entries = [] - for sid in skill_ids: - if sid not in self.all_skills: - continue - skill = self.all_skills[sid] - content = skill.content[:3000] if skill.content else '' - entry = ( - f'### [{sid}] {skill.name}\n' - f'**Description**: {skill.description}\n' - f'**Content**: {content}' - ) - skill_entries.append(entry) - candidate_skills_text = '\n\n'.join(skill_entries) - prompt = PROMPT_FILTER_SKILLS_DEEP.format( - query=query, - candidate_skills=candidate_skills_text) - else: - # Fast mode: name and description only - candidate_skills_text = '\n'.join([ - f'- [{sid}] {self.all_skills[sid].name}: {self.all_skills[sid].description}' - for sid in skill_ids if sid in self.all_skills - ]) - prompt = PROMPT_FILTER_SKILLS_FAST.format( - query=query, - candidate_skills=candidate_skills_text) - - response = self._llm_generate(prompt) - parsed = self._parse_json_response(response) - - filtered_ids = parsed.get('filtered_skill_ids', list(skill_ids)) - - # For deep mode, also check skill_analysis for can_execute - if mode == 'deep': - skill_analysis = parsed.get('skill_analysis', {}) - final_ids = [] - for sid in filtered_ids: - analysis = skill_analysis.get(sid, {}) - # Keep skill if can_execute is True or not specified - if analysis.get('can_execute', True): - final_ids.append(sid) - else: - logger.info( - f'Removing skill [{sid}]: cannot execute - ' - f'{analysis.get("reason", "")[:200]}' - ) - filtered_ids = final_ids - - logger.info( - f'Filter ({mode}): {len(skill_ids)} -> {len(filtered_ids)} skills. ' - f'Reason: {parsed.get("reasoning", "")[:1000]}' - ) - - return set(filtered_ids) - - def _build_dag(self, query: str, skill_ids: Set[str]) -> Dict[str, Any]: - """ - Filter skills and build execution DAG. - - Performs deep filtering and DAG construction in one LLM call. - - Args: - query: Original user query. - skill_ids: Set of candidate skill_ids. - - Returns: - Dict containing 'filtered_skill_ids', 'dag', and 'execution_order'. 
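-
-        Example (hypothetical skill IDs; shows the returned shape):
-            >>> self._build_dag('report task', {'pdf@latest', 'data@latest'})
-            {'filtered_skill_ids': {'pdf@latest', 'data@latest'},
-             'dag': {'data@latest': [], 'pdf@latest': ['data@latest']},
-             'execution_order': ['data@latest', 'pdf@latest']}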
- """ - skills_info = self._format_retrieved_skills(skill_ids) - prompt = PROMPT_BUILD_SKILLS_DAG.format( - query=query, selected_skills=skills_info) - response = self._llm_generate(prompt) - parsed = self._parse_json_response(response) - - # Get filtered skills and validate they exist in input - raw_filtered = parsed.get('filtered_skill_ids', list(skill_ids)) - filtered_ids = set(sid for sid in raw_filtered if sid in skill_ids) - - # If no valid IDs returned, keep all input skills - if not filtered_ids: - logger.warning('No valid skill IDs in LLM response, keeping all input skills') - filtered_ids = skill_ids - - logger.info(f'DAG filter: {len(skill_ids)} -> {len(filtered_ids)} skills') - - # Validate and clean DAG - only keep valid skill IDs - raw_dag = parsed.get('dag', {}) - dag = {} - for sid, deps in raw_dag.items(): - if sid in filtered_ids: - # Filter dependencies to only valid skill IDs - valid_deps = [d for d in deps if d in filtered_ids] - dag[sid] = valid_deps - - # Ensure all filtered skills are in DAG - for sid in filtered_ids: - if sid not in dag: - dag[sid] = [] - - # Validate execution_order - only keep valid skill IDs - raw_order = parsed.get('execution_order', []) - order = self._validate_execution_order(raw_order, filtered_ids) - - # Fallback: derive execution_order from DAG using topological sort - if not order and filtered_ids: - order = self._topological_sort_dag(dag) - logger.info(f'Derived execution_order from DAG: {order}') - - return { - 'filtered_skill_ids': filtered_ids, - 'dag': dag, - 'execution_order': order - } - - def _validate_execution_order( - self, - raw_order: List[Union[str, List[str]]], - valid_ids: Set[str] - ) -> List[Union[str, List[str]]]: - """ - Validate execution order, keeping only valid skill IDs. - - Args: - raw_order: Raw execution order from LLM. - valid_ids: Set of valid skill IDs. - - Returns: - Validated execution order with only valid skill IDs. - """ - result = [] - for item in raw_order: - if isinstance(item, list): - valid_group = [sid for sid in item if sid in valid_ids] - if valid_group: - if len(valid_group) == 1: - result.append(valid_group[0]) - else: - result.append(valid_group) - elif item in valid_ids: - result.append(item) - return result - - def _topological_sort_dag(self, dag: Dict[str, List[str]]) -> List[str]: - """ - Perform topological sort on DAG to get execution order. - - Args: - dag: Adjacency list where dag[A] = [B, C] means A depends on B, C. - - Returns: - Topologically sorted list of skill IDs (dependencies first). 
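-
-        Example:
-            >>> self._topological_sort_dag({'c': ['a', 'b'], 'b': ['a'], 'a': []})
-            ['a', 'b', 'c']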
-        """
-        if not dag:
-            return []
-
-        # In dag[A] = [B], A depends on B, so B must come before A.
-        # in_degree[node] = number of dependencies that must run first.
-        in_degree = {node: 0 for node in dag}
-        for dep in set(d for deps in dag.values() for d in deps):
-            if dep not in in_degree:
-                in_degree[dep] = 0
-
-        for node, deps in dag.items():
-            in_degree[node] = len(deps)
-
-        # Start with nodes that have no dependencies
-        queue = [node for node, degree in in_degree.items() if degree == 0]
-        result = []
-
-        while queue:
-            # Sort for deterministic order
-            queue.sort()
-            node = queue.pop(0)
-            result.append(node)
-
-            # Reduce in-degree for nodes that depend on this node
-            for other_node, deps in dag.items():
-                if node in deps and other_node in in_degree:
-                    in_degree[other_node] -= 1
-                    if in_degree[other_node] == 0:
-                        queue.append(other_node)
-
-        # If not all nodes processed, there might be a cycle or disconnected nodes
-        remaining = set(dag.keys()) - set(result)
-        if remaining:
-            logger.warning(f'Topological sort incomplete, adding remaining: {remaining}')
-            result.extend(sorted(remaining))
-
-        return result
-
-    def _filter_execution_order(
-            self,
-            execution_order: List[Union[str, List[str]]],
-            valid_skill_ids: Set[str]
-    ) -> List[Union[str, List[str]]]:
-        """
-        Filter execution order to only include valid skill_ids.
-
-        Args:
-            execution_order: Original execution order (may contain parallel groups).
-            valid_skill_ids: Set of skill_ids that should be kept.
-
-        Returns:
-            Filtered execution order with only valid skills.
-        """
-        filtered = []
-        for item in execution_order:
-            if isinstance(item, list):
-                # Parallel group: filter and keep if any remain
-                filtered_group = [sid for sid in item if sid in valid_skill_ids]
-                if filtered_group:
-                    if len(filtered_group) == 1:
-                        filtered.append(filtered_group[0])
-                    else:
-                        filtered.append(filtered_group)
-            elif item in valid_skill_ids:
-                filtered.append(item)
-        return filtered
-
-    def _direct_select_skills(self, query: str) -> SkillDAGResult:
-        """
-        Directly select skills using LLM with all skills in context.
-
-        Used when enable_retrieve=False. Puts all skills into LLM context
-        and lets LLM select relevant skills and build DAG in one call.
-
-        Args:
-            query: User's task query.
-
-        Returns:
-            SkillDAGResult containing the skill execution DAG.
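-
-        Example (sketch; assumes a matching skill is loaded):
-            >>> result = self._direct_select_skills('Render Q4 sales as a PDF')
-            >>> list(result.selected_skills)  # e.g. ['pdf@latest']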
- """ - prompt = PROMPT_DIRECT_SELECT_SKILLS.format( - query=query, all_skills=self._get_all_skills_context()) - response = self._llm_generate(prompt) - parsed = self._parse_json_response(response) - - # Handle chat-only response - needs_skills = parsed.get('needs_skills', True) - chat_response = parsed.get('chat_response') - - if not needs_skills: - logger.info('Chat-only query, no skills needed') - if chat_response: - print(f'\n[Chat Response]\n{chat_response}\n') - return SkillDAGResult( - is_complete=True, chat_response=chat_response) - - # Extract selected skills and DAG - selected_ids = parsed.get('selected_skill_ids', []) - dag = parsed.get('dag', {}) - order = parsed.get('execution_order', []) - - # Validate skill_ids exist - valid_ids = {sid for sid in selected_ids if sid in self.all_skills} - selected = {sid: self.all_skills[sid] for sid in valid_ids} - - logger.info(f'Direct selection: {valid_ids}') - - return SkillDAGResult( - dag=dag, - execution_order=order, - selected_skills=selected, - is_complete=bool(valid_ids), - clarification=None if valid_ids else 'No relevant skills found.') - - async def get_skill_dag(self, query: str) -> SkillDAGResult: - """ - Run the autonomous skill retrieval and DAG construction loop. - - Iteratively retrieves skills, evaluates completeness with reflection, - and builds execution DAG. Loop terminates when: - - Query is chat-only (no skills needed) - - Max iterations reached - - Skills are deemed complete for the task - - Clarification from user is needed - - Args: - query: User's task query. - - Returns: - SkillDAGResult containing the skill execution DAG. - """ - if not self.all_skills: - logger.warning('No skills loaded, returning empty result') - return SkillDAGResult() - - # Direct selection mode: put all skills into LLM context - if not self.enable_retrieve: - logger.info('Direct selection mode (enable_retrieve=False)') - return self._direct_select_skills(query) - - # Search mode: use HybridRetriever - if not self.retriever: - logger.warning('Retriever not initialized, returning empty result') - return SkillDAGResult() - - # Step 1: Analyze query to determine if skills are needed - needs_skills, intent, skill_queries, chat_response = self._analyze_query( - query) - logger.info(f'Needs skills: {needs_skills}, Intent: {intent}') - - # If chat-only, return empty DAG with chat response - if not needs_skills: - logger.info('Chat-only query, no skills needed') - if chat_response: - print(f'\n[Chat Response]\n{chat_response}\n') - return SkillDAGResult( - is_complete=True, chat_response=chat_response) - - clarification: Optional[str] = None - - # Step 2: Retrieve skills - collected_skills = await self._async_retrieve_skills(skill_queries) - logger.info(f'Retrieved skills: {collected_skills}') - - if not collected_skills: - clarification = 'No relevant skills found. Please provide more details.' 
- return SkillDAGResult( - is_complete=False, clarification=clarification) - - # Limit candidate skills to max_candidate_skills - if len(collected_skills) > self.max_candidate_skills: - logger.warning( - f'Too many candidate skills ({len(collected_skills)}), ' - f'limiting to {self.max_candidate_skills}' - ) - collected_skills = set(list(collected_skills)[:self.max_candidate_skills]) - - # Step 3: Fast filter by name/description - collected_skills = self._filter_skills(query, collected_skills, mode='fast') - logger.info(f'After fast filter: {collected_skills}') - - if len(collected_skills) > 1: - collected_skills = self._filter_skills(query, collected_skills, mode='deep') - logger.info(f'After deep filter: {collected_skills}') - - if not collected_skills: - clarification = 'No relevant skills found after filtering. Please refine your query.' - return SkillDAGResult( - is_complete=False, clarification=clarification) - - # Step 4: Build DAG with integrated deep filtering - dag_result = self._build_dag(query, collected_skills) - - filtered_ids = dag_result.get('filtered_skill_ids', collected_skills) - skills_dag: Dict[str, Any] = dag_result.get('dag', {}) - execution_order: List[str] = dag_result.get('execution_order', []) - - if not filtered_ids: - clarification = 'No relevant skills found after filtering. Please refine your query.' - return SkillDAGResult( - is_complete=False, clarification=clarification) - - # Build selected skills dict from filtered results - selected = { - sid: self.all_skills[sid] - for sid in filtered_ids if sid in self.all_skills - } - - logger.info( - f'Final DAG built with skills: {skills_dag}, execution order: {execution_order}' - ) - - return SkillDAGResult( - dag=skills_dag, - execution_order=execution_order, - selected_skills=selected, - is_complete=(clarification is None), - clarification=clarification) - - def _get_container(self) -> SkillContainer: - """Get or create SkillContainer instance.""" - if self._container is None: - self._container = SkillContainer( - workspace_dir=self.work_dir, - use_sandbox=self.use_sandbox, - **{ - k: v - for k, v in self.kwargs.items() if k in [ - 'timeout', 'image', 'memory_limit', - 'enable_security_check', 'network_enabled' - ] - }) - return self._container - - def _get_executor(self) -> DAGExecutor: - """Get or create DAGExecutor instance.""" - if self._executor is None: - container = self._get_container() - self._executor = DAGExecutor( - container=container, - skills=self.all_skills, - workspace_dir=self.work_dir, - llm=self.llm, - enable_progressive_analysis=True, - max_retries=self.max_retries) - return self._executor - - async def execute_dag(self, - dag_result: SkillDAGResult, - execution_input: Optional[ExecutionInput] = None, - stop_on_failure: bool = True, - query: str = '') -> DAGExecutionResult: - """ - Execute the skill DAG from a SkillDAGResult. - - Executes skills according to the execution_order, handling: - - Sequential execution for single skill items - - Parallel execution for skill groups (sublists) - - Input/output linking between dependent skills - - Progressive skill analysis (plan -> load -> execute) - - Args: - dag_result: SkillDAGResult containing DAG and execution order. - execution_input: Optional initial input for skills. - stop_on_failure: Whether to stop on first failure. - query: User query for progressive skill analysis. - - Returns: - DAGExecutionResult with all execution outcomes. 
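-
-        Example (sketch):
-            >>> dag_result = await auto_skills.get_skill_dag(query)
-            >>> exec_result = await auto_skills.execute_dag(dag_result, query=query)
-            >>> exec_result.success  # True if all executed skills succeeded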
- """ - if not dag_result.is_complete: - logger.warning('DAG is not complete, execution may fail') - - if not dag_result.execution_order: - logger.warning('Empty execution order, nothing to execute') - return DAGExecutionResult(success=True) - - executor = self._get_executor() - result = await executor.execute( - dag=dag_result.dag, - execution_order=dag_result.execution_order, - execution_input=execution_input, - stop_on_failure=stop_on_failure, - query=query) - - # Attach result to dag_result for convenience - dag_result.execution_result = result - - logger.info(f'DAG execution completed: success={result.success}, ' - f'duration={result.total_duration_ms:.2f}ms') - - return result - - def get_execution_spec(self) -> Optional[str]: - """Get the execution spec log as markdown string.""" - if self._container: - return self._container.get_spec_log() - return None - - def save_execution_spec(self, - output_path: Optional[Union[str, Path]] = None): - """Save the execution spec to a markdown file.""" - if self._container: - self._container.save_spec_log(output_path) - - def cleanup(self, keep_spec: bool = True): - """Clean up container workspace.""" - if self._container: - self._container.cleanup(keep_spec=keep_spec) - - def get_skill_context(self, skill_id: str) -> Optional[SkillContext]: - """ - Get the skill context for an executed skill. - - Args: - skill_id: The skill identifier (e.g., 'pdf@latest'). - - Returns: - SkillContext if the skill was executed, None otherwise. - """ - if self._executor: - return self._executor.get_skill_context(skill_id) - return None - - def get_all_skill_contexts(self) -> Dict[str, SkillContext]: - """ - Get all skill contexts from executed skills. - - Returns: - Dict mapping skill_id to SkillContext. - """ - if self._executor: - return self._executor.get_all_contexts() - return {} - - def get_executed_skill_ids(self) -> List[str]: - """ - Get list of skill_ids that were executed. - - Returns: - List of skill_ids with available contexts. - """ - if self._executor: - return self._executor.get_executed_skill_ids() - return [] - - async def run( - self, - query: str, - execution_input: Optional[ExecutionInput] = None, - stop_on_failure: bool = True - ) -> SkillDAGResult: - """ - Run skill retrieval and execute the resulting DAG in one call. - - Combines get_skill_dag() and execute_dag(). - Uses progressive skill analysis for each skill execution. - - Args: - query: User's task query. - execution_input: Optional initial input for skills. - stop_on_failure: Whether to stop on first failure. - - Returns: - SkillDAGResult with execution_result populated. - """ - dag_result = await self.get_skill_dag(query) - - # Skip execution for chat-only results - if dag_result.chat_response: - logger.info('Chat-only response, skipping execution') - return dag_result - - # Skip if skills are incomplete - if not dag_result.is_complete: - logger.warning(f'Skills incomplete: {dag_result.clarification}') - return dag_result - - # Execute the DAG - if dag_result.execution_order: - await self.execute_dag( - dag_result, execution_input, stop_on_failure, query=query) - - return dag_result diff --git a/ms_agent/skill/catalog.py b/ms_agent/skill/catalog.py new file mode 100644 index 000000000..75a040201 --- /dev/null +++ b/ms_agent/skill/catalog.py @@ -0,0 +1,302 @@ +# Copyright (c) ModelScope Contributors. All rights reserved. 
+import os
+import shutil
+import subprocess
+import tempfile
+import zipfile
+from pathlib import Path
+from typing import Dict, List, Optional, Set
+
+import requests
+
+from ms_agent.utils.logger import get_logger
+
+from .loader import SkillLoader
+from .schema import SkillSchema, SkillSchemaParser
+from .sources import SkillSource, SkillSourceType, parse_skill_source
+
+logger = get_logger()
+
+MODELSCOPE_SKILL_API = (
+    "https://www.modelscope.cn/api/v1/skills/{skill_id}/archive/zip/master")
+
+
+def _download_skill_zip(skill_id: str, local_dir: str) -> str:
+    """Download a skill archive from the ModelScope skill hub and extract it.
+
+    This is a pure-HTTP fallback that does not require ``modelscope>=1.35.2``.
+    The directory naming follows the SDK convention: ``<local_dir>/<skill_name>``.
+    """
+    url = MODELSCOPE_SKILL_API.format(skill_id=skill_id)
+    os.makedirs(local_dir, exist_ok=True)
+
+    _owner, name = skill_id.split("/", 1)
+    skill_dir = os.path.join(local_dir, name)
+
+    resp = requests.get(url, stream=True, timeout=120)
+    resp.raise_for_status()
+
+    zip_path = os.path.join(local_dir, f"{name}.zip")
+    try:
+        with open(zip_path, "wb") as fh:
+            for chunk in resp.iter_content(chunk_size=8192):
+                if chunk:
+                    fh.write(chunk)
+
+        if os.path.exists(skill_dir):
+            shutil.rmtree(skill_dir)
+        os.makedirs(skill_dir, exist_ok=True)
+
+        with zipfile.ZipFile(zip_path, "r") as zf:
+            zf.extractall(skill_dir)
+
+        # Flatten a single nested top-level directory from the archive
+        entries = os.listdir(skill_dir)
+        if len(entries) == 1:
+            nested = os.path.join(skill_dir, entries[0])
+            if os.path.isdir(nested):
+                for item in os.listdir(nested):
+                    shutil.move(
+                        os.path.join(nested, item),
+                        os.path.join(skill_dir, item))
+                os.rmdir(nested)
+    finally:
+        if os.path.exists(zip_path):
+            os.remove(zip_path)
+
+    logger.info(f"Skill {skill_id} downloaded to {skill_dir}")
+    return skill_dir
+
+
+BUILTIN_SKILLS_DIR = Path(__file__).parent.parent / "skills"
+if not BUILTIN_SKILLS_DIR.exists():
+    _repo_root = Path(__file__).parent.parent.parent
+    _candidate = _repo_root / "skills"
+    if _candidate.exists():
+        BUILTIN_SKILLS_DIR = _candidate
+
+USER_SKILLS_DIR = Path.home() / ".ms_agent" / "skills"
+
+
+class SkillCatalog:
+    """Unified skill catalog that loads, caches, and manages skills
+    from multiple sources with priority-based override semantics.
+    """
+
+    def __init__(self, config=None):
+        self._skills: Dict[str, SkillSchema] = {}
+        self._sources: List[SkillSource] = []
+        self._loader = SkillLoader()
+        self._config = config
+        self._disabled_skills: Set[str] = set()
+        self._whitelist: Optional[Set[str]] = None
+        self._cache_version: int = 0
+        self._summary_cache: Optional[str] = None
+        self._summary_cache_version: int = -1
+
+    # ------------------------------------------------------------------ #
+    # Loading
+    # ------------------------------------------------------------------ #
+
+    def load_from_config(self, skills_config) -> None:
+        """Load skills following the three-tier priority scan:
+        built-in -> user home -> workspace / config-specified.
+        """
+        sources: List[SkillSource] = []
+
+        # 1. Built-in skills (lowest priority)
+        if BUILTIN_SKILLS_DIR.exists():
+            sources.append(
+                SkillSource(type=SkillSourceType.LOCAL_DIR,
+                            path=str(BUILTIN_SKILLS_DIR)))
+
+        # 2. User home skills
+        for subdir in ("installed", "custom"):
+            d = USER_SKILLS_DIR / subdir
+            if d.exists():
+                sources.append(
+                    SkillSource(type=SkillSourceType.LOCAL_DIR,
+                                path=str(d)))
+
+        # 3a. 
Structured sources (higher priority) + if hasattr(skills_config, "sources") and skills_config.sources: + for src_cfg in skills_config.sources: + sources.append( + SkillSource( + type=SkillSourceType(src_cfg.type), + path=getattr(src_cfg, "path", None), + repo_id=getattr(src_cfg, "repo_id", None), + url=getattr(src_cfg, "url", None), + revision=getattr(src_cfg, "revision", None), + subdir=getattr(src_cfg, "subdir", None), + enabled=getattr(src_cfg, "enabled", True), + )) + # 3b. Simple path list (backward compat) + elif hasattr(skills_config, "path") and skills_config.path: + paths = skills_config.path + if isinstance(paths, str): + paths = [paths] + for p in paths: + sources.append(parse_skill_source(str(p))) + + # 4. Workspace auto-discover (highest priority) + if getattr(skills_config, "auto_discover", False): + workspace_skills = Path.cwd() / "skills" + if workspace_skills.exists(): + sources.append( + SkillSource(type=SkillSourceType.LOCAL_DIR, + path=str(workspace_skills))) + + self._sources = sources + self.load_from_sources(sources) + + # Apply whitelist / disabled filters + if hasattr(skills_config, "whitelist"): + wl = skills_config.whitelist + if wl is None: + self._whitelist = None + elif isinstance(wl, (list, tuple)): + self._whitelist = set(wl) if wl else set() + if hasattr(skills_config, "disabled") and skills_config.disabled: + self._disabled_skills = set(skills_config.disabled) + + def load_from_sources(self, sources: List[SkillSource]) -> None: + self._sources = sources + for source in sources: + if not source.enabled: + continue + try: + skills = self._materialize_and_load(source) + for skill in skills.values(): + self._register_skill(skill) + except Exception as e: + logger.warning(f"Failed to load skill source {source}: {e}") + + def _materialize_and_load( + self, source: SkillSource) -> Dict[str, SkillSchema]: + if source.type == SkillSourceType.LOCAL_DIR: + return self._loader.load_skills(source.path) + elif source.type == SkillSourceType.MODELSCOPE: + return self._load_from_modelscope(source) + elif source.type == SkillSourceType.GIT: + return self._load_from_git(source) + return {} + + def _load_from_modelscope( + self, source: SkillSource) -> Dict[str, SkillSchema]: + try: + from modelscope.hub.api import HubApi + api = HubApi() + local_dir = str(USER_SKILLS_DIR / "installed") + local_path = api.download_skill( + skill_id=source.repo_id, local_dir=local_dir) + except (ImportError, AttributeError): + local_path = _download_skill_zip( + source.repo_id, + str(USER_SKILLS_DIR / "installed")) + if source.subdir: + local_path = str(Path(local_path) / source.subdir) + return self._loader.load_skills(local_path) + + def _load_from_git(self, source: SkillSource) -> Dict[str, SkillSchema]: + dest = Path(tempfile.mkdtemp(prefix="ms_agent_skill_")) + cmd = ["git", "clone", "--depth", "1"] + if source.revision: + cmd += ["--branch", source.revision] + cmd += [source.url, str(dest)] + subprocess.run(cmd, check=True, capture_output=True) + local_path = str(dest / source.subdir) if source.subdir else str(dest) + return self._loader.load_skills(local_path) + + def _register_skill(self, skill: SkillSchema) -> None: + """Register a skill; later registrations override earlier ones.""" + self._skills[skill.skill_id] = skill + self._invalidate_cache() + + # ------------------------------------------------------------------ # + # Query + # ------------------------------------------------------------------ # + + def get_enabled_skills(self) -> Dict[str, SkillSchema]: + result = {} + for 
sid, skill in self._skills.items(): + if sid in self._disabled_skills: + continue + if self._whitelist is not None and sid not in self._whitelist: + continue + result[sid] = skill + return result + + def get_always_skills(self) -> Dict[str, SkillSchema]: + result = {} + for sid, skill in self.get_enabled_skills().items(): + frontmatter = SkillSchemaParser.parse_yaml_frontmatter( + skill.content) + if frontmatter and frontmatter.get("always", False): + result[sid] = skill + return result + + def get_skill(self, skill_id: str) -> Optional[SkillSchema]: + return self._skills.get(skill_id) + + # ------------------------------------------------------------------ # + # Hot reload + # ------------------------------------------------------------------ # + + def reload(self) -> None: + self._skills.clear() + self.load_from_sources(self._sources) + + def reload_skill(self, skill_id: str) -> Optional[SkillSchema]: + skill = self._skills.get(skill_id) + if skill and skill.skill_path.exists(): + reloaded = self._loader.reload_skill(str(skill.skill_path)) + if reloaded: + self._skills[skill_id] = reloaded + self._invalidate_cache() + return reloaded + return None + + def add_skill(self, skill_path: str) -> Optional[SkillSchema]: + skills = self._loader.load_skills(skill_path) + for skill in skills.values(): + self._register_skill(skill) + return skill + return None + + def remove_skill(self, skill_id: str) -> bool: + if skill_id in self._skills: + del self._skills[skill_id] + self._invalidate_cache() + return True + return False + + def enable_skill(self, skill_id: str) -> None: + self._disabled_skills.discard(skill_id) + self._invalidate_cache() + + def disable_skill(self, skill_id: str) -> None: + self._disabled_skills.add(skill_id) + self._invalidate_cache() + + # ------------------------------------------------------------------ # + # Summary cache + # ------------------------------------------------------------------ # + + def _invalidate_cache(self) -> None: + self._cache_version += 1 + + def get_skills_summary(self) -> str: + if self._summary_cache_version == self._cache_version: + return self._summary_cache or "" + self._summary_cache = self._build_summary() + self._summary_cache_version = self._cache_version + return self._summary_cache + + def _build_summary(self) -> str: + skills = self.get_enabled_skills() + if not skills: + return "" + lines = [] + for sid, skill in sorted(skills.items()): + lines.append( + f"- **{skill.name}** (`{sid}`): {skill.description}") + return "\n".join(lines) diff --git a/ms_agent/skill/container.py b/ms_agent/skill/container.py deleted file mode 100644 index 51d96f6f3..000000000 --- a/ms_agent/skill/container.py +++ /dev/null @@ -1,1443 +0,0 @@ -# Copyright (c) ModelScope Contributors. All rights reserved. -""" -Skill Execution Container - -Provides a unified, secure execution environment for skills using EnclaveSandbox. -Supports multiple languages (Python, Shell, JavaScript) with Docker-based isolation. -Cross-platform support (Mac/Linux/Windows) with RCE prevention. 
- -Execution modes: -- use_sandbox=True: Execute in Docker sandbox (default, recommended for untrusted code) -- use_sandbox=False: Execute locally with security checks (for trusted code or no Docker) -""" -import asyncio -import os -import platform -import re -import shutil -import subprocess -import sys -import tempfile -import uuid -from dataclasses import dataclass, field -from datetime import datetime -from enum import Enum -from pathlib import Path -from typing import Any, Callable, Dict, List, Optional, Union - -from ms_agent.utils.logger import get_logger - -logger = get_logger() - -# Security: Patterns to detect potentially dangerous code (sandbox mode) -# Note: These are checked only in sandbox mode for stricter isolation -DANGEROUS_PATTERNS = [ - r'os\.system\s*\(', # os.system - r'subprocess\.call\s*\([^)]*shell\s*=\s*True', # subprocess with shell=True - r'open\s*\([^)]*["\']\/etc', # Reading system files - r'rm\s+-rf\s+\/', # Dangerous rm commands - r'chmod\s+777', # Dangerous chmod - r'curl\s+.*\|\s*sh', # Piped curl execution - r'wget\s+.*\|\s*sh', # Piped wget execution -] - -# Additional patterns for local execution (stricter but reasonable) -# Note: eval/exec are allowed as they're commonly used in generated code -LOCAL_DANGEROUS_PATTERNS = DANGEROUS_PATTERNS + [ - r'shutil\.rmtree\s*\([^)]*["\']/', # Removing root paths - r'pathlib\.Path\s*\([^)]*["\']/', # Accessing root paths -] - -# Allowed file extensions for local script execution -ALLOWED_SCRIPT_EXTENSIONS = {'.py', '.sh', '.bash', '.js', '.mjs'} - - -class ExecutorType(Enum): - """Supported executor types for skill execution.""" - PYTHON_SCRIPT = 'python_script' - PYTHON_CODE = 'python_code' - PYTHON_FUNCTION = 'python_function' - SHELL = 'shell' - JAVASCRIPT = 'javascript' - - -class ExecutionStatus(Enum): - """Execution status codes.""" - PENDING = 'pending' - RUNNING = 'running' - SUCCESS = 'success' - FAILED = 'failed' - TIMEOUT = 'timeout' - CANCELLED = 'cancelled' - SECURITY_BLOCKED = 'security_blocked' - - -@dataclass -class ExecutionInput: - """ - Input specification for skill execution. - - Attributes: - args: Command line arguments or positional parameters. - kwargs: Keyword arguments for function calls. - env_vars: Environment variables to set during execution. - input_files: Dict of input files {name: path or content}. - stdin: Standard input content. - working_dir: Working directory for execution. - requirements: Python packages to install before execution. - """ - args: List[Any] = field(default_factory=list) - kwargs: Dict[str, Any] = field(default_factory=dict) - env_vars: Dict[str, str] = field(default_factory=dict) - input_files: Dict[str, Union[str, Path]] = field(default_factory=dict) - stdin: Optional[str] = None - working_dir: Optional[Path] = None - requirements: List[str] = field(default_factory=list) - - def to_dict(self) -> Dict[str, Any]: - return { - 'args': self.args, - 'kwargs': self.kwargs, - 'env_vars': self.env_vars, - 'input_files': {k: str(v) - for k, v in self.input_files.items()}, - 'stdin': self.stdin, - 'working_dir': str(self.working_dir) if self.working_dir else None, - 'requirements': self.requirements, - } - - -@dataclass -class ExecutionOutput: - """ - Output specification for skill execution. - - Attributes: - return_value: Return value from function execution. - stdout: Standard output content. - stderr: Standard error content. - exit_code: Process exit code. - output_files: Dict of output files {name: path}. - artifacts: Any generated artifacts (data, objects, etc.). 
- duration_ms: Execution duration in milliseconds. - """ - return_value: Any = None - stdout: str = '' - stderr: str = '' - exit_code: int = 0 - output_files: Dict[str, Path] = field(default_factory=dict) - artifacts: Dict[str, Any] = field(default_factory=dict) - duration_ms: float = 0.0 - - def to_dict(self) -> Dict[str, Any]: - return { - 'return_value': - str(self.return_value) if self.return_value else None, - 'stdout': self.stdout, - 'stderr': self.stderr, - 'exit_code': self.exit_code, - 'output_files': {k: str(v) - for k, v in self.output_files.items()}, - 'artifacts': list(self.artifacts.keys()), - 'duration_ms': self.duration_ms, - } - - -@dataclass -class ExecutionRecord: - """ - A single execution record in the spec log. - - Attributes: - execution_id: Unique identifier for this execution. - skill_id: The skill being executed. - executor_type: Type of executor used. - script_path: Path to the script (if applicable). - function_name: Name of the function (if applicable). - input_spec: Input specification. - output_spec: Output specification. - status: Execution status. - start_time: Execution start time. - end_time: Execution end time. - error_message: Error message if failed. - sandbox_used: Whether sandbox was used for execution. - """ - execution_id: str = field(default_factory=lambda: str(uuid.uuid4())[:8]) - skill_id: str = '' - executor_type: ExecutorType = ExecutorType.PYTHON_SCRIPT - script_path: Optional[str] = None - function_name: Optional[str] = None - input_spec: ExecutionInput = field(default_factory=ExecutionInput) - output_spec: ExecutionOutput = field(default_factory=ExecutionOutput) - status: ExecutionStatus = ExecutionStatus.PENDING - start_time: Optional[datetime] = None - end_time: Optional[datetime] = None - error_message: Optional[str] = None - sandbox_used: bool = True - - def to_markdown(self) -> str: - """Convert execution record to markdown format.""" - lines = [ - f'### Execution: `{self.execution_id}`', - '', - f'- **Skill ID**: `{self.skill_id}`', - f'- **Executor**: `{self.executor_type.value}`', - f'- **Status**: `{self.status.value}`', - f'- **Sandbox**: `{"Yes" if self.sandbox_used else "No"}`', - ] - - if self.script_path: - lines.append(f'- **Script**: `{self.script_path}`') - if self.function_name: - lines.append(f'- **Function**: `{self.function_name}`') - - if self.start_time: - lines.append(f'- **Start Time**: `{self.start_time.isoformat()}`') - if self.end_time: - lines.append(f'- **End Time**: `{self.end_time.isoformat()}`') - - lines.append(f'- **Duration**: `{self.output_spec.duration_ms:.2f}ms`') - - # Input section - lines.extend(['', '#### Input', '']) - if self.input_spec.args: - lines.append(f'- **Args**: `{self.input_spec.args}`') - if self.input_spec.kwargs: - lines.append(f'- **Kwargs**: `{self.input_spec.kwargs}`') - if self.input_spec.input_files: - lines.append('- **Input Files**:') - for name, path in self.input_spec.input_files.items(): - lines.append(f' - `{name}`: `{path}`') - if self.input_spec.requirements: - lines.append( - f'- **Requirements**: `{self.input_spec.requirements}`') - - # Output section - lines.extend(['', '#### Output', '']) - lines.append(f'- **Exit Code**: `{self.output_spec.exit_code}`') - - if self.output_spec.stdout: - stdout_preview = self.output_spec.stdout[:1000] - lines.extend(['', '**stdout**:', '```', stdout_preview, '```']) - if self.output_spec.stderr: - stderr_preview = self.output_spec.stderr[:1000] - lines.extend(['', '**stderr**:', '```', stderr_preview, '```']) - if 
self.output_spec.output_files: - lines.append('- **Output Files**:') - for name, path in self.output_spec.output_files.items(): - lines.append(f' - `{name}`: `{path}`') - - if self.error_message: - lines.extend( - ['', '#### Error', '', f'```\n{self.error_message}\n```']) - - lines.append('') - return '\n'.join(lines) - - -@dataclass -class ExecutionSpec: - """ - Specification log for tracking execution flow across skills. - - Attributes: - spec_id: Unique identifier for this spec. - title: Title of the execution spec. - description: Description of the execution flow. - records: List of execution records. - created_at: Creation timestamp. - upstream_outputs: Outputs from upstream skills available as inputs. - """ - spec_id: str = field(default_factory=lambda: str(uuid.uuid4())[:8]) - title: str = 'Skill Execution Spec' - description: str = '' - records: List[ExecutionRecord] = field(default_factory=list) - created_at: datetime = field(default_factory=datetime.now) - upstream_outputs: Dict[str, ExecutionOutput] = field(default_factory=dict) - - def add_record(self, record: ExecutionRecord): - """Add an execution record to the spec.""" - self.records.append(record) - - def get_output(self, execution_id: str) -> Optional[ExecutionOutput]: - """Get output from a specific execution by ID.""" - for record in self.records: - if record.execution_id == execution_id: - return record.output_spec - return None - - def link_upstream(self, skill_id: str, output: ExecutionOutput): - """Link upstream skill output for downstream consumption.""" - self.upstream_outputs[skill_id] = output - - def to_markdown(self) -> str: - """Convert entire spec to markdown format.""" - lines = [ - f'# {self.title}', - '', - f'**Spec ID**: `{self.spec_id}`', - f'**Created**: `{self.created_at.isoformat()}`', - '', - ] - - if self.description: - lines.extend([self.description, '']) - - # Summary - total = len(self.records) - success = sum(1 for r in self.records - if r.status == ExecutionStatus.SUCCESS) - failed = sum(1 for r in self.records - if r.status == ExecutionStatus.FAILED) - blocked = sum(1 for r in self.records - if r.status == ExecutionStatus.SECURITY_BLOCKED) - - lines.extend([ - '## Summary', - '', - f'- **Total Executions**: {total}', - f'- **Successful**: {success}', - f'- **Failed**: {failed}', - f'- **Security Blocked**: {blocked}', - '', - '---', - '', - '## Execution Records', - '', - ]) - - for record in self.records: - lines.append(record.to_markdown()) - lines.append('---') - lines.append('') - - return '\n'.join(lines) - - def save(self, output_path: Union[str, Path]): - """Save spec to markdown file.""" - output_path = Path(output_path) - output_path.parent.mkdir(parents=True, exist_ok=True) - with open(output_path, 'w', encoding='utf-8') as f: - f.write(self.to_markdown()) - logger.info(f'Execution spec saved to: {output_path}') - - -class SkillContainer: - """ - Secure container for executing skills. 
- - Supports two execution modes: - - use_sandbox=True: Execute in Docker sandbox via ms-enclave (recommended for untrusted code) - - use_sandbox=False: Execute locally with security checks (for trusted code or no Docker) - - Features: - - Docker-based isolation via ms-enclave - - Python scripts, Python code, shell commands, and JavaScript support - - Cross-platform support (Mac/Linux/Windows) - - RCE prevention and security checks - """ - - # Container paths for sandbox (following AgentSkill pattern) - SANDBOX_ROOT = '/sandbox' - SANDBOX_OUTPUT_DIR = '/sandbox/outputs' - SANDBOX_WORK_DIR = '/sandbox/scripts' - - def __init__(self, - workspace_dir: Optional[Union[str, Path]] = None, - timeout: int = 300, - image: str = 'python:3.11-slim', - memory_limit: str = '512m', - enable_security_check: bool = True, - network_enabled: bool = False, - use_sandbox: bool = True): - """ - Initialize the skill container. - - Args: - workspace_dir: Host working directory for I/O. Creates temp dir if None. - timeout: Default execution timeout in seconds. - image: Docker image for sandbox execution. - memory_limit: Memory limit for sandbox container. - enable_security_check: Whether to check code for dangerous patterns. - network_enabled: Whether to enable network in sandbox (disabled by default for security). - use_sandbox: Whether to use Docker sandbox (True) or local execution (False). - """ - # Ensure workspace_dir is an absolute path (required by Docker) - if workspace_dir: - self.workspace_dir = Path(workspace_dir).resolve() - else: - self.workspace_dir = Path( - tempfile.mkdtemp(prefix='skill_container_')).resolve() - self.workspace_dir.mkdir(parents=True, exist_ok=True) - - self.timeout = timeout - self.image = image - self.memory_limit = memory_limit - self.enable_security_check = enable_security_check - self.network_enabled = network_enabled - self.use_sandbox = use_sandbox - self.spec = ExecutionSpec() - - # Host directories for I/O management (only outputs, scripts, logs) - self.output_dir = self.workspace_dir / 'outputs' - self.scripts_dir = self.workspace_dir / 'scripts' - self.logs_dir = self.workspace_dir / 'logs' - self.output_dir.mkdir(exist_ok=True) - self.scripts_dir.mkdir(exist_ok=True) - self.logs_dir.mkdir(exist_ok=True) - - # Sandbox instance (lazy initialization) - self._sandbox = None - - # Skill directories to mount in sandbox - self._skill_dirs: Dict[str, str] = {} - - # Warn about local execution risks - if not self.use_sandbox: - logger.warning( - 'SkillContainer running in LOCAL mode (use_sandbox=False). ' - 'Scripts will execute directly on this machine. ' - 'Ensure you trust the code being executed!') - - logger.info(f'SkillContainer initialized at: {self.workspace_dir} ' - f'[mode: {"sandbox" if self.use_sandbox else "local"}]') - - def _get_sandbox(self): - """ - Get or create EnclaveSandbox instance with volume mounts. 
- - Volume mapping follows AgentSkill pattern: - - workspace_dir -> /sandbox (rw mode for full access) - - Additional skill directories are mounted to /sandbox/skills/ - """ - if self._sandbox is None: - from ms_agent.sandbox.sandbox import EnclaveSandbox - - # Mount entire workspace to /sandbox following AgentSkill pattern - # This allows scripts to access inputs/, outputs/, scripts/ subdirs - volumes = [ - (str(self.workspace_dir.resolve()), self.SANDBOX_ROOT, 'rw'), - ] - - # Add additional skill directory mounts - for skill_id, skill_dir in self._skill_dirs.items(): - safe_id = skill_id.replace('@', '_').replace('/', '_') - sandbox_path = f'{self.SANDBOX_ROOT}/skills/{safe_id}' - volumes.append( - (str(Path(skill_dir).resolve()), sandbox_path, 'ro')) - - self._sandbox = EnclaveSandbox( - image=self.image, - memory_limit=self.memory_limit, - volumes=volumes, - ) - return self._sandbox - - def mount_skill_directory(self, skill_id: str, skill_dir: Union[str, - Path]): - """ - Mount a skill directory for sandbox access. - - Args: - skill_id: Unique identifier for the skill. - skill_dir: Path to the skill directory. - """ - self._skill_dirs[skill_id] = str(Path(skill_dir).resolve()) - # Reset sandbox to recreate with new mount - self._sandbox = None - - def get_skill_sandbox_path(self, skill_id: str) -> str: - """ - Get the sandbox path for a mounted skill directory. - - Args: - skill_id: The skill identifier. - - Returns: - Path inside sandbox where skill is mounted. - """ - safe_id = skill_id.replace('@', '_').replace('/', '_') - return f'{self.SANDBOX_ROOT}/skills/{safe_id}' - - def _security_check(self, - code: str, - is_local: bool = False) -> tuple[bool, str]: - """ - Check code for potentially dangerous patterns. - - Args: - code: Code string to check. - is_local: If True, use stricter patterns for local execution. - - Returns: - Tuple of (is_safe, reason). - """ - if not self.enable_security_check: - return True, '' - - # Use stricter patterns for local execution - patterns = LOCAL_DANGEROUS_PATTERNS if is_local else DANGEROUS_PATTERNS - - for pattern in patterns: - if re.search(pattern, code, re.IGNORECASE): - return False, f'Dangerous pattern detected: {pattern}' - - return True, '' - - def _validate_path_in_workspace(self, path: Path) -> bool: - """ - Validate that a path is within the workspace directory. - - Security measure for local execution to prevent path traversal. - - Args: - path: Path to validate. - - Returns: - True if path is within workspace, False otherwise. - """ - try: - resolved = path.resolve() - return str(resolved).startswith(str(self.workspace_dir.resolve())) - except (OSError, ValueError): - return False - - def _validate_script_extension(self, script_path: Path) -> bool: - """ - Validate that script has an allowed extension. - - Args: - script_path: Path to the script file. - - Returns: - True if extension is allowed, False otherwise. 
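-
-        Example:
-            >>> self._validate_script_extension(Path('run.py'))
-            True
-            >>> self._validate_script_extension(Path('payload.exe'))
-            False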
- """ - return script_path.suffix.lower() in ALLOWED_SCRIPT_EXTENSIONS - - def _collect_output_files(self) -> Dict[str, Path]: - """Collect output files from output directory.""" - outputs = {} - if self.output_dir.exists(): - for f in self.output_dir.iterdir(): - if f.is_file(): - outputs[f.name] = f - return outputs - - def _create_record(self, - skill_id: str, - executor_type: ExecutorType, - input_spec: ExecutionInput, - script_path: str = None, - function_name: str = None, - sandbox_used: bool = None) -> ExecutionRecord: - """Create a new execution record.""" - return ExecutionRecord( - skill_id=skill_id, - executor_type=executor_type, - script_path=script_path, - function_name=function_name, - input_spec=input_spec, - status=ExecutionStatus.PENDING, - sandbox_used=sandbox_used - if sandbox_used is not None else self.use_sandbox) - - # ------------------------------------------------------------------------- - # Local Execution Helpers (for use_sandbox=False mode) - # ------------------------------------------------------------------------- - - def _local_run_subprocess(self, - cmd: List[str], - env: Dict[str, str] = None, - cwd: Path = None, - stdin_input: str = None) -> tuple[str, str, int]: - """ - Run subprocess locally with security restrictions. - - Cross-platform support with timeout and resource limits. - - Args: - cmd: Command list to execute. - env: Environment variables. - cwd: Working directory. - stdin_input: Input to pass to stdin. - - Returns: - Tuple of (stdout, stderr, exit_code). - """ - # Setup environment - run_env = os.environ.copy() - run_env['SKILL_OUTPUT_DIR'] = str(self.output_dir) - if env: - run_env.update(env) - - # Use workspace as default cwd - work_dir = cwd or self.workspace_dir - - try: - result = subprocess.run( - cmd, - capture_output=True, - text=True, - timeout=self.timeout, - cwd=str(work_dir), - env=run_env, - stdin=subprocess.PIPE if stdin_input else None, - input=stdin_input, - ) - return result.stdout, result.stderr, result.returncode - except subprocess.TimeoutExpired: - return '', f'Execution timed out after {self.timeout}s', -1 - except Exception as e: - return '', str(e), -1 - - def _get_python_executable(self) -> str: - """Get the Python executable for the current platform.""" - return sys.executable - - def _get_shell_executable(self) -> List[str]: - """Get the shell executable for the current platform.""" - if platform.system() == 'Windows': - return ['cmd', '/c'] - else: - return ['/bin/sh', '-c'] - - def _get_node_executable(self) -> str: - """Get the Node.js executable for the current platform.""" - if platform.system() == 'Windows': - return 'node.exe' - return 'node' - - async def _local_install_requirements( - self, requirements: List[str]) -> tuple[bool, str]: - """ - Install Python requirements locally using pip. - - Args: - requirements: List of packages to install. - - Returns: - Tuple of (success, error_message). 
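-
-        Example (sketch; the package name is illustrative):
-            >>> ok, err = await self._local_install_requirements(['requests'])
-            >>> ok, err  # (True, '') on success, (False, <pip stderr>) on failure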
- """ - if not requirements: - return True, '' - - try: - cmd = [ - self._get_python_executable(), '-m', 'pip', 'install', - '--quiet', '--disable-pip-version-check' - ] + requirements - - stdout, stderr, exit_code = self._local_run_subprocess(cmd) - - if exit_code != 0: - logger.warning(f'Failed to install requirements: {stderr}') - return False, stderr - - logger.info(f'Installed requirements: {requirements}') - return True, '' - except Exception as e: - logger.error(f'Error installing requirements: {e}') - return False, str(e) - - async def _local_execute_python_code( - self, code: str, - input_spec: ExecutionInput) -> tuple[str, str, int]: - """ - Execute Python code locally. - - Args: - code: Python code to execute. - input_spec: Input specification. - - Returns: - Tuple of (stdout, stderr, exit_code). - """ - # Install requirements first if any - if input_spec.requirements: - success, error = await self._local_install_requirements( - input_spec.requirements) - if not success: - return '', f'Failed to install requirements: {error}', -1 - - # Write code to temp file - script_file = self.scripts_dir / f'_temp_{uuid.uuid4().hex[:8]}.py' - try: - # Generate environment setup - env_setup = self._generate_local_env_setup(input_spec) - full_code = env_setup + '\n' + code - - with open(script_file, 'w', encoding='utf-8') as f: - f.write(full_code) - - # Build command - cmd = [self._get_python_executable(), str(script_file)] - cmd.extend([str(arg) for arg in input_spec.args]) - - # Use working_dir from input_spec for proper resource access - cwd = input_spec.working_dir if input_spec.working_dir else None - - stdout, stderr, exit_code = self._local_run_subprocess( - cmd, - env=input_spec.env_vars, - cwd=cwd, - stdin_input=input_spec.stdin) - - # Keep script in scripts folder for logging/debugging - return stdout, stderr, exit_code - except Exception as e: - logger.error(f'Local Python execution failed: {e}') - raise - - async def _local_execute_shell( - self, command: str, - input_spec: ExecutionInput) -> tuple[str, str, int]: - """ - Execute shell command locally. - - Args: - command: Shell command to execute. - input_spec: Input specification. - - Returns: - Tuple of (stdout, stderr, exit_code). - """ - shell_exec = self._get_shell_executable() - - # Build full command with environment exports - if platform.system() == 'Windows': - # Windows: use set for environment - env_cmds = [f'set {k}={v}' for k, v in input_spec.env_vars.items()] - full_cmd = ' && '.join(env_cmds - + [command]) if env_cmds else command - cmd = shell_exec + [full_cmd] - else: - # Unix: use export - env_cmds = [ - f"export {k}='{v}'" for k, v in input_spec.env_vars.items() - ] - full_cmd = ' && '.join(env_cmds - + [command]) if env_cmds else command - cmd = shell_exec + [full_cmd] - - # Use working_dir from input_spec for proper resource access - cwd = input_spec.working_dir if input_spec.working_dir else None - - return self._local_run_subprocess( - cmd, - env=input_spec.env_vars, - cwd=cwd, - stdin_input=input_spec.stdin) - - async def _local_execute_javascript( - self, js_code: str, - input_spec: ExecutionInput) -> tuple[str, str, int]: - """ - Execute JavaScript code locally via Node.js. - - Args: - js_code: JavaScript code to execute. - input_spec: Input specification. - - Returns: - Tuple of (stdout, stderr, exit_code). 
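-
-        Example (sketch; requires node on PATH):
-            >>> out, err, code = await self._local_execute_javascript(
-            ...     'console.log("hello")', ExecutionInput())
-            >>> out.strip(), code  # ('hello', 0)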
- """ - # Write code to temp file - script_file = self.scripts_dir / f'_temp_{uuid.uuid4().hex[:8]}.js' - try: - # Generate environment setup - env_setup = self._generate_local_js_env_setup(input_spec) - full_code = env_setup + '\n' + js_code - - with open(script_file, 'w', encoding='utf-8') as f: - f.write(full_code) - - # Build command - cmd = [self._get_node_executable(), str(script_file)] - cmd.extend([str(arg) for arg in input_spec.args]) - - # Use working_dir from input_spec for proper resource access - cwd = input_spec.working_dir if input_spec.working_dir else None - - # Keep script in scripts folder for logging/debugging - return self._local_run_subprocess( - cmd, - env=input_spec.env_vars, - cwd=cwd, - stdin_input=input_spec.stdin) - except Exception as e: - logger.error(f'Local JavaScript execution failed: {e}') - raise - - def _generate_local_env_setup(self, input_spec: ExecutionInput) -> str: - """Generate Python code to setup environment for local execution.""" - lines = [ - 'import os', - 'import sys', - '', - '# Setup environment for local execution', - f"os.environ['SKILL_OUTPUT_DIR'] = {repr(str(self.output_dir))}", - f"os.environ['SKILL_LOGS_DIR'] = {repr(str(self.logs_dir))}", - '', - '# Helper functions for I/O paths', - 'def get_output_path(filename):', - ' """Get the full path for an output file. ALL outputs should use this."""', - " return os.path.join(os.environ['SKILL_OUTPUT_DIR'], filename)", - '', - f'SKILL_OUTPUT_DIR = {repr(str(self.output_dir))}', - f'SKILL_LOGS_DIR = {repr(str(self.logs_dir))}', - ] - - # Add working directory to sys.path for imports and change to it - if input_spec.working_dir: - work_dir = str(input_spec.working_dir) - lines.extend([ - '', - '# Setup working directory for resource access (READ-ONLY for resources)', - f'_skill_dir = {repr(work_dir)}', - "os.environ['SKILL_DIR'] = _skill_dir", - 'SKILL_DIR = _skill_dir', - 'if _skill_dir not in sys.path:', - ' sys.path.insert(0, _skill_dir)', - 'os.chdir(_skill_dir)', - ]) - - # Add custom env vars - for key, value in input_spec.env_vars.items(): - lines.append(f'os.environ[{repr(key)}] = {repr(value)}') - - # Add args - if input_spec.args: - lines.append('') - lines.append('# Command line arguments') - args_str = repr(input_spec.args) - lines.append(f'ARGS = {args_str}') - lines.append('sys.argv = ["script.py"] + [str(a) for a in ARGS]') - - lines.append('') - return '\n'.join(lines) - - def _generate_local_js_env_setup(self, input_spec: ExecutionInput) -> str: - """Generate JavaScript code to setup environment for local execution.""" - lines = [ - '// Environment setup for local execution', - f'process.env.SKILL_OUTPUT_DIR = {repr(str(self.output_dir))};', - f'process.env.SKILL_LOGS_DIR = {repr(str(self.logs_dir))};', - ] - - for key, value in input_spec.env_vars.items(): - lines.append(f'process.env.{key} = {repr(value)};') - - lines.append('') - return '\n'.join(lines) - - def _parse_sandbox_result(self, - results: Dict[str, Any]) -> tuple[str, str, int]: - """Parse sandbox execution results into stdout, stderr, exit_code.""" - stdout_parts = [] - stderr_parts = [] - exit_code = 0 - - for executor_type in ['python_executor', 'shell_executor']: - if executor_type in results: - for result in results[executor_type]: - if result.get('output'): - stdout_parts.append(result['output']) - if result.get('error'): - stderr_parts.append(result['error']) - if result.get('status', 0) != 0: - exit_code = result.get('status', -1) - - return '\n'.join(stdout_parts), '\n'.join(stderr_parts), exit_code - 
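-    # Illustrative shape of the `results` dict consumed by
-    # _parse_sandbox_result above (values are hypothetical):
-    #     {'python_executor': [{'output': 'hi\n', 'error': '', 'status': 0}]}
-    # -> ('hi\n', '', 0)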
- async def _execute_in_sandbox( - self, - python_code: Union[str, List[str]] = None, - shell_command: Union[str, List[str]] = None, - requirements: List[str] = None) -> Dict[str, Any]: - """Execute code in EnclaveSandbox.""" - sandbox = self._get_sandbox() - return await sandbox.async_execute( - python_code=python_code, - shell_command=shell_command, - requirements=requirements) - - async def execute_python_script( - self, - script_path: Union[str, Path], - skill_id: str = 'unknown', - input_spec: ExecutionInput = None) -> ExecutionOutput: - """ - Execute a Python script file. - - Uses sandbox mode or local mode based on use_sandbox setting. - - Args: - script_path: Path to the Python script. - skill_id: Identifier of the skill being executed. - input_spec: Input specification. - - Returns: - ExecutionOutput with results. - """ - input_spec = input_spec or ExecutionInput() - script_path = Path(script_path) - - record = self._create_record( - skill_id=skill_id, - executor_type=ExecutorType.PYTHON_SCRIPT, - input_spec=input_spec, - script_path=str(script_path)) - - record.start_time = datetime.now() - record.status = ExecutionStatus.RUNNING - - try: - # Read script content - with open(script_path, 'r', encoding='utf-8') as f: - code = f.read() - - # Security check (stricter for local mode) - is_safe, reason = self._security_check( - code, is_local=not self.use_sandbox) - if not is_safe: - record.status = ExecutionStatus.SECURITY_BLOCKED - record.error_message = reason - output = ExecutionOutput( - stderr=f'Security check failed: {reason}', exit_code=-1) - record.end_time = datetime.now() - record.output_spec = output - self.spec.add_record(record) - return output - - start_time = datetime.now() - - if self.use_sandbox: - # Sandbox mode: inject environment and execute - env_setup = self._generate_env_setup(input_spec, {}) - full_code = env_setup + '\n' + code - - results = await self._execute_in_sandbox( - python_code=full_code, - requirements=input_spec.requirements) - stdout, stderr, exit_code = self._parse_sandbox_result(results) - else: - # Local mode: execute directly - stdout, stderr, exit_code = await self._local_execute_python_code( - code, input_spec) - - end_time = datetime.now() - - output = ExecutionOutput( - stdout=stdout, - stderr=stderr, - exit_code=exit_code, - output_files=self._collect_output_files(), - duration_ms=(end_time - start_time).total_seconds() * 1000) - - record.status = ( - ExecutionStatus.SUCCESS - if exit_code == 0 else ExecutionStatus.FAILED) - - except Exception as e: - output = ExecutionOutput(stderr=str(e), exit_code=-1) - record.status = ExecutionStatus.FAILED - record.error_message = str(e) - logger.error(f'Python script execution failed: {e}') - - record.end_time = datetime.now() - record.output_spec = output - self.spec.add_record(record) - return output - - async def execute_python_code( - self, - code: str, - skill_id: str = 'unknown', - input_spec: ExecutionInput = None) -> ExecutionOutput: - """ - Execute Python code string. - - Uses sandbox mode or local mode based on use_sandbox setting. - - Args: - code: Python code to execute. - skill_id: Identifier of the skill being executed. - input_spec: Input specification. - - Returns: - ExecutionOutput with results. 
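-
-        Example (sketch):
-            >>> out = await container.execute_python_code(
-            ...     'print("hi")', skill_id='demo@latest')
-            >>> out.exit_code, out.stdout.strip()  # (0, 'hi')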
- """ - input_spec = input_spec or ExecutionInput() - - record = self._create_record( - skill_id=skill_id, - executor_type=ExecutorType.PYTHON_CODE, - input_spec=input_spec, - script_path='') - - record.start_time = datetime.now() - record.status = ExecutionStatus.RUNNING - - try: - # Security check (stricter for local mode) - is_safe, reason = self._security_check( - code, is_local=not self.use_sandbox) - if not is_safe: - record.status = ExecutionStatus.SECURITY_BLOCKED - record.error_message = reason - output = ExecutionOutput( - stderr=f'Security check failed: {reason}', exit_code=-1) - record.end_time = datetime.now() - record.output_spec = output - self.spec.add_record(record) - return output - - start_time = datetime.now() - - if self.use_sandbox: - # Sandbox mode - env_setup = self._generate_env_setup(input_spec, {}) - full_code = env_setup + '\n' + code - - results = await self._execute_in_sandbox( - python_code=full_code, - requirements=input_spec.requirements) - stdout, stderr, exit_code = self._parse_sandbox_result(results) - else: - # Local mode - stdout, stderr, exit_code = await self._local_execute_python_code( - code, input_spec) - - end_time = datetime.now() - - output = ExecutionOutput( - stdout=stdout, - stderr=stderr, - exit_code=exit_code, - output_files=self._collect_output_files(), - duration_ms=(end_time - start_time).total_seconds() * 1000) - - record.status = ( - ExecutionStatus.SUCCESS - if exit_code == 0 else ExecutionStatus.FAILED) - - except Exception as e: - output = ExecutionOutput(stderr=str(e), exit_code=-1) - record.status = ExecutionStatus.FAILED - record.error_message = str(e) - logger.error(f'Python code execution failed: {e}') - - record.end_time = datetime.now() - record.output_spec = output - self.spec.add_record(record) - return output - - def _generate_env_setup(self, input_spec: ExecutionInput, - sandbox_files: Dict[str, str]) -> str: - """Generate Python code to setup environment variables and paths.""" - sandbox_logs_dir = f'{self.SANDBOX_ROOT}/logs' - lines = [ - 'import os', - 'import sys', - '', - '# Setup environment', - f"os.environ['SKILL_OUTPUT_DIR'] = '{self.SANDBOX_OUTPUT_DIR}'", - f"os.environ['SKILL_LOGS_DIR'] = '{sandbox_logs_dir}'", - '', - '# Helper functions for I/O paths', - 'def get_output_path(filename):', - ' """Get the full path for an output file. ALL outputs should use this."""', - " return os.path.join(os.environ['SKILL_OUTPUT_DIR'], filename)", - '', - f"SKILL_OUTPUT_DIR = '{self.SANDBOX_OUTPUT_DIR}'", - f"SKILL_LOGS_DIR = '{sandbox_logs_dir}'", - ] - - # Add custom env vars - for key, value in input_spec.env_vars.items(): - # Sanitize value to prevent injection - safe_value = value.replace("'", "\\'") - lines.append(f"os.environ['{key}'] = '{safe_value}'") - - # Add args - if input_spec.args: - lines.append('') - lines.append('# Command line arguments') - args_str = repr(input_spec.args) - lines.append(f'ARGS = {args_str}') - lines.append('sys.argv = ["script.py"] + [str(a) for a in ARGS]') - - lines.append('') - return '\n'.join(lines) - - def execute_python_function( - self, - func: Callable, - skill_id: str = 'unknown', - input_spec: ExecutionInput = None) -> ExecutionOutput: - """ - Execute a Python function directly (local execution, not sandboxed). - - Note: Function execution runs locally as it cannot be serialized to sandbox. - Use execute_python_code for sandboxed execution. - - Args: - func: Python callable to execute. - skill_id: Identifier of the skill being executed. 
- input_spec: Input specification with args and kwargs. - - Returns: - ExecutionOutput with results. - """ - input_spec = input_spec or ExecutionInput() - - record = self._create_record( - skill_id=skill_id, - executor_type=ExecutorType.PYTHON_FUNCTION, - input_spec=input_spec, - function_name=func.__name__) - record.sandbox_used = False # Local execution - - record.start_time = datetime.now() - record.status = ExecutionStatus.RUNNING - - try: - # Add helper paths to kwargs - kwargs = input_spec.kwargs.copy() - kwargs['_output_dir'] = self.output_dir - - start_time = datetime.now() - return_value = func(*input_spec.args, **kwargs) - end_time = datetime.now() - - output = ExecutionOutput( - return_value=return_value, - exit_code=0, - output_files=self._collect_output_files(), - duration_ms=(end_time - start_time).total_seconds() * 1000) - - record.status = ExecutionStatus.SUCCESS - - except Exception as e: - output = ExecutionOutput(stderr=str(e), exit_code=-1) - record.status = ExecutionStatus.FAILED - record.error_message = str(e) - logger.error(f'Python function execution failed: {e}') - - record.end_time = datetime.now() - record.output_spec = output - self.spec.add_record(record) - return output - - async def execute_shell( - self, - command: Union[str, List[str]], - skill_id: str = 'unknown', - input_spec: ExecutionInput = None) -> ExecutionOutput: - """ - Execute a shell command. - - Uses sandbox mode or local mode based on use_sandbox setting. - - Args: - command: Shell command string or list of commands. - skill_id: Identifier of the skill being executed. - input_spec: Input specification. - - Returns: - ExecutionOutput with results. - """ - input_spec = input_spec or ExecutionInput() - - cmd_str = command if isinstance(command, str) else ' && '.join(command) - - record = self._create_record( - skill_id=skill_id, - executor_type=ExecutorType.SHELL, - input_spec=input_spec, - script_path=cmd_str[:200]) - - record.start_time = datetime.now() - record.status = ExecutionStatus.RUNNING - - try: - # Security check (stricter for local mode) - is_safe, reason = self._security_check( - cmd_str, is_local=not self.use_sandbox) - if not is_safe: - record.status = ExecutionStatus.SECURITY_BLOCKED - record.error_message = reason - output = ExecutionOutput( - stderr=f'Security check failed: {reason}', exit_code=-1) - record.end_time = datetime.now() - record.output_spec = output - self.spec.add_record(record) - return output - - start_time = datetime.now() - - if self.use_sandbox: - # Sandbox mode: prepend environment setup - env_exports = [ - f"export SKILL_OUTPUT_DIR='{self.SANDBOX_OUTPUT_DIR}'", - ] - for key, value in input_spec.env_vars.items(): - safe_value = value.replace("'", "\\'") - env_exports.append(f"export {key}='{safe_value}'") - - full_cmd = ' && '.join(env_exports + [cmd_str]) - - results = await self._execute_in_sandbox(shell_command=full_cmd - ) - stdout, stderr, exit_code = self._parse_sandbox_result(results) - else: - # Local mode - stdout, stderr, exit_code = await self._local_execute_shell( - cmd_str, input_spec) - - end_time = datetime.now() - - output = ExecutionOutput( - stdout=stdout, - stderr=stderr, - exit_code=exit_code, - output_files=self._collect_output_files(), - duration_ms=(end_time - start_time).total_seconds() * 1000) - - record.status = ( - ExecutionStatus.SUCCESS - if exit_code == 0 else ExecutionStatus.FAILED) - - except Exception as e: - output = ExecutionOutput(stderr=str(e), exit_code=-1) - record.status = ExecutionStatus.FAILED - record.error_message = 
str(e) - logger.error(f'Shell execution failed: {e}') - - record.end_time = datetime.now() - record.output_spec = output - self.spec.add_record(record) - return output - - async def execute_javascript(self, - script_path: Union[str, Path] = None, - code: str = None, - skill_id: str = 'unknown', - input_spec: ExecutionInput = None, - runtime: str = 'node') -> ExecutionOutput: - """ - Execute JavaScript code via Node.js. - - Uses sandbox mode or local mode based on use_sandbox setting. - - Args: - script_path: Path to JavaScript file. - code: Inline JavaScript code (if no script_path). - skill_id: Identifier of the skill being executed. - input_spec: Input specification. - runtime: JavaScript runtime ('node' or 'deno'). - - Returns: - ExecutionOutput with results. - """ - input_spec = input_spec or ExecutionInput() - - record = self._create_record( - skill_id=skill_id, - executor_type=ExecutorType.JAVASCRIPT, - input_spec=input_spec, - script_path=str(script_path) if script_path else '') - - record.start_time = datetime.now() - record.status = ExecutionStatus.RUNNING - - try: - # Get JavaScript code - if script_path: - with open(script_path, 'r', encoding='utf-8') as f: - js_code = f.read() - elif code: - js_code = code - else: - raise ValueError('Either script_path or code must be provided') - - # Security check (stricter for local mode) - is_safe, reason = self._security_check( - js_code, is_local=not self.use_sandbox) - if not is_safe: - record.status = ExecutionStatus.SECURITY_BLOCKED - record.error_message = reason - output = ExecutionOutput( - stderr=f'Security check failed: {reason}', exit_code=-1) - record.end_time = datetime.now() - record.output_spec = output - self.spec.add_record(record) - return output - - start_time = datetime.now() - - if self.use_sandbox: - # Sandbox mode: write JS file and execute - js_filename = f'script_{uuid.uuid4().hex[:8]}.js' - js_path = self.scripts_dir / js_filename - sandbox_js_path = f'{self.SANDBOX_WORK_DIR}/{js_filename}' - - # Inject environment into JS code - env_inject = self._generate_js_env_setup(input_spec, {}) - full_js_code = env_inject + '\n' + js_code - - with open(js_path, 'w', encoding='utf-8') as f: - f.write(full_js_code) - - # Build shell command to run JS - args_str = ' '.join(f'"{arg}"' for arg in input_spec.args) - shell_cmd = f'{runtime} {sandbox_js_path} {args_str}' - - results = await self._execute_in_sandbox( - shell_command=shell_cmd) - stdout, stderr, exit_code = self._parse_sandbox_result(results) - else: - # Local mode - stdout, stderr, exit_code = await self._local_execute_javascript( - js_code, input_spec) - - end_time = datetime.now() - - output = ExecutionOutput( - stdout=stdout, - stderr=stderr, - exit_code=exit_code, - output_files=self._collect_output_files(), - duration_ms=(end_time - start_time).total_seconds() * 1000) - - record.status = ( - ExecutionStatus.SUCCESS - if exit_code == 0 else ExecutionStatus.FAILED) - - except Exception as e: - output = ExecutionOutput(stderr=str(e), exit_code=-1) - record.status = ExecutionStatus.FAILED - record.error_message = str(e) - logger.error(f'JavaScript execution failed: {e}') - - record.end_time = datetime.now() - record.output_spec = output - self.spec.add_record(record) - return output - - def _generate_js_env_setup(self, input_spec: ExecutionInput, - sandbox_files: Dict[str, str]) -> str: - """Generate JavaScript code to setup environment.""" - lines = [ - '// Environment setup', - f"process.env.SKILL_OUTPUT_DIR = '{self.SANDBOX_OUTPUT_DIR}';", - ] - - for key, value 
in input_spec.env_vars.items(): - safe_value = value.replace("'", "\\'") - lines.append(f"process.env.{key} = '{safe_value}';") - - lines.append('') - return '\n'.join(lines) - - async def execute(self, - executor_type: ExecutorType, - skill_id: str = 'unknown', - script_path: Union[str, Path] = None, - func: Callable = None, - command: Union[str, List[str]] = None, - code: str = None, - input_spec: ExecutionInput = None, - **kwargs) -> ExecutionOutput: - """ - Unified async execution interface. - - Args: - executor_type: Type of executor to use. - skill_id: Identifier of the skill. - script_path: Path to script file (for PYTHON_SCRIPT, JAVASCRIPT). - func: Callable function (for PYTHON_FUNCTION). - command: Shell command (for SHELL). - code: Inline code (for PYTHON_CODE, JAVASCRIPT). - input_spec: Input specification. - **kwargs: Additional executor-specific arguments. - - Returns: - ExecutionOutput with results. - """ - if executor_type == ExecutorType.PYTHON_SCRIPT: - return await self.execute_python_script( - script_path=script_path, - skill_id=skill_id, - input_spec=input_spec) - elif executor_type == ExecutorType.PYTHON_CODE: - return await self.execute_python_code( - code=code, skill_id=skill_id, input_spec=input_spec) - elif executor_type == ExecutorType.PYTHON_FUNCTION: - return self.execute_python_function( - func=func, skill_id=skill_id, input_spec=input_spec) - elif executor_type == ExecutorType.SHELL: - return await self.execute_shell( - command=command, skill_id=skill_id, input_spec=input_spec) - elif executor_type == ExecutorType.JAVASCRIPT: - return await self.execute_javascript( - script_path=script_path, - code=code, - skill_id=skill_id, - input_spec=input_spec, - **kwargs) - else: - raise ValueError(f'Unsupported executor type: {executor_type}') - - def execute_sync(self, - executor_type: ExecutorType, - skill_id: str = 'unknown', - **kwargs) -> ExecutionOutput: - """Synchronous wrapper for execute().""" - return asyncio.run(self.execute(executor_type, skill_id, **kwargs)) - - def link_skills(self, - upstream_skill_id: str, - downstream_input_key: str, - output_key: str = None) -> Optional[Any]: - """ - Link output from upstream skill to downstream skill input. - - Args: - upstream_skill_id: ID of the upstream skill. - downstream_input_key: Key to use in downstream input. - output_key: Specific output key to link (e.g., 'return_value', 'stdout'). - - Returns: - The linked value, or None if not found. - """ - if upstream_skill_id in self.spec.upstream_outputs: - output = self.spec.upstream_outputs[upstream_skill_id] - if output_key: - return getattr(output, output_key, None) - return output.return_value or output.stdout - return None - - def get_spec_log(self) -> str: - """Get the execution spec as markdown string.""" - return self.spec.to_markdown() - - def save_spec_log(self, output_path: Union[str, Path] = None): - """Save the execution spec to a markdown file in logs directory.""" - if output_path is None: - output_path = self.logs_dir / 'execution_spec.md' - self.spec.save(output_path) - logger.info(f'Saved execution spec to: {output_path}') - - def cleanup(self, keep_spec: bool = True): - """ - Clean up workspace directory. - - Args: - keep_spec: If True, saves spec before cleanup. 
- """ - if keep_spec: - self.save_spec_log() - if self.workspace_dir.exists(): - shutil.rmtree(self.workspace_dir) - logger.info(f'Cleaned up workspace: {self.workspace_dir}') diff --git a/ms_agent/skill/loader.py b/ms_agent/skill/loader.py index 1f5dca2a7..e2664c3d8 100644 --- a/ms_agent/skill/loader.py +++ b/ms_agent/skill/loader.py @@ -41,12 +41,7 @@ def load_skills( logger.warning('No skills provided to load.') return all_skills - def is_skill_id(s: str) -> bool: - return '/' in s and len(s.split('/')) == 2 and all( - s.split('/')) and not os.path.exists(s) - if isinstance(skills, str): - # Could be a single skill path, root path of skills, or skill ID on ModelScope hub skill_list = [skills] elif all(isinstance(s, str) for s in skills) or all( isinstance(s, SkillSchema) for s in skills): @@ -55,12 +50,6 @@ def is_skill_id(s: str) -> bool: raise ValueError('Invalid skills input type.') for skill in skill_list: - - if is_skill_id(skill): - from modelscope import snapshot_download - skill_path: str = snapshot_download(repo_id=skill) - skill = skill_path - if isinstance(skill, SkillSchema): skill_key = self._get_skill_key(skill=skill) all_skills[skill_key] = skill diff --git a/ms_agent/skill/prompt_injector.py b/ms_agent/skill/prompt_injector.py new file mode 100644 index 000000000..f916e307a --- /dev/null +++ b/ms_agent/skill/prompt_injector.py @@ -0,0 +1,56 @@ +# Copyright (c) ModelScope Contributors. All rights reserved. +import re + + +class SkillPromptInjector: + """Builds the skill section to inject into the system prompt.""" + + SKILL_SECTION_HEADER = """# Available Skills + +You have access to specialized skills that extend your capabilities. +Each skill is a set of instructions and resources for handling specific tasks. + +**How to use skills:** +1. Review the skill summaries below to find relevant skills. +2. Call `skill_view(skill_id)` to read the full instructions of a skill. +3. Follow the skill's instructions using your available tools (code execution, file operations, web search, etc.). +4. Do NOT call `skill_view` unless you actually need the skill's guidance. +""" + + ALWAYS_SKILLS_HEADER = ( + "# Active Skills\n\n" + "The following skills are always active. Follow their instructions.\n") + + def __init__(self, catalog): + self._catalog = catalog + + def build_skill_prompt_section(self) -> str: + """Build the skill section for system prompt injection. + + Returns empty string when no skills are available. 
+ """ + parts = [] + + # Part 1: always-active skills (full body injection) + always_skills = self._catalog.get_always_skills() + if always_skills: + parts.append(self.ALWAYS_SKILLS_HEADER) + for sid, skill in always_skills.items(): + content = self._strip_frontmatter(skill.content) + parts.append(f"## {skill.name}\n\n{content}\n") + + # Part 2: summary index of all enabled skills + summary = self._catalog.get_skills_summary() + if summary: + parts.append(self.SKILL_SECTION_HEADER) + parts.append(summary) + parts.append("") + + return "\n".join(parts) + + @staticmethod + def _strip_frontmatter(content: str) -> str: + """Remove YAML frontmatter from markdown content.""" + return re.sub( + r'^---\s*\n.*?\n---\s*\n', '', content, + flags=re.DOTALL).strip() diff --git a/ms_agent/skill/prompts.py b/ms_agent/skill/prompts.py deleted file mode 100644 index a91746f97..000000000 --- a/ms_agent/skill/prompts.py +++ /dev/null @@ -1,439 +0,0 @@ -# flake8: noqa -# yapf: disable - -DEFAULT_PLAN = """ - -""" - -DEFAULT_TASKS = """ - -""" - -DEFAULT_IMPLEMENTATION = """ - -""" - - -PROMPT_SKILL_PLAN = """ -According to the user's request:\n {query}\n, -analyze the following skill content and breakdown the necessary steps to complete the task step by step, considering any dependencies or prerequisites that may be required. -According to following sections: `SKILL_MD_CONTEXT`, `REFERENCE_CONTEXT`, `SCRIPT_CONTEXT` and `RESOURCE_CONTEXT`, you **MUST** identify the most relevant **FILES** (if any) and outline a detailed plan to accomplish the user's request. -{skill_md_context} {reference_context} {script_context} {resource_context} -\n\nThe format of your response:\n - -... The user's original query ... - - - - -... The concise and clear step-by-step plan to accomplish the user's request ... - - - - -... The most relevant SCRIPTS (if any) in JSON format ... - - - - -... The most relevant REFERENCES (if any) in JSON format ... - - - - -... The most relevant RESOURCES (if any) in JSON format ... - - -""" - - -PROMPT_SKILL_TASKS = """ -According to `SKILL PLAN CONTEXT`:\n\n{skill_plan_context}\n\n -Provide a concise and precise TODO-LIST of implementations required to execute the plan, **MUST** be as concise as possible. -Each task should be specific, actionable, and clearly defined to ensure successful completion of the overall plan. -The format of your response: \n - -... The user's original query ... - - - - -... A concise and clear TODO-LIST of implementations required to execute the plan ... - - -""" - - -SCRIPTS_IMPLEMENTATION_FORMAT = """[ - { - "script": "", - "parameters": { - "param1": "value1", - "param2": "value2" - } - }, - { - "script": "", - "parameters": { - "param1": "value1", - "param2": "value2" - } - } -]""" - -PROMPT_TASKS_IMPLEMENTATION = """ -According to relevant content of `SCRIPTS`, `REFERENCES` and `RESOURCES`:\n\n{script_contents}\n\n{reference_contents}\n\n{resource_contents}\n\n - -You **MUST** strictly implement the todo-list in `SKILL_TASKS_CONTEXT` step by step:\n\n{skill_tasks_context}\n\n - -There are 3 scenarios for response, your response **MUST** strictly follow one of the above scenarios, **MUST** be as concise as possible: - -Scenario-1: Execute Script(s) with Parameters, especially for python scripts, in the format of: - -{scripts_implementation_format} - - -Scenario-2: No Script Execution Needed, like JavaScript、HTML code generation, please output the final answer directly, in the format of: - -```html -``` -... 
-or -```javascript -``` - - -Scenario-3: Unable to Execute Any Script, Provide Reason, in the format of: - -... The reason why unable to execute any script ... - - -""" - - -PROMPT_SKILL_FINAL_SUMMARY = """ -Given the comprehensive context:\n\n{comprehensive_context}\n\n -Provide a concise summary of the entire process, highlighting key actions taken, decisions made, and the final outcome achieved. -Ensure the summary is clear and informative. -""" - - -# ============================================================ -# AutoSkills Prompts - for automatic skill retrieval and DAG -# ============================================================ - -PROMPT_ANALYZE_QUERY_FOR_SKILLS = """You are a skill analyzer. Given a user query, identify what types of skills/capabilities are needed, or just chatting is sufficient. - -User Query: {query} - -Available Skills Overview: -{skills_overview} - -Analyze the query and determine: -1. Whether this query requires specific skills/capabilities to fulfill -2. If skills are needed, what capabilities/functions are directly required -3. What prerequisites or dependencies might be required - -Output in JSON format: -{{ - "needs_skills": true/false, - "intent_summary": "Brief description of user intent", - "skill_queries": ["query1", "query2", ...], - "chat_response": "Direct response if no skills needed, null otherwise", - "reasoning": "Brief explanation" -}} - -Notes: -- Set `needs_skills` to false if the query is casual chat, greeting, or can be answered directly without special skills. -- If `needs_skills` is false, provide the `chat_response` with a helpful direct answer. -- If `needs_skills` is true, `skill_queries` should contain search queries for finding relevant skills. -""" - -PROMPT_FILTER_SKILLS_FAST = """Quickly filter candidate skills based on their name and description. - -User Query: {query} - -Candidate Skills: -{candidate_skills} - -For each skill, determine if it's POTENTIALLY relevant to the user's query based on: -1. Does the skill name suggest it can help with the task? -2. Does the skill description indicate capabilities matching the user's needs? - -Output in JSON format: -{{ - "filtered_skill_ids": ["skill_id_1", "skill_id_2", ...], - "reasoning": "Brief explanation of filtering" -}} - -Notes: -- Only include skills that are POTENTIALLY useful for the task. -- This is a quick filter - when in doubt, INCLUDE the skill for further analysis. -- Focus on the main task output format/type matching (e.g., PDF generation needs PDF skill). -""" - -PROMPT_FILTER_SKILLS_DEEP = """Analyze and filter candidate skills based on their full capabilities. - -User Query: {query} - -Candidate Skills (with detailed content): -{candidate_skills} - -For each skill, evaluate: -1. **Capability Match**: Can this skill actually PRODUCE the required output? -2. **Task Completeness**: Can this skill independently complete the task, or does it need other skills? -3. **Redundancy**: Are there overlapping skills that do the same thing? - -Output in JSON format: -{{ - "filtered_skill_ids": ["skill_id_1", "skill_id_2", ...], - "skill_analysis": {{ - "skill_id_1": {{ - "can_execute": true/false, - "reason": "Why this skill can/cannot execute the task" - }}, - ... - }}, - "reasoning": "Overall filtering explanation" -}} - -**CRITICAL**: -- Only include skills that can ACTUALLY execute and produce the required output. -- Remove redundant skills - keep only the most suitable one for each capability. 
-- The task specified by the user may require the collaboration of multiple skills to be successfully completed. -""" - -PROMPT_BUILD_SKILLS_DAG = """Filter candidate skills and build execution DAG. - -User Query: {query} - -Candidate Skills (USE THESE EXACT IDs in your response): -{selected_skills} - -**Tasks:** -1. **Filter**: Keep only skills that can ACTUALLY produce required output. Remove redundant/unnecessary skills. -2. **Build DAG**: Define dependencies and execution order using the EXACT skill IDs from above (e.g., `pdf@latest`, `pptx@latest`). - -**Output JSON:** -{{ - "filtered_skill_ids": ["exact_skill_id_from_list", ...], - "dag": {{ - "exact_skill_id_1": ["depends_on_skill_id"], - "exact_skill_id_2": [] - }}, - "execution_order": ["first_skill_id", "second_skill_id", ...], - "reasoning": "Brief explanation" -}} - -**CRITICAL RULES:** -- **ONLY use exact skill IDs from the Candidate Skills list** (e.g., `pdf@latest`, `pptx@latest`, NOT invented names like `create_pdf` or `generate_report`) -- Minimal sufficiency: smallest skill set that fully satisfies the query -- Deduplicate: keep only the most effective skill when overlapping -- `execution_order` MUST contain ALL skills from `filtered_skill_ids`, ordered by dependencies (parallel execution as nested lists) -- In `dag`, each skill maps to its dependencies (skills it depends on), empty list `[]` means no dependencies -""" - -PROMPT_DIRECT_SELECT_SKILLS = """You are a skill selector. Given a user query and all available skills, select the relevant skills and build an execution DAG. - -User Query: {query} - -All Available Skills (USE THESE EXACT IDs): -{all_skills} - -Tasks: -1. Determine if this query needs skills or is just casual chat -2. If skills are needed, select relevant skills using their EXACT IDs from the list above -3. Build a dependency DAG for the selected skills - -Output in JSON format: -{{ - "needs_skills": true/false, - "chat_response": "Direct response if no skills needed, null otherwise", - "selected_skill_ids": ["exact_skill_id_from_list", ...], - "dag": {{ - "exact_skill_id_1": ["depends_on_skill_id"], - "exact_skill_id_2": [], - ... - }}, - "execution_order": ["first_skill_id", "second_skill_id", ...], - "reasoning": "Brief explanation of skill selection and dependencies" -}} - -**CRITICAL:** -- **ONLY use exact skill IDs from the Available Skills list** (e.g., `pdf@latest`, `pptx@latest`, NOT invented names) -- Set `needs_skills` to false if the query is casual chat or can be answered directly -- `execution_order` MUST contain ALL skills from `selected_skill_ids`, ordered by dependencies -- In `dag`, each skill maps to its dependencies (skills it depends on), empty list `[]` means no dependencies -""" - -# ============================================================ -# Progressive Skill Analysis Prompts -# ============================================================ - -PROMPT_SKILL_ANALYSIS_PLAN = """You are analyzing a skill to create an execution plan. - -**IMPORTANT CONTEXT**: -This skill may be ONE OF SEVERAL skills in a execution chain. It does NOT need to fulfill -the ENTIRE user query - it only needs to handle its specific sub-task/capability. 
- -For example: -- If query is "Generate a PDF report with charts", a PDF skill only needs to create PDFs -- If query is "Analyze data and visualize results", a chart skill only needs visualization -- Each skill contributes its specialized capability to the overall task - -User Query: {query} - -Skill Information: -- Skill ID: {skill_id} -- Name: {skill_name} -- Description: {skill_description} - -Skill Content (SKILL.md): -{skill_content} - -Available Resources Overview: -- Scripts: {scripts_list} -- References: {references_list} -- Resources: {resources_list} - -Tasks: -1. Understand what this specific skill can do based on its description and content -2. Determine if this skill can contribute to the user's query (even partially) -3. Create a step-by-step execution plan for this skill's specific capability -4. Identify which scripts, references, and resources are needed - -Output in JSON format: -{{ - "can_handle": true/false, - "contribution": "What specific part of the query this skill handles", - "plan_summary": "Brief summary of the execution plan", - "steps": [ - {{"step": 1, "action": "description", "type": "script|reference|resource|code"}}, - ... - ], - "required_scripts": ["script_name1", "script_name2", ...], - "required_references": ["ref_name1", ...], - "required_resources": ["resource_name1", ...], - "required_packages": ["python_package1", "python_package2", ...], - "parameters": {{"param1": "value or ", ...}}, - "reasoning": "Why this plan will work" -}} - -**CRITICAL - When to set can_handle**: -- Set `can_handle: true` if this skill can CONTRIBUTE to the query, even if it only handles a sub-task -- Set `can_handle: true` if the skill's core capability is RELEVANT to any part of the query -- Set `can_handle: false` ONLY if the skill has ZERO relevance to the query -- DO NOT reject a skill just because it can't fulfill the ENTIRE query - -Notes: -- Only include resources that are actually needed for execution. -- Steps should be actionable and specific. -- Parameters should include any values extracted from the query. -- Extract Python package dependencies from skill content (e.g., reportlab, pandas, numpy). -""" - -PROMPT_SKILL_EXECUTION_COMMAND = """Based on the execution plan and loaded resources, generate the execution command(s). - -User Query: {query} -Skill ID: {skill_id} - -Execution Plan: -{execution_plan} - -Loaded Scripts: -{scripts_content} - -Loaded References: -{references_content} - -Loaded Resources: -{resources_content} - -**IMPORTANT Environment Variables:** -- `SKILL_OUTPUT_DIR`: Directory where ALL output files MUST be saved (e.g., PDFs, images, data files) -- `SKILL_DIR`: The skill's directory (for accessing resources like fonts, templates) -- `SKILL_LOGS_DIR`: Directory for logs and intermediate files - -Generate the specific execution command(s) needed. - -Output in JSON format: -{{ - "execution_type": "script|code|shell", - "commands": [ - {{ - "type": "python_script|python_code|shell|javascript", - "path": "script_path (if applicable)", - "code": "inline code (if applicable)", - "parameters": {{"param1": "value", ...}}, - "working_dir": "working directory (optional)", - "requirements": ["package1", "package2", ...] - }}, - ... - ], - "expected_output": "Description of expected output" -}} - -**CRITICAL OUTPUT RULE:** -- ALL generated files (PDFs, images, reports, etc.) 
MUST be saved to `os.environ['SKILL_OUTPUT_DIR']` -- Use `os.path.join(os.environ['SKILL_OUTPUT_DIR'], 'filename.pdf')` for output paths -- NEVER save output files to the current working directory or skill directory -- The skill directory should be READ-ONLY for resources, not for output -""" - -PROMPT_ANALYZE_EXECUTION_ERROR = """You are analyzing a failed code execution to diagnose and fix the error. - -**User Query**: {query} - -**Skill ID**: {skill_id} -**Skill Name**: {skill_name} - -**Failed Code**: -```python -{failed_code} -``` - -**Error Message (stderr)**: -``` -{stderr} -``` - -**stdout (if any)**: -``` -{stdout} -``` - -**Attempt**: {attempt}/{max_attempts} - -**Available Environment Variables**: -- SKILL_OUTPUT_DIR: Directory for output files -- SKILL_DIR: Skill's directory for resources (fonts, templates, etc.) -- SKILL_LOGS_DIR: Directory for logs - -**Helper Functions Available**: -- get_output_path(filename): Returns full path for output file - -Analyze the error and provide a fix: - -1. Identify the root cause of the error -2. Determine if it's fixable through code modification -3. Generate corrected code that addresses the issue - -Output in JSON format: -{{ - "error_analysis": {{ - "error_type": "ModuleNotFoundError|FileNotFoundError|SyntaxError|RuntimeError|etc", - "root_cause": "Brief description of what caused the error", - "is_fixable": true/false, - "fix_strategy": "Description of how to fix" - }}, - "fixed_code": "Complete fixed Python code (or null if unfixable)", - "additional_requirements": ["package1", "package2"], - "explanation": "What was changed and why" -}} - -**IMPORTANT**: -- Provide COMPLETE fixed code, not just the changed parts -- Ensure output paths use get_output_path() or os.environ['SKILL_OUTPUT_DIR'] -- If the error is about missing packages, add them to additional_requirements -- If the error cannot be fixed (e.g., requires user input), set is_fixable to false -""" diff --git a/ms_agent/skill/schema.py b/ms_agent/skill/schema.py index 722e0acc4..1f7f09f44 100644 --- a/ms_agent/skill/schema.py +++ b/ms_agent/skill/schema.py @@ -8,13 +8,11 @@ import re from dataclasses import dataclass, field from pathlib import Path -from typing import Any, Dict, List, Optional, Union +from typing import Any, Dict, List, Optional import yaml from ms_agent.utils.logger import logger -from .spec import Spec - SUPPORTED_SCRIPT_EXT = ('.py', '.sh', '.js') SUPPORTED_READ_EXT = ('.md', '.txt', '.py', '.json', '.yaml', '.yml', '.sh', '.js', '.html', '.xml') @@ -354,276 +352,3 @@ def validate_skill_schema(schema: SkillSchema) -> List[str]: return errors -@dataclass -class SkillExecutionPlan: - """ - Execution plan generated from progressive skill analysis. - - Attributes: - can_handle: Whether the skill can handle the user query. - plan_summary: Brief summary of the execution plan. - steps: List of execution steps. - required_scripts: Script names needed for execution. - required_references: Reference names needed. - required_resources: Resource names needed. - required_packages: Python packages needed for execution. - parameters: Parameters extracted from user query. - reasoning: Explanation of the plan. 
- """ - can_handle: bool = False - plan_summary: str = '' - steps: List[Dict[str, Any]] = field(default_factory=list) - required_scripts: List[str] = field(default_factory=list) - required_references: List[str] = field(default_factory=list) - required_resources: List[str] = field(default_factory=list) - required_packages: List[str] = field(default_factory=list) - parameters: Dict[str, Any] = field(default_factory=dict) - reasoning: str = '' - - -@dataclass -class SkillContext: - """ - Context information for executing a Skill. - - Supports progressive/lazy loading - resources are only loaded when needed. - """ - - # The target skill - skill: SkillSchema - - # User query that triggered this skill - query: str = '' - - # The working directory (absolute path to skills folder's parent directory) - root_path: Path = field( - default_factory=lambda: Path.cwd().parent.resolve()) - - # Execution plan from progressive analysis - plan: Optional[SkillExecutionPlan] = None - - # Loaded scripts (lazy loaded based on plan) - scripts: List[Dict[str, Any]] = field(default_factory=list) - - # Loaded references (lazy loaded based on plan) - references: List[Dict[str, Any]] = field(default_factory=list) - - # Loaded resources (lazy loaded based on plan) - resources: List[Dict[str, Any]] = field(default_factory=list) - - # The SPEC context for execution tracking - spec: Optional[Spec] = None - - # Whether resources have been loaded - _resources_loaded: bool = field(default=False, repr=False) - - @staticmethod - def _read_file_content(file_path: Union[str, Path]) -> str: - """ - Read the content of a file. - - Args: - file_path: Path to the file - - Returns: - Content of the file as a string - """ - file_path = Path(file_path) - - if not file_path.exists() or not file_path.is_file(): - return '' - - ext = file_path.suffix.lower() - if ext in SUPPORTED_READ_EXT: - try: - with open(file_path, 'r', encoding='utf-8') as f: - return f.read() - except Exception as e: - logger.error(f'Failed to read file {file_path}: {e}') - return '' - - return '' - - def __post_init__(self): - """Initialize SPEC context only, defer resource loading.""" - if self.spec is None: - self.spec = Spec(plan='', tasks='') - - @property - def skill_dir(self) -> Path: - """Get the skill's directory path.""" - return self.skill.skill_path - - def get_scripts_list(self) -> List[str]: - """Get list of available script names without loading content.""" - return [s.name for s in self.skill.scripts] - - def get_references_list(self) -> List[str]: - """Get list of available reference names without loading content.""" - return [r.name for r in self.skill.references] - - def get_resources_list(self) -> List[str]: - """Get list of available resource names without loading content.""" - return [ - r.name for r in self.skill.resources - if r.name not in ['SKILL.md', 'LICENSE.txt'] - ] - - def _get_resource_path(self, file_path: Path) -> str: - """ - Get path string for a resource file. - - Tries relative path first, falls back to absolute path. - - Args: - file_path: Path to the resource file. - - Returns: - Path string (relative if possible, absolute otherwise). - """ - resolved_path = file_path.resolve() - try: - return str(resolved_path.relative_to(self.root_path.resolve())) - except ValueError: - # Path is not under root_path, use absolute path - return str(resolved_path) - - def load_scripts(self, names: List[str] = None) -> List[Dict[str, Any]]: - """ - Load specific scripts by name, or all if names is None. 
- - Args: - names: List of script names to load, or None for all. - - Returns: - List of loaded script dictionaries with content. - """ - target_scripts = self.skill.scripts - if names: - target_scripts = [s for s in self.skill.scripts if s.name in names] - - loaded = [] - for script in target_scripts: - abs_path = script.path.resolve() - loaded.append({ - 'name': script.name, - 'file': script.to_dict(), - 'path': self._get_resource_path(script.path), - 'abs_path': str(abs_path), - 'content': self._read_file_content(abs_path), - }) - self.scripts.extend(loaded) - return loaded - - def load_references(self, names: List[str] = None) -> List[Dict[str, Any]]: - """ - Load specific references by name, or all if names is None. - - Args: - names: List of reference names to load, or None for all. - - Returns: - List of loaded reference dictionaries with content. - """ - target_refs = self.skill.references - if names: - target_refs = [r for r in self.skill.references if r.name in names] - - loaded = [] - for ref in target_refs: - abs_path = ref.path.resolve() - loaded.append({ - 'name': ref.name, - 'file': ref.to_dict(), - 'path': self._get_resource_path(ref.path), - 'abs_path': str(abs_path), - 'content': self._read_file_content(abs_path), - }) - self.references.extend(loaded) - return loaded - - def load_resources(self, names: List[str] = None) -> List[Dict[str, Any]]: - """ - Load specific resources by name, or all if names is None. - - Args: - names: List of resource names to load, or None for all. - - Returns: - List of loaded resource dictionaries with content. - """ - target_res = [ - r for r in self.skill.resources - if r.name not in ['SKILL.md', 'LICENSE.txt'] - ] - if names: - target_res = [r for r in target_res if r.name in names] - - loaded = [] - for res in target_res: - abs_path = res.path.resolve() - loaded.append({ - 'name': res.name, - 'file': res.to_dict(), - 'path': self._get_resource_path(res.path), - 'abs_path': str(abs_path), - 'content': self._read_file_content(abs_path), - }) - self.resources.extend(loaded) - return loaded - - def load_from_plan(self) -> None: - """ - Load resources based on the execution plan. - - Loads only the scripts, references, and resources specified in the plan. - """ - if self._resources_loaded or not self.plan: - return - - if self.plan.required_scripts: - self.load_scripts(self.plan.required_scripts) - - if self.plan.required_references: - self.load_references(self.plan.required_references) - - if self.plan.required_resources: - self.load_resources(self.plan.required_resources) - - self._resources_loaded = True - - def load_all(self) -> None: - """Load all available resources (scripts, references, resources).""" - if self._resources_loaded: - return - self.load_scripts() - self.load_references() - self.load_resources() - self._resources_loaded = True - - def get_loaded_scripts_content(self) -> str: - """Get formatted content of all loaded scripts.""" - if not self.scripts: - return 'No scripts loaded.' - parts = [] - for s in self.scripts: - parts.append(f"\n{s['content']}") - return '\n\n'.join(parts) - - def get_loaded_references_content(self) -> str: - """Get formatted content of all loaded references.""" - if not self.references: - return 'No references loaded.' - parts = [] - for r in self.references: - parts.append(f"\n{r['content']}") - return '\n\n'.join(parts) - - def get_loaded_resources_content(self) -> str: - """Get formatted content of all loaded resources.""" - if not self.resources: - return 'No resources loaded.' 
- parts = [] - for r in self.resources: - parts.append(f"\n{r['content']}") - return '\n\n'.join(parts) diff --git a/ms_agent/skill/skill_tools.py b/ms_agent/skill/skill_tools.py new file mode 100644 index 000000000..467f7f364 --- /dev/null +++ b/ms_agent/skill/skill_tools.py @@ -0,0 +1,345 @@ +# Copyright (c) ModelScope Contributors. All rights reserved. +import json +import os +import shutil +from pathlib import Path +from typing import Any, Dict, Optional + +from ms_agent.tools.base import ToolBase +from ms_agent.utils.logger import get_logger + +from .catalog import USER_SKILLS_DIR +from .schema import SkillSchemaParser + +logger = get_logger() + + +class SkillToolSet(ToolBase): + """Exposes skill discovery and management as standard tools + registered through ToolManager. + + Provided tools: + - skills_list: browse available skills + - skill_view: read full skill content or attached files + - skill_manage: create / edit / delete skills (optional) + """ + + TOOL_SERVER_NAME = "skills" + + def __init__(self, config, catalog, *, enable_manage: bool = False): + super().__init__(config) + self._catalog = catalog + self._enable_manage = enable_manage + + async def connect(self) -> None: + pass + + async def cleanup(self) -> None: + pass + + # ------------------------------------------------------------------ # + # Tool schema + # ------------------------------------------------------------------ # + + async def _get_tools_inner(self) -> Dict[str, Any]: + tools = [] + + tools.append({ + "tool_name": "skills_list", + "description": ( + "List all available skills with their names and descriptions. " + "Use this to discover what skills are available before viewing " + "their full content."), + "parameters": { + "type": "object", + "properties": { + "tag": { + "type": "string", + "description": + "Optional tag to filter skills by category", + } + }, + }, + }) + + tools.append({ + "tool_name": "skill_view", + "description": ( + "View the full content of a skill, including its instructions, " + "available scripts, references, and resources. " + "You can also view a specific file within the skill directory. " + "After reading a skill, follow its instructions using your " + "available tools."), + "parameters": { + "type": "object", + "properties": { + "skill_id": { + "type": "string", + "description": "The skill identifier", + }, + "file_path": { + "type": "string", + "description": ( + "Optional: relative path to a specific file " + "within the skill directory (e.g. " + "'scripts/search.py'). If omitted, returns " + "the main SKILL.md content."), + }, + }, + "required": ["skill_id"], + }, + }) + + if self._enable_manage: + tools.append({ + "tool_name": "skill_manage", + "description": ( + "Create, edit, or delete a skill. 
Use this to save "
+                    "reusable procedures that you learn during "
+                    "conversations."),
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "action": {
+                            "type": "string",
+                            "enum": ["create", "edit", "delete"],
+                            "description": "The action to perform",
+                        },
+                        "skill_id": {
+                            "type": "string",
+                            "description":
+                            "Skill identifier (hyphen-case)",
+                        },
+                        "content": {
+                            "type": "string",
+                            "description": (
+                                "For create/edit: full SKILL.md content "
+                                "including YAML frontmatter"),
+                        },
+                    },
+                    "required": ["action", "skill_id"],
+                },
+            })
+
+        return {self.TOOL_SERVER_NAME: tools}
+
+    # ------------------------------------------------------------------ #
+    # Dispatch
+    # ------------------------------------------------------------------ #
+
+    async def call_tool(self, server_name: str, *, tool_name: str,
+                        tool_args: dict) -> str:
+        if tool_name == "skills_list":
+            return self._handle_skills_list(tool_args)
+        elif tool_name == "skill_view":
+            return self._handle_skill_view(tool_args)
+        elif tool_name == "skill_manage" and self._enable_manage:
+            return self._handle_skill_manage(tool_args)
+        raise ValueError(f"Unknown skill tool: {tool_name}")
+
+    # ------------------------------------------------------------------ #
+    # skills_list
+    # ------------------------------------------------------------------ #
+
+    def _handle_skills_list(self, args: dict) -> str:
+        tag_filter = args.get("tag")
+        skills = self._catalog.get_enabled_skills()
+
+        if tag_filter:
+            skills = {
+                sid: s for sid, s in skills.items()
+                if tag_filter in (s.tags or [])
+            }
+
+        if not skills:
+            return "No skills available."
+
+        result = []
+        for sid, skill in sorted(skills.items()):
+            entry = {
+                "skill_id": sid,
+                "name": skill.name,
+                "description": skill.description,
+                "version": skill.version,
+                "tags": skill.tags or [],
+                "has_scripts": len(skill.scripts) > 0,
+                "has_references": len(skill.references) > 0,
+            }
+            result.append(entry)
+
+        return json.dumps(
+            {"skills": result, "total": len(result)},
+            ensure_ascii=False, indent=2)
+
+    # ------------------------------------------------------------------ #
+    # skill_view
+    # ------------------------------------------------------------------ #
+
+    def _handle_skill_view(self, args: dict) -> str:
+        skill_id = args.get("skill_id", "")
+        file_path = args.get("file_path")
+
+        skill = self._catalog.get_skill(skill_id)
+        if not skill:
+            return json.dumps({"error": f"Skill '{skill_id}' not found"})
+
+        if file_path:
+            return self._read_skill_file(skill, file_path)
+
+        result: Dict[str, Any] = {
+            "skill_id": skill.skill_id,
+            "name": skill.name,
+            "description": skill.description,
+            "skill_dir": str(skill.skill_path),
+            "content": skill.content,
+            "linked_files": {
+                "scripts": [s.name for s in skill.scripts],
+                "references": [r.name for r in skill.references],
+                "resources": [
+                    r.name for r in skill.resources
+                    if r.name not in ("SKILL.md", "LICENSE.txt")
+                ],
+            },
+        }
+
+        dep_status = self._check_requirements(skill)
+        if dep_status:
+            result["requirements_status"] = dep_status
+
+        return json.dumps(result, ensure_ascii=False, indent=2)
+
+    def _read_skill_file(self, skill, file_path: str) -> str:
+        """Read a file inside the skill directory with traversal protection."""
+        target = (skill.skill_path / file_path).resolve()
+        skill_root = skill.skill_path.resolve()
+
+        # relative_to() raises ValueError for anything that resolves outside
+        # skill_root, avoiding the sibling-directory pitfall of a plain
+        # string-prefix comparison.
+        try:
+            target.relative_to(skill_root)
+        except ValueError:
+            return json.dumps({"error": "Path traversal not allowed"})
+
+        if not target.exists():
+            return json.dumps({"error": f"File not found: {file_path}"})
+
+        try:
+            content = target.read_text(encoding="utf-8")
+            return json.dumps(
+                {"file_path": file_path, "content": content},
+                ensure_ascii=False)
+        except Exception as e:
+            return json.dumps({"error": f"Failed to read file: {e}"})
+
+    def _check_requirements(self, skill) -> Optional[dict]:
+        frontmatter = SkillSchemaParser.parse_yaml_frontmatter(skill.content)
+        if not frontmatter:
+            return None
+
+        requires = frontmatter.get("requires", {})
+        if not requires:
+            return None
+
+        status: Dict[str, Any] = {}
+        required_env = requires.get("env", [])
+        if required_env:
+            missing = [v for v in required_env if v not in os.environ]
+            if missing:
+                status["missing_env_vars"] = missing
+
+        required_tools = requires.get("tools", [])
+        if required_tools:
+            status["required_tools"] = required_tools
+
+        return status if status else None
+
+    # ------------------------------------------------------------------ #
+    # skill_manage
+    # ------------------------------------------------------------------ #
+
+    def _handle_skill_manage(self, args: dict) -> str:
+        action = args.get("action", "")
+        skill_id = args.get("skill_id", "")
+
+        if action == "create":
+            return self._create_skill(skill_id, args.get("content", ""))
+        elif action == "edit":
+            return self._edit_skill(skill_id, args.get("content", ""))
+        elif action == "delete":
+            return self._delete_skill(skill_id)
+        return json.dumps({"error": f"Unknown action: {action}"})
+
+    def _create_skill(self, skill_id: str, content: str) -> str:
+        custom_dir = self._get_custom_skills_dir()
+        skill_dir = custom_dir / skill_id
+
+        if skill_dir.exists():
+            return json.dumps(
+                {"error": f"Skill '{skill_id}' already exists"})
+
+        frontmatter = SkillSchemaParser.parse_yaml_frontmatter(content)
+        if (not frontmatter or "name" not in frontmatter
+                or "description" not in frontmatter):
+            return json.dumps({
+                "error":
+                "Invalid SKILL.md: must have YAML frontmatter "
+                "with 'name' and 'description'"
+            })
+
+        skill_dir.mkdir(parents=True, exist_ok=True)
+        (skill_dir / "SKILL.md").write_text(content, encoding="utf-8")
+
+        skill = self._catalog.add_skill(str(skill_dir))
+        if skill:
+            return json.dumps({
+                "success": True,
+                "skill_id": skill.skill_id,
+                "message": f"Skill '{skill.name}' created successfully",
+            })
+        return json.dumps({"error": "Failed to load created skill"})
+
+    def _edit_skill(self, skill_id: str, content: str) -> str:
+        skill = self._catalog.get_skill(skill_id)
+        if not skill:
+            return json.dumps(
+                {"error": f"Skill '{skill_id}' not found"})
+
+        frontmatter = SkillSchemaParser.parse_yaml_frontmatter(content)
+        if (not frontmatter or "name" not in frontmatter
+                or "description" not in frontmatter):
+            return json.dumps({
+                "error":
+                "Invalid content: must have YAML frontmatter "
+                "with 'name' and 'description'"
+            })
+
+        skill_md_path = skill.skill_path / "SKILL.md"
+        skill_md_path.write_text(content, encoding="utf-8")
+
+        reloaded = self._catalog.reload_skill(skill_id)
+        if reloaded:
+            return json.dumps({
+                "success": True,
+                "message": f"Skill '{skill_id}' updated successfully",
+            })
+        return json.dumps({"error": "Failed to reload updated skill"})
+
+    def _delete_skill(self, skill_id: str) -> str:
+        skill = self._catalog.get_skill(skill_id)
+        if not skill:
+            return json.dumps(
+                {"error": f"Skill '{skill_id}' not found"})
+
+        custom_dir = self._get_custom_skills_dir().resolve()
+        # Same containment test as skill_view: a plain startswith() would
+        # also accept sibling directories such as 'custom-extra'.
+        try:
+            skill.skill_path.resolve().relative_to(custom_dir)
+        except ValueError:
+            return json.dumps(
+                {"error": "Can only delete custom skills"})
+
+        shutil.rmtree(skill.skill_path)
+        self._catalog.remove_skill(skill_id)
+
+        return json.dumps({
+            "success": True,
+            "message": f"Skill '{skill_id}' deleted successfully",
+        })
+
+    def _get_custom_skills_dir(self) -> Path:
+        base = USER_SKILLS_DIR / "custom"
+        base.mkdir(parents=True, exist_ok=True)
+        return base
diff --git a/ms_agent/skill/sources.py b/ms_agent/skill/sources.py
new file mode 100644
index 000000000..23e9de67a
--- /dev/null
+++ b/ms_agent/skill/sources.py
@@ -0,0 +1,94 @@
+# Copyright (c) ModelScope Contributors. All rights reserved.
+import os
+import re
+from dataclasses import dataclass
+from enum import Enum
+from pathlib import Path
+from typing import Optional
+
+
+class SkillSourceType(Enum):
+    LOCAL_DIR = "local"
+    MODELSCOPE = "modelscope"
+    GIT = "git"
+
+
+@dataclass
+class SkillSource:
+    type: SkillSourceType
+    path: Optional[str] = None
+    repo_id: Optional[str] = None
+    url: Optional[str] = None
+    revision: Optional[str] = None
+    subdir: Optional[str] = None
+    enabled: bool = True
+
+
+_MODELSCOPE_URI_RE = re.compile(
+    r'^modelscope://(?P<repo>[^@#]+)(?:@(?P<rev>[^#]+))?(?:#(?P<sub>.+))?$')
+
+_MODELSCOPE_SKILL_URL_RE = re.compile(
+    r'^https?://(?:www\.)?modelscope\.(?:cn|ai)/skills/'
+    r'(?P<repo>[^/]+/[^/]+)(?:/.*)?$')
+
+_OWNER_REPO_RE = re.compile(r'^[A-Za-z0-9_.-]+/[A-Za-z0-9_.-]+$')
+
+_AT_PREFIX_RE = re.compile(
+    r'^@(?P<repo>[A-Za-z0-9_.-]+/[A-Za-z0-9_.-]+)$')
+
+
+def _looks_like_path(raw: str) -> bool:
+    """Return True when *raw* is clearly meant to be a local filesystem path
+    rather than a hub identifier, i.e. it starts with ``/``, ``./``, ``../``
+    or ``~``; bare ``owner/repo`` strings are disambiguated later in
+    ``parse_skill_source``."""
+    return raw.startswith(('/', './', '../', '~'))
+
+
+def parse_skill_source(raw: str) -> SkillSource:
+    """Parse a raw string into a SkillSource.
+
+    Supported formats (checked in order):
+    - /abs/path or ./rel/path or ~/path -> LOCAL_DIR
+    - modelscope://owner/repo[@rev][#subdir] -> MODELSCOPE
+    - https://modelscope.cn/skills/owner/repo -> MODELSCOPE
+    - @owner/repo (CLI shorthand) -> MODELSCOPE
+    - https://... or git://... -> GIT
+    - owner/repo (when path does not exist) -> MODELSCOPE
+    - anything else -> LOCAL_DIR
+    """
+    if _looks_like_path(raw):
+        resolved = str(Path(raw).expanduser().resolve())
+        return SkillSource(type=SkillSourceType.LOCAL_DIR, path=resolved)
+
+    m = _MODELSCOPE_URI_RE.match(raw)
+    if m:
+        return SkillSource(
+            type=SkillSourceType.MODELSCOPE,
+            repo_id=m.group('repo'),
+            revision=m.group('rev'),
+            subdir=m.group('sub'),
+        )
+
+    m = _MODELSCOPE_SKILL_URL_RE.match(raw)
+    if m:
+        return SkillSource(
+            type=SkillSourceType.MODELSCOPE,
+            repo_id=m.group('repo'),
+        )
+
+    m = _AT_PREFIX_RE.match(raw)
+    if m:
+        return SkillSource(
+            type=SkillSourceType.MODELSCOPE,
+            repo_id=m.group('repo'),
+        )
+
+    if raw.startswith(('https://', 'http://', 'git://')):
+        return SkillSource(type=SkillSourceType.GIT, url=raw)
+
+    if _OWNER_REPO_RE.match(raw) and not os.path.exists(raw):
+        return SkillSource(type=SkillSourceType.MODELSCOPE, repo_id=raw)
+
+    resolved = str(Path(raw).resolve()) if not os.path.isabs(raw) else raw
+    return SkillSource(type=SkillSourceType.LOCAL_DIR, path=resolved)
diff --git a/ms_agent/skill/spec.py b/ms_agent/skill/spec.py
deleted file mode 100644
index 3c666b8d4..000000000
--- a/ms_agent/skill/spec.py
+++ /dev/null
@@ -1,67 +0,0 @@
-# Copyright (c) ModelScope Contributors. All rights reserved.
-import os -from dataclasses import dataclass - -from .prompts import DEFAULT_IMPLEMENTATION, DEFAULT_PLAN, DEFAULT_TASKS - - -@dataclass -class Spec: - """ - Specification for an AI agent's task planning and execution. - """ - - plan: str - - tasks: str - - implementation: str = '' - - def __post_init__(self): - - if not self.plan: - self.plan = DEFAULT_PLAN - - if not self.tasks: - self.tasks = DEFAULT_TASKS - - if not self.implementation: - self.implementation = DEFAULT_IMPLEMENTATION - - def dump(self, output_dir: str) -> str: - """ - Dump the spec to the specified output directory. - - Args: - output_dir (str): The directory to dump the spec files. - - Returns: - str: The path to the dumped spec directory. - """ - output_path: str = os.path.join(output_dir, '.spec') - os.makedirs(output_path, exist_ok=True) - - with open( - os.path.join(output_path, 'plan.md'), 'w', - encoding='utf-8') as f: - f.write(self.plan) - - with open( - os.path.join(output_path, 'tasks.md'), 'w', - encoding='utf-8') as f: - f.write(self.tasks) - - with open( - os.path.join(output_path, 'implementation.md'), - 'w', - encoding='utf-8') as f: - f.write(self.implementation) - - return output_path - - -if __name__ == '__main__': - spec = Spec(plan='', tasks='') - print('Plan:', spec.plan) - print('Tasks:', spec.tasks) - print('Implementation:', spec.implementation) diff --git a/requirements/framework.txt b/requirements/framework.txt index 7b7c84fc1..2796900a0 100644 --- a/requirements/framework.txt +++ b/requirements/framework.txt @@ -6,7 +6,7 @@ json5 markdown matplotlib mcp -modelscope +modelscope>=1.35.2 moviepy numpy omegaconf diff --git a/tests/skills/test_claude_skills.py b/tests/skills/test_claude_skills.py deleted file mode 100644 index 8008ecc57..000000000 --- a/tests/skills/test_claude_skills.py +++ /dev/null @@ -1,812 +0,0 @@ -""" -Unit tests for Claude Skills using AutoSkills. - -These tests cover the 16 skills in projects/agent_skills/skills/claude_skills: -1. algorithmic-art - Generative art with p5.js -2. brand-guidelines - Anthropic brand styling -3. canvas-design - Visual art in PNG/PDF -4. doc-coauthoring - Documentation workflow -5. docx - Word document operations -6. frontend-design - Frontend UI design -7. internal-comms - Internal communications -8. mcp-builder - MCP server creation -9. pdf - PDF manipulation -10. pptx - PowerPoint operations -11. skill-creator - Skill creation guide -12. slack-gif-creator - Slack GIF creation -13. theme-factory - Theme styling -14. web-artifacts-builder - React/HTML artifacts -15. webapp-testing - Playwright testing -16. xlsx - Excel/spreadsheet operations - -Usage: - # Run all tests - python -m unittest tests.skills.test_claude_skills -v - - # Run specific test class - python -m unittest tests.skills.test_claude_skills.TestClaudeSkillsRetrieval -v - - # Run specific test method - python -m unittest tests.skills.test_claude_skills.TestClaudeSkillsRetrieval.test_pdf_skill -v -""" -import asyncio -import os -import shutil -import tempfile -import unittest -from pathlib import Path - -from ms_agent.llm.openai_llm import OpenAI -from ms_agent.skill.auto_skills import AutoSkills -from omegaconf import DictConfig - - -#### Prerequisites #### -# - ALL ENVs: # LLM_MODEL, OPENAI_API_KEY, OPENAI_BASE_URL, SKILLS_PATH, WORK_DIR, IS_REMOVE_WORK_DIR, USE_SANDBOX -# - Get SKILLS_PATH: git clone https://github.com/anthropics/skills.git and set the path `skills/skills` directory. 
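The prerequisites above can also be wired up programmatically before launching the suite. A minimal sketch — every value is a placeholder; only the variable names come from the comment above and the invocation mirrors the Usage notes in the module docstring:

```python
import os
import subprocess

# Placeholder values; only the environment variable names are taken from
# the prerequisites comment above.
os.environ.setdefault('LLM_MODEL', 'qwen3-max')
os.environ.setdefault('OPENAI_API_KEY', '<your-api-key>')
os.environ.setdefault(
    'OPENAI_BASE_URL',
    'https://dashscope.aliyuncs.com/compatible-mode/v1')
os.environ.setdefault('SKILLS_PATH', '/path/to/skills/skills')
os.environ.setdefault('WORK_DIR', '/tmp/ms_agent_skill_tests')
os.environ.setdefault('IS_REMOVE_WORK_DIR', 'true')
os.environ.setdefault('USE_SANDBOX', 'false')

# Same entry point as in the module docstring's Usage section.
subprocess.run(
    ['python', '-m', 'unittest', 'tests.skills.test_claude_skills', '-v'],
    check=True)
```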
- - -IS_REMOVE_WORK_DIR: bool = os.getenv('IS_REMOVE_WORK_DIR', - 'true').lower() == 'true' - -USE_SANDBOX: bool = os.getenv('USE_SANDBOX', - 'false').lower() == 'true' - - -def get_llm_config() -> DictConfig: - """Get LLM configuration from environment variables.""" - return DictConfig({ - 'llm': { - 'service': - 'openai', - 'model': - os.getenv('LLM_MODEL', 'qwen3-max'), - 'openai_api_key': - os.getenv('OPENAI_API_KEY'), - 'openai_base_url': - os.getenv('OPENAI_BASE_URL', - 'https://dashscope.aliyuncs.com/compatible-mode/v1') - } - }) - - -def get_skills_path() -> str: - """Get the path to claude_skills directory.""" - skills_path = os.getenv('SKILLS_PATH') - if skills_path: - return skills_path - # Default path relative to project root - return str( - Path(__file__).parent.parent.parent / 'projects' / 'agent_skills' - / 'skills' / 'claude_skills') - - -def get_work_dir() -> str: - """Get work directory from env or create temp directory.""" - work_dir = os.getenv('WORK_DIR') - if work_dir: - os.makedirs(work_dir, exist_ok=True) - return work_dir - return tempfile.mkdtemp(prefix='ms_agent_test_') - - -def run_async(coro): - """Helper to run async coroutines in sync context.""" - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - try: - return loop.run_until_complete(coro) - finally: - loop.close() - - -class TestClaudeSkillsRetrieval(unittest.TestCase): - """Test skill retrieval and DAG building for each skill category.""" - - def setUp(self): - """Setup test fixtures before each test.""" - self.config = get_llm_config() - self.skills_path = get_skills_path() - self.work_dir = get_work_dir() - - # Skip test if no API key - if not self.config.llm.openai_api_key: - self.skipTest('OPENAI_API_KEY not set') - - # Create AutoSkills instance for this test - self.auto_skills = AutoSkills( - skills=self.skills_path, - llm=OpenAI.from_config(self.config), - use_sandbox=USE_SANDBOX, - work_dir=self.work_dir, - ) - - def tearDown(self): - """Cleanup after each test.""" - # Clean up the temporary work directory (only if not from env) - if IS_REMOVE_WORK_DIR and hasattr(self, 'work_dir') and os.path.exists( - self.work_dir) and not os.getenv('WORK_DIR'): - try: - shutil.rmtree(self.work_dir) - except Exception as e: - print(f'Warning: Failed to clean up work_dir: {e}') - - # Clean up AutoSkills instance - if hasattr(self, 'auto_skills'): - self.auto_skills = None - - def _run_skill_retrieval_test(self, queries: list, skill_name: str): - """ - Helper method to run skill retrieval test. - - Args: - queries: List of user queries to test. - skill_name: Name of the skill being tested. - """ - for query in queries: - with self.subTest(query=query): - result = run_async(self.auto_skills.get_skill_dag(query)) - self.assertIsNotNone( - result, f'Result should not be None for: {query}') - - # Assert skills_dag and execution_order are not empty - self.assertTrue( - result.dag, - f'skills_dag should not be empty for: {query}') - self.assertTrue( - result.execution_order, - f'execution_order should not be empty for: {query}') - - if result.selected_skills: - skill_ids = list(result.selected_skills.keys()) - print(f'\n[{skill_name}] Query: {query}') - print(f'[{skill_name}] Retrieved skills: {skill_ids}') - print(f'[{skill_name}] Execution order: {result.execution_order}') - - def test_algorithmic_art_skill(self): - """ - Test algorithmic-art skill retrieval. - - Skill: Creates generative art using p5.js with seeded randomness. 
- Capabilities: Algorithmic philosophy creation, p5.js implementation, - flow fields, particle systems, interactive artifacts. - """ - queries = [ - 'Create a generative art piece with flowing particles that looks organic', - 'Make an algorithmic art using flow fields and Perlin noise', - 'I want to create interactive p5.js artwork with seeded randomness', - ] - self._run_skill_retrieval_test(queries, 'algorithmic-art') - - def test_brand_guidelines_skill(self): - """ - Test brand-guidelines skill retrieval. - - Skill: Applies Anthropic's brand colors and typography. - Capabilities: Brand color application, typography styling, - visual formatting, corporate identity. - """ - queries = [ - 'Apply Anthropic brand colors to my presentation', - 'Style this document with official brand guidelines', - 'Format this artifact using company design standards', - ] - self._run_skill_retrieval_test(queries, 'brand-guidelines') - - def test_canvas_design_skill(self): - """ - Test canvas-design skill retrieval. - - Skill: Creates visual art in PNG and PDF documents. - Capabilities: Design philosophy creation, poster design, - static visual art, composition, color theory. - """ - queries = [ - 'Create a beautiful minimalist poster design in PDF format', - 'Design an artistic visual piece using canvas with modern aesthetics', - 'Make a museum-quality art poster with geometric patterns', - ] - self._run_skill_retrieval_test(queries, 'canvas-design') - - def test_doc_coauthoring_skill(self): - """ - Test doc-coauthoring skill retrieval. - - Skill: Guides users through documentation co-authoring workflow. - Capabilities: Context gathering, section refinement, - reader testing, iterative document creation. - """ - queries = [ - 'Help me write a technical design document for a new API', - 'I need to create a product requirements document (PRD)', - 'Draft a decision doc for our architecture proposal', - ] - self._run_skill_retrieval_test(queries, 'doc-coauthoring') - - def test_docx_skill(self): - """ - Test docx skill retrieval. - - Skill: Comprehensive Word document creation, editing, and analysis. - Capabilities: Document creation, tracked changes, comments, - formatting preservation, text extraction. - """ - queries = [ - 'Create a professional Word document with headers and bullet points', - 'Edit this docx file and add tracked changes to section 3', - 'Extract text from this Word document and analyze its structure', - 'Add comments to this docx file for review', - ] - self._run_skill_retrieval_test(queries, 'docx') - - def test_frontend_design_skill(self): - """ - Test frontend-design skill retrieval. - - Skill: Creates distinctive, production-grade frontend interfaces. - Capabilities: Web components, landing pages, dashboards, - React components, HTML/CSS layouts, UI styling. - """ - queries = [ - 'Build a modern landing page with bold typography and animations', - 'Create a React dashboard component with distinctive styling', - 'Design a web interface that avoids generic AI aesthetics', - 'Make a beautiful HTML/CSS card component with hover effects', - ] - self._run_skill_retrieval_test(queries, 'frontend-design') - - def test_internal_comms_skill(self): - """ - Test internal-comms skill retrieval. - - Skill: Writes internal communications in company formats. - Capabilities: 3P updates (Progress/Plans/Problems), newsletters, - FAQs, status reports, incident reports. 
- """ - queries = [ - 'Write a 3P update for our weekly team meeting', - 'Draft a company newsletter about Q4 achievements', - 'Create FAQ responses for the new product launch', - 'Write an incident report for yesterday\'s outage', - ] - self._run_skill_retrieval_test(queries, 'internal-comms') - - def test_mcp_builder_skill(self): - """ - Test mcp-builder skill retrieval. - - Skill: Creates MCP servers for LLM-external service interaction. - Capabilities: MCP protocol implementation, tool design, - API integration, TypeScript/Python SDK usage. - """ - queries = [ - 'Build an MCP server to integrate with GitHub API', - 'Create an MCP tool that enables Claude to search databases', - 'Implement a Model Context Protocol server in TypeScript', - ] - self._run_skill_retrieval_test(queries, 'mcp-builder') - - def test_pdf_skill(self): - """ - Test pdf skill retrieval. - - Skill: Comprehensive PDF manipulation toolkit. - Capabilities: Text/table extraction, PDF creation, - merging/splitting, form filling, watermarks. - """ - queries = [ - 'Extract all tables from this PDF document', - 'Create a new PDF report with charts and formatted text', - 'Merge multiple PDF files into one document', - 'Fill out this PDF form with the provided data', - 'Split this large PDF into separate pages', - ] - self._run_skill_retrieval_test(queries, 'pdf') - - def test_pptx_skill(self): - """ - Test pptx skill retrieval. - - Skill: PowerPoint creation, editing, and analysis. - Capabilities: Presentation creation, template editing, - slide layouts, speaker notes, thumbnails. - """ - queries = [ - 'Create a PowerPoint presentation about machine learning', - 'Edit this pptx file to update the charts and styling', - 'Generate a slide deck using this template with new content', - 'Add speaker notes to all slides in this presentation', - ] - self._run_skill_retrieval_test(queries, 'pptx') - - def test_skill_creator_skill(self): - """ - Test skill-creator skill retrieval. - - Skill: Guide for creating effective skills. - Capabilities: Skill design, SKILL.md creation, - resource bundling, workflow definition. - """ - queries = [ - 'Create a new skill for image processing with Python', - 'Help me design a skill that extends Claude\'s capabilities', - 'Build a custom skill with scripts and reference documents', - ] - self._run_skill_retrieval_test(queries, 'skill-creator') - - def test_slack_gif_creator_skill(self): - """ - Test slack-gif-creator skill retrieval. - - Skill: Creates animated GIFs optimized for Slack. - Capabilities: GIF creation, animation (shake, pulse, bounce), - Slack emoji optimization, frame composition. - """ - queries = [ - 'Make a bouncing star GIF for Slack emoji', - 'Create an animated celebration GIF optimized for Slack', - 'Generate a pulsing heart animation for team chat', - ] - self._run_skill_retrieval_test(queries, 'slack-gif-creator') - - def test_theme_factory_skill(self): - """ - Test theme-factory skill retrieval. - - Skill: Styles artifacts with pre-set or custom themes. - Capabilities: Theme application, color palettes, - font pairings, visual consistency. - """ - queries = [ - 'Apply the Ocean Depths theme to my presentation', - 'Style this document with the Tech Innovation theme', - 'Create a custom theme with warm earth tones for my slides', - ] - self._run_skill_retrieval_test(queries, 'theme-factory') - - def test_web_artifacts_builder_skill(self): - """ - Test web-artifacts-builder skill retrieval. - - Skill: Builds elaborate HTML artifacts using React/Tailwind. 
- Capabilities: React components, shadcn/ui, Tailwind CSS, - single-file HTML bundling. - """ - queries = [ - 'Build a complex React dashboard with shadcn/ui components', - 'Create a multi-component HTML artifact with state management', - 'Develop an interactive web app with Tailwind CSS styling', - ] - self._run_skill_retrieval_test(queries, 'web-artifacts-builder') - - def test_webapp_testing_skill(self): - """ - Test webapp-testing skill retrieval. - - Skill: Tests local web applications using Playwright. - Capabilities: Browser automation, screenshot capture, - UI interaction, server lifecycle management. - """ - queries = [ - 'Test this web application using Playwright automation', - 'Capture screenshots of my local webapp running on port 3000', - 'Debug UI behavior by inspecting the rendered DOM', - 'Verify frontend functionality with automated browser tests', - ] - self._run_skill_retrieval_test(queries, 'webapp-testing') - - def test_xlsx_skill(self): - """ - Test xlsx skill retrieval. - - Skill: Comprehensive Excel/spreadsheet operations. - Capabilities: Spreadsheet creation, formulas, formatting, - data analysis, visualization, recalculation. - """ - queries = [ - 'Create an Excel financial model with formulas and formatting', - 'Analyze data in this spreadsheet and create summary charts', - 'Build a budget tracker spreadsheet with automatic calculations', - 'Modify this xlsx file to add new formulas and preserve formatting', - ] - self._run_skill_retrieval_test(queries, 'xlsx') - - -class TestSkillsCombination(unittest.TestCase): - """Test skill retrieval for queries requiring multiple skills.""" - - def setUp(self): - """Setup test fixtures before each test.""" - self.config = get_llm_config() - self.skills_path = get_skills_path() - self.work_dir = get_work_dir() - - if not self.config.llm.openai_api_key: - self.skipTest('OPENAI_API_KEY not set') - - self.auto_skills = AutoSkills( - skills=self.skills_path, - llm=OpenAI.from_config(self.config), - use_sandbox=USE_SANDBOX, - work_dir=self.work_dir, - ) - - def tearDown(self): - """Cleanup after each test.""" - if IS_REMOVE_WORK_DIR and hasattr(self, 'work_dir') and os.path.exists( - self.work_dir) and not os.getenv('WORK_DIR'): - try: - shutil.rmtree(self.work_dir) - except Exception as e: - print(f'Warning: Failed to clean up work_dir: {e}') - - if hasattr(self, 'auto_skills'): - self.auto_skills = None - - def _assert_dag_result(self, result, query: str): - """Assert common DAG result validations.""" - self.assertIsNotNone(result, f'Result should not be None for: {query}') - self.assertTrue( - result.dag, - f'skills_dag should not be empty for: {query}') - self.assertTrue( - result.execution_order, - f'execution_order should not be empty for: {query}') - - def test_document_with_theme(self): - """ - Test combining document creation with theme styling. - - Expected: pptx + theme-factory or docx + brand-guidelines - """ - query = 'Create a PowerPoint presentation about AI and apply Ocean Depths theme' - result = run_async(self.auto_skills.get_skill_dag(query)) - - self._assert_dag_result(result, query) - if result.selected_skills: - skill_ids = list(result.selected_skills.keys()) - print(f'\n[Combination] Query: {query}') - print(f'[Combination] Retrieved skills: {skill_ids}') - print(f'[Combination] Execution order: {result.execution_order}') - - def test_frontend_with_testing(self): - """ - Test combining frontend design with webapp testing. 
- - Expected: frontend-design + webapp-testing - """ - query = 'Build a React dashboard and test it with Playwright' - result = run_async(self.auto_skills.get_skill_dag(query)) - - self._assert_dag_result(result, query) - if result.selected_skills: - skill_ids = list(result.selected_skills.keys()) - print(f'\n[Combination] Query: {query}') - print(f'[Combination] Retrieved skills: {skill_ids}') - print(f'[Combination] Execution order: {result.execution_order}') - - def test_pdf_and_xlsx_data(self): - """ - Test combining PDF and Excel operations. - - Expected: pdf + xlsx for data extraction and reporting - """ - query = 'Extract data from PDF tables and create an Excel analysis report' - result = run_async(self.auto_skills.get_skill_dag(query)) - - self._assert_dag_result(result, query) - if result.selected_skills: - skill_ids = list(result.selected_skills.keys()) - print(f'\n[Combination] Query: {query}') - print(f'[Combination] Retrieved skills: {skill_ids}') - print(f'[Combination] Execution order: {result.execution_order}') - - def test_doc_with_brand_styling(self): - """ - Test combining document creation with brand guidelines. - - Expected: docx + brand-guidelines - """ - query = 'Create a Word document and apply Anthropic brand styling' - result = run_async(self.auto_skills.get_skill_dag(query)) - - self._assert_dag_result(result, query) - if result.selected_skills: - skill_ids = list(result.selected_skills.keys()) - print(f'\n[Combination] Query: {query}') - print(f'[Combination] Retrieved skills: {skill_ids}') - print(f'[Combination] Execution order: {result.execution_order}') - - -class TestSkillsExecution(unittest.TestCase): - """ - Test full skill execution pipeline. - - Note: These tests require actual LLM API access and may take longer. - """ - - def setUp(self): - """Setup test fixtures before each test.""" - self.config = get_llm_config() - self.skills_path = get_skills_path() - self.work_dir = get_work_dir() - - if not self.config.llm.openai_api_key: - self.skipTest('OPENAI_API_KEY not set') - - self.auto_skills = AutoSkills( - skills=self.skills_path, - llm=OpenAI.from_config(self.config), - use_sandbox=USE_SANDBOX, - work_dir=self.work_dir, - max_retries=3, - ) - - def tearDown(self): - """Cleanup after each test.""" - # Clean up any output files generated during execution - if IS_REMOVE_WORK_DIR and hasattr(self, 'work_dir') and os.path.exists( - self.work_dir) and not os.getenv('WORK_DIR'): - try: - shutil.rmtree(self.work_dir) - except Exception as e: - print(f'Warning: Failed to clean up work_dir: {e}') - - if IS_REMOVE_WORK_DIR and hasattr(self, 'auto_skills'): - # Clean up executor if exists - if hasattr(self.auto_skills, - '_executor') and self.auto_skills._executor: - try: - self.auto_skills.cleanup() - except Exception as e: - print(f'Warning: Failed to cleanup auto_skills: {e}') - self.auto_skills = None - - def test_execute_pdf_creation(self): - """ - Test full execution of PDF creation skill. - - This test verifies end-to-end skill execution. 
- """ - query = "Create a simple PDF report titled 'Test Report' with basic text content" - result = run_async(self.auto_skills.run(query)) - - self.assertIsNotNone(result, f'Result should not be None for: {query}') - print(f'\n[Execution] Query: {query}') - print(f'[Execution] Is complete: {result.is_complete}') - - # Assert execution_result even if None - if result.execution_result: - print(f'[Execution] Success: {result.execution_result.success}') - print( - f'[Execution] Skills executed: {list(result.execution_result.results.keys())}' - ) - self.assertTrue( - result.execution_result.success, - f'Execution should succeed for: {query}') - else: - self.fail(f'execution_result should not be None for: {query}') - - def test_execute_xlsx_creation(self): - """Test full execution of Excel creation skill.""" - query = 'Create an Excel spreadsheet with a simple budget table and SUM formula' - result = run_async(self.auto_skills.run(query)) - - self.assertIsNotNone(result, f'Result should not be None for: {query}') - print(f'\n[Execution] Query: {query}') - print(f'[Execution] Is complete: {result.is_complete}') - - if result.execution_result: - print(f'[Execution] Success: {result.execution_result.success}') - self.assertTrue( - result.execution_result.success, - f'Execution should succeed for: {query}') - else: - self.fail(f'execution_result should not be None for: {query}') - - def test_execute_slack_gif(self): - """Test full execution of Slack GIF creation skill.""" - query = 'Create a simple bouncing dot animation GIF for Slack emoji' - result = run_async(self.auto_skills.run(query)) - - self.assertIsNotNone(result, f'Result should not be None for: {query}') - print(f'\n[Execution] Query: {query}') - print(f'[Execution] Is complete: {result.is_complete}') - - if result.execution_result: - print(f'[Execution] Success: {result.execution_result.success}') - self.assertTrue( - result.execution_result.success, - f'Execution should succeed for: {query}') - else: - self.fail(f'execution_result should not be None for: {query}') - - -class TestChatOnlyQueries(unittest.TestCase): - """Test queries that should be handled as chat-only (no skill retrieval).""" - - def setUp(self): - """Setup test fixtures before each test.""" - self.config = get_llm_config() - self.skills_path = get_skills_path() - self.work_dir = get_work_dir() - - if not self.config.llm.openai_api_key: - self.skipTest('OPENAI_API_KEY not set') - - self.auto_skills = AutoSkills( - skills=self.skills_path, - llm=OpenAI.from_config(self.config), - use_sandbox=USE_SANDBOX, - work_dir=self.work_dir, - ) - - def tearDown(self): - """Cleanup after each test.""" - if IS_REMOVE_WORK_DIR and hasattr(self, 'work_dir') and os.path.exists( - self.work_dir) and not os.getenv('WORK_DIR'): - try: - shutil.rmtree(self.work_dir) - except Exception as e: - print(f'Warning: Failed to clean up work_dir: {e}') - - if hasattr(self, 'auto_skills'): - self.auto_skills = None - - def test_general_chat_queries(self): - """Test that general chat queries return chat-only response.""" - queries = [ - 'What is the capital of France?', - 'Tell me a joke about programming', - 'Explain what machine learning is', - ] - - for query in queries: - with self.subTest(query=query): - result = run_async(self.auto_skills.get_skill_dag(query)) - self.assertIsNotNone(result, f'Result should not be None for: {query}') - - print(f'\n[Chat] Query: {query}') - print( - f'[Chat] Chat response: {result.chat_response is not None}' - ) - print( - f'[Chat] Selected skills: 
{list(result.selected_skills.keys()) if result.selected_skills else "None"}' - ) - - # For chat-only queries, chat_response should be present - # OR it should have empty skills (no execution needed) - is_chat_only = (result.chat_response is not None or - not result.selected_skills) - self.assertTrue( - is_chat_only, - f'Query should be handled as chat-only: {query}') - - -class TestSkillDAGStructure(unittest.TestCase): - """Test the structure and validity of skill DAG results.""" - - def setUp(self): - """Setup test fixtures before each test.""" - self.config = get_llm_config() - self.skills_path = get_skills_path() - self.work_dir = get_work_dir() - - if not self.config.llm.openai_api_key: - self.skipTest('OPENAI_API_KEY not set') - - self.auto_skills = AutoSkills( - skills=self.skills_path, - llm=OpenAI.from_config(self.config), - use_sandbox=USE_SANDBOX, - work_dir=self.work_dir, - ) - - def tearDown(self): - """Cleanup after each test.""" - if IS_REMOVE_WORK_DIR and hasattr(self, 'work_dir') and os.path.exists( - self.work_dir) and not os.getenv('WORK_DIR'): - try: - shutil.rmtree(self.work_dir) - except Exception as e: - print(f'Warning: Failed to clean up work_dir: {e}') - - if hasattr(self, 'auto_skills'): - self.auto_skills = None - - def test_dag_result_has_required_fields(self): - """Test that DAG result contains all required fields.""" - query = 'Create a PDF document' - result = run_async(self.auto_skills.get_skill_dag(query)) - - self.assertIsNotNone(result, f'Result should not be None for: {query}') - - # Check required attributes exist - self.assertTrue(hasattr(result, 'is_complete')) - self.assertTrue(hasattr(result, 'selected_skills')) - self.assertTrue(hasattr(result, 'dag')) - self.assertTrue(hasattr(result, 'execution_order')) - self.assertTrue(hasattr(result, 'clarification')) - self.assertTrue(hasattr(result, 'chat_response')) - - # Assert skills_dag and execution_order are not empty - self.assertTrue( - result.dag, - f'skills_dag should not be empty for: {query}') - self.assertTrue( - result.execution_order, - f'execution_order should not be empty for: {query}') - - def test_execution_order_contains_valid_skills(self): - """Test that execution order only contains valid skill IDs.""" - query = 'Create a PowerPoint presentation and apply theme' - result = run_async(self.auto_skills.get_skill_dag(query)) - - self.assertIsNotNone(result, f'Result should not be None for: {query}') - self.assertTrue( - result.dag, - f'skills_dag should not be empty for: {query}') - self.assertTrue( - result.execution_order, - f'execution_order should not be empty for: {query}') - - if result.execution_order and result.selected_skills: - # Flatten execution order (may contain nested lists for parallel execution) - flat_order = [] - for item in result.execution_order: - if isinstance(item, list): - flat_order.extend(item) - else: - flat_order.append(item) - - # All skills in execution order should be in selected_skills - for skill_id in flat_order: - self.assertIn( - skill_id, result.selected_skills, - f'Skill {skill_id} in execution_order but not in selected_skills' - ) - - def test_skills_dag_structure(self): - """Test that skills DAG has valid adjacency list structure.""" - query = 'Extract PDF data and create Excel report' - result = run_async(self.auto_skills.get_skill_dag(query)) - - self.assertIsNotNone(result, f'Result should not be None for: {query}') - self.assertTrue( - result.dag, - f'skills_dag should not be empty for: {query}') - self.assertTrue( - result.execution_order, - 
f'execution_order should not be empty for: {query}')
-
-        if result.dag:
-            # DAG should be a dict
-            self.assertIsInstance(result.dag, dict)
-
-            # Each value should be a list of dependencies
-            for skill_id, deps in result.dag.items():
-                self.assertIsInstance(
-                    deps, list,
-                    f'Dependencies for {skill_id} should be a list')
-
-
-# Test suite for running all tests
-def suite():
-    """Create test suite with all test cases."""
-    loader = unittest.TestLoader()
-    test_suite = unittest.TestSuite()
-
-    test_suite.addTests(
-        loader.loadTestsFromTestCase(TestClaudeSkillsRetrieval))
-    test_suite.addTests(loader.loadTestsFromTestCase(TestSkillsCombination))
-    test_suite.addTests(loader.loadTestsFromTestCase(TestSkillsExecution))
-    test_suite.addTests(loader.loadTestsFromTestCase(TestChatOnlyQueries))
-    test_suite.addTests(loader.loadTestsFromTestCase(TestSkillDAGStructure))
-
-    return test_suite
-
-
-if __name__ == '__main__':
-    # Run tests with verbosity
-    runner = unittest.TextTestRunner(verbosity=2)
-    runner.run(suite())
diff --git a/tests/skills/test_dag_upstream_downstream.py b/tests/skills/test_dag_upstream_downstream.py
deleted file mode 100644
index ab130f876..000000000
--- a/tests/skills/test_dag_upstream_downstream.py
+++ /dev/null
@@ -1,900 +0,0 @@
-"""
-Unit tests for Skill DAG upstream-downstream data passing.
-
-=== Overview ===
-
-This test module validates the core DAG execution mechanism in AutoSkills:
-when multiple skills are chained in a Directed Acyclic Graph (DAG), the
-outputs (stdout, return_value, output_files, etc.) from upstream skills
-are correctly propagated to downstream skills via environment variables.
-
-=== Features Tested ===
-
-1. **Upstream output storage**: After a skill executes, its ExecutionOutput
-   is stored in DAGExecutor._outputs and linked via container.spec.link_upstream().
-
-2. **Environment variable injection**: DAGExecutor._build_execution_input()
-   reads upstream outputs and injects them as:
-   - UPSTREAM_OUTPUTS: Full JSON dict of all dependency outputs.
-   - UPSTREAM_<SKILL_ID>_STDOUT: Per-dependency stdout shortcut variable.
-
-3. **Sequential data flow**: A → B → C chain where each skill reads and
-   transforms data from its predecessor.
-
-4. **Full DAGExecutor.execute() pipeline**: End-to-end test through the
-   public execute() method, verifying internal wiring.
-
-5. **Mixed parallel + sequential DAG**: A → [B, C] → D pattern where B and
-   C run in parallel (both depending on A), then D merges both results.
-
-6. **container.link_skills() API**: Verifies the SkillContainer helper that
-   retrieves linked upstream outputs programmatically.
-
-7. **output_files propagation**: Upstream output files (written to
-   SKILL_OUTPUT_DIR) are captured and exposed in UPSTREAM_OUTPUTS JSON.
-
-=== Workflow ===
-
-Each test follows this pattern:
-    1. Create mock SkillSchema objects backed by temporary directories.
-    2. Instantiate SkillContainer (local mode, no sandbox) and DAGExecutor
-       (no LLM, no progressive analysis).
-    3. Execute Python code snippets as mock skill scripts.
-    4. Verify upstream data is available in downstream environment variables.
-    5. Assert correctness of data transformation across the DAG.
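To make the injected contract concrete: a downstream mock skill in these tests consumes its upstream data with plain stdlib calls. The sketch below uses the `skill_a@latest` fixture id and `revenue` field that appear later in this file; the JSON payload mirrors the ExecutionOutput fields (stdout, stderr, exit_code, output_files, duration_ms):

```python
import json
import os

# UPSTREAM_OUTPUTS is a JSON object keyed by dependency skill_id; each value
# carries that dependency's ExecutionOutput fields.
upstream = json.loads(os.environ.get('UPSTREAM_OUTPUTS', '{}'))

a_out = upstream.get('skill_a@latest', {})
if a_out.get('exit_code') == 0:
    data = json.loads(a_out['stdout'].strip())
    print('upstream revenue:', data.get('revenue'))

# Per-dependency shortcut: the skill_id is sanitized into the variable name,
# e.g. 'skill_a@latest'    -> UPSTREAM_SKILL_A_LATEST_STDOUT
#      'my-tool.v2@latest' -> UPSTREAM_MY_TOOL_V2_LATEST_STDOUT
raw_stdout = os.environ.get('UPSTREAM_SKILL_A_LATEST_STDOUT', '')
```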
- -=== Working Directory Structure === - -All intermediate results are stored under a temporary directory: - - / - ├── test_upstream_downstream/ - │ ├── skills/ # Mock skill definitions - │ │ ├── skill_a/SKILL.md - │ │ ├── skill_b/SKILL.md - │ │ └── skill_c/SKILL.md - │ └── workspace/ - │ ├── outputs/ # Skill output files (e.g., data.json) - │ ├── scripts/ # Generated temp execution scripts - │ └── logs/ # Execution spec logs - ├── test_full_pipeline/ - │ ├── skills/ - │ └── workspace/ - └── test_parallel_mixed/ - ├── skills/ - └── workspace/ - -=== Prerequisites === - -- Python >= 3.10 -- ms_agent package installed (editable mode: pip install -e .) -- No external LLM API key required (tests use mock code, no LLM calls). -- No sandbox/Docker required (tests run in local mode). - -=== Usage === - - # Run all tests in this module - python -m unittest tests.skills.test_dag_upstream_downstream -v - - # Run a specific test class - python -m unittest tests.skills.test_dag_upstream_downstream.TestDAGUpstreamDownstream -v - - # Run a specific test method - python -m unittest tests.skills.test_dag_upstream_downstream.TestDAGFullPipeline.test_sequential_pipeline -v - -=== Environment Variables === - - KEEP_TEST_ARTIFACTS=true|false (default: true) - Whether to keep intermediate results after tests finish. - Set to 'false' to auto-clean temp directories in tearDown. -""" -import asyncio -import json -import os -import shutil -import tempfile -import unittest -from pathlib import Path -from typing import Dict, List, Optional - -from ms_agent.skill.auto_skills import DAGExecutor, SkillExecutionResult -from ms_agent.skill.container import (ExecutionInput, ExecutionOutput, - SkillContainer) -from ms_agent.skill.schema import SkillFile, SkillSchema - -# --------------------------------------------------------------------------- -# Global control: whether to keep intermediate artifacts after tests. -# Set KEEP_TEST_ARTIFACTS=false to auto-clean. -# --------------------------------------------------------------------------- -KEEP_TEST_ARTIFACTS: bool = os.getenv('KEEP_TEST_ARTIFACTS', - 'true').lower() == 'true' - - -# --------------------------------------------------------------------------- -# Helpers -# --------------------------------------------------------------------------- - -def run_async(coro): - """Run an async coroutine in a new event loop (sync context helper).""" - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - try: - return loop.run_until_complete(coro) - finally: - loop.close() - - -def create_mock_skill(skill_id: str, name: str, description: str, - skill_dir: Path) -> SkillSchema: - """ - Create a minimal mock SkillSchema backed by a real directory. - - Args: - skill_id: Unique skill identifier (e.g., 'skill_a@latest'). - name: Human-readable skill name. - description: Short description of the skill. - skill_dir: Filesystem path for the skill directory. - - Returns: - A SkillSchema instance pointing to the created directory. 
- """ - skill_dir.mkdir(parents=True, exist_ok=True) - skill_md = skill_dir / 'SKILL.md' - skill_md.write_text( - f'---\nname: {name}\ndescription: {description}\n---\n' - f'# {name}\n{description}\n') - - return SkillSchema( - skill_id=skill_id, - name=name, - description=description, - content=f'# {name}\n{description}', - files=[SkillFile(name='SKILL.md', type='.md', path=skill_md)], - skill_path=skill_dir, - version='latest', - ) - - -# ============================================================================ -# Test 1: Direct upstream-downstream data flow -# ============================================================================ - -class TestDAGUpstreamDownstream(unittest.TestCase): - """ - Test upstream -> downstream data flow through DAGExecutor. - - Scenario: skill_a -> skill_b -> skill_c - - skill_a generates JSON data and writes an output file. - - skill_b reads skill_a's stdout via UPSTREAM_OUTPUTS env var. - - skill_c aggregates outputs from both skill_a and skill_b. - """ - - def setUp(self): - """Create temp directories, mock skills, container, and executor.""" - self.test_root = Path( - tempfile.mkdtemp(prefix='test_dag_upstream_downstream_')) - self.skills_dir = self.test_root / 'skills' - self.workspace_dir = self.test_root / 'workspace' - - # Create mock skills - self.skill_a = create_mock_skill( - 'skill_a@latest', 'Data Generator', - 'Generates data and outputs to stdout', - self.skills_dir / 'skill_a') - self.skill_b = create_mock_skill( - 'skill_b@latest', 'Data Processor', - 'Processes upstream data', - self.skills_dir / 'skill_b') - self.skill_c = create_mock_skill( - 'skill_c@latest', 'Report Builder', - 'Builds report from all upstream outputs', - self.skills_dir / 'skill_c') - - self.skills = { - 'skill_a@latest': self.skill_a, - 'skill_b@latest': self.skill_b, - 'skill_c@latest': self.skill_c, - } - - self.container = SkillContainer( - workspace_dir=self.workspace_dir, use_sandbox=False) - - self.executor = DAGExecutor( - container=self.container, - skills=self.skills, - workspace_dir=self.workspace_dir, - llm=None, - enable_progressive_analysis=False, - enable_self_reflection=False, - ) - - # DAG: skill_a -> skill_b -> skill_c - self.dag = { - 'skill_a@latest': [], - 'skill_b@latest': ['skill_a@latest'], - 'skill_c@latest': ['skill_a@latest', 'skill_b@latest'], - } - - def tearDown(self): - """Clean up temp directory unless KEEP_TEST_ARTIFACTS is set.""" - if not KEEP_TEST_ARTIFACTS and self.test_root.exists(): - try: - shutil.rmtree(self.test_root) - except Exception as e: - print(f'Warning: Failed to clean up {self.test_root}: {e}') - - self.executor = None - self.container = None - - def test_skill_a_output_stored(self): - """After executing skill_a, its output is stored in executor._outputs.""" - code_a = ( - 'import os, json\n' - 'output_dir = os.environ.get("SKILL_OUTPUT_DIR", "/tmp")\n' - 'data = {"revenue": 1000000, "quarter": "Q4", "year": 2024}\n' - 'print(json.dumps(data))\n' - 'output_file = os.path.join(output_dir, "data.json")\n' - 'with open(output_file, "w") as f:\n' - ' json.dump(data, f)\n' - 'print(f"Output file: {output_file}")\n' - ) - - exec_input = self.executor._build_execution_input( - 'skill_a@latest', self.dag) - output_a = run_async(self.container.execute_python_code( - code=code_a, skill_id='skill_a@latest', input_spec=exec_input)) - - self.executor._outputs['skill_a@latest'] = output_a - self.container.spec.link_upstream('skill_a@latest', output_a) - - self.assertEqual(output_a.exit_code, 0, - f'skill_a should succeed, 
stderr: {output_a.stderr}') - self.assertIn('revenue', output_a.stdout) - self.assertIn('skill_a@latest', self.executor._outputs) - - def test_upstream_env_vars_injected(self): - """skill_b's execution input contains UPSTREAM env vars from skill_a.""" - # Simulate skill_a output - output_a = ExecutionOutput( - stdout='{"revenue": 1000000}\n', - stderr='', - exit_code=0, - output_files={'data.json': Path('/tmp/data.json')}, - duration_ms=100.0, - ) - self.executor._outputs['skill_a@latest'] = output_a - - exec_input_b = self.executor._build_execution_input( - 'skill_b@latest', self.dag) - - # Verify UPSTREAM_OUTPUTS JSON - self.assertIn('UPSTREAM_OUTPUTS', exec_input_b.env_vars, - 'UPSTREAM_OUTPUTS should be set') - upstream_json = json.loads(exec_input_b.env_vars['UPSTREAM_OUTPUTS']) - self.assertIn('skill_a@latest', upstream_json) - self.assertEqual(upstream_json['skill_a@latest']['exit_code'], 0) - self.assertIn('revenue', upstream_json['skill_a@latest']['stdout']) - - # Verify individual upstream shortcut env var - self.assertIn('UPSTREAM_SKILL_A_LATEST_STDOUT', exec_input_b.env_vars, - 'Per-skill stdout shortcut should be set') - - def test_downstream_reads_upstream_data(self): - """skill_b can parse skill_a's stdout from UPSTREAM_OUTPUTS.""" - # Execute skill_a - code_a = ( - 'import json\n' - 'print(json.dumps({"revenue": 1000000, "quarter": "Q4"}))\n' - ) - exec_input_a = self.executor._build_execution_input( - 'skill_a@latest', self.dag) - output_a = run_async(self.container.execute_python_code( - code=code_a, skill_id='skill_a@latest', input_spec=exec_input_a)) - self.executor._outputs['skill_a@latest'] = output_a - self.container.spec.link_upstream('skill_a@latest', output_a) - - # Execute skill_b - code_b = ( - 'import os, json\n' - 'upstream = json.loads(os.environ.get("UPSTREAM_OUTPUTS", "{}"))\n' - 'data = json.loads(upstream["skill_a@latest"]["stdout"].strip())\n' - 'result = {"processed_revenue": data["revenue"] * 1.1}\n' - 'print(json.dumps(result))\n' - ) - exec_input_b = self.executor._build_execution_input( - 'skill_b@latest', self.dag) - output_b = run_async(self.container.execute_python_code( - code=code_b, skill_id='skill_b@latest', input_spec=exec_input_b)) - - self.assertEqual(output_b.exit_code, 0, - f'skill_b failed: {output_b.stderr}') - result_b = json.loads(output_b.stdout.strip()) - self.assertAlmostEqual(result_b['processed_revenue'], 1100000.0) - - def test_multi_upstream_aggregation(self): - """skill_c receives outputs from both skill_a and skill_b.""" - # Simulate skill_a and skill_b outputs - self.executor._outputs['skill_a@latest'] = ExecutionOutput( - stdout='A_DATA\n', stderr='', exit_code=0, duration_ms=10) - self.executor._outputs['skill_b@latest'] = ExecutionOutput( - stdout='B_DATA\n', stderr='', exit_code=0, duration_ms=10) - - exec_input_c = self.executor._build_execution_input( - 'skill_c@latest', self.dag) - upstream_json = json.loads(exec_input_c.env_vars['UPSTREAM_OUTPUTS']) - - self.assertIn('skill_a@latest', upstream_json, - 'skill_a should be in upstream data') - self.assertIn('skill_b@latest', upstream_json, - 'skill_b should be in upstream data') - self.assertEqual(len(upstream_json), 2, - 'skill_c should see exactly 2 upstream skills') - - def test_output_files_propagated(self): - """Upstream output_files paths are included in UPSTREAM_OUTPUTS JSON.""" - # Simulate skill_a with output files - self.executor._outputs['skill_a@latest'] = ExecutionOutput( - stdout='done\n', - stderr='', - exit_code=0, - output_files={ - 'report.pdf': 
Path('/workspace/outputs/report.pdf'), - 'data.csv': Path('/workspace/outputs/data.csv'), - }, - duration_ms=50, - ) - - exec_input_b = self.executor._build_execution_input( - 'skill_b@latest', self.dag) - upstream_json = json.loads(exec_input_b.env_vars['UPSTREAM_OUTPUTS']) - output_files = upstream_json['skill_a@latest']['output_files'] - - self.assertIn('report.pdf', output_files) - self.assertIn('data.csv', output_files) - - def test_link_skills_api(self): - """container.link_skills() returns correct upstream output.""" - output_a = ExecutionOutput( - stdout='hello from A\n', stderr='', exit_code=0, duration_ms=10) - self.container.spec.link_upstream('skill_a@latest', output_a) - - linked = self.container.link_skills( - 'skill_a@latest', 'input_data', 'stdout') - self.assertEqual(linked, 'hello from A\n') - - # Non-existent upstream returns None - missing = self.container.link_skills( - 'nonexistent@latest', 'input_data', 'stdout') - self.assertIsNone(missing) - - def test_full_three_skill_chain(self): - """End-to-end: skill_a -> skill_b -> skill_c with real execution.""" - # skill_a: generate data - code_a = ( - 'import os, json\n' - 'output_dir = os.environ.get("SKILL_OUTPUT_DIR", "/tmp")\n' - 'data = {"revenue": 1000000, "quarter": "Q4", "year": 2024}\n' - 'print(json.dumps(data))\n' - 'with open(os.path.join(output_dir, "data.json"), "w") as f:\n' - ' json.dump(data, f)\n' - ) - exec_input_a = self.executor._build_execution_input( - 'skill_a@latest', self.dag) - output_a = run_async(self.container.execute_python_code( - code=code_a, skill_id='skill_a@latest', input_spec=exec_input_a)) - self.executor._outputs['skill_a@latest'] = output_a - self.container.spec.link_upstream('skill_a@latest', output_a) - self.assertEqual(output_a.exit_code, 0) - - # skill_b: process skill_a output - code_b = ( - 'import os, json\n' - 'upstream = json.loads(os.environ.get("UPSTREAM_OUTPUTS", "{}"))\n' - 'a_stdout = upstream["skill_a@latest"]["stdout"].strip()\n' - 'data = json.loads(a_stdout)\n' - 'processed = {"processed_revenue": data["revenue"] * 1.1, "source": "skill_a"}\n' - 'print(json.dumps(processed))\n' - ) - exec_input_b = self.executor._build_execution_input( - 'skill_b@latest', self.dag) - output_b = run_async(self.container.execute_python_code( - code=code_b, skill_id='skill_b@latest', input_spec=exec_input_b)) - self.executor._outputs['skill_b@latest'] = output_b - self.container.spec.link_upstream('skill_b@latest', output_b) - self.assertEqual(output_b.exit_code, 0, - f'skill_b failed: {output_b.stderr}') - - # skill_c: aggregate both - code_c = ( - 'import os, json\n' - 'upstream = json.loads(os.environ.get("UPSTREAM_OUTPUTS", "{}"))\n' - 'print(f"Total upstream skills: {len(upstream)}")\n' - 'for sid, data in upstream.items():\n' - ' print(f"From {sid}: exit_code={data[\'exit_code\']}")\n' - ) - exec_input_c = self.executor._build_execution_input( - 'skill_c@latest', self.dag) - output_c = run_async(self.container.execute_python_code( - code=code_c, skill_id='skill_c@latest', input_spec=exec_input_c)) - - self.assertEqual(output_c.exit_code, 0, - f'skill_c failed: {output_c.stderr}') - self.assertIn('Total upstream skills: 2', output_c.stdout) - - -# ============================================================================ -# Test 2: Full DAGExecutor.execute() pipeline -# ============================================================================ - -class TestDAGFullPipeline(unittest.TestCase): - """ - Test the full DAGExecutor.execute() method with sequential skills. 
- - Scenario: adder (outputs 42) -> doubler (reads 42, outputs 84) - Verifies the complete internal wiring: execute() -> _execute_single_skill - -> _build_execution_input -> env_vars propagation. - """ - - def setUp(self): - """Create temp directories, mock skills, container, and executor.""" - self.test_root = Path( - tempfile.mkdtemp(prefix='test_dag_full_pipeline_')) - self.skills_dir = self.test_root / 'skills' - self.workspace_dir = self.test_root / 'workspace' - - self.skill_a = create_mock_skill( - 'adder@latest', 'Adder', 'Generates a number', - self.skills_dir / 'adder') - self.skill_b = create_mock_skill( - 'doubler@latest', 'Doubler', 'Doubles upstream number', - self.skills_dir / 'doubler') - - self.skills = { - 'adder@latest': self.skill_a, - 'doubler@latest': self.skill_b, - } - - self.container = SkillContainer( - workspace_dir=self.workspace_dir, use_sandbox=False) - - self.executor = DAGExecutor( - container=self.container, - skills=self.skills, - workspace_dir=self.workspace_dir, - llm=None, - enable_progressive_analysis=False, - enable_self_reflection=False, - ) - - self.dag = { - 'adder@latest': [], - 'doubler@latest': ['adder@latest'], - } - self.execution_order = ['adder@latest', 'doubler@latest'] - - def tearDown(self): - """Clean up temp directory unless KEEP_TEST_ARTIFACTS is set.""" - if not KEEP_TEST_ARTIFACTS and self.test_root.exists(): - try: - shutil.rmtree(self.test_root) - except Exception as e: - print(f'Warning: Failed to clean up {self.test_root}: {e}') - - self.executor = None - self.container = None - - def test_sequential_pipeline(self): - """adder outputs 42, doubler reads it and outputs 84.""" - container = self.container - executor = self.executor - - async def mock_execute_single( - skill_id, dag, execution_input=None, query=''): - exec_input = executor._build_execution_input( - skill_id, dag, execution_input) - - if skill_id == 'adder@latest': - code = 'print(42)' - elif skill_id == 'doubler@latest': - code = ( - 'import os, json\n' - 'upstream = json.loads(os.environ.get("UPSTREAM_OUTPUTS", "{}"))\n' - 'val = int(upstream["adder@latest"]["stdout"].strip())\n' - 'print(val * 2)\n' - ) - else: - return SkillExecutionResult( - skill_id=skill_id, success=False, error='Unknown') - - output = await container.execute_python_code( - code=code, skill_id=skill_id, input_spec=exec_input) - executor._outputs[skill_id] = output - container.spec.link_upstream(skill_id, output) - return SkillExecutionResult( - skill_id=skill_id, - success=(output.exit_code == 0), - output=output, - error=output.stderr if output.exit_code != 0 else None) - - executor._execute_single_skill = mock_execute_single - - result = run_async(executor.execute( - dag=self.dag, - execution_order=self.execution_order, - stop_on_failure=True, - query='test')) - - self.assertTrue(result.success, 'DAG execution should succeed') - - adder_out = result.results['adder@latest'].output.stdout.strip() - self.assertEqual(adder_out, '42', f'Expected 42, got: {adder_out}') - - doubler_out = result.results['doubler@latest'].output.stdout.strip() - self.assertEqual(doubler_out, '84', f'Expected 84, got: {doubler_out}') - - def test_failure_stops_pipeline(self): - """When upstream skill fails and stop_on_failure=True, pipeline stops.""" - container = self.container - executor = self.executor - - async def mock_execute_single( - skill_id, dag, execution_input=None, query=''): - exec_input = executor._build_execution_input( - skill_id, dag, execution_input) - - if skill_id == 'adder@latest': - code = 
'import sys; print("error", file=sys.stderr); sys.exit(1)' - else: - code = 'print("should not run")' - - output = await container.execute_python_code( - code=code, skill_id=skill_id, input_spec=exec_input) - executor._outputs[skill_id] = output - return SkillExecutionResult( - skill_id=skill_id, - success=(output.exit_code == 0), - output=output, - error=output.stderr if output.exit_code != 0 else None) - - executor._execute_single_skill = mock_execute_single - - result = run_async(executor.execute( - dag=self.dag, - execution_order=self.execution_order, - stop_on_failure=True, - query='test')) - - self.assertFalse(result.success, 'DAG should fail') - self.assertIn('adder@latest', result.results) - # doubler should not have been executed - self.assertNotIn('doubler@latest', result.results, - 'doubler should not run when adder fails') - - -# ============================================================================ -# Test 3: Parallel + Sequential mixed DAG -# ============================================================================ - -class TestDAGParallelMixed(unittest.TestCase): - """ - Test a mixed DAG with parallel and sequential execution. - - Scenario: gen -> [proc_x, proc_y] -> merge - - gen outputs BASE_VALUE=100 - - proc_x reads gen, outputs X_RESULT=110 (100+10) - - proc_y reads gen, outputs Y_RESULT=200 (100*2) - - merge reads both, outputs MERGED=310 (110+200) - proc_x and proc_y run in parallel. - """ - - def setUp(self): - """Create temp directories, mock skills, container, and executor.""" - self.test_root = Path( - tempfile.mkdtemp(prefix='test_dag_parallel_mixed_')) - self.skills_dir = self.test_root / 'skills' - self.workspace_dir = self.test_root / 'workspace' - - skill_names = ['gen', 'proc_x', 'proc_y', 'merge'] - self.skills = {} - for sname in skill_names: - sid = f'{sname}@latest' - sdir = self.skills_dir / sname - self.skills[sid] = create_mock_skill( - sid, sname, f'{sname} skill', sdir) - - self.container = SkillContainer( - workspace_dir=self.workspace_dir, use_sandbox=False) - - self.executor = DAGExecutor( - container=self.container, - skills=self.skills, - workspace_dir=self.workspace_dir, - llm=None, - enable_progressive_analysis=False, - enable_self_reflection=False, - ) - - self.dag = { - 'gen@latest': [], - 'proc_x@latest': ['gen@latest'], - 'proc_y@latest': ['gen@latest'], - 'merge@latest': ['proc_x@latest', 'proc_y@latest'], - } - self.execution_order = [ - 'gen@latest', - ['proc_x@latest', 'proc_y@latest'], - 'merge@latest', - ] - - def tearDown(self): - """Clean up temp directory unless KEEP_TEST_ARTIFACTS is set.""" - if not KEEP_TEST_ARTIFACTS and self.test_root.exists(): - try: - shutil.rmtree(self.test_root) - except Exception as e: - print(f'Warning: Failed to clean up {self.test_root}: {e}') - - self.executor = None - self.container = None - - def test_parallel_then_merge(self): - """gen=100 -> proc_x=110, proc_y=200 (parallel) -> merge=310.""" - container = self.container - executor = self.executor - - codes = { - 'gen@latest': 'print("BASE_VALUE=100")', - 'proc_x@latest': ( - 'import os, json\n' - 'upstream = json.loads(os.environ.get("UPSTREAM_OUTPUTS", "{}"))\n' - 'gen_stdout = upstream["gen@latest"]["stdout"].strip()\n' - 'val = int(gen_stdout.split("=")[1])\n' - 'print(f"X_RESULT={val + 10}")\n' - ), - 'proc_y@latest': ( - 'import os, json\n' - 'upstream = json.loads(os.environ.get("UPSTREAM_OUTPUTS", "{}"))\n' - 'gen_stdout = upstream["gen@latest"]["stdout"].strip()\n' - 'val = int(gen_stdout.split("=")[1])\n' - 
'print(f"Y_RESULT={val * 2}")\n' - ), - 'merge@latest': ( - 'import os, json\n' - 'upstream = json.loads(os.environ.get("UPSTREAM_OUTPUTS", "{}"))\n' - 'x_stdout = upstream["proc_x@latest"]["stdout"].strip()\n' - 'y_stdout = upstream["proc_y@latest"]["stdout"].strip()\n' - 'x_val = int(x_stdout.split("=")[1])\n' - 'y_val = int(y_stdout.split("=")[1])\n' - 'print(f"MERGED={x_val + y_val}")\n' - ), - } - - async def mock_execute_single( - skill_id, dag, execution_input=None, query=''): - exec_input = executor._build_execution_input( - skill_id, dag, execution_input) - code = codes.get(skill_id, 'print("unknown")') - output = await container.execute_python_code( - code=code, skill_id=skill_id, input_spec=exec_input) - executor._outputs[skill_id] = output - container.spec.link_upstream(skill_id, output) - return SkillExecutionResult( - skill_id=skill_id, - success=(output.exit_code == 0), - output=output, - error=output.stderr if output.exit_code != 0 else None) - - executor._execute_single_skill = mock_execute_single - - result = run_async(executor.execute( - dag=self.dag, - execution_order=self.execution_order, - stop_on_failure=True, - query='test parallel')) - - self.assertTrue(result.success, 'DAG should succeed') - - gen_out = result.results['gen@latest'].output.stdout.strip() - self.assertEqual(gen_out, 'BASE_VALUE=100') - - x_out = result.results['proc_x@latest'].output.stdout.strip() - self.assertEqual(x_out, 'X_RESULT=110', - f'proc_x should output 110, got: {x_out}') - - y_out = result.results['proc_y@latest'].output.stdout.strip() - self.assertEqual(y_out, 'Y_RESULT=200', - f'proc_y should output 200, got: {y_out}') - - merge_out = result.results['merge@latest'].output.stdout.strip() - self.assertEqual(merge_out, 'MERGED=310', - f'merge should output 310, got: {merge_out}') - - def test_parallel_skills_both_receive_upstream(self): - """Both proc_x and proc_y independently receive gen's output.""" - # Simulate gen output - self.executor._outputs['gen@latest'] = ExecutionOutput( - stdout='BASE_VALUE=100\n', stderr='', exit_code=0, duration_ms=10) - - input_x = self.executor._build_execution_input( - 'proc_x@latest', self.dag) - input_y = self.executor._build_execution_input( - 'proc_y@latest', self.dag) - - # Both should have UPSTREAM_OUTPUTS - for label, inp in [('proc_x', input_x), ('proc_y', input_y)]: - with self.subTest(skill=label): - self.assertIn('UPSTREAM_OUTPUTS', inp.env_vars) - upstream = json.loads(inp.env_vars['UPSTREAM_OUTPUTS']) - self.assertIn('gen@latest', upstream) - self.assertIn('BASE_VALUE=100', - upstream['gen@latest']['stdout']) - - def test_merge_receives_both_parallel_outputs(self): - """merge skill receives outputs from both proc_x and proc_y.""" - self.executor._outputs['proc_x@latest'] = ExecutionOutput( - stdout='X_RESULT=110\n', stderr='', exit_code=0, duration_ms=10) - self.executor._outputs['proc_y@latest'] = ExecutionOutput( - stdout='Y_RESULT=200\n', stderr='', exit_code=0, duration_ms=10) - - input_merge = self.executor._build_execution_input( - 'merge@latest', self.dag) - upstream = json.loads(input_merge.env_vars['UPSTREAM_OUTPUTS']) - - self.assertIn('proc_x@latest', upstream) - self.assertIn('proc_y@latest', upstream) - self.assertIn('X_RESULT=110', upstream['proc_x@latest']['stdout']) - self.assertIn('Y_RESULT=200', upstream['proc_y@latest']['stdout']) - - -# ============================================================================ -# Test 4: Edge cases and robustness -# 
============================================================================ - -class TestDAGEdgeCases(unittest.TestCase): - """Test edge cases in DAG upstream-downstream data passing.""" - - def setUp(self): - """Create temp directories and basic infrastructure.""" - self.test_root = Path( - tempfile.mkdtemp(prefix='test_dag_edge_cases_')) - self.skills_dir = self.test_root / 'skills' - self.workspace_dir = self.test_root / 'workspace' - - self.skill_a = create_mock_skill( - 'solo@latest', 'Solo', 'Standalone skill', - self.skills_dir / 'solo') - self.skills = {'solo@latest': self.skill_a} - - self.container = SkillContainer( - workspace_dir=self.workspace_dir, use_sandbox=False) - - self.executor = DAGExecutor( - container=self.container, - skills=self.skills, - workspace_dir=self.workspace_dir, - llm=None, - enable_progressive_analysis=False, - enable_self_reflection=False, - ) - - def tearDown(self): - """Clean up temp directory unless KEEP_TEST_ARTIFACTS is set.""" - if not KEEP_TEST_ARTIFACTS and self.test_root.exists(): - try: - shutil.rmtree(self.test_root) - except Exception as e: - print(f'Warning: Failed to clean up {self.test_root}: {e}') - - self.executor = None - self.container = None - - def test_no_upstream_no_env_vars(self): - """Skill with no dependencies has no UPSTREAM env vars.""" - dag = {'solo@latest': []} - exec_input = self.executor._build_execution_input( - 'solo@latest', dag) - - self.assertNotIn('UPSTREAM_OUTPUTS', exec_input.env_vars, - 'No UPSTREAM_OUTPUTS for skill without deps') - - def test_upstream_with_empty_stdout(self): - """Upstream with empty stdout still appears in UPSTREAM_OUTPUTS.""" - # Add a second skill that depends on solo - dep_skill = create_mock_skill( - 'dep@latest', 'Dep', 'Depends on solo', - self.skills_dir / 'dep') - self.skills['dep@latest'] = dep_skill - - self.executor._outputs['solo@latest'] = ExecutionOutput( - stdout='', stderr='', exit_code=0, duration_ms=10) - - dag = { - 'solo@latest': [], - 'dep@latest': ['solo@latest'], - } - exec_input = self.executor._build_execution_input( - 'dep@latest', dag) - upstream = json.loads(exec_input.env_vars['UPSTREAM_OUTPUTS']) - - self.assertIn('solo@latest', upstream) - self.assertEqual(upstream['solo@latest']['stdout'], '') - # No individual STDOUT shortcut since stdout is empty - self.assertNotIn('UPSTREAM_SOLO_LATEST_STDOUT', exec_input.env_vars) - - def test_upstream_with_failed_exit_code(self): - """Upstream failure data is still passed to downstream.""" - dep_skill = create_mock_skill( - 'dep@latest', 'Dep', 'Depends on solo', - self.skills_dir / 'dep') - self.skills['dep@latest'] = dep_skill - - self.executor._outputs['solo@latest'] = ExecutionOutput( - stdout='partial output\n', - stderr='something went wrong\n', - exit_code=1, - duration_ms=10, - ) - - dag = { - 'solo@latest': [], - 'dep@latest': ['solo@latest'], - } - exec_input = self.executor._build_execution_input( - 'dep@latest', dag) - upstream = json.loads(exec_input.env_vars['UPSTREAM_OUTPUTS']) - - self.assertEqual(upstream['solo@latest']['exit_code'], 1) - self.assertIn('something went wrong', - upstream['solo@latest']['stderr']) - - def test_safe_key_special_characters(self): - """Skill IDs with @, -, . 
are sanitized in env var names.""" - special_skill = create_mock_skill( - 'my-tool.v2@latest', 'MyTool', 'Tool with special chars', - self.skills_dir / 'my_tool') - self.skills['my-tool.v2@latest'] = special_skill - - dep_skill = create_mock_skill( - 'consumer@latest', 'Consumer', 'Depends on special', - self.skills_dir / 'consumer') - self.skills['consumer@latest'] = dep_skill - - self.executor._outputs['my-tool.v2@latest'] = ExecutionOutput( - stdout='special output\n', stderr='', exit_code=0, duration_ms=10) - - dag = { - 'my-tool.v2@latest': [], - 'consumer@latest': ['my-tool.v2@latest'], - } - exec_input = self.executor._build_execution_input( - 'consumer@latest', dag) - - # Safe key: my-tool.v2@latest -> MY_TOOL_V2_LATEST - expected_key = 'UPSTREAM_MY_TOOL_V2_LATEST_STDOUT' - self.assertIn(expected_key, exec_input.env_vars, - f'{expected_key} should be in env_vars, ' - f'got keys: {list(exec_input.env_vars.keys())}') - - -# ============================================================================ -# Test suite -# ============================================================================ - -def suite(): - """Create test suite with all test cases.""" - loader = unittest.TestLoader() - test_suite = unittest.TestSuite() - test_suite.addTests( - loader.loadTestsFromTestCase(TestDAGUpstreamDownstream)) - test_suite.addTests( - loader.loadTestsFromTestCase(TestDAGFullPipeline)) - test_suite.addTests( - loader.loadTestsFromTestCase(TestDAGParallelMixed)) - test_suite.addTests( - loader.loadTestsFromTestCase(TestDAGEdgeCases)) - return test_suite - - -if __name__ == '__main__': - runner = unittest.TextTestRunner(verbosity=2) - runner.run(suite()) diff --git a/tests/skills/test_skill.py b/tests/skills/test_skill.py new file mode 100644 index 000000000..9a881627b --- /dev/null +++ b/tests/skills/test_skill.py @@ -0,0 +1,1008 @@ +# Copyright (c) ModelScope Contributors. All rights reserved. +"""Tests for the Skill module. 
+ +Covers: + - SkillSource / parse_skill_source + - SkillCatalog (load, filter, cache, hot-reload) + - SkillPromptInjector + - SkillToolSet (skills_list, skill_view, skill_manage) + - LLMAgent integration (prepare_skills, create_messages) + - SkillLoader + - SkillSchema parsing / validation + - End-to-end pipeline + +Fixture skills: examples/skills/claude_skills (docx, pdf) +""" +import asyncio +import json +import os +import shutil +import tempfile +import unittest +from pathlib import Path +from unittest.mock import MagicMock, patch + +from omegaconf import DictConfig, OmegaConf + +CLAUDE_SKILLS_DIR = ( + Path(__file__).resolve().parent.parent.parent + / "examples" / "skills" / "claude_skills" +) + + +def _make_skill_dir(base: Path, skill_id: str, name: str, desc: str, + *, always: bool = False, tags=None, + requires=None, extra_body: str = "") -> Path: + """Create a minimal skill directory with SKILL.md.""" + d = base / skill_id + d.mkdir(parents=True, exist_ok=True) + lines = [ + "---", + f"name: {name}", + f'description: "{desc}"', + ] + if always: + lines.append("always: true") + if tags: + lines.append(f"tags: {tags}") + if requires: + lines.append("requires:") + if "tools" in requires: + lines.append(f" tools: {requires['tools']}") + if "env" in requires: + lines.append(f" env: {requires['env']}") + lines.append("---") + lines.append("") + lines.append(f"# {name}") + lines.append("") + lines.append(f"Instructions for {name}.") + if extra_body: + lines.append(extra_body) + (d / "SKILL.md").write_text("\n".join(lines), encoding="utf-8") + return d + + +# ============================================================ +# 1. SkillSource / parse_skill_source +# ============================================================ + +class TestSkillSource(unittest.TestCase): + + def test_local_absolute_path(self): + from ms_agent.skill.sources import parse_skill_source + src = parse_skill_source(str(CLAUDE_SKILLS_DIR)) + self.assertEqual(src.type.value, "local") + self.assertEqual(src.path, str(CLAUDE_SKILLS_DIR)) + + def test_local_relative_dot_path(self): + from ms_agent.skill.sources import parse_skill_source + src = parse_skill_source("./skills") + self.assertEqual(src.type.value, "local") + self.assertTrue(os.path.isabs(src.path)) + + def test_local_relative_dotdot_path(self): + from ms_agent.skill.sources import parse_skill_source + src = parse_skill_source("../some/path") + self.assertEqual(src.type.value, "local") + self.assertTrue(os.path.isabs(src.path)) + + def test_local_tilde_path(self): + from ms_agent.skill.sources import parse_skill_source + src = parse_skill_source("~/my_skills") + self.assertEqual(src.type.value, "local") + self.assertNotIn("~", src.path) + + def test_modelscope_uri(self): + from ms_agent.skill.sources import parse_skill_source + src = parse_skill_source("modelscope://owner/repo@v1.0#subdir") + self.assertEqual(src.type.value, "modelscope") + self.assertEqual(src.repo_id, "owner/repo") + self.assertEqual(src.revision, "v1.0") + self.assertEqual(src.subdir, "subdir") + + def test_modelscope_skill_url(self): + from ms_agent.skill.sources import parse_skill_source + src = parse_skill_source( + "https://modelscope.cn/skills/BaiduDrive/baidu-drive") + self.assertEqual(src.type.value, "modelscope") + self.assertEqual(src.repo_id, "BaiduDrive/baidu-drive") + + def test_modelscope_skill_url_with_files_suffix(self): + from ms_agent.skill.sources import parse_skill_source + src = parse_skill_source( + "https://www.modelscope.cn/skills/BaiduDrive/baidu-drive/files") + 
self.assertEqual(src.type.value, "modelscope") + self.assertEqual(src.repo_id, "BaiduDrive/baidu-drive") + + def test_at_prefix_shorthand(self): + from ms_agent.skill.sources import parse_skill_source + src = parse_skill_source("@MiniMax-AI/minimax-pdf") + self.assertEqual(src.type.value, "modelscope") + self.assertEqual(src.repo_id, "MiniMax-AI/minimax-pdf") + + def test_git_url(self): + from ms_agent.skill.sources import parse_skill_source + src = parse_skill_source("https://github.com/user/repo.git") + self.assertEqual(src.type.value, "git") + self.assertEqual(src.url, "https://github.com/user/repo.git") + + def test_owner_repo_pattern(self): + from ms_agent.skill.sources import parse_skill_source + src = parse_skill_source("ms-agent/research_skills") + self.assertEqual(src.type.value, "modelscope") + self.assertEqual(src.repo_id, "ms-agent/research_skills") + + def test_nonexistent_abs_path_becomes_local(self): + from ms_agent.skill.sources import parse_skill_source + src = parse_skill_source("/nonexistent/path/to/skills") + self.assertEqual(src.type.value, "local") + self.assertEqual(src.path, "/nonexistent/path/to/skills") + + +# ============================================================ +# 1b. Catalog download paths (ModelScope SDK / HTTP fallback / Git) +# ============================================================ + +class TestCatalogDownloadModelScopeSDK(unittest.TestCase): + """_load_from_modelscope via the real SDK HubApi.download_skill.""" + + def setUp(self): + self.tmp = Path(tempfile.mkdtemp()) + + def tearDown(self): + shutil.rmtree(self.tmp, ignore_errors=True) + + def test_sdk_download_produces_skill(self): + """HubApi.download_skill → directory with SKILL.md → SkillLoader OK.""" + from ms_agent.skill.catalog import SkillCatalog + from ms_agent.skill.sources import SkillSource, SkillSourceType + import ms_agent.skill.catalog as cat_mod + orig = cat_mod.USER_SKILLS_DIR + cat_mod.USER_SKILLS_DIR = self.tmp + try: + cat = SkillCatalog() + cat.load_from_sources([ + SkillSource(type=SkillSourceType.MODELSCOPE, + repo_id="BaiduDrive/baidu-drive"), + ]) + skills = cat.get_enabled_skills() + self.assertEqual(len(skills), 1) + skill = list(skills.values())[0] + self.assertEqual(skill.name, "baidu-drive") + self.assertTrue(len(skill.scripts) > 0) + finally: + cat_mod.USER_SKILLS_DIR = orig + + def test_sdk_download_skill_dir_name(self): + """SDK names the directory by element_name only (no owner prefix).""" + from modelscope.hub.api import HubApi + api = HubApi() + path = api.download_skill("BaiduDrive/baidu-drive", + local_dir=str(self.tmp)) + self.assertEqual(os.path.basename(path), "baidu-drive") + self.assertTrue(os.path.exists(os.path.join(path, "SKILL.md"))) + + +class TestCatalogDownloadHTTPFallback(unittest.TestCase): + """_download_skill_zip (pure-HTTP, no SDK dependency).""" + + def setUp(self): + self.tmp = Path(tempfile.mkdtemp()) + + def tearDown(self): + shutil.rmtree(self.tmp, ignore_errors=True) + + def test_http_fallback_downloads_and_extracts(self): + from ms_agent.skill.catalog import _download_skill_zip + path = _download_skill_zip("BaiduDrive/baidu-drive", str(self.tmp)) + self.assertEqual(os.path.basename(path), "baidu-drive") + self.assertTrue(os.path.exists(os.path.join(path, "SKILL.md"))) + + def test_http_fallback_naming_matches_sdk(self): + """Fallback and SDK produce the same directory basename.""" + from ms_agent.skill.catalog import _download_skill_zip + path = _download_skill_zip("BaiduDrive/baidu-drive", str(self.tmp)) + 
self.assertEqual(os.path.basename(path), "baidu-drive")
+
+    def test_http_fallback_used_when_sdk_missing(self):
+        """When HubApi import fails, we fall through to HTTP fallback."""
+        import builtins
+        from ms_agent.skill.catalog import SkillCatalog
+        from ms_agent.skill.sources import SkillSource, SkillSourceType
+        import ms_agent.skill.catalog as cat_mod
+        orig = cat_mod.USER_SKILLS_DIR
+        cat_mod.USER_SKILLS_DIR = self.tmp
+
+        # Capture the real import before patching so that every import
+        # other than the SDK module passes through untouched.
+        real_import = builtins.__import__
+
+        def mock_import(name, *args, **kwargs):
+            if name == "modelscope.hub.api":
+                raise ImportError("mocked SDK unavailable")
+            return real_import(name, *args, **kwargs)
+
+        try:
+            cat = SkillCatalog()
+            with patch("builtins.__import__", side_effect=mock_import):
+                cat.load_from_sources([
+                    SkillSource(type=SkillSourceType.MODELSCOPE,
+                                repo_id="BaiduDrive/baidu-drive"),
+                ])
+            skills = cat.get_enabled_skills()
+            self.assertEqual(len(skills), 1)
+        finally:
+            cat_mod.USER_SKILLS_DIR = orig
+
+    def test_http_fallback_invalid_skill_id_raises(self):
+        from ms_agent.skill.catalog import _download_skill_zip
+        with self.assertRaises(Exception):
+            _download_skill_zip("nonexistent/fake-skill-xyz",
+                                str(self.tmp))
+
+
+class TestCatalogDownloadGit(unittest.TestCase):
+    """_load_from_git via real git clone."""
+
+    def setUp(self):
+        self.tmp = Path(tempfile.mkdtemp())
+        self.skill_dir = self.tmp / "repo"
+        self.skill_dir.mkdir()
+        _make_skill_dir(self.skill_dir, "test-skill", "TestSkill",
+                        "A test skill")
+
+    def tearDown(self):
+        shutil.rmtree(self.tmp, ignore_errors=True)
+
+    def test_git_clone_loads_skills(self):
+        """Sanity check: the fixture repo loads as a plain local directory."""
+        from ms_agent.skill.catalog import SkillCatalog
+        from ms_agent.skill.sources import SkillSource, SkillSourceType
+
+        cat = SkillCatalog()
+        cat.load_from_sources([
+            SkillSource(type=SkillSourceType.LOCAL_DIR,
+                        path=str(self.skill_dir)),
+        ])
+        skills = cat.get_enabled_skills()
+        self.assertIn("test-skill", skills)
+
+    @patch("subprocess.run")
+    def test_git_clone_invocation(self, mock_run):
+        """Verify the git clone command is correctly constructed."""
+        from ms_agent.skill.catalog import SkillCatalog
+        from ms_agent.skill.sources import SkillSource, SkillSourceType
+
+        def side_effect(cmd, **kwargs):
+            # Simulate a successful clone by materializing a skill in the
+            # destination directory (the last element of the command).
+            dest_path = cmd[-1]
+            _make_skill_dir(Path(dest_path), "cloned", "Cloned",
+                            "A cloned skill")
+            return MagicMock(returncode=0)
+
+        mock_run.side_effect = side_effect
+
+        cat = SkillCatalog()
+        cat.load_from_sources([
+            SkillSource(type=SkillSourceType.GIT,
+                        url="https://github.com/user/skills-repo.git",
+                        revision="main"),
+        ])
+
+        call_args = mock_run.call_args[0][0]
+        self.assertIn("git", call_args)
+        self.assertIn("clone", call_args)
+        self.assertIn("--depth", call_args)
+        self.assertIn("--branch", call_args)
+        self.assertIn("main", call_args)
+        self.assertIn("https://github.com/user/skills-repo.git", call_args)
+
+    @patch("subprocess.run")
+    def test_git_clone_with_subdir(self, mock_run):
+        """Git source with subdir only loads from that subdirectory."""
+
+        def side_effect(cmd, **kwargs):
+            dest_path = Path(cmd[-1])
+            sub = dest_path / "sub"
+            _make_skill_dir(sub, "nested", "Nested", "Nested skill")
+            _make_skill_dir(dest_path, "root-skill", "Root", "Root skill")
+            return MagicMock(returncode=0)
+
+        mock_run.side_effect = side_effect
+
+        from ms_agent.skill.catalog import SkillCatalog
+        from ms_agent.skill.sources import SkillSource, 
SkillSourceType + + cat = SkillCatalog() + cat.load_from_sources([ + SkillSource(type=SkillSourceType.GIT, + url="https://github.com/user/repo.git", + subdir="sub"), + ]) + skills = cat.get_enabled_skills() + self.assertIn("nested", skills) + self.assertNotIn("root-skill", skills) + + +# ============================================================ +# 2. SkillCatalog +# ============================================================ + +class TestSkillCatalog(unittest.TestCase): + + def setUp(self): + self.tmp = Path(tempfile.mkdtemp()) + _make_skill_dir(self.tmp, "alpha", "Alpha", "Skill alpha", + tags="[demo]") + _make_skill_dir(self.tmp, "beta", "Beta", "Skill beta", + always=True, tags="[demo, test]") + + def tearDown(self): + shutil.rmtree(self.tmp, ignore_errors=True) + + def _make_catalog(self, path=None): + from ms_agent.skill.catalog import SkillCatalog + from ms_agent.skill.sources import SkillSource, SkillSourceType + catalog = SkillCatalog() + catalog.load_from_sources([ + SkillSource(type=SkillSourceType.LOCAL_DIR, + path=str(path or self.tmp)) + ]) + return catalog + + def test_load_local_skills(self): + catalog = self._make_catalog() + skills = catalog.get_enabled_skills() + self.assertIn("alpha", skills) + self.assertIn("beta", skills) + self.assertEqual(skills["alpha"].name, "Alpha") + + def test_load_claude_skills(self): + catalog = self._make_catalog(CLAUDE_SKILLS_DIR) + skills = catalog.get_enabled_skills() + self.assertIn("docx", skills) + self.assertIn("pdf", skills) + self.assertEqual(skills["docx"].name, "docx") + + def test_always_skills(self): + catalog = self._make_catalog() + always = catalog.get_always_skills() + self.assertIn("beta", always) + self.assertNotIn("alpha", always) + + def test_disable_skill(self): + catalog = self._make_catalog() + catalog.disable_skill("alpha") + skills = catalog.get_enabled_skills() + self.assertNotIn("alpha", skills) + self.assertIn("beta", skills) + + def test_enable_after_disable(self): + catalog = self._make_catalog() + catalog.disable_skill("alpha") + catalog.enable_skill("alpha") + self.assertIn("alpha", catalog.get_enabled_skills()) + + def test_whitelist_filters(self): + catalog = self._make_catalog() + catalog._whitelist = {"alpha"} + skills = catalog.get_enabled_skills() + self.assertIn("alpha", skills) + self.assertNotIn("beta", skills) + + def test_whitelist_empty_disables_all(self): + catalog = self._make_catalog() + catalog._whitelist = set() + self.assertEqual(len(catalog.get_enabled_skills()), 0) + + def test_whitelist_none_allows_all(self): + catalog = self._make_catalog() + catalog._whitelist = None + self.assertEqual(len(catalog.get_enabled_skills()), 2) + + def test_get_skill_by_id(self): + catalog = self._make_catalog() + skill = catalog.get_skill("alpha") + self.assertIsNotNone(skill) + self.assertEqual(skill.name, "Alpha") + + def test_get_nonexistent_skill(self): + catalog = self._make_catalog() + self.assertIsNone(catalog.get_skill("nonexistent")) + + def test_remove_skill(self): + catalog = self._make_catalog() + self.assertTrue(catalog.remove_skill("alpha")) + self.assertIsNone(catalog.get_skill("alpha")) + + def test_remove_nonexistent(self): + catalog = self._make_catalog() + self.assertFalse(catalog.remove_skill("nonexistent")) + + def test_add_skill_dynamically(self): + _make_skill_dir(self.tmp, "gamma", "Gamma", "Skill gamma") + catalog = self._make_catalog() + catalog.remove_skill("gamma") + self.assertIsNone(catalog.get_skill("gamma")) + skill = catalog.add_skill(str(self.tmp / "gamma")) + 
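# add_skill should register the new directory immediately, without
+        # requiring a full catalog.reload().
+        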
self.assertIsNotNone(skill) + self.assertEqual(skill.name, "Gamma") + + def test_summary_cache(self): + catalog = self._make_catalog() + s1 = catalog.get_skills_summary() + self.assertIn("Alpha", s1) + self.assertIn("Beta", s1) + s2 = catalog.get_skills_summary() + self.assertIs(s1, s2) + + def test_summary_invalidated_on_change(self): + catalog = self._make_catalog() + s1 = catalog.get_skills_summary() + catalog.disable_skill("alpha") + s2 = catalog.get_skills_summary() + self.assertNotEqual(s1, s2) + self.assertNotIn("Alpha", s2) + + def test_later_source_overrides_earlier(self): + tmp2 = Path(tempfile.mkdtemp()) + try: + _make_skill_dir(tmp2, "alpha", "Alpha Override", + "Overridden description") + from ms_agent.skill.catalog import SkillCatalog + from ms_agent.skill.sources import SkillSource, SkillSourceType + catalog = SkillCatalog() + catalog.load_from_sources([ + SkillSource(type=SkillSourceType.LOCAL_DIR, + path=str(self.tmp)), + SkillSource(type=SkillSourceType.LOCAL_DIR, + path=str(tmp2)), + ]) + self.assertEqual( + catalog.get_skill("alpha").name, "Alpha Override") + finally: + shutil.rmtree(tmp2, ignore_errors=True) + + def test_reload(self): + catalog = self._make_catalog() + (self.tmp / "alpha" / "SKILL.md").write_text( + '---\nname: Alpha\ndescription: "Updated"\n---\n# Alpha\n', + encoding="utf-8") + catalog.reload() + self.assertEqual(catalog.get_skill("alpha").description, "Updated") + + def test_load_from_config_path_string(self): + from ms_agent.skill.catalog import SkillCatalog + cfg = OmegaConf.create({"path": str(self.tmp)}) + catalog = SkillCatalog(config=cfg) + catalog.load_from_config(cfg) + self.assertIn("alpha", catalog.get_enabled_skills()) + + def test_load_from_config_path_list(self): + from ms_agent.skill.catalog import SkillCatalog + cfg = OmegaConf.create({"path": [str(self.tmp)]}) + catalog = SkillCatalog(config=cfg) + catalog.load_from_config(cfg) + self.assertIn("alpha", catalog.get_enabled_skills()) + + def test_load_from_config_with_disabled(self): + from ms_agent.skill.catalog import SkillCatalog + cfg = OmegaConf.create({ + "path": [str(self.tmp)], + "disabled": ["alpha"], + }) + catalog = SkillCatalog(config=cfg) + catalog.load_from_config(cfg) + self.assertNotIn("alpha", catalog.get_enabled_skills()) + self.assertIn("beta", catalog.get_enabled_skills()) + + +# ============================================================ +# 3. 
SkillPromptInjector +# ============================================================ + +class TestSkillPromptInjector(unittest.TestCase): + + def setUp(self): + self.tmp = Path(tempfile.mkdtemp()) + _make_skill_dir(self.tmp, "always-skill", "AlwaysSkill", + "Always active", always=True) + _make_skill_dir(self.tmp, "normal-skill", "NormalSkill", + "Normal skill") + + def tearDown(self): + shutil.rmtree(self.tmp, ignore_errors=True) + + def _make_injector(self): + from ms_agent.skill.catalog import SkillCatalog + from ms_agent.skill.prompt_injector import SkillPromptInjector + from ms_agent.skill.sources import SkillSource, SkillSourceType + catalog = SkillCatalog() + catalog.load_from_sources([ + SkillSource(type=SkillSourceType.LOCAL_DIR, + path=str(self.tmp)) + ]) + return SkillPromptInjector(catalog) + + def test_build_with_always_and_normal(self): + inj = self._make_injector() + section = inj.build_skill_prompt_section() + self.assertIn("Active Skills", section) + self.assertIn("AlwaysSkill", section) + self.assertIn("Available Skills", section) + self.assertIn("NormalSkill", section) + + def test_always_skill_body_injected(self): + inj = self._make_injector() + section = inj.build_skill_prompt_section() + self.assertIn("Instructions for AlwaysSkill", section) + + def test_frontmatter_stripped_from_always(self): + inj = self._make_injector() + section = inj.build_skill_prompt_section() + self.assertNotIn("always: true", section) + + def test_empty_catalog_returns_empty(self): + from ms_agent.skill.catalog import SkillCatalog + from ms_agent.skill.prompt_injector import SkillPromptInjector + catalog = SkillCatalog() + inj = SkillPromptInjector(catalog) + self.assertEqual(inj.build_skill_prompt_section(), "") + + def test_strip_frontmatter_static(self): + from ms_agent.skill.prompt_injector import SkillPromptInjector + content = "---\nname: Test\n---\n\nBody text." + result = SkillPromptInjector._strip_frontmatter(content) + self.assertEqual(result, "Body text.") + self.assertNotIn("---", result) + + def test_no_always_skills_omits_active_section(self): + """When no skills are marked always, only the Available section appears.""" + from ms_agent.skill.catalog import SkillCatalog + from ms_agent.skill.prompt_injector import SkillPromptInjector + from ms_agent.skill.sources import SkillSource, SkillSourceType + catalog = SkillCatalog() + catalog.load_from_sources([ + SkillSource(type=SkillSourceType.LOCAL_DIR, + path=str(CLAUDE_SKILLS_DIR)) + ]) + inj = SkillPromptInjector(catalog) + section = inj.build_skill_prompt_section() + self.assertNotIn("Active Skills", section) + self.assertIn("Available Skills", section) + self.assertIn("docx", section) + self.assertIn("pdf", section) + + +# ============================================================ +# 4. 
SkillToolSet +# ============================================================ + +class TestSkillToolSet(unittest.TestCase): + + def setUp(self): + from ms_agent.skill.catalog import SkillCatalog + from ms_agent.skill.skill_tools import SkillToolSet + from ms_agent.skill.sources import SkillSource, SkillSourceType + + self.tmp = Path(tempfile.mkdtemp()) + _make_skill_dir(self.tmp, "demo", "Demo Skill", "A demo skill", + tags="[demo, test]", + requires={"tools": "[web_search]", + "env": "[NONEXISTENT_VAR]"}) + scripts_dir = self.tmp / "demo" / "scripts" + scripts_dir.mkdir(exist_ok=True) + (scripts_dir / "helper.py").write_text( + "print('hello')", encoding="utf-8") + + self.catalog = SkillCatalog() + self.catalog.load_from_sources([ + SkillSource(type=SkillSourceType.LOCAL_DIR, + path=str(self.tmp)) + ]) + + config = DictConfig({}) + self.toolset = SkillToolSet(config, self.catalog, enable_manage=True) + + def tearDown(self): + shutil.rmtree(self.tmp, ignore_errors=True) + + def test_get_tools_includes_all(self): + tools = asyncio.get_event_loop().run_until_complete( + self.toolset._get_tools_inner()) + names = [t["tool_name"] for t in tools["skills"]] + self.assertIn("skills_list", names) + self.assertIn("skill_view", names) + self.assertIn("skill_manage", names) + + def test_get_tools_without_manage(self): + from ms_agent.skill.skill_tools import SkillToolSet + ts = SkillToolSet(DictConfig({}), self.catalog, enable_manage=False) + tools = asyncio.get_event_loop().run_until_complete( + ts._get_tools_inner()) + names = [t["tool_name"] for t in tools["skills"]] + self.assertNotIn("skill_manage", names) + + def test_skills_list(self): + result = self.toolset._handle_skills_list({}) + data = json.loads(result) + self.assertEqual(data["total"], 1) + self.assertEqual(data["skills"][0]["skill_id"], "demo") + self.assertEqual(data["skills"][0]["name"], "Demo Skill") + + def test_skills_list_with_tag_filter(self): + result = self.toolset._handle_skills_list({"tag": "demo"}) + data = json.loads(result) + self.assertEqual(data["total"], 1) + + def test_skills_list_with_nonexistent_tag(self): + result = self.toolset._handle_skills_list( + {"tag": "nonexistent"}) + self.assertEqual(result, "No skills available.") + + def test_skill_view_main_content(self): + result = self.toolset._handle_skill_view({"skill_id": "demo"}) + data = json.loads(result) + self.assertEqual(data["skill_id"], "demo") + self.assertIn("Demo Skill", data["content"]) + self.assertIn("scripts", data["linked_files"]) + + def test_skill_view_nonexistent(self): + result = self.toolset._handle_skill_view( + {"skill_id": "nonexistent"}) + data = json.loads(result) + self.assertIn("error", data) + + def test_skill_view_file(self): + result = self.toolset._handle_skill_view({ + "skill_id": "demo", + "file_path": "scripts/helper.py", + }) + data = json.loads(result) + self.assertIn("print('hello')", data["content"]) + + def test_skill_view_path_traversal_blocked(self): + result = self.toolset._handle_skill_view({ + "skill_id": "demo", + "file_path": "../../etc/passwd", + }) + data = json.loads(result) + self.assertIn("error", data) + + def test_skill_view_missing_file(self): + result = self.toolset._handle_skill_view({ + "skill_id": "demo", + "file_path": "scripts/nonexistent.py", + }) + data = json.loads(result) + self.assertIn("error", data) + + def test_skill_view_requirements_check(self): + result = self.toolset._handle_skill_view({"skill_id": "demo"}) + data = json.loads(result) + self.assertIn("requirements_status", data) + status = 
data["requirements_status"] + self.assertIn("NONEXISTENT_VAR", status["missing_env_vars"]) + + def test_skill_manage_create_and_delete(self): + content = ( + '---\nname: New Skill\ndescription: "A new skill"\n---\n' + '# New Skill\n\nInstructions.') + with patch.object(self.toolset, '_get_custom_skills_dir', + return_value=self.tmp / "_custom"): + result = self.toolset._handle_skill_manage({ + "action": "create", + "skill_id": "new-skill", + "content": content, + }) + data = json.loads(result) + self.assertTrue(data.get("success")) + + self.assertIsNotNone(self.catalog.get_skill("new-skill")) + + result = self.toolset._handle_skill_manage({ + "action": "delete", + "skill_id": "new-skill", + }) + data = json.loads(result) + self.assertTrue(data.get("success")) + self.assertIsNone(self.catalog.get_skill("new-skill")) + + def test_skill_manage_create_duplicate(self): + content = ( + '---\nname: Demo Dup\ndescription: "dup"\n---\n# Dup\n') + with patch.object(self.toolset, '_get_custom_skills_dir', + return_value=self.tmp): + result = self.toolset._handle_skill_manage({ + "action": "create", + "skill_id": "demo", + "content": content, + }) + data = json.loads(result) + self.assertIn("error", data) + + def test_skill_manage_create_invalid_frontmatter(self): + with patch.object(self.toolset, '_get_custom_skills_dir', + return_value=self.tmp / "_custom2"): + result = self.toolset._handle_skill_manage({ + "action": "create", + "skill_id": "bad-skill", + "content": "No frontmatter here.", + }) + data = json.loads(result) + self.assertIn("error", data) + + def test_skill_manage_edit(self): + new_content = ( + '---\nname: Demo Skill\ndescription: "Updated desc"\n---\n' + '# Demo Skill Updated\n') + result = self.toolset._handle_skill_manage({ + "action": "edit", + "skill_id": "demo", + "content": new_content, + }) + data = json.loads(result) + self.assertTrue(data.get("success")) + self.assertEqual( + self.catalog.get_skill("demo").description, "Updated desc") + + def test_call_tool_dispatch(self): + result = asyncio.get_event_loop().run_until_complete( + self.toolset.call_tool( + "skills", tool_name="skills_list", tool_args={})) + self.assertIn("demo", result) + + def test_skill_view_claude_skills(self): + """Verify skill_view works with real claude_skills fixtures.""" + from ms_agent.skill.catalog import SkillCatalog + from ms_agent.skill.skill_tools import SkillToolSet + from ms_agent.skill.sources import SkillSource, SkillSourceType + + catalog = SkillCatalog() + catalog.load_from_sources([ + SkillSource(type=SkillSourceType.LOCAL_DIR, + path=str(CLAUDE_SKILLS_DIR)) + ]) + ts = SkillToolSet(DictConfig({}), catalog, enable_manage=False) + + result = ts._handle_skill_view({"skill_id": "pdf"}) + data = json.loads(result) + self.assertEqual(data["name"], "pdf") + self.assertIn("PDF Processing Guide", data["content"]) + self.assertIn("scripts", data["linked_files"]) + + result = ts._handle_skill_view({"skill_id": "docx"}) + data = json.loads(result) + self.assertEqual(data["name"], "docx") + self.assertIn("DOCX creation", data["content"]) + + +# ============================================================ +# 5. 
SkillLoader +# ============================================================ + +class TestSkillLoader(unittest.TestCase): + + def test_load_claude_skills(self): + from ms_agent.skill.loader import SkillLoader + loader = SkillLoader() + skills = loader.load_skills(str(CLAUDE_SKILLS_DIR)) + ids = [s.skill_id for s in skills.values()] + self.assertIn("docx", ids) + self.assertIn("pdf", ids) + + def test_reload_skill(self): + from ms_agent.skill.loader import SkillLoader + loader = SkillLoader() + loader.load_skills(str(CLAUDE_SKILLS_DIR)) + reloaded = loader.reload_skill(str(CLAUDE_SKILLS_DIR / "pdf")) + self.assertIsNotNone(reloaded) + self.assertEqual(reloaded.name, "pdf") + + def test_skill_has_scripts(self): + from ms_agent.skill.loader import SkillLoader + loader = SkillLoader() + skills = loader.load_skills(str(CLAUDE_SKILLS_DIR)) + pdf_skill = skills.get("pdf") or skills.get("pdf@latest") + self.assertIsNotNone(pdf_skill) + script_names = [s.name for s in pdf_skill.scripts] + self.assertTrue( + len(script_names) > 0, + "pdf skill should have scripts") + + def test_skill_has_references(self): + from ms_agent.skill.loader import SkillLoader + loader = SkillLoader() + skills = loader.load_skills(str(CLAUDE_SKILLS_DIR)) + pdf_skill = skills.get("pdf") or skills.get("pdf@latest") + self.assertIsNotNone(pdf_skill) + ref_names = [r.name for r in pdf_skill.references] + self.assertIn("reference.md", ref_names) + self.assertIn("forms.md", ref_names) + + +# ============================================================ +# 6. Integration: LLMAgent.prepare_skills + create_messages +# ============================================================ + +class TestLLMAgentSkillIntegration(unittest.TestCase): + + def _make_agent(self, skills_path): + from ms_agent.agent.llm_agent import LLMAgent + config = OmegaConf.create({ + "llm": { + "model": "qwen-max", + "api_base": "https://dashscope.aliyuncs.com/compatible-mode/v1", + }, + "skills": { + "path": [str(skills_path)], + }, + "prompt": { + "system": "You are a test agent.", + }, + }) + return LLMAgent(config=config, tag="test-agent") + + def test_prepare_skills_loads_catalog(self): + agent = self._make_agent(CLAUDE_SKILLS_DIR) + agent.tool_manager = MagicMock() + asyncio.get_event_loop().run_until_complete( + agent.prepare_skills()) + self.assertIsNotNone(agent._skill_catalog) + self.assertIsNotNone(agent._skill_injector) + agent.tool_manager.register_tool.assert_called_once() + + def test_prepare_skills_noop_without_config(self): + from ms_agent.agent.llm_agent import LLMAgent + config = OmegaConf.create({ + "llm": { + "model": "qwen-max", + "api_base": "https://dashscope.aliyuncs.com/compatible-mode/v1", + }, + "prompt": {"system": "Test"}, + }) + agent = LLMAgent(config=config, tag="no-skill-agent") + asyncio.get_event_loop().run_until_complete( + agent.prepare_skills()) + self.assertIsNone(agent._skill_catalog) + self.assertIsNone(agent._skill_injector) + + def test_create_messages_injects_skill_section(self): + agent = self._make_agent(CLAUDE_SKILLS_DIR) + agent.tool_manager = MagicMock() + asyncio.get_event_loop().run_until_complete( + agent.prepare_skills()) + + msgs = asyncio.get_event_loop().run_until_complete( + agent.create_messages("Hello")) + + system_content = msgs[0].content + self.assertIn("Available Skills", system_content) + self.assertIn("docx", system_content) + self.assertIn("pdf", system_content) + self.assertIn("skill_view", system_content) + + def test_create_messages_no_injection_without_skills(self): + from 
ms_agent.agent.llm_agent import LLMAgent + config = OmegaConf.create({ + "llm": { + "model": "qwen-max", + "api_base": "https://dashscope.aliyuncs.com/compatible-mode/v1", + }, + "prompt": {"system": "You are a test agent."}, + }) + agent = LLMAgent(config=config, tag="no-skill") + msgs = asyncio.get_event_loop().run_until_complete( + agent.create_messages("Hello")) + self.assertNotIn("Available Skills", msgs[0].content) + + def test_create_messages_with_always_skill(self): + """Verify always-skill injection using an inline fixture.""" + from ms_agent.agent.llm_agent import LLMAgent + tmp = Path(tempfile.mkdtemp()) + try: + _make_skill_dir(tmp, "greeter", "Greeter", + "Auto-greet", always=True) + _make_skill_dir(tmp, "helper", "Helper", "A helper") + config = OmegaConf.create({ + "llm": {"model": "qwen-max"}, + "skills": {"path": [str(tmp)]}, + "prompt": {"system": "Test agent."}, + }) + agent = LLMAgent(config=config, tag="always-test") + agent.tool_manager = MagicMock() + asyncio.get_event_loop().run_until_complete( + agent.prepare_skills()) + msgs = asyncio.get_event_loop().run_until_complete( + agent.create_messages("Hi")) + content = msgs[0].content + self.assertIn("Active Skills", content) + self.assertIn("Greeter", content) + self.assertIn("Instructions for Greeter", content) + self.assertIn("Available Skills", content) + self.assertIn("helper", content) + finally: + shutil.rmtree(tmp, ignore_errors=True) + + +# ============================================================ +# 7. Schema parsing and validation +# ============================================================ + +class TestSchemaPreserved(unittest.TestCase): + + def test_skill_schema_parser_works(self): + from ms_agent.skill.schema import SkillSchemaParser + skill = SkillSchemaParser.parse_skill_directory( + CLAUDE_SKILLS_DIR / "pdf") + self.assertIsNotNone(skill) + self.assertEqual(skill.skill_id, "pdf") + self.assertEqual(skill.name, "pdf") + self.assertTrue(len(skill.scripts) > 0) + + def test_docx_skill_has_references(self): + from ms_agent.skill.schema import SkillSchemaParser + skill = SkillSchemaParser.parse_skill_directory( + CLAUDE_SKILLS_DIR / "docx") + self.assertIsNotNone(skill) + self.assertEqual(skill.skill_id, "docx") + ref_names = [r.name for r in skill.references] + self.assertIn("docx-js.md", ref_names) + self.assertIn("ooxml.md", ref_names) + + def test_frontmatter_parsing(self): + from ms_agent.skill.schema import SkillSchemaParser + content = '---\nname: Test\ndescription: "desc"\n---\nBody' + fm = SkillSchemaParser.parse_yaml_frontmatter(content) + self.assertEqual(fm["name"], "Test") + + def test_skill_schema_validation(self): + from ms_agent.skill.schema import SkillSchemaParser + skill = SkillSchemaParser.parse_skill_directory( + CLAUDE_SKILLS_DIR / "pdf") + errors = SkillSchemaParser.validate_skill_schema(skill) + self.assertEqual(len(errors), 0) + + +# ============================================================ +# 8. 
End-to-end pipeline +# ============================================================ + +class TestEndToEnd(unittest.TestCase): + + def test_full_pipeline_with_claude_skills(self): + from ms_agent.skill.catalog import SkillCatalog + from ms_agent.skill.prompt_injector import SkillPromptInjector + from ms_agent.skill.skill_tools import SkillToolSet + + cfg = OmegaConf.create({"path": [str(CLAUDE_SKILLS_DIR)]}) + catalog = SkillCatalog(config=cfg) + catalog.load_from_config(cfg) + + skills = catalog.get_enabled_skills() + self.assertIn("docx", skills) + self.assertIn("pdf", skills) + + injector = SkillPromptInjector(catalog) + section = injector.build_skill_prompt_section() + self.assertIn("Available Skills", section) + self.assertIn("docx", section) + self.assertIn("pdf", section) + + toolset = SkillToolSet( + DictConfig({}), catalog, enable_manage=False) + + list_result = toolset._handle_skills_list({}) + data = json.loads(list_result) + self.assertGreaterEqual(data["total"], 2) + + view_result = toolset._handle_skill_view( + {"skill_id": "pdf"}) + view_data = json.loads(view_result) + self.assertEqual(view_data["name"], "pdf") + self.assertIn("scripts", view_data["linked_files"]) + + +if __name__ == "__main__": + unittest.main()
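+
+
+# Note: the ModelScope and git download tests above reach the network.
+# To run only the hermetic groups, unittest's -k filter can help, e.g.
+# (the module path depends on where this file lives in the repo):
+#   python -m unittest -k TestSkillCatalog tests.<this_module>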