diff --git a/ms_agent/tools/code/code_executor.py b/ms_agent/tools/code/code_executor.py index b5e486440..1df89e39c 100644 --- a/ms_agent/tools/code/code_executor.py +++ b/ms_agent/tools/code/code_executor.py @@ -407,8 +407,10 @@ async def _get_tools_inner(self) -> Dict[str, Any]: tool_name='shell_executor', server_name='code_executor', description= - ('Execute shell commands in an isolated environment using bash. ' - 'Supports basic shell operations like ls, cd, mkdir, rm, etc. ' + ('Execute one shell command in an isolated environment. ' + 'Commands will be executed directly without shell parsing. ' + 'For shell syntax (cd, &&, ||, pipes, redirection), use explicit wrapper like sh -lc "...". ' + 'Supports basic operations like ls, mkdir, rm, mv, npm, pip, etc. ' 'Data files in the output directory are accessible at /data/ path. ' ), parameters={ @@ -421,7 +423,7 @@ async def _get_tools_inner(self) -> Dict[str, Any]: 'timeout': { 'type': 'integer', 'description': 'Execution timeout in seconds', - 'default': 30 + 'default': 900 } }, 'required': ['command'], @@ -648,13 +650,22 @@ async def shell_executor(self, try: logger.info(f'Executing command: {command[:50]}...') + shell_meta = ('&&', '||', '|', ';', '>', '<', '`', '$(', 'cd ', + 'export ') + already_wrapped = command.lstrip().startswith( + ('sh ', 'bash ', '/bin/sh ', '/bin/bash ')) + if not already_wrapped and any(meta in command + for meta in shell_meta): + import shlex + command = f'sh -lc {shlex.quote(command)}' + # Execute via shell_executor result = await self.manager.execute_tool( sandbox_id=self.sandbox_id, tool_name='shell_executor', parameters={ 'command': command, - 'timeout': timeout or 60 + 'timeout': timeout or 900 }) success = result.status == ExecutionStatus.SUCCESS diff --git a/ms_agent/tools/shell/__init__.py b/ms_agent/tools/shell/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/ms_agent/tools/shell/shell.py b/ms_agent/tools/shell/shell.py deleted file mode 100644 index b00f2c718..000000000 --- a/ms_agent/tools/shell/shell.py +++ /dev/null @@ -1,206 +0,0 @@ -import os -import re -import subprocess -from pathlib import Path -from typing import Any, Dict - -from ms_agent.llm.utils import Tool -from ms_agent.tools.base import ToolBase -from ms_agent.utils.constants import DEFAULT_OUTPUT_DIR - - -class Shell(ToolBase): - - def __init__(self, config): - super().__init__(config) - self.output_dir = getattr(self.config, 'output_dir', - DEFAULT_OUTPUT_DIR) - - async def connect(self) -> None: - pass - - async def _get_tools_inner(self) -> Dict[str, Any]: - tools = { - 'shell': [ - Tool( - tool_name='execute_single', - server_name='shell', - description='Execute a single shell command. ' - 'Use this tool to read/write/create file/dirs, ' - 'or start/stop processes or install required packages.' - 'Note:\n ' - '1. Do not execute dangerous commands which will affect the file system ' - 'or other processes\n ' - '2. The work_dir arg should always base on the project you are working on', - parameters={ - 'type': 'object', - 'properties': { - 'command': { - 'type': 'string', - 'description': 'The shell command to execute.', - }, - 'work_dir': { - 'type': - 'string', - 'description': - 'The work dir of the command, this argument should always ' - 'be a relative sub folder of the project you are working on.', - } - }, - 'required': ['command', 'work_dir'], - 'additionalProperties': False - }), - ] - } - return tools - - def check_safe(self, command, work_dir): - # 1. Check work_dir - output_dir_abs = Path(self.output_dir).resolve() - if work_dir.startswith('/') or work_dir.startswith('~'): - work_dir_abs = Path(work_dir).resolve() - else: - work_dir_abs = (output_dir_abs / work_dir).resolve() - - if not str(work_dir_abs).startswith(str(output_dir_abs)): - raise ValueError( - f"Work directory '{work_dir}' is outside allowed directory '{self.output_dir}'" - ) - - # 2. Check dangerous commands - dangerous_commands = [ - r'\brm\s+-rf\s+/', # rm -rf / - r'\bsudo\b', # sudo - r'\bsu\b', # su - r'\bchmod\b', # chmod - r'\bchown\b', # chown - r'\breboot\b', # reboot - r'\bshutdown\b', # shutdown - r'\bmkfs\b', # mkfs - r'\bdd\b', # dd - r'\bcurl\b.*\|\s*bash', # curl | bash - r'\bwget\b.*\|\s*bash', # wget | bash - r'\bcurl\b.*\|\s*sh\b', # curl | sh - r'\bwget\b.*\|\s*sh\b', # wget | sh - r'\b:\(\)\{.*\|.*&\s*\}', # fork bomb - r'\bmount\b', # mount - r'\bumount\b', # umount - r'\bfdisk\b', # fdisk - r'\bparted\b', # parted - ] - - for pattern in dangerous_commands: - if re.search(pattern, command, re.IGNORECASE): - raise ValueError( - f'Command contains dangerous operation: {pattern}') - - # 3. Check path traversal - suspicious_patterns = [ - r'(?:^|\s)/', # absolute path - r'\.\.', # parent directory - r'~', # HOME - r'\$HOME', # HOME env - r'\$\{HOME\}', # ${HOME} - ] - - for pattern in suspicious_patterns: - if re.search(pattern, command): - # 提取所有可能的路径 - potential_paths = re.findall(r'(?:^|\s)([\w\./~${}]+)', - command) - for path_str in potential_paths: - if not path_str: - continue - - try: - expanded_path = os.path.expandvars( - os.path.expanduser(path_str)) - if not os.path.isabs(expanded_path): - full_path = (work_dir_abs - / expanded_path).resolve() - else: - full_path = Path(expanded_path).resolve() - if not str(full_path).startswith(str(output_dir_abs)): - raise ValueError( - f"Command attempts to access path outside allowed directory: '{path_str}' " - f"resolves to '{full_path}', which is outside '{self.output_dir}'" - ) - except Exception: # noqa - continue - - # 4. Check dangerous redirections - redirect_patterns = [ - r'>+\s*/(?!tmp/|var/tmp/|dev/null)', # redirect to root directory (except /tmp/, /var/tmp/, /dev/null) - r'<\s*/etc/', # read from /etc - r'>+\s*/dev/(?!null)', # redirect to device files (except /dev/null) - ] - - for pattern in redirect_patterns: - if re.search(pattern, command): - raise ValueError('Command contains dangerous redirection') - - # 5. Check environment variable modifications - if re.search(r'\bexport\b|\benv\b.*=', command, re.IGNORECASE): - if re.search(r'\bPATH\s*=|LD_PRELOAD|LD_LIBRARY_PATH', command, - re.IGNORECASE): - raise ValueError( - 'Command attempts to modify critical (PATH/LD_PRELOAD/LD_LIBRARY_PATH) ' - 'environment variables') - - # 6. Check for command substitution and other shell injection risks - shell_injection_patterns = [ - r'\$\(.*\)', # command substitution $(...) - r'`.*`', # command substitution `...` - ] - - for pattern in shell_injection_patterns: - if re.search(pattern, command): - substituted = re.findall(pattern, command) - for sub_cmd in substituted: - inner_cmd = re.sub(r'[\$\(\)`]', '', sub_cmd) - for dangerous in dangerous_commands: - if re.search(dangerous, inner_cmd, re.IGNORECASE): - raise ValueError( - f'Command substitution contains dangerous operation: {inner_cmd}' - ) - - async def execute_shell(self, command: str, work_dir: str): - try: - self.check_safe(command, work_dir) - if work_dir == '.' or work_dir == '.' + os.sep: - work_dir = '' - work_dir = os.path.join(self.output_dir, work_dir) - Path(work_dir).mkdir(parents=True, exist_ok=True) - ret = subprocess.run( - command, - shell=True, - cwd=work_dir, - capture_output=True, - text=True, - timeout=getattr(self.config.tools.shell, 'timeout', 5), - ) - - if ret.returncode == 0: - result = f'Command executed successfully. return_code=0, output: {ret.stdout.strip()}' - else: - result = f'Command executed failed. return_code={ret.returncode}, error message: {ret.stderr.strip()}' - - except subprocess.TimeoutExpired: - result = f'Run timed out after {getattr(self.config.tools.shell, "timeout", 5)} seconds.' - except Exception as e: - result = f'Run failed with an exception: {e}.' - - output = ( - f'Shell command status:\n' - f'Command line: {command}\n' - f'Workdir: {work_dir}\n' - f'Result: {result or "The command does not give any responses."}') - return output - - async def call_tool(self, server_name: str, *, tool_name: str, - tool_args: dict) -> str: - if tool_name == 'execute_single': - return await self.execute_shell(tool_args['command'], - tool_args['work_dir']) - else: - return f'Unknown tool type: {tool_name}' diff --git a/ms_agent/tools/tool_manager.py b/ms_agent/tools/tool_manager.py index 9bc09e943..58f019774 100644 --- a/ms_agent/tools/tool_manager.py +++ b/ms_agent/tools/tool_manager.py @@ -18,7 +18,6 @@ from ms_agent.tools.image_generator import ImageGenerator from ms_agent.tools.mcp_client import MCPClient from ms_agent.tools.search.websearch_tool import WebSearchTool -from ms_agent.tools.shell.shell import Shell from ms_agent.tools.split_task import SplitTask from ms_agent.tools.todolist_tool import TodoListTool from ms_agent.tools.video_generator import VideoGenerator @@ -56,8 +55,6 @@ def __init__(self, if hasattr(config, 'tools') and hasattr(config.tools, 'video_generator'): self.extra_tools.append(VideoGenerator(config)) - if hasattr(config, 'tools') and hasattr(config.tools, 'shell'): - self.extra_tools.append(Shell(config)) if hasattr(config, 'tools') and hasattr(config.tools, 'file_system'): self.extra_tools.append( FileSystemTool( diff --git a/projects/code_genesis/README.md b/projects/code_genesis/README.md index 4e7ee2f1e..1369e4363 100644 --- a/projects/code_genesis/README.md +++ b/projects/code_genesis/README.md @@ -19,25 +19,16 @@ This project needs to be used together with ms-agent. cd ms-agent ``` -2. Prepare python environment (python>=3.10) with conda: +2. Build the Docker sandbox image (requires Docker): ```shell - conda create -n code_genesis python==3.11 - conda activate code_genesis - pip install -r ./requirements.txt + bash projects/code_genesis/tools/build_sandbox_image.sh ``` -3. Prepare npm environment, following https://nodejs.org/en/download. If you are using Mac, using Homebrew is recommended: https://formulae.brew.sh/formula/node + This builds a `code-genesis-sandbox:version1` image containing Python 3.12, Node.js 20, npm, git and curl. All shell commands from the agents run inside this container for security isolation. + Note: To speed up dependency downloads during image builds and at container runtime, we use some mirror registries instead of the official sources by default. If your network environment does not require mirrors, you can comment out the relevant lines. -Make sure your installation is successful: - -```shell -npm --version -``` - -Make sure the npm installation is successful, or the npm install/build/dev will fail and cause an infinite loop. - -4. Run: +3. Run: ```shell PYTHONPATH=. openai_api_key=your-api-key openai_base_url=your-api-url python ms_agent/cli/cli.py run --config projects/code_genesis --query 'make a demo website' --trust_remote_code true diff --git a/projects/code_genesis/coding.yaml b/projects/code_genesis/coding.yaml index 84eb8d7a6..1508a5de1 100644 --- a/projects/code_genesis/coding.yaml +++ b/projects/code_genesis/coding.yaml @@ -103,11 +103,10 @@ prompt: 8. When fixing issues and updating files: Call the `edit_file` tool or `write_file` tool, after fixing issues there's no need to check by yourself, the lsp tool will check and report issues. - 9. You can use the shell to debug problems: + 9. You can use the shell_executor to debug problems: Example: - # Find all fields of a class - execute_single(command='python -c "from module import MyClass; print(vars(MyClass))"') + shell_executor(command='python -c "from module import MyClass; print(vars(MyClass))"') Your optimization goals: 1. [Priority] Output the most accurate code implementation at once, without hallucinations or incorrect references. @@ -128,8 +127,25 @@ tools: base_url: https://api.morphllm.com/v1 plugins: - workflow/api_search - shell: + code_executor: mcp: false + sandbox: + mode: local + type: docker_notebook + image: code-genesis-sandbox:version1 + working_dir: /data + timeout: 180 + memory_limit: "2g" + cpu_limit: 2.0 + network_enabled: true + tools_config: + shell_executor: {} + exclude: + - notebook_executor + - python_executor + - file_operation + - reset_executor + - get_executor_info pre_import_check: true post_import_check: true diff --git a/projects/code_genesis/install.yaml b/projects/code_genesis/install.yaml index 57ff53194..b570fc437 100644 --- a/projects/code_genesis/install.yaml +++ b/projects/code_genesis/install.yaml @@ -34,7 +34,7 @@ prompt: - Avoid installing development tools (like eslint, prettier) unless they are essential for the project to function - If the framework description states "No Framework" or "No external dependencies", do NOT create or install unnecessary dependencies - 2. After writing dependency files (if needed), you should proactively call shell tools to install dependencies ONLY if: + 2. After writing dependency files (if needed), you should proactively call the shell_executor tool to install dependencies ONLY if: - The project has runtime dependencies that are required for execution - The project explicitly requires build tools or package managers - Do NOT install dependencies for simple vanilla web projects that can run without them @@ -46,10 +46,28 @@ prompt: max_chat_round: 20 +tool_call_timeout: 30000 + tools: - shell: + code_executor: mcp: false - timeout: 180 + sandbox: + mode: local + type: docker_notebook + image: code-genesis-sandbox:version1 + working_dir: /data + timeout: 180 + memory_limit: "2g" + cpu_limit: 2.0 + network_enabled: true + tools_config: + shell_executor: {} + exclude: + - notebook_executor + - python_executor + - file_operation + - reset_executor + - get_executor_info file_system: mcp: false @@ -57,6 +75,4 @@ tools: - read_file - write_file -tool_call_timeout: 120 - help: | diff --git a/projects/code_genesis/refine.yaml b/projects/code_genesis/refine.yaml index c0b1465ff..2795dc654 100644 --- a/projects/code_genesis/refine.yaml +++ b/projects/code_genesis/refine.yaml @@ -61,7 +61,7 @@ prompt: * Compress the workspace subdirectory into a zip file and name it workspace.zip in the workspace_dir * Once the project is working, use the EdgeOne Pages MCP tools to deploy it * Follow the deployment documentation provided in the user message - * When calling the deployment tool, first use the shell command `pwd` to get the current working directory path, then use "/workspace.zip" as the builtFolderPath parameter + * When calling the deployment tool, first use the shell_executor with command `pwd` to get the current working directory path, then use "/workspace.zip" as the builtFolderPath parameter 5. If everything is OK, you may exit * Ignore warnings like unused variables; they don't affect runtime behavior @@ -72,9 +72,25 @@ prompt: 2. [Secondary] Use as few tokens as possible tools: - shell: + code_executor: mcp: false - timeout: 10 + sandbox: + mode: local + type: docker_notebook + image: code-genesis-sandbox:version1 + working_dir: /data + timeout: 180 + memory_limit: "2g" + cpu_limit: 2.0 + network_enabled: true + tools_config: + shell_executor: {} + exclude: + - notebook_executor + - python_executor + - file_operation + - reset_executor + - get_executor_info file_system: mcp: false include: diff --git a/projects/code_genesis/tools/build_sandbox_image.sh b/projects/code_genesis/tools/build_sandbox_image.sh new file mode 100755 index 000000000..8aae6859a --- /dev/null +++ b/projects/code_genesis/tools/build_sandbox_image.sh @@ -0,0 +1,53 @@ +#!/bin/bash + +# Build Docker sandbox image for code_genesis +# Includes Python + Node.js for full-stack project support + +set -e + +IMAGE_NAME="code-genesis-sandbox" +IMAGE_TAG="version1" + +echo "Building code-genesis sandbox Docker image..." + +docker pull python:3.12-slim + +cat > Dockerfile.sandbox << 'EOF' +FROM python:3.12-slim + +# Install system dependencies and Node.js +RUN sed -i 's|deb.debian.org|mirrors.aliyun.com|g' /etc/apt/sources.list.d/debian.sources \ + && apt-get update -o Acquire::Retries=5 \ + && apt-get install -y --no-install-recommends \ + curl \ + git \ + build-essential \ + && curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \ + && apt-get install -y --no-install-recommends nodejs \ + && apt-get clean && rm -rf /var/lib/apt/lists/* + +# Configure npm to use a Chinese mirror. Comment out this line if not needed. +RUN npm config set registry https://registry.npmmirror.com/ + +# Install Jupyter kernel gateway (required by sandbox) +RUN pip install --no-cache-dir -i https://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com \ + jupyter_kernel_gateway \ + jupyter_client \ + ipykernel + +# Install Python kernel +RUN python -m ipykernel install --sys-prefix --name python3 --display-name "Python 3" + +WORKDIR /data + +EXPOSE 8888 +CMD ["jupyter", "kernelgateway", "--KernelGatewayApp.ip=0.0.0.0", "--KernelGatewayApp.port=8888", "--KernelGatewayApp.allow_origin=*"] +EOF + +echo "Building Docker image: ${IMAGE_NAME}:${IMAGE_TAG}" +docker build -f Dockerfile.sandbox -t "${IMAGE_NAME}:${IMAGE_TAG}" . + +rm Dockerfile.sandbox + +echo "Done: ${IMAGE_NAME}:${IMAGE_TAG}" +echo "Contains: Python 3.12, Node.js 20, npm, git, curl" diff --git a/projects/code_genesis/workflow/refine.py b/projects/code_genesis/workflow/refine.py index 71dc0b689..17c9ffba4 100644 --- a/projects/code_genesis/workflow/refine.py +++ b/projects/code_genesis/workflow/refine.py @@ -107,10 +107,10 @@ async def run(self, messages, **kwargs): f'Tech stack (framework.txt): {framework}\n' f'Communication protocol (protocol.txt): {protocol}\n' f'File list:\n{file_info}\n' - # f'Your shell tool workspace_dir is {self.output_dir}; ' - f'all tools should use this directory as the current working directory.\n' + f'The shell_executor runs inside a Docker sandbox. ' + f'Project files are at the current working directory (/data). ' + f'All relative paths work directly.\n' f'When creating the deployment zip file, name it workspace.zip.\n' - f'Python executable: {sys.executable}\n' f'Please refine the project and deploy it to EdgeOne Pages:'), ] return await super().run(messages, **kwargs) @@ -119,7 +119,6 @@ async def after_tool_call(self, messages: List[Message]): await super().after_tool_call(messages) if self.runtime.should_stop: - import sys if not sys.stdin.isatty(): # Running in WebUI - notify user that agent is waiting for input logger.info( diff --git a/requirements/code.txt b/requirements/code.txt index e5c59c13f..5426b30d7 100644 --- a/requirements/code.txt +++ b/requirements/code.txt @@ -1,3 +1,5 @@ +docker llama-index-core llama-index-embeddings-huggingface mem0ai +websocket-client