diff --git a/mellea/stdlib/requirements/__init__.py b/mellea/stdlib/requirements/__init__.py index c0bd7d3c9..8b932b0ad 100644 --- a/mellea/stdlib/requirements/__init__.py +++ b/mellea/stdlib/requirements/__init__.py @@ -4,6 +4,13 @@ from ...core import Requirement, ValidationResult, default_output_to_bool from .md import as_markdown_list, is_markdown_list, is_markdown_table from .python_reqs import PythonExecutionReq +from .python_tools import ( + ImportRestrictions, + OutputSizeLimit, + PythonCodeExtraction, + PythonSyntaxValid, + python_tool_requirements, +) from .requirement import ( ALoraRequirement, LLMaJRequirement, @@ -17,8 +24,12 @@ __all__ = [ "ALoraRequirement", + "ImportRestrictions", "LLMaJRequirement", + "OutputSizeLimit", + "PythonCodeExtraction", "PythonExecutionReq", + "PythonSyntaxValid", "Requirement", "ValidationResult", "as_markdown_list", @@ -26,6 +37,7 @@ "default_output_to_bool", "is_markdown_list", "is_markdown_table", + "python_tool_requirements", "req", "reqify", "requirement_check_to_bool", diff --git a/mellea/stdlib/requirements/python_tools.py b/mellea/stdlib/requirements/python_tools.py new file mode 100644 index 000000000..42c2a2e66 --- /dev/null +++ b/mellea/stdlib/requirements/python_tools.py @@ -0,0 +1,350 @@ +"""Generic Python tool requirements for code generation validation. + +This module provides a set of composable requirements for validating Python code +generated by language models. Requirements can be used individually or bundled +via the python_tool_requirements() factory function. + +The requirement pipeline validates code in this order: +1. PythonCodeExtraction — code blocks are present and extractable +2. PythonSyntaxValid — code parses without syntax errors +3. PythonExecutionReq — code runs without exceptions +4. OutputSizeLimit — captured output stays within bounds +5. 
class PythonSyntaxValid(Requirement):
    """Python code is syntactically valid (parses without AST errors).

    Uses Python's ast.parse() to validate syntax without executing code.
    Useful for catching malformed code early in the validation pipeline.
    """

    def __init__(self) -> None:
        """Initialize PythonSyntaxValid requirement."""
        super().__init__(
            description="Python code is syntactically valid.",
            validation_fn=self._validate_syntax,
            check_only=True,
        )

    def _validate_syntax(self, ctx: Context) -> ValidationResult:
        """Validate that extracted code has valid Python syntax.

        Args:
            ctx: Context containing model output with code blocks.

        Returns:
            ValidationResult that passes with the reason "Syntax is valid."
            when the extracted code parses. Otherwise it fails with a reason
            describing either the extraction failure or the parse error.
        """
        extraction_result = _has_python_code_listing(ctx)
        code = extraction_result.reason
        # Explicit check instead of ``assert``: assertions are stripped under
        # ``python -O``, which would let ``None`` reach ast.parse().
        if not extraction_result.as_bool() or code is None:
            return ValidationResult(
                result=False,
                reason=f"Could not extract code for syntax validation: {extraction_result.reason}",
            )

        try:
            ast.parse(code)
        except SyntaxError as e:
            # lineno can be missing on some SyntaxErrors; avoid "at line None".
            location = f" at line {e.lineno}" if e.lineno is not None else ""
            return ValidationResult(
                result=False, reason=f"Syntax error{location}: {e.msg}"
            )
        except ValueError as e:
            # On Python versions before 3.12, source containing null bytes
            # raises ValueError rather than SyntaxError. Report it as a failed
            # validation instead of letting it escape the validator.
            return ValidationResult(result=False, reason=f"Syntax error: {e}")

        return ValidationResult(result=True, reason="Syntax is valid.")
class ImportRestrictions(Requirement):
    """Only whitelisted modules are imported in the code.

    Uses AST analysis to find all imports (Import and ImportFrom nodes)
    and validates the top-level package of each one against an optional
    allowlist. The code is parsed only; this requirement never executes it.

    Relative imports (``from . import x``, ``from .pkg import y``) never refer
    to a top-level module, so they are always reported as forbidden when an
    allowlist is configured. They are reported with their leading dots, e.g.
    ``.x`` or ``.pkg``.

    Limitation: only ``import`` / ``from ... import`` statements are inspected.
    Dynamic imports such as ``__import__("os")`` or
    ``importlib.import_module("os")`` are not detected by this check.

    Args:
        allowed_imports: List of module names that are allowed to be imported.
            If None, all imports are accepted. If an empty list, all imports are blocked.
    """

    def __init__(self, allowed_imports: list[str] | None = None) -> None:
        """Initialize ImportRestrictions requirement."""
        self.allowed_imports: list[str] | None = allowed_imports
        if allowed_imports is None:
            imports_str = "all"
        else:
            imports_str = ", ".join(allowed_imports) if allowed_imports else "none"

        super().__init__(
            description=f"Only imports from [{imports_str}] are used.",
            validation_fn=self._validate_imports,
            check_only=True,
        )

    @staticmethod
    def _forbidden_modules(tree: ast.AST, allowed: frozenset[str]) -> set[str]:
        """Return the names of imported modules in ``tree`` not covered by ``allowed``."""
        forbidden: set[str] = set()

        for node in ast.walk(tree):
            if isinstance(node, ast.Import):
                candidates = [alias.name.split(".")[0] for alias in node.names]
            elif isinstance(node, ast.ImportFrom):
                if node.level > 0 or node.module is None:
                    # Relative import: the target is package-local, so a
                    # same-named entry in the allowlist must not let it pass.
                    dots = "." * node.level
                    if node.module is None:
                        forbidden.update(f"{dots}{alias.name}" for alias in node.names)
                    else:
                        forbidden.add(f"{dots}{node.module}")
                    continue
                candidates = [node.module.split(".")[0]]
            else:
                continue

            forbidden.update(name for name in candidates if name not in allowed)

        return forbidden

    def _validate_imports(self, ctx: Context) -> ValidationResult:
        """Validate that imports in extracted code match allowlist.

        Args:
            ctx: Context containing model output with code blocks.

        Returns:
            ValidationResult with pass/fail and forbidden imports if any.
        """
        extraction_result = _has_python_code_listing(ctx)
        code = extraction_result.reason
        # Explicit check instead of ``assert`` (asserts vanish under ``-O``).
        if not extraction_result.as_bool() or code is None:
            return ValidationResult(
                result=False, reason="Could not extract code for import validation."
            )

        if self.allowed_imports is None:
            return ValidationResult(
                result=True, reason="No import restrictions configured."
            )

        try:
            tree = ast.parse(code)
        except SyntaxError as e:
            return ValidationResult(
                result=False,
                reason=f"Could not parse code for import analysis: {e.msg}",
            )
        except ValueError as e:
            # On Python versions before 3.12, null bytes in the source raise
            # ValueError rather than SyntaxError.
            return ValidationResult(
                result=False,
                reason=f"Could not parse code for import analysis: {e}",
            )

        # Snapshot the allowlist once so membership tests are O(1) in the walk.
        forbidden = self._forbidden_modules(tree, frozenset(self.allowed_imports))
        if forbidden:
            return ValidationResult(
                result=False,
                reason=f"Forbidden imports detected: {', '.join(sorted(forbidden))}",
            )

        return ValidationResult(result=True, reason="All imports are whitelisted.")
def from_model(content: str) -> Context:
    """Build a chat context whose only entry is a model output holding ``content``."""
    model_turn = ModelOutputThunk(value=content)
    return ChatContext().add(model_turn)
"""```python +def hello_world(): + return "Hello, World!" + +print(hello_world()) +```""" + +PYTHON_WITH_SYNTAX_ERROR = """```python +def hello_world( + return "Hello, World!" +```""" + +PYTHON_WITH_IMPORTS = """```python +import os +import sys +from pathlib import Path + +print("Hello from imports!") +```""" + +PYTHON_WITH_FORBIDDEN_IMPORTS = """```python +import subprocess +import socket +import urllib + +print("Dangerous imports!") +```""" + +NO_PYTHON_CODE = """ +This is just text without any Python code blocks. +It contains no executable content. +""" + + +class TestPythonCodeExtraction: + """Tests for PythonCodeExtraction requirement.""" + + def test_extract_valid_code_block(self): + """Test extraction of valid Python code.""" + req = PythonCodeExtraction() + ctx = from_model(VALID_PYTHON_CODE) + result = req.validation_fn(ctx) + + assert result.as_bool() is True + assert "hello_world" in (result.reason or "") + + def test_extract_no_code_blocks(self): + """Test extraction when no code blocks present.""" + req = PythonCodeExtraction() + ctx = from_model(NO_PYTHON_CODE) + result = req.validation_fn(ctx) + + assert result.as_bool() is False + assert result.reason is not None + + def test_extract_multiple_code_blocks(self): + """Test extraction when multiple code blocks present (should return highest scoring).""" + code = """ +Here's a simple one: +```python +print("simple") +``` + +And a more complex one: +```python +def fibonacci(n): + if n <= 1: + return n + return fibonacci(n-1) + fibonacci(n-2) + +for i in range(10): + print(fibonacci(i)) +``` +""" + req = PythonCodeExtraction() + ctx = from_model(code) + result = req.validation_fn(ctx) + + assert result.as_bool() is True + assert "fibonacci" in (result.reason or "") + + +class TestPythonSyntaxValid: + """Tests for PythonSyntaxValid requirement.""" + + def test_valid_syntax(self): + """Test validation of syntactically valid code.""" + req = PythonSyntaxValid() + ctx = from_model(VALID_PYTHON_CODE) + result 
class TestOutputSizeLimit:
    """Checks for OutputSizeLimit construction and stdout-size validation."""

    def test_init_valid_limit(self):
        """An explicitly supplied positive limit is stored unchanged."""
        assert OutputSizeLimit(limit_chars=5000).limit_chars == 5000

    def test_init_invalid_limit_zero(self):
        """A limit of zero is rejected with ValueError."""
        with pytest.raises(ValueError, match="must be positive"):
            OutputSizeLimit(limit_chars=0)

    def test_init_invalid_limit_negative(self):
        """A negative limit is rejected with ValueError."""
        with pytest.raises(ValueError, match="must be positive"):
            OutputSizeLimit(limit_chars=-100)

    def test_init_default_limit(self):
        """Omitting the limit yields the 10,000 character default."""
        assert OutputSizeLimit().limit_chars == 10_000

    def test_output_within_limit(self):
        """Short stdout passes a generous limit."""
        snippet = """```python
print("Hello, World!")
```"""
        checker = OutputSizeLimit(limit_chars=1000)
        verdict = checker.validation_fn(from_model(snippet))
        # The printed greeting is far below 1000 characters.
        assert verdict.as_bool() is True

    def test_output_exceeds_limit(self):
        """Stdout longer than the limit fails and says so in the reason."""
        snippet = """```python
print("Hello, World! This is a long message.")
```"""
        checker = OutputSizeLimit(limit_chars=10)
        verdict = checker.validation_fn(from_model(snippet))
        # The printed message is longer than 10 characters.
        assert verdict.as_bool() is False
        assert "exceeds" in (verdict.reason or "").lower()
ImportRestrictions(allowed_imports=["os", "sys"]) + ctx = from_model(PYTHON_WITH_FORBIDDEN_IMPORTS) + result = req.validation_fn(ctx) + + assert result.as_bool() is False + assert "forbidden" in (result.reason or "").lower() + assert any( + m in (result.reason or "") for m in ["subprocess", "socket", "urllib"] + ) + + def test_no_imports_pass(self): + """Test validation when code has no imports.""" + req = ImportRestrictions(allowed_imports=["os"]) + code = """```python +def add(a, b): + return a + b + +print(add(2, 3)) +```""" + ctx = from_model(code) + result = req.validation_fn(ctx) + + assert result.as_bool() is True + + def test_no_allowlist_passes_all(self): + """Test validation with no allowlist (None) passes all imports.""" + req = ImportRestrictions(allowed_imports=None) + ctx = from_model(PYTHON_WITH_FORBIDDEN_IMPORTS) + result = req.validation_fn(ctx) + + assert result.as_bool() is True + assert "No import restrictions" in (result.reason or "") + + def test_empty_allowlist_blocks_all(self): + """Test validation with empty allowlist blocks all imports.""" + req = ImportRestrictions(allowed_imports=[]) + ctx = from_model(PYTHON_WITH_IMPORTS) + result = req.validation_fn(ctx) + + assert result.as_bool() is False + assert "forbidden" in (result.reason or "").lower() + + def test_syntax_error_in_imports_check(self): + """Test import validation when code has syntax errors.""" + req = ImportRestrictions(allowed_imports=["os"]) + ctx = from_model(PYTHON_WITH_SYNTAX_ERROR) + result = req.validation_fn(ctx) + + assert result.as_bool() is False + + def test_submodule_imports(self): + """Test validation of submodule imports.""" + req = ImportRestrictions(allowed_imports=["pathlib"]) + code = """```python +from pathlib.posixpath import join +import pathlib.pure + +print("submodules") +```""" + ctx = from_model(code) + result = req.validation_fn(ctx) + + assert result.as_bool() is True + + def test_forbidden_submodule(self): + """Test validation when submodule is 
class TestPythonToolRequirementsFactory:
    """Tests for python_tool_requirements() factory function.

    The factory returns requirements in a fixed order: index 2 is the
    execution requirement, index 3 is OutputSizeLimit, and index 4 (present
    only when an allowlist is given) is ImportRestrictions.
    """

    def test_factory_default_returns_four_requirements(self):
        """Test factory with defaults returns 4 requirements (no import restrictions)."""
        reqs = python_tool_requirements()
        assert len(reqs) == 4
        assert isinstance(reqs[0], PythonCodeExtraction)
        assert isinstance(reqs[1], PythonSyntaxValid)
        assert isinstance(reqs[3], OutputSizeLimit)

    def test_factory_with_allowed_imports_returns_five(self):
        """Test factory with allowed_imports returns 5 requirements."""
        reqs = python_tool_requirements(allowed_imports=["os", "sys"])
        assert len(reqs) == 5
        assert isinstance(reqs[4], ImportRestrictions)

    def test_factory_parameter_propagation_output_limit(self):
        """Test factory propagates output_limit_chars to OutputSizeLimit."""
        reqs = python_tool_requirements(output_limit_chars=5000)
        output_limit_req = reqs[3]
        assert isinstance(output_limit_req, OutputSizeLimit)
        assert output_limit_req.limit_chars == 5000

    def test_factory_parameter_propagation_imports(self):
        """Test factory propagates allowed_imports to ImportRestrictions and OutputSizeLimit."""
        imports = ["os", "sys", "json"]
        reqs = python_tool_requirements(allowed_imports=imports)
        import_req = reqs[4]
        assert isinstance(import_req, ImportRestrictions)
        assert import_req.allowed_imports == imports
        # OutputSizeLimit executes the code too, so it must get the allowlist.
        output_limit_req = reqs[3]
        assert isinstance(output_limit_req, OutputSizeLimit)
        assert output_limit_req.allowed_imports == imports

    def test_factory_timeout_parameter(self):
        """Test factory accepts timeout_seconds and passes it to OutputSizeLimit."""
        reqs = python_tool_requirements(timeout_seconds=10)
        assert len(reqs) == 4
        # Verify the value is actually used, not just accepted.
        output_limit_req = reqs[3]
        assert isinstance(output_limit_req, OutputSizeLimit)
        assert output_limit_req.timeout == 10

    def test_factory_sandbox_parameter(self):
        """Test factory accepts use_sandbox and passes it to OutputSizeLimit."""
        reqs = python_tool_requirements(use_sandbox=True)
        assert len(reqs) == 4
        # Verify the flag is actually used, not just accepted.
        output_limit_req = reqs[3]
        assert isinstance(output_limit_req, OutputSizeLimit)
        assert output_limit_req.use_sandbox is True

    def test_factory_all_parameters(self):
        """Test factory with all parameters configured."""
        reqs = python_tool_requirements(
            allowed_imports=["os", "sys"],
            output_limit_chars=8000,
            timeout_seconds=15,
            use_sandbox=True,
        )
        assert len(reqs) == 5
        output_limit_req = reqs[3]
        assert isinstance(output_limit_req, OutputSizeLimit)
        assert output_limit_req.limit_chars == 8000
        assert output_limit_req.timeout == 15
        assert output_limit_req.use_sandbox is True
        assert output_limit_req.allowed_imports == ["os", "sys"]
        assert isinstance(reqs[4], ImportRestrictions)

    def test_factory_invalid_timeout(self):
        """Test factory with invalid timeout raises ValueError."""
        with pytest.raises(ValueError, match="timeout_seconds must be positive"):
            python_tool_requirements(timeout_seconds=0)

        with pytest.raises(ValueError, match="timeout_seconds must be positive"):
            python_tool_requirements(timeout_seconds=-5)

    def test_factory_invalid_output_limit(self):
        """Test factory with invalid output_limit raises ValueError."""
        with pytest.raises(ValueError, match="output_limit_chars must be positive"):
            python_tool_requirements(output_limit_chars=0)

        with pytest.raises(ValueError, match="output_limit_chars must be positive"):
            python_tool_requirements(output_limit_chars=-1000)

    def test_factory_requirement_order(self):
        """Test factory returns requirements in correct validation order."""
        from mellea.stdlib.requirements.python_reqs import PythonExecutionReq

        reqs = python_tool_requirements(allowed_imports=["os"])

        expected_types = [
            PythonCodeExtraction,
            PythonSyntaxValid,
            PythonExecutionReq,
            OutputSizeLimit,
            ImportRestrictions,
        ]
        assert len(reqs) == len(expected_types)
        for requirement, expected_type in zip(reqs, expected_types):
            assert isinstance(requirement, expected_type)

    def test_factory_timeout_propagation_to_execution_req(self):
        """Test factory propagates timeout_seconds to PythonExecutionReq."""
        from mellea.stdlib.requirements.python_reqs import PythonExecutionReq

        reqs = python_tool_requirements(timeout_seconds=15)
        execution_req = reqs[2]
        assert isinstance(execution_req, PythonExecutionReq)
        assert execution_req._timeout == 15

    def test_factory_sandbox_propagation_to_execution_req(self):
        """Test factory propagates use_sandbox to PythonExecutionReq."""
        from mellea.stdlib.requirements.python_reqs import PythonExecutionReq

        reqs = python_tool_requirements(use_sandbox=True)
        execution_req = reqs[2]
        assert isinstance(execution_req, PythonExecutionReq)
        assert execution_req._use_sandbox is True

    def test_factory_allowed_imports_propagation_to_execution_req(self):
        """Test factory propagates allowed_imports to PythonExecutionReq."""
        from mellea.stdlib.requirements.python_reqs import PythonExecutionReq

        imports = ["os", "sys", "json"]
        reqs = python_tool_requirements(allowed_imports=imports)
        execution_req = reqs[2]
        assert isinstance(execution_req, PythonExecutionReq)
        assert execution_req._allowed_imports == imports