diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index bf7a19a..aae9d59 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -126,8 +126,14 @@ jobs: - name: Pin Python version run: uv python pin ${{ matrix.python-version }} - - name: Install package - run: uv sync + - name: Install package with dev dependencies + run: uv sync --extra dev + + - name: Run unit tests + run: uv run pytest tests/ -v -m 'not smoke' + + - name: Run smoke tests + run: uv run pytest tests/smoke/ -v - name: Test CLI can be invoked run: uv run promptfoo --version @@ -192,8 +198,14 @@ jobs: - name: Pin Python version run: uv python pin ${{ matrix.python-version }} - - name: Install package - run: uv sync + - name: Install package with dev dependencies + run: uv sync --extra dev + + - name: Run unit tests + run: uv run pytest tests/ -v -m 'not smoke' + + - name: Run smoke tests (with npx fallback) + run: uv run pytest tests/smoke/ -v - name: Test CLI fallback to npx (no global install) run: uv run promptfoo --version diff --git a/.gitignore b/.gitignore index 1056d39..bdc5ce5 100644 --- a/.gitignore +++ b/.gitignore @@ -42,6 +42,7 @@ htmlcov/ .tox/ .mypy_cache/ .ruff_cache/ +tests/smoke/.temp-output/ # Distribution dist/ diff --git a/AGENTS.md b/AGENTS.md index 84e5a2d..35f7fc0 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -135,9 +135,12 @@ Runs on every PR and push to main: - **Lint**: Ruff linting (`uv run ruff check src/`) - **Format Check**: Ruff formatting (`uv run ruff format --check src/`) - **Type Check**: mypy static analysis (`uv run mypy src/promptfoo/`) -- **Tests**: pytest on multiple Python versions (3.9, 3.13) and OSes (Ubuntu, Windows) +- **Unit Tests**: Fast tests with mocked dependencies (`uv run pytest -m 'not smoke'`) +- **Smoke Tests**: Integration tests against real CLI (`uv run pytest tests/smoke/`) - **Build**: Package build validation +Tests run on multiple Python versions (3.9, 3.13) and OSes (Ubuntu, Windows). 
+ ### Release Workflow (`.github/workflows/release-please.yml`) Triggered on push to main: @@ -214,7 +217,38 @@ uv run pytest ### Test Structure -Tests are located in the root directory (not yet created, but should be in `tests/` when added). +Tests are organized in the `tests/` directory: + +``` +tests/ +├── __init__.py +├── test_cli.py # Unit tests for CLI wrapper logic +├── test_environment.py # Unit tests for environment detection +├── test_instructions.py # Unit tests for installation instructions +└── smoke/ + ├── __init__.py + ├── README.md # Smoke test documentation + ├── test_smoke.py # Integration tests against real CLI + └── fixtures/ + └── configs/ # YAML configs for smoke tests + ├── basic.yaml + ├── assertions.yaml + └── failing-assertion.yaml +``` + +### Test Types + +**Unit Tests** (`tests/test_*.py`): +- Fast, isolated tests for individual functions +- Mock external dependencies +- Run on every PR + +**Smoke Tests** (`tests/smoke/`): +- Integration tests that run the actual CLI via subprocess +- Use the `echo` provider (no external API dependencies) +- Test the full Python → Node.js integration +- Slower but verify end-to-end functionality +- Marked with `@pytest.mark.smoke` ### Test Matrix @@ -229,16 +263,36 @@ CI tests across: # Install dependencies with dev extras uv sync --extra dev -# Run all tests +# Run all tests (unit + smoke) uv run pytest +# Run only unit tests (fast) +uv run pytest -m 'not smoke' + +# Run only smoke tests (slow, requires Node.js) +uv run pytest tests/smoke/ + # Run with coverage uv run pytest --cov=src/promptfoo +# Run specific test class +uv run pytest tests/test_cli.py::TestMainFunction + # Run specific test -uv run pytest tests/test_cli.py::test_wrapper_detection +uv run pytest tests/smoke/test_smoke.py::TestEvalCommand::test_basic_eval ``` +### Smoke Test Details + +Smoke tests verify critical CLI functionality: +- **Basic CLI**: `--version`, `--help`, unknown commands, missing files +- **Eval Command**: Output 
formats (JSON, YAML, CSV), flags (`--repeat`, `--verbose`) +- **Exit Codes**: 0 for success, 100 for assertion failures, 1 for errors +- **Echo Provider**: Variable substitution, multiple variables +- **Assertions**: `contains`, `icontains`, failing assertions + +Each smoke-test command runs with a 120-second timeout; a warm-up fixture first runs `promptfoo --version` with a 5-minute timeout so that the initial `npx` download of promptfoo does not time out the first test. + ## Security Practices ### 1. No Credentials in Repository @@ -365,14 +419,23 @@ promptfoo-python/ ├── src/ │ └── promptfoo/ │ ├── __init__.py # Package exports -│ └── cli.py # Main wrapper implementation +│ ├── cli.py # Main wrapper implementation +│ ├── environment.py # Environment detection +│ └── instructions.py # Node.js installation instructions +├── tests/ +│ ├── test_cli.py # Unit tests for CLI +│ ├── test_environment.py # Unit tests for environment detection +│ ├── test_instructions.py # Unit tests for instructions +│ └── smoke/ +│ ├── test_smoke.py # Integration smoke tests +│ └── fixtures/configs/ # Test configuration files ├── AGENTS.md # This file (agent documentation) ├── CHANGELOG.md # Auto-generated by release-please ├── CLAUDE.md # Points to AGENTS.md ├── LICENSE # MIT License ├── README.md # User-facing documentation ├── pyproject.toml # Package configuration -├── release-please-config.json # Release-please configuration +├── release-please-config.json # Release-please configuration └── .release-please-manifest.json # Release version tracking ``` @@ -443,5 +506,5 @@ git push --force --- -**Last Updated**: 2026-01-05 +**Last Updated**: 2026-01-11 **Maintained By**: @promptfoo/engineering diff --git a/pyproject.toml b/pyproject.toml index cf1e18a..10a41be 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -102,3 +102,16 @@ show_error_codes = true pretty = true check_untyped_defs = true disallow_incomplete_defs = true + +[tool.pytest.ini_options] +testpaths = ["tests"] +python_files = ["test_*.py"] +python_classes = ["Test*"] +python_functions = ["test_*"] +addopts = [ + "-v", +
"--strict-markers", +] +markers = [ + "smoke: smoke tests that run the full CLI (slow, requires Node.js)", +] diff --git a/tests/smoke/README.md b/tests/smoke/README.md new file mode 100644 index 0000000..43f964e --- /dev/null +++ b/tests/smoke/README.md @@ -0,0 +1,88 @@ +# Smoke Tests + +These smoke tests verify that the core promptfoo CLI functionality works correctly through the Python wrapper. + +## What are Smoke Tests? + +Smoke tests are high-level integration tests that verify the most critical functionality works end-to-end. They: + +- Run against the actual installed CLI via the Python wrapper (using either global promptfoo or npx) +- Test the Python wrapper integration with the Node.js CLI +- Use the `echo` provider to avoid external API dependencies +- Verify command-line arguments, file I/O, and output formats +- Check exit codes and error handling + +## Running Smoke Tests + +```bash +# Run all smoke tests +pytest tests/smoke/ + +# Run with verbose output +pytest tests/smoke/ -v + +# Run a specific test class +pytest tests/smoke/test_smoke.py::TestEvalCommand + +# Run a specific test +pytest tests/smoke/test_smoke.py::TestEvalCommand::test_basic_eval +``` + +## Test Structure + +- `test_smoke.py` - Main smoke test suite +- `fixtures/` - Test configuration files + - `configs/` - YAML configuration files for testing + +## Test Coverage + +### Basic CLI Operations +- Version flag (`--version`) +- Help output (`--help`, `eval --help`) +- Unknown command handling +- Missing file error handling + +### Eval Command +- Basic evaluation with echo provider +- Output formats (JSON, YAML, CSV) +- Command-line flags (`--max-concurrency`, `--repeat`, `--verbose`) +- Cache control (`--no-cache`) + +### Exit Codes +- Exit code 0 for success +- Exit code 100 for assertion failures +- Exit code 1 for configuration errors + +### Echo Provider +- Basic prompt echoing +- Variable substitution +- Multiple variable handling + +### Assertions +- `contains` assertion +- 
`icontains` assertion (case-insensitive) +- Multiple assertions per test +- Failing assertion behavior + +## Why Echo Provider? + +The `echo` provider is perfect for smoke tests because: + +1. **No external dependencies** - Doesn't require API keys or network calls +2. **Deterministic** - Always returns the same output for the same input +3. **Fast** - No network latency +4. **Predictable** - Easy to write assertions against + +## Adding New Smoke Tests + +1. Create a new test config in `fixtures/configs/` if needed +2. Add test methods to the appropriate test class in `test_smoke.py` +3. Use the `run_promptfoo()` helper to execute CLI commands +4. Make assertions on stdout, stderr, exit codes, and output files + +## Notes + +- Smoke tests run slower than unit tests (they spawn subprocesses) +- They require Node.js and the promptfoo Python wrapper to be installed (the Node.js CLI is taken from a global install or fetched via npx) +- They test the integration between Python and Node.js +- They should be kept focused on critical functionality diff --git a/tests/smoke/__init__.py b/tests/smoke/__init__.py new file mode 100644 index 0000000..a2573de --- /dev/null +++ b/tests/smoke/__init__.py @@ -0,0 +1 @@ +"""Smoke tests for promptfoo CLI.""" diff --git a/tests/smoke/fixtures/configs/assertions.yaml b/tests/smoke/fixtures/configs/assertions.yaml new file mode 100644 index 0000000..b03ee62 --- /dev/null +++ b/tests/smoke/fixtures/configs/assertions.yaml @@ -0,0 +1,22 @@ +# yaml-language-server: $schema=https://promptfoo.dev/config-schema.json +description: 'Smoke test - multiple assertions' + +providers: + - echo + +prompts: + - 'Hello {{name}}, welcome to {{place}}' + +tests: + - vars: + name: Alice + place: Wonderland + assert: + - type: contains + value: Hello + - type: contains + value: Alice + - type: contains + value: Wonderland + - type: icontains + value: WELCOME diff --git a/tests/smoke/fixtures/configs/basic.yaml b/tests/smoke/fixtures/configs/basic.yaml new file mode 100644 index 0000000..936bb4a --- /dev/null +++
b/tests/smoke/fixtures/configs/basic.yaml @@ -0,0 +1,17 @@ +# yaml-language-server: $schema=https://promptfoo.dev/config-schema.json +description: 'Smoke test - basic config validation' + +providers: + - echo + +prompts: + - 'Hello {{name}}' + +tests: + - vars: + name: World + assert: + - type: contains + value: Hello + - type: contains + value: World diff --git a/tests/smoke/fixtures/configs/failing-assertion.yaml b/tests/smoke/fixtures/configs/failing-assertion.yaml new file mode 100644 index 0000000..ee8d327 --- /dev/null +++ b/tests/smoke/fixtures/configs/failing-assertion.yaml @@ -0,0 +1,17 @@ +# yaml-language-server: $schema=https://promptfoo.dev/config-schema.json +description: 'Smoke test - config with failing assertion' + +providers: + - echo + +prompts: + - 'Hello {{name}}' + +tests: + - vars: + name: World + assert: + # This assertion will fail because echo returns "Hello World" + # but we're asserting it contains "IMPOSSIBLE_STRING_NOT_IN_OUTPUT_12345" + - type: contains + value: IMPOSSIBLE_STRING_NOT_IN_OUTPUT_12345 diff --git a/tests/smoke/test_smoke.py b/tests/smoke/test_smoke.py new file mode 100644 index 0000000..7f0fe10 --- /dev/null +++ b/tests/smoke/test_smoke.py @@ -0,0 +1,423 @@ +""" +Smoke tests for the promptfoo CLI. + +These tests verify the core evaluation pipeline works correctly +using the echo provider (no external API dependencies). + +These tests run against the installed promptfoo package via the Python wrapper +(using either a globally installed promptfoo CLI or falling back to npx).
+""" + +import json +import os +import shutil +import subprocess +from collections.abc import Generator +from pathlib import Path +from typing import Optional + +import pytest + +# Mark all tests in this module as smoke tests +pytestmark = pytest.mark.smoke + +# Directories +SMOKE_DIR = Path(__file__).parent +FIXTURES_DIR = SMOKE_DIR / "fixtures" +CONFIGS_DIR = FIXTURES_DIR / "configs" +OUTPUT_DIR = SMOKE_DIR / ".temp-output" + + +def run_promptfoo( + args: list[str], + cwd: Optional[Path] = None, + expect_error: bool = False, + env: Optional[dict[str, str]] = None, + timeout: int = 120, +) -> tuple[str, str, int]: + """ + Run promptfoo CLI and capture output. + + Args: + args: CLI arguments to pass to promptfoo + cwd: Working directory for the command + expect_error: If True, don't raise on non-zero exit + env: Environment variables to set + timeout: Timeout in seconds (default 120) + + Returns: + Tuple of (stdout, stderr, exit_code) + """ + cmd = ["promptfoo"] + args + + full_env = os.environ.copy() + full_env["NO_COLOR"] = "1" # Disable color output for easier parsing + if env: + full_env.update(env) + + result = subprocess.run( + cmd, + cwd=cwd or Path.cwd(), + capture_output=True, + text=True, + env=full_env, + timeout=timeout, + # Use UTF-8 encoding with error replacement to handle Windows encoding issues + # Windows default cp1252 can't decode some bytes in npx/promptfoo output + encoding="utf-8", + errors="replace", + ) + + stdout = result.stdout or "" + stderr = result.stderr or "" + exit_code = result.returncode + + if not expect_error and exit_code != 0: + # For debugging failed tests + print(f"Command failed: {' '.join(cmd)}") + print(f"Exit code: {exit_code}") + print(f"STDOUT:\n{stdout}") + print(f"STDERR:\n{stderr}") + + return stdout, stderr, exit_code + + +@pytest.fixture(scope="module", autouse=True) +def setup_and_teardown() -> Generator[None, None, None]: + """Create and cleanup output directory for smoke tests.""" + 
OUTPUT_DIR.mkdir(exist_ok=True) + yield + if OUTPUT_DIR.exists(): + shutil.rmtree(OUTPUT_DIR) + + +@pytest.fixture(scope="module", autouse=True) +def warmup_npx() -> Generator[None, None, None]: + """ + Warm up npx by running promptfoo --version before all tests. + + On npx fallback (when promptfoo isn't globally installed), the first npx call + downloads and caches promptfoo, which can take several minutes on Windows. + Running this warmup prevents the first actual test from timing out. + """ + # Run with a longer timeout (5 minutes) for the initial npx download + try: + subprocess.run( + ["promptfoo", "--version"], + capture_output=True, + timeout=300, # 5 minutes for initial npx download + encoding="utf-8", + errors="replace", + ) + except subprocess.TimeoutExpired: + # If warmup times out, tests will likely fail but let them run anyway + pass + except FileNotFoundError: + # promptfoo not installed, tests will fail but let them try + pass + yield + + +class TestBasicCLI: + """Basic CLI operations smoke tests.""" + + def test_version_flag(self) -> None: + """Test --version flag outputs version.""" + stdout, stderr, exit_code = run_promptfoo(["--version"]) + + assert exit_code == 0 + # Should output a version number (semver format) + assert stdout.strip(), "Version output should not be empty" + + def test_help_flag(self) -> None: + """Test --help flag outputs help.""" + stdout, stderr, exit_code = run_promptfoo(["--help"]) + + assert exit_code == 0 + assert "promptfoo" in stdout.lower() + assert "eval" in stdout.lower() + + def test_eval_help(self) -> None: + """Test 'eval --help' outputs eval command help.""" + stdout, stderr, exit_code = run_promptfoo(["eval", "--help"]) + + assert exit_code == 0 + assert "--config" in stdout or "-c" in stdout + assert "--output" in stdout or "-o" in stdout + + def test_unknown_command(self) -> None: + """Test unknown command returns error.""" + stdout, stderr, exit_code = run_promptfoo( + ["unknowncommand123"], + 
expect_error=True, + ) + + assert exit_code != 0 + output = stdout + stderr + assert "unknown" in output.lower() or "not found" in output.lower() + + def test_missing_config_file(self) -> None: + """Test missing config file returns error.""" + stdout, stderr, exit_code = run_promptfoo( + ["eval", "-c", "nonexistent-config-file.yaml"], + expect_error=True, + ) + + assert exit_code != 0 + output = stdout + stderr + # Should indicate the file wasn't found + assert any( + phrase in output.lower() + for phrase in [ + "not found", + "no such file", + "does not exist", + "cannot find", + "no configuration file", + ] + ) + + +class TestEvalCommand: + """Eval command smoke tests.""" + + def test_basic_eval(self) -> None: + """Test basic eval with echo provider.""" + config_path = CONFIGS_DIR / "basic.yaml" + stdout, stderr, exit_code = run_promptfoo(["eval", "-c", str(config_path), "--no-cache"]) + + assert exit_code == 0, f"Eval failed:\nSTDOUT: {stdout}\nSTDERR: {stderr}" + # Should show evaluation results + assert "pass" in stdout.lower() or "✓" in stdout or "success" in stdout.lower() + + def test_json_output(self) -> None: + """Test eval outputs valid JSON.""" + config_path = CONFIGS_DIR / "basic.yaml" + output_path = OUTPUT_DIR / "output.json" + + stdout, stderr, exit_code = run_promptfoo( + ["eval", "-c", str(config_path), "-o", str(output_path), "--no-cache"] + ) + + assert exit_code == 0, f"Eval failed:\nSTDOUT: {stdout}\nSTDERR: {stderr}" + assert output_path.exists(), "Output file was not created" + + # Verify it's valid JSON with expected structure + with open(output_path) as f: + data = json.load(f) + + assert "results" in data + assert "results" in data["results"] + assert isinstance(data["results"]["results"], list) + assert len(data["results"]["results"]) > 0 + + # Verify echo provider returns the prompt + first_result = data["results"]["results"][0] + assert "response" in first_result + assert "output" in first_result["response"] + output_text = 
first_result["response"]["output"] + assert "Hello" in output_text + assert "World" in output_text + + def test_yaml_output(self) -> None: + """Test eval outputs YAML format.""" + config_path = CONFIGS_DIR / "basic.yaml" + output_path = OUTPUT_DIR / "output.yaml" + + stdout, stderr, exit_code = run_promptfoo( + ["eval", "-c", str(config_path), "-o", str(output_path), "--no-cache"] + ) + + assert exit_code == 0 + assert output_path.exists() + + # Verify it contains YAML-like content + with open(output_path) as f: + content = f.read() + + assert "results:" in content + + def test_csv_output(self) -> None: + """Test eval outputs CSV format.""" + config_path = CONFIGS_DIR / "basic.yaml" + output_path = OUTPUT_DIR / "output.csv" + + stdout, stderr, exit_code = run_promptfoo( + ["eval", "-c", str(config_path), "-o", str(output_path), "--no-cache"] + ) + + assert exit_code == 0 + assert output_path.exists() + + # Verify it's CSV format (has header row with columns) + with open(output_path) as f: + content = f.read() + + lines = content.strip().split("\n") + assert len(lines) > 0 + # CSV should have comma-separated values + assert "," in lines[0] + + def test_max_concurrency_flag(self) -> None: + """Test --max-concurrency flag.""" + config_path = CONFIGS_DIR / "basic.yaml" + + stdout, stderr, exit_code = run_promptfoo( + ["eval", "-c", str(config_path), "--max-concurrency", "1", "--no-cache"] + ) + + assert exit_code == 0 + + def test_repeat_flag(self) -> None: + """Test --repeat flag runs tests multiple times.""" + config_path = CONFIGS_DIR / "basic.yaml" + output_path = OUTPUT_DIR / "repeat-output.json" + + stdout, stderr, exit_code = run_promptfoo( + [ + "eval", + "-c", + str(config_path), + "--repeat", + "2", + "-o", + str(output_path), + "--no-cache", + ] + ) + + assert exit_code == 0 + + # Verify we got repeated results + with open(output_path) as f: + data = json.load(f) + + # With repeat=2 and 1 test case, we should have 2 results + assert 
len(data["results"]["results"]) == 2 + + def test_verbose_flag(self) -> None: + """Test --verbose flag.""" + config_path = CONFIGS_DIR / "basic.yaml" + + stdout, stderr, exit_code = run_promptfoo(["eval", "-c", str(config_path), "--verbose", "--no-cache"]) + + assert exit_code == 0 + # Verbose mode should produce output + assert len(stdout) > 0 or len(stderr) > 0 + + +class TestExitCodes: + """Exit code smoke tests.""" + + def test_success_exit_code(self) -> None: + """Test exit code 0 when all assertions pass.""" + config_path = CONFIGS_DIR / "basic.yaml" + + stdout, stderr, exit_code = run_promptfoo(["eval", "-c", str(config_path), "--no-cache"]) + + assert exit_code == 0 + + def test_failure_exit_code(self) -> None: + """Test exit code 100 when assertions fail.""" + config_path = CONFIGS_DIR / "failing-assertion.yaml" + + stdout, stderr, exit_code = run_promptfoo( + ["eval", "-c", str(config_path), "--no-cache"], + expect_error=True, + ) + + # Exit code 100 indicates test failures + assert exit_code == 100, f"Expected exit code 100, got {exit_code}" + + def test_config_error_exit_code(self) -> None: + """Test exit code 1 for config errors.""" + stdout, stderr, exit_code = run_promptfoo( + ["eval", "-c", "nonexistent-file.yaml", "--no-cache"], + expect_error=True, + ) + + assert exit_code == 1 + + +class TestEchoProvider: + """Echo provider smoke tests.""" + + def test_echo_provider_basic(self) -> None: + """Test echo provider returns the prompt.""" + config_path = CONFIGS_DIR / "basic.yaml" + output_path = OUTPUT_DIR / "echo-test.json" + + stdout, stderr, exit_code = run_promptfoo( + ["eval", "-c", str(config_path), "-o", str(output_path), "--no-cache"] + ) + + assert exit_code == 0 + + # Verify echo provider returns the prompt + with open(output_path) as f: + data = json.load(f) + + first_result = data["results"]["results"][0] + + # Echo provider should return the prompt in the response + output = first_result["response"]["output"] + assert "Hello" in output + 
assert "World" in output + + def test_echo_provider_with_multiple_vars(self) -> None: + """Test echo provider with multiple variables.""" + config_path = CONFIGS_DIR / "assertions.yaml" + output_path = OUTPUT_DIR / "echo-multi-var.json" + + stdout, stderr, exit_code = run_promptfoo( + ["eval", "-c", str(config_path), "-o", str(output_path), "--no-cache"] + ) + + assert exit_code == 0 + + with open(output_path) as f: + data = json.load(f) + + first_result = data["results"]["results"][0] + output = first_result["response"]["output"] + + # Should contain all variable values + assert "Alice" in output + assert "Wonderland" in output + + +class TestAssertions: + """Assertion smoke tests.""" + + def test_contains_assertion(self) -> None: + """Test contains assertion.""" + config_path = CONFIGS_DIR / "basic.yaml" + + stdout, stderr, exit_code = run_promptfoo(["eval", "-c", str(config_path), "--no-cache"]) + + assert exit_code == 0 + # All assertions should pass + assert "pass" in stdout.lower() or "✓" in stdout or "success" in stdout.lower() + + def test_multiple_assertions(self) -> None: + """Test multiple assertions in single test.""" + config_path = CONFIGS_DIR / "assertions.yaml" + + stdout, stderr, exit_code = run_promptfoo(["eval", "-c", str(config_path), "--no-cache"]) + + assert exit_code == 0 + + def test_failing_assertion(self) -> None: + """Test failing assertion.""" + config_path = CONFIGS_DIR / "failing-assertion.yaml" + + stdout, stderr, exit_code = run_promptfoo( + ["eval", "-c", str(config_path), "--no-cache"], + expect_error=True, + ) + + # Should fail with exit code 100 + assert exit_code == 100 + output = stdout + stderr + # Should indicate failure + assert "fail" in output.lower() or "✗" in output or "error" in output.lower() diff --git a/tests/test_cli.py b/tests/test_cli.py index d5611b8..0e4a1c0 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -104,6 +104,7 @@ def test_strip_quotes(self, input_path: str, expected: str) -> None: """Quote 
stripping handles various quote patterns correctly.""" assert _strip_quotes(input_path) == expected + @pytest.mark.skipif(sys.platform == "win32", reason="Unix-style PATH separator test") @pytest.mark.parametrize( "path_value,expected", [ @@ -115,8 +116,24 @@ def test_strip_quotes(self, input_path: str, expected: str) -> None: (":::", []), # Only separators ], ) - def test_split_path(self, path_value: str, expected: list[str]) -> None: - """PATH splitting handles quotes, empty entries, and whitespace.""" + def test_split_path_unix(self, path_value: str, expected: list[str]) -> None: + """PATH splitting handles quotes, empty entries, and whitespace on Unix.""" + assert _split_path(path_value) == expected + + @pytest.mark.skipif(sys.platform != "win32", reason="Windows-style PATH separator test") + @pytest.mark.parametrize( + "path_value,expected", + [ + ("C:\\bin;C:\\tools", ["C:\\bin", "C:\\tools"]), + ('"C:\\bin";C:\\tools', ["C:\\bin", "C:\\tools"]), + ("C:\\bin;;C:\\tools", ["C:\\bin", "C:\\tools"]), # Empty entry removed + (" C:\\bin ; C:\\tools ", ["C:\\bin", "C:\\tools"]), # Whitespace + ("", []), + (";;;", []), # Only separators + ], + ) + def test_split_path_windows(self, path_value: str, expected: list[str]) -> None: + """PATH splitting handles quotes, empty entries, and whitespace on Windows.""" assert _split_path(path_value) == expected @@ -221,8 +238,9 @@ def test_find_external_promptfoo_when_found(self, monkeypatch: pytest.MonkeyPatc result = _find_external_promptfoo() assert result == promptfoo_path - def test_find_external_promptfoo_prevents_recursion(self, monkeypatch: pytest.MonkeyPatch) -> None: - """Filters out wrapper directory from PATH to prevent recursion.""" + @pytest.mark.skipif(sys.platform == "win32", reason="Unix-specific recursion test") + def test_find_external_promptfoo_prevents_recursion_unix(self, monkeypatch: pytest.MonkeyPatch) -> None: + """Filters out wrapper directory from PATH to prevent recursion on Unix.""" wrapper_path = 
"/home/user/.local/bin/promptfoo" real_promptfoo = "/usr/local/bin/promptfoo" @@ -243,6 +261,30 @@ def mock_which(cmd: str, path: Optional[str] = None) -> Optional[str]: result = _find_external_promptfoo() assert result == real_promptfoo + @pytest.mark.skipif(sys.platform != "win32", reason="Windows-specific recursion test") + def test_find_external_promptfoo_prevents_recursion_windows(self, monkeypatch: pytest.MonkeyPatch) -> None: + """Filters out wrapper directory from PATH to prevent recursion on Windows.""" + wrapper_path = "C:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python312\\Scripts\\promptfoo.exe" + real_promptfoo = "C:\\npm\\prefix\\promptfoo.cmd" + + monkeypatch.setattr(sys, "argv", [wrapper_path]) + test_path = "C:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python312\\Scripts;C:\\npm\\prefix" + monkeypatch.setenv("PATH", test_path) + + def mock_which(cmd: str, path: Optional[str] = None) -> Optional[str]: + if cmd != "promptfoo": + return None + if path is None: + return wrapper_path + # When called with filtered PATH, return the real one + if "Python312\\Scripts" not in path: + return real_promptfoo + return None + + monkeypatch.setattr("shutil.which", mock_which) + result = _find_external_promptfoo() + assert result == real_promptfoo + class TestShellRequirement: """Test Windows shell requirement detection for .bat/.cmd files.""" @@ -344,6 +386,8 @@ def test_main_uses_external_promptfoo_when_available(self, monkeypatch: pytest.M "shutil.which", lambda cmd, path=None: {"node": "/usr/bin/node", "promptfoo": "/usr/local/bin/promptfoo"}.get(cmd), ) + # Mock telemetry to avoid PostHog calls during test + monkeypatch.setattr("promptfoo.cli.record_wrapper_used", lambda mode: None) mock_result = subprocess.CompletedProcess([], 0) mock_run = MagicMock(return_value=mock_result) @@ -379,6 +423,8 @@ def test_main_skips_external_when_wrapper_env_set(self, monkeypatch: pytest.Monk "promptfoo": "/usr/local/bin/promptfoo", }.get(cmd), ) + # Mock 
telemetry to avoid PostHog calls during test + monkeypatch.setattr("promptfoo.cli.record_wrapper_used", lambda mode: None) mock_result = subprocess.CompletedProcess([], 0) mock_run = MagicMock(return_value=mock_result) @@ -402,6 +448,8 @@ def test_main_falls_back_to_npx(self, monkeypatch: pytest.MonkeyPatch) -> None: monkeypatch.setattr( "shutil.which", lambda cmd, path=None: {"node": "/usr/bin/node", "npx": "/usr/bin/npx"}.get(cmd) ) + # Mock telemetry to avoid PostHog calls during test + monkeypatch.setattr("promptfoo.cli.record_wrapper_used", lambda mode: None) mock_result = subprocess.CompletedProcess([], 0) mock_run = MagicMock(return_value=mock_result) @@ -426,8 +474,14 @@ def test_main_exits_when_neither_external_nor_npx_available( self, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture ) -> None: """Exits with error when neither external promptfoo nor npx found.""" + # Use platform-appropriate path for node + node_path = "C:\\Program Files\\nodejs\\node.exe" if sys.platform == "win32" else "/usr/bin/node" + monkeypatch.setattr(sys, "argv", ["promptfoo", "eval"]) - monkeypatch.setattr("shutil.which", lambda cmd, path=None: {"node": "/usr/bin/node"}.get(cmd)) + monkeypatch.setattr("shutil.which", lambda cmd, path=None: {"node": node_path}.get(cmd)) + # Also mock os.path.isfile to prevent _find_windows_promptfoo() from finding + # a real promptfoo installation on Windows CI runners + monkeypatch.setattr(os.path, "isfile", lambda p: False) with pytest.raises(SystemExit) as exc_info: main() @@ -442,6 +496,8 @@ def test_main_passes_arguments_correctly(self, monkeypatch: pytest.MonkeyPatch) monkeypatch.setattr( "shutil.which", lambda cmd, path=None: {"node": "/usr/bin/node", "npx": "/usr/bin/npx"}.get(cmd) ) + # Mock telemetry to avoid PostHog calls during test + monkeypatch.setattr("promptfoo.cli.record_wrapper_used", lambda mode: None) mock_result = subprocess.CompletedProcess([], 0) mock_run = MagicMock(return_value=mock_result) @@ -464,6 +520,8 @@ 
def test_main_returns_subprocess_exit_code(self, monkeypatch: pytest.MonkeyPatch monkeypatch.setattr( "shutil.which", lambda cmd, path=None: {"node": "/usr/bin/node", "npx": "/usr/bin/npx"}.get(cmd) ) + # Mock telemetry to avoid PostHog calls during test + monkeypatch.setattr("promptfoo.cli.record_wrapper_used", lambda mode: None) # Test non-zero exit code mock_result = subprocess.CompletedProcess([], 42)