From 910f7c428a3f2974934c7196d2e04ec0bee3d525 Mon Sep 17 00:00:00 2001 From: mldangelo Date: Tue, 6 Jan 2026 06:27:54 -0800 Subject: [PATCH 01/11] feat: add smoke tests for CLI integration testing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add smoke tests that verify end-to-end CLI functionality - Test basic CLI operations (--version, --help, error handling) - Test eval command with echo provider (no external dependencies) - Test output formats (JSON, YAML, CSV) - Test CLI flags (--repeat, --max-concurrency, --verbose, --no-cache) - Test exit codes (0 for success, 100 for failures, 1 for errors) - Test assertions (contains, icontains, failing assertions) - Add pytest configuration with 'smoke' marker for selective testing - Add comprehensive README documenting smoke test purpose and usage Total: 20 smoke tests, all passing ✅ Smoke tests run against the installed promptfoo CLI via subprocess, testing the Python wrapper integration with the Node.js CLI. Run smoke tests: pytest tests/smoke/ # Run all smoke tests pytest tests/ -m smoke # Run only smoke-marked tests pytest tests/ -m 'not smoke' # Skip smoke tests (unit tests only) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 --- pyproject.toml | 13 + tests/smoke/README.md | 88 ++++ tests/smoke/__init__.py | 1 + tests/smoke/fixtures/configs/assertions.yaml | 22 + tests/smoke/fixtures/configs/basic.yaml | 17 + .../fixtures/configs/failing-assertion.yaml | 17 + tests/smoke/test_smoke.py | 398 ++++++++++++++++++ 7 files changed, 556 insertions(+) create mode 100644 tests/smoke/README.md create mode 100644 tests/smoke/__init__.py create mode 100644 tests/smoke/fixtures/configs/assertions.yaml create mode 100644 tests/smoke/fixtures/configs/basic.yaml create mode 100644 tests/smoke/fixtures/configs/failing-assertion.yaml create mode 100644 tests/smoke/test_smoke.py diff --git a/pyproject.toml b/pyproject.toml index 3d61faf..a092d39 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -98,3 +98,16 @@ show_error_codes = true pretty = true check_untyped_defs = true disallow_incomplete_defs = true + +[tool.pytest.ini_options] +testpaths = ["tests"] +python_files = ["test_*.py"] +python_classes = ["Test*"] +python_functions = ["test_*"] +addopts = [ + "-v", + "--strict-markers", +] +markers = [ + "smoke: smoke tests that run the full CLI (slow, requires Node.js)", +] diff --git a/tests/smoke/README.md b/tests/smoke/README.md new file mode 100644 index 0000000..73d813b --- /dev/null +++ b/tests/smoke/README.md @@ -0,0 +1,88 @@ +# Smoke Tests + +These smoke tests verify that the core promptfoo CLI functionality works correctly through the Python wrapper. + +## What are Smoke Tests? + +Smoke tests are high-level integration tests that verify the most critical functionality works end-to-end. They: + +- Run against the actual installed CLI (via `npx promptfoo`) +- Test the Python wrapper integration with the Node.js CLI +- Use the `echo` provider to avoid external API dependencies +- Verify command-line arguments, file I/O, and output formats +- Check exit codes and error handling + +## Running Smoke Tests + +```bash +# Run all smoke tests +pytest tests/smoke/ + +# Run with verbose output +pytest tests/smoke/ -v + +# Run a specific test class +pytest tests/smoke/test_smoke.py::TestEvalCommand + +# Run a specific test +pytest tests/smoke/test_smoke.py::TestEvalCommand::test_basic_eval +``` + +## Test Structure + +- `test_smoke.py` - Main smoke test suite +- `fixtures/` - Test configuration files + - `configs/` - YAML configuration files for testing + +## Test Coverage + +### Basic CLI Operations +- Version flag (`--version`) +- Help output (`--help`, `eval --help`) +- Unknown command handling +- Missing file error handling + +### Eval Command +- Basic evaluation with echo provider +- Output formats (JSON, YAML, CSV) +- Command-line flags (`--max-concurrency`, `--repeat`, `--verbose`) +- Cache control (`--no-cache`) + +### Exit Codes +- Exit code 0 for success +- Exit code 100 for assertion failures +- Exit code 1 for configuration errors + +### Echo Provider +- Basic prompt echoing +- Variable substitution +- Multiple variable handling + +### Assertions +- `contains` assertion +- `icontains` assertion (case-insensitive) +- Multiple assertions per test +- Failing assertion behavior + +## Why Echo Provider? + +The `echo` provider is perfect for smoke tests because: + +1. **No external dependencies** - Doesn't require API keys or network calls +2. **Deterministic** - Always returns the same output for the same input +3. **Fast** - No network latency +4. **Predictable** - Easy to write assertions against + +## Adding New Smoke Tests + +1. Create a new test config in `fixtures/configs/` if needed +2. Add test methods to the appropriate test class in `test_smoke.py` +3. Use the `run_promptfoo()` helper to execute CLI commands +4. Make assertions on stdout, stderr, exit codes, and output files + +## Notes + +- Smoke tests run slower than unit tests (they spawn subprocesses) +- They require Node.js and promptfoo to be installed +- They test the integration between Python and Node.js +- They should be kept focused on critical functionality diff --git a/tests/smoke/__init__.py b/tests/smoke/__init__.py new file mode 100644 index 0000000..a2573de --- /dev/null +++ b/tests/smoke/__init__.py @@ -0,0 +1 @@ +"""Smoke tests for promptfoo CLI.""" diff --git a/tests/smoke/fixtures/configs/assertions.yaml b/tests/smoke/fixtures/configs/assertions.yaml new file mode 100644 index 0000000..b03ee62 --- /dev/null +++ b/tests/smoke/fixtures/configs/assertions.yaml @@ -0,0 +1,22 @@ +# yaml-language-server: $schema=https://promptfoo.dev/config-schema.json +description: 'Smoke test - multiple assertions' + +providers: + - echo + +prompts: + - 'Hello {{name}}, welcome to {{place}}' + +tests: + - vars: + name: Alice + place: Wonderland + assert: + - type: contains + value: Hello + - type: contains + value: Alice + - type: contains + value: Wonderland + - type: icontains + value: WELCOME diff --git a/tests/smoke/fixtures/configs/basic.yaml b/tests/smoke/fixtures/configs/basic.yaml new file mode 100644 index 0000000..936bb4a --- /dev/null +++ b/tests/smoke/fixtures/configs/basic.yaml @@ -0,0 +1,17 @@ +# yaml-language-server: $schema=https://promptfoo.dev/config-schema.json +description: 'Smoke test - basic config validation' + +providers: + - echo + +prompts: + - 'Hello {{name}}' + +tests: + - vars: + name: World + assert: + - type: contains + value: Hello + - type: contains + value: World diff --git a/tests/smoke/fixtures/configs/failing-assertion.yaml b/tests/smoke/fixtures/configs/failing-assertion.yaml new file mode 100644 index 0000000..ee8d327 --- /dev/null +++ b/tests/smoke/fixtures/configs/failing-assertion.yaml @@ -0,0 +1,17 @@ +# yaml-language-server: $schema=https://promptfoo.dev/config-schema.json +description: 'Smoke test - config with failing assertion' + +providers: + - echo + +prompts: + - 'Hello {{name}}' + +tests: + - vars: + name: World + assert: + # This assertion will fail because echo returns "Hello World" + # but we're asserting it contains "IMPOSSIBLE_STRING_NOT_IN_OUTPUT" + - type: contains + value: IMPOSSIBLE_STRING_NOT_IN_OUTPUT_12345 diff --git a/tests/smoke/test_smoke.py b/tests/smoke/test_smoke.py new file mode 100644 index 0000000..143db70 --- /dev/null +++ b/tests/smoke/test_smoke.py @@ -0,0 +1,398 @@ +""" +Smoke tests for the promptfoo CLI. + +These tests verify the core evaluation pipeline works correctly +using the echo provider (no external API dependencies). + +These tests run against the installed promptfoo package via npx, +testing the Python wrapper integration. +""" + +import json +import os +import shutil +import subprocess +from pathlib import Path + +import pytest + +# Mark all tests in this module as smoke tests +pytestmark = pytest.mark.smoke + +# Directories +SMOKE_DIR = Path(__file__).parent +FIXTURES_DIR = SMOKE_DIR / "fixtures" +CONFIGS_DIR = FIXTURES_DIR / "configs" +OUTPUT_DIR = SMOKE_DIR / ".temp-output" + + +def run_promptfoo( + args: list[str], + cwd: Path | None = None, + expect_error: bool = False, + env: dict[str, str] | None = None, +) -> tuple[str, str, int]: + """ + Run promptfoo CLI and capture output. + + Args: + args: CLI arguments to pass to promptfoo + cwd: Working directory for the command + expect_error: If True, don't raise on non-zero exit + env: Environment variables to set + + Returns: + Tuple of (stdout, stderr, exit_code) + """ + cmd = ["promptfoo"] + args + + full_env = os.environ.copy() + full_env["NO_COLOR"] = "1" # Disable color output for easier parsing + if env: + full_env.update(env) + + result = subprocess.run( + cmd, + cwd=cwd or Path.cwd(), + capture_output=True, + text=True, + env=full_env, + timeout=60, # Eval can take longer + ) + + stdout = result.stdout + stderr = result.stderr + exit_code = result.returncode + + if not expect_error and exit_code != 0: + # For debugging failed tests + print(f"Command failed: {' '.join(cmd)}") + print(f"Exit code: {exit_code}") + print(f"STDOUT:\n{stdout}") + print(f"STDERR:\n{stderr}") + + return stdout, stderr, exit_code + + +@pytest.fixture(scope="module", autouse=True) +def setup_and_teardown(): + """Create and cleanup output directory for smoke tests.""" + OUTPUT_DIR.mkdir(exist_ok=True) + yield + if OUTPUT_DIR.exists(): + shutil.rmtree(OUTPUT_DIR) + + +class TestBasicCLI: + """Basic CLI operations smoke tests.""" + + def test_version_flag(self): + """Test --version flag outputs version.""" + stdout, stderr, exit_code = run_promptfoo(["--version"]) + + assert exit_code == 0 + # Should output a version number (semver format) + assert stdout.strip(), "Version output should not be empty" + + def test_help_flag(self): + """Test --help flag outputs help.""" + stdout, stderr, exit_code = run_promptfoo(["--help"]) + + assert exit_code == 0 + assert "promptfoo" in stdout.lower() + assert "eval" in stdout.lower() + + def test_eval_help(self): + """Test 'eval --help' outputs eval command help.""" + stdout, stderr, exit_code = run_promptfoo(["eval", "--help"]) + + assert exit_code == 0 + assert "--config" in stdout or "-c" in stdout + assert "--output" in stdout or "-o" in stdout + + def test_unknown_command(self): + """Test unknown command returns error.""" + stdout, stderr, exit_code = run_promptfoo( + ["unknowncommand123"], + expect_error=True, + ) + + assert exit_code != 0 + output = stdout + stderr + assert "unknown" in output.lower() or "not found" in output.lower() + + def test_missing_config_file(self): + """Test missing config file returns error.""" + stdout, stderr, exit_code = run_promptfoo( + ["eval", "-c", "nonexistent-config-file.yaml"], + expect_error=True, + ) + + assert exit_code != 0 + output = stdout + stderr + # Should indicate the file wasn't found + assert any( + phrase in output.lower() + for phrase in [ + "not found", + "no such file", + "does not exist", + "cannot find", + "no configuration file", + ] + ) + + +class TestEvalCommand: + """Eval command smoke tests.""" + + def test_basic_eval(self): + """Test basic eval with echo provider.""" + config_path = CONFIGS_DIR / "basic.yaml" + stdout, stderr, exit_code = run_promptfoo( + ["eval", "-c", str(config_path), "--no-cache"] + ) + + assert exit_code == 0, f"Eval failed:\nSTDOUT: {stdout}\nSTDERR: {stderr}" + # Should show evaluation results + assert "pass" in stdout.lower() or "✓" in stdout or "success" in stdout.lower() + + def test_json_output(self): + """Test eval outputs valid JSON.""" + config_path = CONFIGS_DIR / "basic.yaml" + output_path = OUTPUT_DIR / "output.json" + + stdout, stderr, exit_code = run_promptfoo( + ["eval", "-c", str(config_path), "-o", str(output_path), "--no-cache"] + ) + + assert exit_code == 0, f"Eval failed:\nSTDOUT: {stdout}\nSTDERR: {stderr}" + assert output_path.exists(), "Output file was not created" + + # Verify it's valid JSON with expected structure + with open(output_path) as f: + data = json.load(f) + + assert "results" in data + assert "results" in data["results"] + assert isinstance(data["results"]["results"], list) + assert len(data["results"]["results"]) > 0 + + # Verify echo provider returns the prompt + first_result = data["results"]["results"][0] + assert "response" in first_result + assert "output" in first_result["response"] + output_text = first_result["response"]["output"] + assert "Hello" in output_text + assert "World" in output_text + + def test_yaml_output(self): + """Test eval outputs YAML format.""" + config_path = CONFIGS_DIR / "basic.yaml" + output_path = OUTPUT_DIR / "output.yaml" + + stdout, stderr, exit_code = run_promptfoo( + ["eval", "-c", str(config_path), "-o", str(output_path), "--no-cache"] + ) + + assert exit_code == 0 + assert output_path.exists() + + # Verify it contains YAML-like content + with open(output_path) as f: + content = f.read() + + assert "results:" in content + + def test_csv_output(self): + """Test eval outputs CSV format.""" + config_path = CONFIGS_DIR / "basic.yaml" + output_path = OUTPUT_DIR / "output.csv" + + stdout, stderr, exit_code = run_promptfoo( + ["eval", "-c", str(config_path), "-o", str(output_path), "--no-cache"] + ) + + assert exit_code == 0 + assert output_path.exists() + + # Verify it's CSV format (has header row with columns) + with open(output_path) as f: + content = f.read() + + lines = content.strip().split("\n") + assert len(lines) > 0 + # CSV should have comma-separated values + assert "," in lines[0] + + def test_max_concurrency_flag(self): + """Test --max-concurrency flag.""" + config_path = CONFIGS_DIR / "basic.yaml" + + stdout, stderr, exit_code = run_promptfoo( + ["eval", "-c", str(config_path), "--max-concurrency", "1", "--no-cache"] + ) + + assert exit_code == 0 + + def test_repeat_flag(self): + """Test --repeat flag runs tests multiple times.""" + config_path = CONFIGS_DIR / "basic.yaml" + output_path = OUTPUT_DIR / "repeat-output.json" + + stdout, stderr, exit_code = run_promptfoo( + [ + "eval", + "-c", + str(config_path), + "--repeat", + "2", + "-o", + str(output_path), + "--no-cache", + ] + ) + + assert exit_code == 0 + + # Verify we got repeated results + with open(output_path) as f: + data = json.load(f) + + # With repeat=2 and 1 test case, we should have 2 results + assert len(data["results"]["results"]) == 2 + + def test_verbose_flag(self): + """Test --verbose flag.""" + config_path = CONFIGS_DIR / "basic.yaml" + + stdout, stderr, exit_code = run_promptfoo( + ["eval", "-c", str(config_path), "--verbose", "--no-cache"] + ) + + assert exit_code == 0 + # Verbose mode should produce output + assert len(stdout) > 0 or len(stderr) > 0 + + +class TestExitCodes: + """Exit code smoke tests.""" + + def test_success_exit_code(self): + """Test exit code 0 when all assertions pass.""" + config_path = CONFIGS_DIR / "basic.yaml" + + stdout, stderr, exit_code = run_promptfoo( + ["eval", "-c", str(config_path), "--no-cache"] + ) + + assert exit_code == 0 + + def test_failure_exit_code(self): + """Test exit code 100 when assertions fail.""" + config_path = CONFIGS_DIR / "failing-assertion.yaml" + + stdout, stderr, exit_code = run_promptfoo( + ["eval", "-c", str(config_path), "--no-cache"], + expect_error=True, + ) + + # Exit code 100 indicates test failures + assert exit_code == 100, f"Expected exit code 100, got {exit_code}" + + def test_config_error_exit_code(self): + """Test exit code 1 for config errors.""" + stdout, stderr, exit_code = run_promptfoo( + ["eval", "-c", "nonexistent-file.yaml", "--no-cache"], + expect_error=True, + ) + + assert exit_code == 1 + + +class TestEchoProvider: + """Echo provider smoke tests.""" + + def test_echo_provider_basic(self): + """Test echo provider returns the prompt.""" + config_path = CONFIGS_DIR / "basic.yaml" + output_path = OUTPUT_DIR / "echo-test.json" + + stdout, stderr, exit_code = run_promptfoo( + ["eval", "-c", str(config_path), "-o", str(output_path), "--no-cache"] + ) + + assert exit_code == 0 + + # Verify echo provider returns the prompt + with open(output_path) as f: + data = json.load(f) + + first_result = data["results"]["results"][0] + + # Echo provider should return the prompt in the response + output = first_result["response"]["output"] + assert "Hello" in output + assert "World" in output + + def test_echo_provider_with_multiple_vars(self): + """Test echo provider with multiple variables.""" + config_path = CONFIGS_DIR / "assertions.yaml" + output_path = OUTPUT_DIR / "echo-multi-var.json" + + stdout, stderr, exit_code = run_promptfoo( + ["eval", "-c", str(config_path), "-o", str(output_path), "--no-cache"] + ) + + assert exit_code == 0 + + with open(output_path) as f: + data = json.load(f) + + first_result = data["results"]["results"][0] + output = first_result["response"]["output"] + + # Should contain all variable values + assert "Alice" in output + assert "Wonderland" in output + + +class TestAssertions: + """Assertion smoke tests.""" + + def test_contains_assertion(self): + """Test contains assertion.""" + config_path = CONFIGS_DIR / "basic.yaml" + + stdout, stderr, exit_code = run_promptfoo( + ["eval", "-c", str(config_path), "--no-cache"] + ) + + assert exit_code == 0 + # All assertions should pass + assert "pass" in stdout.lower() or "✓" in stdout or "success" in stdout.lower() + + def test_multiple_assertions(self): + """Test multiple assertions in single test.""" + config_path = CONFIGS_DIR / "assertions.yaml" + + stdout, stderr, exit_code = run_promptfoo( + ["eval", "-c", str(config_path), "--no-cache"] + ) + + assert exit_code == 0 + + def test_failing_assertion(self): + """Test failing assertion.""" + config_path = CONFIGS_DIR / "failing-assertion.yaml" + + stdout, stderr, exit_code = run_promptfoo( + ["eval", "-c", str(config_path), "--no-cache"], + expect_error=True, + ) + + # Should fail with exit code 100 + assert exit_code == 100 + output = stdout + stderr + # Should indicate failure + assert "fail" in output.lower() or "✗" in output or "error" in output.lower() From 60dff7d68466ba517d69d7c8ac3adefc9eedb782 Mon Sep 17 00:00:00 2001 From: mldangelo Date: Tue, 6 Jan 2026 06:32:45 -0800 Subject: [PATCH 02/11] ci: run unit tests and smoke tests in CI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously the CI was only testing CLI invocation but not running pytest. Changes: - Install dev dependencies (pytest, mypy, ruff) in test jobs - Run unit tests with: pytest tests/ -v -m 'not smoke' - Run smoke tests with: pytest tests/smoke/ -v - Both 'test' and 'test-npx-fallback' jobs now run full test suite This ensures: ✅ Unit tests run on all platforms (ubuntu, windows) and Python versions (3.9, 3.13) ✅ Smoke tests verify end-to-end CLI functionality ✅ Both global install and npx fallback paths are tested --- .github/workflows/test.yml | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index bf7a19a..aae9d59 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -126,8 +126,14 @@ jobs: - name: Pin Python version run: uv python pin ${{ matrix.python-version }} - - name: Install package - run: uv sync + - name: Install package with dev dependencies + run: uv sync --extra dev + + - name: Run unit tests + run: uv run pytest tests/ -v -m 'not smoke' + + - name: Run smoke tests + run: uv run pytest tests/smoke/ -v - name: Test CLI can be invoked run: uv run promptfoo --version @@ -192,8 +198,14 @@ jobs: - name: Pin Python version run: uv python pin ${{ matrix.python-version }} - - name: Install package - run: uv sync + - name: Install package with dev dependencies + run: uv sync --extra dev + + - name: Run unit tests + run: uv run pytest tests/ -v -m 'not smoke' + + - name: Run smoke tests (with npx fallback) + run: uv run pytest tests/smoke/ -v - name: Test CLI fallback to npx (no global install) run: uv run promptfoo --version From 6193feb39f7ffa0a223ceb67d7653fbb22adbadd Mon Sep 17 00:00:00 2001 From: mldangelo Date: Tue, 6 Jan 2026 06:37:43 -0800 Subject: [PATCH 03/11] fix: use Optional for Python 3.9 compatibility in smoke tests --- tests/smoke/test_smoke.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/smoke/test_smoke.py b/tests/smoke/test_smoke.py index 143db70..73377c1 100644 --- a/tests/smoke/test_smoke.py +++ b/tests/smoke/test_smoke.py @@ -13,6 +13,7 @@ import shutil import subprocess from pathlib import Path +from typing import Optional import pytest @@ -28,9 +29,9 @@ def run_promptfoo( args: list[str], - cwd: Path | None = None, + cwd: Optional[Path] = None, expect_error: bool = False, - env: dict[str, str] | None = None, + env: Optional[dict[str, str]] = None, ) -> tuple[str, str, int]: """ Run promptfoo CLI and capture output. From 3f4e9fd433d3c4b19cb8ed1ed6a380ce8e442527 Mon Sep 17 00:00:00 2001 From: mldangelo Date: Tue, 6 Jan 2026 06:58:41 -0800 Subject: [PATCH 04/11] fix: make platform-specific tests work on both Unix and Windows - Split test_split_path into platform-specific versions (Unix/Windows) - Split test_find_external_promptfoo_prevents_recursion for platform paths - Use platform-appropriate node path in test_main_exits_when_neither_external_nor_npx_available - Tests now skip appropriately on incompatible platforms --- tests/test_cli.py | 54 ++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 49 insertions(+), 5 deletions(-) diff --git a/tests/test_cli.py b/tests/test_cli.py index 1acf1c7..e32f2e9 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -104,6 +104,7 @@ def test_strip_quotes(self, input_path: str, expected: str) -> None: """Quote stripping handles various quote patterns correctly.""" assert _strip_quotes(input_path) == expected + @pytest.mark.skipif(sys.platform == "win32", reason="Unix-style PATH separator test") @pytest.mark.parametrize( "path_value,expected", [ @@ -115,8 +116,24 @@ def test_strip_quotes(self, input_path: str, expected: str) -> None: (":::", []), # Only separators ], ) - def test_split_path(self, path_value: str, expected: list[str]) -> None: - """PATH splitting handles quotes, empty entries, and whitespace.""" + def test_split_path_unix(self, path_value: str, expected: list[str]) -> None: + """PATH splitting handles quotes, empty entries, and whitespace on Unix.""" + assert _split_path(path_value) == expected + + @pytest.mark.skipif(sys.platform != "win32", reason="Windows-style PATH separator test") + @pytest.mark.parametrize( + "path_value,expected", + [ + ("C:\\bin;C:\\tools", ["C:\\bin", "C:\\tools"]), + ('"C:\\bin";C:\\tools', ["C:\\bin", "C:\\tools"]), + ("C:\\bin;;C:\\tools", ["C:\\bin", "C:\\tools"]), # Empty entry removed + (" C:\\bin ; C:\\tools ", ["C:\\bin", "C:\\tools"]), # Whitespace + ("", []), + (";;;", []), # Only separators + ], + ) + def test_split_path_windows(self, path_value: str, expected: list[str]) -> None: + """PATH splitting handles quotes, empty entries, and whitespace on Windows.""" assert _split_path(path_value) == expected @@ -224,8 +241,9 @@ def test_find_external_promptfoo_when_found(self, monkeypatch: pytest.MonkeyPatc result = _find_external_promptfoo() assert result == promptfoo_path - def test_find_external_promptfoo_prevents_recursion(self, monkeypatch: pytest.MonkeyPatch) -> None: - """Filters out wrapper directory from PATH to prevent recursion.""" + @pytest.mark.skipif(sys.platform == "win32", reason="Unix-specific recursion test") + def test_find_external_promptfoo_prevents_recursion_unix(self, monkeypatch: pytest.MonkeyPatch) -> None: + """Filters out wrapper directory from PATH to prevent recursion on Unix.""" wrapper_path = "/home/user/.local/bin/promptfoo" real_promptfoo = "/usr/local/bin/promptfoo" @@ -246,6 +264,29 @@ def mock_which(cmd: str, path: Optional[str] = None) -> Optional[str]: result = _find_external_promptfoo() assert result == real_promptfoo + @pytest.mark.skipif(sys.platform != "win32", reason="Windows-specific recursion test") + def test_find_external_promptfoo_prevents_recursion_windows(self, monkeypatch: pytest.MonkeyPatch) -> None: + """Filters out wrapper directory from PATH to prevent recursion on Windows.""" + wrapper_path = "C:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python312\\Scripts\\promptfoo.exe" + real_promptfoo = "C:\\npm\\prefix\\promptfoo.cmd" + + monkeypatch.setattr(sys, "argv", [wrapper_path]) + monkeypatch.setenv("PATH", "C:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python312\\Scripts;C:\\npm\\prefix") + + def mock_which(cmd: str, path: Optional[str] = None) -> Optional[str]: + if cmd != "promptfoo": + return None + if path is None: + return wrapper_path + # When called with filtered PATH, return the real one + if "Python312\\Scripts" not in path: + return real_promptfoo + return None + + monkeypatch.setattr("shutil.which", mock_which) + result = _find_external_promptfoo() + assert result == real_promptfoo + class TestShellRequirement: """Test Windows shell requirement detection for .bat/.cmd files.""" @@ -427,9 +468,12 @@ def test_main_exits_when_neither_external_nor_npx_available( self, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture ) -> None: """Exits with error when neither external promptfoo nor npx found.""" + # Use platform-appropriate path for node + node_path = "C:\\Program Files\\nodejs\\node.exe" if sys.platform == "win32" else "/usr/bin/node" + monkeypatch.setattr(sys, "argv", ["promptfoo", "eval"]) monkeypatch.setattr("shutil.which", lambda cmd, path=None: { - "node": "/usr/bin/node" + "node": node_path }.get(cmd)) with pytest.raises(SystemExit) as exc_info: From 9cd4d1177add344d3a78d2b83dde8a70056a02c7 Mon Sep 17 00:00:00 2001 From: mldangelo Date: Tue, 6 Jan 2026 07:12:11 -0800 Subject: [PATCH 05/11] fix: increase smoke test timeout for npx fallback scenarios The first npx call can be slow as it downloads promptfoo. Increased timeout from 60s to 120s to accommodate this. --- tests/smoke/test_smoke.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/smoke/test_smoke.py b/tests/smoke/test_smoke.py index 73377c1..ec862f0 100644 --- a/tests/smoke/test_smoke.py +++ b/tests/smoke/test_smoke.py @@ -58,7 +58,7 @@ def run_promptfoo( capture_output=True, text=True, env=full_env, - timeout=60, # Eval can take longer + timeout=120, # Increased timeout for npx fallback (first npx call downloads promptfoo) ) stdout = result.stdout From 13c1f1d9cfe52d76681ff66bc44a39dd42019256 Mon Sep 17 00:00:00 2001 From: mldangelo Date: Tue, 6 Jan 2026 07:27:45 -0800 Subject: [PATCH 06/11] fix: handle None stdout/stderr in smoke tests Add safety checks for None values from subprocess.run() output, which can occur on Windows in certain error conditions. --- tests/smoke/test_smoke.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/smoke/test_smoke.py b/tests/smoke/test_smoke.py index ec862f0..f7b889b 100644 --- a/tests/smoke/test_smoke.py +++ b/tests/smoke/test_smoke.py @@ -61,8 +61,8 @@ def run_promptfoo( timeout=120, # Increased timeout for npx fallback (first npx call downloads promptfoo) ) - stdout = result.stdout - stderr = result.stderr + stdout = result.stdout or "" + stderr = result.stderr or "" exit_code = result.returncode if not expect_error and exit_code != 0: From 874955b522629dc37293e002fb6b0e87afa08ca3 Mon Sep 17 00:00:00 2001 From: mldangelo Date: Sun, 11 Jan 2026 01:54:32 -0500 Subject: [PATCH 07/11] fix: address linting issues and add temp output to gitignore - Fix line too long (123 > 120) in test_cli.py - Run ruff format on test files - Add tests/smoke/.temp-output/ to .gitignore Co-Authored-By: Claude Opus 4.5 --- .gitignore | 1 + tests/smoke/test_smoke.py | 20 +++++--------------- tests/test_cli.py | 7 +++---- 3 files changed, 9 insertions(+), 19 deletions(-) diff --git a/.gitignore b/.gitignore index 1056d39..bdc5ce5 100644 --- a/.gitignore +++ b/.gitignore @@ -42,6 +42,7 @@ htmlcov/ .tox/ .mypy_cache/ .ruff_cache/ +tests/smoke/.temp-output/ # Distribution dist/ diff --git a/tests/smoke/test_smoke.py b/tests/smoke/test_smoke.py index f7b889b..04f2d8d 100644 --- a/tests/smoke/test_smoke.py +++ b/tests/smoke/test_smoke.py @@ -150,9 +150,7 @@ class TestEvalCommand: def test_basic_eval(self): """Test basic eval with echo provider.""" config_path = CONFIGS_DIR / "basic.yaml" - stdout, stderr, exit_code = run_promptfoo( - ["eval", "-c", str(config_path), "--no-cache"] - ) + stdout, stderr, exit_code = run_promptfoo(["eval", "-c", str(config_path), "--no-cache"]) assert exit_code == 0, f"Eval failed:\nSTDOUT: {stdout}\nSTDERR: {stderr}" # Should show evaluation results @@ -267,9 +265,7 @@ def test_verbose_flag(self): """Test --verbose flag.""" config_path = CONFIGS_DIR / "basic.yaml" - stdout, stderr, exit_code = run_promptfoo( - ["eval", "-c", str(config_path), "--verbose", "--no-cache"] - ) + stdout, stderr, exit_code = run_promptfoo(["eval", "-c", str(config_path), "--verbose", "--no-cache"]) assert exit_code == 0 # Verbose mode should produce output @@ -283,9 +279,7 @@ def test_success_exit_code(self): """Test exit code 0 when all assertions pass.""" config_path = CONFIGS_DIR / "basic.yaml" - stdout, stderr, exit_code = run_promptfoo( - ["eval", "-c", str(config_path), "--no-cache"] - ) + stdout, stderr, exit_code = run_promptfoo(["eval", "-c", str(config_path), "--no-cache"]) assert exit_code == 0 @@ -365,9 +359,7 @@ def test_contains_assertion(self): """Test contains assertion.""" config_path = CONFIGS_DIR / "basic.yaml" - stdout, stderr, exit_code = run_promptfoo( - ["eval", "-c", str(config_path), "--no-cache"] - ) + stdout, stderr, exit_code = run_promptfoo(["eval", "-c", str(config_path), "--no-cache"]) assert exit_code == 0 # All assertions should pass @@ -377,9 +369,7 @@ def test_multiple_assertions(self): """Test multiple assertions in single test.""" config_path = CONFIGS_DIR / "assertions.yaml" - stdout, stderr, exit_code = run_promptfoo( - ["eval", "-c", str(config_path), "--no-cache"] - ) + stdout, stderr, exit_code = run_promptfoo(["eval", "-c", str(config_path), "--no-cache"]) assert exit_code == 0 diff --git a/tests/test_cli.py b/tests/test_cli.py index 28218ad..82ee073 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -268,7 +268,8 @@ def test_find_external_promptfoo_prevents_recursion_windows(self, monkeypatch: p real_promptfoo = "C:\\npm\\prefix\\promptfoo.cmd" monkeypatch.setattr(sys, "argv", [wrapper_path]) - monkeypatch.setenv("PATH", "C:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python312\\Scripts;C:\\npm\\prefix") + test_path = "C:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python312\\Scripts;C:\\npm\\prefix" + monkeypatch.setenv("PATH", test_path) def mock_which(cmd: str, path: Optional[str] = None) -> Optional[str]: if cmd != "promptfoo": @@ -471,9 +472,7 @@ def test_main_exits_when_neither_external_nor_npx_available( node_path = "C:\\Program Files\\nodejs\\node.exe" if sys.platform == "win32" else "/usr/bin/node" monkeypatch.setattr(sys, "argv", ["promptfoo", "eval"]) - monkeypatch.setattr("shutil.which", lambda cmd, path=None: { - "node": node_path - }.get(cmd)) + monkeypatch.setattr("shutil.which", lambda cmd, path=None: {"node": node_path}.get(cmd)) with pytest.raises(SystemExit) as exc_info: main() From 055b21160b30c66d8ab8b81088130b00ee31d2e3 Mon Sep 17 00:00:00 2001 From: mldangelo Date: Sun, 11 Jan 2026 01:57:18 -0500 Subject: [PATCH 08/11] docs: update AGENTS.md with smoke test documentation - Add comprehensive testing strategy section with unit vs smoke tests - Document test directory structure - Add smoke test details and commands - Update CI/CD section to mention both test types - Update project structure to include tests directory Co-Authored-By: Claude Opus 4.5 --- AGENTS.md | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 70 insertions(+), 7 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 84e5a2d..35f7fc0 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -135,9 +135,12 @@ Runs on every PR and push to main: - **Lint**: Ruff linting (`uv run ruff check src/`) - **Format Check**: Ruff formatting (`uv run ruff format --check src/`) - **Type Check**: mypy static analysis (`uv run mypy src/promptfoo/`) -- **Tests**: pytest on multiple Python versions (3.9, 3.13) and OSes (Ubuntu, Windows) +- **Unit Tests**: Fast tests with mocked dependencies (`uv run pytest -m 'not smoke'`) +- **Smoke Tests**: Integration tests against real CLI (`uv run pytest tests/smoke/`) - **Build**: Package build validation +Tests run on multiple Python versions (3.9, 3.13) and OSes (Ubuntu, Windows). + ### Release Workflow (`.github/workflows/release-please.yml`) Triggered on push to main: @@ -214,7 +217,38 @@ uv run pytest ### Test Structure -Tests are located in the root directory (not yet created, but should be in `tests/` when added). +Tests are organized in the `tests/` directory: + +``` +tests/ +├── __init__.py +├── test_cli.py # Unit tests for CLI wrapper logic +├── test_environment.py # Unit tests for environment detection +├── test_instructions.py # Unit tests for installation instructions +└── smoke/ + ├── __init__.py + ├── README.md # Smoke test documentation + ├── test_smoke.py # Integration tests against real CLI + └── fixtures/ + └── configs/ # YAML configs for smoke tests + ├── basic.yaml + ├── assertions.yaml + └── failing-assertion.yaml +``` + +### Test Types + +**Unit Tests** (`tests/test_*.py`): +- Fast, isolated tests for individual functions +- Mock external dependencies +- Run on every PR + +**Smoke Tests** (`tests/smoke/`): +- Integration tests that run the actual CLI via subprocess +- Use the `echo` provider (no external API dependencies) +- Test the full Python → Node.js integration +- Slower but verify end-to-end functionality +- Marked with `@pytest.mark.smoke` ### Test Matrix @@ -229,16 +263,36 @@ CI tests across: # Install dependencies with dev extras uv sync --extra dev -# Run all tests +# Run all tests (unit + smoke) uv run pytest +# Run only unit tests (fast) +uv run pytest -m 'not smoke' + +# Run only smoke tests (slow, requires Node.js) +uv run pytest tests/smoke/ + # Run with coverage uv run pytest --cov=src/promptfoo +# Run specific test class +uv run pytest tests/test_cli.py::TestMainFunction + # Run specific test -uv run pytest tests/test_cli.py::test_wrapper_detection +uv run pytest tests/smoke/test_smoke.py::TestEvalCommand::test_basic_eval ``` +### Smoke Test Details + +Smoke tests verify critical CLI functionality: +- **Basic CLI**: `--version`, `--help`, unknown commands, missing files +- **Eval Command**: Output formats (JSON, YAML, CSV), flags (`--repeat`, `--verbose`) +- **Exit Codes**: 0 for success, 100 for assertion failures, 1 for errors +- **Echo Provider**: Variable substitution, multiple variables +- **Assertions**: `contains`, `icontains`, failing assertions + +The smoke tests use a 120-second timeout to accommodate the first `npx` call which downloads promptfoo. + ## Security Practices ### 1. No Credentials in Repository @@ -365,14 +419,23 @@ promptfoo-python/ ├── src/ │ └── promptfoo/ │ ├── __init__.py # Package exports -│ └── cli.py # Main wrapper implementation +│ ├── cli.py # Main wrapper implementation +│ ├── environment.py # Environment detection +│ └── instructions.py # Node.js installation instructions +├── tests/ +│ ├── test_cli.py # Unit tests for CLI +│ ├── test_environment.py # Unit tests for environment detection +│ ├── test_instructions.py # Unit tests for instructions +│ └── smoke/ +│ ├── test_smoke.py # Integration smoke tests +│ └── fixtures/configs/ # Test configuration files ├── AGENTS.md # This file (agent documentation) ├── CHANGELOG.md # Auto-generated by release-please ├── CLAUDE.md # Points to AGENTS.md ├── LICENSE # MIT License ├── README.md # User-facing documentation ├── pyproject.toml # Package configuration -├── release-please-config.json # Release-please configuration +├── release-please-config.json # Release-please configuration └── .release-please-manifest.json # Release version tracking ``` @@ -443,5 +506,5 @@ git push --force --- -**Last Updated**: 2026-01-05 +**Last Updated**: 2026-01-11 **Maintained By**: @promptfoo/engineering From 44cdf96b90ab0483354cb1d54eb2ce362fddcdfd Mon Sep 17 00:00:00 2001 From: mldangelo Date: Sun, 11 Jan 2026 02:06:18 -0500 Subject: [PATCH 09/11] style: add return type annotations and fix documentation wording - Add `-> None` return type annotations to all smoke test methods - Add Generator return type to setup_and_teardown fixture - Update documentation to clarify tests run via Python wrapper (not just npx) Co-Authored-By: Claude Opus 4.5 --- tests/smoke/README.md | 2 +- tests/smoke/test_smoke.py | 47 ++++++++++++++++++++------------------- 2 files changed, 25 insertions(+), 24 deletions(-) diff --git a/tests/smoke/README.md b/tests/smoke/README.md index 73d813b..43f964e 100644 --- a/tests/smoke/README.md +++ b/tests/smoke/README.md @@ -6,7 +6,7 @@ These smoke tests verify that the core promptfoo CLI functionality works correct Smoke tests are high-level integration tests that verify the most critical functionality works end-to-end. They: -- Run against the actual installed CLI (via `npx promptfoo`) +- Run against the actual installed CLI via the Python wrapper (using either global promptfoo or npx) - Test the Python wrapper integration with the Node.js CLI - Use the `echo` provider to avoid external API dependencies - Verify command-line arguments, file I/O, and output formats diff --git a/tests/smoke/test_smoke.py b/tests/smoke/test_smoke.py index 04f2d8d..aa6bd4c 100644 --- a/tests/smoke/test_smoke.py +++ b/tests/smoke/test_smoke.py @@ -4,14 +4,15 @@ These tests verify the core evaluation pipeline works correctly using the echo provider (no external API dependencies). -These tests run against the installed promptfoo package via npx, -testing the Python wrapper integration. +These tests run against the installed promptfoo package via the Python wrapper +(using either a globally installed promptfoo CLI or falling back to npx). """ import json import os import shutil import subprocess +from collections.abc import Generator from pathlib import Path from typing import Optional @@ -76,7 +77,7 @@ def run_promptfoo( @pytest.fixture(scope="module", autouse=True) -def setup_and_teardown(): +def setup_and_teardown() -> Generator[None, None, None]: """Create and cleanup output directory for smoke tests.""" OUTPUT_DIR.mkdir(exist_ok=True) yield @@ -87,7 +88,7 @@ def setup_and_teardown(): class TestBasicCLI: """Basic CLI operations smoke tests.""" - def test_version_flag(self): + def test_version_flag(self) -> None: """Test --version flag outputs version.""" stdout, stderr, exit_code = run_promptfoo(["--version"]) @@ -95,7 +96,7 @@ def test_version_flag(self): # Should output a version number (semver format) assert stdout.strip(), "Version output should not be empty" - def test_help_flag(self): + def test_help_flag(self) -> None: """Test --help flag outputs help.""" stdout, stderr, exit_code = run_promptfoo(["--help"]) @@ -103,7 +104,7 @@ def test_help_flag(self): assert "promptfoo" in stdout.lower() assert "eval" in stdout.lower() - def test_eval_help(self): + def test_eval_help(self) -> None: """Test 'eval --help' outputs eval command help.""" stdout, stderr, exit_code = run_promptfoo(["eval", "--help"]) @@ -111,7 +112,7 @@ def test_eval_help(self): assert "--config" in stdout or "-c" in stdout assert "--output" in stdout or "-o" in stdout - def test_unknown_command(self): + def test_unknown_command(self) -> None: """Test unknown command returns error.""" stdout, stderr, exit_code = run_promptfoo( ["unknowncommand123"], @@ -122,7 +123,7 @@ def test_unknown_command(self): output = stdout + stderr assert "unknown" in output.lower() or "not found" in output.lower() - def test_missing_config_file(self): + def test_missing_config_file(self) -> None: """Test missing config file returns error.""" stdout, stderr, exit_code = run_promptfoo( ["eval", "-c", "nonexistent-config-file.yaml"], @@ -147,7 +148,7 @@ def test_missing_config_file(self): class TestEvalCommand: """Eval command smoke tests.""" - def test_basic_eval(self): + def test_basic_eval(self) -> None: """Test basic eval with echo provider.""" config_path = CONFIGS_DIR / "basic.yaml" stdout, stderr, exit_code = run_promptfoo(["eval", "-c", str(config_path), "--no-cache"]) @@ -156,7 +157,7 @@ def test_basic_eval(self): # Should show evaluation results assert "pass" in stdout.lower() or "✓" in stdout or "success" in stdout.lower() - def test_json_output(self): + def test_json_output(self) -> None: """Test eval outputs valid JSON.""" config_path = CONFIGS_DIR / "basic.yaml" output_path = OUTPUT_DIR / "output.json" @@ -185,7 +186,7 @@ def test_json_output(self): assert "Hello" in output_text assert "World" in output_text - def test_yaml_output(self): + def test_yaml_output(self) -> None: """Test eval outputs YAML format.""" config_path = CONFIGS_DIR / "basic.yaml" output_path = OUTPUT_DIR / "output.yaml" @@ -203,7 +204,7 @@ def test_yaml_output(self): assert "results:" in content - def test_csv_output(self): + def test_csv_output(self) -> None: """Test eval outputs CSV format.""" config_path = CONFIGS_DIR / "basic.yaml" output_path = OUTPUT_DIR / "output.csv" @@ -224,7 +225,7 @@ def test_csv_output(self): # CSV should have comma-separated values assert "," in lines[0] - def test_max_concurrency_flag(self): + def test_max_concurrency_flag(self) -> None: """Test --max-concurrency flag.""" config_path = CONFIGS_DIR / "basic.yaml" @@ -234,7 +235,7 @@ def test_max_concurrency_flag(self): assert exit_code == 0 - def test_repeat_flag(self): + def test_repeat_flag(self) -> None: """Test --repeat flag runs tests multiple times.""" config_path = CONFIGS_DIR / "basic.yaml" output_path = OUTPUT_DIR / "repeat-output.json" @@ -261,7 +262,7 @@ def test_repeat_flag(self): # With repeat=2 and 1 test case, we should have 2 results assert len(data["results"]["results"]) == 2 - def test_verbose_flag(self): + def test_verbose_flag(self) -> None: """Test --verbose flag.""" config_path = CONFIGS_DIR / "basic.yaml" @@ -275,7 +276,7 @@ def test_verbose_flag(self): class TestExitCodes: """Exit code smoke tests.""" - def test_success_exit_code(self): + def test_success_exit_code(self) -> None: """Test exit code 0 when all assertions pass.""" config_path = CONFIGS_DIR / "basic.yaml" @@ -283,7 +284,7 @@ def test_success_exit_code(self): assert exit_code == 0 - def test_failure_exit_code(self): + def test_failure_exit_code(self) -> None: """Test exit code 100 when assertions fail.""" config_path = CONFIGS_DIR / "failing-assertion.yaml" @@ -295,7 +296,7 @@ def test_failure_exit_code(self): # Exit code 100 indicates test failures assert exit_code == 100, f"Expected exit code 100, got {exit_code}" - def test_config_error_exit_code(self): + def test_config_error_exit_code(self) -> None: """Test exit code 1 for config errors.""" stdout, stderr, exit_code = run_promptfoo( ["eval", "-c", "nonexistent-file.yaml", "--no-cache"], @@ -308,7 +309,7 @@ def test_config_error_exit_code(self): class TestEchoProvider: """Echo provider smoke tests.""" - def test_echo_provider_basic(self): + def test_echo_provider_basic(self) -> None: """Test echo provider returns the prompt.""" config_path = CONFIGS_DIR / "basic.yaml" output_path = OUTPUT_DIR / "echo-test.json" @@ -330,7 +331,7 @@ def test_echo_provider_basic(self): assert "Hello" in output assert "World" in output - def test_echo_provider_with_multiple_vars(self): + def test_echo_provider_with_multiple_vars(self) -> None: """Test echo provider with multiple variables.""" config_path = CONFIGS_DIR / "assertions.yaml" output_path = OUTPUT_DIR / "echo-multi-var.json" @@ -355,7 +356,7 @@ def test_echo_provider_with_multiple_vars(self): class TestAssertions: """Assertion smoke tests.""" - def test_contains_assertion(self): + def test_contains_assertion(self) -> None: """Test contains assertion.""" config_path = CONFIGS_DIR / "basic.yaml" @@ -365,7 +366,7 @@ def test_contains_assertion(self): # All assertions should pass assert "pass" in stdout.lower() or "✓" in stdout or "success" in stdout.lower() - def test_multiple_assertions(self): + def test_multiple_assertions(self) -> None: """Test multiple assertions in single test.""" config_path = CONFIGS_DIR / "assertions.yaml" @@ -373,7 +374,7 @@ def test_multiple_assertions(self): assert exit_code == 0 - def test_failing_assertion(self): + def test_failing_assertion(self) -> None: """Test failing assertion.""" config_path = CONFIGS_DIR / "failing-assertion.yaml" From 79e74ac0815d18d4f89be4eb05a23dc5334536be Mon Sep 17 00:00:00 2001 From: mldangelo Date: Sun, 11 Jan 2026 02:33:02 -0500 Subject: [PATCH 10/11] fix: resolve Windows CI test failures - Add os.path.isfile mock to unit test to prevent _find_windows_promptfoo() from finding real promptfoo installations on Windows CI runners - Add UTF-8 encoding with error replacement to smoke tests to handle Windows cp1252 encoding issues with npx output - Add warmup_npx fixture to pre-download promptfoo via npx before tests, preventing timeout on first test when npx needs to download package Co-Authored-By: Claude Opus 4.5 --- tests/smoke/test_smoke.py | 35 ++++++++++++++++++++++++++++++++++- tests/test_cli.py | 3 +++ 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/tests/smoke/test_smoke.py b/tests/smoke/test_smoke.py index aa6bd4c..7f0fe10 100644 --- a/tests/smoke/test_smoke.py +++ b/tests/smoke/test_smoke.py @@ -33,6 +33,7 @@ def run_promptfoo( cwd: Optional[Path] = None, expect_error: bool = False, env: Optional[dict[str, str]] = None, + timeout: int = 120, ) -> tuple[str, str, int]: """ Run promptfoo CLI and capture output. @@ -42,6 +43,7 @@ def run_promptfoo( cwd: Working directory for the command expect_error: If True, don't raise on non-zero exit env: Environment variables to set + timeout: Timeout in seconds (default 120) Returns: Tuple of (stdout, stderr, exit_code) @@ -59,7 +61,11 @@ def run_promptfoo( capture_output=True, text=True, env=full_env, - timeout=120, # Increased timeout for npx fallback (first npx call downloads promptfoo) + timeout=timeout, + # Use UTF-8 encoding with error replacement to handle Windows encoding issues + # Windows default cp1252 can't decode some bytes in npx/promptfoo output + encoding="utf-8", + errors="replace", ) stdout = result.stdout or "" @@ -85,6 +91,33 @@ def setup_and_teardown() -> Generator[None, None, None]: shutil.rmtree(OUTPUT_DIR) +@pytest.fixture(scope="module", autouse=True) +def warmup_npx() -> Generator[None, None, None]: + """ + Warm up npx by running promptfoo --version before all tests. + + On npx fallback (when promptfoo isn't globally installed), the first npx call + downloads and caches promptfoo, which can take several minutes on Windows. + Running this warmup prevents the first actual test from timing out. + """ + # Run with a longer timeout (5 minutes) for the initial npx download + try: + subprocess.run( + ["promptfoo", "--version"], + capture_output=True, + timeout=300, # 5 minutes for initial npx download + encoding="utf-8", + errors="replace", + ) + except subprocess.TimeoutExpired: + # If warmup times out, tests will likely fail but let them run anyway + pass + except FileNotFoundError: + # promptfoo not installed, tests will fail but let them try + pass + yield + + class TestBasicCLI: """Basic CLI operations smoke tests.""" diff --git a/tests/test_cli.py b/tests/test_cli.py index 82ee073..0c311d8 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -473,6 +473,9 @@ def test_main_exits_when_neither_external_nor_npx_available( monkeypatch.setattr(sys, "argv", ["promptfoo", "eval"]) monkeypatch.setattr("shutil.which", lambda cmd, path=None: {"node": node_path}.get(cmd)) + # Also mock os.path.isfile to prevent _find_windows_promptfoo() from finding + # a real promptfoo installation on Windows CI runners + monkeypatch.setattr(os.path, "isfile", lambda p: False) with pytest.raises(SystemExit) as exc_info: main() From 02acd12c40e11a786face89f296c55e20eed8e05 Mon Sep 17 00:00:00 2001 From: mldangelo Date: Sun, 11 Jan 2026 02:41:06 -0500 Subject: [PATCH 11/11] fix: mock telemetry in CLI unit tests Add record_wrapper_used mock to tests that mock subprocess.run to prevent PostHog telemetry calls from interfering with mock call counts. Co-Authored-By: Claude Opus 4.5 --- tests/test_cli.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/test_cli.py b/tests/test_cli.py index 0c311d8..0e4a1c0 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -386,6 +386,8 @@ def test_main_uses_external_promptfoo_when_available(self, monkeypatch: pytest.M "shutil.which", lambda cmd, path=None: {"node": "/usr/bin/node", "promptfoo": "/usr/local/bin/promptfoo"}.get(cmd), ) + # Mock telemetry to avoid PostHog calls during test + monkeypatch.setattr("promptfoo.cli.record_wrapper_used", lambda mode: None) mock_result = subprocess.CompletedProcess([], 0) mock_run = MagicMock(return_value=mock_result) @@ -421,6 +423,8 @@ def test_main_skips_external_when_wrapper_env_set(self, monkeypatch: pytest.Monk "promptfoo": "/usr/local/bin/promptfoo", }.get(cmd), ) + # Mock telemetry to avoid PostHog calls during test + monkeypatch.setattr("promptfoo.cli.record_wrapper_used", lambda mode: None) mock_result = subprocess.CompletedProcess([], 0) mock_run = MagicMock(return_value=mock_result) @@ -444,6 +448,8 @@ def test_main_falls_back_to_npx(self, monkeypatch: pytest.MonkeyPatch) -> None: monkeypatch.setattr( "shutil.which", lambda cmd, path=None: {"node": "/usr/bin/node", "npx": "/usr/bin/npx"}.get(cmd) ) + # Mock telemetry to avoid PostHog calls during test + monkeypatch.setattr("promptfoo.cli.record_wrapper_used", lambda mode: None) mock_result = subprocess.CompletedProcess([], 0) mock_run = MagicMock(return_value=mock_result) @@ -490,6 +496,8 @@ def test_main_passes_arguments_correctly(self, monkeypatch: pytest.MonkeyPatch) monkeypatch.setattr( "shutil.which", lambda cmd, path=None: {"node": "/usr/bin/node", "npx": "/usr/bin/npx"}.get(cmd) ) + # Mock telemetry to avoid PostHog calls during test + monkeypatch.setattr("promptfoo.cli.record_wrapper_used", lambda mode: None) mock_result = subprocess.CompletedProcess([], 0) mock_run = MagicMock(return_value=mock_result) @@ -512,6 +520,8 @@ def test_main_returns_subprocess_exit_code(self, monkeypatch: pytest.MonkeyPatch monkeypatch.setattr( "shutil.which", lambda cmd, path=None: {"node": "/usr/bin/node", "npx": "/usr/bin/npx"}.get(cmd) ) + # Mock telemetry to avoid PostHog calls during test + monkeypatch.setattr("promptfoo.cli.record_wrapper_used", lambda mode: None) # Test non-zero exit code mock_result = subprocess.CompletedProcess([], 42)