Reference for pytest fixture patterns in SCBench problem tests.
Every problem's tests/conftest.py must include these fixtures:
"""Pytest configuration for my_problem evaluation."""
import shlex
import pytest
def pytest_addoption(parser):
    """Register the CLI options required for SCBench evaluation.

    Adds ``--entrypoint`` and ``--checkpoint`` to *parser*, both marked
    ``required=True``.
    """
    for flag in ("--entrypoint", "--checkpoint"):
        parser.addoption(flag, required=True)
@pytest.fixture(scope="session")
def entrypoint_argv(request):
    """Return the submission entrypoint as an argv list.

    The value of ``--entrypoint`` is the command that invokes the agent's
    submission; it is tokenised with ``shlex.split`` so the result can be
    passed to ``subprocess.run()``.

    Example: "python main.py" -> ["python", "main.py"]
    """
    raw_command = request.config.getoption("--entrypoint")
    return shlex.split(raw_command)
@pytest.fixture(scope="session")
def checkpoint_name(request):
    """Return the name of the checkpoint under evaluation.

    The value comes straight from ``--checkpoint`` (e.g., "checkpoint_1")
    and is useful for checkpoint-aware test logic.
    """
    selected = request.config.getoption("--checkpoint")
    return selected


from pathlib import Path
@pytest.fixture(scope="session")
def assets_dir():
"""Path to test assets directory.
Returns: Path to tests/assets/
"""
return Path(__file__).parent / "assets"@pytest.fixture(scope="session")
def data_dir():
    """Locate the test data directory next to this conftest.py.

    Returns: Path to tests/data/
    """
    tests_root = Path(__file__).parent
    return tests_root.joinpath("data")
@pytest.fixture(scope="session")
def checkpoint_data_dir(data_dir, checkpoint_name):
    """Locate the data directory of the checkpoint being evaluated.

    Returns: Path to tests/data/checkpoint_N/
    """
    return data_dir.joinpath(checkpoint_name)


import os
@pytest.fixture(scope="session")
def files_dir():
"""Path to problem's static files directory.
Uses SCBENCH_ASSET_FILES environment variable if set,
otherwise falls back to tests/assets/files.
"""
env_path = os.environ.get("SCBENCH_ASSET_FILES")
if env_path:
return Path(env_path)
return Path(__file__).parent / "assets" / "files"Use scope="session" for fixtures that don't change between tests:
@pytest.fixture(scope="session")
def entrypoint_argv(request):
"""Shared across all tests in the session."""
return shlex.split(request.config.getoption("--entrypoint"))Use for fixtures that need fresh state per test:
@pytest.fixture
def temp_workspace(tmp_path):
"""Fresh temporary directory for each test."""
workspace = tmp_path / "workspace"
workspace.mkdir()
return workspaceUse for fixtures shared within a test file:
@pytest.fixture(scope="module")
def loaded_cases():
"""Load cases once per test file."""
return load_cases(CHECKPOINT_DIR / "core")For problems with inline test data:
"""Minimal conftest.py for inline tests."""
import shlex
import pytest
def pytest_addoption(parser):
    """Register the required ``--entrypoint`` and ``--checkpoint`` options."""
    required_flags = ("--entrypoint", "--checkpoint")
    for option in required_flags:
        parser.addoption(option, required=True)
@pytest.fixture(scope="session")
def entrypoint_argv(request):
    """Return the ``--entrypoint`` command split into an argv list."""
    entrypoint = request.config.getoption("--entrypoint")
    return shlex.split(entrypoint)
@pytest.fixture(scope="session")
def checkpoint_name(request):
return request.config.getoption("--checkpoint")For problems with external test data:
"""conftest.py with asset directories."""
import shlex
from pathlib import Path
import pytest
def pytest_addoption(parser):
    """Register the required ``--entrypoint`` and ``--checkpoint`` options."""
    for name in ("entrypoint", "checkpoint"):
        parser.addoption("--" + name, required=True)
@pytest.fixture(scope="session")
def entrypoint_argv(request):
    """Return the ``--entrypoint`` command split into an argv list."""
    config = request.config
    return shlex.split(config.getoption("--entrypoint"))
@pytest.fixture(scope="session")
def checkpoint_name(request):
    """Return the value passed via ``--checkpoint``."""
    config = request.config
    return config.getoption("--checkpoint")
@pytest.fixture(scope="session")
def assets_dir():
    """Return the path to tests/assets/ (beside this conftest.py)."""
    conftest_dir = Path(__file__).parent
    return conftest_dir / "assets"
@pytest.fixture(scope="session")
def data_dir():
    """Return the path to tests/data/ (beside this conftest.py)."""
    conftest_dir = Path(__file__).parent
    return conftest_dir / "data"
@pytest.fixture(scope="session")
def checkpoint_data(data_dir, checkpoint_name):
"""Path to current checkpoint's data."""
return data_dir / checkpoint_nameFor problems with complex test setup:
"""conftest.py with helper fixtures."""
import json
import shlex
import subprocess
from pathlib import Path
import pytest
def pytest_addoption(parser):
    """Register the required ``--entrypoint`` and ``--checkpoint`` options."""
    add_required = lambda flag: parser.addoption(flag, required=True)
    add_required("--entrypoint")
    add_required("--checkpoint")
@pytest.fixture(scope="session")
def entrypoint_argv(request):
    """Return the ``--entrypoint`` command split into an argv list."""
    command_line = request.config.getoption("--entrypoint")
    argv = shlex.split(command_line)
    return argv
@pytest.fixture(scope="session")
def checkpoint_name(request):
    """Return the value passed via ``--checkpoint``."""
    name = request.config.getoption("--checkpoint")
    return name
@pytest.fixture(scope="session")
def run_command(entrypoint_argv):
    """Factory fixture for running the submission as a subprocess.

    Usage:
        def test_example(run_command):
            result = run_command({"key": "value"})
            assert result.returncode == 0

    The returned callable takes:
        input_data: Payload for the command's stdin. Dicts and lists are
            serialised to JSON; any other value (e.g. a ready-made string
            or ``None``) is passed through unchanged.
        extra_args: Optional extra argv entries appended after the
            entrypoint.

    It returns the ``subprocess.CompletedProcess`` with stdout/stderr
    captured as text. ``subprocess.TimeoutExpired`` is raised if the
    command runs longer than 30 seconds.
    """
    def _run(input_data, extra_args=None):
        # Build a new list so the session-scoped entrypoint_argv is never mutated.
        cmd = [*entrypoint_argv, *(extra_args or ())]
        # A JSON array is as valid a payload as a JSON object. Encoding only
        # dicts would hand a raw list to subprocess.run(), which cannot write
        # it to a text-mode stdin.
        if isinstance(input_data, (dict, list)):
            input_str = json.dumps(input_data)
        else:
            input_str = input_data
        return subprocess.run(
            cmd,
            input=input_str,
            capture_output=True,
            text=True,
            timeout=30,
        )

    return _run
@pytest.fixture(scope="session")
def parse_output():
"""Factory for parsing command output."""
def _parse(result):
if result.returncode != 0:
return None
return json.loads(result.stdout)
return _parse@pytest.fixture(scope="session")
def expected_features(checkpoint_name):
"""Features available in current checkpoint."""
features = {"basic_parse"}
if checkpoint_name in {"checkpoint_2", "checkpoint_3"}:
features.add("advanced_filter")
if checkpoint_name == "checkpoint_3":
features.add("aggregation")
return features@pytest.fixture(scope="session")
def output_format(checkpoint_name):
"""Expected output format for checkpoint."""
if checkpoint_name == "checkpoint_1":
return "json"
else:
return "jsonl"@pytest.fixture(scope="session")
def create_runner(entrypoint_argv):
"""Create a command runner with specific config."""
def _create(timeout=30, env=None):
def runner(input_data, args=None):
cmd = entrypoint_argv.copy()
if args:
cmd.extend(args)
return subprocess.run(
cmd,
input=json.dumps(input_data),
capture_output=True,
text=True,
timeout=timeout,
env=env,
)
return runner
return _create@pytest.fixture(scope="session")
def load_cases(data_dir):
    """Factory for loading test cases from a directory.

    The returned callable takes ``group_name`` and ``checkpoint_name`` and
    reads every sub-directory of ``data_dir / checkpoint_name / group_name``
    in sorted order. Each case is a dict holding the directory name under
    "id", the parsed ``case.yaml`` under "case", and the parsed
    ``expected.json`` under "expected". Non-directory entries are skipped.
    """
    # NOTE(review): relies on a module-level ``yaml`` name (presumably
    # PyYAML); no import for it is visible in this reference - confirm that
    # conftest.py imports it.
    def _read_case(case_dir):
        case_spec = yaml.safe_load((case_dir / "case.yaml").read_text())
        expected_output = json.loads((case_dir / "expected.json").read_text())
        return {
            "id": case_dir.name,
            "case": case_spec,
            "expected": expected_output,
        }

    def _load(group_name, checkpoint_name):
        group_dir = data_dir / checkpoint_name / group_name
        return [
            _read_case(entry)
            for entry in sorted(group_dir.iterdir())
            if entry.is_dir()
        ]

    return _load


# Good: Load once, share across tests
@pytest.fixture(scope="session")
def reference_data(assets_dir):
    """Parse large_reference.json once and share the result for the session."""
    reference_path = assets_dir / "large_reference.json"
    with open(reference_path) as handle:
        return json.load(handle)
# Bad: Reload for every test
@pytest.fixture
def reference_data(assets_dir):
    """Parse large_reference.json; with the default scope this runs per test."""
    reference_path = assets_dir / "large_reference.json"
    with open(reference_path) as handle:
        parsed = json.load(handle)
    return parsed


# Good: Single responsibility
@pytest.fixture(scope="session")
def entrypoint_argv(request):
    """Only responsibility: turn ``--entrypoint`` into an argv list."""
    option = request.config.getoption("--entrypoint")
    return shlex.split(option)
@pytest.fixture(scope="session")
def run_command(entrypoint_argv):
    """Only responsibility: run the command built by ``entrypoint_argv``."""
    def _run(input_data):
        # "..." stands in for the remaining subprocess.run() arguments,
        # which this illustrative snippet leaves out.
        completed = subprocess.run(entrypoint_argv, ...)
        return completed

    return _run
# Bad: Mixed responsibilities
@pytest.fixture
def everything(request):
argv = shlex.split(request.config.getoption("--entrypoint"))
def run(data):
return subprocess.run(argv, ...)
return {"argv": argv, "run": run}@pytest.fixture(scope="session")
def strip_regression_fields():
"""Remove fields that change between checkpoints.
Some fields (like 'version' or 'timestamp') are added in later
checkpoints. When running regression tests, strip these fields
to allow comparison with checkpoint_1 expectations.
"""
def _strip(data):
data = data.copy()
for field in ["version", "timestamp", "checkpoint"]:
data.pop(field, None)
return data
return _strip- Markers - Test categorization
- Test Data - Organizing test data
- CLI Testing - CLI patterns