Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 25 additions & 4 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,29 @@ jobs:
sudo apt-get install -y libopenmpi-dev openmpi-common openmpi-bin

- name: Install package and test dependencies
run: uv sync # Automatically creates the environment and locks dependencies
run: uv sync --extra test

# 4. Heavy Lifting (Runs only if linting passed)
- name: Run unit tests
run: uv run pytest # Runs your tests safely inside the managed environment
- name: Install subpackages (vdb_benchmark, kv_cache_benchmark)
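# These have their own pyproject.toml and tests; install editable so
# their imports resolve. NOTE: a plain `-e ./pkg` install does not pull in
# optional extras; request them explicitly (e.g. `-e "./pkg[test]"`) if
# the suites depend on a `test` extra.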
run: |
uv pip install -e ./vdb_benchmark
uv pip install -e ./kv_cache_benchmark

# 4. Run the four test suites separately.
# We can't collect them together: `tests/` and `vdb_benchmark/tests/`
# both have a top-level package named `tests` whose conftest.py modules
# collide under pytest's rootdir-relative import (ImportPathMismatchError).
# Each suite's pyproject defines its own `slow` marker and `-m 'not slow'`
# default, so running one pytest invocation per suite picks up the right config.
- name: Run root test suite (tests/)
run: uv run pytest tests

- name: Run mlpstorage_py test suite
run: uv run pytest mlpstorage_py/tests

- name: Run vdb_benchmark test suite
run: uv run pytest vdb_benchmark/tests

- name: Run kv_cache_benchmark test suite
run: uv run pytest kv_cache_benchmark/tests
5 changes: 4 additions & 1 deletion kv_cache_benchmark/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -110,4 +110,7 @@ ignore_missing_imports = true
testpaths = ["tests", "."]
python_files = ["test_*.py"]
python_functions = ["test_*"]
addopts = "-v --tb=short"
addopts = "-v --tb=short -m 'not slow'"
markers = [
"slow: tests that take >5s (e.g., large GPU-overflow allocations, profiling). Excluded from the default suite; opt in with `pytest -m slow` (or `pytest -m ''` to run everything).",
]
2 changes: 2 additions & 0 deletions kv_cache_benchmark/tests/test_kv_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -1102,6 +1102,7 @@ def test_allocation_prefers_gpu(self, multi_tier_cache_with_gpu):
assert success is True
assert location == 'gpu'

@pytest.mark.slow
def test_gpu_overflow_to_cpu(self, multi_tier_cache_with_gpu):
"""When GPU is full, should overflow to CPU."""
# Fill GPU with large allocations
Expand Down Expand Up @@ -3899,6 +3900,7 @@ def test_part5_one_tier_nvme_only_eviction(self, tiny_model):
class TestBottleneckProfiling:
"""Profile bottleneck detection in the KV cache benchmark."""

@pytest.mark.slow
def test_profile_allocate_vs_access_overhead(self):
"""Profile allocate vs access operations to identify bottleneck ratios."""
import time as time_mod
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "mlpstorage"
version = "3.0.23"
version = "3.0.25"
description = "MLPerf Storage Benchmark Suite"
readme = "README.md"
license = {text = "Apache-2.0"}
Expand Down
1 change: 1 addition & 0 deletions tests/integration/test_canonical_layout_end_to_end.py
Original file line number Diff line number Diff line change
Expand Up @@ -402,6 +402,7 @@ class TestInitThenRunFullCliDispatch:
runs on any dev box (does not require DLIO/openmpi).
"""

@pytest.mark.slow
def test_init_then_closed_datagen_no_env_var(self, tmp_path, monkeypatch):
"""RED today: the second invocation raises ConfigurationError E101
even though `mlpstorage init` wrote a valid sentinel.
Expand Down
5 changes: 5 additions & 0 deletions tests/unit/test_cluster_collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -3554,6 +3554,11 @@ def test_rank0_emits_markers_and_non_rank0_silent(
["probe", str(tmp_path), "silence-test-uuid"],
)

# The probe's rank-0 D-49 quiesce path sleeps 5s; neutralize for the
# unit test (we're only locking the stdout marker contract).
import time as _time
monkeypatch.setattr(_time, "sleep", lambda *_a, **_kw: None)

from mlpstorage_py.cluster_collector import SHARED_FS_PROBE_SCRIPT

captured = io.StringIO()
Expand Down
6 changes: 6 additions & 0 deletions tests/unit/test_shared_fs_probe.py
Original file line number Diff line number Diff line change
Expand Up @@ -618,15 +618,21 @@ class _FakeMPI:
saved_argv = sys.argv
saved_mpi4py = sys.modules.get("mpi4py")
saved_mpi = sys.modules.get("mpi4py.MPI")
# The probe's rank-0 D-49 quiesce path sleeps 5s; neutralize for the
# unit test (we're only locking call ordering, not timing).
import time as _time
saved_sleep = _time.sleep
try:
sys.modules["mpi4py"] = fake_mpi4py
sys.modules["mpi4py.MPI"] = _FakeMPI()
_time.sleep = lambda *_a, **_kw: None
sys.argv = ["<probe>", str(tmp_path), "test-uuid", out_file]
namespace = {"__name__": "__main__"}
# The heredoc body calls sys.exit at the end; trap it.
with pytest.raises(SystemExit):
exec(SHARED_FS_PROBE_SCRIPT, namespace)
finally:
_time.sleep = saved_sleep
sys.argv = saved_argv
if saved_mpi4py is not None:
sys.modules["mpi4py"] = saved_mpi4py
Expand Down
6 changes: 5 additions & 1 deletion tests/unit/test_validation_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,11 @@ def test_collects_multiple_errors(self, mock_dlio, mock_mpi):
mock_logger = MagicMock()

with pytest.raises(DependencyError) as exc_info:
validate_benchmark_environment(args, logger=mock_logger)
# skip_remote_checks: hosts=['node1','node2'] would otherwise
# trigger a real SSH probe to nonexistent hosts (~20s of
# connect timeouts); this test only asserts that multiple
# errors accumulate, which the MPI+DLIO mocks already cover.
validate_benchmark_environment(args, logger=mock_logger, skip_remote_checks=True)

# First error should be raised (MPI)
assert "MPI not found" in str(exc_info.value)
Expand Down
2 changes: 1 addition & 1 deletion uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading