mlcommons · FileSystemGuy · Jun 27, 2026 · Jun 27, 2026 · Jun 27, 2026 · Jun 27, 2026
@@ -45,8 +45,29 @@ jobs:
         sudo apt-get install -y libopenmpi-dev openmpi-common openmpi-bin 
 
     - name: Install package and test dependencies
-      run: uv sync # Automatically creates the environment and locks dependencies
+      run: uv sync --extra test
 
-    # 4. Heavy Lifting (Runs only if linting passed)
-    - name: Run unit tests
-      run: uv run pytest # Runs your tests safely inside the managed environment
+    - name: Install subpackages (vdb_benchmark, kv_cache_benchmark)
+      # These have their own pyproject.toml and tests; install editable with
+      # the `test` extra so their imports and test-only dependencies resolve.
+      run: |
+        uv pip install -e "./vdb_benchmark[test]"
+        uv pip install -e "./kv_cache_benchmark[test]"
+
+    # 4. Run the four test suites separately.
+    # We can't collect them in one session: `tests/` and `vdb_benchmark/tests/`
+    # are both top-level packages named `tests`, so their conftest.py modules
+    # collide on import (pytest raises ImportPathMismatchError).
+    # Each suite's pyproject defines its own `slow` marker and `-m 'not slow'`
+    # default, so running each suite as a separate pytest process is correct.
+    - name: Run root test suite (tests/)
+      run: uv run pytest tests
+
+    - name: Run mlpstorage_py test suite
+      run: uv run pytest mlpstorage_py/tests
+
+    - name: Run vdb_benchmark test suite
+      run: uv run pytest vdb_benchmark/tests
+
+    - name: Run kv_cache_benchmark test suite
+      run: uv run pytest kv_cache_benchmark/tests
@@ -110,4 +110,7 @@ ignore_missing_imports = true
 testpaths = ["tests", "."]
 python_files = ["test_*.py"]
 python_functions = ["test_*"]
-addopts = "-v --tb=short"
+addopts = "-v --tb=short -m 'not slow'"
+markers = [
+    "slow: tests that take >5s (e.g., large GPU-overflow allocations, profiling). Excluded from the default suite; opt in with `pytest -m slow` (or `pytest -m ''` to run everything).",
+]
@@ -1102,6 +1102,7 @@ def test_allocation_prefers_gpu(self, multi_tier_cache_with_gpu):
         assert success is True
         assert location == 'gpu'
 
+    @pytest.mark.slow
     def test_gpu_overflow_to_cpu(self, multi_tier_cache_with_gpu):
         """When GPU is full, should overflow to CPU."""
         # Fill GPU with large allocations
@@ -3899,6 +3900,7 @@ def test_part5_one_tier_nvme_only_eviction(self, tiny_model):
 class TestBottleneckProfiling:
     """Profile bottleneck detection in the KV cache benchmark."""
 
+    @pytest.mark.slow
     def test_profile_allocate_vs_access_overhead(self):
         """Profile allocate vs access operations to identify bottleneck ratios."""
         import time as time_mod

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "mlpstorage"
-version = "3.0.23"
+version = "3.0.25"
 description = "MLPerf Storage Benchmark Suite"
 readme = "README.md"
 license = {text = "Apache-2.0"}

@@ -402,6 +402,7 @@ class TestInitThenRunFullCliDispatch:
     runs on any dev box (does not require DLIO/openmpi).
     """
 
+    @pytest.mark.slow
     def test_init_then_closed_datagen_no_env_var(self, tmp_path, monkeypatch):
         """RED today: the second invocation raises ConfigurationError E101
         even though `mlpstorage init` wrote a valid sentinel.

@@ -3554,6 +3554,11 @@ def test_rank0_emits_markers_and_non_rank0_silent(
             ["probe", str(tmp_path), "silence-test-uuid"],
         )
 
+        # The probe's rank-0 D-49 quiesce path sleeps 5s; neutralize for the
+        # unit test (we're only locking the stdout marker contract).
+        import time as _time
+        monkeypatch.setattr(_time, "sleep", lambda *_a, **_kw: None)
+
         from mlpstorage_py.cluster_collector import SHARED_FS_PROBE_SCRIPT
 
         captured = io.StringIO()

@@ -618,15 +618,21 @@ class _FakeMPI:
         saved_argv = sys.argv
         saved_mpi4py = sys.modules.get("mpi4py")
         saved_mpi = sys.modules.get("mpi4py.MPI")
+        # The probe's rank-0 D-49 quiesce path sleeps 5s; neutralize for the
+        # unit test (we're only locking call ordering, not timing).
+        import time as _time
+        saved_sleep = _time.sleep
         try:
             sys.modules["mpi4py"] = fake_mpi4py
             sys.modules["mpi4py.MPI"] = _FakeMPI()
+            _time.sleep = lambda *_a, **_kw: None
             sys.argv = ["<probe>", str(tmp_path), "test-uuid", out_file]
             namespace = {"__name__": "__main__"}
             # The heredoc body calls sys.exit at the end; trap it.
             with pytest.raises(SystemExit):
                 exec(SHARED_FS_PROBE_SCRIPT, namespace)
         finally:
+            _time.sleep = saved_sleep
             sys.argv = saved_argv
             if saved_mpi4py is not None:
                 sys.modules["mpi4py"] = saved_mpi4py

@@ -155,7 +155,11 @@ def test_collects_multiple_errors(self, mock_dlio, mock_mpi):
         mock_logger = MagicMock()
 
         with pytest.raises(DependencyError) as exc_info:
-            validate_benchmark_environment(args, logger=mock_logger)
+            # skip_remote_checks: hosts=['node1','node2'] would otherwise
+            # trigger a real SSH probe to nonexistent hosts (~20s of
+            # connect timeouts); this test only asserts that multiple
+            # errors accumulate, which the MPI+DLIO mocks already cover.
+            validate_benchmark_environment(args, logger=mock_logger, skip_remote_checks=True)
 
         # First error should be raised (MPI)
         assert "MPI not found" in str(exc_info.value)