From e04551231590e76a1415f874ac4b25b7c4a8f73e Mon Sep 17 00:00:00 2001 From: Curtis Anderson Date: Fri, 26 Jun 2026 17:12:34 -0700 Subject: [PATCH 1/4] test(perf): eliminate ~30s of unintended waits in three fast-lane tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - test_collects_multiple_errors: pass skip_remote_checks=True. Both dependency checks are mocked to raise, but the test left hosts=['node1','node2'] in args, which triggered a real SSH probe to nonexistent hosts and ate ~20s of TCP connect timeouts before the assertion ran. - test_bcast_precedes_barrier_in_executed_heredoc_with_mocked_mpi4py: patch time.sleep around the in-process exec of SHARED_FS_PROBE_SCRIPT. The probe's rank-0 D-49 quiesce path calls time.sleep(5.0); the unit test only locks call ordering, not timing. - test_rank0_emits_markers_and_non_rank0_silent[0]: same root cause — rank 0 hits the 5s quiesce. monkeypatch time.sleep for the test. No behavioral changes to production code. --- tests/unit/test_cluster_collector.py | 5 +++++ tests/unit/test_shared_fs_probe.py | 6 ++++++ tests/unit/test_validation_helpers.py | 6 +++++- 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/tests/unit/test_cluster_collector.py b/tests/unit/test_cluster_collector.py index a5eda47b..2489e4b3 100755 --- a/tests/unit/test_cluster_collector.py +++ b/tests/unit/test_cluster_collector.py @@ -3554,6 +3554,11 @@ def test_rank0_emits_markers_and_non_rank0_silent( ["probe", str(tmp_path), "silence-test-uuid"], ) + # The probe's rank-0 D-49 quiesce path sleeps 5s; neutralize for the + # unit test (we're only locking the stdout marker contract). 
+ import time as _time + monkeypatch.setattr(_time, "sleep", lambda *_a, **_kw: None) + from mlpstorage_py.cluster_collector import SHARED_FS_PROBE_SCRIPT captured = io.StringIO() diff --git a/tests/unit/test_shared_fs_probe.py b/tests/unit/test_shared_fs_probe.py index a5c1bf0f..a0a94a8e 100644 --- a/tests/unit/test_shared_fs_probe.py +++ b/tests/unit/test_shared_fs_probe.py @@ -618,15 +618,21 @@ class _FakeMPI: saved_argv = sys.argv saved_mpi4py = sys.modules.get("mpi4py") saved_mpi = sys.modules.get("mpi4py.MPI") + # The probe's rank-0 D-49 quiesce path sleeps 5s; neutralize for the + # unit test (we're only locking call ordering, not timing). + import time as _time + saved_sleep = _time.sleep try: sys.modules["mpi4py"] = fake_mpi4py sys.modules["mpi4py.MPI"] = _FakeMPI() + _time.sleep = lambda *_a, **_kw: None sys.argv = ["", str(tmp_path), "test-uuid", out_file] namespace = {"__name__": "__main__"} # The heredoc body calls sys.exit at the end; trap it. with pytest.raises(SystemExit): exec(SHARED_FS_PROBE_SCRIPT, namespace) finally: + _time.sleep = saved_sleep sys.argv = saved_argv if saved_mpi4py is not None: sys.modules["mpi4py"] = saved_mpi4py diff --git a/tests/unit/test_validation_helpers.py b/tests/unit/test_validation_helpers.py index 69ae443f..c121c90d 100755 --- a/tests/unit/test_validation_helpers.py +++ b/tests/unit/test_validation_helpers.py @@ -155,7 +155,11 @@ def test_collects_multiple_errors(self, mock_dlio, mock_mpi): mock_logger = MagicMock() with pytest.raises(DependencyError) as exc_info: - validate_benchmark_environment(args, logger=mock_logger) + # skip_remote_checks: hosts=['node1','node2'] would otherwise + # trigger a real SSH probe to nonexistent hosts (~20s of + # connect timeouts); this test only asserts that multiple + # errors accumulate, which the MPI+DLIO mocks already cover. 
+ validate_benchmark_environment(args, logger=mock_logger, skip_remote_checks=True) # First error should be raised (MPI) assert "MPI not found" in str(exc_info.value) From b4c8e220273fa37c111579764a447b1457e903eb Mon Sep 17 00:00:00 2001 From: Curtis Anderson Date: Fri, 26 Jun 2026 17:12:51 -0700 Subject: [PATCH 2/4] test: mark genuinely-slow tests as slow, declare marker in kv_cache - tests/integration: mark test_init_then_closed_datagen_no_env_var slow (~17.5s; full in-process CLI dispatcher exercising init + datagen). - kv_cache_benchmark/pyproject.toml: declare 'slow' marker and default to '-m not slow' (parity with the root suite). Without this, the next two slow marks would emit PytestUnknownMarkWarning and still run by default. - kv_cache_benchmark/tests: mark test_gpu_overflow_to_cpu slow (~32s; 100 x 10K-token allocations) and test_profile_allocate_vs_access_overhead slow (~5.8s; profiling). Net effect on default test run: root suite drops from 86s to 39s, kv_cache suite drops from 155s to 98s. --- kv_cache_benchmark/pyproject.toml | 5 ++++- kv_cache_benchmark/tests/test_kv_cache.py | 2 ++ tests/integration/test_canonical_layout_end_to_end.py | 1 + 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/kv_cache_benchmark/pyproject.toml b/kv_cache_benchmark/pyproject.toml index 3eaf156c..64db0488 100755 --- a/kv_cache_benchmark/pyproject.toml +++ b/kv_cache_benchmark/pyproject.toml @@ -110,4 +110,7 @@ ignore_missing_imports = true testpaths = ["tests", "."] python_files = ["test_*.py"] python_functions = ["test_*"] -addopts = "-v --tb=short" +addopts = "-v --tb=short -m 'not slow'" +markers = [ + "slow: tests that take >5s (e.g., large GPU-overflow allocations, profiling). 
Excluded from the default suite; opt in with `pytest -m slow` (or `pytest -m ''` to run everything).", +] diff --git a/kv_cache_benchmark/tests/test_kv_cache.py b/kv_cache_benchmark/tests/test_kv_cache.py index 31d5b1af..b493a817 100644 --- a/kv_cache_benchmark/tests/test_kv_cache.py +++ b/kv_cache_benchmark/tests/test_kv_cache.py @@ -1102,6 +1102,7 @@ def test_allocation_prefers_gpu(self, multi_tier_cache_with_gpu): assert success is True assert location == 'gpu' + @pytest.mark.slow def test_gpu_overflow_to_cpu(self, multi_tier_cache_with_gpu): """When GPU is full, should overflow to CPU.""" # Fill GPU with large allocations @@ -3899,6 +3900,7 @@ def test_part5_one_tier_nvme_only_eviction(self, tiny_model): class TestBottleneckProfiling: """Profile bottleneck detection in the KV cache benchmark.""" + @pytest.mark.slow def test_profile_allocate_vs_access_overhead(self): """Profile allocate vs access operations to identify bottleneck ratios.""" import time as time_mod diff --git a/tests/integration/test_canonical_layout_end_to_end.py b/tests/integration/test_canonical_layout_end_to_end.py index 98c62592..fc4f6df0 100644 --- a/tests/integration/test_canonical_layout_end_to_end.py +++ b/tests/integration/test_canonical_layout_end_to_end.py @@ -402,6 +402,7 @@ class TestInitThenRunFullCliDispatch: runs on any dev box (does not require DLIO/openmpi). """ + @pytest.mark.slow def test_init_then_closed_datagen_no_env_var(self, tmp_path, monkeypatch): """RED today: the second invocation raises ConfigurationError E101 even though `mlpstorage init` wrote a valid sentinel. From 69d4ab50a7dc8b347e274a1a5c1003bb234a1530 Mon Sep 17 00:00:00 2001 From: Curtis Anderson Date: Fri, 26 Jun 2026 17:13:02 -0700 Subject: [PATCH 3/4] ci: run all four test suites, not just tests/ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous workflow ran 'uv run pytest', which picked up only the root pyproject's testpaths=['tests']. 
The three sibling suites (mlpstorage_py/tests, vdb_benchmark/tests, kv_cache_benchmark/tests) were never executed in CI, so regressions in those areas could land without CI catching them — exactly the gap that PRs #551-#560 had to fix by hand. Each suite is invoked in its own step: - tests/ and vdb_benchmark/tests/ can't be collected in one pytest process (both define a top-level 'tests' package whose conftest.py modules collide via pytest's ImportPathMismatchError). - Each suite's pyproject defines its own '-m not slow' default, so subprocess-level invocation is the correct boundary. Also installs vdb_benchmark and kv_cache_benchmark editable so their imports resolve. --- .github/workflows/test.yml | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 2ff5c5e1..c1703fc3 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -45,8 +45,29 @@ jobs: sudo apt-get install -y libopenmpi-dev openmpi-common openmpi-bin - name: Install package and test dependencies - run: uv sync # Automatically creates the environment and locks dependencies + run: uv sync --extra test - # 4. Heavy Lifting (Runs only if linting passed) - - name: Run unit tests - run: uv run pytest # Runs your tests safely inside the managed environment + - name: Install subpackages (vdb_benchmark, kv_cache_benchmark) + # These have their own pyproject.toml and tests; install editable so + # their imports resolve (NOTE: a plain `-e` path does not install their `test` extras). + run: | + uv pip install -e ./vdb_benchmark + uv pip install -e ./kv_cache_benchmark + + # 4. Run the four test suites separately. + # We can't collect them together: `tests/` and `vdb_benchmark/tests/` + # both have a top-level package named `tests` whose conftest.py modules + # collide under pytest's rootdir-relative import (ImportPathMismatchError). 
+ # Each suite's pyproject defines its own `slow` marker and `-m 'not slow'` + # default, so subprocess-level invocation is correct. + - name: Run root test suite (tests/) + run: uv run pytest tests + + - name: Run mlpstorage_py test suite + run: uv run pytest mlpstorage_py/tests + + - name: Run vdb_benchmark test suite + run: uv run pytest vdb_benchmark/tests + + - name: Run kv_cache_benchmark test suite + run: uv run pytest kv_cache_benchmark/tests From 1e26fee0a4f5ba18cfe44be2c3b83a77271467de Mon Sep 17 00:00:00 2001 From: Curtis Anderson Date: Fri, 26 Jun 2026 17:13:09 -0700 Subject: [PATCH 4/4] chore: bump version 3.0.23 -> 3.0.25; regenerate uv.lock PR #550 bumps 3.0.23 -> 3.0.24. This PR lands on top, so bump to 3.0.25 directly. uv.lock regenerated to reflect the new project version (no dependency changes). --- pyproject.toml | 2 +- uv.lock | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index a2f6b505..3b47387b 100755 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "mlpstorage" -version = "3.0.23" +version = "3.0.25" description = "MLPerf Storage Benchmark Suite" readme = "README.md" license = {text = "Apache-2.0"} diff --git a/uv.lock b/uv.lock index 944e15f8..6e1ed50e 100644 --- a/uv.lock +++ b/uv.lock @@ -518,7 +518,7 @@ wheels = [ [[package]] name = "mlpstorage" -version = "3.0.23" +version = "3.0.25" source = { editable = "." } dependencies = [ { name = "dlio-benchmark", marker = "sys_platform == 'linux'" },