From c7a772555ac5982946080e1119b1c5d45a856472 Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson <sbryngelson@gmail.com>
Date: Wed, 11 Mar 2026 17:05:01 -0400
Subject: [PATCH 1/2] Add gcov-based test pruning with file-level coverage
 cache

- File-level gcov coverage cache maps test UUIDs to exercised .fpp source
  files (gzip JSON, committed to repo)
- --only-changes flag prunes tests by intersecting PR-changed files against
  coverage cache; conservative fallbacks for missing cache/coverage
- --build-coverage-cache flag + 3-phase parallel cache builder
  (prepare, run, gcov collect)
- New rebuild-cache CI job on Phoenix via SLURM when cases.py or Fortran
  dependency graph changes
- Dep-change detection greps PR/push diffs for added or removed use/include
  statements
- 53 unit tests cover core coverage logic
- Rebased onto PR #1299 unified CI architecture (submit-slurm-job.sh,
  common/test.sh)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .github/file-filter.yml                       |   4 +
 .github/scripts/submit-slurm-job.sh           |   1 +
 .github/workflows/common/rebuild-cache.sh     |  23 +
 .github/workflows/common/test.sh              |   8 +-
 .github/workflows/test.yml                    | 140 ++-
 .gitignore                                    |   3 +
 CMakeLists.txt                                |  21 +-
 toolchain/mfc/cli/commands.py                 |  25 +
 toolchain/mfc/test/case.py                    |  60 +-
 toolchain/mfc/test/cases.py                   |   2 +-
 toolchain/mfc/test/coverage.py                | 811 ++++++++++++++++++
 toolchain/mfc/test/test.py                    |  81 +-
 .../mfc/test/test_coverage_cache.json.gz      | Bin 0 -> 8827 bytes
 toolchain/mfc/test/test_coverage_unit.py      | 662 ++++++++++++++
 14 files changed, 1808 insertions(+), 33 deletions(-)
 create mode 100755 .github/workflows/common/rebuild-cache.sh
 create mode 100644 toolchain/mfc/test/coverage.py
 create mode 100644 toolchain/mfc/test/test_coverage_cache.json.gz
 create mode 100644 toolchain/mfc/test/test_coverage_unit.py

diff --git a/.github/file-filter.yml b/.github/file-filter.yml
index c0e7477cf2..b504ad526d 100644
--- a/.github/file-filter.yml
+++ b/.github/file-filter.yml
@@ -25,6 +25,7 @@ yml: &yml
   - '.github/workflows/phoenix/**'
   - '.github/workflows/frontier/**'
   - '.github/workflows/frontier_amd/**'
+  - '.github/workflows/common/**'
   - '.github/scripts/**'
   - '.github/workflows/bench.yml'
   - '.github/workflows/test.yml'
@@ -37,3 +38,6 @@ checkall: &checkall
   - *tests
   - *scripts
   - *yml
+
+cases_py:
+  - 'toolchain/mfc/test/cases.py'
diff --git a/.github/scripts/submit-slurm-job.sh b/.github/scripts/submit-slurm-job.sh
index eb6702cfbe..172e61225e 100755
--- a/.github/scripts/submit-slurm-job.sh
+++ b/.github/scripts/submit-slurm-job.sh
@@ -184,6 +184,7 @@ job_device="$device"
 job_interface="$interface"
 job_shard="$shard"
 job_cluster="$cluster"
+export GITHUB_EVENT_NAME="$GITHUB_EVENT_NAME"
 
 . ./mfc.sh load -c $compiler_flag -m $module_mode
 
diff --git a/.github/workflows/common/rebuild-cache.sh b/.github/workflows/common/rebuild-cache.sh
new file mode 100755
index 0000000000..4ef2a09522
--- /dev/null
+++ b/.github/workflows/common/rebuild-cache.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+set -e  # abort the job as soon as any command below fails
+
+# Number of parallel jobs: use SLURM allocation or default to 24.
+# Cap at 64 to avoid overwhelming OpenMPI daemons and OS process limits with concurrent launches.
+NJOBS="${SLURM_CPUS_ON_NODE:-24}"
+if [ "$NJOBS" -gt 64 ]; then NJOBS=64; fi
+
+# Clean stale build artifacts: the self-hosted runner may have a cached
+# GPU build (e.g. --gpu mp) whose CMake flags are incompatible with gcov.
+./mfc.sh clean
+
+# Source retry_build() for NFS stale file handle resilience (3 attempts).
+source .github/scripts/retry-build.sh
+
+# Build MFC with gcov coverage instrumentation (CPU-only, gfortran).
+retry_build ./mfc.sh build --gcov -j 8
+
+# Run all tests in parallel, collecting per-test coverage data.
+# Each test gets an isolated GCOV_PREFIX directory so .gcda files
+# don't collide. Coverage is collected per-test after all tests finish.
+# --gcov is required so the internal build step preserves instrumentation.
+./mfc.sh test --build-coverage-cache --gcov -j "$NJOBS"
diff --git a/.github/workflows/common/test.sh b/.github/workflows/common/test.sh
index 746c54f5d1..032056eb0c 100644
--- a/.github/workflows/common/test.sh
+++ b/.github/workflows/common/test.sh
@@ -68,4 +68,10 @@ if [ -n "${job_shard:-}" ]; then
     shard_opts="--shard $job_shard"
 fi
 
-./mfc.sh test -v --max-attempts 3 -a -j $n_test_threads $rdma_opts $device_opts $build_opts $shard_opts -- -c $job_cluster
+# Only prune tests on PRs; master pushes must run the full suite.
+prune_flag=""
+if [ "${GITHUB_EVENT_NAME:-}" = "pull_request" ]; then
+    prune_flag="--only-changes"
+fi
+
+./mfc.sh test -v --max-attempts 3 $prune_flag -a -j $n_test_threads $rdma_opts $device_opts $build_opts $shard_opts -- -c $job_cluster
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index a52a5967d1..ad9d0ac7f1 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -56,8 +56,10 @@ jobs:
   file-changes:
     name: Detect File Changes
     runs-on: 'ubuntu-latest'
-    outputs: 
+    outputs:
       checkall: ${{ steps.changes.outputs.checkall }}
+      cases_py: ${{ steps.changes.outputs.cases_py }}
+      dep_changed: ${{ steps.dep-check.outputs.dep_changed }}
     steps:
       - name: Clone
         uses: actions/checkout@v4
@@ -65,13 +67,107 @@ jobs:
       - name: Detect Changes
         uses: dorny/paths-filter@v3
         id: changes
-        with: 
+        with:
           filters: ".github/file-filter.yml"
 
+      - name: Check for Fortran dependency changes
+        id: dep-check
+        env:
+          GH_TOKEN: ${{ github.token }}
+        run: |
+          # Detect added/removed use/include statements that change the
+          # Fortran dependency graph, which would make the coverage cache stale.
+          PR_NUMBER="${{ github.event.pull_request.number }}"
+          BEFORE="${{ github.event.before }}"
+          AFTER="${{ github.event.after }}"
+          if [ "${{ github.event_name }}" = "pull_request" ]; then
+            # Default to dep_changed=true if gh pr diff fails (safe fallback).
+            DIFF=$(gh pr diff "$PR_NUMBER" 2>/dev/null) || {
+              echo "gh pr diff failed — defaulting to dep_changed=true for safety."
+              echo "dep_changed=true" >> "$GITHUB_OUTPUT"
+              exit 0
+            }
+          elif [ "${{ github.event_name }}" = "push" ]; then
+            DIFF=$(git diff "$BEFORE".."$AFTER" 2>/dev/null) || {
+              echo "git diff failed for push event — defaulting to dep_changed=true for safety."
+              echo "dep_changed=true" >> "$GITHUB_OUTPUT"
+              exit 0
+            }
+          else
+            DIFF=""
+          fi
+          if echo "$DIFF" | \
+             grep -qP '^[+-]\s*(use[\s,]+\w|#:include\s|include\s+['"'"'"])'; then
+            echo "dep_changed=true" >> "$GITHUB_OUTPUT"
+            echo "Fortran dependency change detected — will rebuild coverage cache."
+          else
+            echo "dep_changed=false" >> "$GITHUB_OUTPUT"
+          fi
+
+  rebuild-cache:
+    name: Rebuild Coverage Cache
+    needs: [lint-gate, file-changes]
+    if: >-
+      github.repository == 'MFlowCode/MFC' &&
+      (
+        (github.event_name == 'pull_request' &&
+         (needs.file-changes.outputs.cases_py == 'true' ||
+          needs.file-changes.outputs.dep_changed == 'true')) ||
+        (github.event_name == 'push' &&
+         (needs.file-changes.outputs.cases_py == 'true' ||
+          needs.file-changes.outputs.dep_changed == 'true')) ||
+        github.event_name == 'workflow_dispatch'
+      )
+    timeout-minutes: 240
+    runs-on:
+      group:  phoenix
+      labels: gt
+    permissions:
+      contents: write   # Required for Commit Cache to Master on push events
+    steps:
+      - name: Clone
+        uses: actions/checkout@v4
+        with:
+          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+          clean: false
+
+      - name: Rebuild Cache via SLURM
+        run: bash .github/scripts/submit-slurm-job.sh .github/workflows/common/rebuild-cache.sh cpu none phoenix
+
+      - name: Print Logs
+        if:   always()
+        run:  cat rebuild-cache-cpu-none.out
+
+      - name: Upload Cache Artifact
+        if: github.event_name == 'pull_request'
+        uses: actions/upload-artifact@v4
+        with:
+          name: coverage-cache
+          path: toolchain/mfc/test/test_coverage_cache.json.gz
+          retention-days: 1
+
+      - name: Commit Cache to Master
+        if: (github.event_name == 'push' || github.event_name == 'workflow_dispatch') && github.ref == 'refs/heads/master'
+        run: |
+          git config user.name "github-actions[bot]"
+          git config user.email "github-actions[bot]@users.noreply.github.com"
+          git add toolchain/mfc/test/test_coverage_cache.json.gz
+          if git diff --cached --quiet; then
+            echo "Coverage cache unchanged."
+          else
+            git commit -m "Regenerate gcov coverage cache [skip ci]"
+            git push origin HEAD:refs/heads/master
+          fi
+
   github:
     name: Github
-    if: needs.file-changes.outputs.checkall == 'true'
-    needs: [lint-gate, file-changes]
+    needs: [lint-gate, file-changes, rebuild-cache]
+    if: >-
+      always() &&
+      needs.lint-gate.result == 'success' &&
+      needs.file-changes.result == 'success' &&
+      needs.rebuild-cache.result != 'cancelled' &&
+      needs.file-changes.outputs.checkall == 'true'
     strategy:
       matrix:
         os:    ['ubuntu', 'macos']
@@ -98,6 +194,20 @@ jobs:
       - name: Clone
         uses: actions/checkout@v4
 
+      - name: Fetch master for coverage diff
+        run: |
+          git fetch origin master:master --depth=1
+          git fetch --deepen=200
+        continue-on-error: true
+
+      - name: Download Coverage Cache
+        if: needs.rebuild-cache.result == 'success'
+        uses: actions/download-artifact@v4
+        with:
+          name: coverage-cache
+          path: toolchain/mfc/test
+        continue-on-error: true
+
       - name: Setup MacOS
         if:   matrix.os == 'macos'
         run:  |
@@ -140,15 +250,23 @@ jobs:
 
       - name: Test
         run:  |
-          /bin/bash mfc.sh test -v --max-attempts 3 -j $(nproc) $TEST_ALL $TEST_PCT
+          /bin/bash mfc.sh test -v --max-attempts 3 -j $(nproc) $ONLY_CHANGES $TEST_ALL $TEST_PCT
         env:
           TEST_ALL: ${{ matrix.mpi == 'mpi' && '--test-all' || '' }}
           TEST_PCT: ${{ matrix.debug == 'debug' && '-% 20' || '' }}
+          ONLY_CHANGES: ${{ github.event_name == 'pull_request' && '--only-changes' || '' }}
 
   self:
     name: "${{ matrix.cluster_name }} (${{ matrix.device }}${{ matrix.interface != 'none' && format('-{0}', matrix.interface) || '' }}${{ matrix.shard != '' && format(' [{0}]', matrix.shard) || '' }})"
-    if: github.repository == 'MFlowCode/MFC' && needs.file-changes.outputs.checkall == 'true' && github.event.pull_request.draft != true
-    needs: [lint-gate, file-changes]
+    needs: [lint-gate, file-changes, rebuild-cache]
+    if: >-
+      always() &&
+      needs.lint-gate.result == 'success' &&
+      needs.file-changes.result == 'success' &&
+      needs.rebuild-cache.result != 'cancelled' &&
+      github.repository == 'MFlowCode/MFC' &&
+      needs.file-changes.outputs.checkall == 'true' &&
+      github.event.pull_request.draft != true
     # Frontier CCE compiler is periodically broken by toolchain updates (e.g.
     # cpe/25.03 introduced an IPA SIGSEGV in CCE 19.0.0). Allow Frontier to
     # fail without blocking PR merges; Phoenix remains a hard gate.
@@ -234,6 +352,14 @@ jobs:
           # submit-slurm-job.sh can detect and cancel stale SLURM jobs on retry.
           clean: false
 
+      - name: Download Coverage Cache
+        if: needs.rebuild-cache.result == 'success'
+        uses: actions/download-artifact@v4
+        with:
+          name: coverage-cache
+          path: toolchain/mfc/test
+        continue-on-error: true
+
       - name: Build (login node)
         if:   matrix.cluster != 'phoenix'
         timeout-minutes: 60
diff --git a/.gitignore b/.gitignore
index f831e1fcfe..664b6c3083 100644
--- a/.gitignore
+++ b/.gitignore
@@ -22,6 +22,9 @@ __pycache__
 # Auto-generated version file
 toolchain/mfc/_version.py
 
+# Raw coverage cache — legacy, not tracked (the .json.gz version IS committed)
+toolchain/mfc/test/test_coverage_cache.json
+
 # Auto-generated toolchain files (regenerate with: ./mfc.sh generate)
 toolchain/completions/mfc.bash
 toolchain/completions/_mfc
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 15c55af8df..23ae42c7e5 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -113,6 +113,8 @@ endif()
 # debug builds. These include optimization and debug flags, as well as some that
 # are required for a successful build of MFC.
 
+set(FYPP_GCOV_OPTS "")
+
 if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU")
     add_compile_options(
         $<$<COMPILE_LANGUAGE:Fortran>:-ffree-line-length-none>
@@ -131,13 +133,20 @@ if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU")
         add_compile_options(
             $<$<COMPILE_LANGUAGE:Fortran>:-fprofile-arcs>
             $<$<COMPILE_LANGUAGE:Fortran>:-ftest-coverage>
-            $<$<COMPILE_LANGUAGE:Fortran>:-O1>
-	    )
+        )
 
         add_link_options(
             $<$<COMPILE_LANGUAGE:Fortran>:-lgcov>
             $<$<COMPILE_LANGUAGE:Fortran>:--coverage>
         )
+
+        # Override Release -O3 with -O1 for gcov: coverage instrumentation is
+        # inaccurate at -O3, and aggressive codegen (e.g. AVX-512 FP16 on
+        # Granite Rapids) can emit instructions that older assemblers reject.
+        set(CMAKE_Fortran_FLAGS_RELEASE "-O1 -DNDEBUG" CACHE STRING "" FORCE)
+
+        # Use gfortran5 line markers so gcov can map coverage to .fpp sources.
+        set(FYPP_GCOV_OPTS "--line-marker-format=gfortran5")
     endif()
 
     if (CMAKE_BUILD_TYPE STREQUAL "Debug")
@@ -245,8 +254,11 @@ if (CMAKE_BUILD_TYPE STREQUAL "Release")
         endif()
     endif()
 
-    # Enable LTO/IPO if supported
-    if (CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC")
+    # Enable LTO/IPO if supported (skip for gcov — LTO interferes with coverage
+    # instrumentation and can trigger assembler errors on newer architectures).
+    if (MFC_GCov)
+        message(STATUS "LTO/IPO disabled for gcov build")
+    elseif (CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC")
         if (MFC_Unified)
             message(STATUS "LTO/IPO is not available with NVHPC using Unified Memory")
         elseif (CMAKE_Fortran_COMPILER_VERSION VERSION_GREATER "24.11" AND CMAKE_Fortran_COMPILER_VERSION VERSION_LESS "25.9")
@@ -392,6 +404,7 @@ macro(HANDLE_SOURCES target useCommon)
                                  --no-folding
 								 --line-length=999
 		 						 --line-numbering-mode=nocontlines
+                                 ${FYPP_GCOV_OPTS}
                                  "${fpp}" "${f90}"
             DEPENDS  "${fpp};${${target}_incs}"
             COMMENT  "Preprocessing (Fypp) ${fpp_filename}"
diff --git a/toolchain/mfc/cli/commands.py b/toolchain/mfc/cli/commands.py
index d4b34df3d8..73eb02c2cf 100644
--- a/toolchain/mfc/cli/commands.py
+++ b/toolchain/mfc/cli/commands.py
@@ -458,6 +458,27 @@
             type=str,
             default=None,
         ),
+        Argument(
+            name="build-coverage-cache",
+            help="Run all tests with gcov instrumentation to build the file-level coverage cache. Pass --gcov to enable coverage instrumentation in the internal build step.",
+            action=ArgAction.STORE_TRUE,
+            default=False,
+            dest="build_coverage_cache",
+        ),
+        Argument(
+            name="only-changes",
+            help="Only run tests whose covered files overlap with files changed since branching from master (uses file-level gcov coverage cache).",
+            action=ArgAction.STORE_TRUE,
+            default=False,
+            dest="only_changes",
+        ),
+        Argument(
+            name="changes-branch",
+            help="Branch to compare against for --only-changes (default: master).",
+            type=str,
+            default="master",
+            dest="changes_branch",
+        ),
     ],
     mutually_exclusive=[
         MutuallyExclusiveGroup(arguments=[
@@ -488,6 +509,8 @@
         Example("./mfc.sh test -j 4", "Run with 4 parallel jobs"),
         Example("./mfc.sh test --only 3D", "Run only 3D tests"),
         Example("./mfc.sh test --generate", "Regenerate golden files"),
+        Example("./mfc.sh test --only-changes -j 4", "Run tests affected by changed files"),
+        Example("./mfc.sh build --gcov -j 8 && ./mfc.sh test --build-coverage-cache --gcov", "One-time: build file-coverage cache"),
     ],
     key_options=[
         ("-j, --jobs N", "Number of parallel test jobs"),
@@ -495,6 +518,8 @@
         ("-f, --from UUID", "Start from specific test"),
         ("--generate", "Generate/update golden files"),
         ("--no-build", "Skip rebuilding MFC"),
+        ("--build-coverage-cache", "Build file-level gcov coverage cache (one-time)"),
+        ("--only-changes", "Run tests affected by changed files (requires cache)"),
     ],
 )
 
diff --git a/toolchain/mfc/test/case.py b/toolchain/mfc/test/case.py
index c5ffdd301a..9d27e9df62 100644
--- a/toolchain/mfc/test/case.py
+++ b/toolchain/mfc/test/case.py
@@ -1,4 +1,4 @@
-import os, glob, hashlib, binascii, subprocess, itertools, dataclasses, shutil
+import os, json, glob, hashlib, binascii, subprocess, itertools, dataclasses, shutil
 
 from typing import List, Set, Union, Callable, Optional
 
@@ -7,6 +7,44 @@
 from ..run   import input
 from ..build import MFCTarget, get_target
 
+# Parameters that enable simulation output writing for post_process.
+# When post_process is a target, simulation must write field data so
+# post_process has something to read.  Used in the generated case.py
+# template and by the coverage cache builder.
+POST_PROCESS_OUTPUT_PARAMS = {
+    'parallel_io':  'T', 'cons_vars_wrt':   'T',
+    'prim_vars_wrt': 'T', 'alpha_rho_wrt(1)': 'T',
+    'rho_wrt':      'T', 'mom_wrt(1)':      'T',
+    'vel_wrt(1)':   'T', 'E_wrt':           'T',
+    'pres_wrt':     'T', 'alpha_wrt(1)':    'T',
+    'gamma_wrt':    'T', 'heat_ratio_wrt':  'T',
+    'pi_inf_wrt':   'T', 'pres_inf_wrt':    'T',
+    'c_wrt':        'T',
+}
+
+# Additional output parameters for 3D cases (p != 0).
+POST_PROCESS_3D_PARAMS = {
+    'fd_order':     1,
+    'omega_wrt(1)': 'T',
+    'omega_wrt(2)': 'T',
+    'omega_wrt(3)': 'T',
+}
+
+# Parameters set when post_process is NOT a target.
+POST_PROCESS_OFF_PARAMS = {
+    'parallel_io':   'F',
+    'prim_vars_wrt': 'F',
+}
+
+
+def get_post_process_mods(case_params: dict) -> dict:
+    """Return a fresh dict of output overrides for post_process runs (plus the 3D extras when p != 0)."""
+    mods = dict(POST_PROCESS_OUTPUT_PARAMS)  # copy so the module-level constant is never mutated
+    if int(case_params.get('p', 0)) != 0:  # a missing 'p' is treated as 0
+        mods.update(POST_PROCESS_3D_PARAMS)
+    return mods
+
+
 Tend = 0.25
 Nt   = 50
 mydt = 0.0005
@@ -204,25 +242,11 @@ def create_directory(self):
 mods = {{}}
 
 if "post_process" in ARGS["mfc"]["targets"]:
-    mods = {{
-        'parallel_io'  : 'T', 'cons_vars_wrt'   : 'T',
-        'prim_vars_wrt': 'T', 'alpha_rho_wrt(1)': 'T',
-        'rho_wrt'      : 'T', 'mom_wrt(1)'      : 'T',
-        'vel_wrt(1)'   : 'T', 'E_wrt'           : 'T',
-        'pres_wrt'     : 'T', 'alpha_wrt(1)'    : 'T',
-        'gamma_wrt'    : 'T', 'heat_ratio_wrt'  : 'T',
-        'pi_inf_wrt'   : 'T', 'pres_inf_wrt'    : 'T',
-        'c_wrt'        : 'T',
-    }}
-
+    mods = {json.dumps(POST_PROCESS_OUTPUT_PARAMS)}
     if case['p'] != 0:
-        mods['fd_order']  = 1
-        mods['omega_wrt(1)'] = 'T'
-        mods['omega_wrt(2)'] = 'T'
-        mods['omega_wrt(3)'] = 'T'
+        mods.update({json.dumps(POST_PROCESS_3D_PARAMS)})
 else:
-    mods['parallel_io']   = 'F'
-    mods['prim_vars_wrt'] = 'F'
+    mods = {json.dumps(POST_PROCESS_OFF_PARAMS)}
 
 print(json.dumps({{**case, **mods}}))
 """)
diff --git a/toolchain/mfc/test/cases.py b/toolchain/mfc/test/cases.py
index 7835981151..4c385f3b31 100644
--- a/toolchain/mfc/test/cases.py
+++ b/toolchain/mfc/test/cases.py
@@ -1071,7 +1071,7 @@ def foreach_example():
                            "2D_forward_facing_step",
                            "1D_convergence",
                            "3D_IGR_33jet", "1D_multispecies_diffusion",
-                           "2D_ibm_stl_MFCCharacter"]
+                           "2D_ibm_stl_MFCCharacter", "1D_qbmm"]
             if path in casesToSkip:
                 continue
             name = f"{path.split('_')[0]} -> Example -> {'_'.join(path.split('_')[1:])}"
diff --git a/toolchain/mfc/test/coverage.py b/toolchain/mfc/test/coverage.py
new file mode 100644
index 0000000000..eedd2bcc36
--- /dev/null
+++ b/toolchain/mfc/test/coverage.py
@@ -0,0 +1,811 @@
+"""
+File-level gcov coverage-based test pruning for MFC.
+
+Build MFC once with gfortran --coverage, run all tests individually, record
+which .fpp files each test executes, and cache that mapping.
+
+When files change on a PR, intersect the changed .fpp files against each test's
+covered file set. Only tests that touch at least one changed file run.
+
+Workflow:
+    ./mfc.sh test --build-coverage-cache --gcov -j 8  # one-time: populate the cache
+    ./mfc.sh test --only-changes -j 8                 # fast: run only affected tests
+"""
+
+import io
+import os
+import re
+import json
+import gzip
+import shutil
+import hashlib
+import tempfile
+import subprocess
+import datetime
+from pathlib import Path
+from typing import Optional
+from concurrent.futures import ThreadPoolExecutor, as_completed
+
+from ..printer import cons
+from .. import common
+from ..common import MFCException
+from ..build import PRE_PROCESS, SIMULATION, POST_PROCESS
+from .case import (input_bubbles_lagrange, get_post_process_mods,
+                    POST_PROCESS_3D_PARAMS)
+
+
+COVERAGE_CACHE_PATH = Path(common.MFC_ROOT_DIR) / "toolchain/mfc/test/test_coverage_cache.json.gz"
+
+# Changes to these files trigger the full test suite.
+# CPU coverage cannot tell us about GPU directive changes (macro files), and
+# toolchain files define or change the set of tests themselves.
+ALWAYS_RUN_ALL = frozenset([
+    "src/common/include/parallel_macros.fpp",
+    "src/common/include/acc_macros.fpp",
+    "src/common/include/omp_macros.fpp",
+    "src/common/include/shared_parallel_macros.fpp",
+    "src/common/include/macros.fpp",
+    "src/common/include/case.fpp",
+    "toolchain/mfc/test/case.py",
+    "toolchain/mfc/test/cases.py",
+    "toolchain/mfc/test/coverage.py",
+    "toolchain/mfc/params/definitions.py",
+    "toolchain/mfc/run/input.py",
+    "toolchain/mfc/case_validator.py",
+])
+
+# Directory prefixes: any changed file under these paths triggers full suite.
+# Note: src/simulation/include/ (.fpp files like inline_riemann.fpp) is NOT
+# listed here — Fypp line markers (--line-marker-format=gfortran5) correctly
+# attribute included file paths, so gcov coverage tracks them accurately.
+ALWAYS_RUN_ALL_PREFIXES = (
+    "toolchain/cmake/",
+)
+
+
+def _get_gcov_version(gcov_binary: str) -> str:
+    """Return the first non-blank line of ``gcov --version``, or "unknown" if none can be read."""
+    try:
+        result = subprocess.run(
+            [gcov_binary, "--version"],
+            capture_output=True, text=True, timeout=10, check=False
+        )
+        for line in result.stdout.splitlines():
+            if line.strip():
+                return line.strip()
+    except Exception:  # best-effort probe: any failure falls through to "unknown"
+        pass
+    return "unknown"
+
+
+def find_gcov_binary() -> str:
+    """
+    Return the path of a GNU gcov matching the system gfortran; raise MFCException if none.
+    Tries ``gcov-{major}`` first (Homebrew GCC on macOS), then plain ``gcov``.
+    Apple/LLVM gcov is skipped: it cannot read gfortran .gcda files.
+    Distro builds may brand the banner without "GCC"/"GNU" (e.g. "gcov (Ubuntu
+    11.4.0-...) 11.4.0"), so the FSF copyright line also identifies GNU gcov.
+    """
+    # Determine gfortran major version
+    major = None
+    try:
+        result = subprocess.run(
+            ["gfortran", "--version"],
+            capture_output=True, text=True, timeout=10, check=False
+        )
+        m = re.search(r'(\d+)\.\d+\.\d+', result.stdout)
+        if m:
+            major = m.group(1)
+    except Exception:  # gfortran missing or unrunnable: fall back to plain "gcov"
+        pass
+
+    # Try versioned binary first (Homebrew macOS), then plain gcov
+    candidates = []
+    if major:
+        candidates.append(f"gcov-{major}")
+    candidates.append("gcov")
+
+    for candidate in candidates:
+        path = shutil.which(candidate)
+        if path is None:
+            continue
+        try:
+            result = subprocess.run(
+                [path, "--version"],
+                capture_output=True, text=True, timeout=10, check=False
+            )
+            version_out = result.stdout
+            if "Apple LLVM" in version_out or "Apple clang" in version_out:
+                continue  # Apple's gcov cannot parse GCC-generated .gcda files
+            if any(tag in version_out for tag in ("GCC", "GNU", "Free Software Foundation")):
+                return path
+        except Exception:
+            continue
+
+    raise MFCException(
+        "GNU gcov not found. gcov is required for the coverage cache.\n"
+        "  On macOS (Homebrew):  brew install gcc\n"
+        "  On Linux (Debian/Ubuntu): apt install gcc\n"
+        "  On Linux (RHEL/CentOS):  yum install gcc\n"
+        "Apple's /usr/bin/gcov is incompatible with gfortran .gcda files."
+    )
+
+
+def find_gcno_files(root_dir: str) -> list:
+    """
+    Recursively list .gcno files under <root_dir>/build, skipping paths with a "venv" component.
+    Raises MFCException if none exist (i.e. the build was not done with --gcov).
+    """
+    build_dir = Path(root_dir) / "build"
+    gcno_files = [
+        p for p in build_dir.rglob("*.gcno")
+        if "venv" not in p.parts
+    ]
+    if not gcno_files:
+        raise MFCException(
+            "No .gcno files found. Build with --gcov instrumentation first:\n"
+            "  ./mfc.sh build --gcov -j 8"
+        )
+    return gcno_files
+
+
+
+def _parse_gcov_json_output(raw_bytes: bytes, root_dir: str) -> set:
+    """
+    Parse gcov JSON output into the set of covered .fpp paths, relative to
+    root_dir and restricted to src/.  Accepts gzip-compressed (gcov 13+) or raw
+    (gcov 12) JSON, including concatenated objects from a batched gcov call
+    (one per .gcno file).  Parsing stops with a warning at the first malformed
+    object.  A file counts only if at least one of its lines has count > 0.
+    """
+    try:
+        text = gzip.decompress(raw_bytes).decode("utf-8", errors="replace")
+    except (gzip.BadGzipFile, OSError):
+        try:
+            text = raw_bytes.decode("utf-8", errors="replace")
+        except (UnicodeDecodeError, ValueError):  # defensive only: errors="replace" does not raise on bad bytes
+            cons.print("[yellow]Warning: gcov output is not valid UTF-8 or gzip — "
+                       "no coverage recorded for this test.[/yellow]")
+            return set()
+
+    result = set()
+    real_root = os.path.realpath(root_dir)
+
+    # Parse potentially concatenated JSON objects (one per .gcno file).
+    decoder = json.JSONDecoder()
+    pos = 0
+    while pos < len(text):
+        while pos < len(text) and text[pos] in " \t\n\r":
+            pos += 1
+        if pos >= len(text):
+            break
+        try:
+            data, end_pos = decoder.raw_decode(text, pos)
+            pos = end_pos
+        except json.JSONDecodeError:
+            remaining = len(text) - pos
+            if remaining > 0:
+                cons.print(f"[yellow]Warning: gcov JSON parse error at offset "
+                           f"{pos} ({remaining} bytes remaining) — partial "
+                           f"coverage recorded for this test.[/yellow]")
+            break
+
+        for file_entry in data.get("files", []):
+            file_path = file_entry.get("file", "")
+            if not file_path.endswith(".fpp"):
+                continue
+            if any(line.get("count", 0) > 0 for line in file_entry.get("lines", [])):
+                try:
+                    rel_path = os.path.relpath(os.path.realpath(file_path), real_root)
+                except ValueError:
+                    rel_path = file_path
+                # Only keep src/ paths — build/staging/ artifacts from
+                # case-optimized builds are auto-generated and never
+                # appear in PR diffs.
+                if rel_path.startswith("src/"):
+                    result.add(rel_path)
+
+    return result
+
+
+def _compute_gcov_prefix_strip(root_dir: str) -> str:
+    """
+    Compute GCOV_PREFIX_STRIP so .gcda files preserve the build/ tree.
+
+    GCOV_PREFIX_STRIP removes N leading path components from the compile-time
+    absolute .gcda path.  N is the component count of the symlink-resolved MFC
+    root, returned as a string, so the prefix tree starts with ``build/staging/...``.
+    """
+    real_root = os.path.realpath(root_dir)
+    return str(len(Path(real_root).parts) - 1)  # -1 excludes root '/'
+
+
+def _collect_single_test_coverage(  # pylint: disable=too-many-locals
+    uuid: str, test_gcda: str, root_dir: str, gcov_bin: str,
+) -> tuple:
+    """
+    Collect file-level coverage for a single test; returns (uuid, sorted paths).
+
+    Copies matching .gcno files from the real build tree next to the test's
+    isolated .gcda files, runs one batched gcov call, then deletes the copies.
+    Only the test's own directory is written, never the shared build tree.
+    Returns (uuid, []) if no .gcda/.gcno pair exists or gcov fails or is silent.
+    """
+    build_subdir = os.path.join(test_gcda, "build")
+    if not os.path.isdir(build_subdir):
+        # No .gcda files produced — test may not have run or GCOV_PREFIX
+        # was misconfigured.  Return empty list; the sanity check at the end
+        # of build_coverage_cache will catch systemic failures.
+        return uuid, []
+
+    gcno_copies = []
+
+    for dirpath, _, filenames in os.walk(build_subdir):
+        for fname in filenames:
+            if not fname.endswith(".gcda"):
+                continue
+            # Derive matching .gcno path in the real build tree
+            gcda_path = os.path.join(dirpath, fname)
+            rel = os.path.relpath(gcda_path, test_gcda)
+            gcno_rel = rel[:-5] + ".gcno"
+            gcno_src = os.path.join(root_dir, gcno_rel)
+            if os.path.isfile(gcno_src):
+                # Copy .gcno alongside .gcda in the test's isolated dir.
+                # Wrap in try/except for NFS TOCTOU races (file may vanish
+                # between isfile() and copy on networked filesystems).
+                gcno_dst = os.path.join(dirpath, fname[:-5] + ".gcno")
+                try:
+                    shutil.copy2(gcno_src, gcno_dst)
+                except OSError:
+                    continue
+                gcno_copies.append(gcno_dst)
+
+    if not gcno_copies:
+        return uuid, []
+
+    # Batch: single gcov call for all .gcno files in this test.
+    # Run from root_dir so source path resolution works correctly.
+    cmd = [gcov_bin, "--json-format", "--stdout"] + gcno_copies
+    try:
+        proc = subprocess.run(
+            cmd, capture_output=True, cwd=root_dir, timeout=120, check=False
+        )
+    except (subprocess.TimeoutExpired, subprocess.SubprocessError, OSError) as exc:
+        cons.print(f"[yellow]Warning: gcov failed for {uuid}: {exc}[/yellow]")
+        return uuid, []
+    finally:
+        for g in gcno_copies:
+            try:
+                os.remove(g)
+            except OSError:
+                pass
+
+    if proc.returncode != 0 or not proc.stdout:
+        if proc.returncode != 0:
+            cons.print(f"[yellow]Warning: gcov exited {proc.returncode} for {uuid}[/yellow]")
+        return uuid, []
+
+    coverage = _parse_gcov_json_output(proc.stdout, root_dir)
+    return uuid, sorted(coverage)
+
+
+def _run_single_test_direct(test_info: dict, gcda_dir: str, strip: str) -> tuple:  # pylint: disable=too-many-locals
+    """
+    Run a single test by invoking Fortran executables directly.
+
+    Bypasses ``./mfc.sh run`` entirely (no Python startup, no Mako template
+    rendering, no shell script generation).  ``test_info`` carries the
+    pre-computed ``uuid``, ``dir``, ``binaries`` and ``ppn``; ``strip`` is
+    exported to the binaries as GCOV_PREFIX_STRIP.
+
+    Returns (uuid, test_gcda_path, failures).  ``failures`` is empty on success,
+    otherwise it holds one (target_name, reason, output_tail) tuple for the
+    target that stopped the run.  Raises MFCException if no MPI launcher is found.
+    """
+    uuid = test_info["uuid"]
+    test_dir = test_info["dir"]
+    binaries = test_info["binaries"]  # ordered list of (target_name, bin_path)
+    ppn = test_info["ppn"]
+
+    test_gcda = os.path.join(gcda_dir, uuid)
+    os.makedirs(test_gcda, exist_ok=True)
+
+    env = {**os.environ, "GCOV_PREFIX": test_gcda, "GCOV_PREFIX_STRIP": strip}
+
+    # MPI-compiled binaries must be launched via an MPI launcher (even ppn=1).
+    # Use --bind-to none to avoid binding issues with concurrent launches.
+    if shutil.which("mpirun"):
+        mpi_cmd = ["mpirun", "--bind-to", "none", "-np", str(ppn)]
+    elif shutil.which("srun"):
+        mpi_cmd = ["srun", "--ntasks", str(ppn)]
+    else:
+        raise MFCException(
+            "No MPI launcher found (mpirun or srun). "
+            "MFC binaries require an MPI launcher.\n"
+            "  On Ubuntu: sudo apt install openmpi-bin\n"
+            "  On macOS:  brew install open-mpi"
+        )
+
+    failures = []
+    for target_name, bin_path in binaries:
+        if not os.path.isfile(bin_path):
+            # Record the missing binary and stop: downstream targets depend on
+            # outputs from earlier ones (e.g. simulation needs the grid from
+            # pre_process), so running them alone yields misleading gcda files.
+            failures.append((target_name, "missing-binary", f"binary not found: {bin_path}"))
+            break
+
+        # Verify .inp file exists before running (diagnostic for transient
+        # filesystem issues where the file goes missing between phases).
+        inp_file = os.path.join(test_dir, f"{target_name}.inp")
+        if not os.path.isfile(inp_file):
+            failures.append((target_name, "missing-inp", f"{inp_file} not found before launch"))
+            break
+
+        cmd = mpi_cmd + [bin_path]
+        try:
+            # errors="replace": non-UTF-8 bytes in the output must not raise UnicodeDecodeError.
+            result = subprocess.run(cmd, check=False, text=True, errors="replace",
+                                    stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
+                                    env=env, cwd=test_dir, timeout=600)
+            if result.returncode != 0:
+                # Keep the last output lines for debugging and stop: a failed target
+                # leaves no valid inputs for the next one (spurious coverage).
+                tail = "\n".join(result.stdout.strip().splitlines()[-15:])
+                failures.append((target_name, result.returncode, tail))
+                break
+        except subprocess.TimeoutExpired:
+            failures.append((target_name, "timeout", ""))
+            break
+        except (subprocess.SubprocessError, OSError) as exc:
+            failures.append((target_name, str(exc), ""))
+            break
+
+    return uuid, test_gcda, failures
+
+
+def _prepare_test(case, root_dir: str) -> dict:  # pylint: disable=unused-argument,too-many-locals
+    """
+    Prepare a test for direct execution: create directory, generate .inp
+    files, and resolve binary paths.  All Python/toolchain overhead happens
+    here (single-threaded) so the parallel phase is pure subprocess calls.
+
+    The coverage-run overrides are applied to a shallow copy of case.params.
+    The copy is swapped in only while the .inp files are rendered and the
+    original params are restored on exit, so the case object is not mutated.
+
+    Returns a dict with keys "uuid", "dir", "binaries" (ordered list of
+    (target_name, bin_path)) and "ppn".  Re-raises OSError if the test
+    directory cannot be prepared.
+    """
+    try:
+        case.delete_output()
+        case.create_directory()
+    except OSError as exc:
+        cons.print(f"[yellow]Warning: Failed to prepare test directory for "
+                   f"{case.get_uuid()}: {exc}[/yellow]")
+        raise
+
+    # Lagrange bubble tests need input files generated before running.
+    if case.params.get("bubbles_lagrange", 'F') == 'T':
+        try:
+            input_bubbles_lagrange(case)
+        except Exception as exc:
+            cons.print(f"[yellow]Warning: Failed to generate Lagrange bubble input "
+                       f"for {case.get_uuid()}: {exc}[/yellow]")
+
+    # Work on a copy so the case's own params dict is never edited in place.
+    params = dict(case.params)
+
+    # Apply post_process output params so simulation writes data files that
+    # post_process reads.  Mirrors the generated case.py logic that normally
+    # runs via ./mfc.sh run (see POST_PROCESS_OUTPUT_PARAMS in case.py).
+    params.update(get_post_process_mods(params))
+
+    # Run only one timestep: we only need to know which source files are
+    # *touched*, not verify correctness.  A single step exercises the key
+    # code paths across all three executables while preventing heavy 3D tests
+    # from timing out under gcov instrumentation (~10x slowdown).
+    params['t_step_stop'] = 1
+
+    # Adaptive-dt tests: post_process iterates over n_save = int(t_stop/t_save)+1
+    # save indices, but with t_step_stop this small the simulation writes far
+    # fewer.  Clamp t_stop so post_process only reads saves that actually exist.
+    if params.get('cfl_adap_dt', 'F') == 'T':
+        params['t_stop'] = float(params.get('t_save', 1.0))  # n_save = 2: indices 0 and 1
+
+    # Heavy 3D QBMM tests: drop vorticity output (omega_wrt + fd_order).  Normal
+    # test execution never runs post_process, so it is untested on these
+    # configs; vorticity FD computation on large grids with many QBMM variables
+    # causes post_process to crash (exit code 2).
+    if int(params.get('p', 0)) > 0 and params.get('qbmm', 'F') == 'T':
+        for key in POST_PROCESS_3D_PARAMS:
+            params.pop(key, None)
+
+    # Write .inp files directly (no subprocess, no Mako templates) for all three
+    # executables, capturing coverage across the full pipeline (pre_process:
+    # grid/IC generation; simulation: RHS/time-stepper; post_process: field I/O).
+    targets = [PRE_PROCESS, SIMULATION, POST_PROCESS]
+    binaries = []
+
+    # get_inp() prints one message per (test, target) pair; silence it by
+    # redirecting the console.  NOTE: not thread-safe — Phase 1 must remain single-threaded.
+    orig_params, orig_file = case.params, cons.raw.file
+    case.params = params
+    try:
+        test_dir = case.get_dirpath()
+        input_file = case.to_input_file()
+        cons.raw.file = io.StringIO()
+        for target in targets:
+            inp_content = case.get_inp(target)
+            common.file_write(os.path.join(test_dir, f"{target.name}.inp"), inp_content)
+            bin_path = target.get_install_binpath(input_file)
+            binaries.append((target.name, bin_path))
+        return {
+            "uuid":     case.get_uuid(),
+            "dir":      test_dir,
+            "binaries": binaries,
+            "ppn":      getattr(case, 'ppn', 1),
+        }
+    finally:
+        # Undo both temporary swaps, even if rendering an .inp file raised.
+        case.params, cons.raw.file = orig_params, orig_file
+
+
+def build_coverage_cache(root_dir: str, cases: list, n_jobs: int = None) -> None:  # pylint: disable=too-many-locals,too-many-statements
+    """
+    Build the file-level coverage cache by running tests in parallel.
+
+    Phase 1 — Prepare all tests: generate .inp files and resolve binary paths.
+    This happens single-threaded so the parallel phase has zero Python overhead.
+
+    Phase 2 — Run all tests concurrently.  Each worker invokes Fortran binaries
+    directly (no ``./mfc.sh run``, no shell scripts).  Each test's GCOV_PREFIX
+    points to an isolated directory so .gcda files don't collide.
+
+    Phase 3 — For each test, temporarily copy .gcno files from the real build tree
+    into the test's isolated .gcda directory, run gcov to collect which .fpp files
+    had coverage, then remove the .gcno copies.
+
+    Requires a prior ``--gcov`` build: ``./mfc.sh build --gcov -j 8``.  Raises
+    MFCException if no test yields coverage, if cases.py cannot be hashed, or
+    if the cache file cannot be written.
+    """
+    gcov_bin = find_gcov_binary()
+    gcno_files = find_gcno_files(root_dir)
+    strip = _compute_gcov_prefix_strip(root_dir)
+
+    if n_jobs is None:
+        # Caller should pass n_jobs explicitly on SLURM systems;
+        # os.cpu_count() may exceed the SLURM allocation.
+        n_jobs = max(os.cpu_count() or 1, 1)
+    # Cap Phase 2 test parallelism: each test spawns gcov-instrumented MPI
+    # processes (~2-5 GB each under gcov).  Too many concurrent tests cause OOM.
+    # Phase 3 gcov workers run at full n_jobs (gcov is lightweight by comparison).
+    phase2_jobs = min(n_jobs, 16)
+    cons.print(f"[bold]Building coverage cache for {len(cases)} tests "
+               f"({phase2_jobs} test workers, {n_jobs} gcov workers)...[/bold]")
+    cons.print(f"[dim]Using gcov binary: {gcov_bin}[/dim]")
+    cons.print(f"[dim]Found {len(gcno_files)} .gcno files[/dim]")
+    cons.print(f"[dim]GCOV_PREFIX_STRIP={strip}[/dim]")
+    cons.print()
+
+    # Phase 1: Prepare all tests (single-threaded; scales linearly with test count).
+    cons.print("[bold]Phase 1/3: Preparing tests...[/bold]")
+    test_infos = []
+    for i, case in enumerate(cases):
+        try:
+            test_infos.append(_prepare_test(case, root_dir))
+        except Exception as exc:  # pylint: disable=broad-except
+            cons.print(f"  [yellow]Warning: skipping {case.get_uuid()} — prep failed: {exc}[/yellow]")
+        if (i + 1) % 100 == 0 or (i + 1) == len(cases):
+            cons.print(f"  [{i+1:3d}/{len(cases):3d}] prepared")
+    cons.print()
+
+    gcda_dir = tempfile.mkdtemp(prefix="mfc_gcov_")
+    try:
+        # Phase 2: Run all tests in parallel via direct binary invocation.
+        cons.print("[bold]Phase 2/3: Running tests...[/bold]")
+        test_results: dict = {}
+        all_failures: dict = {}
+        with ThreadPoolExecutor(max_workers=phase2_jobs) as pool:
+            futures = {
+                pool.submit(_run_single_test_direct, info, gcda_dir, strip): info
+                for info in test_infos
+            }
+            for i, future in enumerate(as_completed(futures)):
+                # A crashed worker is left out of the cache but still counts as completed.
+                try:
+                    uuid, test_gcda, failures = future.result()
+                except Exception as exc:  # pylint: disable=broad-except
+                    cons.print(f"  [yellow]Warning: {futures[future]['uuid']} failed to run: {exc}[/yellow]")
+                else:
+                    test_results[uuid] = test_gcda
+                    if failures:
+                        all_failures[uuid] = failures
+                if (i + 1) % 50 == 0 or (i + 1) == len(test_infos):
+                    cons.print(f"  [{i+1:3d}/{len(test_infos):3d}] tests completed")
+
+        if all_failures:
+            cons.print()
+            cons.print(f"[bold yellow]Warning: {len(all_failures)} tests had target failures:[/bold yellow]")
+            for uuid, fails in sorted(all_failures.items()):
+                fail_str = ", ".join(f"{t}={rc}" for t, rc, _ in fails)
+                cons.print(f"  [yellow]{uuid}[/yellow]: {fail_str}")
+                for target_name, _rc, tail in fails:
+                    if tail:
+                        cons.print(f"    {target_name} output (last 15 lines):")
+                        for line in tail.splitlines():
+                            cons.print(f"      {line}")
+
+        # Diagnostic: verify .gcda files exist for at least one test.
+        sample_uuid = next(iter(test_results), None)
+        if sample_uuid:
+            sample_gcda = test_results[sample_uuid]
+            sample_build = os.path.join(sample_gcda, "build")
+            if os.path.isdir(sample_build):
+                gcda_count = sum(
+                    1 for _, _, fns in os.walk(sample_build)
+                    for f in fns if f.endswith(".gcda")
+                )
+                cons.print(f"[dim]Sample test {sample_uuid}: "
+                           f"{gcda_count} .gcda files in {sample_build}[/dim]")
+            else:
+                cons.print(f"[yellow]Sample test {sample_uuid}: "
+                           f"no build/ dir in {sample_gcda}[/yellow]")
+
+        # Phase 3: Collect gcov coverage from each test's isolated .gcda directory.
+        # .gcno files are temporarily copied alongside .gcda files, then removed.
+        cons.print()
+        cons.print("[bold]Phase 3/3: Collecting coverage...[/bold]")
+        cache: dict = {}
+        completed = 0
+        with ThreadPoolExecutor(max_workers=n_jobs) as pool:
+            futures = {
+                pool.submit(
+                    _collect_single_test_coverage,
+                    uuid, test_gcda, root_dir, gcov_bin,
+                ): uuid
+                for uuid, test_gcda in test_results.items()
+            }
+            for future in as_completed(futures):
+                try:
+                    uuid, coverage = future.result()
+                except Exception as exc:  # pylint: disable=broad-except
+                    uuid = futures[future]
+                    cons.print(f"  [yellow]Warning: {uuid} coverage failed: {exc}[/yellow]")
+                    coverage = []
+                cache[uuid] = coverage
+                completed += 1
+                if completed % 50 == 0 or completed == len(test_results):
+                    cons.print(f"  [{completed:3d}/{len(test_results):3d}] tests processed")
+    finally:
+        try:
+            shutil.rmtree(gcda_dir)
+        except OSError as exc:
+            cons.print(f"[yellow]Warning: Failed to clean up temp directory "
+                       f"{gcda_dir}: {exc}[/yellow]")
+
+    # Sanity check: at least some tests should have non-empty coverage.
+    tests_with_coverage = sum(1 for v in cache.values() if v)
+    if tests_with_coverage == 0:
+        raise MFCException(
+            "Coverage cache build produced zero coverage for all tests. "
+            "Check that the build was done with --gcov and gcov is working correctly."
+        )
+    if tests_with_coverage < len(cases) // 2:
+        cons.print(f"[bold yellow]Warning: Only {tests_with_coverage}/{len(cases)} tests "
+                   f"have coverage data. Cache may be incomplete.[/bold yellow]")
+
+    cases_py_path = Path(root_dir) / "toolchain/mfc/test/cases.py"
+    try:
+        cases_hash = hashlib.sha256(cases_py_path.read_bytes()).hexdigest()
+    except OSError as exc:
+        raise MFCException(
+            f"Failed to read {cases_py_path} for cache metadata: {exc}"
+        ) from exc
+    gcov_version = _get_gcov_version(gcov_bin)
+
+    cache["_meta"] = {
+        "created": datetime.datetime.now(datetime.timezone.utc).isoformat(),
+        "cases_hash": cases_hash,
+        "gcov_version": gcov_version,
+    }
+
+    try:
+        with gzip.open(COVERAGE_CACHE_PATH, "wt", encoding="utf-8") as f:
+            json.dump(cache, f, indent=2)
+    except OSError as exc:
+        raise MFCException(
+            f"Failed to write coverage cache to {COVERAGE_CACHE_PATH}: {exc}\n"
+            "Check disk space and filesystem permissions."
+        ) from exc
+
+    cons.print()
+    cons.print(f"[bold green]Coverage cache written to {COVERAGE_CACHE_PATH}[/bold green]")
+    cons.print(f"[dim]Cache has {len(cache) - 1} test entries.[/dim]")  # minus the _meta key
+
+    # Clean up test output directories from Phase 1/2 (grid files, restart files,
+    # silo output, etc.).  These live on NFS scratch and can total several GB for
+    # the full test suite.  Leaving them behind creates I/O pressure for subsequent
+    # test jobs that share the same scratch filesystem.
+    cons.print("[dim]Cleaning up test output directories...[/dim]")
+    for case in cases:
+        try:
+            case.delete_output()
+        except OSError:
+            pass  # Best-effort; NFS errors are non-fatal here
+
+
+def _normalize_cache(cache: dict) -> dict:
+    """Return *cache* with every per-test entry in file-level (list) form.
+
+    Legacy line-level entries ``{file: [lines]}`` collapse to their sorted
+    file names; list entries and ``_meta`` are passed through unchanged.
+    """
+    normalized = {}
+    for key, entry in cache.items():
+        if key == "_meta" or isinstance(entry, list):
+            normalized[key] = entry
+            continue
+        if isinstance(entry, dict):
+            normalized[key] = sorted(entry)
+            continue
+        # Anything else is malformed: warn and fall back to "no coverage".
+        cons.print(f"[yellow]Warning: unexpected cache value type for {key}: "
+                   f"{type(entry).__name__} — treating as empty.[/yellow]")
+        normalized[key] = []
+    return normalized
+
+
+def load_coverage_cache(root_dir: str) -> Optional[dict]:
+    """
+    Load the coverage cache, returning None if missing, unreadable, or stale.
+
+    Staleness is detected by comparing the SHA256 of cases.py at cache-build time
+    against the current cases.py. Auto-converts old line-level format if needed.
+    """
+    import zlib  # pylint: disable=import-outside-toplevel
+    if not COVERAGE_CACHE_PATH.exists():
+        return None
+
+    # A truncated/corrupt gzip stream raises EOFError or zlib.error (not OSError subclasses).
+    try:
+        with gzip.open(COVERAGE_CACHE_PATH, "rt", encoding="utf-8") as f:
+            cache = json.load(f)
+    except (OSError, EOFError, zlib.error, gzip.BadGzipFile, json.JSONDecodeError, UnicodeDecodeError) as exc:
+        cons.print(f"[yellow]Warning: Coverage cache is unreadable or corrupt: {exc}[/yellow]")
+        return None
+    if not isinstance(cache, dict):
+        cons.print("[yellow]Warning: Coverage cache has unexpected format.[/yellow]")
+        return None
+
+    cases_py = Path(root_dir) / "toolchain/mfc/test/cases.py"
+    try:
+        current_hash = hashlib.sha256(cases_py.read_bytes()).hexdigest()
+    except OSError as exc:  # missing, unreadable, or not a regular file
+        cons.print(f"[yellow]Warning: cannot read cases.py ({exc}); cannot verify cache staleness.[/yellow]")
+        return None
+    meta = cache.get("_meta")  # a missing or malformed (non-dict) _meta counts as stale
+    stored_hash = meta.get("cases_hash", "") if isinstance(meta, dict) else ""
+    if current_hash != stored_hash:
+        cons.print("[yellow]Warning: Coverage cache is stale (cases.py changed).[/yellow]")
+        return None
+    return _normalize_cache(cache)
+
+
+def _parse_diff_files(diff_text: str) -> set:
+    """
+    Collect the non-empty lines of ``git diff --name-only`` output into a set of paths.
+    """
+    return set(filter(None, diff_text.strip().splitlines()))
+
+
+def get_changed_files(root_dir: str, compare_branch: str = "master") -> Optional[set]:
+    """
+    Return the set of files changed in this branch relative to the merge-base
+    with compare_branch, or None on git failure.
+
+    Uses merge-base (not master tip) so that unrelated master advances don't
+    appear as "your changes."  Renames are reported as delete + add so the
+    old path (the one the coverage cache knows) is part of the result.
+    """
+    try:
+        # Try local branch first, then origin/ remote ref (CI shallow clones).
+        for ref in [compare_branch, f"origin/{compare_branch}"]:
+            merge_base_result = subprocess.run(
+                ["git", "merge-base", ref, "HEAD"],
+                capture_output=True, text=True, cwd=root_dir, timeout=30, check=False
+            )
+            if merge_base_result.returncode == 0:
+                break
+        else:
+            return None
+        merge_base = merge_base_result.stdout.strip()
+        if not merge_base:
+            return None
+        # --no-renames: with rename detection, --name-only lists only the new path.
+        diff_result = subprocess.run(
+            ["git", "diff", merge_base, "HEAD", "--name-only", "--no-renames", "--no-color"],
+            capture_output=True, text=True, cwd=root_dir, timeout=30, check=False
+        )
+        if diff_result.returncode != 0:
+            return None
+        return _parse_diff_files(diff_result.stdout)
+    except (subprocess.SubprocessError, OSError):
+        return None
+
+
+def should_run_all_tests(changed_files: set) -> bool:
+    """
+    Decide whether the change set forces the full test suite.
+
+    True when a changed path is listed in ALWAYS_RUN_ALL or starts with one
+    of ALWAYS_RUN_ALL_PREFIXES: GPU macro files, Fypp includes, and build
+    system files cannot be analyzed correctly by CPU coverage.
+    """
+    exact_hit = bool(changed_files & ALWAYS_RUN_ALL)
+    return exact_hit or any(
+        path.startswith(ALWAYS_RUN_ALL_PREFIXES) for path in changed_files
+    )
+
+
+def filter_tests_by_coverage(cases: list, coverage_cache: dict, changed_files: set) -> tuple:
+    """
+    Filter test cases to only those whose covered files overlap with changed files.
+
+    Returns (cases_to_run, skipped_cases).
+
+    Conservative behavior:
+    - Test not in cache (newly added) -> include it
+    - Test has an empty coverage list (cache build recorded nothing) -> include it
+    - No changed .fpp files -> skip all tests (this branch is unreachable from
+      test.py, which handles the no-changed-fpp case before calling this function;
+      retained as a safe fallback for direct callers)
+    - Test has incomplete coverage (no simulation files recorded but simulation
+      files changed) -> include it (cache build likely failed for this test)
+    """
+    changed_fpp = {f for f in changed_files if f.endswith(".fpp")}
+    if not changed_fpp:
+        return [], list(cases)
+
+    changed_sim = any(f.startswith("src/simulation/") for f in changed_fpp)
+
+    to_run = []
+    skipped = []
+    n_unknown = 0
+    n_no_sim_coverage = 0
+
+    for case in cases:
+        uuid = case.get_uuid()
+        test_files = coverage_cache.get(uuid)
+
+        if not test_files:
+            # Missing entry (e.g., newly added test) or empty list (the cache build
+            # recorded no files for it): coverage is unknown -> conservative: include
+            to_run.append(case)
+            n_unknown += 1
+            continue
+
+        test_file_set = set(test_files)
+
+        # If simulation files changed but this test has no simulation coverage,
+        # include it conservatively — the cache build likely failed for this test.
+        if changed_sim and not any(f.startswith("src/simulation/") for f in test_file_set):
+            to_run.append(case)
+            n_no_sim_coverage += 1
+            continue
+
+        if test_file_set & changed_fpp:
+            to_run.append(case)
+        else:
+            skipped.append(case)
+
+    if n_unknown:
+        cons.print(f"[dim]  {n_unknown} test(s) included conservatively "
+                   f"(not in cache or empty coverage)[/dim]")
+    if n_no_sim_coverage:
+        cons.print(f"[dim]  {n_no_sim_coverage} test(s) included conservatively "
+                   f"(missing sim coverage)[/dim]")
+
+    return to_run, skipped
diff --git a/toolchain/mfc/test/test.py b/toolchain/mfc/test/test.py
index 2193e677b4..26e37c669e 100644
--- a/toolchain/mfc/test/test.py
+++ b/toolchain/mfc/test/test.py
@@ -76,8 +76,8 @@ def is_uuid(term):
     return cases, skipped_cases
 
 
-# pylint: disable=too-many-branches, too-many-statements, trailing-whitespace
-def __filter(cases_) -> typing.List[TestCase]:
+# pylint: disable=too-many-branches,too-many-locals,too-many-statements,trailing-whitespace
+def __filter(cases_) -> typing.Tuple[typing.List[TestCase], typing.List[TestCase]]:
     cases = cases_[:]
     selected_cases = []
     skipped_cases  = []
@@ -108,6 +108,62 @@ def __filter(cases_) -> typing.List[TestCase]:
                 f"Specified: {ARG('only')}. Check that UUIDs/names are valid."
             )
 
+    # --only-changes: filter based on file-level gcov coverage
+    if ARG("only_changes"):
+        from .coverage import (  # pylint: disable=import-outside-toplevel
+            load_coverage_cache, get_changed_files,
+            should_run_all_tests, filter_tests_by_coverage,
+        )
+
+        # Example-based tests cover no unique files beyond non-example tests,
+        # so they add no value to coverage-based pruning. Skip them entirely.
+        example_skipped = [c for c in cases if "Example" in c.trace]
+        cases = [c for c in cases if "Example" not in c.trace]
+        skipped_cases += example_skipped
+        if example_skipped:
+            cons.print(f"[dim]Skipped {len(example_skipped)} example tests "
+                       f"(redundant coverage)[/dim]")
+
+        cache = load_coverage_cache(common.MFC_ROOT_DIR)
+        if cache is None:
+            cons.print("[yellow]Coverage cache missing or stale.[/yellow]")
+            cons.print("[yellow]Run: ./mfc.sh build --gcov -j 8 && ./mfc.sh test --build-coverage-cache[/yellow]")
+            cons.print("[yellow]Falling back to full test suite.[/yellow]")
+        else:
+            changed_files = get_changed_files(common.MFC_ROOT_DIR, ARG("changes_branch"))
+
+            if changed_files is None:
+                cons.print("[yellow]git diff failed — falling back to full test suite.[/yellow]")
+            elif should_run_all_tests(changed_files):
+                cons.print()
+                cons.print("[bold cyan]Coverage Change Analysis[/bold cyan]")
+                cons.print("-" * 50)
+                cons.print("[yellow]Infrastructure or macro file changed — running full test suite.[/yellow]")
+                cons.print("-" * 50)
+            else:
+                changed_fpp = {f for f in changed_files if f.endswith(".fpp")}
+                if not changed_fpp:
+                    cons.print()
+                    cons.print("[bold cyan]Coverage Change Analysis[/bold cyan]")
+                    cons.print("-" * 50)
+                    cons.print("[green]No .fpp source changes detected — skipping all tests.[/green]")
+                    cons.print("-" * 50)
+                    cons.print()
+                    skipped_cases += cases
+                    cases = []
+                else:
+                    cons.print()
+                    cons.print("[bold cyan]Coverage Change Analysis[/bold cyan]")
+                    cons.print("-" * 50)
+                    for fpp_file in sorted(changed_fpp):
+                        cons.print(f"  [green]*[/green] {fpp_file}")
+
+                    cases, new_skipped = filter_tests_by_coverage(cases, cache, changed_files)
+                    skipped_cases += new_skipped
+                    cons.print(f"\n[bold]Tests to run: {len(cases)} / {len(cases) + len(new_skipped)}[/bold]")
+                    cons.print("-" * 50)
+                    cons.print()
+
     for case in cases[:]:
         if case.ppn > 1 and not ARG("mpi"):
             cases.remove(case)
@@ -176,6 +232,27 @@ def test():
 
         return
 
+    if ARG("build_coverage_cache"):
+        from .coverage import build_coverage_cache  # pylint: disable=import-outside-toplevel
+        # Exclude example-based tests: they cover no unique files beyond
+        # non-example tests, so building coverage for them is wasted work.
+        cases = [c for c in cases if "Example" not in c.trace]
+        all_cases = [b.to_case() for b in cases]
+
+        # Build all unique slugs (Chemistry, case-optimization, etc.) so every
+        # test has a compatible binary when run with --no-build.
+        codes = [PRE_PROCESS, SIMULATION, POST_PROCESS]
+        unique_builds = set()
+        for case, code in itertools.product(all_cases, codes):
+            slug = code.get_slug(case.to_input_file())
+            if slug not in unique_builds:
+                build(code, case.to_input_file())
+                unique_builds.add(slug)
+
+        build_coverage_cache(common.MFC_ROOT_DIR, all_cases,
+                             n_jobs=int(ARG("jobs")))
+        return
+
     cases, skipped_cases = __filter(cases)
     cases = [ _.to_case() for _ in cases ]
     total_test_count = len(cases)
diff --git a/toolchain/mfc/test/test_coverage_cache.json.gz b/toolchain/mfc/test/test_coverage_cache.json.gz
new file mode 100644
index 0000000000000000000000000000000000000000..0bb928250ae10924246c9c4845c6ecf959abadae
GIT binary patch
literal 8827
zcmYjW2{@GB_n%}-_AQZ+o$PC}D?*kmiJGymW2uaN>^mWQ*2un%2-R34gfJ*$>`V52
z-}%2oegD7bJ?44ddGFkN&gXpYIiLGtiX<e&`BG?$a}Kj~h8deXxmvz9wYD@iH#N7h
z<hOHnas;i6sXM<wziQenpPuF^lHBr;tGeH7U#IR`L~SSdg}LBOvJl82scS;!Mub`H
zoh#uxeHr%y&!d06P(YENxM;m!llAq=c`jm9xHWft?(?Z};>^^`%iG&)J9MmjZ)wx)
zsAbp6%eVG?ZEJ0<dtJ`w^l0T|Z*$Jf`Fv{S{H!DSHl`u1#y54%*nN5o<{~jBcixFP
zv}zpjS=?Q&9lLJ4e>&&$`Cen{-nNLudcpDOTKxQ)<35?S+y3Vf+s5<7WB8eWWOvKl
z%HHPG+U8VnX#Vlo@#1vB`QCO+Lp@Sv;zh=o!<qzopygy$=6qq`eEy`i(Z~HP_Oxa0
zY-(k5?JMV64??0=>ui6$Am(hh+o#dXYkSt|IG(`QJ#EhE$n>mLKzesE?XY9Hw=Vg;
z^!D`ec1L{e>e!fok9uv|Sc~JC=XP&%>q*JT{6wTc!^nGSp>u6n>GAhYkP~6!11iiq
zf~nl+;B)49&i3jj0q?y%@2#!miFz$2kE!{McmiY3WI2xw_4UpIGU>I?2EFx?jbg{A
zGbd-GS_Wy2XD3m|fBKHydVR?DYZpD8ot<kM>pjn#KTDm@sYqq|{<(4X^0q5_-~<KR
zKe=(Z;`HmR<t$Y0i|@&FDEe%E->K3O!j>^f1DT?MG=`(8sCSN51O~cS<;tLZJRaqJ
zG+@9~A)y@Q<Du`$*gb*`+=C6`(Aga&YpuIxDB!WnDY8dyYMVi#euoBhPzXxuUY-ob
zGXde7^go9&7C~R~aTt&wKkx!pjmfUY+zzDSi>+(s1*~Hk^+(@kCNSZb)#XB=OG}^P
z_#au3Vkf2nx8H^af8`TT_CqX0V2#y9pBqXe<6;1N2pBpWsZ7y2N|ko`+dh|E1fqc1
zF7k40l|>X|ayJB@UFyHJ@~_O+8kAjlGE)hv6KvwBmUV0Gr@Gz*dh8jAFmJa^LkNk7
z{$1?+9CbYA<8^#6GD!PCgHPPZ>e~%CV4+J!-i7??!572SM!unH0)M52_lW|yTtc%A
zsaGze>c9q!{IIMIc5q>d>=EVrcbi7r#;LM)5f3$_ofe|Om3NJ-e1oX`G27b-{t1z^
z2t>zKLC{sfLp~(`kO;JFOM$nB(NEwYK;R%6JiV%;SNzKB&gE=F%?g#<RX18{E@KYl
zy~;nY9Vq`7pI9Vqnm2V4`#A6v#t7Qz2Nj5G1?g+p()wMY^Sk1H82iE@e&wz-$OM$%
z;D<0r!zKEh@Bh+knH?ZIgSl4J)}^l%q^(vl*>9}2!ti={n?2mlmf^~q`ni1u1ILSY
zax$fA#{y@>rz_!5#nuJwK4_SZvg)1aUAAeq=>@5(o|T};N};h3#DZGJO}`uEK|ywr
zzji9F2!i6rvB-e+In+CrRQ_diiQltQv?v=3TnUp(xeu%|=`LNf-zBUST~2DM3a@x^
zFC(99F4-6BZn#J$fbt`7jygE3^dA!n5n?>+a4bo%JgGqDo7wSQLIilVwxpI>4$Bh0
zWR=t&Vw>8N^6VffpyF*vu;{^0Mf!lIu(k^FID^v1O`baTI`%N|mg3~?`dy^e2A(n2
z7>#5G^N@O99T~xWi4G>9x&!54>Z&N9^Z~%CI`z4%WXN_TjpHfVUaqR-LerTh=!C2E
z*CV{%)(0CE*HrATH%tO{zh=1!mQ2KF{5Oqc^?Ed5TXCVw=%O<WOCWfxUrdOxd!o<f
zWy@2MK37DOf&nbgoYpTHUvgnElD3rUstJgr9=j`&R=|}S|B%rJYfz8#rq*2UD1v34
zH_1|27u<Y({}46O;lQg_tGPu~zR}rn<8ZbA&|$Y?VeZ{$U;}eBK16>TAy)h$_Uanj
zU%vSp(ZJq{{hJ%bU>D>9Ne;qph3kt*52MYUk0Y7g<<k2<4`&Y5Mb8el458#_!`FK6
zc-UejuTGISa5PgKR8#wTar3z<Gm+Ezb3e&i9x9=OKh4($DPN!i-lGIV^N7>}FBl)N
zj!HB!+}FP4Yz-`u-k@+m5lxDIR{e41XYflxP6cH&dkK~1{-I(R4i?Hky@b9&1zy#L
zRKjCWa?&YSIm^Zt4ku?d^lqrU32uIXKSKSfpwU47QD+7<Fg~{tR1YT$`6$JpbQIY{
z_@;@luC8g@hiR3X-2_(|u*uo|t8!OLXTtex8ofSadY_k0I5>`L5=NmC!us42ufvDy
z<iCjXho~69z@|4C9PErecY5d1=!4UiH&sQ7?`(c#ZZb}EWOigG|B|S=6;*yHcm$C>
zoz$|vQQOtE=!3*)dcC$MZkse0Js1nE9l41v(83dxfol%8bCl%wkzjXfpGyb7`AEX+
z8f{a2;|(y8u~k-|*JJOmi}`&F^;kRpu|xV>%+zuQMn2Y;(M%{QVCY(dR0XpqhXSeQ
zPro03>%HT|C{FmN1#itXXrSXlh+`X$YUT~5pL;Sz%~p~?sBzYD^Q>x_Q1v_EOIZxa
zY%gx4lYneGczoRHy+QowVd9^j@$U%e?6e+G*%2K68#AnR9wllt33AN0nz`j!H>PpY
z3gK%Utb#bKf~ulHOcQob*lQQ1P-n!O<XXofNBpVs>NN5F;kfhwhj;(CXMEu|SQXFB
zDq;c3o~{xAdD=Vuer~&VAMNxu+fMQo0`NRLS7kng=JYj)xarnztS!?NBFgY3hSh5c
zSiH!Ed1H&Qi`$S9XCxY+RFPR0P+KybpP-v*H~`Z8`Ak@MVwfp1{A^AE%Q#A<1EICn
zcMNe+gX!A}l0nWX*QA-y`g+KsR3hjsilJV$iQ<f~R<!iDku6dbSl}M(1N676=<WXA
zlxKtBEP<mB0zfn)dz0S$JB>iOs$9bXcHx&UTCX2fKY-#ka3{1W@_IV*N1j*<^7=&#
zrM1u42bzgeTr7n+fzobJ^16CybEoMZ(_@Na1qqlEVEcV2uu<u~im00a_))7}8$QT7
zvXt{0evBEO&x{1tn35;HhL;9?b2@2rY9m+_xYtaRgP4nhS~THJvt;&A?tn%8pO(!J
z$zVOiI_a5G@<`(uFSqgKjwc0YWCy`AXM6tE8D4_EJhQ81PQYzx8)&li#zgIOdQXDm
z75Xey-#)h|-g&%==qA){VDm2ufCL(ne~4LoYXdpJ8qhzp(Sr$AGvq)(psO3x@l@X)
z%%>4xJ3k12GWbe#8-@pYAp{VaA@k<LW8yIot}<d0`-Q0S$VG3j&8d~GJ)h&cHwPzn
z{Ly=e&&pSB`dxW93%zT6U$_yv!^5e76E$(55h#S&96CfgzIcCs6kEj-2V_8yI%yW8
zAL(lst;8n>fsQ#|{muJfpBFJ6rXY1g^uL}zKB?!enz(6!?F8%|jA?Z>)RAl9e=hWn
zDx}?Yj>iz0d<n;bl&f>f>w2%b+kb1NvD6c9J%rvx6ET1>BAtJ-6}=|B9DEB(A~VP%
z`*r0(5YV5{dG6rs?Ap)!1ey@Ea*mLx?2Z;beKrMshxFkKxh-dJG6lB*8-t66B#S1?
ztnO4!{e}0RlYhkanH4!aF8dW@aY@p6A!I1<QejBBhC;*HRkSV@!-AB!o0#}R-NM?-
z8ch0IU)3_`XO#t(hAa#K4WH%4StgTW%LcX&*?2NM?7Jtld?7a$r1m_^M&rXm`tM@P
zx$rAN*J<Vg$2{87mlxW_ow2@<{WI#hloZ5CAfI;>99vtvZLA_7Up~FDV&^FbUM&c9
zafm$Z??v@ye#s9KItdtlm$w!5aQvB8v_^Ck;@o}V)UmPN`$S_iYWqid=_m;QvcDZ1
z4NDu7tm$4<cq6YX?xueAd->cdPyv8Di+srFjO+Z0f5^YG{cUk8nhe&tt0CX6{cYfh
zS8_^9AeSNcWA0adGWc$!PJxFNty77ATx^W}lPiW0mubW5K%HOdbve-Mm;KUyKt7Ye
zC-Tj|EQ`9i3^EAtG0>PKM-kcUndCI{65|vtLhQ5s{C9JQA}Jc0r9Joe&YF;jGJ$WF
zdGK{wY$}UnQF~$u=wHqq)tp$Z$G*+BeX=;(g&x*38K4zaWEEAMQBfy+{@Li+QX+~l
z3eGhzYO|6H-T!b;MIE%#Pbses;=59M<-E?}?xD7Z>5F7-d3;TL(gyQ(7o{iJIun<&
z4J=yifC>2}gO2nUIC#C*_H{(%b6RA7CT)lI&9=upEqWnE#PPDeklHX=aU~UjY`>ip
z;5y7PB}k623+QUiqM+wqj!P!hknV!NLcHRClI{nNprYpF($r2ru+P^Fd#g+SozM(d
zD1c7kjeFGf;J~P&`}idlnZm@D@6g%HC-o4=ngL7;?33@57nsy^CLvxiE<8shMG|}c
zPVF5g@qV3m@aPS*GC6A)1u)h3Uh|+uaZEnV#gy2uAv34!knZ~>Hl^P^d5-jU0L^lf
z;}8FXDV(Q&6Z*A;?}<Udzl1GlYr9lGp!j$D?bFvGGmWRZlAuemRU0s|JF3A_{da#n
zPtsSsUywo_3O?#w;LRyt5yQX2S)UH23t^TgXC@@aCB&ir>U*0ts1IY;)c{qzVoBF2
zIC<Z-u2;%Xefuv9kS2B`50l5OjN<>PdOPp89n}B!x);paUI$U}=jJ7aHU`ChYw(a<
z+yKXtImEd7h_Hk8NA;1L4%Sr}_07!W^|Xe+H6x+`0_6@ZNWtoQf0r_bVk>LcB2rP&
z&1s#ibXnNmrxiId>Mhix>)rIYm`snUP`0l5<XJUGgl71E&X-pxc{CVldA4sb4(hUH
z<3=IOb?q7BdgHFj0`94y60+ow)h&YycDYA>DcC8ry-n`5&UbM~6g69~!8xek%Ic<r
zCbE%(D|I$cFDpM*A%Ehz*WPm=3aDirq+_ORtn5mS^)@fz$?$UqyUwQ{hbjFj-ks2b
z+n4C=7J9hWGy=dq>ynK#C(+xry}Up93n<0r-GXq0Ukbn608>tI<iz&qW3_PJ{=4y+
zn4USE<xbQgC<^rmq6U$w*)|>Leqwg*AKHI@JRs^>7`&IHWEnjxnKbikjmB!SACr}v
zy-&B?(S-ix1wV5?iz|_9fDh8!o<SSwbH#G>#Q+6I-m}P9;3jAJ*|0+!nfu*8B|(zw
zmG*QN@tNUG>6(>K;kms*IQ`Z<%zE<CPHwNO^o#DW+-hb?$D=2#b7wUc9xgHCu96oC
zrX}K|HsY#+*H{$Ygr-_`P-gqx&9HL9hGO>mZM{f4VZ)?)BEwWndE1=?M5`4k4=~N*
zc24hl&X#l4Q-;<(Nt>WH^Pi6A2VGj}-M+UaoXb+A4-)6UsV-%S@TN4~URm6epS-Gf
zr#KG7J1EW~h@<;BWP{Z{WzU&@ECCR!p)XT~P%a&@N}R=?Vy{<dr#RXo^A?kQ5^sLd
z<4NgBVIPe5ced&v2#C*&Ar=aqzV`{&>tn%a0K!}%E9oxmUUM4p<qnDY`c9^xMrWz>
z84++OTy+Q4yB|T--ger?U=?g`;{wv;h&LzVE6u;t$_Tf$BFad_HUP2EnR_jWiD}At
zL@Cu;LpVrDGAN!o2O!4?-2d~9ybH1vI049oH>Vat&En_*PEg`uf6-6-{DXzmRRCEw
zdVjKyPc-)|uB(ayTCW9V;@i&|pxCCDitqK2z^mT@7XsgZ8(GoX*@S&=p!zEDmr5<0
zS$e@D<;+i@GTxex9lLF+qD-oGtpJc$Ea}naF!0OU!xFX`+9cQ`HuoCNPXA&#!YDk6
z7?=U4ygWc5d|Ex`?C|7>zVZEuYYp{8<-9AG3Gn9_I_=6;d{-`pq9}n9W?>erEg!9w
z7|hc(ZRknKXEl4z;sK!hI~5aWhl=mkr<e#+g&ssrh=#vqj9JAg+J?XH2%#5W(P_yK
z(I82q-?K(PWlvd#^NHV-%=RNr&tQTxOU`C6<=+r^fP;;$jfdsrz)tGnF5;HD65#ye
zJ%!0uyy!Bc0U8=Atc<oHlxv>)nGfrzQIrJW1t1de!9?mnmS9<GJF^4E(Tgi)ZXWl>
zv?NAsV+JIn2HI^lYRTL_iy*XO@$y6Wt$8!ReVpP`?)#3P-p`$%&CRXiqCg}lwvLjz
z7<B^%CCi3efCgGr&S!}Exz3T`!tPdS4Q~fND(BgEtH&i*jdH%f`9}BVTf#eZ2doM*
zG;K<sk2TN4$2Vr`BI|X!KsJ8(Hg-27GjWHX<?Us#-3%0>3sj9m8+K`%j_3aodE6q3
zwQ|@p>=kzy;ea5?fS|_184&A`^4CfF=c1Q1Y<IoQdo$_>Am?eoH~M!^_NH4fv#Y1B
zW&>L#6IG@93Ex0iU1$apNpf2<_qaY9rW3fJCbgCjj6HuW$lgYly}d(VD>-!4-A}GU
zoy7Qnh*~AOL{jJBwuZ1e4hqzbVLsv?IZe{olS1WQghU5_JEOoE@j8WG1Sn&bVpeJ>
z&hBj-%enO6*layvn>X$5!Zu;DO)^86Lr5BXAfPuaC1f*hsklbEqA?+Jn+%PcX+(sJ
zyH9o>d2z!@?NLqcZwmTITMdSnrYyD~_v7qAXLx>cM^xt&!XNWrM19X0WV~mnA0Y~7
z7Ag{)C3TlAeyyU`O*<=cm3JX}?N85SoyA(|oLAL+m)5f%{=kclHR4{C#QV^as#2Mk
zdSRP%TKFV=UDhalLi2hTASrZ@Yw;hKf8s<Yjh0R&8N3Uz)v+X&{r)1Gw%VZGUIO1-
zut@lEdAzM{z$*+7@bn#r{uv2LgU_f_IfMdb+(%pRJp8LCIld`ilvA@*rt!H^{ItYv
zCRl@voAiM*(0zT=Ay@*?!9jsuZ)B`BA{l^r@y$0_9K|}=3!Zjh(+wC|A6>SESQ%SL
z9+1Ig`lN>XgO{!zUX9+FvLGLJQ1X%*D!fM&a3aoxSk~{G#&<S-OBoqpDy3V3_5BUk
z1}r)vMDG6$A?RC0{`l+W3qRT>uilj;HESp+66;w(B|=l*(uI?;S5!B+E#hGWA@L--
zk|B#O<Iq&g;z2t~E8w(X>4ZDg86zNbG{3zJMO;19uO=OMuuO~022P?3$e0kv^@sd5
z^bxEevJL7&%vTS;j5N)?ahKoB#Ldi)gHwrir3yefJ$zQzFStrFP+}o9s|T~Q_<PF>
ze*NBCU|6^pOXS&`fn*iBIr*u%P%KDTocv535|>}-$yO+Qt945|J+WOK1NL1|!$M@k
zLTzUem8DrpBJl-gLI>pfVG#B8%+=1zABWncZai#T5xGEE(3zRw-PQ09QZ!;2P^@pn
z&7tyNBu$@l;{_?uVU5i1<LaF@at!7g_F+O0nKtEMHkYk=I~KUP1ryH<PPgCH#nKF;
zCiMBLiA4stK0qk+G@LnwT7LdFVHZ#CYYXh}FXvxO%6NIV0~bpPwdT-g+_BJW-6$FV
zZ*)I97y&LVzEi7?i~s*TL99((EY8U;DRG7PX$Mb6#P@%B8+G7(At<E(mr$}l_|3Rw
z?hgrG@<mt!{pnR6!M}&{$3`EK^J+5xfcY$fCXc)J>pyngpeRPk9#V-N?3p2P=4qzt
z?w`BJ<{a?gD<Jas2Y&l_K1<v)KDl{>p+W~mOtHrw9_5ceGtzp%4CVwbxktw#8xXIS
z1`FWrNaD>pMH{S$vfmhfO2EFHMw?+M4Z_m~;i1YQe%z_s5F9Q)94>uOF^HfL=MR05
zU<ME9Uxb;-#fx*IQB;$UXK*71BozwXYhEeRJ!jqad4SvRV%PI4z40u`CjP_Vcr28a
z_pA8DwdZ`wX!ys9{&+2%@Xv5U8+R0w8R9vop$iL`z`Eas8gNlhnt|sH#c#>MSW1dV
zXznOo`tzlXBR&_X3l8Aeg>T*Rx~T${;-c0dE<YW?d=q$B8Lfg^!53zl3zQC^BtDHJ
zPEJg|GqX-&H3MVGy(zb?J(+<|%!wQm58Q#9TK)QM3V-`d;PUat)ID{99IuOFq+R2?
z1En$2v40rm9W0*Sqf`xJdq8>TT5Qmxt?pR1M1JRrgeXF~geWeCC*np8Fk{Q`hL@e!
zdn~rOG{v<F-DSaU6~)Ck*#X)Lh0J>7JX|O6=WN%E+9#D*;>iEa^@m~o;S#q8!<YEL
zM~ZBh4<_hTEq+WDsV4pR&4W0nQ31X5Cv6rN%U=#`c=fx$6&%uou)x75sTBRu&+HX8
zM$7(FiHO?_s6^Oew1sb7mR1>^xc_$_(NT+3BtdYo51m&D(a~`BtTDsqkIb%6%LCSE
z%2H1{t~{_<BLDP=r;rt8pkUl8;kpq1cK*H|#zOwDdw??yhq<|hdTjrNK~UIRrbTtH
z(1d;0_YTgkZ|XA}GsAHc-6|M|Sij3l4zS9reiy}7=R`(^Vr~An2r3|#s=$>j@#S6a
z99|6lUp^;GJQFW?@CybG5MigHF0fE6bm!L-<u+gGqu?Q=2_jO&1JK+YEg+hT_|g3Y
z!*WL(V@j7~I#iDPD}`fjnMWGnr=JdefoKB~ljT_ZGHO6QEW{=(WFgQ#n&eY+C8{;_
zU(rsCV{c=Yq{X$4#(R=(uE4`HA*uM@?A5Sar3Vf^21q;RM5|nG-1vVEYHLVBrT~Yy
zZPmgMJ9=rGjf16umZ3TPuQpit5?&;~*at0tsHz%-Y0c877rn@lOoZ~FCy*WAi#%dg
zS<^U8-gd_Wm%#h($QMyd!*Sh#A{gli>{q@s8<r(c35&x25v(~hdR`XBmaRuf>`zI-
zb)tQcz458R-lrkH?D!Y;10_+c3B2cMcTC;P=n?`NjqQyB@JY#Y8t1poH4jwj4Y^h0
z@pa=k&_4X=!3rq>O!&hG%;@~_&BMUtWHR4u+REYTN~;DMktN)T4U&ljHLgVXs^-vb
zPd*|n6|ijZ%r1HP;raC}5K$I2ITvMV`J=4hpHGa2qgm)bu%yzjclJ!`Bz8pn^wfj5
zS;N~Vck@tpr1GGw!C1s?bQWf&pVLkSPR^+ugg709EYMJ{=IGgGKKIAwL!YAF<EZxh
zf8S&`T)fHt?r#8Dt-)|Q-P#w&kwIORLA~eM`Mpr;TqIAnBkv1C&Ft{LLb{B>Hz^8U
z(HC$>`__J2mmMAJ`6By}_TBAFO-^Vf<%O0S3xc2MW%oi1W9{)hvj{x5UJ*l#<+6$T
zQXkkdd;nswB``K50;l%#a2?fe-h96av@Fw)5=ETL*h>6vk1QRsxEGtLs|tB(NbKiq
zNQ@CrZYAf*yq_Xk{e6h+SAb%rW+mwq+cf9MB)S$ljG6?v1xJ$j_@2yi_RPt}q}^NQ
zV5{4D%ltvjs9mv{WY45gcEuKb)}P}m+t|>y&g2LY%r6)yD{Cw~8LRE~Np0-v?j8x}
zlBdpEQH-uv&hcK}j*I)nRcnIX%`@xE_PXK92WDUVujIE2p7!;&#*8XR<<9IDRv{yA
z)mz~blyB0PZ+h;s#>+;&_!2}}&K-#IR@}eF@{K;Fs_^cIJ8a~XgEW*w96vZpSlZn9
z619@5m}Z1B%PPP7>uStt{HTa9vtbW?NoXvt(LOQYKxNFVLN*pUhpQ`(A{n~{kE4qL
z+v${u>*&m0NTH)fK>Ko4a{ng2)UrieSFB#zC-u3A>zNATf9hu_Ko56;OLP^cMKxyt
z+c82Ps~T94CS(GMOZjR#7xFf%Sbe)YG2!dAYj3)BDchG5y~cvxKE+pYx=k6Mp2LeI
z1>ebom3@WnS(}S016KZ0Jxm=uZ&maRz{GhdY=E~OqO<%WFI&gIiSQ@bj5wTWq{Aes
z|7WtoH1=(-K5^wtcA0yF&8lq**5^q9X?KsqjT>;tazySkSoF77;jHi$3FzaF0WC4D
z<l;mQF|<5DI}zcQAH*yQM5Y!!FHGehF|(E0pYN`=hJNZAIbOvbC_lr&F$|y$<{JcF
z((b%R`M$RBx!;#6q$nJzM9oa}GC(c5UNNV(CeKri1*p8Y0&z||#zWp_#Iy;*uI<S8
z{$64B36P1(&&!Q`N*4p@S7EJNlD~_iv}zJ>``upf&+NP>p^f;|i+bSXydnA#koW@6
zHCQxc$L137)VN!DOwF}{jG#2uS?ZWDYrHoDHT`uYJ9DIID2soE4l(h%lp6|p!~ksM
z!SDU|Z)20A1RfTi-=g#V8ES;GGa~nd4@iIi#<hM^{PU67CAhsdjHQG6f~?_ys#zco
zdvB7cw46D&+(WNVx})qQY_kFUr@4k>{#lB_Hxpv3DW1pWRi-%>8Iimf`z`zGcdc@i
z(Eg07X<dsua7u6uA}7#vnR>60H-{*)0ZzfXeah%q<lYCb>(LmjobI>DlG9&ETM%Z4
zUPNud5ST16w8ZAH{sK1~I7Z)=q2*)jeU-D(n);OY;{Ck@jH%Z_SXiGWN|JbNn+jcf
z+Hy+pqTS_1!Q^IfM2~{k?^XK6ygMY=!(2IojjlI|IKyl6g<!}+@c1nM@5-k?-BKhj
zZXpX`1X)fJBA5$xy&h||qfrxv1y+ioz}So7%BRins}YGeJ+p6MExDGF9i1OU{BJm2
zz4bOjt;hE1#JK%>L3aymbM9C|jcb2h@w`^>oc;TLC%k;cO-&_dwXwVLhS^!%$f0t<
zDF$9XP+Krxpqgo%t}fKU6|kc)9J##+XZSu?;@&VuZ>jMv2sVF1+!0LpeEGxZ!gVHr
z8S1(6eOIsJvlSJ*`SOFEk%`kT-*JZ;-=i_#gst+%_@(=&8SdNrjpqU&5a|5;DoFm2
L?8!DR5$OK_J|8wp

literal 0
HcmV?d00001

diff --git a/toolchain/mfc/test/test_coverage_unit.py b/toolchain/mfc/test/test_coverage_unit.py
new file mode 100644
index 0000000000..7341027db2
--- /dev/null
+++ b/toolchain/mfc/test/test_coverage_unit.py
@@ -0,0 +1,662 @@
+"""
+Unit tests for toolchain/mfc/test/coverage.py
+
+Run with:
+    python3 -m pytest toolchain/mfc/test/test_coverage_unit.py -v
+
+These tests are fully offline (no build, no git, no gcov binary required).
+They use mocks and in-memory data structures to verify logic.
+"""
+# pylint: disable=protected-access,exec-used,too-few-public-methods,wrong-import-position
+
+import gzip
+import importlib.util
+import json
+import os
+import sys
+import types
+import unittest
+from unittest.mock import patch
+
+# ---------------------------------------------------------------------------
+# Import the module under test.
+# We patch the module-level imports that require the full toolchain.
+# ---------------------------------------------------------------------------
+
+# Create minimal stubs for toolchain modules so coverage.py can be imported
+# without the full MFC toolchain being on sys.path.
+def _make_stub(name):
+    """Register a fresh, empty module object under *name* and return it."""
+    stub = sys.modules[name] = types.ModuleType(name)
+    return stub
+
+
+for _mod_name in [
+    "toolchain",
+    "toolchain.mfc",
+    "toolchain.mfc.printer",
+    "toolchain.mfc.common",
+    "toolchain.mfc.build",
+    "toolchain.mfc.test",
+    "toolchain.mfc.test.case",
+]:
+    if _mod_name not in sys.modules:  # keep any entry that is already registered
+        _make_stub(_mod_name)
+
+# Attributes coverage.py pulls in via relative imports; each leaf module gets a fresh stub.
+_printer_stub = _make_stub("toolchain.mfc.printer")
+
+
+class _FakeCons:
+    def print(self, *args, **kwargs):
+        pass  # suppress output during tests
+
+
+_printer_stub.cons = _FakeCons()
+
+_common_stub = _make_stub("toolchain.mfc.common")
+_common_stub.MFC_ROOT_DIR = "/fake/repo"
+
+
+class _FakeMFCException(Exception):
+    pass
+
+
+_common_stub.MFCException = _FakeMFCException
+
+_build_stub = _make_stub("toolchain.mfc.build")
+_build_stub.PRE_PROCESS = "pre_process"
+_build_stub.SIMULATION = "simulation"
+_build_stub.POST_PROCESS = "post_process"
+
+_case_stub = _make_stub("toolchain.mfc.test.case")
+_case_stub.input_bubbles_lagrange = lambda case: None  # stand-in that ignores its argument
+_case_stub.get_post_process_mods = lambda params: {}  # stand-in that returns no modifications
+_case_stub.POST_PROCESS_3D_PARAMS = {
+    'fd_order': 1, 'omega_wrt(1)': 'T', 'omega_wrt(2)': 'T', 'omega_wrt(3)': 'T',
+}
+
+# Load coverage.py from its file path; the stubs in sys.modules let its relative imports resolve.
+_COVERAGE_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "coverage.py")
+
+sys.modules.pop("toolchain.mfc.test.coverage", None)  # reset if already loaded
+
+_spec = importlib.util.spec_from_file_location(
+    "toolchain.mfc.test.coverage",
+    _COVERAGE_PATH,
+    submodule_search_locations=[]  # NOTE(review): gives the spec package semantics -- confirm intended
+)
+_coverage_mod = importlib.util.module_from_spec(_spec)
+_coverage_mod.__package__ = "toolchain.mfc.test"  # anchor used to resolve "from .x" / "from ..x"
+
+sys.modules["toolchain.mfc.test"] = types.ModuleType("toolchain.mfc.test")  # fresh, empty parent module
+sys.modules["toolchain.mfc.test"].__package__ = "toolchain.mfc.test"
+
+with patch.dict("sys.modules", {
+    "toolchain.mfc.printer":   _printer_stub,
+    "toolchain.mfc.common":    _common_stub,
+    "toolchain.mfc.build":     _build_stub,
+    "toolchain.mfc.test.case": _case_stub,
+}):  # patch.dict puts sys.modules back to its prior contents on exit
+    try:
+        _spec.loader.exec_module(_coverage_mod)
+    except ImportError:
+        pass  # deliberate: the AttributeError fallback below handles a failed load
+
+# If the importlib approach failed (relative imports unresolvable), fall back to exec.
+try:
+    _parse_diff_files = _coverage_mod._parse_diff_files
+    _parse_gcov_json_output = _coverage_mod._parse_gcov_json_output
+    _normalize_cache = _coverage_mod._normalize_cache
+    should_run_all_tests = _coverage_mod.should_run_all_tests
+    filter_tests_by_coverage = _coverage_mod.filter_tests_by_coverage
+    ALWAYS_RUN_ALL = _coverage_mod.ALWAYS_RUN_ALL
+    COVERAGE_CACHE_PATH = _coverage_mod.COVERAGE_CACHE_PATH
+except AttributeError:
+    _globals = {  # namespace for exec(); pre-seeded with stand-ins for the toolchain imports
+        "__name__": "toolchain.mfc.test.coverage",
+        "__package__": "toolchain.mfc.test",
+        "cons": _printer_stub.cons,
+        "common": _common_stub,
+        "MFCException": _FakeMFCException,
+        "PRE_PROCESS": "pre_process",
+        "SIMULATION": "simulation",
+        "POST_PROCESS": "post_process",
+    }
+    with open(_COVERAGE_PATH, encoding="utf-8") as _f:
+        _src = _f.read()
+
+    _src = (  # strip toolchain imports; exec'd code sees the dict's keys, not the name _globals
+        _src
+        .replace("from ..printer import cons", "")  # 'cons' is already seeded
+        .replace("from .. import common", "")
+        .replace("from ..common import MFCException", "")  # 'MFCException' is already seeded
+        .replace("from ..build import PRE_PROCESS, SIMULATION, POST_PROCESS", "")
+        .replace("from .case import (input_bubbles_lagrange, get_post_process_mods,\n"
+                 "                    POST_PROCESS_3D_PARAMS)",
+                 "input_bubbles_lagrange = lambda case: None\n"
+                 "get_post_process_mods = lambda params: {}\n"
+                 "POST_PROCESS_3D_PARAMS = {'fd_order': 1, 'omega_wrt(1)': 'T', "
+                 "'omega_wrt(2)': 'T', 'omega_wrt(3)': 'T'}")
+    )
+    exec(compile(_src, _COVERAGE_PATH, "exec"), _globals)  # noqa: S102
+
+    _parse_diff_files = _globals["_parse_diff_files"]
+    _parse_gcov_json_output = _globals["_parse_gcov_json_output"]
+    _normalize_cache = _globals["_normalize_cache"]
+    should_run_all_tests = _globals["should_run_all_tests"]
+    filter_tests_by_coverage = _globals["filter_tests_by_coverage"]
+    ALWAYS_RUN_ALL = _globals["ALWAYS_RUN_ALL"]
+    COVERAGE_CACHE_PATH = _globals["COVERAGE_CACHE_PATH"]
+
+
+# ---------------------------------------------------------------------------
+# Helper: minimal fake test case
+# ---------------------------------------------------------------------------
+
+class FakeCase:
+    """Minimal stand-in for TestCase: exposes get_uuid() and nothing else."""
+
+    def __init__(self, uuid: str):
+        self._uuid = uuid  # handed back unchanged by get_uuid()
+
+    def get_uuid(self) -> str:
+        return self._uuid
+
+
+# ===========================================================================
+# Group 1: _parse_diff_files — git diff --name-only parsing
+# ===========================================================================
+
+class TestParseDiffFiles(unittest.TestCase):
+
+    def test_parse_single_file(self):
+        paths = _parse_diff_files("src/simulation/m_rhs.fpp\n")
+        assert paths == {"src/simulation/m_rhs.fpp"}
+
+    def test_parse_multiple_files(self):
+        diff_text = "src/simulation/m_rhs.fpp\nsrc/simulation/m_weno.fpp\nREADME.md\n"
+        expected = {
+            "src/simulation/m_rhs.fpp",
+            "src/simulation/m_weno.fpp",
+            "README.md",
+        }
+        assert _parse_diff_files(diff_text) == expected
+
+    def test_parse_empty(self):
+        for blank in ("", "\n"):
+            assert _parse_diff_files(blank) == set()
+
+    def test_parse_ignores_blank_lines(self):
+        """Consecutive newlines do not produce empty entries."""
+        paths = _parse_diff_files("src/simulation/m_rhs.fpp\n\n\nsrc/simulation/m_weno.fpp\n")
+        assert paths == {"src/simulation/m_rhs.fpp", "src/simulation/m_weno.fpp"}
+
+    def test_parse_mixed_extensions(self):
+        """Paths are returned whatever their extension."""
+        diff_text = "src/simulation/m_rhs.fpp\ntoolchain/mfc/test/cases.py\nCMakeLists.txt\n"
+        paths = _parse_diff_files(diff_text)
+        assert len(paths) == 3
+        assert "toolchain/mfc/test/cases.py" in paths and "CMakeLists.txt" in paths
+
+
+# ===========================================================================
+# Group 2: should_run_all_tests — ALWAYS_RUN_ALL detection
+# ===========================================================================
+
+class TestShouldRunAllTests(unittest.TestCase):
+
+    def test_parallel_macros_triggers_all(self):
+        """parallel_macros.fpp on its own requests the full suite."""
+        changed = {"src/common/include/parallel_macros.fpp"}
+        assert should_run_all_tests(changed) is True
+
+    def test_acc_macros_triggers_all(self):
+        """acc_macros.fpp on its own requests the full suite."""
+        changed = {"src/common/include/acc_macros.fpp"}
+        assert should_run_all_tests(changed) is True
+
+    def test_omp_macros_triggers_all(self):
+        """omp_macros.fpp on its own requests the full suite."""
+        changed = {"src/common/include/omp_macros.fpp"}
+        assert should_run_all_tests(changed) is True
+
+    def test_shared_parallel_macros_triggers_all(self):
+        """shared_parallel_macros.fpp on its own requests the full suite."""
+        changed = {"src/common/include/shared_parallel_macros.fpp"}
+        assert should_run_all_tests(changed) is True
+
+    def test_macros_fpp_triggers_all(self):
+        """macros.fpp on its own requests the full suite."""
+        changed = {"src/common/include/macros.fpp"}
+        assert should_run_all_tests(changed) is True
+
+    def test_cases_py_triggers_all(self):
+        """The test-suite definition file cases.py requests the full suite."""
+        changed = {"toolchain/mfc/test/cases.py"}
+        assert should_run_all_tests(changed) is True
+
+    def test_case_py_triggers_all(self):
+        """toolchain/mfc/test/case.py requests the full suite."""
+        changed = {"toolchain/mfc/test/case.py"}
+        assert should_run_all_tests(changed) is True
+
+    def test_definitions_py_triggers_all(self):
+        """params/definitions.py requests the full suite."""
+        changed = {"toolchain/mfc/params/definitions.py"}
+        assert should_run_all_tests(changed) is True
+
+    def test_input_py_triggers_all(self):
+        """run/input.py requests the full suite."""
+        changed = {"toolchain/mfc/run/input.py"}
+        assert should_run_all_tests(changed) is True
+
+    def test_case_validator_triggers_all(self):
+        """case_validator.py requests the full suite."""
+        changed = {"toolchain/mfc/case_validator.py"}
+        assert should_run_all_tests(changed) is True
+
+    def test_cmakelists_does_not_trigger_all(self):
+        assert should_run_all_tests({"CMakeLists.txt"}) is False
+
+    def test_case_fpp_triggers_all(self):
+        """include/case.fpp requests the full suite."""
+        changed = {"src/common/include/case.fpp"}
+        assert should_run_all_tests(changed) is True
+
+    def test_coverage_py_triggers_all(self):
+        """A change to coverage.py itself requests the full suite."""
+        changed = {"toolchain/mfc/test/coverage.py"}
+        assert should_run_all_tests(changed) is True
+
+    def test_cmake_dir_triggers_all(self):
+        """A file directly under toolchain/cmake/ requests the full suite."""
+        changed = {"toolchain/cmake/FindFFTW.cmake"}
+        assert should_run_all_tests(changed) is True
+
+    def test_cmake_subdir_triggers_all(self):
+        """A file nested below toolchain/cmake/ requests the full suite too."""
+        changed = {"toolchain/cmake/some/nested/file.cmake"}
+        assert should_run_all_tests(changed) is True
+
+    def test_simulation_module_does_not_trigger_all(self):
+        """An ordinary simulation module does not request the full suite."""
+        changed = {"src/simulation/m_rhs.fpp"}
+        assert should_run_all_tests(changed) is False
+
+    def test_empty_set_does_not_trigger_all(self):
+        assert should_run_all_tests(set()) is False
+
+    def test_mixed_one_trigger_fires_all(self):
+        """One trigger file next to an ordinary module is enough."""
+        ordinary = "src/simulation/m_rhs.fpp"
+        trigger = "src/common/include/macros.fpp"
+        assert should_run_all_tests({ordinary, trigger}) is True
+
+
+# ===========================================================================
+# Group 3: filter_tests_by_coverage — core file-level selection logic
+# ===========================================================================
+
+class TestFilterTestsByCoverage(unittest.TestCase):
+
+    def test_file_overlap_includes_test(self):
+        """A test whose covered files include a changed file is selected."""
+        coverage = {"AAAA0001": ["src/simulation/m_rhs.fpp", "src/simulation/m_weno.fpp"]}
+        selected, pruned = filter_tests_by_coverage(
+            [FakeCase("AAAA0001")], coverage, {"src/simulation/m_rhs.fpp"}
+        )
+        assert (len(selected), len(pruned)) == (1, 0)
+
+    def test_no_file_overlap_skips_test(self):
+        """A test covering none of the changed files is skipped."""
+        coverage = {"AAAA0001": ["src/simulation/m_rhs.fpp"]}
+        selected, pruned = filter_tests_by_coverage(
+            [FakeCase("AAAA0001")], coverage, {"src/simulation/m_weno.fpp"}
+        )
+        assert (len(selected), len(pruned)) == (0, 1)
+
+    def test_uuid_not_in_cache_is_conservative(self):
+        """A UUID missing from the cache is run rather than skipped."""
+        newcomer = FakeCase("NEWTEST1")
+        touched = {"src/simulation/m_rhs.fpp"}
+        selected, _ = filter_tests_by_coverage([newcomer], {}, touched)
+        assert len(selected) == 1
+
+    def test_no_fpp_changes_skips_all(self):
+        """With no .fpp file in the change set, every test is skipped."""
+        coverage = {"AAAA0001": ["src/simulation/m_rhs.fpp"]}
+        touched = {"toolchain/setup.py", "README.md"}
+        selected, pruned = filter_tests_by_coverage(
+            [FakeCase("AAAA0001")], coverage, touched
+        )
+        assert (len(selected), len(pruned)) == (0, 1)
+
+    def test_empty_changed_files_skips_all(self):
+        """An empty change set selects nothing."""
+        coverage = {"AAAA0001": ["src/simulation/m_rhs.fpp"]}
+        touched = set()
+        selected, pruned = filter_tests_by_coverage([FakeCase("AAAA0001")], coverage, touched)
+        assert (len(selected), len(pruned)) == (0, 1)
+
+    def test_multiple_tests_partial_selection(self):
+        """Of three tests, only the one covering the changed file runs."""
+        coverage = {
+            "TEST_A": ["src/simulation/m_rhs.fpp", "src/simulation/m_weno.fpp"],
+            "TEST_B": ["src/simulation/m_bubbles.fpp"],
+            "TEST_C": ["src/simulation/m_rhs.fpp"],
+        }
+        suite = [FakeCase(uuid) for uuid in coverage]
+        selected, pruned = filter_tests_by_coverage(
+            suite, coverage, {"src/simulation/m_bubbles.fpp"}
+        )
+        assert {case.get_uuid() for case in selected} == {"TEST_B"}
+        assert len(pruned) == 2
+
+    def test_multiple_changed_files_union(self):
+        """A test runs when it covers at least one of several changed files."""
+        coverage = {
+            "TEST_A": ["src/simulation/m_rhs.fpp"],
+            "TEST_B": ["src/simulation/m_weno.fpp"],
+            "TEST_C": ["src/simulation/m_bubbles.fpp"],
+        }
+        touched = {"src/simulation/m_rhs.fpp", "src/simulation/m_weno.fpp"}
+        suite = [FakeCase(uuid) for uuid in coverage]
+        selected, pruned = filter_tests_by_coverage(suite, coverage, touched)
+        picked = {case.get_uuid() for case in selected}
+        assert picked == {"TEST_A", "TEST_B"}
+        assert len(pruned) == 1
+
+    def test_test_covering_multiple_files_matched_via_second(self):
+        """A match on the second covered file (m_weno.fpp) is sufficient."""
+        coverage = {"AAAA0001": ["src/simulation/m_rhs.fpp", "src/simulation/m_weno.fpp"]}
+        selected, _ = filter_tests_by_coverage(
+            [FakeCase("AAAA0001")], coverage, {"src/simulation/m_weno.fpp"})
+        assert len(selected) == 1
+
+    def test_empty_cache_runs_all_conservatively(self):
+        """With an empty cache no test can be ruled out, so all three run."""
+        suite = [FakeCase(uuid) for uuid in ("T1", "T2", "T3")]
+        selected, pruned = filter_tests_by_coverage(
+            suite, {}, {"src/simulation/m_rhs.fpp"}
+        )
+        assert len(selected) == 3
+        assert len(pruned) == 0
+
+    def test_mixed_fpp_and_nonfpp_changes(self):
+        """Extra non-.fpp paths in the change set do not prevent a match."""
+        coverage = {"TEST_A": ["src/simulation/m_rhs.fpp"]}
+        touched = {"src/simulation/m_rhs.fpp", "README.md", "toolchain/setup.py"}
+        selected, _ = filter_tests_by_coverage([FakeCase("TEST_A")], coverage, touched)
+        assert len(selected) == 1
+
+    def test_incomplete_coverage_included_conservatively(self):
+        """A test with no simulation coverage still runs when a simulation file changes."""
+        coverage = {
+            "GOOD_T": ["src/simulation/m_rhs.fpp", "src/pre_process/m_start_up.fpp"],
+            "BAD_T":  ["src/pre_process/m_start_up.fpp", "src/common/m_helper.fpp"],
+        }
+        suite = [FakeCase(uuid) for uuid in coverage]
+        selected, pruned = filter_tests_by_coverage(suite, coverage, {"src/simulation/m_rhs.fpp"})
+        picked = {case.get_uuid() for case in selected}
+        # GOOD_T: direct file overlap; BAD_T: no sim coverage -> conservative include.
+        assert "GOOD_T" in picked
+        assert "BAD_T" in picked
+        assert len(pruned) == 0
+
+    def test_incomplete_coverage_not_triggered_by_preprocess(self):
+        """The conservative include does not fire for a pre_process-only change."""
+        coverage = {"BAD_T": ["src/pre_process/m_start_up.fpp"]}
+        touched = {"src/pre_process/m_data_output.fpp"}
+        selected, pruned = filter_tests_by_coverage(
+            [FakeCase("BAD_T")], coverage, touched
+        )
+        # No simulation file changed and no file overlap, so the test is skipped.
+        assert (len(selected), len(pruned)) == (0, 1)
+
+
+# ===========================================================================
+# Group 4: Corner cases from design discussion
+# ===========================================================================
+
+class TestDesignCornerCases(unittest.TestCase):
+
+    def test_gpu_ifdef_file_still_triggers_if_covered(self):
+        """
+        Selection is per file: GPU-specific and CPU code share one .fpp file,
+        so any edit to that file selects every test that covers it.
+        """
+        target = "src/simulation/m_muscl.fpp"
+        coverage = {"MUSCL_T": [target]}
+        selected, _ = filter_tests_by_coverage([FakeCase("MUSCL_T")], coverage, {target})
+        assert len(selected) == 1
+
+    def test_macro_file_triggers_all_via_should_run_all(self):
+        """A changed parallel_macros.fpp makes should_run_all_tests() return True."""
+        assert should_run_all_tests({"src/common/include/parallel_macros.fpp"}) is True
+
+    def test_new_fpp_file_no_coverage_skips(self):
+        """
+        A brand-new .fpp file appears in no cache entry, so no test covers it
+        and every test is skipped.
+        """
+        coverage = {"AAAA0001": ["src/simulation/m_rhs.fpp"]}
+        selected, pruned = filter_tests_by_coverage(
+            [FakeCase("AAAA0001")], coverage, {"src/simulation/m_brand_new.fpp"}
+        )
+        assert (len(selected), len(pruned)) == (0, 1)
+
+    def test_non_fpp_always_run_all_detected(self):
+        """
+        End-to-end: a diff naming only cases.py (not a .fpp file) is parsed by
+        _parse_diff_files and the result makes should_run_all_tests() fire.
+        """
+        parsed = _parse_diff_files("toolchain/mfc/test/cases.py\n")
+        assert should_run_all_tests(parsed) is True
+
+    def test_niche_feature_pruning(self):
+        """
+        Niche feature: only two of the five tests cover m_bubbles.fpp, so a
+        change to it prunes the three that never touch it.
+        """
+        coverage = {
+            "BUBBLE1": ["src/simulation/m_bubbles.fpp", "src/simulation/m_rhs.fpp"],
+            "BUBBLE2": ["src/simulation/m_bubbles.fpp"],
+            "BASIC_1": ["src/simulation/m_rhs.fpp", "src/simulation/m_weno.fpp"],
+            "BASIC_2": ["src/simulation/m_rhs.fpp"],
+            "BASIC_3": ["src/simulation/m_weno.fpp"],
+        }
+        bubbles = "src/simulation/m_bubbles.fpp"
+        suite = [FakeCase(uuid) for uuid in coverage]
+        selected, pruned = filter_tests_by_coverage(suite, coverage, {bubbles})
+        picked = {case.get_uuid() for case in selected}
+        assert picked == {"BUBBLE1", "BUBBLE2"}
+        assert len(pruned) == 3
+
+
+# ===========================================================================
+# Group 5: _parse_gcov_json_output — gcov JSON parsing (file-level)
+# ===========================================================================
+
+class TestParseGcovJsonOutput(unittest.TestCase):
+
+    def _make_gcov_json(self, files_data: list) -> bytes:
+        """Return gzip-compressed JSON shaped like a gcov report listing *files_data*."""
+        report = dict(
+            format_version="2",
+            gcc_version="15.2.0",
+            files=files_data,
+        )
+        return gzip.compress(json.dumps(report).encode())
+
+    def test_returns_set_of_covered_fpp_files(self):
+        """A file with executed lines is reported by its repo-relative path."""
+        lines = [
+            {"line_number": 45, "count": 3},
+            {"line_number": 46, "count": 0},
+            {"line_number": 47, "count": 1},
+        ]
+        entry = {"file": "/repo/src/simulation/m_rhs.fpp", "lines": lines}
+        blob = self._make_gcov_json([entry])
+        covered = _parse_gcov_json_output(blob, "/repo")
+        assert covered == {"src/simulation/m_rhs.fpp"}
+
+    def test_ignores_file_with_zero_coverage(self):
+        compressed = self._make_gcov_json([{
+            "file": "/repo/src/simulation/m_rhs.fpp",
+            "lines": [
+                {"line_number": 10, "count": 0},
+                {"line_number": 11, "count": 0},
+            ],
+        }])
+        result = _parse_gcov_json_output(compressed, "/repo")
+        assert result == set()
+
+    def test_ignores_f90_files(self):
+        """Generated .f90 files must not appear in coverage output."""
+        compressed = self._make_gcov_json([
+            {
+                "file": "/repo/build/fypp/simulation/m_rhs.fpp.f90",
+                "lines": [{"line_number": 10, "count": 5}],
+            },
+            {
+                "file": "/repo/src/simulation/m_rhs.fpp",
+                "lines": [{"line_number": 45, "count": 1}],
+            },
+        ])
+        result = _parse_gcov_json_output(compressed, "/repo")
+        assert result == {"src/simulation/m_rhs.fpp"}
+
+    def test_handles_raw_json_gcov12(self):
+        """gcov 12 outputs raw JSON (not gzip). Must parse correctly."""
+        data = {
+            "format_version": "1",
+            "gcc_version": "12.3.0",
+            "files": [{
+                "file": "/repo/src/simulation/m_rhs.fpp",
+                "lines": [{"line_number": 45, "count": 3}],
+            }],
+        }
+        raw = json.dumps(data).encode()
+        result = _parse_gcov_json_output(raw, "/repo")
+        assert result == {"src/simulation/m_rhs.fpp"}
+
+    def test_handles_invalid_data_gracefully(self):
+        result = _parse_gcov_json_output(b"not valid gzip or json", "/repo")
+        assert result == set()
+
+    def test_handles_empty_files_list(self):
+        compressed = self._make_gcov_json([])
+        result = _parse_gcov_json_output(compressed, "/repo")
+        assert result == set()
+
+    def test_multiple_fpp_files(self):
+        compressed = self._make_gcov_json([
+            {
+                "file": "/repo/src/simulation/m_rhs.fpp",
+                "lines": [{"line_number": 45, "count": 1}],
+            },
+            {
+                "file": "/repo/src/simulation/m_weno.fpp",
+                "lines": [{"line_number": 200, "count": 2}],
+            },
+        ])
+        result = _parse_gcov_json_output(compressed, "/repo")
+        assert result == {"src/simulation/m_rhs.fpp", "src/simulation/m_weno.fpp"}
+
+    def test_concatenated_json_from_batched_gcov(self):
+        """Batched gcov calls produce concatenated JSON objects (gcov 12)."""
+        obj1 = json.dumps({
+            "format_version": "1",
+            "gcc_version": "12.3.0",
+            "files": [{
+                "file": "/repo/src/simulation/m_rhs.fpp",
+                "lines": [{"line_number": 45, "count": 3}],
+            }],
+        })
+        obj2 = json.dumps({
+            "format_version": "1",
+            "gcc_version": "12.3.0",
+            "files": [{
+                "file": "/repo/src/simulation/m_weno.fpp",
+                "lines": [{"line_number": 10, "count": 1}],
+            }],
+        })
+        raw = (obj1 + "\n" + obj2).encode()
+        result = _parse_gcov_json_output(raw, "/repo")
+        assert result == {"src/simulation/m_rhs.fpp", "src/simulation/m_weno.fpp"}
+
+    def test_concatenated_json_skips_zero_coverage(self):
+        """Batched gcov: files with zero coverage are excluded."""
+        obj1 = json.dumps({
+            "format_version": "1",
+            "files": [{
+                "file": "/repo/src/simulation/m_rhs.fpp",
+                "lines": [{"line_number": 45, "count": 3}],
+            }],
+        })
+        obj2 = json.dumps({
+            "format_version": "1",
+            "files": [{
+                "file": "/repo/src/simulation/m_weno.fpp",
+                "lines": [{"line_number": 10, "count": 0}],
+            }],
+        })
+        raw = (obj1 + "\n" + obj2).encode()
+        result = _parse_gcov_json_output(raw, "/repo")
+        assert result == {"src/simulation/m_rhs.fpp"}
+
+
+# ===========================================================================
+# Group 6: _normalize_cache — old format conversion
+# ===========================================================================
+
+class TestNormalizeCache(unittest.TestCase):
+    """_normalize_cache: old line-level entries become file lists; new ones pass through."""
+
+    def test_converts_old_line_level_format(self):
+        """Old format {uuid: {file: [lines]}} -> new format {uuid: [files]}."""
+        rhs, weno = "src/simulation/m_rhs.fpp", "src/simulation/m_weno.fpp"
+        bubbles = "src/simulation/m_bubbles.fpp"
+        legacy = {
+            "TEST_A": {rhs: [45, 46, 47], weno: [100, 200]},
+            "TEST_B": {bubbles: [10]},
+            "_meta": {"cases_hash": "abc123"},
+        }
+        normalized = _normalize_cache(legacy)
+        # TEST_A is compared as a set: the order of its files is not asserted.
+        assert isinstance(normalized["TEST_A"], list)
+        assert set(normalized["TEST_A"]) == {rhs, weno}
+        assert normalized["TEST_B"] == [bubbles]
+        assert normalized["_meta"] == {"cases_hash": "abc123"}
+
+    def test_new_format_unchanged(self):
+        """New format {uuid: [files]} passes through unchanged."""
+        files = ["src/simulation/m_rhs.fpp", "src/simulation/m_weno.fpp"]
+        current = {
+            "TEST_A": list(files),
+            "_meta": {"cases_hash": "abc123"},
+        }
+        normalized = _normalize_cache(current)
+        assert normalized["TEST_A"] == files
+
+    def test_empty_coverage_dict_becomes_empty_list(self):
+        """Test with 0 coverage (old format: empty dict) -> empty list."""
+        legacy = {"TEST_A": {}, "_meta": {"cases_hash": "abc"}}
+        normalized = _normalize_cache(legacy)
+        assert normalized["TEST_A"] == []
+
+
+# ===========================================================================
+# Group 7: Cache path format
+# ===========================================================================
+
+class TestCachePath(unittest.TestCase):
+
+    def test_cache_path_is_gzipped(self):
+        """COVERAGE_CACHE_PATH must end in .json.gz so the cache file can be committed to the repo."""
+        assert str(COVERAGE_CACHE_PATH).endswith(".json.gz")
+
+
+if __name__ == "__main__":  # run the suite when this file is executed directly
+    unittest.main()

From a7afc065a40238cbc4beab1476c155ac8c278d8f Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson <sbryngelson@gmail.com>
Date: Wed, 11 Mar 2026 20:36:30 -0400
Subject: [PATCH 2/2] Fix review findings: error handling, params mutation, CI
 robustness

- Phase 3: omit failed tests from cache instead of storing [] (which
  permanently skips them); absent entries are conservatively included
- _prepare_test: save/restore case.params in finally block so callers
  can safely reuse the case list after build_coverage_cache returns
- _parse_gcov_json_output: return None on decode failure so caller
  omits the test from cache rather than storing empty coverage
- Missing build dir: log warning and return None instead of []
- Lagrange bubble input failure: re-raise so the test is omitted from
  the cache
- CI: replace always() with !cancelled() to respect workflow cancellation
- CI: grep -P -> grep -E for POSIX portability (PCRE not universal)
- Narrow except Exception to (OSError, SubprocessError) in find_gcov_binary

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .github/workflows/test.yml     |  6 +--
 toolchain/mfc/test/coverage.py | 79 ++++++++++++++++++++++------------
 2 files changed, 54 insertions(+), 31 deletions(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index ad9d0ac7f1..0fac5cf44d 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -97,7 +97,7 @@ jobs:
             DIFF=""
           fi
           if echo "$DIFF" | \
-             grep -qP '^[+-]\s*(use[\s,]+\w|#:include\s|include\s+['"'"'"])'; then
+             grep -qE '^[+-][[:space:]]*(use[[:space:],]+[a-zA-Z_]|#:include[[:space:]]|include[[:space:]]+['"'"'"])'; then
             echo "dep_changed=true" >> "$GITHUB_OUTPUT"
             echo "Fortran dependency change detected — will rebuild coverage cache."
           else
@@ -163,7 +163,7 @@ jobs:
     name: Github
     needs: [lint-gate, file-changes, rebuild-cache]
     if: >-
-      always() &&
+      !cancelled() &&
       needs.lint-gate.result == 'success' &&
       needs.file-changes.result == 'success' &&
       needs.rebuild-cache.result != 'cancelled' &&
@@ -260,7 +260,7 @@ jobs:
     name: "${{ matrix.cluster_name }} (${{ matrix.device }}${{ matrix.interface != 'none' && format('-{0}', matrix.interface) || '' }}${{ matrix.shard != '' && format(' [{0}]', matrix.shard) || '' }})"
     needs: [lint-gate, file-changes, rebuild-cache]
     if: >-
-      always() &&
+      !cancelled() &&
       needs.lint-gate.result == 'success' &&
       needs.file-changes.result == 'success' &&
       needs.rebuild-cache.result != 'cancelled' &&
diff --git a/toolchain/mfc/test/coverage.py b/toolchain/mfc/test/coverage.py
index eedd2bcc36..b73ae675f5 100644
--- a/toolchain/mfc/test/coverage.py
+++ b/toolchain/mfc/test/coverage.py
@@ -96,7 +96,7 @@ def find_gcov_binary() -> str:
         m = re.search(r'(\d+)\.\d+\.\d+', result.stdout)
         if m:
             major = m.group(1)
-    except Exception:
+    except (OSError, subprocess.SubprocessError):
         pass
 
     # Try versioned binary first (Homebrew macOS), then plain gcov
@@ -119,7 +119,7 @@ def find_gcov_binary() -> str:
                 continue  # Apple's gcov cannot parse GCC-generated .gcda files
             if "GCC" in version_out or "GNU" in version_out:
                 return path
-        except Exception:
+        except (OSError, subprocess.SubprocessError):
             continue
 
     raise MFCException(
@@ -166,7 +166,7 @@ def _parse_gcov_json_output(raw_bytes: bytes, root_dir: str) -> set:
         except (UnicodeDecodeError, ValueError):
             cons.print("[yellow]Warning: gcov output is not valid UTF-8 or gzip — "
                        "no coverage recorded for this test.[/yellow]")
-            return set()
+            return None
 
     result = set()
     real_root = os.path.realpath(root_dir)
@@ -234,9 +234,12 @@ def _collect_single_test_coverage(  # pylint: disable=too-many-locals
     build_subdir = os.path.join(test_gcda, "build")
     if not os.path.isdir(build_subdir):
         # No .gcda files produced — test may not have run or GCOV_PREFIX
-        # was misconfigured.  Return empty list; the sanity check at the end
-        # of build_coverage_cache will catch systemic failures.
-        return uuid, []
+        # was misconfigured.  Return None so the test is omitted from the
+        # cache (conservatively included on future runs).  The sanity check
+        # at the end of build_coverage_cache will catch systemic failures.
+        cons.print(f"[yellow]Warning: No .gcda directory for {uuid} — "
+                   f"GCOV_PREFIX may be misconfigured.[/yellow]")
+        return uuid, None
 
     gcno_copies = []
 
@@ -286,6 +289,10 @@ def _collect_single_test_coverage(  # pylint: disable=too-many-locals
         return uuid, []
 
     coverage = _parse_gcov_json_output(proc.stdout, root_dir)
+    if coverage is None:
+        # Decode failure — return None so the caller omits this test from
+        # the cache (absent entries are conservatively included).
+        return uuid, None
     return uuid, sorted(coverage)
 
 
@@ -370,8 +377,9 @@ def _prepare_test(case, root_dir: str) -> dict:  # pylint: disable=unused-argume
     files, and resolve binary paths.  All Python/toolchain overhead happens
     here (single-threaded) so the parallel phase is pure subprocess calls.
 
-    Operates on a shallow copy of case.params to avoid mutating the
-    original case object.
+    Temporarily sets modified params on the case object (needed by
+    get_dirpath/to_input_file/get_inp), then restores the original
+    params in a finally block so callers can safely reuse the case list.
     """
     try:
         case.delete_output()
@@ -388,6 +396,7 @@ def _prepare_test(case, root_dir: str) -> dict:  # pylint: disable=unused-argume
         except Exception as exc:
             cons.print(f"[yellow]Warning: Failed to generate Lagrange bubble input "
                        f"for {case.get_uuid()}: {exc}[/yellow]")
+            raise
 
     # Work on a copy so we don't permanently mutate the case object.
     params = dict(case.params)
@@ -421,28 +430,35 @@ def _prepare_test(case, root_dir: str) -> dict:  # pylint: disable=unused-argume
         for key in POST_PROCESS_3D_PARAMS:
             params.pop(key, None)
 
+    # Temporarily set mutated params on the case object for get_dirpath(),
+    # to_input_file(), and get_inp().  Always restore the original params
+    # so build_coverage_cache callers can safely reuse the case list.
+    orig_params = case.params
     case.params = params
-    test_dir = case.get_dirpath()
-    input_file = case.to_input_file()
-
-    # Write .inp files directly (no subprocess, no Mako templates).
-    # Suppress console output from get_inp() to avoid one message per (test, target) pair.
-    # Run all three executables to capture coverage across the full pipeline
-    # (pre_process: grid/IC generation; simulation: RHS/time-stepper; post_process: field I/O).
-    targets = [PRE_PROCESS, SIMULATION, POST_PROCESS]
-    binaries = []
-    # NOTE: not thread-safe — Phase 1 must remain single-threaded.
-    orig_file = cons.raw.file
-    cons.raw.file = io.StringIO()
     try:
-        for target in targets:
-            inp_content = case.get_inp(target)
-            common.file_write(os.path.join(test_dir, f"{target.name}.inp"),
-                              inp_content)
-            bin_path = target.get_install_binpath(input_file)
-            binaries.append((target.name, bin_path))
+        test_dir = case.get_dirpath()
+        input_file = case.to_input_file()
+
+        # Write .inp files directly (no subprocess, no Mako templates).
+        # Suppress console output from get_inp() to avoid one message per (test, target) pair.
+        # Run all three executables to capture coverage across the full pipeline
+        # (pre_process: grid/IC generation; simulation: RHS/time-stepper; post_process: field I/O).
+        targets = [PRE_PROCESS, SIMULATION, POST_PROCESS]
+        binaries = []
+        # NOTE: not thread-safe — Phase 1 must remain single-threaded.
+        orig_file = cons.raw.file
+        cons.raw.file = io.StringIO()
+        try:
+            for target in targets:
+                inp_content = case.get_inp(target)
+                common.file_write(os.path.join(test_dir, f"{target.name}.inp"),
+                                  inp_content)
+                bin_path = target.get_install_binpath(input_file)
+                binaries.append((target.name, bin_path))
+        finally:
+            cons.raw.file = orig_file
     finally:
-        cons.raw.file = orig_file
+        case.params = orig_params
 
     return {
         "uuid":     case.get_uuid(),
@@ -574,7 +590,14 @@ def build_coverage_cache(  # pylint: disable=too-many-locals,too-many-statements
                 except Exception as exc:  # pylint: disable=broad-except
                     uuid = futures[future]
                     cons.print(f"  [yellow]Warning: {uuid} coverage failed: {exc}[/yellow]")
-                    coverage = []
+                    # Do NOT store entry — absent entries are conservatively
+                    # included by filter_tests_by_coverage, while [] means
+                    # "covers no files" and would permanently skip the test.
+                    continue
+                if coverage is None:
+                    # Decode or collection failure — omit from cache so the
+                    # test is conservatively included on future runs.
+                    continue
                 cache[uuid] = coverage
                 completed += 1
                 if completed % 50 == 0 or completed == len(test_results):