MFlowCode · sbryngelson · Feb 10, 2026 · Feb 12, 2026 · Feb 12, 2026 · Feb 12, 2026
@@ -64,7 +64,7 @@ while true; do
   # Try to read from tail output (non-blocking via timeout)
   # Read multiple lines if available to avoid falling behind
   lines_read=0
-  while IFS= read -r -t 0.1 line <&3 2>/dev/null; do
+  while IFS= read -r -t 1 line <&3 2>/dev/null; do
     echo "$line"
     lines_read=$((lines_read + 1))
     last_heartbeat=$(date +%s)
@@ -115,7 +115,7 @@ done
 # Drain any remaining output from tail after job completes
 echo "Draining remaining output..."
 drain_count=0
-while IFS= read -r -t 0.5 line <&3 2>/dev/null; do
+while IFS= read -r -t 1 line <&3 2>/dev/null; do
   echo "$line"
   drain_count=$((drain_count + 1))
   # Safety limit to avoid infinite loop

@@ -1,85 +1,35 @@
 name: 'Benchmark'
 
 on:
-  # Trigger when Test Suite completes (no polling needed)
-  workflow_run:
-    workflows: ["Test Suite"]
-    types: [completed]
+  pull_request:
+  pull_request_review:
+    types: [submitted]
   workflow_dispatch:
 
 concurrency:
-  group: ${{ github.workflow }}-${{ github.event.workflow_run.head_branch || github.ref }}
+  group: ${{ github.workflow }}-${{ github.ref }}
   cancel-in-progress: true
 
 jobs:
   file-changes:
     name: Detect File Changes
-    # Only run if Test Suite passed (or manual dispatch)
-    if: github.event_name == 'workflow_dispatch' || github.event.workflow_run.conclusion == 'success'
     runs-on: 'ubuntu-latest'
     outputs:
       checkall: ${{ steps.changes.outputs.checkall }}
-      pr_number: ${{ steps.pr-info.outputs.pr_number }}
-      pr_approved: ${{ steps.pr-info.outputs.approved }}
-      pr_author: ${{ steps.pr-info.outputs.author }}
     steps:
       - name: Clone
         uses: actions/checkout@v4
-        with:
-          ref: ${{ github.event.workflow_run.head_sha || github.sha }}
 
       - name: Detect Changes
         uses: dorny/paths-filter@v3
         id: changes
         with:
           filters: ".github/file-filter.yml"
 
-      - name: Get PR Info
-        id: pr-info
-        env:
-          GH_TOKEN: ${{ github.token }}
-        run: |
-          if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
-            echo "pr_number=" >> $GITHUB_OUTPUT
-            echo "approved=true" >> $GITHUB_OUTPUT
-            echo "author=${{ github.actor }}" >> $GITHUB_OUTPUT
-          else
-            # Get PR number from workflow_run
-            PR_NUMBER="${{ github.event.workflow_run.pull_requests[0].number }}"
-            if [ -n "$PR_NUMBER" ]; then
-              echo "pr_number=$PR_NUMBER" >> $GITHUB_OUTPUT
-
-              # Fetch actual PR author from API (workflow_run.actor is the re-runner, not PR author)
-              PR_AUTHOR=$(gh api repos/${{ github.repository }}/pulls/$PR_NUMBER --jq '.user.login')
-              echo "author=$PR_AUTHOR" >> $GITHUB_OUTPUT
-
-              # Check if PR is approved
-              APPROVED=$(gh api repos/${{ github.repository }}/pulls/$PR_NUMBER/reviews \
-                --jq '[.[] | select(.state == "APPROVED")] | length')
-              if [ "$APPROVED" -gt 0 ]; then
-                echo "approved=true" >> $GITHUB_OUTPUT
-              else
-                echo "approved=false" >> $GITHUB_OUTPUT
-              fi
-            else
-              echo "pr_number=" >> $GITHUB_OUTPUT
-              echo "approved=false" >> $GITHUB_OUTPUT
-              echo "author=" >> $GITHUB_OUTPUT
-            fi
-          fi
-
   self:
     name: "${{ matrix.name }} (${{ matrix.device }}${{ matrix.interface != 'none' && format('-{0}', matrix.interface) || '' }})"
-    if: >
-      github.repository == 'MFlowCode/MFC' &&
-      needs.file-changes.outputs.checkall == 'true' &&
-      (
-        github.event_name == 'workflow_dispatch' ||
-        needs.file-changes.outputs.pr_approved == 'true' ||
-        needs.file-changes.outputs.pr_author == 'sbryngelson' ||
-        needs.file-changes.outputs.pr_author == 'wilfonba'
-      )
-    needs: [file-changes]
+    if: ${{ github.repository=='MFlowCode/MFC' && needs.file-changes.outputs.checkall=='true' && ((github.event_name=='pull_request_review' && github.event.review.state=='approved') || (github.event_name=='pull_request' && (github.event.pull_request.user.login=='sbryngelson' || github.event.pull_request.user.login=='wilfonba')) || github.event_name=='workflow_dispatch') }}
+    needs: file-changes
     strategy:
       fail-fast: false
       matrix:
@@ -143,7 +93,6 @@ jobs:
       - name: Clone - PR
         uses: actions/checkout@v4
         with:
-          ref: ${{ github.event.workflow_run.head_sha || github.sha }}
           path: pr
 
       - name: Clone - Master
@@ -155,7 +104,7 @@ jobs:
 
       - name: Setup & Build
         if: matrix.build_script != ''
-        run: | 
+        run: |
           (cd pr     && ${{ matrix.build_script }}) &
           (cd master && ${{ matrix.build_script }}) &
           wait %1 && wait %2

@@ -20,9 +20,8 @@ sbatch_cpu_opts="\
 "
 
 sbatch_gpu_opts="\
-#SBATCH -CL40S
-#SBATCH --ntasks-per-node=4       # Number of cores per node required
-#SBATCH -G2\
+#SBATCH --gres=gpu:H200:2
+#SBATCH --ntasks-per-node=8       # Number of cores per node required\
 "
 
 if [ "$2" = "cpu" ]; then

@@ -23,9 +23,8 @@ sbatch_cpu_opts="\
 "
 
 sbatch_gpu_opts="\
-#SBATCH -p gpu-v100,gpu-a100,gpu-h100,gpu-l40s
-#SBATCH --ntasks-per-node=4       # Number of cores per node required
-#SBATCH -G2\
+#SBATCH --gres=gpu:H200:2
+#SBATCH --ntasks-per-node=8       # Number of cores per node required\
 "
 
 if [ "$2" = "cpu" ]; then

@@ -134,8 +134,29 @@ jobs:
           TEST_ALL: ${{ matrix.mpi == 'mpi' && '--test-all' || '' }}
 
       - name: Test
-        run:  |
-          /bin/bash mfc.sh test -v --max-attempts 3 -j $(nproc) $TEST_ALL $TEST_PCT
+        run: |
+          # Record the exit status instead of discarding it: a run that fails
+          # without leaving a failed-UUID list must still fail this step.
+          TEST_EXIT=0
+          /bin/bash mfc.sh test -v --max-attempts 3 -j $(nproc) $TEST_ALL $TEST_PCT || TEST_EXIT=$?
+
+          # Retry only if a small number of tests failed (sporadic failures)
+          if [ -s tests/failed_uuids.txt ]; then
+            NUM_FAILED=$(wc -l < tests/failed_uuids.txt)
+            if [ "$NUM_FAILED" -le 5 ]; then
+              FAILED=$(cat tests/failed_uuids.txt | tr '\n' ' ')
+              echo ""
+              echo "=== Retrying $NUM_FAILED failed test(s): $FAILED ==="
+              echo ""
+              /bin/bash mfc.sh test -v --max-attempts 3 -j $(nproc) --only $FAILED $TEST_ALL
+            else
+              echo "Too many failures ($NUM_FAILED) to retry — likely a real issue."
+              exit 1
+            fi
+          elif [ "$TEST_EXIT" -ne 0 ]; then
+            echo "Test run exited with status $TEST_EXIT without a list of failed tests."
+            exit "$TEST_EXIT"
+          fi
         env:
           TEST_ALL: ${{ matrix.mpi == 'mpi' && '--test-all' || '' }}
           TEST_PCT: ${{ matrix.debug == 'debug' && '-% 20' || '' }}

diff --git a/toolchain/mfc/test/test.py b/toolchain/mfc/test/test.py
@@ -206,6 +206,15 @@ def test():
     # Build the summary report
     _print_test_summary(nPASS, nFAIL, nSKIP, minutes, seconds, failed_tests, skipped_cases)
 
+    # Write failed UUIDs to file for CI retry logic
+    failed_uuids_path = os.path.join(common.MFC_TEST_DIR, "failed_uuids.txt")
+    if failed_tests:
+        with open(failed_uuids_path, "w") as f:
+            for test_info in failed_tests:
+                f.write(test_info['uuid'] + "\n")
+    elif os.path.exists(failed_uuids_path):
+        os.remove(failed_uuids_path)
+
     exit(nFAIL)