diff --git a/.github/workflows/weekly-valgrind.yml b/.github/workflows/weekly-valgrind.yml
index 1e12f47e..2678e3a6 100644
--- a/.github/workflows/weekly-valgrind.yml
+++ b/.github/workflows/weekly-valgrind.yml
@@ -8,12 +8,9 @@ name: Weekly Valgrind
 #
 # A failure here does NOT red-X a PR — the workflow is independent of
 # ci.yml — but it IS allowed to red-X this scheduled run. We do NOT set
-# `continue-on-error: true` on the job: valgrind's --error-exitcode=1
+# `continue-on-error: true` on any shard: valgrind's --error-exitcode=1
 # would otherwise be silently swallowed and the scheduled run would go
-# green even when valgrind detected a real memory error. The whole
-# point of running valgrind is to surface those errors. The hard
-# `timeout-minutes: 360` cap below still protects against a hung
-# suite eating the runner.
+# green even when valgrind detected a real memory error.
 #
 # The high-value outputs are:
 #   1. Conditional-jump-on-uninitialized — bug class invisible to ASan.
@@ -25,6 +22,33 @@ name: Weekly Valgrind
 # Scope limitation: stress and timeout suites are EXCLUDED — under
 # valgrind the 10-50x slowdown collapses their timing assertions. The
 # protocol / lifecycle suites carry the high-value memory-safety signal.
+#
+# Topology: sharded into 4 parallel matrix jobs. The previous single
+# sequential job consumed the entire 6h GitHub job cap on the `auth`
+# umbrella alone (without ever finishing), starving every suite
+# scheduled after it. The shards keep critical-path wall time well
+# under 30 min and let an individual shard hang without blocking the
+# others (`fail-fast: false`). Per-shard 4h cap catches hangs while
+# leaving plenty of buffer for the slowest expected shard (~10 min
+# observed locally; CI is ~1.5-2x slower).
+#
+# Shard composition:
+#   - `protocol-fast`: lifecycle + parser + small unit tests
+#     (~4 min local, expect ~6-8 min on GitHub runner).
+#   - `proxy-heavy`: upstream-pool-heavy + auth_intro (the single
+#     longest auth sub-suite — moved out of `auth-crypto` for balance).
+#   - `auth-crypto`: auth umbrella minus the three integration sub-
+#     suites whose HTTP plumbing is already covered upstream
+#     (`auth2`, `auth_reload`, `auth_multi`). Loss of valgrind-unique
+#     coverage on those is bounded — see the table in PR #42 follow-up.
+#   - `observability`: all obs_* suites — every one is small enough
+#     that the whole family fits in one shard.
+#
+# When adding a new suite, drop it into the shard whose theme matches.
+# If the new suite is >60s, re-check the shard balance: bin-packing
+# was greedy LPT (Longest Processing Time first) on the original
+# measurements; a single new ~2-3 min suite typically shouldn't
+# require redistribution.
 on:
   schedule:
     # 09:00 UTC every Sunday — matches RocksDB cadence; off-peak for
@@ -38,13 +62,41 @@ concurrency:
 
 jobs:
   valgrind-linux:
+    name: valgrind-${{ matrix.shard }}
     runs-on: ubuntu-latest
-    timeout-minutes: 360  # 6h cap — refuse to consume runner-minutes
-                          # past this even if a suite hangs under valgrind.
-                          # NOTE: deliberately NO `continue-on-error: true`
-                          # — valgrind --error-exitcode=1 must surface as a
-                          # red scheduled run; suppressing it would defeat
-                          # the purpose of running this workflow at all.
+    timeout-minutes: 240  # 4h per-shard cap. Each shard's actual work
+                          # is single-digit-minutes; the cap exists to
+                          # catch a hung suite without burning the
+                          # whole 6h job ceiling. NOTE: deliberately NO
+                          # `continue-on-error: true` — valgrind's
+                          # --error-exitcode=1 must surface as a red
+                          # scheduled run.
+    strategy:
+      fail-fast: false  # one shard's hang or finding must not cancel
+                        # the others — we want every shard's signal.
+      matrix:
+        include:
+          - shard: protocol-fast
+            suites: >-
+              basic lru_cache http http2 ws tls cli route upstream h2_upstream
+              rate_limit dns h2_trailer grpc_obs grpc_web_edge
+              introspection_cache
+          - shard: proxy-heavy
+            suites: >-
+              circuit_breaker proxy streaming_request grpc grpc_proxy grpc_web
+              auth_intro
+          - shard: auth-crypto
+            suites: >-
+              auth_foundation hrauth auth_mgr auth_fail auth_ws
+              jwt jwks oidc intro_client auth_race
+              router_async auth_observability
+          - shard: observability
+            suites: >-
+              obs_foundation obs_tracer obs_metrics obs_mgr obs_propagator
+              obs_jaeger_propagator obs_prom obs_config obs_shutdown obs_linkkill
+              obs_issue obs_e2e obs_self_handler obs_proxy_client obs_auth_trace
+              obs_catalog obs_kill_marshal obs_ws_messages obs_self_metrics
+              obs_connection_metrics obs_pool_gauges obs_middleware_metrics
     steps:
       - uses: actions/checkout@v4
       - name: Install dependencies
@@ -55,7 +107,7 @@ jobs:
             CXXFLAGS_EXTRA="-O1 -g -fno-omit-frame-pointer" \
             CFLAGS_EXTRA="-O1 -g -fno-omit-frame-pointer" \
             NGHTTP2_CFLAGS_EXTRA="-O1 -g -fno-omit-frame-pointer"
-      - name: Valgrind sweep (memory-safety subset)
+      - name: Valgrind sweep (shard ${{ matrix.shard }})
         env:
           # Signal the test binary to skip wall-clock perf assertions
           # (GWE22–GWE24) that fail under valgrind's 10-50× slowdown.
@@ -63,55 +115,7 @@ jobs:
           VALGRIND_TEST: "1"
         run: |
           set -e
-          # Suites chosen for memory-safety signal-per-minute. Excluded:
-          # `stress`, `timeout`, `race`, `kqueue` (Linux no-op anyway),
-          # and the obs_stress / obs_export suites (interpreter-amplified
-          # timing makes them flake under valgrind).
-          for suite in \
-            basic \
-            lru_cache \
-            http \
-            http2 \
-            ws \
-            tls \
-            cli \
-            route \
-            upstream \
-            h2_upstream \
-            proxy \
-            rate_limit \
-            circuit_breaker \
-            dns \
-            auth \
-            obs_foundation \
-            obs_tracer \
-            obs_metrics \
-            obs_mgr \
-            obs_propagator \
-            obs_jaeger_propagator \
-            obs_prom \
-            obs_config \
-            obs_shutdown \
-            obs_linkkill \
-            obs_issue \
-            obs_e2e \
-            obs_self_handler \
-            obs_proxy_client \
-            obs_auth_trace \
-            obs_catalog \
-            obs_kill_marshal \
-            obs_ws_messages \
-            obs_self_metrics \
-            obs_connection_metrics \
-            obs_pool_gauges \
-            obs_middleware_metrics \
-            streaming_request \
-            h2_trailer \
-            grpc \
-            grpc_proxy \
-            grpc_obs \
-            grpc_web \
-            grpc_web_edge ; do
+          for suite in ${{ matrix.suites }}; do
             echo "::group::valgrind test_runner $suite"
             valgrind \
               --error-exitcode=1 \
diff --git a/server/auth/auth_upstream_http_client.cc b/server/auth/auth_upstream_http_client.cc
index a4659bec..414164b0 100644
--- a/server/auth/auth_upstream_http_client.cc
+++ b/server/auth/auth_upstream_http_client.cc
@@ -255,10 +255,20 @@ void UpstreamHttpClient::ApplyOutboundTraceContext(Request& req) {
     // concurrent SIGHUP that swaps `ObservabilityManager::propagator_`
     // cannot free the composite between strip and inject — we hold a
     // reference for this call's duration.
+    // Unwrap the optional via a raw pointer alias so the inline
+    // `has_value() && opt-><field> <relop> X` pattern can never be lowered
+    // to a branchless `cmov` with a speculative load of the optional's
+    // payload. libstdc++ leaves a default-constructed std::optional<T>'s
+    // payload bytewise-uninitialised, so any speculative load triggers a
+    // valgrind "Conditional jump depends on uninitialised value(s)"
+    // report. Pointer dereference is the only language-level barrier:
+    // speculatively loading via a possibly-null pointer would segfault
+    // on the null path, so the compiler MUST branch. `&*opt` is a
+    // compile-time offset computation, not a payload load.
+    const OBSERVABILITY_NAMESPACE::IssueTraceContext* ictx =
+        req.issue_ctx.has_value() ? &*req.issue_ctx : nullptr;
     const OBSERVABILITY_NAMESPACE::Propagator* propagator =
-        (req.issue_ctx.has_value() && req.issue_ctx->propagator)
-            ? req.issue_ctx->propagator.get()
-            : nullptr;
+        (ictx && ictx->propagator) ? ictx->propagator.get() : nullptr;
     OBSERVABILITY_NAMESPACE::Propagator::StripAllKnownTraceHeaders(req.headers);
     // Defense-in-depth gate: inject only when (a) `local` is valid AND
     // (b) a Tracer is bound. The Tracer field is the caller's contract
@@ -273,17 +283,15 @@ void UpstreamHttpClient::ApplyOutboundTraceContext(Request& req) {
     // See `auth_manager.cc`'s IssueTraceContext builder for the local-
     // span-id precedence (auth.idp_check span → inbound SERVER span →
     // fresh synthesized SpanContext with sampled=0 inherited).
-    if (req.issue_ctx.has_value()
-        && req.issue_ctx->local.IsValid()
-        && req.issue_ctx->tracer != nullptr) {
+    if (ictx && ictx->local.IsValid() && ictx->tracer != nullptr) {
         if (propagator) {
-            propagator->Inject(req.issue_ctx->local, req.headers);
+            propagator->Inject(ictx->local, req.headers);
         } else {
             // Stack-local W3C instance is stateless — vtable pointer
             // assignment, no allocation. Falls back to W3C-only when
             // no propagator is bound on `issue_ctx`.
             OBSERVABILITY_NAMESPACE::W3CPropagator{}.Inject(
-                req.issue_ctx->local, req.headers);
+                ictx->local, req.headers);
         }
     }
 }
diff --git a/server/upstream/proxy_transaction.cc b/server/upstream/proxy_transaction.cc
index 9203122b..5e68baca 100644
--- a/server/upstream/proxy_transaction.cc
+++ b/server/upstream/proxy_transaction.cc
@@ -3435,6 +3435,22 @@ bool ProxyTransaction::MaybeRetry(RetryPolicy::RetryCondition condition,
     // covers every caller path uniformly.
     if (cancelled_ || IsKilledForShutdown()) return false;
 
+    // Unwrap the optional via a raw pointer alias so the inline
+    // `has_value() && *opt <relop> X` pattern can never be lowered to a
+    // branchless `cmov` with a speculative load of the optional's payload.
+    // libstdc++ leaves a default-constructed std::optional<int>'s payload
+    // bytewise-uninitialised, so any speculative load triggers a valgrind
+    // "Conditional jump depends on uninitialised value(s)" report.
+    // Pointer dereference is the only language-level barrier here:
+    // speculatively loading via a possibly-null pointer would segfault
+    // on the null path, so the compiler MUST branch. `&*opt` is a
+    // compile-time offset computation, not a payload load.
+    const int* explicit_pushback_ptr =
+        explicit_pushback_ms.has_value() ? &*explicit_pushback_ms : nullptr;
+    const bool has_explicit_pushback = (explicit_pushback_ptr != nullptr);
+    const int explicit_pushback_value =
+        explicit_pushback_ptr ? *explicit_pushback_ptr : 0;
+
     // gRPC-Web bridge decode failure is terminal. The inbound bytes
     // are corrupted (or truncated mid-stream) — replay would
     // deterministically re-fail and waste an upstream attempt. Gate
@@ -3461,8 +3477,8 @@ bool ProxyTransaction::MaybeRetry(RetryPolicy::RetryCondition condition,
     // that bypasses MaybeTriggerGrpcTrailerStatusRetry MUST report
     // the breaker itself.
     const bool terminal_pushback =
-        (explicit_pushback_ms.has_value() &&
-         *explicit_pushback_ms == RetryPolicy::PUSHBACK_TERMINAL_SENTINEL);
+        has_explicit_pushback &&
+        explicit_pushback_value == RetryPolicy::PUSHBACK_TERMINAL_SENTINEL;
 
     // Streaming-only intent: fire TombstonePreBodyHeadersForRetry_() only
     // if both (a) the streaming preconditions hold AND (b) ShouldRetry
@@ -3677,8 +3693,8 @@ bool ProxyTransaction::MaybeRetry(RetryPolicy::RetryCondition condition,
         // The sentinel-encoded "terminal" case never reaches here — the
         // should_retry gate above already short-circuited.
         const auto delay =
-            (explicit_pushback_ms.has_value() && *explicit_pushback_ms >= 0)
-                ? std::chrono::milliseconds(*explicit_pushback_ms)
+            (has_explicit_pushback && explicit_pushback_value >= 0)
+                ? std::chrono::milliseconds(explicit_pushback_value)
                 : ((attempt_ <= 1 && is_transient_connection_failure)
                        ? std::chrono::milliseconds(0)
                        : retry_policy_.BackoffDelay(attempt_));