diff --git a/.github/workflows/cuda-perf.yml b/.github/workflows/cuda-perf.yml
index 1bb9b62be65..ff126dbef1c 100644
--- a/.github/workflows/cuda-perf.yml
+++ b/.github/workflows/cuda-perf.yml
@@ -124,7 +124,7 @@ jobs:
   export-models:
     name: export-models
     needs: set-parameters
-    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    uses: pytorch/test-infra/.github/workflows/linux_job_v3.yml@main
     permissions:
       id-token: write
       contents: read
@@ -135,7 +135,7 @@ jobs:
     with:
       timeout: 90
       secrets-env: EXECUTORCH_HF_TOKEN
-      runner: ${{ contains(matrix.model, 'Qwen3.5-35B-A3B') && 'linux.aws.a100' || 'linux.g5.4xlarge.nvidia.gpu' }}
+      runner: ${{ contains(matrix.model, 'Qwen3.5-35B-A3B') && 'mt-l-x86iavx512-11-125-a100' || 'mt-l-x86aavx2-29-113-a10g' }}
       gpu-arch-type: cuda
       gpu-arch-version: "13.0"
       use-custom-docker-registry: false
@@ -145,6 +145,14 @@ jobs:
       script: |
         set -eux
         echo "::group::Setup ExecuTorch"
+        # OSDC runners can't reach the public PyPI CDN that download.pytorch.org's
+        # transitive deps resolve to. Pre-install torch's Python deps (plus numpy
+        # and pillow) from the in-cluster pypi-cache and drop the default cpu
+        # extra-index so the cuda torch wheel is the only candidate.
+        export PIP_EXTRA_INDEX_URL=
+        # fsspec is pinned to satisfy datasets' fsspec[http]<=2025.3.0 so the later
+        # examples install doesn't try to downgrade it from the public CDN.
+        pip install filelock typing-extensions "setuptools<82" sympy networkx jinja2 "fsspec[http]<=2025.3.0" numpy pillow
         # Disable MKL to avoid duplicate target error when conda has multiple MKL installations
         export USE_MKL=OFF
         ./install_executorch.sh
@@ -192,7 +200,7 @@ jobs:
         contains(needs.changed-files.outputs.changed-files, '.ci/scripts/test_model_e2e.sh') ||
         needs.run-decision.outputs.is-full-run == 'true'
       )
-    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    uses: pytorch/test-infra/.github/workflows/linux_job_v3.yml@main
     permissions:
       id-token: write
       contents: read
@@ -201,7 +209,7 @@ jobs:
       fail-fast: false
     with:
       timeout: 90
-      runner: ${{ contains(matrix.model, 'Qwen3.5-35B-A3B') && 'linux.aws.a100' || 'linux.g5.4xlarge.nvidia.gpu' }}
+      runner: ${{ contains(matrix.model, 'Qwen3.5-35B-A3B') && 'mt-l-x86iavx512-11-125-a100' || 'mt-l-x86aavx2-29-113-a10g' }}
       gpu-arch-type: cuda
       gpu-arch-version: "13.0"
       use-custom-docker-registry: false
@@ -212,6 +220,14 @@ jobs:
       script: |
         set -eux
         echo "::group::Setup environment"
+        # OSDC runners can't reach the public PyPI CDN that download.pytorch.org's
+        # transitive deps resolve to. Pre-install torch's Python deps (plus numpy
+        # and pillow) from the in-cluster pypi-cache and drop the default cpu
+        # extra-index so the cuda torch wheel is the only candidate.
+        export PIP_EXTRA_INDEX_URL=
+        # fsspec is pinned to satisfy datasets' fsspec[http]<=2025.3.0 so the later
+        # examples install doesn't try to downgrade it from the public CDN.
+        pip install filelock typing-extensions "setuptools<82" sympy networkx jinja2 "fsspec[http]<=2025.3.0" numpy pillow
         ./install_requirements.sh
         pip list
         echo "::endgroup::"
diff --git a/.github/workflows/cuda.yml b/.github/workflows/cuda.yml
index ada0f5983cc..d0da13e5733 100644
--- a/.github/workflows/cuda.yml
+++ b/.github/workflows/cuda.yml
@@ -229,7 +229,7 @@ jobs:
         contains(needs.changed-files.outputs.changed-files, '.ci/scripts/test_model_e2e.sh') ||
         needs.run-decision.outputs.is-full-run == 'true'
       )
-    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    uses: pytorch/test-infra/.github/workflows/linux_job_v3.yml@main
     permissions:
       id-token: write
       contents: read
@@ -342,7 +342,7 @@ jobs:
     with:
       timeout: 150
       secrets-env: EXECUTORCH_HF_TOKEN
-      runner: ${{ (matrix.model.name == 'Qwen3.5-35B-A3B-HQQ-INT4' || matrix.model.name == 'gemma-4-31B-it-HQQ-INT4') && 'linux.aws.a100' || 'linux.g5.4xlarge.nvidia.gpu' }}
+      runner: ${{ (matrix.model.name == 'Qwen3.5-35B-A3B-HQQ-INT4' || matrix.model.name == 'gemma-4-31B-it-HQQ-INT4') && 'mt-l-x86iavx512-11-125-a100' || 'mt-l-x86aavx2-29-113-a10g' }}
       gpu-arch-type: cuda
       gpu-arch-version: "13.0"
       use-custom-docker-registry: false
@@ -353,6 +353,14 @@ jobs:
         set -eux
 
         echo "::group::Setup ExecuTorch"
+        # OSDC runners can't reach the public PyPI CDN that download.pytorch.org's
+        # transitive deps resolve to. Pre-install torch's Python deps (plus numpy
+        # and pillow) from the in-cluster pypi-cache and drop the default cpu
+        # extra-index so the cuda torch wheel is the only candidate.
+        export PIP_EXTRA_INDEX_URL=
+        # fsspec is pinned to satisfy datasets' fsspec[http]<=2025.3.0 so the later
+        # examples install doesn't try to downgrade it from the public CDN.
+        pip install filelock typing-extensions "setuptools<82" sympy networkx jinja2 "fsspec[http]<=2025.3.0" numpy pillow
         # Disable MKL to avoid duplicate target error when conda has multiple MKL installations
         export USE_MKL=OFF
         ./install_executorch.sh
@@ -390,7 +398,7 @@ jobs:
         contains(needs.changed-files.outputs.changed-files, '.ci/scripts/test_model_e2e.sh') ||
         needs.run-decision.outputs.is-full-run == 'true'
       )
-    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    uses: pytorch/test-infra/.github/workflows/linux_job_v3.yml@main
     permissions:
       id-token: write
       contents: read
@@ -494,7 +502,7 @@ jobs:
             quant: "non-quantized"
     with:
       timeout: 90
-      runner: ${{ (matrix.model.name == 'Qwen3.5-35B-A3B-HQQ-INT4' || matrix.model.name == 'gemma-4-31B-it-HQQ-INT4') && 'linux.aws.a100' || 'linux.g5.4xlarge.nvidia.gpu' }}
+      runner: ${{ (matrix.model.name == 'Qwen3.5-35B-A3B-HQQ-INT4' || matrix.model.name == 'gemma-4-31B-it-HQQ-INT4') && 'mt-l-x86iavx512-11-125-a100' || 'mt-l-x86aavx2-29-113-a10g' }}
       gpu-arch-type: cuda
       gpu-arch-version: "13.0"
       use-custom-docker-registry: false
@@ -502,6 +510,14 @@ jobs:
       download-artifact: ${{ matrix.model.repo }}-${{ matrix.model.name }}-cuda-${{ matrix.quant }}
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
       script: |
+        # OSDC runners can't reach the public PyPI CDN that download.pytorch.org's
+        # transitive deps resolve to. Pre-install torch's Python deps (plus numpy
+        # and pillow) from the in-cluster pypi-cache and drop the default cpu
+        # extra-index so the cuda torch wheel is the only candidate.
+        export PIP_EXTRA_INDEX_URL=
+        # fsspec is pinned to satisfy datasets' fsspec[http]<=2025.3.0 so the later
+        # examples install doesn't try to downgrade it from the public CDN.
+        pip install filelock typing-extensions "setuptools<82" sympy networkx jinja2 "fsspec[http]<=2025.3.0" numpy pillow
         source .ci/scripts/test_model_e2e.sh cuda "${{ matrix.model.repo }}/${{ matrix.model.name }}" "${{ matrix.quant }}" "${RUNNER_ARTIFACT_DIR}"
 
   test-cuda-pybind: