Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 20 additions & 4 deletions .github/workflows/cuda-perf.yml
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ jobs:
export-models:
name: export-models
needs: set-parameters
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
uses: pytorch/test-infra/.github/workflows/linux_job_v3.yml@main
permissions:
id-token: write
contents: read
Expand All @@ -135,7 +135,7 @@ jobs:
with:
timeout: 90
secrets-env: EXECUTORCH_HF_TOKEN
runner: ${{ contains(matrix.model, 'Qwen3.5-35B-A3B') && 'linux.aws.a100' || 'linux.g5.4xlarge.nvidia.gpu' }}
runner: ${{ contains(matrix.model, 'Qwen3.5-35B-A3B') && 'mt-l-x86iavx512-11-125-a100' || 'mt-l-x86aavx2-29-113-a10g' }}
gpu-arch-type: cuda
gpu-arch-version: "13.0"
use-custom-docker-registry: false
Expand All @@ -145,6 +145,14 @@ jobs:
script: |
set -eux
echo "::group::Setup ExecuTorch"
# OSDC runners can't reach the public PyPI CDN that download.pytorch.org's
# transitive deps resolve to. Pre-install torch's pure-python deps from the
# in-cluster pypi-cache and drop the default cpu extra-index so the cuda
# torch wheel is the only candidate.
export PIP_EXTRA_INDEX_URL=
# fsspec is pinned to satisfy datasets' fsspec[http]<=2025.3.0 so the later
# examples install doesn't try to downgrade it from the public CDN.
pip install filelock typing-extensions "setuptools<82" sympy networkx jinja2 "fsspec[http]<=2025.3.0" numpy pillow
# Disable MKL to avoid duplicate target error when conda has multiple MKL installations
export USE_MKL=OFF
./install_executorch.sh
Expand Down Expand Up @@ -192,7 +200,7 @@ jobs:
contains(needs.changed-files.outputs.changed-files, '.ci/scripts/test_model_e2e.sh') ||
needs.run-decision.outputs.is-full-run == 'true'
)
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
uses: pytorch/test-infra/.github/workflows/linux_job_v3.yml@main
permissions:
id-token: write
contents: read
Expand All @@ -201,7 +209,7 @@ jobs:
fail-fast: false
with:
timeout: 90
runner: ${{ contains(matrix.model, 'Qwen3.5-35B-A3B') && 'linux.aws.a100' || 'linux.g5.4xlarge.nvidia.gpu' }}
runner: ${{ contains(matrix.model, 'Qwen3.5-35B-A3B') && 'mt-l-x86iavx512-11-125-a100' || 'mt-l-x86aavx2-29-113-a10g' }}
gpu-arch-type: cuda
gpu-arch-version: "13.0"
use-custom-docker-registry: false
Expand All @@ -212,6 +220,14 @@ jobs:
script: |
set -eux
echo "::group::Setup environment"
# OSDC runners can't reach the public PyPI CDN that download.pytorch.org's
# transitive deps resolve to. Pre-install torch's pure-python deps from the
# in-cluster pypi-cache and drop the default cpu extra-index so the cuda
# torch wheel is the only candidate.
export PIP_EXTRA_INDEX_URL=
# fsspec is pinned to satisfy datasets' fsspec[http]<=2025.3.0 so the later
# examples install doesn't try to downgrade it from the public CDN.
pip install filelock typing-extensions "setuptools<82" sympy networkx jinja2 "fsspec[http]<=2025.3.0" numpy pillow
./install_requirements.sh
pip list
echo "::endgroup::"
Expand Down
24 changes: 20 additions & 4 deletions .github/workflows/cuda.yml
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ jobs:
contains(needs.changed-files.outputs.changed-files, '.ci/scripts/test_model_e2e.sh') ||
needs.run-decision.outputs.is-full-run == 'true'
)
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
uses: pytorch/test-infra/.github/workflows/linux_job_v3.yml@main
permissions:
id-token: write
contents: read
Expand Down Expand Up @@ -342,7 +342,7 @@ jobs:
with:
timeout: 150
secrets-env: EXECUTORCH_HF_TOKEN
runner: ${{ (matrix.model.name == 'Qwen3.5-35B-A3B-HQQ-INT4' || matrix.model.name == 'gemma-4-31B-it-HQQ-INT4') && 'linux.aws.a100' || 'linux.g5.4xlarge.nvidia.gpu' }}
runner: ${{ (matrix.model.name == 'Qwen3.5-35B-A3B-HQQ-INT4' || matrix.model.name == 'gemma-4-31B-it-HQQ-INT4') && 'mt-l-x86iavx512-11-125-a100' || 'mt-l-x86aavx2-29-113-a10g' }}
gpu-arch-type: cuda
gpu-arch-version: "13.0"
use-custom-docker-registry: false
Expand All @@ -353,6 +353,14 @@ jobs:
set -eux

echo "::group::Setup ExecuTorch"
# OSDC runners can't reach the public PyPI CDN that download.pytorch.org's
# transitive deps resolve to. Pre-install torch's pure-python deps from the
# in-cluster pypi-cache and drop the default cpu extra-index so the cuda
# torch wheel is the only candidate.
export PIP_EXTRA_INDEX_URL=
# fsspec is pinned to satisfy datasets' fsspec[http]<=2025.3.0 so the later
# examples install doesn't try to downgrade it from the public CDN.
pip install filelock typing-extensions "setuptools<82" sympy networkx jinja2 "fsspec[http]<=2025.3.0" numpy pillow
# Disable MKL to avoid duplicate target error when conda has multiple MKL installations
export USE_MKL=OFF
./install_executorch.sh
Expand Down Expand Up @@ -390,7 +398,7 @@ jobs:
contains(needs.changed-files.outputs.changed-files, '.ci/scripts/test_model_e2e.sh') ||
needs.run-decision.outputs.is-full-run == 'true'
)
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
uses: pytorch/test-infra/.github/workflows/linux_job_v3.yml@main
permissions:
id-token: write
contents: read
Expand Down Expand Up @@ -494,14 +502,22 @@ jobs:
quant: "non-quantized"
with:
timeout: 90
runner: ${{ (matrix.model.name == 'Qwen3.5-35B-A3B-HQQ-INT4' || matrix.model.name == 'gemma-4-31B-it-HQQ-INT4') && 'linux.aws.a100' || 'linux.g5.4xlarge.nvidia.gpu' }}
runner: ${{ (matrix.model.name == 'Qwen3.5-35B-A3B-HQQ-INT4' || matrix.model.name == 'gemma-4-31B-it-HQQ-INT4') && 'mt-l-x86iavx512-11-125-a100' || 'mt-l-x86aavx2-29-113-a10g' }}
gpu-arch-type: cuda
gpu-arch-version: "13.0"
use-custom-docker-registry: false
submodules: recursive
download-artifact: ${{ matrix.model.repo }}-${{ matrix.model.name }}-cuda-${{ matrix.quant }}
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
script: |
# OSDC runners can't reach the public PyPI CDN that download.pytorch.org's
# transitive deps resolve to. Pre-install torch's pure-python deps from the
# in-cluster pypi-cache and drop the default cpu extra-index so the cuda
# torch wheel is the only candidate.
export PIP_EXTRA_INDEX_URL=
# fsspec is pinned to satisfy datasets' fsspec[http]<=2025.3.0 so the later
# examples install doesn't try to downgrade it from the public CDN.
pip install filelock typing-extensions "setuptools<82" sympy networkx jinja2 "fsspec[http]<=2025.3.0" numpy pillow
source .ci/scripts/test_model_e2e.sh cuda "${{ matrix.model.repo }}/${{ matrix.model.name }}" "${{ matrix.quant }}" "${RUNNER_ARTIFACT_DIR}"

test-cuda-pybind:
Expand Down
Loading