Skip to content
Merged
103 changes: 30 additions & 73 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,60 +30,36 @@ jobs:
run: NVTE_USE_CCACHE=1 NVTE_CCACHE_BIN=sccache pip install --no-build-isolation . -v
env:
NVTE_FRAMEWORK: none
MAX_JOBS: 1
SCCACHE_GHA_ENABLED: "true"
SCCACHE_CACHE_SIZE: "5G"
Comment thread
pggPL marked this conversation as resolved.
Outdated
# Smoke test: the step succeeds only if `import transformer_engine` works.
# NOTE(review): working-directory is `/`, presumably so the import resolves to
# the installed package rather than the source checkout — confirm.
- name: 'Sanity check'
run: python3 -c "import transformer_engine"
working-directory: /
# PyTorch job header: runs on ubuntu-latest inside the ghcr.io/nvidia/jax:jax
# container, started with `--user root`.
# NOTE(review): this text looks like a flattened diff view — the steps that
# follow contain both direct `run:` commands and `docker exec builder ...`
# variants of the same step; confirm which set is current before reusing it.
pytorch:
name: 'PyTorch'
runs-on: ubuntu-latest
container:
image: ghcr.io/nvidia/jax:jax
options: --user root
steps:
# Relocate /var/lib/docker/ into ${GITHUB_WORKSPACE}/docker; the contents are
# moved back by the 'Restore /var/lib/docker/' step further down.
- name: Move /var/lib/docker/
shell: bash -euxo pipefail {0}
run: sudo mv /var/lib/docker/ "${GITHUB_WORKSPACE}/docker"

# Run the SHA-pinned easimon/maximize-build-space action with the .NET,
# Android, Haskell and CodeQL removal flags set to 'true' and /var/lib/docker/
# given as build-mount-path.
# NOTE(review): the reserve/swap inputs are in MB going by their names; the
# action's exact behaviour is not visible here — check its README.
- name: Maximize build space
uses: easimon/maximize-build-space@c28619d8999a147d5e09c1199f84ff6af6ad5794
with:
root-reserve-mb: 5120
temp-reserve-mb: 32
swap-size-mb: 10240
remove-dotnet: 'true'
remove-android: 'true'
remove-haskell: 'true'
remove-codeql: 'true'
build-mount-path: '/var/lib/docker/'

# Move the saved directory contents back into /var/lib/docker.
# NOTE(review): the `*` glob does not match top-level dot-files — confirm none
# are needed.
- name: Restore /var/lib/docker/
shell: bash -euxo pipefail {0}
run: sudo sh -c "mv ${GITHUB_WORKSPACE}/docker/* /var/lib/docker"

# Install the Python build prerequisites, then torch, without keeping pip's
# download cache.
# Requirement strings containing shell metacharacters are quoted: unquoted,
# the shell parses `importlib-metadata>=1.0` as "install importlib-metadata and
# redirect pip's stdout to a file named `=1.0`", silently dropping the version
# bound, and treats `[global]` as a glob pattern.
- name: 'Dependencies'
  run: |
    pip install --no-cache-dir cmake==3.21.0 "pybind11[global]" ninja pydantic "importlib-metadata>=1.0" packaging numpy einops onnxscript
    pip install --no-cache-dir torch
Comment thread
pggPL marked this conversation as resolved.
Outdated
Comment thread
pggPL marked this conversation as resolved.
Outdated
Comment thread
pggPL marked this conversation as resolved.
Outdated
Comment thread
pggPL marked this conversation as resolved.
Outdated
# Check out the repository with `submodules: recursive`.
# NOTE(review): this action is referenced by tag (@v3) while other actions in
# this workflow are pinned to a commit SHA — confirm whether that is intended.
- name: 'Checkout'
uses: actions/checkout@v3
with:
submodules: recursive

# Start a detached container named `builder` from the CUDA 12.8.0 devel image,
# bind-mounting the current directory at the same path and using it as the
# working directory; `sleep infinity` is the container's command, so it stays
# up for the later `docker exec builder ...` steps.
- name: Start named container
run: |
docker run -v $(pwd):$(pwd) -w $(pwd) --name builder -d nvcr.io/nvidia/cuda:12.8.0-devel-ubuntu22.04 sleep infinity

# Install OS packages and the Python build prerequisites inside the `builder`
# container, then clean the apt cache.
# `importlib-metadata>=1.0` is double-quoted inside the single-quoted script:
# unquoted, the inner bash treats `>=1.0` as a redirection of pip's stdout to a
# file named `=1.0` and the version bound never reaches pip.
- name: 'Dependencies'
  run: |
    docker exec builder bash -c '\
      apt-get update && \
      apt-get install -y git python3.9 pip cudnn9-cuda-12 && \
      pip install cmake torch ninja pydantic "importlib-metadata>=1.0" packaging pybind11 numpy einops onnxscript && \
      apt-get clean \
    '

# SHA-pinned mozilla-actions/sccache-action step.
# NOTE(review): presumably provides the `sccache` binary that the Build step
# names in NVTE_CCACHE_BIN — confirm.
- name: ccache
uses: mozilla-actions/sccache-action@7d986dd989559c6ecdb630a3fd2557667be217ad
# Build and install the package with NVTE_FRAMEWORK=pytorch, using
# `--no-build-isolation` and `--no-deps`.
# NOTE(review): this step carries two `run:` keys (a `docker exec builder`
# variant and a direct sccache variant). Duplicate keys in one mapping are
# invalid YAML; these look like the two sides of a diff — keep exactly one.
- name: 'Build'
run: docker exec builder bash -c 'pip install --no-build-isolation . -v --no-deps'
run: NVTE_USE_CCACHE=1 NVTE_CCACHE_BIN=sccache pip install --no-build-isolation . -v --no-deps
env:
NVTE_FRAMEWORK: pytorch
MAX_JOBS: 1
SCCACHE_GHA_ENABLED: "true"
SCCACHE_CACHE_SIZE: "5G"
# Run the PyTorch import sanity script.
# NOTE(review): two `run:` keys again (docker-exec and direct) — keep one.
- name: 'Sanity check'
run: docker exec builder bash -c 'python3 tests/pytorch/test_sanity_import.py'
run: python3 tests/pytorch/test_sanity_import.py
jax:
name: 'JAX'
runs-on: ubuntu-latest
Expand All @@ -104,53 +80,34 @@ jobs:
NVTE_CCACHE_BIN=sccache NVTE_USE_CCACHE=1 pip install --no-build-isolation . -v
env:
NVTE_FRAMEWORK: jax
MAX_JOBS: 1
SCCACHE_GHA_ENABLED: "true"
SCCACHE_CACHE_SIZE: "5G"
# Run the JAX import sanity script from the repository checkout.
- name: 'Sanity check'
run: python3 tests/jax/test_sanity_import.py
# 'All' job header: runs on ubuntu-latest inside the ghcr.io/nvidia/jax:jax
# container, started with `--user root`; the Build step further down sets
# NVTE_FRAMEWORK=all.
# NOTE(review): this text looks like a flattened diff view — the steps that
# follow contain both direct `run:` commands and `docker exec builder ...`
# variants of the same step; confirm which set is current before reusing it.
all:
name: 'All'
runs-on: ubuntu-latest
container:
image: ghcr.io/nvidia/jax:jax
options: --user root
steps:
# Relocate /var/lib/docker/ into ${GITHUB_WORKSPACE}/docker; the contents are
# moved back by the 'Restore /var/lib/docker/' step further down.
- name: Move /var/lib/docker/
shell: bash -euxo pipefail {0}
run: sudo mv /var/lib/docker/ "${GITHUB_WORKSPACE}/docker"

# Run the SHA-pinned easimon/maximize-build-space action with the .NET,
# Android, Haskell and CodeQL removal flags set to 'true' and /var/lib/docker/
# given as build-mount-path.
# NOTE(review): the reserve/swap inputs are in MB going by their names; the
# action's exact behaviour is not visible here — check its README.
- name: Maximize build space
uses: easimon/maximize-build-space@c28619d8999a147d5e09c1199f84ff6af6ad5794
with:
root-reserve-mb: 5120
temp-reserve-mb: 32
swap-size-mb: 10240
remove-dotnet: 'true'
remove-android: 'true'
remove-haskell: 'true'
remove-codeql: 'true'
build-mount-path: '/var/lib/docker/'

# Move the saved directory contents back into /var/lib/docker.
# NOTE(review): the `*` glob does not match top-level dot-files — confirm none
# are needed.
- name: Restore /var/lib/docker/
shell: bash -euxo pipefail {0}
run: sudo sh -c "mv ${GITHUB_WORKSPACE}/docker/* /var/lib/docker"

# Install the build prerequisites, then torch, without keeping pip's download
# cache.
# `pybind11[global]` is quoted so the shell passes the extras specifier to pip
# verbatim instead of treating `[global]` as a glob pattern (which would expand
# if a matching file name happened to exist in the working directory).
- name: 'Dependencies'
  run: |
    pip install --no-cache-dir cmake==3.21.0 "pybind11[global]" einops onnxscript
    pip install --no-cache-dir torch
Comment thread
pggPL marked this conversation as resolved.
Outdated
Comment thread
pggPL marked this conversation as resolved.
Outdated
Comment thread
pggPL marked this conversation as resolved.
Outdated
Comment thread
pggPL marked this conversation as resolved.
Outdated
# Check out the repository with `submodules: recursive`.
# NOTE(review): this action is referenced by tag (@v3) while other actions in
# this workflow are pinned to a commit SHA — confirm whether that is intended.
- name: 'Checkout'
uses: actions/checkout@v3
with:
submodules: recursive

# Start a detached container named `builder` from ghcr.io/nvidia/jax:jax,
# bind-mounting the current directory at the same path and using it as the
# working directory; `sleep infinity` is the container's command, so it stays
# up for the later `docker exec builder ...` steps.
- name: Start named container
run: |
docker run -v $(pwd):$(pwd) -w $(pwd) --name builder -d ghcr.io/nvidia/jax:jax sleep infinity

# Install the build prerequisites inside the `builder` container, then torch
# from the PyTorch cu130 wheel index.
# `pybind11[global]` is double-quoted inside the single-quoted script so the
# inner bash passes the extras specifier to pip verbatim instead of treating
# `[global]` as a glob pattern.
- name: 'Dependencies'
  run: |
    docker exec builder bash -c '\
      pip install cmake==3.21.0 "pybind11[global]" einops onnxscript && \
      pip install torch --no-cache-dir --index-url https://download.pytorch.org/whl/cu130
    '
# SHA-pinned mozilla-actions/sccache-action step.
# NOTE(review): presumably provides the `sccache` binary that the Build step
# names in NVTE_CCACHE_BIN — confirm.
- name: ccache
uses: mozilla-actions/sccache-action@7d986dd989559c6ecdb630a3fd2557667be217ad
# Build and install the package with NVTE_FRAMEWORK=all, using
# `--no-build-isolation` and `--no-deps`.
# NOTE(review): this step carries two `run:` keys (a `docker exec builder`
# variant and a direct sccache variant). Duplicate keys in one mapping are
# invalid YAML; these look like the two sides of a diff — keep exactly one.
- name: 'Build'
run: docker exec builder bash -c 'pip install --no-cache-dir --no-build-isolation . -v --no-deps'
run: NVTE_USE_CCACHE=1 NVTE_CCACHE_BIN=sccache pip install --no-build-isolation . -v --no-deps
env:
NVTE_FRAMEWORK: all
MAX_JOBS: 1
SCCACHE_GHA_ENABLED: "true"
SCCACHE_CACHE_SIZE: "5G"
# Run the PyTorch and then the JAX import sanity scripts.
# NOTE(review): two `run:` keys again — a single docker-exec command chaining
# both scripts with `&&`, and a two-line direct script; keep one.
- name: 'Sanity check'
run: docker exec builder bash -c 'python3 tests/pytorch/test_sanity_import.py && python3 tests/jax/test_sanity_import.py'
run: |
python3 tests/pytorch/test_sanity_import.py
python3 tests/jax/test_sanity_import.py
7 changes: 4 additions & 3 deletions .github/workflows/deploy_nightly_docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ name: Deploy nightly docs
# Triggers: every push to `main`, plus manual runs via `workflow_dispatch`.
on:
push:
branches: [ "main" ]
workflow_dispatch:
jobs:
build:
uses: ./.github/workflows/docs.yml
Expand All @@ -21,9 +22,8 @@ jobs:
name: "te_docs"
path: "html"
# Package the "html" directory as a Pages artifact.
# NOTE(review): two `uses:` keys are present (upload-pages-artifact@v1.0.7 and
# @v3). Duplicate keys in one mapping are invalid YAML; these look like the two
# sides of a diff — keep exactly one. Likewise confirm whether
# `name: github-pages` belongs to the version that is kept.
- name: Prepare for pages
uses: actions/upload-pages-artifact@v1.0.7
uses: actions/upload-pages-artifact@v3
with:
name: github-pages
path: "html"
Comment thread
pggPL marked this conversation as resolved.
deploy:
needs: prepare
Expand All @@ -36,4 +36,5 @@ jobs:
runs-on: ubuntu-latest
steps:
# Deploy step, given the step id `deployment`.
# NOTE(review): two `uses:` keys are present (deploy-pages@v2.0.0 and @v4).
# Duplicate keys in one mapping are invalid YAML; these look like the two sides
# of a diff — keep exactly one.
- name: Deploy
uses: actions/deploy-pages@v2.0.0
id: deployment
uses: actions/deploy-pages@v4
Loading