Skip to content
Merged
101 changes: 32 additions & 69 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,58 +32,37 @@ jobs:
NVTE_FRAMEWORK: none
MAX_JOBS: 1
SCCACHE_GHA_ENABLED: "true"
SCCACHE_CACHE_SIZE: "1G"
# Smoke test: import the installed package with the filesystem root as the
# working directory.
- name: Sanity check
  working-directory: /
  run: >-
    python3 -c "import transformer_engine"
# PyTorch build job.
# Runs every step directly inside the ghcr.io/nvidia/jax:jax job container
# (as root), installs the Python build dependencies, builds the checked-out
# project with NVTE_FRAMEWORK=pytorch, and runs the PyTorch import sanity test.
#
# NOTE(review): the previous text of this job interleaved two variants of the
# same steps (a host-side `docker run`/`docker exec builder` flow and the
# in-container flow below), leaving duplicate `run:` keys and stray non-shell
# text inside a `run: |` script. Only the in-container flow is kept, because
# it is the one consistent with the `container:` setting and the sccache step.
pytorch:
  name: 'PyTorch'
  runs-on: ubuntu-latest
  container:
    image: ghcr.io/nvidia/jax:jax
    options: --user root
  steps:
    - name: 'Dependencies'
      run: |
        # Requirement specifiers are quoted on purpose:
        #  * unquoted `importlib-metadata>=1.0` makes the shell redirect pip's
        #    stdout into a file named `=1.0` and silently drops the constraint;
        #  * unquoted `pybind11[global]` is a glob pattern the shell may expand.
        pip install cmake==3.21.0 'pybind11[global]' ninja pydantic 'importlib-metadata>=1.0' packaging numpy einops onnxscript
        pip install torch --no-cache-dir
        pip cache purge
    - name: 'Checkout'
      uses: actions/checkout@v3
      with:
        submodules: recursive
    # Installs sccache; the action is pinned to a full commit SHA.
    - name: ccache
      uses: mozilla-actions/sccache-action@7d986dd989559c6ecdb630a3fd2557667be217ad
    - name: 'Build'
      # NVTE_USE_CCACHE / NVTE_CCACHE_BIN are handed to the project's build;
      # presumably they route compiler calls through sccache - confirm against
      # the build scripts.
      run: NVTE_USE_CCACHE=1 NVTE_CCACHE_BIN=sccache pip install --no-build-isolation . -v --no-deps
      env:
        NVTE_FRAMEWORK: pytorch
        MAX_JOBS: 1
        # Quoted so the values stay strings rather than a YAML boolean/number.
        SCCACHE_GHA_ENABLED: "true"
        SCCACHE_CACHE_SIZE: "1G"
    - name: 'Sanity check'
      run: python3 tests/pytorch/test_sanity_import.py
jax:
name: 'JAX'
runs-on: ubuntu-latest
Expand All @@ -106,51 +85,35 @@ jobs:
NVTE_FRAMEWORK: jax
MAX_JOBS: 1
SCCACHE_GHA_ENABLED: "true"
SCCACHE_CACHE_SIZE: "1G"
# Runs the JAX import sanity script from the repository's tests directory.
- name: Sanity check
  run: >-
    python3 tests/jax/test_sanity_import.py
# Combined-framework build job.
# Runs every step directly inside the ghcr.io/nvidia/jax:jax job container
# (as root), adds the extra Python dependencies plus torch, builds the
# checked-out project with NVTE_FRAMEWORK=all, and runs both the PyTorch and
# the JAX import sanity tests.
#
# NOTE(review): the previous text of this job interleaved two variants of the
# same steps (a host-side `docker run`/`docker exec builder` flow and the
# in-container flow below), leaving duplicate `run:` keys and stray non-shell
# text inside a `run: |` script. Only the in-container flow is kept, because
# it is the one consistent with the `container:` setting and the sccache step.
all:
  name: 'All'
  runs-on: ubuntu-latest
  container:
    image: ghcr.io/nvidia/jax:jax
    options: --user root
  steps:
    - name: 'Dependencies'
      run: |
        # `pybind11[global]` is quoted so the shell cannot treat the brackets
        # as a glob pattern.
        pip install cmake==3.21.0 'pybind11[global]' einops onnxscript
        pip install torch --no-cache-dir
        pip cache purge
    - name: 'Checkout'
      uses: actions/checkout@v3
      with:
        submodules: recursive
    # Installs sccache; the action is pinned to a full commit SHA.
    - name: ccache
      uses: mozilla-actions/sccache-action@7d986dd989559c6ecdb630a3fd2557667be217ad
    - name: 'Build'
      # NVTE_USE_CCACHE / NVTE_CCACHE_BIN are handed to the project's build;
      # presumably they route compiler calls through sccache - confirm against
      # the build scripts.
      run: NVTE_USE_CCACHE=1 NVTE_CCACHE_BIN=sccache pip install --no-build-isolation . -v --no-deps
      env:
        NVTE_FRAMEWORK: all
        MAX_JOBS: 1
        # Quoted so the values stay strings rather than a YAML boolean/number.
        SCCACHE_GHA_ENABLED: "true"
        SCCACHE_CACHE_SIZE: "1G"
    - name: 'Sanity check'
      # One command per line: each framework's import test is run in turn.
      run: |
        python3 tests/pytorch/test_sanity_import.py
        python3 tests/jax/test_sanity_import.py
7 changes: 4 additions & 3 deletions .github/workflows/deploy_nightly_docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ name: Deploy nightly docs
# Triggers: every push to the main branch, plus manual runs started through
# workflow_dispatch.
on:
  push:
    branches:
      - "main"
  workflow_dispatch:
jobs:
build:
uses: ./.github/workflows/docs.yml
Expand All @@ -21,9 +22,8 @@ jobs:
name: "te_docs"
path: "html"
# Packages the "html" directory as the Pages artifact named "github-pages".
# The step previously carried two `uses:` keys (v1.0.7 and v3); duplicate
# mapping keys are invalid YAML, so only the newer v3 reference is kept.
- name: Prepare for pages
  uses: actions/upload-pages-artifact@v3
  with:
    name: github-pages
    path: "html"
Comment thread
pggPL marked this conversation as resolved.
deploy:
needs: prepare
Expand All @@ -36,4 +36,5 @@ jobs:
runs-on: ubuntu-latest
steps:
# Publishes the uploaded Pages artifact.
# The step previously carried two `uses:` keys (v2.0.0 and v4); duplicate
# mapping keys are invalid YAML, so only the newer v4 reference is kept.
# NOTE(review): deploy-pages v4 expects an artifact produced by
# upload-pages-artifact v3 or later - confirm the uploading step matches.
- name: Deploy
  # `id` lets other steps/outputs refer to this step as `steps.deployment`.
  id: deployment
  uses: actions/deploy-pages@v4
Loading