RustCompute/.github/workflows/gpu-tests.yml at main · mivertowski/RustCompute · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
# GPU test workflow. Jobs are selected either by a manual dispatch (with a
# backend choice) or by labelling a pull request.
name: GPU Tests

on:
  # Manual trigger; the `backend` input selects which GPU job(s) run.
  workflow_dispatch:
    inputs:
      backend:
        description: 'GPU backend to test'
        required: true
        default: 'all'
        type: choice
        options:
          - all
          - cuda
          - wgpu
          - metal
  # Fires whenever a label is added to a PR. The GPU jobs additionally
  # require the PR to carry the `gpu-test` label (see each job's `if`).
  pull_request:
    types: [labeled]

# Least privilege: the jobs only check out the repository and build/test it,
# so the GITHUB_TOKEN needs nothing beyond read access to contents.
permissions:
  contents: read

env:
  CARGO_TERM_COLOR: always
  RUST_BACKTRACE: 1

jobs:
  # CUDA GPU Tests - requires self-hosted runner with NVIDIA GPU
  cuda-tests:
    name: CUDA Tests
    runs-on:
      - self-hosted
      - gpu
      - cuda
    timeout-minutes: 30
    # Run for a manual dispatch whose backend is `all` or `cuda`, or for a
    # pull request that carries the `gpu-test` label.
    if: >-
      (github.event_name == 'workflow_dispatch' &&
      (github.event.inputs.backend == 'all' || github.event.inputs.backend == 'cuda'))
      ||
      (github.event_name == 'pull_request' &&
      contains(github.event.pull_request.labels.*.name, 'gpu-test'))
    steps:
      - uses: actions/checkout@v4

      - name: Setup Rust
        uses: dtolnay/rust-toolchain@stable

      # Fail early if the driver or the CUDA compiler is not on the runner.
      - name: Check CUDA availability
        run: |
          nvidia-smi
          nvcc --version

      - name: Cache cargo
        uses: Swatinem/rust-cache@v2
        with:
          shared-key: 'gpu-cuda'

      - name: Run CUDA codegen tests
        run: cargo test -p ringkernel-cuda-codegen --all-features

      - name: Run CUDA backend tests
        run: cargo test -p ringkernel-cuda --features cuda

      - name: Run GPU execution verification tests
        run: cargo test -p ringkernel-cuda --test gpu_execution_verify --features cuda

      # Benchmarks are best-effort: a failure here does not fail the job.
      - name: Run WaveSim3D GPU benchmark
        continue-on-error: true
        run: >-
          cargo run -p ringkernel-wavesim3d --bin wavesim3d-benchmark
          --release --features cuda-codegen -- --quick

      - name: Run TxMon GPU benchmark
        continue-on-error: true
        run: >-
          cargo run -p ringkernel-txmon --bin txmon-benchmark
          --release --features cuda-codegen -- --quick

  # WebGPU Tests - can run on any runner with Vulkan/DX12/Metal support;
  # scheduled here on self-hosted runners labelled `gpu`.
  wgpu-tests:
    name: WebGPU Tests
    runs-on:
      - self-hosted
      - gpu
    timeout-minutes: 20
    # Run for a manual dispatch whose backend is `all` or `wgpu`, or for a
    # pull request that carries the `gpu-test` label.
    if: >-
      (github.event_name == 'workflow_dispatch' &&
      (github.event.inputs.backend == 'all' || github.event.inputs.backend == 'wgpu'))
      ||
      (github.event_name == 'pull_request' &&
      contains(github.event.pull_request.labels.*.name, 'gpu-test'))
    steps:
      - uses: actions/checkout@v4

      - name: Setup Rust
        uses: dtolnay/rust-toolchain@stable

      - name: Cache cargo
        uses: Swatinem/rust-cache@v2
        with:
          shared-key: 'gpu-wgpu'

      - name: Run WGSL codegen tests
        run: cargo test -p ringkernel-wgpu-codegen --all-features

      # Executes the tests marked `#[ignore]`; a failure does not fail the job.
      - name: Run WebGPU backend tests
        continue-on-error: true
        run: cargo test -p ringkernel-wgpu --features wgpu-tests -- --ignored

  # Metal Tests - macOS only (GitHub-hosted macOS runner)
  metal-tests:
    name: Metal Tests
    runs-on: macos-latest
    timeout-minutes: 20
    # Run for a manual dispatch whose backend is `all` or `metal`, or for a
    # pull request that carries the `gpu-test` label.
    if: >-
      (github.event_name == 'workflow_dispatch' &&
      (github.event.inputs.backend == 'all' || github.event.inputs.backend == 'metal'))
      ||
      (github.event_name == 'pull_request' &&
      contains(github.event.pull_request.labels.*.name, 'gpu-test'))
    steps:
      - uses: actions/checkout@v4

      - name: Setup Rust
        uses: dtolnay/rust-toolchain@stable

      - name: Cache cargo
        uses: Swatinem/rust-cache@v2
        with:
          shared-key: 'gpu-metal'

      # Informational only: the `|| echo` fallback keeps this step green
      # when no Metal line is found in the display report.
      - name: Check Metal availability
        run: |
          system_profiler SPDisplaysDataType | grep -i metal || echo "Metal info not available"

      # Both steps below are best-effort: failures do not fail the job.
      - name: Run Metal backend tests
        continue-on-error: true
        run: cargo test -p ringkernel-metal --features metal

      - name: Build Metal examples
        continue-on-error: true
        run: cargo build -p ringkernel --examples --features metal

  # CPU Backend GPU Mock Tests - needs no GPU. The job has no `if`, so it
  # runs on every trigger of this workflow, on a hosted Ubuntu runner.
  cpu-mock-tests:
    name: CPU Mock GPU Tests
    runs-on: ubuntu-latest
    # Bound the job as the GPU jobs do, instead of inheriting the
    # 360-minute default when a test hangs.
    timeout-minutes: 30
    steps:
      - uses: actions/checkout@v4

      - name: Setup Rust
        uses: dtolnay/rust-toolchain@stable

      - name: Cache cargo
        uses: Swatinem/rust-cache@v2

      - name: Run CPU backend tests (GPU mock)
        run: cargo test -p ringkernel-cpu --all-features

      - name: Run core tests with CPU backend
        run: cargo test -p ringkernel-core --all-features

      - name: Run ecosystem tests with CPU mock
        run: cargo test -p ringkernel-ecosystem --features "persistent,actix,tower,axum,grpc"

  # Performance baseline on CPU - manual dispatch only.
  benchmark-baseline:
    name: Performance Baseline
    runs-on: ubuntu-latest
    # Bound the job as the GPU jobs do, instead of inheriting the
    # 360-minute default when a benchmark hangs.
    timeout-minutes: 30
    if: github.event_name == 'workflow_dispatch'
    steps:
      - uses: actions/checkout@v4

      - name: Setup Rust
        uses: dtolnay/rust-toolchain@stable

      - name: Cache cargo
        uses: Swatinem/rust-cache@v2

      # Benchmarks are best-effort: a failing step does not fail the job.
      - name: Run CPU benchmarks
        run: cargo bench --package ringkernel -- --noplot --quick
        continue-on-error: true

      - name: Run WaveSim CPU benchmark
        run: cargo run -p ringkernel-wavesim --example benchmark --release -- --quick
        continue-on-error: true

  # Summary report - writes one result row per backend job to the run's
  # step summary.
  summary:
    name: Test Summary
    needs: [cuda-tests, wgpu-tests, metal-tests, cpu-mock-tests]
    # always(): still report when upstream jobs failed or were skipped.
    if: always()
    runs-on: ubuntu-latest
    steps:
      - name: Report Status
        # Job results are handed to the script through the environment
        # rather than being interpolated into the script text.
        env:
          CUDA_RESULT: ${{ needs.cuda-tests.result }}
          WGPU_RESULT: ${{ needs.wgpu-tests.result }}
          METAL_RESULT: ${{ needs.metal-tests.result }}
          CPU_MOCK_RESULT: ${{ needs.cpu-mock-tests.result }}
        # One grouped, quoted redirect replaces eight unquoted appends.
        run: |
          {
            echo "## GPU Test Results"
            echo ""
            echo "| Backend | Status |"
            echo "|---------|--------|"
            echo "| CUDA | ${CUDA_RESULT} |"
            echo "| WebGPU | ${WGPU_RESULT} |"
            echo "| Metal | ${METAL_RESULT} |"
            echo "| CPU Mock | ${CPU_MOCK_RESULT} |"
          } >> "$GITHUB_STEP_SUMMARY"