-
Notifications
You must be signed in to change notification settings - Fork 974
109 lines (96 loc) · 3.43 KB
/
test-backend-cuda.yml
File metadata and controls
109 lines (96 loc) · 3.43 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# CUDA backend test workflow: runs nightly, on release branches and
# ciflow/nightly tags, on pull requests touching the files listed under
# `pull_request.paths`, and on manual dispatch.
name: Test CUDA Backend

on:
  schedule:
    # Minute 0, hour 2, every day. GitHub evaluates cron schedules in UTC.
    # Quoted so the expression is always read as a single string.
    - cron: '0 2 * * *'
  push:
    branches:
      - release/*
    tags:
      - ciflow/nightly/*
  pull_request:
    # Only pull requests that modify this workflow or the scripts it drives
    # trigger a run.
    paths:
      - .github/workflows/test-backend-cuda.yml
      - .ci/scripts/test_backend.sh
      - .github/workflows/_test_backend.yml
      - .ci/scripts/generate_test_pass_rate_in_v3_format.py
  workflow_dispatch:

concurrency:
  # The group key is built from the workflow name, the PR number (or the
  # commit SHA when the event has no PR), and whether the run was started
  # manually, so a manual run never shares a group with an automatic run.
  group: ${{ github.workflow }}--${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
  # A newer run in the same group cancels the one still in progress.
  cancel-in-progress: true
jobs:
  # Runs every suite in the matrix through the shared pytorch/test-infra
  # Linux job on a CUDA GPU runner and uploads one report artifact per suite.
  test-cuda:
    strategy:
      # Let the remaining suite finish even if the other one fails.
      fail-fast: false
      matrix:
        # Keep in sync with the suite loop in `upload-test-results`.
        suite: [models, operators]
    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
    with:
      # On pull requests `github.sha` is the merge commit, so select the PR
      # head commit explicitly; every other event uses `github.sha`.
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      # Also passed as `--runner-name` in `upload-test-results`; keep in sync.
      runner: linux.g5.4xlarge.nvidia.gpu
      gpu-arch-type: cuda
      # Quoted so the version stays a string instead of becoming a float.
      gpu-arch-version: '12.6'
      use-custom-docker-registry: false
      submodules: recursive
      timeout: 120
      # Artifact name; the upload job below locates reports by this name.
      upload-artifact: test-report-cuda-${{ matrix.suite }}
      script: |
        set -eux
        source .ci/scripts/test_backend.sh "${{ matrix.suite }}" "cuda" "${RUNNER_ARTIFACT_DIR}"

  # Converts the per-suite test reports to v3 benchmark records and uploads
  # them to the dashboard.
  upload-test-results:
    needs: test-cuda
    # Run even when a test suite failed, so failing runs are still reported,
    # but skip when the run was cancelled (for example, superseded through
    # `cancel-in-progress`); `always()` would also run on cancellation.
    if: ${{ !cancelled() }}
    runs-on: ubuntu-22.04
    environment: upload-benchmark-results
    permissions:
      # `id-token: write` is required for the OIDC role assumption in the
      # AWS authentication step.
      id-token: write
      contents: read
    steps:
      # Only the conversion script is needed here, so submodules are skipped.
      - uses: actions/checkout@v4
        with:
          submodules: false

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          # Quoted: an unquoted 3.10 would be parsed as the float 3.1.
          python-version: '3.10'

      # With `pattern` and no `merge-multiple`, each artifact is extracted
      # into its own sub-directory named after the artifact.
      - name: Download test report artifacts
        uses: actions/download-artifact@v4
        with:
          pattern: test-report-*
          path: downloaded-reports/

      - name: Generate v3 benchmark results
        shell: bash
        run: |
          set -eux
          mkdir -p benchmark-results/v3

          # Keep this suite list in sync with the `test-cuda` matrix.
          for SUITE in models operators; do
            # Assumes the test job writes test-report-cuda-<suite>.json into
            # its artifact directory; verify against test_backend.sh.
            REPORT="downloaded-reports/test-report-cuda-${SUITE}/test-report-cuda-${SUITE}.json"
            if [ -f "$REPORT" ]; then
              echo "Processing report for suite: $SUITE"
              # NOTE(review): on pull_request events github.sha is the merge
              # commit, while test-cuda checks out the PR head SHA; confirm
              # which commit the dashboard should record.
              python .ci/scripts/generate_test_pass_rate_in_v3_format.py \
                --report-json "$REPORT" \
                --suite "$SUITE" \
                --flow cuda \
                --git-sha "${{ github.sha }}" \
                --workflow-run-id "${{ github.run_id }}" \
                --workflow-run-url "https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}" \
                --runner-name "linux.g5.4xlarge.nvidia.gpu" \
                --output-v3 "benchmark-results/v3/cuda-test-${SUITE}.json"
            else
              # A missing report is only a warning so the other suite can
              # still be processed.
              echo "Warning: Report not found for suite $SUITE at $REPORT"
            fi
          done

          echo "V3 results prepared:"
          ls -lah benchmark-results/v3/ || echo "No v3 results generated"

      - name: Authenticate with AWS
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_upload-benchmark-results
          # 18000 seconds = 5 hours.
          role-duration-seconds: 18000
          aws-region: us-east-1

      - name: Upload test pass rate to dashboard
        uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
        with:
          benchmark-results-dir: benchmark-results/v3
          dry-run: false
          schema-version: v3
          github-token: ${{ secrets.GITHUB_TOKEN }}