Skip to content

Commit 5a5cc6f

Browse files
committed
[executorch] show backend test infra pass rate of CUDA backend to PyTorch HUD
Add infrastructure to track CUDA backend test pass rates on PyTorch HUD dashboard over time Differential Revision: [D95335059](https://our.internmc.facebook.com/intern/diff/D95335059/) ghstack-source-id: 347850731 Pull Request resolved: #17874
1 parent 28e2186 commit 5a5cc6f

2 files changed

Lines changed: 254 additions & 0 deletions

File tree

Lines changed: 194 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,194 @@
1+
"""
2+
Parse test report JSON produced by pytest --json-report and generate
3+
v3 format benchmark results for upload to PyTorch HUD.
4+
5+
Metrics produced per suite:
6+
- pass_rate(%) : percentage of passing tests (skips excluded from denominator)
7+
- total_pass : number of passing tests
8+
- total_fail : number of failing tests
9+
- total_skip : number of skipped tests
10+
"""
11+
12+
import argparse
13+
import json
14+
import sys
15+
16+
17+
def parse_test_report(json_path: str) -> dict:
    """
    Parse a test report JSON file and return pass/fail/skip counts.

    The JSON is produced by test_backend.sh via pytest --json-report and has the
    structure used by generate_markdown_summary_json.py:
    { "tests": [ { "metadata": { "subtests": [ { "Result": "Pass"|"Fail"|"Skip", ... } ] } } ] }

    Args:
        json_path: Path to the test report JSON file.

    Returns:
        Dict with integer counts under the keys "passes", "fails", "skips".
    """
    with open(json_path) as f:
        data = json.load(f)

    # Tally subtest results. Use .get() with empty defaults so a partially
    # written or schema-drifted report yields zero counts instead of crashing
    # the CI upload job with a KeyError. Unknown Result values are ignored.
    tally = {"Pass": 0, "Fail": 0, "Skip": 0}
    for test_data in data.get("tests", []):
        for subtest in test_data.get("metadata", {}).get("subtests", []):
            result = subtest.get("Result")
            if result in tally:
                tally[result] += 1

    return {"passes": tally["Pass"], "fails": tally["Fail"], "skips": tally["Skip"]}
43+
44+
45+
def build_v3_record(
    metric_name: str,
    value: float,
    suite: str,
    flow: str,
    git_sha: str,
    workflow_run_id: str,
    workflow_run_url: str,
    runner_name: str,
) -> dict:
    """Build one benchmark record in the PyTorch HUD v3 schema.

    Each record reports a single metric (pass rate or a raw count) for one
    CUDA backend test suite, tagged with enough CI metadata (git SHA,
    workflow run id/url, runner) to locate the originating run.
    """
    benchmark_info = {
        "name": "ExecuTorch",
        "mode": "test",
        "extra_info": {
            "backend": "cuda",
            "suite": suite,
            "flow": flow,
            "git_sha": git_sha,
            "workflow_run_id": workflow_run_id,
            "workflow_run_url": workflow_run_url,
        },
    }
    model_info = {
        "name": f"cuda_backend_tests_{suite}",
        "type": "OSS backend test",
        "backend": "cuda",
    }
    metric_info = {
        "name": metric_name,
        "benchmark_values": [value],
        "target_value": 0,
        "extra_info": {},
    }
    return {
        "benchmark": benchmark_info,
        "model": model_info,
        "metric": metric_info,
        "runners": [{"name": runner_name, "type": "linux"}],
    }
82+
83+
84+
def generate_v3_records(
    counts: dict,
    suite: str,
    flow: str,
    git_sha: str,
    workflow_run_id: str,
    workflow_run_url: str,
    runner_name: str,
) -> list:
    """Generate the full set of v3 records (pass rate plus raw counts).

    Skipped tests are excluded from the pass-rate denominator; a suite with
    no passes or fails reports a 0.0% pass rate rather than dividing by zero.
    """
    denominator = counts["passes"] + counts["fails"]
    pass_rate = (counts["passes"] / denominator * 100) if denominator > 0 else 0.0

    # CI metadata shared by every record for this suite.
    shared = {
        "suite": suite,
        "flow": flow,
        "git_sha": git_sha,
        "workflow_run_id": workflow_run_id,
        "workflow_run_url": workflow_run_url,
        "runner_name": runner_name,
    }

    metrics = [
        ("pass_rate(%)", pass_rate),
        ("total_pass", counts["passes"]),
        ("total_fail", counts["fails"]),
        ("total_skip", counts["skips"]),
    ]
    return [build_v3_record(name, value, **shared) for name, value in metrics]
116+
117+
118+
def main():
    """CLI entry point: turn a pytest JSON report into v3 benchmark records.

    Reads the report, prints a short human-readable summary to stdout, and
    writes the v3-format records as JSON to --output-v3.
    """
    parser = argparse.ArgumentParser(
        description="Generate v3 format benchmark results from test report JSON"
    )
    parser.add_argument(
        "--report-json", required=True, help="Path to the test report JSON file"
    )
    parser.add_argument(
        "--suite", required=True, help="Test suite name (e.g. models, operators)"
    )
    parser.add_argument("--flow", required=True, help="Test flow name (e.g. cuda)")
    parser.add_argument("--git-sha", required=True, help="Git commit SHA")
    parser.add_argument(
        "--workflow-run-id", required=True, help="GitHub workflow run ID"
    )
    parser.add_argument(
        "--workflow-run-url", default="", help="GitHub workflow run URL"
    )
    parser.add_argument(
        "--runner-name",
        default="linux.g5.4xlarge.nvidia.gpu",
        help="CI runner name",
    )
    parser.add_argument(
        "--output-v3", required=True, help="Path to write v3 format JSON output"
    )
    args = parser.parse_args()

    counts = parse_test_report(args.report_json)

    # Same pass-rate definition as generate_v3_records: skips excluded from
    # the denominator, 0.0 when nothing ran.
    denominator = counts["passes"] + counts["fails"]
    pass_rate = (counts["passes"] / denominator * 100) if denominator > 0 else 0.0

    print(f"Suite: {args.suite}")
    print(f" Pass: {counts['passes']}, Fail: {counts['fails']}, Skip: {counts['skips']}")
    print(f" Pass rate: {pass_rate:.2f}%")

    records = generate_v3_records(
        counts=counts,
        suite=args.suite,
        flow=args.flow,
        git_sha=args.git_sha,
        workflow_run_id=args.workflow_run_id,
        workflow_run_url=args.workflow_run_url,
        runner_name=args.runner_name,
    )

    with open(args.output_v3, "w") as f:
        json.dump(records, f, indent=2)

    print(f"Wrote {len(records)} v3 records to {args.output_v3}")


if __name__ == "__main__":
    main()

.github/workflows/test-backend-cuda.yml

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ on:
1212
paths:
1313
- .github/workflows/test-backend-cuda.yml
1414
- .github/workflows/_test_backend.yml
15+
- .ci/scripts/generate_test_pass_rate_in_v3_format.py
1516
workflow_dispatch:
1617

1718
concurrency:
@@ -28,3 +29,62 @@ jobs:
2829
timeout: 120
2930
run-linux: true
3031
runner-linux: linux.g5.4xlarge.nvidia.gpu
32+
33+
upload-test-results:
34+
needs: test-cuda
35+
if: always()
36+
runs-on: ubuntu-22.04
37+
environment: upload-benchmark-results
38+
permissions:
39+
id-token: write
40+
contents: read
41+
steps:
42+
- uses: actions/checkout@v3
43+
with:
44+
submodules: false
45+
46+
- name: Setup Python
47+
uses: actions/setup-python@v4
48+
with:
49+
python-version: '3.10'
50+
51+
- name: Download test report artifacts
52+
uses: actions/download-artifact@v4
53+
with:
54+
pattern: test-report-*
55+
path: downloaded-reports/
56+
57+
- name: Generate v3 benchmark results
58+
shell: bash
59+
run: |
60+
set -eux
61+
mkdir -p benchmark-results/v3
62+
63+
for SUITE in models operators; do
64+
REPORT="downloaded-reports/test-report-cuda-${SUITE}/test-report-cuda-${SUITE}.json"
65+
if [ -f "$REPORT" ]; then
66+
echo "Processing report for suite: $SUITE"
67+
python .ci/scripts/generate_test_pass_rate_in_v3_format.py \
68+
--report-json "$REPORT" \
69+
--suite "$SUITE" \
70+
--flow cuda \
71+
--git-sha "${{ github.sha }}" \
72+
--workflow-run-id "${{ github.run_id }}" \
73+
--workflow-run-url "https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}" \
74+
--runner-name "linux.g5.4xlarge.nvidia.gpu" \
75+
--output-v3 "benchmark-results/v3/cuda-test-${SUITE}.json"
76+
else
77+
echo "Warning: Report not found for suite $SUITE at $REPORT"
78+
fi
79+
done
80+
81+
echo "V3 results prepared:"
82+
ls -lah benchmark-results/v3/ || echo "No v3 results generated"
83+
84+
- name: Upload test pass rate to dashboard
85+
uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
86+
with:
87+
benchmark-results-dir: benchmark-results/v3
88+
dry-run: false
89+
schema-version: v3
90+
github-token: ${{ secrets.GITHUB_TOKEN }}

0 commit comments

Comments
 (0)