Skip to content

Commit bbd3a83

Browse files
committed
Tighten ContextBench score artifact output
1 parent 5349e5d commit bbd3a83

1 file changed

Lines changed: 24 additions & 2 deletions

File tree

.github/workflows/contextbench-five-lane-score.yml

Lines changed: 24 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -88,7 +88,12 @@ jobs:
88 88
path: ${{ env.EXTERNAL_READINESS_ROOT }}/ripgrep
89 89
- name: Score five ready lane selections
90 90
shell: bash
91-
run: node scripts/contextbench-score-five-lane-artifact-selections.mjs
91+
run: |
92+
set +e
93+
node scripts/contextbench-score-five-lane-artifact-selections.mjs > "$ROOT/logs/score-five-lane.log" 2>&1
94+
status=$?
95+
tail -n 120 "$ROOT/logs/score-five-lane.log"
96+
exit "$status"
92 97
- name: Build publishable pilot report
93 98
shell: bash
94 99
run: |
@@ -108,5 +113,22 @@ jobs:
108 113
uses: actions/upload-artifact@v4
109 114
with:
110 115
name: contextbench-five-lane-score
111-
path: /tmp/contextbench-five-lane-score
116+
path: |
117+
/tmp/contextbench-five-lane-score/summary.json
118+
/tmp/contextbench-five-lane-score/publishable-summary.json
119+
/tmp/contextbench-five-lane-score/publishable-validation.json
120+
/tmp/contextbench-five-lane-score/humanized-summary.md
121+
/tmp/contextbench-five-lane-score/task-payloads.json
122+
/tmp/contextbench-five-lane-score/logs/**
123+
/tmp/contextbench-five-lane-score/lane-score/summary.json
124+
/tmp/contextbench-five-lane-score/lane-score/gold.json
125+
/tmp/contextbench-five-lane-score/lane-score/gold-command.json
126+
/tmp/contextbench-five-lane-score/lane-score/selections.json
127+
/tmp/contextbench-five-lane-score/lane-score/*/selection.json
128+
/tmp/contextbench-five-lane-score/lane-score/*/prediction.json
129+
/tmp/contextbench-five-lane-score/lane-score/*/official-score.jsonl
130+
/tmp/contextbench-five-lane-score/lane-score/*/evaluator-command.json
131+
/tmp/contextbench-five-lane-score/external-readiness/**/*.json
132+
/tmp/contextbench-five-lane-score/external-readiness/**/*.jsonl
133+
/tmp/contextbench-five-lane-score/external-readiness/**/*.md
112 134
retention-days: 14

0 commit comments

Comments
 (0)