Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 17 additions & 2 deletions actions/setup/js/emit_outcome_spans.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ const {
const AW_INFO_PATH = "/tmp/gh-aw/aw_info.json";
const EVALUATIONS_PATH = "/tmp/gh-aw/outcome-evaluations.jsonl";
const SUMMARY_PATH = "/tmp/gh-aw/outcome-summary.json";
const OTLP_STATUS_UNSET = 0;
const OTLP_STATUS_OK = 1;
Comment thread
pelikhan marked this conversation as resolved.
const OTLP_STATUS_ERROR = 2;

/**
* Read a JSONL file, returning an array of parsed objects.
Expand Down Expand Up @@ -136,6 +139,11 @@ async function main() {
for (const eval_ of evaluations) {
const type = typeof eval_.type === "string" ? eval_.type : "";
const result = typeof eval_.result === "string" ? eval_.result : "unknown";
// Fall back to the legacy result field so older JSONL artifacts still render
// useful spans while newer artifacts carry explicit normalized fields.
const outcomeStatus = typeof eval_.outcome_status === "string" ? eval_.outcome_status : result;
const evidenceStrength = typeof eval_.evidence_strength === "string" ? eval_.evidence_strength : "weak";
const signal = typeof eval_.signal === "string" ? eval_.signal : "";
Comment thread
mnkiefer marked this conversation as resolved.
const detail = typeof eval_.detail === "string" ? eval_.detail : "";
const workflow = typeof eval_.workflow === "string" ? eval_.workflow : "";
const sourceRunId = typeof eval_.run_id === "number" ? eval_.run_id : 0;
Expand All @@ -159,13 +167,16 @@ async function main() {
buildAttr("gh-aw.exporter.name", "outcome-collector"),
buildAttr("gh-aw.outcome.type", type),
buildAttr("gh-aw.outcome.result", result),
buildAttr("gh-aw.outcome.outcome_status", outcomeStatus),
buildAttr("gh-aw.outcome.evidence_strength", evidenceStrength),
buildAttr("gh-aw.outcome.workflow", workflow),
buildAttr("gh-aw.outcome.run_id", sourceRunId),
buildAttr("gh-aw.outcome.repo", repo),
];

if (url) attributes.push(buildAttr("gh-aw.outcome.url", url));
if (detail) attributes.push(buildAttr("gh-aw.outcome.detail", detail));
if (signal) attributes.push(buildAttr("gh-aw.outcome.signal", signal));
if (timestamp) attributes.push(buildAttr("gh-aw.outcome.created_at", timestamp));
if (event) attributes.push(buildAttr("gh-aw.outcome.event", event));
if (resolutionSec !== null) attributes.push(buildAttr("gh-aw.outcome.resolution_sec", resolutionSec));
Expand All @@ -180,8 +191,8 @@ async function main() {
if (comments !== null) attributes.push(buildAttr("gh-aw.outcome.comments", comments));
if (zeroTouch) attributes.push(buildAttr("gh-aw.outcome.zero_touch", true));

// Map result to OTLP status: accepted=OK, rejected=ERROR, noop=UNSET, pending/ignored=UNSET
const statusCode = result === "rejected" ? 2 : result === "accepted" ? 1 : 0;
// Map normalized outcome_status to OTLP status: accepted=OK, rejected=ERROR, all others=UNSET
const statusCode = outcomeStatus === "rejected" ? OTLP_STATUS_ERROR : outcomeStatus === "accepted" ? OTLP_STATUS_OK : OTLP_STATUS_UNSET;

itemSpans.push(
buildOTLPSpan({
Expand Down Expand Up @@ -213,6 +224,10 @@ async function main() {
buildAttr("gh-aw.outcome.ignored", getSummaryNumber("ignored", 0)),
buildAttr("gh-aw.outcome.pending", getSummaryNumber("pending", 0)),
buildAttr("gh-aw.outcome.noop", getSummaryNumber("noop", 0)),
buildAttr("gh-aw.outcome.accepted_strong", getSummaryNumber("accepted_strong", 0)),
buildAttr("gh-aw.outcome.accepted_medium", getSummaryNumber("accepted_medium", 0)),
buildAttr("gh-aw.outcome.accepted_weak", getSummaryNumber("accepted_weak", 0)),
buildAttr("gh-aw.outcome.fallback_exists_only_count", getSummaryNumber("fallback_exists_only_count", 0)),
buildAttr("gh-aw.outcome.acceptance_rate", getSummaryNumber("acceptance_rate", 0)),
buildAttr("gh-aw.outcome.waste_rate", getSummaryNumber("waste_rate", 0)),
buildAttr("gh-aw.outcome.noop_rate", getSummaryNumber("noop_rate", 0)),
Expand Down
24 changes: 21 additions & 3 deletions actions/setup/js/emit_outcome_spans.test.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,10 @@ describe("emit_outcome_spans.cjs", () => {
ignored: 0,
pending: 0,
noop: 0,
accepted_strong: 1,
accepted_medium: 0,
accepted_weak: 0,
fallback_exists_only_count: 1,
noop_rate: 0,
zero_touch: 1,
zero_touch_rate: 1,
Expand All @@ -197,7 +201,10 @@ describe("emit_outcome_spans.cjs", () => {
JSON.stringify({
type: "issue",
result: "accepted",
detail: "created item",
outcome_status: "accepted",
evidence_strength: "strong",
signal: "merged",
detail: "merged",
workflow: "triage",
run_id: 101,
url: "https://github.com/github/gh-aw/issues/1",
Expand All @@ -216,6 +223,9 @@ describe("emit_outcome_spans.cjs", () => {
JSON.stringify({
type: "comment",
result: "rejected",
outcome_status: "unknown",
evidence_strength: "weak",
signal: "target_exists_only",
workflow: "triage",
run_id: 102,
repo: "github/gh-aw",
Expand Down Expand Up @@ -271,16 +281,21 @@ describe("emit_outcome_spans.cjs", () => {
expect.objectContaining({
spanName: "gh-aw.outcome.evaluation",
parentSpanId: summarySpan.spanId,
statusCode: 2,
statusCode: 0,
})
);

expect(summarySpan.attributes).toContainEqual({ key: "gh-aw.exporter.name", value: "outcome-collector" });
expect(summarySpan.attributes).toContainEqual({ key: "gh-aw.outcome.date", value: "2026-05-13" });
expect(summarySpan.attributes).toContainEqual({ key: "gh-aw.outcome.zero_touch_count", value: 1 });
expect(summarySpan.attributes).toContainEqual({ key: "gh-aw.outcome.accepted_strong", value: 1 });
expect(summarySpan.attributes).toContainEqual({ key: "gh-aw.outcome.fallback_exists_only_count", value: 1 });
expect(spans[1].attributes).toContainEqual({ key: "gh-aw.exporter.name", value: "outcome-collector" });
expect(spans[1].attributes).toContainEqual({ key: "gh-aw.outcome.url", value: "https://github.com/github/gh-aw/issues/1" });
expect(spans[1].attributes).toContainEqual({ key: "gh-aw.outcome.detail", value: "created item" });
expect(spans[1].attributes).toContainEqual({ key: "gh-aw.outcome.detail", value: "merged" });
expect(spans[1].attributes).toContainEqual({ key: "gh-aw.outcome.outcome_status", value: "accepted" });
expect(spans[1].attributes).toContainEqual({ key: "gh-aw.outcome.evidence_strength", value: "strong" });
expect(spans[1].attributes).toContainEqual({ key: "gh-aw.outcome.signal", value: "merged" });
expect(spans[1].attributes).toContainEqual({ key: "gh-aw.outcome.created_at", value: "2026-05-13T09:00:00Z" });
expect(spans[1].attributes).toContainEqual({ key: "gh-aw.outcome.review_comments", value: 0 });
expect(spans[1].attributes).toContainEqual({ key: "gh-aw.outcome.changed_files", value: 3 });
Expand All @@ -300,6 +315,9 @@ describe("emit_outcome_spans.cjs", () => {
expect(spans[2].attributes.find(attr => attr.key === "gh-aw.outcome.reactions_negative")).toBeUndefined();
expect(spans[2].attributes.find(attr => attr.key === "gh-aw.outcome.comments")).toBeUndefined();
expect(spans[2].attributes.find(attr => attr.key === "gh-aw.outcome.zero_touch")).toBeUndefined();
expect(spans[2].attributes).toContainEqual({ key: "gh-aw.outcome.outcome_status", value: "unknown" });
expect(spans[2].attributes).toContainEqual({ key: "gh-aw.outcome.evidence_strength", value: "weak" });
expect(spans[2].attributes).toContainEqual({ key: "gh-aw.outcome.signal", value: "target_exists_only" });

expect(mockAppendToOTLPJSONL).toHaveBeenCalledOnce();
expect(mockSendOTLPToAllEndpoints).not.toHaveBeenCalled();
Expand Down
92 changes: 86 additions & 6 deletions actions/setup/js/evaluate_outcomes.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,9 @@ function secondsBetween(from, to) {
/**
* @typedef {object} EvalResult
* @property {string} result
* @property {"accepted"|"rejected"|"pending"|"ignored"|"skipped"|"unknown"} outcome_status
* @property {"strong"|"medium"|"weak"} evidence_strength
* @property {string} signal
* @property {string} detail
* @property {number | null} resolution_sec
* @property {number | null} pending_age_sec
Expand All @@ -170,6 +173,46 @@ function secondsBetween(from, to) {
* @property {boolean} zero_touch
*/

/**
 * Translate a legacy `result` / `detail` pair into the shared outcome model
 * (`outcome_status`, `evidence_strength`, `signal`).
 *
 * Precedence, highest first:
 *   1. a `noop` result is always reported as skipped;
 *   2. an "object still exists" detail is always reported as unknown,
 *      whatever the legacy result says;
 *   3. otherwise the result picks the status and the detail only refines
 *      the signal and evidence strength.
 *
 * The detail is compared case-insensitively and ignoring surrounding
 * whitespace; the result must match exactly.
 * @param {string} result
 * @param {string} detail
 * @returns {{ outcome_status: "accepted"|"rejected"|"pending"|"ignored"|"skipped"|"unknown", evidence_strength: "strong"|"medium"|"weak", signal: string }}
 */
function normalizeOutcome(result, detail) {
  const detailKey = String(detail || "")
    .toLowerCase()
    .trim();

  // No-ops short-circuit everything else, including the existence fallback.
  if (result === "noop") {
    return { outcome_status: "skipped", evidence_strength: "weak", signal: "noop" };
  }

  // Existence-only evidence overrides the legacy result.
  if (detailKey === "object still exists") {
    return { outcome_status: "unknown", evidence_strength: "weak", signal: "target_exists_only" };
  }

  if (result === "accepted") {
    return detailKey === "merged"
      ? { outcome_status: "accepted", evidence_strength: "strong", signal: "merged" }
      : { outcome_status: "accepted", evidence_strength: "medium", signal: "acted_on" };
  }

  if (result === "rejected") {
    return detailKey === "closed"
      ? { outcome_status: "rejected", evidence_strength: "strong", signal: "closed" }
      : { outcome_status: "rejected", evidence_strength: "medium", signal: "rejected" };
  }

  if (result === "pending") {
    // Both pending variants carry medium evidence; only the signal differs.
    return detailKey === "open"
      ? { outcome_status: "pending", evidence_strength: "medium", signal: "open" }
      : { outcome_status: "pending", evidence_strength: "medium", signal: "pending" };
  }

  if (result === "ignored") {
    return { outcome_status: "ignored", evidence_strength: "medium", signal: "ignored" };
  }

  // Anything unrecognized is reported as unknown with weak evidence.
  return { outcome_status: "unknown", evidence_strength: "weak", signal: "unknown" };
}

/**
* Evaluate a single safe-output item against the GitHub API.
* @param {object} item
Expand All @@ -184,6 +227,9 @@ function evaluateItem(item, defaultRepo) {
/** @type {EvalResult} */
const out = {
result: "pending",
outcome_status: "pending",
evidence_strength: "medium",
signal: "pending",
Comment thread
mnkiefer marked this conversation as resolved.
detail: "",
resolution_sec: null,
pending_age_sec: null,
Expand Down Expand Up @@ -291,8 +337,9 @@ function evaluateItem(item, defaultRepo) {
}

// Comments, labels, etc. — existence alone is only weak evidence, so record the outcome as unknown rather than accepted
out.result = "accepted";
out.detail = "object exists";
out.result = "unknown";
out.detail = "object still exists";
Object.assign(out, normalizeOutcome(out.result, out.detail));
return out;
}

Expand Down Expand Up @@ -349,11 +396,15 @@ function main() {
let checked = 0;
let accepted = 0;
let rejected = 0;
const ignored = 0;
let ignored = 0;
let pending = 0;
let total = 0;
let noop = 0;
let zeroTouchCount = 0;
let acceptedStrong = 0;
let acceptedMedium = 0;
let acceptedWeak = 0;
let fallbackExistsOnlyCount = 0;
/** @type {number[]} */
const resolutionTimes = [];

Expand Down Expand Up @@ -398,13 +449,17 @@ function main() {

// Write noop entries
for (const n of noops) {
const normalized = normalizeOutcome("noop", n.type || "");
fs.appendFileSync(
EVAL_JSONL,
JSON.stringify({
type: n.type,
url: "",
repo,
result: "noop",
outcome_status: normalized.outcome_status,
evidence_strength: normalized.evidence_strength,
signal: normalized.signal,
detail: n.type,
workflow,
run_id: runId,
Expand All @@ -430,21 +485,39 @@ function main() {
// Evaluate each actionable item
for (const item of actionable) {
const evalResult = evaluateItem(item, repo);
const normalized = normalizeOutcome(evalResult.result, evalResult.detail);

switch (evalResult.result) {
switch (normalized.outcome_status) {
case "accepted":
accepted++;
switch (normalized.evidence_strength) {
case "strong":
acceptedStrong++;
break;
case "medium":
acceptedMedium++;
break;
case "weak":
acceptedWeak++;
break;
}
if (evalResult.zero_touch === true) {
zeroTouchCount++;
}
break;
case "rejected":
rejected++;
break;
default:
case "ignored":
ignored++;
break;
case "pending":
pending++;
break;
}
if (normalized.signal === "target_exists_only") {
fallbackExistsOnlyCount++;
}
if (typeof evalResult.resolution_sec === "number" && evalResult.resolution_sec > 0) {
resolutionTimes.push(evalResult.resolution_sec);
}
Expand All @@ -456,6 +529,9 @@ function main() {
url: item.url || "",
repo: item.repo || repo,
result: evalResult.result,
outcome_status: normalized.outcome_status,
evidence_strength: normalized.evidence_strength,
signal: normalized.signal,
detail: evalResult.detail,
workflow,
run_id: runId,
Expand Down Expand Up @@ -511,6 +587,10 @@ function main() {
ignored,
pending,
noop,
accepted_strong: acceptedStrong,
accepted_medium: acceptedMedium,
accepted_weak: acceptedWeak,
fallback_exists_only_count: fallbackExistsOnlyCount,
acceptance_rate: Math.round(acceptanceRate * 10000) / 10000,
waste_rate: Math.round(wasteRate * 10000) / 10000,
noop_rate: Math.round(noopRate * 10000) / 10000,
Expand All @@ -534,4 +614,4 @@ if (require.main === module) {
main();
}

module.exports = { main, evaluateItem, readJSONL, secondsBetween, isoToEpoch };
module.exports = { main, evaluateItem, normalizeOutcome, readJSONL, secondsBetween, isoToEpoch };
23 changes: 23 additions & 0 deletions actions/setup/js/evaluate_outcomes.test.cjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import { describe, expect, it } from "vitest";
import { createRequire } from "module";

const req = createRequire(import.meta.url);
const { normalizeOutcome } = req("./evaluate_outcomes.cjs");

// Unit tests for normalizeOutcome(result, detail), which maps legacy
// result/detail pairs to { outcome_status, evidence_strength, signal }.
describe("evaluate_outcomes.cjs", () => {
  it("maps existence-only fallback to weak unknown evidence", () => {
    expect(normalizeOutcome("unknown", "object still exists")).toEqual({
      outcome_status: "unknown",
      evidence_strength: "weak",
      signal: "target_exists_only",
    });
  });

  it("maps merged outcomes to strong accepted evidence", () => {
    expect(normalizeOutcome("accepted", "merged")).toEqual({
      outcome_status: "accepted",
      evidence_strength: "strong",
      signal: "merged",
    });
  });

  it("maps noop results to weak skipped evidence regardless of detail", () => {
    // The noop check runs before the existence-only check.
    expect(normalizeOutcome("noop", "object still exists")).toEqual({
      outcome_status: "skipped",
      evidence_strength: "weak",
      signal: "noop",
    });
  });

  it("prefers the existence-only fallback over a legacy accepted result", () => {
    // Older artifacts may pair "accepted" with an existence-only detail.
    expect(normalizeOutcome("accepted", "object still exists")).toEqual({
      outcome_status: "unknown",
      evidence_strength: "weak",
      signal: "target_exists_only",
    });
  });

  it("matches detail case- and whitespace-insensitively", () => {
    expect(normalizeOutcome("rejected", "  Closed ")).toEqual({
      outcome_status: "rejected",
      evidence_strength: "strong",
      signal: "closed",
    });
  });

  it("falls back to medium evidence when detail is not a known signal", () => {
    expect(normalizeOutcome("accepted", "created item")).toEqual({
      outcome_status: "accepted",
      evidence_strength: "medium",
      signal: "acted_on",
    });
    expect(normalizeOutcome("pending", "")).toEqual({
      outcome_status: "pending",
      evidence_strength: "medium",
      signal: "pending",
    });
    expect(normalizeOutcome("ignored", "")).toEqual({
      outcome_status: "ignored",
      evidence_strength: "medium",
      signal: "ignored",
    });
  });

  it("maps unrecognized results to weak unknown evidence", () => {
    expect(normalizeOutcome("mystery", "merged")).toEqual({
      outcome_status: "unknown",
      evidence_strength: "weak",
      signal: "unknown",
    });
  });
});
Loading
Loading