Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
137 changes: 119 additions & 18 deletions setup/js/handle_agent_failure.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ const fs = require("fs");
const path = require("path");

// Default expiry for action-failure issues, in hours (24 * 7 = one week).
// NOTE(review): where this value is applied is not visible in this excerpt.
const DEFAULT_ACTION_FAILURE_ISSUE_EXPIRES_HOURS = 24 * 7;
// Length, in hours, of the look-back window used when searching for recently created failure issues.
const FAILURE_ISSUE_DEDUP_WINDOW_HOURS = 24;
// Threshold compared against the per-category count of recent failure issues:
// getCappedFailureCategories reports a category once `count >= FAILURE_ISSUE_CATEGORY_DAILY_CAP`.
const FAILURE_ISSUE_CATEGORY_DAILY_CAP = 5;
// The look-back window converted to milliseconds so it can be subtracted from Date.now().
const FAILURE_ISSUE_WINDOW_MS = FAILURE_ISSUE_DEDUP_WINDOW_HOURS * 60 * 60 * 1000;
// Default path of the OTEL JSONL file. NOTE(review): its readers/writers are outside this excerpt.
const DEFAULT_OTEL_JSONL_PATH = "/tmp/gh-aw/otel.jsonl";
// Engine-side 429/rate-limit signatures:
// - HTTP 429 accompanied by "too many requests"/"rate limit" phrasing
Expand Down Expand Up @@ -201,8 +204,6 @@ function generateFailureMatchMarker(options) {
* @param {string} body - Existing issue body
* @param {Object} options - Match criteria
* @param {string} options.workflowId - Workflow identifier
* @param {string} options.branch - Triggering branch
* @param {number|undefined} options.pullRequestNumber - Triggering pull request number
* @param {string[]} options.failureCategories - Sorted failure categories
* @returns {boolean} True when the issue body matches and is not expired
*/
Expand All @@ -229,18 +230,24 @@ function isReusableFailureIssue(body, options) {
if ((failureMarker.workflow_id || "") !== options.workflowId) {
return false;
}
if ((failureMarker.branch || "") !== (options.branch || "")) {
return false;
}

const expectedPullRequest = options.pullRequestNumber ? String(options.pullRequestNumber) : "";
if ((failureMarker.pull_request || "") !== expectedPullRequest) {
return false;
}

return (failureMarker.failure_categories || "") === options.failureCategories.join("|");
}

/**
 * Check whether an issue's creation time lies inside the active dedup/throttle window.
 *
 * A missing (falsy) timestamp is treated as inside the window, while a timestamp
 * that `Date.parse` cannot interpret is treated as outside of it.
 * @param {string|undefined} createdAt - Issue created_at timestamp
 * @param {number} windowStartMs - Inclusive lower bound as Unix ms
 * @returns {boolean} True when timestamp is missing or within the window
 */
function isIssueWithinWindow(createdAt, windowStartMs) {
  if (createdAt) {
    const timestampMs = Date.parse(createdAt);
    if (!Number.isFinite(timestampMs)) {
      // Unparseable timestamps never count as recent.
      return false;
    }
    return timestampMs >= windowStartMs;
  }
  // Nothing to compare against: keep the issue rather than drop it.
  return true;
}

/**
* Escape a GitHub search phrase for safe inclusion inside double quotes.
* GitHub search phrases are wrapped in double quotes, so embedded backslashes and
Expand All @@ -262,15 +269,15 @@ function escapeGitHubSearchPhrase(value) {
* @param {string} options.owner - Repository owner
* @param {string} options.repo - Repository name
* @param {string} options.workflowId - Workflow identifier
* @param {string} options.branch - Triggering branch
* @param {number|undefined} options.pullRequestNumber - Triggering pull request number
* @param {string[]} options.failureCategories - Sorted failure categories
* @returns {Promise<{number: number, html_url: string} | null>} Matching issue or null
*/
async function findExistingFailureIssue(options) {
const { owner, repo, workflowId, branch, pullRequestNumber, failureCategories } = options;
const { owner, repo, workflowId, failureCategories } = options;
const windowStartMs = Date.now() - FAILURE_ISSUE_WINDOW_MS;
const since = new Date(windowStartMs).toISOString().slice(0, 19) + "Z";
const escapedWorkflowId = escapeGitHubSearchPhrase(workflowId);
const searchQuery = `repo:${owner}/${repo} is:issue is:open label:agentic-workflows ` + `"gh-aw-agentic-workflow:" "workflow_id: ${escapedWorkflowId}" in:body`;
const searchQuery = `repo:${owner}/${repo} is:issue is:open label:agentic-workflows created:>=${since} ` + `"gh-aw-agentic-workflow:" "workflow_id: ${escapedWorkflowId}" in:body`;
const perPage = 100;
for (let page = 1; ; page += 1) {
const searchResult = await github.rest.search.issuesAndPullRequests({
Expand All @@ -280,6 +287,9 @@ async function findExistingFailureIssue(options) {
});

for (const item of searchResult.data.items) {
if (!isIssueWithinWindow(item.created_at, windowStartMs)) {
continue;
}
let body = typeof item.body === "string" ? item.body : "";
if (!body) {
const issueResult = await github.rest.issues.get({
Expand All @@ -293,8 +303,6 @@ async function findExistingFailureIssue(options) {
if (
isReusableFailureIssue(body, {
workflowId,
branch,
pullRequestNumber,
failureCategories,
})
) {
Expand All @@ -313,6 +321,90 @@ async function findExistingFailureIssue(options) {
return null;
}

/**
 * Tally the recently created failure issues whose marker lists the given failure category.
 *
 * The search covers open issues labelled `agentic-workflows` created since the start of the
 * dedup window. Every hit is re-checked locally: its `created_at` must fall inside the window
 * and its `gh-aw-failure-issue` marker must name the category exactly (after splitting the
 * `failure_categories` value on `|` and trimming each part).
 * @param {Object} options - Query options
 * @param {string} options.owner - Repository owner
 * @param {string} options.repo - Repository name
 * @param {string} options.category - Failure category name
 * @returns {Promise<number>} Number of matching issues created within the dedup window
 */
async function countRecentFailureIssuesByCategory(options) {
  const { owner, repo, category } = options;
  const pageSize = 100;
  const windowStartMs = Date.now() - FAILURE_ISSUE_WINDOW_MS;
  // Drop the fractional seconds from the ISO timestamp before using it in the query.
  const sinceStamp = `${new Date(windowStartMs).toISOString().slice(0, 19)}Z`;
  const quotedCategory = escapeGitHubSearchPhrase(category);
  const query = `repo:${owner}/${repo} is:issue is:open label:agentic-workflows created:>=${sinceStamp} "gh-aw-failure-issue:" "failure_categories:" "${quotedCategory}" in:body`;

  let matches = 0;
  let page = 1;
  let fetchedFullPage = true;

  // Keep paging for as long as the previous page came back full.
  while (fetchedFullPage) {
    const response = await github.rest.search.issuesAndPullRequests({
      q: query,
      per_page: pageSize,
      page,
    });
    const issues = response.data.items;

    for (const issue of issues) {
      if (!isIssueWithinWindow(issue.created_at, windowStartMs)) {
        continue;
      }

      // Search hits may lack a usable body; fall back to fetching the issue itself.
      let body = issue.body;
      if (typeof body !== "string" || body === "") {
        const fetched = await github.rest.issues.get({
          owner,
          repo,
          issue_number: issue.number,
        });
        body = fetched.data.body || "";
      }

      const marker = parseHTMLCommentMetadata(body, "gh-aw-failure-issue");
      if (marker) {
        const listedCategories = (marker.failure_categories || "")
          .split("|")
          .map(part => part.trim())
          .filter(Boolean);
        if (listedCategories.includes(category)) {
          matches += 1;
        }
      }
    }

    fetchedFullPage = issues.length >= pageSize;
    page += 1;
  }

  return matches;
}

/**
 * Report which of the current failure's categories have reached the daily new-issue cap.
 *
 * Duplicate category names are collapsed first, then each distinct category is counted
 * one after another via countRecentFailureIssuesByCategory. A category is reported when
 * its count is at least FAILURE_ISSUE_CATEGORY_DAILY_CAP.
 * @param {Object} options - Query options
 * @param {string} options.owner - Repository owner
 * @param {string} options.repo - Repository name
 * @param {string[]} options.failureCategories - Categories for the current failure
 * @returns {Promise<Array<{category: string, count: number}>>} Capped categories with their counts, in first-seen order
 */
async function getCappedFailureCategories(options) {
  const { owner, repo, failureCategories } = options;
  /** @type {Array<{category: string, count: number}>} */
  const overCap = [];

  // A Set iterates in insertion order, so results follow the first occurrence of each category.
  for (const category of new Set(failureCategories)) {
    const count = await countRecentFailureIssuesByCategory({ owner, repo, category });
    if (count >= FAILURE_ISSUE_CATEGORY_DAILY_CAP) {
      overCap.push({ category, count });
    }
  }

  return overCap;
}

/**
* Search for or create the parent issue for all agentic workflow failures
* @param {number|null} previousParentNumber - Previous parent issue number if creating due to limit
Expand Down Expand Up @@ -2260,8 +2352,6 @@ async function main() {
owner,
repo,
workflowId: workflowID,
branch: currentBranch,
pullRequestNumber: pullRequest?.number,
failureCategories,
});

Expand Down Expand Up @@ -2449,6 +2539,17 @@ async function main() {
} else {
// No existing issue, create a new one
core.info("No existing issue found, creating a new one");
const cappedCategories = await getCappedFailureCategories({
owner,
repo,
failureCategories,
});
if (cappedCategories.length > 0) {
const summary = cappedCategories.map(({ category, count }) => `${category} (${count}/${FAILURE_ISSUE_CATEGORY_DAILY_CAP})`).join(", ");
core.warning(`Daily per-category issue cap reached for ${summary}.`);
core.info(`Summarize-and-stop: skipping new issue creation because category cap was reached in the last ${FAILURE_ISSUE_DEDUP_WINDOW_HOURS}h.`);
return;
}

// Read issue template
const issueTemplatePath = getPromptPath("agent_failure_issue.md");
Expand Down
13 changes: 5 additions & 8 deletions setup/js/upload_artifact.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -291,12 +291,13 @@ function autoCopyToStaging(reqPath) {
* @returns {{ files: string[], error: string|null }}
*/
function resolveFiles(request, allowedPaths, defaultInclude, defaultExclude) {
const hasMutuallyExclusive = ("path" in request ? 1 : 0) + ("filters" in request ? 1 : 0);
if (hasMutuallyExclusive !== 1) {
const hasPath = "path" in request;
const hasFilters = "filters" in request;
if (hasPath === hasFilters) {
return { files: [], error: "exactly one of 'path' or 'filters' must be present" };
}

/** @type {string[]} candidateRelPaths */
/** @type {string[]} */
let candidateRelPaths;

if ("path" in request) {
Expand Down Expand Up @@ -354,11 +355,7 @@ function resolveFiles(request, allowedPaths, defaultInclude, defaultExclude) {
const include = /** @type {string[]} */ requestFilters.include || defaultInclude;
const exclude = /** @type {string[]} */ requestFilters.exclude || defaultExclude;

candidateRelPaths = allFiles.filter(f => {
if (include.length > 0 && !matchesAnyPattern(f, include)) return false;
if (exclude.length > 0 && matchesAnyPattern(f, exclude)) return false;
return true;
});
candidateRelPaths = allFiles.filter(f => (include.length === 0 || matchesAnyPattern(f, include)) && (exclude.length === 0 || !matchesAnyPattern(f, exclude)));
}

// Apply allowed-paths policy filter.
Expand Down
20 changes: 20 additions & 0 deletions setup/sh/check_cache_memory_git_integrity.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#!/bin/bash
set +o histexpand

set -euo pipefail

# Verify the git metadata of the cache-memory directory.
#   - no .git directory: nothing to do
#   - fsck succeeds:     run a best-effort `git gc --prune=now`
#   - fsck fails:        emit a workflow warning, discard .git and reseed it with
#                        a fresh repository holding a single empty commit
# Every recovery/maintenance step is best-effort (`|| true`), so the script
# itself still exits successfully when one of them fails.
CACHE_DIR="${GH_AW_CACHE_DIR:-/tmp/gh-aw/cache-memory}"

if [[ -d "${CACHE_DIR}/.git" ]]; then
  if git -C "${CACHE_DIR}" fsck --no-dangling &>/dev/null; then
    # Healthy repository: just compact it.
    git -C "${CACHE_DIR}" gc --prune=now &>/dev/null || true
  else
    echo "::warning title=cache-memory git integrity::Detected git corruption; reseeding cache-memory git repository"
    rm -rf "${CACHE_DIR}/.git" || true
    git -C "${CACHE_DIR}" init &>/dev/null || true
    # Identity is passed inline so the commit works without any git configuration.
    git -C "${CACHE_DIR}" \
      -c user.name="github-actions[bot]" \
      -c user.email="41898282+github-actions[bot]@users.noreply.github.com" \
      commit --allow-empty -m "chore(cache-memory): reseed after corruption" &>/dev/null || true
  fi
fi
106 changes: 106 additions & 0 deletions setup/sh/check_cache_memory_git_integrity_test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
#!/usr/bin/env bash
# Test harness for check_cache_memory_git_integrity.sh.
# Disable history expansion so a literal `!` is never rewritten by the shell.
set +o histexpand

# Abort on command failure, on use of an unset variable, and on failure inside a pipeline.
set -euo pipefail

# Locate the script under test next to this file, independent of the caller's working directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
SCRIPT="${SCRIPT_DIR}/check_cache_memory_git_integrity.sh"

# Running tallies, incremented by assert().
TESTS_PASSED=0
TESTS_FAILED=0
# Scratch directory that holds every per-test fixture; removed by cleanup() on exit.
WORKSPACE="$(mktemp -d)"

# Delete the scratch workspace together with every fixture created beneath it.
cleanup() {
  rm -r -f "${WORKSPACE}"
}
# Run the cleanup whenever the harness exits, whatever the test outcome.
trap 'cleanup' EXIT

# Evaluate a shell condition and record the outcome.
#   $1 - human-readable name printed next to the pass/fail mark
#   $2 - shell source passed to `eval`; exit status 0 counts as a pass
# Anything the condition writes to stderr is discarded. Updates the global
# TESTS_PASSED / TESTS_FAILED counters.
assert() {
  local name="$1" condition="$2"
  if ! eval "${condition}" 2>/dev/null; then
    printf ' ✗ %s\n' "${name}"
    TESTS_FAILED=$((TESTS_FAILED + 1))
  else
    printf ' ✓ %s\n' "${name}"
    TESTS_PASSED=$((TESTS_PASSED + 1))
  fi
}

# Create a git repository at the given path containing one commit that tracks data.json.
#   $1 - directory to create and initialise
# A throwaway identity is stored in the repository's own config so the commit
# succeeds without relying on the caller's git configuration.
make_repo() {
  local dir="$1"
  mkdir -p "${dir}"
  git -C "${dir}" init -q
  git -C "${dir}" config user.email "test@example.com"
  git -C "${dir}" config user.name "test"
  echo "data" > "${dir}/data.json"
  git -C "${dir}" add data.json
  git -C "${dir}" commit -m "init" -q
}

# Run the script under test against a cache directory.
#   $1 - directory exported to the script as GH_AW_CACHE_DIR
# Prints the script's combined stdout/stderr and returns the script's own exit
# status, so that callers capturing `$?` observe real failures. Callers running
# under `set -e` must disable errexit (or otherwise handle a non-zero status)
# around the call, as the tests in this file already do with `set +e`.
run_script() {
  local dir="$1"
  GH_AW_CACHE_DIR="${dir}" bash "${SCRIPT}" 2>&1
}

echo "Testing check_cache_memory_git_integrity.sh"
echo ""

# Conditions handed to assert() are single-quoted on purpose: the variables they
# mention (SCRIPT, D, OUTPUT, EXIT_CODE) are expanded by the `eval` inside
# assert(), not here. Interpolating captured output into the condition text
# would let quotes, `$` or backticks in that output be re-parsed as shell code.

echo "Test 1: Script syntax is valid"
assert "script passes bash -n" 'bash -n "${SCRIPT}"'
echo ""

echo "Test 2: Missing .git repo is a no-op"
D="${WORKSPACE}/test2"
mkdir -p "${D}"
echo "data" > "${D}/data.json"
# errexit is suspended so a failing run is recorded instead of aborting the harness.
set +e
OUTPUT="$(run_script "${D}")"
EXIT_CODE=$?
set -e
assert "script exits successfully" '[ "${EXIT_CODE}" -eq 0 ]'
assert "warning not emitted" '! printf "%s" "${OUTPUT}" | grep -q "cache-memory git integrity"'
assert "non-git files preserved" '[ -f "${D}/data.json" ]'
echo ""

echo "Test 3: Healthy git repo remains healthy"
D="${WORKSPACE}/test3"
make_repo "${D}"
set +e
OUTPUT="$(run_script "${D}")"
EXIT_CODE=$?
set -e
assert "script exits successfully" '[ "${EXIT_CODE}" -eq 0 ]'
assert "no corruption warning for healthy repo" '! printf "%s" "${OUTPUT}" | grep -q "Detected git corruption"'
assert "git metadata still exists" '[ -d "${D}/.git" ]'
assert "git repo still readable" 'git -C "${D}" rev-parse --verify HEAD >/dev/null 2>&1'
echo ""

echo "Test 4: Corrupted repo is reseeded"
D="${WORKSPACE}/test4"
make_repo "${D}"
pushd "${D}" >/dev/null
# Corrupt the repository by deleting the loose object of HEAD's tree.
TREE_OBJ="$(git rev-parse "HEAD^{tree}")"
TREE_OBJ_PATH=".git/objects/${TREE_OBJ:0:2}/${TREE_OBJ:2}"
rm -f "${TREE_OBJ_PATH}"
popd >/dev/null
set +e
OUTPUT="$(run_script "${D}")"
EXIT_CODE=$?
set -e
assert "script exits successfully after corruption" '[ "${EXIT_CODE}" -eq 0 ]'
assert "corruption warning emitted" 'printf "%s" "${OUTPUT}" | grep -q "Detected git corruption; reseeding cache-memory git repository"'
assert "git metadata recreated" '[ -d "${D}/.git" ]'
assert "new empty commit exists" 'git -C "${D}" rev-parse --verify HEAD >/dev/null 2>&1'
echo ""

echo "Tests passed: ${TESTS_PASSED}"
echo "Tests failed: ${TESTS_FAILED}"

# A single failed assertion fails the whole run.
if [ "${TESTS_FAILED}" -gt 0 ]; then
  exit 1
fi

echo "✓ All tests passed!"
Loading
Loading