Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion packages/agent/src/pr-url-detector.test.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
import { describe, expect, it } from "vitest";
import { findPrUrl, wasCreatedRecently } from "./pr-url-detector";
import {
findPrUrl,
PR_CREATION_RECENCY_MS,
wasCreatedRecently,
} from "./pr-url-detector";

const PR_URL = "https://github.com/PostHog/posthog.com/pull/17764";

Expand Down Expand Up @@ -59,4 +63,13 @@ describe("wasCreatedRecently", () => {
it("fails closed on an unparseable createdAt", () => {
expect(wasCreatedRecently("not-a-date", now, maxAge)).toBe(false);
});

it("attributes a PR created earlier in a long multi-turn run with the default window", () => {
// The notification path gates on attribution, so the default window must be
// wide enough that a PR created an hour ago (and only re-surfacing now) is
// still attributed. The old 15-minute default dropped these silently.
const oneHourAgo = new Date(now - 60 * 60 * 1000).toISOString();
expect(wasCreatedRecently(oneHourAgo, now)).toBe(true);
expect(PR_CREATION_RECENCY_MS).toBeGreaterThan(60 * 60 * 1000);
});
});
10 changes: 9 additions & 1 deletion packages/agent/src/pr-url-detector.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,15 @@ const PR_URL_REGEX = /https:\/\/github\.com\/[^/\s"]+\/[^/\s"]+\/pull\/\d+/;

// A fixed window (not "since run start") so a PR the agent merely views on a
// long run is too old to be mistaken for one it just created.
export const PR_CREATION_RECENCY_MS = 15 * 60 * 1000;
//
// Sized to comfortably cover a real run rather than a single tool call: a PR is
// often created in an early turn and only surfaces again (a summary line, a
// follow-up turn, a coalesced/late terminal flush) much later in the same
// multi-turn run. The old 15-minute window failed closed on exactly those runs,
// so the PR was never attributed and the Slack inbox notification — which is
// gated on attribution — silently never fired. A day is wide enough for any
// realistic run while still excluding genuinely stale PRs the agent only views.
export const PR_CREATION_RECENCY_MS = 24 * 60 * 60 * 1000;

export function findPrUrl(text: string): string | null {
return text.match(PR_URL_REGEX)?.[0] ?? null;
Expand Down
131 changes: 131 additions & 0 deletions packages/agent/src/server/agent-server.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1293,6 +1293,137 @@ describe("AgentServer HTTP Mode", () => {
});
});

describe("Slack inbox relay telemetry", () => {
const payload: JwtPayload = {
task_id: "t",
run_id: "r",
team_id: 1,
user_id: 1,
distinct_id: "d",
mode: "interactive",
};

type FakeLogger = {
info: ReturnType<typeof vi.fn>;
warn: ReturnType<typeof vi.fn>;
error: ReturnType<typeof vi.fn>;
debug: ReturnType<typeof vi.fn>;
};

type RelayTestServer = {
relayAgentResponse(p: JwtPayload): Promise<void>;
session: unknown;
questionRelayedToSlack: boolean;
posthogAPI: { relayMessage: ReturnType<typeof vi.fn> };
logger: FakeLogger;
};

const setup = (opts: {
message?: string;
relayMessage?: ReturnType<typeof vi.fn>;
flush?: ReturnType<typeof vi.fn>;
}): RelayTestServer => {
const s = createServer() as unknown as RelayTestServer;
s.logger = {
info: vi.fn(),
warn: vi.fn(),
error: vi.fn(),
debug: vi.fn(),
};
s.posthogAPI = {
relayMessage: opts.relayMessage ?? vi.fn(async () => {}),
};
s.session = {
logWriter: {
flush: opts.flush ?? vi.fn(async () => {}),
getFullAgentResponse: vi.fn(() => opts.message),
isRegistered: vi.fn(() => true),
},
};
s.questionRelayedToSlack = false;
return s;
};

const outcomesFrom = (mock: ReturnType<typeof vi.fn>) =>
mock.mock.calls.map((c) => (c[1] as { outcome: string }).outcome);

const throwingFn = (msg: string) =>
vi.fn(async () => {
throw new Error(msg);
});

it.each([
{
name: "records attempt then delivered on a successful relay",
message: "hello" as string | undefined,
relayThrows: false,
relayed: true,
infoOutcomes: ["attempt", "delivered"],
warn: null as { outcome: string; reason: string } | null,
},
{
name: "records a warn-level dropped event when the relay request throws",
message: "hello" as string | undefined,
relayThrows: true,
relayed: true,
infoOutcomes: ["attempt"],
warn: { outcome: "dropped", reason: "relay_request_failed" },
},
{
name: "records a dropped event when there is no agent message to relay",
message: undefined as string | undefined,
relayThrows: false,
relayed: false,
infoOutcomes: ["attempt"],
warn: { outcome: "dropped", reason: "no_agent_message" },
},
])(
"$name",
async ({ message, relayThrows, relayed, infoOutcomes, warn }) => {
const relayMessage = relayThrows
? throwingFn("boom")
: vi.fn(async () => {});
const s = setup({ message, relayMessage });
await s.relayAgentResponse(payload);

expect(outcomesFrom(s.logger.info)).toEqual(infoOutcomes);
expect(s.posthogAPI.relayMessage).toHaveBeenCalledTimes(
relayed ? 1 : 0,
);

if (warn) {
expect(s.logger.warn).toHaveBeenCalledTimes(1);
const event = s.logger.warn.mock.calls[0][1] as {
outcome: string;
reason: string;
};
expect(event.outcome).toBe(warn.outcome);
expect(event.reason).toBe(warn.reason);
} else {
expect(s.logger.warn).not.toHaveBeenCalled();
}
},
);

// Separate case: a flush failure that cascades into an empty message must
// keep its root cause inside the telemetry shape (two warns, drop tagged
// flushFailed), so it does not read as a bare `no_agent_message` drop.
it("tags the drop with flushFailed when a flush failure leaves no message", async () => {
const s = setup({ message: undefined, flush: throwingFn("flush boom") });
await s.relayAgentResponse(payload);

expect(s.posthogAPI.relayMessage).not.toHaveBeenCalled();
const warnOutcomes = outcomesFrom(s.logger.warn);
expect(warnOutcomes).toEqual(["flush_failed", "dropped"]);
const dropEvent = s.logger.warn.mock.calls[1][1] as {
reason: string;
flushFailed: boolean;
};
expect(dropEvent.reason).toBe("no_agent_message");
expect(dropEvent.flushFailed).toBe(true);
});
});

describe("buildCloudSystemPrompt", () => {
it("returns review-first prompt for existing PRs on non-Slack runs", () => {
const s = createServer();
Expand Down
58 changes: 47 additions & 11 deletions packages/agent/src/server/agent-server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import {
import { type ServerType, serve } from "@hono/node-server";
import { execGh } from "@posthog/git/gh";
import { getCurrentBranch } from "@posthog/git/queries";
import { serializeError } from "@posthog/shared";
import { Hono } from "hono";
import { z } from "zod";
import packageJson from "../../package.json" with { type: "json" };
Expand Down Expand Up @@ -2279,21 +2280,31 @@ ${signedCommitInstructions}
return;
}

// Inbox/Slack notifications are posted server-side off this relay, so a
// silent drop here reads to the user as an empty inbox rather than a broken
// pipe. Emit attempt/outcome telemetry at a visible severity (not debug) so
// drops surface in agent logs instead of only as user reports.
this.recordRelayTelemetry("attempt", payload);

let flushFailed = false;
try {
await this.session.logWriter.flush(payload.run_id, { coalesce: true });
} catch (error) {
this.logger.debug("Failed to flush logs before Slack relay", {
taskId: payload.task_id,
runId: payload.run_id,
error,
// Keep the flush failure inside the queryable telemetry shape: a failed
// flush can leave no coalesced message to relay, and without this the
// resulting drop would read as a bare `no_agent_message` with no upstream
// cause.
flushFailed = true;
this.recordRelayTelemetry("flush_failed", payload, {
error: serializeError(error),
});
}

const message = this.session.logWriter.getFullAgentResponse(payload.run_id);
if (!message) {
this.logger.debug("No agent message found for Slack relay", {
taskId: payload.task_id,
runId: payload.run_id,
this.recordRelayTelemetry("dropped", payload, {
reason: "no_agent_message",
flushFailed,
sessionRegistered: this.session.logWriter.isRegistered(payload.run_id),
});
return;
Comment on lines 2290 to 2310

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Flush failure loses root cause in the slack_inbox_relay telemetry shape

When flush throws, the raw logger.warn("Failed to flush logs before Slack relay", …) sits outside the slack_inbox_relay structured shape. If the flush failure then causes getFullAgentResponse to return undefined, the funnel records dropped: no_agent_message — hiding the real upstream cause. An engineer querying telemetry:slack_inbox_relay will see a no_agent_message drop and have no signal that a flush failure preceded it. Passing reason: "flush_failed" (and the serialised error) into recordRelayTelemetry when the flush throws would keep the root cause inside the queryable shape.

Prompt To Fix With AI
This is a comment left during a code review.
Path: packages/agent/src/server/agent-server.ts
Line: 2289-2305

Comment:
**Flush failure loses root cause in the `slack_inbox_relay` telemetry shape**

When `flush` throws, the raw `logger.warn("Failed to flush logs before Slack relay", …)` sits outside the `slack_inbox_relay` structured shape. If the flush failure then causes `getFullAgentResponse` to return `undefined`, the funnel records `dropped: no_agent_message` — hiding the real upstream cause. An engineer querying `telemetry:slack_inbox_relay` will see a `no_agent_message` drop and have no signal that a flush failure preceded it. Passing `reason: "flush_failed"` (and the serialised error) into `recordRelayTelemetry` when the flush throws would keep the root cause inside the queryable shape.

How can I resolve this? If you propose a fix, please make it concise.

Expand All @@ -2305,15 +2316,40 @@ ${signedCommitInstructions}
payload.run_id,
message,
);
this.recordRelayTelemetry("delivered", payload);
} catch (error) {
this.logger.debug("Failed to relay initial agent response to Slack", {
taskId: payload.task_id,
runId: payload.run_id,
error,
this.recordRelayTelemetry("dropped", payload, {
reason: "relay_request_failed",
error: serializeError(error),
});
}
}

// A single, stably-named telemetry shape for the Slack/inbox relay path so the
// full attempt -> delivered/dropped funnel is queryable in agent logs. Drops
// and flush failures are warn-level (always emitted, even with debug off)
// because a swallowed failure here is invisible to the user — the exact
// silent-drop class of bug this records. `attempt`/`delivered` are info so the
// funnel stays complete.
private recordRelayTelemetry(
outcome: "attempt" | "delivered" | "dropped" | "flush_failed",
payload: JwtPayload,
extra: Record<string, unknown> = {},
): void {
const event = {
telemetry: "slack_inbox_relay",
outcome,
taskId: payload.task_id,
runId: payload.run_id,
...extra,
};
if (outcome === "dropped" || outcome === "flush_failed") {
this.logger.warn("Slack inbox relay", event);
} else {
this.logger.info("Slack inbox relay", event);
}
}

private relaySlackQuestion(
payload: JwtPayload,
toolMeta: Record<string, unknown> | null | undefined,
Expand Down
Loading