diff --git a/.gitignore b/.gitignore index 8a01cc2c..bee8809e 100644 --- a/.gitignore +++ b/.gitignore @@ -13,3 +13,5 @@ dist/ packages/junior/dist # Auto-generated by dotagents — do not commit these files. .agents/.gitignore +# Generated by eval replay auto mode; existing tracked recordings stay tracked. +packages/junior-evals/.vitest-evals/recordings/**/*.json diff --git a/AGENTS.md b/AGENTS.md index 3a829054..56bed0e0 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -113,6 +113,7 @@ Co-Authored-By: (agent model name) - `specs/oauth-flows-spec.md` (OAuth authorization code flow + Slack UX contract) - `specs/agent-prompt-spec.md` (core prompt ownership, execution-bias, and bloat-control contract) - `specs/advisor-tool-spec.md` (draft provider-agnostic advisor tool contract) +- `specs/scheduler-spec.md` (draft scheduled Junior task contract) - `specs/harness-agent-spec.md` (agent loop and output contract) - `specs/agent-session-resumability-spec.md` (multi-slice turn resumability and timeout recovery contract) - `specs/agent-execution-spec.md` (agent execution rubric and completion gates) diff --git a/apps/example/README.md b/apps/example/README.md index 93d234e2..89a97889 100644 --- a/apps/example/README.md +++ b/apps/example/README.md @@ -27,6 +27,8 @@ Copy `.env.example` and set: - `AI_FAST_MODEL` (optional) - `AI_VISION_MODEL` (optional, enables image-understanding; unset disables vision features) - `AI_WEB_SEARCH_MODEL` (optional, overrides the `webSearch` tool model; defaults to a search-tuned model) +- `JUNIOR_SECRET` (required outside `pnpm dev`; the local wrapper supplies a dev-only secret when unset) +- `JUNIOR_SCHEDULER_SECRET` or `CRON_SECRET` (optional for `pnpm dev`; the local wrapper supplies a dev-only secret when both are unset) - `NOTION_TOKEN` (optional, enables the bundled Notion plugin) ## Wiring @@ -34,3 +36,4 @@ Copy `.env.example` and set: - `plugin-packages.ts` is the single source of truth for installed plugin packages in this app - 
`nitro.config.ts` passes that list to `juniorNitro()` so plugin content is copied into the build output - `server.ts` passes the same list to `createApp()` so local dev does not depend on Nitro's virtual config path for plugin discovery +- root `pnpm dev` starts a local heartbeat loop that calls `/api/internal/heartbeat` every minute, matching the production cron pulse used by the built-in scheduler plugin; it also defaults `JUNIOR_BASE_URL` to the local server when unset so signed internal callbacks can recover scheduled dispatches diff --git a/package.json b/package.json index 70a888d9..fa83fcca 100644 --- a/package.json +++ b/package.json @@ -3,7 +3,7 @@ "private": true, "packageManager": "pnpm@10.33.0", "scripts": { - "dev": "node scripts/dev-with-root-env.mjs", + "dev": "node scripts/dev-server.mjs", "dev:env": "pnpx vercel env pull .env.local --environment=development && pnpm run cloudflare:token", "cli": "node scripts/cli-with-root-env.mjs", "cloudflare:token": "node scripts/refresh-cloudflare-tunnel-token.mjs", @@ -22,7 +22,7 @@ "test:watch": "pnpm --filter @sentry/junior test:watch", "evals": "pnpm --filter @sentry/junior-evals evals", "evals:record": "pnpm --filter @sentry/junior-evals evals:record", - "typecheck": "pnpm --filter @sentry/junior typecheck && pnpm --filter @sentry/junior-example typecheck", + "typecheck": "pnpm --filter @sentry/junior typecheck && pnpm --filter @sentry/junior-testing typecheck && pnpm --filter @sentry/junior-example typecheck", "skills:check": "pnpm --filter @sentry/junior skills:check" }, "simple-git-hooks": { diff --git a/packages/docs/astro.config.mjs b/packages/docs/astro.config.mjs index 5e28b6db..93a46d0f 100644 --- a/packages/docs/astro.config.mjs +++ b/packages/docs/astro.config.mjs @@ -28,6 +28,7 @@ export default defineConfig({ "/plugins/hex": "/extend/hex-plugin", "/plugins/linear": "/extend/linear-plugin", "/plugins/notion": "/extend/notion-plugin", + "/plugins/scheduler": "/extend/scheduler-plugin", 
"/plugins/sentry": "/extend/sentry-plugin", "/operate/telemetry-runbooks": "/operate/reliability-runbooks", "/operate/security": "/operate/security-hardening", @@ -102,6 +103,7 @@ export default defineConfig({ { label: "Hex Plugin", link: "/extend/hex-plugin/" }, { label: "Linear Plugin", link: "/extend/linear-plugin/" }, { label: "Notion Plugin", link: "/extend/notion-plugin/" }, + { label: "Scheduler Plugin", link: "/extend/scheduler-plugin/" }, { label: "Sentry Plugin", link: "/extend/sentry-plugin/" }, ], }, diff --git a/packages/docs/src/content/docs/extend/build-a-plugin.md b/packages/docs/src/content/docs/extend/build-a-plugin.md index 9e7dc4b4..ed98f03f 100644 --- a/packages/docs/src/content/docs/extend/build-a-plugin.md +++ b/packages/docs/src/content/docs/extend/build-a-plugin.md @@ -144,6 +144,9 @@ installing sandbox helper files or mutating tool input/env before execution. Trusted hooks are backend code and must be registered explicitly from app code; Junior never loads them from `plugin.yaml`. +Trusted hook contexts include `ctx.plugin` and `ctx.log`. Use `ctx.log` for +plugin-scoped structured logs instead of writing directly to stdout. + Export a factory from the plugin package: ```ts title="index.ts" @@ -157,6 +160,7 @@ export function myProviderPlugin() { }, hooks: { async sandboxPrepare(ctx) { + ctx.log.info("Preparing my-provider sandbox helpers"); await ctx.sandbox.writeFile({ path: `${ctx.sandbox.juniorRoot}/my-provider-ready`, content: "ok\n", @@ -193,6 +197,79 @@ plugin package config is merged with the build-time plugin catalog. Use `ctx.decision.replaceInput(...)` only with object-shaped tool input. Junior rejects non-object replacements before the tool runs. 
+### Trusted hook surfaces + +Use the smallest hook that matches the deterministic boundary your plugin needs: + +| Hook | Purpose | +| ------------------------ | ------------------------------------------------------------------------------------------------------------------------ | +| `sandboxPrepare(ctx)` | Prepare files or runtime state inside a sandbox before agent tools run. | +| `beforeToolExecute(ctx)` | Deny or rewrite object-shaped tool input and set non-secret env values before a tool runs. | +| `tools(ctx)` | Return host-registered tool definitions for the current turn. Tool names must be camelCase and cannot shadow core tools. | +| `heartbeat(ctx)` | Run bounded periodic work from Junior's internal heartbeat route. | + +`tools(ctx)` receives the active turn context, `ctx.state`, and `ctx.log`. +Return tool definitions keyed by the public tool names your plugin owns: + +```ts title="index.ts" +import { Type } from "@sinclair/typebox"; +import { defineJuniorPlugin } from "@sentry/junior-plugin-api"; + +export function myProviderPlugin() { + return defineJuniorPlugin({ + name: "my-provider", + hooks: { + tools(ctx) { + return { + myProviderPing: { + description: "Check my-provider connectivity.", + inputSchema: Type.Object({}), + execute: async () => { + ctx.log.info("Running my-provider ping"); + return { ok: true }; + }, + }, + }; + }, + }, + }); +} +``` + +`heartbeat(ctx)` is for trusted plugins that need server-side background work. +Use `ctx.state` for plugin-namespaced durable state. Use +`ctx.agent.dispatch(...)` when the heartbeat needs Junior to run an autonomous +agent task, and `ctx.agent.get(...)` to reconcile that dispatch later. 
+ +```ts title="index.ts" +import { defineJuniorPlugin } from "@sentry/junior-plugin-api"; + +export function myProviderPlugin() { + return defineJuniorPlugin({ + name: "my-provider", + hooks: { + async heartbeat(ctx) { + const lastDispatch = await ctx.state.get<{ id: string }>( + "last-dispatch", + ); + if (lastDispatch) { + const dispatch = await ctx.agent.get(lastDispatch.id); + ctx.log.info("Checked background dispatch", { + status: dispatch?.status ?? "missing", + }); + } + + return { dispatchCount: 0 }; + }, + }, + }); +} +``` + +Heartbeat dispatches are durable, signed, bounded, and scoped to the plugin +that created them. Plugins can dispatch only to validated Slack destinations +and receive projection records, not raw runtime state. + ## Validate Run validation before deploy: diff --git a/packages/docs/src/content/docs/extend/index.md b/packages/docs/src/content/docs/extend/index.md index 6ccbafb6..db7fe0aa 100644 --- a/packages/docs/src/content/docs/extend/index.md +++ b/packages/docs/src/content/docs/extend/index.md @@ -13,6 +13,7 @@ related: - /extend/hex-plugin/ - /extend/linear-plugin/ - /extend/notion-plugin/ + - /extend/scheduler-plugin/ - /extend/sentry-plugin/ --- @@ -56,6 +57,9 @@ For reuse across apps or teams, package plugin manifests and any bundled skills pnpm add @sentry/junior @sentry/junior-agent-browser @sentry/junior-datadog @sentry/junior-github @sentry/junior-hex @sentry/junior-linear @sentry/junior-notion @sentry/junior-sentry ``` +Junior also includes the built-in [Scheduler Plugin](/extend/scheduler-plugin/) +for reminders and recurring Slack tasks. It does not require a separate package. 
+ List the plugin packages in `juniorNitro` so they are bundled at build time and available at runtime: ```ts title="nitro.config.ts" diff --git a/packages/docs/src/content/docs/extend/scheduler-plugin.md b/packages/docs/src/content/docs/extend/scheduler-plugin.md new file mode 100644 index 00000000..70ccfaa7 --- /dev/null +++ b/packages/docs/src/content/docs/extend/scheduler-plugin.md @@ -0,0 +1,78 @@ +--- +title: Scheduler Plugin +description: Enable and verify Junior's built-in scheduled task support. +type: tutorial +summary: Configure the built-in scheduler plugin so Slack users can create reminders and recurring tasks. +prerequisites: + - /start-here/quickstart/ + - /start-here/slack-app-setup/ +related: + - /reference/config-and-env/ + - /extend/build-a-plugin/ + - /operate/reliability-runbooks/ +--- + +The scheduler plugin is built into `@sentry/junior`. It registers Slack tools for creating, listing, updating, deleting, and running scheduled tasks, then uses Junior's internal heartbeat to dispatch due work back to the configured Slack conversation. + +## Runtime setup + +No plugin package install is required. `createApp()` registers the trusted scheduler plugin automatically: + +```ts title="server.ts" +import { createApp } from "@sentry/junior"; + +const app = await createApp(); + +export default app; +``` + +The Vercel helper includes the internal heartbeat route: + +```ts title="vercel.config.ts" +import { juniorVercelConfig } from "@sentry/junior/vercel"; + +export default juniorVercelConfig(); +``` + +If you manage routes manually, call the heartbeat route on a one-minute cadence: + +| Route | Purpose | +| ------------------------- | ------------------------------- | +| `/api/internal/heartbeat` | Runs trusted plugin heartbeats. 
| + +## Configure environment variables + +Set one scheduler route secret: + +| Variable | Required | Purpose | +| ------------------------------------------ | ---------- | --------------------------------------------------------------------------------------------- | +| `CRON_SECRET` or `JUNIOR_SCHEDULER_SECRET` | Production | Bearer token for internal scheduler and heartbeat routes. Use `CRON_SECRET` with Vercel Cron. | +| `JUNIOR_TIMEZONE` | No | Default IANA timezone for schedule authoring. Defaults to `America/Los_Angeles`. | + +Local development can run without a scheduler route secret when you call the dev server directly. Production deployments should set `CRON_SECRET` or `JUNIOR_SCHEDULER_SECRET`. + +## Verify + +Run the workflow in Slack where users will schedule work: + +```text +remind me in 1 minute to stretch +``` + +Then confirm: + +1. Junior acknowledges the scheduled task without asking for confirmation for the simple one-off reminder. +2. `what scheduled tasks do i have` lists the task in the same Slack conversation. +3. The reminder posts back to that conversation after the due time. + +For recurring or non-reminder scheduled work, Junior should show the proposed task details and wait for confirmation before creating the task. + +## Failure modes + +- No due tasks run: confirm `/api/internal/heartbeat` is called every minute and the route secret matches the configured bearer token. +- Tasks list but never complete: check scheduler and dispatch logs for missing Slack destination fields or stale dispatch recovery errors. +- Unexpected timezone: set `JUNIOR_TIMEZONE` to the deployment default, or include the timezone in the user's schedule request. + +## Next step + +Read [Build a Plugin](/extend/build-a-plugin/) for the trusted `tools(ctx)` and `heartbeat(ctx)` APIs that the built-in scheduler uses. 
diff --git a/packages/docs/src/content/docs/reference/api/functions/createApp.md b/packages/docs/src/content/docs/reference/api/functions/createApp.md index d2a3b824..83d9de61 100644 --- a/packages/docs/src/content/docs/reference/api/functions/createApp.md +++ b/packages/docs/src/content/docs/reference/api/functions/createApp.md @@ -7,7 +7,7 @@ title: "createApp" > **createApp**(`options?`): `Promise`\<`Hono`\<`BlankEnv`, `BlankSchema`, `"/"`\>\> -Defined in: [app.ts:175](https://github.com/getsentry/junior/blob/main/packages/junior/src/app.ts#L175) +Defined in: [app.ts:180](https://github.com/getsentry/junior/blob/main/packages/junior/src/app.ts#L180) Create a Hono app with all Junior routes. diff --git a/packages/docs/src/content/docs/reference/api/interfaces/JuniorAppOptions.md b/packages/docs/src/content/docs/reference/api/interfaces/JuniorAppOptions.md index 0520184e..e80efdc1 100644 --- a/packages/docs/src/content/docs/reference/api/interfaces/JuniorAppOptions.md +++ b/packages/docs/src/content/docs/reference/api/interfaces/JuniorAppOptions.md @@ -5,7 +5,7 @@ prev: false title: "JuniorAppOptions" --- -Defined in: [app.ts:30](https://github.com/getsentry/junior/blob/main/packages/junior/src/app.ts#L30) +Defined in: [app.ts:33](https://github.com/getsentry/junior/blob/main/packages/junior/src/app.ts#L33) ## Properties @@ -13,7 +13,7 @@ Defined in: [app.ts:30](https://github.com/getsentry/junior/blob/main/packages/j > `optional` **configDefaults?**: `Record`\<`string`, `unknown`\> -Defined in: [app.ts:32](https://github.com/getsentry/junior/blob/main/packages/junior/src/app.ts#L32) +Defined in: [app.ts:35](https://github.com/getsentry/junior/blob/main/packages/junior/src/app.ts#L35) Install-wide provider defaults (`provider.key` format). Channel overrides take precedence. @@ -23,7 +23,7 @@ Install-wide provider defaults (`provider.key` format). 
Channel overrides take p > `optional` **plugins?**: `PluginConfig` \| `JuniorPlugin`[] -Defined in: [app.ts:40](https://github.com/getsentry/junior/blob/main/packages/junior/src/app.ts#L40) +Defined in: [app.ts:43](https://github.com/getsentry/junior/blob/main/packages/junior/src/app.ts#L43) Plugin packages/overrides, or trusted plugin instances loaded by this app. @@ -37,4 +37,4 @@ their package config is merged with the catalog bundled by `juniorNitro()`. > `optional` **waitUntil?**: `WaitUntilFn` -Defined in: [app.ts:41](https://github.com/getsentry/junior/blob/main/packages/junior/src/app.ts#L41) +Defined in: [app.ts:44](https://github.com/getsentry/junior/blob/main/packages/junior/src/app.ts#L44) diff --git a/packages/docs/src/content/docs/reference/config-and-env.md b/packages/docs/src/content/docs/reference/config-and-env.md index f8fea1eb..e3e03ccb 100644 --- a/packages/docs/src/content/docs/reference/config-and-env.md +++ b/packages/docs/src/content/docs/reference/config-and-env.md @@ -12,19 +12,22 @@ related: ## Core runtime -| Variable | Required | Purpose | -| ------------------------------------------- | -------- | ---------------------------------------------------------------------------------------------------------------------------------------------------- | -| `SLACK_SIGNING_SECRET` | Yes | Verifies Slack request signatures. | -| `SLACK_BOT_TOKEN` or `SLACK_BOT_USER_TOKEN` | Yes | Posts thread replies and calls Slack APIs. | -| `REDIS_URL` | Yes | Queue and runtime state storage. | -| `JUNIOR_SECRET` | Yes | Signs internal timeout-resume callbacks and sandbox egress requester context. | -| `JUNIOR_BOT_NAME` | No | Bot display/config naming. | -| `AI_MODEL` | No | Primary model selection override for main assistant turns. Defaults to `openai/gpt-5.4`; Junior chooses the reasoning effort per turn automatically. | -| `AI_FAST_MODEL` | No | Faster model for lightweight tasks and routing/classification passes before the main turn begins. 
Defaults to `openai/gpt-5.4-mini`. | -| `AI_VISION_MODEL` | No | Dedicated image-understanding model; unset disables vision features. | -| `AI_WEB_SEARCH_MODEL` | No | Override for the `webSearch` tool model. Defaults to a search-tuned model; does not fall through to `AI_MODEL`. | -| `JUNIOR_BASE_URL` | No | Canonical base URL for callback/auth URL generation. | -| `AI_GATEWAY_API_KEY` | No | AI gateway auth if used in your setup. | +| Variable | Required | Purpose | +| ------------------------------------------- | ----------- | ----------------------------------------------------------------------------------------------------------------------------------------------------- | +| `SLACK_SIGNING_SECRET` | Yes | Verifies Slack request signatures. | +| `SLACK_BOT_TOKEN` or `SLACK_BOT_USER_TOKEN` | Yes | Posts thread replies and calls Slack APIs. | +| `REDIS_URL` | Yes | Queue and runtime state storage. | +| `JUNIOR_SECRET` | Yes | Signs internal timeout-resume and agent-dispatch callbacks, plus sandbox egress requester context. | +| `JUNIOR_BOT_NAME` | No | Bot display/config naming. | +| `AI_MODEL` | No | Primary model selection override for main assistant turns. Defaults to `openai/gpt-5.4`; Junior chooses the reasoning effort per turn automatically. | +| `AI_FAST_MODEL` | No | Faster model for lightweight tasks and routing/classification passes before the main turn begins. Defaults to `openai/gpt-5.4-mini`. | +| `AI_VISION_MODEL` | No | Dedicated image-understanding model; unset disables vision features. | +| `AI_WEB_SEARCH_MODEL` | No | Override for the `webSearch` tool model. Defaults to a search-tuned model; does not fall through to `AI_MODEL`. | +| `JUNIOR_BASE_URL` | No | Canonical base URL for callback/auth URL generation. | +| `JUNIOR_STATE_KEY_PREFIX` | No | Optional namespace prepended to all state-adapter keys, locks, and queues. Use separate prefixes when sharing one Redis database across environments. 
| +| `CRON_SECRET` or `JUNIOR_SCHEDULER_SECRET` | Conditional | Bearer token for internal scheduler and heartbeat routes; use `CRON_SECRET` with Vercel Cron, or `JUNIOR_SCHEDULER_SECRET` for an external scheduler. | +| `JUNIOR_TIMEZONE` | No | Default IANA timezone for scheduler authoring and other timezone-sensitive behavior. Defaults to `America/Los_Angeles`. | +| `AI_GATEWAY_API_KEY` | No | AI gateway auth if used in your setup. | Generate `JUNIOR_SECRET` with Node, then store the generated value in every environment that runs the same app: diff --git a/packages/docs/src/content/docs/start-here/deploy-to-vercel.md b/packages/docs/src/content/docs/start-here/deploy-to-vercel.md index 8556fb8d..cba88bc6 100644 --- a/packages/docs/src/content/docs/start-here/deploy-to-vercel.md +++ b/packages/docs/src/content/docs/start-here/deploy-to-vercel.md @@ -52,6 +52,7 @@ Set the core runtime variables in Vercel: | `REDIS_URL` | Yes | Queue and runtime state storage. | | `JUNIOR_SECRET` | Yes | Signs internal callbacks and sandbox requester context. | | `JUNIOR_BASE_URL` | Conditional | Canonical URL for OAuth and callback URLs when Vercel URL envs are not enough. | +| `JUNIOR_STATE_KEY_PREFIX` | No | Redis key namespace for this deployment when sharing one Redis database. | | `AI_GATEWAY_API_KEY` | Optional | AI Gateway auth when your setup requires it. 
| Use one stable `JUNIOR_SECRET` per deployment: diff --git a/packages/docs/src/content/docs/start-here/quickstart.md b/packages/docs/src/content/docs/start-here/quickstart.md index be00aaf2..1401e443 100644 --- a/packages/docs/src/content/docs/start-here/quickstart.md +++ b/packages/docs/src/content/docs/start-here/quickstart.md @@ -56,17 +56,18 @@ node -e "console.log(require('node:crypto').randomBytes(32).toString('base64url' Set these values before running real turns: -| Variable | Required | Purpose | -| ---------------------- | ---------------------- | -------------------------------------------------------------- | -| `SLACK_SIGNING_SECRET` | Yes, for Slack traffic | Verifies Slack requests. | -| `SLACK_BOT_TOKEN` | Yes, for Slack replies | Posts thread replies and calls Slack APIs. | -| `REDIS_URL` | Yes | Queue and runtime state storage. | -| `JUNIOR_SECRET` | Yes | Signs internal resume callbacks and sandbox requester context. | -| `JUNIOR_BOT_NAME` | No | Bot display/config name. | -| `AI_MODEL` | No | Primary assistant model override. | -| `AI_FAST_MODEL` | No | Lightweight routing/classification model override. | -| `AI_VISION_MODEL` | No | Enables image understanding when set. | -| `AI_WEB_SEARCH_MODEL` | No | Search model override. | +| Variable | Required | Purpose | +| ------------------------- | ---------------------- | -------------------------------------------------------------- | +| `SLACK_SIGNING_SECRET` | Yes, for Slack traffic | Verifies Slack requests. | +| `SLACK_BOT_TOKEN` | Yes, for Slack replies | Posts thread replies and calls Slack APIs. | +| `REDIS_URL` | Yes | Queue and runtime state storage. | +| `JUNIOR_SECRET` | Yes | Signs internal resume callbacks and sandbox requester context. | +| `JUNIOR_BOT_NAME` | No | Bot display/config name. | +| `AI_MODEL` | No | Primary assistant model override. | +| `AI_FAST_MODEL` | No | Lightweight routing/classification model override. 
| +| `AI_VISION_MODEL` | No | Enables image understanding when set. | +| `AI_WEB_SEARCH_MODEL` | No | Search model override. | +| `JUNIOR_STATE_KEY_PREFIX` | No | Redis key namespace for this local app/environment. | See [Config & Environment](/reference/config-and-env/) for the full reference. diff --git a/packages/junior-evals/README.md b/packages/junior-evals/README.md index e31cb5f0..97c9e8f3 100644 --- a/packages/junior-evals/README.md +++ b/packages/junior-evals/README.md @@ -66,8 +66,8 @@ Harness override knobs (in `EvalOverrides`): - `auto_complete_mcp_oauth`: after our app genuinely starts an MCP OAuth flow for the listed providers, the harness immediately completes the fake provider callback. - `auto_complete_oauth`: after our app genuinely starts a generic OAuth flow for the listed providers, the harness immediately completes the fake provider callback. +- `credential_providers`: seed normal provider credentials for the listed providers. GitHub uses dummy GitHub App env vars plus an intercepted installation-token exchange; Sentry uses the normal OAuth token store. - `fail_reply_call`: force a non-retryable reply failure on a specific call. -- `faults.sandbox_bash_stream_interrupts`: inject a fixed number of eval-only sandbox bash stream interruptions so the real agent must recover from failed command results. - `mock_image_generation`: stub the image-generation HTTP response with a valid image payload while still exercising the real attachment path. - `plugin_dirs`: load plugin fixtures from eval-local directories without adding workspace packages. - `reply_texts`: override returned reply text per call. 
diff --git a/packages/junior-evals/evals/behavior-harness.ts b/packages/junior-evals/evals/behavior-harness.ts index 95433ec3..fc1c5b49 100644 --- a/packages/junior-evals/evals/behavior-harness.ts +++ b/packages/junior-evals/evals/behavior-harness.ts @@ -1,6 +1,13 @@ import path from "node:path"; +import { spawn, type ChildProcess } from "node:child_process"; +import { generateKeyPairSync } from "node:crypto"; +import { createServer, type Server } from "node:http"; import { fileURLToPath } from "node:url"; import type { Message } from "chat"; +import { + interceptTestHttp, + resetTestGitHubHttpFixtures, +} from "@sentry/junior-testing/http"; import { executeWithReplay } from "vitest-evals/replay"; import type { JsonValue } from "vitest-evals/harness"; import { @@ -23,8 +30,10 @@ import { deleteMcpStoredOAuthCredentials, getLatestMcpAuthSessionForUserProvider, } from "@/chat/mcp/auth-store"; +import { getAgentPlugins, setAgentPlugins } from "@/chat/plugins/agent-hooks"; import { getPluginOAuthConfig, setPluginConfig } from "@/chat/plugins/registry"; import { generateAssistantReply } from "@/chat/respond"; +import { createSchedulerPlugin } from "@/chat/scheduler/plugin"; import { getStateAdapter } from "@/chat/state/adapter"; import { resetSkillDiscoveryCache } from "@/chat/skills"; import { createWebFetchTool } from "@/chat/tools/web/fetch-tool"; @@ -53,6 +62,7 @@ import { readCapturedSlackApiCalls, type CapturedSlackApiCall, } from "@junior-tests/msw/captured-slack-api-calls"; +import { ALL as sandboxEgressProxyALL } from "@/handlers/sandbox-egress-proxy"; import { createMockImageGenerateDeps } from "./fixtures/image-generate"; // --------------------------------------------------------------------------- @@ -131,11 +141,8 @@ interface EvalReplyResultFixture { export interface EvalOverrides { auto_complete_mcp_oauth?: string[]; auto_complete_oauth?: string[]; - enable_test_credentials?: boolean; + credential_providers?: Array<"github" | "sentry">; fail_reply_call?: 
number; - faults?: { - sandbox_bash_stream_interrupts?: number; - }; mock_image_generation?: boolean; plugin_dirs?: string[]; plugin_packages?: string[]; @@ -144,7 +151,6 @@ export interface EvalOverrides { reply_texts?: string[]; skill_dirs?: string[]; subscribed_decisions?: SubscribedDecisionFixture[]; - test_credential_token?: string; unset_gateway_api_key?: boolean; } @@ -195,6 +201,7 @@ export interface EvalCanvasArtifact { } export interface EvalToolInvocation { + arguments?: Record; tool: string; bash_command?: string; mcp_arguments?: Record; @@ -325,6 +332,26 @@ function toEvalToolInvocation(input: { }): EvalToolInvocation { const invocation: EvalToolInvocation = { tool: input.toolName }; + if (input.toolName.startsWith("slackSchedule")) { + invocation.arguments = Object.fromEntries( + [ + "title", + "task_id", + "objective", + "confirmed_by_user", + "schedule_description", + "timezone", + "next_run_at_iso", + "recurrence_frequency", + "recurrence_interval", + "recurrence_weekdays", + "status", + ] + .filter((key) => key in input.params) + .map((key) => [key, input.params[key]]), + ); + } + if (input.toolName === "bash" && typeof input.params.command === "string") { invocation.bash_command = input.params.command.trim(); } @@ -399,14 +426,23 @@ function buildRuntimeThreadId(fixture: EvalEventThreadFixture): string { // --------------------------------------------------------------------------- const HARNESS_ENV_KEYS = [ - "EVAL_ENABLE_TEST_CREDENTIALS", - "EVAL_TEST_CREDENTIAL_TOKEN", + "GITHUB_APP_BOT_EMAIL", + "GITHUB_APP_BOT_NAME", + "GITHUB_APP_ID", + "GITHUB_APP_PRIVATE_KEY", + "GITHUB_INSTALLATION_ID", "JUNIOR_BASE_URL", - "JUNIOR_EVAL_ENABLE_FAULTS", - "JUNIOR_EVAL_FAULT_SANDBOX_BASH_STREAM_INTERRUPTS", + "JUNIOR_SECRET", "JUNIOR_STATE_ADAPTER", "SLACK_BOT_TOKEN", ] as const; +const DEFAULT_EVAL_BASE_URL = "https://junior.example.com"; +const SENTRY_EVAL_SCOPE = "event:read org:read project:read team:read"; +const DUMMY_GITHUB_APP_PRIVATE_KEY = 
generateKeyPairSync("rsa", { + modulusLength: 2048, +}) + .privateKey.export({ format: "pem", type: "pkcs8" }) + .toString(); interface EnvSnapshot { restore(): void; @@ -430,6 +466,219 @@ function snapshotEnv(keys: readonly string[]): EnvSnapshot { }; } +function isSandboxReachableBaseUrl(value: string): boolean { + try { + const url = new URL(value); + const hostname = url.hostname.toLowerCase(); + return ( + url.protocol === "https:" && + hostname !== "localhost" && + hostname !== "127.0.0.1" && + hostname !== "::1" && + !hostname.endsWith(".example.com") && + !hostname.endsWith(".example.test") && + hostname !== "example.com" + ); + } catch { + return false; + } +} + +function scenarioNeedsEvalEgress(scenario: EvalScenario): boolean { + return Boolean( + scenario.overrides?.credential_providers?.length || + scenario.overrides?.auto_complete_oauth?.length, + ); +} + +function configureHarnessBaseUrl(scenario: EvalScenario): void { + const baseUrl = process.env.JUNIOR_BASE_URL?.trim(); + if (scenarioNeedsEvalEgress(scenario)) { + if (!baseUrl || !isSandboxReachableBaseUrl(baseUrl)) { + throw new Error( + "Eval sandbox HTTP interception requires JUNIOR_BASE_URL to point at a public HTTPS Junior app URL reachable from Vercel Sandbox so sandbox egress can reach the test egress proxy.", + ); + } + return; + } + + if (!baseUrl) { + process.env.JUNIOR_BASE_URL = DEFAULT_EVAL_BASE_URL; + } +} + +function requestHeadersFromNode( + headers: Record, +): Headers { + const result = new Headers(); + for (const [key, value] of Object.entries(headers)) { + if (value === undefined) continue; + if (Array.isArray(value)) { + for (const item of value) result.append(key, item); + } else { + result.set(key, value); + } + } + return result; +} + +function listen(server: Server): Promise { + return new Promise((resolve, reject) => { + server.once("error", reject); + server.listen(0, "127.0.0.1", () => { + server.off("error", reject); + const address = server.address(); + if (!address 
|| typeof address === "string") { + reject(new Error("Eval egress server did not bind to a TCP port")); + return; + } + resolve(address.port); + }); + }); +} + +function closeServer(server: Server): Promise { + return new Promise((resolve, reject) => { + server.close((error) => { + if (error) reject(error); + else resolve(); + }); + }); +} + +async function writeResponse( + target: import("node:http").ServerResponse, + response: Response, +): Promise { + target.statusCode = response.status; + target.statusMessage = response.statusText; + response.headers.forEach((value, key) => { + target.setHeader(key, value); + }); + + if (!response.body) { + target.end(); + return; + } + + const reader = response.body.getReader(); + try { + for (;;) { + const next = await reader.read(); + if (next.done) break; + target.write(next.value); + } + target.end(); + } finally { + reader.releaseLock(); + } +} + +async function waitForPublicEgressUrl(baseUrl: string): Promise { + const deadline = Date.now() + 20_000; + let lastError: unknown; + while (Date.now() < deadline) { + try { + const response = await fetch(new URL("/health", baseUrl)); + if (response.ok) return; + lastError = new Error(`HTTP ${response.status}`); + } catch (error) { + lastError = error; + } + await new Promise((resolve) => setTimeout(resolve, 500)); + } + throw new Error( + `Eval egress server was not reachable at ${baseUrl}: ${ + lastError instanceof Error ? lastError.message : String(lastError) + }`, + ); +} + +async function startEvalEgressServer(): Promise { + const baseUrl = process.env.JUNIOR_BASE_URL?.trim() ?? 
""; + const token = process.env.CLOUDFLARE_TUNNEL_TOKEN?.trim(); + if (!token) { + throw new Error( + "Eval sandbox HTTP interception requires CLOUDFLARE_TUNNEL_TOKEN so Vercel Sandbox can reach the eval egress proxy.", + ); + } + + const server = createServer((incoming, outgoing) => { + void (async () => { + try { + if (incoming.url === "/health") { + outgoing.setHeader("content-type", "application/json"); + outgoing.end(JSON.stringify({ ok: true })); + return; + } + + const request = new Request( + new URL(incoming.url ?? "/", `http://${incoming.headers.host}`).href, + { + method: incoming.method, + headers: requestHeadersFromNode(incoming.headers), + ...(incoming.method === "GET" || incoming.method === "HEAD" + ? {} + : { + body: incoming as unknown as BodyInit, + duplex: "half", + }), + } as RequestInit, + ); + await writeResponse( + outgoing, + await sandboxEgressProxyALL(request, { + interceptHttp: interceptTestHttp, + }), + ); + } catch (error) { + console.error( + "Eval egress server request failed", + error instanceof Error ? 
error.message : String(error), + ); + outgoing.statusCode = 500; + outgoing.setHeader("content-type", "text/plain; charset=utf-8"); + outgoing.end("Eval egress server error\n"); + } + })(); + }); + + const port = await listen(server); + let tunnel: ChildProcess | undefined; + tunnel = spawn( + "cloudflared", + [ + "tunnel", + "--no-autoupdate", + "--loglevel", + "warn", + "--transport-loglevel", + "error", + "run", + "--token", + token, + "--url", + `http://127.0.0.1:${port}`, + ], + { stdio: "ignore" }, + ); + + try { + await waitForPublicEgressUrl(baseUrl); + } catch (error) { + tunnel.kill("SIGTERM"); + await closeServer(server); + throw error; + } + + return { + async close() { + tunnel?.kill("SIGTERM"); + await closeServer(server); + }, + }; +} + // --------------------------------------------------------------------------- // Thread / message helpers // --------------------------------------------------------------------------- @@ -708,6 +957,7 @@ function toIncomingMessage(event: MentionEvent | SubscribedMessageEvent) { runId: event.thread.run_id, raw: { channel: event.thread.channel_id, + team_id: "TEVAL", ts: messageTs, thread_ts: event.thread.thread_ts, }, @@ -777,6 +1027,37 @@ async function cleanupOAuthTokens( } } +function configureCredentialProviderEnv( + providers: Set<"github" | "sentry">, +): void { + if (providers.has("github")) { + process.env.GITHUB_APP_ID = "12345"; + process.env.GITHUB_INSTALLATION_ID = "67890"; + process.env.GITHUB_APP_PRIVATE_KEY = DUMMY_GITHUB_APP_PRIVATE_KEY; + process.env.GITHUB_APP_BOT_NAME = "junior-eval"; + process.env.GITHUB_APP_BOT_EMAIL = "junior-eval@example.com"; + } +} + +async function seedCredentialProviderTokens(input: { + providers: Set<"github" | "sentry">; + userIds: Iterable; +}): Promise { + if (!input.providers.has("sentry")) { + return; + } + + const userTokenStore = createUserTokenStore(); + for (const userId of input.userIds) { + await userTokenStore.set(userId, "sentry", { + accessToken: 
"eval-sentry-access-token", + refreshToken: "eval-sentry-refresh-token", + expiresAt: Date.now() + 60 * 60 * 1000, + scope: SENTRY_EVAL_SCOPE, + }); + } +} + function getDefaultAuthCode( type: "mcp-oauth" | "oauth", provider: string, @@ -908,17 +1189,24 @@ interface HarnessEnvironment { authRequesterUsers: Set; autoCompleteMcpOauthProviders: Set; autoCompleteOauthProviders: Set; + credentialProviders: Set<"github" | "sentry">; configuredPluginDirs: string[]; configuredSkillDirs: string[]; envSnapshot: EnvSnapshot; + egressServer?: EvalEgressServer; pluginApp?: PluginAppFixture; stateAdapter: HarnessStateAdapter; } +interface EvalEgressServer { + close(): Promise; +} + async function setupHarnessEnvironment( scenario: EvalScenario, ): Promise { const envSnapshot = snapshotEnv(HARNESS_ENV_KEYS); + let egressServer: EvalEgressServer | undefined; let pluginApp: PluginAppFixture | undefined; try { @@ -932,6 +1220,9 @@ async function setupHarnessEnvironment( const autoCompleteOauthProviders = new Set( scenario.overrides?.auto_complete_oauth?.map((p) => p.trim()) ?? [], ); + const credentialProviders = new Set( + scenario.overrides?.credential_providers ?? 
[], + ); const authRequesterUsers = new Set( scenario.events.flatMap((event) => "message" in event @@ -945,26 +1236,9 @@ async function setupHarnessEnvironment( authRequesterUsers.add("U-test"); } - if (scenario.overrides?.enable_test_credentials) { - process.env.EVAL_ENABLE_TEST_CREDENTIALS = "1"; - if (scenario.overrides.test_credential_token) { - process.env.EVAL_TEST_CREDENTIAL_TOKEN = - scenario.overrides.test_credential_token; - } - } - const sandboxBashStreamInterrupts = - scenario.overrides?.faults?.sandbox_bash_stream_interrupts; - if ( - typeof sandboxBashStreamInterrupts === "number" && - Number.isFinite(sandboxBashStreamInterrupts) && - sandboxBashStreamInterrupts > 0 - ) { - process.env.JUNIOR_EVAL_ENABLE_FAULTS = "1"; - process.env.JUNIOR_EVAL_FAULT_SANDBOX_BASH_STREAM_INTERRUPTS = String( - Math.floor(sandboxBashStreamInterrupts), - ); - } - process.env.JUNIOR_BASE_URL = "https://junior.example.com"; + configureCredentialProviderEnv(credentialProviders); + configureHarnessBaseUrl(scenario); + process.env.JUNIOR_SECRET = "junior-test-secret"; process.env.JUNIOR_STATE_ADAPTER = "memory"; pluginApp = configuredPluginDirs.length > 0 @@ -978,21 +1252,32 @@ async function setupHarnessEnvironment( const stateAdapter = getStateAdapter(); await stateAdapter.connect(); + egressServer = scenarioNeedsEvalEgress(scenario) + ? 
await startEvalEgressServer() + : undefined; resetSkillDiscoveryCache(); + resetTestGitHubHttpFixtures(); await cleanupHarnessThreadState(stateAdapter, scenario.events); await cleanupMcpAuthState( authRequesterUsers, autoCompleteMcpOauthProviders, ); await cleanupOAuthTokens(authRequesterUsers, autoCompleteOauthProviders); + await cleanupOAuthTokens(authRequesterUsers, credentialProviders); + await seedCredentialProviderTokens({ + providers: credentialProviders, + userIds: authRequesterUsers, + }); return { authRequesterUsers, autoCompleteMcpOauthProviders, autoCompleteOauthProviders, + credentialProviders, configuredPluginDirs, configuredSkillDirs, envSnapshot, + ...(egressServer ? { egressServer } : {}), ...(pluginApp ? { pluginApp } : {}), stateAdapter, }; @@ -1000,6 +1285,7 @@ async function setupHarnessEnvironment( resetSkillDiscoveryCache(); setPluginConfig(undefined); envSnapshot.restore(); + await egressServer?.close(); await pluginApp?.cleanup(); throw error; } @@ -1020,6 +1306,8 @@ async function teardownHarnessEnvironment( env.authRequesterUsers, env.autoCompleteOauthProviders, ); + await cleanupOAuthTokens(env.authRequesterUsers, env.credentialProviders); + await env.egressServer?.close(); env.envSnapshot.restore(); await env.pluginApp?.cleanup(); } @@ -1041,7 +1329,11 @@ function buildRuntimeServices( scenario.overrides?.reply_timeout_ms && scenario.overrides.reply_timeout_ms > 0 ? scenario.overrides.reply_timeout_ms - : Number.parseInt(process.env.EVAL_AGENT_REPLY_TIMEOUT_MS ?? "30000", 10); + : Number.parseInt( + process.env.EVAL_AGENT_REPLY_TIMEOUT_MS ?? + (scenarioNeedsEvalEgress(scenario) ? "60000" : "30000"), + 10, + ); let replyCallCount = 0; let decisionIndex = 0; const replyState = { successfulCount: 0 }; @@ -1386,62 +1678,69 @@ export async function runEvalScenario( ): Promise { const logRecords = options.logRecords ?? 
[]; const env = await setupHarnessEnvironment(scenario); + let previousAgentPlugins: ReturnType | undefined; - const slackAdapter = new FakeSlackAdapter(); - const threadRecordsById = new Map(); - const readyQueueDeliveries: QueueDelivery[] = []; - const observations: RuntimeObservations = { - toolInvocations: [], - }; - const channelStateById = new Map< - string, - { value: Record } - >(); - - const getChannelStateRef = ( - channelId: string | undefined, - ): { value: Record } | undefined => { - const normalized = channelId?.trim(); - if (!normalized) return undefined; - const existing = channelStateById.get(normalized); - if (existing) return existing; - const created = { value: {} }; - channelStateById.set(normalized, created); - return created; - }; + try { + const currentAgentPlugins = getAgentPlugins(); + previousAgentPlugins = setAgentPlugins([ + createSchedulerPlugin(), + ...currentAgentPlugins.filter((plugin) => plugin.name !== "scheduler"), + ]); + + const slackAdapter = new FakeSlackAdapter(); + const threadRecordsById = new Map(); + const readyQueueDeliveries: QueueDelivery[] = []; + const observations: RuntimeObservations = { + toolInvocations: [], + }; + const channelStateById = new Map< + string, + { value: Record } + >(); + + const getChannelStateRef = ( + channelId: string | undefined, + ): { value: Record } | undefined => { + const normalized = channelId?.trim(); + if (!normalized) return undefined; + const existing = channelStateById.get(normalized); + if (existing) return existing; + const created = { value: {} }; + channelStateById.set(normalized, created); + return created; + }; - const getThreadRecord = ( - fixture: EvalEventThreadFixture, - ): EvalThreadRecord => { - const runtimeThreadId = buildRuntimeThreadId(fixture); - const existing = threadRecordsById.get(runtimeThreadId); - if (existing) return existing; - const thread = createEvalThread({ - fixture, - channelStateRef: getChannelStateRef(fixture.channel_id), - stateAdapter: 
env.stateAdapter, - }); - const transcript: Message[] = []; - attachTranscriptAccessors(thread, transcript); - const record = { thread, transcript }; - threadRecordsById.set(runtimeThreadId, record); - return record; - }; + const getThreadRecord = ( + fixture: EvalEventThreadFixture, + ): EvalThreadRecord => { + const runtimeThreadId = buildRuntimeThreadId(fixture); + const existing = threadRecordsById.get(runtimeThreadId); + if (existing) return existing; + const thread = createEvalThread({ + fixture, + channelStateRef: getChannelStateRef(fixture.channel_id), + stateAdapter: env.stateAdapter, + }); + const transcript: Message[] = []; + attachTranscriptAccessors(thread, transcript); + const record = { thread, transcript }; + threadRecordsById.set(runtimeThreadId, record); + return record; + }; - const services = buildRuntimeServices( - scenario, - env, - threadRecordsById, - observations, - ); + const services = buildRuntimeServices( + scenario, + env, + threadRecordsById, + observations, + ); - const slackRuntime = createSlackRuntime({ - getSlackAdapter: () => slackAdapter as any, - services, - }); - const dispatch = createThreadMessageDispatcher({ runtime: slackRuntime }); + const slackRuntime = createSlackRuntime({ + getSlackAdapter: () => slackAdapter as any, + services, + }); + const dispatch = createThreadMessageDispatcher({ runtime: slackRuntime }); - try { await processEvents({ scenario, env, @@ -1450,16 +1749,19 @@ export async function runEvalScenario( getThreadRecord, readyQueueDeliveries, }); + + return collectResults( + threadRecordsById, + slackAdapter, + logRecords, + observations, + ); } finally { + if (previousAgentPlugins) { + setAgentPlugins(previousAgentPlugins); + } await teardownHarnessEnvironment(scenario, env); } - - return collectResults( - threadRecordsById, - slackAdapter, - logRecords, - observations, - ); } // Compile-time guards for Thread and Message fakes are in tests/fixtures/slack-harness.ts. 
diff --git a/packages/junior-evals/evals/core/coding-file-tools.eval.ts b/packages/junior-evals/evals/core/coding-file-tools.eval.ts index 7c3c9c1a..8590f685 100644 --- a/packages/junior-evals/evals/core/coding-file-tools.eval.ts +++ b/packages/junior-evals/evals/core/coding-file-tools.eval.ts @@ -6,7 +6,7 @@ const codingFixtureOverrides = { }; describeEval("Coding File Tools", slackEvals, (it) => { - it("when making a targeted source edit, use precise edit tooling and report the changed path", async ({ + it("when making a targeted source edit, update the value and report the changed path", async ({ run, }) => { await run({ @@ -18,43 +18,39 @@ describeEval("Coding File Tools", slackEvals, (it) => { ], criteria: rubric({ contract: - "A small source edit in the sandbox fixture uses precise file editing instead of full-file rewrites or shell mutation.", + "A small source edit in the sandbox fixture updates the requested value and reports the changed file.", pass: [ - "The assistant inspects the fixture before editing and uses precise edit tooling for the retry-count change.", - "The assistant does not use full-file rewrites or shell mutation for this targeted edit.", "The final reply identifies the changed config file and says the default retry count is now 3.", ], fail: [ - "Do not claim the file was changed without an observed `editFile` invocation.", - "Do not use `writeFile` for this targeted edit.", "Do not answer with only a plan or promise to edit later.", + "Do not report a file unrelated to the retry-count setting as the changed file.", ], }), }); }); - it("when locating fixture behavior, use structured discovery and leave files unchanged", async ({ + it("when comparing fixture behavior, cite the relevant files and leave them unchanged", async ({ run, }) => { await run({ overrides: codingFixtureOverrides, events: [ mention( - "In the eval coding fixture, find where emergency mode is handled or documented. 
Summarize the relevant file paths and what each one says. Do not change any files.", + "In the eval coding fixture, compare project/src/alerts.ts and project/docs/operations.md for emergency mode behavior. Summarize what each file says and do not change any files.", ), ], criteria: rubric({ contract: - "A sandbox fixture discovery task uses structured file discovery/read tools and returns grounded file-path evidence without modifying files.", + "A sandbox fixture comparison returns grounded file-path evidence without claiming to modify files.", pass: [ - "The assistant uses structured file discovery/read tools to inspect the fixture.", "The reply cites the alert source file and the operations doc using recognizable fixture-relative paths.", "The reply accurately summarizes that source code handles emergency alerts while the operations doc describes escalation or operator behavior.", - "No fixture files are modified.", + "The reply does not claim that any fixture files were modified.", ], fail: [ - "Do not modify files for this read-only request.", - "Do not answer from memory without observed file discovery or reads.", + "Do not say that files were changed for this read-only request.", + "Do not answer with generic emergency-mode advice instead of fixture file evidence.", "Do not report unrelated files as the only evidence.", ], }), diff --git a/packages/junior-evals/evals/core/lifecycle-and-resilience.eval.ts b/packages/junior-evals/evals/core/lifecycle-and-resilience.eval.ts index f46f9ce3..f30a9b56 100644 --- a/packages/junior-evals/evals/core/lifecycle-and-resilience.eval.ts +++ b/packages/junior-evals/evals/core/lifecycle-and-resilience.eval.ts @@ -68,37 +68,4 @@ describeEval("Lifecycle and Resilience", slackEvals, (it) => { }), }); }); - - it("when a sandbox command stream is interrupted, recover and finish the request", async ({ - run, - }) => { - await run({ - overrides: { - faults: { - sandbox_bash_stream_interrupts: 1, - }, - skill_dirs: 
["evals/fixtures/skills"], - }, - events: [ - mention( - "/resilient-working-directory list files in the working directory", - ), - ], - taskTimeout: 120_000, - criteria: rubric({ - contract: - "A transient sandbox command-stream interruption is treated as recoverable tool output, not a terminal assistant failure.", - pass: [ - "observed_tool_invocations includes at least two `bash` calls, showing the agent retried after the injected interruption.", - "assistant_posts contains exactly one final reply.", - "The reply includes `Working directory files:` and a fenced list of files from the successful retry.", - ], - fail: [ - "Do not post a generic assistant failure reply.", - "Do not stop after reporting only the injected stream interruption.", - "Do not mention Sentry event IDs, stack traces, or provider internals.", - ], - }), - }); - }); }); diff --git a/packages/junior-evals/evals/core/oauth-workflows.eval.ts b/packages/junior-evals/evals/core/oauth-workflows.eval.ts index 00f63c02..9f69648a 100644 --- a/packages/junior-evals/evals/core/oauth-workflows.eval.ts +++ b/packages/junior-evals/evals/core/oauth-workflows.eval.ts @@ -130,10 +130,11 @@ describeEval("OAuth Workflows", slackEvals, (it) => { ], allow: [ "A brief 'Processing your request' continuation notice is acceptable if the final follow-up stays focused on the reconnect result.", + "A single initial auth-needed notice is acceptable before the harness auto-completes authorization.", "The auth-link handoff itself may happen off-thread and does not need to appear in the visible thread transcript.", ], fail: [ - "Do not ask the user to click a second auth link for the same turn.", + "Do not ask the user to authorize again after the reconnect has already completed.", "Do not post a generic failure message.", ], }), diff --git a/packages/junior-evals/evals/core/scheduler.eval.ts b/packages/junior-evals/evals/core/scheduler.eval.ts new file mode 100644 index 00000000..6156c9a1 --- /dev/null +++ 
b/packages/junior-evals/evals/core/scheduler.eval.ts @@ -0,0 +1,50 @@ +import { describeEval } from "vitest-evals"; +import { mention, rubric, slackEvals } from "../helpers"; + +describeEval("Scheduler", slackEvals, (it) => { + it("when asked for a simple one-off reminder, create it without asking for confirmation", async ({ + run, + }) => { + await run({ + events: [mention("@bot remind me in 1 minute to wash my hands")], + criteria: rubric({ + contract: + "A simple one-off reminder request is scheduled immediately for the active Slack context.", + pass: [ + "The reply confirms that a one-off reminder to wash hands was scheduled.", + "The reply does not ask the user to confirm first.", + ], + fail: [ + "Do not ask the user to confirm the reminder before creating it.", + "Do not ask the user to provide a channel ID.", + "Do not describe the reminder as a recurring schedule.", + ], + }), + }); + }); + + it("when asked to schedule recurring work, draft the task for confirmation before creating it", async ({ + run, + }) => { + await run({ + events: [ + mention( + "@bot schedule this every Monday at 9am Pacific: check open GitHub issues about the scheduler and post a short digest here.", + ), + ], + criteria: rubric({ + contract: + "A future or recurring task request is normalized into a scheduled task draft for the active Slack context before it is persisted.", + pass: [ + "The draft task title/objective/instructions describe checking scheduler-related GitHub issues, not creating a schedule.", + "The reply asks the user to confirm the normalized cadence or next run before creating the schedule.", + ], + fail: [ + "Do not persist a scheduled task before user confirmation.", + "Do not ask the user to provide a channel ID.", + "Do not only give instructions for how the user can set up an external cron.", + ], + }), + }); + }); +}); diff --git a/packages/junior-evals/evals/core/skill-infra.eval.ts b/packages/junior-evals/evals/core/skill-infra.eval.ts index 
0095e385..6c86f239 100644 --- a/packages/junior-evals/evals/core/skill-infra.eval.ts +++ b/packages/junior-evals/evals/core/skill-infra.eval.ts @@ -85,15 +85,13 @@ describeEval("Skill Infrastructure", slackEvals, (it) => { contract: "A verification request uses the available source-backed skill and returns the checked answer instead of offering to check later.", pass: [ - "observed_tool_invocations includes a `loadSkill` invocation with `skill_name` set to `source-handbook`.", - "observed_tool_invocations includes a `readFile` invocation.", "The assistant posts exactly one final answer.", "The answer says closed tracking issues alone do not prove capability support.", "The answer says implementation evidence, linked PRs, release notes, issue comments, or an equivalent source-backed rationale is needed.", ], fail: [ "Do not offer to check the source handbook next or later.", - "Do not answer purely from memory without observed source/tool use.", + "Do not answer with generic capability advice that omits the source-handbook rule.", "Do not claim that a closed issue is enough to prove the capability exists.", ], }), @@ -116,8 +114,6 @@ describeEval("Skill Infrastructure", slackEvals, (it) => { contract: "An MCP-backed skill can complete a natural lookup by using the provider result instead of surfacing tool validation errors.", pass: [ - "observed_tool_invocations includes `callMcpTool` with `mcp_tool_name` set to `mcp__eval-mcp__handbook-search`.", - "That `callMcpTool` invocation includes `mcp_arguments.query` containing the handbook or US holidays lookup request.", "The visible thread output includes a final answer based on the demo MCP provider result.", "The visible thread output refers to the handbook or US holidays request.", "The visible thread output does not claim the MCP lookup was blocked by missing arguments.", diff --git a/packages/junior-evals/evals/core/skill-invocation-control.eval.ts b/packages/junior-evals/evals/core/skill-invocation-control.eval.ts 
index 7f471447..a7c9ce75 100644 --- a/packages/junior-evals/evals/core/skill-invocation-control.eval.ts +++ b/packages/junior-evals/evals/core/skill-invocation-control.eval.ts @@ -12,16 +12,15 @@ describeEval("Skill Invocation Control", slackEvals, (it) => { events: [mention("What's the weather like in San Francisco today?")], criteria: rubric({ contract: - "A skill with disable-model-invocation must not be loaded by the model based on topic relevance alone.", + "A command-style weather skill does not answer ordinary weather questions unless the user explicitly asks for that skill.", pass: [ - "The assistant does not invoke the weather-lookup skill and does not return that skill's simulated report (72°F, partly cloudy, 8 mph NW).", + "The assistant does not return the weather-lookup skill's simulated report (72°F, partly cloudy, 8 mph NW).", ], allow: [ "The assistant may answer from other available sources, say it cannot check real weather, or offer general knowledge.", - "The assistant may mention that a weather skill exists but explain it was not invoked.", + "The assistant may mention that a weather skill exists but explain it was not used for this general request.", ], fail: [ - "Do not load the weather-lookup skill automatically.", "Do not return the simulated weather report from the weather-lookup skill.", ], }), @@ -40,7 +39,7 @@ describeEval("Skill Invocation Control", slackEvals, (it) => { ], criteria: rubric({ contract: - "A skill with disable-model-invocation is loaded when the user explicitly references it by name in their message.", + "A command-style weather skill answers when the user explicitly names that skill.", pass: [ "The assistant posts a reply containing a weather report for San Francisco from the weather-lookup skill.", "The reply includes the simulated data: 72°F or 22°C.", @@ -64,13 +63,13 @@ describeEval("Skill Invocation Control", slackEvals, (it) => { ], criteria: rubric({ contract: - "A normal available skill (without 
disable-model-invocation) is auto-selected when the request matches its description.", + "A source-handbook request receives an answer based on the handbook content.", pass: [ - "The assistant uses the source-handbook skill and posts an answer based on its content.", + "The assistant posts an answer based on the source-handbook content.", ], fail: [ - "Do not answer from memory without loading the source-handbook skill.", - "Do not refuse to load the skill when the topic clearly matches.", + "Do not answer with generic capability advice that omits the handbook's verification rule.", + "Do not refuse the request when the handbook content is available.", ], }), }); diff --git a/packages/junior-evals/evals/fixtures/plugins/eval-oauth/plugin.yaml b/packages/junior-evals/evals/fixtures/plugins/eval-oauth/plugin.yaml index 6b39ddee..30c97f1c 100644 --- a/packages/junior-evals/evals/fixtures/plugins/eval-oauth/plugin.yaml +++ b/packages/junior-evals/evals/fixtures/plugins/eval-oauth/plugin.yaml @@ -7,12 +7,12 @@ capabilities: credentials: type: oauth-bearer domains: - - eval-oauth.example.test + - example.com auth-token-env: EVAL_OAUTH_ACCESS_TOKEN oauth: client-id-env: EVAL_OAUTH_CLIENT_ID client-secret-env: EVAL_OAUTH_CLIENT_SECRET - authorize-endpoint: https://eval-oauth.example.test/oauth/authorize - token-endpoint: https://eval-oauth.example.test/oauth/token + authorize-endpoint: https://example.com/junior-eval-oauth/oauth/authorize + token-endpoint: https://example.com/junior-eval-oauth/oauth/token scope: read diff --git a/packages/junior-evals/evals/fixtures/plugins/eval-oauth/skills/eval-oauth/SKILL.md b/packages/junior-evals/evals/fixtures/plugins/eval-oauth/skills/eval-oauth/SKILL.md index 66335760..4bd42fa0 100644 --- a/packages/junior-evals/evals/fixtures/plugins/eval-oauth/skills/eval-oauth/SKILL.md +++ b/packages/junior-evals/evals/fixtures/plugins/eval-oauth/skills/eval-oauth/SKILL.md @@ -6,19 +6,19 @@ allowed-tools: bash # Eval OAuth Flow -This fixture is 
bash-backed, not MCP-backed. Do not use or mention MCP tools for `eval-oauth`. +This fixture is HTTP-backed, not MCP-backed. Do not use or mention MCP tools for `eval-oauth`. Run this command before doing anything else: -`eval-oauth whoami` +`curl -fsSL https://example.com/junior-eval-oauth/whoami` Rules: -- Use the `bash` tool for `eval-oauth whoami`. +- Use the `bash` tool for `curl -fsSL https://example.com/junior-eval-oauth/whoami`. - Do not answer the user's question until that command succeeds. - If the first run does not complete, stop there. Do not summarize, apologize, or ask the user to repeat anything. - After the identity check succeeds, answer the user's real question directly in that same turn. -- Never stop after a successful `eval-oauth whoami` call without sending a final user-facing reply. +- Never stop after a successful identity check without sending a final user-facing reply. - If the user asks about earlier thread context, use that context plainly. - Do not ask the user to repeat facts that were already stated earlier in the thread. - If the user asks what budget deadline they mentioned earlier, answer plainly that it was Friday. diff --git a/packages/junior-evals/evals/fixtures/skills/capability-credential-smoke/SKILL.md b/packages/junior-evals/evals/fixtures/skills/capability-credential-smoke/SKILL.md deleted file mode 100644 index 98694a8c..00000000 --- a/packages/junior-evals/evals/fixtures/skills/capability-credential-smoke/SKILL.md +++ /dev/null @@ -1,21 +0,0 @@ ---- -name: capability-credential-smoke -description: Smoke-test automatic capability credential injection with an eval-only token. Use only in eval scenarios. 
-allowed-tools: bash ---- - -# Capability Credential Smoke - -## Step 1: Run An Authenticated Command - -Call `bash` with exactly: - -`gh issue view 1 --repo getsentry/junior` - -## Step 2: Return The Result - -- If the command succeeds, return exactly: - -`CREDENTIAL_OK` - -- If the command fails, return a short error that includes the command stderr. diff --git a/packages/junior-evals/evals/fixtures/skills/sentry-credential-smoke/SKILL.md b/packages/junior-evals/evals/fixtures/skills/sentry-credential-smoke/SKILL.md deleted file mode 100644 index bea88ac2..00000000 --- a/packages/junior-evals/evals/fixtures/skills/sentry-credential-smoke/SKILL.md +++ /dev/null @@ -1,21 +0,0 @@ ---- -name: sentry-credential-smoke -description: Smoke-test Sentry capability credential injection with an eval-only token. Use only in eval scenarios. -allowed-tools: bash ---- - -# Sentry Credential Smoke - -## Step 1: Run An Authenticated Command - -Call `bash` with exactly: - -`sentry issue list getsentry/ --limit 1` - -## Step 2: Return The Result - -- If the command succeeds, return exactly: - -`CREDENTIAL_OK` - -- If the command fails, return a short error that includes the command stderr. 
diff --git a/packages/junior-evals/evals/github/skill-workflows.eval.ts b/packages/junior-evals/evals/github/skill-workflows.eval.ts index dbd0171f..4b793331 100644 --- a/packages/junior-evals/evals/github/skill-workflows.eval.ts +++ b/packages/junior-evals/evals/github/skill-workflows.eval.ts @@ -2,188 +2,12 @@ import { describeEval } from "vitest-evals"; import { mention, rubric, slackEvals, threadMessage } from "../helpers"; describeEval("GitHub Skill Workflows", slackEvals, (it) => { - it("when the GitHub credential smoke command runs, return one CREDENTIAL_OK reply", async ({ - run, - }) => { - await run({ - overrides: { - skill_dirs: ["evals/fixtures/skills"], - enable_test_credentials: true, - plugin_packages: ["@sentry/junior-github"], - test_credential_token: "eval-smoke-token", - }, - events: [mention("/capability-credential-smoke")], - criteria: rubric({ - contract: - "The GitHub capability credential smoke command succeeds in one reply.", - pass: [ - "The assistant posts exactly one reply containing CREDENTIAL_OK.", - "The configured smoke command is `gh issue view 1 --repo getsentry/junior`; a final `CREDENTIAL_OK` reply is sufficient evidence that it succeeded.", - ], - fail: ["Do not include sandbox setup failure text."], - }), - }); - }); - - it("when creating a GitHub issue, skip duplicate-search narration in the reply", async ({ - run, - }) => { - await run({ - overrides: { - enable_test_credentials: true, - plugin_packages: ["@sentry/junior-github"], - test_credential_token: "eval-github-token", - skill_dirs: ["../junior/skills"], - }, - events: [ - mention( - "Create an issue for adding rate limiting to the API endpoint in getsentry/junior", - ), - ], - criteria: rubric({ - contract: - "The assistant creates the GitHub issue and reports the result without duplicate-search narration clutter.", - pass: [ - "The assistant creates the requested GitHub issue and reports the result without narrating unrelated duplicate-search work.", - ], - fail: [ - 
"Do not add duplicate-search narration unless the user asked for duplicate checking.", - ], - }), - }); - }); - - const reporterRequesterThread = { - id: "thread-reporter-requester", - channel_id: "C-reporter-requester", - thread_ts: "17000000.reporter-requester", - }; - - it("when one user reports and another files an issue, keep attribution roles separate", async ({ - run, - }) => { - await run({ - overrides: { - enable_test_credentials: true, - plugin_packages: ["@sentry/junior-github"], - subscribed_decisions: [ - { - should_reply: false, - reason: "context-setting message only", - }, - ], - test_credential_token: "eval-github-attribution-token", - skill_dirs: ["../junior/skills"], - }, - events: [ - threadMessage( - "Warden resolved its own review thread on getsentry/junior-eval-ops-reference-never-exists#20366 even though the warning still applies. The warning was about `SCM_RPC_SHARED_SECRET` not being backported to the cookiecutter template, and the PR still shows `REVIEW_REQUIRED`.", - { - thread: reporterRequesterThread, - author: { - user_id: "U_BOJAN", - user_name: "bojan", - full_name: "Bojan Oro", - }, - }, - ), - mention( - "Create a GitHub issue for this in getsentry/junior-eval-warden-never-exists. 
Include the issue body you filed in your reply so I can verify attribution.", - { - thread: reporterRequesterThread, - author: { - user_id: "U_DCRAMER", - user_name: "dcramer", - full_name: "David Cramer", - }, - }, - ), - ], - criteria: rubric({ - contract: - "GitHub issue creation from a multi-user Slack thread preserves the original reporter separately from the action requester.", - pass: [ - "The reply reports a created GitHub issue in getsentry/junior-eval-warden-never-exists with an issue URL or issue number.", - "The shown issue content keeps Bojan Oro as the reporter and David Cramer as the action requester.", - ], - allow: [ - "Reporter attribution may be phrased as `Reported by Bojan Oro`, `Raised by Bojan Oro`, or equivalent durable issue-body text.", - "The action-requester footer may be phrased as `Action taken on behalf of David Cramer.` or equivalent durable issue-body text.", - ], - fail: [ - "Do not swap the reporter and requester roles.", - "Do not omit reporter or requester attribution when the prompt asks to show the filed issue content.", - ], - }), - }); - }); - - it("when a GitHub task mentions a Sentry product area, do not prompt for Sentry auth first", async ({ - run, - }) => { - await run({ - overrides: { - enable_test_credentials: true, - plugin_packages: ["@sentry/junior-github", "@sentry/junior-sentry"], - test_credential_token: "eval-routing-token", - skill_dirs: ["../junior/skills"], - }, - events: [ - mention( - "Create a GitHub issue in getsentry/junior about why the Metrics Beta wording can send a code-change request down the wrong auth path.", - ), - ], - criteria: rubric({ - contract: - "A repository task that happens to mention a Sentry product area still follows the GitHub path instead of asking for unrelated Sentry auth.", - pass: [ - "The assistant treats the request as GitHub issue work and does not block on unrelated Sentry auth.", - ], - fail: [ - "Do not ask the user to connect Sentry or inspect live Sentry data before 
doing the GitHub task.", - ], - }), - }); - }); - - it("when asked an implementation question about this repo, answer from repository evidence", async ({ - run, - }) => { - await run({ - overrides: { - enable_test_credentials: true, - plugin_packages: ["@sentry/junior-github"], - test_credential_token: "eval-repo-evidence-token", - skill_dirs: ["../junior/skills"], - }, - events: [ - mention( - "In getsentry/junior, where do we resolve GitHub credential injection from the loaded skill for the current turn? Keep it brief and cite the repo file or symbol you checked.", - ), - ], - criteria: rubric({ - contract: - "An implementation question is answered from repository evidence rather than generic memory or product framing.", - pass: [ - "The reply cites repository evidence such as a file path, symbol, or nearby contract reference.", - "The reply explains briefly that credential injection comes from the loaded plugin-backed skill for the current turn.", - ], - fail: [ - "Do not answer as if this were a product or UI question.", - "Do not answer purely from generic GitHub or OAuth knowledge without repo evidence.", - ], - }), - }); - }); - it("when asked about PR auth sequencing, mention push auth before PR auth", async ({ run, }) => { await run({ overrides: { - enable_test_credentials: true, plugin_packages: ["@sentry/junior-github"], - test_credential_token: "eval-pr-auth-order-token", skill_dirs: ["../junior/skills"], }, events: [ @@ -211,11 +35,6 @@ describeEval("GitHub Skill Workflows", slackEvals, (it) => { channel_id: "C-default-repo", thread_ts: "17000000.default-repo", }; - const defaultRepoIssueThread = { - id: "thread-default-repo-issue", - channel_id: "C-default-repo-issue", - thread_ts: "17000000.default-repo-issue", - }; const targetClassificationContextThread = { id: "thread-target-classification-context", channel_id: "C-target-classification-context", @@ -227,53 +46,12 @@ describeEval("GitHub Skill Workflows", slackEvals, (it) => { thread_ts: 
"17000000.target-classification-explicit", }; - it("when creating an issue after repo setup, use the stored repo without inventing tool failures", async ({ - run, - }) => { - await run({ - overrides: { - enable_test_credentials: true, - plugin_packages: ["@sentry/junior-github"], - test_credential_token: "eval-default-repo-create-token", - skill_dirs: ["../junior/skills"], - }, - events: [ - mention("Set the default repo to getsentry/junior for this channel.", { - thread: defaultRepoIssueThread, - }), - threadMessage( - "Create a GitHub issue for a bug where Slack follow-up replies sometimes blame missing tooling instead of showing the failed command.", - { - thread: defaultRepoIssueThread, - is_mention: true, - }, - ), - ], - criteria: rubric({ - contract: - "Stored GitHub repo context carries into a later issue-creation workflow, and tool-failure explanations stay grounded in observed command results.", - pass: [ - "The assistant confirms the default repo setup and later uses getsentry/junior for issue creation without asking again.", - "The assistant creates or reports a GitHub issue in getsentry/junior.", - "Any tool-failure explanation is grounded in an observed command result.", - ], - fail: [ - "Do not claim that `gh`, the GitHub CLI, or `jr-rpc` is unavailable, missing, or not installed.", - "Do not ask the user to pass --repo or provide the repo again.", - "Do not create, target, or report an issue for a repository other than getsentry/junior.", - ], - }), - }); - }); - it("when a default repo is set in one turn, reuse it in the next turn without asking again", async ({ run, }) => { await run({ overrides: { - enable_test_credentials: true, plugin_packages: ["@sentry/junior-github"], - test_credential_token: "eval-default-repo-token", skill_dirs: ["../junior/skills"], }, events: [ @@ -310,9 +88,7 @@ describeEval("GitHub Skill Workflows", slackEvals, (it) => { }) => { await run({ overrides: { - enable_test_credentials: true, plugin_packages: 
["@sentry/junior-github"], - test_credential_token: "eval-target-classification-context-token", skill_dirs: ["../junior/skills"], }, events: [ @@ -353,9 +129,7 @@ describeEval("GitHub Skill Workflows", slackEvals, (it) => { }) => { await run({ overrides: { - enable_test_credentials: true, plugin_packages: ["@sentry/junior-github"], - test_credential_token: "eval-target-classification-explicit-token", skill_dirs: ["../junior/skills"], }, events: [ diff --git a/packages/junior-evals/evals/helpers.ts b/packages/junior-evals/evals/helpers.ts index 717c93fc..53d80201 100644 --- a/packages/junior-evals/evals/helpers.ts +++ b/packages/junior-evals/evals/helpers.ts @@ -1,5 +1,5 @@ import { - namedJudge, + createJudge, type DescribeEvalOptions, type JudgeContext, } from "vitest-evals"; @@ -68,6 +68,9 @@ function toToolCallRecord( invocation: EvalResult["toolInvocations"][number], ): ToolCallRecord { const args: Record = {}; + if (invocation.arguments) { + args.arguments = toJson(invocation.arguments); + } if (invocation.bash_command) { args.command = invocation.bash_command; } @@ -388,10 +391,10 @@ export const slackHarness: Harness = { }; /** Scores Slack eval output against the case rubric. 
*/ -export const RubricJudge = namedJudge( +export const RubricJudge = createJudge( "RubricJudge", async ({ - inputValue, + input, output, harness, }: JudgeContext< @@ -401,7 +404,10 @@ export const RubricJudge = namedJudge( >) => { const object = parseJudgeResult( await harness.prompt( - formatJudgePrompt(output, formatRubric(inputValue.criteria)), + formatJudgePrompt( + serializeEvalOutput(output as Record), + formatRubric(input.criteria), + ), { system: EVAL_SYSTEM, metadata: { diff --git a/packages/junior-evals/evals/sentry/skill-workflows.eval.ts b/packages/junior-evals/evals/sentry/skill-workflows.eval.ts index e78e0bfc..a07aa77d 100644 --- a/packages/junior-evals/evals/sentry/skill-workflows.eval.ts +++ b/packages/junior-evals/evals/sentry/skill-workflows.eval.ts @@ -2,51 +2,25 @@ import { describeEval } from "vitest-evals"; import { mention, rubric, slackEvals } from "../helpers"; describeEval("Sentry Skill Workflows", slackEvals, (it) => { - it("when the Sentry credential smoke command runs, return one CREDENTIAL_OK reply", async ({ + it("when listing Sentry organizations, report accessible organizations", async ({ run, }) => { await run({ overrides: { - skill_dirs: ["evals/fixtures/skills"], - enable_test_credentials: true, + credential_providers: ["sentry"], plugin_packages: ["@sentry/junior-sentry"], - test_credential_token: "eval-sentry-token", - }, - events: [mention("/sentry-credential-smoke")], - criteria: rubric({ - contract: - "The Sentry capability credential smoke command succeeds in one reply.", - pass: [ - "The assistant posts exactly one reply containing CREDENTIAL_OK.", - "The configured smoke command is `sentry issue list getsentry/ --limit 1`; a final `CREDENTIAL_OK` reply is sufficient evidence that it succeeded.", - ], - fail: ["Do not include sandbox setup failure text."], - }), - }); - }); - - it("when listing Sentry organizations, use the current org command surface", async ({ - run, - }) => { - await run({ - overrides: { - 
enable_test_credentials: true, - plugin_packages: ["@sentry/junior-sentry"], - test_credential_token: "eval-sentry-token", }, events: [mention("List the Sentry organizations I can access.")], criteria: rubric({ contract: - "The assistant verifies or uses the current Sentry CLI organization command and reports accessible organizations instead of blocking on a stale command.", + "The assistant reports accessible Sentry organizations instead of blocking on setup or stale instructions.", pass: [ - "Observed bash tool invocations include `sentry org list`.", - "The assistant reply includes `getsentry` or otherwise reports the accessible organization list from the command result.", + "The assistant reply includes `getsentry` or otherwise reports the accessible organization list.", "The assistant does not claim that organization listing is unavailable.", ], fail: [ - "Do not call `sentry organizations list`.", "Do not say the Sentry org query surface is unavailable.", - "Do not ask the user to reconnect Sentry unless the command returns an auth failure.", + "Do not ask the user to reconnect Sentry when the organization list is available.", ], }), }); diff --git a/packages/junior-evals/package.json b/packages/junior-evals/package.json index 1c3c3461..8d6a551c 100644 --- a/packages/junior-evals/package.json +++ b/packages/junior-evals/package.json @@ -13,6 +13,7 @@ "@sentry/junior": "workspace:*", "@sentry/junior-github": "workspace:*", "@sentry/junior-sentry": "workspace:*", + "@sentry/junior-testing": "workspace:*", "chat": "4.29.0", "typescript": "^6.0.3", "vitest": "^4.1.7", diff --git a/packages/junior-evals/tests/unit/harness/behavior-harness.test.ts b/packages/junior-evals/tests/unit/harness/behavior-harness.test.ts index 29597516..8241c6f9 100644 --- a/packages/junior-evals/tests/unit/harness/behavior-harness.test.ts +++ b/packages/junior-evals/tests/unit/harness/behavior-harness.test.ts @@ -10,6 +10,7 @@ const { const originalStateAdapterEnv = 
process.env.JUNIOR_STATE_ADAPTER; process.env.JUNIOR_STATE_ADAPTER = "memory"; const observedRuntimeIds = { + juniorBaseUrl: undefined as string | undefined, messageThreadId: undefined as string | undefined, threadId: undefined as string | undefined, }; @@ -23,6 +24,7 @@ const { thread: { id: string; post: (value: unknown) => Promise }, message: { threadId?: string }, ) => { + observedRuntimeIds.juniorBaseUrl = process.env.JUNIOR_BASE_URL; observedRuntimeIds.threadId = thread.id; observedRuntimeIds.messageThreadId = message.threadId; await thread.post("observed"); @@ -33,6 +35,7 @@ const { thread: { id: string; post: (value: unknown) => Promise }, message: { threadId?: string }, ) => { + observedRuntimeIds.juniorBaseUrl = process.env.JUNIOR_BASE_URL; observedRuntimeIds.threadId = thread.id; observedRuntimeIds.messageThreadId = message.threadId; await thread.post("observed"); @@ -65,6 +68,7 @@ describe("behavior harness", () => { }); afterEach(() => { + observedRuntimeIds.juniorBaseUrl = undefined; observedRuntimeIds.threadId = undefined; observedRuntimeIds.messageThreadId = undefined; handleNewMentionMock.mockClear(); @@ -109,6 +113,59 @@ describe("behavior harness", () => { ]); }); + it("rejects sandbox HTTP interception evals without a tunnel token", async () => { + const previousBaseUrl = process.env.JUNIOR_BASE_URL; + const previousTunnelToken = process.env.CLOUDFLARE_TUNNEL_TOKEN; + process.env.JUNIOR_BASE_URL = "https://junior-eval.example.dev"; + delete process.env.CLOUDFLARE_TUNNEL_TOKEN; + try { + await expect( + runEvalScenario({ + overrides: { + credential_providers: ["github"], + }, + events: [], + }), + ).rejects.toThrow( + "Eval sandbox HTTP interception requires CLOUDFLARE_TUNNEL_TOKEN", + ); + } finally { + if (previousBaseUrl === undefined) { + delete process.env.JUNIOR_BASE_URL; + } else { + process.env.JUNIOR_BASE_URL = previousBaseUrl; + } + if (previousTunnelToken === undefined) { + delete process.env.CLOUDFLARE_TUNNEL_TOKEN; + } else { + 
process.env.CLOUDFLARE_TUNNEL_TOKEN = previousTunnelToken; + } + } + }); + + it("rejects sandbox HTTP interception evals without a sandbox-reachable base URL", async () => { + const previousBaseUrl = process.env.JUNIOR_BASE_URL; + delete process.env.JUNIOR_BASE_URL; + try { + await expect( + runEvalScenario({ + overrides: { + credential_providers: ["github"], + }, + events: [], + }), + ).rejects.toThrow( + "Eval sandbox HTTP interception requires JUNIOR_BASE_URL", + ); + } finally { + if (previousBaseUrl === undefined) { + delete process.env.JUNIOR_BASE_URL; + } else { + process.env.JUNIOR_BASE_URL = previousBaseUrl; + } + } + }); + it("routes two same-thread mention-shaped events through the queued runtime in order", async () => { const thread = { id: "fixture-thread", diff --git a/packages/junior-evals/vitest.evals.config.ts b/packages/junior-evals/vitest.evals.config.ts index b7b548ed..9d82912d 100644 --- a/packages/junior-evals/vitest.evals.config.ts +++ b/packages/junior-evals/vitest.evals.config.ts @@ -22,6 +22,10 @@ for (const envRoot of [workspaceRoot, juniorPackageRoot]) { } } +process.env.JUNIOR_SECRET = "junior-test-secret"; +process.env.JUNIOR_BASE_URL ??= "https://junior.example.com"; +process.env.JUNIOR_STATE_KEY_PREFIX ??= `junior:eval:${process.pid}`; + export default defineConfig({ resolve: { alias: { @@ -31,7 +35,9 @@ export default defineConfig({ }, test: { environment: "node", + fileParallelism: false, include: ["evals/**/*.eval.ts"], + maxWorkers: 1, setupFiles: [path.resolve(juniorPackageRoot, "tests/msw/setup.ts")], reporters: [new DefaultEvalReporter()], testTimeout: 300_000, diff --git a/packages/junior-plugin-api/src/index.ts b/packages/junior-plugin-api/src/index.ts index 157629de..45cf478a 100644 --- a/packages/junior-plugin-api/src/index.ts +++ b/packages/junior-plugin-api/src/index.ts @@ -19,6 +19,17 @@ export interface AgentPluginDecision { replaceInput(input: Record): void; } +export interface AgentPluginLogger { + error(message: 
string, metadata?: Record): void; + info(message: string, metadata?: Record): void; + warn(message: string, metadata?: Record): void; +} + +export interface AgentPluginContext { + log: AgentPluginLogger; + plugin: AgentPluginMetadata; +} + export interface AgentPluginSandbox { juniorRoot: string; root: string; @@ -41,16 +52,14 @@ export interface AgentPluginSandbox { }): Promise; } -export interface SandboxPrepareHookContext { - plugin: AgentPluginMetadata; +export interface SandboxPrepareHookContext extends AgentPluginContext { requester?: AgentPluginRequester; sandbox: AgentPluginSandbox; } -export interface BeforeToolExecuteHookContext { +export interface BeforeToolExecuteHookContext extends AgentPluginContext { decision: AgentPluginDecision; env: AgentPluginEnv; - plugin: AgentPluginMetadata; requester?: AgentPluginRequester; tool: { input: Record; @@ -58,9 +67,96 @@ export interface BeforeToolExecuteHookContext { }; } +export type AgentPluginToolExecute = { + bivarianceHack( + input: TInput, + options: { experimental_context?: unknown }, + ): Promise | unknown; +}["bivarianceHack"]; + +export interface AgentPluginToolDefinition { + annotations?: unknown; + description: string; + executionMode?: unknown; + inputSchema: unknown; + prepareArguments?: (args: unknown) => unknown; + promptGuidelines?: string[]; + promptSnippet?: string; + execute?: AgentPluginToolExecute; +} + +export interface ToolRegistrationHookContext extends AgentPluginContext { + channelCapabilities?: { + canAddReactions: boolean; + canCreateCanvas: boolean; + canPostToChannel: boolean; + }; + channelId?: string; + messageTs?: string; + requester?: AgentPluginRequester; + state: AgentPluginState; + teamId?: string; + threadTs?: string; + userText?: string; +} + +export interface DispatchOptions { + destination: { + platform: "slack"; + teamId: string; + channelId: string; + }; + idempotencyKey: string; + input: string; + metadata?: Record; +} + +export interface DispatchResult { + id: string; 
+ status: "created" | "already_exists"; +} + +export interface Dispatch { + errorMessage?: string; + id: string; + resultMessageTs?: string; + status: + | "pending" + | "running" + | "awaiting_resume" + | "completed" + | "failed" + | "blocked"; +} + +export interface AgentPluginState { + delete(key: string): Promise; + get(key: string): Promise; + set(key: string, value: unknown, ttlMs?: number): Promise; +} + +export interface HeartbeatHookContext extends AgentPluginContext { + agent: { + dispatch(options: DispatchOptions): Promise; + get(id: string): Promise; + }; + nowMs: number; + state: AgentPluginState; +} + +export interface HeartbeatResult { + dispatchCount?: number; +} + export interface AgentPluginHooks { sandboxPrepare?(ctx: SandboxPrepareHookContext): Promise | void; beforeToolExecute?(ctx: BeforeToolExecuteHookContext): Promise | void; + tools?( + ctx: ToolRegistrationHookContext, + ): Record; + heartbeat?( + ctx: HeartbeatHookContext, + ): Promise | HeartbeatResult | void; } export interface JuniorPluginConfig { diff --git a/packages/junior-sentry/skills/sentry/SKILL.md b/packages/junior-sentry/skills/sentry/SKILL.md index 9758122a..3f757864 100644 --- a/packages/junior-sentry/skills/sentry/SKILL.md +++ b/packages/junior-sentry/skills/sentry/SKILL.md @@ -27,7 +27,7 @@ Before declaring a Sentry data surface unavailable, verify the current CLI help: 2. Execute via CLI: - Use `sentry ` for structured queries. -- The runtime injects `SENTRY_AUTH_TOKEN` automatically for authenticated `sentry` CLI commands in this skill. +- The runtime authenticates Sentry HTTP traffic for this skill. Do not set or print token env vars. - Read [references/cli-commands.md](references/cli-commands.md) when choosing command shapes, target formats, flags, API fallback, or troubleshooting behavior. - Read [references/sandbox-runtime.md](references/sandbox-runtime.md) before relying on sandbox credentials. - Prefer `--json` when parsing or summarizing results. 
diff --git a/packages/junior-sentry/skills/sentry/SOURCES.md b/packages/junior-sentry/skills/sentry/SOURCES.md index 0aeeba24..a5f99309 100644 --- a/packages/junior-sentry/skills/sentry/SOURCES.md +++ b/packages/junior-sentry/skills/sentry/SOURCES.md @@ -16,7 +16,6 @@ Last updated: 2026-04-30 | `pnpm view sentry version dist-tags description bin repository` | canonical | high | Confirmed npm package `sentry` latest is `0.30.0` and exposes `sentry` binary. | Package metadata only; command behavior still comes from help/docs. | | `pnpm dlx sentry@latest --help` and subcommand help | canonical | high | Confirmed executable help lists org list/view, issue list/events/view, log list/view, trace list/view/logs, and api. | Re-run when updating for a newer CLI. | | `packages/junior-sentry/plugin.yaml` | canonical | high | Confirms runtime dependency is the npm `sentry` package and auth token env is `SENTRY_AUTH_TOKEN`. | Local repo contract. | -| `packages/junior/src/chat/sandbox/eval-sentry-stub.ts` | canonical | medium | Eval-only shim needed to avoid preserving stale command forms in tests. | Not a production CLI source. | ## Decisions diff --git a/packages/junior-sentry/skills/sentry/references/cli-commands.md b/packages/junior-sentry/skills/sentry/references/cli-commands.md index 5707034e..e4242068 100644 --- a/packages/junior-sentry/skills/sentry/references/cli-commands.md +++ b/packages/junior-sentry/skills/sentry/references/cli-commands.md @@ -2,8 +2,8 @@ Open this file when selecting a Sentry CLI command, checking target syntax, or diagnosing an unknown-command failure. -All commands use `sentry` and read `SENTRY_AUTH_TOKEN` from environment. -The npm `sentry` package is intentionally installed at runtime from the plugin manifest, so verify live help before blocking on a missing command. +All commands use `sentry`; authenticated Sentry HTTP traffic is supplied by the runtime. 
+The npm `sentry` package is intentionally installed at runtime from the plugin manifest, so verify live help before blocking on a missing command. Do not configure or print token env vars. ## Command selection rules diff --git a/packages/junior-sentry/skills/sentry/references/sandbox-runtime.md b/packages/junior-sentry/skills/sentry/references/sandbox-runtime.md index c6b1284b..c39e94b0 100644 --- a/packages/junior-sentry/skills/sentry/references/sandbox-runtime.md +++ b/packages/junior-sentry/skills/sentry/references/sandbox-runtime.md @@ -11,6 +11,6 @@ This skill runs in the harness sandbox (`node22`) and commands execute via the ` ## Credential strategy -1. After the Sentry skill is loaded, authenticated `sentry ` calls receive `SENTRY_AUTH_TOKEN` automatically for the current turn. +1. After the Sentry skill is loaded, authenticated Sentry HTTP traffic is available for the current turn. 2. Run CLI commands: `sentry `. -3. Credentials are scoped per command execution. Do not persist tokens in files. +3. Credentials are scoped per command execution. Do not set, persist, or print token env vars. 
diff --git a/packages/junior-testing/package.json b/packages/junior-testing/package.json new file mode 100644 index 00000000..5eff69ee --- /dev/null +++ b/packages/junior-testing/package.json @@ -0,0 +1,15 @@ +{ + "name": "@sentry/junior-testing", + "version": "0.0.0", + "private": true, + "type": "module", + "exports": { + "./http": "./src/http/index.ts" + }, + "scripts": { + "typecheck": "tsc --noEmit" + }, + "devDependencies": { + "typescript": "^6.0.3" + } +} diff --git a/packages/junior-testing/src/http/allow-list.ts b/packages/junior-testing/src/http/allow-list.ts new file mode 100644 index 00000000..ab628a22 --- /dev/null +++ b/packages/junior-testing/src/http/allow-list.ts @@ -0,0 +1,40 @@ +const LOCAL_TEST_HTTP_HOSTS = new Set(["localhost", "127.0.0.1", "::1"]); + +const LIVE_TEST_HTTP_HOST_ALLOWLIST = new Set([ + "oidc.vercel.com", + "vercel.app", + "vercel.com", + "vercel.run", + "vercel.sh", +]); + +const LIVE_TEST_HTTP_HOST_SUFFIX_ALLOWLIST = [ + ".vercel.app", + ".vercel.com", + ".vercel.run", + ".vercel.sh", +] as const; + +/** Return whether a test HTTP request is allowed to bypass fixtures. 
*/ +export function allowsLiveTestHttpHost( + hostname: string, + options: { juniorBaseUrl?: string | undefined } = {}, +): boolean { + if (LOCAL_TEST_HTTP_HOSTS.has(hostname) || hostname.endsWith(".localhost")) { + return true; + } + + const juniorBaseUrl = options.juniorBaseUrl?.trim(); + if (juniorBaseUrl) { + if (hostname === new URL(juniorBaseUrl).hostname) { + return true; + } + } + + return ( + LIVE_TEST_HTTP_HOST_ALLOWLIST.has(hostname) || + LIVE_TEST_HTTP_HOST_SUFFIX_ALLOWLIST.some((suffix) => + hostname.endsWith(suffix), + ) + ); +} diff --git a/packages/junior-testing/src/http/eval-oauth.ts b/packages/junior-testing/src/http/eval-oauth.ts new file mode 100644 index 00000000..67e3f38f --- /dev/null +++ b/packages/junior-testing/src/http/eval-oauth.ts @@ -0,0 +1,40 @@ +const EVAL_OAUTH_HOST = "example.com"; +const EVAL_OAUTH_PATH_PREFIX = "/junior-eval-oauth"; + +/** Intercept eval OAuth fixture HTTP traffic for test scenarios. */ +export async function interceptTestEvalOauthHttp(input: { + provider: string; + request: Request; + upstreamUrl: URL; +}): Promise { + if ( + input.provider !== "eval-oauth" || + input.upstreamUrl.hostname !== EVAL_OAUTH_HOST + ) { + return undefined; + } + + if ( + input.upstreamUrl.pathname === `${EVAL_OAUTH_PATH_PREFIX}/whoami` && + input.request.method === "GET" + ) { + const authorization = input.request.headers.get("authorization") ?? 
""; + if (!authorization.startsWith("Bearer ")) { + return new Response("missing authorization\n", { + status: 401, + headers: { "content-type": "text/plain; charset=utf-8" }, + }); + } + return new Response("eval-oauth-user\n", { + headers: { "content-type": "text/plain; charset=utf-8" }, + }); + } + + return new Response( + `Missing eval OAuth HTTP fixture for ${input.request.method} ${input.upstreamUrl.pathname}\n`, + { + status: 501, + headers: { "content-type": "text/plain; charset=utf-8" }, + }, + ); +} diff --git a/packages/junior-testing/src/http/github.ts b/packages/junior-testing/src/http/github.ts new file mode 100644 index 00000000..aa2bee9d --- /dev/null +++ b/packages/junior-testing/src/http/github.ts @@ -0,0 +1,422 @@ +const GITHUB_API_HOST = "api.github.com"; + +interface EvalIssue { + body: string; + comments: number; + created_at: string; + html_url: string; + id: number; + node_id: string; + number: number; + state: "open" | "closed"; + title: string; + updated_at: string; + url: string; + user: Record; +} + +let nextIssueNumber = 101; +const issues = new Map(); +const textEncoder = new TextEncoder(); + +/** Reset mutable GitHub HTTP fixture state between test scenarios. */ +export function resetTestGitHubHttpFixtures(): void { + nextIssueNumber = 101; + issues.clear(); +} + +function base64(input: string): string { + const bytes = textEncoder.encode(input); + let binary = ""; + for (const byte of bytes) { + binary += String.fromCharCode(byte); + } + return btoa(binary); +} + +function base64Url(input: string): string { + return base64(input) + .replace(/=/g, "") + .replace(/\+/g, "-") + .replace(/\//g, "_"); +} + +function json(value: unknown, init?: ResponseInit): Response { + return Response.json(value, { + ...init, + headers: { + "content-type": "application/json; charset=utf-8", + ...(init?.headers ?? 
{}), + }, + }); +} + +function text(value: string, init?: ResponseInit): Response { + return new Response(value, { + ...init, + headers: { + "content-type": "text/plain; charset=utf-8", + ...(init?.headers ?? {}), + }, + }); +} + +function repoFromPath(pathname: string): string | undefined { + const match = pathname.match(/^\/repos\/([^/]+)\/([^/]+)(?:\/|$)/); + return match ? `${match[1]}/${match[2]}` : undefined; +} + +function issueUrl(repo: string, number: number): string { + return `https://github.com/${repo}/issues/${number}`; +} + +function userPayload(login: string): Record { + return { + login, + id: 10_001, + node_id: `U_${base64Url(login)}`, + avatar_url: "https://avatars.githubusercontent.com/u/10001?v=4", + url: `https://api.github.com/users/${login}`, + html_url: `https://github.com/${login}`, + type: "User", + site_admin: false, + }; +} + +function organizationPayload(login: string): Record { + return { + ...userPayload(login), + type: "Organization", + }; +} + +function defaultIssue(repo: string, number: number): EvalIssue { + const timestamp = "2026-05-27T00:00:00Z"; + return { + id: 20_000 + number, + node_id: `I_eval_${number}`, + number, + title: "Eval issue", + body: "", + state: "open", + url: `https://api.github.com/repos/${repo}/issues/${number}`, + html_url: issueUrl(repo, number), + user: userPayload("junior-eval"), + comments: 0, + created_at: timestamp, + updated_at: timestamp, + }; +} + +function issueKey(repo: string, number: number): string { + return `${repo}#${number}`; +} + +async function requestJson(request: Request): Promise> { + try { + const body = await request.json(); + return body && typeof body === "object" && !Array.isArray(body) + ? (body as Record) + : {}; + } catch { + return {}; + } +} + +function repoPayload(repo: string): Record { + const [owner, name] = repo.split("/"); + const ownerLogin = owner ?? 
"getsentry"; + const htmlUrl = `https://github.com/${repo}`; + const apiUrl = `https://api.github.com/repos/${repo}`; + return { + id: 1_000, + node_id: `R_${base64Url(repo)}`, + name, + full_name: repo, + nameWithOwner: repo, + private: false, + owner: organizationPayload(ownerLogin), + html_url: htmlUrl, + description: "Junior eval repository fixture", + fork: false, + url: apiUrl, + trees_url: `${apiUrl}/git/trees{/sha}`, + contents_url: `${apiUrl}/contents/{+path}`, + issues_url: `${apiUrl}/issues{/number}`, + created_at: "2026-01-01T00:00:00Z", + updated_at: "2026-05-27T00:00:00Z", + pushed_at: "2026-05-27T00:00:00Z", + git_url: `git://github.com/${repo}.git`, + ssh_url: `git@github.com:${repo}.git`, + clone_url: `${htmlUrl}.git`, + svn_url: htmlUrl, + homepage: null, + size: 42, + stargazers_count: 0, + watchers_count: 0, + language: "TypeScript", + has_issues: true, + has_projects: true, + has_downloads: true, + has_wiki: true, + has_pages: false, + has_discussions: false, + forks_count: 0, + archived: false, + disabled: false, + open_issues_count: 0, + license: null, + allow_forking: true, + is_template: false, + web_commit_signoff_required: false, + topics: [], + visibility: "public", + forks: 0, + open_issues: 0, + watchers: 0, + default_branch: "main", + defaultBranchRef: { name: "main" }, + permissions: { + admin: false, + maintain: false, + push: true, + triage: true, + pull: true, + }, + organization: organizationPayload(ownerLogin), + network_count: 0, + subscribers_count: 0, + }; +} + +function issuePayload(repo: string, issue: EvalIssue): Record { + return { + ...issue, + repository_url: `https://api.github.com/repos/${repo}`, + labels_url: `https://api.github.com/repos/${repo}/issues/${issue.number}/labels{/name}`, + comments_url: `https://api.github.com/repos/${repo}/issues/${issue.number}/comments`, + events_url: `https://api.github.com/repos/${repo}/issues/${issue.number}/events`, + labels: [], + locked: false, + assignee: null, + assignees: 
[], + milestone: null, + closed_at: null, + author_association: "MEMBER", + active_lock_reason: null, + draft: false, + reactions: { + url: `https://api.github.com/repos/${repo}/issues/${issue.number}/reactions`, + total_count: 0, + "+1": 0, + "-1": 0, + laugh: 0, + hooray: 0, + confused: 0, + heart: 0, + rocket: 0, + eyes: 0, + }, + timeline_url: `https://api.github.com/repos/${repo}/issues/${issue.number}/timeline`, + performed_via_github_app: null, + state_reason: null, + }; +} + +function treePayload(): Record { + return { + sha: "eval-main", + truncated: false, + tree: [], + }; +} + +function contentPayload(pathname: string): Response | undefined { + const match = pathname.match(/^\/repos\/[^/]+\/[^/]+\/contents\/(.+)$/); + if (!match) return undefined; + return json({ message: "Not Found" }, { status: 404 }); +} + +async function graphqlResponse(request: Request): Promise { + const body = await requestJson(request); + const query = String(body.query ?? ""); + const variables = + body.variables && typeof body.variables === "object" + ? (body.variables as Record) + : {}; + const repo = + typeof variables.owner === "string" && typeof variables.name === "string" + ? `${variables.owner}/${variables.name}` + : "getsentry/junior"; + + if (/createIssue/i.test(query)) { + const input = + variables.input && typeof variables.input === "object" + ? (variables.input as Record) + : {}; + const number = nextIssueNumber++; + const issue = { + ...defaultIssue(repo, number), + title: String(input.title ?? "Eval issue"), + body: String(input.body ?? 
""), + }; + issues.set(issueKey(repo, number), issue); + return json({ + data: { + createIssue: { + issue: { + id: `I_eval_${number}`, + number, + title: issue.title, + body: issue.body, + url: issue.html_url, + }, + }, + }, + }); + } + + return json({ + data: { + repository: { + ...repoPayload(repo), + id: "R_eval", + hasIssuesEnabled: true, + issues: { nodes: [] }, + pullRequest: null, + }, + viewer: { login: "junior-eval" }, + }, + }); +} + +async function githubResponse( + request: Request, + upstreamUrl: URL, +): Promise { + if (request.method === "POST" && upstreamUrl.pathname === "/graphql") { + return await graphqlResponse(request); + } + + if (request.method === "GET" && upstreamUrl.pathname === "/user") { + return json(userPayload("junior-eval")); + } + + if (request.method === "GET" && upstreamUrl.pathname === "/search/issues") { + return json({ total_count: 0, incomplete_results: false, items: [] }); + } + + const repo = repoFromPath(upstreamUrl.pathname); + if (!repo) { + return text( + `Missing eval GitHub egress fixture for ${request.method} ${upstreamUrl.pathname}\n`, + { status: 501 }, + ); + } + + if (request.method === "GET" && upstreamUrl.pathname === `/repos/${repo}`) { + return json(repoPayload(repo)); + } + + if ( + request.method === "GET" && + upstreamUrl.pathname.match(/^\/repos\/[^/]+\/[^/]+\/git\/trees\/[^/]+$/) + ) { + return json(treePayload()); + } + + const content = contentPayload(upstreamUrl.pathname); + if (request.method === "GET" && content) { + return content; + } + + if (upstreamUrl.pathname === `/repos/${repo}/issues`) { + if (request.method === "GET") { + return json( + [...issues.values()] + .filter((issue) => issue.url.includes(`/repos/${repo}/issues/`)) + .map((issue) => issuePayload(repo, issue)), + ); + } + + if (request.method === "POST") { + const body = await requestJson(request); + const number = nextIssueNumber++; + const issue = { + ...defaultIssue(repo, number), + title: String(body.title ?? 
"Eval issue"), + body: String(body.body ?? ""), + }; + issues.set(issueKey(repo, number), issue); + return json(issuePayload(repo, issue), { status: 201 }); + } + } + + const issueMatch = upstreamUrl.pathname.match( + /^\/repos\/([^/]+\/[^/]+)\/issues\/(\d+)$/, + ); + if (issueMatch) { + const number = Number.parseInt(issueMatch[2] ?? "", 10); + const key = issueKey(repo, Number.isFinite(number) ? number : 1); + const issue = issues.get(key) ?? defaultIssue(repo, number); + if (request.method === "GET") return json(issuePayload(repo, issue)); + if (request.method === "PATCH") { + const timestamp = "2026-05-27T00:01:00Z"; + const body = await requestJson(request); + const updated: EvalIssue = { + ...issue, + ...(typeof body.title === "string" ? { title: body.title } : {}), + ...(typeof body.body === "string" ? { body: body.body } : {}), + ...(body.state === "closed" || body.state === "open" + ? { state: body.state } + : {}), + updated_at: timestamp, + }; + issues.set(key, updated); + return json(issuePayload(repo, updated)); + } + } + + if ( + request.method === "POST" && + upstreamUrl.pathname.match(/^\/repos\/[^/]+\/[^/]+\/issues\/\d+\/comments$/) + ) { + const number = + Number.parseInt(upstreamUrl.pathname.split("/").at(-2) ?? "", 10) || 1; + return json( + { + id: 1, + node_id: "IC_eval_1", + url: `https://api.github.com/repos/${repo}/issues/comments/1`, + issue_url: `https://api.github.com/repos/${repo}/issues/${number}`, + html_url: `${issueUrl(repo, number)}#issuecomment-1`, + user: userPayload("junior-eval"), + created_at: "2026-05-27T00:01:00Z", + updated_at: "2026-05-27T00:01:00Z", + body: String((await requestJson(request)).body ?? ""), + }, + { status: 201 }, + ); + } + + return text( + `Missing eval GitHub egress fixture for ${request.method} ${upstreamUrl.pathname}\n`, + { status: 501 }, + ); +} + +/** Intercept GitHub API traffic for test scenarios without shell command stubs. 
*/ +export async function interceptTestGitHubHttp(input: { + provider: string; + request: Request; + upstreamUrl: URL; +}): Promise { + if ( + input.provider !== "github" || + input.upstreamUrl.hostname !== GITHUB_API_HOST + ) { + return undefined; + } + + return await githubResponse(input.request, input.upstreamUrl); +} diff --git a/packages/junior-testing/src/http/index.ts b/packages/junior-testing/src/http/index.ts new file mode 100644 index 00000000..8ad85f88 --- /dev/null +++ b/packages/junior-testing/src/http/index.ts @@ -0,0 +1,4 @@ +export { allowsLiveTestHttpHost } from "./allow-list"; +export { resetTestGitHubHttpFixtures } from "./github"; +export { interceptTestHttp } from "./intercept"; +export type { HttpInterceptRequest } from "./intercept"; diff --git a/packages/junior-testing/src/http/intercept.ts b/packages/junior-testing/src/http/intercept.ts new file mode 100644 index 00000000..a0672bd5 --- /dev/null +++ b/packages/junior-testing/src/http/intercept.ts @@ -0,0 +1,38 @@ +import { interceptTestEvalOauthHttp } from "./eval-oauth"; +import { interceptTestGitHubHttp } from "./github"; +import { interceptTestSentryHttp } from "./sentry"; + +/** Provider-neutral HTTP request passed by transports that can intercept external calls. */ +export interface HttpInterceptRequest { + provider: string; + request: Request; + upstreamUrl: URL; +} + +function unhandledResponse(input: HttpInterceptRequest): Response { + return new Response( + `[HTTP MOCK] Unhandled external request: ${input.request.method} ${input.upstreamUrl.toString()}\n`, + { + status: 599, + headers: { "content-type": "text/plain; charset=utf-8" }, + }, + ); +} + +const TEST_HTTP_FIXTURES = [ + interceptTestGitHubHttp, + interceptTestSentryHttp, + interceptTestEvalOauthHttp, +]; + +/** Intercept test-owned external HTTP traffic before live network forwarding. 
*/ +export async function interceptTestHttp( + input: HttpInterceptRequest, +): Promise { + for (const fixture of TEST_HTTP_FIXTURES) { + const response = await fixture(input); + if (response) return response; + } + + return unhandledResponse(input); +} diff --git a/packages/junior-testing/src/http/sentry.ts b/packages/junior-testing/src/http/sentry.ts new file mode 100644 index 00000000..598942cb --- /dev/null +++ b/packages/junior-testing/src/http/sentry.ts @@ -0,0 +1,157 @@ +const SENTRY_HOSTS = new Set(["sentry.io", "us.sentry.io", "de.sentry.io"]); + +function json(value: unknown, init?: ResponseInit): Response { + return Response.json(value, { + ...init, + headers: { + "content-type": "application/json; charset=utf-8", + ...(init?.headers ?? {}), + }, + }); +} + +function text(value: string, init?: ResponseInit): Response { + return new Response(value, { + ...init, + headers: { + "content-type": "text/plain; charset=utf-8", + ...(init?.headers ?? {}), + }, + }); +} + +function hasBearerAuth(request: Request): boolean { + return (request.headers.get("authorization") ?? 
"").startsWith("Bearer "); +} + +function organizationPayload(): Record { + return { + id: "1", + slug: "getsentry", + name: "Sentry", + dateCreated: "2026-01-01T00:00:00Z", + status: { id: "active", name: "active" }, + avatar: { + avatarType: "letter_avatar", + avatarUuid: null, + avatarUrl: null, + }, + features: [], + isEarlyAdopter: false, + require2FA: false, + links: { + organizationUrl: "https://sentry.io/organizations/getsentry/", + regionUrl: "https://us.sentry.io", + }, + access: [], + role: "member", + }; +} + +function projectPayload(): Record { + return { + id: "1", + slug: "junior", + name: "junior", + platform: "javascript", + dateCreated: "2026-01-01T00:00:00Z", + isBookmarked: false, + isMember: true, + features: [], + firstEvent: "2026-05-27T00:00:00Z", + firstTransactionEvent: false, + hasSessions: false, + hasProfiles: false, + organization: { slug: "getsentry", name: "Sentry" }, + team: { + id: "1", + slug: "junior", + name: "junior", + }, + teams: [ + { + id: "1", + slug: "junior", + name: "junior", + }, + ], + }; +} + +function issuePayload(): Record { + return { + id: "100", + shortId: "JUNIOR-1", + title: "Eval issue", + culprit: "eval fixture", + permalink: "https://sentry.io/organizations/getsentry/issues/100/", + issueType: "error", + metadata: { + type: "Error", + value: "Eval issue", + }, + status: "unresolved", + level: "error", + count: "1", + userCount: 1, + firstSeen: "2026-05-27T00:00:00Z", + lastSeen: "2026-05-27T00:00:00Z", + project: { + id: "1", + slug: "junior", + name: "junior", + }, + }; +} + +/** Intercept Sentry API traffic for test scenarios without sandbox credentials. 
*/ +export async function interceptTestSentryHttp(input: { + provider: string; + request: Request; + upstreamUrl: URL; +}): Promise { + if ( + input.provider !== "sentry" || + !SENTRY_HOSTS.has(input.upstreamUrl.hostname) + ) { + return undefined; + } + + if (!hasBearerAuth(input.request)) { + return text("missing authorization\n", { status: 401 }); + } + + if ( + input.request.method === "GET" && + input.upstreamUrl.pathname === "/api/0/organizations/" + ) { + return json([organizationPayload()]); + } + + if ( + input.request.method === "GET" && + input.upstreamUrl.pathname === "/api/0/organizations/getsentry/" + ) { + return json(organizationPayload()); + } + + if ( + input.request.method === "GET" && + (input.upstreamUrl.pathname === "/api/0/projects/" || + input.upstreamUrl.pathname === "/api/0/organizations/getsentry/projects/") + ) { + return json([projectPayload()]); + } + + if ( + input.request.method === "GET" && + input.upstreamUrl.pathname === "/api/0/organizations/getsentry/issues/" + ) { + return json([issuePayload()]); + } + + return text( + `Missing eval Sentry HTTP fixture for ${input.request.method} ${input.upstreamUrl.pathname}\n`, + { status: 501 }, + ); +} diff --git a/packages/junior-testing/tsconfig.json b/packages/junior-testing/tsconfig.json new file mode 100644 index 00000000..d5e6868f --- /dev/null +++ b/packages/junior-testing/tsconfig.json @@ -0,0 +1,15 @@ +{ + "compilerOptions": { + "target": "ES2022", + "lib": ["ES2022", "DOM"], + "module": "ESNext", + "moduleResolution": "Bundler", + "strict": true, + "esModuleInterop": true, + "skipLibCheck": true, + "resolveJsonModule": true, + "isolatedModules": true, + "noEmit": true + }, + "include": ["src/**/*.ts"] +} diff --git a/packages/junior/src/app.ts b/packages/junior/src/app.ts index ea0c0fa9..6074ceef 100644 --- a/packages/junior/src/app.ts +++ b/packages/junior/src/app.ts @@ -12,11 +12,14 @@ import { setAgentPlugins, validateAgentPlugins, } from "@/chat/plugins/agent-hooks"; +import { 
createSchedulerPlugin } from "@/chat/scheduler/plugin"; import type { PluginConfig } from "@/chat/plugins/types"; import type { JuniorPlugin } from "@sentry/junior-plugin-api"; import { GET as diagnosticsGET } from "@/handlers/diagnostics"; import { GET as dashboardGET } from "@/handlers/diagnostics-dashboard"; import { GET as healthGET } from "@/handlers/health"; +import { POST as agentDispatchPOST } from "@/handlers/agent-dispatch"; +import { GET as heartbeatGET } from "@/handlers/heartbeat"; import { GET as mcpOauthCallbackGET } from "@/handlers/mcp-oauth-callback"; import { GET as oauthCallbackGET } from "@/handlers/oauth-callback"; import { @@ -176,9 +179,10 @@ function pluginConfigFromAgentPlugins( /** Create a Hono app with all Junior routes. */ export async function createApp(options?: JuniorAppOptions): Promise { const configuredPlugins = options?.plugins; - const agentPlugins = isJuniorPluginArray(configuredPlugins) - ? configuredPlugins - : []; + const agentPlugins = [ + createSchedulerPlugin(), + ...(isJuniorPluginArray(configuredPlugins) ? configuredPlugins : []), + ]; const pluginConfig = isJuniorPluginArray(configuredPlugins) ? 
mergePluginConfig( await resolveVirtualPluginConfig(), @@ -243,6 +247,14 @@ export async function createApp(options?: JuniorAppOptions): Promise { return turnResumePOST(c.req.raw, waitUntil); }); + app.post("/api/internal/agent-dispatch", (c) => { + return agentDispatchPOST(c.req.raw, waitUntil); + }); + + app.get("/api/internal/heartbeat", (c) => { + return heartbeatGET(c.req.raw, waitUntil); + }); + app.post("/api/webhooks/:platform", (c) => { return webhooksPOST(c.req.raw, c.req.param("platform"), waitUntil); }); diff --git a/packages/junior/src/chat/agent-dispatch/context.ts b/packages/junior/src/chat/agent-dispatch/context.ts new file mode 100644 index 00000000..68c4a47d --- /dev/null +++ b/packages/junior/src/chat/agent-dispatch/context.ts @@ -0,0 +1,71 @@ +import type { HeartbeatHookContext } from "@sentry/junior-plugin-api"; +import { createAgentPluginLogger } from "@/chat/plugins/logging"; +import { createPluginState } from "@/chat/plugins/state"; +import { + createOrGetDispatch, + getPluginDispatchProjection, + isTerminalDispatchStatus, +} from "./store"; +import { scheduleDispatchCallback } from "./signing"; +import type { DispatchRecord } from "./types"; +import { validateDispatchOptions } from "./validation"; + +const MAX_DISPATCHES_PER_HEARTBEAT = 25; + +function shouldScheduleDispatch( + record: DispatchRecord, + nowMs: number, +): boolean { + if (isTerminalDispatchStatus(record.status)) { + return false; + } + return ( + record.status !== "running" || + typeof record.leaseExpiresAtMs !== "number" || + record.leaseExpiresAtMs <= nowMs + ); +} + +/** Build the plugin-scoped heartbeat context that gates durable dispatch access. 
*/ +export function createHeartbeatContext(args: { + nowMs: number; + plugin: string; +}): HeartbeatHookContext { + let dispatchCount = 0; + return { + plugin: { name: args.plugin }, + nowMs: args.nowMs, + state: createPluginState(args.plugin), + log: createAgentPluginLogger(args.plugin), + agent: { + async dispatch(options) { + validateDispatchOptions(options); + if (dispatchCount >= MAX_DISPATCHES_PER_HEARTBEAT) { + throw new Error("Plugin heartbeat exceeded the dispatch limit"); + } + const result = await createOrGetDispatch({ + plugin: args.plugin, + options, + nowMs: args.nowMs, + }); + dispatchCount += 1; + if (shouldScheduleDispatch(result.record, args.nowMs)) { + await scheduleDispatchCallback({ + id: result.record.id, + expectedVersion: result.record.version, + }); + } + return { + id: result.record.id, + status: result.status, + }; + }, + async get(id) { + return await getPluginDispatchProjection({ + plugin: args.plugin, + id, + }); + }, + }, + }; +} diff --git a/packages/junior/src/chat/agent-dispatch/heartbeat.ts b/packages/junior/src/chat/agent-dispatch/heartbeat.ts new file mode 100644 index 00000000..cefb57be --- /dev/null +++ b/packages/junior/src/chat/agent-dispatch/heartbeat.ts @@ -0,0 +1,198 @@ +import { getAgentPlugins } from "@/chat/plugins/agent-hooks"; +import { logException, logInfo } from "@/chat/logging"; +import { createHeartbeatContext } from "./context"; +import { scheduleDispatchCallback } from "./signing"; +import { + getDispatchStorageKey, + getDispatchRecord, + isTerminalDispatchStatus, + listIncompleteDispatchIds, + updateDispatchRecord, + withDispatchLock, +} from "./store"; +import type { DispatchRecord } from "./types"; + +const DEFAULT_RECOVERY_LIMIT = 25; +const DEFAULT_PLUGIN_LIMIT = 25; +const DISPATCH_MAX_AGE_MS = 24 * 60 * 60 * 1000; +const PLUGIN_HEARTBEAT_TIMEOUT_MS = 25_000; + +function isStaleDispatch(args: { + nowMs: number; + record: { + lastCallbackAtMs?: number; + leaseExpiresAtMs?: number; + status: string; + }; 
+}): boolean { + if (args.record.status === "running") { + return ( + typeof args.record.leaseExpiresAtMs === "number" && + args.record.leaseExpiresAtMs <= args.nowMs + ); + } + if (args.record.status === "awaiting_resume") { + return ( + typeof args.record.leaseExpiresAtMs !== "number" || + args.record.leaseExpiresAtMs <= args.nowMs + ); + } + if (args.record.status === "pending") { + return ( + typeof args.record.lastCallbackAtMs !== "number" || + args.record.lastCallbackAtMs + 60_000 <= args.nowMs + ); + } + return false; +} + +async function failDispatch(args: { + errorMessage: string; + record: DispatchRecord; +}): Promise { + await withDispatchLock(args.record.id, async (state) => { + const current = + (await state.get( + getDispatchStorageKey(args.record.id), + )) ?? args.record; + if (isTerminalDispatchStatus(current.status)) { + return; + } + await updateDispatchRecord(state, { + ...current, + errorMessage: args.errorMessage, + status: "failed", + }); + }); +} + +async function runWithTimeout( + promise: Promise, + timeoutMs: number, +): Promise { + let timeout: ReturnType | undefined; + try { + return await Promise.race([ + promise, + new Promise((_, reject) => { + timeout = setTimeout(() => { + reject(new Error(`Plugin heartbeat exceeded ${timeoutMs}ms`)); + }, timeoutMs); + }), + ]); + } finally { + if (timeout) { + clearTimeout(timeout); + } + } +} + +/** Re-drive stale core dispatches before invoking plugin heartbeat hooks. */ +export async function recoverStaleDispatches(args: { + limit?: number; + nowMs: number; +}): Promise { + const ids = await listIncompleteDispatchIds(); + let recovered = 0; + for (const id of ids) { + if (recovered >= (args.limit ?? 
DEFAULT_RECOVERY_LIMIT)) { + break; + } + const record = await getDispatchRecord(id); + if (!record || isTerminalDispatchStatus(record.status)) { + continue; + } + try { + if (!isStaleDispatch({ record, nowMs: args.nowMs })) { + continue; + } + if (record.createdAtMs + DISPATCH_MAX_AGE_MS <= args.nowMs) { + await failDispatch({ + record, + errorMessage: "Dispatch expired before completion.", + }); + continue; + } + if (record.attempt >= record.maxAttempts) { + await failDispatch({ + record, + errorMessage: "Dispatch exceeded retry attempts.", + }); + continue; + } + await scheduleDispatchCallback({ + id: record.id, + expectedVersion: record.version, + }); + recovered += 1; + } catch (error) { + logException( + error, + "agent_dispatch_recovery_failed", + { runId: record.id }, + { "app.plugin.name": record.plugin }, + "Agent dispatch recovery failed", + ); + } + } + return recovered; +} + +/** Run trusted plugin heartbeat hooks with bounded per-invocation work. */ +export async function runTrustedPluginHeartbeats(args: { + limit?: number; + nowMs: number; +}): Promise { + let count = 0; + for (const plugin of getAgentPlugins()) { + if (count >= (args.limit ?? 
DEFAULT_PLUGIN_LIMIT)) { + break; + } + const heartbeat = plugin.hooks?.heartbeat; + if (!heartbeat) { + continue; + } + count += 1; + try { + const result = await runWithTimeout( + Promise.resolve( + heartbeat( + createHeartbeatContext({ + plugin: plugin.name, + nowMs: args.nowMs, + }), + ), + ), + PLUGIN_HEARTBEAT_TIMEOUT_MS, + ); + if ( + typeof result?.dispatchCount === "number" && + result.dispatchCount > 0 + ) { + logInfo( + "trusted_plugin_heartbeat_dispatched", + {}, + { + "app.dispatch.count": result.dispatchCount, + "app.plugin.name": plugin.name, + }, + "Plugin heartbeat dispatched agent work", + ); + } + } catch (error) { + logException( + error, + "trusted_plugin_heartbeat_failed", + {}, + { "app.plugin.name": plugin.name }, + "Trusted plugin heartbeat failed", + ); + } + } +} + +/** Run the core heartbeat phases. */ +export async function runHeartbeat(args: { nowMs: number }): Promise { + await recoverStaleDispatches({ nowMs: args.nowMs }); + await runTrustedPluginHeartbeats({ nowMs: args.nowMs }); +} diff --git a/packages/junior/src/chat/agent-dispatch/runner.ts b/packages/junior/src/chat/agent-dispatch/runner.ts new file mode 100644 index 00000000..9966ed0c --- /dev/null +++ b/packages/junior/src/chat/agent-dispatch/runner.ts @@ -0,0 +1,461 @@ +import { botConfig } from "@/chat/config"; +import { + generateAssistantReply as generateAssistantReplyImpl, + type AssistantReply, +} from "@/chat/respond"; +import { logException } from "@/chat/logging"; +import { + buildConversationContext, + markConversationMessage, + normalizeConversationText, + updateConversationStats, + upsertConversationMessage, +} from "@/chat/services/conversation-memory"; +import { + coerceThreadConversationState, + type ThreadConversationState, +} from "@/chat/state/conversation"; +import { + coerceThreadArtifactsState, + type ThreadArtifactsState, +} from "@/chat/state/artifacts"; +import { + getChannelConfigurationServiceById, + getPersistedThreadState, + mergeArtifactsState, + 
persistThreadStateById, +} from "@/chat/runtime/thread-state"; +import { getStateAdapter } from "@/chat/state/adapter"; +import { + planSlackReplyPosts, + postSlackApiReplyPosts, +} from "@/chat/slack/reply"; +import { buildSlackReplyFooter } from "@/chat/slack/footer"; +import { finalizeFailedTurnReply } from "@/chat/services/turn-failure-response"; +import { AuthorizationFlowDisabledError } from "@/chat/services/auth-pause"; +import { PluginCredentialFailureError } from "@/chat/services/plugin-auth-orchestration"; +import { canScheduleTurnTimeoutResume } from "@/chat/services/timeout-resume"; +import { isRetryableTurnError } from "@/chat/runtime/turn"; +import { scheduleDispatchCallback } from "./signing"; +import { + getDispatchConversationId, + getDispatchStorageKey, + getDispatchTurnId, + isTerminalDispatchStatus, + updateDispatchRecord, + withDispatchLock, +} from "./store"; +import type { DispatchCallback, DispatchRecord } from "./types"; + +const DISPATCH_SLICE_LEASE_MS = 5 * 60 * 1000; + +export interface AgentDispatchRunnerDeps { + generateAssistantReply?: typeof generateAssistantReplyImpl; + scheduleCallback?: typeof scheduleDispatchCallback; +} + +function getUserMessageId(dispatch: DispatchRecord): string { + return `dispatch:${dispatch.id}:user`; +} + +function getAssistantMessageId(dispatch: DispatchRecord): string { + return `dispatch:${dispatch.id}:assistant`; +} + +function buildDispatchConversationText(dispatch: DispatchRecord): string { + return `[dispatched task] ${dispatch.input}`; +} + +function ensureVisibleDeliveryText(reply: AssistantReply): AssistantReply { + if (reply.text.trim().length > 0 || !reply.files?.length) { + return reply; + } + return { + ...reply, + text: "Generated files are attached.", + }; +} + +function upsertDispatchUserMessage(args: { + conversation: ThreadConversationState; + dispatch: DispatchRecord; + nowMs: number; +}): string { + return upsertConversationMessage(args.conversation, { + id: 
getUserMessageId(args.dispatch), + role: "user", + text: normalizeConversationText( + buildDispatchConversationText(args.dispatch), + ), + createdAtMs: args.nowMs, + author: { + userName: `system:${args.dispatch.actor.id}`, + isBot: true, + }, + meta: { + explicitMention: true, + }, + }); +} + +async function persistRuntimePatch(args: { + artifacts?: ThreadArtifactsState; + conversation: ThreadConversationState; + sandboxDependencyProfileHash?: string; + sandboxId?: string; + threadId: string; +}): Promise { + await persistThreadStateById(args.threadId, { + artifacts: args.artifacts, + conversation: args.conversation, + sandboxId: args.sandboxId, + sandboxDependencyProfileHash: args.sandboxDependencyProfileHash, + }); +} + +async function markDispatch(args: { + dispatch: DispatchRecord; + errorMessage?: string; + resumeCheckpointVersion?: number; + resultMessageTs?: string; + status: DispatchRecord["status"]; +}): Promise { + return await withDispatchLock(args.dispatch.id, async (state) => { + const current = + (await state.get( + getDispatchStorageKey(args.dispatch.id), + )) ?? args.dispatch; + return await updateDispatchRecord(state, { + ...current, + status: args.status, + ...(args.errorMessage ? { errorMessage: args.errorMessage } : {}), + ...(typeof args.resumeCheckpointVersion === "number" + ? { resumeCheckpointVersion: args.resumeCheckpointVersion } + : {}), + ...(args.resultMessageTs + ? { resultMessageTs: args.resultMessageTs } + : {}), + }); + }); +} + +function canClaimDispatch(record: DispatchRecord, nowMs: number): boolean { + if (isTerminalDispatchStatus(record.status)) { + return false; + } + if (record.attempt >= record.maxAttempts) { + return false; + } + if ( + record.status === "running" && + typeof record.leaseExpiresAtMs === "number" && + record.leaseExpiresAtMs > nowMs + ) { + return false; + } + return true; +} + +/** Run one serverless slice for a core-owned agent dispatch. 
*/ +export async function runAgentDispatchSlice( + callback: DispatchCallback, + deps: AgentDispatchRunnerDeps = {}, +): Promise { + const generateAssistantReply = + deps.generateAssistantReply ?? generateAssistantReplyImpl; + const scheduleCallback = deps.scheduleCallback ?? scheduleDispatchCallback; + const nowMs = Date.now(); + const claimedDispatch = await withDispatchLock(callback.id, async (state) => { + const current = + (await state.get(getDispatchStorageKey(callback.id))) ?? + undefined; + if ( + !current || + !canClaimDispatch(current, nowMs) || + current.version !== callback.expectedVersion + ) { + return undefined; + } + return await updateDispatchRecord(state, { + ...current, + lastCallbackAtMs: nowMs, + leaseExpiresAtMs: nowMs + DISPATCH_SLICE_LEASE_MS, + status: "running", + }); + }); + if (!claimedDispatch) { + return; + } + let dispatch = claimedDispatch; + + const conversationId = getDispatchConversationId(dispatch.destination); + const stateAdapter = getStateAdapter(); + await stateAdapter.connect(); + const conversationLock = await stateAdapter.acquireLock( + conversationId, + DISPATCH_SLICE_LEASE_MS, + ); + if (!conversationLock) { + await markDispatch({ + dispatch, + status: "pending", + errorMessage: "Destination conversation is busy", + }); + return; + } + + try { + const startedDispatch = await withDispatchLock( + dispatch.id, + async (state) => { + const current = + (await state.get( + getDispatchStorageKey(dispatch.id), + )) ?? 
dispatch; + if ( + current.status !== "running" || + current.version !== dispatch.version || + current.attempt >= current.maxAttempts + ) { + return undefined; + } + return await updateDispatchRecord(state, { + ...current, + attempt: current.attempt + 1, + }); + }, + ); + if (!startedDispatch) { + return; + } + dispatch = startedDispatch; + + const persisted = await getPersistedThreadState(conversationId); + const conversation = coerceThreadConversationState(persisted); + const deliveredMessage = conversation.messages.find( + (message) => + message.id === getAssistantMessageId(dispatch) && + message.meta?.replied === true && + typeof message.meta.slackTs === "string", + ); + if (typeof deliveredMessage?.meta?.slackTs === "string") { + await markDispatch({ + dispatch, + status: "completed", + resultMessageTs: deliveredMessage.meta.slackTs, + }); + return; + } + + let artifacts = coerceThreadArtifactsState(persisted); + let sandboxId = + typeof persisted.app_sandbox_id === "string" + ? persisted.app_sandbox_id + : undefined; + let sandboxDependencyProfileHash = + typeof persisted.app_sandbox_dependency_profile_hash === "string" + ? 
persisted.app_sandbox_dependency_profile_hash + : undefined; + const channelConfiguration = getChannelConfigurationServiceById( + dispatch.destination.channelId, + ); + const configuration = await channelConfiguration.resolveValues(); + const userMessageId = upsertDispatchUserMessage({ + conversation, + dispatch, + nowMs, + }); + const conversationContext = buildConversationContext(conversation, { + excludeMessageId: userMessageId, + }); + + let reply = await generateAssistantReply(dispatch.input, { + authorizationFlowMode: "disabled", + configuration, + channelConfiguration, + conversationContext, + artifactState: artifacts, + piMessages: conversation.piMessages, + correlation: { + conversationId, + threadId: conversationId, + turnId: getDispatchTurnId(dispatch.id), + runId: dispatch.id, + channelId: dispatch.destination.channelId, + teamId: dispatch.destination.teamId, + actorType: dispatch.actor.type, + actorId: dispatch.actor.id, + }, + toolChannelId: dispatch.destination.channelId, + sandbox: { + sandboxId, + sandboxDependencyProfileHash, + }, + onSandboxAcquired: async (sandbox) => { + sandboxId = sandbox.sandboxId; + sandboxDependencyProfileHash = sandbox.sandboxDependencyProfileHash; + await persistRuntimePatch({ + threadId: conversationId, + conversation, + artifacts, + sandboxId, + sandboxDependencyProfileHash, + }); + }, + onArtifactStateUpdated: async (nextArtifacts) => { + artifacts = nextArtifacts; + await persistRuntimePatch({ + threadId: conversationId, + conversation, + artifacts, + sandboxId, + sandboxDependencyProfileHash, + }); + }, + }); + + const failure = + reply.diagnostics.outcome === "success" + ? undefined + : (reply.diagnostics.errorMessage ?? 
+ `Agent turn ended with ${reply.diagnostics.outcome}.`); + if (failure) { + reply = finalizeFailedTurnReply({ + reply, + logException, + context: { + conversationId, + slackThreadId: conversationId, + slackChannelId: dispatch.destination.channelId, + runId: dispatch.id, + actorType: dispatch.actor.type, + actorId: dispatch.actor.id, + assistantUserName: botConfig.userName, + modelId: reply.diagnostics.modelId, + }, + }); + } + + const deliveryReply = ensureVisibleDeliveryText(reply); + const resultMessageTs = await postSlackApiReplyPosts({ + channelId: dispatch.destination.channelId, + posts: planSlackReplyPosts({ reply: deliveryReply }), + footer: buildSlackReplyFooter({ + conversationId, + durationMs: deliveryReply.diagnostics.durationMs, + thinkingLevel: deliveryReply.diagnostics.thinkingLevel, + usage: deliveryReply.diagnostics.usage, + }), + fileUploadFailureMode: "strict", + }); + + markConversationMessage(conversation, userMessageId, { + replied: true, + skippedReason: undefined, + }); + upsertConversationMessage(conversation, { + id: getAssistantMessageId(dispatch), + role: "assistant", + text: normalizeConversationText(deliveryReply.text) || "[empty response]", + createdAtMs: nowMs, + author: { + userName: botConfig.userName, + isBot: true, + }, + meta: { + replied: true, + slackTs: resultMessageTs, + }, + }); + updateConversationStats(conversation); + const nextArtifacts = reply.artifactStatePatch + ? mergeArtifactsState(artifacts, reply.artifactStatePatch) + : artifacts; + await persistRuntimePatch({ + threadId: conversationId, + conversation, + artifacts: nextArtifacts, + sandboxId: reply.sandboxId ?? sandboxId, + sandboxDependencyProfileHash: + reply.sandboxDependencyProfileHash ?? sandboxDependencyProfileHash, + }); + dispatch = await markDispatch({ + dispatch, + status: failure ? "failed" : "completed", + ...(failure ? 
{ errorMessage: failure } : {}), + resultMessageTs, + }); + } catch (error) { + if (error instanceof AuthorizationFlowDisabledError) { + await markDispatch({ + dispatch, + status: "blocked", + errorMessage: `Dispatch requires ${error.provider} authorization.`, + }); + return; + } + if (error instanceof PluginCredentialFailureError) { + await markDispatch({ + dispatch, + status: "blocked", + errorMessage: error.message, + }); + return; + } + if ( + isRetryableTurnError(error, "mcp_auth_resume") || + isRetryableTurnError(error, "plugin_auth_resume") + ) { + await markDispatch({ + dispatch, + status: "blocked", + errorMessage: + "Dispatch requires authorization from an interactive user turn.", + }); + return; + } + if (isRetryableTurnError(error, "turn_timeout_resume")) { + const checkpointVersion = error.metadata?.checkpointVersion; + const nextSliceId = error.metadata?.sliceId; + if ( + typeof checkpointVersion === "number" && + canScheduleTurnTimeoutResume(nextSliceId) + ) { + const awaiting = await markDispatch({ + dispatch, + resumeCheckpointVersion: checkpointVersion, + status: "awaiting_resume", + }); + await scheduleCallback({ + id: awaiting.id, + expectedVersion: awaiting.version, + }); + return; + } + } + + logException( + error, + "agent_dispatch_run_failed", + { + conversationId, + slackThreadId: conversationId, + slackChannelId: dispatch.destination.channelId, + runId: dispatch.id, + actorType: dispatch.actor.type, + actorId: dispatch.actor.id, + assistantUserName: botConfig.userName, + modelId: botConfig.modelId, + }, + {}, + "Agent dispatch failed", + ); + await markDispatch({ + dispatch, + status: "failed", + errorMessage: error instanceof Error ? 
error.message : String(error), + }); + } finally { + await stateAdapter.releaseLock(conversationLock); + } +} diff --git a/packages/junior/src/chat/agent-dispatch/signing.ts b/packages/junior/src/chat/agent-dispatch/signing.ts new file mode 100644 index 00000000..78a3cc64 --- /dev/null +++ b/packages/junior/src/chat/agent-dispatch/signing.ts @@ -0,0 +1,123 @@ +import { createHmac, timingSafeEqual } from "node:crypto"; +import { resolveBaseUrl } from "@/chat/oauth-flow"; +import type { DispatchCallback } from "./types"; + +const DISPATCH_CALLBACK_PATH = "/api/internal/agent-dispatch"; +const DISPATCH_HMAC_CONTEXT = "junior.agent_dispatch.v1"; +const DISPATCH_SIGNATURE_VERSION = "v1"; +const DISPATCH_MAX_SKEW_MS = 5 * 60 * 1000; +const DISPATCH_CALLBACK_TIMEOUT_MS = 10_000; +const DISPATCH_TIMESTAMP_HEADER = "x-junior-dispatch-timestamp"; +const DISPATCH_SIGNATURE_HEADER = "x-junior-dispatch-signature"; + +function getDispatchSecret(): string | undefined { + return process.env.JUNIOR_SECRET?.trim() || undefined; +} + +function buildSignedPayload(timestamp: string, body: string): string { + return `${DISPATCH_HMAC_CONTEXT}:${timestamp}:${body}`; +} + +function signBody(secret: string, timestamp: string, body: string): string { + const digest = createHmac("sha256", secret) + .update(buildSignedPayload(timestamp, body)) + .digest("hex"); + return `${DISPATCH_SIGNATURE_VERSION}=${digest}`; +} + +function timingSafeMatch(expected: string, actual: string): boolean { + const expectedBuffer = Buffer.from(expected); + const actualBuffer = Buffer.from(actual); + if (expectedBuffer.length !== actualBuffer.length) { + return false; + } + return timingSafeEqual(expectedBuffer, actualBuffer); +} + +function parseDispatchCallback(value: unknown): DispatchCallback | undefined { + if (!value || typeof value !== "object") { + return undefined; + } + const record = value as Record; + if ( + typeof record.id !== "string" || + typeof record.expectedVersion !== "number" + ) { + return 
undefined; + } + return { + id: record.id, + expectedVersion: record.expectedVersion, + }; +} + +/** Schedule an authenticated internal callback to run a dispatched agent slice. */ +export async function scheduleDispatchCallback( + callback: DispatchCallback, +): Promise { + const baseUrl = resolveBaseUrl(); + if (!baseUrl) { + throw new Error( + "Cannot determine base URL for agent dispatch callback (set JUNIOR_BASE_URL or deploy to Vercel)", + ); + } + + const secret = getDispatchSecret(); + if (!secret) { + throw new Error( + "Cannot determine agent dispatch secret (set JUNIOR_SECRET)", + ); + } + + const body = JSON.stringify(callback); + const timestamp = Date.now().toString(); + const response = await fetch(`${baseUrl}${DISPATCH_CALLBACK_PATH}`, { + method: "POST", + headers: { + "content-type": "application/json", + [DISPATCH_TIMESTAMP_HEADER]: timestamp, + [DISPATCH_SIGNATURE_HEADER]: signBody(secret, timestamp, body), + }, + signal: AbortSignal.timeout(DISPATCH_CALLBACK_TIMEOUT_MS), + body, + }); + if (!response.ok) { + throw new Error( + `Agent dispatch callback failed with status ${response.status}`, + ); + } +} + +/** Verify and parse an authenticated agent dispatch callback request. */ +export async function verifyDispatchCallbackRequest( + request: Request, +): Promise { + const timestamp = + request.headers.get(DISPATCH_TIMESTAMP_HEADER)?.trim() ?? ""; + const signature = + request.headers.get(DISPATCH_SIGNATURE_HEADER)?.trim() ?? 
""; + const secret = getDispatchSecret(); + if (!timestamp || !signature || !secret) { + return undefined; + } + + const parsedTimestamp = Number.parseInt(timestamp, 10); + if ( + !Number.isFinite(parsedTimestamp) || + Math.abs(Date.now() - parsedTimestamp) > DISPATCH_MAX_SKEW_MS + ) { + return undefined; + } + + const body = await request.text(); + const expectedSignature = signBody(secret, timestamp, body); + if (!timingSafeMatch(expectedSignature, signature)) { + return undefined; + } + + try { + return parseDispatchCallback(JSON.parse(body)); + } catch { + return undefined; + } +} diff --git a/packages/junior/src/chat/agent-dispatch/store.ts b/packages/junior/src/chat/agent-dispatch/store.ts new file mode 100644 index 00000000..55917a57 --- /dev/null +++ b/packages/junior/src/chat/agent-dispatch/store.ts @@ -0,0 +1,249 @@ +import { createHash } from "node:crypto"; +import { THREAD_STATE_TTL_MS } from "chat"; +import type { Lock, StateAdapter } from "chat"; +import { getStateAdapter } from "@/chat/state/adapter"; +import type { + DispatchCreateResult, + DispatchOptions, + DispatchProjection, + DispatchRecord, + DispatchStatus, +} from "./types"; + +const DISPATCH_PREFIX = "junior:agent_dispatch"; +const DISPATCH_LOCK_TTL_MS = 10 * 60 * 1000; +const DISPATCH_INDEX_LOCK_TTL_MS = 10_000; +const DISPATCH_INDEX_MAX_LENGTH = 10_000; +const DEFAULT_MAX_ATTEMPTS = 5; + +/** Keep dispatch persistence keys consistent across callback and recovery paths. 
*/ +export function getDispatchStorageKey(id: string): string { + return `${DISPATCH_PREFIX}:record:${id}`; +} + +function incompleteDispatchIndexKey(): string { + return `${DISPATCH_PREFIX}:incomplete`; +} + +function incompleteDispatchIndexLockKey(): string { + return `${DISPATCH_PREFIX}:incomplete:lock`; +} + +function dispatchLockKey(id: string): string { + return `${DISPATCH_PREFIX}:lock:${id}`; +} + +function normalizeMetadata( + metadata: Record | undefined, +): Record | undefined { + if (!metadata) { + return undefined; + } + const entries = Object.entries(metadata).filter( + (entry): entry is [string, string] => + typeof entry[0] === "string" && typeof entry[1] === "string", + ); + return entries.length > 0 ? Object.fromEntries(entries) : undefined; +} + +function buildDispatchId(plugin: string, idempotencyKey: string): string { + const digest = createHash("sha256") + .update(plugin) + .update("\0") + .update(idempotencyKey) + .digest("hex") + .slice(0, 32); + return `dispatch_${digest}`; +} + +/** Map a dispatch destination to the conversation lock and memory key it owns. */ +export function getDispatchConversationId( + destination: DispatchRecord["destination"], +): string { + return `slack:${destination.teamId}:${destination.channelId}`; +} + +/** Give dispatch slices stable turn ids for resumability and trace correlation. */ +export function getDispatchTurnId(dispatchId: string): string { + return `dispatch:${dispatchId}`; +} + +function toDispatchProjection(record: DispatchRecord): DispatchProjection { + return { + id: record.id, + status: record.status, + ...(record.resultMessageTs + ? { resultMessageTs: record.resultMessageTs } + : {}), + ...(record.errorMessage ? { errorMessage: record.errorMessage } : {}), + }; +} + +/** Gate recovery to dispatches that can still make progress. 
*/ +export function isTerminalDispatchStatus(status: DispatchStatus): boolean { + return status === "completed" || status === "failed" || status === "blocked"; +} + +/** Serialize mutations for a dispatch so callbacks and heartbeats stay idempotent. */ +export async function withDispatchLock( + dispatchId: string, + callback: (state: StateAdapter) => Promise, +): Promise { + const state = getStateAdapter(); + await state.connect(); + const lock: Lock | null = await state.acquireLock( + dispatchLockKey(dispatchId), + DISPATCH_LOCK_TTL_MS, + ); + if (!lock) { + throw new Error(`Could not acquire dispatch lock for ${dispatchId}`); + } + + try { + return await callback(state); + } finally { + await state.releaseLock(lock); + } +} + +async function withIncompleteDispatchIndexLock( + state: StateAdapter, + callback: () => Promise, +): Promise { + const lock: Lock | null = await state.acquireLock( + incompleteDispatchIndexLockKey(), + DISPATCH_INDEX_LOCK_TTL_MS, + ); + if (!lock) { + throw new Error("Could not acquire incomplete dispatch index lock"); + } + + try { + return await callback(); + } finally { + await state.releaseLock(lock); + } +} + +async function syncIncompleteDispatchIndex( + state: StateAdapter, + record: DispatchRecord, +): Promise { + await withIncompleteDispatchIndexLock(state, async () => { + const existing = + (await state.get(incompleteDispatchIndexKey())) ?? []; + const ids = [ + ...new Set(existing.filter((id): id is string => typeof id === "string")), + ]; + const next = isTerminalDispatchStatus(record.status) + ? ids.filter((id) => id !== record.id) + : ids.includes(record.id) + ? 
ids + : [...ids, record.id]; + + if ( + next.length === ids.length && + next.every((id, index) => id === ids[index]) + ) { + return; + } + + await state.set( + incompleteDispatchIndexKey(), + next.slice(-DISPATCH_INDEX_MAX_LENGTH), + THREAD_STATE_TTL_MS, + ); + }); +} + +async function putRecord( + state: StateAdapter, + record: DispatchRecord, +): Promise { + await state.set( + getDispatchStorageKey(record.id), + record, + THREAD_STATE_TTL_MS, + ); + await syncIncompleteDispatchIndex(state, record); +} + +/** Load dispatch state for callback, recovery, and plugin projection paths. */ +export async function getDispatchRecord( + id: string, +): Promise { + const state = getStateAdapter(); + await state.connect(); + return ( + (await state.get(getDispatchStorageKey(id))) ?? undefined + ); +} + +/** Create a plugin dispatch idempotently from the plugin's idempotency key. */ +export async function createOrGetDispatch(args: { + nowMs: number; + options: DispatchOptions; + plugin: string; +}): Promise { + const id = buildDispatchId(args.plugin, args.options.idempotencyKey); + return await withDispatchLock(id, async (state) => { + const existing = + (await state.get(getDispatchStorageKey(id))) ?? undefined; + if (existing) { + return { record: existing, status: "already_exists" }; + } + + const metadata = normalizeMetadata(args.options.metadata); + const record: DispatchRecord = { + actor: { type: "system", id: args.plugin }, + attempt: 0, + createdAtMs: args.nowMs, + destination: args.options.destination, + id, + idempotencyKey: args.options.idempotencyKey, + input: args.options.input, + maxAttempts: DEFAULT_MAX_ATTEMPTS, + ...(metadata ? { metadata } : {}), + plugin: args.plugin, + status: "pending", + updatedAtMs: args.nowMs, + version: 1, + }; + await putRecord(state, record); + return { record, status: "created" }; + }); +} + +/** Advance dispatch versions so stale callbacks cannot overwrite newer state. 
*/ +export async function updateDispatchRecord( + state: StateAdapter, + record: DispatchRecord, +): Promise { + const next = { + ...record, + updatedAtMs: Date.now(), + version: record.version + 1, + }; + await putRecord(state, next); + return next; +} + +/** Feed heartbeat recovery from the durable incomplete-dispatch index. */ +export async function listIncompleteDispatchIds(): Promise { + const state = getStateAdapter(); + await state.connect(); + const ids = (await state.get(incompleteDispatchIndexKey())) ?? []; + return [...new Set(ids.filter((id): id is string => typeof id === "string"))]; +} + +/** Return a plugin-scoped dispatch projection without exposing raw runtime state. */ +export async function getPluginDispatchProjection(args: { + id: string; + plugin: string; +}): Promise { + const record = await getDispatchRecord(args.id); + if (!record || record.plugin !== args.plugin) { + return undefined; + } + return toDispatchProjection(record); +} diff --git a/packages/junior/src/chat/agent-dispatch/types.ts b/packages/junior/src/chat/agent-dispatch/types.ts new file mode 100644 index 00000000..9b259ea0 --- /dev/null +++ b/packages/junior/src/chat/agent-dispatch/types.ts @@ -0,0 +1,63 @@ +export type DispatchStatus = + | "pending" + | "running" + | "awaiting_resume" + | "completed" + | "failed" + | "blocked"; + +export interface DispatchActor { + type: "system"; + id: string; +} + +export interface DispatchDestination { + platform: "slack"; + teamId: string; + channelId: string; +} + +export interface DispatchOptions { + destination: DispatchDestination; + idempotencyKey: string; + input: string; + metadata?: Record; +} + +export interface DispatchRecord { + actor: DispatchActor; + attempt: number; + createdAtMs: number; + destination: DispatchDestination; + errorMessage?: string; + id: string; + idempotencyKey: string; + input: string; + lastCallbackAtMs?: number; + leaseExpiresAtMs?: number; + maxAttempts: number; + metadata?: Record; + plugin: string; + 
resultMessageTs?: string; + resumeCheckpointVersion?: number; + status: DispatchStatus; + updatedAtMs: number; + version: number; +} + +export interface DispatchProjection { + errorMessage?: string; + id: string; + resultMessageTs?: string; + status: DispatchStatus; +} + +export interface DispatchCallback { + expectedVersion: number; + id: string; +} + +export interface DispatchCreateResult { + record: DispatchRecord; + status: "created" | "already_exists"; +} diff --git a/packages/junior/src/chat/agent-dispatch/validation.ts b/packages/junior/src/chat/agent-dispatch/validation.ts new file mode 100644 index 00000000..c5ab60a3 --- /dev/null +++ b/packages/junior/src/chat/agent-dispatch/validation.ts @@ -0,0 +1,51 @@ +import type { DispatchOptions } from "./types"; +import { isSlackConversationId, isSlackTeamId } from "@/chat/slack/ids"; + +const MAX_DISPATCH_INPUT_LENGTH = 32_000; +const MAX_IDEMPOTENCY_KEY_LENGTH = 512; +const MAX_METADATA_KEYS = 20; +const MAX_METADATA_KEY_LENGTH = 128; +const MAX_METADATA_VALUE_LENGTH = 512; + +/** Validate plugin-provided dispatch options before core persists them. 
*/ +export function validateDispatchOptions(options: DispatchOptions): void { + if (!options.idempotencyKey.trim()) { + throw new Error("Dispatch idempotencyKey is required"); + } + if (options.idempotencyKey.length > MAX_IDEMPOTENCY_KEY_LENGTH) { + throw new Error("Dispatch idempotencyKey exceeds the maximum length"); + } + if (options.destination.platform !== "slack") { + throw new Error("Dispatch destination platform must be slack"); + } + if (!isSlackTeamId(options.destination.teamId)) { + throw new Error("Dispatch destination teamId must be a Slack team id"); + } + if (!isSlackConversationId(options.destination.channelId)) { + throw new Error( + "Dispatch destination channelId must be a Slack channel id", + ); + } + if (!options.input.trim()) { + throw new Error("Dispatch input is required"); + } + if (options.input.length > MAX_DISPATCH_INPUT_LENGTH) { + throw new Error("Dispatch input exceeds the maximum length"); + } + const metadata = options.metadata ?? {}; + const entries = Object.entries(metadata); + if (entries.length > MAX_METADATA_KEYS) { + throw new Error("Dispatch metadata has too many keys"); + } + for (const [key, value] of entries) { + if (!key.trim() || typeof value !== "string") { + throw new Error("Dispatch metadata values must be strings"); + } + if (key.length > MAX_METADATA_KEY_LENGTH) { + throw new Error("Dispatch metadata key exceeds the maximum length"); + } + if (value.length > MAX_METADATA_VALUE_LENGTH) { + throw new Error("Dispatch metadata value exceeds the maximum length"); + } + } +} diff --git a/packages/junior/src/chat/capabilities/factory.ts b/packages/junior/src/chat/capabilities/factory.ts index b7f17182..15bed0f4 100644 --- a/packages/junior/src/chat/capabilities/factory.ts +++ b/packages/junior/src/chat/capabilities/factory.ts @@ -4,21 +4,15 @@ import { ProviderCredentialRouter } from "@/chat/capabilities/router"; import type { CredentialBroker, CredentialLease, - CredentialHeaderTransform, } from 
"@/chat/credentials/broker"; import { StateAdapterTokenStore } from "@/chat/credentials/state-adapter-token-store"; -import { TestCredentialBroker } from "@/chat/credentials/test-broker"; import type { UserTokenStore } from "@/chat/credentials/user-token-store"; -import { resolveAuthTokenPlaceholder } from "@/chat/plugins/auth/auth-token-placeholder"; -import { resolvePluginCommandEnv } from "@/chat/plugins/command-env"; import { createPluginBroker, getPluginProviders, } from "@/chat/plugins/registry"; -import type { PluginDefinition, PluginManifest } from "@/chat/plugins/types"; import { getStateAdapter } from "@/chat/state/adapter"; -const ENV_PLACEHOLDER_RE = /\$\{([A-Z_][A-Z0-9_]*)\}/g; const sandboxEgressRouters = new WeakMap< StateAdapter, ProviderCredentialRouter @@ -29,55 +23,10 @@ export function createUserTokenStore(): UserTokenStore { return new StateAdapterTokenStore(getStateAdapter()); } -function resolveTestApiHeaderTransforms( - manifest: PluginManifest, -): CredentialHeaderTransform[] { - const { domains, apiHeaders } = manifest; - if (!domains || !apiHeaders) { - return []; - } - // Eval mode must not read deployment secrets; placeholders become dummy values. - const headers = Object.fromEntries( - Object.entries(apiHeaders).map(([key, value]) => [ - key, - value.replace(ENV_PLACEHOLDER_RE, (_match, name) => { - return `eval-test-${String(name).toLowerCase().replaceAll("_", "-")}`; - }), - ]), - ); - return domains.map((domain) => ({ domain, headers })); -} - -function createTestBroker(plugin: PluginDefinition): TestCredentialBroker { - const { apiHeaders, credentials, name } = plugin.manifest; - const commandEnv = resolvePluginCommandEnv(plugin.manifest); - return new TestCredentialBroker({ - provider: name, - ...(credentials - ? { - domains: credentials.domains, - ...(credentials.apiHeaders - ? 
{ apiHeaders: credentials.apiHeaders } - : {}), - envKey: credentials.authTokenEnv, - placeholder: resolveAuthTokenPlaceholder(credentials), - } - : {}), - ...(apiHeaders - ? { - headerTransforms: () => - resolveTestApiHeaderTransforms(plugin.manifest), - } - : {}), - ...(Object.keys(commandEnv).length > 0 ? { env: commandEnv } : {}), - }); -} - function createProviderCredentialRouter( userTokenStore: UserTokenStore, ): ProviderCredentialRouter { logCapabilityCatalogLoadedOnce(); - const useTestBroker = process.env.EVAL_ENABLE_TEST_CREDENTIALS === "1"; const brokersByProvider: Record = {}; @@ -86,9 +35,7 @@ function createProviderCredentialRouter( if (!plugin.manifest.credentials && !plugin.manifest.apiHeaders) { continue; } - brokersByProvider[name] = useTestBroker - ? createTestBroker(plugin) - : createPluginBroker(name, { userTokenStore }); + brokersByProvider[name] = createPluginBroker(name, { userTokenStore }); } return new ProviderCredentialRouter({ brokersByProvider }); diff --git a/packages/junior/src/chat/config.ts b/packages/junior/src/chat/config.ts index 933e936e..d240eea1 100644 --- a/packages/junior/src/chat/config.ts +++ b/packages/junior/src/chat/config.ts @@ -60,6 +60,7 @@ export interface ChatConfig { }; state: { adapter: "memory" | "redis"; + keyPrefix?: string; redisUrl?: string; }; } @@ -204,6 +205,7 @@ export function readChatConfig( env.JUNIOR_STATE_ADAPTER?.trim().toLowerCase() === "memory" ? 
"memory" : "redis", + keyPrefix: toOptionalTrimmed(env.JUNIOR_STATE_KEY_PREFIX), redisUrl: toOptionalTrimmed(env.REDIS_URL), }, }; diff --git a/packages/junior/src/chat/credentials/test-broker.ts b/packages/junior/src/chat/credentials/test-broker.ts deleted file mode 100644 index 1dd84f97..00000000 --- a/packages/junior/src/chat/credentials/test-broker.ts +++ /dev/null @@ -1,60 +0,0 @@ -import { randomUUID } from "node:crypto"; -import type { - CredentialBroker, - CredentialHeaderTransform, - CredentialLease, -} from "@/chat/credentials/broker"; -import { mergeHeaderTransforms } from "@/chat/credentials/header-transforms"; - -interface TestBrokerConfig { - provider: string; - domains?: string[]; - apiHeaders?: Record; - headerTransforms?: () => CredentialHeaderTransform[]; - env?: Record; - envKey?: string; - placeholder?: string; -} - -/** Issue deterministic placeholder credential leases for eval runs. */ -export class TestCredentialBroker implements CredentialBroker { - private readonly config: TestBrokerConfig; - - constructor(config: TestBrokerConfig) { - this.config = config; - } - - async issue(input: { reason: string }): Promise { - const token = - process.env.EVAL_TEST_CREDENTIAL_TOKEN?.trim() || "eval-test-token"; - const expiresAt = new Date(Date.now() + 5 * 60 * 1000).toISOString(); - const env = { - ...(this.config.env ?? {}), - ...(this.config.envKey && this.config.placeholder - ? { [this.config.envKey]: this.config.placeholder } - : {}), - }; - const tokenTransforms = - this.config.domains?.map((domain) => ({ - domain, - headers: { - ...(this.config.apiHeaders ?? {}), - Authorization: `Bearer ${token}`, - }, - })) ?? []; - - return { - id: randomUUID(), - provider: this.config.provider, - env, - headerTransforms: mergeHeaderTransforms([ - ...(this.config.headerTransforms?.() ?? 
[]), - ...tokenTransforms, - ]), - expiresAt, - metadata: { - reason: input.reason, - }, - }; - } -} diff --git a/packages/junior/src/chat/ingress/workspace-membership.ts b/packages/junior/src/chat/ingress/workspace-membership.ts index 3d507afa..9d59f9e2 100644 --- a/packages/junior/src/chat/ingress/workspace-membership.ts +++ b/packages/junior/src/chat/ingress/workspace-membership.ts @@ -1,15 +1,5 @@ -import { AsyncLocalStorage } from "node:async_hooks"; - -const workspaceTeamIdStorage = new AsyncLocalStorage(); - -/** Run a callback with the workspace team ID available for membership checks. */ -export function runWithWorkspaceTeamId( - teamId: string | undefined, - fn: () => T, -): T { - if (!teamId) return fn(); - return workspaceTeamIdStorage.run(teamId, fn); -} +import { getWorkspaceTeamId } from "@/chat/slack/workspace-context"; +export { runWithWorkspaceTeamId } from "@/chat/slack/workspace-context"; /** * Return true when a Slack event's author is from an external workspace. @@ -23,7 +13,7 @@ export function isExternalSlackUser( ): boolean { if (!raw) return false; - const workspaceTeamId = workspaceTeamIdStorage.getStore(); + const workspaceTeamId = getWorkspaceTeamId(); if (!workspaceTeamId) return false; const userTeam = diff --git a/packages/junior/src/chat/logging.ts b/packages/junior/src/chat/logging.ts index 040e0277..80d66686 100644 --- a/packages/junior/src/chat/logging.ts +++ b/packages/junior/src/chat/logging.ts @@ -38,6 +38,8 @@ export interface LogContext { slackUserName?: string; slackChannelId?: string; runId?: string; + actorType?: string; + actorId?: string; assistantUserName?: string; modelId?: string; skillName?: string; @@ -382,6 +384,8 @@ function contextToAttributes(context: LogContext): LogAttributes { "enduser.id": context.slackUserId, "enduser.pseudo.id": context.slackUserName, "app.run.id": context.runId, + "app.actor.type": context.actorType, + "app.actor.id": context.actorId, "gen_ai.agent.name": context.assistantUserName, 
"gen_ai.request.model": context.modelId, "app.skill.name": context.skillName, @@ -797,6 +801,14 @@ function numericConsoleToken( return typeof value === "number" ? `${label}=${value}` : undefined; } +function stringConsoleToken( + label: string, + value: AttributeValue | undefined, +): string | undefined { + const normalized = toOptionalString(value); + return normalized ? `${label}=${normalized}` : undefined; +} + function booleanConsoleToken( label: string, value: AttributeValue | undefined, @@ -827,7 +839,9 @@ function getPrettyConsoleSummaryTokens( ); pushPrettyConsoleToken( tokens, - toOptionalString(attributes["app.plugin.name"]) ?? undefined, + eventName.startsWith("trusted_plugin_heartbeat") + ? stringConsoleToken("plugin", attributes["app.plugin.name"]) + : (toOptionalString(attributes["app.plugin.name"]) ?? undefined), ); pushPrettyConsoleToken( tokens, @@ -845,6 +859,10 @@ function getPrettyConsoleSummaryTokens( tokens, numericConsoleToken("plugins", attributes["app.plugin.count"]), ); + pushPrettyConsoleToken( + tokens, + numericConsoleToken("dispatches", attributes["app.dispatch.count"]), + ); pushPrettyConsoleToken( tokens, numericConsoleToken("skills", attributes["app.skill.count"]), diff --git a/packages/junior/src/chat/plugins/agent-hooks.ts b/packages/junior/src/chat/plugins/agent-hooks.ts index ee16465c..36ef1859 100644 --- a/packages/junior/src/chat/plugins/agent-hooks.ts +++ b/packages/junior/src/chat/plugins/agent-hooks.ts @@ -4,7 +4,11 @@ import type { JuniorPlugin, } from "@sentry/junior-plugin-api"; import { logInfo } from "@/chat/logging"; +import { createAgentPluginLogger } from "@/chat/plugins/logging"; +import { createPluginState } from "@/chat/plugins/state"; import { SANDBOX_WORKSPACE_ROOT } from "@/chat/sandbox/paths"; +import type { ToolDefinition } from "@/chat/tools/definition"; +import type { ToolRuntimeContext } from "@/chat/tools/types"; import type { SandboxCommandInput, SandboxInstance, @@ -35,6 +39,7 @@ export interface 
AgentPluginHookRunner { let agentPlugins: JuniorPlugin[] = []; const AGENT_PLUGIN_NAME_RE = /^[a-z][a-z0-9-]*$/; +const AGENT_PLUGIN_TOOL_NAME_RE = /^[a-z][A-Za-z0-9]*$/; /** Validate trusted plugin identity before it can affect process-wide hooks. */ export function validateAgentPlugins(plugins: JuniorPlugin[]): void { @@ -67,6 +72,46 @@ export function getAgentPlugins(): JuniorPlugin[] { return [...agentPlugins]; } +/** Collect turn-scoped tools exposed by trusted plugins. */ +export function getAgentPluginTools( + context: ToolRuntimeContext, +): Record> { + const tools: Record> = {}; + for (const plugin of getAgentPlugins()) { + const hook = plugin.hooks?.tools; + if (!hook) { + continue; + } + const log = createAgentPluginLogger(plugin.name); + const pluginTools = hook({ + plugin: { name: plugin.name }, + log, + requester: context.requester, + channelCapabilities: context.channelCapabilities, + channelId: context.channelId, + teamId: context.teamId, + messageTs: context.messageTs, + threadTs: context.threadTs, + userText: context.userText, + state: createPluginState(plugin.name), + }); + for (const [name, tool] of Object.entries(pluginTools)) { + if (!AGENT_PLUGIN_TOOL_NAME_RE.test(name)) { + throw new Error( + `Trusted plugin tool "${name}" from plugin "${plugin.name}" must be a camelCase identifier`, + ); + } + if (tools[name]) { + throw new Error( + `Duplicate trusted plugin tool "${name}" from plugin "${plugin.name}"`, + ); + } + tools[name] = tool as unknown as ToolDefinition; + } + } + return tools; +} + function isRecord(value: unknown): value is Record { return Boolean(value && typeof value === "object" && !Array.isArray(value)); } @@ -139,6 +184,7 @@ export function createAgentPluginHookRunner( ); await hook({ plugin: { name: plugin.name }, + log: createAgentPluginLogger(plugin.name), requester: input.requester, sandbox: sandboxCapability, }); @@ -157,6 +203,7 @@ export function createAgentPluginHookRunner( let denied: string | undefined; await hook({ 
plugin: { name: plugin.name }, + log: createAgentPluginLogger(plugin.name), requester: input.requester, tool: { name: tool.name, diff --git a/packages/junior/src/chat/plugins/logging.ts b/packages/junior/src/chat/plugins/logging.ts new file mode 100644 index 00000000..ca364d98 --- /dev/null +++ b/packages/junior/src/chat/plugins/logging.ts @@ -0,0 +1,33 @@ +import type { AgentPluginLogger } from "@sentry/junior-plugin-api"; +import { logException, logInfo, logWarn } from "@/chat/logging"; + +/** Create the host logger exposed to trusted plugin hooks. */ +export function createAgentPluginLogger(plugin: string): AgentPluginLogger { + return { + info(message, metadata) { + logInfo( + "agent_plugin_log_info", + {}, + { "app.plugin.name": plugin, ...metadata }, + message, + ); + }, + warn(message, metadata) { + logWarn( + "agent_plugin_log_warn", + {}, + { "app.plugin.name": plugin, ...metadata }, + message, + ); + }, + error(message, metadata) { + logException( + new Error(message), + "agent_plugin_log_error", + {}, + { "app.plugin.name": plugin, ...metadata }, + message, + ); + }, + }; +} diff --git a/packages/junior/src/chat/plugins/state.ts b/packages/junior/src/chat/plugins/state.ts new file mode 100644 index 00000000..f4c415f2 --- /dev/null +++ b/packages/junior/src/chat/plugins/state.ts @@ -0,0 +1,46 @@ +import { createHash } from "node:crypto"; +import type { AgentPluginState } from "@sentry/junior-plugin-api"; +import { getStateAdapter } from "@/chat/state/adapter"; + +const MAX_PLUGIN_STATE_KEY_LENGTH = 512; + +function hashKeyPart(value: string): string { + return createHash("sha256").update(value).digest("hex").slice(0, 32); +} + +function pluginStateKey(plugin: string, key: string): string { + return `junior:plugin_state:${hashKeyPart(plugin)}:${hashKeyPart(key)}`; +} + +function validatePluginStateKey(key: string): void { + if (!key.trim()) { + throw new Error("Plugin state key is required"); + } + if (key.length > MAX_PLUGIN_STATE_KEY_LENGTH) { + throw 
new Error("Plugin state key exceeds the maximum length"); + } +} + +/** Create a durable state namespace scoped to one trusted plugin. */ +export function createPluginState(plugin: string): AgentPluginState { + return { + async delete(key) { + validatePluginStateKey(key); + const state = getStateAdapter(); + await state.connect(); + await state.delete(pluginStateKey(plugin, key)); + }, + async get(key) { + validatePluginStateKey(key); + const state = getStateAdapter(); + await state.connect(); + return (await state.get(pluginStateKey(plugin, key))) ?? undefined; + }, + async set(key, value, ttlMs) { + validatePluginStateKey(key); + const state = getStateAdapter(); + await state.connect(); + await state.set(pluginStateKey(plugin, key), value, ttlMs); + }, + }; +} diff --git a/packages/junior/src/chat/prompt.ts b/packages/junior/src/chat/prompt.ts index 562bd4ee..d869562b 100644 --- a/packages/junior/src/chat/prompt.ts +++ b/packages/junior/src/chat/prompt.ts @@ -8,7 +8,6 @@ import { worldPathCandidates, } from "@/chat/discovery"; import { logInfo, logWarn } from "@/chat/logging"; -import { getPluginProviders } from "@/chat/plugins/registry"; import { slackOutputPolicy } from "@/chat/slack/output"; import { SANDBOX_DATA_ROOT, @@ -161,9 +160,6 @@ function formatSkillEntry(skill: SkillMetadata): string[] { lines.push(` ${escapeXml(skill.name)}`); lines.push(` ${escapeXml(skill.description)}`); lines.push(` ${escapeXml(skillLocation)}`); - if (skill.pluginProvider) { - lines.push(` ${escapeXml(skill.pluginProvider)}`); - } lines.push(" "); return lines; } @@ -235,37 +231,6 @@ function formatLoadedSkillsForPrompt(skills: Skill[]): string | null { return lines.join("\n"); } -function formatProviderCatalogForPrompt(): string | null { - const providers = getPluginProviders().map((plugin) => plugin.manifest); - if (providers.length === 0) { - return null; - } - - const lines = [ - "Config keys and default targets per provider; use after a skill is loaded. 
Run authenticated provider commands directly after resolving target defaults; let the runtime handle auth pauses/resumes.", - ]; - for (const provider of providers) { - lines.push(`- provider: ${escapeXml(provider.name)}`); - lines.push( - ` - config_keys: ${ - provider.configKeys.length > 0 - ? escapeXml(provider.configKeys.join(", ")) - : "none" - }`, - ); - lines.push( - ` - default_context: ${ - provider.target - ? escapeXml( - `${provider.target.type} via ${provider.target.configKey}`, - ) - : "none" - }`, - ); - } - return lines.join("\n"); -} - function formatActiveMcpCatalogsForPrompt( catalogs: ActiveMcpCatalogSummary[], ): string | null { @@ -465,7 +430,7 @@ function buildOutputSection(): string { return [ openTag, "- Start with the answer or result, not internal process narration.", - "- Use Slack-flavored Markdown: **bold** section labels, `code`, [text](url) links, bullet lists, and fenced code blocks. No tables. When the answer primarily lists several URLs, show each URL bare instead of as a labeled link.", + "- Use Slack-flavored Markdown: **bold** section labels, `code`, [text](url) links, bullet lists, and fenced code blocks. No hash-prefixed headings and no tables. 
When the answer primarily lists several URLs, show each URL bare instead of as a labeled link.", "- Keep replies brief and scannable; use bullets or short code blocks when helpful, and one compact thread reply when it fits.", "- When a research or document-style answer would benefit from continuation, multiple sections, or future reference value, create a Slack canvas and keep the thread reply to one or two short sentences plus the link; do not recap the canvas contents.", "- Unless a successful Slack side-effect tool intentionally satisfied the request by itself, end every turn with a final user-facing markdown response.", @@ -602,11 +567,6 @@ function buildCapabilitiesSection(params: { blocks.push(renderTagBlock("tool-guidance", toolGuidance)); } - const providerCatalog = formatProviderCatalogForPrompt(); - if (providerCatalog) { - blocks.push(renderTagBlock("providers", providerCatalog)); - } - if (blocks.length === 0) { return null; } diff --git a/packages/junior/src/chat/respond.ts b/packages/junior/src/chat/respond.ts index cf8cd7b2..9962d6b7 100644 --- a/packages/junior/src/chat/respond.ts +++ b/packages/junior/src/chat/respond.ts @@ -102,7 +102,11 @@ import { } from "@/chat/services/turn-checkpoint"; import { createMcpAuthOrchestration } from "@/chat/services/mcp-auth-orchestration"; import { createPluginAuthOrchestration } from "@/chat/services/plugin-auth-orchestration"; -import { AuthorizationPauseError } from "@/chat/services/auth-pause"; +import { + AuthorizationFlowDisabledError, + AuthorizationPauseError, + type AuthorizationFlowMode, +} from "@/chat/services/auth-pause"; // Re-export types for backward compatibility with existing consumers. 
export type { AssistantReply, AgentTurnDiagnostics }; @@ -127,14 +131,18 @@ export interface ReplyRequestContext { turnId?: string; runId?: string; channelId?: string; + teamId?: string; messageTs?: string; threadTs?: string; requesterId?: string; + actorType?: string; + actorId?: string; }; toolChannelId?: string; conversationContext?: string; artifactState?: ThreadArtifactsState; pendingAuth?: ConversationPendingAuthState; + authorizationFlowMode?: AuthorizationFlowMode; configuration?: Record; /** Durable Pi transcript for this conversation, excluding ephemeral turn context. */ piMessages?: PiMessage[]; @@ -342,6 +350,8 @@ export async function generateAssistantReply( requesterId: context.correlation?.requesterId, channelId: context.correlation?.channelId, runId: context.correlation?.runId, + actorType: context.correlation?.actorType, + actorId: context.correlation?.actorId, assistantUserName: botConfig.userName, modelId: botConfig.modelId, }; @@ -369,6 +379,8 @@ export async function generateAssistantReply( slackUserId: context.correlation?.requesterId, slackChannelId: context.correlation?.channelId, runId: context.correlation?.runId, + actorType: context.correlation?.actorType, + actorId: context.correlation?.actorId, assistantUserName: botConfig.userName, modelId: botConfig.modelId, }; @@ -632,6 +644,7 @@ export async function generateAssistantReply( getMergedArtifactState: () => mergeArtifactsState(context.artifactState ?? 
{}, artifactStatePatch), onPendingAuth: context.onAuthPending, + authorizationFlowMode: context.authorizationFlowMode, }, () => agent?.abort(), ); @@ -646,6 +659,7 @@ export async function generateAssistantReply( channelConfiguration: context.channelConfiguration, currentPendingAuth: context.pendingAuth, onPendingAuth: context.onAuthPending, + authorizationFlowMode: context.authorizationFlowMode, userTokenStore, }, () => agent?.abort(), @@ -667,6 +681,8 @@ export async function generateAssistantReply( slackUserId: context.correlation?.requesterId, slackChannelId: context.correlation?.channelId, runId: context.correlation?.runId, + actorType: context.correlation?.actorType, + actorId: context.correlation?.actorId, assistantUserName: botConfig.userName, modelId: botConfig.modelId, }); @@ -738,6 +754,8 @@ export async function generateAssistantReply( { channelId: toolChannelId, channelCapabilities, + requester: context.requester, + teamId: context.correlation?.teamId, messageTs: context.correlation?.messageTs, threadTs: context.correlation?.threadTs, userText: userInput, @@ -1203,6 +1221,9 @@ export async function generateAssistantReply( if (isRetryableTurnError(error)) { throw error; } + if (error instanceof AuthorizationFlowDisabledError) { + throw error; + } logException( error, @@ -1212,6 +1233,8 @@ export async function generateAssistantReply( slackUserId: context.correlation?.requesterId, slackChannelId: context.correlation?.channelId, runId: context.correlation?.runId, + actorType: context.correlation?.actorType, + actorId: context.correlation?.actorId, assistantUserName: botConfig.userName, modelId: botConfig.modelId, }, diff --git a/packages/junior/src/chat/runtime/reply-executor.ts b/packages/junior/src/chat/runtime/reply-executor.ts index a614bc09..2209a677 100644 --- a/packages/junior/src/chat/runtime/reply-executor.ts +++ b/packages/junior/src/chat/runtime/reply-executor.ts @@ -23,6 +23,7 @@ import { getAssistantThreadContext, getChannelId, getMessageTs, 
+ getTeamId, getThreadId, getThreadTs, getRunId, @@ -211,6 +212,7 @@ export function createReplyToThread(deps: ReplyExecutorDeps) { const threadTs = getThreadTs(threadId); const assistantThreadContext = getAssistantThreadContext(message); const messageTs = getMessageTs(message); + const teamId = getTeamId(message); const runId = getRunId(thread, message); const conversationId = threadId ?? runId; @@ -528,6 +530,7 @@ export function createReplyToThread(deps: ReplyExecutorDeps) { turnId, threadTs, messageTs, + teamId, runId, channelId, requesterId: message.author.userId, diff --git a/packages/junior/src/chat/runtime/thread-context.ts b/packages/junior/src/chat/runtime/thread-context.ts index e6cc25ac..8bf2dc3b 100644 --- a/packages/junior/src/chat/runtime/thread-context.ts +++ b/packages/junior/src/chat/runtime/thread-context.ts @@ -2,12 +2,19 @@ import type { Message, Thread } from "chat"; import { botConfig } from "@/chat/config"; import { toOptionalString } from "@/chat/coerce"; import { isDmChannel, normalizeSlackConversationId } from "@/chat/slack/client"; +import { getWorkspaceTeamId } from "@/chat/slack/workspace-context"; +import { isSlackTeamId } from "@/chat/slack/ids"; import { parseSlackThreadId, resolveSlackChannelIdFromThreadId, resolveSlackChannelIdFromMessage, } from "@/chat/slack/context"; +function toSlackTeamId(value: unknown): string | undefined { + const candidate = toOptionalString(value); + return candidate && isSlackTeamId(candidate) ? candidate : undefined; +} + function escapeRegExp(value: string): string { return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); } @@ -127,3 +134,19 @@ export function getMessageTs(message: Message): string | undefined { toOptionalString((rawRecord.message as { ts?: unknown } | undefined)?.ts) ); } + +/** Resolve the Slack workspace/team id from the raw inbound message payload. 
*/ +export function getTeamId(message: Message): string | undefined { + const raw = (message as unknown as { raw?: unknown }).raw; + if (!raw || typeof raw !== "object") { + return undefined; + } + + const rawRecord = raw as Record; + return ( + toSlackTeamId(rawRecord.team_id) ?? + toSlackTeamId(rawRecord.team) ?? + toSlackTeamId(getWorkspaceTeamId()) ?? + toSlackTeamId(rawRecord.user_team) + ); +} diff --git a/packages/junior/src/chat/sandbox/egress-proxy.ts b/packages/junior/src/chat/sandbox/egress-proxy.ts index 869c9b27..8fb21b1b 100644 --- a/packages/junior/src/chat/sandbox/egress-proxy.ts +++ b/packages/junior/src/chat/sandbox/egress-proxy.ts @@ -45,8 +45,17 @@ const DECODED_RESPONSE_HEADERS = new Set([ "content-length", ]); const AUTH_REJECTION_STATUS = new Set([401, 403]); + +/** Intercepts a credential-injected sandbox HTTP request before live forwarding. */ +export type SandboxEgressHttpInterceptor = (input: { + provider: string; + request: Request; + upstreamUrl: URL; +}) => Promise; + interface ProxyDeps { fetch?: typeof fetch; + interceptHttp?: SandboxEgressHttpInterceptor; verifyOidc?: (token: string) => Promise; } @@ -358,7 +367,7 @@ export function isSandboxEgressForwardedRequest(request: Request): boolean { ); } -/** Proxy one Vercel Sandbox firewall egress request through lazy credential injection. */ +/** Proxy one Vercel Sandbox firewall egress request through lazy credential headers. */ export async function proxySandboxEgressRequest( request: Request, deps: ProxyDeps = {}, @@ -527,6 +536,19 @@ export async function proxySandboxEgressRequest( const body = await requestBodyBytes(request); const fetchImpl = deps.fetch ?? fetch; const headers = requestHeaders(request, lease, upstreamUrl.hostname); + const intercepted = await deps.interceptHttp?.({ + provider, + request: new Request(upstreamUrl, { + method: request.method, + headers, + ...(body !== undefined ? 
{ body } : {}), + }), + upstreamUrl, + }); + if (intercepted) { + return intercepted; + } + const upstream = await fetchImpl(upstreamUrl, { method: request.method, headers, diff --git a/packages/junior/src/chat/sandbox/eval-gh-stub.ts b/packages/junior/src/chat/sandbox/eval-gh-stub.ts deleted file mode 100644 index e841933d..00000000 --- a/packages/junior/src/chat/sandbox/eval-gh-stub.ts +++ /dev/null @@ -1,348 +0,0 @@ -/** Build the eval-only GitHub CLI shim copied into sandbox test environments. */ -export function buildEvalGitHubCliStub(): string { - return `#!/usr/bin/env node -const fs = require("node:fs"); -const path = require("node:path"); -const { spawnSync } = require("node:child_process"); - -const args = process.argv.slice(2); -const statePath = "/vercel/sandbox/.junior/eval-gh-state.json"; -const fallbackBinaries = ["/usr/bin/gh", "/usr/local/bin/gh", "/bin/gh"]; -const flagsWithValues = new Set([ - "--repo", - "--title", - "--body", - "--body-file", - "--json", - "--search", - "--state", - "--limit", - "--method", - "--jq", - "--template", - "--hostname", -]); - -function getFlag(name) { - for (let index = 0; index < args.length; index += 1) { - const value = args[index]; - if (value === name) { - return args[index + 1]; - } - if (value.startsWith(name + "=")) { - return value.slice(name.length + 1); - } - } - return undefined; -} - -function getPositionals() { - const values = []; - for (let index = 0; index < args.length; index += 1) { - const value = args[index]; - if (flagsWithValues.has(value)) { - index += 1; - continue; - } - if (value.startsWith("--") && value.includes("=")) { - continue; - } - if (value.startsWith("-")) { - continue; - } - values.push(value); - } - return values; -} - -function loadState() { - try { - return JSON.parse(fs.readFileSync(statePath, "utf8")); - } catch { - return { nextIssueNumber: 101, issues: {} }; - } -} - -function saveState(state) { - fs.mkdirSync(path.dirname(statePath), { recursive: true }); - 
fs.writeFileSync(statePath, JSON.stringify(state, null, 2)); -} - -function issueUrl(repo, number) { - return "https://github.com/" + repo + "/issues/" + number; -} - -function repoValue() { - return getFlag("--repo") || "getsentry/junior"; -} - -function readBody() { - const bodyFile = getFlag("--body-file"); - if (bodyFile) { - try { - return fs.readFileSync(bodyFile, "utf8"); - } catch { - return ""; - } - } - return getFlag("--body") || ""; -} - -function defaultIssue(repo, number) { - return { - number, - title: "Eval issue", - body: "", - state: "OPEN", - url: issueUrl(repo, number), - labels: [], - assignees: [], - author: { login: "junior-eval" }, - }; -} - -function pickFields(record, csv) { - if (!csv) { - return record; - } - return Object.fromEntries( - csv - .split(",") - .map((value) => value.trim()) - .filter(Boolean) - .map((key) => [key, key in record ? record[key] : null]), - ); -} - -function outputJson(value) { - fs.writeFileSync(process.stdout.fd, JSON.stringify(value, null, 2) + "\\n"); -} - -function outputText(value) { - fs.writeFileSync(process.stdout.fd, value); -} - -const repoFiles = { - "packages/junior/src/chat/sandbox/egress-policy.ts": \`import { resolveAuthTokenPlaceholder } from "@/chat/plugins/auth/auth-token-placeholder"; -import { resolvePluginCommandEnv } from "@/chat/plugins/command-env"; -import { getPluginProviders } from "@/chat/plugins/registry"; - -/** Build the policy that forwards provider requests back to Junior for credentials. */ -export function buildSandboxEgressNetworkPolicy() { - // Plugin credential domains are forwarded through the host so the sandbox can - // activate requester-bound credentials for the current turn. -} - -/** Resolve non-secret command environment values for registered sandbox providers. 
*/ -export async function resolveSandboxCommandEnvironment() { - const env = {}; - for (const plugin of getPluginProviders()) { - Object.assign(env, resolvePluginCommandEnv(plugin.manifest)); - const credentials = plugin.manifest.credentials; - if (credentials) { - env[credentials.authTokenEnv] = resolveAuthTokenPlaceholder(credentials); - } - } - return env; -} -\`, - "packages/junior/src/chat/plugins/registry.ts": \`import { createGitHubAppBroker } from "@/chat/plugins/auth/github-app-broker"; - -export function createPluginBroker(provider, deps) { - const plugin = ensurePluginsLoaded().pluginsByName.get(provider); - const { credentials, name } = plugin.manifest; - if (credentials.type === "github-app") { - return createGitHubAppBroker(plugin.manifest, credentials); - } -} -\`, - "packages/junior-github/plugin.yaml": \`name: github -description: GitHub issue, pull request, and repository workflows via GitHub App - -credentials: - type: github-app - domains: - - api.github.com - - github.com - auth-token-env: GITHUB_TOKEN - auth-token-placeholder: ghp_host_managed_credential -\`, -}; - -function writeRepoFixture(targetDir) { - fs.mkdirSync(targetDir, { recursive: true }); - for (const [relativePath, content] of Object.entries(repoFiles)) { - const filePath = path.join(targetDir, relativePath); - fs.mkdirSync(path.dirname(filePath), { recursive: true }); - fs.writeFileSync(filePath, content); - } -} - -function fallbackToRealGh() { - for (const binary of fallbackBinaries) { - if (!fs.existsSync(binary)) { - continue; - } - const result = spawnSync(binary, args, { stdio: "inherit" }); - process.exit(result.status ?? 
1); - } - process.stderr.write("gh stub: unsupported command\\n"); - process.exit(1); -} - -if (args.length === 0 || args[0] === "--version" || args[0] === "version") { - outputText("gh version 2.0.0 (junior-eval)\\n"); - process.exit(0); -} - -if (args[0] === "auth" && args[1] === "status") { - outputText("github.com\\n ✓ Logged in to github.com as junior-eval\\n"); - process.exit(0); -} - -if (args[0] === "search" && args[1] === "issues") { - const jsonFields = getFlag("--json"); - if (jsonFields) { - outputJson([]); - } - process.exit(0); -} - -if (args[0] === "repo" && args[1] === "view") { - const positionals = getPositionals(); - const repo = positionals[2] || repoValue(); - const record = { - nameWithOwner: repo, - url: "https://github.com/" + repo, - defaultBranchRef: { name: "main" }, - }; - const jsonFields = getFlag("--json"); - if (jsonFields) { - outputJson(pickFields(record, jsonFields)); - } else { - outputText(record.url + "\\n"); - } - process.exit(0); -} - -if (args[0] === "repo" && args[1] === "clone") { - const positionals = getPositionals(); - const repo = positionals[2] || repoValue(); - const targetDir = positionals[3] || repo.split("/").pop() || "repo"; - writeRepoFixture(path.resolve(process.cwd(), targetDir)); - outputText("Cloning into '" + targetDir + "'...\\n"); - process.exit(0); -} - -if (args[0] === "api") { - const positionals = getPositionals(); - const route = positionals[1] || ""; - if (route.includes("/git/trees/")) { - const paths = Object.keys(repoFiles); - const jq = getFlag("--jq"); - if (jq && jq.includes(".tree[].path")) { - outputText(paths.join("\\n") + "\\n"); - } else { - outputJson({ - tree: paths.map((filePath) => ({ - path: filePath, - type: "blob", - })), - }); - } - process.exit(0); - } - if (route.includes("/comments")) { - outputJson([]); - process.exit(0); - } - if (route.includes("/search/issues")) { - outputJson({ items: [] }); - process.exit(0); - } - outputJson({}); - process.exit(0); -} - -if (args[0] === 
"issue") { - const subcommand = args[1]; - const positionals = getPositionals(); - const repo = repoValue(); - const state = loadState(); - - if (subcommand === "list") { - const jsonFields = getFlag("--json"); - if (jsonFields) { - outputJson([]); - } - process.exit(0); - } - - if (subcommand === "create") { - const number = state.nextIssueNumber++; - const record = { - number, - title: getFlag("--title") || "Eval issue", - body: readBody(), - state: "OPEN", - url: issueUrl(repo, number), - labels: [], - assignees: [], - author: { login: "junior-eval" }, - }; - state.issues[repo + "#" + number] = record; - saveState(state); - const jsonFields = getFlag("--json"); - if (jsonFields) { - outputJson(pickFields(record, jsonFields)); - } else { - outputText(record.url + "\\n"); - } - process.exit(0); - } - - const number = Number.parseInt(positionals[2] || "", 10); - const key = repo + "#" + number; - const record = - state.issues[key] || - defaultIssue(repo, Number.isFinite(number) ? number : 101); - - if (subcommand === "view") { - const jsonFields = getFlag("--json"); - if (jsonFields) { - outputJson(pickFields(record, jsonFields)); - } else { - outputText(record.url + "\\n"); - } - process.exit(0); - } - - if (subcommand === "edit") { - const nextRecord = { - ...record, - title: getFlag("--title") || record.title, - body: readBody() || record.body, - }; - state.issues[key] = nextRecord; - saveState(state); - process.exit(0); - } - - if (subcommand === "comment") { - outputText(record.url + "#issuecomment-1\\n"); - process.exit(0); - } - - if (subcommand === "close" || subcommand === "reopen") { - state.issues[key] = { - ...record, - state: subcommand === "close" ? 
"CLOSED" : "OPEN", - }; - saveState(state); - process.exit(0); - } -} - -fallbackToRealGh(); -`; -} diff --git a/packages/junior/src/chat/sandbox/eval-oauth-stub.ts b/packages/junior/src/chat/sandbox/eval-oauth-stub.ts deleted file mode 100644 index f98f9270..00000000 --- a/packages/junior/src/chat/sandbox/eval-oauth-stub.ts +++ /dev/null @@ -1,25 +0,0 @@ -/** Build the eval-only generic OAuth CLI shim copied into sandbox eval environments. */ -export function buildEvalOauthCliStub(): string { - return `#!/usr/bin/env node -const fs = require("node:fs"); - -const args = process.argv.slice(2); - -function outputText(value) { - fs.writeFileSync(process.stdout.fd, value); -} - -if (args.length === 0 || args[0] === "--version" || args[0] === "version") { - outputText("eval-oauth 1.0.0 (junior-eval)\\n"); - process.exit(0); -} - -if (args[0] === "whoami") { - outputText("eval-oauth-user\\n"); - process.exit(0); -} - -process.stderr.write("eval-oauth stub: unsupported command\\n"); -process.exit(1); -`; -} diff --git a/packages/junior/src/chat/sandbox/eval-sentry-stub.ts b/packages/junior/src/chat/sandbox/eval-sentry-stub.ts deleted file mode 100644 index c5db4c0c..00000000 --- a/packages/junior/src/chat/sandbox/eval-sentry-stub.ts +++ /dev/null @@ -1,69 +0,0 @@ -/** Build the eval-only Sentry CLI shim copied into sandbox test environments. 
*/ -export function buildEvalSentryCliStub(): string { - return `#!/usr/bin/env node -const fs = require("node:fs"); -const { spawnSync } = require("node:child_process"); - -const args = process.argv.slice(2); -const fallbackBinaries = ["/usr/bin/sentry", "/usr/local/bin/sentry", "/bin/sentry"]; - -function hasFlag(name) { - return args.includes(name) || args.some((value) => value.startsWith(name + "=")); -} - -function outputJson(value) { - fs.writeFileSync(process.stdout.fd, JSON.stringify(value, null, 2) + "\\n"); -} - -function outputText(value) { - fs.writeFileSync(process.stdout.fd, value); -} - -function fallbackToRealSentry() { - for (const binary of fallbackBinaries) { - if (!fs.existsSync(binary)) { - continue; - } - const result = spawnSync(binary, args, { stdio: "inherit" }); - process.exit(result.status ?? 1); - } - process.stderr.write("sentry stub: unsupported command\\n"); - process.exit(1); -} - -if (args.length === 0 || args[0] === "--version" || args[0] === "version") { - outputText("sentry-cli 2.0.0 (junior-eval)\\n"); - process.exit(0); -} - -if (args[0] === "--help" || args[0] === "help") { - outputText("USAGE\\n sentry issue list|view|events ...\\n sentry org list|view ...\\n sentry log list|view ...\\n sentry trace list|view|logs ...\\n sentry api ...\\n"); - process.exit(0); -} - -if (args.includes("--help")) { - outputText("sentry eval stub help\\n"); - process.exit(0); -} - -if (args[0] === "issue" && args[1] === "list") { - if (hasFlag("--json")) { - outputJson([]); - } else { - outputText("No issues found.\\n"); - } - process.exit(0); -} - -if (args[0] === "org" && args[1] === "list") { - if (hasFlag("--json")) { - outputJson([{ slug: "getsentry", name: "Sentry" }]); - } else { - outputText("getsentry\\n"); - } - process.exit(0); -} - -fallbackToRealSentry(); -`; -} diff --git a/packages/junior/src/chat/sandbox/fault-injection.ts b/packages/junior/src/chat/sandbox/fault-injection.ts deleted file mode 100644 index 754c5eb4..00000000 --- 
a/packages/junior/src/chat/sandbox/fault-injection.ts +++ /dev/null @@ -1,22 +0,0 @@ -const STREAM_INTERRUPT_FAULT_ENV = - "JUNIOR_EVAL_FAULT_SANDBOX_BASH_STREAM_INTERRUPTS"; - -/** Consume one eval-only sandbox bash stream interruption fault. */ -export function consumeSandboxBashStreamInterruptFault(): Error | undefined { - if (process.env.JUNIOR_EVAL_ENABLE_FAULTS !== "1") { - return undefined; - } - - const remaining = Number.parseInt( - process.env[STREAM_INTERRUPT_FAULT_ENV] ?? "0", - 10, - ); - if (!Number.isFinite(remaining) || remaining <= 0) { - return undefined; - } - - process.env[STREAM_INTERRUPT_FAULT_ENV] = String(remaining - 1); - return Object.assign(new Error("Stream ended before command finished"), { - name: "StreamError", - }); -} diff --git a/packages/junior/src/chat/sandbox/session.ts b/packages/junior/src/chat/sandbox/session.ts index 21f51fab..eb77c37b 100644 --- a/packages/junior/src/chat/sandbox/session.ts +++ b/packages/junior/src/chat/sandbox/session.ts @@ -10,7 +10,6 @@ import { isSnapshottingError, wrapSandboxSetupError, } from "@/chat/sandbox/errors"; -import { consumeSandboxBashStreamInterruptFault } from "@/chat/sandbox/fault-injection"; import { buildNonInteractiveShellScript } from "@/chat/sandbox/noninteractive-command"; import { SANDBOX_WORKSPACE_ROOT } from "@/chat/sandbox/paths"; import { @@ -233,7 +232,6 @@ export function createSandboxSessionManager(options?: { skills: availableSkills, referenceFiles: availableReferenceFiles, withSpan: withSandboxSpan, - runtimeBinDir: SANDBOX_RUNTIME_BIN_DIR, }); }; @@ -680,10 +678,6 @@ export function createSandboxSessionManager(options?: { controller.abort(); }, input.timeoutMs) : undefined; - const streamInterruptFault = consumeSandboxBashStreamInterruptFault(); - if (streamInterruptFault) { - throw streamInterruptFault; - } const commandResult = await sandboxInstance.runCommand({ cmd: "bash", args: ["-c", script], diff --git a/packages/junior/src/chat/sandbox/skill-sync.ts 
b/packages/junior/src/chat/sandbox/skill-sync.ts index fb07bc93..1e6acbfb 100644 --- a/packages/junior/src/chat/sandbox/skill-sync.ts +++ b/packages/junior/src/chat/sandbox/skill-sync.ts @@ -1,9 +1,5 @@ import fs from "node:fs/promises"; import path from "node:path"; -import { buildEvalGitHubCliStub } from "@/chat/sandbox/eval-gh-stub"; -import { buildEvalOauthCliStub } from "@/chat/sandbox/eval-oauth-stub"; -import { buildEvalSentryCliStub } from "@/chat/sandbox/eval-sentry-stub"; -import { runNonInteractiveCommand } from "@/chat/sandbox/noninteractive-command"; import { SANDBOX_DATA_ROOT, SANDBOX_SKILLS_ROOT, @@ -50,7 +46,6 @@ async function listFilesRecursive(root: string): Promise { async function buildSkillSyncFiles( availableSkills: SkillMetadata[], - runtimeBinDir: string, referenceFiles?: string[], ): Promise { const filesToWrite: SkillSyncFile[] = []; @@ -97,24 +92,6 @@ async function buildSkillSyncFiles( } } - if (process.env.EVAL_ENABLE_TEST_CREDENTIALS === "1") { - filesToWrite.push({ - path: `${runtimeBinDir}/gh`, - content: Buffer.from(buildEvalGitHubCliStub(), "utf8"), - }); - filesToWrite.push({ - path: `${runtimeBinDir}/sentry`, - content: Buffer.from(buildEvalSentryCliStub(), "utf8"), - }); - } - - if (availableSkills.some((skill) => skill.name === "eval-oauth")) { - filesToWrite.push({ - path: `${runtimeBinDir}/eval-oauth`, - content: Buffer.from(buildEvalOauthCliStub(), "utf8"), - }); - } - return filesToWrite; } @@ -218,7 +195,7 @@ export function isHostFileMissingError(error: unknown): boolean { ); } -/** Copy the current skill set and reference files into a sandbox and mark runtime shims executable. */ +/** Copy the current skill set and reference files into a sandbox. 
*/ export async function syncSkillsToSandbox(params: { sandbox: SandboxInstance; skills: SkillMetadata[]; @@ -229,7 +206,6 @@ export async function syncSkillsToSandbox(params: { attributes: Record, callback: () => Promise, ) => Promise; - runtimeBinDir: string; workspaceRoot?: string; }): Promise { const workspaceRoot = params.workspaceRoot ?? SANDBOX_WORKSPACE_ROOT; @@ -243,7 +219,6 @@ export async function syncSkillsToSandbox(params: { async () => { const filesToWrite = await buildSkillSyncFiles( params.skills, - params.runtimeBinDir, params.referenceFiles, ); const bytesWritten = filesToWrite.reduce( @@ -273,23 +248,6 @@ export async function syncSkillsToSandbox(params: { } await params.sandbox.writeFiles(filesToWrite); - const executableFiles = filesToWrite - .map((file) => file.path) - .filter((filePath) => - filePath.startsWith(`${params.runtimeBinDir}/`), - ); - for (const filePath of executableFiles) { - const chmod = await runNonInteractiveCommand(params.sandbox, { - cmd: "chmod", - args: ["0755", filePath], - cwd: workspaceRoot, - }); - if (chmod.exitCode !== 0) { - throw new Error( - `sandbox chmod failed for ${filePath}: ${(await chmod.stderr()) || (await chmod.stdout()) || `exit ${chmod.exitCode}`}`, - ); - } - } } catch (error) { throwSandboxOperationError("sandbox writeFiles", error, true); } diff --git a/packages/junior/src/chat/scheduler/cadence.ts b/packages/junior/src/chat/scheduler/cadence.ts new file mode 100644 index 00000000..8fb445f5 --- /dev/null +++ b/packages/junior/src/chat/scheduler/cadence.ts @@ -0,0 +1,562 @@ +import type { + ScheduledCalendarFrequency, + ScheduledLocalTime, + ScheduledTask, + ScheduledTaskRecurrence, +} from "@/chat/scheduler/types"; + +/** Parse an ISO timestamp into a finite Unix timestamp in milliseconds. 
*/ +export function parseScheduleTimestamp(value: string): number | undefined { + const trimmed = value.trim(); + const match = + /^(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2})(?::(\d{2})(?:\.\d{1,9})?)?(Z|[+-]\d{2}:\d{2})$/.exec( + trimmed, + ); + if (!match) { + return undefined; + } + + const year = Number(match[1]); + const month = Number(match[2]); + const day = Number(match[3]); + const hour = Number(match[4]); + const minute = Number(match[5]); + const second = match[6] ? Number(match[6]) : 0; + if ( + !Number.isInteger(year) || + !Number.isInteger(month) || + !Number.isInteger(day) || + !Number.isInteger(hour) || + !Number.isInteger(minute) || + !Number.isInteger(second) || + month < 1 || + month > 12 || + day < 1 || + day > daysInMonth(year, month) || + hour < 0 || + hour > 23 || + minute < 0 || + minute > 59 || + second < 0 || + second > 59 + ) { + return undefined; + } + + const parsed = Date.parse(trimmed); + return Number.isFinite(parsed) ? parsed : undefined; +} + +export interface ZonedDateTimeParts { + day: number; + hour: number; + minute: number; + month: number; + second: number; + weekday: number; + year: number; +} + +interface LocalDate { + day: number; + month: number; + year: number; +} + +const FORMATTERS = new Map(); + +function getFormatter(timezone: string): Intl.DateTimeFormat { + const existing = FORMATTERS.get(timezone); + if (existing) { + return existing; + } + + const formatter = new Intl.DateTimeFormat("en-US", { + timeZone: timezone, + hour12: false, + year: "numeric", + month: "2-digit", + day: "2-digit", + hour: "2-digit", + minute: "2-digit", + second: "2-digit", + }); + FORMATTERS.set(timezone, formatter); + return formatter; +} + +function normalizeHour(hour: number): number { + return hour === 24 ? 0 : hour; +} + +function getLocalDateWeekday(date: LocalDate): number { + return new Date(Date.UTC(date.year, date.month - 1, date.day)).getUTCDay(); +} + +/** Resolve a UTC timestamp into calendar parts for a named time zone. 
*/ +export function getZonedDateTimeParts( + timestampMs: number, + timezone: string, +): ZonedDateTimeParts { + const parts = getFormatter(timezone).formatToParts(new Date(timestampMs)); + const values = new Map(parts.map((part) => [part.type, part.value])); + const year = Number(values.get("year")); + const month = Number(values.get("month")); + const day = Number(values.get("day")); + const hour = normalizeHour(Number(values.get("hour"))); + const minute = Number(values.get("minute")); + const second = Number(values.get("second")); + + return { + year, + month, + day, + hour, + minute, + second, + weekday: getLocalDateWeekday({ year, month, day }), + }; +} + +function getTimeZoneOffsetMs(timestampMs: number, timezone: string): number { + const parts = getZonedDateTimeParts(timestampMs, timezone); + return ( + Date.UTC( + parts.year, + parts.month - 1, + parts.day, + parts.hour, + parts.minute, + parts.second, + ) - timestampMs + ); +} + +function localDateTimeToTimestampMs(args: { + date: LocalDate; + time: ScheduledLocalTime; + timezone: string; +}): number { + const localAsUtcMs = Date.UTC( + args.date.year, + args.date.month - 1, + args.date.day, + args.time.hour, + args.time.minute, + 0, + ); + let timestampMs = + localAsUtcMs - getTimeZoneOffsetMs(localAsUtcMs, args.timezone); + + for (let index = 0; index < 3; index += 1) { + const next = localAsUtcMs - getTimeZoneOffsetMs(timestampMs, args.timezone); + if (next === timestampMs) { + break; + } + timestampMs = next; + } + + return timestampMs; +} + +function compareDate(left: LocalDate, right: LocalDate): number { + return ( + Date.UTC(left.year, left.month - 1, left.day) - + Date.UTC(right.year, right.month - 1, right.day) + ); +} + +function addDays(date: LocalDate, days: number): LocalDate { + const next = new Date(Date.UTC(date.year, date.month - 1, date.day + days)); + return { + year: next.getUTCFullYear(), + month: next.getUTCMonth() + 1, + day: next.getUTCDate(), + }; +} + +function 
daysInMonth(year: number, month: number): number {
+  // `month` is 1-based here, so day 0 of the 0-based month index `month`
+  // resolves to the last day of the requested month.
+  return new Date(Date.UTC(year, month, 0)).getUTCDate();
+}
+
+/**
+ * Parse a strict `YYYY-MM-DD` string into a local calendar date.
+ *
+ * Returns `undefined` when the text does not match the pattern, the month is
+ * outside 1-12, or the day does not exist in that month.
+ */
+function parseLocalDate(value: string): LocalDate | undefined {
+  const match = /^(\d{4})-(\d{2})-(\d{2})$/.exec(value);
+  if (!match) {
+    return undefined;
+  }
+
+  const year = Number(match[1]);
+  const month = Number(match[2]);
+  const day = Number(match[3]);
+  if (
+    !Number.isInteger(year) ||
+    !Number.isInteger(month) ||
+    !Number.isInteger(day) ||
+    month < 1 ||
+    month > 12 ||
+    day < 1 ||
+    day > daysInMonth(year, month)
+  ) {
+    return undefined;
+  }
+
+  return { year, month, day };
+}
+
+/** Format a local calendar date as zero-padded `YYYY-MM-DD`. */
+function formatLocalDate(date: LocalDate): string {
+  return [
+    String(date.year).padStart(4, "0"),
+    String(date.month).padStart(2, "0"),
+    String(date.day).padStart(2, "0"),
+  ].join("-");
+}
+
+/** Return the year/month/day that `timestampMs` falls on in `timezone`. */
+function getLocalDate(timestampMs: number, timezone: string): LocalDate {
+  const parts = getZonedDateTimeParts(timestampMs, timezone);
+  return { year: parts.year, month: parts.month, day: parts.day };
+}
+
+/**
+ * Return the distinct weekday numbers from `values`, sorted ascending.
+ *
+ * Only whole numbers in the range 0-6 are kept. Fractional values are dropped
+ * because they can never equal a real weekday and would make the weekly
+ * search exhaust all of its attempts without finding a match.
+ */
+function normalizeWeekdays(values: number[] | undefined): number[] {
+  return [
+    ...new Set(
+      (values ?? []).filter(
+        (value) => Number.isInteger(value) && value >= 0 && value <= 6,
+      ),
+    ),
+  ].sort((a, b) => a - b);
+}
+
+/** Combine a local date with the recurrence's time of day into a timestamp. */
+function buildCandidate(args: {
+  date: LocalDate;
+  recurrence: ScheduledTaskRecurrence;
+  timezone: string;
+}): number {
+  return localDateTimeToTimestampMs({
+    date: args.date,
+    time: args.recurrence.time,
+    timezone: args.timezone,
+  });
+}
+
+/**
+ * Parse 12-hour clock text such as `9am`, `9:30 pm` or `12:05AM`.
+ *
+ * The am/pm suffix is required. Returns a 24-hour `{ hour, minute }` pair, or
+ * `undefined` when the text is not a valid 12-hour time.
+ */
+function parseLocalTime(value: string): ScheduledLocalTime | undefined {
+  const match = /^(\d{1,2})(?::(\d{2}))?\s*(am|pm)$/i.exec(value.trim());
+  if (!match) {
+    return undefined;
+  }
+
+  let hour = Number(match[1]);
+  const minute = match[2] ? Number(match[2]) : 0;
+  const meridiem = match[3].toLowerCase();
+  if (
+    !Number.isInteger(hour) ||
+    !Number.isInteger(minute) ||
+    hour < 1 ||
+    hour > 12 ||
+    minute < 0 ||
+    minute > 59
+  ) {
+    return undefined;
+  }
+  // 12am is midnight (hour 0); 12pm stays 12; every other pm hour shifts by 12.
+  if (meridiem === "am" && hour === 12) {
+    hour = 0;
+  } else if (meridiem === "pm" && hour !== 12) {
+    hour += 12;
+  }
+  return { hour, minute };
+}
+
+/**
+ * Parse supported relative one-off schedule text into a UTC timestamp.
+ *
+ * Two forms are recognised, case-insensitively:
+ * - `in <n> minute(s)|hour(s)`, added directly to `nowMs`;
+ * - `tomorrow [at] <12-hour time>`, resolved on the day after `nowMs` in
+ *   `timezone`.
+ *
+ * Returns `undefined` for any other text or for an out-of-range amount.
+ */
+export function parseRelativeScheduleTimestamp(args: {
+  nowMs: number;
+  text: string;
+  timezone: string;
+}): number | undefined {
+  const text = args.text.trim();
+  const offsetMatch = /^in\s+(\d+)\s+(minute|minutes|hour|hours)$/i.exec(text);
+  if (offsetMatch) {
+    const amount = Number(offsetMatch[1]);
+    // The 1..1440 bound applies to the number itself, whichever unit follows.
+    if (!Number.isSafeInteger(amount) || amount < 1 || amount > 24 * 60) {
+      return undefined;
+    }
+    const unitMs = offsetMatch[2].toLowerCase().startsWith("hour")
+      ? 60 * 60 * 1000
+      : 60 * 1000;
+    return args.nowMs + amount * unitMs;
+  }
+
+  const tomorrowMatch = /^tomorrow(?:\s+at)?\s+(.+)$/i.exec(text);
+  if (!tomorrowMatch) {
+    return undefined;
+  }
+  const time = parseLocalTime(tomorrowMatch[1]);
+  if (!time) {
+    return undefined;
+  }
+  return localDateTimeToTimestampMs({
+    date: addDays(getLocalDate(args.nowMs, args.timezone), 1),
+    time,
+    timezone: args.timezone,
+  });
+}
+
+/**
+ * Next fire time for an every-N-days recurrence.
+ *
+ * Steps forward from the local date of `scheduledForMs` in whole intervals,
+ * never earlier than the recurrence start date, until the candidate is
+ * strictly after `afterMs`. Returns `undefined` when the start date is invalid.
+ */
+function getDailyNextRunAtMs(args: {
+  afterMs: number;
+  recurrence: ScheduledTaskRecurrence;
+  scheduledForMs: number;
+  timezone: string;
+}): number | undefined {
+  const start = parseLocalDate(args.recurrence.startDate);
+  if (!start) {
+    return undefined;
+  }
+
+  let candidateDate = addDays(
+    getLocalDate(args.scheduledForMs, args.timezone),
+    args.recurrence.interval,
+  );
+  if (compareDate(candidateDate, start) < 0) {
+    candidateDate = start;
+  }
+
+  let candidate = buildCandidate({
+    date: candidateDate,
+    recurrence: args.recurrence,
+    timezone: args.timezone,
+  });
+  // Terminates because getNextRunAtMs only passes a positive whole interval,
+  // so each pass moves the candidate date forward by at least one day.
+  while (candidate <= args.afterMs) {
+    candidateDate = addDays(candidateDate, args.recurrence.interval);
+    candidate = buildCandidate({
+      date: candidateDate,
+      recurrence: args.recurrence,
+      timezone: args.timezone,
+    });
+  }
+  return candidate;
+}
+
+/**
+ * Next fire time for an every-N-weeks recurrence on selected weekdays.
+ *
+ * Scans day by day from the day after `scheduledForMs`. Weeks are counted in
+ * 7-day spans from the recurrence start date, and a day qualifies when its
+ * span index is a multiple of the interval and its weekday is selected.
+ * Returns `undefined` when the start date is invalid, no valid weekday is
+ * configured, or nothing matches within 3660 days.
+ */
+function getWeeklyNextRunAtMs(args: {
+  afterMs: number;
+  recurrence: ScheduledTaskRecurrence;
+  scheduledForMs: number;
+  timezone: string;
+}): number | undefined {
+  const start = parseLocalDate(args.recurrence.startDate);
+  if (!start) {
+    return undefined;
+  }
+
+  const weekdays = normalizeWeekdays(args.recurrence.weekdays);
+  if (weekdays.length === 0) {
+    return undefined;
+  }
+
+  let candidateDate = addDays(
+    getLocalDate(args.scheduledForMs, args.timezone),
+    1,
+  );
+  for (let attempts = 0; attempts < 3660; attempts += 1) {
+    // Both dates are projected onto UTC midnight so the difference is a whole
+    // number of days regardless of the task's timezone.
+    const weeksSinceStart = Math.floor(
+      (Date.UTC(
+        candidateDate.year,
+        candidateDate.month - 1,
+        candidateDate.day,
+      ) -
+        Date.UTC(start.year, start.month - 1, start.day)) /
+        (7 * 24 * 60 * 60 * 1000),
+    );
+    const isInCycle =
+      weeksSinceStart >= 0 && weeksSinceStart % args.recurrence.interval === 0;
+    if (isInCycle && weekdays.includes(getLocalDateWeekday(candidateDate))) {
+      const candidate = buildCandidate({
+        date: candidateDate,
+        recurrence: args.recurrence,
+        timezone: args.timezone,
+      });
+      if (candidate > args.afterMs) {
+        return candidate;
+      }
+    }
+    candidateDate = addDays(candidateDate, 1);
+  }
+
+  return undefined;
+}
+
+/**
+ * Next fire time for an every-N-months recurrence on a fixed day of month.
+ *
+ * Advances a linear month index (`year * 12 + month - 1`) by the interval,
+ * clamped to the start month. Months that do not contain `dayOfMonth` are
+ * skipped rather than clamped to their last day. Returns `undefined` when the
+ * start date or day of month is missing, or after 1200 attempts.
+ */
+function getMonthlyNextRunAtMs(args: {
+  afterMs: number;
+  recurrence: ScheduledTaskRecurrence;
+  scheduledForMs: number;
+  timezone: string;
+}): number | undefined {
+  const start = parseLocalDate(args.recurrence.startDate);
+  const dayOfMonth = args.recurrence.dayOfMonth;
+  if (!start || !dayOfMonth) {
+    return undefined;
+  }
+
+  const scheduledDate = getLocalDate(args.scheduledForMs, args.timezone);
+  let monthIndex = scheduledDate.year * 12 + scheduledDate.month - 1;
+  const startMonthIndex = start.year * 12 + start.month - 1;
+
+  for (let attempts = 0; attempts < 1200; attempts += 1) {
+    monthIndex += args.recurrence.interval;
+    if (monthIndex < startMonthIndex) {
+      monthIndex = startMonthIndex;
+    }
+    const year = Math.floor(monthIndex / 12);
+    const month = (monthIndex % 12) + 1;
+    if (dayOfMonth > daysInMonth(year, month)) {
+      continue;
+    }
+    const candidate = buildCandidate({
+      date: { year, month, day: dayOfMonth },
+      recurrence: args.recurrence,
+      timezone: args.timezone,
+    });
+    if (candidate > args.afterMs) {
+      return candidate;
+    }
+  }
+
+  return undefined;
+}
+
+/**
+ * Next fire time for an every-N-years recurrence on a fixed month and day.
+ *
+ * Advances the year by the interval, clamped to the start year. Years in which
+ * the date does not exist (for example 29 February) are skipped. Returns
+ * `undefined` when the start date, month or day of month is missing, or after
+ * 100 attempts.
+ */
+function getYearlyNextRunAtMs(args: {
+  afterMs: number;
+  recurrence: ScheduledTaskRecurrence;
+  scheduledForMs: number;
+  timezone: string;
+}): number | undefined {
+  const start = parseLocalDate(args.recurrence.startDate);
+  const month = args.recurrence.month;
+  const dayOfMonth = args.recurrence.dayOfMonth;
+  if (!start || !month || !dayOfMonth) {
+    return undefined;
+  }
+
+  const scheduledDate = getLocalDate(args.scheduledForMs, args.timezone);
+  let year = scheduledDate.year;
+
+  for (let attempts = 0; attempts < 100; attempts += 1) {
+    year += args.recurrence.interval;
+    if (year < start.year) {
+      year = start.year;
+    }
+    if (dayOfMonth > daysInMonth(year, month)) {
+      continue;
+    }
+    const candidate = buildCandidate({
+      date: { year, month, day: dayOfMonth },
+      recurrence: args.recurrence,
+      timezone: args.timezone,
+    });
+    if (candidate > args.afterMs) {
+      return candidate;
+    }
+  }
+
+  return undefined;
+}
+
+/**
+ * Build a calendar recurrence anchored to an exact first run timestamp.
+ *
+ * The start date, time of day and (where relevant) weekday, day of month and
+ * month are all taken from `nextRunAtMs` as seen in `timezone`. An interval
+ * that is missing, non-positive or not a whole number falls back to 1, so a
+ * stored recurrence always carries an interval the date arithmetic can use.
+ */
+export function buildCalendarRecurrence(args: {
+  frequency: ScheduledCalendarFrequency;
+  interval?: number;
+  nextRunAtMs: number;
+  timezone: string;
+  weekdays?: number[];
+}): ScheduledTaskRecurrence {
+  const interval =
+    args.interval !== undefined &&
+    Number.isSafeInteger(args.interval) &&
+    args.interval > 0
+      ? args.interval
+      : 1;
+  const parts = getZonedDateTimeParts(args.nextRunAtMs, args.timezone);
+  const time = { hour: parts.hour, minute: parts.minute };
+  const startDate = formatLocalDate(parts);
+
+  if (args.frequency === "weekly") {
+    const weekdays = normalizeWeekdays(args.weekdays);
+    return {
+      frequency: args.frequency,
+      interval,
+      startDate,
+      time,
+      // With no usable weekday supplied, recur on the first run's weekday.
+      weekdays: weekdays.length > 0 ? weekdays : [parts.weekday],
+    };
+  }
+
+  if (args.frequency === "monthly") {
+    return {
+      dayOfMonth: parts.day,
+      frequency: args.frequency,
+      interval,
+      startDate,
+      time,
+    };
+  }
+
+  if (args.frequency === "yearly") {
+    return {
+      dayOfMonth: parts.day,
+      frequency: args.frequency,
+      interval,
+      month: parts.month,
+      startDate,
+      time,
+    };
+  }
+
+  return {
+    frequency: args.frequency,
+    interval,
+    startDate,
+    time,
+  };
+}
+
+/**
+ * Return the next fire time after a completed run, when the task recurs.
+ *
+ * `scheduledForMs` is the occurrence that just ran and `afterMs` (defaulting
+ * to it) is the instant the result must be strictly later than. Returns
+ * `undefined` for non-recurring tasks and for a recurrence whose interval is
+ * not a positive whole number; the per-frequency helpers rely on that guard
+ * for their day, week, month and year arithmetic.
+ */
+export function getNextRunAtMs(
+  task: ScheduledTask,
+  scheduledForMs: number,
+  afterMs: number = scheduledForMs,
+): number | undefined {
+  if (task.schedule.kind !== "recurring") {
+    return undefined;
+  }
+
+  const recurrence = task.schedule.recurrence;
+  if (
+    !recurrence ||
+    !Number.isSafeInteger(recurrence.interval) ||
+    recurrence.interval <= 0
+  ) {
+    return undefined;
+  }
+
+  if (recurrence.frequency === "daily") {
+    return getDailyNextRunAtMs({
+      recurrence,
+      timezone: task.schedule.timezone,
+      scheduledForMs,
+      afterMs,
+    });
+  }
+
+  if (recurrence.frequency === "weekly") {
+    return getWeeklyNextRunAtMs({
+      recurrence,
+      timezone: task.schedule.timezone,
+      scheduledForMs,
+      afterMs,
+    });
+  }
+
+  if (recurrence.frequency === "monthly") {
+    return getMonthlyNextRunAtMs({
+      recurrence,
+      timezone: task.schedule.timezone,
+      scheduledForMs,
+      afterMs,
+    });
+  }
+
+  // Any remaining frequency is handled as yearly.
+  return getYearlyNextRunAtMs({
+    recurrence,
+    timezone: task.schedule.timezone,
+    scheduledForMs,
+    afterMs,
+  });
+}
diff --git a/packages/junior/src/chat/scheduler/plugin.ts
b/packages/junior/src/chat/scheduler/plugin.ts new file mode 100644 index 00000000..4af4462a --- /dev/null +++ b/packages/junior/src/chat/scheduler/plugin.ts @@ -0,0 +1,305 @@ +import { + defineJuniorPlugin, + type Dispatch, + type ToolRegistrationHookContext, +} from "@sentry/junior-plugin-api"; +import { buildScheduledTaskRunPrompt } from "@/chat/scheduler/prompt"; +import { + createStateSchedulerStore, + type SchedulerStore, +} from "@/chat/scheduler/store"; +import type { ScheduledRun, ScheduledTask } from "@/chat/scheduler/types"; +import { + createSlackScheduleCreateTaskTool, + createSlackScheduleDeleteTaskTool, + createSlackScheduleListTasksTool, + createSlackScheduleRunTaskNowTool, + createSlackScheduleUpdateTaskTool, +} from "@/chat/tools/slack/schedule-tools"; +import type { ToolDefinition } from "@/chat/tools/definition"; +import type { ToolRuntimeContext } from "@/chat/tools/types"; + +const SCHEDULER_HEARTBEAT_LIMIT = 10; + +function shouldSkipRun( + task: ScheduledTask, + run: ScheduledRun, +): string | undefined { + if (task.status === "deleted") { + return `Scheduled task ${task.id} was deleted before the run started.`; + } + if (task.status !== "active") { + return `Scheduled task ${task.id} was ${task.status} before the run started.`; + } + if ( + task.nextRunAtMs !== run.scheduledForMs && + task.runNowAtMs !== run.scheduledForMs + ) { + return `Scheduled task ${task.id} no longer targets ${new Date(run.scheduledForMs).toISOString()}.`; + } + return undefined; +} + +function createSchedulerToolContext( + ctx: ToolRegistrationHookContext, +): ToolRuntimeContext { + return { + channelCapabilities: ctx.channelCapabilities ?? 
{ + canAddReactions: false, + canCreateCanvas: false, + canPostToChannel: false, + }, + channelId: ctx.channelId, + messageTs: ctx.messageTs, + requester: ctx.requester, + sandbox: {} as ToolRuntimeContext["sandbox"], + teamId: ctx.teamId, + threadTs: ctx.threadTs, + userText: ctx.userText, + }; +} + +async function applyDispatchResult(args: { + dispatch: Dispatch; + nowMs: number; + run: ScheduledRun; + store: ReturnType; +}): Promise { + if (args.dispatch.status === "completed") { + const completed = await args.store.markRunCompleted({ + completedAtMs: args.nowMs, + resultMessageTs: args.dispatch.resultMessageTs, + runId: args.run.id, + startedAtMs: args.run.startedAtMs!, + }); + if (!completed) { + return false; + } + await args.store.updateTaskAfterRun({ + nowMs: args.nowMs, + run: args.run, + status: "completed", + }); + return true; + } + + if (args.dispatch.status === "blocked") { + const blocked = await args.store.markRunBlocked({ + completedAtMs: args.nowMs, + errorMessage: args.dispatch.errorMessage ?? "Dispatch blocked.", + runId: args.run.id, + startedAtMs: args.run.startedAtMs!, + }); + if (!blocked) { + return false; + } + await args.store.updateTaskAfterRun({ + errorMessage: blocked.errorMessage, + nowMs: args.nowMs, + run: args.run, + status: "blocked", + }); + return true; + } + + if (args.dispatch.status === "failed") { + const failed = await args.store.markRunFailed({ + completedAtMs: args.nowMs, + errorMessage: args.dispatch.errorMessage ?? 
"Dispatch failed.", + runId: args.run.id, + startedAtMs: args.run.startedAtMs, + }); + if (!failed) { + return false; + } + await args.store.updateTaskAfterRun({ + errorMessage: failed.errorMessage, + nowMs: args.nowMs, + run: args.run, + status: "failed", + }); + return true; + } + + return false; +} + +async function blockClaimedRun(args: { + errorMessage: string; + nowMs: number; + run: ScheduledRun; + store: SchedulerStore; +}): Promise { + const blocked = await args.store.markRunBlocked({ + completedAtMs: args.nowMs, + errorMessage: args.errorMessage, + runId: args.run.id, + }); + if (!blocked) { + return; + } + await args.store.updateTaskAfterRun({ + errorMessage: args.errorMessage, + nowMs: args.nowMs, + run: args.run, + status: "blocked", + }); +} + +async function failClaimedRun(args: { + errorMessage: string; + nowMs: number; + run: ScheduledRun; + store: SchedulerStore; +}): Promise { + const failed = await args.store.markRunFailed({ + completedAtMs: args.nowMs, + errorMessage: args.errorMessage, + runId: args.run.id, + startedAtMs: args.run.startedAtMs, + }); + if (!failed) { + return; + } + await args.store.updateTaskAfterRun({ + errorMessage: args.errorMessage, + nowMs: args.nowMs, + run: args.run, + status: "failed", + }); +} + +/** Create Junior's built-in trusted scheduler plugin. 
*/ +export function createSchedulerPlugin() { + return defineJuniorPlugin({ + name: "scheduler", + hooks: { + tools(ctx) { + if (!ctx.channelId || !ctx.teamId || !ctx.requester?.userId) { + return {} as Record>; + } + const context = createSchedulerToolContext(ctx); + return { + slackScheduleCreateTask: createSlackScheduleCreateTaskTool(context), + slackScheduleListTasks: createSlackScheduleListTasksTool(context), + slackScheduleUpdateTask: createSlackScheduleUpdateTaskTool(context), + slackScheduleDeleteTask: createSlackScheduleDeleteTaskTool(context), + slackScheduleRunTaskNow: createSlackScheduleRunTaskNowTool(context), + } satisfies Record>; + }, + async heartbeat(ctx) { + const store = createStateSchedulerStore(); + let processedCount = 0; + let dispatchCount = 0; + for (const run of await store.listIncompleteRuns()) { + if (!run.dispatchId) { + continue; + } + const dispatch = await ctx.agent.get(run.dispatchId); + if (!dispatch) { + await failClaimedRun({ + errorMessage: "Scheduled task dispatch record is missing.", + nowMs: ctx.nowMs, + run, + store, + }); + continue; + } + if ( + await applyDispatchResult({ + dispatch, + nowMs: ctx.nowMs, + run, + store, + }) + ) { + processedCount += 1; + } + } + + for ( + let index = processedCount; + index < SCHEDULER_HEARTBEAT_LIMIT; + index += 1 + ) { + const run = await store.claimDueRun({ nowMs: ctx.nowMs }); + if (!run) { + break; + } + const task = await store.getTask(run.taskId); + if (!task) { + await store.markRunFailed({ + completedAtMs: ctx.nowMs, + errorMessage: `Scheduled task ${run.taskId} was not found`, + runId: run.id, + }); + continue; + } + const skippedReason = shouldSkipRun(task, run); + if (skippedReason) { + await store.markRunSkipped({ + completedAtMs: ctx.nowMs, + errorMessage: skippedReason, + runId: run.id, + }); + continue; + } + + let prompt: string; + try { + prompt = buildScheduledTaskRunPrompt({ + nowMs: ctx.nowMs, + run, + task, + }); + } catch (error) { + const errorMessage = + error 
instanceof Error + ? `Scheduled task prompt could not be built: ${error.message}` + : "Scheduled task prompt could not be built."; + await blockClaimedRun({ + errorMessage, + nowMs: ctx.nowMs, + run, + store, + }); + continue; + } + let dispatch: Awaited>; + try { + dispatch = await ctx.agent.dispatch({ + idempotencyKey: run.id, + destination: task.destination, + input: prompt, + metadata: { + runId: run.id, + taskId: task.id, + }, + }); + } catch (error) { + const errorMessage = + error instanceof Error + ? `Scheduled task dispatch could not be created: ${error.message}` + : "Scheduled task dispatch could not be created."; + await blockClaimedRun({ + errorMessage, + nowMs: ctx.nowMs, + run, + store, + }); + continue; + } + await store.markRunDispatched({ + claimedAtMs: run.claimedAtMs, + dispatchId: dispatch.id, + nowMs: ctx.nowMs, + runId: run.id, + }); + dispatchCount += 1; + } + + return { dispatchCount }; + }, + }, + }); +} diff --git a/packages/junior/src/chat/scheduler/prompt.ts b/packages/junior/src/chat/scheduler/prompt.ts new file mode 100644 index 00000000..c1c254c4 --- /dev/null +++ b/packages/junior/src/chat/scheduler/prompt.ts @@ -0,0 +1,95 @@ +import { escapeXml } from "@/chat/xml"; +import { + SCHEDULED_TASK_SYSTEM_ACTOR, + type ScheduledRun, + type ScheduledTask, +} from "@/chat/scheduler/types"; + +const EXECUTION_RULES = [ + "- Execute as the scheduled-task system actor; creator metadata is audit context, not an active user identity.", + "- Complete the task without asking follow-up questions unless access, approval, or required input is missing.", + "- Use the available tools and skills that are relevant to the task contract.", + "- If blocked, report the specific missing provider, permission, configuration, or input.", + "- Keep the final result shaped for the configured destination audience.", +]; + +function renderList(tag: string, values: string[] | undefined): string[] { + const entries = (values ?? 
[]).map((value) => value.trim()).filter(Boolean); + if (entries.length === 0) { + return [`<${tag}>`, ""]; + } + return [ + `<${tag}>`, + ...entries.map((value) => `- ${escapeXml(value)}`), + ``, + ]; +} + +function renderOptionalLine(name: string, value: string | undefined): string[] { + return value?.trim() ? [`- ${name}: ${escapeXml(value.trim())}`] : []; +} + +/** Build the marker-delimited user prompt for one scheduled task execution. */ +export function buildScheduledTaskRunPrompt(args: { + nowMs: number; + run: ScheduledRun; + task: ScheduledTask; +}): string { + const { run, task } = args; + const destination = task.destination; + const creator = task.createdBy; + const executionActor = task.executionActor ?? SCHEDULED_TASK_SYSTEM_ACTOR; + + return [ + "", + "This is an autonomous scheduled run. Treat the stored task contract as the user request for this turn.", + "", + "", + `- id: ${escapeXml(task.id)}`, + `- title: ${escapeXml(task.task.title)}`, + `- objective: ${escapeXml(task.task.objective)}`, + ...renderOptionalLine("expected_output", task.task.expectedOutput), + "", + ...task.task.instructions.map( + (instruction) => `- ${escapeXml(instruction)}`, + ), + "", + ...renderList("constraints", task.task.constraints), + ...renderList("source-context", task.task.sourceContext), + "", + "", + "", + `- run_id: ${escapeXml(run.id)}`, + `- task_version: ${run.taskVersion}`, + `- scheduled_for: ${new Date(run.scheduledForMs).toISOString()}`, + `- running_at: ${new Date(args.nowMs).toISOString()}`, + `- schedule: ${escapeXml(task.schedule.description)}`, + `- timezone: ${escapeXml(task.schedule.timezone)}`, + `- schedule_kind: ${task.schedule.kind}`, + `- execution_actor_type: ${executionActor.type}`, + `- execution_actor_id: ${escapeXml(executionActor.id)}`, + ...(task.schedule.recurrence + ? 
[ + `- recurrence_frequency: ${task.schedule.recurrence.frequency}`, + `- recurrence_interval: ${task.schedule.recurrence.interval}`, + `- recurrence_start_date: ${escapeXml(task.schedule.recurrence.startDate)}`, + ] + : []), + `- creator_slack_user_id: ${escapeXml(creator.slackUserId)}`, + ...renderOptionalLine("creator_user_name", creator.userName), + ...renderOptionalLine("creator_full_name", creator.fullName), + `- destination_platform: ${destination.platform}`, + `- destination_team_id: ${escapeXml(destination.teamId)}`, + `- destination_channel_id: ${escapeXml(destination.channelId)}`, + "", + "", + "", + ...EXECUTION_RULES, + "", + "", + '', + "Execute the scheduled task now and provide the final result for the configured destination.", + "", + "", + ].join("\n"); +} diff --git a/packages/junior/src/chat/scheduler/store.ts b/packages/junior/src/chat/scheduler/store.ts new file mode 100644 index 00000000..1bdc65b1 --- /dev/null +++ b/packages/junior/src/chat/scheduler/store.ts @@ -0,0 +1,847 @@ +import type { Lock, StateAdapter } from "chat"; +import { getNextRunAtMs } from "@/chat/scheduler/cadence"; +import { getStateAdapter } from "@/chat/state/adapter"; +import type { ScheduledRun, ScheduledTask } from "@/chat/scheduler/types"; + +const SCHEDULER_KEY_PREFIX = "junior:scheduler"; +const SCHEDULER_RECORD_TTL_MS = 5 * 365 * 24 * 60 * 60 * 1000; +const SCHEDULED_RUN_TTL_MS = 90 * 24 * 60 * 60 * 1000; +const CLAIM_TTL_MS = 6 * 60 * 60 * 1000; +const PENDING_CLAIM_STALE_MS = 60_000; +const MISSED_RUN_MAX_AGE_MS = 24 * 60 * 60 * 1000; +const LOCK_TTL_MS = 10_000; + +export interface SchedulerStore { + claimDueRun(args: { nowMs: number }): Promise; + getRun(runId: string): Promise; + getTask(taskId: string): Promise; + listIncompleteRuns(): Promise; + listTasksForTeam(teamId: string): Promise; + markRunBlocked(args: { + completedAtMs: number; + errorMessage: string; + runId: string; + startedAtMs?: number; + }): Promise; + markRunCompleted(args: { + 
completedAtMs: number; + resultMessageTs?: string; + runId: string; + startedAtMs: number; + }): Promise; + markRunFailed(args: { + completedAtMs: number; + errorMessage: string; + startedAtMs?: number; + runId: string; + }): Promise; + markRunSkipped(args: { + completedAtMs: number; + errorMessage: string; + runId: string; + }): Promise; + markRunDispatched(args: { + claimedAtMs: number; + dispatchId: string; + nowMs: number; + runId: string; + }): Promise; + saveTask(task: ScheduledTask): Promise; + updateTaskAfterRun(args: { + errorMessage?: string; + nowMs: number; + run: ScheduledRun; + status: "blocked" | "completed" | "failed"; + }): Promise; +} + +function taskKey(taskId: string): string { + return `${SCHEDULER_KEY_PREFIX}:task:${taskId}`; +} + +function taskLockKey(taskId: string): string { + return `${taskKey(taskId)}:lock`; +} + +function runKey(runId: string): string { + return `${SCHEDULER_KEY_PREFIX}:run:${runId}`; +} + +function claimKey(taskId: string, scheduledForMs: number): string { + return `${SCHEDULER_KEY_PREFIX}:claim:${taskId}:${scheduledForMs}`; +} + +function activeRunKey(taskId: string): string { + return `${SCHEDULER_KEY_PREFIX}:active:${taskId}`; +} + +function globalTaskIndexKey(): string { + return `${SCHEDULER_KEY_PREFIX}:tasks`; +} + +function teamTaskIndexKey(teamId: string): string { + return `${SCHEDULER_KEY_PREFIX}:team:${teamId}:tasks`; +} + +function indexLockKey(indexKey: string): string { + return `${indexKey}:lock`; +} + +function buildRunId(taskId: string, scheduledForMs: number): string { + return `${taskId}:${scheduledForMs}`; +} + +function unique(values: string[]): string[] { + return [...new Set(values.filter(Boolean))]; +} + +async function withLock( + state: StateAdapter, + key: string, + callback: () => Promise, +): Promise { + const lock: Lock | null = await state.acquireLock(key, LOCK_TTL_MS); + if (!lock) { + throw new Error(`Could not acquire scheduler lock for ${key}`); + } + + try { + return await callback(); 
+ } finally { + await state.releaseLock(lock); + } +} + +async function addToIndex( + state: StateAdapter, + key: string, + taskId: string, +): Promise { + await withLock(state, indexLockKey(key), async () => { + const current = ((await state.get(key)) ?? []).filter( + (value): value is string => typeof value === "string", + ); + await state.set(key, unique([...current, taskId]), SCHEDULER_RECORD_TTL_MS); + }); +} + +async function removeFromIndex( + state: StateAdapter, + key: string, + taskId: string, +): Promise { + await withLock(state, indexLockKey(key), async () => { + const current = unique( + ((await state.get(key)) ?? []).filter( + (value): value is string => typeof value === "string", + ), + ); + const next = current.filter((value) => value !== taskId); + if (next.length === current.length) { + return; + } + if (next.length === 0) { + await state.delete(key); + return; + } + await state.set(key, next, SCHEDULER_RECORD_TTL_MS); + }); +} + +async function getIndex(state: StateAdapter, key: string): Promise { + const values = (await state.get(key)) ?? []; + return unique( + values.filter((value): value is string => typeof value === "string"), + ); +} + +async function clearActiveRun( + state: StateAdapter, + taskId: string, + runId: string, +): Promise { + await withLock(state, indexLockKey(activeRunKey(taskId)), async () => { + const current = await state.get<{ runId?: unknown }>(activeRunKey(taskId)); + if (current?.runId === runId) { + await state.delete(activeRunKey(taskId)); + } + }); +} + +async function clearStaleActiveRun( + state: StateAdapter, + taskId: string, + nowMs: number, +): Promise { + const active = await state.get<{ + claimedAtMs?: unknown; + runId?: unknown; + scheduledForMs?: unknown; + }>(activeRunKey(taskId)); + if (typeof active?.runId !== "string") { + await state.delete(activeRunKey(taskId)); + return true; + } + + const activeRun = + (await state.get(runKey(active.runId))) ?? 
undefined; + if (!isStaleActiveRun(active, activeRun, nowMs)) { + return false; + } + + await clearActiveRun(state, taskId, active.runId); + if (typeof active.scheduledForMs === "number") { + await state.delete(claimKey(taskId, active.scheduledForMs)); + } + return true; +} + +function isFinishedRun(run: ScheduledRun): boolean { + return ( + run.status === "completed" || + run.status === "failed" || + run.status === "blocked" || + run.status === "skipped" + ); +} + +function isStaleActiveRun( + active: { claimedAtMs?: unknown }, + run: ScheduledRun | undefined, + nowMs: number, +): boolean { + if (run) { + return isFinishedRun(run) || isStalePendingRun(run, nowMs); + } + + return ( + typeof active.claimedAtMs === "number" && + active.claimedAtMs + PENDING_CLAIM_STALE_MS <= nowMs + ); +} + +function isStalePendingRun( + run: ScheduledRun | undefined, + nowMs: number, +): boolean { + return ( + run?.status === "pending" && + run.claimedAtMs + PENDING_CLAIM_STALE_MS <= nowMs + ); +} + +function isDueTask( + task: ScheduledTask, + nowMs: number, +): task is ScheduledTask & { + nextRunAtMs?: number; + runNowAtMs?: number; +} { + return ( + task.status === "active" && + ((typeof task.runNowAtMs === "number" && + Number.isFinite(task.runNowAtMs) && + task.runNowAtMs <= nowMs) || + (typeof task.nextRunAtMs === "number" && + Number.isFinite(task.nextRunAtMs) && + task.nextRunAtMs <= nowMs)) + ); +} + +function getDueRunAtMs(task: ScheduledTask, nowMs: number): number | undefined { + if ( + typeof task.runNowAtMs === "number" && + Number.isFinite(task.runNowAtMs) && + task.runNowAtMs <= nowMs + ) { + return task.runNowAtMs; + } + if ( + typeof task.nextRunAtMs === "number" && + Number.isFinite(task.nextRunAtMs) && + task.nextRunAtMs <= nowMs + ) { + return task.nextRunAtMs; + } + return undefined; +} + +function buildScheduledRun(args: { + claimedAtMs: number; + scheduledForMs: number; + task: ScheduledTask; +}): ScheduledRun { + const idempotencyKey = 
`${args.task.id}:${args.scheduledForMs}`; + return { + id: buildRunId(args.task.id, args.scheduledForMs), + attempt: 1, + claimedAtMs: args.claimedAtMs, + idempotencyKey, + scheduledForMs: args.scheduledForMs, + status: "pending", + taskId: args.task.id, + taskVersion: args.task.version, + }; +} + +function buildSkippedScheduledRun(args: { + completedAtMs: number; + errorMessage: string; + scheduledForMs: number; + task: ScheduledTask; +}): ScheduledRun { + return { + ...buildScheduledRun({ + claimedAtMs: args.completedAtMs, + scheduledForMs: args.scheduledForMs, + task: args.task, + }), + completedAtMs: args.completedAtMs, + errorMessage: args.errorMessage, + status: "skipped", + }; +} + +function isMissedRunTooOld(args: { + nowMs: number; + scheduledForMs: number; +}): boolean { + return args.scheduledForMs + MISSED_RUN_MAX_AGE_MS < args.nowMs; +} + +function normalizedText(value: string | undefined): string { + return value?.trim().replace(/\s+/g, " ").toLowerCase() ?? ""; +} + +function normalizedTexts(values: string[] | undefined): string[] { + return (values ?? []).map(normalizedText); +} + +function taskDedupeFingerprint(task: ScheduledTask): string { + return JSON.stringify({ + destination: task.destination, + schedule: { + kind: task.schedule.kind, + oneOffAtMs: task.schedule.kind === "one_off" ? task.nextRunAtMs : null, + recurrence: task.schedule.recurrence + ? { + dayOfMonth: task.schedule.recurrence.dayOfMonth ?? null, + frequency: task.schedule.recurrence.frequency, + interval: task.schedule.recurrence.interval, + month: task.schedule.recurrence.month ?? null, + startDate: task.schedule.recurrence.startDate, + time: task.schedule.recurrence.time, + weekdays: [...(task.schedule.recurrence.weekdays ?? 
[])].sort(), + } + : null, + timezone: task.schedule.timezone, + }, + task: { + constraints: normalizedTexts(task.task.constraints), + expectedOutput: normalizedText(task.task.expectedOutput), + instructions: normalizedTexts(task.task.instructions), + objective: normalizedText(task.task.objective), + sourceContext: normalizedTexts(task.task.sourceContext), + title: normalizedText(task.task.title), + }, + }); +} + +function isEarlierTask(left: ScheduledTask, right: ScheduledTask): boolean { + return ( + left.createdAtMs < right.createdAtMs || + (left.createdAtMs === right.createdAtMs && left.id < right.id) + ); +} + +function canFinishRun( + run: ScheduledRun, + startedAtMs: number | undefined, +): boolean { + if (run.status === "pending") { + return startedAtMs === undefined; + } + return run.status === "running" && run.startedAtMs === startedAtMs; +} + +class StateAdapterSchedulerStore implements SchedulerStore { + private readonly state: StateAdapter; + + constructor(state: StateAdapter) { + this.state = state; + } + + async saveTask(task: ScheduledTask): Promise { + await this.state.connect(); + await withLock(this.state, taskLockKey(task.id), async () => { + const current = + (await this.state.get(taskKey(task.id))) ?? 
undefined; + await this.saveTaskRecord(task, current); + }); + } + + private async saveTaskRecord( + task: ScheduledTask, + current: ScheduledTask | undefined, + ): Promise { + if ( + current?.status === "blocked" && + task.status === "active" && + typeof task.nextRunAtMs === "number" && + Number.isFinite(task.nextRunAtMs) + ) { + await this.state.delete(claimKey(task.id, task.nextRunAtMs)); + } + await this.state.set(taskKey(task.id), task, SCHEDULER_RECORD_TTL_MS); + + if (task.status === "deleted") { + await removeFromIndex(this.state, globalTaskIndexKey(), task.id); + await removeFromIndex( + this.state, + teamTaskIndexKey(task.destination.teamId), + task.id, + ); + if (current && current.destination.teamId !== task.destination.teamId) { + await removeFromIndex( + this.state, + teamTaskIndexKey(current.destination.teamId), + task.id, + ); + } + return; + } + + await addToIndex(this.state, globalTaskIndexKey(), task.id); + await addToIndex( + this.state, + teamTaskIndexKey(task.destination.teamId), + task.id, + ); + if (current && current.destination.teamId !== task.destination.teamId) { + await removeFromIndex( + this.state, + teamTaskIndexKey(current.destination.teamId), + task.id, + ); + } + } + + async getTask(taskId: string): Promise { + await this.state.connect(); + return (await this.state.get(taskKey(taskId))) ?? 
undefined; + } + + async listTasksForTeam(teamId: string): Promise { + await this.state.connect(); + const ids = await getIndex(this.state, teamTaskIndexKey(teamId)); + const tasks = await Promise.all(ids.map((id) => this.getTask(id))); + return tasks + .filter((task): task is ScheduledTask => Boolean(task)) + .filter((task) => task.status !== "deleted") + .sort((a, b) => a.createdAtMs - b.createdAtMs); + } + + async claimDueRun(args: { + nowMs: number; + }): Promise { + await this.state.connect(); + const ids = await getIndex(this.state, globalTaskIndexKey()); + + for (const id of ids) { + const task = await this.getTask(id); + if (!task || !isDueTask(task, args.nowMs)) { + continue; + } + + const scheduledForMs = getDueRunAtMs(task, args.nowMs); + if (scheduledForMs === undefined) { + continue; + } + const runId = buildRunId(task.id, scheduledForMs); + const tryClaimActiveRun = async (): Promise => + await this.state.setIfNotExists( + activeRunKey(task.id), + { claimedAtMs: args.nowMs, runId, scheduledForMs }, + CLAIM_TTL_MS, + ); + + let activeClaimed = await tryClaimActiveRun(); + if (!activeClaimed) { + if (await clearStaleActiveRun(this.state, task.id, args.nowMs)) { + activeClaimed = await tryClaimActiveRun(); + } + if (!activeClaimed) { + continue; + } + } + + if (isMissedRunTooOld({ nowMs: args.nowMs, scheduledForMs })) { + await this.skipMissedRun({ nowMs: args.nowMs, scheduledForMs, task }); + await clearActiveRun(this.state, task.id, runId); + continue; + } + + const tryClaimScheduledSlot = async (): Promise => + await this.state.setIfNotExists( + claimKey(task.id, scheduledForMs), + { claimedAtMs: args.nowMs }, + CLAIM_TTL_MS, + ); + + let claimed = await tryClaimScheduledSlot(); + if (!claimed) { + const existingRun = await this.getRun(runId); + if (isStalePendingRun(existingRun, args.nowMs)) { + await clearActiveRun(this.state, task.id, runId); + await this.state.delete(claimKey(task.id, scheduledForMs)); + activeClaimed = await tryClaimActiveRun(); 
+ claimed = activeClaimed ? await tryClaimScheduledSlot() : false; + } + if (!claimed) { + await clearActiveRun(this.state, task.id, runId); + continue; + } + } + + const run = buildScheduledRun({ + claimedAtMs: args.nowMs, + scheduledForMs, + task, + }); + await this.state.set(runKey(run.id), run, SCHEDULED_RUN_TTL_MS); + return run; + } + + return undefined; + } + + private async skipMissedRun(args: { + nowMs: number; + scheduledForMs: number; + task: ScheduledTask; + }): Promise { + await withLock(this.state, taskLockKey(args.task.id), async () => { + const current = + (await this.state.get(taskKey(args.task.id))) ?? + undefined; + if ( + !current || + current.status !== "active" || + getDueRunAtMs(current, args.nowMs) !== args.scheduledForMs + ) { + return; + } + + const duplicateOf = await this.findStaleRecoveryCanonicalTask(current); + const errorMessage = duplicateOf + ? `Duplicate stale scheduled task was skipped without dispatch. Canonical task: ${duplicateOf.id}.` + : "Scheduled occurrence was more than 24 hours late and was skipped without dispatch."; + await this.state.set( + runKey(buildRunId(current.id, args.scheduledForMs)), + buildSkippedScheduledRun({ + completedAtMs: args.nowMs, + errorMessage, + scheduledForMs: args.scheduledForMs, + task: current, + }), + SCHEDULED_RUN_TTL_MS, + ); + + const isRunNow = current.runNowAtMs === args.scheduledForMs; + let nextRunAtMs: number | undefined; + if (!duplicateOf) { + nextRunAtMs = + isRunNow && current.nextRunAtMs !== args.scheduledForMs + ? current.nextRunAtMs + : current.schedule.kind === "recurring" + ? getNextRunAtMs(current, args.scheduledForMs, args.nowMs) + : undefined; + } + const nextStatus = nextRunAtMs ? "active" : "paused"; + + await this.saveTaskRecord( + { + ...current, + nextRunAtMs, + runNowAtMs: isRunNow ? undefined : current.runNowAtMs, + status: nextStatus, + statusReason: nextStatus === "paused" ? 
errorMessage : undefined, + updatedAtMs: args.nowMs, + version: current.version + 1, + }, + current, + ); + }); + } + + private async findStaleRecoveryCanonicalTask( + task: ScheduledTask, + ): Promise { + const fingerprint = taskDedupeFingerprint(task); + const ids = await getIndex( + this.state, + teamTaskIndexKey(task.destination.teamId), + ); + const tasks = await Promise.all( + ids.filter((id) => id !== task.id).map((id) => this.getTask(id)), + ); + return tasks + .filter((candidate): candidate is ScheduledTask => Boolean(candidate)) + .filter( + (candidate) => + candidate.status === "active" && + isEarlierTask(candidate, task) && + taskDedupeFingerprint(candidate) === fingerprint, + ) + .sort((a, b) => a.createdAtMs - b.createdAtMs || a.id.localeCompare(b.id)) + .at(0); + } + + async getRun(runId: string): Promise { + await this.state.connect(); + return (await this.state.get(runKey(runId))) ?? undefined; + } + + async listIncompleteRuns(): Promise { + await this.state.connect(); + const ids = await getIndex(this.state, globalTaskIndexKey()); + const runs: ScheduledRun[] = []; + for (const taskId of ids) { + const active = await this.state.get<{ runId?: unknown }>( + activeRunKey(taskId), + ); + if (typeof active?.runId !== "string") { + continue; + } + const run = await this.getRun(active.runId); + if (run && !isFinishedRun(run)) { + runs.push(run); + } + } + return runs; + } + + async markRunDispatched(args: { + claimedAtMs: number; + dispatchId: string; + nowMs: number; + runId: string; + }): Promise { + return await this.updateRun(args.runId, (run) => + run.status === "pending" && run.claimedAtMs === args.claimedAtMs + ? 
{ + ...run, + dispatchId: args.dispatchId, + startedAtMs: args.nowMs, + status: "running", + } + : undefined, + ); + } + + async markRunCompleted(args: { + completedAtMs: number; + resultMessageTs?: string; + runId: string; + startedAtMs: number; + }): Promise { + const next = await this.updateRun(args.runId, (run) => + canFinishRun(run, args.startedAtMs) + ? { + ...run, + completedAtMs: args.completedAtMs, + resultMessageTs: args.resultMessageTs, + status: "completed", + } + : undefined, + ); + if (next) { + await clearActiveRun(this.state, next.taskId, next.id); + } + return next; + } + + async markRunFailed(args: { + completedAtMs: number; + errorMessage: string; + startedAtMs?: number; + runId: string; + }): Promise { + const next = await this.updateRun(args.runId, (run) => + canFinishRun(run, args.startedAtMs) + ? { + ...run, + completedAtMs: args.completedAtMs, + errorMessage: args.errorMessage, + status: "failed", + } + : undefined, + ); + if (next) { + await clearActiveRun(this.state, next.taskId, next.id); + } + return next; + } + + async markRunSkipped(args: { + completedAtMs: number; + errorMessage: string; + runId: string; + }): Promise { + const next = await this.updateRun(args.runId, (run) => + run.status === "pending" + ? { + ...run, + completedAtMs: args.completedAtMs, + errorMessage: args.errorMessage, + status: "skipped", + } + : undefined, + ); + if (next) { + await clearActiveRun(this.state, next.taskId, next.id); + } + return next; + } + + async markRunBlocked(args: { + completedAtMs: number; + errorMessage: string; + runId: string; + startedAtMs?: number; + }): Promise { + const next = await this.updateRun(args.runId, (run) => + canFinishRun(run, args.startedAtMs) + ? 
{ + ...run, + completedAtMs: args.completedAtMs, + errorMessage: args.errorMessage, + status: "blocked", + } + : undefined, + ); + if (next) { + await clearActiveRun(this.state, next.taskId, next.id); + } + return next; + } + + async updateTaskAfterRun(args: { + errorMessage?: string; + nowMs: number; + run: ScheduledRun; + status: "blocked" | "completed" | "failed"; + }): Promise { + await this.state.connect(); + await withLock(this.state, taskLockKey(args.run.taskId), async () => { + const current = + (await this.state.get(taskKey(args.run.taskId))) ?? + undefined; + if (!current || current.status === "deleted") { + return; + } + + const isRunNow = current.runNowAtMs === args.run.scheduledForMs; + if (isRunNow) { + let nextRunAtMs = current.nextRunAtMs; + if ( + args.status !== "blocked" && + typeof current.nextRunAtMs === "number" && + current.nextRunAtMs <= args.run.scheduledForMs + ) { + nextRunAtMs = getNextRunAtMs( + current, + current.nextRunAtMs, + args.nowMs, + ); + } + await this.saveTaskRecord( + { + ...current, + lastRunAtMs: args.run.scheduledForMs, + nextRunAtMs, + runNowAtMs: undefined, + status: + args.status === "blocked" + ? "blocked" + : nextRunAtMs + ? current.status + : "paused", + statusReason: + args.status === "blocked" ? args.errorMessage : undefined, + updatedAtMs: args.nowMs, + version: current.version + 1, + }, + current, + ); + return; + } + + if ( + current.status !== "active" || + current.nextRunAtMs !== args.run.scheduledForMs + ) { + await this.saveTaskRecord( + { + ...current, + lastRunAtMs: args.run.scheduledForMs, + updatedAtMs: args.nowMs, + version: current.version + 1, + }, + current, + ); + return; + } + + const nextRunAtMs = + args.status === "blocked" + ? undefined + : getNextRunAtMs(current, args.run.scheduledForMs, args.nowMs); + + await this.saveTaskRecord( + { + ...current, + lastRunAtMs: args.run.scheduledForMs, + nextRunAtMs, + status: + args.status === "blocked" + ? "blocked" + : nextRunAtMs + ? 
"active" + : "paused", + statusReason: + args.status === "blocked" ? args.errorMessage : undefined, + updatedAtMs: args.nowMs, + version: current.version + 1, + }, + current, + ); + }); + } + + private async updateRun( + runId: string, + update: (run: ScheduledRun) => ScheduledRun | undefined, + ): Promise { + await this.state.connect(); + return await withLock(this.state, indexLockKey(runKey(runId)), async () => { + const current = await this.getRun(runId); + if (!current) { + return undefined; + } + const next = update(current); + if (!next) { + return undefined; + } + await this.state.set(runKey(runId), next, SCHEDULED_RUN_TTL_MS); + return next; + }); + } +} + +/** Create the production scheduler store backed by Junior's state adapter. */ +export function createStateSchedulerStore( + stateAdapter: StateAdapter = getStateAdapter(), +): SchedulerStore { + return new StateAdapterSchedulerStore(stateAdapter); +} diff --git a/packages/junior/src/chat/scheduler/types.ts b/packages/junior/src/chat/scheduler/types.ts new file mode 100644 index 00000000..9446b3ab --- /dev/null +++ b/packages/junior/src/chat/scheduler/types.ts @@ -0,0 +1,102 @@ +export type ScheduledTaskStatus = "active" | "paused" | "blocked" | "deleted"; + +export type ScheduledRunStatus = + | "pending" + | "running" + | "completed" + | "failed" + | "blocked" + | "skipped"; + +export interface ScheduledTaskPrincipal { + slackUserId: string; + fullName?: string; + userName?: string; +} + +export interface ScheduledTaskExecutionActor { + type: "system"; + id: string; +} + +export const SCHEDULED_TASK_SYSTEM_ACTOR = Object.freeze({ + type: "system", + id: "scheduled-task", +} satisfies ScheduledTaskExecutionActor); + +export interface ScheduledTaskDestination { + platform: "slack"; + teamId: string; + channelId: string; +} + +export type ScheduledCalendarFrequency = + | "daily" + | "weekly" + | "monthly" + | "yearly"; + +export interface ScheduledLocalTime { + hour: number; + minute: number; +} + +export 
interface ScheduledTaskRecurrence { + dayOfMonth?: number; + frequency: ScheduledCalendarFrequency; + interval: number; + month?: number; + startDate: string; + time: ScheduledLocalTime; + weekdays?: number[]; +} + +export interface ScheduledTaskSchedule { + description: string; + timezone: string; + kind: "one_off" | "recurring"; + recurrence?: ScheduledTaskRecurrence; +} + +export interface ScheduledTaskSpec { + title: string; + objective: string; + instructions: string[]; + expectedOutput?: string; + constraints?: string[]; + sourceContext?: string[]; +} + +export interface ScheduledTask { + id: string; + createdAtMs: number; + createdBy: ScheduledTaskPrincipal; + destination: ScheduledTaskDestination; + executionActor?: ScheduledTaskExecutionActor; + lastRunAtMs?: number; + nextRunAtMs?: number; + originalRequest?: string; + runNowAtMs?: number; + schedule: ScheduledTaskSchedule; + status: ScheduledTaskStatus; + statusReason?: string; + task: ScheduledTaskSpec; + updatedAtMs: number; + version: number; +} + +export interface ScheduledRun { + id: string; + attempt: number; + claimedAtMs: number; + completedAtMs?: number; + dispatchId?: string; + errorMessage?: string; + idempotencyKey: string; + resultMessageTs?: string; + scheduledForMs: number; + startedAtMs?: number; + status: ScheduledRunStatus; + taskId: string; + taskVersion: number; +} diff --git a/packages/junior/src/chat/services/auth-pause.ts b/packages/junior/src/chat/services/auth-pause.ts index 0ffbe036..18d8fadf 100644 --- a/packages/junior/src/chat/services/auth-pause.ts +++ b/packages/junior/src/chat/services/auth-pause.ts @@ -1,5 +1,6 @@ export type AuthorizationPauseKind = "mcp" | "plugin"; export type AuthorizationPauseDisposition = "link_already_sent" | "link_sent"; +export type AuthorizationFlowMode = "interactive" | "disabled"; /** * Runtime-owned signal that the current turn must park until the user @@ -29,3 +30,18 @@ export class AuthorizationPauseError extends Error { this.provider = 
provider; } } + +/** Error indicating this turn cannot start an external authorization flow. */ +export class AuthorizationFlowDisabledError extends Error { + readonly kind: AuthorizationPauseKind; + readonly provider: string; + + constructor(kind: AuthorizationPauseKind, provider: string) { + super( + `Authorization is required for ${provider}, but this turn cannot start an authorization flow.`, + ); + this.name = "AuthorizationFlowDisabledError"; + this.kind = kind; + this.provider = provider; + } +} diff --git a/packages/junior/src/chat/services/mcp-auth-orchestration.ts b/packages/junior/src/chat/services/mcp-auth-orchestration.ts index ffc14e15..34097659 100644 --- a/packages/junior/src/chat/services/mcp-auth-orchestration.ts +++ b/packages/junior/src/chat/services/mcp-auth-orchestration.ts @@ -7,7 +7,11 @@ import { } from "@/chat/mcp/auth-store"; import { deliverPrivateMessage, formatProviderLabel } from "@/chat/oauth-flow"; import { canReusePendingAuthLink } from "@/chat/services/pending-auth"; -import { AuthorizationPauseError } from "@/chat/services/auth-pause"; +import { + AuthorizationFlowDisabledError, + AuthorizationPauseError, + type AuthorizationFlowMode, +} from "@/chat/services/auth-pause"; import type { ThreadArtifactsState } from "@/chat/state/artifacts"; import type { ConversationPendingAuthState } from "@/chat/state/conversation"; import type { PluginDefinition } from "@/chat/plugins/types"; @@ -36,6 +40,7 @@ export interface McpAuthOrchestrationDeps { onPendingAuth?: ( pendingAuth: ConversationPendingAuthState, ) => void | Promise; + authorizationFlowMode?: AuthorizationFlowMode; } export interface McpAuthOrchestration { @@ -90,6 +95,10 @@ export function createMcpAuthOrchestration( `Missing MCP auth session context for plugin "${provider}"`, ); } + if (deps.authorizationFlowMode === "disabled") { + await deleteMcpAuthSession(authSessionId); + throw new AuthorizationFlowDisabledError("mcp", provider); + } const latestArtifactState = 
deps.getMergedArtifactState(); await patchMcpAuthSession(authSessionId, { diff --git a/packages/junior/src/chat/services/plugin-auth-orchestration.ts b/packages/junior/src/chat/services/plugin-auth-orchestration.ts index 55f07819..ca10d1ad 100644 --- a/packages/junior/src/chat/services/plugin-auth-orchestration.ts +++ b/packages/junior/src/chat/services/plugin-auth-orchestration.ts @@ -3,7 +3,11 @@ import { unlinkProvider } from "@/chat/credentials/unlink-provider"; import type { UserTokenStore } from "@/chat/credentials/user-token-store"; import { formatProviderLabel, startOAuthFlow } from "@/chat/oauth-flow"; import { canReusePendingAuthLink } from "@/chat/services/pending-auth"; -import { AuthorizationPauseError } from "@/chat/services/auth-pause"; +import { + AuthorizationFlowDisabledError, + AuthorizationPauseError, + type AuthorizationFlowMode, +} from "@/chat/services/auth-pause"; import type { ConversationPendingAuthState } from "@/chat/state/conversation"; import { getPluginDefinition, @@ -43,6 +47,7 @@ export interface PluginAuthOrchestrationDeps { onPendingAuth?: ( pendingAuth: ConversationPendingAuthState, ) => void | Promise; + authorizationFlowMode?: AuthorizationFlowMode; userTokenStore?: UserTokenStore; } @@ -219,6 +224,9 @@ export function createPluginAuthOrchestration( if (!deps.requesterId || !getPluginOAuthConfig(provider)) { throw new Error(`Cannot start plugin authorization for ${provider}`); } + if (deps.authorizationFlowMode === "disabled") { + throw new AuthorizationFlowDisabledError("plugin", provider); + } const providerLabel = formatProviderLabel(provider); const reusingPendingLink = canReusePendingAuthLink({ @@ -303,6 +311,9 @@ export function createPluginAuthOrchestration( } if (!deps.requesterId || !deps.userTokenStore) { + if (deps.authorizationFlowMode === "disabled") { + throw new AuthorizationFlowDisabledError("plugin", provider); + } throw buildCredentialFailureError(provider, input.command); } diff --git 
a/packages/junior/src/chat/slack/ids.ts b/packages/junior/src/chat/slack/ids.ts new file mode 100644 index 00000000..ce725d3b --- /dev/null +++ b/packages/junior/src/chat/slack/ids.ts @@ -0,0 +1,9 @@ +/** Return true when a value is a Slack workspace/team id. */ +export function isSlackTeamId(value: string): boolean { + return /^T[A-Z0-9]+$/.test(value); +} + +/** Return true when a value is a Slack conversation id. */ +export function isSlackConversationId(value: string): boolean { + return /^(C|G|D)[A-Z0-9]+$/.test(value); +} diff --git a/packages/junior/src/chat/slack/reply.ts b/packages/junior/src/chat/slack/reply.ts index 0099c679..5a8b1a11 100644 --- a/packages/junior/src/chat/slack/reply.ts +++ b/packages/junior/src/chat/slack/reply.ts @@ -192,7 +192,7 @@ export async function postSlackApiReplyPosts(args: { messageTs?: string; stage: PlannedSlackReplyStage; }) => Promise | void; - threadTs: string; + threadTs?: string; posts: PlannedSlackReplyPost[]; }): Promise { const lastTextPostIndex = findLastTextPostIndex(args.posts); @@ -224,10 +224,13 @@ export async function postSlackApiReplyPosts(args: { continue; } + if (!args.threadTs && !lastPostedMessageTs) { + throw new Error("Slack file delivery requires a posted message thread"); + } await uploadReplyFiles({ channelId: args.channelId, failureMode: args.fileUploadFailureMode ?? "best_effort", - threadTs: args.threadTs, + threadTs: args.threadTs ?? lastPostedMessageTs!, files: post.files, }); } catch (error) { diff --git a/packages/junior/src/chat/slack/workspace-context.ts b/packages/junior/src/chat/slack/workspace-context.ts new file mode 100644 index 00000000..6e26410b --- /dev/null +++ b/packages/junior/src/chat/slack/workspace-context.ts @@ -0,0 +1,17 @@ +import { AsyncLocalStorage } from "node:async_hooks"; + +const workspaceTeamIdStorage = new AsyncLocalStorage(); + +/** Run a callback with the Slack workspace team ID for the inbound webhook. 
*/ +export function runWithWorkspaceTeamId( + teamId: string | undefined, + fn: () => T, +): T { + if (!teamId) return fn(); + return workspaceTeamIdStorage.run(teamId, fn); +} + +/** Return the Slack workspace team ID for the current inbound webhook. */ +export function getWorkspaceTeamId(): string | undefined { + return workspaceTeamIdStorage.getStore(); +} diff --git a/packages/junior/src/chat/state/adapter.ts b/packages/junior/src/chat/state/adapter.ts index c8a8016b..d918274a 100644 --- a/packages/junior/src/chat/state/adapter.ts +++ b/packages/junior/src/chat/state/adapter.ts @@ -10,6 +10,57 @@ const ACTIVE_LOCK_HEARTBEAT_MS = 30_000; let stateAdapter: StateAdapter | undefined; let redisStateAdapter: RedisStateAdapter | undefined; +function createPrefixedStateAdapter( + base: StateAdapter, + prefix: string, +): StateAdapter { + const prefixed = (value: string): string => `${prefix}:${value}`; + const unprefixed = (value: string): string => + value.startsWith(`${prefix}:`) ? value.slice(prefix.length + 1) : value; + const prefixLock = (lock: Lock): Lock => ({ + ...lock, + threadId: prefixed(lock.threadId), + }); + const unprefixLock = (lock: Lock): Lock => ({ + ...lock, + threadId: unprefixed(lock.threadId), + }); + + return { + appendToList: (key, value, options) => + base.appendToList(prefixed(key), value, options), + connect: () => base.connect(), + disconnect: () => base.disconnect(), + subscribe: (threadId) => base.subscribe(prefixed(threadId)), + unsubscribe: (threadId) => base.unsubscribe(prefixed(threadId)), + isSubscribed: (threadId) => base.isSubscribed(prefixed(threadId)), + acquireLock: async (threadId, ttlMs) => { + const lock = await base.acquireLock(prefixed(threadId), ttlMs); + return lock ? 
unprefixLock(lock) : null; + }, + releaseLock: (lock) => base.releaseLock(prefixLock(lock)), + extendLock: async (lock, ttlMs) => { + const prefixedLock = prefixLock(lock); + const extended = await base.extendLock(prefixedLock, ttlMs); + if (extended) { + lock.expiresAt = prefixedLock.expiresAt; + } + return extended; + }, + forceReleaseLock: (threadId) => base.forceReleaseLock(prefixed(threadId)), + enqueue: (threadId, entry, maxSize) => + base.enqueue(prefixed(threadId), entry, maxSize), + dequeue: (threadId) => base.dequeue(prefixed(threadId)), + queueDepth: (threadId) => base.queueDepth(prefixed(threadId)), + get: (key) => base.get(prefixed(key)), + getList: (key) => base.getList(prefixed(key)), + set: (key, value, ttlMs) => base.set(prefixed(key), value, ttlMs), + setIfNotExists: (key, value, ttlMs) => + base.setIfNotExists(prefixed(key), value, ttlMs), + delete: (key) => base.delete(prefixed(key)), + }; +} + function createQueuedStateAdapter( base: StateAdapter, options: { activeLockMaxAgeMs: number }, @@ -169,15 +220,20 @@ function createQueuedStateAdapter( }; } +function withOptionalPrefix(base: StateAdapter, prefix: string | undefined) { + return prefix ? 
createPrefixedStateAdapter(base, prefix) : base; +} + function createStateAdapter(): StateAdapter { const config = getChatConfig(); const activeLockMaxAgeMs = config.bot.turnTimeoutMs + ACTIVE_LOCK_TTL_MS; if (config.state.adapter === "memory") { redisStateAdapter = undefined; - return createQueuedStateAdapter(createMemoryState(), { - activeLockMaxAgeMs, - }); + return createQueuedStateAdapter( + withOptionalPrefix(createMemoryState(), config.state.keyPrefix), + { activeLockMaxAgeMs }, + ); } if (!config.state.redisUrl) { @@ -188,9 +244,10 @@ function createStateAdapter(): StateAdapter { url: config.state.redisUrl, }); redisStateAdapter = redisState; - return createQueuedStateAdapter(redisState, { - activeLockMaxAgeMs, - }); + return createQueuedStateAdapter( + withOptionalPrefix(redisState, config.state.keyPrefix), + { activeLockMaxAgeMs }, + ); } function getOptionalRedisStateAdapter(): RedisStateAdapter | undefined { diff --git a/packages/junior/src/chat/tools/agent-tools.ts b/packages/junior/src/chat/tools/agent-tools.ts index 608b86df..5f3f76f1 100644 --- a/packages/junior/src/chat/tools/agent-tools.ts +++ b/packages/junior/src/chat/tools/agent-tools.ts @@ -3,7 +3,10 @@ import { serializeGenAiAttribute } from "@/chat/logging"; import { setSpanAttributes, withSpan, type LogContext } from "@/chat/logging"; import { GEN_AI_PROVIDER_NAME } from "@/chat/pi/client"; import { shouldEmitDevAgentTrace } from "@/chat/runtime/dev-agent-trace"; -import { AuthorizationPauseError } from "@/chat/services/auth-pause"; +import { + AuthorizationFlowDisabledError, + AuthorizationPauseError, +} from "@/chat/services/auth-pause"; import type { PluginAuthOrchestration } from "@/chat/services/plugin-auth-orchestration"; import { buildReportedProgressStatus } from "@/chat/runtime/report-progress"; import type { AssistantStatusSpec } from "@/chat/slack/assistant-thread/status"; @@ -118,7 +121,10 @@ export function createAgentTools( } return normalized; } catch (error) { - if (error 
instanceof AuthorizationPauseError) { + if ( + error instanceof AuthorizationPauseError || + error instanceof AuthorizationFlowDisabledError + ) { throw error; } handleToolExecutionError( diff --git a/packages/junior/src/chat/tools/index.ts b/packages/junior/src/chat/tools/index.ts index 5de3d625..25aea6c9 100644 --- a/packages/junior/src/chat/tools/index.ts +++ b/packages/junior/src/chat/tools/index.ts @@ -36,6 +36,7 @@ import type { ToolRuntimeContext, ToolState, } from "@/chat/tools/types"; +import { getAgentPluginTools } from "@/chat/plugins/agent-hooks"; import { createWebFetchTool } from "@/chat/tools/web/fetch-tool"; import { createWebSearchTool } from "@/chat/tools/web/search"; import { createWriteFileTool } from "@/chat/tools/sandbox/write-file"; @@ -152,5 +153,16 @@ export function createTools( ); } + for (const [name, pluginTool] of Object.entries( + getAgentPluginTools(context), + )) { + if (tools[name]) { + throw new Error( + `Trusted plugin tool "${name}" conflicts with a core tool`, + ); + } + tools[name] = pluginTool; + } + return tools; } diff --git a/packages/junior/src/chat/tools/slack/schedule-tools.ts b/packages/junior/src/chat/tools/slack/schedule-tools.ts new file mode 100644 index 00000000..8ec99076 --- /dev/null +++ b/packages/junior/src/chat/tools/slack/schedule-tools.ts @@ -0,0 +1,793 @@ +import { randomUUID } from "node:crypto"; +import { Type } from "@sinclair/typebox"; +import { + buildCalendarRecurrence, + parseRelativeScheduleTimestamp, + parseScheduleTimestamp, +} from "@/chat/scheduler/cadence"; +import { createStateSchedulerStore } from "@/chat/scheduler/store"; +import { SCHEDULED_TASK_SYSTEM_ACTOR } from "@/chat/scheduler/types"; +import type { + ScheduledCalendarFrequency, + ScheduledTask, + ScheduledTaskDestination, + ScheduledTaskPrincipal, + ScheduledTaskRecurrence, + ScheduledTaskStatus, +} from "@/chat/scheduler/types"; +import { normalizeSlackConversationId } from "@/chat/slack/client"; +import { isSlackTeamId } from 
"@/chat/slack/ids"; +import { tool } from "@/chat/tools/definition"; +import type { ToolRuntimeContext } from "@/chat/tools/types"; + +const TASK_ID_PREFIX = "sched"; +const MAX_LISTED_TASKS = 50; +const DEFAULT_SCHEDULE_TIMEZONE = "America/Los_Angeles"; +const ACTIVE_DESTINATION_GUIDELINE = + "Only manage tasks for the active Slack DM or channel; never target an existing thread, another channel, or another user's DM."; +const ACTIVE_TASK_ID_GUIDELINE = + "Use only task IDs returned from this active destination."; + +function requireActiveDestination( + context: ToolRuntimeContext, +): + | { ok: true; destination: ScheduledTaskDestination } + | { ok: false; error: string } { + const channelId = normalizeSlackConversationId(context.channelId); + if (!channelId) { + return { + ok: false, + error: "No active Slack channel context is available.", + }; + } + if (!context.teamId) { + return { + ok: false, + error: "No active Slack workspace context is available.", + }; + } + if (!isSlackTeamId(context.teamId)) { + return { + ok: false, + error: "Active Slack workspace context is invalid.", + }; + } + + return { + ok: true, + destination: { + platform: "slack", + teamId: context.teamId, + channelId, + }, + }; +} + +function requireRequester( + context: ToolRuntimeContext, +): + | { ok: true; requester: ScheduledTaskPrincipal } + | { ok: false; error: string } { + const userId = context.requester?.userId; + if (!userId) { + return { + ok: false, + error: "No active Slack requester context is available.", + }; + } + + return { + ok: true, + requester: { + slackUserId: userId, + ...(context.requester?.userName + ? { userName: context.requester.userName } + : {}), + ...(context.requester?.fullName + ? 
{ fullName: context.requester.fullName } + : {}), + }, + }; +} + +function sameDestination( + task: ScheduledTask, + destination: ScheduledTaskDestination, +): boolean { + return ( + task.destination.platform === destination.platform && + task.destination.teamId === destination.teamId && + task.destination.channelId === destination.channelId + ); +} + +async function getWritableTask(args: { + context: ToolRuntimeContext; + taskId: string; +}): Promise<{ ok: true; task: ScheduledTask } | { ok: false; error: string }> { + const destination = requireActiveDestination(args.context); + if (!destination.ok) { + return destination; + } + + const task = await createStateSchedulerStore().getTask(args.taskId); + if (!task || task.status === "deleted") { + return { + ok: false, + error: "Scheduled task was not found in the active destination.", + }; + } + + if (!sameDestination(task, destination.destination)) { + return { + ok: false, + error: + "Scheduled task can only be managed from the Slack destination where it was created.", + }; + } + return { + ok: true, + task, + }; +} + +function compactTask(task: ScheduledTask): Record { + return { + id: task.id, + status: task.status, + title: task.task.title, + objective: task.task.objective, + schedule: task.schedule.description, + timezone: task.schedule.timezone, + recurrence: task.schedule.recurrence + ? { + frequency: task.schedule.recurrence.frequency, + interval: task.schedule.recurrence.interval, + start_date: task.schedule.recurrence.startDate, + time: task.schedule.recurrence.time, + weekdays: task.schedule.recurrence.weekdays, + month: task.schedule.recurrence.month, + day_of_month: task.schedule.recurrence.dayOfMonth, + } + : null, + next_run_at: task.nextRunAtMs + ? new Date(task.nextRunAtMs).toISOString() + : null, + last_run_at: task.lastRunAtMs + ? new Date(task.lastRunAtMs).toISOString() + : null, + run_now_at: task.runNowAtMs + ? 
new Date(task.runNowAtMs).toISOString() + : null, + version: task.version, + }; +} + +function buildTaskId(): string { + return `${TASK_ID_PREFIX}_${randomUUID()}`; +} + +function normalizeStatus( + value: string | undefined, +): ScheduledTaskStatus | undefined { + if (value === "active" || value === "paused" || value === "blocked") { + return value; + } + return undefined; +} + +function normalizeFrequency( + value: unknown, +): ScheduledCalendarFrequency | undefined { + if ( + value === "daily" || + value === "weekly" || + value === "monthly" || + value === "yearly" + ) { + return value; + } + return undefined; +} + +function buildRecurrence(args: { + existing?: ScheduledTaskRecurrence; + input: { + recurrence_frequency?: unknown; + recurrence_interval?: number; + recurrence_weekdays?: number[]; + }; + nextRunAtMs: number | undefined; + timezone: string; +}): + | { ok: true; recurrence?: ScheduledTaskRecurrence } + | { ok: false; error: string } { + if (args.input.recurrence_frequency === null) { + return { ok: true, recurrence: undefined }; + } + + const frequency = + normalizeFrequency(args.input.recurrence_frequency) ?? + args.existing?.frequency; + if (!frequency) { + return { ok: true, recurrence: undefined }; + } + if (!args.nextRunAtMs) { + return { + ok: false, + error: + "Recurring scheduled tasks require next_run_at_iso or next_run_at_text.", + }; + } + + try { + return { + ok: true, + recurrence: buildCalendarRecurrence({ + frequency, + interval: args.input.recurrence_interval ?? args.existing?.interval, + nextRunAtMs: args.nextRunAtMs, + timezone: args.timezone, + weekdays: + frequency === "weekly" + ? (args.input.recurrence_weekdays ?? args.existing?.weekdays) + : undefined, + }), + }; + } catch (error) { + return { + ok: false, + error: + error instanceof RangeError + ? "timezone must be a valid IANA time zone." + : error instanceof Error + ? 
error.message + : String(error), + }; + } +} + +function shouldRebuildRecurrence(input: { + next_run_at_text?: string; + next_run_at_iso?: string; + recurrence_frequency?: unknown; + recurrence_interval?: number; + recurrence_weekdays?: number[]; + timezone?: string; +}): boolean { + return ( + input.next_run_at_text !== undefined || + input.next_run_at_iso !== undefined || + input.recurrence_frequency !== undefined || + input.recurrence_interval !== undefined || + input.recurrence_weekdays !== undefined || + input.timezone !== undefined + ); +} + +function getDefaultScheduleTimezone(): string { + return process.env.JUNIOR_TIMEZONE?.trim() || DEFAULT_SCHEDULE_TIMEZONE; +} + +function isValidTimeZone(timezone: string): boolean { + try { + new Intl.DateTimeFormat("en-US", { timeZone: timezone }).format(); + return true; + } catch { + return false; + } +} + +function parseNextRunAtMs(args: { + input: { + next_run_at_iso?: string; + next_run_at_text?: string; + }; + nowMs: number; + timezone: string; +}): number | undefined { + try { + if (args.input.next_run_at_iso) { + return parseScheduleTimestamp(args.input.next_run_at_iso); + } + if (args.input.next_run_at_text) { + return parseRelativeScheduleTimestamp({ + nowMs: args.nowMs, + text: args.input.next_run_at_text, + timezone: args.timezone, + }); + } + } catch { + return undefined; + } + return undefined; +} + +function hasConflictingNextRunInputs(input: { + next_run_at_iso?: string; + next_run_at_text?: string; +}): boolean { + return Boolean(input.next_run_at_iso && input.next_run_at_text); +} + +function canCreateUnconfirmedSimpleReminder(args: { + context: ToolRuntimeContext; + input: { + constraints?: string[]; + next_run_at_iso?: string; + next_run_at_text?: string; + recurrence_frequency?: unknown; + recurrence_interval?: number; + recurrence_weekdays?: number[]; + source_context?: string[]; + }; +}): boolean { + const userText = args.context.userText; + if ( + !userText || + !/\bremind\s+(me|us|this 
channel|the channel)\b/i.test(userText)
+  ) {
+    return false;
+  }
+  if (/\b(every|daily|weekly|monthly|yearly|each)\b/i.test(userText)) {
+    return false;
+  }
+  return (
+    Boolean(args.input.next_run_at_iso || args.input.next_run_at_text) &&
+    args.input.recurrence_frequency === undefined &&
+    args.input.recurrence_interval === undefined &&
+    args.input.recurrence_weekdays === undefined &&
+    (args.input.constraints?.length ?? 0) === 0 &&
+    (args.input.source_context?.length ?? 0) === 0
+  );
+}
+
+/**
+ * Create a tool that stores a scheduled task for the active Slack context.
+ *
+ * `execute` checks, in this order: the active destination, the requester,
+ * user confirmation (waived only when `canCreateUnconfirmedSimpleReminder`
+ * accepts the request), mutually exclusive next-run inputs, the time zone,
+ * a parseable next run, and the recurrence. A failed check returns a not-ok
+ * result instead of throwing. On success the task is saved through
+ * `createStateSchedulerStore()` and returned in compact form.
+ */
+export function createSlackScheduleCreateTaskTool(context: ToolRuntimeContext) {
+  return tool({
+    description:
+      "Create a scheduled Junior task in the active Slack conversation.",
+    promptSnippet: "create future or recurring Junior work here",
+    promptGuidelines: [
+      "Use only when the user explicitly asks Junior to do work later or on a recurring cadence.",
+      ACTIVE_DESTINATION_GUIDELINE,
+      'For an explicit simple one-off reminder request such as "remind me in 10 minutes to stretch", call immediately without asking for confirmation.',
+      "For recurring schedules or non-reminder scheduled work, show the normalized task, cadence, timezone, destination, and next run; call only after explicit user confirmation.",
+      "Provide exactly one of next_run_at_iso or next_run_at_text; omit timezone to use the configured default.",
+      "Use recurrence_frequency only for recurring schedules.",
+    ],
+    inputSchema: Type.Object({
+      confirmed_by_user: Type.Optional(
+        Type.Boolean({
+          description:
+            "Set true only after the user explicitly confirms the normalized task, cadence, timezone, destination, and next run. Omit or set false for explicit simple one-off reminders.",
+        }),
+      ),
+      title: Type.String({ minLength: 1, maxLength: 120 }),
+      objective: Type.String({ minLength: 1, maxLength: 1000 }),
+      instructions: Type.Array(Type.String({ minLength: 1, maxLength: 1000 }), {
+        minItems: 1,
+        maxItems: 12,
+      }),
+      expected_output: Type.Optional(
+        Type.String({ minLength: 1, maxLength: 1000 }),
+      ),
+      schedule_description: Type.String({ minLength: 1, maxLength: 300 }),
+      timezone: Type.Optional(Type.String({ minLength: 1, maxLength: 80 })),
+      next_run_at_iso: Type.Optional(
+        Type.String({
+          minLength: 1,
+          description:
+            "Exact next run time as an ISO timestamp, computed from the user's requested schedule.",
+        }),
+      ),
+      next_run_at_text: Type.Optional(
+        Type.String({
+          minLength: 1,
+          maxLength: 120,
+          description:
+            'Supported relative one-off text such as "tomorrow at 9am" in the supplied timezone.',
+        }),
+      ),
+      recurrence_frequency: Type.Optional(
+        Type.Union(
+          [
+            Type.Literal("daily"),
+            Type.Literal("weekly"),
+            Type.Literal("monthly"),
+            Type.Literal("yearly"),
+          ],
+          {
+            description:
+              "Calendar recurrence for recurring tasks. Omit for exact one-off calendar dates.",
+          },
+        ),
+      ),
+      recurrence_interval: Type.Optional(
+        Type.Integer({
+          minimum: 1,
+          maximum: 100,
+          description:
+            "Calendar interval. For example, 2 with weekly means every two weeks.",
+        }),
+      ),
+      recurrence_weekdays: Type.Optional(
+        Type.Array(Type.Integer({ minimum: 0, maximum: 6 }), {
+          maxItems: 7,
+          description:
+            "For weekly schedules only. Sunday is 0, Monday is 1, Saturday is 6.",
+        }),
+      ),
+      constraints: Type.Optional(
+        Type.Array(Type.String({ minLength: 1, maxLength: 1000 }), {
+          maxItems: 12,
+        }),
+      ),
+      source_context: Type.Optional(
+        Type.Array(Type.String({ minLength: 1, maxLength: 1000 }), {
+          maxItems: 12,
+        }),
+      ),
+    }),
+    execute: async (input) => {
+      // Both lookups return their own not-ok result, which is passed straight back.
+      const destination = requireActiveDestination(context);
+      if (!destination.ok) return destination;
+      const requester = requireRequester(context);
+      if (!requester.ok) return requester;
+      // Anything short of an explicit `true` must qualify as a simple one-off reminder.
+      if (
+        input.confirmed_by_user !== true &&
+        !canCreateUnconfirmedSimpleReminder({ context, input })
+      ) {
+        return {
+          ok: false,
+          error:
+            "Scheduled tasks require explicit user confirmation before they are created, except simple one-off reminders requested directly by the user. Draft the task contract for the user to confirm.",
+        };
+      }
+
+      // One clock read feeds next-run parsing and both task timestamps.
+      const nowMs = Date.now();
+      const timezone = input.timezone ?? getDefaultScheduleTimezone();
+      if (hasConflictingNextRunInputs(input)) {
+        return {
+          ok: false,
+          error: "Provide only one of next_run_at_iso or next_run_at_text.",
+        };
+      }
+      // Validates the resolved zone, so the configured default is checked too.
+      if (!isValidTimeZone(timezone)) {
+        return {
+          ok: false,
+          error: "timezone must be a valid IANA time zone.",
+        };
+      }
+      const nextRunAtMs = parseNextRunAtMs({
+        input,
+        nowMs,
+        timezone,
+      });
+      // A falsy result means neither input produced a usable timestamp.
+      if (!nextRunAtMs) {
+        return {
+          ok: false,
+          error:
+            'Provide next_run_at_iso as a valid ISO timestamp or next_run_at_text such as "tomorrow at 9am".',
+        };
+      }
+      const recurrence = buildRecurrence({
+        input,
+        nextRunAtMs,
+        timezone,
+      });
+      if (!recurrence.ok) {
+        return recurrence;
+      }
+
+      const task: ScheduledTask = {
+        id: buildTaskId(),
+        createdAtMs: nowMs,
+        updatedAtMs: nowMs,
+        createdBy: requester.requester,
+        destination: destination.destination,
+        executionActor: SCHEDULED_TASK_SYSTEM_ACTOR,
+        nextRunAtMs,
+        originalRequest: context.userText,
+        schedule: {
+          description: input.schedule_description,
+          timezone,
+          // The kind follows from whether a recurrence was built.
+          kind: recurrence.recurrence ? "recurring" : "one_off",
+          recurrence: recurrence.recurrence,
+        },
+        status: "active",
+        task: {
+          title: input.title,
+          objective: input.objective,
+          instructions: input.instructions,
+          expectedOutput: input.expected_output,
+          constraints: input.constraints,
+          sourceContext: input.source_context,
+        },
+        version: 1,
+      };
+
+      await createStateSchedulerStore().saveTask(task);
+      return {
+        ok: true,
+        task: compactTask(task),
+      };
+    },
+  });
+}
+
+/**
+ * Create a tool that lists scheduled tasks for the active Slack destination.
+ *
+ * `execute` loads every task for the destination's team, keeps those that
+ * `sameDestination` matches, and returns at most `MAX_LISTED_TASKS` compact
+ * entries. `truncated` reports whether any matches were left out.
+ */
+export function createSlackScheduleListTasksTool(context: ToolRuntimeContext) {
+  return tool({
+    description:
+      "List scheduled Junior tasks for the active Slack conversation.",
+    promptSnippet: "list schedules for this Slack destination",
+    promptGuidelines: [
+      "Use when the user asks what is scheduled here or needs task IDs before editing, deleting, or running schedules.",
+      ACTIVE_DESTINATION_GUIDELINE,
+    ],
+    annotations: { readOnlyHint: true, destructiveHint: false },
+    inputSchema: Type.Object({}),
+    execute: async () => {
+      const destination = requireActiveDestination(context);
+      if (!destination.ok) return destination;
+
+      // The store is queried per team; narrowing to this destination happens here.
+      const tasks = await createStateSchedulerStore().listTasksForTeam(
+        destination.destination.teamId,
+      );
+      const matching = tasks.filter((task) =>
+        sameDestination(task, destination.destination),
+      );
+      const visible = matching.slice(0, MAX_LISTED_TASKS).map(compactTask);
+
+      return {
+        ok: true,
+        tasks: visible,
+        truncated: matching.length > visible.length,
+      };
+    },
+  });
+}
+
+/** Create a tool that edits a scheduled task in the active Slack destination. 
*/
+export function createSlackScheduleUpdateTaskTool(context: ToolRuntimeContext) {
+  // Input fields that are omitted keep the stored value. Every failed check
+  // returns a not-ok result instead of throwing, and a successful edit is
+  // re-saved with the version incremented by one.
+  return tool({
+    description: "Edit, pause, resume, or reschedule a Junior scheduled task.",
+    promptSnippet: "edit/pause/resume one schedule in this Slack destination",
+    promptGuidelines: [
+      ACTIVE_TASK_ID_GUIDELINE,
+      ACTIVE_DESTINATION_GUIDELINE,
+      "Do not move scheduled tasks across conversations.",
+      "Provide exactly one of next_run_at_iso or next_run_at_text when changing the next run.",
+      "Set status to active, paused, or blocked when the user asks to resume, pause, or block a task.",
+    ],
+    inputSchema: Type.Object({
+      task_id: Type.String({ minLength: 1 }),
+      title: Type.Optional(Type.String({ minLength: 1, maxLength: 120 })),
+      objective: Type.Optional(Type.String({ minLength: 1, maxLength: 1000 })),
+      instructions: Type.Optional(
+        Type.Array(Type.String({ minLength: 1, maxLength: 1000 }), {
+          minItems: 1,
+          maxItems: 12,
+        }),
+      ),
+      expected_output: Type.Optional(
+        Type.String({ minLength: 1, maxLength: 1000 }),
+      ),
+      schedule_description: Type.Optional(
+        Type.String({ minLength: 1, maxLength: 300 }),
+      ),
+      timezone: Type.Optional(Type.String({ minLength: 1, maxLength: 80 })),
+      next_run_at_iso: Type.Optional(Type.String({ minLength: 1 })),
+      next_run_at_text: Type.Optional(
+        Type.String({ minLength: 1, maxLength: 120 }),
+      ),
+      recurrence_frequency: Type.Optional(
+        Type.Union([
+          Type.Literal("daily"),
+          Type.Literal("weekly"),
+          Type.Literal("monthly"),
+          Type.Literal("yearly"),
+          Type.Null(),
+        ]),
+      ),
+      recurrence_interval: Type.Optional(
+        Type.Integer({ minimum: 1, maximum: 100 }),
+      ),
+      recurrence_weekdays: Type.Optional(
+        Type.Array(Type.Integer({ minimum: 0, maximum: 6 }), { maxItems: 7 }),
+      ),
+      status: Type.Optional(
+        Type.Union([
+          Type.Literal("active"),
+          Type.Literal("paused"),
+          Type.Literal("blocked"),
+        ]),
+      ),
+      constraints: Type.Optional(
+        Type.Array(Type.String({ minLength: 1, maxLength: 1000 }), {
+          maxItems: 12,
+        }),
+      ),
+      source_context: Type.Optional(
+        Type.Array(Type.String({ minLength: 1, maxLength: 1000 }), {
+          maxItems: 12,
+        }),
+      ),
+    }),
+    execute: async (input) => {
+      const lookup = await getWritableTask({
+        context,
+        taskId: input.task_id,
+      });
+      if (!lookup.ok) return lookup;
+
+      // Read the clock once so next-run parsing and updatedAtMs share the
+      // same instant, matching how the create tool stamps its tasks.
+      const nowMs = Date.now();
+      const timezone = input.timezone ?? lookup.task.schedule.timezone;
+      if (hasConflictingNextRunInputs(input)) {
+        return {
+          ok: false,
+          error: "Provide only one of next_run_at_iso or next_run_at_text.",
+        };
+      }
+      if (!isValidTimeZone(timezone)) {
+        return {
+          ok: false,
+          error: "timezone must be a valid IANA time zone.",
+        };
+      }
+      const parsedNextRunAtMs = parseNextRunAtMs({
+        input,
+        nowMs,
+        timezone,
+      });
+      // The stored next run is kept unless the caller supplied a new one.
+      const hasNextRunInput = Boolean(
+        input.next_run_at_iso || input.next_run_at_text,
+      );
+      const nextRunAtMs = hasNextRunInput
+        ? parsedNextRunAtMs
+        : lookup.task.nextRunAtMs;
+      if (hasNextRunInput && !nextRunAtMs) {
+        return {
+          ok: false,
+          error:
+            'Provide next_run_at_iso as a valid ISO timestamp or next_run_at_text such as "tomorrow at 9am".',
+        };
+      }
+
+      const status = normalizeStatus(input.status);
+      if (input.status && !status) {
+        return {
+          ok: false,
+          error: "status must be active, paused, or blocked.",
+        };
+      }
+      // Resuming a task needs a next run from the input or from storage.
+      if (status === "active" && !nextRunAtMs) {
+        return {
+          ok: false,
+          error:
+            "Active scheduled tasks require next_run_at_iso or next_run_at_text when no next run is stored.",
+        };
+      }
+      const recurrence = shouldRebuildRecurrence(input)
+        ? buildRecurrence({
+            existing: lookup.task.schedule.recurrence,
+            input,
+            nextRunAtMs,
+            timezone,
+          })
+        : { ok: true as const, recurrence: lookup.task.schedule.recurrence };
+      if (!recurrence.ok) {
+        return recurrence;
+      }
+      const nextStatus = status ?? lookup.task.status;
+
+      const next: ScheduledTask = {
+        ...lookup.task,
+        updatedAtMs: nowMs,
+        nextRunAtMs,
+        // A pending run-now request survives only while the task is active.
+        runNowAtMs:
+          nextStatus === "active" ? lookup.task.runNowAtMs : undefined,
+        status: nextStatus,
+        // The stored reason is kept only while the task stays blocked.
+        statusReason:
+          nextStatus === "blocked" ? lookup.task.statusReason : undefined,
+        schedule: {
+          ...lookup.task.schedule,
+          description:
+            input.schedule_description ?? lookup.task.schedule.description,
+          timezone,
+          kind: recurrence.recurrence ? "recurring" : "one_off",
+          recurrence: recurrence.recurrence,
+        },
+        task: {
+          ...lookup.task.task,
+          title: input.title ?? lookup.task.task.title,
+          objective: input.objective ?? lookup.task.task.objective,
+          instructions: input.instructions ?? lookup.task.task.instructions,
+          expectedOutput:
+            input.expected_output ?? lookup.task.task.expectedOutput,
+          constraints: input.constraints ?? lookup.task.task.constraints,
+          sourceContext: input.source_context ?? lookup.task.task.sourceContext,
+        },
+        version: lookup.task.version + 1,
+      };
+
+      await createStateSchedulerStore().saveTask(next);
+      return {
+        ok: true,
+        task: compactTask(next),
+      };
+    },
+  });
+}
+
+/**
+ * Create a tool that removes a scheduled task from the active Slack destination.
+ *
+ * The task is not erased: it is re-saved with status "deleted", both run
+ * times cleared, and the version incremented by one.
+ */
+export function createSlackScheduleDeleteTaskTool(context: ToolRuntimeContext) {
+  return tool({
+    description:
+      "Delete a Junior scheduled task from the active Slack conversation.",
+    promptSnippet: "delete one schedule from this Slack destination",
+    promptGuidelines: [ACTIVE_TASK_ID_GUIDELINE, ACTIVE_DESTINATION_GUIDELINE],
+    inputSchema: Type.Object({
+      task_id: Type.String({ minLength: 1 }),
+    }),
+    execute: async ({ task_id }) => {
+      const lookup = await getWritableTask({ context, taskId: task_id });
+      if (!lookup.ok) return lookup;
+
+      const next: ScheduledTask = {
+        ...lookup.task,
+        updatedAtMs: Date.now(),
+        status: "deleted",
+        nextRunAtMs: undefined,
+        runNowAtMs: undefined,
+        version: lookup.task.version + 1,
+      };
+
+      await createStateSchedulerStore().saveTask(next);
+      return {
+        ok: true,
+        task: compactTask(next),
+      };
+    },
+  });
+}
+
+/** Create a tool that marks an existing scheduled task due immediately. 
*/
+export function createSlackScheduleRunTaskNowTool(context: ToolRuntimeContext) {
+  // Only active tasks can be queued. The stored cadence and next run are left
+  // untouched; the request is recorded by stamping runNowAtMs.
+  return tool({
+    description:
+      "Queue an active Junior scheduled task to run as soon as possible.",
+    promptSnippet: "run one active schedule now without changing its cadence",
+    promptGuidelines: [
+      ACTIVE_TASK_ID_GUIDELINE,
+      ACTIVE_DESTINATION_GUIDELINE,
+      "Use when the user asks to run an existing scheduled task now; do not rewrite the stored calendar cadence.",
+    ],
+    inputSchema: Type.Object({
+      task_id: Type.String({ minLength: 1 }),
+    }),
+    execute: async ({ task_id: taskId }) => {
+      const found = await getWritableTask({ context, taskId });
+      if (!found.ok) {
+        return found;
+      }
+
+      const current = found.task;
+      if (current.status !== "active") {
+        return {
+          ok: false,
+          error:
+            "Scheduled task must be active before it can be run now. Resume the task first if you want it to run.",
+        };
+      }
+
+      // The same instant marks both the edit and the run-now request.
+      const requestedAtMs = Date.now();
+      const queued: ScheduledTask = {
+        ...current,
+        updatedAtMs: requestedAtMs,
+        runNowAtMs: requestedAtMs,
+        version: current.version + 1,
+      };
+
+      const store = createStateSchedulerStore();
+      await store.saveTask(queued);
+      return { ok: true, task: compactTask(queued) };
+    },
+  });
+}
diff --git a/packages/junior/src/chat/tools/types.ts b/packages/junior/src/chat/tools/types.ts
index a7ef70ae..e9aed3be 100644
--- a/packages/junior/src/chat/tools/types.ts
+++ b/packages/junior/src/chat/tools/types.ts
@@ -46,6 +46,12 @@ export interface ToolRuntimeContext {
   advisor?: AdvisorToolRuntimeContext;
   channelId?: string;
   channelCapabilities: ChannelCapabilities;
+  requester?: {
+    userId?: string;
+    userName?: string;
+    fullName?: string;
+  };
+  teamId?: string;
   messageTs?: string;
   threadTs?: string;
   userText?: string;
diff --git a/packages/junior/src/handlers/agent-dispatch.ts b/packages/junior/src/handlers/agent-dispatch.ts
new file mode 100644
index 00000000..e5768874
--- /dev/null
+++ b/packages/junior/src/handlers/agent-dispatch.ts
@@ -0,0 +1,28 @@
+import { logException } from "@/chat/logging";
+import { 
runAgentDispatchSlice } from "@/chat/agent-dispatch/runner";
+import { verifyDispatchCallbackRequest } from "@/chat/agent-dispatch/signing";
+import type { WaitUntilFn } from "@/handlers/types";
+
+/**
+ * Handle the authenticated internal agent-dispatch callback.
+ *
+ * Responds 401 when `verifyDispatchCallbackRequest` yields no payload.
+ * Otherwise the dispatch slice is handed to `waitUntil` and the handler
+ * answers 202 without awaiting it.
+ */
+export async function POST(
+  request: Request,
+  waitUntil: WaitUntilFn,
+): Promise<Response> {
+  const payload = await verifyDispatchCallbackRequest(request);
+  if (!payload) {
+    return new Response("Unauthorized", { status: 401 });
+  }
+
+  // The slice runs after the response is sent, so a rejection can only be
+  // logged here; the catch keeps it from surfacing as an unhandled rejection.
+  waitUntil(() =>
+    runAgentDispatchSlice(payload).catch((error) => {
+      logException(
+        error,
+        "agent_dispatch_handler_failed",
+        {},
+        { "app.dispatch.id": payload.id },
+        "Agent dispatch handler failed",
+      );
+    }),
+  );
+  return new Response("Accepted", { status: 202 });
+}
diff --git a/packages/junior/src/handlers/diagnostics-dashboard.ts b/packages/junior/src/handlers/diagnostics-dashboard.ts
index 5756e0d3..753cd742 100644
--- a/packages/junior/src/handlers/diagnostics-dashboard.ts
+++ b/packages/junior/src/handlers/diagnostics-dashboard.ts
@@ -126,6 +126,8 @@ export async function GET(): Promise {
     { method: "GET", path: "/api/info" },
     { method: "GET", path: "/api/oauth/callback/mcp/:provider" },
     { method: "GET", path: "/api/oauth/callback/:provider" },
+    { method: "POST", path: "/api/internal/agent-dispatch" },
+    { method: "GET", path: "/api/internal/heartbeat" },
     { method: "POST", path: "/api/webhooks/:platform" },
   ];
   html += `\n
diff --git a/packages/junior/src/handlers/heartbeat.ts b/packages/junior/src/handlers/heartbeat.ts
new file mode 100644
index 00000000..f4e3ef1f
--- /dev/null
+++ b/packages/junior/src/handlers/heartbeat.ts
@@ -0,0 +1,51 @@
+import { timingSafeEqual } from "node:crypto";
+import { runHeartbeat } from "@/chat/agent-dispatch/heartbeat";
+import { logException } from "@/chat/logging";
+import type { WaitUntilFn } from "@/handlers/types";
+
+/**
+ * Resolve the shared secret expected on heartbeat requests.
+ *
+ * `JUNIOR_SCHEDULER_SECRET` takes precedence over `CRON_SECRET`. Values are
+ * trimmed, so a blank primary falls through to the fallback.
+ */
+function getHeartbeatSecret(): string | undefined {
+  return (
+    process.env.JUNIOR_SCHEDULER_SECRET?.trim() ||
+    process.env.CRON_SECRET?.trim()
+  );
+}
+
+/**
+ * Return whether the request carries `Authorization: Bearer <secret>` that
+ * matches the configured heartbeat secret. Returns false when no secret is
+ * configured, so the endpoint rejects every caller in that case.
+ */
+function verifyHeartbeatRequest(request: Request): boolean {
+  const secret = getHeartbeatSecret();
+  if (!secret) {
+    return false;
+  }
+
+  const authorization = request.headers.get("authorization")?.trim();
+  if (!authorization?.startsWith("Bearer ")) {
+    return false;
+  }
+  const actual = Buffer.from(authorization.slice("Bearer ".length));
+  const expected = Buffer.from(secret);
+  // timingSafeEqual throws on inputs of different byte length, so the length
+  // comparison must short-circuit before it is called.
+  return actual.length === expected.length && timingSafeEqual(actual, expected);
+}
+
+/** Handle the authenticated internal heartbeat. 
*/
+export async function GET(
+  request: Request,
+  waitUntil: WaitUntilFn,
+): Promise<Response> {
+  // Requests that fail verification get 401 and schedule no work.
+  if (!verifyHeartbeatRequest(request)) {
+    return new Response("Unauthorized", { status: 401 });
+  }
+
+  // The timestamp is captured before the background work starts, so the
+  // heartbeat run and any failure log refer to the same instant.
+  const nowMs = Date.now();
+  waitUntil(() =>
+    runHeartbeat({ nowMs }).catch((error) => {
+      logException(
+        error,
+        "heartbeat_failed",
+        {},
+        { "app.heartbeat.now_ms": nowMs },
+        "Heartbeat failed",
+      );
+    }),
+  );
+
+  return new Response("Accepted", { status: 202 });
+}
diff --git a/packages/junior/src/handlers/sandbox-egress-proxy.ts b/packages/junior/src/handlers/sandbox-egress-proxy.ts
index 186eaf85..f88d677b 100644
--- a/packages/junior/src/handlers/sandbox-egress-proxy.ts
+++ b/packages/junior/src/handlers/sandbox-egress-proxy.ts
@@ -1,11 +1,21 @@
 import {
   isSandboxEgressForwardedRequest,
   proxySandboxEgressRequest,
+  type SandboxEgressHttpInterceptor,
 } from "@/chat/sandbox/egress-proxy";
 
+interface SandboxEgressProxyOptions {
+  interceptHttp?: SandboxEgressHttpInterceptor;
+}
+
 /** Handles Vercel Sandbox firewall egress proxy requests. */
-export async function ALL(request: Request): Promise<Response> {
-  return await proxySandboxEgressRequest(request);
+export async function ALL(
+  request: Request,
+  options: SandboxEgressProxyOptions = {},
+): Promise<Response> {
+  return await proxySandboxEgressRequest(request, {
+    interceptHttp: options.interceptHttp,
+  });
 }
 
 /** Return whether a request should be routed through sandbox egress proxying. 
*/ diff --git a/packages/junior/src/vercel.ts b/packages/junior/src/vercel.ts index cefa2743..4fcc62b0 100644 --- a/packages/junior/src/vercel.ts +++ b/packages/junior/src/vercel.ts @@ -9,6 +9,12 @@ export function juniorVercelConfig(options: JuniorVercelConfigOptions = {}) { const config: Record = { framework: "nitro", + crons: [ + { + path: "/api/internal/heartbeat", + schedule: "* * * * *", + }, + ], }; if (buildCommand !== null) { diff --git a/packages/junior/tests/fixtures/plugins/eval-oauth/plugin.yaml b/packages/junior/tests/fixtures/plugins/eval-oauth/plugin.yaml index 6b39ddee..30c97f1c 100644 --- a/packages/junior/tests/fixtures/plugins/eval-oauth/plugin.yaml +++ b/packages/junior/tests/fixtures/plugins/eval-oauth/plugin.yaml @@ -7,12 +7,12 @@ capabilities: credentials: type: oauth-bearer domains: - - eval-oauth.example.test + - example.com auth-token-env: EVAL_OAUTH_ACCESS_TOKEN oauth: client-id-env: EVAL_OAUTH_CLIENT_ID client-secret-env: EVAL_OAUTH_CLIENT_SECRET - authorize-endpoint: https://eval-oauth.example.test/oauth/authorize - token-endpoint: https://eval-oauth.example.test/oauth/token + authorize-endpoint: https://example.com/junior-eval-oauth/oauth/authorize + token-endpoint: https://example.com/junior-eval-oauth/oauth/token scope: read diff --git a/packages/junior/tests/integration/agent-dispatch-runner.test.ts b/packages/junior/tests/integration/agent-dispatch-runner.test.ts new file mode 100644 index 00000000..6e21446f --- /dev/null +++ b/packages/junior/tests/integration/agent-dispatch-runner.test.ts @@ -0,0 +1,213 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { + createOrGetDispatch, + getDispatchRecord, +} from "@/chat/agent-dispatch/store"; +import { runAgentDispatchSlice } from "@/chat/agent-dispatch/runner"; +import { getPersistedThreadState } from "@/chat/runtime/thread-state"; +import { RetryableTurnError } from "@/chat/runtime/turn"; +import { disconnectStateAdapter, getStateAdapter } from 
"@/chat/state/adapter";
+import type { AssistantReply } from "@/chat/respond";
+import { chatPostMessageOk } from "../fixtures/slack/factories/api";
+import {
+  getCapturedSlackApiCalls,
+  queueSlackApiResponse,
+} from "../msw/handlers/slack-api";
+
+// Set before module evaluation so the state layer picks the in-memory adapter.
+vi.hoisted(() => {
+  process.env.JUNIOR_STATE_ADAPTER = "memory";
+});
+
+/** Build a fixed successful thread reply used as the stubbed agent output. */
+function createReply(): AssistantReply {
+  return {
+    text: "Dispatch delivered.",
+    deliveryMode: "thread",
+    deliveryPlan: {
+      mode: "thread",
+      postThreadText: true,
+      attachFiles: "none",
+    },
+    diagnostics: {
+      assistantMessageCount: 1,
+      durationMs: 1234,
+      modelId: "test-model",
+      outcome: "success",
+      toolCalls: [],
+      toolErrorCount: 0,
+      toolResultCount: 0,
+      usedPrimaryText: true,
+    },
+  };
+}
+
+describe("agent dispatch runner", () => {
+  // Disconnect the state adapter around every test so no state carries over.
+  beforeEach(async () => {
+    await disconnectStateAdapter();
+  });
+
+  afterEach(async () => {
+    await disconnectStateAdapter();
+  });
+
+  it("runs a system dispatch and persists Slack delivery", async () => {
+    queueSlackApiResponse("chat.postMessage", {
+      body: chatPostMessageOk({
+        channel: "C123",
+        ts: "1700000000.000001",
+      }),
+    });
+    const created = await createOrGetDispatch({
+      plugin: "scheduler",
+      nowMs: Date.parse("2026-05-26T12:00:00.000Z"),
+      options: {
+        idempotencyKey: "run-1",
+        destination: {
+          platform: "slack",
+          teamId: "T123",
+          channelId: "C123",
+        },
+        input: "Run the scheduled task.",
+        metadata: { runId: "run-1" },
+      },
+    });
+    // The stub asserts the context the runner builds: no human requester,
+    // authorization flows disabled, and a system actor named after the plugin.
+    const generateAssistantReply = vi.fn(async (_input, context) => {
+      expect(context.requester).toBeUndefined();
+      expect(context.authorizationFlowMode).toBe("disabled");
+      expect(context.correlation).toMatchObject({
+        conversationId: "slack:T123:C123",
+        channelId: "C123",
+        teamId: "T123",
+        actorType: "system",
+        actorId: "scheduler",
+      });
+      return createReply();
+    });
+
+    await runAgentDispatchSlice(
+      {
+        id: created.record.id,
+        expectedVersion: created.record.version,
+      },
+      { generateAssistantReply },
+    );
+
+    await expect(getDispatchRecord(created.record.id)).resolves.toMatchObject({
+      status: "completed",
+      resultMessageTs: "1700000000.000001",
+    });
+    expect(getCapturedSlackApiCalls("chat.postMessage")).toEqual([
+      expect.objectContaining({
+        params: expect.objectContaining({
+          channel: "C123",
+          text: "Dispatch delivered.",
+        }),
+      }),
+    ]);
+    // Both the synthetic user message and the assistant reply must be persisted.
+    await expect(
+      getPersistedThreadState("slack:T123:C123"),
+    ).resolves.toMatchObject({
+      conversation: {
+        messages: expect.arrayContaining([
+          expect.objectContaining({
+            id: `dispatch:${created.record.id}:user`,
+            author: expect.objectContaining({
+              userName: "system:scheduler",
+              isBot: true,
+            }),
+          }),
+          expect.objectContaining({
+            id: `dispatch:${created.record.id}:assistant`,
+            meta: expect.objectContaining({
+              slackTs: "1700000000.000001",
+              replied: true,
+            }),
+          }),
+        ]),
+      },
+    });
+  });
+
+  it("persists timeout resume checkpoint state before scheduling the next slice", async () => {
+    const created = await createOrGetDispatch({
+      plugin: "scheduler",
+      nowMs: Date.parse("2026-05-26T12:00:00.000Z"),
+      options: {
+        idempotencyKey: "run-timeout",
+        destination: {
+          platform: "slack",
+          teamId: "T123",
+          channelId: "C123",
+        },
+        input: "Run the scheduled task.",
+      },
+    });
+    const scheduleCallback = vi.fn(async () => undefined);
+    // Simulate a slice timing out with a resumable checkpoint.
+    const generateAssistantReply = vi.fn(async () => {
+      throw new RetryableTurnError("turn_timeout_resume", "slice timed out", {
+        checkpointVersion: 7,
+        sliceId: 2,
+      });
+    });
+
+    await runAgentDispatchSlice(
+      {
+        id: created.record.id,
+        expectedVersion: created.record.version,
+      },
+      { generateAssistantReply, scheduleCallback },
+    );
+
+    await expect(getDispatchRecord(created.record.id)).resolves.toMatchObject({
+      status: "awaiting_resume",
+      resumeCheckpointVersion: 7,
+    });
+    expect(scheduleCallback).toHaveBeenCalledWith({
+      id: created.record.id,
+      expectedVersion: expect.any(Number),
+    });
+  });
+
+  it("does not burn an attempt when the destination conversation is busy", async () => {
+    const created = await createOrGetDispatch({
+      plugin: "scheduler",
+      nowMs: Date.parse("2026-05-26T12:00:00.000Z"),
+      options: {
+        idempotencyKey: "run-busy",
+        destination: {
+          platform: "slack",
+          teamId: "T123",
+          channelId: "C123",
+        },
+        input: "Run the scheduled task.",
+      },
+    });
+    // Hold the lock for the destination conversation while the slice runs.
+    const state = getStateAdapter();
+    await state.connect();
+    const lock = await state.acquireLock("slack:T123:C123", 5 * 60 * 1000);
+    expect(lock).toBeTruthy();
+
+    try {
+      await runAgentDispatchSlice(
+        {
+          id: created.record.id,
+          expectedVersion: created.record.version,
+        },
+        {
+          generateAssistantReply: async () => {
+            throw new Error("busy conversation should not run");
+          },
+        },
+      );
+    } finally {
+      if (lock) {
+        await state.releaseLock(lock);
+      }
+    }
+
+    // The record stays pending with attempt 0, so the busy skip is not a retry.
+    await expect(getDispatchRecord(created.record.id)).resolves.toMatchObject({
+      attempt: 0,
+      errorMessage: "Destination conversation is busy",
+      status: "pending",
+    });
+  });
+});
diff --git a/packages/junior/tests/integration/heartbeat.test.ts b/packages/junior/tests/integration/heartbeat.test.ts
new file mode 100644
index 00000000..d320bb6a
--- /dev/null
+++ b/packages/junior/tests/integration/heartbeat.test.ts
@@ -0,0 +1,677 @@
+import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
+import { defineJuniorPlugin } from "@sentry/junior-plugin-api";
+import { createHeartbeatContext } from "@/chat/agent-dispatch/context";
+import { recoverStaleDispatches } from "@/chat/agent-dispatch/heartbeat";
+import { createSchedulerPlugin } from "@/chat/scheduler/plugin";
+import { createStateSchedulerStore } from "@/chat/scheduler/store";
+import type { ScheduledTask } from "@/chat/scheduler/types";
+import {
+  createOrGetDispatch,
+  getDispatchRecord,
+  getDispatchStorageKey,
+  listIncompleteDispatchIds,
+  updateDispatchRecord,
+  withDispatchLock,
+} from "@/chat/agent-dispatch/store";
+import type { DispatchRecord } from "@/chat/agent-dispatch/types";
+import { disconnectStateAdapter, getStateAdapter } from 
"@/chat/state/adapter";
+import { setAgentPlugins } from "@/chat/plugins/agent-hooks";
+import { GET as heartbeat } from "@/handlers/heartbeat";
+import type { WaitUntilFn } from "@/handlers/types";
+
+// Set before module evaluation so the state layer picks the in-memory adapter.
+vi.hoisted(() => {
+  process.env.JUNIOR_STATE_ADAPTER = "memory";
+});
+
+// The fake clock sits five minutes after the default task's scheduled run.
+const TEST_NOW_MS = Date.parse("2026-05-26T12:05:00.000Z");
+const TEST_RUN_AT_MS = Date.parse("2026-05-26T12:00:00.000Z");
+
+/**
+ * Build a `waitUntil` stand-in that records background work in `tasks`,
+ * invoking function-style tasks immediately, so a test can await it.
+ */
+function collectWaitUntil(tasks: Promise<unknown>[]): WaitUntilFn {
+  return (task) => {
+    tasks.push(typeof task === "function" ? task() : task);
+  };
+}
+
+/** Build an active one-off task due at TEST_RUN_AT_MS; `overrides` win. */
+function createTask(overrides: Partial<ScheduledTask> = {}): ScheduledTask {
+  const nextRunAtMs = TEST_RUN_AT_MS;
+  return {
+    id: "sched_plugin_1",
+    createdAtMs: nextRunAtMs,
+    createdBy: { slackUserId: "U123" },
+    destination: {
+      platform: "slack",
+      teamId: "T123",
+      channelId: "C123",
+    },
+    nextRunAtMs,
+    schedule: {
+      description: "Once at noon",
+      kind: "one_off",
+      timezone: "UTC",
+    },
+    status: "active",
+    task: {
+      title: "Digest",
+      objective: "Post a digest.",
+      instructions: ["Summarize the latest state."],
+    },
+    updatedAtMs: nextRunAtMs,
+    version: 1,
+    ...overrides,
+  };
+}
+
+/**
+ * Build a daily recurring task whose first run was 2026-05-24 at noon UTC,
+ * two days before the fake clock; `overrides` win.
+ */
+function createDailyTask(
+  overrides: Partial<ScheduledTask> = {},
+): ScheduledTask {
+  const nextRunAtMs = Date.parse("2026-05-24T12:00:00.000Z");
+  return createTask({
+    id: "sched_plugin_daily",
+    createdAtMs: nextRunAtMs,
+    nextRunAtMs,
+    schedule: {
+      description: "Daily at noon UTC",
+      kind: "recurring",
+      timezone: "UTC",
+      recurrence: {
+        frequency: "daily",
+        interval: 1,
+        startDate: "2026-05-24",
+        time: {
+          hour: 12,
+          minute: 0,
+        },
+      },
+    },
+    updatedAtMs: nextRunAtMs,
+    ...overrides,
+  });
+}
+
+describe("trusted plugin heartbeat", () => {
+  const originalFetch = global.fetch;
+
+  beforeEach(async () => {
+    vi.useFakeTimers({ now: TEST_NOW_MS });
+    process.env.JUNIOR_SCHEDULER_SECRET = "heartbeat-secret";
+    process.env.JUNIOR_BASE_URL = "https://junior.example.com";
+    process.env.JUNIOR_SECRET = "dispatch-secret";
+    setAgentPlugins([]);
+    
await disconnectStateAdapter(); + }); + + afterEach(async () => { + global.fetch = originalFetch; + setAgentPlugins([]); + await disconnectStateAdapter(); + delete process.env.JUNIOR_SCHEDULER_SECRET; + delete process.env.CRON_SECRET; + delete process.env.JUNIOR_BASE_URL; + delete process.env.JUNIOR_SECRET; + vi.restoreAllMocks(); + vi.useRealTimers(); + }); + + it("rejects unauthenticated heartbeat requests", async () => { + const waitUntilTasks: Promise[] = []; + const response = await heartbeat( + new Request("https://example.invalid/api/internal/heartbeat"), + collectWaitUntil(waitUntilTasks), + ); + + expect(response.status).toBe(401); + expect(waitUntilTasks).toHaveLength(0); + }); + + it("runs trusted plugin heartbeat hooks", async () => { + const seen: number[] = []; + setAgentPlugins([ + defineJuniorPlugin({ + name: "scheduler", + hooks: { + heartbeat(ctx) { + seen.push(ctx.nowMs); + }, + }, + }), + ]); + const waitUntilTasks: Promise[] = []; + const response = await heartbeat( + new Request("https://example.invalid/api/internal/heartbeat", { + headers: { authorization: "Bearer heartbeat-secret" }, + }), + collectWaitUntil(waitUntilTasks), + ); + + expect(response.status).toBe(202); + await Promise.all(waitUntilTasks); + expect(seen).toHaveLength(1); + }); + + it("scopes dispatch lookup to the plugin that created it", async () => { + const fetchMock = vi.fn(async () => { + return new Response("Accepted", { status: 202 }); + }); + global.fetch = fetchMock as typeof fetch; + + const schedulerCtx = createHeartbeatContext({ + plugin: "scheduler", + nowMs: Date.parse("2026-05-26T12:00:00.000Z"), + }); + const result = await schedulerCtx.agent.dispatch({ + idempotencyKey: "run-1", + destination: { + platform: "slack", + teamId: "T123", + channelId: "C123", + }, + input: "Run the scheduled task.", + metadata: { runId: "run-1" }, + }); + + await expect(schedulerCtx.agent.get(result.id)).resolves.toEqual({ + id: result.id, + status: "pending", + }); + await expect( 
+ createHeartbeatContext({ + plugin: "other-plugin", + nowMs: Date.parse("2026-05-26T12:00:00.000Z"), + }).agent.get(result.id), + ).resolves.toBeUndefined(); + + await expect(getDispatchRecord(result.id)).resolves.toMatchObject({ + input: "Run the scheduled task.", + destination: { channelId: "C123" }, + metadata: { runId: "run-1" }, + }); + }); + + it("keeps plugin state isolated when plugin names and keys contain delimiters", async () => { + const first = createHeartbeatContext({ + plugin: "scheduler", + nowMs: Date.parse("2026-05-26T12:00:00.000Z"), + }); + const second = createHeartbeatContext({ + plugin: "scheduler:run", + nowMs: Date.parse("2026-05-26T12:00:00.000Z"), + }); + + await first.state.set("run:1", "first"); + await second.state.set("1", "second"); + + await expect(first.state.get("run:1")).resolves.toBe("first"); + await expect(second.state.get("1")).resolves.toBe("second"); + }); + + it("bounds dispatch fanout from one heartbeat context", async () => { + const fetchMock = vi.fn(async () => { + return new Response("Accepted", { status: 202 }); + }); + global.fetch = fetchMock as typeof fetch; + + const ctx = createHeartbeatContext({ + plugin: "scheduler", + nowMs: Date.parse("2026-05-26T12:00:00.000Z"), + }); + + for (let index = 0; index < 25; index += 1) { + await ctx.agent.dispatch({ + idempotencyKey: `run-${index}`, + destination: { + platform: "slack", + teamId: "T123", + channelId: "C123", + }, + input: "Run the scheduled task.", + }); + } + + await expect( + ctx.agent.dispatch({ + idempotencyKey: "run-over-limit", + destination: { + platform: "slack", + teamId: "T123", + channelId: "C123", + }, + input: "Run the scheduled task.", + }), + ).rejects.toThrow("Plugin heartbeat exceeded the dispatch limit"); + }); + + it("does not count invalid dispatch requests against heartbeat fanout", async () => { + const fetchMock = vi.fn(async () => { + return new Response("Accepted", { status: 202 }); + }); + global.fetch = fetchMock as typeof fetch; + + 
const ctx = createHeartbeatContext({ + plugin: "scheduler", + nowMs: Date.parse("2026-05-26T12:00:00.000Z"), + }); + + for (let index = 0; index < 25; index += 1) { + await expect( + ctx.agent.dispatch({ + idempotencyKey: `invalid-${index}`, + destination: { + platform: "slack", + teamId: "not-a-team", + channelId: "C123", + }, + input: "Run the scheduled task.", + }), + ).rejects.toThrow("Dispatch destination teamId must be a Slack team id"); + } + + await expect( + ctx.agent.dispatch({ + idempotencyKey: "valid-after-invalid", + destination: { + platform: "slack", + teamId: "T123", + channelId: "C123", + }, + input: "Run the scheduled task.", + }), + ).resolves.toMatchObject({ status: "created" }); + }); + + it("fails stale dispatches that exceed retry attempts", async () => { + const created = await createOrGetDispatch({ + plugin: "scheduler", + nowMs: Date.parse("2026-05-26T12:00:00.000Z"), + options: { + idempotencyKey: "run-exhausted", + destination: { + platform: "slack", + teamId: "T123", + channelId: "C123", + }, + input: "Run the scheduled task.", + }, + }); + await withDispatchLock(created.record.id, async (state) => { + const record = await state.get( + getDispatchStorageKey(created.record.id), + ); + if (!record) { + throw new Error("Expected dispatch record to exist"); + } + await updateDispatchRecord(state, { + ...record, + attempt: record.maxAttempts, + lastCallbackAtMs: Date.parse("2026-05-26T12:00:00.000Z"), + }); + }); + + await expect( + recoverStaleDispatches({ + nowMs: Date.parse("2026-05-26T12:05:00.000Z"), + }), + ).resolves.toBe(0); + await expect(getDispatchRecord(created.record.id)).resolves.toMatchObject({ + status: "failed", + errorMessage: "Dispatch exceeded retry attempts.", + }); + }); + + it("removes terminal dispatches from the recovery index", async () => { + const created = await createOrGetDispatch({ + plugin: "scheduler", + nowMs: Date.parse("2026-05-26T12:00:00.000Z"), + options: { + idempotencyKey: "run-terminal-index", + 
destination: { + platform: "slack", + teamId: "T123", + channelId: "C123", + }, + input: "Run the scheduled task.", + }, + }); + + await expect(listIncompleteDispatchIds()).resolves.toContain( + created.record.id, + ); + + await withDispatchLock(created.record.id, async (state) => { + const record = await state.get( + getDispatchStorageKey(created.record.id), + ); + if (!record) { + throw new Error("missing dispatch record"); + } + await updateDispatchRecord(state, { + ...record, + status: "completed", + }); + }); + + await expect(listIncompleteDispatchIds()).resolves.not.toContain( + created.record.id, + ); + }); + + it("does not fail an active leased dispatch that reached max attempts", async () => { + const created = await createOrGetDispatch({ + plugin: "scheduler", + nowMs: Date.parse("2026-05-26T12:00:00.000Z"), + options: { + idempotencyKey: "run-active-max-attempts", + destination: { + platform: "slack", + teamId: "T123", + channelId: "C123", + }, + input: "Run the scheduled task.", + }, + }); + await withDispatchLock(created.record.id, async (state) => { + const record = await state.get( + getDispatchStorageKey(created.record.id), + ); + if (!record) { + throw new Error("Expected dispatch record to exist"); + } + await updateDispatchRecord(state, { + ...record, + attempt: record.maxAttempts, + lastCallbackAtMs: Date.parse("2026-05-26T12:00:00.000Z"), + leaseExpiresAtMs: Date.parse("2026-05-26T12:10:00.000Z"), + status: "running", + }); + }); + + await expect( + recoverStaleDispatches({ + nowMs: Date.parse("2026-05-26T12:05:00.000Z"), + }), + ).resolves.toBe(0); + await expect(getDispatchRecord(created.record.id)).resolves.toMatchObject({ + status: "running", + attempt: created.record.maxAttempts, + }); + }); + + it("dispatches and reconciles scheduled runs from the scheduler plugin", async () => { + const fetchMock = vi.fn(async () => { + return new Response("Accepted", { status: 202 }); + }); + global.fetch = fetchMock as typeof fetch; + 
setAgentPlugins([createSchedulerPlugin()]); + const store = createStateSchedulerStore(); + await store.saveTask(createTask()); + + const firstWaitUntilTasks: Promise[] = []; + const firstResponse = await heartbeat( + new Request("https://example.invalid/api/internal/heartbeat", { + headers: { authorization: "Bearer heartbeat-secret" }, + }), + collectWaitUntil(firstWaitUntilTasks), + ); + expect(firstResponse.status).toBe(202); + await Promise.all(firstWaitUntilTasks); + + const running = await store.getRun(`sched_plugin_1:${TEST_RUN_AT_MS}`); + expect(running).toMatchObject({ + status: "running", + dispatchId: expect.any(String), + }); + expect(fetchMock).toHaveBeenCalledTimes(1); + + await withDispatchLock(running!.dispatchId!, async (state) => { + const record = await state.get( + getDispatchStorageKey(running!.dispatchId!), + ); + if (!record) { + throw new Error("Expected dispatch record to exist"); + } + await updateDispatchRecord(state, { + ...record, + resultMessageTs: "1700000000.000001", + status: "completed", + }); + }); + + const secondWaitUntilTasks: Promise[] = []; + const secondResponse = await heartbeat( + new Request("https://example.invalid/api/internal/heartbeat", { + headers: { authorization: "Bearer heartbeat-secret" }, + }), + collectWaitUntil(secondWaitUntilTasks), + ); + expect(secondResponse.status).toBe(202); + await Promise.all(secondWaitUntilTasks); + + await expect(store.getRun(running!.id)).resolves.toMatchObject({ + status: "completed", + resultMessageTs: "1700000000.000001", + }); + await expect(store.getTask("sched_plugin_1")).resolves.toMatchObject({ + lastRunAtMs: Date.parse("2026-05-26T12:00:00.000Z"), + status: "paused", + }); + }); + + it("fails scheduled runs when their dispatch record disappeared", async () => { + const fetchMock = vi.fn(async () => { + return new Response("Accepted", { status: 202 }); + }); + global.fetch = fetchMock as typeof fetch; + setAgentPlugins([createSchedulerPlugin()]); + const store = 
createStateSchedulerStore(); + await store.saveTask(createTask()); + + const firstWaitUntilTasks: Promise[] = []; + const firstResponse = await heartbeat( + new Request("https://example.invalid/api/internal/heartbeat", { + headers: { authorization: "Bearer heartbeat-secret" }, + }), + collectWaitUntil(firstWaitUntilTasks), + ); + expect(firstResponse.status).toBe(202); + await Promise.all(firstWaitUntilTasks); + + const running = await store.getRun(`sched_plugin_1:${TEST_RUN_AT_MS}`); + expect(running).toMatchObject({ + status: "running", + dispatchId: expect.any(String), + }); + const state = getStateAdapter(); + await state.connect(); + await state.delete(getDispatchStorageKey(running!.dispatchId!)); + + const secondWaitUntilTasks: Promise[] = []; + const secondResponse = await heartbeat( + new Request("https://example.invalid/api/internal/heartbeat", { + headers: { authorization: "Bearer heartbeat-secret" }, + }), + collectWaitUntil(secondWaitUntilTasks), + ); + expect(secondResponse.status).toBe(202); + await Promise.all(secondWaitUntilTasks); + + await expect(store.getRun(running!.id)).resolves.toMatchObject({ + status: "failed", + errorMessage: "Scheduled task dispatch record is missing.", + }); + await expect(store.getTask("sched_plugin_1")).resolves.toMatchObject({ + status: "paused", + }); + }); + + it("blocks malformed scheduled tasks without stopping the scheduler plugin heartbeat", async () => { + const fetchMock = vi.fn(async () => { + return new Response("Accepted", { status: 202 }); + }); + global.fetch = fetchMock as typeof fetch; + setAgentPlugins([createSchedulerPlugin()]); + const store = createStateSchedulerStore(); + await store.saveTask({ + ...createTask(), + id: "sched_plugin_malformed", + task: { + title: "Digest", + objective: undefined, + instructions: ["Summarize the latest state."], + } as unknown as ScheduledTask["task"], + }); + + const waitUntilTasks: Promise[] = []; + const response = await heartbeat( + new 
Request("https://example.invalid/api/internal/heartbeat", { + headers: { authorization: "Bearer heartbeat-secret" }, + }), + collectWaitUntil(waitUntilTasks), + ); + expect(response.status).toBe(202); + await Promise.all(waitUntilTasks); + + await expect( + store.getRun(`sched_plugin_malformed:${TEST_RUN_AT_MS}`), + ).resolves.toMatchObject({ + status: "blocked", + errorMessage: expect.stringContaining( + "Scheduled task prompt could not be built", + ), + }); + await expect( + store.getTask("sched_plugin_malformed"), + ).resolves.toMatchObject({ + status: "blocked", + statusReason: expect.stringContaining( + "Scheduled task prompt could not be built", + ), + }); + expect(fetchMock).not.toHaveBeenCalled(); + }); + + it("blocks scheduled runs with invalid dispatch destinations without stopping the heartbeat", async () => { + const fetchMock = vi.fn(async () => { + return new Response("Accepted", { status: 202 }); + }); + global.fetch = fetchMock as typeof fetch; + setAgentPlugins([createSchedulerPlugin()]); + const store = createStateSchedulerStore(); + await store.saveTask({ + ...createTask(), + id: "sched_plugin_bad_destination", + destination: { + platform: "slack", + teamId: "D_BAD_TEAM", + channelId: "D123", + }, + }); + + const waitUntilTasks: Promise[] = []; + const response = await heartbeat( + new Request("https://example.invalid/api/internal/heartbeat", { + headers: { authorization: "Bearer heartbeat-secret" }, + }), + collectWaitUntil(waitUntilTasks), + ); + expect(response.status).toBe(202); + await Promise.all(waitUntilTasks); + + await expect( + store.getRun(`sched_plugin_bad_destination:${TEST_RUN_AT_MS}`), + ).resolves.toMatchObject({ + status: "blocked", + errorMessage: expect.stringContaining( + "Scheduled task dispatch could not be created", + ), + }); + await expect( + store.getTask("sched_plugin_bad_destination"), + ).resolves.toMatchObject({ + status: "blocked", + statusReason: expect.stringContaining( + "Scheduled task dispatch could not be 
created", + ), + }); + expect(fetchMock).not.toHaveBeenCalled(); + }); + + it("skips old recurring occurrences and advances to the next future run", async () => { + const fetchMock = vi.fn(async () => { + return new Response("Accepted", { status: 202 }); + }); + global.fetch = fetchMock as typeof fetch; + setAgentPlugins([createSchedulerPlugin()]); + const store = createStateSchedulerStore(); + const task = createDailyTask(); + await store.saveTask(task); + + const waitUntilTasks: Promise[] = []; + const response = await heartbeat( + new Request("https://example.invalid/api/internal/heartbeat", { + headers: { authorization: "Bearer heartbeat-secret" }, + }), + collectWaitUntil(waitUntilTasks), + ); + expect(response.status).toBe(202); + await Promise.all(waitUntilTasks); + + await expect( + store.getRun(`${task.id}:${task.nextRunAtMs}`), + ).resolves.toMatchObject({ + status: "skipped", + errorMessage: expect.stringContaining("more than 24 hours late"), + }); + await expect(store.getTask(task.id)).resolves.toMatchObject({ + status: "active", + nextRunAtMs: Date.parse("2026-05-27T12:00:00.000Z"), + }); + expect(fetchMock).not.toHaveBeenCalled(); + }); + + it("dedupes equivalent old recurring tasks during heartbeat recovery", async () => { + const fetchMock = vi.fn(async () => { + return new Response("Accepted", { status: 202 }); + }); + global.fetch = fetchMock as typeof fetch; + setAgentPlugins([createSchedulerPlugin()]); + const store = createStateSchedulerStore(); + const first = createDailyTask({ + id: "sched_plugin_duplicate_a", + createdAtMs: Date.parse("2026-05-24T12:00:00.000Z"), + }); + const duplicate = createDailyTask({ + id: "sched_plugin_duplicate_b", + createdAtMs: Date.parse("2026-05-24T12:00:01.000Z"), + }); + await store.saveTask(first); + await store.saveTask(duplicate); + + const waitUntilTasks: Promise[] = []; + const response = await heartbeat( + new Request("https://example.invalid/api/internal/heartbeat", { + headers: { authorization: "Bearer 
heartbeat-secret" }, + }), + collectWaitUntil(waitUntilTasks), + ); + expect(response.status).toBe(202); + await Promise.all(waitUntilTasks); + + await expect( + store.getRun(`${duplicate.id}:${duplicate.nextRunAtMs}`), + ).resolves.toMatchObject({ + status: "skipped", + errorMessage: expect.stringContaining( + "Duplicate stale scheduled task was skipped", + ), + }); + await expect(store.getTask(first.id)).resolves.toMatchObject({ + status: "active", + nextRunAtMs: Date.parse("2026-05-27T12:00:00.000Z"), + }); + await expect(store.getTask(duplicate.id)).resolves.toMatchObject({ + status: "paused", + nextRunAtMs: undefined, + statusReason: expect.stringContaining(first.id), + }); + expect(fetchMock).not.toHaveBeenCalled(); + }); +}); diff --git a/packages/junior/tests/integration/sandbox-egress-proxy.test.ts b/packages/junior/tests/integration/sandbox-egress-proxy.test.ts index c69313df..64b3213a 100644 --- a/packages/junior/tests/integration/sandbox-egress-proxy.test.ts +++ b/packages/junior/tests/integration/sandbox-egress-proxy.test.ts @@ -52,8 +52,11 @@ function forwardUrlFor(policy: unknown, host: string): string { } function proxiedRequest(input: { + body?: BodyInit; forwardURL: string; headers?: Record; + method?: string; + upstreamHost?: string; upstreamPath?: string; }): Request { const url = new URL(input.forwardURL); @@ -64,11 +67,16 @@ function proxiedRequest(input: { : ""; return new Request(url, { + method: input.method ?? "GET", + ...(input.body !== undefined ? { body: input.body } : {}), headers: { - "vercel-forwarded-host": PROVIDER_HOST, + "vercel-forwarded-host": input.upstreamHost ?? PROVIDER_HOST, "vercel-forwarded-path": upstreamPath, "vercel-forwarded-scheme": "https", "vercel-sandbox-oidc-token": "signed-vercel-token", + ...(input.body !== undefined + ? { "content-type": "application/json" } + : {}), ...(input.headers ?? 
{}), }, }); @@ -81,11 +89,10 @@ describe("sandbox egress proxy integration", () => { beforeEach(async () => { process.env = { ...ORIGINAL_ENV, - EVAL_ENABLE_TEST_CREDENTIALS: "1", - EVAL_TEST_CREDENTIAL_TOKEN: "integration-egress-token", JUNIOR_BASE_URL: BASE_URL, JUNIOR_SECRET: "integration-secret", JUNIOR_STATE_ADAPTER: "memory", + SANDBOX_EGRESS_TEST_TOKEN: "integration-egress-token", }; pluginApp = await createPluginAppFixture([FIXTURE_PLUGIN_ROOT]); modules = await loadModules(); @@ -139,4 +146,40 @@ describe("sandbox egress proxy integration", () => { await expect(response.text()).resolves.toBe("ok"); expect(upstreamFetch).toHaveBeenCalledTimes(1); }); + + it("intercepts credential-injected provider traffic before live forwarding", async () => { + const requesterToken = modules.session.createSandboxEgressRequesterToken({ + requesterId: REQUESTER_ID, + egressId: EGRESS_ID, + ttlMs: 60_000, + }); + const networkPolicy = modules.policy.buildSandboxEgressNetworkPolicy({ + requesterToken, + }); + const forwardURL = forwardUrlFor(networkPolicy, PROVIDER_HOST); + const upstreamFetch = vi.fn(); + const interceptHttp = vi.fn(async (_input: { request: Request }) => { + return Response.json({ ok: true }); + }); + + const response = await modules.proxy.proxySandboxEgressRequest( + proxiedRequest({ + forwardURL, + upstreamPath: "/v1/repos?query=first", + }), + { + fetch: upstreamFetch as typeof fetch, + interceptHttp, + verifyOidc: async () => ({ sandbox_id: EGRESS_ID }), + }, + ); + + expect(response.status).toBe(200); + await expect(response.json()).resolves.toEqual({ ok: true }); + expect(upstreamFetch).not.toHaveBeenCalled(); + expect(interceptHttp).toHaveBeenCalledTimes(1); + expect( + interceptHttp.mock.calls[0]?.[0].request.headers.get("authorization"), + ).toBe("Bearer integration-egress-token"); + }); }); diff --git a/packages/junior/tests/integration/slack-schedule-tools.test.ts b/packages/junior/tests/integration/slack-schedule-tools.test.ts new file mode 
100644 index 00000000..975c74e3 --- /dev/null +++ b/packages/junior/tests/integration/slack-schedule-tools.test.ts @@ -0,0 +1,649 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { disconnectStateAdapter, getStateAdapter } from "@/chat/state/adapter"; +import { createStateSchedulerStore } from "@/chat/scheduler/store"; +import { + createSlackScheduleCreateTaskTool, + createSlackScheduleDeleteTaskTool, + createSlackScheduleListTasksTool, + createSlackScheduleRunTaskNowTool, + createSlackScheduleUpdateTaskTool, +} from "@/chat/tools/slack/schedule-tools"; +import type { ToolRuntimeContext } from "@/chat/tools/types"; + +vi.hoisted(() => { + process.env.JUNIOR_STATE_ADAPTER = "memory"; +}); + +const TEST_TEAM_ID = `TSCHEDULE${Date.now()}`; + +function createContext( + overrides: Partial = {}, +): ToolRuntimeContext { + return { + channelId: "C123", + teamId: TEST_TEAM_ID, + requester: { + userId: "U123", + userName: "dcramer", + fullName: "David Cramer", + }, + channelCapabilities: { + canCreateCanvas: true, + canPostToChannel: true, + canAddReactions: true, + }, + userText: "schedule this weekly", + sandbox: {} as ToolRuntimeContext["sandbox"], + ...overrides, + }; +} + +async function executeTool(tool: any, input: TInput) { + if (typeof tool?.execute !== "function") { + throw new Error("tool execute function missing"); + } + return await tool.execute(input, {} as any); +} + +async function createTask( + context = createContext(), + overrides: Record = {}, +) { + const tool = createSlackScheduleCreateTaskTool(context); + return await executeTool(tool, { + confirmed_by_user: true, + title: "Weekly issue digest", + objective: "Summarize open scheduler issues.", + instructions: ["Find open scheduler issues", "Post a concise summary"], + expected_output: "A short Slack digest", + schedule_description: "Every Monday at 9am", + timezone: "America/Los_Angeles", + next_run_at_iso: "2026-05-25T16:00:00.000Z", + recurrence_frequency: 
"weekly", + recurrence_weekdays: [1], + ...overrides, + }); +} + +describe("Slack schedule tools", () => { + beforeEach(async () => { + await disconnectStateAdapter(); + }); + + afterEach(async () => { + vi.useRealTimers(); + delete process.env.JUNIOR_TIMEZONE; + await disconnectStateAdapter(); + }); + + it("creates and lists tasks only for the active Slack destination", async () => { + const created = await createTask(); + expect(created).toMatchObject({ + ok: true, + task: { + status: "active", + title: "Weekly issue digest", + recurrence: { + frequency: "weekly", + interval: 1, + weekdays: [1], + }, + next_run_at: "2026-05-25T16:00:00.000Z", + }, + }); + + const listed = await executeTool( + createSlackScheduleListTasksTool(createContext()), + {}, + ); + expect(listed).toMatchObject({ + ok: true, + tasks: [ + { + title: "Weekly issue digest", + schedule: "Every Monday at 9am", + }, + ], + }); + + const sameChannelOtherThread = await executeTool( + createSlackScheduleListTasksTool( + createContext({ threadTs: "1700000999.000000" }), + ), + {}, + ); + expect(sameChannelOtherThread).toMatchObject({ + ok: true, + tasks: [ + { + title: "Weekly issue digest", + schedule: "Every Monday at 9am", + }, + ], + }); + }); + + it("requires explicit confirmation before creating a task", async () => { + const result = await executeTool( + createSlackScheduleCreateTaskTool(createContext()), + { + title: "Weekly issue digest", + objective: "Summarize open scheduler issues.", + instructions: ["Find open scheduler issues", "Post a concise summary"], + schedule_description: "Every Monday at 9am", + timezone: "America/Los_Angeles", + next_run_at_iso: "2026-05-25T16:00:00.000Z", + recurrence_frequency: "weekly", + recurrence_weekdays: [1], + }, + ); + + expect(result).toMatchObject({ + ok: false, + error: + "Scheduled tasks require explicit user confirmation before they are created, except simple one-off reminders requested directly by the user. 
Draft the task contract for the user to confirm.", + }); + await expect( + createStateSchedulerStore().listTasksForTeam(TEST_TEAM_ID), + ).resolves.toEqual([]); + }); + + it("rejects invalid Slack workspace context before creating a task", async () => { + const result = await executeTool( + createSlackScheduleCreateTaskTool(createContext({ teamId: "D123" })), + { + confirmed_by_user: true, + title: "Reminder", + objective: "Remind David to wash his hands.", + instructions: ["Remind David to wash his hands."], + schedule_description: "In 1 minute", + next_run_at_text: "in 1 minute", + }, + ); + + expect(result).toMatchObject({ + ok: false, + error: "Active Slack workspace context is invalid.", + }); + await expect( + createStateSchedulerStore().listTasksForTeam(TEST_TEAM_ID), + ).resolves.toEqual([]); + }); + + it("creates explicit one-off reminders without a second confirmation", async () => { + vi.useFakeTimers(); + vi.setSystemTime(new Date("2026-05-27T00:24:23.000Z")); + + const result = await executeTool( + createSlackScheduleCreateTaskTool( + createContext({ + channelId: "D123", + userText: "remind me in 1 minute to wash my hands", + }), + ), + { + title: "Wash hands reminder", + objective: "Remind David to wash his hands.", + instructions: ["Remind David to wash his hands."], + schedule_description: "In 1 minute", + next_run_at_text: "in 1 minute", + }, + ); + + expect(result).toMatchObject({ + ok: true, + task: { + next_run_at: "2026-05-27T00:25:23.000Z", + schedule: "In 1 minute", + status: "active", + title: "Wash hands reminder", + }, + }); + await expect( + createStateSchedulerStore().listTasksForTeam(TEST_TEAM_ID), + ).resolves.toMatchObject([ + { + destination: { channelId: "D123" }, + nextRunAtMs: Date.parse("2026-05-27T00:25:23.000Z"), + status: "active", + }, + ]); + }); + + it("rejects parseable non-ISO next run timestamps", async () => { + const result = await createTask(createContext(), { + next_run_at_iso: "05/25/2026 09:00", + }); + + 
expect(result).toMatchObject({ + ok: false, + error: + 'Provide next_run_at_iso as a valid ISO timestamp or next_run_at_text such as "tomorrow at 9am".', + }); + await expect( + createStateSchedulerStore().listTasksForTeam(TEST_TEAM_ID), + ).resolves.toEqual([]); + }); + + it("rejects conflicting exact and relative next run inputs", async () => { + const result = await createTask(createContext(), { + next_run_at_iso: "2026-05-25T16:00:00.000Z", + next_run_at_text: "tomorrow at 9am", + }); + + expect(result).toMatchObject({ + ok: false, + error: "Provide only one of next_run_at_iso or next_run_at_text.", + }); + await expect( + createStateSchedulerStore().listTasksForTeam(TEST_TEAM_ID), + ).resolves.toEqual([]); + }); + + it("edits and deletes a task from the same Slack destination", async () => { + const context = createContext(); + const created = (await createTask(context)) as { + task: { id: string }; + }; + const taskId = created.task.id; + + const updated = await executeTool( + createSlackScheduleUpdateTaskTool(context), + { + task_id: taskId, + title: "Daily scheduler digest", + schedule_description: "Every day at 9am", + recurrence_frequency: "daily", + }, + ); + expect(updated).toMatchObject({ + ok: true, + task: { + id: taskId, + title: "Daily scheduler digest", + schedule: "Every day at 9am", + version: 2, + }, + }); + + const deleted = await executeTool( + createSlackScheduleDeleteTaskTool(context), + { + task_id: taskId, + }, + ); + expect(deleted).toMatchObject({ + ok: true, + task: { + id: taskId, + status: "deleted", + }, + }); + + const listed = await executeTool( + createSlackScheduleListTasksTool(context), + {}, + ); + expect(listed).toMatchObject({ ok: true, tasks: [] }); + }); + + it("rejects edits from another active Slack destination", async () => { + const context = createContext(); + const created = (await createTask(context)) as { + task: { id: string }; + }; + + const updated = await executeTool( + 
createSlackScheduleUpdateTaskTool(createContext({ channelId: "C999" })), + { + task_id: created.task.id, + title: "Wrong channel edit", + }, + ); + + expect(updated).toMatchObject({ + ok: false, + error: + "Scheduled task can only be managed from the Slack destination where it was created.", + }); + }); + + it("allows another requester to manage tasks in the same Slack destination", async () => { + const context = createContext(); + const created = (await createTask(context)) as { + task: { id: string }; + }; + const otherRequester = createContext({ + threadTs: "1700000003.000000", + requester: { + userId: "U999", + userName: "alice", + fullName: "Alice Reviewer", + }, + }); + + const updated = await executeTool( + createSlackScheduleUpdateTaskTool(otherRequester), + { + task_id: created.task.id, + title: "Team-owned digest", + }, + ); + const deleted = await executeTool( + createSlackScheduleDeleteTaskTool(otherRequester), + { + task_id: created.task.id, + }, + ); + + expect(updated).toMatchObject({ + ok: true, + task: { + id: created.task.id, + title: "Team-owned digest", + version: 2, + }, + }); + expect(deleted).toMatchObject({ + ok: true, + task: { + id: created.task.id, + status: "deleted", + }, + }); + await expect( + createStateSchedulerStore().getTask(created.task.id), + ).resolves.toMatchObject({ + status: "deleted", + executionActor: { + type: "system", + id: "scheduled-task", + }, + task: { + title: "Team-owned digest", + }, + version: 3, + }); + }); + + it("creates one-off tasks from tomorrow text using the default Pacific timezone", async () => { + vi.useFakeTimers(); + vi.setSystemTime(new Date("2026-05-25T12:00:00.000Z")); + + const created = await createTask(createContext(), { + next_run_at_iso: undefined, + next_run_at_text: "tomorrow at 9am", + recurrence_frequency: undefined, + recurrence_weekdays: undefined, + timezone: undefined, + }); + + expect(created).toMatchObject({ + ok: true, + task: { + next_run_at: "2026-05-26T16:00:00.000Z", + 
recurrence: null, + timezone: "America/Los_Angeles", + }, + }); + }); + + it("uses JUNIOR_TIMEZONE as the default schedule timezone", async () => { + process.env.JUNIOR_TIMEZONE = "America/New_York"; + vi.useFakeTimers(); + vi.setSystemTime(new Date("2026-05-25T12:00:00.000Z")); + + const created = await createTask(createContext(), { + next_run_at_iso: undefined, + next_run_at_text: "tomorrow at 9am", + recurrence_frequency: undefined, + recurrence_weekdays: undefined, + timezone: undefined, + }); + + expect(created).toMatchObject({ + ok: true, + task: { + next_run_at: "2026-05-26T13:00:00.000Z", + recurrence: null, + timezone: "America/New_York", + }, + }); + }); + + it("rejects invalid default timezones", async () => { + process.env.JUNIOR_TIMEZONE = "not/a-zone"; + + const created = await createTask(createContext(), { + timezone: undefined, + }); + + expect(created).toMatchObject({ + ok: false, + error: "timezone must be a valid IANA time zone.", + }); + await expect( + createStateSchedulerStore().listTasksForTeam(TEST_TEAM_ID), + ).resolves.toEqual([]); + }); + + it("preserves a recurring task calendar anchor on content-only edits", async () => { + const context = createContext(); + const created = (await createTask(context, { + recurrence_interval: 2, + })) as { + task: { id: string }; + }; + const store = createStateSchedulerStore(); + const task = await store.getTask(created.task.id); + expect(task?.schedule.recurrence).toMatchObject({ + interval: 2, + startDate: "2026-05-25", + }); + await store.saveTask({ + ...task!, + nextRunAtMs: Date.parse("2026-06-08T16:00:00.000Z"), + updatedAtMs: Date.parse("2026-05-26T16:00:00.000Z"), + version: task!.version + 1, + }); + + const updated = await executeTool( + createSlackScheduleUpdateTaskTool(context), + { + task_id: created.task.id, + title: "Renamed issue digest", + }, + ); + + expect(updated).toMatchObject({ + ok: true, + task: { + title: "Renamed issue digest", + }, + }); + await 
expect(store.getTask(created.task.id)).resolves.toMatchObject({ + nextRunAtMs: Date.parse("2026-06-08T16:00:00.000Z"), + schedule: { + recurrence: { + interval: 2, + startDate: "2026-05-25", + }, + }, + }); + }); + + it("clears stale block reasons when resuming a task", async () => { + const context = createContext(); + const created = (await createTask(context)) as { + task: { id: string }; + }; + const store = createStateSchedulerStore(); + const task = await store.getTask(created.task.id); + expect(task).toBeDefined(); + await store.saveTask({ + ...task!, + status: "blocked", + statusReason: "Missing GitHub credentials.", + updatedAtMs: Date.parse("2026-05-25T16:01:00.000Z"), + version: task!.version + 1, + }); + + const updated = await executeTool( + createSlackScheduleUpdateTaskTool(context), + { + task_id: created.task.id, + status: "active", + }, + ); + + expect(updated).toMatchObject({ + ok: true, + task: { + id: created.task.id, + status: "active", + }, + }); + const resumed = await store.getTask(created.task.id); + expect(resumed).toMatchObject({ + status: "active", + }); + expect(resumed?.statusReason).toBeUndefined(); + }); + + it("marks an active task due immediately without changing its scheduled next run", async () => { + const context = createContext(); + const created = (await createTask(context)) as { + task: { id: string }; + }; + const store = createStateSchedulerStore(); + const task = await store.getTask(created.task.id); + expect(task).toBeDefined(); + const scheduledNextRunAtMs = Date.parse("2026-06-01T16:00:00.000Z"); + await store.saveTask({ + ...task!, + nextRunAtMs: scheduledNextRunAtMs, + updatedAtMs: Date.parse("2026-05-25T16:01:00.000Z"), + version: task!.version + 1, + }); + + const beforeMs = Date.now(); + const result = await executeTool( + createSlackScheduleRunTaskNowTool(context), + { + task_id: created.task.id, + }, + ); + const afterMs = Date.now(); + + expect(result).toMatchObject({ + ok: true, + task: { + id: 
created.task.id, + status: "active", + next_run_at: "2026-06-01T16:00:00.000Z", + }, + }); + const due = await store.getTask(created.task.id); + expect(due).toMatchObject({ + status: "active", + nextRunAtMs: scheduledNextRunAtMs, + destination: { + teamId: context.teamId, + channelId: context.channelId, + }, + createdBy: { + slackUserId: context.requester?.userId, + }, + }); + expect(due?.statusReason).toBeUndefined(); + expect(due?.runNowAtMs).toBeGreaterThanOrEqual(beforeMs); + expect(due?.runNowAtMs).toBeLessThanOrEqual(afterMs); + + await expect(store.claimDueRun({ nowMs: afterMs })).resolves.toMatchObject({ + taskId: created.task.id, + scheduledForMs: due?.runNowAtMs, + status: "pending", + }); + }); + + it("does not run-now a paused task without an explicit resume", async () => { + const context = createContext(); + const created = (await createTask(context)) as { + task: { id: string }; + }; + const store = createStateSchedulerStore(); + const task = await store.getTask(created.task.id); + expect(task).toBeDefined(); + await store.saveTask({ + ...task!, + status: "paused", + statusReason: "Paused by user.", + updatedAtMs: Date.parse("2026-05-25T16:01:00.000Z"), + version: task!.version + 1, + }); + + const result = await executeTool( + createSlackScheduleRunTaskNowTool(context), + { + task_id: created.task.id, + }, + ); + + expect(result).toMatchObject({ + ok: false, + error: + "Scheduled task must be active before it can be run now. 
Resume the task first if you want it to run.", + }); + const paused = await store.getTask(created.task.id); + expect(paused).toMatchObject({ + status: "paused", + statusReason: "Paused by user.", + }); + expect(paused?.runNowAtMs).toBeUndefined(); + }); + + it("removes deleted tasks from scheduler indexes", async () => { + const context = createContext(); + const created = (await createTask(context)) as { + task: { id: string }; + }; + + await executeTool(createSlackScheduleDeleteTaskTool(context), { + task_id: created.task.id, + }); + + const state = getStateAdapter(); + await state.connect(); + await expect(state.get("junior:scheduler:tasks")).resolves.toBe( + null, + ); + await expect( + state.get(`junior:scheduler:team:${TEST_TEAM_ID}:tasks`), + ).resolves.toBe(null); + }); + + it("claims due runs idempotently", async () => { + const context = createContext(); + const created = (await createTask(context)) as { + task: { id: string }; + }; + const store = createStateSchedulerStore(); + const task = await store.getTask(created.task.id); + expect(task).toBeDefined(); + await store.saveTask({ + ...task!, + nextRunAtMs: 1000, + updatedAtMs: 1000, + }); + + const first = await store.claimDueRun({ nowMs: 2000 }); + const second = await store.claimDueRun({ nowMs: 2000 }); + + expect(first).toMatchObject({ + taskId: created.task.id, + scheduledForMs: 1000, + status: "pending", + }); + expect(second).toBeUndefined(); + }); +}); diff --git a/packages/junior/tests/integration/slack-server.test.ts b/packages/junior/tests/integration/slack-server.test.ts index 1c0b3863..f66d930c 100644 --- a/packages/junior/tests/integration/slack-server.test.ts +++ b/packages/junior/tests/integration/slack-server.test.ts @@ -6,7 +6,16 @@ describe("Slack MSW server", () => { expect(response.status).toBe(500); const payload = (await response.json()) as { message?: string }; expect(payload.message).toContain( - "[MSW] Unhandled mocked request: GET https://slack.com/does-not-exist", + "[HTTP 
MOCK] Unhandled external request: GET https://slack.com/does-not-exist", + ); + }); + + it("fails on unhandled external host requests", async () => { + const response = await fetch("https://api.github.com/rate_limit"); + expect(response.status).toBe(500); + const payload = (await response.json()) as { message?: string }; + expect(payload.message).toContain( + "[HTTP MOCK] Unhandled external request: GET https://api.github.com/rate_limit", ); }); diff --git a/packages/junior/tests/msw/handlers/eval-oauth.ts b/packages/junior/tests/msw/handlers/eval-oauth.ts index 40863d0b..466235c4 100644 --- a/packages/junior/tests/msw/handlers/eval-oauth.ts +++ b/packages/junior/tests/msw/handlers/eval-oauth.ts @@ -2,8 +2,8 @@ import { http, HttpResponse } from "msw"; export const EVAL_OAUTH_PROVIDER = "eval-oauth"; export const EVAL_OAUTH_CODE = "eval-oauth-code"; -export const EVAL_OAUTH_ORIGIN = "https://eval-oauth.example.test"; -const EVAL_OAUTH_TOKEN_ENDPOINT = `${EVAL_OAUTH_ORIGIN}/oauth/token`; +export const EVAL_OAUTH_ORIGIN = "https://example.com"; +const EVAL_OAUTH_TOKEN_ENDPOINT = `${EVAL_OAUTH_ORIGIN}/junior-eval-oauth/oauth/token`; const EVAL_OAUTH_ACCESS_TOKEN = "eval-oauth-access-token"; export function resetEvalOAuthMockState(): void {} diff --git a/packages/junior/tests/msw/handlers/github-api.ts b/packages/junior/tests/msw/handlers/github-api.ts new file mode 100644 index 00000000..e18405d9 --- /dev/null +++ b/packages/junior/tests/msw/handlers/github-api.ts @@ -0,0 +1,16 @@ +import { http, HttpResponse } from "msw"; + +export const GITHUB_API_ORIGIN = "https://api.github.com"; + +export function resetGitHubApiMockState(): void {} + +export const githubApiHandlers = [ + http.post( + `${GITHUB_API_ORIGIN}/app/installations/:installationId/access_tokens`, + () => + HttpResponse.json({ + token: "eval-github-installation-token", + expires_at: new Date(Date.now() + 60 * 60 * 1000).toISOString(), + }), + ), +]; diff --git a/packages/junior/tests/msw/server.ts 
b/packages/junior/tests/msw/server.ts index b7a6ec79..c08e08cb 100644 --- a/packages/junior/tests/msw/server.ts +++ b/packages/junior/tests/msw/server.ts @@ -3,6 +3,8 @@ import { EVAL_MCP_AUTH_ORIGIN, evalMcpAuthHandlers, } from "./handlers/eval-mcp-auth"; +import { allowsLiveTestHttpHost } from "../../../junior-testing/src/http"; +import { githubApiHandlers } from "./handlers/github-api"; import { setupServer } from "msw/node"; import { slackApiHandlers } from "./handlers/slack-api"; import { slackWebhookHandlers } from "./handlers/slack-webhooks"; @@ -10,26 +12,26 @@ import { slackWebhookHandlers } from "./handlers/slack-webhooks"; const EVAL_MCP_AUTH_HOSTNAME = new URL(EVAL_MCP_AUTH_ORIGIN).hostname; const EVAL_OAUTH_HOSTNAME = new URL(EVAL_OAUTH_ORIGIN).hostname; -function isSlackHost(hostname: string): boolean { - return hostname === "slack.com" || hostname === "files.slack.com"; -} - -function requiresMockedHandling(hostname: string): boolean { - return ( - isSlackHost(hostname) || - hostname === EVAL_MCP_AUTH_HOSTNAME || - hostname === EVAL_OAUTH_HOSTNAME - ); -} +const HOST_HTTP_FIXTURE_ALLOWLIST = new Set([ + "files.slack.com", + "slack.com", + EVAL_MCP_AUTH_HOSTNAME, + EVAL_OAUTH_HOSTNAME, +]); -export function enforceUnhandledSlackRequestFailure(request: Request): void { +export function enforceUnhandledExternalRequestFailure(request: Request): void { const url = new URL(request.url); - if (!requiresMockedHandling(url.hostname)) { + if ( + allowsLiveTestHttpHost(url.hostname, { + juniorBaseUrl: process.env.JUNIOR_BASE_URL, + }) && + !HOST_HTTP_FIXTURE_ALLOWLIST.has(url.hostname) + ) { return; } throw new Error( - `[MSW] Unhandled mocked request: ${request.method} ${request.url}`, + `[HTTP MOCK] Unhandled external request: ${request.method} ${request.url}`, ); } @@ -38,4 +40,5 @@ export const mswServer = setupServer( ...slackWebhookHandlers, ...evalMcpAuthHandlers, ...evalOAuthHandlers, + ...githubApiHandlers, ); diff --git 
a/packages/junior/tests/msw/setup.ts b/packages/junior/tests/msw/setup.ts index 7b13d344..4495f28d 100644 --- a/packages/junior/tests/msw/setup.ts +++ b/packages/junior/tests/msw/setup.ts @@ -1,8 +1,9 @@ import { resetEvalOAuthMockState } from "./handlers/eval-oauth"; import { resetEvalMcpAuthMockState } from "./handlers/eval-mcp-auth"; -import { afterAll, afterEach, beforeAll } from "vitest"; +import { resetGitHubApiMockState } from "./handlers/github-api"; +import { afterAll, afterEach, beforeAll, beforeEach } from "vitest"; import { resetSlackApiMockState } from "./handlers/slack-api"; -import { enforceUnhandledSlackRequestFailure, mswServer } from "./server"; +import { enforceUnhandledExternalRequestFailure, mswServer } from "./server"; // Force test-safe Slack credentials at module evaluation time so any test module // importing bot/chat runtime at top-level sees deterministic values. @@ -12,15 +13,20 @@ process.env.SLACK_SIGNING_SECRET = "test-signing-secret"; process.env.SLACK_CLIENT_ID = "test-client-id"; process.env.SLACK_CLIENT_SECRET = "test-client-secret"; process.env.SLACK_APP_TOKEN = "xapp-test-token"; +process.env.JUNIOR_SECRET = "junior-test-secret"; process.env.EVAL_OAUTH_CLIENT_ID = "eval-oauth-client-id"; process.env.EVAL_OAUTH_CLIENT_SECRET = "eval-oauth-client-secret"; +beforeEach(() => { + process.env.JUNIOR_SECRET = "junior-test-secret"; +}); + // MSW is enabled globally for both tests and evals. Keep Slack HTTP contract // assertions in tests/integration and keep evals focused on behavior outcomes. 
beforeAll(() => { mswServer.listen({ onUnhandledRequest(request) { - enforceUnhandledSlackRequestFailure(request); + enforceUnhandledExternalRequestFailure(request); }, }); }); @@ -29,6 +35,7 @@ afterEach(() => { mswServer.resetHandlers(); resetEvalOAuthMockState(); resetEvalMcpAuthMockState(); + resetGitHubApiMockState(); resetSlackApiMockState(); }); diff --git a/packages/junior/tests/unit/app-config.test.ts b/packages/junior/tests/unit/app-config.test.ts index 857b1168..b7a93be2 100644 --- a/packages/junior/tests/unit/app-config.test.ts +++ b/packages/junior/tests/unit/app-config.test.ts @@ -88,7 +88,9 @@ describe("createApp plugin config", () => { }); expect(getPluginProviders()).toEqual([]); - expect(getAgentPlugins()).toEqual([]); + expect(getAgentPlugins().map((plugin) => plugin.name)).toEqual([ + "scheduler", + ]); }); it("fails loudly when configured plugin package names are invalid", async () => { @@ -213,7 +215,10 @@ describe("createApp plugin config", () => { expect(getPluginProviders().map((plugin) => plugin.manifest.name)).toEqual([ "trusted", ]); - expect(getAgentPlugins().map((plugin) => plugin.name)).toEqual(["trusted"]); + expect(getAgentPlugins().map((plugin) => plugin.name)).toEqual([ + "scheduler", + "trusted", + ]); }); it("rejects duplicate trusted plugin names before mutating app config", async () => { @@ -230,7 +235,9 @@ describe("createApp plugin config", () => { }), ).rejects.toThrow('Duplicate trusted plugin name "dupe"'); - expect(getAgentPlugins()).toEqual([]); + expect(getAgentPlugins().map((plugin) => plugin.name)).toEqual([ + "scheduler", + ]); expect(getPluginProviders()).toEqual([]); }); @@ -247,7 +254,9 @@ describe("createApp plugin config", () => { 'Trusted plugin name "GitHub" must be a lowercase plugin identifier', ); - expect(getAgentPlugins()).toEqual([]); + expect(getAgentPlugins().map((plugin) => plugin.name)).toEqual([ + "scheduler", + ]); expect(getPluginProviders()).toEqual([]); }); }); diff --git 
a/packages/junior/tests/unit/capabilities/capability-factory.test.ts b/packages/junior/tests/unit/capabilities/capability-factory.test.ts index ced69e5a..75b8a550 100644 --- a/packages/junior/tests/unit/capabilities/capability-factory.test.ts +++ b/packages/junior/tests/unit/capabilities/capability-factory.test.ts @@ -27,17 +27,21 @@ vi.mock("@/chat/state/adapter", () => ({ describe("capability factory", () => { afterEach(() => { - delete process.env.EVAL_ENABLE_TEST_CREDENTIALS; createPluginBrokerMock.mockReset(); getPluginProvidersMock.mockReset(); vi.resetModules(); }); - it("uses test header transforms for header-only plugins in eval mode", async () => { - process.env.EVAL_ENABLE_TEST_CREDENTIALS = "1"; - createPluginBrokerMock.mockImplementation(() => { - throw new Error("should not create real plugin broker"); - }); + it("uses normal plugin brokers for credential providers", async () => { + const broker = { + issue: vi.fn(async () => ({ + id: "lease-1", + provider: "example", + env: {}, + expiresAt: new Date(Date.now() + 60_000).toISOString(), + })), + }; + createPluginBrokerMock.mockReturnValue(broker); getPluginProvidersMock.mockReturnValue([ { manifest: { @@ -67,18 +71,13 @@ describe("capability factory", () => { reason: "test:api-headers", }); - expect(createPluginBrokerMock).not.toHaveBeenCalled(); - expect(lease.env).toEqual({ - EXAMPLE_API_KEY: "host_managed_credential", + expect(createPluginBrokerMock).toHaveBeenCalledWith("example", { + userTokenStore: expect.any(Object), }); - expect(lease.headerTransforms).toEqual([ - { - domain: "api.example.com", - headers: { - Authorization: "Bearer eval-test-example-api-header", - "X-Api-Version": "2026-01-01", - }, - }, - ]); + expect(broker.issue).toHaveBeenCalledWith({ + requesterId: "U123", + reason: "test:api-headers", + }); + expect(lease.provider).toBe("example"); }); }); diff --git a/packages/junior/tests/unit/capabilities/capability-router.test.ts 
b/packages/junior/tests/unit/capabilities/capability-router.test.ts index abc6175b..b5518362 100644 --- a/packages/junior/tests/unit/capabilities/capability-router.test.ts +++ b/packages/junior/tests/unit/capabilities/capability-router.test.ts @@ -8,7 +8,7 @@ describe("provider credential router", () => { issue: vi.fn(async () => ({ id: "lease-1", provider: "github", - env: { GITHUB_TOKEN: "ghp_host_managed_credential" }, + env: {}, expiresAt: new Date(Date.now() + 60_000).toISOString(), })), }; @@ -36,7 +36,7 @@ describe("provider credential router", () => { issue: vi.fn(async () => ({ id: "lease-1", provider: "github", - env: { GITHUB_TOKEN: "ghp_host_managed_credential" }, + env: {}, expiresAt: new Date(Date.now() + 60_000).toISOString(), })), }; diff --git a/packages/junior/tests/unit/logging/console-format.test.ts b/packages/junior/tests/unit/logging/console-format.test.ts index 443491c9..2e87d85b 100644 --- a/packages/junior/tests/unit/logging/console-format.test.ts +++ b/packages/junior/tests/unit/logging/console-format.test.ts @@ -87,6 +87,35 @@ describe("console log formatting", () => { expect(line).not.toContain("file.directory="); }); + it("labels plugin heartbeat summary fields", async () => { + process.env.NODE_ENV = "development"; + delete process.env.CI; + delete process.env.JUNIOR_LOG_FORMAT; + setStdoutIsTTY(false); + vi.useFakeTimers(); + vi.setSystemTime(new Date("2026-04-14T16:29:00.133Z")); + + const infoSpy = vi + .spyOn(console, "info") + .mockImplementation(() => undefined); + const { log } = await loadLoggingModule(); + + log.info( + "trusted_plugin_heartbeat_dispatched", + { + "app.dispatch.count": 1, + "app.plugin.name": "scheduler", + }, + "Plugin heartbeat dispatched agent work", + ); + + expect(infoSpy).toHaveBeenCalledTimes(1); + const line = stripAnsi(String(infoSpy.mock.calls[0]?.[0] ?? 
"")); + expect(line).toMatch( + /^\d{2}:\d{2}:\d{2} INF Plugin heartbeat dispatched agent work plugin=scheduler dispatches=1$/, + ); + }); + it("keeps the structured formatter in production", async () => { process.env.NODE_ENV = "production"; delete process.env.CI; diff --git a/packages/junior/tests/unit/misc/eval-gh-stub.test.ts b/packages/junior/tests/unit/misc/eval-gh-stub.test.ts deleted file mode 100644 index c1f0811a..00000000 --- a/packages/junior/tests/unit/misc/eval-gh-stub.test.ts +++ /dev/null @@ -1,30 +0,0 @@ -import { execFile } from "node:child_process"; -import fs from "node:fs/promises"; -import os from "node:os"; -import path from "node:path"; -import { promisify } from "node:util"; -import { describe, expect, it } from "vitest"; -import { buildEvalGitHubCliStub } from "@/chat/sandbox/eval-gh-stub"; - -const execFileAsync = promisify(execFile); - -describe("buildEvalGitHubCliStub", () => { - it("returns an empty object for unhandled gh api routes", async () => { - const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "junior-gh-stub-")); - const stubPath = path.join(tempDir, "gh"); - - try { - await fs.writeFile(stubPath, buildEvalGitHubCliStub(), "utf8"); - - const result = await execFileAsync("node", [ - stubPath, - "api", - "/repos/getsentry/junior/pulls/170", - ]); - - expect(JSON.parse(result.stdout)).toEqual({}); - } finally { - await fs.rm(tempDir, { recursive: true, force: true }); - } - }); -}); diff --git a/packages/junior/tests/unit/misc/sandbox-executor.test.ts b/packages/junior/tests/unit/misc/sandbox-executor.test.ts index 3befe930..799f0896 100644 --- a/packages/junior/tests/unit/misc/sandbox-executor.test.ts +++ b/packages/junior/tests/unit/misc/sandbox-executor.test.ts @@ -251,9 +251,6 @@ describe("createSandboxExecutor", () => { delete process.env.VERCEL_PROJECT_ID; delete process.env.VERCEL_OIDC_TOKEN; delete process.env.VERCEL_SANDBOX_KEEPALIVE_MS; - delete process.env.JUNIOR_EVAL_ENABLE_FAULTS; - delete 
process.env.JUNIOR_EVAL_FAULT_SANDBOX_BASH_STREAM_INTERRUPTS; - delete process.env.EVAL_ENABLE_TEST_CREDENTIALS; process.env.JUNIOR_BASE_URL = "https://junior.example.com"; process.env.JUNIOR_SECRET = "test-secret"; }); @@ -726,7 +723,7 @@ describe("createSandboxExecutor", () => { expect(invocation.args?.[1]).toContain("sentry-cli issues list"); }); - it("makes registered provider credentials available to sandbox commands", async () => { + it("makes registered provider placeholders available to sandbox commands", async () => { const sandbox = makeSandbox("sbx_registered_credentials"); sandboxGetMock.mockResolvedValue(sandbox); vi.mocked(createBashTool).mockResolvedValue({ @@ -801,52 +798,6 @@ describe("createSandboxExecutor", () => { }); }); - it("supports eval-only bash stream interruption fault injection", async () => { - process.env.JUNIOR_EVAL_ENABLE_FAULTS = "1"; - process.env.JUNIOR_EVAL_FAULT_SANDBOX_BASH_STREAM_INTERRUPTS = "1"; - const sandbox = makeSandbox("sbx_fault_injection"); - sandbox.runCommand.mockResolvedValueOnce({ - exitCode: 0, - stdout: async () => "ok\n", - stderr: async () => "", - }); - sandboxGetMock.mockResolvedValue(sandbox); - vi.mocked(createBashTool).mockResolvedValue({ - tools: { - readFile: { execute: vi.fn(async () => ({ content: "" })) }, - writeFile: { execute: vi.fn(async () => ({ success: true })) }, - }, - } as never); - - const executor = createSandboxExecutor({ - sandboxId: "sbx_fault_injection", - }); - executor.configureSkills([]); - - const interrupted = await executor.execute({ - toolName: "bash", - input: { - command: "echo first", - }, - }); - const recovered = await executor.execute({ - toolName: "bash", - input: { - command: "echo second", - }, - }); - - expect(interrupted.result).toMatchObject({ - ok: false, - exit_code: 125, - }); - expect(recovered.result).toMatchObject({ - ok: true, - stdout: "ok\n", - }); - expect(sandbox.runCommand).toHaveBeenCalledTimes(1); - }); - it("routes matching bash commands through 
custom command handler", async () => { const sandbox = makeSandbox("sbx_custom"); sandboxGetMock.mockResolvedValue(sandbox); @@ -1277,38 +1228,6 @@ describe("createSandboxExecutor", () => { }); }); - it("installs the eval gh shim when test credentials are enabled", async () => { - process.env.EVAL_ENABLE_TEST_CREDENTIALS = "1"; - const sandbox = makeSandbox("sbx_eval_gh"); - sandboxCreateMock.mockResolvedValue(sandbox); - - const executor = createSandboxExecutor(); - executor.configureSkills([]); - - await executor.createSandbox(); - - const syncedFiles = sandbox.writeFiles.mock.calls[0]?.[0] as Array<{ - path: string; - content: Buffer; - }>; - expect(syncedFiles).toEqual( - expect.arrayContaining([ - expect.objectContaining({ - path: "/vercel/sandbox/.junior/bin/gh", - }), - ]), - ); - const chmodCall = sandbox.runCommand.mock.calls.find( - (call) => - call[0]?.cmd === "bash" && - typeof call[0]?.args?.[1] === "string" && - call[0].args[1].includes( - "'chmod' '0755' '/vercel/sandbox/.junior/bin/gh'", - ), - ); - expect(chmodCall).toBeDefined(); - }); - it("creates fresh sandboxes from dependency snapshots when available", async () => { const snapshotSandbox = makeSandbox("sbx_snapshot"); resolveRuntimeDependencySnapshotMock.mockResolvedValue({ diff --git a/packages/junior/tests/unit/plugins/agent-hooks.test.ts b/packages/junior/tests/unit/plugins/agent-hooks.test.ts index de1f96d5..efb662b1 100644 --- a/packages/junior/tests/unit/plugins/agent-hooks.test.ts +++ b/packages/junior/tests/unit/plugins/agent-hooks.test.ts @@ -2,8 +2,12 @@ import { defineJuniorPlugin } from "@sentry/junior-plugin-api"; import { describe, expect, it } from "vitest"; import { createAgentPluginHookRunner, + getAgentPluginTools, setAgentPlugins, } from "@/chat/plugins/agent-hooks"; +import { createTools } from "@/chat/tools"; +import { tool } from "@/chat/tools/definition"; +import { Type } from "@sinclair/typebox"; import type { SandboxInstance } from "@/chat/sandbox/workspace"; function 
fakeSandbox( @@ -57,6 +61,111 @@ function fakeSandbox( } describe("agent plugin hooks", () => { + it("collects turn-scoped tools from configured plugins", () => { + const previous = setAgentPlugins([ + defineJuniorPlugin({ + name: "agent-demo", + hooks: { + tools(ctx) { + expect(ctx.requester?.userId).toBe("U123"); + return { + demoTool: tool({ + description: "Demo tool", + inputSchema: Type.Object({}), + execute: () => ({ ok: true }), + }), + }; + }, + }, + }), + ]); + try { + const tools = getAgentPluginTools({ + channelCapabilities: { + canAddReactions: false, + canCreateCanvas: false, + canPostToChannel: false, + }, + requester: { userId: "U123" }, + sandbox: {} as any, + }); + + expect(tools).toHaveProperty("demoTool"); + } finally { + setAgentPlugins(previous); + } + }); + + it("rejects plugin tools with invalid names", () => { + const previous = setAgentPlugins([ + defineJuniorPlugin({ + name: "agent-demo", + hooks: { + tools() { + return { + "not-valid": tool({ + description: "Demo tool", + inputSchema: Type.Object({}), + execute: () => ({ ok: true }), + }), + }; + }, + }, + }), + ]); + try { + expect(() => + getAgentPluginTools({ + channelCapabilities: { + canAddReactions: false, + canCreateCanvas: false, + canPostToChannel: false, + }, + sandbox: {} as any, + }), + ).toThrow("must be a camelCase identifier"); + } finally { + setAgentPlugins(previous); + } + }); + + it("rejects plugin tools that conflict with core tools", () => { + const previous = setAgentPlugins([ + defineJuniorPlugin({ + name: "agent-demo", + hooks: { + tools() { + return { + loadSkill: tool({ + description: "Demo tool", + inputSchema: Type.Object({}), + execute: () => ({ ok: true }), + }), + }; + }, + }, + }), + ]); + try { + expect(() => + createTools( + [], + {}, + { + channelCapabilities: { + canAddReactions: false, + canCreateCanvas: false, + canPostToChannel: false, + }, + sandbox: {} as any, + }, + ), + ).toThrow('Trusted plugin tool "loadSkill" conflicts with a core tool'); + } 
finally { + setAgentPlugins(previous); + } + }); + it("runs sandbox and tool lifecycle hooks from configured plugins", async () => { const writes: Array<{ content: string | Uint8Array; path: string }> = []; const previous = setAgentPlugins([ diff --git a/packages/junior/tests/unit/plugins/test-broker.test.ts b/packages/junior/tests/unit/plugins/test-broker.test.ts deleted file mode 100644 index cdb837c3..00000000 --- a/packages/junior/tests/unit/plugins/test-broker.test.ts +++ /dev/null @@ -1,89 +0,0 @@ -import { afterEach, describe, expect, it } from "vitest"; -import type { CredentialHeaderTransform } from "@/chat/credentials/broker"; -import { TestCredentialBroker } from "@/chat/credentials/test-broker"; - -describe("test credential broker", () => { - afterEach(() => { - delete process.env.EVAL_TEST_CREDENTIAL_TOKEN; - }); - - it("preserves plugin-level header transforms separately from token domains", async () => { - process.env.EVAL_TEST_CREDENTIAL_TOKEN = "test-token"; - const broker = new TestCredentialBroker({ - provider: "example", - domains: ["api.example.com"], - apiHeaders: { - "X-Api-Version": "2026-01-01", - }, - headerTransforms: (): CredentialHeaderTransform[] => [ - { - domain: "uploads.example.com", - headers: { - "X-Upload-Mode": "sandbox", - }, - }, - { - domain: "api.example.com", - headers: { - Authorization: "PluginManaged value", - "X-Shared": "plugin", - }, - }, - ], - env: { - EXAMPLE_SITE: "example.com", - }, - envKey: "EXAMPLE_TOKEN", - placeholder: "host_managed_credential", - }); - - const lease = await broker.issue({ reason: "test:headers" }); - - expect(lease.env).toEqual({ - EXAMPLE_SITE: "example.com", - EXAMPLE_TOKEN: "host_managed_credential", - }); - expect(lease.headerTransforms).toEqual([ - { - domain: "uploads.example.com", - headers: { - "X-Upload-Mode": "sandbox", - }, - }, - { - domain: "api.example.com", - headers: { - Authorization: "Bearer test-token", - "X-Shared": "plugin", - "X-Api-Version": "2026-01-01", - }, - }, - 
]); - }); - - it("issues header-only leases without token env", async () => { - const broker = new TestCredentialBroker({ - provider: "example", - headerTransforms: () => [ - { - domain: "api.example.com", - headers: { - Authorization: "eval-test-example-api-header", - }, - }, - ], - }); - - const lease = await broker.issue({ reason: "test:headers-only" }); - - expect(lease.env).toEqual({}); - expect(lease.headerTransforms).toEqual([ - { - domain: "api.example.com", - headers: { - Authorization: "eval-test-example-api-header", - }, - }, - ]); - }); -}); diff --git a/packages/junior/tests/unit/prompt.test.ts b/packages/junior/tests/unit/prompt.test.ts index d32e3655..d63d39d5 100644 --- a/packages/junior/tests/unit/prompt.test.ts +++ b/packages/junior/tests/unit/prompt.test.ts @@ -104,4 +104,25 @@ describe("prompt builders", () => { expect(turnContext).toContain("- exact edits"); expect(turnContext).toContain("- unique oldText"); }); + + it("does not expose plugin ownership as prompt knowledge", () => { + const turnContext = buildTurnContextPrompt({ + availableSkills: [ + { + name: "demo-skill", + description: "Demo workflow", + pluginProvider: "demo-provider", + skillPath: "/tmp/skills/demo-skill", + }, + ], + activeSkills: [], + activeMcpCatalogs: [], + invocation: null, + turnState: "fresh", + }); + + expect(turnContext).toContain("demo-skill"); + expect(turnContext).not.toContain("demo-provider"); + expect(turnContext).not.toContain(""); + }); }); diff --git a/packages/junior/tests/unit/runtime/agent-dispatch-signing.test.ts b/packages/junior/tests/unit/runtime/agent-dispatch-signing.test.ts new file mode 100644 index 00000000..dca196b1 --- /dev/null +++ b/packages/junior/tests/unit/runtime/agent-dispatch-signing.test.ts @@ -0,0 +1,72 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { + scheduleDispatchCallback, + verifyDispatchCallbackRequest, +} from "@/chat/agent-dispatch/signing"; + +describe("agent dispatch callback 
signing", () => { + const originalFetch = global.fetch; + + beforeEach(() => { + process.env.JUNIOR_BASE_URL = "https://junior.example.com"; + process.env.JUNIOR_SECRET = "dispatch-secret"; + }); + + afterEach(() => { + global.fetch = originalFetch; + delete process.env.JUNIOR_BASE_URL; + delete process.env.JUNIOR_SECRET; + vi.restoreAllMocks(); + }); + + it("signs dispatch callbacks so the handler can verify them", async () => { + const fetchMock = vi.fn(async (_url: string, _init?: RequestInit) => { + return new Response("Accepted", { status: 202 }); + }); + global.fetch = fetchMock as typeof fetch; + + await scheduleDispatchCallback({ + id: "dispatch_123", + expectedVersion: 3, + }); + + expect(fetchMock).toHaveBeenCalledTimes(1); + const [url, init] = fetchMock.mock.calls[0] as [string, RequestInit]; + expect(url).toBe("https://junior.example.com/api/internal/agent-dispatch"); + + const request = new Request(url, { + method: init.method, + headers: init.headers, + body: init.body, + }); + await expect(verifyDispatchCallbackRequest(request)).resolves.toEqual({ + id: "dispatch_123", + expectedVersion: 3, + }); + }); + + it("rejects callbacks whose signature does not match the body", async () => { + const fetchMock = vi.fn(async (_url: string, _init?: RequestInit) => { + return new Response("Accepted", { status: 202 }); + }); + global.fetch = fetchMock as typeof fetch; + + await scheduleDispatchCallback({ + id: "dispatch_123", + expectedVersion: 3, + }); + + const [url, init] = fetchMock.mock.calls[0] as [string, RequestInit]; + const headers = new Headers(init.headers); + headers.set("x-junior-dispatch-signature", "v1=deadbeef"); + const request = new Request(url, { + method: init.method, + headers, + body: init.body, + }); + + await expect( + verifyDispatchCallbackRequest(request), + ).resolves.toBeUndefined(); + }); +}); diff --git a/packages/junior/tests/unit/runtime/agent-dispatch-validation.test.ts 
b/packages/junior/tests/unit/runtime/agent-dispatch-validation.test.ts new file mode 100644 index 00000000..c7af3f92 --- /dev/null +++ b/packages/junior/tests/unit/runtime/agent-dispatch-validation.test.ts @@ -0,0 +1,36 @@ +import { describe, expect, it } from "vitest"; +import { validateDispatchOptions } from "@/chat/agent-dispatch/validation"; + +const validOptions = { + idempotencyKey: "run-1", + destination: { + platform: "slack" as const, + teamId: "T123", + channelId: "C123", + }, + input: "Run the scheduled task.", +}; + +describe("agent dispatch validation", () => { + it("accepts a valid Slack channel dispatch", () => { + expect(() => validateDispatchOptions(validOptions)).not.toThrow(); + }); + + it("bounds durable idempotency and metadata keys", () => { + expect(() => + validateDispatchOptions({ + ...validOptions, + idempotencyKey: "x".repeat(513), + }), + ).toThrow("Dispatch idempotencyKey exceeds the maximum length"); + + expect(() => + validateDispatchOptions({ + ...validOptions, + metadata: { + ["x".repeat(129)]: "value", + }, + }), + ).toThrow("Dispatch metadata key exceeds the maximum length"); + }); +}); diff --git a/packages/junior/tests/unit/runtime/respond-error-path.test.ts b/packages/junior/tests/unit/runtime/respond-error-path.test.ts index a191153c..c74b6899 100644 --- a/packages/junior/tests/unit/runtime/respond-error-path.test.ts +++ b/packages/junior/tests/unit/runtime/respond-error-path.test.ts @@ -31,5 +31,5 @@ describe("generateAssistantReply error path", () => { expect(reply.diagnostics.outcome).toBe("provider_error"); expect(reply.diagnostics.modelId).toBe("openai/gpt-5.4"); expect(reply.diagnostics.thinkingLevel).toBeUndefined(); - }); + }, 10_000); }); diff --git a/packages/junior/tests/unit/runtime/thread-context.test.ts b/packages/junior/tests/unit/runtime/thread-context.test.ts index 79c2c08f..39af30fa 100644 --- a/packages/junior/tests/unit/runtime/thread-context.test.ts +++ 
b/packages/junior/tests/unit/runtime/thread-context.test.ts @@ -1,5 +1,9 @@ import { describe, expect, it } from "vitest"; -import { getAssistantThreadContext } from "@/chat/runtime/thread-context"; +import { + getAssistantThreadContext, + getTeamId, +} from "@/chat/runtime/thread-context"; +import { runWithWorkspaceTeamId } from "@/chat/slack/workspace-context"; describe("getAssistantThreadContext", () => { it("uses the current raw message ts for the first non-DM thread reply", () => { @@ -61,3 +65,54 @@ describe("getAssistantThreadContext", () => { ).toBeUndefined(); }); }); + +describe("getTeamId", () => { + it("uses the raw Slack workspace team when Slack provides it", () => { + expect( + getTeamId({ + raw: { + team_id: "TRAW", + }, + } as any), + ).toBe("TRAW"); + }); + + it("falls back to the inbound webhook workspace team", async () => { + await runWithWorkspaceTeamId("TWORKSPACE", async () => { + await Promise.resolve(); + expect( + getTeamId({ + raw: { + channel: "C12345", + ts: "1700000000.200", + }, + } as any), + ).toBe("TWORKSPACE"); + }); + }); + + it("prefers the inbound workspace over a Slack Connect author team", () => { + runWithWorkspaceTeamId("TWORKSPACE", () => { + expect( + getTeamId({ + raw: { + user_team: "TEXTERNAL", + }, + } as any), + ).toBe("TWORKSPACE"); + }); + }); + + it("ignores non-team raw team values from DM payloads", () => { + runWithWorkspaceTeamId("TWORKSPACE", () => { + expect( + getTeamId({ + raw: { + channel: "D12345", + team: "D12345", + }, + } as any), + ).toBe("TWORKSPACE"); + }); + }); +}); diff --git a/packages/junior/tests/unit/services/mcp-auth-orchestration.test.ts b/packages/junior/tests/unit/services/mcp-auth-orchestration.test.ts new file mode 100644 index 00000000..c00ea46d --- /dev/null +++ b/packages/junior/tests/unit/services/mcp-auth-orchestration.test.ts @@ -0,0 +1,83 @@ +import { beforeEach, describe, expect, it, vi } from "vitest"; +import { createMcpAuthOrchestration } from 
"@/chat/services/mcp-auth-orchestration"; +import { AuthorizationFlowDisabledError } from "@/chat/services/auth-pause"; + +const { + createMcpOAuthClientProvider, + deleteMcpAuthSession, + deliverPrivateMessage, + formatProviderLabel, + getMcpAuthSession, + patchMcpAuthSession, +} = vi.hoisted(() => ({ + createMcpOAuthClientProvider: vi.fn(), + deleteMcpAuthSession: vi.fn(), + deliverPrivateMessage: vi.fn(), + formatProviderLabel: vi.fn((provider: string) => provider), + getMcpAuthSession: vi.fn(), + patchMcpAuthSession: vi.fn(), +})); + +vi.mock("@/chat/mcp/oauth", () => ({ + createMcpOAuthClientProvider, +})); + +vi.mock("@/chat/mcp/auth-store", () => ({ + deleteMcpAuthSession, + getMcpAuthSession, + patchMcpAuthSession, +})); + +vi.mock("@/chat/oauth-flow", () => ({ + deliverPrivateMessage, + formatProviderLabel, +})); + +describe("createMcpAuthOrchestration", () => { + beforeEach(() => { + createMcpOAuthClientProvider.mockReset(); + createMcpOAuthClientProvider.mockResolvedValue({ + authSessionId: "auth_1", + }); + deleteMcpAuthSession.mockReset(); + deliverPrivateMessage.mockReset(); + formatProviderLabel.mockClear(); + getMcpAuthSession.mockReset(); + patchMcpAuthSession.mockReset(); + }); + + it("returns a deterministic error instead of delivering auth links when authorization is disabled", async () => { + const abortAgent = vi.fn(); + const orchestration = createMcpAuthOrchestration( + { + conversationId: "slack:C123:1700000000.000000", + sessionId: "scheduled:sched_1:1000", + requesterId: "U123", + channelId: "C123", + threadTs: "1700000000.000000", + userMessage: "", + getConfiguration: () => ({}), + getArtifactState: () => undefined, + getMergedArtifactState: () => ({}), + authorizationFlowMode: "disabled", + }, + abortAgent, + ); + + await orchestration.authProviderFactory({ + manifest: { + name: "github", + }, + } as any); + + await expect( + orchestration.onAuthorizationRequired("github"), + ).rejects.toBeInstanceOf(AuthorizationFlowDisabledError); + 
+ expect(deleteMcpAuthSession).toHaveBeenCalledWith("auth_1"); + expect(patchMcpAuthSession).not.toHaveBeenCalled(); + expect(getMcpAuthSession).not.toHaveBeenCalled(); + expect(deliverPrivateMessage).not.toHaveBeenCalled(); + expect(abortAgent).not.toHaveBeenCalled(); + }); +}); diff --git a/packages/junior/tests/unit/services/plugin-auth-orchestration.test.ts b/packages/junior/tests/unit/services/plugin-auth-orchestration.test.ts index 442e094f..2876a407 100644 --- a/packages/junior/tests/unit/services/plugin-auth-orchestration.test.ts +++ b/packages/junior/tests/unit/services/plugin-auth-orchestration.test.ts @@ -4,6 +4,7 @@ import { PluginAuthorizationPauseError, PluginCredentialFailureError, } from "@/chat/services/plugin-auth-orchestration"; +import { AuthorizationFlowDisabledError } from "@/chat/services/auth-pause"; import type { Skill } from "@/chat/skills"; const { @@ -143,6 +144,63 @@ describe("createPluginAuthOrchestration", () => { ); }); + it("returns a deterministic error instead of starting oauth when authorization is disabled", async () => { + startOAuthFlow.mockResolvedValue({ + ok: true, + delivery: { channelId: "D123" }, + }); + const abortAgent = vi.fn(); + const userTokenStore = {} as any; + const orchestration = createPluginAuthOrchestration( + { + requesterId: "U123", + userMessage: "check Sentry", + userTokenStore, + authorizationFlowMode: "disabled", + }, + abortAgent, + ); + + await expect( + orchestration.handleCommandFailure({ + activeSkill: sentrySkill, + command: "sentry issue list", + details: { + exit_code: 1, + stderr: "junior-auth-required provider=sentry", + }, + }), + ).rejects.toBeInstanceOf(AuthorizationFlowDisabledError); + + expect(startOAuthFlow).not.toHaveBeenCalled(); + expect(unlinkProvider).not.toHaveBeenCalled(); + expect(abortAgent).not.toHaveBeenCalled(); + }); + + it("blocks oauth recovery when authorization is disabled and no requester is present", async () => { + const orchestration = 
createPluginAuthOrchestration( + { + userMessage: "", + authorizationFlowMode: "disabled", + }, + vi.fn(), + ); + + await expect( + orchestration.handleCommandFailure({ + activeSkill: sentrySkill, + command: "sentry issue list", + details: { + exit_code: 1, + stderr: "junior-auth-required provider=sentry", + }, + }), + ).rejects.toBeInstanceOf(AuthorizationFlowDisabledError); + + expect(startOAuthFlow).not.toHaveBeenCalled(); + expect(unlinkProvider).not.toHaveBeenCalled(); + }); + it("unlinks the stored token only after oauth restart is launched", async () => { const order: string[] = []; const userTokenStore = {} as any; diff --git a/packages/junior/tests/unit/slack/tool-registration.test.ts b/packages/junior/tests/unit/slack/tool-registration.test.ts index 916b8686..cb34d1ac 100644 --- a/packages/junior/tests/unit/slack/tool-registration.test.ts +++ b/packages/junior/tests/unit/slack/tool-registration.test.ts @@ -1,5 +1,7 @@ -import { describe, expect, it } from "vitest"; +import { afterEach, beforeEach, describe, expect, it } from "vitest"; import { createTools } from "@/chat/tools"; +import { createSchedulerPlugin } from "@/chat/scheduler/plugin"; +import { setAgentPlugins } from "@/chat/plugins/agent-hooks"; import { resolveChannelCapabilities } from "@/chat/tools/channel-capabilities"; const noopSandbox = {} as any; @@ -13,6 +15,14 @@ function ctx(channelId?: string) { } describe("Slack tool registration", () => { + beforeEach(() => { + setAgentPlugins([createSchedulerPlugin()]); + }); + + afterEach(() => { + setAgentPlugins([]); + }); + it("does not register channel-scope tools in DM context", () => { const tools = createTools([], {}, ctx("D12345")); @@ -31,6 +41,45 @@ describe("Slack tool registration", () => { expect(tools).toHaveProperty("slackCanvasCreate"); }); + it("registers schedule tools only with complete Slack turn context", () => { + const incomplete = createTools([], {}, ctx("C12345")); + const complete = createTools( + [], + {}, + { + 
...ctx("C12345"), + teamId: "T123", + requester: { + userId: "U123", + }, + }, + ); + + expect(incomplete).not.toHaveProperty("slackScheduleCreateTask"); + expect(complete).toHaveProperty("slackScheduleCreateTask"); + expect(complete).toHaveProperty("slackScheduleListTasks"); + expect(complete).toHaveProperty("slackScheduleUpdateTask"); + expect(complete).toHaveProperty("slackScheduleDeleteTask"); + expect(complete).toHaveProperty("slackScheduleRunTaskNow"); + }); + + it("does not register schedule tools without a requester", () => { + const tools = createTools( + [], + {}, + { + ...ctx("C12345"), + teamId: "T123", + }, + ); + + expect(tools).not.toHaveProperty("slackScheduleCreateTask"); + expect(tools).not.toHaveProperty("slackScheduleListTasks"); + expect(tools).not.toHaveProperty("slackScheduleUpdateTask"); + expect(tools).not.toHaveProperty("slackScheduleDeleteTask"); + expect(tools).not.toHaveProperty("slackScheduleRunTaskNow"); + }); + it("does not register canvas create when channel context is unavailable", () => { const tools = createTools([], {}, ctx()); diff --git a/packages/junior/tests/unit/state/adapter-resolution-matrix.test.ts b/packages/junior/tests/unit/state/adapter-resolution-matrix.test.ts index 51cd38cc..72ea2464 100644 --- a/packages/junior/tests/unit/state/adapter-resolution-matrix.test.ts +++ b/packages/junior/tests/unit/state/adapter-resolution-matrix.test.ts @@ -51,4 +51,11 @@ describe("state adapter resolution decision matrix", () => { expect(readChatConfig(process.env).state.adapter).toBe(expectedAdapter); }, ); + + it("reads the optional state key prefix", async () => { + setOrDelete("JUNIOR_STATE_KEY_PREFIX", "junior:test:123"); + vi.resetModules(); + const { readChatConfig } = await import("@/chat/config"); + expect(readChatConfig(process.env).state.keyPrefix).toBe("junior:test:123"); + }); }); diff --git a/packages/junior/tests/unit/state/state-adapter-lock.test.ts b/packages/junior/tests/unit/state/state-adapter-lock.test.ts index 
0eb47361..9b1dc55b 100644 --- a/packages/junior/tests/unit/state/state-adapter-lock.test.ts +++ b/packages/junior/tests/unit/state/state-adapter-lock.test.ts @@ -1,4 +1,5 @@ import { afterEach, describe, expect, it, vi } from "vitest"; +import { createTestMessage } from "../../fixtures/slack-harness"; const ORIGINAL_ENV = { ...process.env }; @@ -116,4 +117,32 @@ describe("state adapter lock lease", () => { await adapter.releaseLock(lock); } }); + + it("keeps caller-facing lock and queue identifiers unprefixed", async () => { + const { getStateAdapter } = await loadMemoryStateAdapter({ + JUNIOR_STATE_KEY_PREFIX: "junior:test:state-adapter-lock", + }); + const adapter = getStateAdapter(); + await adapter.connect(); + + await adapter.set("logical-key", "stored"); + await expect(adapter.get("logical-key")).resolves.toBe("stored"); + + const lock = await adapter.acquireLock("thread-1", 10 * 60 * 1000); + expect(lock).toMatchObject({ threadId: "thread-1" }); + if (lock) { + await adapter.releaseLock(lock); + } + + const entry: Parameters<typeof adapter.enqueue>[1] = { + enqueuedAt: 0, + expiresAt: 60_000, + message: createTestMessage({ id: "entry-1" }), + }; + await adapter.enqueue("thread-1", entry, 10); + await expect(adapter.queueDepth("thread-1")).resolves.toBe(1); + await expect(adapter.dequeue("thread-1")).resolves.toMatchObject({ + message: { id: "entry-1" }, + }); + }); }); diff --git a/packages/junior/tests/unit/tools/agent-tools.test.ts b/packages/junior/tests/unit/tools/agent-tools.test.ts index edc684fe..a3bb5ded 100644 --- a/packages/junior/tests/unit/tools/agent-tools.test.ts +++ b/packages/junior/tests/unit/tools/agent-tools.test.ts @@ -1,5 +1,6 @@ import { beforeEach, describe, expect, it, vi } from "vitest"; import { PluginAuthorizationPauseError } from "@/chat/services/plugin-auth-orchestration"; +import { AuthorizationFlowDisabledError } from "@/chat/services/auth-pause"; import { SkillSandbox } from "@/chat/sandbox/skill-sandbox"; import { createAgentTools } from
"@/chat/tools/agent-tools"; import type { Skill } from "@/chat/skills"; @@ -310,4 +311,51 @@ describe("createAgentTools", () => { }); expect(handleToolExecutionError).not.toHaveBeenCalled(); }); + + it("rethrows disabled authorization errors without reporting a tool failure", async () => { + const sandbox = new SkillSandbox([githubSkill], [githubSkill]); + const pluginAuthOrchestration = { + handleCommandFailure: vi.fn(async () => { + throw new AuthorizationFlowDisabledError("plugin", "github"); + }), + } as any; + const sandboxExecutor = { + canExecute: (toolName: string) => toolName === "bash", + execute: vi.fn(async () => ({ + result: { + ok: false, + command: "gh issue view 123", + cwd: "/vercel/sandbox", + exit_code: 1, + signal: null, + timed_out: false, + stdout: "", + stderr: "bad credentials", + stdout_truncated: false, + stderr_truncated: false, + }, + })), + } as any; + + const [bashTool] = createAgentTools( + { + bash: { + description: "bash", + inputSchema: {} as any, + execute: async () => ({ ok: true }), + }, + }, + sandbox, + {}, + undefined, + sandboxExecutor, + pluginAuthOrchestration, + undefined, + ); + + await expect( + bashTool!.execute("tool-2", { command: "gh issue view 123" }), + ).rejects.toBeInstanceOf(AuthorizationFlowDisabledError); + expect(handleToolExecutionError).not.toHaveBeenCalled(); + }); }); diff --git a/packages/junior/tests/unit/vercel.test.ts b/packages/junior/tests/unit/vercel.test.ts index bbf02d9a..d6ff4c79 100644 --- a/packages/junior/tests/unit/vercel.test.ts +++ b/packages/junior/tests/unit/vercel.test.ts @@ -7,6 +7,12 @@ describe("juniorVercelConfig", () => { expect(config.framework).toBe("nitro"); expect(config.buildCommand).toBe("pnpm build"); + expect(config.crons).toEqual([ + { + path: "/api/internal/heartbeat", + schedule: "* * * * *", + }, + ]); }); it("omits buildCommand when set to null", () => { diff --git a/packages/junior/vitest.config.ts b/packages/junior/vitest.config.ts index 4aa4894b..83f34d6b 100644 
--- a/packages/junior/vitest.config.ts +++ b/packages/junior/vitest.config.ts @@ -22,6 +22,9 @@ for (const envRoot of [workspaceRoot, packageRoot]) { } } +process.env.JUNIOR_SECRET = "junior-test-secret"; +process.env.JUNIOR_STATE_KEY_PREFIX ??= `junior:test:${process.pid}`; + export default defineConfig({ resolve: { alias: { diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 6d48295e..9c73e4c9 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -233,6 +233,9 @@ importers: "@sentry/junior-sentry": specifier: workspace:* version: link:../junior-sentry + "@sentry/junior-testing": + specifier: workspace:* + version: link:../junior-testing chat: specifier: 4.29.0 version: 4.29.0(ai@6.0.190(zod@4.4.3))(zod@4.4.3) @@ -260,6 +263,12 @@ importers: packages/junior-sentry: {} + packages/junior-testing: + devDependencies: + typescript: + specifier: ^6.0.3 + version: 6.0.3 + packages: "@ai-sdk/gateway@3.0.119": resolution: diff --git a/policies/README.md b/policies/README.md index ae2fb7e0..f949a627 100644 --- a/policies/README.md +++ b/policies/README.md @@ -10,8 +10,10 @@ Good policy topics: - code comments and docstrings - testing expectations - naming conventions +- interface design - migration hygiene - automation safety boundaries +- serverless background work Keep policy docs small: diff --git a/policies/interface-design.md b/policies/interface-design.md new file mode 100644 index 00000000..cd9c3d78 --- /dev/null +++ b/policies/interface-design.md @@ -0,0 +1,21 @@ +# Interface Design + +## Intent + +Interfaces should expose the smallest useful capability while keeping ownership, lifecycle, and security boundaries obvious. + +## Policy + +- Prefer narrow capability methods over broad dependency bags or access to underlying services. +- Expose lifecycle-oriented operations, such as `dispatch` and `get`, instead of raw runners, clients, routes, or storage adapters. +- Return projections by default. 
Do not expose full internal records when callers only need status, ids, or summaries. +- Make ownership explicit in the API boundary. A caller should only read or mutate records it owns unless cross-owner access is the feature. +- Keep platform details inside the layer that owns the platform. Do not leak Slack clients, Vercel primitives, Redis clients, or model-runtime internals through feature interfaces. +- Require idempotency keys for APIs that create durable work from retryable contexts. +- Use short JavaScript-facing names for public types and methods. Avoid framework-style names that describe implementation mechanics instead of product intent. +- Add an interface only when it removes real coupling or represents a stable boundary. + +## Exceptions + +- Test fixtures may expose narrower construction seams when the production interface remains small. +- Low-level infrastructure modules may expose mechanism-specific APIs inside their own ownership boundary. diff --git a/policies/serverless-background-work.md b/policies/serverless-background-work.md new file mode 100644 index 00000000..82563020 --- /dev/null +++ b/policies/serverless-background-work.md @@ -0,0 +1,22 @@ +# Serverless Background Work + +## Intent + +Background work must survive serverless request boundaries, retries, and process loss without relying on memory or long-lived workers. + +## Policy + +- Persist durable work state before starting background execution. +- Internal callbacks should carry only small signed envelopes, such as ids and expected versions. Store full work payloads in durable state. +- Treat `waitUntil` as a per-request lifetime extension, not a job system. +- Make background work idempotent and retryable. +- Split long work into bounded slices with max attempts, max age, and max continuation depth. +- Define explicit recovery for stale non-terminal states such as `pending`, `running`, and `awaiting_resume`. 
+- Use durable leases or locks for ownership, and define lock ordering when work touches multiple state domains. +- Do not expose platform-specific background primitives directly to feature code or plugins unless that platform is the feature boundary. +- Stored user-authored instructions remain user content even when executed later by a system actor. + +## Exceptions + +- Purely best-effort telemetry or cache warming may skip durable state when losing the work has no product effect. +- Local development helpers may use in-memory execution when production code still follows the durable path. diff --git a/scripts/dev-with-root-env.mjs b/scripts/dev-server.mjs similarity index 70% rename from scripts/dev-with-root-env.mjs rename to scripts/dev-server.mjs index 90a0e98b..c337b28d 100644 --- a/scripts/dev-with-root-env.mjs +++ b/scripts/dev-server.mjs @@ -115,6 +115,64 @@ function syncInjectedJuniorDist(options = {}) { const tunnelToken = process.env.CLOUDFLARE_TUNNEL_TOKEN?.trim(); const tunnelUrl = process.env.CLOUDFLARE_TUNNEL_URL?.trim() || `http://localhost:${devPort}`; +const localInternalSecret = "junior-local-dev-internal"; +const heartbeatSecret = + process.env.JUNIOR_SCHEDULER_SECRET?.trim() || + process.env.CRON_SECRET?.trim() || + "junior-local-dev-heartbeat"; +const heartbeatUrl = + process.env.JUNIOR_DEV_HEARTBEAT_URL?.trim() || + `http://localhost:${devPort}/api/internal/heartbeat`; +const heartbeatIntervalMs = 60_000; + +if ( + !process.env.JUNIOR_SCHEDULER_SECRET?.trim() && + !process.env.CRON_SECRET?.trim() +) { + process.env.JUNIOR_SCHEDULER_SECRET = heartbeatSecret; +} +if (!process.env.JUNIOR_SECRET?.trim()) { + process.env.JUNIOR_SECRET = localInternalSecret; +} +if (!process.env.JUNIOR_BASE_URL?.trim()) { + process.env.JUNIOR_BASE_URL = `http://localhost:${devPort}`; +} + +async function pulseHeartbeat() { + try { + const response = await fetch(heartbeatUrl, { + headers: { authorization: `Bearer ${heartbeatSecret}` }, + }); + if (!response.ok) { + 
console.error( + `Local heartbeat returned ${response.status} ${response.statusText}`, + ); + } + } catch (error) { + console.error( + `Local heartbeat failed: ${error instanceof Error ? error.message : String(error)}`, + ); + } +} + +function startLocalHeartbeat() { + const initialDelayMs = 5_000; + const initialTimer = setTimeout(() => { + void pulseHeartbeat(); + }, initialDelayMs); + const interval = setInterval(() => { + void pulseHeartbeat(); + }, heartbeatIntervalMs); + + children.add({ + killed: false, + kill() { + clearTimeout(initialTimer); + clearInterval(interval); + this.killed = true; + }, + }); +} runRequiredChild("pnpm", ["build"], { cwd: juniorPackageDir, @@ -142,6 +200,7 @@ if (tunnelToken) { } const child = spawnChild("pnpm", ["dev"], { cwd: exampleDir }); +startLocalHeartbeat(); for (const signal of ["SIGINT", "SIGTERM"]) { process.on(signal, () => { diff --git a/specs/agent-prompt-spec.md b/specs/agent-prompt-spec.md index 3bab1d92..bc0b1bde 100644 --- a/specs/agent-prompt-spec.md +++ b/specs/agent-prompt-spec.md @@ -3,7 +3,7 @@ ## Metadata - Created: 2026-04-28 -- Last Edited: 2026-05-06 +- Last Edited: 2026-05-26 ## Changelog @@ -11,6 +11,7 @@ - 2026-04-30: Reworked the core prompt contract around fixed operating sections, source hierarchy, explicit completion gates, OpenClaw-style tool-call/safety boundaries, and stable-before-volatile ordering. - 2026-05-06: Required the initial system prompt to be byte-stable across conversations and turns, with volatile runtime context moved into per-turn user-message context. - 2026-05-06: Clarified that deployment-stable assistant identity belongs in the system prompt while requester identity remains per-turn context. +- 2026-05-26: Clarified that core prompt assembly must not contain plugin-specific knowledge; plugins express behavior through skills, tools, schemas, and tool guidance. 
## Status @@ -32,7 +33,7 @@ Define the canonical contract for Junior's platform-owned agent prompt so prompt - Defining Pi agent loop mechanics or terminal output assembly; see `./harness-agent-spec.md`. - Defining Slack delivery transport behavior; see `./slack-agent-delivery-spec.md` and `./slack-outbound-contract-spec.md`. - Defining test-layer taxonomy; see `./testing/index.md`. -- Defining provider-specific prompt overlays unless this repository owns that overlay. +- Defining plugin-specific prompt overlays or provider workflows. Plugins own that guidance through their skills, tools, schemas, and tool guidance. ## Contracts @@ -41,6 +42,7 @@ Define the canonical contract for Junior's platform-owned agent prompt so prompt - The core prompt owns platform behavior: tool-use policy, execution bias, context boundaries, Slack output shape, and failure reporting expectations. - `SOUL.md` and other deployment-authored personality files are voice-only. Platform behavior must still work if those files are empty or heavily customized. - Skill files own domain-specific workflow mechanics. They must not duplicate generic harness behavior such as "use tools before answering" or "ask only when blocked." +- The core prompt must not name or describe specific installed plugins, plugin providers, plugin-owned config keys, plugin-owned default targets, plugin-owned tools, or plugin-specific workflows. That knowledge belongs to dynamic capabilities. ### Section boundaries @@ -48,6 +50,8 @@ Define the canonical contract for Junior's platform-owned agent prompt so prompt `buildTurnContextPrompt(...)` owns volatile prompt context. It is attached to the current user turn, including requester identity and resumed-turn context, and may vary by conversation or turn. Completed turns must strip this context before storing durable Pi message history so prior turns are not replayed with stale runtime facts. 
+Turn context may disclose dynamic capability surfaces that the model can act on, such as available skill names/descriptions, active MCP catalog summaries, and tool guidance attached to the current native tool set. It must not separately disclose plugin ownership or installed plugin/provider catalogs as prompt knowledge. If the model needs plugin-specific behavior, that behavior must arrive through the loaded skill body, tool description, tool schema, `promptSnippet`, or `promptGuidelines`. + The combined prompt surface must keep these concerns distinct: 1. Identity/personality. @@ -106,9 +110,10 @@ Mutable facts need live checks. Examples include files, repos, versions, issues, - Tool schemas remain the source of truth for tool parameters. The prompt may state when to use tools, not re-document every tool schema. - The model should load the best-matching skill when relevant and avoid preloading unrelated skills. -- After loading a plugin-backed skill, the prompt may describe the generic MCP lookup path, but provider-specific tool strategy belongs in the skill or plugin docs. +- After loading a plugin-backed skill, the prompt may describe the generic MCP lookup path, but provider-specific tool strategy belongs in the skill, tool description, tool schema, or tool guidance. - Skill selection should be explicit: scan available skills, load one clearly matching skill, choose the most specific skill when several match, and avoid loading any skill when none clearly applies. - Tool-call style belongs in its own section: call routine tools directly, narrate only when it helps, and prefer first-class tools over asking the user to perform equivalent manual work. +- Trusted plugin tools must carry concise descriptions and optional tool guidance that tell the agent when and how to use them. Do not compensate for weak plugin tool descriptions by adding plugin-specific bullets to the core prompt. 
### Runtime and safety boundaries @@ -139,8 +144,9 @@ Prompt changes are rejected or revised when they introduce: 1. Duplicate rules across core prompt, skills, or personality files. 2. Multiple adjacent bullets that all express the same ask/act/verify policy. 3. Tool-schema restatement in prompt prose. -4. Skill instructions that override generic harness behavior without a domain-specific reason. -5. Static prompt tests that assert wording instead of behavior. +4. Core prompt or turn-context code that exposes specific installed plugins, plugin providers, plugin-owned config keys, plugin-owned default targets, or plugin-specific workflows outside the dynamic skill/tool surfaces. +5. Skill instructions that override generic harness behavior without a domain-specific reason. +6. Static prompt tests that assert wording instead of behavior. ## Observability diff --git a/specs/index.md b/specs/index.md index 9c3b8c4a..fa1ea45b 100644 --- a/specs/index.md +++ b/specs/index.md @@ -3,7 +3,7 @@ ## Metadata - Created: 2026-03-03 -- Last Edited: 2026-05-13 +- Last Edited: 2026-05-26 ## Changelog @@ -17,6 +17,8 @@ - 2026-04-28: Added canonical agent prompt spec. - 2026-05-06: Added draft advisor tool spec. - 2026-05-13: Added ownership map for chat, agent session, and Slack delivery specs. +- 2026-05-18: Added draft scheduler spec for scheduled Junior tasks. +- 2026-05-26: Added draft trusted plugin heartbeat spec for scheduler packaging. 
## Status @@ -82,6 +84,8 @@ For chat/agent/Slack turn behavior: ## Draft Specs - `specs/advisor-tool-spec.md` +- `specs/scheduler-spec.md` +- `specs/trusted-plugin-heartbeat-spec.md` ## Archive Policy diff --git a/specs/plugin-spec.md b/specs/plugin-spec.md index dc1edcf6..e96e057a 100644 --- a/specs/plugin-spec.md +++ b/specs/plugin-spec.md @@ -3,7 +3,7 @@ ## Metadata - Created: 2026-03-01 -- Last Edited: 2026-05-20 +- Last Edited: 2026-05-26 ## Changelog @@ -26,6 +26,7 @@ - 2026-05-12: Clarified that credentialed provider HTTP traffic is authenticated through the sandbox egress proxy. - 2026-05-20: Added `PluginConfig` manifests for install-level plugin configuration. - 2026-05-25: Added explicit trusted app plugin registration for deterministic agent behavior at Junior-owned lifecycle boundaries. +- 2026-05-26: Clarified that plugin-specific agent behavior must surface through skills, tools, schemas, and tool guidance, not core prompt/plugin catalog prose. ## Status @@ -65,6 +66,7 @@ Define a plugin model where provider integrations are self-contained manifests t 8. `loadSkill` activates the provider catalog and returns provider/count metadata once the MCP server is connected and `listTools` succeeds. If connection/listing needs MCP OAuth, `loadSkill` initiates the MCP auth pause and the resumed turn re-activates the catalog before the model continues. `searchMcpTools` returns focused descriptors, including input/output schema and annotations, for any available active-provider tool before `callMcpTool` executes it. 9. Runtime setup belongs to `plugin.yaml`: CLI packages, system packages, postinstall commands, MCP endpoints/tool allowlists, credential delivery, command env, OAuth, and provider config keys are manifest declarations, not skill instructions. 10. Skills consume the plugin-provided runtime surface. 
They must not instruct the agent to install packages, bootstrap CLIs, configure MCP servers, create credentials, or repair sandbox package installation as part of normal workflow. +11. The core prompt must not teach the agent about specific installed plugins, provider names, plugin config keys, default targets, or plugin workflows. Model-visible plugin behavior must arrive through dynamic capability surfaces: skill names/descriptions, loaded skill bodies, native tool descriptions, tool schemas, `promptSnippet`, `promptGuidelines`, and searched MCP tool descriptors. ## Plugin directory structure @@ -480,13 +482,7 @@ All existing functions (`getCapabilityProvider`, `isKnownCapability`, etc.) work for (const plugin of getPluginProviders()) { const { apiHeaders, commandEnv, credentials, name } = plugin.manifest; if (!credentials && !apiHeaders) continue; - brokersByProvider[name] = useTestBroker - ? new TestCredentialBroker({ - provider: name, - // token-backed credentials add domains/env placeholder; header-only - // plugins add header transforms and optional command env. - }) - : createPluginBroker(name, { userTokenStore }); + brokersByProvider[name] = createPluginBroker(name, { userTokenStore }); } ``` @@ -504,9 +500,12 @@ export function getOAuthProviderConfig( The OAuth callback route uses `getOAuthProviderConfig()` instead of accessing `OAUTH_PROVIDERS` directly. -### Test credential override +### Test and eval credentials -`TestCredentialBroker` substitution in eval mode works the same — `factory.ts` checks `EVAL_ENABLE_TEST_CREDENTIALS=1` and substitutes regardless of source. For plugin-level `api-headers`, eval mode injects deterministic dummy header values instead of resolving deployment env vars. Plugin-level `command-env` resolves through the same non-secret command env path as production. +Tests and evals seed credentials through the same stores and plugin env vars used +by production paths. 
Sandbox HTTP fixtures may intercept credential-injected +requests at the egress proxy boundary, but core broker selection does not switch +to test-only credential behavior. ### Install-wide config defaults @@ -533,6 +532,8 @@ Plugin skills use the same `SKILL.md` format and frontmatter contract as existin ### Skill/runtime boundary +Plugin prompt behavior must be local to the capability that needs it. Plugin-backed skills may describe provider-specific workflows after the skill is loaded. Trusted plugin tools must have concise descriptions and, when needed, tool guidance that tells the agent when to use the tool, what not to target, and which user confirmation or context is required. The host must not add plugin-specific rescue rules to the core prompt to compensate for weak plugin descriptions. + Plugin-backed skills may tell the model how to use available commands, MCP tools, command env, config defaults, and provider-specific query syntax. They may include troubleshooting for unavailable runtime surfaces only as diagnosis and escalation, for example “report that the GitHub plugin runtime dependency is unavailable.” When the runtime loads a plugin-backed skill, it enforces the parent plugin before returning the skill: @@ -593,7 +594,6 @@ All existing security invariants from `security-policy.md` are preserved: | `CredentialBroker` interface and `CredentialLease` type | Shared contract | | `ProviderCredentialRouter` | Generic router | | OAuth callback route (`/api/oauth/callback/[provider]`) | Shared HTTP handler | -| `TestCredentialBroker` | Eval infrastructure, not a plugin | ## Example: adding a new provider (Linear) diff --git a/specs/providers/catalog-spec.md b/specs/providers/catalog-spec.md index 85f59538..c47aa3bd 100644 --- a/specs/providers/catalog-spec.md +++ b/specs/providers/catalog-spec.md @@ -3,13 +3,14 @@ ## Metadata - Created: 2026-02-27 -- Last Edited: 2026-05-06 +- Last Edited: 2026-05-26 ## Changelog - 2026-03-03: Standardized metadata headers 
and reconciled spec references/structure. - 2026-04-30: Added `github.org` to GitHub provider configKeys. - 2026-05-06: Clarified that provider catalog prompt disclosure belongs in per-turn context, not the static system prompt. +- 2026-05-26: Marked provider catalog prompt disclosure as superseded; core prompt context must not expose installed plugin/provider catalogs. ## Status @@ -22,7 +23,7 @@ Draft — largely superseded by `specs/plugin-spec.md` which now drives the prov ## Purpose -Define the canonical provider catalog model used by runtime, skill validation, and prompts. +Define the historical provider catalog model used by runtime and skill validation. Prompt disclosure rules in this draft are superseded by `../agent-prompt-spec.md` and `../plugin-spec.md`. This spec answers: @@ -103,8 +104,7 @@ target: ## Prompt Contracts -- Per-turn prompt context should include provider catalog summary so natural language requests can map to valid config/capability tokens without changing the static system prompt. -- Prompt guidance must remain generic and provider-extensible. +Superseded. Core prompt assembly must not expose installed provider/plugin catalogs, provider config keys, or default targets as standalone prompt knowledge. Provider-specific behavior should reach the model through dynamic skill/tool surfaces: available skill descriptions, loaded skill bodies, tool descriptions, schemas, tool guidance, and searched MCP descriptors. ## Observability diff --git a/specs/scheduler-spec.md b/specs/scheduler-spec.md new file mode 100644 index 00000000..f18fffdd --- /dev/null +++ b/specs/scheduler-spec.md @@ -0,0 +1,262 @@ +# Scheduler Spec + +## Metadata + +- Created: 2026-05-18 +- Last Edited: 2026-05-27 + +## Changelog + +- 2026-05-27: Added stale missed-run policy: old occurrences are skipped and consumed or advanced, not dispatched late and not blocked for human review. +- 2026-05-27: Added the simple one-off reminder exception to Slack schedule confirmation. 
+- 2026-05-26: Reframed scheduled execution around system actors: creator is metadata/contact, scheduled runs execute as a system actor, and user-bound auth must not be borrowed implicitly. +- 2026-05-18: Clarified V1 calendar model: exact next-run instants plus simple daily/weekly/monthly/yearly recurrence rules. +- 2026-05-18: Initial draft contract for scheduled Junior tasks, prompt framing, no-SQL storage, run idempotency, and eval-first verification. + +## Status + +Draft + +## Purpose + +Define the first scheduler contract for Junior: users can create durable tasks that Junior executes later or repeatedly, with explicit task framing and delivery back to the configured surface. + +## Scope + +- Scheduled task and scheduled run data model. +- Prompt envelope used when executing a scheduled task. +- Storage and idempotency rules. +- Slack authoring and management behavior. +- Verification layer responsibilities. + +## Non-Goals + +- A generic event-rule engine for GitHub, Slack, Sentry, or webhook events. +- SQL-backed storage as a V1 requirement. +- A full durable workflow runtime such as Temporal or Vercel Workflow. +- Reusing timeout-resume callbacks as the product scheduler. +- Slack `chat.scheduleMessage` as the execution mechanism. + +## Contracts + +### Product Boundary + +A scheduled task is not a stored Slack message. It is a normalized task contract that Junior executes on a time trigger. + +The stored task must include: + +- task title +- objective +- instructions +- expected output +- creator metadata +- execution actor metadata +- destination surface +- schedule and timezone +- current status +- next-run timestamp when active +- recurrence rule when recurring +- optional constraints and source context + +The original user utterance may be retained for audit/debugging, but it must not be the sole execution input. + +Slack destinations are conversations, not existing threads. 
A scheduled task may target the active Slack DM or channel, and scheduled output posts as a new message in that conversation. + +Creator metadata records the user who confirmed the task so Junior can audit changes and privately notify someone when the task needs attention. The creator is not an owner, is not an authorization principal, and is not the actor for future scheduled runs. + +Task management is controlled only by access to the destination conversation window. If a user can post or trigger Junior in that Slack DM or channel context, they can manage scheduled tasks for that same context. The scheduler must not add creator-only, owner-only, workspace-admin-only, or channel-admin-only gates for V1 management. + +### Calendar Model + +Every active task must have an exact `nextRunAtMs` instant. For one-off tasks, that instant is the complete schedule. +Slack authoring may accept supported relative one-off phrases such as "tomorrow at 9am"; these must be resolved to an exact `nextRunAtMs` before storage. When a user does not provide a timezone, scheduler authoring defaults to `America/Los_Angeles` unless `JUNIOR_TIMEZONE` overrides it. + +Recurring tasks must also store a small calendar recurrence rule: + +- frequency: `daily`, `weekly`, `monthly`, or `yearly` +- positive interval +- local start date +- local time +- timezone +- optional weekly weekdays +- optional monthly/yearly exact day-of-month and month + +V1 recurrence is calendar-based, not fixed-duration. For example, "every Monday at 9am America/Los_Angeles" should continue to run at 9am local time across daylight-saving changes. Monthly and yearly recurrences use exact calendar dates; unsupported dates are skipped rather than converted into "last day" or "business day" behavior. + +The scheduler does not need advanced rules such as first business day, nearest weekday, holiday calendars, or arbitrary cron syntax. + +Run-now has a separate contract: + +1. Run-now applies only to active tasks. +2. 
Run-now must not implicitly resume paused or blocked tasks. +3. Run-now must not rewrite the task's stored calendar schedule. +4. A task may store a separate immediate-run timestamp. +5. When both the immediate-run timestamp and ordinary `nextRunAtMs` are due, the scheduler claims the immediate run first. +6. After the manual run reaches a terminal state, clear the immediate-run timestamp. +7. If the ordinary `nextRunAtMs` was already overdue when the manual run completed, consume that scheduled occurrence and advance recurrence once instead of running the same task twice in one tick. + +### Missed Run Policy + +The scheduler must not execute arbitrarily old work just because heartbeat delivery or dispatch recovery was broken. At claim time, any scheduled occurrence more than 24 hours older than the scheduler's current clock is stale. + +Stale occurrences are terminal skipped runs: + +1. The scheduler records a run for the missed `task_id:scheduled_for_ms` with `status: skipped`. +2. The scheduler must not dispatch the agent for that occurrence. +3. A skipped stale occurrence does not update `lastRunAtMs`, because no task execution happened. +4. A one-off stale occurrence is consumed: the task becomes `paused` with no `nextRunAtMs`. +5. A recurring stale occurrence is consumed and the task advances directly to the next future recurrence. The scheduler must not run catch-up loops for every missed recurrence. +6. A stale run-now request is cleared without shifting the stored ordinary schedule. +7. During stale recovery, an equivalent newer active task in the same destination should be skipped and paused when an older active task with the same schedule and task contract remains canonical. +8. Staleness is not a blocked or missing-input state and must not require human review. A user can still run the task manually if the missed work is still useful. 
+ +### Prompt Framing + +Every scheduled run must compile the stored task into a marker-delimited prompt before entering the agent runtime. + +The prompt must make these facts explicit: + +1. This is an autonomous scheduled run. +2. The task contract is the source of truth for what to execute. +3. The run executes as a Junior system actor, not as the user who created the task. +4. The run should complete without asking follow-up questions unless access, approval, or required input is missing. +5. If blocked, the result should identify the missing provider, permission, or input. + +The compiled prompt must separate descriptive task facts from directives. Use marker blocks such as: + +- `` +- `` +- `` +- `` +- `` + +This follows the router and turn-context pattern: background and state live in descriptive blocks, while behavior rules live in a rules block and the actual ask appears last. + +### Storage + +V1 must not require SQL. The scheduler store should use the existing durable state dependency already required by Junior deployments. + +The initial implementation may use the Chat SDK state adapter and a global task index: + +- `junior:scheduler:task:{task_id}` stores the task record. +- `junior:scheduler:tasks` stores task ids for due scans. +- `junior:scheduler:team:{team_id}:tasks` stores task ids for workspace management. +- `junior:scheduler:run:{run_id}` stores run history. +- `junior:scheduler:active:{task_id}` stores the currently active run marker for task-level overlap prevention. +- `junior:scheduler:claim:{task_id}:{scheduled_for_ms}` is the idempotency claim. + +A future Redis-native store may replace the scan index with a sorted due index without changing the runtime-facing scheduler store interface. + +### Run Idempotency + +Scheduled execution is at-least-once at the trigger layer and exactly-once-best-effort at Junior's run layer. + +Rules: + +1. A run idempotency key is `task_id:scheduled_for_ms`. +2. 
The scheduler must claim that key before dispatch. +3. Duplicate ticks and retries must not dispatch the same scheduled run more than once. +4. Run side effects must be keyed by the scheduled run id where possible. +5. V1 tasks do not overlap with themselves. If a task already has an active run, later due claims for that same task are not dispatched. +6. Stale pending claims may be reclaimed after the scheduler's stale-claim timeout. + +### Actor And Auth Model + +Scheduled tasks must distinguish these V1 identities: + +- **Creator:** the human who confirmed the task. This is audit and notification metadata only. +- **Conversation manager:** any user who can post or trigger Junior in the destination Slack conversation window. This controls who may list, pause, resume, delete, or run-now the task for that same conversation. +- **Execution actor:** the actor used for the autonomous scheduled run. For scheduled tasks, this is a Junior system actor, not a Slack user. + +Scheduled runs must not pass the creator as the runtime requester or treat the creator as if they were present and acting during the run. Audit and correlation metadata should include both the system execution actor and creator metadata, but auth decisions must use the execution actor. + +V1 scheduled execution has no user requester. User OAuth tokens cannot be used merely because that user created the task. Authorization flows are disabled during scheduled runs, and authorization links must not be posted publicly. If no usable non-user credential exists, Junior must block the run and privately notify the creator when possible. + +Future actor-aware auth may add an explicit credential subject: an account, grant, or service principal whose provider credentials may be used by scheduled tools. 
Future credential subjects may include: + +- system-owned credentials available to the scheduled-run actor +- an explicitly recorded delegated credential grant in the task contract +- a supported service principal named by the task contract + +Those future credential subjects must be explicit and separate from creator metadata. Until that support exists, scheduled runs may use only credentials already available to the system execution actor. + +### Implementation Plan + +1. Introduce a small actor contract shared by runtime, scheduler, and auth boundaries. It should represent user actors, system actors, and future service actors without leaking Slack SDK types. +2. Keep `createdBy` as creator metadata and add an execution actor field to scheduled tasks. New scheduled tasks should default to a system actor such as `scheduled-task`; existing tasks may be read with that default until migrated. +3. Update the scheduled runner to enter the agent runtime with the system actor and no user requester. Creator details may remain in run context and notification metadata, but not in the actor slot. +4. Update auth and credential resolution so V1 scheduled runs cannot use requester-scoped OAuth or start interactive auth flows. Missing non-user credentials should produce a blocked run plus private notification. +5. Update telemetry, tests, and eval fixtures so scheduled execution assertions refer to creator metadata and execution actor separately. + +### Slack UX + +Slack authoring is confirm-first for recurring schedules and non-reminder scheduled work: + +1. User asks Junior to schedule work. +2. Junior drafts the normalized task: title, objective, instructions, expected output, cadence, timezone, destination, and next run. +3. User confirms before the task becomes active. +4. Junior creates the task only after confirmation and replies with the task id, destination, schedule, timezone, and next run. +5. 
Junior supports list, pause, resume, delete, and run-now commands from the destination conversation. + +Confirmation should show the executable task contract, not only echo the user's text. +Explicit simple one-off reminder requests, such as "remind me in 10 minutes to stretch", may be created immediately without a second confirmation when the request targets the active Slack destination and has no recurrence, extra constraints, or source context. +Anyone who can post or trigger Junior in the destination Slack conversation window may manage that conversation's scheduled tasks. Creator identity remains audit and notification metadata, but it is not an edit/delete/run-now ownership gate and is not the execution actor. +Task creation must use the current active Slack conversation as the destination. Users cannot create scheduled tasks for a different channel, and cannot create DMs for other users. +List output must be scoped to the active destination conversation and must not reveal tasks from other channels or DMs in the same workspace. +Blocked tasks must appear in list output with their blocked reason. After the missing requirement is fixed, a conversation manager can resume the task or run it now from the same destination conversation. + +## Failure Model + +1. Tick delivery fails: the task remains due and a later tick may claim it. +2. Duplicate tick delivery: the run claim suppresses duplicate dispatch. +3. Run fails after claim: run record captures failure and retry policy decides whether to re-dispatch. +4. Required non-user credentials are missing: mark the run blocked, keep or pause the task according to policy, and privately notify the creator when possible. +5. A task remains due for more than 24 hours: mark that occurrence skipped, then consume or advance the task according to the missed-run policy. +6. Prompt framing is ambiguous: evals must catch cases where the model creates/edits a schedule instead of executing the task. 
+ +## Observability + +Scheduler execution should emit safe task/run metadata only: + +- task id +- run id +- scheduled timestamp +- task status +- run status +- destination platform and channel id +- execution actor type and id +- creator Slack user id, when available + +Logs and spans must not include OAuth tokens, provider credentials, raw authorization URLs, or private tool payloads. + +## Verification + +Use evals for model-dependent behavior: + +- natural-language schedule extraction +- task framing quality +- confirmation quality +- scheduled-run execution behavior +- not confusing scheduled execution with schedule creation + +Use integration tests for runtime/storage contracts that do not depend on model interpretation: + +- due claim idempotency +- stale one-off, recurring, and run-now occurrences skip without dispatch +- stale recovery dedupes equivalent active tasks in the same destination +- blocked auth path for missing non-user credentials +- scheduled runner passes a system actor rather than the creator as requester +- user OAuth tokens are not used implicitly for scheduled tasks +- dispatch to Slack delivery +- destination-scoped list output +- conversation-access management for pause, resume, delete, and run-now + +Use unit tests only for small deterministic helpers when integration or eval coverage would be wasteful. + +## Related Specs + +- `./chat-architecture-spec.md` +- `./agent-prompt-spec.md` +- `./agent-session-resumability-spec.md` +- `./slack-agent-delivery-spec.md` +- `./testing/index.md` diff --git a/specs/skill-capabilities-spec.md b/specs/skill-capabilities-spec.md index 1d27da96..db9cafaf 100644 --- a/specs/skill-capabilities-spec.md +++ b/specs/skill-capabilities-spec.md @@ -37,7 +37,7 @@ Define how Junior maps registered plugin provider domains to host-managed creden 2. Skills do not declare capabilities or config keys. 3. Registered providers are always available to sandbox commands. 4. The agent runs the real provider command. 
-5. The runtime resolves the provider from the outgoing request host, lazily issues a requester-bound provider lease, and injects credentials for that forwarded request. +5. The runtime resolves the provider from the outgoing request host, lazily issues a requester-bound provider lease, and applies credential headers to that forwarded request. 6. If auth is missing or stale, the proxy returns a command-readable auth-required response and the command failure path starts a private OAuth flow, then resumes the paused turn after authorization. 7. Plugin manifests own runtime setup. Skills do not instruct the agent to install packages, bootstrap CLIs, configure provider credentials, command env, or MCP servers. diff --git a/specs/testing/index.md b/specs/testing/index.md index f24d7256..eb259e41 100644 --- a/specs/testing/index.md +++ b/specs/testing/index.md @@ -7,6 +7,7 @@ ## Changelog +- 2026-05-27: Added fail-closed external HTTP isolation for tests/evals with explicit local, model, and sandbox control-plane exceptions. - 2026-04-21: Replaced the old layer ordering with a simpler decision rule: integration by default for product/runtime changes, evals as the integration-style layer for agent behavior, unit only for local deterministic logic. - 2026-03-03: Standardized metadata headers and reconciled spec references/structure. - 2026-03-04: Updated test fixture path references to repo-root paths under `packages/junior/`. @@ -53,17 +54,18 @@ Do not default to unit tests for runtime behavior just because they are easier t Layer selection is mandatory: classify the test contract first and choose `unit` vs `integration` vs `eval` before writing assertions. 1. Tests must be deterministic and isolated. -2. Slack network access is blocked in tests; use MSW fixtures for Slack HTTP. -3. Use centralized fixtures/factories (`packages/junior/tests/fixtures/slack/*`) over ad-hoc payload literals when available. -4. 
Prefer asserting user-visible behavior and external contracts over implementation details. -5. Keep test names descriptive of outcomes, not implementation mechanics. -6. Do not over-test: cover representative, high-risk scenarios for each contract, not every theoretical permutation. -7. Prefer one focused assertion path per behavior contract; add more cases only when they validate a distinct failure mode. -8. Workflow behavior integration tests should execute real runtime paths and only substitute deterministic fake agent output at the agent boundary. -9. Do not assert internal observability emission (`logInfo`, `logWarn`, spans, trace attributes) in behavior tests unless instrumentation output is itself the contract under test. -10. Do not assert prompt prose by checking that a string is present in a generated prompt. Prompt wording is not a stable contract; validate the resulting behavior in evals or integration tests instead. -11. If Slack API call shape or ordering is the external contract under test, keep those assertions in dedicated transport-contract integration suites; general behavior files should stay scenario-readable. -12. Prefer real in-memory adapters, fixtures, and harnesses over bespoke fake stores when the contract crosses module boundaries. +2. External HTTP is blocked by default in tests and evals; use MSW or the shared HTTP interceptor fixtures. Local URLs, model endpoints, and Vercel sandbox/OIDC control-plane traffic are the only live exceptions. +3. Slack network access is blocked in tests; use MSW fixtures for Slack HTTP. +4. Use centralized fixtures/factories (`packages/junior/tests/fixtures/slack/*`) over ad-hoc payload literals when available. +5. Prefer asserting user-visible behavior and external contracts over implementation details. +6. Keep test names descriptive of outcomes, not implementation mechanics. +7. Do not over-test: cover representative, high-risk scenarios for each contract, not every theoretical permutation. +8. 
Prefer one focused assertion path per behavior contract; add more cases only when they validate a distinct failure mode. +9. Workflow behavior integration tests should execute real runtime paths and only substitute deterministic fake agent output at the agent boundary. +10. Do not assert internal observability emission (`logInfo`, `logWarn`, spans, trace attributes) in behavior tests unless instrumentation output is itself the contract under test. +11. Do not assert prompt prose by checking that a string is present in a generated prompt. Prompt wording is not a stable contract; validate the resulting behavior in evals or integration tests instead. +12. If Slack API call shape or ordering is the external contract under test, keep those assertions in dedicated transport-contract integration suites; general behavior files should stay scenario-readable. +13. Prefer real in-memory adapters, fixtures, and harnesses over bespoke fake stores when the contract crosses module boundaries. ## Coverage Budget (Avoid Over-Testing) diff --git a/specs/trusted-plugin-heartbeat-spec.md b/specs/trusted-plugin-heartbeat-spec.md new file mode 100644 index 00000000..6c8f4d8e --- /dev/null +++ b/specs/trusted-plugin-heartbeat-spec.md @@ -0,0 +1,677 @@ +# Trusted Plugin Heartbeat Spec + +## Metadata + +- Created: 2026-05-26 +- Last Edited: 2026-05-26 + +## Changelog + +- 2026-05-26: Clarified that trusted plugin tools own their model-facing descriptions/guidance and must not require plugin-specific core prompt rules. +- 2026-05-26: Clarified heartbeat recovery budgets, dispatch callback path, retention constant, lease semantics, destination shape, and lookup verification. +- 2026-05-26: Defined dispatch lookup retention and scheduler-owned terminal run history. +- 2026-05-26: Added dispatch recovery, result lookup, serverless slice, lock ordering, and system-actor security invariants. 
+- 2026-05-26: Specified dispatched agent request runner, continuation behavior, and cleaner JavaScript API names. +- 2026-05-26: Initial draft for trusted plugin heartbeat and agent dispatch. + +## Status + +Draft + +## Purpose + +Define the minimal trusted-plugin runtime surface needed to move scheduler behavior out of Junior core without exposing raw routes, platform internals, Slack clients, or agent execution internals to plugins. + +The motivating consumer is a scheduler plugin that lets users create scheduled tasks, then uses a core-owned serverless heartbeat and agent dispatch primitive to execute due work later. + +## Scope + +- Trusted plugin heartbeat hook. +- Trusted plugin tool registration hook. +- Core-owned internal heartbeat endpoint. +- Core-owned durable agent dispatch primitive. +- Serverless continuation model for plugin-claimed work. +- Scheduler-as-plugin migration boundary. + +## Non-Goals + +- Manifest-only scheduler plugins. +- Plugin-defined routes. +- Per-plugin heartbeat URLs. +- Plugin-owned Vercel or deployment adapter behavior. +- Generic durable queue infrastructure. +- Arbitrary cron schedules per plugin. +- Raw Slack Web API access from plugins. +- Raw agent runtime or `generateAssistantReply` access from plugins. +- Raw state adapter or Redis access from plugins. + +## Contracts + +### Trust Boundary + +Heartbeat and agent dispatch are trusted plugin capabilities. They are available only to Junior-owned built-in trusted plugins and plugins explicitly passed to `createApp({ plugins: [...] })` as trusted runtime plugins. + +Declarative `plugin.yaml` manifests must not register heartbeat handlers, internal routes, or agent dispatch behavior. 
+ +Core owns: + +- route registration +- internal route authentication +- deployment cron configuration +- trusted plugin lookup +- plugin state namespaces +- serverless continuation callbacks +- agent execution +- Slack delivery +- auth mode enforcement +- logging and redaction + +Plugins own only their domain logic: tools, heartbeat work discovery, durable plugin state records, and the inputs they ask core to dispatch. + +### Interactive Tool Registration + +Trusted plugins may register turn-scoped tools through a narrow hook: + +```ts +interface TrustedPluginHooks { + tools?(ctx: ToolRegistrationContext): Record; +} +``` + +`ToolRegistrationContext` exposes only the current turn context needed to +decide whether tools are available: + +- active conversation destination, when present +- requester, when present +- channel/team identifiers, when present +- thread/message timestamps, when present +- namespaced plugin state +- current user text +- schedule-tool suppression for system dispatches + +Tools returned by this hook participate in the normal tool pipeline: schema +validation, tool guidance, tracing, and plugin `beforeToolExecute` hooks. + +Each returned tool must carry a concise model-facing description that explains +what the tool does and when it should be used. If correct use requires policy +that is specific to the plugin domain, such as destination scoping, confirmation +requirements, or recurrence semantics, that guidance belongs on the tool via +its description, schema descriptions, `promptSnippet`, or `promptGuidelines`. +Core prompt rules must stay plugin-agnostic and must not name scheduler tools or +any other specific plugin tool. + +The built-in scheduler plugin uses this hook to register create/list/update/ +delete/run-now tools only when the active Slack conversation has enough context +to manage scheduled tasks. 
+ +### Core Heartbeat Endpoint + +Core exposes one internal heartbeat endpoint: + +```txt +GET /api/internal/heartbeat +``` + +The endpoint is core-owned and deployment-owned. Plugins must not register heartbeat routes, choose heartbeat URLs, or receive the raw `Request`. + +Core responsibilities: + +1. Verify the request with the configured internal heartbeat secret. +2. Re-drive stale core dispatches within a bounded core recovery budget. +3. Enumerate trusted plugin heartbeat handlers. +4. Invoke handlers with a bounded `HeartbeatContext`. +5. Enforce a small per-handler and total plugin heartbeat budget. +6. Log core recovery and per-plugin outcomes. +7. Return a generic response that does not expose installed plugin details unnecessarily. + +V1 uses one platform cron entry for this endpoint. The endpoint is a pulse, not a job runner. + +### Heartbeat Hook + +Trusted plugins may implement: + +```ts +interface TrustedPluginHooks { + heartbeat?(ctx: HeartbeatContext): Promise<void>; +} +``` + +Heartbeat semantics: + +- Serverless-triggered. +- Best effort. +- May run late. +- May be skipped. +- May run concurrently with another heartbeat invocation. +- May run more than once for the same wall-clock minute. +- Must be idempotent. +- Must process bounded work. +- Must persist progress in durable state. +- Must not rely on memory, timers, or process lifetime. + +Core does not guarantee every heartbeat handler runs on every pulse. Durable state and idempotent claiming are the reliability boundary. + +### Heartbeat Context + +`HeartbeatContext` should stay minimal: + +```ts +interface HeartbeatContext { + nowMs: number; + state: NamespacedState; + agent: { + get(id: string): Promise<Dispatch | undefined>; + dispatch(options: DispatchOptions): Promise<DispatchResult>; + }; + log: PluginLogger; +} +``` + +Do not expose `waitUntil` to trusted plugins in V1. Core may use platform lifetime extension internally, but plugin handlers should be written as bounded request handlers. 
+ +### Agent Dispatch + +Trusted plugins may ask core to fire off an agent request: + +```ts +const result = await ctx.agent.dispatch({ + idempotencyKey: run.id, + destination: { + platform: "slack", + teamId: task.destination.teamId, + channelId: task.destination.channelId, + }, + input: buildScheduledTaskRunPrompt({ task, run, nowMs }), + metadata: { + taskId: task.id, + runId: run.id, + }, +}); +``` + +The argument shape is: + +```ts +type DispatchOptions = { + idempotencyKey: string; + destination: { + platform: "slack"; + teamId: string; + channelId: string; + }; + input: string; + metadata?: Record<string, string>; +}; +``` + +The return value is: + +```ts +type DispatchResult = { + id: string; + status: "created" | "already_exists"; +}; +``` + +Plugins may read the current state of a dispatch they created: + +```ts +const dispatch = await ctx.agent.get(dispatchId); +``` + +The lookup return value is: + +```ts +type Dispatch = { + id: string; + status: + | "pending" + | "running" + | "awaiting_resume" + | "completed" + | "failed" + | "blocked"; + resultMessageTs?: string; + errorMessage?: string; +}; +``` + +This is the only plugin-facing agent execution API for V1. Plugins do not call `runSystemTurn`, `generateAssistantReply`, Slack runner helpers, thread-state helpers, or delivery helpers. + +If exported types are needed, prefer short JavaScript-facing names like `DispatchOptions`, `DispatchResult`, and `Dispatch`. + +Core derives and enforces: + +- system actor identity from the plugin name +- auth mode from the system actor +- no requester for system actors +- disabled interactive auth for system actors +- conversation state identity from destination +- delivery behavior from destination +- internal callback scheduling +- timeout continuation behavior +- sandbox state persistence +- tool availability policy +- tracing, logging, and redaction + +`idempotencyKey` is required. 
Calling `agent.dispatch` with the same plugin and idempotency key must not create two dispatch records. + +V1 dispatch constraints: + +- `destination.platform` must be `"slack"`. +- The destination must be a Slack public channel, private channel, or existing DM channel that the bot can post to. +- The destination must not be an existing Slack thread. +- The destination uses a Slack channel id; it must not use or accept a user id. +- The dispatch input is plain text. +- Metadata is for correlation only and must not affect authorization. +- Dispatch input is inserted as user-role synthetic conversation content. +- The core-owned system actor controls execution identity, audit, and auth policy; it does not make `input` privileged system or developer instructions. +- System dispatches have no requester, no user OAuth token access, and no interactive auth continuation. +- Schedule-management tools are unavailable during system dispatches. +- App or bot credential tools may run only when their existing policy allows system actor use. + +### Internal Agent Invocation + +`agent.dispatch` persists a core-owned dispatch record, then fires a signed internal serverless callback. The callback is the execution unit. + +Core exposes one internal dispatch callback endpoint: + +```txt +POST /api/internal/agent-dispatch +``` + +The endpoint is core-owned. Plugins must not register dispatch routes, choose dispatch callback URLs, or receive the raw callback `Request`. + +Core should use the same state/serverless paradigm as existing turn continuation: + +1. Persist dispatch metadata and expected version in durable state. +2. Sign an internal callback using the core internal secret. +3. POST the callback to a core-owned internal endpoint. +4. The endpoint verifies the signature and timestamp. +5. The endpoint loads the durable dispatch record. +6. The endpoint transitions the dispatch under the dispatch lock before running it. +7. 
The endpoint runs the dispatched agent request and persists the result. + +The callback body should contain only a small core envelope, such as dispatch id and expected version. The prompt, destination, actor, and metadata live in durable state. + +Heartbeat auth and dispatch callback auth are separate: + +- `/api/internal/heartbeat` uses bearer cron auth, using `JUNIOR_SCHEDULER_SECRET` or `CRON_SECRET`. +- Dispatch callbacks use HMAC body signing with timestamp skew checks and `JUNIOR_SECRET`, matching the existing timeout-resume callback model. + +### Dispatch State + +Core dispatch state is separate from plugin state. The scheduler plugin records that a run was dispatched; core records whether the dispatched agent request actually ran and delivered output. + +Plugin state is namespaced by core using collision-resistant internal keys. +Plugin-visible keys must be non-empty and bounded. Plugins do not receive raw +Redis keys, raw state adapter handles, or another plugin's namespace. + +Minimal dispatch record: + +```ts +type DispatchRecord = { + id: string; + plugin: string; + idempotencyKey: string; + status: + | "pending" + | "running" + | "awaiting_resume" + | "completed" + | "failed" + | "blocked"; + version: number; + actor: { + type: "system"; + id: string; + }; + destination: { + platform: "slack"; + teamId: string; + channelId: string; + }; + input: string; + metadata?: Record<string, string>; + createdAtMs: number; + attempt: number; + maxAttempts: number; + leaseExpiresAtMs?: number; + resumeCheckpointVersion?: number; + lastCallbackAtMs?: number; + updatedAtMs: number; + resultMessageTs?: string; + errorMessage?: string; +}; +``` + +Plugin-visible `Dispatch` is a projection of this record, not the full stored value. + +The dispatch id should be deterministic from plugin name and idempotency key. Duplicate `dispatch(...)` calls return the existing dispatch id and may re-fire the internal callback only when the existing record is incomplete. 
+ +`ctx.agent.get(id)` returns only dispatches owned by the calling trusted plugin. It does not expose prompt text, destination details, actor details, metadata, conversation state, tool calls, model messages, logs, or credentials. + +Dispatch records use `THREAD_STATE_TTL_MS`, the same retention window as thread/checkpoint state. `ctx.agent.get(id)` is a short-to-medium-term reconciliation API, not permanent run history. After the retention window expires, `ctx.agent.get(id)` returns `undefined`. + +The scheduler plugin owns durable task and run history in its namespaced state. When it observes a terminal dispatch through `ctx.agent.get(id)`, it copies the terminal status, result timestamp, and error summary onto the scheduler run record. The scheduler must not depend on core dispatch records remaining readable forever. + +### Dispatch Recovery + +Core owns recovery for incomplete dispatches. Plugins do not need to understand callback delivery or platform lifetime failures. + +The heartbeat endpoint performs two bounded phases: + +1. Re-drive stale core dispatches within a bounded core recovery budget. +2. Invoke trusted plugin `heartbeat(ctx)` handlers within a separate bounded plugin budget. + +Core recovery must not starve when plugin heartbeat handlers are slow or failing. Plugin heartbeat work must not starve because core recovery found a large backlog; unfinished recovery remains durable for a later heartbeat. + +Core may re-fire a signed dispatch callback when a dispatch is incomplete and stale: + +- `pending` with no recent callback attempt +- `running` with an expired lease +- `awaiting_resume` with an expired lease or missing callback attempt + +Core must not re-fire terminal dispatches: + +- `completed` +- `failed` +- `blocked` + +Recovery is bounded by attempt count, max dispatch age, max continuation slices, and the dispatch retention window. A dispatch that exceeds retry bounds is marked `failed`. 
A dispatch that ages out of retained core state is no longer recoverable by core. + +### Serverless Slice Model + +Each dispatch callback owns one bounded execution slice. + +Callback route behavior: + +1. Verify HMAC signature and timestamp. +2. Parse the small callback envelope. +3. Register the dispatch work with platform `waitUntil`. +4. Return `202 Accepted`. + +Slice behavior: + +1. Load and claim the dispatch. +2. Run one generation and delivery attempt. +3. If the agent times out at a resumable boundary, persist the checkpoint, mark the dispatch `awaiting_resume`, and schedule another signed dispatch callback. +4. If the dispatch reaches the slice cap, mark it `failed`. + +The route must not rely on process memory, timers, or a long-lived worker after the platform request lifetime ends. The only in-process lifetime extension is the platform `waitUntil` task for the current callback. + +### Locking And State Transitions + +Dispatch mutation uses locks available from the existing state adapter. The implementation must not require a general compare-and-set primitive. + +Lock classes: + +- `dispatch:<dispatch id>` protects dispatch status, version, attempts, and leases. +- destination conversation lock protects conversation, artifact, sandbox, and delivery state. + +Lock order is always: + +1. dispatch lock +2. destination conversation lock + +Code must not acquire those locks in the reverse order. Stale recovery uses durable status, version, attempt, and lease fields rather than process memory. + +Dispatch leases are not renewed during a slice in V1. The lease duration must exceed the maximum callback slice budget plus platform scheduling slack. A retry may claim an expired lease only after verifying the dispatch is still non-terminal. + +### Dispatched Agent Runner + +The internal callback runs a core-owned dispatched agent runner. This runner is the durable execution boundary for `ctx.agent.dispatch`. 
+ +The runner owns: + +- loading the dispatch record +- acquiring the destination conversation lock +- loading persisted conversation, artifact, sandbox, and channel configuration state +- creating or reusing the synthetic system-authored conversation message for the dispatch +- building conversation context +- calling `generateAssistantReply` +- delivering the reply to the destination +- committing conversation, artifact, sandbox, and dispatch state +- marking auth-required runs as blocked +- scheduling continuation when the agent times out at a resumable boundary + +Plugins never see this runner or its dependencies. + +The runner should generalize the current scheduled Slack runner behavior instead of exposing that runner as plugin API. It should keep the same delivery success rule: a dispatch is not complete until the visible destination post has been accepted and completion state has been persisted. + +### Delivery Idempotency + +Dispatch callbacks are at-least-once. Visible delivery should be best-effort exactly once. + +The runner must use stable synthetic message ids: + +- `dispatch:${dispatch.id}:user` +- `dispatch:${dispatch.id}:assistant` + +Before posting, the runner checks persisted conversation state for the assistant message id. If it already has `meta.replied === true` and `meta.slackTs`, the runner marks the dispatch `completed` with that Slack timestamp and does not post again. + +Slack post and state commit are not atomic. If Slack accepts the post but persisting completion state fails, the dispatch is marked failed when possible with a delivery-commit error. A retry must check persisted conversation state before posting again, but the system only guarantees best-effort duplicate suppression for this post-then-commit failure window. + +### Dispatch Continuation + +Dispatched agent requests must not use the existing Slack turn-resume route directly. 
The current turn-resume path reconstructs an interactive Slack thread turn and requires a persisted user-authored message. System dispatches have no requester and target a DM or channel, not an existing thread. + +Timeout continuation for dispatched requests uses the dispatch callback path: + +1. `generateAssistantReply` persists a resumable turn checkpoint for the dispatch conversation and turn id. +2. The runner catches `turn_timeout_resume`. +3. The runner marks the dispatch `awaiting_resume` with the next checkpoint version. +4. The runner signs and posts another dispatch callback for the same dispatch id. +5. The next callback verifies the dispatch is still `awaiting_resume` at the expected version. +6. The runner resumes `generateAssistantReply` with the same dispatch input, conversation id, turn id, actor, destination, and persisted Pi messages. +7. The final callback delivers once, commits final state, and marks the dispatch `completed`, `failed`, or `blocked`. + +This keeps scheduled invocations aligned with the existing serverless execution model without treating them as interactive Slack turns. + +Dispatch continuation invariants: + +1. A dispatch has one stable conversation id and one stable turn id. +2. The turn id is derived from the dispatch id. +3. Duplicate callbacks must not run the same dispatch concurrently. +4. Duplicate callbacks must not deliver the same assistant output twice. +5. Timeout continuation must preserve cumulative usage and duration through the existing turn checkpoint state. +6. Auth continuation is disabled for system actors; auth-required outcomes become blocked results. 
+ +### Dispatch Limits + +Core enforces reliability limits even for trusted plugin code: + +- maximum dispatch calls per heartbeat context +- maximum dispatch input length +- maximum metadata keys and bytes +- maximum concurrent dispatches per destination +- maximum retry attempts +- maximum dispatch age +- maximum continuation slices + +### Scheduler Plugin Flow + +The scheduler plugin uses two trusted hooks: + +1. `tools(ctx)` for interactive schedule management. +2. `heartbeat(ctx)` for due-run discovery and dispatch. + +Heartbeat flow: + +1. Load due tasks from the scheduler plugin's namespaced state. +2. Reconcile previously dispatched runs with `ctx.agent.get(dispatchId)`. +3. Claim up to a small limit of due runs. +4. Mark each claimed run as pending dispatch. +5. Call `ctx.agent.dispatch(...)` once per claimed run. +6. Store the returned dispatch id on the run record. +7. Leave remaining due work for a future heartbeat. + +The scheduler heartbeat must not execute scheduled tasks inline. It only claims and dispatches bounded work. + +If `ctx.agent.get(dispatchId)` returns `undefined` for a non-terminal scheduler run, the scheduler treats the core dispatch record as expired or missing. The scheduler may mark the run failed with an expiration error, or reclaim and redispatch only when its own run policy says that is safe. The scheduler must eventually transition the run to a terminal state or create a new redispatch attempt; it must not leave the original run non-terminal forever after core dispatch state expires. 
+ +Dispatch call for a scheduled run: + +```ts +await ctx.agent.dispatch({ + idempotencyKey: run.id, + destination: task.destination, + input: buildScheduledTaskRunPrompt({ task, run, nowMs }), + metadata: { + taskId: task.id, + runId: run.id, + }, +}); +``` + +### Scheduler Run State + +The scheduler plugin should make dispatch state explicit enough to recover from partial failures: + +- due task +- claimed run +- pending dispatch +- dispatched +- running +- completed +- failed +- blocked +- skipped + +Required invariants: + +1. Heartbeat claims a due run before dispatch. +2. Dispatch success records the core dispatch id. +3. Duplicate dispatch attempts use the same idempotency key. +4. Duplicate internal callbacks do not execute the same run twice. +5. Stale pending-dispatch records are reclaimable by a later heartbeat. +6. Stale running records are reclaimable according to scheduler policy. +7. Scheduler tools derive destination from the active conversation context. +8. Users cannot create scheduled DMs for other users. +9. Existing Slack threads are never stored as task destinations. + +### Core Capability Boundaries + +Core must not expose these to plugins: + +- raw Slack tokens +- Slack Web API clients +- raw HTTP requests for internal routes +- route registration +- Vercel config mutation +- raw Redis clients +- unrestricted state adapter access +- unrestricted agent runtime functions +- user OAuth tokens for system actor dispatches + +Core may expose narrow capabilities: + +- namespaced state +- plugin logger +- active turn context for tool registration +- `agent.dispatch` +- `agent.get` + +## Failure Model + +### Heartbeat Missed Or Late + +No correctness failure. The next heartbeat can claim still-due work from durable state. + +### Duplicate Heartbeat + +Plugin state claiming and `agent.dispatch` idempotency suppress duplicate execution. + +### Heartbeat Budget Exhausted + +Core stops invoking additional handlers or the current handler times out. 
Plugins must leave unfinished work in durable state for a later heartbeat. + +### Dispatch Call Fails + +The plugin keeps the run in pending-dispatch or claimed state without a dispatch id. A later heartbeat may reclaim and retry dispatch after a stale timeout. + +### Dispatch Succeeds But Callback Does Not Complete + +The core dispatch record remains durable. A later heartbeat's core recovery phase (or a future continuation mechanism) observes the incomplete, stale dispatch and re-fires its signed callback within the retry bounds defined in Dispatch Recovery; a dispatch that exceeds those bounds is marked `failed`. + +### Dispatch Blocks For Auth + +System actor dispatches must not start interactive auth. Core returns or persists a blocked result. The scheduler plugin marks the scheduled run blocked and privately notifies the creator when possible through core-owned delivery behavior. + +### Plugin Throws + +Core logs the plugin heartbeat/tool error with plugin name and safe metadata. One plugin failure must not expose secrets or raw payloads, and must not grant that plugin broader capabilities. + +## Observability + +Core heartbeat logs should include: + +- heartbeat invocation id +- trusted plugin name +- handler kind +- duration +- outcome +- dispatch count, when reported +- error class/message, when safe + +Agent dispatch logs should include: + +- dispatch id +- plugin name +- idempotency key +- actor type and id +- destination platform and conversation id +- plugin metadata keys safe for logs +- outcome + +Dispatch recovery logs should include: + +- stale dispatch re-driven by heartbeat +- dispatch retry bound exceeded +- dispatch expired before completion +- `ctx.agent.get(id)` miss for missing or expired dispatch state + +Logs and spans must not include OAuth tokens, provider credentials, raw authorization URLs, Slack tokens, or private tool payloads.
+ +## Verification + +Use integration tests for: + +- heartbeat endpoint authentication +- trusted plugin heartbeat invocation +- heartbeat best-effort isolation when one plugin fails +- namespaced state access +- `agent.dispatch` idempotency +- `agent.get` returns the caller plugin's dispatch projection +- `agent.get` does not return another plugin's dispatch +- `agent.get` returns `undefined` after dispatch retention expiry +- `agent.get` omits prompt, destination, actor, metadata, conversation state, tool calls, model messages, logs, and credentials +- internal callback signature verification +- scheduler heartbeat claims due runs but does not execute inline +- scheduler heartbeat dispatches one request per claimed run +- duplicate heartbeat does not duplicate dispatch records +- stale pending-dispatch run is reclaimable +- stale core dispatch recovery is bounded separately from plugin heartbeat work +- expired or missing dispatch lookup forces scheduler terminal reconciliation or redispatch +- system actor dispatch does not use requester OAuth or interactive auth + +Use unit tests for: + +- scheduler due-run claim state transitions +- agent dispatch input validation +- plugin name/id validation +- internal callback signing and parsing + +Use evals for: + +- interactive schedule creation behavior +- confirmation-first schedule authoring +- scheduled-run prompt execution behavior + +## Related Specs + +- `./plugin-spec.md` +- `./scheduler-spec.md` +- `./agent-session-resumability-spec.md` +- `./chat-architecture-spec.md` +- `./slack-agent-delivery-spec.md`