diff --git a/packages/core/src/canvas/canvasTemplates.test.ts b/packages/core/src/canvas/canvasTemplates.test.ts new file mode 100644 index 000000000..7e661a10d --- /dev/null +++ b/packages/core/src/canvas/canvasTemplates.test.ts @@ -0,0 +1,51 @@ +import { describe, expect, it } from "vitest"; +import { BUILT_IN_TEMPLATES } from "./canvasTemplates"; + +const aiGateway = BUILT_IN_TEMPLATES.find((t) => t.id === "ai-gateway"); + +describe("AI gateway template", () => { + it("is registered as a built-in", () => { + expect(aiGateway).toBeDefined(); + expect(aiGateway?.builtIn).toBe(true); + expect(aiGateway?.name).toBe("AI gateway"); + }); + + it("bakes the exact gateway filter into the prompt", () => { + // The $ai_gateway predicate is what separates gateway traffic from + // SDK-emitted $ai_generation events — it must survive verbatim. + expect(aiGateway?.systemPrompt).toContain( + "event = '$ai_generation' AND properties.$ai_gateway = true", + ); + }); + + it("scopes time with date-range placeholders, not a baked-in interval", () => { + const prompt = aiGateway?.systemPrompt ?? ""; + expect(prompt).toContain("{date_from}"); + expect(prompt).toContain("{date_to}"); + // The board must be refreshable, so the WHERE clause never bakes a window. The needle is deliberately narrow (a baked lower bound followed by an upper bound): the prompt also quotes `now() - INTERVAL 30 DAY` as a what-not-to-do example, and that mention must not trip this check. + expect(prompt).not.toContain( + "timestamp >= now() - INTERVAL 30 DAY AND timestamp <", + ); + }); + + it.each([ + ["spend", "round(sum(toFloat(properties.$ai_total_cost_usd)), 4)"], + ["requests", "SELECT count() FROM events"], + ["input tokens", "sum(toFloat(properties.$ai_input_tokens))"], + ["output tokens", "sum(toFloat(properties.$ai_output_tokens))"], + [ + "tokens-per-model", + "sum(toFloat(properties.$ai_input_tokens) + toFloat(properties.$ai_output_tokens))", + ], + ])("bakes the exact %s formula", (_name, formula) => { + expect(aiGateway?.systemPrompt).toContain(formula); + }); + + it("includes the connect snippets and the declarative provider/language switch", () => { + const prompt = aiGateway?.systemPrompt ?? 
""; + expect(prompt).toContain("baseURL: '/v1'"); // OpenAI + expect(prompt).toContain("@anthropic-ai/sdk"); // Anthropic + expect(prompt).toContain('"$state": "/provider"'); + expect(prompt).toContain('"$state": "/language"'); + }); +}); diff --git a/packages/core/src/canvas/canvasTemplates.ts b/packages/core/src/canvas/canvasTemplates.ts index eca72da36..5e04d9cd1 100644 --- a/packages/core/src/canvas/canvasTemplates.ts +++ b/packages/core/src/canvas/canvasTemplates.ts @@ -39,6 +39,16 @@ const WEB_ANALYTICS_COMPONENTS: CanvasComponentName[] = [ "RetentionGrid", ]; +// AI gateway: the Dashboard palette plus rich-page blocks for the "Connect your +// app" SDK snippets (Markdown code blocks) and the empty-state intro (Hero, +// Section). +const AI_GATEWAY_COMPONENTS: CanvasComponentName[] = [ + ...DASHBOARD_COMPONENTS, + "Section", + "Markdown", + "Hero", +]; + // Rules that apply to EVERY canvas template, regardless of its purpose. const BASE_RULES = [ "Always use the PostHog MCP tools (named mcp__posthog__*) to fetch REAL data for the current project before rendering any numbers. Never fabricate metrics.", @@ -86,6 +96,65 @@ const WEB_ANALYTICS_RULES = [ 'Store raw numeric values in Stat.value (e.g. 236000, not "236K") — the UI formats them. Percentages for RetentionGrid `values` are 0–100.', ]; +// The gateway filter — preserve EXACTLY. `event = '$ai_generation' AND +// properties.$ai_gateway = true` is what separates gateway-emitted generations +// from SDK-emitted $ai_generation events that share the event name. The time +// bound uses the canvas date-range placeholders (not a baked-in `INTERVAL 30 +// DAY`) so the board stays refreshable and the picker can rescope it. +const GATEWAY_WHERE = + "event = '$ai_generation' AND properties.$ai_gateway = true AND timestamp >= {date_from} AND timestamp < {date_to}"; + +// The "Connect your app" SDK snippets, baked verbatim from the Cloud page +// (AIGatewayScene.tsx). 
OpenAI points its SDK at /v1; the Anthropic SDK is +// given the bare gateway host and appends /v1/messages itself. The gateway host is a +// placeholder — Code has no preflight to source the real host from (open +// question), so the agent emits the placeholder for the user to replace. +const CONNECT_SNIPPETS = [ + "OpenAI · TypeScript →\n```ts\nimport OpenAI from 'openai'\n\nconst client = new OpenAI({\n baseURL: '/v1',\n apiKey: '',\n})\nconst response = await client.chat.completions.create({\n model: 'gpt-5-mini',\n messages: [{ role: 'user', content: 'Hello' }],\n})\n```", + 'OpenAI · Python →\n```python\nfrom openai import OpenAI\n\nclient = OpenAI(\n base_url="/v1",\n api_key="",\n)\nclient.chat.completions.create(\n model="gpt-5-mini",\n messages=[{"role": "user", "content": "Hello"}],\n)\n```', + 'OpenAI · cURL →\n```bash\ncurl /v1/chat/completions \\\n -H "Authorization: Bearer $POSTHOG_PROJECT_SECRET_KEY" \\\n -H "Content-Type: application/json" \\\n -d \'{\n "model": "gpt-5-mini",\n "messages": [{"role": "user", "content": "Hello"}]\n }\'\n```', + "Anthropic · TypeScript →\n```ts\nimport Anthropic from '@anthropic-ai/sdk'\n\nconst client = new Anthropic({\n baseURL: '',\n authToken: '', // sets the Bearer header\n})\nconst message = await client.messages.create({\n model: 'claude-sonnet-4.6',\n max_tokens: 512,\n messages: [{ role: 'user', content: 'Hello' }],\n})\n```", + 'Anthropic · Python →\n```python\nfrom anthropic import Anthropic\n\nclient = Anthropic(\n base_url="",\n auth_token="", # sets the Bearer header\n)\nclient.messages.create(\n model="claude-sonnet-4.6",\n max_tokens=512,\n messages=[{"role": "user", "content": "Hello"}],\n)\n```', + 'Anthropic · cURL →\n```bash\ncurl /v1/messages \\\n -H "Authorization: Bearer $POSTHOG_PROJECT_SECRET_KEY" \\\n -H "Content-Type: application/json" \\\n -d \'{\n "model": "claude-sonnet-4.6",\n "max_tokens": 512,\n "messages": [{"role": "user", "content": "Hello"}]\n }\'\n```', +].join("\n\n"); + +// AI gateway: a one-page usage board for 
traffic sent through PostHog's AI +// gateway. Mirrors the Cloud scene (products/ai_gateway/frontend): a Usage KPI +// row + spend-per-day chart, a By-model table, and a "Connect your app" panel +// with a declarative provider/language snippet switch. Time-based, so it leans +// on the date-range + refresh machinery like the web-analytics board. +const AI_GATEWAY_RULES = [ + 'ALWAYS begin with a single h1 title: a `Heading` level 1 with the text "AI gateway" (it names the saved file). Immediately follow it with a muted `Text`: "Every major LLM through one endpoint, billed at cost."', + "Do NOT set the root `Page`'s `title` prop — the level-1 Heading is the ONLY title.", + // --- The gateway filter (the core invariant) ----------------------------- + `GATEWAY FILTER — EVERY query MUST select ONLY gateway-emitted generations using EXACTLY this WHERE clause; never drop, rename, or weaken the \`properties.$ai_gateway = true\` predicate (it is what separates gateway traffic from SDK-emitted $ai_generation events that share the event name): \`WHERE ${GATEWAY_WHERE}\`. Keep the \`{date_from}\`/\`{date_to}\` placeholders verbatim — do NOT substitute a baked-in interval like \`now() - INTERVAL 30 DAY\`.`, + // --- Time window --------------------------------------------------------- + 'TIME WINDOW: seed a top-level `state.dateRange` object: `{ "name": "Last 30 days", "from": , "to": }`, computing `from`/`to` from the CURRENT DATE/TIME in the prompt context ("Last 30 days" = (now − 30 days) → now). The toolbar date picker reads and drives this — do NOT render a date picker yourself. The range name MUST stay exactly "Last 30 days" (one of the rolling names) so the window keeps following the clock. 
If the prompt includes a `[Range]` line with the user\'s selected window, use THAT instead.', + // --- Refreshable queries ------------------------------------------------- + 'EVERY data point is refreshable: record the HogQL that produced it under the top-level `state.queries`, keyed by element key then prop path: `state.queries.. = { "query": "", "shape": "" }`. Shapes: "scalar" (1 row × 1 col → a Stat `/value`), "labels" (first column of every row → a chart `/labels`), "column" (first column of every row → a chart series\' `/series/0/data`), "matrix" (every row as an array → a `Table` `/rows`).', + // --- Usage section ------------------------------------------------------- + 'USAGE section: a `Heading` level 2 "Usage", a muted `Text` "Last 30 days", then a `Grid` (columns 4) of four `Stat`s — labelled "Spend (USD)", "Requests", "Input tokens", "Output tokens" (the Stat formatter only adds thousands separators, so name the currency unit in the label). Each Stat\'s `/value` is a scalar query (store the RAW number; the UI formats it). Element keys + queries:', + ` • stat_spend — \`SELECT round(sum(toFloat(properties.$ai_total_cost_usd)), 4) FROM events WHERE ${GATEWAY_WHERE}\` (USD).`, + ` • stat_requests — \`SELECT count() FROM events WHERE ${GATEWAY_WHERE}\`.`, + ` • stat_input_tokens — \`SELECT sum(toFloat(properties.$ai_input_tokens)) FROM events WHERE ${GATEWAY_WHERE}\`.`, + ` • stat_output_tokens — \`SELECT sum(toFloat(properties.$ai_output_tokens)) FROM events WHERE ${GATEWAY_WHERE}\`.`, + // --- Spend per day chart ------------------------------------------------- + `SPEND PER DAY: a \`Card\` titled "Spend per day" wrapping a \`BarChart\` (key chart_spend_per_day) with ONE series labelled "Spend". 
Set two queries on it: \`state.queries.chart_spend_per_day./labels\` = { "query": "SELECT toStartOfDay(timestamp) AS day FROM events WHERE ${GATEWAY_WHERE} GROUP BY day ORDER BY day", "shape": "labels" } and \`state.queries.chart_spend_per_day./series/0/data\` = { "query": "SELECT round(sum(toFloat(properties.$ai_total_cost_usd)), 4) FROM events WHERE ${GATEWAY_WHERE} GROUP BY toStartOfDay(timestamp) ORDER BY toStartOfDay(timestamp)", "shape": "column" }. The two queries share an identical GROUP BY/ORDER BY so the data array stays the same length as labels.`, + // --- By-model table ------------------------------------------------------ + `BY MODEL section: a \`Heading\` level 2 "By model", a muted \`Text\` "Spend and tokens per model, last 30 days", then a \`Table\` (key table_by_model) with columns ["Model", "Requests", "Tokens", "Spend"]. Set \`state.queries.table_by_model./rows\` = { "query": "SELECT coalesce(nullIf(toString(properties.$ai_model), ''), 'unknown') AS model, count() AS requests, sum(toFloat(properties.$ai_input_tokens) + toFloat(properties.$ai_output_tokens)) AS tokens, round(sum(toFloat(properties.$ai_total_cost_usd)), 4) AS cost_usd FROM events WHERE ${GATEWAY_WHERE} GROUP BY model ORDER BY cost_usd DESC", "shape": "matrix" }.`, + // --- Connect your app (declarative provider/language switch) -------------- + 'CONNECT YOUR APP section: a `Heading` level 2 "Connect your app", a muted `Text` ("Point your app at the gateway with any project secret key carrying the llm_gateway:read scope — every request is tracked in AI observability with no SDK instrumentation."), then a declarative provider × language snippet switch. 
Seed `state.provider` = "openai" and `state.language` = "typescript".', + 'SNIPPET SWITCH controls: a `Grid` (columns 2) of two provider `Button`s — "OpenAI" and "Anthropic" — each with `"on": { "click": { "action": "setState", "params": { "statePath": "/provider", "value": "openai" | "anthropic" } } }`; then a `Grid` (columns 3) of three language `Button`s — "TypeScript", "Python", "cURL" — each setting `/language` to "typescript" | "python" | "curl".', + `SNIPPET BLOCKS: emit SIX \`Markdown\` blocks, one per provider×language pair, each gated by a \`visible\` condition that is an ARRAY of two state conditions (implicit AND): \`"visible": [ { "$state": "/provider", "eq": "" }, { "$state": "/language", "eq": "" } ]\`. The Markdown \`content\` is the matching fenced code block below (strip the "Provider · Language →" caption; keep the fenced block exactly, INCLUDING \`\` as a literal placeholder — Code has no preflight to fill the real host). Snippets:\n\n${CONNECT_SNIPPETS}`, + // --- Empty state --------------------------------------------------------- + // The probe reuses GATEWAY_WHERE as-is so it counts the same window the board + // will show (including a user-selected `[Range]`) and never hands the agent + // the baked-in interval that the GATEWAY FILTER rule in this array forbids. + "EMPTY STATE: before building, run `SELECT count() FROM events WHERE " + + GATEWAY_WHERE + + '` via the MCP tools, resolving `{date_from}`/`{date_to}` to the `state.dateRange` bounds for this one-off probe (recorded queries keep the placeholders). If it returns 0 (no gateway usage in the window), do NOT build the Usage or By model sections (a zeroed-out board reads as broken). Instead emit ONLY: the h1 title + muted subtitle, a `Hero` (tone accent) titled "No gateway usage yet" whose subtitle is "One endpoint for every major LLM, billed at cost — no markup on tokens. Point your app at the gateway and PostHog tracks its usage, cost, and spend for you. 
Any project secret key with the llm_gateway:read scope can call it.", and the full Connect your app section.', +]; + // Blank: freeform. Build whatever the user describes from the catalog. const BLANK_RULES = [ "Build ANYTHING the user describes. 
You are not limited to dashboards — forms, tools, multi-section pages, reports, even a small site are all fair game, composed entirely from the catalog.", @@ -176,6 +245,28 @@ const BUILT_INS: BuiltInTemplate[] = [ }, ], }, + { + id: "ai-gateway", + name: "AI gateway", + description: + "PostHog AI gateway usage: spend, requests and tokens, a spend-per-day chart, a by-model breakdown, and copy-paste SDK snippets to connect your app.", + system: + "You are PostHog Canvas, an agent that builds the AI gateway usage board — spend, requests and token KPIs, a spend-per-day chart, a by-model breakdown, and a 'Connect your app' panel of copy-paste SDK snippets — for the user's current PostHog project, driven by a selectable date range.", + rules: AI_GATEWAY_RULES, + allow: AI_GATEWAY_COMPONENTS, + suggestions: [ + { label: "AI gateway", prompt: "Build the AI gateway usage board." }, + { + label: "Last 30 days", + prompt: "Build the AI gateway usage board for the last 30 days.", + }, + { + label: "By model", + prompt: + "Build the AI gateway usage board focused on the spend and tokens per model.", + }, + ], + }, { id: "blank", name: "Blank canvas", diff --git a/packages/ui/src/features/canvas/AI_GATEWAY.md b/packages/ui/src/features/canvas/AI_GATEWAY.md new file mode 100644 index 000000000..2cb30de8a --- /dev/null +++ b/packages/ui/src/features/canvas/AI_GATEWAY.md @@ -0,0 +1,124 @@ +# Porting the AI gateway usage page to a canvas + +**Status:** Implemented as a built-in declarative template (option A). +**Source:** PostHog/posthog#64511 — `products/ai_gateway/frontend/` (kea scene). +**Reference pattern:** PostHog/code#2657 — the web-analytics built-in template. + +## Recommendation: (A) a built-in declarative "AI gateway" template + +The page **is a dashboard** — a KPI row, a spend-per-day chart, and a by-model +table, all reading HogQL over the events table. 
That is exactly the declarative +catalog's home turf, and the team's stance (TEMPLATES.md) is declarative-first +for built-in boards. Going declarative also gets us, for free, the things the +hand-built kea scene doesn't have: per-card refresh, a toolbar date picker that +rescopes every query, and inline editing — because the board records its HogQL +under `state.queries` and rides the existing `dashboard-query` refresh path. + +The cost is the **"Connect your app" panel**: a provider × language snippet +switch with copy buttons. The catalog has no tab/segmented-control/copy-button +primitive, so the switch is rebuilt declaratively from `Button` + `state` + +`visible` (see below). That reproduces the *behaviour* (pick provider, pick +language, see the snippet) but not the exact segmented-button/tab chrome or the +one-click copy. That is the one lossy seam. + +**Why not (B) freeform React?** It would port the page near 1:1 — real tabs, a +real copy button — but a freeform canvas is a sandboxed-iframe React file: not +refreshable, not inline-editable, not date-range-driven, and off the +declarative-first path the built-ins are meant to model. For a board that is 80% +dashboard, paying that price to recover tab chrome is the wrong trade. + +**Verdict:** ship the whole page as **one declarative template** (done). If the +connect/onboarding half ever needs pixel-faithful tabs + copy, split *only that +section* into a freeform piece — don't drop the analytics board to freeform to +get it. The declarative switch is good enough for v1. 
+ +## Source → canvas mapping + +| Source element (Cloud) | Canvas equivalent | HogQL | Shape | +| --- | --- | --- | --- | +| Title + tagline | `Heading` (level 1) + muted `Text` | — | — | +| Spend tile | `Stat` `stat_spend` `/value` | `round(sum(toFloat(properties.$ai_total_cost_usd)), 4)` | scalar | +| Requests tile | `Stat` `stat_requests` `/value` | `count()` | scalar | +| Input tokens tile | `Stat` `stat_input_tokens` `/value` | `sum(toFloat(properties.$ai_input_tokens))` | scalar | +| Output tokens tile | `Stat` `stat_output_tokens` `/value` | `sum(toFloat(properties.$ai_output_tokens))` | scalar | +| Spend-per-day bar sparkline | `BarChart` `chart_spend_per_day` (`/labels` + `/series/0/data`) | `toStartOfDay(timestamp)` grouped/ordered by day; `round(sum(...$ai_total_cost_usd), 4)` per day | labels + column | +| By-model `LemonTable` | `Table` `table_by_model` `/rows` | `coalesce(properties.$ai_model,'unknown'), count(), sum(input+output tokens), round(sum(cost),4)` group by model order by cost desc | matrix | +| Provider/language snippet tabs | `Button`s → `setState` `/provider`, `/language`; six `Markdown` code blocks gated by `visible` (implicit-AND array of two `$state` `eq` conditions) | — | — | +| Empty-state intro hero | `Hero` (tone accent) + the connect section only | `count()` probed at build time via MCP | — | + +Every query carries the **exact gateway filter** +`event = '$ai_generation' AND properties.$ai_gateway = true`. The only adaptation +from source is the time bound: the kea scene bakes `now() - INTERVAL 30 DAY`; +the canvas uses the `{date_from}`/`{date_to}` placeholders + `state.dateRange` +("Last 30 days") so the board is refreshable and the picker can rescope it — the +required canvas convention, not a deviation. The metric formulas are copied +verbatim, not paraphrased. + +## Open questions / blockers + +- **Gateway base URL (blocker, not guessed).** Cloud reads + `preflight.ai_gateway_url` (`AI_GATEWAY_PUBLIC_URL`). 
Code has no preflight, and + the only host in the repos is the dev tailnet box + (`http://ai-gateway-dev.hedgehog-kitefin.ts.net`), not a public prod URL. The + template emits a literal gateway-URL placeholder in every snippet for + the user to fill. To make the snippets paste-ready we need to decide where the + host comes from: (a) a build/runtime config constant, (b) an env var mirrored + into the renderer, (c) a small API/MCP call, or (d) hardcode per environment. + Recommend (a)/(b): inject it once and have the template substitute it like + `{date_from}`. Left as a follow-up. +- **Balance / top-up card + modal.** Mocked in Cloud (`GatewayTopUp.tsx`, + `lemonToast.info("… is mocked for now")`). Dropped from the port — out of scope + until the billing API is real. +- **Empty-state detection.** Cloud waits for two queries to resolve, then shows + the intro if `requests === 0 && modelUsage.length === 0`. A canvas is built + once from data the agent fetches via MCP, so detection moves to **build time**: + the template tells the agent to probe `count()` under the gateway filter first + and, if zero, render only the title + intro `Hero` + connect section, skipping + the would-be-zeroed board. +- **Contiguous 30-day padding.** Cloud's `buildSpendChartData` pads idle days to + a gap-free 30-point series. The declarative chart plots only days with data + (labels + data come from parallel `GROUP BY day` queries, so they stay + aligned). Minor fidelity loss; recoverable later with HogQL `WITH FILL`. +- **Selected-toggle styling.** `Button.variant` is a literal enum, not + state-bindable, so the active provider/language button isn't highlighted — + selection shows only via which snippet is visible. Cosmetic; a state-bound + variant would need a catalog change. +- **Currency formatting.** The catalog `Stat` formats values with a generic + `Intl.NumberFormat` (thousands separators only), so the Spend KPI can't render + as `$12.35` like the Cloud page's `humanFriendlyCurrency`. 
The unit is carried + in the label ("Spend (USD)") instead; a principled fix is a `format` prop on + `Stat`, out of scope here. +- **New-template host wiring.** A data-driven template must also be added to + `DATA_TEMPLATES` in `WebsiteLayout.tsx` (ui) or it renders with no date picker + and no toolbar refresh. That's a parallel list to the core template registry — + worth collapsing into a `dataTemplate` flag on the template record so the two + can't drift. The `ai-gateway` entry is added to `DATA_TEMPLATES` in this change; only the collapse is left open. + +## What was added (implementation) + +Self-contained — no new catalog component, router, or registry plumbing was +needed (unlike #2657, which added `Heatmap`/`RetentionGrid`). The board reuses +existing components (`Stat`, `BarChart`, `Table`, `Markdown`, `Button`, `Hero`, +`Section`) and the existing `state.queries` refresh + `state.dateRange` machinery. + +- `packages/core/src/canvas/canvasTemplates.ts` + - `AI_GATEWAY_COMPONENTS` allow-list (`DASHBOARD_COMPONENTS` + `Section`, + `Markdown`, `Hero`). + - `GATEWAY_WHERE` constant (the exact filter, with date placeholders). + - `CONNECT_SNIPPETS` — the six SDK snippets, verbatim from `AIGatewayScene.tsx`. + - `AI_GATEWAY_RULES` — title, gateway filter, time window, the four Stats, the + spend chart, the by-model table, the declarative connect switch, and the + empty state — all baked so the agent reproduces the board faithfully. + - An `ai-gateway` entry in `BUILT_INS` (name, description, system, rules, + allow-list, starter suggestions). It auto-registers via `BUILT_IN_TEMPLATES` + → `CanvasTemplatesService` and shows up in the create picker; the only other wiring is the `ai-gateway` entry in `DATA_TEMPLATES` (`packages/ui/src/features/canvas/components/WebsiteLayout.tsx`), which enables the date picker and toolbar refresh. +- `packages/core/src/canvas/canvasTemplates.test.ts` — asserts the template is + registered and that the prompt bakes the exact gateway filter, the metric + formulas, the date placeholders (not a baked interval), the snippets, and the + provider/language switch. + +## Checks + +- `pnpm --filter @posthog/core exec vitest run src/canvas/` — 6 files, 53 tests pass. 
+- `pnpm --filter @posthog/core typecheck` — clean (after building workspace dist deps). +- `biome lint packages/core/src/canvas/canvasTemplates.ts canvasTemplates.test.ts` — clean. diff --git a/packages/ui/src/features/canvas/components/WebsiteLayout.tsx b/packages/ui/src/features/canvas/components/WebsiteLayout.tsx index 7ef5d9257..6b6edfb65 100644 --- a/packages/ui/src/features/canvas/components/WebsiteLayout.tsx +++ b/packages/ui/src/features/canvas/components/WebsiteLayout.tsx @@ -42,7 +42,7 @@ function threadIdFor(dashboardId: string): string { // Templates whose canvases carry the data toolbar (Filter + date range + // refresh) — the ones with refreshable, time-scoped queries. -const DATA_TEMPLATES = ["dashboard", "web-analytics"]; +const DATA_TEMPLATES = ["dashboard", "web-analytics", "ai-gateway"]; // Edit toggle + (in edit mode) Save / Save-as-fork for the active dashboard. // Sits on the right of the single canvas toolbar, beside the refresh control.