ofershap
diff --git a/‎.cursor/rules/cursor-api-data-guide.mdc‎
Lines changed: 55 additions & 2 deletions b/‎.cursor/rules/cursor-api-data-guide.mdc‎
Lines changed: 55 additions & 2 deletions
diff --git a/‎.cursor/rules/project-context.mdc‎
Lines changed: 3 additions & 3 deletions b/‎.cursor/rules/project-context.mdc‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎scripts/generate-mock-db.ts‎
Lines changed: 60 additions & 0 deletions b/‎scripts/generate-mock-db.ts‎
Lines changed: 60 additions & 0 deletions
diff --git a/‎src/app/api/cron/route.ts‎
Lines changed: 33 additions & 17 deletions b/‎src/app/api/cron/route.ts‎
Lines changed: 33 additions & 17 deletions
diff --git a/‎src/app/api/stats/route.ts‎
Lines changed: 1 addition & 1 deletion b/‎src/app/api/stats/route.ts‎
Lines changed: 1 addition & 1 deletion
@@ -88,6 +88,9 @@ Key concepts that affect how we interpret the data:
 - `included_spend_cents` = the portion covered by the plan
 - Actual overage = `spend_cents - included_spend_cents`
 
+### Model Pricing
+Cursor charges at provider list prices (Anthropic, OpenAI, Google, xAI) plus a Teams/Enterprise surcharge of $0.25/1M total tokens. Max mode adds 20% on top. Auto mode has fixed blended rates. There is no public API for pricing tables — the canonical source is `cursor.com/docs/models`. Per-model token prices are NOT needed in our code because `usage_events.total_cents` already has the computed cost per request.
+
 ### Model Cost Drivers
 Model choice is the PRIMARY cost driver. The specific models change over time, but the cost principles are stable:
 
@@ -120,12 +123,15 @@ Legacy field from the old fixed-pricing model. It's NOT the current billing mech
 
 ### Now Collected
 - Per-request token/cost data from `/teams/filtered-usage-events` - gives per-model cost breakdown per user. Stored in `usage_events` table. Collected incrementally (since last timestamp).
-- Command adoption from `/analytics/team/commands` - which Cursor commands people use (explain, refactor, etc.). Stored in `analytics_commands` table.
+- Command adoption from `/analytics/team/commands` - team-level command usage. Stored in `analytics_commands` table.
 - Plan mode adoption from `/analytics/team/plans` - plan mode usage by model. Stored in `analytics_plans` table.
+- Per-user MCP tool usage from `/analytics/by-user/mcp` - which MCP tools each user uses. Stored in `analytics_user_mcp` table.
+- Per-user command usage from `/analytics/by-user/commands` - which commands each user uses. Stored in `analytics_user_commands` table.
 
 ### Not Currently Collected (but available)
 - AI Code Tracking data from `/analytics/ai-code/commits` - would give us accurate AI vs human line attribution
-- Per-user breakdowns from `/analytics/by-user/*` endpoints (agent-edits, tabs, models, mcp, commands, plans, ask-mode, client-versions, top-file-extensions)
+- Per-user breakdowns from `/analytics/by-user/*` endpoints for: agent-edits, tabs, models, plans, ask-mode, client-versions, top-file-extensions (we collect mcp and commands per-user, but not these others)
+- Leaderboard from `/analytics/team/leaderboard` - ranks users by tab accepts and agent edits. We chose NOT to collect this because it introduces a third ranking system that conflicts with our own spend_rank and activity_rank, confusing stakeholders.
 - `cmdkUsages`, `subscriptionIncludedReqs`, `apiKeyReqs`, `bugbotUsages` - available in daily usage but not stored
 - Audit logs from `/teams/audit-logs` - login events, settings changes, security events
 
@@ -151,3 +157,50 @@ Low accept rate could mean: picky reviewer (good), bad prompting (fixable), or w
 `lines_added / agent_requests`
 
 Highly task-dependent. A debugging session produces 0 lines. A scaffolding task produces 500. Not a quality metric.
+
+## Daily Spend Data Sources
+
+`usage_events` (from `/teams/filtered-usage-events`) is the most reliable source for daily spend data. It has per-request cost (`total_cents`) with full billing cycle history and no retention window. `daily_spend` (from the `/teams/groups` billing groups API) is limited by that API's ~2-day retention window and systematically underreports compared to `usage_events`.
+
+The dashboard daily spend chart uses `usage_events` as the primary source, falling back to `daily_spend` only when the `usage_events` table is empty (e.g., a fresh install that hasn't collected events yet). The chart marks the last 2 days as "provisional" since spend data for today/yesterday may still be accumulating.
+
+## Conversation Insights (Dashboard-Only)
+
+The Cursor web dashboard has a "Conversation Insights" page (`cursor.com/dashboard?tab=conversation-insights`) that shows Work Type (KTLO/Feature/Bug), Intent Distribution (Write Code/Ask/Task Automation/Plan), Categories (Bug Fix/Configuration/Feature/Refactor), Task Complexity, and Prompt Specificity. This data is computed server-side from conversation content using AI analysis. There is NO API endpoint for it — it is a dashboard-only enterprise feature.
+
+## Complete Analytics API Endpoint List
+
+### Team-level endpoints (collected unless noted otherwise)
+- `/analytics/team/dau` — daily active users (+ CLI, Cloud Agent, BugBot DAU)
+- `/analytics/team/models` — model usage breakdown per day
+- `/analytics/team/agent-edits` — diffs suggested/accepted/rejected
+- `/analytics/team/tabs` — tab autocomplete metrics
+- `/analytics/team/mcp` — MCP tool adoption
+- `/analytics/team/top-file-extensions` — file types
+- `/analytics/team/client-versions` — version distribution
+- `/analytics/team/commands` — command adoption
+- `/analytics/team/plans` — plan mode adoption
+- `/analytics/team/ask-mode` — ask mode adoption (not collected)
+- `/analytics/team/leaderboard` — user rankings by AI usage (not collected — see note above)
+
+### By-user endpoints (paginated, data keyed by email)
+- `/analytics/by-user/mcp` — per-user MCP tool usage (collected)
+- `/analytics/by-user/commands` — per-user command usage (collected)
+- `/analytics/by-user/agent-edits` — per-user agent edits (not collected)
+- `/analytics/by-user/tabs` — per-user tab usage (not collected)
+- `/analytics/by-user/models` — per-user model usage (not collected — covered by daily_usage)
+- `/analytics/by-user/plans` — per-user plan mode (not collected)
+- `/analytics/by-user/ask-mode` — per-user ask mode (not collected)
+- `/analytics/by-user/client-versions` — per-user versions (not collected — covered by daily_usage)
+- `/analytics/by-user/top-file-extensions` — per-user file types (not collected)
+
+## Critical: Billing Groups API Daily Spend Retention
+
+The `/teams/groups` endpoint returns `dailySpend` per member, but this data has a **very short retention window — approximately 2 days**. Older daily spend data is dropped from the API response entirely.
+
+This means:
+- If you don't collect at least once per day, you will permanently lose daily spend granularity for missed days
+- Early-day collections capture incomplete data (spend accumulates throughout the day)
+- The `upsertDailySpend` function uses `MAX(existing, new)` to prevent regressions from partial data overwriting complete data
+- Ideal collection frequency: at least twice daily (e.g. midday + end of day) to capture most of each day's spend before it falls off the API
+- The dashboard marks the last 2 days as "partial (API lag)" since spend data may not be fully settled yet
@@ -65,9 +65,9 @@ Single cron endpoint `POST /api/cron` does both: collect → detect → alert in
 
 ## Dashboard Pages
 
-- `/` — Team overview: stat cards, spend bar chart, daily spend trend, spend breakdown by user, members table with search/sort, **group filter dropdown**, time range picker (24h/3d/7d/14d/30d), billing cycle progress
+- `/` — Team overview: stat cards, model cost comparison table ($/request relative multipliers), daily spend trend (sourced from `usage_events` with `daily_spend` fallback, last 2 days marked provisional), spend breakdown by user, members table with search/sort, **group filter dropdown**, time range picker (24h/3d/7d/14d/30d), billing cycle progress
 - `/insights` — Analytics: DAU chart, model adoption, model efficiency rankings, MCP tool usage, file extensions, client versions
-- `/users/[email]` — Per-user: token timeline, model pie chart, feature breakdown, activity profile, anomaly history
+- `/users/[email]` — Per-user detail: KPI cards (cycle spend, $/req, agent reqs, accept rate, team rank), spend trend chart, usage profile radar (activity, intensity, tab usage, precision, on plan, power user), cost breakdown by model, tools & features (MCP tools + commands per user), model preferences, daily activity table, anomaly history
 - `/anomalies` — MTTD/MTTI/MTTR metrics, open incidents (acknowledge/resolve), anomaly table
 - `/settings` — Detection thresholds, **billing group management** (rename, assign, create), **HiBob CSV import** with change preview
 
@@ -90,7 +90,7 @@ Single cron endpoint `POST /api/cron` does both: collect → detect → alert in
 
 ## Database Tables
 
-members, daily_usage, spending, usage_events, anomalies, incidents, config, collection_log, metadata, daily_spend, billing_groups, group_members, billing_group_members, analytics_dau, analytics_model_usage, analytics_agent_edits, analytics_tabs, analytics_mcp, analytics_file_extensions, analytics_client_versions
+members, daily_usage, spending, usage_events, anomalies, incidents, config, collection_log, metadata, daily_spend, billing_groups, group_members, billing_group_members, analytics_dau, analytics_model_usage, analytics_agent_edits, analytics_tabs, analytics_mcp, analytics_file_extensions, analytics_client_versions, analytics_commands, analytics_plans, analytics_user_mcp, analytics_user_commands
 
 ## Important Caveats
 
 
@@ -736,6 +736,53 @@ function run() {
   });
   planTx();
 
+  const userMcpStmt = db.prepare(
+    "INSERT INTO analytics_user_mcp (date, email, tool_name, server_name, usage) VALUES (?, ?, ?, ?, ?)",
+  );
+  const userMcpTx = db.transaction(() => {
+    for (let d = 0; d < DAYS; d++) {
+      const date = dateStr(DAYS - 1 - d);
+      for (const user of userProfiles) {
+        if (Math.random() < 0.4) continue;
+        const toolCount =
+          user.activityLevel === "high"
+            ? rand(3, 8)
+            : user.activityLevel === "medium"
+              ? rand(1, 4)
+              : rand(0, 2);
+        const shuffled = [...MCP_TOOLS].sort(() => Math.random() - 0.5);
+        for (let t = 0; t < Math.min(toolCount, shuffled.length); t++) {
+          const tool = shuffled[t] as (typeof MCP_TOOLS)[number];
+          userMcpStmt.run(date, user.email, tool.tool, tool.server, rand(1, 30));
+        }
+      }
+    }
+  });
+  userMcpTx();
+
+  const userCmdStmt = db.prepare(
+    "INSERT INTO analytics_user_commands (date, email, command_name, usage) VALUES (?, ?, ?, ?)",
+  );
+  const userCmdTx = db.transaction(() => {
+    for (let d = 0; d < DAYS; d++) {
+      const date = dateStr(DAYS - 1 - d);
+      for (const user of userProfiles) {
+        if (Math.random() < 0.3) continue;
+        const cmdCount =
+          user.activityLevel === "high"
+            ? rand(3, 6)
+            : user.activityLevel === "medium"
+              ? rand(1, 4)
+              : rand(0, 2);
+        const shuffled = [...COMMANDS].sort(() => Math.random() - 0.5);
+        for (let c = 0; c < Math.min(cmdCount, shuffled.length); c++) {
+          userCmdStmt.run(date, user.email, shuffled[c], rand(1, 20));
+        }
+      }
+    }
+  });
+  userCmdTx();
+
   const metaStmt = db.prepare("INSERT INTO metadata (key, value, updated_at) VALUES (?, ?, ?)");
   metaStmt.run("cycle_start", CYCLE_START, now);
   metaStmt.run("cycle_end", CYCLE_END, now);
@@ -928,6 +975,19 @@ function createSchema(db: Database.Database) {
       collected_at TEXT NOT NULL DEFAULT (datetime('now')),
       PRIMARY KEY (date, model)
     );
+    CREATE TABLE IF NOT EXISTS analytics_user_mcp (
+      date TEXT NOT NULL, email TEXT NOT NULL, tool_name TEXT NOT NULL,
+      server_name TEXT NOT NULL, usage INTEGER NOT NULL DEFAULT 0,
+      collected_at TEXT NOT NULL DEFAULT (datetime('now')),
+      PRIMARY KEY (date, email, tool_name, server_name)
+    );
+    CREATE INDEX IF NOT EXISTS idx_user_mcp_email ON analytics_user_mcp(email);
+    CREATE TABLE IF NOT EXISTS analytics_user_commands (
+      date TEXT NOT NULL, email TEXT NOT NULL, command_name TEXT NOT NULL,
+      usage INTEGER NOT NULL DEFAULT 0, collected_at TEXT NOT NULL DEFAULT (datetime('now')),
+      PRIMARY KEY (date, email, command_name)
+    );
+    CREATE INDEX IF NOT EXISTS idx_user_commands_email ON analytics_user_commands(email);
     CREATE TABLE IF NOT EXISTS metadata (
       key TEXT PRIMARY KEY, value TEXT NOT NULL,
       updated_at TEXT NOT NULL DEFAULT (datetime('now'))
 
@@ -65,23 +65,39 @@ export async function POST(request: Request) {
 
     if (lastSummary !== today) {
       const db = getDb();
-      const spendRow = db
-        .prepare(
-          `SELECT COALESCE(SUM(spend_cents), 0) as total
-           FROM (SELECT email, MAX(spend_cents) as spend_cents FROM spending
-                 WHERE cycle_start = (SELECT MAX(cycle_start) FROM spending)
-                 GROUP BY email)`,
-        )
-        .get() as { total: number };
-
-      const topSpenders = db
-        .prepare(
-          `SELECT COALESCE(m.name, s.email) as name, s.spend_cents as spend
-           FROM spending s LEFT JOIN members m ON s.email = m.email
-           WHERE s.cycle_start = (SELECT MAX(cycle_start) FROM spending) AND s.spend_cents > 0
-           ORDER BY s.spend_cents DESC LIMIT 5`,
-        )
-        .all() as Array<{ name: string; spend: number }>;
+      const hasUE =
+        (db.prepare("SELECT COUNT(*) as c FROM usage_events").get() as { c: number }).c > 0;
+
+      const spendRow = hasUE
+        ? (db
+            .prepare(`SELECT COALESCE(ROUND(SUM(total_cents)), 0) as total FROM usage_events`)
+            .get() as { total: number })
+        : (db
+            .prepare(
+              `SELECT COALESCE(SUM(spend_cents), 0) as total
+               FROM (SELECT email, MAX(spend_cents) as spend_cents FROM spending
+                     WHERE cycle_start = (SELECT MAX(cycle_start) FROM spending)
+                     GROUP BY email)`,
+            )
+            .get() as { total: number });
+
+      const topSpenders = hasUE
+        ? (db
+            .prepare(
+              `SELECT COALESCE(m.name, ue.user_email) as name, ROUND(SUM(ue.total_cents)) as spend
+               FROM usage_events ue LEFT JOIN members m ON ue.user_email = m.email
+               GROUP BY ue.user_email HAVING spend > 0
+               ORDER BY spend DESC LIMIT 5`,
+            )
+            .all() as Array<{ name: string; spend: number }>)
+        : (db
+            .prepare(
+              `SELECT COALESCE(m.name, s.email) as name, s.spend_cents as spend
+               FROM spending s LEFT JOIN members m ON s.email = m.email
+               WHERE s.cycle_start = (SELECT MAX(cycle_start) FROM spending) AND s.spend_cents > 0
+               ORDER BY s.spend_cents DESC LIMIT 5`,
+            )
+            .all() as Array<{ name: string; spend: number }>);
 
       const limitedRow = db
         .prepare("SELECT value FROM metadata WHERE key = 'limited_users_count'")
 
@@ -5,7 +5,7 @@ export const dynamic = "force-dynamic";
 
 export function GET(request: Request) {
   const url = new URL(request.url);
-  const days = parseInt(url.searchParams.get("days") ?? "7", 10);
+  const days = parseInt(url.searchParams.get("days") ?? "30", 10);
 
   const dashboard = getFullDashboard(days);
   return NextResponse.json(dashboard);