TestSprite · SahilRakhaiya05 · Jun 26, 2026 · Jun 26, 2026 · Jun 26, 2026 · Jun 26, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -4,6 +4,10 @@ All notable changes to `@testsprite/testsprite-cli` are documented here. The for
 
 ## [Unreleased]
 
+### Added
+
+- **`test failure triage --project <id>`** — groups all failed tests in a project into root-cause clusters using existing M2.1 analysis fields (`failureKind`, `recommendedFixTarget.reference`, `rootCauseHypothesis`). Returns a representative test per cluster, affected test ids, confidence score, and fix priority — without downloading failure bundles. Supports `--type`, `--filter`, and `--max-concurrency`. This is client-side Phase-0 triage, a stopgap until native backend clustering ships.
+
 ## [0.1.2] - 2026-06-19
 
 ### Added

diff --git a/DOCUMENTATION.md b/DOCUMENTATION.md
@@ -248,6 +248,31 @@ testsprite test failure summary test_xxxxxxxx --output json
 testsprite test failure summary test_xxxxxxxx --dry-run --output json
 ```
 
+#### `testsprite test failure triage --project <id>`
+
+When many tests fail in the same project, triage them into a few root-cause clusters before downloading bundles. The CLI lists all failed tests, fetches a lightweight `failure/summary` per test (no screenshots or video), and groups them client-side by:
+
+1. shared `recommendedFixTarget.reference`
+2. env-wide `failureKind` (`infra`, `network`, `network_timeout`, `routing_404`)
+3. normalized `rootCauseHypothesis` prefix
+4. singleton (one test per cluster when no shared signal exists)
+
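+Each cluster includes a `representativeTestId`, `memberTestIds`, `confidence`, and `fixPriority` (lower = fix first). After triage, pull a single bundle — from the representative test of the highest-priority cluster (lowest `fixPriority`) — instead of one bundle per failed test: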
+
+```bash
+# Triage all failed tests in a project
+testsprite test failure triage --project proj_xxxxxxxx --output json
+
+# Limit to backend tests whose name contains "checkout" (case-insensitive)
+testsprite test failure triage --project proj_xxxxxxxx --type backend --filter checkout --output json
+
+# Then investigate the highest-priority cluster's representative test
+testsprite test failure get <representativeTestId> --out ./.testsprite/failure
+
+# Learn the JSON shape offline (--dry-run prints canned clusters; no network calls)
+testsprite test failure triage --project proj_xxxxxxxx --dry-run --output json
+```
+
 ### Write commands
 
 Require the `write:tests` scope.

diff --git a/README.md b/README.md
@@ -101,6 +101,7 @@ Prefer to configure each step by hand (or learn the surface offline with `--dry-
 |           | `test result`                                       | Latest result; `--history` lists a test's prior runs                                                                  |
 |           | `test failure get`                                  | The agent entry point: one self-contained latest-failure bundle                                                       |
 |           | `test failure summary`                              | One-screen triage card (no media download)                                                                            |
+|           | `test failure triage`                               | Group all failed tests in a project into root-cause clusters (no bundle download)                                     |
 | **Write** | `test create` / `test create-batch`                 | Create a test (or bulk-create from a plan file); `--produces` / `--needs` / `--category` wire BE dependency metadata  |
 |           | `test update` / `test delete` / `test delete-batch` | Edit metadata / soft-delete                                                                                           |
 |           | `test code put`                                     | Replace generated code (etag-guarded)                                                                                 |

diff --git a/skills/testsprite-verify.skill.md b/skills/testsprite-verify.skill.md
@@ -398,7 +398,22 @@ testing agent's observation; don't auto-fix on the recommendation alone. If you
 genuinely can't tell: report `inconclusive` with the signal that triggered the
 call and ask.
 
-## 5. On failure → download the artifact
+## 5. On failure → triage first, then download one bundle
+
+When **multiple tests failed** in the same project (batch run, regression, or
+`test list --status failed` shows more than one red row), run triage first
+instead of pulling a bundle for every failed test:
+
+```bash
+testsprite test failure triage --project <projectId> --output json
+```
+
+Read the clusters: each has a `representativeTestId`, `memberTestIds`,
+`confidence`, and `fixPriority` (lower = fix first). Investigate the
+representative test of the highest-priority cluster (lowest `fixPriority`) —
+not an arbitrary failed test. After a fix, rerun that test before the full suite.
+
+For a **single** failed test, skip triage and go straight to the artifact:
 
 ```bash
 testsprite test artifact get <run-id> --out ./.testsprite/runs/<run-id>/

diff --git a/src/commands/test.test.ts b/src/commands/test.test.ts
@@ -29,6 +29,7 @@ import {
   runDelete,
   runFailureGet,
   runFailureSummary,
+  runFailureTriage,
   runGet,
   runList,
   runPlanPut,
@@ -147,7 +148,7 @@ describe('createTestCommand — surface', () => {
     expect(failure).toBeDefined();
     // M2.1 piece 3 adds `summary`. `get` is the bundle entry point;
     // `summary` is the lightweight analysis-only triage card.
-    expect(failure!.commands.map(c => c.name()).sort()).toEqual(['get', 'summary']);
+    expect(failure!.commands.map(c => c.name()).sort()).toEqual(['get', 'summary', 'triage']);
   });
 
   it('list exposes the documented filter and pagination flags (including --cursor alias)', () => {
@@ -283,6 +284,16 @@ describe('createTestCommand — surface', () => {
     expect(help).toContain('--dry-run');
   });
 
+  it('test failure triage --help includes GLOBAL_OPTS_HINT and --project', () => {
+    const test = createTestCommand();
+    const failure = test.commands.find(c => c.name() === 'failure')!;
+    const failureTriage = failure.commands.find(c => c.name() === 'triage')!;
+    const help = captureHelp(failureTriage);
+    expect(help).toContain('testsprite --help');
+    expect(help).toContain('--project');
+    expect(help).toContain('--max-concurrency');
+  });
+
   it('M2 sweep: all remaining leaf subcommands include GLOBAL_OPTS_HINT', () => {
     // Covers list, get, create, create-batch, steps, result, update, delete,
     // code get, code put, plan put — the full M2 surface that the dogfood
@@ -3168,6 +3179,268 @@ describe('runFailureSummary', () => {
   });
 });
 
+// ---------- runFailureTriage ----------
+
+describe('runFailureTriage', () => {
+  const FAILED_TEST_A = {
+    id: 'test_a',
+    projectId: 'proj_1',
+    name: 'Checkout submit',
+    type: 'frontend' as const,
+    createdFrom: 'cli' as const,
+    status: 'failed' as const,
+    createdAt: '2026-06-26T10:00:00.000Z',
+    updatedAt: '2026-06-26T12:00:00.000Z',
+  };
+  const FAILED_TEST_B = {
+    ...FAILED_TEST_A,
+    id: 'test_b',
+    name: 'Checkout validation',
+    updatedAt: '2026-06-26T12:01:00.000Z',
+  };
+  const FAILED_TEST_C = {
+    ...FAILED_TEST_A,
+    id: 'test_c',
+    name: 'Health check',
+    type: 'backend' as const,
+    updatedAt: '2026-06-26T12:02:00.000Z',
+  };
+
+  const SHARED_REF = 'src/components/CheckoutForm.tsx:412';
+
+  function summaryFor(testId: string, overrides: Record<string, unknown> = {}) {
+    return {
+      testId,
+      status: 'failed' as const,
+      failureKind: 'assertion' as const,
+      snapshotId: `snap_${testId}`,
+      rootCauseHypothesis: 'Submit button is disabled.',
+      recommendedFixTarget: {
+        kind: 'code' as const,
+        reference: SHARED_REF,
+        rationale: 'Fix validation predicate.',
+      },
+      ...overrides,
+    };
+  }
+
+  it('JSON mode clusters failed tests by shared fix target', async () => {
+    const { credentialsPath } = makeCreds();
+    const seen: string[] = [];
+    const fetchImpl = makeFetch(url => {
+      seen.push(url);
+      if (url.includes('/tests?') && url.includes('status=failed')) {
+        return { body: { items: [FAILED_TEST_A, FAILED_TEST_B, FAILED_TEST_C], nextToken: null } };
+      }
+      if (url.includes('/tests/test_a/failure/summary')) {
+        return { body: summaryFor('test_a') };
+      }
+      if (url.includes('/tests/test_b/failure/summary')) {
+        return { body: summaryFor('test_b') };
+      }
+      if (url.includes('/tests/test_c/failure/summary')) {
+        return {
+          body: summaryFor('test_c', {
+            failureKind: 'network_timeout',
+            rootCauseHypothesis: null,
+            recommendedFixTarget: null,
+          }),
+        };
+      }
+      throw new Error(`unexpected url: ${url}`);
+    });
+    const out: string[] = [];
+    const got = await runFailureTriage(
+      {
+        profile: 'default',
+        output: 'json',
+        debug: false,
+        projectId: 'proj_1',
+        maxConcurrency: 5,
+      },
+      { credentialsPath, fetchImpl, stdout: line => out.push(line) },
+    );
+
+    expect(seen.some(u => u.includes('status=failed'))).toBe(true);
+    expect(got.summary.totalFailed).toBe(3);
+    expect(got.clusters).toHaveLength(2);
+
+    const codeCluster = got.clusters.find(c => c.groupReason === 'fix_target');
+    expect(codeCluster?.memberTestIds).toEqual(['test_a', 'test_b']);
+    // Both members have a hypothesis, so the fresher one (later updatedAt) wins: test_b.
+    expect(codeCluster?.representativeTestId).toBe('test_b');
+
+    const envCluster = got.clusters.find(c => c.groupReason === 'failure_kind');
+    expect(envCluster?.memberTestIds).toEqual(['test_c']);
+
+    expect(JSON.parse(out[0]!).clusters).toHaveLength(2);
+  });
+
+  it('text mode renders cluster summary lines', async () => {
+    const { credentialsPath } = makeCreds();
+    const fetchImpl = makeFetch(url => {
+      if (url.includes('/tests?')) {
+        return { body: { items: [FAILED_TEST_A], nextToken: null } };
+      }
+      return { body: summaryFor('test_a') };
+    });
+    const out: string[] = [];
+    await runFailureTriage(
+      {
+        profile: 'default',
+        output: 'text',
+        debug: false,
+        projectId: 'proj_1',
+        maxConcurrency: 5,
+      },
+      { credentialsPath, fetchImpl, stdout: line => out.push(line) },
+    );
+    const block = out.join('\n');
+    expect(block).toContain('projectId:    proj_1');
+    expect(block).toContain('representative:  test_a');
+    expect(block).toContain('Shared fix target:');
+  });
+
+  it('dry-run emits canned clusters without network', async () => {
+    const out: string[] = [];
+    const got = await runFailureTriage(
+      {
+        profile: 'default',
+        output: 'json',
+        debug: false,
+        dryRun: true,
+        projectId: 'proj_dry',
+        maxConcurrency: 5,
+      },
+      { stdout: line => out.push(line) },
+    );
+    expect(got.summary.clusterCount).toBe(2);
+    expect(got.clusters[0]?.groupReason).toBe('failure_kind');
+    expect(JSON.parse(out[0]!).projectId).toBe('proj_dry');
+  });
+
+  it('returns empty clusters when no failed tests match', async () => {
+    const { credentialsPath } = makeCreds();
+    const fetchImpl = makeFetch(() => ({ body: { items: [], nextToken: null } }));
+    const out: string[] = [];
+    const got = await runFailureTriage(
+      {
+        profile: 'default',
+        output: 'json',
+        debug: false,
+        projectId: 'proj_empty',
+        maxConcurrency: 5,
+      },
+      { credentialsPath, fetchImpl, stdout: line => out.push(line) },
+    );
+    expect(got.clusters).toEqual([]);
+    expect(got.summary.totalFailed).toBe(0);
+    expect(JSON.parse(out[0]!).clusters).toEqual([]);
+  });
+
+  it('skips tests whose failure summary returns NOT_FOUND', async () => {
+    const { credentialsPath } = makeCreds();
+    const stderrLines: string[] = [];
+    const fetchImpl = makeFetch(url => {
+      if (url.includes('/tests?')) {
+        return { body: { items: [FAILED_TEST_A, FAILED_TEST_B], nextToken: null } };
+      }
+      if (url.includes('/tests/test_a/failure/summary')) {
+        return { body: summaryFor('test_a') };
+      }
+      return {
+        status: 404,
+        body: {
+          error: {
+            code: 'NOT_FOUND',
+            message: 'Test has no failing run.',
+            nextAction: 'No failing run.',
+            requestId: 'req_test',
+            details: { resource: 'test', id: 'test_b', reason: 'no_failing_run' },
+          },
+        },
+      };
+    });
+    const got = await runFailureTriage(
+      {
+        profile: 'default',
+        output: 'json',
+        debug: false,
+        projectId: 'proj_1',
+        maxConcurrency: 5,
+      },
+      {
+        credentialsPath,
+        fetchImpl,
+        stdout: () => undefined,
+        stderr: line => stderrLines.push(line),
+      },
+    );
+    expect(got.summary.totalFailed).toBe(1);
+    expect(got.summary.skipped).toBe(1);
+    expect(got.skipped?.[0]).toEqual({ testId: 'test_b', reason: 'no_failing_run' });
+    expect(stderrLines.some(l => l.includes('skipped'))).toBe(true);
+  });
+
+  it('rejects missing projectId with VALIDATION_ERROR (exit 5)', async () => {
+    await expect(
+      runFailureTriage(
+        {
+          profile: 'default',
+          output: 'json',
+          debug: false,
+          projectId: '',
+          maxConcurrency: 5,
+        },
+        { stdout: () => undefined },
+      ),
+    ).rejects.toMatchObject({ code: 'VALIDATION_ERROR', exitCode: 5 });
+  });
+
+  it('rejects invalid --max-concurrency with VALIDATION_ERROR (exit 5)', async () => {
+    await expect(
+      runFailureTriage(
+        {
+          profile: 'default',
+          output: 'json',
+          debug: false,
+          projectId: 'proj_1',
+          maxConcurrency: 0,
+        },
+        { stdout: () => undefined },
+      ),
+    ).rejects.toMatchObject({ code: 'VALIDATION_ERROR', exitCode: 5 });
+  });
+
+  it('--filter keeps only tests whose name matches (case-insensitive)', async () => {
+    const { credentialsPath } = makeCreds();
+    const fetchImpl = makeFetch(url => {
+      if (url.includes('/tests?')) {
+        return {
+          body: {
+            items: [FAILED_TEST_A, { ...FAILED_TEST_B, name: 'Profile update flow' }],
+            nextToken: null,
+          },
+        };
+      }
+      return { body: summaryFor('test_a') };
+    });
+    const got = await runFailureTriage(
+      {
+        profile: 'default',
+        output: 'json',
+        debug: false,
+        projectId: 'proj_1',
+        nameFilter: 'checkout',
+        maxConcurrency: 5,
+      },
+      { credentialsPath, fetchImpl, stdout: () => undefined },
+    );
+    expect(got.summary.totalFailed).toBe(1);
+    expect(got.clusters[0]?.memberTestIds).toEqual(['test_a']);
+  });
+});
+
 // ---------- §6.7 runFailureGet ----------
 
 const FAILED_STEPS: CliTestStep[] = [