diff --git a/packages/software-factory/.agents/skills/software-factory-operations/SKILL.md b/packages/software-factory/.agents/skills/software-factory-operations/SKILL.md index 7bcd658383..ce78ba4581 100644 --- a/packages/software-factory/.agents/skills/software-factory-operations/SKILL.md +++ b/packages/software-factory/.agents/skills/software-factory-operations/SKILL.md @@ -134,6 +134,7 @@ export function runTests() { - No external realm writes during tests — all test data lives in browser memory - Use `data-test-*` attributes for DOM selectors when testing rendered output - Use QUnit assertions: `assert.dom()`, `assert.strictEqual()`, `assert.ok()` +- **Never use `QUnit.skip()` or `QUnit.todo()`.** All tests must actually execute. Skipped/todo tests are flagged as `skipped` in the TestRun card and treated as a failure when no tests actually ran. The orchestrator will reject a TestRun where every test is skipped. ## Important Rules diff --git a/packages/software-factory/realm/test-results.gts b/packages/software-factory/realm/test-results.gts index 4e05ca8ad6..2be51fe39d 100644 --- a/packages/software-factory/realm/test-results.gts +++ b/packages/software-factory/realm/test-results.gts @@ -29,6 +29,7 @@ export const TestResultStatusField = enumField(StringField, { { value: 'passed', label: 'Passed' }, { value: 'failed', label: 'Failed' }, { value: 'error', label: 'Error' }, + { value: 'skipped', label: 'Skipped' }, ], }); @@ -49,6 +50,8 @@ export class TestResultEntry extends FieldDef { return '\u2717'; case 'error': return '!'; + case 'skipped': + return '\u2192'; case 'pending': return '\u2013'; default: @@ -92,6 +95,10 @@ export class TestResultEntry extends FieldDef { .status-pending { color: var(--boxel-400, #9ca3af); } + .status-skipped { + color: var(--boxel-400, #9ca3af); + font-style: italic; + } .test-name { flex: 1; } @@ -124,6 +131,12 @@ export class TestModuleResult extends FieldDef { }, }); + @field skippedCount = contains(NumberField, { + computeVia: function (this: TestModuleResult) { + return (this.results ?? []).filter((r) => r.status === 'skipped').length; + }, + }); + get moduleName() { return this.moduleRef?.module ?? 'default'; } @@ -145,6 +158,12 @@ export class TestModuleResult extends FieldDef { {{@model.passedCount}}/{{@model.totalCount}} passed + {{#if this.args.model.skippedCount}} + + ({{this.args.model.skippedCount}} + skipped) + + {{/if}} {{else}} running... @@ -174,6 +193,10 @@ export class TestModuleResult extends FieldDef { font-size: 0.8rem; color: var(--muted-foreground); } + .skipped-label { + color: var(--boxel-400, #9ca3af); + font-style: italic; + } .module-entries { padding-left: 0.5rem; } @@ -213,6 +236,15 @@ export class TestRun extends CardDef { }, }); + @field skippedCount = contains(NumberField, { + computeVia: function (this: TestRun) { + return (this.moduleResults ?? []).reduce( + (sum, sr) => sum + (sr.skippedCount ?? 0), + 0, + ); + }, + }); + @field title = contains(StringField, { computeVia: function (this: TestRun) { let seq = this.sequenceNumber ?? '?'; @@ -224,7 +256,9 @@ export class TestRun extends CardDef { static fitted = class Fitted extends Component { get total() { return ( - (this.args.model.passedCount ?? 0) + (this.args.model.failedCount ?? 0) + (this.args.model.passedCount ?? 0) + + (this.args.model.failedCount ?? 0) + + (this.args.model.skippedCount ?? 0) ); } @@ -237,6 +271,12 @@ export class TestRun extends CardDef {
{{@model.passedCount}}/{{this.total}} passed + {{#if @model.skippedCount}} + + ({{@model.skippedCount}} + skipped) + + {{/if}} {{#if @model.durationMs}} {{@model.durationMs}}ms {{/if}} @@ -285,6 +325,10 @@ export class TestRun extends CardDef { font-size: 0.85rem; color: var(--muted-foreground); } + .skipped-label { + color: var(--boxel-400, #9ca3af); + font-style: italic; + } .duration { margin-left: 0.5rem; font-size: 0.75rem; @@ -298,7 +342,9 @@ export class TestRun extends CardDef { static isolated = class Isolated extends Component { get total() { return ( - (this.args.model.passedCount ?? 0) + (this.args.model.failedCount ?? 0) + (this.args.model.passedCount ?? 0) + + (this.args.model.failedCount ?? 0) + + (this.args.model.skippedCount ?? 0) ); } @@ -314,6 +360,12 @@ export class TestRun extends CardDef {
{{@model.passedCount}}/{{this.total}} passed + {{#if @model.skippedCount}} + + ({{@model.skippedCount}} + skipped) + + {{/if}} {{#if @model.durationMs}} in {{@model.durationMs}}ms @@ -372,6 +424,12 @@ export class TestRun extends CardDef { {{moduleResult.passedCount}}/{{moduleResult.totalCount}} passed {{/if}} + {{#if moduleResult.skippedCount}} + + ({{moduleResult.skippedCount}} + skipped) + + {{/if}} {{else}} running... @@ -448,6 +506,10 @@ export class TestRun extends CardDef { font-size: 0.9rem; color: var(--muted-foreground); } + .skipped-label { + color: var(--boxel-400, #9ca3af); + font-style: italic; + } .error-message { color: var(--boxel-red, #dc2626); font-family: monospace; @@ -515,6 +577,10 @@ export class TestRun extends CardDef { .test-status-icon.status-pending { color: var(--boxel-400, #9ca3af); } + .test-status-icon.status-skipped { + color: var(--boxel-400, #9ca3af); + font-style: italic; + } .test-item-name { flex: 1; } diff --git a/packages/software-factory/src/factory-agent-types.ts b/packages/software-factory/src/factory-agent-types.ts index a0bc09bf6d..20f81bc7e5 100644 --- a/packages/software-factory/src/factory-agent-types.ts +++ b/packages/software-factory/src/factory-agent-types.ts @@ -107,6 +107,7 @@ export interface TestResult { status: 'passed' | 'failed' | 'error'; passedCount: number; failedCount: number; + skippedCount?: number; failures: TestFailure[]; durationMs: number; } diff --git a/packages/software-factory/src/factory-prompt-loader.ts b/packages/software-factory/src/factory-prompt-loader.ts index cfd55f1633..ade5011117 100644 --- a/packages/software-factory/src/factory-prompt-loader.ts +++ b/packages/software-factory/src/factory-prompt-loader.ts @@ -457,6 +457,7 @@ export function assembleIteratePrompt( status: context.testResults.status, passedCount: context.testResults.passedCount, failedCount: context.testResults.failedCount, + skippedCount: context.testResults.skippedCount, durationMs: context.testResults.durationMs, failures: testFailures, } diff --git a/packages/software-factory/src/test-run-parsing.ts b/packages/software-factory/src/test-run-parsing.ts index 40fb2ab9a4..a0a901b2d0 100644 --- a/packages/software-factory/src/test-run-parsing.ts +++ b/packages/software-factory/src/test-run-parsing.ts @@ -29,11 +29,18 @@ export function parseQunitResults(results: QunitResults): TestRunAttributes { moduleMap.set(moduleName, []); } - // Map QUnit statuses to terminal states. Skipped/todo are not failures - // and must not be 'pending' (which means "not yet executed" and would + // Map QUnit statuses to terminal states. Skipped/todo are surfaced as + // 'skipped' so the agent can see they weren't actually executed. + // They must not be 'pending' (which means "not yet executed" and would // confuse resume logic and isComplete checks). - let status: TestResultEntryData['status'] = - test.status === 'failed' ? 'failed' : 'passed'; + let status: TestResultEntryData['status']; + if (test.status === 'failed') { + status = 'failed'; + } else if (test.status === 'skipped' || test.status === 'todo') { + status = 'skipped'; + } else { + status = 'passed'; + } let entry: TestResultEntryData = { testName: test.name, @@ -65,19 +72,26 @@ export function parseQunitResults(results: QunitResults): TestRunAttributes { let failedCount = allResults.filter( (r) => r.status === 'failed' || r.status === 'error', ).length; + let skippedCount = allResults.filter((r) => r.status === 'skipped').length; - let hasFailures = failedCount > 0; - let status: TestRunAttributes['status'] = hasFailures ? 'failed' : 'passed'; - - // If no tests ran at all, mark as error + let status: TestRunAttributes['status']; if (results.tests.length === 0) { + // No tests ran at all status = 'error'; + } else if (failedCount > 0) { + status = 'failed'; + } else if (passedCount === 0 && skippedCount > 0) { + // All tests were skipped — nothing was actually verified + status = 'failed'; + } else { + status = 'passed'; } return { status, passedCount, failedCount, + skippedCount, durationMs: results.runEnd.runtime, moduleResults, }; @@ -87,9 +101,13 @@ export function parseQunitResults(results: QunitResults): TestRunAttributes { * Format a `TestResult` into a human-readable summary for agent prompts. */ export function formatTestResultSummary(result: TestResult): string { + let countLine = `Passed: ${result.passedCount}, Failed: ${result.failedCount}`; + if (result.skippedCount && result.skippedCount > 0) { + countLine += `, Skipped: ${result.skippedCount}`; + } let lines: string[] = [ `Status: ${result.status}`, - `Passed: ${result.passedCount}, Failed: ${result.failedCount}`, + countLine, `Duration: ${result.durationMs}ms`, ]; diff --git a/packages/software-factory/src/test-run-types.ts b/packages/software-factory/src/test-run-types.ts index bc06f9a243..531fe56c4a 100644 --- a/packages/software-factory/src/test-run-types.ts +++ b/packages/software-factory/src/test-run-types.ts @@ -70,6 +70,7 @@ export interface TestRunAttributes { status: 'running' | 'passed' | 'failed' | 'error'; passedCount: number; failedCount: number; + skippedCount?: number; durationMs?: number; errorMessage?: string; moduleResults: TestModuleResultData[]; @@ -78,7 +79,7 @@ export interface TestRunAttributes { /** Shape of a single test result entry within a TestRun card. */ export interface TestResultEntryData { testName: string; - status: 'pending' | 'passed' | 'failed' | 'error'; + status: 'pending' | 'passed' | 'failed' | 'error' | 'skipped'; message?: string; stackTrace?: string; durationMs?: number; diff --git a/packages/software-factory/src/validators/test-step.ts b/packages/software-factory/src/validators/test-step.ts index a9c8e9b87f..920d117055 100644 --- a/packages/software-factory/src/validators/test-step.ts +++ b/packages/software-factory/src/validators/test-step.ts @@ -62,6 +62,7 @@ export interface TestValidationDetails { testRunId: string; passedCount: number; failedCount: number; + skippedCount: number; durationMs: number; failures: TestValidationFailure[]; } @@ -212,7 +213,9 @@ export class TestValidationStep implements ValidationStepRunner { | TestValidationDetails | undefined; if (details && details.passedCount > 0) { - return `## Test Validation: PASSED\n${details.passedCount} test(s) passed (TestRun: ${details.testRunId})`; + let skippedNote = + details.skippedCount > 0 ? `, ${details.skippedCount} skipped` : ''; + return `## Test Validation: PASSED\n${details.passedCount} test(s) passed${skippedNote} (TestRun: ${details.testRunId})`; } return ''; } @@ -228,7 +231,7 @@ export class TestValidationStep implements ValidationStepRunner { let lines: string[] = [ `## Test Validation: FAILED`, - `${details.passedCount} passed, ${details.failedCount} failed (TestRun: ${details.testRunId})`, + `${details.passedCount} passed, ${details.failedCount} failed${details.skippedCount > 0 ? `, ${details.skippedCount} skipped` : ''} (TestRun: ${details.testRunId})`, ]; for (let failure of details.failures) { @@ -323,6 +326,7 @@ function extractTestDetails( let failures: TestValidationFailure[] = []; let passedCount = 0; let failedCount = 0; + let skippedCount = 0; for (let moduleResult of attrs.moduleResults ?? []) { let moduleName = moduleResult.moduleRef?.module ?? 'unknown'; @@ -337,6 +341,8 @@ function extractTestDetails( message: result.message ?? `Test ${result.status}`, stackTrace: result.stackTrace, }); + } else if (result.status === 'skipped') { + skippedCount++; } } } @@ -348,11 +354,15 @@ function extractTestDetails( if (attrs.failedCount != null) { failedCount = attrs.failedCount; } + if ((attrs as Record).skippedCount != null) { + skippedCount = (attrs as Record).skippedCount as number; + } return { testRunId, passedCount, failedCount, + skippedCount, durationMs: attrs.durationMs ?? 0, failures, }; diff --git a/packages/software-factory/tests/factory-test-realm.test.ts b/packages/software-factory/tests/factory-test-realm.test.ts index e311ccc395..48871d8b72 100644 --- a/packages/software-factory/tests/factory-test-realm.test.ts +++ b/packages/software-factory/tests/factory-test-realm.test.ts @@ -164,6 +164,81 @@ module('factory-test-realm > parseQunitResults', function () { Boolean(entry.stackTrace && entry.stackTrace.includes('at line 42')), ); }); + + test('maps skipped QUnit tests to skipped status', function (assert) { + let results = parseQunitResults({ + tests: [ + { + name: 'real test', + module: 'Mod', + status: 'passed', + runtime: 50, + errors: [], + }, + { + name: 'skipped test', + module: 'Mod', + status: 'skipped', + runtime: 0, + errors: [], + }, + { + name: 'todo test', + module: 'Mod', + status: 'todo', + runtime: 0, + errors: [], + }, + ], + runEnd: { + status: 'passed', + testCounts: { passed: 1, failed: 0, skipped: 1, todo: 1, total: 3 }, + runtime: 50, + }, + }); + assert.strictEqual(results.status, 'passed'); + assert.strictEqual(results.passedCount, 1); + assert.strictEqual(results.failedCount, 0); + assert.strictEqual(results.skippedCount, 2); + let entries = results.moduleResults[0].results; + assert.strictEqual(entries[0].status, 'passed'); + assert.strictEqual(entries[1].status, 'skipped'); + assert.strictEqual(entries[2].status, 'skipped'); + }); + + test('all-skipped tests are treated as failed', function (assert) { + let results = parseQunitResults({ + tests: [ + { + name: 'skip A', + module: 'Mod', + status: 'skipped', + runtime: 0, + errors: [], + }, + { + name: 'todo B', + module: 'Mod', + status: 'todo', + runtime: 0, + errors: [], + }, + ], + runEnd: { + status: 'passed', + testCounts: { passed: 0, failed: 0, skipped: 1, todo: 1, total: 2 }, + runtime: 0, + }, + }); + assert.strictEqual( + results.status, + 'failed', + 'all-skipped run should be failed', + ); + assert.strictEqual(results.passedCount, 0); + assert.strictEqual(results.failedCount, 0); + assert.strictEqual(results.skippedCount, 2); + }); }); // --------------------------------------------------------------------------- @@ -924,4 +999,32 @@ module('factory-test-realm > formatTestResultSummary', function () { let summary = formatTestResultSummary(result); assert.true(summary.length < longStack.length); }); + + test('includes skipped count when present', function (assert) { + let result: TestResult = { + status: 'passed', + passedCount: 3, + failedCount: 0, + skippedCount: 2, + failures: [], + durationMs: 1000, + }; + + let summary = formatTestResultSummary(result); + assert.true(summary.includes('Skipped: 2')); + }); + + test('omits skipped count when zero', function (assert) { + let result: TestResult = { + status: 'passed', + passedCount: 5, + failedCount: 0, + skippedCount: 0, + failures: [], + durationMs: 2000, + }; + + let summary = formatTestResultSummary(result); + assert.false(summary.includes('Skipped')); + }); }); diff --git a/packages/software-factory/tests/test-step.test.ts b/packages/software-factory/tests/test-step.test.ts index 79d557044c..8af66ff71e 100644 --- a/packages/software-factory/tests/test-step.test.ts +++ b/packages/software-factory/tests/test-step.test.ts @@ -312,6 +312,7 @@ module('TestValidationStep', function () { testRunId: 'Test Runs/validation-1', passedCount: 5, failedCount: 0, + skippedCount: 0, durationMs: 1000, failures: [], }, @@ -320,6 +321,33 @@ module('TestValidationStep', function () { let formatted = step.formatForContext(result); assert.ok(formatted.includes('PASSED')); assert.ok(formatted.includes('5')); + assert.notOk( + formatted.includes('skipped'), + 'no skipped note when skippedCount is 0', + ); + }); + + test('formatForContext with passing result includes skipped note', function (assert) { + let step = new TestValidationStep(makeConfig()); + + let result: ValidationStepResult = { + step: 'test', + passed: true, + errors: [], + details: { + testRunId: 'Test Runs/validation-1', + passedCount: 3, + failedCount: 0, + skippedCount: 2, + durationMs: 800, + failures: [], + }, + }; + + let formatted = step.formatForContext(result); + assert.ok(formatted.includes('PASSED')); + assert.ok(formatted.includes('3')); + assert.ok(formatted.includes('2 skipped'), 'includes skipped count'); }); test('formatForContext with failing result and detailed failures', function (assert) { @@ -333,6 +361,7 @@ module('TestValidationStep', function () { testRunId: 'Test Runs/validation-1', passedCount: 2, failedCount: 1, + skippedCount: 0, durationMs: 1500, failures: [ { @@ -352,6 +381,34 @@ module('TestValidationStep', function () { assert.ok(formatted.includes("Expected 'Alice'")); }); + test('formatForContext with failing result includes skipped note', function (assert) { + let step = new TestValidationStep(makeConfig()); + + let result: ValidationStepResult = { + step: 'test', + passed: false, + errors: [{ message: 'shows author: Expected Alice but got empty' }], + details: { + testRunId: 'Test Runs/validation-1', + passedCount: 2, + failedCount: 1, + skippedCount: 3, + durationMs: 1500, + failures: [ + { + testName: 'shows author', + module: 'hello.test.gts', + message: "Expected 'Alice' but got ''", + }, + ], + }, + }; + + let formatted = step.formatForContext(result); + assert.ok(formatted.includes('FAILED')); + assert.ok(formatted.includes('3 skipped'), 'includes skipped count'); + }); + test('formatForContext without details falls back to errors', function (assert) { let step = new TestValidationStep(makeConfig());