Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ export function runTests() {
- No external realm writes during tests — all test data lives in browser memory
- Use `data-test-*` attributes for DOM selectors when testing rendered output
- Use QUnit assertions: `assert.dom()`, `assert.strictEqual()`, `assert.ok()`
- **Never use `QUnit.skip()` or `QUnit.todo()`.** All tests must actually execute. Skipped and todo tests are recorded with status `skipped` in the TestRun card, and a run in which no test passed because every test was skipped is marked as failed. The orchestrator will reject a TestRun where every test is skipped.

## Important Rules

Expand Down
70 changes: 68 additions & 2 deletions packages/software-factory/realm/test-results.gts
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ export const TestResultStatusField = enumField(StringField, {
{ value: 'passed', label: 'Passed' },
{ value: 'failed', label: 'Failed' },
{ value: 'error', label: 'Error' },
{ value: 'skipped', label: 'Skipped' },
],
});

Expand All @@ -49,6 +50,8 @@ export class TestResultEntry extends FieldDef {
return '\u2717';
case 'error':
return '!';
case 'skipped':
return '\u2192';
case 'pending':
return '\u2013';
default:
Expand Down Expand Up @@ -92,6 +95,10 @@ export class TestResultEntry extends FieldDef {
.status-pending {
color: var(--boxel-400, #9ca3af);
}
.status-skipped {
color: var(--boxel-400, #9ca3af);
font-style: italic;
}
.test-name {
flex: 1;
}
Expand Down Expand Up @@ -124,6 +131,12 @@ export class TestModuleResult extends FieldDef {
},
});

/**
 * Computed count of entries in `results` whose status is `'skipped'`.
 * A null/undefined `results` is treated as an empty list, giving 0.
 */
@field skippedCount = contains(NumberField, {
  computeVia: function (this: TestModuleResult) {
    let entries = this.results ?? [];
    return entries.reduce(
      (count, entry) => (entry.status === 'skipped' ? count + 1 : count),
      0,
    );
  },
});

/**
 * The `module` value of `moduleRef`, falling back to `'default'` when the
 * ref or its `module` is null/undefined.
 */
get moduleName() {
  let ref = this.moduleRef;
  return ref?.module ?? 'default';
}
Expand All @@ -145,6 +158,12 @@ export class TestModuleResult extends FieldDef {
<span class='module-counts'>
{{@model.passedCount}}/{{@model.totalCount}}
passed
{{#if this.args.model.skippedCount}}
<span class='skipped-label'>
({{this.args.model.skippedCount}}
skipped)
</span>
{{/if}}
</span>
{{else}}
<span class='module-counts'>running...</span>
Expand Down Expand Up @@ -174,6 +193,10 @@ export class TestModuleResult extends FieldDef {
font-size: 0.8rem;
color: var(--muted-foreground);
}
.skipped-label {
color: var(--boxel-400, #9ca3af);
font-style: italic;
}
.module-entries {
padding-left: 0.5rem;
}
Expand Down Expand Up @@ -213,6 +236,15 @@ export class TestRun extends CardDef {
},
});

/**
 * Computed sum of `skippedCount` across every entry in `moduleResults`.
 * A null/undefined `moduleResults` yields 0, and a module whose
 * `skippedCount` is null/undefined contributes 0.
 */
@field skippedCount = contains(NumberField, {
  computeVia: function (this: TestRun) {
    let modules = this.moduleResults ?? [];
    let skippedTotal = 0;
    modules.forEach((moduleResult) => {
      skippedTotal += moduleResult.skippedCount ?? 0;
    });
    return skippedTotal;
  },
});

@field title = contains(StringField, {
computeVia: function (this: TestRun) {
let seq = this.sequenceNumber ?? '?';
Expand All @@ -224,7 +256,9 @@ export class TestRun extends CardDef {
static fitted = class Fitted extends Component<typeof TestRun> {
get total() {
return (
(this.args.model.passedCount ?? 0) + (this.args.model.failedCount ?? 0)
(this.args.model.passedCount ?? 0) +
(this.args.model.failedCount ?? 0) +
(this.args.model.skippedCount ?? 0)
);
}

Expand All @@ -237,6 +271,12 @@ export class TestRun extends CardDef {
<div class='counts'>
{{@model.passedCount}}/{{this.total}}
passed
{{#if @model.skippedCount}}
<span class='skipped-label'>
({{@model.skippedCount}}
skipped)
</span>
{{/if}}
{{#if @model.durationMs}}
<span class='duration'>{{@model.durationMs}}ms</span>
{{/if}}
Expand Down Expand Up @@ -285,6 +325,10 @@ export class TestRun extends CardDef {
font-size: 0.85rem;
color: var(--muted-foreground);
}
.skipped-label {
color: var(--boxel-400, #9ca3af);
font-style: italic;
}
.duration {
margin-left: 0.5rem;
font-size: 0.75rem;
Expand All @@ -298,7 +342,9 @@ export class TestRun extends CardDef {
static isolated = class Isolated extends Component<typeof TestRun> {
get total() {
return (
(this.args.model.passedCount ?? 0) + (this.args.model.failedCount ?? 0)
(this.args.model.passedCount ?? 0) +
(this.args.model.failedCount ?? 0) +
(this.args.model.skippedCount ?? 0)
);
}

Expand All @@ -314,6 +360,12 @@ export class TestRun extends CardDef {
<div class='summary'>
{{@model.passedCount}}/{{this.total}}
passed
{{#if @model.skippedCount}}
<span class='skipped-label'>
({{@model.skippedCount}}
skipped)
</span>
{{/if}}
{{#if @model.durationMs}}
in
{{@model.durationMs}}ms
Expand Down Expand Up @@ -372,6 +424,12 @@ export class TestRun extends CardDef {
{{moduleResult.passedCount}}/{{moduleResult.totalCount}}
passed
{{/if}}
{{#if moduleResult.skippedCount}}
<span class='skipped-label'>
({{moduleResult.skippedCount}}
skipped)
</span>
{{/if}}
</span>
{{else}}
<span class='module-group-counts running'>running...</span>
Expand Down Expand Up @@ -448,6 +506,10 @@ export class TestRun extends CardDef {
font-size: 0.9rem;
color: var(--muted-foreground);
}
.skipped-label {
color: var(--boxel-400, #9ca3af);
font-style: italic;
}
.error-message {
color: var(--boxel-red, #dc2626);
font-family: monospace;
Expand Down Expand Up @@ -515,6 +577,10 @@ export class TestRun extends CardDef {
.test-status-icon.status-pending {
color: var(--boxel-400, #9ca3af);
}
.test-status-icon.status-skipped {
color: var(--boxel-400, #9ca3af);
font-style: italic;
}
.test-item-name {
flex: 1;
}
Expand Down
1 change: 1 addition & 0 deletions packages/software-factory/src/factory-agent-types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ export interface TestResult {
status: 'passed' | 'failed' | 'error';
passedCount: number;
failedCount: number;
skippedCount?: number;
failures: TestFailure[];
durationMs: number;
}
Expand Down
1 change: 1 addition & 0 deletions packages/software-factory/src/factory-prompt-loader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -457,6 +457,7 @@ export function assembleIteratePrompt(
status: context.testResults.status,
passedCount: context.testResults.passedCount,
failedCount: context.testResults.failedCount,
skippedCount: context.testResults.skippedCount,
durationMs: context.testResults.durationMs,
failures: testFailures,
}
Expand Down
36 changes: 27 additions & 9 deletions packages/software-factory/src/test-run-parsing.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,18 @@ export function parseQunitResults(results: QunitResults): TestRunAttributes {
moduleMap.set(moduleName, []);
}

// Map QUnit statuses to terminal states. Skipped/todo are not failures
// and must not be 'pending' (which means "not yet executed" and would
// Map QUnit statuses to terminal states. Skipped/todo are surfaced as
// 'skipped' so the agent can see they weren't actually executed.
// They must not be 'pending' (which means "not yet executed" and would
// confuse resume logic and isComplete checks).
let status: TestResultEntryData['status'] =
test.status === 'failed' ? 'failed' : 'passed';
let status: TestResultEntryData['status'];
if (test.status === 'failed') {
status = 'failed';
} else if (test.status === 'skipped' || test.status === 'todo') {
status = 'skipped';
} else {
status = 'passed';
}

let entry: TestResultEntryData = {
testName: test.name,
Expand Down Expand Up @@ -65,19 +72,26 @@ export function parseQunitResults(results: QunitResults): TestRunAttributes {
let failedCount = allResults.filter(
(r) => r.status === 'failed' || r.status === 'error',
).length;
let skippedCount = allResults.filter((r) => r.status === 'skipped').length;

let hasFailures = failedCount > 0;
let status: TestRunAttributes['status'] = hasFailures ? 'failed' : 'passed';

// If no tests ran at all, mark as error
let status: TestRunAttributes['status'];
if (results.tests.length === 0) {
// No tests ran at all
status = 'error';
} else if (failedCount > 0) {
status = 'failed';
} else if (passedCount === 0 && skippedCount > 0) {
// All tests were skipped — nothing was actually verified
status = 'failed';
} else {
status = 'passed';
}

return {
status,
passedCount,
failedCount,
skippedCount,
durationMs: results.runEnd.runtime,
moduleResults,
};
Expand All @@ -87,9 +101,13 @@ export function parseQunitResults(results: QunitResults): TestRunAttributes {
* Format a `TestResult` into a human-readable summary for agent prompts.
*/
export function formatTestResultSummary(result: TestResult): string {
let countLine = `Passed: ${result.passedCount}, Failed: ${result.failedCount}`;
if (result.skippedCount && result.skippedCount > 0) {
countLine += `, Skipped: ${result.skippedCount}`;
}
let lines: string[] = [
`Status: ${result.status}`,
`Passed: ${result.passedCount}, Failed: ${result.failedCount}`,
countLine,
`Duration: ${result.durationMs}ms`,
];

Expand Down
3 changes: 2 additions & 1 deletion packages/software-factory/src/test-run-types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ export interface TestRunAttributes {
status: 'running' | 'passed' | 'failed' | 'error';
passedCount: number;
failedCount: number;
skippedCount?: number;
durationMs?: number;
errorMessage?: string;
moduleResults: TestModuleResultData[];
Expand All @@ -78,7 +79,7 @@ export interface TestRunAttributes {
/** Shape of a single test result entry within a TestRun card. */
export interface TestResultEntryData {
testName: string;
status: 'pending' | 'passed' | 'failed' | 'error';
status: 'pending' | 'passed' | 'failed' | 'error' | 'skipped';
message?: string;
stackTrace?: string;
durationMs?: number;
Expand Down
14 changes: 12 additions & 2 deletions packages/software-factory/src/validators/test-step.ts
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ export interface TestValidationDetails {
testRunId: string;
passedCount: number;
failedCount: number;
skippedCount: number;
durationMs: number;
failures: TestValidationFailure[];
}
Expand Down Expand Up @@ -212,7 +213,9 @@ export class TestValidationStep implements ValidationStepRunner {
| TestValidationDetails
| undefined;
if (details && details.passedCount > 0) {
return `## Test Validation: PASSED\n${details.passedCount} test(s) passed (TestRun: ${details.testRunId})`;
let skippedNote =
details.skippedCount > 0 ? `, ${details.skippedCount} skipped` : '';
return `## Test Validation: PASSED\n${details.passedCount} test(s) passed${skippedNote} (TestRun: ${details.testRunId})`;
}
return '';
}
Expand All @@ -228,7 +231,7 @@ export class TestValidationStep implements ValidationStepRunner {

let lines: string[] = [
`## Test Validation: FAILED`,
`${details.passedCount} passed, ${details.failedCount} failed (TestRun: ${details.testRunId})`,
`${details.passedCount} passed, ${details.failedCount} failed${details.skippedCount > 0 ? `, ${details.skippedCount} skipped` : ''} (TestRun: ${details.testRunId})`,
];

for (let failure of details.failures) {
Expand Down Expand Up @@ -323,6 +326,7 @@ function extractTestDetails(
let failures: TestValidationFailure[] = [];
let passedCount = 0;
let failedCount = 0;
let skippedCount = 0;

for (let moduleResult of attrs.moduleResults ?? []) {
let moduleName = moduleResult.moduleRef?.module ?? 'unknown';
Expand All @@ -337,6 +341,8 @@ function extractTestDetails(
message: result.message ?? `Test ${result.status}`,
stackTrace: result.stackTrace,
});
} else if (result.status === 'skipped') {
skippedCount++;
}
}
}
Expand All @@ -348,11 +354,15 @@ function extractTestDetails(
if (attrs.failedCount != null) {
failedCount = attrs.failedCount;
}
if ((attrs as Record<string, unknown>).skippedCount != null) {
skippedCount = (attrs as Record<string, unknown>).skippedCount as number;
}

return {
testRunId,
passedCount,
failedCount,
skippedCount,
durationMs: attrs.durationMs ?? 0,
failures,
};
Expand Down
Loading
Loading