From 97cd9f22e3c6e3c8a59eb9616422a547a58bf314 Mon Sep 17 00:00:00 2001 From: Hassan Abdel-Rahman Date: Thu, 9 Apr 2026 18:24:08 -0400 Subject: [PATCH 01/17] glint support for software factory realm gts --- packages/software-factory/realm/tsconfig.json | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 packages/software-factory/realm/tsconfig.json diff --git a/packages/software-factory/realm/tsconfig.json b/packages/software-factory/realm/tsconfig.json new file mode 100644 index 0000000000..9557565c2d --- /dev/null +++ b/packages/software-factory/realm/tsconfig.json @@ -0,0 +1,30 @@ +{ + "compilerOptions": { + "target": "es2020", + "allowJs": true, + "moduleResolution": "node16", + "allowSyntheticDefaultImports": true, + "noImplicitAny": true, + "noImplicitThis": true, + "alwaysStrict": true, + "strictNullChecks": true, + "strictPropertyInitialization": true, + "noFallthroughCasesInSwitch": true, + "noUnusedLocals": true, + "noUnusedParameters": true, + "noImplicitReturns": true, + "noEmitOnError": false, + "noEmit": true, + "inlineSourceMap": true, + "inlineSources": true, + "baseUrl": ".", + "module": "node16", + "strict": true, + "experimentalDecorators": true, + "paths": { + "https://cardstack.com/base/*": ["../../base/*"] + }, + "types": ["@cardstack/local-types"] + }, + "include": ["**/*.ts", "**/*.gts"] +} From f5aea896691243181a7e79ec51e323a4349b6941 Mon Sep 17 00:00:00 2001 From: Hassan Abdel-Rahman Date: Thu, 9 Apr 2026 18:26:14 -0400 Subject: [PATCH 02/17] Update phase-2-plan.md with implementation discoveries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Integrate findings from CS-10672 implementation directly into the relevant plan sections rather than appending notes: - Selection algorithm uses backlog (not ready) to match darkfactory.gts - Priority enum includes critical - Issue lifecycle uses backlog → in_progress → done/blocked/review - Exhausted issues tracked via exclusion set to prevent re-picking - RealmIssueStore uses absolute darkfactory module URL (env-sensitive) - Boxel linksToMany uses dotted-key format for blockedBy relationships - Loop outcome disambiguation table for edge cases - Migration path references actual file organization and CS-10708 Co-Authored-By: Claude Opus 4.6 (1M context) --- .../software-factory/docs/phase-2-plan.md | 108 +++++++++++++----- 1 file changed, 79 insertions(+), 29 deletions(-) diff --git a/packages/software-factory/docs/phase-2-plan.md b/packages/software-factory/docs/phase-2-plan.md index c6ba807568..dd86015735 100644 --- a/packages/software-factory/docs/phase-2-plan.md +++ b/packages/software-factory/docs/phase-2-plan.md @@ -24,18 +24,19 @@ This makes the loop generic. It doesn't need to know whether an issue is "implem Issues need properties that let the orchestrator determine execution order. Possible fields (may use a combination): -- **priority** — enum (`high`, `medium`, `low`), high = execute first +- **priority** — enum (`critical`, `high`, `medium`, `low`), critical = execute first - **predecessors / blockedBy** — explicit dependency edges; an issue cannot start until its blockers are done - **order** — explicit sequence number for tie-breaking -The selection algorithm: +The selection algorithm (implemented in `IssueScheduler.pickNextIssue()`): -1. Filter to issues with status `ready` or `in_progress` +1. Filter to issues with status `backlog` or `in_progress` 2. Exclude issues whose `blockedBy` list contains any non-completed issue -3. Sort by priority (high first, then medium, then low), then by order (ascending) -4. Pick the first one +3. Exclude exhausted issues (hit `maxIterationsPerIssue` in the current run) +4. Sort: `in_progress` first, then by priority (`critical` > `high` > `medium` > `low`), then by order (ascending) +5. Pick the first one -Resume semantics: if an issue is already `in_progress`, it takes priority over `ready` issues (the factory was interrupted and should continue where it left off). +Resume semantics: if an issue is already `in_progress`, it takes priority over `backlog` issues (the factory was interrupted and should continue where it left off). ## Validation Phase After Every Iteration @@ -127,23 +128,40 @@ This is the "quirk" where an issue's job is to create the project itself. But it The phase 2 orchestrator is a thin scheduler with a built-in validation phase that runs after every agent turn: -``` -while (hasUnblockedIssues()) { - let issue = pickNextIssue(); +```typescript +// As implemented in runIssueLoop() — src/issue-loop.ts +let exhaustedIssues = new Set(); + +while ( + scheduler.hasUnblockedIssues(exhaustedIssues) && + outerCycles < maxOuterCycles +) { + let issue = scheduler.pickNextIssue(exhaustedIssues); // Inner loop: multiple iterations per issue - let validationResults = null; - while (issue.status !== 'done' && issue.status !== 'blocked' && iterations < maxIterations) { - await agent.run(contextForIssue(issue, validationResults), tools); - refreshIssueState(issue); + let validationResults = undefined; + for (let iteration = 1; iteration <= maxIterationsPerIssue; iteration++) { + let context = await contextBuilder.buildForIssue({ + issue, + targetRealmUrl, + validationResults, + briefUrl, + }); + let result = await agent.run(context, tools); + + // Validation phase — runs after EVERY agent turn + validationResults = await validator.validate(targetRealmUrl); + + // Read issue state from realm (not from AgentRunResult.status) + issue = await scheduler.refreshIssueState(issue); + + if (issue.status === 'done' || issue.status === 'blocked') break; + } - // Validation phase — runs after EVERY iteration - validationResults = await validate(targetRealm); // parse, lint, evaluate, instantiate, run tests - // Failures are fed back as context in the next iteration — agent self-corrects - // Agent can also create new issues via tool calls if it decides to + if (exitReason === 'max_iterations') exhaustedIssues.add(issue.id); - iterations++; - } + // Reload to pick up new issues the agent may have created + await scheduler.loadIssues(); } ``` @@ -151,6 +169,37 @@ The agent signals progress by updating the issue — tagging it as blocked, mark All domain logic (what to implement, when to create sub-issues, when to tag as blocked) lives in the agent's prompt and skills. The orchestrator owns only: issue selection, agent invocation, and validation. +### Issue Loading via searchRealm() + +`RealmIssueStore` loads issues from the target realm using `searchRealm()` from `realm-operations.ts`. The search filter uses the absolute darkfactory module URL (from `inferDarkfactoryModuleUrl(targetRealmUrl)`), which varies by environment (production, staging, localhost). The store maps JSON:API card responses to `SchedulableIssue` objects. + +Boxel encodes `linksToMany` relationships with dotted keys rather than JSON:API `data` arrays: + +```json +{ + "relationships": { + "blockedBy.0": { "links": { "self": "../Issues/issue-a" } }, + "blockedBy.1": { "links": { "self": "../Issues/issue-b" } } + } +} +``` + +The `extractLinksToManyIds()` helper parses this format to extract blocker IDs for dependency resolution. + +When `searchRealm()` fails (auth, network, query errors), the store logs at `warn` level and returns an empty list — preventing the loop from silently treating a failure as "no issues exist." + +### Loop Outcome Determination + +The loop distinguishes several terminal states: + +| Condition | Outcome | +| --------------------------------------------- | --------------------- | +| No issues loaded | `all_issues_done` | +| Issues exist but all blocked at startup | `no_unblocked_issues` | +| All issues completed successfully | `all_issues_done` | +| Some issues done, others blocked or exhausted | `no_unblocked_issues` | +| Safety guard hit | `max_outer_cycles` | + ## Schema Refinement: darkfactory.gts Phase 1 defined Project and Ticket card types in `darkfactory.gts` with aspirational fields that were never used. Phase 2 trims these to only the fields that are actually set or read in code, and renames Ticket → Issue to match the issue-driven loop language. @@ -237,24 +286,25 @@ CS-10671 trims and renames the current schema as a first step. The adoption from ## Issue Lifecycle ``` -created → ready → in_progress → done - → blocked (needs human input) - → failed (max retries exceeded) +backlog → in_progress → done + → blocked (needs human input) + → review (optional) ``` +Issues that exhaust `maxIterationsPerIssue` without completing stay in their current status but are excluded from re-picking in the same loop run via an `exhaustedIssues` set. + The agent manages its own transitions by updating the issue directly (e.g., tagging as blocked, marking done). The orchestrator reads the issue state after the agent exits to decide what to do next — it does not inspect the agent's return value for status. ## Migration Path from Phase 1 -Phase 1 and phase 2 can coexist: +Phase 1 and phase 2 coexist during the transition. The implementation lives in separate files to avoid touching Phase 1 code: + +- `src/issue-scheduler.ts` — `IssueScheduler`, `IssueStore`, `RealmIssueStore` +- `src/issue-loop.ts` — `runIssueLoop()`, `Validator`, `NoOpValidator`, config/result types -1. Phase 1 ships with the hard-coded pipeline (`factory-loop.ts`) -2. Phase 2 introduces an `IssueScheduler` that replaces the fixed loop with issue-driven scheduling -3. The `LoopAgent` interface (`run(context, tools)`) stays the same — only the orchestrator changes -4. `ContextBuilder` gains an issue-aware mode that builds context from the current issue rather than a fixed ticket -5. The `TestRunner` callback becomes a tool the agent can call, rather than a loop phase +Phase 1's `factory-loop.ts` (`runFactoryLoop()`) remains untouched. The `LoopAgent` interface (`run(context, tools)`) is unchanged and reused by both loops. `FactoryTool[]` carries forward unchanged. -The `FactoryTool[]` type from phase 1 carries forward unchanged. `AgentRunResult` may be simplified — in phase 2 the agent signals completion by updating the issue (tagging as blocked, marking done), so the orchestrator reads issue state rather than inspecting a return status. The agent just needs to exit; the orchestrator figures out what happened from the issue. +CS-10708 tracks the integration work: wire `runIssueLoop()`, the validation phase (CS-10675), and bootstrap-as-seed (CS-10673) into the `factory:go` entrypoint, then retire all Phase 1 mechanisms (`factory-loop.ts`, `factory-loop.test.ts`, old bootstrap orchestration, `TestRunner`/`buildTestRunner()`). ## Refactor: request_clarification → Blocking Issue From f961346e81eff6bd3480f73469ee2f70dd3618b5 Mon Sep 17 00:00:00 2001 From: Hassan Abdel-Rahman Date: Thu, 9 Apr 2026 19:16:54 -0400 Subject: [PATCH 03/17] CS-10675: Implement modular validation pipeline with test step MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a modular validation pipeline that runs after every inner-loop agent turn. The pipeline runs all steps concurrently via Promise.allSettled() and aggregates results. Each step is a separate module implementing the ValidationStepRunner interface — adding a new step is one file plus one line in createDefaultPipeline(). Key changes: - ValidationPipeline class implementing the Validator interface with concurrent step execution and per-step exception capture - TestValidationStep wrapping executeTestRunFromRealm() with detailed failure data read back from the completed TestRun card - NoOpStepRunner placeholders for parse, lint, evaluate, instantiate (child tickets CS-10713 through CS-10716) - Step-specific failure shapes via details field on ValidationStepResult - formatForContext() on each step for LLM-friendly failure output - Max iterations + failing validation now blocks the issue with the reason and failure context in the issue description - Extract fetchRealmFilenames() from pullRealmFiles() for reuse - Optional updateIssue() on IssueStore for max-iteration blocking - Updated phase-2-plan.md with validation architecture documentation Co-Authored-By: Claude Opus 4.6 (1M context) --- .../software-factory/docs/phase-2-plan.md | 31 +- .../scripts/smoke-tests/issue-loop-smoke.ts | 75 +++- .../scripts/smoke-tests/smoke-test-realm.ts | 108 ++++- .../src/factory-agent-types.ts | 2 + packages/software-factory/src/issue-loop.ts | 85 +++- .../software-factory/src/issue-scheduler.ts | 5 + .../software-factory/src/realm-operations.ts | 66 ++- .../src/validators/noop-step.ts | 24 ++ .../src/validators/test-step.ts | 372 +++++++++++++++++ .../src/validators/validation-pipeline.ts | 176 ++++++++ packages/software-factory/tests/index.ts | 4 + .../software-factory/tests/issue-loop.test.ts | 88 +++- .../software-factory/tests/test-step.test.ts | 391 ++++++++++++++++++ .../tests/validation-pipeline.test.ts | 281 +++++++++++++ 14 files changed, 1676 insertions(+), 32 deletions(-) create mode 100644 packages/software-factory/src/validators/noop-step.ts create mode 100644 packages/software-factory/src/validators/test-step.ts create mode 100644 packages/software-factory/src/validators/validation-pipeline.ts create mode 100644 packages/software-factory/tests/test-step.test.ts create mode 100644 packages/software-factory/tests/validation-pipeline.test.ts diff --git a/packages/software-factory/docs/phase-2-plan.md b/packages/software-factory/docs/phase-2-plan.md index dd86015735..d78390cc9a 100644 --- a/packages/software-factory/docs/phase-2-plan.md +++ b/packages/software-factory/docs/phase-2-plan.md @@ -57,6 +57,34 @@ After each agent turn in the inner loop, the orchestrator runs these checks dete 4. **Card instantiation** — Verify that sample card instances can be instantiated from their definitions 5. **Run existing tests** — Execute all QUnit `.test.gts` files in the target realm via the QUnit test page +### Validation Architecture (CS-10675) + +The validation pipeline is implemented as a modular system in `src/validators/`: + +**`ValidationStepRunner` interface** — the contract every step must implement: + +```typescript +interface ValidationStepRunner { + readonly step: ValidationStep; + run(targetRealmUrl: string): Promise; + formatForContext(result: ValidationStepResult): string; +} +``` + +**`ValidationPipeline` class** — implements the `Validator` interface and composes step runners: + +- Steps run **concurrently** via `Promise.allSettled()` — a failure or exception in one step does not prevent others from running +- Exceptions thrown by a step are captured as failed `ValidationStepResult` entries with the error message +- `formatForContext()` delegates to each step runner to produce LLM-friendly markdown +- `createDefaultPipeline(config)` factory function composes all 5 steps with config injection + +**Step-specific failure shapes** — each validation type carries its own structured data in `ValidationStepResult.details` (flattened POJOs, not cards): + +- **Test step**: `{ testRunId, passedCount, failedCount, failures: [{ testName, module, message, stackTrace }] }` — reads back the completed TestRun card from the realm for detailed failure data (will become cheap local filesystem reads after boxel-cli integration) +- **Future parse/lint/evaluate/instantiate steps**: each defines its own `details` shape + +**Adding a new validation step** = creating a new module file in `src/validators/` + replacing the `NoOpStepRunner` in `createDefaultPipeline()`. + ### Handling Failures Validation failures are fed back to the agent as context in the **next inner-loop iteration**. The orchestrator does not create fix issues for validation failures — it iterates with the failure details so the agent can self-correct. This mirrors Phase 1's approach (feed test results back, iterate) but with a broader validation pipeline. @@ -65,7 +93,8 @@ The inner loop continues until: - The agent marks the issue as done (all validation passes) - The agent marks the issue as blocked (needs human input) -- Max iterations are reached +- Max iterations are reached with **failing validation** — the orchestrator blocks the issue with the reason ("max iteration limit reached") and the formatted validation failure context in the issue description, then moves to the next issue +- Max iterations are reached with **passing validation** — the issue is exhausted but not blocked (agent did not mark done despite passing validation) The agent always has the option to create new issues via tool calls if it determines that a failure requires separate work (e.g., "this card definition depends on another card that doesn't exist yet — creating a new issue for it"). But the orchestrator does not force this — the agent decides. diff --git a/packages/software-factory/scripts/smoke-tests/issue-loop-smoke.ts b/packages/software-factory/scripts/smoke-tests/issue-loop-smoke.ts index c170a46bad..2f50a3174e 100644 --- a/packages/software-factory/scripts/smoke-tests/issue-loop-smoke.ts +++ b/packages/software-factory/scripts/smoke-tests/issue-loop-smoke.ts @@ -31,11 +31,14 @@ import type { IssueStore } from '../../src/issue-scheduler'; import { runIssueLoop, NoOpValidator, + ValidationPipeline, type IssueContextBuilderLike, type IssueLoopResult, type Validator, } from '../../src/issue-loop'; +import { NoOpStepRunner } from '../../src/validators/noop-step'; + // --------------------------------------------------------------------------- // Helpers // --------------------------------------------------------------------------- @@ -461,8 +464,8 @@ async function scenarioMaxIterations(): Promise { printResult(result); check( - 'issue exits after max iterations', - result.issueResults[0]?.exitReason === 'max_iterations', + 'issue blocked after max iterations with failing validation', + result.issueResults[0]?.exitReason === 'blocked', ); check( 'last validation was failed', @@ -539,6 +542,73 @@ async function scenarioEmptyProject(): Promise { log.info(''); } +async function scenarioValidationPipeline(): Promise { + log.info('--- Scenario 7: ValidationPipeline integration ---'); + log.info(''); + + let store = new MockIssueStore([ + makeIssue({ + id: 'ISS-P1', + status: 'backlog', + priority: 'high', + order: 1, + summary: 'Test pipeline integration', + }), + ]); + + let agent = new MockLoopAgent( + [ + { + toolCalls: [ + { tool: 'write_file', args: { path: 'card.gts', content: 'v1' } }, + ], + updateIssue: { id: 'ISS-P1', status: 'done' }, + }, + ], + store, + ); + + // Use a real ValidationPipeline with all NoOp steps (no server needed) + let pipeline = new ValidationPipeline([ + new NoOpStepRunner('parse'), + new NoOpStepRunner('lint'), + new NoOpStepRunner('evaluate'), + new NoOpStepRunner('instantiate'), + new NoOpStepRunner('test'), + ]); + + let result = await runIssueLoop({ + agent, + contextBuilder: new StubContextBuilder(), + tools: TOOLS, + issueStore: store, + validator: pipeline, + targetRealmUrl: 'https://example.test/target/', + }); + + printResult(result); + check('outcome is all_issues_done', result.outcome === 'all_issues_done'); + check('exit reason is done', result.issueResults[0]?.exitReason === 'done'); + check( + 'validation passed (all NoOp steps)', + result.issueResults[0]?.lastValidation?.passed === true, + ); + check( + '5 validation steps reported', + result.issueResults[0]?.lastValidation?.steps.length === 5, + ); + + // Verify formatForContext works + let formatted = pipeline.formatForContext( + result.issueResults[0]?.lastValidation!, + ); + check( + 'formatForContext reports all passed', + formatted === 'All validation steps passed.', + ); + log.info(''); +} + // --------------------------------------------------------------------------- // Main // --------------------------------------------------------------------------- @@ -554,6 +624,7 @@ async function main(): Promise { await scenarioMaxIterations(); await scenarioBlockedIssue(); await scenarioEmptyProject(); + await scenarioValidationPipeline(); log.info('==========================='); log.info(` ${passed} passed, ${failed} failed`); diff --git a/packages/software-factory/scripts/smoke-tests/smoke-test-realm.ts b/packages/software-factory/scripts/smoke-tests/smoke-test-realm.ts index 6b92160b2a..302f8022b7 100644 --- a/packages/software-factory/scripts/smoke-tests/smoke-test-realm.ts +++ b/packages/software-factory/scripts/smoke-tests/smoke-test-realm.ts @@ -45,6 +45,8 @@ import { getRealmScopedAuth, writeFile, } from '../../src/realm-operations'; +import { createDefaultPipeline } from '../../src/validators/validation-pipeline'; +import type { TestValidationDetails } from '../../src/validators/test-step'; // --------------------------------------------------------------------------- // Sample LLM output -- what the agent would produce in the implementation phase @@ -365,17 +367,111 @@ async function main() { ); log.info(`\n View in Boxel: ${targetRealmUrl}${handle.testRunId}`); - if (expectedStatus) { - log.info( - '\n✓ Smoke test passed! TestRun contains both pass and fail QUnit results.', - ); - } else { - log.info('\n✗ Smoke test had unexpected results.'); + if (!expectedStatus) { + log.info('\n✗ Phase 2 had unexpected results.'); log.info( ` Expected "failed" (mixed pass/fail QUnit tests) but got "${handle.status}"`, ); process.exit(1); } + + log.info( + '\n✓ Phase 2 passed! TestRun contains both pass and fail QUnit results.', + ); + + // ------------------------------------------------------------------------- + // Validation Pipeline — exercises the full ValidationPipeline with a real + // TestValidationStep against the same realm. + // ------------------------------------------------------------------------- + + log.info( + '\n--- Validation Pipeline: Running ValidationPipeline.validate() ---\n', + ); + + let pipeline = createDefaultPipeline({ + authorization, + fetch: fetchImpl, + realmServerUrl, + hostAppUrl: realmServerUrl, + testResultsModuleUrl, + targetRealmUrl, + }); + + let validationResults = await pipeline.validate(targetRealmUrl); + + log.info(` Pipeline result: ${validationResults.passed ? 'PASSED' : 'FAILED'} (${validationResults.steps.length} steps)`); + for (let step of validationResults.steps) { + let statusIcon = step.passed ? '✓' : '✗'; + let detail = ''; + if (step.details) { + let d = step.details as unknown as TestValidationDetails; + if (d.passedCount != null) { + detail = ` (${d.passedCount} passed, ${d.failedCount} failed)`; + } + } + log.info(` ${step.step}: ${statusIcon} ${step.passed ? 'passed' : 'failed'}${step.errors.length > 0 ? ' — no-op' : ''}${detail}`); + } + + // Verify pipeline results + let pipelinePassed = true; + + if (validationResults.passed) { + log.info('\n ✗ Expected pipeline to fail (deliberately failing test)'); + pipelinePassed = false; + } else { + log.info('\n ✓ Pipeline correctly reports failure'); + } + + let testStep = validationResults.steps.find((s) => s.step === 'test'); + if (!testStep) { + log.info(' ✗ No test step in results'); + pipelinePassed = false; + } else if (testStep.passed) { + log.info(' ✗ Test step should have failed'); + pipelinePassed = false; + } else { + log.info(' ✓ Test step correctly failed'); + } + + let noOpSteps = validationResults.steps.filter( + (s) => s.step !== 'test', + ); + let allNoOpsPassed = noOpSteps.every((s) => s.passed); + if (allNoOpsPassed) { + log.info(' ✓ All NoOp steps (parse, lint, evaluate, instantiate) passed'); + } else { + log.info(' ✗ Some NoOp steps failed unexpectedly'); + pipelinePassed = false; + } + + if (testStep?.details) { + let details = testStep.details as unknown as TestValidationDetails; + if (details.passedCount > 0 && details.failedCount > 0) { + log.info(` ✓ Test details: ${details.passedCount} passed, ${details.failedCount} failed`); + } else { + log.info(` ✗ Expected both passing and failing tests, got passed=${details.passedCount} failed=${details.failedCount}`); + pipelinePassed = false; + } + } else { + log.info(' ✗ No test details available'); + pipelinePassed = false; + } + + // Show formatted context for LLM + let formatted = pipeline.formatForContext(validationResults); + log.info('\n Formatted context for LLM:'); + log.info(' ─────────────────────────'); + for (let line of formatted.split('\n')) { + log.info(` ${line}`); + } + log.info(' ─────────────────────────'); + + if (pipelinePassed) { + log.info('\n✓ Validation pipeline smoke test passed!'); + } else { + log.info('\n✗ Validation pipeline smoke test failed.'); + process.exit(1); + } } main().catch((err) => { diff --git a/packages/software-factory/src/factory-agent-types.ts b/packages/software-factory/src/factory-agent-types.ts index def13887fd..e79ad6989d 100644 --- a/packages/software-factory/src/factory-agent-types.ts +++ b/packages/software-factory/src/factory-agent-types.ts @@ -135,6 +135,8 @@ export interface ValidationStepResult { passed: boolean; files?: string[]; errors: ValidationError[]; + /** Step-specific structured data for context formatting (POJOs, not cards). */ + details?: Record; } /** Aggregated results from a full validation run (all steps). */ diff --git a/packages/software-factory/src/issue-loop.ts b/packages/software-factory/src/issue-loop.ts index 971efbf2d2..49a5c74332 100644 --- a/packages/software-factory/src/issue-loop.ts +++ b/packages/software-factory/src/issue-loop.ts @@ -30,21 +30,23 @@ import { logger } from './logger'; let log = logger('issue-loop'); // --------------------------------------------------------------------------- -// Validator interface (placeholder for CS-10675) +// Validator interface // --------------------------------------------------------------------------- /** * Runs the post-iteration validation pipeline. * Steps: parse, lint, evaluate, instantiate, run tests. - * CS-10675 provides the real implementation. + * See ValidationPipeline for the real implementation. */ export interface Validator { validate(targetRealmUrl: string): Promise; + /** Format validation results for LLM context or issue descriptions. */ + formatForContext?(results: ValidationResults): string; } /** * No-op validator that always passes. Used for bootstrap issues - * or when CS-10675 validation is not yet available. + * or when validation is not needed. */ export class NoOpValidator implements Validator { async validate(): Promise { @@ -52,6 +54,17 @@ export class NoOpValidator implements Validator { } } +// --------------------------------------------------------------------------- +// Re-exports from validators/ +// --------------------------------------------------------------------------- + +export { + ValidationPipeline, + createDefaultPipeline, + type ValidationStepRunner, + type ValidationPipelineConfig, +} from './validators/validation-pipeline'; + // --------------------------------------------------------------------------- // Context builder interface for issue-driven loop // --------------------------------------------------------------------------- @@ -138,6 +151,42 @@ function formatValidation(results: ValidationResults): string { return `FAILED — ${failures.join(', ')}`; } +/** + * Build a description for an issue blocked due to max iterations with + * failing validation, including the formatted failure context. + */ +function buildMaxIterationBlockedDescription( + maxIterations: number, + validationResults: ValidationResults, + validator: Validator, +): string { + let lines = [ + `**Blocked: max iteration limit reached (${maxIterations} turns) with failing validation.**`, + '', + `The agent was unable to resolve validation failures within the allowed number of iterations.`, + '', + `### Last Validation Results`, + '', + ]; + + if (validator.formatForContext) { + lines.push(validator.formatForContext(validationResults)); + } else { + // Fallback: format from the raw results + for (let step of validationResults.steps) { + if (!step.passed) { + lines.push(`**${step.step}**: FAILED`); + for (let error of step.errors) { + lines.push(`- ${error.message}`); + } + lines.push(''); + } + } + } + + return lines.join('\n'); +} + // --------------------------------------------------------------------------- // Main loop // --------------------------------------------------------------------------- @@ -263,8 +312,36 @@ export async function runIssueLoop( if (exitReason === 'max_iterations') { log.info( - ` Max iterations (${maxIterationsPerIssue}) reached for issue ${issueSummaryLabel(issue)} — exiting inner loop`, + ` Max iterations (${maxIterationsPerIssue}) reached for issue ${issueSummaryLabel(issue)}`, ); + + // If validation still failing at max iterations, block the issue with + // the reason and failure context so it's visible in the realm. + if (validationResults && !validationResults.passed) { + log.info( + ` Validation still failing — blocking issue with failure context`, + ); + exitReason = 'blocked'; + + if (issueStore.updateIssue) { + try { + let description = buildMaxIterationBlockedDescription( + maxIterationsPerIssue, + validationResults, + validator, + ); + await issueStore.updateIssue(issue.id, { + status: 'blocked', + description, + }); + } catch (err) { + log.warn( + ` Failed to update issue status to blocked: ${err instanceof Error ? err.message : String(err)}`, + ); + } + } + } + exhaustedIssues.add(issue.id); } diff --git a/packages/software-factory/src/issue-scheduler.ts b/packages/software-factory/src/issue-scheduler.ts index bc3a7aa616..a8008c6d38 100644 --- a/packages/software-factory/src/issue-scheduler.ts +++ b/packages/software-factory/src/issue-scheduler.ts @@ -30,6 +30,11 @@ export interface IssueStore { listIssues(): Promise; /** Re-read a single issue's current state from the realm. */ refreshIssue(issueId: string): Promise; + /** Update issue fields in the realm. Used for max-iteration blocking. */ + updateIssue?( + issueId: string, + updates: { status?: string; description?: string }, + ): Promise; } // --------------------------------------------------------------------------- diff --git a/packages/software-factory/src/realm-operations.ts b/packages/software-factory/src/realm-operations.ts index baf1e55c86..71fede42f5 100644 --- a/packages/software-factory/src/realm-operations.ts +++ b/packages/software-factory/src/realm-operations.ts @@ -2,7 +2,7 @@ * Shared realm operations for the software-factory scripts. * * Centralizes HTTP-based realm API calls so they're easy to find and - * refactor to boxel-cli tool calls when --jwt support is added (CS-10529). + * refactor to boxel-cli tool calls (CS-10529). */ import { mkdirSync, writeFileSync } from 'node:fs'; @@ -793,22 +793,17 @@ async function addRealmToMatrixAccountData( } // --------------------------------------------------------------------------- -// Pull Realm Files +// Fetch Realm Filenames // --------------------------------------------------------------------------- /** - * Download all files from a remote realm to a local directory using the - * `_mtimes` endpoint to discover file paths. - * - * TODO: Replace with `boxel pull --jwt ` once CS-10529 is implemented. - * - * Returns the list of relative file paths that were downloaded. + * Fetch the list of file paths from a realm via the `_mtimes` endpoint. + * Returns relative file paths (e.g., `hello.gts`, `Cards/my-card.json`). */ -export async function pullRealmFiles( +export async function fetchRealmFilenames( realmUrl: string, - localDir: string, options?: RealmFetchOptions, -): Promise<{ files: string[]; error?: string }> { +): Promise<{ filenames: string[]; error?: string }> { let fetchImpl = options?.fetch ?? globalThis.fetch; let normalizedRealmUrl = ensureTrailingSlash(realmUrl); @@ -817,14 +812,13 @@ export async function pullRealmFiles( SupportedMimeType.JSONAPI, ); - // Fetch mtimes to discover all file paths. let mtimesUrl = `${normalizedRealmUrl}_mtimes`; let mtimesResponse: Response; try { mtimesResponse = await fetchImpl(mtimesUrl, { method: 'GET', headers }); } catch (err) { return { - files: [], + filenames: [], error: `Failed to fetch _mtimes: ${err instanceof Error ? err.message : String(err)}`, }; } @@ -832,7 +826,7 @@ export async function pullRealmFiles( if (!mtimesResponse.ok) { let body = await mtimesResponse.text(); return { - files: [], + filenames: [], error: `_mtimes returned HTTP ${mtimesResponse.status}: ${body.slice(0, 300)}`, }; } @@ -845,11 +839,13 @@ export async function pullRealmFiles( (json as { data?: { attributes?: { mtimes?: Record } } }) ?.data?.attributes?.mtimes ?? json; } catch { - return { files: [], error: 'Failed to parse _mtimes response as JSON' }; + return { + filenames: [], + error: 'Failed to parse _mtimes response as JSON', + }; } - // Download each file. - let downloadedFiles: string[] = []; + let filenames: string[] = []; for (let fullUrl of Object.keys(mtimes)) { if (!fullUrl.startsWith(normalizedRealmUrl)) { continue; @@ -858,6 +854,42 @@ export async function pullRealmFiles( if (!relativePath || relativePath.endsWith('/')) { continue; } + filenames.push(relativePath); + } + + return { filenames: filenames.sort() }; +} + +// --------------------------------------------------------------------------- +// Pull Realm Files +// --------------------------------------------------------------------------- + +/** + * Download all files from a remote realm to a local directory using the + * `_mtimes` endpoint to discover file paths. + * + * TODO: Replace with `boxel pull` once CS-10529 is implemented. + * + * Returns the list of relative file paths that were downloaded. + */ +export async function pullRealmFiles( + realmUrl: string, + localDir: string, + options?: RealmFetchOptions, +): Promise<{ files: string[]; error?: string }> { + let fetchImpl = options?.fetch ?? globalThis.fetch; + let normalizedRealmUrl = ensureTrailingSlash(realmUrl); + + // Use fetchRealmFilenames to discover all file paths. + let { filenames, error } = await fetchRealmFilenames(realmUrl, options); + if (error) { + return { files: [], error }; + } + + // Download each file. + let downloadedFiles: string[] = []; + for (let relativePath of filenames) { + let fullUrl = `${normalizedRealmUrl}${relativePath}`; let localPath = join(localDir, relativePath); mkdirSync(dirname(localPath), { recursive: true }); diff --git a/packages/software-factory/src/validators/noop-step.ts b/packages/software-factory/src/validators/noop-step.ts new file mode 100644 index 0000000000..bc109c0b9e --- /dev/null +++ b/packages/software-factory/src/validators/noop-step.ts @@ -0,0 +1,24 @@ +import type { ValidationStep, ValidationStepResult } from '../factory-agent'; + +import type { ValidationStepRunner } from './validation-pipeline'; + +/** + * No-op validation step that always passes. + * Used as a placeholder for unimplemented steps (parse, lint, evaluate, instantiate). + * Each placeholder will be replaced by a real implementation via child tickets. + */ +export class NoOpStepRunner implements ValidationStepRunner { + readonly step: ValidationStep; + + constructor(step: ValidationStep) { + this.step = step; + } + + async run(): Promise { + return { step: this.step, passed: true, errors: [] }; + } + + formatForContext(): string { + return ''; + } +} diff --git a/packages/software-factory/src/validators/test-step.ts b/packages/software-factory/src/validators/test-step.ts new file mode 100644 index 0000000000..5a5db73c82 --- /dev/null +++ b/packages/software-factory/src/validators/test-step.ts @@ -0,0 +1,372 @@ +/** + * Test validation step — runs QUnit tests in the target realm + * and reads back detailed results from the completed TestRun card. + * + * Wraps `executeTestRunFromRealm()` for the actual test execution, + * then reads the TestRun card from the realm to get detailed failure + * data (individual test names, assertions, stack traces). + * + * Per phase-2-plan.md, realm reads will eventually become local filesystem + * reads after boxel-cli integration — cheap rather than HTTP round-trips. + */ + +import type { ValidationStepResult } from '../factory-agent'; +import type { LooseSingleCardDocument } from '@cardstack/runtime-common'; + +import { executeTestRunFromRealm } from '../test-run-execution'; +import { + fetchRealmFilenames, + readFile, + type RealmFetchOptions, +} from '../realm-operations'; +import type { ExecuteTestRunOptions, TestRunHandle } from '../test-run-types'; +import { logger } from '../logger'; + +import type { ValidationStepRunner } from './validation-pipeline'; + +let log = logger('test-validation-step'); + +// --------------------------------------------------------------------------- +// Types +// --------------------------------------------------------------------------- + +export interface TestValidationStepConfig { + authorization?: string; + fetch?: typeof globalThis.fetch; + realmServerUrl: string; + hostAppUrl: string; + testResultsModuleUrl: string; + targetRealmUrl: string; + issueId?: string; + /** Injected for testing — defaults to executeTestRunFromRealm. */ + executeTestRun?: ( + options: ExecuteTestRunOptions, + ) => Promise; + /** Injected for testing — defaults to fetchRealmFilenames. */ + fetchFilenames?: ( + realmUrl: string, + options?: RealmFetchOptions, + ) => Promise<{ filenames: string[]; error?: string }>; + /** Injected for testing — defaults to readFile from realm-operations. */ + readCard?: ( + realmUrl: string, + path: string, + options?: RealmFetchOptions, + ) => Promise<{ + ok: boolean; + document?: LooseSingleCardDocument; + error?: string; + }>; +} + +/** Flattened POJO for test validation details — not a card, just data. */ +export interface TestValidationDetails { + testRunId: string; + passedCount: number; + failedCount: number; + durationMs: number; + failures: TestValidationFailure[]; +} + +export interface TestValidationFailure { + testName: string; + module: string; + message: string; + stackTrace?: string; +} + +// --------------------------------------------------------------------------- +// TestValidationStep +// --------------------------------------------------------------------------- + +export class TestValidationStep implements ValidationStepRunner { + readonly step = 'test' as const; + + private config: TestValidationStepConfig; + private lastSequenceNumber = 0; + + private executeTestRunFn: ( + options: ExecuteTestRunOptions, + ) => Promise; + private fetchFilenamesFn: ( + realmUrl: string, + options?: RealmFetchOptions, + ) => Promise<{ filenames: string[]; error?: string }>; + private readCardFn: ( + realmUrl: string, + path: string, + options?: RealmFetchOptions, + ) => Promise<{ + ok: boolean; + document?: LooseSingleCardDocument; + error?: string; + }>; + + constructor(config: TestValidationStepConfig) { + this.config = config; + this.executeTestRunFn = config.executeTestRun ?? executeTestRunFromRealm; + this.fetchFilenamesFn = config.fetchFilenames ?? fetchRealmFilenames; + this.readCardFn = config.readCard ?? readFile; + } + + async run(targetRealmUrl: string): Promise { + // Step 1: Discover .test.gts files in the realm + let testFiles: string[]; + try { + testFiles = await this.discoverTestFiles(targetRealmUrl); + } catch (err) { + return { + step: 'test', + passed: false, + errors: [ + { + message: `Failed to discover test files: ${err instanceof Error ? err.message : String(err)}`, + }, + ], + }; + } + + if (testFiles.length === 0) { + log.info('No .test.gts files found — nothing to validate'); + return { step: 'test', passed: true, files: [], errors: [] }; + } + + log.info(`Found ${testFiles.length} test file(s): ${testFiles.join(', ')}`); + + // Step 2: Execute tests + let handle: TestRunHandle; + try { + let slug = this.config.issueId + ? deriveIssueSlug(this.config.issueId) + : 'validation'; + + handle = await this.executeTestRunFn({ + targetRealmUrl, + testResultsModuleUrl: this.config.testResultsModuleUrl, + slug, + testNames: [], + authorization: this.config.authorization, + fetch: this.config.fetch, + realmServerUrl: this.config.realmServerUrl, + hostAppUrl: this.config.hostAppUrl, + forceNew: true, + lastSequenceNumber: this.lastSequenceNumber, + }); + + if (handle.sequenceNumber != null) { + this.lastSequenceNumber = handle.sequenceNumber; + } + } catch (err) { + return { + step: 'test', + passed: false, + files: testFiles, + errors: [ + { + message: `Test execution failed: ${err instanceof Error ? err.message : String(err)}`, + }, + ], + }; + } + + // Step 3: Read back the completed TestRun card for detailed results + let details = await this.readTestRunDetails( + targetRealmUrl, + handle.testRunId, + ); + + // Step 4: Map to ValidationStepResult + if (handle.status === 'passed') { + return { + step: 'test', + passed: true, + files: testFiles, + errors: [], + details: details as unknown as Record, + }; + } + + let errors = details + ? details.failures.map((f) => ({ + file: f.module, + message: `${f.testName}: ${f.message}`, + stackTrace: f.stackTrace, + })) + : [ + { + message: + handle.errorMessage ?? `Tests ${handle.status}`, + }, + ]; + + return { + step: 'test', + passed: false, + files: testFiles, + errors, + details: details as unknown as Record, + }; + } + + formatForContext(result: ValidationStepResult): string { + if (result.passed) { + let details = result.details as unknown as + | TestValidationDetails + | undefined; + if (details && details.passedCount > 0) { + return `## Test Validation: PASSED\n${details.passedCount} test(s) passed (TestRun: ${details.testRunId})`; + } + return ''; + } + + let details = result.details as unknown as + | TestValidationDetails + | undefined; + if (!details) { + // No detailed data — format from errors array + let errorLines = result.errors.map((e) => `- ${e.message}`).join('\n'); + return `## Test Validation: FAILED\n${errorLines}`; + } + + let lines: string[] = [ + `## Test Validation: FAILED`, + `${details.passedCount} passed, ${details.failedCount} failed (TestRun: ${details.testRunId})`, + ]; + + for (let failure of details.failures) { + lines.push(''); + lines.push(`FAILED: "${failure.testName}" (${failure.module})`); + lines.push(` ${failure.message}`); + if (failure.stackTrace) { + lines.push(` ${failure.stackTrace.slice(0, 300)}`); + } + } + + return lines.join('\n'); + } + + // ------------------------------------------------------------------------- + // Private helpers + // ------------------------------------------------------------------------- + + private async discoverTestFiles( + targetRealmUrl: string, + ): Promise { + let result = await this.fetchFilenamesFn(targetRealmUrl, { + authorization: this.config.authorization, + fetch: this.config.fetch, + }); + + if (result.error) { + log.warn(`Failed to fetch realm filenames: ${result.error}`); + throw new Error(result.error); + } + + return result.filenames.filter((f) => f.endsWith('.test.gts')); + } + + private async readTestRunDetails( + targetRealmUrl: string, + testRunId: string, + ): Promise { + try { + let result = await this.readCardFn(targetRealmUrl, testRunId, { + authorization: this.config.authorization, + fetch: this.config.fetch, + }); + + if (!result.ok || !result.document) { + log.warn( + `Could not read TestRun card ${testRunId}: ${result.error ?? 'unknown error'}`, + ); + return undefined; + } + + return extractTestDetails(testRunId, result.document); + } catch (err) { + log.warn( + `Error reading TestRun card ${testRunId}: ${err instanceof Error ? err.message : String(err)}`, + ); + return undefined; + } + } +} + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +/** + * Derive a slug from an issue ID. + * e.g., "Issues/sticky-note-define-core" → "sticky-note-define-core" + */ +function deriveIssueSlug(issueId: string): string { + let parts = issueId.split('/'); + return parts[parts.length - 1]; +} + +interface TestRunCardAttributes { + status?: string; + passedCount?: number; + failedCount?: number; + durationMs?: number; + errorMessage?: string; + moduleResults?: { + moduleRef?: { module: string; name: string }; + results?: { + testName?: string; + status?: string; + message?: string; + stackTrace?: string; + durationMs?: number; + }[]; + }[]; +} + +/** + * Extract test validation details from a TestRun card document. + * Handles the JSON:API document shape returned by `readFile()`. + */ +function extractTestDetails( + testRunId: string, + document: LooseSingleCardDocument, +): TestValidationDetails { + let attrs = (document.data?.attributes ?? {}) as TestRunCardAttributes; + + let failures: TestValidationFailure[] = []; + let passedCount = 0; + let failedCount = 0; + + for (let moduleResult of attrs.moduleResults ?? []) { + let moduleName = moduleResult.moduleRef?.module ?? 'unknown'; + for (let result of moduleResult.results ?? []) { + if (result.status === 'passed') { + passedCount++; + } else if (result.status === 'failed' || result.status === 'error') { + failedCount++; + failures.push({ + testName: result.testName ?? 'unknown test', + module: moduleName, + message: result.message ?? `Test ${result.status}`, + stackTrace: result.stackTrace, + }); + } + } + } + + // Prefer the card's computed counts if available + if (attrs.passedCount != null) { + passedCount = attrs.passedCount; + } + if (attrs.failedCount != null) { + failedCount = attrs.failedCount; + } + + return { + testRunId, + passedCount, + failedCount, + durationMs: attrs.durationMs ?? 0, + failures, + }; +} diff --git a/packages/software-factory/src/validators/validation-pipeline.ts b/packages/software-factory/src/validators/validation-pipeline.ts new file mode 100644 index 0000000000..889708e924 --- /dev/null +++ b/packages/software-factory/src/validators/validation-pipeline.ts @@ -0,0 +1,176 @@ +/** + * Modular validation pipeline for the issue-driven loop. + * + * Each validation step is a separate module implementing `ValidationStepRunner`. + * The pipeline runs all steps concurrently via `Promise.allSettled()` and + * aggregates results. Adding a new step = creating a new module + one line + * in `createDefaultPipeline()`. + */ + +import type { + ValidationStep, + ValidationStepResult, + ValidationResults, +} from '../factory-agent'; + +import type { Validator } from '../issue-loop'; + +import { NoOpStepRunner } from './noop-step'; +import { TestValidationStep } from './test-step'; + +import type { TestValidationStepConfig } from './test-step'; + +import { logger } from '../logger'; + +let log = logger('validation-pipeline'); + +// --------------------------------------------------------------------------- +// ValidationStepRunner interface +// --------------------------------------------------------------------------- + +/** + * Contract that every validation step module must implement. + * + * Each step: + * - Returns a result even when there's nothing to validate (passed: true) + * - Provides step-specific `details` on the result for context formatting + * - Implements `formatForContext()` to produce LLM-friendly output + */ +export interface ValidationStepRunner { + readonly step: ValidationStep; + run(targetRealmUrl: string): Promise; + /** Format step results for LLM context. Returns human-readable string, empty if nothing to report. */ + formatForContext(result: ValidationStepResult): string; +} + +// --------------------------------------------------------------------------- +// ValidationPipeline +// --------------------------------------------------------------------------- + +/** + * Implements the `Validator` interface from issue-loop.ts. + * Runs all step runners concurrently via `Promise.allSettled()`. + * A failure or exception in one step does not prevent others from completing. + */ +export class ValidationPipeline implements Validator { + private runners: ValidationStepRunner[]; + + constructor(runners: ValidationStepRunner[]) { + this.runners = runners; + } + + async validate(targetRealmUrl: string): Promise { + if (this.runners.length === 0) { + return { passed: true, steps: [] }; + } + + let settled = await Promise.allSettled( + this.runners.map((runner) => runner.run(targetRealmUrl)), + ); + + let stepResults: ValidationStepResult[] = []; + let allPassed = true; + + for (let i = 0; i < settled.length; i++) { + let outcome = settled[i]; + if (outcome.status === 'fulfilled') { + stepResults.push(outcome.value); + if (!outcome.value.passed) { + allPassed = false; + } + } else { + // Step threw an exception — capture as a failed result + let reason = outcome.reason; + let message = + reason instanceof Error ? reason.message : String(reason); + log.error( + `Validation step "${this.runners[i].step}" threw: ${message}`, + ); + stepResults.push({ + step: this.runners[i].step, + passed: false, + errors: [{ message }], + }); + allPassed = false; + } + } + + return { passed: allPassed, steps: stepResults }; + } + + /** + * Format all validation results for LLM context. + * Delegates to each step runner's `formatForContext()`. + * Returns a combined markdown string suitable for inclusion in the agent prompt. + */ + formatForContext(results: ValidationResults): string { + if (results.passed && results.steps.every((s) => s.passed)) { + return 'All validation steps passed.'; + } + + let sections: string[] = []; + + // Summary line + let failedSteps = results.steps.filter((s) => !s.passed); + let passedSteps = results.steps.filter((s) => s.passed); + sections.push( + `Validation: ${failedSteps.length} step(s) failed, ${passedSteps.length} passed.`, + ); + + // Per-step details from runners + for (let i = 0; i < this.runners.length; i++) { + let runner = this.runners[i]; + let stepResult = results.steps.find((s) => s.step === runner.step); + if (!stepResult) { + continue; + } + + let formatted = runner.formatForContext(stepResult); + if (formatted) { + sections.push(formatted); + } + } + + return sections.join('\n\n'); + } +} + +// --------------------------------------------------------------------------- +// Factory function +// --------------------------------------------------------------------------- + +export interface ValidationPipelineConfig { + authorization?: string; + fetch?: typeof globalThis.fetch; + realmServerUrl: string; + hostAppUrl: string; + testResultsModuleUrl: string; + targetRealmUrl: string; + issueId?: string; +} + +/** + * Create the default validation pipeline with all 5 steps. + * Currently only the test step is implemented; others are NoOp placeholders. + */ +export function createDefaultPipeline( + config: ValidationPipelineConfig, +): ValidationPipeline { + let testConfig: TestValidationStepConfig = { + authorization: config.authorization, + fetch: config.fetch, + realmServerUrl: config.realmServerUrl, + hostAppUrl: config.hostAppUrl, + testResultsModuleUrl: config.testResultsModuleUrl, + targetRealmUrl: config.targetRealmUrl, + issueId: config.issueId, + }; + + return new ValidationPipeline([ + new NoOpStepRunner('parse'), + new NoOpStepRunner('lint'), + new NoOpStepRunner('evaluate'), + new NoOpStepRunner('instantiate'), + new TestValidationStep(testConfig), + ]); +} diff --git a/packages/software-factory/tests/index.ts b/packages/software-factory/tests/index.ts index 1ac1eb4b27..4295a33dc1 100644 --- a/packages/software-factory/tests/index.ts +++ b/packages/software-factory/tests/index.ts @@ -16,3 +16,7 @@ import './factory-tool-builder.test'; import './factory-loop.test'; import './factory-implement.test'; import './realm-auth.test'; +import './issue-loop.test'; +import './issue-scheduler.test'; +import './validation-pipeline.test'; +import './test-step.test'; diff --git a/packages/software-factory/tests/issue-loop.test.ts b/packages/software-factory/tests/issue-loop.test.ts index 295d233d1d..17c229f71d 100644 --- a/packages/software-factory/tests/issue-loop.test.ts +++ b/packages/software-factory/tests/issue-loop.test.ts @@ -449,7 +449,7 @@ module('issue-loop > blocked issue', function () { // --------------------------------------------------------------------------- module('issue-loop > max inner iterations', function () { - test('exits inner loop after maxIterationsPerIssue', async function (assert) { + test('blocks issue when max iterations reached with failing validation', async function (assert) { let store = new MockIssueStore([ makeIssue({ id: 'iss-1', status: 'backlog', priority: 'high', order: 1 }), ]); @@ -481,13 +481,97 @@ module('issue-loop > max inner iterations', function () { }), ); - assert.strictEqual(result.issueResults[0].exitReason, 'max_iterations'); + // When max iterations is hit with failing validation, issue is blocked + assert.strictEqual(result.issueResults[0].exitReason, 'blocked'); assert.strictEqual(result.issueResults[0].innerIterations, 3); assert.false( result.issueResults[0].lastValidation?.passed, 'last validation was a failure', ); }); + + test('max iterations with passing validation keeps max_iterations exit reason', async function (assert) { + let store = new MockIssueStore([ + makeIssue({ id: 'iss-1', status: 'backlog', priority: 'high', order: 1 }), + ]); + + let turns: MockAgentTurn[] = []; + let validations: ValidationResults[] = []; + + for (let i = 0; i < 3; i++) { + turns.push({ + toolCalls: [ + { + tool: 'write_file', + args: { path: 'card.gts', content: `attempt ${i}` }, + }, + ], + }); + validations.push(makePassingValidation()); + } + + let agent = new MockLoopAgent(turns, store); + + let result = await runIssueLoop( + makeLoopConfig({ + agent, + issueStore: store, + validator: new MockValidator(validations), + maxIterationsPerIssue: 3, + }), + ); + + // When validation passes but issue not done, exit reason stays max_iterations + assert.strictEqual(result.issueResults[0].exitReason, 'max_iterations'); + assert.strictEqual(result.issueResults[0].innerIterations, 3); + }); + + test('updateIssue called when blocking due to max iterations + failing validation', async function (assert) { + let updateCalls: { issueId: string; updates: Record }[] = []; + + let store = new MockIssueStore([ + makeIssue({ id: 'iss-1', status: 'backlog', priority: 'high', order: 1 }), + ]); + + // Add updateIssue to mock store + (store as MockIssueStore & { updateIssue: Function }).updateIssue = async ( + issueId: string, + updates: { status?: string; description?: string }, + ) => { + updateCalls.push({ issueId, updates }); + }; + + let turns: MockAgentTurn[] = []; + let validations: ValidationResults[] = []; + + for (let i = 0; i < 2; i++) { + turns.push({ + toolCalls: [ + { tool: 'write_file', args: { path: 'card.gts', content: `v${i}` } }, + ], + }); + validations.push(makeFailingValidation()); + } + + let agent = new MockLoopAgent(turns, store); + + await runIssueLoop( + makeLoopConfig({ + agent, + issueStore: store, + validator: new MockValidator(validations), + maxIterationsPerIssue: 2, + }), + ); + + assert.strictEqual(updateCalls.length, 1, 'updateIssue called once'); + assert.strictEqual(updateCalls[0].issueId, 'iss-1'); + assert.strictEqual(updateCalls[0].updates.status, 'blocked'); + assert.ok( + (updateCalls[0].updates.description as string).includes('max iteration limit'), + 'description includes reason', + ); + }); }); // --------------------------------------------------------------------------- diff --git a/packages/software-factory/tests/test-step.test.ts b/packages/software-factory/tests/test-step.test.ts new file mode 100644 index 0000000000..43dafb4e14 --- /dev/null +++ b/packages/software-factory/tests/test-step.test.ts @@ -0,0 +1,391 @@ +import { module, test } from 'qunit'; + +import type { LooseSingleCardDocument } from '@cardstack/runtime-common'; + +import type { ValidationStepResult } from '../src/factory-agent'; + +import { + TestValidationStep, + type TestValidationStepConfig, + type TestValidationDetails, +} from '../src/validators/test-step'; + +import type { TestRunHandle, ExecuteTestRunOptions } from '../src/test-run-types'; +import type { RealmFetchOptions } from '../src/realm-operations'; + +// --------------------------------------------------------------------------- +// Mock helpers +// --------------------------------------------------------------------------- + +function makeConfig( + overrides: Partial = {}, +): TestValidationStepConfig { + return { + realmServerUrl: 'https://example.test/', + hostAppUrl: 'https://example.test/', + testResultsModuleUrl: 'https://example.test/test-results', + targetRealmUrl: 'https://example.test/realm/', + ...overrides, + }; +} + +function makeFetchFilenames( + filenames: string[], +): ( + realmUrl: string, + options?: RealmFetchOptions, +) => Promise<{ filenames: string[]; error?: string }> { + return async () => ({ filenames }); +} + +function makeFetchFilenamesError( + error: string, +): ( + realmUrl: string, + options?: RealmFetchOptions, +) => Promise<{ filenames: string[]; error?: string }> { + return async () => ({ filenames: [], error }); +} + +function makeExecuteTestRun( + handle: TestRunHandle, +): (options: ExecuteTestRunOptions) => Promise { + return async () => handle; +} + +function makeExecuteTestRunThrows( + errorMessage: string, +): (options: ExecuteTestRunOptions) => Promise { + return async () => { + throw new Error(errorMessage); + }; +} + +function makeTestRunCardDocument(attrs: Record): LooseSingleCardDocument { + return { + data: { + type: 'card', + attributes: attrs, + meta: { + adoptsFrom: { module: 'test-results', name: 'TestRun' }, + }, + }, + } as LooseSingleCardDocument; +} + +function makeReadCard( + document: LooseSingleCardDocument, +): (realmUrl: string, path: string, options?: RealmFetchOptions) => Promise<{ + ok: boolean; + document?: LooseSingleCardDocument; + error?: string; +}> { + return async () => ({ ok: true, document }); +} + +function makeReadCardError( + error: string, +): (realmUrl: string, path: string, options?: RealmFetchOptions) => Promise<{ + ok: boolean; + document?: LooseSingleCardDocument; + error?: string; +}> { + return async () => ({ ok: false, error }); +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +module('TestValidationStep', function () { + test('no .test.gts files returns passed', async function (assert) { + let step = new TestValidationStep( + makeConfig({ + fetchFilenames: makeFetchFilenames([ + 'hello.gts', + 'Cards/my-card.json', + 'index.json', + ]), + }), + ); + + let result = await step.run('https://example.test/realm/'); + + assert.true(result.passed); + assert.strictEqual(result.step, 'test'); + assert.strictEqual(result.errors.length, 0); + assert.deepEqual(result.files, []); + }); + + test('tests exist and pass — returns passed with details', async function (assert) { + let testRunDoc = makeTestRunCardDocument({ + status: 'passed', + passedCount: 3, + failedCount: 0, + durationMs: 1500, + moduleResults: [ + { + moduleRef: { module: 'hello.test.gts', name: 'default' }, + results: [ + { testName: 'renders greeting', status: 'passed', durationMs: 500 }, + { testName: 'shows title', status: 'passed', durationMs: 500 }, + { testName: 'has style', status: 'passed', durationMs: 500 }, + ], + }, + ], + }); + + let step = new TestValidationStep( + makeConfig({ + fetchFilenames: makeFetchFilenames(['hello.gts', 'hello.test.gts']), + executeTestRun: makeExecuteTestRun({ + testRunId: 'Test Runs/validation-1', + status: 'passed', + sequenceNumber: 1, + }), + readCard: makeReadCard(testRunDoc), + }), + ); + + let result = await step.run('https://example.test/realm/'); + + assert.true(result.passed); + assert.strictEqual(result.step, 'test'); + assert.deepEqual(result.files, ['hello.test.gts']); + assert.ok(result.details, 'has details'); + + let details = result.details as unknown as TestValidationDetails; + assert.strictEqual(details.testRunId, 'Test Runs/validation-1'); + assert.strictEqual(details.passedCount, 3); + assert.strictEqual(details.failedCount, 0); + assert.strictEqual(details.failures.length, 0); + }); + + test('tests exist and fail — returns failed with detailed failures', async function (assert) { + let testRunDoc = makeTestRunCardDocument({ + status: 'failed', + passedCount: 1, + failedCount: 1, + durationMs: 2000, + moduleResults: [ + { + moduleRef: { module: 'hello.test.gts', name: 'default' }, + results: [ + { testName: 'renders greeting', status: 'passed', durationMs: 500 }, + { + testName: 'shows author', + status: 'failed', + message: "Expected 'Alice' but got ''", + stackTrace: 'at hello.test.gts:15:5', + durationMs: 500, + }, + ], + }, + ], + }); + + let step = new TestValidationStep( + makeConfig({ + fetchFilenames: makeFetchFilenames(['hello.test.gts']), + executeTestRun: makeExecuteTestRun({ + testRunId: 'Test Runs/validation-1', + status: 'failed', + sequenceNumber: 1, + }), + readCard: makeReadCard(testRunDoc), + }), + ); + + let result = await step.run('https://example.test/realm/'); + + assert.false(result.passed); + assert.strictEqual(result.step, 'test'); + assert.ok(result.errors.length > 0, 'has errors'); + assert.ok(result.details, 'has details'); + + let details = result.details as unknown as TestValidationDetails; + assert.strictEqual(details.passedCount, 1); + assert.strictEqual(details.failedCount, 1); + assert.strictEqual(details.failures.length, 1); + assert.strictEqual(details.failures[0].testName, 'shows author'); + assert.ok(details.failures[0].message.includes("Expected 'Alice'")); + }); + + test('executeTestRun throws — returns failed with error message', async function (assert) { + let step = new TestValidationStep( + makeConfig({ + fetchFilenames: makeFetchFilenames(['hello.test.gts']), + executeTestRun: makeExecuteTestRunThrows('Browser launch failed'), + }), + ); + + let result = await step.run('https://example.test/realm/'); + + assert.false(result.passed); + assert.strictEqual(result.errors.length, 1); + assert.ok(result.errors[0].message.includes('Browser launch failed')); + }); + + test('fetchFilenames fails — returns failed with error', async function (assert) { + let step = new TestValidationStep( + makeConfig({ + fetchFilenames: makeFetchFilenamesError('Network timeout'), + }), + ); + + let result = await step.run('https://example.test/realm/'); + + assert.false(result.passed); + assert.ok(result.errors[0].message.includes('Network timeout')); + }); + + test('sequence number tracked across calls', async function (assert) { + let capturedOptions: ExecuteTestRunOptions[] = []; + + let step = new TestValidationStep( + makeConfig({ + fetchFilenames: makeFetchFilenames(['hello.test.gts']), + executeTestRun: async (options) => { + capturedOptions.push(options); + return { + testRunId: `Test Runs/validation-${capturedOptions.length}`, + status: 'passed' as const, + sequenceNumber: capturedOptions.length, + }; + }, + readCard: makeReadCard( + makeTestRunCardDocument({ + status: 'passed', + passedCount: 1, + failedCount: 0, + moduleResults: [], + }), + ), + }), + ); + + await step.run('https://example.test/realm/'); + await step.run('https://example.test/realm/'); + + assert.strictEqual(capturedOptions[0].lastSequenceNumber, 0); + assert.strictEqual(capturedOptions[1].lastSequenceNumber, 1); + }); + + test('readCard failure falls back to handle-only result', async function (assert) { + let step = new TestValidationStep( + makeConfig({ + fetchFilenames: makeFetchFilenames(['hello.test.gts']), + executeTestRun: makeExecuteTestRun({ + testRunId: 'Test Runs/validation-1', + status: 'failed', + errorMessage: '2 tests failed', + sequenceNumber: 1, + }), + readCard: makeReadCardError('fetch failed'), + }), + ); + + let result = await step.run('https://example.test/realm/'); + + assert.false(result.passed); + assert.ok(result.errors.length > 0); + assert.ok(result.errors[0].message.includes('2 tests failed')); + assert.notOk(result.details, 'no details when card read fails'); + }); + + test('formatForContext with passing result and details', function (assert) { + let step = new TestValidationStep(makeConfig()); + + let result: ValidationStepResult = { + step: 'test', + passed: true, + errors: [], + details: { + testRunId: 'Test Runs/validation-1', + passedCount: 5, + failedCount: 0, + durationMs: 1000, + failures: [], + }, + }; + + let formatted = step.formatForContext(result); + assert.ok(formatted.includes('PASSED')); + assert.ok(formatted.includes('5')); + }); + + test('formatForContext with failing result and detailed failures', function (assert) { + let step = new TestValidationStep(makeConfig()); + + let result: ValidationStepResult = { + step: 'test', + passed: false, + errors: [{ message: 'shows author: Expected Alice but got empty' }], + details: { + testRunId: 'Test Runs/validation-1', + passedCount: 2, + failedCount: 1, + durationMs: 1500, + failures: [ + { + testName: 'shows author', + module: 'hello.test.gts', + message: "Expected 'Alice' but got ''", + }, + ], + }, + }; + + let formatted = step.formatForContext(result); + assert.ok(formatted.includes('FAILED')); + assert.ok(formatted.includes('2 passed')); + assert.ok(formatted.includes('1 failed')); + assert.ok(formatted.includes('shows author')); + assert.ok(formatted.includes("Expected 'Alice'")); + }); + + test('formatForContext without details falls back to errors', function (assert) { + let step = new TestValidationStep(makeConfig()); + + let result: ValidationStepResult = { + step: 'test', + passed: false, + errors: [{ message: 'Browser launch failed' }], + }; + + let formatted = step.formatForContext(result); + assert.ok(formatted.includes('FAILED')); + assert.ok(formatted.includes('Browser launch failed')); + }); + + test('issueId is used for slug derivation', async function (assert) { + let capturedOptions: ExecuteTestRunOptions | undefined; + + let step = new TestValidationStep( + makeConfig({ + issueId: 'Issues/sticky-note-define-core', + fetchFilenames: makeFetchFilenames(['hello.test.gts']), + executeTestRun: async (options) => { + capturedOptions = options; + return { + testRunId: 'Test Runs/sticky-note-define-core-1', + status: 'passed' as const, + sequenceNumber: 1, + }; + }, + readCard: makeReadCard( + makeTestRunCardDocument({ + status: 'passed', + passedCount: 1, + failedCount: 0, + moduleResults: [], + }), + ), + }), + ); + + await step.run('https://example.test/realm/'); + + assert.strictEqual(capturedOptions?.slug, 'sticky-note-define-core'); + }); +}); diff --git a/packages/software-factory/tests/validation-pipeline.test.ts b/packages/software-factory/tests/validation-pipeline.test.ts new file mode 100644 index 0000000000..937fe69058 --- /dev/null +++ b/packages/software-factory/tests/validation-pipeline.test.ts @@ -0,0 +1,281 @@ +import { module, test } from 'qunit'; + +import type { + ValidationStep, + ValidationStepResult, + ValidationResults, +} from '../src/factory-agent'; + +import { + ValidationPipeline, + createDefaultPipeline, + type ValidationStepRunner, +} from '../src/issue-loop'; + +import { NoOpStepRunner } from '../src/validators/noop-step'; + +// --------------------------------------------------------------------------- +// Test helpers +// --------------------------------------------------------------------------- + +class MockStepRunner implements ValidationStepRunner { + readonly step: ValidationStep; + private result: ValidationStepResult; + runCount = 0; + + constructor(step: ValidationStep, result: Partial) { + this.step = step; + this.result = { + step, + passed: true, + errors: [], + ...result, + }; + } + + async run(): Promise { + this.runCount++; + return this.result; + } + + formatForContext(result: ValidationStepResult): string { + if (result.passed) { + return ''; + } + let errors = result.errors.map((e) => `- ${e.message}`).join('\n'); + return `## ${result.step}: FAILED\n${errors}`; + } +} + +class ThrowingStepRunner implements ValidationStepRunner { + readonly step: ValidationStep; + private errorMessage: string; + runCount = 0; + + constructor(step: ValidationStep, errorMessage: string) { + this.step = step; + this.errorMessage = errorMessage; + } + + async run(): Promise { + this.runCount++; + throw new Error(this.errorMessage); + } + + formatForContext(): string { + return ''; + } +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +module('ValidationPipeline', function () { + test('empty pipeline returns passed with no steps', async function (assert) { + let pipeline = new ValidationPipeline([]); + let results = await pipeline.validate('https://example.test/realm/'); + + assert.true(results.passed); + assert.strictEqual(results.steps.length, 0); + }); + + test('all passing steps returns passed', async function (assert) { + let pipeline = new ValidationPipeline([ + new MockStepRunner('parse', { passed: true }), + new MockStepRunner('lint', { passed: true }), + new MockStepRunner('test', { passed: true }), + ]); + + let results = await pipeline.validate('https://example.test/realm/'); + + assert.true(results.passed); + assert.strictEqual(results.steps.length, 3); + assert.true(results.steps.every((s) => s.passed)); + }); + + test('one failing step makes overall result failed', async function (assert) { + let pipeline = new ValidationPipeline([ + new MockStepRunner('parse', { passed: true }), + new MockStepRunner('lint', { + passed: false, + errors: [{ message: 'lint violation' }], + }), + new MockStepRunner('test', { passed: true }), + ]); + + let results = await pipeline.validate('https://example.test/realm/'); + + assert.false(results.passed); + assert.strictEqual(results.steps.length, 3); + + let lintStep = results.steps.find((s) => s.step === 'lint'); + assert.false(lintStep?.passed); + assert.strictEqual(lintStep?.errors.length, 1); + assert.strictEqual(lintStep?.errors[0].message, 'lint violation'); + }); + + test('multiple failing steps all reported', async function (assert) { + let pipeline = new ValidationPipeline([ + new MockStepRunner('parse', { + passed: false, + errors: [{ message: 'syntax error' }], + }), + new MockStepRunner('lint', { + passed: false, + errors: [{ message: 'lint error' }], + }), + new MockStepRunner('test', { + passed: false, + errors: [{ message: 'test failure' }], + }), + ]); + + let results = await pipeline.validate('https://example.test/realm/'); + + assert.false(results.passed); + assert.strictEqual(results.steps.filter((s) => !s.passed).length, 3); + }); + + test('steps run concurrently (all runners invoked)', async function (assert) { + let runners = [ + new MockStepRunner('parse', { passed: true }), + new MockStepRunner('lint', { passed: true }), + new MockStepRunner('evaluate', { passed: true }), + new MockStepRunner('instantiate', { passed: true }), + new MockStepRunner('test', { passed: true }), + ]; + + let pipeline = new ValidationPipeline(runners); + await pipeline.validate('https://example.test/realm/'); + + for (let runner of runners) { + assert.strictEqual(runner.runCount, 1, `${runner.step} should run once`); + } + }); + + test('exception in one step does not prevent others', async function (assert) { + let goodStep = new MockStepRunner('parse', { passed: true }); + let throwingStep = new ThrowingStepRunner('lint', 'kaboom'); + let anotherGoodStep = new MockStepRunner('test', { passed: true }); + + let pipeline = new ValidationPipeline([ + goodStep, + throwingStep, + anotherGoodStep, + ]); + + let results = await pipeline.validate('https://example.test/realm/'); + + assert.false(results.passed); + assert.strictEqual(results.steps.length, 3); + + // Good steps still ran + assert.strictEqual(goodStep.runCount, 1); + assert.strictEqual(anotherGoodStep.runCount, 1); + assert.strictEqual(throwingStep.runCount, 1); + + // Throwing step captured as failed + let lintStep = results.steps.find((s) => s.step === 'lint'); + assert.false(lintStep?.passed); + assert.strictEqual(lintStep?.errors.length, 1); + assert.strictEqual(lintStep?.errors[0].message, 'kaboom'); + + // Good steps passed + assert.true(results.steps.find((s) => s.step === 'parse')?.passed); + assert.true(results.steps.find((s) => s.step === 'test')?.passed); + }); + + test('exception captured as failed step result with error message', async function (assert) { + let pipeline = new ValidationPipeline([ + new ThrowingStepRunner('evaluate', 'module load failed'), + ]); + + let results = await pipeline.validate('https://example.test/realm/'); + + assert.false(results.passed); + assert.strictEqual(results.steps[0].step, 'evaluate'); + assert.false(results.steps[0].passed); + assert.strictEqual( + results.steps[0].errors[0].message, + 'module load failed', + ); + }); + + test('createDefaultPipeline creates 5 steps in correct order', function (assert) { + let pipeline = createDefaultPipeline({ + realmServerUrl: 'https://example.test/', + hostAppUrl: 'https://example.test/', + testResultsModuleUrl: 'https://example.test/test-results', + targetRealmUrl: 'https://example.test/realm/', + }); + + // Access runners via validate + inspection + // Since runners is private, we verify through the pipeline's behavior + assert.ok(pipeline, 'pipeline was created'); + assert.ok(pipeline instanceof ValidationPipeline, 'is a ValidationPipeline'); + }); + + test('formatForContext returns simple message when all pass', function (assert) { + let pipeline = new ValidationPipeline([ + new MockStepRunner('parse', { passed: true }), + ]); + + let results: ValidationResults = { + passed: true, + steps: [{ step: 'parse', passed: true, errors: [] }], + }; + + let formatted = pipeline.formatForContext(results); + assert.strictEqual(formatted, 'All validation steps passed.'); + }); + + test('formatForContext includes failure details from runners', function (assert) { + let runner = new MockStepRunner('lint', { + passed: false, + errors: [{ message: 'unexpected semicolon' }], + }); + + let pipeline = new ValidationPipeline([runner]); + + let results: ValidationResults = { + passed: false, + steps: [ + { + step: 'lint', + passed: false, + errors: [{ message: 'unexpected semicolon' }], + }, + ], + }; + + let formatted = pipeline.formatForContext(results); + assert.ok(formatted.includes('FAILED'), 'includes FAILED'); + assert.ok( + formatted.includes('unexpected semicolon'), + 'includes error message', + ); + }); +}); + +module('NoOpStepRunner', function () { + test('always returns passed with empty errors', async function (assert) { + let runner = new NoOpStepRunner('parse'); + let result = await runner.run('https://example.test/realm/'); + + assert.strictEqual(result.step, 'parse'); + assert.true(result.passed); + assert.strictEqual(result.errors.length, 0); + }); + + test('formatForContext returns empty string', function (assert) { + let runner = new NoOpStepRunner('lint'); + let result: ValidationStepResult = { + step: 'lint', + passed: true, + errors: [], + }; + + assert.strictEqual(runner.formatForContext(result), ''); + }); +}); From b09391dd0f435acf1efb70aeec9e08eb0bd444cb Mon Sep 17 00:00:00 2001 From: Hassan Abdel-Rahman Date: Thu, 9 Apr 2026 19:28:22 -0400 Subject: [PATCH 04/17] =?UTF-8?q?Revert=20pullRealmFiles=20refactor=20?= =?UTF-8?q?=E2=80=94=20keep=20it=20unchanged,=20fetchRealmFilenames=20is?= =?UTF-8?q?=20standalone?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pullRealmFiles is dead code (only used in its own tests) and doesn't need to be refactored. fetchRealmFilenames stands on its own for the test validation step's file discovery. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../software-factory/src/realm-operations.ts | 48 ++++++++++++++++--- 1 file changed, 42 insertions(+), 6 deletions(-) diff --git a/packages/software-factory/src/realm-operations.ts b/packages/software-factory/src/realm-operations.ts index 71fede42f5..6ace513827 100644 --- a/packages/software-factory/src/realm-operations.ts +++ b/packages/software-factory/src/realm-operations.ts @@ -880,16 +880,52 @@ export async function pullRealmFiles( let fetchImpl = options?.fetch ?? globalThis.fetch; let normalizedRealmUrl = ensureTrailingSlash(realmUrl); - // Use fetchRealmFilenames to discover all file paths. - let { filenames, error } = await fetchRealmFilenames(realmUrl, options); - if (error) { - return { files: [], error }; + let headers = buildAuthHeaders( + options?.authorization, + SupportedMimeType.JSONAPI, + ); + + // Fetch mtimes to discover all file paths. + let mtimesUrl = `${normalizedRealmUrl}_mtimes`; + let mtimesResponse: Response; + try { + mtimesResponse = await fetchImpl(mtimesUrl, { method: 'GET', headers }); + } catch (err) { + return { + files: [], + error: `Failed to fetch _mtimes: ${err instanceof Error ? err.message : String(err)}`, + }; + } + + if (!mtimesResponse.ok) { + let body = await mtimesResponse.text(); + return { + files: [], + error: `_mtimes returned HTTP ${mtimesResponse.status}: ${body.slice(0, 300)}`, + }; + } + + let mtimes: Record; + try { + let json = await mtimesResponse.json(); + // _mtimes returns JSON:API format: { data: { attributes: { mtimes: {...} } } } + mtimes = + (json as { data?: { attributes?: { mtimes?: Record } } }) + ?.data?.attributes?.mtimes ?? json; + } catch { + return { files: [], error: 'Failed to parse _mtimes response as JSON' }; } // Download each file. let downloadedFiles: string[] = []; - for (let relativePath of filenames) { - let fullUrl = `${normalizedRealmUrl}${relativePath}`; + for (let fullUrl of Object.keys(mtimes)) { + if (!fullUrl.startsWith(normalizedRealmUrl)) { + continue; + } + let relativePath = fullUrl.slice(normalizedRealmUrl.length); + if (!relativePath || relativePath.endsWith('/')) { + continue; + } let localPath = join(localDir, relativePath); mkdirSync(dirname(localPath), { recursive: true }); From 56dc3cc030d7b79d5d7bdfb96ae81ed16e05754f Mon Sep 17 00:00:00 2001 From: Hassan Abdel-Rahman Date: Thu, 9 Apr 2026 19:35:06 -0400 Subject: [PATCH 05/17] Address PR review: fix error fallback, log suffix, test assertions, persistence warning - Fix test-step error mapping: fall back to handle.errorMessage when details.failures is empty (infrastructure errors with readable TestRun) - Remove incorrect "no-op" log suffix in smoke test - Improve createDefaultPipeline test to actually verify 5 steps in order - Add warning log when IssueStore lacks updateIssue for max-iteration blocking Co-Authored-By: Claude Opus 4.6 (1M context) --- .../scripts/smoke-tests/smoke-test-realm.ts | 2 +- packages/software-factory/src/issue-loop.ts | 4 ++++ .../src/validators/test-step.ts | 23 ++++++++++--------- .../src/validators/validation-pipeline.ts | 3 +++ .../tests/validation-pipeline.test.ts | 19 +++++++++++---- 5 files changed, 34 insertions(+), 17 deletions(-) diff --git a/packages/software-factory/scripts/smoke-tests/smoke-test-realm.ts b/packages/software-factory/scripts/smoke-tests/smoke-test-realm.ts index 302f8022b7..38ffc9e908 100644 --- a/packages/software-factory/scripts/smoke-tests/smoke-test-realm.ts +++ b/packages/software-factory/scripts/smoke-tests/smoke-test-realm.ts @@ -409,7 +409,7 @@ async function main() { detail = ` (${d.passedCount} passed, ${d.failedCount} failed)`; } } - log.info(` ${step.step}: ${statusIcon} ${step.passed ? 'passed' : 'failed'}${step.errors.length > 0 ? ' — no-op' : ''}${detail}`); + log.info(` ${step.step}: ${statusIcon} ${step.passed ? 'passed' : 'failed'}${detail}`); } // Verify pipeline results diff --git a/packages/software-factory/src/issue-loop.ts b/packages/software-factory/src/issue-loop.ts index 49a5c74332..9b9645c2c7 100644 --- a/packages/software-factory/src/issue-loop.ts +++ b/packages/software-factory/src/issue-loop.ts @@ -339,6 +339,10 @@ export async function runIssueLoop( ` Failed to update issue status to blocked: ${err instanceof Error ? err.message : String(err)}`, ); } + } else { + log.warn( + ` IssueStore does not implement updateIssue — blocked status not persisted to realm`, + ); } } diff --git a/packages/software-factory/src/validators/test-step.ts b/packages/software-factory/src/validators/test-step.ts index 5a5db73c82..d12844824e 100644 --- a/packages/software-factory/src/validators/test-step.ts +++ b/packages/software-factory/src/validators/test-step.ts @@ -186,17 +186,18 @@ export class TestValidationStep implements ValidationStepRunner { }; } - let errors = details - ? details.failures.map((f) => ({ - file: f.module, - message: `${f.testName}: ${f.message}`, - stackTrace: f.stackTrace, - })) - : [ - { - message: - handle.errorMessage ?? `Tests ${handle.status}`, - }, + let errors = + details && details.failures.length > 0 + ? details.failures.map((f) => ({ + file: f.module, + message: `${f.testName}: ${f.message}`, + stackTrace: f.stackTrace, + })) + : [ + { + message: + handle.errorMessage ?? `Tests ${handle.status}`, + }, ]; return { diff --git a/packages/software-factory/src/validators/validation-pipeline.ts b/packages/software-factory/src/validators/validation-pipeline.ts index 889708e924..16ca95f8be 100644 --- a/packages/software-factory/src/validators/validation-pipeline.ts +++ b/packages/software-factory/src/validators/validation-pipeline.ts @@ -147,6 +147,8 @@ export interface ValidationPipelineConfig { testResultsModuleUrl: string; targetRealmUrl: string; issueId?: string; + /** Injected for testing — passed through to TestValidationStep. */ + fetchFilenames?: TestValidationStepConfig['fetchFilenames']; } /** @@ -164,6 +166,7 @@ export function createDefaultPipeline( testResultsModuleUrl: config.testResultsModuleUrl, targetRealmUrl: config.targetRealmUrl, issueId: config.issueId, + fetchFilenames: config.fetchFilenames, }; return new ValidationPipeline([ diff --git a/packages/software-factory/tests/validation-pipeline.test.ts b/packages/software-factory/tests/validation-pipeline.test.ts index 937fe69058..9a9bb35235 100644 --- a/packages/software-factory/tests/validation-pipeline.test.ts +++ b/packages/software-factory/tests/validation-pipeline.test.ts @@ -202,18 +202,27 @@ module('ValidationPipeline', function () { ); }); - test('createDefaultPipeline creates 5 steps in correct order', function (assert) { + test('createDefaultPipeline creates 5 steps in correct order', async function (assert) { let pipeline = createDefaultPipeline({ realmServerUrl: 'https://example.test/', hostAppUrl: 'https://example.test/', testResultsModuleUrl: 'https://example.test/test-results', targetRealmUrl: 'https://example.test/realm/', + // Inject a fetchFilenames that returns no test files so the test + // step returns "nothing to validate" without hitting a real realm + fetchFilenames: async () => ({ filenames: [] }), }); - // Access runners via validate + inspection - // Since runners is private, we verify through the pipeline's behavior - assert.ok(pipeline, 'pipeline was created'); - assert.ok(pipeline instanceof ValidationPipeline, 'is a ValidationPipeline'); + // Verify step count and order by running validate and inspecting results + let results = await pipeline.validate('https://example.test/realm/'); + + assert.strictEqual(results.steps.length, 5, 'has 5 steps'); + assert.strictEqual(results.steps[0].step, 'parse', 'step 1 is parse'); + assert.strictEqual(results.steps[1].step, 'lint', 'step 2 is lint'); + assert.strictEqual(results.steps[2].step, 'evaluate', 'step 3 is evaluate'); + assert.strictEqual(results.steps[3].step, 'instantiate', 'step 4 is instantiate'); + assert.strictEqual(results.steps[4].step, 'test', 'step 5 is test'); + assert.true(results.passed, 'all steps pass (NoOp + no test files)'); }); test('formatForContext returns simple message when all pass', function (assert) { From da16f06d9ac3769f10824627f17ef5e9e2841f51 Mon Sep 17 00:00:00 2001 From: Hassan Abdel-Rahman Date: Thu, 9 Apr 2026 19:43:29 -0400 Subject: [PATCH 06/17] Implement RealmIssueStore.updateIssue, make updateIssue required, fix lint - Make IssueStore.updateIssue required (not optional) - Implement updateIssue on RealmIssueStore: read card, mutate status/description, write back to realm - Update all MockIssueStore implementations to include updateIssue - Fix prettier formatting and non-null assertion lint errors Co-Authored-By: Claude Opus 4.6 (1M context) --- .../scripts/smoke-tests/issue-loop-smoke.ts | 11 ++-- .../scripts/smoke-tests/smoke-test-realm.ts | 20 +++++--- packages/software-factory/src/issue-loop.ts | 30 +++++------ .../software-factory/src/issue-scheduler.ts | 51 +++++++++++++++++-- .../src/validators/test-step.ts | 13 ++--- .../src/validators/validation-pipeline.ts | 3 +- .../software-factory/tests/issue-loop.test.ts | 28 +++++----- .../tests/issue-scheduler.test.ts | 4 ++ .../software-factory/tests/test-step.test.ts | 25 ++++++--- .../tests/validation-pipeline.test.ts | 6 ++- 10 files changed, 126 insertions(+), 65 deletions(-) diff --git a/packages/software-factory/scripts/smoke-tests/issue-loop-smoke.ts b/packages/software-factory/scripts/smoke-tests/issue-loop-smoke.ts index 2f50a3174e..79dcb63bf4 100644 --- a/packages/software-factory/scripts/smoke-tests/issue-loop-smoke.ts +++ b/packages/software-factory/scripts/smoke-tests/issue-loop-smoke.ts @@ -78,6 +78,10 @@ class MockIssueStore implements IssueStore { if (!issue) throw new Error(`Issue "${issueId}" not found`); return { ...issue }; } + + async updateIssue(): Promise { + // no-op for smoke tests + } } interface MockAgentTurn { @@ -599,9 +603,10 @@ async function scenarioValidationPipeline(): Promise { ); // Verify formatForContext works - let formatted = pipeline.formatForContext( - result.issueResults[0]?.lastValidation!, - ); + let lastValidation = result.issueResults[0]?.lastValidation; + let formatted = lastValidation + ? pipeline.formatForContext(lastValidation) + : ''; check( 'formatForContext reports all passed', formatted === 'All validation steps passed.', diff --git a/packages/software-factory/scripts/smoke-tests/smoke-test-realm.ts b/packages/software-factory/scripts/smoke-tests/smoke-test-realm.ts index 38ffc9e908..240705ba41 100644 --- a/packages/software-factory/scripts/smoke-tests/smoke-test-realm.ts +++ b/packages/software-factory/scripts/smoke-tests/smoke-test-realm.ts @@ -399,7 +399,9 @@ async function main() { let validationResults = await pipeline.validate(targetRealmUrl); - log.info(` Pipeline result: ${validationResults.passed ? 'PASSED' : 'FAILED'} (${validationResults.steps.length} steps)`); + log.info( + ` Pipeline result: ${validationResults.passed ? 'PASSED' : 'FAILED'} (${validationResults.steps.length} steps)`, + ); for (let step of validationResults.steps) { let statusIcon = step.passed ? '✓' : '✗'; let detail = ''; @@ -409,7 +411,9 @@ async function main() { detail = ` (${d.passedCount} passed, ${d.failedCount} failed)`; } } - log.info(` ${step.step}: ${statusIcon} ${step.passed ? 'passed' : 'failed'}${detail}`); + log.info( + ` ${step.step}: ${statusIcon} ${step.passed ? 'passed' : 'failed'}${detail}`, + ); } // Verify pipeline results @@ -433,9 +437,7 @@ async function main() { log.info(' ✓ Test step correctly failed'); } - let noOpSteps = validationResults.steps.filter( - (s) => s.step !== 'test', - ); + let noOpSteps = validationResults.steps.filter((s) => s.step !== 'test'); let allNoOpsPassed = noOpSteps.every((s) => s.passed); if (allNoOpsPassed) { log.info(' ✓ All NoOp steps (parse, lint, evaluate, instantiate) passed'); @@ -447,9 +449,13 @@ async function main() { if (testStep?.details) { let details = testStep.details as unknown as TestValidationDetails; if (details.passedCount > 0 && details.failedCount > 0) { - log.info(` ✓ Test details: ${details.passedCount} passed, ${details.failedCount} failed`); + log.info( + ` ✓ Test details: ${details.passedCount} passed, ${details.failedCount} failed`, + ); } else { - log.info(` ✗ Expected both passing and failing tests, got passed=${details.passedCount} failed=${details.failedCount}`); + log.info( + ` ✗ Expected both passing and failing tests, got passed=${details.passedCount} failed=${details.failedCount}`, + ); pipelinePassed = false; } } else { diff --git a/packages/software-factory/src/issue-loop.ts b/packages/software-factory/src/issue-loop.ts index 9b9645c2c7..a4d100118d 100644 --- a/packages/software-factory/src/issue-loop.ts +++ b/packages/software-factory/src/issue-loop.ts @@ -323,25 +323,19 @@ export async function runIssueLoop( ); exitReason = 'blocked'; - if (issueStore.updateIssue) { - try { - let description = buildMaxIterationBlockedDescription( - maxIterationsPerIssue, - validationResults, - validator, - ); - await issueStore.updateIssue(issue.id, { - status: 'blocked', - description, - }); - } catch (err) { - log.warn( - ` Failed to update issue status to blocked: ${err instanceof Error ? err.message : String(err)}`, - ); - } - } else { + try { + let description = buildMaxIterationBlockedDescription( + maxIterationsPerIssue, + validationResults, + validator, + ); + await issueStore.updateIssue(issue.id, { + status: 'blocked', + description, + }); + } catch (err) { log.warn( - ` IssueStore does not implement updateIssue — blocked status not persisted to realm`, + ` Failed to update issue status to blocked: ${err instanceof Error ? err.message : String(err)}`, ); } } diff --git a/packages/software-factory/src/issue-scheduler.ts b/packages/software-factory/src/issue-scheduler.ts index a8008c6d38..4599ffddff 100644 --- a/packages/software-factory/src/issue-scheduler.ts +++ b/packages/software-factory/src/issue-scheduler.ts @@ -12,7 +12,12 @@ import type { SchedulableIssue, } from './factory-agent-types'; -import { searchRealm, type RealmFetchOptions } from './realm-operations'; +import { + searchRealm, + readFile, + writeFile, + type RealmFetchOptions, +} from './realm-operations'; import { logger } from './logger'; let log = logger('issue-scheduler'); @@ -30,8 +35,8 @@ export interface IssueStore { listIssues(): Promise; /** Re-read a single issue's current state from the realm. */ refreshIssue(issueId: string): Promise; - /** Update issue fields in the realm. Used for max-iteration blocking. */ - updateIssue?( + /** Update issue fields in the realm (e.g., status, description). */ + updateIssue( issueId: string, updates: { status?: string; description?: string }, ): Promise; @@ -229,6 +234,46 @@ export class RealmIssueStore implements IssueStore { return mapCardToSchedulableIssue(result.data[0]); } + + async updateIssue( + issueId: string, + updates: { status?: string; description?: string }, + ): Promise { + // Read the existing card document + let readResult = await readFile(this.realmUrl, issueId, this.options); + if (!readResult.ok || !readResult.document) { + throw new Error( + `Failed to read issue "${issueId}" for update: ${readResult.error ?? 'no document returned'}`, + ); + } + + let doc = readResult.document; + let attrs = (doc.data.attributes ?? {}) as Record; + + if (updates.status != null) { + attrs.status = updates.status; + } + if (updates.description != null) { + attrs.description = updates.description; + } + + doc.data.attributes = attrs; + + let writeResult = await writeFile( + this.realmUrl, + `${issueId}.json`, + JSON.stringify(doc, null, 2), + this.options, + ); + + if (!writeResult.ok) { + throw new Error( + `Failed to write issue "${issueId}": ${writeResult.error}`, + ); + } + + log.info(`Updated issue "${issueId}": ${JSON.stringify(updates)}`); + } } // --------------------------------------------------------------------------- diff --git a/packages/software-factory/src/validators/test-step.ts b/packages/software-factory/src/validators/test-step.ts index d12844824e..9ef632af18 100644 --- a/packages/software-factory/src/validators/test-step.ts +++ b/packages/software-factory/src/validators/test-step.ts @@ -39,9 +39,7 @@ export interface TestValidationStepConfig { targetRealmUrl: string; issueId?: string; /** Injected for testing — defaults to executeTestRunFromRealm. */ - executeTestRun?: ( - options: ExecuteTestRunOptions, - ) => Promise; + executeTestRun?: (options: ExecuteTestRunOptions) => Promise; /** Injected for testing — defaults to fetchRealmFilenames. */ fetchFilenames?: ( realmUrl: string, @@ -195,10 +193,9 @@ export class TestValidationStep implements ValidationStepRunner { })) : [ { - message: - handle.errorMessage ?? `Tests ${handle.status}`, + message: handle.errorMessage ?? `Tests ${handle.status}`, }, - ]; + ]; return { step: 'test', @@ -250,9 +247,7 @@ export class TestValidationStep implements ValidationStepRunner { // Private helpers // ------------------------------------------------------------------------- - private async discoverTestFiles( - targetRealmUrl: string, - ): Promise { + private async discoverTestFiles(targetRealmUrl: string): Promise { let result = await this.fetchFilenamesFn(targetRealmUrl, { authorization: this.config.authorization, fetch: this.config.fetch, diff --git a/packages/software-factory/src/validators/validation-pipeline.ts b/packages/software-factory/src/validators/validation-pipeline.ts index 16ca95f8be..b9c8377a83 100644 --- a/packages/software-factory/src/validators/validation-pipeline.ts +++ b/packages/software-factory/src/validators/validation-pipeline.ts @@ -81,8 +81,7 @@ export class ValidationPipeline implements Validator { } else { // Step threw an exception — capture as a failed result let reason = outcome.reason; - let message = - reason instanceof Error ? reason.message : String(reason); + let message = reason instanceof Error ? reason.message : String(reason); log.error( `Validation step "${this.runners[i].step}" threw: ${message}`, ); diff --git a/packages/software-factory/tests/issue-loop.test.ts b/packages/software-factory/tests/issue-loop.test.ts index 17c229f71d..b5f1f72336 100644 --- a/packages/software-factory/tests/issue-loop.test.ts +++ b/packages/software-factory/tests/issue-loop.test.ts @@ -25,6 +25,7 @@ import { class MockIssueStore implements IssueStore { issues: SchedulableIssue[]; + updateCalls: { issueId: string; updates: Record }[] = []; constructor(issues: SchedulableIssue[]) { this.issues = issues.map((i) => ({ ...i })); @@ -41,6 +42,13 @@ class MockIssueStore implements IssueStore { } return { ...issue }; } + + async updateIssue( + issueId: string, + updates: { status?: string; description?: string }, + ): Promise { + this.updateCalls.push({ issueId, updates }); + } } // --------------------------------------------------------------------------- @@ -527,20 +535,10 @@ module('issue-loop > max inner iterations', function () { }); test('updateIssue called when blocking due to max iterations + failing validation', async function (assert) { - let updateCalls: { issueId: string; updates: Record }[] = []; - let store = new MockIssueStore([ makeIssue({ id: 'iss-1', status: 'backlog', priority: 'high', order: 1 }), ]); - // Add updateIssue to mock store - (store as MockIssueStore & { updateIssue: Function }).updateIssue = async ( - issueId: string, - updates: { status?: string; description?: string }, - ) => { - updateCalls.push({ issueId, updates }); - }; - let turns: MockAgentTurn[] = []; let validations: ValidationResults[] = []; @@ -564,11 +562,13 @@ module('issue-loop > max inner iterations', function () { }), ); - assert.strictEqual(updateCalls.length, 1, 'updateIssue called once'); - assert.strictEqual(updateCalls[0].issueId, 'iss-1'); - assert.strictEqual(updateCalls[0].updates.status, 'blocked'); + assert.strictEqual(store.updateCalls.length, 1, 'updateIssue called once'); + assert.strictEqual(store.updateCalls[0].issueId, 'iss-1'); + assert.strictEqual(store.updateCalls[0].updates.status, 'blocked'); assert.ok( - (updateCalls[0].updates.description as string).includes('max iteration limit'), + (store.updateCalls[0].updates.description as string).includes( + 'max iteration limit', + ), 'description includes reason', ); }); diff --git a/packages/software-factory/tests/issue-scheduler.test.ts b/packages/software-factory/tests/issue-scheduler.test.ts index 10cea3dafa..2ed9619722 100644 --- a/packages/software-factory/tests/issue-scheduler.test.ts +++ b/packages/software-factory/tests/issue-scheduler.test.ts @@ -26,6 +26,10 @@ class MockIssueStore implements IssueStore { } return { ...issue }; } + + async updateIssue(): Promise { + // no-op for scheduler tests + } } // --------------------------------------------------------------------------- diff --git a/packages/software-factory/tests/test-step.test.ts b/packages/software-factory/tests/test-step.test.ts index 43dafb4e14..cb06ab31c5 100644 --- a/packages/software-factory/tests/test-step.test.ts +++ b/packages/software-factory/tests/test-step.test.ts @@ -10,7 +10,10 @@ import { type TestValidationDetails, } from '../src/validators/test-step'; -import type { TestRunHandle, ExecuteTestRunOptions } from '../src/test-run-types'; +import type { + TestRunHandle, + ExecuteTestRunOptions, +} from '../src/test-run-types'; import type { RealmFetchOptions } from '../src/realm-operations'; // --------------------------------------------------------------------------- @@ -61,7 +64,9 @@ function makeExecuteTestRunThrows( }; } -function makeTestRunCardDocument(attrs: Record): LooseSingleCardDocument { +function makeTestRunCardDocument( + attrs: Record, +): LooseSingleCardDocument { return { data: { type: 'card', @@ -73,9 +78,11 @@ function makeTestRunCardDocument(attrs: Record): LooseSingleCar } as LooseSingleCardDocument; } -function makeReadCard( - document: LooseSingleCardDocument, -): (realmUrl: string, path: string, options?: RealmFetchOptions) => Promise<{ +function makeReadCard(document: LooseSingleCardDocument): ( + realmUrl: string, + path: string, + options?: RealmFetchOptions, +) => Promise<{ ok: boolean; document?: LooseSingleCardDocument; error?: string; @@ -83,9 +90,11 @@ function makeReadCard( return async () => ({ ok: true, document }); } -function makeReadCardError( - error: string, -): (realmUrl: string, path: string, options?: RealmFetchOptions) => Promise<{ +function makeReadCardError(error: string): ( + realmUrl: string, + path: string, + options?: RealmFetchOptions, +) => Promise<{ ok: boolean; document?: LooseSingleCardDocument; error?: string; diff --git a/packages/software-factory/tests/validation-pipeline.test.ts b/packages/software-factory/tests/validation-pipeline.test.ts index 9a9bb35235..66fd30349e 100644 --- a/packages/software-factory/tests/validation-pipeline.test.ts +++ b/packages/software-factory/tests/validation-pipeline.test.ts @@ -220,7 +220,11 @@ module('ValidationPipeline', function () { assert.strictEqual(results.steps[0].step, 'parse', 'step 1 is parse'); assert.strictEqual(results.steps[1].step, 'lint', 'step 2 is lint'); assert.strictEqual(results.steps[2].step, 'evaluate', 'step 3 is evaluate'); - assert.strictEqual(results.steps[3].step, 'instantiate', 'step 4 is instantiate'); + assert.strictEqual( + results.steps[3].step, + 'instantiate', + 'step 4 is instantiate', + ); assert.strictEqual(results.steps[4].step, 'test', 'step 5 is test'); assert.true(results.passed, 'all steps pass (NoOp + no test files)'); }); From f91d6cdaecb080533fc55e72b43e33c2bfa6dc80 Mon Sep 17 00:00:00 2001 From: Hassan Abdel-Rahman Date: Thu, 9 Apr 2026 19:55:22 -0400 Subject: [PATCH 07/17] Update phase-2-plan.md, fix NoOpStepRunner parameter signatures MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Update orchestrator pseudocode to reflect max-iterations→blocked behavior - Document IssueStore.updateIssue and orchestrator realm writes - Update Issue Lifecycle section with max-iteration blocking - Clean up --jwt reference in boxel-cli auth section - Fix NoOpStepRunner to declare parameter names (TS strict mode) Co-Authored-By: Claude Opus 4.6 (1M context) --- .../software-factory/docs/phase-2-plan.md | 31 ++++++++++++++----- .../src/validators/noop-step.ts | 4 +-- 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/packages/software-factory/docs/phase-2-plan.md b/packages/software-factory/docs/phase-2-plan.md index d78390cc9a..f2f52a2163 100644 --- a/packages/software-factory/docs/phase-2-plan.md +++ b/packages/software-factory/docs/phase-2-plan.md @@ -169,6 +169,7 @@ while ( // Inner loop: multiple iterations per issue let validationResults = undefined; + let exitReason = 'max_iterations'; for (let iteration = 1; iteration <= maxIterationsPerIssue; iteration++) { let context = await contextBuilder.buildForIssue({ issue, @@ -184,10 +185,24 @@ while ( // Read issue state from realm (not from AgentRunResult.status) issue = await scheduler.refreshIssueState(issue); - if (issue.status === 'done' || issue.status === 'blocked') break; + if (issue.status === 'done' || issue.status === 'blocked') { + exitReason = issue.status; + break; + } } - if (exitReason === 'max_iterations') exhaustedIssues.add(issue.id); + if (exitReason === 'max_iterations') { + // If validation still failing at max iterations, block the issue + // with the reason and failure context written to the realm + if (validationResults && !validationResults.passed) { + exitReason = 'blocked'; + await issueStore.updateIssue(issue.id, { + status: 'blocked', + description: buildMaxIterationBlockedDescription(validationResults), + }); + } + exhaustedIssues.add(issue.id); + } // Reload to pick up new issues the agent may have created await scheduler.loadIssues(); @@ -196,7 +211,9 @@ while ( The agent signals progress by updating the issue — tagging it as blocked, marking it done, or leaving it in progress for another iteration. The orchestrator reads issue state from the realm after each agent turn, then runs validation. Validation failures are fed back as context in the next inner-loop iteration so the agent can self-correct. The agent can also create new issues via tool calls if it determines a failure requires separate work. -All domain logic (what to implement, when to create sub-issues, when to tag as blocked) lives in the agent's prompt and skills. The orchestrator owns only: issue selection, agent invocation, and validation. +The orchestrator also **writes** to the realm in one case: when max iterations are reached with failing validation, it updates the issue's status to `blocked` and writes the formatted validation failure context into the issue description. This uses `IssueStore.updateIssue()`, which performs a read-modify-write against the realm card. + +All domain logic (what to implement, when to create sub-issues, when to tag as blocked) lives in the agent's prompt and skills. The orchestrator owns only: issue selection, agent invocation, validation, and max-iteration blocking. ### Issue Loading via searchRealm() @@ -316,14 +333,14 @@ CS-10671 trims and renames the current schema as a first step. The adoption from ``` backlog → in_progress → done - → blocked (needs human input) + → blocked (needs human input or max iterations with failing validation) → review (optional) ``` -Issues that exhaust `maxIterationsPerIssue` without completing stay in their current status but are excluded from re-picking in the same loop run via an `exhaustedIssues` set. - The agent manages its own transitions by updating the issue directly (e.g., tagging as blocked, marking done). The orchestrator reads the issue state after the agent exits to decide what to do next — it does not inspect the agent's return value for status. +The orchestrator also transitions issues to `blocked` in one case: when max iterations are reached with validation still failing. It writes the reason ("max iteration limit reached") and the formatted validation failure context into the issue description via `IssueStore.updateIssue()`, making the blocking reason visible in the realm. Issues blocked this way are also added to an `exhaustedIssues` set to prevent re-selection within the same run. + ## Migration Path from Phase 1 Phase 1 and phase 2 coexist during the transition. The implementation lives in separate files to avoid touching Phase 1 code: @@ -485,7 +502,7 @@ The principle that boxel-cli owns the entire Boxel API surface extends to auth. 1. **Two-tier token model** — boxel-cli understands both realm server tokens (obtained via Matrix OpenID → `POST /_server-session`, grants server-level access) and per-realm tokens (obtained via `POST /_realm-auth`, grants access to specific realms). Both are cached and refreshed automatically. -2. **Automatic token acquisition on realm creation** — When `boxel create-realm` creates a new realm, boxel-cli automatically waits for readiness, obtains the per-realm JWT, and stores it in its auth state. Subsequent `boxel pull`/`boxel sync` on that realm Just Work — no `--jwt` flag, no token passing. +2. **Automatic token acquisition on realm creation** — When `boxel create-realm` creates a new realm, boxel-cli automatically waits for readiness, obtains the per-realm JWT, and stores it in its auth state. Subsequent `boxel pull`/`boxel sync` on that realm Just Work — tokens are managed internally by boxel-cli. 3. **Programmatic auth API** — Export a `BoxelAuth` class (or similar) so the factory imports it and never constructs HTTP requests or manages tokens: diff --git a/packages/software-factory/src/validators/noop-step.ts b/packages/software-factory/src/validators/noop-step.ts index bc109c0b9e..3076cdc7fe 100644 --- a/packages/software-factory/src/validators/noop-step.ts +++ b/packages/software-factory/src/validators/noop-step.ts @@ -14,11 +14,11 @@ export class NoOpStepRunner implements ValidationStepRunner { this.step = step; } - async run(): Promise { + async run(_targetRealmUrl: string): Promise { return { step: this.step, passed: true, errors: [] }; } - formatForContext(): string { + formatForContext(_result: ValidationStepResult): string { return ''; } } From ef78431f0a43128abdcf2bc7156f84503a7042aa Mon Sep 17 00:00:00 2001 From: Hassan Abdel-Rahman Date: Thu, 9 Apr 2026 20:11:09 -0400 Subject: [PATCH 08/17] Fix setup-logger import path in smoke-test-realm.ts The import was ../setup-logger (resolving to scripts/setup-logger which doesn't exist). Fixed to ../../src/setup-logger matching all other smoke test scripts. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../software-factory/scripts/smoke-tests/smoke-test-realm.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/software-factory/scripts/smoke-tests/smoke-test-realm.ts b/packages/software-factory/scripts/smoke-tests/smoke-test-realm.ts index 240705ba41..09e1a771a5 100644 --- a/packages/software-factory/scripts/smoke-tests/smoke-test-realm.ts +++ b/packages/software-factory/scripts/smoke-tests/smoke-test-realm.ts @@ -35,7 +35,7 @@ */ // This should be first -import '../setup-logger'; +import '../../src/setup-logger'; import { getRealmServerToken, matrixLogin, parseArgs } from '../../src/boxel'; import { logger } from '../../src/logger'; From 158aad6d9d8398641bb3c80135dd050d254bd7fc Mon Sep 17 00:00:00 2001 From: Hassan Abdel-Rahman Date: Thu, 9 Apr 2026 20:23:52 -0400 Subject: [PATCH 09/17] Remove redundant direct executeTestRunFromRealm call from smoke test The validation pipeline's TestValidationStep already exercises executeTestRunFromRealm internally, so the separate direct call was redundant. The smoke test now goes straight from writing test files to running the ValidationPipeline. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../scripts/smoke-tests/smoke-test-realm.ts | 98 +++---------------- 1 file changed, 14 insertions(+), 84 deletions(-) diff --git a/packages/software-factory/scripts/smoke-tests/smoke-test-realm.ts b/packages/software-factory/scripts/smoke-tests/smoke-test-realm.ts index 09e1a771a5..fc3d82bad6 100644 --- a/packages/software-factory/scripts/smoke-tests/smoke-test-realm.ts +++ b/packages/software-factory/scripts/smoke-tests/smoke-test-realm.ts @@ -1,26 +1,19 @@ /** - * Smoke test for the factory test realm management (QUnit). + * Smoke test for the validation pipeline with real QUnit test execution. * - * Simulates the full factory workflow: implementation phase output followed - * by the testing phase via executeTestRunFromRealm with QUnit .test.gts files. + * 1. Creates a target realm and writes simulated LLM output: + * - A HelloCard definition (.gts) + * - A Spec card instance pointing to the HelloCard definition + * - A passing QUnit test (hello.test.gts) + * - A deliberately failing QUnit test (hello-fail.test.gts) * - * Phase 1 -- Simulate LLM implementation output: - * Writes to the target realm (what the LLM would have produced): - * 1. A sample HelloCard definition (.gts) - * 2. A Spec card instance pointing to the HelloCard definition - * 3. A sample HelloCard instance (HelloCard/sample.json) - * 4. A QUnit test file (hello.test.gts) -- passing - * 5. A QUnit test file (hello-fail.test.gts) -- deliberately failing + * 2. Runs the full ValidationPipeline via createDefaultPipeline(), which + * executes all validation steps (parse, lint, evaluate, instantiate + * are NoOp placeholders; test step runs real QUnit tests via Playwright). * - * Phase 2 -- Run the testing phase via QUnit: - * Calls executeTestRunFromRealm, which: - * - Creates a TestRun card (status: running) in the target realm - * - Launches a headless browser pointing at the host app QUnit page - * - Collects QUnit results (testEnd / runEnd events) - * - Completes the TestRun card with module results - * - The passing test produces a result with passedCount=1 - * - The failing test produces a result with failedCount=1 - * - The overall TestRun status is 'failed' (mixed results) + * 3. Verifies pipeline results: test step fails (deliberately), NoOp steps + * pass, detailed failure data is read back from the TestRun card, and + * formatForContext() produces LLM-friendly markdown. * * Prerequisites: * @@ -39,7 +32,6 @@ import '../../src/setup-logger'; import { getRealmServerToken, matrixLogin, parseArgs } from '../../src/boxel'; import { logger } from '../../src/logger'; -import { executeTestRunFromRealm } from '../../src/test-run-execution'; import { createRealm, getRealmScopedAuth, @@ -321,72 +313,10 @@ async function main() { ); // ------------------------------------------------------------------------- - // Phase 2: Run QUnit tests via executeTestRunFromRealm + // Run validation pipeline against the realm // ------------------------------------------------------------------------- - log.info( - '\n--- Phase 2: Running QUnit tests via executeTestRunFromRealm ---\n', - ); - - let handle = await executeTestRunFromRealm({ - targetRealmUrl, - testResultsModuleUrl, - slug: 'hello-smoke', - testNames: [], - authorization, - fetch: fetchImpl, - forceNew: true, - realmServerUrl, - hostAppUrl: realmServerUrl, - }); - - log.info(` TestRun ID: ${handle.testRunId}`); - log.info(` Status: ${handle.status}`); - if (handle.errorMessage) { - log.info(` Error: ${handle.errorMessage}`); - } - if ((handle as unknown as Record).error) { - log.info( - ` Complete error: ${(handle as unknown as Record).error}`, - ); - } - - // ------------------------------------------------------------------------- - // Results - // ------------------------------------------------------------------------- - - log.info('\n--- Results ---\n'); - - // The TestRun should have status 'failed' because it contains both a - // passing and a deliberately failing QUnit test. The module results inside - // should show one test passed and one test failed. - let expectedStatus = handle.status === 'failed'; - - log.info( - ` TestRun status: ${expectedStatus ? '✓ failed (as expected -- one test passes, one fails)' : `✗ expected failed, got ${handle.status}`}`, - ); - log.info(`\n View in Boxel: ${targetRealmUrl}${handle.testRunId}`); - - if (!expectedStatus) { - log.info('\n✗ Phase 2 had unexpected results.'); - log.info( - ` Expected "failed" (mixed pass/fail QUnit tests) but got "${handle.status}"`, - ); - process.exit(1); - } - - log.info( - '\n✓ Phase 2 passed! TestRun contains both pass and fail QUnit results.', - ); - - // ------------------------------------------------------------------------- - // Validation Pipeline — exercises the full ValidationPipeline with a real - // TestValidationStep against the same realm. - // ------------------------------------------------------------------------- - - log.info( - '\n--- Validation Pipeline: Running ValidationPipeline.validate() ---\n', - ); + log.info('\n--- Running ValidationPipeline.validate() ---\n'); let pipeline = createDefaultPipeline({ authorization, From 313c4aa8acf4355d67ff7656da2b7d85483aa858 Mon Sep 17 00:00:00 2001 From: Hassan Abdel-Rahman Date: Fri, 10 Apr 2026 11:47:50 -0400 Subject: [PATCH 10/17] Fix mock step runner parameter signatures to match ValidationStepRunner interface Add missing _targetRealmUrl parameter to run() and _result parameter to formatForContext() on MockStepRunner and ThrowingStepRunner. Co-Authored-By: Claude Opus 4.6 (1M context) --- packages/software-factory/tests/validation-pipeline.test.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/software-factory/tests/validation-pipeline.test.ts b/packages/software-factory/tests/validation-pipeline.test.ts index 66fd30349e..f2282c7ea1 100644 --- a/packages/software-factory/tests/validation-pipeline.test.ts +++ b/packages/software-factory/tests/validation-pipeline.test.ts @@ -33,7 +33,7 @@ class MockStepRunner implements ValidationStepRunner { }; } - async run(): Promise { + async run(_targetRealmUrl: string): Promise { this.runCount++; return this.result; } @@ -57,12 +57,12 @@ class ThrowingStepRunner implements ValidationStepRunner { this.errorMessage = errorMessage; } - async run(): Promise { + async run(_targetRealmUrl: string): Promise { this.runCount++; throw new Error(this.errorMessage); } - formatForContext(): string { + formatForContext(_result: ValidationStepResult): string { return ''; } } From 8c073b7835876b0adced22dba80c822038653431 Mon Sep 17 00:00:00 2001 From: Hassan Abdel-Rahman Date: Fri, 10 Apr 2026 11:51:20 -0400 Subject: [PATCH 11/17] Set updatedAt timestamp when updating issue via RealmIssueStore Co-Authored-By: Claude Opus 4.6 (1M context) --- packages/software-factory/src/issue-scheduler.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/software-factory/src/issue-scheduler.ts b/packages/software-factory/src/issue-scheduler.ts index 4599ffddff..dd323d70f8 100644 --- a/packages/software-factory/src/issue-scheduler.ts +++ b/packages/software-factory/src/issue-scheduler.ts @@ -256,6 +256,7 @@ export class RealmIssueStore implements IssueStore { if (updates.description != null) { attrs.description = updates.description; } + attrs.updatedAt = new Date().toISOString(); doc.data.attributes = attrs; From 59563f0de10e6f9bd525bd5a089c5f34a1a5ba0e Mon Sep 17 00:00:00 2001 From: Hassan Abdel-Rahman Date: Fri, 10 Apr 2026 11:52:30 -0400 Subject: [PATCH 12/17] Only set exitReason to blocked after successful realm update Move exitReason = 'blocked' inside the try block so it's only set after issueStore.updateIssue succeeds. If the realm write fails, exitReason stays max_iterations to accurately reflect that the issue wasn't actually blocked in the realm. Co-Authored-By: Claude Opus 4.6 (1M context) --- packages/software-factory/src/issue-loop.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/software-factory/src/issue-loop.ts b/packages/software-factory/src/issue-loop.ts index a4d100118d..7052bfc0fb 100644 --- a/packages/software-factory/src/issue-loop.ts +++ b/packages/software-factory/src/issue-loop.ts @@ -321,7 +321,6 @@ export async function runIssueLoop( log.info( ` Validation still failing — blocking issue with failure context`, ); - exitReason = 'blocked'; try { let description = buildMaxIterationBlockedDescription( @@ -333,6 +332,7 @@ export async function runIssueLoop( status: 'blocked', description, }); + exitReason = 'blocked'; } catch (err) { log.warn( ` Failed to update issue status to blocked: ${err instanceof Error ? err.message : String(err)}`, From 208a23ecee9f6b34a5d9df6cba1412bb847250cc Mon Sep 17 00:00:00 2001 From: Hassan Abdel-Rahman Date: Fri, 10 Apr 2026 13:14:22 -0400 Subject: [PATCH 13/17] Fix failing Playwright specs: update obsolete Phase 1 assertions - factory-seed.spec.ts: fixture realm has pre-existing issues, so use find() instead of asserting exactly 1 issue from listIssues() - factory-target-realm.spec.ts: update from old summary.bootstrap shape to new summary.seedIssue shape, verify seed issue exists in realm instead of Project card (Project is now created by the agent, not the entrypoint) Co-Authored-By: Claude Opus 4.6 (1M context) --- .../tests/factory-target-realm.spec.ts | 45 ++++++++++--------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/packages/software-factory/tests/factory-target-realm.spec.ts b/packages/software-factory/tests/factory-target-realm.spec.ts index 0984fbac4c..f20a031486 100644 --- a/packages/software-factory/tests/factory-target-realm.spec.ts +++ b/packages/software-factory/tests/factory-target-realm.spec.ts @@ -103,25 +103,18 @@ test('factory:go creates a target realm and bootstraps project artifacts end-to- let summary = JSON.parse(result.stdout) as { command: string; targetRealm: { url: string; ownerUsername: string }; - bootstrap: { - projectId: string; - issueIds: string[]; - knowledgeArticleIds: string[]; - activeIssue: { id: string; status: string }; + seedIssue: { + seedIssueId: string; + seedIssueStatus: string; }; }; expect(summary.command).toBe('factory:go'); expect(summary.targetRealm.ownerUsername).toBe(targetUsername); - expect(summary.bootstrap.projectId).toBe('Projects/sticky-note-mvp'); - expect(summary.bootstrap.issueIds).toHaveLength(3); - expect(summary.bootstrap.knowledgeArticleIds).toHaveLength(2); - expect(summary.bootstrap.activeIssue.id).toBe( - 'Issues/sticky-note-define-core', - ); - expect(summary.bootstrap.activeIssue.status).toBe('created'); + expect(summary.seedIssue.seedIssueId).toBe('Issues/bootstrap-seed'); + expect(summary.seedIssue.seedIssueStatus).toBe('created'); - // Verify the project card actually exists in the newly created target realm + // Verify the seed issue actually exists in the newly created target realm // by authenticating as the target user who owns the realm let targetRealmToken = await getRealmToken( matrixURL, @@ -130,22 +123,30 @@ test('factory:go creates a target realm and bootstraps project artifacts end-to- summary.targetRealm.url, ); - let projectUrl = new URL( - 'Projects/sticky-note-mvp', - summary.targetRealm.url, - ).href; - let projectResponse = await fetch(projectUrl, { + let seedIssueUrl = new URL('Issues/bootstrap-seed', summary.targetRealm.url) + .href; + let seedIssueResponse = await fetch(seedIssueUrl, { headers: { Accept: SupportedMimeType.CardSource, Authorization: targetRealmToken, }, }); - expect(projectResponse.ok).toBe(true); - let projectJson = (await projectResponse.json()) as { - data: { attributes: { projectName: string } }; + expect(seedIssueResponse.ok).toBe(true); + let issueJson = (await seedIssueResponse.json()) as { + data: { + attributes: { + issueType: string; + status: string; + summary: string; + }; + }; }; - expect(projectJson.data.attributes.projectName).toBe('Sticky Note MVP'); + expect(issueJson.data.attributes.issueType).toBe('bootstrap'); + expect(issueJson.data.attributes.status).toBe('backlog'); + expect(issueJson.data.attributes.summary).toContain( + 'Process brief and create project artifacts', + ); } finally { await new Promise((r, reject) => briefServer.close((err) => (err ? reject(err) : r())), From 2e1bfb32fd8b60c215a7412df3390c2e98cfe1ce Mon Sep 17 00:00:00 2001 From: Hassan Abdel-Rahman Date: Fri, 10 Apr 2026 13:42:06 -0400 Subject: [PATCH 14/17] Read source JSON file in updateIssue to avoid relationship stripping Read ${issueId}.json (the source file) instead of the indexed card document, which can have stripped relationships during indexing. This prevents data corruption when writing back the modified document. Co-Authored-By: Claude Opus 4.6 (1M context) --- packages/software-factory/src/issue-scheduler.ts | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/packages/software-factory/src/issue-scheduler.ts b/packages/software-factory/src/issue-scheduler.ts index dd323d70f8..41b434fa79 100644 --- a/packages/software-factory/src/issue-scheduler.ts +++ b/packages/software-factory/src/issue-scheduler.ts @@ -239,8 +239,13 @@ export class RealmIssueStore implements IssueStore { issueId: string, updates: { status?: string; description?: string }, ): Promise { - // Read the existing card document - let readResult = await readFile(this.realmUrl, issueId, this.options); + // Read the source JSON file (not the indexed card, which can have + // stripped relationships during indexing). + let readResult = await readFile( + this.realmUrl, + `${issueId}.json`, + this.options, + ); if (!readResult.ok || !readResult.document) { throw new Error( `Failed to read issue "${issueId}" for update: ${readResult.error ?? 'no document returned'}`, From a0b32539ba456d4e916414309882461dd0897119 Mon Sep 17 00:00:00 2001 From: Hassan Abdel-Rahman Date: Fri, 10 Apr 2026 13:57:47 -0400 Subject: [PATCH 15/17] Remove unused targetRealmUrl from TestValidationStepConfig The run(targetRealmUrl) parameter is always used instead of this.config.targetRealmUrl. Remove the dead config field to avoid drift between config and runtime values. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../software-factory/scripts/smoke-tests/smoke-test-realm.ts | 1 - packages/software-factory/src/validators/test-step.ts | 1 - packages/software-factory/src/validators/validation-pipeline.ts | 2 -- packages/software-factory/tests/test-step.test.ts | 1 - packages/software-factory/tests/validation-pipeline.test.ts | 1 - 5 files changed, 6 deletions(-) diff --git a/packages/software-factory/scripts/smoke-tests/smoke-test-realm.ts b/packages/software-factory/scripts/smoke-tests/smoke-test-realm.ts index fc3d82bad6..cabcfd9eec 100644 --- a/packages/software-factory/scripts/smoke-tests/smoke-test-realm.ts +++ b/packages/software-factory/scripts/smoke-tests/smoke-test-realm.ts @@ -324,7 +324,6 @@ async function main() { realmServerUrl, hostAppUrl: realmServerUrl, testResultsModuleUrl, - targetRealmUrl, }); let validationResults = await pipeline.validate(targetRealmUrl); diff --git a/packages/software-factory/src/validators/test-step.ts b/packages/software-factory/src/validators/test-step.ts index 9ef632af18..001e631b24 100644 --- a/packages/software-factory/src/validators/test-step.ts +++ b/packages/software-factory/src/validators/test-step.ts @@ -36,7 +36,6 @@ export interface TestValidationStepConfig { realmServerUrl: string; hostAppUrl: string; testResultsModuleUrl: string; - targetRealmUrl: string; issueId?: string; /** Injected for testing — defaults to executeTestRunFromRealm. */ executeTestRun?: (options: ExecuteTestRunOptions) => Promise; diff --git a/packages/software-factory/src/validators/validation-pipeline.ts b/packages/software-factory/src/validators/validation-pipeline.ts index b9c8377a83..c4686c6480 100644 --- a/packages/software-factory/src/validators/validation-pipeline.ts +++ b/packages/software-factory/src/validators/validation-pipeline.ts @@ -144,7 +144,6 @@ export interface ValidationPipelineConfig { realmServerUrl: string; hostAppUrl: string; testResultsModuleUrl: string; - targetRealmUrl: string; issueId?: string; /** Injected for testing — passed through to TestValidationStep. */ fetchFilenames?: TestValidationStepConfig['fetchFilenames']; @@ -163,7 +162,6 @@ export function createDefaultPipeline( realmServerUrl: config.realmServerUrl, hostAppUrl: config.hostAppUrl, testResultsModuleUrl: config.testResultsModuleUrl, - targetRealmUrl: config.targetRealmUrl, issueId: config.issueId, fetchFilenames: config.fetchFilenames, }; diff --git a/packages/software-factory/tests/test-step.test.ts b/packages/software-factory/tests/test-step.test.ts index cb06ab31c5..79d557044c 100644 --- a/packages/software-factory/tests/test-step.test.ts +++ b/packages/software-factory/tests/test-step.test.ts @@ -27,7 +27,6 @@ function makeConfig( realmServerUrl: 'https://example.test/', hostAppUrl: 'https://example.test/', testResultsModuleUrl: 'https://example.test/test-results', - targetRealmUrl: 'https://example.test/realm/', ...overrides, }; } diff --git a/packages/software-factory/tests/validation-pipeline.test.ts b/packages/software-factory/tests/validation-pipeline.test.ts index f2282c7ea1..923bb8540f 100644 --- a/packages/software-factory/tests/validation-pipeline.test.ts +++ b/packages/software-factory/tests/validation-pipeline.test.ts @@ -207,7 +207,6 @@ module('ValidationPipeline', function () { realmServerUrl: 'https://example.test/', hostAppUrl: 'https://example.test/', testResultsModuleUrl: 'https://example.test/test-results', - targetRealmUrl: 'https://example.test/realm/', // Inject a fetchFilenames that returns no test files so the test // step returns "nothing to validate" without hitting a real realm fetchFilenames: async () => ({ filenames: [] }), From 4647a027f823b7d5459d393545e5a9308a119500 Mon Sep 17 00:00:00 2001 From: Hassan Abdel-Rahman Date: Fri, 10 Apr 2026 14:05:43 -0400 Subject: [PATCH 16/17] Centralize deriveIssueSlug in factory-agent-types.ts Move deriveIssueSlug to factory-agent-types.ts and remove duplicate copies from factory-implement.ts and validators/test-step.ts. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../software-factory/src/factory-agent-types.ts | 9 +++++++++ packages/software-factory/src/factory-implement.ts | 14 +------------- .../software-factory/src/validators/test-step.ts | 10 +--------- 3 files changed, 11 insertions(+), 22 deletions(-) diff --git a/packages/software-factory/src/factory-agent-types.ts b/packages/software-factory/src/factory-agent-types.ts index e79ad6989d..a0bc09bf6d 100644 --- a/packages/software-factory/src/factory-agent-types.ts +++ b/packages/software-factory/src/factory-agent-types.ts @@ -243,3 +243,12 @@ export function resolveFactoryModel(cliModel?: string): string { return FACTORY_DEFAULT_MODEL; } + +/** + * Derive a slug from an issue ID by taking the last path segment. + * e.g., "Issues/sticky-note-define-core" → "sticky-note-define-core" + */ +export function deriveIssueSlug(issueId: string): string { + let parts = issueId.split('/'); + return parts[parts.length - 1]; +} diff --git a/packages/software-factory/src/factory-implement.ts b/packages/software-factory/src/factory-implement.ts index 5303f9ecf6..40ba347bd6 100644 --- a/packages/software-factory/src/factory-implement.ts +++ b/packages/software-factory/src/factory-implement.ts @@ -24,6 +24,7 @@ import type { TestResult, } from './factory-agent'; import { + deriveIssueSlug, resolveFactoryModel, ToolUseFactoryAgent, type FactoryAgentConfig, @@ -665,19 +666,6 @@ async function readTestRunFailures( } } -// --------------------------------------------------------------------------- -// Issue slug derivation -// --------------------------------------------------------------------------- - -/** - * Derive an issue slug from an issue ID. - * e.g., "Issues/sticky-note-define-core" → "sticky-note-define-core" - */ -function deriveIssueSlug(issueId: string): string { - let parts = issueId.split('/'); - return parts[parts.length - 1]; -} - // --------------------------------------------------------------------------- // Post-loop updates // --------------------------------------------------------------------------- diff --git a/packages/software-factory/src/validators/test-step.ts b/packages/software-factory/src/validators/test-step.ts index 001e631b24..a9c8e9b87f 100644 --- a/packages/software-factory/src/validators/test-step.ts +++ b/packages/software-factory/src/validators/test-step.ts @@ -11,6 +11,7 @@ */ import type { ValidationStepResult } from '../factory-agent'; +import { deriveIssueSlug } from '../factory-agent-types'; import type { LooseSingleCardDocument } from '@cardstack/runtime-common'; import { executeTestRunFromRealm } from '../test-run-execution'; @@ -291,15 +292,6 @@ export class TestValidationStep implements ValidationStepRunner { // Helpers // --------------------------------------------------------------------------- -/** - * Derive a slug from an issue ID. - * e.g., "Issues/sticky-note-define-core" → "sticky-note-define-core" - */ -function deriveIssueSlug(issueId: string): string { - let parts = issueId.split('/'); - return parts[parts.length - 1]; -} - interface TestRunCardAttributes { status?: string; passedCount?: number; From e4afc81de2f77be8d3a2f74af709f2d9da4d2eb3 Mon Sep 17 00:00:00 2001 From: Hassan Abdel-Rahman Date: Fri, 10 Apr 2026 14:33:27 -0400 Subject: [PATCH 17/17] Restore factory-target-realm.spec.ts to main version The spec was prematurely updated to expect a seedIssue shape from CS-10673 work that hasn't landed on main yet. Restore the main branch version which expects the current summary.bootstrap shape. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../tests/factory-target-realm.spec.ts | 45 +++++++++---------- 1 file changed, 22 insertions(+), 23 deletions(-) diff --git a/packages/software-factory/tests/factory-target-realm.spec.ts b/packages/software-factory/tests/factory-target-realm.spec.ts index f20a031486..0984fbac4c 100644 --- a/packages/software-factory/tests/factory-target-realm.spec.ts +++ b/packages/software-factory/tests/factory-target-realm.spec.ts @@ -103,18 +103,25 @@ test('factory:go creates a target realm and bootstraps project artifacts end-to- let summary = JSON.parse(result.stdout) as { command: string; targetRealm: { url: string; ownerUsername: string }; - seedIssue: { - seedIssueId: string; - seedIssueStatus: string; + bootstrap: { + projectId: string; + issueIds: string[]; + knowledgeArticleIds: string[]; + activeIssue: { id: string; status: string }; }; }; expect(summary.command).toBe('factory:go'); expect(summary.targetRealm.ownerUsername).toBe(targetUsername); - expect(summary.seedIssue.seedIssueId).toBe('Issues/bootstrap-seed'); - expect(summary.seedIssue.seedIssueStatus).toBe('created'); + expect(summary.bootstrap.projectId).toBe('Projects/sticky-note-mvp'); + expect(summary.bootstrap.issueIds).toHaveLength(3); + expect(summary.bootstrap.knowledgeArticleIds).toHaveLength(2); + expect(summary.bootstrap.activeIssue.id).toBe( + 'Issues/sticky-note-define-core', + ); + expect(summary.bootstrap.activeIssue.status).toBe('created'); - // Verify the seed issue actually exists in the newly created target realm + // Verify the project card actually exists in the newly created target realm // by authenticating as the target user who owns the realm let targetRealmToken = await getRealmToken( matrixURL, @@ -123,30 +130,22 @@ test('factory:go creates a target realm and bootstraps project artifacts end-to- summary.targetRealm.url, ); - let seedIssueUrl = new URL('Issues/bootstrap-seed', summary.targetRealm.url) - .href; - let seedIssueResponse = await fetch(seedIssueUrl, { + let projectUrl = new URL( + 'Projects/sticky-note-mvp', + summary.targetRealm.url, + ).href; + let projectResponse = await fetch(projectUrl, { headers: { Accept: SupportedMimeType.CardSource, Authorization: targetRealmToken, }, }); - expect(seedIssueResponse.ok).toBe(true); - let issueJson = (await seedIssueResponse.json()) as { - data: { - attributes: { - issueType: string; - status: string; - summary: string; - }; - }; + expect(projectResponse.ok).toBe(true); + let projectJson = (await projectResponse.json()) as { + data: { attributes: { projectName: string } }; }; - expect(issueJson.data.attributes.issueType).toBe('bootstrap'); - expect(issueJson.data.attributes.status).toBe('backlog'); - expect(issueJson.data.attributes.summary).toContain( - 'Process brief and create project artifacts', - ); + expect(projectJson.data.attributes.projectName).toBe('Sticky Note MVP'); } finally { await new Promise((r, reject) => briefServer.close((err) => (err ? reject(err) : r())),