Skip to content

Commit 87a5afa

Browse files
cte and hannesrudolph authored
Revert "feat(e2e): Enable E2E tests - 39 passing tests" (#10794)
Co-authored-by: Hannes Rudolph <hrudolph@gmail.com>
1 parent f58b908 commit 87a5afa

12 files changed

Lines changed: 2043 additions & 1101 deletions

File tree

apps/vscode-e2e/README.md

Lines changed: 0 additions & 405 deletions
This file was deleted.

apps/vscode-e2e/src/suite/index.ts

Lines changed: 16 additions & 100 deletions
Original file line number | Diff line number | Diff line change
@@ -7,18 +7,6 @@ import type { RooCodeAPI } from "@roo-code/types"
77

88
import { waitFor } from "./utils"
99

10-
/**
11-
* Models to test against - high-performing models from different providers
12-
*/
13-
const MODELS_TO_TEST = ["openai/gpt-5.2", "anthropic/claude-sonnet-4.5", "google/gemini-3-pro-preview"]
14-
15-
interface ModelTestResult {
16-
model: string
17-
failures: number
18-
passes: number
19-
duration: number
20-
}
21-
2210
export async function run() {
2311
const extension = vscode.extensions.getExtension<RooCodeAPI>("RooVeterinaryInc.roo-cline")
2412

@@ -28,18 +16,28 @@ export async function run() {
2816

2917
const api = extension.isActive ? extension.exports : await extension.activate()
3018

31-
// Initial configuration with first model (will be reconfigured per model)
3219
await api.setConfiguration({
3320
apiProvider: "openrouter" as const,
3421
openRouterApiKey: process.env.OPENROUTER_API_KEY!,
35-
openRouterModelId: MODELS_TO_TEST[0],
22+
openRouterModelId: "openai/gpt-4.1",
3623
})
3724

3825
await vscode.commands.executeCommand("roo-cline.SidebarProvider.focus")
3926
await waitFor(() => api.isReady())
4027

4128
globalThis.api = api
4229

30+
const mochaOptions: Mocha.MochaOptions = {
31+
ui: "tdd",
32+
timeout: 20 * 60 * 1_000, // 20m
33+
}
34+
35+
if (process.env.TEST_GREP) {
36+
mochaOptions.grep = process.env.TEST_GREP
37+
console.log(`Running tests matching pattern: ${process.env.TEST_GREP}`)
38+
}
39+
40+
const mocha = new Mocha(mochaOptions)
4341
const cwd = path.resolve(__dirname, "..")
4442

4543
let testFiles: string[]
@@ -59,91 +57,9 @@ export async function run() {
5957
throw new Error(`No test files found matching criteria: ${process.env.TEST_FILE || "all tests"}`)
6058
}
6159

62-
const results: ModelTestResult[] = []
63-
let totalFailures = 0
64-
65-
// Run tests for each model sequentially
66-
for (const model of MODELS_TO_TEST) {
67-
console.log(`\n${"=".repeat(60)}`)
68-
console.log(` TESTING WITH MODEL: ${model}`)
69-
console.log(`${"=".repeat(60)}\n`)
70-
71-
// Reconfigure API for this model
72-
await api.setConfiguration({
73-
apiProvider: "openrouter" as const,
74-
openRouterApiKey: process.env.OPENROUTER_API_KEY!,
75-
openRouterModelId: model,
76-
})
77-
78-
// Wait for API to be ready with new configuration
79-
await waitFor(() => api.isReady())
80-
81-
const startTime = Date.now()
82-
83-
const mochaOptions: Mocha.MochaOptions = {
84-
ui: "tdd",
85-
timeout: 20 * 60 * 1_000, // 20m
86-
}
87-
88-
if (process.env.TEST_GREP) {
89-
mochaOptions.grep = process.env.TEST_GREP
90-
console.log(`Running tests matching pattern: ${process.env.TEST_GREP}`)
91-
}
92-
93-
const mocha = new Mocha(mochaOptions)
94-
95-
// Add test files fresh for each model run
96-
testFiles.forEach((testFile) => mocha.addFile(path.resolve(cwd, testFile)))
97-
98-
// Run tests for this model
99-
const modelResult = await new Promise<{ failures: number; passes: number }>((resolve) => {
100-
const runner = mocha.run((failures) => {
101-
resolve({
102-
failures,
103-
passes: runner.stats?.passes ?? 0,
104-
})
105-
})
106-
})
107-
108-
const duration = Date.now() - startTime
109-
110-
results.push({
111-
model,
112-
failures: modelResult.failures,
113-
passes: modelResult.passes,
114-
duration,
115-
})
116-
117-
totalFailures += modelResult.failures
118-
119-
console.log(
120-
`\n[${model}] Completed: ${modelResult.passes} passed, ${modelResult.failures} failed (${(duration / 1000).toFixed(1)}s)\n`,
121-
)
122-
123-
// Clear mocha's require cache to allow re-running tests
124-
mocha.dispose()
125-
testFiles.forEach((testFile) => {
126-
const fullPath = path.resolve(cwd, testFile)
127-
delete require.cache[require.resolve(fullPath)]
128-
})
129-
}
130-
131-
// Print summary
132-
console.log(`\n${"=".repeat(60)}`)
133-
console.log(` MULTI-MODEL TEST SUMMARY`)
134-
console.log(`${"=".repeat(60)}`)
135-
136-
for (const result of results) {
137-
const status = result.failures === 0 ? "✓ PASS" : "✗ FAIL"
138-
console.log(` ${status} ${result.model}`)
139-
console.log(
140-
` ${result.passes} passed, ${result.failures} failed (${(result.duration / 1000).toFixed(1)}s)`,
141-
)
142-
}
60+
testFiles.forEach((testFile) => mocha.addFile(path.resolve(cwd, testFile)))
14361

144-
console.log(`${"=".repeat(60)}\n`)
145-
146-
if (totalFailures > 0) {
147-
throw new Error(`${totalFailures} total test failures across all models.`)
148-
}
62+
return new Promise<void>((resolve, reject) =>
63+
mocha.run((failures) => (failures === 0 ? resolve() : reject(new Error(`${failures} tests failed.`)))),
64+
)
14965
}

apps/vscode-e2e/src/suite/subtasks.test.ts

Lines changed: 47 additions & 66 deletions
Original file line number | Diff line number | Diff line change
@@ -2,92 +2,73 @@ import * as assert from "assert"
22

33
import { RooCodeEventName, type ClineMessage } from "@roo-code/types"
44

5-
import { waitFor } from "./utils"
5+
import { sleep, waitFor, waitUntilCompleted } from "./utils"
66

7-
suite("Roo Code Subtasks", () => {
8-
test("Should create and complete a subtask successfully", async function () {
9-
this.timeout(180_000) // 3 minutes for complex orchestration
7+
suite.skip("Roo Code Subtasks", () => {
8+
test("Should handle subtask cancellation and resumption correctly", async () => {
109
const api = globalThis.api
1110

12-
const messages: ClineMessage[] = []
13-
let childTaskCompleted = false
14-
let parentCompleted = false
11+
const messages: Record<string, ClineMessage[]> = {}
1512

16-
// Listen for messages to detect subtask result
17-
const messageHandler = ({ message }: { message: ClineMessage }) => {
18-
messages.push(message)
19-
20-
// Log completion messages
21-
if (message.type === "say" && message.say === "completion_result") {
22-
console.log("Completion result:", message.text?.substring(0, 100))
23-
}
24-
}
25-
api.on(RooCodeEventName.Message, messageHandler)
26-
27-
// Listen for task completion
28-
const completionHandler = (taskId: string) => {
29-
if (taskId === parentTaskId) {
30-
parentCompleted = true
31-
console.log("✓ Parent task completed")
32-
} else {
33-
childTaskCompleted = true
34-
console.log("✓ Child task completed:", taskId)
13+
api.on(RooCodeEventName.Message, ({ taskId, message }) => {
14+
if (message.type === "say" && message.partial === false) {
15+
messages[taskId] = messages[taskId] || []
16+
messages[taskId].push(message)
3517
}
36-
}
37-
api.on(RooCodeEventName.TaskCompleted, completionHandler)
18+
})
3819

39-
const childPrompt = "What is 2 + 2? Respond with just the number."
20+
const childPrompt = "You are a calculator. Respond only with numbers. What is the square root of 9?"
4021

41-
// Start a parent task that will create a subtask
42-
console.log("Starting parent task that will spawn subtask...")
22+
// Start a parent task that will create a subtask.
4323
const parentTaskId = await api.startNewTask({
4424
configuration: {
45-
mode: "code",
25+
mode: "ask",
4626
alwaysAllowModeSwitch: true,
4727
alwaysAllowSubtasks: true,
4828
autoApprovalEnabled: true,
4929
enableCheckpoints: false,
5030
},
51-
text: `Create a subtask using the new_task tool with this message: "${childPrompt}". Wait for the subtask to complete, then tell me the result.`,
31+
text:
32+
"You are the parent task. " +
33+
`Create a subtask by using the new_task tool with the message '${childPrompt}'.` +
34+
"After creating the subtask, wait for it to complete and then respond 'Parent task resumed'.",
5235
})
5336

54-
try {
55-
// Wait for child task to complete
56-
console.log("Waiting for child task to complete...")
57-
await waitFor(() => childTaskCompleted, { timeout: 90_000 })
58-
console.log("✓ Child task completed")
37+
let spawnedTaskId: string | undefined = undefined
5938

60-
// Wait for parent to complete
61-
console.log("Waiting for parent task to complete...")
62-
await waitFor(() => parentCompleted, { timeout: 90_000 })
63-
console.log("✓ Parent task completed")
39+
// Wait for the subtask to be spawned and then cancel it.
40+
api.on(RooCodeEventName.TaskSpawned, (_, childTaskId) => (spawnedTaskId = childTaskId))
41+
await waitFor(() => !!spawnedTaskId)
42+
await sleep(1_000) // Give the task a chance to start and populate the history.
43+
await api.cancelCurrentTask()
6444

65-
// Verify the parent task mentions the subtask result (should contain "4")
66-
const hasSubtaskResult = messages.some(
67-
(m) =>
68-
m.type === "say" &&
69-
m.say === "completion_result" &&
70-
m.text?.includes("4") &&
71-
m.text?.toLowerCase().includes("subtask"),
72-
)
45+
// Wait a bit to ensure any task resumption would have happened.
46+
await sleep(2_000)
7347

74-
// Verify all events occurred
75-
assert.ok(childTaskCompleted, "Child task should have completed")
76-
assert.ok(parentCompleted, "Parent task should have completed")
77-
assert.ok(hasSubtaskResult, "Parent task should mention the subtask result")
48+
// The parent task should not have resumed yet, so we shouldn't see
49+
// "Parent task resumed".
50+
assert.ok(
51+
messages[parentTaskId]?.find(({ type, text }) => type === "say" && text === "Parent task resumed") ===
52+
undefined,
53+
"Parent task should not have resumed after subtask cancellation",
54+
)
7855

79-
console.log("Test passed! Subtask orchestration working correctly")
80-
} finally {
81-
// Clean up
82-
api.off(RooCodeEventName.Message, messageHandler)
83-
api.off(RooCodeEventName.TaskCompleted, completionHandler)
56+
// Start a new task with the same message as the subtask.
57+
const anotherTaskId = await api.startNewTask({ text: childPrompt })
58+
await waitUntilCompleted({ api, taskId: anotherTaskId })
8459

85-
// Cancel any remaining tasks
86-
try {
87-
await api.cancelCurrentTask()
88-
} catch {
89-
// Task might already be complete
90-
}
91-
}
60+
// Wait a bit to ensure any task resumption would have happened.
61+
await sleep(2_000)
62+
63+
// The parent task should still not have resumed.
64+
assert.ok(
65+
messages[parentTaskId]?.find(({ type, text }) => type === "say" && text === "Parent task resumed") ===
66+
undefined,
67+
"Parent task should not have resumed after subtask cancellation",
68+
)
69+
70+
// Clean up - cancel all tasks.
71+
await api.clearCurrentTask()
72+
await waitUntilCompleted({ api, taskId: parentTaskId })
9273
})
9374
})

0 commit comments

Comments
 (0)