Skip to content

Commit f433965

Browse files
committed
Add OpenCode runner to BuffBench
1 parent 948dab3 commit f433965

6 files changed

Lines changed: 270 additions & 6 deletions

File tree

evals/buffbench/README.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,7 @@ BuffBench supports running external CLI coding agents for comparison:
139139

140140
- **Claude Code**: Use `external:claude` - requires `claude` CLI installed
141141
- **Codex**: Use `external:codex` - requires `codex` CLI installed
142+
- **OpenCode**: Use `external:opencode` - requires `opencode` CLI installed
142143

143144
Example comparing Codebuff vs Claude Code:
144145

@@ -164,6 +165,13 @@ npm install -g @openai/codex
164165
# Set OPENAI_API_KEY environment variable
165166
```
166167

168+
**OpenCode CLI:**
169+
```bash
170+
# Install from https://opencode.ai/docs/install
171+
# Set OPENCODE_API_KEY environment variable
172+
# BuffBench uses opencode/kimi-k2.6 by default; override with OPENCODE_MODEL if needed.
173+
```
174+
167175
## Directory Structure
168176

169177
```

evals/buffbench/agent-runner.ts

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,23 @@
1-
import { execSync , exec } from 'child_process'
1+
import { execSync, exec } from 'child_process'
22
import { promisify } from 'util'
33

44
const execAsync = promisify(exec)
55

66
import { withTimeout } from '@codebuff/common/util/promise'
77

8-
98
import { withTestRepo } from '../subagents/test-repo-utils'
109
import { ClaudeRunner } from './runners/claude'
1110
import { CodebuffRunner } from './runners/codebuff'
1211
import { CodexRunner } from './runners/codex'
12+
import { OpenCodeRunner } from './runners/opencode'
1313

1414
import type { Runner, AgentStep } from './runners/runner'
1515
import type { EvalCommitV2, FinalCheckOutput } from './types'
1616
import type { CodebuffClient } from '@codebuff/sdk'
1717

1818
export type { AgentStep }
1919

20-
export type ExternalAgentType = 'claude' | 'codex'
20+
export type ExternalAgentType = 'claude' | 'codex' | 'opencode'
2121

2222
export async function runAgentOnCommit({
2323
client,
@@ -76,6 +76,8 @@ export async function runAgentOnCommit({
7676
runner = new ClaudeRunner(repoDir, env)
7777
} else if (externalAgentType === 'codex') {
7878
runner = new CodexRunner(repoDir, env)
79+
} else if (externalAgentType === 'opencode') {
80+
runner = new OpenCodeRunner(repoDir, env)
7981
} else {
8082
runner = new CodebuffRunner({
8183
cwd: repoDir,

evals/buffbench/main.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ async function main() {
88
// Compare Codebuff agents against external CLI agents
99
// Use 'external:claude' for Claude Code CLI
1010
// Use 'external:codex' for OpenAI Codex CLI
11+
// Use 'external:opencode' for OpenCode CLI
1112
await runBuffBench({
1213
evalDataPaths: [path.join(__dirname, 'eval-codebuff.json')],
1314
agents: ['base2-free-evals'],

evals/buffbench/run-buffbench.ts

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,13 @@ function parseAgentId(agent: string): {
2727
} {
2828
if (agent.startsWith('external:')) {
2929
const externalType = agent.slice('external:'.length) as ExternalAgentType
30-
if (externalType !== 'claude' && externalType !== 'codex') {
30+
if (
31+
externalType !== 'claude' &&
32+
externalType !== 'codex' &&
33+
externalType !== 'opencode'
34+
) {
3135
throw new Error(
32-
`Unknown external agent type: ${externalType}. Supported: claude, codex`,
36+
`Unknown external agent type: ${externalType}. Supported: claude, codex, opencode`,
3337
)
3438
}
3539
return { agentId: agent, externalAgentType: externalType }
@@ -187,7 +191,10 @@ async function runTask(options: {
187191
tracesDir,
188192
`${index + 1}-${safeTaskId}-${safeAgentId}-${safeCommitShort}-agent.json`,
189193
)
190-
fs.writeFileSync(agentTracePath, JSON.stringify(agentResult.trace, null, 2))
194+
fs.writeFileSync(
195+
agentTracePath,
196+
JSON.stringify(agentResult.trace, null, 2),
197+
)
191198
}
192199

193200
fs.writeFileSync(

evals/buffbench/runners/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
export { ClaudeRunner } from './claude'
22
export { CodexRunner } from './codex'
3+
export { OpenCodeRunner } from './opencode'
34
export type { Runner, RunnerResult } from './runner'
Lines changed: 245 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,245 @@
1+
import { execSync, spawn } from 'child_process'
2+
3+
import type { AgentStep, Runner, RunnerResult } from './runner'
4+
import type {
5+
PrintModeToolCall,
6+
PrintModeToolResult,
7+
} from '@codebuff/common/types/print-mode'
8+
import type { JSONValue } from '@codebuff/common/types/json'
9+
10+
const OPENCODE_MODEL = 'opencode/kimi-k2.6'
11+
12+
/**
 * Recursively converts an arbitrary runtime value into a JSON-compatible value.
 *
 * The conversion follows `JSON.stringify` wherever that has an unambiguous
 * answer, so the result can always be serialized without surprises:
 * - `null`, strings, booleans and finite numbers pass through unchanged;
 * - `NaN` and `±Infinity` become `null` (they are not valid JSON numbers);
 * - `undefined` becomes `null` at the top level and inside arrays, while
 *   object properties whose value is `undefined` are dropped;
 * - objects exposing a `toJSON()` method (for example `Date`) are converted
 *   through that method instead of collapsing to `{}`;
 * - arrays and other objects are converted entry by entry.
 *
 * Anything else (bigint, symbol, function) is stringified with `String()`.
 *
 * @param value - Any value, typically the result of `JSON.parse`.
 * @returns A value made only of JSON primitives, arrays and plain objects.
 */
function toJsonValue(value: unknown): JSONValue {
  if (value === null || value === undefined) {
    return null
  }

  if (typeof value === 'string' || typeof value === 'boolean') {
    return value
  }

  if (typeof value === 'number') {
    // NaN / Infinity have no JSON representation; JSON.stringify emits null.
    return Number.isFinite(value) ? value : null
  }

  if (Array.isArray(value)) {
    return value.map((item) => toJsonValue(item))
  }

  if (typeof value === 'object') {
    const toJSON = (value as { toJSON?: unknown }).toJSON
    if (typeof toJSON === 'function') {
      const converted: unknown = toJSON.call(value)
      // Guard against a toJSON() that returns the object itself, which would
      // otherwise recurse forever.
      if (converted !== value) {
        return toJsonValue(converted)
      }
    }

    return Object.fromEntries(
      Object.entries(value)
        .filter(([, entry]) => entry !== undefined)
        .map(([key, entry]) => [key, toJsonValue(entry)]),
    )
  }

  return String(value)
}
34+
35+
/** Error details carried by an event whose `type` is `'error'`. */
type OpenCodeEventError = {
  name?: string
  message?: string
  statusCode?: number
}

/** Input and output recorded for a tool invocation. */
type OpenCodeToolState = {
  input?: unknown
  output?: unknown
}

/**
 * Payload of an event. Which fields are populated depends on `type`
 * (this module reads `'text'`, `'tool'` and `'step-finish'` parts).
 */
type OpenCodeEventPart = {
  id?: string
  type?: string
  text?: string
  tool?: string
  callID?: string
  state?: OpenCodeToolState
  cost?: number
}

/**
 * One parsed line of the OpenCode CLI's JSON output, as consumed by this
 * module. Every field is optional because the lines are parsed from external
 * process output and are not validated against a schema.
 */
type OpenCodeEvent = {
  type?: string
  sessionID?: string
  error?: OpenCodeEventError
  part?: OpenCodeEventPart
}
56+
57+
/**
 * Runner that executes a prompt with the `opencode` CLI and converts its JSON
 * output into BuffBench agent steps.
 *
 * The CLI is spawned in `cwd` with `process.env` overlaid by the `env` passed
 * to the constructor. The model is taken from `OPENCODE_MODEL` (runner env
 * first, then process env) and falls back to the module default.
 */
export class OpenCodeRunner implements Runner {
  private readonly cwd: string
  private readonly env: Record<string, string>

  /**
   * @param cwd - Working directory the CLI and the git commands run in.
   * @param env - Extra environment variables; they override `process.env`.
   */
  constructor(cwd: string, env: Record<string, string> = {}) {
    this.cwd = cwd
    this.env = env
  }

  /**
   * Runs `opencode run` with the given prompt and collects the result.
   *
   * @param prompt - The task description passed to the CLI as its last argument.
   * @returns The collected steps, the summed cost of all finished steps, and
   *   the `git diff HEAD` of the working directory after the run.
   * @throws Rejects when the CLI cannot be started, exits with a non-zero
   *   code, is terminated by a signal, or emits an `error` event.
   */
  async run(prompt: string): Promise<RunnerResult> {
    const steps: AgentStep[] = []
    let totalCostUsd = 0

    return new Promise((resolve, reject) => {
      let openCodeError: string | undefined
      let spawnFailed = false
      let fallbackToolCallCount = 0

      // `||` is deliberate: an empty OPENCODE_MODEL is treated as unset.
      const model =
        this.env.OPENCODE_MODEL || process.env.OPENCODE_MODEL || OPENCODE_MODEL
      const args = [
        'run',
        '--model',
        model,
        '--format',
        'json',
        '--agent',
        'build',
        prompt,
      ]

      console.log(`[OpenCodeRunner] Running: opencode run --model ${model}`)

      const child = spawn('opencode', args, {
        cwd: this.cwd,
        env: {
          ...process.env,
          ...this.env,
          OPENCODE_API_KEY:
            this.env.OPENCODE_API_KEY || process.env.OPENCODE_API_KEY,
        },
        stdio: ['ignore', 'pipe', 'pipe'],
      })

      // Decode at the stream level so a multi-byte UTF-8 character that is
      // split across two chunks is reassembled instead of being corrupted.
      child.stdout.setEncoding('utf8')
      child.stderr.setEncoding('utf8')

      let stdoutBuffer = ''
      let stderr = ''

      /** Translates one parsed event into steps, cost, or a recorded error. */
      const processEvent = (event: OpenCodeEvent) => {
        if (event.type === 'error') {
          const message =
            event.error?.message ||
            event.error?.name ||
            'OpenCode emitted an error event.'
          openCodeError = event.error?.statusCode
            ? `${message} (status ${event.error.statusCode})`
            : message
          steps.push({
            type: 'text',
            text: `[OpenCode error] ${openCodeError}`,
          })
          return
        }

        const part = event.part
        if (!part) {
          return
        }

        if (event.type === 'text' || part.type === 'text') {
          const text = part.text ?? ''
          if (text.length > 0) {
            steps.push({ type: 'text', text })
            process.stdout.write(text)
          }
          return
        }

        if (event.type === 'step_finish' || part.type === 'step-finish') {
          if (typeof part.cost === 'number') {
            totalCostUsd += part.cost
          }
          return
        }

        if (part.type === 'tool') {
          const toolName = part.tool ?? 'unknown'
          // The counter keeps fallback ids unique even when several tool
          // events without an id arrive within the same millisecond.
          const toolCallId =
            part.callID ??
            part.id ??
            `opencode-${Date.now()}-${fallbackToolCallCount++}`
          const input = part.state?.input ?? {}

          const toolCall: PrintModeToolCall = {
            type: 'tool_call',
            toolName,
            toolCallId,
            // Only plain objects are used as-is; primitives and arrays are
            // wrapped so the step input is always a keyed record.
            input:
              typeof input === 'object' &&
              input !== null &&
              !Array.isArray(input)
                ? (input as Record<string, unknown>)
                : { input },
          }
          steps.push(toolCall)

          if (part.state && 'output' in part.state) {
            const toolResult: PrintModeToolResult = {
              type: 'tool_result',
              toolName,
              toolCallId,
              output: [
                {
                  type: 'json',
                  value: toJsonValue(part.state.output ?? ''),
                },
              ],
            }
            steps.push(toolResult)
          }
        }
      }

      /**
       * Handles one stdout line. Lines that are not a JSON object (or whose
       * processing throws) are kept verbatim as text steps so no output is lost.
       */
      const processLine = (line: string) => {
        if (!line.trim()) {
          return
        }

        try {
          const parsed: unknown = JSON.parse(line)
          if (
            typeof parsed !== 'object' ||
            parsed === null ||
            Array.isArray(parsed)
          ) {
            steps.push({ type: 'text', text: line })
            return
          }
          processEvent(parsed as OpenCodeEvent)
        } catch {
          steps.push({ type: 'text', text: line })
        }
      }

      child.stdout.on('data', (chunk: string) => {
        stdoutBuffer += chunk

        // Process every complete line; keep the trailing partial line buffered.
        const lines = stdoutBuffer.split('\n')
        stdoutBuffer = lines.pop() ?? ''
        for (const line of lines) {
          processLine(line)
        }
      })

      child.stderr.on('data', (chunk: string) => {
        stderr += chunk
        process.stderr.write(chunk)
      })

      child.on('error', (error) => {
        spawnFailed = true
        reject(
          new Error(
            `OpenCode CLI failed to start: ${error.message}. Make sure 'opencode' is installed and in PATH.`,
          ),
        )
      })

      child.on('close', (code, signal) => {
        if (spawnFailed) {
          // The promise is already rejected; do not touch the git index.
          return
        }

        if (stdoutBuffer.trim()) {
          processLine(stdoutBuffer)
        }

        let diff = ''
        try {
          // Stage everything first so newly created files appear in the diff.
          execSync('git add .', { cwd: this.cwd, stdio: 'ignore' })
          diff = execSync('git diff HEAD', {
            cwd: this.cwd,
            encoding: 'utf-8',
            maxBuffer: 10 * 1024 * 1024,
          })
        } catch {
          // Best effort: a failing git command leaves the diff empty.
        }

        if (code !== 0) {
          const exitDescription =
            code === null
              ? `was terminated by signal ${signal}`
              : `exited with code ${code}`
          // Keep the error reported by OpenCode itself; stderr alone often
          // does not say why the run failed.
          const errorDetail = openCodeError
            ? `\nOpenCode error: ${openCodeError}`
            : ''
          reject(
            new Error(
              `OpenCode CLI ${exitDescription}. stderr: ${stderr}${errorDetail}`,
            ),
          )
          return
        }

        if (openCodeError) {
          reject(new Error(openCodeError))
          return
        }

        resolve({
          steps,
          totalCostUsd,
          diff,
        })
      })
    })
  }
}

0 commit comments

Comments
 (0)