Skip to content

Commit b217090

Browse files
authored
Gpt 5.2 + propose_str_replace, propose_write_file (#393)
1 parent ace7f65 commit b217090

40 files changed

+2005
-464
lines changed

.agents/base2/base2.ts

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,8 @@ export function createBase2(
5454
!isFast && 'suggest_followups',
5555
'str_replace',
5656
'write_file',
57+
'propose_str_replace',
58+
'propose_write_file',
5759
'ask_user',
5860
'set_output',
5961
),
@@ -68,7 +70,7 @@ export function createBase2(
6870
isDefault && 'thinker',
6971
isLite && 'editor-gpt-5',
7072
isDefault && 'editor',
71-
isMax && 'editor-multi-prompt',
73+
isMax && 'editor-multi-prompt2',
7274
isMax && 'thinker-best-of-n-opus',
7375
!isLite && 'code-reviewer',
7476
'context-pruner',
@@ -127,7 +129,7 @@ Use the spawn_agents tool to spawn specialized agents to help you complete the u
127129
(isDefault || isMax) &&
128130
`- Spawn the ${isDefault ? 'thinker' : 'thinker-best-of-n-opus'} after gathering context to solve complex problems or when the user asks you to think about a problem.`,
129131
isMax &&
130-
`- Spawn the editor-multi-prompt agent to implement the changes after you have gathered all the context you need. You must spawn this agent for non-trivial changes, since it writes much better code than you would with the str_replace or write_file tools. Don't spawn the editor in parallel with context-gathering agents.`,
132+
`- IMPORTANT: You must spawn the editor-multi-prompt2 agent to implement the changes after you have gathered all the context you need. You must spawn this agent for non-trivial changes, since it writes much better code than you would with the str_replace or write_file tools. Don't spawn the editor in parallel with context-gathering agents.`,
131133
'- Spawn commanders sequentially if the second command depends on the the first.',
132134
!isFast &&
133135
!isLite &&
@@ -181,7 +183,7 @@ ${
181183
? '[ You implement the changes using the str_replace or write_file tools ]'
182184
: isLite
183185
? '[ You implement the changes using the editor-gpt-5 agent ]'
184-
: '[ You implement the changes using the editor-multi-prompt agent ]'
186+
: '[ You implement the changes using the editor-multi-prompt2 agent ]'
185187
}
186188
187189
${
@@ -291,6 +293,8 @@ ${buildArray(
291293
EXPLORE_PROMPT,
292294
isMax &&
293295
`- Important: Read as many files as could possibly be relevant to the task over several steps to improve your understanding of the user's request and produce the best possible code changes. Find more examples within the codebase similar to the user's request, dependencies that help with understanding how things work, tests, etc. This is frequently 12-20 files, depending on the task.`,
296+
isMax &&
297+
'If needed, use the ask_user tool to ask the user for clarification on their request or alternate implementation strategies. It is good to get context on the codebase before asking questions so you can ask informed questions.',
294298
(isDefault || isMax) &&
295299
`- For any task requiring 3+ steps, use the write_todos tool to write out your step-by-step implementation plan. Include ALL of the applicable tasks in the list.${isFast ? '' : ' You should include a step to review the changes after you have implemented the changes.'}:${hasNoValidation ? '' : ' You should include at least one step to validate/test your changes: be specific about whether to typecheck, run tests, run lints, etc.'} You may be able to do reviewing and validation in parallel in the same step. Skip write_todos for simple tasks like quick edits or answering questions.`,
296300
isDefault &&
@@ -300,7 +304,7 @@ ${buildArray(
300304
isDefault &&
301305
'- IMPORTANT: You must spawn the editor agent to implement the changes after you have gathered all the context you need. This agent will do the best job of implementing the changes so you must spawn it for all non-trivial changes. Do not pass any prompt or params to the editor agent when spawning it. It will make its own best choices of what to do.',
302306
isMax &&
303-
`- IMPORTANT: You must spawn the editor-multi-prompt agent to implement non-trivial code changes, since it will generate the best code changes from multiple implementation proposals. This is the best way to make high quality code changes -- strongly prefer using this agent over the str_replace or write_file tools, unless the change is very straightforward and obvious.`,
307+
`- IMPORTANT: You must spawn the editor-multi-prompt2 agent to implement non-trivial code changes, since it will generate the best code changes from multiple implementation proposals. This is the best way to make high quality code changes -- strongly prefer using this agent over the str_replace or write_file tools, unless the change is very straightforward and obvious.`,
304308
isFast &&
305309
'- Implement the changes using the str_replace or write_file tools. Implement all the changes in one go.',
306310
isFast &&
@@ -334,7 +338,7 @@ function buildImplementationStepPrompt({
334338
isMax &&
335339
`Keep working until the user's request is completely satisfied${!hasNoValidation ? ' and validated' : ''}, or until you require more information from the user.`,
336340
isMax &&
337-
`You must spawn the 'editor-multi-prompt' agent to implement code changes, since it will generate the best code changes.`,
341+
`You must spawn the 'editor-multi-prompt2' agent to implement code changes, since it will generate the best code changes.`,
338342
(isDefault || isMax) &&
339343
'Spawn code-reviewer to review the changes after you have implemented the changes and in parallel with typechecking or testing.',
340344
`After completing the user request, summarize your changes in a sentence${isFast ? '' : ' or a few short bullet points'}.${isSonnet ? " Don't create any summary markdown files or example documentation files, unless asked by the user." : ''} Don't repeat yourself, especially if you have already concluded and summarized the changes in a previous step -- just end your turn.`,

.agents/context-pruner.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,12 @@ const definition: AgentDefinition = {
172172
if (lastInstructionsPromptIndex !== -1) {
173173
currentMessages.splice(lastInstructionsPromptIndex, 1)
174174
}
175+
const lastSubagentSpawnIndex = currentMessages.findLastIndex((message) =>
176+
message.tags?.includes('SUBAGENT_SPAWN'),
177+
)
178+
if (lastSubagentSpawnIndex !== -1) {
179+
currentMessages.splice(lastSubagentSpawnIndex, 1)
180+
}
175181

176182
// Initial check - if already under limit, return
177183
const initialTokens = countMessagesTokens(currentMessages)
Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
import {
2+
PLACEHOLDER,
3+
type SecretAgentDefinition,
4+
} from '../../types/secret-agent-definition'
5+
import { publisher } from '../../constants'
6+
7+
/**
 * Builds the v2 best-of-N selector agent definition (everything except `id`).
 *
 * The agent is given candidate implementations as unified diffs through
 * `params.implementations` and reports its choice via `set_output` as
 * `{ implementationId, reason, suggestedImprovements }`, where all three
 * fields are strings (see `outputSchema`).
 *
 * @param options.model Backing model family: 'sonnet', 'opus' or 'gpt-5'.
 * @returns The agent definition without an `id`; the caller supplies one.
 */
export const createBestOfNSelector2 = (options: {
  model: 'sonnet' | 'opus' | 'gpt-5'
}): Omit<SecretAgentDefinition, 'id'> => {
  const { model } = options
  const isGpt5 = model === 'gpt-5'
  // Sonnet and Opus are prompted to reason inside <think> tags; GPT-5 instead
  // gets `reasoningOptions.effort = 'high'` below.
  const usesThinkTags = model === 'sonnet' || model === 'opus'

  const modelIds = {
    sonnet: 'anthropic/claude-sonnet-4.5',
    opus: 'anthropic/claude-opus-4.5',
    'gpt-5': 'openai/gpt-5.2',
  } as const

  const displayNames = {
    sonnet: 'Best-of-N Sonnet Diff Selector',
    opus: 'Best-of-N Opus Diff Selector',
    'gpt-5': 'Best-of-N GPT-5 Diff Selector',
  } as const

  // The wording must agree with `outputSchema`: suggestedImprovements is a
  // single string, not an array.
  const responseFormat = usesThinkTags
    ? `Use <think> tags to write out your thoughts about the implementations as needed to pick the best implementation. IMPORTANT: You should think really really hard to make sure you pick the absolute best implementation! Also analyze the non-chosen implementations for any valuable techniques or approaches that could improve the selected one.

Then, do not write any other explanations AT ALL. You should directly output a single tool call to set_output with the selected implementationId, short reason, and suggestedImprovements string.`
    : `Output a single tool call to set_output with the selected implementationId, reason, and suggestedImprovements. Do not write anything else.`

  return {
    publisher,
    model: modelIds[model],
    ...(isGpt5 && {
      reasoningOptions: {
        effort: 'high',
      },
    }),
    displayName: displayNames[model],
    spawnerPrompt:
      'Analyzes multiple implementation proposals (as unified diffs) and selects the best one',

    includeMessageHistory: true,
    inheritParentSystemPrompt: true,

    // The selector only reports a decision; it cannot edit or spawn anything.
    toolNames: ['set_output'],
    spawnableAgents: [],

    inputSchema: {
      params: {
        type: 'object',
        properties: {
          implementations: {
            type: 'array',
            items: {
              type: 'object',
              properties: {
                id: { type: 'string' },
                strategy: { type: 'string' },
                content: { type: 'string', description: 'Unified diff of the proposed changes' },
              },
              required: ['id', 'content'],
            },
          },
        },
        required: ['implementations'],
      },
    },
    outputMode: 'structured_output',
    outputSchema: {
      type: 'object',
      properties: {
        implementationId: {
          type: 'string',
          description: 'The id of the chosen implementation',
        },
        reason: {
          type: 'string',
          description:
            'An extremely short (1 sentence) description of why this implementation was chosen',
        },
        suggestedImprovements: {
          type: 'string',
          description:
            'A summary of suggested improvements from non-chosen implementations that could enhance the selected implementation. You can also include any new ideas you have to improve upon the selected implementation. Leave empty if no valuable improvements were found.',
        },
      },
      required: ['implementationId', 'reason', 'suggestedImprovements'],
    },

    instructionsPrompt: `As part of the best-of-n workflow of agents, you are the implementation selector agent.

## Task Instructions

You have been provided with multiple implementation proposals via params. Each implementation shows a UNIFIED DIFF of the proposed changes.

The implementations are available in the params.implementations array, where each has:
- id: A unique identifier for the implementation (A, B, C, etc.)
- strategy: The strategy/approach used for this implementation
- content: The unified diff showing what would change

Your task is to:
1. Analyze each implementation's diff carefully, compare them against the original user requirements
2. Select the best implementation
3. Identify the best ideas/techniques from the NON-CHOSEN implementations that could improve the selected implementation

Evaluate each based on (in order of importance):
- Correctness and completeness in fulfilling the user's request
- Simplicity and maintainability
- Code quality and adherence to project conventions
- Proper reuse of existing code (helper functions, libraries, etc.)
- Minimal changes to existing code (fewer files changed, fewer lines changed)
- Clarity and readability

## Analyzing Non-Chosen Implementations

After selecting the best implementation, look at each non-chosen implementation and identify any valuable aspects that could enhance the selected implementation. These might include:
- More elegant code patterns or abstractions
- Simplified logic or reuse of existing code
- Additional edge case handling
- Better naming or organization
- Useful comments or documentation
- Additional features that align with the user's request

Only include improvements that are genuinely valuable and compatible with the selected implementation. If a non-chosen implementation has no useful improvements to offer, don't include it.

## User Request

For context, here is the original user request again:
<user_message>
${PLACEHOLDER.USER_INPUT_PROMPT}
</user_message>

Try to select an implementation that fulfills all the requirements in the user's request.

## Response Format

${responseFormat}`,
  }
}
138+
139+
// Default selector agent: the Opus-backed configuration, registered under
// the id 'best-of-n-selector2'.
const opusSelectorBase = createBestOfNSelector2({ model: 'opus' })
const definition: SecretAgentDefinition = {
  ...opusSelectorBase,
  id: 'best-of-n-selector2',
}
143+
144+
export default definition

.agents/editor/best-of-n/editor-implementor.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ export const createBestOfNImplementor = (options: {
1919
? 'anthropic/claude-opus-4.5'
2020
: isGemini
2121
? 'google/gemini-3-pro-preview'
22-
: 'openai/gpt-5.1',
22+
: 'openai/gpt-5.2',
2323
displayName: 'Implementation Generator',
2424
spawnerPrompt:
2525
'Generates a complete implementation plan with all code changes',
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
import { createBestOfNImplementor2 } from './editor-implementor2'
2+
3+
// GPT-5 flavour of the v2 implementor, registered under the id
// 'editor-implementor2-gpt-5'.
const gpt5ImplementorBase = createBestOfNImplementor2({ model: 'gpt-5' })
const definition = {
  ...gpt5ImplementorBase,
  id: 'editor-implementor2-gpt-5',
}
7+
export default definition
Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
import { publisher } from '../../constants'
2+
3+
import type { SecretAgentDefinition } from '../../types/secret-agent-definition'
4+
5+
/**
 * Builds the v2 best-of-N implementor agent definition (everything except `id`).
 *
 * The agent drafts its edits with the `propose_write_file` and
 * `propose_str_replace` tools. Once it has stepped, `handleSteps` gathers the
 * tool calls and JSON tool results it produced and publishes them, together
 * with the concatenated unified diffs, through `set_output`.
 *
 * @param options.model Backing model family: 'gpt-5', 'opus' or 'sonnet'.
 * @returns The agent definition without an `id`; the caller supplies one.
 */
export const createBestOfNImplementor2 = (options: {
  model: 'gpt-5' | 'opus' | 'sonnet'
}): Omit<SecretAgentDefinition, 'id'> => {
  const { model } = options

  const modelIds = {
    'gpt-5': 'openai/gpt-5.2',
    opus: 'anthropic/claude-opus-4.5',
    sonnet: 'anthropic/claude-sonnet-4.5',
  } as const

  const displayNames = {
    'gpt-5': 'GPT-5 Implementation Generator v2',
    opus: 'Opus Implementation Generator v2',
    sonnet: 'Sonnet Implementation Generator v2',
  } as const

  return {
    publisher,
    model: modelIds[model],
    displayName: displayNames[model],
    spawnerPrompt:
      'Generates a complete implementation using propose_* tools that draft changes without applying them',

    includeMessageHistory: true,
    inheritParentSystemPrompt: true,

    toolNames: ['propose_write_file', 'propose_str_replace'],
    spawnableAgents: [],

    inputSchema: {},
    outputMode: 'structured_output',

    instructionsPrompt: `You are an expert code editor with deep understanding of software engineering principles. You were spawned to generate an implementation for the user's request.

Your task is to write out ALL the code changes needed to complete the user's request.

IMPORTANT: Use propose_str_replace and propose_write_file tools to make your edits. These tools draft changes without actually applying them - they will be reviewed first.

You can make multiple tool calls across multiple steps to complete the implementation.

After your edit tool calls, you can optionally mention any follow-up steps to take, like deleting a file, or a specific way to validate the changes.

Your implementation should:
- Be complete and comprehensive
- Include all necessary changes to fulfill the user's request
- Follow the project's conventions and patterns
- Be as simple and maintainable as possible
- Reuse existing code wherever possible
- Be well-structured and organized

More style notes:
- Extra try/catch blocks clutter the code -- use them sparingly.
- Optional arguments are code smell and worse than required arguments.
- New components often should be added to a new file, not added to an existing file.

Write out your complete implementation now.`,

    // NOTE(review): the helper below is deliberately kept inside the generator
    // on the assumption that handleSteps must stay self-contained -- confirm.
    handleSteps: function* ({ agentState: initialAgentState }) {
      // Everything at or beyond this index was produced during this run.
      const historyStart = initialAgentState.messageHistory.length

      // True for an assistant message that has no tool calls and whose text
      // parts (if any) are all blank.
      const isEmptyAssistantMessage = (message: any): boolean => {
        const isAssistantWithParts =
          message.role === 'assistant' && Array.isArray(message.content)
        if (!isAssistantWithParts) {
          return false
        }
        const containsToolCall = message.content.some(
          (part: any) => part.type === 'tool-call',
        )
        // `every` is vacuously true when there are no text parts at all.
        return (
          !containsToolCall &&
          message.content
            .filter((part: any) => part.type === 'text')
            .every((part: any) => !part.text || !part.text.trim())
        )
      }

      const { agentState } = yield 'STEP_ALL'
      let postMessages = agentState.messageHistory.slice(historyStart)

      // Step one more time when the first pass produced nothing, or only a
      // single empty assistant message.
      const producedNothing =
        postMessages.length === 0 ||
        (postMessages.length === 1 && isEmptyAssistantMessage(postMessages[0]))
      if (producedNothing) {
        const { agentState: retriedAgentState } = yield 'STEP_ALL'
        postMessages = retriedAgentState.messageHistory.slice(historyStart)
      }

      // One pass over the new messages: assistant messages contribute tool
      // calls, tool messages contribute their JSON results.
      const toolCalls: { toolName: string; input: any }[] = []
      const toolResults: any[] = []
      for (const message of postMessages) {
        if (message.role === 'assistant' && Array.isArray(message.content)) {
          for (const part of message.content) {
            if (part.type !== 'tool-call') continue
            toolCalls.push({
              toolName: part.toolName,
              // Accept either `input` or `args` as the argument property.
              input: part.input ?? (part as any).args ?? {},
            })
          }
        } else if (message.role === 'tool' && Array.isArray(message.content)) {
          for (const part of message.content) {
            if (part.type === 'json' && part.value) {
              toolResults.push(part.value)
            }
          }
        }
      }

      // Join every result that carries a unified diff into one reviewable text.
      const resultsWithDiff = toolResults.filter(
        (result: any) => result.unifiedDiff,
      )
      const diffSections = resultsWithDiff.map(
        (result: any) => `--- ${result.file} ---\n${result.unifiedDiff}`,
      )
      const unifiedDiffs = diffSections.join('\n\n')

      yield {
        toolName: 'set_output',
        input: {
          toolCalls,
          toolResults,
          unifiedDiffs,
        },
        includeToolCall: false,
      }
    },
  }
}
152+
// Default v2 implementor agent: the Opus-backed configuration, registered
// under the id 'editor-implementor2'. Annotated with SecretAgentDefinition so
// the compiler checks the complete shape (including `id`) before export.
const definition: SecretAgentDefinition = {
  ...createBestOfNImplementor2({ model: 'opus' }),
  id: 'editor-implementor2',
}
156+
export default definition

0 commit comments

Comments
 (0)