diff --git a/.github/workflows/npm-app-release-staging.yml b/.github/workflows/npm-app-release-staging.yml index 58c6a1ade0..1d556cea6c 100644 --- a/.github/workflows/npm-app-release-staging.yml +++ b/.github/workflows/npm-app-release-staging.yml @@ -134,7 +134,7 @@ jobs: new-version: ${{ needs.prepare-and-commit-staging.outputs.new_version }} artifact-name: updated-staging-package checkout-ref: ${{ github.event.pull_request.head.sha }} - env-overrides: '{"NEXT_PUBLIC_CB_ENVIRONMENT": "prod", "NEXT_PUBLIC_CODEBUFF_BACKEND_URL": "backend-pr-221-we0m.onrender.com"}' + env-overrides: '{"NEXT_PUBLIC_CB_ENVIRONMENT": "prod", "NEXT_PUBLIC_CODEBUFF_BACKEND_URL": "backend-pr-312-3hui.onrender.com"}' secrets: inherit # Create GitHub prerelease with all binaries diff --git a/backend/package.json b/backend/package.json index aeac65984e..c2c1bfbac6 100644 --- a/backend/package.json +++ b/backend/package.json @@ -24,6 +24,7 @@ }, "dependencies": { "@ai-sdk/google-vertex": "3.0.6", + "benchify": "^0.1.0-alpha.41", "@ai-sdk/openai": "2.0.11", "@codebuff/billing": "workspace:*", "@codebuff/common": "workspace:*", diff --git a/backend/src/__tests__/process-str-replace.test.ts b/backend/src/__tests__/process-str-replace.test.ts index 1d5230690c..e10d9e16b3 100644 --- a/backend/src/__tests__/process-str-replace.test.ts +++ b/backend/src/__tests__/process-str-replace.test.ts @@ -1,7 +1,22 @@ -import { describe, expect, it } from 'bun:test' +import { describe, expect, it, spyOn, beforeEach, afterEach, mock } from 'bun:test' import { applyPatch } from 'diff' +// Mock the benchify module to simulate missing API key +mock.module('benchify', () => ({ + Benchify: class MockBenchify { + constructor() {} + runFixer() { + return Promise.resolve([]) + } + } +})) + import { processStrReplace } from '../process-str-replace' +import { mockFileContext } from './test-utils' +import { + executeBatchStrReplaces, + benchifyCanFixLanguage, +} from '../tools/batch-str-replace' 
describe('processStrReplace', () => { it('should replace exact string matches', async () => { @@ -213,6 +228,25 @@ describe('processStrReplace', () => { } }) + it('should handle replacement where old string equals new string', async () => { + const initialContent = 'const x = 1;\nconst y = 2;\n' + const oldStr = 'const y = 2;' + const newStr = 'const y = 2;' // Same as old string + + const result = await processStrReplace( + 'test.ts', + [{ old: oldStr, new: newStr, allowMultiple: false }], + Promise.resolve(initialContent), + ) + + expect(result).not.toBeNull() + expect('content' in result).toBe(true) + if ('content' in result) { + expect(result.content).toBe('const x = 1;\nconst y = 2;\n') + expect(result.messages).toEqual([]) + } + }) + // New comprehensive tests for allowMultiple functionality describe('allowMultiple functionality', () => { it('should error when multiple occurrences exist and allowMultiple is false', async () => { @@ -417,3 +451,150 @@ function test3() { ) }) }) + +// Tests for Benchify resilience +describe('Benchify resilience', () => { + describe('happy path', () => { + it('should identify Benchify-supported file types correctly', () => { + const testCases = [ + { path: 'component.tsx', expected: true }, + { path: 'utils.ts', expected: true }, + { path: 'script.js', expected: true }, + { path: 'styles.jsx', expected: true }, + { path: 'README.md', expected: false }, + { path: 'config.json', expected: false }, + { path: 'styles.css', expected: false }, + { path: 'index.html', expected: false }, + { path: 'test.py', expected: false }, + ] + + for (const { path, expected } of testCases) { + const result = benchifyCanFixLanguage(path) + expect(result).toBe(expected) + } + }) + + it('should handle file extensions case sensitivity', () => { + expect(benchifyCanFixLanguage('Component.TSX')).toBe(false) // Wrong case + expect(benchifyCanFixLanguage('component.tsx')).toBe(true) // Correct case + expect(benchifyCanFixLanguage('utils.TS')).toBe(false) 
// Wrong case + expect(benchifyCanFixLanguage('utils.ts')).toBe(true) // Correct case + }) + + it('should handle file paths with multiple dots', () => { + expect(benchifyCanFixLanguage('component.test.tsx')).toBe(true) + expect(benchifyCanFixLanguage('utils.spec.ts')).toBe(true) + expect(benchifyCanFixLanguage('config.local.js')).toBe(true) + expect(benchifyCanFixLanguage('styles.module.css')).toBe(false) + }) + + it('should handle files without extensions', () => { + expect(benchifyCanFixLanguage('Dockerfile')).toBe(false) + expect(benchifyCanFixLanguage('Makefile')).toBe(false) + expect(benchifyCanFixLanguage('README')).toBe(false) + }) + }) + + it('should fall back gracefully when Benchify is disabled', async () => { + // Mock the process.env to simulate missing BENCHIFY_API_KEY + const originalEnv = process.env.BENCHIFY_API_KEY + delete process.env.BENCHIFY_API_KEY + + try { + const result = await executeBatchStrReplaces({ + deferredStrReplaces: [ + { + toolCall: { + toolName: 'str_replace' as const, + toolCallId: 'test-call', + input: { + path: 'test.ts', + replacements: [ + { old: 'old', new: 'new', allowMultiple: false }, + ], + }, + }, + }, + ], + toolCalls: [], + toolResults: [], + ws: {} as any, + fileContext: mockFileContext, + agentStepId: 'test-step', + clientSessionId: 'test-session', + userInputId: 'test-input', + onResponseChunk: () => {}, + state: { messages: [] }, + userId: 'test-user', + }) + + // Should complete without error even when Benchify is unavailable + expect(result).toBeUndefined() // Function returns void + } finally { + // Restore the original environment variable + if (originalEnv !== undefined) { + process.env.BENCHIFY_API_KEY = originalEnv + } + } + }) + + describe('Batch str_replace integration tests', () => { + it('should handle empty deferred list without error', async () => { + // Await the matcher: an un-awaited `.resolves` lets the test finish before the assertion settles + await expect( + executeBatchStrReplaces({ + deferredStrReplaces: [], + toolCalls: [], + toolResults:
[], + ws: {} as any, + fileContext: mockFileContext, + agentStepId: 'test-step', + clientSessionId: 'test-session', + userInputId: 'test-input', + onResponseChunk: () => {}, + state: { messages: [] }, + userId: 'test-user', + }), + ).resolves.toBeUndefined() // Should complete without throwing + }) + }) + + it('should identify Benchify-supported file types correctly', () => { + const testCases = [ + { path: 'component.tsx', expected: true }, + { path: 'utils.ts', expected: true }, + { path: 'script.js', expected: true }, + { path: 'styles.jsx', expected: true }, + { path: 'README.md', expected: false }, + { path: 'config.json', expected: false }, + { path: 'styles.css', expected: false }, + { path: 'index.html', expected: false }, + { path: 'test.py', expected: false }, + ] + + for (const { path, expected } of testCases) { + const result = benchifyCanFixLanguage(path) + expect(result).toBe(expected) + } + }) + + it('should handle executeBatchStrReplaces with empty list', async () => { + // Simple test that doesn't require complex mocking + const result = await executeBatchStrReplaces({ + deferredStrReplaces: [], + toolCalls: [], + toolResults: [], + ws: {} as any, + fileContext: mockFileContext, + agentStepId: 'test-step', + clientSessionId: 'test-session', + userInputId: 'test-input', + onResponseChunk: () => {}, + state: { messages: [] }, + userId: 'test-user', + }) + + // Should complete without throwing an error + expect(result).toBeUndefined() // Function returns void + }) +}) diff --git a/backend/src/process-str-replace.ts b/backend/src/process-str-replace.ts index 16821ac71e..ad26ab4e1c 100644 --- a/backend/src/process-str-replace.ts +++ b/backend/src/process-str-replace.ts @@ -35,6 +35,7 @@ export async function processStrReplace( let currentContent = initialContent let messages: string[] = [] const lineEnding = currentContent.includes('\r\n') ? 
'\r\n' : '\n' + let anyReplacementSuccessful = false for (const { old: oldStr, new: newStr, allowMultiple } of replacements) { // Regular case: require oldStr for replacements @@ -59,6 +60,7 @@ export async function processStrReplace( if (match.success) { updatedOldStr = match.oldStr + anyReplacementSuccessful = true } else { messages.push(match.error) updatedOldStr = null @@ -72,15 +74,15 @@ export async function processStrReplace( currentContent = currentContent.replaceAll('\n', lineEnding) - if (initialContent === currentContent) { + // If no successful replacements occurred, return error + if (!anyReplacementSuccessful) { logger.debug( { path, initialContent, }, - `processStrReplace: No change to ${path}`, + `processStrReplace: No successful replacements for ${path}`, ) - messages.push('No change to the file.') return { tool: 'str_replace' as const, path, diff --git a/backend/src/run-agent-step.ts b/backend/src/run-agent-step.ts index 43d49edfd4..bd42d540a8 100644 --- a/backend/src/run-agent-step.ts +++ b/backend/src/run-agent-step.ts @@ -327,7 +327,6 @@ export const runAgentStep = async ( state, fullResponse: fullResponseAfterStream, fullResponseChunks, - messageId, } = await processStreamWithTools({ stream, ws, @@ -435,7 +434,7 @@ export const runAgentStep = async ( agentState, fullResponse, shouldEndTurn, - messageId, + messageId: null, } } diff --git a/backend/src/tools/batch-str-replace.ts b/backend/src/tools/batch-str-replace.ts new file mode 100644 index 0000000000..b151040ede --- /dev/null +++ b/backend/src/tools/batch-str-replace.ts @@ -0,0 +1,969 @@ +import { handleStrReplace } from './handlers/tool/str-replace' +import { getFileProcessingValues } from './handlers/tool/write-file' +import { logger } from '../util/logger' +import { Benchify } from 'benchify' +import { env } from '@codebuff/internal/env' +import { requestToolCall } from '../websockets/websocket-action' +import { ParsedDiff, parsePatch } from 'diff' +import { withRetry, withTimeout } 
from '@codebuff/common/util/promise' +import { match, P } from 'ts-pattern' +import type { + CodebuffToolCall, + CodebuffToolOutput, +} from '@codebuff/common/tools/list' +import type { ToolResultPart } from '@codebuff/common/types/messages/content-part' +import type { PrintModeEvent } from '@codebuff/common/types/print-mode' + +import type { ProjectFileContext } from '@codebuff/common/util/file' +import type { WebSocket } from 'ws' +import { file } from 'bun' + +export type DeferredStrReplace = { + toolCall: CodebuffToolCall<'str_replace'> +} + +export type BatchStrReplaceState = { + deferredStrReplaces: DeferredStrReplace[] + otherToolsQueue: any[] + strReplacePhaseComplete: boolean + failures: any[] +} + +const BENCHIFY_FILE_TYPES = ['tsx', 'ts', 'jsx', 'js'] +const BENCHIFY_TIMEOUT_MS = 3000 // 3 second timeout for Benchify calls +const BENCHIFY_MAX_FILES = 10 // Maximum files to send to Benchify +const BENCHIFY_MAX_FILE_SIZE = 1024 * 1024 // 1MB max file size + +// Global Benchify client instance +let benchifyClient: Benchify | null = null + +// Circuit breaker state for Benchify +let benchifyCircuitBreaker = { + failureCount: 0, + lastFailureTime: 0, + isOpen: false, + openUntil: 0, +} + +const CIRCUIT_BREAKER_THRESHOLD = 3 // Open circuit after 3 consecutive failures +const CIRCUIT_BREAKER_TIMEOUT = 60000 // Keep circuit open for 1 minute + +export function getBenchifyClient(): Benchify | null { + if (!benchifyClient) { + let benchifyApiKey: string | undefined + try { + benchifyApiKey = env.BENCHIFY_API_KEY + } catch (error) { + logger.warn( + { + error: error instanceof Error ? 
error.message : String(error), + }, + 'Failed to access BENCHIFY_API_KEY from environment', + ) + return null + } + + if (!benchifyApiKey) { + return null + } + + benchifyClient = new Benchify({ + apiKey: benchifyApiKey, + }) + } + return benchifyClient +} + +type BatchContext = { + ws: WebSocket + userInputId: string + onResponseChunk: (chunk: string | PrintModeEvent) => void + state: Record + originalContents: Record + editedFiles: Map + intendedChanges: Map +} + +export async function executeBatchStrReplaces({ + deferredStrReplaces, + toolCalls, + toolResults, + ws, + fileContext, + agentStepId, + clientSessionId, + userInputId, + onResponseChunk, + state, + userId, +}: { + deferredStrReplaces: DeferredStrReplace[] + toolCalls: (CodebuffToolCall | any)[] + toolResults: ToolResultPart[] + ws: WebSocket + fileContext: ProjectFileContext + agentStepId: string + clientSessionId: string + userInputId: string + onResponseChunk: (chunk: string | PrintModeEvent) => void + state: Record + userId: string | undefined +}) { + if (deferredStrReplaces.length === 0) { + return + } + + // Group operations by file path for per-path processing + const operationsByPath = new Map() + for (const operation of deferredStrReplaces) { + const path = operation.toolCall.input.path + if (!operationsByPath.has(path)) { + operationsByPath.set(path, []) + } + operationsByPath.get(path)!.push(operation) + } + + // Initialize batch context + const batchContext: BatchContext = { + ws, + userInputId, + onResponseChunk, + state, + originalContents: {}, + editedFiles: new Map(), + intendedChanges: new Map(), + } + + // Pre-load original content for all paths that support benchify + await preloadOriginalContent(operationsByPath, fileContext, batchContext) + + // Extract intended changes for benchify (before execution) + await extractAllIntendedChanges(operationsByPath, batchContext) + + // Execute operations grouped by path for better parallelization + const pathPromises = new Map>() + + for (const 
[path, operations] of operationsByPath) { + pathPromises.set( + path, + processPathOperations(path, operations, { + toolCalls, + toolResults, + agentStepId, + batchContext, + }), + ) + } + + // Wait for all path-based operations to complete + await Promise.all(pathPromises.values()) + + // Apply benchify if we have intended changes + await applyBenchifyIfNeeded(batchContext, { + agentStepId, + clientSessionId, + userInputId, + userId, + toolResults, + toolCalls: deferredStrReplaces.map((d) => d.toolCall), + }) +} + +/** + * Pre-loads original file content for all paths that support benchify + */ +async function preloadOriginalContent( + operationsByPath: Map, + fileContext: ProjectFileContext, + batchContext: BatchContext, +) { + const pathsToLoad = Array.from(operationsByPath.keys()).filter( + benchifyCanFixLanguage, + ) + + await Promise.all( + pathsToLoad.map(async (path) => { + try { + const content = await extractOriginalContent(path, fileContext) + if (content) { + batchContext.originalContents[path] = content + } + } catch (error) { + logger.warn( + { + error: error instanceof Error ? error.message : String(error), + path, + }, + 'Failed to read original content for benchify', + ) + } + }), + ) +} + +/** + * Extracts intended changes for all operations (for benchify) + */ +async function extractAllIntendedChanges( + operationsByPath: Map, + batchContext: BatchContext, +) { + for (const [path, operations] of operationsByPath) { + if (!benchifyCanFixLanguage(path) || !batchContext.originalContents[path]) { + continue + } + + try { + let currentContent = batchContext.originalContents[path] + + // Apply all operations sequentially to get final intended content + for (const { toolCall } of operations) { + currentContent = + (await extractIntendedContent(toolCall, currentContent)) || + currentContent + } + + batchContext.intendedChanges.set(path, currentContent) + } catch (error) { + logger.warn( + { error: error instanceof Error ? 
error.message : String(error), path }, + 'Failed to extract intended content for benchify', + ) + } + } +} + +/** + * Processes all operations for a single file path sequentially + */ +async function processPathOperations( + path: string, + operations: DeferredStrReplace[], + context: { + toolCalls: (CodebuffToolCall | any)[] + toolResults: ToolResultPart[] + agentStepId: string + batchContext: BatchContext + }, +) { + let previousPromise = Promise.resolve() + + for (let i = 0; i < operations.length; i++) { + const { toolCall } = operations[i] + + previousPromise = previousPromise.then(() => + executeSingleStrReplace(toolCall, i + 1, operations.length, context), + ) + } + + await previousPromise +} + +/** + * Executes a single str_replace operation with proper error handling + */ +async function executeSingleStrReplace( + toolCall: CodebuffToolCall<'str_replace'>, + operationIndex: number, + totalOperations: number, + context: { + toolCalls: (CodebuffToolCall | any)[] + toolResults: ToolResultPart[] + agentStepId: string + batchContext: BatchContext + }, +) { + const { batchContext, toolCalls, toolResults, agentStepId } = context + + try { + // Create isolated state for each operation + const isolatedState = { + ...batchContext.state, + ws: batchContext.ws, + promisesByPath: {}, + allPromises: [], + fileChangeErrors: [], + fileChanges: [], + firstFileProcessed: false, + } + + const { result } = handleStrReplace({ + previousToolCallFinished: Promise.resolve(), + toolCall, + requestClientToolCall: createRequestClientToolCall(batchContext), + writeToClient: batchContext.onResponseChunk, + getLatestState: () => getFileProcessingValues(isolatedState), + state: isolatedState, + }) + + const toolResult = await result + + if (toolResult) { + const toolResultPart = createToolResultPart(toolCall, toolResult) + + toolResults.push(toolResultPart) + batchContext.onResponseChunk({ + type: 'tool_result', + toolCallId: toolCall.toolCallId, + output: toolResult, + }) + + // Add to 
message history + batchContext.state.messages.push({ + role: 'tool' as const, + content: toolResultPart, + }) + + // Track edited files for benchify + trackEditedFile(toolCall, toolResult, batchContext) + } + + toolCalls.push(toolCall) + } catch (error) { + handleStrReplaceError(error, toolCall, operationIndex, totalOperations, { + toolResults, + agentStepId, + batchContext, + }) + } +} + +/** + * Creates a typed requestClientToolCall function for batch mode + */ +function createRequestClientToolCall(batchContext: BatchContext) { + return async ( + clientToolCall: any, + ): Promise> => { + const result = await requestToolCall( + batchContext.ws, + batchContext.userInputId, + clientToolCall.toolName, + clientToolCall.input, + ) + return result.output as CodebuffToolOutput<'str_replace'> + } +} + +/** + * Creates a properly typed tool result part + */ +function createToolResultPart( + toolCall: CodebuffToolCall<'str_replace'>, + toolResult: CodebuffToolOutput<'str_replace'>, +): ToolResultPart { + return { + type: 'tool-result', + toolName: 'str_replace', + toolCallId: toolCall.toolCallId, + output: toolResult, + } +} + +/** + * Tracks successfully edited files for benchify processing + */ +function trackEditedFile( + toolCall: CodebuffToolCall<'str_replace'>, + toolResult: CodebuffToolOutput<'str_replace'>, + batchContext: BatchContext, +) { + if ( + Array.isArray(toolResult) && + toolResult.length > 0 && + benchifyCanFixLanguage(toolCall.input.path) + ) { + const result = toolResult[0] + if (result.type === 'json' && result.value && 'content' in result.value) { + batchContext.editedFiles.set( + toolCall.input.path, + result.value.content as string, + ) + } + } +} + +/** + * Handles errors from str_replace operations with proper logging and error results + */ +function handleStrReplaceError( + error: unknown, + toolCall: CodebuffToolCall<'str_replace'>, + operationIndex: number, + totalOperations: number, + context: { + toolResults: ToolResultPart[] + agentStepId: 
string + batchContext: BatchContext + }, +) { + const { toolResults, agentStepId, batchContext } = context + + logger.error( + { + error: + error instanceof Error + ? { + message: error.message, + stack: error.stack, + name: error.name, + } + : error, + toolCallId: toolCall.toolCallId, + path: toolCall.input.path, + agentStepId, + userInputId: batchContext.userInputId, + }, + `Error executing batched str_replace ${operationIndex}/${totalOperations}`, + ) + + const errorResult: ToolResultPart = { + type: 'tool-result', + toolName: 'str_replace', + toolCallId: toolCall.toolCallId, + output: [ + { + type: 'json', + value: { + errorMessage: `Batched str_replace failed: ${error instanceof Error ? error.message : String(error)}`, + }, + }, + ], + } + + toolResults.push(errorResult) + batchContext.onResponseChunk({ + type: 'tool_result', + toolCallId: toolCall.toolCallId, + output: errorResult.output, + }) +} + +/** + * Applies benchify results if there are intended changes (with graceful failure handling) + */ +async function applyBenchifyIfNeeded( + batchContext: BatchContext, + options: { + agentStepId: string + clientSessionId: string + userInputId: string + userId: string | undefined + toolResults: ToolResultPart[] + toolCalls: CodebuffToolCall<'str_replace'>[] + }, +) { + // Early exit conditions - fail gracefully without blocking user edits + if (batchContext.intendedChanges.size === 0) { + return + } + + // Check circuit breaker + if (isBenchifyCircuitOpen()) { + logger.debug( + { + circuitState: benchifyCircuitBreaker, + agentStepId: options.agentStepId, + userInputId: options.userInputId, + }, + 'Benchify circuit breaker is open, skipping call', + ) + return + } + + try { + // Filter and validate intended changes for Benchify + const filteredChanges = filterBenchifyFiles( + Array.from(batchContext.intendedChanges.entries()).map( + ([path, contents]) => ({ path, contents }), + ), + options.agentStepId, + ) + + if (filteredChanges.length === 0) { + logger.debug( + 
{ agentStepId: options.agentStepId }, + 'No valid files for Benchify after filtering', + ) + return + } + + // Call Benchify with timeout and retry logic + const benchifyResult = await callBenchifyWithResilience( + filteredChanges, + options, + ) + + if (benchifyResult && benchifyResult.length > 0) { + logger.info( + { + benchifyResultCount: benchifyResult.length, + diffResults: benchifyResult.length, + agentStepId: options.agentStepId, + userInputId: options.userInputId, + }, + `executeBatchStrReplaces: Benchify returned ${benchifyResult.length} diff results, applying them`, + ) + + // Apply results with individual error handling to prevent one failure from blocking others + await applyBenchifyResultsGracefully(filteredChanges, benchifyResult, { + ws: batchContext.ws, + onResponseChunk: batchContext.onResponseChunk, + state: { + ...batchContext.state, + originalContents: batchContext.originalContents, + }, + toolResults: options.toolResults, + toolCalls: options.toolCalls, + userInputId: options.userInputId, + agentStepId: options.agentStepId, + }) + } + + // Reset circuit breaker on success + resetBenchifyCircuitBreaker() + } catch (error) { + // Handle Benchify failure gracefully without blocking user edits + handleBenchifyFailure(error, { + intendedChangeFiles: Array.from(batchContext.intendedChanges.keys()), + agentStepId: options.agentStepId, + userInputId: options.userInputId, + }) + } +} + +/** + * Filters files for Benchify processing based on size and count limits + */ +function filterBenchifyFiles( + files: { path: string; contents: string }[], + agentStepId: string, +): { path: string; contents: string }[] { + const filtered = files.filter((file) => { + // Check file size limit + if (file.contents.length > BENCHIFY_MAX_FILE_SIZE) { + logger.debug( + { path: file.path, size: file.contents.length, agentStepId }, + 'Skipping large file for Benchify', + ) + return false + } + + // Check if it's a supported file type + if (!benchifyCanFixLanguage(file.path)) 
{ + return false + } + + return true + }) + + // Limit the number of files sent to Benchify + if (filtered.length > BENCHIFY_MAX_FILES) { + logger.debug( + { + totalFiles: filtered.length, + maxFiles: BENCHIFY_MAX_FILES, + agentStepId, + }, + 'Limiting files sent to Benchify', + ) + return filtered.slice(0, BENCHIFY_MAX_FILES) + } + + return filtered +} + +/** + * Calls benchify API with timeout and retry logic using common utilities + */ +async function callBenchifyWithResilience( + editedFiles: { path: string; contents: string }[], + context: { + agentStepId: string + clientSessionId: string + userInputId: string + userId: string | undefined + }, +): Promise { + const client = getBenchifyClient() + if (!client) { + return null + } + + return await withRetry( + async () => { + logger.info( + { + fileCount: editedFiles.length, + filePaths: editedFiles.map((f) => f.path), + agentStepId: context.agentStepId, + userInputId: context.userInputId, + }, + 'Calling Benchify API', + ) + + const diff_response = await withTimeout( + client.runFixer(editedFiles, { + fixes: ['parsing'], + mode: 'files', + response_format: 'DIFF', + }), + BENCHIFY_TIMEOUT_MS, + `Benchify call timed out after ${BENCHIFY_TIMEOUT_MS}ms`, + ) + if (diff_response) { + return diff_response + } + + return null + }, + { + maxRetries: 2, + retryIf: shouldRetryBenchifyError, + onRetry: (error, attempt) => { + logger.debug( + { + error: error instanceof Error ? 
error.message : String(error), + attempt, + agentStepId: context.agentStepId, + }, + 'Retrying Benchify call', + ) + }, + retryDelayMs: 100, + }, + ) +} + +/** + * Determines if a Benchify error should trigger a retry (decided purely from the lower-cased error message) + */ +function shouldRetryBenchifyError(error: Error): boolean { + const message = error.message.toLowerCase() + + // Retry on network/timeout errors; 'timed out' is the wording handed to withTimeout (assumed to become error.message) + if ( + message.includes('timeout') || message.includes('timed out') || + message.includes('network') || + message.includes('econnreset') + ) { + return true + } + + // Retry on 5xx server errors (but not 4xx client errors): require a standalone 5xx token, not just any '5' + if ( + /\b5\d\d\b/.test(message) && + (message.includes('error') || message.includes('server')) + ) { + return true + } + + // Don't retry on authentication, rate limit, or client errors + return false +} + +/** + * Applies the Benchify diff for every edited file and waits for each application to settle before returning + */ +async function applyBenchifyResultsGracefully( + editedFiles: { path: string; contents: string }[], + benchifyDiff: string, + context: { + ws: WebSocket + onResponseChunk: (chunk: string | PrintModeEvent) => void + state: Record + toolResults: ToolResultPart[] + toolCalls: CodebuffToolCall<'str_replace'>[] + userInputId: string + agentStepId: string + }, +) { + const results = await Promise.allSettled( + editedFiles.map(async (editedFile) => { + if (benchifyDiff) { + await applyBenchifyResultSafely(editedFile, benchifyDiff, context) // awaited so allSettled tracks the real work + } else { + logger.warn( + { file: editedFile.path }, + 'No Benchify diff found for file.', + ) + } + }), + ) + + // Log any failures but don't throw - individual file failures shouldn't block the batch + const failures = results.filter((result) => result.status === 'rejected') + if (failures.length > 0) { + logger.warn( + { + failureCount: failures.length, + totalFiles: editedFiles.length, + agentStepId: context.agentStepId, + }, + 'Some Benchify results failed to apply', + ) + } +} + +/** + * Safely applies a single Benchify result with comprehensive error handling + */ +async function
applyBenchifyResultSafely( + benchifyFile: { path: string; contents: string }, + benchifyDiff: string, + context: { + ws: WebSocket + onResponseChunk: (chunk: string | PrintModeEvent) => void + state: Record + toolResults: ToolResultPart[] + toolCalls: CodebuffToolCall<'str_replace'>[] + userInputId: string + agentStepId: string + }, +): Promise { + try { + // Find the corresponding tool call for this file + const relatedToolCall = context.toolCalls.find( + (tc) => tc.input.path === benchifyFile.path, + ) + + if (!relatedToolCall) { + logger.debug( + { fileName: benchifyFile.path, agentStepId: context.agentStepId }, + 'No matching tool call found for benchify result', + ) + return + } + + // Get the original content, preferring the latest applied content if available + let baseContent = context.state.originalContents?.[benchifyFile.path] + + // Try to get more recent content from tool results if available + const latestToolResult = context.toolResults + .filter( + (tr) => + tr.toolName === 'str_replace' && + tr.toolCallId === relatedToolCall.toolCallId, + ) + .pop() + + if (latestToolResult?.output?.[0]?.type === 'json') { + const toolValue = latestToolResult.output[0].value + if ( + toolValue && + typeof toolValue === 'object' && + 'content' in toolValue + ) { + baseContent = (toolValue as { content: string }).content + } + } + + if (!baseContent) { + logger.debug( + { path: benchifyFile.path, agentStepId: context.agentStepId }, + 'Could not find base content for Benchify diff generation', + ) + return + } + + // Apply with timeout to prevent hanging + const toolCallResult = await withTimeout( + requestToolCall(context.ws, context.userInputId, 'str_replace', { + type: 'patch', + path: benchifyFile.path, + content: benchifyDiff, + }), + 5000, + 'Benchify patch application timed out', + ) + + // Create a tool result indicating benchify was applied + const benchifyToolResult: ToolResultPart = { + type: 'tool-result', + toolName: 'str_replace', + toolCallId: 
relatedToolCall.toolCallId, + output: toolCallResult.output, + } + + // Update the existing tool result + const existingResultIndex = context.toolResults.findIndex( + (tr) => tr.toolCallId === relatedToolCall.toolCallId, + ) + + if (existingResultIndex >= 0) { + context.toolResults[existingResultIndex] = benchifyToolResult + } else { + context.toolResults.push(benchifyToolResult) + } + + // Notify client about the benchify update + context.onResponseChunk({ + type: 'tool_result', + toolCallId: relatedToolCall.toolCallId, + output: benchifyToolResult.output, + }) + + logger.debug( + { path: benchifyFile.path, agentStepId: context.agentStepId }, + 'Successfully applied Benchify result', + ) + } catch (error) { + // Log but don't throw - individual failures shouldn't block the entire batch + logger.warn( + { + error: error instanceof Error ? error.message : String(error), + fileName: benchifyFile.path, + agentStepId: context.agentStepId, + }, + 'Failed to apply individual Benchify result', + ) + } +} + +/** + * Extracts the original file content before any modifications + */ +async function extractOriginalContent( + filePath: string, + fileContext: ProjectFileContext, +): Promise { + try { + const absolutePath = `${fileContext.projectRoot}/${filePath}` + const currentFile = await file(absolutePath) + return await currentFile.text() + } catch (error) { + logger.warn( + { + error: error instanceof Error ? 
error.message : String(error), + path: filePath, + }, + 'Failed to read original file content', + ) + return null + } +} + +/** + * Extracts the intended file content by applying the tool call's replacements, as literal text, to the current content + */ +async function extractIntendedContent( + toolCall: CodebuffToolCall<'str_replace'>, + currentContent: string, +): Promise { + try { + let content = currentContent + + // Apply all replacements to get the intended content + for (const replacement of toolCall.input.replacements) { + const { old, new: newStr, allowMultiple } = replacement + + if (allowMultiple) { + content = content.replaceAll(old, () => newStr) // replacer fn keeps `$&`, `$$`, etc. in newStr literal + } else { + // Find the first occurrence and replace it + const index = content.indexOf(old) + if (index !== -1) { + content = + content.substring(0, index) + + newStr + + content.substring(index + old.length) + } else { + // Log warning but continue - this might be expected if operations are interdependent + logger.debug( + { + old: old.substring(0, 100), // Truncate for logging + new: newStr.substring(0, 100), + path: toolCall.input.path, + }, + 'String not found in content during intended content extraction', + ) + } + } + } + + return content + } catch (error) { + logger.warn( + { + error: error instanceof Error ?
error.message : String(error), + path: toolCall.input.path, + }, + 'Failed to apply replacements for intended content extraction', + ) + return null + } +} + +/** + * Circuit breaker functions for Benchify resilience + */ +function isBenchifyCircuitOpen(): boolean { + const now = Date.now() + + // Check if circuit should be half-open (reset after timeout) + if (benchifyCircuitBreaker.isOpen && now > benchifyCircuitBreaker.openUntil) { + benchifyCircuitBreaker.isOpen = false + benchifyCircuitBreaker.failureCount = 0 + logger.debug('Benchify circuit breaker reset to closed state') + } + + return benchifyCircuitBreaker.isOpen +} + +function handleBenchifyFailure( + error: unknown, + context: { + intendedChangeFiles: string[] + agentStepId: string + userInputId: string + }, +): void { + benchifyCircuitBreaker.failureCount++ + benchifyCircuitBreaker.lastFailureTime = Date.now() + + // Open circuit if failure threshold exceeded + if (benchifyCircuitBreaker.failureCount >= CIRCUIT_BREAKER_THRESHOLD) { + benchifyCircuitBreaker.isOpen = true + benchifyCircuitBreaker.openUntil = Date.now() + CIRCUIT_BREAKER_TIMEOUT + + logger.warn( + { + failureCount: benchifyCircuitBreaker.failureCount, + circuitOpenUntil: new Date( + benchifyCircuitBreaker.openUntil, + ).toISOString(), + agentStepId: context.agentStepId, + }, + 'Benchify circuit breaker opened due to consecutive failures', + ) + } + + // Log error but continue gracefully + logger.warn( + { + error: error instanceof Error ? 
error.message : String(error), + failureCount: benchifyCircuitBreaker.failureCount, + intendedChangeFiles: context.intendedChangeFiles, + agentStepId: context.agentStepId, + userInputId: context.userInputId, + }, + 'Benchify call failed, continuing without fixes', + ) +} + +function resetBenchifyCircuitBreaker(): void { + if (benchifyCircuitBreaker.failureCount > 0) { + logger.debug( + { previousFailures: benchifyCircuitBreaker.failureCount }, + 'Benchify circuit breaker reset after successful call', + ) + } + + benchifyCircuitBreaker.failureCount = 0 + benchifyCircuitBreaker.isOpen = false + benchifyCircuitBreaker.openUntil = 0 +} + +export function benchifyCanFixLanguage(path: string): boolean { + return BENCHIFY_FILE_TYPES.some((extension) => path.endsWith(`.${extension}`)) +} diff --git a/backend/src/tools/handlers/tool/write-file.ts b/backend/src/tools/handlers/tool/write-file.ts index 4b912a0615..261cff72d0 100644 --- a/backend/src/tools/handlers/tool/write-file.ts +++ b/backend/src/tools/handlers/tool/write-file.ts @@ -230,7 +230,7 @@ export async function postStreamProcessing( if (errors.length > 0) { if (errors.length > 1) { throw new Error( - `Internal error: Unexpected number of matching errors for ${{ toolCall }}, found ${errors.length}, expected 1`, + `Internal error: Unexpected number of matching errors for ${JSON.stringify(toolCall)}, found ${errors.length}, expected 1`, ) } @@ -251,7 +251,7 @@ export async function postStreamProcessing( ) if (changes.length !== 1) { throw new Error( - `Internal error: Unexpected number of matching changes for ${{ toolCall }}, found ${changes.length}, expected 1`, + `Internal error: Unexpected number of matching changes for ${JSON.stringify(toolCall)}, found ${changes.length}, expected 1`, ) } diff --git a/backend/src/tools/stream-parser.ts b/backend/src/tools/stream-parser.ts index 69d1bb15e0..94a9c6af93 100644 --- a/backend/src/tools/stream-parser.ts +++ b/backend/src/tools/stream-parser.ts @@ -9,9 +9,14 @@ import 
{ generateCompactId } from '@codebuff/common/util/string' import { cloneDeep } from 'lodash' import { expireMessages } from '../util/messages' +import { logger } from '../util/logger' import { sendAction } from '../websockets/websocket-action' import { processStreamWithTags } from '../xml-stream-parser' import { executeCustomToolCall, executeToolCall } from './tool-executor' +import { + executeBatchStrReplaces, + BatchStrReplaceState, +} from './batch-str-replace' import type { CustomToolCall } from './tool-executor' import type { StreamChunk } from '../llm-apis/vercel-ai-sdk/ai-sdk' @@ -36,7 +41,7 @@ export type ToolCallError = { } & Omit export async function processStreamWithTools(options: { - stream: AsyncGenerator + stream: AsyncGenerator ws: WebSocket agentStepId: string clientSessionId: string @@ -79,6 +84,15 @@ export async function processStreamWithTools(options: { const { promise: streamDonePromise, resolve: resolveStreamDonePromise } = Promise.withResolvers() let previousToolCallFinished = streamDonePromise + + // Two-phase execution state + const batchState: BatchStrReplaceState = { + deferredStrReplaces: [], + otherToolsQueue: [], + strReplacePhaseComplete: false, + failures: [], + } + const state: Record = { ws, fingerprintId, @@ -108,25 +122,82 @@ export async function processStreamWithTools(options: { return { onTagStart: () => {}, onTagEnd: async (_: string, input: Record) => { - // delegated to reusable helper - previousToolCallFinished = executeToolCall({ - toolName, - input, - toolCalls, - toolResults, - toolResultsToAddAfterStream, - previousToolCallFinished, - ws, - agentTemplate, - fileContext, - agentStepId, - clientSessionId, - userInputId, - fullResponse: fullResponseChunks.join(''), - onResponseChunk, - state, - userId, - }) + // Two-phase execution: defer str_replace tools, queue others + if (toolName === 'str_replace' && !batchState.strReplacePhaseComplete) { + // Defer str_replace execution + const toolCallId = generateCompactId() + 
const toolCall: CodebuffToolCall<'str_replace'> = { + toolName: 'str_replace', + input: input as any, + toolCallId, + } + + batchState.deferredStrReplaces.push({ toolCall }) + + // Still emit the tool call event + onResponseChunk({ + type: 'tool_call', + toolCallId, + toolName, + input, + }) + } else { + // First non-str_replace tool marks end of str_replace phase + if ( + !batchState.strReplacePhaseComplete && + batchState.deferredStrReplaces.length > 0 + ) { + logger.info( + { + triggeringTool: toolName, + deferredCount: batchState.deferredStrReplaces.length, + agentStepId, + userInputId, + }, + `toolCallback: Triggering batch str_replace execution (${batchState.deferredStrReplaces.length} deferred tools) due to ${toolName}`, + ) + + batchState.strReplacePhaseComplete = true + + // Execute all deferred str_replace tools as a batch + previousToolCallFinished = previousToolCallFinished.then( + async () => { + await executeBatchStrReplaces({ + deferredStrReplaces: batchState.deferredStrReplaces, + toolCalls, + toolResults, + ws, + fileContext, + agentStepId, + clientSessionId, + userInputId, + onResponseChunk, + state, + userId, + }) + }, + ) + } + + previousToolCallFinished = executeToolCall({ + toolName, + input, + toolCalls, + toolResults, + toolResultsToAddAfterStream, + previousToolCallFinished, + ws, + agentTemplate, + fileContext, + agentStepId, + clientSessionId, + userInputId, + fullResponse: fullResponseChunks.join(''), + onResponseChunk, + state, + userId, + }) + } }, } } @@ -186,14 +257,7 @@ export async function processStreamWithTools(options: { ) let reasoning = false - let messageId: string | null = null - while (true) { - const { value: chunk, done } = await streamWithTags.next() - if (done) { - messageId = chunk - break - } - + for await (const chunk of streamWithTags) { if (chunk.type === 'reasoning') { if (!reasoning) { reasoning = true @@ -231,14 +295,66 @@ export async function processStreamWithTools(options: { ]) resolveStreamDonePromise() - 
await previousToolCallFinished + // Handle case where only str_replace tools were generated and stream ended + if ( + !batchState.strReplacePhaseComplete && + batchState.deferredStrReplaces.length > 0 + ) { + logger.info( + { + triggeringEvent: 'stream_end', + deferredCount: batchState.deferredStrReplaces.length, + deferredFiles: batchState.deferredStrReplaces.map( + (d) => d.toolCall.input.path, + ), + agentStepId, + userInputId, + }, + `stream-parser: Triggering batch str_replace execution (${batchState.deferredStrReplaces.length} deferred tools) due to stream end`, + ) + + batchState.strReplacePhaseComplete = true + + // Execute all deferred str_replace tools as a batch + previousToolCallFinished = previousToolCallFinished.then(async () => { + logger.info( + { + agentStepId, + userInputId, + deferredCount: batchState.deferredStrReplaces.length, + }, + 'stream-parser: About to call executeBatchStrReplaces from stream end handler', + ) + await executeBatchStrReplaces({ + deferredStrReplaces: batchState.deferredStrReplaces, + toolCalls, + toolResults, + ws, + fileContext, + agentStepId, + clientSessionId, + userInputId, + onResponseChunk, + state, + userId, + }) + logger.info( + { + agentStepId, + userInputId, + }, + 'stream-parser: Completed executeBatchStrReplaces from stream end handler', + ) + }) + } + + await previousToolCallFinished return { toolCalls, toolResults, state, fullResponse: fullResponseChunks.join(''), fullResponseChunks, - messageId, } } diff --git a/bun.lock b/bun.lock index 1b8053bba8..b05b588d6c 100644 --- a/bun.lock +++ b/bun.lock @@ -5,6 +5,7 @@ "name": "codebuff-project", "dependencies": { "@t3-oss/env-nextjs": "^0.7.3", + "benchify": "^0.1.0-alpha.44", "zod": "3.25.67", }, "devDependencies": { @@ -49,6 +50,7 @@ "@jitl/quickjs-wasmfile-release-sync": "0.31.0", "@openrouter/ai-sdk-provider": "1.1.2", "ai": "5.0.0", + "benchify": "^0.1.0-alpha.41", "cors": "^2.8.5", "diff": "5.2.0", "dotenv": "16.4.5", @@ -1632,6 +1634,8 @@ "basic-ftp": 
["basic-ftp@5.0.5", "", {}, "sha512-4Bcg1P8xhUuqcii/S0Z9wiHIrQVPMermM1any+MX5GeGD7faD3/msQUDGLol9wOcz4/jbg/WJnGqoJF6LiBdtg=="], + "benchify": ["benchify@0.1.0-alpha.44", "", { "dependencies": { "minimatch": "^9.0.3" }, "peerDependencies": { "react": ">=16.8.0" }, "optionalPeers": ["react"] }, "sha512-sGjAPgGKRCNB5h2fTIMHfKGLDBlGT+wUxVNOPJ5Ss5m0PDdtXdlE60CJAcnb2Z620gk5z9P8xppSjZuxKB731w=="], + "bidi-js": ["bidi-js@1.0.3", "", { "dependencies": { "require-from-string": "^2.0.2" } }, "sha512-RKshQI1R3YQ+n9YJz2QQ147P66ELpa1FQEg20Dk8oW9t2KgLbpDLLp9aGZ7y8WHSshDknG0bknqGw5/tyCs5tw=="], "big.js": ["big.js@6.2.2", "", {}, "sha512-y/ie+Faknx7sZA5MfGA2xKlu0GDv8RWrXGsmlteyJQ2lvoKv9GBK/fpRMc2qlSoBAgNxrixICFCBefIq8WCQpQ=="], @@ -4230,6 +4234,8 @@ "babel-plugin-istanbul/istanbul-lib-instrument": ["istanbul-lib-instrument@5.2.1", "", { "dependencies": { "@babel/core": "^7.12.3", "@babel/parser": "^7.14.7", "@istanbuljs/schema": "^0.1.2", "istanbul-lib-coverage": "^3.2.0", "semver": "^6.3.0" } }, "sha512-pzqtp31nLv/XFOzXGuvhCb8qhjmTVo5vjVk19XE4CRlSWz0KoeJ3bw9XsA7nOp9YBf4qHjwBxkDzKcME/J29Yg=="], + "benchify/minimatch": ["minimatch@9.0.5", "", { "dependencies": { "brace-expansion": "^2.0.1" } }, "sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow=="], + "bl/buffer": ["buffer@5.7.1", "", { "dependencies": { "base64-js": "^1.3.1", "ieee754": "^1.1.13" } }, "sha512-EHcyIPBQ4BSGlvjB16k5KgAJ27CIsHY/2JBmCRReo48y9rQ3MaUzWX3KVlBa4U7MyX02HdVj0K7C3WaB3ju7FQ=="], "bl/readable-stream": ["readable-stream@3.6.2", "", { "dependencies": { "inherits": "^2.0.3", "string_decoder": "^1.1.1", "util-deprecate": "^1.0.1" } }, "sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA=="], @@ -5010,6 +5016,8 @@ "babel-plugin-istanbul/istanbul-lib-instrument/semver": ["semver@6.3.1", "", { "bin": { "semver": "bin/semver.js" } }, "sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA=="], + 
"benchify/minimatch/brace-expansion": ["brace-expansion@2.0.2", "", { "dependencies": { "balanced-match": "^1.0.0" } }, "sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ=="], + "body-parser/debug/ms": ["ms@2.0.0", "", {}, "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A=="], "chalk/ansi-styles/color-convert": ["color-convert@2.0.1", "", { "dependencies": { "color-name": "~1.1.4" } }, "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ=="], diff --git a/npm-app/src/tool-handlers.ts b/npm-app/src/tool-handlers.ts index 7e90d81262..cf8c947c83 100644 --- a/npm-app/src/tool-handlers.ts +++ b/npm-app/src/tool-handlers.ts @@ -67,16 +67,26 @@ export const handleUpdateFile = async < console.log(green(`- Created ${file} ${counts}`)) } for (const file of modified) { - // Calculate added/deleted lines from the diff content + // Calculate added/deleted lines from the diff content, excluding metadata let addedLines = 0 let deletedLines = 0 - lines.forEach((line) => { + + for (const line of lines) { + // Skip all diff metadata lines (headers, hunk headers, etc.) 
+ if ( + line.startsWith('---') || + line.startsWith('+++') || + line.startsWith('@@') + ) { + continue + } + // Count actual added/removed code lines if (line.startsWith('+')) { addedLines++ } else if (line.startsWith('-')) { deletedLines++ } - }) + } const counts = `(${green(`+${addedLines}`)}, ${red(`-${deletedLines}`)})` result.push([ diff --git a/package.json b/package.json index 7249f38774..4280d0c611 100644 --- a/package.json +++ b/package.json @@ -39,6 +39,7 @@ }, "dependencies": { "@t3-oss/env-nextjs": "^0.7.3", + "benchify": "^0.1.0-alpha.44", "zod": "3.25.67" }, "overrides": { diff --git a/packages/internal/src/env.ts b/packages/internal/src/env.ts index ecc4510305..44db06004b 100644 --- a/packages/internal/src/env.ts +++ b/packages/internal/src/env.ts @@ -10,6 +10,7 @@ const envSchema = { server: { // Backend variables CODEBUFF_API_KEY: z.string().optional(), + BENCHIFY_API_KEY: z.string().optional(), OPEN_ROUTER_API_KEY: z.string().min(1), RELACE_API_KEY: z.string().min(1), LINKUP_API_KEY: z.string().min(1), @@ -51,6 +52,7 @@ const envSchema = { runtimeEnv: { // Backend variables CODEBUFF_API_KEY: process.env.CODEBUFF_API_KEY, + BENCHIFY_API_KEY: process.env.BENCHIFY_API_KEY, OPEN_ROUTER_API_KEY: process.env.OPEN_ROUTER_API_KEY, RELACE_API_KEY: process.env.RELACE_API_KEY, LINKUP_API_KEY: process.env.LINKUP_API_KEY,