diff --git a/.github/workflows/npm-app-release-staging.yml b/.github/workflows/npm-app-release-staging.yml
index 58c6a1ade0..1d556cea6c 100644
--- a/.github/workflows/npm-app-release-staging.yml
+++ b/.github/workflows/npm-app-release-staging.yml
@@ -134,7 +134,7 @@ jobs:
       new-version: ${{ needs.prepare-and-commit-staging.outputs.new_version }}
       artifact-name: updated-staging-package
       checkout-ref: ${{ github.event.pull_request.head.sha }}
-      env-overrides: '{"NEXT_PUBLIC_CB_ENVIRONMENT": "prod", "NEXT_PUBLIC_CODEBUFF_BACKEND_URL": "backend-pr-221-we0m.onrender.com"}'
+      env-overrides: '{"NEXT_PUBLIC_CB_ENVIRONMENT": "prod", "NEXT_PUBLIC_CODEBUFF_BACKEND_URL": "backend-pr-312-3hui.onrender.com"}'
     secrets: inherit
 
   # Create GitHub prerelease with all binaries
diff --git a/backend/package.json b/backend/package.json
index aeac65984e..c2c1bfbac6 100644
--- a/backend/package.json
+++ b/backend/package.json
@@ -24,6 +24,7 @@
   },
   "dependencies": {
     "@ai-sdk/google-vertex": "3.0.6",
+    "benchify": "^0.1.0-alpha.41",
     "@ai-sdk/openai": "2.0.11",
     "@codebuff/billing": "workspace:*",
     "@codebuff/common": "workspace:*",
diff --git a/backend/src/__tests__/process-str-replace.test.ts b/backend/src/__tests__/process-str-replace.test.ts
index 1d5230690c..e10d9e16b3 100644
--- a/backend/src/__tests__/process-str-replace.test.ts
+++ b/backend/src/__tests__/process-str-replace.test.ts
@@ -1,7 +1,22 @@
-import { describe, expect, it } from 'bun:test'
+import { describe, expect, it, spyOn, beforeEach, afterEach, mock } from 'bun:test'
 import { applyPatch } from 'diff'
 
+// Mock the benchify module so the tests never call the real service (runFixer resolves to [])
+mock.module('benchify', () => ({
+  Benchify: class MockBenchify {
+    constructor() {}
+    runFixer() {
+      return Promise.resolve([])
+    }
+  }
+}))
+
 import { processStrReplace } from '../process-str-replace'
+import { mockFileContext } from './test-utils'
+import {
+  executeBatchStrReplaces,
+  benchifyCanFixLanguage,
+} from '../tools/batch-str-replace'
 
 describe('processStrReplace', () => {
   it('should replace exact string matches', async () => {
@@ -213,6 +228,25 @@ describe('processStrReplace', () => {
     }
   })
 
+  it('should handle replacement where old string equals new string', async () => {
+    const initialContent = 'const x = 1;\nconst y = 2;\n'
+    const oldStr = 'const y = 2;'
+    const newStr = 'const y = 2;' // Same as old string
+    // A match that rewrites nothing is still expected to succeed: same content, no messages.
+    const result = await processStrReplace(
+      'test.ts',
+      [{ old: oldStr, new: newStr, allowMultiple: false }],
+      Promise.resolve(initialContent),
+    )
+
+    expect(result).not.toBeNull()
+    expect('content' in result).toBe(true)
+    if ('content' in result) {
+      expect(result.content).toBe('const x = 1;\nconst y = 2;\n')
+      expect(result.messages).toEqual([])
+    }
+  })
+
   // New comprehensive tests for allowMultiple functionality
   describe('allowMultiple functionality', () => {
     it('should error when multiple occurrences exist and allowMultiple is false', async () => {
@@ -417,3 +451,150 @@ function test3() {
     )
   })
 })
+
+// Tests for Benchify resilience
+describe('Benchify resilience', () => {
+  describe('happy path', () => {
+    it('should identify Benchify-supported file types correctly', () => {
+      const testCases = [
+        { path: 'component.tsx', expected: true },
+        { path: 'utils.ts', expected: true },
+        { path: 'script.js', expected: true },
+        { path: 'styles.jsx', expected: true },
+        { path: 'README.md', expected: false },
+        { path: 'config.json', expected: false },
+        { path: 'styles.css', expected: false },
+        { path: 'index.html', expected: false },
+        { path: 'test.py', expected: false },
+      ]
+      // Table-driven: every path is checked against its expected support flag.
+      for (const { path, expected } of testCases) {
+        const result = benchifyCanFixLanguage(path)
+        expect(result).toBe(expected)
+      }
+    })
+
+    it('should handle file extensions case sensitivity', () => {
+      expect(benchifyCanFixLanguage('Component.TSX')).toBe(false) // Upper-case extension: expected to be rejected
+      expect(benchifyCanFixLanguage('component.tsx')).toBe(true) // Lower-case extension: expected to be accepted
+      expect(benchifyCanFixLanguage('utils.TS')).toBe(false) // Upper-case extension: expected to be rejected
+      expect(benchifyCanFixLanguage('utils.ts')).toBe(true) // Lower-case extension: expected to be accepted
+    })
+
+    it('should handle file paths with multiple dots', () => {
+      expect(benchifyCanFixLanguage('component.test.tsx')).toBe(true)
+      expect(benchifyCanFixLanguage('utils.spec.ts')).toBe(true)
+      expect(benchifyCanFixLanguage('config.local.js')).toBe(true)
+      expect(benchifyCanFixLanguage('styles.module.css')).toBe(false)
+    })
+
+    it('should handle files without extensions', () => {
+      expect(benchifyCanFixLanguage('Dockerfile')).toBe(false)
+      expect(benchifyCanFixLanguage('Makefile')).toBe(false)
+      expect(benchifyCanFixLanguage('README')).toBe(false)
+    })
+  })
+
+  it('should fall back gracefully when Benchify is disabled', async () => {
+    // Remove BENCHIFY_API_KEY from process.env for the duration of this test
+    const originalEnv = process.env.BENCHIFY_API_KEY
+    delete process.env.BENCHIFY_API_KEY
+    // NOTE(review): getBenchifyClient reads env.BENCHIFY_API_KEY from @codebuff/internal/env — confirm this delete reaches it.
+    try {
+      const result = await executeBatchStrReplaces({
+        deferredStrReplaces: [
+          {
+            toolCall: {
+              toolName: 'str_replace' as const,
+              toolCallId: 'test-call',
+              input: {
+                path: 'test.ts',
+                replacements: [
+                  { old: 'old', new: 'new', allowMultiple: false },
+                ],
+              },
+            },
+          },
+        ],
+        toolCalls: [],
+        toolResults: [],
+        ws: {} as any,
+        fileContext: mockFileContext,
+        agentStepId: 'test-step',
+        clientSessionId: 'test-session',
+        userInputId: 'test-input',
+        onResponseChunk: () => {},
+        state: { messages: [] },
+        userId: 'test-user',
+      })
+
+      // Should complete without error even when Benchify is unavailable
+      expect(result).toBeUndefined() // Function returns void
+    } finally {
+      // Put the key back only if it was set before; otherwise it stays deleted
+      if (originalEnv !== undefined) {
+        process.env.BENCHIFY_API_KEY = originalEnv
+      }
+    }
+  })
+
+  describe('Batch str_replace integration tests', () => {
+    it('should handle empty deferred list without error', async () => {
+      // Await the matcher: an un-awaited `.resolves` assertion can settle after the test has ended
+      await expect(
+        executeBatchStrReplaces({
+          deferredStrReplaces: [],
+          toolCalls: [],
+          toolResults: [],
+          ws: {} as any,
+          fileContext: mockFileContext,
+          agentStepId: 'test-step',
+          clientSessionId: 'test-session',
+          userInputId: 'test-input',
+          onResponseChunk: () => {},
+          state: { messages: [] },
+          userId: 'test-user',
+        }),
+      ).resolves.toBeUndefined() // Should complete without throwing
+    })
+  })
+
+  it('should identify Benchify-supported file types correctly', () => {
+    const testCases = [
+      { path: 'component.tsx', expected: true },
+      { path: 'utils.ts', expected: true },
+      { path: 'script.js', expected: true },
+      { path: 'styles.jsx', expected: true },
+      { path: 'README.md', expected: false },
+      { path: 'config.json', expected: false },
+      { path: 'styles.css', expected: false },
+      { path: 'index.html', expected: false },
+      { path: 'test.py', expected: false },
+    ]
+    // NOTE(review): same table and loop as the identically named 'happy path' test — likely redundant.
+    for (const { path, expected } of testCases) {
+      const result = benchifyCanFixLanguage(path)
+      expect(result).toBe(expected)
+    }
+  })
+
+  it('should handle executeBatchStrReplaces with empty list', async () => {
+    // An empty deferred list takes the function's early-return path, so the stub ws is never used
+    const result = await executeBatchStrReplaces({
+      deferredStrReplaces: [],
+      toolCalls: [],
+      toolResults: [],
+      ws: {} as any,
+      fileContext: mockFileContext,
+      agentStepId: 'test-step',
+      clientSessionId: 'test-session',
+      userInputId: 'test-input',
+      onResponseChunk: () => {},
+      state: { messages: [] },
+      userId: 'test-user',
+    })
+
+    // Should complete without throwing an error
+    expect(result).toBeUndefined() // Function returns void
+  })
+})
diff --git a/backend/src/process-str-replace.ts b/backend/src/process-str-replace.ts
index 16821ac71e..ad26ab4e1c 100644
--- a/backend/src/process-str-replace.ts
+++ b/backend/src/process-str-replace.ts
@@ -35,6 +35,7 @@ export async function processStrReplace(
   let currentContent = initialContent
   let messages: string[] = []
   const lineEnding = currentContent.includes('\r\n') ? '\r\n' : '\n'
+  let anyReplacementSuccessful = false
 
   for (const { old: oldStr, new: newStr, allowMultiple } of replacements) {
     // Regular case: require oldStr for replacements
@@ -59,6 +60,7 @@ export async function processStrReplace(
 
     if (match.success) {
       updatedOldStr = match.oldStr
+      anyReplacementSuccessful = true
     } else {
       messages.push(match.error)
       updatedOldStr = null
@@ -72,15 +74,15 @@ export async function processStrReplace(
 
   currentContent = currentContent.replaceAll('\n', lineEnding)
 
-  if (initialContent === currentContent) {
+  // If no successful replacements occurred, return error
+  if (!anyReplacementSuccessful) {
     logger.debug(
       {
         path,
         initialContent,
       },
-      `processStrReplace: No change to ${path}`,
+      `processStrReplace: No successful replacements for ${path}`,
     )
-    messages.push('No change to the file.')
     return {
       tool: 'str_replace' as const,
       path,
diff --git a/backend/src/run-agent-step.ts b/backend/src/run-agent-step.ts
index 43d49edfd4..bd42d540a8 100644
--- a/backend/src/run-agent-step.ts
+++ b/backend/src/run-agent-step.ts
@@ -327,7 +327,6 @@ export const runAgentStep = async (
     state,
     fullResponse: fullResponseAfterStream,
     fullResponseChunks,
-    messageId,
   } = await processStreamWithTools({
     stream,
     ws,
@@ -435,7 +434,7 @@ export const runAgentStep = async (
     agentState,
     fullResponse,
     shouldEndTurn,
-    messageId,
+    messageId: null,
   }
 }
 
diff --git a/backend/src/tools/batch-str-replace.ts b/backend/src/tools/batch-str-replace.ts
new file mode 100644
index 0000000000..b151040ede
--- /dev/null
+++ b/backend/src/tools/batch-str-replace.ts
@@ -0,0 +1,969 @@
+import { handleStrReplace } from './handlers/tool/str-replace'
+import { getFileProcessingValues } from './handlers/tool/write-file'
+import { logger } from '../util/logger'
+import { Benchify } from 'benchify'
+import { env } from '@codebuff/internal/env'
+import { requestToolCall } from '../websockets/websocket-action'
+import { ParsedDiff, parsePatch } from 'diff'
+import { withRetry, withTimeout } from '@codebuff/common/util/promise'
+import { match, P } from 'ts-pattern'
+import type {
+  CodebuffToolCall,
+  CodebuffToolOutput,
+} from '@codebuff/common/tools/list'
+import type { ToolResultPart } from '@codebuff/common/types/messages/content-part'
+import type { PrintModeEvent } from '@codebuff/common/types/print-mode'
+
+import type { ProjectFileContext } from '@codebuff/common/util/file'
+import type { WebSocket } from 'ws'
+import { file } from 'bun'
+
+export type DeferredStrReplace = {
+  toolCall: CodebuffToolCall<'str_replace'> // the queued call; executed later by executeBatchStrReplaces
+}
+// NOTE(review): BatchStrReplaceState is not referenced in the visible part of this file — confirm its consumer before changing fields.
+export type BatchStrReplaceState = {
+  deferredStrReplaces: DeferredStrReplace[]
+  otherToolsQueue: any[]
+  strReplacePhaseComplete: boolean
+  failures: any[]
+}
+
+const BENCHIFY_FILE_TYPES = ['tsx', 'ts', 'jsx', 'js'] // presumably the extensions benchifyCanFixLanguage accepts — confirm
+const BENCHIFY_TIMEOUT_MS = 3000 // 3 second timeout for each Benchify attempt (applied inside the retry wrapper)
+const BENCHIFY_MAX_FILES = 10 // Maximum files to send to Benchify in one call
+const BENCHIFY_MAX_FILE_SIZE = 1024 * 1024 // ~1MB cap; compared against string length (UTF-16 units), not bytes
+
+// Module-wide Benchify client instance, created lazily by getBenchifyClient
+let benchifyClient: Benchify | null = null
+
+// Circuit breaker state for Benchify (module-level, so shared by every batch in this process)
+let benchifyCircuitBreaker = {
+  failureCount: 0,
+  lastFailureTime: 0,
+  isOpen: false,
+  openUntil: 0,
+}
+
+const CIRCUIT_BREAKER_THRESHOLD = 3 // Open circuit after 3 consecutive failures
+const CIRCUIT_BREAKER_TIMEOUT = 60000 // Keep circuit open for 1 minute
+
+export function getBenchifyClient(): Benchify | null { // returns the cached client, creating it on first use; null when no key
+  if (!benchifyClient) {
+    let benchifyApiKey: string | undefined
+    try {
+      benchifyApiKey = env.BENCHIFY_API_KEY // guarded: reading env may throw (presumably env validation — confirm)
+    } catch (error) {
+      logger.warn(
+        {
+          error: error instanceof Error ? error.message : String(error),
+        },
+        'Failed to access BENCHIFY_API_KEY from environment',
+      )
+      return null
+    }
+    // No key: report "no client" without caching anything, so the next call checks the key again.
+    if (!benchifyApiKey) {
+      return null
+    }
+    // Key present: construct the client once; the module-level variable keeps it for later calls.
+    benchifyClient = new Benchify({
+      apiKey: benchifyApiKey,
+    })
+  }
+  return benchifyClient
+}
+
+type BatchContext = { // per-batch values shared by every operation in one executeBatchStrReplaces call
+  ws: WebSocket
+  userInputId: string
+  onResponseChunk: (chunk: string | PrintModeEvent) => void
+  state: Record<string, any> // caller's state object; tool results are pushed onto state.messages
+  originalContents: Record<string, string> // path -> content read before any edit (Benchify-supported paths only)
+  editedFiles: Map<string, string> // path -> `content` reported by a str_replace result
+  intendedChanges: Map<string, string> // path -> content expected after applying all queued replacements
+}
+
+export async function executeBatchStrReplaces({ // runs the queued str_replace calls, then a Benchify pass
+  deferredStrReplaces,
+  toolCalls,
+  toolResults,
+  ws,
+  fileContext,
+  agentStepId,
+  clientSessionId,
+  userInputId,
+  onResponseChunk,
+  state,
+  userId,
+}: {
+  deferredStrReplaces: DeferredStrReplace[]
+  toolCalls: (CodebuffToolCall | any)[] // mutated: each call that completes without throwing is appended
+  toolResults: ToolResultPart[] // mutated: success and error results are appended
+  ws: WebSocket
+  fileContext: ProjectFileContext
+  agentStepId: string
+  clientSessionId: string
+  userInputId: string
+  onResponseChunk: (chunk: string | PrintModeEvent) => void
+  state: Record<string, any>
+  userId: string | undefined
+}) {
+  if (deferredStrReplaces.length === 0) {
+    return
+  }
+
+  // Group operations by file path, preserving their original order within each path
+  const operationsByPath = new Map<string, DeferredStrReplace[]>()
+  for (const operation of deferredStrReplaces) {
+    const path = operation.toolCall.input.path
+    if (!operationsByPath.has(path)) {
+      operationsByPath.set(path, [])
+    }
+    operationsByPath.get(path)!.push(operation)
+  }
+
+  // Initialize batch context (the three content maps start empty and are filled below)
+  const batchContext: BatchContext = {
+    ws,
+    userInputId,
+    onResponseChunk,
+    state,
+    originalContents: {},
+    editedFiles: new Map(),
+    intendedChanges: new Map(),
+  }
+
+  // Pre-load original content for all paths that support benchify
+  await preloadOriginalContent(operationsByPath, fileContext, batchContext)
+
+  // Extract intended changes for benchify (before execution)
+  await extractAllIntendedChanges(operationsByPath, batchContext)
+
+  // Start every path's work now: different paths run concurrently, operations on one path run in order
+  const pathPromises = new Map<string, Promise<void>>()
+
+  for (const [path, operations] of operationsByPath) {
+    pathPromises.set(
+      path,
+      processPathOperations(path, operations, {
+        toolCalls,
+        toolResults,
+        agentStepId,
+        batchContext,
+      }),
+    )
+  }
+
+  // Wait for all path-based operations to complete
+  await Promise.all(pathPromises.values())
+
+  // Benchify pass over the intended contents; it returns early when there are none
+  await applyBenchifyIfNeeded(batchContext, {
+    agentStepId,
+    clientSessionId,
+    userInputId,
+    userId,
+    toolResults,
+    toolCalls: deferredStrReplaces.map((d) => d.toolCall),
+  })
+}
+
+/**
+ * Reads, in parallel, the current content of every batched path that benchify supports into batchContext.originalContents
+ */
+async function preloadOriginalContent(
+  operationsByPath: Map<string, DeferredStrReplace[]>,
+  fileContext: ProjectFileContext,
+  batchContext: BatchContext,
+) {
+  const pathsToLoad = Array.from(operationsByPath.keys()).filter(
+    benchifyCanFixLanguage,
+  )
+  // Each read has its own try/catch, so one unreadable file is logged and skipped without rejecting the Promise.all.
+  await Promise.all(
+    pathsToLoad.map(async (path) => {
+      try {
+        const content = await extractOriginalContent(path, fileContext)
+        if (content) { // falsy check: a null result and an empty file ('') are both skipped
+          batchContext.originalContents[path] = content
+        }
+      } catch (error) {
+        logger.warn(
+          {
+            error: error instanceof Error ? error.message : String(error),
+            path,
+          },
+          'Failed to read original content for benchify',
+        )
+      }
+    }),
+  )
+}
+
+/**
+ * Computes, per supported path, the content expected after all queued replacements and stores it in batchContext.intendedChanges
+ */
+async function extractAllIntendedChanges(
+  operationsByPath: Map<string, DeferredStrReplace[]>,
+  batchContext: BatchContext,
+) {
+  for (const [path, operations] of operationsByPath) {
+    if (!benchifyCanFixLanguage(path) || !batchContext.originalContents[path]) {
+      continue
+    }
+    // A failure for one path is logged and leaves that path out; the remaining paths are still processed.
+    try {
+      let currentContent = batchContext.originalContents[path]
+
+      // Apply all operations sequentially; a falsy result from extractIntendedContent keeps the previous content
+      for (const { toolCall } of operations) {
+        currentContent =
+          (await extractIntendedContent(toolCall, currentContent)) ||
+          currentContent
+      }
+      // Recorded even when no operation changed the content.
+      batchContext.intendedChanges.set(path, currentContent)
+    } catch (error) {
+      logger.warn(
+        { error: error instanceof Error ? error.message : String(error), path },
+        'Failed to extract intended content for benchify',
+      )
+    }
+  }
+}
+
+/**
+ * Runs every queued operation for one file path, strictly one after another
+ */
+async function processPathOperations(
+  path: string,
+  operations: DeferredStrReplace[],
+  context: {
+    toolCalls: (CodebuffToolCall | any)[]
+    toolResults: ToolResultPart[]
+    agentStepId: string
+    batchContext: BatchContext
+  },
+) {
+  const total = operations.length
+  // Awaiting each call before starting the next keeps same-file edits in
+  // order: a later replacement only starts once the earlier one has finished.
+  for (let index = 0; index < total; index++) {
+    await executeSingleStrReplace(
+      operations[index].toolCall,
+      index + 1,
+      total,
+      context,
+    )
+  }
+}
+
+/**
+ * Runs one str_replace via handleStrReplace, records and announces its result, and routes any thrown error to handleStrReplaceError
+ */
+async function executeSingleStrReplace(
+  toolCall: CodebuffToolCall<'str_replace'>,
+  operationIndex: number,
+  totalOperations: number,
+  context: {
+    toolCalls: (CodebuffToolCall | any)[]
+    toolResults: ToolResultPart[]
+    agentStepId: string
+    batchContext: BatchContext
+  },
+) {
+  const { batchContext, toolCalls, toolResults, agentStepId } = context
+
+  try {
+    // Shallow copy of the shared state with fresh per-operation tracking fields (nested objects are still shared)
+    const isolatedState = {
+      ...batchContext.state,
+      ws: batchContext.ws,
+      promisesByPath: {},
+      allPromises: [],
+      fileChangeErrors: [],
+      fileChanges: [],
+      firstFileProcessed: false,
+    }
+    // previousToolCallFinished is already resolved: ordering is enforced by the caller, not by the handler.
+    const { result } = handleStrReplace({
+      previousToolCallFinished: Promise.resolve(),
+      toolCall,
+      requestClientToolCall: createRequestClientToolCall(batchContext),
+      writeToClient: batchContext.onResponseChunk,
+      getLatestState: () => getFileProcessingValues(isolatedState),
+      state: isolatedState,
+    })
+
+    const toolResult = await result
+
+    if (toolResult) {
+      const toolResultPart = createToolResultPart(toolCall, toolResult)
+
+      toolResults.push(toolResultPart)
+      batchContext.onResponseChunk({
+        type: 'tool_result',
+        toolCallId: toolCall.toolCallId,
+        output: toolResult,
+      })
+
+      // Add to message history (the shared state's messages array, not the isolated copy)
+      batchContext.state.messages.push({
+        role: 'tool' as const,
+        content: toolResultPart,
+      })
+
+      // Track edited files for benchify
+      trackEditedFile(toolCall, toolResult, batchContext)
+    }
+    // Reached only when nothing above threw; a failed call is reported through the catch below instead.
+    toolCalls.push(toolCall)
+  } catch (error) {
+    handleStrReplaceError(error, toolCall, operationIndex, totalOperations, {
+      toolResults,
+      agentStepId,
+      batchContext,
+    })
+  }
+}
+
+/**
+ * Builds the callback handed to handleStrReplace: it forwards a tool call through requestToolCall and returns only its output
+ */
+function createRequestClientToolCall(batchContext: BatchContext) {
+  return async (
+    clientToolCall: any,
+  ): Promise<CodebuffToolOutput<'str_replace'>> => {
+    const result = await requestToolCall(
+      batchContext.ws,
+      batchContext.userInputId,
+      clientToolCall.toolName,
+      clientToolCall.input,
+    )
+    return result.output as CodebuffToolOutput<'str_replace'> // cast only: the output shape is not checked at runtime
+  }
+}
+
+/**
+ * Wraps a str_replace output in a 'tool-result' part that carries the id of the tool call it answers
+ */
+function createToolResultPart(
+  toolCall: CodebuffToolCall<'str_replace'>,
+  toolResult: CodebuffToolOutput<'str_replace'>,
+): ToolResultPart {
+  return {
+    type: 'tool-result',
+    toolName: 'str_replace',
+    toolCallId: toolCall.toolCallId,
+    output: toolResult,
+  }
+}
+
+/**
+ * Remembers the post-edit content of a benchify-supported file in batchContext.editedFiles
+ */
+function trackEditedFile(
+  toolCall: CodebuffToolCall<'str_replace'>,
+  toolResult: CodebuffToolOutput<'str_replace'>,
+  batchContext: BatchContext,
+) {
+  const { path } = toolCall.input
+  if (!Array.isArray(toolResult) || toolResult.length < 1) {
+    return
+  }
+  if (!benchifyCanFixLanguage(path)) {
+    return
+  }
+
+  // Only a JSON part that carries `content` is recorded, keyed by file path
+  const first = toolResult[0]
+  if (first.type === 'json' && first.value && 'content' in first.value) {
+    batchContext.editedFiles.set(path, first.value.content as string)
+  }
+}
+
+/**
+ * Logs a failed batched str_replace, then records and sends an error tool result for it (it does not rethrow)
+ */
+function handleStrReplaceError(
+  error: unknown,
+  toolCall: CodebuffToolCall<'str_replace'>,
+  operationIndex: number,
+  totalOperations: number,
+  context: {
+    toolResults: ToolResultPart[]
+    agentStepId: string
+    batchContext: BatchContext
+  },
+) {
+  const { toolResults, agentStepId, batchContext } = context
+  // Error instances are flattened to message/stack/name for the log; any other thrown value is logged as-is.
+  logger.error(
+    {
+      error:
+        error instanceof Error
+          ? {
+              message: error.message,
+              stack: error.stack,
+              name: error.name,
+            }
+          : error,
+      toolCallId: toolCall.toolCallId,
+      path: toolCall.input.path,
+      agentStepId,
+      userInputId: batchContext.userInputId,
+    },
+    `Error executing batched str_replace ${operationIndex}/${totalOperations}`,
+  )
+  // The error result reuses the failed call's toolCallId so it can be matched to that call.
+  const errorResult: ToolResultPart = {
+    type: 'tool-result',
+    toolName: 'str_replace',
+    toolCallId: toolCall.toolCallId,
+    output: [
+      {
+        type: 'json',
+        value: {
+          errorMessage: `Batched str_replace failed: ${error instanceof Error ? error.message : String(error)}`,
+        },
+      },
+    ],
+  }
+  // Unlike the success path in executeSingleStrReplace, nothing is pushed onto state.messages here.
+  toolResults.push(errorResult)
+  batchContext.onResponseChunk({
+    type: 'tool_result',
+    toolCallId: toolCall.toolCallId,
+    output: errorResult.output,
+  })
+}
+
+/**
+ * Sends the batch's intended file contents to Benchify and applies any returned diff; errors go to handleBenchifyFailure
+ */
+async function applyBenchifyIfNeeded(
+  batchContext: BatchContext,
+  options: {
+    agentStepId: string
+    clientSessionId: string
+    userInputId: string
+    userId: string | undefined
+    toolResults: ToolResultPart[]
+    toolCalls: CodebuffToolCall<'str_replace'>[]
+  },
+) {
+  // Early exit conditions - fail gracefully without blocking user edits
+  if (batchContext.intendedChanges.size === 0) {
+    return
+  }
+
+  // Check circuit breaker
+  if (isBenchifyCircuitOpen()) {
+    logger.debug(
+      {
+        circuitState: benchifyCircuitBreaker,
+        agentStepId: options.agentStepId,
+        userInputId: options.userInputId,
+      },
+      'Benchify circuit breaker is open, skipping call',
+    )
+    return
+  }
+
+  try {
+    // Filter and validate intended changes for Benchify
+    const filteredChanges = filterBenchifyFiles(
+      Array.from(batchContext.intendedChanges.entries()).map(
+        ([path, contents]) => ({ path, contents }),
+      ),
+      options.agentStepId,
+    )
+
+    if (filteredChanges.length === 0) {
+      logger.debug(
+        { agentStepId: options.agentStepId },
+        'No valid files for Benchify after filtering',
+      )
+      return
+    }
+
+    // Call Benchify with timeout and retry logic
+    const benchifyResult = await callBenchifyWithResilience(
+      filteredChanges,
+      options,
+    )
+    // NOTE(review): benchifyResult is typed string | null, so `.length` below is a character count, not a number of diffs.
+    if (benchifyResult && benchifyResult.length > 0) {
+      logger.info(
+        {
+          benchifyResultCount: benchifyResult.length,
+          diffResults: benchifyResult.length,
+          agentStepId: options.agentStepId,
+          userInputId: options.userInputId,
+        },
+        `executeBatchStrReplaces: Benchify returned ${benchifyResult.length} diff results, applying them`,
+      )
+
+      // Apply results with individual error handling to prevent one failure from blocking others
+      await applyBenchifyResultsGracefully(filteredChanges, benchifyResult, {
+        ws: batchContext.ws,
+        onResponseChunk: batchContext.onResponseChunk,
+        state: {
+          ...batchContext.state,
+          originalContents: batchContext.originalContents,
+        },
+        toolResults: options.toolResults,
+        toolCalls: options.toolCalls,
+        userInputId: options.userInputId,
+        agentStepId: options.agentStepId,
+      })
+    }
+
+    // Reached whenever nothing above threw, including a null result (e.g. no client configured)
+    resetBenchifyCircuitBreaker()
+  } catch (error) {
+    // Handle Benchify failure gracefully without blocking user edits
+    handleBenchifyFailure(error, {
+      intendedChangeFiles: Array.from(batchContext.intendedChanges.keys()),
+      agentStepId: options.agentStepId,
+      userInputId: options.userInputId,
+    })
+  }
+}
+
+/**
+ * Picks the files to send to Benchify: small enough, of a supported type, and
+ * capped at BENCHIFY_MAX_FILES (the earliest entries are kept).
+ */
+function filterBenchifyFiles(
+  files: { path: string; contents: string }[],
+  agentStepId: string,
+): { path: string; contents: string }[] {
+  const eligible: { path: string; contents: string }[] = []
+
+  for (const candidate of files) {
+    // Size is checked first so oversized files are logged before being dropped
+    const size = candidate.contents.length
+    if (size > BENCHIFY_MAX_FILE_SIZE) {
+      logger.debug(
+        { path: candidate.path, size, agentStepId },
+        'Skipping large file for Benchify',
+      )
+      continue
+    }
+
+    // Unsupported file types are dropped without logging
+    if (benchifyCanFixLanguage(candidate.path)) {
+      eligible.push(candidate)
+    }
+  }
+
+  // Under the cap: send everything that passed the checks
+  const total = eligible.length
+  if (total <= BENCHIFY_MAX_FILES) {
+    return eligible
+  }
+
+  // Over the cap: log it and keep only the first BENCHIFY_MAX_FILES entries
+  logger.debug(
+    { totalFiles: total, maxFiles: BENCHIFY_MAX_FILES, agentStepId },
+    'Limiting files sent to Benchify',
+  )
+  return eligible.slice(0, BENCHIFY_MAX_FILES)
+}
+
+/**
+ * Calls Benchify's runFixer (parsing fixes, DIFF response) with a per-attempt timeout inside withRetry; null when no client or no diff
+ */
+async function callBenchifyWithResilience(
+  editedFiles: { path: string; contents: string }[],
+  context: {
+    agentStepId: string
+    clientSessionId: string
+    userInputId: string
+    userId: string | undefined
+  },
+): Promise<string | null> {
+  const client = getBenchifyClient()
+  if (!client) {
+    return null // no client available (e.g. no API key): skip quietly rather than fail
+  }
+
+  return await withRetry(
+    async () => {
+      logger.info(
+        {
+          fileCount: editedFiles.length,
+          filePaths: editedFiles.map((f) => f.path),
+          agentStepId: context.agentStepId,
+          userInputId: context.userInputId,
+        },
+        'Calling Benchify API',
+      )
+      // The timeout wraps a single attempt, so every retry gets a fresh BENCHIFY_TIMEOUT_MS budget.
+      const diff_response = await withTimeout(
+        client.runFixer(editedFiles, {
+          fixes: ['parsing'],
+          mode: 'files',
+          response_format: 'DIFF',
+        }),
+        BENCHIFY_TIMEOUT_MS,
+        `Benchify call timed out after ${BENCHIFY_TIMEOUT_MS}ms`,
+      )
+      if (diff_response) {
+        return diff_response
+      }
+      // Any falsy response (for example an empty string) is normalised to null.
+      return null
+    },
+    {
+      maxRetries: 2, // NOTE(review): the exact attempt count depends on withRetry's semantics (not shown here)
+      retryIf: shouldRetryBenchifyError,
+      onRetry: (error, attempt) => {
+        logger.debug(
+          {
+            error: error instanceof Error ? error.message : String(error),
+            attempt,
+            agentStepId: context.agentStepId,
+          },
+          'Retrying Benchify call',
+        )
+      },
+      retryDelayMs: 100,
+    },
+  )
+}
+
+/**
+ * Decides whether a failed Benchify call is worth retrying: only failures that
+ * look transient (timeout, network error, 5xx response) return true.
+ * @param error Error thrown by the attempt; only its message is inspected.
+ */
+function shouldRetryBenchifyError(error: Error): boolean {
+  const message = error.message.toLowerCase()
+
+  // Timeouts and network failures. 'timed out' covers the message this file
+  // passes to withTimeout ("... timed out after Nms"), which lacks 'timeout'.
+  if (/timeout|timed out|network|econnreset/.test(message)) {
+    return true
+  }
+
+  // 5xx server errors: require a real three-digit 5xx status token, so that
+  // a stray '5' (as in "error 405" or "45 errors") no longer triggers a retry.
+  if (
+    /\b5\d{2}\b/.test(message) &&
+    (message.includes('error') || message.includes('server'))
+  ) {
+    return true
+  }
+
+  // Authentication, rate-limit and other client errors are not retried.
+  return false
+}
+
+/**
+ * Applies the Benchify diff for every edited file and waits for all of them to finish; failures are logged, never thrown
+ */
+async function applyBenchifyResultsGracefully(
+  editedFiles: { path: string; contents: string }[],
+  benchifyDiff: string,
+  context: {
+    ws: WebSocket
+    onResponseChunk: (chunk: string | PrintModeEvent) => void
+    state: Record<string, any>
+    toolResults: ToolResultPart[]
+    toolCalls: CodebuffToolCall<'str_replace'>[]
+    userInputId: string
+    agentStepId: string
+  },
+) {
+  const results = await Promise.allSettled(
+    editedFiles.map(async (editedFile) => {
+      if (benchifyDiff) {
+        await applyBenchifyResultSafely(editedFile, benchifyDiff, context) // awaited so allSettled tracks the real work
+      } else {
+        logger.warn(
+          { file: editedFile.path },
+          'No Benchify diff found for file.',
+        )
+      }
+    }),
+  )
+
+  // Log any failures but don't throw - individual file failures shouldn't block the batch
+  const failures = results.filter((result) => result.status === 'rejected')
+  if (failures.length > 0) {
+    logger.warn(
+      {
+        failureCount: failures.length,
+        totalFiles: editedFiles.length,
+        agentStepId: context.agentStepId,
+      },
+      'Some Benchify results failed to apply',
+    )
+  }
+}
+
+/**
+ * Asks the client to apply the Benchify diff to one file and replaces that file's str_replace tool result; never throws
+ */
+async function applyBenchifyResultSafely(
+  benchifyFile: { path: string; contents: string },
+  benchifyDiff: string,
+  context: {
+    ws: WebSocket
+    onResponseChunk: (chunk: string | PrintModeEvent) => void
+    state: Record<string, any>
+    toolResults: ToolResultPart[]
+    toolCalls: CodebuffToolCall<'str_replace'>[]
+    userInputId: string
+    agentStepId: string
+  },
+): Promise<void> {
+  try {
+    // Find the first tool call that targeted this file (later calls on the same path are not considered)
+    const relatedToolCall = context.toolCalls.find(
+      (tc) => tc.input.path === benchifyFile.path,
+    )
+
+    if (!relatedToolCall) {
+      logger.debug(
+        { fileName: benchifyFile.path, agentStepId: context.agentStepId },
+        'No matching tool call found for benchify result',
+      )
+      return
+    }
+
+    // Get the original content, preferring the latest applied content if available
+    let baseContent = context.state.originalContents?.[benchifyFile.path]
+
+    // Try to get more recent content from tool results if available
+    const latestToolResult = context.toolResults
+      .filter(
+        (tr) =>
+          tr.toolName === 'str_replace' &&
+          tr.toolCallId === relatedToolCall.toolCallId,
+      )
+      .pop()
+
+    if (latestToolResult?.output?.[0]?.type === 'json') {
+      const toolValue = latestToolResult.output[0].value
+      if (
+        toolValue &&
+        typeof toolValue === 'object' &&
+        'content' in toolValue
+      ) {
+        baseContent = (toolValue as { content: string }).content
+      }
+    }
+    // NOTE(review): baseContent is only used as a presence check; the request below sends benchifyDiff unchanged.
+    if (!baseContent) {
+      logger.debug(
+        { path: benchifyFile.path, agentStepId: context.agentStepId },
+        'Could not find base content for Benchify diff generation',
+      )
+      return
+    }
+
+    // Ask the client to apply the diff as a patch for this path; give up after 5 seconds
+    const toolCallResult = await withTimeout(
+      requestToolCall(context.ws, context.userInputId, 'str_replace', {
+        type: 'patch',
+        path: benchifyFile.path,
+        content: benchifyDiff,
+      }),
+      5000,
+      'Benchify patch application timed out',
+    )
+
+    // Create a tool result indicating benchify was applied
+    const benchifyToolResult: ToolResultPart = {
+      type: 'tool-result',
+      toolName: 'str_replace',
+      toolCallId: relatedToolCall.toolCallId,
+      output: toolCallResult.output,
+    }
+
+    // Replace the first result recorded for that tool call in place; append only if none exists
+    const existingResultIndex = context.toolResults.findIndex(
+      (tr) => tr.toolCallId === relatedToolCall.toolCallId,
+    )
+
+    if (existingResultIndex >= 0) {
+      context.toolResults[existingResultIndex] = benchifyToolResult
+    } else {
+      context.toolResults.push(benchifyToolResult)
+    }
+
+    // Notify client about the benchify update
+    context.onResponseChunk({
+      type: 'tool_result',
+      toolCallId: relatedToolCall.toolCallId,
+      output: benchifyToolResult.output,
+    })
+
+    logger.debug(
+      { path: benchifyFile.path, agentStepId: context.agentStepId },
+      'Successfully applied Benchify result',
+    )
+  } catch (error) {
+    // Log but don't throw - individual failures shouldn't block the entire batch
+    logger.warn(
+      {
+        error: error instanceof Error ? error.message : String(error),
+        fileName: benchifyFile.path,
+        agentStepId: context.agentStepId,
+      },
+      'Failed to apply individual Benchify result',
+    )
+  }
+}
+
+/**
+ * Reads the current text of `filePath`, resolved against
+ * `fileContext.projectRoot`. A failed read is logged as a warning and
+ * reported to the caller as `null` instead of throwing.
+ */
+async function extractOriginalContent(
+  filePath: string,
+  fileContext: ProjectFileContext,
+): Promise<string | null> {
+  try {
+    const target = `${fileContext.projectRoot}/${filePath}`
+    const handle = await file(target)
+    return await handle.text()
+  } catch (err) {
+    const reason = err instanceof Error ? err.message : String(err)
+    logger.warn(
+      { error: reason, path: filePath },
+      'Failed to read original file content',
+    )
+    return null
+  }
+}
+
+/**
+ * Replays a str_replace call's replacements, in order, on `currentContent`.
+ * A missing `old` string is skipped (debug-logged); returns `null` if replaying throws.
+ */
+async function extractIntendedContent(
+  toolCall: CodebuffToolCall<'str_replace'>,
+  currentContent: string,
+): Promise<string | null> {
+  try {
+    let content = currentContent
+    // Apply each replacement to the result of the previous one
+    for (const replacement of toolCall.input.replacements) {
+      const { old, new: newStr, allowMultiple } = replacement
+      if (allowMultiple) {
+        // Replacer function: inserts newStr verbatim (a string would expand `$&`)
+        content = content.replaceAll(old, () => newStr)
+      } else {
+        // Find the first occurrence and replace it
+        const index = content.indexOf(old)
+        if (index !== -1) {
+          content =
+            content.substring(0, index) +
+            newStr +
+            content.substring(index + old.length)
+        } else {
+          // Log warning but continue - this might be expected if operations are interdependent
+          logger.debug(
+            {
+              old: old.substring(0, 100), // Truncate for logging
+              new: newStr.substring(0, 100),
+              path: toolCall.input.path,
+            },
+            'String not found in content during intended content extraction',
+          )
+        }
+      }
+    }
+
+    return content
+  } catch (error) {
+    logger.warn(
+      {
+        error: error instanceof Error ? error.message : String(error),
+        path: toolCall.input.path,
+      },
+      'Failed to apply replacements for intended content extraction',
+    )
+    return null
+  }
+}
+
+/**
+ * Circuit breaker gate for Benchify calls: returns whether the circuit is open,
+ * first closing it (and zeroing the failure count) once `openUntil` has passed.
+ */
+function isBenchifyCircuitOpen(): boolean {
+  const breaker = benchifyCircuitBreaker
+  const cooldownOver = Date.now() > breaker.openUntil
+  if (breaker.isOpen && cooldownOver) {
+    // Cooldown elapsed: close the circuit again and forget earlier failures
+    breaker.isOpen = false
+    breaker.failureCount = 0
+    logger.debug('Benchify circuit breaker reset to closed state')
+  }
+  return breaker.isOpen
+}
+
+/**
+ * Records a failed Benchify call: bumps the failure counter, opens the circuit
+ * for CIRCUIT_BREAKER_TIMEOUT once CIRCUIT_BREAKER_THRESHOLD is reached, and
+ * logs the error as a warning so the caller can carry on without fixes.
+ */
+function handleBenchifyFailure(
+  error: unknown,
+  context: {
+    intendedChangeFiles: string[]
+    agentStepId: string
+    userInputId: string
+  },
+): void {
+  const breaker = benchifyCircuitBreaker
+  const { agentStepId, userInputId, intendedChangeFiles } = context
+  breaker.failureCount++
+  breaker.lastFailureTime = Date.now()
+  if (breaker.failureCount >= CIRCUIT_BREAKER_THRESHOLD) {
+    // Threshold reached: mark the circuit open until the timeout has passed
+    breaker.isOpen = true
+    breaker.openUntil = Date.now() + CIRCUIT_BREAKER_TIMEOUT
+    const circuitOpenUntil = new Date(breaker.openUntil).toISOString()
+    logger.warn(
+      { failureCount: breaker.failureCount, circuitOpenUntil, agentStepId },
+      'Benchify circuit breaker opened due to consecutive failures',
+    )
+  }
+
+  const message = error instanceof Error ? error.message : String(error)
+  logger.warn(
+    {
+      error: message,
+      failureCount: breaker.failureCount,
+      intendedChangeFiles,
+      agentStepId,
+      userInputId,
+    },
+    'Benchify call failed, continuing without fixes',
+  )
+}
+
+/** Returns the breaker to its closed, zero-failure state (logs if failures existed). */
+function resetBenchifyCircuitBreaker(): void {
+  const previousFailures = benchifyCircuitBreaker.failureCount
+  if (previousFailures > 0) {
+    logger.debug(
+      { previousFailures },
+      'Benchify circuit breaker reset after successful call',
+    )
+  }
+  const closed = { failureCount: 0, isOpen: false, openUntil: 0 }
+  Object.assign(benchifyCircuitBreaker, closed)
+}
+
+export function benchifyCanFixLanguage(path: string): boolean {
+  return BENCHIFY_FILE_TYPES.some((ext) => path.endsWith('.' + ext)) // suffix match on "." + extension
+}
diff --git a/backend/src/tools/handlers/tool/write-file.ts b/backend/src/tools/handlers/tool/write-file.ts
index 4b912a0615..261cff72d0 100644
--- a/backend/src/tools/handlers/tool/write-file.ts
+++ b/backend/src/tools/handlers/tool/write-file.ts
@@ -230,7 +230,7 @@ export async function postStreamProcessing<T extends FileProcessingTools>(
   if (errors.length > 0) {
     if (errors.length > 1) {
       throw new Error(
-        `Internal error: Unexpected number of matching errors for ${{ toolCall }}, found ${errors.length}, expected 1`,
+        `Internal error: Unexpected number of matching errors for ${JSON.stringify(toolCall)}, found ${errors.length}, expected 1`,
       )
     }
 
@@ -251,7 +251,7 @@ export async function postStreamProcessing<T extends FileProcessingTools>(
   )
   if (changes.length !== 1) {
     throw new Error(
-      `Internal error: Unexpected number of matching changes for ${{ toolCall }}, found ${changes.length}, expected 1`,
+      `Internal error: Unexpected number of matching changes for ${JSON.stringify(toolCall)}, found ${changes.length}, expected 1`,
     )
   }
 
diff --git a/backend/src/tools/stream-parser.ts b/backend/src/tools/stream-parser.ts
index 69d1bb15e0..94a9c6af93 100644
--- a/backend/src/tools/stream-parser.ts
+++ b/backend/src/tools/stream-parser.ts
@@ -9,9 +9,14 @@ import { generateCompactId } from '@codebuff/common/util/string'
 import { cloneDeep } from 'lodash'
 
 import { expireMessages } from '../util/messages'
+import { logger } from '../util/logger'
 import { sendAction } from '../websockets/websocket-action'
 import { processStreamWithTags } from '../xml-stream-parser'
 import { executeCustomToolCall, executeToolCall } from './tool-executor'
+import {
+  executeBatchStrReplaces,
+  BatchStrReplaceState,
+} from './batch-str-replace'
 
 import type { CustomToolCall } from './tool-executor'
 import type { StreamChunk } from '../llm-apis/vercel-ai-sdk/ai-sdk'
@@ -36,7 +41,7 @@ export type ToolCallError = {
 } & Omit<ToolCallPart, 'type'>
 
 export async function processStreamWithTools(options: {
-  stream: AsyncGenerator<StreamChunk, string | null>
+  stream: AsyncGenerator<StreamChunk>
   ws: WebSocket
   agentStepId: string
   clientSessionId: string
@@ -79,6 +84,15 @@ export async function processStreamWithTools(options: {
   const { promise: streamDonePromise, resolve: resolveStreamDonePromise } =
     Promise.withResolvers<void>()
   let previousToolCallFinished = streamDonePromise
+
+  // Two-phase execution state
+  const batchState: BatchStrReplaceState = {
+    deferredStrReplaces: [],
+    otherToolsQueue: [],
+    strReplacePhaseComplete: false,
+    failures: [],
+  }
+
   const state: Record<string, any> = {
     ws,
     fingerprintId,
@@ -108,25 +122,82 @@ export async function processStreamWithTools(options: {
     return {
       onTagStart: () => {},
       onTagEnd: async (_: string, input: Record<string, string>) => {
-        // delegated to reusable helper
-        previousToolCallFinished = executeToolCall({
-          toolName,
-          input,
-          toolCalls,
-          toolResults,
-          toolResultsToAddAfterStream,
-          previousToolCallFinished,
-          ws,
-          agentTemplate,
-          fileContext,
-          agentStepId,
-          clientSessionId,
-          userInputId,
-          fullResponse: fullResponseChunks.join(''),
-          onResponseChunk,
-          state,
-          userId,
-        })
+        // Two-phase execution: defer str_replace tools, queue others
+        if (toolName === 'str_replace' && !batchState.strReplacePhaseComplete) {
+          // Defer str_replace execution
+          const toolCallId = generateCompactId()
+          const toolCall: CodebuffToolCall<'str_replace'> = {
+            toolName: 'str_replace',
+            input: input as any,
+            toolCallId,
+          }
+
+          batchState.deferredStrReplaces.push({ toolCall })
+
+          // Still emit the tool call event
+          onResponseChunk({
+            type: 'tool_call',
+            toolCallId,
+            toolName,
+            input,
+          })
+        } else {
+          // First non-str_replace tool marks end of str_replace phase
+          if (
+            !batchState.strReplacePhaseComplete &&
+            batchState.deferredStrReplaces.length > 0
+          ) {
+            logger.info(
+              {
+                triggeringTool: toolName,
+                deferredCount: batchState.deferredStrReplaces.length,
+                agentStepId,
+                userInputId,
+              },
+              `toolCallback: Triggering batch str_replace execution (${batchState.deferredStrReplaces.length} deferred tools) due to ${toolName}`,
+            )
+
+            batchState.strReplacePhaseComplete = true
+
+            // Execute all deferred str_replace tools as a batch
+            previousToolCallFinished = previousToolCallFinished.then(
+              async () => {
+                await executeBatchStrReplaces({
+                  deferredStrReplaces: batchState.deferredStrReplaces,
+                  toolCalls,
+                  toolResults,
+                  ws,
+                  fileContext,
+                  agentStepId,
+                  clientSessionId,
+                  userInputId,
+                  onResponseChunk,
+                  state,
+                  userId,
+                })
+              },
+            )
+          }
+
+          previousToolCallFinished = executeToolCall({
+            toolName,
+            input,
+            toolCalls,
+            toolResults,
+            toolResultsToAddAfterStream,
+            previousToolCallFinished,
+            ws,
+            agentTemplate,
+            fileContext,
+            agentStepId,
+            clientSessionId,
+            userInputId,
+            fullResponse: fullResponseChunks.join(''),
+            onResponseChunk,
+            state,
+            userId,
+          })
+        }
       },
     }
   }
@@ -186,14 +257,7 @@ export async function processStreamWithTools(options: {
   )
 
   let reasoning = false
-  let messageId: string | null = null
-  while (true) {
-    const { value: chunk, done } = await streamWithTags.next()
-    if (done) {
-      messageId = chunk
-      break
-    }
-
+  for await (const chunk of streamWithTags) {
     if (chunk.type === 'reasoning') {
       if (!reasoning) {
         reasoning = true
@@ -231,14 +295,66 @@ export async function processStreamWithTools(options: {
   ])
 
   resolveStreamDonePromise()
-  await previousToolCallFinished
 
+  // Handle case where only str_replace tools were generated and stream ended
+  if (
+    !batchState.strReplacePhaseComplete &&
+    batchState.deferredStrReplaces.length > 0
+  ) {
+    logger.info(
+      {
+        triggeringEvent: 'stream_end',
+        deferredCount: batchState.deferredStrReplaces.length,
+        deferredFiles: batchState.deferredStrReplaces.map(
+          (d) => d.toolCall.input.path,
+        ),
+        agentStepId,
+        userInputId,
+      },
+      `stream-parser: Triggering batch str_replace execution (${batchState.deferredStrReplaces.length} deferred tools) due to stream end`,
+    )
+
+    batchState.strReplacePhaseComplete = true
+
+    // Execute all deferred str_replace tools as a batch
+    previousToolCallFinished = previousToolCallFinished.then(async () => {
+      logger.info(
+        {
+          agentStepId,
+          userInputId,
+          deferredCount: batchState.deferredStrReplaces.length,
+        },
+        'stream-parser: About to call executeBatchStrReplaces from stream end handler',
+      )
+      await executeBatchStrReplaces({
+        deferredStrReplaces: batchState.deferredStrReplaces,
+        toolCalls,
+        toolResults,
+        ws,
+        fileContext,
+        agentStepId,
+        clientSessionId,
+        userInputId,
+        onResponseChunk,
+        state,
+        userId,
+      })
+      logger.info(
+        {
+          agentStepId,
+          userInputId,
+        },
+        'stream-parser: Completed executeBatchStrReplaces from stream end handler',
+      )
+    })
+  }
+
+  await previousToolCallFinished
   return {
     toolCalls,
     toolResults,
     state,
     fullResponse: fullResponseChunks.join(''),
     fullResponseChunks,
-    messageId,
   }
 }
diff --git a/bun.lock b/bun.lock
index 1b8053bba8..b05b588d6c 100644
--- a/bun.lock
+++ b/bun.lock
@@ -5,6 +5,7 @@
       "name": "codebuff-project",
       "dependencies": {
         "@t3-oss/env-nextjs": "^0.7.3",
+        "benchify": "^0.1.0-alpha.44",
         "zod": "3.25.67",
       },
       "devDependencies": {
@@ -49,6 +50,7 @@
         "@jitl/quickjs-wasmfile-release-sync": "0.31.0",
         "@openrouter/ai-sdk-provider": "1.1.2",
         "ai": "5.0.0",
+        "benchify": "^0.1.0-alpha.41",
         "cors": "^2.8.5",
         "diff": "5.2.0",
         "dotenv": "16.4.5",
@@ -1632,6 +1634,8 @@
 
     "basic-ftp": ["basic-ftp@5.0.5", "", {}, "sha512-4Bcg1P8xhUuqcii/S0Z9wiHIrQVPMermM1any+MX5GeGD7faD3/msQUDGLol9wOcz4/jbg/WJnGqoJF6LiBdtg=="],
 
+    "benchify": ["benchify@0.1.0-alpha.44", "", { "dependencies": { "minimatch": "^9.0.3" }, "peerDependencies": { "react": ">=16.8.0" }, "optionalPeers": ["react"] }, "sha512-sGjAPgGKRCNB5h2fTIMHfKGLDBlGT+wUxVNOPJ5Ss5m0PDdtXdlE60CJAcnb2Z620gk5z9P8xppSjZuxKB731w=="],
+
     "bidi-js": ["bidi-js@1.0.3", "", { "dependencies": { "require-from-string": "^2.0.2" } }, "sha512-RKshQI1R3YQ+n9YJz2QQ147P66ELpa1FQEg20Dk8oW9t2KgLbpDLLp9aGZ7y8WHSshDknG0bknqGw5/tyCs5tw=="],
 
     "big.js": ["big.js@6.2.2", "", {}, "sha512-y/ie+Faknx7sZA5MfGA2xKlu0GDv8RWrXGsmlteyJQ2lvoKv9GBK/fpRMc2qlSoBAgNxrixICFCBefIq8WCQpQ=="],
@@ -4230,6 +4234,8 @@
 
     "babel-plugin-istanbul/istanbul-lib-instrument": ["istanbul-lib-instrument@5.2.1", "", { "dependencies": { "@babel/core": "^7.12.3", "@babel/parser": "^7.14.7", "@istanbuljs/schema": "^0.1.2", "istanbul-lib-coverage": "^3.2.0", "semver": "^6.3.0" } }, "sha512-pzqtp31nLv/XFOzXGuvhCb8qhjmTVo5vjVk19XE4CRlSWz0KoeJ3bw9XsA7nOp9YBf4qHjwBxkDzKcME/J29Yg=="],
 
+    "benchify/minimatch": ["minimatch@9.0.5", "", { "dependencies": { "brace-expansion": "^2.0.1" } }, "sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow=="],
+
     "bl/buffer": ["buffer@5.7.1", "", { "dependencies": { "base64-js": "^1.3.1", "ieee754": "^1.1.13" } }, "sha512-EHcyIPBQ4BSGlvjB16k5KgAJ27CIsHY/2JBmCRReo48y9rQ3MaUzWX3KVlBa4U7MyX02HdVj0K7C3WaB3ju7FQ=="],
 
     "bl/readable-stream": ["readable-stream@3.6.2", "", { "dependencies": { "inherits": "^2.0.3", "string_decoder": "^1.1.1", "util-deprecate": "^1.0.1" } }, "sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA=="],
@@ -5010,6 +5016,8 @@
 
     "babel-plugin-istanbul/istanbul-lib-instrument/semver": ["semver@6.3.1", "", { "bin": { "semver": "bin/semver.js" } }, "sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA=="],
 
+    "benchify/minimatch/brace-expansion": ["brace-expansion@2.0.2", "", { "dependencies": { "balanced-match": "^1.0.0" } }, "sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ=="],
+
     "body-parser/debug/ms": ["ms@2.0.0", "", {}, "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A=="],
 
     "chalk/ansi-styles/color-convert": ["color-convert@2.0.1", "", { "dependencies": { "color-name": "~1.1.4" } }, "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ=="],
diff --git a/npm-app/src/tool-handlers.ts b/npm-app/src/tool-handlers.ts
index 7e90d81262..cf8c947c83 100644
--- a/npm-app/src/tool-handlers.ts
+++ b/npm-app/src/tool-handlers.ts
@@ -67,16 +67,26 @@ export const handleUpdateFile = async <
     console.log(green(`- Created ${file} ${counts}`))
   }
   for (const file of modified) {
-    // Calculate added/deleted lines from the diff content
+    // Calculate added/deleted lines from the diff content, excluding metadata
     let addedLines = 0
     let deletedLines = 0
-    lines.forEach((line) => {
+
+    for (const line of lines) {
+      // Skip all diff metadata lines (headers, hunk headers, etc.)
+      if (
+        line.startsWith('---') ||
+        line.startsWith('+++') ||
+        line.startsWith('@@')
+      ) {
+        continue
+      }
+      // Count actual added/removed code lines
       if (line.startsWith('+')) {
         addedLines++
       } else if (line.startsWith('-')) {
         deletedLines++
       }
-    })
+    }
 
     const counts = `(${green(`+${addedLines}`)}, ${red(`-${deletedLines}`)})`
     result.push([
diff --git a/package.json b/package.json
index 7249f38774..4280d0c611 100644
--- a/package.json
+++ b/package.json
@@ -39,6 +39,7 @@
   },
   "dependencies": {
     "@t3-oss/env-nextjs": "^0.7.3",
+    "benchify": "^0.1.0-alpha.44",
     "zod": "3.25.67"
   },
   "overrides": {
diff --git a/packages/internal/src/env.ts b/packages/internal/src/env.ts
index ecc4510305..44db06004b 100644
--- a/packages/internal/src/env.ts
+++ b/packages/internal/src/env.ts
@@ -10,6 +10,7 @@ const envSchema = {
   server: {
     // Backend variables
     CODEBUFF_API_KEY: z.string().optional(),
+    BENCHIFY_API_KEY: z.string().optional(),
     OPEN_ROUTER_API_KEY: z.string().min(1),
     RELACE_API_KEY: z.string().min(1),
     LINKUP_API_KEY: z.string().min(1),
@@ -51,6 +52,7 @@ const envSchema = {
   runtimeEnv: {
     // Backend variables
     CODEBUFF_API_KEY: process.env.CODEBUFF_API_KEY,
+    BENCHIFY_API_KEY: process.env.BENCHIFY_API_KEY,
     OPEN_ROUTER_API_KEY: process.env.OPEN_ROUTER_API_KEY,
     RELACE_API_KEY: process.env.RELACE_API_KEY,
     LINKUP_API_KEY: process.env.LINKUP_API_KEY,