From c16ab692855ecd770210a54b59f59dda108182be Mon Sep 17 00:00:00 2001 From: James Grugett Date: Tue, 5 May 2026 11:56:07 -0700 Subject: [PATCH] Improve code search output formatting --- .../util/__tests__/format-code-search.test.ts | 60 +++++++++++++ common/src/util/format-code-search.ts | 88 ++++++++++++------- sdk/src/__tests__/code-search.test.ts | 79 +++++++++++++---- sdk/src/tools/code-search.ts | 74 ++++++++++------ 4 files changed, 226 insertions(+), 75 deletions(-) create mode 100644 common/src/util/__tests__/format-code-search.test.ts diff --git a/common/src/util/__tests__/format-code-search.test.ts b/common/src/util/__tests__/format-code-search.test.ts new file mode 100644 index 0000000000..f52e65af17 --- /dev/null +++ b/common/src/util/__tests__/format-code-search.test.ts @@ -0,0 +1,60 @@ +import { describe, expect, it } from 'bun:test' + +import { formatCodeSearchOutput } from '../format-code-search' + +describe('formatCodeSearchOutput', () => { + it('adds a match count and line labels', () => { + const output = formatCodeSearchOutput( + [ + 'src/a.ts:12:const alpha = true', + 'src/a.ts:18:return alpha', + 'src/b.ts:3:export const beta = false', + ].join('\n'), + { matchCount: 3 }, + ) + + expect(output).toBe( + [ + 'Found 3 matches', + 'src/a.ts:', + ' Line 12: const alpha = true', + ' Line 18: return alpha', + '', + 'src/b.ts:', + ' Line 3: export const beta = false', + ].join('\n'), + ) + }) + + it('uses the provided match count instead of counting context lines', () => { + const output = formatCodeSearchOutput( + [ + 'src/a.ts:10:const before = true', + 'src/a.ts:11:const match = true', + 'src/a.ts:12:const after = true', + ].join('\n'), + { matchCount: 1 }, + ) + + expect(output).toContain('Found 1 matches') + expect(output).toContain(' Line 10: const before = true') + expect(output).toContain(' Line 11: const match = true') + expect(output).toContain(' Line 12: const after = true') + }) + + it('does not count native ripgrep context lines as matches', () => { + const output = formatCodeSearchOutput( + [ + 'src/a.ts-10-const before = true', + 'src/a.ts:11:const match = true', + 'src/a.ts-12-const after = true', + ].join('\n'), + ) + + expect(output).toContain('Found 1 matches') + }) + + it('reports zero matches for empty output', () => { + expect(formatCodeSearchOutput('')).toBe('Found 0 matches') + }) +}) diff --git a/common/src/util/format-code-search.ts b/common/src/util/format-code-search.ts index 5b98edec31..8a89a7897e 100644 --- a/common/src/util/format-code-search.ts +++ b/common/src/util/format-code-search.ts @@ -1,24 +1,31 @@ /** * Formats code search output to group matches by file. * - * Input format: ./file.ts:line content + * Input format: ./file.ts:line:content * Output format: + * Found 3 matches * ./file.ts: - * line content - * another line content - * yet another line content + * Line 1: content + * Line 2: another line content + * Line 3: yet another line content * * (double newline between distinct files) * * @param stdout The raw stdout from ripgrep + * @param options.matchCount The number of actual matches, excluding context lines * @returns Formatted output with matches grouped by file */ -export function formatCodeSearchOutput(stdout: string): string { +export function formatCodeSearchOutput( + stdout: string, + options: { matchCount?: number } = {}, +): string { if (!stdout) { - return 'No results' + return 'Found 0 matches' } const lines = stdout.split('\n') - const formatted: string[] = [] + const formatted: string[] = [ + `Found ${options.matchCount ?? countFormattedMatches(lines)} matches`, + ] let currentFile: string | null = null for (const line of lines) { @@ -38,30 +45,13 @@ export function formatCodeSearchOutput(stdout: string): string { // Use regex to find the pattern: separator + digits + separator // This handles filenames with hyphens/colons by matching the line number pattern - let separatorIndex = -1 - let filePath = '' + const parsedLine = parseRipgrepLine(line) - // Try match line pattern: filename:digits:content - const matchLinePattern = /(.*?):(\d+):(.*)$/ - const matchLineMatch = line.match(matchLinePattern) - if (matchLineMatch) { - filePath = matchLineMatch[1] - separatorIndex = matchLineMatch[1].length - } else { - // Try context line pattern: filename-digits-content - const contextLinePattern = /(.*?)-(\d+)-(.*)$/ - const contextLineMatch = line.match(contextLinePattern) - if (contextLineMatch) { - filePath = contextLineMatch[1] - separatorIndex = contextLineMatch[1].length - } - } - - if (separatorIndex === -1) { + if (!parsedLine) { formatted.push(line) continue } - const content = line.substring(separatorIndex) + const { filePath, lineNumber, content } = parsedLine // Check if this is a new file (file paths don't start with whitespace) if (filePath && !filePath.startsWith(' ') && !filePath.startsWith('\t')) { @@ -73,11 +63,9 @@ export function formatCodeSearchOutput(stdout: string): string { currentFile = filePath // Show file path with colon on its own line formatted.push(filePath + ':') - // Show content without leading separator on next line - formatted.push(content.substring(1)) + formatted.push(` Line ${lineNumber}: ${content}`) } else { - // Same file - just show content without leading separator - formatted.push(content.substring(1)) + formatted.push(` Line ${lineNumber}: ${content}`) } } else { // Line doesn't match expected format, keep as-is @@ -87,3 +75,41 @@ export function formatCodeSearchOutput(stdout: string): string { return formatted.join('\n') } + +function parseRipgrepLine(line: string): { + filePath: string + lineNumber: string + content: string + isContext: boolean +} | null { + // Try match line pattern: filename:digits:content + const matchLineMatch = line.match(/(.*?):(\d+):(.*)$/) + if (matchLineMatch) { + return { + filePath: matchLineMatch[1], + lineNumber: matchLineMatch[2], + content: matchLineMatch[3], + isContext: false, + } + } + + // Try context line pattern: filename-digits-content + const contextLineMatch = line.match(/(.*?)-(\d+)-(.*)$/) + if (contextLineMatch) { + return { + filePath: contextLineMatch[1], + lineNumber: contextLineMatch[2], + content: contextLineMatch[3], + isContext: true, + } + } + + return null +} + +function countFormattedMatches(lines: string[]): number { + return lines.filter((line) => { + const parsedLine = parseRipgrepLine(line) + return parsedLine && !parsedLine.isContext + }).length +} diff --git a/sdk/src/__tests__/code-search.test.ts b/sdk/src/__tests__/code-search.test.ts index 2e4d27fcd0..2cad255613 100644 --- a/sdk/src/__tests__/code-search.test.ts +++ b/sdk/src/__tests__/code-search.test.ts @@ -51,7 +51,9 @@ describe('codeSearch', () => { const result = await searchPromise expect(result[0].type).toBe('json') const value = asCodeSearchResult(result[0]) + expect(value.stdout).toContain('Found 3 matches') expect(value.stdout).toContain('file1.ts:') + expect(value.stdout).toContain(' Line 1: import foo from "bar"') expect(value.stdout).toContain('file2.ts:') }) }) @@ -81,6 +83,8 @@ describe('codeSearch', () => { expect(result[0].type).toBe('json') const value = asCodeSearchResult(result[0]) + expect(value.stdout).toContain('Found 2 matches') + // Should contain match lines expect(value.stdout).toContain('import { env } from "./config"') expect(value.stdout).toContain('import env from "process"') @@ -104,7 +108,11 @@ describe('codeSearch', () => { createRgJsonContext('app.ts', 1, 'import React from "react"'), createRgJsonContext('app.ts', 2, ''), createRgJsonMatch('app.ts', 3, 'export const main = () => {}'), - createRgJsonContext('utils.ts', 8, 'function validateInput(x: string) {'), + createRgJsonContext( + 'utils.ts', + 8, + 'function validateInput(x: string) {', + ), createRgJsonContext('utils.ts', 9, ' return x.length > 0'), createRgJsonMatch('utils.ts', 10, 'export function helper() {}'), ].join('\n') @@ -343,6 +351,28 @@ describe('codeSearch', () => { } }) + it('should not report truncation when matches exactly equal maxResults', async () => { + const searchPromise = codeSearch({ + projectPath: '/test/project', + pattern: 'test', + maxResults: 2, + }) + + const output = [ + createRgJsonMatch('file.ts', 1, 'test 1'), + createRgJsonMatch('file.ts', 2, 'test 2'), + ].join('\n') + + mockProcess.stdout.emit('data', Buffer.from(output)) + mockProcess.emit('close', 0) + + const result = await searchPromise + const value = asCodeSearchResult(result[0]) + + expect(value.stdout).toContain('Found 2 matches') + expect(value.stdout).not.toContain('Results limited') + }) + it('should respect globalMaxResults with context lines', async () => { const searchPromise = codeSearch({ projectPath: '/test/project', @@ -447,8 +477,7 @@ describe('codeSearch', () => { const result = await searchPromise const value = asCodeSearchResult(result[0]) - // formatCodeSearchOutput returns 'No results' for empty input - expect(value.stdout).toBe('No results') + expect(value.stdout).toBe('Found 0 matches') }) }) @@ -544,7 +573,13 @@ describe('codeSearch', () => { // Generate matches with long content to quickly exceed output size const matches: string[] = [] for (let i = 0; i < 20; i++) { - matches.push(createRgJsonMatch('file.ts', i, `test line ${i} with some content that is quite long to fill up the buffer quickly`)) + matches.push( + createRgJsonMatch( + 'file.ts', + i, + `test line ${i} with some content that is quite long to fill up the buffer quickly`, + ), + ) } const output = matches.join('\n') @@ -559,8 +594,8 @@ describe('codeSearch', () => { const matchCount = (value.stdout!.match(/test line \d+/g) || []).length expect(matchCount).toBeLessThan(20) // Should indicate truncation happened - const hasTruncationMessage = - value.stdout!.includes('truncated') || + const hasTruncationMessage = + value.stdout!.includes('truncated') || value.stdout!.includes('limit reached') || value.stdout!.includes('Output size limit') expect(hasTruncationMessage).toBe(true) @@ -616,7 +651,7 @@ describe('codeSearch', () => { expect(result[0].type).toBe('json') const value = asCodeSearchResult(result[0]) expect(value.stdout).toContain('file.ts:') - + // Verify the args passed to spawn include the glob flag correctly expect(mockSpawn).toHaveBeenCalled() const spawnArgs = mockSpawn.mock.calls[0]![1] as string[] @@ -631,7 +666,11 @@ describe('codeSearch', () => { flags: '-g *.ts -g *.tsx', }) - const output = createRgJsonMatch('file.tsx', 1, 'import React from "react"') + const output = createRgJsonMatch( + 'file.tsx', + 1, + 'import React from "react"', + ) mockProcess.stdout.emit('data', Buffer.from(output)) mockProcess.emit('close', 0) @@ -640,11 +679,13 @@ describe('codeSearch', () => { expect(result[0].type).toBe('json') const value = asCodeSearchResult(result[0]) expect(value.stdout).toContain('file.tsx:') - + // Verify both glob patterns are passed correctly const spawnArgs = mockSpawn.mock.calls[0]![1] as string[] // Should have two -g flags, each followed by its pattern - const gFlagIndices = spawnArgs.map((arg, i) => arg === '-g' ? i : -1).filter(i => i !== -1) + const gFlagIndices = spawnArgs + .map((arg, i) => (arg === '-g' ? i : -1)) + .filter((i) => i !== -1) expect(gFlagIndices.length).toBe(2) expect(spawnArgs[gFlagIndices[0]! + 1]).toBe('*.ts') expect(spawnArgs[gFlagIndices[1]! + 1]).toBe('*.tsx') @@ -657,7 +698,11 @@ describe('codeSearch', () => { flags: "-g 'authentication.knowledge.md'", }) - const output = createRgJsonMatch('authentication.knowledge.md', 5, 'auth content') + const output = createRgJsonMatch( + 'authentication.knowledge.md', + 5, + 'auth content', + ) mockProcess.stdout.emit('data', Buffer.from(output)) mockProcess.emit('close', 0) @@ -721,13 +766,17 @@ describe('codeSearch', () => { flags: '-g *.ts -i -g *.tsx', }) - const output = createRgJsonMatch('file.tsx', 1, 'import React from "react"') + const output = createRgJsonMatch( + 'file.tsx', + 1, + 'import React from "react"', + ) mockProcess.stdout.emit('data', Buffer.from(output)) mockProcess.emit('close', 0) const result = await searchPromise - + // Verify flags are preserved in order without deduplication const spawnArgs = mockSpawn.mock.calls[0]![1] as string[] const flagsSection = spawnArgs.slice(0, spawnArgs.indexOf('--')) @@ -735,9 +784,9 @@ describe('codeSearch', () => { expect(flagsSection).toContain('*.ts') expect(flagsSection).toContain('-i') expect(flagsSection).toContain('*.tsx') - + // Count -g flags - should be 2, not deduplicated to 1 - const gCount = flagsSection.filter(arg => arg === '-g').length + const gCount = flagsSection.filter((arg) => arg === '-g').length expect(gCount).toBe(2) }) }) diff --git a/sdk/src/tools/code-search.ts b/sdk/src/tools/code-search.ts index 6bd656b6a4..2fa0286d5c 100644 --- a/sdk/src/tools/code-search.ts +++ b/sdk/src/tools/code-search.ts @@ -98,7 +98,10 @@ export function codeSearch({ const rgPath = getBundledRgPath(import.meta.url) if (logger) { - logger.info({ rgPath, args, searchCwd }, 'code-search: Spawning ripgrep process') + logger.info( + { rgPath, args, searchCwd }, + 'code-search: Spawning ripgrep process', + ) } const childProcess = spawn(rgPath, args, { cwd: searchCwd, @@ -111,6 +114,7 @@ export function codeSearch({ const fileGroups = new Map() // Track match count per file separately from total lines const fileMatchCounts = new Map() + const filesLimitedByMaxResults = new Set() let matchesGlobal = 0 let estimatedOutputLen = 0 let killedForLimit = false @@ -140,7 +144,7 @@ export function codeSearch({ const hardKill = () => { try { childProcess.kill('SIGTERM') - } catch { } + } catch {} // Store timeout reference so it can be cleared if process closes normally killTimeoutId = setTimeout(() => { try { @@ -148,12 +152,22 @@ export function codeSearch({ } catch { try { childProcess.kill() - } catch { } + } catch {} } killTimeoutId = null }, 1000) } + const formatCollectedOutput = (rawOutput: string) => + formatCodeSearchOutput(rawOutput, { + matchCount: matchesGlobal, + }) + + const truncateOutput = (output: string, maxLength: number) => + output.length > maxLength + ? output.substring(0, maxLength) + '\n\n[Output truncated]' + : output + const timeoutId = setTimeout(() => { if (isResolved) return hardKill() @@ -165,10 +179,10 @@ export function codeSearch({ } const partialOutput = collectedLines.join('\n') - const truncatedStdout = - partialOutput.length > 1000 - ? partialOutput.substring(0, 1000) + '\n\n[Output truncated]' - : partialOutput + const truncatedStdout = truncateOutput( + formatCollectedOutput(partialOutput), + 1000, + ) const truncatedStderr = stderrBuf.length > 1000 ? stderrBuf.substring(0, 1000) + '\n\n[Error output truncated]' @@ -228,6 +242,9 @@ export function codeSearch({ // For matches: only if we haven't hit the per-file limit // For context: always include (they don't count toward limit) const shouldInclude = !isMatch || fileMatchCount < maxResults + if (isMatch && !shouldInclude) { + filesLimitedByMaxResults.add(filePath) + } if (shouldInclude) { // Add the line to output @@ -253,13 +270,10 @@ export function codeSearch({ limitedLines.push(...lines) } const rawOutput = limitedLines.join('\n') - const formattedOutput = formatCodeSearchOutput(rawOutput) - - const finalOutput = - formattedOutput.length > maxOutputStringLength - ? formattedOutput.substring(0, maxOutputStringLength) + - '\n\n[Output truncated]' - : formattedOutput + const finalOutput = truncateOutput( + formatCollectedOutput(rawOutput), + maxOutputStringLength, + ) const limitReason = matchesGlobal >= globalMaxResults @@ -324,6 +338,13 @@ export function codeSearch({ !isMatch || (fileMatchCount < maxResults && matchesGlobal < globalMaxResults) + if ( + isMatch && + fileMatchCount >= maxResults && + matchesGlobal < globalMaxResults + ) { + filesLimitedByMaxResults.add(filePath) + } if (shouldInclude) { fileLines.push(formattedLine) @@ -335,10 +356,10 @@ export function codeSearch({ } } } - } catch { } + } catch {} } } - } catch { } + } catch {} // Build final output from collected matches const limitedLines: string[] = [] @@ -346,9 +367,7 @@ export function codeSearch({ for (const [filename, fileLines] of fileGroups) { limitedLines.push(...fileLines) - // Note if file was truncated (based on match count, not total lines) - const fileMatchCount = fileMatchCounts.get(filename) ?? 0 - if (fileMatchCount >= maxResults) { + if (filesLimitedByMaxResults.has(filename)) { truncatedFiles.push( `${filename}: limited to ${maxResults} results per file`, ) @@ -374,20 +393,17 @@ export function codeSearch({ rawOutput += `\n\n[${truncationMessages.join('\n\n')}]` } - const formattedOutput = formatCodeSearchOutput(rawOutput) - // Truncate output to prevent memory issues - const truncatedStdout = - formattedOutput.length > maxOutputStringLength - ? formattedOutput.substring(0, maxOutputStringLength) + - '\n\n[Output truncated]' - : formattedOutput + const truncatedStdout = truncateOutput( + formatCollectedOutput(rawOutput), + maxOutputStringLength, + ) const truncatedStderr = stderrBuf ? stderrBuf + - (stderrBuf.length >= Math.floor(maxOutputStringLength / 5) - ? '\n\n[Error output truncated]' - : '') + (stderrBuf.length >= Math.floor(maxOutputStringLength / 5) + ? '\n\n[Error output truncated]' + : '') : '' settle({