|
| 1 | +import { afterEach, describe, expect, it } from 'vitest'; |
| 2 | +import { promises as fs } from 'fs'; |
| 3 | +import os from 'os'; |
| 4 | +import path from 'path'; |
| 5 | +import { CodebaseIndexer } from '../src/core/indexer.js'; |
| 6 | +import { analyzerRegistry } from '../src/core/analyzer-registry.js'; |
| 7 | +import { GenericAnalyzer } from '../src/analyzers/generic/index.js'; |
| 8 | +import { |
| 9 | + CODEBASE_CONTEXT_DIRNAME, |
| 10 | + KEYWORD_INDEX_FILENAME |
| 11 | +} from '../src/constants/codebase-context.js'; |
| 12 | + |
| 13 | +describe('Indexer exclude patterns — nested directories', () => { |
| 14 | + let tempDir: string; |
| 15 | + |
| 16 | + afterEach(async () => { |
| 17 | + if (tempDir) await fs.rm(tempDir, { recursive: true, force: true }); |
| 18 | + }); |
| 19 | + |
| 20 | + it('excludes nested coverage, worktrees, .claude, and dist directories', async () => { |
| 21 | + analyzerRegistry.register(new GenericAnalyzer()); |
| 22 | + tempDir = await fs.mkdtemp(path.join(os.tmpdir(), 'indexer-exclude-patterns-')); |
| 23 | + |
| 24 | + // Legitimate source file |
| 25 | + await fs.mkdir(path.join(tempDir, 'src'), { recursive: true }); |
| 26 | + await fs.writeFile( |
| 27 | + path.join(tempDir, 'src', 'app.ts'), |
| 28 | + 'export function main() { return "hello"; }\n' |
| 29 | + ); |
| 30 | + |
| 31 | + // Polluters — nested paths that should be excluded |
| 32 | + const polluters = [ |
| 33 | + ['packages', 'ui', 'coverage', 'prettify.js'], |
| 34 | + ['.claude', 'worktrees', 'branch', 'src', 'app.ts'], |
| 35 | + ['worktrees', 'portal30-pr', 'src', 'real.ts'], |
| 36 | + ['apps', 'web', 'dist', 'bundle.js'] |
| 37 | + ]; |
| 38 | + |
| 39 | + for (const segments of polluters) { |
| 40 | + const dir = path.join(tempDir, ...segments.slice(0, -1)); |
| 41 | + await fs.mkdir(dir, { recursive: true }); |
| 42 | + await fs.writeFile(path.join(tempDir, ...segments), 'export const polluter = true;\n'); |
| 43 | + } |
| 44 | + |
| 45 | + const indexer = new CodebaseIndexer({ |
| 46 | + rootPath: tempDir, |
| 47 | + config: { |
| 48 | + skipEmbedding: true, |
| 49 | + parsing: { |
| 50 | + maxFileSize: 1048576, |
| 51 | + chunkSize: 50, |
| 52 | + chunkOverlap: 0, |
| 53 | + parseTests: true, |
| 54 | + parseNodeModules: false |
| 55 | + } |
| 56 | + } |
| 57 | + }); |
| 58 | + |
| 59 | + await indexer.index(); |
| 60 | + |
| 61 | + const indexPath = path.join(tempDir, CODEBASE_CONTEXT_DIRNAME, KEYWORD_INDEX_FILENAME); |
| 62 | + const indexRaw = JSON.parse(await fs.readFile(indexPath, 'utf-8')) as Record<string, unknown>; |
| 63 | + |
| 64 | + let chunks: Array<{ filePath: string }>; |
| 65 | + if (Array.isArray(indexRaw)) { |
| 66 | + chunks = indexRaw; |
| 67 | + } else if (Array.isArray(indexRaw?.chunks)) { |
| 68 | + chunks = indexRaw.chunks as Array<{ filePath: string }>; |
| 69 | + } else { |
| 70 | + throw new Error( |
| 71 | + `Unexpected index format: keys=${JSON.stringify(Object.keys(indexRaw ?? {}))}` |
| 72 | + ); |
| 73 | + } |
| 74 | + |
| 75 | + const indexedPaths = chunks.map((chunk) => chunk.filePath); |
| 76 | + |
| 77 | + // The legitimate file must be indexed |
| 78 | + expect(indexedPaths.some((p) => p.includes('src/app.ts') || p.includes('src\\app.ts'))).toBe( |
| 79 | + true |
| 80 | + ); |
| 81 | + |
| 82 | + // None of the polluter paths should appear |
| 83 | + const polluterMarkers = ['coverage', '.claude', 'worktrees', 'dist']; |
| 84 | + for (const marker of polluterMarkers) { |
| 85 | + const leaked = indexedPaths.filter((p) => p.includes(marker)); |
| 86 | + expect(leaked, `paths containing "${marker}" should not be indexed`).toEqual([]); |
| 87 | + } |
| 88 | + }); |
| 89 | +}); |