From e9eeefbb0468f86cd5b44ecd22be2f62b73c85fb Mon Sep 17 00:00:00 2001 From: Matthew Podwysocki Date: Tue, 14 Apr 2026 13:49:00 -0400 Subject: [PATCH] fix: raise docCache per-entry cap to 5MB with size warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit llms-full.txt files (GL JS 1.6MB, iOS Nav 696KB, Style Spec 466KB) previously fell under or near the 2MB hard cap. Raising to 5MB lets all current full-content files cache cleanly. Add warnings so operators can see large entries in logs: - entries 1MB–5MB: warn and cache - entries >5MB: warn and reject (instead of silently dropping) Update BatchGetDocumentsTool test to use 6MB as the "too large" value. Co-Authored-By: Claude Sonnet 4.6 --- CHANGELOG.md | 6 +++ src/utils/docCache.ts | 13 ++++++- .../BatchGetDocumentsTool.test.ts | 2 +- test/utils/docCache.test.ts | 37 +++++++++++++++++++ 4 files changed, 55 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 866f2b9..0f99a4c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ ## Unreleased +### Raise `docCache` per-entry limit to 5 MB with size warnings + +- **Hard cap raised from 2 MB to 5 MB** — allows `llms-full.txt` files (Style Spec 466 KB, iOS Nav 696 KB, GL JS 1.6 MB) to be cached after being fetched via `get_document_tool` +- **Warning above 1 MB** — `console.warn` when an entry larger than 1 MB (up to the 5 MB cap) is cached, so operators can see large entries in server logs +- **Warning on rejection** — entries exceeding the 5 MB cap now log `[docCache] Entry too large to cache` instead of being silently dropped + ### Dependencies - **Upgrade `tshy` to `^4.1.1`, `vitest` to `^4.1.4`, `typescript` to `^6.0.2`** — removed deprecated `baseUrl` from `tsconfig.base.json` (TS6), added `"types": ["node"]` (required because tshy compiles from `.tshy/` and does not auto-discover `@types/node` in CI); downgraded `@types/node` to `^22.0.0` for LTS consistency with other repos; bumped `typescript-eslint` 
packages to `^8.58.2` for TypeScript 6 support diff --git a/src/utils/docCache.ts b/src/utils/docCache.ts index b186735..6a64ef4 100644 --- a/src/utils/docCache.ts +++ b/src/utils/docCache.ts @@ -8,7 +8,8 @@ const DEFAULT_TTL_MS = parseInt( // Cache limits const MAX_ENTRIES = 512; -export const MAX_ENTRY_BYTES = 2 * 1024 * 1024; // 2 MB per entry +export const MAX_ENTRY_BYTES = 5 * 1024 * 1024; // 5 MB hard cap per entry +const LARGE_ENTRY_WARNING_BYTES = 1 * 1024 * 1024; // warn at 1 MB const MAX_TOTAL_BYTES = 50 * 1024 * 1024; // 50 MB total interface CacheEntry { @@ -55,7 +56,15 @@ class DocCache { set(url: string, content: string, ttlMs: number = DEFAULT_TTL_MS): void { const bytes = Buffer.byteLength(content, 'utf8'); if (bytes > MAX_ENTRY_BYTES) { - return; // Silently reject oversized entries + console.warn( + `[docCache] Entry too large to cache (${(bytes / 1024 / 1024).toFixed(1)} MB, limit ${MAX_ENTRY_BYTES / 1024 / 1024} MB): ${url}` + ); + return; + } + if (bytes > LARGE_ENTRY_WARNING_BYTES) { + console.warn( + `[docCache] Caching large entry (${(bytes / 1024 / 1024).toFixed(1)} MB): ${url}` + ); } const key = normalizeCacheKey(url); diff --git a/test/tools/batch-get-documents-tool/BatchGetDocumentsTool.test.ts b/test/tools/batch-get-documents-tool/BatchGetDocumentsTool.test.ts index cc3ae66..a50a6cc 100644 --- a/test/tools/batch-get-documents-tool/BatchGetDocumentsTool.test.ts +++ b/test/tools/batch-get-documents-tool/BatchGetDocumentsTool.test.ts @@ -93,7 +93,7 @@ describe('BatchGetDocumentsTool', () => { it('returns an error for a URL whose Content-Length exceeds the limit', async () => { const oversizeHeaders = new Headers({ 'content-type': 'text/plain', - 'content-length': String(3 * 1024 * 1024) // 3 MB > 2 MB limit + 'content-length': String(6 * 1024 * 1024) // 6 MB > 5 MB limit }); const httpRequest = vi .fn() diff --git a/test/utils/docCache.test.ts b/test/utils/docCache.test.ts index 01dea7e..3585af1 100644 --- a/test/utils/docCache.test.ts 
+++ b/test/utils/docCache.test.ts @@ -255,6 +255,43 @@ describe('DocCache', () => { cache.set('https://docs.mapbox.com/page', content); expect(cache.get('https://docs.mapbox.com/page')).toBe(content); }); + + it('warns and rejects when entry exceeds hard cap', async () => { + const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {}); + try { + // The size cap is a module-level constant that cannot be overridden per test, + // so exercise the real docCache singleton with content just over 5MB + // and observe the rejection warning through the console.warn spy. + const { docCache: real } = await import('../../src/utils/docCache.js'); + real.clear(); + // Content just over 5MB hard cap + const oversized = 'x'.repeat(5 * 1024 * 1024 + 1); + real.set('https://docs.mapbox.com/page', oversized); + expect(real.get('https://docs.mapbox.com/page')).toBeNull(); + expect(warnSpy).toHaveBeenCalledWith( + expect.stringContaining('too large to cache') + ); + } finally { + warnSpy.mockRestore(); + } + }); + + it('warns but caches entries between 1MB and 5MB', async () => { + const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {}); + try { + const { docCache: real } = await import('../../src/utils/docCache.js'); + real.clear(); + // Content just over the 1MB warning threshold but under the 5MB cap + const large = 'x'.repeat(1 * 1024 * 1024 + 1); + real.set('https://docs.mapbox.com/page', large); + expect(real.get('https://docs.mapbox.com/page')).toBe(large); + expect(warnSpy).toHaveBeenCalledWith( + expect.stringContaining('Caching large entry') + ); + } finally { + warnSpy.mockRestore(); + } + }); }); describe('total bytes limit', () => {