diff --git a/apps/web/src/lib/ai-gateway/experiments/build-direct-provider.ts b/apps/web/src/lib/ai-gateway/experiments/build-direct-provider.ts index 1a812a859..72f33588e 100644 --- a/apps/web/src/lib/ai-gateway/experiments/build-direct-provider.ts +++ b/apps/web/src/lib/ai-gateway/experiments/build-direct-provider.ts @@ -1,6 +1,7 @@ import { addCacheBreakpoints, injectReasoningIntoContent, + removeCacheBreakpoints, } from '@/lib/ai-gateway/providers/openrouter/request-helpers'; import type { CustomLlmProvider } from '@kilocode/db'; import type { GatewayChatApiKind, Provider } from '@/lib/ai-gateway/providers/types'; @@ -65,9 +66,12 @@ export type DirectProviderInput = ResolvedExperimentUpstream & { * contacted. The route layer is responsible for not applying provider * pinning or kilo-exclusive model rewrites on top of this provider. */ -export function buildDirectProvider(upstream: DirectProviderInput): Provider { +export function buildDirectProvider( + id: 'custom' | 'experiment', + upstream: DirectProviderInput +): Provider { return { - id: 'custom', + id, apiUrl: upstream.base_url, apiKey: upstream.api_key, supportedChatApis: inferSupportedChatApis(upstream.opencode_settings?.ai_sdk_provider), @@ -83,6 +87,9 @@ export function buildDirectProvider(upstream: DirectProviderInput): Provider { Object.assign(context.extraHeaders, upstream.extra_headers); } context.request.body.model = upstream.internal_id; + if (upstream.remove_cache_breakpoints) { + removeCacheBreakpoints(context.request); + } if (upstream.add_cache_breakpoints) { addCacheBreakpoints(context.request); } diff --git a/apps/web/src/lib/ai-gateway/experiments/upstream-schema.ts b/apps/web/src/lib/ai-gateway/experiments/upstream-schema.ts index ecf810923..0c3636ff7 100644 --- a/apps/web/src/lib/ai-gateway/experiments/upstream-schema.ts +++ b/apps/web/src/lib/ai-gateway/experiments/upstream-schema.ts @@ -20,7 +20,7 @@ import { CustomLlmExtraBodySchema, OpenCodeSettingsSchema } from '@kilocode/db/s export const ExperimentUpstreamSchema = z .object({ internal_id: z.string().min(1), - base_url: z.string().url(), + base_url: z.url(), opencode_settings: z .object({ ai_sdk_provider: OpenCodeSettingsSchema.shape.ai_sdk_provider, @@ -30,6 +30,7 @@ export const ExperimentUpstreamSchema = z extra_body: CustomLlmExtraBodySchema.optional(), remove_from_body: z.array(z.string()).optional(), add_cache_breakpoints: z.boolean().optional(), + remove_cache_breakpoints: z.boolean().optional(), inject_reasoning_into_content: z.boolean().optional(), }) .strict(); diff --git a/apps/web/src/lib/ai-gateway/providers/get-provider.ts b/apps/web/src/lib/ai-gateway/providers/get-provider.ts index 1bb83f99a..d00e76072 100644 --- a/apps/web/src/lib/ai-gateway/providers/get-provider.ts +++ b/apps/web/src/lib/ai-gateway/providers/get-provider.ts @@ -110,7 +110,7 @@ async function checkCustomLlm( } return { kind: 'provider', - provider: buildDirectProvider({ + provider: buildDirectProvider('custom', { internal_id: customLlm.internal_id, base_url: customLlm.base_url, api_key: customLlm.api_key, @@ -121,6 +121,7 @@ async function checkCustomLlm( extra_headers: customLlm.extra_headers, remove_from_body: customLlm.remove_from_body, add_cache_breakpoints: customLlm.add_cache_breakpoints, + remove_cache_breakpoints: customLlm.remove_cache_breakpoints, inject_reasoning_into_content: customLlm.inject_reasoning_into_content, }), userByok: null, @@ -209,7 +210,7 @@ export async function getProvider(input: GetProviderInput): Promise { expect(request.body.cache_control).toBeUndefined(); }); }); + +describe('removeCacheBreakpoints', () => { + test('removes all cache breakpoints added to a chat completions request', () => { + const request: GatewayRequest = { + kind: 'chat_completions', + body: { + model: 'test-model', + messages: [ + { role: 'system', content: 'You are helpful.' }, + { role: 'user', content: 'First prompt' }, + { role: 'assistant', content: 'First response' }, + { + role: 'user', + content: [ + { type: 'text', text: 'Latest prompt' }, + { type: 'text', text: 'Latest detail' }, + ], + }, + ], + }, + }; + + addCacheBreakpoints(request); + expect(containsCacheControlDeep(request.body.messages)).toBe(true); + + removeCacheBreakpoints(request); + + expect(containsCacheControlDeep(request.body.messages)).toBe(false); + }); + + test('removes all cache breakpoints added to a responses request', () => { + const request: GatewayRequest = { + kind: 'responses', + body: { + model: 'test-model', + input: [ + { type: 'message', role: 'system', content: 'You are helpful.' }, + { + type: 'message', + role: 'user', + content: [{ type: 'input_text', text: 'First prompt' }], + }, + { + type: 'function_call_output', + call_id: 'call_123', + output: [ + { type: 'input_text', text: 'Tool output' }, + { type: 'input_text', text: 'Tool detail' }, + ], + }, + ], + }, + }; + + addCacheBreakpoints(request); + if (request.kind !== 'responses' || !Array.isArray(request.body.input)) return; + expect(containsCacheControlDeep(request.body.input)).toBe(true); + + removeCacheBreakpoints(request); + + expect(containsCacheControlDeep(request.body.input)).toBe(false); + }); + + test('removes top-level and nested cache_control from a messages request', () => { + const request: GatewayRequest = { + kind: 'messages', + body: { + model: 'anthropic/claude-sonnet-4-5', + max_tokens: 1024, + cache_control: { type: 'ephemeral' }, + messages: [ + { + role: 'user', + content: [ + { + type: 'text', + text: 'First prompt', + cache_control: { type: 'ephemeral' }, + }, + ], + }, + { role: 'assistant', content: 'First response' }, + { role: 'user', content: 'Latest prompt' }, + ], + }, + }; + + removeCacheBreakpoints(request); + + expect(request.body.cache_control).toBeUndefined(); + expect(containsCacheControlDeep(request.body.messages)).toBe(false); + }); +}); + +function containsCacheControlDeep(value: unknown): boolean { + if (Array.isArray(value)) { + return value.some(containsCacheControlDeep); + } + if (typeof value !== 'object' || value === null) { + return false; + } + if (Object.hasOwn(value, 'cache_control')) { + return true; + } + return Object.values(value).some(containsCacheControlDeep); +} diff --git a/packages/db/src/schema-types.ts b/packages/db/src/schema-types.ts index d67e4790c..192f47fea 100644 --- a/packages/db/src/schema-types.ts +++ b/packages/db/src/schema-types.ts @@ -1205,11 +1205,12 @@ export const CustomLlmDefinitionSchema = z.object({ display_name: z.string(), context_length: z.number(), max_completion_tokens: z.number(), - base_url: z.string(), + base_url: z.url(), api_key: z.string(), organization_ids: z.array(z.string()), supports_image_input: z.boolean().optional(), add_cache_breakpoints: z.boolean().optional(), + remove_cache_breakpoints: z.boolean().optional(), inject_reasoning_into_content: z.boolean().optional(), extra_headers: CustomLlmExtraHeadersSchema.optional(), extra_body: CustomLlmExtraBodySchema.optional(),