@@ -21,6 +21,7 @@ import { checkLiveUserInput, getLiveUserInputIds } from '../../live-user-inputs'
 import { logger } from '../../util/logger'
 import { saveMessage } from '../message-cost-tracker'
 import { openRouterLanguageModel } from '../openrouter'
+import { systemPromptCache, responseCache } from '../prompt-cache'
 import { vertexFinetuned } from './vertex-finetuned'

 import type {
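This diff imports `systemPromptCache` and `responseCache` from `../prompt-cache`, a module the diff does not show (only `responseCache` is exercised in the hunks below). For orientation, here is a minimal sketch of the interface the new code relies on, a `get` returning the cached string and a `set` taking a TTL in milliseconds; the names mirror the import, but the real implementation may differ:

```ts
// Hypothetical sketch of '../prompt-cache' — not part of this diff.
// Assumes a plain in-memory map with lazy per-entry expiry.
interface CacheEntry {
  value: string
  expiresAt: number
}

class TtlCache {
  private entries = new Map<string, CacheEntry>()

  get(key: string): string | undefined {
    const entry = this.entries.get(key)
    if (!entry) return undefined
    if (Date.now() > entry.expiresAt) {
      this.entries.delete(key) // expired: evict on read
      return undefined
    }
    return entry.value
  }

  set(key: string, value: string, ttlMs: number): void {
    this.entries.set(key, { value, expiresAt: Date.now() + ttlMs })
  }
}

export const systemPromptCache = new TtlCache()
export const responseCache = new TtlCache()
```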
@@ -36,6 +37,93 @@ import type {
 import type { LanguageModel } from 'ai'
 import type { z } from 'zod/v4'

+// Cost optimization: Task-based parameter optimization
+interface TaskBasedParameters {
+  temperature: number
+  maxTokens: number
+}
+
+type TaskType = 'file-operations' | 'simple-query' | 'code-generation' | 'analysis' | 'creative' | 'complex-reasoning' | 'default'
+
+const getOptimalParametersByTask = (taskType: TaskType): TaskBasedParameters => {
+  const paramConfigs: Record<TaskType, TaskBasedParameters> = {
+    'file-operations': { temperature: 0.0, maxTokens: 1000 }, // Deterministic file ops
+    'simple-query': { temperature: 0.0, maxTokens: 500 }, // Quick factual responses
+    'code-generation': { temperature: 0.1, maxTokens: 2000 }, // Consistent code output
+    'analysis': { temperature: 0.3, maxTokens: 1500 }, // Balanced analysis
+    'creative': { temperature: 0.8, maxTokens: 4000 }, // High creativity
+    'complex-reasoning': { temperature: 0.4, maxTokens: 3000 }, // Deep thinking
+    'default': { temperature: 0.3, maxTokens: 2000 } // Balanced default
+  }
+
+  return paramConfigs[taskType] || paramConfigs['default']
+}
+
+const detectTaskTypeFromMessages = (messages: Message[]): TaskType => {
+  const lastMessage = messages[messages.length - 1]
+  const content = typeof lastMessage?.content === 'string'
+    ? lastMessage.content.toLowerCase()
+    : JSON.stringify(lastMessage?.content || '').toLowerCase()
+
+  // Tool-based detection
+  if (content.includes('write_file') || content.includes('str_replace') || content.includes('read_files')) {
+    return 'file-operations'
+  }
+  if (content.includes('run_terminal_command') || content.includes('browser_logs')) {
+    return 'file-operations'
+  }
+  if (content.includes('spawn_agents') || content.includes('think_deeply')) {
+    return 'complex-reasoning'
+  }
+  if (content.includes('code_search') || content.includes('create_plan')) {
+    return 'analysis'
+  }
+
+  // Content-based detection
+  if (content.length < 100) {
+    return 'simple-query'
+  }
+  if (content.includes('write') && (content.includes('code') || content.includes('function') || content.includes('class'))) {
+    return 'code-generation'
+  }
+  if (content.includes('analyze') || content.includes('explain') || content.includes('review')) {
+    return 'analysis'
+  }
+  if (content.includes('creative') || content.includes('story') || content.includes('poem')) {
+    return 'creative'
+  }
+  if (content.includes('complex') || content.includes('architecture') || content.includes('design')) {
+    return 'complex-reasoning'
+  }
+
+  return 'default'
+}
+
+// Cost optimization: Cache system prompts and common responses
+const isCacheableSystemPrompt = (messages: Message[]): boolean => {
+  // Cache system prompts (first message is usually system)
+  if (messages.length > 0 && messages[0].role === 'system') {
+    const content = typeof messages[0].content === 'string'
+      ? messages[0].content
+      : JSON.stringify(messages[0].content || '')
+
+    // Cache if it's a system prompt > 500 chars (likely to be reused)
+    return content.length > 500
+  }
+  return false
+}
+
+const generateCacheKey = (
+  messages: Message[],
+  model: string,
+  options: { temperature?: number; maxTokens?: number },
+): string => {
+  // Create the cache key from messages + model + key sampling parameters
+  const cacheableContent = {
+    messages: messages.slice(0, 2), // Only first 2 messages (system + first user)
+    model,
+    temperature: options.temperature,
+    maxTokens: options.maxTokens
+  }
+  return JSON.stringify(cacheableContent)
+}
+
 // TODO: We'll want to add all our models here!
 const modelToAiSDKModel = (model: Model): LanguageModel => {
   if (
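Taken together, the helpers above map the most recent message to sampling parameters. A usage sketch (the `Message` objects here are hypothetical; the helpers only assume `role` and `content` fields):

```ts
// Hypothetical walk-through of the detection + lookup pipeline above.
const messages: Message[] = [
  { role: 'system', content: 'You are a coding assistant.' },
  {
    role: 'user',
    content:
      'Please write a TypeScript function that parses CSV rows into objects, ' +
      'handles quoted fields, and reports malformed lines with clear errors.',
  },
]

const taskType = detectTaskTypeFromMessages(messages)
// 'code-generation' — over 100 chars, and contains both "write" and "function"
const params = getOptimalParametersByTask(taskType)
// { temperature: 0.1, maxTokens: 2000 }
```

Note that the `content.length < 100` check runs before the keyword checks, so a short prompt like "write a poem" classifies as 'simple-query' rather than 'creative'.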
@@ -100,8 +188,19 @@ export const promptAiSdkStream = async function* (

   let aiSDKModel = modelToAiSDKModel(options.model)

-  const response = streamText({
+  // Cost optimization: Apply task-based parameter optimization
+  const taskType = detectTaskTypeFromMessages(options.messages)
+  const optimalParams = getOptimalParametersByTask(taskType)
+
+  // Only override if not explicitly set by caller
+  const finalOptions = {
     ...options,
+    temperature: options.temperature ?? optimalParams.temperature,
+    maxTokens: options.maxTokens ?? optimalParams.maxTokens,
+  }
+
+  const response = streamText({
+    ...finalOptions,
     model: aiSDKModel,
     maxRetries: options.maxRetries,
     messages: convertCbToModelMessages(options),
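A detail worth calling out in the override pattern above: `??` (nullish coalescing) falls back only when the caller passed `undefined` or `null`, so an explicit `temperature: 0` is preserved. The older `||` idiom would have clobbered it:

```ts
// Why '??' rather than '||' for the parameter fallback above.
const explicit: number | undefined = 0 // caller asked for fully deterministic output
const unset: number | undefined = undefined

explicit ?? 0.3 // 0   — the caller's value survives
explicit || 0.3 // 0.3 — '||' would silently override a falsy value
unset ?? 0.3    // 0.3 — the optimized default applies only when truly unset
```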
@@ -262,14 +361,49 @@ export const promptAiSdk = async function (
   const startTime = Date.now()
   let aiSDKModel = modelToAiSDKModel(options.model)

-  const response = await generateText({
+  // Cost optimization: Apply task-based parameter optimization
+  const taskType = detectTaskTypeFromMessages(options.messages)
+  const optimalParams = getOptimalParametersByTask(taskType)
+
+  // Only override if not explicitly set by caller
+  const finalOptions = {
     ...options,
+    temperature: options.temperature ?? optimalParams.temperature,
+    maxTokens: options.maxTokens ?? optimalParams.maxTokens,
+  }
+
+  // Cost optimization: Check cache for similar requests
+  const cacheKey = generateCacheKey(options.messages, options.model, finalOptions)
+  const cachedResponse = responseCache.get(cacheKey)
+
+  if (cachedResponse && isCacheableSystemPrompt(options.messages)) {
+    logger.debug({ cacheKey: cacheKey.substring(0, 32) + '...' }, 'Cache hit for prompt')
+
+    // Return cached response but still report cost for accounting
+    const creditsUsed = 0 // Cache hits are free!
+    if (options.onCostCalculated) {
+      await options.onCostCalculated(creditsUsed)
+    }
+
+    return cachedResponse
+  }
+
+  const response = await generateText({
+    ...finalOptions,
     model: aiSDKModel,
     messages: convertCbToModelMessages(options),
   })
+
   const content = response.text
+
+  // Cache successful responses for cacheable system prompts
+  if (isCacheableSystemPrompt(options.messages) && content.length > 0) {
+    responseCache.set(cacheKey, content, 15 * 60 * 1000) // 15-minute TTL
+    logger.debug({ cacheKey: cacheKey.substring(0, 32) + '...' }, 'Cached prompt response')
+  }
+
   const inputTokens = response.usage.inputTokens || 0
-  const outputTokens = response.usage.inputTokens || 0
+  const outputTokens = response.usage.outputTokens || 0

   const creditsUsedPromise = saveMessage({
     messageId: generateCompactId(),
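One caveat with `generateCacheKey`: `JSON.stringify` is order-sensitive, so two logically identical requests only hit the same entry if their message objects serialize byte-for-byte the same, and the full JSON string becomes a very long map key. If that proves fragile, hashing a canonical form is a common alternative; a hypothetical sketch using Node's built-in `crypto` module:

```ts
import { createHash } from 'node:crypto'

// Hypothetical variant of generateCacheKey: fixed-length, cheaper to store,
// and built from an array so field order is pinned down explicitly.
const hashedCacheKey = (
  messages: Message[],
  model: string,
  temperature?: number,
  maxTokens?: number,
): string => {
  const canonical = JSON.stringify([messages.slice(0, 2), model, temperature, maxTokens])
  return createHash('sha256').update(canonical).digest('hex')
}
```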
@@ -334,8 +468,19 @@ export const promptAiSdkStructured = async function <T>(options: {
   const startTime = Date.now()
   let aiSDKModel = modelToAiSDKModel(options.model)

-  const responsePromise = generateObject<z.ZodType<T>, 'object'>({
+  // Cost optimization: Apply task-based parameter optimization
+  const taskType = detectTaskTypeFromMessages(options.messages)
+  const optimalParams = getOptimalParametersByTask(taskType)
+
+  // Only override if not explicitly set by caller
+  const finalOptions = {
     ...options,
+    temperature: options.temperature ?? optimalParams.temperature,
+    maxTokens: options.maxTokens ?? optimalParams.maxTokens,
+  }
+
+  const responsePromise = generateObject<z.ZodType<T>, 'object'>({
+    ...finalOptions,
     model: aiSDKModel,
     output: 'object',
     messages: convertCbToModelMessages(options),
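The same tuning block now appears verbatim in `promptAiSdkStream`, `promptAiSdk`, and `promptAiSdkStructured`. If it spreads further, one option is to extract it; a hypothetical helper, assuming every options object carries `messages` plus optional `temperature`/`maxTokens`:

```ts
// Hypothetical deduplication of the tuning block repeated in the three functions above.
const withTaskDefaults = <
  O extends { messages: Message[]; temperature?: number; maxTokens?: number },
>(options: O): O => {
  const optimalParams = getOptimalParametersByTask(detectTaskTypeFromMessages(options.messages))
  return {
    ...options,
    temperature: options.temperature ?? optimalParams.temperature,
    maxTokens: options.maxTokens ?? optimalParams.maxTokens,
  }
}

// Usage inside each entry point:
// const finalOptions = withTaskDefaults(options)
```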