// models.ts — model catalog schema and dynamic model-list loading (182 lines, 5.36 KB)
import { z } from "zod";
import { _urljoin } from "../src/util";
// Prompt input styles a model accepts: "chat" (message-based) or
// "completion" (raw text).
export const PromptInputs = ["chat", "completion"] as const;
export type PromptInputType = (typeof PromptInputs)[number];
// Wire/request formats a model speaks; consumers use this to choose how to
// translate requests and responses for the provider.
export const ModelFormats = [
"openai",
"anthropic",
"google",
"window",
"js",
"converse",
] as const;
export type ModelFormat = (typeof ModelFormats)[number];
// Known provider endpoints a model can be served from.
// NOTE: the const and the type below intentionally share one name — TypeScript
// merges the value declaration and the type declaration (value/type merging).
export const ModelEndpointType = [
"openai",
"anthropic",
"google",
"mistral",
"bedrock",
"vertex",
"together",
"fireworks",
"baseten",
"perplexity",
"xAI",
"groq",
"azure",
"databricks",
"lepton",
"cerebras",
"ollama",
"replicate",
"js",
] as const;
export type ModelEndpointType = (typeof ModelEndpointType)[number];
// Zod schema for a single model catalog entry: capabilities, pricing, and
// lifecycle metadata. All fields except format/flavor are optional (nullish).
export const ModelSchema = z.object({
format: z.enum(ModelFormats),
flavor: z.enum(PromptInputs),
multimodal: z.boolean().nullish(),
// Pricing appears in two granularities (per token and per million tokens).
// NOTE(review): which form is authoritative when both are set is not visible
// here — confirm against consumers of this schema.
input_cost_per_token: z.number().nullish(),
output_cost_per_token: z.number().nullish(),
input_cost_per_mil_tokens: z.number().nullish(),
output_cost_per_mil_tokens: z.number().nullish(),
input_cache_read_cost_per_mil_tokens: z.number().nullish(),
input_cache_write_cost_per_mil_tokens: z.number().nullish(),
// NOTE(review): this .describe() text looks copy-pasted from another field;
// displayName is presumably the human-readable model name — confirm intent
// before relying on the description.
displayName: z
.string()
.nullish()
.describe("The model is the latest production/stable"),
o1_like: z.boolean().nullish().describe('DEPRECATED use "reasoning" instead'),
reasoning: z
.boolean()
.nullish()
.describe("The model supports reasoning/thinking tokens"),
reasoning_budget: z
.boolean()
.nullish()
.describe("The model supports reasoning/thinking budgets"),
experimental: z
.boolean()
.nullish()
.describe("The model is not allowed production load or API is unstable."),
deprecated: z
.boolean()
.nullish()
.describe(
"Discourage the use of the model (we will hide the model in the UI).",
),
// NOTE(review): description wording is garbled — presumably "the model that
// replaced this model"; confirm the direction of the parent relationship.
parent: z.string().nullish().describe("The model was replaced this model."),
endpoint_types: z.array(z.enum(ModelEndpointType)).nullish(),
locations: z.array(z.string()).nullish(),
description: z.string().nullish(),
max_input_tokens: z
.number()
.nullish()
.describe("The model supports a maximum input token limit."),
max_output_tokens: z
.number()
.nullish()
.describe("The model supports a maximum output token limit."),
});
// TypeScript type inferred from the schema; the canonical shape of one model.
export type ModelSpec = z.infer<typeof ModelSchema>;
// Static model catalog bundled at build time. Validated eagerly so a
// malformed model_list.json fails at module load rather than at first use.
import modelListJson from "./model_list.json";
const modelListJsonTyped = z
.record(z.string(), ModelSchema)
.parse(modelListJson);
// Because this file can be included and bundled in various ways, it's important to
// really inject these variables into the global scope, rather than let the bundler
// have its way with them.
declare global {
// Merged view of static + dynamically fetched models; undefined until the
// first successful refreshModels() call.
var _proxy_availableModels: { [name: string]: ModelSpec } | undefined;
// Most recent dynamic fetch result and its fetch time, used for TTL expiry.
var _proxy_cachedModels: { [name: string]: ModelSpec } | null;
var _proxy_cacheTimestamp: number | null;
}
// This function will always return at least the static model list,
export function getAvailableModels(): { [name: string]: ModelSpec } {
  // Prefer the dynamically refreshed catalog when it has been populated;
  // otherwise fall back to the statically bundled list.
  const dynamicModels = globalThis._proxy_availableModels;
  if (dynamicModels != null) {
    return dynamicModels;
  }
  return modelListJsonTyped;
}
// This function will reach out to the control plane and update the
// available models. It is not required to call. If you don't, you'll
// just get whatever models are in the static list.
export async function refreshModels(appUrl: string): Promise<void> {
  // Skip the network round-trip entirely while the cache is still fresh.
  if (isCacheValid()) {
    return;
  }
  const fetched = await loadModelsFromControlPlane(appUrl);
  if (!fetched) {
    return;
  }
  // Seed the global table from the static list on first use, then overlay
  // the dynamically fetched entries on top of it.
  globalThis._proxy_availableModels ??= { ...modelListJsonTyped };
  Object.assign(globalThis._proxy_availableModels, fetched);
}
// Dynamic model loader with expiration
const CACHE_TTL_MS = 60 * 60 * 1000; // 1 hour in milliseconds
// True while a dynamic fetch result exists and is younger than CACHE_TTL_MS.
function isCacheValid(): boolean {
  const cached = globalThis._proxy_cachedModels;
  const fetchedAt = globalThis._proxy_cacheTimestamp;
  if (!cached || !fetchedAt) {
    return false;
  }
  return Date.now() - fetchedAt < CACHE_TTL_MS;
}
// Global variable to track ongoing fetch request (acts as a mutex)
// so concurrent callers await one shared in-flight control-plane request
// instead of each issuing their own fetch.
let _loadModelsPromise: Promise<{ [name: string]: ModelSpec } | null> | null =
null;
async function loadModelsFromControlPlane(
appUrl: string,
): Promise<{ [name: string]: ModelSpec } | null> {
// Return cached models if still valid
if (isCacheValid()) {
return globalThis._proxy_cachedModels;
}
// If there's already a request in progress, wait for it
if (_loadModelsPromise) {
return await _loadModelsPromise;
}
// Create and store the promise to prevent concurrent requests
_loadModelsPromise = (async () => {
const fetchUrl = _urljoin(appUrl, "api/models/model_list.json");
try {
const response = await fetch(fetchUrl);
if (!response.ok) {
throw new Error(`Failed to fetch models: ${response.statusText}`);
}
const data = await response.json();
globalThis._proxy_cachedModels = data as { [name: string]: ModelSpec };
globalThis._proxy_cacheTimestamp = Date.now();
} catch (error) {
console.warn(
`Failed to load models dynamically from control plane (${fetchUrl}), falling back to static import:`,
error,
);
}
return globalThis._proxy_cachedModels;
})();
try {
// Wait for the request to complete
const result = await _loadModelsPromise;
return result;
} finally {
// Clear the promise so future requests can proceed
_loadModelsPromise = null;
}
}