openproxy/cost.ts at main · praveentcom/openproxy · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
/**
 * Token usage reported for a request, used for logging and cost calculation.
 *
 * Every field is optional; `calculateCost` treats missing token counts as 0.
 */
export type Usage = {
  /** Number of prompt (input) tokens. */
  prompt_tokens?: number;
  /** Number of completion (output) tokens. */
  completion_tokens?: number;
  /** Total number of tokens. Not read by the cost calculation in this file. */
  total_tokens?: number;
  /** Breakdown of the prompt tokens. */
  prompt_tokens_details?: {
    /**
     * Prompt tokens served from cache. `calculateCost` bills these at the
     * cached rate (when one is configured) and the rest of `prompt_tokens`
     * at the input rate.
     */
    cached_tokens?: number;
  };
};

/**
 * Pricing for a single model.
 *
 * All rates are in USD per million tokens.
 */
export type CostConfig = {
  /** Rate for prompt (input) tokens. */
  input: number;
  /** Rate for cached prompt tokens. */
  cached: number;
  /** Rate for completion (output) tokens. */
  output: number;
};

/**
 * Model pricing table: maps a model name to its cost configuration.
 */
export type ModelCostTable = Record<string, CostConfig>;

/**
 * One pricing record in the Helicone API response, as this module expects it.
 *
 * `loadHeliconeCosts` reads only `model`, `operator`, `input_cost_per_1m`,
 * `output_cost_per_1m` and `prompt_cache_read_per_1m`; the remaining fields
 * are declared but not used in this file.
 */
interface HeliconeModelCost {
  provider: string;
  /** Model name or pattern; lowercased and used as the lookup key. */
  model: string;
  /** How `model` is compared against a requested model name. */
  operator: "equals" | "startsWith" | "includes";
  /** Copied into `CostConfig.input`. */
  input_cost_per_1m: number;
  /** Copied into `CostConfig.output`. */
  output_cost_per_1m: number;
  prompt_cache_write_per_1m?: number;
  /** Copied into `CostConfig.cached`; the input cost is used when absent. */
  prompt_cache_read_per_1m?: number;
  show_in_playground?: boolean;
}

/**
 * Top-level shape of the Helicone cost API response, as this module expects it.
 */
interface HeliconeApiResponse {
  metadata: {
    /** Model count reported by the API; only used in the startup log line. */
    total_models: number;
  };
  /** Pricing records, one per model name/pattern. */
  data: HeliconeModelCost[];
}

/**
 * Internal storage record for one Helicone price: the pricing itself plus
 * the operator that decides how its map key is matched against a model name.
 */
interface CostEntry {
  /** Matching rule applied between the stored key and the requested model. */
  operator: "equals" | "startsWith" | "includes";
  /** Pricing returned when the entry matches. */
  config: CostConfig;
}

// Helicone pricing keyed by lowercased model name/pattern.
// Empty until loadHeliconeCosts() fills it at runtime.
const heliconeCosts = new Map<string, CostEntry>();

// Set to true once a Helicone load has completed successfully.
let heliconeCostsLoaded = false;

/**
 * ============================================================================
 * CUSTOM MODEL COSTS
 * ============================================================================
 *
 * Add your custom model costs here. These take precedence over costs
 * fetched from the Helicone API. This is useful for:
 *
 * - Custom/fine-tuned models (e.g., "zlm-4.6")
 * - Self-hosted models with custom pricing
 * - Overriding Helicone costs for specific models
 * - Models not yet in the Helicone database
 *
 * Format:
 *   "model-name": { input: <cost>, cached: <cost>, output: <cost> }
 *
 * All costs are in USD per million tokens.
 *
 * Key casing: `getCostConfig` looks up the lowercased model name first and
 * then the model name exactly as requested, so a key must either be all
 * lowercase or match the requested name's casing exactly.
 *
 * @example
 * ```ts
 * export const CUSTOM_MODEL_COSTS: ModelCostTable = {
 *   "zlm-4.6": { input: 2.5, cached: 1.25, output: 10 },
 *   "zlm-4.5-air": { input: 0.15, cached: 0.075, output: 0.6 },
 * };
 * ```
 */
export const CUSTOM_MODEL_COSTS: ModelCostTable = {
  // Add your custom model costs here
};

/**
 * Fetches model pricing from the Helicone API and stores it in the
 * module-level `heliconeCosts` map, keyed by lowercased model name/pattern.
 * This should be called once at application startup.
 *
 * The call is best-effort: any failure (network error, non-2xx status,
 * malformed payload) is logged as a warning and never thrown. On failure the
 * previously loaded prices, if any, are left untouched.
 *
 * When several records share the same model name, the last one in the
 * response wins.
 *
 * @returns Promise that resolves when loading has finished (successfully or not)
 */
export async function loadHeliconeCosts(): Promise<void> {
  try {
    const response = await fetch("https://www.helicone.ai/api/llm-costs");

    if (!response.ok) {
      throw new Error(`Helicone API returned ${response.status}: ${response.statusText}`);
    }

    // The payload is external input: treat every part of it as possibly absent.
    const payload: Partial<HeliconeApiResponse> | null = await response.json();
    const records = payload?.data;
    if (!Array.isArray(records)) {
      throw new Error("Helicone API response is missing the `data` array");
    }

    // Stage into a temporary map so a malformed payload cannot wipe or
    // half-overwrite prices that were loaded earlier.
    const staged = new Map<string, CostEntry>();
    for (const record of records) {
      // Skip records without a usable name. An empty pattern would match
      // every model under the "startsWith"/"includes" operators.
      if (typeof record?.model !== "string" || record.model === "") {
        continue;
      }

      const config: CostConfig = {
        input: record.input_cost_per_1m ?? 0,
        output: record.output_cost_per_1m ?? 0,
        // No dedicated cache-read price: bill cached tokens at the input rate.
        cached: record.prompt_cache_read_per_1m ?? record.input_cost_per_1m ?? 0,
      };

      staged.set(record.model.toLowerCase(), {
        operator: record.operator,
        config,
      });
    }

    // Swap the contents in place; the map binding itself is never replaced.
    heliconeCosts.clear();
    for (const [key, entry] of staged) {
      heliconeCosts.set(key, entry);
    }
    heliconeCostsLoaded = true;

    // Prefer the count reported by the API; fall back to what was stored so
    // a missing `metadata` block does not turn a successful load into a failure.
    const reportedTotal = payload?.metadata?.total_models ?? staged.size;
    console.log(`\x1b[96m  🌎 Loaded ${reportedTotal} model costs from Helicone API\x1b[0m`);
  } catch (error) {
    console.warn(`\x1b[33m  ⚠️  Failed to load Helicone costs: ${error instanceof Error ? error.message : error}\x1b[0m`);
  }
}

/**
 * Gets the cost configuration for a model.
 *
 * Priority order:
 * 1. Custom model costs (CUSTOM_MODEL_COSTS), tried with the lowercased
 *    model name first and then the name exactly as given
 * 2. Helicone API costs: an exact key hit with operator "equals", then the
 *    first "startsWith" pattern that matches, then the first "includes"
 *    pattern that matches (both in map insertion order)
 * 3. Fallback cost of zero for every rate
 *
 * Helicone keys are stored lowercased, so Helicone matching is
 * case-insensitive.
 *
 * @param model: The model name to look up
 * @returns The cost configuration for the model
 */
export function getCostConfig(model: string): CostConfig {
  const normalizedModel = model.toLowerCase();

  /**
   * Check custom costs first (highest priority).
   *
   * Only own properties count: the model name comes from the request, and a
   * plain index lookup would resolve names such as "constructor" or
   * "toString" to members inherited from Object.prototype.
   */
  for (const key of [normalizedModel, model]) {
    const custom = Object.prototype.hasOwnProperty.call(CUSTOM_MODEL_COSTS, key)
      ? CUSTOM_MODEL_COSTS[key]
      : undefined;
    if (custom) {
      return custom;
    }
  }

  /**
   * Check Helicone costs with operator matching
   */
  const exactMatch = heliconeCosts.get(normalizedModel);
  if (exactMatch?.operator === "equals") {
    return exactMatch.config;
  }

  for (const [pattern, entry] of heliconeCosts) {
    if (entry.operator === "startsWith" && normalizedModel.startsWith(pattern)) {
      return entry.config;
    }
  }

  for (const [pattern, entry] of heliconeCosts) {
    if (entry.operator === "includes" && normalizedModel.includes(pattern)) {
      return entry.config;
    }
  }

  // Reached with an exact key hit only when that entry's operator is none of
  // the three handled above; use its pricing rather than the zero fallback.
  if (exactMatch) {
    return exactMatch.config;
  }

  /**
   * Return fallback since no matching cost was found
   */
  return { input: 0, cached: 0, output: 0 };
}

/**
 * Computes the total cost (in USD) for a given model and usage.
 *
 * Cached prompt tokens are billed at the model's cached rate and the
 * remaining prompt tokens at the input rate. When the model has no positive
 * cached rate, or no cached tokens are reported, all prompt tokens are billed
 * at the input rate.
 *
 * @param model: The model to compute the cost for.
 * @param usage: The usage object.
 * @returns The total cost (in USD) rounded to 6 decimal places, or null when
 *   there is no usage data or the computed total is not positive (for
 *   example, when no pricing is known for the model).
 */
export function calculateCost(
  model: string,
  usage?: Usage
): number | null {
  if (!usage) return null;

  // `??` and `?.` also cover explicit nulls in the payload; a destructuring
  // default would only cover `undefined` and let a null
  // `prompt_tokens_details` throw.
  const promptTokens = usage.prompt_tokens ?? 0;
  const completionTokens = usage.completion_tokens ?? 0;
  const cachedTokens = usage.prompt_tokens_details?.cached_tokens ?? 0;

  const cost = getCostConfig(model);

  let inputCost = 0;
  let cachedCost = 0;

  if (cachedTokens > 0 && cost.cached > 0) {
    // Never bill a negative number of uncached tokens when the reported
    // cached count exceeds the reported prompt count.
    const uncachedTokens = Math.max(0, promptTokens - cachedTokens);
    cachedCost = (cachedTokens / 1_000_000) * cost.cached;
    inputCost = (uncachedTokens / 1_000_000) * cost.input;
  } else {
    inputCost = (promptTokens / 1_000_000) * cost.input;
  }

  const outputCost = (completionTokens / 1_000_000) * cost.output;

  const total = inputCost + cachedCost + outputCost;
  return total > 0 ? Number.parseFloat(total.toFixed(6)) : null;
}