Commit 27642bd

2.2.0
1 parent b0c9ca1 commit 27642bd

8 files changed: 34 additions & 28 deletions


threads/package.json

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 {
   "name": "@threaded/ai",
-  "version": "2.1.0",
+  "version": "2.2.0",
   "description": "Composable LLM inference with multi-provider support, tool execution, streaming, and approval workflows",
   "type": "module",
   "main": "dist/index.js",

threads/src/providers/anthropic.ts

Lines changed: 5 additions & 2 deletions
@@ -151,12 +151,13 @@ export const callAnthropic = async (
 
   const inputTokens = data.usage?.input_tokens || 0;
   const outputTokens = data.usage?.output_tokens || 0;
+  const cachedTokens = data.usage?.cache_read_input_tokens || 0;
 
   return {
     ...ctx,
     lastResponse: msg,
     history: [...ctx.history, msg],
-    usage: addUsage(ctx.usage, inputTokens, outputTokens, inputTokens + outputTokens),
+    usage: addUsage(ctx.usage, inputTokens, outputTokens, inputTokens + outputTokens, cachedTokens),
   };
 };
 
@@ -172,6 +173,7 @@ const handleAnthropicStream = async (
   let buffer = "";
   let inputTokens = 0;
   let outputTokens = 0;
+  let cachedTokens = 0;
 
   try {
     while (true) {
@@ -197,6 +199,7 @@ const handleAnthropicStream = async (
 
       if (parsed.type === "message_start" && parsed.message?.usage) {
        inputTokens = parsed.message.usage.input_tokens || 0;
+        cachedTokens = parsed.message.usage.cache_read_input_tokens || 0;
       }
 
       if (parsed.type === "message_delta" && parsed.usage) {
@@ -270,7 +273,7 @@ const handleAnthropicStream = async (
     msg.tool_calls = toolCalls.map(({ index, ...tc }) => tc);
   }
 
-  const usage = addUsage(ctx.usage, inputTokens, outputTokens, inputTokens + outputTokens);
+  const usage = addUsage(ctx.usage, inputTokens, outputTokens, inputTokens + outputTokens, cachedTokens);
 
   if (ctx.stream && (inputTokens || outputTokens)) {
     ctx.stream({ type: "usage", usage });
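
For context, a rough sketch of how a caller would observe the new field once this lands: the provider keeps emitting the same "usage" stream event shown above, but the payload now carries cachedTokens. The import path and the event narrowing below are assumptions, not shown in this commit.

// Sketch only: assumes StreamEvent and TokenUsage are exported from the package root,
// which this commit does not show.
import type { StreamEvent, TokenUsage } from "@threaded/ai";

const events: StreamEvent[] = [];

// A stream callback shaped like the ones in the tests: the providers call
// ctx.stream({ type: "usage", usage }) with the accumulated TokenUsage.
const onStream = (e: StreamEvent) => {
  events.push(e);
  if (e.type === "usage") {
    // The tests use an `as any` cast here; treating the union member as carrying
    // a `usage` field is an assumption about the StreamEvent type.
    const u = (e as any).usage as TokenUsage;
    console.log(`prompt=${u.promptTokens} completion=${u.completionTokens} cached=${u.cachedTokens ?? 0}`);
  }
};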

threads/src/providers/google.ts

Lines changed: 2 additions & 2 deletions
@@ -171,7 +171,7 @@ export const callGoogle = async (
     ...ctx,
     lastResponse: msg,
     history: [...ctx.history, msg],
-    usage: addUsage(ctx.usage, um?.promptTokenCount || 0, um?.candidatesTokenCount || 0, um?.totalTokenCount || 0),
+    usage: addUsage(ctx.usage, um?.promptTokenCount || 0, um?.candidatesTokenCount || 0, um?.totalTokenCount || 0, um?.cachedContentTokenCount || 0),
   };
 };
 
@@ -260,7 +260,7 @@ const handleGoogleStream = async (
   }
 
   const um = usageMetadata;
-  const usage = addUsage(ctx.usage, um?.promptTokenCount || 0, um?.candidatesTokenCount || 0, um?.totalTokenCount || 0);
+  const usage = addUsage(ctx.usage, um?.promptTokenCount || 0, um?.candidatesTokenCount || 0, um?.totalTokenCount || 0, um?.cachedContentTokenCount || 0);
 
   if (ctx.stream && um) {
     ctx.stream({ type: "usage", usage });

threads/src/providers/openai.ts

Lines changed: 2 additions & 2 deletions
@@ -107,7 +107,7 @@ export const callOpenAI = async (
     ...ctx,
     lastResponse: msg,
     history: [...ctx.history, msg],
-    usage: addUsage(ctx.usage, data.usage?.prompt_tokens || 0, data.usage?.completion_tokens || 0, data.usage?.total_tokens || 0),
+    usage: addUsage(ctx.usage, data.usage?.prompt_tokens || 0, data.usage?.completion_tokens || 0, data.usage?.total_tokens || 0, data.usage?.prompt_tokens_details?.cached_tokens || 0),
   };
 };
 
@@ -203,7 +203,7 @@ const handleOpenAIStream = async (
     msg.tool_calls = toolCalls;
   }
 
-  const usage = addUsage(ctx.usage, streamUsage?.prompt_tokens || 0, streamUsage?.completion_tokens || 0, streamUsage?.total_tokens || 0);
+  const usage = addUsage(ctx.usage, streamUsage?.prompt_tokens || 0, streamUsage?.completion_tokens || 0, streamUsage?.total_tokens || 0, streamUsage?.prompt_tokens_details?.cached_tokens || 0);
 
   if (ctx.stream && streamUsage) {
     ctx.stream({ type: "usage", usage });

threads/src/providers/xai.ts

Lines changed: 2 additions & 2 deletions
@@ -101,7 +101,7 @@ export const callXAI = async (
     ...ctx,
     lastResponse: msg,
     history: [...ctx.history, msg],
-    usage: addUsage(ctx.usage, data.usage?.prompt_tokens || 0, data.usage?.completion_tokens || 0, data.usage?.total_tokens || 0),
+    usage: addUsage(ctx.usage, data.usage?.prompt_tokens || 0, data.usage?.completion_tokens || 0, data.usage?.total_tokens || 0, data.usage?.prompt_tokens_details?.cached_tokens || 0),
   };
 };
 
@@ -175,7 +175,7 @@ const handleXAIStream = async (
     msg.tool_calls = toolCalls;
   }
 
-  const usage = addUsage(ctx.usage, streamUsage?.prompt_tokens || 0, streamUsage?.completion_tokens || 0, streamUsage?.total_tokens || 0);
+  const usage = addUsage(ctx.usage, streamUsage?.prompt_tokens || 0, streamUsage?.completion_tokens || 0, streamUsage?.total_tokens || 0, streamUsage?.prompt_tokens_details?.cached_tokens || 0);
 
   if (ctx.stream && streamUsage) {
     ctx.stream({ type: "usage", usage });
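
Taken together, the provider changes read cached prompt tokens from three different raw shapes and feed them all into the same fifth addUsage argument. A small illustrative sketch; the field names come from the diffs above, the payload values are made up.

// Field names as read by the diffs above; numbers are illustrative only.
const anthropicUsage = { input_tokens: 25, output_tokens: 15, cache_read_input_tokens: 12 };
const googleUsageMetadata = { promptTokenCount: 9, candidatesTokenCount: 87, totalTokenCount: 96, cachedContentTokenCount: 4 };
const openAiStyleUsage = { prompt_tokens: 10, completion_tokens: 20, total_tokens: 30, prompt_tokens_details: { cached_tokens: 8 } }; // OpenAI and xAI share this shape

const cachedFromAnthropic = anthropicUsage.cache_read_input_tokens || 0;                   // 12
const cachedFromGoogle = googleUsageMetadata.cachedContentTokenCount || 0;                 // 4
const cachedFromOpenAiOrXai = openAiStyleUsage.prompt_tokens_details?.cached_tokens || 0;  // 8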

threads/src/types.ts

Lines changed: 1 addition & 0 deletions
@@ -197,4 +197,5 @@ export interface TokenUsage {
   promptTokens: number;
   completionTokens: number;
   totalTokens: number;
+  cachedTokens?: number;
 }

threads/src/utils.ts

Lines changed: 2 additions & 0 deletions
@@ -103,8 +103,10 @@ export const addUsage = (
   promptTokens: number,
   completionTokens: number,
   totalTokens: number,
+  cachedTokens: number = 0,
 ): TokenUsage => ({
   promptTokens: (existing?.promptTokens || 0) + promptTokens,
   completionTokens: (existing?.completionTokens || 0) + completionTokens,
   totalTokens: (existing?.totalTokens || 0) + totalTokens,
+  cachedTokens: (existing?.cachedTokens || 0) + cachedTokens,
 });
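
A quick sketch of what the widened signature means for callers: existing four-argument call sites keep working because the new parameter defaults to 0, and a fifth argument accumulates into cachedTokens. The import path below is an assumption (addUsage lives in src/utils.ts; this commit does not show how it is re-exported).

import { addUsage } from "@threaded/ai"; // assumed export path

const a = addUsage(undefined, 10, 20, 30); // { promptTokens: 10, completionTokens: 20, totalTokens: 30, cachedTokens: 0 }
const b = addUsage(a, 5, 5, 10, 4);        // cachedTokens: 0 + 4 = 4
const c = addUsage(b, 5, 5, 10, 4);        // cachedTokens: 8; the other counters keep accumulating as before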

threads/tests/usage.test.ts

Lines changed: 19 additions & 19 deletions
@@ -40,13 +40,13 @@ const mockGoogleResponse = (usageMetadata?: any) => ({
 describe("addUsage", () => {
   it("accumulates from undefined", () => {
     const result = addUsage(undefined, 10, 20, 30);
-    expect(result).toEqual({ promptTokens: 10, completionTokens: 20, totalTokens: 30 });
+    expect(result).toEqual({ cachedTokens: 0, promptTokens: 10, completionTokens: 20, totalTokens: 30 });
   });
 
   it("accumulates onto existing", () => {
     const existing = { promptTokens: 5, completionTokens: 10, totalTokens: 15 };
     const result = addUsage(existing, 10, 20, 30);
-    expect(result).toEqual({ promptTokens: 15, completionTokens: 30, totalTokens: 45 });
+    expect(result).toEqual({ cachedTokens: 0, promptTokens: 15, completionTokens: 30, totalTokens: 45 });
   });
 });
 
@@ -66,14 +66,14 @@ describe("OpenAI usage tracking", () => {
     );
 
     const result = await callOpenAI({ model: "gpt-4o-mini" }, baseCtx());
-    expect(result.usage).toEqual({ promptTokens: 10, completionTokens: 20, totalTokens: 30 });
+    expect(result.usage).toEqual({ cachedTokens: 0, promptTokens: 10, completionTokens: 20, totalTokens: 30 });
   });
 
   it("handles missing usage gracefully", async () => {
     (fetch as any).mockResolvedValue(mockOpenAIResponse());
 
     const result = await callOpenAI({ model: "gpt-4o-mini" }, baseCtx());
-    expect(result.usage).toEqual({ promptTokens: 0, completionTokens: 0, totalTokens: 0 });
+    expect(result.usage).toEqual({ cachedTokens: 0, promptTokens: 0, completionTokens: 0, totalTokens: 0 });
   });
 
   it("accumulates usage across calls", async () => {
@@ -89,7 +89,7 @@ describe("OpenAI usage tracking", () => {
     );
 
     ctx = await callOpenAI({ model: "gpt-4o-mini" }, ctx);
-    expect(ctx.usage).toEqual({ promptTokens: 25, completionTokens: 45, totalTokens: 70 });
+    expect(ctx.usage).toEqual({ cachedTokens: 0, promptTokens: 25, completionTokens: 45, totalTokens: 70 });
   });
 
   it("extracts usage from streaming response", async () => {
@@ -121,10 +121,10 @@ describe("OpenAI usage tracking", () => {
     const ctx = { ...baseCtx(), stream: (e: StreamEvent) => events.push(e) };
 
     const result = await callOpenAI({ model: "gpt-4o-mini" }, ctx);
-    expect(result.usage).toEqual({ promptTokens: 10, completionTokens: 5, totalTokens: 15 });
+    expect(result.usage).toEqual({ cachedTokens: 0, promptTokens: 10, completionTokens: 5, totalTokens: 15 });
     expect(events.find(e => e.type === "usage")).toEqual({
       type: "usage",
-      usage: { promptTokens: 10, completionTokens: 5, totalTokens: 15 },
+      usage: { cachedTokens: 0, promptTokens: 10, completionTokens: 5, totalTokens: 15 },
     });
   });
 
@@ -164,14 +164,14 @@ describe("Anthropic usage tracking", () => {
     );
 
     const result = await callAnthropic({ model: "claude-sonnet-4-5-20250929" }, baseCtx());
-    expect(result.usage).toEqual({ promptTokens: 25, completionTokens: 15, totalTokens: 40 });
+    expect(result.usage).toEqual({ cachedTokens: 0, promptTokens: 25, completionTokens: 15, totalTokens: 40 });
   });
 
   it("handles missing usage gracefully", async () => {
     (fetch as any).mockResolvedValue(mockAnthropicResponse());
 
     const result = await callAnthropic({ model: "claude-sonnet-4-5-20250929" }, baseCtx());
-    expect(result.usage).toEqual({ promptTokens: 0, completionTokens: 0, totalTokens: 0 });
+    expect(result.usage).toEqual({ cachedTokens: 0, promptTokens: 0, completionTokens: 0, totalTokens: 0 });
   });
 
   it("extracts usage from streaming response", async () => {
@@ -202,7 +202,7 @@ describe("Anthropic usage tracking", () => {
     const ctx = { ...baseCtx(), stream: (e: StreamEvent) => events.push(e) };
 
     const result = await callAnthropic({ model: "claude-sonnet-4-5-20250929" }, ctx);
-    expect(result.usage).toEqual({ promptTokens: 25, completionTokens: 15, totalTokens: 40 });
+    expect(result.usage).toEqual({ cachedTokens: 0, promptTokens: 25, completionTokens: 15, totalTokens: 40 });
 
     const usageEvent = events.find(e => e.type === "usage") as any;
     expect(usageEvent.usage.promptTokens).toBe(25);
@@ -226,14 +226,14 @@ describe("Google usage tracking", () => {
     );
 
     const result = await callGoogle({ model: "gemini-2.0-flash" }, baseCtx());
-    expect(result.usage).toEqual({ promptTokens: 9, completionTokens: 87, totalTokens: 96 });
+    expect(result.usage).toEqual({ cachedTokens: 0, promptTokens: 9, completionTokens: 87, totalTokens: 96 });
   });
 
   it("handles missing usageMetadata gracefully", async () => {
     (fetch as any).mockResolvedValue(mockGoogleResponse());
 
     const result = await callGoogle({ model: "gemini-2.0-flash" }, baseCtx());
-    expect(result.usage).toEqual({ promptTokens: 0, completionTokens: 0, totalTokens: 0 });
+    expect(result.usage).toEqual({ cachedTokens: 0, promptTokens: 0, completionTokens: 0, totalTokens: 0 });
   });
 
   it("extracts usage from streaming response", async () => {
@@ -263,7 +263,7 @@ describe("Google usage tracking", () => {
     const ctx = { ...baseCtx(), stream: (e: StreamEvent) => events.push(e) };
 
     const result = await callGoogle({ model: "gemini-2.0-flash" }, ctx);
-    expect(result.usage).toEqual({ promptTokens: 9, completionTokens: 5, totalTokens: 14 });
+    expect(result.usage).toEqual({ cachedTokens: 0, promptTokens: 9, completionTokens: 5, totalTokens: 14 });
     expect(events.find(e => e.type === "usage")).toBeDefined();
   });
 });
@@ -284,7 +284,7 @@ describe("xAI usage tracking", () => {
     );
 
     const result = await callXAI({ model: "grok-3" }, baseCtx());
-    expect(result.usage).toEqual({ promptTokens: 37, completionTokens: 530, totalTokens: 567 });
+    expect(result.usage).toEqual({ cachedTokens: 0, promptTokens: 37, completionTokens: 530, totalTokens: 567 });
   });
 });
 
@@ -302,7 +302,7 @@ describe("Ollama usage tracking", () => {
     );
 
     const result = await callOpenAI({ model: "llama3", baseUrl: "http://localhost:11434/v1" }, baseCtx());
-    expect(result.usage).toEqual({ promptTokens: 16, completionTokens: 1, totalTokens: 17 });
+    expect(result.usage).toEqual({ cachedTokens: 0, promptTokens: 16, completionTokens: 1, totalTokens: 17 });
   });
 });
 
@@ -323,7 +323,7 @@ describe("scope usage propagation", () => {
 
     const step = scope({}, model({ model: "openai/gpt-4o-mini" }));
     const result = await step(baseCtx());
-    expect(result.usage).toEqual({ promptTokens: 10, completionTokens: 20, totalTokens: 30 });
+    expect(result.usage).toEqual({ cachedTokens: 0, promptTokens: 10, completionTokens: 20, totalTokens: 30 });
   });
 
   it("propagates usage from silent scopes", async () => {
@@ -335,7 +335,7 @@ describe("scope usage propagation", () => {
     const result = await step(baseCtx());
 
     expect(result.history).toHaveLength(1);
-    expect(result.usage).toEqual({ promptTokens: 10, completionTokens: 20, totalTokens: 30 });
+    expect(result.usage).toEqual({ cachedTokens: 0, promptTokens: 10, completionTokens: 20, totalTokens: 30 });
   });
 
   it("accumulates usage across nested scopes", async () => {
@@ -355,7 +355,7 @@ describe("scope usage propagation", () => {
     );
     const result = await step(baseCtx());
 
-    expect(result.usage).toEqual({ promptTokens: 30, completionTokens: 60, totalTokens: 90 });
+    expect(result.usage).toEqual({ cachedTokens: 0, promptTokens: 30, completionTokens: 60, totalTokens: 90 });
   });
 
   it("carries pre-existing usage into scoped context", async () => {
@@ -370,6 +370,6 @@ describe("scope usage propagation", () => {
     const step = scope({}, model({ model: "openai/gpt-4o-mini" }));
     const result = await step(ctx);
 
-    expect(result.usage).toEqual({ promptTokens: 110, completionTokens: 220, totalTokens: 330 });
+    expect(result.usage).toEqual({ cachedTokens: 0, promptTokens: 110, completionTokens: 220, totalTokens: 330 });
   });
 });
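
The updated expectations above all pin cachedTokens to 0 because none of the mocked responses include a cached-token field. A follow-up assertion along these lines (not part of this commit, sketched against the new addUsage signature) would exercise a non-zero count:

it("accumulates cached tokens when a fifth argument is passed", () => {
  const result = addUsage(undefined, 10, 20, 30, 7);
  expect(result).toEqual({ cachedTokens: 7, promptTokens: 10, completionTokens: 20, totalTokens: 30 });
});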
