Skip to content

Commit 133c5ff

Browse files
committed
chore: add gpt-tokenizer as direct dependency to agent-runtime
1 parent c6a1825 commit 133c5ff

File tree

3 files changed

+23
-12
lines changed

3 files changed

+23
-12
lines changed

bun.lock

Lines changed: 4 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

packages/agent-runtime/package.json

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,7 @@
2626
"bun": "^1.3.0"
2727
},
2828
"dependencies": {
29+
"gpt-tokenizer": "^2.8.1",
2930
"zod-from-json-schema": "0.4.2"
3031
},
3132
"devDependencies": {

packages/agent-runtime/src/util/token-counter.ts

Lines changed: 18 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1,23 +1,30 @@
11
import { LRUCache } from '@codebuff/common/util/lru-cache'
2+
import { encode } from 'gpt-tokenizer/esm/model/gpt-4o'
23

34
const ANTHROPIC_TOKEN_FUDGE_FACTOR = 1.35
45

56
const TOKEN_COUNT_CACHE = new LRUCache<string, number>(1000)
67

78
export function countTokens(text: string): number {
8-
const cached = TOKEN_COUNT_CACHE.get(text)
9-
if (cached !== undefined) {
10-
return cached
11-
}
12-
13-
// Approximate token count when tokenizer isn't available
14-
const count = Math.floor((text.length / 3) * ANTHROPIC_TOKEN_FUDGE_FACTOR)
9+
try {
10+
const cached = TOKEN_COUNT_CACHE.get(text)
11+
if (cached !== undefined) {
12+
return cached
13+
}
14+
const count = Math.floor(
15+
encode(text, { allowedSpecial: 'all' }).length *
16+
ANTHROPIC_TOKEN_FUDGE_FACTOR,
17+
)
1518

16-
if (text.length > 100) {
17-
// Cache only if the text is long enough to be worth it.
18-
TOKEN_COUNT_CACHE.set(text, count)
19+
if (text.length > 100) {
20+
// Cache only if the text is long enough to be worth it.
21+
TOKEN_COUNT_CACHE.set(text, count)
22+
}
23+
return count
24+
} catch (e) {
25+
console.error('Error counting tokens', e)
26+
return Math.ceil(text.length / 3)
1927
}
20-
return count
2128
}
2229

2330
export function countTokensJson(text: string | object): number {

0 commit comments

Comments
 (0)