Skip to content

Commit 585260b

Browse files
jahooma and claude authored
Rate-limit freebuff GLM sessions to 5 per 20 hours (#537)
Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
1 parent 64edebb commit 585260b

13 files changed

Lines changed: 3976 additions & 22 deletions

File tree

cli/src/app.tsx

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -380,6 +380,7 @@ const AuthedSurface = ({
380380
// 'queued' → waiting our turn
381381
// 'country_blocked' → terminal region-gate message
382382
// 'banned' → terminal account-banned message
383+
// 'rate_limited' → hit per-model session quota; terminal for this run
383384
//
384385
// 'ended' deliberately falls through to <Chat>: the agent may still be
385386
// finishing work under the server-side grace period, and the chat surface
@@ -390,7 +391,8 @@ const AuthedSurface = ({
390391
session.status === 'queued' ||
391392
session.status === 'none' ||
392393
session.status === 'country_blocked' ||
393-
session.status === 'banned')
394+
session.status === 'banned' ||
395+
session.status === 'rate_limited')
394396
) {
395397
return <WaitingRoomScreen session={session} error={sessionError} />
396398
}

cli/src/components/waiting-room-screen.tsx

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,18 @@ const formatElapsed = (ms: number): string => {
4343
return `${minutes}m ${seconds.toString().padStart(2, '0')}s`
4444
}
4545

46+
/**
 * Formats a retry delay for the rate-limited screen so users know when they
 * can try again. The screen renders the result directly after the words
 * "Try again in", so the returned text carries no leading "in".
 *
 * Examples: "3h 20m", "2h", "45 min", "under a minute", and "a moment" when
 * the delay is zero, negative, or not a finite number.
 *
 * @param ms - Time until the user may retry, in milliseconds.
 * @returns A short human-readable duration, rounded to the nearest minute.
 */
const formatRetryAfter = (ms: number): string => {
  // Must read naturally after "Try again in" at the call site.
  if (!Number.isFinite(ms) || ms <= 0) return 'a moment'
  const minutes = Math.round(ms / 60_000)
  if (minutes < 1) return 'under a minute'
  if (minutes < 60) return `${minutes} min`
  const hours = Math.floor(minutes / 60)
  const rem = minutes % 60
  // Drop the minutes part for whole hours ("2h" rather than "2h 0m").
  return rem === 0 ? `${hours}h` : `${hours}h ${rem}m`
}
57+
4658
export const WaitingRoomScreen: React.FC<WaitingRoomScreenProps> = ({
4759
session,
4860
error,
@@ -216,6 +228,18 @@ export const WaitingRoomScreen: React.FC<WaitingRoomScreenProps> = ({
216228
<span>Elapsed </span>
217229
{formatElapsed(elapsedMs)}
218230
</text>
231+
{/* Per-model session quota (e.g. GLM 5.1 caps at 5/20h). Only
232+
rendered for rate-limited models so the Minimax queue stays
233+
clutter-free. */}
234+
{session.rateLimit && (
235+
<text style={{ fg: theme.muted, alignSelf: 'flex-start' }}>
236+
<span>Sessions </span>
237+
<span fg={theme.foreground}>
238+
{session.rateLimit.recentCount} / {session.rateLimit.limit}
239+
</span>
240+
<span> used in last {session.rateLimit.windowHours}h</span>
241+
</text>
242+
)}
219243
</box>
220244
</>
221245
)}
@@ -258,6 +282,29 @@ export const WaitingRoomScreen: React.FC<WaitingRoomScreenProps> = ({
258282
</text>
259283
</>
260284
)}
285+
286+
{/* Per-model session quota exhausted (e.g. 5+ GLM sessions in the
287+
last 20h). Terminal for this run — the user can exit and come
288+
back once the oldest session in the window rolls off. */}
289+
{session?.status === 'rate_limited' && (
290+
<>
291+
<text style={{ fg: theme.secondary, marginBottom: 1 }}>
292+
⚠ Session limit reached
293+
</text>
294+
<text style={{ fg: theme.muted, wrapMode: 'word' }}>
295+
You've used{' '}
296+
<span fg={theme.foreground}>
297+
{session.recentCount} of {session.limit}
298+
</span>{' '}
299+
hour-long sessions on {session.model} in the last{' '}
300+
{session.windowHours}h. Try again in{' '}
301+
<span fg={theme.foreground}>
302+
{formatRetryAfter(session.retryAfterMs)}
303+
</span>
304+
. Press Ctrl+C to exit.
305+
</text>
306+
</>
307+
)}
261308
</box>
262309
</box>
263310

cli/src/hooks/use-freebuff-session.ts

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,19 @@ async function callSession(
9191
return body
9292
}
9393
}
94+
// 429 from POST is the per-model session-quota reject (e.g. too many GLM
95+
// sessions in the last 20h). Terminal for the current poll — the CLI shows
96+
// a screen explaining the limit and when the user can try again. The 429
97+
// status (rather than 200) keeps older CLIs in their error path so they
98+
// back off instead of tight-polling an unrecognized 200 body.
99+
if (resp.status === 429 && method === 'POST') {
100+
const body = (await resp.json().catch(() => null)) as
101+
| FreebuffSessionResponse
102+
| null
103+
if (body && body.status === 'rate_limited') {
104+
return body
105+
}
106+
}
94107
if (!resp.ok) {
95108
const text = await resp.text().catch(() => '')
96109
throw new Error(
@@ -124,6 +137,7 @@ function nextDelayMs(next: FreebuffSessionResponse): number | null {
124137
case 'country_blocked':
125138
case 'banned':
126139
case 'model_locked':
140+
case 'rate_limited':
127141
case 'model_unavailable':
128142
return null
129143
}

common/src/types/freebuff-session.ts

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,22 @@
55
*
66
* The CLI uses these shapes directly; there are no client-only states.
77
*/
8+
9+
/**
10+
* Per-model usage counter surfaced to the CLI so the waiting-room UI can
11+
* render "N of M sessions used" alongside queue/active state. Present when
12+
* the joined model has a rate limit applied (today: GLM 5.1 with 5 admits
13+
* per 20-hour window). `recentCount` is the number of admissions inside
14+
* `windowHours` at the time the response was produced — see also the
15+
* standalone `rate_limited` status for the reject path.
16+
*/
17+
export interface FreebuffSessionRateLimit {
18+
/** Model this quota applies to — presumably the joined model (GLM 5.1 today). */
model: string
19+
/** Maximum admissions allowed per window (5 for GLM 5.1 today). */
limit: number
20+
/** Length of the window in hours (20 for GLM 5.1 today). */
windowHours: number
21+
/** Admissions inside `windowHours` at the time the response was produced. */
recentCount: number
22+
}
23+
824
export type FreebuffSessionServerResponse =
925
| {
1026
/** Waiting room is globally off; free-mode requests flow through
@@ -38,6 +54,10 @@ export type FreebuffSessionServerResponse =
3854
queueDepthByModel: Record<string, number>
3955
estimatedWaitMs: number
4056
queuedAt: string
57+
/** Rate-limit quota for rate-limited models (GLM 5.1 today). Absent
58+
* for unlimited models or when the status was produced outside the
59+
* rate-limit check path (e.g. pure read via GET). */
60+
rateLimit?: FreebuffSessionRateLimit
4161
}
4262
| {
4363
status: 'active'
@@ -47,6 +67,10 @@ export type FreebuffSessionServerResponse =
4767
admittedAt: string
4868
expiresAt: string
4969
remainingMs: number
70+
/** Rate-limit quota for rate-limited models (GLM 5.1 today). Absent
71+
* for unlimited models or when the status was produced outside the
72+
* rate-limit check path (e.g. pure read via GET). */
73+
rateLimit?: FreebuffSessionRateLimit
5074
}
5175
| {
5276
/** Session is over. While `instanceId` is present we're inside the
@@ -105,3 +129,24 @@ export type FreebuffSessionServerResponse =
105129
* stops polling and shows a banned message. */
106130
status: 'banned'
107131
}
132+
| {
133+
/** User has used up their per-model admission quota in the rolling
134+
* window (GLM 5.1: 5 one-hour sessions per 20h). Returned from POST
135+
* /session before the user is placed in the queue. `retryAfterMs` is
136+
* the time until the oldest admission inside the window falls off
137+
* and one quota slot opens up — clients should show the user when
138+
* they can try again. Terminal for the CLI's current poll session;
139+
* the user can exit and come back later. */
140+
status: 'rate_limited'
141+
/** The freebuff model the user tried to join. */
142+
model: string
143+
/** Max admissions permitted per window (e.g. 5). */
144+
limit: number
145+
/** Rolling window size in hours (e.g. 20). */
146+
windowHours: number
147+
/** Admission count inside the window at check time — will be ≥ limit. */
148+
recentCount: number
149+
/** Milliseconds from now until the oldest admission in the window
150+
* exits and the user regains one quota slot. */
151+
retryAfterMs: number
152+
}
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
-- Creates "free_session_admit": each row stores a user id, a model name, and
-- an admission timestamp that defaults to now(). Presumably one row per
-- admitted free session, backing the per-model session quota — confirm
-- against the code that writes to this table.
CREATE TABLE "free_session_admit" (
2+
"id" text PRIMARY KEY NOT NULL,
3+
"user_id" text NOT NULL,
4+
"model" text NOT NULL,
5+
"admitted_at" timestamp with time zone DEFAULT now() NOT NULL
6+
);
7+
--> statement-breakpoint
8+
-- Ties each row to "public"."user"; rows are deleted with their user
-- (ON DELETE cascade).
ALTER TABLE "free_session_admit" ADD CONSTRAINT "free_session_admit_user_id_user_id_fk" FOREIGN KEY ("user_id") REFERENCES "public"."user"("id") ON DELETE cascade ON UPDATE no action;--> statement-breakpoint
9+
-- Composite btree index on (user_id, model, admitted_at); presumably serves
-- the per-user, per-model time-window count — verify against the query.
CREATE INDEX "idx_free_session_admit_user_model_time" ON "free_session_admit" USING btree ("user_id","model","admitted_at");

0 commit comments

Comments
 (0)