diff --git a/cli/src/components/waiting-room-screen.tsx b/cli/src/components/waiting-room-screen.tsx index 9cdc385c9..bbb7f501e 100644 --- a/cli/src/components/waiting-room-screen.tsx +++ b/cli/src/components/waiting-room-screen.tsx @@ -419,7 +419,7 @@ export const WaitingRoomScreen: React.FC = ({ {/* Shared premium-session quota exhausted. Terminal for this run — the user can exit and come - back once the oldest session in the window rolls off. */} + back once the daily Pacific reset passes. */} {session?.status === 'rate_limited' && ( <> @@ -430,7 +430,7 @@ export const WaitingRoomScreen: React.FC = ({ {formatSessionUnits(session.recentCount)} of {session.limit} {' '} - premium sessions in the last 20 hours. Try again in{' '} + premium sessions today. Try again in{' '} {formatRetryAfter(session.retryAfterMs)} diff --git a/common/src/constants/freebuff-models.ts b/common/src/constants/freebuff-models.ts index fedd5154c..8bfaf7b76 100644 --- a/common/src/constants/freebuff-models.ts +++ b/common/src/constants/freebuff-models.ts @@ -1,3 +1,10 @@ +import { + addDaysToYmd, + getUtcForZonedTime, + getZonedParts, + type ZonedDateParts, +} from '../util/zoned-time' + /** * Models a freebuff user can pick between in the waiting-room model selector. * @@ -31,18 +38,14 @@ export const FREEBUFF_GLM_MODEL_ID = 'z-ai/glm-5.1' export const FREEBUFF_KIMI_MODEL_ID = 'moonshotai/kimi-k2.6' export const FREEBUFF_MINIMAX_MODEL_ID = 'minimax/minimax-m2.7' export const FREEBUFF_PREMIUM_SESSION_LIMIT = 5 -export const FREEBUFF_PREMIUM_SESSION_WINDOW_HOURS = 20 +export const FREEBUFF_PREMIUM_SESSION_RESET_TIMEZONE = 'America/Los_Angeles' +export const FREEBUFF_PREMIUM_SESSION_PERIOD = 'pacific_day' +/** Deprecated wire compatibility field. Premium usage now resets at midnight + * Pacific time rather than using a rolling hourly window. 
*/ +export const FREEBUFF_PREMIUM_SESSION_WINDOW_HOURS = 24 const FREEBUFF_EASTERN_TIMEZONE = 'America/New_York' const FREEBUFF_PACIFIC_TIMEZONE = 'America/Los_Angeles' -interface ZonedDateParts { - year: number - month: number - day: number - hour: number - minute: number -} - interface LocalTimeFormatOptions { locale?: string timeZone?: string @@ -165,79 +168,6 @@ export function getFreebuffModel(id: string): FreebuffModelOption { ) } -function getZonedParts(date: Date, timeZone: string): ZonedDateParts { - const parts = new Intl.DateTimeFormat('en-US', { - timeZone, - year: 'numeric', - month: '2-digit', - day: '2-digit', - hour: '2-digit', - minute: '2-digit', - hourCycle: 'h23', - }).formatToParts(date) - const value = (type: string) => - parts.find((part) => part.type === type)?.value - const year = Number(value('year') ?? 0) - const month = Number(value('month') ?? 1) - const day = Number(value('day') ?? 1) - const hour = Number(value('hour') ?? 0) - const minute = Number(value('minute') ?? 
0) - return { - year, - month, - day, - hour, - minute, - } -} - -function addDaysToYmd( - year: number, - month: number, - day: number, - days: number, -): Pick { - const next = new Date(Date.UTC(year, month - 1, day)) - next.setUTCDate(next.getUTCDate() + days) - return { - year: next.getUTCFullYear(), - month: next.getUTCMonth() + 1, - day: next.getUTCDate(), - } -} - -function getUtcForZonedTime( - parts: Pick, - timeZone: string, - hour: number, - minute: number, -): Date { - let guess = new Date( - Date.UTC(parts.year, parts.month - 1, parts.day, hour, minute), - ) - - for (let i = 0; i < 3; i++) { - const actual = getZonedParts(guess, timeZone) - const desiredUtc = Date.UTC( - parts.year, - parts.month - 1, - parts.day, - hour, - minute, - ) - const actualUtc = Date.UTC( - actual.year, - actual.month - 1, - actual.day, - actual.hour, - actual.minute, - ) - guess = new Date(guess.getTime() + (desiredUtc - actualUtc)) - } - - return guess -} - function getNextFreebuffDeploymentStart(now: Date): Date { const easternNow = getZonedParts(now, FREEBUFF_EASTERN_TIMEZONE) const isBeforeTodayOpen = easternNow.hour < 9 diff --git a/common/src/types/freebuff-session.ts b/common/src/types/freebuff-session.ts index 6f44d202b..8d4eebd36 100644 --- a/common/src/types/freebuff-session.ts +++ b/common/src/types/freebuff-session.ts @@ -10,13 +10,18 @@ * Usage counter surfaced to the CLI so the waiting-room UI can render * "N of M sessions used" alongside queue/active state. Present when the * joined model consumes premium Freebuff sessions. `recentCount` is the - * rounded session units inside `windowHours` at the time the response was - * produced — see also the standalone `rate_limited` status for the reject - * path. + * rounded session units since the last midnight Pacific reset at the time + * the response was produced — see also the standalone `rate_limited` status + * for the reject path. 
*/ export interface FreebuffSessionRateLimit { model: string limit: number + period: 'pacific_day' + resetTimeZone: string + resetAt: string + /** Deprecated wire field kept for older clients. Premium usage now resets + * at midnight Pacific time rather than using a rolling window. */ windowHours: number recentCount: number } @@ -63,7 +68,7 @@ export type FreebuffSessionServerResponse = * produces `none`). */ queueDepthByModel?: Record /** Current quota snapshots for premium models, keyed by model id. Lets - * the picker show rolling premium-session usage before the user commits + * the picker show today's premium-session usage before the user commits * to a queue. */ rateLimitsByModel?: FreebuffSessionRateLimitByModel } @@ -159,22 +164,23 @@ export type FreebuffSessionServerResponse = status: 'banned' } | { - /** User has used up their shared premium-session quota in the rolling - * window. Returned from POST /session before the user is placed in the - * queue. `retryAfterMs` is the time until enough session units fall out - * of the window to open one quota slot — clients should show the user - * when they can try again. Terminal for the CLI's current poll session; - * the user can exit and come back later. */ + /** User has used up their shared premium-session quota for the current + * Pacific day. Returned from POST /session before the user is placed in + * the queue. `retryAfterMs` is the time until the next midnight Pacific + * reset. Terminal for the CLI's current poll session; the user can exit + * and come back later. */ status: 'rate_limited' /** The freebuff model the user tried to join. */ model: string - /** Max premium session units permitted per window (e.g. 5). */ + /** Max premium session units permitted per Pacific day (e.g. 5). */ limit: number - /** Rolling window size in hours (e.g. 20). */ + period: 'pacific_day' + resetTimeZone: string + resetAt: string + /** Deprecated wire field kept for older clients. 
*/ windowHours: number - /** Premium session units inside the window at check time — will be ≥ limit. */ + /** Premium session units since today's Pacific reset — will be ≥ limit. */ recentCount: number - /** Milliseconds from now until the oldest admission in the window - * exits and the user regains one quota slot. */ + /** Milliseconds from now until the next Pacific midnight reset. */ retryAfterMs: number } diff --git a/common/src/util/__tests__/zoned-time.test.ts b/common/src/util/__tests__/zoned-time.test.ts new file mode 100644 index 000000000..84a0233bd --- /dev/null +++ b/common/src/util/__tests__/zoned-time.test.ts @@ -0,0 +1,35 @@ +import { describe, expect, test } from 'bun:test' + +import { getZonedDayBounds } from '../zoned-time' + +describe('getZonedDayBounds', () => { + test('returns the current Pacific day bounds on a normal day', () => { + const bounds = getZonedDayBounds( + new Date('2026-04-17T16:00:00Z'), + 'America/Los_Angeles', + ) + + expect(bounds.startsAt.toISOString()).toBe('2026-04-17T07:00:00.000Z') + expect(bounds.resetsAt.toISOString()).toBe('2026-04-18T07:00:00.000Z') + }) + + test('handles the shorter spring-forward Pacific day', () => { + const bounds = getZonedDayBounds( + new Date('2026-03-08T09:00:00Z'), + 'America/Los_Angeles', + ) + + expect(bounds.startsAt.toISOString()).toBe('2026-03-08T08:00:00.000Z') + expect(bounds.resetsAt.toISOString()).toBe('2026-03-09T07:00:00.000Z') + }) + + test('handles the longer fall-back Pacific day', () => { + const bounds = getZonedDayBounds( + new Date('2026-11-01T09:00:00Z'), + 'America/Los_Angeles', + ) + + expect(bounds.startsAt.toISOString()).toBe('2026-11-01T07:00:00.000Z') + expect(bounds.resetsAt.toISOString()).toBe('2026-11-02T08:00:00.000Z') + }) +}) diff --git a/common/src/util/zoned-time.ts b/common/src/util/zoned-time.ts new file mode 100644 index 000000000..36e13387f --- /dev/null +++ b/common/src/util/zoned-time.ts @@ -0,0 +1,98 @@ +export interface ZonedDateParts { + year: 
number + month: number + day: number + hour: number + minute: number +} + +export function getZonedParts(date: Date, timeZone: string): ZonedDateParts { + const parts = new Intl.DateTimeFormat('en-US', { + timeZone, + year: 'numeric', + month: '2-digit', + day: '2-digit', + hour: '2-digit', + minute: '2-digit', + hourCycle: 'h23', + }).formatToParts(date) + + const get = (type: string) => { + const value = parts.find((part) => part.type === type)?.value + if (!value) throw new Error(`Missing ${type} in ${timeZone} date parts`) + return Number(value) + } + + return { + year: get('year'), + month: get('month'), + day: get('day'), + hour: get('hour'), + minute: get('minute'), + } +} + +export function addDaysToYmd( + year: number, + month: number, + day: number, + days: number, +): Pick { + const next = new Date(Date.UTC(year, month - 1, day)) + next.setUTCDate(next.getUTCDate() + days) + return { + year: next.getUTCFullYear(), + month: next.getUTCMonth() + 1, + day: next.getUTCDate(), + } +} + +export function getUtcForZonedTime( + parts: Pick, + timeZone: string, + hour: number, + minute: number, +): Date { + let guess = new Date( + Date.UTC(parts.year, parts.month - 1, parts.day, hour, minute), + ) + + for (let i = 0; i < 3; i++) { + const actual = getZonedParts(guess, timeZone) + const desiredUtc = Date.UTC( + parts.year, + parts.month - 1, + parts.day, + hour, + minute, + ) + const actualUtc = Date.UTC( + actual.year, + actual.month - 1, + actual.day, + actual.hour, + actual.minute, + ) + guess = new Date(guess.getTime() + (desiredUtc - actualUtc)) + } + + return guess +} + +export function getZonedDayBounds( + now: Date, + timeZone: string, +): { startsAt: Date; resetsAt: Date } { + const nowParts = getZonedParts(now, timeZone) + const today = { + year: nowParts.year, + month: nowParts.month, + day: nowParts.day, + } + const tomorrow = addDaysToYmd(today.year, today.month, today.day, 1) + + return { + startsAt: getUtcForZonedTime(today, timeZone, 0, 0), + 
resetsAt: getUtcForZonedTime(tomorrow, timeZone, 0, 0), + } +} diff --git a/docs/freebuff-waiting-room.md b/docs/freebuff-waiting-room.md index 9ba7354ec..a4a74468b 100644 --- a/docs/freebuff-waiting-room.md +++ b/docs/freebuff-waiting-room.md @@ -162,6 +162,10 @@ The final tick result carries a `queueDepthByModel` map and a single `skipped` r | `FREEBUFF_SESSION_LENGTH_MS` | env | 3_600_000 | Session lifetime | | `SESSION_GRACE_MS` | `web/src/server/free-session/config.ts` | 1_800_000 | Drain window after expiry — gate still admits requests so an in-flight agent can finish, but the CLI is expected to block new prompts. Hard cutoff at `expires_at + grace`. | +### Premium Session Quota + +DeepSeek, Kimi, and legacy GLM share a per-user premium quota. The server sums `session_units` over the user's `free_session_admit` rows since the most recent midnight in `America/Los_Angeles`; when that total reaches `FREEBUFF_PREMIUM_SESSION_LIMIT`, the next premium `POST /session` is rejected until the next Pacific midnight reset. MiniMax remains unlimited. + ## HTTP API All endpoints authenticate via the standard `Authorization: Bearer ` or `x-codebuff-api-key` header. diff --git a/packages/internal/src/db/schema.ts b/packages/internal/src/db/schema.ts index ee4f32509..79357c2b6 100644 --- a/packages/internal/src/db/schema.ts +++ b/packages/internal/src/db/schema.ts @@ -911,9 +911,9 @@ export const freeSession = pgTable( /** * Audit log of every admission — one row per queued→active transition. Used - * to track shared premium-session usage for Freebuff's 5 sessions / 20h - * allowance. `session_units` starts at 1.0 and may be reduced when users end - * active sessions early. + * to track shared premium-session usage for Freebuff's 5 sessions per Pacific + * day allowance. `session_units` starts at 1.0 and may be reduced when users + * end active sessions early. 
* * Separate from `free_session` because that table is one-row-per-user (state, * not history); the UPSERT path there would otherwise destroy prior admissions. diff --git a/web/src/server/free-session/__tests__/public-api.test.ts b/web/src/server/free-session/__tests__/public-api.test.ts index d29c2cb1f..2ac2ad75a 100644 --- a/web/src/server/free-session/__tests__/public-api.test.ts +++ b/web/src/server/free-session/__tests__/public-api.test.ts @@ -23,6 +23,19 @@ import type { InternalSessionRow } from '../types' const SESSION_LEN = 60 * 60 * 1000 const GRACE_MS = 30 * 60 * 1000 const DEFAULT_MODEL = 'minimax/minimax-m2.7' +const DEFAULT_PREMIUM_RESET_AT = '2026-04-18T07:00:00.000Z' + +function expectedRateLimit(model: string, recentCount: number) { + return { + model, + limit: FREEBUFF_PREMIUM_SESSION_LIMIT, + period: 'pacific_day', + resetTimeZone: 'America/Los_Angeles', + resetAt: DEFAULT_PREMIUM_RESET_AT, + windowHours: FREEBUFF_PREMIUM_SESSION_WINDOW_HOURS, + recentCount, + } as const +} interface AdmitRecord { user_id: string @@ -269,12 +282,7 @@ describe('requestSession', () => { expect(state.status).toBe('queued') if (state.status !== 'queued') throw new Error('unreachable') expect(deps.rows.get('u1')?.model).toBe(FREEBUFF_GLM_MODEL_ID) - expect(state.rateLimit).toEqual({ - model: FREEBUFF_GLM_MODEL_ID, - limit: FREEBUFF_PREMIUM_SESSION_LIMIT, - windowHours: FREEBUFF_PREMIUM_SESSION_WINDOW_HOURS, - recentCount: 0, - }) + expect(state.rateLimit).toEqual(expectedRateLimit(FREEBUFF_GLM_MODEL_ID, 0)) }) test('legacy GLM 5.1 active session can be reclaimed outside deployment hours', async () => { @@ -299,12 +307,7 @@ describe('requestSession', () => { expect(state.status).toBe('active') if (state.status !== 'active') throw new Error('unreachable') expect(state.instanceId).not.toBe('inst-pre') - expect(state.rateLimit).toEqual({ - model: FREEBUFF_GLM_MODEL_ID, - limit: FREEBUFF_PREMIUM_SESSION_LIMIT, - windowHours: FREEBUFF_PREMIUM_SESSION_WINDOW_HOURS, - 
recentCount: 0, - }) + expect(state.rateLimit).toEqual(expectedRateLimit(FREEBUFF_GLM_MODEL_ID, 0)) }) test('queued response includes a per-model depth snapshot for the selector', async () => { @@ -432,9 +435,9 @@ describe('requestSession', () => { expect(s3.status).toBe('active') }) - // Per-user premium session limit (5 units per 20h) — the wire limit is - // hard-coded in public-api.ts, so tests seed the fake admit log directly - // rather than configuring it. + // Per-user premium session limit (5 units per Pacific day) — the wire + // limit is hard-coded in public-api.ts, so tests seed the fake admit log + // directly rather than configuring it. const PREMIUM_MODEL = FREEBUFF_DEEPSEEK_V4_PRO_MODEL_ID const KIMI_MODEL = FREEBUFF_KIMI_MODEL_ID const PREMIUM_LIMIT = FREEBUFF_PREMIUM_SESSION_LIMIT @@ -448,7 +451,7 @@ describe('requestSession', () => { deps.admits.push({ user_id: 'u1', model: i === 0 ? KIMI_MODEL : PREMIUM_MODEL, - admitted_at: new Date(now.getTime() - (19 - i) * 60 * 60 * 1000), + admitted_at: new Date(now.getTime() - i * 60 * 60 * 1000), }) } @@ -463,17 +466,38 @@ describe('requestSession', () => { expect(state.limit).toBe(PREMIUM_LIMIT) expect(state.windowHours).toBe(PREMIUM_WINDOW_HOURS) expect(state.recentCount).toBe(PREMIUM_LIMIT) - expect(state.retryAfterMs).toBe(60 * 60 * 1000) + expect(state.retryAfterMs).toBe(15 * 60 * 60 * 1000) expect(deps.rows.has('u1')).toBe(false) }) - test('rate_limited: DeepSeek admit outside 20h window does not count', async () => { - deps._tick(PREMIUM_OPEN_TIME) + test('rate_limited: reset follows Pacific midnight across DST changes', async () => { + deps._tick(new Date('2026-03-08T09:00:00Z')) const now = deps._now() + for (let i = 0; i < PREMIUM_LIMIT; i++) { + deps.admits.push({ + user_id: 'u1', + model: PREMIUM_MODEL, + admitted_at: new Date(now.getTime() - i * 60_000), + }) + } + + const state = await requestSession({ + userId: 'u1', + model: PREMIUM_MODEL, + deps, + }) + + 
expect(state.status).toBe('rate_limited') + if (state.status !== 'rate_limited') throw new Error('unreachable') + expect(state.retryAfterMs).toBe(22 * 60 * 60 * 1000) + }) + + test('rate_limited: DeepSeek admit before Pacific midnight does not count', async () => { + deps._tick(PREMIUM_OPEN_TIME) deps.admits.push({ user_id: 'u1', model: PREMIUM_MODEL, - admitted_at: new Date(now.getTime() - 21 * 60 * 60 * 1000), + admitted_at: new Date('2026-04-17T06:59:00Z'), }) const state = await requestSession({ @@ -483,21 +507,15 @@ describe('requestSession', () => { }) expect(state.status).toBe('queued') if (state.status !== 'queued') throw new Error('unreachable') - expect(state.rateLimit).toEqual({ - model: PREMIUM_MODEL, - limit: PREMIUM_LIMIT, - windowHours: PREMIUM_WINDOW_HOURS, - recentCount: 0, - }) + expect(state.rateLimit).toEqual(expectedRateLimit(PREMIUM_MODEL, 0)) }) - test('rate_limited: 5th Kimi admit in window blocks the 6th attempt', async () => { + test('rate_limited: 5th Kimi admit today blocks the 6th attempt', async () => { deps._tick(PREMIUM_OPEN_TIME) - // Seed 5 admits inside the 20h window, spaced so we can verify retryAfter - // points at the oldest one sliding off. + // Seed 5 admits inside today's Pacific day. retryAfter points at the + // next Pacific midnight reset, not the oldest admit. const now = deps._now() - // Oldest: 19h ago (still in window). Next 4: 1h, 2h, 3h, 4h ago. - const ages = [19, 4, 3, 2, 1] + const ages = [8, 4, 3, 2, 1] for (const hoursAgo of ages) { deps.admits.push({ user_id: 'u1', @@ -517,8 +535,7 @@ describe('requestSession', () => { expect(state.limit).toBe(PREMIUM_LIMIT) expect(state.windowHours).toBe(PREMIUM_WINDOW_HOURS) expect(state.recentCount).toBe(PREMIUM_LIMIT) - // Oldest admit is 19h ago; slot opens when it hits 20h, i.e. in 1h. - expect(state.retryAfterMs).toBe(60 * 60 * 1000) + expect(state.retryAfterMs).toBe(15 * 60 * 60 * 1000) // Blocked before any row is written — the user doesn't take a queue slot. 
expect(deps.rows.has('u1')).toBe(false) }) @@ -546,17 +563,13 @@ describe('requestSession', () => { expect(state.windowHours).toBe(PREMIUM_WINDOW_HOURS) }) - test('rate_limited: admits outside the 20h window do not count', async () => { + test("rate_limited: admits before today's Pacific reset do not count", async () => { deps._tick(PREMIUM_OPEN_TIME) - // 5 admits, each just over 20h old → all fall off the window. - const now = deps._now() for (let i = 0; i < 5; i++) { deps.admits.push({ user_id: 'u1', model: PREMIUM_MODEL, - admitted_at: new Date( - now.getTime() - (PREMIUM_WINDOW_HOURS * 60 * 60 * 1000 + 60_000 + i), - ), + admitted_at: new Date(`2026-04-17T06:5${i}:00Z`), }) } const state = await requestSession({ @@ -592,7 +605,7 @@ describe('requestSession', () => { test('queued DeepSeek response carries the current admit count', async () => { deps._tick(PREMIUM_OPEN_TIME) const now = deps._now() - // 2 admits in the window — under the limit so the user still queues. + // 2 admits today — under the limit so the user still queues. 
deps.admits.push({ user_id: 'u1', model: PREMIUM_MODEL, @@ -609,12 +622,7 @@ describe('requestSession', () => { deps, }) if (state.status !== 'queued') throw new Error('unreachable') - expect(state.rateLimit).toEqual({ - model: PREMIUM_MODEL, - limit: PREMIUM_LIMIT, - windowHours: PREMIUM_WINDOW_HOURS, - recentCount: 2, - }) + expect(state.rateLimit).toEqual(expectedRateLimit(PREMIUM_MODEL, 2)) }) test('rate_limited: fractional premium usage under the cap can start another session', async () => { @@ -623,7 +631,7 @@ describe('requestSession', () => { deps.admits.push({ user_id: 'u1', model: KIMI_MODEL, - admitted_at: new Date(now.getTime() - 19 * 60 * 60 * 1000), + admitted_at: new Date(now.getTime() - 8 * 60 * 60 * 1000), session_units: 0.9, }) for (let i = 0; i < 4; i++) { @@ -655,7 +663,7 @@ describe('requestSession', () => { const now = deps._now() // Seed 5 prior admits (the cap), with the latest one matching the // active row we're about to install. - const ages = [19, 4, 3, 2, 0] + const ages = [8, 4, 3, 2, 0] for (const hoursAgo of ages) { deps.admits.push({ user_id: 'u1', @@ -685,7 +693,7 @@ describe('requestSession', () => { }) expect(state.status).toBe('active') if (state.status !== 'active') throw new Error('unreachable') - // Instance id rotated; quota snapshot still reflects the full window. + // Instance id rotated; quota snapshot still reflects today's usage. expect(state.instanceId).not.toBe('inst-pre') expect(state.rateLimit?.recentCount).toBe(PREMIUM_LIMIT) }) @@ -736,7 +744,7 @@ describe('requestSession', () => { // must be blocked by the quota. 
deps._tick(PREMIUM_OPEN_TIME) const now = deps._now() - const ages = [19, 4, 3, 2, 1] + const ages = [8, 4, 3, 2, 1] for (const hoursAgo of ages) { deps.admits.push({ user_id: 'u1', @@ -767,7 +775,7 @@ describe('requestSession', () => { test('instant-admit bumps the quota count for the freshly-written admit row', async () => { const admitDeps = makeDeps({ getInstantAdmitCapacity: () => 3 }) admitDeps._tick(PREMIUM_OPEN_TIME) - // 1 existing admit in the window; this new call should instant-admit and + // 1 existing admit today; this new call should instant-admit and // write a second row, so the response's recentCount reflects 2. const now = admitDeps._now() admitDeps.admits.push({ @@ -816,7 +824,7 @@ describe('getSessionState', () => { deps.admits.push({ user_id: 'u1', model: FREEBUFF_DEEPSEEK_V4_PRO_MODEL_ID, - admitted_at: new Date(now.getTime() - 19 * 60 * 60 * 1000), + admitted_at: new Date(now.getTime() - 60 * 60 * 1000), }) const state = await getSessionState({ userId: 'u1', deps }) @@ -824,12 +832,7 @@ describe('getSessionState', () => { if (state.status !== 'none') throw new Error('unreachable') expect( state.rateLimitsByModel?.[FREEBUFF_DEEPSEEK_V4_PRO_MODEL_ID], - ).toEqual({ - model: FREEBUFF_DEEPSEEK_V4_PRO_MODEL_ID, - limit: FREEBUFF_PREMIUM_SESSION_LIMIT, - windowHours: FREEBUFF_PREMIUM_SESSION_WINDOW_HOURS, - recentCount: 1, - }) + ).toEqual(expectedRateLimit(FREEBUFF_DEEPSEEK_V4_PRO_MODEL_ID, 1)) }) test('active session with matching instance id returns active', async () => { @@ -891,12 +894,9 @@ describe('getSessionState', () => { deps, }) if (state.status !== 'active') throw new Error('unreachable') - expect(state.rateLimit).toEqual({ - model: FREEBUFF_DEEPSEEK_V4_PRO_MODEL_ID, - limit: FREEBUFF_PREMIUM_SESSION_LIMIT, - windowHours: FREEBUFF_PREMIUM_SESSION_WINDOW_HOURS, - recentCount: 1, - }) + expect(state.rateLimit).toEqual( + expectedRateLimit(FREEBUFF_DEEPSEEK_V4_PRO_MODEL_ID, 1), + ) }) test('active session only fetches one shared premium 
quota snapshot', async () => { diff --git a/web/src/server/free-session/public-api.ts b/web/src/server/free-session/public-api.ts index a1a065abe..59af4db81 100644 --- a/web/src/server/free-session/public-api.ts +++ b/web/src/server/free-session/public-api.ts @@ -4,13 +4,16 @@ import { FREEBUFF_DEPLOYMENT_HOURS_LABEL, FREEBUFF_GEMINI_PRO_MODEL_ID, FREEBUFF_PREMIUM_MODEL_IDS, + FREEBUFF_PREMIUM_SESSION_PERIOD, FREEBUFF_PREMIUM_SESSION_LIMIT, + FREEBUFF_PREMIUM_SESSION_RESET_TIMEZONE, FREEBUFF_PREMIUM_SESSION_WINDOW_HOURS, isFreebuffModelAvailable, isFreebuffPremiumModelId, isSupportedFreebuffModelId, resolveSupportedFreebuffModel, } from '@codebuff/common/constants/freebuff-models' +import { getZonedDayBounds } from '@codebuff/common/util/zoned-time' import { getInstantAdmitCapacity, @@ -46,34 +49,15 @@ function roundSessionUnits(units: number): number { return Math.round(units * 10) / 10 } -function getRetryAfterMsForPremiumLimit(params: { - admits: Awaited> - totalUnits: number - targetUnits: number - windowMs: number - now: Date -}): number { - let remainingUnits = params.totalUnits - for (const admit of params.admits) { - remainingUnits = roundSessionUnits(remainingUnits - admit.sessionUnits) - if (remainingUnits <= params.targetUnits) { - return Math.max( - 0, - admit.admittedAt.getTime() + params.windowMs - params.now.getTime(), - ) - } - } - return 0 -} - function canStartPremiumSession(snapshot: FreebuffSessionRateLimit): boolean { return snapshot.recentCount < snapshot.limit } +type PremiumQuotaInfo = Omit + interface PremiumQuotaSnapshot { - recentCount: number - admits: Awaited> - windowMs: number + info: PremiumQuotaInfo + resetsAt: Date } async function fetchPremiumQuotaSnapshot( @@ -81,19 +65,28 @@ async function fetchPremiumQuotaSnapshot( deps: SessionDeps, ): Promise { const now = nowOf(deps) - const windowMs = FREEBUFF_PREMIUM_SESSION_WINDOW_HOURS * 60 * 60 * 1000 - const since = new Date(now.getTime() - windowMs) + const premiumDay = 
getZonedDayBounds( + now, + FREEBUFF_PREMIUM_SESSION_RESET_TIMEZONE, + ) const admits = await deps.listRecentPremiumAdmits({ userId, - since, + since: premiumDay.startsAt, models: FREEBUFF_PREMIUM_MODEL_IDS, }) + const recentCount = roundSessionUnits( + admits.reduce((sum, admit) => sum + admit.sessionUnits, 0), + ) return { - recentCount: roundSessionUnits( - admits.reduce((sum, admit) => sum + admit.sessionUnits, 0), - ), - admits, - windowMs, + info: { + limit: FREEBUFF_PREMIUM_SESSION_LIMIT, + period: FREEBUFF_PREMIUM_SESSION_PERIOD, + resetTimeZone: FREEBUFF_PREMIUM_SESSION_RESET_TIMEZONE, + resetAt: premiumDay.resetsAt.toISOString(), + windowHours: FREEBUFF_PREMIUM_SESSION_WINDOW_HOURS, + recentCount, + }, + resetsAt: premiumDay.resetsAt, } } @@ -103,9 +96,7 @@ function toRateLimitInfo( ): FreebuffSessionRateLimit { return { model, - limit: FREEBUFF_PREMIUM_SESSION_LIMIT, - windowHours: FREEBUFF_PREMIUM_SESSION_WINDOW_HOURS, - recentCount: snapshot.recentCount, + ...snapshot.info, } } @@ -120,8 +111,7 @@ async function fetchRateLimitSnapshot( ): Promise< | { info: FreebuffSessionRateLimit - admits: Awaited> - windowMs: number + resetsAt: Date } | undefined > { @@ -129,8 +119,7 @@ async function fetchRateLimitSnapshot( const snapshot = await fetchPremiumQuotaSnapshot(userId, deps) return { info: toRateLimitInfo(model, snapshot), - admits: snapshot.admits, - windowMs: snapshot.windowMs, + resetsAt: snapshot.resetsAt, } } @@ -185,7 +174,8 @@ export interface SessionDeps { * bound to a given model. Compared against the model's configured * `instantAdmitCapacity` to decide whether a new joiner skips the queue. */ activeCountForModel: (model: string) => Promise - /** Rate-limit helper: oldest-first premium admissions inside the window. */ + /** Rate-limit helper: oldest-first premium admissions since today's + * Pacific midnight reset. 
*/ listRecentPremiumAdmits: (params: { userId: string models: readonly string[] @@ -271,11 +261,14 @@ export type RequestSessionResult = requestedModel: string } | { - /** User has hit the per-model admission quota in the rolling window. + /** User has hit the shared premium-session quota for the current Pacific day. * See `FreebuffSessionServerResponse`'s `rate_limited` variant. */ status: 'rate_limited' model: string limit: number + period: 'pacific_day' + resetTimeZone: string + resetAt: string windowHours: number recentCount: number retryAfterMs: number @@ -328,8 +321,8 @@ export async function requestSession(params: { } // Rate-limit check runs before joinOrTakeOver so heavy users never even - // create a queued row. Premium models share one 20h session-unit pool; - // Minimax falls through unchanged as unlimited. + // create a queued row. Premium models share one daily Pacific-time + // session-unit pool; Minimax falls through unchanged as unlimited. // // Takeover/reclaim exception: a user who already holds a queued or // active+unexpired row on this same model is re-anchoring (CLI restart, @@ -357,19 +350,13 @@ export async function requestSession(params: { if (!isReclaim) { const snapshot = await fetchRateLimitSnapshot(params.userId, model, deps) if (snapshot && !canStartPremiumSession(snapshot.info)) { - const retryAfterMs = getRetryAfterMsForPremiumLimit({ - admits: snapshot.admits, - totalUnits: snapshot.info.recentCount, - targetUnits: snapshot.info.limit, - windowMs: snapshot.windowMs, - now, - }) + const retryAfterMs = Math.max( + 0, + snapshot.resetsAt.getTime() - now.getTime(), + ) return { + ...snapshot.info, status: 'rate_limited', - model, - limit: snapshot.info.limit, - windowHours: snapshot.info.windowHours, - recentCount: snapshot.info.recentCount, retryAfterMs, } }