Skip to content

Commit 6e51874

Browse files
committed
feat(chutes): sliding window tracking with snapshot + local delta
The 4-hour window is a true sliding window (usage ages out), not a fixed window with resets. This requires the API to be authoritative (force=True) so aging is reflected. Key changes: - force=True: API values overwrite local state each refresh so that usage going DOWN (aging) is correctly reflected - int() → round(): Fix precision loss that truncated sub-cent API values to 0 (e.g. $0.007 → 0 cents, now correctly rounds to 1) - Local cost delta: Track per-request costs between API refreshes in _cost_since_refresh accumulator, added to API baseline on push - 60s refresh interval (was 300s) for responsiveness - record_request_cost() hook on provider, called by executor after calculate_cost() for both streaming and non-streaming paths
1 parent bbd0f64 commit 6e51874

4 files changed

Lines changed: 91 additions & 16 deletions

File tree

src/rotator_library/client/executor.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -593,6 +593,16 @@ async def _execute_non_streaming(
593593
response
594594
)
595595

596+
# Notify plugin of request cost (for local delta tracking)
597+
if (
598+
approx_cost > 0
599+
and plugin
600+
and hasattr(plugin, "record_request_cost")
601+
):
602+
plugin.record_request_cost(
603+
cred, model, approx_cost
604+
)
605+
596606
cred_context.mark_success(
597607
response=response,
598608
prompt_tokens=prompt_tokens,
@@ -803,6 +813,9 @@ async def _execute_streaming(
803813

804814
# Hand off to streaming handler with cred_context
805815
# The handler will call mark_success on completion
816+
cost_recorder = None
817+
if plugin and hasattr(plugin, "record_request_cost"):
818+
cost_recorder = plugin.record_request_cost
806819
base_stream = self._streaming_handler.wrap_stream(
807820
stream,
808821
cred,
@@ -811,6 +824,7 @@ async def _execute_streaming(
811824
cred_context,
812825
skip_cost_calculation=skip_cost_calculation,
813826
cost_calculator=cost_calculator,
827+
cost_recorder=cost_recorder,
814828
)
815829

816830
lib_logger.info(

src/rotator_library/client/streaming.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ async def wrap_stream(
4949
cred_context: Optional["CredentialContext"] = None,
5050
skip_cost_calculation: bool = False,
5151
cost_calculator: Optional[Callable[[str, int, int], float]] = None,
52+
cost_recorder: Optional[Callable[[str, str, float], None]] = None,
5253
) -> AsyncGenerator[str, None]:
5354
"""
5455
Wrap a LiteLLM stream with error handling and usage tracking.
@@ -260,6 +261,12 @@ async def wrap_stream(
260261
prompt_tokens_cache_write=prompt_tokens_cache_write,
261262
approx_cost=approx_cost,
262263
)
264+
# Notify plugin of request cost (for local delta tracking)
265+
if approx_cost > 0 and cost_recorder:
266+
try:
267+
cost_recorder(credential, model, approx_cost)
268+
except Exception:
269+
pass
263270

264271
# Yield [DONE] for completed streams
265272
yield "data: [DONE]\n\n"

src/rotator_library/providers/chutes_provider.py

Lines changed: 56 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -101,9 +101,14 @@ def __init__(self, *args, **kwargs):
101101
self._balance_cache: Dict[str, Dict[str, Any]] = {}
102102

103103
self._quota_refresh_interval: int = int(
104-
os.environ.get("CHUTES_QUOTA_REFRESH_INTERVAL", "300")
104+
os.environ.get("CHUTES_QUOTA_REFRESH_INTERVAL", "60")
105105
)
106106

107+
# Local cost delta tracking between API refreshes.
108+
# Maps credential → dollars spent since last refresh.
109+
# Reset to 0 on each successful API refresh.
110+
self._cost_since_refresh: Dict[str, float] = {}
111+
107112
# =========================================================================
108113
# USAGE TRACKING CONFIGURATION
109114
# =========================================================================
@@ -120,6 +125,29 @@ def get_usage_reset_config(self, credential: str) -> Optional[Dict[str, Any]]:
120125
"window_seconds": 14400, # 4 hours
121126
}
122127

128+
def record_request_cost(
129+
self,
130+
credential: str,
131+
model: str,
132+
cost_dollars: float,
133+
) -> None:
134+
"""
135+
Record the cost of a completed request for local delta tracking.
136+
137+
Called by the executor after `calculate_cost()` returns a non-zero
138+
value. This accumulates local cost between API refreshes so the
139+
display stays responsive even with the 60 s refresh interval.
140+
141+
Args:
142+
credential: API key that was used
143+
model: Model name (unused, all models share the balance)
144+
cost_dollars: Cost of the request in USD
145+
"""
146+
if cost_dollars > 0:
147+
self._cost_since_refresh[credential] = (
148+
self._cost_since_refresh.get(credential, 0.0) + cost_dollars
149+
)
150+
123151
# =========================================================================
124152
# QUOTA GROUPING
125153
# =========================================================================
@@ -281,30 +309,44 @@ async def refresh_single(api_key: str, client: httpx.AsyncClient) -> None:
281309
)
282310

283311
if balance_data.get("status") == "success":
312+
# API is authoritative for the sliding window.
313+
# Usage can go DOWN as old spending ages out,
314+
# so we must use force=True.
315+
316+
# Add local cost delta accumulated since last refresh
317+
local_delta_dollars = self._cost_since_refresh.get(
318+
api_key, 0.0
319+
)
320+
local_delta_cents = int(round(
321+
local_delta_dollars * CENTS_PER_DOLLAR
322+
))
323+
284324
# Push 4-hour window data (tighter constraint)
325+
four_hour_used_cents = (
326+
balance_data.get("four_hour_used_cents", 0)
327+
+ local_delta_cents
328+
)
285329
four_hour_cap_cents = balance_data.get(
286330
"four_hour_cap_cents", 0
287331
)
288-
four_hour_used_cents = balance_data.get(
289-
"four_hour_used_cents", 0
290-
)
291332

292333
await usage_manager.update_quota_baseline(
293334
api_key,
294335
"chutes/_balance_4h",
295336
quota_max_requests=four_hour_cap_cents,
296337
quota_reset_ts=None,
297338
quota_used=four_hour_used_cents,
298-
force=False, # Keep max(local, api) — API lags behind
339+
force=True, # API is authoritative (sliding window)
299340
)
300341

301342
# Push monthly window data (overall budget)
343+
monthly_used_cents = (
344+
balance_data.get("monthly_used_cents", 0)
345+
+ local_delta_cents
346+
)
302347
monthly_cap_cents = balance_data.get(
303348
"monthly_cap_cents", 0
304349
)
305-
monthly_used_cents = balance_data.get(
306-
"monthly_used_cents", 0
307-
)
308350

309351
await usage_manager.update_quota_baseline(
310352
api_key,
@@ -313,15 +355,19 @@ async def refresh_single(api_key: str, client: httpx.AsyncClient) -> None:
313355
quota_reset_ts=None,
314356
quota_used=monthly_used_cents,
315357
quota_group="monthly($)",
316-
force=False, # Keep max(local, api) — API lags behind
358+
force=True, # API is authoritative
317359
)
318360

361+
# Reset local delta — the API now includes these costs
362+
self._cost_since_refresh[api_key] = 0.0
363+
319364
monthly = balance_data.get("monthly", {})
320365
four_hour = balance_data.get("four_hour", {})
321366
lib_logger.debug(
322367
f"Updated Chutes balance baseline: "
323368
f"4h=${four_hour.get('usage', 0):.4f}/"
324-
f"${four_hour.get('cap', 0):.2f}, "
369+
f"${four_hour.get('cap', 0):.2f}"
370+
f"(+${local_delta_dollars:.4f} local), "
325371
f"monthly=${monthly.get('usage', 0):.4f}/"
326372
f"${monthly.get('cap', 0):.2f}, "
327373
f"models_priced={len(self._pricing_cache)}"

src/rotator_library/providers/utilities/chutes_quota_tracker.py

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -376,14 +376,22 @@ async def refresh_balance(
376376
"monthly": monthly,
377377
"four_hour": four_hour,
378378
# Cents-based values for UsageManager compatibility
379-
"monthly_cap_cents": int(monthly.get("cap", 0) * CENTS_PER_DOLLAR),
380-
"monthly_used_cents": int(monthly.get("usage", 0) * CENTS_PER_DOLLAR),
381-
"four_hour_cap_cents": int(
379+
# Use round() to avoid truncating sub-cent usage to 0
380+
"monthly_cap_cents": int(round(
381+
monthly.get("cap", 0) * CENTS_PER_DOLLAR
382+
)),
383+
"monthly_used_cents": int(round(
384+
monthly.get("usage", 0) * CENTS_PER_DOLLAR
385+
)),
386+
"four_hour_cap_cents": int(round(
382387
four_hour.get("cap", 0) * CENTS_PER_DOLLAR
383-
),
384-
"four_hour_used_cents": int(
388+
)),
389+
"four_hour_used_cents": int(round(
385390
four_hour.get("usage", 0) * CENTS_PER_DOLLAR
386-
),
391+
)),
392+
# Raw dollar values for precise logging
393+
"monthly_usage_dollars": monthly.get("usage", 0.0),
394+
"four_hour_usage_dollars": four_hour.get("usage", 0.0),
387395
"pricing_models_cached": len(self._pricing_cache),
388396
"fetched_at": time.time(),
389397
}

0 commit comments

Comments
 (0)