Skip to content

Commit 319b6c5

Browse files
committed
feat(chutes): switch to server-side subscription_usage API for quota tracking
Replace legacy request-count quota tracking with the new /users/me/subscription_usage endpoint, which returns live dollar-based usage for both monthly and 4-hour rolling windows.

Key changes:
- fetch_subscription_usage() replaces fetch_legacy_quota()
- Background job pushes 4-hour window (tighter constraint) to UsageManager
- API values are authoritative (force=True), no more local estimation
- Window config changed from 30-day to 4-hour to match API enforcement
- Removed hardcoded monthly grant - API provides actual cap dynamically
1 parent 3d769ad commit 319b6c5

2 files changed

Lines changed: 521 additions & 247 deletions

File tree

Lines changed: 209 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -1,78 +1,172 @@
11
# SPDX-License-Identifier: LGPL-3.0-only
22
# Copyright (c) 2026 Mirrowel
33

4+
"""
5+
Chutes Provider
6+
7+
Provider for Chutes (https://chutes.ai).
8+
OpenAI-compatible API with dollar-based subscription quota tracking.
9+
10+
Features:
11+
- Dynamic model discovery from /v1/models endpoint
12+
- Per-model pricing cached from models API for accurate cost tracking
13+
- Server-side dollar-based usage tracking via /users/me/subscription_usage
14+
- Monthly and 4-hour rolling window enforcement
15+
16+
Quota system:
17+
Chutes subscription plans include a PAYGO-equivalent allowance of 5×
18+
the subscription price. Limits are enforced across both a monthly window
19+
and a 4-hour rolling window.
20+
21+
$10/mo → $50 monthly cap → $0.83 per 4 h
22+
$15/mo → $75 monthly cap → $1.25 per 4 h
23+
$50/mo → $250 monthly cap → $4.17 per 4 h
24+
$100/mo → $500 monthly cap → $8.33 per 4 h
25+
26+
The /users/me/subscription_usage endpoint returns live dollar usage for
27+
both windows, eliminating the need for local cost estimation.
28+
29+
Environment variables:
30+
CHUTES_API_KEY_1=<api_key>
31+
CHUTES_QUOTA_REFRESH_INTERVAL=300 # optional, seconds
32+
"""
33+
434
import asyncio
535
import httpx
636
import os
37+
import logging
738
from typing import Any, Dict, List, Optional, TYPE_CHECKING
8-
from .provider_interface import ProviderInterface, UsageResetConfigDef
9-
from .utilities.chutes_quota_tracker import ChutesQuotaTracker
1039

1140
if TYPE_CHECKING:
1241
from ..usage import UsageManager
1342

14-
# Create a local logger for this module
15-
import logging
43+
from .provider_interface import ProviderInterface, UsageResetConfigDef
44+
from .utilities.chutes_quota_tracker import ChutesQuotaTracker, CENTS_PER_DOLLAR
1645

1746
lib_logger = logging.getLogger("rotator_library")
1847

19-
# Concurrency limit for parallel quota fetches
20-
QUOTA_FETCH_CONCURRENCY = 5
48+
# Concurrency limit for parallel balance fetches
49+
BALANCE_FETCH_CONCURRENCY = 5
2150

2251

2352
class ChutesProvider(ChutesQuotaTracker, ProviderInterface):
2453
"""
25-
Provider implementation for the chutes.ai API with quota tracking.
54+
Provider implementation for the chutes.ai API with dollar-based quota tracking.
55+
56+
All models share the same credential-level credit balance pool.
57+
Cost is calculated from per-model pricing cached from the /v1/models API.
58+
Usage caps are tracked server-side and fetched via subscription_usage API.
2659
"""
2760

61+
# Cost is calculated via our own calculate_cost() method using cached
62+
# per-model pricing from the Chutes API. The executor calls
63+
# plugin.calculate_cost() first, then falls back to LiteLLM (which
64+
# has no Chutes pricing) — so we must NOT set skip_cost_calculation
65+
# to True, or the executor would skip our calculator too.
66+
skip_cost_calculation = False
67+
68+
# =========================================================================
69+
# PROVIDER CONFIGURATION
70+
# =========================================================================
71+
2872
# Enable environment variable overrides (e.g., QUOTA_GROUPS_CHUTES_GLOBAL)
2973
provider_env_name = "chutes"
3074

31-
# Quota groups for tracking daily limits
32-
# Uses a virtual model "_quota" for credential-level quota tracking
75+
# Single quota group: all models share the same credit balance.
76+
# Named 'credits($)' so the TUI shows a human-readable dollar label.
3377
model_quota_groups = {
34-
"chutes_global": ["_quota"],
78+
"credits($)": ["_balance"],
3579
}
3680

37-
# Usage reset configuration for daily quota
81+
# 4-hour rolling window — the tighter of the two enforced windows.
82+
# Monthly usage is also tracked by the API but the 4-hour window is the
83+
# one that actually constrains usage in practice.
3884
usage_reset_configs = {
3985
"default": UsageResetConfigDef(
40-
window_seconds=86400, # 24 hours (daily quota reset)
86+
window_seconds=14400, # 4 hours
4187
mode="per_model",
42-
description="Chutes daily quota",
43-
field_name="daily",
88+
description="Chutes 4-hour credit window",
89+
field_name="4h",
4490
)
4591
}
4692

4793
def __init__(self, *args, **kwargs):
48-
"""Initialize ChutesProvider with quota tracking."""
94+
"""Initialize ChutesProvider with dollar-based quota tracking."""
4995
super().__init__(*args, **kwargs)
5096

51-
# Quota tracking cache and refresh interval
52-
self._quota_cache: Dict[str, Dict[str, Any]] = {}
97+
# Model pricing cache: model_id → {input, output, input_cache_read}
98+
self._pricing_cache: Dict[str, Dict[str, float]] = {}
99+
100+
# Balance cache: credential_identifier → balance data dict
101+
self._balance_cache: Dict[str, Dict[str, Any]] = {}
102+
53103
self._quota_refresh_interval: int = int(
54104
os.environ.get("CHUTES_QUOTA_REFRESH_INTERVAL", "300")
55105
)
56106

107+
# =========================================================================
108+
# USAGE TRACKING CONFIGURATION
109+
# =========================================================================
110+
111+
def get_usage_reset_config(self, credential: str) -> Optional[Dict[str, Any]]:
112+
"""
113+
Return usage reset configuration for Chutes credentials.
114+
115+
Uses per_model mode with a 4-hour window to match the tighter
116+
rolling window enforced by the API.
117+
"""
118+
return {
119+
"mode": "per_model",
120+
"window_seconds": 14400, # 4 hours
121+
}
122+
123+
# =========================================================================
124+
# QUOTA GROUPING
125+
# =========================================================================
126+
57127
def get_model_quota_group(self, model: str) -> Optional[str]:
58128
"""
59129
Get the quota group for a model.
60130
61-
All Chutes models share the same credential-level quota pool,
131+
All Chutes models share the same credential-level credit balance pool,
62132
so they all belong to the same quota group.
63133
64134
Args:
65-
model: Model name (ignored - all models share quota)
135+
model: Model name (ignored — all models share one balance)
136+
137+
Returns:
138+
Quota group name
139+
"""
140+
return "credits($)"
141+
142+
def get_models_in_quota_group(self, group: str) -> List[str]:
143+
"""
144+
Return all models belonging to the given quota group.
145+
146+
Args:
147+
group: Quota group identifier
66148
67149
Returns:
68-
Quota group identifier for shared credential-level tracking
150+
List of model names in the group
69151
"""
70-
return "chutes_global"
152+
if group == "credits($)":
153+
return ["_balance"]
154+
return []
155+
156+
def get_quota_groups(self) -> List[str]:
157+
"""Return the list of quota groups for this provider."""
158+
return ["credits($)"]
159+
160+
# =========================================================================
161+
# MODEL DISCOVERY
162+
# =========================================================================
71163

72164
async def get_models(self, api_key: str, client: httpx.AsyncClient) -> List[str]:
73165
"""
74166
Fetch available models from the Chutes API.
75167
168+
Also caches per-model pricing for cost calculation.
169+
76170
Args:
77171
api_key: Chutes API key
78172
client: HTTP client
@@ -86,9 +180,61 @@ async def get_models(self, api_key: str, client: httpx.AsyncClient) -> List[str]
86180
headers={"Authorization": f"Bearer {api_key}"},
87181
)
88182
response.raise_for_status()
89-
return [
90-
f"chutes/{model['id']}" for model in response.json().get("data", [])
91-
]
183+
data = response.json()
184+
185+
models = []
186+
for model_data in data.get("data", []):
187+
model_id = model_data.get("id", "")
188+
if model_id:
189+
models.append(f"chutes/{model_id}")
190+
191+
# Cache pricing while we're at it
192+
price_info = model_data.get("pricing") or model_data.get(
193+
"price", {}
194+
)
195+
if price_info:
196+
if "prompt" in price_info:
197+
self._pricing_cache[model_id] = {
198+
"input": float(price_info.get("prompt", 0.0)),
199+
"output": float(price_info.get("completion", 0.0)),
200+
"input_cache_read": float(
201+
price_info.get(
202+
"input_cache_read",
203+
float(price_info.get("prompt", 0.0)) * 0.5,
204+
)
205+
),
206+
}
207+
elif "input" in price_info:
208+
input_data = price_info.get("input", {})
209+
output_data = price_info.get("output", {})
210+
cache_data = price_info.get("input_cache_read", {})
211+
input_cost = float(
212+
input_data.get("usd", 0.0)
213+
if isinstance(input_data, dict)
214+
else input_data
215+
)
216+
output_cost = float(
217+
output_data.get("usd", 0.0)
218+
if isinstance(output_data, dict)
219+
else output_data
220+
)
221+
cache_cost = float(
222+
cache_data.get("usd", input_cost * 0.5)
223+
if isinstance(cache_data, dict)
224+
else (cache_data if cache_data else input_cost * 0.5)
225+
)
226+
self._pricing_cache[model_id] = {
227+
"input": input_cost,
228+
"output": output_cost,
229+
"input_cache_read": cache_cost,
230+
}
231+
232+
if self._pricing_cache:
233+
lib_logger.info(
234+
f"Cached pricing for {len(self._pricing_cache)} Chutes models"
235+
)
236+
237+
return models
92238
except (httpx.RequestError, httpx.HTTPStatusError) as e:
93239
lib_logger.error(f"Failed to fetch chutes.ai models: {e}")
94240
return []
@@ -98,15 +244,10 @@ async def get_models(self, api_key: str, client: httpx.AsyncClient) -> List[str]
98244
# =========================================================================
99245

100246
def get_background_job_config(self) -> Optional[Dict[str, Any]]:
101-
"""
102-
Configure periodic quota usage refresh.
103-
104-
Returns:
105-
Background job configuration for quota refresh
106-
"""
247+
"""Configure periodic credit balance refresh."""
107248
return {
108249
"interval": self._quota_refresh_interval,
109-
"name": "chutes_quota_refresh",
250+
"name": "chutes_balance_refresh",
110251
"run_on_start": True,
111252
}
112253

@@ -116,55 +257,61 @@ async def run_background_job(
116257
credentials: List[str],
117258
) -> None:
118259
"""
119-
Refresh quota usage for all credentials in parallel.
260+
Refresh credit balance for all credentials from the subscription API.
261+
262+
Fetches live dollar usage from /users/me/subscription_usage and pushes
263+
the 4-hour window (the primary tracked window) to the UsageManager;
264+
monthly usage and cap are only logged for diagnostics.
120265
121266
Args:
122267
usage_manager: UsageManager instance
123268
credentials: List of API keys
124269
"""
125-
semaphore = asyncio.Semaphore(QUOTA_FETCH_CONCURRENCY)
270+
semaphore = asyncio.Semaphore(BALANCE_FETCH_CONCURRENCY)
126271

127-
async def refresh_single_credential(
128-
api_key: str, client: httpx.AsyncClient
129-
) -> None:
272+
async def refresh_single(api_key: str, client: httpx.AsyncClient) -> None:
130273
async with semaphore:
131274
try:
132-
usage_data = await self.fetch_quota_usage(api_key, client)
133-
134-
if usage_data.get("status") == "success":
135-
# Update quota cache
136-
self._quota_cache[api_key] = usage_data
275+
balance_data = await self.refresh_balance(
276+
api_key,
277+
credential_identifier=api_key,
278+
client=client,
279+
)
137280

138-
# Calculate values for usage manager
139-
remaining_fraction = usage_data.get("remaining_fraction", 0.0)
140-
quota = usage_data.get("quota", 0)
141-
reset_ts = usage_data.get("reset_at")
142-
143-
# Store baseline in usage manager
144-
# Since Chutes uses credential-level quota, we use a virtual model name
145-
quota_used = (
146-
int((1.0 - remaining_fraction) * quota) if quota > 0 else 0
281+
if balance_data.get("status") == "success":
282+
# Push 4-hour window data (the tighter constraint)
283+
four_hour_cap_cents = balance_data.get(
284+
"four_hour_cap_cents", 0
285+
)
286+
four_hour_used_cents = balance_data.get(
287+
"four_hour_used_cents", 0
147288
)
289+
148290
await usage_manager.update_quota_baseline(
149291
api_key,
150-
"chutes/_quota", # Virtual model for credential-level tracking
151-
quota_max_requests=quota,
152-
quota_reset_ts=reset_ts,
153-
quota_used=quota_used,
292+
"chutes/_balance",
293+
quota_max_requests=four_hour_cap_cents,
294+
quota_reset_ts=None,
295+
quota_used=four_hour_used_cents,
296+
force=True, # API values are authoritative
154297
)
155298

299+
monthly = balance_data.get("monthly", {})
300+
four_hour = balance_data.get("four_hour", {})
156301
lib_logger.debug(
157-
f"Updated Chutes quota baseline for credential: "
158-
f"{usage_data['remaining']:.0f}/{quota} remaining "
159-
f"({remaining_fraction * 100:.0f}%)"
302+
f"Updated Chutes balance baseline: "
303+
f"4h=${four_hour.get('usage', 0):.4f}/"
304+
f"${four_hour.get('cap', 0):.2f}, "
305+
f"monthly=${monthly.get('usage', 0):.4f}/"
306+
f"${monthly.get('cap', 0):.2f}, "
307+
f"models_priced={len(self._pricing_cache)}"
160308
)
161309

162310
except Exception as e:
163-
lib_logger.warning(f"Failed to refresh Chutes quota usage: {e}")
311+
lib_logger.warning(
312+
f"Failed to refresh Chutes balance: {e}"
313+
)
164314

165-
# Fetch all credentials in parallel with shared HTTP client
166315
async with httpx.AsyncClient(timeout=30.0) as client:
167-
tasks = [
168-
refresh_single_credential(api_key, client) for api_key in credentials
169-
]
316+
tasks = [refresh_single(api_key, client) for api_key in credentials]
170317
await asyncio.gather(*tasks, return_exceptions=True)

0 commit comments

Comments
 (0)