11# SPDX-License-Identifier: LGPL-3.0-only
22# Copyright (c) 2026 Mirrowel
33
4+ """
5+ Chutes Provider
6+
7+ Provider for Chutes (https://chutes.ai).
8+ OpenAI-compatible API with dollar-based subscription quota tracking.
9+
10+ Features:
11+ - Dynamic model discovery from /v1/models endpoint
12+ - Per-model pricing cached from models API for accurate cost tracking
13+ - Server-side dollar-based usage tracking via /users/me/subscription_usage
14+ - Monthly and 4-hour rolling window enforcement
15+
16+ Quota system:
17+ Chutes subscription plans include a PAYGO-equivalent allowance of 5×
18+ the subscription price. Limits are enforced across both a monthly window
19+ and a 4-hour rolling window.
20+
21+ $10/mo → $50 monthly cap → $0.83 per 4 h
22+ $15/mo → $75 monthly cap → $1.25 per 4 h
23+ $50/mo → $250 monthly cap → $4.17 per 4 h
24+ $100/mo → $500 monthly cap → $8.33 per 4 h
25+
26+ The /users/me/subscription_usage endpoint returns live dollar usage for
27+ both windows, eliminating the need for local cost estimation.
28+
29+ Environment variables:
30+ CHUTES_API_KEY_1=<api_key>
31+ CHUTES_QUOTA_REFRESH_INTERVAL=300 # optional, seconds
32+ """
33+
434import asyncio
535import httpx
636import os
37+ import logging
738from typing import Any , Dict , List , Optional , TYPE_CHECKING
8- from .provider_interface import ProviderInterface , UsageResetConfigDef
9- from .utilities .chutes_quota_tracker import ChutesQuotaTracker
1039
1140if TYPE_CHECKING :
1241 from ..usage import UsageManager
1342
14- # Create a local logger for this module
15- import logging
43+ from . provider_interface import ProviderInterface , UsageResetConfigDef
44+ from . utilities . chutes_quota_tracker import ChutesQuotaTracker , CENTS_PER_DOLLAR
1645
# Logger used by this module, obtained under the "rotator_library" name.
lib_logger = logging.getLogger("rotator_library")

# Concurrency limit for parallel balance fetches
BALANCE_FETCH_CONCURRENCY = 5
2150
2251
2352class ChutesProvider (ChutesQuotaTracker , ProviderInterface ):
2453 """
25- Provider implementation for the chutes.ai API with quota tracking.
54+ Provider implementation for the chutes.ai API with dollar-based quota tracking.
55+
56+ All models share the same credential-level credit balance pool.
57+ Cost is calculated from per-model pricing cached from the /v1/models API.
58+ Usage caps are tracked server-side and fetched via subscription_usage API.
2659 """
2760
61+ # Cost is calculated via our own calculate_cost() method using cached
62+ # per-model pricing from the Chutes API. The executor calls
63+ # plugin.calculate_cost() first, then falls back to LiteLLM (which
64+ # has no Chutes pricing) — so we must NOT set skip_cost_calculation
65+ # to True, or the executor would skip our calculator too.
66+ skip_cost_calculation = False
67+
68+ # =========================================================================
69+ # PROVIDER CONFIGURATION
70+ # =========================================================================
71+
2872 # Enable environment variable overrides (e.g., QUOTA_GROUPS_CHUTES_GLOBAL)
2973 provider_env_name = "chutes"
3074
31- # Quota groups for tracking daily limits
32- # Uses a virtual model "_quota" for credential-level quota tracking
75+ # Single quota group: all models share the same credit balance.
76+ # Named 'credits($)' so the TUI shows a human-readable dollar label.
3377 model_quota_groups = {
34- "chutes_global " : ["_quota " ],
78+ "credits($) " : ["_balance " ],
3579 }
3680
37- # Usage reset configuration for daily quota
81+ # 4-hour rolling window — the tighter of the two enforced windows.
82+ # Monthly usage is also tracked by the API but the 4-hour window is the
83+ # one that actually constrains usage in practice.
3884 usage_reset_configs = {
3985 "default" : UsageResetConfigDef (
40- window_seconds = 86400 , # 24 hours (daily quota reset)
86+ window_seconds = 14400 , # 4 hours
4187 mode = "per_model" ,
42- description = "Chutes daily quota " ,
43- field_name = "daily " ,
88+ description = "Chutes 4-hour credit window " ,
89+ field_name = "4h " ,
4490 )
4591 }
4692
def __init__(self, *args, **kwargs):
    """
    Initialize ChutesProvider with dollar-based quota tracking.

    All positional and keyword arguments are forwarded unchanged to the
    parent initializers via super().

    Raises:
        ValueError: If CHUTES_QUOTA_REFRESH_INTERVAL is set to a value
            that int() cannot parse.
    """
    super().__init__(*args, **kwargs)

    # Model pricing cache: model_id -> {input, output, input_cache_read}
    self._pricing_cache: Dict[str, Dict[str, float]] = {}

    # Balance cache: credential_identifier -> balance data dict
    self._balance_cache: Dict[str, Dict[str, Any]] = {}

    # Background refresh period in seconds; falls back to 300 when the
    # environment variable is not set.
    self._quota_refresh_interval: int = int(
        os.environ.get("CHUTES_QUOTA_REFRESH_INTERVAL", "300")
    )
56106
107+ # =========================================================================
108+ # USAGE TRACKING CONFIGURATION
109+ # =========================================================================
110+
def get_usage_reset_config(self, credential: str) -> Optional[Dict[str, Any]]:
    """
    Return usage reset configuration for Chutes credentials.

    Uses per_model mode with a 4-hour window to match the tighter
    rolling window enforced by the API.

    Args:
        credential: Credential identifier (not consulted — the same
            configuration is returned for every credential)

    Returns:
        Dict with "mode" and "window_seconds" keys
    """
    return {
        "mode": "per_model",
        "window_seconds": 14400,  # 4 hours
    }
122+
123+ # =========================================================================
124+ # QUOTA GROUPING
125+ # =========================================================================
126+
def get_model_quota_group(self, model: str) -> Optional[str]:
    """
    Get the quota group for a model.

    All Chutes models share the same credential-level credit balance pool,
    so they all belong to the same quota group.

    Args:
        model: Model name (ignored — all models share one balance)

    Returns:
        Quota group name
    """
    return "credits($)"
141+
def get_models_in_quota_group(self, group: str) -> List[str]:
    """
    Return all models belonging to the given quota group.

    Args:
        group: Quota group identifier

    Returns:
        List of model names in the group; empty for any group other
        than "credits($)"
    """
    if group == "credits($)":
        # The single credit group is represented by one virtual model name.
        return ["_balance"]
    return []
155+
def get_quota_groups(self) -> List[str]:
    """
    Return the list of quota groups for this provider.

    Returns:
        A new single-element list containing "credits($)"
    """
    return ["credits($)"]
159+
160+ # =========================================================================
161+ # MODEL DISCOVERY
162+ # =========================================================================
71163
72164 async def get_models (self , api_key : str , client : httpx .AsyncClient ) -> List [str ]:
73165 """
74166 Fetch available models from the Chutes API.
75167
168+ Also caches per-model pricing for cost calculation.
169+
76170 Args:
77171 api_key: Chutes API key
78172 client: HTTP client
@@ -86,9 +180,61 @@ async def get_models(self, api_key: str, client: httpx.AsyncClient) -> List[str]
86180 headers = {"Authorization" : f"Bearer { api_key } " },
87181 )
88182 response .raise_for_status ()
89- return [
90- f"chutes/{ model ['id' ]} " for model in response .json ().get ("data" , [])
91- ]
183+ data = response .json ()
184+
185+ models = []
186+ for model_data in data .get ("data" , []):
187+ model_id = model_data .get ("id" , "" )
188+ if model_id :
189+ models .append (f"chutes/{ model_id } " )
190+
191+ # Cache pricing while we're at it
192+ price_info = model_data .get ("pricing" ) or model_data .get (
193+ "price" , {}
194+ )
195+ if price_info :
196+ if "prompt" in price_info :
197+ self ._pricing_cache [model_id ] = {
198+ "input" : float (price_info .get ("prompt" , 0.0 )),
199+ "output" : float (price_info .get ("completion" , 0.0 )),
200+ "input_cache_read" : float (
201+ price_info .get (
202+ "input_cache_read" ,
203+ float (price_info .get ("prompt" , 0.0 )) * 0.5 ,
204+ )
205+ ),
206+ }
207+ elif "input" in price_info :
208+ input_data = price_info .get ("input" , {})
209+ output_data = price_info .get ("output" , {})
210+ cache_data = price_info .get ("input_cache_read" , {})
211+ input_cost = float (
212+ input_data .get ("usd" , 0.0 )
213+ if isinstance (input_data , dict )
214+ else input_data
215+ )
216+ output_cost = float (
217+ output_data .get ("usd" , 0.0 )
218+ if isinstance (output_data , dict )
219+ else output_data
220+ )
221+ cache_cost = float (
222+ cache_data .get ("usd" , input_cost * 0.5 )
223+ if isinstance (cache_data , dict )
224+ else (cache_data if cache_data else input_cost * 0.5 )
225+ )
226+ self ._pricing_cache [model_id ] = {
227+ "input" : input_cost ,
228+ "output" : output_cost ,
229+ "input_cache_read" : cache_cost ,
230+ }
231+
232+ if self ._pricing_cache :
233+ lib_logger .info (
234+ f"Cached pricing for { len (self ._pricing_cache )} Chutes models"
235+ )
236+
237+ return models
92238 except (httpx .RequestError , httpx .HTTPStatusError ) as e :
93239 lib_logger .error (f"Failed to fetch chutes.ai models: { e } " )
94240 return []
@@ -98,15 +244,10 @@ async def get_models(self, api_key: str, client: httpx.AsyncClient) -> List[str]
98244 # =========================================================================
99245
def get_background_job_config(self) -> Optional[Dict[str, Any]]:
    """
    Configure periodic credit balance refresh.

    Returns:
        Dict with the refresh "interval" (taken from
        self._quota_refresh_interval), the job "name", and
        "run_on_start" set to True
    """
    return {
        "interval": self._quota_refresh_interval,
        "name": "chutes_balance_refresh",
        "run_on_start": True,
    }
112253
@@ -116,55 +257,61 @@ async def run_background_job(
116257 credentials : List [str ],
117258 ) -> None :
118259 """
119- Refresh quota usage for all credentials in parallel.
260+ Refresh credit balance for all credentials from the subscription API.
261+
262+ Fetches live dollar usage from /users/me/subscription_usage and pushes
263+ both the 4-hour window (as the primary tracked window) and monthly cap
264+ data to the UsageManager.
120265
121266 Args:
122267 usage_manager: UsageManager instance
123268 credentials: List of API keys
124269 """
125- semaphore = asyncio .Semaphore (QUOTA_FETCH_CONCURRENCY )
270+ semaphore = asyncio .Semaphore (BALANCE_FETCH_CONCURRENCY )
126271
127- async def refresh_single_credential (
128- api_key : str , client : httpx .AsyncClient
129- ) -> None :
272+ async def refresh_single (api_key : str , client : httpx .AsyncClient ) -> None :
130273 async with semaphore :
131274 try :
132- usage_data = await self .fetch_quota_usage ( api_key , client )
133-
134- if usage_data . get ( "status" ) == "success" :
135- # Update quota cache
136- self . _quota_cache [ api_key ] = usage_data
275+ balance_data = await self .refresh_balance (
276+ api_key ,
277+ credential_identifier = api_key ,
278+ client = client ,
279+ )
137280
138- # Calculate values for usage manager
139- remaining_fraction = usage_data .get ("remaining_fraction" , 0.0 )
140- quota = usage_data .get ("quota" , 0 )
141- reset_ts = usage_data .get ("reset_at" )
142-
143- # Store baseline in usage manager
144- # Since Chutes uses credential-level quota, we use a virtual model name
145- quota_used = (
146- int ((1.0 - remaining_fraction ) * quota ) if quota > 0 else 0
281+ if balance_data .get ("status" ) == "success" :
282+ # Push 4-hour window data (the tighter constraint)
283+ four_hour_cap_cents = balance_data .get (
284+ "four_hour_cap_cents" , 0
285+ )
286+ four_hour_used_cents = balance_data .get (
287+ "four_hour_used_cents" , 0
147288 )
289+
148290 await usage_manager .update_quota_baseline (
149291 api_key ,
150- "chutes/_quota" , # Virtual model for credential-level tracking
151- quota_max_requests = quota ,
152- quota_reset_ts = reset_ts ,
153- quota_used = quota_used ,
292+ "chutes/_balance" ,
293+ quota_max_requests = four_hour_cap_cents ,
294+ quota_reset_ts = None ,
295+ quota_used = four_hour_used_cents ,
296+ force = True , # API values are authoritative
154297 )
155298
299+ monthly = balance_data .get ("monthly" , {})
300+ four_hour = balance_data .get ("four_hour" , {})
156301 lib_logger .debug (
157- f"Updated Chutes quota baseline for credential: "
158- f"{ usage_data ['remaining' ]:.0f} /{ quota } remaining "
159- f"({ remaining_fraction * 100 :.0f} %)"
302+ f"Updated Chutes balance baseline: "
303+ f"4h=${ four_hour .get ('usage' , 0 ):.4f} /"
304+ f"${ four_hour .get ('cap' , 0 ):.2f} , "
305+ f"monthly=${ monthly .get ('usage' , 0 ):.4f} /"
306+ f"${ monthly .get ('cap' , 0 ):.2f} , "
307+ f"models_priced={ len (self ._pricing_cache )} "
160308 )
161309
162310 except Exception as e :
163- lib_logger .warning (f"Failed to refresh Chutes quota usage: { e } " )
311+ lib_logger .warning (
312+ f"Failed to refresh Chutes balance: { e } "
313+ )
164314
165- # Fetch all credentials in parallel with shared HTTP client
166315 async with httpx .AsyncClient (timeout = 30.0 ) as client :
167- tasks = [
168- refresh_single_credential (api_key , client ) for api_key in credentials
169- ]
316+ tasks = [refresh_single (api_key , client ) for api_key in credentials ]
170317 await asyncio .gather (* tasks , return_exceptions = True )
0 commit comments