b3nw
diff --git a/src/rotator_library/providers/chutes_provider.py
Lines changed: 209 additions & 62 deletions b/src/rotator_library/providers/chutes_provider.py
Lines changed: 209 additions & 62 deletions
@@ -1,78 +1,172 @@
 # SPDX-License-Identifier: LGPL-3.0-only
 # Copyright (c) 2026 Mirrowel
 
+"""
+Chutes Provider
+
+Provider for Chutes (https://chutes.ai).
+OpenAI-compatible API with dollar-based subscription quota tracking.
+
+Features:
+- Dynamic model discovery from /v1/models endpoint
+- Per-model pricing cached from models API for accurate cost tracking
+- Server-side dollar-based usage tracking via /users/me/subscription_usage
+- Monthly and 4-hour rolling window enforcement
+
+Quota system:
+Chutes subscription plans include a PAYGO-equivalent allowance of 5×
+the subscription price.  Limits are enforced across both a monthly window
+and a 4-hour rolling window.
+
+    $10/mo  →  $50   monthly cap  →  $0.83  per 4 h
+    $15/mo  →  $75   monthly cap  →  $1.25  per 4 h
+    $50/mo  →  $250  monthly cap  →  $4.17  per 4 h
+    $100/mo →  $500  monthly cap  →  $8.33  per 4 h
+
+The /users/me/subscription_usage endpoint returns live dollar usage for
+both windows, so quota state is taken from the server, not estimated locally.
+
+Environment variables:
+    CHUTES_API_KEY_1=<api_key>
+    CHUTES_QUOTA_REFRESH_INTERVAL=300  # optional, seconds
+"""
+
 import asyncio
 import httpx
 import os
+import logging
 from typing import Any, Dict, List, Optional, TYPE_CHECKING
-from .provider_interface import ProviderInterface, UsageResetConfigDef
-from .utilities.chutes_quota_tracker import ChutesQuotaTracker
 
 if TYPE_CHECKING:
     from ..usage import UsageManager
 
-# Create a local logger for this module
-import logging
+from .provider_interface import ProviderInterface, UsageResetConfigDef
+from .utilities.chutes_quota_tracker import ChutesQuotaTracker, CENTS_PER_DOLLAR
 
 lib_logger = logging.getLogger("rotator_library")
 
-# Concurrency limit for parallel quota fetches
-QUOTA_FETCH_CONCURRENCY = 5
+# Concurrency limit for parallel balance fetches
+BALANCE_FETCH_CONCURRENCY = 5
 
 
 class ChutesProvider(ChutesQuotaTracker, ProviderInterface):
     """
-    Provider implementation for the chutes.ai API with quota tracking.
+    Provider implementation for the chutes.ai API with dollar-based quota tracking.
+
+    All models share the same credential-level credit balance pool.
+    Cost is calculated from per-model pricing cached from the /v1/models API.
+    Usage caps are tracked server-side and fetched via subscription_usage API.
     """
 
+    # Cost is calculated via our own calculate_cost() method using cached
+    # per-model pricing from the Chutes API.  The executor calls
+    # plugin.calculate_cost() first, then falls back to LiteLLM (which
+    # has no Chutes pricing) — so we must NOT set skip_cost_calculation
+    # to True, or the executor would skip our calculator too.
+    skip_cost_calculation = False
+
+    # =========================================================================
+    # PROVIDER CONFIGURATION
+    # =========================================================================
+
     # Enable environment variable overrides (e.g., QUOTA_GROUPS_CHUTES_GLOBAL)
     provider_env_name = "chutes"
 
-    # Quota groups for tracking daily limits
-    # Uses a virtual model "_quota" for credential-level quota tracking
+    # Single quota group: all models share the same credit balance.
+    # Named 'credits($)' so the TUI shows a human-readable dollar label.
     model_quota_groups = {
-        "chutes_global": ["_quota"],
+        "credits($)": ["_balance"],
     }
 
-    # Usage reset configuration for daily quota
+    # 4-hour rolling window — the tighter of the two enforced windows.
+    # Monthly usage is also tracked by the API but the 4-hour window is the
+    # one that actually constrains usage in practice.
     usage_reset_configs = {
         "default": UsageResetConfigDef(
-            window_seconds=86400,  # 24 hours (daily quota reset)
+            window_seconds=14400,  # 4 hours
             mode="per_model",
-            description="Chutes daily quota",
-            field_name="daily",
+            description="Chutes 4-hour credit window",
+            field_name="4h",
         )
     }
 
     def __init__(self, *args, **kwargs):
-        """Initialize ChutesProvider with quota tracking."""
+        """Initialize ChutesProvider with dollar-based quota tracking."""
         super().__init__(*args, **kwargs)
 
-        # Quota tracking cache and refresh interval
-        self._quota_cache: Dict[str, Dict[str, Any]] = {}
+        # Model pricing cache: model_id → {input, output, input_cache_read}
+        self._pricing_cache: Dict[str, Dict[str, float]] = {}
+
+        # Balance cache: credential_identifier → balance data dict
+        self._balance_cache: Dict[str, Dict[str, Any]] = {}
+
         self._quota_refresh_interval: int = int(
             os.environ.get("CHUTES_QUOTA_REFRESH_INTERVAL", "300")
         )
 
+    # =========================================================================
+    # USAGE TRACKING CONFIGURATION
+    # =========================================================================
+
+    def get_usage_reset_config(self, credential: str) -> Optional[Dict[str, Any]]:
+        """
+        Return usage reset configuration for Chutes credentials.
+
+        Uses per_model mode with a 4-hour window to match the tighter
+        rolling window enforced by the API.
+        """
+        return {
+            "mode": "per_model",
+            "window_seconds": 14400,  # 4 hours
+        }
+
+    # =========================================================================
+    # QUOTA GROUPING
+    # =========================================================================
+
     def get_model_quota_group(self, model: str) -> Optional[str]:
         """
         Get the quota group for a model.
 
-        All Chutes models share the same credential-level quota pool,
+        All Chutes models share the same credential-level credit balance pool,
         so they all belong to the same quota group.
 
         Args:
-            model: Model name (ignored - all models share quota)
+            model: Model name (ignored — all models share one balance)
+
+        Returns:
+            Quota group name
+        """
+        return "credits($)"
+
+    def get_models_in_quota_group(self, group: str) -> List[str]:
+        """
+        Return all models belonging to the given quota group.
+
+        Args:
+            group: Quota group identifier
 
         Returns:
-            Quota group identifier for shared credential-level tracking
+            List of model names in the group
         """
-        return "chutes_global"
+        if group == "credits($)":
+            return ["_balance"]
+        return []
+
+    def get_quota_groups(self) -> List[str]:
+        """Return the list of quota groups for this provider."""
+        return ["credits($)"]
+
+    # =========================================================================
+    # MODEL DISCOVERY
+    # =========================================================================
 
     async def get_models(self, api_key: str, client: httpx.AsyncClient) -> List[str]:
         """
         Fetch available models from the Chutes API.
 
+        Also caches per-model pricing for cost calculation.
+
         Args:
             api_key: Chutes API key
             client: HTTP client
@@ -86,9 +180,61 @@ async def get_models(self, api_key: str, client: httpx.AsyncClient) -> List[str]
                 headers={"Authorization": f"Bearer {api_key}"},
             )
             response.raise_for_status()
-            return [
-                f"chutes/{model['id']}" for model in response.json().get("data", [])
-            ]
+            data = response.json()
+
+            models = []
+            for model_data in data.get("data", []):
+                model_id = model_data.get("id", "")
+                if model_id:
+                    models.append(f"chutes/{model_id}")
+
+                    # Cache pricing while we're at it
+                    price_info = model_data.get("pricing") or model_data.get(
+                        "price", {}
+                    )
+                    if price_info:
+                        if "prompt" in price_info:
+                            self._pricing_cache[model_id] = {
+                                "input": float(price_info.get("prompt", 0.0)),
+                                "output": float(price_info.get("completion", 0.0)),
+                                "input_cache_read": float(
+                                    price_info.get(
+                                        "input_cache_read",
+                                        float(price_info.get("prompt", 0.0)) * 0.5,
+                                    )
+                                ),
+                            }
+                        elif "input" in price_info:
+                            input_data = price_info.get("input", {})
+                            output_data = price_info.get("output", {})
+                            cache_data = price_info.get("input_cache_read", {})
+                            input_cost = float(
+                                input_data.get("usd", 0.0)
+                                if isinstance(input_data, dict)
+                                else input_data
+                            )
+                            output_cost = float(
+                                output_data.get("usd", 0.0)
+                                if isinstance(output_data, dict)
+                                else output_data
+                            )
+                            cache_cost = float(
+                                cache_data.get("usd", input_cost * 0.5)
+                                if isinstance(cache_data, dict)
+                                else (cache_data if cache_data else input_cost * 0.5)
+                            )
+                            self._pricing_cache[model_id] = {
+                                "input": input_cost,
+                                "output": output_cost,
+                                "input_cache_read": cache_cost,
+                            }
+
+            if self._pricing_cache:
+                lib_logger.info(
+                    f"Cached pricing for {len(self._pricing_cache)} Chutes models"
+                )
+
+            return models
         except (httpx.RequestError, httpx.HTTPStatusError) as e:
             lib_logger.error(f"Failed to fetch chutes.ai models: {e}")
             return []
@@ -98,15 +244,10 @@ async def get_models(self, api_key: str, client: httpx.AsyncClient) -> List[str]
     # =========================================================================
 
     def get_background_job_config(self) -> Optional[Dict[str, Any]]:
-        """
-        Configure periodic quota usage refresh.
-
-        Returns:
-            Background job configuration for quota refresh
-        """
+        """Configure periodic credit balance refresh."""
         return {
             "interval": self._quota_refresh_interval,
-            "name": "chutes_quota_refresh",
+            "name": "chutes_balance_refresh",
             "run_on_start": True,
         }
 
@@ -116,55 +257,61 @@ async def run_background_job(
         credentials: List[str],
     ) -> None:
         """
-        Refresh quota usage for all credentials in parallel.
+        Refresh credit balance for all credentials from the subscription API.
+
+        Fetches live dollar usage from /users/me/subscription_usage and pushes
+        the 4-hour window (cap and usage, in cents) to the UsageManager as the
+        quota baseline; monthly figures are only included in the debug log.
 
         Args:
             usage_manager: UsageManager instance
             credentials: List of API keys
         """
-        semaphore = asyncio.Semaphore(QUOTA_FETCH_CONCURRENCY)
+        semaphore = asyncio.Semaphore(BALANCE_FETCH_CONCURRENCY)
 
-        async def refresh_single_credential(
-            api_key: str, client: httpx.AsyncClient
-        ) -> None:
+        async def refresh_single(api_key: str, client: httpx.AsyncClient) -> None:
             async with semaphore:
                 try:
-                    usage_data = await self.fetch_quota_usage(api_key, client)
-
-                    if usage_data.get("status") == "success":
-                        # Update quota cache
-                        self._quota_cache[api_key] = usage_data
+                    balance_data = await self.refresh_balance(
+                        api_key,
+                        credential_identifier=api_key,
+                        client=client,
+                    )
 
-                        # Calculate values for usage manager
-                        remaining_fraction = usage_data.get("remaining_fraction", 0.0)
-                        quota = usage_data.get("quota", 0)
-                        reset_ts = usage_data.get("reset_at")
-
-                        # Store baseline in usage manager
-                        # Since Chutes uses credential-level quota, we use a virtual model name
-                        quota_used = (
-                            int((1.0 - remaining_fraction) * quota) if quota > 0 else 0
+                    if balance_data.get("status") == "success":
+                        # Push 4-hour window data (the tighter constraint)
+                        four_hour_cap_cents = balance_data.get(
+                            "four_hour_cap_cents", 0
+                        )
+                        four_hour_used_cents = balance_data.get(
+                            "four_hour_used_cents", 0
                         )
+
                         await usage_manager.update_quota_baseline(
                             api_key,
-                            "chutes/_quota",  # Virtual model for credential-level tracking
-                            quota_max_requests=quota,
-                            quota_reset_ts=reset_ts,
-                            quota_used=quota_used,
+                            "chutes/_balance",
+                            quota_max_requests=four_hour_cap_cents,
+                            quota_reset_ts=None,
+                            quota_used=four_hour_used_cents,
+                            force=True,  # API values are authoritative
                         )
 
+                        monthly = balance_data.get("monthly", {})
+                        four_hour = balance_data.get("four_hour", {})
                         lib_logger.debug(
-                            f"Updated Chutes quota baseline for credential: "
-                            f"{usage_data['remaining']:.0f}/{quota} remaining "
-                            f"({remaining_fraction * 100:.0f}%)"
+                            f"Updated Chutes balance baseline: "
+                            f"4h=${four_hour.get('usage', 0):.4f}/"
+                            f"${four_hour.get('cap', 0):.2f}, "
+                            f"monthly=${monthly.get('usage', 0):.4f}/"
+                            f"${monthly.get('cap', 0):.2f}, "
+                            f"models_priced={len(self._pricing_cache)}"
                         )
 
                 except Exception as e:
-                    lib_logger.warning(f"Failed to refresh Chutes quota usage: {e}")
+                    lib_logger.warning(
+                        f"Failed to refresh Chutes balance: {e}"
+                    )
 
-        # Fetch all credentials in parallel with shared HTTP client
         async with httpx.AsyncClient(timeout=30.0) as client:
-            tasks = [
-                refresh_single_credential(api_key, client) for api_key in credentials
-            ]
+            tasks = [refresh_single(api_key, client) for api_key in credentials]
             await asyncio.gather(*tasks, return_exceptions=True)