LLM-API-Key-Proxy/src/rotator_library/usage_manager.py at fa2e98749f13327ce94069b203b58a3b67860e9f · Mirrowel/LLM-API-Key-Proxy · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
import json
import os
import time
import logging
import asyncio
import random
from datetime import date, datetime, timezone, time as dt_time
from pathlib import Path
from typing import Any, Dict, List, Optional, Set, Tuple, Union
import aiofiles
import litellm

from .error_handler import ClassifiedError, NoAvailableKeysError, mask_credential
from .providers import PROVIDER_PLUGINS
from .utils.resilient_io import ResilientStateWriter
from .utils.paths import get_data_file
from .config import (
    DEFAULT_FAIR_CYCLE_DURATION,
    DEFAULT_EXHAUSTION_COOLDOWN_THRESHOLD,
    DEFAULT_CUSTOM_CAP_COOLDOWN_MODE,
    DEFAULT_CUSTOM_CAP_COOLDOWN_VALUE,
    COOLDOWN_BACKOFF_TIERS,
    COOLDOWN_BACKOFF_MAX,
    COOLDOWN_AUTH_ERROR,
    COOLDOWN_TRANSIENT_ERROR,
    COOLDOWN_RATE_LIMIT_DEFAULT,
)

# Package-wide logger used by this module for all diagnostics.
lib_logger = logging.getLogger("rotator_library")
# Do not pass records on to handlers of ancestor loggers (e.g. the root logger).
lib_logger.propagate = False
# Attach a no-op handler only when no handler is configured yet, so the logger
# always has at least one handler.
if not lib_logger.handlers:
    lib_logger.addHandler(logging.NullHandler())


class UsageManager:
    """
    Manages usage statistics and cooldowns for API keys with asyncio-safe locking,
    asynchronous file I/O, lazy-loading mechanism, and weighted random credential rotation.

    The credential rotation strategy can be configured via the `rotation_tolerance` parameter:

    - **tolerance = 0.0**: Deterministic least-used selection. The credential with
      the lowest usage count is always selected. This provides predictable, perfectly balanced
      load distribution but may be vulnerable to fingerprinting.

    - **tolerance = 2.0 - 4.0 (default, recommended)**: Balanced weighted randomness. Credentials are selected
      randomly with weights biased toward less-used ones. Credentials within 2 uses of the
      maximum can still be selected with reasonable probability. This provides security through
      unpredictability while maintaining good load balance.

    - **tolerance = 5.0+**: High randomness. Even heavily-used credentials have significant
      selection probability. Useful for stress testing or maximum unpredictability, but may
      result in less balanced load distribution.

    The weight formula is: `weight = (max_usage - credential_usage) + tolerance + 1`

    This ensures lower-usage credentials are preferred while tolerance controls how much
    randomness is introduced into the selection process.

    Additionally, providers can specify a rotation mode:
    - "balanced" (default): Rotate credentials to distribute load evenly
    - "sequential": Use one credential until exhausted (preserves caching)
    """

    def __init__(
        self,
        file_path: Optional[Union[str, Path]] = None,
        daily_reset_time_utc: Optional[str] = "03:00",
        rotation_tolerance: float = 0.0,
        provider_rotation_modes: Optional[Dict[str, str]] = None,
        provider_plugins: Optional[Dict[str, Any]] = None,
        priority_multipliers: Optional[Dict[str, Dict[int, int]]] = None,
        priority_multipliers_by_mode: Optional[
            Dict[str, Dict[str, Dict[int, int]]]
        ] = None,
        sequential_fallback_multipliers: Optional[Dict[str, int]] = None,
        fair_cycle_enabled: Optional[Dict[str, bool]] = None,
        fair_cycle_tracking_mode: Optional[Dict[str, str]] = None,
        fair_cycle_cross_tier: Optional[Dict[str, bool]] = None,
        fair_cycle_duration: Optional[Dict[str, int]] = None,
        exhaustion_cooldown_threshold: Optional[Dict[str, int]] = None,
        custom_caps: Optional[
            Dict[str, Dict[Union[int, Tuple[int, ...], str], Dict[str, Dict[str, Any]]]]
        ] = None,
    ):
        """
        Initialize the UsageManager.

        Args:
            file_path: Path to the usage data JSON file. If None, uses get_data_file("key_usage.json").
                       Can be absolute Path, relative Path, or string.
            daily_reset_time_utc: Time in UTC when daily stats should reset (HH:MM format).
                A falsy value (None or "") disables the daily reset time.
            rotation_tolerance: Tolerance for weighted random credential rotation.
                The default of this constructor is 0.0.
                - 0.0: Deterministic, least-used credential always selected
                - 2.0 - 4.0 (recommended): Balanced randomness, can pick credentials within 2 uses of max
                - 5.0+: High randomness, more unpredictable selection patterns
            provider_rotation_modes: Dict mapping provider names to rotation modes.
                - "balanced": Rotate credentials to distribute load evenly (default)
                - "sequential": Use one credential until exhausted (preserves caching)
            provider_plugins: Dict mapping provider names to provider plugin instances.
                Used for per-provider usage reset configuration (window durations, field names).
                Falls back to PROVIDER_PLUGINS when not given (or empty).
            priority_multipliers: Dict mapping provider -> priority -> multiplier.
                Universal multipliers that apply regardless of rotation mode.
                Example: {"antigravity": {1: 5, 2: 3}}
            priority_multipliers_by_mode: Dict mapping provider -> mode -> priority -> multiplier.
                Mode-specific overrides. Example: {"antigravity": {"balanced": {3: 1}}}
            sequential_fallback_multipliers: Dict mapping provider -> fallback multiplier.
                Used in sequential mode when priority not in priority_multipliers.
                Example: {"antigravity": 2}
            fair_cycle_enabled: Dict mapping provider -> bool to enable fair cycle rotation.
                When enabled, credentials must all exhaust before any can be reused.
                Default: enabled for sequential mode only.
            fair_cycle_tracking_mode: Dict mapping provider -> tracking mode.
                - "model_group": Track per quota group or model (default)
                - "credential": Track per credential globally
            fair_cycle_cross_tier: Dict mapping provider -> bool for cross-tier tracking.
                - False: Each tier cycles independently (default)
                - True: All credentials must exhaust regardless of tier
            fair_cycle_duration: Dict mapping provider -> cycle duration in seconds.
                Default: 86400 (24 hours)
            exhaustion_cooldown_threshold: Dict mapping provider -> threshold in seconds.
                A cooldown must exceed this to qualify as "exhausted". Default: 300 (5 min)
            custom_caps: Dict mapping provider -> tier -> model/group -> cap config.
                Allows setting custom usage limits per tier, per model or quota group.
                See ProviderInterface.default_custom_caps for format details.

        Raises:
            ValueError: If daily_reset_time_utc is not in "HH:MM" form or is out of range.
        """
        # Resolve file_path - use default if not provided
        if file_path is None:
            self.file_path = str(get_data_file("key_usage.json"))
        elif isinstance(file_path, Path):
            self.file_path = str(file_path)
        else:
            # String path - could be relative or absolute
            self.file_path = file_path
        self.rotation_tolerance = rotation_tolerance
        self.provider_rotation_modes = provider_rotation_modes or {}
        self.provider_plugins = provider_plugins or PROVIDER_PLUGINS
        self.priority_multipliers = priority_multipliers or {}
        self.priority_multipliers_by_mode = priority_multipliers_by_mode or {}
        self.sequential_fallback_multipliers = sequential_fallback_multipliers or {}
        self._provider_instances: Dict[str, Any] = {}  # Cache for provider instances
        self.key_states: Dict[str, Dict[str, Any]] = {}

        # Fair cycle rotation configuration
        self.fair_cycle_enabled = fair_cycle_enabled or {}
        self.fair_cycle_tracking_mode = fair_cycle_tracking_mode or {}
        self.fair_cycle_cross_tier = fair_cycle_cross_tier or {}
        self.fair_cycle_duration = fair_cycle_duration or {}
        self.exhaustion_cooldown_threshold = exhaustion_cooldown_threshold or {}
        self.custom_caps = custom_caps or {}
        # In-memory cycle state: {provider: {tier_key: {tracking_key: {"cycle_started_at": float, "exhausted": Set[str]}}}}
        self._cycle_exhausted: Dict[str, Dict[str, Dict[str, Dict[str, Any]]]] = {}

        self._data_lock = asyncio.Lock()
        self._usage_data: Optional[Dict] = None
        self._initialized = asyncio.Event()
        self._init_lock = asyncio.Lock()

        self._timeout_lock = asyncio.Lock()
        self._claimed_on_timeout: Set[str] = set()

        # Resilient writer for usage data persistence.
        # Use the resolved path: the raw `file_path` argument is None when the
        # default location is requested, so the writer must not be built from it.
        self._state_writer = ResilientStateWriter(self.file_path, lib_logger)

        # Forced credential for manual override (TUI control)
        self._forced_credential: Optional[str] = None
        self._forced_credential_lock = asyncio.Lock()

        if daily_reset_time_utc:
            hour, minute = map(int, daily_reset_time_utc.split(":"))
            self.daily_reset_time_utc = dt_time(
                hour=hour, minute=minute, tzinfo=timezone.utc
            )
        else:
            self.daily_reset_time_utc = None

    def _get_rotation_mode(self, provider: str) -> str:
        """
        Get the rotation mode for a provider.

        Args:
            provider: Provider name (e.g., "antigravity", "gemini_cli")

        Returns:
            "balanced" or "sequential"
        """
        return self.provider_rotation_modes.get(provider, "balanced")

    # =========================================================================
    # FORCED CREDENTIAL (TUI OVERRIDE)
    # =========================================================================

    async def set_forced_credential(self, credential: Optional[str]) -> None:
        """
        Store (or clear) the credential override used for manual control.

        The value is written under the forced-credential lock and the change
        is logged with the credential masked.

        Args:
            credential: Full credential path/identifier, or None to clear the override
        """
        async with self._forced_credential_lock:
            self._forced_credential = credential
            if not credential:
                # Falsy value (None or empty string) means the override is removed.
                lib_logger.info("Forced credential cleared")
                return
            lib_logger.info(f"Forced credential set to: {mask_credential(credential)}")

    async def get_forced_credential(self) -> Optional[str]:
        """
        Get the currently forced credential, if any.

        Returns:
            The forced credential path/identifier, or None if no override is active
        """
        async with self._forced_credential_lock:
            return self._forced_credential

    # =========================================================================
    # FAIR CYCLE ROTATION HELPERS
    # =========================================================================

    def _is_fair_cycle_enabled(self, provider: str, rotation_mode: str) -> bool:
        """
        Check if fair cycle rotation is enabled for a provider.

        Args:
            provider: Provider name
            rotation_mode: Current rotation mode ("balanced" or "sequential")

        Returns:
            True if fair cycle is enabled
        """
        # Check provider-specific setting first
        if provider in self.fair_cycle_enabled:
            return self.fair_cycle_enabled[provider]
        # Default: enabled only for sequential mode
        return rotation_mode == "sequential"

    def _get_fair_cycle_tracking_mode(self, provider: str) -> str:
        """
        Get fair cycle tracking mode for a provider.

        Returns:
            "model_group" or "credential"
        """
        return self.fair_cycle_tracking_mode.get(provider, "model_group")

    def _is_fair_cycle_cross_tier(self, provider: str) -> bool:
        """
        Check if fair cycle tracks across all tiers (ignoring priority boundaries).

        Returns:
            True if cross-tier tracking is enabled
        """
        return self.fair_cycle_cross_tier.get(provider, False)

    def _get_fair_cycle_duration(self, provider: str) -> int:
        """
        Return how long a fair cycle lasts for a provider, in seconds.

        Returns:
            The provider's entry in `fair_cycle_duration`, falling back to
            DEFAULT_FAIR_CYCLE_DURATION when none is configured.
        """
        durations = self.fair_cycle_duration
        seconds = durations.get(provider, DEFAULT_FAIR_CYCLE_DURATION)
        return seconds

    def _get_exhaustion_cooldown_threshold(self, provider: str) -> int:
        """
        Return the cooldown length, in seconds, above which a credential
        counts as "exhausted" for fair cycle purposes.

        Returns:
            The provider's entry in `exhaustion_cooldown_threshold`, falling
            back to DEFAULT_EXHAUSTION_COOLDOWN_THRESHOLD when none is configured.
        """
        thresholds = self.exhaustion_cooldown_threshold
        seconds = thresholds.get(provider, DEFAULT_EXHAUSTION_COOLDOWN_THRESHOLD)
        return seconds

    # =========================================================================
    # CUSTOM CAPS HELPERS
    # =========================================================================

    def _get_custom_cap_config(
        self,
        provider: str,
        tier_priority: int,
        model: str,
    ) -> Optional[Dict[str, Any]]:
        """
        Get custom cap config for a provider/tier/model combination.

        Resolution order:
        1. tier + model (exact match)
        2. tier + group (model's quota group)
        3. "default" + model
        4. "default" + group

        Args:
            provider: Provider name
            tier_priority: Credential's priority level
            model: Model name (with provider prefix)

        Returns:
            Cap config dict or None if no custom cap applies
        """
        provider_caps = self.custom_caps.get(provider)
        if not provider_caps:
            return None

        # Strip provider prefix from model
        clean_model = model.split("/")[-1] if "/" in model else model

        # Get quota group for this model
        group = self._get_model_quota_group_by_provider(provider, model)

        # Try to find matching tier config
        tier_config = None
        default_config = None

        for tier_key, models_config in provider_caps.items():
            if tier_key == "default":
                default_config = models_config
                continue

            # Check if this tier_key matches our priority
            if isinstance(tier_key, int) and tier_key == tier_priority:
                tier_config = models_config
                break
            elif isinstance(tier_key, tuple) and tier_priority in tier_key:
                tier_config = models_config
                break

        # Resolution order for tier config
        if tier_config:
            # Try model first
            if clean_model in tier_config:
                return tier_config[clean_model]
            # Try group
            if group and group in tier_config:
                return tier_config[group]

        # Resolution order for default config
        if default_config:
            # Try model first
            if clean_model in default_config:
                return default_config[clean_model]
            # Try group
            if group and group in default_config:
                return default_config[group]

        return None

    def _get_model_quota_group_by_provider(
        self, provider: str, model: str
    ) -> Optional[str]:
        """
        Get quota group for a model using provider name instead of credential.

        Args:
            provider: Provider name
            model: Model name

        Returns:
            Group name or None
        """
        plugin_instance = self._get_provider_instance(provider)
        if plugin_instance and hasattr(plugin_instance, "get_model_quota_group"):
            return plugin_instance.get_model_quota_group(model)
        return None

    def _resolve_custom_cap_max(
        self,
        provider: str,
        model: str,
        cap_config: Dict[str, Any],
        actual_max: Optional[int],
    ) -> Optional[int]:
        """
        Resolve custom cap max_requests value, handling percentages and clamping.

        Args:
            provider: Provider name
            model: Model name (for logging)
            cap_config: Custom cap configuration
            actual_max: Actual API max requests (may be None if unknown)

        Returns:
            Resolved cap value (clamped), or None if can't be calculated
        """
        max_requests = cap_config.get("max_requests")
        if max_requests is None:
            return None

        # Handle percentage
        if isinstance(max_requests, str) and max_requests.endswith("%"):
            if actual_max is None:
                lib_logger.warning(
                    f"Custom cap '{max_requests}' for {provider}/{model} requires known max_requests. "
                    f"Skipping until quota baseline is fetched. Use absolute value for immediate enforcement."
                )
                return None
            try:
                percentage = float(max_requests.rstrip("%")) / 100.0
                calculated = int(actual_max * percentage)
            except ValueError:
                lib_logger.warning(
                    f"Invalid percentage cap '{max_requests}' for {provider}/{model}"
                )
                return None
        else:
            # Absolute value
            try:
                calculated = int(max_requests)
            except (ValueError, TypeError):
                lib_logger.warning(
                    f"Invalid cap value '{max_requests}' for {provider}/{model}"
                )
                return None

        # Clamp to actual max (can only be MORE restrictive)
        if actual_max is not None:
            return min(calculated, actual_max)
        return calculated

    def _calculate_custom_cooldown_until(
        self,
        cap_config: Dict[str, Any],
        window_start_ts: Optional[float],
        natural_reset_ts: Optional[float],
    ) -> Optional[float]:
        """
        Work out when a custom-cap cooldown ends, never earlier than the
        natural quota reset.

        Modes (from `cap_config["cooldown_mode"]`):
        - "quota_reset": end at the natural reset
        - "offset": natural reset plus `cooldown_value`
        - "fixed": window start plus `cooldown_value`
        Any other mode is logged and treated as "quota_reset".

        Args:
            cap_config: Custom cap configuration
            window_start_ts: When first request was made (for fixed mode)
            natural_reset_ts: Natural quota reset timestamp

        Returns:
            Cooldown end timestamp (clamped), or None if can't calculate
        """
        mode = cap_config.get("cooldown_mode", DEFAULT_CUSTOM_CAP_COOLDOWN_MODE)
        value = cap_config.get("cooldown_value", DEFAULT_CUSTOM_CAP_COOLDOWN_VALUE)

        if mode == "offset":
            if natural_reset_ts is None:
                return None
            target = natural_reset_ts + value
        elif mode == "fixed":
            if window_start_ts is None:
                return None
            target = window_start_ts + value
        else:
            if mode != "quota_reset":
                lib_logger.warning(f"Unknown cooldown_mode '{mode}', using quota_reset")
            target = natural_reset_ts

        if target is None:
            return None
        if natural_reset_ts is None:
            return target
        # A custom cooldown may only extend past the natural reset, never end before it.
        return max(target, natural_reset_ts)

    def _check_and_apply_custom_cap(
        self,
        credential: str,
        model: str,
        request_count: int,
    ) -> bool:
        """
        Check if custom cap is exceeded and apply cooldown if so.

        This should be called after incrementing request_count in record_success().

        When the cap is reached this method:
        - sets `model_cooldowns[model]` for the credential (and for every other
          model in the same quota group) to the computed cooldown end,
        - records `custom_cap_max`, `custom_cap_hit_at` and
          `custom_cap_cooldown_until` on the model's usage entry,
        - marks the credential exhausted for fair cycle tracking when the
          cooldown is longer than the provider's exhaustion threshold and fair
          cycle is enabled.

        Args:
            credential: Credential identifier
            model: Model name (with provider prefix)
            request_count: Current request count for this model

        Returns:
            True if cap exceeded and cooldown applied, False otherwise
        """
        provider = self._get_provider_from_credential(credential)
        if not provider:
            return False

        priority = self._get_credential_priority(credential, provider)
        cap_config = self._get_custom_cap_config(provider, priority, model)
        if not cap_config:
            return False

        # Get model data for actual max and timing info
        # NOTE(review): the `.get(..., {})` fallbacks return fresh dicts that are
        # not stored in `_usage_data`; if the credential or model entry is missing,
        # the writes below land in throwaway dicts. Presumably the caller has
        # already created both entries - confirm.
        key_data = self._usage_data.get(credential, {})
        model_data = key_data.get("models", {}).get(model, {})
        actual_max = model_data.get("quota_max_requests")
        window_start_ts = model_data.get("window_start_ts")
        natural_reset_ts = model_data.get("quota_reset_ts")

        # Resolve custom cap max
        custom_max = self._resolve_custom_cap_max(
            provider, model, cap_config, actual_max
        )
        if custom_max is None:
            return False

        # Check if exceeded (the cap counts as hit once request_count reaches it)
        if request_count < custom_max:
            return False

        # Calculate cooldown end time
        cooldown_until = self._calculate_custom_cooldown_until(
            cap_config, window_start_ts, natural_reset_ts
        )
        if cooldown_until is None:
            # Can't calculate cooldown, use natural reset if available
            if natural_reset_ts:
                cooldown_until = natural_reset_ts
            else:
                lib_logger.warning(
                    f"Custom cap hit for {mask_credential(credential)}/{model} but can't calculate cooldown. "
                    f"Skipping cooldown application."
                )
                return False

        now_ts = time.time()

        # Apply cooldown
        model_cooldowns = key_data.setdefault("model_cooldowns", {})
        model_cooldowns[model] = cooldown_until

        # Store custom cap info in model data for reference
        model_data["custom_cap_max"] = custom_max
        model_data["custom_cap_hit_at"] = now_ts
        model_data["custom_cap_cooldown_until"] = cooldown_until

        hours_until = (cooldown_until - now_ts) / 3600
        lib_logger.info(
            f"Custom cap hit: {mask_credential(credential)} reached {request_count}/{custom_max} "
            f"for {model}. Cooldown for {hours_until:.1f}h"
        )

        # Sync cooldown across quota group
        group = self._get_model_quota_group(credential, model)
        if group:
            grouped_models = self._get_grouped_models(credential, group)
            for grouped_model in grouped_models:
                if grouped_model != model:
                    model_cooldowns[grouped_model] = cooldown_until

        # Check if this should trigger fair cycle exhaustion
        # (only cooldowns strictly longer than the threshold qualify)
        cooldown_duration = cooldown_until - now_ts
        threshold = self._get_exhaustion_cooldown_threshold(provider)
        if cooldown_duration > threshold:
            rotation_mode = self._get_rotation_mode(provider)
            if self._is_fair_cycle_enabled(provider, rotation_mode):
                tier_key = self._get_tier_key(provider, priority)
                tracking_key = self._get_tracking_key(credential, model, provider)
                self._mark_credential_exhausted(
                    credential, provider, tier_key, tracking_key
                )

        return True

    def _get_tier_key(self, provider: str, priority: int) -> str:
        """
        Get the tier key for cycle tracking based on cross_tier setting.

        Args:
            provider: Provider name
            priority: Credential priority level

        Returns:
            "__all_tiers__" if cross-tier enabled, else str(priority)
        """
        if self._is_fair_cycle_cross_tier(provider):
            return "__all_tiers__"
        return str(priority)

    def _get_tracking_key(self, credential: str, model: str, provider: str) -> str:
        """
        Get the key for exhaustion tracking based on tracking mode.

        Args:
            credential: Credential identifier
            model: Model name (with provider prefix)
            provider: Provider name

        Returns:
            Tracking key string (quota group name, model name, or "__credential__")
        """
        mode = self._get_fair_cycle_tracking_mode(provider)
        if mode == "credential":
            return "__credential__"
        # model_group mode: use quota group if exists, else model
        group = self._get_model_quota_group(credential, model)
        return group if group else model

    def _get_credential_priority(self, credential: str, provider: str) -> int:
        """
        Get the priority level for a credential.

        Args:
            credential: Credential identifier
            provider: Provider name

        Returns:
            Priority level (default 999 if unknown)
        """
        plugin_instance = self._get_provider_instance(provider)
        if plugin_instance and hasattr(plugin_instance, "get_credential_priority"):
            priority = plugin_instance.get_credential_priority(credential)
            if priority is not None:
                return priority
        return 999

    def _get_cycle_data(
        self, provider: str, tier_key: str, tracking_key: str
    ) -> Optional[Dict[str, Any]]:
        """
        Get cycle data for a provider/tier/tracking key combination.

        Returns:
            Cycle data dict or None if not exists
        """
        return (
            self._cycle_exhausted.get(provider, {}).get(tier_key, {}).get(tracking_key)
        )

    def _ensure_cycle_structure(
        self, provider: str, tier_key: str, tracking_key: str
    ) -> Dict[str, Any]:
        """
        Ensure the nested cycle structure exists and return the cycle data dict.
        """
        if provider not in self._cycle_exhausted:
            self._cycle_exhausted[provider] = {}
        if tier_key not in self._cycle_exhausted[provider]:
            self._cycle_exhausted[provider][tier_key] = {}
        if tracking_key not in self._cycle_exhausted[provider][tier_key]:
            self._cycle_exhausted[provider][tier_key][tracking_key] = {
                "cycle_started_at": None,
                "exhausted": set(),
            }
        return self._cycle_exhausted[provider][tier_key][tracking_key]

    def _mark_credential_exhausted(
        self,
        credential: str,
        provider: str,
        tier_key: str,
        tracking_key: str,
    ) -> None:
        """
        Mark a credential as exhausted for fair cycle tracking.

        Starts the cycle timer on first exhaustion.
        Skips if credential is already in the exhausted set (prevents duplicate logging).
        """
        cycle_data = self._ensure_cycle_structure(provider, tier_key, tracking_key)

        # Skip if already exhausted in this cycle (prevents duplicate logging)
        if credential in cycle_data.get("exhausted", set()):
            return

        # Start cycle timer on first exhaustion
        if cycle_data["cycle_started_at"] is None:
            cycle_data["cycle_started_at"] = time.time()
            lib_logger.info(
                f"Fair cycle started for {provider} tier={tier_key} tracking='{tracking_key}'"
            )

        cycle_data["exhausted"].add(credential)
        lib_logger.info(
            f"Fair cycle: marked {mask_credential(credential)} exhausted "
            f"for {tracking_key} ({len(cycle_data['exhausted'])} total)"
        )

    def _is_credential_exhausted_in_cycle(
        self,
        credential: str,
        provider: str,
        tier_key: str,
        tracking_key: str,
    ) -> bool:
        """
        Check if a credential was exhausted in the current cycle.
        """
        cycle_data = self._get_cycle_data(provider, tier_key, tracking_key)
        if cycle_data is None:
            return False
        return credential in cycle_data.get("exhausted", set())

    def _is_cycle_expired(
        self, provider: str, tier_key: str, tracking_key: str
    ) -> bool:
        """
        Check if the current cycle has exceeded its duration.
        """
        cycle_data = self._get_cycle_data(provider, tier_key, tracking_key)
        if cycle_data is None:
            return False
        cycle_started = cycle_data.get("cycle_started_at")
        if cycle_started is None:
            return False
        duration = self._get_fair_cycle_duration(provider)
        return time.time() >= cycle_started + duration

    def _should_reset_cycle(
        self,
        provider: str,
        tier_key: str,
        tracking_key: str,
        all_credentials_in_tier: List[str],
        available_not_on_cooldown: Optional[List[str]] = None,
    ) -> bool:
        """
        Check if cycle should reset.

        Returns True if:
        1. Cycle duration has expired, OR
        2. No credentials remain available (after cooldown + fair cycle exclusion), OR
        3. All credentials in the tier have been marked exhausted (fallback)
        """
        # Check duration first
        if self._is_cycle_expired(provider, tier_key, tracking_key):
            return True

        cycle_data = self._get_cycle_data(provider, tier_key, tracking_key)
        if cycle_data is None:
            return False

        # If available credentials are provided, reset when none remain usable
        if available_not_on_cooldown is not None:
            has_available = any(
                not self._is_credential_exhausted_in_cycle(
                    cred, provider, tier_key, tracking_key
                )
                for cred in available_not_on_cooldown
            )
            if not has_available and len(all_credentials_in_tier) > 0:
                return True

        exhausted = cycle_data.get("exhausted", set())
        # All must be exhausted (and there must be at least one credential)
        return (
            len(exhausted) >= len(all_credentials_in_tier)
            and len(all_credentials_in_tier) > 0
        )

    def _reset_cycle(self, provider: str, tier_key: str, tracking_key: str) -> None:
        """
        Reset exhaustion tracking for a completed cycle.
        """
        cycle_data = self._get_cycle_data(provider, tier_key, tracking_key)
        if cycle_data:
            exhausted_count = len(cycle_data.get("exhausted", set()))
            lib_logger.info(
                f"Fair cycle complete for {provider} tier={tier_key} "
                f"tracking='{tracking_key}' - resetting ({exhausted_count} credentials cycled)"
            )
            cycle_data["cycle_started_at"] = None
            cycle_data["exhausted"] = set()

    def _get_all_credentials_for_tier_key(
        self,
        provider: str,
        tier_key: str,
        available_keys: List[str],
        credential_priorities: Optional[Dict[str, int]],
    ) -> List[str]:
        """
        Get all credentials that belong to a tier key.

        Args:
            provider: Provider name
            tier_key: Either "__all_tiers__" or str(priority)
            available_keys: List of available credential identifiers
            credential_priorities: Dict mapping credentials to priorities

        Returns:
            List of credentials belonging to this tier key
        """
        if tier_key == "__all_tiers__":
            # Cross-tier: all credentials for this provider
            return list(available_keys)
        else:
            # Within-tier: only credentials with matching priority
            priority = int(tier_key)
            if credential_priorities:
                return [
                    k
                    for k in available_keys
                    if credential_priorities.get(k, 999) == priority
                ]
            return list(available_keys)

    def _count_fair_cycle_excluded(
        self,
        provider: str,
        tier_key: str,
        tracking_key: str,
        candidates: List[str],
    ) -> int:
        """
        Count how many candidates are excluded by fair cycle.

        Args:
            provider: Provider name
            tier_key: Tier key for tracking
            tracking_key: Model/group tracking key
            candidates: List of candidate credentials (not on cooldown)

        Returns:
            Number of candidates excluded by fair cycle
        """
        count = 0
        for cred in candidates:
            if self._is_credential_exhausted_in_cycle(
                cred, provider, tier_key, tracking_key
            ):
                count += 1
        return count

    def _get_priority_multiplier(
        self, provider: str, priority: int, rotation_mode: str
    ) -> int:
        """
        Get the concurrency multiplier for a provider/priority/mode combination.

        Lookup order:
        1. Mode-specific tier override: priority_multipliers_by_mode[provider][mode][priority]
        2. Universal tier multiplier: priority_multipliers[provider][priority]
        3. Sequential fallback (if mode is sequential): sequential_fallback_multipliers[provider]
        4. Global default: 1 (no multiplier effect)

        Args:
            provider: Provider name (e.g., "antigravity")
            priority: Priority level (1 = highest priority)
            rotation_mode: Current rotation mode ("sequential" or "balanced")

        Returns:
            Multiplier value
        """
        provider_lower = provider.lower()

        # 1. Check mode-specific override
        if provider_lower in self.priority_multipliers_by_mode:
            mode_multipliers = self.priority_multipliers_by_mode[provider_lower]
            if rotation_mode in mode_multipliers:
                if priority in mode_multipliers[rotation_mode]:
                    return mode_multipliers[rotation_mode][priority]

        # 2. Check universal tier multiplier
        if provider_lower in self.priority_multipliers:
            if priority in self.priority_multipliers[provider_lower]:
                return self.priority_multipliers[provider_lower][priority]

        # 3. Sequential fallback (only for sequential mode)
        if rotation_mode == "sequential":
            if provider_lower in self.sequential_fallback_multipliers:
                return self.sequential_fallback_multipliers[provider_lower]

        # 4. Global default
        return 1

    def _get_provider_from_credential(self, credential: str) -> Optional[str]:
        """
        Extract provider name from credential path or identifier.

        Supports multiple credential formats:
        - OAuth: "oauth_creds/antigravity_oauth_15.json" -> "antigravity"
        - OAuth: "C:\\...\\oauth_creds\\gemini_cli_oauth_1.json" -> "gemini_cli"
        - OAuth filename only: "antigravity_oauth_1.json" -> "antigravity"
        - API key style: stored with provider prefix metadata

        Args:
            credential: The credential identifier (path or key)

        Returns:
            Provider name string or None if cannot be determined
        """
        import re

        # Pattern: env:// URI format (e.g., "env://antigravity/1" -> "antigravity")
        if credential.startswith("env://"):
            parts = credential[6:].split("/")  # Remove "env://" prefix
            if parts and parts[0]:
                return parts[0].lower()
            # Malformed env:// URI (empty provider name)
            lib_logger.warning(f"Malformed env:// credential URI: {credential}")
            return None

        # Normalize path separators
        normalized = credential.replace("\\", "/")

        # Pattern: path ending with {provider}_oauth_{number}.json
        match = re.search(r"/([a-z_]+)_oauth_\d+\.json$", normalized, re.IGNORECASE)
        if match:
            return match.group(1).lower()

        # Pattern: oauth_creds/{provider}_...
        match = re.search(r"oauth_creds/([a-z_]+)_", normalized, re.IGNORECASE)
        if match:
            return match.group(1).lower()

        # Pattern: filename only {provider}_oauth_{number}.json (no path)
        match = re.match(r"([a-z_]+)_oauth_\d+\.json$", normalized, re.IGNORECASE)
        if match:
            return match.group(1).lower()

        return None

    def _get_provider_instance(self, provider: str) -> Optional[Any]:
        """
        Get or create a provider plugin instance.

        Args:
            provider: The provider name

        Returns:
            Provider plugin instance or None
        """
        if not provider:
            return None

        plugin_class = self.provider_plugins.get(provider)
        if not plugin_class:
            return None

        # Get or create provider instance from cache
        if provider not in self._provider_instances:
            # Instantiate the plugin if it's a class, or use it directly if already an instance
            if isinstance(plugin_class, type):
                self._provider_instances[provider] = plugin_class()
            else:
                self._provider_instances[provider] = plugin_class

        return self._provider_instances[provider]

    def _get_usage_reset_config(self, credential: str) -> Optional[Dict[str, Any]]:
        """
        Get the usage reset configuration for a credential from its provider plugin.

        Args:
            credential: The credential identifier

        Returns:
            Configuration dict with window_seconds, field_name, etc.
            or None to use default daily reset.
        """
        provider = self._get_provider_from_credential(credential)
        plugin_instance = self._get_provider_instance(provider)

        if plugin_instance and hasattr(plugin_instance, "get_usage_reset_config"):
            return plugin_instance.get_usage_reset_config(credential)

        return None

    def _get_reset_mode(self, credential: str) -> str:
        """
        Get the reset mode for a credential: 'credential' or 'per_model'.

        Args:
            credential: The credential identifier

        Returns:
            "per_model" or "credential" (default)
        """
        config = self._get_usage_reset_config(credential)
        return config.get("mode", "credential") if config else "credential"

    def _get_model_quota_group(self, credential: str, model: str) -> Optional[str]:
        """
        Get the quota group for a model, if the provider defines one.

        Args:
            credential: The credential identifier
            model: Model name (with or without provider prefix)

        Returns:
            Group name (e.g., "claude") or None if not grouped
        """
        provider = self._get_provider_from_credential(credential)
        plugin_instance = self._get_provider_instance(provider)

        if plugin_instance and hasattr(plugin_instance, "get_model_quota_group"):
            return plugin_instance.get_model_quota_group(model)

        return None

    def _get_grouped_models(self, credential: str, group: str) -> List[str]:
        """
        Get all model names in a quota group (with provider prefix), normalized.

        Returns only public-facing model names, deduplicated. Internal variants