CIPOC/llm_client.py at main · RENCI/CIPOC · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
import json

from openai import OpenAI, RateLimitError
from pydantic import BaseModel
from typing import Literal

from limiter import RateLimiter


class ChatClient:
    """Thin wrapper around an OpenAI-compatible chat-completions endpoint.

    The client keeps a list of tool (function-calling) definitions that is
    attached to every request, and routes each request through a
    ``RateLimiter`` built from per-instance limiter settings.
    """

    # Defaults handed to ``RateLimiter``. Treated as read-only: every instance
    # works on its own copy so overrides never leak between clients.
    DEFAULT_LIMITER_SETTINGS = {
        "max_retries": 10,
        "initial_delay": 1.0,
        "exponential_base": 2.0,
        "max_delay": 60.0,
        "jitter": True,
        "retry_on": (RateLimitError,),
        "logger_name": None,
    }

    def __init__(
        self,
        model_name: str,
        api_key: str,
        endpoint_url: str,
        tools: list[dict] | None = None,
        tool_choice_mode: Literal["auto", "required", "none"] | dict = "auto",
        limiter_kwargs: dict | None = None
    ):
        """Create the underlying OpenAI client and install the rate limiter.

        Args:
            model_name: Model identifier sent with every completion request.
            api_key: API key passed to the OpenAI client.
            endpoint_url: Base URL of the OpenAI-compatible endpoint.
            tools: Optional initial tool definitions. The list is copied, so
                later ``add_tool_from_pydantic`` calls do not modify the
                caller's list (the tool dicts themselves are still shared).
            tool_choice_mode: Value sent as ``tool_choice`` whenever tools are
                registered.
            limiter_kwargs: Optional overrides for ``DEFAULT_LIMITER_SETTINGS``.
        """
        self.model_name = model_name
        self.api_key = api_key
        self.endpoint_url = endpoint_url
        # SDK-level retries are switched off (max_retries=0); presumably so the
        # RateLimiter wrapper is the only retry layer.
        self.client = OpenAI(api_key=self.api_key, base_url=self.endpoint_url, max_retries=0)
        self._tools = list(tools) if tools else []
        self.tool_choice_mode = tool_choice_mode

        # Copy the class-level defaults: set_limiter() updates this dict in
        # place and must not alter the defaults shared by all instances.
        self._limiter_settings = dict(self.DEFAULT_LIMITER_SETTINGS)
        self.set_limiter(limiter_kwargs)

    def __repr__(self) -> str:
        # default=repr keeps json.dumps from raising on values that are not
        # JSON-serializable, such as the exception classes in "retry_on".
        rep = f"""
        Model name: {self.model_name}
        Endpoint URL: {self.endpoint_url}
        Tool choice mode: {self.tool_choice_mode}
        Tools: {json.dumps(self.get_tools(), indent=2, default=repr)}
        Limiter: {json.dumps(self._limiter_settings, indent=2, default=repr)}
        """
        return rep

    def _set_limiter(self):
        """Rebuild the limiter and re-wrap ``_chat`` on this instance."""
        limiter = RateLimiter(**self._limiter_settings)
        # Always wrap the undecorated class-level _chat so repeated calls do
        # not stack one limiter on top of another.
        original_chat = self.__class__._chat
        self._chat = limiter(lambda **kw: original_chat(self, **kw))

    def set_limiter(self, limiter_kwargs: dict | None = None):
        """Update this instance's limiter settings and reinstall the limiter.

        Args:
            limiter_kwargs: Settings merged onto the current ones. When it is
                ``None`` or empty, every key in ``DEFAULT_LIMITER_SETTINGS`` is
                reset to its default value instead.
        """
        self._limiter_settings.update(limiter_kwargs or self.DEFAULT_LIMITER_SETTINGS)
        self._set_limiter()

    def get_limiter_settings(self):
        """Return the live limiter settings dict of this instance."""
        return self._limiter_settings

    @staticmethod
    def tool_from_pydantic(name: str, description: str, data_model: type[BaseModel]) -> dict:
        """Build a function-tool definition whose parameters come from a pydantic model.

        Args:
            name: Function name exposed to the model.
            description: Description of the function.
            data_model: Pydantic model class; its JSON schema becomes the
                ``parameters`` entry.

        Returns:
            A dict with ``type`` set to ``"function"`` and a ``function`` entry
            holding the name, description and parameter schema.
        """
        tool = {
            "type": "function",
            "function": {
                "name": name,
                "description": description,
                "parameters": data_model.model_json_schema()
            }
        }

        return tool

    def add_tool_from_pydantic(self, name: str, description: str, data_model: type[BaseModel]):
        """Append a tool built by ``tool_from_pydantic`` to this client's tools."""
        self._tools.append(ChatClient.tool_from_pydantic(name=name, description=description, data_model=data_model))

    def get_tools(self):
        """Return the list of registered tool definitions."""
        return self._tools

    def _chat(self, **kwargs):
        """Send one chat-completions request and return the raw response.

        When tools are registered, ``tools`` and ``tool_choice`` in ``kwargs``
        are overwritten with the client's own values. On instances, this method
        is shadowed by the rate-limited wrapper installed in ``_set_limiter``.
        """
        if self._tools:
            kwargs.update({"tools": self._tools, "tool_choice": self.tool_choice_mode})

        response = self.client.chat.completions.create(model=self.model_name, **kwargs)
        return response

    def chat(self, messages, **kwargs):
        """Send ``messages`` through the rate-limited ``_chat``.

        Extra keyword arguments are forwarded to the completion request.
        """
        return self._chat(messages=messages, **kwargs)


class OpenAIChatClient(ChatClient):
    """Chat client that adds a ``reasoning_effort`` setting to requests."""

    def __init__(self, reasoning_effort: Literal["minimal", "low", "medium", "high"] | None = "medium", **kwargs):
        """Store the default reasoning effort; other arguments go to ``ChatClient``."""
        super().__init__(**kwargs)
        self._reasoning_effort = reasoning_effort

    def chat(self, messages: list[dict], reasoning_effort: Literal["minimal", "low", "medium", "high"] | None = None, **kwargs):
        """Send ``messages``, attaching a reasoning effort when one is set.

        A truthy per-call ``reasoning_effort`` wins; otherwise the value given
        at construction time is used. If the resulting effort is ``None`` the
        parameter is left out of the request.
        """
        effort = reasoning_effort if reasoning_effort else self._reasoning_effort
        if effort is None:
            return self._chat(messages=messages, **kwargs)

        kwargs["reasoning_effort"] = effort
        return self._chat(messages=messages, **kwargs)


class AnthropicChatClient(ChatClient):
    """Chat client that can request a "thinking" budget through ``extra_body``."""

    def __init__(self, thinking_enabled: bool = True, **kwargs):
        """Store the default thinking switch; other arguments go to ``ChatClient``."""
        super().__init__(**kwargs)
        self._thinking_enabled = thinking_enabled

    def chat(self, messages: list[dict], thinking_enabled: bool | None = None, thinking_tokens: int = 1024, **kwargs):
        """Send ``messages``, optionally enabling thinking for this request.

        Args:
            messages: Chat messages forwarded to the completion request.
            thinking_enabled: Per-call override; ``None`` falls back to the
                value given at construction time.
            thinking_tokens: Value sent as the thinking ``budget_tokens``.
            **kwargs: Forwarded to the completion request. A caller-supplied
                ``extra_body`` is merged with the thinking settings without
                being modified.

        Raises:
            ValueError: If thinking is enabled and ``thinking_tokens`` is not
                greater than 0.
        """
        if thinking_enabled or (thinking_enabled is None and self._thinking_enabled):
            if not thinking_tokens > 0:
                raise ValueError("Thinking token budget must be an integer > 0")

            thinking = {
                "thinking": {
                    "type": "enabled",
                    "budget_tokens": thinking_tokens
                }
            }

            # Merge into a fresh dict: the caller's extra_body must not be
            # mutated, and an explicit extra_body=None is treated as empty.
            kwargs["extra_body"] = {**(kwargs.get("extra_body") or {}), **thinking}

        # Strip the top-level "title" from each tool's parameter schema in
        # place (presumably the endpoint does not accept it; confirm). This
        # mutates the registered tool dicts, so it only has an effect once.
        for tool in self._tools:
            tool["function"]["parameters"].pop("title", None)

        return self._chat(messages=messages, **kwargs)