-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathconfig.py
More file actions
86 lines (69 loc) · 2.9 KB
/
config.py
File metadata and controls
86 lines (69 loc) · 2.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
from pathlib import Path
from pydantic import BaseModel, Field
from pydantic_settings import BaseSettings, SettingsConfigDict
# Prefix for all environment variables consumed by Settings (e.g. SLM_MODEL_PATH).
ENV_PREFIX = "SLM_"
# Get the absolute path to the project's root directory
# This assumes config.py is in slm_server/
CONFIG_PY_PATH = Path(__file__).resolve()
PROJECT_ROOT = CONFIG_PY_PATH.parent.parent
# Conventional locations relative to the project root.
MODELS_DIR = PROJECT_ROOT / "models"
DOTENV_PATH = PROJECT_ROOT / ".env"
# Default GGUF model file; str() because Settings.model_path is typed as str.
MODEL_PATH_DEFAULT = str(MODELS_DIR / "Qwen3-0.6B-Q4_K_M.gguf")
# Default "owned_by" label surfaced by the /models endpoint.
MODEL_OWNER_DEFAULT = "second-state"
class LoggingSettings(BaseModel):
    """Logging-related configuration."""

    # Controls llama.cpp's own stdout logging.
    verbose: bool = Field(default=True, description="If logging to stdout by cpp llama")
    # Default level applied to the application's loggers.
    level: str = Field(default="INFO", description="Log level default for loggers.")
class MetricsSettings(BaseModel):
    """Metrics endpoint configuration."""

    # Toggles exposure of the metrics endpoint.
    enabled: bool = Field(default=True, description="If enable metrics to port")
    # Path at which the metrics GET handler is mounted.
    endpoint: str = Field(default="/metrics", description="Endpoint of metrics get handler.")
class TraceSettings(BaseModel):
    """OpenTelemetry tracing configuration (OTLP export, e.g. to Grafana Tempo)."""

    enabled: bool = Field(True, description="Enable OpenTelemetry tracing")
    service_name: str = Field(
        "slm_server", description="Service Name used in trace provider."
    )
    # Empty string means no OTLP endpoint has been configured.
    endpoint: str = Field("", description="Grafana Tempo OTLP endpoint URL")
    username: str = Field("", description="Grafana Tempo basic auth username")
    password: str = Field("", description="Grafana Tempo basic auth password")
    # The description documents a 0.0-1.0 range; enforce it with ge/le so an
    # out-of-range rate fails fast at settings load instead of silently
    # producing an invalid sampler configuration.
    sample_rate: float = Field(
        0.1,
        ge=0.0,
        le=1.0,
        description="Trace sampling rate (0.0-1.0), default 10%",
    )
    # Paths excluded from tracing (pydantic copies mutable defaults per instance).
    excluded_urls: list[str] = Field(
        ["/metrics", "/health"],
        description="List of URLs to exclude from tracing",
    )
class Settings(BaseSettings):
    """Top-level server settings loaded from the environment and ``.env``.

    Environment variables use the ``SLM_`` prefix; nested models are addressed
    with ``__`` as the delimiter (e.g. ``SLM_TRACING__ENABLED``).
    """

    model_config = SettingsConfigDict(
        case_sensitive=False,
        env_prefix=ENV_PREFIX,
        env_nested_delimiter="__",
        env_file=DOTENV_PATH,
        env_file_encoding="utf-8",
        # The fields model_path/model_owner collide with pydantic v2's
        # protected "model_" namespace and trigger a UserWarning at class
        # creation; clearing the namespace silences it without changing
        # any runtime behavior.
        protected_namespaces=(),
    )
    model_path: str = Field(MODEL_PATH_DEFAULT, description="Model path for llama_cpp.")
    model_owner: str = Field(
        MODEL_OWNER_DEFAULT,
        description="Owner label for /models list. Set SLM_MODEL_OWNER to override.",
    )
    n_ctx: int = Field(
        8192, description="Maximum context window (input + generated tokens)."
    )
    n_threads: int = Field(
        2, description="Number of OpenMP threads llama‑cpp will spawn."
    )
    n_batch: int = Field(
        512, description="Number of tokens to process in a single batch."
    )
    # Fixed seed keeps generations reproducible across restarts by default.
    seed: int = Field(42, description="Seed to inject for llama_cpp.")
    # How long a request waits for the inference lock before giving up.
    s_timeout: int = Field(
        1, description="Seconds to wait if undergoing another inference."
    )
    # Nested settings groups; default_factory builds fresh instances per Settings.
    logging: LoggingSettings = Field(default_factory=LoggingSettings)
    metrics: MetricsSettings = Field(default_factory=MetricsSettings)
    tracing: TraceSettings = Field(default_factory=TraceSettings)
def get_settings() -> Settings:
    """Return the process-wide ``Settings`` singleton.

    The instance is built lazily on first call and cached as a function
    attribute, so every caller shares one parsed configuration.
    """
    try:
        return get_settings._instance
    except AttributeError:
        get_settings._instance = Settings()
        return get_settings._instance