-
Notifications
You must be signed in to change notification settings - Fork 74
Expand file tree
/
Copy pathllm_entity.py
More file actions
34 lines (26 loc) · 813 Bytes
/
llm_entity.py
File metadata and controls
34 lines (26 loc) · 813 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
from dataclasses import dataclass
from enum import Enum
from typing import Optional
class LLMSource(str, Enum):
    """String-valued enumeration of the places an LLM can come from.

    Because the class mixes in ``str``, each member compares equal to its
    plain string value (e.g. ``LLMSource.HUGGING_FACE == "hugging_face"``).
    """

    HUGGING_FACE = "hugging_face"
class LLMInferenceFramework(str, Enum):
    """String-valued enumeration of the recognised inference frameworks.

    Because the class mixes in ``str``, each member compares equal to its
    plain string value and can be looked up from that string, e.g.
    ``LLMInferenceFramework("vllm") is LLMInferenceFramework.VLLM``.
    """

    DEEPSPEED = "deepspeed"
    TEXT_GENERATION_INFERENCE = "text_generation_inference"
    VLLM = "vllm"
    LIGHTLLM = "lightllm"
    TENSORRT_LLM = "tensorrt_llm"
    SGLANG = "sglang"
class Quantization(str, Enum):
    """String-valued enumeration of the recognised quantization options.

    Because the class mixes in ``str``, each member compares equal to its
    plain string value (e.g. ``Quantization.AWQ == "awq"``).
    """

    BITSANDBYTES = "bitsandbytes"
    AWQ = "awq"
@dataclass
class LLMMetadata:
    """Plain data record grouping the descriptive settings of one LLM.

    The first five fields are required; the remaining four are optional and
    fall back to the defaults shown below.

    Attributes:
        model_name: Name of the model.
        source: Member of ``LLMSource`` identifying the model's source.
        inference_framework: Member of ``LLMInferenceFramework``.
        inference_framework_image_tag: Tag string associated with the
            inference framework (presumably a container image tag --
            confirm against callers).
        num_shards: Shard count as an integer (presumably how many shards
            the model is split across -- confirm against callers).
        quantize: Optional ``Quantization`` member; ``None`` by default.
        checkpoint_path: Optional checkpoint location string; ``None`` by
            default.
        chat_template_override: Optional chat template string; ``None`` by
            default.
        hf_weights_syncing: Boolean flag, ``False`` by default
            (NOTE(review): exact semantics are not visible here -- confirm).
    """

    # Required fields (no defaults) must precede the defaulted ones.
    model_name: str
    source: LLMSource
    inference_framework: LLMInferenceFramework
    inference_framework_image_tag: str
    num_shards: int

    # Optional fields.
    quantize: Optional[Quantization] = None
    checkpoint_path: Optional[str] = None
    chat_template_override: Optional[str] = None
    hf_weights_syncing: bool = False