tests/common/vllm_test.py (2 additions, 0 deletions)

@@ -1228,6 +1228,8 @@ def setUp(self):
         self.config.explorer.rollout_model.tensor_parallel_size = 1
         self.config.explorer.rollout_model.chat_template = CHAT_TEMPLATE
         self.config.explorer.rollout_model.enable_openai_api = True
+        self.config.explorer.rollout_model.enable_lora = True
+        self.config.explorer.rollout_model.enable_runtime_lora_updating = True

         self.config.check_and_update()
         self.engines, self.auxiliary_engines = create_inference_models(self.config)
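For context, enabling both LoRA flags alongside the OpenAI API means a test can address a served LoRA adapter by name through the standard OpenAI client. A minimal sketch, where the server address and adapter name are placeholders rather than values from this PR:

# Hedged sketch: base_url and adapter name are hypothetical.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")
resp = client.completions.create(
    model="my_adapter",  # name of a loaded LoRA adapter (hypothetical)
    prompt="Hello",
    max_tokens=16,
)
print(resp.choices[0].text)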
trinity/common/config.py (1 addition, 0 deletions)

@@ -556,6 +556,7 @@ class InferenceModelConfig:

     # ! DO NOT SET, automatically set from model.lora_configs
     enable_lora: bool = False
+    enable_runtime_lora_updating: bool = False
     lora_modules: Optional[List[Dict]] = None
     lora_kwargs: Optional[dict] = field(default_factory=dict)

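As a rough sketch of where these fields typically end up (an assumption about the mapping, not this repo's code), enable_lora and the entries carried by lora_kwargs correspond to vLLM engine arguments such as enable_lora, max_loras, and max_lora_rank:

# Hedged sketch: the base model and the lora_kwargs values are examples only.
from vllm import LLM

llm = LLM(
    model="Qwen/Qwen2.5-7B-Instruct",  # hypothetical base model
    enable_lora=True,   # mirrors InferenceModelConfig.enable_lora
    max_loras=1,        # example values that lora_kwargs could carry
    max_lora_rank=32,
)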
trinity/common/models/vllm_model.py (2 additions, 0 deletions)

@@ -54,6 +54,8 @@ def __init__(
os.environ["VLLM_ENABLE_V1_MULTIPROCESSING"] = "0"
if self.vllm_version >= parse_version("0.11.0"):
os.environ["VLLM_ALLREDUCE_USE_SYMM_MEM"] = "0"
if self.config.enable_runtime_lora_updating:
os.environ["VLLM_ALLOW_RUNTIME_LORA_UPDATING"] = "1"
if not config.enforce_eager:
# To avoid torch compile conflicts when multiple model are started simultaneously.
# remove this when the following PR is released:
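Setting VLLM_ALLOW_RUNTIME_LORA_UPDATING=1 is what permits vLLM's OpenAI-compatible server to load and unload LoRA adapters at runtime. A minimal sketch of that flow against the server's dynamic adapter endpoints, with a placeholder URL, adapter name, and path:

# Hedged sketch: URL, adapter name, and path are placeholders.
import requests

base = "http://localhost:8000"
requests.post(
    f"{base}/v1/load_lora_adapter",
    json={"lora_name": "my_adapter", "lora_path": "/path/to/adapter"},
).raise_for_status()
# ... requests can now target "my_adapter" as the model name ...
requests.post(
    f"{base}/v1/unload_lora_adapter",
    json={"lora_name": "my_adapter"},
).raise_for_status()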