Skip to content

Commit f7e2565

Browse files
Copilot and BukeLy authored
Add dedicated VLM model configuration and wire it through multi-tenant VLM creation (#10)
* Initial plan
* 拆分VLM模型配置
* 完善VLM配置独立化
* 优化VLM密钥回退一致性

---------

Co-authored-by: BukeLy <19304666+BukeLy@users.noreply.github.com>
Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
1 parent c512812 commit f7e2565

4 files changed

Lines changed: 24 additions & 12 deletions

File tree

env.example

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,11 @@ LLM_API_KEY="your_llm_api_key_here"
1515
LLM_BASE_URL="https://ark.ap-southeast.bytepluses.com/api/v3"
1616
# 使用的模型名称
1717
LLM_MODEL=seed-1-6-250615
18+
# 用于多模态图片理解的 VLM 模型(必填,独立于 LLM_MODEL)
19+
VLM_MODEL=seed-1-6-250615
20+
# 可选:VLM 使用独立的密钥/域名(未设置时复用 LLM 配置)
21+
# VLM_API_KEY="your_vlm_api_key_here"
22+
# VLM_BASE_URL="https://api.example.com/v1"
1823
# LLM 供应商标识(ark/openai/claude)
1924
LLM_PROVIDER=ark
2025
# VLM 图片理解 API 超时时间(秒,默认 120 秒)
@@ -318,4 +323,3 @@ TZ=Asia/Shanghai
318323

319324
# --- Python 配置 ---
320325
PYTHONUNBUFFERED=1 # 禁用输出缓冲,实时查看日志
321-

src/config.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
重构原因: 统一配置管理,从服务商导向改为功能导向命名
99
"""
1010

11-
import os
1211
from typing import Optional
1312
from pydantic import Field
1413
from pydantic_settings import BaseSettings
@@ -22,6 +21,9 @@ class LLMConfig(BaseSettings):
2221
api_key: str = Field(..., description="LLM API Key")
2322
base_url: str = Field(..., description="LLM API Base URL")
2423
model: str = Field(default="seed-1-6-250615", description="LLM Model Name")
24+
vlm_model: str = Field(..., description="VLM Model Name", alias="VLM_MODEL")
25+
vlm_api_key: Optional[str] = Field(default=None, description="VLM API Key", alias="VLM_API_KEY")
26+
vlm_base_url: Optional[str] = Field(default=None, description="VLM API Base URL", alias="VLM_BASE_URL")
2527
vlm_timeout: int = Field(default=120, description="VLM Image Understanding Timeout (seconds)")
2628
timeout: int = Field(default=60, description="General LLM Timeout (seconds)")
2729

src/multi_tenant.py

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,12 @@ def __init__(
4848
self._creation_locks: defaultdict = defaultdict(asyncio.Lock)
4949

5050
# 共享配置(从集中配置管理读取)
51-
self.ark_api_key = config.llm.api_key
52-
self.ark_base_url = config.llm.base_url
53-
self.ark_model = config.llm.model
51+
self.llm_api_key = config.llm.api_key
52+
self.llm_base_url = config.llm.base_url
53+
self.llm_model = config.llm.model
54+
self.vlm_model = config.llm.vlm_model
55+
self.vlm_api_key = config.llm.vlm_api_key or config.llm.api_key
56+
self.vlm_base_url = config.llm.vlm_base_url or config.llm.base_url
5457

5558
self.sf_api_key = config.embedding.api_key
5659
self.sf_base_url = config.embedding.base_url
@@ -85,9 +88,9 @@ def _create_llm_func(self, llm_config: Dict):
8588
import asyncio
8689

8790
# 从配置中提取参数(支持租户覆盖)
88-
model = llm_config.get("model", self.ark_model)
89-
api_key = llm_config.get("api_key", self.ark_api_key)
90-
base_url = llm_config.get("base_url", self.ark_base_url)
91+
model = llm_config.get("model", self.llm_model)
92+
api_key = llm_config.get("api_key", self.llm_api_key)
93+
base_url = llm_config.get("base_url", self.llm_base_url)
9194

9295
# 获取 RateLimiter 参数(租户可配置)
9396
# 注意:这里的 max_async 是 RateLimiter 的并发控制,不是 LightRAG 的
@@ -278,9 +281,9 @@ def _create_vision_model_func(self, llm_config: Dict):
278281
import aiohttp
279282

280283
# 从配置中提取参数(支持租户覆盖)
281-
model = llm_config.get("model", self.ark_model)
282-
api_key = llm_config.get("api_key", self.ark_api_key)
283-
base_url = llm_config.get("base_url", self.ark_base_url)
284+
model = llm_config.get("vlm_model", self.vlm_model)
285+
api_key = llm_config.get("vlm_api_key") or llm_config.get("api_key") or self.vlm_api_key
286+
base_url = llm_config.get("vlm_base_url") or llm_config.get("base_url") or self.vlm_base_url
284287
vlm_timeout = llm_config.get("vlm_timeout", self.vlm_timeout)
285288

286289
# 获取速率限制器(VLM 使用 LLM 的限制)
@@ -552,4 +555,4 @@ async def get_tenant_lightrag(tenant_id: str) -> LightRAG:
552555
LightRAG: 该租户的实例
553556
"""
554557
manager = get_multi_tenant_manager()
555-
return await manager.get_instance(tenant_id)
558+
return await manager.get_instance(tenant_id)

src/tenant_config.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -301,6 +301,9 @@ def _merge_llm_config(self, tenant_config: Optional[TenantConfigModel]) -> Dict[
301301
"""
302302
base = {
303303
"model": config.llm.model,
304+
"vlm_model": config.llm.vlm_model,
305+
"vlm_api_key": config.llm.vlm_api_key,
306+
"vlm_base_url": config.llm.vlm_base_url,
304307
"api_key": config.llm.api_key,
305308
"base_url": config.llm.base_url,
306309
"timeout": config.llm.timeout,

0 commit comments

Comments
 (0)