Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ https://user-images.githubusercontent.com/24236723/233631602-6a69d83c-83ef-41ed-
### Build video chat with:
* [End2End](https://github.com/OpenGVLab/Ask-Anything/tree/main/video_chat#running-usage)
* [ChatGPT](https://github.com/OpenGVLab/Ask-Anything/tree/main/video_chat_text/video_chat_with_ChatGPT#running-usage)
* [MiniMax](https://github.com/OpenGVLab/Ask-Anything/tree/main/video_chat_with_ChatGPT#using-minimax-as-llm-provider) — Use MiniMax-M2.7 as an alternative LLM provider
* [StableLM](https://github.com/OpenGVLab/Ask-Anything/tree/main/video_chat_text/video_chat_with_StableLM#running-usage)
* [MOSS](https://github.com/OpenGVLab/Ask-Anything/tree/main/video_chat_text/video_chat_with_MOSS#running-usage)
* [MiniGPT-4](https://github.com/OpenGVLab/Ask-Anything/tree/main/video_chat_text/video_miniGPT4#running-usage)
Expand Down
27 changes: 22 additions & 5 deletions video_chat_with_ChatGPT/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,13 +40,30 @@ cd ./pretrained_models/flan-t5-large-finetuned-openai-summarize_from_feedback
git lfs pull
cd ../..

# Configure the necessary ChatGPT APIs
export OPENAI_API_KEY={Your_Private_Openai_Key}

# Run the VideoChat gradio demo.
python app.py
# Configure the necessary ChatGPT APIs
export OPENAI_API_KEY={Your_Private_Openai_Key}

# Run the VideoChat gradio demo.
python app.py
```

## Using MiniMax as LLM Provider

You can use [MiniMax](https://www.minimaxi.com) as an alternative LLM provider instead of OpenAI. MiniMax offers the MiniMax-M2.7 model via an OpenAI-compatible API.

```shell
# Set your MiniMax API key
export MINIMAX_API_KEY={Your_MiniMax_API_Key}

# Optionally set the default provider via environment variable
export LLM_PROVIDER=minimax

# Run the demo
python app.py
```

You can also select the LLM provider from the **LLM Provider** dropdown in the Gradio UI at runtime.

# Acknowledgement

The project is based on [InternVideo](https://github.com/OpenGVLab/InternVideo), [Tag2Text](https://github.com/xinyu1205/Tag2Text), [GRiT](https://github.com/JialianW/GRiT), [mrm8488](https://huggingface.co/mrm8488/flan-t5-large-finetuned-openai-summarize_from_feedback) and [ChatGPT](https://openai.com/blog/chatgpt). Thanks to the authors for their efforts.
Expand Down
14 changes: 10 additions & 4 deletions video_chat_with_ChatGPT/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from util import *
import gradio as gr
from chatbot import *
from chatbot import LLM_PROVIDERS
from load_internvideo import *
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
from simplet5 import SimpleT5
Expand Down Expand Up @@ -116,17 +117,22 @@ def set_example_video(example: list) -> dict:
with gr.Column():
input_video_path = gr.inputs.Video(label="Input Video")
input_tag = gr.Textbox(lines=1, label="User Prompt (Optional, Enter with commas)",visible=False)

with gr.Row():
with gr.Column(sclae=0.3, min_width=0):
caption = gr.Button("✍ Upload")
chat_video = gr.Button(" 🎥 Let's Chat! ", interactive=False)
with gr.Column(scale=0.7, min_width=0):
loadinglabel = gr.Label(label="State")
with gr.Column():
llm_provider = gr.Dropdown(
choices=list(LLM_PROVIDERS.keys()),
value="openai",
label="LLM Provider",
)
openai_api_key_textbox = gr.Textbox(
value=os.environ["OPENAI_API_KEY"],
placeholder="Paste your OpenAI API key here to start (sk-...)",
value=os.environ.get("OPENAI_API_KEY", ""),
placeholder="Paste your API key here to start",
show_label=False,
lines=1,
type="password",
Expand Down Expand Up @@ -156,7 +162,7 @@ def set_example_video(example: list) -> dict:
caption.click(lambda: [], None, state)
caption.click(inference,[input_video_path,input_tag],[model_tag_output, user_tag_output, image_caption_output, dense_caption_output,video_caption_output, chat_video, loadinglabel])

chat_video.click(bot.init_agent, [openai_api_key_textbox, image_caption_output, dense_caption_output, video_caption_output, model_tag_output, state], [input_raws,chatbot, state, openai_api_key_textbox])
chat_video.click(bot.init_agent, [openai_api_key_textbox, image_caption_output, dense_caption_output, video_caption_output, model_tag_output, state, llm_provider], [input_raws,chatbot, state, openai_api_key_textbox])

txt.submit(bot.run_text, [txt, state], [chatbot, state])
txt.submit(lambda: "", None, txt)
Expand Down
76 changes: 69 additions & 7 deletions video_chat_with_ChatGPT/chatbot.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,64 @@
from langchain.agents.tools import Tool
from langchain.chains.conversation.memory import ConversationBufferMemory
from langchain.llms.openai import OpenAI
from langchain.chat_models import ChatOpenAI
import os
import re
import gradio as gr
import openai

# Registry of supported LLM providers with their default model and endpoint
LLM_PROVIDERS = {
    "openai": {
        "default_model": "gpt-4",
        "api_base": None,  # None -> the stock OpenAI endpoint
    },
    "minimax": {
        "default_model": "MiniMax-M2.7",
        "api_base": "https://api.minimax.io/v1",
    },
}


def create_llm(provider, api_key, model_name=None, temperature=0):
    """Build the LangChain LLM object for the requested provider.

    Args:
        provider: Provider identifier, "openai" or "minimax"
            (matched case-insensitively).
        api_key: Credential for the chosen provider's API.
        model_name: Optional model override; the provider's default
            model is used when this is falsy.
        temperature: Sampling temperature forwarded to the model.

    Returns:
        A LangChain LLM or ChatModel instance.

    Raises:
        ValueError: If *provider* is not a key of ``LLM_PROVIDERS``.
    """
    key = provider.lower()
    if key not in LLM_PROVIDERS:
        raise ValueError(
            f"Unsupported provider '{key}'. "
            f"Supported: {list(LLM_PROVIDERS.keys())}"
        )

    settings = LLM_PROVIDERS[key]
    chosen_model = model_name or settings["default_model"]

    if key != "minimax":
        # OpenAI path: plain completion-style LLM on the default endpoint.
        return OpenAI(
            temperature=temperature,
            openai_api_key=api_key,
            model_name=chosen_model,
        )

    # MiniMax requires temperature in (0.0, 1.0] — clamp into that range.
    clamped = min(max(temperature, 0.01), 1.0)
    return ChatOpenAI(
        model_name=chosen_model,
        openai_api_key=api_key,
        openai_api_base=settings["api_base"],
        temperature=clamped,
    )


def cut_dialogue_history(history_memory, keep_last_n_words=400):
if history_memory is None or len(history_memory) == 0:
Expand All @@ -32,14 +86,14 @@ def run_text(self, text, state):
self.agent.memory.buffer = cut_dialogue_history(self.agent.memory.buffer, keep_last_n_words=500)
res = self.agent({"input": text.strip()})
res['output'] = res['output'].replace("\\", "/")
response = res['output']
response = res['output']
state = state + [(text, response)]
print(f"\nProcessed run_text, Input text: {text}\nCurrent state: {state}\n"
f"Current Memory: {self.agent.memory.buffer}")
return state, state


def init_agent(self, openai_api_key, image_caption, dense_caption, video_caption, tags, state):
def init_agent(self, api_key, image_caption, dense_caption, video_caption, tags, state, provider="openai"):
chat_history =''
PREFIX = "ChatVideo is a chatbot that chats with you based on video descriptions."
FORMAT_INSTRUCTIONS = """
Expand All @@ -65,10 +119,18 @@ def init_agent(self, openai_api_key, image_caption, dense_caption, video_caption
{agent_scratchpad}
"""
self.memory.clear()
if not openai_api_key.startswith('sk-'):
return gr.update(visible = False),state, state, "Please paste your key here !"
self.llm = OpenAI(temperature=0, openai_api_key=openai_api_key,model_name="gpt-4")
# openai.api_base = 'https://api.openai-proxy.com/v1/'

# Resolve provider from argument or environment
provider = (provider or os.environ.get("LLM_PROVIDER", "openai")).lower()

if not api_key or not api_key.strip():
return gr.update(visible=False), state, state, "Please paste your API key!"

# Provider-specific API key validation
if provider == "openai" and not api_key.startswith("sk-"):
return gr.update(visible=False), state, state, "Please paste your OpenAI key (sk-...)!"

self.llm = create_llm(provider=provider, api_key=api_key)
self.agent = initialize_agent(
self.tools,
self.llm,
Expand All @@ -78,7 +140,7 @@ def init_agent(self, openai_api_key, image_caption, dense_caption, video_caption
return_intermediate_steps=True,
agent_kwargs={'prefix': PREFIX, 'format_instructions': FORMAT_INSTRUCTIONS, 'suffix': SUFFIX}, )
state = state + [("I upload a video, Please watch it first! ","I have watch this video, Let's chat!")]
return gr.update(visible = True),state, state, openai_api_key
return gr.update(visible = True),state, state, api_key

if __name__=="__main__":
import pdb
Expand Down
Empty file.
Loading