Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ https://user-images.githubusercontent.com/24236723/233631602-6a69d83c-83ef-41ed-
### Build video chat with:
* [End2End](https://github.com/OpenGVLab/Ask-Anything/tree/main/video_chat#running-usage)
* [ChatGPT](https://github.com/OpenGVLab/Ask-Anything/tree/main/video_chat_text/video_chat_with_ChatGPT#running-usage)
* [MiniMax](https://github.com/OpenGVLab/Ask-Anything/tree/main/video_chat_with_ChatGPT#using-minimax-as-llm-provider) — Use MiniMax-M2.7 as an alternative LLM provider
* [StableLM](https://github.com/OpenGVLab/Ask-Anything/tree/main/video_chat_text/video_chat_with_StableLM#running-usage)
* [MOSS](https://github.com/OpenGVLab/Ask-Anything/tree/main/video_chat_text/video_chat_with_MOSS#running-usage)
* [MiniGPT-4](https://github.com/OpenGVLab/Ask-Anything/tree/main/video_chat_text/video_miniGPT4#running-usage)
Expand Down
27 changes: 22 additions & 5 deletions video_chat_with_ChatGPT/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,13 +40,30 @@ cd ./pretrained_models/flan-t5-large-finetuned-openai-summarize_from_feedback
git lfs pull
cd ../..

# Configure the necessary ChatGPT APIs
export OPENAI_API_KEY={Your_Private_Openai_Key}

# Run the VideoChat gradio demo.
python app.py
# Configure the necessary ChatGPT APIs
export OPENAI_API_KEY={Your_Private_Openai_Key}

# Run the VideoChat gradio demo.
python app.py
```

## Using MiniMax as LLM Provider

You can use [MiniMax](https://www.minimaxi.com) as an alternative LLM provider instead of OpenAI. MiniMax offers the MiniMax-M2.7 model via an OpenAI-compatible API.

```shell
# Set your MiniMax API key
export MINIMAX_API_KEY={Your_MiniMax_API_Key}

# Optionally set the default provider via environment variable
export LLM_PROVIDER=minimax

# Run the demo
python app.py
```

You can also select the LLM provider from the **LLM Provider** dropdown in the Gradio UI at runtime.

# Acknowledgement

The project is based on [InternVideo](https://github.com/OpenGVLab/InternVideo), [Tag2Text](https://github.com/xinyu1205/Tag2Text), [GRiT](https://github.com/JialianW/GRiT), [mrm8488](https://huggingface.co/mrm8488/flan-t5-large-finetuned-openai-summarize_from_feedback) and [ChatGPT](https://openai.com/blog/chatgpt). Thanks to the authors for their efforts.
Expand Down
14 changes: 10 additions & 4 deletions video_chat_with_ChatGPT/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from util import *
import gradio as gr
from chatbot import *
from chatbot import LLM_PROVIDERS
from load_internvideo import *
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
from simplet5 import SimpleT5
Expand Down Expand Up @@ -116,17 +117,22 @@ def set_example_video(example: list) -> dict:
with gr.Column():
input_video_path = gr.inputs.Video(label="Input Video")
input_tag = gr.Textbox(lines=1, label="User Prompt (Optional, Enter with commas)",visible=False)

with gr.Row():
with gr.Column(sclae=0.3, min_width=0):
caption = gr.Button("✍ Upload")
chat_video = gr.Button(" 🎥 Let's Chat! ", interactive=False)
with gr.Column(scale=0.7, min_width=0):
loadinglabel = gr.Label(label="State")
with gr.Column():
llm_provider = gr.Dropdown(
choices=list(LLM_PROVIDERS.keys()),
value="openai",
label="LLM Provider",
)
openai_api_key_textbox = gr.Textbox(
value=os.environ["OPENAI_API_KEY"],
placeholder="Paste your OpenAI API key here to start (sk-...)",
value=os.environ.get("OPENAI_API_KEY", ""),
placeholder="Paste your API key here to start",
show_label=False,
lines=1,
type="password",
Expand Down Expand Up @@ -156,7 +162,7 @@ def set_example_video(example: list) -> dict:
caption.click(lambda: [], None, state)
caption.click(inference,[input_video_path,input_tag],[model_tag_output, user_tag_output, image_caption_output, dense_caption_output,video_caption_output, chat_video, loadinglabel])

chat_video.click(bot.init_agent, [openai_api_key_textbox, image_caption_output, dense_caption_output, video_caption_output, model_tag_output, state], [input_raws,chatbot, state, openai_api_key_textbox])
chat_video.click(bot.init_agent, [openai_api_key_textbox, image_caption_output, dense_caption_output, video_caption_output, model_tag_output, state, llm_provider], [input_raws,chatbot, state, openai_api_key_textbox])

txt.submit(bot.run_text, [txt, state], [chatbot, state])
txt.submit(lambda: "", None, txt)
Expand Down
76 changes: 69 additions & 7 deletions video_chat_with_ChatGPT/chatbot.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,64 @@
from langchain.agents.tools import Tool
from langchain.chains.conversation.memory import ConversationBufferMemory
from langchain.llms.openai import OpenAI
from langchain.chat_models import ChatOpenAI
import os
import re
import gradio as gr
import openai

# Registry of supported LLM providers with their default model and endpoint
LLM_PROVIDERS = {
    "openai": {
        "default_model": "gpt-4",
        "api_base": None,  # None -> the stock OpenAI endpoint
    },
    "minimax": {
        "default_model": "MiniMax-M2.7",
        "api_base": "https://api.minimax.io/v1",
    },
}


def create_llm(provider, api_key, model_name=None, temperature=0):
    """Build the LangChain LLM object for the requested provider.

    Args:
        provider: Provider identifier, "openai" or "minimax"
            (matched case-insensitively).
        api_key: Credential for the chosen provider's API.
        model_name: Optional model override; the provider's default
            model is used when this is falsy.
        temperature: Sampling temperature forwarded to the model.

    Returns:
        A LangChain LLM or ChatModel instance.

    Raises:
        ValueError: If *provider* is not a key of ``LLM_PROVIDERS``.
    """
    key = provider.lower()
    if key not in LLM_PROVIDERS:
        raise ValueError(
            f"Unsupported provider '{key}'. "
            f"Supported: {list(LLM_PROVIDERS.keys())}"
        )

    settings = LLM_PROVIDERS[key]
    chosen_model = model_name or settings["default_model"]

    if key != "minimax":
        # OpenAI path: plain completion-style LLM on the default endpoint.
        return OpenAI(
            temperature=temperature,
            openai_api_key=api_key,
            model_name=chosen_model,
        )

    # MiniMax requires temperature in (0.0, 1.0] — clamp into that range.
    clamped = min(max(temperature, 0.01), 1.0)
    return ChatOpenAI(
        model_name=chosen_model,
        openai_api_key=api_key,
        openai_api_base=settings["api_base"],
        temperature=clamped,
    )


def cut_dialogue_history(history_memory, keep_last_n_words=400):
if history_memory is None or len(history_memory) == 0:
Expand All @@ -32,14 +86,14 @@ def run_text(self, text, state):
self.agent.memory.buffer = cut_dialogue_history(self.agent.memory.buffer, keep_last_n_words=500)
res = self.agent({"input": text.strip()})
res['output'] = res['output'].replace("\\", "/")
response = res['output']
response = res['output']
state = state + [(text, response)]
print(f"\nProcessed run_text, Input text: {text}\nCurrent state: {state}\n"
f"Current Memory: {self.agent.memory.buffer}")
return state, state


def init_agent(self, openai_api_key, image_caption, dense_caption, video_caption, tags, state):
def init_agent(self, api_key, image_caption, dense_caption, video_caption, tags, state, provider="openai"):
chat_history =''
PREFIX = "ChatVideo is a chatbot that chats with you based on video descriptions."
FORMAT_INSTRUCTIONS = """
Expand All @@ -65,10 +119,18 @@ def init_agent(self, openai_api_key, image_caption, dense_caption, video_caption
{agent_scratchpad}
"""
self.memory.clear()
if not openai_api_key.startswith('sk-'):
return gr.update(visible = False),state, state, "Please paste your key here !"
self.llm = OpenAI(temperature=0, openai_api_key=openai_api_key,model_name="gpt-4")
# openai.api_base = 'https://api.openai-proxy.com/v1/'

# Resolve provider from argument or environment
provider = (provider or os.environ.get("LLM_PROVIDER", "openai")).lower()

if not api_key or not api_key.strip():
return gr.update(visible=False), state, state, "Please paste your API key!"

# Provider-specific API key validation
if provider == "openai" and not api_key.startswith("sk-"):
return gr.update(visible=False), state, state, "Please paste your OpenAI key (sk-...)!"

self.llm = create_llm(provider=provider, api_key=api_key)
self.agent = initialize_agent(
self.tools,
self.llm,
Expand All @@ -78,7 +140,7 @@ def init_agent(self, openai_api_key, image_caption, dense_caption, video_caption
return_intermediate_steps=True,
agent_kwargs={'prefix': PREFIX, 'format_instructions': FORMAT_INSTRUCTIONS, 'suffix': SUFFIX}, )
state = state + [("I upload a video, Please watch it first! ","I have watch this video, Let's chat!")]
return gr.update(visible = True),state, state, openai_api_key
return gr.update(visible = True),state, state, api_key

if __name__=="__main__":
import pdb
Expand Down
Empty file.
Loading