codeperfectplus · codeperfectplus · Jun 9, 2026 · Jun 9, 2026
diff --git a/.env.example b/.env.example
@@ -0,0 +1,5 @@
+OPENAI_API_KEY=
+LOCAL_LLM_API_URL=
+GROQ_API_KEY=
+LOCAL_LLM_REQUEST_TIMEOUT=
+GUNICORN_TIMEOUT=
diff --git a/.gitignore b/.gitignore
@@ -178,4 +178,5 @@ logs/*
 *.wav
 *.mp3
 *.index
-*.pkl
+*.pkl
+videos/
diff --git a/README.md b/README.md
@@ -5,7 +5,9 @@
 ## Features
 
 * 🎙️ **Audio Transcription**: Convert spoken content from meetings into text using advanced speech-to-text capabilities.
+* 🎬 **Video Upload Support**: Upload meeting videos directly and extract audio automatically before transcription.
 * 🧠 **Summarization with LLMs**: Generate concise summaries of transcribed text utilizing powerful language models.
+* 🧩 **Flexible LLM Providers**: Use cloud models (OpenAI, Groq) or connect to local models through LM Studio or Ollama.
 * Mind Map Generation: Create visual representations of meeting summaries to enhance understanding and retention.
 * 🖥️ **User-Friendly Interface**: Interact with a clean and responsive web UI built with Flask.
 * 🐳 **Dockerized Deployment**: Easily deploy the application using Docker and Docker Compose for a consistent environment setup.
@@ -43,11 +45,14 @@
    ```bash
    export GROQ_API_KEY=your_groq_api_key
    export OPENAI_API_KEY=your_openai_api_key
+   export LOCAL_LLM_API_URL=http://your-local-llm-host:1234
+   export LOCAL_LLM_REQUEST_TIMEOUT=900
+   export GUNICORN_TIMEOUT=960
    ```
 
 
 
-*Replace `your_groq_api_key` and `your_openai_api_key` with your actual API keys.*
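+*Replace the placeholder values with your actual API keys and local LLM URL. The example URL uses LM Studio's default port (1234); for Ollama, use port 11434. Increase `LOCAL_LLM_REQUEST_TIMEOUT` for slower local models, and keep `GUNICORN_TIMEOUT` higher than that value so Gunicorn does not restart a worker while a local LLM request is still in progress.*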
 
 4. **Build and Run the Docker Container**
 

diff --git a/audio_extraction.py b/audio_extraction.py
@@ -0,0 +1,46 @@
+"""Extract a mono 16 kHz WAV audio track from a video file with ffmpeg."""
+import subprocess
+import sys
+from pathlib import Path
+
+
+def extract_audio(video_file, output_file=None):
+    """Extract mono 16 kHz PCM WAV audio from a video file using ffmpeg.
+
+    Args:
+        video_file: Path of the input video.
+        output_file: Destination path; when omitted, the input path with
+            its suffix replaced by ``.wav`` is used.
+
+    Returns:
+        The output path as a string.
+
+    Raises:
+        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
+    """
+    video_path = Path(video_file)
+
+    if output_file is None:
+        output_file = video_path.with_suffix(".wav")
+
+    command = [
+        "ffmpeg",
+        "-y",                 # global option, so it goes before the input
+        "-i", str(video_path),
+        "-vn",                # no video
+        "-acodec", "pcm_s16le",
+        "-ar", "16000",       # 16 kHz sample rate
+        "-ac", "1",           # mono audio
+        str(output_file),
+    ]
+
+    subprocess.run(command, check=True)
+    print(f"Audio saved to: {output_file}")
+    return str(output_file)
+
+
+if __name__ == "__main__":
+    # Usage: python audio_extraction.py VIDEO_FILE [OUTPUT_FILE]
+    if len(sys.argv) not in (2, 3):
+        sys.exit(f"usage: {sys.argv[0]} VIDEO_FILE [OUTPUT_FILE]")
+    extract_audio(*sys.argv[1:])
diff --git a/docker-compose.yml b/docker-compose.yml
@@ -1,21 +1,19 @@
 services:
   flask:
-    container_name: Speak2Summary-flask
+    container_name: s2s-flask
     build:
       context: .
       dockerfile: Dockerfile
     restart: always
-    # ports:
-    #   - "5000:5000"
-    environment:
-      - GROQ_API_KEY=${GROQ_API_KEY}
-      - OPENAI_API_KEY=${OPENAI_API_KEY}
+    ports:
+      - "5000:5000"
+    env_file:
+      - .env
     depends_on:
       - redis
-    command: gunicorn -w 4 -b 0.0.0.0:5000 src.app:app --timeout 120
+    command: gunicorn -w 4 -b 0.0.0.0:5000 src.app:app --timeout ${GUNICORN_TIMEOUT:-960} --graceful-timeout 120
     networks:
       - Speak2Summary-net
-      - homelab
     labels:
       - "tsdproxy.enable=true"
       - "tsdproxy.name=speak2summary"
@@ -33,15 +31,14 @@ services:
       retries: 5
 
   celery:
-    container_name: Speak2Summary-celery
+    container_name: s2s-celery
     build:
       context: .
       dockerfile: Dockerfile
     restart: always
     command: celery -A src.celery_worker.celery worker --loglevel=info
-    environment:
-      - GROQ_API_KEY=${GROQ_API_KEY}
-      - OPENAI_API_KEY=${OPENAI_API_KEY}
+    env_file:
+      - .env
     depends_on:
       - redis
     networks:
@@ -53,7 +50,7 @@ services:
       com.Speak2Summary.service: "celery-worker"
 
   redis:
-    container_name: Speak2Summary-redis
+    container_name: s2s-redis
     image: redis:7-alpine
     restart: always
     ports:
@@ -71,8 +68,7 @@ services:
 networks:
   Speak2Summary-net:
     driver: bridge
-  homelab:
-    external: true
+
 
 # networks:
 #   homelab:

diff --git a/src/audio_extraction.py b/src/audio_extraction.py
@@ -0,0 +1,48 @@
+# cython: language_level=3
+import subprocess
+from pathlib import Path
+from typing import Optional
+
+
+def extract_audio(video_file: str, output_file: Optional[str] = None) -> str:
+    """Extract mono 16 kHz PCM WAV audio from a video file using ffmpeg.
+
+    Args:
+        video_file: Path of the input video.
+        output_file: Destination path; when omitted, the input path with
+            its suffix replaced by ``.wav`` is used.
+
+    Returns:
+        The path of the audio file, as a string.
+
+    Raises:
+        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
+    """
+    video_path = Path(video_file)
+    output_path = (
+        video_path.with_suffix(".wav") if output_file is None else Path(output_file)
+    )
+
+    command = [
+        "ffmpeg",
+        # Global options go before the input, per the ffmpeg synopsis.
+        "-nostdin",  # never wait on or consume the caller's standard input
+        "-y",  # overwrite an existing output file without prompting
+        "-i",
+        str(video_path),
+        "-vn",  # drop the video stream
+        "-acodec",
+        "pcm_s16le",  # 16-bit little-endian PCM
+        "-ar",
+        "16000",  # 16 kHz sample rate
+        "-ac",
+        "1",  # mono
+        str(output_path),
+    ]
+
+    subprocess.run(command, check=True)
+    return str(output_path)
+
+
+if __name__ == "__main__":
+    extract_audio("videos/sample.mp4")
-Original file line number
+Diff line change
@@ Expand Up / @@ -178,4 +178,5 @@ logs/* @@
     *.wav
     *.mp3
     *.index
-    *.pkl
+    *.pkl
+    *.videos