Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
OPENAI_API_KEY=
LOCAL_LLM_API_URL=
GROQ_API_KEY=
LOCAL_LLM_REQUEST_TIMEOUT=
GUNICORN_TIMEOUT=
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -178,4 +178,5 @@ logs/*
*.wav
*.mp3
*.index
*.pkl
*.pkl
*.videos
7 changes: 6 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
## Features

* 🎙️ **Audio Transcription**: Convert spoken content from meetings into text using advanced speech-to-text capabilities.
* 🎬 **Video Upload Support**: Upload meeting videos directly and extract audio automatically before transcription.
* 🧠 **Summarization with LLMs**: Generate concise summaries of transcribed text utilizing powerful language models.
* 🧩 **Flexible LLM Providers**: Use cloud models (OpenAI, Groq) or connect to local models through LM Studio or Ollama.
* 🗺️ **Mind Map Generation**: Create visual representations of meeting summaries to enhance understanding and retention.
* 🖥️ **User-Friendly Interface**: Interact with a clean and responsive web UI built with Flask.
* 🐳 **Dockerized Deployment**: Easily deploy the application using Docker and Docker Compose for a consistent environment setup.
Expand Down Expand Up @@ -43,11 +45,14 @@
```bash
export GROQ_API_KEY=your_groq_api_key
export OPENAI_API_KEY=your_openai_api_key
export LOCAL_LLM_API_URL=http://your-local-llm-host:1234
export LOCAL_LLM_REQUEST_TIMEOUT=900
export GUNICORN_TIMEOUT=960
```



*Replace `your_groq_api_key` and `your_openai_api_key` with your actual API keys.*
*Replace the placeholder values with your actual keys and local LLM URL. For Ollama, use port 11434. Increase `LOCAL_LLM_REQUEST_TIMEOUT` for slower local models, and keep `GUNICORN_TIMEOUT` higher than that value.*

4. **Build and Run the Docker Container**

Expand Down
27 changes: 27 additions & 0 deletions audio_extraction.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import subprocess
from pathlib import Path


def extract_audio(video_file, output_file=None):
    """Extract the audio track of a video into a mono 16 kHz PCM WAV file.

    Args:
        video_file: Path to the source video.
        output_file: Destination path. When omitted, the video path with its
            suffix replaced by ``.wav`` is used.

    Returns:
        The path of the written WAV file, as a string.

    Raises:
        ValueError: If ``video_file`` does not name a file (e.g. ``""``).
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    video_path = Path(video_file)
    if not video_path.name:
        # Fail early with a clear message; an empty path would otherwise
        # surface as an obscure pathlib error or a pointless ffmpeg run.
        raise ValueError(f"video_file must name a file, got {video_file!r}")

    if output_file is None:
        output_file = video_path.with_suffix(".wav")

    command = [
        "ffmpeg",
        "-y",  # global option (overwrite output); ffmpeg expects these first
        "-i", str(video_path),
        "-vn",  # no video
        "-acodec", "pcm_s16le",
        "-ar", "16000",  # 16 kHz sample rate
        "-ac", "1",  # mono audio
        str(output_file),
    ]

    # check=True turns a non-zero ffmpeg exit status into CalledProcessError.
    subprocess.run(command, check=True)
    print(f"Audio saved to: {output_file}")
    return str(output_file)


if __name__ == "__main__":
    import sys

    # An empty path can never be extracted, so the video path (and an
    # optional output path) is taken from the command line.
    if len(sys.argv) not in (2, 3):
        raise SystemExit(f"usage: {sys.argv[0]} VIDEO_FILE [OUTPUT_FILE]")
    extract_audio(*sys.argv[1:])
26 changes: 11 additions & 15 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,21 +1,19 @@
services:
flask:
container_name: Speak2Summary-flask
container_name: s2s-flask
build:
context: .
dockerfile: Dockerfile
restart: always
# ports:
# - "5000:5000"
environment:
- GROQ_API_KEY=${GROQ_API_KEY}
- OPENAI_API_KEY=${OPENAI_API_KEY}
ports:
- "5000:5000"
env_file:
- .env
depends_on:
- redis
command: gunicorn -w 4 -b 0.0.0.0:5000 src.app:app --timeout 120
command: gunicorn -w 4 -b 0.0.0.0:5000 src.app:app --timeout ${GUNICORN_TIMEOUT:-960} --graceful-timeout 120
networks:
- Speak2Summary-net
- homelab
labels:
- "tsdproxy.enable=true"
- "tsdproxy.name=speak2summary"
Expand All @@ -33,15 +31,14 @@ services:
retries: 5

celery:
container_name: Speak2Summary-celery
container_name: s2s-celery
build:
context: .
dockerfile: Dockerfile
restart: always
command: celery -A src.celery_worker.celery worker --loglevel=info
environment:
- GROQ_API_KEY=${GROQ_API_KEY}
- OPENAI_API_KEY=${OPENAI_API_KEY}
env_file:
- .env
depends_on:
- redis
networks:
Expand All @@ -53,7 +50,7 @@ services:
com.Speak2Summary.service: "celery-worker"

redis:
container_name: Speak2Summary-redis
container_name: s2s-redis
image: redis:7-alpine
restart: always
ports:
Expand All @@ -71,8 +68,7 @@ services:
networks:
Speak2Summary-net:
driver: bridge
homelab:
external: true


# networks:
# homelab:
Expand Down
36 changes: 36 additions & 0 deletions src/audio_extraction.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# cython: language_level=3
import subprocess
from pathlib import Path
from typing import Optional


def extract_audio(video_file: str, output_file: Optional[str] = None) -> str:
    """Extract mono 16 kHz WAV audio from a video file using ffmpeg.

    Args:
        video_file: Path to the source video.
        output_file: Destination path. When omitted, the video path with its
            suffix replaced by ``.wav`` is used.

    Returns:
        The path of the written WAV file, as a string.

    Raises:
        ValueError: If ``video_file`` does not name a file (e.g. ``""``).
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    video_path = Path(video_file)
    if not video_path.name:
        # Fail early with a clear message; an empty path would otherwise
        # surface as an obscure pathlib error or a pointless ffmpeg run.
        raise ValueError(f"video_file must name a file, got {video_file!r}")

    if output_file is None:
        output_path = video_path.with_suffix(".wav")
    else:
        output_path = Path(output_file)

    command = [
        "ffmpeg",
        "-y",  # global option (overwrite output); ffmpeg expects these first
        "-i", str(video_path),
        "-vn",  # drop the video stream
        "-acodec", "pcm_s16le",
        "-ar", "16000",  # 16 kHz sample rate
        "-ac", "1",  # mono
        str(output_path),
    ]

    # check=True turns a non-zero ffmpeg exit status into CalledProcessError.
    subprocess.run(command, check=True)
    return str(output_path)


if __name__ == "__main__":
    import sys

    # Usage: audio_extraction.py [VIDEO_FILE [OUTPUT_FILE]]
    # With no arguments, fall back to the default sample path.
    cli_args = sys.argv[1:3] or ["videos/sample.mp4"]
    extract_audio(*cli_args)
Loading