-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathMakefile
More file actions
122 lines (102 loc) · 4.48 KB
/
Makefile
File metadata and controls
122 lines (102 loc) · 4.48 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
# Command-style targets (none of them names a file to build), grouped by area.
# Multiple .PHONY lines accumulate, so this declares the same set as one long line.
# NOTE(review): db-stop is declared phony but no db-stop rule is visible in this
# file — confirm whether the rule is missing or the name should be dropped.
.PHONY: help install install-dev clean
.PHONY: test lint format
.PHONY: db-start db-stop
.PHONY: transcribe fetch-coda import-sessions fetch-metadata copy-to-journal
.PHONY: extract-entities
.PHONY: enumerate-channel download-transcripts download-audio download-video download-all
# When a .env file sits next to this Makefile, read it as makefile syntax and
# export all make variables into the environment of every recipe command.
# Note: .env is parsed by make, not by a shell, so quotes written around a
# value become part of the make variable's value.
ifneq ($(wildcard .env),)
include .env
export
endif
# Print an overview of the available targets, one section per workflow.
# This is the first rule in the visible file, so a bare `make` runs it unless a
# default goal is set elsewhere.
help:
	@echo "Available commands:"
	@echo ""
	@echo "Installation & Setup:"
	@echo " make install - Install project dependencies"
	@echo " make install-dev - Install project with dev dependencies"
	@echo " make clean - Clean up cache and temporary files"
	@echo ""
	@echo "Development:"
	@echo " make test - Run tests"
	@echo " make lint - Run linter (ruff)"
	@echo " make format - Format code with black"
	@echo ""
	@echo "Database:"
	@echo " make db-start - Start SurrealDB"
	@echo ""
	@echo "Transcription Pipeline:"
	@echo " make fetch-coda - Fetch latest data from Coda API"
	@echo " make import-sessions - Import sessions from Coda JSON to DB"
	@echo " make fetch-metadata - Fetch YouTube metadata for sessions"
	@echo " make transcribe - Run transcription pipeline (WhisperX)"
	@echo " make copy-to-journal - Copy transcripts to journal repository"
	@echo ""
	@echo "Entity Extraction Pipeline:"
	@echo " make extract-entities - Extract entities from transcripts (Cohere AI)"
	@echo ""
	@echo "YouTube Channel Download:"
	@echo " make enumerate-channel - Enumerate all videos on the channel"
	@echo " make download-transcripts - Download transcripts for all channel videos"
	@echo " make download-audio - Download audio (MP3) for all channel videos"
	@echo " make download-video - Download video for all channel videos"
	@echo " make download-all - Download transcripts, audio, and video"
# Install project dependencies by running `uv sync`.
install:
	uv sync
# Install the project with its dev dependencies: `uv sync` with --all-extras.
install-dev:
	uv sync --all-extras
# Remove Python bytecode, tool caches and packaging output under the current
# directory.
clean:
	# Errors from the directory sweep are silenced and ignored (best effort).
	find . -type d -name "__pycache__" -exec rm -rf {} + 2>/dev/null || true
	find . -type f -name "*.pyc" -delete
	rm -rf .pytest_cache .ruff_cache build dist *.egg-info
# Run pytest against tests/ after sourcing the .venv activation script.
# Activation and pytest share one recipe line because each recipe line runs in
# its own shell.
test:
	. .venv/bin/activate && python -m pytest tests/
# Run `ruff check` over src/ and tests/ after sourcing the .venv activation script.
lint:
	. .venv/bin/activate && ruff check src/ tests/
# Format src/ and tests/ with black, then apply ruff's automatic fixes.
# ruff runs only if black succeeded.
format:
	. .venv/bin/activate && black src/ tests/ && ruff check --fix src/ tests/
# Settings for `make db-start`. Each one can be overridden from the
# environment, the .env file, or the command line, for example:
#   make db-start DB_START_BIND=127.0.0.1:8080 DB_START_STORE=rocksdb://./data/database
# The defaults reproduce the values that used to be hard-coded in the recipe,
# including the machine-specific absolute storage path.
# NOTE(review): the defaults listen on 0.0.0.0 with root/root credentials —
# confirm this is only ever used on a trusted network.
DB_START_LOG ?= trace
DB_START_USER ?= root
DB_START_PASS ?= root
DB_START_BIND ?= 0.0.0.0:8080
DB_START_STORE ?= rocksdb:///mnt/md0/projects/Journal-Utilities/data/database

# Start SurrealDB in the foreground with the settings above.
# The command line (including the password) is echoed by make before it runs.
db-start:
	surreal start --log "$(DB_START_LOG)" --user "$(DB_START_USER)" --pass "$(DB_START_PASS)" --bind "$(DB_START_BIND)" "$(DB_START_STORE)"
# Download the Coda table rows as JSON to data/input/livestream_fulldata_table.json.
# Requires CODA_API_TOKEN (normally supplied through .env).
#
# curl runs with --fail so an HTTP error status (e.g. 401 or 404) makes the
# target fail instead of saving the error body and reporting success. The
# response goes to a temporary file that is moved into place only on success,
# so a failed request never clobbers a previously fetched file.
fetch-coda:
	@echo "Fetching latest data from Coda API..."
	@if [ -z "$(CODA_API_TOKEN)" ]; then \
		echo "Error: CODA_API_TOKEN not found in .env file"; \
		exit 1; \
	fi
	@mkdir -p data/input
	@out=data/input/livestream_fulldata_table.json; \
	if curl --fail --show-error -X GET "https://coda.io/apis/v1/docs/TwB_SP81yq/tables/grid-cjvFiXp3a3/rows?useColumnNames=true" \
		-H "Authorization: Bearer $(CODA_API_TOKEN)" \
		-o "$$out.tmp"; then \
		mv -f "$$out.tmp" "$$out"; \
	else \
		status=$$?; \
		rm -f "$$out.tmp"; \
		echo "Error: Coda API request failed (curl exit status $$status)"; \
		exit $$status; \
	fi
	@echo "Data saved to data/input/livestream_fulldata_table.json"
# Run ingest_db_create_wav.py with `--step import` from inside
# src/journal_utilities, using the .venv environment.
import-sessions:
	@echo "Importing sessions from Coda JSON to database..."
	. .venv/bin/activate && cd src/journal_utilities && python ingest_db_create_wav.py --step import
# Run ingest_db_create_wav.py with `--step metadata` from inside
# src/journal_utilities, using the .venv environment.
fetch-metadata:
	@echo "Fetching YouTube metadata for sessions..."
	. .venv/bin/activate && cd src/journal_utilities && python ingest_db_create_wav.py --step metadata
# Run transcribe.py from inside src/journal_utilities, using the .venv environment.
transcribe:
	@echo "Starting transcription pipeline..."
	. .venv/bin/activate && cd src/journal_utilities && python transcribe.py
# Run ingest_db_create_wav.py with `--step copy` from inside
# src/journal_utilities, using the .venv environment.
copy-to-journal:
	@echo "Copying transcripts to journal repository..."
	. .venv/bin/activate && cd src/journal_utilities && python ingest_db_create_wav.py --step copy
# Run the journalrag.main module in the .venv environment.
# Aborts with an error message when COHERE_API_KEY is empty or unset.
extract-entities:
	@echo "Extracting entities from transcripts using Cohere AI..."
	@[ -n "$(COHERE_API_KEY)" ] || { \
		echo "Error: COHERE_API_KEY not found in .env file"; \
		exit 1; \
	}
	. .venv/bin/activate && python -m journalrag.main
# Run scripts/download_channel.py with --enumerate-only in the .venv environment.
enumerate-channel:
	@echo "Enumerating videos on the Active Inference channel..."
	. .venv/bin/activate && python scripts/download_channel.py --enumerate-only
# Run scripts/download_channel.py with --transcripts --resume in the .venv environment.
download-transcripts:
	@echo "Downloading transcripts for all channel videos..."
	. .venv/bin/activate && python scripts/download_channel.py --transcripts --resume
# Run scripts/download_channel.py with --audio --resume in the .venv environment.
download-audio:
	@echo "Downloading audio (MP3) for all channel videos..."
	. .venv/bin/activate && python scripts/download_channel.py --audio --resume
# Run scripts/download_channel.py with --video --resume in the .venv environment.
download-video:
	@echo "Downloading video for all channel videos..."
	. .venv/bin/activate && python scripts/download_channel.py --video --resume
# Run scripts/download_channel.py once with --transcripts, --audio and --video
# combined (plus --resume) in the .venv environment.
download-all:
	@echo "Downloading transcripts, audio, and video for all channel videos..."
	. .venv/bin/activate && python scripts/download_channel.py --transcripts --audio --video --resume