-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdocker-compose.yml
More file actions
155 lines (145 loc) · 4.87 KB
/
docker-compose.yml
File metadata and controls
155 lines (145 loc) · 4.87 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
# ============================================================
# Code Documentation Assistant — Docker Compose (Local Dev)
# ============================================================
# Usage:
# docker compose up # default: full tier
# MODEL_TIER=lightweight docker compose up # lightweight tier
# docker compose up --build # rebuild after code changes
# docker compose --profile observability up # + MLflow tracking server
#
# GPU acceleration:
# docker compose -f docker-compose.yml -f docker-compose.gpu.yml up
# (or use ./run.sh — auto-detects GPU)
#
# Access:
# Streamlit UI: http://localhost:8501
# MLflow UI: http://localhost:5000 (--profile observability only)
# ============================================================
services:
  # --- Ollama (LLM inference server) ---
  # Serves the LLM over HTTP on 11434; other services reach it at
  # http://ollama:11434 on the compose network.
  ollama:
    image: ollama/ollama:latest
    container_name: code-doc-ollama
    ports:
      - "11434:11434"
    volumes:
      # Model weights persist here, so pulls survive container recreation.
      - ollama_data:/root/.ollama
    environment:
      # Bind on all interfaces so other containers can connect.
      - OLLAMA_HOST=0.0.0.0
    healthcheck:
      # `ollama list` succeeds only once the server is accepting requests.
      test: ["CMD", "ollama", "list"]
      interval: 10s
      timeout: 5s
      retries: 5
      start_period: 15s
    # GPU acceleration is optional — Ollama falls back to CPU if no GPU available.
    # To enable GPU: docker compose -f docker-compose.yml -f docker-compose.gpu.yml up
    # See docker-compose.gpu.yml for NVIDIA GPU reservation.
# --- Model Bootstrap ---
# Pulls the LLM and embedding models on first startup.
# Runs once then exits — models persist in the ollama_data volume.
ollama-bootstrap:
image: ollama/ollama:latest
container_name: code-doc-bootstrap
depends_on:
ollama:
condition: service_healthy
environment:
- OLLAMA_HOST=ollama:11434
- MODEL_TIER=${MODEL_TIER:-full}
- EMBEDDING_MODEL=${EMBEDDING_MODEL:-nomic-embed-text}
entrypoint: >
bash -c '
echo "=== Bootstrapping models ==="
case "$${MODEL_TIER}" in
full) MODEL="mistral-nemo" ;;
balanced) MODEL="qwen2.5-coder:7b" ;;
lightweight) MODEL="phi3.5" ;;
*) MODEL="mistral-nemo" ;;
esac
echo "Pulling LLM: $${MODEL} (tier: $${MODEL_TIER})"
ollama pull $${MODEL}
echo "Pulling embedding model: $${EMBEDDING_MODEL}"
ollama pull $${EMBEDDING_MODEL}
echo "=== Bootstrap complete ==="
'
restart: "no"
  # --- ChromaDB (Vector Database) ---
  chromadb:
    # NOTE(review): tag is pinned; the /api/v1 heartbeat healthcheck below
    # matches this version — confirm it still exists before upgrading.
    image: chromadb/chroma:0.6.3
    container_name: code-doc-chromadb
    ports:
      - "8000:8000"
    volumes:
      # Vector store persists across container recreation.
      - chroma_data:/chroma/chroma
    environment:
      - IS_PERSISTENT=TRUE
      - PERSIST_DIRECTORY=/chroma/chroma
      - ANONYMIZED_TELEMETRY=false
    healthcheck:
      # Uses the image's own python3 + urllib — no curl/wget in the image assumed.
      test: ["CMD", "python3", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/api/v1/heartbeat')"]
      interval: 10s
      timeout: 5s
      retries: 5
      start_period: 10s
  # --- MLflow tracking server (optional — profile: observability) ---
  # Included here so the research repo's master branch can inherit it without
  # needing to add a new service. Disabled by default to keep the basic
  # `docker compose up` experience unchanged.
  #
  # Enable with:
  #   docker compose --profile observability up
  #
  mlflow:
    # NOTE(review): `:latest` is unpinned, unlike chromadb above — consider
    # pinning a version tag for reproducible startups.
    image: ghcr.io/mlflow/mlflow:latest
    container_name: code-doc-mlflow
    # Only started when the observability profile is explicitly requested.
    profiles: ["observability"]
    ports:
      - "5000:5000"
    volumes:
      # Holds both the SQLite backend store and the artifact root below.
      - mlflow_data:/mlflow
    command:
      - mlflow
      - server
      - --host=0.0.0.0
      - --port=5000
      # Four slashes: sqlite:// scheme + absolute path /mlflow/mlflow.db.
      - --backend-store-uri=sqlite:////mlflow/mlflow.db
      - --default-artifact-root=/mlflow/artifacts
    healthcheck:
      test: ["CMD", "python3", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:5000/health')"]
      interval: 15s
      timeout: 5s
      retries: 5
      start_period: 10s
# --- Application (Streamlit UI + RAG pipeline) ---
app:
build:
context: .
dockerfile: Dockerfile
container_name: code-doc-app
ports:
- "8501:8501"
depends_on:
ollama:
condition: service_healthy
chromadb:
condition: service_healthy
environment:
- OLLAMA_HOST=http://ollama:11434
- CHROMA_HOST=http://chromadb:8000
- MODEL_TIER=${MODEL_TIER:-full}
- EMBEDDING_MODEL=${EMBEDDING_MODEL:-nomic-embed-text}
- LOG_LEVEL=${LOG_LEVEL:-info}
# MLflow is optional in main — only connected when the observability profile is active.
# The app tolerates a missing MLFLOW_TRACKING_URI gracefully (all MLflow calls are
# wrapped in try/except throughout). Set this to connect to an external MLflow server:
- MLFLOW_TRACKING_URI=${MLFLOW_TRACKING_URI:-}
volumes:
- ${REPO_PATH:-./repos}:/data/repos:ro
volumes:
  # Pulled LLM/embedding model weights (shared by ollama + bootstrap).
  ollama_data:
    driver: local
  # ChromaDB persistent vector store.
  chroma_data:
    driver: local
  # MLflow SQLite backend store + artifacts (observability profile only).
  mlflow_data:
    driver: local