-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdocker-compose.yml
More file actions
155 lines (145 loc) · 4.87 KB
/
docker-compose.yml
File metadata and controls
155 lines (145 loc) · 4.87 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
# ============================================================
# Code Documentation Assistant — Docker Compose (Local Dev)
# ============================================================
# Usage:
# docker compose up # default: full tier
# MODEL_TIER=lightweight docker compose up # lightweight tier
# docker compose up --build # rebuild after code changes
# docker compose --profile observability up # + MLflow tracking server
#
# GPU acceleration:
# docker compose -f docker-compose.yml -f docker-compose.gpu.yml up
# (or use ./run.sh — auto-detects GPU)
#
# Access:
# Streamlit UI: http://localhost:8501
# MLflow UI: http://localhost:5000 (--profile observability only)
# ============================================================
services:
  # --- Ollama (LLM inference server) ---
  # Serves the LLM over HTTP on 11434; other services reach it at
  # http://ollama:11434 on the compose network.
  ollama:
    image: ollama/ollama:latest
    container_name: code-doc-ollama
    ports:
      - "11434:11434"
    volumes:
      # Model weights persist here, so pulls survive container recreation.
      - ollama_data:/root/.ollama
    environment:
      # Bind on all interfaces so other containers can connect.
      - OLLAMA_HOST=0.0.0.0
    healthcheck:
      # `ollama list` succeeds only once the server is accepting requests.
      test: ["CMD", "ollama", "list"]
      interval: 10s
      timeout: 5s
      retries: 5
      start_period: 15s
    # GPU acceleration is optional — Ollama falls back to CPU if no GPU available.
    # To enable GPU: docker compose -f docker-compose.yml -f docker-compose.gpu.yml up
    # See docker-compose.gpu.yml for NVIDIA GPU reservation.
# --- Model Bootstrap ---
# Pulls the LLM and embedding models on first startup.
# Runs once then exits — models persist in the ollama_data volume.
ollama-bootstrap:
image: ollama/ollama:latest
container_name: code-doc-bootstrap
depends_on:
ollama:
condition: service_healthy
environment:
- OLLAMA_HOST=ollama:11434
- MODEL_TIER=${MODEL_TIER:-full}
- EMBEDDING_MODEL=${EMBEDDING_MODEL:-nomic-embed-text}
entrypoint: >
bash -c '
echo "=== Bootstrapping models ==="
case "$${MODEL_TIER}" in
full) MODEL="mistral-nemo" ;;
balanced) MODEL="qwen2.5-coder:7b" ;;
lightweight) MODEL="phi3.5" ;;
*) MODEL="mistral-nemo" ;;
esac
echo "Pulling LLM: $${MODEL} (tier: $${MODEL_TIER})"
ollama pull $${MODEL}
echo "Pulling embedding model: $${EMBEDDING_MODEL}"
ollama pull $${EMBEDDING_MODEL}
echo "=== Bootstrap complete ==="
'
restart: "no"
  # --- ChromaDB (Vector Database) ---
  chromadb:
    # NOTE(review): tag is pinned; the /api/v1 heartbeat healthcheck below
    # matches this version — confirm it still exists before upgrading.
    image: chromadb/chroma:0.6.3
    container_name: code-doc-chromadb
    ports:
      - "8000:8000"
    volumes:
      # Vector store persists across container recreation.
      - chroma_data:/chroma/chroma
    environment:
      - IS_PERSISTENT=TRUE
      - PERSIST_DIRECTORY=/chroma/chroma
      - ANONYMIZED_TELEMETRY=false
    healthcheck:
      # Uses the image's own python3 + urllib — no curl/wget in the image assumed.
      test: ["CMD", "python3", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/api/v1/heartbeat')"]
      interval: 10s
      timeout: 5s
      retries: 5
      start_period: 10s
  # --- MLflow tracking server (optional — profile: observability) ---
  # Included here so the research repo's master branch can inherit it without
  # needing to add a new service. Disabled by default to keep the basic
  # `docker compose up` experience unchanged.
  #
  # Enable with:
  #   docker compose --profile observability up
  #
  mlflow:
    # NOTE(review): `:latest` is unpinned, unlike chromadb above — consider
    # pinning a version tag for reproducible startups.
    image: ghcr.io/mlflow/mlflow:latest
    container_name: code-doc-mlflow
    # Only started when the observability profile is explicitly requested.
    profiles: ["observability"]
    ports:
      - "5000:5000"
    volumes:
      # Holds both the SQLite backend store and the artifact root below.
      - mlflow_data:/mlflow
    command:
      - mlflow
      - server
      - --host=0.0.0.0
      - --port=5000
      # Four slashes: sqlite:// scheme + absolute path /mlflow/mlflow.db.
      - --backend-store-uri=sqlite:////mlflow/mlflow.db
      - --default-artifact-root=/mlflow/artifacts
    healthcheck:
      test: ["CMD", "python3", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:5000/health')"]
      interval: 15s
      timeout: 5s
      retries: 5
      start_period: 10s
# --- Application (Streamlit UI + RAG pipeline) ---
app:
build:
context: .
dockerfile: Dockerfile
container_name: code-doc-app
ports:
- "8501:8501"
depends_on:
ollama:
condition: service_healthy
chromadb:
condition: service_healthy
environment:
- OLLAMA_HOST=http://ollama:11434
- CHROMA_HOST=http://chromadb:8000
- MODEL_TIER=${MODEL_TIER:-full}
- EMBEDDING_MODEL=${EMBEDDING_MODEL:-nomic-embed-text}
- LOG_LEVEL=${LOG_LEVEL:-info}
# MLflow is optional in main — only connected when the observability profile is active.
# The app tolerates a missing MLFLOW_TRACKING_URI gracefully (all MLflow calls are
# wrapped in try/except throughout). Set this to connect to an external MLflow server:
- MLFLOW_TRACKING_URI=${MLFLOW_TRACKING_URI:-}
volumes:
- ${REPO_PATH:-./repos}:/data/repos:ro
volumes:
  # Pulled LLM/embedding model weights (shared by ollama + bootstrap).
  ollama_data:
    driver: local
  # ChromaDB persistent vector store.
  chroma_data:
    driver: local
  # MLflow SQLite backend store + artifacts (observability profile only).
  mlflow_data:
    driver: local