# Source: openproject-haystack/docker-compose.local-ollama.yml at main (opf/openproject-haystack on GitHub)
# Temporary docker compose file for testing with local Ollama
# Use this with: docker compose -f docker-compose.local-ollama.yml up api

services:
  # Ollama services commented out for local testing
  # ollama:
  #   image: ollama/ollama
  #   ports:
  #     - "11434:11434"
  #   volumes:
  #     - ollama-data:/root/.ollama
  #   environment:
  #     - OLLAMA_MAX_LOADED_MODELS=1
  #     - OLLAMA_NUM_PARALLEL=4
  #   deploy:
  #     resources:
  #       limits:
  #         memory: 8G
  #         cpus: '4.0'
  #       reservations:
  #         memory: 6G
  #         cpus: '2.0'
  #   healthcheck:
  #     test: ["CMD", "/bin/ollama", "list"]
  #     interval: 30s
  #     timeout: 10s
  #     retries: 5
  #     start_period: 30s
  #   networks:
  #     - haystack-internal
  #   command: serve

  # ollama-init:
  #   image: curlimages/curl:latest
  #   volumes:
  #     - ./scripts:/scripts:ro
  #   environment:
  #     - OLLAMA_HOST=http://ollama:11434
  #     - MODELS_TO_PULL=${MODELS_TO_PULL:-mistral:latest}
  #   depends_on:
  #     ollama:
  #       condition: service_healthy
  #   networks:
  #     - haystack-internal
  #   command: ["/bin/sh", "/scripts/init-ollama-models.sh"]
  #   restart: "no"

  # API service for local testing against an Ollama instance that is not
  # started by this compose file (the ollama services above are commented out).
  api:
    build:
      dockerfile: Dockerfile
      context: .
    # Bind-mount the working tree's source and config into /app.
    volumes:
      - ./src:/app/src
      - ./config:/app/config
    # Share the host's network stack; OLLAMA_URL below points at
    # localhost:11434, presumably the Ollama instance running on the host.
    network_mode: host
    env_file: .env
    # Entries here take precedence over the same keys loaded from env_file.
    # Values are quoted so every one of them stays a string.
    environment:
      LOG_LEVEL: "INFO"
      PYTHONUNBUFFERED: "1"
      PYTHONDONTWRITEBYTECODE: "1"
      OLLAMA_URL: "http://localhost:11434"
      OLLAMA_MODEL: "mistral:latest"
      MODELS_TO_PULL: "mistral:latest"
      REQUIRED_MODELS: "mistral:latest"
      GENERATION_NUM_PREDICT: "1000"
      GENERATION_TEMPERATURE: "0.8"
    deploy:
      resources:
        limits:
          cpus: "2.0"
          memory: 2G
        reservations:
          cpus: "0.5"
          memory: 512M
    # Dependencies removed for local Ollama testing
    # depends_on:
    #   - ollama
    #   - ollama-init
    # Probes the same port (8000) that the uvicorn command below binds to.
    healthcheck:
      test:
        - "CMD"
        - "curl"
        - "-f"
        - "http://localhost:8000/health"
      interval: 10s
      timeout: 5s
      retries: 5
    # Folded scalar: the lines join with single spaces into one command string.
    command: >-
      uvicorn src.main:app
      --host 0.0.0.0 --port 8000 --reload


# Named volume with default settings; in this file it is referenced only by
# the commented-out ollama service.
volumes:
  ollama-data: {}

networks:
  # openproject_network:
  #   external: true
  # Bridge network referenced only by the commented-out ollama services;
  # the api service uses network_mode: host and does not attach to it.
  haystack-internal:
    driver: bridge