@@ -27,34 +27,40 @@ ollama:
2727 - nomic-embed-text
2828
# Container image to run. The repository is set explicitly together with the
# tag so both are pinned in this file.
# NOTE(review): a "-SNAPSHOT" tag is presumably mutable — consider pinning an
# image digest; confirm with the image publisher.
image:
  repository: "intelanalytics/ipex-llm-inference-cpp-xpu"
  tag: "2.3.0-SNAPSHOT"
32+
# Container arguments: run a bash script that prepares /llm/ollama and then
# replaces the shell with the Ollama server process.
extraArgs:
  - /bin/bash
  - -c
  - |
    mkdir -p /llm/ollama
    # Stop here if the working directory is unavailable, instead of running
    # init-ollama and the server from whatever directory we started in.
    cd /llm/ollama || exit 1
    # NOTE(review): init-ollama presumably places the ollama binary in the
    # current directory; confirm against the image.
    init-ollama
    # exec replaces the shell, so the server receives signals directly.
    exec ./ollama serve
3141
# Environment variables passed to the Ollama container. Every value is a
# quoted string with no surrounding whitespace, so numeric and duration
# settings are read exactly as written.
extraEnv:
  # --- GPU settings for ARC (Level Zero/SYCL via IPEX) ---
  - name: OLLAMA_NUM_GPU
    value: "999"
  - name: ZES_ENABLE_SYSMAN
    value: "1"
  - name: OLLAMA_INTEL_GPU
    value: "1"

  # --- Memory ---
  - name: OLLAMA_CONTEXT_LENGTH
    value: "16384"
  - name: OLLAMA_FLASH_ATTENTION
    value: "0"
  # Reserve 512 MiB (512 * 1024 * 1024 bytes); intended to prevent
  # edge-case OOM evictions.
  - name: OLLAMA_GPU_OVERHEAD
    value: "536870912"

  # --- Scheduling ---
  - name: OLLAMA_KEEP_ALIVE
    value: "5m"
  # Keep a single model resident; intended to prevent VRAM thrashing between
  # model switches.
  - name: OLLAMA_MAX_LOADED_MODELS
    value: "1"
  - name: OLLAMA_NUM_PARALLEL
    value: "1"
6066
@@ -69,6 +75,19 @@ persistentVolume:
6975 existingClaim : ollama-models-hostpath
7076
# Extra manifests rendered alongside the release.
extraObjects:
  # Claim for model storage; its name matches the existingClaim referenced
  # under persistentVolume.
  - apiVersion: v1
    kind: PersistentVolumeClaim
    metadata:
      name: ollama-models-hostpath
      namespace: ollama
    spec:
      accessModes:
        - ReadWriteMany
      resources:
        requests:
          storage: 200Gi
      # Bind statically to the PersistentVolume with this name.
      volumeName: ollama-models-hostpath
      # Must be the empty string (not whitespace) so no StorageClass is
      # applied and the claim binds only to the pre-created volume.
      storageClassName: ""
7291 - apiVersion : v1
7392 kind : PersistentVolume
7493 metadata :
@@ -88,17 +107,4 @@ extraObjects:
88107 - key : intel.feature.node.kubernetes.io/gpu
89108 operator : In
90109 values :
91- - " true"
92- - apiVersion : v1
93- kind : PersistentVolumeClaim
94- metadata :
95- name : ollama-models-hostpath
96- namespace : ollama
97- spec :
98- accessModes :
99- - ReadWriteMany
100- resources :
101- requests :
102- storage : 200Gi
103- volumeName : ollama-models-hostpath
104- storageClassName : " "
110+ - " true"
0 commit comments