@@ -27,34 +27,32 @@ ollama:
2727 - nomic-embed-text
2828
2929image : # container image override; this change replaces the bare 0.17.7 tag with an explicit repository + tag
30- tag : 0.17.7
30+ repository : intelanalytics/ipex-llm-inference-cpp-xpu
31+ tag : 2.3.0-SNAPSHOT # NOTE(review): SNAPSHOT tags are presumably mutable — confirm whether a fixed tag or digest should be pinned
32+
33+ extraArgs :
34+ - " sleep 3600" # NOTE(review): looks like a keep-alive/debug placeholder rather than a serve command — confirm before merging
3135
3236extraEnv : # environment variables for the container, grouped below as GPU / Memory / Scheduling
33- # --- GPU settings for ARC ---
34- - name : OLLAMA_VULKAN
35- value : " 0"
37+ # --- GPU settings for ARC (Level Zero/SYCL via IPEX) ---
3638 - name : OLLAMA_NUM_GPU
3739 value : " 999"
38- - name : GGML_VK_DISABLE_COOPMAT # prevents cooperative matrix corruption on Intel Arc
39- value : " 1"
40- - name : GGML_VK_DISABLE_COOPMAT2
40+ - name : ZES_ENABLE_SYSMAN # NOTE(review): presumably enables Level Zero Sysman for GPU memory queries — confirm
4141 value : " 1"
4242
4343 # --- Memory ---
4444 - name : OLLAMA_CONTEXT_LENGTH
4545 value : " 16384"
46- - name : OLLAMA_KV_CACHE_TYPE
47- value : " "
4846 - name : OLLAMA_FLASH_ATTENTION
4947 value : " 0"
5048 - name : OLLAMA_GPU_OVERHEAD
51- value : " 536870912" # reserve 512MB — prevents edge-case OOM evictions
49+ value : " 536870912" # reserve 512MB — prevents edge-case OOM evictions
5250
5351 # --- Scheduling ---
5452 - name : OLLAMA_KEEP_ALIVE
5553 value : " 5m"
5654 - name : OLLAMA_MAX_LOADED_MODELS
57- value : " 1" # prevents VRAM thrashing between model switches
55+ value : " 1" # prevents VRAM thrashing between model switches
5856 - name : OLLAMA_NUM_PARALLEL
5957 value : " 1"
6058
@@ -69,6 +67,19 @@ persistentVolume:
6967 existingClaim : ollama-models-hostpath
7068
7169extraObjects :
70+ - apiVersion : v1
71+ kind : PersistentVolumeClaim # claim for the model volume; this change moves it ahead of the PersistentVolume item
72+ metadata :
73+ name : ollama-models-hostpath # same name as persistentVolume.existingClaim earlier in this file
74+ namespace : ollama
75+ spec :
76+ accessModes :
77+ - ReadWriteMany
78+ resources :
79+ requests :
80+ storage : 200Gi
81+ volumeName : ollama-models-hostpath # NOTE(review): presumably pins the claim to the PersistentVolume declared next — its name is not visible in this hunk; confirm
82+ storageClassName : " " # NOTE(review): shows as a single space here; presumably an empty string is intended — confirm against the real file
7283 - apiVersion : v1
7384 kind : PersistentVolume
7485 metadata :
@@ -88,17 +99,4 @@ extraObjects:
8899 - key : intel.feature.node.kubernetes.io/gpu
89100 operator : In
90101 values :
91- - " true"
92- - apiVersion : v1
93- kind : PersistentVolumeClaim
94- metadata :
95- name : ollama-models-hostpath
96- namespace : ollama
97- spec :
98- accessModes :
99- - ReadWriteMany
100- resources :
101- requests :
102- storage : 200Gi
103- volumeName : ollama-models-hostpath
104- storageClassName : " "
102+ - " true"
0 commit comments