Skip to content

Commit 1839241

Browse files
committed
feat: switch from ollama to ipex-llm for GPU support
1 parent fe477cf commit 1839241

1 file changed

Lines changed: 23 additions & 25 deletions

File tree

kubernetes/overrides/ollama/values.yaml

Lines changed: 23 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -27,34 +27,32 @@ ollama:
2727
- nomic-embed-text
2828

2929
image:
30-
tag: 0.17.7
30+
repository: intelanalytics/ipex-llm-inference-cpp-xpu
31+
tag: 2.3.0-SNAPSHOT
32+
33+
extraArgs:
34+
- "sleep 3600"
3135

3236
extraEnv:
33-
# --- GPU settings for ARC ---
34-
- name: OLLAMA_VULKAN
35-
value: "0"
37+
# --- GPU settings for Intel Arc (Level Zero/SYCL via IPEX-LLM) ---
3638
- name: OLLAMA_NUM_GPU
3739
value: "999"
38-
- name: GGML_VK_DISABLE_COOPMAT # prevents cooperative matrix corruption on Intel Arc
39-
value: "1"
40-
- name: GGML_VK_DISABLE_COOPMAT2
40+
- name: ZES_ENABLE_SYSMAN
4141
value: "1"
4242

4343
# --- Memory ---
4444
- name: OLLAMA_CONTEXT_LENGTH
4545
value: "16384"
46-
- name: OLLAMA_KV_CACHE_TYPE
47-
value: ""
4846
- name: OLLAMA_FLASH_ATTENTION
4947
value: "0"
5048
- name: OLLAMA_GPU_OVERHEAD
51-
value: "536870912" # reserve 512MB — prevents edge-case OOM evictions
49+
value: "536870912"
5250

5351
# --- Scheduling ---
5452
- name: OLLAMA_KEEP_ALIVE
5553
value: "5m"
5654
- name: OLLAMA_MAX_LOADED_MODELS
57-
value: "1" # prevents VRAM thrashing between model switches
55+
value: "1"
5856
- name: OLLAMA_NUM_PARALLEL
5957
value: "1"
6058

@@ -69,6 +67,19 @@ persistentVolume:
6967
existingClaim: ollama-models-hostpath
7068

7169
extraObjects:
70+
- apiVersion: v1
71+
kind: PersistentVolumeClaim
72+
metadata:
73+
name: ollama-models-hostpath
74+
namespace: ollama
75+
spec:
76+
accessModes:
77+
- ReadWriteMany
78+
resources:
79+
requests:
80+
storage: 200Gi
81+
volumeName: ollama-models-hostpath
82+
storageClassName: ""
7283
- apiVersion: v1
7384
kind: PersistentVolume
7485
metadata:
@@ -88,17 +99,4 @@ extraObjects:
8899
- key: intel.feature.node.kubernetes.io/gpu
89100
operator: In
90101
values:
91-
- "true"
92-
- apiVersion: v1
93-
kind: PersistentVolumeClaim
94-
metadata:
95-
name: ollama-models-hostpath
96-
namespace: ollama
97-
spec:
98-
accessModes:
99-
- ReadWriteMany
100-
resources:
101-
requests:
102-
storage: 200Gi
103-
volumeName: ollama-models-hostpath
104-
storageClassName: ""
102+
- "true"

0 commit comments

Comments
 (0)