Skip to content

Commit 4404942

Browse files
committed
feat: switched to ipex-llm instead of ollama for gpu support
1 parent fe477cf commit 4404942

1 file changed

Lines changed: 30 additions & 24 deletions

File tree

kubernetes/overrides/ollama/values.yaml

Lines changed: 30 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -27,34 +27,40 @@ ollama:
27 27
- nomic-embed-text
28 28

29 29
image:
30-
tag: 0.17.7
30+
repository: intelanalytics/ipex-llm-inference-cpp-xpu
31+
tag: 2.3.0-SNAPSHOT
32+
33+
extraArgs:
34+
- /bin/bash
35+
- -c
36+
- |
37+
mkdir -p /llm/ollama
38+
cd /llm/ollama
39+
init-ollama
40+
exec ./ollama serve
31 41
32 42
extraEnv:
33-
# --- GPU settings for ARC ---
34-
- name: OLLAMA_VULKAN
35-
value: "0"
43+
# --- GPU settings for ARC (Level Zero/SYCL via IPEX) ---
36 44
- name: OLLAMA_NUM_GPU
37 45
value: "999"
38-
- name: GGML_VK_DISABLE_COOPMAT # prevents cooperative matrix corruption on Intel Arc
46+
- name: ZES_ENABLE_SYSMAN
39 47
value: "1"
40-
- name: GGML_VK_DISABLE_COOPMAT2
48+
- name: OLLAMA_INTEL_GPU
41 49
value: "1"
42 50

43 51
# --- Memory ---
44 52
- name: OLLAMA_CONTEXT_LENGTH
45 53
value: "16384"
46-
- name: OLLAMA_KV_CACHE_TYPE
47-
value: ""
48 54
- name: OLLAMA_FLASH_ATTENTION
49 55
value: "0"
50 56
- name: OLLAMA_GPU_OVERHEAD
51-
value: "536870912" # reserve 512MB — prevents edge-case OOM evictions
57+
value: "536870912"
52 58

53 59
# --- Scheduling ---
54 60
- name: OLLAMA_KEEP_ALIVE
55 61
value: "5m"
5662
- name: OLLAMA_MAX_LOADED_MODELS
57-
value: "1" # prevents VRAM thrashing between model switches
63+
value: "1"
58 64
- name: OLLAMA_NUM_PARALLEL
59 65
value: "1"
60 66

@@ -69,6 +75,19 @@ persistentVolume:
69 75
existingClaim: ollama-models-hostpath
70 76

71 77
extraObjects:
78+
- apiVersion: v1
79+
kind: PersistentVolumeClaim
80+
metadata:
81+
name: ollama-models-hostpath
82+
namespace: ollama
83+
spec:
84+
accessModes:
85+
- ReadWriteMany
86+
resources:
87+
requests:
88+
storage: 200Gi
89+
volumeName: ollama-models-hostpath
90+
storageClassName: ""
72 91
- apiVersion: v1
73 92
kind: PersistentVolume
74 93
metadata:
@@ -88,17 +107,4 @@ extraObjects:
88 107
- key: intel.feature.node.kubernetes.io/gpu
89 108
operator: In
90 109
values:
91-
- "true"
92-
- apiVersion: v1
93-
kind: PersistentVolumeClaim
94-
metadata:
95-
name: ollama-models-hostpath
96-
namespace: ollama
97-
spec:
98-
accessModes:
99-
- ReadWriteMany
100-
resources:
101-
requests:
102-
storage: 200Gi
103-
volumeName: ollama-models-hostpath
104-
storageClassName: ""
110+
- "true"

0 commit comments

Comments
 (0)