From 6c1d0109b25edfe76f16f7ecf8596dc2d807be2d Mon Sep 17 00:00:00 2001 From: bussyjd Date: Mon, 9 Feb 2026 18:02:19 +0400 Subject: [PATCH 1/4] fix(llm): route OpenClaw through LLMSpy gateway LLMSpy was scaled to 0 due to a missing providers.json in the deployed init container. The source llm.yaml already had the fix but the cluster was stale. This commit also updates OpenClaw's default Ollama baseUrl to point to LLMSpy (port 8000) instead of directly to Ollama, and fixes the default model name from "glm4" to "glm-4.7-flash". --- .../infrastructure/base/templates/llm.yaml | 39 ++++++++++++------- .../openclaw/chart/templates/_helpers.tpl | 7 +++- internal/openclaw/chart/values.yaml | 18 +++++---- internal/openclaw/openclaw.go | 2 +- 4 files changed, 43 insertions(+), 23 deletions(-) diff --git a/internal/embed/infrastructure/base/templates/llm.yaml b/internal/embed/infrastructure/base/templates/llm.yaml index 5633866..a130036 100644 --- a/internal/embed/infrastructure/base/templates/llm.yaml +++ b/internal/embed/infrastructure/base/templates/llm.yaml @@ -90,8 +90,8 @@ spec: cpu: 100m memory: 256Mi limits: - cpu: 2000m - memory: 4Gi + cpu: 8000m + memory: 24Gi volumes: - name: ollama-home persistentVolumeClaim: @@ -118,9 +118,9 @@ spec: protocol: TCP --- -# llms.py configuration for Obol Stack: -# - Only enable the Ollama provider -# - Default model is `glm-4.7:cloud` (cloud-first) +# llms.py v3 configuration for Obol Stack: +# - Only enable the Ollama provider (in-cluster) +# - Default model is glm-4.7-flash apiVersion: v1 kind: ConfigMap metadata: @@ -129,27 +129,35 @@ metadata: data: llms.json: | { + "version": 3, "defaults": { "headers": { - "Content-Type": "application/json" + "Content-Type": "application/json", + "User-Agent": "llmspy.org/3.0" }, "text": { - "model": "glm-4.7:cloud", + "model": "glm-4.7-flash", "messages": [ - { "role": "user", "content": "" } + { "role": "user", "content": [{ "type": "text", "text": "" }] } ] } }, "providers": { "ollama": { - "enabled": true, - "type": "OllamaProvider", - "base_url": "http://ollama.llm.svc.cluster.local:11434", - "models": {}, - "all_models": true + "enabled": true } } } + providers.json: | + { + "ollama": { + "id": "ollama", + "npm": "ollama", + "api": "http://ollama.llm.svc.cluster.local:11434", + "models": {}, + "all_models": true + } + } --- apiVersion: apps/v1 @@ -182,6 +190,7 @@ spec: set -eu mkdir -p /data cp /config/llms.json /data/llms.json + cp /config/providers.json /data/providers.json volumeMounts: - name: llmspy-config mountPath: /config @@ -192,7 +201,7 @@ spec: - name: llmspy # Official LLMSpy container image (published by upstream). # Pin a specific version for reproducibility. - image: ghcr.io/servicestack/llms:v2.0.30 + image: ghcr.io/servicestack/llms:latest imagePullPolicy: IfNotPresent ports: - name: http @@ -240,6 +249,8 @@ spec: items: - key: llms.json path: llms.json + - key: providers.json + path: providers.json - name: llmspy-home emptyDir: {} diff --git a/internal/openclaw/chart/templates/_helpers.tpl b/internal/openclaw/chart/templates/_helpers.tpl index 6549246..c68bc79 100644 --- a/internal/openclaw/chart/templates/_helpers.tpl +++ b/internal/openclaw/chart/templates/_helpers.tpl @@ -148,9 +148,14 @@ Render openclaw.json as strict JSON. 
If config.content is provided, it is used v "http" (dict "endpoints" (dict "chatCompletions" (dict "enabled" .Values.openclaw.gateway.http.endpoints.chatCompletions.enabled))) -}} +{{- $agentDefaults := dict "workspace" .Values.openclaw.workspaceDir -}} +{{- if .Values.openclaw.agentModel -}} +{{- $_ := set $agentDefaults "model" (dict "primary" .Values.openclaw.agentModel) -}} +{{- end -}} + {{- $cfg := dict "gateway" $gateway - "agents" (dict "defaults" (dict "workspace" .Values.openclaw.workspaceDir)) + "agents" (dict "defaults" $agentDefaults) -}} {{- if .Values.skills.enabled -}} diff --git a/internal/openclaw/chart/values.yaml b/internal/openclaw/chart/values.yaml index d63d173..0edddbc 100644 --- a/internal/openclaw/chart/values.yaml +++ b/internal/openclaw/chart/values.yaml @@ -13,7 +13,8 @@ image: # -- Override the container command (ENTRYPOINT) command: - - openclaw + - node + - openclaw.mjs # -- Override the container args (CMD) args: - gateway @@ -54,7 +55,8 @@ initJob: tag: "" pullPolicy: IfNotPresent command: - - openclaw + - node + - openclaw.mjs - agent - init args: [] @@ -142,6 +144,8 @@ config: openclaw: stateDir: /data/.openclaw workspaceDir: /data/.openclaw/workspace + # -- Default agent model (provider/model). Set to route agent traffic to a specific provider. + agentModel: "" gateway: mode: local @@ -179,17 +183,17 @@ models: name: GPT-4o ollama: enabled: true - # -- OpenAI-compatible base URL for Ollama - baseUrl: http://ollama.llm.svc.cluster.local:11434/v1 - # -- OpenClaw provider API type (optional; omit to let OpenClaw auto-detect) - api: "" + # -- OpenAI-compatible base URL (via LLMSpy gateway) + baseUrl: http://llmspy.llm.svc.cluster.local:8000/v1 + # -- OpenClaw provider API type (openai-completions for Ollama) + api: openai-completions # -- Env var used for provider API key interpolation in openclaw.json apiKeyEnvVar: OLLAMA_API_KEY # -- Value set for the apiKey env var (not a secret for Ollama) apiKeyValue: ollama-local models: - id: glm-4.7-flash - name: glm-4.7-flash + name: GLM-4.7 Flash # -- Chat channel integrations # Tokens are stored in the chart Secret and injected as env vars. diff --git a/internal/openclaw/openclaw.go b/internal/openclaw/openclaw.go index 2c6ddaa..983b096 100644 --- a/internal/openclaw/openclaw.go +++ b/internal/openclaw/openclaw.go @@ -310,7 +310,7 @@ openclaw: models: ollama: enabled: true - baseUrl: http://ollama.llm.svc.cluster.local:11434/v1 + baseUrl: http://llmspy.llm.svc.cluster.local:8000/v1 api: openai-completions apiKeyEnvVar: OLLAMA_API_KEY apiKeyValue: ollama-local From bae5bd9202c3dcaec3cdc3cb1be77573f05b8c41 Mon Sep 17 00:00:00 2001 From: bussyjd Date: Mon, 9 Feb 2026 18:13:12 +0400 Subject: [PATCH 2/4] fix(llm): restore conservative Ollama resource limits Revert Ollama limits to cpu: 2000m, memory: 4Gi to match the previous defaults and avoid over-provisioning on local dev clusters. 
--- internal/embed/infrastructure/base/templates/llm.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/embed/infrastructure/base/templates/llm.yaml b/internal/embed/infrastructure/base/templates/llm.yaml index a130036..b139459 100644 --- a/internal/embed/infrastructure/base/templates/llm.yaml +++ b/internal/embed/infrastructure/base/templates/llm.yaml @@ -90,8 +90,8 @@ spec: cpu: 100m memory: 256Mi limits: - cpu: 8000m - memory: 24Gi + cpu: 2000m + memory: 4Gi volumes: - name: ollama-home persistentVolumeClaim: From d5d928dd93f145354c589e35e8422dd38bfb292c Mon Sep 17 00:00:00 2001 From: bussyjd Date: Mon, 9 Feb 2026 18:19:12 +0400 Subject: [PATCH 3/4] Revert "fix(llm): restore conservative Ollama resource limits" This reverts commit bae5bd9202c3dcaec3cdc3cb1be77573f05b8c41. --- internal/embed/infrastructure/base/templates/llm.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/embed/infrastructure/base/templates/llm.yaml b/internal/embed/infrastructure/base/templates/llm.yaml index b139459..a130036 100644 --- a/internal/embed/infrastructure/base/templates/llm.yaml +++ b/internal/embed/infrastructure/base/templates/llm.yaml @@ -90,8 +90,8 @@ spec: cpu: 100m memory: 256Mi limits: - cpu: 2000m - memory: 4Gi + cpu: 8000m + memory: 24Gi volumes: - name: ollama-home persistentVolumeClaim: From 29ad4a5ddd60d3f6e4f2a735a9fc7400e18e4dd0 Mon Sep 17 00:00:00 2001 From: bussyjd Date: Mon, 9 Feb 2026 19:14:50 +0400 Subject: [PATCH 4/4] refactor(llm): remove in-cluster Ollama, proxy to host via ExternalName MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the in-cluster Ollama Deployment/PVC/Service with an ExternalName Service that routes ollama.llm.svc.cluster.local to the host machine. LLMSpy and all consumers use the stable cluster-internal DNS name; the ExternalName target is resolved at stack init time: k3d → host.k3d.internal k3s → node gateway IP (future) CopyDefaults now accepts a replacements map so stack init can resolve {{OLLAMA_HOST}} (and future placeholders) in infrastructure templates. --- internal/embed/embed.go | 14 ++- .../infrastructure/base/templates/llm.yaml | 107 +++--------------- internal/stack/stack.go | 7 +- 3 files changed, 30 insertions(+), 98 deletions(-) diff --git a/internal/embed/embed.go b/internal/embed/embed.go index 2c189eb..99b8607 100644 --- a/internal/embed/embed.go +++ b/internal/embed/embed.go @@ -21,8 +21,10 @@ var infrastructureFS embed.FS //go:embed all:networks var networksFS embed.FS -// CopyDefaults recursively copies all embedded infrastructure manifests to the destination directory -func CopyDefaults(destDir string) error { +// CopyDefaults recursively copies all embedded infrastructure manifests to the destination directory. +// The replacements map is applied to every file: each key (e.g. "{{OLLAMA_HOST}}") is replaced +// with its value. Pass nil for a verbatim copy. 
+func CopyDefaults(destDir string, replacements map[string]string) error { return fs.WalkDir(infrastructureFS, "infrastructure", func(path string, d fs.DirEntry, err error) error { if err != nil { return err @@ -57,8 +59,14 @@ func CopyDefaults(destDir string) error { return fmt.Errorf("failed to read embedded file %s: %w", path, err) } + // Apply placeholder replacements + content := string(data) + for placeholder, value := range replacements { + content = strings.ReplaceAll(content, placeholder, value) + } + // Write to destination - if err := os.WriteFile(destPath, data, 0644); err != nil { + if err := os.WriteFile(destPath, []byte(content), 0644); err != nil { return fmt.Errorf("failed to write file %s: %w", destPath, err) } diff --git a/internal/embed/infrastructure/base/templates/llm.yaml b/internal/embed/infrastructure/base/templates/llm.yaml index a130036..4547c8f 100644 --- a/internal/embed/infrastructure/base/templates/llm.yaml +++ b/internal/embed/infrastructure/base/templates/llm.yaml @@ -2,104 +2,25 @@ # LLM foundation services (OKR-1) # # This deploys: -# - Ollama (as the upstream LLM runtime) +# - An ExternalName Service "ollama" that resolves to the host's Ollama server # - llms.py (LLMSpy) as an OpenAI-compatible gateway / router over providers # # Design notes: -# - We default to Ollama Cloud (`glm-4.7:cloud`) to avoid requiring local GPU/VRAM. -# - We persist Ollama's identity keypair at `/root/.ollama/id_ed25519` so the -# Ollama Cloud "connect" binding survives pod restarts/upgrades. -# - Model cache is kept on `emptyDir` (ephemeral) per product decision. +# - No in-cluster Ollama is deployed; the host is expected to run Ollama +# (or another OpenAI-compatible server) on port 11434. +# - The ollama Service abstracts host resolution: +# k3d → host.k3d.internal +# k3s → resolved at stack init via node IP +# - LLMSpy and all consumers reference ollama.llm.svc.cluster.local:11434, +# which the ExternalName Service routes to the host. apiVersion: v1 kind: Namespace metadata: name: llm --- -# Persist Ollama identity (Ollama Cloud connect uses the public key derived from this keypair). -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: ollama-home - namespace: llm -spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: 256Mi - ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: ollama - namespace: llm - labels: - app: ollama -spec: - replicas: 1 - # Ollama uses a ReadWriteOnce PVC; avoid surging a second pod during updates. - strategy: - type: Recreate - selector: - matchLabels: - app: ollama - template: - metadata: - labels: - app: ollama - spec: - containers: - - name: ollama - image: ollama/ollama:latest - imagePullPolicy: IfNotPresent - ports: - - name: http - containerPort: 11434 - protocol: TCP - env: - # Store model blobs (including any cloud model stubs/cache) in an ephemeral volume. - - name: OLLAMA_MODELS - value: /models - # Explicitly bind the HTTP API to all interfaces in-cluster. - - name: OLLAMA_HOST - value: 0.0.0.0:11434 - volumeMounts: - # Persist identity + config (e.g. ~/.ollama/id_ed25519) for Ollama Cloud connect. 
- - name: ollama-home - mountPath: /root/.ollama - - name: ollama-models - mountPath: /models - readinessProbe: - httpGet: - path: /api/version - port: http - initialDelaySeconds: 5 - periodSeconds: 5 - timeoutSeconds: 2 - livenessProbe: - httpGet: - path: /api/version - port: http - initialDelaySeconds: 30 - periodSeconds: 10 - timeoutSeconds: 2 - resources: - requests: - cpu: 100m - memory: 256Mi - limits: - cpu: 8000m - memory: 24Gi - volumes: - - name: ollama-home - persistentVolumeClaim: - claimName: ollama-home - - name: ollama-models - emptyDir: {} - ---- +# ExternalName Service: routes ollama.llm.svc.cluster.local → host Ollama. +# The externalName is resolved during `obol stack init` via the {{OLLAMA_HOST}} placeholder. apiVersion: v1 kind: Service metadata: @@ -108,18 +29,16 @@ metadata: labels: app: ollama spec: - type: ClusterIP - selector: - app: ollama + type: ExternalName + externalName: {{OLLAMA_HOST}} ports: - name: http port: 11434 - targetPort: http protocol: TCP --- # llms.py v3 configuration for Obol Stack: -# - Only enable the Ollama provider (in-cluster) +# - Only enable the Ollama provider (host machine via ollama Service) # - Default model is glm-4.7-flash apiVersion: v1 kind: ConfigMap diff --git a/internal/stack/stack.go b/internal/stack/stack.go index 077c235..61dd60b 100644 --- a/internal/stack/stack.go +++ b/internal/stack/stack.go @@ -79,8 +79,13 @@ func Init(cfg *config.Config, force bool) error { fmt.Printf("K3d config saved to: %s\n", k3dConfigPath) // Copy embedded defaults (helmfile + charts for infrastructure) + // Resolve placeholders: {{OLLAMA_HOST}} → host DNS for the cluster runtime. + // k3d uses host.k3d.internal; bare k3s would use the node's gateway IP. + ollamaHost := "host.k3d.internal" defaultsDir := filepath.Join(cfg.ConfigDir, "defaults") - if err := embed.CopyDefaults(defaultsDir); err != nil { + if err := embed.CopyDefaults(defaultsDir, map[string]string{ + "{{OLLAMA_HOST}}": ollamaHost, + }); err != nil { return fmt.Errorf("failed to copy defaults: %w", err) } fmt.Printf("Defaults copied to: %s\n", defaultsDir)
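
Reviewer sketch (assumptions noted): the placeholder pass that PATCH 4/4 adds to CopyDefaults is plain string substitution applied to every copied manifest before it is written out. The standalone Go program below reproduces just that substitution step so the behavior can be checked outside the repo; applyPlaceholders and the inline manifest snippet are illustrative, not code taken from internal/embed.

    package main

    import (
    	"fmt"
    	"strings"
    )

    // applyPlaceholders mirrors the substitution CopyDefaults performs on each
    // embedded file: every key in the replacements map is replaced verbatim.
    func applyPlaceholders(content string, replacements map[string]string) string {
    	for placeholder, value := range replacements {
    		content = strings.ReplaceAll(content, placeholder, value)
    	}
    	return content
    }

    func main() {
    	// Illustrative fragment of the ExternalName Service; the real template
    	// lives in internal/embed/infrastructure/base/templates/llm.yaml.
    	manifest := "spec:\n  type: ExternalName\n  externalName: {{OLLAMA_HOST}}\n"

    	// k3d resolves the host as host.k3d.internal (see stack.Init in PATCH 4/4).
    	out := applyPlaceholders(manifest, map[string]string{
    		"{{OLLAMA_HOST}}": "host.k3d.internal",
    	})
    	fmt.Print(out)
    	// Output:
    	// spec:
    	//   type: ExternalName
    	//   externalName: host.k3d.internal
    }

Because the replacement is a literal strings.ReplaceAll, an unresolved placeholder such as {{OLLAMA_HOST}} is left in place when no mapping is supplied, which matches the nil-map "verbatim copy" case the new CopyDefaults doc comment describes.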