From 6c1d0109b25edfe76f16f7ecf8596dc2d807be2d Mon Sep 17 00:00:00 2001 From: bussyjd Date: Mon, 9 Feb 2026 18:02:19 +0400 Subject: [PATCH 1/4] fix(llm): route OpenClaw through LLMSpy gateway LLMSpy was scaled to 0 due to a missing providers.json in the deployed init container. The source llm.yaml already had the fix but the cluster was stale. This commit also updates OpenClaw's default Ollama baseUrl to point to LLMSpy (port 8000) instead of directly to Ollama, and fixes the default model name from "glm4" to "glm-4.7-flash". --- .../infrastructure/base/templates/llm.yaml | 39 ++++++++++++------- .../openclaw/chart/templates/_helpers.tpl | 7 +++- internal/openclaw/chart/values.yaml | 18 +++++---- internal/openclaw/openclaw.go | 2 +- 4 files changed, 43 insertions(+), 23 deletions(-) diff --git a/internal/embed/infrastructure/base/templates/llm.yaml b/internal/embed/infrastructure/base/templates/llm.yaml index 5633866..a130036 100644 --- a/internal/embed/infrastructure/base/templates/llm.yaml +++ b/internal/embed/infrastructure/base/templates/llm.yaml @@ -90,8 +90,8 @@ spec: cpu: 100m memory: 256Mi limits: - cpu: 2000m - memory: 4Gi + cpu: 8000m + memory: 24Gi volumes: - name: ollama-home persistentVolumeClaim: @@ -118,9 +118,9 @@ spec: protocol: TCP --- -# llms.py configuration for Obol Stack: -# - Only enable the Ollama provider -# - Default model is `glm-4.7:cloud` (cloud-first) +# llms.py v3 configuration for Obol Stack: +# - Only enable the Ollama provider (in-cluster) +# - Default model is glm-4.7-flash apiVersion: v1 kind: ConfigMap metadata: @@ -129,27 +129,35 @@ metadata: data: llms.json: | { + "version": 3, "defaults": { "headers": { - "Content-Type": "application/json" + "Content-Type": "application/json", + "User-Agent": "llmspy.org/3.0" }, "text": { - "model": "glm-4.7:cloud", + "model": "glm-4.7-flash", "messages": [ - { "role": "user", "content": "" } + { "role": "user", "content": [{ "type": "text", "text": "" }] } ] } }, "providers": { "ollama": { - "enabled": true, - "type": "OllamaProvider", - "base_url": "http://ollama.llm.svc.cluster.local:11434", - "models": {}, - "all_models": true + "enabled": true } } } + providers.json: | + { + "ollama": { + "id": "ollama", + "npm": "ollama", + "api": "http://ollama.llm.svc.cluster.local:11434", + "models": {}, + "all_models": true + } + } --- apiVersion: apps/v1 @@ -182,6 +190,7 @@ spec: set -eu mkdir -p /data cp /config/llms.json /data/llms.json + cp /config/providers.json /data/providers.json volumeMounts: - name: llmspy-config mountPath: /config @@ -192,7 +201,7 @@ spec: - name: llmspy # Official LLMSpy container image (published by upstream). # Pin a specific version for reproducibility. - image: ghcr.io/servicestack/llms:v2.0.30 + image: ghcr.io/servicestack/llms:latest imagePullPolicy: IfNotPresent ports: - name: http @@ -240,6 +249,8 @@ spec: items: - key: llms.json path: llms.json + - key: providers.json + path: providers.json - name: llmspy-home emptyDir: {} diff --git a/internal/openclaw/chart/templates/_helpers.tpl b/internal/openclaw/chart/templates/_helpers.tpl index 6549246..c68bc79 100644 --- a/internal/openclaw/chart/templates/_helpers.tpl +++ b/internal/openclaw/chart/templates/_helpers.tpl @@ -148,9 +148,14 @@ Render openclaw.json as strict JSON. 
If config.content is provided, it is used v "http" (dict "endpoints" (dict "chatCompletions" (dict "enabled" .Values.openclaw.gateway.http.endpoints.chatCompletions.enabled))) -}} +{{- $agentDefaults := dict "workspace" .Values.openclaw.workspaceDir -}} +{{- if .Values.openclaw.agentModel -}} +{{- $_ := set $agentDefaults "model" (dict "primary" .Values.openclaw.agentModel) -}} +{{- end -}} + {{- $cfg := dict "gateway" $gateway - "agents" (dict "defaults" (dict "workspace" .Values.openclaw.workspaceDir)) + "agents" (dict "defaults" $agentDefaults) -}} {{- if .Values.skills.enabled -}} diff --git a/internal/openclaw/chart/values.yaml b/internal/openclaw/chart/values.yaml index d63d173..0edddbc 100644 --- a/internal/openclaw/chart/values.yaml +++ b/internal/openclaw/chart/values.yaml @@ -13,7 +13,8 @@ image: # -- Override the container command (ENTRYPOINT) command: - - openclaw + - node + - openclaw.mjs # -- Override the container args (CMD) args: - gateway @@ -54,7 +55,8 @@ initJob: tag: "" pullPolicy: IfNotPresent command: - - openclaw + - node + - openclaw.mjs - agent - init args: [] @@ -142,6 +144,8 @@ config: openclaw: stateDir: /data/.openclaw workspaceDir: /data/.openclaw/workspace + # -- Default agent model (provider/model). Set to route agent traffic to a specific provider. + agentModel: "" gateway: mode: local @@ -179,17 +183,17 @@ models: name: GPT-4o ollama: enabled: true - # -- OpenAI-compatible base URL for Ollama - baseUrl: http://ollama.llm.svc.cluster.local:11434/v1 - # -- OpenClaw provider API type (optional; omit to let OpenClaw auto-detect) - api: "" + # -- OpenAI-compatible base URL (via LLMSpy gateway) + baseUrl: http://llmspy.llm.svc.cluster.local:8000/v1 + # -- OpenClaw provider API type (openai-completions for Ollama) + api: openai-completions # -- Env var used for provider API key interpolation in openclaw.json apiKeyEnvVar: OLLAMA_API_KEY # -- Value set for the apiKey env var (not a secret for Ollama) apiKeyValue: ollama-local models: - id: glm-4.7-flash - name: glm-4.7-flash + name: GLM-4.7 Flash # -- Chat channel integrations # Tokens are stored in the chart Secret and injected as env vars. diff --git a/internal/openclaw/openclaw.go b/internal/openclaw/openclaw.go index 2c6ddaa..983b096 100644 --- a/internal/openclaw/openclaw.go +++ b/internal/openclaw/openclaw.go @@ -310,7 +310,7 @@ openclaw: models: ollama: enabled: true - baseUrl: http://ollama.llm.svc.cluster.local:11434/v1 + baseUrl: http://llmspy.llm.svc.cluster.local:8000/v1 api: openai-completions apiKeyEnvVar: OLLAMA_API_KEY apiKeyValue: ollama-local From bae5bd9202c3dcaec3cdc3cb1be77573f05b8c41 Mon Sep 17 00:00:00 2001 From: bussyjd Date: Mon, 9 Feb 2026 18:13:12 +0400 Subject: [PATCH 2/4] fix(llm): restore conservative Ollama resource limits Revert Ollama limits to cpu: 2000m, memory: 4Gi to match the previous defaults and avoid over-provisioning on local dev clusters. 
--- internal/embed/infrastructure/base/templates/llm.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/embed/infrastructure/base/templates/llm.yaml b/internal/embed/infrastructure/base/templates/llm.yaml index a130036..b139459 100644 --- a/internal/embed/infrastructure/base/templates/llm.yaml +++ b/internal/embed/infrastructure/base/templates/llm.yaml @@ -90,8 +90,8 @@ spec: cpu: 100m memory: 256Mi limits: - cpu: 8000m - memory: 24Gi + cpu: 2000m + memory: 4Gi volumes: - name: ollama-home persistentVolumeClaim: From d5d928dd93f145354c589e35e8422dd38bfb292c Mon Sep 17 00:00:00 2001 From: bussyjd Date: Mon, 9 Feb 2026 18:19:12 +0400 Subject: [PATCH 3/4] Revert "fix(llm): restore conservative Ollama resource limits" This reverts commit bae5bd9202c3dcaec3cdc3cb1be77573f05b8c41. --- internal/embed/infrastructure/base/templates/llm.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/embed/infrastructure/base/templates/llm.yaml b/internal/embed/infrastructure/base/templates/llm.yaml index b139459..a130036 100644 --- a/internal/embed/infrastructure/base/templates/llm.yaml +++ b/internal/embed/infrastructure/base/templates/llm.yaml @@ -90,8 +90,8 @@ spec: cpu: 100m memory: 256Mi limits: - cpu: 2000m - memory: 4Gi + cpu: 8000m + memory: 24Gi volumes: - name: ollama-home persistentVolumeClaim: From 29ad4a5ddd60d3f6e4f2a735a9fc7400e18e4dd0 Mon Sep 17 00:00:00 2001 From: bussyjd Date: Mon, 9 Feb 2026 19:14:50 +0400 Subject: [PATCH 4/4] refactor(llm): remove in-cluster Ollama, proxy to host via ExternalName MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the in-cluster Ollama Deployment/PVC/Service with an ExternalName Service that routes ollama.llm.svc.cluster.local to the host machine. LLMSpy and all consumers use the stable cluster-internal DNS name; the ExternalName target is resolved at stack init time: k3d → host.k3d.internal k3s → node gateway IP (future) CopyDefaults now accepts a replacements map so stack init can resolve {{OLLAMA_HOST}} (and future placeholders) in infrastructure templates. --- internal/embed/embed.go | 14 ++- .../infrastructure/base/templates/llm.yaml | 107 +++--------------- internal/stack/stack.go | 7 +- 3 files changed, 30 insertions(+), 98 deletions(-) diff --git a/internal/embed/embed.go b/internal/embed/embed.go index 2c189eb..99b8607 100644 --- a/internal/embed/embed.go +++ b/internal/embed/embed.go @@ -21,8 +21,10 @@ var infrastructureFS embed.FS //go:embed all:networks var networksFS embed.FS -// CopyDefaults recursively copies all embedded infrastructure manifests to the destination directory -func CopyDefaults(destDir string) error { +// CopyDefaults recursively copies all embedded infrastructure manifests to the destination directory. +// The replacements map is applied to every file: each key (e.g. "{{OLLAMA_HOST}}") is replaced +// with its value. Pass nil for a verbatim copy. 
+func CopyDefaults(destDir string, replacements map[string]string) error { return fs.WalkDir(infrastructureFS, "infrastructure", func(path string, d fs.DirEntry, err error) error { if err != nil { return err @@ -57,8 +59,14 @@ func CopyDefaults(destDir string) error { return fmt.Errorf("failed to read embedded file %s: %w", path, err) } + // Apply placeholder replacements + content := string(data) + for placeholder, value := range replacements { + content = strings.ReplaceAll(content, placeholder, value) + } + // Write to destination - if err := os.WriteFile(destPath, data, 0644); err != nil { + if err := os.WriteFile(destPath, []byte(content), 0644); err != nil { return fmt.Errorf("failed to write file %s: %w", destPath, err) } diff --git a/internal/embed/infrastructure/base/templates/llm.yaml b/internal/embed/infrastructure/base/templates/llm.yaml index a130036..4547c8f 100644 --- a/internal/embed/infrastructure/base/templates/llm.yaml +++ b/internal/embed/infrastructure/base/templates/llm.yaml @@ -2,104 +2,25 @@ # LLM foundation services (OKR-1) # # This deploys: -# - Ollama (as the upstream LLM runtime) +# - An ExternalName Service "ollama" that resolves to the host's Ollama server # - llms.py (LLMSpy) as an OpenAI-compatible gateway / router over providers # # Design notes: -# - We default to Ollama Cloud (`glm-4.7:cloud`) to avoid requiring local GPU/VRAM. -# - We persist Ollama's identity keypair at `/root/.ollama/id_ed25519` so the -# Ollama Cloud "connect" binding survives pod restarts/upgrades. -# - Model cache is kept on `emptyDir` (ephemeral) per product decision. +# - No in-cluster Ollama is deployed; the host is expected to run Ollama +# (or another OpenAI-compatible server) on port 11434. +# - The ollama Service abstracts host resolution: +# k3d → host.k3d.internal +# k3s → resolved at stack init via node IP +# - LLMSpy and all consumers reference ollama.llm.svc.cluster.local:11434, +# which the ExternalName Service routes to the host. apiVersion: v1 kind: Namespace metadata: name: llm --- -# Persist Ollama identity (Ollama Cloud connect uses the public key derived from this keypair). -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: ollama-home - namespace: llm -spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: 256Mi - ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: ollama - namespace: llm - labels: - app: ollama -spec: - replicas: 1 - # Ollama uses a ReadWriteOnce PVC; avoid surging a second pod during updates. - strategy: - type: Recreate - selector: - matchLabels: - app: ollama - template: - metadata: - labels: - app: ollama - spec: - containers: - - name: ollama - image: ollama/ollama:latest - imagePullPolicy: IfNotPresent - ports: - - name: http - containerPort: 11434 - protocol: TCP - env: - # Store model blobs (including any cloud model stubs/cache) in an ephemeral volume. - - name: OLLAMA_MODELS - value: /models - # Explicitly bind the HTTP API to all interfaces in-cluster. - - name: OLLAMA_HOST - value: 0.0.0.0:11434 - volumeMounts: - # Persist identity + config (e.g. ~/.ollama/id_ed25519) for Ollama Cloud connect. 
- - name: ollama-home - mountPath: /root/.ollama - - name: ollama-models - mountPath: /models - readinessProbe: - httpGet: - path: /api/version - port: http - initialDelaySeconds: 5 - periodSeconds: 5 - timeoutSeconds: 2 - livenessProbe: - httpGet: - path: /api/version - port: http - initialDelaySeconds: 30 - periodSeconds: 10 - timeoutSeconds: 2 - resources: - requests: - cpu: 100m - memory: 256Mi - limits: - cpu: 8000m - memory: 24Gi - volumes: - - name: ollama-home - persistentVolumeClaim: - claimName: ollama-home - - name: ollama-models - emptyDir: {} - ---- +# ExternalName Service: routes ollama.llm.svc.cluster.local → host Ollama. +# The externalName is resolved during `obol stack init` via the {{OLLAMA_HOST}} placeholder. apiVersion: v1 kind: Service metadata: @@ -108,18 +29,16 @@ metadata: labels: app: ollama spec: - type: ClusterIP - selector: - app: ollama + type: ExternalName + externalName: {{OLLAMA_HOST}} ports: - name: http port: 11434 - targetPort: http protocol: TCP --- # llms.py v3 configuration for Obol Stack: -# - Only enable the Ollama provider (in-cluster) +# - Only enable the Ollama provider (host machine via ollama Service) # - Default model is glm-4.7-flash apiVersion: v1 kind: ConfigMap diff --git a/internal/stack/stack.go b/internal/stack/stack.go index 077c235..61dd60b 100644 --- a/internal/stack/stack.go +++ b/internal/stack/stack.go @@ -79,8 +79,13 @@ func Init(cfg *config.Config, force bool) error { fmt.Printf("K3d config saved to: %s\n", k3dConfigPath) // Copy embedded defaults (helmfile + charts for infrastructure) + // Resolve placeholders: {{OLLAMA_HOST}} → host DNS for the cluster runtime. + // k3d uses host.k3d.internal; bare k3s would use the node's gateway IP. + ollamaHost := "host.k3d.internal" defaultsDir := filepath.Join(cfg.ConfigDir, "defaults") - if err := embed.CopyDefaults(defaultsDir); err != nil { + if err := embed.CopyDefaults(defaultsDir, map[string]string{ + "{{OLLAMA_HOST}}": ollamaHost, + }); err != nil { return fmt.Errorf("failed to copy defaults: %w", err) } fmt.Printf("Defaults copied to: %s\n", defaultsDir)
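
Reviewer sketch (assumptions noted): the placeholder pass that PATCH 4/4 adds to CopyDefaults is plain string substitution applied to every copied manifest before it is written out. The standalone Go program below reproduces just that substitution step so the behavior can be checked outside the repo; applyPlaceholders and the inline manifest snippet are illustrative, not code taken from internal/embed.

    package main

    import (
    	"fmt"
    	"strings"
    )

    // applyPlaceholders mirrors the substitution CopyDefaults performs on each
    // embedded file: every key in the replacements map is replaced verbatim.
    func applyPlaceholders(content string, replacements map[string]string) string {
    	for placeholder, value := range replacements {
    		content = strings.ReplaceAll(content, placeholder, value)
    	}
    	return content
    }

    func main() {
    	// Illustrative fragment of the ExternalName Service; the real template
    	// lives in internal/embed/infrastructure/base/templates/llm.yaml.
    	manifest := "spec:\n  type: ExternalName\n  externalName: {{OLLAMA_HOST}}\n"

    	// k3d resolves the host as host.k3d.internal (see stack.Init in PATCH 4/4).
    	out := applyPlaceholders(manifest, map[string]string{
    		"{{OLLAMA_HOST}}": "host.k3d.internal",
    	})
    	fmt.Print(out)
    	// Output:
    	// spec:
    	//   type: ExternalName
    	//   externalName: host.k3d.internal
    }

Because the replacement is a literal strings.ReplaceAll, an unresolved placeholder such as {{OLLAMA_HOST}} is left in place when no mapping is supplied, which matches the nil-map "verbatim copy" case the new CopyDefaults doc comment describes.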