diff --git a/internal/embed/embed.go b/internal/embed/embed.go
index 2c189eb..99b8607 100644
--- a/internal/embed/embed.go
+++ b/internal/embed/embed.go
@@ -21,8 +21,10 @@ var infrastructureFS embed.FS
 //go:embed all:networks
 var networksFS embed.FS
-// CopyDefaults recursively copies all embedded infrastructure manifests to the destination directory
-func CopyDefaults(destDir string) error {
+// CopyDefaults recursively copies all embedded infrastructure manifests to the destination directory.
+// The replacements map is applied to every file: each key (e.g. "{{OLLAMA_HOST}}") is replaced
+// with its value. Pass nil for a verbatim copy.
+func CopyDefaults(destDir string, replacements map[string]string) error {
 	return fs.WalkDir(infrastructureFS, "infrastructure", func(path string, d fs.DirEntry, err error) error {
 		if err != nil {
 			return err
 		}
@@ -57,8 +59,14 @@ func CopyDefaults(destDir string) error {
 			return fmt.Errorf("failed to read embedded file %s: %w", path, err)
 		}
 
+		// Apply placeholder replacements
+		content := string(data)
+		for placeholder, value := range replacements {
+			content = strings.ReplaceAll(content, placeholder, value)
+		}
+
 		// Write to destination
-		if err := os.WriteFile(destPath, data, 0644); err != nil {
+		if err := os.WriteFile(destPath, []byte(content), 0644); err != nil {
 			return fmt.Errorf("failed to write file %s: %w", destPath, err)
 		}
 
diff --git a/internal/embed/infrastructure/base/templates/llm.yaml b/internal/embed/infrastructure/base/templates/llm.yaml
index 5633866..4547c8f 100644
--- a/internal/embed/infrastructure/base/templates/llm.yaml
+++ b/internal/embed/infrastructure/base/templates/llm.yaml
@@ -2,104 +2,25 @@
 # LLM foundation services (OKR-1)
 #
 # This deploys:
-#   - Ollama (as the upstream LLM runtime)
+#   - An ExternalName Service "ollama" that resolves to the host's Ollama server
 #   - llms.py (LLMSpy) as an OpenAI-compatible gateway / router over providers
 #
 # Design notes:
-#   - We default to Ollama Cloud (`glm-4.7:cloud`) to avoid requiring local GPU/VRAM.
-#   - We persist Ollama's identity keypair at `/root/.ollama/id_ed25519` so the
-#     Ollama Cloud "connect" binding survives pod restarts/upgrades.
-#   - Model cache is kept on `emptyDir` (ephemeral) per product decision.
+#   - No in-cluster Ollama is deployed; the host is expected to run Ollama
+#     (or another OpenAI-compatible server) on port 11434.
+#   - The ollama Service abstracts host resolution:
+#       k3d → host.k3d.internal
+#       k3s → resolved at stack init via node IP
+#   - LLMSpy and all consumers reference ollama.llm.svc.cluster.local:11434,
+#     which the ExternalName Service routes to the host.
 apiVersion: v1
 kind: Namespace
 metadata:
   name: llm
 
 ---
-# Persist Ollama identity (Ollama Cloud connect uses the public key derived from this keypair).
-apiVersion: v1
-kind: PersistentVolumeClaim
-metadata:
-  name: ollama-home
-  namespace: llm
-spec:
-  accessModes:
-    - ReadWriteOnce
-  resources:
-    requests:
-      storage: 256Mi
-
----
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: ollama
-  namespace: llm
-  labels:
-    app: ollama
-spec:
-  replicas: 1
-  # Ollama uses a ReadWriteOnce PVC; avoid surging a second pod during updates.
-  strategy:
-    type: Recreate
-  selector:
-    matchLabels:
-      app: ollama
-  template:
-    metadata:
-      labels:
-        app: ollama
-    spec:
-      containers:
-        - name: ollama
-          image: ollama/ollama:latest
-          imagePullPolicy: IfNotPresent
-          ports:
-            - name: http
-              containerPort: 11434
-              protocol: TCP
-          env:
-            # Store model blobs (including any cloud model stubs/cache) in an ephemeral volume.
-            - name: OLLAMA_MODELS
-              value: /models
-            # Explicitly bind the HTTP API to all interfaces in-cluster.
-            - name: OLLAMA_HOST
-              value: 0.0.0.0:11434
-          volumeMounts:
-            # Persist identity + config (e.g. ~/.ollama/id_ed25519) for Ollama Cloud connect.
-            - name: ollama-home
-              mountPath: /root/.ollama
-            - name: ollama-models
-              mountPath: /models
-          readinessProbe:
-            httpGet:
-              path: /api/version
-              port: http
-            initialDelaySeconds: 5
-            periodSeconds: 5
-            timeoutSeconds: 2
-          livenessProbe:
-            httpGet:
-              path: /api/version
-              port: http
-            initialDelaySeconds: 30
-            periodSeconds: 10
-            timeoutSeconds: 2
-          resources:
-            requests:
-              cpu: 100m
-              memory: 256Mi
-            limits:
-              cpu: 2000m
-              memory: 4Gi
-      volumes:
-        - name: ollama-home
-          persistentVolumeClaim:
-            claimName: ollama-home
-        - name: ollama-models
-          emptyDir: {}
-
----
+# ExternalName Service: routes ollama.llm.svc.cluster.local → host Ollama.
+# The externalName is resolved during `obol stack init` via the {{OLLAMA_HOST}} placeholder.
 apiVersion: v1
 kind: Service
 metadata:
@@ -108,19 +29,17 @@ metadata:
   labels:
     app: ollama
 spec:
-  type: ClusterIP
-  selector:
-    app: ollama
+  type: ExternalName
+  externalName: "{{OLLAMA_HOST}}"
   ports:
     - name: http
       port: 11434
-      targetPort: http
       protocol: TCP
 
 ---
-# llms.py configuration for Obol Stack:
-#   - Only enable the Ollama provider
-#   - Default model is `glm-4.7:cloud` (cloud-first)
+# llms.py v3 configuration for Obol Stack:
+#   - Only enable the Ollama provider (host machine via ollama Service)
+#   - Default model is glm-4.7-flash
 apiVersion: v1
 kind: ConfigMap
 metadata:
@@ -129,27 +48,35 @@ metadata:
 data:
   llms.json: |
     {
+      "version": 3,
       "defaults": {
         "headers": {
-          "Content-Type": "application/json"
+          "Content-Type": "application/json",
+          "User-Agent": "llmspy.org/3.0"
         },
         "text": {
-          "model": "glm-4.7:cloud",
+          "model": "glm-4.7-flash",
           "messages": [
-            { "role": "user", "content": "" }
+            { "role": "user", "content": [{ "type": "text", "text": "" }] }
           ]
         }
       },
       "providers": {
         "ollama": {
-          "enabled": true,
-          "type": "OllamaProvider",
-          "base_url": "http://ollama.llm.svc.cluster.local:11434",
-          "models": {},
-          "all_models": true
+          "enabled": true
         }
       }
     }
+  providers.json: |
+    {
+      "ollama": {
+        "id": "ollama",
+        "npm": "ollama",
+        "api": "http://ollama.llm.svc.cluster.local:11434",
+        "models": {},
+        "all_models": true
+      }
+    }
 
 ---
 apiVersion: apps/v1
@@ -182,6 +109,7 @@ spec:
           set -eu
           mkdir -p /data
           cp /config/llms.json /data/llms.json
+          cp /config/providers.json /data/providers.json
       volumeMounts:
         - name: llmspy-config
           mountPath: /config
@@ -192,7 +120,7 @@ spec:
         - name: llmspy
           # Official LLMSpy container image (published by upstream).
-          # Pin a specific version for reproducibility.
-          image: ghcr.io/servicestack/llms:v2.0.30
+          # TODO: re-pin a specific version for reproducibility once a v3-compatible release is tagged.
+          image: ghcr.io/servicestack/llms:latest
           imagePullPolicy: IfNotPresent
           ports:
             - name: http
@@ -240,6 +168,8 @@ spec:
             items:
               - key: llms.json
                 path: llms.json
+              - key: providers.json
+                path: providers.json
         - name: llmspy-home
           emptyDir: {}
 
diff --git a/internal/openclaw/chart/templates/_helpers.tpl b/internal/openclaw/chart/templates/_helpers.tpl
index 6549246..c68bc79 100644
--- a/internal/openclaw/chart/templates/_helpers.tpl
+++ b/internal/openclaw/chart/templates/_helpers.tpl
@@ -148,9 +148,14 @@ Render openclaw.json as strict JSON.
 If config.content is provided, it is used verbatim.
       "http" (dict "endpoints" (dict "chatCompletions" (dict "enabled" .Values.openclaw.gateway.http.endpoints.chatCompletions.enabled)))
   -}}
+{{- $agentDefaults := dict "workspace" .Values.openclaw.workspaceDir -}}
+{{- if .Values.openclaw.agentModel -}}
+{{- $_ := set $agentDefaults "model" (dict "primary" .Values.openclaw.agentModel) -}}
+{{- end -}}
+
 {{- $cfg := dict
     "gateway" $gateway
-    "agents" (dict "defaults" (dict "workspace" .Values.openclaw.workspaceDir))
+    "agents" (dict "defaults" $agentDefaults)
 -}}
 
 {{- if .Values.skills.enabled -}}
diff --git a/internal/openclaw/chart/values.yaml b/internal/openclaw/chart/values.yaml
index d63d173..0edddbc 100644
--- a/internal/openclaw/chart/values.yaml
+++ b/internal/openclaw/chart/values.yaml
@@ -13,7 +13,8 @@ image:
 # -- Override the container command (ENTRYPOINT)
 command:
-  - openclaw
+  - node
+  - openclaw.mjs
 
 # -- Override the container args (CMD)
 args:
   - gateway
@@ -54,7 +55,8 @@ initJob:
     tag: ""
     pullPolicy: IfNotPresent
   command:
-    - openclaw
+    - node
+    - openclaw.mjs
     - agent
     - init
   args: []
@@ -142,6 +144,8 @@ config:
 openclaw:
   stateDir: /data/.openclaw
   workspaceDir: /data/.openclaw/workspace
+  # -- Default agent model (provider/model). Set to route agent traffic to a specific provider.
+  agentModel: ""
 
   gateway:
     mode: local
@@ -179,17 +183,17 @@ models:
         name: GPT-4o
   ollama:
     enabled: true
-    # -- OpenAI-compatible base URL for Ollama
-    baseUrl: http://ollama.llm.svc.cluster.local:11434/v1
-    # -- OpenClaw provider API type (optional; omit to let OpenClaw auto-detect)
-    api: ""
+    # -- OpenAI-compatible base URL (via LLMSpy gateway)
+    baseUrl: http://llmspy.llm.svc.cluster.local:8000/v1
+    # -- OpenClaw provider API type (openai-completions for Ollama)
+    api: openai-completions
     # -- Env var used for provider API key interpolation in openclaw.json
     apiKeyEnvVar: OLLAMA_API_KEY
     # -- Value set for the apiKey env var (not a secret for Ollama)
     apiKeyValue: ollama-local
     models:
       - id: glm-4.7-flash
-        name: glm-4.7-flash
+        name: GLM-4.7 Flash
 
 # -- Chat channel integrations
 # Tokens are stored in the chart Secret and injected as env vars.
diff --git a/internal/openclaw/openclaw.go b/internal/openclaw/openclaw.go
index 2c6ddaa..983b096 100644
--- a/internal/openclaw/openclaw.go
+++ b/internal/openclaw/openclaw.go
@@ -310,7 +310,7 @@ openclaw:
 models:
   ollama:
     enabled: true
-    baseUrl: http://ollama.llm.svc.cluster.local:11434/v1
+    baseUrl: http://llmspy.llm.svc.cluster.local:8000/v1
     api: openai-completions
     apiKeyEnvVar: OLLAMA_API_KEY
     apiKeyValue: ollama-local
diff --git a/internal/stack/stack.go b/internal/stack/stack.go
index 077c235..61dd60b 100644
--- a/internal/stack/stack.go
+++ b/internal/stack/stack.go
@@ -79,8 +79,13 @@ func Init(cfg *config.Config, force bool) error {
 	fmt.Printf("K3d config saved to: %s\n", k3dConfigPath)
 
 	// Copy embedded defaults (helmfile + charts for infrastructure)
+	// Resolve placeholders: {{OLLAMA_HOST}} → host DNS for the cluster runtime.
+	// k3d uses host.k3d.internal; bare k3s would use the node's gateway IP.
+	ollamaHost := "host.k3d.internal"
 	defaultsDir := filepath.Join(cfg.ConfigDir, "defaults")
-	if err := embed.CopyDefaults(defaultsDir); err != nil {
+	if err := embed.CopyDefaults(defaultsDir, map[string]string{
+		"{{OLLAMA_HOST}}": ollamaHost,
+	}); err != nil {
 		return fmt.Errorf("failed to copy defaults: %w", err)
 	}
 	fmt.Printf("Defaults copied to: %s\n", defaultsDir)
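
A note on the substitution mechanics in embed.go: the placeholder pass is plain string replacement, not Go templating, so `{{OLLAMA_HOST}}` stays inert in the embedded files until `obol stack init` rewrites it. A minimal sketch of the semantics (`applyReplacements` is a hypothetical stand-in for the loop inlined in `CopyDefaults`):

```go
package main

import (
	"fmt"
	"strings"
)

// applyReplacements mirrors the substitution pass inside CopyDefaults:
// each placeholder key is replaced everywhere it occurs, and a nil map
// leaves the content untouched (verbatim copy).
func applyReplacements(content string, replacements map[string]string) string {
	for placeholder, value := range replacements {
		content = strings.ReplaceAll(content, placeholder, value)
	}
	return content
}

func main() {
	manifest := `externalName: "{{OLLAMA_HOST}}"`
	out := applyReplacements(manifest, map[string]string{
		"{{OLLAMA_HOST}}": "host.k3d.internal",
	})
	fmt.Println(out) // externalName: "host.k3d.internal"
}
```

One caveat: Go map iteration order is randomized, so if one replacement value could itself contain another placeholder key, the output would be nondeterministic; with a single `{{OLLAMA_HOST}}` key this is moot.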
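On the k3s path flagged in the stack.go comment ("bare k3s would use the node's gateway IP"), a hedged sketch of how that resolution could look, using the common trick of dialing out to learn the default-route source address (`resolveOllamaHost` is hypothetical, not part of this diff):

```go
package main

import (
	"fmt"
	"net"
)

// resolveOllamaHost picks the ExternalName target per runtime: k3d ships a
// built-in host.k3d.internal alias, while bare k3s needs a host address that
// pods can reach. Dialing UDP sends no packets but binds a local address on
// the default-route interface, which reveals the node's outbound IP.
func resolveOllamaHost(runtime string) (string, error) {
	if runtime == "k3d" {
		return "host.k3d.internal", nil
	}
	conn, err := net.Dial("udp", "1.1.1.1:53")
	if err != nil {
		return "", fmt.Errorf("failed to detect node IP: %w", err)
	}
	defer conn.Close()
	return conn.LocalAddr().(*net.UDPAddr).IP.String(), nil
}

func main() {
	host, err := resolveOllamaHost("k3s")
	if err != nil {
		panic(err)
	}
	fmt.Println(host)
}
```

Worth noting: Kubernetes documents `externalName` as a DNS name (the cluster DNS answers with a CNAME), so substituting a bare IP may not resolve from pods. If the k3s path lands on an IP, a headless Service with an explicit Endpoints object is probably the safer shape.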
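Finally, a smoke test for the rewired consumer path (OpenClaw → LLMSpy at `llmspy.llm.svc.cluster.local:8000/v1` → `ollama` ExternalName → host Ollama). This assumes LLMSpy exposes the standard OpenAI-compatible chat-completions endpoint, as the `/v1` base URL in values.yaml suggests, and must run in-cluster so the service DNS names resolve:

```go
package main

import (
	"bytes"
	"fmt"
	"io"
	"net/http"
)

// Post a minimal chat completion through the gateway using the default
// model configured in llms.json; any non-2xx status or connection error
// points at a break in the gateway → ExternalName → host chain.
func main() {
	body := []byte(`{
		"model": "glm-4.7-flash",
		"messages": [{"role": "user", "content": "ping"}]
	}`)
	resp, err := http.Post(
		"http://llmspy.llm.svc.cluster.local:8000/v1/chat/completions",
		"application/json",
		bytes.NewReader(body),
	)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	out, _ := io.ReadAll(resp.Body)
	fmt.Printf("%s: %s\n", resp.Status, out)
}
```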