14 changes: 11 additions & 3 deletions internal/embed/embed.go
@@ -21,8 +21,10 @@ var infrastructureFS embed.FS
//go:embed all:networks
var networksFS embed.FS

// CopyDefaults recursively copies all embedded infrastructure manifests to the destination directory
func CopyDefaults(destDir string) error {
// CopyDefaults recursively copies all embedded infrastructure manifests to the destination directory.
// The replacements map is applied to every file: each key (e.g. "{{OLLAMA_HOST}}") is replaced
// with its value. Pass nil for a verbatim copy.
func CopyDefaults(destDir string, replacements map[string]string) error {
return fs.WalkDir(infrastructureFS, "infrastructure", func(path string, d fs.DirEntry, err error) error {
if err != nil {
return err
@@ -57,8 +59,14 @@ func CopyDefaults(destDir string) error {
return fmt.Errorf("failed to read embedded file %s: %w", path, err)
}

// Apply placeholder replacements
content := string(data)
for placeholder, value := range replacements {
content = strings.ReplaceAll(content, placeholder, value)
}

// Write to destination
if err := os.WriteFile(destPath, data, 0644); err != nil {
if err := os.WriteFile(destPath, []byte(content), 0644); err != nil {
return fmt.Errorf("failed to write file %s: %w", destPath, err)
}
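Reviewer note: a minimal test sketch for the new replacements path, assuming it lives next to the package as a hypothetical internal/embed/embed_test.go; it relies only on the signature shown in this hunk and makes no assumption about the copied directory layout.

```go
package embed

import (
	"io/fs"
	"os"
	"path/filepath"
	"strings"
	"testing"
)

// Copy the defaults into a temp dir and assert that no copied file still
// contains the {{OLLAMA_HOST}} placeholder.
func TestCopyDefaultsReplacesPlaceholders(t *testing.T) {
	dest := t.TempDir()
	if err := CopyDefaults(dest, map[string]string{
		"{{OLLAMA_HOST}}": "host.k3d.internal",
	}); err != nil {
		t.Fatalf("CopyDefaults: %v", err)
	}
	walkErr := filepath.WalkDir(dest, func(path string, d fs.DirEntry, err error) error {
		if err != nil || d.IsDir() {
			return err
		}
		data, readErr := os.ReadFile(path)
		if readErr != nil {
			return readErr
		}
		if strings.Contains(string(data), "{{OLLAMA_HOST}}") {
			t.Errorf("placeholder left unreplaced in %s", path)
		}
		return nil
	})
	if walkErr != nil {
		t.Fatalf("walk: %v", walkErr)
	}
}
```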

140 changes: 35 additions & 105 deletions internal/embed/infrastructure/base/templates/llm.yaml
@@ -2,104 +2,25 @@
# LLM foundation services (OKR-1)
#
# This deploys:
# - Ollama (as the upstream LLM runtime)
# - An ExternalName Service "ollama" that resolves to the host's Ollama server
# - llms.py (LLMSpy) as an OpenAI-compatible gateway / router over providers
#
# Design notes:
# - We default to Ollama Cloud (`glm-4.7:cloud`) to avoid requiring local GPU/VRAM.
# - We persist Ollama's identity keypair at `/root/.ollama/id_ed25519` so the
# Ollama Cloud "connect" binding survives pod restarts/upgrades.
# - Model cache is kept on `emptyDir` (ephemeral) per product decision.
# - No in-cluster Ollama is deployed; the host is expected to run Ollama
# (or another OpenAI-compatible server) on port 11434.
# - The ollama Service abstracts host resolution:
# k3d → host.k3d.internal
# k3s → resolved at stack init via node IP
# - LLMSpy and all consumers reference ollama.llm.svc.cluster.local:11434,
# which the ExternalName Service routes to the host.
apiVersion: v1
kind: Namespace
metadata:
name: llm

---
# Persist Ollama identity (Ollama Cloud connect uses the public key derived from this keypair).
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: ollama-home
namespace: llm
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 256Mi

---
apiVersion: apps/v1
kind: Deployment
metadata:
name: ollama
namespace: llm
labels:
app: ollama
spec:
replicas: 1
# Ollama uses a ReadWriteOnce PVC; avoid surging a second pod during updates.
strategy:
type: Recreate
selector:
matchLabels:
app: ollama
template:
metadata:
labels:
app: ollama
spec:
containers:
- name: ollama
image: ollama/ollama:latest
imagePullPolicy: IfNotPresent
ports:
- name: http
containerPort: 11434
protocol: TCP
env:
# Store model blobs (including any cloud model stubs/cache) in an ephemeral volume.
- name: OLLAMA_MODELS
value: /models
# Explicitly bind the HTTP API to all interfaces in-cluster.
- name: OLLAMA_HOST
value: 0.0.0.0:11434
volumeMounts:
# Persist identity + config (e.g. ~/.ollama/id_ed25519) for Ollama Cloud connect.
- name: ollama-home
mountPath: /root/.ollama
- name: ollama-models
mountPath: /models
readinessProbe:
httpGet:
path: /api/version
port: http
initialDelaySeconds: 5
periodSeconds: 5
timeoutSeconds: 2
livenessProbe:
httpGet:
path: /api/version
port: http
initialDelaySeconds: 30
periodSeconds: 10
timeoutSeconds: 2
resources:
requests:
cpu: 100m
memory: 256Mi
limits:
cpu: 2000m
memory: 4Gi
volumes:
- name: ollama-home
persistentVolumeClaim:
claimName: ollama-home
- name: ollama-models
emptyDir: {}

---
# ExternalName Service: routes ollama.llm.svc.cluster.local → host Ollama.
# The externalName is resolved during `obol stack init` via the {{OLLAMA_HOST}} placeholder.
apiVersion: v1
kind: Service
metadata:
@@ -108,19 +29,17 @@ metadata:
labels:
app: ollama
spec:
type: ClusterIP
selector:
app: ollama
type: ExternalName
externalName: {{OLLAMA_HOST}}
ports:
- name: http
port: 11434
targetPort: http
protocol: TCP

---
# llms.py configuration for Obol Stack:
# - Only enable the Ollama provider
# - Default model is `glm-4.7:cloud` (cloud-first)
# llms.py v3 configuration for Obol Stack:
# - Only enable the Ollama provider (host machine via ollama Service)
# - Default model is glm-4.7-flash
apiVersion: v1
kind: ConfigMap
metadata:
@@ -129,27 +48,35 @@ metadata:
data:
llms.json: |
{
"version": 3,
"defaults": {
"headers": {
"Content-Type": "application/json"
"Content-Type": "application/json",
"User-Agent": "llmspy.org/3.0"
},
"text": {
"model": "glm-4.7:cloud",
"model": "glm-4.7-flash",
"messages": [
{ "role": "user", "content": "" }
{ "role": "user", "content": [{ "type": "text", "text": "" }] }
]
}
},
"providers": {
"ollama": {
"enabled": true,
"type": "OllamaProvider",
"base_url": "http://ollama.llm.svc.cluster.local:11434",
"models": {},
"all_models": true
"enabled": true
}
}
}
providers.json: |
{
"ollama": {
"id": "ollama",
"npm": "ollama",
"api": "http://ollama.llm.svc.cluster.local:11434",
"models": {},
"all_models": true
}
}

---
apiVersion: apps/v1
@@ -182,6 +109,7 @@ spec:
set -eu
mkdir -p /data
cp /config/llms.json /data/llms.json
cp /config/providers.json /data/providers.json
volumeMounts:
- name: llmspy-config
mountPath: /config
@@ -192,7 +120,7 @@ spec:
- name: llmspy
# Official LLMSpy container image (published by upstream).
# Pin a specific version for reproducibility.
image: ghcr.io/servicestack/llms:v2.0.30
image: ghcr.io/servicestack/llms:latest
imagePullPolicy: IfNotPresent
ports:
- name: http
@@ -240,6 +168,8 @@ spec:
items:
- key: llms.json
path: llms.json
- key: providers.json
path: providers.json
- name: llmspy-home
emptyDir: {}
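Reviewer note: a small in-cluster smoke-test sketch for the wiring above, written against the OpenAI-style /v1/chat/completions endpoint and the llmspy Service address and default model used elsewhere in this PR; the response shape is not assumed, the raw body is just printed.

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"log"
	"net/http"
)

func main() {
	// Gateway URL and default model match the manifests/values in this PR;
	// the request body follows the OpenAI-compatible chat completions API.
	body, _ := json.Marshal(map[string]any{
		"model": "glm-4.7-flash",
		"messages": []map[string]string{
			{"role": "user", "content": "ping"},
		},
	})
	resp, err := http.Post(
		"http://llmspy.llm.svc.cluster.local:8000/v1/chat/completions",
		"application/json",
		bytes.NewReader(body),
	)
	if err != nil {
		log.Fatalf("request failed: %v", err)
	}
	defer resp.Body.Close()
	out, _ := io.ReadAll(resp.Body)
	fmt.Printf("status=%d body=%s\n", resp.StatusCode, out)
}
```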

7 changes: 6 additions & 1 deletion internal/openclaw/chart/templates/_helpers.tpl
@@ -148,9 +148,14 @@ Render openclaw.json as strict JSON. If config.content is provided, it is used v
"http" (dict "endpoints" (dict "chatCompletions" (dict "enabled" .Values.openclaw.gateway.http.endpoints.chatCompletions.enabled)))
-}}

{{- $agentDefaults := dict "workspace" .Values.openclaw.workspaceDir -}}
{{- if .Values.openclaw.agentModel -}}
{{- $_ := set $agentDefaults "model" (dict "primary" .Values.openclaw.agentModel) -}}
{{- end -}}

{{- $cfg := dict
"gateway" $gateway
"agents" (dict "defaults" (dict "workspace" .Values.openclaw.workspaceDir))
"agents" (dict "defaults" $agentDefaults)
-}}

{{- if .Values.skills.enabled -}}
18 changes: 11 additions & 7 deletions internal/openclaw/chart/values.yaml
@@ -13,7 +13,8 @@ image:

# -- Override the container command (ENTRYPOINT)
command:
- openclaw
- node
- openclaw.mjs
# -- Override the container args (CMD)
args:
- gateway
@@ -54,7 +55,8 @@ initJob:
tag: ""
pullPolicy: IfNotPresent
command:
- openclaw
- node
- openclaw.mjs
- agent
- init
args: []
@@ -142,6 +144,8 @@ config:
openclaw:
stateDir: /data/.openclaw
workspaceDir: /data/.openclaw/workspace
# -- Default agent model (provider/model). Set to route agent traffic to a specific provider.
agentModel: ""

gateway:
mode: local
@@ -179,17 +183,17 @@ models:
name: GPT-4o
ollama:
enabled: true
# -- OpenAI-compatible base URL for Ollama
baseUrl: http://ollama.llm.svc.cluster.local:11434/v1
# -- OpenClaw provider API type (optional; omit to let OpenClaw auto-detect)
api: ""
# -- OpenAI-compatible base URL (via LLMSpy gateway)
baseUrl: http://llmspy.llm.svc.cluster.local:8000/v1
# -- OpenClaw provider API type (openai-completions for Ollama)
api: openai-completions
# -- Env var used for provider API key interpolation in openclaw.json
apiKeyEnvVar: OLLAMA_API_KEY
# -- Value set for the apiKey env var (not a secret for Ollama)
apiKeyValue: ollama-local
models:
- id: glm-4.7-flash
name: glm-4.7-flash
name: GLM-4.7 Flash

# -- Chat channel integrations
# Tokens are stored in the chart Secret and injected as env vars.
2 changes: 1 addition & 1 deletion internal/openclaw/openclaw.go
@@ -310,7 +310,7 @@ openclaw:
models:
ollama:
enabled: true
baseUrl: http://ollama.llm.svc.cluster.local:11434/v1
baseUrl: http://llmspy.llm.svc.cluster.local:8000/v1
api: openai-completions
apiKeyEnvVar: OLLAMA_API_KEY
apiKeyValue: ollama-local
7 changes: 6 additions & 1 deletion internal/stack/stack.go
@@ -79,8 +79,13 @@ func Init(cfg *config.Config, force bool) error {
fmt.Printf("K3d config saved to: %s\n", k3dConfigPath)

// Copy embedded defaults (helmfile + charts for infrastructure)
// Resolve placeholders: {{OLLAMA_HOST}} → host DNS for the cluster runtime.
// k3d uses host.k3d.internal; bare k3s would use the node's gateway IP.
ollamaHost := "host.k3d.internal"
defaultsDir := filepath.Join(cfg.ConfigDir, "defaults")
if err := embed.CopyDefaults(defaultsDir); err != nil {
if err := embed.CopyDefaults(defaultsDir, map[string]string{
"{{OLLAMA_HOST}}": ollamaHost,
}); err != nil {
return fmt.Errorf("failed to copy defaults: %w", err)
}
fmt.Printf("Defaults copied to: %s\n", defaultsDir)