From 2d1ba04a2675ae40f86d0a4acef07bcb5c6c8335 Mon Sep 17 00:00:00 2001 From: bussyjd Date: Mon, 12 Jan 2026 12:26:49 +0400 Subject: [PATCH 01/42] chore: upgrade pinned dependency versions in obolup.sh MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update dependency versions to latest stable releases: - kubectl: 1.31.0 → 1.35.0 - helm: 3.19.1 → 3.19.4 - helmfile: 1.2.2 → 1.2.3 - k9s: 0.32.5 → 0.50.18 - helm-diff: 3.9.11 → 3.14.1 k3d remains at 5.8.3 (already current). --- obolup.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/obolup.sh b/obolup.sh index 2741a53..f6430ab 100755 --- a/obolup.sh +++ b/obolup.sh @@ -49,12 +49,12 @@ fi # Pinned dependency versions # Update these versions to upgrade dependencies across all installations -readonly KUBECTL_VERSION="1.31.0" -readonly HELM_VERSION="3.19.1" +readonly KUBECTL_VERSION="1.35.0" +readonly HELM_VERSION="3.19.4" readonly K3D_VERSION="5.8.3" -readonly HELMFILE_VERSION="1.2.2" -readonly K9S_VERSION="0.32.5" -readonly HELM_DIFF_VERSION="3.9.11" +readonly HELMFILE_VERSION="1.2.3" +readonly K9S_VERSION="0.50.18" +readonly HELM_DIFF_VERSION="3.14.1" # Repository URL for building from source readonly OBOL_REPO_URL="git@github.com:ObolNetwork/obol-stack.git" From d1101042a2baf5a589bcc8a31c0cf0ba228f1d1c Mon Sep 17 00:00:00 2001 From: bussyjd Date: Wed, 14 Jan 2026 00:21:46 +0400 Subject: [PATCH 02/42] feat: replace nginx-ingress with Traefik and Gateway API Replace nginx-ingress controller with Traefik 38.0.2 using Kubernetes Gateway API for routing. This addresses the nginx-ingress deprecation (end of maintenance March 2026). 
Changes: - Remove --disable=traefik from k3d config to use k3s built-in Traefik - Replace nginx-ingress helm release with Traefik 38.0.2 in infrastructure - Configure Gateway API provider with cross-namespace routing support - Add GatewayClass and Gateway resources via Traefik helm chart - Convert all Ingress resources to HTTPRoute format: - eRPC: /rpc path routing - obol-frontend: / path routing - ethereum: /execution and /beacon path routing with URL rewrite - aztec: namespace-based path routing with URL rewrite - helios: namespace-based path routing with URL rewrite - Disable legacy Ingress in service helm values Closes #125 --- internal/embed/infrastructure/helmfile.yaml | 155 +++++++++++++++--- .../infrastructure/values/erpc.yaml.gotmpl | 9 +- .../values/obol-frontend.yaml.gotmpl | 9 +- internal/embed/k3d-config.yaml | 4 - .../networks/aztec/templates/ingress.yaml | 38 +++-- .../networks/ethereum/templates/ingress.yaml | 75 ++++++--- .../networks/helios/helmfile.yaml.gotmpl | 45 +++-- 7 files changed, 243 insertions(+), 92 deletions(-) diff --git a/internal/embed/infrastructure/helmfile.yaml b/internal/embed/infrastructure/helmfile.yaml index 9f49d09..c2751a1 100644 --- a/internal/embed/infrastructure/helmfile.yaml +++ b/internal/embed/infrastructure/helmfile.yaml @@ -1,17 +1,20 @@ # Helmfile for Obol Stack default infrastructure # Orchestrates core infrastructure components deployed with every stack +# Uses Traefik with Gateway API for routing (replaces nginx-ingress) repositories: - - name: ingress-nginx - url: https://kubernetes.github.io/ingress-nginx + - name: traefik + url: https://traefik.github.io/charts - name: obol url: https://obolnetwork.github.io/helm-charts/ - name: ethereum url: https://ethpandaops.github.io/ethereum-helm-charts + - name: bedag + url: https://bedag.github.io/helm-charts/ # Single source of truth: change this to switch networks values: - - network: mainnet + - network: mainnet releases: # Local storage provisioner (raw manifests 
wrapped as chart) @@ -22,45 +25,149 @@ releases: - dataDir: /data - network: "{{ .Values.network }}" - # Nginx ingress controller (upstream chart) - - name: ingress-nginx - namespace: ingress-nginx - chart: ingress-nginx/ingress-nginx - version: 4.13.3 + # Traefik ingress controller with Gateway API support + - name: traefik + namespace: traefik + createNamespace: true + chart: traefik/traefik + version: 38.0.2 values: - - controller: - replicaCount: 1 - service: - type: LoadBalancer - externalTrafficPolicy: Local - resources: - limits: - cpu: 500m - memory: 512Mi - requests: - cpu: 100m - memory: 128Mi - tolerations: [] - admissionWebhooks: + # Gateway API provider configuration + - providers: + kubernetesGateway: + enabled: true + namespaces: [] # Watch all namespaces + kubernetesCRD: + enabled: true + kubernetesIngress: + enabled: false # Disable legacy Ingress support + # GatewayClass configuration + - gatewayClass: + enabled: true + name: traefik + # Gateway configuration (main entry point) + - gateway: + enabled: true + name: traefik-gateway + namespace: traefik + listeners: + web: + port: 8000 + protocol: HTTP + namespacePolicy: + from: All + # Ports configuration + - ports: + web: + port: 8000 + expose: + default: true + exposedPort: 80 + protocol: TCP + websecure: + port: 8443 + expose: + default: true + exposedPort: 443 + protocol: TCP + tls: + enabled: false # TLS termination disabled for local dev + # Service configuration + - service: + type: LoadBalancer + externalTrafficPolicy: Local + # Resource limits + - resources: + limits: + cpu: 500m + memory: 512Mi + requests: + cpu: 100m + memory: 128Mi + # Disable dashboard by default + - ingressRoute: + dashboard: enabled: false # eRPC - name: erpc namespace: erpc + createNamespace: true chart: ethereum/erpc needs: - kube-system/base - - ingress-nginx/ingress-nginx + - traefik/traefik values: - ./values/erpc.yaml.gotmpl + # eRPC HTTPRoute + - name: erpc-httproute + namespace: erpc + chart: bedag/raw + 
needs: + - traefik/traefik + - erpc/erpc + values: + - resources: + - apiVersion: gateway.networking.k8s.io/v1 + kind: HTTPRoute + metadata: + name: erpc + namespace: erpc + spec: + parentRefs: + - name: traefik-gateway + namespace: traefik + sectionName: web + hostnames: + - obol.stack + rules: + - matches: + - path: + type: PathPrefix + value: /rpc + backendRefs: + - name: erpc + port: 4000 + # Obol Stack frontend - name: obol-frontend namespace: obol-frontend + createNamespace: true chart: obol/obol-app version: 0.1.0 needs: - - ingress-nginx/ingress-nginx + - traefik/traefik - erpc/erpc values: - ./values/obol-frontend.yaml.gotmpl + + # Obol Frontend HTTPRoute + - name: obol-frontend-httproute + namespace: obol-frontend + chart: bedag/raw + needs: + - traefik/traefik + - obol-frontend/obol-frontend + values: + - resources: + - apiVersion: gateway.networking.k8s.io/v1 + kind: HTTPRoute + metadata: + name: obol-frontend + namespace: obol-frontend + spec: + parentRefs: + - name: traefik-gateway + namespace: traefik + sectionName: web + hostnames: + - obol.stack + rules: + - matches: + - path: + type: PathPrefix + value: / + backendRefs: + - name: obol-frontend + port: 3000 diff --git a/internal/embed/infrastructure/values/erpc.yaml.gotmpl b/internal/embed/infrastructure/values/erpc.yaml.gotmpl index fdedc69..6799332 100644 --- a/internal/embed/infrastructure/values/erpc.yaml.gotmpl +++ b/internal/embed/infrastructure/values/erpc.yaml.gotmpl @@ -87,14 +87,9 @@ extraArgs: [] # Command replacement for the erpc container customCommand: [] +# Disable legacy Ingress - using Gateway API HTTPRoute instead ingress: - enabled: true - className: nginx - hosts: - - host: obol.stack - paths: - - path: /rpc - pathType: Prefix + enabled: false service: type: ClusterIP diff --git a/internal/embed/infrastructure/values/obol-frontend.yaml.gotmpl b/internal/embed/infrastructure/values/obol-frontend.yaml.gotmpl index 3301156..08aa9e0 100644 --- 
a/internal/embed/infrastructure/values/obol-frontend.yaml.gotmpl +++ b/internal/embed/infrastructure/values/obol-frontend.yaml.gotmpl @@ -19,11 +19,6 @@ service: type: ClusterIP port: 3000 +# Disable legacy Ingress - using Gateway API HTTPRoute instead ingress: - enabled: true - className: "nginx" - hosts: - - host: obol.stack - paths: - - path: / - pathType: Prefix + enabled: false diff --git a/internal/embed/k3d-config.yaml b/internal/embed/k3d-config.yaml index 563d697..0acd911 100644 --- a/internal/embed/k3d-config.yaml +++ b/internal/embed/k3d-config.yaml @@ -35,10 +35,6 @@ options: - arg: --kube-apiserver-arg=feature-gates=KubeletInUserNamespace=true nodeFilters: - server:* - # Disable Traefik to use nginx instead - - arg: --disable=traefik - nodeFilters: - - server:* # Disable local-storage addon (we provide our own config) - arg: --disable=local-storage nodeFilters: diff --git a/internal/embed/networks/aztec/templates/ingress.yaml b/internal/embed/networks/aztec/templates/ingress.yaml index 1e8ddd3..cdd664c 100644 --- a/internal/embed/networks/aztec/templates/ingress.yaml +++ b/internal/embed/networks/aztec/templates/ingress.yaml @@ -1,23 +1,29 @@ {{- if eq .Release.Name "aztec-ingress" }} -apiVersion: networking.k8s.io/v1 -kind: Ingress +# HTTPRoute for Aztec sequencer node RPC +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute metadata: name: aztec namespace: {{ .Release.Namespace }} - annotations: - nginx.ingress.kubernetes.io/rewrite-target: /$2 - nginx.ingress.kubernetes.io/use-regex: "true" spec: - ingressClassName: nginx + parentRefs: + - name: traefik-gateway + namespace: traefik + sectionName: web + hostnames: + - obol.stack rules: - - host: obol.stack - http: - paths: - - path: /{{ .Release.Namespace }}(/|$)(.*) - pathType: ImplementationSpecific - backend: - service: - name: l2-sequencer-node-{{ .Values.id }}-node - port: - number: 8080 + - matches: + - path: + type: PathPrefix + value: /{{ .Release.Namespace }} + filters: + - type: 
URLRewrite + urlRewrite: + path: + type: ReplacePrefixMatch + replacePrefixMatch: / + backendRefs: + - name: l2-sequencer-node-{{ .Values.id }}-node + port: 8080 {{- end }} diff --git a/internal/embed/networks/ethereum/templates/ingress.yaml b/internal/embed/networks/ethereum/templates/ingress.yaml index 75a39a6..a8cda39 100644 --- a/internal/embed/networks/ethereum/templates/ingress.yaml +++ b/internal/embed/networks/ethereum/templates/ingress.yaml @@ -1,30 +1,57 @@ {{- if eq .Release.Name "ethereum-ingress" }} -apiVersion: networking.k8s.io/v1 -kind: Ingress +# HTTPRoute for Ethereum execution client RPC +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute metadata: - name: ethereum + name: ethereum-execution namespace: {{ .Release.Namespace }} - annotations: - nginx.ingress.kubernetes.io/rewrite-target: /$2 - nginx.ingress.kubernetes.io/use-regex: "true" spec: - ingressClassName: nginx + parentRefs: + - name: traefik-gateway + namespace: traefik + sectionName: web + hostnames: + - obol.stack rules: - - host: obol.stack - http: - paths: - - path: /{{ .Release.Namespace }}/execution(/|$)(.*) - pathType: ImplementationSpecific - backend: - service: - name: ethereum-execution - port: - number: 8545 - - path: /{{ .Release.Namespace }}/beacon(/|$)(.*) - pathType: ImplementationSpecific - backend: - service: - name: ethereum-beacon - port: - number: 5052 + - matches: + - path: + type: PathPrefix + value: /{{ .Release.Namespace }}/execution + filters: + - type: URLRewrite + urlRewrite: + path: + type: ReplacePrefixMatch + replacePrefixMatch: / + backendRefs: + - name: ethereum-execution + port: 8545 +--- +# HTTPRoute for Ethereum beacon client RPC +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute +metadata: + name: ethereum-beacon + namespace: {{ .Release.Namespace }} +spec: + parentRefs: + - name: traefik-gateway + namespace: traefik + sectionName: web + hostnames: + - obol.stack + rules: + - matches: + - path: + type: PathPrefix + value: /{{ 
.Release.Namespace }}/beacon + filters: + - type: URLRewrite + urlRewrite: + path: + type: ReplacePrefixMatch + replacePrefixMatch: / + backendRefs: + - name: ethereum-beacon + port: 5052 {{- end }} diff --git a/internal/embed/networks/helios/helmfile.yaml.gotmpl b/internal/embed/networks/helios/helmfile.yaml.gotmpl index 2be4293..7fbbf53 100644 --- a/internal/embed/networks/helios/helmfile.yaml.gotmpl +++ b/internal/embed/networks/helios/helmfile.yaml.gotmpl @@ -28,17 +28,42 @@ releases: size: 10Gi storageClass: local-path + # Disable legacy Ingress - using Gateway API HTTPRoute instead - ingress: - enabled: true - className: nginx - annotations: - nginx.ingress.kubernetes.io/rewrite-target: /$2 - nginx.ingress.kubernetes.io/use-regex: "true" - hosts: - - host: obol.stack - paths: - - path: /helios-{{ .Values.id }}(/|$)(.*) - pathType: ImplementationSpecific + enabled: false + + # HTTPRoute for Helios RPC endpoint + - name: helios-httproute + namespace: helios-{{ .Values.id }} + chart: bedag/raw + values: + - resources: + - apiVersion: gateway.networking.k8s.io/v1 + kind: HTTPRoute + metadata: + name: helios + namespace: helios-{{ .Values.id }} + spec: + parentRefs: + - name: traefik-gateway + namespace: traefik + sectionName: web + hostnames: + - obol.stack + rules: + - matches: + - path: + type: PathPrefix + value: /helios-{{ .Values.id }} + filters: + - type: URLRewrite + urlRewrite: + path: + type: ReplacePrefixMatch + replacePrefixMatch: / + backendRefs: + - name: helios-{{ .Values.network }} + port: 8545 # Metadata ConfigMap for frontend discovery - name: helios-metadata From ba54ea5b01d45105f36e6b7252e114b849fa3c31 Mon Sep 17 00:00:00 2001 From: bussyjd Date: Wed, 14 Jan 2026 14:10:04 +0400 Subject: [PATCH 03/42] feat: add monitoring stack and gateway updates --- CLAUDE.md | 13 ++++++++ internal/embed/infrastructure/helmfile.yaml | 32 +++++++++++++++++- .../values/monitoring.yaml.gotmpl | 33 +++++++++++++++++++ .../values/obol-frontend.yaml.gotmpl | 2 +- 
internal/embed/k3d-config.yaml | 4 +++ .../networks/aztec/templates/ingress.yaml | 5 ++- .../networks/ethereum/templates/ingress.yaml | 10 ++++-- .../networks/helios/helmfile.yaml.gotmpl | 5 ++- renovate.json | 13 ++++++++ 9 files changed, 111 insertions(+), 6 deletions(-) create mode 100644 internal/embed/infrastructure/values/monitoring.yaml.gotmpl diff --git a/CLAUDE.md b/CLAUDE.md index bc40752..a6f3ba8 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -20,6 +20,19 @@ The Obol Stack is a local Kubernetes-based framework for running blockchain netw 5. **Two-stage templating**: CLI flags → Go templates → Helmfile → Kubernetes resources 6. **Development mode**: Local `.workspace/` directory with `go run` wrapper for rapid development +### Routing and Gateway API + +Obol Stack uses Traefik with the Kubernetes Gateway API for HTTP routing. + +- Controller: Traefik Helm chart (`traefik` namespace) +- GatewayClass: `traefik` +- Gateway: `traefik-gateway` in `traefik` namespace +- HTTPRoute patterns: + - `/` → `obol-frontend` + - `/rpc` → `erpc` + - `/ethereum-/execution` and `/ethereum-/beacon` + - `/aztec-` and `/helios-` + ## Bootstrap Installer: obolup.sh ### Purpose diff --git a/internal/embed/infrastructure/helmfile.yaml b/internal/embed/infrastructure/helmfile.yaml index c2751a1..6f4d2b5 100644 --- a/internal/embed/infrastructure/helmfile.yaml +++ b/internal/embed/infrastructure/helmfile.yaml @@ -5,6 +5,8 @@ repositories: - name: traefik url: https://traefik.github.io/charts + - name: prometheus-community + url: https://prometheus-community.github.io/helm-charts - name: obol url: https://obolnetwork.github.io/helm-charts/ - name: ethereum @@ -15,6 +17,7 @@ repositories: # Single source of truth: change this to switch networks values: - network: mainnet + - gatewayApiVersion: v1.4.1 releases: # Local storage provisioner (raw manifests wrapped as chart) @@ -25,12 +28,39 @@ releases: - dataDir: /data - network: "{{ .Values.network }}" + # Monitoring stack (Prometheus 
operator + Prometheus) + - name: monitoring + namespace: monitoring + createNamespace: true + chart: prometheus-community/kube-prometheus-stack + version: 79.5.0 + values: + - ./values/monitoring.yaml.gotmpl + + # Gateway API CRDs (applied from upstream release) + - name: gateway-api-crds + namespace: gateway-system + createNamespace: true + chart: bedag/raw + values: + - resources: [] + hooks: + - events: ["presync"] + showlogs: true + command: kubectl + args: + - apply + - -f + - https://github.com/kubernetes-sigs/gateway-api/releases/download/{{ .Values.gatewayApiVersion }}/standard-install.yaml + # Traefik ingress controller with Gateway API support - name: traefik namespace: traefik createNamespace: true chart: traefik/traefik version: 38.0.2 + needs: + - gateway-system/gateway-api-crds values: # Gateway API provider configuration - providers: @@ -169,5 +199,5 @@ releases: type: PathPrefix value: / backendRefs: - - name: obol-frontend + - name: obol-frontend-obol-app port: 3000 diff --git a/internal/embed/infrastructure/values/monitoring.yaml.gotmpl b/internal/embed/infrastructure/values/monitoring.yaml.gotmpl new file mode 100644 index 0000000..d7a0dc1 --- /dev/null +++ b/internal/embed/infrastructure/values/monitoring.yaml.gotmpl @@ -0,0 +1,33 @@ +prometheus: + enabled: true + prometheusSpec: + serviceMonitorSelectorNilUsesHelmValues: false + serviceMonitorSelector: + matchLabels: + release: monitoring + serviceMonitorNamespaceSelector: {} + podMonitorSelectorNilUsesHelmValues: false + podMonitorSelector: + matchLabels: + release: monitoring + podMonitorNamespaceSelector: {} + retention: 6h + resources: + requests: + cpu: 100m + memory: 256Mi + limits: + cpu: 500m + memory: 1Gi + +grafana: + enabled: false # Enable when we want UI access + +alertmanager: + enabled: false # Disable to keep the local stack lean + +kubeStateMetrics: + enabled: true + +nodeExporter: + enabled: true diff --git a/internal/embed/infrastructure/values/obol-frontend.yaml.gotmpl 
b/internal/embed/infrastructure/values/obol-frontend.yaml.gotmpl index 08aa9e0..92aab95 100644 --- a/internal/embed/infrastructure/values/obol-frontend.yaml.gotmpl +++ b/internal/embed/infrastructure/values/obol-frontend.yaml.gotmpl @@ -13,7 +13,7 @@ image: repository: obolnetwork/obol-stack-front-end pullPolicy: Always - tag: "v0.1.1" + tag: "latest" service: type: ClusterIP diff --git a/internal/embed/k3d-config.yaml b/internal/embed/k3d-config.yaml index 0acd911..9a97c5d 100644 --- a/internal/embed/k3d-config.yaml +++ b/internal/embed/k3d-config.yaml @@ -35,6 +35,10 @@ options: - arg: --kube-apiserver-arg=feature-gates=KubeletInUserNamespace=true nodeFilters: - server:* + # Disable bundled Traefik (we install Traefik via Helm) + - arg: --disable=traefik + nodeFilters: + - server:* # Disable local-storage addon (we provide our own config) - arg: --disable=local-storage nodeFilters: diff --git a/internal/embed/networks/aztec/templates/ingress.yaml b/internal/embed/networks/aztec/templates/ingress.yaml index cdd664c..821537d 100644 --- a/internal/embed/networks/aztec/templates/ingress.yaml +++ b/internal/embed/networks/aztec/templates/ingress.yaml @@ -15,8 +15,11 @@ spec: rules: - matches: - path: - type: PathPrefix + type: Exact value: /{{ .Release.Namespace }} + - path: + type: PathPrefix + value: /{{ .Release.Namespace }}/ filters: - type: URLRewrite urlRewrite: diff --git a/internal/embed/networks/ethereum/templates/ingress.yaml b/internal/embed/networks/ethereum/templates/ingress.yaml index a8cda39..76c745e 100644 --- a/internal/embed/networks/ethereum/templates/ingress.yaml +++ b/internal/embed/networks/ethereum/templates/ingress.yaml @@ -15,8 +15,11 @@ spec: rules: - matches: - path: - type: PathPrefix + type: Exact value: /{{ .Release.Namespace }}/execution + - path: + type: PathPrefix + value: /{{ .Release.Namespace }}/execution/ filters: - type: URLRewrite urlRewrite: @@ -43,8 +46,11 @@ spec: rules: - matches: - path: - type: PathPrefix + type: Exact 
value: /{{ .Release.Namespace }}/beacon + - path: + type: PathPrefix + value: /{{ .Release.Namespace }}/beacon/ filters: - type: URLRewrite urlRewrite: diff --git a/internal/embed/networks/helios/helmfile.yaml.gotmpl b/internal/embed/networks/helios/helmfile.yaml.gotmpl index 7fbbf53..c0a5d96 100644 --- a/internal/embed/networks/helios/helmfile.yaml.gotmpl +++ b/internal/embed/networks/helios/helmfile.yaml.gotmpl @@ -53,8 +53,11 @@ releases: rules: - matches: - path: - type: PathPrefix + type: Exact value: /helios-{{ .Values.id }} + - path: + type: PathPrefix + value: /helios-{{ .Values.id }}/ filters: - type: URLRewrite urlRewrite: diff --git a/renovate.json b/renovate.json index 6932b83..afab9bf 100644 --- a/renovate.json +++ b/renovate.json @@ -20,6 +20,19 @@ "datasourceTemplate": "github-releases", "depNameTemplate": "ObolNetwork/obol-stack-front-end", "versioningTemplate": "semver" + }, + { + "customType": "regex", + "description": "Update Gateway API release version", + "matchStrings": [ + "gatewayApiVersion:\\s*[\"']?(?v[0-9]+\\.[0-9]+\\.[0-9]+)[\"']?" + ], + "fileMatch": [ + "^internal/embed/infrastructure/helmfile\\.yaml$" + ], + "datasourceTemplate": "github-releases", + "depNameTemplate": "kubernetes-sigs/gateway-api", + "versioningTemplate": "semver" } ], "packageRules": [ From ccfef5553074d82d0468d988f76bb17a8ec669c5 Mon Sep 17 00:00:00 2001 From: bussyjd Date: Wed, 14 Jan 2026 17:39:57 +0400 Subject: [PATCH 04/42] feat: add cloudflared tunnel for public service exposure Add Cloudflare Tunnel integration to expose obol-stack services publicly without port forwarding or static IPs. Uses quick tunnel mode for MVP. 
Changes: - Add cloudflared Helm chart (internal/embed/infrastructure/cloudflared/) - Add tunnel management package (internal/tunnel/) - Add CLI commands: obol tunnel status/restart/logs - Integrate cloudflared into infrastructure helmfile The tunnel deploys automatically with `obol stack up` and provides a random trycloudflare.com URL accessible via `obol tunnel status`. Future: Named tunnel support for persistent URLs (obol tunnel login) --- cmd/obol/main.go | 43 +++++ .../infrastructure/cloudflared/Chart.yaml | 6 + .../cloudflared/templates/deployment.yaml | 44 +++++ internal/embed/infrastructure/helmfile.yaml | 7 + internal/tunnel/tunnel.go | 177 ++++++++++++++++++ 5 files changed, 277 insertions(+) create mode 100644 internal/embed/infrastructure/cloudflared/Chart.yaml create mode 100644 internal/embed/infrastructure/cloudflared/templates/deployment.yaml create mode 100644 internal/tunnel/tunnel.go diff --git a/cmd/obol/main.go b/cmd/obol/main.go index cde6626..69f92c5 100644 --- a/cmd/obol/main.go +++ b/cmd/obol/main.go @@ -12,6 +12,7 @@ import ( "github.com/ObolNetwork/obol-stack/internal/app" "github.com/ObolNetwork/obol-stack/internal/config" "github.com/ObolNetwork/obol-stack/internal/stack" + "github.com/ObolNetwork/obol-stack/internal/tunnel" "github.com/ObolNetwork/obol-stack/internal/version" "github.com/urfave/cli/v2" ) @@ -57,6 +58,11 @@ COMMANDS: app sync Deploy application to cluster app delete Remove application and cluster resources + Tunnel Management: + tunnel status Show tunnel status and public URL + tunnel restart Restart tunnel to get a new URL + tunnel logs View cloudflared logs + Kubernetes Tools (with auto-configured KUBECONFIG): kubectl Run kubectl with stack kubeconfig (passthrough) helm Run helm with stack kubeconfig (passthrough) @@ -157,6 +163,43 @@ GLOBAL OPTIONS: }, }, // ============================================================ + // Tunnel Management Commands + // ============================================================ + 
{ + Name: "tunnel", + Usage: "Manage Cloudflare tunnel for public access", + Subcommands: []*cli.Command{ + { + Name: "status", + Usage: "Show tunnel status and public URL", + Action: func(c *cli.Context) error { + return tunnel.Status(cfg) + }, + }, + { + Name: "restart", + Usage: "Restart the tunnel to get a new URL", + Action: func(c *cli.Context) error { + return tunnel.Restart(cfg) + }, + }, + { + Name: "logs", + Usage: "View cloudflared logs", + Flags: []cli.Flag{ + &cli.BoolFlag{ + Name: "follow", + Aliases: []string{"f"}, + Usage: "Follow log output", + }, + }, + Action: func(c *cli.Context) error { + return tunnel.Logs(cfg, c.Bool("follow")) + }, + }, + }, + }, + // ============================================================ // Kubernetes Tool Passthroughs (with auto-configured KUBECONFIG) // ============================================================ { diff --git a/internal/embed/infrastructure/cloudflared/Chart.yaml b/internal/embed/infrastructure/cloudflared/Chart.yaml new file mode 100644 index 0000000..894505e --- /dev/null +++ b/internal/embed/infrastructure/cloudflared/Chart.yaml @@ -0,0 +1,6 @@ +apiVersion: v2 +name: cloudflared +description: Cloudflare Tunnel for public access +type: application +version: 0.1.0 +appVersion: "2024.12.2" diff --git a/internal/embed/infrastructure/cloudflared/templates/deployment.yaml b/internal/embed/infrastructure/cloudflared/templates/deployment.yaml new file mode 100644 index 0000000..212556d --- /dev/null +++ b/internal/embed/infrastructure/cloudflared/templates/deployment.yaml @@ -0,0 +1,44 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: cloudflared + labels: + app.kubernetes.io/name: cloudflared + app.kubernetes.io/part-of: obol-stack +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: cloudflared + template: + metadata: + labels: + app.kubernetes.io/name: cloudflared + spec: + containers: + - name: cloudflared + image: cloudflare/cloudflared:2024.12.2 + args: + - tunnel + 
- --no-autoupdate + - --metrics + - 0.0.0.0:2000 + - --url + - http://traefik.traefik.svc.cluster.local:80 + ports: + - name: metrics + containerPort: 2000 + livenessProbe: + httpGet: + path: /ready + port: metrics + initialDelaySeconds: 10 + periodSeconds: 10 + resources: + requests: + cpu: 10m + memory: 64Mi + limits: + cpu: 100m + memory: 128Mi + restartPolicy: Always diff --git a/internal/embed/infrastructure/helmfile.yaml b/internal/embed/infrastructure/helmfile.yaml index 6f4d2b5..310ff46 100644 --- a/internal/embed/infrastructure/helmfile.yaml +++ b/internal/embed/infrastructure/helmfile.yaml @@ -119,6 +119,13 @@ releases: dashboard: enabled: false + # Cloudflare Tunnel (quick tunnel mode for public access) + - name: cloudflared + namespace: traefik + chart: ./cloudflared + needs: + - traefik/traefik + # eRPC - name: erpc namespace: erpc diff --git a/internal/tunnel/tunnel.go b/internal/tunnel/tunnel.go new file mode 100644 index 0000000..355e9ea --- /dev/null +++ b/internal/tunnel/tunnel.go @@ -0,0 +1,177 @@ +package tunnel + +import ( + "fmt" + "os" + "os/exec" + "path/filepath" + "regexp" + "strings" + "time" + + "github.com/ObolNetwork/obol-stack/internal/config" +) + +const ( + tunnelNamespace = "traefik" + tunnelLabelSelector = "app.kubernetes.io/name=cloudflared" +) + +// Status displays the current tunnel status and URL +func Status(cfg *config.Config) error { + kubectlPath := filepath.Join(cfg.BinDir, "kubectl") + kubeconfigPath := filepath.Join(cfg.ConfigDir, "kubeconfig.yaml") + + // Check if kubeconfig exists + if _, err := os.Stat(kubeconfigPath); os.IsNotExist(err) { + return fmt.Errorf("stack not running, use 'obol stack up' first") + } + + // Check pod status first + podStatus, err := getPodStatus(kubectlPath, kubeconfigPath) + if err != nil { + printStatusBox("quick", "not deployed", "", time.Now()) + fmt.Println("\nTroubleshooting:") + fmt.Println(" - Start the stack: obol stack up") + return nil + } + + // Try to get tunnel URL from logs + 
url, err := GetTunnelURL(cfg) + if err != nil { + printStatusBox("quick", podStatus, "(not available)", time.Now()) + fmt.Println("\nTroubleshooting:") + fmt.Println(" - Check logs: obol tunnel logs") + fmt.Println(" - Restart tunnel: obol tunnel restart") + return nil + } + + printStatusBox("quick", "active", url, time.Now()) + fmt.Printf("\nTest with: curl %s/\n", url) + + return nil +} + +// GetTunnelURL parses cloudflared logs to extract the quick tunnel URL +func GetTunnelURL(cfg *config.Config) (string, error) { + kubectlPath := filepath.Join(cfg.BinDir, "kubectl") + kubeconfigPath := filepath.Join(cfg.ConfigDir, "kubeconfig.yaml") + + cmd := exec.Command(kubectlPath, + "--kubeconfig", kubeconfigPath, + "logs", "-n", tunnelNamespace, + "-l", tunnelLabelSelector, + "--tail=100", + ) + + output, err := cmd.Output() + if err != nil { + return "", fmt.Errorf("failed to get tunnel logs: %w", err) + } + + // Parse URL from logs (quick tunnel uses cfargotunnel.com) + re := regexp.MustCompile(`https://[a-z0-9-]+\.cfargotunnel\.com`) + matches := re.FindString(string(output)) + if matches == "" { + // Also try trycloudflare.com as fallback + re = regexp.MustCompile(`https://[a-z0-9-]+\.trycloudflare\.com`) + matches = re.FindString(string(output)) + } + if matches == "" { + return "", fmt.Errorf("tunnel URL not found in logs") + } + + return matches, nil +} + +// Restart restarts the cloudflared deployment to get a new tunnel URL +func Restart(cfg *config.Config) error { + kubectlPath := filepath.Join(cfg.BinDir, "kubectl") + kubeconfigPath := filepath.Join(cfg.ConfigDir, "kubeconfig.yaml") + + // Check if kubeconfig exists + if _, err := os.Stat(kubeconfigPath); os.IsNotExist(err) { + return fmt.Errorf("stack not running, use 'obol stack up' first") + } + + fmt.Println("Restarting cloudflared tunnel...") + + cmd := exec.Command(kubectlPath, + "--kubeconfig", kubeconfigPath, + "rollout", "restart", "deployment/cloudflared", + "-n", tunnelNamespace, + ) + cmd.Stdout = 
os.Stdout + cmd.Stderr = os.Stderr + + if err := cmd.Run(); err != nil { + return fmt.Errorf("failed to restart tunnel: %w", err) + } + + fmt.Println("\nTunnel restarting...") + fmt.Println("Run 'obol tunnel status' to see the new URL once ready (may take 10-30 seconds).") + + return nil +} + +// Logs displays cloudflared logs +func Logs(cfg *config.Config, follow bool) error { + kubectlPath := filepath.Join(cfg.BinDir, "kubectl") + kubeconfigPath := filepath.Join(cfg.ConfigDir, "kubeconfig.yaml") + + // Check if kubeconfig exists + if _, err := os.Stat(kubeconfigPath); os.IsNotExist(err) { + return fmt.Errorf("stack not running, use 'obol stack up' first") + } + + args := []string{ + "--kubeconfig", kubeconfigPath, + "logs", "-n", tunnelNamespace, + "-l", tunnelLabelSelector, + } + + if follow { + args = append(args, "-f") + } + + cmd := exec.Command(kubectlPath, args...) + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + cmd.Stdin = os.Stdin + + return cmd.Run() +} + +// getPodStatus returns the status of the cloudflared pod +func getPodStatus(kubectlPath, kubeconfigPath string) (string, error) { + cmd := exec.Command(kubectlPath, + "--kubeconfig", kubeconfigPath, + "get", "pods", "-n", tunnelNamespace, + "-l", tunnelLabelSelector, + "-o", "jsonpath={.items[0].status.phase}", + ) + + output, err := cmd.Output() + if err != nil { + return "", err + } + + status := strings.TrimSpace(string(output)) + if status == "" { + return "", fmt.Errorf("no pods found") + } + + return strings.ToLower(status), nil +} + +// printStatusBox prints a formatted status box +func printStatusBox(mode, status, url string, lastUpdated time.Time) { + fmt.Println() + fmt.Println("Cloudflare Tunnel Status") + fmt.Println(strings.Repeat("─", 50)) + fmt.Printf("Mode: %s\n", mode) + fmt.Printf("Status: %s\n", status) + fmt.Printf("URL: %s\n", url) + fmt.Printf("Last Updated: %s\n", lastUpdated.Format(time.RFC3339)) + fmt.Println(strings.Repeat("─", 50)) +} From 
bd21826697ab5e0150de8d727188c76c5359e499 Mon Sep 17 00:00:00 2001 From: bussyjd Date: Tue, 27 Jan 2026 12:47:15 +0100 Subject: [PATCH 05/42] docs: update CLAUDE.md with new dependency versions Update documentation to reflect the upgraded dependency versions in obolup.sh. This keeps the documentation in sync with the actual pinned versions used by the bootstrap installer. --- CLAUDE.md | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index bc40752..8aa79e8 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -58,12 +58,12 @@ Uses local workspace: **Pinned versions** (lines 50-57): ```bash -KUBECTL_VERSION="1.31.0" -HELM_VERSION="3.16.2" +KUBECTL_VERSION="1.35.0" +HELM_VERSION="3.19.4" K3D_VERSION="5.8.3" -HELMFILE_VERSION="0.169.1" -K9S_VERSION="0.32.5" -HELM_DIFF_VERSION="3.9.11" +HELMFILE_VERSION="1.2.3" +K9S_VERSION="0.50.18" +HELM_DIFF_VERSION="3.14.1" ``` **Smart installation logic**: @@ -811,12 +811,12 @@ obol network delete ethereum- --force - Go 1.21+ (for building from source) **Installed by obolup.sh**: -- kubectl 1.31.0 -- helm 3.16.2 +- kubectl 1.35.0 +- helm 3.19.4 - k3d 5.8.3 -- helmfile 0.169.1 -- k9s 0.32.5 -- helm-diff plugin 3.9.11 +- helmfile 1.2.3 +- k9s 0.50.18 +- helm-diff plugin 3.14.1 **Go dependencies** (key packages): - `github.com/urfave/cli/v2` - CLI framework From d5e5ccd6be7d65197d9e52b5b56e686ca758a4b3 Mon Sep 17 00:00:00 2001 From: bussyjd Date: Mon, 2 Feb 2026 17:02:03 +0100 Subject: [PATCH 06/42] feat(auth): add dashboard auth and nodecore token refresh --- README.md | 29 +++ .../base/templates/oauth-token.yaml | 176 ++++++++++++++++++ internal/embed/infrastructure/helmfile.yaml | 14 +- .../infrastructure/values/erpc.yaml.gotmpl | 21 ++- .../values/obol-frontend.yaml.gotmpl | 27 ++- 5 files changed, 261 insertions(+), 6 deletions(-) create mode 100644 internal/embed/infrastructure/base/templates/oauth-token.yaml diff --git a/README.md b/README.md index 0f24b0d..e525dca 100644 --- 
a/README.md +++ b/README.md @@ -394,6 +394,35 @@ obol stack purge -f > [!WARNING] > The `purge` command permanently deletes all cluster data and configuration. The `-f` flag is required to remove persistent volume claims (PVCs) owned by root. Use with caution. +### Dashboard Authentication (Better Auth) + +The dashboard UI is protected behind login when configured. RPC endpoints under `/rpc/*` remain unauthenticated (the x402 payment flow is handled separately). + +**Required environment variables (set before `obol stack up`):** + +- `STACK_PUBLIC_DOMAIN` (defaults to `obol.stack`; set to your Cloudflare tunnel hostname for internet exposure) +- `BETTER_AUTH_SECRET` (min 32 chars) +- `OBOL_GOOGLE_CLIENT_ID` +- `OBOL_GOOGLE_CLIENT_SECRET` + +**Google OAuth redirect URI:** + +Register this in Google Cloud Console: + +```text +https://<STACK_PUBLIC_DOMAIN>/api/auth/callback/google +``` + +**Nodecore token refresh (for eRPC upstream header injection):** + +Create/update the Secret `erpc/nodecore-oauth-refresh` with: + +- `client_id` +- `client_secret` +- `refresh_token` + +The in-cluster CronJob refreshes a short-lived Google `id_token` and writes it into `erpc/obol-oauth-token`, which eRPC uses to inject `X-Nodecore-Token` on upstream requests. + ### Working with Kubernetes The `obol` CLI includes convenient wrappers for common Kubernetes tools. These automatically use the correct cluster configuration: diff --git a/internal/embed/infrastructure/base/templates/oauth-token.yaml b/internal/embed/infrastructure/base/templates/oauth-token.yaml new file mode 100644 index 0000000..d5baf56 --- /dev/null +++ b/internal/embed/infrastructure/base/templates/oauth-token.yaml @@ -0,0 +1,176 @@ +--- +# Nodecore OAuth token plumbing for eRPC upstream auth (issue #124) +apiVersion: v1 +kind: Namespace +metadata: + name: erpc + +--- +apiVersion: v1 +kind: Secret +metadata: + name: obol-oauth-token + namespace: erpc +type: Opaque +stringData: + # Google `id_token` (JWT). 
CronJob refreshes and writes into this Secret. + token: "" + +--- +apiVersion: v1 +kind: Secret +metadata: + name: nodecore-oauth-refresh + namespace: erpc +type: Opaque +stringData: + # Google OAuth client credentials + refresh token. + # This is intentionally stored separately from the ID token written to `obol-oauth-token`. + client_id: "" + client_secret: "" + refresh_token: "" + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: nodecore-token-writer + namespace: erpc +rules: + - apiGroups: [""] + resources: ["secrets"] + resourceNames: ["obol-oauth-token"] + verbs: ["get", "update", "patch"] + +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: nodecore-token-refresher + namespace: erpc + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: nodecore-token-writer + namespace: erpc +subjects: + - kind: ServiceAccount + name: nodecore-token-refresher + namespace: erpc +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: nodecore-token-writer + +--- +apiVersion: batch/v1 +kind: CronJob +metadata: + name: nodecore-token-refresher + namespace: erpc +spec: + # Refresh at minutes 0 and 45 of each hour (at most 45 minutes between runs) to stay ahead of typical 1h ID token expiry. 
+ schedule: "0,45 * * * *" + concurrencyPolicy: Forbid + successfulJobsHistoryLimit: 1 + failedJobsHistoryLimit: 3 + jobTemplate: + spec: + template: + spec: + serviceAccountName: nodecore-token-refresher + restartPolicy: OnFailure + containers: + - name: refresh + image: python:3.12-alpine + imagePullPolicy: IfNotPresent + env: + - name: GOOGLE_CLIENT_ID + valueFrom: + secretKeyRef: + name: nodecore-oauth-refresh + key: client_id + - name: GOOGLE_CLIENT_SECRET + valueFrom: + secretKeyRef: + name: nodecore-oauth-refresh + key: client_secret + - name: GOOGLE_REFRESH_TOKEN + valueFrom: + secretKeyRef: + name: nodecore-oauth-refresh + key: refresh_token + command: + - python + - -c + - | + import base64 + import json + import os + import ssl + import urllib.parse + import urllib.request + + client_id = os.environ.get("GOOGLE_CLIENT_ID") + client_secret = os.environ.get("GOOGLE_CLIENT_SECRET") + refresh_token = os.environ.get("GOOGLE_REFRESH_TOKEN") + + if not client_id or not client_secret or not refresh_token: + raise SystemExit("Missing GOOGLE_CLIENT_ID/GOOGLE_CLIENT_SECRET/GOOGLE_REFRESH_TOKEN in Secret erpc/nodecore-oauth-refresh") + + token_url = "https://oauth2.googleapis.com/token" + body = urllib.parse.urlencode({ + "client_id": client_id, + "client_secret": client_secret, + "refresh_token": refresh_token, + "grant_type": "refresh_token", + }).encode("utf-8") + + req = urllib.request.Request( + token_url, + data=body, + method="POST", + headers={"Content-Type": "application/x-www-form-urlencoded"}, + ) + + with urllib.request.urlopen(req, timeout=20) as resp: + payload = json.loads(resp.read().decode("utf-8")) + + id_token = payload.get("id_token") + if not id_token: + raise SystemExit(f"Google token endpoint response missing id_token: {payload}") + + token_b64 = base64.b64encode(id_token.encode("utf-8")).decode("utf-8") + + namespace = "erpc" + secret_name = "obol-oauth-token" + api_server = "https://kubernetes.default.svc" + + sa_token_path = 
"/var/run/secrets/kubernetes.io/serviceaccount/token" + sa_ca_path = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" + + with open(sa_token_path, "r", encoding="utf-8") as f: + sa_token = f.read().strip() + + patch = json.dumps({"data": {"token": token_b64}}).encode("utf-8") + patch_url = f"{api_server}/api/v1/namespaces/{namespace}/secrets/{secret_name}" + + ctx = ssl.create_default_context(cafile=sa_ca_path) + patch_req = urllib.request.Request( + patch_url, + data=patch, + method="PATCH", + headers={ + "Authorization": f"Bearer {sa_token}", + "Content-Type": "application/merge-patch+json", + "Accept": "application/json", + }, + ) + + with urllib.request.urlopen(patch_req, timeout=20, context=ctx) as resp: + if resp.status < 200 or resp.status >= 300: + raise SystemExit(f"Failed to patch Secret {namespace}/{secret_name}: HTTP {resp.status} {resp.read().decode('utf-8')}") + + print("Updated Secret erpc/obol-oauth-token") diff --git a/internal/embed/infrastructure/helmfile.yaml b/internal/embed/infrastructure/helmfile.yaml index 310ff46..e3ce9a3 100644 --- a/internal/embed/infrastructure/helmfile.yaml +++ b/internal/embed/infrastructure/helmfile.yaml @@ -1,6 +1,7 @@ # Helmfile for Obol Stack default infrastructure # Orchestrates core infrastructure components deployed with every stack # Uses Traefik with Gateway API for routing (replaces nginx-ingress) +{{- $publicDomain := env "STACK_PUBLIC_DOMAIN" | default "obol.stack" -}} repositories: - name: traefik @@ -13,6 +14,8 @@ repositories: url: https://ethpandaops.github.io/ethereum-helm-charts - name: bedag url: https://bedag.github.io/helm-charts/ + - name: stakater + url: https://stakater.github.io/stakater-charts # Single source of truth: change this to switch networks values: @@ -126,6 +129,13 @@ releases: needs: - traefik/traefik + # Stakater Reloader (restarts workloads on Secret/ConfigMap change) + - name: reloader + namespace: reloader + createNamespace: true + chart: stakater/reloader + version: 2.2.7 
+ # eRPC - name: erpc namespace: erpc @@ -157,7 +167,7 @@ releases: namespace: traefik sectionName: web hostnames: - - obol.stack + - "{{ $publicDomain }}" rules: - matches: - path: @@ -199,7 +209,7 @@ releases: namespace: traefik sectionName: web hostnames: - - obol.stack + - "{{ $publicDomain }}" rules: - matches: - path: diff --git a/internal/embed/infrastructure/values/erpc.yaml.gotmpl b/internal/embed/infrastructure/values/erpc.yaml.gotmpl index 6799332..b7c07f8 100644 --- a/internal/embed/infrastructure/values/erpc.yaml.gotmpl +++ b/internal/embed/infrastructure/values/erpc.yaml.gotmpl @@ -1,4 +1,5 @@ {{- $network := .Values.network -}} +{{- $publicDomain := env "STACK_PUBLIC_DOMAIN" | default "obol.stack" -}} {{- $chainId := 1 -}} {{/* Default: mainnet */}} {{- if eq $network "hoodi" -}} {{- $chainId = 560048 -}} @@ -48,6 +49,14 @@ config: |- projects: - id: rpc + upstreams: + - id: nodecore + endpoint: https://rpc.nodecore.io + evm: + chainId: {{ $chainId }} + jsonRpc: + headers: + X-Nodecore-Token: "${OBOL_OAUTH_TOKEN}" networks: - architecture: evm evm: @@ -79,7 +88,11 @@ config: |- maxAge: 3600 # Secret env variables -secretEnv: {} +secretEnv: + OBOL_OAUTH_TOKEN: + secretKeyRef: + name: obol-oauth-token + key: token # Extra args for the erpc container extraArgs: [] @@ -101,7 +114,8 @@ affinity: {} imagePullSecrets: [] # Annotations for the Deployment -annotations: {} +annotations: + secret.reloader.stakater.com/reload: "obol-oauth-token" # Liveness probe livenessProbe: @@ -126,7 +140,8 @@ nodeSelector: {} podLabels: {} # Pod annotations -podAnnotations: {} +podAnnotations: + secret.reloader.stakater.com/reload: "obol-oauth-token" # Pod management policy podManagementPolicy: OrderedReady diff --git a/internal/embed/infrastructure/values/obol-frontend.yaml.gotmpl b/internal/embed/infrastructure/values/obol-frontend.yaml.gotmpl index 92aab95..b3c0d56 100644 --- a/internal/embed/infrastructure/values/obol-frontend.yaml.gotmpl +++ 
b/internal/embed/infrastructure/values/obol-frontend.yaml.gotmpl @@ -1,15 +1,29 @@ {{- $network := .Values.network -}} +{{- $publicDomain := env "STACK_PUBLIC_DOMAIN" | default "obol.stack" -}} replicaCount: 1 +serviceAccount: + name: obol-frontend + image: environment: - name: NEXT_PUBLIC_HELIOS_CLIENT_URL value: "http://helios-{{ $network }}.helios.svc.cluster.local:8545" - name: NEXT_PUBLIC_ERPC_URL - value: "http://erpc.default.svc.cluster.local:4000/rpc" + value: "{{ printf \"https://%s/rpc\" $publicDomain }}" - name: NEXT_PUBLIC_AZTEC_SEQUENCER_URL value: "http://l2-sequencer-node-mainnet-node.aztec.svc.cluster.local:8080" + - name: BETTER_AUTH_SECRET + value: "{{ env \"BETTER_AUTH_SECRET\" }}" + - name: BETTER_AUTH_URL + value: "{{ printf \"https://%s\" $publicDomain }}" + - name: OBOL_GOOGLE_CLIENT_ID + value: "{{ env \"OBOL_GOOGLE_CLIENT_ID\" }}" + - name: OBOL_GOOGLE_CLIENT_SECRET + value: "{{ env \"OBOL_GOOGLE_CLIENT_SECRET\" }}" + - name: OBOL_AUTH_DB_PATH + value: "/data/auth.sqlite" repository: obolnetwork/obol-stack-front-end pullPolicy: Always @@ -19,6 +33,17 @@ service: type: ClusterIP port: 3000 +podSecurityContext: + fsGroup: 1001 + +volumes: + - name: auth-db + emptyDir: {} + +volumeMounts: + - name: auth-db + mountPath: /data + # Disable legacy Ingress - using Gateway API HTTPRoute instead ingress: enabled: false From 09356aa167a7aa05d2de62136b98318eda1ca203 Mon Sep 17 00:00:00 2001 From: bussyjd Date: Tue, 3 Feb 2026 14:21:04 +0100 Subject: [PATCH 07/42] feat(llm): add ollama cloud + llmspy foundation --- .../infrastructure/base/templates/llm.yaml | 266 ++++++++++++++++++ .../base/templates/obol-agent.yaml | 20 +- .../values/obol-frontend.yaml.gotmpl | 10 + plans/okr1-llmspy-integration.md | 263 +++++++++++++++++ 4 files changed, 558 insertions(+), 1 deletion(-) create mode 100644 internal/embed/infrastructure/base/templates/llm.yaml create mode 100644 plans/okr1-llmspy-integration.md diff --git 
a/internal/embed/infrastructure/base/templates/llm.yaml b/internal/embed/infrastructure/base/templates/llm.yaml new file mode 100644 index 0000000..4ad413a --- /dev/null +++ b/internal/embed/infrastructure/base/templates/llm.yaml @@ -0,0 +1,266 @@ +--- +# LLM foundation services (OKR-1) +# +# This deploys: +# - Ollama (as the upstream LLM runtime) +# - llms.py (LLMSpy) as an OpenAI-compatible gateway / router over providers +# +# Design notes: +# - We default to Ollama Cloud (`glm-4.7:cloud`) to avoid requiring local GPU/VRAM. +# - We persist Ollama's identity keypair at `/root/.ollama/id_ed25519` so the +# Ollama Cloud "connect" binding survives pod restarts/upgrades. +# - Model cache is kept on `emptyDir` (ephemeral) per product decision. +apiVersion: v1 +kind: Namespace +metadata: + name: llm + +--- +# Persist Ollama identity (Ollama Cloud connect uses the public key derived from this keypair). +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: ollama-home + namespace: llm +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 256Mi + +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: ollama + namespace: llm + labels: + app: ollama +spec: + replicas: 1 + # Ollama uses a ReadWriteOnce PVC; avoid surging a second pod during updates. + strategy: + type: Recreate + selector: + matchLabels: + app: ollama + template: + metadata: + labels: + app: ollama + spec: + containers: + - name: ollama + image: ollama/ollama:latest + imagePullPolicy: IfNotPresent + ports: + - name: http + containerPort: 11434 + protocol: TCP + env: + # Store model blobs (including any cloud model stubs/cache) in an ephemeral volume. + - name: OLLAMA_MODELS + value: /models + # Explicitly bind the HTTP API to all interfaces in-cluster. + - name: OLLAMA_HOST + value: 0.0.0.0:11434 + volumeMounts: + # Persist identity + config (e.g. ~/.ollama/id_ed25519) for Ollama Cloud connect. 
+ - name: ollama-home + mountPath: /root/.ollama + - name: ollama-models + mountPath: /models + readinessProbe: + httpGet: + path: /api/version + port: http + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 2 + livenessProbe: + httpGet: + path: /api/version + port: http + initialDelaySeconds: 30 + periodSeconds: 10 + timeoutSeconds: 2 + resources: + requests: + cpu: 100m + memory: 256Mi + limits: + cpu: 2000m + memory: 4Gi + volumes: + - name: ollama-home + persistentVolumeClaim: + claimName: ollama-home + - name: ollama-models + emptyDir: {} + +--- +apiVersion: v1 +kind: Service +metadata: + name: ollama + namespace: llm + labels: + app: ollama +spec: + type: ClusterIP + selector: + app: ollama + ports: + - name: http + port: 11434 + targetPort: http + protocol: TCP + +--- +# llms.py configuration for Obol Stack: +# - Only enable the Ollama provider +# - Default model is `glm-4.7:cloud` (cloud-first) +apiVersion: v1 +kind: ConfigMap +metadata: + name: llmspy-config + namespace: llm +data: + llms.json: | + { + "defaults": { + "headers": { + "Content-Type": "application/json" + }, + "text": { + "model": "glm-4.7:cloud", + "messages": [ + { "role": "user", "content": "" } + ] + } + }, + "providers": { + "ollama": { + "enabled": true, + "type": "OllamaProvider", + "base_url": "http://ollama.llm.svc.cluster.local:11434", + "models": {}, + "all_models": true + } + } + } + +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llmspy + namespace: llm + labels: + app: llmspy +spec: + replicas: 1 + selector: + matchLabels: + app: llmspy + template: + metadata: + labels: + app: llmspy + spec: + initContainers: + # Seed ~/.llms/llms.json from the ConfigMap. llms.py also writes runtime + # state (e.g. analytics) under ~/.llms, so we keep the directory writable. 
+ - name: seed-config + image: busybox:1.36.1 + imagePullPolicy: IfNotPresent + command: + - sh + - -c + - | + set -eu + mkdir -p /data + cp /config/llms.json /data/llms.json + volumeMounts: + - name: llmspy-config + mountPath: /config + readOnly: true + - name: llmspy-home + mountPath: /data + containers: + - name: llmspy + # NOTE: We install `llms.py` at runtime to avoid coupling Obol Stack to a + # specific upstream container image. If/when llmspy publishes an official + # image, we can switch to it for faster cold starts. + image: python:3.12-slim + imagePullPolicy: IfNotPresent + ports: + - name: http + containerPort: 8000 + protocol: TCP + # llms.py uses `~/.llms/llms.json` by default; in the container, that's + # /home/llms/.llms/llms.json (from upstream docker docs). + command: + - sh + - -c + - | + set -eu + python -m pip install --no-cache-dir --upgrade pip + python -m pip install --no-cache-dir llms + llms --config /home/llms/.llms/llms.json --serve 8000 + env: + # Avoid surprises if the image changes its default HOME. 
+ - name: HOME + value: /home/llms + volumeMounts: + - name: llmspy-home + mountPath: /home/llms/.llms + readinessProbe: + httpGet: + path: / + port: http + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 2 + livenessProbe: + httpGet: + path: / + port: http + initialDelaySeconds: 30 + periodSeconds: 10 + timeoutSeconds: 2 + resources: + requests: + cpu: 50m + memory: 128Mi + limits: + cpu: 1000m + memory: 1Gi + volumes: + - name: llmspy-config + configMap: + name: llmspy-config + items: + - key: llms.json + path: llms.json + - name: llmspy-home + emptyDir: {} + +--- +apiVersion: v1 +kind: Service +metadata: + name: llmspy + namespace: llm + labels: + app: llmspy +spec: + type: ClusterIP + selector: + app: llmspy + ports: + - name: http + port: 8000 + targetPort: http + protocol: TCP diff --git a/internal/embed/infrastructure/base/templates/obol-agent.yaml b/internal/embed/infrastructure/base/templates/obol-agent.yaml index f73dda7..7220dbf 100644 --- a/internal/embed/infrastructure/base/templates/obol-agent.yaml +++ b/internal/embed/infrastructure/base/templates/obol-agent.yaml @@ -139,6 +139,24 @@ spec: - name: PUBLIC_MODE value: "false" + # OKR-1: Default LLM backend via llms.py + Ollama Cloud + # + # The Obol Stack agent is provider-agnostic: + # - `llms.py` (LLMSpy) exposes an OpenAI-compatible API at /v1 + # - LLMSpy forwards to Ollama (in-cluster), which can run `*:cloud` models + # + # Important: Ollama Cloud requires a one-time "connect" of the pod identity + # (public key derived from /root/.ollama/id_ed25519). We persist that key + # in the `llm/ollama-home` PVC so upgrades/restarts don't require re-connect. 
+ - name: LLM_BACKEND + value: "llmspy" + - name: LLM_MODEL + value: "glm-4.7:cloud" + - name: OPENAI_API_BASE + value: "http://llmspy.llm.svc.cluster.local:8000/v1" + - name: OPENAI_API_KEY + value: "ollama" + # Health checks ensure the pod is ready to receive traffic livenessProbe: httpGet: @@ -179,4 +197,4 @@ spec: protocol: TCP name: http selector: - app: obol-agent # Routes traffic to pods with this label \ No newline at end of file + app: obol-agent # Routes traffic to pods with this label diff --git a/internal/embed/infrastructure/values/obol-frontend.yaml.gotmpl b/internal/embed/infrastructure/values/obol-frontend.yaml.gotmpl index b3c0d56..caff157 100644 --- a/internal/embed/infrastructure/values/obol-frontend.yaml.gotmpl +++ b/internal/embed/infrastructure/values/obol-frontend.yaml.gotmpl @@ -25,6 +25,16 @@ image: - name: OBOL_AUTH_DB_PATH value: "/data/auth.sqlite" + # Obol Agent (ADK) in-cluster URL for CopilotKit runtime + - name: ADK_AGENT_URL + value: "http://obol-agent.agent.svc.cluster.local:8000/" + - name: NEXT_PUBLIC_ADK_AGENT_URL + value: "http://obol-agent.agent.svc.cluster.local:8000/" + + # Ollama in-cluster URL (used by dashboard to surface Ollama Cloud connect URL) + - name: OLLAMA_URL + value: "http://ollama.llm.svc.cluster.local:11434" + repository: obolnetwork/obol-stack-front-end pullPolicy: Always tag: "latest" diff --git a/plans/okr1-llmspy-integration.md b/plans/okr1-llmspy-integration.md new file mode 100644 index 0000000..250378d --- /dev/null +++ b/plans/okr1-llmspy-integration.md @@ -0,0 +1,263 @@ +# OKR-1 Integration Plan: LLMSpy (`llms.py`) for Keyless, Multi-Provider LLM Access + +Date: 2026-02-03 + +## Goal (Objective 1) +Make Obol Stack the easiest way to spin up and use an on-chain AI agent. + +**Key Results** +1. Median time from install to first successful agent query ≤ **10 minutes** +2. Agent setup requires ≤ **5 user actions** (**no manual API key copy/paste in default flow**) +3. 
**100 Monthly Active Returning Users (MAUs)** interacting with the agent at least once per month +4. ≥ **60% of new Stack installs** complete agent setup successfully + +## Scope of this integration +Integrate **LLMSpy (`llms.py`)** as an **in-cluster OpenAI-compatible LLM gateway** that can route requests to: +- **Local LLMs** (default path to satisfy “no API key”) +- **Remote providers** (optional, later; keys or OAuth-derived tokens) + +This enables Obol Agent (ADK/FastAPI) to become **provider-agnostic**, while keeping the Dashboard UX simple. + +## Non-goals (for this iteration) +- Building a hosted “Obol-managed” LLM key/service (would change threat model/cost structure) +- Exposing LLMSpy publicly by default (we keep it internal unless explicitly enabled) +- Replacing ADK/AG-UI or refactoring the agent’s tool system +- Adding x402 payment to LLM calls (future candidate; not required for LLMSpy integration) + +--- + +## Current state (baseline) +### User experience bottleneck +- `obol agent init` currently requires a **manually created Google AI Studio API key** (copy/paste) before the agent works. +- Dashboard agent sidebar shows “Initialize your Obol Agent by running `obol agent init`…” when the agent is unavailable. 
+ +### System architecture (today) +``` +Browser + -> Dashboard (Next.js, Better Auth) + -> POST /api/copilotkit (server route) + -> HttpAgent -> obol-agent (FastAPI / Google ADK) + -> Gemini via GOOGLE_API_KEY (direct) +``` + +--- + +## Proposed target architecture (with LLMSpy + Ollama; cloud-first) + +### Runtime request flow (agent query) +``` +Browser (signed-in) + -> Dashboard (Next.js) + -> /api/copilotkit (server; auth-gated) + -> obol-agent (FastAPI/ADK, AG-UI) + -> LiteLLM client (OpenAI-compatible) + -> LLMSpy (llms.py) [cluster-internal service] + -> Provider A: Local (Ollama) [no keys, default] + -> Provider B+: Remote (optional; keys/OAuth later) +``` + +### Deployment topology (Kubernetes) +Namespaces: +- `agent` + - `obol-agent` Deployment (existing) +- `llm` (new) + - **`llmspy`** (`llms.py`) Deployment + ClusterIP Service + - **`ollama`** Deployment + ClusterIP Service (default provider) + - Optional model warmup Job (`ollama pull `) + +Storage: +- Ollama runtime + model cache uses `emptyDir` (ephemeral). +- **Ollama Cloud auth key**: + - Minimum viable: also `emptyDir` (user reconnects after pod restart). + - Recommended: mount a small PVC or Secret-backed volume for `/root/.ollama/id_ed25519` so reconnect isn’t needed after upgrades/restarts. + +--- + +## UX: “≤5 actions” and “≤10 minutes” target + +### Default flow (no API keys) +**Default provider:** Ollama (in-cluster) via LLMSpy, using **Ollama Cloud models** (e.g. `glm-4.7:cloud`). + +Target action count: +1. Install Obol Stack CLI (existing flow) +2. `obol stack init` (if required by current UX) +3. `obol stack up` +4. Open Dashboard URL and sign in +5. Send first message in agent sidebar + +Notes: +- Remove the **mandatory** `obol agent init` step from the default path. +- Replace the “paste an API key” step with an **Ollama Cloud connect** step: + - If Ollama isn’t signed in, show a “Connect Ollama Cloud” action in the dashboard. 
+ - Clicking it surfaces the `https://ollama.com/connect?...` URL returned by the Ollama API and guides the user through login. + +### Time-to-first-query tactics +- Default to a **cloud model** to avoid GPU/VRAM constraints: + - `glm-4.7:cloud` is explicitly supported as a cloud model in Ollama. +- Add a lightweight warmup/prefetch mechanism: + - Post-install Job: `ollama pull glm-4.7:cloud` (downloads the stub/metadata so first chat is faster) + - Readiness gate: “ready” once Ollama is connected and the model is pullable +- Ensure agent readiness checks are reliable and fast: + - Keep `/api/copilotkit/health` public (already required) + - Add `llmspy` and `ollama` readiness checks and surface status in the UI + +--- + +## Configuration model + +### LLMSpy +LLMSpy is configured by `~/.llms/llms.json` (in-container: `/home/llms/.llms/llms.json`). + +We will manage this in-cluster using: +- ConfigMap for `llms.json` +- Volume mount to `/home/llms/.llms` (likely `emptyDir`; no secrets required for Ollama) + +Key config points (concrete based on llms.py docs): +- Only one enabled provider: `ollama` +- `providers.ollama.type = "OllamaProvider"` +- `providers.ollama.base_url = "http://ollama.llm.svc.cluster.local:11434"` +- `providers.ollama.all_models = true` (or restrict to `glm-4.7:cloud`) +- `defaults.text.model = "glm-4.7:cloud"` + +### Obol Agent +Make the agent model/backend configurable: +- `LLM_BACKEND`: + - `gemini` (existing path, requires `GOOGLE_API_KEY`) + - `llmspy` (new default path) +- `LLM_MODEL` (default to the cloud model) +- `OPENAI_API_BASE` set to `http://llmspy.llm.svc.cluster.local:8000/v1` +- `OPENAI_API_KEY` set to a dummy value (LiteLLM/OpenAI provider compatibility) + +NOTE: With `llmspy` as backend, the agent sends OpenAI-style requests to LLMSpy and LLMSpy forwards to Ollama. + +## Default model choice +Use `glm-4.7:cloud` by default to maximize quality and avoid local GPU requirements. 
+ +This keeps the “no manual API key copy/paste” OKR achievable because Ollama supports a browser-based connect flow (user signs in; Ollama authenticates subsequent cloud requests). + +## OpenClaw tie-in (validation + reuse) +We can validate “tool-calling robustness” of the chosen Ollama model in two ways: + +1) **Direct OpenClaw + Ollama** (matches Ollama’s built-in `openclaw` integration) + - OpenClaw already supports an Ollama provider using the OpenAI-compatible `/v1` API. + - Ollama’s own code includes an integration that edits `~/.openclaw/openclaw.json` to point at Ollama and set `agents.defaults.model.primary`. + +2) **OpenClaw + LLMSpy (preferred for consistency)** + - Configure OpenClaw’s “OpenAI” provider baseUrl to LLMSpy (`http://llmspy.llm.svc.cluster.local:8000/v1`) + - This ensures OpenClaw and Obol Agent exercise the same gateway path. + +We should treat OpenClaw as: +- A **validation harness** for model/tool behavior (pre-flight testing + regression checks) +- Potential future **multi-channel UX** (WhatsApp/Telegram/etc) once dashboard MVP is stable + +### Obol Stack CLI changes (user-facing) +Reframe `obol agent init` into a provider configuration command: +- Default: **no command needed** +- Optional: `obol agent configure --provider <...>` or `obol agent set-llm --provider <...>` + - Writes K8s secrets/configmaps and triggers rollout restart of `obol-agent` and/or `llmspy` + +--- + +## Security & exposure +- Dashboard remains protected by Better Auth (Google now; GitHub later). +- `/rpc/*` remains public/unprotected (x402 responsibility). +- `/api/copilotkit/health` remains public for monitoring. +- **LLMSpy and Ollama remain cluster-internal by default**: + - No HTTPRoute for them + - ClusterIP only + - (Optional later) expose behind dashboard auth for debugging + +Threat model considerations: +- Ensure LLMSpy cannot be used as an open relay from the internet. +- Ensure remote provider keys (if configured) never get logged or surfaced in UI. 
+ +--- + +## Observability + OKR measurement plan + +### Metrics we can measure in-product (self-hosted) +- `agent_query_success_total` / `agent_query_error_total` +- `agent_query_latency_seconds` histogram +- `agent_first_success_timestamp` (per install) – used for “time to first query” +- `agent_provider_backend` label (gemini vs llmspy; local vs remote) + +### MAU / “install success rate” (cross-install aggregation) +This requires centralized telemetry. Options: +- Opt-in telemetry to an Obol endpoint (privacy-preserving, hashed install id) +- Or a “bring your own analytics” integration (PostHog/Amplitude) + +Proposed approach for this OKR: +- Add **opt-in** telemetry flag at install time +- Emit minimal events: + - `stack_install_completed` + - `agent_ready` + - `agent_first_query_success` + - `agent_returning_user_monthly` (count only) + +--- + +## Implementation workstreams (by repo) + +### 1) `obol-stack` (installer + infra) +- Add `llmspy` Deployment/Service manifest under `internal/embed/infrastructure/base/templates/` +- Add `ollama` Deployment/Service (or allow external Ollama endpoint) +- Add “model warmup” Job (optional but recommended for ≤10 min) +- Add values/env wiring to configure: + - LLMSpy port, config map, and secret mounts + - Obol Agent env vars (`LLM_BACKEND`, `LLM_MODEL`, `OPENAI_API_BASE`, etc.) 
+- Update CLI: + - Make `obol agent init` optional or replace with `obol agent configure` + - Provide a keyless default; ensure docs and errors reflect new flow +- Update README (agent quickstart + troubleshooting) + +### 2) `obol-agent` (runtime changes) +- Read `LLM_MODEL` from env (remove hard-coded model) +- Add `LLM_BACKEND` switch: + - `gemini` (current) + - `llmspy` using ADK’s `LiteLlm` wrapper + OpenAI-compatible base URL +- Add health diagnostics: + - Include provider status in `/health` (e.g., “llm backend reachable”) +- Add unit/integration tests: + - Mock LLMSpy OpenAI endpoint + - Verify tool calling works with chosen default local model + +### 3) `obol-stack-front-end` (onboarding UX) +- Replace “run `obol agent init`” message with: + - “Agent is initializing” / “Model downloading” (with helpful tips) + - A “Retry health check” action + - A link to agent setup docs for optional remote providers +- Add an “Agent Setup” panel: + - Shows current backend (local/remote) + - Shows readiness status (agent/llmspy/ollama) + +### 4) `helm-charts` (if needed) +- Only if we decide to migrate these new services into charts instead of raw manifests. +- Otherwise, keep in `base/templates/` for speed. + +--- + +## Milestones + +### Milestone A — “Keyless Agent Works Locally” +Acceptance: +- Fresh install: no API keys required +- Agent responds from dashboard +- Median time to first response ≤ 10 min in test environment + +### Milestone B — “Provider Choice” +Acceptance: +- Optional remote providers via secrets/config (still no copy/paste required in default) +- Failover behavior works (local first, remote fallback if configured) + +### Milestone C — “OKR Instrumentation” +Acceptance: +- Prometheus metrics available +- Optional telemetry pipeline documented and implemented (if approved) + +--- + +## Open questions (needs product decision) +1. Do we persist `/root/.ollama/id_ed25519` so the Ollama Cloud connection survives pod restarts/upgrades? +2. 
Do we want to expose a “Connect Ollama Cloud” UX in the dashboard (recommended) or require a CLI step? +3. Telemetry: opt-in vs opt-out; where is the endpoint; privacy guarantees. +4. Do we expose LLMSpy UI behind auth for debugging, or keep it internal-only? From 5328fc6b38317662a818e21e2e7f88d1b9ee3bd0 Mon Sep 17 00:00:00 2001 From: bussyjd Date: Tue, 3 Feb 2026 15:04:18 +0100 Subject: [PATCH 08/42] docs: note llmspy + ollama cloud default --- notes.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/notes.md b/notes.md index 025b7ef..6550e6a 100644 --- a/notes.md +++ b/notes.md @@ -6,7 +6,7 @@ - obol agent - skeleton out the cmd - this should have a dummy manifest which templates a config map secret - - obol agent init, gets the secret from google account + - OKR-1: default LLM flow is llms.py -> Ollama Cloud (no API key copy/paste) - frontend (default) - erpc, helios (default) From 9e4b885cd7dbd670c0536aea6a3741d34dbafcdb Mon Sep 17 00:00:00 2001 From: bussyjd Date: Tue, 3 Feb 2026 17:19:10 +0100 Subject: [PATCH 09/42] chore(llm): use official llmspy image and tcp probes --- .../infrastructure/base/templates/llm.yaml | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/internal/embed/infrastructure/base/templates/llm.yaml b/internal/embed/infrastructure/base/templates/llm.yaml index 4ad413a..4f367c9 100644 --- a/internal/embed/infrastructure/base/templates/llm.yaml +++ b/internal/embed/infrastructure/base/templates/llm.yaml @@ -190,24 +190,19 @@ spec: mountPath: /data containers: - name: llmspy - # NOTE: We install `llms.py` at runtime to avoid coupling Obol Stack to a - # specific upstream container image. If/when llmspy publishes an official - # image, we can switch to it for faster cold starts. - image: python:3.12-slim + # Official LLMSpy container image (published by upstream). + # Pin a specific version for reproducibility. 
+ image: ghcr.io/servicestack/llms:v2.0.30 imagePullPolicy: IfNotPresent ports: - name: http containerPort: 8000 protocol: TCP - # llms.py uses `~/.llms/llms.json` by default; in the container, that's - # /home/llms/.llms/llms.json (from upstream docker docs). command: - sh - -c - | set -eu - python -m pip install --no-cache-dir --upgrade pip - python -m pip install --no-cache-dir llms llms --config /home/llms/.llms/llms.json --serve 8000 env: # Avoid surprises if the image changes its default HOME. @@ -217,15 +212,13 @@ spec: - name: llmspy-home mountPath: /home/llms/.llms readinessProbe: - httpGet: - path: / + tcpSocket: port: http initialDelaySeconds: 5 periodSeconds: 5 timeoutSeconds: 2 livenessProbe: - httpGet: - path: / + tcpSocket: port: http initialDelaySeconds: 30 periodSeconds: 10 From 8e8767b79b7d8fdd362c99df6fe069e45a47afa7 Mon Sep 17 00:00:00 2001 From: bussyjd Date: Tue, 3 Feb 2026 17:20:35 +0100 Subject: [PATCH 10/42] docs(okr1): note official llmspy image --- plans/okr1-llmspy-integration.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/plans/okr1-llmspy-integration.md b/plans/okr1-llmspy-integration.md index 250378d..a6f1fc7 100644 --- a/plans/okr1-llmspy-integration.md +++ b/plans/okr1-llmspy-integration.md @@ -112,6 +112,10 @@ We will manage this in-cluster using: - ConfigMap for `llms.json` - Volume mount to `/home/llms/.llms` (likely `emptyDir`; no secrets required for Ollama) +Runtime: +- Prefer the upstream-published container image for reproducibility: + - `ghcr.io/servicestack/llms:v2.0.30` (pinned) + Key config points (concrete based on llms.py docs): - Only one enabled provider: `ollama` - `providers.ollama.type = "OllamaProvider"` From 9b98def7dbbaaee8ea3b33780af8ccb94220cc34 Mon Sep 17 00:00:00 2001 From: bussyjd Date: Tue, 3 Feb 2026 17:21:40 +0100 Subject: [PATCH 11/42] fix(llm): run llmspy via llms entrypoint --- internal/embed/infrastructure/base/templates/llm.yaml | 11 ++++++----- 1 file changed, 6 insertions(+), 5 
deletions(-) diff --git a/internal/embed/infrastructure/base/templates/llm.yaml b/internal/embed/infrastructure/base/templates/llm.yaml index 4f367c9..b9d00b4 100644 --- a/internal/embed/infrastructure/base/templates/llm.yaml +++ b/internal/embed/infrastructure/base/templates/llm.yaml @@ -199,11 +199,12 @@ spec: containerPort: 8000 protocol: TCP command: - - sh - - -c - - | - set -eu - llms --config /home/llms/.llms/llms.json --serve 8000 + - llms + args: + - --config + - /home/llms/.llms/llms.json + - --serve + - "8000" env: # Avoid surprises if the image changes its default HOME. - name: HOME From 8798d0712e8dc5740af83dbe4e9e20c552dc5a99 Mon Sep 17 00:00:00 2001 From: bussyjd Date: Tue, 3 Feb 2026 17:31:24 +0100 Subject: [PATCH 12/42] fix(llm): use http probes for llmspy --- internal/embed/infrastructure/base/templates/llm.yaml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/internal/embed/infrastructure/base/templates/llm.yaml b/internal/embed/infrastructure/base/templates/llm.yaml index b9d00b4..5633866 100644 --- a/internal/embed/infrastructure/base/templates/llm.yaml +++ b/internal/embed/infrastructure/base/templates/llm.yaml @@ -213,13 +213,15 @@ spec: - name: llmspy-home mountPath: /home/llms/.llms readinessProbe: - tcpSocket: + httpGet: + path: / port: http initialDelaySeconds: 5 periodSeconds: 5 timeoutSeconds: 2 livenessProbe: - tcpSocket: + httpGet: + path: / port: http initialDelaySeconds: 30 periodSeconds: 10 From 37ed2414f9722929c258ba4a0dfb53830fc21f29 Mon Sep 17 00:00:00 2001 From: bussyjd Date: Wed, 4 Feb 2026 20:39:39 +0400 Subject: [PATCH 13/42] feat: persist Cloudflare Tunnel hostname via login + loosen Gateway hostnames --- cmd/obol/main.go | 57 ++++- .../cloudflared/templates/deployment.yaml | 78 +++++- .../infrastructure/cloudflared/values.yaml | 20 ++ internal/embed/infrastructure/helmfile.yaml | 4 - internal/stack/stack.go | 34 ++- internal/tunnel/cloudflare.go | 224 ++++++++++++++++++ internal/tunnel/login.go | 213 
+++++++++++++++++ internal/tunnel/provision.go | 206 ++++++++++++++++ internal/tunnel/stackid.go | 20 ++ internal/tunnel/state.go | 62 +++++ internal/tunnel/tunnel.go | 87 ++++--- internal/tunnel/tunnel_test.go | 37 +++ 12 files changed, 1001 insertions(+), 41 deletions(-) create mode 100644 internal/embed/infrastructure/cloudflared/values.yaml create mode 100644 internal/tunnel/cloudflare.go create mode 100644 internal/tunnel/login.go create mode 100644 internal/tunnel/provision.go create mode 100644 internal/tunnel/stackid.go create mode 100644 internal/tunnel/state.go create mode 100644 internal/tunnel/tunnel_test.go diff --git a/cmd/obol/main.go b/cmd/obol/main.go index 69f92c5..4292964 100644 --- a/cmd/obol/main.go +++ b/cmd/obol/main.go @@ -176,9 +176,64 @@ GLOBAL OPTIONS: return tunnel.Status(cfg) }, }, + { + Name: "login", + Usage: "Authenticate via browser and create a locally-managed tunnel (no API token)", + Flags: []cli.Flag{ + &cli.StringFlag{ + Name: "hostname", + Aliases: []string{"H"}, + Usage: "Public hostname to route (e.g. stack.example.com)", + Required: true, + }, + }, + Action: func(c *cli.Context) error { + return tunnel.Login(cfg, tunnel.LoginOptions{ + Hostname: c.String("hostname"), + }) + }, + }, + { + Name: "provision", + Usage: "Provision a persistent (DNS-routed) Cloudflare Tunnel", + Flags: []cli.Flag{ + &cli.StringFlag{ + Name: "hostname", + Aliases: []string{"H"}, + Usage: "Public hostname to route (e.g. 
stack.example.com)", + Required: true, + }, + &cli.StringFlag{ + Name: "account-id", + Aliases: []string{"a"}, + Usage: "Cloudflare account ID (or set CLOUDFLARE_ACCOUNT_ID)", + EnvVars: []string{"CLOUDFLARE_ACCOUNT_ID"}, + }, + &cli.StringFlag{ + Name: "zone-id", + Aliases: []string{"z"}, + Usage: "Cloudflare zone ID for the hostname (or set CLOUDFLARE_ZONE_ID)", + EnvVars: []string{"CLOUDFLARE_ZONE_ID"}, + }, + &cli.StringFlag{ + Name: "api-token", + Aliases: []string{"t"}, + Usage: "Cloudflare API token (or set CLOUDFLARE_API_TOKEN)", + EnvVars: []string{"CLOUDFLARE_API_TOKEN"}, + }, + }, + Action: func(c *cli.Context) error { + return tunnel.Provision(cfg, tunnel.ProvisionOptions{ + Hostname: c.String("hostname"), + AccountID: c.String("account-id"), + ZoneID: c.String("zone-id"), + APIToken: c.String("api-token"), + }) + }, + }, { Name: "restart", - Usage: "Restart the tunnel to get a new URL", + Usage: "Restart the tunnel connector (quick tunnels get a new URL)", Action: func(c *cli.Context) error { return tunnel.Restart(cfg) }, diff --git a/internal/embed/infrastructure/cloudflared/templates/deployment.yaml b/internal/embed/infrastructure/cloudflared/templates/deployment.yaml index 212556d..c4e0b77 100644 --- a/internal/embed/infrastructure/cloudflared/templates/deployment.yaml +++ b/internal/embed/infrastructure/cloudflared/templates/deployment.yaml @@ -1,3 +1,33 @@ +{{- $mode := default "auto" .Values.mode -}} +{{- $remoteSecretName := default "cloudflared-tunnel-token" .Values.remoteManaged.tokenSecretName -}} +{{- $remoteSecretKey := default "TUNNEL_TOKEN" .Values.remoteManaged.tokenSecretKey -}} +{{- $localSecretName := default "cloudflared-local-credentials" .Values.localManaged.secretName -}} +{{- $localConfigMapName := default "cloudflared-local-config" .Values.localManaged.configMapName -}} +{{- $localTunnelIDKey := default "tunnel_id" .Values.localManaged.tunnelIDKey -}} + +{{- $useLocal := false -}} +{{- if eq $mode "local" -}} +{{- $useLocal = 
true -}} +{{- else if eq $mode "auto" -}} +{{- $ls := lookup "v1" "Secret" .Release.Namespace $localSecretName -}} +{{- $cm := lookup "v1" "ConfigMap" .Release.Namespace $localConfigMapName -}} +{{- if and $ls $cm -}} +{{- $useLocal = true -}} +{{- end -}} +{{- end -}} + +{{- $useRemote := false -}} +{{- if not $useLocal -}} +{{- if eq $mode "remote" -}} +{{- $useRemote = true -}} +{{- else if eq $mode "auto" -}} +{{- $rs := lookup "v1" "Secret" .Release.Namespace $remoteSecretName -}} +{{- if $rs -}} +{{- $useRemote = true -}} +{{- end -}} +{{- end -}} +{{- end -}} + apiVersion: apps/v1 kind: Deployment metadata: @@ -17,14 +47,46 @@ spec: spec: containers: - name: cloudflared - image: cloudflare/cloudflared:2024.12.2 + image: {{ printf "%s:%s" .Values.image.repository .Values.image.tag | quote }} args: - tunnel - --no-autoupdate - --metrics - - 0.0.0.0:2000 + - {{ .Values.metrics.address | quote }} + {{ if $useLocal }} + - --origincert + - /etc/cloudflared/cert.pem + - --config + - /etc/cloudflared/config.yml + - run + - "$(TUNNEL_ID)" + {{ else if $useRemote }} + - run + - --token + - "$(TUNNEL_TOKEN)" + {{ else }} - --url - - http://traefik.traefik.svc.cluster.local:80 + - {{ .Values.quickTunnel.url | quote }} + {{ end }} + {{ if $useLocal }} + env: + - name: TUNNEL_ID + valueFrom: + configMapKeyRef: + name: {{ $localConfigMapName | quote }} + key: {{ $localTunnelIDKey | quote }} + volumeMounts: + - name: cloudflared-local + mountPath: /etc/cloudflared + readOnly: true + {{ else if $useRemote }} + env: + - name: TUNNEL_TOKEN + valueFrom: + secretKeyRef: + name: {{ $remoteSecretName | quote }} + key: {{ $remoteSecretKey | quote }} + {{ end }} ports: - name: metrics containerPort: 2000 @@ -41,4 +103,14 @@ spec: limits: cpu: 100m memory: 128Mi + {{ if $useLocal }} + volumes: + - name: cloudflared-local + projected: + sources: + - secret: + name: {{ $localSecretName | quote }} + - configMap: + name: {{ $localConfigMapName | quote }} + {{ end }} restartPolicy: Always 
diff --git a/internal/embed/infrastructure/cloudflared/values.yaml b/internal/embed/infrastructure/cloudflared/values.yaml new file mode 100644 index 0000000..1faddbc --- /dev/null +++ b/internal/embed/infrastructure/cloudflared/values.yaml @@ -0,0 +1,20 @@ +mode: auto + +image: + repository: cloudflare/cloudflared + tag: "2024.12.2" + +metrics: + address: "0.0.0.0:2000" + +quickTunnel: + url: "http://traefik.traefik.svc.cluster.local:80" + +remoteManaged: + tokenSecretName: "cloudflared-tunnel-token" + tokenSecretKey: "TUNNEL_TOKEN" + +localManaged: + secretName: "cloudflared-local-credentials" + configMapName: "cloudflared-local-config" + tunnelIDKey: "tunnel_id" diff --git a/internal/embed/infrastructure/helmfile.yaml b/internal/embed/infrastructure/helmfile.yaml index 310ff46..781845f 100644 --- a/internal/embed/infrastructure/helmfile.yaml +++ b/internal/embed/infrastructure/helmfile.yaml @@ -156,8 +156,6 @@ releases: - name: traefik-gateway namespace: traefik sectionName: web - hostnames: - - obol.stack rules: - matches: - path: @@ -198,8 +196,6 @@ releases: - name: traefik-gateway namespace: traefik sectionName: web - hostnames: - - obol.stack rules: - matches: - path: diff --git a/internal/stack/stack.go b/internal/stack/stack.go index c8366f6..8aa4872 100644 --- a/internal/stack/stack.go +++ b/internal/stack/stack.go @@ -321,9 +321,21 @@ func syncDefaults(cfg *config.Config, kubeconfigPath string) error { // Sync defaults using helmfile (handles Helm hooks properly) defaultsHelmfilePath := filepath.Join(cfg.ConfigDir, "defaults") + helmfilePath := filepath.Join(defaultsHelmfilePath, "helmfile.yaml") + + // Compatibility migration: older defaults pinned HTTPRoutes to `obol.stack` via + // `spec.hostnames`. This breaks public access for: + // - quick tunnels (random *.trycloudflare.com host) + // - user-provided DNS hostnames (e.g. agent.example.com) + // Removing hostnames makes routes match all hostnames while preserving existing + // path-based routing. 
+ if err := migrateDefaultsHTTPRouteHostnames(helmfilePath); err != nil { + fmt.Printf("Warning: failed to migrate defaults helmfile hostnames: %v\n", err) + } + helmfileCmd := exec.Command( filepath.Join(cfg.BinDir, "helmfile"), - "--file", filepath.Join(defaultsHelmfilePath, "helmfile.yaml"), + "--file", helmfilePath, "--kubeconfig", kubeconfigPath, "sync", ) @@ -342,3 +354,23 @@ func syncDefaults(cfg *config.Config, kubeconfigPath string) error { fmt.Println("Default infrastructure deployed") return nil } + +func migrateDefaultsHTTPRouteHostnames(helmfilePath string) error { + data, err := os.ReadFile(helmfilePath) + if err != nil { + return err + } + + // Only removes the legacy default single-hostname block; if users customized their + // helmfile with different hostnames, we leave it alone. + needle := " hostnames:\n - obol.stack\n" + s := string(data) + if !strings.Contains(s, needle) { + return nil + } + updated := strings.ReplaceAll(s, needle, "") + if updated == s { + return nil + } + return os.WriteFile(helmfilePath, []byte(updated), 0644) +} diff --git a/internal/tunnel/cloudflare.go b/internal/tunnel/cloudflare.go new file mode 100644 index 0000000..669e880 --- /dev/null +++ b/internal/tunnel/cloudflare.go @@ -0,0 +1,224 @@ +package tunnel + +import ( + "bytes" + "encoding/json" + "fmt" + "io" + "net/http" + "net/url" + "strings" + "time" +) + +type cloudflareTunnel struct { + ID string `json:"id"` + Token string `json:"token"` +} + +type cloudflareClient struct { + apiToken string +} + +func newCloudflareClient(apiToken string) *cloudflareClient { + return &cloudflareClient{apiToken: apiToken} +} + +func (c *cloudflareClient) CreateTunnel(accountID, tunnelName string) (*cloudflareTunnel, error) { + reqBody := map[string]any{ + "name": tunnelName, + "config_src": "cloudflare", + } + + var resp struct { + Success bool `json:"success"` + Errors []struct { + Code int `json:"code"` + Message string `json:"message"` + } `json:"errors"` + Result struct { + 
ID string `json:"id"` + Token string `json:"token"` + } `json:"result"` + } + + if err := c.doJSON("POST", fmt.Sprintf("https://api.cloudflare.com/client/v4/accounts/%s/cfd_tunnel", accountID), reqBody, &resp); err != nil { + return nil, err + } + if !resp.Success { + return nil, fmt.Errorf("cloudflare tunnel create failed: %v", resp.Errors) + } + return &cloudflareTunnel{ID: resp.Result.ID, Token: resp.Result.Token}, nil +} + +func (c *cloudflareClient) GetTunnelToken(accountID, tunnelID string) (string, error) { + var resp struct { + Success bool `json:"success"` + Errors []any `json:"errors"` + Result string `json:"result"` + } + + if err := c.doJSON("GET", fmt.Sprintf("https://api.cloudflare.com/client/v4/accounts/%s/cfd_tunnel/%s/token", accountID, tunnelID), nil, &resp); err != nil { + return "", err + } + if !resp.Success || resp.Result == "" { + return "", fmt.Errorf("cloudflare tunnel token fetch failed") + } + return resp.Result, nil +} + +func (c *cloudflareClient) UpdateTunnelConfiguration(accountID, tunnelID, hostname, serviceURL string) error { + reqBody := map[string]any{ + "config": map[string]any{ + "ingress": []map[string]any{ + { + "hostname": hostname, + "service": serviceURL, + "originRequest": map[string]any{}, + }, + { + "service": "http_status:404", + }, + }, + }, + } + + var resp struct { + Success bool `json:"success"` + Errors []struct { + Code int `json:"code"` + Message string `json:"message"` + } `json:"errors"` + } + + url := fmt.Sprintf("https://api.cloudflare.com/client/v4/accounts/%s/cfd_tunnel/%s/configurations", accountID, tunnelID) + if err := c.doJSON("PUT", url, reqBody, &resp); err != nil { + return err + } + if !resp.Success { + return fmt.Errorf("cloudflare tunnel configuration update failed: %v", resp.Errors) + } + return nil +} + +type dnsRecord struct { + ID string `json:"id"` + Type string `json:"type"` + Name string `json:"name"` + Content string `json:"content"` + Proxied bool `json:"proxied"` +} + +func (c 
*cloudflareClient) UpsertTunnelDNSRecord(zoneID, hostname, content string) error { + // Find existing records for this exact name/type. + listURL := fmt.Sprintf("https://api.cloudflare.com/client/v4/zones/%s/dns_records?type=CNAME&name=%s", zoneID, url.QueryEscape(hostname)) + var listResp struct { + Success bool `json:"success"` + Errors []struct { + Code int `json:"code"` + Message string `json:"message"` + } `json:"errors"` + Result []dnsRecord `json:"result"` + } + if err := c.doJSON("GET", listURL, nil, &listResp); err != nil { + return err + } + if !listResp.Success { + return fmt.Errorf("cloudflare dns record list failed: %v", listResp.Errors) + } + + if len(listResp.Result) > 0 { + // Update first matching record. + recID := listResp.Result[0].ID + updateURL := fmt.Sprintf("https://api.cloudflare.com/client/v4/zones/%s/dns_records/%s", zoneID, recID) + reqBody := map[string]any{ + "type": "CNAME", + "proxied": true, + "name": hostname, + "content": content, + } + + var updResp struct { + Success bool `json:"success"` + Errors []struct { + Code int `json:"code"` + Message string `json:"message"` + } `json:"errors"` + } + if err := c.doJSON("PUT", updateURL, reqBody, &updResp); err != nil { + return err + } + if !updResp.Success { + return fmt.Errorf("cloudflare dns record update failed: %v", updResp.Errors) + } + return nil + } + + // Create new record. 
+ createURL := fmt.Sprintf("https://api.cloudflare.com/client/v4/zones/%s/dns_records", zoneID) + reqBody := map[string]any{ + "type": "CNAME", + "proxied": true, + "name": hostname, + "content": content, + } + + var createResp struct { + Success bool `json:"success"` + Errors []struct { + Code int `json:"code"` + Message string `json:"message"` + } `json:"errors"` + } + + if err := c.doJSON("POST", createURL, reqBody, &createResp); err != nil { + return err + } + if !createResp.Success { + return fmt.Errorf("cloudflare dns record create failed: %v", createResp.Errors) + } + return nil +} + +func (c *cloudflareClient) doJSON(method, url string, reqBody any, out any) error { + var body []byte + var err error + if reqBody != nil { + body, err = json.Marshal(reqBody) + if err != nil { + return err + } + } + + req, err := http.NewRequest(method, url, bytes.NewReader(body)) + if err != nil { + return err + } + req.Header.Set("Authorization", "Bearer "+c.apiToken) + req.Header.Set("Content-Type", "application/json") + + client := &http.Client{Timeout: 30 * time.Second} + resp, err := client.Do(req) + if err != nil { + return err + } + defer resp.Body.Close() + + respBody, err := io.ReadAll(resp.Body) + if err != nil { + return err + } + + if resp.StatusCode < 200 || resp.StatusCode >= 300 { + // Best effort: surface body for debugging without leaking secrets. 
+ return fmt.Errorf("cloudflare api error (%s): %s", resp.Status, strings.TrimSpace(string(respBody))) + } + + if out == nil { + return nil + } + if err := json.Unmarshal(respBody, out); err != nil { + return err + } + return nil +} diff --git a/internal/tunnel/login.go b/internal/tunnel/login.go new file mode 100644 index 0000000..de79882 --- /dev/null +++ b/internal/tunnel/login.go @@ -0,0 +1,213 @@ +package tunnel + +import ( + "bytes" + "encoding/base64" + "fmt" + "os" + "os/exec" + "path/filepath" + "regexp" + "strings" + + "github.com/ObolNetwork/obol-stack/internal/config" +) + +type LoginOptions struct { + Hostname string +} + +// Login provisions a locally-managed tunnel using `cloudflared tunnel login` (browser auth), +// then writes the required credentials/config into Kubernetes and upgrades the cloudflared +// Helm release so the in-cluster connector runs the locally-managed tunnel. +// +// Docs: +// - Create a locally-managed tunnel: https://developers.cloudflare.com/cloudflare-one/networks/connectors/cloudflare-tunnel/do-more-with-tunnels/local-management/create-local-tunnel/ +// - Configuration file for published apps: https://developers.cloudflare.com/cloudflare-one/networks/connectors/cloudflare-tunnel/do-more-with-tunnels/local-management/configuration-file/ +// - `origincert` run parameter (locally-managed tunnels): https://developers.cloudflare.com/cloudflare-one/networks/connectors/cloudflare-tunnel/configure-tunnels/cloudflared-parameters/run-parameters/ +func Login(cfg *config.Config, opts LoginOptions) error { + hostname := normalizeHostname(opts.Hostname) + if hostname == "" { + return fmt.Errorf("--hostname is required (e.g. stack.example.com)") + } + + // Stack must be running so we can write secrets/config to the cluster. 
+ kubeconfigPath := filepath.Join(cfg.ConfigDir, "kubeconfig.yaml") + if _, err := os.Stat(kubeconfigPath); os.IsNotExist(err) { + return fmt.Errorf("stack not running, use 'obol stack up' first") + } + + stackID := getStackID(cfg) + if stackID == "" { + return fmt.Errorf("stack not initialized, run 'obol stack init' first") + } + tunnelName := fmt.Sprintf("obol-stack-%s", stackID) + + cloudflaredPath, err := exec.LookPath("cloudflared") + if err != nil { + return fmt.Errorf("cloudflared not found in PATH. Install it first (e.g. 'brew install cloudflared' on macOS)") + } + + fmt.Println("Authenticating cloudflared (browser)...") + loginCmd := exec.Command(cloudflaredPath, "tunnel", "login") + loginCmd.Stdin = os.Stdin + loginCmd.Stdout = os.Stdout + loginCmd.Stderr = os.Stderr + if err := loginCmd.Run(); err != nil { + return fmt.Errorf("cloudflared tunnel login failed: %w", err) + } + + fmt.Printf("\nCreating tunnel: %s\n", tunnelName) + if out, err := exec.Command(cloudflaredPath, "tunnel", "create", tunnelName).CombinedOutput(); err != nil { + // "Already exists" is common if user re-runs. We'll recover by querying tunnel info. 
+ fmt.Printf("cloudflared tunnel create returned an error (continuing): %s\n", strings.TrimSpace(string(out))) + } + + infoOut, err := exec.Command(cloudflaredPath, "tunnel", "info", tunnelName).CombinedOutput() + if err != nil { + return fmt.Errorf("cloudflared tunnel info failed: %w\n%s", err, strings.TrimSpace(string(infoOut))) + } + tunnelID, err := parseFirstUUID(string(infoOut)) + if err != nil { + return fmt.Errorf("could not parse tunnel UUID from cloudflared tunnel info:\n%s", strings.TrimSpace(string(infoOut))) + } + + cloudflaredDir := defaultCloudflaredDir() + certPath := filepath.Join(cloudflaredDir, "cert.pem") + credPath := filepath.Join(cloudflaredDir, tunnelID+".json") + + cert, err := os.ReadFile(certPath) + if err != nil { + return fmt.Errorf("failed to read %s: %w", certPath, err) + } + cred, err := os.ReadFile(credPath) + if err != nil { + return fmt.Errorf("failed to read %s: %w", credPath, err) + } + + fmt.Printf("\nCreating DNS route for %s...\n", hostname) + routeOut, err := exec.Command(cloudflaredPath, "tunnel", "route", "dns", tunnelName, hostname).CombinedOutput() + if err != nil { + return fmt.Errorf("cloudflared tunnel route dns failed: %w\n%s", err, strings.TrimSpace(string(routeOut))) + } + + if err := applyLocalManagedK8sResources(cfg, kubeconfigPath, hostname, tunnelID, cert, cred); err != nil { + return err + } + + // Re-render the chart so it flips from quick tunnel to locally-managed. 
+ if err := helmUpgradeCloudflared(cfg, kubeconfigPath); err != nil { + return err + } + + st, _ := loadTunnelState(cfg) + if st == nil { + st = &tunnelState{} + } + st.Mode = "dns" + st.Hostname = hostname + st.TunnelID = tunnelID + st.TunnelName = tunnelName + if err := saveTunnelState(cfg, st); err != nil { + return fmt.Errorf("tunnel created, but failed to save local state: %w", err) + } + + fmt.Println("\n✓ Tunnel login complete") + fmt.Printf("Persistent URL: https://%s\n", hostname) + fmt.Println("Tip: run 'obol tunnel status' to verify the connector is active.") + return nil +} + +func defaultCloudflaredDir() string { + home, err := os.UserHomeDir() + if err != nil { + return ".cloudflared" + } + return filepath.Join(home, ".cloudflared") +} + +func parseFirstUUID(s string) (string, error) { + re := regexp.MustCompile(`[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}`) + if m := re.FindString(strings.ToLower(s)); m != "" { + return m, nil + } + return "", fmt.Errorf("uuid not found") +} + +func applyLocalManagedK8sResources(cfg *config.Config, kubeconfigPath, hostname, tunnelID string, certPEM, credJSON []byte) error { + // Secret: account certificate + tunnel credentials (locally-managed tunnel requires origincert). + secretYAML, err := buildLocalManagedSecretYAML(hostname, certPEM, credJSON) + if err != nil { + return err + } + if err := kubectlApply(cfg, kubeconfigPath, secretYAML); err != nil { + return err + } + + // ConfigMap: config.yml + tunnel_id used for command arg expansion. 
+ cfgYAML := buildLocalManagedConfigYAML(hostname, tunnelID) + if err := kubectlApply(cfg, kubeconfigPath, cfgYAML); err != nil { + return err + } + + return nil +} + +const ( + localManagedSecretName = "cloudflared-local-credentials" + localManagedConfigMapName = "cloudflared-local-config" +) + +func buildLocalManagedSecretYAML(hostname string, certPEM, credJSON []byte) ([]byte, error) { + certB64 := base64.StdEncoding.EncodeToString(certPEM) + credB64 := base64.StdEncoding.EncodeToString(credJSON) + + secret := fmt.Sprintf(`apiVersion: v1 +kind: Secret +metadata: + name: %s + namespace: %s +type: Opaque +data: + cert.pem: %s + credentials.json: %s +`, localManagedSecretName, tunnelNamespace, certB64, credB64) + _ = hostname // reserved for future labels/annotations + return []byte(secret), nil +} + +func buildLocalManagedConfigYAML(hostname, tunnelID string) []byte { + cfg := fmt.Sprintf(`apiVersion: v1 +kind: ConfigMap +metadata: + name: %s + namespace: %s +data: + tunnel_id: %s + config.yml: | + tunnel: %s + credentials-file: /etc/cloudflared/credentials.json + + ingress: + - hostname: %s + service: http://traefik.traefik.svc.cluster.local:80 + - service: http_status:404 +`, localManagedConfigMapName, tunnelNamespace, tunnelID, tunnelID, hostname) + return []byte(cfg) +} + +func kubectlApply(cfg *config.Config, kubeconfigPath string, manifest []byte) error { + kubectlPath := filepath.Join(cfg.BinDir, "kubectl") + + cmd := exec.Command(kubectlPath, + "--kubeconfig", kubeconfigPath, + "apply", "-f", "-", + ) + cmd.Stdin = bytes.NewReader(manifest) + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + if err := cmd.Run(); err != nil { + return fmt.Errorf("kubectl apply failed: %w", err) + } + return nil +} diff --git a/internal/tunnel/provision.go b/internal/tunnel/provision.go new file mode 100644 index 0000000..b4c592a --- /dev/null +++ b/internal/tunnel/provision.go @@ -0,0 +1,206 @@ +package tunnel + +import ( + "bytes" + "fmt" + "os" + "os/exec" + 
"path/filepath" + "strings" + + "github.com/ObolNetwork/obol-stack/internal/config" +) + +// ProvisionOptions configures `obol tunnel provision`. +type ProvisionOptions struct { + Hostname string + AccountID string + ZoneID string + APIToken string +} + +// Provision provisions a persistent Cloudflare Tunnel routed via a proxied DNS record. +// +// Based on Cloudflare's "Create a tunnel (API)" guide: +// - POST /accounts/$ACCOUNT_ID/cfd_tunnel +// - PUT /accounts/$ACCOUNT_ID/cfd_tunnel/$TUNNEL_ID/configurations +// - POST /zones/$ZONE_ID/dns_records (proxied CNAME to .cfargotunnel.com) +func Provision(cfg *config.Config, opts ProvisionOptions) error { + hostname := normalizeHostname(opts.Hostname) + if hostname == "" { + return fmt.Errorf("--hostname is required (e.g. stack.example.com)") + } + if opts.AccountID == "" { + return fmt.Errorf("--account-id is required (or set CLOUDFLARE_ACCOUNT_ID)") + } + if opts.ZoneID == "" { + return fmt.Errorf("--zone-id is required (or set CLOUDFLARE_ZONE_ID)") + } + if opts.APIToken == "" { + return fmt.Errorf("--api-token is required (or set CLOUDFLARE_API_TOKEN)") + } + + // Stack must be running so we can store the tunnel token in-cluster. + kubeconfigPath := filepath.Join(cfg.ConfigDir, "kubeconfig.yaml") + if _, err := os.Stat(kubeconfigPath); os.IsNotExist(err) { + return fmt.Errorf("stack not running, use 'obol stack up' first") + } + + stackID := getStackID(cfg) + if stackID == "" { + return fmt.Errorf("stack not initialized, run 'obol stack init' first") + } + tunnelName := fmt.Sprintf("obol-stack-%s", stackID) + + client := newCloudflareClient(opts.APIToken) + + // Try to reuse existing local state to keep the same tunnel ID. 
+ st, _ := loadTunnelState(cfg) + if st != nil && st.AccountID == opts.AccountID && st.ZoneID == opts.ZoneID && st.TunnelID != "" && st.TunnelName != "" { + tunnelName = st.TunnelName + } + + fmt.Println("Provisioning Cloudflare Tunnel (API)...") + fmt.Printf("Hostname: %s\n", hostname) + fmt.Printf("Tunnel: %s\n", tunnelName) + + tunnelID := "" + tunnelToken := "" + + if st != nil && st.AccountID == opts.AccountID && st.TunnelID != "" { + tunnelID = st.TunnelID + tok, err := client.GetTunnelToken(opts.AccountID, tunnelID) + if err != nil { + // If the tunnel no longer exists, create a new one. + fmt.Printf("Existing tunnel token fetch failed (%v); creating a new tunnel...\n", err) + tunnelID = "" + } else { + tunnelToken = tok + } + } + + if tunnelID == "" { + t, err := client.CreateTunnel(opts.AccountID, tunnelName) + if err != nil { + return err + } + tunnelID = t.ID + tunnelToken = t.Token + } + + if err := client.UpdateTunnelConfiguration(opts.AccountID, tunnelID, hostname, "http://traefik.traefik.svc.cluster.local:80"); err != nil { + return err + } + + if err := client.UpsertTunnelDNSRecord(opts.ZoneID, hostname, tunnelID+".cfargotunnel.com"); err != nil { + return err + } + + if err := applyTunnelTokenSecret(cfg, kubeconfigPath, tunnelToken); err != nil { + return err + } + + // Ensure cloudflared switches to remotely-managed mode immediately (chart defaults to mode:auto). 
+ if err := helmUpgradeCloudflared(cfg, kubeconfigPath); err != nil { + return err + } + + if st == nil { + st = &tunnelState{} + } + st.Mode = "dns" + st.Hostname = hostname + st.AccountID = opts.AccountID + st.ZoneID = opts.ZoneID + st.TunnelID = tunnelID + st.TunnelName = tunnelName + + if err := saveTunnelState(cfg, st); err != nil { + return fmt.Errorf("tunnel provisioned, but failed to save local state: %w", err) + } + + fmt.Println("\n✓ Tunnel provisioned") + fmt.Printf("Persistent URL: https://%s\n", hostname) + fmt.Println("Tip: run 'obol tunnel status' to verify the connector is active.") + return nil +} + +func normalizeHostname(s string) string { + s = strings.TrimSpace(s) + s = strings.TrimSuffix(s, "/") + s = strings.TrimPrefix(s, "https://") + s = strings.TrimPrefix(s, "http://") + + // Strip any path/query fragments users accidentally paste. + if idx := strings.IndexByte(s, '/'); idx >= 0 { + s = s[:idx] + } + if idx := strings.IndexByte(s, '?'); idx >= 0 { + s = s[:idx] + } + if idx := strings.IndexByte(s, '#'); idx >= 0 { + s = s[:idx] + } + + return strings.ToLower(s) +} + +func applyTunnelTokenSecret(cfg *config.Config, kubeconfigPath, token string) error { + kubectlPath := filepath.Join(cfg.BinDir, "kubectl") + + createCmd := exec.Command(kubectlPath, + "--kubeconfig", kubeconfigPath, + "-n", tunnelNamespace, + "create", "secret", "generic", tunnelTokenSecretName, + fmt.Sprintf("--from-literal=%s=%s", tunnelTokenSecretKey, token), + "--dry-run=client", + "-o", "yaml", + ) + out, err := createCmd.Output() + if err != nil { + return fmt.Errorf("failed to create secret manifest: %w", err) + } + + applyCmd := exec.Command(kubectlPath, + "--kubeconfig", kubeconfigPath, + "apply", "-f", "-", + ) + applyCmd.Stdin = bytes.NewReader(out) + applyCmd.Stdout = os.Stdout + applyCmd.Stderr = os.Stderr + if err := applyCmd.Run(); err != nil { + return fmt.Errorf("failed to apply tunnel token secret: %w", err) + } + return nil +} + +func 
helmUpgradeCloudflared(cfg *config.Config, kubeconfigPath string) error { + helmPath := filepath.Join(cfg.BinDir, "helm") + defaultsDir := filepath.Join(cfg.ConfigDir, "defaults") + + if _, err := os.Stat(helmPath); os.IsNotExist(err) { + return fmt.Errorf("helm not found at %s", helmPath) + } + if _, err := os.Stat(filepath.Join(defaultsDir, "cloudflared", "Chart.yaml")); os.IsNotExist(err) { + return fmt.Errorf("cloudflared chart not found in %s (re-run 'obol stack init --force' to refresh defaults)", defaultsDir) + } + + // Run from the defaults dir so "./cloudflared" resolves correctly. + cmd := exec.Command(helmPath, + "--kubeconfig", kubeconfigPath, + "upgrade", + "--install", + "cloudflared", + "./cloudflared", + "--namespace", tunnelNamespace, + "--wait", + "--timeout", "2m", + ) + cmd.Dir = defaultsDir + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + if err := cmd.Run(); err != nil { + return fmt.Errorf("failed to upgrade cloudflared release: %w", err) + } + return nil +} diff --git a/internal/tunnel/stackid.go b/internal/tunnel/stackid.go new file mode 100644 index 0000000..4115638 --- /dev/null +++ b/internal/tunnel/stackid.go @@ -0,0 +1,20 @@ +package tunnel + +import ( + "os" + "path/filepath" + "strings" + + "github.com/ObolNetwork/obol-stack/internal/config" +) + +const stackIDFile = ".stack-id" + +func getStackID(cfg *config.Config) string { + data, err := os.ReadFile(filepath.Join(cfg.ConfigDir, stackIDFile)) + if err != nil { + return "" + } + return strings.TrimSpace(string(data)) +} + diff --git a/internal/tunnel/state.go b/internal/tunnel/state.go new file mode 100644 index 0000000..f7b026d --- /dev/null +++ b/internal/tunnel/state.go @@ -0,0 +1,62 @@ +package tunnel + +import ( + "encoding/json" + "os" + "path/filepath" + "time" + + "github.com/ObolNetwork/obol-stack/internal/config" +) + +type tunnelState struct { + Mode string `json:"mode"` // "quick" or "dns" + Hostname string `json:"hostname"` + AccountID string 
`json:"account_id,omitempty"` + ZoneID string `json:"zone_id,omitempty"` + TunnelID string `json:"tunnel_id,omitempty"` + TunnelName string `json:"tunnel_name,omitempty"` + UpdatedAt time.Time `json:"updated_at"` +} + +func tunnelStatePath(cfg *config.Config) string { + return filepath.Join(cfg.ConfigDir, "tunnel", "cloudflared.json") +} + +func loadTunnelState(cfg *config.Config) (*tunnelState, error) { + data, err := os.ReadFile(tunnelStatePath(cfg)) + if err != nil { + if os.IsNotExist(err) { + return nil, nil + } + return nil, err + } + + var st tunnelState + if err := json.Unmarshal(data, &st); err != nil { + return nil, err + } + return &st, nil +} + +func saveTunnelState(cfg *config.Config, st *tunnelState) error { + if err := os.MkdirAll(filepath.Dir(tunnelStatePath(cfg)), 0755); err != nil { + return err + } + st.UpdatedAt = time.Now().UTC() + + data, err := json.MarshalIndent(st, "", " ") + if err != nil { + return err + } + + // Contains non-secret metadata only, but keep it user-private by default. + return os.WriteFile(tunnelStatePath(cfg), data, 0600) +} + +func tunnelModeAndURL(st *tunnelState) (mode, url string) { + if st != nil && st.Hostname != "" { + return "dns", "https://" + st.Hostname + } + return "quick", "" +} diff --git a/internal/tunnel/tunnel.go b/internal/tunnel/tunnel.go index 355e9ea..1ad3f23 100644 --- a/internal/tunnel/tunnel.go +++ b/internal/tunnel/tunnel.go @@ -13,46 +13,62 @@ import ( ) const ( - tunnelNamespace = "traefik" + tunnelNamespace = "traefik" tunnelLabelSelector = "app.kubernetes.io/name=cloudflared" + + // cloudflared-tunnel-token is created by `obol tunnel provision`. + tunnelTokenSecretName = "cloudflared-tunnel-token" + tunnelTokenSecretKey = "TUNNEL_TOKEN" ) -// Status displays the current tunnel status and URL +// Status displays the current tunnel status and URL. 
func Status(cfg *config.Config) error { kubectlPath := filepath.Join(cfg.BinDir, "kubectl") kubeconfigPath := filepath.Join(cfg.ConfigDir, "kubeconfig.yaml") - // Check if kubeconfig exists + // Check if kubeconfig exists. if _, err := os.Stat(kubeconfigPath); os.IsNotExist(err) { return fmt.Errorf("stack not running, use 'obol stack up' first") } - // Check pod status first + st, _ := loadTunnelState(cfg) + + // Check pod status first. podStatus, err := getPodStatus(kubectlPath, kubeconfigPath) if err != nil { - printStatusBox("quick", "not deployed", "", time.Now()) + mode, url := tunnelModeAndURL(st) + printStatusBox(mode, "not deployed", url, time.Now()) fmt.Println("\nTroubleshooting:") fmt.Println(" - Start the stack: obol stack up") return nil } - // Try to get tunnel URL from logs - url, err := GetTunnelURL(cfg) - if err != nil { - printStatusBox("quick", podStatus, "(not available)", time.Now()) - fmt.Println("\nTroubleshooting:") - fmt.Println(" - Check logs: obol tunnel logs") - fmt.Println(" - Restart tunnel: obol tunnel restart") - return nil + statusLabel := podStatus + if podStatus == "running" { + statusLabel = "active" + } + + mode, url := tunnelModeAndURL(st) + if mode == "quick" { + // Quick tunnels only: try to get URL from logs. + u, err := GetTunnelURL(cfg) + if err != nil { + printStatusBox(mode, podStatus, "(not available)", time.Now()) + fmt.Println("\nTroubleshooting:") + fmt.Println(" - Check logs: obol tunnel logs") + fmt.Println(" - Restart tunnel: obol tunnel restart") + return nil + } + url = u } - printStatusBox("quick", "active", url, time.Now()) + printStatusBox(mode, statusLabel, url, time.Now()) fmt.Printf("\nTest with: curl %s/\n", url) return nil } -// GetTunnelURL parses cloudflared logs to extract the quick tunnel URL +// GetTunnelURL parses cloudflared logs to extract the quick tunnel URL. 
func GetTunnelURL(cfg *config.Config) (string, error) { kubectlPath := filepath.Join(cfg.BinDir, "kubectl") kubeconfigPath := filepath.Join(cfg.ConfigDir, "kubeconfig.yaml") @@ -69,27 +85,25 @@ func GetTunnelURL(cfg *config.Config) (string, error) { return "", fmt.Errorf("failed to get tunnel logs: %w", err) } - // Parse URL from logs (quick tunnel uses cfargotunnel.com) - re := regexp.MustCompile(`https://[a-z0-9-]+\.cfargotunnel\.com`) - matches := re.FindString(string(output)) - if matches == "" { - // Also try trycloudflare.com as fallback - re = regexp.MustCompile(`https://[a-z0-9-]+\.trycloudflare\.com`) - matches = re.FindString(string(output)) + if url, ok := parseQuickTunnelURL(string(output)); ok { + return url, nil } - if matches == "" { - return "", fmt.Errorf("tunnel URL not found in logs") + + // Back-compat: allow cfargotunnel.com to be detected too. + re := regexp.MustCompile(`https://[a-z0-9-]+\.cfargotunnel\.com`) + if url := re.FindString(string(output)); url != "" { + return url, nil } - return matches, nil + return "", fmt.Errorf("tunnel URL not found in logs") } -// Restart restarts the cloudflared deployment to get a new tunnel URL +// Restart restarts the cloudflared deployment. func Restart(cfg *config.Config) error { kubectlPath := filepath.Join(cfg.BinDir, "kubectl") kubeconfigPath := filepath.Join(cfg.ConfigDir, "kubeconfig.yaml") - // Check if kubeconfig exists + // Check if kubeconfig exists. if _, err := os.Stat(kubeconfigPath); os.IsNotExist(err) { return fmt.Errorf("stack not running, use 'obol stack up' first") } @@ -109,17 +123,17 @@ func Restart(cfg *config.Config) error { } fmt.Println("\nTunnel restarting...") - fmt.Println("Run 'obol tunnel status' to see the new URL once ready (may take 10-30 seconds).") + fmt.Println("Run 'obol tunnel status' to see the URL once ready (may take 10-30 seconds).") return nil } -// Logs displays cloudflared logs +// Logs displays cloudflared logs. 
func Logs(cfg *config.Config, follow bool) error { kubectlPath := filepath.Join(cfg.BinDir, "kubectl") kubeconfigPath := filepath.Join(cfg.ConfigDir, "kubeconfig.yaml") - // Check if kubeconfig exists + // Check if kubeconfig exists. if _, err := os.Stat(kubeconfigPath); os.IsNotExist(err) { return fmt.Errorf("stack not running, use 'obol stack up' first") } @@ -142,7 +156,7 @@ func Logs(cfg *config.Config, follow bool) error { return cmd.Run() } -// getPodStatus returns the status of the cloudflared pod +// getPodStatus returns the status of the cloudflared pod. func getPodStatus(kubectlPath, kubeconfigPath string) (string, error) { cmd := exec.Command(kubectlPath, "--kubeconfig", kubeconfigPath, @@ -164,7 +178,7 @@ func getPodStatus(kubectlPath, kubeconfigPath string) (string, error) { return strings.ToLower(status), nil } -// printStatusBox prints a formatted status box +// printStatusBox prints a formatted status box. func printStatusBox(mode, status, url string, lastUpdated time.Time) { fmt.Println() fmt.Println("Cloudflare Tunnel Status") @@ -175,3 +189,12 @@ func printStatusBox(mode, status, url string, lastUpdated time.Time) { fmt.Printf("Last Updated: %s\n", lastUpdated.Format(time.RFC3339)) fmt.Println(strings.Repeat("─", 50)) } + +func parseQuickTunnelURL(logs string) (string, bool) { + // Quick tunnel logs print a random *.trycloudflare.com URL. 
+ re := regexp.MustCompile(`https://[a-z0-9-]+\.trycloudflare\.com`) + if url := re.FindString(logs); url != "" { + return url, true + } + return "", false +} diff --git a/internal/tunnel/tunnel_test.go b/internal/tunnel/tunnel_test.go new file mode 100644 index 0000000..74f8f3e --- /dev/null +++ b/internal/tunnel/tunnel_test.go @@ -0,0 +1,37 @@ +package tunnel + +import "testing" + +func TestNormalizeHostname(t *testing.T) { + tests := []struct { + in string + want string + }{ + {"stack.example.com", "stack.example.com"}, + {"https://stack.example.com", "stack.example.com"}, + {"http://stack.example.com/", "stack.example.com"}, + {"https://stack.example.com/foo?bar=baz#x", "stack.example.com"}, + {" stack.example.com ", "stack.example.com"}, + } + + for _, tt := range tests { + if got := normalizeHostname(tt.in); got != tt.want { + t.Fatalf("normalizeHostname(%q)=%q want %q", tt.in, got, tt.want) + } + } +} + +func TestParseQuickTunnelURL(t *testing.T) { + logs := ` +2026-01-14T12:00:00Z INF | Your quick tunnel URL is: | +2026-01-14T12:00:00Z INF | https://seasonal-deck-organisms-sf.trycloudflare.com | +` + + url, ok := parseQuickTunnelURL(logs) + if !ok { + t.Fatalf("expected ok=true") + } + if url != "https://seasonal-deck-organisms-sf.trycloudflare.com" { + t.Fatalf("unexpected url: %q", url) + } +} From 0de01e506f8375703460fd1a1465ad11484bff21 Mon Sep 17 00:00:00 2001 From: bussyjd Date: Thu, 5 Feb 2026 09:33:45 +0400 Subject: [PATCH 14/42] chore: bump cloudflared to 2026.1.2 --- internal/embed/infrastructure/cloudflared/values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/embed/infrastructure/cloudflared/values.yaml b/internal/embed/infrastructure/cloudflared/values.yaml index 1faddbc..58b3d8f 100644 --- a/internal/embed/infrastructure/cloudflared/values.yaml +++ b/internal/embed/infrastructure/cloudflared/values.yaml @@ -2,7 +2,7 @@ mode: auto image: repository: cloudflare/cloudflared - tag: "2024.12.2" + tag: "2026.1.2" 
metrics: address: "0.0.0.0:2000" From 1f71012c6ab00984496fc62e032074dccd898747 Mon Sep 17 00:00:00 2001 From: bussyjd Date: Fri, 6 Feb 2026 15:37:44 +0400 Subject: [PATCH 15/42] feat(inference): add x402 pay-per-inference gateway (Phase 1) Introduce the inference marketplace foundation: an x402-enabled reverse proxy that wraps any OpenAI-compatible inference service with USDC micropayments via the x402 protocol. Components: - internal/inference/gateway.go: net/http reverse proxy with x402 middleware - cmd/inference-gateway/: standalone binary for containerisation - cmd/obol/inference.go: `obol inference serve` CLI command - internal/embed/networks/inference/: helmfile network template deploying Ollama + gateway + HTTPRoute (auto-discovered by existing CLI) - Dockerfile.inference-gateway: distroless multi-stage build Provider: obol network install inference --wallet-address 0x... --model llama3.2:3b Consumer: POST /v1/chat/completions with X-PAYMENT header (USDC on Base) --- Dockerfile.inference-gateway | 11 + cmd/inference-gateway/main.go | 67 ++++++ cmd/obol/inference.go | 114 ++++++++++ cmd/obol/main.go | 12 +- go.mod | 31 ++- go.sum | 147 +++++++++++- internal/embed/networks/inference/Chart.yaml | 5 + .../networks/inference/helmfile.yaml.gotmpl | 49 ++++ .../networks/inference/templates/gateway.yaml | 211 ++++++++++++++++++ .../networks/inference/values.yaml.gotmpl | 23 ++ internal/inference/gateway.go | 140 ++++++++++++ 11 files changed, 805 insertions(+), 5 deletions(-) create mode 100644 Dockerfile.inference-gateway create mode 100644 cmd/inference-gateway/main.go create mode 100644 cmd/obol/inference.go create mode 100644 internal/embed/networks/inference/Chart.yaml create mode 100644 internal/embed/networks/inference/helmfile.yaml.gotmpl create mode 100644 internal/embed/networks/inference/templates/gateway.yaml create mode 100644 internal/embed/networks/inference/values.yaml.gotmpl create mode 100644 internal/inference/gateway.go diff --git 
a/Dockerfile.inference-gateway b/Dockerfile.inference-gateway new file mode 100644 index 0000000..42164c1 --- /dev/null +++ b/Dockerfile.inference-gateway @@ -0,0 +1,11 @@ +FROM golang:1.25-alpine AS builder + +WORKDIR /build +COPY go.mod go.sum ./ +RUN go mod download +COPY . . +RUN CGO_ENABLED=0 go build -o /inference-gateway ./cmd/inference-gateway + +FROM gcr.io/distroless/static-debian12:nonroot +COPY --from=builder /inference-gateway /inference-gateway +ENTRYPOINT ["/inference-gateway"] diff --git a/cmd/inference-gateway/main.go b/cmd/inference-gateway/main.go new file mode 100644 index 0000000..d9e3f6a --- /dev/null +++ b/cmd/inference-gateway/main.go @@ -0,0 +1,67 @@ +package main + +import ( + "flag" + "log" + "os" + "os/signal" + "syscall" + + "github.com/ObolNetwork/obol-stack/internal/inference" + "github.com/mark3labs/x402-go" +) + +func main() { + listen := flag.String("listen", ":8402", "Listen address") + upstream := flag.String("upstream", "http://ollama:11434", "Upstream inference service URL") + wallet := flag.String("wallet", "", "USDC recipient wallet address (required)") + price := flag.String("price", "0.001", "USDC price per request") + chain := flag.String("chain", "base-sepolia", "Blockchain network (base, base-sepolia)") + facilitator := flag.String("facilitator", "https://facilitator.x402.rs", "x402 facilitator URL") + flag.Parse() + + if *wallet == "" { + // Check environment variable + *wallet = os.Getenv("X402_WALLET") + if *wallet == "" { + log.Fatal("--wallet flag or X402_WALLET env var required") + } + } + + var x402Chain x402.ChainConfig + switch *chain { + case "base", "base-mainnet": + x402Chain = x402.BaseMainnet + case "base-sepolia": + x402Chain = x402.BaseSepolia + default: + log.Fatalf("unsupported chain: %s (use: base, base-sepolia)", *chain) + } + + gw, err := inference.NewGateway(inference.GatewayConfig{ + ListenAddr: *listen, + UpstreamURL: *upstream, + WalletAddress: *wallet, + PricePerRequest: *price, + Chain: 
x402Chain, + FacilitatorURL: *facilitator, + }) + if err != nil { + log.Fatalf("failed to create gateway: %v", err) + } + + // Handle graceful shutdown + sigCh := make(chan os.Signal, 1) + signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM) + go func() { + <-sigCh + log.Println("shutting down...") + if err := gw.Stop(); err != nil { + log.Printf("shutdown error: %v", err) + } + }() + + if err := gw.Start(); err != nil { + log.Fatalf("gateway error: %v", err) + } +} diff --git a/cmd/obol/inference.go b/cmd/obol/inference.go new file mode 100644 index 0000000..59b2d06 --- /dev/null +++ b/cmd/obol/inference.go @@ -0,0 +1,114 @@ +package main + +import ( + "fmt" + "os" + "os/signal" + "syscall" + + "github.com/ObolNetwork/obol-stack/internal/config" + "github.com/ObolNetwork/obol-stack/internal/inference" + "github.com/mark3labs/x402-go" + "github.com/urfave/cli/v2" +) + +// inferenceCommand returns the inference management command group +func inferenceCommand(cfg *config.Config) *cli.Command { + return &cli.Command{ + Name: "inference", + Usage: "Manage paid inference services (x402)", + Subcommands: []*cli.Command{ + { + Name: "serve", + Usage: "Start the x402 inference gateway (local process)", + Flags: []cli.Flag{ + &cli.StringFlag{ + Name: "listen", + Aliases: []string{"l"}, + Usage: "Listen address for the gateway", + Value: ":8402", + }, + &cli.StringFlag{ + Name: "upstream", + Aliases: []string{"u"}, + Usage: "Upstream inference service URL", + Value: "http://localhost:11434", + }, + &cli.StringFlag{ + Name: "wallet", + Aliases: []string{"w"}, + Usage: "USDC recipient wallet address", + EnvVars: []string{"X402_WALLET"}, + Required: true, + }, + &cli.StringFlag{ + Name: "price", + Usage: "USDC price per inference request", + Value: "0.001", + }, + &cli.StringFlag{ + Name: "chain", + Usage: "Blockchain network for payments (base, base-sepolia)", + Value: "base-sepolia", + }, + &cli.StringFlag{ + Name: "facilitator", + Usage: "x402 facilitator service URL", + 
Value: "https://facilitator.x402.rs", + }, + }, + Action: func(c *cli.Context) error { + chain, err := resolveChain(c.String("chain")) + if err != nil { + return err + } + + gw, err := inference.NewGateway(inference.GatewayConfig{ + ListenAddr: c.String("listen"), + UpstreamURL: c.String("upstream"), + WalletAddress: c.String("wallet"), + PricePerRequest: c.String("price"), + Chain: chain, + FacilitatorURL: c.String("facilitator"), + }) + if err != nil { + return fmt.Errorf("failed to create gateway: %w", err) + } + + // Handle graceful shutdown + sigCh := make(chan os.Signal, 1) + signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM) + go func() { + <-sigCh + fmt.Println("\nShutting down gateway...") + if err := gw.Stop(); err != nil { + fmt.Fprintf(os.Stderr, "shutdown error: %v\n", err) + } + }() + + return gw.Start() + }, + }, + }, + } +} + +// resolveChain maps a chain name string to an x402 ChainConfig. +func resolveChain(name string) (x402.ChainConfig, error) { + switch name { + case "base", "base-mainnet": + return x402.BaseMainnet, nil + case "base-sepolia": + return x402.BaseSepolia, nil + case "polygon", "polygon-mainnet": + return x402.PolygonMainnet, nil + case "polygon-amoy": + return x402.PolygonAmoy, nil + case "avalanche", "avalanche-mainnet": + return x402.AvalancheMainnet, nil + case "avalanche-fuji": + return x402.AvalancheFuji, nil + default: + return x402.ChainConfig{}, fmt.Errorf("unsupported chain: %s (use: base, base-sepolia, polygon, polygon-amoy, avalanche, avalanche-fuji)", name) + } +} diff --git a/cmd/obol/main.go b/cmd/obol/main.go index 4292964..b5175d6 100644 --- a/cmd/obol/main.go +++ b/cmd/obol/main.go @@ -52,6 +52,9 @@ COMMANDS: network install Install and deploy network to cluster network delete Remove network and clean up cluster resources + Inference (x402 Pay-Per-Request): + inference serve Start the x402 inference gateway + App Management: app install Install a Helm chart as an application app list List installed 
applications @@ -59,9 +62,11 @@ COMMANDS: app delete Remove application and cluster resources Tunnel Management: - tunnel status Show tunnel status and public URL - tunnel restart Restart tunnel to get a new URL - tunnel logs View cloudflared logs + tunnel status Show tunnel status and public URL + tunnel login Authenticate and create persistent tunnel (browser) + tunnel provision Provision persistent tunnel (API token) + tunnel restart Restart tunnel connector (quick tunnels get new URL) + tunnel logs View cloudflared logs Kubernetes Tools (with auto-configured KUBECONFIG): kubectl Run kubectl with stack kubeconfig (passthrough) @@ -425,6 +430,7 @@ GLOBAL OPTIONS: }, }, networkCommand(cfg), + inferenceCommand(cfg), { Name: "app", Usage: "Manage applications", diff --git a/go.mod b/go.mod index ac5aa02..f7424b9 100644 --- a/go.mod +++ b/go.mod @@ -1,15 +1,44 @@ module github.com/ObolNetwork/obol-stack -go 1.25 +go 1.25.1 require ( github.com/dustinkirkland/golang-petname v0.0.0-20240428194347-eebcea082ee0 + github.com/mark3labs/x402-go v0.13.0 github.com/urfave/cli/v2 v2.27.7 gopkg.in/yaml.v3 v3.0.1 ) require ( + filippo.io/edwards25519 v1.1.0 // indirect + github.com/benbjohnson/clock v1.3.5 // indirect + github.com/blendle/zapdriver v1.3.1 // indirect github.com/cpuguy83/go-md2man/v2 v2.0.7 // indirect + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/fatih/color v1.18.0 // indirect + github.com/gagliardetto/binary v0.8.0 // indirect + github.com/gagliardetto/solana-go v1.14.0 // indirect + github.com/gagliardetto/treeout v0.1.4 // indirect + github.com/google/uuid v1.6.0 // indirect + github.com/json-iterator/go v1.1.12 // indirect + github.com/klauspost/compress v1.18.1 // indirect + github.com/logrusorgru/aurora v2.0.3+incompatible // indirect + github.com/mattn/go-colorable v0.1.14 // indirect + github.com/mattn/go-isatty v0.0.20 // indirect + github.com/mitchellh/go-testing-interface v1.14.1 // indirect + github.com/modern-go/concurrent 
v0.0.0-20180306012644-bacd9c7ef1dd // indirect + github.com/modern-go/reflect2 v1.0.2 // indirect + github.com/mostynb/zstdpool-freelist v0.0.0-20201229113212-927304c0c3b1 // indirect + github.com/mr-tron/base58 v1.2.0 // indirect github.com/russross/blackfriday/v2 v2.1.0 // indirect + github.com/streamingfast/logging v0.0.0-20250918142248-ac5a1e292845 // indirect github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 // indirect + go.mongodb.org/mongo-driver v1.17.6 // indirect + go.uber.org/multierr v1.11.0 // indirect + go.uber.org/ratelimit v0.3.1 // indirect + go.uber.org/zap v1.27.0 // indirect + golang.org/x/crypto v0.43.0 // indirect + golang.org/x/sys v0.37.0 // indirect + golang.org/x/term v0.36.0 // indirect + golang.org/x/time v0.14.0 // indirect ) diff --git a/go.sum b/go.sum index 5b3c61c..4fdd793 100644 --- a/go.sum +++ b/go.sum @@ -1,14 +1,159 @@ +filippo.io/edwards25519 v1.1.0 h1:FNf4tywRC1HmFuKW5xopWpigGjJKiJSV0Cqo0cJWDaA= +filippo.io/edwards25519 v1.1.0/go.mod h1:BxyFTGdWcka3PhytdK4V28tE5sGfRvvvRV7EaN4VDT4= +github.com/AlekSi/pointer v1.1.0 h1:SSDMPcXD9jSl8FPy9cRzoRaMJtm9g9ggGTxecRUbQoI= +github.com/AlekSi/pointer v1.1.0/go.mod h1:y7BvfRI3wXPWKXEBhU71nbnIEEZX0QTSB2Bj48UJIZE= +github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA= +github.com/benbjohnson/clock v1.3.5 h1:VvXlSJBzZpA/zum6Sj74hxwYI2DIxRWuNIoXAzHZz5o= +github.com/benbjohnson/clock v1.3.5/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA= +github.com/blendle/zapdriver v1.3.1 h1:C3dydBOWYRiOk+B8X9IVZ5IOe+7cl+tGOexN4QqHfpE= +github.com/blendle/zapdriver v1.3.1/go.mod h1:mdXfREi6u5MArG4j9fewC+FGnXaBR+T4Ox4J2u4eHCc= github.com/cpuguy83/go-md2man/v2 v2.0.7 h1:zbFlGlXEAKlwXpmvle3d8Oe3YnkKIK4xSRTd3sHPnBo= github.com/cpuguy83/go-md2man/v2 v2.0.7/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 
h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/dustinkirkland/golang-petname v0.0.0-20240428194347-eebcea082ee0 h1:aYo8nnk3ojoQkP5iErif5Xxv0Mo0Ga/FR5+ffl/7+Nk= github.com/dustinkirkland/golang-petname v0.0.0-20240428194347-eebcea082ee0/go.mod h1:8AuBTZBRSFqEYBPYULd+NN474/zZBLP+6WeT5S9xlAc= +github.com/fatih/color v1.18.0 h1:S8gINlzdQ840/4pfAwic/ZE0djQEH3wM94VfqLTZcOM= +github.com/fatih/color v1.18.0/go.mod h1:4FelSpRwEGDpQ12mAdzqdOukCy4u8WUtOY6lkT/6HfU= +github.com/gagliardetto/binary v0.8.0 h1:U9ahc45v9HW0d15LoN++vIXSJyqR/pWw8DDlhd7zvxg= +github.com/gagliardetto/binary v0.8.0/go.mod h1:2tfj51g5o9dnvsc+fL3Jxr22MuWzYXwx9wEoN0XQ7/c= +github.com/gagliardetto/gofuzz v1.2.2 h1:XL/8qDMzcgvR4+CyRQW9UGdwPRPMHVJfqQ/uMvSUuQw= +github.com/gagliardetto/gofuzz v1.2.2/go.mod h1:bkH/3hYLZrMLbfYWA0pWzXmi5TTRZnu4pMGZBkqMKvY= +github.com/gagliardetto/solana-go v1.14.0 h1:3WfAi70jOOjAJ0deFMjdhFYlLXATF4tOQXsDNWJtOLw= +github.com/gagliardetto/solana-go v1.14.0/go.mod h1:l/qqqIN6qJJPtxW/G1PF4JtcE3Zg2vD2EliZrr9Gn5k= +github.com/gagliardetto/treeout v0.1.4 h1:ozeYerrLCmCubo1TcIjFiOWTTGteOOHND1twdFpgwaw= +github.com/gagliardetto/treeout v0.1.4/go.mod h1:loUefvXTrlRG5rYmJmExNryyBRh8f89VZhmMOyCyqok= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= +github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= +github.com/klauspost/compress v1.11.4/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= 
+github.com/klauspost/compress v1.18.1 h1:bcSGx7UbpBqMChDtsF28Lw6v/G94LPrrbMbdC3JH2co= +github.com/klauspost/compress v1.18.1/go.mod h1:ZQFFVG+MdnR0P+l6wpXgIL4NTtwiKIdBnrBd8Nrxr+0= +github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= +github.com/kr/pretty v0.2.1 h1:Fmg33tUaq4/8ym9TJN1x7sLJnHVwhP33CNkpYV/7rwI= +github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= +github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/logrusorgru/aurora v2.0.3+incompatible h1:tOpm7WcpBTn4fjmVfgpQq0EfczGlG91VSDkswnjF5A8= +github.com/logrusorgru/aurora v2.0.3+incompatible/go.mod h1:7rIyQOR62GCctdiQpZ/zOJlFyk6y+94wXzv6RNZgaR4= +github.com/mark3labs/x402-go v0.13.0 h1:Ppm3GXZx2ZCLJM511mFYeMOw/605h9+M6UT630GdRG0= +github.com/mark3labs/x402-go v0.13.0/go.mod h1:srAvV9FosjBiqrclF15thrQbz0fVVfNXtMcqD0e1hKU= +github.com/mattn/go-colorable v0.1.14 h1:9A9LHSqF/7dyVVX6g0U9cwm9pG3kP9gSzcuIPHPsaIE= +github.com/mattn/go-colorable v0.1.14/go.mod h1:6LmQG8QLFO4G5z1gPvYEzlUgJ2wF+stgPZH1UqBm1s8= +github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= +github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= +github.com/mitchellh/go-testing-interface v1.14.1 h1:jrgshOhYAUVNMAJiKbEu7EqAwgJJ2JqpQmpLJOu07cU= +github.com/mitchellh/go-testing-interface v1.14.1/go.mod h1:gfgS7OtZj6MA4U1UrDRp04twqAjfvlZyCfX3sDjEym8= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= +github.com/modern-go/concurrent 
v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= +github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/mostynb/zstdpool-freelist v0.0.0-20201229113212-927304c0c3b1 h1:mPMvm6X6tf4w8y7j9YIt6V9jfWhL6QlbEc7CCmeQlWk= +github.com/mostynb/zstdpool-freelist v0.0.0-20201229113212-927304c0c3b1/go.mod h1:ye2e/VUEtE2BHE+G/QcKkcLQVAEJoYRFj5VUOQatCRE= +github.com/mr-tron/base58 v1.2.0 h1:T/HDJBh4ZCPbU39/+c3rRvE0uKBQlU27+QI8LJ4t64o= +github.com/mr-tron/base58 v1.2.0/go.mod h1:BinMc/sQntlIE1frQmRFPUoPA1Zkr8VRgBdjWI2mNwc= +github.com/onsi/gomega v1.10.1 h1:o0+MgICZLuZ7xjH7Vx6zS/zcu93/BEp1VwkIW1mEXCE= +github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo= +github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/shopspring/decimal v1.3.1 h1:2Usl1nmF/WZucqkFZhnfFYxxxu8LG21F6nPQBE5gKV8= +github.com/shopspring/decimal v1.3.1/go.mod h1:DKyhrW/HYNuLGql+MJL6WCR6knT2jwCFRcu2hWCYk4o= +github.com/streamingfast/logging v0.0.0-20230608130331-f22c91403091/go.mod h1:VlduQ80JcGJSargkRU4Sg9Xo63wZD/l8A5NC/Uo1/uU= +github.com/streamingfast/logging v0.0.0-20250918142248-ac5a1e292845 h1:VMA0pZ3MI8BErRA3kh8dKJThP5d0Xh5vZVk5yFIgH/A= +github.com/streamingfast/logging v0.0.0-20250918142248-ac5a1e292845/go.mod h1:BtDq81Tyc7H8up5aXNi/I95nPmG3C0PLEqGWY/iWQ2E= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.3.0/go.mod 
h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/test-go/testify v1.1.4 h1:Tf9lntrKUMHiXQ07qBScBTSA0dhYQlu83hswqelv1iE= +github.com/test-go/testify v1.1.4/go.mod h1:rH7cfJo/47vWGdi4GPj16x3/t1xGOj2YxzmNQzk2ghU= github.com/urfave/cli/v2 v2.27.7 h1:bH59vdhbjLv3LAvIu6gd0usJHgoTTPhCFib8qqOwXYU= github.com/urfave/cli/v2 v2.27.7/go.mod h1:CyNAG/xg+iAOg0N4MPGZqVmv2rCoP267496AOXUZjA4= github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 h1:gEOO8jv9F4OT7lGCjxCBTO/36wtF6j2nSip77qHd4x4= github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1/go.mod h1:Ohn+xnUBiLI6FVj/9LpzZWtj1/D6lUovWYBkxHVV3aM= -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= +github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= +go.mongodb.org/mongo-driver v1.17.6 h1:87JUG1wZfWsr6rIz3ZmpH90rL5tea7O3IHuSwHUpsss= +go.mongodb.org/mongo-driver v1.17.6/go.mod h1:Hy04i7O2kC4RS06ZrhPRqj/u4DTYkFDAAccj+rVKqgQ= +go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= +go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= +go.uber.org/atomic v1.11.0 h1:ZvwS0R+56ePWxUNi+Atn9dWONBPp/AUETXlHW0DxSjE= +go.uber.org/atomic v1.11.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0= +go.uber.org/goleak v1.1.11/go.mod h1:cwTWslyiVhfpKIDGSZEM2HlOvcqm+tG4zioyIeLoqMQ= +go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= +go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= +go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0= +go.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU= 
+go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= +go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= +go.uber.org/ratelimit v0.3.1 h1:K4qVE+byfv/B3tC+4nYWP7v/6SimcO7HzHekoMNBma0= +go.uber.org/ratelimit v0.3.1/go.mod h1:6euWsTB6U/Nb3X++xEUXA8ciPJvr19Q/0h1+oDcJhRk= +go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q= +go.uber.org/zap v1.21.0/go.mod h1:wjWOCqI0f2ZZrJF/UufIOkiC8ii6tm1iqIsLo76RfJw= +go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8= +go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20220214200702-86341886e292/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= +golang.org/x/crypto v0.43.0 h1:dduJYIi3A3KOfdGOHX8AVZ/jGiyPa3IbBozJ5kNuE04= +golang.org/x/crypto v0.43.0/go.mod h1:BFbav4mRNlXJL4wNeejLpWxB7wMbc79PdRGhWKncxR0= +golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= +golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= +golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/net v0.46.0 h1:giFlY12I07fugqwPuWJi68oOnpfqFnJIJzaIIm2JVV4= +golang.org/x/net v0.46.0/go.mod 
h1:Q9BGdFy1y4nkUwiLvT5qtyhAnEHgnQ/zd8PfU6nc210= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.37.0 h1:fdNQudmxPjkdUTPnLn5mdQv7Zwvbvpaxqs831goi9kQ= +golang.org/x/sys v0.37.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.36.0 h1:zMPR+aF8gfksFprF/Nc/rd1wRS1EI6nDBGyWAvDzx2Q= +golang.org/x/term v0.36.0/go.mod h1:Qu394IJq6V6dCBRgwqshf3mPF85AqzYEzofzRdZkWss= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.30.0 h1:yznKA/E9zq54KzlzBEAWn1NXSQ8DIp/NYMy88xJjl4k= +golang.org/x/text v0.30.0/go.mod h1:yDdHFIX9t+tORqspjENWgzaCVXgk0yYnYuSZ8UzzBVM= +golang.org/x/time v0.14.0 h1:MRx4UaLrDotUKUdCIqzPC48t1Y9hANFKIRpNx+Te8PI= 
+golang.org/x/time v0.14.0/go.mod h1:eL/Oa2bBBK0TkX57Fyni+NgnyQQN4LitPmob2Hjnqw4= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY= +gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= +gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/internal/embed/networks/inference/Chart.yaml b/internal/embed/networks/inference/Chart.yaml new file mode 100644 index 
0000000..7859bbc --- /dev/null +++ b/internal/embed/networks/inference/Chart.yaml @@ -0,0 +1,5 @@ +apiVersion: v2 +name: inference-core +description: x402-enabled inference gateway with Ollama +type: application +version: 0.1.0 diff --git a/internal/embed/networks/inference/helmfile.yaml.gotmpl b/internal/embed/networks/inference/helmfile.yaml.gotmpl new file mode 100644 index 0000000..e9af653 --- /dev/null +++ b/internal/embed/networks/inference/helmfile.yaml.gotmpl @@ -0,0 +1,49 @@ +repositories: + - name: bedag + url: https://bedag.github.io/helm-charts/ + +releases: + # Core inference resources: Ollama, x402 gateway, Services, HTTPRoute + - name: inference-core + namespace: inference-{{ .Values.id }} + createNamespace: true + chart: . + values: + - id: '{{ .Values.id }}' + model: '{{ .Values.model }}' + pricePerRequest: '{{ .Values.pricePerRequest }}' + walletAddress: '{{ .Values.walletAddress }}' + chain: '{{ .Values.chain }}' + gatewayPort: '{{ .Values.gatewayPort }}' + + # Metadata ConfigMap for frontend discovery + - name: inference-metadata + namespace: inference-{{ .Values.id }} + chart: bedag/raw + values: + - resources: + - apiVersion: v1 + kind: ConfigMap + metadata: + name: inference-{{ .Values.id }}-metadata + namespace: inference-{{ .Values.id }} + labels: + app.kubernetes.io/part-of: obol.stack + obol.stack/id: {{ .Values.id }} + obol.stack/app: inference + data: + metadata.json: | + { + "model": "{{ .Values.model }}", + "pricing": { + "pricePerRequest": "{{ .Values.pricePerRequest }}", + "currency": "USDC", + "chain": "{{ .Values.chain }}" + }, + "endpoints": { + "gateway": { + "external": "http://obol.stack/inference-{{ .Values.id }}/v1", + "internal": "http://inference-gateway.inference-{{ .Values.id }}.svc.cluster.local:{{ .Values.gatewayPort }}" + } + } + } diff --git a/internal/embed/networks/inference/templates/gateway.yaml b/internal/embed/networks/inference/templates/gateway.yaml new file mode 100644 index 0000000..7f4d0ea --- /dev/null 
+++ b/internal/embed/networks/inference/templates/gateway.yaml @@ -0,0 +1,211 @@ +{{- if eq .Release.Name "inference-core" }} +--- +# Ollama inference runtime +apiVersion: apps/v1 +kind: Deployment +metadata: + name: ollama + namespace: {{ .Release.Namespace }} + labels: + app: ollama + app.kubernetes.io/part-of: obol.stack +spec: + replicas: 1 + strategy: + type: Recreate + selector: + matchLabels: + app: ollama + template: + metadata: + labels: + app: ollama + spec: + containers: + - name: ollama + image: ollama/ollama:latest + imagePullPolicy: IfNotPresent + ports: + - name: http + containerPort: 11434 + protocol: TCP + env: + - name: OLLAMA_MODELS + value: /models + - name: OLLAMA_HOST + value: 0.0.0.0:11434 + volumeMounts: + - name: ollama-models + mountPath: /models + readinessProbe: + httpGet: + path: /api/version + port: http + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 2 + livenessProbe: + httpGet: + path: /api/version + port: http + initialDelaySeconds: 30 + periodSeconds: 10 + timeoutSeconds: 2 + resources: + requests: + cpu: 100m + memory: 256Mi + limits: + cpu: 4000m + memory: 8Gi + volumes: + - name: ollama-models + emptyDir: {} + +--- +apiVersion: v1 +kind: Service +metadata: + name: ollama + namespace: {{ .Release.Namespace }} + labels: + app: ollama +spec: + type: ClusterIP + selector: + app: ollama + ports: + - name: http + port: 11434 + targetPort: http + protocol: TCP + +--- +# x402 inference gateway +apiVersion: v1 +kind: ConfigMap +metadata: + name: gateway-config + namespace: {{ .Release.Namespace }} +data: + UPSTREAM_URL: "http://ollama.{{ .Release.Namespace }}.svc.cluster.local:11434" + LISTEN_ADDR: ":{{ .Values.gatewayPort }}" + PRICE_PER_REQUEST: "{{ .Values.pricePerRequest }}" + WALLET_ADDRESS: "{{ .Values.walletAddress }}" + CHAIN: "{{ .Values.chain }}" + +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: inference-gateway + namespace: {{ .Release.Namespace }} + labels: + app: inference-gateway + 
app.kubernetes.io/part-of: obol.stack +spec: + replicas: 1 + selector: + matchLabels: + app: inference-gateway + template: + metadata: + labels: + app: inference-gateway + spec: + containers: + - name: gateway + image: ghcr.io/obolnetwork/inference-gateway:latest + imagePullPolicy: IfNotPresent + ports: + - name: http + containerPort: {{ .Values.gatewayPort }} + protocol: TCP + args: + - --listen=:{{ .Values.gatewayPort }} + - --upstream=http://ollama.{{ .Release.Namespace }}.svc.cluster.local:11434 + - --wallet={{ .Values.walletAddress }} + - --price={{ .Values.pricePerRequest }} + - --chain={{ .Values.chain }} + readinessProbe: + httpGet: + path: /health + port: http + initialDelaySeconds: 3 + periodSeconds: 5 + timeoutSeconds: 2 + livenessProbe: + httpGet: + path: /health + port: http + initialDelaySeconds: 10 + periodSeconds: 10 + timeoutSeconds: 2 + resources: + requests: + cpu: 50m + memory: 64Mi + limits: + cpu: 500m + memory: 256Mi + +--- +apiVersion: v1 +kind: Service +metadata: + name: inference-gateway + namespace: {{ .Release.Namespace }} + labels: + app: inference-gateway +spec: + type: ClusterIP + selector: + app: inference-gateway + ports: + - name: http + port: {{ .Values.gatewayPort }} + targetPort: http + protocol: TCP + +--- +# HTTPRoute for external access via Traefik Gateway API +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute +metadata: + name: inference-gateway + namespace: {{ .Release.Namespace }} +spec: + parentRefs: + - name: traefik-gateway + namespace: traefik + sectionName: web + hostnames: + - obol.stack + rules: + - matches: + - path: + type: PathPrefix + value: /{{ .Release.Namespace }}/v1 + filters: + - type: URLRewrite + urlRewrite: + path: + type: ReplacePrefixMatch + replacePrefixMatch: /v1 + backendRefs: + - name: inference-gateway + port: {{ .Values.gatewayPort }} + - matches: + - path: + type: Exact + value: /{{ .Release.Namespace }}/health + filters: + - type: URLRewrite + urlRewrite: + path: + type: 
ReplacePrefixMatch + replacePrefixMatch: /health + backendRefs: + - name: inference-gateway + port: {{ .Values.gatewayPort }} +{{- end }} diff --git a/internal/embed/networks/inference/values.yaml.gotmpl b/internal/embed/networks/inference/values.yaml.gotmpl new file mode 100644 index 0000000..75f5ed6 --- /dev/null +++ b/internal/embed/networks/inference/values.yaml.gotmpl @@ -0,0 +1,23 @@ +# Configuration via CLI flags +# Template fields populated by obol CLI during network installation + +# @enum llama3.3:70b,llama3.2:3b,qwen2.5:72b,qwen2.5:7b,glm-4.7:cloud,deepseek-r1:7b,phi4:14b +# @default glm-4.7:cloud +# @description Ollama model to serve for inference +model: {{.Model}} + +# @default 0.001 +# @description USDC price per inference request +pricePerRequest: {{.PricePerRequest}} + +# @description USDC recipient wallet address (EVM) +walletAddress: {{.WalletAddress}} + +# @enum base,base-sepolia +# @default base-sepolia +# @description Blockchain network for x402 payments +chain: {{.Chain}} + +# @default 8402 +# @description Port for the x402 inference gateway +gatewayPort: {{.GatewayPort}} diff --git a/internal/inference/gateway.go b/internal/inference/gateway.go new file mode 100644 index 0000000..43379e5 --- /dev/null +++ b/internal/inference/gateway.go @@ -0,0 +1,140 @@ +package inference + +import ( + "context" + "fmt" + "log" + "net" + "net/http" + "net/http/httputil" + "net/url" + "time" + + "github.com/mark3labs/x402-go" + x402http "github.com/mark3labs/x402-go/http" +) + +// GatewayConfig holds configuration for the x402 inference gateway. +type GatewayConfig struct { + // ListenAddr is the address to listen on (e.g., ":8402"). + ListenAddr string + + // UpstreamURL is the upstream inference service URL (e.g., "http://localhost:11434"). + UpstreamURL string + + // WalletAddress is the USDC recipient address for payments. + WalletAddress string + + // PricePerRequest is the USDC amount charged per inference request (e.g., "0.001"). 
+ PricePerRequest string + + // Chain is the x402 chain configuration (e.g., x402.BaseMainnet). + Chain x402.ChainConfig + + // FacilitatorURL is the x402 facilitator service URL. + FacilitatorURL string +} + +// Gateway is an x402-enabled reverse proxy for LLM inference. +type Gateway struct { + config GatewayConfig + server *http.Server +} + +// NewGateway creates a new inference gateway with the given configuration. +func NewGateway(cfg GatewayConfig) (*Gateway, error) { + if cfg.ListenAddr == "" { + cfg.ListenAddr = ":8402" + } + if cfg.FacilitatorURL == "" { + cfg.FacilitatorURL = "https://facilitator.x402.rs" + } + if cfg.Chain.NetworkID == "" { + cfg.Chain = x402.BaseSepolia + } + if cfg.PricePerRequest == "" { + cfg.PricePerRequest = "0.001" + } + + return &Gateway{config: cfg}, nil +} + +// Start begins serving the gateway. Blocks until the server is shut down. +func (g *Gateway) Start() error { + upstream, err := url.Parse(g.config.UpstreamURL) + if err != nil { + return fmt.Errorf("invalid upstream URL %q: %w", g.config.UpstreamURL, err) + } + + // Build reverse proxy to upstream inference service + proxy := httputil.NewSingleHostReverseProxy(upstream) + proxy.ErrorHandler = func(w http.ResponseWriter, r *http.Request, err error) { + log.Printf("proxy error: %v", err) + http.Error(w, "upstream unavailable", http.StatusBadGateway) + } + + // Create x402 payment requirement + requirement, err := x402.NewUSDCPaymentRequirement(x402.USDCRequirementConfig{ + Chain: g.config.Chain, + Amount: g.config.PricePerRequest, + RecipientAddress: g.config.WalletAddress, + }) + if err != nil { + return fmt.Errorf("failed to create payment requirement: %w", err) + } + + // Configure x402 middleware + x402Config := &x402http.Config{ + FacilitatorURL: g.config.FacilitatorURL, + PaymentRequirements: []x402.PaymentRequirement{requirement}, + } + paymentMiddleware := x402http.NewX402Middleware(x402Config) + + // Build HTTP mux + mux := http.NewServeMux() + + // Health check 
(no payment required) + mux.HandleFunc("GET /health", func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + fmt.Fprintln(w, `{"status":"ok"}`) + }) + + // Protected inference endpoints (x402 payment required) + mux.Handle("POST /v1/chat/completions", paymentMiddleware(proxy)) + mux.Handle("POST /v1/completions", paymentMiddleware(proxy)) + mux.Handle("POST /v1/embeddings", paymentMiddleware(proxy)) + mux.Handle("GET /v1/models", paymentMiddleware(proxy)) + + // Unprotected OpenAI-compat metadata + mux.Handle("/", proxy) + + g.server = &http.Server{ + Addr: g.config.ListenAddr, + Handler: mux, + ReadHeaderTimeout: 10 * time.Second, + } + + listener, err := net.Listen("tcp", g.config.ListenAddr) + if err != nil { + return fmt.Errorf("failed to listen on %s: %w", g.config.ListenAddr, err) + } + + log.Printf("x402 inference gateway listening on %s", g.config.ListenAddr) + log.Printf(" upstream: %s", g.config.UpstreamURL) + log.Printf(" wallet: %s", g.config.WalletAddress) + log.Printf(" price: %s USDC/request", g.config.PricePerRequest) + log.Printf(" chain: %s", g.config.Chain.NetworkID) + log.Printf(" facilitator: %s", g.config.FacilitatorURL) + + return g.server.Serve(listener) +} + +// Stop gracefully shuts down the gateway. 
+func (g *Gateway) Stop() error { + if g.server == nil { + return nil + } + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + return g.server.Shutdown(ctx) +} From b5564fcc4b3946686f5443858f848da2ccde7c0d Mon Sep 17 00:00:00 2001 From: bussyjd Date: Sun, 8 Feb 2026 16:48:16 +0400 Subject: [PATCH 16/42] fix(infra): fix helmfile template errors in defaults deployment - Remove unused $publicDomain variable from helmfile.yaml (caused Helmfile v1 gotmpl pre-processing to fail on .Values.* references) - Fix eRPC secretEnv: chart expects plain strings, not secretKeyRef maps; move OBOL_OAUTH_TOKEN to extraEnv with valueFrom - Fix obol-frontend escaped quotes in gotmpl (invalid \\" in operand) --- internal/embed/infrastructure/helmfile.yaml | 1 - .../infrastructure/values/erpc.yaml.gotmpl | 20 +++++++++++-------- .../values/obol-frontend.yaml.gotmpl | 10 +++++----- 3 files changed, 17 insertions(+), 14 deletions(-) diff --git a/internal/embed/infrastructure/helmfile.yaml b/internal/embed/infrastructure/helmfile.yaml index 1b173d2..dd57261 100644 --- a/internal/embed/infrastructure/helmfile.yaml +++ b/internal/embed/infrastructure/helmfile.yaml @@ -1,7 +1,6 @@ # Helmfile for Obol Stack default infrastructure # Orchestrates core infrastructure components deployed with every stack # Uses Traefik with Gateway API for routing (replaces nginx-ingress) -{{- $publicDomain := env "STACK_PUBLIC_DOMAIN" | default "obol.stack" -}} repositories: - name: traefik diff --git a/internal/embed/infrastructure/values/erpc.yaml.gotmpl b/internal/embed/infrastructure/values/erpc.yaml.gotmpl index b7c07f8..051670c 100644 --- a/internal/embed/infrastructure/values/erpc.yaml.gotmpl +++ b/internal/embed/infrastructure/values/erpc.yaml.gotmpl @@ -87,12 +87,17 @@ config: |- allowCredentials: true maxAge: 3600 -# Secret env variables -secretEnv: - OBOL_OAUTH_TOKEN: - secretKeyRef: - name: obol-oauth-token - key: token +# Secret env variables (chart-managed 
secret for inline values) +secretEnv: {} + +# Extra env variables (reference external obol-oauth-token secret) +extraEnv: + - name: OBOL_OAUTH_TOKEN + valueFrom: + secretKeyRef: + name: obol-oauth-token + key: token + optional: true # Extra args for the erpc container extraArgs: [] @@ -198,8 +203,7 @@ extraVolumeMounts: [] # Additional ports extraPorts: [] -# Additional env variables -extraEnv: [] +# Additional env variables (defined above with OBOL_OAUTH_TOKEN) serviceMonitor: enabled: false diff --git a/internal/embed/infrastructure/values/obol-frontend.yaml.gotmpl b/internal/embed/infrastructure/values/obol-frontend.yaml.gotmpl index caff157..f95ca2b 100644 --- a/internal/embed/infrastructure/values/obol-frontend.yaml.gotmpl +++ b/internal/embed/infrastructure/values/obol-frontend.yaml.gotmpl @@ -11,17 +11,17 @@ image: - name: NEXT_PUBLIC_HELIOS_CLIENT_URL value: "http://helios-{{ $network }}.helios.svc.cluster.local:8545" - name: NEXT_PUBLIC_ERPC_URL - value: "{{ printf \"https://%s/rpc\" $publicDomain }}" + value: "https://{{ $publicDomain }}/rpc" - name: NEXT_PUBLIC_AZTEC_SEQUENCER_URL value: "http://l2-sequencer-node-mainnet-node.aztec.svc.cluster.local:8080" - name: BETTER_AUTH_SECRET - value: "{{ env \"BETTER_AUTH_SECRET\" }}" + value: {{ env "BETTER_AUTH_SECRET" | default "" | quote }} - name: BETTER_AUTH_URL - value: "{{ printf \"https://%s\" $publicDomain }}" + value: "https://{{ $publicDomain }}" - name: OBOL_GOOGLE_CLIENT_ID - value: "{{ env \"OBOL_GOOGLE_CLIENT_ID\" }}" + value: {{ env "OBOL_GOOGLE_CLIENT_ID" | default "" | quote }} - name: OBOL_GOOGLE_CLIENT_SECRET - value: "{{ env \"OBOL_GOOGLE_CLIENT_SECRET\" }}" + value: {{ env "OBOL_GOOGLE_CLIENT_SECRET" | default "" | quote }} - name: OBOL_AUTH_DB_PATH value: "/data/auth.sqlite" From 6c4cbc69830f19931dec7c06d6e40afa424fa1a3 Mon Sep 17 00:00:00 2001 From: bussyjd Date: Mon, 9 Feb 2026 19:45:53 +0400 Subject: [PATCH 17/42] refactor(llm): remove in-cluster Ollama, proxy to host via ExternalName 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the in-cluster Ollama Deployment/PVC/Service with an ExternalName Service that routes ollama.llm.svc.cluster.local to the host machine's Ollama server. LLMSpy and all consumers use the stable cluster-internal DNS name; the ExternalName target is resolved during stack init via the {{OLLAMA_HOST}} placeholder: k3d → host.k3d.internal k3s → node gateway IP (future) This avoids duplicating the model cache inside the cluster and leverages the host's GPU/VRAM for inference. Also updates CopyDefaults to accept a replacements map, following the same pattern used for k3d.yaml placeholder resolution. --- internal/embed/embed.go | 14 +- .../infrastructure/base/templates/llm.yaml | 140 +++++------------- internal/stack/stack.go | 7 +- 3 files changed, 52 insertions(+), 109 deletions(-) diff --git a/internal/embed/embed.go b/internal/embed/embed.go index 2c189eb..99b8607 100644 --- a/internal/embed/embed.go +++ b/internal/embed/embed.go @@ -21,8 +21,10 @@ var infrastructureFS embed.FS //go:embed all:networks var networksFS embed.FS -// CopyDefaults recursively copies all embedded infrastructure manifests to the destination directory -func CopyDefaults(destDir string) error { +// CopyDefaults recursively copies all embedded infrastructure manifests to the destination directory. +// The replacements map is applied to every file: each key (e.g. "{{OLLAMA_HOST}}") is replaced +// with its value. Pass nil for a verbatim copy. 
+func CopyDefaults(destDir string, replacements map[string]string) error { return fs.WalkDir(infrastructureFS, "infrastructure", func(path string, d fs.DirEntry, err error) error { if err != nil { return err @@ -57,8 +59,14 @@ func CopyDefaults(destDir string) error { return fmt.Errorf("failed to read embedded file %s: %w", path, err) } + // Apply placeholder replacements + content := string(data) + for placeholder, value := range replacements { + content = strings.ReplaceAll(content, placeholder, value) + } + // Write to destination - if err := os.WriteFile(destPath, data, 0644); err != nil { + if err := os.WriteFile(destPath, []byte(content), 0644); err != nil { return fmt.Errorf("failed to write file %s: %w", destPath, err) } diff --git a/internal/embed/infrastructure/base/templates/llm.yaml b/internal/embed/infrastructure/base/templates/llm.yaml index 5633866..4547c8f 100644 --- a/internal/embed/infrastructure/base/templates/llm.yaml +++ b/internal/embed/infrastructure/base/templates/llm.yaml @@ -2,104 +2,25 @@ # LLM foundation services (OKR-1) # # This deploys: -# - Ollama (as the upstream LLM runtime) +# - An ExternalName Service "ollama" that resolves to the host's Ollama server # - llms.py (LLMSpy) as an OpenAI-compatible gateway / router over providers # # Design notes: -# - We default to Ollama Cloud (`glm-4.7:cloud`) to avoid requiring local GPU/VRAM. -# - We persist Ollama's identity keypair at `/root/.ollama/id_ed25519` so the -# Ollama Cloud "connect" binding survives pod restarts/upgrades. -# - Model cache is kept on `emptyDir` (ephemeral) per product decision. +# - No in-cluster Ollama is deployed; the host is expected to run Ollama +# (or another OpenAI-compatible server) on port 11434. +# - The ollama Service abstracts host resolution: +# k3d → host.k3d.internal +# k3s → resolved at stack init via node IP +# - LLMSpy and all consumers reference ollama.llm.svc.cluster.local:11434, +# which the ExternalName Service routes to the host. 
apiVersion: v1 kind: Namespace metadata: name: llm --- -# Persist Ollama identity (Ollama Cloud connect uses the public key derived from this keypair). -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: ollama-home - namespace: llm -spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: 256Mi - ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: ollama - namespace: llm - labels: - app: ollama -spec: - replicas: 1 - # Ollama uses a ReadWriteOnce PVC; avoid surging a second pod during updates. - strategy: - type: Recreate - selector: - matchLabels: - app: ollama - template: - metadata: - labels: - app: ollama - spec: - containers: - - name: ollama - image: ollama/ollama:latest - imagePullPolicy: IfNotPresent - ports: - - name: http - containerPort: 11434 - protocol: TCP - env: - # Store model blobs (including any cloud model stubs/cache) in an ephemeral volume. - - name: OLLAMA_MODELS - value: /models - # Explicitly bind the HTTP API to all interfaces in-cluster. - - name: OLLAMA_HOST - value: 0.0.0.0:11434 - volumeMounts: - # Persist identity + config (e.g. ~/.ollama/id_ed25519) for Ollama Cloud connect. - - name: ollama-home - mountPath: /root/.ollama - - name: ollama-models - mountPath: /models - readinessProbe: - httpGet: - path: /api/version - port: http - initialDelaySeconds: 5 - periodSeconds: 5 - timeoutSeconds: 2 - livenessProbe: - httpGet: - path: /api/version - port: http - initialDelaySeconds: 30 - periodSeconds: 10 - timeoutSeconds: 2 - resources: - requests: - cpu: 100m - memory: 256Mi - limits: - cpu: 2000m - memory: 4Gi - volumes: - - name: ollama-home - persistentVolumeClaim: - claimName: ollama-home - - name: ollama-models - emptyDir: {} - ---- +# ExternalName Service: routes ollama.llm.svc.cluster.local → host Ollama. +# The externalName is resolved during `obol stack init` via the {{OLLAMA_HOST}} placeholder. 
apiVersion: v1 kind: Service metadata: @@ -108,19 +29,17 @@ metadata: labels: app: ollama spec: - type: ClusterIP - selector: - app: ollama + type: ExternalName + externalName: {{OLLAMA_HOST}} ports: - name: http port: 11434 - targetPort: http protocol: TCP --- -# llms.py configuration for Obol Stack: -# - Only enable the Ollama provider -# - Default model is `glm-4.7:cloud` (cloud-first) +# llms.py v3 configuration for Obol Stack: +# - Only enable the Ollama provider (host machine via ollama Service) +# - Default model is glm-4.7-flash apiVersion: v1 kind: ConfigMap metadata: @@ -129,27 +48,35 @@ metadata: data: llms.json: | { + "version": 3, "defaults": { "headers": { - "Content-Type": "application/json" + "Content-Type": "application/json", + "User-Agent": "llmspy.org/3.0" }, "text": { - "model": "glm-4.7:cloud", + "model": "glm-4.7-flash", "messages": [ - { "role": "user", "content": "" } + { "role": "user", "content": [{ "type": "text", "text": "" }] } ] } }, "providers": { "ollama": { - "enabled": true, - "type": "OllamaProvider", - "base_url": "http://ollama.llm.svc.cluster.local:11434", - "models": {}, - "all_models": true + "enabled": true } } } + providers.json: | + { + "ollama": { + "id": "ollama", + "npm": "ollama", + "api": "http://ollama.llm.svc.cluster.local:11434", + "models": {}, + "all_models": true + } + } --- apiVersion: apps/v1 @@ -182,6 +109,7 @@ spec: set -eu mkdir -p /data cp /config/llms.json /data/llms.json + cp /config/providers.json /data/providers.json volumeMounts: - name: llmspy-config mountPath: /config @@ -192,7 +120,7 @@ spec: - name: llmspy # Official LLMSpy container image (published by upstream). # Pin a specific version for reproducibility. 
- image: ghcr.io/servicestack/llms:v2.0.30 + image: ghcr.io/servicestack/llms:latest imagePullPolicy: IfNotPresent ports: - name: http @@ -240,6 +168,8 @@ spec: items: - key: llms.json path: llms.json + - key: providers.json + path: providers.json - name: llmspy-home emptyDir: {} diff --git a/internal/stack/stack.go b/internal/stack/stack.go index 8aa4872..3c51d6f 100644 --- a/internal/stack/stack.go +++ b/internal/stack/stack.go @@ -77,8 +77,13 @@ func Init(cfg *config.Config, force bool) error { fmt.Printf("K3d config saved to: %s\n", k3dConfigPath) // Copy embedded defaults (helmfile + charts for infrastructure) + // Resolve placeholders: {{OLLAMA_HOST}} → host DNS for the cluster runtime. + // k3d uses host.k3d.internal; bare k3s would use the node's gateway IP. + ollamaHost := "host.k3d.internal" defaultsDir := filepath.Join(cfg.ConfigDir, "defaults") - if err := embed.CopyDefaults(defaultsDir); err != nil { + if err := embed.CopyDefaults(defaultsDir, map[string]string{ + "{{OLLAMA_HOST}}": ollamaHost, + }); err != nil { return fmt.Errorf("failed to copy defaults: %w", err) } fmt.Printf("Defaults copied to: %s\n", defaultsDir) From fa40287921ac94232bec0da67d68592cc82b7ac5 Mon Sep 17 00:00:00 2001 From: bussyjd Date: Mon, 9 Feb 2026 20:21:47 +0400 Subject: [PATCH 18/42] fix(infra): disable obol-agent from default stack deployment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The obol-agent deployment in the agent namespace fails with ImagePullBackOff because its container image is not publicly accessible. Wrap the template in a Helm conditional (obolAgent.enabled) defaulting to false so it no longer deploys automatically. The manifest is preserved for future use — set obolAgent.enabled=true in the base chart values to re-enable. 
--- internal/embed/infrastructure/base/templates/obol-agent.yaml | 5 +++++ internal/embed/infrastructure/helmfile.yaml | 4 ++++ 2 files changed, 9 insertions(+) diff --git a/internal/embed/infrastructure/base/templates/obol-agent.yaml b/internal/embed/infrastructure/base/templates/obol-agent.yaml index 7220dbf..7451db7 100644 --- a/internal/embed/infrastructure/base/templates/obol-agent.yaml +++ b/internal/embed/infrastructure/base/templates/obol-agent.yaml @@ -1,8 +1,12 @@ +{{- if .Values.obolAgent.enabled }} --- # Obol Agent Kubernetes Manifest # This manifest deploys the Obol AI Agent with namespace-scoped RBAC permissions # The agent can read cluster-wide resources (nodes, namespaces) but can only modify # resources in specific namespaces: agent (and others via dynamic bindings) +# +# To enable the obol-agent, set obolAgent.enabled=true in the base chart values +# (infrastructure helmfile.yaml → base release → values). #------------------------------------------------------------------------------ # Namespace - Ensure the agent namespace exists @@ -198,3 +202,4 @@ spec: name: http selector: app: obol-agent # Routes traffic to pods with this label +{{- end }} diff --git a/internal/embed/infrastructure/helmfile.yaml b/internal/embed/infrastructure/helmfile.yaml index dd57261..c1e9bb3 100644 --- a/internal/embed/infrastructure/helmfile.yaml +++ b/internal/embed/infrastructure/helmfile.yaml @@ -29,6 +29,10 @@ releases: values: - dataDir: /data - network: "{{ .Values.network }}" + # obol-agent is disabled by default (image not publicly available). + # Set obolAgent.enabled=true to deploy it. 
+ - obolAgent: + enabled: false # Monitoring stack (Prometheus operator + Prometheus) - name: monitoring From a2718dea9d1e891dc64db019d464997fdd7eb91f Mon Sep 17 00:00:00 2001 From: bussyjd Date: Mon, 9 Feb 2026 22:51:52 +0400 Subject: [PATCH 19/42] ci(openclaw): add Docker image build workflow with Renovate auto-bump Add GitHub Actions workflow to build and publish the OpenClaw container image to ghcr.io/obolnetwork/openclaw from the upstream openclaw/openclaw repo at a pinned version. Renovate watches for new upstream releases and auto-opens PRs to bump the version file. Closes #142 --- .github/workflows/docker-publish-openclaw.yml | 117 ++++++++++++++++++ internal/openclaw/OPENCLAW_VERSION | 2 + renovate.json | 27 ++++ 3 files changed, 146 insertions(+) create mode 100644 .github/workflows/docker-publish-openclaw.yml create mode 100644 internal/openclaw/OPENCLAW_VERSION diff --git a/.github/workflows/docker-publish-openclaw.yml b/.github/workflows/docker-publish-openclaw.yml new file mode 100644 index 0000000..777e73b --- /dev/null +++ b/.github/workflows/docker-publish-openclaw.yml @@ -0,0 +1,117 @@ +name: Build and Publish OpenClaw Image + +on: + push: + branches: + - main + paths: + - 'internal/openclaw/OPENCLAW_VERSION' + workflow_dispatch: + inputs: + version: + description: 'OpenClaw version to build (e.g. 
v2026.2.3)' + required: false + type: string + +env: + REGISTRY: ghcr.io + IMAGE_NAME: obolnetwork/openclaw + +jobs: + build-and-push: + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + + steps: + - name: Checkout obol-stack + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + + - name: Read pinned version + id: version + run: | + if [ -n "${{ github.event.inputs.version }}" ]; then + VERSION="${{ github.event.inputs.version }}" + else + VERSION=$(grep -v '^#' internal/openclaw/OPENCLAW_VERSION | tr -d '[:space:]') + fi + echo "version=$VERSION" >> "$GITHUB_OUTPUT" + echo "Building OpenClaw $VERSION" + + - name: Checkout upstream OpenClaw + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + with: + repository: openclaw/openclaw + ref: ${{ steps.version.outputs.version }} + path: openclaw-src + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # v3.11.1 + + - name: Set up QEMU + uses: docker/setup-qemu-action@29109295f81e9208d7d86ff1c6c12d2833863392 # v3.6.0 + + - name: Login to GitHub Container Registry + uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Extract metadata + id: meta + uses: docker/metadata-action@902fa8ec7d6ecbf8d84d538b9b233a880e428804 # v5.7.0 + with: + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + tags: | + type=semver,pattern={{version}},value=${{ steps.version.outputs.version }} + type=semver,pattern={{major}}.{{minor}},value=${{ steps.version.outputs.version }} + type=sha,prefix= + type=raw,value=latest,enable=${{ github.ref == 'refs/heads/main' }} + labels: | + org.opencontainers.image.title=OpenClaw + org.opencontainers.image.description=AI agent gateway for Obol Stack + org.opencontainers.image.vendor=Obol Network + 
org.opencontainers.image.source=https://github.com/openclaw/openclaw + org.opencontainers.image.version=${{ steps.version.outputs.version }} + + - name: Build and push Docker image + uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # v6.18.0 + with: + context: openclaw-src + platforms: linux/amd64,linux/arm64 + push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + cache-from: type=gha + cache-to: type=gha,mode=max + provenance: true + sbom: true + + security-scan: + needs: build-and-push + runs-on: ubuntu-latest + permissions: + security-events: write + + steps: + - name: Read pinned version + id: version + run: | + # Re-derive for the scan job + echo "Scanning latest pushed image" + + - name: Run Trivy vulnerability scanner + uses: aquasecurity/trivy-action@915b19bbe73b92421caafd48a29a70a5d22ba401 # v0.30.0 + with: + image-ref: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest + format: 'sarif' + output: 'trivy-results.sarif' + severity: 'CRITICAL,HIGH' + + - name: Upload Trivy scan results to GitHub Security tab + uses: github/codeql-action/upload-sarif@c36620d31ac7c881962c3d9dd939c40ec9434f2b # v3.28.0 + with: + sarif_file: 'trivy-results.sarif' + if: always() diff --git a/internal/openclaw/OPENCLAW_VERSION b/internal/openclaw/OPENCLAW_VERSION new file mode 100644 index 0000000..06196e4 --- /dev/null +++ b/internal/openclaw/OPENCLAW_VERSION @@ -0,0 +1,2 @@ +# renovate: datasource=github-releases depName=openclaw/openclaw +v2026.2.3 diff --git a/renovate.json b/renovate.json index afab9bf..81e8188 100644 --- a/renovate.json +++ b/renovate.json @@ -33,6 +33,17 @@ "datasourceTemplate": "github-releases", "depNameTemplate": "kubernetes-sigs/gateway-api", "versioningTemplate": "semver" + }, + { + "customType": "regex", + "description": "Update OpenClaw version from upstream GitHub releases", + "matchStrings": [ + "#\\s*renovate:\\s*datasource=(?.*?)\\s+depName=(?.*?)\\n(?v[0-9]+\\.[0-9]+\\.[0-9]+)" + ], + 
"fileMatch": [ + "^internal/openclaw/OPENCLAW_VERSION$" + ], + "versioningTemplate": "semver" } ], "packageRules": [ @@ -89,6 +100,22 @@ ], "dependencyDashboardApproval": true, "prBodyTemplate": "⚠️ **MAJOR VERSION UPDATE** ⚠️\n\nThis PR updates **obol-stack-front-end** from `{{currentVersion}}` to `{{newVersion}}`.\n\n### ⚠️ Breaking Changes Expected\n\nMajor version updates may include breaking changes. Please review the release notes carefully.\n\n### Release Notes\n\n{{{changelog}}}\n\n### Migration Checklist\n- [ ] Review breaking changes in release notes\n- [ ] Test the new version in staging environment\n- [ ] Update any integration code if needed\n- [ ] Verify deployment scripts still work\n\n---\n**⚠️ This PR requires manual approval due to major version change**\n**Auto-generated by Renovate Bot**" + }, + { + "description": "Group OpenClaw updates", + "matchDatasources": [ + "github-releases" + ], + "matchPackageNames": [ + "openclaw/openclaw" + ], + "labels": [ + "renovate/openclaw" + ], + "schedule": [ + "every hour" + ], + "groupName": "OpenClaw updates" } ] } From bf4039fe01d0dade80fa9a1ee1c86fcc4137a5f7 Mon Sep 17 00:00:00 2001 From: bussyjd Date: Tue, 10 Feb 2026 13:58:46 +0400 Subject: [PATCH 20/42] ci(openclaw): temporarily add test branches to workflow triggers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add integration-okr-1 and feat/openclaw-ci to push triggers for testing. Remove after verifying the workflow runs successfully — limit to main only. 
--- .github/workflows/docker-publish-openclaw.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/docker-publish-openclaw.yml b/.github/workflows/docker-publish-openclaw.yml index 777e73b..c666948 100644 --- a/.github/workflows/docker-publish-openclaw.yml +++ b/.github/workflows/docker-publish-openclaw.yml @@ -4,6 +4,8 @@ on: push: branches: - main + - integration-okr-1 # TODO: remove after testing — limit to main only + - feat/openclaw-ci # TODO: remove after testing — limit to main only paths: - 'internal/openclaw/OPENCLAW_VERSION' workflow_dispatch: From 104c03b7a9fd8c7dee8711bee972da0feae5c10f Mon Sep 17 00:00:00 2001 From: bussyjd Date: Tue, 10 Feb 2026 14:03:35 +0400 Subject: [PATCH 21/42] ci(openclaw): trigger workflow test run --- internal/openclaw/OPENCLAW_VERSION | 1 + 1 file changed, 1 insertion(+) diff --git a/internal/openclaw/OPENCLAW_VERSION b/internal/openclaw/OPENCLAW_VERSION index 06196e4..9bf0820 100644 --- a/internal/openclaw/OPENCLAW_VERSION +++ b/internal/openclaw/OPENCLAW_VERSION @@ -1,2 +1,3 @@ # renovate: datasource=github-releases depName=openclaw/openclaw +# This file pins the upstream OpenClaw version to build and publish. v2026.2.3 From 2fa8ae7d78ba1c29ff3d7cee799564abddab7c72 Mon Sep 17 00:00:00 2001 From: bussyjd Date: Tue, 10 Feb 2026 14:21:06 +0400 Subject: [PATCH 22/42] fix(ci): update Trivy and CodeQL action SHAs to latest The pinned SHAs from charon-dkg-sidecar were stale and caused the security-scan job to fail at setup. 
--- .github/workflows/docker-publish-openclaw.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/docker-publish-openclaw.yml b/.github/workflows/docker-publish-openclaw.yml index c666948..7cf12cb 100644 --- a/.github/workflows/docker-publish-openclaw.yml +++ b/.github/workflows/docker-publish-openclaw.yml @@ -105,7 +105,7 @@ jobs: echo "Scanning latest pushed image" - name: Run Trivy vulnerability scanner - uses: aquasecurity/trivy-action@915b19bbe73b92421caafd48a29a70a5d22ba401 # v0.30.0 + uses: aquasecurity/trivy-action@22438a435773de8c97dc0958cc0b823c45b064ac # master with: image-ref: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest format: 'sarif' @@ -113,7 +113,7 @@ jobs: severity: 'CRITICAL,HIGH' - name: Upload Trivy scan results to GitHub Security tab - uses: github/codeql-action/upload-sarif@c36620d31ac7c881962c3d9dd939c40ec9434f2b # v3.28.0 + uses: github/codeql-action/upload-sarif@b13d724d35ff0a814e21683638ed68ed34cf53d1 # main with: sarif_file: 'trivy-results.sarif' if: always() From 13f84ca6d909f7ce0e01d67581778cb813cbba63 Mon Sep 17 00:00:00 2001 From: bussyjd Date: Tue, 10 Feb 2026 14:52:04 +0400 Subject: [PATCH 23/42] ci(openclaw): re-trigger workflow to verify security scan fix --- internal/openclaw/OPENCLAW_VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/openclaw/OPENCLAW_VERSION b/internal/openclaw/OPENCLAW_VERSION index 9bf0820..50df0a3 100644 --- a/internal/openclaw/OPENCLAW_VERSION +++ b/internal/openclaw/OPENCLAW_VERSION @@ -1,3 +1,3 @@ # renovate: datasource=github-releases depName=openclaw/openclaw -# This file pins the upstream OpenClaw version to build and publish. +# Pins the upstream OpenClaw version to build and publish. 
v2026.2.3 From e27de5861f9727ed3408b1649e93458ff17951f8 Mon Sep 17 00:00:00 2001 From: bussyjd Date: Tue, 10 Feb 2026 16:43:21 +0400 Subject: [PATCH 24/42] chore(openclaw): bump version to v2026.2.9 --- internal/openclaw/OPENCLAW_VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/openclaw/OPENCLAW_VERSION b/internal/openclaw/OPENCLAW_VERSION index 50df0a3..04b5d69 100644 --- a/internal/openclaw/OPENCLAW_VERSION +++ b/internal/openclaw/OPENCLAW_VERSION @@ -1,3 +1,3 @@ # renovate: datasource=github-releases depName=openclaw/openclaw # Pins the upstream OpenClaw version to build and publish. -v2026.2.3 +v2026.2.9 From 8bd173d0d768e787903ba46dec628c88f0b5765b Mon Sep 17 00:00:00 2001 From: JeanDaniel Bussy Date: Thu, 12 Feb 2026 16:57:16 +0400 Subject: [PATCH 25/42] feat(openclaw): add OpenClaw CLI and Helm chart (#137) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat(openclaw): add OpenClaw CLI and embedded chart for Obol Stack Adds `obol openclaw` subcommands to deploy and manage OpenClaw AI agent instances on the local k3d cluster. The chart is embedded via go:embed for development use; the canonical chart lives in ObolNetwork/helm-charts. CLI commands: openclaw up - Create and deploy an instance openclaw sync - Re-deploy / update an existing instance openclaw token - Retrieve the gateway token openclaw list - List deployed instances openclaw delete - Remove an instance openclaw skills - Sync skills from a local directory The embedded Helm chart supports: - Pluggable model providers (Anthropic, OpenAI, Ollama) - Chat channels (Telegram, Discord, Slack) - Skills injection via ConfigMap + init container - RBAC, Gateway API HTTPRoute, values schema validation * feat(openclaw): integrate OpenClaw into stack setup with config import OpenClaw is now deployed automatically as a default instance during `obol stack up`. 
Adds ~/.openclaw/openclaw.json detection and import, interactive provider selection for direct CLI usage, and idempotent re-sync behavior for the default instance. * fix: resolve CRD conflicts, OpenClaw command, HTTPRoute spec, and KUBECONFIG propagation - Remove gateway-api-crds presync hook; Traefik v38+ manages its own CRDs - Fix Ethereum HTTPRoute: use single PathPrefix match (Gateway API spec) - Fix OpenClaw chart command to match upstream Dockerfile (node openclaw.mjs) - Update OpenClaw image tag to match GHCR published format (no v prefix) - Add KUBECONFIG env to helmfile subprocess in stack.go (aligns with all other packages) * feat(openclaw): detect and import existing ~/.openclaw workspace + bump to v2026.2.9 Auto-detect existing OpenClaw installations during `obol stack up` and `obol openclaw up`. When ~/.openclaw/ contains a workspace directory with personality files (SOUL.md, AGENTS.md, etc.), copies them into the pod's PVC after deployment. Auto-skips interactive provider prompts when an existing config with providers is detected. Also bumps the chart image to v2026.2.9 to match the CI-published image. * feat(openclaw): add setup wizard and dashboard commands Add `obol openclaw setup ` which port-forwards to the deployed gateway and runs the native OpenClaw onboard wizard via PTY. The wizard provides the full onboarding experience (personality, channels, skills, providers) against the running k8s instance. Add `obol openclaw dashboard ` which port-forwards and opens the web dashboard in the browser with auto-injected gateway token. 
Implementation details: - Port-forward lifecycle manager with auto-port selection - PTY-based wizard with raw terminal mode for @clack/prompts support - Sliding-window marker detection to exit cleanly when wizard completes - Proper PTY shutdown sequence (close master -> kill -> wait) to avoid hang caused by stdin copy goroutine blocking cmd.Wait() - Refactored Token() into reusable getToken() helper - findOpenClawBinary() searches PATH then cfg.BinDir with install hints - obolup.sh gains install_openclaw() for npm-based binary management * feat(llm,openclaw): llmspy universal proxy + openclaw CLI passthrough Route all cloud API traffic through llmspy as a universal gateway: - Add Anthropic/OpenAI providers to llm.yaml (ConfigMap + Secret + envFrom) - New `internal/llm` package with ConfigureLLMSpy() for imperative patching - New `obol llm configure` command for standalone provider setup - OpenClaw overlay routes through llmspy:8000/v1 instead of direct cloud APIs - Bump llmspy image to obol fork rc.2 (fixes SQLite startup race) Add `obol openclaw cli -- ` passthrough: - Remote-capable commands (gateway, acp, browser, logs) via port-forward - Local-only commands (doctor, models, config) via kubectl exec - Replace PTY-based setup wizard with non-interactive helmfile sync flow - Remove creack/pty and golang.org/x/term dependencies * fix(openclaw): rename up→onboard, fix api field and macOS host resolution - Rename `obol openclaw up` to `obol openclaw onboard` - Set api: "openai-completions" in llmspy-routed overlay (fixes "No API provider registered for api: undefined" in OpenClaw) - Use host.docker.internal on macOS for Ollama ExternalName service (host.k3d.internal doesn't resolve on Docker Desktop) * feat(openclaw): detect Ollama availability before offering it in setup wizard SetupDefault() now probes the host Ollama endpoint before deploying with Ollama defaults — skips gracefully when unreachable so users without Ollama can configure a cloud provider later via 
`obol openclaw setup`. interactiveSetup() dynamically shows a 3-option menu (Ollama/OpenAI/ Anthropic) when Ollama is detected, or a 2-option menu (OpenAI/Anthropic) when it isn't. * docs: add LLM configuration architecture to CLAUDE.md Document the two-tier model: global llmspy gateway (cluster-wide keys and provider routing) vs per-instance OpenClaw config (overlay values pointing at llmspy or directly at cloud APIs). Includes data flow diagram, summary table, and key source files reference. --- CLAUDE.md | 127 ++ README.md | 18 + cmd/obol/llm.go | 88 ++ cmd/obol/main.go | 16 + cmd/obol/openclaw.go | 190 +++ .../infrastructure/base/templates/llm.yaml | 43 +- internal/embed/infrastructure/helmfile.yaml | 19 +- .../networks/ethereum/templates/ingress.yaml | 10 +- internal/llm/llm.go | 152 ++ internal/openclaw/chart/Chart.yaml | 20 + internal/openclaw/chart/templates/NOTES.txt | 33 + .../openclaw/chart/templates/_helpers.tpl | 228 +++ .../openclaw/chart/templates/configmap.yaml | 11 + .../openclaw/chart/templates/deployment.yaml | 187 +++ .../openclaw/chart/templates/httproute.yaml | 25 + .../openclaw/chart/templates/ingress.yaml | 43 + .../openclaw/chart/templates/init-job.yaml | 64 + internal/openclaw/chart/templates/pvc.yaml | 19 + internal/openclaw/chart/templates/role.yaml | 22 + .../openclaw/chart/templates/rolebinding.yaml | 16 + internal/openclaw/chart/templates/secret.yaml | 35 + .../openclaw/chart/templates/service.yaml | 15 + .../chart/templates/serviceaccount.yaml | 13 + .../chart/templates/skills-configmap.yaml | 11 + .../templates/tests/test-connection.yaml | 30 + .../openclaw/chart/templates/validate.yaml | 19 + internal/openclaw/chart/values.schema.json | 377 +++++ internal/openclaw/chart/values.yaml | 301 ++++ internal/openclaw/import.go | 349 +++++ internal/openclaw/openclaw.go | 1246 +++++++++++++++++ internal/openclaw/overlay_test.go | 154 ++ internal/stack/stack.go | 17 +- internal/tunnel/stackid.go | 1 - obolup.sh | 75 + 34 files changed, 3943 
insertions(+), 31 deletions(-) create mode 100644 cmd/obol/llm.go create mode 100644 cmd/obol/openclaw.go create mode 100644 internal/llm/llm.go create mode 100644 internal/openclaw/chart/Chart.yaml create mode 100644 internal/openclaw/chart/templates/NOTES.txt create mode 100644 internal/openclaw/chart/templates/_helpers.tpl create mode 100644 internal/openclaw/chart/templates/configmap.yaml create mode 100644 internal/openclaw/chart/templates/deployment.yaml create mode 100644 internal/openclaw/chart/templates/httproute.yaml create mode 100644 internal/openclaw/chart/templates/ingress.yaml create mode 100644 internal/openclaw/chart/templates/init-job.yaml create mode 100644 internal/openclaw/chart/templates/pvc.yaml create mode 100644 internal/openclaw/chart/templates/role.yaml create mode 100644 internal/openclaw/chart/templates/rolebinding.yaml create mode 100644 internal/openclaw/chart/templates/secret.yaml create mode 100644 internal/openclaw/chart/templates/service.yaml create mode 100644 internal/openclaw/chart/templates/serviceaccount.yaml create mode 100644 internal/openclaw/chart/templates/skills-configmap.yaml create mode 100644 internal/openclaw/chart/templates/tests/test-connection.yaml create mode 100644 internal/openclaw/chart/templates/validate.yaml create mode 100644 internal/openclaw/chart/values.schema.json create mode 100644 internal/openclaw/chart/values.yaml create mode 100644 internal/openclaw/import.go create mode 100644 internal/openclaw/openclaw.go create mode 100644 internal/openclaw/overlay_test.go diff --git a/CLAUDE.md b/CLAUDE.md index e2fa9b4..7ea97d1 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -148,6 +148,12 @@ obol │ │ ├── helios (dynamically generated) │ │ └── aztec (dynamically generated) │ └── delete +├── llm (LLM provider management) +│ └── configure +├── openclaw (OpenClaw AI assistant) +│ ├── setup +│ ├── onboard +│ └── dashboard ├── kubectl (passthrough with KUBECONFIG) ├── helm (passthrough with KUBECONFIG) ├── helmfile 
(passthrough with KUBECONFIG) @@ -560,6 +566,118 @@ obol network install ethereum --id hoodi-test --network=hoodi - k3s auto-applies all YAML files on startup - Uses k3s HelmChart CRD for Helm deployments +## LLM Configuration Architecture + +The stack uses a two-tier architecture for LLM routing. A cluster-wide proxy (llmspy) handles actual provider communication, while each application instance (e.g., OpenClaw) sees a simplified single-provider view. + +### Tier 1: Global llmspy Gateway (`llm` namespace) + +**Purpose**: Shared OpenAI-compatible proxy that routes LLM traffic from all applications to actual providers (Ollama, Anthropic, OpenAI). + +**Kubernetes resources** (defined in `internal/embed/infrastructure/base/templates/llm.yaml`): + +| Resource | Type | Purpose | +|----------|------|---------| +| `llm` | Namespace | Dedicated namespace for LLM infrastructure | +| `llmspy-config` | ConfigMap | `llms.json` (provider enable/disable) + `providers.json` (provider definitions) | +| `llms-secrets` | Secret | Cloud API keys (`ANTHROPIC_API_KEY`, `OPENAI_API_KEY`) — empty by default | +| `llmspy` | Deployment | `ghcr.io/obolnetwork/llms:3.0.32-obol.1-rc.2`, port 8000 | +| `llmspy` | Service (ClusterIP) | `llmspy.llm.svc.cluster.local:8000` | +| `ollama` | Service (ExternalName) | Routes to host Ollama via `{{OLLAMA_HOST}}` placeholder | + +**Configuration mechanism** (`internal/llm/llm.go` — `ConfigureLLMSpy()`): +1. Patches `llms-secrets` Secret with the API key +2. Reads `llmspy-config` ConfigMap, sets `providers..enabled = true` in `llms.json` +3. Restarts `llmspy` Deployment via rollout restart +4. Waits for rollout to complete (60s timeout) + +**CLI surface** (`cmd/obol/llm.go`): +- `obol llm configure --provider=anthropic --api-key=sk-...` +- Interactive prompt if flags omitted (choice of Anthropic or OpenAI) + +**Key design**: Ollama is enabled by default; cloud providers are disabled until configured via `obol llm configure`. 
An init container copies the ConfigMap into a writable emptyDir so llmspy can write runtime state. + +### Tier 2: Per-Instance Application Config (per-deployment namespace) + +**Purpose**: Each application instance (e.g., OpenClaw) has its own model configuration, rendered by its Helm chart from values files. + +**Values file hierarchy** (helmfile merges in order): +1. `values.yaml` — chart defaults (from embedded chart, e.g., `internal/openclaw/chart/values.yaml`) +2. `values-obol.yaml` — Obol Stack overlay (generated by `generateOverlayValues()`) + +**How providers become application config** (OpenClaw example, `_helpers.tpl` lines 167-189): +- Iterates provider list from `.Values.models` +- Only emits providers where `enabled == true` +- For each enabled provider: `baseUrl`, `apiKey` (as `${ENV_VAR}` reference), `models` array +- `api` field is only emitted if non-empty (required for llmspy routing) + +### The llmspy-Routed Overlay Pattern + +When a cloud provider is selected during setup, two things happen simultaneously: + +1. **Global tier**: `llm.ConfigureLLMSpy()` patches the cluster-wide llmspy gateway with the API key and enables the provider +2. **Instance tier**: `buildLLMSpyRoutedOverlay()` creates an overlay where a single "ollama" provider points at llmspy, the cloud model is listed under that provider, and `api` is set to `openai-completions` + +**Result**: The application never talks directly to cloud APIs. All traffic is routed through llmspy. 
+ +**Data flow**: +``` +Application (openclaw.json) + │ model: "ollama/claude-sonnet-4-5-20250929" + │ api: "openai-completions" + │ baseUrl: http://llmspy.llm.svc.cluster.local:8000/v1 + │ + ▼ +llmspy (llm namespace, port 8000) + │ POST /v1/chat/completions + │ → resolves "claude-sonnet-4-5-20250929" to anthropic provider + │ + ▼ +Anthropic API (or Ollama, OpenAI — depending on provider) +``` + +**Overlay example** (`values-obol.yaml`): +```yaml +models: + ollama: + enabled: true + baseUrl: http://llmspy.llm.svc.cluster.local:8000/v1 + api: openai-completions + apiKeyEnvVar: OLLAMA_API_KEY + apiKeyValue: ollama-local + models: + - id: claude-sonnet-4-5-20250929 + name: Claude Sonnet 4.5 + anthropic: + enabled: false + openai: + enabled: false +``` + +### Summary Table + +| Aspect | Tier 1 (llmspy) | Tier 2 (Application instance) | +|--------|-----------------|-------------------------------| +| **Scope** | Cluster-wide | Per-deployment | +| **Namespace** | `llm` | `-` (e.g., `openclaw-`) | +| **Config storage** | ConfigMap `llmspy-config` | ConfigMap `-config` | +| **Secrets** | Secret `llms-secrets` | Secret `-secrets` | +| **Configure via** | `obol llm configure` | `obol openclaw setup ` | +| **Providers** | Real (Ollama, Anthropic, OpenAI) | Virtual: everything appears as "ollama" pointing at llmspy | +| **API field** | N/A (provider-native) | Must be `openai-completions` for llmspy routing | + +### Key Source Files + +| File | Role | +|------|------| +| `internal/llm/llm.go` | `ConfigureLLMSpy()` — patches global Secret + ConfigMap + restart | +| `cmd/obol/llm.go` | `obol llm configure` CLI command | +| `internal/embed/infrastructure/base/templates/llm.yaml` | llmspy Kubernetes resource definitions | +| `internal/openclaw/openclaw.go` | `Setup()`, `interactiveSetup()`, `generateOverlayValues()`, `buildLLMSpyRoutedOverlay()` | +| `internal/openclaw/import.go` | `DetectExistingConfig()`, `TranslateToOverlayYAML()` | +| `internal/openclaw/chart/values.yaml` | 
Default per-instance model config | +| `internal/openclaw/chart/templates/_helpers.tpl` | Renders model providers into application JSON config | + ## Network Install Implementation Details ### Template Field Parser @@ -799,6 +917,14 @@ obol network delete ethereum- --force - `internal/network/network.go` - Network deployment - `internal/embed/embed.go` - Embedded asset management +**LLM and OpenClaw**: +- `internal/llm/llm.go` - llmspy gateway configuration (`ConfigureLLMSpy()`) +- `cmd/obol/llm.go` - `obol llm configure` CLI command +- `internal/embed/infrastructure/base/templates/llm.yaml` - llmspy K8s resources +- `internal/openclaw/openclaw.go` - OpenClaw setup, overlay generation, llmspy routing +- `internal/openclaw/import.go` - Existing config detection and translation +- `internal/openclaw/chart/` - OpenClaw Helm chart (values, templates, helpers) + **Embedded assets**: - `internal/embed/k3d-config.yaml` - k3d configuration template - `internal/embed/networks/` - Network definitions @@ -806,6 +932,7 @@ obol network delete ethereum- --force - `helios/helmfile.yaml.gotmpl` - `aztec/helmfile.yaml.gotmpl` - `internal/embed/defaults/` - Default stack resources +- `internal/embed/infrastructure/` - Infrastructure resources (llmspy, Traefik) **Build and version**: - `justfile` - Task runner (install, build, up, down commands) diff --git a/README.md b/README.md index e525dca..b7ec40d 100644 --- a/README.md +++ b/README.md @@ -125,6 +125,24 @@ obol k9s The stack will create a local Kubernetes cluster. Each network installation creates a uniquely-namespaced deployment instance, allowing you to run multiple configurations simultaneously. +## Public Access (Cloudflare Tunnel) + +By default, the stack deploys a Cloudflare Tunnel connector in “quick tunnel” mode, which provides a random public URL. Check it with: + +```bash +obol tunnel status +``` + +To use a persistent hostname instead: + +- Browser login flow (requires `cloudflared` installed locally, e.g. 
`brew install cloudflared` on macOS): + - `obol tunnel login --hostname stack.example.com` +- API-driven provisioning: + - `obol tunnel provision --hostname stack.example.com --account-id ... --zone-id ... --api-token ...` + - Or set `CLOUDFLARE_ACCOUNT_ID`, `CLOUDFLARE_ZONE_ID`, `CLOUDFLARE_API_TOKEN`. + +Note: the stack ID (used in tunnel naming) is preserved across `obol stack init --force`. Use `obol stack purge` to reset it. + > [!TIP] > Use `obol network list` to see all available networks. Customize installations with flags (e.g., `obol network install ethereum --network=holesky --execution-client=geth`) to create different deployment configurations. After installation, deploy to the cluster with `obol network sync /`. diff --git a/cmd/obol/llm.go b/cmd/obol/llm.go new file mode 100644 index 0000000..8f11ac5 --- /dev/null +++ b/cmd/obol/llm.go @@ -0,0 +1,88 @@ +package main + +import ( + "bufio" + "fmt" + "os" + "strings" + + "github.com/ObolNetwork/obol-stack/internal/config" + "github.com/ObolNetwork/obol-stack/internal/llm" + "github.com/urfave/cli/v2" +) + +func llmCommand(cfg *config.Config) *cli.Command { + return &cli.Command{ + Name: "llm", + Usage: "Manage LLM providers (llmspy universal proxy)", + Subcommands: []*cli.Command{ + { + Name: "configure", + Usage: "Configure a cloud AI provider in the llmspy gateway", + Flags: []cli.Flag{ + &cli.StringFlag{ + Name: "provider", + Usage: "Provider name (anthropic, openai)", + }, + &cli.StringFlag{ + Name: "api-key", + Usage: "API key for the provider", + EnvVars: []string{"LLM_API_KEY"}, + }, + }, + Action: func(c *cli.Context) error { + provider := c.String("provider") + apiKey := c.String("api-key") + + // Interactive mode if flags not provided + if provider == "" || apiKey == "" { + var err error + provider, apiKey, err = promptLLMConfig() + if err != nil { + return err + } + } + + return llm.ConfigureLLMSpy(cfg, provider, apiKey) + }, + }, + }, + } +} + +// promptLLMConfig interactively asks the user 
for provider and API key. +func promptLLMConfig() (string, string, error) { + reader := bufio.NewReader(os.Stdin) + + fmt.Println("Select a provider:") + fmt.Println(" [1] Anthropic") + fmt.Println(" [2] OpenAI") + fmt.Print("\nChoice [1]: ") + + line, _ := reader.ReadString('\n') + choice := strings.TrimSpace(line) + if choice == "" { + choice = "1" + } + + var provider, display string + switch choice { + case "1": + provider = "anthropic" + display = "Anthropic" + case "2": + provider = "openai" + display = "OpenAI" + default: + return "", "", fmt.Errorf("unknown choice: %s", choice) + } + + fmt.Printf("\n%s API key: ", display) + apiKey, _ := reader.ReadString('\n') + apiKey = strings.TrimSpace(apiKey) + if apiKey == "" { + return "", "", fmt.Errorf("API key is required") + } + + return provider, apiKey, nil +} diff --git a/cmd/obol/main.go b/cmd/obol/main.go index b5175d6..203c662 100644 --- a/cmd/obol/main.go +++ b/cmd/obol/main.go @@ -52,6 +52,20 @@ COMMANDS: network install Install and deploy network to cluster network delete Remove network and clean up cluster resources + OpenClaw (AI Agent): + openclaw onboard Create and deploy an OpenClaw instance + openclaw setup Reconfigure model providers for a deployed instance + openclaw dashboard Open the dashboard in a browser + openclaw cli Run openclaw CLI against a deployed instance + openclaw sync Deploy or update an instance + openclaw token Retrieve gateway token + openclaw list List instances + openclaw delete Remove instance and cluster resources + openclaw skills Manage skills (sync from local dir) + + LLM Gateway: + llm configure Configure cloud AI provider in llmspy gateway + Inference (x402 Pay-Per-Request): inference serve Start the x402 inference gateway @@ -430,7 +444,9 @@ GLOBAL OPTIONS: }, }, networkCommand(cfg), + openclawCommand(cfg), inferenceCommand(cfg), + llmCommand(cfg), { Name: "app", Usage: "Manage applications", diff --git a/cmd/obol/openclaw.go b/cmd/obol/openclaw.go new file mode 100644 
index 0000000..80d4ec6 --- /dev/null +++ b/cmd/obol/openclaw.go @@ -0,0 +1,190 @@ +package main + +import ( + "fmt" + + "github.com/ObolNetwork/obol-stack/internal/config" + "github.com/ObolNetwork/obol-stack/internal/openclaw" + "github.com/urfave/cli/v2" +) + +func openclawCommand(cfg *config.Config) *cli.Command { + return &cli.Command{ + Name: "openclaw", + Usage: "Manage OpenClaw AI agent instances", + Subcommands: []*cli.Command{ + { + Name: "onboard", + Usage: "Create and deploy an OpenClaw instance", + Flags: []cli.Flag{ + &cli.StringFlag{ + Name: "id", + Usage: "Instance ID (defaults to generated petname)", + }, + &cli.BoolFlag{ + Name: "force", + Aliases: []string{"f"}, + Usage: "Overwrite existing instance", + }, + &cli.BoolFlag{ + Name: "no-sync", + Usage: "Only scaffold config, don't deploy to cluster", + }, + }, + Action: func(c *cli.Context) error { + return openclaw.Onboard(cfg, openclaw.OnboardOptions{ + ID: c.String("id"), + Force: c.Bool("force"), + Sync: !c.Bool("no-sync"), + Interactive: true, + }) + }, + }, + { + Name: "sync", + Usage: "Deploy or update an OpenClaw instance", + ArgsUsage: "", + Action: func(c *cli.Context) error { + if c.NArg() == 0 { + return fmt.Errorf("instance ID required (e.g., obol openclaw sync happy-otter)") + } + return openclaw.Sync(cfg, c.Args().First()) + }, + }, + { + Name: "token", + Usage: "Retrieve gateway token for an OpenClaw instance", + ArgsUsage: "", + Action: func(c *cli.Context) error { + if c.NArg() == 0 { + return fmt.Errorf("instance ID required (e.g., obol openclaw token happy-otter)") + } + return openclaw.Token(cfg, c.Args().First()) + }, + }, + { + Name: "list", + Usage: "List OpenClaw instances", + Action: func(c *cli.Context) error { + return openclaw.List(cfg) + }, + }, + { + Name: "delete", + Usage: "Remove an OpenClaw instance and its cluster resources", + ArgsUsage: "", + Flags: []cli.Flag{ + &cli.BoolFlag{ + Name: "force", + Aliases: []string{"f"}, + Usage: "Skip confirmation prompt", + }, 
+ }, + Action: func(c *cli.Context) error { + if c.NArg() == 0 { + return fmt.Errorf("instance ID required (e.g., obol openclaw delete happy-otter)") + } + return openclaw.Delete(cfg, c.Args().First(), c.Bool("force")) + }, + }, + { + Name: "setup", + Usage: "Reconfigure model providers for a deployed instance", + ArgsUsage: "", + Action: func(c *cli.Context) error { + if c.NArg() == 0 { + return fmt.Errorf("instance ID required (e.g., obol openclaw setup default)") + } + return openclaw.Setup(cfg, c.Args().First(), openclaw.SetupOptions{}) + }, + }, + { + Name: "dashboard", + Usage: "Open the OpenClaw dashboard in a browser", + ArgsUsage: "", + Flags: []cli.Flag{ + &cli.IntFlag{ + Name: "port", + Usage: "Local port for port-forward (0 = auto)", + Value: 0, + }, + &cli.BoolFlag{ + Name: "no-browser", + Usage: "Print URL without opening browser", + }, + }, + Action: func(c *cli.Context) error { + if c.NArg() == 0 { + return fmt.Errorf("instance ID required (e.g., obol openclaw dashboard default)") + } + noBrowser := c.Bool("no-browser") + return openclaw.Dashboard(cfg, c.Args().First(), openclaw.DashboardOptions{ + Port: c.Int("port"), + NoBrowser: noBrowser, + }, func(url string) { + if !noBrowser { + openBrowser(url) + } + }) + }, + }, + { + Name: "skills", + Usage: "Manage OpenClaw skills", + Subcommands: []*cli.Command{ + { + Name: "sync", + Usage: "Package a local skills directory into a ConfigMap", + ArgsUsage: "", + Flags: []cli.Flag{ + &cli.StringFlag{ + Name: "from", + Usage: "Path to local skills directory", + Required: true, + }, + }, + Action: func(c *cli.Context) error { + if c.NArg() == 0 { + return fmt.Errorf("instance ID required (e.g., obol openclaw skills sync happy-otter --from ./skills)") + } + return openclaw.SkillsSync(cfg, c.Args().First(), c.String("from")) + }, + }, + }, + }, + { + Name: "cli", + Usage: "Run openclaw CLI commands against a deployed instance", + ArgsUsage: " [-- ]", + SkipFlagParsing: true, + Action: func(c *cli.Context) 
error { + args := c.Args().Slice() + if len(args) == 0 { + return fmt.Errorf("instance ID required\n\nUsage:\n" + + " obol openclaw cli -- \n\n" + + "Examples:\n" + + " obol openclaw cli default -- gateway health\n" + + " obol openclaw cli default -- gateway call config.get\n" + + " obol openclaw cli default -- doctor") + } + + id := args[0] + // Everything after "--" is the openclaw command + var openclawArgs []string + for i, arg := range args[1:] { + if arg == "--" { + openclawArgs = args[i+2:] + break + } + } + if len(openclawArgs) == 0 && len(args) > 1 { + // No "--" separator found; treat remaining args as openclaw command + openclawArgs = args[1:] + } + + return openclaw.CLI(cfg, id, openclawArgs) + }, + }, + }, + } +} diff --git a/internal/embed/infrastructure/base/templates/llm.yaml b/internal/embed/infrastructure/base/templates/llm.yaml index 4547c8f..8c8acf8 100644 --- a/internal/embed/infrastructure/base/templates/llm.yaml +++ b/internal/embed/infrastructure/base/templates/llm.yaml @@ -38,7 +38,8 @@ spec: --- # llms.py v3 configuration for Obol Stack: -# - Only enable the Ollama provider (host machine via ollama Service) +# - Ollama provider enabled by default (host machine via ollama Service) +# - Anthropic and OpenAI providers available (disabled by default; enabled via `obol llm configure`) # - Default model is glm-4.7-flash apiVersion: v1 kind: ConfigMap @@ -64,6 +65,12 @@ data: "providers": { "ollama": { "enabled": true + }, + "anthropic": { + "enabled": false + }, + "openai": { + "enabled": false } } } @@ -75,9 +82,34 @@ data: "api": "http://ollama.llm.svc.cluster.local:11434", "models": {}, "all_models": true + }, + "anthropic": { + "id": "anthropic", + "api_key": "$ANTHROPIC_API_KEY", + "models": {}, + "all_models": true + }, + "openai": { + "id": "openai", + "api_key": "$OPENAI_API_KEY", + "models": {}, + "all_models": true } } +--- +# Secret for cloud provider API keys. 
Empty by default; patched imperatively +# via `obol llm configure` or `obol openclaw setup`. +apiVersion: v1 +kind: Secret +metadata: + name: llms-secrets + namespace: llm +type: Opaque +stringData: + ANTHROPIC_API_KEY: "" + OPENAI_API_KEY: "" + --- apiVersion: apps/v1 kind: Deployment @@ -110,6 +142,7 @@ spec: mkdir -p /data cp /config/llms.json /data/llms.json cp /config/providers.json /data/providers.json + chmod 666 /data/llms.json /data/providers.json volumeMounts: - name: llmspy-config mountPath: /config @@ -118,9 +151,9 @@ spec: mountPath: /data containers: - name: llmspy - # Official LLMSpy container image (published by upstream). + # Obol fork of LLMSpy with smart routing extension. # Pin a specific version for reproducibility. - image: ghcr.io/servicestack/llms:latest + image: ghcr.io/obolnetwork/llms:3.0.32-obol.1-rc.2 imagePullPolicy: IfNotPresent ports: - name: http @@ -133,6 +166,10 @@ spec: - /home/llms/.llms/llms.json - --serve - "8000" + envFrom: + - secretRef: + name: llms-secrets + optional: true env: # Avoid surprises if the image changes its default HOME. 
- name: HOME diff --git a/internal/embed/infrastructure/helmfile.yaml b/internal/embed/infrastructure/helmfile.yaml index c1e9bb3..dc2bf22 100644 --- a/internal/embed/infrastructure/helmfile.yaml +++ b/internal/embed/infrastructure/helmfile.yaml @@ -43,30 +43,13 @@ releases: values: - ./values/monitoring.yaml.gotmpl - # Gateway API CRDs (applied from upstream release) - - name: gateway-api-crds - namespace: gateway-system - createNamespace: true - chart: bedag/raw - values: - - resources: [] - hooks: - - events: ["presync"] - showlogs: true - command: kubectl - args: - - apply - - -f - - https://github.com/kubernetes-sigs/gateway-api/releases/download/{{ .Values.gatewayApiVersion }}/standard-install.yaml - # Traefik ingress controller with Gateway API support + # Traefik v38+ bundles Gateway API CRDs in its crds/ directory - name: traefik namespace: traefik createNamespace: true chart: traefik/traefik version: 38.0.2 - needs: - - gateway-system/gateway-api-crds values: # Gateway API provider configuration - providers: diff --git a/internal/embed/networks/ethereum/templates/ingress.yaml b/internal/embed/networks/ethereum/templates/ingress.yaml index 76c745e..a8cda39 100644 --- a/internal/embed/networks/ethereum/templates/ingress.yaml +++ b/internal/embed/networks/ethereum/templates/ingress.yaml @@ -14,12 +14,9 @@ spec: - obol.stack rules: - matches: - - path: - type: Exact - value: /{{ .Release.Namespace }}/execution - path: type: PathPrefix - value: /{{ .Release.Namespace }}/execution/ + value: /{{ .Release.Namespace }}/execution filters: - type: URLRewrite urlRewrite: @@ -45,12 +42,9 @@ spec: - obol.stack rules: - matches: - - path: - type: Exact - value: /{{ .Release.Namespace }}/beacon - path: type: PathPrefix - value: /{{ .Release.Namespace }}/beacon/ + value: /{{ .Release.Namespace }}/beacon filters: - type: URLRewrite urlRewrite: diff --git a/internal/llm/llm.go b/internal/llm/llm.go new file mode 100644 index 0000000..2980069 --- /dev/null +++ 
b/internal/llm/llm.go @@ -0,0 +1,152 @@ +package llm + +import ( + "bytes" + "encoding/json" + "fmt" + "os" + "os/exec" + "path/filepath" + "strings" + + "github.com/ObolNetwork/obol-stack/internal/config" +) + +const ( + namespace = "llm" + secretName = "llms-secrets" + configMapName = "llmspy-config" + deployName = "llmspy" +) + +// providerEnvKeys maps provider names to their Secret key names. +var providerEnvKeys = map[string]string{ + "anthropic": "ANTHROPIC_API_KEY", + "openai": "OPENAI_API_KEY", +} + +// ConfigureLLMSpy enables a cloud provider in the llmspy gateway. +// It patches the llms-secrets Secret with the API key, enables the provider +// in the llmspy-config ConfigMap, and restarts the deployment. +func ConfigureLLMSpy(cfg *config.Config, provider, apiKey string) error { + envKey, ok := providerEnvKeys[provider] + if !ok { + return fmt.Errorf("unsupported llmspy provider: %s (supported: anthropic, openai)", provider) + } + + kubectlBinary := filepath.Join(cfg.BinDir, "kubectl") + kubeconfigPath := filepath.Join(cfg.ConfigDir, "kubeconfig.yaml") + + if _, err := os.Stat(kubeconfigPath); os.IsNotExist(err) { + return fmt.Errorf("cluster not running. Run 'obol stack up' first") + } + + // 1. Patch the Secret with the API key + fmt.Printf("Configuring llmspy: setting %s key...\n", provider) + patchJSON, _ := json.Marshal(map[string]map[string]string{"stringData": {envKey: apiKey}}) + if err := kubectl(kubectlBinary, kubeconfigPath, + "patch", "secret", secretName, "-n", namespace, + "-p", string(patchJSON), "--type=merge"); err != nil { + return fmt.Errorf("failed to patch llmspy secret: %w", err) + } + + // 2. Read current ConfigMap, enable the provider in llms.json + fmt.Printf("Enabling %s provider in llmspy config...\n", provider) + if err := enableProviderInConfigMap(kubectlBinary, kubeconfigPath, provider); err != nil { + return fmt.Errorf("failed to update llmspy config: %w", err) + } + + // 3.
Restart the deployment so it picks up new Secret + ConfigMap + fmt.Printf("Restarting llmspy deployment...\n") + if err := kubectl(kubectlBinary, kubeconfigPath, + "rollout", "restart", fmt.Sprintf("deployment/%s", deployName), "-n", namespace); err != nil { + return fmt.Errorf("failed to restart llmspy: %w", err) + } + + // 4. Wait for rollout to complete + if err := kubectl(kubectlBinary, kubeconfigPath, + "rollout", "status", fmt.Sprintf("deployment/%s", deployName), "-n", namespace, + "--timeout=60s"); err != nil { + fmt.Printf("Warning: llmspy rollout not confirmed: %v\n", err) + fmt.Println("The deployment may still be rolling out.") + } else { + fmt.Printf("llmspy restarted with %s provider enabled.\n", provider) + } + + return nil +} + +// enableProviderInConfigMap reads the llmspy-config ConfigMap, parses llms.json, +// sets providers.<provider>.enabled = true, and patches the ConfigMap back. +func enableProviderInConfigMap(kubectlBinary, kubeconfigPath, provider string) error { + // Read current llms.json from ConfigMap + var stdout bytes.Buffer + cmd := exec.Command(kubectlBinary, "get", "configmap", configMapName, + "-n", namespace, "-o", "jsonpath={.data.llms\\.json}") + cmd.Env = append(os.Environ(), fmt.Sprintf("KUBECONFIG=%s", kubeconfigPath)) + cmd.Stdout = &stdout + var stderr bytes.Buffer + cmd.Stderr = &stderr + if err := cmd.Run(); err != nil { + return fmt.Errorf("failed to read ConfigMap: %w\n%s", err, stderr.String()) + } + + // Parse JSON + var llmsConfig map[string]interface{} + if err := json.Unmarshal(stdout.Bytes(), &llmsConfig); err != nil { + return fmt.Errorf("failed to parse llms.json: %w", err) + } + + // Set providers.<provider>.enabled = true + providers, ok := llmsConfig["providers"].(map[string]interface{}) + if !ok { + providers = make(map[string]interface{}) + llmsConfig["providers"] = providers + } + + providerCfg, ok := providers[provider].(map[string]interface{}) + if !ok { + providerCfg = make(map[string]interface{}) + providers[provider] =
providerCfg + } + providerCfg["enabled"] = true + + // Marshal back to JSON + updated, err := json.Marshal(llmsConfig) + if err != nil { + return fmt.Errorf("failed to marshal llms.json: %w", err) + } + + // Patch ConfigMap + // Use strategic merge patch with the new llms.json + patchData := map[string]interface{}{ + "data": map[string]string{ + "llms.json": string(updated), + }, + } + patchJSON, err := json.Marshal(patchData) + if err != nil { + return fmt.Errorf("failed to marshal patch: %w", err) + } + + return kubectl(kubectlBinary, kubeconfigPath, + "patch", "configmap", configMapName, "-n", namespace, + "-p", string(patchJSON), "--type=merge") +} + +// kubectl runs a kubectl command with the given kubeconfig and returns any error. +func kubectl(binary, kubeconfig string, args ...string) error { + cmd := exec.Command(binary, args...) + cmd.Env = append(os.Environ(), fmt.Sprintf("KUBECONFIG=%s", kubeconfig)) + var stderr bytes.Buffer + cmd.Stderr = &stderr + cmd.Stdout = os.Stdout + if err := cmd.Run(); err != nil { + errMsg := strings.TrimSpace(stderr.String()) + if errMsg != "" { + return fmt.Errorf("%w: %s", err, errMsg) + } + return err + } + return nil +} diff --git a/internal/openclaw/chart/Chart.yaml b/internal/openclaw/chart/Chart.yaml new file mode 100644 index 0000000..970d251 --- /dev/null +++ b/internal/openclaw/chart/Chart.yaml @@ -0,0 +1,20 @@ +apiVersion: v2 +name: openclaw +description: OpenClaw gateway deployment (agent runtime) for Kubernetes. 
+type: application +version: 0.1.0 +appVersion: "2026.2.9" +kubeVersion: ">=1.26.0-0" + +home: https://docs.openclaw.ai +sources: + - https://docs.openclaw.ai +maintainers: + - name: Obol Platform Team + email: platform@obol.tech +keywords: + - openclaw + - agent + - ai + - gateway + - obol diff --git a/internal/openclaw/chart/templates/NOTES.txt b/internal/openclaw/chart/templates/NOTES.txt new file mode 100644 index 0000000..b69ffbb --- /dev/null +++ b/internal/openclaw/chart/templates/NOTES.txt @@ -0,0 +1,33 @@ +OpenClaw is now installed. + +Namespace: {{ .Release.Namespace }} +Service: {{ include "openclaw.fullname" . }} +Port: {{ .Values.service.port }} + +Gateway token: + kubectl get secret -n {{ .Release.Namespace }} {{ include "openclaw.secretsName" . }} -o jsonpath='{.data.{{ .Values.secrets.gatewayToken.key }}}' | base64 --decode + +{{- if .Values.httpRoute.enabled }} + +HTTPRoute is enabled. Access OpenClaw at: +{{- range .Values.httpRoute.hostnames }} + http://{{ . }} +{{- end }} + +{{- else if .Values.ingress.enabled }} + +Ingress is enabled. Access OpenClaw at: +{{- range $host := .Values.ingress.hosts }} + {{- range .paths }} + http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }} + {{- end }} +{{- end }} + +{{- else }} + +Port-forward for local access: + export POD_NAME=$(kubectl get pods -n {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "openclaw.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}") + kubectl -n {{ .Release.Namespace }} port-forward $POD_NAME 18789:{{ .Values.service.port }} + open http://127.0.0.1:18789 + +{{- end }} diff --git a/internal/openclaw/chart/templates/_helpers.tpl b/internal/openclaw/chart/templates/_helpers.tpl new file mode 100644 index 0000000..c68bc79 --- /dev/null +++ b/internal/openclaw/chart/templates/_helpers.tpl @@ -0,0 +1,228 @@ +{{/* +Expand the name of the chart. 
+*/}} +{{- define "openclaw.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +*/}} +{{- define "openclaw.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "openclaw.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels. +*/}} +{{- define "openclaw.labels" -}} +helm.sh/chart: {{ include "openclaw.chart" . }} +{{ include "openclaw.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels. +*/}} +{{- define "openclaw.selectorLabels" -}} +app.kubernetes.io/name: {{ include "openclaw.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use. +*/}} +{{- define "openclaw.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "openclaw.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} + +{{/* +Compute the full image reference. +*/}} +{{- define "openclaw.image" -}} +{{- $tag := .Values.image.tag -}} +{{- if not $tag -}} +{{- $tag = .Chart.AppVersion -}} +{{- end -}} +{{- printf "%s:%s" .Values.image.repository $tag -}} +{{- end }} + +{{/* +Name of the Secret used for envFrom. 
+*/}} +{{- define "openclaw.secretsName" -}} +{{- if .Values.secrets.existingSecret -}} +{{- .Values.secrets.existingSecret -}} +{{- else if .Values.secrets.name -}} +{{- .Values.secrets.name -}} +{{- else -}} +{{- printf "%s-secrets" (include "openclaw.fullname" .) -}} +{{- end -}} +{{- end }} + +{{/* +Name of the ConfigMap containing openclaw.json. +*/}} +{{- define "openclaw.configMapName" -}} +{{- if .Values.config.existingConfigMap -}} +{{- .Values.config.existingConfigMap -}} +{{- else -}} +{{- printf "%s-config" (include "openclaw.fullname" .) -}} +{{- end -}} +{{- end }} + +{{/* +Name of the PVC used for state storage. +*/}} +{{- define "openclaw.pvcName" -}} +{{- if .Values.persistence.existingClaim -}} +{{- .Values.persistence.existingClaim -}} +{{- else -}} +{{- printf "%s-data" (include "openclaw.fullname" .) -}} +{{- end -}} +{{- end }} + +{{/* +Compute (or reuse) the gateway token value. +*/}} +{{- define "openclaw.gatewayTokenValue" -}} +{{- if .Values.secrets.gatewayToken.value -}} +{{- .Values.secrets.gatewayToken.value -}} +{{- else -}} +{{- $secretName := include "openclaw.secretsName" . -}} +{{- $key := .Values.secrets.gatewayToken.key -}} +{{- $existing := (lookup "v1" "Secret" .Release.Namespace $secretName) -}} +{{- if $existing -}} + {{- $data := index $existing "data" -}} + {{- if and $data (hasKey $data $key) -}} + {{- index $data $key | b64dec -}} + {{- else -}} + {{- randAlphaNum 48 -}} + {{- end -}} +{{- else -}} + {{- randAlphaNum 48 -}} +{{- end -}} +{{- end -}} +{{- end }} + +{{/* +Render openclaw.json as strict JSON. If config.content is provided, it is used verbatim. 
+*/}} +{{- define "openclaw.configJson" -}} +{{- if .Values.config.content -}} +{{- .Values.config.content -}} +{{- else -}} +{{- $gatewayAuth := dict "mode" .Values.openclaw.gateway.auth.mode -}} +{{- if ne .Values.openclaw.gateway.auth.mode "none" -}} +{{- $_ := set $gatewayAuth "token" (printf "${%s}" .Values.secrets.gatewayToken.key) -}} +{{- end -}} + +{{- $gateway := dict + "mode" .Values.openclaw.gateway.mode + "bind" .Values.openclaw.gateway.bind + "port" .Values.service.port + "auth" $gatewayAuth + "http" (dict "endpoints" (dict "chatCompletions" (dict "enabled" .Values.openclaw.gateway.http.endpoints.chatCompletions.enabled))) +-}} + +{{- $agentDefaults := dict "workspace" .Values.openclaw.workspaceDir -}} +{{- if .Values.openclaw.agentModel -}} +{{- $_ := set $agentDefaults "model" (dict "primary" .Values.openclaw.agentModel) -}} +{{- end -}} + +{{- $cfg := dict + "gateway" $gateway + "agents" (dict "defaults" $agentDefaults) +-}} + +{{- if .Values.skills.enabled -}} +{{- $_ := set $cfg "skills" (dict "load" (dict + "extraDirs" (list .Values.skills.extractDir) +)) -}} +{{- end -}} + +{{- /* Build providers map from all enabled model providers */ -}} +{{- $providers := dict -}} +{{- range $name := list "anthropic" "openai" "ollama" -}} +{{- $p := index $.Values.models $name -}} +{{- if $p.enabled -}} +{{- $models := list -}} +{{- range $m := $p.models -}} +{{- $models = append $models (dict "id" $m.id "name" $m.name) -}} +{{- end -}} +{{- $entry := dict + "baseUrl" $p.baseUrl + "apiKey" (printf "${%s}" $p.apiKeyEnvVar) + "models" $models +-}} +{{- if $p.api -}} +{{- $_ := set $entry "api" $p.api -}} +{{- end -}} +{{- $_ := set $providers $name $entry -}} +{{- end -}} +{{- end -}} +{{- if $providers -}} +{{- $_ := set $cfg "models" (dict "providers" $providers) -}} +{{- end -}} + +{{- /* Build channels config from enabled integrations */ -}} +{{- $channels := dict -}} +{{- if .Values.channels.telegram.enabled -}} +{{- $tg := dict "botToken" (printf 
"${TELEGRAM_BOT_TOKEN}") -}} +{{- if .Values.channels.telegram.dmPolicy -}} +{{- $_ := set $tg "dmPolicy" .Values.channels.telegram.dmPolicy -}} +{{- end -}} +{{- $_ := set $channels "telegram" $tg -}} +{{- end -}} +{{- if .Values.channels.discord.enabled -}} +{{- $dc := dict "botToken" (printf "${DISCORD_BOT_TOKEN}") -}} +{{- if .Values.channels.discord.dmPolicy -}} +{{- $_ := set $dc "dmPolicy" .Values.channels.discord.dmPolicy -}} +{{- end -}} +{{- $_ := set $channels "discord" $dc -}} +{{- end -}} +{{- if .Values.channels.slack.enabled -}} +{{- $sl := dict "botToken" (printf "${SLACK_BOT_TOKEN}") "appToken" (printf "${SLACK_APP_TOKEN}") -}} +{{- $_ := set $channels "slack" $sl -}} +{{- end -}} +{{- if $channels -}} +{{- $_ := set $cfg "channels" $channels -}} +{{- end -}} + +{{- $cfg | toPrettyJson -}} +{{- end -}} +{{- end }} + +{{/* +Name of the skills ConfigMap (user-provided or chart-created default). +*/}} +{{- define "openclaw.skillsConfigMapName" -}} +{{- if .Values.skills.configMapName -}} +{{- .Values.skills.configMapName -}} +{{- else -}} +{{- printf "%s-skills" (include "openclaw.fullname" .) -}} +{{- end -}} +{{- end }} diff --git a/internal/openclaw/chart/templates/configmap.yaml b/internal/openclaw/chart/templates/configmap.yaml new file mode 100644 index 0000000..fafe456 --- /dev/null +++ b/internal/openclaw/chart/templates/configmap.yaml @@ -0,0 +1,11 @@ +{{- if not .Values.config.existingConfigMap -}} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "openclaw.configMapName" . }} + labels: + {{- include "openclaw.labels" . | nindent 4 }} +data: + {{ .Values.config.key }}: |- + {{- include "openclaw.configJson" . 
| nindent 4 }} +{{- end }} diff --git a/internal/openclaw/chart/templates/deployment.yaml b/internal/openclaw/chart/templates/deployment.yaml new file mode 100644 index 0000000..599c646 --- /dev/null +++ b/internal/openclaw/chart/templates/deployment.yaml @@ -0,0 +1,187 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "openclaw.fullname" . }} + labels: + {{- include "openclaw.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.replicaCount }} + strategy: + type: Recreate + selector: + matchLabels: + {{- include "openclaw.selectorLabels" . | nindent 6 }} + template: + metadata: + annotations: + checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }} + checksum/secret: {{ include (print $.Template.BasePath "/secret.yaml") . | sha256sum }} + {{- with .Values.podAnnotations }} + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "openclaw.selectorLabels" . | nindent 8 }} + {{- with .Values.podLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + serviceAccountName: {{ include "openclaw.serviceAccountName" . }} + {{- if .Values.priorityClassName }} + priorityClassName: {{ .Values.priorityClassName | quote }} + {{- end }} + {{- with .Values.podSecurityContext }} + securityContext: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if .Values.skills.enabled }} + initContainers: + - name: extract-skills + image: "{{ .Values.skills.initContainer.image.repository }}:{{ .Values.skills.initContainer.image.tag }}" + imagePullPolicy: {{ .Values.skills.initContainer.image.pullPolicy }} + {{- with .Values.containerSecurityContext }} + securityContext: + {{- toYaml . 
| nindent 12 }} + {{- end }} + command: + - sh + - -c + - | + set -eu + mkdir -p {{ .Values.skills.extractDir | quote }} + if [ -f /skills/{{ .Values.skills.archiveKey }} ]; then + rm -rf {{ .Values.skills.extractDir | quote }}/* + tar -xzf /skills/{{ .Values.skills.archiveKey }} -C {{ .Values.skills.extractDir | quote }} + echo "Skills extracted successfully" + else + echo "No skills archive found, skipping extraction" + fi + volumeMounts: + - name: data + mountPath: {{ .Values.persistence.mountPath }} + - name: skills-archive + mountPath: /skills + readOnly: true + {{- end }} + containers: + - name: openclaw + image: "{{ include "openclaw.image" . }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + {{- with .Values.containerSecurityContext }} + securityContext: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.image.command }} + command: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.image.args }} + args: + {{- toYaml . | nindent 12 }} + {{- end }} + ports: + - name: http + containerPort: {{ .Values.service.port }} + protocol: TCP + env: + - name: OPENCLAW_CONFIG_PATH + value: "/etc/openclaw/{{ .Values.config.key }}" + - name: OPENCLAW_STATE_DIR + value: {{ .Values.openclaw.stateDir | quote }} + - name: ERPC_URL + value: {{ .Values.erpc.url | quote }} + {{- /* Inject non-secret provider API key values (e.g. Ollama placeholder) */ -}} + {{- range $name := list "anthropic" "openai" "ollama" }} + {{- $p := index $.Values.models $name }} + {{- if and $p.enabled $p.apiKeyValue (not (has $name (list "anthropic" "openai"))) }} + - name: {{ $p.apiKeyEnvVar }} + value: {{ $p.apiKeyValue | quote }} + {{- end }} + {{- end }} + {{- with .Values.image.env }} + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.extraEnv }} + {{- toYaml . | nindent 12 }} + {{- end }} + envFrom: + - secretRef: + name: {{ include "openclaw.secretsName" . }} + {{- range .Values.secrets.extraEnvFromSecrets }} + - secretRef: + name: {{ . 
| quote }} + {{- end }} + {{- if .Values.startupProbe.enabled }} + startupProbe: + tcpSocket: + port: http + periodSeconds: {{ .Values.startupProbe.periodSeconds }} + failureThreshold: {{ .Values.startupProbe.failureThreshold }} + timeoutSeconds: {{ .Values.startupProbe.timeoutSeconds }} + {{- end }} + {{- if .Values.livenessProbe.enabled }} + livenessProbe: + tcpSocket: + port: http + initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds }} + periodSeconds: {{ .Values.livenessProbe.periodSeconds }} + timeoutSeconds: {{ .Values.livenessProbe.timeoutSeconds }} + failureThreshold: {{ .Values.livenessProbe.failureThreshold }} + {{- end }} + {{- if .Values.readinessProbe.enabled }} + readinessProbe: + tcpSocket: + port: http + initialDelaySeconds: {{ .Values.readinessProbe.initialDelaySeconds }} + periodSeconds: {{ .Values.readinessProbe.periodSeconds }} + timeoutSeconds: {{ .Values.readinessProbe.timeoutSeconds }} + failureThreshold: {{ .Values.readinessProbe.failureThreshold }} + {{- end }} + {{- with .Values.resources }} + resources: + {{- toYaml . | nindent 12 }} + {{- end }} + volumeMounts: + - name: config + mountPath: /etc/openclaw + readOnly: true + - name: data + mountPath: {{ .Values.persistence.mountPath }} + {{- with .Values.extraVolumeMounts }} + {{- toYaml . | nindent 12 }} + {{- end }} + volumes: + - name: config + configMap: + name: {{ include "openclaw.configMapName" . }} + - name: data + {{- if .Values.persistence.enabled }} + persistentVolumeClaim: + claimName: {{ include "openclaw.pvcName" . }} + {{- else }} + emptyDir: {} + {{- end }} + {{- if .Values.skills.enabled }} + - name: skills-archive + configMap: + name: {{ include "openclaw.skillsConfigMapName" . }} + optional: true + {{- end }} + {{- with .Values.extraVolumes }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} diff --git a/internal/openclaw/chart/templates/httproute.yaml b/internal/openclaw/chart/templates/httproute.yaml new file mode 100644 index 0000000..d7c6518 --- /dev/null +++ b/internal/openclaw/chart/templates/httproute.yaml @@ -0,0 +1,25 @@ +{{- if .Values.httpRoute.enabled -}} +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute +metadata: + name: {{ include "openclaw.fullname" . }} + labels: + {{- include "openclaw.labels" . | nindent 4 }} + {{- with .Values.httpRoute.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + parentRefs: + {{- toYaml .Values.httpRoute.parentRefs | nindent 4 }} + hostnames: + {{- toYaml .Values.httpRoute.hostnames | nindent 4 }} + rules: + - matches: + - path: + type: PathPrefix + value: {{ .Values.httpRoute.pathPrefix | quote }} + backendRefs: + - name: {{ include "openclaw.fullname" . }} + port: {{ .Values.service.port }} +{{- end }} diff --git a/internal/openclaw/chart/templates/ingress.yaml b/internal/openclaw/chart/templates/ingress.yaml new file mode 100644 index 0000000..cf55fb0 --- /dev/null +++ b/internal/openclaw/chart/templates/ingress.yaml @@ -0,0 +1,43 @@ +{{- if .Values.ingress.enabled -}} +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: {{ include "openclaw.fullname" . }} + labels: + {{- include "openclaw.labels" . | nindent 4 }} + {{- with .Values.ingress.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + {{- with .Values.ingress.className }} + ingressClassName: {{ . }} + {{- end }} + {{- if .Values.ingress.tls }} + tls: + {{- range .Values.ingress.tls }} + - hosts: + {{- range .hosts }} + - {{ . 
| quote }} + {{- end }} + secretName: {{ .secretName }} + {{- end }} + {{- end }} + rules: + {{- range .Values.ingress.hosts }} + - host: {{ .host | quote }} + http: + paths: + {{- range .paths }} + - path: {{ .path }} + {{- with .pathType | default "Prefix" }} + pathType: {{ . }} + {{- end }} + backend: + service: + name: {{ include "openclaw.fullname" $ }} + port: + number: {{ $.Values.service.port }} + {{- end }} + {{- end }} +{{- end }} diff --git a/internal/openclaw/chart/templates/init-job.yaml b/internal/openclaw/chart/templates/init-job.yaml new file mode 100644 index 0000000..a58bbd7 --- /dev/null +++ b/internal/openclaw/chart/templates/init-job.yaml @@ -0,0 +1,64 @@ +{{- if .Values.initJob.enabled -}} +apiVersion: batch/v1 +kind: Job +metadata: + name: {{ include "openclaw.fullname" . }}-init + labels: + {{- include "openclaw.labels" . | nindent 4 }} + annotations: + helm.sh/hook: post-install + helm.sh/hook-weight: "0" + helm.sh/hook-delete-policy: before-hook-creation +spec: + backoffLimit: 3 + template: + metadata: + labels: + {{- include "openclaw.selectorLabels" . | nindent 8 }} + app.kubernetes.io/component: init + spec: + restartPolicy: OnFailure + serviceAccountName: {{ include "openclaw.serviceAccountName" . }} + {{- with .Values.podSecurityContext }} + securityContext: + {{- toYaml . | nindent 8 }} + {{- end }} + containers: + - name: init + {{- $tag := .Values.initJob.image.tag | default .Values.image.tag | default .Chart.AppVersion }} + image: "{{ .Values.initJob.image.repository }}:{{ $tag }}" + imagePullPolicy: {{ .Values.initJob.image.pullPolicy }} + {{- with .Values.containerSecurityContext }} + securityContext: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.initJob.command }} + command: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.initJob.args }} + args: + {{- toYaml .
| nindent 12 }} + {{- end }} + env: + - name: OPENCLAW_STATE_DIR + value: {{ .Values.openclaw.stateDir | quote }} + {{- with .Values.initJob.env }} + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.initJob.resources }} + resources: + {{- toYaml . | nindent 12 }} + {{- end }} + volumeMounts: + - name: data + mountPath: {{ .Values.persistence.mountPath }} + volumes: + - name: data + {{- if .Values.persistence.enabled }} + persistentVolumeClaim: + claimName: {{ include "openclaw.pvcName" . }} + {{- else }} + emptyDir: {} + {{- end }} +{{- end }} diff --git a/internal/openclaw/chart/templates/pvc.yaml b/internal/openclaw/chart/templates/pvc.yaml new file mode 100644 index 0000000..69bdda3 --- /dev/null +++ b/internal/openclaw/chart/templates/pvc.yaml @@ -0,0 +1,19 @@ +{{- if and .Values.persistence.enabled (not .Values.persistence.existingClaim) -}} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ include "openclaw.pvcName" . }} + labels: + {{- include "openclaw.labels" . | nindent 4 }} + annotations: + "helm.sh/resource-policy": keep +spec: + accessModes: + {{- toYaml .Values.persistence.accessModes | nindent 4 }} + {{- if .Values.persistence.storageClass }} + storageClassName: {{ .Values.persistence.storageClass }} + {{- end }} + resources: + requests: + storage: {{ .Values.persistence.size }} +{{- end }} diff --git a/internal/openclaw/chart/templates/role.yaml b/internal/openclaw/chart/templates/role.yaml new file mode 100644 index 0000000..e7d7a55 --- /dev/null +++ b/internal/openclaw/chart/templates/role.yaml @@ -0,0 +1,22 @@ +{{- if .Values.rbac.create -}} +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: {{ include "openclaw.fullname" . }} + labels: + {{- include "openclaw.labels" . 
| nindent 4 }} +rules: + # Read-only access to common namespace resources + - apiGroups: [""] + resources: ["pods", "pods/log", "services", "configmaps", "events", "persistentvolumeclaims"] + verbs: ["get", "list", "watch"] + - apiGroups: ["apps"] + resources: ["deployments", "statefulsets", "replicasets"] + verbs: ["get", "list", "watch"] + - apiGroups: ["batch"] + resources: ["jobs", "cronjobs"] + verbs: ["get", "list", "watch"] + {{- with .Values.rbac.extraRules }} + {{- toYaml . | nindent 2 }} + {{- end }} +{{- end }} diff --git a/internal/openclaw/chart/templates/rolebinding.yaml b/internal/openclaw/chart/templates/rolebinding.yaml new file mode 100644 index 0000000..06f5d48 --- /dev/null +++ b/internal/openclaw/chart/templates/rolebinding.yaml @@ -0,0 +1,16 @@ +{{- if .Values.rbac.create -}} +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: {{ include "openclaw.fullname" . }} + labels: + {{- include "openclaw.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: {{ include "openclaw.fullname" . }} +subjects: + - kind: ServiceAccount + name: {{ include "openclaw.serviceAccountName" . }} + namespace: {{ .Release.Namespace }} +{{- end }} diff --git a/internal/openclaw/chart/templates/secret.yaml b/internal/openclaw/chart/templates/secret.yaml new file mode 100644 index 0000000..61a8f89 --- /dev/null +++ b/internal/openclaw/chart/templates/secret.yaml @@ -0,0 +1,35 @@ +{{- if and .Values.secrets.create (not .Values.secrets.existingSecret) -}} +apiVersion: v1 +kind: Secret +metadata: + name: {{ include "openclaw.secretsName" . }} + labels: + {{- include "openclaw.labels" . | nindent 4 }} +type: Opaque +stringData: + {{ .Values.secrets.gatewayToken.key }}: {{ include "openclaw.gatewayTokenValue" . 
| quote }} + {{- if and .Values.models.anthropic.enabled .Values.models.anthropic.apiKeyValue }} + {{ .Values.models.anthropic.apiKeyEnvVar }}: {{ .Values.models.anthropic.apiKeyValue | quote }} + {{- end }} + {{- if and .Values.models.openai.enabled .Values.models.openai.apiKeyValue }} + {{ .Values.models.openai.apiKeyEnvVar }}: {{ .Values.models.openai.apiKeyValue | quote }} + {{- end }} + {{- if and .Values.channels.telegram.enabled .Values.channels.telegram.botToken }} + TELEGRAM_BOT_TOKEN: {{ .Values.channels.telegram.botToken | quote }} + {{- end }} + {{- if and .Values.channels.telegram.enabled .Values.channels.telegram.dmPolicy }} + TELEGRAM_DM_POLICY: {{ .Values.channels.telegram.dmPolicy | quote }} + {{- end }} + {{- if and .Values.channels.discord.enabled .Values.channels.discord.botToken }} + DISCORD_BOT_TOKEN: {{ .Values.channels.discord.botToken | quote }} + {{- end }} + {{- if and .Values.channels.discord.enabled .Values.channels.discord.dmPolicy }} + DISCORD_DM_POLICY: {{ .Values.channels.discord.dmPolicy | quote }} + {{- end }} + {{- if and .Values.channels.slack.enabled .Values.channels.slack.botToken }} + SLACK_BOT_TOKEN: {{ .Values.channels.slack.botToken | quote }} + {{- end }} + {{- if and .Values.channels.slack.enabled .Values.channels.slack.appToken }} + SLACK_APP_TOKEN: {{ .Values.channels.slack.appToken | quote }} + {{- end }} +{{- end }} diff --git a/internal/openclaw/chart/templates/service.yaml b/internal/openclaw/chart/templates/service.yaml new file mode 100644 index 0000000..4fae8c4 --- /dev/null +++ b/internal/openclaw/chart/templates/service.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "openclaw.fullname" . }} + labels: + {{- include "openclaw.labels" . | nindent 4 }} +spec: + type: {{ .Values.service.type }} + ports: + - port: {{ .Values.service.port }} + targetPort: http + protocol: TCP + name: http + selector: + {{- include "openclaw.selectorLabels" .
| nindent 4 }} diff --git a/internal/openclaw/chart/templates/serviceaccount.yaml b/internal/openclaw/chart/templates/serviceaccount.yaml new file mode 100644 index 0000000..906e6e4 --- /dev/null +++ b/internal/openclaw/chart/templates/serviceaccount.yaml @@ -0,0 +1,13 @@ +{{- if .Values.serviceAccount.create -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "openclaw.serviceAccountName" . }} + labels: + {{- include "openclaw.labels" . | nindent 4 }} + {{- with .Values.serviceAccount.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +automountServiceAccountToken: {{ .Values.serviceAccount.automount }} +{{- end }} diff --git a/internal/openclaw/chart/templates/skills-configmap.yaml b/internal/openclaw/chart/templates/skills-configmap.yaml new file mode 100644 index 0000000..a184edd --- /dev/null +++ b/internal/openclaw/chart/templates/skills-configmap.yaml @@ -0,0 +1,11 @@ +{{- if and .Values.skills.enabled .Values.skills.createDefault (not .Values.skills.configMapName) -}} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "openclaw.fullname" . }}-skills + labels: + {{- include "openclaw.labels" . | nindent 4 }} + annotations: + helm.sh/resource-policy: keep +data: {} +{{- end }} diff --git a/internal/openclaw/chart/templates/tests/test-connection.yaml b/internal/openclaw/chart/templates/tests/test-connection.yaml new file mode 100644 index 0000000..b529313 --- /dev/null +++ b/internal/openclaw/chart/templates/tests/test-connection.yaml @@ -0,0 +1,30 @@ +apiVersion: v1 +kind: Pod +metadata: + name: {{ include "openclaw.fullname" . }}-test-connection + labels: + {{- include "openclaw.labels" . 
| nindent 4 }} + app.kubernetes.io/component: test + annotations: + "helm.sh/hook": test + "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded +spec: + restartPolicy: Never + containers: + - name: tcp-check + image: busybox:1.36.1 + command: + - sh + - -c + - | + echo "Testing TCP connection to {{ include "openclaw.fullname" . }}:{{ .Values.service.port }}..." + for i in $(seq 1 10); do + if nc -z {{ include "openclaw.fullname" . }} {{ .Values.service.port }} 2>/dev/null; then + echo "Connection successful!" + exit 0 + fi + echo "Attempt $i/10 failed, retrying in 3s..." + sleep 3 + done + echo "Connection failed after 10 attempts" + exit 1 diff --git a/internal/openclaw/chart/templates/validate.yaml b/internal/openclaw/chart/templates/validate.yaml new file mode 100644 index 0000000..12ec2c4 --- /dev/null +++ b/internal/openclaw/chart/templates/validate.yaml @@ -0,0 +1,19 @@ +{{- if ne (int .Values.replicaCount) 1 -}} +{{- fail "openclaw: replicaCount must be 1 (OpenClaw stores state on disk and should not be scaled horizontally)" -}} +{{- end -}} + +{{- if and .Values.secrets.existingSecret .Values.secrets.create -}} +{{- fail "openclaw: secrets.existingSecret is set; set secrets.create=false" -}} +{{- end -}} + +{{- if and (not .Values.secrets.existingSecret) (not .Values.secrets.create) -}} +{{- fail "openclaw: set secrets.existingSecret or enable secrets.create" -}} +{{- end -}} + +{{- if and .Values.httpRoute.enabled (eq (len .Values.httpRoute.hostnames) 0) -}} +{{- fail "openclaw: httpRoute.enabled is true but httpRoute.hostnames is empty" -}} +{{- end -}} + +{{- if and .Values.skills.enabled (eq .Values.skills.configMapName "") (not .Values.skills.createDefault) -}} +{{- fail "openclaw: skills.enabled is true but no skills.configMapName or skills.createDefault" -}} +{{- end -}} diff --git a/internal/openclaw/chart/values.schema.json b/internal/openclaw/chart/values.schema.json new file mode 100644 index 0000000..e52dc8a --- /dev/null +++ 
b/internal/openclaw/chart/values.schema.json @@ -0,0 +1,377 @@ +{ + "$schema": "https://json-schema.org/draft-07/schema#", + "type": "object", + "required": ["image", "service", "secrets"], + "properties": { + "replicaCount": { + "type": "integer", + "minimum": 1, + "maximum": 1, + "description": "Must be 1 — OpenClaw uses SQLite and cannot scale horizontally" + }, + "image": { + "type": "object", + "required": ["repository"], + "properties": { + "repository": { + "type": "string" + }, + "tag": { + "type": "string" + }, + "pullPolicy": { + "type": "string", + "enum": ["Always", "IfNotPresent", "Never"] + }, + "command": { + "type": "array", + "items": { "type": "string" } + }, + "args": { + "type": "array", + "items": { "type": "string" } + }, + "env": { + "type": "array" + } + } + }, + "imagePullSecrets": { + "type": "array" + }, + "nameOverride": { + "type": "string" + }, + "fullnameOverride": { + "type": "string" + }, + "serviceAccount": { + "type": "object", + "properties": { + "create": { "type": "boolean" }, + "automount": { "type": "boolean" }, + "annotations": { "type": "object" }, + "name": { "type": "string" } + } + }, + "rbac": { + "type": "object", + "properties": { + "create": { "type": "boolean" }, + "extraRules": { "type": "array" } + } + }, + "initJob": { + "type": "object", + "properties": { + "enabled": { "type": "boolean" }, + "image": { + "type": "object", + "properties": { + "repository": { "type": "string" }, + "tag": { "type": "string" }, + "pullPolicy": { + "type": "string", + "enum": ["Always", "IfNotPresent", "Never"] + } + } + }, + "command": { + "type": "array", + "items": { "type": "string" } + }, + "args": { + "type": "array", + "items": { "type": "string" } + }, + "env": { "type": "array" }, + "resources": { "type": "object" } + } + }, + "podAnnotations": { "type": "object" }, + "podLabels": { "type": "object" }, + "podSecurityContext": { "type": "object" }, + "containerSecurityContext": { "type": "object" }, + "service": { + "type": 
"object", + "required": ["port"], + "properties": { + "type": { + "type": "string", + "enum": ["ClusterIP", "NodePort", "LoadBalancer"] + }, + "port": { + "type": "integer", + "minimum": 1, + "maximum": 65535 + } + } + }, + "ingress": { + "type": "object", + "properties": { + "enabled": { "type": "boolean" }, + "className": { "type": "string" }, + "annotations": { "type": "object" }, + "hosts": { "type": "array" }, + "tls": { "type": "array" } + } + }, + "httpRoute": { + "type": "object", + "properties": { + "enabled": { "type": "boolean" }, + "annotations": { "type": "object" }, + "hostnames": { + "type": "array", + "items": { "type": "string" } + }, + "parentRefs": { "type": "array" }, + "pathPrefix": { "type": "string" } + } + }, + "persistence": { + "type": "object", + "properties": { + "enabled": { "type": "boolean" }, + "existingClaim": { "type": "string" }, + "storageClass": { "type": "string" }, + "accessModes": { + "type": "array", + "items": { "type": "string" } + }, + "size": { "type": "string" }, + "mountPath": { "type": "string" } + } + }, + "config": { + "type": "object", + "properties": { + "existingConfigMap": { "type": "string" }, + "key": { "type": "string" }, + "content": { "type": "string" } + } + }, + "openclaw": { + "type": "object", + "properties": { + "stateDir": { "type": "string" }, + "workspaceDir": { "type": "string" }, + "gateway": { + "type": "object", + "properties": { + "mode": { "type": "string" }, + "bind": { "type": "string" }, + "auth": { + "type": "object", + "properties": { + "mode": { + "type": "string", + "enum": ["token", "none"] + } + } + }, + "http": { "type": "object" } + } + } + } + }, + "models": { + "type": "object", + "properties": { + "anthropic": { + "type": "object", + "properties": { + "enabled": { "type": "boolean" }, + "baseUrl": { "type": "string", "format": "uri" }, + "api": { "type": "string" }, + "apiKeyEnvVar": { "type": "string" }, + "apiKeyValue": { "type": "string" }, + "models": { + "type": "array", + 
"items": { + "type": "object", + "required": ["id", "name"], + "properties": { + "id": { "type": "string" }, + "name": { "type": "string" } + } + } + } + } + }, + "openai": { + "type": "object", + "properties": { + "enabled": { "type": "boolean" }, + "baseUrl": { "type": "string", "format": "uri" }, + "api": { "type": "string" }, + "apiKeyEnvVar": { "type": "string" }, + "apiKeyValue": { "type": "string" }, + "models": { + "type": "array", + "items": { + "type": "object", + "required": ["id", "name"], + "properties": { + "id": { "type": "string" }, + "name": { "type": "string" } + } + } + } + } + }, + "ollama": { + "type": "object", + "properties": { + "enabled": { "type": "boolean" }, + "baseUrl": { "type": "string" }, + "api": { "type": "string" }, + "apiKeyEnvVar": { "type": "string" }, + "apiKeyValue": { "type": "string" }, + "models": { + "type": "array", + "items": { + "type": "object", + "required": ["id", "name"], + "properties": { + "id": { "type": "string" }, + "name": { "type": "string" } + } + } + } + } + } + } + }, + "channels": { + "type": "object", + "properties": { + "telegram": { + "type": "object", + "properties": { + "enabled": { "type": "boolean" }, + "botToken": { "type": "string" }, + "dmPolicy": { + "type": "string", + "enum": ["", "open", "paired", "closed"] + } + } + }, + "discord": { + "type": "object", + "properties": { + "enabled": { "type": "boolean" }, + "botToken": { "type": "string" }, + "dmPolicy": { + "type": "string", + "enum": ["", "open", "paired", "closed"] + } + } + }, + "slack": { + "type": "object", + "properties": { + "enabled": { "type": "boolean" }, + "botToken": { "type": "string" }, + "appToken": { "type": "string" } + } + } + } + }, + "skills": { + "type": "object", + "properties": { + "enabled": { "type": "boolean" }, + "createDefault": { "type": "boolean" }, + "configMapName": { "type": "string" }, + "archiveKey": { "type": "string" }, + "extractDir": { "type": "string" }, + "initContainer": { + "type": "object", + 
"properties": { + "image": { + "type": "object", + "properties": { + "repository": { "type": "string" }, + "tag": { "type": "string" }, + "pullPolicy": { + "type": "string", + "enum": ["Always", "IfNotPresent", "Never"] + } + } + } + } + } + } + }, + "erpc": { + "type": "object", + "properties": { + "url": { "type": "string" } + } + }, + "secrets": { + "type": "object", + "properties": { + "existingSecret": { "type": "string" }, + "create": { "type": "boolean" }, + "name": { "type": "string" }, + "gatewayToken": { + "type": "object", + "properties": { + "key": { "type": "string" }, + "value": { "type": "string" } + } + }, + "extraEnvFromSecrets": { + "type": "array", + "items": { "type": "string" } + } + } + }, + "resources": { + "type": "object", + "properties": { + "limits": { "type": "object" }, + "requests": { "type": "object" } + } + }, + "startupProbe": { + "type": "object", + "properties": { + "enabled": { "type": "boolean" }, + "periodSeconds": { "type": "integer", "minimum": 1 }, + "failureThreshold": { "type": "integer", "minimum": 1 }, + "timeoutSeconds": { "type": "integer", "minimum": 1 } + } + }, + "livenessProbe": { + "type": "object", + "properties": { + "enabled": { "type": "boolean" }, + "initialDelaySeconds": { "type": "integer", "minimum": 0 }, + "periodSeconds": { "type": "integer", "minimum": 1 }, + "timeoutSeconds": { "type": "integer", "minimum": 1 }, + "failureThreshold": { "type": "integer", "minimum": 1 } + } + }, + "readinessProbe": { + "type": "object", + "properties": { + "enabled": { "type": "boolean" }, + "initialDelaySeconds": { "type": "integer", "minimum": 0 }, + "periodSeconds": { "type": "integer", "minimum": 1 }, + "timeoutSeconds": { "type": "integer", "minimum": 1 }, + "failureThreshold": { "type": "integer", "minimum": 1 } + } + }, + "extraVolumes": { "type": "array" }, + "extraVolumeMounts": { "type": "array" }, + "extraEnv": { "type": "array" }, + "nodeSelector": { "type": "object" }, + "tolerations": { "type": "array" }, 
+ "affinity": { "type": "object" }, + "priorityClassName": { "type": "string" } + } +} diff --git a/internal/openclaw/chart/values.yaml b/internal/openclaw/chart/values.yaml new file mode 100644 index 0000000..b88ff6f --- /dev/null +++ b/internal/openclaw/chart/values.yaml @@ -0,0 +1,301 @@ +# Default values for openclaw. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +# -- Number of replicas (OpenClaw should run as a single instance) +replicaCount: 1 + +# -- OpenClaw image repository, pull policy, and tag version +image: + repository: ghcr.io/obolnetwork/openclaw + pullPolicy: IfNotPresent + tag: "2026.2.9" + + # -- Override the container command (ENTRYPOINT) + command: + - node + # -- Override the container args (CMD) + args: + - openclaw.mjs + - gateway + - --allow-unconfigured + + # -- Additional environment variables for the container + env: [] + # - name: FOO + # value: bar + +# -- Credentials to fetch images from private registry +imagePullSecrets: [] + +# -- Override the chart name +nameOverride: "" +# -- Override the full resource name +fullnameOverride: "" + +# -- Create a ServiceAccount for OpenClaw +serviceAccount: + create: true + # -- Automatically mount a ServiceAccount's API credentials? + # Set to true when rbac.create is true so the agent can access the K8s API. 
+ automount: false + annotations: {} + name: "" + +# -- RBAC for the ServiceAccount (read-only access to namespace resources) +rbac: + create: false + # -- Extra rules to append to the generated Role (list of PolicyRule objects) + extraRules: [] + +# -- One-shot init Job (runs once to bootstrap workspace/personality) +initJob: + enabled: false + image: + repository: ghcr.io/obolnetwork/openclaw + tag: "" + pullPolicy: IfNotPresent + command: + - node + - openclaw.mjs + - agent + - init + args: [] + # -- Extra environment variables for the init job + env: [] + # -- Resource requests/limits for the init job + resources: + requests: + cpu: 100m + memory: 128Mi + limits: + memory: 512Mi + +# -- Pod annotations +podAnnotations: {} +# -- Pod labels +podLabels: {} + +# -- Pod security context +podSecurityContext: + fsGroup: 1000 + +# -- Container security context +containerSecurityContext: + runAsNonRoot: true + runAsUser: 1000 + runAsGroup: 1000 + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + seccompProfile: + type: RuntimeDefault + +# -- Service configuration +service: + type: ClusterIP + port: 18789 + +# -- Kubernetes Ingress (optional; not used in Obol Stack which uses Gateway API) +ingress: + enabled: false + className: "" + annotations: {} + hosts: + - host: chart-example.local + paths: + - path: / + pathType: Prefix + tls: [] + +# -- Gateway API HTTPRoute (recommended for Obol Stack / Traefik Gateway API) +httpRoute: + enabled: false + annotations: {} + # -- Hostnames for routing (required when enabled) + hostnames: [] + # - openclaw-myid.obol.stack + parentRefs: + - name: traefik-gateway + namespace: traefik + sectionName: web + pathPrefix: / + +# -- Persistence settings for OpenClaw state directory (contains runtime state + secrets) +persistence: + enabled: true + existingClaim: "" + storageClass: "" + accessModes: + - ReadWriteOnce + size: 1Gi + mountPath: /data + +# -- Configuration for the OpenClaw config file (openclaw.json) +config: + # 
-- Use an existing ConfigMap instead of creating one + existingConfigMap: "" + # -- ConfigMap key / filename + key: openclaw.json + # -- Optional raw JSON5 configuration (overrides generated config when set) + content: "" + +# -- OpenClaw state/workspace settings (paths should be inside persistence.mountPath) +openclaw: + stateDir: /data/.openclaw + workspaceDir: /data/.openclaw/workspace + # -- Default agent model (e.g. "anthropic/claude-sonnet-4-5-20250929"). Empty = use provider default. + agentModel: "" + + gateway: + mode: local + bind: lan + auth: + mode: token + + http: + endpoints: + chatCompletions: + enabled: true + +# -- Model provider configuration +# Each provider is independently toggled. At least one must be enabled. +# API keys are stored in the chart Secret and injected as env vars. +models: + anthropic: + enabled: false + baseUrl: https://api.anthropic.com/v1 + api: "" + apiKeyEnvVar: ANTHROPIC_API_KEY + # -- API key value (stored in Secret). Leave empty to provide via extraEnvFromSecrets. + apiKeyValue: "" + models: + - id: claude-sonnet-4-5-20250929 + name: Claude Sonnet 4.5 + openai: + enabled: false + baseUrl: https://api.openai.com/v1 + api: "" + apiKeyEnvVar: OPENAI_API_KEY + apiKeyValue: "" + models: + - id: gpt-4o + name: GPT-4o + ollama: + enabled: true + # -- OpenAI-compatible base URL for Ollama (routed through llmspy global proxy) + baseUrl: http://llmspy.llm.svc.cluster.local:8000/v1 + # -- OpenClaw provider API type. Set to "openai-completions" because llmspy exposes an OpenAI-compatible chat/completions endpoint. + api: "openai-completions" + # -- Env var used for provider API key interpolation in openclaw.json + apiKeyEnvVar: OLLAMA_API_KEY + # -- Value set for the apiKey env var (not a secret for Ollama) + apiKeyValue: ollama-local + models: + - id: glm-4.7-flash + name: glm-4.7-flash + +# -- Chat channel integrations +# Tokens are stored in the chart Secret and injected as env vars. 
+channels: + telegram: + enabled: false + # -- Telegram Bot API token (from @BotFather) + botToken: "" + # -- DM policy: "open" | "paired" | "closed" + dmPolicy: "" + discord: + enabled: false + # -- Discord bot token + botToken: "" + # -- DM policy: "open" | "paired" | "closed" + dmPolicy: "" + slack: + enabled: false + # -- Slack Bot User OAuth Token (xoxb-...) + botToken: "" + # -- Slack App-Level Token (xapp-...) + appToken: "" + +# -- Skills injection from a ConfigMap archive (created by an external tool; e.g. `obol openclaw skills sync`) +skills: + enabled: false + # -- Create a default empty skills ConfigMap when configMapName is not set. + # This allows the chart to deploy without requiring an external ConfigMap. + # Use `obol openclaw skills sync` to populate it later. + createDefault: true + # -- Name of the ConfigMap containing the skills archive (overrides createDefault) + configMapName: "" + archiveKey: skills.tgz + extractDir: /data/.openclaw/skills-injected + initContainer: + image: + repository: busybox + tag: 1.36.1 + pullPolicy: IfNotPresent + +# -- eRPC integration (exposed as ERPC_URL env var) +erpc: + url: http://erpc.erpc.svc.cluster.local:4000/rpc + +# -- OpenClaw secrets (one Secret per instance) +secrets: + # -- Use an existing secret instead of creating one + existingSecret: "" + # -- Create the secret when existingSecret is not set + create: true + # -- Override the created Secret name (defaults to -openclaw-secrets) + name: "" + + gatewayToken: + # -- Secret key name + env var name for gateway token + key: OPENCLAW_GATEWAY_TOKEN + # -- Explicit token value (discouraged). If empty, a token is generated and persisted across upgrades. + value: "" + + # -- Extra Secret names to load via envFrom (for provider/channel keys, etc.) 
+ extraEnvFromSecrets: [] + +# -- Resource requests and limits +resources: + requests: + cpu: 250m + memory: 512Mi + limits: + memory: 2Gi + +# -- Startup probe (tcpSocket; allows generous boot time before liveness kicks in) +startupProbe: + enabled: true + periodSeconds: 5 + failureThreshold: 30 + timeoutSeconds: 3 + +# -- Liveness probe (tcpSocket by default to avoid auth-protected HTTP endpoints) +livenessProbe: + enabled: true + initialDelaySeconds: 10 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 3 + +# -- Readiness probe (tcpSocket by default to avoid auth-protected HTTP endpoints) +readinessProbe: + enabled: true + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 3 + failureThreshold: 3 + +# -- Additional volumes +extraVolumes: [] +# -- Additional volume mounts +extraVolumeMounts: [] +# -- Additional environment variables +extraEnv: [] + +nodeSelector: {} +tolerations: [] +affinity: {} +priorityClassName: "" diff --git a/internal/openclaw/import.go b/internal/openclaw/import.go new file mode 100644 index 0000000..52ca5dd --- /dev/null +++ b/internal/openclaw/import.go @@ -0,0 +1,349 @@ +package openclaw + +import ( + "encoding/json" + "fmt" + "os" + "path/filepath" + "strings" +) + +// ImportResult holds the parsed configuration from ~/.openclaw/openclaw.json +type ImportResult struct { + Providers []ImportedProvider + AgentModel string + Channels ImportedChannels + WorkspaceDir string // path to ~/.openclaw/workspace/ if it exists and contains marker files +} + +// ImportedProvider represents a model provider extracted from openclaw.json +type ImportedProvider struct { + Name string + BaseURL string + API string + APIKey string // literal only; empty if env-var reference + APIKeyEnvVar string // env var name for apiKey interpolation (e.g. 
OLLAMA_API_KEY) + Models []ImportedModel + Disabled bool // when true, emit only enabled: false (used to override chart defaults) +} + +// ImportedModel represents a model entry +type ImportedModel struct { + ID string + Name string +} + +// ImportedChannels holds detected channel configurations +type ImportedChannels struct { + Telegram *ImportedTelegram + Discord *ImportedDiscord + Slack *ImportedSlack +} + +// ImportedTelegram holds Telegram bot config +type ImportedTelegram struct { + BotToken string +} + +// ImportedDiscord holds Discord bot config +type ImportedDiscord struct { + BotToken string +} + +// ImportedSlack holds Slack bot config +type ImportedSlack struct { + BotToken string + AppToken string +} + +// openclawConfig mirrors the relevant parts of ~/.openclaw/openclaw.json +type openclawConfig struct { + Models struct { + Providers map[string]openclawProvider `json:"providers"` + } `json:"models"` + Agents struct { + Defaults struct { + Model struct { + Primary string `json:"primary"` + } `json:"model"` + Workspace string `json:"workspace"` + } `json:"defaults"` + } `json:"agents"` + Channels struct { + Telegram *struct { + BotToken string `json:"botToken"` + } `json:"telegram"` + Discord *struct { + BotToken string `json:"botToken"` + } `json:"discord"` + Slack *struct { + BotToken string `json:"botToken"` + AppToken string `json:"appToken"` + } `json:"slack"` + } `json:"channels"` +} + +type openclawProvider struct { + BaseURL string `json:"baseUrl"` + API string `json:"api"` + APIKey string `json:"apiKey"` + Models []openclawModel `json:"models"` +} + +type openclawModel struct { + ID string `json:"id"` + Name string `json:"name"` +} + +// DetectExistingConfig checks for ~/.openclaw/openclaw.json and parses it. +// Returns nil (not an error) if the file does not exist. 
+func DetectExistingConfig() (*ImportResult, error) { + home, err := os.UserHomeDir() + if err != nil { + return nil, nil + } + + configPath := filepath.Join(home, ".openclaw", "openclaw.json") + data, err := os.ReadFile(configPath) + if err != nil { + if os.IsNotExist(err) { + return nil, nil + } + return nil, fmt.Errorf("failed to read %s: %w", configPath, err) + } + + var cfg openclawConfig + if err := json.Unmarshal(data, &cfg); err != nil { + return nil, fmt.Errorf("failed to parse %s: %w", configPath, err) + } + + result := &ImportResult{ + AgentModel: cfg.Agents.Defaults.Model.Primary, + } + + // Detect workspace directory + result.WorkspaceDir = detectWorkspace(home, cfg.Agents.Defaults.Workspace) + + for name, p := range cfg.Models.Providers { + ip := ImportedProvider{ + Name: name, + BaseURL: p.BaseURL, + API: sanitizeModelAPI(p.API), + } + // Only import literal API keys, skip env-var references like ${...} + if p.APIKey != "" && !isEnvVarRef(p.APIKey) { + ip.APIKey = p.APIKey + } + for _, m := range p.Models { + ip.Models = append(ip.Models, ImportedModel{ID: m.ID, Name: m.Name}) + } + result.Providers = append(result.Providers, ip) + } + + if cfg.Channels.Telegram != nil && cfg.Channels.Telegram.BotToken != "" && !isEnvVarRef(cfg.Channels.Telegram.BotToken) { + result.Channels.Telegram = &ImportedTelegram{BotToken: cfg.Channels.Telegram.BotToken} + } + if cfg.Channels.Discord != nil && cfg.Channels.Discord.BotToken != "" && !isEnvVarRef(cfg.Channels.Discord.BotToken) { + result.Channels.Discord = &ImportedDiscord{BotToken: cfg.Channels.Discord.BotToken} + } + if cfg.Channels.Slack != nil { + botToken := cfg.Channels.Slack.BotToken + appToken := cfg.Channels.Slack.AppToken + if botToken != "" && !isEnvVarRef(botToken) { + result.Channels.Slack = &ImportedSlack{ + BotToken: botToken, + } + if appToken != "" && !isEnvVarRef(appToken) { + result.Channels.Slack.AppToken = appToken + } + } + } + + return result, nil +} + +// TranslateToOverlayYAML maps 
imported config fields to chart values YAML fragment. +// The returned string is appended to the base overlay. +func TranslateToOverlayYAML(result *ImportResult) string { + if result == nil { + return "" + } + + var b strings.Builder + + if result.AgentModel != "" { + b.WriteString(fmt.Sprintf("openclaw:\n agentModel: %s\n\n", result.AgentModel)) + } + + if len(result.Providers) > 0 { + b.WriteString("models:\n") + for _, p := range result.Providers { + b.WriteString(fmt.Sprintf(" %s:\n", p.Name)) + if p.Disabled { + b.WriteString(" enabled: false\n") + continue + } + b.WriteString(" enabled: true\n") + if p.BaseURL != "" { + b.WriteString(fmt.Sprintf(" baseUrl: %s\n", p.BaseURL)) + } + // Always emit api to override any stale base chart value. + // Empty string makes the Helm template omit it from JSON, + // letting OpenClaw auto-detect the protocol. + if p.API != "" { + b.WriteString(fmt.Sprintf(" api: %s\n", p.API)) + } else { + b.WriteString(" api: \"\"\n") + } + if p.APIKeyEnvVar != "" { + b.WriteString(fmt.Sprintf(" apiKeyEnvVar: %s\n", p.APIKeyEnvVar)) + } + if p.APIKey != "" { + b.WriteString(fmt.Sprintf(" apiKeyValue: %s\n", p.APIKey)) + } + if len(p.Models) > 0 { + b.WriteString(" models:\n") + for _, m := range p.Models { + b.WriteString(fmt.Sprintf(" - id: %s\n", m.ID)) + if m.Name != "" { + b.WriteString(fmt.Sprintf(" name: %s\n", m.Name)) + } + } + } + } + b.WriteString("\n") + } + + // Channels + hasChannels := result.Channels.Telegram != nil || result.Channels.Discord != nil || result.Channels.Slack != nil + if hasChannels { + b.WriteString("channels:\n") + if result.Channels.Telegram != nil { + b.WriteString(" telegram:\n") + b.WriteString(" enabled: true\n") + b.WriteString(fmt.Sprintf(" botToken: %s\n", result.Channels.Telegram.BotToken)) + } + if result.Channels.Discord != nil { + b.WriteString(" discord:\n") + b.WriteString(" enabled: true\n") + b.WriteString(fmt.Sprintf(" botToken: %s\n", result.Channels.Discord.BotToken)) + } + if 
result.Channels.Slack != nil { + b.WriteString(" slack:\n") + b.WriteString(" enabled: true\n") + b.WriteString(fmt.Sprintf(" botToken: %s\n", result.Channels.Slack.BotToken)) + if result.Channels.Slack.AppToken != "" { + b.WriteString(fmt.Sprintf(" appToken: %s\n", result.Channels.Slack.AppToken)) + } + } + b.WriteString("\n") + } + + return b.String() +} + +// PrintImportSummary prints a human-readable summary of detected config +func PrintImportSummary(result *ImportResult) { + if result == nil { + return + } + + fmt.Println("Detected existing OpenClaw installation (~/.openclaw/):") + if len(result.Providers) > 0 { + fmt.Printf(" Providers: ") + names := make([]string, 0, len(result.Providers)) + for _, p := range result.Providers { + names = append(names, p.Name) + } + fmt.Println(strings.Join(names, ", ")) + } + if result.AgentModel != "" { + fmt.Printf(" Agent model: %s\n", result.AgentModel) + } + if result.Channels.Telegram != nil { + fmt.Println(" Telegram: configured") + } + if result.Channels.Discord != nil { + fmt.Println(" Discord: configured") + } + if result.Channels.Slack != nil { + fmt.Println(" Slack: configured") + } + if result.WorkspaceDir != "" { + files := detectWorkspaceFiles(result.WorkspaceDir) + fmt.Printf(" Workspace: %s (%s)\n", result.WorkspaceDir, strings.Join(files, ", ")) + } +} + +// workspaceMarkers are files that indicate a valid OpenClaw workspace +var workspaceMarkers = []string{"SOUL.md", "AGENTS.md", "IDENTITY.md"} + +// detectWorkspace checks for an OpenClaw workspace directory and returns +// its path if it exists and contains at least one marker file. 
+func detectWorkspace(home, configWorkspace string) string { + // Use custom workspace path from config if set + wsDir := configWorkspace + if wsDir == "" { + wsDir = filepath.Join(home, ".openclaw", "workspace") + } + + info, err := os.Stat(wsDir) + if err != nil || !info.IsDir() { + return "" + } + + // Verify at least one marker file exists + for _, marker := range workspaceMarkers { + if _, err := os.Stat(filepath.Join(wsDir, marker)); err == nil { + return wsDir + } + } + + return "" +} + +// detectWorkspaceFiles returns the names of workspace files that exist +func detectWorkspaceFiles(wsDir string) []string { + candidates := []string{ + "SOUL.md", "AGENTS.md", "IDENTITY.md", "USER.md", + "TOOLS.md", "MEMORY.md", + } + var found []string + for _, name := range candidates { + if _, err := os.Stat(filepath.Join(wsDir, name)); err == nil { + found = append(found, name) + } + } + // Check for memory/ directory + if info, err := os.Stat(filepath.Join(wsDir, "memory")); err == nil && info.IsDir() { + found = append(found, "memory/") + } + return found +} + +// validModelAPIs is the set of values accepted by OpenClaw's ModelApiSchema (Zod enum). +// Any other value will be rejected at startup. When the api field is omitted, +// OpenClaw auto-detects the protocol from the provider name / baseUrl. +var validModelAPIs = map[string]bool{ + "openai-completions": true, + "openai-responses": true, + "anthropic-messages": true, + "google-generative-ai": true, + "github-copilot": true, + "bedrock-converse-stream": true, +} + +// sanitizeModelAPI returns api unchanged if it is a valid OpenClaw ModelApi enum +// value, or "" (omit) if it is unrecognised. This prevents invalid values +// imported from ~/.openclaw/openclaw.json from crashing the gateway. 
+func sanitizeModelAPI(api string) string { + if validModelAPIs[api] { + return api + } + return "" +} + +// isEnvVarRef returns true if the value looks like an environment variable reference (${...}) +func isEnvVarRef(s string) bool { + return strings.Contains(s, "${") +} diff --git a/internal/openclaw/openclaw.go b/internal/openclaw/openclaw.go new file mode 100644 index 0000000..a00f10a --- /dev/null +++ b/internal/openclaw/openclaw.go @@ -0,0 +1,1246 @@ +package openclaw + +import ( + "bufio" + "bytes" + "context" + "embed" + "encoding/base64" + "encoding/json" + "fmt" + "io" + "io/fs" + "net/http" + "net/url" + "os" + "os/exec" + "os/signal" + "path/filepath" + "strings" + "syscall" + "time" + + "github.com/ObolNetwork/obol-stack/internal/config" + "github.com/ObolNetwork/obol-stack/internal/llm" + "github.com/dustinkirkland/golang-petname" +) + +// CloudProviderInfo holds the cloud provider selection from interactive setup. +// This is used to configure llmspy with the API key separately from the +// OpenClaw overlay (which routes through llmspy). +type CloudProviderInfo struct { + Name string // "anthropic" or "openai" + APIKey string + ModelID string // e.g. "claude-sonnet-4-5-20250929" + Display string // e.g. "Claude Sonnet 4.5" +} + +const ( + appName = "openclaw" + defaultDomain = "obol.stack" +) + +// Embed the OpenClaw Helm chart from the shared charts directory. +// The chart source lives in internal/embed/charts/openclaw/ and is +// referenced here so the openclaw package owns its own chart lifecycle. 
+// +//go:embed all:chart +var chartFS embed.FS + +// OnboardOptions contains options for the onboard command +type OnboardOptions struct { + ID string // Deployment ID (empty = generate petname) + Force bool // Overwrite existing deployment + Sync bool // Also run helmfile sync after install + Interactive bool // true = prompt for provider choice; false = silent defaults + IsDefault bool // true = use fixed ID "default", idempotent on re-run +} + +// SetupDefault deploys a default OpenClaw instance as part of stack setup. +// It is idempotent: if a "default" deployment already exists, it re-syncs. +// When Ollama is not detected on the host and no existing ~/.openclaw config +// is found, it skips provider setup gracefully so the user can configure +// later with `obol openclaw setup`. +func SetupDefault(cfg *config.Config) error { + // Check whether the default deployment already exists (re-sync path). + // If it does, proceed unconditionally — the overlay was already written. + deploymentDir := deploymentPath(cfg, "default") + if _, err := os.Stat(deploymentDir); err == nil { + // Existing deployment — always re-sync regardless of Ollama status. 
+ return Onboard(cfg, OnboardOptions{ + ID: "default", + Sync: true, + IsDefault: true, + }) + } + + // Check if there is an existing ~/.openclaw config with providers + imported, _ := DetectExistingConfig() + hasImportedProviders := imported != nil && len(imported.Providers) > 0 + + // If no imported providers, check Ollama availability for the default overlay + if !hasImportedProviders { + ollamaAvailable := detectOllama() + if ollamaAvailable { + fmt.Printf(" ✓ Ollama detected at %s\n", ollamaEndpoint()) + } else { + fmt.Printf(" ⚠ Ollama not detected on host (%s)\n", ollamaEndpoint()) + fmt.Println(" Skipping default OpenClaw provider setup.") + fmt.Println(" Run 'obol openclaw setup default' to configure a provider later.") + return nil + } + } + + return Onboard(cfg, OnboardOptions{ + ID: "default", + Sync: true, + IsDefault: true, + }) +} + +// Onboard creates and optionally deploys an OpenClaw instance +func Onboard(cfg *config.Config, opts OnboardOptions) error { + id := opts.ID + if opts.IsDefault { + id = "default" + } + if id == "" { + id = petname.Generate(2, "-") + fmt.Printf("Generated deployment ID: %s\n", id) + } else { + fmt.Printf("Using deployment ID: %s\n", id) + } + + deploymentDir := deploymentPath(cfg, id) + + // Idempotent re-run for default deployment: just re-sync + if opts.IsDefault && !opts.Force { + if _, err := os.Stat(deploymentDir); err == nil { + fmt.Println("Default OpenClaw instance already configured, re-syncing...") + if opts.Sync { + if err := doSync(cfg, id); err != nil { + return err + } + // Import workspace on re-sync too + imported, _ := DetectExistingConfig() + if imported != nil && imported.WorkspaceDir != "" { + copyWorkspaceToPod(cfg, id, imported.WorkspaceDir) + } + return nil + } + return nil + } + } + + if _, err := os.Stat(deploymentDir); err == nil { + if !opts.Force && !opts.IsDefault { + return fmt.Errorf("deployment already exists: %s/%s\n"+ + "Directory: %s\n"+ + "Use --force or -f to overwrite", appName, id, 
deploymentDir) + } + fmt.Printf("WARNING: Overwriting existing deployment at %s\n", deploymentDir) + } + + // Detect existing ~/.openclaw config + imported, err := DetectExistingConfig() + if err != nil { + fmt.Printf("Warning: failed to read existing config: %v\n", err) + } + if imported != nil { + PrintImportSummary(imported) + } + + // Interactive setup: auto-skip prompts when existing config has providers + if opts.Interactive { + if imported != nil && len(imported.Providers) > 0 { + fmt.Println("\nUsing detected configuration from ~/.openclaw/") + } else { + var cloudProvider *CloudProviderInfo + imported, cloudProvider, err = interactiveSetup(imported) + if err != nil { + return fmt.Errorf("interactive setup failed: %w", err) + } + // Push cloud API key to llmspy if a cloud provider was selected + if cloudProvider != nil { + if llmErr := llm.ConfigureLLMSpy(cfg, cloudProvider.Name, cloudProvider.APIKey); llmErr != nil { + fmt.Printf("Warning: failed to configure llmspy: %v\n", llmErr) + fmt.Println("You can configure it later with: obol llm configure") + } + } + } + } + + if err := os.MkdirAll(deploymentDir, 0755); err != nil { + return fmt.Errorf("failed to create deployment directory: %w", err) + } + + // Copy embedded chart to deployment/chart/ + chartDir := filepath.Join(deploymentDir, "chart") + if err := copyEmbeddedChart(chartDir); err != nil { + os.RemoveAll(deploymentDir) + return fmt.Errorf("failed to copy chart: %w", err) + } + + // Write values.yaml from the embedded chart defaults + defaultValues, err := chartFS.ReadFile("chart/values.yaml") + if err != nil { + os.RemoveAll(deploymentDir) + return fmt.Errorf("failed to read chart defaults: %w", err) + } + if err := os.WriteFile(filepath.Join(deploymentDir, "values.yaml"), defaultValues, 0644); err != nil { + os.RemoveAll(deploymentDir) + return fmt.Errorf("failed to write values.yaml: %w", err) + } + + // Write Obol Stack overlay values (httpRoute, provider config, eRPC, skills) + hostname := 
fmt.Sprintf("openclaw-%s.%s", id, defaultDomain) + namespace := fmt.Sprintf("%s-%s", appName, id) + overlay := generateOverlayValues(hostname, imported) + if err := os.WriteFile(filepath.Join(deploymentDir, "values-obol.yaml"), []byte(overlay), 0644); err != nil { + os.RemoveAll(deploymentDir) + return fmt.Errorf("failed to write overlay values: %w", err) + } + + // Generate helmfile.yaml referencing local chart + helmfileContent := generateHelmfile(id, namespace) + if err := os.WriteFile(filepath.Join(deploymentDir, "helmfile.yaml"), []byte(helmfileContent), 0644); err != nil { + os.RemoveAll(deploymentDir) + return fmt.Errorf("failed to write helmfile.yaml: %w", err) + } + + fmt.Printf("\n✓ OpenClaw instance configured!\n") + fmt.Printf(" Deployment: %s/%s\n", appName, id) + fmt.Printf(" Namespace: %s\n", namespace) + fmt.Printf(" Hostname: %s\n", hostname) + fmt.Printf(" Location: %s\n", deploymentDir) + fmt.Printf("\nFiles created:\n") + fmt.Printf(" - chart/ Embedded OpenClaw Helm chart\n") + fmt.Printf(" - values.yaml Chart defaults (edit to customize)\n") + fmt.Printf(" - values-obol.yaml Obol Stack defaults (httpRoute, providers, eRPC)\n") + fmt.Printf(" - helmfile.yaml Deployment configuration\n") + + if opts.Sync { + fmt.Printf("\nDeploying to cluster...\n\n") + if err := doSync(cfg, id); err != nil { + return err + } + // Copy workspace files into the pod after sync succeeds + if imported != nil && imported.WorkspaceDir != "" { + copyWorkspaceToPod(cfg, id, imported.WorkspaceDir) + } + return nil + } + + fmt.Printf("\nTo deploy: obol openclaw sync %s\n", id) + return nil +} + +// Sync deploys or updates an OpenClaw instance +func Sync(cfg *config.Config, id string) error { + return doSync(cfg, id) +} + +func doSync(cfg *config.Config, id string) error { + deploymentDir := deploymentPath(cfg, id) + if _, err := os.Stat(deploymentDir); os.IsNotExist(err) { + return fmt.Errorf("deployment not found: %s/%s\nDirectory: %s", appName, id, deploymentDir) + } + + 
helmfilePath := filepath.Join(deploymentDir, "helmfile.yaml") + if _, err := os.Stat(helmfilePath); os.IsNotExist(err) { + return fmt.Errorf("helmfile.yaml not found in: %s", deploymentDir) + } + + kubeconfigPath := filepath.Join(cfg.ConfigDir, "kubeconfig.yaml") + if _, err := os.Stat(kubeconfigPath); os.IsNotExist(err) { + return fmt.Errorf("cluster not running. Run 'obol stack up' first") + } + + helmfileBinary := filepath.Join(cfg.BinDir, "helmfile") + if _, err := os.Stat(helmfileBinary); os.IsNotExist(err) { + return fmt.Errorf("helmfile not found at %s", helmfileBinary) + } + + fmt.Printf("Syncing OpenClaw: %s/%s\n", appName, id) + fmt.Printf("Deployment directory: %s\n", deploymentDir) + fmt.Printf("Running helmfile sync...\n\n") + + cmd := exec.Command(helmfileBinary, "-f", helmfilePath, "sync") + cmd.Dir = deploymentDir + cmd.Env = append(os.Environ(), + fmt.Sprintf("KUBECONFIG=%s", kubeconfigPath), + ) + cmd.Stdin = os.Stdin + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + + if err := cmd.Run(); err != nil { + return fmt.Errorf("helmfile sync failed: %w", err) + } + + namespace := fmt.Sprintf("%s-%s", appName, id) + hostname := fmt.Sprintf("openclaw-%s.%s", id, defaultDomain) + fmt.Printf("\n✓ OpenClaw synced successfully!\n") + fmt.Printf(" Namespace: %s\n", namespace) + fmt.Printf(" URL: http://%s\n", hostname) + fmt.Printf("\nRetrieve gateway token:\n") + fmt.Printf(" obol openclaw token %s\n", id) + fmt.Printf("\nPort-forward fallback:\n") + fmt.Printf(" obol kubectl -n %s port-forward svc/openclaw 18789:18789\n", namespace) + + return nil +} + +// copyWorkspaceToPod copies the local workspace directory into the OpenClaw pod's PVC. +// This is non-fatal: failures print a warning and continue. 
+func copyWorkspaceToPod(cfg *config.Config, id, workspaceDir string) {
+	namespace := fmt.Sprintf("%s-%s", appName, id)
+	kubeconfigPath := filepath.Join(cfg.ConfigDir, "kubeconfig.yaml")
+	kubectlBinary := filepath.Join(cfg.BinDir, "kubectl")
+
+	fmt.Printf("\nImporting workspace from %s...\n", workspaceDir)
+
+	// Wait for pod to be ready
+	podName, err := waitForPod(kubectlBinary, kubeconfigPath, namespace, 60)
+	if err != nil {
+		fmt.Printf("Warning: could not find ready pod, skipping workspace import: %v\n", err)
+		return
+	}
+
+	// kubectl cp <workspaceDir>/. <podName>:/data/.openclaw/workspace/ -n <namespace>
+	dest := fmt.Sprintf("%s:/data/.openclaw/workspace/", podName)
+	src := workspaceDir + "/."
+	cmd := exec.Command(kubectlBinary, "cp", src, dest, "-n", namespace)
+	cmd.Env = append(os.Environ(), fmt.Sprintf("KUBECONFIG=%s", kubeconfigPath))
+	var stderr bytes.Buffer
+	cmd.Stderr = &stderr
+
+	if err := cmd.Run(); err != nil {
+		fmt.Printf("Warning: workspace copy failed: %v\n%s", err, stderr.String())
+		return
+	}
+
+	fmt.Printf("Imported workspace into pod %s\n", podName)
+}
+
+// waitForPod polls for a Running pod matching the openclaw label and returns its name.
+// Returns an error if no ready pod is found within timeoutSec seconds.
+func waitForPod(kubectlBinary, kubeconfigPath, namespace string, timeoutSec int) (string, error) { + labelSelector := fmt.Sprintf("app.kubernetes.io/name=%s", appName) + + for i := 0; i < timeoutSec; i += 3 { + cmd := exec.Command(kubectlBinary, "get", "pods", + "-n", namespace, + "-l", labelSelector, + "-o", "jsonpath={.items[?(@.status.phase=='Running')].metadata.name}", + ) + cmd.Env = append(os.Environ(), fmt.Sprintf("KUBECONFIG=%s", kubeconfigPath)) + var stdout bytes.Buffer + cmd.Stdout = &stdout + cmd.Run() + + podName := strings.TrimSpace(stdout.String()) + if podName != "" { + // If multiple pods, take the first + if idx := strings.Index(podName, " "); idx > 0 { + podName = podName[:idx] + } + return podName, nil + } + + time.Sleep(3 * time.Second) + } + + return "", fmt.Errorf("timed out waiting for pod in namespace %s", namespace) +} + +// getToken retrieves the gateway token for an OpenClaw instance as a string. +func getToken(cfg *config.Config, id string) (string, error) { + namespace := fmt.Sprintf("%s-%s", appName, id) + + kubeconfigPath := filepath.Join(cfg.ConfigDir, "kubeconfig.yaml") + if _, err := os.Stat(kubeconfigPath); os.IsNotExist(err) { + return "", fmt.Errorf("cluster not running. 
Run 'obol stack up' first") + } + + kubectlBinary := filepath.Join(cfg.BinDir, "kubectl") + + cmd := exec.Command(kubectlBinary, "get", "secret", "-n", namespace, + "-l", fmt.Sprintf("app.kubernetes.io/name=%s", appName), + "-o", "json") + cmd.Env = append(os.Environ(), fmt.Sprintf("KUBECONFIG=%s", kubeconfigPath)) + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + + if err := cmd.Run(); err != nil { + return "", fmt.Errorf("failed to get secret: %w\n%s", err, stderr.String()) + } + + var secretList struct { + Items []struct { + Data map[string]string `json:"data"` + } `json:"items"` + } + if err := json.Unmarshal(stdout.Bytes(), &secretList); err != nil { + return "", fmt.Errorf("failed to parse secret: %w", err) + } + + if len(secretList.Items) == 0 { + return "", fmt.Errorf("no secrets found in namespace %s. Is OpenClaw deployed?", namespace) + } + + for _, item := range secretList.Items { + if encoded, ok := item.Data["OPENCLAW_GATEWAY_TOKEN"]; ok { + decoded, err := base64.StdEncoding.DecodeString(encoded) + if err != nil { + return "", fmt.Errorf("failed to decode token: %w", err) + } + return string(decoded), nil + } + } + + return "", fmt.Errorf("OPENCLAW_GATEWAY_TOKEN not found in namespace %s secrets", namespace) +} + +// Token retrieves the gateway token for an OpenClaw instance and prints it. +func Token(cfg *config.Config, id string) error { + token, err := getToken(cfg, id) + if err != nil { + return err + } + fmt.Printf("%s\n", token) + return nil +} + +// findOpenClawBinary locates the openclaw CLI binary. +// Search order: PATH, then cfg.BinDir. 
+func findOpenClawBinary(cfg *config.Config) (string, error) { + if p, err := exec.LookPath("openclaw"); err == nil { + return p, nil + } + candidate := filepath.Join(cfg.BinDir, "openclaw") + if _, err := os.Stat(candidate); err == nil { + return candidate, nil + } + return "", fmt.Errorf("openclaw CLI not found.\n\nInstall with one of:\n obolup.sh (re-run bootstrap installer)\n curl -fsSL https://openclaw.ai/install.sh | bash\n npm install -g openclaw (requires Node.js 22+)") +} + +// portForwarder manages a background kubectl port-forward process. +type portForwarder struct { + cmd *exec.Cmd + localPort int + done chan error + cancel context.CancelFunc +} + +// startPortForward launches kubectl port-forward in the background and waits +// until it reports the forwarding address on stdout. +func startPortForward(cfg *config.Config, namespace string, localPort int) (*portForwarder, error) { + kubeconfigPath := filepath.Join(cfg.ConfigDir, "kubeconfig.yaml") + if _, err := os.Stat(kubeconfigPath); os.IsNotExist(err) { + return nil, fmt.Errorf("cluster not running. Run 'obol stack up' first") + } + + kubectlBinary := filepath.Join(cfg.BinDir, "kubectl") + + portArg := fmt.Sprintf("%d:18789", localPort) + if localPort == 0 { + portArg = ":18789" + } + + ctx, cancel := context.WithCancel(context.Background()) + cmd := exec.CommandContext(ctx, kubectlBinary, "port-forward", + fmt.Sprintf("svc/%s", appName), portArg, "-n", namespace) + cmd.Env = append(os.Environ(), fmt.Sprintf("KUBECONFIG=%s", kubeconfigPath)) + + // kubectl prints "Forwarding from ..." 
to stdout (not stderr) + stdoutPipe, err := cmd.StdoutPipe() + if err != nil { + cancel() + return nil, fmt.Errorf("failed to create stdout pipe: %w", err) + } + + if err := cmd.Start(); err != nil { + cancel() + return nil, fmt.Errorf("failed to start port-forward: %w", err) + } + + done := make(chan error, 1) + go func() { + done <- cmd.Wait() + }() + + // Parse the "Forwarding from 127.0.0.1:" line from stdout + parsedPort := make(chan int, 1) + parseErr := make(chan error, 1) + go func() { + scanner := bufio.NewScanner(stdoutPipe) + for scanner.Scan() { + line := scanner.Text() + // kubectl prints: "Forwarding from 127.0.0.1: -> 18789" + if strings.Contains(line, "Forwarding from") { + parts := strings.Split(line, ":") + if len(parts) >= 2 { + portPart := strings.Fields(parts[len(parts)-1])[0] + var p int + if _, err := fmt.Sscanf(portPart, "%d", &p); err == nil { + parsedPort <- p + // Continue draining to prevent pipe blocking + io.Copy(io.Discard, stdoutPipe) + return + } + } + } + } + parseErr <- fmt.Errorf("port-forward exited without reporting a local port") + }() + + select { + case p := <-parsedPort: + return &portForwarder{cmd: cmd, localPort: p, done: done, cancel: cancel}, nil + case err := <-parseErr: + cancel() + return nil, err + case err := <-done: + cancel() + if err != nil { + return nil, fmt.Errorf("port-forward process exited unexpectedly: %w", err) + } + return nil, fmt.Errorf("port-forward process exited unexpectedly") + case <-time.After(30 * time.Second): + cancel() + return nil, fmt.Errorf("timed out waiting for port-forward to become ready") + } +} + +// Stop terminates the port-forward process gracefully. +func (pf *portForwarder) Stop() { + pf.cancel() + select { + case <-pf.done: + case <-time.After(5 * time.Second): + if pf.cmd.Process != nil { + pf.cmd.Process.Kill() + } + } +} + +// SetupOptions contains options for the setup command. 
+type SetupOptions struct { + Port int // kept for backward compat; currently unused +} + +// Setup reconfigures model providers for a deployed OpenClaw instance. +// It runs the interactive provider prompt, regenerates the overlay values, +// and syncs via helmfile so the pod picks up the new configuration. +func Setup(cfg *config.Config, id string, _ SetupOptions) error { + deploymentDir := deploymentPath(cfg, id) + if _, err := os.Stat(deploymentDir); os.IsNotExist(err) { + return fmt.Errorf("deployment not found: %s/%s\nRun 'obol openclaw up' first", appName, id) + } + + // Always show the provider prompt — that's the whole point of setup. + imported, cloudProvider, err := interactiveSetup(nil) + if err != nil { + return fmt.Errorf("setup failed: %w", err) + } + + // Push cloud API key to llmspy if a cloud provider was selected + if cloudProvider != nil { + if llmErr := llm.ConfigureLLMSpy(cfg, cloudProvider.Name, cloudProvider.APIKey); llmErr != nil { + fmt.Printf("Warning: failed to configure llmspy: %v\n", llmErr) + fmt.Println("You can configure it later with: obol llm configure") + } + } + + // Re-copy the embedded chart so the deployment dir picks up any chart fixes + // (e.g. corrected default values, template changes) from the current binary. 
+ chartDir := filepath.Join(deploymentDir, "chart") + if err := copyEmbeddedChart(chartDir); err != nil { + return fmt.Errorf("failed to update chart: %w", err) + } + + // Write updated base values.yaml from the embedded chart defaults + defaultValues, err := chartFS.ReadFile("chart/values.yaml") + if err != nil { + return fmt.Errorf("failed to read chart defaults: %w", err) + } + if err := os.WriteFile(filepath.Join(deploymentDir, "values.yaml"), defaultValues, 0644); err != nil { + return fmt.Errorf("failed to write values.yaml: %w", err) + } + + // Regenerate overlay values with the selected provider + hostname := fmt.Sprintf("openclaw-%s.%s", id, defaultDomain) + overlay := generateOverlayValues(hostname, imported) + overlayPath := filepath.Join(deploymentDir, "values-obol.yaml") + if err := os.WriteFile(overlayPath, []byte(overlay), 0644); err != nil { + return fmt.Errorf("failed to write overlay values: %w", err) + } + + fmt.Printf("\nApplying configuration...\n\n") + if err := doSync(cfg, id); err != nil { + return err + } + + namespace := fmt.Sprintf("%s-%s", appName, id) + kubeconfigPath := filepath.Join(cfg.ConfigDir, "kubeconfig.yaml") + kubectlBinary := filepath.Join(cfg.BinDir, "kubectl") + + fmt.Printf("\nWaiting for pod to be ready...\n") + if _, err := waitForPod(kubectlBinary, kubeconfigPath, namespace, 90); err != nil { + fmt.Printf("Warning: pod not ready yet: %v\n", err) + fmt.Println("The deployment may still be rolling out. Check with: obol kubectl get pods -n", namespace) + } else { + fmt.Printf("\n✓ Setup complete!\n") + fmt.Printf(" Open dashboard: obol openclaw dashboard %s\n", id) + } + return nil +} + +// DashboardOptions contains options for the dashboard command. +type DashboardOptions struct { + Port int + NoBrowser bool +} + +// Dashboard port-forwards to the OpenClaw instance and opens the web dashboard. +// The onReady callback is invoked with the dashboard URL; the CLI layer uses it +// to open a browser. 
+func Dashboard(cfg *config.Config, id string, opts DashboardOptions, onReady func(url string)) error { + deploymentDir := deploymentPath(cfg, id) + if _, err := os.Stat(deploymentDir); os.IsNotExist(err) { + return fmt.Errorf("deployment not found: %s/%s\nRun 'obol openclaw up' first", appName, id) + } + + token, err := getToken(cfg, id) + if err != nil { + return err + } + + namespace := fmt.Sprintf("%s-%s", appName, id) + fmt.Printf("Starting port-forward to %s...\n", namespace) + + pf, err := startPortForward(cfg, namespace, opts.Port) + if err != nil { + return fmt.Errorf("port-forward failed: %w", err) + } + defer pf.Stop() + + dashboardURL := fmt.Sprintf("http://localhost:%d/#token=%s", pf.localPort, token) + fmt.Printf("Port-forward active: localhost:%d -> %s:18789\n", pf.localPort, namespace) + fmt.Printf("\nDashboard URL: %s\n", dashboardURL) + fmt.Printf("Gateway token: %s\n", token) + fmt.Printf("\nPress Ctrl+C to stop.\n") + + if onReady != nil { + onReady(dashboardURL) + } + + sigCh := make(chan os.Signal, 1) + signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM) + defer signal.Stop(sigCh) + + select { + case <-sigCh: + fmt.Printf("\nShutting down...\n") + case err := <-pf.done: + if err != nil { + return fmt.Errorf("port-forward died unexpectedly: %w", err) + } + } + + return nil +} + +// List displays installed OpenClaw instances +func List(cfg *config.Config) error { + appsDir := filepath.Join(cfg.ConfigDir, "applications", appName) + + if _, err := os.Stat(appsDir); os.IsNotExist(err) { + fmt.Println("No OpenClaw instances installed") + fmt.Println("\nTo create one: obol openclaw up") + return nil + } + + entries, err := os.ReadDir(appsDir) + if err != nil { + return fmt.Errorf("failed to read directory: %w", err) + } + + if len(entries) == 0 { + fmt.Println("No OpenClaw instances installed") + return nil + } + + fmt.Println("OpenClaw instances:") + fmt.Println() + + count := 0 + for _, entry := range entries { + if !entry.IsDir() { + continue + 
} + id := entry.Name() + namespace := fmt.Sprintf("%s-%s", appName, id) + hostname := fmt.Sprintf("openclaw-%s.%s", id, defaultDomain) + fmt.Printf(" %s\n", id) + fmt.Printf(" Namespace: %s\n", namespace) + fmt.Printf(" URL: http://%s\n", hostname) + fmt.Println() + count++ + } + + fmt.Printf("Total: %d instance(s)\n", count) + return nil +} + +// Delete removes an OpenClaw instance +func Delete(cfg *config.Config, id string, force bool) error { + namespace := fmt.Sprintf("%s-%s", appName, id) + deploymentDir := deploymentPath(cfg, id) + + fmt.Printf("Deleting OpenClaw: %s/%s\n", appName, id) + fmt.Printf("Namespace: %s\n", namespace) + + configExists := false + if _, err := os.Stat(deploymentDir); err == nil { + configExists = true + } + + namespaceExists := false + kubeconfigPath := filepath.Join(cfg.ConfigDir, "kubeconfig.yaml") + if _, err := os.Stat(kubeconfigPath); err == nil { + kubectlBinary := filepath.Join(cfg.BinDir, "kubectl") + cmd := exec.Command(kubectlBinary, "get", "namespace", namespace) + cmd.Env = append(os.Environ(), fmt.Sprintf("KUBECONFIG=%s", kubeconfigPath)) + if err := cmd.Run(); err == nil { + namespaceExists = true + } + } + + if !namespaceExists && !configExists { + return fmt.Errorf("instance not found: %s", id) + } + + fmt.Println("\nResources to be deleted:") + if namespaceExists { + fmt.Printf(" [x] Kubernetes namespace: %s\n", namespace) + } else { + fmt.Printf(" [ ] Kubernetes namespace: %s (not found)\n", namespace) + } + if configExists { + fmt.Printf(" [x] Configuration: %s\n", deploymentDir) + } + + if !force { + fmt.Print("\nProceed with deletion? 
[y/N]: ") + var response string + fmt.Scanln(&response) + if strings.ToLower(response) != "y" && strings.ToLower(response) != "yes" { + fmt.Println("Deletion cancelled") + return nil + } + } + + if namespaceExists { + fmt.Printf("\nDeleting namespace %s...\n", namespace) + kubectlBinary := filepath.Join(cfg.BinDir, "kubectl") + cmd := exec.Command(kubectlBinary, "delete", "namespace", namespace, + "--force", "--grace-period=0") + cmd.Env = append(os.Environ(), fmt.Sprintf("KUBECONFIG=%s", kubeconfigPath)) + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + if err := cmd.Run(); err != nil { + return fmt.Errorf("failed to delete namespace: %w", err) + } + fmt.Println("Namespace deleted") + } + + if configExists { + fmt.Printf("Deleting configuration...\n") + if err := os.RemoveAll(deploymentDir); err != nil { + return fmt.Errorf("failed to delete config directory: %w", err) + } + fmt.Println("Configuration deleted") + + parentDir := filepath.Join(cfg.ConfigDir, "applications", appName) + entries, err := os.ReadDir(parentDir) + if err == nil && len(entries) == 0 { + os.Remove(parentDir) + } + } + + fmt.Printf("\n✓ OpenClaw %s deleted successfully!\n", id) + return nil +} + +// SkillsSync packages a local skills directory into a ConfigMap and rolls the deployment +func SkillsSync(cfg *config.Config, id, skillsDir string) error { + namespace := fmt.Sprintf("%s-%s", appName, id) + + kubeconfigPath := filepath.Join(cfg.ConfigDir, "kubeconfig.yaml") + if _, err := os.Stat(kubeconfigPath); os.IsNotExist(err) { + return fmt.Errorf("cluster not running. 
Run 'obol stack up' first") + } + + if _, err := os.Stat(skillsDir); os.IsNotExist(err) { + return fmt.Errorf("skills directory not found: %s", skillsDir) + } + + configMapName := fmt.Sprintf("openclaw-%s-skills", id) + archiveKey := "skills.tgz" + + fmt.Printf("Packaging skills from %s...\n", skillsDir) + + var archiveBuf bytes.Buffer + tarCmd := exec.Command("tar", "-czf", "-", "-C", skillsDir, ".") + tarCmd.Stdout = &archiveBuf + var tarStderr bytes.Buffer + tarCmd.Stderr = &tarStderr + if err := tarCmd.Run(); err != nil { + return fmt.Errorf("failed to create skills archive: %w\n%s", err, tarStderr.String()) + } + + tmpFile, err := os.CreateTemp("", "openclaw-skills-*.tgz") + if err != nil { + return fmt.Errorf("failed to create temp file: %w", err) + } + defer os.Remove(tmpFile.Name()) + + if _, err := tmpFile.Write(archiveBuf.Bytes()); err != nil { + tmpFile.Close() + return fmt.Errorf("failed to write archive: %w", err) + } + tmpFile.Close() + + kubectlBinary := filepath.Join(cfg.BinDir, "kubectl") + + delCmd := exec.Command(kubectlBinary, "delete", "configmap", configMapName, + "-n", namespace, "--ignore-not-found") + delCmd.Env = append(os.Environ(), fmt.Sprintf("KUBECONFIG=%s", kubeconfigPath)) + delCmd.Run() + + fmt.Printf("Creating ConfigMap %s in namespace %s...\n", configMapName, namespace) + createCmd := exec.Command(kubectlBinary, "create", "configmap", configMapName, + "-n", namespace, + fmt.Sprintf("--from-file=%s=%s", archiveKey, tmpFile.Name())) + createCmd.Env = append(os.Environ(), fmt.Sprintf("KUBECONFIG=%s", kubeconfigPath)) + var createStderr bytes.Buffer + createCmd.Stderr = &createStderr + if err := createCmd.Run(); err != nil { + return fmt.Errorf("failed to create ConfigMap: %w\n%s", err, createStderr.String()) + } + + fmt.Printf("✓ Skills ConfigMap updated: %s\n", configMapName) + fmt.Printf("\nTo apply, re-sync: obol openclaw sync %s\n", id) + return nil +} + +// remoteCapableCommands lists openclaw subcommands that support --url and 
--token flags. +var remoteCapableCommands = map[string]bool{ + "gateway": true, + "acp": true, + "browser": true, + "logs": true, +} + +// CLI runs an openclaw CLI command against a deployed instance. +// Commands that support --url/--token are executed locally with a port-forward; +// others are executed via kubectl exec into the pod. +func CLI(cfg *config.Config, id string, args []string) error { + deploymentDir := deploymentPath(cfg, id) + if _, err := os.Stat(deploymentDir); os.IsNotExist(err) { + return fmt.Errorf("deployment not found: %s/%s\nRun 'obol openclaw up' first", appName, id) + } + + namespace := fmt.Sprintf("%s-%s", appName, id) + + if len(args) == 0 { + return fmt.Errorf("no openclaw command specified\n\nExamples:\n" + + " obol openclaw cli %s -- gateway health\n" + + " obol openclaw cli %s -- gateway call config.get\n" + + " obol openclaw cli %s -- doctor", id, id, id) + } + + // Determine if the command supports --url/--token (remote-capable) + firstArg := args[0] + if remoteCapableCommands[firstArg] { + return cliViaPortForward(cfg, id, namespace, args) + } + return cliViaKubectlExec(cfg, namespace, args) +} + +// cliViaPortForward runs an openclaw command locally with port-forward + --url/--token. +func cliViaPortForward(cfg *config.Config, id, namespace string, args []string) error { + openclawBinary, err := findOpenClawBinary(cfg) + if err != nil { + return err + } + + token, err := getToken(cfg, id) + if err != nil { + return fmt.Errorf("failed to get gateway token: %w", err) + } + + pf, err := startPortForward(cfg, namespace, 0) + if err != nil { + return fmt.Errorf("port-forward failed: %w", err) + } + defer pf.Stop() + + // Append --url and --token to the args + wsURL := fmt.Sprintf("ws://localhost:%d", pf.localPort) + fullArgs := append(args, "--url", wsURL, "--token", token) + + cmd := exec.Command(openclawBinary, fullArgs...) 
+	cmd.Stdin = os.Stdin
+	cmd.Stdout = os.Stdout
+	cmd.Stderr = os.Stderr
+
+	// Handle signals to clean up port-forward
+	sigCh := make(chan os.Signal, 1)
+	signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM)
+	defer signal.Stop(sigCh)
+
+	go func() {
+		<-sigCh
+		pf.Stop()
+	}()
+
+	if err := cmd.Run(); err != nil {
+		if exitErr, ok := err.(*exec.ExitError); ok {
+			if status, ok := exitErr.Sys().(syscall.WaitStatus); ok {
+				os.Exit(status.ExitStatus())
+			}
+		}
+		return err
+	}
+	return nil
+}
+
+// cliViaKubectlExec runs an openclaw command inside the pod via kubectl exec.
+func cliViaKubectlExec(cfg *config.Config, namespace string, args []string) error {
+	kubeconfigPath := filepath.Join(cfg.ConfigDir, "kubeconfig.yaml")
+	if _, err := os.Stat(kubeconfigPath); os.IsNotExist(err) {
+		return fmt.Errorf("cluster not running. Run 'obol stack up' first")
+	}
+
+	kubectlBinary := filepath.Join(cfg.BinDir, "kubectl")
+
+	// Build: kubectl exec -it -n <namespace> deploy/openclaw -- node openclaw.mjs <args>
+	// The pod runs `node openclaw.mjs` (no standalone binary in PATH).
+	execArgs := []string{
+		"exec", "-it",
+		"-n", namespace,
+		"deploy/openclaw",
+		"--",
+		"node", "openclaw.mjs",
+	}
+	execArgs = append(execArgs, args...)
+
+	cmd := exec.Command(kubectlBinary, execArgs...)
+ cmd.Env = append(os.Environ(), fmt.Sprintf("KUBECONFIG=%s", kubeconfigPath)) + cmd.Stdin = os.Stdin + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + + if err := cmd.Run(); err != nil { + if exitErr, ok := err.(*exec.ExitError); ok { + if status, ok := exitErr.Sys().(syscall.WaitStatus); ok { + os.Exit(status.ExitStatus()) + } + } + return err + } + return nil +} + +// deploymentPath returns the path to a deployment directory +func deploymentPath(cfg *config.Config, id string) string { + return filepath.Join(cfg.ConfigDir, "applications", appName, id) +} + +// copyEmbeddedChart extracts the embedded chart FS to destDir +func copyEmbeddedChart(destDir string) error { + return fs.WalkDir(chartFS, "chart", func(path string, d fs.DirEntry, err error) error { + if err != nil { + return err + } + if path == "chart" { + return nil + } + + relPath := strings.TrimPrefix(path, "chart/") + destPath := filepath.Join(destDir, relPath) + + if d.IsDir() { + return os.MkdirAll(destPath, 0755) + } + + if err := os.MkdirAll(filepath.Dir(destPath), 0755); err != nil { + return err + } + + data, err := chartFS.ReadFile(path) + if err != nil { + return fmt.Errorf("failed to read embedded %s: %w", path, err) + } + return os.WriteFile(destPath, data, 0644) + }) +} + +// generateOverlayValues creates the Obol Stack-specific values overlay. +// If imported is non-nil, provider/channel config from the import is used +// instead of the default Ollama configuration. +func generateOverlayValues(hostname string, imported *ImportResult) string { + var b strings.Builder + + b.WriteString(`# Obol Stack overlay values for OpenClaw +# This file contains stack-specific defaults. Edit to customize. 
+ +# Enable Gateway API HTTPRoute for stack routing +httpRoute: + enabled: true + hostnames: +`) + b.WriteString(fmt.Sprintf(" - %s\n", hostname)) + b.WriteString(` parentRefs: + - name: traefik-gateway + namespace: traefik + sectionName: web + +# SA needs API token mount for K8s read access +serviceAccount: + automount: true + +# Read-only RBAC for K8s API (pods, services, deployments, etc.) +rbac: + create: true + +`) + + // Provider and agent model configuration + importedOverlay := TranslateToOverlayYAML(imported) + if importedOverlay != "" { + b.WriteString("# Imported from ~/.openclaw/openclaw.json\n") + b.WriteString(importedOverlay) + } else { + b.WriteString(`# Route agent traffic to in-cluster Ollama via llmspy proxy +openclaw: + agentModel: ollama/glm-4.7-flash + +# Default model provider: in-cluster Ollama (routed through llmspy) +models: + ollama: + enabled: true + baseUrl: http://llmspy.llm.svc.cluster.local:8000/v1 + apiKeyEnvVar: OLLAMA_API_KEY + apiKeyValue: ollama-local + models: + - id: glm-4.7-flash + name: GLM-4.7 Flash + +`) + } + + b.WriteString(`# eRPC integration +erpc: + url: http://erpc.erpc.svc.cluster.local:4000/rpc + +# Skills: chart creates a default empty ConfigMap; populate with obol openclaw skills sync +skills: + enabled: true + createDefault: true + +# Agent init Job (enable to bootstrap workspace on first deploy) +initJob: + enabled: false +`) + + return b.String() +} + +// ollamaEndpoint returns the base URL where host Ollama should be reachable. +// It respects the OLLAMA_HOST environment variable, falling back to http://localhost:11434. +func ollamaEndpoint() string { + if host := os.Getenv("OLLAMA_HOST"); host != "" { + // OLLAMA_HOST may be just "host:port" or a full URL. 
+ if !strings.HasPrefix(host, "http://") && !strings.HasPrefix(host, "https://") { + host = "http://" + host + } + return strings.TrimRight(host, "/") + } + return "http://localhost:11434" +} + +// detectOllama checks whether Ollama is reachable on the host machine by +// hitting the /api/tags endpoint with a short timeout. Returns true if the +// server responds with HTTP 200. +func detectOllama() bool { + endpoint := ollamaEndpoint() + tagsURL, err := url.JoinPath(endpoint, "api", "tags") + if err != nil { + return false + } + + client := &http.Client{Timeout: 2 * time.Second} + resp, err := client.Get(tagsURL) + if err != nil { + return false + } + resp.Body.Close() + return resp.StatusCode == http.StatusOK +} + +// interactiveSetup prompts the user for provider configuration. +// If imported is non-nil, offers to use the detected config. +// Returns the ImportResult for overlay generation, and optionally a CloudProviderInfo +// when a cloud provider was selected (so the caller can configure llmspy). +func interactiveSetup(imported *ImportResult) (*ImportResult, *CloudProviderInfo, error) { + reader := bufio.NewReader(os.Stdin) + + if imported != nil { + fmt.Print("\nUse detected configuration? 
[Y/n]: ") + line, _ := reader.ReadString('\n') + line = strings.TrimSpace(strings.ToLower(line)) + if line == "" || line == "y" || line == "yes" { + fmt.Println("Using detected configuration.") + return imported, nil, nil + } + } + + // Detect Ollama on the host to decide whether to offer it as an option + ollamaAvailable := detectOllama() + if ollamaAvailable { + fmt.Printf(" ✓ Ollama detected at %s\n", ollamaEndpoint()) + } else { + fmt.Printf(" ⚠ Ollama not detected on host (%s)\n", ollamaEndpoint()) + } + + if ollamaAvailable { + fmt.Println("\nSelect a model provider:") + fmt.Println(" [1] Ollama (default, runs in-cluster)") + fmt.Println(" [2] OpenAI") + fmt.Println(" [3] Anthropic") + fmt.Print("\nChoice [1]: ") + + line, _ := reader.ReadString('\n') + choice := strings.TrimSpace(line) + if choice == "" { + choice = "1" + } + + switch choice { + case "1": + fmt.Println("Using Ollama (in-cluster) as default provider.") + return nil, nil, nil + case "2": + cloud, err := promptForCloudProvider(reader, "openai", "OpenAI", "gpt-4o", "GPT-4o") + if err != nil { + return nil, nil, err + } + result := buildLLMSpyRoutedOverlay(cloud) + return result, cloud, nil + case "3": + cloud, err := promptForCloudProvider(reader, "anthropic", "Anthropic", "claude-sonnet-4-5-20250929", "Claude Sonnet 4.5") + if err != nil { + return nil, nil, err + } + result := buildLLMSpyRoutedOverlay(cloud) + return result, cloud, nil + default: + fmt.Printf("Unknown choice '%s', using Ollama defaults.\n", choice) + return nil, nil, nil + } + } + + // Ollama not available — only offer cloud providers + fmt.Println("\nSelect a model provider:") + fmt.Println(" [1] OpenAI") + fmt.Println(" [2] Anthropic") + fmt.Print("\nChoice [1]: ") + + line, _ := reader.ReadString('\n') + choice := strings.TrimSpace(line) + if choice == "" { + choice = "1" + } + + switch choice { + case "1": + cloud, err := promptForCloudProvider(reader, "openai", "OpenAI", "gpt-4o", "GPT-4o") + if err != nil { + return nil, 
nil, err + } + result := buildLLMSpyRoutedOverlay(cloud) + return result, cloud, nil + case "2": + cloud, err := promptForCloudProvider(reader, "anthropic", "Anthropic", "claude-sonnet-4-5-20250929", "Claude Sonnet 4.5") + if err != nil { + return nil, nil, err + } + result := buildLLMSpyRoutedOverlay(cloud) + return result, cloud, nil + default: + return nil, nil, fmt.Errorf("unknown choice '%s'; please select a valid provider", choice) + } +} + +// promptForCloudProvider asks for an API key and returns cloud provider info. +// The actual overlay (ImportResult) is built separately via buildLLMSpyRoutedOverlay. +func promptForCloudProvider(reader *bufio.Reader, name, display, modelID, modelName string) (*CloudProviderInfo, error) { + fmt.Printf("\n%s API key: ", display) + apiKey, _ := reader.ReadString('\n') + apiKey = strings.TrimSpace(apiKey) + if apiKey == "" { + return nil, fmt.Errorf("%s API key is required", display) + } + + return &CloudProviderInfo{ + Name: name, + APIKey: apiKey, + ModelID: modelID, + Display: modelName, + }, nil +} + +// buildLLMSpyRoutedOverlay creates an ImportResult that routes a cloud model +// through the llmspy proxy. OpenClaw sees a single "ollama" provider pointing +// at llmspy, with the cloud model in its model list. The actual cloud providers +// are disabled in OpenClaw — llmspy handles the routing. 
+func buildLLMSpyRoutedOverlay(cloud *CloudProviderInfo) *ImportResult { + return &ImportResult{ + AgentModel: cloud.ModelID, + Providers: []ImportedProvider{ + { + Name: "ollama", + BaseURL: "http://llmspy.llm.svc.cluster.local:8000/v1", + API: "openai-completions", + APIKeyEnvVar: "OLLAMA_API_KEY", + APIKey: "ollama-local", + Models: []ImportedModel{ + {ID: cloud.ModelID, Name: cloud.Display}, + }, + }, + {Name: "anthropic", Disabled: true}, + {Name: "openai", Disabled: true}, + }, + } +} + +// generateHelmfile creates a helmfile.yaml referencing the local chart +func generateHelmfile(id, namespace string) string { + return fmt.Sprintf(`# OpenClaw instance: %s +# Managed by obol openclaw + +releases: + - name: openclaw + namespace: %s + createNamespace: true + chart: ./chart + values: + - values.yaml + - values-obol.yaml +`, id, namespace) +} diff --git a/internal/openclaw/overlay_test.go b/internal/openclaw/overlay_test.go new file mode 100644 index 0000000..9e82f56 --- /dev/null +++ b/internal/openclaw/overlay_test.go @@ -0,0 +1,154 @@ +package openclaw + +import ( + "strings" + "testing" +) + +func TestBuildLLMSpyRoutedOverlay_Anthropic(t *testing.T) { + cloud := &CloudProviderInfo{ + Name: "anthropic", + APIKey: "sk-ant-test", + ModelID: "claude-sonnet-4-5-20250929", + Display: "Claude Sonnet 4.5", + } + + result := buildLLMSpyRoutedOverlay(cloud) + + // Check agent model uses bare model ID (no provider/ prefix) + if result.AgentModel != "claude-sonnet-4-5-20250929" { + t.Errorf("AgentModel = %q, want %q", result.AgentModel, "claude-sonnet-4-5-20250929") + } + + // Check 3 providers: ollama (enabled), anthropic (disabled), openai (disabled) + if len(result.Providers) != 3 { + t.Fatalf("len(Providers) = %d, want 3", len(result.Providers)) + } + + ollama := result.Providers[0] + if ollama.Name != "ollama" || ollama.Disabled { + t.Errorf("ollama: name=%q disabled=%v, want ollama/false", ollama.Name, ollama.Disabled) + } + if ollama.BaseURL != 
"http://llmspy.llm.svc.cluster.local:8000/v1" { + t.Errorf("ollama.BaseURL = %q", ollama.BaseURL) + } + if ollama.APIKeyEnvVar != "OLLAMA_API_KEY" { + t.Errorf("ollama.APIKeyEnvVar = %q, want OLLAMA_API_KEY", ollama.APIKeyEnvVar) + } + if ollama.APIKey != "ollama-local" { + t.Errorf("ollama.APIKey = %q, want ollama-local", ollama.APIKey) + } + if ollama.API != "openai-completions" { + t.Errorf("ollama.API = %q, want openai-completions", ollama.API) + } + if len(ollama.Models) != 1 || ollama.Models[0].ID != "claude-sonnet-4-5-20250929" { + t.Errorf("ollama.Models = %v", ollama.Models) + } + + // anthropic and openai should be disabled + if !result.Providers[1].Disabled || result.Providers[1].Name != "anthropic" { + t.Errorf("anthropic: disabled=%v name=%q", result.Providers[1].Disabled, result.Providers[1].Name) + } + if !result.Providers[2].Disabled || result.Providers[2].Name != "openai" { + t.Errorf("openai: disabled=%v name=%q", result.Providers[2].Disabled, result.Providers[2].Name) + } +} + +func TestBuildLLMSpyRoutedOverlay_OpenAI(t *testing.T) { + cloud := &CloudProviderInfo{ + Name: "openai", + APIKey: "sk-open-test", + ModelID: "gpt-4o", + Display: "GPT-4o", + } + + result := buildLLMSpyRoutedOverlay(cloud) + + if result.AgentModel != "gpt-4o" { + t.Errorf("AgentModel = %q, want %q", result.AgentModel, "gpt-4o") + } + + ollama := result.Providers[0] + if len(ollama.Models) != 1 || ollama.Models[0].ID != "gpt-4o" { + t.Errorf("ollama model = %v, want gpt-4o", ollama.Models) + } +} + +func TestOverlayYAML_LLMSpyRouted(t *testing.T) { + cloud := &CloudProviderInfo{ + Name: "anthropic", + APIKey: "sk-ant-test", + ModelID: "claude-sonnet-4-5-20250929", + Display: "Claude Sonnet 4.5", + } + result := buildLLMSpyRoutedOverlay(cloud) + yaml := TranslateToOverlayYAML(result) + + // Agent model should be the bare model ID + if !strings.Contains(yaml, "agentModel: claude-sonnet-4-5-20250929") { + t.Errorf("YAML missing agentModel, got:\n%s", yaml) + } + + // ollama 
should be enabled with llmspy baseUrl + if !strings.Contains(yaml, "baseUrl: http://llmspy.llm.svc.cluster.local:8000/v1") { + t.Errorf("YAML missing llmspy baseUrl, got:\n%s", yaml) + } + + // apiKeyEnvVar should be set + if !strings.Contains(yaml, "apiKeyEnvVar: OLLAMA_API_KEY") { + t.Errorf("YAML missing apiKeyEnvVar, got:\n%s", yaml) + } + + // apiKeyValue should be ollama-local + if !strings.Contains(yaml, "apiKeyValue: ollama-local") { + t.Errorf("YAML missing apiKeyValue, got:\n%s", yaml) + } + + // api should be openai-completions (llmspy is OpenAI-compatible) + if !strings.Contains(yaml, "api: openai-completions") { + t.Errorf("YAML missing api: openai-completions, got:\n%s", yaml) + } + + // Cloud model should appear in ollama's model list + if !strings.Contains(yaml, "- id: claude-sonnet-4-5-20250929") { + t.Errorf("YAML missing cloud model ID, got:\n%s", yaml) + } + + // anthropic and openai should be disabled + if !strings.Contains(yaml, "anthropic:\n enabled: false") { + t.Errorf("YAML missing disabled anthropic, got:\n%s", yaml) + } + if !strings.Contains(yaml, "openai:\n enabled: false") { + t.Errorf("YAML missing disabled openai, got:\n%s", yaml) + } +} + +func TestGenerateOverlayValues_OllamaDefault(t *testing.T) { + // When imported is nil, generateOverlayValues should use Ollama defaults + yaml := generateOverlayValues("openclaw-default.obol.stack", nil) + + if !strings.Contains(yaml, "agentModel: ollama/glm-4.7-flash") { + t.Errorf("default overlay missing ollama agentModel, got:\n%s", yaml) + } + if !strings.Contains(yaml, "baseUrl: http://llmspy.llm.svc.cluster.local:8000/v1") { + t.Errorf("default overlay missing llmspy baseUrl, got:\n%s", yaml) + } +} + +func TestRemoteCapableCommands(t *testing.T) { + // Commands that should go through port-forward + remote := []string{"gateway", "acp", "browser", "logs"} + for _, cmd := range remote { + if !remoteCapableCommands[cmd] { + t.Errorf("%q should be remote-capable", cmd) + } + } + + // Commands 
that should go through kubectl exec + local := []string{"agent", "doctor", "config", "models", "message"} + for _, cmd := range local { + if remoteCapableCommands[cmd] { + t.Errorf("%q should NOT be remote-capable", cmd) + } + } +} diff --git a/internal/stack/stack.go b/internal/stack/stack.go index 3c51d6f..ae47fb2 100644 --- a/internal/stack/stack.go +++ b/internal/stack/stack.go @@ -5,10 +5,12 @@ import ( "os" "os/exec" "path/filepath" + "runtime" "strings" "github.com/ObolNetwork/obol-stack/internal/config" "github.com/ObolNetwork/obol-stack/internal/embed" + "github.com/ObolNetwork/obol-stack/internal/openclaw" petname "github.com/dustinkirkland/golang-petname" ) @@ -78,8 +80,12 @@ func Init(cfg *config.Config, force bool) error { // Copy embedded defaults (helmfile + charts for infrastructure) // Resolve placeholders: {{OLLAMA_HOST}} → host DNS for the cluster runtime. - // k3d uses host.k3d.internal; bare k3s would use the node's gateway IP. + // On macOS (Docker Desktop), host.docker.internal resolves to the host. + // On Linux (native Docker), host.k3d.internal is added by k3d. 
ollamaHost := "host.k3d.internal" + if runtime.GOOS == "darwin" { + ollamaHost = "host.docker.internal" + } defaultsDir := filepath.Join(cfg.ConfigDir, "defaults") if err := embed.CopyDefaults(defaultsDir, map[string]string{ "{{OLLAMA_HOST}}": ollamaHost, @@ -344,6 +350,7 @@ func syncDefaults(cfg *config.Config, kubeconfigPath string) error { "--kubeconfig", kubeconfigPath, "sync", ) + helmfileCmd.Env = append(os.Environ(), "KUBECONFIG="+kubeconfigPath) helmfileCmd.Stdout = os.Stdout helmfileCmd.Stderr = os.Stderr @@ -357,6 +364,14 @@ func syncDefaults(cfg *config.Config, kubeconfigPath string) error { } fmt.Println("Default infrastructure deployed") + + // Deploy default OpenClaw instance (non-fatal on failure) + fmt.Println("Setting up default OpenClaw instance...") + if err := openclaw.SetupDefault(cfg); err != nil { + fmt.Printf("Warning: failed to set up default OpenClaw: %v\n", err) + fmt.Println("You can manually set up OpenClaw later with: obol openclaw up") + } + return nil } diff --git a/internal/tunnel/stackid.go b/internal/tunnel/stackid.go index 4115638..a7cd6f2 100644 --- a/internal/tunnel/stackid.go +++ b/internal/tunnel/stackid.go @@ -17,4 +17,3 @@ func getStackID(cfg *config.Config) string { } return strings.TrimSpace(string(data)) } - diff --git a/obolup.sh b/obolup.sh index f6430ab..9b4b666 100755 --- a/obolup.sh +++ b/obolup.sh @@ -990,6 +990,80 @@ install_k9s() { fi } +# Install openclaw CLI +# Unlike other tools, openclaw has no standalone binary downloads. +# It's distributed as an npm package, so we install it locally into +# OBOL_BIN_DIR using npm --prefix to keep it workspace-contained. +install_openclaw() { + # Remove broken symlink if exists + remove_broken_symlink "openclaw" + + # Check for global openclaw first (same pattern as kubectl, helm, etc.) 
+ local global_openclaw + if global_openclaw=$(check_global_binary "openclaw"); then + if create_binary_symlink "openclaw" "$global_openclaw"; then + log_success "openclaw already installed at: $global_openclaw (symlinked)" + else + log_success "openclaw already installed at: $global_openclaw" + fi + return 0 + fi + + # Check if already in OBOL_BIN_DIR + if [[ -f "$OBOL_BIN_DIR/openclaw" ]]; then + log_success "openclaw already installed" + return 0 + fi + + log_info "Installing openclaw CLI..." + + # Require Node.js 22+ and npm + if ! command_exists npm; then + log_warn "npm not found — cannot install openclaw CLI" + echo "" + echo " Install Node.js 22+ first, then re-run obolup.sh" + echo " Or install manually: npm install -g openclaw" + echo "" + return 1 + fi + + local node_major + node_major=$(node --version 2>/dev/null | sed 's/v//' | cut -d. -f1) + if [[ -z "$node_major" ]] || [[ "$node_major" -lt 22 ]]; then + log_warn "Node.js 22+ required for openclaw (found: v${node_major:-none})" + echo "" + echo " Upgrade Node.js, then re-run obolup.sh" + echo " Or install manually: npm install -g openclaw" + echo "" + return 1 + fi + + # Install into OBOL_BIN_DIR using npm --prefix so the package lives + # alongside the other managed binaries (works for both production + # ~/.local/bin and development .workspace/bin layouts). + local npm_prefix="$OBOL_BIN_DIR/.openclaw-npm" + log_info "Installing openclaw via npm into $OBOL_BIN_DIR..." + + if npm install --prefix "$npm_prefix" openclaw 2>&1; then + # Create a wrapper script in OBOL_BIN_DIR that invokes the local install. + # npm --prefix puts the .bin stubs in node_modules/.bin/ which handle + # the correct entry point (openclaw.mjs) automatically. 
+ cat > "$OBOL_BIN_DIR/openclaw" < Date: Thu, 12 Feb 2026 19:41:29 +0400 Subject: [PATCH 26/42] fix(openclaw): update model defaults and improve chart documentation Update Anthropic models to include Opus 4.6, replace retiring GPT-4o with GPT-5.2, add next-step guidance to NOTES.txt, and clarify gateway token and skills injection comments per CTO review feedback. --- internal/openclaw/chart/templates/NOTES.txt | 14 ++++++++++++++ internal/openclaw/chart/values.yaml | 15 ++++++++++----- internal/openclaw/openclaw.go | 8 ++++---- internal/openclaw/overlay_test.go | 12 ++++++------ 4 files changed, 34 insertions(+), 15 deletions(-) diff --git a/internal/openclaw/chart/templates/NOTES.txt b/internal/openclaw/chart/templates/NOTES.txt index b69ffbb..2bbb013 100644 --- a/internal/openclaw/chart/templates/NOTES.txt +++ b/internal/openclaw/chart/templates/NOTES.txt @@ -31,3 +31,17 @@ Port-forward for local access: open http://127.0.0.1:18789 {{- end }} + +Next steps: +{{- if and .Values.models.ollama.enabled (not .Values.models.anthropic.enabled) (not .Values.models.openai.enabled) }} + You are using the default Ollama provider. To configure a cloud LLM provider: + obol llm configure --provider=anthropic --api-key= + obol openclaw setup {{ .Release.Name }} +{{- end }} +{{- if not (or .Values.models.ollama.enabled .Values.models.anthropic.enabled .Values.models.openai.enabled) }} + WARNING: No model providers are enabled. 
Configure at least one provider: + obol llm configure --provider=anthropic --api-key= + obol openclaw setup {{ .Release.Name }} +{{- end }} + Dashboard: open the URL above and enter your gateway token + CLI docs: obol openclaw --help diff --git a/internal/openclaw/chart/values.yaml b/internal/openclaw/chart/values.yaml index b88ff6f..f816cae 100644 --- a/internal/openclaw/chart/values.yaml +++ b/internal/openclaw/chart/values.yaml @@ -173,6 +173,8 @@ models: models: - id: claude-sonnet-4-5-20250929 name: Claude Sonnet 4.5 + - id: claude-opus-4-6 + name: Claude Opus 4.6 openai: enabled: false baseUrl: https://api.openai.com/v1 @@ -180,8 +182,8 @@ models: apiKeyEnvVar: OPENAI_API_KEY apiKeyValue: "" models: - - id: gpt-4o - name: GPT-4o + - id: gpt-5.2 + name: GPT-5.2 ollama: enabled: true # -- OpenAI-compatible base URL for Ollama (routed through llmspy global proxy) @@ -218,7 +220,9 @@ channels: # -- Slack App-Level Token (xapp-...) appToken: "" -# -- Skills injection from a ConfigMap archive (created by an external tool; e.g. `obol openclaw skills sync`) +# -- Skills injection from a ConfigMap archive (created by an external tool; e.g. `obol openclaw skills sync`). +# The archive is extracted to `extractDir` by a busybox init container and wired into OpenClaw +# via `skills.load.extraDirs` in _helpers.tpl. Note: ConfigMap total size is limited to ~1 MB by Kubernetes. skills: enabled: false # -- Create a default empty skills ConfigMap when configMapName is not set. @@ -249,9 +253,10 @@ secrets: name: "" gatewayToken: - # -- Secret key name + env var name for gateway token + # -- Secret key name + env var name for the gateway API authentication token. + # This token is required to access OpenClaw's HTTP gateway (chat/completions endpoint and dashboard). key: OPENCLAW_GATEWAY_TOKEN - # -- Explicit token value (discouraged). If empty, a token is generated and persisted across upgrades. + # -- Explicit token value (discouraged). 
If empty, a token is auto-generated and persisted across upgrades. value: "" # -- Extra Secret names to load via envFrom (for provider/channel keys, etc.) diff --git a/internal/openclaw/openclaw.go b/internal/openclaw/openclaw.go index a00f10a..7e3b2af 100644 --- a/internal/openclaw/openclaw.go +++ b/internal/openclaw/openclaw.go @@ -1136,14 +1136,14 @@ func interactiveSetup(imported *ImportResult) (*ImportResult, *CloudProviderInfo fmt.Println("Using Ollama (in-cluster) as default provider.") return nil, nil, nil case "2": - cloud, err := promptForCloudProvider(reader, "openai", "OpenAI", "gpt-4o", "GPT-4o") + cloud, err := promptForCloudProvider(reader, "openai", "OpenAI", "gpt-5.2", "GPT-5.2") if err != nil { return nil, nil, err } result := buildLLMSpyRoutedOverlay(cloud) return result, cloud, nil case "3": - cloud, err := promptForCloudProvider(reader, "anthropic", "Anthropic", "claude-sonnet-4-5-20250929", "Claude Sonnet 4.5") + cloud, err := promptForCloudProvider(reader, "anthropic", "Anthropic", "claude-opus-4-6", "Claude Opus 4.6") if err != nil { return nil, nil, err } @@ -1169,14 +1169,14 @@ func interactiveSetup(imported *ImportResult) (*ImportResult, *CloudProviderInfo switch choice { case "1": - cloud, err := promptForCloudProvider(reader, "openai", "OpenAI", "gpt-4o", "GPT-4o") + cloud, err := promptForCloudProvider(reader, "openai", "OpenAI", "gpt-5.2", "GPT-5.2") if err != nil { return nil, nil, err } result := buildLLMSpyRoutedOverlay(cloud) return result, cloud, nil case "2": - cloud, err := promptForCloudProvider(reader, "anthropic", "Anthropic", "claude-sonnet-4-5-20250929", "Claude Sonnet 4.5") + cloud, err := promptForCloudProvider(reader, "anthropic", "Anthropic", "claude-opus-4-6", "Claude Opus 4.6") if err != nil { return nil, nil, err } diff --git a/internal/openclaw/overlay_test.go b/internal/openclaw/overlay_test.go index 9e82f56..fdeed61 100644 --- a/internal/openclaw/overlay_test.go +++ b/internal/openclaw/overlay_test.go @@ -58,19 
+58,19 @@ func TestBuildLLMSpyRoutedOverlay_OpenAI(t *testing.T) { cloud := &CloudProviderInfo{ Name: "openai", APIKey: "sk-open-test", - ModelID: "gpt-4o", - Display: "GPT-4o", + ModelID: "gpt-5.2", + Display: "GPT-5.2", } result := buildLLMSpyRoutedOverlay(cloud) - if result.AgentModel != "gpt-4o" { - t.Errorf("AgentModel = %q, want %q", result.AgentModel, "gpt-4o") + if result.AgentModel != "gpt-5.2" { + t.Errorf("AgentModel = %q, want %q", result.AgentModel, "gpt-5.2") } ollama := result.Providers[0] - if len(ollama.Models) != 1 || ollama.Models[0].ID != "gpt-4o" { - t.Errorf("ollama model = %v, want gpt-4o", ollama.Models) + if len(ollama.Models) != 1 || ollama.Models[0].ID != "gpt-5.2" { + t.Errorf("ollama model = %v, want gpt-5.2", ollama.Models) } } From 65b138aada73cfac130e61ff4f98bbf609a7d03b Mon Sep 17 00:00:00 2001 From: bussyjd Date: Thu, 12 Feb 2026 20:43:19 +0400 Subject: [PATCH 27/42] fix(openclaw): sync chart hardening from helm-charts Sync _helpers.tpl, validate.yaml, and values.yaml comments to match the helm-charts repo. Key changes: - Remove randAlphaNum gateway token fallback (require explicit value) - Add validation: gateway token required for token auth mode - Add validation: RBAC requires serviceAccount.name when create=false - Add validation: initJob requires persistence.enabled=true - Align provider and gateway token comments --- internal/openclaw/chart/templates/_helpers.tpl | 4 ---- internal/openclaw/chart/templates/validate.yaml | 13 +++++++++++++ internal/openclaw/chart/values.yaml | 6 ++++-- 3 files changed, 17 insertions(+), 6 deletions(-) diff --git a/internal/openclaw/chart/templates/_helpers.tpl b/internal/openclaw/chart/templates/_helpers.tpl index c68bc79..d6167e2 100644 --- a/internal/openclaw/chart/templates/_helpers.tpl +++ b/internal/openclaw/chart/templates/_helpers.tpl @@ -119,11 +119,7 @@ Compute (or reuse) the gateway token value. 
{{- $data := index $existing "data" -}} {{- if and $data (hasKey $data $key) -}} {{- index $data $key | b64dec -}} - {{- else -}} - {{- randAlphaNum 48 -}} {{- end -}} -{{- else -}} - {{- randAlphaNum 48 -}} {{- end -}} {{- end -}} {{- end }} diff --git a/internal/openclaw/chart/templates/validate.yaml b/internal/openclaw/chart/templates/validate.yaml index 12ec2c4..e7f8846 100644 --- a/internal/openclaw/chart/templates/validate.yaml +++ b/internal/openclaw/chart/templates/validate.yaml @@ -10,6 +10,11 @@ {{- fail "openclaw: set secrets.existingSecret or enable secrets.create" -}} {{- end -}} +{{- $gatewayToken := include "openclaw.gatewayTokenValue" . | trim -}} +{{- if and (eq .Values.openclaw.gateway.auth.mode "token") .Values.secrets.create (not .Values.secrets.existingSecret) (eq $gatewayToken "") -}} +{{- fail "openclaw: token auth is enabled; set secrets.gatewayToken.value or use secrets.existingSecret" -}} +{{- end -}} + {{- if and .Values.httpRoute.enabled (eq (len .Values.httpRoute.hostnames) 0) -}} {{- fail "openclaw: httpRoute.enabled is true but httpRoute.hostnames is empty" -}} {{- end -}} @@ -17,3 +22,11 @@ {{- if and .Values.skills.enabled (eq .Values.skills.configMapName "") (not .Values.skills.createDefault) -}} {{- fail "openclaw: skills.enabled is true but no skills.configMapName or skills.createDefault" -}} {{- end -}} + +{{- if and .Values.rbac.create (not .Values.serviceAccount.create) (eq (.Values.serviceAccount.name | trim) "") -}} +{{- fail "openclaw: rbac.create=true with serviceAccount.create=false requires serviceAccount.name" -}} +{{- end -}} + +{{- if and .Values.initJob.enabled (not .Values.persistence.enabled) -}} +{{- fail "openclaw: initJob.enabled requires persistence.enabled=true" -}} +{{- end -}} diff --git a/internal/openclaw/chart/values.yaml b/internal/openclaw/chart/values.yaml index f816cae..9919da2 100644 --- a/internal/openclaw/chart/values.yaml +++ b/internal/openclaw/chart/values.yaml @@ -40,6 +40,7 @@ serviceAccount: 
# Set to true when rbac.create is true so the agent can access the K8s API. automount: false annotations: {} + # -- ServiceAccount name. Required when serviceAccount.create=false and rbac.create=true. name: "" # -- RBAC for the ServiceAccount (read-only access to namespace resources) @@ -50,6 +51,7 @@ rbac: # -- One-shot init Job (runs once to bootstrap workspace/personality) initJob: + # -- Enable a one-shot post-install bootstrap Job. Requires persistence.enabled=true. enabled: false image: repository: ghcr.io/obolnetwork/openclaw @@ -160,7 +162,7 @@ openclaw: enabled: true # -- Model provider configuration -# Each provider is independently toggled. At least one must be enabled. +# Each provider is independently toggled. All providers may be disabled. # API keys are stored in the chart Secret and injected as env vars. models: anthropic: @@ -256,7 +258,7 @@ secrets: # -- Secret key name + env var name for the gateway API authentication token. # This token is required to access OpenClaw's HTTP gateway (chat/completions endpoint and dashboard). key: OPENCLAW_GATEWAY_TOKEN - # -- Explicit token value (discouraged). If empty, a token is auto-generated and persisted across upgrades. + # -- Explicit token value. Required for token auth unless using secrets.existingSecret. value: "" # -- Extra Secret names to load via envFrom (for provider/channel keys, etc.) From 85b2c4b45826bbc1ff2bc862e4e243f92d57cf6f Mon Sep 17 00:00:00 2001 From: bussyjd Date: Fri, 13 Feb 2026 00:18:13 +0400 Subject: [PATCH 28/42] feat(dns): add wildcard DNS resolver for *.obol.stack Add a local dnsmasq-based DNS resolver that enables wildcard hostname resolution for per-instance routing (e.g., openclaw-myid.obol.stack) without manual /etc/hosts entries. 
- New internal/dns package: manages dnsmasq Docker container on port 5553 - macOS: auto-configures /etc/resolver/obol.stack (requires sudo once) - Linux: prints manual DNS configuration instructions - stack up: starts DNS resolver (idempotent, non-fatal on failure) - stack purge: stops DNS resolver and removes system resolver config - stack down: leaves DNS resolver running (cheap, persists across restarts) Closes #150 --- internal/dns/resolver.go | 155 ++++++++++++++++++++++++++++++++++ internal/dns/resolver_test.go | 20 +++++ internal/stack/stack.go | 19 +++++ obolup.sh | 5 ++ 4 files changed, 199 insertions(+) create mode 100644 internal/dns/resolver.go create mode 100644 internal/dns/resolver_test.go diff --git a/internal/dns/resolver.go b/internal/dns/resolver.go new file mode 100644 index 0000000..a1fe1ac --- /dev/null +++ b/internal/dns/resolver.go @@ -0,0 +1,155 @@ +// Package dns manages a local DNS resolver for wildcard *.obol.stack resolution. +// +// It runs a dnsmasq Docker container that answers DNS queries for the obol.stack +// domain with 127.0.0.1, and configures the host OS to use it. This enables +// per-instance hostname routing (e.g., openclaw-myid.obol.stack) without manual +// /etc/hosts entries. +package dns + +import ( + "fmt" + "os" + "os/exec" + "path/filepath" + "runtime" + "strings" +) + +const ( + containerName = "obol-dns" + hostPort = "5553" + dnsImage = "alpine:3.21" + resolverDir = "/etc/resolver" + resolverFile = "obol.stack" + domain = "obol.stack" +) + +// EnsureRunning starts the DNS resolver container if not already running. +// Idempotent: no-ops if the container is already healthy. 
+func EnsureRunning() error { + // Check if container exists and is running + out, err := exec.Command("docker", "inspect", "-f", "{{.State.Running}}", containerName).Output() + if err == nil && strings.TrimSpace(string(out)) == "true" { + return nil // Already running + } + + // Remove stale container if exists (ignore errors) + exec.Command("docker", "rm", "-f", containerName).Run() //nolint:errcheck + + fmt.Println("Starting DNS resolver for *.obol.stack...") + + cmd := exec.Command("docker", "run", "-d", + "--name", containerName, + "-p", hostPort+":53/udp", + "-p", hostPort+":53/tcp", + "--restart", "unless-stopped", + dnsImage, + "sh", "-c", + "apk add --no-cache dnsmasq >/dev/null 2>&1 && "+ + "exec dnsmasq --no-daemon "+ + "--conf-file=/dev/null "+ + "--address=/"+domain+"/127.0.0.1 "+ + "--log-facility=-", + ) + if output, err := cmd.CombinedOutput(); err != nil { + return fmt.Errorf("failed to start DNS container: %w\n%s", err, output) + } + + fmt.Printf("DNS resolver running (*.obol.stack → 127.0.0.1, port %s)\n", hostPort) + return nil +} + +// Stop removes the DNS resolver container. +func Stop() { + if out, err := exec.Command("docker", "inspect", "-f", "{{.State.Running}}", containerName).Output(); err != nil || strings.TrimSpace(string(out)) != "true" { + return // Not running + } + exec.Command("docker", "rm", "-f", containerName).Run() //nolint:errcheck + fmt.Println("DNS resolver stopped") +} + +// ConfigureSystemResolver sets up the host OS to route *.obol.stack queries +// to our local DNS container. Requires sudo on first run. 
+// +// macOS: creates /etc/resolver/obol.stack +// Linux: prints manual instructions (TODO: systemd-resolved integration) +func ConfigureSystemResolver() error { + switch runtime.GOOS { + case "darwin": + return configureMacOSResolver() + case "linux": + fmt.Println("Note: automatic DNS resolver setup not yet supported on Linux.") + fmt.Printf("To resolve *.obol.stack, add to your DNS config:\n") + fmt.Printf(" server=/%s/127.0.0.1#%s\n", domain, hostPort) + return nil + default: + return fmt.Errorf("unsupported OS for DNS resolver: %s", runtime.GOOS) + } +} + +// RemoveSystemResolver removes the host OS DNS configuration for *.obol.stack. +func RemoveSystemResolver() { + switch runtime.GOOS { + case "darwin": + removeMacOSResolver() + } +} + +// IsResolverConfigured checks whether the system resolver is already set up. +func IsResolverConfigured() bool { + if runtime.GOOS != "darwin" { + return false + } + path := filepath.Join(resolverDir, resolverFile) + _, err := os.Stat(path) + return err == nil +} + +// configureMacOSResolver creates /etc/resolver/obol.stack pointing to our DNS. 
+func configureMacOSResolver() error { + path := filepath.Join(resolverDir, resolverFile) + + // Check if already configured correctly + if data, err := os.ReadFile(path); err == nil { + content := string(data) + if strings.Contains(content, "port "+hostPort) { + return nil // Already configured + } + } + + content := fmt.Sprintf("# Managed by obol-stack — resolves *.obol.stack to localhost\nnameserver 127.0.0.1\nport %s\n", hostPort) + + // /etc/resolver/ needs root — try sudo + fmt.Println("Configuring macOS DNS resolver for *.obol.stack (requires sudo)...") + + mkdirCmd := exec.Command("sudo", "mkdir", "-p", resolverDir) + mkdirCmd.Stdout = os.Stdout + mkdirCmd.Stderr = os.Stderr + if err := mkdirCmd.Run(); err != nil { + return fmt.Errorf("failed to create %s (sudo required): %w", resolverDir, err) + } + + writeCmd := exec.Command("sudo", "tee", path) + writeCmd.Stdin = strings.NewReader(content) + writeCmd.Stderr = os.Stderr + if err := writeCmd.Run(); err != nil { + return fmt.Errorf("failed to write %s: %w", path, err) + } + + fmt.Printf("Resolver configured: %s → 127.0.0.1:%s\n", path, hostPort) + return nil +} + +// removeMacOSResolver removes /etc/resolver/obol.stack. +func removeMacOSResolver() { + path := filepath.Join(resolverDir, resolverFile) + if _, err := os.Stat(path); os.IsNotExist(err) { + return + } + if err := exec.Command("sudo", "rm", path).Run(); err != nil { + fmt.Printf("Warning: failed to remove %s: %v\n", path, err) + fmt.Printf(" Remove manually: sudo rm %s\n", path) + return + } + fmt.Printf("Removed DNS resolver config: %s\n", path) +} diff --git a/internal/dns/resolver_test.go b/internal/dns/resolver_test.go new file mode 100644 index 0000000..151d180 --- /dev/null +++ b/internal/dns/resolver_test.go @@ -0,0 +1,20 @@ +package dns + +import "testing" + +func TestConstants(t *testing.T) { + // Verify constants haven't drifted — these are referenced by both the + // Docker container config and the macOS resolver file. 
+ if containerName != "obol-dns" { + t.Errorf("containerName = %q, want %q", containerName, "obol-dns") + } + if hostPort != "5553" { + t.Errorf("hostPort = %q, want %q", hostPort, "5553") + } + if domain != "obol.stack" { + t.Errorf("domain = %q, want %q", domain, "obol.stack") + } + if resolverFile != "obol.stack" { + t.Errorf("resolverFile = %q, want %q", resolverFile, "obol.stack") + } +} diff --git a/internal/stack/stack.go b/internal/stack/stack.go index ae47fb2..fbbf9f9 100644 --- a/internal/stack/stack.go +++ b/internal/stack/stack.go @@ -9,6 +9,7 @@ import ( "strings" "github.com/ObolNetwork/obol-stack/internal/config" + "github.com/ObolNetwork/obol-stack/internal/dns" "github.com/ObolNetwork/obol-stack/internal/embed" "github.com/ObolNetwork/obol-stack/internal/openclaw" petname "github.com/dustinkirkland/golang-petname" @@ -143,6 +144,13 @@ func Up(cfg *config.Config) error { return err } + // Ensure DNS resolver is running for wildcard *.obol.stack + if err := dns.EnsureRunning(); err != nil { + fmt.Printf("Warning: DNS resolver failed to start: %v\n", err) + } else if err := dns.ConfigureSystemResolver(); err != nil { + fmt.Printf("Warning: failed to configure system DNS resolver: %v\n", err) + } + fmt.Println("Stack restarted successfully") fmt.Printf("Stack ID: %s\n", stackID) return nil @@ -191,6 +199,13 @@ func Up(cfg *config.Config) error { return err } + // Ensure DNS resolver is running for wildcard *.obol.stack + if err := dns.EnsureRunning(); err != nil { + fmt.Printf("Warning: DNS resolver failed to start: %v\n", err) + } else if err := dns.ConfigureSystemResolver(); err != nil { + fmt.Printf("Warning: failed to configure system DNS resolver: %v\n", err) + } + fmt.Println("Stack started successfully") fmt.Printf("Stack ID: %s\n", stackID) fmt.Printf("export KUBECONFIG=%s\n", kubeconfigPath) @@ -267,6 +282,10 @@ func Purge(cfg *config.Config, force bool) error { } } + // Stop DNS resolver and remove system resolver config + dns.Stop() + 
dns.RemoveSystemResolver() + // Remove stack config directory stackConfigDir := filepath.Join(cfg.ConfigDir) if err := os.RemoveAll(stackConfigDir); err != nil { diff --git a/obolup.sh b/obolup.sh index 9b4b666..7af0f80 100755 --- a/obolup.sh +++ b/obolup.sh @@ -1156,6 +1156,11 @@ configure_hosts_file() { if ! check_hosts_file; then update_hosts_file fi + + # Note: wildcard *.obol.stack DNS is handled by a local DNS resolver + # that starts automatically with 'obol stack up'. The /etc/hosts entry + # above provides baseline resolution for the root domain (obol.stack). + log_info "Wildcard *.obol.stack DNS will be configured on first 'obol stack up'" } # Detect appropriate shell profile file (NVM-style detection) From e2d3dc1d1d95423958f2136b1e31ca9dc599f35d Mon Sep 17 00:00:00 2001 From: bussyjd Date: Fri, 13 Feb 2026 01:03:18 +0400 Subject: [PATCH 29/42] feat(dns): add Linux support and fix llmspy image tag DNS resolver: add systemd-resolved integration for Linux. On Linux, dnsmasq binds to 127.0.0.2:53 (avoids systemd-resolved's stub on 127.0.0.53:53) and a resolved.conf.d drop-in forwards *.obol.stack queries. On macOS, behavior is unchanged (port 5553 + /etc/resolver). Also fixes dnsmasq startup with --conf-file=/dev/null to ignore Alpine's default config which enables local-service (rejects queries from Docker bridge network). Fix llmspy image tag: 3.0.32-obol.1-rc.2 does not exist on GHCR, corrected to 3.0.32-obol.1-rc.1. --- CLAUDE.md | 2 +- internal/dns/resolver.go | 167 +++++++++++++++--- internal/dns/resolver_test.go | 54 +++++- .../infrastructure/base/templates/llm.yaml | 2 +- 4 files changed, 190 insertions(+), 35 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 7ea97d1..92bb40f 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -581,7 +581,7 @@ The stack uses a two-tier architecture for LLM routing. 
A cluster-wide proxy (ll | `llm` | Namespace | Dedicated namespace for LLM infrastructure | | `llmspy-config` | ConfigMap | `llms.json` (provider enable/disable) + `providers.json` (provider definitions) | | `llms-secrets` | Secret | Cloud API keys (`ANTHROPIC_API_KEY`, `OPENAI_API_KEY`) — empty by default | -| `llmspy` | Deployment | `ghcr.io/obolnetwork/llms:3.0.32-obol.1-rc.2`, port 8000 | +| `llmspy` | Deployment | `ghcr.io/obolnetwork/llms:3.0.32-obol.1-rc.1`, port 8000 | | `llmspy` | Service (ClusterIP) | `llmspy.llm.svc.cluster.local:8000` | | `ollama` | Service (ExternalName) | Routes to host Ollama via `{{OLLAMA_HOST}}` placeholder | diff --git a/internal/dns/resolver.go b/internal/dns/resolver.go index a1fe1ac..b4bf512 100644 --- a/internal/dns/resolver.go +++ b/internal/dns/resolver.go @@ -4,6 +4,9 @@ // domain with 127.0.0.1, and configures the host OS to use it. This enables // per-instance hostname routing (e.g., openclaw-myid.obol.stack) without manual // /etc/hosts entries. +// +// macOS: binds to port 5553, uses /etc/resolver/obol.stack (supports custom port). +// Linux: binds to 127.0.0.2:53, uses systemd-resolved drop-in (requires port 53). package dns import ( @@ -17,13 +20,42 @@ import ( const ( containerName = "obol-dns" - hostPort = "5553" dnsImage = "alpine:3.21" - resolverDir = "/etc/resolver" - resolverFile = "obol.stack" domain = "obol.stack" + + // macOS: custom port, /etc/resolver handles port directive + macHostPort = "5553" + + // Linux: systemd-resolved can't forward to non-standard ports, so we bind + // to a loopback alias (127.0.0.2) on port 53 to avoid conflicting with + // systemd-resolved's stub listener on 127.0.0.53:53. 
+ linuxBindIP = "127.0.0.2" + linuxBindPort = "53" + + // macOS resolver config + macResolverDir = "/etc/resolver" + macResolverFile = "obol.stack" + + // Linux systemd-resolved drop-in + resolvedDropInDir = "/etc/systemd/resolved.conf.d" + resolvedDropInFile = "obol-stack.conf" ) +// portBindings returns the Docker -p flags for the current OS. +func portBindings() []string { + if runtime.GOOS == "linux" { + return []string{ + "-p", linuxBindIP + ":" + linuxBindPort + ":53/udp", + "-p", linuxBindIP + ":" + linuxBindPort + ":53/tcp", + } + } + // macOS (and fallback) + return []string{ + "-p", macHostPort + ":53/udp", + "-p", macHostPort + ":53/tcp", + } +} + // EnsureRunning starts the DNS resolver container if not already running. // Idempotent: no-ops if the container is already healthy. func EnsureRunning() error { @@ -38,10 +70,9 @@ func EnsureRunning() error { fmt.Println("Starting DNS resolver for *.obol.stack...") - cmd := exec.Command("docker", "run", "-d", - "--name", containerName, - "-p", hostPort+":53/udp", - "-p", hostPort+":53/tcp", + args := []string{"run", "-d", "--name", containerName} + args = append(args, portBindings()...) + args = append(args, "--restart", "unless-stopped", dnsImage, "sh", "-c", @@ -51,11 +82,17 @@ func EnsureRunning() error { "--address=/"+domain+"/127.0.0.1 "+ "--log-facility=-", ) + + cmd := exec.Command("docker", args...) if output, err := cmd.CombinedOutput(); err != nil { return fmt.Errorf("failed to start DNS container: %w\n%s", err, output) } - fmt.Printf("DNS resolver running (*.obol.stack → 127.0.0.1, port %s)\n", hostPort) + if runtime.GOOS == "linux" { + fmt.Printf("DNS resolver running (*.obol.stack → 127.0.0.1, %s:%s)\n", linuxBindIP, linuxBindPort) + } else { + fmt.Printf("DNS resolver running (*.obol.stack → 127.0.0.1, port %s)\n", macHostPort) + } return nil } @@ -71,17 +108,14 @@ func Stop() { // ConfigureSystemResolver sets up the host OS to route *.obol.stack queries // to our local DNS container. 
Requires sudo on first run. // -// macOS: creates /etc/resolver/obol.stack -// Linux: prints manual instructions (TODO: systemd-resolved integration) +// macOS: creates /etc/resolver/obol.stack (port 5553) +// Linux: creates systemd-resolved drop-in pointing to 127.0.0.2 func ConfigureSystemResolver() error { switch runtime.GOOS { case "darwin": return configureMacOSResolver() case "linux": - fmt.Println("Note: automatic DNS resolver setup not yet supported on Linux.") - fmt.Printf("To resolve *.obol.stack, add to your DNS config:\n") - fmt.Printf(" server=/%s/127.0.0.1#%s\n", domain, hostPort) - return nil + return configureLinuxResolver() default: return fmt.Errorf("unsupported OS for DNS resolver: %s", runtime.GOOS) } @@ -92,41 +126,51 @@ func RemoveSystemResolver() { switch runtime.GOOS { case "darwin": removeMacOSResolver() + case "linux": + removeLinuxResolver() } } // IsResolverConfigured checks whether the system resolver is already set up. func IsResolverConfigured() bool { - if runtime.GOOS != "darwin" { + switch runtime.GOOS { + case "darwin": + path := filepath.Join(macResolverDir, macResolverFile) + _, err := os.Stat(path) + return err == nil + case "linux": + path := filepath.Join(resolvedDropInDir, resolvedDropInFile) + _, err := os.Stat(path) + return err == nil + default: return false } - path := filepath.Join(resolverDir, resolverFile) - _, err := os.Stat(path) - return err == nil } +// --- macOS --- + // configureMacOSResolver creates /etc/resolver/obol.stack pointing to our DNS. 
func configureMacOSResolver() error { - path := filepath.Join(resolverDir, resolverFile) + path := filepath.Join(macResolverDir, macResolverFile) // Check if already configured correctly if data, err := os.ReadFile(path); err == nil { content := string(data) - if strings.Contains(content, "port "+hostPort) { + if strings.Contains(content, "port "+macHostPort) { return nil // Already configured } } - content := fmt.Sprintf("# Managed by obol-stack — resolves *.obol.stack to localhost\nnameserver 127.0.0.1\nport %s\n", hostPort) + content := fmt.Sprintf("# Managed by obol-stack — resolves *.obol.stack to localhost\nnameserver 127.0.0.1\nport %s\n", macHostPort) // /etc/resolver/ needs root — try sudo fmt.Println("Configuring macOS DNS resolver for *.obol.stack (requires sudo)...") - mkdirCmd := exec.Command("sudo", "mkdir", "-p", resolverDir) + mkdirCmd := exec.Command("sudo", "mkdir", "-p", macResolverDir) mkdirCmd.Stdout = os.Stdout mkdirCmd.Stderr = os.Stderr if err := mkdirCmd.Run(); err != nil { - return fmt.Errorf("failed to create %s (sudo required): %w", resolverDir, err) + return fmt.Errorf("failed to create %s (sudo required): %w", macResolverDir, err) } writeCmd := exec.Command("sudo", "tee", path) @@ -136,13 +180,80 @@ func configureMacOSResolver() error { return fmt.Errorf("failed to write %s: %w", path, err) } - fmt.Printf("Resolver configured: %s → 127.0.0.1:%s\n", path, hostPort) + fmt.Printf("Resolver configured: %s → 127.0.0.1:%s\n", path, macHostPort) return nil } // removeMacOSResolver removes /etc/resolver/obol.stack. 
func removeMacOSResolver() { - path := filepath.Join(resolverDir, resolverFile) + path := filepath.Join(macResolverDir, macResolverFile) + if _, err := os.Stat(path); os.IsNotExist(err) { + return + } + if err := exec.Command("sudo", "rm", path).Run(); err != nil { + fmt.Printf("Warning: failed to remove %s: %v\n", path, err) + fmt.Printf(" Remove manually: sudo rm %s\n", path) + return + } + fmt.Printf("Removed DNS resolver config: %s\n", path) +} + +// --- Linux (systemd-resolved) --- + +// configureLinuxResolver creates a systemd-resolved drop-in that forwards +// *.obol.stack queries to our dnsmasq on 127.0.0.2:53. +func configureLinuxResolver() error { + // Check if systemd-resolved is active + if err := exec.Command("systemctl", "is-active", "--quiet", "systemd-resolved").Run(); err != nil { + fmt.Println("Note: systemd-resolved not detected.") + fmt.Println("To resolve *.obol.stack, configure your DNS resolver to forward the domain:") + fmt.Printf(" DNS server: %s (port %s) for domain %s\n", linuxBindIP, linuxBindPort, domain) + return nil + } + + path := filepath.Join(resolvedDropInDir, resolvedDropInFile) + + // Check if already configured + if data, err := os.ReadFile(path); err == nil { + if strings.Contains(string(data), linuxBindIP) { + return nil // Already configured + } + } + + content := fmt.Sprintf("# Managed by obol-stack — resolves *.obol.stack via local dnsmasq\n[Resolve]\nDNS=%s\nDomains=~%s\n", linuxBindIP, domain) + + fmt.Println("Configuring systemd-resolved for *.obol.stack (requires sudo)...") + + mkdirCmd := exec.Command("sudo", "mkdir", "-p", resolvedDropInDir) + mkdirCmd.Stdout = os.Stdout + mkdirCmd.Stderr = os.Stderr + if err := mkdirCmd.Run(); err != nil { + return fmt.Errorf("failed to create %s (sudo required): %w", resolvedDropInDir, err) + } + + writeCmd := exec.Command("sudo", "tee", path) + writeCmd.Stdin = strings.NewReader(content) + writeCmd.Stderr = os.Stderr + if err := writeCmd.Run(); err != nil { + return 
fmt.Errorf("failed to write %s: %w", path, err) + } + + // Restart systemd-resolved to pick up the new config + restartCmd := exec.Command("sudo", "systemctl", "restart", "systemd-resolved") + restartCmd.Stdout = os.Stdout + restartCmd.Stderr = os.Stderr + if err := restartCmd.Run(); err != nil { + fmt.Printf("Warning: failed to restart systemd-resolved: %v\n", err) + fmt.Println(" Run manually: sudo systemctl restart systemd-resolved") + } + + fmt.Printf("Resolver configured: %s → %s:%s\n", path, linuxBindIP, linuxBindPort) + return nil +} + +// removeLinuxResolver removes the systemd-resolved drop-in and restarts the service. +func removeLinuxResolver() { + path := filepath.Join(resolvedDropInDir, resolvedDropInFile) if _, err := os.Stat(path); os.IsNotExist(err) { return } @@ -151,5 +262,11 @@ func removeMacOSResolver() { fmt.Printf(" Remove manually: sudo rm %s\n", path) return } + + // Restart systemd-resolved to drop the forwarding rule + if err := exec.Command("sudo", "systemctl", "restart", "systemd-resolved").Run(); err != nil { + fmt.Printf("Warning: failed to restart systemd-resolved: %v\n", err) + } + fmt.Printf("Removed DNS resolver config: %s\n", path) } diff --git a/internal/dns/resolver_test.go b/internal/dns/resolver_test.go index 151d180..734dfc0 100644 --- a/internal/dns/resolver_test.go +++ b/internal/dns/resolver_test.go @@ -1,20 +1,58 @@ package dns -import "testing" +import ( + "runtime" + "testing" +) func TestConstants(t *testing.T) { - // Verify constants haven't drifted — these are referenced by both the - // Docker container config and the macOS resolver file. 
if containerName != "obol-dns" { t.Errorf("containerName = %q, want %q", containerName, "obol-dns") } - if hostPort != "5553" { - t.Errorf("hostPort = %q, want %q", hostPort, "5553") - } if domain != "obol.stack" { t.Errorf("domain = %q, want %q", domain, "obol.stack") } - if resolverFile != "obol.stack" { - t.Errorf("resolverFile = %q, want %q", resolverFile, "obol.stack") + + // macOS constants + if macHostPort != "5553" { + t.Errorf("macHostPort = %q, want %q", macHostPort, "5553") + } + if macResolverFile != "obol.stack" { + t.Errorf("macResolverFile = %q, want %q", macResolverFile, "obol.stack") + } + + // Linux constants + if linuxBindIP != "127.0.0.2" { + t.Errorf("linuxBindIP = %q, want %q", linuxBindIP, "127.0.0.2") + } + if linuxBindPort != "53" { + t.Errorf("linuxBindPort = %q, want %q", linuxBindPort, "53") + } + if resolvedDropInFile != "obol-stack.conf" { + t.Errorf("resolvedDropInFile = %q, want %q", resolvedDropInFile, "obol-stack.conf") + } +} + +func TestPortBindings(t *testing.T) { + bindings := portBindings() + if len(bindings) != 4 { + t.Fatalf("portBindings() returned %d elements, want 4", len(bindings)) + } + + switch runtime.GOOS { + case "darwin": + if bindings[1] != "5553:53/udp" { + t.Errorf("macOS UDP binding = %q, want %q", bindings[1], "5553:53/udp") + } + if bindings[3] != "5553:53/tcp" { + t.Errorf("macOS TCP binding = %q, want %q", bindings[3], "5553:53/tcp") + } + case "linux": + if bindings[1] != "127.0.0.2:53:53/udp" { + t.Errorf("Linux UDP binding = %q, want %q", bindings[1], "127.0.0.2:53:53/udp") + } + if bindings[3] != "127.0.0.2:53:53/tcp" { + t.Errorf("Linux TCP binding = %q, want %q", bindings[3], "127.0.0.2:53:53/tcp") + } } } diff --git a/internal/embed/infrastructure/base/templates/llm.yaml b/internal/embed/infrastructure/base/templates/llm.yaml index 8c8acf8..cb0166c 100644 --- a/internal/embed/infrastructure/base/templates/llm.yaml +++ b/internal/embed/infrastructure/base/templates/llm.yaml @@ -153,7 +153,7 @@ spec: - 
name: llmspy # Obol fork of LLMSpy with smart routing extension. # Pin a specific version for reproducibility. - image: ghcr.io/obolnetwork/llms:3.0.32-obol.1-rc.2 + image: ghcr.io/obolnetwork/llms:3.0.32-obol.1-rc.1 imagePullPolicy: IfNotPresent ports: - name: http From ca835f5f837099ff6cf6583b349511c69f195c69 Mon Sep 17 00:00:00 2001 From: JeanDaniel Bussy Date: Fri, 13 Feb 2026 16:42:34 +0400 Subject: [PATCH 30/42] refactor(openclaw): replace embedded chart with remote obol/openclaw Helm repo (#145) Switch from bundling the OpenClaw Helm chart in the Go binary via //go:embed to referencing obol/openclaw from the published Helm repo, matching the pattern used by Helios and Aztec networks. Changes: - generateHelmfile() now emits chart: obol/openclaw with version pin - Remove copyEmbeddedChart() and all chart/values.yaml copy logic - Remove //go:embed directive, chartFS variable, and embed/io/fs imports - Delete internal/openclaw/chart/ (chart lives in helm-charts repo) - Deployment directory simplified to helmfile.yaml + values-obol.yaml - Setup() regenerates helmfile on each run to pick up version bumps Depends on helm-charts PR #183 being merged and chart published. 
--- internal/openclaw/chart/Chart.yaml | 20 - internal/openclaw/chart/templates/NOTES.txt | 47 --- .../openclaw/chart/templates/_helpers.tpl | 224 ----------- .../openclaw/chart/templates/configmap.yaml | 11 - .../openclaw/chart/templates/deployment.yaml | 187 --------- .../openclaw/chart/templates/httproute.yaml | 25 -- .../openclaw/chart/templates/ingress.yaml | 43 -- .../openclaw/chart/templates/init-job.yaml | 64 --- internal/openclaw/chart/templates/pvc.yaml | 19 - internal/openclaw/chart/templates/role.yaml | 22 - .../openclaw/chart/templates/rolebinding.yaml | 16 - internal/openclaw/chart/templates/secret.yaml | 35 -- .../openclaw/chart/templates/service.yaml | 15 - .../chart/templates/serviceaccount.yaml | 13 - .../chart/templates/skills-configmap.yaml | 11 - .../templates/tests/test-connection.yaml | 30 -- .../openclaw/chart/templates/validate.yaml | 32 -- internal/openclaw/chart/values.schema.json | 377 ------------------ internal/openclaw/chart/values.yaml | 308 -------------- internal/openclaw/openclaw.go | 99 +---- 20 files changed, 19 insertions(+), 1579 deletions(-) delete mode 100644 internal/openclaw/chart/Chart.yaml delete mode 100644 internal/openclaw/chart/templates/NOTES.txt delete mode 100644 internal/openclaw/chart/templates/_helpers.tpl delete mode 100644 internal/openclaw/chart/templates/configmap.yaml delete mode 100644 internal/openclaw/chart/templates/deployment.yaml delete mode 100644 internal/openclaw/chart/templates/httproute.yaml delete mode 100644 internal/openclaw/chart/templates/ingress.yaml delete mode 100644 internal/openclaw/chart/templates/init-job.yaml delete mode 100644 internal/openclaw/chart/templates/pvc.yaml delete mode 100644 internal/openclaw/chart/templates/role.yaml delete mode 100644 internal/openclaw/chart/templates/rolebinding.yaml delete mode 100644 internal/openclaw/chart/templates/secret.yaml delete mode 100644 internal/openclaw/chart/templates/service.yaml delete mode 100644 
internal/openclaw/chart/templates/serviceaccount.yaml delete mode 100644 internal/openclaw/chart/templates/skills-configmap.yaml delete mode 100644 internal/openclaw/chart/templates/tests/test-connection.yaml delete mode 100644 internal/openclaw/chart/templates/validate.yaml delete mode 100644 internal/openclaw/chart/values.schema.json delete mode 100644 internal/openclaw/chart/values.yaml diff --git a/internal/openclaw/chart/Chart.yaml b/internal/openclaw/chart/Chart.yaml deleted file mode 100644 index 970d251..0000000 --- a/internal/openclaw/chart/Chart.yaml +++ /dev/null @@ -1,20 +0,0 @@ -apiVersion: v2 -name: openclaw -description: OpenClaw gateway deployment (agent runtime) for Kubernetes. -type: application -version: 0.1.0 -appVersion: "2026.2.9" -kubeVersion: ">=1.26.0-0" - -home: https://docs.openclaw.ai -sources: - - https://docs.openclaw.ai -maintainers: - - name: Obol Platform Team - email: platform@obol.tech -keywords: - - openclaw - - agent - - ai - - gateway - - obol diff --git a/internal/openclaw/chart/templates/NOTES.txt b/internal/openclaw/chart/templates/NOTES.txt deleted file mode 100644 index 2bbb013..0000000 --- a/internal/openclaw/chart/templates/NOTES.txt +++ /dev/null @@ -1,47 +0,0 @@ -OpenClaw is now installed. - -Namespace: {{ .Release.Namespace }} -Service: {{ include "openclaw.fullname" . }} -Port: {{ .Values.service.port }} - -Gateway token: - kubectl get secret -n {{ .Release.Namespace }} {{ include "openclaw.secretsName" . }} -o jsonpath='{.data.{{ .Values.secrets.gatewayToken.key }}}' | base64 --decode - -{{- if .Values.httpRoute.enabled }} - -HTTPRoute is enabled. Access OpenClaw at: -{{- range .Values.httpRoute.hostnames }} - http://{{ . }} -{{- end }} - -{{- else if .Values.ingress.enabled }} - -Ingress is enabled. 
Access OpenClaw at: -{{- range $host := .Values.ingress.hosts }} - {{- range .paths }} - http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }} - {{- end }} -{{- end }} - -{{- else }} - -Port-forward for local access: - export POD_NAME=$(kubectl get pods -n {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "openclaw.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}") - kubectl -n {{ .Release.Namespace }} port-forward $POD_NAME 18789:{{ .Values.service.port }} - open http://127.0.0.1:18789 - -{{- end }} - -Next steps: -{{- if and .Values.models.ollama.enabled (not .Values.models.anthropic.enabled) (not .Values.models.openai.enabled) }} - You are using the default Ollama provider. To configure a cloud LLM provider: - obol llm configure --provider=anthropic --api-key= - obol openclaw setup {{ .Release.Name }} -{{- end }} -{{- if not (or .Values.models.ollama.enabled .Values.models.anthropic.enabled .Values.models.openai.enabled) }} - WARNING: No model providers are enabled. Configure at least one provider: - obol llm configure --provider=anthropic --api-key= - obol openclaw setup {{ .Release.Name }} -{{- end }} - Dashboard: open the URL above and enter your gateway token - CLI docs: obol openclaw --help diff --git a/internal/openclaw/chart/templates/_helpers.tpl b/internal/openclaw/chart/templates/_helpers.tpl deleted file mode 100644 index d6167e2..0000000 --- a/internal/openclaw/chart/templates/_helpers.tpl +++ /dev/null @@ -1,224 +0,0 @@ -{{/* -Expand the name of the chart. -*/}} -{{- define "openclaw.name" -}} -{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} -{{- end }} - -{{/* -Create a default fully qualified app name. 
-*/}} -{{- define "openclaw.fullname" -}} -{{- if .Values.fullnameOverride }} -{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} -{{- else }} -{{- $name := default .Chart.Name .Values.nameOverride }} -{{- if contains $name .Release.Name }} -{{- .Release.Name | trunc 63 | trimSuffix "-" }} -{{- else }} -{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} -{{- end }} -{{- end }} -{{- end }} - -{{/* -Create chart name and version as used by the chart label. -*/}} -{{- define "openclaw.chart" -}} -{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} -{{- end }} - -{{/* -Common labels. -*/}} -{{- define "openclaw.labels" -}} -helm.sh/chart: {{ include "openclaw.chart" . }} -{{ include "openclaw.selectorLabels" . }} -{{- if .Chart.AppVersion }} -app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} -{{- end }} -app.kubernetes.io/managed-by: {{ .Release.Service }} -{{- end }} - -{{/* -Selector labels. -*/}} -{{- define "openclaw.selectorLabels" -}} -app.kubernetes.io/name: {{ include "openclaw.name" . }} -app.kubernetes.io/instance: {{ .Release.Name }} -{{- end }} - -{{/* -Create the name of the service account to use. -*/}} -{{- define "openclaw.serviceAccountName" -}} -{{- if .Values.serviceAccount.create }} -{{- default (include "openclaw.fullname" .) .Values.serviceAccount.name }} -{{- else }} -{{- default "default" .Values.serviceAccount.name }} -{{- end }} -{{- end }} - -{{/* -Compute the full image reference. -*/}} -{{- define "openclaw.image" -}} -{{- $tag := .Values.image.tag -}} -{{- if not $tag -}} -{{- $tag = .Chart.AppVersion -}} -{{- end -}} -{{- printf "%s:%s" .Values.image.repository $tag -}} -{{- end }} - -{{/* -Name of the Secret used for envFrom. 
-*/}} -{{- define "openclaw.secretsName" -}} -{{- if .Values.secrets.existingSecret -}} -{{- .Values.secrets.existingSecret -}} -{{- else if .Values.secrets.name -}} -{{- .Values.secrets.name -}} -{{- else -}} -{{- printf "%s-secrets" (include "openclaw.fullname" .) -}} -{{- end -}} -{{- end }} - -{{/* -Name of the ConfigMap containing openclaw.json. -*/}} -{{- define "openclaw.configMapName" -}} -{{- if .Values.config.existingConfigMap -}} -{{- .Values.config.existingConfigMap -}} -{{- else -}} -{{- printf "%s-config" (include "openclaw.fullname" .) -}} -{{- end -}} -{{- end }} - -{{/* -Name of the PVC used for state storage. -*/}} -{{- define "openclaw.pvcName" -}} -{{- if .Values.persistence.existingClaim -}} -{{- .Values.persistence.existingClaim -}} -{{- else -}} -{{- printf "%s-data" (include "openclaw.fullname" .) -}} -{{- end -}} -{{- end }} - -{{/* -Compute (or reuse) the gateway token value. -*/}} -{{- define "openclaw.gatewayTokenValue" -}} -{{- if .Values.secrets.gatewayToken.value -}} -{{- .Values.secrets.gatewayToken.value -}} -{{- else -}} -{{- $secretName := include "openclaw.secretsName" . -}} -{{- $key := .Values.secrets.gatewayToken.key -}} -{{- $existing := (lookup "v1" "Secret" .Release.Namespace $secretName) -}} -{{- if $existing -}} - {{- $data := index $existing "data" -}} - {{- if and $data (hasKey $data $key) -}} - {{- index $data $key | b64dec -}} - {{- end -}} -{{- end -}} -{{- end -}} -{{- end }} - -{{/* -Render openclaw.json as strict JSON. If config.content is provided, it is used verbatim. 
-*/}} -{{- define "openclaw.configJson" -}} -{{- if .Values.config.content -}} -{{- .Values.config.content -}} -{{- else -}} -{{- $gatewayAuth := dict "mode" .Values.openclaw.gateway.auth.mode -}} -{{- if ne .Values.openclaw.gateway.auth.mode "none" -}} -{{- $_ := set $gatewayAuth "token" (printf "${%s}" .Values.secrets.gatewayToken.key) -}} -{{- end -}} - -{{- $gateway := dict - "mode" .Values.openclaw.gateway.mode - "bind" .Values.openclaw.gateway.bind - "port" .Values.service.port - "auth" $gatewayAuth - "http" (dict "endpoints" (dict "chatCompletions" (dict "enabled" .Values.openclaw.gateway.http.endpoints.chatCompletions.enabled))) --}} - -{{- $agentDefaults := dict "workspace" .Values.openclaw.workspaceDir -}} -{{- if .Values.openclaw.agentModel -}} -{{- $_ := set $agentDefaults "model" (dict "primary" .Values.openclaw.agentModel) -}} -{{- end -}} - -{{- $cfg := dict - "gateway" $gateway - "agents" (dict "defaults" $agentDefaults) --}} - -{{- if .Values.skills.enabled -}} -{{- $_ := set $cfg "skills" (dict "load" (dict - "extraDirs" (list .Values.skills.extractDir) -)) -}} -{{- end -}} - -{{- /* Build providers map from all enabled model providers */ -}} -{{- $providers := dict -}} -{{- range $name := list "anthropic" "openai" "ollama" -}} -{{- $p := index $.Values.models $name -}} -{{- if $p.enabled -}} -{{- $models := list -}} -{{- range $m := $p.models -}} -{{- $models = append $models (dict "id" $m.id "name" $m.name) -}} -{{- end -}} -{{- $entry := dict - "baseUrl" $p.baseUrl - "apiKey" (printf "${%s}" $p.apiKeyEnvVar) - "models" $models --}} -{{- if $p.api -}} -{{- $_ := set $entry "api" $p.api -}} -{{- end -}} -{{- $_ := set $providers $name $entry -}} -{{- end -}} -{{- end -}} -{{- if $providers -}} -{{- $_ := set $cfg "models" (dict "providers" $providers) -}} -{{- end -}} - -{{- /* Build channels config from enabled integrations */ -}} -{{- $channels := dict -}} -{{- if .Values.channels.telegram.enabled -}} -{{- $tg := dict "botToken" (printf 
"${TELEGRAM_BOT_TOKEN}") -}} -{{- if .Values.channels.telegram.dmPolicy -}} -{{- $_ := set $tg "dmPolicy" .Values.channels.telegram.dmPolicy -}} -{{- end -}} -{{- $_ := set $channels "telegram" $tg -}} -{{- end -}} -{{- if .Values.channels.discord.enabled -}} -{{- $dc := dict "botToken" (printf "${DISCORD_BOT_TOKEN}") -}} -{{- if .Values.channels.discord.dmPolicy -}} -{{- $_ := set $dc "dmPolicy" .Values.channels.discord.dmPolicy -}} -{{- end -}} -{{- $_ := set $channels "discord" $dc -}} -{{- end -}} -{{- if .Values.channels.slack.enabled -}} -{{- $sl := dict "botToken" (printf "${SLACK_BOT_TOKEN}") "appToken" (printf "${SLACK_APP_TOKEN}") -}} -{{- $_ := set $channels "slack" $sl -}} -{{- end -}} -{{- if $channels -}} -{{- $_ := set $cfg "channels" $channels -}} -{{- end -}} - -{{- $cfg | toPrettyJson -}} -{{- end -}} -{{- end }} - -{{/* -Name of the skills ConfigMap (user-provided or chart-created default). -*/}} -{{- define "openclaw.skillsConfigMapName" -}} -{{- if .Values.skills.configMapName -}} -{{- .Values.skills.configMapName -}} -{{- else -}} -{{- printf "%s-skills" (include "openclaw.fullname" .) -}} -{{- end -}} -{{- end }} diff --git a/internal/openclaw/chart/templates/configmap.yaml b/internal/openclaw/chart/templates/configmap.yaml deleted file mode 100644 index fafe456..0000000 --- a/internal/openclaw/chart/templates/configmap.yaml +++ /dev/null @@ -1,11 +0,0 @@ -{{- if not .Values.config.existingConfigMap -}} -apiVersion: v1 -kind: ConfigMap -metadata: - name: {{ include "openclaw.configMapName" . }} - labels: - {{- include "openclaw.labels" . | nindent 4 }} -data: - {{ .Values.config.key }}: |- - {{- include "openclaw.configJson" . 
| nindent 4 }} -{{- end }} diff --git a/internal/openclaw/chart/templates/deployment.yaml b/internal/openclaw/chart/templates/deployment.yaml deleted file mode 100644 index 599c646..0000000 --- a/internal/openclaw/chart/templates/deployment.yaml +++ /dev/null @@ -1,187 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ include "openclaw.fullname" . }} - labels: - {{- include "openclaw.labels" . | nindent 4 }} -spec: - replicas: {{ .Values.replicaCount }} - strategy: - type: Recreate - selector: - matchLabels: - {{- include "openclaw.selectorLabels" . | nindent 6 }} - template: - metadata: - annotations: - checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }} - checksum/secret: {{ include (print $.Template.BasePath "/secret.yaml") . | sha256sum }} - {{- with .Values.podAnnotations }} - {{- toYaml . | nindent 8 }} - {{- end }} - labels: - {{- include "openclaw.selectorLabels" . | nindent 8 }} - {{- with .Values.podLabels }} - {{- toYaml . | nindent 8 }} - {{- end }} - spec: - {{- with .Values.imagePullSecrets }} - imagePullSecrets: - {{- toYaml . | nindent 8 }} - {{- end }} - serviceAccountName: {{ include "openclaw.serviceAccountName" . }} - {{- if .Values.priorityClassName }} - priorityClassName: {{ .Values.priorityClassName | quote }} - {{- end }} - {{- with .Values.podSecurityContext }} - securityContext: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- if .Values.skills.enabled }} - initContainers: - - name: extract-skills - image: "{{ .Values.skills.initContainer.image.repository }}:{{ .Values.skills.initContainer.image.tag }}" - imagePullPolicy: {{ .Values.skills.initContainer.image.pullPolicy }} - {{- with .Values.containerSecurityContext }} - securityContext: - {{- toYaml . 
| nindent 12 }} - {{- end }} - command: - - sh - - -c - - | - set -eu - mkdir -p {{ .Values.skills.extractDir | quote }} - if [ -f /skills/{{ .Values.skills.archiveKey }} ]; then - rm -rf {{ .Values.skills.extractDir | quote }}/* - tar -xzf /skills/{{ .Values.skills.archiveKey }} -C {{ .Values.skills.extractDir | quote }} - echo "Skills extracted successfully" - else - echo "No skills archive found, skipping extraction" - fi - volumeMounts: - - name: data - mountPath: {{ .Values.persistence.mountPath }} - - name: skills-archive - mountPath: /skills - readOnly: true - {{- end }} - containers: - - name: openclaw - image: "{{ include "openclaw.image" . }}" - imagePullPolicy: {{ .Values.image.pullPolicy }} - {{- with .Values.containerSecurityContext }} - securityContext: - {{- toYaml . | nindent 12 }} - {{- end }} - {{- with .Values.image.command }} - command: - {{- toYaml . | nindent 12 }} - {{- end }} - {{- with .Values.image.args }} - args: - {{- toYaml . | nindent 12 }} - {{- end }} - ports: - - name: http - containerPort: {{ .Values.service.port }} - protocol: TCP - env: - - name: OPENCLAW_CONFIG_PATH - value: "/etc/openclaw/{{ .Values.config.key }}" - - name: OPENCLAW_STATE_DIR - value: {{ .Values.openclaw.stateDir | quote }} - - name: ERPC_URL - value: {{ .Values.erpc.url | quote }} - {{- /* Inject non-secret provider API key values (e.g. Ollama placeholder) */ -}} - {{- range $name := list "anthropic" "openai" "ollama" }} - {{- $p := index $.Values.models $name }} - {{- if and $p.enabled $p.apiKeyValue (not (has $name (list "anthropic" "openai"))) }} - - name: {{ $p.apiKeyEnvVar }} - value: {{ $p.apiKeyValue | quote }} - {{- end }} - {{- end }} - {{- with .Values.image.env }} - {{- toYaml . | nindent 12 }} - {{- end }} - {{- with .Values.extraEnv }} - {{- toYaml . | nindent 12 }} - {{- end }} - envFrom: - - secretRef: - name: {{ include "openclaw.secretsName" . }} - {{- range .Values.secrets.extraEnvFromSecrets }} - - secretRef: - name: {{ . 
| quote }} - {{- end }} - {{- if .Values.startupProbe.enabled }} - startupProbe: - tcpSocket: - port: http - periodSeconds: {{ .Values.startupProbe.periodSeconds }} - failureThreshold: {{ .Values.startupProbe.failureThreshold }} - timeoutSeconds: {{ .Values.startupProbe.timeoutSeconds }} - {{- end }} - {{- if .Values.livenessProbe.enabled }} - livenessProbe: - tcpSocket: - port: http - initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds }} - periodSeconds: {{ .Values.livenessProbe.periodSeconds }} - timeoutSeconds: {{ .Values.livenessProbe.timeoutSeconds }} - failureThreshold: {{ .Values.livenessProbe.failureThreshold }} - {{- end }} - {{- if .Values.readinessProbe.enabled }} - readinessProbe: - tcpSocket: - port: http - initialDelaySeconds: {{ .Values.readinessProbe.initialDelaySeconds }} - periodSeconds: {{ .Values.readinessProbe.periodSeconds }} - timeoutSeconds: {{ .Values.readinessProbe.timeoutSeconds }} - failureThreshold: {{ .Values.readinessProbe.failureThreshold }} - {{- end }} - {{- with .Values.resources }} - resources: - {{- toYaml . | nindent 12 }} - {{- end }} - volumeMounts: - - name: config - mountPath: /etc/openclaw - readOnly: true - - name: data - mountPath: {{ .Values.persistence.mountPath }} - {{- with .Values.extraVolumeMounts }} - {{- toYaml . | nindent 12 }} - {{- end }} - volumes: - - name: config - configMap: - name: {{ include "openclaw.configMapName" . }} - - name: data - {{- if .Values.persistence.enabled }} - persistentVolumeClaim: - claimName: {{ include "openclaw.pvcName" . }} - {{- else }} - emptyDir: {} - {{- end }} - {{- if .Values.skills.enabled }} - - name: skills-archive - configMap: - name: {{ include "openclaw.skillsConfigMapName" . }} - optional: true - {{- end }} - {{- with .Values.extraVolumes }} - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.nodeSelector }} - nodeSelector: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.affinity }} - affinity: - {{- toYaml . 
| nindent 8 }} - {{- end }} - {{- with .Values.tolerations }} - tolerations: - {{- toYaml . | nindent 8 }} - {{- end }} diff --git a/internal/openclaw/chart/templates/httproute.yaml b/internal/openclaw/chart/templates/httproute.yaml deleted file mode 100644 index d7c6518..0000000 --- a/internal/openclaw/chart/templates/httproute.yaml +++ /dev/null @@ -1,25 +0,0 @@ -{{- if .Values.httpRoute.enabled -}} -apiVersion: gateway.networking.k8s.io/v1 -kind: HTTPRoute -metadata: - name: {{ include "openclaw.fullname" . }} - labels: - {{- include "openclaw.labels" . | nindent 4 }} - {{- with .Values.httpRoute.annotations }} - annotations: - {{- toYaml . | nindent 4 }} - {{- end }} -spec: - parentRefs: - {{- toYaml .Values.httpRoute.parentRefs | nindent 4 }} - hostnames: - {{- toYaml .Values.httpRoute.hostnames | nindent 4 }} - rules: - - matches: - - path: - type: PathPrefix - value: {{ .Values.httpRoute.pathPrefix | quote }} - backendRefs: - - name: {{ include "openclaw.fullname" . }} - port: {{ .Values.service.port }} -{{- end }} diff --git a/internal/openclaw/chart/templates/ingress.yaml b/internal/openclaw/chart/templates/ingress.yaml deleted file mode 100644 index cf55fb0..0000000 --- a/internal/openclaw/chart/templates/ingress.yaml +++ /dev/null @@ -1,43 +0,0 @@ -{{- if .Values.ingress.enabled -}} -apiVersion: networking.k8s.io/v1 -kind: Ingress -metadata: - name: {{ include "openclaw.fullname" . }} - labels: - {{- include "openclaw.labels" . | nindent 4 }} - {{- with .Values.ingress.annotations }} - annotations: - {{- toYaml . | nindent 4 }} - {{- end }} -spec: - {{- with .Values.ingress.className }} - ingressClassName: {{ . }} - {{- end }} - {{- if .Values.ingress.tls }} - tls: - {{- range .Values.ingress.tls }} - - hosts: - {{- range .hosts }} - - {{ . 
| quote }} - {{- end }} - secretName: {{ .secretName }} - {{- end }} - {{- end }} - rules: - {{- range .Values.ingress.hosts }} - - host: {{ .host | quote }} - http: - paths: - {{- range .paths }} - - path: {{ .path }} - {{- with .pathType }} - pathType: {{ . }} - {{- end }} - backend: - service: - name: {{ include "openclaw.fullname" $ }} - port: - number: {{ $.Values.service.port }} - {{- end }} - {{- end }} -{{- end }} diff --git a/internal/openclaw/chart/templates/init-job.yaml b/internal/openclaw/chart/templates/init-job.yaml deleted file mode 100644 index a58bbd7..0000000 --- a/internal/openclaw/chart/templates/init-job.yaml +++ /dev/null @@ -1,64 +0,0 @@ -{{- if .Values.initJob.enabled -}} -apiVersion: batch/v1 -kind: Job -metadata: - name: {{ include "openclaw.fullname" . }}-init - labels: - {{- include "openclaw.labels" . | nindent 4 }} - annotations: - helm.sh/hook: post-install - helm.sh/hook-weight: "0" - helm.sh/hook-delete-policy: before-hook-creation -spec: - backoffLimit: 3 - template: - metadata: - labels: - {{- include "openclaw.selectorLabels" . | nindent 8 }} - app.kubernetes.io/component: init - spec: - restartPolicy: OnFailure - serviceAccountName: {{ include "openclaw.serviceAccountName" . }} - {{- with .Values.podSecurityContext }} - securityContext: - {{- toYaml . | nindent 8 }} - {{- end }} - containers: - - name: init - {{- $tag := .Values.initJob.image.tag | default .Values.image.tag | default .Chart.AppVersion }} - image: "{{ .Values.initJob.image.repository }}:{{ $tag }}" - imagePullPolicy: {{ .Values.initJob.image.pullPolicy }} - {{- with .Values.containerSecurityContext }} - securityContext: - {{- toYaml . | nindent 12 }} - {{- end }} - {{- with .Values.initJob.command }} - command: - {{- toYaml . | nindent 12 }} - {{- end }} - {{- with .Values.initJob.args }} - args: - {{- toYaml . 
| nindent 12 }} - {{- end }} - env: - - name: OPENCLAW_STATE_DIR - value: {{ .Values.openclaw.stateDir | quote }} - {{- with .Values.initJob.env }} - {{- toYaml . | nindent 12 }} - {{- end }} - {{- with .Values.initJob.resources }} - resources: - {{- toYaml . | nindent 12 }} - {{- end }} - volumeMounts: - - name: data - mountPath: {{ .Values.persistence.mountPath }} - volumes: - - name: data - {{- if .Values.persistence.enabled }} - persistentVolumeClaim: - claimName: {{ include "openclaw.pvcName" . }} - {{- else }} - emptyDir: {} - {{- end }} -{{- end }} diff --git a/internal/openclaw/chart/templates/pvc.yaml b/internal/openclaw/chart/templates/pvc.yaml deleted file mode 100644 index 69bdda3..0000000 --- a/internal/openclaw/chart/templates/pvc.yaml +++ /dev/null @@ -1,19 +0,0 @@ -{{- if and .Values.persistence.enabled (not .Values.persistence.existingClaim) -}} -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: {{ include "openclaw.pvcName" . }} - labels: - {{- include "openclaw.labels" . | nindent 4 }} - annotations: - "helm.sh/resource-policy": keep -spec: - accessModes: - {{- toYaml .Values.persistence.accessModes | nindent 4 }} - {{- if .Values.persistence.storageClass }} - storageClassName: {{ .Values.persistence.storageClass }} - {{- end }} - resources: - requests: - storage: {{ .Values.persistence.size }} -{{- end }} diff --git a/internal/openclaw/chart/templates/role.yaml b/internal/openclaw/chart/templates/role.yaml deleted file mode 100644 index e7d7a55..0000000 --- a/internal/openclaw/chart/templates/role.yaml +++ /dev/null @@ -1,22 +0,0 @@ -{{- if .Values.rbac.create -}} -apiVersion: rbac.authorization.k8s.io/v1 -kind: Role -metadata: - name: {{ include "openclaw.fullname" . }} - labels: - {{- include "openclaw.labels" . 
| nindent 4 }} -rules: - # Read-only access to common namespace resources - - apiGroups: [""] - resources: ["pods", "pods/log", "services", "configmaps", "events", "persistentvolumeclaims"] - verbs: ["get", "list", "watch"] - - apiGroups: ["apps"] - resources: ["deployments", "statefulsets", "replicasets"] - verbs: ["get", "list", "watch"] - - apiGroups: ["batch"] - resources: ["jobs", "cronjobs"] - verbs: ["get", "list", "watch"] - {{- with .Values.rbac.extraRules }} - {{- toYaml . | nindent 2 }} - {{- end }} -{{- end }} diff --git a/internal/openclaw/chart/templates/rolebinding.yaml b/internal/openclaw/chart/templates/rolebinding.yaml deleted file mode 100644 index 06f5d48..0000000 --- a/internal/openclaw/chart/templates/rolebinding.yaml +++ /dev/null @@ -1,16 +0,0 @@ -{{- if .Values.rbac.create -}} -apiVersion: rbac.authorization.k8s.io/v1 -kind: RoleBinding -metadata: - name: {{ include "openclaw.fullname" . }} - labels: - {{- include "openclaw.labels" . | nindent 4 }} -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: Role - name: {{ include "openclaw.fullname" . }} -subjects: - - kind: ServiceAccount - name: {{ include "openclaw.serviceAccountName" . }} - namespace: {{ .Release.Namespace }} -{{- end }} diff --git a/internal/openclaw/chart/templates/secret.yaml b/internal/openclaw/chart/templates/secret.yaml deleted file mode 100644 index 61a8f89..0000000 --- a/internal/openclaw/chart/templates/secret.yaml +++ /dev/null @@ -1,35 +0,0 @@ -{{- if and .Values.secrets.create (not .Values.secrets.existingSecret) -}} -apiVersion: v1 -kind: Secret -metadata: - name: {{ include "openclaw.secretsName" . }} - labels: - {{- include "openclaw.labels" . | nindent 4 }} -type: Opaque -stringData: - {{ .Values.secrets.gatewayToken.key }}: {{ include "openclaw.gatewayTokenValue" . 
| quote }} - {{- if and .Values.models.anthropic.enabled .Values.models.anthropic.apiKeyValue }} - {{ .Values.models.anthropic.apiKeyEnvVar }}: {{ .Values.models.anthropic.apiKeyValue | quote }} - {{- end }} - {{- if and .Values.models.openai.enabled .Values.models.openai.apiKeyValue }} - {{ .Values.models.openai.apiKeyEnvVar }}: {{ .Values.models.openai.apiKeyValue | quote }} - {{- end }} - {{- if and .Values.channels.telegram.enabled .Values.channels.telegram.botToken }} - TELEGRAM_BOT_TOKEN: {{ .Values.channels.telegram.botToken | quote }} - {{- end }} - {{- if .Values.channels.telegram.dmPolicy }} - TELEGRAM_DM_POLICY: {{ .Values.channels.telegram.dmPolicy | quote }} - {{- end }} - {{- if and .Values.channels.discord.enabled .Values.channels.discord.botToken }} - DISCORD_BOT_TOKEN: {{ .Values.channels.discord.botToken | quote }} - {{- end }} - {{- if .Values.channels.discord.dmPolicy }} - DISCORD_DM_POLICY: {{ .Values.channels.discord.dmPolicy | quote }} - {{- end }} - {{- if and .Values.channels.slack.enabled .Values.channels.slack.botToken }} - SLACK_BOT_TOKEN: {{ .Values.channels.slack.botToken | quote }} - {{- end }} - {{- if and .Values.channels.slack.enabled .Values.channels.slack.appToken }} - SLACK_APP_TOKEN: {{ .Values.channels.slack.appToken | quote }} - {{- end }} -{{- end }} diff --git a/internal/openclaw/chart/templates/service.yaml b/internal/openclaw/chart/templates/service.yaml deleted file mode 100644 index 4fae8c4..0000000 --- a/internal/openclaw/chart/templates/service.yaml +++ /dev/null @@ -1,15 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: {{ include "openclaw.fullname" . }} - labels: - {{- include "openclaw.labels" . | nindent 4 }} -spec: - type: {{ .Values.service.type }} - ports: - - port: {{ .Values.service.port }} - targetPort: http - protocol: TCP - name: http - selector: - {{- include "openclaw.selectorLabels" . 
| nindent 4 }} diff --git a/internal/openclaw/chart/templates/serviceaccount.yaml b/internal/openclaw/chart/templates/serviceaccount.yaml deleted file mode 100644 index 906e6e4..0000000 --- a/internal/openclaw/chart/templates/serviceaccount.yaml +++ /dev/null @@ -1,13 +0,0 @@ -{{- if .Values.serviceAccount.create -}} -apiVersion: v1 -kind: ServiceAccount -metadata: - name: {{ include "openclaw.serviceAccountName" . }} - labels: - {{- include "openclaw.labels" . | nindent 4 }} - {{- with .Values.serviceAccount.annotations }} - annotations: - {{- toYaml . | nindent 4 }} - {{- end }} -automountServiceAccountToken: {{ .Values.serviceAccount.automount }} -{{- end }} diff --git a/internal/openclaw/chart/templates/skills-configmap.yaml b/internal/openclaw/chart/templates/skills-configmap.yaml deleted file mode 100644 index a184edd..0000000 --- a/internal/openclaw/chart/templates/skills-configmap.yaml +++ /dev/null @@ -1,11 +0,0 @@ -{{- if and .Values.skills.enabled .Values.skills.createDefault (not .Values.skills.configMapName) -}} -apiVersion: v1 -kind: ConfigMap -metadata: - name: {{ include "openclaw.fullname" . }}-skills - labels: - {{- include "openclaw.labels" . | nindent 4 }} - annotations: - helm.sh/resource-policy: keep -data: {} -{{- end }} diff --git a/internal/openclaw/chart/templates/tests/test-connection.yaml b/internal/openclaw/chart/templates/tests/test-connection.yaml deleted file mode 100644 index b529313..0000000 --- a/internal/openclaw/chart/templates/tests/test-connection.yaml +++ /dev/null @@ -1,30 +0,0 @@ -apiVersion: v1 -kind: Pod -metadata: - name: {{ include "openclaw.fullname" . }}-test-connection - labels: - {{- include "openclaw.labels" . 
| nindent 4 }} - app.kubernetes.io/component: test - annotations: - "helm.sh/hook": test - "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded -spec: - restartPolicy: Never - containers: - - name: tcp-check - image: busybox:1.36.1 - command: - - sh - - -c - - | - echo "Testing TCP connection to {{ include "openclaw.fullname" . }}:{{ .Values.service.port }}..." - for i in $(seq 1 10); do - if nc -z {{ include "openclaw.fullname" . }} {{ .Values.service.port }} 2>/dev/null; then - echo "Connection successful!" - exit 0 - fi - echo "Attempt $i/10 failed, retrying in 3s..." - sleep 3 - done - echo "Connection failed after 10 attempts" - exit 1 diff --git a/internal/openclaw/chart/templates/validate.yaml b/internal/openclaw/chart/templates/validate.yaml deleted file mode 100644 index e7f8846..0000000 --- a/internal/openclaw/chart/templates/validate.yaml +++ /dev/null @@ -1,32 +0,0 @@ -{{- if ne (int .Values.replicaCount) 1 -}} -{{- fail "openclaw: replicaCount must be 1 (OpenClaw stores state on disk and should not be scaled horizontally)" -}} -{{- end -}} - -{{- if and .Values.secrets.existingSecret .Values.secrets.create -}} -{{- fail "openclaw: secrets.existingSecret is set; set secrets.create=false" -}} -{{- end -}} - -{{- if and (not .Values.secrets.existingSecret) (not .Values.secrets.create) -}} -{{- fail "openclaw: set secrets.existingSecret or enable secrets.create" -}} -{{- end -}} - -{{- $gatewayToken := include "openclaw.gatewayTokenValue" . 
| trim -}} -{{- if and (eq .Values.openclaw.gateway.auth.mode "token") .Values.secrets.create (not .Values.secrets.existingSecret) (eq $gatewayToken "") -}} -{{- fail "openclaw: token auth is enabled; set secrets.gatewayToken.value or use secrets.existingSecret" -}} -{{- end -}} - -{{- if and .Values.httpRoute.enabled (eq (len .Values.httpRoute.hostnames) 0) -}} -{{- fail "openclaw: httpRoute.enabled is true but httpRoute.hostnames is empty" -}} -{{- end -}} - -{{- if and .Values.skills.enabled (eq .Values.skills.configMapName "") (not .Values.skills.createDefault) -}} -{{- fail "openclaw: skills.enabled is true but no skills.configMapName or skills.createDefault" -}} -{{- end -}} - -{{- if and .Values.rbac.create (not .Values.serviceAccount.create) (eq (.Values.serviceAccount.name | trim) "") -}} -{{- fail "openclaw: rbac.create=true with serviceAccount.create=false requires serviceAccount.name" -}} -{{- end -}} - -{{- if and .Values.initJob.enabled (not .Values.persistence.enabled) -}} -{{- fail "openclaw: initJob.enabled requires persistence.enabled=true" -}} -{{- end -}} diff --git a/internal/openclaw/chart/values.schema.json b/internal/openclaw/chart/values.schema.json deleted file mode 100644 index e52dc8a..0000000 --- a/internal/openclaw/chart/values.schema.json +++ /dev/null @@ -1,377 +0,0 @@ -{ - "$schema": "https://json-schema.org/draft-07/schema#", - "type": "object", - "required": ["image", "service", "secrets"], - "properties": { - "replicaCount": { - "type": "integer", - "minimum": 1, - "maximum": 1, - "description": "Must be 1 — OpenClaw uses SQLite and cannot scale horizontally" - }, - "image": { - "type": "object", - "required": ["repository"], - "properties": { - "repository": { - "type": "string" - }, - "tag": { - "type": "string" - }, - "pullPolicy": { - "type": "string", - "enum": ["Always", "IfNotPresent", "Never"] - }, - "command": { - "type": "array", - "items": { "type": "string" } - }, - "args": { - "type": "array", - "items": { "type": 
"string" } - }, - "env": { - "type": "array" - } - } - }, - "imagePullSecrets": { - "type": "array" - }, - "nameOverride": { - "type": "string" - }, - "fullnameOverride": { - "type": "string" - }, - "serviceAccount": { - "type": "object", - "properties": { - "create": { "type": "boolean" }, - "automount": { "type": "boolean" }, - "annotations": { "type": "object" }, - "name": { "type": "string" } - } - }, - "rbac": { - "type": "object", - "properties": { - "create": { "type": "boolean" }, - "extraRules": { "type": "array" } - } - }, - "initJob": { - "type": "object", - "properties": { - "enabled": { "type": "boolean" }, - "image": { - "type": "object", - "properties": { - "repository": { "type": "string" }, - "tag": { "type": "string" }, - "pullPolicy": { - "type": "string", - "enum": ["Always", "IfNotPresent", "Never"] - } - } - }, - "command": { - "type": "array", - "items": { "type": "string" } - }, - "args": { - "type": "array", - "items": { "type": "string" } - }, - "env": { "type": "array" }, - "resources": { "type": "object" } - } - }, - "podAnnotations": { "type": "object" }, - "podLabels": { "type": "object" }, - "podSecurityContext": { "type": "object" }, - "containerSecurityContext": { "type": "object" }, - "service": { - "type": "object", - "required": ["port"], - "properties": { - "type": { - "type": "string", - "enum": ["ClusterIP", "NodePort", "LoadBalancer"] - }, - "port": { - "type": "integer", - "minimum": 1, - "maximum": 65535 - } - } - }, - "ingress": { - "type": "object", - "properties": { - "enabled": { "type": "boolean" }, - "className": { "type": "string" }, - "annotations": { "type": "object" }, - "hosts": { "type": "array" }, - "tls": { "type": "array" } - } - }, - "httpRoute": { - "type": "object", - "properties": { - "enabled": { "type": "boolean" }, - "annotations": { "type": "object" }, - "hostnames": { - "type": "array", - "items": { "type": "string" } - }, - "parentRefs": { "type": "array" }, - "pathPrefix": { "type": "string" } - } 
- }, - "persistence": { - "type": "object", - "properties": { - "enabled": { "type": "boolean" }, - "existingClaim": { "type": "string" }, - "storageClass": { "type": "string" }, - "accessModes": { - "type": "array", - "items": { "type": "string" } - }, - "size": { "type": "string" }, - "mountPath": { "type": "string" } - } - }, - "config": { - "type": "object", - "properties": { - "existingConfigMap": { "type": "string" }, - "key": { "type": "string" }, - "content": { "type": "string" } - } - }, - "openclaw": { - "type": "object", - "properties": { - "stateDir": { "type": "string" }, - "workspaceDir": { "type": "string" }, - "gateway": { - "type": "object", - "properties": { - "mode": { "type": "string" }, - "bind": { "type": "string" }, - "auth": { - "type": "object", - "properties": { - "mode": { - "type": "string", - "enum": ["token", "none"] - } - } - }, - "http": { "type": "object" } - } - } - } - }, - "models": { - "type": "object", - "properties": { - "anthropic": { - "type": "object", - "properties": { - "enabled": { "type": "boolean" }, - "baseUrl": { "type": "string", "format": "uri" }, - "api": { "type": "string" }, - "apiKeyEnvVar": { "type": "string" }, - "apiKeyValue": { "type": "string" }, - "models": { - "type": "array", - "items": { - "type": "object", - "required": ["id", "name"], - "properties": { - "id": { "type": "string" }, - "name": { "type": "string" } - } - } - } - } - }, - "openai": { - "type": "object", - "properties": { - "enabled": { "type": "boolean" }, - "baseUrl": { "type": "string", "format": "uri" }, - "api": { "type": "string" }, - "apiKeyEnvVar": { "type": "string" }, - "apiKeyValue": { "type": "string" }, - "models": { - "type": "array", - "items": { - "type": "object", - "required": ["id", "name"], - "properties": { - "id": { "type": "string" }, - "name": { "type": "string" } - } - } - } - } - }, - "ollama": { - "type": "object", - "properties": { - "enabled": { "type": "boolean" }, - "baseUrl": { "type": "string" }, - "api": 
{ "type": "string" }, - "apiKeyEnvVar": { "type": "string" }, - "apiKeyValue": { "type": "string" }, - "models": { - "type": "array", - "items": { - "type": "object", - "required": ["id", "name"], - "properties": { - "id": { "type": "string" }, - "name": { "type": "string" } - } - } - } - } - } - } - }, - "channels": { - "type": "object", - "properties": { - "telegram": { - "type": "object", - "properties": { - "enabled": { "type": "boolean" }, - "botToken": { "type": "string" }, - "dmPolicy": { - "type": "string", - "enum": ["", "open", "paired", "closed"] - } - } - }, - "discord": { - "type": "object", - "properties": { - "enabled": { "type": "boolean" }, - "botToken": { "type": "string" }, - "dmPolicy": { - "type": "string", - "enum": ["", "open", "paired", "closed"] - } - } - }, - "slack": { - "type": "object", - "properties": { - "enabled": { "type": "boolean" }, - "botToken": { "type": "string" }, - "appToken": { "type": "string" } - } - } - } - }, - "skills": { - "type": "object", - "properties": { - "enabled": { "type": "boolean" }, - "createDefault": { "type": "boolean" }, - "configMapName": { "type": "string" }, - "archiveKey": { "type": "string" }, - "extractDir": { "type": "string" }, - "initContainer": { - "type": "object", - "properties": { - "image": { - "type": "object", - "properties": { - "repository": { "type": "string" }, - "tag": { "type": "string" }, - "pullPolicy": { - "type": "string", - "enum": ["Always", "IfNotPresent", "Never"] - } - } - } - } - } - } - }, - "erpc": { - "type": "object", - "properties": { - "url": { "type": "string" } - } - }, - "secrets": { - "type": "object", - "properties": { - "existingSecret": { "type": "string" }, - "create": { "type": "boolean" }, - "name": { "type": "string" }, - "gatewayToken": { - "type": "object", - "properties": { - "key": { "type": "string" }, - "value": { "type": "string" } - } - }, - "extraEnvFromSecrets": { - "type": "array", - "items": { "type": "string" } - } - } - }, - "resources": { - 
"type": "object", - "properties": { - "limits": { "type": "object" }, - "requests": { "type": "object" } - } - }, - "startupProbe": { - "type": "object", - "properties": { - "enabled": { "type": "boolean" }, - "periodSeconds": { "type": "integer", "minimum": 1 }, - "failureThreshold": { "type": "integer", "minimum": 1 }, - "timeoutSeconds": { "type": "integer", "minimum": 1 } - } - }, - "livenessProbe": { - "type": "object", - "properties": { - "enabled": { "type": "boolean" }, - "initialDelaySeconds": { "type": "integer", "minimum": 0 }, - "periodSeconds": { "type": "integer", "minimum": 1 }, - "timeoutSeconds": { "type": "integer", "minimum": 1 }, - "failureThreshold": { "type": "integer", "minimum": 1 } - } - }, - "readinessProbe": { - "type": "object", - "properties": { - "enabled": { "type": "boolean" }, - "initialDelaySeconds": { "type": "integer", "minimum": 0 }, - "periodSeconds": { "type": "integer", "minimum": 1 }, - "timeoutSeconds": { "type": "integer", "minimum": 1 }, - "failureThreshold": { "type": "integer", "minimum": 1 } - } - }, - "extraVolumes": { "type": "array" }, - "extraVolumeMounts": { "type": "array" }, - "extraEnv": { "type": "array" }, - "nodeSelector": { "type": "object" }, - "tolerations": { "type": "array" }, - "affinity": { "type": "object" }, - "priorityClassName": { "type": "string" } - } -} diff --git a/internal/openclaw/chart/values.yaml b/internal/openclaw/chart/values.yaml deleted file mode 100644 index 9919da2..0000000 --- a/internal/openclaw/chart/values.yaml +++ /dev/null @@ -1,308 +0,0 @@ -# Default values for openclaw. -# This is a YAML-formatted file. -# Declare variables to be passed into your templates. 
- -# -- Number of replicas (OpenClaw should run as a single instance) -replicaCount: 1 - -# -- OpenClaw image repository, pull policy, and tag version -image: - repository: ghcr.io/obolnetwork/openclaw - pullPolicy: IfNotPresent - tag: "2026.2.9" - - # -- Override the container command (ENTRYPOINT) - command: - - node - # -- Override the container args (CMD) - args: - - openclaw.mjs - - gateway - - --allow-unconfigured - - # -- Additional environment variables for the container - env: [] - # - name: FOO - # value: bar - -# -- Credentials to fetch images from private registry -imagePullSecrets: [] - -# -- Override the chart name -nameOverride: "" -# -- Override the full resource name -fullnameOverride: "" - -# -- Create a ServiceAccount for OpenClaw -serviceAccount: - create: true - # -- Automatically mount a ServiceAccount's API credentials? - # Set to true when rbac.create is true so the agent can access the K8s API. - automount: false - annotations: {} - # -- ServiceAccount name. Required when serviceAccount.create=false and rbac.create=true. - name: "" - -# -- RBAC for the ServiceAccount (read-only access to namespace resources) -rbac: - create: false - # -- Extra rules to append to the generated Role (list of PolicyRule objects) - extraRules: [] - -# -- One-shot init Job (runs once to bootstrap workspace/personality) -initJob: - # -- Enable a one-shot post-install bootstrap Job. Requires persistence.enabled=true. 
- enabled: false - image: - repository: ghcr.io/obolnetwork/openclaw - tag: "" - pullPolicy: IfNotPresent - command: - - node - - openclaw.mjs - - agent - - init - args: [] - # -- Extra environment variables for the init job - env: [] - # -- Resource requests/limits for the init job - resources: - requests: - cpu: 100m - memory: 128Mi - limits: - memory: 512Mi - -# -- Pod annotations -podAnnotations: {} -# -- Pod labels -podLabels: {} - -# -- Pod security context -podSecurityContext: - fsGroup: 1000 - -# -- Container security context -containerSecurityContext: - runAsNonRoot: true - runAsUser: 1000 - runAsGroup: 1000 - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - seccompProfile: - type: RuntimeDefault - -# -- Service configuration -service: - type: ClusterIP - port: 18789 - -# -- Kubernetes Ingress (optional; not used in Obol Stack which uses Gateway API) -ingress: - enabled: false - className: "" - annotations: {} - hosts: - - host: chart-example.local - paths: - - path: / - pathType: Prefix - tls: [] - -# -- Gateway API HTTPRoute (recommended for Obol Stack / Traefik Gateway API) -httpRoute: - enabled: false - annotations: {} - # -- Hostnames for routing (required when enabled) - hostnames: [] - # - openclaw-myid.obol.stack - parentRefs: - - name: traefik-gateway - namespace: traefik - sectionName: web - pathPrefix: / - -# -- Persistence settings for OpenClaw state directory (contains runtime state + secrets) -persistence: - enabled: true - existingClaim: "" - storageClass: "" - accessModes: - - ReadWriteOnce - size: 1Gi - mountPath: /data - -# -- Configuration for the OpenClaw config file (openclaw.json) -config: - # -- Use an existing ConfigMap instead of creating one - existingConfigMap: "" - # -- ConfigMap key / filename - key: openclaw.json - # -- Optional raw JSON5 configuration (overrides generated config when set) - content: "" - -# -- OpenClaw state/workspace settings (paths should be inside persistence.mountPath) -openclaw: - 
stateDir: /data/.openclaw - workspaceDir: /data/.openclaw/workspace - # -- Default agent model (e.g. "anthropic/claude-sonnet-4-5-20250929"). Empty = use provider default. - agentModel: "" - - gateway: - mode: local - bind: lan - auth: - mode: token - - http: - endpoints: - chatCompletions: - enabled: true - -# -- Model provider configuration -# Each provider is independently toggled. All providers may be disabled. -# API keys are stored in the chart Secret and injected as env vars. -models: - anthropic: - enabled: false - baseUrl: https://api.anthropic.com/v1 - api: "" - apiKeyEnvVar: ANTHROPIC_API_KEY - # -- API key value (stored in Secret). Leave empty to provide via extraEnvFromSecrets. - apiKeyValue: "" - models: - - id: claude-sonnet-4-5-20250929 - name: Claude Sonnet 4.5 - - id: claude-opus-4-6 - name: Claude Opus 4.6 - openai: - enabled: false - baseUrl: https://api.openai.com/v1 - api: "" - apiKeyEnvVar: OPENAI_API_KEY - apiKeyValue: "" - models: - - id: gpt-5.2 - name: GPT-5.2 - ollama: - enabled: true - # -- OpenAI-compatible base URL for Ollama (routed through llmspy global proxy) - baseUrl: http://llmspy.llm.svc.cluster.local:8000/v1 - # -- OpenClaw provider API type. Set to "openai-completions" because llmspy exposes an OpenAI-compatible chat/completions endpoint. - api: "openai-completions" - # -- Env var used for provider API key interpolation in openclaw.json - apiKeyEnvVar: OLLAMA_API_KEY - # -- Value set for the apiKey env var (not a secret for Ollama) - apiKeyValue: ollama-local - models: - - id: glm-4.7-flash - name: glm-4.7-flash - -# -- Chat channel integrations -# Tokens are stored in the chart Secret and injected as env vars. 
-channels: - telegram: - enabled: false - # -- Telegram Bot API token (from @BotFather) - botToken: "" - # -- DM policy: "open" | "paired" | "closed" - dmPolicy: "" - discord: - enabled: false - # -- Discord bot token - botToken: "" - # -- DM policy: "open" | "paired" | "closed" - dmPolicy: "" - slack: - enabled: false - # -- Slack Bot User OAuth Token (xoxb-...) - botToken: "" - # -- Slack App-Level Token (xapp-...) - appToken: "" - -# -- Skills injection from a ConfigMap archive (created by an external tool; e.g. `obol openclaw skills sync`). -# The archive is extracted to `extractDir` by a busybox init container and wired into OpenClaw -# via `skills.load.extraDirs` in _helpers.tpl. Note: ConfigMap total size is limited to ~1 MB by Kubernetes. -skills: - enabled: false - # -- Create a default empty skills ConfigMap when configMapName is not set. - # This allows the chart to deploy without requiring an external ConfigMap. - # Use `obol openclaw skills sync` to populate it later. - createDefault: true - # -- Name of the ConfigMap containing the skills archive (overrides createDefault) - configMapName: "" - archiveKey: skills.tgz - extractDir: /data/.openclaw/skills-injected - initContainer: - image: - repository: busybox - tag: 1.36.1 - pullPolicy: IfNotPresent - -# -- eRPC integration (exposed as ERPC_URL env var) -erpc: - url: http://erpc.erpc.svc.cluster.local:4000/rpc - -# -- OpenClaw secrets (one Secret per instance) -secrets: - # -- Use an existing secret instead of creating one - existingSecret: "" - # -- Create the secret when existingSecret is not set - create: true - # -- Override the created Secret name (defaults to -openclaw-secrets) - name: "" - - gatewayToken: - # -- Secret key name + env var name for the gateway API authentication token. - # This token is required to access OpenClaw's HTTP gateway (chat/completions endpoint and dashboard). - key: OPENCLAW_GATEWAY_TOKEN - # -- Explicit token value. 
Required for token auth unless using secrets.existingSecret. - value: "" - - # -- Extra Secret names to load via envFrom (for provider/channel keys, etc.) - extraEnvFromSecrets: [] - -# -- Resource requests and limits -resources: - requests: - cpu: 250m - memory: 512Mi - limits: - memory: 2Gi - -# -- Startup probe (tcpSocket; allows generous boot time before liveness kicks in) -startupProbe: - enabled: true - periodSeconds: 5 - failureThreshold: 30 - timeoutSeconds: 3 - -# -- Liveness probe (tcpSocket by default to avoid auth-protected HTTP endpoints) -livenessProbe: - enabled: true - initialDelaySeconds: 10 - periodSeconds: 10 - timeoutSeconds: 5 - failureThreshold: 3 - -# -- Readiness probe (tcpSocket by default to avoid auth-protected HTTP endpoints) -readinessProbe: - enabled: true - initialDelaySeconds: 5 - periodSeconds: 5 - timeoutSeconds: 3 - failureThreshold: 3 - -# -- Additional volumes -extraVolumes: [] -# -- Additional volume mounts -extraVolumeMounts: [] -# -- Additional environment variables -extraEnv: [] - -nodeSelector: {} -tolerations: [] -affinity: {} -priorityClassName: "" diff --git a/internal/openclaw/openclaw.go b/internal/openclaw/openclaw.go index 7e3b2af..e86c3e0 100644 --- a/internal/openclaw/openclaw.go +++ b/internal/openclaw/openclaw.go @@ -4,12 +4,10 @@ import ( "bufio" "bytes" "context" - "embed" "encoding/base64" "encoding/json" "fmt" "io" - "io/fs" "net/http" "net/url" "os" @@ -38,15 +36,11 @@ type CloudProviderInfo struct { const ( appName = "openclaw" defaultDomain = "obol.stack" + // chartVersion pins the openclaw Helm chart version from the obol repo. + // renovate: datasource=helm depName=openclaw registryUrl=https://obolnetwork.github.io/helm-charts/ + chartVersion = "0.1.0" ) -// Embed the OpenClaw Helm chart from the shared charts directory. -// The chart source lives in internal/embed/charts/openclaw/ and is -// referenced here so the openclaw package owns its own chart lifecycle. 
-// -//go:embed all:chart -var chartFS embed.FS - // OnboardOptions contains options for the onboard command type OnboardOptions struct { ID string // Deployment ID (empty = generate petname) @@ -174,24 +168,6 @@ func Onboard(cfg *config.Config, opts OnboardOptions) error { return fmt.Errorf("failed to create deployment directory: %w", err) } - // Copy embedded chart to deployment/chart/ - chartDir := filepath.Join(deploymentDir, "chart") - if err := copyEmbeddedChart(chartDir); err != nil { - os.RemoveAll(deploymentDir) - return fmt.Errorf("failed to copy chart: %w", err) - } - - // Write values.yaml from the embedded chart defaults - defaultValues, err := chartFS.ReadFile("chart/values.yaml") - if err != nil { - os.RemoveAll(deploymentDir) - return fmt.Errorf("failed to read chart defaults: %w", err) - } - if err := os.WriteFile(filepath.Join(deploymentDir, "values.yaml"), defaultValues, 0644); err != nil { - os.RemoveAll(deploymentDir) - return fmt.Errorf("failed to write values.yaml: %w", err) - } - // Write Obol Stack overlay values (httpRoute, provider config, eRPC, skills) hostname := fmt.Sprintf("openclaw-%s.%s", id, defaultDomain) namespace := fmt.Sprintf("%s-%s", appName, id) @@ -201,7 +177,7 @@ func Onboard(cfg *config.Config, opts OnboardOptions) error { return fmt.Errorf("failed to write overlay values: %w", err) } - // Generate helmfile.yaml referencing local chart + // Generate helmfile.yaml referencing obol/openclaw from the published Helm repo helmfileContent := generateHelmfile(id, namespace) if err := os.WriteFile(filepath.Join(deploymentDir, "helmfile.yaml"), []byte(helmfileContent), 0644); err != nil { os.RemoveAll(deploymentDir) @@ -214,10 +190,8 @@ func Onboard(cfg *config.Config, opts OnboardOptions) error { fmt.Printf(" Hostname: %s\n", hostname) fmt.Printf(" Location: %s\n", deploymentDir) fmt.Printf("\nFiles created:\n") - fmt.Printf(" - chart/ Embedded OpenClaw Helm chart\n") - fmt.Printf(" - values.yaml Chart defaults (edit to 
customize)\n") - fmt.Printf(" - values-obol.yaml Obol Stack defaults (httpRoute, providers, eRPC)\n") - fmt.Printf(" - helmfile.yaml Deployment configuration\n") + fmt.Printf(" - values-obol.yaml Obol Stack overlay (httpRoute, providers, eRPC)\n") + fmt.Printf(" - helmfile.yaml Deployment configuration (chart: obol/openclaw v%s)\n", chartVersion) if opts.Sync { fmt.Printf("\nDeploying to cluster...\n\n") @@ -554,20 +528,11 @@ func Setup(cfg *config.Config, id string, _ SetupOptions) error { } } - // Re-copy the embedded chart so the deployment dir picks up any chart fixes - // (e.g. corrected default values, template changes) from the current binary. - chartDir := filepath.Join(deploymentDir, "chart") - if err := copyEmbeddedChart(chartDir); err != nil { - return fmt.Errorf("failed to update chart: %w", err) - } - - // Write updated base values.yaml from the embedded chart defaults - defaultValues, err := chartFS.ReadFile("chart/values.yaml") - if err != nil { - return fmt.Errorf("failed to read chart defaults: %w", err) - } - if err := os.WriteFile(filepath.Join(deploymentDir, "values.yaml"), defaultValues, 0644); err != nil { - return fmt.Errorf("failed to write values.yaml: %w", err) + // Regenerate helmfile to pick up any chart version bumps + namespace := fmt.Sprintf("%s-%s", appName, id) + helmfileContent := generateHelmfile(id, namespace) + if err := os.WriteFile(filepath.Join(deploymentDir, "helmfile.yaml"), []byte(helmfileContent), 0644); err != nil { + return fmt.Errorf("failed to write helmfile.yaml: %w", err) } // Regenerate overlay values with the selected provider @@ -583,7 +548,6 @@ func Setup(cfg *config.Config, id string, _ SetupOptions) error { return err } - namespace := fmt.Sprintf("%s-%s", appName, id) kubeconfigPath := filepath.Join(cfg.ConfigDir, "kubeconfig.yaml") kubectlBinary := filepath.Join(cfg.BinDir, "kubectl") @@ -961,35 +925,6 @@ func deploymentPath(cfg *config.Config, id string) string { return filepath.Join(cfg.ConfigDir, 
"applications", appName, id) } -// copyEmbeddedChart extracts the embedded chart FS to destDir -func copyEmbeddedChart(destDir string) error { - return fs.WalkDir(chartFS, "chart", func(path string, d fs.DirEntry, err error) error { - if err != nil { - return err - } - if path == "chart" { - return nil - } - - relPath := strings.TrimPrefix(path, "chart/") - destPath := filepath.Join(destDir, relPath) - - if d.IsDir() { - return os.MkdirAll(destPath, 0755) - } - - if err := os.MkdirAll(filepath.Dir(destPath), 0755); err != nil { - return err - } - - data, err := chartFS.ReadFile(path) - if err != nil { - return fmt.Errorf("failed to read embedded %s: %w", path, err) - } - return os.WriteFile(destPath, data, 0644) - }) -} - // generateOverlayValues creates the Obol Stack-specific values overlay. // If imported is non-nil, provider/channel config from the import is used // instead of the default Ollama configuration. @@ -1229,18 +1164,22 @@ func buildLLMSpyRoutedOverlay(cloud *CloudProviderInfo) *ImportResult { } } -// generateHelmfile creates a helmfile.yaml referencing the local chart +// generateHelmfile creates a helmfile.yaml referencing the published obol/openclaw chart. func generateHelmfile(id, namespace string) string { return fmt.Sprintf(`# OpenClaw instance: %s # Managed by obol openclaw +repositories: + - name: obol + url: https://obolnetwork.github.io/helm-charts/ + releases: - name: openclaw namespace: %s createNamespace: true - chart: ./chart + chart: obol/openclaw + version: %s values: - - values.yaml - values-obol.yaml -`, id, namespace) +`, id, namespace, chartVersion) } From 532b23d0483b466d2da17fb34e1e7ab03d904fd8 Mon Sep 17 00:00:00 2001 From: JeanDaniel Bussy Date: Fri, 13 Feb 2026 16:43:36 +0400 Subject: [PATCH 31/42] cleanup(network): remove Helios light client network (#146) Helios is no longer part of the Obol Stack network lineup. Remove the embedded network definition, frontend env var, and all documentation references. 
--- README.md | 6 +- docs/getting-started.md | 188 ++++++++++++++++++ .../values/obol-frontend.yaml.gotmpl | 2 - .../networks/helios/helmfile.yaml.gotmpl | 116 ----------- .../embed/networks/helios/values.yaml.gotmpl | 17 -- internal/network/network.go | 2 +- notes.md | 2 +- 7 files changed, 191 insertions(+), 142 deletions(-) create mode 100644 docs/getting-started.md delete mode 100644 internal/embed/networks/helios/helmfile.yaml.gotmpl delete mode 100644 internal/embed/networks/helios/values.yaml.gotmpl diff --git a/README.md b/README.md index b7ec40d..585f7d5 100644 --- a/README.md +++ b/README.md @@ -163,14 +163,13 @@ obol network list **Available networks:** - **ethereum** - Full Ethereum node (execution + consensus clients) -- **helios** - Lightweight Ethereum client - **aztec** - Aztec rollup network **View installed deployments:** ```bash # List all network deployment namespaces -obol kubectl get namespaces | grep -E "ethereum|helios|aztec" +obol kubectl get namespaces | grep -E "ethereum|aztec" # View resources in a specific deployment obol kubectl get all -n ethereum-nervous-otter @@ -535,7 +534,6 @@ The Obol Stack follows the [XDG Base Directory](https://specifications.freedeskt │ ├── ethereum/ # Ethereum network deployments │ │ ├── / # First deployment instance │ │ └── / # Second deployment instance -│ ├── helios/ # Helios network deployments │ └── aztec/ # Aztec network deployments └── applications/ # Installed application deployments ├── redis/ # Redis deployments @@ -553,7 +551,6 @@ The Obol Stack follows the [XDG Base Directory](https://specifications.freedeskt └── / # Per-cluster data └── networks/ # Network blockchain data ├── ethereum_/ # Ethereum deployment instance data - ├── helios_/ # Helios deployment instance data └── aztec_/ # Aztec deployment instance data ``` @@ -641,7 +638,6 @@ If you're contributing to the Obol Stack or want to run it from source, you can │ │ ├── ethereum/ # Ethereum network deployments │ │ │ ├── / # First deployment 
instance │ │ │ └── / # Second deployment instance -│ │ ├── helios/ │ │ └── aztec/ │ └── applications/ # Installed application deployments │ ├── redis/ diff --git a/docs/getting-started.md b/docs/getting-started.md new file mode 100644 index 0000000..adbf921 --- /dev/null +++ b/docs/getting-started.md @@ -0,0 +1,188 @@ +# Getting Started with the Obol Stack + +This guide walks you through installing the Obol Stack, starting a local Kubernetes cluster, testing LLM inference, and deploying your first blockchain network. + +> [!IMPORTANT] +> The Obol Stack is alpha software. If you encounter an issue, please open a +> [GitHub issue](https://github.com/ObolNetwork/obol-stack/issues). + +## Prerequisites + +- **Docker** -- The stack runs a local Kubernetes cluster via [k3d](https://k3d.io), which requires Docker. + - Linux: [Docker Engine](https://docs.docker.com/engine/install/) + - macOS / Windows: [Docker Desktop](https://docs.docker.com/desktop/) +- **Ollama** (optional) -- For LLM inference. Install from [ollama.com](https://ollama.com) and start it with `ollama serve`. + +## Install + +Run the bootstrap installer: + +```bash +bash <(curl -s https://stack.obol.org) +``` + +This installs the `obol` CLI and all required tools (kubectl, helm, k3d, helmfile, k9s) to `~/.local/bin/`. + +> [!NOTE] +> Contributors working from source can use development mode instead -- see +> [CONTRIBUTING.md](../CONTRIBUTING.md) for details. + +## Step 1 -- Initialize the Stack + +```bash +obol stack init +``` + +This generates a unique stack ID (e.g., `creative-dogfish`) and writes the cluster configuration and default infrastructure manifests to `~/.config/obol/`. 
+ +## Step 2 -- Start the Stack + +```bash +obol stack up +``` + +This creates a local k3d cluster and deploys the default infrastructure: + +| Component | Namespace | Description | +|-----------|-----------|-------------| +| **Traefik** | `traefik` | Gateway API ingress controller | +| **Monitoring** | `monitoring` | Prometheus and kube-prometheus-stack | +| **LLMSpy** | `llm` | OpenAI-compatible gateway (proxies to host Ollama) | +| **eRPC** | `erpc` | Unified RPC load balancer | +| **Frontend** | `obol-frontend` | Web interface at http://obol.stack/ | +| **Cloudflared** | `traefik` | Quick tunnel for optional public access | +| **Reloader** | `reloader` | Auto-restarts workloads on config changes | + +## Step 3 -- Verify + +Check that all pods are running: + +```bash +obol kubectl get pods -A +``` + +All pods should show `Running`. eRPC may show `0/1 Ready` -- this is normal until external RPC endpoints are configured. + +Open the frontend in your browser: http://obol.stack/ + +## Step 4 -- Test LLM Inference + +If Ollama is running on the host (`ollama serve`), the stack can route inference requests through LLMSpy. + +Verify Ollama has models loaded: + +```bash +curl -s http://localhost:11434/api/tags | python3 -m json.tool +``` + +Test inference through the cluster: + +```bash +obol kubectl run -n llm inference-test --rm -it --restart=Never \ + --overrides='{"spec":{"terminationGracePeriodSeconds":180,"activeDeadlineSeconds":180}}' \ + --image=curlimages/curl -- \ + curl -s --max-time 120 -X POST \ + http://llmspy.llm.svc.cluster.local:8000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{"model":"glm-4.7-flash","messages":[{"role":"user","content":"Say hello in one word"}],"max_tokens":10}' +``` + +Replace `glm-4.7-flash` with whatever model you have loaded in Ollama. + +> [!NOTE] +> The first request may be slow while the model loads into memory. 
+ +## Step 5 -- List Available Networks + +```bash +obol network list +``` + +Available networks: **aztec**, **ethereum**, **inference**. + +Use `--help` to see configuration options for any network: + +```bash +obol network install ethereum --help +``` + +## Step 6 -- Deploy a Network + +Network deployment is two stages: **install** saves configuration, **sync** deploys it. + +```bash +# Generate configuration (nothing deployed yet) +obol network install ethereum --network=hoodi --id demo + +# Review the config if you like +cat ~/.config/obol/networks/ethereum/demo/values.yaml + +# Deploy to the cluster +obol network sync ethereum/demo +``` + +This creates the `ethereum-demo` namespace with an execution client (reth) and a consensus client (lighthouse). + +## Step 7 -- Verify the Network + +```bash +obol kubectl get all -n ethereum-demo +``` + +Test the Ethereum JSON-RPC endpoint: + +```bash +curl -s http://obol.stack/ethereum-demo/execution \ + -X POST -H 'Content-Type: application/json' \ + -d '{"jsonrpc":"2.0","method":"eth_chainId","params":[],"id":1}' +``` + +Expected response (Hoodi testnet): + +```json +{"jsonrpc":"2.0","id":1,"result":"0x88bb0"} +``` + +## Stack Lifecycle + +```bash +# Stop the cluster (preserves config and data) +obol stack down + +# Restart +obol stack up + +# Full cleanup (removes cluster, config, and data) +obol stack purge --force +``` + +> [!WARNING] +> `--force` is required to remove persistent volumes owned by root. 
+ +## Managing Networks + +```bash +# Run multiple instances of the same network +obol network install ethereum --network=mainnet --id prod +obol network sync ethereum/prod + +# Delete a deployment +obol network delete ethereum/demo +``` + +## Key URLs + +| Endpoint | URL | +|----------|-----| +| Frontend | http://obol.stack/ | +| Ethereum Execution RPC | http://obol.stack/ethereum-{id}/execution | +| Ethereum Beacon API | http://obol.stack/ethereum-{id}/beacon | +| eRPC | http://obol.stack/rpc | + +Replace `{id}` with your deployment ID (e.g., `demo`, `prod`). + +## Next Steps + +- Explore the cluster interactively: `obol k9s` +- See the full [README](../README.md) for architecture details and advanced configuration +- Check [CONTRIBUTING.md](../CONTRIBUTING.md) for development mode setup and adding new networks diff --git a/internal/embed/infrastructure/values/obol-frontend.yaml.gotmpl b/internal/embed/infrastructure/values/obol-frontend.yaml.gotmpl index f95ca2b..b194482 100644 --- a/internal/embed/infrastructure/values/obol-frontend.yaml.gotmpl +++ b/internal/embed/infrastructure/values/obol-frontend.yaml.gotmpl @@ -8,8 +8,6 @@ serviceAccount: image: environment: - - name: NEXT_PUBLIC_HELIOS_CLIENT_URL - value: "http://helios-{{ $network }}.helios.svc.cluster.local:8545" - name: NEXT_PUBLIC_ERPC_URL value: "https://{{ $publicDomain }}/rpc" - name: NEXT_PUBLIC_AZTEC_SEQUENCER_URL diff --git a/internal/embed/networks/helios/helmfile.yaml.gotmpl b/internal/embed/networks/helios/helmfile.yaml.gotmpl deleted file mode 100644 index c0a5d96..0000000 --- a/internal/embed/networks/helios/helmfile.yaml.gotmpl +++ /dev/null @@ -1,116 +0,0 @@ -# Helios Light Client Network -# Provides an Ethereum light client that can be used as an RPC endpoint - -repositories: - - name: obol - url: https://obolnetwork.github.io/helm-charts/ - - name: bedag - url: https://bedag.github.io/helm-charts/ - -releases: - - name: helios - namespace: helios-{{ .Values.id }} - createNamespace: true 
- chart: obol/helios - version: 0.1.4 - values: - - fullnameOverride: 'helios-{{ .Values.network }}' - - helios: - rpcBindIp: "0.0.0.0" - rpcPort: 8545 - strictCheckpointAge: false - network: '{{ .Values.network }}' - consensusRpc: '{{ .Values.consensusRpc }}' - executionRpc: '{{ .Values.executionRpc }}' - - - persistence: - enabled: true - size: 10Gi - storageClass: local-path - - # Disable legacy Ingress - using Gateway API HTTPRoute instead - - ingress: - enabled: false - - # HTTPRoute for Helios RPC endpoint - - name: helios-httproute - namespace: helios-{{ .Values.id }} - chart: bedag/raw - values: - - resources: - - apiVersion: gateway.networking.k8s.io/v1 - kind: HTTPRoute - metadata: - name: helios - namespace: helios-{{ .Values.id }} - spec: - parentRefs: - - name: traefik-gateway - namespace: traefik - sectionName: web - hostnames: - - obol.stack - rules: - - matches: - - path: - type: Exact - value: /helios-{{ .Values.id }} - - path: - type: PathPrefix - value: /helios-{{ .Values.id }}/ - filters: - - type: URLRewrite - urlRewrite: - path: - type: ReplacePrefixMatch - replacePrefixMatch: / - backendRefs: - - name: helios-{{ .Values.network }} - port: 8545 - - # Metadata ConfigMap for frontend discovery - - name: helios-metadata - namespace: helios-{{ .Values.id }} - chart: bedag/raw - values: - - resources: - - apiVersion: v1 - kind: ConfigMap - metadata: - name: helios-{{ .Values.id }}-metadata - namespace: helios-{{ .Values.id }} - labels: - app.kubernetes.io/part-of: obol.stack - obol.stack/id: {{ .Values.id }} - obol.stack/app: helios - data: - metadata.json: | - { - "network": "{{ .Values.network }}", - "endpoints": { - "rpc": { - "external": "http://obol.stack/helios-{{ .Values.id }}", - "internal": "http://helios-{{ .Values.network }}.helios-{{ .Values.id }}.svc.cluster.local:8545" - } - } - } - - # Grant Obol Agent access - - name: helios-agent-access - namespace: helios-{{ .Values.id }} - chart: bedag/raw - values: - - resources: - - apiVersion: 
rbac.authorization.k8s.io/v1 - kind: RoleBinding - metadata: - name: obol-agent-access - namespace: helios-{{ .Values.id }} - roleRef: - apiGroup: rbac.authorization.k8s.io - kind: ClusterRole - name: admin - subjects: - - kind: ServiceAccount - name: obol-agent - namespace: agent diff --git a/internal/embed/networks/helios/values.yaml.gotmpl b/internal/embed/networks/helios/values.yaml.gotmpl deleted file mode 100644 index baed67f..0000000 --- a/internal/embed/networks/helios/values.yaml.gotmpl +++ /dev/null @@ -1,17 +0,0 @@ -# Helios Light Client Network -# Provides an Ethereum light client that can be used as an RPC endpoint -# Configuration via CLI flags -# Template fields populated by obol CLI during network installation - -# @enum mainnet -# @default mainnet -# @description Ethereum network (only mainnet supported currently) -network: {{.Network}} - -# @default http://testing.mainnet.beacon-api.nimbus.team -# @description Consensus RPC endpoint URL -consensusRpc: {{.ConsensusRpc}} - -# @default https://eth.drpc.org -# @description Execution RPC endpoint URL -executionRpc: {{.ExecutionRpc}} diff --git a/internal/network/network.go b/internal/network/network.go index f3e1d7d..f7dc891 100644 --- a/internal/network/network.go +++ b/internal/network/network.go @@ -174,7 +174,7 @@ func Sync(cfg *config.Config, deploymentIdentifier string) error { deploymentID = parts[1] } else { // Try to split by first dash that separates network from ID - // Network names are expected to be single words (ethereum, helios, aztec) + // Network names are expected to be single words (ethereum, aztec) parts := strings.SplitN(deploymentIdentifier, "-", 2) if len(parts) != 2 { return fmt.Errorf("invalid deployment identifier format. 
Use: / or -") diff --git a/notes.md b/notes.md index 6550e6a..6dc2261 100644 --- a/notes.md +++ b/notes.md @@ -9,7 +9,7 @@ - OKR-1: default LLM flow is llms.py -> Ollama Cloud (no API key copy/paste) - frontend (default) -- erpc, helios (default) +- erpc (default) - obol agent workings (default) - monitoring From 0bdbafe3f65e7a30abd81651c9b27e263eb03ad5 Mon Sep 17 00:00:00 2001 From: JeanDaniel Bussy Date: Fri, 13 Feb 2026 16:45:02 +0400 Subject: [PATCH 32/42] test(openclaw): add import pipeline tests and fix silent failures (#147) Add comprehensive unit tests for the OpenClaw config import pipeline (25 test cases covering DetectExistingConfig, TranslateToOverlayYAML, workspace detection, and helper functions). Refactor DetectExistingConfig for testability by extracting detectExistingConfigAt(home). Fix silent failures: warn when env-var API keys are skipped, when unknown API types are sanitized, when workspace has no marker files, and when DetectExistingConfig returns an error. --- internal/openclaw/import.go | 35 +- internal/openclaw/import_test.go | 539 +++++++++++++++++++++++++++++++ internal/openclaw/openclaw.go | 10 +- 3 files changed, 577 insertions(+), 7 deletions(-) create mode 100644 internal/openclaw/import_test.go diff --git a/internal/openclaw/import.go b/internal/openclaw/import.go index 52ca5dd..8549868 100644 --- a/internal/openclaw/import.go +++ b/internal/openclaw/import.go @@ -102,7 +102,12 @@ func DetectExistingConfig() (*ImportResult, error) { if err != nil { return nil, nil } + return detectExistingConfigAt(home) +} +// detectExistingConfigAt reads and parses openclaw.json from the given home directory. +// Extracted from DetectExistingConfig for testability. 
+func detectExistingConfigAt(home string) (*ImportResult, error) { configPath := filepath.Join(home, ".openclaw", "openclaw.json") data, err := os.ReadFile(configPath) if err != nil { @@ -125,14 +130,20 @@ func DetectExistingConfig() (*ImportResult, error) { result.WorkspaceDir = detectWorkspace(home, cfg.Agents.Defaults.Workspace) for name, p := range cfg.Models.Providers { + sanitized := sanitizeModelAPI(p.API) + if p.API != "" && sanitized == "" { + fmt.Printf(" Note: unknown API type '%s' for provider '%s', will auto-detect\n", p.API, name) + } ip := ImportedProvider{ Name: name, BaseURL: p.BaseURL, - API: sanitizeModelAPI(p.API), + API: sanitized, } // Only import literal API keys, skip env-var references like ${...} if p.APIKey != "" && !isEnvVarRef(p.APIKey) { ip.APIKey = p.APIKey + } else if p.APIKey != "" { + fmt.Printf(" Note: provider '%s' uses env-var reference %s (will need manual configuration)\n", name, p.APIKey) } for _, m := range p.Models { ip.Models = append(ip.Models, ImportedModel{ID: m.ID, Name: m.Name}) @@ -140,11 +151,19 @@ func DetectExistingConfig() (*ImportResult, error) { result.Providers = append(result.Providers, ip) } - if cfg.Channels.Telegram != nil && cfg.Channels.Telegram.BotToken != "" && !isEnvVarRef(cfg.Channels.Telegram.BotToken) { - result.Channels.Telegram = &ImportedTelegram{BotToken: cfg.Channels.Telegram.BotToken} + if cfg.Channels.Telegram != nil && cfg.Channels.Telegram.BotToken != "" { + if !isEnvVarRef(cfg.Channels.Telegram.BotToken) { + result.Channels.Telegram = &ImportedTelegram{BotToken: cfg.Channels.Telegram.BotToken} + } else { + fmt.Printf(" Note: Telegram bot token uses env-var reference (will need manual configuration)\n") + } } - if cfg.Channels.Discord != nil && cfg.Channels.Discord.BotToken != "" && !isEnvVarRef(cfg.Channels.Discord.BotToken) { - result.Channels.Discord = &ImportedDiscord{BotToken: cfg.Channels.Discord.BotToken} + if cfg.Channels.Discord != nil && cfg.Channels.Discord.BotToken != "" { + if 
!isEnvVarRef(cfg.Channels.Discord.BotToken) { + result.Channels.Discord = &ImportedDiscord{BotToken: cfg.Channels.Discord.BotToken} + } else { + fmt.Printf(" Note: Discord bot token uses env-var reference (will need manual configuration)\n") + } } if cfg.Channels.Slack != nil { botToken := cfg.Channels.Slack.BotToken @@ -155,7 +174,11 @@ func DetectExistingConfig() (*ImportResult, error) { } if appToken != "" && !isEnvVarRef(appToken) { result.Channels.Slack.AppToken = appToken + } else if appToken != "" { + fmt.Printf(" Note: Slack app token uses env-var reference (will need manual configuration)\n") } + } else if botToken != "" { + fmt.Printf(" Note: Slack bot token uses env-var reference (will need manual configuration)\n") } } @@ -299,6 +322,8 @@ func detectWorkspace(home, configWorkspace string) string { } } + // Directory exists but has no marker files + fmt.Printf(" Note: workspace at %s has no marker files (SOUL.md, AGENTS.md, IDENTITY.md)\n", wsDir) return "" } diff --git a/internal/openclaw/import_test.go b/internal/openclaw/import_test.go new file mode 100644 index 0000000..c5e3c8a --- /dev/null +++ b/internal/openclaw/import_test.go @@ -0,0 +1,539 @@ +package openclaw + +import ( + "encoding/json" + "os" + "path/filepath" + "strings" + "testing" +) + +func TestIsEnvVarRef(t *testing.T) { + tests := []struct { + in string + want bool + }{ + {"${ANTHROPIC_API_KEY}", true}, + {"${VAR:default}", true}, + {"prefix${VAR}suffix", true}, + {"sk-ant-literal-key", false}, + {"", false}, + {"$VAR", false}, + {"plain-string", false}, + } + for _, tt := range tests { + if got := isEnvVarRef(tt.in); got != tt.want { + t.Errorf("isEnvVarRef(%q) = %v, want %v", tt.in, got, tt.want) + } + } +} + +func TestSanitizeModelAPI(t *testing.T) { + // All valid values should pass through unchanged + valid := []string{ + "openai-completions", + "openai-responses", + "anthropic-messages", + "google-generative-ai", + "github-copilot", + "bedrock-converse-stream", + } + for _, api 
:= range valid { + if got := sanitizeModelAPI(api); got != api { + t.Errorf("sanitizeModelAPI(%q) = %q, want %q", api, got, api) + } + } + + // Invalid values should return "" + invalid := []string{ + "custom-api", + "openai", + "", + "OpenAI-Completions", + "mistral-api", + } + for _, api := range invalid { + if got := sanitizeModelAPI(api); got != "" { + t.Errorf("sanitizeModelAPI(%q) = %q, want empty", api, got) + } + } +} + +func TestDetectWorkspace(t *testing.T) { + t.Run("dir with SOUL.md marker", func(t *testing.T) { + home := t.TempDir() + wsDir := filepath.Join(home, ".openclaw", "workspace") + os.MkdirAll(wsDir, 0755) + os.WriteFile(filepath.Join(wsDir, "SOUL.md"), []byte("test"), 0644) + + got := detectWorkspace(home, "") + if got != wsDir { + t.Errorf("detectWorkspace() = %q, want %q", got, wsDir) + } + }) + + t.Run("dir with AGENTS.md marker only", func(t *testing.T) { + home := t.TempDir() + wsDir := filepath.Join(home, ".openclaw", "workspace") + os.MkdirAll(wsDir, 0755) + os.WriteFile(filepath.Join(wsDir, "AGENTS.md"), []byte("test"), 0644) + + got := detectWorkspace(home, "") + if got != wsDir { + t.Errorf("detectWorkspace() = %q, want %q", got, wsDir) + } + }) + + t.Run("dir with IDENTITY.md marker only", func(t *testing.T) { + home := t.TempDir() + wsDir := filepath.Join(home, ".openclaw", "workspace") + os.MkdirAll(wsDir, 0755) + os.WriteFile(filepath.Join(wsDir, "IDENTITY.md"), []byte("test"), 0644) + + got := detectWorkspace(home, "") + if got != wsDir { + t.Errorf("detectWorkspace() = %q, want %q", got, wsDir) + } + }) + + t.Run("dir exists but no marker files", func(t *testing.T) { + home := t.TempDir() + wsDir := filepath.Join(home, ".openclaw", "workspace") + os.MkdirAll(wsDir, 0755) + os.WriteFile(filepath.Join(wsDir, "readme.txt"), []byte("test"), 0644) + + got := detectWorkspace(home, "") + if got != "" { + t.Errorf("detectWorkspace() = %q, want empty", got) + } + }) + + t.Run("dir does not exist", func(t *testing.T) { + home := 
t.TempDir() + got := detectWorkspace(home, "") + if got != "" { + t.Errorf("detectWorkspace() = %q, want empty", got) + } + }) + + t.Run("custom workspace path from config", func(t *testing.T) { + home := t.TempDir() + customWs := filepath.Join(t.TempDir(), "my-workspace") + os.MkdirAll(customWs, 0755) + os.WriteFile(filepath.Join(customWs, "SOUL.md"), []byte("test"), 0644) + + got := detectWorkspace(home, customWs) + if got != customWs { + t.Errorf("detectWorkspace() = %q, want %q", got, customWs) + } + }) +} + +func TestDetectWorkspaceFiles(t *testing.T) { + t.Run("all files present", func(t *testing.T) { + wsDir := t.TempDir() + for _, f := range []string{"SOUL.md", "AGENTS.md", "IDENTITY.md", "USER.md", "TOOLS.md", "MEMORY.md"} { + os.WriteFile(filepath.Join(wsDir, f), []byte("test"), 0644) + } + os.Mkdir(filepath.Join(wsDir, "memory"), 0755) + + got := detectWorkspaceFiles(wsDir) + if len(got) != 7 { + t.Errorf("detectWorkspaceFiles() returned %d items, want 7: %v", len(got), got) + } + }) + + t.Run("only SOUL.md", func(t *testing.T) { + wsDir := t.TempDir() + os.WriteFile(filepath.Join(wsDir, "SOUL.md"), []byte("test"), 0644) + + got := detectWorkspaceFiles(wsDir) + if len(got) != 1 || got[0] != "SOUL.md" { + t.Errorf("detectWorkspaceFiles() = %v, want [SOUL.md]", got) + } + }) + + t.Run("memory dir included", func(t *testing.T) { + wsDir := t.TempDir() + os.Mkdir(filepath.Join(wsDir, "memory"), 0755) + + got := detectWorkspaceFiles(wsDir) + if len(got) != 1 || got[0] != "memory/" { + t.Errorf("detectWorkspaceFiles() = %v, want [memory/]", got) + } + }) + + t.Run("empty dir", func(t *testing.T) { + wsDir := t.TempDir() + got := detectWorkspaceFiles(wsDir) + if len(got) != 0 { + t.Errorf("detectWorkspaceFiles() = %v, want empty", got) + } + }) +} + +func TestTranslateToOverlayYAML_Nil(t *testing.T) { + got := TranslateToOverlayYAML(nil) + if got != "" { + t.Errorf("TranslateToOverlayYAML(nil) = %q, want empty", got) + } +} + +func 
TestTranslateToOverlayYAML_AgentModelOnly(t *testing.T) { + result := &ImportResult{ + AgentModel: "claude-opus-4-6", + } + got := TranslateToOverlayYAML(result) + if !strings.Contains(got, "agentModel: claude-opus-4-6") { + t.Errorf("YAML missing agentModel, got:\n%s", got) + } + if strings.Contains(got, "models:") { + t.Errorf("YAML should not contain models section, got:\n%s", got) + } +} + +func TestTranslateToOverlayYAML_ProviderWithModels(t *testing.T) { + result := &ImportResult{ + Providers: []ImportedProvider{ + { + Name: "anthropic", + BaseURL: "https://api.anthropic.com/v1", + API: "anthropic-messages", + APIKey: "sk-ant-test", + Models: []ImportedModel{ + {ID: "claude-opus-4-6", Name: "Claude Opus 4.6"}, + }, + }, + }, + } + got := TranslateToOverlayYAML(result) + + checks := []string{ + "anthropic:\n enabled: true", + "baseUrl: https://api.anthropic.com/v1", + "api: anthropic-messages", + "apiKeyValue: sk-ant-test", + "- id: claude-opus-4-6", + "name: Claude Opus 4.6", + } + for _, check := range checks { + if !strings.Contains(got, check) { + t.Errorf("YAML missing %q, got:\n%s", check, got) + } + } +} + +func TestTranslateToOverlayYAML_DisabledProvider(t *testing.T) { + result := &ImportResult{ + Providers: []ImportedProvider{ + {Name: "openai", Disabled: true}, + }, + } + got := TranslateToOverlayYAML(result) + + if !strings.Contains(got, "openai:\n enabled: false") { + t.Errorf("YAML missing disabled openai, got:\n%s", got) + } + if strings.Contains(got, "enabled: true") { + t.Errorf("YAML should not contain enabled: true for disabled provider, got:\n%s", got) + } +} + +func TestTranslateToOverlayYAML_EmptyAPI(t *testing.T) { + result := &ImportResult{ + Providers: []ImportedProvider{ + { + Name: "custom", + BaseURL: "https://custom.api/v1", + API: "", + }, + }, + } + got := TranslateToOverlayYAML(result) + + if !strings.Contains(got, `api: ""`) { + t.Errorf("YAML missing empty api field, got:\n%s", got) + } +} + +func 
TestTranslateToOverlayYAML_Channels(t *testing.T) { + result := &ImportResult{ + Channels: ImportedChannels{ + Telegram: &ImportedTelegram{BotToken: "123456:ABC"}, + Discord: &ImportedDiscord{BotToken: "MTIz..."}, + Slack: &ImportedSlack{BotToken: "xoxb-test", AppToken: "xapp-test"}, + }, + } + got := TranslateToOverlayYAML(result) + + checks := []string{ + "telegram:\n enabled: true\n botToken: 123456:ABC", + "discord:\n enabled: true\n botToken: MTIz...", + "slack:\n enabled: true\n botToken: xoxb-test\n appToken: xapp-test", + } + for _, check := range checks { + if !strings.Contains(got, check) { + t.Errorf("YAML missing %q, got:\n%s", check, got) + } + } +} + +func TestTranslateToOverlayYAML_FullConfig(t *testing.T) { + result := &ImportResult{ + AgentModel: "claude-opus-4-6", + Providers: []ImportedProvider{ + { + Name: "anthropic", + BaseURL: "https://api.anthropic.com/v1", + API: "anthropic-messages", + APIKey: "sk-ant-test", + Models: []ImportedModel{{ID: "claude-opus-4-6", Name: "Claude Opus 4.6"}}, + }, + {Name: "openai", Disabled: true}, + }, + Channels: ImportedChannels{ + Telegram: &ImportedTelegram{BotToken: "123:ABC"}, + }, + } + got := TranslateToOverlayYAML(result) + + if !strings.Contains(got, "agentModel: claude-opus-4-6") { + t.Errorf("YAML missing agentModel, got:\n%s", got) + } + if !strings.Contains(got, "anthropic:\n enabled: true") { + t.Errorf("YAML missing enabled anthropic, got:\n%s", got) + } + if !strings.Contains(got, "openai:\n enabled: false") { + t.Errorf("YAML missing disabled openai, got:\n%s", got) + } + if !strings.Contains(got, "telegram:\n enabled: true") { + t.Errorf("YAML missing telegram channel, got:\n%s", got) + } +} + +// writeTestOpenclawConfig creates a test openclaw.json at the expected path +func writeTestOpenclawConfig(t *testing.T, home string, cfg *openclawConfig) { + t.Helper() + dir := filepath.Join(home, ".openclaw") + if err := os.MkdirAll(dir, 0755); err != nil { + t.Fatal(err) + } + data, err := 
json.Marshal(cfg) + if err != nil { + t.Fatal(err) + } + if err := os.WriteFile(filepath.Join(dir, "openclaw.json"), data, 0644); err != nil { + t.Fatal(err) + } +} + +func TestDetectExistingConfigAt_FileNotFound(t *testing.T) { + home := t.TempDir() + result, err := detectExistingConfigAt(home) + if result != nil || err != nil { + t.Errorf("expected (nil, nil), got (%v, %v)", result, err) + } +} + +func TestDetectExistingConfigAt_InvalidJSON(t *testing.T) { + home := t.TempDir() + dir := filepath.Join(home, ".openclaw") + os.MkdirAll(dir, 0755) + os.WriteFile(filepath.Join(dir, "openclaw.json"), []byte("{invalid json"), 0644) + + result, err := detectExistingConfigAt(home) + if result != nil { + t.Errorf("expected nil result, got %v", result) + } + if err == nil { + t.Fatal("expected error for invalid JSON") + } + if !strings.Contains(err.Error(), "failed to parse") { + t.Errorf("error should mention parsing, got: %v", err) + } +} + +func TestDetectExistingConfigAt_ValidConfig(t *testing.T) { + home := t.TempDir() + cfg := &openclawConfig{} + cfg.Models.Providers = map[string]openclawProvider{ + "anthropic": { + BaseURL: "https://api.anthropic.com/v1", + API: "anthropic-messages", + APIKey: "sk-ant-test-key", + Models: []openclawModel{{ID: "claude-opus-4-6", Name: "Claude Opus 4.6"}}, + }, + } + cfg.Agents.Defaults.Model.Primary = "claude-opus-4-6" + writeTestOpenclawConfig(t, home, cfg) + + result, err := detectExistingConfigAt(home) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if result == nil { + t.Fatal("expected non-nil result") + } + + if result.AgentModel != "claude-opus-4-6" { + t.Errorf("AgentModel = %q, want %q", result.AgentModel, "claude-opus-4-6") + } + if len(result.Providers) != 1 { + t.Fatalf("len(Providers) = %d, want 1", len(result.Providers)) + } + p := result.Providers[0] + if p.Name != "anthropic" { + t.Errorf("Provider.Name = %q, want %q", p.Name, "anthropic") + } + if p.APIKey != "sk-ant-test-key" { + 
t.Errorf("Provider.APIKey = %q, want %q", p.APIKey, "sk-ant-test-key") + } + if p.API != "anthropic-messages" { + t.Errorf("Provider.API = %q, want %q", p.API, "anthropic-messages") + } + if len(p.Models) != 1 || p.Models[0].ID != "claude-opus-4-6" { + t.Errorf("Provider.Models = %v", p.Models) + } +} + +func TestDetectExistingConfigAt_EnvVarKeySkipped(t *testing.T) { + home := t.TempDir() + cfg := &openclawConfig{} + cfg.Models.Providers = map[string]openclawProvider{ + "openai": { + BaseURL: "https://api.openai.com/v1", + API: "openai-completions", + APIKey: "${OPENAI_API_KEY}", + Models: []openclawModel{{ID: "gpt-5.2", Name: "GPT-5.2"}}, + }, + } + writeTestOpenclawConfig(t, home, cfg) + + result, err := detectExistingConfigAt(home) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if result == nil { + t.Fatal("expected non-nil result") + } + + if len(result.Providers) != 1 { + t.Fatalf("len(Providers) = %d, want 1", len(result.Providers)) + } + if result.Providers[0].APIKey != "" { + t.Errorf("Provider.APIKey = %q, want empty (env-var should be skipped)", result.Providers[0].APIKey) + } +} + +func TestDetectExistingConfigAt_ChannelImport(t *testing.T) { + home := t.TempDir() + cfg := &openclawConfig{} + cfg.Channels.Telegram = &struct { + BotToken string `json:"botToken"` + }{BotToken: "123456:ABCDEF"} + cfg.Channels.Discord = &struct { + BotToken string `json:"botToken"` + }{BotToken: "MTIzNDU2"} + cfg.Channels.Slack = &struct { + BotToken string `json:"botToken"` + AppToken string `json:"appToken"` + }{BotToken: "xoxb-test", AppToken: "xapp-test"} + writeTestOpenclawConfig(t, home, cfg) + + result, err := detectExistingConfigAt(home) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if result.Channels.Telegram == nil || result.Channels.Telegram.BotToken != "123456:ABCDEF" { + t.Errorf("Telegram = %v", result.Channels.Telegram) + } + if result.Channels.Discord == nil || result.Channels.Discord.BotToken != "MTIzNDU2" { + 
t.Errorf("Discord = %v", result.Channels.Discord) + } + if result.Channels.Slack == nil || result.Channels.Slack.BotToken != "xoxb-test" || result.Channels.Slack.AppToken != "xapp-test" { + t.Errorf("Slack = %v", result.Channels.Slack) + } +} + +func TestDetectExistingConfigAt_ChannelEnvVarSkipped(t *testing.T) { + home := t.TempDir() + cfg := &openclawConfig{} + cfg.Channels.Telegram = &struct { + BotToken string `json:"botToken"` + }{BotToken: "${TELEGRAM_TOKEN}"} + writeTestOpenclawConfig(t, home, cfg) + + result, err := detectExistingConfigAt(home) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if result.Channels.Telegram != nil { + t.Errorf("Telegram should be nil when token is env-var, got %v", result.Channels.Telegram) + } +} + +func TestDetectExistingConfigAt_WorkspaceDetection(t *testing.T) { + home := t.TempDir() + wsDir := filepath.Join(home, ".openclaw", "workspace") + os.MkdirAll(wsDir, 0755) + os.WriteFile(filepath.Join(wsDir, "SOUL.md"), []byte("I am an agent"), 0644) + + cfg := &openclawConfig{} + writeTestOpenclawConfig(t, home, cfg) + + result, err := detectExistingConfigAt(home) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if result.WorkspaceDir != wsDir { + t.Errorf("WorkspaceDir = %q, want %q", result.WorkspaceDir, wsDir) + } +} + +func TestDetectExistingConfigAt_UnknownAPISanitized(t *testing.T) { + home := t.TempDir() + cfg := &openclawConfig{} + cfg.Models.Providers = map[string]openclawProvider{ + "custom": { + BaseURL: "https://custom.api/v1", + API: "custom-protocol", + APIKey: "key123", + }, + } + writeTestOpenclawConfig(t, home, cfg) + + result, err := detectExistingConfigAt(home) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if len(result.Providers) != 1 { + t.Fatalf("len(Providers) = %d, want 1", len(result.Providers)) + } + if result.Providers[0].API != "" { + t.Errorf("Provider.API = %q, want empty (unknown API should be sanitized)", result.Providers[0].API) + } +} + +func 
TestDetectExistingConfigAt_EmptyConfig(t *testing.T) { + home := t.TempDir() + cfg := &openclawConfig{} + writeTestOpenclawConfig(t, home, cfg) + + result, err := detectExistingConfigAt(home) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if result == nil { + t.Fatal("expected non-nil result for valid but empty config") + } + if len(result.Providers) != 0 { + t.Errorf("len(Providers) = %d, want 0", len(result.Providers)) + } + if result.AgentModel != "" { + t.Errorf("AgentModel = %q, want empty", result.AgentModel) + } +} diff --git a/internal/openclaw/openclaw.go b/internal/openclaw/openclaw.go index e86c3e0..82e33e4 100644 --- a/internal/openclaw/openclaw.go +++ b/internal/openclaw/openclaw.go @@ -69,7 +69,10 @@ func SetupDefault(cfg *config.Config) error { } // Check if there is an existing ~/.openclaw config with providers - imported, _ := DetectExistingConfig() + imported, importErr := DetectExistingConfig() + if importErr != nil { + fmt.Printf(" Warning: could not read existing config: %v\n", importErr) + } hasImportedProviders := imported != nil && len(imported.Providers) > 0 // If no imported providers, check Ollama availability for the default overlay @@ -116,7 +119,10 @@ func Onboard(cfg *config.Config, opts OnboardOptions) error { return err } // Import workspace on re-sync too - imported, _ := DetectExistingConfig() + imported, importErr := DetectExistingConfig() + if importErr != nil { + fmt.Printf("Warning: could not read existing config: %v\n", importErr) + } if imported != nil && imported.WorkspaceDir != "" { copyWorkspaceToPod(cfg, id, imported.WorkspaceDir) } From 22b05a985c9b71301e0c74ff66ca0e56494a0e3d Mon Sep 17 00:00:00 2001 From: JeanDaniel Bussy Date: Fri, 13 Feb 2026 16:55:55 +0400 Subject: [PATCH 33/42] fix(openclaw): add controlUi gateway settings for Traefik HTTP proxy (#153) OpenClaw's control UI rejects WebSocket connections with "1008: control ui requires HTTPS or localhost (secure context)" when running behind 
Traefik over HTTP. This adds: - Chart values and _helpers.tpl rendering for controlUi.allowInsecureAuth and controlUi.dangerouslyDisableDeviceAuth gateway settings - trustedProxies chart value for reverse proxy IP allowlisting - Overlay generation injects controlUi settings for both imported and fresh install paths - RBAC ClusterRole/ClusterRoleBinding for frontend OpenClaw instance discovery (namespaces, pods, configmaps, secrets) --- internal/embed/infrastructure/helmfile.yaml | 36 +++++++++++++++++++ .../openclaw/chart/templates/_helpers.tpl | 13 +++++++ internal/openclaw/chart/values.yaml | 9 +++++ internal/openclaw/openclaw.go | 11 ++++++ 4 files changed, 69 insertions(+) diff --git a/internal/embed/infrastructure/helmfile.yaml b/internal/embed/infrastructure/helmfile.yaml index dc2bf22..5501463 100644 --- a/internal/embed/infrastructure/helmfile.yaml +++ b/internal/embed/infrastructure/helmfile.yaml @@ -200,3 +200,39 @@ releases: backendRefs: - name: obol-frontend-obol-app port: 3000 + + # Obol Frontend RBAC (OpenClaw instance discovery via Kubernetes API) + - name: obol-frontend-rbac + namespace: obol-frontend + chart: bedag/raw + needs: + - obol-frontend/obol-frontend + values: + - resources: + - apiVersion: rbac.authorization.k8s.io/v1 + kind: ClusterRole + metadata: + name: obol-frontend-openclaw-discovery + labels: + app.kubernetes.io/name: obol-frontend + rules: + - apiGroups: [""] + resources: ["namespaces"] + verbs: ["get", "list"] + - apiGroups: [""] + resources: ["pods", "configmaps", "secrets"] + verbs: ["get", "list"] + - apiVersion: rbac.authorization.k8s.io/v1 + kind: ClusterRoleBinding + metadata: + name: obol-frontend-openclaw-discovery + labels: + app.kubernetes.io/name: obol-frontend + roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: obol-frontend-openclaw-discovery + subjects: + - kind: ServiceAccount + name: obol-frontend + namespace: obol-frontend diff --git a/internal/openclaw/chart/templates/_helpers.tpl 
b/internal/openclaw/chart/templates/_helpers.tpl index d6167e2..b46d24d 100644 --- a/internal/openclaw/chart/templates/_helpers.tpl +++ b/internal/openclaw/chart/templates/_helpers.tpl @@ -143,6 +143,19 @@ Render openclaw.json as strict JSON. If config.content is provided, it is used v "auth" $gatewayAuth "http" (dict "endpoints" (dict "chatCompletions" (dict "enabled" .Values.openclaw.gateway.http.endpoints.chatCompletions.enabled))) -}} +{{- if .Values.openclaw.gateway.trustedProxies -}} +{{- $_ := set $gateway "trustedProxies" .Values.openclaw.gateway.trustedProxies -}} +{{- end -}} +{{- if or .Values.openclaw.gateway.controlUi.allowInsecureAuth .Values.openclaw.gateway.controlUi.dangerouslyDisableDeviceAuth -}} +{{- $controlUi := dict -}} +{{- if .Values.openclaw.gateway.controlUi.allowInsecureAuth -}} +{{- $_ := set $controlUi "allowInsecureAuth" true -}} +{{- end -}} +{{- if .Values.openclaw.gateway.controlUi.dangerouslyDisableDeviceAuth -}} +{{- $_ := set $controlUi "dangerouslyDisableDeviceAuth" true -}} +{{- end -}} +{{- $_ := set $gateway "controlUi" $controlUi -}} +{{- end -}} {{- $agentDefaults := dict "workspace" .Values.openclaw.workspaceDir -}} {{- if .Values.openclaw.agentModel -}} diff --git a/internal/openclaw/chart/values.yaml b/internal/openclaw/chart/values.yaml index 9919da2..5165c3e 100644 --- a/internal/openclaw/chart/values.yaml +++ b/internal/openclaw/chart/values.yaml @@ -153,6 +153,15 @@ openclaw: gateway: mode: local bind: lan + # -- Trusted proxy IPs for secure context detection behind a reverse proxy (e.g. Traefik). + # OpenClaw uses exact IP matching (no CIDR support). 
+ trustedProxies: [] + # -- Control UI settings for running behind a reverse proxy + controlUi: + # -- Allow control UI over HTTP (required when behind a non-TLS proxy like Traefik in dev) + allowInsecureAuth: false + # -- Disable device authentication for control UI (not recommended) + dangerouslyDisableDeviceAuth: false auth: mode: token diff --git a/internal/openclaw/openclaw.go b/internal/openclaw/openclaw.go index 7e3b2af..e803a52 100644 --- a/internal/openclaw/openclaw.go +++ b/internal/openclaw/openclaw.go @@ -1024,11 +1024,22 @@ rbac: importedOverlay := TranslateToOverlayYAML(imported) if importedOverlay != "" { b.WriteString("# Imported from ~/.openclaw/openclaw.json\n") + // Inject gateway proxy settings into the openclaw: section for Traefik. + if strings.Contains(importedOverlay, "openclaw:\n") { + importedOverlay = strings.Replace(importedOverlay, "openclaw:\n", "openclaw:\n gateway:\n controlUi:\n allowInsecureAuth: true\n dangerouslyDisableDeviceAuth: true\n", 1) + } else { + b.WriteString("openclaw:\n gateway:\n controlUi:\n allowInsecureAuth: true\n dangerouslyDisableDeviceAuth: true\n\n") + } b.WriteString(importedOverlay) } else { b.WriteString(`# Route agent traffic to in-cluster Ollama via llmspy proxy openclaw: agentModel: ollama/glm-4.7-flash + gateway: + # Allow control UI over HTTP behind Traefik (local dev stack) + controlUi: + allowInsecureAuth: true + dangerouslyDisableDeviceAuth: true # Default model provider: in-cluster Ollama (routed through llmspy) models: From 5e6c7514a76651fe4726fee722f1425260476a5c Mon Sep 17 00:00:00 2001 From: bussyjd Date: Thu, 12 Feb 2026 23:48:04 +0400 Subject: [PATCH 34/42] fix(openclaw): rename virtual provider from "ollama" to "llmspy" for cloud model routing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit OpenClaw requires provider/model format (e.g. "llmspy/claude-sonnet-4-5-20250929") for model resolution. 
Without a provider prefix, it hardcodes a fallback to the "anthropic" provider — which is disabled in the llmspy-routed overlay, causing chat requests to fail silently. This renames the virtual provider used for cloud model routing from "ollama" to "llmspy", adds the proper provider prefix to AgentModel, and disables the default "ollama" provider when a cloud provider is selected. The default Ollama-only path is unchanged since it genuinely routes Ollama models. --- CLAUDE.md | 27 +++++++--- internal/openclaw/openclaw.go | 16 +++--- internal/openclaw/overlay_test.go | 90 ++++++++++++++++++------------- 3 files changed, 81 insertions(+), 52 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 7ea97d1..51f7bd8 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -616,14 +616,14 @@ The stack uses a two-tier architecture for LLM routing. A cluster-wide proxy (ll When a cloud provider is selected during setup, two things happen simultaneously: 1. **Global tier**: `llm.ConfigureLLMSpy()` patches the cluster-wide llmspy gateway with the API key and enables the provider -2. **Instance tier**: `buildLLMSpyRoutedOverlay()` creates an overlay where a single "ollama" provider points at llmspy, the cloud model is listed under that provider, and `api` is set to `openai-completions` +2. **Instance tier**: `buildLLMSpyRoutedOverlay()` creates an overlay where a "llmspy" provider points at the llmspy gateway, the cloud model is listed under that provider with a `llmspy/` prefix, and `api` is set to `openai-completions`. The default "ollama" provider is disabled. **Result**: The application never talks directly to cloud APIs. All traffic is routed through llmspy. 
**Data flow**: ``` Application (openclaw.json) - │ model: "ollama/claude-sonnet-4-5-20250929" + │ model: "llmspy/claude-sonnet-4-5-20250929" │ api: "openai-completions" │ baseUrl: http://llmspy.llm.svc.cluster.local:8000/v1 │ @@ -636,24 +636,28 @@ llmspy (llm namespace, port 8000) Anthropic API (or Ollama, OpenAI — depending on provider) ``` -**Overlay example** (`values-obol.yaml`): +**Overlay example** (`values-obol.yaml` for cloud provider path): ```yaml models: - ollama: + llmspy: enabled: true baseUrl: http://llmspy.llm.svc.cluster.local:8000/v1 api: openai-completions - apiKeyEnvVar: OLLAMA_API_KEY - apiKeyValue: ollama-local + apiKeyEnvVar: LLMSPY_API_KEY + apiKeyValue: llmspy-default models: - id: claude-sonnet-4-5-20250929 name: Claude Sonnet 4.5 + ollama: + enabled: false anthropic: enabled: false openai: enabled: false ``` +**Note**: The default Ollama path (no cloud provider) still uses the "ollama" provider name pointing at llmspy, since it genuinely routes Ollama model traffic. + ### Summary Table | Aspect | Tier 1 (llmspy) | Tier 2 (Application instance) | @@ -663,7 +667,7 @@ models: | **Config storage** | ConfigMap `llmspy-config` | ConfigMap `-config` | | **Secrets** | Secret `llms-secrets` | Secret `-secrets` | | **Configure via** | `obol llm configure` | `obol openclaw setup ` | -| **Providers** | Real (Ollama, Anthropic, OpenAI) | Virtual: everything appears as "ollama" pointing at llmspy | +| **Providers** | Real (Ollama, Anthropic, OpenAI) | Cloud: "llmspy" virtual provider; Default: "ollama" pointing at llmspy | | **API field** | N/A (provider-native) | Must be `openai-completions` for llmspy routing | ### Key Source Files @@ -972,3 +976,12 @@ This file should be updated when: - New workflows or development practices are established Always confirm with the user before making updates to maintain accuracy and relevance. 
+ +## Related Codebases (External Resources) + +| Resource | Path | Description | +|----------|------|-------------| +| obol-stack-front-end | `/Users/bussyjd/Development/Obol_Workbench/obol-stack-front-end` | Next.js web dashboard | +| obol-stack-docs | `/Users/bussyjd/Development/Obol_Workbench/obol-stack-docs` | MkDocs documentation site | +| OpenClaw | `/Users/bussyjd/Development/Obol_Workbench/openclaw` | OpenClaw AI assistant (upstream) | +| llmspy | `/Users/bussyjd/Development/R&D/llmspy` | LLM proxy/router (upstream) | diff --git a/internal/openclaw/openclaw.go b/internal/openclaw/openclaw.go index 82e33e4..14bc5c0 100644 --- a/internal/openclaw/openclaw.go +++ b/internal/openclaw/openclaw.go @@ -1147,23 +1147,25 @@ func promptForCloudProvider(reader *bufio.Reader, name, display, modelID, modelN } // buildLLMSpyRoutedOverlay creates an ImportResult that routes a cloud model -// through the llmspy proxy. OpenClaw sees a single "ollama" provider pointing -// at llmspy, with the cloud model in its model list. The actual cloud providers -// are disabled in OpenClaw — llmspy handles the routing. +// through the llmspy proxy. OpenClaw sees a "llmspy" provider pointing at the +// cluster-wide llmspy gateway, with the cloud model in its model list. The +// actual cloud providers (and default ollama) are disabled in OpenClaw — llmspy +// handles upstream routing based on the bare model ID. 
func buildLLMSpyRoutedOverlay(cloud *CloudProviderInfo) *ImportResult { return &ImportResult{ - AgentModel: cloud.ModelID, + AgentModel: "llmspy/" + cloud.ModelID, Providers: []ImportedProvider{ { - Name: "ollama", + Name: "llmspy", BaseURL: "http://llmspy.llm.svc.cluster.local:8000/v1", API: "openai-completions", - APIKeyEnvVar: "OLLAMA_API_KEY", - APIKey: "ollama-local", + APIKeyEnvVar: "LLMSPY_API_KEY", + APIKey: "llmspy-default", Models: []ImportedModel{ {ID: cloud.ModelID, Name: cloud.Display}, }, }, + {Name: "ollama", Disabled: true}, {Name: "anthropic", Disabled: true}, {Name: "openai", Disabled: true}, }, diff --git a/internal/openclaw/overlay_test.go b/internal/openclaw/overlay_test.go index fdeed61..fd84994 100644 --- a/internal/openclaw/overlay_test.go +++ b/internal/openclaw/overlay_test.go @@ -15,42 +15,50 @@ func TestBuildLLMSpyRoutedOverlay_Anthropic(t *testing.T) { result := buildLLMSpyRoutedOverlay(cloud) - // Check agent model uses bare model ID (no provider/ prefix) - if result.AgentModel != "claude-sonnet-4-5-20250929" { - t.Errorf("AgentModel = %q, want %q", result.AgentModel, "claude-sonnet-4-5-20250929") + // Check agent model uses llmspy/ prefix for correct OpenClaw provider routing + if result.AgentModel != "llmspy/claude-sonnet-4-5-20250929" { + t.Errorf("AgentModel = %q, want %q", result.AgentModel, "llmspy/claude-sonnet-4-5-20250929") } - // Check 3 providers: ollama (enabled), anthropic (disabled), openai (disabled) - if len(result.Providers) != 3 { - t.Fatalf("len(Providers) = %d, want 3", len(result.Providers)) + // Check 4 providers: llmspy (enabled), ollama (disabled), anthropic (disabled), openai (disabled) + if len(result.Providers) != 4 { + t.Fatalf("len(Providers) = %d, want 4", len(result.Providers)) } - ollama := result.Providers[0] - if ollama.Name != "ollama" || ollama.Disabled { - t.Errorf("ollama: name=%q disabled=%v, want ollama/false", ollama.Name, ollama.Disabled) + llmspy := result.Providers[0] + if llmspy.Name != 
"llmspy" || llmspy.Disabled { + t.Errorf("llmspy: name=%q disabled=%v, want llmspy/false", llmspy.Name, llmspy.Disabled) } - if ollama.BaseURL != "http://llmspy.llm.svc.cluster.local:8000/v1" { - t.Errorf("ollama.BaseURL = %q", ollama.BaseURL) + if llmspy.BaseURL != "http://llmspy.llm.svc.cluster.local:8000/v1" { + t.Errorf("llmspy.BaseURL = %q", llmspy.BaseURL) } - if ollama.APIKeyEnvVar != "OLLAMA_API_KEY" { - t.Errorf("ollama.APIKeyEnvVar = %q, want OLLAMA_API_KEY", ollama.APIKeyEnvVar) + if llmspy.APIKeyEnvVar != "LLMSPY_API_KEY" { + t.Errorf("llmspy.APIKeyEnvVar = %q, want LLMSPY_API_KEY", llmspy.APIKeyEnvVar) } - if ollama.APIKey != "ollama-local" { - t.Errorf("ollama.APIKey = %q, want ollama-local", ollama.APIKey) + if llmspy.APIKey != "llmspy-default" { + t.Errorf("llmspy.APIKey = %q, want llmspy-default", llmspy.APIKey) } - if ollama.API != "openai-completions" { - t.Errorf("ollama.API = %q, want openai-completions", ollama.API) + if llmspy.API != "openai-completions" { + t.Errorf("llmspy.API = %q, want openai-completions", llmspy.API) } - if len(ollama.Models) != 1 || ollama.Models[0].ID != "claude-sonnet-4-5-20250929" { - t.Errorf("ollama.Models = %v", ollama.Models) + if len(llmspy.Models) != 1 || llmspy.Models[0].ID != "claude-sonnet-4-5-20250929" { + t.Errorf("llmspy.Models = %v", llmspy.Models) } - // anthropic and openai should be disabled - if !result.Providers[1].Disabled || result.Providers[1].Name != "anthropic" { - t.Errorf("anthropic: disabled=%v name=%q", result.Providers[1].Disabled, result.Providers[1].Name) + // ollama, anthropic and openai should be disabled + for _, idx := range []int{1, 2, 3} { + if !result.Providers[idx].Disabled { + t.Errorf("Providers[%d] (%s) should be disabled", idx, result.Providers[idx].Name) + } + } + if result.Providers[1].Name != "ollama" { + t.Errorf("Providers[1].Name = %q, want ollama", result.Providers[1].Name) + } + if result.Providers[2].Name != "anthropic" { + t.Errorf("Providers[2].Name = %q, want 
anthropic", result.Providers[2].Name) } - if !result.Providers[2].Disabled || result.Providers[2].Name != "openai" { - t.Errorf("openai: disabled=%v name=%q", result.Providers[2].Disabled, result.Providers[2].Name) + if result.Providers[3].Name != "openai" { + t.Errorf("Providers[3].Name = %q, want openai", result.Providers[3].Name) } } @@ -64,13 +72,13 @@ func TestBuildLLMSpyRoutedOverlay_OpenAI(t *testing.T) { result := buildLLMSpyRoutedOverlay(cloud) - if result.AgentModel != "gpt-5.2" { - t.Errorf("AgentModel = %q, want %q", result.AgentModel, "gpt-5.2") + if result.AgentModel != "llmspy/gpt-5.2" { + t.Errorf("AgentModel = %q, want %q", result.AgentModel, "llmspy/gpt-5.2") } - ollama := result.Providers[0] - if len(ollama.Models) != 1 || ollama.Models[0].ID != "gpt-5.2" { - t.Errorf("ollama model = %v, want gpt-5.2", ollama.Models) + llmspy := result.Providers[0] + if len(llmspy.Models) != 1 || llmspy.Models[0].ID != "gpt-5.2" { + t.Errorf("llmspy model = %v, want gpt-5.2", llmspy.Models) } } @@ -84,23 +92,26 @@ func TestOverlayYAML_LLMSpyRouted(t *testing.T) { result := buildLLMSpyRoutedOverlay(cloud) yaml := TranslateToOverlayYAML(result) - // Agent model should be the bare model ID - if !strings.Contains(yaml, "agentModel: claude-sonnet-4-5-20250929") { + // Agent model should have llmspy/ prefix + if !strings.Contains(yaml, "agentModel: llmspy/claude-sonnet-4-5-20250929") { t.Errorf("YAML missing agentModel, got:\n%s", yaml) } - // ollama should be enabled with llmspy baseUrl + // llmspy should be enabled with llmspy baseUrl + if !strings.Contains(yaml, "llmspy:\n enabled: true") { + t.Errorf("YAML missing enabled llmspy provider, got:\n%s", yaml) + } if !strings.Contains(yaml, "baseUrl: http://llmspy.llm.svc.cluster.local:8000/v1") { t.Errorf("YAML missing llmspy baseUrl, got:\n%s", yaml) } - // apiKeyEnvVar should be set - if !strings.Contains(yaml, "apiKeyEnvVar: OLLAMA_API_KEY") { + // apiKeyEnvVar should be LLMSPY_API_KEY + if !strings.Contains(yaml, 
"apiKeyEnvVar: LLMSPY_API_KEY") { t.Errorf("YAML missing apiKeyEnvVar, got:\n%s", yaml) } - // apiKeyValue should be ollama-local - if !strings.Contains(yaml, "apiKeyValue: ollama-local") { + // apiKeyValue should be llmspy-default + if !strings.Contains(yaml, "apiKeyValue: llmspy-default") { t.Errorf("YAML missing apiKeyValue, got:\n%s", yaml) } @@ -109,12 +120,15 @@ func TestOverlayYAML_LLMSpyRouted(t *testing.T) { t.Errorf("YAML missing api: openai-completions, got:\n%s", yaml) } - // Cloud model should appear in ollama's model list + // Cloud model should appear in llmspy's model list if !strings.Contains(yaml, "- id: claude-sonnet-4-5-20250929") { t.Errorf("YAML missing cloud model ID, got:\n%s", yaml) } - // anthropic and openai should be disabled + // ollama, anthropic and openai should be disabled + if !strings.Contains(yaml, "ollama:\n enabled: false") { + t.Errorf("YAML missing disabled ollama, got:\n%s", yaml) + } if !strings.Contains(yaml, "anthropic:\n enabled: false") { t.Errorf("YAML missing disabled anthropic, got:\n%s", yaml) } From 817dc5fb140d354817917dfece21f71863e448f4 Mon Sep 17 00:00:00 2001 From: bussyjd Date: Fri, 13 Feb 2026 17:22:45 +0400 Subject: [PATCH 35/42] security(openclaw): remove dangerouslyDisableDeviceAuth, keep only allowInsecureAuth MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The dangerouslyDisableDeviceAuth flag is completely redundant when running behind Traefik over HTTP: the browser's crypto.subtle API is unavailable in non-secure contexts (non-localhost HTTP), so the Control UI never sends device identity at all. Setting dangerouslyDisableDeviceAuth only matters when the browser IS in a secure context but you want to skip device auth — which doesn't apply to our Traefik proxy case. allowInsecureAuth alone is sufficient: it allows the gateway to accept token-only authentication when device identity is absent. 
Token auth remains fully enforced — connections without a valid gateway token are still rejected. Security analysis: - Token/password auth: still enforced (timing-safe comparison) - Origin check: still enforced (same-origin validation) - Device identity: naturally skipped (browser can't provide it on HTTP) - Risk in localhost k3d context: Low (no external attack surface) - OpenClaw security audit classification: critical (general), but acceptable for local-only dev stack Refs: plans/security-audit-controlui.md, plans/trustedproxies-analysis.md --- internal/openclaw/chart/templates/_helpers.tpl | 9 +-------- internal/openclaw/chart/values.yaml | 6 +++--- internal/openclaw/openclaw.go | 15 ++++++++++----- 3 files changed, 14 insertions(+), 16 deletions(-) diff --git a/internal/openclaw/chart/templates/_helpers.tpl b/internal/openclaw/chart/templates/_helpers.tpl index b46d24d..aa9df60 100644 --- a/internal/openclaw/chart/templates/_helpers.tpl +++ b/internal/openclaw/chart/templates/_helpers.tpl @@ -146,15 +146,8 @@ Render openclaw.json as strict JSON. 
If config.content is provided, it is used v {{- if .Values.openclaw.gateway.trustedProxies -}} {{- $_ := set $gateway "trustedProxies" .Values.openclaw.gateway.trustedProxies -}} {{- end -}} -{{- if or .Values.openclaw.gateway.controlUi.allowInsecureAuth .Values.openclaw.gateway.controlUi.dangerouslyDisableDeviceAuth -}} -{{- $controlUi := dict -}} {{- if .Values.openclaw.gateway.controlUi.allowInsecureAuth -}} -{{- $_ := set $controlUi "allowInsecureAuth" true -}} -{{- end -}} -{{- if .Values.openclaw.gateway.controlUi.dangerouslyDisableDeviceAuth -}} -{{- $_ := set $controlUi "dangerouslyDisableDeviceAuth" true -}} -{{- end -}} -{{- $_ := set $gateway "controlUi" $controlUi -}} +{{- $_ := set $gateway "controlUi" (dict "allowInsecureAuth" true) -}} {{- end -}} {{- $agentDefaults := dict "workspace" .Values.openclaw.workspaceDir -}} diff --git a/internal/openclaw/chart/values.yaml b/internal/openclaw/chart/values.yaml index 5165c3e..59bb185 100644 --- a/internal/openclaw/chart/values.yaml +++ b/internal/openclaw/chart/values.yaml @@ -156,12 +156,12 @@ openclaw: # -- Trusted proxy IPs for secure context detection behind a reverse proxy (e.g. Traefik). # OpenClaw uses exact IP matching (no CIDR support). trustedProxies: [] - # -- Control UI settings for running behind a reverse proxy + # -- Control UI settings for running behind a reverse proxy. + # allowInsecureAuth permits token-only auth when the browser lacks crypto.subtle + # (non-localhost HTTP). Device auth is naturally skipped (browser can't provide it). 
controlUi: # -- Allow control UI over HTTP (required when behind a non-TLS proxy like Traefik in dev) allowInsecureAuth: false - # -- Disable device authentication for control UI (not recommended) - dangerouslyDisableDeviceAuth: false auth: mode: token diff --git a/internal/openclaw/openclaw.go b/internal/openclaw/openclaw.go index e803a52..7bfec8d 100644 --- a/internal/openclaw/openclaw.go +++ b/internal/openclaw/openclaw.go @@ -1024,11 +1024,15 @@ rbac: importedOverlay := TranslateToOverlayYAML(imported) if importedOverlay != "" { b.WriteString("# Imported from ~/.openclaw/openclaw.json\n") - // Inject gateway proxy settings into the openclaw: section for Traefik. + // Inject gateway controlUi settings for Traefik reverse proxy. + // allowInsecureAuth is required because the browser accesses OpenClaw via + // http://.obol.stack (non-localhost HTTP), where crypto.subtle is + // unavailable. Without it, the gateway rejects with 1008 "requires HTTPS or + // localhost (secure context)". Token auth is still enforced. if strings.Contains(importedOverlay, "openclaw:\n") { - importedOverlay = strings.Replace(importedOverlay, "openclaw:\n", "openclaw:\n gateway:\n controlUi:\n allowInsecureAuth: true\n dangerouslyDisableDeviceAuth: true\n", 1) + importedOverlay = strings.Replace(importedOverlay, "openclaw:\n", "openclaw:\n gateway:\n controlUi:\n allowInsecureAuth: true\n", 1) } else { - b.WriteString("openclaw:\n gateway:\n controlUi:\n allowInsecureAuth: true\n dangerouslyDisableDeviceAuth: true\n\n") + b.WriteString("openclaw:\n gateway:\n controlUi:\n allowInsecureAuth: true\n\n") } b.WriteString(importedOverlay) } else { @@ -1036,10 +1040,11 @@ rbac: openclaw: agentModel: ollama/glm-4.7-flash gateway: - # Allow control UI over HTTP behind Traefik (local dev stack) + # Allow control UI over HTTP behind Traefik (local dev stack). + # Required: browser on non-localhost HTTP has no crypto.subtle, + # so device identity is unavailable. Token auth is still enforced. 
controlUi: allowInsecureAuth: true - dangerouslyDisableDeviceAuth: true # Default model provider: in-cluster Ollama (routed through llmspy) models: From 48ae09a7b1e21091fc1bc082a3b4288b022484b7 Mon Sep 17 00:00:00 2001 From: bussyjd Date: Fri, 13 Feb 2026 17:56:18 +0400 Subject: [PATCH 36/42] chore(llm): bump LLMSpy image to 3.0.32-obol.1-rc.4 Includes smart routing, streaming SSE passthrough, and db writer startup race fix. --- internal/embed/infrastructure/base/templates/llm.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/embed/infrastructure/base/templates/llm.yaml b/internal/embed/infrastructure/base/templates/llm.yaml index 8c8acf8..0736a53 100644 --- a/internal/embed/infrastructure/base/templates/llm.yaml +++ b/internal/embed/infrastructure/base/templates/llm.yaml @@ -153,7 +153,7 @@ spec: - name: llmspy # Obol fork of LLMSpy with smart routing extension. # Pin a specific version for reproducibility. - image: ghcr.io/obolnetwork/llms:3.0.32-obol.1-rc.2 + image: ghcr.io/obolnetwork/llms:3.0.32-obol.1-rc.4 imagePullPolicy: IfNotPresent ports: - name: http From 44f55f71e8d5e68d06b2f6c7661fef5e5a8f4dae Mon Sep 17 00:00:00 2001 From: bussyjd Date: Fri, 13 Feb 2026 18:41:42 +0400 Subject: [PATCH 37/42] security(openclaw): stop logging sensitive APIKey field value in import Remove the p.APIKey value from the env-var reference log message in DetectExistingConfig(). Although the code path only reaches here when the value is an env-var reference (e.g. ${ANTHROPIC_API_KEY}), CodeQL correctly flags it as clear-text logging of a sensitive field (go/ clear-text-logging). Omitting the value is a defense-in-depth fix that prevents accidental exposure if the guard condition ever changes. 
--- internal/openclaw/import.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/openclaw/import.go b/internal/openclaw/import.go index 8549868..e48dd72 100644 --- a/internal/openclaw/import.go +++ b/internal/openclaw/import.go @@ -143,7 +143,7 @@ func detectExistingConfigAt(home string) (*ImportResult, error) { if p.APIKey != "" && !isEnvVarRef(p.APIKey) { ip.APIKey = p.APIKey } else if p.APIKey != "" { - fmt.Printf(" Note: provider '%s' uses env-var reference %s (will need manual configuration)\n", name, p.APIKey) + fmt.Printf(" Note: provider '%s' uses an env-var reference for its API key (will need manual configuration)\n", name) } for _, m := range p.Models { ip.Models = append(ip.Models, ImportedModel{ID: m.ID, Name: m.Name}) From 0fae81cf80a3665d79b889362b9dbf9b2f730e5a Mon Sep 17 00:00:00 2001 From: bussyjd Date: Fri, 13 Feb 2026 18:55:55 +0400 Subject: [PATCH 38/42] feat(erpc): switch upstream from nodecore to erpc.gcp.obol.tech MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the nodecore RPC upstream with Obol's internal rate-limited eRPC gateway (erpc.gcp.obol.tech). The upstream supports mainnet and hoodi only, so sepolia is removed from all eRPC and ethereum network configurations. Basic Auth credential is intentionally embedded per CTO approval — the endpoint is rate-limited and serves as a convenience proxy for local stack users. Credential is extracted to a template variable with gitleaks:allow suppression. 
--- README.md | 4 +-- .../infrastructure/values/erpc.yaml.gotmpl | 36 ++++++++----------- .../networks/ethereum/helmfile.yaml.gotmpl | 1 - .../networks/ethereum/values.yaml.gotmpl | 2 +- 4 files changed, 18 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index b7ec40d..6a634c1 100644 --- a/README.md +++ b/README.md @@ -228,7 +228,7 @@ obol network install ethereum --network=hoodi ``` **Ethereum configuration options:** -- `--network`: Choose network (mainnet, sepolia, hoodi) +- `--network`: Choose network (mainnet, hoodi) - `--execution-client`: Choose execution client (reth, geth, nethermind, besu, erigon, ethereumjs) - `--consensus-client`: Choose consensus client (lighthouse, prysm, teku, nimbus, lodestar, grandine) @@ -669,7 +669,7 @@ Networks are embedded in the binary at `internal/embed/networks/`. Each network ```yaml # internal/embed/networks/ethereum/helmfile.yaml.gotmpl values: - # @enum mainnet,sepolia,holesky,hoodi + # @enum mainnet,hoodi # @default mainnet # @description Blockchain network to deploy - network: {{.Network}} diff --git a/internal/embed/infrastructure/values/erpc.yaml.gotmpl b/internal/embed/infrastructure/values/erpc.yaml.gotmpl index 051670c..7f9bc23 100644 --- a/internal/embed/infrastructure/values/erpc.yaml.gotmpl +++ b/internal/embed/infrastructure/values/erpc.yaml.gotmpl @@ -3,11 +3,16 @@ {{- $chainId := 1 -}} {{/* Default: mainnet */}} {{- if eq $network "hoodi" -}} {{- $chainId = 560048 -}} -{{- else if eq $network "sepolia" -}} - {{- $chainId = 11155111 -}} {{- else if ne $network "mainnet" -}} - {{- fail (printf "Unknown network: %s. Supported networks: mainnet, hoodi, sepolia" $network) -}} + {{- fail (printf "Unknown network: %s. Supported networks: mainnet, hoodi" $network) -}} {{- end -}} +{{/* + erpc.gcp.obol.tech — internal, rate-limited eRPC gateway hosted by Obol. + The Basic Auth credential below is knowingly included in the obol-stack source. 
+ This endpoint is rate-limited and serves only as a convenience RPC proxy for + local stack users; exposing it carries less risk than running fully unprotected. +*/}} +{{- $erpcGcpAuth := "obol:svXELzJDXQPrmgA3AopiWZWm" -}} {{/* gitleaks:allow */}} # Number of replicas replicas: 1 @@ -50,13 +55,10 @@ config: |- projects: - id: rpc upstreams: - - id: nodecore - endpoint: https://rpc.nodecore.io + - id: erpc-gcp + endpoint: https://{{ $erpcGcpAuth }}@erpc.gcp.obol.tech/{{ $network }}/evm/{{ $chainId }} evm: chainId: {{ $chainId }} - jsonRpc: - headers: - X-Nodecore-Token: "${OBOL_OAUTH_TOKEN}" networks: - architecture: evm evm: @@ -90,14 +92,8 @@ config: |- # Secret env variables (chart-managed secret for inline values) secretEnv: {} -# Extra env variables (reference external obol-oauth-token secret) -extraEnv: - - name: OBOL_OAUTH_TOKEN - valueFrom: - secretKeyRef: - name: obol-oauth-token - key: token - optional: true +# Extra env variables +extraEnv: [] # Extra args for the erpc container extraArgs: [] @@ -119,8 +115,7 @@ affinity: {} imagePullSecrets: [] # Annotations for the Deployment -annotations: - secret.reloader.stakater.com/reload: "obol-oauth-token" +annotations: {} # Liveness probe livenessProbe: @@ -145,8 +140,7 @@ nodeSelector: {} podLabels: {} # Pod annotations -podAnnotations: - secret.reloader.stakater.com/reload: "obol-oauth-token" +podAnnotations: {} # Pod management policy podManagementPolicy: OrderedReady @@ -203,7 +197,7 @@ extraVolumeMounts: [] # Additional ports extraPorts: [] -# Additional env variables (defined above with OBOL_OAUTH_TOKEN) +# Additional env variables serviceMonitor: enabled: false diff --git a/internal/embed/networks/ethereum/helmfile.yaml.gotmpl b/internal/embed/networks/ethereum/helmfile.yaml.gotmpl index 555e5f3..8593c50 100644 --- a/internal/embed/networks/ethereum/helmfile.yaml.gotmpl +++ b/internal/embed/networks/ethereum/helmfile.yaml.gotmpl @@ -34,7 +34,6 @@ releases: enabled: true addresses: mainnet: 
https://mainnet-checkpoint-sync.attestant.io - sepolia: https://checkpoint-sync.sepolia.ethpandaops.io hoodi: https://checkpoint-sync.hoodi.ethpandaops.io # Execution client diff --git a/internal/embed/networks/ethereum/values.yaml.gotmpl b/internal/embed/networks/ethereum/values.yaml.gotmpl index 874a0e2..881e4dd 100644 --- a/internal/embed/networks/ethereum/values.yaml.gotmpl +++ b/internal/embed/networks/ethereum/values.yaml.gotmpl @@ -1,7 +1,7 @@ # Configuration via CLI flags # Template fields populated by obol CLI during network installation -# @enum mainnet,sepolia,hoodi +# @enum mainnet,hoodi # @default mainnet # @description Blockchain network to deploy network: {{.Network}} From 7a003b00c1a8be9a0dac1852b4c9ad85ca467395 Mon Sep 17 00:00:00 2001 From: bussyjd Date: Fri, 13 Feb 2026 19:07:03 +0400 Subject: [PATCH 39/42] chore: switch default model from glm-4.7-flash to gpt-oss:120b-cloud Replace all references to glm-4.7-flash with Ollama's cloud model gpt-oss:120b-cloud. Cloud models run on Ollama's cloud service, eliminating OOM risk on local machines. --- docs/getting-started.md | 4 ++-- internal/embed/infrastructure/base/templates/llm.yaml | 4 ++-- internal/openclaw/openclaw.go | 6 +++--- internal/openclaw/overlay_test.go | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/getting-started.md b/docs/getting-started.md index adbf921..9ee5b57 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -84,10 +84,10 @@ obol kubectl run -n llm inference-test --rm -it --restart=Never \ curl -s --max-time 120 -X POST \ http://llmspy.llm.svc.cluster.local:8000/v1/chat/completions \ -H "Content-Type: application/json" \ - -d '{"model":"glm-4.7-flash","messages":[{"role":"user","content":"Say hello in one word"}],"max_tokens":10}' + -d '{"model":"gpt-oss:120b-cloud","messages":[{"role":"user","content":"Say hello in one word"}],"max_tokens":10}' ``` -Replace `glm-4.7-flash` with whatever model you have loaded in Ollama. 
+Replace `gpt-oss:120b-cloud` with whatever model you have loaded in Ollama. > [!NOTE] > The first request may be slow while the model loads into memory. diff --git a/internal/embed/infrastructure/base/templates/llm.yaml b/internal/embed/infrastructure/base/templates/llm.yaml index 0736a53..7c3ca5c 100644 --- a/internal/embed/infrastructure/base/templates/llm.yaml +++ b/internal/embed/infrastructure/base/templates/llm.yaml @@ -40,7 +40,7 @@ spec: # llms.py v3 configuration for Obol Stack: # - Ollama provider enabled by default (host machine via ollama Service) # - Anthropic and OpenAI providers available (disabled by default; enabled via `obol llm configure`) -# - Default model is glm-4.7-flash +# - Default model is gpt-oss:120b-cloud (Ollama cloud) apiVersion: v1 kind: ConfigMap metadata: @@ -56,7 +56,7 @@ data: "User-Agent": "llmspy.org/3.0" }, "text": { - "model": "glm-4.7-flash", + "model": "gpt-oss:120b-cloud", "messages": [ { "role": "user", "content": [{ "type": "text", "text": "" }] } ] diff --git a/internal/openclaw/openclaw.go b/internal/openclaw/openclaw.go index 59a59e6..7a1697c 100644 --- a/internal/openclaw/openclaw.go +++ b/internal/openclaw/openclaw.go @@ -979,7 +979,7 @@ rbac: } else { b.WriteString(`# Route agent traffic to in-cluster Ollama via llmspy proxy openclaw: - agentModel: ollama/glm-4.7-flash + agentModel: ollama/gpt-oss:120b-cloud gateway: # Allow control UI over HTTP behind Traefik (local dev stack). 
# Required: browser on non-localhost HTTP has no crypto.subtle, @@ -995,8 +995,8 @@ models: apiKeyEnvVar: OLLAMA_API_KEY apiKeyValue: ollama-local models: - - id: glm-4.7-flash - name: GLM-4.7 Flash + - id: gpt-oss:120b-cloud + name: GPT-OSS 120B Cloud `) } diff --git a/internal/openclaw/overlay_test.go b/internal/openclaw/overlay_test.go index fd84994..71b85b8 100644 --- a/internal/openclaw/overlay_test.go +++ b/internal/openclaw/overlay_test.go @@ -141,7 +141,7 @@ func TestGenerateOverlayValues_OllamaDefault(t *testing.T) { // When imported is nil, generateOverlayValues should use Ollama defaults yaml := generateOverlayValues("openclaw-default.obol.stack", nil) - if !strings.Contains(yaml, "agentModel: ollama/glm-4.7-flash") { + if !strings.Contains(yaml, "agentModel: ollama/gpt-oss:120b-cloud") { t.Errorf("default overlay missing ollama agentModel, got:\n%s", yaml) } if !strings.Contains(yaml, "baseUrl: http://llmspy.llm.svc.cluster.local:8000/v1") { From 14901def483e76d9f80e1def4a6a36d312cbb550 Mon Sep 17 00:00:00 2001 From: bussyjd Date: Fri, 13 Feb 2026 20:10:17 +0400 Subject: [PATCH 40/42] fix(openclaw): revert llmspy provider name to ollama for chart compatibility MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The remote OpenClaw Helm chart only iterates hardcoded provider names (ollama, anthropic, openai). Using "llmspy" as the virtual provider name caused it to be silently dropped from the rendered config, breaking the Anthropic inference waterfall. Revert to using "ollama" as the provider name — it still points at llmspy's URL (http://llmspy.llm.svc.cluster.local:8000/v1) with api: openai-completions, so all routing works correctly. Found during pre-production validation. 
--- internal/openclaw/openclaw.go | 20 +++---- internal/openclaw/overlay_test.go | 89 +++++++++++++++---------------- 2 files changed, 53 insertions(+), 56 deletions(-) diff --git a/internal/openclaw/openclaw.go b/internal/openclaw/openclaw.go index 7a1697c..52ba627 100644 --- a/internal/openclaw/openclaw.go +++ b/internal/openclaw/openclaw.go @@ -1163,25 +1163,27 @@ func promptForCloudProvider(reader *bufio.Reader, name, display, modelID, modelN } // buildLLMSpyRoutedOverlay creates an ImportResult that routes a cloud model -// through the llmspy proxy. OpenClaw sees a "llmspy" provider pointing at the -// cluster-wide llmspy gateway, with the cloud model in its model list. The -// actual cloud providers (and default ollama) are disabled in OpenClaw — llmspy -// handles upstream routing based on the bare model ID. +// through the llmspy proxy. OpenClaw sees an "ollama" provider pointing at the +// cluster-wide llmspy gateway, with the cloud model in its model list. We reuse +// the "ollama" provider name because the remote Helm chart only iterates a +// hardcoded list (ollama, anthropic, openai) — using a custom name would cause +// the provider to be silently dropped from the rendered config. +// The actual cloud providers are disabled in OpenClaw — llmspy handles upstream +// routing based on the bare model ID. 
func buildLLMSpyRoutedOverlay(cloud *CloudProviderInfo) *ImportResult { return &ImportResult{ - AgentModel: "llmspy/" + cloud.ModelID, + AgentModel: "ollama/" + cloud.ModelID, Providers: []ImportedProvider{ { - Name: "llmspy", + Name: "ollama", BaseURL: "http://llmspy.llm.svc.cluster.local:8000/v1", API: "openai-completions", - APIKeyEnvVar: "LLMSPY_API_KEY", - APIKey: "llmspy-default", + APIKeyEnvVar: "OLLAMA_API_KEY", + APIKey: "ollama-local", Models: []ImportedModel{ {ID: cloud.ModelID, Name: cloud.Display}, }, }, - {Name: "ollama", Disabled: true}, {Name: "anthropic", Disabled: true}, {Name: "openai", Disabled: true}, }, diff --git a/internal/openclaw/overlay_test.go b/internal/openclaw/overlay_test.go index 71b85b8..33eeb9c 100644 --- a/internal/openclaw/overlay_test.go +++ b/internal/openclaw/overlay_test.go @@ -15,50 +15,48 @@ func TestBuildLLMSpyRoutedOverlay_Anthropic(t *testing.T) { result := buildLLMSpyRoutedOverlay(cloud) - // Check agent model uses llmspy/ prefix for correct OpenClaw provider routing - if result.AgentModel != "llmspy/claude-sonnet-4-5-20250929" { - t.Errorf("AgentModel = %q, want %q", result.AgentModel, "llmspy/claude-sonnet-4-5-20250929") + // Agent model uses ollama/ prefix — the "ollama" provider slot is repurposed + // to point at llmspy, so the model reference must match the provider name. 
+ if result.AgentModel != "ollama/claude-sonnet-4-5-20250929" { + t.Errorf("AgentModel = %q, want %q", result.AgentModel, "ollama/claude-sonnet-4-5-20250929") } - // Check 4 providers: llmspy (enabled), ollama (disabled), anthropic (disabled), openai (disabled) - if len(result.Providers) != 4 { - t.Fatalf("len(Providers) = %d, want 4", len(result.Providers)) + // Check 3 providers: ollama (enabled, pointing at llmspy), anthropic (disabled), openai (disabled) + if len(result.Providers) != 3 { + t.Fatalf("len(Providers) = %d, want 3", len(result.Providers)) } - llmspy := result.Providers[0] - if llmspy.Name != "llmspy" || llmspy.Disabled { - t.Errorf("llmspy: name=%q disabled=%v, want llmspy/false", llmspy.Name, llmspy.Disabled) + ollama := result.Providers[0] + if ollama.Name != "ollama" || ollama.Disabled { + t.Errorf("ollama: name=%q disabled=%v, want ollama/false", ollama.Name, ollama.Disabled) } - if llmspy.BaseURL != "http://llmspy.llm.svc.cluster.local:8000/v1" { - t.Errorf("llmspy.BaseURL = %q", llmspy.BaseURL) + if ollama.BaseURL != "http://llmspy.llm.svc.cluster.local:8000/v1" { + t.Errorf("ollama.BaseURL = %q", ollama.BaseURL) } - if llmspy.APIKeyEnvVar != "LLMSPY_API_KEY" { - t.Errorf("llmspy.APIKeyEnvVar = %q, want LLMSPY_API_KEY", llmspy.APIKeyEnvVar) + if ollama.APIKeyEnvVar != "OLLAMA_API_KEY" { + t.Errorf("ollama.APIKeyEnvVar = %q, want OLLAMA_API_KEY", ollama.APIKeyEnvVar) } - if llmspy.APIKey != "llmspy-default" { - t.Errorf("llmspy.APIKey = %q, want llmspy-default", llmspy.APIKey) + if ollama.APIKey != "ollama-local" { + t.Errorf("ollama.APIKey = %q, want ollama-local", ollama.APIKey) } - if llmspy.API != "openai-completions" { - t.Errorf("llmspy.API = %q, want openai-completions", llmspy.API) + if ollama.API != "openai-completions" { + t.Errorf("ollama.API = %q, want openai-completions", ollama.API) } - if len(llmspy.Models) != 1 || llmspy.Models[0].ID != "claude-sonnet-4-5-20250929" { - t.Errorf("llmspy.Models = %v", llmspy.Models) + if 
len(ollama.Models) != 1 || ollama.Models[0].ID != "claude-sonnet-4-5-20250929" { + t.Errorf("ollama.Models = %v", ollama.Models) } - // ollama, anthropic and openai should be disabled - for _, idx := range []int{1, 2, 3} { + // anthropic and openai should be disabled + for _, idx := range []int{1, 2} { if !result.Providers[idx].Disabled { t.Errorf("Providers[%d] (%s) should be disabled", idx, result.Providers[idx].Name) } } - if result.Providers[1].Name != "ollama" { - t.Errorf("Providers[1].Name = %q, want ollama", result.Providers[1].Name) + if result.Providers[1].Name != "anthropic" { + t.Errorf("Providers[1].Name = %q, want anthropic", result.Providers[1].Name) } - if result.Providers[2].Name != "anthropic" { - t.Errorf("Providers[2].Name = %q, want anthropic", result.Providers[2].Name) - } - if result.Providers[3].Name != "openai" { - t.Errorf("Providers[3].Name = %q, want openai", result.Providers[3].Name) + if result.Providers[2].Name != "openai" { + t.Errorf("Providers[2].Name = %q, want openai", result.Providers[2].Name) } } @@ -72,13 +70,13 @@ func TestBuildLLMSpyRoutedOverlay_OpenAI(t *testing.T) { result := buildLLMSpyRoutedOverlay(cloud) - if result.AgentModel != "llmspy/gpt-5.2" { - t.Errorf("AgentModel = %q, want %q", result.AgentModel, "llmspy/gpt-5.2") + if result.AgentModel != "ollama/gpt-5.2" { + t.Errorf("AgentModel = %q, want %q", result.AgentModel, "ollama/gpt-5.2") } - llmspy := result.Providers[0] - if len(llmspy.Models) != 1 || llmspy.Models[0].ID != "gpt-5.2" { - t.Errorf("llmspy model = %v, want gpt-5.2", llmspy.Models) + ollama := result.Providers[0] + if len(ollama.Models) != 1 || ollama.Models[0].ID != "gpt-5.2" { + t.Errorf("ollama model = %v, want gpt-5.2", ollama.Models) } } @@ -92,26 +90,26 @@ func TestOverlayYAML_LLMSpyRouted(t *testing.T) { result := buildLLMSpyRoutedOverlay(cloud) yaml := TranslateToOverlayYAML(result) - // Agent model should have llmspy/ prefix - if !strings.Contains(yaml, "agentModel: 
llmspy/claude-sonnet-4-5-20250929") { + // Agent model should have ollama/ prefix + if !strings.Contains(yaml, "agentModel: ollama/claude-sonnet-4-5-20250929") { t.Errorf("YAML missing agentModel, got:\n%s", yaml) } - // llmspy should be enabled with llmspy baseUrl - if !strings.Contains(yaml, "llmspy:\n enabled: true") { - t.Errorf("YAML missing enabled llmspy provider, got:\n%s", yaml) + // ollama should be enabled with llmspy baseUrl + if !strings.Contains(yaml, "ollama:\n enabled: true") { + t.Errorf("YAML missing enabled ollama provider, got:\n%s", yaml) } if !strings.Contains(yaml, "baseUrl: http://llmspy.llm.svc.cluster.local:8000/v1") { t.Errorf("YAML missing llmspy baseUrl, got:\n%s", yaml) } - // apiKeyEnvVar should be LLMSPY_API_KEY - if !strings.Contains(yaml, "apiKeyEnvVar: LLMSPY_API_KEY") { + // apiKeyEnvVar should be OLLAMA_API_KEY + if !strings.Contains(yaml, "apiKeyEnvVar: OLLAMA_API_KEY") { t.Errorf("YAML missing apiKeyEnvVar, got:\n%s", yaml) } - // apiKeyValue should be llmspy-default - if !strings.Contains(yaml, "apiKeyValue: llmspy-default") { + // apiKeyValue should be ollama-local + if !strings.Contains(yaml, "apiKeyValue: ollama-local") { t.Errorf("YAML missing apiKeyValue, got:\n%s", yaml) } @@ -120,15 +118,12 @@ func TestOverlayYAML_LLMSpyRouted(t *testing.T) { t.Errorf("YAML missing api: openai-completions, got:\n%s", yaml) } - // Cloud model should appear in llmspy's model list + // Cloud model should appear in ollama's model list if !strings.Contains(yaml, "- id: claude-sonnet-4-5-20250929") { t.Errorf("YAML missing cloud model ID, got:\n%s", yaml) } - // ollama, anthropic and openai should be disabled - if !strings.Contains(yaml, "ollama:\n enabled: false") { - t.Errorf("YAML missing disabled ollama, got:\n%s", yaml) - } + // anthropic and openai should be disabled if !strings.Contains(yaml, "anthropic:\n enabled: false") { t.Errorf("YAML missing disabled anthropic, got:\n%s", yaml) } From 64ef177b131bea9cb69002e9dec73ee96feb5df3 Mon 
Sep 17 00:00:00 2001 From: bussyjd Date: Fri, 13 Feb 2026 20:10:48 +0400 Subject: [PATCH 41/42] fix(llm): use llmspy image for init container with provider merge script MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace busybox init container with the llmspy image itself, using a Python merge script that: 1. Copies llms.json from ConfigMap (controls enabled/disabled state) 2. Loads the full providers.json from the llmspy package (has model definitions and npm package refs for Anthropic/OpenAI) 3. Merges ConfigMap overrides (Ollama endpoint, API key refs) Also remove "models": {} and "all_models": true from cloud providers in the ConfigMap — these crash llmspy since only Ollama has a load_models() implementation. Add "npm" field for Anthropic/OpenAI. Found during pre-production Anthropic integration validation. --- .../infrastructure/base/templates/llm.yaml | 46 +++++++++++++------ 1 file changed, 31 insertions(+), 15 deletions(-) diff --git a/internal/embed/infrastructure/base/templates/llm.yaml b/internal/embed/infrastructure/base/templates/llm.yaml index 7c3ca5c..b54d74a 100644 --- a/internal/embed/infrastructure/base/templates/llm.yaml +++ b/internal/embed/infrastructure/base/templates/llm.yaml @@ -85,15 +85,13 @@ data: }, "anthropic": { "id": "anthropic", - "api_key": "$ANTHROPIC_API_KEY", - "models": {}, - "all_models": true + "npm": "@ai-sdk/anthropic", + "api_key": "$ANTHROPIC_API_KEY" }, "openai": { "id": "openai", - "api_key": "$OPENAI_API_KEY", - "models": {}, - "all_models": true + "npm": "openai", + "api_key": "$OPENAI_API_KEY" } } @@ -129,20 +127,38 @@ spec: app: llmspy spec: initContainers: - # Seed ~/.llms/llms.json from the ConfigMap. llms.py also writes runtime - # state (e.g. analytics) under ~/.llms, so we keep the directory writable. + # Seed ~/.llms/ from the ConfigMap + package defaults. + # llms.json is taken from the ConfigMap (controls which providers are enabled). 
+ # providers.json is taken from the llmspy package (has full model definitions) + # and then merged with ConfigMap overrides (Ollama endpoint, API key refs). - name: seed-config - image: busybox:1.36.1 + image: ghcr.io/obolnetwork/llms:3.0.32-obol.1-rc.4 imagePullPolicy: IfNotPresent command: - - sh + - python3 - -c - | - set -eu - mkdir -p /data - cp /config/llms.json /data/llms.json - cp /config/providers.json /data/providers.json - chmod 666 /data/llms.json /data/providers.json + import json, os, shutil + os.makedirs('/data', exist_ok=True) + # Copy llms.json from ConfigMap (provider enabled/disabled state) + shutil.copy2('/config/llms.json', '/data/llms.json') + # Start with package providers.json (has full model definitions) + import llms + pkg_dir = os.path.dirname(llms.__file__) + with open(os.path.join(pkg_dir, 'providers.json')) as f: + providers = json.load(f) + # Merge ConfigMap overrides (Ollama endpoint, API key refs) + with open('/config/providers.json') as f: + overrides = json.load(f) + for pid, pcfg in overrides.items(): + if pid in providers: + providers[pid].update(pcfg) + else: + providers[pid] = pcfg + with open('/data/providers.json', 'w') as f: + json.dump(providers, f, indent=2) + os.chmod('/data/llms.json', 0o666) + os.chmod('/data/providers.json', 0o666) volumeMounts: - name: llmspy-config mountPath: /config From 5ab12689c1f09e5a3e6a2666a02e0023349c6caa Mon Sep 17 00:00:00 2001 From: bussyjd Date: Fri, 13 Feb 2026 23:02:37 +0400 Subject: [PATCH 42/42] fix(obolup): auto-start Docker daemon on Linux (snap + systemd) When Docker is installed but the daemon isn't running, obolup now attempts to start it automatically: 1. Try systemd (apt/yum installs): sudo systemctl start docker 2. Try snap: sudo snap start docker If auto-start fails, the error message now shows both systemd and snap commands instead of only systemctl. 
Fixes Docker startup on Ubuntu with snap-installed Docker where systemctl start docker fails with "Unit docker.service not found". --- obolup.sh | 38 ++++++++++++++++++++++++++++---------- 1 file changed, 28 insertions(+), 10 deletions(-) diff --git a/obolup.sh b/obolup.sh index 7af0f80..30693e2 100755 --- a/obolup.sh +++ b/obolup.sh @@ -108,17 +108,35 @@ check_docker() { return 1 fi - # Check if Docker daemon is running + # Check if Docker daemon is running; try to start it automatically on Linux if ! docker info >/dev/null 2>&1; then - log_error "Docker daemon is not running" - echo "" - echo "Please start the Docker daemon:" - echo " • Linux: sudo systemctl start docker" - echo " • macOS/Windows: Start Docker Desktop application" - echo "" - echo "Then run this installer again." - echo "" - return 1 + if [[ "$(uname -s)" == "Linux" ]]; then + log_warn "Docker daemon is not running — attempting to start..." + # Try systemd first (apt/yum installs), then snap + if command_exists systemctl && systemctl list-unit-files docker.service >/dev/null 2>&1; then + sudo systemctl start docker 2>/dev/null && sleep 2 + elif snap list docker >/dev/null 2>&1; then + sudo snap start docker 2>/dev/null && sleep 3 + fi + fi + + # Re-check after start attempt + if ! docker info >/dev/null 2>&1; then + log_error "Docker daemon is not running" + echo "" + echo "Please start the Docker daemon:" + if [[ "$(uname -s)" == "Linux" ]]; then + echo " • systemd: sudo systemctl start docker" + echo " • snap: sudo snap start docker" + else + echo " • macOS/Windows: Start Docker Desktop application" + fi + echo "" + echo "Then run this installer again." + echo "" + return 1 + fi + log_success "Docker daemon started" fi # Check Docker version (require at least 20.10.0 for k3d compatibility)