Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 70 additions & 0 deletions .agents/skills/test-backend/SKILL.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
---
name: test-backend
description: Launch and test the k3d or k3s backend lifecycle (init, up, kubectl, down, purge). Use when you want to run a full integration test of a stack backend.
user_invocable: true
metadata:
author: obol-team
version: "1.0.0"
domain: testing
triggers: test backend, test k3d, test k3s, integration test, flow test, backend test
role: tester
scope: validation
output-format: report
---

# Test Backend Skill

Runs a full lifecycle integration test for the obol stack backend (k3d or k3s).

## Arguments

The skill accepts an optional argument specifying which backend to test:

- `k3s` - Test the k3s (bare-metal) backend only
- `k3d` - Test the k3d (Docker-based) backend only
- `all` - Test both backends sequentially (default)
- No argument defaults to `all`

Examples:
- `/test-backend k3s`
- `/test-backend k3d`
- `/test-backend all`
- `/test-backend` (same as `all`)

## Workflow

### 1. Pre-flight

- Build the obol binary: `go build -o .workspace/bin/obol ./cmd/obol` from the project root
- Verify the binary was created successfully
- Set `OBOL_DEVELOPMENT=true` and add `.workspace/bin` to PATH

### 2. Run Test Script

Based on the argument, run the appropriate test script(s) located alongside this skill:

- **k3s**: Run `.agents/skills/test-backend/scripts/test-k3s.sh`
- **k3d**: Run `.agents/skills/test-backend/scripts/test-k3d.sh`
- **all**: Run k3s first, then k3d (k3s requires sudo so test it first while credentials are fresh)

Execute the script via the Bash tool from the project root directory. The scripts require:
- **k3s**: Linux, sudo access, k3s binary in `.workspace/bin/`
- **k3d**: Docker running, k3d binary in `.workspace/bin/`

### 3. Report Results

After each script completes, report:
- Total pass/fail counts (shown in the RESULTS line)
- Any specific test failures with their names
- Overall verdict: all green or needs attention

If a test script fails (non-zero exit), read the output to identify which test(s) failed and summarize.

## Important Notes

- The k3s backend requires **sudo access** - the user may need to enter their password
- The k3d backend requires **Docker to be running**
- Each test script performs its own cleanup (purge) before and after
- Tests are sequential and ordered: init -> up -> verify -> down -> restart -> purge
- Typical runtime: ~2-4 minutes per backend
- If the environment has issues (Docker not starting, k3s not installing), report the problem clearly rather than retrying endlessly
153 changes: 153 additions & 0 deletions .agents/skills/test-backend/scripts/test-k3d.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
#!/usr/bin/env bash
set -euo pipefail

# K3d Backend Integration Test
# Requires: Docker running, k3d binary, OBOL_DEVELOPMENT=true

PROJECT_ROOT="$(cd "$(dirname "$0")/../../../.." && pwd)"
OBOL="${PROJECT_ROOT}/.workspace/bin/obol"
export OBOL_DEVELOPMENT=true
export PATH="${PROJECT_ROOT}/.workspace/bin:$PATH"

cd "$PROJECT_ROOT"

PASS=0
FAIL=0

# Print a message prefixed with the current wall-clock time (HH:MM:SS).
log() {
    printf '%s %s\n' "$(date +%H:%M:%S)" "$*"
}

# Record a passing check: log it and increment the PASS counter.
pass() {
    log " PASS: $*"
    PASS=$((PASS + 1))
}

# Record a failing check: log it and increment the FAIL counter.
fail() {
    log " FAIL: $*"
    FAIL=$((FAIL + 1))
}

# Run a command and record the outcome under the given description.
#   $1    - human-readable description of the check
#   $2... - command (and arguments) to execute; exit status 0 counts as a pass
check() {
    local label="$1"
    shift
    if ! "$@"; then
        fail "$label"
        return
    fi
    pass "$label"
}

# Inverse of check: the command is expected to fail.
#   $1    - human-readable description of the check
#   $2... - command (and arguments) to execute; its stderr is discarded
# A non-zero exit status counts as a pass; success is recorded as a failure.
check_fail() {
    local label="$1"
    shift
    if "$@" 2>/dev/null; then
        fail "$label (should have failed)"
    else
        pass "$label"
    fi
}

# Succeed only when `obol kubectl get nodes` lists at least one node whose
# STATUS column (second field) starts with "Ready".
#
# A plain substring match on "Ready" would also accept "NotReady", so the
# status field is matched explicitly. awk consumes all of its input, which
# avoids kubectl being killed by SIGPIPE (and the pipeline failing under
# `set -o pipefail`) the way an early-exiting `grep -q` can.
# "$OBOL" is quoted so a project path containing spaces still works.
k3d_is_functional() {
    "$OBOL" kubectl get nodes --no-headers 2>/dev/null |
        awk '$2 ~ /^Ready/ { ready = 1 } END { exit !ready }'
}

# Pre-flight: this test needs a reachable Docker daemon; stop immediately
# with a clear message when `docker info` fails.
docker info >/dev/null 2>&1 || {
    log "ERROR: Docker is not running. Start Docker and try again."
    exit 1
}

# Banner marking the start of the run in the log output.
log "========================================="
log "K3d Backend Integration Test"
log "========================================="

# --- Cleanup ---
# Best-effort purge of whatever a previous run left behind; errors are
# silenced and ignored so a missing stack does not abort the script.
log "--- Cleanup: purging any existing stack ---"
$OBOL stack purge --force 2>/dev/null || true

# --- TEST 1: stack init (default = k3d) ---
# Initialise with no --backend flag and verify the generated config files.
log ""
log "--- TEST 1: stack init (default = k3d) ---"
check "stack init" "$OBOL" stack init
check "k3d.yaml exists" test -f .workspace/config/k3d.yaml
check ".stack-id exists" test -f .workspace/config/.stack-id
check ".stack-backend exists" test -f .workspace/config/.stack-backend
check "defaults/ directory exists" test -d .workspace/config/defaults
# `|| true` keeps `set -e` from aborting the whole script (and losing the
# RESULTS line) when a file is missing; the checks above already recorded
# that failure, and the empty value makes the comparison below fail too.
BACKEND=$(cat .workspace/config/.stack-backend 2>/dev/null || true)
check "backend is k3d" test "$BACKEND" = "k3d"
STACK_ID=$(cat .workspace/config/.stack-id 2>/dev/null || true)
log " Stack ID: $STACK_ID"

# --- TEST 2: stack init again (should fail without --force) ---
log ""
log "--- TEST 2: stack init again (should fail without --force) ---"
check_fail "init without --force correctly rejected" "$OBOL" stack init

# --- TEST 3: stack init --force ---
# Run through check so a failing re-init is counted instead of terminating
# the script under `set -e`.
log ""
log "--- TEST 3: stack init --force ---"
check "stack init --force" "$OBOL" stack init --force
NEW_ID=$(cat .workspace/config/.stack-id 2>/dev/null || true)
# Require a non-empty ID so two unreadable (empty) IDs do not count as
# "preserved".
if [ -n "$NEW_ID" ] && [ "$STACK_ID" = "$NEW_ID" ]; then
    pass "stack ID preserved on --force ($STACK_ID)"
else
    fail "stack ID preserved on --force ($STACK_ID)"
fi

# --- TEST 4: stack up ---
# Bring the stack up and confirm a kubeconfig was written.
log ""
log "--- TEST 4: stack up ---"
check "stack up" $OBOL stack up
check "kubeconfig.yaml exists" test -f .workspace/config/kubeconfig.yaml

# Wait for nodes to be ready (k3d can take a moment)
# SECONDS is bash's built-in elapsed-time counter: poll every 3 seconds and
# give up once 120 seconds have passed.
log " Waiting for nodes to be ready..."
DEADLINE=$((SECONDS + 120))
while [ $SECONDS -lt $DEADLINE ]; do
if k3d_is_functional; then break; fi
sleep 3
done
# Final probe records the result whether the loop broke early or timed out.
check "k3d is functional (nodes ready)" k3d_is_functional

# --- TEST 5: kubectl passthrough ---
# Count the lines returned by `obol kubectl` for nodes and namespaces.
# With `set -euo pipefail`, a failing kubectl inside the pipeline would make
# the assignment fail and abort the script; `|| true` lets wc's count (0)
# through so the check below records a normal failure instead.
log ""
log "--- TEST 5: kubectl passthrough ---"
NODES=$("$OBOL" kubectl get nodes --no-headers 2>/dev/null | wc -l || true)
check "kubectl sees nodes ($NODES)" test "$NODES" -ge 1

NS=$("$OBOL" kubectl get namespaces --no-headers 2>/dev/null | wc -l || true)
check "kubectl sees namespaces ($NS)" test "$NS" -ge 1

# --- TEST 6: stack down ---
# Stopping the stack must leave the configuration (stack ID) on disk.
log ""
log "--- TEST 6: stack down ---"
check "stack down" $OBOL stack down
check "config preserved after down" test -f .workspace/config/.stack-id

# Verify cluster stopped (kubectl should fail)
# Short fixed pause before probing; check_fail passes when kubectl errors.
sleep 2
check_fail "kubectl unreachable after down" $OBOL kubectl get nodes --no-headers

# --- TEST 7: stack down already stopped ---
# A second `stack down` on a stopped stack is expected to succeed.
log ""
log "--- TEST 7: stack down already stopped ---"
check "stack down (already stopped)" $OBOL stack down

# --- TEST 8: stack up (restart after down) ---
log ""
log "--- TEST 8: stack up (restart) ---"
check "stack up (restart)" "$OBOL" stack up

# Wait for nodes to be ready after restart: poll every 3 seconds for up to
# 120 seconds (SECONDS is bash's built-in elapsed-time counter).
log " Waiting for nodes to be ready..."
DEADLINE=$((SECONDS + 120))
while [ "$SECONDS" -lt "$DEADLINE" ]; do
    if k3d_is_functional; then break; fi
    sleep 3
done
check "k3d functional after restart" k3d_is_functional

# Count nodes whose STATUS column (second field) starts with "Ready".
# A substring count of "Ready" would also count "NotReady" nodes. awk always
# prints a number, and `|| true` keeps a failing kubectl from aborting the
# script under `set -e` / `pipefail`.
READY=$("$OBOL" kubectl get nodes --no-headers 2>/dev/null |
    awk '$2 ~ /^Ready/ { n++ } END { print n + 0 }' || true)
check "node ready after restart ($READY)" test "$READY" -ge 1

# --- TEST 9: stack purge ---
# Purge without --force; afterwards the stack ID file must be gone.
log ""
log "--- TEST 9: stack purge ---"
check "stack purge" $OBOL stack purge
sleep 2
check "config removed" test ! -f .workspace/config/.stack-id

# --- TEST 10: full cycle + purge --force ---
# Re-create and start a stack, then purge with --force while it is up.
log ""
log "--- TEST 10: full cycle + purge --force ---"
check "init for purge test" $OBOL stack init
check "up for purge test" $OBOL stack up
check "purge --force" $OBOL stack purge --force
sleep 2
check "config removed after purge --force" test ! -f .workspace/config/.stack-id

# Summary banner with the accumulated pass/fail counters.
log ""
log "========================================="
log "K3d RESULTS: $PASS passed, $FAIL failed"
log "========================================="

# Exit non-zero when at least one check failed so callers can detect it.
[ "$FAIL" -le 0 ] || exit 1
149 changes: 149 additions & 0 deletions .agents/skills/test-backend/scripts/test-k3s.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
#!/usr/bin/env bash
set -euo pipefail

# K3s Backend Integration Test
# Requires: Linux, sudo access, k3s binary, OBOL_DEVELOPMENT=true

PROJECT_ROOT="$(cd "$(dirname "$0")/../../../.." && pwd)"
OBOL="${PROJECT_ROOT}/.workspace/bin/obol"
export OBOL_DEVELOPMENT=true
export PATH="${PROJECT_ROOT}/.workspace/bin:$PATH"

cd "$PROJECT_ROOT"

PASS=0
FAIL=0

# Print a message prefixed with the current wall-clock time (HH:MM:SS).
log() {
    printf '%s %s\n' "$(date +%H:%M:%S)" "$*"
}

# Record a passing check: log it and increment the PASS counter.
pass() {
    log " PASS: $*"
    PASS=$((PASS + 1))
}

# Record a failing check: log it and increment the FAIL counter.
fail() {
    log " FAIL: $*"
    FAIL=$((FAIL + 1))
}

# Run a command and record the outcome under the given description.
#   $1    - human-readable description of the check
#   $2... - command (and arguments) to execute; exit status 0 counts as a pass
check() {
    local label="$1"
    shift
    if ! "$@"; then
        fail "$label"
        return
    fi
    pass "$label"
}

# Inverse of check: the command is expected to fail.
#   $1    - human-readable description of the check
#   $2... - command (and arguments) to execute; its stderr is discarded
# A non-zero exit status counts as a pass; success is recorded as a failure.
check_fail() {
    local label="$1"
    shift
    if "$@" 2>/dev/null; then
        fail "$label (should have failed)"
    else
        pass "$label"
    fi
}

# Succeed only when `obol kubectl get nodes` lists at least one node whose
# STATUS column (second field) starts with "Ready".
#
# A plain substring match on "Ready" would also accept "NotReady", so the
# status field is matched explicitly. awk consumes all of its input, which
# avoids kubectl being killed by SIGPIPE (and the pipeline failing under
# `set -o pipefail`) the way an early-exiting `grep -q` can.
# "$OBOL" is quoted so a project path containing spaces still works.
k3s_is_functional() {
    "$OBOL" kubectl get nodes --no-headers 2>/dev/null |
        awk '$2 ~ /^Ready/ { ready = 1 } END { exit !ready }'
}

# Banner marking the start of the run in the log output.
log "========================================="
log "K3s Backend Integration Test"
log "========================================="

# --- Cleanup ---
# Best-effort purge of whatever a previous run left behind; errors are
# silenced and ignored so a missing stack does not abort the script.
log "--- Cleanup: purging any existing stack ---"
$OBOL stack purge --force 2>/dev/null || true

# --- TEST 1: stack init --backend k3s ---
# Initialise with the k3s backend and verify the generated config files.
log ""
log "--- TEST 1: stack init --backend k3s ---"
check "stack init --backend k3s" "$OBOL" stack init --backend k3s
check "k3s-config.yaml exists" test -f .workspace/config/k3s-config.yaml
check ".stack-id exists" test -f .workspace/config/.stack-id
check ".stack-backend exists" test -f .workspace/config/.stack-backend
check "defaults/ directory exists" test -d .workspace/config/defaults
# `|| true` keeps `set -e` from aborting the whole script (and losing the
# RESULTS line) when a file is missing; the checks above already recorded
# that failure, and the empty value makes the comparison below fail too.
BACKEND=$(cat .workspace/config/.stack-backend 2>/dev/null || true)
check "backend is k3s" test "$BACKEND" = "k3s"
STACK_ID=$(cat .workspace/config/.stack-id 2>/dev/null || true)
log " Stack ID: $STACK_ID"

# --- TEST 2: stack init again (should fail without --force) ---
log ""
log "--- TEST 2: stack init again (should fail without --force) ---"
check_fail "init without --force correctly rejected" "$OBOL" stack init --backend k3s

# --- TEST 3: stack init --force (should preserve stack ID) ---
# Run through check so a failing re-init is counted instead of terminating
# the script under `set -e`.
log ""
log "--- TEST 3: stack init --force (should preserve stack ID) ---"
check "stack init --backend k3s --force" "$OBOL" stack init --backend k3s --force
NEW_ID=$(cat .workspace/config/.stack-id 2>/dev/null || true)
# Require a non-empty ID so two unreadable (empty) IDs do not count as
# "preserved".
if [ -n "$NEW_ID" ] && [ "$STACK_ID" = "$NEW_ID" ]; then
    pass "stack ID preserved on --force ($STACK_ID)"
else
    fail "stack ID preserved on --force ($STACK_ID)"
fi

# --- TEST 4: stack up ---
# Bring the stack up, then confirm the PID file and kubeconfig were written
# and that kubectl reports a ready node.
# NOTE(review): unlike the k3d script there is no readiness polling loop
# here; this presumably relies on `stack up` returning only once the node is
# ready — confirm.
log ""
log "--- TEST 4: stack up ---"
check "stack up" $OBOL stack up
check "PID file exists" test -f .workspace/config/.k3s.pid
check "kubeconfig.yaml exists" test -f .workspace/config/kubeconfig.yaml
check "k3s is functional (nodes ready)" k3s_is_functional

# --- TEST 5: kubectl passthrough ---
# Count the lines returned by `obol kubectl` for nodes and namespaces.
# With `set -euo pipefail`, a failing kubectl inside the pipeline would make
# the assignment fail and abort the script; `|| true` lets wc's count (0)
# through so the check below records a normal failure instead.
log ""
log "--- TEST 5: kubectl passthrough ---"
NODES=$("$OBOL" kubectl get nodes --no-headers 2>/dev/null | wc -l || true)
check "kubectl sees nodes ($NODES)" test "$NODES" -ge 1

NS=$("$OBOL" kubectl get namespaces --no-headers 2>/dev/null | wc -l || true)
check "kubectl sees namespaces ($NS)" test "$NS" -ge 1

# --- TEST 6: stack up idempotent (already running) ---
# Running `stack up` on a running stack must leave the recorded PID unchanged.
log ""
log "--- TEST 6: stack up idempotent ---"
# `|| true` keeps `set -e` from aborting the script when the PID file is
# missing; the empty value then fails the comparison below.
OLD_PID=$(cat .workspace/config/.k3s.pid 2>/dev/null || true)
check "stack up while running" "$OBOL" stack up
NEW_PID=$(cat .workspace/config/.k3s.pid 2>/dev/null || true)
# Require a non-empty PID so two unreadable (empty) values do not count as
# "unchanged".
if [ -n "$OLD_PID" ] && [ "$OLD_PID" = "$NEW_PID" ]; then
    pass "PID unchanged (idempotent) ($OLD_PID = $NEW_PID)"
else
    fail "PID unchanged (idempotent) ($OLD_PID = $NEW_PID)"
fi

# --- TEST 7: stack down ---
# Stopping the stack must remove the PID file but keep the stack ID on disk.
log ""
log "--- TEST 7: stack down ---"
check "stack down" $OBOL stack down
check "PID file cleaned up" test ! -f .workspace/config/.k3s.pid
check "config preserved after down" test -f .workspace/config/.stack-id
# Poll up to 15 times, 2 seconds apart, until kubectl starts failing.
# Only stderr is silenced, so a successful probe still prints the node list.
log " Waiting for API server to become unreachable..."
API_DOWN=false
for i in $(seq 1 15); do
if ! $OBOL kubectl get nodes --no-headers 2>/dev/null; then
API_DOWN=true
break
fi
sleep 2
done
check "kubectl unreachable after down" test "$API_DOWN" = "true"

# --- TEST 8: stack down again (already stopped) ---
# A second `stack down` on a stopped stack is expected to succeed.
log ""
log "--- TEST 8: stack down already stopped ---"
check "stack down (already stopped)" $OBOL stack down

# --- TEST 9: stack up (restart after down) ---
log ""
log "--- TEST 9: stack up (restart) ---"
check "stack up (restart)" "$OBOL" stack up
check "PID file exists after restart" test -f .workspace/config/.k3s.pid
check "k3s functional after restart" k3s_is_functional

# Count nodes whose STATUS column (second field) starts with "Ready".
# A substring count of "Ready" would also count "NotReady" nodes. awk always
# prints a number, and `|| true` keeps a failing kubectl from aborting the
# script under `set -e` / `pipefail`.
READY=$("$OBOL" kubectl get nodes --no-headers 2>/dev/null |
    awk '$2 ~ /^Ready/ { n++ } END { print n + 0 }' || true)
check "node ready after restart ($READY)" test "$READY" -ge 1

# --- TEST 10: stack purge (without --force) ---
# After a purge, neither the stack ID nor the k3s PID file may remain.
log ""
log "--- TEST 10: stack purge ---"
check "stack purge" $OBOL stack purge
sleep 2
check "config removed" test ! -f .workspace/config/.stack-id
check "k3s pid file removed" test ! -f .workspace/config/.k3s.pid

# --- TEST 11: full cycle + purge --force ---
# Re-create and start a k3s stack, then purge with --force while it is up.
log ""
log "--- TEST 11: full cycle + purge --force ---"
check "init for purge test" $OBOL stack init --backend k3s
check "up for purge test" $OBOL stack up
check "purge --force" $OBOL stack purge --force
sleep 2
check "config removed after purge --force" test ! -f .workspace/config/.stack-id

# Summary banner with the accumulated pass/fail counters.
log ""
log "========================================="
log "K3s RESULTS: $PASS passed, $FAIL failed"
log "========================================="

# Exit non-zero when at least one check failed so callers can detect it.
[ "$FAIL" -le 0 ] || exit 1
2 changes: 1 addition & 1 deletion cmd/obol/bootstrap.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ func bootstrapCommand(cfg *config.Config) *cli.Command {

// Step 1: Initialize stack
fmt.Println("Initializing stack configuration...")
if err := stack.Init(cfg, false); err != nil {
if err := stack.Init(cfg, false, ""); err != nil {
// Check if it's an "already exists" error - that's okay
if !strings.Contains(err.Error(), "already exists") {
return fmt.Errorf("bootstrap init failed: %w", err)
Expand Down
7 changes: 6 additions & 1 deletion cmd/obol/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,9 +102,14 @@ GLOBAL OPTIONS:
Aliases: []string{"f"},
Usage: "Force overwrite existing configuration",
},
&cli.StringFlag{
Name: "backend",
Usage: "Cluster backend: k3d (Docker-based) or k3s (bare-metal)",
EnvVars: []string{"OBOL_BACKEND"},
},
},
Action: func(c *cli.Context) error {
return stack.Init(cfg, c.Bool("force"))
return stack.Init(cfg, c.Bool("force"), c.String("backend"))
},
},
{
Expand Down
Loading