diff --git a/Makefile b/Makefile
index 7f5ad13f..622806ee 100644
--- a/Makefile
+++ b/Makefile
@@ -89,6 +89,13 @@ test: lint unit e2e
 e2e:
 	KILO_IMAGE=squat/kilo:test bash_unit $(BASH_UNIT_FLAGS) ./e2e/setup.sh ./e2e/full-mesh.sh ./e2e/location-mesh.sh ./e2e/cross-mesh.sh ./e2e/multi-cluster.sh ./e2e/handlers.sh ./e2e/kgctl.sh ./e2e/teardown.sh
 
+# e2e-cilium runs the Kilo --compatibility=cilium e2e suite against a
+# kind cluster where Cilium provides the CNI. It is a separate target
+# from `e2e` because the Cilium cluster is incompatible with the Kilo
+# bridge CNI used by the default suite.
+e2e-cilium:
+	KILO_IMAGE=squat/kilo:test bash_unit $(BASH_UNIT_FLAGS) ./e2e/cilium-setup.sh ./e2e/cilium-cross-mesh.sh ./e2e/cilium-teardown.sh
+
 docs/kg.md:
 	go run ./cmd/kg/... --help | head -n -2 > help.txt
 	go tool embedmd -w docs/kg.md
diff --git a/e2e/cilium-cross-mesh.sh b/e2e/cilium-cross-mesh.sh
new file mode 100755
index 00000000..1732e790
--- /dev/null
+++ b/e2e/cilium-cross-mesh.sh
@@ -0,0 +1,34 @@
+#!/usr/bin/env bash
+# shellcheck disable=SC1091
+. lib.sh
+
+# Cilium-CNI counterpart of e2e/cross-mesh.sh. The Kilo DaemonSet is the
+# one applied by create_cilium_cluster (kilo-kind-cilium.yaml), which
+# already runs Kilo with --cni=false --compatibility=cilium. This suite
+# only annotates locations and switches granularity to "cross".
+setup_suite() {
+    _kubectl annotate node "$KIND_CLUSTER-control-plane" kilo.squat.ai/location=loc-a --overwrite
+    _kubectl annotate node "$KIND_CLUSTER-worker" kilo.squat.ai/location=loc-a --overwrite
+    _kubectl annotate node "$KIND_CLUSTER-worker2" kilo.squat.ai/location=loc-b --overwrite
+    # shellcheck disable=SC2016
+    _kubectl patch ds -n kube-system kilo -p '{"spec":{"template":{"spec":{"containers":[{"name":"kilo","args":["--hostname=$(NODE_NAME)","--create-interface=false","--cni=false","--compatibility=cilium","--mesh-granularity=cross","--kubeconfig=/etc/kubernetes/kubeconfig","--internal-cidr=$(NODE_IP)/32"]}]}}}}'
+    block_until_ready_by_name kube-system kilo-userspace
+}
+
+test_cilium_cross_mesh_connectivity() {
+    assert "retry 30 5 '' check_ping" "should be able to ping all Pods over Cilium VXLAN + Kilo cross mesh"
+    assert "retry 10 5 'the adjacency matrix is not complete yet' check_adjacent 3" "adjacency should return the right number of successful pings"
+    echo "sleep for 30s (one reconciliation period) and try again..."
+    sleep 30
+    assert "retry 10 5 'the adjacency matrix is not complete yet' check_adjacent 3" "adjacency should return the right number of successful pings after reconciling"
+}
+
+test_cilium_cross_peer_topology() {
+    local CP_PEERS WORKER_PEERS WORKER2_PEERS
+    CP_PEERS=$(_kgctl showconf node "$KIND_CLUSTER-control-plane" | grep -c '^\[Peer\]')
+    WORKER_PEERS=$(_kgctl showconf node "$KIND_CLUSTER-worker" | grep -c '^\[Peer\]')
+    WORKER2_PEERS=$(_kgctl showconf node "$KIND_CLUSTER-worker2" | grep -c '^\[Peer\]')
+    assert_equals "1" "$CP_PEERS" "control-plane (loc-a) should have 1 peer (the loc-b node)"
+    assert_equals "1" "$WORKER_PEERS" "worker (loc-a) should have 1 peer (the loc-b node)"
+    assert_equals "2" "$WORKER2_PEERS" "worker2 (loc-b) should have 2 peers (both loc-a nodes)"
+}
diff --git a/e2e/cilium-setup.sh b/e2e/cilium-setup.sh
new file mode 100755
index 00000000..6ba03634
--- /dev/null
+++ b/e2e/cilium-setup.sh
@@ -0,0 +1,10 @@
+#!/usr/bin/env bash
+# shellcheck disable=SC1091
+. lib.sh
+
+# Bring up a kind cluster with Cilium as the CNI for the Cilium-mode e2e
+# suite. Counterpart of e2e/setup.sh, which provisions a cluster that
+# uses the Kilo bridge CNI.
+setup_suite() {
+    create_cilium_cluster "$(build_kind_config 2)"
+}
diff --git a/e2e/cilium-teardown.sh b/e2e/cilium-teardown.sh
new file mode 100755
index 00000000..4be4f76d
--- /dev/null
+++ b/e2e/cilium-teardown.sh
@@ -0,0 +1,10 @@
+#!/usr/bin/env bash
+# shellcheck disable=SC1091
+. lib.sh
+
+teardown_suite () {
+    if [ -n "$E2E_SKIP_TEARDOWN_ON_FAILURE" ]; then
+        return
+    fi
+    delete_cluster
+}
diff --git a/e2e/kilo-kind-cilium.yaml b/e2e/kilo-kind-cilium.yaml
new file mode 100644
index 00000000..b010447e
--- /dev/null
+++ b/e2e/kilo-kind-cilium.yaml
@@ -0,0 +1,146 @@
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: kilo
+  namespace: kube-system
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: kilo
+rules:
+- apiGroups:
+  - ""
+  resources:
+  - nodes
+  verbs:
+  - list
+  - patch
+  - watch
+- apiGroups:
+  - kilo.squat.ai
+  resources:
+  - peers
+  verbs:
+  - list
+  - watch
+- apiGroups:
+  - apiextensions.k8s.io
+  resources:
+  - customresourcedefinitions
+  verbs:
+  - get
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: kilo
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: kilo
+subjects:
+- kind: ServiceAccount
+  name: kilo
+  namespace: kube-system
+---
+# Kilo DaemonSet for the Cilium e2e suite. The CNI is provided by Cilium
+# (no kilo CNI ConfigMap and no install-cni init container), so Kilo runs
+# in --cni=false / --compatibility=cilium mode and only manages the WG mesh
+# on top of Cilium's overlay.
+apiVersion: apps/v1
+kind: DaemonSet
+metadata:
+  name: kilo
+  namespace: kube-system
+  labels:
+    app.kubernetes.io/name: kilo-userspace
+    app.kubernetes.io/part-of: kilo
+spec:
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: kilo-userspace
+      app.kubernetes.io/part-of: kilo
+  template:
+    metadata:
+      labels:
+        app.kubernetes.io/name: kilo-userspace
+        app.kubernetes.io/part-of: kilo
+    spec:
+      serviceAccountName: kilo
+      hostNetwork: true
+      containers:
+      - name: kilo
+        image: squat/kilo:test
+        imagePullPolicy: Never
+        args:
+        - --hostname=$(NODE_NAME)
+        - --create-interface=false
+        - --cni=false
+        - --compatibility=cilium
+        - --mesh-granularity=full
+        - --kubeconfig=/etc/kubernetes/kubeconfig
+        - --internal-cidr=$(NODE_IP)/32
+        env:
+        - name: NODE_NAME
+          valueFrom:
+            fieldRef:
+              fieldPath: spec.nodeName
+        - name: NODE_IP
+          valueFrom:
+            fieldRef:
+              fieldPath: status.hostIP
+        ports:
+        - containerPort: 1107
+          name: metrics
+        securityContext:
+          privileged: true
+        volumeMounts:
+        - name: kilo-dir
+          mountPath: /var/lib/kilo
+        - name: lib-modules
+          mountPath: /lib/modules
+          readOnly: true
+        - name: xtables-lock
+          mountPath: /run/xtables.lock
+          readOnly: false
+        - name: wireguard
+          mountPath: /var/run/wireguard
+          readOnly: false
+        - name: kubeconfig
+          mountPath: /etc/kubernetes
+          readOnly: true
+      - name: wireguard
+        image: ghcr.io/masipcat/wireguard-go-docker:0.0.20230223
+        args:
+        - wireguard-go
+        - --foreground
+        - kilo0
+        securityContext:
+          privileged: true
+        volumeMounts:
+        - name: wireguard
+          mountPath: /var/run/wireguard
+          readOnly: false
+      tolerations:
+      - effect: NoSchedule
+        operator: Exists
+      - effect: NoExecute
+        operator: Exists
+      volumes:
+      - name: kilo-dir
+        hostPath:
+          path: /var/lib/kilo
+      - name: lib-modules
+        hostPath:
+          path: /lib/modules
+      - name: xtables-lock
+        hostPath:
+          path: /run/xtables.lock
+          type: FileOrCreate
+      - name: wireguard
+        hostPath:
+          path: /var/run/wireguard
+      - name: kubeconfig
+        secret:
+          secretName: kubeconfig
diff --git a/e2e/lib.sh b/e2e/lib.sh
index 9fc9e776..99370c53 100755
--- a/e2e/lib.sh
+++ b/e2e/lib.sh
@@ -142,6 +142,55 @@ delete_cluster () {
     _kind delete clusters $KIND_CLUSTER
 }
 
+# install_cilium installs Cilium via Helm into the current kind cluster
+# using a minimal config: VXLAN overlay, Kubernetes IPAM, host firewall off.
+# Kube-proxy replacement is intentionally left at the default (off) to
+# keep the e2e harness focused on Kilo's --compatibility=cilium path
+# rather than Cilium's eBPF service LB; KPR coverage can be added in a
+# follow-up.
+install_cilium() {
+    local CILIUM_VERSION="${CILIUM_VERSION:-1.16.5}"
+    helm repo add cilium https://helm.cilium.io/ >/dev/null 2>&1 || true
+    helm repo update cilium >/dev/null 2>&1 || true
+    helm --kubeconfig="$KUBECONFIG" install cilium cilium/cilium \
+        --namespace kube-system \
+        --version "$CILIUM_VERSION" \
+        --set ipam.mode=kubernetes \
+        --set tunnelProtocol=vxlan \
+        --set hostFirewall.enabled=false \
+        --set image.pullPolicy=IfNotPresent \
+        --set rollOutCiliumPods=true \
+        --wait
+}
+
+# create_cilium_cluster launches a kind cluster, installs Cilium as the CNI,
+# deploys Kilo in --compatibility=cilium mode, and brings up Adjacency +
+# the curl helper, mirroring create_cluster.
+create_cilium_cluster() {
+    # shellcheck disable=SC2119
+    local CONFIG="${1:-$(build_kind_config)}"
+    _kind delete clusters $KIND_CLUSTER > /dev/null
+    _kind create cluster --name $KIND_CLUSTER --config <(echo "$CONFIG")
+    # Cilium needs to be installed before any pod that requires CNI networking
+    # can become Ready, so install it first.
+    install_cilium
+    block_until_ready kube-system k8s-app=cilium
+    _kubectl wait nodes --all --for=condition=Ready --timeout=120s
+    block_until_ready kube-system k8s-app=kube-dns
+    # Load the Kilo image into kind and apply the Cilium-mode manifest.
+    docker tag "$KILO_IMAGE" squat/kilo:test
+    $KIND_BINARY load docker-image squat/kilo:test --name $KIND_CLUSTER
+    _kubectl create secret generic kubeconfig --from-file=kubeconfig="$KUBECONFIG" -n kube-system
+    _kubectl apply -f ../manifests/crds.yaml
+    _kubectl apply -f kilo-kind-cilium.yaml
+    if ! block_until_ready_by_name kube-system kilo-userspace; then return 1; fi
+    _kubectl apply -f helper-curl.yaml
+    block_until_ready_by_name default curl || return 1
+    _kubectl taint node $KIND_CLUSTER-control-plane node-role.kubernetes.io/control-plane:NoSchedule-
+    _kubectl apply -f https://raw.githubusercontent.com/kilo-io/adjacency/main/example.yaml
+    block_until_ready_by_name default adjacency
+}
+
 curl_pod() {
     _kubectl get pods -l app.kubernetes.io/name=curl -o name | xargs -I{} "$KUBECTL_BINARY" --kubeconfig="$KUBECONFIG" exec {} -- /usr/bin/curl "$@"
 }