-
Notifications
You must be signed in to change notification settings - Fork 20
283 lines (251 loc) · 12.6 KB
/
cncf-conformance.yaml
File metadata and controls
283 lines (251 loc) · 12.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
name: cncf-conformance

# Workflow to run CNCF conformance tests on the latest MicroShift upstream release.
#
# The CNCF conformance test suite is run with Sonobuoy in 'certified-conformance'
# mode, which is the mode used for official Kubernetes conformance certification.
on:
  schedule:
    - cron: '0 4 * * *'  # Daily at 04:00 UTC
  workflow_dispatch:
    inputs:
      version:
        default: "latest"
        description: MicroShift version to test (e.g., 4.21.0_ga9cd00b34_4.21.0_okd_scos.ec.5 or 'latest' for most recent release)
        type: string
      registry:
        default: "ghcr.io/microshift-io"
        description: Container registry to pull bootc images from
        type: string
      test-timeout:
        default: "8400"
        description: Sonobuoy test timeout in seconds (8400 = 2 h 20 min)
        type: string

# Workflow-wide settings, exported as environment variables to every step.
# Scheduled runs carry no workflow_dispatch inputs, so each value falls back
# to the same default that the corresponding input declares above.
env:
  VERSION: ${{ github.event.inputs.version || 'latest' }}
  REGISTRY: ${{ github.event.inputs.registry || 'ghcr.io/microshift-io' }}
  TEST_TIMEOUT: ${{ github.event.inputs.test-timeout || '8400' }}

jobs:
  cncf-conformance:
    # Scheduled runs only execute in the upstream repository; any other
    # trigger (e.g. a manual workflow_dispatch) runs wherever it is started.
    if: github.event_name != 'schedule' || github.repository == 'microshift-io/microshift'
    strategy:
      # The two architectures are tested independently. Without this, the
      # default fail-fast behaviour cancels the other matrix leg as soon as
      # one leg fails, discarding its conformance results.
      fail-fast: false
      matrix:
        runners: [ubuntu-24.04, ubuntu-24.04-arm]
    name: Run CNCF conformance tests (${{ matrix.runners }})
    runs-on: ${{ matrix.runners }}
    steps:
- name: Check out MicroShift upstream repository
  # The checkout provides the local actions under ./.github/actions and the
  # Makefile targets that the following steps invoke.
  uses: actions/checkout@v4

- id: detect-cpu-arch
  name: Detect CPU architecture
  # The 'go_arch' output of this step is used to name the results artifact.
  uses: ./.github/actions/arch

- name: Prepare the test environment
  uses: ./.github/actions/prebuild

- name: Pull pre-built bootc image
  shell: bash
  # VERSION and REGISTRY are defined in the workflow-level 'env' block and are
  # therefore already exported to this script. They are read as ordinary shell
  # variables instead of being spliced into the script text through GitHub
  # expression syntax, so a workflow_dispatch input can never be interpreted
  # as shell code.
  run: |
    set -euo pipefail

    # Update the 'latest' tag to the latest released version from the
    # MicroShift GitHub repository.
    # Note: To test images from other repositories, override the 'VERSION'
    # and 'REGISTRY' settings to point to a custom multi-arch manifest.
    TAG="${VERSION}"
    if [ "${TAG}" = "latest" ] ; then
      TAG="$(curl -s --max-time 60 "https://api.github.com/repos/microshift-io/microshift/releases/latest" | jq -r .tag_name)"
      # jq prints the literal string 'null' when the response has no tag_name
      # field (for example an API error document), so treat it like an empty tag.
      if [ -z "${TAG}" ] || [ "${TAG}" = "null" ] ; then
        echo "ERROR: Could not determine the latest release tag from GitHub"
        exit 1
      fi
    fi

    IMAGE="${REGISTRY}/microshift:${TAG}"
    echo "Pulling ${IMAGE}"
    sudo podman pull "${IMAGE}"
    # Re-tag the pulled image under the fixed local name
    # localhost/microshift-okd:latest.
    sudo podman tag "${IMAGE}" localhost/microshift-okd:latest

- name: Setup 2-node cluster for CNCF tests
  shell: bash
  run: |
    set -euo pipefail

    # First node: start it, wait for readiness, then confirm it is healthy
    # before a second node is added.
    make run
    make run-ready
    make run-healthy

    # Second node: add it, then confirm health again so that both nodes are
    # healthy before the conformance tests start.
    make add-node
    make run-healthy

- name: Configure cluster for CNCF conformance tests
  shell: bash
  run: |
    set -euo pipefail

    # firewalld is turned off on every cluster node so that it cannot block
    # traffic between the nodes. Both systemctl calls are best-effort: a
    # failure is ignored and the step carries on.
    echo "Disabling firewalld on cluster nodes..."
    for node in microshift-okd-1 microshift-okd-2; do
      echo " - Disabling firewalld on ${node}"
      for action in stop disable; do
        sudo podman exec "${node}" systemctl "${action}" firewalld || true
      done
    done

- name: Configure networking for CI environment
  shell: bash
  run: |
    set -euo pipefail

    # Fix TCP DNS issues in GitHub Actions.
    # The issue manifests as TCP DNS failing while UDP works.
    # Several mitigations are applied in sequence (steps 1-5); steps 6-8 are
    # diagnostics only: they print results to the job log and never fail
    # the step.

    echo "=== Step 1: Configure MTU via kindnet CNI_MTU environment variable ==="
    # Set CNI_MTU on the kindnet daemonset and restart it so that all new
    # pods get the lowered MTU.
    make env CMD='kubectl set env daemonset/kube-kindnet-ds -n kube-kindnet CNI_MTU=1400'
    make env CMD='kubectl rollout restart daemonset/kube-kindnet-ds -n kube-kindnet'
    make env CMD='kubectl rollout status daemonset/kube-kindnet-ds -n kube-kindnet --timeout=120s'

    echo "=== Step 1b: Verify CNI config has correct MTU ==="
    for node in microshift-okd-1 microshift-okd-2; do
      echo " - Checking CNI config on ${node}"
      sudo podman exec "${node}" bash -c '
        CNI_CONFIG="/etc/cni/net.d/10-kindnet.conflist"
        if [ -f "$CNI_CONFIG" ]; then
          grep -o "\"mtu\": *[0-9]*" "$CNI_CONFIG" || echo " (mtu not in config)"
          # If MTU still not present, add it manually as fallback
          if ! grep -q "\"mtu\"" "$CNI_CONFIG"; then
            sed -i "s/\"type\": *\"ptp\"/\"type\": \"ptp\", \"mtu\": 1400/g" "$CNI_CONFIG"
            echo " Added MTU=1400 to CNI config"
          fi
        fi
      '
    done

    echo "=== Step 2: Set MTU on all network interfaces ==="
    for node in microshift-okd-1 microshift-okd-2; do
      sudo podman exec "${node}" bash -c '
        # Lower the MTU of every interface except loopback that is above 1400
        for iface in $(ip -o link show | awk -F": " "{print \$2}" | cut -d@ -f1 | grep -v "^lo$"); do
          current_mtu=$(cat /sys/class/net/$iface/mtu 2>/dev/null || echo "0")
          if [ "$current_mtu" -gt 1400 ]; then
            ip link set dev "$iface" mtu 1400 2>/dev/null && echo " $iface: $current_mtu -> 1400" || true
          fi
        done
      ' || true
    done

    echo "=== Step 2b: Add TCP MSS clamping to avoid fragmentation ==="
    for node in microshift-okd-1 microshift-okd-2; do
      echo " - Configuring TCP MSS clamping on ${node}"
      sudo podman exec "${node}" bash -c '
        # Clamp the TCP MSS on SYN packets to the path MTU to avoid
        # fragmentation issues. The clamp value is derived from the path MTU:
        # MTU - 40 for IPv4 (20 IP + 20 TCP header bytes), i.e. 1360 for
        # MTU 1400, and MTU - 60 for IPv6.
        # Still best-effort, but a rule that cannot be added is reported
        # instead of being announced as configured.
        failed=0
        for chain in POSTROUTING FORWARD; do
          if ! iptables -t mangle -A "$chain" -p tcp --tcp-flags SYN,RST SYN -j TCPMSS --clamp-mss-to-pmtu 2>/dev/null; then
            echo " WARNING: failed to add TCP MSS clamping rule to mangle/$chain"
            failed=1
          fi
        done
        if [ "$failed" -eq 0 ]; then
          echo " TCP MSS clamping configured"
        fi
      ' || true
    done

    echo "=== Step 3: Restart kube-proxy to refresh iptables rules ==="
    make env CMD='kubectl rollout restart daemonset/kube-proxy -n kube-proxy'
    make env CMD='kubectl rollout status daemonset/kube-proxy -n kube-proxy --timeout=120s'

    echo "=== Step 4: Restart CoreDNS to ensure clean TCP listeners ==="
    make env CMD='kubectl rollout restart daemonset/dns-default -n openshift-dns'
    make env CMD='kubectl rollout status daemonset/dns-default -n openshift-dns --timeout=120s'

    echo "=== Step 5: Wait for network stabilization ==="
    sleep 30

    echo "=== Step 6: Verify TCP DNS works ==="
    for node in microshift-okd-1 microshift-okd-2; do
      echo " Testing TCP DNS from ${node}..."
      sudo podman exec "${node}" bash -c '
        for i in 1 2 3; do
          # Success requires a zero exit status from dig AND a non-empty
          # answer. Output alone is not enough: on failure dig prints
          # diagnostics, which are captured in $result as well.
          if result=$(dig +tcp +short kubernetes.default.svc.cluster.local @10.43.0.10 2>&1) && [ -n "$result" ]; then
            echo " Attempt $i: OK ($result)"
          else
            echo " Attempt $i: FAILED (${result:-no answer})"
          fi
          sleep 1
        done
      '
    done

    echo "=== Step 7: Collect network diagnostics ==="
    for node in microshift-okd-1 microshift-okd-2; do
      echo " === Network diagnostics for ${node} ==="
      echo " - Interface MTU values:"
      sudo podman exec "${node}" ip -o link show | grep -oE 'mtu [0-9]+' || true
      echo " - Route table:"
      sudo podman exec "${node}" ip route 2>/dev/null || true
      echo " - iptables NAT rules (DNS related):"
      sudo podman exec "${node}" iptables -t nat -L -n 2>/dev/null | grep -E '53|dns' || true
      echo " - iptables filter rules (DNS related):"
      sudo podman exec "${node}" iptables -L -n 2>/dev/null | grep -E '53|dns' || true
      echo " - TCP connections to port 53:"
      sudo podman exec "${node}" ss -tnp 2>/dev/null | grep ':53' || true
      echo " - DNS resolver configuration (/etc/resolv.conf):"
      sudo podman exec "${node}" cat /etc/resolv.conf 2>/dev/null || true
    done

    echo "=== Step 8: Test TCP DNS from a test pod ==="
    # Create a test pod and verify TCP DNS works from within a pod context.
    # Every command here is best-effort so that a failing probe cannot fail
    # the step.
    make env CMD='kubectl run dns-test-pod --image=registry.k8s.io/e2e-test-images/jessie-dnsutils:1.7 --restart=Never --command -- sleep 300' || true
    sleep 10
    make env CMD='kubectl wait --for=condition=Ready pod/dns-test-pod --timeout=60s' || true
    echo " Testing UDP DNS from pod:"
    make env CMD='kubectl exec dns-test-pod -- dig +short kubernetes.default.svc.cluster.local' || true
    echo " Testing TCP DNS from pod:"
    make env CMD='kubectl exec dns-test-pod -- dig +tcp +short kubernetes.default.svc.cluster.local' || true
    echo " Testing TCP DNS with verbose output:"
    make env CMD='kubectl exec dns-test-pod -- dig +tcp kubernetes.default.svc.cluster.local' || true
    make env CMD='kubectl delete pod dns-test-pod --force --grace-period=0' || true

- name: Configure hostname resolution for cluster nodes
  shell: bash
  run: |
    set -euo pipefail

    # Sonobuoy e2e tests reach the kubelet APIs by node name
    # (microshift-okd-1, microshift-okd-2). By default those names resolve
    # only inside the podman network, so each node is added to /etc/hosts on
    # the host where the tests run. The address used is the IP of the first
    # network listed for the container.
    echo "Adding cluster node hostnames to /etc/hosts..."
    for node in $(sudo podman ps --filter name=microshift-okd- --format '{{.Names}}'); do
      node_ip=$(sudo podman inspect "$node" | jq -r '.[].NetworkSettings.Networks | to_entries[0].value.IPAddress')
      # jq prints the literal 'null' when the field is missing.
      if [ -z "$node_ip" ] || [ "$node_ip" = "null" ]; then
        echo "ERROR: Could not get IP address for node: $node"
        exit 1
      fi
      echo "$node_ip $node" | sudo tee -a /etc/hosts
      echo " ✓ Added: $node_ip $node"
    done

    echo ""
    echo "Verifying hostname resolution:"
    for node in microshift-okd-1 microshift-okd-2; do
      if ! getent hosts "$node" > /dev/null 2>&1; then
        echo "ERROR: Hostname resolution failed for node: $node"
        exit 1
      fi
      echo " ✓ $node resolves successfully"
    done

- name: Run CNCF conformance tests with Sonobuoy
  id: run-sonobuoy
  shell: bash
  env:
    SONOBUOY_VERSION: v0.57.3
    SYSTEMD_LOGS_VERSION: v0.4
    TEST_MODE: certified-conformance
    TIMEOUT_TEST: ${{ env.TEST_TIMEOUT }}
    RESULTS_DIR: /tmp/sonobuoy-output
    # On ARM64 runners the DNS e2e tests below are skipped: TCP DNS fails
    # consistently there while UDP DNS works, which is a known limitation of
    # the GitHub Actions runner networking rather than of the cluster.
    # On other runners the skip list is empty.
    # See: https://github.com/microshift-io/microshift/issues/186
    EXTRA_E2E_SKIP: ${{ contains(matrix.runners, 'arm') && '.*DNS should provide DNS for the cluster.*|.*DNS should provide DNS for services.*|.*DNS should provide DNS for pods for Subdomain.*' || '' }}
  run: |
    set -euo pipefail

    # The test script is started through 'make env' and runs with the
    # environment variables set in the 'env' block of this step.
    make env CMD="./src/cncf/run_sonobuoy_tests.sh"

- name: Upload Sonobuoy results as artifacts
  # always(): publish whatever is in the results directory, even when an
  # earlier step failed.
  if: always()
  uses: actions/upload-artifact@v4
  with:
    # The artifact name carries the architecture reported by the
    # 'detect-cpu-arch' step.
    name: sonobuoy-results-${{ steps.detect-cpu-arch.outputs.go_arch }}
    path: /tmp/sonobuoy-output/
    retention-days: 30

- name: Clean up Sonobuoy resources
  # Runs regardless of the outcome of earlier steps. Both commands are
  # best-effort, so a cleanup error never changes the job result.
  if: always()
  shell: bash
  run: |
    # Delete the Sonobuoy resources, then the local results directory.
    make env CMD="~/go/bin/sonobuoy delete --wait" || true
    rm -rf /tmp/sonobuoy-output || true

# Debug information is collected on every run, whatever the outcome.
- name: Collect debug information after tests
  if: always()
  uses: ./.github/actions/debug-info

# The sos-report is collected only when a previous step has failed.
- name: Collect MicroShift container sos-report on failure
  if: failure()
  uses: ./.github/actions/sos-report