From b29b503ff1dd1895947f5d2f2caa776d9c5e7e7e Mon Sep 17 00:00:00 2001 From: Steven Miller Date: Sat, 7 Mar 2026 13:59:06 -0500 Subject: [PATCH 01/10] Stabilize linux test network subnet allocation --- lib/instances/test_network_config_test.go | 442 +++++++++++++++++++++- 1 file changed, 433 insertions(+), 9 deletions(-) diff --git a/lib/instances/test_network_config_test.go b/lib/instances/test_network_config_test.go index 9be248d4..0af6dbfd 100644 --- a/lib/instances/test_network_config_test.go +++ b/lib/instances/test_network_config_test.go @@ -1,10 +1,19 @@ package instances import ( + "encoding/json" + "errors" "fmt" + "net" "os" + "os/exec" + "path/filepath" + "runtime" + "strconv" + "strings" "sync" "sync/atomic" + "syscall" "testing" "time" @@ -14,28 +23,443 @@ import ( var testNetworkSeq atomic.Uint32 var testNetworkByName sync.Map var testNetworkRunSeed = uint32(time.Now().UnixNano()) ^ uint32(os.Getpid()<<8) +var testNetworkGuardCleanupOnce sync.Once + +const ( + testSubnetSecondOctetMin = 200 + testSubnetSecondOctetMax = 249 + testSubnetThirdOctetMin = 1 + testSubnetThirdOctetMax = 250 +) + +type testNetworkLease struct { + cfg config.NetworkConfig + release func() +} + +type subnetLeaseFile struct { + Leases map[string]subnetLease `json:"leases"` +} + +type subnetLease struct { + TestName string `json:"test_name"` + BridgeName string `json:"bridge_name"` + SubnetCIDR string `json:"subnet_cidr"` + PID int `json:"pid"` + CreatedAt int64 `json:"created_at_unix"` +} + +type hostRoute struct { + cidr string + network *net.IPNet + device string + linkDown bool +} + +var errRouteCommandUnavailable = errors.New("ip route command unavailable") func newParallelTestNetworkConfig(t *testing.T) config.NetworkConfig { t.Helper() - if cfg, ok := testNetworkByName.Load(t.Name()); ok { - return cfg.(config.NetworkConfig) + if existing, ok := testNetworkByName.Load(t.Name()); ok { + return existing.(*testNetworkLease).cfg } seq := testNetworkSeq.Add(1) + lease, err := allocateTestNetworkLease(t.Name(), seq) + if err != nil { + t.Fatalf("allocate test network config: %v", err) + } + + actual, loaded := testNetworkByName.LoadOrStore(t.Name(), lease) + if loaded { + lease.release() + return actual.(*testNetworkLease).cfg + } + + t.Cleanup(lease.release) + return lease.cfg +} + +func allocateTestNetworkLease(testName string, seq uint32) (*testNetworkLease, error) { + if runtime.GOOS != "linux" { + return &testNetworkLease{ + cfg: legacyParallelTestNetworkConfig(seq), + release: func() {}, + }, nil + } + + var allocatedSubnet string + var bridgeName string + var cfg config.NetworkConfig + + err := withTestSubnetLock(func() error { + routes, err := listHostRoutes() + if err != nil { + return err + } + + testNetworkGuardCleanupOnce.Do(func() { + cleanupStaleLinkDownRoutes(routes) + // Refresh route snapshot after cleanup so subnet selection sees current state. + refreshed, refreshErr := listHostRoutes() + if refreshErr == nil { + routes = refreshed + } + }) + + leases, err := loadSubnetLeases() + if err != nil { + return err + } + + pruneStaleLeases(leases, routes) + if err := saveSubnetLeases(leases); err != nil { + return err + } + + startIdx := int((testNetworkRunSeed + seq - 1) % uint32(testSubnetSpaceSize())) + subnet, err := findFreeTestSubnet(startIdx, routes, leases) + if err != nil { + return err + } + + bridgeName = fmt.Sprintf("hm%04x%03x", testNetworkRunSeed&0xffff, seq%0xfff) + allocatedSubnet = subnet + leases[subnet] = subnetLease{ + TestName: testName, + BridgeName: bridgeName, + SubnetCIDR: subnet, + PID: os.Getpid(), + CreatedAt: time.Now().Unix(), + } + + if err := saveSubnetLeases(leases); err != nil { + return err + } + + cfg = config.NetworkConfig{ + BridgeName: bridgeName, + SubnetCIDR: subnet, + DNSServer: "1.1.1.1", + } + return nil + }) + if err != nil { + if errors.Is(err, errRouteCommandUnavailable) { + return &testNetworkLease{ + cfg: legacyParallelTestNetworkConfig(seq), + release: func() {}, + }, nil + } + return nil, err + } + + var releaseOnce sync.Once + return &testNetworkLease{ + cfg: cfg, + release: func() { + releaseOnce.Do(func() { + _ = withTestSubnetLock(func() error { + cleanupTestNetworkArtifacts(bridgeName, allocatedSubnet) + + leases, err := loadSubnetLeases() + if err != nil { + return nil + } + delete(leases, allocatedSubnet) + if err := saveSubnetLeases(leases); err != nil { + return nil + } + return nil + }) + }) + }, + }, nil +} + +func withTestSubnetLock(fn func() error) error { + lockPath := filepath.Join(os.TempDir(), "hypeman-test-network.lock") + lockFile, err := os.OpenFile(lockPath, os.O_CREATE|os.O_RDWR, 0o600) + if err != nil { + return fmt.Errorf("open subnet lock file: %w", err) + } + defer lockFile.Close() + + if err := syscall.Flock(int(lockFile.Fd()), syscall.LOCK_EX); err != nil { + return fmt.Errorf("acquire subnet lock: %w", err) + } + defer syscall.Flock(int(lockFile.Fd()), syscall.LOCK_UN) + + return fn() +} + +func testSubnetLeaseFilePath() string { + return filepath.Join(os.TempDir(), "hypeman-test-network-leases.json") +} + +func loadSubnetLeases() (map[string]subnetLease, error) { + path := testSubnetLeaseFilePath() + data, err := os.ReadFile(path) + if err != nil { + if os.IsNotExist(err) { + return make(map[string]subnetLease), nil + } + return nil, fmt.Errorf("read subnet lease file: %w", err) + } + + var leases subnetLeaseFile + if len(data) > 0 { + if err := json.Unmarshal(data, &leases); err != nil { + return nil, fmt.Errorf("unmarshal subnet leases: %w", err) + } + } + if leases.Leases == nil { + leases.Leases = make(map[string]subnetLease) + } + return leases.Leases, nil +} + +func saveSubnetLeases(leases map[string]subnetLease) error { + leaseState := subnetLeaseFile{Leases: leases} + data, err := json.Marshal(leaseState) + if err != nil { + return fmt.Errorf("marshal subnet leases: %w", err) + } + + path := testSubnetLeaseFilePath() + tmpPath := path + ".tmp" + if err := os.WriteFile(tmpPath, data, 0o600); err != nil { + return fmt.Errorf("write subnet lease temp file: %w", err) + } + if err := os.Rename(tmpPath, path); err != nil { + return fmt.Errorf("rename subnet lease file: %w", err) + } + return nil +} + +func listHostRoutes() ([]hostRoute, error) { + cmd := exec.Command("ip", "-4", "route", "show") + out, err := cmd.Output() + if err != nil { + if errors.Is(err, exec.ErrNotFound) { + return nil, errRouteCommandUnavailable + } + return nil, fmt.Errorf("list host routes: %w", err) + } + + lines := strings.Split(string(out), "\n") + routes := make([]hostRoute, 0, len(lines)) + for _, line := range lines { + line = strings.TrimSpace(line) + if line == "" || strings.HasPrefix(line, "default ") { + continue + } + + fields := strings.Fields(line) + if len(fields) == 0 { + continue + } + + _, network, err := net.ParseCIDR(fields[0]) + if err != nil { + continue + } + + route := hostRoute{ + cidr: network.String(), + network: network, + linkDown: strings.Contains(line, " linkdown"), + } + for i := 0; i < len(fields)-1; i++ { + if fields[i] == "dev" { + route.device = fields[i+1] + break + } + } + routes = append(routes, route) + } + + return routes, nil +} + +func cleanupStaleLinkDownRoutes(routes []hostRoute) { + for _, route := range routes { + if !route.linkDown { + continue + } + if !isTestCIDR(route.cidr) { + continue + } + if !strings.HasPrefix(route.device, "hm") && !strings.HasPrefix(route.device, "ha") { + continue + } + + cleanupTestNetworkArtifacts(route.device, route.cidr) + } +} + +func pruneStaleLeases(leases map[string]subnetLease, routes []hostRoute) { + liveRoutes := make(map[string]struct{}, len(routes)) + for _, route := range routes { + liveRoutes[route.cidr] = struct{}{} + } + + for subnet, lease := range leases { + _, hasRoute := liveRoutes[subnet] + if hasRoute { + continue + } + if bridgeExists(lease.BridgeName) { + continue + } + delete(leases, subnet) + } +} + +func bridgeExists(name string) bool { + if name == "" { + return false + } + cmd := exec.Command("ip", "link", "show", "dev", name) + return cmd.Run() == nil +} + +func findFreeTestSubnet(startIdx int, routes []hostRoute, leases map[string]subnetLease) (string, error) { + testRoutes := make([]*net.IPNet, 0, len(routes)) + for _, route := range routes { + testRoutes = append(testRoutes, route.network) + } + + subnetSpace := testSubnetSpaceSize() + for offset := 0; offset < subnetSpace; offset++ { + idx := (startIdx + offset) % subnetSpace + subnet := testSubnetAt(idx) + if _, exists := leases[subnet]; exists { + continue + } + + _, candidateNet, err := net.ParseCIDR(subnet) + if err != nil { + continue + } + + conflicts := false + for _, route := range testRoutes { + if route == nil { + continue + } + if cidrOverlaps(candidateNet, route) { + conflicts = true + break + } + } + if conflicts { + continue + } + + return subnet, nil + } + + return "", fmt.Errorf("no free subnet available in test range 10.%d-%d.%d-%d.0/24", + testSubnetSecondOctetMin, testSubnetSecondOctetMax, testSubnetThirdOctetMin, testSubnetThirdOctetMax) +} + +func testSubnetSpaceSize() int { + return (testSubnetSecondOctetMax - testSubnetSecondOctetMin + 1) * (testSubnetThirdOctetMax - testSubnetThirdOctetMin + 1) +} + +func testSubnetAt(idx int) string { + thirdRangeSize := testSubnetThirdOctetMax - testSubnetThirdOctetMin + 1 + secondOctet := testSubnetSecondOctetMin + (idx / thirdRangeSize) + thirdOctet := testSubnetThirdOctetMin + (idx % thirdRangeSize) + return fmt.Sprintf("10.%d.%d.0/24", secondOctet, thirdOctet) +} + +func cidrOverlaps(a, b *net.IPNet) bool { + return a.Contains(b.IP) || b.Contains(a.IP) +} + +func isTestCIDR(cidr string) bool { + ip, network, err := net.ParseCIDR(cidr) + if err != nil || ip == nil || network == nil { + return false + } + ip4 := ip.To4() + if ip4 == nil { + return false + } + if ip4[0] != 10 { + return false + } + return int(ip4[1]) >= testSubnetSecondOctetMin && int(ip4[1]) <= testSubnetSecondOctetMax +} + +func cleanupTestNetworkArtifacts(bridgeName, subnetCIDR string) { + if subnetCIDR != "" && bridgeName != "" { + _ = exec.Command("ip", "-4", "route", "del", subnetCIDR, "dev", bridgeName).Run() + } + if bridgeName != "" { + _ = exec.Command("ip", "link", "delete", bridgeName, "type", "bridge").Run() + } + + bridgeSuffix := strings.ToLower(bridgeName) + deleteIPTablesRulesByComment("nat", "POSTROUTING", "hypeman-nat-"+bridgeSuffix) + deleteIPTablesRulesByComment("", "FORWARD", "hypeman-fwd-out-"+bridgeSuffix) + deleteIPTablesRulesByComment("", "FORWARD", "hypeman-fwd-in-"+bridgeSuffix) +} + +func deleteIPTablesRulesByComment(table, chain, comment string) { + if comment == "" { + return + } + + args := []string{} + if table != "" { + args = append(args, "-t", table) + } + args = append(args, "-L", chain, "--line-numbers", "-n") + listCmd := exec.Command("iptables", args...) + output, err := listCmd.Output() + if err != nil { + return + } + + var ruleNums []int + for _, line := range strings.Split(string(output), "\n") { + if !strings.Contains(line, comment) { + continue + } + fields := strings.Fields(line) + if len(fields) == 0 { + continue + } + ruleNum, convErr := strconv.Atoi(fields[0]) + if convErr != nil { + continue + } + ruleNums = append(ruleNums, ruleNum) + } + + for i := len(ruleNums) - 1; i >= 0; i-- { + delArgs := []string{} + if table != "" { + delArgs = append(delArgs, "-t", table) + } + delArgs = append(delArgs, "-D", chain, strconv.Itoa(ruleNums[i])) + _ = exec.Command("iptables", delArgs...).Run() + } +} + +func legacyParallelTestNetworkConfig(seq uint32) config.NetworkConfig { const subnetSpace = 50 * 250 // second octet 200-249, third octet 1-250 subnetIdx := (testNetworkRunSeed + seq - 1) % subnetSpace - bridge := fmt.Sprintf("hm%04x%03x", testNetworkRunSeed&0xffff, seq%0xfff) - secondOctet := 200 + int((subnetIdx / 250)) + secondOctet := 200 + int(subnetIdx/250) thirdOctet := int((subnetIdx % 250) + 1) - - cfg := config.NetworkConfig{ + return config.NetworkConfig{ BridgeName: bridge, SubnetCIDR: fmt.Sprintf("10.%d.%d.0/24", secondOctet, thirdOctet), DNSServer: "1.1.1.1", } - - actual, _ := testNetworkByName.LoadOrStore(t.Name(), cfg) - return actual.(config.NetworkConfig) } From bd11c5cba6b5238bad2dc763cbd4e5577ef1087e Mon Sep 17 00:00:00 2001 From: Steven Miller Date: Sat, 7 Mar 2026 14:06:09 -0500 Subject: [PATCH 02/10] Add subnet lease allocator for tests --- lib/hypervisor/firecracker/binaries.go | 39 +++++++++++++++- lib/hypervisor/firecracker/binaries_test.go | 51 +++++++++++++++++++++ 2 files changed, 88 insertions(+), 2 deletions(-) diff --git a/lib/hypervisor/firecracker/binaries.go b/lib/hypervisor/firecracker/binaries.go index ff05efd7..d9e2e104 100644 --- a/lib/hypervisor/firecracker/binaries.go +++ b/lib/hypervisor/firecracker/binaries.go @@ -32,6 +32,7 @@ var binaryFS embed.FS var ( customBinaryPathMu sync.RWMutex customBinaryPath string + extractBinaryMu sync.Mutex ) var versionRegex = regexp.MustCompile(`v?\d+\.\d+\.\d+`) @@ -90,6 +91,14 @@ func extractBinary(p *paths.Paths, version Version) (string, error) { return extractPath, nil } + extractBinaryMu.Lock() + defer extractBinaryMu.Unlock() + + // Another goroutine may have already extracted the binary while we waited. + if err := validateExecutable(extractPath); err == nil { + return extractPath, nil + } + data, err := binaryFS.ReadFile(embeddedPath) if err != nil { return "", fmt.Errorf("embedded firecracker binary not found at %s (run `make download-firecracker-binaries` or set hypervisor.firecracker_binary_path): %w", embeddedPath, err) @@ -98,9 +107,35 @@ func extractBinary(p *paths.Paths, version Version) (string, error) { if err := os.MkdirAll(filepath.Dir(extractPath), 0755); err != nil { return "", fmt.Errorf("create firecracker binary directory: %w", err) } - if err := os.WriteFile(extractPath, data, 0755); err != nil { - return "", fmt.Errorf("write firecracker binary: %w", err) + + tmpFile, err := os.CreateTemp(filepath.Dir(extractPath), "firecracker-*") + if err != nil { + return "", fmt.Errorf("create firecracker temp binary: %w", err) + } + tmpPath := tmpFile.Name() + cleanupTmp := true + defer func() { + if cleanupTmp { + _ = os.Remove(tmpPath) + } + }() + + if _, err := tmpFile.Write(data); err != nil { + _ = tmpFile.Close() + return "", fmt.Errorf("write firecracker temp binary: %w", err) + } + if err := tmpFile.Chmod(0755); err != nil { + _ = tmpFile.Close() + return "", fmt.Errorf("chmod firecracker temp binary: %w", err) + } + if err := tmpFile.Close(); err != nil { + return "", fmt.Errorf("close firecracker temp binary: %w", err) + } + + if err := os.Rename(tmpPath, extractPath); err != nil { + return "", fmt.Errorf("install firecracker binary: %w", err) } + cleanupTmp = false return extractPath, nil } diff --git a/lib/hypervisor/firecracker/binaries_test.go b/lib/hypervisor/firecracker/binaries_test.go index 088c87fe..7c824fb9 100644 --- a/lib/hypervisor/firecracker/binaries_test.go +++ b/lib/hypervisor/firecracker/binaries_test.go @@ -3,8 +3,10 @@ package firecracker import ( "os" "path/filepath" + "sync" "testing" + "github.com/kernel/hypeman/lib/paths" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -38,3 +40,52 @@ func TestParseVersionFallback(t *testing.T) { assert.Equal(t, defaultVersion, parseVersion("unknown")) assert.Equal(t, V1_14_2, parseVersion("v1.14.2")) } + +func TestResolveBinaryPathConcurrentExtraction(t *testing.T) { + SetCustomBinaryPath("") + t.Cleanup(func() { SetCustomBinaryPath("") }) + + arch, err := normalizeArch() + require.NoError(t, err) + embeddedPath := filepath.ToSlash(filepath.Join("binaries", "firecracker", string(defaultVersion), arch, "firecracker")) + if _, err := binaryFS.ReadFile(embeddedPath); err != nil { + t.Skipf("embedded binary %s not present in this checkout", embeddedPath) + } + + p := paths.New(t.TempDir()) + + const workers = 16 + results := make(chan string, workers) + errs := make(chan error, workers) + var wg sync.WaitGroup + + for i := 0; i < workers; i++ { + wg.Add(1) + go func() { + defer wg.Done() + path, err := resolveBinaryPath(p, "") + results <- path + errs <- err + }() + } + + wg.Wait() + close(results) + close(errs) + + var firstPath string + for path := range results { + if firstPath == "" { + firstPath = path + continue + } + assert.Equal(t, firstPath, path) + } + + for err := range errs { + require.NoError(t, err) + } + + require.NotEmpty(t, firstPath) + require.NoError(t, validateExecutable(firstPath)) +} From 16d4afc9f1599bac37e71a5baa4c78e188b21dc7 Mon Sep 17 00:00:00 2001 From: Steven Miller Date: Sat, 7 Mar 2026 15:18:15 -0500 Subject: [PATCH 03/10] tests: prewarm ci cache and mirror images via local registry --- .github/workflows/test.yml | 26 +- Makefile | 12 +- cmd/test-prewarm/main.go | 336 ++++++++++++++++++ lib/instances/exec_test.go | 6 +- lib/instances/firecracker_test.go | 13 +- lib/instances/fork_test.go | 12 +- lib/instances/manager_darwin_test.go | 17 +- lib/instances/manager_test.go | 23 +- lib/instances/network_test.go | 4 +- lib/instances/qemu_test.go | 19 +- .../snapshot_integration_scenario_test.go | 4 +- lib/instances/snapshot_test.go | 2 +- lib/instances/test_prewarm_test.go | 136 +++++++ lib/instances/volumes_test.go | 22 +- 14 files changed, 569 insertions(+), 63 deletions(-) create mode 100644 cmd/test-prewarm/main.go create mode 100644 lib/instances/test_prewarm_test.go diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 2a500dfc..f3752431 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -65,6 +65,13 @@ jobs: - name: Build run: make build + - name: Prewarm test cache + env: + HYPEMAN_TEST_REGISTRY: 127.0.0.1:5001 + run: | + export HYPEMAN_TEST_PREWARM_DIR="$HOME/.cache/hypeman-ci/linux-amd64" + go run ./cmd/test-prewarm + - name: Check gofmt run: | set -euo pipefail @@ -91,7 +98,11 @@ jobs: CLOUDFLARE_API_TOKEN: ${{ secrets.CLOUDFLARE_API_TOKEN }} TLS_TEST_DOMAIN: "test.hypeman-development.com" TLS_ALLOWED_DOMAINS: '*.hypeman-development.com' - run: make test TEST_TIMEOUT=20m + HYPEMAN_TEST_PREWARM_STRICT: "1" + HYPEMAN_TEST_REGISTRY: 127.0.0.1:5001 + run: | + export HYPEMAN_TEST_PREWARM_DIR="$HOME/.cache/hypeman-ci/linux-amd64" + make test TEST_TIMEOUT=20m test-darwin: runs-on: [self-hosted, macos, arm64] @@ -123,6 +134,13 @@ jobs: - name: Build run: make build + - name: Prewarm test cache + env: + HYPEMAN_TEST_REGISTRY: 127.0.0.1:5001 + run: | + export HYPEMAN_TEST_PREWARM_DIR="$HOME/.cache/hypeman-ci/darwin-arm64" + go run ./cmd/test-prewarm + - name: Check gofmt run: | set -euo pipefail @@ -142,7 +160,11 @@ jobs: GO_TEST_TIMEOUT: 600s DEFAULT_HYPERVISOR: vz JWT_SECRET: ci-test-secret - run: make test + HYPEMAN_TEST_PREWARM_STRICT: "1" + HYPEMAN_TEST_REGISTRY: 127.0.0.1:5001 + run: | + export HYPEMAN_TEST_PREWARM_DIR="$HOME/.cache/hypeman-ci/darwin-arm64" + make test - name: Cleanup if: always() run: | diff --git a/Makefile b/Makefile index 85ca0d43..6188f5a9 100644 --- a/Makefile +++ b/Makefile @@ -269,9 +269,17 @@ test-linux: ensure-ch-binaries ensure-firecracker-binaries ensure-caddy-binaries if [ -n "$(VERBOSE)" ]; then VERBOSE_FLAG="-v"; fi; \ if [ -n "$(TEST)" ]; then \ echo "Running specific test: $(TEST)"; \ - sudo env "PATH=$$TEST_PATH" "DOCKER_CONFIG=$${DOCKER_CONFIG:-$$HOME/.docker}" go test -tags containers_image_openpgp -run=$(TEST) $$VERBOSE_FLAG -timeout=$(TEST_TIMEOUT) ./...; \ + sudo env "PATH=$$TEST_PATH" "DOCKER_CONFIG=$${DOCKER_CONFIG:-$$HOME/.docker}" \ + "HYPEMAN_TEST_PREWARM_DIR=$${HYPEMAN_TEST_PREWARM_DIR:-}" \ + "HYPEMAN_TEST_PREWARM_STRICT=$${HYPEMAN_TEST_PREWARM_STRICT:-}" \ + "HYPEMAN_TEST_REGISTRY=$${HYPEMAN_TEST_REGISTRY:-}" \ + go test -tags containers_image_openpgp -run=$(TEST) $$VERBOSE_FLAG -timeout=$(TEST_TIMEOUT) ./...; \ else \ - sudo env "PATH=$$TEST_PATH" "DOCKER_CONFIG=$${DOCKER_CONFIG:-$$HOME/.docker}" go test -tags containers_image_openpgp $$VERBOSE_FLAG -timeout=$(TEST_TIMEOUT) ./...; \ + sudo env "PATH=$$TEST_PATH" "DOCKER_CONFIG=$${DOCKER_CONFIG:-$$HOME/.docker}" \ + "HYPEMAN_TEST_PREWARM_DIR=$${HYPEMAN_TEST_PREWARM_DIR:-}" \ + "HYPEMAN_TEST_PREWARM_STRICT=$${HYPEMAN_TEST_PREWARM_STRICT:-}" \ + "HYPEMAN_TEST_REGISTRY=$${HYPEMAN_TEST_REGISTRY:-}" \ + go test -tags containers_image_openpgp $$VERBOSE_FLAG -timeout=$(TEST_TIMEOUT) ./...; \ fi # macOS tests (no sudo needed, adds e2fsprogs to PATH) diff --git a/cmd/test-prewarm/main.go b/cmd/test-prewarm/main.go new file mode 100644 index 00000000..b6c2130f --- /dev/null +++ b/cmd/test-prewarm/main.go @@ -0,0 +1,336 @@ +package main + +import ( + "context" + "crypto/sha256" + "encoding/hex" + "encoding/json" + "fmt" + "io" + "net" + "net/http" + "os" + "os/exec" + "path/filepath" + "runtime" + "strings" + "syscall" + "time" + + "github.com/kernel/hypeman/lib/images" + "github.com/kernel/hypeman/lib/paths" + "github.com/kernel/hypeman/lib/system" +) + +const ( + defaultRegistry = "127.0.0.1:5001" + registryName = "hypeman-ci-registry" +) + +var defaultImages = []string{ + "docker.io/library/alpine:latest", + "docker.io/library/nginx:alpine", + "docker.io/bitnami/redis:latest", +} + +type manifestImage struct { + Source string `json:"source"` + LocalRef string `json:"local_ref"` + Digest string `json:"digest"` + CacheHit bool `json:"cache_hit"` +} + +type prewarmManifest struct { + WarmedAt string `json:"warmed_at"` + Registry string `json:"registry"` + PrewarmDir string `json:"prewarm_dir"` + Images []manifestImage `json:"images"` + System struct { + KernelVersion string `json:"kernel_version"` + Arch string `json:"arch"` + InitrdPath string `json:"initrd_path"` + InitrdHash string `json:"initrd_hash"` + } `json:"system"` +} + +func main() { + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Minute) + defer cancel() + + prewarmDir := envOr("HYPEMAN_TEST_PREWARM_DIR", defaultPrewarmDir()) + registry := trimRegistry(envOr("HYPEMAN_TEST_REGISTRY", defaultRegistry)) + imagesToWarm := parseImageList(os.Getenv("HYPEMAN_TEST_PREWARM_IMAGES")) + if len(imagesToWarm) == 0 { + imagesToWarm = defaultImages + } + + if err := os.MkdirAll(prewarmDir, 0755); err != nil { + fatalf("mkdir prewarm dir: %v", err) + } + + unlock, err := lockFile(filepath.Join(prewarmDir, ".prewarm.lock")) + if err != nil { + fatalf("lock prewarm dir: %v", err) + } + defer unlock() + + if err := ensureLocalRegistry(ctx, registry, filepath.Join(prewarmDir, "registry")); err != nil { + fatalf("ensure local registry: %v", err) + } + + inspectClient, err := images.NewOCIClient(filepath.Join(prewarmDir, ".inspect-cache")) + if err != nil { + fatalf("create inspect client: %v", err) + } + + manifest := prewarmManifest{ + WarmedAt: time.Now().UTC().Format(time.RFC3339), + Registry: registry, + PrewarmDir: prewarmDir, + Images: make([]manifestImage, 0, len(imagesToWarm)), + } + + for _, source := range imagesToWarm { + entry, err := ensureMirroredImage(ctx, inspectClient, registry, source) + if err != nil { + fatalf("prewarm image %s: %v", source, err) + } + fmt.Printf("prewarm image source=%s local=%s digest=%s cache_hit=%t\n", entry.Source, entry.LocalRef, entry.Digest, entry.CacheHit) + manifest.Images = append(manifest.Images, entry) + } + + p := paths.New(prewarmDir) + systemMgr := system.NewManager(p) + if err := systemMgr.EnsureSystemFiles(ctx); err != nil { + fatalf("prewarm system files: %v", err) + } + initrdPath, err := systemMgr.GetInitrdPath() + if err != nil { + fatalf("get initrd path: %v", err) + } + initrdHash, err := fileHash16(initrdPath) + if err != nil { + fatalf("hash initrd: %v", err) + } + + manifest.System.KernelVersion = string(system.DefaultKernelVersion) + manifest.System.Arch = system.GetArch() + manifest.System.InitrdPath = initrdPath + manifest.System.InitrdHash = initrdHash + + manifestPath := filepath.Join(prewarmDir, "prewarm-manifest.json") + if err := writeJSON(manifestPath, manifest); err != nil { + fatalf("write manifest: %v", err) + } + fmt.Printf("prewarm complete manifest=%s\n", manifestPath) +} + +func ensureMirroredImage(ctx context.Context, inspector *images.OCIClient, registry, source string) (manifestImage, error) { + localRef, err := toLocalRegistryRef(registry, source) + if err != nil { + return manifestImage{}, err + } + + if digest, err := inspector.InspectManifest(ctx, localRef); err == nil { + return manifestImage{Source: source, LocalRef: localRef, Digest: digest, CacheHit: true}, nil + } + + res, err := images.MirrorBaseImage(ctx, "http://"+registry, images.MirrorRequest{SourceImage: source}, nil) + if err != nil { + return manifestImage{}, err + } + + digest, err := inspector.InspectManifest(ctx, localRef) + if err != nil { + digest = res.Digest + } + return manifestImage{Source: source, LocalRef: localRef, Digest: digest, CacheHit: false}, nil +} + +func toLocalRegistryRef(registry, source string) (string, error) { + ref, err := images.ParseNormalizedRef(source) + if err != nil { + return "", fmt.Errorf("parse source ref %q: %w", source, err) + } + + repo := strings.TrimPrefix(ref.Repository(), "docker.io/") + if repo == ref.Repository() { + repo = ref.Repository() + } + + out := registry + "/" + repo + if ref.Tag() != "" { + return out + ":" + ref.Tag(), nil + } + if ref.Digest() != "" { + return out + "@" + ref.Digest(), nil + } + return out + ":latest", nil +} + +func ensureLocalRegistry(ctx context.Context, registry, dataDir string) error { + if err := waitForRegistry(ctx, registry, 2*time.Second); err == nil { + return nil + } + + host, port, err := net.SplitHostPort(registry) + if err != nil { + return fmt.Errorf("registry must be host:port, got %q", registry) + } + if host != "127.0.0.1" && host != "localhost" { + return fmt.Errorf("auto-start supports localhost registry only, got %q", registry) + } + + if err := os.MkdirAll(dataDir, 0755); err != nil { + return err + } + + exists, err := dockerContainerExists(registryName) + if err != nil { + return err + } + + if exists { + if _, err := runCmd("docker", "start", registryName); err != nil { + // Keep going; this may fail if already running. + fmt.Printf("warning: docker start %s failed: %v\n", registryName, err) + } + if err := waitForRegistry(ctx, registry, 20*time.Second); err == nil { + return nil + } + + // Last resort for a broken existing container: recreate under lock. + if _, err := runCmd("docker", "rm", "-f", registryName); err != nil { + return err + } + } + + if _, err := runCmd("docker", "run", "-d", "--restart", "unless-stopped", "--name", registryName, + "-p", fmt.Sprintf("%s:%s:5000", host, port), + "-v", fmt.Sprintf("%s:/var/lib/registry", dataDir), + "registry:2"); err != nil { + return err + } + + return waitForRegistry(ctx, registry, 20*time.Second) +} + +func dockerContainerExists(name string) (bool, error) { + out, err := runCmd("docker", "ps", "-a", "--filter", "name=^/"+name+"$", "--format", "{{.Names}}") + if err != nil { + return false, err + } + for _, line := range strings.Split(out, "\n") { + if strings.TrimSpace(line) == name { + return true, nil + } + } + return false, nil +} + +func waitForRegistry(ctx context.Context, registry string, timeout time.Duration) error { + deadline := time.Now().Add(timeout) + url := "http://" + registry + "/v2/" + for time.Now().Before(deadline) { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) + if err == nil { + resp, err := http.DefaultClient.Do(req) + if err == nil { + resp.Body.Close() + if resp.StatusCode == http.StatusOK || resp.StatusCode == http.StatusUnauthorized { + return nil + } + } + } + time.Sleep(250 * time.Millisecond) + } + return fmt.Errorf("registry not healthy at %s", url) +} + +func lockFile(path string) (func(), error) { + f, err := os.OpenFile(path, os.O_CREATE|os.O_RDWR, 0644) + if err != nil { + return nil, err + } + if err := syscall.Flock(int(f.Fd()), syscall.LOCK_EX); err != nil { + f.Close() + return nil, err + } + return func() { + _ = syscall.Flock(int(f.Fd()), syscall.LOCK_UN) + _ = f.Close() + }, nil +} + +func runCmd(name string, args ...string) (string, error) { + out, err := exec.Command(name, args...).CombinedOutput() + if err != nil { + return "", fmt.Errorf("%s %s failed: %w\n%s", name, strings.Join(args, " "), err, string(out)) + } + return strings.TrimSpace(string(out)), nil +} + +func writeJSON(path string, v any) error { + data, err := json.MarshalIndent(v, "", " ") + if err != nil { + return err + } + return os.WriteFile(path, data, 0644) +} + +func fileHash16(path string) (string, error) { + f, err := os.Open(path) + if err != nil { + return "", err + } + defer f.Close() + + h := sha256.New() + if _, err := io.Copy(h, f); err != nil { + return "", err + } + return hex.EncodeToString(h.Sum(nil))[:16], nil +} + +func parseImageList(raw string) []string { + if strings.TrimSpace(raw) == "" { + return nil + } + parts := strings.Split(raw, ",") + out := make([]string, 0, len(parts)) + for _, p := range parts { + p = strings.TrimSpace(p) + if p != "" { + out = append(out, p) + } + } + return out +} + +func envOr(key, fallback string) string { + if v := strings.TrimSpace(os.Getenv(key)); v != "" { + return v + } + return fallback +} + +func defaultPrewarmDir() string { + osName := strings.ToLower(runtime.GOOS) + arch := strings.ToLower(system.GetArch()) + cacheRoot, err := os.UserCacheDir() + if err != nil || cacheRoot == "" { + cacheRoot = filepath.Join(os.TempDir(), "cache") + } + return filepath.Join(cacheRoot, "hypeman-ci", osName+"-"+arch) +} + +func trimRegistry(v string) string { + v = strings.TrimSpace(v) + v = strings.TrimPrefix(strings.TrimPrefix(v, "http://"), "https://") + return strings.TrimSuffix(v, "/") +} + +func fatalf(format string, args ...any) { + fmt.Fprintf(os.Stderr, format+"\n", args...) + os.Exit(1) +} diff --git a/lib/instances/exec_test.go b/lib/instances/exec_test.go index 016d338d..4d85dba5 100644 --- a/lib/instances/exec_test.go +++ b/lib/instances/exec_test.go @@ -67,12 +67,12 @@ func TestExecConcurrent(t *testing.T) { t.Log("Pulling nginx:alpine image...") _, err = imageManager.CreateImage(ctx, images.CreateImageRequest{ - Name: "docker.io/library/nginx:alpine", + Name: integrationTestImageRef(t, "docker.io/library/nginx:alpine"), }) require.NoError(t, err) for i := 0; i < 60; i++ { - img, err := imageManager.GetImage(ctx, "docker.io/library/nginx:alpine") + img, err := imageManager.GetImage(ctx, integrationTestImageRef(t, "docker.io/library/nginx:alpine")) if err == nil && img.Status == images.StatusReady { break } @@ -89,7 +89,7 @@ func TestExecConcurrent(t *testing.T) { t.Log("Creating nginx instance...") inst, err := manager.CreateInstance(ctx, CreateInstanceRequest{ Name: "exec-test", - Image: "docker.io/library/nginx:alpine", + Image: integrationTestImageRef(t, "docker.io/library/nginx:alpine"), Size: 2 * 1024 * 1024 * 1024, // 2GB (needs extra room for initrd with NVIDIA libs) HotplugSize: 512 * 1024 * 1024, OverlaySize: 1024 * 1024 * 1024, diff --git a/lib/instances/firecracker_test.go b/lib/instances/firecracker_test.go index 42110cdd..940c1786 100644 --- a/lib/instances/firecracker_test.go +++ b/lib/instances/firecracker_test.go @@ -26,6 +26,7 @@ import ( func setupTestManagerForFirecracker(t *testing.T) (*manager, string) { tmpDir := t.TempDir() + prepareIntegrationTestDataDir(t, tmpDir) cfg := &config.Config{ DataDir: tmpDir, Network: newParallelTestNetworkConfig(t), @@ -68,11 +69,11 @@ func createNginxImageAndWait(t *testing.T, ctx context.Context, imageManager ima t.Helper() nginxImage, err := imageManager.CreateImage(ctx, images.CreateImageRequest{ - Name: "docker.io/library/nginx:alpine", + Name: integrationTestImageRef(t, "docker.io/library/nginx:alpine"), }) require.NoError(t, err) - for i := 0; i < 180; i++ { + for i := 0; i < 60; i++ { img, err := imageManager.GetImage(ctx, nginxImage.Name) if err == nil && img.Status == images.StatusReady { return @@ -103,7 +104,7 @@ func TestFirecrackerStandbyAndRestore(t *testing.T) { inst, err := mgr.CreateInstance(ctx, CreateInstanceRequest{ Name: "test-firecracker-standby", - Image: "docker.io/library/nginx:alpine", + Image: integrationTestImageRef(t, "docker.io/library/nginx:alpine"), Size: 1024 * 1024 * 1024, OverlaySize: 10 * 1024 * 1024 * 1024, Vcpus: 1, @@ -152,7 +153,7 @@ func TestFirecrackerStopClearsStaleSnapshot(t *testing.T) { inst, err := mgr.CreateInstance(ctx, CreateInstanceRequest{ Name: "fc-stale-snapshot", - Image: "docker.io/library/nginx:alpine", + Image: integrationTestImageRef(t, "docker.io/library/nginx:alpine"), Size: 1024 * 1024 * 1024, OverlaySize: 10 * 1024 * 1024 * 1024, Vcpus: 1, @@ -218,7 +219,7 @@ func TestFirecrackerNetworkLifecycle(t *testing.T) { inst, err := mgr.CreateInstance(ctx, CreateInstanceRequest{ Name: "fc-net", - Image: "docker.io/library/nginx:alpine", + Image: integrationTestImageRef(t, "docker.io/library/nginx:alpine"), Size: 2 * 1024 * 1024 * 1024, HotplugSize: 512 * 1024 * 1024, OverlaySize: 5 * 1024 * 1024 * 1024, @@ -335,7 +336,7 @@ func TestFirecrackerForkFromRunningNetwork(t *testing.T) { source, err := mgr.CreateInstance(ctx, CreateInstanceRequest{ Name: "fc-fork-running-src", - Image: "docker.io/library/nginx:alpine", + Image: integrationTestImageRef(t, "docker.io/library/nginx:alpine"), Size: 2 * 1024 * 1024 * 1024, HotplugSize: 256 * 1024 * 1024, OverlaySize: 10 * 1024 * 1024 * 1024, diff --git a/lib/instances/fork_test.go b/lib/instances/fork_test.go index 0b8ba7f4..78ae1822 100644 --- a/lib/instances/fork_test.go +++ b/lib/instances/fork_test.go @@ -35,7 +35,7 @@ func TestForkInstance_VZStoppedSourceSupported(t *testing.T) { meta := &metadata{StoredMetadata: StoredMetadata{ Id: sourceID, Name: "fork-vz-source", - Image: "docker.io/library/alpine:latest", + Image: integrationTestImageRef(t, "docker.io/library/alpine:latest"), CreatedAt: time.Now(), HypervisorType: hypervisor.TypeVZ, HypervisorVersion: "test", @@ -111,7 +111,7 @@ func TestCleanupForkInstanceOnError(t *testing.T) { meta := &metadata{StoredMetadata: StoredMetadata{ Id: forkID, Name: "fork-cleanup-target", - Image: "docker.io/library/alpine:latest", + Image: integrationTestImageRef(t, "docker.io/library/alpine:latest"), CreatedAt: time.Now(), HypervisorType: hypervisor.TypeCloudHypervisor, HypervisorVersion: "test", @@ -180,7 +180,7 @@ func TestForkInstanceRejectsDuplicateNameForNonNetworkedSource(t *testing.T) { sourceMeta := &metadata{StoredMetadata: StoredMetadata{ Id: sourceID, Name: sourceID, - Image: "docker.io/library/alpine:latest", + Image: integrationTestImageRef(t, "docker.io/library/alpine:latest"), CreatedAt: now, StoppedAt: &now, HypervisorType: hypervisor.TypeCloudHypervisor, @@ -197,7 +197,7 @@ func TestForkInstanceRejectsDuplicateNameForNonNetworkedSource(t *testing.T) { existingMeta := &metadata{StoredMetadata: StoredMetadata{ Id: existingID, Name: "duplicate-name", - Image: "docker.io/library/alpine:latest", + Image: integrationTestImageRef(t, "docker.io/library/alpine:latest"), CreatedAt: now, StoppedAt: &now, HypervisorType: hypervisor.TypeCloudHypervisor, @@ -348,7 +348,7 @@ func TestForkCloudHypervisorFromRunningNetwork(t *testing.T) { require.NoError(t, err) t.Log("Ensuring nginx image...") - nginxImage, err := imageManager.CreateImage(ctx, images.CreateImageRequest{Name: "docker.io/library/nginx:alpine"}) + nginxImage, err := imageManager.CreateImage(ctx, images.CreateImageRequest{Name: integrationTestImageRef(t, "docker.io/library/nginx:alpine")}) require.NoError(t, err) imageName := nginxImage.Name @@ -372,7 +372,7 @@ func TestForkCloudHypervisorFromRunningNetwork(t *testing.T) { source, err := manager.CreateInstance(ctx, CreateInstanceRequest{ Name: "fork-running-src", - Image: "docker.io/library/nginx:alpine", + Image: integrationTestImageRef(t, "docker.io/library/nginx:alpine"), Size: 2 * 1024 * 1024 * 1024, HotplugSize: 256 * 1024 * 1024, OverlaySize: 10 * 1024 * 1024 * 1024, diff --git a/lib/instances/manager_darwin_test.go b/lib/instances/manager_darwin_test.go index 805c8c7b..d38c24d8 100644 --- a/lib/instances/manager_darwin_test.go +++ b/lib/instances/manager_darwin_test.go @@ -36,6 +36,7 @@ import ( func setupVZTestManager(t *testing.T) (*manager, string) { tmpDir, err := os.MkdirTemp("/tmp", "vz-") require.NoError(t, err) + prepareIntegrationTestDataDir(t, tmpDir) t.Cleanup(func() { os.RemoveAll(tmpDir) }) cfg := &config.Config{ @@ -116,7 +117,7 @@ func TestVZBasicLifecycle(t *testing.T) { t.Log("Pulling alpine:latest image...") alpineImage, err := imageManager.CreateImage(ctx, images.CreateImageRequest{ - Name: "docker.io/library/alpine:latest", + Name: integrationTestImageRef(t, "docker.io/library/alpine:latest"), }) require.NoError(t, err) @@ -144,7 +145,7 @@ func TestVZBasicLifecycle(t *testing.T) { // Create instance using vz hypervisor inst, err := mgr.CreateInstance(ctx, CreateInstanceRequest{ Name: "test-vz-lifecycle", - Image: "docker.io/library/alpine:latest", + Image: integrationTestImageRef(t, "docker.io/library/alpine:latest"), Size: 2 * 1024 * 1024 * 1024, OverlaySize: 10 * 1024 * 1024 * 1024, Vcpus: 1, @@ -286,7 +287,7 @@ func TestVZExecAndShutdown(t *testing.T) { t.Log("Pulling alpine:latest image...") alpineImage, err := imageManager.CreateImage(ctx, images.CreateImageRequest{ - Name: "docker.io/library/alpine:latest", + Name: integrationTestImageRef(t, "docker.io/library/alpine:latest"), }) require.NoError(t, err) @@ -310,7 +311,7 @@ func TestVZExecAndShutdown(t *testing.T) { inst, err := mgr.CreateInstance(ctx, CreateInstanceRequest{ Name: "test-vz-exec", - Image: "docker.io/library/alpine:latest", + Image: integrationTestImageRef(t, "docker.io/library/alpine:latest"), Size: 2 * 1024 * 1024 * 1024, OverlaySize: 10 * 1024 * 1024 * 1024, Vcpus: 1, @@ -395,7 +396,7 @@ func TestVZStandbyAndRestore(t *testing.T) { t.Log("Pulling alpine:latest image...") alpineImage, err := imageManager.CreateImage(ctx, images.CreateImageRequest{ - Name: "docker.io/library/alpine:latest", + Name: integrationTestImageRef(t, "docker.io/library/alpine:latest"), }) require.NoError(t, err) @@ -425,7 +426,7 @@ func TestVZStandbyAndRestore(t *testing.T) { // Create instance using vz hypervisor inst, err := mgr.CreateInstance(ctx, CreateInstanceRequest{ Name: "test-vz-standby", - Image: "docker.io/library/alpine:latest", + Image: integrationTestImageRef(t, "docker.io/library/alpine:latest"), Size: 2 * 1024 * 1024 * 1024, OverlaySize: 10 * 1024 * 1024 * 1024, Vcpus: 1, @@ -556,7 +557,7 @@ func TestVZForkFromRunningNetwork(t *testing.T) { t.Log("Pulling alpine:latest image...") alpineImage, err := imageManager.CreateImage(ctx, images.CreateImageRequest{ - Name: "docker.io/library/alpine:latest", + Name: integrationTestImageRef(t, "docker.io/library/alpine:latest"), }) require.NoError(t, err) @@ -584,7 +585,7 @@ func TestVZForkFromRunningNetwork(t *testing.T) { source, err := mgr.CreateInstance(ctx, CreateInstanceRequest{ Name: "test-vz-fork-src", - Image: "docker.io/library/alpine:latest", + Image: integrationTestImageRef(t, "docker.io/library/alpine:latest"), Size: 2 * 1024 * 1024 * 1024, OverlaySize: 10 * 1024 * 1024 * 1024, Vcpus: 1, diff --git a/lib/instances/manager_test.go b/lib/instances/manager_test.go index 99346cf6..641cd6a9 100644 --- a/lib/instances/manager_test.go +++ b/lib/instances/manager_test.go @@ -35,6 +35,7 @@ import ( // setupTestManager creates a manager and registers cleanup for any orphaned processes func setupTestManager(t *testing.T) (*manager, string) { tmpDir := t.TempDir() + prepareIntegrationTestDataDir(t, tmpDir) cfg := &config.Config{ DataDir: tmpDir, @@ -196,7 +197,7 @@ func TestBasicEndToEnd(t *testing.T) { // Pull nginx image (runs a daemon, won't exit) t.Log("Pulling nginx:alpine image...") nginxImage, err := imageManager.CreateImage(ctx, images.CreateImageRequest{ - Name: "docker.io/library/nginx:alpine", + Name: integrationTestImageRef(t, "docker.io/library/nginx:alpine"), }) require.NoError(t, err) @@ -253,7 +254,7 @@ func TestBasicEndToEnd(t *testing.T) { // Create instance with real nginx image and attached volume req := CreateInstanceRequest{ Name: "test-nginx", - Image: "docker.io/library/nginx:alpine", + Image: integrationTestImageRef(t, "docker.io/library/nginx:alpine"), Size: 2 * 1024 * 1024 * 1024, // 2GB (needs extra room for initrd with NVIDIA libs) HotplugSize: 512 * 1024 * 1024, // 512MB OverlaySize: 10 * 1024 * 1024 * 1024, // 10GB @@ -280,7 +281,7 @@ func TestBasicEndToEnd(t *testing.T) { // Verify instance fields assert.NotEmpty(t, inst.Id) assert.Equal(t, "test-nginx", inst.Name) - assert.Equal(t, "docker.io/library/nginx:alpine", inst.Image) + assert.Equal(t, integrationTestImageRef(t, "docker.io/library/nginx:alpine"), inst.Image) assert.Equal(t, StateRunning, inst.State) assert.False(t, inst.HasSnapshot) assert.NotEmpty(t, inst.KernelVersion) @@ -812,7 +813,7 @@ func TestAppExitPropagation(t *testing.T) { t.Log("Pulling alpine:latest image...") alpineImage, err := imageManager.CreateImage(ctx, images.CreateImageRequest{ - Name: "docker.io/library/alpine:latest", + Name: integrationTestImageRef(t, "docker.io/library/alpine:latest"), }) require.NoError(t, err) @@ -842,7 +843,7 @@ func TestAppExitPropagation(t *testing.T) { // causing exit code 127 ("command not found"). inst, err := manager.CreateInstance(ctx, CreateInstanceRequest{ Name: "test-exit-propagation", - Image: "docker.io/library/alpine:latest", + Image: integrationTestImageRef(t, "docker.io/library/alpine:latest"), Size: 512 * 1024 * 1024, // 512MB HotplugSize: 0, OverlaySize: 2 * 1024 * 1024 * 1024, // 2GB @@ -902,7 +903,7 @@ func TestOOMExitPropagation(t *testing.T) { t.Log("Pulling alpine:latest image...") alpineImage, err := imageManager.CreateImage(ctx, images.CreateImageRequest{ - Name: "docker.io/library/alpine:latest", + Name: integrationTestImageRef(t, "docker.io/library/alpine:latest"), }) require.NoError(t, err) @@ -943,7 +944,7 @@ func TestOOMExitPropagation(t *testing.T) { inst, err := manager.CreateInstance(ctx, CreateInstanceRequest{ Name: fmt.Sprintf("test-oom-%d", attempt), - Image: "docker.io/library/alpine:latest", + Image: integrationTestImageRef(t, "docker.io/library/alpine:latest"), Size: memBytes, HotplugSize: 0, OverlaySize: 2 * 1024 * 1024 * 1024, // 2GB @@ -1010,7 +1011,7 @@ func TestEntrypointEnvVars(t *testing.T) { // Pull bitnami/redis image t.Log("Pulling bitnami/redis image...") redisImage, err := imageManager.CreateImage(ctx, images.CreateImageRequest{ - Name: "docker.io/bitnami/redis:latest", + Name: integrationTestImageRef(t, "docker.io/bitnami/redis:latest"), }) require.NoError(t, err) @@ -1052,7 +1053,7 @@ func TestEntrypointEnvVars(t *testing.T) { testPassword := "test_secret_password_123" req := CreateInstanceRequest{ Name: "test-redis-env", - Image: "docker.io/bitnami/redis:latest", + Image: integrationTestImageRef(t, "docker.io/bitnami/redis:latest"), Size: 2 * 1024 * 1024 * 1024, HotplugSize: 512 * 1024 * 1024, OverlaySize: 10 * 1024 * 1024 * 1024, @@ -1255,7 +1256,7 @@ func TestStandbyAndRestore(t *testing.T) { // Pull nginx image (reuse if already pulled in previous test) t.Log("Ensuring nginx:alpine image...") nginxImage, err := imageManager.CreateImage(ctx, images.CreateImageRequest{ - Name: "docker.io/library/nginx:alpine", + Name: integrationTestImageRef(t, "docker.io/library/nginx:alpine"), }) require.NoError(t, err) @@ -1283,7 +1284,7 @@ func TestStandbyAndRestore(t *testing.T) { t.Log("Creating instance...") req := CreateInstanceRequest{ Name: "test-standby", - Image: "docker.io/library/nginx:alpine", + Image: integrationTestImageRef(t, "docker.io/library/nginx:alpine"), Size: 2 * 1024 * 1024 * 1024, // 2GB (needs extra room for initrd with NVIDIA libs) HotplugSize: 512 * 1024 * 1024, OverlaySize: 10 * 1024 * 1024 * 1024, diff --git a/lib/instances/network_test.go b/lib/instances/network_test.go index 4bd93a95..af7e25c4 100644 --- a/lib/instances/network_test.go +++ b/lib/instances/network_test.go @@ -30,7 +30,7 @@ func TestCreateInstanceWithNetwork(t *testing.T) { // Pull nginx:alpine image (long-running workload) t.Log("Pulling nginx:alpine image...") nginxImage, err := manager.imageManager.CreateImage(ctx, images.CreateImageRequest{ - Name: "docker.io/library/nginx:alpine", + Name: integrationTestImageRef(t, "docker.io/library/nginx:alpine"), }) require.NoError(t, err) @@ -65,7 +65,7 @@ func TestCreateInstanceWithNetwork(t *testing.T) { t.Log("Creating instance with default network...") inst, err := manager.CreateInstance(ctx, CreateInstanceRequest{ Name: "test-net-instance", - Image: "docker.io/library/nginx:alpine", + Image: integrationTestImageRef(t, "docker.io/library/nginx:alpine"), Size: 2 * 1024 * 1024 * 1024, // 2GB (needs extra room for initrd with NVIDIA libs) HotplugSize: 512 * 1024 * 1024, OverlaySize: 5 * 1024 * 1024 * 1024, diff --git a/lib/instances/qemu_test.go b/lib/instances/qemu_test.go index 7fd78162..ab5aa853 100644 --- a/lib/instances/qemu_test.go +++ b/lib/instances/qemu_test.go @@ -33,6 +33,7 @@ import ( // setupTestManagerForQEMU creates a manager configured to use QEMU as the default hypervisor func setupTestManagerForQEMU(t *testing.T) (*manager, string) { tmpDir := t.TempDir() + prepareIntegrationTestDataDir(t, tmpDir) cfg := &config.Config{ DataDir: tmpDir, @@ -189,7 +190,7 @@ func TestQEMUBasicEndToEnd(t *testing.T) { // Pull nginx image t.Log("Pulling nginx:alpine image...") nginxImage, err := imageManager.CreateImage(ctx, images.CreateImageRequest{ - Name: "docker.io/library/nginx:alpine", + Name: integrationTestImageRef(t, "docker.io/library/nginx:alpine"), }) require.NoError(t, err) @@ -246,7 +247,7 @@ func TestQEMUBasicEndToEnd(t *testing.T) { // Create instance with QEMU hypervisor req := CreateInstanceRequest{ Name: "test-nginx-qemu", - Image: "docker.io/library/nginx:alpine", + Image: integrationTestImageRef(t, "docker.io/library/nginx:alpine"), Size: 2 * 1024 * 1024 * 1024, // 2GB HotplugSize: 512 * 1024 * 1024, // 512MB (unused by QEMU, but part of the request) OverlaySize: 10 * 1024 * 1024 * 1024, // 10GB @@ -274,7 +275,7 @@ func TestQEMUBasicEndToEnd(t *testing.T) { // Verify instance fields assert.NotEmpty(t, inst.Id) assert.Equal(t, "test-nginx-qemu", inst.Name) - assert.Equal(t, "docker.io/library/nginx:alpine", inst.Image) + assert.Equal(t, integrationTestImageRef(t, "docker.io/library/nginx:alpine"), inst.Image) assert.Equal(t, StateRunning, inst.State) assert.Equal(t, hypervisor.TypeQEMU, inst.HypervisorType) assert.False(t, inst.HasSnapshot) @@ -592,7 +593,7 @@ func TestQEMUEntrypointEnvVars(t *testing.T) { // Pull bitnami/redis image t.Log("Pulling bitnami/redis image...") redisImage, err := imageManager.CreateImage(ctx, images.CreateImageRequest{ - Name: "docker.io/bitnami/redis:latest", + Name: integrationTestImageRef(t, "docker.io/bitnami/redis:latest"), }) require.NoError(t, err) @@ -634,7 +635,7 @@ func TestQEMUEntrypointEnvVars(t *testing.T) { testPassword := "test_secret_password_123" req := CreateInstanceRequest{ Name: "test-redis-env", - Image: "docker.io/bitnami/redis:latest", + Image: integrationTestImageRef(t, "docker.io/bitnami/redis:latest"), Size: 2 * 1024 * 1024 * 1024, HotplugSize: 512 * 1024 * 1024, OverlaySize: 10 * 1024 * 1024 * 1024, @@ -770,7 +771,7 @@ func TestQEMUStandbyAndRestore(t *testing.T) { // Pull nginx image t.Log("Pulling nginx:alpine image...") nginxImage, err := imageManager.CreateImage(ctx, images.CreateImageRequest{ - Name: "docker.io/library/nginx:alpine", + Name: integrationTestImageRef(t, "docker.io/library/nginx:alpine"), }) require.NoError(t, err) @@ -801,7 +802,7 @@ func TestQEMUStandbyAndRestore(t *testing.T) { // Create instance with QEMU hypervisor (no network for simpler test) req := CreateInstanceRequest{ Name: "test-qemu-standby", - Image: "docker.io/library/nginx:alpine", + Image: integrationTestImageRef(t, "docker.io/library/nginx:alpine"), Size: 2 * 1024 * 1024 * 1024, // 2GB HotplugSize: 512 * 1024 * 1024, // 512MB (unused by QEMU) OverlaySize: 10 * 1024 * 1024 * 1024, // 10GB @@ -886,7 +887,7 @@ func TestQEMUForkFromRunningNetwork(t *testing.T) { require.NoError(t, err) t.Log("Ensuring nginx image...") - nginxImage, err := imageManager.CreateImage(ctx, images.CreateImageRequest{Name: "docker.io/library/nginx:alpine"}) + nginxImage, err := imageManager.CreateImage(ctx, images.CreateImageRequest{Name: integrationTestImageRef(t, "docker.io/library/nginx:alpine")}) require.NoError(t, err) imageName := nginxImage.Name @@ -908,7 +909,7 @@ func TestQEMUForkFromRunningNetwork(t *testing.T) { source, err := manager.CreateInstance(ctx, CreateInstanceRequest{ Name: "qemu-fork-running-src", - Image: "docker.io/library/nginx:alpine", + Image: integrationTestImageRef(t, "docker.io/library/nginx:alpine"), Size: 2 * 1024 * 1024 * 1024, HotplugSize: 256 * 1024 * 1024, OverlaySize: 10 * 1024 * 1024 * 1024, diff --git a/lib/instances/snapshot_integration_scenario_test.go b/lib/instances/snapshot_integration_scenario_test.go index 856f925e..37e1ae7a 100644 --- a/lib/instances/snapshot_integration_scenario_test.go +++ b/lib/instances/snapshot_integration_scenario_test.go @@ -39,14 +39,14 @@ func runStandbySnapshotScenario(t *testing.T, mgr *manager, tmpDir string, cfg s } imageManager, err := images.NewManager(p, 1, nil) requireNoErr(err) - snapshottest.EnsureImageReady(t, ctx, p, imageManager, "docker.io/library/alpine:latest") + snapshottest.EnsureImageReady(t, ctx, p, imageManager, integrationTestImageRef(t, "docker.io/library/alpine:latest")) systemManager := system.NewManager(p) requireNoErr(systemManager.EnsureSystemFiles(ctx)) source, err := mgr.CreateInstance(ctx, CreateInstanceRequest{ Name: cfg.sourceName, - Image: "docker.io/library/alpine:latest", + Image: integrationTestImageRef(t, "docker.io/library/alpine:latest"), Size: 1024 * 1024 * 1024, OverlaySize: 10 * 1024 * 1024 * 1024, Vcpus: 1, diff --git a/lib/instances/snapshot_test.go b/lib/instances/snapshot_test.go index 506c4d45..0d154cc0 100644 --- a/lib/instances/snapshot_test.go +++ b/lib/instances/snapshot_test.go @@ -92,7 +92,7 @@ func createStoppedSnapshotSourceFixture(t *testing.T, mgr *manager, id, name str meta := &metadata{StoredMetadata: StoredMetadata{ Id: id, Name: name, - Image: "docker.io/library/alpine:latest", + Image: integrationTestImageRef(t, "docker.io/library/alpine:latest"), CreatedAt: now, StoppedAt: &now, HypervisorType: hvType, diff --git a/lib/instances/test_prewarm_test.go b/lib/instances/test_prewarm_test.go new file mode 100644 index 00000000..a7a58289 --- /dev/null +++ b/lib/instances/test_prewarm_test.go @@ -0,0 +1,136 @@ +package instances + +import ( + "os" + "path/filepath" + "strings" + "sync" + "testing" + + "github.com/kernel/hypeman/lib/images" +) + +const ( + testPrewarmDirEnv = "HYPEMAN_TEST_PREWARM_DIR" + testPrewarmStrictEnv = "HYPEMAN_TEST_PREWARM_STRICT" + testRegistryEnv = "HYPEMAN_TEST_REGISTRY" +) + +var prewarmLogOnce sync.Once +var registryLogOnce sync.Once + +func integrationTestImageRef(t *testing.T, source string) string { + t.Helper() + + registry := strings.TrimSpace(os.Getenv(testRegistryEnv)) + if registry == "" { + if isTestPrewarmStrict() { + t.Fatalf("%s is required when %s is enabled", testRegistryEnv, testPrewarmStrictEnv) + } + return source + } + + registry = strings.TrimPrefix(strings.TrimPrefix(registry, "http://"), "https://") + if registry == "" { + t.Fatalf("%s must not be empty", testRegistryEnv) + } + + ref, err := images.ParseNormalizedRef(source) + if err != nil { + t.Fatalf("parse source image ref %q: %v", source, err) + } + + repo := ref.Repository() + if !strings.HasPrefix(repo, "docker.io/") { + return source + } + repo = strings.TrimPrefix(repo, "docker.io/") + + if ref.Tag() != "" { + mapped := registry + "/" + repo + ":" + ref.Tag() + registryLogOnce.Do(func() { + t.Logf("using test registry mirror source=%s mapped=%s", source, mapped) + }) + return mapped + } + if ref.Digest() != "" { + mapped := registry + "/" + repo + "@" + ref.Digest() + registryLogOnce.Do(func() { + t.Logf("using test registry mirror source=%s mapped=%s", source, mapped) + }) + return mapped + } + + mapped := registry + "/" + repo + ":latest" + registryLogOnce.Do(func() { + t.Logf("using test registry mirror source=%s mapped=%s", source, mapped) + }) + return mapped +} + +func prepareIntegrationTestDataDir(t *testing.T, tmpDir string) { + t.Helper() + + prewarmDir := strings.TrimSpace(os.Getenv(testPrewarmDirEnv)) + if prewarmDir == "" { + if isTestPrewarmStrict() { + t.Fatalf("%s is required when %s is enabled", testPrewarmDirEnv, testPrewarmStrictEnv) + } + return + } + + manifest := filepath.Join(prewarmDir, "prewarm-manifest.json") + if _, err := os.Stat(manifest); err != nil { + if isTestPrewarmStrict() { + t.Fatalf("prewarm manifest missing at %s: %v", manifest, err) + } + return + } + + srcSystemDir := filepath.Join(prewarmDir, "system") + if _, err := os.Stat(srcSystemDir); err != nil { + if isTestPrewarmStrict() { + t.Fatalf("prewarm system directory missing at %s: %v", srcSystemDir, err) + } + return + } + + dstSystemDir := filepath.Join(tmpDir, "system") + if err := os.MkdirAll(dstSystemDir, 0755); err != nil { + t.Fatalf("mkdir %s: %v", dstSystemDir, err) + } + linkSubdir(t, srcSystemDir, dstSystemDir, "kernel") + linkSubdir(t, srcSystemDir, dstSystemDir, "initrd") + + prewarmLogOnce.Do(func() { + t.Logf("using prewarmed test cache dir=%s registry=%s", prewarmDir, os.Getenv(testRegistryEnv)) + }) +} + +func linkSubdir(t *testing.T, srcSystemDir, dstSystemDir, subdir string) { + t.Helper() + + src := filepath.Join(srcSystemDir, subdir) + if _, err := os.Stat(src); err != nil { + if isTestPrewarmStrict() { + t.Fatalf("prewarm system subdir missing at %s: %v", src, err) + } + return + } + + dst := filepath.Join(dstSystemDir, subdir) + if _, err := os.Lstat(dst); err == nil { + return + } else if !os.IsNotExist(err) { + t.Fatalf("stat %s: %v", dst, err) + } + + if err := os.Symlink(src, dst); err != nil { + t.Fatalf("symlink %s -> %s: %v", dst, src, err) + } +} + +func isTestPrewarmStrict() bool { + v := strings.TrimSpace(os.Getenv(testPrewarmStrictEnv)) + return v == "1" || strings.EqualFold(v, "true") || strings.EqualFold(v, "yes") +} diff --git a/lib/instances/volumes_test.go b/lib/instances/volumes_test.go index e829c8f5..1dc3b113 100644 --- a/lib/instances/volumes_test.go +++ b/lib/instances/volumes_test.go @@ -60,12 +60,12 @@ func TestVolumeMultiAttachReadOnly(t *testing.T) { t.Log("Pulling alpine image...") _, err = imageManager.CreateImage(ctx, images.CreateImageRequest{ - Name: "docker.io/library/alpine:latest", + Name: integrationTestImageRef(t, "docker.io/library/alpine:latest"), }) require.NoError(t, err) for i := 0; i < 60; i++ { - img, err := imageManager.GetImage(ctx, "docker.io/library/alpine:latest") + img, err := imageManager.GetImage(ctx, integrationTestImageRef(t, "docker.io/library/alpine:latest")) if err == nil && img.Status == images.StatusReady { break } @@ -93,7 +93,7 @@ func TestVolumeMultiAttachReadOnly(t *testing.T) { t.Log("Phase 1: Creating writer instance with read-write volume...") writerInst, err := manager.CreateInstance(ctx, CreateInstanceRequest{ Name: "writer", - Image: "docker.io/library/alpine:latest", + Image: integrationTestImageRef(t, "docker.io/library/alpine:latest"), Size: 2 * 1024 * 1024 * 1024, // 2GB (needs extra room for initrd with NVIDIA libs) HotplugSize: 512 * 1024 * 1024, OverlaySize: 1024 * 1024 * 1024, @@ -136,7 +136,7 @@ func TestVolumeMultiAttachReadOnly(t *testing.T) { reader1, err := manager.CreateInstance(ctx, CreateInstanceRequest{ Name: "reader-1", - Image: "docker.io/library/alpine:latest", + Image: integrationTestImageRef(t, "docker.io/library/alpine:latest"), Size: 2 * 1024 * 1024 * 1024, // 2GB (needs extra room for initrd with NVIDIA libs) HotplugSize: 512 * 1024 * 1024, OverlaySize: 1024 * 1024 * 1024, @@ -153,7 +153,7 @@ func TestVolumeMultiAttachReadOnly(t *testing.T) { // Reader 2 uses overlay mode: can read base data AND write to its own overlay reader2, err := manager.CreateInstance(ctx, CreateInstanceRequest{ Name: "reader-2-overlay", - Image: "docker.io/library/alpine:latest", + Image: integrationTestImageRef(t, "docker.io/library/alpine:latest"), Size: 2 * 1024 * 1024 * 1024, // 2GB (needs extra room for initrd with NVIDIA libs) HotplugSize: 512 * 1024 * 1024, OverlaySize: 1024 * 1024 * 1024, @@ -247,12 +247,12 @@ func TestOverlayDiskCleanupOnDelete(t *testing.T) { t.Log("Pulling alpine image...") _, err = imageManager.CreateImage(ctx, images.CreateImageRequest{ - Name: "docker.io/library/alpine:latest", + Name: integrationTestImageRef(t, "docker.io/library/alpine:latest"), }) require.NoError(t, err) for i := 0; i < 60; i++ { - img, err := imageManager.GetImage(ctx, "docker.io/library/alpine:latest") + img, err := imageManager.GetImage(ctx, integrationTestImageRef(t, "docker.io/library/alpine:latest")) if err == nil && img.Status == images.StatusReady { break } @@ -274,7 +274,7 @@ func TestOverlayDiskCleanupOnDelete(t *testing.T) { // Create instance with overlay volume inst, err := manager.CreateInstance(ctx, CreateInstanceRequest{ Name: "overlay-cleanup-test", - Image: "docker.io/library/alpine:latest", + Image: integrationTestImageRef(t, "docker.io/library/alpine:latest"), Size: 2 * 1024 * 1024 * 1024, // 2GB (needs extra room for initrd with NVIDIA libs) HotplugSize: 512 * 1024 * 1024, OverlaySize: 1024 * 1024 * 1024, @@ -358,12 +358,12 @@ func TestVolumeFromArchive(t *testing.T) { t.Log("Pulling alpine image...") _, err = imageManager.CreateImage(ctx, images.CreateImageRequest{ - Name: "docker.io/library/alpine:latest", + Name: integrationTestImageRef(t, "docker.io/library/alpine:latest"), }) require.NoError(t, err) for i := 0; i < 60; i++ { - img, err := imageManager.GetImage(ctx, "docker.io/library/alpine:latest") + img, err := imageManager.GetImage(ctx, integrationTestImageRef(t, "docker.io/library/alpine:latest")) if err == nil && img.Status == images.StatusReady { break } @@ -400,7 +400,7 @@ func TestVolumeFromArchive(t *testing.T) { t.Log("Creating instance with archive volume...") inst, err := manager.CreateInstance(ctx, CreateInstanceRequest{ Name: "archive-reader", - Image: "docker.io/library/alpine:latest", + Image: integrationTestImageRef(t, "docker.io/library/alpine:latest"), Size: 2 * 1024 * 1024 * 1024, // 2GB (needs extra room for initrd with NVIDIA libs) HotplugSize: 512 * 1024 * 1024, OverlaySize: 1024 * 1024 * 1024, From 8a48b40e9881bd02f4c9db309377ba86a7c636ae Mon Sep 17 00:00:00 2001 From: Steven Miller Date: Sat, 7 Mar 2026 15:23:59 -0500 Subject: [PATCH 04/10] tests: prewarm hypervisor binaries to avoid extraction races --- cmd/test-prewarm/main.go | 19 +++++++++++++++++++ lib/instances/test_prewarm_test.go | 1 + 2 files changed, 20 insertions(+) diff --git a/cmd/test-prewarm/main.go b/cmd/test-prewarm/main.go index b6c2130f..fe3818c5 100644 --- a/cmd/test-prewarm/main.go +++ b/cmd/test-prewarm/main.go @@ -17,9 +17,11 @@ import ( "syscall" "time" + "github.com/kernel/hypeman/lib/hypervisor/firecracker" "github.com/kernel/hypeman/lib/images" "github.com/kernel/hypeman/lib/paths" "github.com/kernel/hypeman/lib/system" + "github.com/kernel/hypeman/lib/vmm" ) const ( @@ -50,6 +52,8 @@ type prewarmManifest struct { Arch string `json:"arch"` InitrdPath string `json:"initrd_path"` InitrdHash string `json:"initrd_hash"` + CHBinaries int `json:"ch_binaries"` + FCBinaryPath string `json:"fc_binary_path"` } `json:"system"` } @@ -104,6 +108,19 @@ func main() { if err := systemMgr.EnsureSystemFiles(ctx); err != nil { fatalf("prewarm system files: %v", err) } + + chBinaries := 0 + for _, version := range vmm.SupportedVersions { + if _, err := vmm.GetBinaryPath(p, version); err != nil { + fatalf("prewarm cloud-hypervisor binary %s: %v", version, err) + } + chBinaries++ + } + fcPath, err := firecracker.NewStarter().GetBinaryPath(p, "") + if err != nil { + fatalf("prewarm firecracker binary: %v", err) + } + initrdPath, err := systemMgr.GetInitrdPath() if err != nil { fatalf("get initrd path: %v", err) @@ -117,6 +134,8 @@ func main() { manifest.System.Arch = system.GetArch() manifest.System.InitrdPath = initrdPath manifest.System.InitrdHash = initrdHash + manifest.System.CHBinaries = chBinaries + manifest.System.FCBinaryPath = fcPath manifestPath := filepath.Join(prewarmDir, "prewarm-manifest.json") if err := writeJSON(manifestPath, manifest); err != nil { diff --git a/lib/instances/test_prewarm_test.go b/lib/instances/test_prewarm_test.go index a7a58289..9e1278c2 100644 --- a/lib/instances/test_prewarm_test.go +++ b/lib/instances/test_prewarm_test.go @@ -101,6 +101,7 @@ func prepareIntegrationTestDataDir(t *testing.T, tmpDir string) { } linkSubdir(t, srcSystemDir, dstSystemDir, "kernel") linkSubdir(t, srcSystemDir, dstSystemDir, "initrd") + linkSubdir(t, srcSystemDir, dstSystemDir, "binaries") prewarmLogOnce.Do(func() { t.Logf("using prewarmed test cache dir=%s registry=%s", prewarmDir, os.Getenv(testRegistryEnv)) From 328ace91e6b949778b7ebd56db5fabde6c5b8ebc Mon Sep 17 00:00:00 2001 From: Steven Miller Date: Sat, 7 Mar 2026 15:26:53 -0500 Subject: [PATCH 05/10] vmm: lock and atomically install extracted cloud-hypervisor binaries --- cmd/test-prewarm/hypervisor_binaries_linux.go | 28 +++++++++++ cmd/test-prewarm/hypervisor_binaries_other.go | 9 ++++ cmd/test-prewarm/main.go | 13 +---- lib/vmm/binaries_linux.go | 49 +++++++++++++++++-- 4 files changed, 83 insertions(+), 16 deletions(-) create mode 100644 cmd/test-prewarm/hypervisor_binaries_linux.go create mode 100644 cmd/test-prewarm/hypervisor_binaries_other.go diff --git a/cmd/test-prewarm/hypervisor_binaries_linux.go b/cmd/test-prewarm/hypervisor_binaries_linux.go new file mode 100644 index 00000000..7504b27d --- /dev/null +++ b/cmd/test-prewarm/hypervisor_binaries_linux.go @@ -0,0 +1,28 @@ +//go:build linux + +package main + +import ( + "fmt" + + "github.com/kernel/hypeman/lib/hypervisor/firecracker" + "github.com/kernel/hypeman/lib/paths" + "github.com/kernel/hypeman/lib/vmm" +) + +func ensureHypervisorBinaries(p *paths.Paths) (int, string, error) { + chBinaries := 0 + for _, version := range vmm.SupportedVersions { + if _, err := vmm.GetBinaryPath(p, version); err != nil { + return 0, "", fmt.Errorf("cloud-hypervisor %s: %w", version, err) + } + chBinaries++ + } + + fcPath, err := firecracker.NewStarter().GetBinaryPath(p, "") + if err != nil { + return 0, "", fmt.Errorf("firecracker: %w", err) + } + + return chBinaries, fcPath, nil +} diff --git a/cmd/test-prewarm/hypervisor_binaries_other.go b/cmd/test-prewarm/hypervisor_binaries_other.go new file mode 100644 index 00000000..e8d43367 --- /dev/null +++ b/cmd/test-prewarm/hypervisor_binaries_other.go @@ -0,0 +1,9 @@ +//go:build !linux + +package main + +import "github.com/kernel/hypeman/lib/paths" + +func ensureHypervisorBinaries(_ *paths.Paths) (int, string, error) { + return 0, "", nil +} diff --git a/cmd/test-prewarm/main.go b/cmd/test-prewarm/main.go index fe3818c5..e710f5db 100644 --- a/cmd/test-prewarm/main.go +++ b/cmd/test-prewarm/main.go @@ -17,11 +17,9 @@ import ( "syscall" "time" - "github.com/kernel/hypeman/lib/hypervisor/firecracker" "github.com/kernel/hypeman/lib/images" "github.com/kernel/hypeman/lib/paths" "github.com/kernel/hypeman/lib/system" - "github.com/kernel/hypeman/lib/vmm" ) const ( @@ -109,16 +107,9 @@ func main() { fatalf("prewarm system files: %v", err) } - chBinaries := 0 - for _, version := range vmm.SupportedVersions { - if _, err := vmm.GetBinaryPath(p, version); err != nil { - fatalf("prewarm cloud-hypervisor binary %s: %v", version, err) - } - chBinaries++ - } - fcPath, err := firecracker.NewStarter().GetBinaryPath(p, "") + chBinaries, fcPath, err := ensureHypervisorBinaries(p) if err != nil { - fatalf("prewarm firecracker binary: %v", err) + fatalf("prewarm hypervisor binaries: %v", err) } initrdPath, err := systemMgr.GetInitrdPath() diff --git a/lib/vmm/binaries_linux.go b/lib/vmm/binaries_linux.go index 73064a41..b827ad41 100644 --- a/lib/vmm/binaries_linux.go +++ b/lib/vmm/binaries_linux.go @@ -8,6 +8,7 @@ import ( "os" "path/filepath" "runtime" + "syscall" "github.com/kernel/hypeman/lib/paths" ) @@ -44,21 +45,59 @@ func ExtractBinary(p *paths.Paths, version CHVersion) (string, error) { return extractPath, nil } - // Create directory - if err := os.MkdirAll(filepath.Dir(extractPath), 0755); err != nil { + extractDir := filepath.Dir(extractPath) + if err := os.MkdirAll(extractDir, 0755); err != nil { return "", fmt.Errorf("create binaries dir: %w", err) } + lockFile, err := os.OpenFile(extractPath+".lock", os.O_CREATE|os.O_RDWR, 0644) + if err != nil { + return "", fmt.Errorf("open extraction lock: %w", err) + } + defer lockFile.Close() + if err := syscall.Flock(int(lockFile.Fd()), syscall.LOCK_EX); err != nil { + return "", fmt.Errorf("lock extraction: %w", err) + } + defer syscall.Flock(int(lockFile.Fd()), syscall.LOCK_UN) + + // Another process may have extracted it while we waited on the lock. + if _, err := os.Stat(extractPath); err == nil { + return extractPath, nil + } + // Read embedded binary data, err := binaryFS.ReadFile(embeddedPath) if err != nil { return "", fmt.Errorf("read embedded binary: %w", err) } - // Write to filesystem - if err := os.WriteFile(extractPath, data, 0755); err != nil { - return "", fmt.Errorf("write binary: %w", err) + tmpFile, err := os.CreateTemp(extractDir, "cloud-hypervisor-*") + if err != nil { + return "", fmt.Errorf("create temp binary: %w", err) + } + tmpPath := tmpFile.Name() + cleanupTmp := true + defer func() { + if cleanupTmp { + _ = os.Remove(tmpPath) + } + }() + + if _, err := tmpFile.Write(data); err != nil { + _ = tmpFile.Close() + return "", fmt.Errorf("write temp binary: %w", err) + } + if err := tmpFile.Chmod(0755); err != nil { + _ = tmpFile.Close() + return "", fmt.Errorf("chmod temp binary: %w", err) + } + if err := tmpFile.Close(); err != nil { + return "", fmt.Errorf("close temp binary: %w", err) + } + if err := os.Rename(tmpPath, extractPath); err != nil { + return "", fmt.Errorf("install binary: %w", err) } + cleanupTmp = false return extractPath, nil } From db327bbc4a735925da1fbe19053b56a4581e05f1 Mon Sep 17 00:00:00 2001 From: Steven Miller Date: Sat, 7 Mar 2026 15:30:48 -0500 Subject: [PATCH 06/10] tests: require prewarmed system binaries only on linux --- lib/instances/test_prewarm_test.go | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/lib/instances/test_prewarm_test.go b/lib/instances/test_prewarm_test.go index 9e1278c2..a0aa1a03 100644 --- a/lib/instances/test_prewarm_test.go +++ b/lib/instances/test_prewarm_test.go @@ -3,6 +3,7 @@ package instances import ( "os" "path/filepath" + "runtime" "strings" "sync" "testing" @@ -99,21 +100,21 @@ func prepareIntegrationTestDataDir(t *testing.T, tmpDir string) { if err := os.MkdirAll(dstSystemDir, 0755); err != nil { t.Fatalf("mkdir %s: %v", dstSystemDir, err) } - linkSubdir(t, srcSystemDir, dstSystemDir, "kernel") - linkSubdir(t, srcSystemDir, dstSystemDir, "initrd") - linkSubdir(t, srcSystemDir, dstSystemDir, "binaries") + linkSubdir(t, srcSystemDir, dstSystemDir, "kernel", true) + linkSubdir(t, srcSystemDir, dstSystemDir, "initrd", true) + linkSubdir(t, srcSystemDir, dstSystemDir, "binaries", runtime.GOOS == "linux") prewarmLogOnce.Do(func() { t.Logf("using prewarmed test cache dir=%s registry=%s", prewarmDir, os.Getenv(testRegistryEnv)) }) } -func linkSubdir(t *testing.T, srcSystemDir, dstSystemDir, subdir string) { +func linkSubdir(t *testing.T, srcSystemDir, dstSystemDir, subdir string, required bool) { t.Helper() src := filepath.Join(srcSystemDir, subdir) if _, err := os.Stat(src); err != nil { - if isTestPrewarmStrict() { + if required && isTestPrewarmStrict() { t.Fatalf("prewarm system subdir missing at %s: %v", src, err) } return From 3cacd247faf431484a5a8ae8ae3284972be799fe Mon Sep 17 00:00:00 2001 From: Steven Miller Date: Sat, 7 Mar 2026 15:33:47 -0500 Subject: [PATCH 07/10] ci: consecutive stability check 2 of 5 From dc63e252e70ed60d991f9dbf22af59bca2629c54 Mon Sep 17 00:00:00 2001 From: Steven Miller Date: Sat, 7 Mar 2026 15:37:13 -0500 Subject: [PATCH 08/10] ci: consecutive stability check 3 of 5 From 0e16b22b0405f89c91f862f8ffe3cbb8b318507b Mon Sep 17 00:00:00 2001 From: Steven Miller Date: Sat, 7 Mar 2026 15:40:23 -0500 Subject: [PATCH 09/10] ci: consecutive stability check 4 of 5 From 7fbf752819ea62b89658d4a513e407aeb8e3186d Mon Sep 17 00:00:00 2001 From: Steven Miller Date: Sat, 7 Mar 2026 15:43:09 -0500 Subject: [PATCH 10/10] ci: consecutive stability check 5 of 5