minor

umswmayj · umswmayj · commit c90dd80acaca · 2026-03-12T15:17:30.000+01:00
diff --git a/internal/scheduling/reservations/failover/controller.go b/internal/scheduling/reservations/failover/controller.go
@@ -183,6 +183,9 @@ func (c *FailoverReservationController) validateReservation(ctx context.Context,
 			return false
 		}
 
+		// TODO: we invalidate the entire reservation as soon as one VM is no longer placeable.
+		// That is probably OK: most likely, due to concurrency, we simply ran out of space and then all VMs are affected.
+		// However, it is also possible that the failure is caused by anti-affinity rules.
 		if !valid {
 			log.Info("VM failed validation for reservation host",
 				"reservationName", res.Name,
@@ -490,7 +493,8 @@ func reconcileRemoveEmptyReservations(
 
 // selectVMsToProcess selects a subset of VMs to process based on MaxVMsToProcess limit.
 // VMs are sorted by memory (largest first) to prioritize large VMs for failover reservations.
-// A rotating offset (every 4 reconciliations) ensures different VMs are tried
+// 3 out of 4 reconciliations start at offset 0 (process largest VMs first).
+// Every 4th reconciliation uses a rotating offset to try different VMs
 // if the largest VMs consistently fail to get reservations.
 func (c *FailoverReservationController) selectVMsToProcess(
 	vmsMissingFailover []vmFailoverNeed,
@@ -508,9 +512,12 @@ func (c *FailoverReservationController) selectVMsToProcess(
 		return vmsMissingFailover, false
 	}
 
-	// Rotate every 4 reconciliations to try different VMs if large ones fail
-	rotationPeriod := int64(4)
-	offset := int((c.reconcileCount / rotationPeriod) % int64(len(vmsMissingFailover)))
+	// 3 out of 4 runs start at offset 0, every 4th run uses reconcileCount as offset
+	offset := 0
+	if c.reconcileCount%4 == 0 {
+		// Every 4th reconciliation, use reconcileCount as offset (mod vmCount to wrap around)
+		offset = int(c.reconcileCount) % len(vmsMissingFailover)
+	}
 
 	// Select VMs starting from offset, wrapping around
 	selected = make([]vmFailoverNeed, 0, maxToProcess)
@@ -523,8 +530,7 @@ func (c *FailoverReservationController) selectVMsToProcess(
 		"totalVMsMissingFailover", len(vmsMissingFailover),
 		"maxToProcess", maxToProcess,
 		"offset", offset,
-		"reconcileCount", c.reconcileCount,
-		"rotationPeriod", rotationPeriod)
+		"reconcileCount", c.reconcileCount)
 
 	return selected, true
 }
@@ -805,12 +811,6 @@ func (c *FailoverReservationController) Start(ctx context.Context) error {
 		"flavorFailoverRequirements", c.Config.FlavorFailoverRequirements,
 		"maxVMsToProcess", c.Config.MaxVMsToProcess)
 
-	// Run initial reconciliation
-	if _, err := c.ReconcilePeriodic(ctx); err != nil {
-		log.Error(err, "initial failover reconciliation failed")
-		// Don't return error - continue with periodic reconciliation
-	}
-
 	// Set up periodic reconciliation
 	ticker := time.NewTicker(c.Config.ReconcileInterval)
 	defer ticker.Stop()
diff --git a/internal/scheduling/reservations/failover/controller_test.go b/internal/scheduling/reservations/failover/controller_test.go
@@ -844,3 +844,186 @@ func getAllocations(res *v1alpha1.Reservation) map[string]string {
 	}
 	return res.Status.FailoverReservation.Allocations
 }
+
+// ============================================================================
+// Test: selectVMsToProcess
+// ============================================================================
+
+func TestSelectVMsToProcess(t *testing.T) {
+	// createVMs builds `count` VMs with strictly decreasing memory sizes (i.e. already sorted by memory, descending)
+	createVMs := func(count int) []vmFailoverNeed {
+		vms := make([]vmFailoverNeed, count)
+		for i := range count {
+			vms[i] = vmFailoverNeed{
+				VM: VM{
+					UUID:              "vm-" + string(rune('a'+i)),
+					CurrentHypervisor: "host" + string(rune('1'+i)),
+					Resources: map[string]resource.Quantity{
+						"memory": *resource.NewQuantity(int64((count-i)*1024*1024*1024), resource.BinarySI), // Descending memory
+					},
+				},
+				Count: 1,
+			}
+		}
+		return vms
+	}
+
+	tests := []struct {
+		name           string
+		reconcileCount int64
+		vmCount        int
+		maxToProcess   int
+		expectedOffset int // Expected starting offset in the VM list
+		expectedHit    bool
+	}{
+		// 3 out of 4 runs should start at offset 0
+		{
+			name:           "reconcile 1 - offset 0",
+			reconcileCount: 1,
+			vmCount:        10,
+			maxToProcess:   3,
+			expectedOffset: 0,
+			expectedHit:    true,
+		},
+		{
+			name:           "reconcile 2 - offset 0",
+			reconcileCount: 2,
+			vmCount:        10,
+			maxToProcess:   3,
+			expectedOffset: 0,
+			expectedHit:    true,
+		},
+		{
+			name:           "reconcile 3 - offset 0",
+			reconcileCount: 3,
+			vmCount:        10,
+			maxToProcess:   3,
+			expectedOffset: 0,
+			expectedHit:    true,
+		},
+		// Every 4th reconcile uses reconcileCount as offset (mod vmCount)
+		{
+			name:           "reconcile 4 - offset 4",
+			reconcileCount: 4,
+			vmCount:        10,
+			maxToProcess:   3,
+			expectedOffset: 4,
+			expectedHit:    true,
+		},
+		{
+			name:           "reconcile 5 - offset 0",
+			reconcileCount: 5,
+			vmCount:        10,
+			maxToProcess:   3,
+			expectedOffset: 0,
+			expectedHit:    true,
+		},
+		{
+			name:           "reconcile 6 - offset 0",
+			reconcileCount: 6,
+			vmCount:        10,
+			maxToProcess:   3,
+			expectedOffset: 0,
+			expectedHit:    true,
+		},
+		{
+			name:           "reconcile 7 - offset 0",
+			reconcileCount: 7,
+			vmCount:        10,
+			maxToProcess:   3,
+			expectedOffset: 0,
+			expectedHit:    true,
+		},
+		{
+			name:           "reconcile 8 - offset 8",
+			reconcileCount: 8,
+			vmCount:        10,
+			maxToProcess:   3,
+			expectedOffset: 8,
+			expectedHit:    true,
+		},
+		// Test wrap-around when reconcileCount > vmCount
+		{
+			name:           "reconcile 12 - offset 2 (12 mod 10)",
+			reconcileCount: 12,
+			vmCount:        10,
+			maxToProcess:   3,
+			expectedOffset: 2, // 12 % 10 = 2
+			expectedHit:    true,
+		},
+		{
+			name:           "reconcile 20 - offset 0 (20 mod 10)",
+			reconcileCount: 20,
+			vmCount:        10,
+			maxToProcess:   3,
+			expectedOffset: 0, // 20 % 10 = 0
+			expectedHit:    true,
+		},
+		// Edge cases
+		{
+			name:           "maxToProcess 0 - no limit, returns all",
+			reconcileCount: 4,
+			vmCount:        10,
+			maxToProcess:   0,
+			expectedOffset: 0, // No limit means all VMs returned starting from 0
+			expectedHit:    false,
+		},
+		{
+			name:           "maxToProcess >= vmCount - no limit hit",
+			reconcileCount: 4,
+			vmCount:        5,
+			maxToProcess:   10,
+			expectedOffset: 0, // All VMs fit, no rotation needed
+			expectedHit:    false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			controller := &FailoverReservationController{
+				reconcileCount: tt.reconcileCount,
+			}
+
+			vms := createVMs(tt.vmCount)
+			selected, hitLimit := controller.selectVMsToProcess(vms, tt.maxToProcess)
+
+			if hitLimit != tt.expectedHit {
+				t.Errorf("expected hitLimit=%v, got %v", tt.expectedHit, hitLimit)
+			}
+
+			if !tt.expectedHit {
+				// When no limit is hit, all VMs should be returned
+				if len(selected) != tt.vmCount {
+					t.Errorf("expected all %d VMs when no limit hit, got %d", tt.vmCount, len(selected))
+				}
+				return
+			}
+
+			// Verify the first selected VM is at the expected offset
+			if len(selected) == 0 {
+				t.Error("expected at least one VM selected")
+				return
+			}
+
+			// The VMs are sorted by memory descending, so vm-a has most memory, vm-j has least
+			// After sorting, the order is: vm-a, vm-b, vm-c, ..., vm-j
+			// With offset, we should start at vms[offset]
+			expectedFirstVM := vms[tt.expectedOffset].VM.UUID
+			actualFirstVM := selected[0].VM.UUID
+
+			if actualFirstVM != expectedFirstVM {
+				t.Errorf("expected first VM to be %s (offset %d), got %s",
+					expectedFirstVM, tt.expectedOffset, actualFirstVM)
+			}
+
+			// Verify we got the expected number of VMs
+			expectedCount := tt.maxToProcess
+			if expectedCount > tt.vmCount {
+				expectedCount = tt.vmCount
+			}
+			if len(selected) != expectedCount {
+				t.Errorf("expected %d VMs selected, got %d", expectedCount, len(selected))
+			}
+		})
+	}
+}
diff --git a/internal/scheduling/reservations/failover/reservation_scheduling.go b/internal/scheduling/reservations/failover/reservation_scheduling.go
@@ -213,9 +213,21 @@ func (c *FailoverReservationController) validateVmViaSchedulerEvacuation(
 		return false, fmt.Errorf("failed to validate VM for reservation host: %w", err)
 	}
 
+	// Handle empty response - no hosts returned
+	if len(resp.Hosts) < 1 {
+		return false, nil
+	}
+
+	// Log unexpected scheduler responses
+	if len(resp.Hosts) > 1 || resp.Hosts[0] != reservationHost {
+		log.Error(nil, "scheduler returned unexpected hosts for single-host validation request",
+			"vmUUID", vm.UUID,
+			"reservationHost", reservationHost,
+			"returnedHosts", resp.Hosts)
+	}
+
 	// If the reservation host is returned, the VM can use it
-	isValid := len(resp.Hosts) > 0 && resp.Hosts[0] == reservationHost
-	return isValid, nil
+	return resp.Hosts[0] == reservationHost, nil
 }
 
 // scheduleAndBuildNewFailoverReservation schedules a failover reservation for a VM.