Skip to content

Commit c90dd80

Browse files
committed
minor
1 parent 585256c commit c90dd80

3 files changed

Lines changed: 209 additions & 14 deletions

File tree

internal/scheduling/reservations/failover/controller.go

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,9 @@ func (c *FailoverReservationController) validateReservation(ctx context.Context,
183183
return false
184184
}
185185

186+
// TODO we just invalidate the entire reservation if one VM is not placable anymore
187+
// That is probably ok as most likely due to concurrency we just do not have space and then all VMs are affected
188+
// but it is also possible that it can be because of anti-affinity rules
186189
if !valid {
187190
log.Info("VM failed validation for reservation host",
188191
"reservationName", res.Name,
@@ -490,7 +493,8 @@ func reconcileRemoveEmptyReservations(
490493

491494
// selectVMsToProcess selects a subset of VMs to process based on MaxVMsToProcess limit.
492495
// VMs are sorted by memory (largest first) to prioritize large VMs for failover reservations.
493-
// A rotating offset (every 4 reconciliations) ensures different VMs are tried
496+
// 3 out of 4 reconciliations start at offset 0 (process largest VMs first).
497+
// Every 4th reconciliation uses a rotating offset to try different VMs
494498
// if the largest VMs consistently fail to get reservations.
495499
func (c *FailoverReservationController) selectVMsToProcess(
496500
vmsMissingFailover []vmFailoverNeed,
@@ -508,9 +512,12 @@ func (c *FailoverReservationController) selectVMsToProcess(
508512
return vmsMissingFailover, false
509513
}
510514

511-
// Rotate every 4 reconciliations to try different VMs if large ones fail
512-
rotationPeriod := int64(4)
513-
offset := int((c.reconcileCount / rotationPeriod) % int64(len(vmsMissingFailover)))
515+
// 3 out of 4 runs start at offset 0, every 4th run uses reconcileCount as offset
516+
offset := 0
517+
if c.reconcileCount%4 == 0 {
518+
// Every 4th reconciliation, use reconcileCount as offset (mod vmCount to wrap around)
519+
offset = int(c.reconcileCount) % len(vmsMissingFailover)
520+
}
514521

515522
// Select VMs starting from offset, wrapping around
516523
selected = make([]vmFailoverNeed, 0, maxToProcess)
@@ -523,8 +530,7 @@ func (c *FailoverReservationController) selectVMsToProcess(
523530
"totalVMsMissingFailover", len(vmsMissingFailover),
524531
"maxToProcess", maxToProcess,
525532
"offset", offset,
526-
"reconcileCount", c.reconcileCount,
527-
"rotationPeriod", rotationPeriod)
533+
"reconcileCount", c.reconcileCount)
528534

529535
return selected, true
530536
}
@@ -805,12 +811,6 @@ func (c *FailoverReservationController) Start(ctx context.Context) error {
805811
"flavorFailoverRequirements", c.Config.FlavorFailoverRequirements,
806812
"maxVMsToProcess", c.Config.MaxVMsToProcess)
807813

808-
// Run initial reconciliation
809-
if _, err := c.ReconcilePeriodic(ctx); err != nil {
810-
log.Error(err, "initial failover reconciliation failed")
811-
// Don't return error - continue with periodic reconciliation
812-
}
813-
814814
// Set up periodic reconciliation
815815
ticker := time.NewTicker(c.Config.ReconcileInterval)
816816
defer ticker.Stop()

internal/scheduling/reservations/failover/controller_test.go

Lines changed: 183 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -844,3 +844,186 @@ func getAllocations(res *v1alpha1.Reservation) map[string]string {
844844
}
845845
return res.Status.FailoverReservation.Allocations
846846
}
847+
848+
// ============================================================================
849+
// Test: selectVMsToProcess
850+
// ============================================================================
851+
852+
func TestSelectVMsToProcess(t *testing.T) {
853+
// Create 10 VMs with different memory sizes (sorted by memory descending)
854+
createVMs := func(count int) []vmFailoverNeed {
855+
vms := make([]vmFailoverNeed, count)
856+
for i := range count {
857+
vms[i] = vmFailoverNeed{
858+
VM: VM{
859+
UUID: "vm-" + string(rune('a'+i)),
860+
CurrentHypervisor: "host" + string(rune('1'+i)),
861+
Resources: map[string]resource.Quantity{
862+
"memory": *resource.NewQuantity(int64((count-i)*1024*1024*1024), resource.BinarySI), // Descending memory
863+
},
864+
},
865+
Count: 1,
866+
}
867+
}
868+
return vms
869+
}
870+
871+
tests := []struct {
872+
name string
873+
reconcileCount int64
874+
vmCount int
875+
maxToProcess int
876+
expectedOffset int // Expected starting offset in the VM list
877+
expectedHit bool
878+
}{
879+
// 3 out of 4 runs should start at offset 0
880+
{
881+
name: "reconcile 1 - offset 0",
882+
reconcileCount: 1,
883+
vmCount: 10,
884+
maxToProcess: 3,
885+
expectedOffset: 0,
886+
expectedHit: true,
887+
},
888+
{
889+
name: "reconcile 2 - offset 0",
890+
reconcileCount: 2,
891+
vmCount: 10,
892+
maxToProcess: 3,
893+
expectedOffset: 0,
894+
expectedHit: true,
895+
},
896+
{
897+
name: "reconcile 3 - offset 0",
898+
reconcileCount: 3,
899+
vmCount: 10,
900+
maxToProcess: 3,
901+
expectedOffset: 0,
902+
expectedHit: true,
903+
},
904+
// Every 4th reconcile uses reconcileCount as offset (mod vmCount)
905+
{
906+
name: "reconcile 4 - offset 4",
907+
reconcileCount: 4,
908+
vmCount: 10,
909+
maxToProcess: 3,
910+
expectedOffset: 4,
911+
expectedHit: true,
912+
},
913+
{
914+
name: "reconcile 5 - offset 0",
915+
reconcileCount: 5,
916+
vmCount: 10,
917+
maxToProcess: 3,
918+
expectedOffset: 0,
919+
expectedHit: true,
920+
},
921+
{
922+
name: "reconcile 6 - offset 0",
923+
reconcileCount: 6,
924+
vmCount: 10,
925+
maxToProcess: 3,
926+
expectedOffset: 0,
927+
expectedHit: true,
928+
},
929+
{
930+
name: "reconcile 7 - offset 0",
931+
reconcileCount: 7,
932+
vmCount: 10,
933+
maxToProcess: 3,
934+
expectedOffset: 0,
935+
expectedHit: true,
936+
},
937+
{
938+
name: "reconcile 8 - offset 8",
939+
reconcileCount: 8,
940+
vmCount: 10,
941+
maxToProcess: 3,
942+
expectedOffset: 8,
943+
expectedHit: true,
944+
},
945+
// Test wrap-around when reconcileCount > vmCount
946+
{
947+
name: "reconcile 12 - offset 2 (12 mod 10)",
948+
reconcileCount: 12,
949+
vmCount: 10,
950+
maxToProcess: 3,
951+
expectedOffset: 2, // 12 % 10 = 2
952+
expectedHit: true,
953+
},
954+
{
955+
name: "reconcile 20 - offset 0 (20 mod 10)",
956+
reconcileCount: 20,
957+
vmCount: 10,
958+
maxToProcess: 3,
959+
expectedOffset: 0, // 20 % 10 = 0
960+
expectedHit: true,
961+
},
962+
// Edge cases
963+
{
964+
name: "maxToProcess 0 - no limit, returns all",
965+
reconcileCount: 4,
966+
vmCount: 10,
967+
maxToProcess: 0,
968+
expectedOffset: 0, // No limit means all VMs returned starting from 0
969+
expectedHit: false,
970+
},
971+
{
972+
name: "maxToProcess >= vmCount - no limit hit",
973+
reconcileCount: 4,
974+
vmCount: 5,
975+
maxToProcess: 10,
976+
expectedOffset: 0, // All VMs fit, no rotation needed
977+
expectedHit: false,
978+
},
979+
}
980+
981+
for _, tt := range tests {
982+
t.Run(tt.name, func(t *testing.T) {
983+
controller := &FailoverReservationController{
984+
reconcileCount: tt.reconcileCount,
985+
}
986+
987+
vms := createVMs(tt.vmCount)
988+
selected, hitLimit := controller.selectVMsToProcess(vms, tt.maxToProcess)
989+
990+
if hitLimit != tt.expectedHit {
991+
t.Errorf("expected hitLimit=%v, got %v", tt.expectedHit, hitLimit)
992+
}
993+
994+
if !tt.expectedHit {
995+
// When no limit is hit, all VMs should be returned
996+
if len(selected) != tt.vmCount {
997+
t.Errorf("expected all %d VMs when no limit hit, got %d", tt.vmCount, len(selected))
998+
}
999+
return
1000+
}
1001+
1002+
// Verify the first selected VM is at the expected offset
1003+
if len(selected) == 0 {
1004+
t.Error("expected at least one VM selected")
1005+
return
1006+
}
1007+
1008+
// The VMs are sorted by memory descending, so vm-a has most memory, vm-j has least
1009+
// After sorting, the order is: vm-a, vm-b, vm-c, ..., vm-j
1010+
// With offset, we should start at vms[offset]
1011+
expectedFirstVM := vms[tt.expectedOffset].VM.UUID
1012+
actualFirstVM := selected[0].VM.UUID
1013+
1014+
if actualFirstVM != expectedFirstVM {
1015+
t.Errorf("expected first VM to be %s (offset %d), got %s",
1016+
expectedFirstVM, tt.expectedOffset, actualFirstVM)
1017+
}
1018+
1019+
// Verify we got the expected number of VMs
1020+
expectedCount := tt.maxToProcess
1021+
if expectedCount > tt.vmCount {
1022+
expectedCount = tt.vmCount
1023+
}
1024+
if len(selected) != expectedCount {
1025+
t.Errorf("expected %d VMs selected, got %d", expectedCount, len(selected))
1026+
}
1027+
})
1028+
}
1029+
}

internal/scheduling/reservations/failover/reservation_scheduling.go

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -213,9 +213,21 @@ func (c *FailoverReservationController) validateVmViaSchedulerEvacuation(
213213
return false, fmt.Errorf("failed to validate VM for reservation host: %w", err)
214214
}
215215

216+
// Handle empty response - no hosts returned
217+
if len(resp.Hosts) < 1 {
218+
return false, nil
219+
}
220+
221+
// Log unexpected scheduler responses
222+
if len(resp.Hosts) > 1 || resp.Hosts[0] != reservationHost {
223+
log.Error(nil, "scheduler returned unexpected hosts for single-host validation request",
224+
"vmUUID", vm.UUID,
225+
"reservationHost", reservationHost,
226+
"returnedHosts", resp.Hosts)
227+
}
228+
216229
// If the reservation host is returned, the VM can use it
217-
isValid := len(resp.Hosts) > 0 && resp.Hosts[0] == reservationHost
218-
return isValid, nil
230+
return resp.Hosts[0] == reservationHost, nil
219231
}
220232

221233
// scheduleAndBuildNewFailoverReservation schedules a failover reservation for a VM.

0 commit comments

Comments
 (0)