Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions helm/bundles/cortex-nova/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -186,9 +186,12 @@ cortex-scheduling-controllers:
# Used for the first reconcile and whenever maxVMsToProcess limits processing, to allow faster catch-up
shortReconcileInterval: 1m
# Maximum number of VMs to process in one periodic reconciliation loop
maxVMsToProcess: 25
maxVMsToProcess: 50
# How often to rotate VM selection offset when maxVMsToProcess limits processing
# Every N reconcile cycles, the offset rotates to process different VMs
vmSelectionRotationInterval: 3
# Minimum successful reservations to use short interval
minSuccessForShortInterval: 1
minSuccessForShortInterval: 0
# Maximum failures allowed to still use short interval
maxFailuresForShortInterval: 99
# If true, uses hypervisor CRD as source of truth for VM location instead of postgres
Expand Down
10 changes: 9 additions & 1 deletion internal/scheduling/reservations/failover/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,7 @@ func (c *FailoverReservationController) validateReservation(ctx context.Context,

// reconcileSummary holds statistics from the reconciliation cycle.
type reconcileSummary struct {
vmsMissingFailover int
vmsProcessed int
reservationsNeeded int
totalReused int
Expand Down Expand Up @@ -268,6 +269,7 @@ func (c *FailoverReservationController) ReconcilePeriodic(ctx context.Context) (
}
logger.V(1).Info("found VMs from source", "count", len(vms))

// TODO: vms contains VMs from all AZs; we should consider processing them per AZ (sequentially or in parallel) instead of mixing them together
// List only failover reservations using label selector
var reservationList v1alpha1.ReservationList
if err := c.List(ctx, &reservationList, client.MatchingLabels{
Expand Down Expand Up @@ -313,6 +315,7 @@ func (c *FailoverReservationController) ReconcilePeriodic(ctx context.Context) (

// 6. Create and assign reservations for VMs that need them
assignSummary, hitMaxVMsLimit := c.reconcileCreateAndAssignReservations(ctx, vms, failoverReservations, allHypervisors)
summary.vmsMissingFailover = assignSummary.vmsMissingFailover
summary.vmsProcessed = assignSummary.vmsProcessed
summary.reservationsNeeded = assignSummary.reservationsNeeded
summary.totalReused = assignSummary.totalReused
Expand All @@ -332,6 +335,9 @@ func (c *FailoverReservationController) ReconcilePeriodic(ctx context.Context) (
"reconcileCount", c.reconcileCount,
"duration", duration.Round(time.Millisecond),
"requeueAfter", requeueAfter,
"totalVMs", len(vms),
"totalReservations", len(failoverReservations),
"vmsMissingFailover", summary.vmsMissingFailover,
"vmsProcessed", summary.vmsProcessed,
"reservationsNeeded", summary.reservationsNeeded,
"reused", summary.totalReused,
Expand Down Expand Up @@ -557,11 +563,12 @@ func (c *FailoverReservationController) reconcileCreateAndAssignReservations(
vmsMissingFailover := c.calculateVMsMissingFailover(ctx, vms, failoverReservations)
logger.V(1).Info("VMs missing failover reservations", "count", len(vmsMissingFailover))

totalVMsMissingFailover := len(vmsMissingFailover)
vmsMissingFailover, hitMaxVMsLimit := c.selectVMsToProcess(ctx, vmsMissingFailover, c.Config.MaxVMsToProcess)

logger.V(1).Info("found hypervisors and vm missing failover reservation",
"countHypervisors", len(allHypervisors),
"countVMsMissingFailover", len(vmsMissingFailover))
"countVMsMissingFailover", totalVMsMissingFailover)

totalReservationsNeeded := 0
for _, need := range vmsMissingFailover {
Expand Down Expand Up @@ -649,6 +656,7 @@ func (c *FailoverReservationController) reconcileCreateAndAssignReservations(
}

return reconcileSummary{
vmsMissingFailover: totalVMsMissingFailover,
vmsProcessed: len(vmsMissingFailover),
reservationsNeeded: totalReservationsNeeded,
totalReused: totalReused,
Expand Down
Loading
Loading