@@ -42,13 +42,16 @@ import (
4242 "github.com/cobaltcore-dev/cortex/internal/knowledge/kpis"
4343 "github.com/cobaltcore-dev/cortex/internal/scheduling/cinder"
4444 "github.com/cobaltcore-dev/cortex/internal/scheduling/explanation"
45+ "github.com/cobaltcore-dev/cortex/internal/scheduling/external"
4546 schedulinglib "github.com/cobaltcore-dev/cortex/internal/scheduling/lib"
4647 "github.com/cobaltcore-dev/cortex/internal/scheduling/machines"
4748 "github.com/cobaltcore-dev/cortex/internal/scheduling/manila"
4849 "github.com/cobaltcore-dev/cortex/internal/scheduling/nova"
4950 "github.com/cobaltcore-dev/cortex/internal/scheduling/pods"
51+ "github.com/cobaltcore-dev/cortex/internal/scheduling/reservations"
5052 "github.com/cobaltcore-dev/cortex/internal/scheduling/reservations/commitments"
5153 reservationscontroller "github.com/cobaltcore-dev/cortex/internal/scheduling/reservations/controller"
54+ "github.com/cobaltcore-dev/cortex/internal/scheduling/reservations/failover"
5255 "github.com/cobaltcore-dev/cortex/pkg/conf"
5356 "github.com/cobaltcore-dev/cortex/pkg/monitoring"
5457 "github.com/cobaltcore-dev/cortex/pkg/multicluster"
@@ -142,6 +145,12 @@ func main() {
142145
143146 ctrl .SetLogger (zap .New (zap .UseFlagOptions (& opts )))
144147
148+ // Log the main configuration
149+ setupLog .Info ("loaded main configuration" ,
150+ "enabledControllers" , mainConfig .EnabledControllers ,
151+ "enabledTasks" , mainConfig .EnabledTasks ,
152+ "leaderElectionID" , mainConfig .LeaderElectionID )
153+
145154 // if the enable-http2 flag is false (the default), http/2 should be disabled
146155 // due to its vulnerabilities. More specifically, disabling http/2 will
147156 // prevent from being vulnerable to the HTTP/2 Stream Cancellation and
@@ -350,6 +359,7 @@ func main() {
350359 }
351360 }
352361 if slices .Contains (mainConfig .EnabledControllers , "nova-deschedulings-executor" ) {
362+ setupLog .Info ("enabling controller" , "controller" , "nova-deschedulings-executor" )
353363 executorConfig := conf .GetConfigOrDie [nova.DeschedulingsExecutorConfig ]()
354364 novaClient := nova .NewNovaClient ()
355365 novaClientConfig := conf .GetConfigOrDie [nova.NovaClientConfig ]()
@@ -379,6 +389,7 @@ func main() {
379389 }
380390 }
381391 if slices .Contains (mainConfig .EnabledControllers , "manila-decisions-pipeline-controller" ) {
392+ setupLog .Info ("enabling controller" , "controller" , "manila-decisions-pipeline-controller" )
382393 controller := & manila.FilterWeigherPipelineController {
383394 Monitor : filterWeigherPipelineMonitor ,
384395 }
@@ -398,6 +409,7 @@ func main() {
398409 }
399410 }
400411 if slices .Contains (mainConfig .EnabledControllers , "cinder-decisions-pipeline-controller" ) {
412+ setupLog .Info ("enabling controller" , "controller" , "cinder-decisions-pipeline-controller" )
401413 controller := & cinder.FilterWeigherPipelineController {
402414 Monitor : filterWeigherPipelineMonitor ,
403415 }
@@ -417,6 +429,7 @@ func main() {
417429 }
418430 }
419431 if slices .Contains (mainConfig .EnabledControllers , "ironcore-decisions-pipeline-controller" ) {
432+ setupLog .Info ("enabling controller" , "controller" , "ironcore-decisions-pipeline-controller" )
420433 controller := & machines.FilterWeigherPipelineController {
421434 Monitor : filterWeigherPipelineMonitor ,
422435 }
@@ -435,6 +448,7 @@ func main() {
435448 }
436449 }
437450 if slices .Contains (mainConfig .EnabledControllers , "pods-decisions-pipeline-controller" ) {
451+ setupLog .Info ("enabling controller" , "controller" , "pods-decisions-pipeline-controller" )
438452 controller := & pods.FilterWeigherPipelineController {
439453 Monitor : filterWeigherPipelineMonitor ,
440454 }
@@ -453,6 +467,7 @@ func main() {
453467 }
454468 }
455469 if slices .Contains (mainConfig .EnabledControllers , "explanation-controller" ) {
470+ setupLog .Info ("enabling controller" , "controller" , "explanation-controller" )
456471 // Setup a controller which will reconcile the history and explanation for
457472 // decision resources.
458473 explanationControllerConfig := conf .GetConfigOrDie [explanation.ControllerConfig ]()
@@ -466,6 +481,7 @@ func main() {
466481 }
467482 }
468483 if slices .Contains (mainConfig .EnabledControllers , "reservations-controller" ) {
484+ setupLog .Info ("enabling controller" , "controller" , "reservations-controller" )
469485 monitor := reservationscontroller .NewControllerMonitor (multiclusterClient )
470486 metrics .Registry .MustRegister (& monitor )
471487 reservationsControllerConfig := conf .GetConfigOrDie [reservationscontroller.Config ]()
@@ -480,6 +496,7 @@ func main() {
480496 }
481497 }
482498 if slices .Contains (mainConfig .EnabledControllers , "datasource-controllers" ) {
499+ setupLog .Info ("enabling controller" , "controller" , "datasource-controllers" )
483500 monitor := datasources .NewMonitor ()
484501 metrics .Registry .MustRegister (& monitor )
485502 if err := (& openstack.OpenStackDatasourceReconciler {
@@ -502,6 +519,7 @@ func main() {
502519 }
503520 }
504521 if slices .Contains (mainConfig .EnabledControllers , "knowledge-controllers" ) {
522+ setupLog .Info ("enabling controller" , "controller" , "knowledge-controllers" )
505523 monitor := extractor .NewMonitor ()
506524 metrics .Registry .MustRegister (& monitor )
507525 if err := (& extractor.KnowledgeReconciler {
@@ -523,6 +541,7 @@ func main() {
523541 }
524542 }
525543 if slices .Contains (mainConfig .EnabledControllers , "kpis-controller" ) {
544+ setupLog .Info ("enabling controller" , "controller" , "kpis-controller" )
526545 kpisControllerConfig := conf .GetConfigOrDie [kpis.ControllerConfig ]()
527546 if err := (& kpis.Controller {
528547 Client : multiclusterClient ,
@@ -532,6 +551,93 @@ func main() {
532551 os .Exit (1 )
533552 }
534553 }
554+ if slices .Contains (mainConfig .EnabledControllers , "failover-reservations-controller" ) {
555+ setupLog .Info ("enabling controller" , "controller" , "failover-reservations-controller" )
556+ failoverConfig := conf .GetConfigOrDie [failover.FailoverConfig ]()
557+
558+ // Apply defaults for unset values
559+ defaults := failover .DefaultConfig ()
560+ if failoverConfig .DatasourceName == "" {
561+ failoverConfig .DatasourceName = defaults .DatasourceName
562+ }
563+ if failoverConfig .SchedulerURL == "" {
564+ failoverConfig .SchedulerURL = defaults .SchedulerURL
565+ }
566+ if failoverConfig .ReconcileInterval == 0 {
567+ failoverConfig .ReconcileInterval = defaults .ReconcileInterval
568+ }
569+ if failoverConfig .Creator == "" {
570+ failoverConfig .Creator = defaults .Creator
571+ }
572+ if failoverConfig .FlavorFailoverRequirements == nil {
573+ failoverConfig .FlavorFailoverRequirements = defaults .FlavorFailoverRequirements
574+ }
575+ if failoverConfig .RevalidationInterval == 0 {
576+ failoverConfig .RevalidationInterval = defaults .RevalidationInterval
577+ }
578+
579+ // DatasourceName is still required - check after applying defaults
580+ if failoverConfig .DatasourceName == "" {
581+ setupLog .Error (nil , "failover-reservations-controller requires datasourceName to be configured" )
582+ os .Exit (1 )
583+ }
584+
585+ // The scheduler client calls the nova external scheduler API to get placement decisions
586+ schedulerClient := reservations .NewSchedulerClient (failoverConfig .SchedulerURL )
587+
588+ // Defer the initialization of PostgresReader until the manager starts
589+ // because the cache is not ready during setup
590+ if err := mgr .Add (manager .RunnableFunc (func (ctx context.Context ) error {
591+ // Create PostgresReader from the configured Datasource CRD
592+ // This runs after the cache is started
593+ postgresReader , err := external .NewPostgresReader (ctx , multiclusterClient , failoverConfig .DatasourceName )
594+ if err != nil {
595+ setupLog .Error (err , "unable to create postgres reader for failover controller" ,
596+ "datasourceName" , failoverConfig .DatasourceName )
597+ return err
598+ }
599+
600+ // Create NovaReader and DBVMSource
601+ novaReader := external .NewNovaReader (postgresReader )
602+ vmSource := failover .NewDBVMSource (novaReader )
603+
604+ // Create the unified failover controller
605+ // It handles both:
606+ // 1. Watch-based per-reservation reconciliation (acknowledgment, validation)
607+ // 2. Periodic bulk VM processing (creating/assigning reservations)
608+ failoverController := failover .NewFailoverReservationController (
609+ multiclusterClient ,
610+ vmSource ,
611+ failoverConfig ,
612+ schedulerClient ,
613+ )
614+
615+ // Set up the watch-based reconciler for per-reservation reconciliation
616+ if err := failoverController .SetupWithManager (mgr , multiclusterClient ); err != nil {
617+ setupLog .Error (err , "unable to set up failover reservation controller" )
618+ return err
619+ }
620+
621+ setupLog .Info ("failover-reservations-controller starting" ,
622+ "datasourceName" , failoverConfig .DatasourceName ,
623+ "schedulerURL" , failoverConfig .SchedulerURL ,
624+ "reconcileInterval" , failoverConfig .ReconcileInterval ,
625+ "revalidationInterval" , failoverConfig .RevalidationInterval )
626+
627+ // Start the controller's periodic reconciliation loop
628+ return failoverController .Start (ctx )
629+ })); err != nil {
630+ setupLog .Error (err , "unable to add failover controller to manager" )
631+ os .Exit (1 )
632+ }
633+ setupLog .Info ("failover-reservations-controller registered" ,
634+ "datasourceName" , failoverConfig .DatasourceName ,
635+ "schedulerURL" , failoverConfig .SchedulerURL ,
636+ "reconcileInterval" , failoverConfig .ReconcileInterval ,
637+ "revalidationInterval" , failoverConfig .RevalidationInterval ,
638+ "trustHypervisorLocation" , failoverConfig .TrustHypervisorLocation ,
639+ "maxVMsToProcess" , failoverConfig .MaxVMsToProcess )
640+ }
535641
536642 // +kubebuilder:scaffold:builder
537643
0 commit comments