Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,10 @@ func main() {
// API endpoint.
mux := http.NewServeMux()

// Shared mutex for serializing CR state changes between the syncer and change-commitments API.
// This ensures atomicity when applying Limes state snapshots.
crMutex := &commitments.CRMutex{}

// The pipeline monitor is a bucket for all metrics produced during the
// execution of individual steps (see step monitor below) and the overall
// pipeline.
Expand Down Expand Up @@ -343,7 +347,7 @@ func main() {

// Initialize commitments API for LIQUID interface (with Nova client for usage reporting)
commitmentsConfig := conf.GetConfigOrDie[commitments.Config]()
commitmentsAPI := commitments.NewAPIWithConfig(multiclusterClient, commitmentsConfig, novaClient)
commitmentsAPI := commitments.NewAPIWithConfig(multiclusterClient, commitmentsConfig, novaClient, crMutex)
commitmentsAPI.Init(mux, metrics.Registry, ctrl.Log.WithName("commitments-api"))

deschedulingsController := &nova.DetectorPipelineController{
Expand Down Expand Up @@ -671,7 +675,7 @@ func main() {
setupLog.Info("starting commitments syncer")
syncerMonitor := commitments.NewSyncerMonitor()
must.Succeed(metrics.Registry.Register(syncerMonitor))
syncer := commitments.NewSyncer(multiclusterClient, syncerMonitor)
syncer := commitments.NewSyncer(multiclusterClient, syncerMonitor, crMutex)
syncerConfig := conf.GetConfigOrDie[commitments.SyncerConfig]()
syncerDefaults := commitments.DefaultSyncerConfig()
if syncerConfig.SyncInterval == 0 {
Expand Down
38 changes: 34 additions & 4 deletions internal/scheduling/reservations/commitments/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,31 @@ type UsageNovaClient interface {
ListProjectServers(ctx context.Context, projectID string) ([]nova.ServerDetail, error)
}

// CRMutex serializes CR state changes between the syncer and change-commitments API.
// This ensures that the syncer's Limes state snapshot is applied atomically without
// interference from concurrent change-commitments API calls. The Lock and Unlock
// methods are no-ops if the receiver is nil, allowing safe use when either component
// is disabled.
// NOTE(review): this is an in-memory mutex, so it only serializes goroutines within
// a single process — it provides no cross-pod coordination. Confirm that the syncer
// and the change-commitments API are always deployed in the same process before
// relying on the atomicity guarantee stated above.
// TODO: If the syncer and API are moved to separate pods, replace with a K8s
// distributed lock (e.g., Lease-based coordination).
type CRMutex struct {
	// mu is the underlying in-process mutex; accessed only via Lock/Unlock so
	// the nil-receiver check can be applied consistently.
	mu sync.Mutex
}
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

🏁 Script executed:

# Find and inspect the CRMutex definition
find . -name "api.go" -path "*/commitments/*" | head -5

Repository: cobaltcore-dev/cortex

Length of output: 118


🏁 Script executed:

# Check main.go for mutex creation
find . -name "main.go" -type f | head -10

Repository: cobaltcore-dev/cortex

Length of output: 165


🏁 Script executed:

# Look at the directory structure to understand the project layout
ls -la internal/scheduling/reservations/commitments/ 2>/dev/null || echo "Path may differ"
git ls-files | grep -E "(main|api|commitments)" | head -20

Repository: cobaltcore-dev/cortex

Length of output: 3527


🏁 Script executed:

# Read the api.go file to see CRMutex definition
cat -n internal/scheduling/reservations/commitments/api.go | head -50

Repository: cobaltcore-dev/cortex

Length of output: 1892


🏁 Script executed:

# Read main.go to see how CRMutex is initialized
cat -n cmd/main.go | head -100

Repository: cobaltcore-dev/cortex

Length of output: 4468


🏁 Script executed:

# Search for where CRMutex is used
rg "CRMutex" --type go -B 2 -A 2

Repository: cobaltcore-dev/cortex

Length of output: 6169


🏁 Script executed:

# Search for deployment configuration or documentation about multi-replica setup
find . -name "*.yaml" -o -name "*.yml" -o -name "Dockerfile" -o -name "README*" | grep -E "(deploy|helm|kube|docker|README)" | head -20

Repository: cobaltcore-dev/cortex

Length of output: 955


🏁 Script executed:

# Look for environment/deployment configurations
git ls-files | grep -E "(helm|deploy|kube|config)" | head -20

Repository: cobaltcore-dev/cortex

Length of output: 887


🏁 Script executed:

# Check if there are multiple instances expected to run
rg "replicas|replica|multi-pod|multi-instance" --type go -i | head -20

Repository: cobaltcore-dev/cortex

Length of output: 2820


🏁 Script executed:

# Look at the full main.go to understand the complete context of crMutex usage
sed -n '1,300p' cmd/main.go | tail -100

Repository: cobaltcore-dev/cortex

Length of output: 4633


🏁 Script executed:

# Check if leader election is actually being used and how it impacts the syncer/API
rg "LeaderElection" --type go -B 2 -A 2 | head -40

Repository: cobaltcore-dev/cortex

Length of output: 2152


🏁 Script executed:

# Look at how the syncer and API are registered with the manager
sed -n '280,400p' cmd/main.go

Repository: cobaltcore-dev/cortex

Length of output: 5630


🏁 Script executed:

# Check helm values to understand the deployment architecture
cat helm/bundles/cortex-cinder/values.yaml | grep -A 20 "replicas\|deployment" | head -40

Repository: cobaltcore-dev/cortex

Length of output: 688


🏁 Script executed:

# Check if the syncer and API are actually in separate pods or the same process
rg "NewSyncer|syncer.*crMutex" --type go -B 2 -A 5

Repository: cobaltcore-dev/cortex

Length of output: 5176


🏁 Script executed:

# Look for where the syncer is initialized in main.go to confirm co-location
sed -n '380,500p' cmd/main.go | grep -A 20 -B 5 -i "syncer"

Repository: cobaltcore-dev/cortex

Length of output: 47


🏁 Script executed:

# Search for any split deployment or separate syncer pod configuration
rg "syncer|commitments" helm/bundles/cortex-cinder/values.yaml -A 3 -B 3

Repository: cobaltcore-dev/cortex

Length of output: 47


🏁 Script executed:

# Check the complete syncer initialization and confirm both syncer and API are conditional
sed -n '300,450p' cmd/main.go | grep -A 30 "commitments-sync-task"

Repository: cobaltcore-dev/cortex

Length of output: 47


🏁 Script executed:

# Verify if there's any cross-pod coordination or distributed locking currently in place
rg "Lease|distributed|coordination|lock" --type go internal/scheduling/reservations/commitments/ -i

Repository: cobaltcore-dev/cortex

Length of output: 2653


🏁 Script executed:

# Check if leader election is actually enabled in practice and if multi-replica scenarios are supported
cat helm/bundles/cortex-cinder/values.yaml | head -50

Repository: cobaltcore-dev/cortex

Length of output: 1283


🏁 Script executed:

# Verify the actual syncer initialization block in main.go
rg "commitments-sync-task|commitments-api" cmd/main.go -B 2 -A 15

Repository: cobaltcore-dev/cortex

Length of output: 2330


🏁 Script executed:

# Check if there's any documentation about deployment models or whether split deployments are planned
find . -name "*.md" -type f | xargs grep -l "syncer\|commitments\|deployment" | head -5

Repository: cobaltcore-dev/cortex

Length of output: 230


🏁 Script executed:

# Check the full syncer initialization to confirm both components are in the same process currently
sed -n '430,470p' cmd/main.go

Repository: cobaltcore-dev/cortex

Length of output: 1948


🏁 Script executed:

# Look for any task.Runner or separate process initialization for syncer
rg "SetupWithManager.*syncer\|commitments.*SetupWithManager" --type go -B 3 -A 3

Repository: cobaltcore-dev/cortex

Length of output: 47


🏁 Script executed:

# Check the deployment documentation to understand the expected deployment model
cat docs/reservations/committed-resource-reservations.md | head -100

Repository: cobaltcore-dev/cortex

Length of output: 5148


🏁 Script executed:

# Look for any existing distributed coordination mechanisms or TODO comments about multi-replica
rg "TODO|FIXME|XXX" internal/scheduling/reservations/commitments/ --type go | grep -i "replica\|pod\|distributed"

Repository: cobaltcore-dev/cortex

Length of output: 194


🏁 Script executed:

# Confirm the actual scenario: are syncer and API always co-located?
rg "nova-pipeline-controllers.*commitments-sync-task\|commitments-sync-task.*nova-pipeline-controllers" helm/ --type yaml

Repository: cobaltcore-dev/cortex

Length of output: 47


🏁 Script executed:

# Confirm whether the syncer and API are conditionally enabled or always together
rg "nova-pipeline-controllers" helm/bundles/cortex-cinder/values.yaml -A 5 -B 5

Repository: cobaltcore-dev/cortex

Length of output: 47


🏁 Script executed:

# Check if there are other configurations that could enable them separately
git ls-files | xargs grep -l "commitments-sync-task\|nova-pipeline-controllers" | grep -E "(yaml|helm)" | head -5

Repository: cobaltcore-dev/cortex

Length of output: 101


🏁 Script executed:

# Check the nova deployment configuration to see how syncer and API are enabled
cat helm/bundles/cortex-nova/values.yaml | grep -A 20 -B 5 "commitments-sync-task\|enabledControllers\|enabledTasks"

Repository: cobaltcore-dev/cortex

Length of output: 2273


🏁 Script executed:

# Verify the helm deployment splits - check if cortex-nova-knowledge and cortex-nova-scheduling are separate
cat helm/bundles/cortex-nova/Chart.yaml

Repository: cobaltcore-dev/cortex

Length of output: 1119


🏁 Script executed:

# Confirm the dependencies and deployment structure
git ls-files helm/bundles/cortex-nova/ | grep -E "values|deployment|templates"

Repository: cobaltcore-dev/cortex

Length of output: 815


CRMutex provides no cross-pod synchronization in current deployment.

The syncer and API run in separate Kubernetes deployments: cortex-nova-knowledge (syncer) and cortex-nova-scheduling (API). Each pod independently creates its own crMutex instance in main.go. In multi-replica scenarios, a request to the API pod can race concurrent writes from the syncer running in a different pod, violating the atomicity guarantee claimed in the docstring. The current in-memory mutex only serializes within a single pod. A distributed coordination mechanism (e.g., Kubernetes Lease-based lock) is required to properly serialize CR state changes across pods.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@internal/scheduling/reservations/commitments/api.go` around lines 24 - 33,
The CRMutex type and its Lock/Unlock methods only use an in-memory sync.Mutex
and therefore do not provide cross-pod serialization; replace or augment CRMutex
to use a Kubernetes Lease-based distributed lock (or another cluster-wide
coordination primitive) inside the CRMutex methods so Lock/Unlock block across
pods, ensure the existing nil-receiver semantics remain safe, and update the
CRMutex construction logic in main.go (where crMutex is created) to initialize
the Kubernetes client/Lease lock instance and pass it into CRMutex so API and
syncer pods contend on the same Lease.


// Lock acquires the underlying mutex. Calling Lock on a nil receiver is a
// no-op, which keeps the lock optional for components that run standalone.
func (m *CRMutex) Lock() {
	if m == nil {
		return
	}
	m.mu.Lock()
}

// Unlock releases the underlying mutex. Calling Unlock on a nil receiver is a
// no-op, mirroring the nil-safe behavior of Lock.
func (m *CRMutex) Unlock() {
	if m == nil {
		return
	}
	m.mu.Unlock()
}

// HTTPAPI implements Limes LIQUID commitment validation endpoints.
type HTTPAPI struct {
client client.Client
Expand All @@ -30,15 +55,19 @@ type HTTPAPI struct {
usageMonitor ReportUsageAPIMonitor
capacityMonitor ReportCapacityAPIMonitor
infoMonitor InfoAPIMonitor
// Mutex to serialize change-commitments requests
changeMutex sync.Mutex
// Shared mutex to serialize CR state changes with the syncer
crMutex *CRMutex
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The idiomatic way in Go is as follows. You can use sync.Mutex as a field in your type struct.

type HTTPAPI struct {
  mutex sync.Mutex
}

And when you're initializing the struct, you can directly use your mutex. No nil check needed.

api := HTTPAPI{}
api.mutex.Lock() // This should work without explicit initialization

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is because the mutex is a field of the struct here, and initializing the surrounding struct initializes the zero-value mutex along with it.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The mutex wrapper shouldn't be necessary.

}

func NewAPI(client client.Client) *HTTPAPI {
return NewAPIWithConfig(client, DefaultConfig(), nil)
return NewAPIWithConfig(client, DefaultConfig(), nil, nil)
}

func NewAPIWithConfig(client client.Client, config Config, novaClient UsageNovaClient) *HTTPAPI {
func NewAPIWithConfig(client client.Client, config Config, novaClient UsageNovaClient, crMutex *CRMutex) *HTTPAPI {
// If no shared mutex provided, create a local one (for backwards compatibility in tests)
if crMutex == nil {
crMutex = &CRMutex{}
}
return &HTTPAPI{
client: client,
config: config,
Expand All @@ -47,6 +76,7 @@ func NewAPIWithConfig(client client.Client, config Config, novaClient UsageNovaC
usageMonitor: NewReportUsageAPIMonitor(),
capacityMonitor: NewReportCapacityAPIMonitor(),
infoMonitor: NewInfoAPIMonitor(),
crMutex: crMutex,
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,9 @@ func (api *HTTPAPI) HandleChangeCommitments(w http.ResponseWriter, r *http.Reque
return
}

// Serialize all change-commitments requests
api.changeMutex.Lock()
defer api.changeMutex.Unlock()
// Serialize all change-commitments requests (shared with syncer)
api.crMutex.Lock()
defer api.crMutex.Unlock()

ctx := reservations.WithGlobalRequestID(context.Background(), "committed-resource-"+requestID)
logger := LoggerFromContext(ctx).WithValues("component", "api", "endpoint", "/commitments/v1/change-commitments")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -999,7 +999,7 @@ func newCommitmentTestEnv(
// Use custom config if provided, otherwise use default
var api *HTTPAPI
if customConfig != nil {
api = NewAPIWithConfig(wrappedClient, *customConfig, nil)
api = NewAPIWithConfig(wrappedClient, *customConfig, nil, nil)
} else {
api = NewAPI(wrappedClient)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -537,7 +537,7 @@ func newUsageTestEnv(
}

// Create API with mock Nova client
api := NewAPIWithConfig(k8sClient, DefaultConfig(), novaClient)
api := NewAPIWithConfig(k8sClient, DefaultConfig(), novaClient, nil)
mux := http.NewServeMux()
registry := prometheus.NewRegistry()
api.Init(mux, registry, log.Log)
Expand Down
16 changes: 14 additions & 2 deletions internal/scheduling/reservations/commitments/syncer.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,13 +43,20 @@ type Syncer struct {
client.Client
// Monitor for metrics
monitor *SyncerMonitor
// Shared mutex to serialize CR state changes with the change-commitments API
crMutex *CRMutex
}

func NewSyncer(k8sClient client.Client, monitor *SyncerMonitor) *Syncer {
func NewSyncer(k8sClient client.Client, monitor *SyncerMonitor, crMutex *CRMutex) *Syncer {
// If no shared mutex provided, create a local one (for backwards compatibility in tests)
if crMutex == nil {
crMutex = &CRMutex{}
}
return &Syncer{
CommitmentsClient: NewCommitmentsClient(),
Client: k8sClient,
monitor: monitor,
crMutex: crMutex,
}
}

Expand Down Expand Up @@ -183,8 +190,13 @@ func (s *Syncer) getCommitmentStates(ctx context.Context, log logr.Logger, flavo
}

// SyncReservations fetches commitments from Limes and synchronizes Reservation CRDs.
// The mutex is held for the entire operation to ensure atomicity - the Limes state
// snapshot must be applied without interference from concurrent change-commitments API calls.
func (s *Syncer) SyncReservations(ctx context.Context) error {
// TODO handle concurrency with change API: consider creation time of reservations and status ready
// Acquire the shared CR mutex for the entire sync operation.
// This ensures the Limes state snapshot is applied atomically.
s.crMutex.Lock()
defer s.crMutex.Unlock()

// Create context with request ID for this sync execution
runID := fmt.Sprintf("sync-%d", time.Now().Unix())
Expand Down
5 changes: 5 additions & 0 deletions internal/scheduling/reservations/commitments/syncer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,7 @@ func TestSyncer_SyncReservations_InstanceCommitments(t *testing.T) {
syncer := &Syncer{
CommitmentsClient: mockClient,
Client: k8sClient,
crMutex: &CRMutex{},
}

err := syncer.SyncReservations(context.Background())
Expand Down Expand Up @@ -400,6 +401,7 @@ func TestSyncer_SyncReservations_UpdateExisting(t *testing.T) {
syncer := &Syncer{
CommitmentsClient: mockClient,
Client: k8sClient,
crMutex: &CRMutex{},
}

err := syncer.SyncReservations(context.Background())
Expand Down Expand Up @@ -499,6 +501,7 @@ func TestSyncer_SyncReservations_UnitMismatch(t *testing.T) {
CommitmentsClient: mockClient,
Client: k8sClient,
monitor: monitor,
crMutex: &CRMutex{},
}

err := syncer.SyncReservations(context.Background())
Expand Down Expand Up @@ -582,6 +585,7 @@ func TestSyncer_SyncReservations_UnitMatch(t *testing.T) {
CommitmentsClient: mockClient,
Client: k8sClient,
monitor: monitor,
crMutex: &CRMutex{},
}

err := syncer.SyncReservations(context.Background())
Expand Down Expand Up @@ -666,6 +670,7 @@ func TestSyncer_SyncReservations_EmptyUUID(t *testing.T) {
syncer := &Syncer{
CommitmentsClient: mockClient,
Client: k8sClient,
crMutex: &CRMutex{},
}

err := syncer.SyncReservations(context.Background())
Expand Down
Loading