Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -323,10 +323,6 @@ func main() {
}
nova.NewAPI(filterWeigherController).Init(mux)

// Initialize commitments API for LIQUID interface
commitmentsAPI := commitments.NewAPI(multiclusterClient)
commitmentsAPI.Init(mux, metrics.Registry)

// Detector pipeline controller setup.
novaClient := nova.NewNovaClient()
novaClientConfig := conf.GetConfigOrDie[nova.NovaClientConfig]()
Expand All @@ -336,6 +332,12 @@ func main() {
setupLog.Error(err, "unable to initialize nova client")
os.Exit(1)
}

// Initialize commitments API for LIQUID interface (with Nova client for usage reporting)
commitmentsConfig := conf.GetConfigOrDie[commitments.Config]()
commitmentsAPI := commitments.NewAPIWithConfig(multiclusterClient, commitmentsConfig, novaClient)
commitmentsAPI.Init(mux, metrics.Registry)
Comment on lines +337 to +340
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Don't gate the commitments HTTP API behind nova-pipeline-controllers.

This is the only commitmentsAPI.Init() in cmd/main.go, so deployments that do not enable that controller will stop serving /v1/commitments/... entirely. It also makes the Nova pipeline controller path hard-require commitments.Config via GetConfigOrDie(), even though the usage path already tolerates a nil Nova client.

Suggested direction
+var commitmentsNovaClient commitments.UsageNovaClient
+
 if slices.Contains(mainConfig.EnabledControllers, "nova-pipeline-controllers") {
   ...
-  commitmentsConfig := conf.GetConfigOrDie[commitments.Config]()
-  commitmentsAPI := commitments.NewAPIWithConfig(multiclusterClient, commitmentsConfig, novaClient)
-  commitmentsAPI.Init(mux, metrics.Registry)
+  commitmentsNovaClient = novaClient
   ...
 }
+
+commitmentsConfig := conf.GetConfigOrDie[commitments.Config]()
+commitmentsAPI := commitments.NewAPIWithConfig(multiclusterClient, commitmentsConfig, commitmentsNovaClient)
+commitmentsAPI.Init(mux, metrics.Registry)
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@cmd/main.go` around lines 336 - 339, The commitments HTTP API is currently
only initialized when the nova pipeline controller is enabled which removes
/v1/commitments when that controller is disabled and forces a required
commitments.Config via GetConfigOrDie; change this so
commitments.NewAPIWithConfig and commitmentsAPI.Init(mux, metrics.Registry) are
always called regardless of the nova-pipeline-controllers flag, obtain
commitments.Config in a non-fatal way (e.g., use a non-Die getter or nil/default
config instead of GetConfigOrDie) and pass nil for novaClient when the Nova
pipeline controller is not enabled so the API remains served while Nova
integration stays optional (adjust code around commitmentsConfig,
commitments.NewAPIWithConfig, and commitmentsAPI.Init).


deschedulingsController := &nova.DetectorPipelineController{
Monitor: detectorPipelineMonitor,
Breaker: &nova.DetectorCycleBreaker{NovaClient: novaClient},
Expand Down
4 changes: 4 additions & 0 deletions internal/scheduling/nova/deschedulings_executor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,10 @@ func (m *mockExecutorNovaClient) GetServerMigrations(ctx context.Context, id str
return []migration{}, nil
}

func (m *mockExecutorNovaClient) ListProjectServers(ctx context.Context, projectID string) ([]ServerDetail, error) {
return []ServerDetail{}, nil
}

func TestExecutor_Reconcile(t *testing.T) {
scheme := runtime.NewScheme()
err := v1alpha1.AddToScheme(scheme)
Expand Down
4 changes: 4 additions & 0 deletions internal/scheduling/nova/detector_cycle_breaker_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@ func (m *mockDetectorCycleBreakerNovaClient) GetServerMigrations(ctx context.Con
return []migration{}, nil
}

func (m *mockDetectorCycleBreakerNovaClient) ListProjectServers(ctx context.Context, projectID string) ([]ServerDetail, error) {
return []ServerDetail{}, nil
}

func TestDetectorCycleBreaker_Filter(t *testing.T) {
tests := []struct {
name string
Expand Down
100 changes: 100 additions & 0 deletions internal/scheduling/nova/nova_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,21 @@ type migration struct {
DestCompute string `json:"dest_compute"`
}

// ServerDetail contains extended server information for usage reporting.
type ServerDetail struct {
ID string `json:"id"`
Name string `json:"name"`
Status string `json:"status"`
TenantID string `json:"tenant_id"`
Created string `json:"created"`
AvailabilityZone string `json:"OS-EXT-AZ:availability_zone"`
Hypervisor string `json:"OS-EXT-SRV-ATTR:hypervisor_hostname"`
FlavorName string // Populated from nested flavor.original_name
FlavorRAM uint64 // Populated from nested flavor.ram
FlavorVCPUs uint64 // Populated from nested flavor.vcpus
FlavorDisk uint64 // Populated from nested flavor.disk
}

type NovaClient interface {
// Initialize the Nova API with the Keystone authentication.
Init(ctx context.Context, client client.Client, conf NovaClientConfig) error
Expand All @@ -47,6 +62,8 @@ type NovaClient interface {
LiveMigrate(ctx context.Context, id string) error
// Get migrations for a server by ID.
GetServerMigrations(ctx context.Context, id string) ([]migration, error)
// List all servers for a project with detailed info.
ListProjectServers(ctx context.Context, projectID string) ([]ServerDetail, error)
}

type novaClient struct {
Expand Down Expand Up @@ -160,3 +177,86 @@ func (api *novaClient) GetServerMigrations(ctx context.Context, id string) ([]mi
slog.Info("fetched migrations for vm", "id", id, "count", len(migrations))
return migrations, nil
}

// ListProjectServers retrieves all servers for a project with detailed info.
func (api *novaClient) ListProjectServers(ctx context.Context, projectID string) ([]ServerDetail, error) {
// Build URL with pagination support
initialURL := api.sc.Endpoint + "servers/detail?all_tenants=true&tenant_id=" + projectID
var nextURL = &initialURL
var result []ServerDetail

for nextURL != nil {
req, err := http.NewRequestWithContext(ctx, http.MethodGet, *nextURL, http.NoBody)
if err != nil {
return nil, err
}
req.Header.Set("X-Auth-Token", api.sc.Token())
req.Header.Set("X-OpenStack-Nova-API-Version", api.sc.Microversion)

resp, err := api.sc.HTTPClient.Do(req)
if err != nil {
return nil, err
}
defer resp.Body.Close()
Comment on lines +196 to +200
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Resource leak: defer inside loop accumulates closers.

Using defer resp.Body.Close() inside a loop means all response bodies stay open until the function returns. For paginated responses with many pages, this can exhaust file descriptors.

🐛 Proposed fix: close body immediately after use
 		resp, err := api.sc.HTTPClient.Do(req)
 		if err != nil {
 			return nil, err
 		}
-		defer resp.Body.Close()
 
 		if resp.StatusCode != http.StatusOK {
+			resp.Body.Close()
 			return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
 		}
 
 		// ... decode logic ...
 
 		if err := json.NewDecoder(resp.Body).Decode(&list); err != nil {
+			resp.Body.Close()
 			return nil, err
 		}
+		resp.Body.Close()
 
 		// Convert to ServerDetail

Alternatively, extract the HTTP request logic into a helper function where defer would be scoped correctly.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@internal/scheduling/nova/nova_client.go` around lines 196 - 200, The HTTP
response body is being deferred inside a loop (resp, req from
api.sc.HTTPClient.Do), causing many open bodies; change the code in the loop to
call resp.Body.Close() immediately after you're finished reading/parsing the
response (e.g., after io.ReadAll or json decoding) instead of deferring, or
refactor the request+read logic into a helper function (e.g., extract
DoRequest/parsePage helper) so that a defer resp.Body.Close() is scoped to that
helper and the body is closed before the next iteration; ensure resp is closed
on all error paths as well.


if resp.StatusCode != http.StatusOK {
return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
}

// Response structure with nested flavor
var list struct {
Servers []struct {
ID string `json:"id"`
Name string `json:"name"`
Status string `json:"status"`
TenantID string `json:"tenant_id"`
Created string `json:"created"`
AvailabilityZone string `json:"OS-EXT-AZ:availability_zone"`
Hypervisor string `json:"OS-EXT-SRV-ATTR:hypervisor_hostname"`
Flavor struct {
OriginalName string `json:"original_name"`
RAM uint64 `json:"ram"`
VCPUs uint64 `json:"vcpus"`
Disk uint64 `json:"disk"`
} `json:"flavor"`
} `json:"servers"`
Links []struct {
Rel string `json:"rel"`
Href string `json:"href"`
} `json:"servers_links"`
}

if err := json.NewDecoder(resp.Body).Decode(&list); err != nil {
return nil, err
}

// Convert to ServerDetail
for _, s := range list.Servers {
result = append(result, ServerDetail{
ID: s.ID,
Name: s.Name,
Status: s.Status,
TenantID: s.TenantID,
Created: s.Created,
AvailabilityZone: s.AvailabilityZone,
Hypervisor: s.Hypervisor,
FlavorName: s.Flavor.OriginalName,
FlavorRAM: s.Flavor.RAM,
FlavorVCPUs: s.Flavor.VCPUs,
FlavorDisk: s.Flavor.Disk,
})
}

// Check for next page
nextURL = nil
for _, link := range list.Links {
if link.Rel == "next" {
nextURL = &link.Href
break
}
}
}

slog.Info("fetched servers for project", "projectID", projectID, "count", len(result))
return result, nil
}
27 changes: 19 additions & 8 deletions internal/scheduling/reservations/commitments/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,31 +4,41 @@
package commitments

import (
"context"
"net/http"
"sync"

"github.com/cobaltcore-dev/cortex/internal/scheduling/nova"
"github.com/prometheus/client_golang/prometheus"
"sigs.k8s.io/controller-runtime/pkg/client"
)

// UsageNovaClient is a minimal interface for the Nova client needed by the usage API.
// This allows for easy mocking in tests without implementing the full NovaClient interface.
type UsageNovaClient interface {
ListProjectServers(ctx context.Context, projectID string) ([]nova.ServerDetail, error)
}

// HTTPAPI implements Limes LIQUID commitment validation endpoints.
type HTTPAPI struct {
client client.Client
config Config
monitor ChangeCommitmentsAPIMonitor
client client.Client
config Config
novaClient UsageNovaClient
monitor ChangeCommitmentsAPIMonitor
// Mutex to serialize change-commitments requests
changeMutex sync.Mutex
}

func NewAPI(client client.Client) *HTTPAPI {
return NewAPIWithConfig(client, DefaultConfig())
return NewAPIWithConfig(client, DefaultConfig(), nil)
}
Comment on lines 34 to 36
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Don’t publish report-usage from the nil-dependency constructor.

Line 35 still passes nil into NewAPIWithConfig(), but Line 56 always registers HandleReportUsage(). Since HandleReportUsage() constructs a UsageCalculator with api.novaClient in internal/scheduling/reservations/commitments/api_report_usage.go Lines 61-63, NewAPI() now creates an API instance that exposes an endpoint it cannot actually serve.

Minimal guard
 func (api *HTTPAPI) Init(mux *http.ServeMux, registry prometheus.Registerer) {
 	registry.MustRegister(&api.monitor)
 	registry.MustRegister(&api.usageMonitor)
 	registry.MustRegister(&api.capacityMonitor)
 	mux.HandleFunc("/v1/commitments/change-commitments", api.HandleChangeCommitments)
 	mux.HandleFunc("/v1/commitments/report-capacity", api.HandleReportCapacity)
 	mux.HandleFunc("/v1/commitments/info", api.HandleInfo)
-	mux.HandleFunc("/v1/commitments/projects/", api.HandleReportUsage) // matches /v1/commitments/projects/:project_id/report-usage
+	if api.novaClient != nil {
+		mux.HandleFunc("/v1/commitments/projects/", api.HandleReportUsage) // matches /v1/commitments/projects/:project_id/report-usage
+	}
 }

A stronger fix would be to make NewAPI() require a UsageNovaClient as well, so the API cannot be constructed in an unusable state.

Also applies to: 49-56

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@internal/scheduling/reservations/commitments/api.go` around lines 34 - 36,
NewAPI currently calls NewAPIWithConfig(client, DefaultConfig(), nil) which lets
HTTPAPI register HandleReportUsage even when api.novaClient is nil; update
NewAPI/NewAPIWithConfig usage so the report-usage handler is only registered
when a non-nil UsageNovaClient is provided (check api.novaClient before
registering HandleReportUsage in the constructor) or change NewAPI to require
and accept a UsageNovaClient parameter so an API cannot be created without the
dependency; refer to NewAPI, NewAPIWithConfig, HandleReportUsage,
UsageCalculator and api.novaClient when making the guard or signature change.


func NewAPIWithConfig(client client.Client, config Config) *HTTPAPI {
func NewAPIWithConfig(client client.Client, config Config, novaClient UsageNovaClient) *HTTPAPI {
return &HTTPAPI{
client: client,
config: config,
monitor: NewChangeCommitmentsAPIMonitor(),
client: client,
config: config,
novaClient: novaClient,
monitor: NewChangeCommitmentsAPIMonitor(),
}
}

Expand All @@ -37,4 +47,5 @@ func (api *HTTPAPI) Init(mux *http.ServeMux, registry prometheus.Registerer) {
mux.HandleFunc("/v1/commitments/change-commitments", api.HandleChangeCommitments)
// mux.HandleFunc("/v1/report-capacity", api.HandleReportCapacity)
mux.HandleFunc("/v1/commitments/info", api.HandleInfo)
mux.HandleFunc("/v1/commitments/projects/", api.HandleReportUsage) // matches /v1/commitments/projects/:project_id/report-usage
}
Original file line number Diff line number Diff line change
Expand Up @@ -965,7 +965,7 @@ func newCommitmentTestEnv(
// Use custom config if provided, otherwise use default
var api *HTTPAPI
if customConfig != nil {
api = NewAPIWithConfig(wrappedClient, *customConfig)
api = NewAPIWithConfig(wrappedClient, *customConfig, nil)
} else {
api = NewAPI(wrappedClient)
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
// Copyright SAP SE
// SPDX-License-Identifier: Apache-2.0

package commitments

import (
"encoding/json"
"fmt"
"net/http"
"strings"
"time"

"github.com/sapcc/go-api-declarations/liquid"
)

// HandleReportUsage implements POST /v1/commitments/projects/:project_id/report-usage from Limes LIQUID API.
// See: https://github.com/sapcc/go-api-declarations/blob/main/liquid/report_usage.go
// See: https://pkg.go.dev/github.com/sapcc/go-api-declarations/liquid
//
// This endpoint reports usage information for a specific project's committed resources,
// including per-AZ usage, physical usage, and detailed VM subresources.
func (api *HTTPAPI) HandleReportUsage(w http.ResponseWriter, r *http.Request) {
requestID := r.Header.Get("X-Request-ID")
if requestID == "" {
requestID = fmt.Sprintf("req-%d", time.Now().UnixNano())
}
log := baseLog.WithValues("requestID", requestID, "endpoint", "report-usage")

if r.Method != http.MethodPost {
http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
return
}

// Extract project UUID from URL path
// URL pattern: /v1/commitments/projects/:project_id/report-usage
projectID, err := extractProjectIDFromPath(r.URL.Path)
if err != nil {
log.Error(err, "failed to extract project ID from path")
http.Error(w, "Invalid URL path: "+err.Error(), http.StatusBadRequest)
return
}

// Parse request body
var req liquid.ServiceUsageRequest
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
log.Error(err, "failed to decode request body")
http.Error(w, "Invalid request body: "+err.Error(), http.StatusBadRequest)
return
}

// Use UsageCalculator to build usage report
calculator := NewUsageCalculator(api.client, api.novaClient)
report, err := calculator.CalculateUsage(r.Context(), log, projectID, req.AllAZs)
if err != nil {
log.Error(err, "failed to calculate usage report", "projectID", projectID)
http.Error(w, "Failed to generate usage report: "+err.Error(), http.StatusInternalServerError)
return
}

w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusOK)
if err := json.NewEncoder(w).Encode(report); err != nil {
log.Error(err, "failed to encode usage report")
}
}

// extractProjectIDFromPath extracts the project UUID from the URL path.
// Expected path format: /v1/commitments/projects/:project_id/report-usage
func extractProjectIDFromPath(path string) (string, error) {
// Path: /v1/commitments/projects/<uuid>/report-usage
parts := strings.Split(strings.Trim(path, "/"), "/")
// Expected: ["v1", "commitments", "projects", "<uuid>", "report-usage"]
if len(parts) < 5 {
return "", fmt.Errorf("path too short: %s", path)
}
if parts[2] != "projects" || parts[4] != "report-usage" {
return "", fmt.Errorf("unexpected path format: %s", path)
}
projectID := parts[3]
Comment on lines +92 to +100
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Reject trailing path segments in the route parser.

len(parts) < 5 accepts /v1/commitments/projects/<id>/report-usage/anything as if it were the canonical endpoint. Since this handler is mounted on a prefix, make the path exact here instead of ignoring the extra suffix.

Proposed fix
-	if len(parts) < 5 {
-		return "", fmt.Errorf("path too short: %s", path)
-	}
-	if parts[2] != "projects" || parts[4] != "report-usage" {
+	if len(parts) != 5 || parts[0] != "v1" || parts[1] != "commitments" || parts[2] != "projects" || parts[4] != "report-usage" {
 		return "", fmt.Errorf("unexpected path format: %s", path)
 	}
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
parts := strings.Split(strings.Trim(path, "/"), "/")
// Expected: ["v1", "commitments", "projects", "<uuid>", "report-usage"]
if len(parts) < 5 {
return "", fmt.Errorf("path too short: %s", path)
}
if parts[2] != "projects" || parts[4] != "report-usage" {
return "", fmt.Errorf("unexpected path format: %s", path)
}
projectID := parts[3]
parts := strings.Split(strings.Trim(path, "/"), "/")
// Expected: ["v1", "commitments", "projects", "<uuid>", "report-usage"]
if len(parts) != 5 || parts[0] != "v1" || parts[1] != "commitments" || parts[2] != "projects" || parts[4] != "report-usage" {
return "", fmt.Errorf("unexpected path format: %s", path)
}
projectID := parts[3]
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@internal/scheduling/reservations/commitments/api_report_usage.go` around
lines 71 - 79, The path parser currently allows extra trailing segments because
it only checks len(parts) < 5; change the validation to require an exact match
of parts length and values: require len(parts) == 5 and validate parts[0] ==
"v1", parts[1] == "commitments", parts[2] == "projects" and parts[4] ==
"report-usage" before extracting projectID (parts[3]). This makes the route
exact and will reject paths like
/v1/commitments/projects/<id>/report-usage/extra.

if projectID == "" {
return "", fmt.Errorf("empty project ID in path: %s", path)
}
return projectID, nil
}
Loading
Loading