Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
* [ENHANCEMENT] Compactor: Prevent partition compaction to compact any blocks marked for deletion. #7391
* [ENHANCEMENT] Distributor: Optimize memory allocations by reusing the existing capacity of these pooled slices in the Prometheus Remote Write 2.0 path. #7392
* [ENHANCEMENT] Upgrade gRPC from v1.71.2 to v1.79.3 to address CVE-2026-33186. #7460
* [ENHANCEMENT] Query Frontend: Add `query_too_expensive` reason to QFE and `reason` field to query stats. #7479
* [BUGFIX] Querier: Fix queryWithRetry and labelsWithRetry returning (nil, nil) on cancelled context by propagating ctx.Err(). #7370
* [BUGFIX] Metrics Helper: Fix non-deterministic bucket order in merged histograms by sorting buckets after map iteration, matching Prometheus client library behavior. #7380
* [BUGFIX] Distributor: Return HTTP 401 Unauthorized when tenant ID resolution fails in the Prometheus Remote Write 2.0 path. #7389
Expand Down
25 changes: 15 additions & 10 deletions pkg/frontend/transport/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ const (
reasonChunksLimitStoreGateway = "store_gateway_chunks_limit"
reasonBytesLimitStoreGateway = "store_gateway_bytes_limit"
reasonUnOptimizedRegexMatcher = `unoptimized_regex_matcher`
reasonQueryTooExpensive = "query_too_expensive"

limitTooManySamples = `query processing would load too many samples into memory`
limitTimeRangeExceeded = `the query time range exceeds the limit`
Expand All @@ -74,6 +75,7 @@ const (
limitChunkBytesFetched = `the query hit the aggregated chunks size limit`
limitDataBytesFetched = `the query hit the aggregated data size limit`
limitUnOptimizedRegexMatcher = `unoptimized regex matcher`
limitQueryTooExpensive = `query spent too long in evaluation`

// Store gateway limits.
limitSeriesStoreGateway = `exceeded series limit`
Expand Down Expand Up @@ -562,16 +564,6 @@ func (f *Handler) reportQueryStats(r *http.Request, source, userID string, query
}
}

shouldLog := source == requestmeta.SourceAPI || (f.cfg.EnabledRulerQueryStatsLog && source == requestmeta.SourceRuler)
if shouldLog {
logMessage = append(logMessage, formatQueryString(queryString)...)
if error != nil {
level.Error(util_log.WithContext(r.Context(), f.log)).Log(logMessage...)
} else {
level.Info(util_log.WithContext(r.Context(), f.log)).Log(logMessage...)
}
}

var reason string
if statusCode == http.StatusTooManyRequests {
reason = reasonTooManyRequests
Expand Down Expand Up @@ -602,6 +594,8 @@ func (f *Handler) reportQueryStats(r *http.Request, source, userID string, query
reason = reasonBytesLimitStoreGateway
} else if strings.Contains(errMsg, limitUnOptimizedRegexMatcher) {
reason = reasonUnOptimizedRegexMatcher
} else if strings.Contains(errMsg, limitQueryTooExpensive) {
reason = reasonQueryTooExpensive
}
} else if statusCode == http.StatusServiceUnavailable && error != nil {
errMsg := error.Error()
Expand All @@ -610,9 +604,20 @@ func (f *Handler) reportQueryStats(r *http.Request, source, userID string, query
}
}
if len(reason) > 0 {
logMessage = append(logMessage, "reason", reason)
f.rejectedQueries.WithLabelValues(reason, source, userID).Inc()
stats.LimitHit = reason
}

shouldLog := source == requestmeta.SourceAPI || (f.cfg.EnabledRulerQueryStatsLog && source == requestmeta.SourceRuler)
if shouldLog {
logMessage = append(logMessage, formatQueryString(queryString)...)
if error != nil {
level.Error(util_log.WithContext(r.Context(), f.log)).Log(logMessage...)
} else {
level.Info(util_log.WithContext(r.Context(), f.log)).Log(logMessage...)
}
}
}

func (f *Handler) parseRequestQueryString(r *http.Request, bodyBuf bytes.Buffer) url.Values {
Expand Down
37 changes: 37 additions & 0 deletions pkg/frontend/transport/handler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,22 @@ func TestHandler_ServeHTTP(t *testing.T) {
},
expectedStatusCode: http.StatusServiceUnavailable,
},
{
name: "test handler with reasonQueryTooExpensive",
cfg: HandlerConfig{QueryStatsEnabled: true},
expectedMetrics: 6,
roundTripperFunc: roundTripperFunc(func(req *http.Request) (*http.Response, error) {
return &http.Response{
StatusCode: http.StatusUnprocessableEntity,
Body: io.NopCloser(strings.NewReader("query timed out: query spent too long in evaluation - consider simplifying your query")),
}, nil
}),
additionalMetricsCheckFunc: func(h *Handler) {
v := promtest.ToFloat64(h.rejectedQueries.WithLabelValues(reasonQueryTooExpensive, requestmeta.SourceAPI, userID))
assert.Equal(t, float64(1), v)
},
expectedStatusCode: http.StatusUnprocessableEntity,
},
{
name: "test cortex_slow_queries_total",
cfg: HandlerConfig{QueryStatsEnabled: true, LogQueriesLongerThan: time.Second * 2},
Expand Down Expand Up @@ -584,6 +600,27 @@ func TestReportQueryStatsFormat(t *testing.T) {
}
}

func TestReportQueryStatsRejectionReason(t *testing.T) {
outputBuf := bytes.NewBuffer(nil)
logger := log.NewSyncLogger(log.NewLogfmtLogger(outputBuf))
userID := "fake"
req, _ := http.NewRequest(http.MethodGet, "http://localhost:8080/prometheus/api/v1/query", nil)
resp := &http.Response{ContentLength: 0}
responseTime := time.Second

handler := NewHandler(HandlerConfig{QueryStatsEnabled: true}, tenantfederation.Config{}, http.DefaultTransport, logger, nil)
req = req.WithContext(requestmeta.ContextWithRequestSource(context.Background(), requestmeta.SourceAPI))

queryErr := httpgrpc.Errorf(http.StatusUnprocessableEntity, "%s", `query timed out: query spent too long in evaluation - consider simplifying your query`)
handler.reportQueryStats(req, requestmeta.SourceAPI, userID, nil, responseTime, &querier_stats.QueryStats{}, queryErr, http.StatusUnprocessableEntity, resp)

data, err := io.ReadAll(outputBuf)
require.NoError(t, err)
logLine := string(data)
assert.Contains(t, logLine, "reason=query_too_expensive")
assert.Contains(t, logLine, "status_code=422")
}

func Test_ExtractTenantIDs(t *testing.T) {
roundTripper := roundTripperFunc(func(req *http.Request) (*http.Response, error) {
return &http.Response{
Expand Down
Loading