Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions docs/configuration/config-file-reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -4329,6 +4329,22 @@ query_rejection:
# external labels for alerting rules
[ruler_external_labels: <map of string (labelName) to string (labelValue)> | default = []]

# Per-tenant external URL for the ruler. If set, it overrides the global
# -ruler.external.url for this tenant's alert notifications.
[ruler_external_url: <string> | default = ""]

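# Format for alert generator URLs. Supported values: prometheus (default),
# grafana-explore. Any value other than grafana-explore, including the empty
# default, produces Prometheus-style links.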
[ruler_alert_generator_url_format: <string> | default = ""]

# Grafana datasource UID for alert generator URLs when format is
# grafana-explore.
[ruler_grafana_datasource_uid: <string> | default = ""]

# Grafana organization ID for alert generator URLs when format is
# grafana-explore.
[ruler_grafana_org_id: <int> | default = 1]

# Enable to allow rules to be evaluated with data from a single zone, if other
# zones are not available.
[rules_partial_data: <boolean> | default = false]
Expand Down
29 changes: 24 additions & 5 deletions pkg/ruler/compat.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"github.com/prometheus/prometheus/promql"
"github.com/prometheus/prometheus/rules"
"github.com/prometheus/prometheus/storage"
"github.com/prometheus/prometheus/util/strutil"
"github.com/weaveworks/common/httpgrpc"
"github.com/weaveworks/common/user"

Expand Down Expand Up @@ -164,6 +165,10 @@ type RulesLimits interface {
RulerQueryOffset(userID string) time.Duration
DisabledRuleGroups(userID string) validation.DisabledRuleGroups
RulerExternalLabels(userID string) labels.Labels
RulerExternalURL(userID string) string
RulerAlertGeneratorURLFormat(userID string) string
RulerGrafanaDatasourceUID(userID string) string
RulerGrafanaOrgID(userID string) int64
}

type QueryExecutor func(ctx context.Context, qs string, t time.Time) (promql.Vector, error)
Expand Down Expand Up @@ -369,11 +374,25 @@ func DefaultTenantManagerFactory(cfg Config, p Pusher, q storage.Queryable, engi
Appendable: NewPusherAppendable(p, userID, overrides,
evalMetrics.TotalWritesVec.WithLabelValues(userID),
evalMetrics.FailedWritesVec.WithLabelValues(userID)),
Queryable: q,
QueryFunc: queryFunc,
Context: prometheusContext,
ExternalURL: cfg.ExternalURL.URL,
NotifyFunc: SendAlerts(notifier, cfg.ExternalURL.URL.String()),
Queryable: q,
QueryFunc: queryFunc,
Context: prometheusContext,
ExternalURL: cfg.ExternalURL.URL,
NotifyFunc: SendAlerts(notifier, func(expr string) string {
externalURL := cfg.ExternalURL.String()
if tenantURL := overrides.RulerExternalURL(userID); tenantURL != "" {
externalURL = tenantURL
}
if overrides.RulerAlertGeneratorURLFormat(userID) == "grafana-explore" {
datasourceUID := overrides.RulerGrafanaDatasourceUID(userID)
orgID := overrides.RulerGrafanaOrgID(userID)
if orgID == 0 {
orgID = 1
}
return grafanaExploreLink(externalURL, expr, datasourceUID, orgID)
}
return externalURL + strutil.TableLinkForExpression(expr)
}),
Logger: util_log.GoKitLogToSlog(log.With(logger, "user", userID)),
Registerer: reg,
OutageTolerance: cfg.OutageTolerance,
Expand Down
56 changes: 56 additions & 0 deletions pkg/ruler/external_url.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
package ruler

import (
"sync"
)

// userExternalURL remembers the external URL most recently resolved for each
// user so that callers can tell when a user's effective URL has changed.
type userExternalURL struct {
	// global is the fallback URL used when a user has no per-tenant override.
	global string
	// limits supplies the per-tenant override through RulerExternalURL.
	limits RulesLimits

	// mtx guards users.
	mtx sync.Mutex
	// users maps a user ID to the URL last resolved for that user.
	users map[string]string
}

// newUserExternalURL returns a tracker with no remembered users. global is the
// URL used for any user whose per-tenant override (read from limits) is empty.
func newUserExternalURL(global string, limits RulesLimits) *userExternalURL {
	// The mutex is left at its zero value, which is ready to use.
	tracker := &userExternalURL{
		global: global,
		limits: limits,
		users:  make(map[string]string),
	}
	return tracker
}

// update resolves the external URL for userID (the per-tenant override when it
// is non-empty, otherwise the global URL) and records it. It returns the
// resolved URL and true when the user was not tracked yet or the URL differs
// from the one recorded previously; otherwise it returns the URL and false.
func (e *userExternalURL) update(userID string) (string, bool) {
	// The override is looked up before taking the lock.
	effective := e.limits.RulerExternalURL(userID)
	if effective == "" {
		effective = e.global
	}

	e.mtx.Lock()
	defer e.mtx.Unlock()

	last, tracked := e.users[userID]
	changed := !tracked || last != effective
	if changed {
		e.users[userID] = effective
	}
	return effective, changed
}

// remove forgets the URL recorded for user, so the next update for that user
// reports a change. Removing an unknown user is a no-op.
func (e *userExternalURL) remove(user string) {
	e.mtx.Lock()
	delete(e.users, user)
	e.mtx.Unlock()
}

// cleanup forgets every recorded user, so the next update for any user reports
// a change.
func (e *userExternalURL) cleanup() {
	e.mtx.Lock()
	defer e.mtx.Unlock()

	// Swap in a fresh empty map rather than deleting entries one by one.
	e.users = make(map[string]string)
}
67 changes: 67 additions & 0 deletions pkg/ruler/external_url_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
package ruler

import (
"testing"

"github.com/stretchr/testify/require"
)

// TestUserExternalURL walks a single tracker through its lifecycle. The
// subtests share state and must run in the order written: each one relies on
// the URL recorded by the previous one.
func TestUserExternalURL(t *testing.T) {
	const (
		userID    = "test-user"
		globalURL = "http://global:9090"
		tenantURL = "http://tenant:3000"
	)

	limits := ruleLimits{}
	e := newUserExternalURL(globalURL, &limits)

	// setOverride changes the per-tenant URL served by the limits stub.
	setOverride := func(u string) {
		limits.mtx.Lock()
		limits.externalURL = u
		limits.mtx.Unlock()
	}

	// expectUpdate runs update for userID and checks both return values.
	expectUpdate := func(t *testing.T, wantURL string, wantChanged bool) {
		t.Helper()
		gotURL, gotChanged := e.update(userID)
		require.Equal(t, wantChanged, gotChanged)
		require.Equal(t, wantURL, gotURL)
	}

	t.Run("global URL used when no per-tenant override", func(t *testing.T) {
		e.remove(userID)
		expectUpdate(t, globalURL, true)
	})

	t.Run("no change on second update", func(t *testing.T) {
		expectUpdate(t, globalURL, false)
	})

	t.Run("per-tenant URL overrides global", func(t *testing.T) {
		setOverride(tenantURL)
		expectUpdate(t, tenantURL, true)
	})

	t.Run("no change when per-tenant URL is the same", func(t *testing.T) {
		expectUpdate(t, tenantURL, false)
	})

	t.Run("revert to global when per-tenant override removed", func(t *testing.T) {
		setOverride("")
		expectUpdate(t, globalURL, true)
	})

	t.Run("remove and cleanup lifecycle", func(t *testing.T) {
		// After remove, the next update must report a change.
		e.remove(userID)
		expectUpdate(t, globalURL, true)

		// After cleanup, the next update must report a change as well.
		e.cleanup()
		expectUpdate(t, globalURL, true)
	})
}
13 changes: 10 additions & 3 deletions pkg/ruler/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,9 @@ type DefaultMultiTenantManager struct {
// Per-user externalLabels.
userExternalLabels *userExternalLabels

// Per-user externalURL.
userExternalURL *userExternalURL

// rules backup
rulesBackupManager *rulesBackupManager

Expand Down Expand Up @@ -101,6 +104,7 @@ func NewDefaultMultiTenantManager(cfg Config, limits RulesLimits, managerFactory
ruleEvalMetrics: evalMetrics,
notifiers: map[string]*rulerNotifier{},
userExternalLabels: newUserExternalLabels(cfg.ExternalLabels, limits),
userExternalURL: newUserExternalURL(cfg.ExternalURL.String(), limits),
notifiersDiscoveryMetrics: notifiersDiscoveryMetrics,
mapper: newMapper(cfg.RulePath, logger),
userManagers: map[string]RulesManager{},
Expand Down Expand Up @@ -166,6 +170,7 @@ func (r *DefaultMultiTenantManager) SyncRuleGroups(ctx context.Context, ruleGrou
r.removeNotifier(userID)
r.mapper.cleanupUser(userID)
r.userExternalLabels.remove(userID)
r.userExternalURL.remove(userID)
r.lastReloadSuccessful.DeleteLabelValues(userID)
r.lastReloadSuccessfulTimestamp.DeleteLabelValues(userID)
r.configUpdatesTotal.DeleteLabelValues(userID)
Expand Down Expand Up @@ -210,6 +215,7 @@ func (r *DefaultMultiTenantManager) syncRulesToManager(ctx context.Context, user
return
}
externalLabels, externalLabelsUpdated := r.userExternalLabels.update(user)
externalURL, externalURLUpdated := r.userExternalURL.update(user)

existing := true
manager := r.getRulesManager(user, ctx)
Expand All @@ -222,13 +228,13 @@ func (r *DefaultMultiTenantManager) syncRulesToManager(ctx context.Context, user
return
}

if !existing || rulesUpdated || externalLabelsUpdated {
if !existing || rulesUpdated || externalLabelsUpdated || externalURLUpdated {
level.Debug(r.logger).Log("msg", "updating rules", "user", user)
r.configUpdatesTotal.WithLabelValues(user).Inc()
if (rulesUpdated || externalLabelsUpdated) && existing {
if (rulesUpdated || externalLabelsUpdated || externalURLUpdated) && existing {
r.updateRuleCache(user, manager.RuleGroups())
}
err = manager.Update(r.cfg.EvaluationInterval, files, externalLabels, r.cfg.ExternalURL.String(), r.ruleGroupIterationFunc)
err = manager.Update(r.cfg.EvaluationInterval, files, externalLabels, externalURL, r.ruleGroupIterationFunc)
r.deleteRuleCache(user)
if err != nil {
r.lastReloadSuccessful.WithLabelValues(user).Set(0)
Expand Down Expand Up @@ -443,6 +449,7 @@ func (r *DefaultMultiTenantManager) Stop() {
// cleanup user rules directories
r.mapper.cleanup()
r.userExternalLabels.cleanup()
r.userExternalURL.cleanup()
}

func (m *DefaultMultiTenantManager) ValidateRuleGroup(g rulefmt.RuleGroup) []error {
Expand Down
34 changes: 31 additions & 3 deletions pkg/ruler/ruler.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package ruler

import (
"context"
"encoding/json"
"flag"
"fmt"
"hash/fnv"
Expand All @@ -26,7 +27,6 @@ import (
"github.com/prometheus/prometheus/notifier"
"github.com/prometheus/prometheus/promql/parser"
promRules "github.com/prometheus/prometheus/rules"
"github.com/prometheus/prometheus/util/strutil"
"github.com/weaveworks/common/user"
"golang.org/x/sync/errgroup"

Expand Down Expand Up @@ -506,7 +506,7 @@ type sender interface {
// It filters any non-firing alerts from the input.
//
// Copied from Prometheus's main.go.
func SendAlerts(n sender, externalURL string) promRules.NotifyFunc {
func SendAlerts(n sender, generatorURLFn func(expr string) string) promRules.NotifyFunc {
return func(ctx context.Context, expr string, alerts ...*promRules.Alert) {
var res []*notifier.Alert

Expand All @@ -515,7 +515,7 @@ func SendAlerts(n sender, externalURL string) promRules.NotifyFunc {
StartsAt: alert.FiredAt,
Labels: alert.Labels,
Annotations: alert.Annotations,
GeneratorURL: externalURL + strutil.TableLinkForExpression(expr),
GeneratorURL: generatorURLFn(expr),
}
if !alert.ResolvedAt.IsZero() {
a.EndsAt = alert.ResolvedAt
Expand All @@ -531,6 +531,34 @@ func SendAlerts(n sender, externalURL string) promRules.NotifyFunc {
}
}

// grafanaExploreLink returns a Grafana Explore URL that opens expr against the
// datasource identified by datasourceUID in organization orgID.
//
// Trailing slashes are trimmed from baseURL before "/explore" is appended. The
// query is described by a single pane named "default" with a relative time
// range of the last hour; that description is JSON-encoded and query-escaped
// into the "panes" parameter.
func grafanaExploreLink(baseURL, expr, datasourceUID string, orgID int64) string {
	query := map[string]any{
		"refId":      "A",
		"expr":       expr,
		"datasource": map[string]string{"uid": datasourceUID, "type": "prometheus"},
		"editorMode": "code",
	}
	timeRange := map[string]string{
		"from": "now-1h",
		"to":   "now",
	}
	pane := map[string]any{
		"datasource": datasourceUID,
		"queries":    []map[string]any{query},
		"range":      timeRange,
	}

	// The error is deliberately ignored: the value is built only from strings,
	// maps and slices, all of which encoding/json can encode.
	encoded, _ := json.Marshal(map[string]any{"default": pane})

	root := strings.TrimRight(baseURL, "/")
	panesParam := url.QueryEscape(string(encoded))

	return fmt.Sprintf("%s/explore?schemaVersion=1&panes=%s&orgId=%d", root, panesParam, orgID)
}

func ruleGroupDisabled(ruleGroup *rulespb.RuleGroupDesc, disabledRuleGroupsForUser validation.DisabledRuleGroups) bool {
for _, disabledRuleGroupForUser := range disabledRuleGroupsForUser {
if ruleGroup.Namespace == disabledRuleGroupForUser.Namespace &&
Expand Down
Loading
Loading