From 50a8cc1addb9752bdd1a04c88987aa2e262bce54 Mon Sep 17 00:00:00 2001 From: Markus Opolka <7090372+martialblog@users.noreply.github.com> Date: Thu, 12 Mar 2026 15:51:15 +0100 Subject: [PATCH 1/2] Add option to handle watchdog alerts --- README.md | 35 ++++++++++++++++++++++++++++ cmd/alert.go | 38 ++++++++++++++++++++++++++++--- cmd/alert_test.go | 9 ++++++++ testdata/alertmanager/alert.rules | 9 +++++++- 4 files changed, 87 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index a07bbd7..efe3eb0 100644 --- a/README.md +++ b/README.md @@ -208,6 +208,41 @@ $ check_prometheus alert --name "HostHighCpuLoad" --name "PrometheusTargetMissin OK - Alerts inactive | total=2 firing=0 pending=0 inactive=2 ``` +#### Checking alerts via their labels + +The `--include-label` and `--exclude-label` options can be used to filter alerts: + +```bash +$ check_prometheus alert --include-label severity=warning +OK - 2 Alerts: 0 Firing - 0 Pending - 2 Inactive + \_[OK] [MysqlTooManyConnections] is inactive + \_[OK] [MysqlHighPreparedStatementsUtilization] is inactive +``` + +```bash +$ check_prometheus alert --include-label namespace=production --exclude-label severity=info +OK - 1 Alerts: 0 Firing - 0 Pending - 1 Inactive + \_[OK] [ApacheDown] is inactive +``` + +#### Checking watchdog alerts + +In Prometheus, a "watchdog" or "dead man's switch" is an alert that is always firing to ensure the alerting pipeline is working. 
The `-W, --watchdog` flag can be used to flip/negate the exit state of the plugin for these kinds of alerts: + +```bash +$ check_prometheus alert --name Watchdog -W --no-alerts-state 2 +[OK] - 1 Alerts: 1 Firing - 0 Pending - 0 Inactive +\_ [OK] [Watchdog] is firing - value: 1.00 - {"alertname":"Watchdog","severity":"none"} +|total=1 firing=1 pending=0 inactive=0 +``` + +```bash +$ check_prometheus alert --name Watchdog -W --no-alerts-state 2 +[CRITICAL] - 0 Alerts: 0 Firing - 0 Pending - 0 Inactive +\_ [CRITICAL] No alerts retrieved +|total=0 firing=0 pending=0 inactive=0 +``` + ## License Copyright (c) 2022 [NETWAYS GmbH](mailto:info@netways.de) diff --git a/cmd/alert.go b/cmd/alert.go index 75dc87d..01406c8 100644 --- a/cmd/alert.go +++ b/cmd/alert.go @@ -22,6 +22,7 @@ type AlertConfig struct { ExcludeLabels []string IncludeLabels []string ProblemsOnly bool + FlipExitState bool StateLabelKey string NoAlertsState string } @@ -163,7 +164,13 @@ inactive = 0`, sc := result.NewPartialResult() - _ = sc.SetState(rl.GetStatus(cliAlertConfig.StateLabelKey)) + rlStatus := rl.GetStatus(cliAlertConfig.StateLabelKey) + // If the negate flag is set we negate this state + if cliAlertConfig.FlipExitState { + rlStatus = negateStatus(rlStatus) + } + + _ = sc.SetState(rlStatus) sc.Output = rl.GetOutput() overall.AddSubcheck(sc) } @@ -185,7 +192,13 @@ inactive = 0`, sc := result.NewPartialResult() - _ = sc.SetState(rl.GetStatus(cliAlertConfig.StateLabelKey)) + rlStatus := rl.GetStatus(cliAlertConfig.StateLabelKey) + // If the negate flag is set we negate this state + if cliAlertConfig.FlipExitState { + rlStatus = negateStatus(rlStatus) + } + + _ = sc.SetState(rlStatus) // Set the alert in the internal Type to generate the output rl.Alert = alert sc.Output = rl.GetOutput() @@ -257,9 +270,12 @@ func init() { fs.BoolVarP(&cliAlertConfig.ProblemsOnly, "problems", "P", false, "Display only alerts which status is not inactive/OK. 
Note that in combination with the --name flag this might result in no alerts being displayed") + fs.BoolVarP(&cliAlertConfig.FlipExitState, "watchdog", "W", false, + "Flip the exit state for firing alerts. When this flag is set firing alerts will be OK and inactive alerts will be CRITICAL. This is intended for handling watchdog alerts") + fs.StringVarP(&cliAlertConfig.StateLabelKey, "label-key-state", "S", "", "Use the given AlertRule label to override the exit state for firing alerts."+ - "\nIf this flag is set the plugin looks for warning/critical/ok in the provided label key") + "\nIf this flag is set the plugin looks for the strings 'warning/critical/ok' in the provided label key") } // Function to convert state to integer. @@ -314,3 +330,19 @@ func matchesLabel(labels model.LabelSet, labelsToMatch []string) bool { return false } + +// negateStatus turns an OK state into critical and a warning/critical state into OK +func negateStatus(state int) int { + switch state { + case check.OK: + return check.Critical + case check.Critical: + return check.OK + case check.Warning: + return check.OK + case check.Unknown: + return check.Unknown + default: + return check.Unknown + } +} diff --git a/cmd/alert_test.go b/cmd/alert_test.go index 7565d8a..b915c62 100644 --- a/cmd/alert_test.go +++ b/cmd/alert_test.go @@ -225,6 +225,15 @@ exit status 2 args: []string{"run", "../main.go", "alert", "--name", "InactiveAlert"}, expected: "[OK] - 1 Alerts: 0 Firing - 0 Pending - 1 Inactive\n\\_ [OK] [InactiveAlert] is inactive\n|total=1 firing=0 pending=0 inactive=1\n\n", }, + { + name: "alert-watchdog", + server: httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + w.Write(loadTestdata(alertTestDataSet2)) + })), + args: []string{"run", "../main.go", "alert", "--name", "InactiveAlert", "-W"}, + expected: "[CRITICAL] - 1 Alerts: 0 Firing - 0 Pending - 1 Inactive\n\\_ [CRITICAL] [InactiveAlert] is inactive\n|total=1 firing=0 
pending=0 inactive=1\n\nexit status 2\n", + }, { name: "alert-recording-rule", server: httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { diff --git a/testdata/alertmanager/alert.rules b/testdata/alertmanager/alert.rules index d2f71d0..3509dc0 100644 --- a/testdata/alertmanager/alert.rules +++ b/testdata/alertmanager/alert.rules @@ -1,7 +1,14 @@ groups: - name: Test Alerts for check_plugin rules: - + - alert: Watchdog + annotations: + message: | + This is an alert meant to ensure that the entire alerting pipeline is functional. + This alert is always firing. + expr: vector(1) + labels: + severity: none - alert: PrometheusTargetMissing expr: up == 0 for: 0m From e7e52c215f4d3ef7d26716812a78d07b6f89d8a3 Mon Sep 17 00:00:00 2001 From: Markus Opolka <7090372+martialblog@users.noreply.github.com> Date: Mon, 16 Mar 2026 10:21:04 +0100 Subject: [PATCH 2/2] Bump golangci-lint to v2.9.0 --- .github/workflows/golangci-lint.yml | 2 +- .golangci.yml | 10 ++++++++++ cmd/alert.go | 1 + cmd/config.go | 1 + cmd/health.go | 2 ++ cmd/query.go | 5 +++-- cmd/root.go | 4 +++- internal/alert/alert.go | 3 +-- internal/client/client.go | 7 ++----- 9 files changed, 24 insertions(+), 11 deletions(-) diff --git a/.github/workflows/golangci-lint.yml b/.github/workflows/golangci-lint.yml index 7d53aa0..0cd3824 100644 --- a/.github/workflows/golangci-lint.yml +++ b/.github/workflows/golangci-lint.yml @@ -17,4 +17,4 @@ jobs: - name: golangci-lint uses: golangci/golangci-lint-action@v9 with: - version: v2.1.6 + version: v2.9.0 diff --git a/.golangci.yml b/.golangci.yml index 9da100c..a537c95 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -3,7 +3,10 @@ run: tests: false linters: default: all + enable: + - wsl_v5 disable: + - wsl - cyclop - depguard - err113 @@ -25,6 +28,13 @@ linters: - varnamelen - wrapcheck - funlen + settings: + wsl_v5: + allow-first-in-block: true + allow-whole-block: true + branch-max-lines: 2 + disable: + - err exclusions: generated: lax 
presets: diff --git a/cmd/alert.go b/cmd/alert.go index 01406c8..6ee2f17 100644 --- a/cmd/alert.go +++ b/cmd/alert.go @@ -100,6 +100,7 @@ inactive = 0`, if cliAlertConfig.AlertName != nil { check.ExitRaw(check.Unknown, "No such alert defined", "|", pdlist.String()) } + check.ExitRaw(noAlertsState, "No alerts defined", "|", pdlist.String()) } diff --git a/cmd/config.go b/cmd/config.go index 00b687b..c865ce2 100644 --- a/cmd/config.go +++ b/cmd/config.go @@ -90,6 +90,7 @@ func (c *Config) NewClient() *client.Client { // Using a Bearer Token for authentication if c.Bearer != "" { var t = config.NewInlineSecret(c.Bearer) + rt = config.NewAuthorizationCredentialsRoundTripper("Bearer", t, rt) } diff --git a/cmd/health.go b/cmd/health.go index b5e3182..07f65bd 100644 --- a/cmd/health.go +++ b/cmd/health.go @@ -29,6 +29,7 @@ Ready: Checks the readiness of an endpoint, which returns OK if the Prometheus s // Creating an client and connecting to the API c := cliConfig.NewClient() + err := c.Connect() if err != nil { check.ExitError(err) @@ -61,6 +62,7 @@ Ready: Checks the readiness of an endpoint, which returns OK if the Prometheus s if err != nil { check.ExitError(err) } + partialResult := result.NewPartialResult() _ = partialResult.SetState(rc) diff --git a/cmd/query.go b/cmd/query.go index fba135e..93bc7bc 100644 --- a/cmd/query.go +++ b/cmd/query.go @@ -79,6 +79,7 @@ Note: Time range values e.G. 'go_memstats_alloc_bytes_total[0s]' only the latest } c := cliConfig.NewClient() + err = c.Connect() if err != nil { check.ExitError(err) @@ -93,6 +94,7 @@ Note: Time range values e.G. 'go_memstats_alloc_bytes_total[0s]' only the latest if strings.Contains(err.Error(), "unmarshalerDecoder: unexpected value type \"string\"") { err = errors.New("string value results are not supported") } + check.ExitError(err) } @@ -112,10 +114,8 @@ Note: Time range values e.G. 
'go_memstats_alloc_bytes_total[0s]' only the latest case model.ValVector: // Instant vector - a set of time series containing a single sample for each time series, all sharing the same timestamp vectorVal := result.(model.Vector) - // Set initial capacity to reduce memory allocations for _, sample := range vectorVal { - numberValue := float64(sample.Value) partial := goresult.NewPartialResult() @@ -185,6 +185,7 @@ Note: Time range values e.G. 'go_memstats_alloc_bytes_total[0s]' only the latest appendum := fmt.Sprintf("HTTP Warnings: %v", strings.Join(warnings, ", ")) overall.Summary = overall.GetOutput() + appendum } + check.ExitRaw(overall.GetStatus(), overall.GetOutput()) }, } diff --git a/cmd/root.go b/cmd/root.go index 1b78f18..0a46ae5 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -24,7 +24,9 @@ func Execute(version string) { rootCmd.Version = version rootCmd.VersionTemplate() - if err := rootCmd.Execute(); err != nil { + err := rootCmd.Execute() + + if err != nil { check.ExitError(err) } } diff --git a/internal/alert/alert.go b/internal/alert/alert.go index 1d640c9..90473be 100644 --- a/internal/alert/alert.go +++ b/internal/alert/alert.go @@ -16,7 +16,7 @@ const ( alertnameLabelKey = "alertname" ) -// Internal representation of Prometheus Rules. +// Rule is the internal representation of a Prometheus rule. // Alert attribute will be used when iterating over multiple AlertingRules. 
type Rule struct { AlertingRule v1.AlertingRule @@ -139,7 +139,6 @@ func (a *Rule) GetOutput() (output string) { // Add current value to output value, _ = strconv.ParseFloat(a.Alert.Value, 32) out.WriteString(fmt.Sprintf(" is %s - value: %.2f", a.AlertingRule.State, value)) - // Add labels to the output l, err := json.Marshal(a.Alert.Labels) diff --git a/internal/client/client.go b/internal/client/client.go index 4668885..cd334f4 100644 --- a/internal/client/client.go +++ b/internal/client/client.go @@ -3,6 +3,7 @@ package client import ( "context" "fmt" + "maps" "net/http" "net/url" "strings" @@ -31,7 +32,6 @@ func (c *Client) Connect() error { Address: c.URL, RoundTripper: c.RoundTripper, }) - if err != nil { return fmt.Errorf("error creating client: %w", err) } @@ -113,10 +113,9 @@ func cloneRequest(r *http.Request) *http.Request { r2 := new(http.Request) *r2 = *r // Deep copy of the Header. - r2.Header = make(http.Header) - for k, s := range r.Header { - r2.Header[k] = s - } + // maps.Copy takes (dst, src): give r2 its own map first so the clone never aliases r.Header. + r2.Header = make(http.Header, len(r.Header)) + maps.Copy(r2.Header, r.Header) return r2 }