From 29efaf996443904ede2f8b941fc2f8daf600c9f6 Mon Sep 17 00:00:00 2001 From: Markus Opolka Date: Fri, 19 Jun 2026 09:18:38 +0200 Subject: [PATCH 01/10] Use custom user-agent to identify requests by the bridge --- internal/icinga2/icinga.go | 1 + 1 file changed, 1 insertion(+) diff --git a/internal/icinga2/icinga.go b/internal/icinga2/icinga.go index a3d073e..7fb1f53 100644 --- a/internal/icinga2/icinga.go +++ b/internal/icinga2/icinga.go @@ -78,6 +78,7 @@ func (c *Client) Do(req *http.Request, path string) (*http.Response, error) { c.logger.Debug(fmt.Sprintf("Calling Icinga API at %s", req.URL), "component", "icinga") + req.Header.Set("User-Agent", "alertmanager-icinga-bridge") req.Header.Set("Accept", "application/json") req.Header.Set("Content-Type", "application/json") From bb6f28fb3e1d5279013137ec57d9ab57e88ac6cc Mon Sep 17 00:00:00 2001 From: Markus Opolka Date: Fri, 19 Jun 2026 09:42:51 +0200 Subject: [PATCH 02/10] Fix heartbeat alerts being removed and extend README for them --- README.md | 2 ++ internal/gc/gc.go | 2 +- internal/gc/gc_test.go | 45 ++++++++++++++++++++++++++++++++++++++---- 3 files changed, 44 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 1ba5812..72b53a9 100644 --- a/README.md +++ b/README.md @@ -282,6 +282,8 @@ This can be used to map alerts like a `DeadMansSwitch`. In Prometheus a "watchdo To treat an alert as a "heartbeat" the alert must have a label `heartbeat` with a [Golang duration](https://pkg.go.dev/time#ParseDuration) as value (e.g. `heartbeat: "1d"`). +To enable garbage collection on these alerts they can be set to downtime. A heartbeat service with an active downtime will be removed by the garbage collection. + The Alertmanager-Icinga-Bridge will create an Icinga service check with active checks enabled and with the check interval set to the parsed duration. We add 10% to the parsed duration to account for network latency etc., which could otherwise lead to flapping heartbeat checks. 
diff --git a/internal/gc/gc.go b/internal/gc/gc.go index 0c38c07..abb8652 100644 --- a/internal/gc/gc.go +++ b/internal/gc/gc.go @@ -165,7 +165,7 @@ func (g *GarbageCollector) heartbeat(ctx context.Context) { func (g *GarbageCollector) removeServiceIfRequired(ctx context.Context, service icinga2.Service) error { _, heartbeat := service.Vars["label_heartbeat"] - if heartbeat && service.HasDowntime() { + if heartbeat && !service.HasDowntime() { g.logger.Debug("Skipping heartbeat and not downtimed service", "component", "gc", "service", service.Name) return nil } diff --git a/internal/gc/gc_test.go b/internal/gc/gc_test.go index 7c091f6..ea18589 100644 --- a/internal/gc/gc_test.go +++ b/internal/gc/gc_test.go @@ -71,7 +71,7 @@ func TestGCRemoveService_WithRemoved(t *testing.T) { } } -func TestGCRemoveService_WithSkippedDowntime(t *testing.T) { +func TestGCRemoveService_WithHeartbeatNoDowntime(t *testing.T) { ts := testServerForDelete() defer ts.Close() @@ -89,10 +89,9 @@ func TestGCRemoveService_WithSkippedDowntime(t *testing.T) { Name: "svc", Vars: icinga2.Vars{ "keep_for": 20.0, - "label_heartbeat": "true", + "label_heartbeat": "300s", }, LastStateChange: 1770000000.0, - DowntimeDepth: 1, } actualErr := gc.removeServiceIfRequired(context.Background(), svc) @@ -105,7 +104,45 @@ func TestGCRemoveService_WithSkippedDowntime(t *testing.T) { expected := "Skipping heartbeat and not downtimed service" if !strings.Contains(actual, expected) { - t.Fatalf("expected %v, got %v", expected, actual) + t.Fatalf("expected:\n %v, got:\n %v", expected, actual) + } +} + +func TestGCRemoveService_WithHeartbeatDowntime(t *testing.T) { + ts := testServerForDelete() + defer ts.Close() + + var buf bytes.Buffer + logger := slog.New(slog.NewTextHandler(&buf, &slog.HandlerOptions{Level: slog.LevelDebug})) + + config := testConfig(ts.URL) + + icingaClient := icinga2.NewClient(config, logger) + + gc := NewGarbageCollector(config, logger, icingaClient) + + svc := icinga2.Service{ + HostName: 
"unittest", + Name: "svc", + Vars: icinga2.Vars{ + "keep_for": 20.0, + "label_heartbeat": "300s", + }, + LastStateChange: 1770000000.0, + DowntimeDepth: 1, + } + + actualErr := gc.removeServiceIfRequired(context.Background(), svc) + + if actualErr != nil { + t.Errorf("expected no error got %v", actualErr) + } + + actual := buf.String() + expected := "Deleting service at Icinga API" + + if !strings.Contains(actual, expected) { + t.Fatalf("expected:\n %v, got:\n %v", expected, actual) } } From 7b034da2df0a17dd7d670b592684427b86baacd6 Mon Sep 17 00:00:00 2001 From: Markus Opolka Date: Fri, 19 Jun 2026 09:54:28 +0200 Subject: [PATCH 03/10] Enable -race in tests --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 2a08e0f..dfa00fd 100644 --- a/Makefile +++ b/Makefile @@ -25,7 +25,7 @@ lint: vet: go vet $(go list ./... | grep -v /vendor/) test: - go test -v -cover ./... + go test -v -cover -race ./... coverage: go test -v -cover -coverprofile=coverage.out ./... 
&&\ go tool cover -html=coverage.out -o coverage.html From 5e41a170b555895b64af681cb5ca7fccb71f3e2c Mon Sep 17 00:00:00 2001 From: Markus Opolka Date: Fri, 19 Jun 2026 09:57:08 +0200 Subject: [PATCH 04/10] Add package level godoc --- internal/api/listener.go | 1 + internal/config/config.go | 1 + internal/gc/gc.go | 1 + internal/icinga2/icinga.go | 1 + main.go | 3 ++- 5 files changed, 6 insertions(+), 1 deletion(-) diff --git a/internal/api/listener.go b/internal/api/listener.go index 2d7b97b..8e19031 100644 --- a/internal/api/listener.go +++ b/internal/api/listener.go @@ -1,5 +1,6 @@ // SPDX-License-Identifier: BSD-3-Clause +// Package api provides the HTTP handler that exposes the bridge's HTTP API package api import ( diff --git a/internal/config/config.go b/internal/config/config.go index a43106e..6fb92c7 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -1,5 +1,6 @@ // SPDX-License-Identifier: BSD-3-Clause +// Package config provides the central configuration of the tool and the CLI options package config import ( diff --git a/internal/gc/gc.go b/internal/gc/gc.go index abb8652..c62fd63 100644 --- a/internal/gc/gc.go +++ b/internal/gc/gc.go @@ -1,5 +1,6 @@ // SPDX-License-Identifier: BSD-3-Clause +// Package gc provides the garbage collector that handles cleanup at the Icinga API package gc import ( diff --git a/internal/icinga2/icinga.go b/internal/icinga2/icinga.go index 7fb1f53..7f50b29 100644 --- a/internal/icinga2/icinga.go +++ b/internal/icinga2/icinga.go @@ -1,5 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 +// Package icinga2 provides the HTTP client to talk to the Icinga API package icinga2 import ( diff --git a/main.go b/main.go index 729bc7e..87bcdc3 100644 --- a/main.go +++ b/main.go @@ -1,5 +1,6 @@ // SPDX-License-Identifier: BSD-3-Clause +// Package main parses the CLI flags and starts the various components package main import ( @@ -42,7 +43,7 @@ func buildVersion() string { func main() { var cli config.CLI - // Create and
parse CLI flags -> move to kong + // Create and parse CLI flags kong.Parse(&cli, kong.Name("alertmanager-icinga-bridge"), kong.Description(`The Alertmanager to Icinga bridge can receive alerts from the Prometheus Alertmanager's generic webhook receiver and creates Icinga Services for these alerts.`), From e9ef0eef85a7ce40edf79d42090158b61559bc49 Mon Sep 17 00:00:00 2001 From: Markus Opolka Date: Fri, 19 Jun 2026 10:26:02 +0200 Subject: [PATCH 05/10] Fix DisplayNameAsServiceName default value --- internal/config/cli.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/config/cli.go b/internal/config/cli.go index 5f2ff0a..d2bb927 100644 --- a/internal/config/cli.go +++ b/internal/config/cli.go @@ -18,7 +18,7 @@ type CLI struct { IcingaURL []string `kong:"required,env='ALERTMANAGER_ICINGA_BRIDGE_ICINGA_URL',help='Icinga API URL (can be repeated)'"` IcingaHostname string `kong:"required,env='ALERTMANAGER_ICINGA_BRIDGE_ICINGA_HOSTNAME',help='Icinga host name to manage services for'"` DisableKeepAlives bool `kong:"default=false,env='ALERTMANAGER_ICINGA_BRIDGE_DISABLE_KEEPALIVES',help='Disable HTTP keepalives'"` - DisplayNameAsServiceName bool `kong:"default='false',env='ALERTMANAGER_ICINGA_BRIDGE_DISPLAY_NAME_AS_SERVICE_NAME',help='Set the Icinga service display name to the generated service name'"` + DisplayNameAsServiceName bool `kong:"default=false,env='ALERTMANAGER_ICINGA_BRIDGE_DISPLAY_NAME_AS_SERVICE_NAME',help='Set the Icinga service display name to the generated service name'"` IcingaInsecureTLS bool `kong:"default=false,env='ALERTMANAGER_ICINGA_BRIDGE_ICINGA_INSECURE_TLS',help='Skip Icinga TLS verification'"` IcingaCAFile string `kong:"env='ALERTMANAGER_ICINGA_BRIDGE_ICINGA_CA',help='Path of a custom CA certificate to use when connecting to the Icinga API'"` IcingaPassword string `kong:"required,env='ALERTMANAGER_ICINGA_BRIDGE_ICINGA_PASSWORD',help='Icinga API password'"` From 04ac8cc05933c7bb04d4d1bda9ef4da8087dc0b1 Mon Sep 17 00:00:00 
2001 From: Markus Opolka Date: Fri, 19 Jun 2026 10:34:48 +0200 Subject: [PATCH 06/10] Fix typo in main.go --- main.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.go b/main.go index 87bcdc3..fe54c8d 100644 --- a/main.go +++ b/main.go @@ -68,7 +68,7 @@ func main() { // Create Icinga Client icingaClient := icinga2.NewClient(cfg, logger) - logger.Info("Starting alertmanager-icinga-brigde", "version", version, "commit", commit, "date", date, "component", "main") + logger.Info("Starting alertmanager-icinga-bridge", "version", version, "commit", commit, "date", date, "component", "main") // Create and start the Service Garbage Collector garbagecol := gc.NewGarbageCollector(cfg, logger, icingaClient) From 074a1995b609fed49d69c9c443e305490b2d9f88 Mon Sep 17 00:00:00 2001 From: Markus Opolka Date: Fri, 19 Jun 2026 10:35:12 +0200 Subject: [PATCH 07/10] Remove unused PluginOutputStateSuffixes since it is now set in the listener --- internal/config/config.go | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/internal/config/config.go b/internal/config/config.go index 6fb92c7..5afe579 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -31,13 +31,12 @@ type Config struct { CustomSeverityLevels map[string]string MergedSeverityLevels map[string]int - PluginOutputByStates bool - BearerToken string - ListenAddr string - TLSCertPath string - TLSKeyPath string - PluginOutputAnnotations []string - PluginOutputStateSuffixes []string + PluginOutputByStates bool + BearerToken string + ListenAddr string + TLSCertPath string + TLSKeyPath string + PluginOutputAnnotations []string IcingaDisableKeepAlives bool IcingaHostname string From 46383bedf36a6406499333dbd97fe8c4c66b1e4f Mon Sep 17 00:00:00 2001 From: Markus Opolka Date: Fri, 19 Jun 2026 10:48:45 +0200 Subject: [PATCH 08/10] Add MaxBytesReader to listener just to be safe --- internal/api/listener.go | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 
deletions(-) diff --git a/internal/api/listener.go b/internal/api/listener.go index 8e19031..3d81a70 100644 --- a/internal/api/listener.go +++ b/internal/api/listener.go @@ -33,6 +33,9 @@ var ( serviceNamePattern = regexp.MustCompile(`^[-+_.:,a-zA-Z0-9 %]{1,128}$`) ) +// maxBytesToAccept is the maximum request body size in bytes accepted as JSON (100 MiB), which should be more than enough +const maxBytesToAccept = 100 << 20 + // Listener represents the daemon's API type Listener struct { mux http.Handler @@ -154,9 +157,12 @@ func (l *Listener) handleHealthy(w http.ResponseWriter, _ *http.Request) { func (l *Listener) handleIncomingAlert(w http.ResponseWriter, r *http.Request) { l.logger.Debug("Handling incoming alert", "component", "listener") + // Cap the request body at maxBytesToAccept so an oversized payload is not read without limit. + body := http.MaxBytesReader(w, r.Body, maxBytesToAccept) + var payload WebhookPayload - errDecode := json.NewDecoder(r.Body).Decode(&payload) + errDecode := json.NewDecoder(body).Decode(&payload) if errDecode != nil { l.logger.Error("Received invalid JSON", "component", "listener", "error", errDecode.Error()) @@ -383,7 +389,7 @@ func (l *Listener) generatePluginOutput(alert Alert, exitCode int) string { // If the PluginOutputByStates option is enabled then first look for an annotation with the state suffix // otherwise fall back to just using the PluginOutputAnnotations value as is if l.config.PluginOutputByStates { - // Note, I don't like PluginOutputStateSuffixes being a slide and exitCode being the index + // Note, I don't like pluginOutputStateSuffixes being a slice and exitCode being the index if value, ok := alert.Annotations[fmt.Sprintf("%s_%s", v, pluginOutputStateSuffixes[exitCode])]; ok { return value } From 46a31d1440cc460d9ad791248320a581d67b9df6 Mon Sep 17 00:00:00 2001 From: Markus Opolka Date: Fri, 19 Jun 2026 10:49:24 +0200 Subject: [PATCH 09/10] Fix GC returning after single error --- internal/gc/gc.go | 3 +-- internal/icinga2/icinga.go | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git
a/internal/gc/gc.go b/internal/gc/gc.go index c62fd63..2f87619 100644 --- a/internal/gc/gc.go +++ b/internal/gc/gc.go @@ -108,7 +108,6 @@ func (g *GarbageCollector) start(ctx context.Context) { if errSvcRemove != nil { g.logger.Error("Could not remove service from Icinga", "component", "gc", "service", svc.Name, "error", errSvcRemove.Error()) - return } } @@ -187,7 +186,7 @@ func (g *GarbageCollector) removeServiceIfRequired(ctx context.Context, service if errDel != nil { g.logger.Error("Could not remove service", "component", "gc", "service", svcName, "error", errDel.Error()) - return fmt.Errorf("could remove service: %w", errDel) + return fmt.Errorf("could not remove service: %w", errDel) } g.logger.Info("Successfully removed service from Icinga", "component", "gc", "service", svcName) diff --git a/internal/icinga2/icinga.go b/internal/icinga2/icinga.go index 7f50b29..3ca1fe4 100644 --- a/internal/icinga2/icinga.go +++ b/internal/icinga2/icinga.go @@ -22,7 +22,6 @@ import ( const ( icingaActionProcessCheckResultEndpoint = "/v1/actions/process-check-result/" icingaHostEndpoint = "/v1/objects/hosts/" - icingaHostgroupEndpoint = "/v1/objects/hostgroups/" icingaServiceEndpoint = "/v1/objects/services/" ) From d22dfef711343dd4856c86e20d6d9e2abeefc1ae Mon Sep 17 00:00:00 2001 From: Markus Opolka Date: Fri, 19 Jun 2026 10:56:45 +0200 Subject: [PATCH 10/10] Fix typos in README --- README.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 72b53a9..319303f 100644 --- a/README.md +++ b/README.md @@ -80,7 +80,7 @@ The tool respects the environment variables HTTP_PROXY, HTTPS_PROXY and NO_PROXY The `/webhook` endpoint accepts alerts from the Alertmanager's [generic webhook receiver](https://prometheus.io/docs/alerting/latest/configuration/#webhook_config). -Alertmanager-Icinga-Bridge expects a the following to be part of an alert. +Alertmanager-Icinga-Bridge expects the following to be part of an alert. 
Alert fields: * `generatorURL`: Is mapped to the Icinga service `action_url` @@ -106,7 +106,7 @@ Alternatively, if you enable `--plugin-output-by-states` then the Alertmanager-I This allows you to configure multiple annotations with different values that are then used with the corresponding service state to set the plugin output. -If an annotation is not found for that specific service state then Alertmanager-Icinga-Bridge will fallback on using the annotation name as configured. +If an annotation is not found for that specific service state then Alertmanager-Icinga-Bridge will fall back on using the annotation name as configured. ### Example Alertmanager Configuration @@ -137,7 +137,7 @@ receivers: ## Integration with Icinga -You need to create an Icinga host which the Alertmanager-Icinga-Bridge can use to manage service's for. +You need to create an Icinga host which the Alertmanager-Icinga-Bridge can use to manage services for. Alertmanager-Icinga-Bridge expects that it has full control over this host. Therefore, you should create a host for each Alertmanager-Icinga-Bridge instance which you're running. @@ -243,9 +243,9 @@ object Service "heartbeat" { All alert labels and annotations will be mapped to custom variables. Keys of labels will be prefixed with `label_` and keys of annotations with `annotation_`. -If the key an annotation or label starts with `icinga_` it will also be added as custom variable without any prefix. +If the key of an annotation or label starts with `icinga_` it will also be added as custom variable without any prefix. -Since all labels and annotations are strings, a type information can be provided. +Since all labels and annotations are strings, type information can be provided. This is done by adding the type as part of the prefix (`icinga__`). Current supported types are `number` and `string`. 
@@ -265,13 +265,13 @@ In case there is a label and an annotation with the `icinga_` prefix, the ## Custom Host/Zone/Template -By default, the `--icinga-hostname` is used to create services and `--templates` for the service's template. This can be overridden by the following labels: +By default, the `--icinga-hostname` is used to create services and `--templates` for the service template. This can be overridden by the following labels: | Alert | Icinga | | ---------- | ----------- | | Label: `icinga_use_host: MyHost` | If present, use given host for the new service. The host must exist beforehand | | Label: `icinga_use_zone: MyZone` | If present, use given zone for the new service The zone must exist beforehand | -| Label: `icinga_use_template: MyTemplate` | If present, use given template for the new service The template must exist beforehand | +| Label: `icinga_use_template: MyTemplate` | If present, use given template for the new service. The template must exist beforehand | Note that this requires the Alertmanager-Icinga-Bridge user to have the necessary permissions on the host. @@ -280,9 +280,9 @@ Note that this requires the Alertmanager-Icinga-Bridge user to have the necessar Alertmanager-Icinga-Bridge supports creating "heartbeat services" in Icinga. This can be used to map alerts like a `DeadMansSwitch`. In Prometheus a "watchdog" or "dead man's switch" is an alert that is always firing to ensure alerting pipeline is working. -To treat an alert as a "heartbeat" the alert must have a label `heartbeat` with a [Golang duration](https://pkg.go.dev/time#ParseDuration) as value (e.g. `heartbeat: "1d"`). +To treat an alert as a "heartbeat", the alert must have a label `heartbeat` with a [Golang duration](https://pkg.go.dev/time#ParseDuration) as value (e.g. `heartbeat: "24h"`). -To enable garbage collection on these alerts they can be set to downtime. A heartbeat service with an active downtime will be removed by the garbage collection.
+To enable garbage collection on these alerts, they can be set to "downtime" in Icinga. A heartbeat service with an active downtime will be removed by the garbage collection. The Alertmanager-Icinga-Bridge will create an Icinga service check with active checks enabled and with the check interval set to the parsed duration. We add 10% to the parsed duration to account for network latency etc., which could otherwise lead to flapping heartbeat checks.