-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathalert.rules
More file actions
48 lines (45 loc) · 1.68 KB
/
alert.rules
File metadata and controls
48 lines (45 loc) · 1.68 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
groups:
- name: Test Alerts for check_plugin
rules:
- alert: Watchdog
annotations:
message: |
This is an alert meant to ensure that the entire alerting pipeline is functional.
This alert is always firing.
expr: vector(1)
labels:
severity: none
- alert: PrometheusTargetMissing
expr: up == 0
for: 0m
labels:
severity: critical
icingaState: warning
annotations:
summary: Prometheus target missing (instance {{ $labels.instance }})
description: "A Prometheus target has disappeared. An exporter might be crashed.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: PrometheusAlertmanagerJobMissing
expr: absent(up{job="alertmanager"})
for: 0m
labels:
severity: low
icingaState: warning
annotations:
summary: Prometheus AlertManager job missing (instance {{ $labels.instance }})
description: "A Prometheus AlertManager job has disappeared\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: HostOutOfMemory
expr: node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100 < 10
for: 2m
labels:
severity: warning
annotations:
summary: Host out of memory (instance {{ $labels.instance }})
description: "Node memory is filling up (< 10% left)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: HostHighCpuLoad
expr: 100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[2m])) * 100) > 80
for: 0m
labels:
severity: extreme
annotations:
summary: Host high CPU load (instance {{ $labels.instance }})
description: "CPU load is > 80%\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"