1
0
Fork 0
mirror of https://github.com/mdlayher/homelab.git synced 2024-12-14 11:47:32 +00:00

nixos/servnerr-3: tidy up Prometheus alerts

Signed-off-by: Matt Layher <mdlayher@gmail.com>
This commit is contained in:
Matt Layher 2022-01-31 20:24:02 -05:00
parent 06267a2384
commit c71716357e
2 changed files with 5 additions and 8 deletions

View file

@@ -6,14 +6,14 @@
# excluded due to their experimental nature. # excluded due to their experimental nature.
{ {
alert = "InstanceDown"; alert = "InstanceDown";
expr = ''up{instance!~"(nerr-3.*|theatnerr-1.*)",job!~"lab-.*"} == 0''; expr = ''up{instance!~"(nerr-.*|theatnerr-.*)",job!~"lab-.*"} == 0'';
for = "5m"; for = "5m";
annotations.summary = annotations.summary =
"{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes."; "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes.";
} }
{ {
alert = "ServiceDown"; alert = "ServiceDown";
expr = ''probe_success{instance!~"nerr-3.*",job!~"lab-.*"} == 0''; expr = ''probe_success{instance!~"nerr-.*",job!~"lab-.*"} == 0'';
for = "5m"; for = "5m";
annotations.summary = annotations.summary =
"{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes."; "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes.";
@@ -105,14 +105,14 @@
{ {
alert = "CoreRADMonitorDefaultRouteWANExpiring"; alert = "CoreRADMonitorDefaultRouteWANExpiring";
expr = '' expr = ''
corerad_monitor_default_route_expiration_timestamp_seconds{instance=~"routnerr-2.*",job="corerad"} - time() < 2*60*60''; corerad_monitor_default_route_expiration_timestamp_seconds{instance=~"routnerr-.*",job="corerad"} - time() < 2*60*60'';
annotations.summary = annotations.summary =
"CoreRAD ({{ $labels.instance }}) interface {{ $labels.interface }} will drop its default route to WAN {{ $labels.router }} in less than 2 hours."; "CoreRAD ({{ $labels.instance }}) interface {{ $labels.interface }} will drop its default route to WAN {{ $labels.router }} in less than 2 hours.";
} }
{ {
alert = "CoreRADMonitorDefaultRouteLANExpiring"; alert = "CoreRADMonitorDefaultRouteLANExpiring";
expr = '' expr = ''
corerad_monitor_default_route_expiration_timestamp_seconds{instance!~"routnerr-2.*",job="corerad"} - time() < 1*60*10''; corerad_monitor_default_route_expiration_timestamp_seconds{instance!~"routnerr-.*",job="corerad"} - time() < 1*60*10'';
annotations.summary = annotations.summary =
"CoreRAD ({{ $labels.instance }}) interface {{ $labels.interface }} will drop its default route to LAN {{ $labels.router }} in less than 10 minutes."; "CoreRAD ({{ $labels.instance }}) interface {{ $labels.interface }} will drop its default route to LAN {{ $labels.router }} in less than 10 minutes.";
} }

View file

@@ -111,10 +111,7 @@ in {
# TODO: template out hostnames or consider DNSSD. # TODO: template out hostnames or consider DNSSD.
scrapeConfigs = [ scrapeConfigs = [
# Simple, static scrape jobs. # Simple, static scrape jobs.
(staticScrape "apcupsd" [ (staticScrape "apcupsd" [ "nerr-3:9162" "servnerr-3:9162" ])
"nerr-3:9162"
"servnerr-3:9162"
])
(staticScrape "consrv" [ "monitnerr-1:9288" ]) (staticScrape "consrv" [ "monitnerr-1:9288" ])
(staticScrape "coredns" [ "routnerr-2:9153" ]) (staticScrape "coredns" [ "routnerr-2:9153" ])
(staticScrape "corerad" [ "routnerr-2:9430" "servnerr-3:9430" ]) (staticScrape "corerad" [ "routnerr-2:9430" "servnerr-3:9430" ])