mirror of
https://github.com/mdlayher/homelab.git
synced 2024-12-14 11:47:32 +00:00
nixos/servnerr-3: tidy up Prometheus alerts
Signed-off-by: Matt Layher <mdlayher@gmail.com>
This commit is contained in:
parent
06267a2384
commit
c71716357e
2 changed files with 5 additions and 8 deletions
|
@@ -6,14 +6,14 @@
|
||||||
# excluded due to their experimental nature.
|
# excluded due to their experimental nature.
|
||||||
{
|
{
|
||||||
alert = "InstanceDown";
|
alert = "InstanceDown";
|
||||||
expr = ''up{instance!~"(nerr-3.*|theatnerr-1.*)",job!~"lab-.*"} == 0'';
|
expr = ''up{instance!~"(nerr-.*|theatnerr-.*)",job!~"lab-.*"} == 0'';
|
||||||
for = "5m";
|
for = "5m";
|
||||||
annotations.summary =
|
annotations.summary =
|
||||||
"{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes.";
|
"{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes.";
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
alert = "ServiceDown";
|
alert = "ServiceDown";
|
||||||
expr = ''probe_success{instance!~"nerr-3.*",job!~"lab-.*"} == 0'';
|
expr = ''probe_success{instance!~"nerr-.*",job!~"lab-.*"} == 0'';
|
||||||
for = "5m";
|
for = "5m";
|
||||||
annotations.summary =
|
annotations.summary =
|
||||||
"{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes.";
|
"{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes.";
|
||||||
|
@@ -105,14 +105,14 @@
|
||||||
{
|
{
|
||||||
alert = "CoreRADMonitorDefaultRouteWANExpiring";
|
alert = "CoreRADMonitorDefaultRouteWANExpiring";
|
||||||
expr = ''
|
expr = ''
|
||||||
corerad_monitor_default_route_expiration_timestamp_seconds{instance=~"routnerr-2.*",job="corerad"} - time() < 2*60*60'';
|
corerad_monitor_default_route_expiration_timestamp_seconds{instance=~"routnerr-.*",job="corerad"} - time() < 2*60*60'';
|
||||||
annotations.summary =
|
annotations.summary =
|
||||||
"CoreRAD ({{ $labels.instance }}) interface {{ $labels.interface }} will drop its default route to WAN {{ $labels.router }} in less than 2 hours.";
|
"CoreRAD ({{ $labels.instance }}) interface {{ $labels.interface }} will drop its default route to WAN {{ $labels.router }} in less than 2 hours.";
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
alert = "CoreRADMonitorDefaultRouteLANExpiring";
|
alert = "CoreRADMonitorDefaultRouteLANExpiring";
|
||||||
expr = ''
|
expr = ''
|
||||||
corerad_monitor_default_route_expiration_timestamp_seconds{instance!~"routnerr-2.*",job="corerad"} - time() < 1*60*10'';
|
corerad_monitor_default_route_expiration_timestamp_seconds{instance!~"routnerr-.*",job="corerad"} - time() < 1*60*10'';
|
||||||
annotations.summary =
|
annotations.summary =
|
||||||
"CoreRAD ({{ $labels.instance }}) interface {{ $labels.interface }} will drop its default route to LAN {{ $labels.router }} in less than 10 minutes.";
|
"CoreRAD ({{ $labels.instance }}) interface {{ $labels.interface }} will drop its default route to LAN {{ $labels.router }} in less than 10 minutes.";
|
||||||
}
|
}
|
||||||
|
|
|
@@ -111,10 +111,7 @@ in {
|
||||||
# TODO: template out hostnames or consider DNSSD.
|
# TODO: template out hostnames or consider DNSSD.
|
||||||
scrapeConfigs = [
|
scrapeConfigs = [
|
||||||
# Simple, static scrape jobs.
|
# Simple, static scrape jobs.
|
||||||
(staticScrape "apcupsd" [
|
(staticScrape "apcupsd" [ "nerr-3:9162" "servnerr-3:9162" ])
|
||||||
"nerr-3:9162"
|
|
||||||
"servnerr-3:9162"
|
|
||||||
])
|
|
||||||
(staticScrape "consrv" [ "monitnerr-1:9288" ])
|
(staticScrape "consrv" [ "monitnerr-1:9288" ])
|
||||||
(staticScrape "coredns" [ "routnerr-2:9153" ])
|
(staticScrape "coredns" [ "routnerr-2:9153" ])
|
||||||
(staticScrape "corerad" [ "routnerr-2:9430" "servnerr-3:9430" ])
|
(staticScrape "corerad" [ "routnerr-2:9430" "servnerr-3:9430" ])
|
||||||
|
|
Loading…
Reference in a new issue