2020-06-12 22:07:16 +00:00
|
|
|
{ pkgs, lib, ... }:
|
2020-04-26 18:40:00 +00:00
|
|
|
|
2020-04-26 20:01:05 +00:00
|
|
|
let
|
2020-06-06 16:15:40 +00:00
|
|
|
secrets = import ./lib/secrets.nix;
|
|
|
|
|
2020-04-26 20:01:05 +00:00
|
|
|
# Build a blackbox_exporter scrape job that probes `targets` with `module`
# every `interval`. The module name is also used as the job name suffix.
blackboxScrape = module: interval: targets:
  blackboxScrapeJobName module module interval targets;
|
2020-04-26 20:01:05 +00:00
|
|
|
|
|
|
|
# Same as blackboxScrape, but allow customizing the job name.
#
#   job      - suffix of the resulting job name ("blackbox_<job>")
#   module   - blackbox_exporter module, sent as the "module" URL parameter
#   interval - scrape interval for this job, e.g. "15s"
#   targets  - list of endpoints to probe
#
# The job skeleton (job_name + static_configs) comes from staticScrape so both
# kinds of job are built the same way; the arguments are used directly rather
# than being re-wrapped in string interpolation.
blackboxScrapeJobName = job: module: interval: targets:
  staticScrape "blackbox_${job}" targets // {
    scrape_interval = interval;
    metrics_path = "/probe";
    params.module = [ module ];
    # blackbox_exporter location is hardcoded.
    relabel_configs = relabelTarget "servnerr-3:9115";
  };
|
|
|
|
|
2020-06-12 22:07:16 +00:00
|
|
|
# Build a scrape job named `job` whose single static_configs entry lists
# `targets`.
staticScrape = job: targets: {
  job_name = job;
  static_configs = [ { targets = targets; } ];
};
|
|
|
|
|
2020-04-26 20:01:05 +00:00
|
|
|
# Produces the relabeling rules for a job that is scraped through an
# intermediate exporter listening at `target` (a "host:port" string):
# the configured address is handed to the exporter as the "target" parameter
# and kept as the instance label, while the scrape itself goes to `target`.
relabelTarget = target: [
  # Copy the configured address into the "target" request parameter.
  {
    source_labels = [ "__address__" ];
    target_label = "__param_target";
  }
  # Use that same value as the instance label.
  {
    source_labels = [ "__param_target" ];
    target_label = "instance";
  }
  # Point the scrape at the exporter itself.
  {
    target_label = "__address__";
    replacement = target;
  }
];
|
|
|
|
|
|
|
|
in {
|
2020-04-26 18:40:00 +00:00
|
|
|
# Prometheus monitoring server and exporter configuration.
|
|
|
|
services.prometheus = {
|
|
|
|
enable = true;
|
|
|
|
webExternalUrl = "https://prometheus.servnerr.com";
|
|
|
|
|
2020-05-28 20:49:03 +00:00
|
|
|
globalConfig.scrape_interval = "15s";
|
|
|
|
|
2020-06-13 14:13:14 +00:00
|
|
|
# Keep one year of data. --storage.tsdb.retention is deprecated upstream in
# favor of --storage.tsdb.retention.time, which takes the same duration value.
# NOTE(review): --web.enable-admin-api exposes the TSDB admin endpoints;
# confirm that access to this server is restricted.
extraFlags = [ "--storage.tsdb.retention.time=365d" "--web.enable-admin-api" ];
|
2020-05-08 16:06:47 +00:00
|
|
|
|
2020-06-06 16:15:40 +00:00
|
|
|
alertmanager = {
  enable = true;
  webExternalUrl = "https://alertmanager.servnerr.com";

  # Single route: alerts are grouped by alert name and sent to the one
  # receiver defined below.
  configuration.route = {
    receiver = "default";
    group_by = [ "alertname" ];
    group_wait = "10s";
    group_interval = "10s";
    repeat_interval = "1h";
  };

  # Pushover settings are read from the secrets file.
  configuration.receivers = [
    {
      name = "default";
      pushover_configs = secrets.alertmanager.pushover;
    }
  ];
};
|
|
|
|
|
2020-04-26 18:40:00 +00:00
|
|
|
# Send alerts to the alertmanager instance on the monitoring machine.
alertmanagers = [
  {
    static_configs = [
      { targets = [ "servnerr-3:9093" ]; }
    ];
  }
];
|
2020-04-26 18:40:00 +00:00
|
|
|
|
|
|
|
exporters = {
  # The node exporter is not listed here: it is already enabled on all
  # machines.
  apcupsd.enable = true;
  keylight.enable = true;

  blackbox = {
    enable = true;
    # The module definitions are rendered as JSON into blackbox.yml.
    configFile = let
      probeModules = {
        http_2xx = { prober = "http"; };
        http_401 = {
          prober = "http";
          http = { valid_status_codes = [ 401 ]; };
        };
        ssh_banner = {
          prober = "tcp";
          tcp = { query_response = [ { expect = "^SSH-2.0-"; } ]; };
        };
      };
    in pkgs.writeText "blackbox.yml"
      (builtins.toJSON { modules = probeModules; });
  };

  # SNMP exporter with data file from release 0.18.0, pinned by commit and
  # hash.
  snmp.enable = true;
  snmp.configurationPath = builtins.fetchurl {
    url =
      "https://raw.githubusercontent.com/prometheus/snmp_exporter/9a2ff257dd2e8cdb2a4c88b18df668e2008c2cd6/snmp.yml";
    sha256 = "10pvs9b49p5xnh7q2dfm268mhx5q3d7xp6j8qaljipcrsls8ddfm";
  };
};
|
|
|
|
|
2020-04-26 20:01:05 +00:00
|
|
|
# TODO: template out hostnames or consider DNS-SD (DNS-based service discovery).
|
2020-04-26 18:40:00 +00:00
|
|
|
scrapeConfigs = [
  # Plain jobs: each one scrapes a fixed list of host:port targets.
  (staticScrape "apcupsd" [ "nerr-3:9162" "routnerr-2:9162" "servnerr-3:9162" ])
  (staticScrape "consrv" [ "monitnerr-1:9288" "monitnerr-2:9288" ])
  (staticScrape "coredns" [ "routnerr-2:9153" ])
  (staticScrape "corerad" [ "routnerr-2:9430" "servnerr-3:9430" ])
  # The keylight target is relabeled so the scrape goes through the exporter
  # at servnerr-3:9288.
  (lib.mkMerge [
    (staticScrape "keylight" [ "keylight" ])
    { relabel_configs = relabelTarget "servnerr-3:9288"; }
  ])
  (staticScrape "node" [
    "monitnerr-1:9100"
    "monitnerr-2:9100"
    "nerr-3:9100"
    "routnerr-2:9100"
    "servnerr-3:9100"
  ])
  (staticScrape "obs" [ "nerr-3:9407" ])
  (staticScrape "windows" [ "theatnerr-1:9182" ])
  (staticScrape "wireguard" [ "routnerr-2:9586" ])

  # The blackbox exporter itself, followed by the probes it performs.
  (staticScrape "blackbox" [ "servnerr-3:9115" ])
  (blackboxScrape "http_2xx" "15s" [ "https://grafana.servnerr.com" ])
  # Netlify can occasionally be flappy, so check it less often.
  (blackboxScrapeJobName "http_2xx_mdlayhercom" "http_2xx" "1m"
    [ "https://mdlayher.com" ])
  (blackboxScrape "http_401" "15s" [
    "https://alertmanager.servnerr.com"
    "https://plex.servnerr.com"
    "https://prometheus.servnerr.com"
  ])
  # The SSH banner check produces a fair amount of log spam, so only scrape
  # it once a minute.
  (blackboxScrape "ssh_banner" "1m" [ "nerr-3:22" "routnerr-2:22" "servnerr-3:22" ])

  # SNMP needs relabeling so that the instance names are replaced properly
  # and the correct devices are queried.
  (lib.mkMerge [
    (staticScrape "snmp" [
      "switch-livingroom01"
      "switch-office01"
      "switch-office02.ipv4"
      "ap-livingroom02.ipv4"
    ])
    {
      metrics_path = "/snmp";
      params.module = [ "if_mib" ];
      relabel_configs = relabelTarget "servnerr-3:9116";
    }
  ])

  # Lab-only jobs must be prefixed with lab- to avoid alerting.
  (staticScrape "lab-corerad" [ "routnerr-2:9431" ])
];
|
2020-05-19 22:35:41 +00:00
|
|
|
|
2020-06-12 23:15:44 +00:00
|
|
|
# Rules are defined in ./prometheus-alerts.nix; the imported value is
# serialized into a single JSON string.
rules = [ (builtins.toJSON (import ./prometheus-alerts.nix)) ];
|
2020-04-26 18:40:00 +00:00
|
|
|
};
|
|
|
|
}
|