1
0
Fork 0
mirror of https://github.com/mdlayher/homelab.git synced 2024-12-14 11:47:32 +00:00
mdlayher-homelab/nixos/servnerr-4/prometheus.nix

167 lines
4.9 KiB
Nix
Raw Permalink Normal View History

{ pkgs, lib, ... }:
let
secrets = import ./lib/secrets.nix;
# Blackbox scrape job whose job name is derived from the probe module itself:
# takes the module name, then (curried) the scrape interval and target list.
blackboxScrape = probe: blackboxScrapeJobName probe probe;
# Blackbox scrape job with an explicit job name suffix. Arguments, in order:
# job name suffix, blackbox module, scrape interval, list of probe targets.
blackboxScrapeJobName = jobSuffix: probeModule: every: endpoints: {
  job_name = "blackbox_${jobSuffix}";
  scrape_interval = "${every}";
  metrics_path = "/probe";
  params.module = [ "${probeModule}" ];
  static_configs = [{ targets = endpoints; }];
  # Every probe is routed through the blackbox_exporter at this fixed address.
  relabel_configs = relabelTarget "servnerr-4:9115";
};
# Minimal scrape job: a job name plus one static list of targets.
staticScrape = name: hosts: {
  job_name = name;
  static_configs = [{ targets = hosts; }];
};
# Builds the relabeling rules for scraping through an intermediary exporter:
# the configured address becomes the HTTP target parameter and the instance
# label, and the address actually scraped is replaced with the given exporter.
relabelTarget = exporter:
  let
    # Hand the configured address to the exporter as its target parameter.
    addressToParam = {
      source_labels = [ "__address__" ];
      target_label = "__param_target";
    };
    # Label the resulting series with the probed target.
    paramToInstance = {
      source_labels = [ "__param_target" ];
      target_label = "instance";
    };
    # Point the scrape request itself at the exporter.
    scrapeViaExporter = {
      target_label = "__address__";
      replacement = "${exporter}";
    };
  in [ addressToParam paramToInstance scrapeViaExporter ];
in {
# Prometheus monitoring server and exporter configuration.
services.prometheus = {
  enable = true;
  webExternalUrl = "https://prometheus.servnerr.com";
  globalConfig.scrape_interval = "15s";

  # Retain 1825 days (5 x 365) of samples. The flag must be spelled
  # --storage.tsdb.retention.time: the older --storage.tsdb.retention form is
  # deprecated in Prometheus 2.x and rejected by Prometheus 3.x.
  extraFlags =
    [ "--storage.tsdb.retention.time=1825d" "--web.enable-admin-api" ];

  alertmanager = {
    enable = true;
    webExternalUrl = "https://alertmanager.servnerr.com";
    configuration = {
      # Single route: all alerts are grouped by name and sent to "default".
      route = {
        group_by = [ "alertname" ];
        group_wait = "10s";
        group_interval = "10s";
        repeat_interval = "1h";
        receiver = "default";
      };
      receivers = [{
        name = "default";
        discord_configs =
          [{ webhook_url = secrets.alertmanager.discord.webhook_url; }];
      }];
    };
  };

  # Use alertmanager running on monitoring machine.
  alertmanagers =
    [{ static_configs = [{ targets = [ "servnerr-4:9093" ]; }]; }];

  exporters = {
    # Node exporter already enabled on all machines.
    apcupsd.enable = true;

    blackbox = {
      enable = true;
      # The configuration is rendered as JSON into a file named blackbox.yml.
      configFile = pkgs.writeText "blackbox.yml" (builtins.toJSON ({
        modules = {
          http_2xx.prober = "http";
          # HTTP probe that only accepts a 401 status code.
          http_401 = {
            prober = "http";
            http.valid_status_codes = [ 401 ];
          };
          # TCP probe that expects the response to match an SSH-2.0 banner.
          ssh_banner = {
            prober = "tcp";
            tcp.query_response = [{ expect = "^SSH-2.0-"; }];
          };
        };
      }));
    };

    # SNMP exporter with data file from release 0.26.0.
    # NOTE(review): the URL is pinned to a commit, but no sha256 is given to
    # builtins.fetchurl, so the download is not hash-checked; consider
    # adding one.
    snmp = {
      enable = true;
      configurationPath = builtins.fetchurl {
        url =
          "https://raw.githubusercontent.com/prometheus/snmp_exporter/44f8732988e726bad3f13d5779f1da7705178642/snmp.yml";
      };
    };
  };

  # TODO: template out hostnames or consider DNSSD.
  scrapeConfigs = [
    # Simple, static scrape jobs.
    (staticScrape "apcupsd" [ "nerr-4:9162" "servnerr-4:9162" ])
    (staticScrape "consrv" [ "monitnerr-1:9288" ])
    (staticScrape "coredns" [ "routnerr-3:9153" ])
    (staticScrape "corerad" [ "routnerr-3:9430" ])
    (staticScrape "node" [
      "monitnerr-1:9100"
      "nerr-4:9100"
      "routnerr-3:9100"
      "servnerr-4:9100"
    ])
    (staticScrape "windows" [ "theatnerr-2.ipv4.lan.servnerr.com:9182" ])
    (staticScrape "zrepl" [ "servnerr-4:9811" ])

    # Home Assistant requires a more custom configuration.
    {
      job_name = "homeassistant";
      metrics_path = "/api/prometheus";
      bearer_token = secrets.prometheus.homeassistant_token;
      static_configs = [{ targets = [ "hass:8123" ]; }];
    }

    # Blackbox exporter and associated targets.
    (staticScrape "blackbox" [ "servnerr-4:9115" ])
    (blackboxScrape "http_2xx" "15s" [
      "http://living-room-myq-hub.iot.ipv4"
      "https://grafana.servnerr.com"
    ])
    (blackboxScrape "http_401" "15s" [
      "https://alertmanager.servnerr.com"
      "https://plex.servnerr.com"
      "https://prometheus.servnerr.com"
    ])

    # The SSH banner check produces a fair amount of log spam, so only scrape
    # it once a minute.
    (blackboxScrape "ssh_banner" "1m" [
      "nerr-4:22"
      "routnerr-3:22"
      "servnerr-4:22"
    ])

    # SNMP relabeling configuration required to properly replace the instance
    # names and query the correct devices.
    (lib.mkMerge [
      (staticScrape "snmp-cyberpower" [ "pdu01.ipv4" "ups01.ipv4" ])
      {
        metrics_path = "/snmp";
        params = { module = [ "cyberpower" ]; };
        relabel_configs = relabelTarget "servnerr-4:9116";
      }
    ])
  ];

  # Alerting rules live in a separate Nix file and are passed as JSON text.
  rules = [ (builtins.toJSON (import ./prometheus-alerts.nix)) ];
};
}