
nixos/servnerr-4: init

Signed-off-by: Matt Layher <mdlayher@gmail.com>
commit dfa475df5d
parent 680bfb6211
Matt Layher 2023-01-06 13:03:46 -05:00

9 changed files with 674 additions and 0 deletions

nixos/servnerr-4/README.md

@@ -0,0 +1,5 @@
# servnerr-4
This machine is my jack-of-all-trades home server, which handles monitoring,
hypervisor, and NAS duties. It's a custom-built machine with an AMD Ryzen 7
2700 processor.

nixos/servnerr-4/configuration.nix

@@ -0,0 +1,119 @@
{ lib, pkgs, ... }:
let
unstable = import <nixos-unstable-small> { };
vars = import ./lib/vars.nix;
in {
imports = [
# Hardware and base system configuration.
./hardware-configuration.nix
./lib/system.nix
./networking.nix
./storage.nix
# Service configuration.
./containers.nix
./prometheus.nix
# Unstable or out-of-tree modules.
# ./lib/modules/zedhook.nix
];
system.stateVersion = "22.11";
boot = {
# Use the systemd-boot EFI boot loader.
loader = {
systemd-boot.enable = true;
efi.canTouchEfiVariables = true;
};
# Enable ZFS.
supportedFilesystems = [ "zfs" ];
kernelParams = [
# Enable serial console.
"console=ttyS0,115200n8"
# 56GiB ZFS ARC (56 * 1024^3 = 60129542144 bytes).
"zfs.zfs_arc_max=60129542144"
];
};
# Start getty over serial console.
systemd.services."serial-getty@ttyS0" = {
enable = true;
# Make sure agetty spawns at boot and always restarts whenever it
# exits due to user logout.
wantedBy = [ "multi-user.target" ];
serviceConfig = { Restart = "always"; };
};
# Scale down CPU frequency when load is low.
powerManagement.cpuFreqGovernor = "ondemand";
# Packages specific to this machine. The base package set is defined in
# lib/system.nix.
environment.systemPackages = with pkgs; [
flac
mkvtoolnix-cli
sqlite
zfs
zrepl
# Unstable and out-of-tree packages.
];
services = {
apcupsd = {
enable = true;
configText = ''
UPSCABLE usb
UPSTYPE usb
DEVICE
UPSCLASS standalone
UPSMODE disable
'';
};
# Deploy CoreRAD monitor mode on all interfaces.
corerad = {
enable = true;
# Enable as necessary to get development builds of CoreRAD.
# package = unstable.corerad;
settings = {
debug = {
address = ":9430";
prometheus = true;
pprof = true;
};
interfaces = [{
names = [ "mgmt0" ];
monitor = true;
}];
};
};
grafana = {
enable = true;
# Bind to all interfaces.
settings.server.http_addr = "";
};
# Enable the OpenSSH daemon.
openssh = {
enable = true;
passwordAuthentication = false;
};
};
virtualisation.libvirtd.enable = true;
# root SSH key for remote builds.
users.users.root.openssh.authorizedKeys.keys = [
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIP3+HUx05h15g95ID/lWbU5uvF6TLr2XESmthQjU7qvR NixOS distributed build"
];
}

nixos/servnerr-4/containers.nix

@@ -0,0 +1,46 @@
{ pkgs, ... }:
{
# These services are proprietary, so they run containerized for confinement
# from the rest of the system, and on unstable for faster update cycles.
containers = {
plex = {
autoStart = true;
bindMounts = {
# Mount Plex data directory as read-write.
"/var/lib/plex" = {
hostPath = "/var/lib/plex";
isReadOnly = false;
};
# Mount the ZFS pool as read-only.
#"/primary/media" = {
# hostPath = "/primary/media";
# isReadOnly = true;
#};
};
config = { ... }:
let
unstable =
import <nixos-unstable-small> { config.allowUnfree = true; };
in {
services.plex = {
enable = true;
package = unstable.plex;
};
};
};
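# As a rough sketch (assuming the standard nixos-container CLI that ships
# with declarative NixOS containers), the Plex container can be inspected
# from the host with:
#
#   nixos-container root-login plex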
};
virtualisation.oci-containers = {
backend = "podman";
containers = {
home-assistant = {
image = "ghcr.io/home-assistant/home-assistant:stable";
extraOptions = [ "--network=host" ];
ports = [ "8123:8123" ];
volumes =
[ "/etc/localtime:/etc/localtime:ro" "/var/lib/hass:/config" ];
};
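# NixOS wraps this container in a systemd unit; as a sketch, assuming the
# usual podman-<name> unit naming convention, it can be checked with:
#
#   systemctl status podman-home-assistant.service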
};
};
}

nixos/servnerr-4/hardware-configuration.nix

@@ -0,0 +1,39 @@
# Do not modify this file! It was generated by nixos-generate-config
# and may be overwritten by future invocations. Please make changes
# to /etc/nixos/configuration.nix instead.
{ config, lib, pkgs, modulesPath, ... }:
{
imports =
[ (modulesPath + "/installer/scan/not-detected.nix")
];
boot.initrd.availableKernelModules = [ "nvme" "xhci_pci" "usbhid" "usb_storage" "sd_mod" ];
boot.initrd.kernelModules = [ ];
boot.kernelModules = [ "kvm-amd" ];
boot.extraModulePackages = [ ];
fileSystems."/" =
{ device = "/dev/disk/by-uuid/39344788-52df-410c-97ae-027887e10e2f";
fsType = "ext4";
};
fileSystems."/boot" =
{ device = "/dev/disk/by-uuid/726C-3D2A";
fsType = "vfat";
};
swapDevices = [{ device = "/dev/disk/by-uuid/6e3f67c8-74e9-42f0-8925-d444ada3696d"; }];
# Enables DHCP on each ethernet and wireless interface. In case of scripted networking
# (the default) this is the recommended approach. When using systemd-networkd it's
# still possible to use this option, but it's recommended to use it in conjunction
# with explicit per-interface declarations with `networking.interfaces.<interface>.useDHCP`.
networking.useDHCP = lib.mkDefault true;
# networking.interfaces.enp4s0.useDHCP = lib.mkDefault true;
# networking.interfaces.enp7s0.useDHCP = lib.mkDefault true;
powerManagement.cpuFreqGovernor = lib.mkDefault "ondemand";
nixpkgs.hostPlatform = lib.mkDefault "x86_64-linux";
hardware.cpu.amd.updateMicrocode = lib.mkDefault config.hardware.enableRedistributableFirmware;
}

nixos/servnerr-4/lib Symbolic link

@@ -0,0 +1 @@
../lib/

nixos/servnerr-4/networking.nix

@@ -0,0 +1,45 @@
{ lib, ... }:
{
networking = {
# Host name and ID.
hostName = "servnerr-4";
hostId = "ed66dcdd";
# Use systemd-networkd for configuration. Forcibly disable legacy DHCP client.
useNetworkd = true;
useDHCP = false;
# No local firewall.
firewall.enable = false;
};
systemd.network = {
enable = true;
# 1GbE management LAN.
links."10-mgmt0" = {
matchConfig.MACAddress = "04:d9:f5:7e:1c:47";
linkConfig.Name = "mgmt0";
};
networks."10-mgmt0" = {
matchConfig.Name = "mgmt0";
networkConfig.DHCP = "ipv4";
dhcpV4Config.ClientIdentifier = "mac";
# Only accept DNS search domains (via router advertisements) on this interface.
ipv6AcceptRAConfig.UseDomains = true;
};
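# For reference, the MAC-based rename above can be verified from the host
# with systemd's networkctl, e.g. `networkctl status mgmt0`.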
# 10GbE LAN.
links."12-tengb0" = {
matchConfig.MACAddress = "90:e2:ba:23:1a:3a";
linkConfig.Name = "tengb0";
};
networks."12-tengb0" = {
# TODO(mdlayher): enable after setting up switch.
enable = false;
matchConfig.Name = "tengb0";
networkConfig.DHCP = "ipv4";
};
};
}

nixos/servnerr-4/prometheus-alerts.nix

@@ -0,0 +1,9 @@
{
groups = [{
name = "default";
rules = [
# PCs which don't run 24/7 are excluded from alerts, and lab-* jobs are
# excluded due to their experimental nature.
];
}];
}

nixos/servnerr-4/prometheus.nix

@@ -0,0 +1,177 @@
{ pkgs, lib, ... }:
let
secrets = import ./lib/secrets.nix;
# Scrape a target with the specified module, interval, and list of targets.
blackboxScrape = (module: blackboxScrapeJobName module module);
# Same as blackboxScrape, but allow customizing the job name.
blackboxScrapeJobName = (job: module: interval: targets: {
job_name = "blackbox_${job}";
scrape_interval = "${interval}";
metrics_path = "/probe";
params = { module = [ "${module}" ]; };
# blackbox_exporter location is hardcoded.
relabel_configs = relabelTarget "servnerr-4:9115";
static_configs = [{ inherit targets; }];
});
# Scrape a list of static targets for a job.
staticScrape = (job_name: targets: {
inherit job_name;
static_configs = [{ inherit targets; }];
});
# Produces a relabeling configuration that passes each scrape target as the
# probe's "target" URL parameter, copies that parameter into the instance
# label, and rewrites the scrape address to point at the exporter itself.
relabelTarget = (target: [
{
source_labels = [ "__address__" ];
target_label = "__param_target";
}
{
source_labels = [ "__param_target" ];
target_label = "instance";
}
{
target_label = "__address__";
replacement = "${target}";
}
]);
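# For illustration, a call such as
#   blackboxScrape "http_2xx" "15s" [ "https://example.com" ]
# (hypothetical target) evaluates to roughly:
#   {
#     job_name = "blackbox_http_2xx";
#     scrape_interval = "15s";
#     metrics_path = "/probe";
#     params = { module = [ "http_2xx" ]; };
#     relabel_configs = relabelTarget "servnerr-4:9115";
#     static_configs = [{ targets = [ "https://example.com" ]; }];
#   }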
in {
# Prometheus monitoring server and exporter configuration.
services.prometheus = {
enable = true;
webExternalUrl = "https://prometheus.servnerr.com";
globalConfig.scrape_interval = "15s";
extraFlags = [ "--storage.tsdb.retention=1825d" "--web.enable-admin-api" ];
alertmanager = {
enable = true;
webExternalUrl = "https://alertmanager.servnerr.com";
configuration = {
route = {
group_by = [ "alertname" ];
group_wait = "10s";
group_interval = "10s";
repeat_interval = "1h";
receiver = "default";
};
receivers = [{
name = "default";
pushover_configs = secrets.alertmanager.pushover;
}];
};
};
# Use the Alertmanager instance running on this machine.
alertmanagers =
[{ static_configs = [{ targets = [ "servnerr-4:9093" ]; }]; }];
exporters = {
# Node exporter already enabled on all machines.
apcupsd.enable = true;
blackbox = {
enable = true;
configFile = pkgs.writeText "blackbox.yml" (builtins.toJSON ({
modules = {
http_2xx.prober = "http";
http_401 = {
prober = "http";
http.valid_status_codes = [ 401 ];
};
ssh_banner = {
prober = "tcp";
tcp.query_response = [{ expect = "^SSH-2.0-"; }];
};
};
}));
};
keylight.enable = true;
# SNMP exporter with data file from release 0.19.0.
snmp = {
enable = true;
configurationPath = builtins.fetchurl {
url =
"https://raw.githubusercontent.com/prometheus/snmp_exporter/9dcbc02f59648b21fcf632de1b62a30df70f4649/snmp.yml";
sha256 = "04kh3n3q4nf6542w0cx36pdzfy3nr65hyc755j7q6xlsrpsqc21m";
};
};
};
# TODO: template out hostnames or consider DNSSD.
scrapeConfigs = [
# Simple, static scrape jobs.
(staticScrape "apcupsd" [ "nerr-4:9162" "servnerr-4:9162" ])
(staticScrape "consrv" [ "monitnerr-1:9288" ])
(staticScrape "coredns" [ "routnerr-2:9153" ])
(staticScrape "corerad" [ "routnerr-2:9430" "servnerr-4:9430" ])
(lib.mkMerge [
(staticScrape "keylight" [ "keylight" ])
{ relabel_configs = relabelTarget "servnerr-4:9288"; }
])
(staticScrape "node" [
"monitnerr-1:9100"
"nerr-4:9100"
"routnerr-2:9100"
"servnerr-4:9100"
])
(staticScrape "obs" [ "nerr-4:9407" ])
(staticScrape "windows" [ "theatnerr-2:9182" ])
(staticScrape "wireguard" [ "routnerr-2:9586" ])
(staticScrape "zrepl" [ "servnerr-4:9811" ])
# Home Assistant requires a more involved configuration.
{
job_name = "homeassistant";
metrics_path = "/api/prometheus";
bearer_token = "${secrets.prometheus.homeassistant_token}";
static_configs = [{ targets = [ "servnerr-4:8123" ]; }];
}
# Blackbox exporter and associated targets.
(staticScrape "blackbox" [ "servnerr-4:9115" ])
(blackboxScrape "http_2xx" "15s" [
"http://living-room-myq-hub.iot.ipv4"
"https://grafana.servnerr.com"
])
(blackboxScrape "http_401" "15s" [
"https://alertmanager.servnerr.com"
"https://plex.servnerr.com"
"https://prometheus.servnerr.com"
])
# The SSH banner check produces a fair amount of log spam, so only scrape
# it once a minute.
(blackboxScrape "ssh_banner" "1m" [
"nerr-4:22"
"routnerr-2:22"
"servnerr-4:22"
])
# SNMP relabeling configuration required to properly replace the instance
# names and query the correct devices.
(lib.mkMerge [
(staticScrape "snmp" [ "switch-livingroom01" "switch-office01" ])
{
metrics_path = "/snmp";
params = { module = [ "if_mib" ]; };
relabel_configs = relabelTarget "servnerr-4:9116";
}
])
# Lab-only jobs must be prefixed with lab- to avoid alerting.
(staticScrape "lab-corerad" [ "routnerr-2:9431" ])
];
rules = [ (builtins.toJSON (import ./prometheus-alerts.nix)) ];
};
}

nixos/servnerr-4/storage.nix

@@ -0,0 +1,233 @@
{ lib, pkgs, ... }:
let
secrets = import ./lib/secrets.nix;
# Creates snapshots of zpool source using a zrepl snap job.
snap = (source: {
name = "snap_${source}";
type = "snap";
# Snapshot the entire pool every 15 minutes.
filesystems."${source}<" = true;
snapshotting = {
type = "periodic";
prefix = "zrepl_";
interval = "15m";
};
pruning.keep = keepSnaps;
});
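# For example, (snap "primary") produces a job named "snap_primary" that
# snapshots everything under the primary zpool ("primary<") every 15
# minutes and prunes with the keepSnaps rules below.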
# Advertises zpool source as a zrepl source job for target.
sourceLocal = (source:
(target: {
name = "source_${source}_${target}";
type = "source";
# Export everything, do not snapshot in this job.
filesystems."${source}<" = true;
snapshotting.type = "manual";
serve = {
type = "local";
listener_name = "source_${source}_${target}";
};
}));
# Templates out a zrepl pull job which replicates from zpool source into
# target.
_pullLocal = (source:
(target:
(root_fs: {
name = "pull_${source}_${target}";
type = "pull";
# Replicate all of the source zpool into target.
root_fs = root_fs;
interval = "15m";
connect = {
type = "local";
listener_name = "source_${source}_${target}";
# Assumes only a single client (localhost).
client_identity = "local";
};
recv = {
# Necessary for encrypted destination with unencrypted source.
placeholder.encryption = "inherit";
properties = {
# Inherit any encryption properties.
"inherit" = [ "encryption" "keyformat" "keylocation" ];
override = {
# Always enable compression.
compression = "on";
# Do not mount sink pools.
mountpoint = "none";
# Do not auto-snapshot sink pools.
"com.sun:auto-snapshot" = false;
"com.sun:auto-snapshot:frequent" = false;
"com.sun:auto-snapshot:hourly" = false;
"com.sun:auto-snapshot:daily" = false;
"com.sun:auto-snapshot:weekly" = false;
"com.sun:auto-snapshot:monthly" = false;
};
};
};
# Allow replication concurrency. This should generally speed up blocking
# zfs operations but may negatively impact file I/O. Tune as needed.
replication.concurrency.steps = 4;
pruning = {
keep_sender = [{
# The source job handles pruning.
type = "regex";
regex = ".*";
}];
# Keep the same automatic snapshots as source.
keep_receiver = keepSnaps;
};
})));
# Creates a zrepl pull job which replicates from zpool source into target
# directly.
pullLocal = (source: (target: (_pullLocal source target target)));
# Creates a zrepl pull job which replicates from zpool source into an
# encrypted top-level dataset in target.
pullLocalEncrypted =
(source: (target: (_pullLocal source target "${target}/encrypted")));
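# For example, (pullLocal "primary" "secondary") replicates primary directly
# into the secondary zpool, while (pullLocalEncrypted "primary" "backup0")
# replicates it into backup0/encrypted instead.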
# Rules to keep zrepl snapshots.
keepSnaps = [
# Keep manual snapshots.
{
type = "regex";
regex = "^manual_.*";
}
# Keep time-based bucketed snapshots.
{
type = "grid";
# Keep:
# - every snapshot from the last hour
# - every hour from the last 24 hours
# - every day from the last 2 weeks
# - every week from the last 2 months
# - every month from the last 2 years
#
# TODO(mdlayher): verify retention after a couple weeks!
grid = "1x1h(keep=all) | 24x1h | 14x1d | 8x7d | 24x30d";
regex = "^zrepl_.*";
}
];
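# As a worked example of the grid above: a snapshot taken three days ago
# falls into the 14x1d bucket, so zrepl keeps one snapshot per day for that
# two-week window and discards the rest.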
in {
# primary zpool mounts.
fileSystems = lib.genAttrs [
#"/primary"
#"/primary/archive"
#"/primary/media"
#"/primary/misc"
#"/primary/text"
#"/primary/vm"
] (device: {
# The device has the leading / removed.
device = builtins.substring 1 255 device;
fsType = "zfs";
});
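# For illustration, if "/primary/media" were uncommented above, genAttrs
# would produce:
#   fileSystems."/primary/media" = {
#     device = "primary/media";
#     fsType = "zfs";
#   };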
# Don't mount secondary, just import it on boot.
#boot.zfs.extraPools = [ "secondary" ];
nixpkgs = {
# Only allow certain unfree packages.
config.allowUnfreePredicate = pkg:
builtins.elem (lib.getName pkg) [ "tarsnap" ];
# Overlays for unstable and out-of-tree packages.
overlays = [
(_self: super: {
# We want to use the latest zrepl.
zrepl =
super.callPackage <nixos-unstable-small/pkgs/tools/backup/zrepl> { };
})
];
};
services = {
# Enable tarsnap backups.
tarsnap = {
enable = false;
archives.archive = {
directories = [ "/primary/archive" ];
verbose = true;
};
};
# ZFS configuration.
#
# TODO(mdlayher): sharenfs integration?
zfs = {
# Scrub all pools regularly.
autoScrub.enable = true;
# ZED configuration.
zed = {
enableMail = false;
settings = with secrets.zfs; {
# Send event notifications via Pushbullet.
ZED_PUSHBULLET_ACCESS_TOKEN = pushbullet.access_token;
# Send event notifications via Pushover.
#
# TODO(mdlayher): it seems NixOS 21.11 ZFS does not support pushover
# yet; we'll use pushbullet for now and reevaluate later.
# ZED_PUSHOVER_TOKEN = pushover.token;
# ZED_PUSHOVER_USER = pushover.user_key;
# Verify integrity via scrub after resilver.
ZED_SCRUB_AFTER_RESILVER = true;
# More verbose reporting.
ZED_NOTIFY_VERBOSE = true;
ZED_DEBUG_LOG = "/var/log/zed.log";
};
};
};
# Replicate ZFS pools using zrepl.
zrepl = {
enable = false;
settings = {
global.monitoring = [{
type = "prometheus";
listen = ":9811";
}];
jobs = [
# Take snapshots of primary and advertise it as a source for each
# fan-out pull job. Notably a source per pull job is necessary to
# maintain incremental replication, see:
# https://zrepl.github.io/quickstart/fan_out_replication.html.
(snap "primary")
(sourceLocal "primary" "secondary")
(sourceLocal "primary" "backup0")
(sourceLocal "primary" "backup1")
# Pull primary into backup pools:
# - hot: pull into secondary
# - cold: pull into backup{0,1} (if available)
(pullLocal "primary" "secondary")
(pullLocalEncrypted "primary" "backup0")
(pullLocalEncrypted "primary" "backup1")
];
};
};
};
}