mirror of
https://github.com/mdlayher/homelab.git
synced 2024-12-14 11:47:32 +00:00
ansible/prometheus: remove
Signed-off-by: Matt Layher <mdlayher@gmail.com>
This commit is contained in:
parent
b4dc963d4f
commit
76bef328cb
6 changed files with 0 additions and 388 deletions
|
@ -1,153 +0,0 @@
|
|||
---
|
||||
# Read the checksum marker left in the config directory so the next task can
# decide whether this prometheus release is already installed.
# The marker is absent on a fresh host, so a non-zero exit from cat is expected
# and must not fail the play. The probe is read-only, so it never reports a
# change. No shell features are needed, hence command instead of shell.
- name: check if installing a different version of prometheus
  tags:
    - prometheus
  command: "cat {{ prometheus_config }}/.sha256"
  register: checksum
  changed_when: false
  failed_when: false
|
||||
|
||||
# Decide once, as a real boolean, whether the install tasks must run:
#   - the recorded checksum does not contain the wanted prometheus_sha256, or
#   - the operator defined prometheus_force (any value).
# The expression is wrapped in Jinja delimiters so the fact holds True/False
# rather than the expression text; a plain string would only work through
# bare-variable re-evaluation in `when:` and is otherwise always truthy.
- name: determine if prometheus {{ prometheus_version}} should be installed
  tags:
    - prometheus
  set_fact:
    prometheus_install: "{{ ((prometheus_sha256 not in (checksum.stdout | default(''))) or (prometheus_force is defined)) | bool }}"
|
||||
|
||||
# Fetch the release tarball to prometheus_tgz; get_url is given the expected
# SHA-256 through its checksum argument. Skipped unless an install is needed.
- name: download and checksum prometheus {{ prometheus_version }} tarball
  get_url:
    checksum: 'sha256:{{ prometheus_sha256 }}'
    dest: '{{ prometheus_tgz }}'
    url: '{{ prometheus_url }}'
  when: prometheus_install
  tags: [prometheus]
|
||||
|
||||
# Extract the downloaded tarball into /tmp on the managed host.
# remote_src tells unarchive the archive already lives on the target; it
# replaces the deprecated `copy: false` spelling of the same behaviour.
- name: unpack prometheus tarball
  tags:
    - prometheus
  when: prometheus_install
  unarchive:
    src: "{{ prometheus_tgz }}"
    dest: "/tmp/"
    remote_src: true
|
||||
|
||||
# Ensure the prometheus account exists; its login shell is /bin/false.
- name: create prometheus user
  user:
    name: prometheus
    comment: 'prometheus service user'
    shell: '/bin/false'
  when: prometheus_install
  tags: [prometheus]
|
||||
|
||||
# Record whether a prometheus unit file is already present; the result is
# registered as svccheck for the stop task that follows.
- name: check if prometheus service exists
  stat:
    path: '/etc/systemd/system/prometheus.service'
  register: svccheck
  tags: [prometheus]
|
||||
|
||||
# Stop the service, but only when the unit file was found by the stat task.
- name: stop prometheus service if it exists
  service:
    state: stopped
    name: prometheus
  when: svccheck.stat.exists
  tags: [prometheus]
|
||||
|
||||
# Install the prometheus and promtool binaries from the unpacked release
# directory into host_bin_path.
# The copy module with remote_src copies files that already live on the target
# and only reports a change when the destination differs, unlike a raw `cp`
# command which always reports changed.
- name: copy prometheus binaries to {{ host_bin_path }}
  tags:
    - prometheus
  when: prometheus_install
  copy:
    src: "{{ prometheus_tmp }}/{{ item }}"
    dest: "{{ host_bin_path }}/{{ item }}"
    remote_src: true
    mode: "0755"
  with_items:
    - "prometheus"
    - "promtool"
|
||||
|
||||
# Hand the installed binaries to prometheus:prometheus with mode 0755.
# The mode is quoted so it reaches the file module as a string.
- name: set permissions on prometheus binaries
  file:
    path: '{{ host_bin_path }}/{{ item }}'
    mode: '0755'
    group: prometheus
    owner: prometheus
  with_items:
    - 'prometheus'
    - 'promtool'
  when: prometheus_install
  tags: [prometheus]
|
||||
|
||||
# Ensure the configuration directory exists, owned by prometheus:prometheus.
- name: create prometheus config directory
  file:
    state: directory
    path: '{{ prometheus_config }}'
    mode: '0755'
    group: prometheus
    owner: prometheus
  when: prometheus_install
  tags: [prometheus]
|
||||
|
||||
# Render the main prometheus configuration.
# The template iterates over prometheus_jobs itself and never reads `item`,
# so the task must run exactly once; looping the task over the job list only
# re-rendered the same file once per job.
- name: create prometheus configuration
  tags:
    - prometheus
  template:
    src: prometheus.yml.j2
    dest: "{{ prometheus_config }}/prometheus.yml"
|
||||
|
||||
# Render the alerting rules next to the main configuration; this runs on every
# play, not only when a new release is installed.
- name: create prometheus alerts file
  template:
    dest: '{{ prometheus_config }}/alerts.rules'
    src: 'alerts.rules.j2'
  tags: [prometheus]
|
||||
|
||||
# Ensure the data directory exists, owned by prometheus:prometheus.
- name: create prometheus data directory
  file:
    state: directory
    path: '{{ prometheus_data }}'
    mode: '0755'
    group: prometheus
    owner: prometheus
  when: prometheus_install
  tags: [prometheus]
|
||||
|
||||
# Render the systemd unit file for the prometheus service.
- name: create prometheus systemd unit
  template:
    dest: '/etc/systemd/system/prometheus.service'
    src: 'prometheus.service.j2'
  tags: [prometheus]
|
||||
|
||||
# Make systemd re-read its unit files so the freshly rendered
# prometheus.service is picked up. Uses the systemd module's daemon_reload
# option rather than shelling out to systemctl.
- name: reload systemd configurations
  tags:
    - prometheus
  systemd:
    daemon_reload: true
|
||||
|
||||
# Start prometheus and enable the service.
- name: start prometheus service
  service:
    enabled: true
    state: started
    name: prometheus
  tags: [prometheus]
|
||||
|
||||
# Clean up after an install: delete the unpacked release directory and the
# downloaded tarball.
- name: remove temporary prometheus files
  file:
    state: absent
    path: '{{ item }}'
  with_items:
    - '{{ prometheus_tmp }}'
    - '{{ prometheus_tgz }}'
  when: prometheus_install
  tags: [prometheus]
|
||||
|
||||
# Record the installed release's checksum in the config directory; the first
# task of this file reads the same path on the next run.
- name: create checksum file for prometheus {{ prometheus_version }}
  template:
    dest: '{{ prometheus_config }}/.sha256'
    src: 'sha256.j2'
  when: prometheus_install
  tags: [prometheus]
|
|
@ -1,53 +0,0 @@
|
|||
# {{ ansible_managed }}
{#
  Prometheus alerting rules, rendered by Ansible.
  Annotation text uses Prometheus' own double-brace templating, so each
  annotations body is wrapped in a raw block to keep Jinja from expanding it.
  The raw/endraw tags sit at column 0 so they add no stray indentation to the
  rendered YAML.
#}
groups:
  - name: default
    rules:
      - alert: InstanceDown
        # rtorrent is excluded here: it can be flappy and has its own alert
        # (RtorrentDown) with a longer interval.
        expr: up{instance!~"nerr-3.*",job!="rtorrent"} == 0
        for: 2m
        labels:
          severity: critical
        annotations:
{% raw %}
          summary: "Instance {{ $labels.instance }} is down."
          description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 2 minutes."
{% endraw %}
      - alert: ServiceDown
        expr: probe_success{instance!~"nerr-3.*"} == 0
        for: 10m
        labels:
          severity: critical
        annotations:
{% raw %}
          summary: "Service {{ $labels.instance }} is down."
          description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 10 minutes."
{% endraw %}
      - alert: RtorrentDown
        # rtorrent can be flappy and needs a longer interval.
        expr: up{job="rtorrent"} == 0
        for: 120m
        labels:
          severity: critical
        annotations:
{% raw %}
          summary: "rTorrent {{ $labels.instance }} is down."
{% endraw %}
      - alert: TLSCertificateNearExpiration
        # 60 * 60 * 24 * 2 seconds = 2 days.
        expr: probe_ssl_earliest_cert_expiry - time() < 60 * 60 * 24 * 2
        for: 1m
        labels:
          severity: critical
        annotations:
{% raw %}
          summary: "TLS certificate for {{ $labels.instance }} will expire in less than 2 days."
{% endraw %}
      - alert: DiskUsageHigh
        expr: (1 - node_filesystem_free_bytes{fstype=~"ext4|vfat"} / node_filesystem_size_bytes) > 0.75
        for: 1m
        labels:
          severity: critical
        annotations:
{% raw %}
          summary: "Disk usage on {{ $labels.instance }}:{{$labels.mountpoint}} ({{$labels.device}}) exceeds 75%."
{% endraw %}
|
|
@ -1,20 +0,0 @@
|
|||
# {{ ansible_managed }}
{#
  systemd unit for prometheus, rendered by Ansible.
  The binary location, config directory, data directory and external URL are
  filled in from role variables.
#}
[Unit]
Description=Prometheus metrics and monitoring system
Documentation=https://prometheus.io/docs/introduction/overview/
After=network-online.target
Wants=network-online.target

[Service]
Type=simple
User=prometheus
Group=prometheus
ExecStart={{ host_bin_path }}/prometheus \
    --config.file {{ prometheus_config }}/prometheus.yml \
    --storage.tsdb.path {{ prometheus_data }} \
    --storage.tsdb.retention.time 180d \
    --log.level debug \
    --web.external-url {{ prometheus_external_url }}

[Install]
WantedBy=multi-user.target
|
|
@ -1,42 +0,0 @@
|
|||
# {{ ansible_managed }}
# prometheus configuration, Matt Layher, 2016-04-15
{#
  One scrape config is emitted per entry of prometheus_jobs. Per-job keys:
    job             required, becomes job_name
    targets         required, list of scrape targets
    scrape_interval optional override of the global interval
    scheme          optional
    path            optional, becomes metrics_path
    params          optional mapping of query parameters
    relabel_target  optional; when set, the target address is moved into the
                    "target" query parameter and the scrape is sent to this
                    address instead
  Lists and mappings are emitted with to_json: JSON is valid YAML flow syntax
  and always stays on one line, so the value cannot wrap or spill onto lines
  that are not indented to match the surrounding block.
  Jinja control tags are kept at column 0 so they add no indentation.
#}

global:
  scrape_interval: 15s
  evaluation_interval: 15s

rule_files:
  - alerts.rules

scrape_configs:
{% for job in prometheus_jobs %}
  - job_name: "{{ job.job }}"
{% if job.scrape_interval is defined %}
    scrape_interval: "{{ job.scrape_interval }}"
{% endif %}
    static_configs:
      - targets: {{ job.targets | to_json }}
{% if job.scheme is defined %}
    scheme: "{{ job.scheme }}"
{% endif %}
{% if job.path is defined %}
    metrics_path: "{{ job.path }}"
{% endif %}
{% if job.params is defined %}
    params: {{ job.params | to_json }}
{% endif %}
{% if job.relabel_target is defined %}
    relabel_configs:
      - source_labels: [__address__]
        target_label: __param_target
      - source_labels: [__param_target]
        target_label: instance
      - target_label: __address__
        replacement: "{{ job.relabel_target }}"
{% endif %}
{% endfor %}
alerting:
  alertmanagers:
    - static_configs:
        - targets: {{ prometheus_alertmanagers | to_json }}
|
|
@ -1,3 +0,0 @@
|
|||
{# Marker file: holds the checksum of the prometheus release that was installed. #}
# {{ ansible_managed }}
# Remove this file to force ansible to reinstall prometheus {{ prometheus_version }}.
{{ prometheus_sha256 }}
|
|
@ -1,117 +0,0 @@
|
|||
---
|
||||
# Prometheus release to install, and the SHA-256 the downloaded tarball must
# match. Change both together when upgrading.
prometheus_sha256: '579f800ec3ec2dc9a36d2d513e7800552cf6b0898f87a8abafd54e73b53f8ad0'
prometheus_version: '2.15.2'
|
||||
# Alertmanager endpoints (host:port) that prometheus sends alerts to.
prometheus_alertmanagers:
  - 'monitnerr-1:9093'
|
||||
# Scrape jobs. Each entry is turned into one scrape config by the
# prometheus.yml.j2 template:
#   job             job name (required)
#   targets         list of scrape targets (required)
#   scrape_interval per-job interval override (optional)
#   path            metrics path (optional)
#   params          query parameters (optional)
#   relabel_target  exporter address to scrape on behalf of the listed
#                   targets (optional)
prometheus_jobs:
  - job: 'apcupsd'
    targets:
      - 'servnerr-3:9162'
      - 'nerr-3:9162'

  - job: 'blackbox_http_2xx'
    path: '/probe'
    params:
      module:
        - http_2xx
    targets:
      - 'https://grafana.servnerr.com'
      # TODO: move onto server and re-enable.
      # - 'https://homeassistant.servnerr.com'
    relabel_target: 'monitnerr-1:9115'

  - job: 'blackbox_http_401'
    path: '/probe'
    params:
      module:
        - http_401
    targets:
      - 'https://alertmanager.servnerr.com'
      - 'https://plex.servnerr.com'
      - 'https://prometheus.servnerr.com'
    relabel_target: 'monitnerr-1:9115'

  - job: 'blackbox_mdlayhercom'
    # Netlify can be flappy at times, so check this less often.
    scrape_interval: '1m'
    path: '/probe'
    params:
      module:
        - http_2xx
    targets:
      - 'https://mdlayher.com'
    relabel_target: 'monitnerr-1:9115'

  - job: 'blackbox_ssh'
    # This generates a lot of noise in OpenSSH logs, so do it less often.
    scrape_interval: '1m'
    path: '/probe'
    params:
      module:
        - ssh_banner
    targets:
      - 'monitnerr-1:22'
      - 'nerr-3:22'
      - 'routnerr-2:22'
      - 'servnerr-3:22'
      - 'unifi.servnerr.com:22'
    relabel_target: 'monitnerr-1:9115'

  - job: 'blackbox_exporter'
    targets:
      - 'monitnerr-1:9115'

  - job: 'coredns'
    targets:
      - 'routnerr-2:9153'

  - job: 'corerad'
    targets:
      - 'routnerr-2:9430'

  - job: 'hdhomerun'
    targets:
      - 'hdhomerun'
    relabel_target: 'servnerr-3:9137'

  - job: 'loki'
    targets:
      - 'servnerr-3:3100'

  - job: 'node'
    targets:
      - 'monitnerr-1:9100'
      - 'nerr-3:9100'
      - 'routnerr-2:9100'
      - 'servnerr-3:9100'

  - job: 'prometheus'
    scrape_interval: '5s'
    targets:
      - 'servnerr-3:9090'

  - job: 'promtail'
    targets:
      - 'servnerr-3:9080'

  - job: 'rtorrent'
    scrape_interval: '30s'
    targets:
      - 'servnerr-3:9135'

  - job: 'snmp_exporter'
    targets:
      - 'servnerr-3:9116'

  - job: 'snmp'
    path: '/snmp'
    params:
      module:
        - if_mib
    targets:
      - 'switch-livingroom01'
      - 'switch-office01'
      - 'ap-livingroom02'
    relabel_target: 'servnerr-3:9116'

  - job: 'traefik'
    targets:
      - 'routnerr-2:8080'

  # TODO: disabled until wg-dynamic work continues.
  # - job: 'wgipamd'
  #   targets:
  #     - 'routnerr-2:9475'

  # TODO: temporarily disabled while I get this working on NixOS.
  # - job: 'wireguard'
  #   targets:
  #     - 'routnerr-2:9586'
|
||||
# Static configuration.
# Locations on the managed host and the public URL of the web UI.
prometheus_config: '/etc/prometheus'
prometheus_data: '/var/lib/prometheus'
prometheus_external_url: 'https://prometheus.servnerr.com'

# Release artefact naming, derived from prometheus_version:
#   prometheus_dir  directory name used for the unpacked release
#   prometheus_tmp  where the tarball is unpacked under /tmp
#   prometheus_tgz  where the tarball is downloaded to
#   prometheus_url  upstream download URL
prometheus_dir: 'prometheus-{{ prometheus_version }}.linux-amd64'
prometheus_tmp: '/tmp/{{ prometheus_dir }}'
prometheus_tgz: '/tmp/prometheus.tar.gz'
prometheus_url: 'https://github.com/prometheus/prometheus/releases/download/v{{ prometheus_version }}/{{ prometheus_dir }}.tar.gz'
|
Loading…
Reference in a new issue