1
0
Fork 0
mirror of https://github.com/mdlayher/homelab.git synced 2024-12-15 17:50:54 +00:00

ansible/prometheus: remove

Signed-off-by: Matt Layher <mdlayher@gmail.com>
This commit is contained in:
Matt Layher 2020-05-19 18:35:51 -04:00
parent b4dc963d4f
commit 76bef328cb
No known key found for this signature in database
GPG key ID: 77BFE531397EDE94
6 changed files with 0 additions and 388 deletions

View file

@ -1,153 +0,0 @@
---
# Read the checksum marker written at the end of a previous successful run.
# The file does not exist before the first install, so a non-zero exit is
# tolerated; stdout is then empty and the comparison below requests an install.
- name: check if installing a different version of prometheus
  tags:
    - prometheus
  command: "cat {{ prometheus_config }}/.sha256"
  register: checksum
  # Read-only probe: never report a change and never fail the play.
  changed_when: false
  failed_when: false

# Install when the recorded checksum does not contain the wanted one, or when
# the caller defines prometheus_force. The expression is templated so the fact
# holds the evaluated boolean instead of the raw expression text.
- name: determine if prometheus {{ prometheus_version }} should be installed
  tags:
    - prometheus
  set_fact:
    prometheus_install: "{{ (prometheus_sha256 not in (checksum.stdout | default(''))) or (prometheus_force is defined) }}"
# Fetch the release tarball; get_url verifies it against the pinned sha256.
- name: download and checksum prometheus {{ prometheus_version }} tarball
  tags:
    - prometheus
  when: prometheus_install
  get_url:
    url: "{{ prometheus_url }}"
    dest: "{{ prometheus_tgz }}"
    checksum: "sha256:{{ prometheus_sha256 }}"

# The tarball already lives on the managed host, so unpack it in place.
# remote_src replaces the deprecated "copy: false" spelling of the same thing.
- name: unpack prometheus tarball
  tags:
    - prometheus
  when: prometheus_install
  unarchive:
    src: "{{ prometheus_tgz }}"
    dest: "/tmp/"
    remote_src: true

# Account that owns the binaries, config and data directories below.
- name: create prometheus user
  tags:
    - prometheus
  when: prometheus_install
  user:
    name: prometheus
    shell: "/bin/false"
    comment: "prometheus service user"
# Look for an already-installed unit file so the stop below is skipped on a
# host that has never had the service.
- name: check if prometheus service exists
  stat:
    path: '/etc/systemd/system/prometheus.service'
  register: svccheck
  tags: [prometheus]

# Runs on every play where the unit exists (not only on installs); the service
# is started again by a later task.
- name: stop prometheus service if it exists
  service:
    state: stopped
    name: prometheus
  when: svccheck.stat.exists
  tags: [prometheus]
# Copy the unpacked binaries from the temporary release directory into the
# host's bin path. The copy module (with remote_src, since both paths are on
# the managed host) replaces a raw "cp" so the task only reports a change when
# the destination actually differs.
- name: copy prometheus binaries to {{ host_bin_path }}
  tags:
    - prometheus
  when: prometheus_install
  copy:
    src: "{{ prometheus_tmp }}/{{ item }}"
    dest: "{{ host_bin_path }}/{{ item }}"
    remote_src: true
  with_items:
    - "prometheus"
    - "promtool"

# Hand the binaries to the service account and make them executable.
# The mode is quoted so it is always read as an octal string.
- name: set permissions on prometheus binaries
  tags:
    - prometheus
  when: prometheus_install
  file:
    path: "{{ host_bin_path }}/{{ item }}"
    owner: prometheus
    group: prometheus
    mode: "0755"
  with_items:
    - "prometheus"
    - "promtool"
# Directory holding prometheus.yml, alerts.rules and the .sha256 marker.
- name: create prometheus config directory
  tags:
    - prometheus
  when: prometheus_install
  file:
    path: "{{ prometheus_config }}"
    state: directory
    owner: prometheus
    group: prometheus
    mode: "0755"

# Rendered on every run so job changes apply without a reinstall.
# No task-level loop: the template iterates over prometheus_jobs itself, so
# looping here would only render the identical file once per job.
- name: create prometheus configuration
  tags:
    - prometheus
  template:
    src: prometheus.yml.j2
    dest: "{{ prometheus_config }}/prometheus.yml"

# Alerting rules referenced from prometheus.yml via rule_files.
- name: create prometheus alerts file
  tags:
    - prometheus
  template:
    src: alerts.rules.j2
    dest: "{{ prometheus_config }}/alerts.rules"
# TSDB storage directory passed to the server via --storage.tsdb.path.
- name: create prometheus data directory
  tags:
    - prometheus
  when: prometheus_install
  file:
    path: "{{ prometheus_data }}"
    state: directory
    owner: prometheus
    group: prometheus
    mode: "0755"

# Unit file is re-rendered on every run so flag changes are picked up.
- name: create prometheus systemd unit
  tags:
    - prometheus
  template:
    src: prometheus.service.j2
    dest: "/etc/systemd/system/prometheus.service"

# Make systemd re-read unit files; uses the systemd module rather than
# shelling out to systemctl.
- name: reload systemd configurations
  tags:
    - prometheus
  systemd:
    daemon_reload: true

# Start the service (it was stopped earlier if it already existed) and enable
# it at boot.
- name: start prometheus service
  tags:
    - prometheus
  service:
    name: prometheus
    state: started
    enabled: true
# Delete the unpacked release directory and the downloaded tarball once the
# binaries have been installed.
- name: remove temporary prometheus files
  file:
    state: absent
    path: '{{ item }}'
  with_items:
    - '{{ prometheus_tmp }}'
    - '{{ prometheus_tgz }}'
  when: prometheus_install
  tags: [prometheus]

# Record the installed release's checksum; the first task of this file reads
# it back on the next run to decide whether a reinstall is needed.
- name: create checksum file for prometheus {{ prometheus_version }}
  template:
    dest: '{{ prometheus_config }}/.sha256'
    src: sha256.j2
  when: prometheus_install
  tags: [prometheus]

View file

@ -1,53 +0,0 @@
# {{ ansible_managed }}
# Prometheus alerting rules. Annotation text uses Prometheus' own {{ ... }}
# templating, so each annotations body is wrapped in a raw block to keep
# Ansible's Jinja2 from expanding it at render time.
groups:
  - name: default
    rules:
      # Any scrape target down for 2 minutes. rtorrent is excluded here because
      # it can be flappy; it has its own alert with a longer interval below.
      - alert: InstanceDown
        expr: up{instance!~"nerr-3.*",job!="rtorrent"} == 0
        for: 2m
        labels:
          severity: critical
        annotations:
{% raw %}
          summary: "Instance {{ $labels.instance }} is down."
          description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 2 minutes."
{% endraw %}
      # A probe (probe_success) failing for 10 minutes.
      - alert: ServiceDown
        expr: probe_success{instance!~"nerr-3.*"} == 0
        for: 10m
        labels:
          severity: critical
        annotations:
{% raw %}
          summary: "Service {{ $labels.instance }} is down."
          description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 10 minutes."
{% endraw %}
      - alert: RtorrentDown
        # rtorrent can be flappy and needs a longer interval.
        expr: up{job="rtorrent"} == 0
        for: 120m
        labels:
          severity: critical
        annotations:
{% raw %}
          summary: "rTorrent {{ $labels.instance }} is down."
{% endraw %}
      # Threshold is 60 * 60 * 24 * 2 seconds, i.e. two days before expiry.
      - alert: TLSCertificateNearExpiration
        expr: probe_ssl_earliest_cert_expiry - time() < 60 * 60 * 24 * 2
        for: 1m
        labels:
          severity: critical
        annotations:
{% raw %}
          summary: "TLS certificate for {{ $labels.instance }} will expire in less than 2 days."
{% endraw %}
      # Used fraction (1 - free / size) above 75% on ext4 and vfat filesystems.
      - alert: DiskUsageHigh
        expr: (1 - node_filesystem_free_bytes{fstype=~"ext4|vfat"} / node_filesystem_size_bytes) > 0.75
        for: 1m
        labels:
          severity: critical
        annotations:
{% raw %}
          summary: "Disk usage on {{ $labels.instance }}:{{$labels.mountpoint}} ({{$labels.device}}) exceeds 75%."
{% endraw %}

View file

@ -1,20 +0,0 @@
# {{ ansible_managed }}
# systemd unit for the Prometheus server. It runs as the prometheus user and
# group and is ordered after network-online.target.
[Unit]
Description=Prometheus metrics and monitoring system
Documentation=https://prometheus.io/docs/introduction/overview/
After=network-online.target
Wants=network-online.target

[Service]
Type=simple
User=prometheus
Group=prometheus
# Flags: config file, TSDB data path, 180 day retention, debug logging and the
# externally visible URL.
ExecStart={{ host_bin_path }}/prometheus \
    --config.file={{ prometheus_config }}/prometheus.yml \
    --storage.tsdb.path={{ prometheus_data }} \
    --storage.tsdb.retention.time=180d \
    --log.level=debug \
    --web.external-url={{ prometheus_external_url }}

[Install]
WantedBy=multi-user.target

View file

@ -1,42 +0,0 @@
# {{ ansible_managed }}
# prometheus configuration, Matt Layher, 2016-04-15
{# One scrape config is emitted per entry of prometheus_jobs. Lists and maps
   are rendered with to_json: JSON is valid YAML flow syntax and always stays
   on one line, so the output cannot lose its indentation the way multi-line
   to_yaml output can (e.g. a params map with more than one key). #}
global:
  scrape_interval: 15s
  evaluation_interval: 15s
rule_files:
  - alerts.rules
scrape_configs:
{% for job in prometheus_jobs %}
  - job_name: "{{ job.job }}"
{% if job.scrape_interval is defined %}
    scrape_interval: "{{ job.scrape_interval }}"
{% endif %}
    static_configs:
      - targets: {{ job.targets | to_json }}
{% if job.scheme is defined %}
    scheme: "{{ job.scheme }}"
{% endif %}
{% if job.path is defined %}
    metrics_path: "{{ job.path }}"
{% endif %}
{% if job.params is defined %}
    params: {{ job.params | to_json }}
{% endif %}
{% if job.relabel_target is defined %}
    # Exporter-style job: the listed target becomes the "target" URL parameter
    # and the instance label, and the scrape itself goes to relabel_target.
    relabel_configs:
      - source_labels: [__address__]
        target_label: __param_target
      - source_labels: [__param_target]
        target_label: instance
      - target_label: __address__
        replacement: "{{ job.relabel_target }}"
{% endif %}
{% endfor %}
alerting:
  alertmanagers:
    - static_configs:
        - targets: {{ prometheus_alertmanagers | to_json }}

View file

@ -1,3 +0,0 @@
{# Marker file: records the sha256 of the installed prometheus release. The
   install tasks read it back and reinstall when it does not contain the
   configured prometheus_sha256. #}
# {{ ansible_managed }}
# Remove this file to force ansible to reinstall prometheus {{ prometheus_version }}.
{{ prometheus_sha256 }}

View file

@ -1,117 +0,0 @@
---
# Prometheus version and checksum configuration.
# The sha256 must match the release tarball for prometheus_version; it is used
# both to verify the download and to detect an already-installed release.
prometheus_version: '2.15.2'
prometheus_sha256: '579f800ec3ec2dc9a36d2d513e7800552cf6b0898f87a8abafd54e73b53f8ad0'

# Prometheus alerting configuration.
# host:port targets rendered into the alerting section of prometheus.yml.
prometheus_alertmanagers:
  - 'monitnerr-1:9093'
# Prometheus jobs configuration.
# Each entry becomes one scrape config in prometheus.yml. Keys read by the
# template:
#   job             - job_name
#   targets         - static_configs targets
#   scrape_interval - optional per-job scrape interval
#   scheme          - optional scheme
#   path            - optional metrics_path
#   params          - optional URL parameters
#   relabel_target  - optional; address actually scraped, with each listed
#                     target passed along as the "target" parameter
prometheus_jobs:
  - job: 'apcupsd'
    targets:
      - 'servnerr-3:9162'
      - 'nerr-3:9162'

  - job: 'blackbox_http_2xx'
    path: '/probe'
    params:
      module:
        - http_2xx
    targets:
      - 'https://grafana.servnerr.com'
      # TODO: move onto server and re-enable.
      # - "https://homeassistant.servnerr.com"
    relabel_target: 'monitnerr-1:9115'

  - job: 'blackbox_http_401'
    path: '/probe'
    params:
      module:
        - http_401
    targets:
      - 'https://alertmanager.servnerr.com'
      - 'https://plex.servnerr.com'
      - 'https://prometheus.servnerr.com'
    relabel_target: 'monitnerr-1:9115'

  - job: 'blackbox_mdlayhercom'
    # Netlify can be flappy at times, so check this less often.
    scrape_interval: '1m'
    path: '/probe'
    params:
      module:
        - http_2xx
    targets:
      - 'https://mdlayher.com'
    relabel_target: 'monitnerr-1:9115'

  - job: 'blackbox_ssh'
    # This generates a lot of noise in OpenSSH logs, so do it less often.
    scrape_interval: '1m'
    path: '/probe'
    params:
      module:
        - ssh_banner
    targets:
      - 'monitnerr-1:22'
      - 'nerr-3:22'
      - 'routnerr-2:22'
      - 'servnerr-3:22'
      - 'unifi.servnerr.com:22'
    relabel_target: 'monitnerr-1:9115'

  - job: 'blackbox_exporter'
    targets:
      - 'monitnerr-1:9115'

  - job: 'coredns'
    targets:
      - 'routnerr-2:9153'

  - job: 'corerad'
    targets:
      - 'routnerr-2:9430'

  - job: 'hdhomerun'
    targets:
      - 'hdhomerun'
    relabel_target: 'servnerr-3:9137'

  - job: 'loki'
    targets:
      - 'servnerr-3:3100'

  - job: 'node'
    targets:
      - 'monitnerr-1:9100'
      - 'nerr-3:9100'
      - 'routnerr-2:9100'
      - 'servnerr-3:9100'

  - job: 'prometheus'
    scrape_interval: '5s'
    targets:
      - 'servnerr-3:9090'

  - job: 'promtail'
    targets:
      - 'servnerr-3:9080'

  - job: 'rtorrent'
    scrape_interval: '30s'
    targets:
      - 'servnerr-3:9135'

  - job: 'snmp_exporter'
    targets:
      - 'servnerr-3:9116'

  - job: 'snmp'
    path: '/snmp'
    params:
      module:
        - if_mib
    targets:
      - 'switch-livingroom01'
      - 'switch-office01'
      - 'ap-livingroom02'
    relabel_target: 'servnerr-3:9116'

  - job: 'traefik'
    targets:
      - 'routnerr-2:8080'

  # TODO: disabled until wg-dynamic work continues.
  # - job: "wgipamd"
  #   targets:
  #     - "routnerr-2:9475"

  # TODO: temporarily disabled while I get this working on NixOS.
  # - job: "wireguard"
  #   targets:
  #     - "routnerr-2:9586"
# Static configuration.
# Locations on the managed host and the URL the server is reachable at.
prometheus_config: '/etc/prometheus'
prometheus_data: '/var/lib/prometheus'
prometheus_external_url: 'https://prometheus.servnerr.com'

# Release artifact naming: prometheus_dir is the release directory name,
# prometheus_tmp is where the tarball is unpacked under /tmp, and
# prometheus_tgz is the local download path for prometheus_url.
prometheus_dir: 'prometheus-{{ prometheus_version }}.linux-amd64'
prometheus_tmp: '/tmp/{{ prometheus_dir }}'
prometheus_url: 'https://github.com/prometheus/prometheus/releases/download/v{{ prometheus_version }}/{{ prometheus_dir }}.tar.gz'
prometheus_tgz: '/tmp/prometheus.tar.gz'