
chore: misc updates and refactor for alert aggregation

Tommy 2024-12-21 13:12:56 +01:00
parent d5d07a67ab
commit 5677e2995c
Signed by: tommy
SSH key fingerprint: SHA256:1LWgQT3QPHIT29plS8jjXc3S1FcE/4oGvsx3Efxs6Uc
12 changed files with 593 additions and 306 deletions


@@ -1,13 +0,0 @@
version: "3"
tasks:
create-image:
desc: Build local docker image (am-ntfy-builder)
dir: "docker"
cmds:
- docker build --platform linux/amd64 -t am-ntfy-builder --no-cache .
shell:
desc: Drop into a build shell
cmds:
- docker run -v /var/run/docker.sock:/var/run/docker.sock -v ./:/root/working-dir -w /root/working-dir --platform linux/amd64 -it am-ntfy-builder -c "fish"


@@ -14,7 +14,7 @@ ENV NIXPKGS_ALLOW_UNFREE=1
ENV NIX_PATH=nixpkgs=channel:nixos-24.05
ARG NIX_CONFIG=
ADD nix.conf /etc/nix/nix.conf
ADD docker/nix.conf /etc/nix/nix.conf
RUN echo $'\n'"${NIX_CONFIG}" >> /etc/nix/nix.conf
RUN mkdir -p "/root" && touch "/root/.nix-channels" && \

README.md

@@ -1,76 +1,89 @@
# alertmanager-ntfy
Listen for webhooks from
[Alertmanager](https://prometheus.io/docs/alerting/latest/alertmanager/) and
send them to [ntfy](https://ntfy.sh/) push notifications
# 🚨 alertmanager-ntfy 🚨
This project is a lightweight bridge between [Prometheus Alertmanager](https://prometheus.io/docs/alerting/latest/alertmanager/) and [ntfy](https://ntfy.sh/). It sends push notifications to your devices.
Configuration is done with environment variables.
## 📦 Features
- Configuration via environment variables.
- Exports Prometheus metrics to monitor webhook activity.
- Built for Kubernetes, but works anywhere.
- Ships a `flake.nix` for Nix/NixOS users.
| Variable | Description | Example |
|-----------------|------------------------------|-------------------|
| HTTP_ADDRESS | Adress to listen on | `localhost` |
| HTTP_PORT | Port to listen on | `8080` |
| NTFY_SERVER_URL | ntfy server to send to | `https://ntfy.sh` |
| NTFY_USER | ntfy user for basic auth | `myuser` |
| NTFY_PASS | ntfy password for basic auth | `supersecret` |
## 🚀 Getting Started
# Nix
To send your Alertmanager alerts to ntfy, configure the following environment variables:
For Nix/Nixos users a `flake.nix` is provided to simplify the build. It also
privides app to test the hooks with mocked data from `mock.json`
| Variable | Description | Example |
|-------------------|----------------------------------------------|-------------------|
| `HTTP_ADDRESS` | Address to listen on | `localhost` |
| `HTTP_PORT` | Port to listen on | `8080` |
| `NTFY_SERVER_URL` | ntfy server to send alerts to | `https://ntfy.sh` |
| `NTFY_USER` | Username for basic auth (optional) | `myuser` |
| `NTFY_PASS` | Password for basic auth (optional) | `supersecret` |
| `LOG_LEVEL` | Log level (`debug`, `info`, `warn`, `error`) | `info` |
### Build
### Example
```sh
nix build .#container
To expose the service on `localhost:8080` and send alerts to an ntfy topic `https://ntfy.sh/mytopic`:
```bash
export HTTP_ADDRESS="localhost"
export HTTP_PORT="8080"
export NTFY_SERVER_URL="https://ntfy.sh/mytopic"
export NTFY_USER="myuser"
export NTFY_PASS="supersecret"
export LOG_LEVEL="info"
```
### Push to registry
## 🛠️ Nix Support
```sh
nix run .#push-container
```
For Nix/NixOS users, a `flake.nix` is provided for building, running, and testing.
### Run directly
### Building with Nix
```sh
nix run
```
To build the container image: `nix build .#container`
### Test alerts
If you are not on amd64, use `nix build .#packages.x86_64-linux.container`.
```sh
nix run '.#mock-hook'
```
Push the built container to your preferred registry: `nix run .#push-container`
### Module
You can run the application directly from Nix with: `nix run`
The flake also includes a NixOS module for ease of use. A minimal configuration
will look like this:
To simulate alerts with test data: `nix run '.#mock-hook'`
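If you would rather exercise the webhook without Nix, the sketch below shows roughly what `mock-hook` does, but in Go: it posts a minimal Alertmanager-style payload to the root endpoint. It assumes the service listens on `localhost:8080`; the alert labels and annotations are hypothetical placeholders.

```go
package main

import (
	"bytes"
	"fmt"
	"net/http"
)

func main() {
	// Minimal Alertmanager-style webhook payload (hypothetical values).
	payload := []byte(`{
	  "status": "firing",
	  "alerts": [
	    {
	      "status": "firing",
	      "labels": {"alertname": "test_alert", "severity": "warning", "instance": "example:9100"},
	      "annotations": {"summary": "Just a test"},
	      "startsAt": "2024-12-21T12:00:00Z"
	    }
	  ]
	}`)

	// The service handles POSTs on "/" at HTTP_ADDRESS:HTTP_PORT.
	resp, err := http.Post("http://localhost:8080/", "application/json", bytes.NewReader(payload))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	fmt.Println("response:", resp.Status)
}
```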
## 🖥️ NixOS Module
A NixOS module is provided for easy setup and integration. A minimal configuration would look like this:
```nix
# Add to flake inputs
inputs.alertmanager-ntfy.url = "github:pinpox/alertmanager-ntfy";
inputs.alertmanager-ntfy.url = "git+https://code.252.no/tommy/alertmanager-ntfy";
# Import the module in your configuration.nix
imports = [
self.inputs.alertmanager-ntfy.nixosModules.default
self.inputs.alertmanager-ntfy.nixosModules.default
];
# Enable and set options
# Enable the service and configure options
services.alertmanager-ntfy = {
enable = true;
httpAddress = "localhost";
httpPort = "9999";
ntfyTopic = "https://ntfy.sh/test";
ntfyPriority = "high";
envFile = "/var/src/secrets/alertmanager-ntfy/envfile";
enable = true;
httpAddress = "localhost";
httpPort = "9999";
ntfyTopic = "https://ntfy.sh/test";
ntfyPriority = "high";
envFile = "/var/src/secrets/alertmanager-ntfy/envfile";
};
```
## 🔍 Prometheus Metrics
The service exports Prometheus metrics for tracking webhook requests and errors. By default, these metrics are exposed at `/metrics`:
- **`http_requests_total`**: Total number of HTTP requests received.
- **`http_request_errors_total`**: Total number of errors encountered during request processing.
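A quick way to check the exporter is a plain GET against `/metrics`. The sketch below is one way to do that from Go, assuming the service listens on `localhost:8080`; it prints only the two counters listed above.

```go
package main

import (
	"bufio"
	"fmt"
	"net/http"
	"strings"
)

func main() {
	// Assumes the service is reachable on localhost:8080 (HTTP_ADDRESS/HTTP_PORT).
	resp, err := http.Get("http://localhost:8080/metrics")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// Print only the counters documented above.
	scanner := bufio.NewScanner(resp.Body)
	for scanner.Scan() {
		line := scanner.Text()
		if strings.HasPrefix(line, "http_requests_total") ||
			strings.HasPrefix(line, "http_request_errors_total") {
			fmt.Println(line)
		}
	}
}
```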
## 🤩 Gratitude
This repo is based on the work by [pinpox/alertmanager-ntfy](https://github.com/pinpox/alertmanager-ntfy). Adaptions has been made for Kubernetes deployment.
This project is based on the fantastic work by [pinpox/alertmanager-ntfy](https://github.com/pinpox/alertmanager-ntfy), with adaptations for Kubernetes deployments and enhanced logging/metrics support.


@@ -5,7 +5,7 @@ pkgs.buildGoModule rec {
version = "1.0.0";
src = ./src;
vendorHash = "sha256-8nbWcawb/SYL5vLj5DUrDO51L0l+qPE0JOrkQJdre00=";
vendorHash = "sha256-VC3ZsZLob6RrqQjTFYk5fMqUqInWo/mod+lPy2lIqhQ=";
subPackages = [ "."];
meta = with pkgs.lib; {


@@ -1,37 +1,21 @@
{
"nodes": {
"flake-compat": {
"flake": false,
"locked": {
"lastModified": 1696426674,
"narHash": "sha256-kvjfFW7WAETZlt09AgDn1MrtKzP7t90Vf7vypd3OL1U=",
"owner": "edolstra",
"repo": "flake-compat",
"rev": "0f9255e01c2351cc7d116c072cb317785dd33b33",
"type": "github"
},
"original": {
"owner": "edolstra",
"repo": "flake-compat",
"type": "github"
}
},
"flake-utils": {
"inputs": {
"systems": "systems"
},
"locked": {
"lastModified": 1705309234,
"narHash": "sha256-uNRRNRKmJyCRC/8y1RqBkqWBLM034y4qN7EprSdmgyA=",
"owner": "numtide",
"repo": "flake-utils",
"rev": "1ef2e671c3b0c19053962c07dbda38332dcebf26",
"type": "github"
"lastModified": 1726560853,
"narHash": "sha256-X6rJYSESBVr3hBoH0WbKE5KvhPU5bloyZ2L4K60/fPQ=",
"ref": "refs/heads/main",
"rev": "c1dfcf08411b08f6b8615f7d8971a2bfa81d5e8a",
"revCount": 101,
"type": "git",
"url": "https://code.252.no/tommy/flake-utils"
},
"original": {
"owner": "numtide",
"repo": "flake-utils",
"type": "github"
"type": "git",
"url": "https://code.252.no/tommy/flake-utils"
}
},
"nixpkgs": {
@@ -52,7 +36,6 @@
},
"root": {
"inputs": {
"flake-compat": "flake-compat",
"flake-utils": "flake-utils",
"nixpkgs": "nixpkgs"
}

flake.nix

@@ -1,75 +1,87 @@
{
description = "Relay Prometheus alerts to ntfy.sh";
description = "Relay webhooks to ntfy.sh";
inputs = {
nixpkgs.url = "github:nixos/nixpkgs/nixos-24.05";
flake-utils.url = "github:numtide/flake-utils";
flake-compat = {
url = "github:edolstra/flake-compat";
flake = false;
};
flake-utils.url = "git+https://code.252.no/tommy/flake-utils";
};
outputs = { self, nixpkgs, flake-utils, ... }:
let
# Define target systems for cross-compilation
targetSystems = [ "x86_64-linux" "aarch64-darwin" ];
name = "alertmanager-ntfy";
registry = "code.252.no/tommy/alertmanager-ntfy";
version = "1.1.0";
# Map target systems to Docker architectures
archMap = {
"x86_64-linux" = "amd64";
"aarch64-darwin" = "arm64";
};
in
flake-utils.lib.eachDefaultSystem (system:
let
pkgs = nixpkgs.legacyPackages.${system};
goModule = import ./default.nix { inherit pkgs; };
name = "alertmanager-ntfy";
registry = "code.252.no/tommy/alertmanager-ntfy";
version = "1.0.2";
in {
packages = rec {
alertmanager-ntfy = goModule;
pkgs = import nixpkgs { inherit system; };
container = pkgs.dockerTools.buildImage {
inherit name;
tag = version;
created = "now";
copyToRoot = pkgs.buildEnv {
name = "root-env";
paths = [ alertmanager-ntfy pkgs.cacert ];
# Generate cross-compiled packages for each target system
crossPackages = builtins.listToAttrs (map (targetSystem:
let
crossPkgs = import nixpkgs {
inherit system;
crossSystem = targetSystem;
};
config.Cmd = [ "${alertmanager-ntfy}/bin/alertmanager-ntfy" ];
};
architecture = archMap.${targetSystem};
push-container = pkgs.writeShellScriptBin "push-container" ''
#!/usr/bin/env bash
set -e
# Build the Go application for the target system
alertmanager-ntfy = crossPkgs.callPackage ./default.nix {};
# Build the image and load it to Docker
echo "Loading Docker image..."
${pkgs.docker}/bin/docker load < ${self.packages.${system}.container}
# Build the Docker image for the target system
container = crossPkgs.dockerTools.buildImage {
inherit name;
tag = version;
created = "now";
# Tag the image
echo "Tagging Docker image..."
${pkgs.docker}/bin/docker tag ${name}:${version} ${registry}:${version}
# Set the architecture for the Docker image
architecture = [ architecture ];
# Push the image to the Docker registry
echo "Pushing Docker image to ${registry}:${version}..."
${pkgs.docker}/bin/docker push ${registry}:${version}
'';
copyToRoot = crossPkgs.buildEnv {
name = "root-env";
paths = [ alertmanager-ntfy crossPkgs.cacert ];
};
config.Cmd = [ "${alertmanager-ntfy}/bin/alertmanager-ntfy" ];
};
mock-hook = pkgs.writeScriptBin "mock-hook" ''
#!${pkgs.stdenv.shell}
${pkgs.curl}/bin/curl -X POST -d @mock.json http://$HTTP_ADDRESS:$HTTP_PORT
'';
};
# Script to push the Docker image
push-container = crossPkgs.writeShellScriptBin "push-container" ''
#!/usr/bin/env bash
set -e
apps = rec {
mock-hook = flake-utils.lib.mkApp { drv = self.packages.${system}.mock-hook; };
alertmanager-ntfy = flake-utils.lib.mkApp { drv = self.packages.${system}.alertmanager-ntfy; };
default = alertmanager-ntfy;
};
# Load the Docker image
echo "Loading Docker image..."
docker load < ${container}
nixosModules.default = ({ pkgs, ... }: {
imports = [ ./module.nix ];
nixpkgs.overlays = [
(self: super: {
alertmanager-ntfy = self.packages.${system}.alertmanager-ntfy;
})
];
});
# Tag the image
echo "Tagging Docker image..."
docker tag ${name}:${version} ${registry}:${version}
# Push the image to the Docker registry
echo "Pushing Docker image to ${registry}:${version}..."
docker push ${registry}:${version}
'';
in {
name = targetSystem;
value = {
alertmanager-ntfy = alertmanager-ntfy;
container = container;
push-container = push-container;
};
}
) targetSystems);
in
{
packages = crossPackages;
}
);
}

grafana/dashboard.json (new file)

@@ -0,0 +1,166 @@
{
"dashboard": {
"id": null,
"title": "Alertmanager to Ntfy Metrics",
"timezone": "browser",
"schemaVersion": 30,
"version": 1,
"refresh": "5s",
"panels": [
{
"type": "graph",
"title": "Total HTTP Requests",
"gridPos": { "x": 0, "y": 0, "w": 12, "h": 8 },
"datasource": "Prometheus",
"targets": [
{
"expr": "sum(rate(http_requests_total[5m])) by (method, status)",
"legendFormat": "{{method}} {{status}}",
"interval": "",
"refId": "A"
}
],
"yaxes": [
{
"format": "short",
"label": "Requests per second",
"logBase": 1,
"min": null,
"max": null
},
{
"format": "short",
"label": null,
"logBase": 1,
"min": null,
"max": null
}
]
},
{
"type": "graph",
"title": "Total HTTP Request Errors",
"gridPos": { "x": 12, "y": 0, "w": 12, "h": 8 },
"datasource": "Prometheus",
"targets": [
{
"expr": "sum(rate(http_request_errors_total[5m])) by (method, status)",
"legendFormat": "{{method}} {{status}}",
"interval": "",
"refId": "B"
}
],
"yaxes": [
{
"format": "short",
"label": "Errors per second",
"logBase": 1,
"min": null,
"max": null
},
{
"format": "short",
"label": null,
"logBase": 1,
"min": null,
"max": null
}
]
},
{
"type": "stat",
"title": "Total Requests",
"gridPos": { "x": 0, "y": 8, "w": 6, "h": 4 },
"datasource": "Prometheus",
"targets": [
{
"expr": "sum(http_requests_total)",
"legendFormat": "Total Requests",
"refId": "C"
}
],
"fieldConfig": {
"defaults": {
"unit": "none",
"decimals": 0
}
}
},
{
"type": "stat",
"title": "Total Errors",
"gridPos": { "x": 6, "y": 8, "w": 6, "h": 4 },
"datasource": "Prometheus",
"targets": [
{
"expr": "sum(http_request_errors_total)",
"legendFormat": "Total Errors",
"refId": "D"
}
],
"fieldConfig": {
"defaults": {
"unit": "none",
"decimals": 0
}
}
},
{
"type": "table",
"title": "HTTP Requests Breakdown",
"gridPos": { "x": 0, "y": 12, "w": 24, "h": 8 },
"datasource": "Prometheus",
"targets": [
{
"expr": "sum(rate(http_requests_total[5m])) by (path, method, status)",
"legendFormat": "{{path}} {{method}} {{status}}",
"interval": "",
"refId": "E"
}
],
"columns": [
{ "text": "Path", "value": "path" },
{ "text": "Method", "value": "method" },
{ "text": "Status", "value": "status" },
{ "text": "Requests per second", "value": "rate" }
],
"transform": "table",
"table": {
"transform": "timeseries_aggregations",
"columns": [
{ "text": "Path", "value": "path" },
{ "text": "Method", "value": "method" },
{ "text": "Status", "value": "status" },
{ "text": "Requests per second", "value": "rate" }
]
}
},
{
"type": "stat",
"title": "Request Success Rate",
"gridPos": { "x": 12, "y": 8, "w": 12, "h": 4 },
"datasource": "Prometheus",
"targets": [
{
"expr": "1 - (sum(rate(http_request_errors_total[5m])) / sum(rate(http_requests_total[5m])))",
"legendFormat": "Success Rate",
"refId": "F"
}
],
"fieldConfig": {
"defaults": {
"unit": "percent",
"decimals": 2,
"thresholds": {
"mode": "percentage",
"steps": [
{ "color": "green", "value": 90 },
{ "color": "red", "value": 0 }
]
}
}
}
}
]
}
}


@@ -1,42 +0,0 @@
{
"receiver":"all",
"status":"firing",
"alerts":[
{
"status":"firing",
"labels":{
"alertname":"systemd_service_failed",
"instance":"birne.wireguard:9100",
"job":"node-stats",
"name":"borgbackup-job-box-backup.service",
"state":"failed",
"type":"simple"
},
"annotations":{
"description":"birne.wireguard:9100 failed to (re)start service borgbackup-job-box-backup.service."
},
"startsAt":"2021-11-30T23:03:16.303Z",
"endsAt":"0001-01-01T00:00:00Z",
"generatorURL":"https://vpn.prometheus.pablo.tools/graph?g0.expr=node_systemd_unit_state%7Bstate%3D%22failed%22%7D+%3D%3D+1\u0026g0.tab=1",
"fingerprint":"4acabeba15f6a22a"
}
],
"groupLabels":{
"instance":"birne.wireguard:9100"
},
"commonLabels":{
"alertname":"systemd_service_failed",
"instance":"birne.wireguard:9100",
"job":"node-stats",
"name":"borgbackup-job-box-backup.service",
"state":"failed",
"type":"simple"
},
"commonAnnotations":{
"description":"birne.wireguard:9100 failed to (re)start service borgbackup-job-box-backup.service."
},
"externalURL":"https://vpn.alerts.pablo.tools",
"version":"4",
"groupKey":"{}:{instance=\"birne.wireguard:9100\"}",
"truncatedAlerts":0
}


@@ -11,16 +11,20 @@ require (
golang.org/x/exp v0.0.0-20240909161429-701f63a606c0
)
require github.com/grafana/pyroscope-go/godeltaprof v0.1.8 // indirect
require (
github.com/beorn7/perks v1.0.1 // indirect
github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/go-kit/log v0.2.1 // indirect
github.com/go-logfmt/logfmt v0.5.1 // indirect
github.com/golang/protobuf v1.5.3 // indirect
github.com/grafana/pyroscope-go v1.2.0
github.com/klauspost/compress v1.17.9 // indirect
github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/prometheus/client_golang v1.20.0 // indirect
github.com/prometheus/client_golang v1.20.4 // indirect
github.com/prometheus/client_model v0.6.1 // indirect
github.com/prometheus/procfs v0.15.1 // indirect
github.com/shurcooL/httpfs v0.0.0-20190707220628-8d4bc4ba7749 // indirect


@@ -125,6 +125,10 @@ github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm4
github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg=
github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk=
github.com/grafana/pyroscope-go v1.2.0 h1:aILLKjTj8CS8f/24OPMGPewQSYlhmdQMBmol1d3KGj8=
github.com/grafana/pyroscope-go v1.2.0/go.mod h1:2GHr28Nr05bg2pElS+dDsc98f3JTUh2f6Fz1hWXrqwk=
github.com/grafana/pyroscope-go/godeltaprof v0.1.8 h1:iwOtYXeeVSAeYefJNaxDytgjKtUuKQbJqgAIjlnicKg=
github.com/grafana/pyroscope-go/godeltaprof v0.1.8/go.mod h1:2+l7K7twW49Ct4wFluZD3tZ6e0SjanjcUUBPVD/UuGU=
github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
github.com/hashicorp/go-immutable-radix v1.0.0/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60=
github.com/hashicorp/go-msgpack v0.5.3/go.mod h1:ahLV/dePpqEmjfWmKiqvPkv/twdG7iPBM1vqhUKIvfM=
@@ -158,6 +162,8 @@ github.com/karrick/godirwalk v1.10.3/go.mod h1:RoGL9dQei4vP9ilrpETWE8CLOZ1kiN0Lh
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/klauspost/compress v1.13.6/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk=
github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA=
github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw=
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/konsorten/go-windows-terminal-sequences v1.0.2/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
@@ -220,6 +226,8 @@ github.com/prometheus/client_golang v1.19.1 h1:wZWJDwK+NameRJuPGDhlnFgx8e8HN3XHQ
github.com/prometheus/client_golang v1.19.1/go.mod h1:mP78NwGzrVks5S2H6ab8+ZZGJLZUq1hoULYBAYBw1Ho=
github.com/prometheus/client_golang v1.20.0 h1:jBzTZ7B099Rg24tny+qngoynol8LtVYlA2bqx3vEloI=
github.com/prometheus/client_golang v1.20.0/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE=
github.com/prometheus/client_golang v1.20.4 h1:Tgh3Yr67PaOv/uTqloMsCEdeuFTatm5zIq5+qNN23vI=
github.com/prometheus/client_golang v1.20.4/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE=
github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo=
github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=


@@ -1,204 +1,360 @@
package main
import (
"bytes"
"context"
"encoding/json"
"fmt"
"hash/fnv"
"io"
"net/http"
_ "net/http/pprof" // <--- pprof for debugging at /debug/pprof
"net/url"
"os"
"sort"
"strings"
"sync"
"time"
"golang.org/x/exp/maps"
"github.com/grafana/pyroscope-go"
// used if you want block/mutex/goroutine profiles
"github.com/prometheus/alertmanager/template"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/prometheus/common/model"
"github.com/sirupsen/logrus"
)
// Initialize the logger globally
// --------------- Global Variables --------------- //
// Logger
var log = logrus.New()
// PrettifyAlert formats the alert data in a human-readable way
func FormattedAlert(alert template.Alert) (string, string, NtfyPriority, string, string) {
formattedStartTime := alert.StartsAt.Format("2006-01-02 15:04:05 MST")
// Aggregator: in-memory map of alert fingerprint -> latest alert
var (
alertsMu sync.Mutex
activeAlertsMap = make(map[string]template.Alert)
)
labels := alert.Labels
annotations := alert.Annotations
alertName := labels["alertname"]
instance := labels["instance"]
project := labels["project"]
service := labels["service"]
severity := labels["severity"]
summary := annotations["summary"]
moreUrl := annotations["grafana"]
// Determine the type of alert and apply the correct priority mapping
var alertPriority NtfyPriority
var emoji string
var alertTopic string
switch {
case isFalcoAlert(labels):
alertPriority = getPriorityFromSeverity(severity, falcoSeverityToPriority)
emoji = "🦅"
alertTopic = "security"
case isPrometheusAlert(labels):
alertPriority = getPriorityFromSeverity(severity, prometheusSeverityToPriority)
emoji = "🤖"
alertTopic = "alerts"
default:
alertPriority = PriorityDefault // Fallback for unknown sources
emoji = "🤷‍♂️"
alertTopic = "other"
}
formattedAlertTitle := fmt.Sprintf("%s %s", emoji, alertName)
formattedAlertBody := fmt.Sprintf(
"Status: %s\n"+
"Instance: %s\n"+
"Project: %s\n"+
"Service: %s\n"+
"Started at: %s\n"+
"Summary: %s\n"+
"[More info](%s)\n",
strings.ToUpper(alert.Status),
instance,
project,
service,
formattedStartTime,
summary,
moreUrl,
// Prometheus metrics
var (
httpRequestsTotal = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "http_requests_total",
Help: "Total number of HTTP requests",
},
[]string{"path", "method", "status"},
)
httpRequestErrors = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "http_request_errors_total",
Help: "Total number of HTTP request errors",
},
[]string{"path", "method", "status"},
)
alertTags := strings.Join(maps.Values(alert.Labels), ",")
return formattedAlertTitle, formattedAlertBody, alertPriority, alertTags, alertTopic
aggregatorActiveAlerts = prometheus.NewGauge(prometheus.GaugeOpts{
Name: "aggregator_active_alerts",
Help: "Current number of active (firing) alerts in the aggregator",
})
aggregatorNewAlerts = prometheus.NewCounter(prometheus.CounterOpts{
Name: "aggregator_new_alerts_total",
Help: "Total number of new alerts added to the aggregator",
})
aggregatorUpdatedAlerts = prometheus.NewCounter(prometheus.CounterOpts{
Name: "aggregator_updated_alerts_total",
Help: "Total number of existing alerts updated in the aggregator",
})
aggregatorResolvedAlerts = prometheus.NewCounter(prometheus.CounterOpts{
Name: "aggregator_resolved_alerts_total",
Help: "Total number of resolved alerts removed from the aggregator",
})
requestDuration = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: "http_request_duration_seconds",
Help: "Histogram of request handling durations",
Buckets: prometheus.DefBuckets,
},
[]string{"path", "method", "status"},
)
)
// Initialization
func init() {
prometheus.MustRegister(httpRequestsTotal)
prometheus.MustRegister(httpRequestErrors)
prometheus.MustRegister(aggregatorActiveAlerts)
prometheus.MustRegister(aggregatorNewAlerts)
prometheus.MustRegister(aggregatorUpdatedAlerts)
prometheus.MustRegister(aggregatorResolvedAlerts)
prometheus.MustRegister(requestDuration)
}
// Log incoming requests while avoiding sensitive headers
func logRequestWithStatus(r *http.Request, status int, additionalFields logrus.Fields) {
requestInfo := map[string]interface{}{
"method": r.Method,
"url": r.URL.String(),
"remote": r.RemoteAddr,
"headers": map[string]string{
"Content-Type": r.Header.Get("Content-Type"),
// --------------- Pyroscope Setup --------------- //
// startPyroscope sets up push-mode profiling with optional tags.
// Adjust the ServerAddress to match your Pyroscope service (e.g. `http://10.245.175.166:4040`)
func startPyroscope() {
region := os.Getenv("REGION")
if region == "" {
region = "default"
}
pyroscope_url := os.Getenv("PYROSCOPE_URL")
if pyroscope_url == "" {
pyroscope_url = "http://pyroscope.monitoring.svc.cluster.local:4040"
}
pyroscope_app_name := os.Getenv("PYROSCOPE_APP_NAME")
if pyroscope_app_name == "" {
pyroscope_app_name = "alertmanager-ntfy"
}
// This starts a profiling session that continuously sends data to Pyroscope server
// In "push" mode, pyroscope auto-collects CPU and (optionally) block/mutex/goroutine profiles.
// For "pull" mode, see the Pyroscope docs.
pyroscope.Start(pyroscope.Config{
ApplicationName: pyroscope_app_name, // name your service
ServerAddress: pyroscope_url, // Pyroscope server
Logger: pyroscope.StandardLogger,
Tags: map[string]string{
"region": region, // static tag
},
// By default, Pyroscope collects CPU profiles. You can enable more:
ProfileTypes: []pyroscope.ProfileType{
pyroscope.ProfileAllocObjects,
pyroscope.ProfileAllocSpace,
pyroscope.ProfileInuseObjects,
pyroscope.ProfileInuseSpace,
},
})
}
// --------------- Logging and Configuration --------------- //
func setLogLevel() {
logLevel := strings.ToLower(os.Getenv("LOG_LEVEL"))
switch logLevel {
case "debug":
log.SetLevel(logrus.DebugLevel)
case "info":
log.SetLevel(logrus.InfoLevel)
case "warn":
log.SetLevel(logrus.WarnLevel)
case "error":
log.SetLevel(logrus.ErrorLevel)
default:
log.SetLevel(logrus.InfoLevel)
}
log.SetFormatter(&logrus.JSONFormatter{})
}
func debugLog(fields logrus.Fields, message string) {
if log.Level >= logrus.DebugLevel {
log.WithFields(fields).Debug(message)
}
}
// --------------- Alert Aggregator Logic --------------- //
// fingerprintAlert calculates a stable fingerprint from an alert's labels
func fingerprintAlert(alert template.Alert) string {
labelKeys := make([]string, 0, len(alert.Labels))
for k := range alert.Labels {
labelKeys = append(labelKeys, k)
}
sort.Strings(labelKeys)
var sb strings.Builder
for _, k := range labelKeys {
sb.WriteString(fmt.Sprintf("%s=%s;", k, alert.Labels[k]))
}
fields := logrus.Fields{
"request": requestInfo,
"status": status,
}
hash := fnv.New64a()
hash.Write([]byte(sb.String()))
return fmt.Sprintf("%x", hash.Sum64())
}
for key, value := range additionalFields {
fields[key] = value
}
// Suppose you have separate code that determines the notification priority
// and returns a formatted alert. For brevity, we'll define a stub here.
func FormattedAlert(alert template.Alert) (string, string, NtfyPriority, string, string) {
// In real code you might parse severity, isFalcoAlert, isPrometheusAlert, etc.
return "title", "body", PriorityDefault, "tags", "topic"
}
// --------------- HTTP Handlers --------------- //
func logRequestWithStatus(r *http.Request, status int, fields logrus.Fields, duration time.Duration) {
fields["method"] = r.Method
fields["url"] = r.URL.String()
fields["status"] = status
fields["duration"] = duration.Seconds()
httpRequestsTotal.WithLabelValues(r.URL.Path, r.Method, fmt.Sprintf("%d", status)).Inc()
requestDuration.WithLabelValues(r.URL.Path, r.Method, fmt.Sprintf("%d", status)).
Observe(duration.Seconds())
log.WithFields(fields).Info("Processed request")
}
// Handle POST requests with alert processing
func handlePost(w http.ResponseWriter, r *http.Request) {
start := time.Now()
bodyBytes, err := io.ReadAll(r.Body)
if err != nil {
httpRequestErrors.WithLabelValues(r.URL.Path, r.Method, "400").Inc()
http.Error(w, "Bad Request: Unable to read body", http.StatusBadRequest)
logRequestWithStatus(r, http.StatusBadRequest, logrus.Fields{"error": err}, time.Since(start))
return
}
r.Body = io.NopCloser(bytes.NewBuffer(bodyBytes))
var payload template.Data
if err := json.NewDecoder(r.Body).Decode(&payload); err != nil {
log.WithFields(logrus.Fields{
"error": err,
}).Error("Parsing alertmanager JSON failed")
httpRequestErrors.WithLabelValues(r.URL.Path, r.Method, "400").Inc()
http.Error(w, "Bad Request: Invalid JSON", http.StatusBadRequest)
logRequestWithStatus(r, http.StatusBadRequest, logrus.Fields{"error": err}, time.Since(start))
return
}
for _, alert := range payload.Alerts {
if alert.Status == string(model.AlertResolved) {
log.WithFields(logrus.Fields{
"alert": alert.Labels,
}).Info("Skipping resolved alert")
continue
}
// Optionally, we can wrap the entire batch in TagWrapper if you want a "batch" concept:
// pyroscope.TagWrapper(context.Background(), pyroscope.Labels("batch_id", "someID"), func(ctx context.Context) {
// // aggregator logic here
// })
if err := sendNotification(alert); err != nil {
// Log and return a generic error message
log.WithFields(logrus.Fields{
"error": err,
}).Error("Error sending notification")
http.Error(w, "Internal Server Error: Unable to send notification", http.StatusInternalServerError)
return
}
for _, incomingAlert := range payload.Alerts {
// If you want per-alert tagging, you can wrap aggregator logic in TagWrapper:
// e.g. Tag by alertname or severity
alertName := incomingAlert.Labels["alertname"]
pyroscope.TagWrapper(context.Background(),
pyroscope.Labels("alertname", alertName),
func(ctx context.Context) {
key := fingerprintAlert(incomingAlert)
alertsMu.Lock()
defer alertsMu.Unlock()
if incomingAlert.Status == string(model.AlertResolved) {
if _, exists := activeAlertsMap[key]; exists {
delete(activeAlertsMap, key)
aggregatorResolvedAlerts.Inc()
aggregatorActiveAlerts.Set(float64(len(activeAlertsMap)))
log.WithFields(logrus.Fields{"alert": alertName}).
Info("Resolved alert removed")
}
} else {
// Firing
if _, exists := activeAlertsMap[key]; exists {
// update existing alert
activeAlertsMap[key] = incomingAlert
aggregatorUpdatedAlerts.Inc()
log.WithFields(logrus.Fields{"alert": alertName}).
Info("Updated existing alert")
} else {
// new alert
activeAlertsMap[key] = incomingAlert
aggregatorNewAlerts.Inc()
aggregatorActiveAlerts.Set(float64(len(activeAlertsMap)))
log.WithFields(logrus.Fields{"alert": alertName}).
Info("New firing alert; sending notification")
// Send notification
if err := sendNotification(incomingAlert); err != nil {
httpRequestErrors.WithLabelValues(r.URL.Path, r.Method, "500").Inc()
logRequestWithStatus(r, http.StatusInternalServerError, logrus.Fields{"error": err}, time.Since(start))
http.Error(w, "Internal Server Error: Unable to send notification", http.StatusInternalServerError)
return
}
}
}
},
)
}
w.WriteHeader(http.StatusOK)
logRequestWithStatus(r, http.StatusOK, logrus.Fields{"msg": "POST request processed successfully"})
logRequestWithStatus(r, http.StatusOK, logrus.Fields{"msg": "POST / processed"}, time.Since(start))
}
// Function to send notification (external request)
func sendNotification(alert template.Alert) error {
formattedAlertTitle, formattedAlertBody, alertPriority, alertTags, alertTopic := FormattedAlert(alert)
log.WithFields(logrus.Fields{
"alert": formattedAlertTitle,
}).Info("Processing alert")
func WebhookHandler(w http.ResponseWriter, r *http.Request) {
switch r.Method {
case http.MethodPost:
handlePost(w, r)
default:
w.WriteHeader(http.StatusMethodNotAllowed)
logRequestWithStatus(r, http.StatusMethodNotAllowed, logrus.Fields{"msg": "method not allowed"}, 0)
}
}
url := fmt.Sprintf("%s/%s", os.Getenv("NTFY_SERVER_URL"), alertTopic)
req, err := http.NewRequest("POST", url, strings.NewReader(formattedAlertBody))
// --------------- Notification Code --------------- //
func sendNotification(alert template.Alert) error {
title, body, priority, tags, topic := FormattedAlert(alert)
ntfyURL := fmt.Sprintf("%s/%s", os.Getenv("NTFY_SERVER_URL"), topic)
req, err := http.NewRequest("POST", ntfyURL, strings.NewReader(body))
if err != nil {
return fmt.Errorf("building request failed: %w", err)
return fmt.Errorf("request build error: %w", err)
}
req.Header.Set("Title", formattedAlertTitle)
req.Header.Set("Priority", fmt.Sprintf("%d", alertPriority))
req.Header.Set("Title", title)
req.Header.Set("Priority", fmt.Sprintf("%d", priority))
req.Header.Set("Markdown", "yes")
req.Header.Set("Tags", alertTags)
req.Header.Set("Tags", tags)
username, password := os.Getenv("NTFY_USER"), os.Getenv("NTFY_PASS")
if password != "" {
req.SetBasicAuth(username, password)
} else if username != "" {
req.SetBasicAuth(username, "")
user, pass := os.Getenv("NTFY_USER"), os.Getenv("NTFY_PASS")
if pass != "" {
req.SetBasicAuth(user, pass)
} else if user != "" {
req.SetBasicAuth(user, "")
}
resp, err := http.DefaultClient.Do(req)
if err != nil {
return fmt.Errorf("sending request failed: %w", err)
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return fmt.Errorf("notification sending failed with status %d", resp.StatusCode)
}
return nil
}
// WebhookHandler processes all incoming HTTP requests
func WebhookHandler(w http.ResponseWriter, r *http.Request) {
if r.Method == http.MethodPost {
handlePost(w, r)
} else {
w.WriteHeader(http.StatusMethodNotAllowed)
logRequestWithStatus(r, http.StatusMethodNotAllowed, logrus.Fields{"msg": "Method not allowed"})
}
}
// --------------- Main --------------- //
func main() {
// Set logrus to output in JSON format
log.SetFormatter(&logrus.JSONFormatter{})
// 1. Logging
setLogLevel()
// 2. Start Pyroscope (continuous profiling)
startPyroscope()
// 3. Validate environment variables
for _, v := range []string{"HTTP_ADDRESS", "HTTP_PORT", "NTFY_SERVER_URL"} {
if len(strings.TrimSpace(os.Getenv(v))) == 0 {
panic("Environment variable " + v + " not set!")
}
}
_, err := url.Parse(os.Getenv("NTFY_SERVER_URL"))
if err != nil {
log.Fatal("Environment variable NTFY_SERVER_URL is not a valid URL")
if _, err := url.Parse(os.Getenv("NTFY_SERVER_URL")); err != nil {
log.Fatal("Environment variable NTFY_SERVER_URL is not valid:", err)
}
// 4. HTTP Handlers
http.HandleFunc("/", WebhookHandler)
listenAddr := fmt.Sprintf("%v:%v", os.Getenv("HTTP_ADDRESS"), os.Getenv("HTTP_PORT"))
// Prometheus metrics at /metrics
http.Handle("/metrics", promhttp.Handler())
// pprof is automatically at /debug/pprof/
// 5. Listen
listenAddr := fmt.Sprintf("%s:%s", os.Getenv("HTTP_ADDRESS"), os.Getenv("HTTP_PORT"))
log.WithFields(logrus.Fields{
"listen_address": listenAddr,
"ntfy_server_url": os.Getenv("NTFY_SERVER_URL"),
}).Info("Listening for HTTP requests (webhooks)")
log.Fatal(http.ListenAndServe(listenAddr, nil))
}