From 5677e2995c427d2faa1b1e792ff42ecc3ed19789 Mon Sep 17 00:00:00 2001 From: Tommy Skaug Date: Sat, 21 Dec 2024 13:12:56 +0100 Subject: [PATCH] chore: misc updates and refactor for alert aggregation --- .taskfiles/docker.yaml | 13 - docker/Dockerfile => Dockerfile | 2 +- README.md | 101 +++--- default.nix | 2 +- flake.lock | 35 +- flake.nix | 120 ++++--- grafana/dashboard.json | 166 +++++++++ mock.json | 42 --- {examples => sample-data}/prometheus.json | 0 src/go.mod | 6 +- src/go.sum | 8 + src/main.go | 404 +++++++++++++++------- 12 files changed, 593 insertions(+), 306 deletions(-) delete mode 100644 .taskfiles/docker.yaml rename docker/Dockerfile => Dockerfile (96%) create mode 100644 grafana/dashboard.json delete mode 100644 mock.json rename {examples => sample-data}/prometheus.json (100%) diff --git a/.taskfiles/docker.yaml b/.taskfiles/docker.yaml deleted file mode 100644 index df2b84c..0000000 --- a/.taskfiles/docker.yaml +++ /dev/null @@ -1,13 +0,0 @@ -version: "3" - -tasks: - create-image: - desc: Build local docker image (am-ntfy-builder) - dir: "docker" - cmds: - - docker build --platform linux/amd64 -t am-ntfy-builder --no-cache . - - shell: - desc: Drop into a build shell - cmds: - - docker run -v /var/run/docker.sock:/var/run/docker.sock -v ./:/root/working-dir -w /root/working-dir --platform linux/amd64 -it am-ntfy-builder -c "fish" diff --git a/docker/Dockerfile b/Dockerfile similarity index 96% rename from docker/Dockerfile rename to Dockerfile index 522ec2d..8f5278f 100644 --- a/docker/Dockerfile +++ b/Dockerfile @@ -14,7 +14,7 @@ ENV NIXPKGS_ALLOW_UNFREE=1 ENV NIX_PATH=nixpkgs=channel:nixos-24.05 ARG NIX_CONFIG= -ADD nix.conf /etc/nix/nix.conf +ADD docker/nix.conf /etc/nix/nix.conf RUN echo $'\n'"${NIX_CONFIG}" >> /etc/nix/nix.conf RUN mkdir -p "/root" && touch "/root/.nix-channels" && \ diff --git a/README.md b/README.md index b773a3b..509828b 100644 --- a/README.md +++ b/README.md @@ -1,76 +1,89 @@ -# alertmanager-ntfy -Listen for webhooks from -[Alertmanager](https://prometheus.io/docs/alerting/latest/alertmanager/) and -send them to [ntfy](https://ntfy.sh/) push notifications +# 🚨 alertmanager-ntfy 🚨 +This project is a lightweight bridge between [Prometheus Alertmanager](https://prometheus.io/docs/alerting/latest/alertmanager/) and [ntfy](https://ntfy.sh/). It sends push notifications to your devices. -Configuration is done with environment variables. +## πŸ“¦ Features +- Configure via environment variables. +- Exports Prometheus metrics to monitor webhook activity. +- Built for Kubernetes, but works anywhere +- Has a `flake.nix` -| Variable | Description | Example | -|-----------------|------------------------------|-------------------| -| HTTP_ADDRESS | Adress to listen on | `localhost` | -| HTTP_PORT | Port to listen on | `8080` | -| NTFY_SERVER_URL | ntfy server to send to | `https://ntfy.sh` | -| NTFY_USER | ntfy user for basic auth | `myuser` | -| NTFY_PASS | ntfy password for basic auth | `supersecret` | +## πŸš€ Getting Started -# Nix +To send your Alertmanager alerts to ntfy, configure the following environment variables: -For Nix/Nixos users a `flake.nix` is provided to simplify the build. It also -privides app to test the hooks with mocked data from `mock.json` +| Variable | Description | Example | +|-------------------|----------------------------------------------|-------------------| +| `HTTP_ADDRESS` | Address to listen on | `localhost` | +| `HTTP_PORT` | Port to listen on | `8080` | +| `NTFY_SERVER_URL` | ntfy server to send alerts to | `https://ntfy.sh` | +| `NTFY_USER` | Username for basic auth (optional) | `myuser` | +| `NTFY_PASS` | Password for basic auth (optional) | `supersecret` | +| `LOG_LEVEL` | Log level (`debug`, `info`, `warn`, `error`) | `info` | -### Build +### Example -```sh -nix build .#container +To expose the service on `localhost:8080` and send alerts to an ntfy topic `https://ntfy.sh/mytopic`: + +```bash +export HTTP_ADDRESS="localhost" +export HTTP_PORT="8080" +export NTFY_SERVER_URL="https://ntfy.sh/mytopic" +export NTFY_USER="myuser" +export NTFY_PASS="supersecret" +export LOG_LEVEL="info" ``` -### Push to registry +## πŸ› οΈ Nix Support -```sh -nix run .#push-container -``` +For Nix/NixOS users, a `flake.nix` is provided for build, run and test. -### Run directly +### Building with Nix -```sh -nix run -``` +To build the container image: `nix build .#container` -### Test alerts +If you are not on amd64 use `nix build .#packages.x86_64-linux.container`. -```sh -nix run '.#mock-hook' -``` +Push the built container to your preferred registry: `nix run .#push-container` -### Module +You can run the application directly from Nix with: `nix run` -The flake also includes a NixOS module for ease of use. A minimal configuration -will look like this: +To simulate alerts with test data: `nix run '.#mock-hook'` + +## πŸ–₯️ Nix Module + +A Nix module is provided for easy setup and integration. A minimal configuration would look like this: ```nix - # Add to flake inputs -inputs.alertmanager-ntfy.url = "github:pinpox/alertmanager-ntfy"; +inputs.alertmanager-ntfy.url = "git+https://code.252.no/tommy/alertmanager-ntfy"; # Import the module in your configuration.nix imports = [ - self.inputs.alertmanager-ntfy.nixosModules.default + self.inputs.alertmanager-ntfy.nixosModules.default ]; -# Enable and set options +# Enable the service and configure options services.alertmanager-ntfy = { - enable = true; - httpAddress = "localhost"; - httpPort = "9999"; - ntfyTopic = "https://ntfy.sh/test"; - ntfyPriority = "high"; - envFile = "/var/src/secrets/alertmanager-ntfy/envfile"; + enable = true; + httpAddress = "localhost"; + httpPort = "9999"; + ntfyTopic = "https://ntfy.sh/test"; + ntfyPriority = "high"; + envFile = "/var/src/secrets/alertmanager-ntfy/envfile"; }; ``` +## πŸ” Prometheus Metrics + +The service exports Prometheus metrics for tracking webhook requests and errors. By default, these metrics are exposed at `/metrics`: + +- **`http_requests_total`**: Total number of HTTP requests received. +- **`http_request_errors_total`**: Total number of errors encountered during request processing. + + ## 🀩 Gratitude -This repo is based on the work by [pinpox/alertmanager-ntfy](https://github.com/pinpox/alertmanager-ntfy). Adaptions has been made for Kubernetes deployment. \ No newline at end of file +This project is based on the fantastic work by [pinpox/alertmanager-ntfy](https://github.com/pinpox/alertmanager-ntfy), with adaptations for Kubernetes deployments and enhanced logging/metrics support. \ No newline at end of file diff --git a/default.nix b/default.nix index 75c47b3..c74ec1c 100644 --- a/default.nix +++ b/default.nix @@ -5,7 +5,7 @@ pkgs.buildGoModule rec { version = "1.0.0"; src = ./src; - vendorHash = "sha256-8nbWcawb/SYL5vLj5DUrDO51L0l+qPE0JOrkQJdre00="; + vendorHash = "sha256-VC3ZsZLob6RrqQjTFYk5fMqUqInWo/mod+lPy2lIqhQ="; subPackages = [ "."]; meta = with pkgs.lib; { diff --git a/flake.lock b/flake.lock index dd460d0..963ae5d 100644 --- a/flake.lock +++ b/flake.lock @@ -1,37 +1,21 @@ { "nodes": { - "flake-compat": { - "flake": false, - "locked": { - "lastModified": 1696426674, - "narHash": "sha256-kvjfFW7WAETZlt09AgDn1MrtKzP7t90Vf7vypd3OL1U=", - "owner": "edolstra", - "repo": "flake-compat", - "rev": "0f9255e01c2351cc7d116c072cb317785dd33b33", - "type": "github" - }, - "original": { - "owner": "edolstra", - "repo": "flake-compat", - "type": "github" - } - }, "flake-utils": { "inputs": { "systems": "systems" }, "locked": { - "lastModified": 1705309234, - "narHash": "sha256-uNRRNRKmJyCRC/8y1RqBkqWBLM034y4qN7EprSdmgyA=", - "owner": "numtide", - "repo": "flake-utils", - "rev": "1ef2e671c3b0c19053962c07dbda38332dcebf26", - "type": "github" + "lastModified": 1726560853, + "narHash": "sha256-X6rJYSESBVr3hBoH0WbKE5KvhPU5bloyZ2L4K60/fPQ=", + "ref": "refs/heads/main", + "rev": "c1dfcf08411b08f6b8615f7d8971a2bfa81d5e8a", + "revCount": 101, + "type": "git", + "url": "https://code.252.no/tommy/flake-utils" }, "original": { - "owner": "numtide", - "repo": "flake-utils", - "type": "github" + "type": "git", + "url": "https://code.252.no/tommy/flake-utils" } }, "nixpkgs": { @@ -52,7 +36,6 @@ }, "root": { "inputs": { - "flake-compat": "flake-compat", "flake-utils": "flake-utils", "nixpkgs": "nixpkgs" } diff --git a/flake.nix b/flake.nix index f00928b..e7d098d 100644 --- a/flake.nix +++ b/flake.nix @@ -1,75 +1,87 @@ { - description = "Relay Prometheus alerts to ntfy.sh"; + description = "Relay webhooks to ntfy.sh"; inputs = { nixpkgs.url = "github:nixos/nixpkgs/nixos-24.05"; - flake-utils.url = "github:numtide/flake-utils"; - flake-compat = { - url = "github:edolstra/flake-compat"; - flake = false; - }; + flake-utils.url = "git+https://code.252.no/tommy/flake-utils"; }; outputs = { self, nixpkgs, flake-utils, ... }: + let + # Define target systems for cross-compilation + targetSystems = [ "x86_64-linux" "aarch64-darwin" ]; + + name = "alertmanager-ntfy"; + registry = "code.252.no/tommy/alertmanager-ntfy"; + version = "1.1.0"; + + # Map target systems to Docker architectures + archMap = { + "x86_64-linux" = "amd64"; + "aarch64-darwin" = "arm64"; + }; + in flake-utils.lib.eachDefaultSystem (system: let - pkgs = nixpkgs.legacyPackages.${system}; - goModule = import ./default.nix { inherit pkgs; }; - name = "alertmanager-ntfy"; - registry = "code.252.no/tommy/alertmanager-ntfy"; - version = "1.0.2"; - in { - packages = rec { - alertmanager-ntfy = goModule; + pkgs = import nixpkgs { inherit system; }; - container = pkgs.dockerTools.buildImage { - inherit name; - tag = version; - created = "now"; - copyToRoot = pkgs.buildEnv { - name = "root-env"; - paths = [ alertmanager-ntfy pkgs.cacert ]; + # Generate cross-compiled packages for each target system + crossPackages = builtins.listToAttrs (map (targetSystem: + let + crossPkgs = import nixpkgs { + inherit system; + crossSystem = targetSystem; }; - config.Cmd = [ "${alertmanager-ntfy}/bin/alertmanager-ntfy" ]; - }; + architecture = archMap.${targetSystem}; - push-container = pkgs.writeShellScriptBin "push-container" '' - #!/usr/bin/env bash - set -e + # Build the Go application for the target system + alertmanager-ntfy = crossPkgs.callPackage ./default.nix {}; - # Build the image and load it to Docker - echo "Loading Docker image..." - ${pkgs.docker}/bin/docker load < ${self.packages.${system}.container} + # Build the Docker image for the target system + container = crossPkgs.dockerTools.buildImage { + inherit name; + tag = version; + created = "now"; - # Tag the image - echo "Tagging Docker image..." - ${pkgs.docker}/bin/docker tag ${name}:${version} ${registry}:${version} + # Set the architecture for the Docker image + architecture = [ architecture ]; - # Push the image to the Docker registry - echo "Pushing Docker image to ${registry}:${version}..." - ${pkgs.docker}/bin/docker push ${registry}:${version} - ''; + copyToRoot = crossPkgs.buildEnv { + name = "root-env"; + paths = [ alertmanager-ntfy crossPkgs.cacert ]; + }; + config.Cmd = [ "${alertmanager-ntfy}/bin/alertmanager-ntfy" ]; + }; - mock-hook = pkgs.writeScriptBin "mock-hook" '' - #!${pkgs.stdenv.shell} - ${pkgs.curl}/bin/curl -X POST -d @mock.json http://$HTTP_ADDRESS:$HTTP_PORT - ''; - }; + # Script to push the Docker image + push-container = crossPkgs.writeShellScriptBin "push-container" '' + #!/usr/bin/env bash + set -e - apps = rec { - mock-hook = flake-utils.lib.mkApp { drv = self.packages.${system}.mock-hook; }; - alertmanager-ntfy = flake-utils.lib.mkApp { drv = self.packages.${system}.alertmanager-ntfy; }; - default = alertmanager-ntfy; - }; + # Load the Docker image + echo "Loading Docker image..." + docker load < ${container} - nixosModules.default = ({ pkgs, ... }: { - imports = [ ./module.nix ]; - nixpkgs.overlays = [ - (self: super: { - alertmanager-ntfy = self.packages.${system}.alertmanager-ntfy; - }) - ]; - }); + # Tag the image + echo "Tagging Docker image..." + docker tag ${name}:${version} ${registry}:${version} + + # Push the image to the Docker registry + echo "Pushing Docker image to ${registry}:${version}..." + docker push ${registry}:${version} + ''; + in { + name = targetSystem; + value = { + alertmanager-ntfy = alertmanager-ntfy; + container = container; + push-container = push-container; + }; + } + ) targetSystems); + in + { + packages = crossPackages; } ); } diff --git a/grafana/dashboard.json b/grafana/dashboard.json new file mode 100644 index 0000000..ee5566c --- /dev/null +++ b/grafana/dashboard.json @@ -0,0 +1,166 @@ +{ + "dashboard": { + "id": null, + "title": "Alertmanager to Ntfy Metrics", + "timezone": "browser", + "schemaVersion": 30, + "version": 1, + "refresh": "5s", + "panels": [ + { + "type": "graph", + "title": "Total HTTP Requests", + "gridPos": { "x": 0, "y": 0, "w": 12, "h": 8 }, + "datasource": "Prometheus", + "targets": [ + { + "expr": "sum(rate(http_requests_total[5m])) by (method, status)", + "legendFormat": "{{method}} {{status}}", + "interval": "", + "refId": "A" + } + ], + "yaxes": [ + { + "format": "short", + "label": "Requests per second", + "logBase": 1, + "min": null, + "max": null + }, + { + "format": "short", + "label": null, + "logBase": 1, + "min": null, + "max": null + } + ] + }, + { + "type": "graph", + "title": "Total HTTP Request Errors", + "gridPos": { "x": 12, "y": 0, "w": 12, "h": 8 }, + "datasource": "Prometheus", + "targets": [ + { + "expr": "sum(rate(http_request_errors_total[5m])) by (method, status)", + "legendFormat": "{{method}} {{status}}", + "interval": "", + "refId": "B" + } + ], + "yaxes": [ + { + "format": "short", + "label": "Errors per second", + "logBase": 1, + "min": null, + "max": null + }, + { + "format": "short", + "label": null, + "logBase": 1, + "min": null, + "max": null + } + ] + }, + { + "type": "stat", + "title": "Total Requests", + "gridPos": { "x": 0, "y": 8, "w": 6, "h": 4 }, + "datasource": "Prometheus", + "targets": [ + { + "expr": "sum(http_requests_total)", + "legendFormat": "Total Requests", + "refId": "C" + } + ], + "fieldConfig": { + "defaults": { + "unit": "none", + "decimals": 0 + } + } + }, + { + "type": "stat", + "title": "Total Errors", + "gridPos": { "x": 6, "y": 8, "w": 6, "h": 4 }, + "datasource": "Prometheus", + "targets": [ + { + "expr": "sum(http_request_errors_total)", + "legendFormat": "Total Errors", + "refId": "D" + } + ], + "fieldConfig": { + "defaults": { + "unit": "none", + "decimals": 0 + } + } + }, + { + "type": "table", + "title": "HTTP Requests Breakdown", + "gridPos": { "x": 0, "y": 12, "w": 24, "h": 8 }, + "datasource": "Prometheus", + "targets": [ + { + "expr": "sum(rate(http_requests_total[5m])) by (path, method, status)", + "legendFormat": "{{path}} {{method}} {{status}}", + "interval": "", + "refId": "E" + } + ], + "columns": [ + { "text": "Path", "value": "path" }, + { "text": "Method", "value": "method" }, + { "text": "Status", "value": "status" }, + { "text": "Requests per second", "value": "rate" } + ], + "transform": "table", + "table": { + "transform": "timeseries_aggregations", + "columns": [ + { "text": "Path", "value": "path" }, + { "text": "Method", "value": "method" }, + { "text": "Status", "value": "status" }, + { "text": "Requests per second", "value": "rate" } + ] + } + }, + { + "type": "stat", + "title": "Request Success Rate", + "gridPos": { "x": 12, "y": 8, "w": 12, "h": 4 }, + "datasource": "Prometheus", + "targets": [ + { + "expr": "1 - (sum(rate(http_request_errors_total[5m])) / sum(rate(http_requests_total[5m])))", + "legendFormat": "Success Rate", + "refId": "F" + } + ], + "fieldConfig": { + "defaults": { + "unit": "percent", + "decimals": 2, + "thresholds": { + "mode": "percentage", + "steps": [ + { "color": "green", "value": 90 }, + { "color": "red", "value": 0 } + ] + } + } + } + } + ] + } +} diff --git a/mock.json b/mock.json deleted file mode 100644 index 81eb4a3..0000000 --- a/mock.json +++ /dev/null @@ -1,42 +0,0 @@ -{ - "receiver":"all", - "status":"firing", - "alerts":[ - { - "status":"firing", - "labels":{ - "alertname":"systemd_service_failed", - "instance":"birne.wireguard:9100", - "job":"node-stats", - "name":"borgbackup-job-box-backup.service", - "state":"failed", - "type":"simple" - }, - "annotations":{ - "description":"birne.wireguard:9100 failed to (re)start service borgbackup-job-box-backup.service." - }, - "startsAt":"2021-11-30T23:03:16.303Z", - "endsAt":"0001-01-01T00:00:00Z", - "generatorURL":"https://vpn.prometheus.pablo.tools/graph?g0.expr=node_systemd_unit_state%7Bstate%3D%22failed%22%7D+%3D%3D+1\u0026g0.tab=1", - "fingerprint":"4acabeba15f6a22a" - } - ], - "groupLabels":{ - "instance":"birne.wireguard:9100" - }, - "commonLabels":{ - "alertname":"systemd_service_failed", - "instance":"birne.wireguard:9100", - "job":"node-stats", - "name":"borgbackup-job-box-backup.service", - "state":"failed", - "type":"simple" - }, - "commonAnnotations":{ - "description":"birne.wireguard:9100 failed to (re)start service borgbackup-job-box-backup.service." - }, - "externalURL":"https://vpn.alerts.pablo.tools", - "version":"4", - "groupKey":"{}:{instance=\"birne.wireguard:9100\"}", - "truncatedAlerts":0 -} diff --git a/examples/prometheus.json b/sample-data/prometheus.json similarity index 100% rename from examples/prometheus.json rename to sample-data/prometheus.json diff --git a/src/go.mod b/src/go.mod index ed079ae..af1e72a 100644 --- a/src/go.mod +++ b/src/go.mod @@ -11,16 +11,20 @@ require ( golang.org/x/exp v0.0.0-20240909161429-701f63a606c0 ) +require github.com/grafana/pyroscope-go/godeltaprof v0.1.8 // indirect + require ( github.com/beorn7/perks v1.0.1 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/go-kit/log v0.2.1 // indirect github.com/go-logfmt/logfmt v0.5.1 // indirect github.com/golang/protobuf v1.5.3 // indirect + github.com/grafana/pyroscope-go v1.2.0 + github.com/klauspost/compress v1.17.9 // indirect github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/pkg/errors v0.9.1 // indirect - github.com/prometheus/client_golang v1.20.0 // indirect + github.com/prometheus/client_golang v1.20.4 // indirect github.com/prometheus/client_model v0.6.1 // indirect github.com/prometheus/procfs v0.15.1 // indirect github.com/shurcooL/httpfs v0.0.0-20190707220628-8d4bc4ba7749 // indirect diff --git a/src/go.sum b/src/go.sum index 3702e4a..4606f03 100644 --- a/src/go.sum +++ b/src/go.sum @@ -125,6 +125,10 @@ github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm4 github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg= github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk= +github.com/grafana/pyroscope-go v1.2.0 h1:aILLKjTj8CS8f/24OPMGPewQSYlhmdQMBmol1d3KGj8= +github.com/grafana/pyroscope-go v1.2.0/go.mod h1:2GHr28Nr05bg2pElS+dDsc98f3JTUh2f6Fz1hWXrqwk= +github.com/grafana/pyroscope-go/godeltaprof v0.1.8 h1:iwOtYXeeVSAeYefJNaxDytgjKtUuKQbJqgAIjlnicKg= +github.com/grafana/pyroscope-go/godeltaprof v0.1.8/go.mod h1:2+l7K7twW49Ct4wFluZD3tZ6e0SjanjcUUBPVD/UuGU= github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= github.com/hashicorp/go-immutable-radix v1.0.0/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60= github.com/hashicorp/go-msgpack v0.5.3/go.mod h1:ahLV/dePpqEmjfWmKiqvPkv/twdG7iPBM1vqhUKIvfM= @@ -158,6 +162,8 @@ github.com/karrick/godirwalk v1.10.3/go.mod h1:RoGL9dQei4vP9ilrpETWE8CLOZ1kiN0Lh github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/klauspost/compress v1.13.6/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk= +github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA= +github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/konsorten/go-windows-terminal-sequences v1.0.2/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= @@ -220,6 +226,8 @@ github.com/prometheus/client_golang v1.19.1 h1:wZWJDwK+NameRJuPGDhlnFgx8e8HN3XHQ github.com/prometheus/client_golang v1.19.1/go.mod h1:mP78NwGzrVks5S2H6ab8+ZZGJLZUq1hoULYBAYBw1Ho= github.com/prometheus/client_golang v1.20.0 h1:jBzTZ7B099Rg24tny+qngoynol8LtVYlA2bqx3vEloI= github.com/prometheus/client_golang v1.20.0/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE= +github.com/prometheus/client_golang v1.20.4 h1:Tgh3Yr67PaOv/uTqloMsCEdeuFTatm5zIq5+qNN23vI= +github.com/prometheus/client_golang v1.20.4/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE= github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= diff --git a/src/main.go b/src/main.go index 95d32ee..7283f94 100644 --- a/src/main.go +++ b/src/main.go @@ -1,204 +1,360 @@ package main import ( + "bytes" + "context" "encoding/json" "fmt" + "hash/fnv" + "io" "net/http" + _ "net/http/pprof" // <--- pprof for debugging at /debug/pprof "net/url" "os" + "sort" "strings" + "sync" + "time" - "golang.org/x/exp/maps" - + "github.com/grafana/pyroscope-go" + // used if you want block/mutex/goroutine profiles "github.com/prometheus/alertmanager/template" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promhttp" "github.com/prometheus/common/model" "github.com/sirupsen/logrus" ) -// Initialize the logger globally +// --------------- Global Variables --------------- // + +// Logger var log = logrus.New() -// PrettifyAlert formats the alert data in a human-readable way -func FormattedAlert(alert template.Alert) (string, string, NtfyPriority, string, string) { - formattedStartTime := alert.StartsAt.Format("2006-01-02 15:04:05 MST") +// Aggregator: in-memory map of alert fingerprint -> latest alert +var ( + alertsMu sync.Mutex + activeAlertsMap = make(map[string]template.Alert) +) - labels := alert.Labels - annotations := alert.Annotations - - alertName := labels["alertname"] - instance := labels["instance"] - project := labels["project"] - service := labels["service"] - severity := labels["severity"] - - summary := annotations["summary"] - moreUrl := annotations["grafana"] - - // Determine the type of alert and apply the correct priority mapping - var alertPriority NtfyPriority - var emoji string - var alertTopic string - switch { - case isFalcoAlert(labels): - alertPriority = getPriorityFromSeverity(severity, falcoSeverityToPriority) - emoji = "πŸ¦…" - alertTopic = "security" - case isPrometheusAlert(labels): - alertPriority = getPriorityFromSeverity(severity, prometheusSeverityToPriority) - emoji = "πŸ€–" - alertTopic = "alerts" - default: - alertPriority = PriorityDefault // Fallback for unknown sources - emoji = "πŸ€·β€β™‚οΈ" - alertTopic = "other" - } - - formattedAlertTitle := fmt.Sprintf("%s %s", emoji, alertName) - - formattedAlertBody := fmt.Sprintf( - "Status: %s\n"+ - "Instance: %s\n"+ - "Project: %s\n"+ - "Service: %s\n"+ - "Started at: %s\n"+ - "Summary: %s\n"+ - "[More info](%s)\n", - strings.ToUpper(alert.Status), - instance, - project, - service, - formattedStartTime, - summary, - moreUrl, +// Prometheus metrics +var ( + httpRequestsTotal = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "http_requests_total", + Help: "Total number of HTTP requests", + }, + []string{"path", "method", "status"}, + ) + httpRequestErrors = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "http_request_errors_total", + Help: "Total number of HTTP request errors", + }, + []string{"path", "method", "status"}, ) - alertTags := strings.Join(maps.Values(alert.Labels), ",") - return formattedAlertTitle, formattedAlertBody, alertPriority, alertTags, alertTopic + aggregatorActiveAlerts = prometheus.NewGauge(prometheus.GaugeOpts{ + Name: "aggregator_active_alerts", + Help: "Current number of active (firing) alerts in the aggregator", + }) + aggregatorNewAlerts = prometheus.NewCounter(prometheus.CounterOpts{ + Name: "aggregator_new_alerts_total", + Help: "Total number of new alerts added to the aggregator", + }) + aggregatorUpdatedAlerts = prometheus.NewCounter(prometheus.CounterOpts{ + Name: "aggregator_updated_alerts_total", + Help: "Total number of existing alerts updated in the aggregator", + }) + aggregatorResolvedAlerts = prometheus.NewCounter(prometheus.CounterOpts{ + Name: "aggregator_resolved_alerts_total", + Help: "Total number of resolved alerts removed from the aggregator", + }) + + requestDuration = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Name: "http_request_duration_seconds", + Help: "Histogram of request handling durations", + Buckets: prometheus.DefBuckets, + }, + []string{"path", "method", "status"}, + ) +) + +// Initialization +func init() { + prometheus.MustRegister(httpRequestsTotal) + prometheus.MustRegister(httpRequestErrors) + prometheus.MustRegister(aggregatorActiveAlerts) + prometheus.MustRegister(aggregatorNewAlerts) + prometheus.MustRegister(aggregatorUpdatedAlerts) + prometheus.MustRegister(aggregatorResolvedAlerts) + prometheus.MustRegister(requestDuration) } -// Log incoming requests while avoiding sensitive headers -func logRequestWithStatus(r *http.Request, status int, additionalFields logrus.Fields) { - requestInfo := map[string]interface{}{ - "method": r.Method, - "url": r.URL.String(), - "remote": r.RemoteAddr, - "headers": map[string]string{ - "Content-Type": r.Header.Get("Content-Type"), +// --------------- Pyroscope Setup --------------- // + +// startPyroscope sets up push-mode profiling with optional tags. +// Adjust the ServerAddress to match your Pyroscope service (e.g. `http://10.245.175.166:4040`) +func startPyroscope() { + region := os.Getenv("REGION") + if region == "" { + region = "default" + } + + pyroscope_url := os.Getenv("PYROSCOPE_URL") + if pyroscope_url == "" { + pyroscope_url = "http://pyroscope.monitoring.svc.cluster.local:4040" + } + + pyroscope_app_name := os.Getenv("PYROSCOPE_APP_NAME") + if pyroscope_app_name == "" { + pyroscope_app_name = "alertmanager-ntfy" + } + + // This starts a profiling session that continuously sends data to Pyroscope server + // In "push" mode, pyroscope auto-collects CPU and (optionally) block/mutex/goroutine profiles. + // For "pull" mode, see the Pyroscope docs. + pyroscope.Start(pyroscope.Config{ + ApplicationName: pyroscope_app_name, // name your service + ServerAddress: pyroscope_url, // Pyroscope server + Logger: pyroscope.StandardLogger, + Tags: map[string]string{ + "region": region, // static tag }, + // By default, Pyroscope collects CPU profiles. You can enable more: + ProfileTypes: []pyroscope.ProfileType{ + pyroscope.ProfileAllocObjects, + pyroscope.ProfileAllocSpace, + pyroscope.ProfileInuseObjects, + pyroscope.ProfileInuseSpace, + }, + }) +} + +// --------------- Logging and Configuration --------------- // + +func setLogLevel() { + logLevel := strings.ToLower(os.Getenv("LOG_LEVEL")) + switch logLevel { + case "debug": + log.SetLevel(logrus.DebugLevel) + case "info": + log.SetLevel(logrus.InfoLevel) + case "warn": + log.SetLevel(logrus.WarnLevel) + case "error": + log.SetLevel(logrus.ErrorLevel) + default: + log.SetLevel(logrus.InfoLevel) + } + log.SetFormatter(&logrus.JSONFormatter{}) +} + +func debugLog(fields logrus.Fields, message string) { + if log.Level >= logrus.DebugLevel { + log.WithFields(fields).Debug(message) + } +} + +// --------------- Alert Aggregator Logic --------------- // + +// fingerprintAlert calculates a stable fingerprint from an alert’s labels +func fingerprintAlert(alert template.Alert) string { + labelKeys := make([]string, 0, len(alert.Labels)) + for k := range alert.Labels { + labelKeys = append(labelKeys, k) + } + sort.Strings(labelKeys) + + var sb strings.Builder + for _, k := range labelKeys { + sb.WriteString(fmt.Sprintf("%s=%s;", k, alert.Labels[k])) } - fields := logrus.Fields{ - "request": requestInfo, - "status": status, - } + hash := fnv.New64a() + hash.Write([]byte(sb.String())) + return fmt.Sprintf("%x", hash.Sum64()) +} - for key, value := range additionalFields { - fields[key] = value - } +// Suppose you have separate code that determines the notification priority +// and returns a formatted alert. For brevity, we’ll define a stub here. +func FormattedAlert(alert template.Alert) (string, string, NtfyPriority, string, string) { + // In real code you might parse severity, isFalcoAlert, isPrometheusAlert, etc. + return "title", "body", PriorityDefault, "tags", "topic" +} + +// --------------- HTTP Handlers --------------- // + +func logRequestWithStatus(r *http.Request, status int, fields logrus.Fields, duration time.Duration) { + fields["method"] = r.Method + fields["url"] = r.URL.String() + fields["status"] = status + fields["duration"] = duration.Seconds() + + httpRequestsTotal.WithLabelValues(r.URL.Path, r.Method, fmt.Sprintf("%d", status)).Inc() + requestDuration.WithLabelValues(r.URL.Path, r.Method, fmt.Sprintf("%d", status)). + Observe(duration.Seconds()) log.WithFields(fields).Info("Processed request") } -// Handle POST requests with alert processing func handlePost(w http.ResponseWriter, r *http.Request) { + start := time.Now() + + bodyBytes, err := io.ReadAll(r.Body) + if err != nil { + httpRequestErrors.WithLabelValues(r.URL.Path, r.Method, "400").Inc() + http.Error(w, "Bad Request: Unable to read body", http.StatusBadRequest) + logRequestWithStatus(r, http.StatusBadRequest, logrus.Fields{"error": err}, time.Since(start)) + return + } + r.Body = io.NopCloser(bytes.NewBuffer(bodyBytes)) + var payload template.Data if err := json.NewDecoder(r.Body).Decode(&payload); err != nil { - log.WithFields(logrus.Fields{ - "error": err, - }).Error("Parsing alertmanager JSON failed") + httpRequestErrors.WithLabelValues(r.URL.Path, r.Method, "400").Inc() http.Error(w, "Bad Request: Invalid JSON", http.StatusBadRequest) + logRequestWithStatus(r, http.StatusBadRequest, logrus.Fields{"error": err}, time.Since(start)) return } - for _, alert := range payload.Alerts { - if alert.Status == string(model.AlertResolved) { - log.WithFields(logrus.Fields{ - "alert": alert.Labels, - }).Info("Skipping resolved alert") - continue - } + // Optionally, we can TagWrapper around the entire batch if you want a "batch" concept: + // pyroscope.TagWrapper(context.Background(), pyroscope.Labels("batch_id", "someID"), func(ctx context.Context) { + // // aggregator logic here + // }) - if err := sendNotification(alert); err != nil { - // Log and return a generic error message - log.WithFields(logrus.Fields{ - "error": err, - }).Error("Error sending notification") - http.Error(w, "Internal Server Error: Unable to send notification", http.StatusInternalServerError) - return - } + for _, incomingAlert := range payload.Alerts { + // If you want per-alert tagging, you can wrap aggregator logic in TagWrapper: + // e.g. Tag by alertname or severity + alertName := incomingAlert.Labels["alertname"] + pyroscope.TagWrapper(context.Background(), + pyroscope.Labels("alertname", alertName), + func(ctx context.Context) { + + key := fingerprintAlert(incomingAlert) + alertsMu.Lock() + defer alertsMu.Unlock() + + if incomingAlert.Status == string(model.AlertResolved) { + if _, exists := activeAlertsMap[key]; exists { + delete(activeAlertsMap, key) + aggregatorResolvedAlerts.Inc() + aggregatorActiveAlerts.Set(float64(len(activeAlertsMap))) + log.WithFields(logrus.Fields{"alert": alertName}). + Info("Resolved alert removed") + } + } else { + // Firing + if _, exists := activeAlertsMap[key]; exists { + // update existing alert + activeAlertsMap[key] = incomingAlert + aggregatorUpdatedAlerts.Inc() + log.WithFields(logrus.Fields{"alert": alertName}). + Info("Updated existing alert") + } else { + // new alert + activeAlertsMap[key] = incomingAlert + aggregatorNewAlerts.Inc() + aggregatorActiveAlerts.Set(float64(len(activeAlertsMap))) + log.WithFields(logrus.Fields{"alert": alertName}). + Info("New firing alert; sending notification") + + // Send notification + if err := sendNotification(incomingAlert); err != nil { + httpRequestErrors.WithLabelValues(r.URL.Path, r.Method, "500").Inc() + logRequestWithStatus(r, http.StatusInternalServerError, logrus.Fields{"error": err}, time.Since(start)) + http.Error(w, "Internal Server Error: Unable to send notification", http.StatusInternalServerError) + return + } + } + } + }, + ) } w.WriteHeader(http.StatusOK) - logRequestWithStatus(r, http.StatusOK, logrus.Fields{"msg": "POST request processed successfully"}) + logRequestWithStatus(r, http.StatusOK, logrus.Fields{"msg": "POST / processed"}, time.Since(start)) } -// Function to send notification (external request) -func sendNotification(alert template.Alert) error { - formattedAlertTitle, formattedAlertBody, alertPriority, alertTags, alertTopic := FormattedAlert(alert) - log.WithFields(logrus.Fields{ - "alert": formattedAlertTitle, - }).Info("Processing alert") +func WebhookHandler(w http.ResponseWriter, r *http.Request) { + switch r.Method { + case http.MethodPost: + handlePost(w, r) + default: + w.WriteHeader(http.StatusMethodNotAllowed) + logRequestWithStatus(r, http.StatusMethodNotAllowed, logrus.Fields{"msg": "method not allowed"}, 0) + } +} - url := fmt.Sprintf("%s/%s", os.Getenv("NTFY_SERVER_URL"), alertTopic) - req, err := http.NewRequest("POST", url, strings.NewReader(formattedAlertBody)) +// --------------- Notification Code --------------- // + +func sendNotification(alert template.Alert) error { + title, body, priority, tags, topic := FormattedAlert(alert) + + ntfyURL := fmt.Sprintf("%s/%s", os.Getenv("NTFY_SERVER_URL"), topic) + req, err := http.NewRequest("POST", ntfyURL, strings.NewReader(body)) if err != nil { - return fmt.Errorf("building request failed: %w", err) + return fmt.Errorf("request build error: %w", err) } - req.Header.Set("Title", formattedAlertTitle) - req.Header.Set("Priority", fmt.Sprintf("%d", alertPriority)) + req.Header.Set("Title", title) + req.Header.Set("Priority", fmt.Sprintf("%d", priority)) req.Header.Set("Markdown", "yes") - req.Header.Set("Tags", alertTags) + req.Header.Set("Tags", tags) - username, password := os.Getenv("NTFY_USER"), os.Getenv("NTFY_PASS") - if password != "" { - req.SetBasicAuth(username, password) - } else if username != "" { - req.SetBasicAuth(username, "") + user, pass := os.Getenv("NTFY_USER"), os.Getenv("NTFY_PASS") + if pass != "" { + req.SetBasicAuth(user, pass) + } else if user != "" { + req.SetBasicAuth(user, "") } resp, err := http.DefaultClient.Do(req) if err != nil { return fmt.Errorf("sending request failed: %w", err) } + defer resp.Body.Close() if resp.StatusCode != http.StatusOK { return fmt.Errorf("notification sending failed with status %d", resp.StatusCode) } - return nil } -// WebhookHandler processes all incoming HTTP requests -func WebhookHandler(w http.ResponseWriter, r *http.Request) { - if r.Method == http.MethodPost { - handlePost(w, r) - } else { - w.WriteHeader(http.StatusMethodNotAllowed) - logRequestWithStatus(r, http.StatusMethodNotAllowed, logrus.Fields{"msg": "Method not allowed"}) - } -} +// --------------- Main --------------- // func main() { - // Set logrus to output in JSON format - log.SetFormatter(&logrus.JSONFormatter{}) + // 1. Logging + setLogLevel() + // 2. Start Pyroscope (continuous profiling) + startPyroscope() + + // 3. Validate environment variables for _, v := range []string{"HTTP_ADDRESS", "HTTP_PORT", "NTFY_SERVER_URL"} { if len(strings.TrimSpace(os.Getenv(v))) == 0 { panic("Environment variable " + v + " not set!") } } - _, err := url.Parse(os.Getenv("NTFY_SERVER_URL")) - if err != nil { - log.Fatal("Environment variable NTFY_SERVER_URL is not a valid URL") + if _, err := url.Parse(os.Getenv("NTFY_SERVER_URL")); err != nil { + log.Fatal("Environment variable NTFY_SERVER_URL is not valid:", err) } + // 4. HTTP Handlers http.HandleFunc("/", WebhookHandler) - listenAddr := fmt.Sprintf("%v:%v", os.Getenv("HTTP_ADDRESS"), os.Getenv("HTTP_PORT")) + + // Prometheus metrics at /metrics + http.Handle("/metrics", promhttp.Handler()) + + // pprof is automatically at /debug/pprof/ + + // 5. Listen + listenAddr := fmt.Sprintf("%s:%s", os.Getenv("HTTP_ADDRESS"), os.Getenv("HTTP_PORT")) log.WithFields(logrus.Fields{ "listen_address": listenAddr, "ntfy_server_url": os.Getenv("NTFY_SERVER_URL"), }).Info("Listening for HTTP requests (webhooks)") + log.Fatal(http.ListenAndServe(listenAddr, nil)) }