mirror of
https://github.com/TwiN/gatus.git
synced 2024-12-14 11:58:04 +00:00
feat(alerting): Persist triggered alerts across application restart (#764)
* feat(alerting): Persist triggered alerts across application restart Fixes #679 * test(alerting): Add numerous tests related to alerts
This commit is contained in:
parent
9d151fcdb4
commit
f2c5f5911c
13 changed files with 822 additions and 72 deletions
|
@ -1,7 +1,10 @@
|
|||
package alert
|
||||
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"errors"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
|
@ -26,6 +29,9 @@ type Alert struct {
|
|||
// FailureThreshold is the number of failures in a row needed before triggering the alert
|
||||
FailureThreshold int `yaml:"failure-threshold"`
|
||||
|
||||
// SuccessThreshold defines how many successful executions must happen in a row before an ongoing incident is marked as resolved
|
||||
SuccessThreshold int `yaml:"success-threshold"`
|
||||
|
||||
// Description of the alert. Will be included in the alert sent.
|
||||
//
|
||||
// This is a pointer, because it is populated by YAML and we need to know whether it was explicitly set to a value
|
||||
|
@ -38,9 +44,6 @@ type Alert struct {
|
|||
// or not for provider.ParseWithDefaultAlert to work. Use Alert.IsSendingOnResolved() for a non-pointer
|
||||
SendOnResolved *bool `yaml:"send-on-resolved"`
|
||||
|
||||
// SuccessThreshold defines how many successful executions must happen in a row before an ongoing incident is marked as resolved
|
||||
SuccessThreshold int `yaml:"success-threshold"`
|
||||
|
||||
// ResolveKey is an optional field that is used by some providers (i.e. PagerDuty's dedup_key) to resolve
|
||||
// ongoing/triggered incidents
|
||||
ResolveKey string `yaml:"-"`
|
||||
|
@ -94,3 +97,17 @@ func (alert *Alert) IsSendingOnResolved() bool {
|
|||
}
|
||||
return *alert.SendOnResolved
|
||||
}
|
||||
|
||||
// Checksum returns a checksum of the alert
|
||||
// Used to determine which persisted triggered alert should be deleted on application start
|
||||
func (alert *Alert) Checksum() string {
|
||||
hash := sha256.New()
|
||||
hash.Write([]byte(string(alert.Type) + "_" +
|
||||
strconv.FormatBool(alert.IsEnabled()) + "_" +
|
||||
strconv.FormatBool(alert.IsSendingOnResolved()) + "_" +
|
||||
strconv.Itoa(alert.SuccessThreshold) + "_" +
|
||||
strconv.Itoa(alert.FailureThreshold) + "_" +
|
||||
alert.GetDescription()),
|
||||
)
|
||||
return hex.EncodeToString(hash.Sum(nil))
|
||||
}
|
||||
|
|
|
@ -84,3 +84,109 @@ func TestAlert_IsSendingOnResolved(t *testing.T) {
|
|||
t.Error("alert.IsSendingOnResolved() should've returned true, because SendOnResolved was set to true")
|
||||
}
|
||||
}
|
||||
|
||||
func TestAlert_Checksum(t *testing.T) {
|
||||
description1, description2 := "a", "b"
|
||||
yes, no := true, false
|
||||
scenarios := []struct {
|
||||
name string
|
||||
alert Alert
|
||||
expected string
|
||||
}{
|
||||
{
|
||||
name: "barebone",
|
||||
alert: Alert{
|
||||
Type: TypeDiscord,
|
||||
},
|
||||
expected: "fed0580e44ed5701dbba73afa1f14b2c53ca5a7b8067a860441c212916057fe3",
|
||||
},
|
||||
{
|
||||
name: "with-description-1",
|
||||
alert: Alert{
|
||||
Type: TypeDiscord,
|
||||
Description: &description1,
|
||||
},
|
||||
expected: "005f407ebe506e74a4aeb46f74c28b376debead7011e1b085da3840f72ba9707",
|
||||
},
|
||||
{
|
||||
name: "with-description-2",
|
||||
alert: Alert{
|
||||
Type: TypeDiscord,
|
||||
Description: &description2,
|
||||
},
|
||||
expected: "3c2c4a9570cdc614006993c21f79a860a7f5afea10cf70d1a79d3c49342ef2c8",
|
||||
},
|
||||
{
|
||||
name: "with-description-2-and-enabled-false",
|
||||
alert: Alert{
|
||||
Type: TypeDiscord,
|
||||
Enabled: &no,
|
||||
Description: &description2,
|
||||
},
|
||||
expected: "837945c2b4cd5e961db3e63e10c348d4f1c3446ba68cf5a48e35a1ae22cf0c22",
|
||||
},
|
||||
{
|
||||
name: "with-description-2-and-enabled-true",
|
||||
alert: Alert{
|
||||
Type: TypeDiscord,
|
||||
Enabled: &yes, // it defaults to true if not set, but just to make sure
|
||||
Description: &description2,
|
||||
},
|
||||
expected: "3c2c4a9570cdc614006993c21f79a860a7f5afea10cf70d1a79d3c49342ef2c8",
|
||||
},
|
||||
{
|
||||
name: "with-description-2-and-enabled-true-and-send-on-resolved-true",
|
||||
alert: Alert{
|
||||
Type: TypeDiscord,
|
||||
Enabled: &yes,
|
||||
SendOnResolved: &yes,
|
||||
Description: &description2,
|
||||
},
|
||||
expected: "bf1436995a880eb4a352c74c5dfee1f1b5ff6b9fc55aef9bf411b3631adfd80c",
|
||||
},
|
||||
{
|
||||
name: "with-description-2-and-failure-threshold-7",
|
||||
alert: Alert{
|
||||
Type: TypeSlack,
|
||||
FailureThreshold: 7,
|
||||
Description: &description2,
|
||||
},
|
||||
expected: "8bd479e18bda393d4c924f5a0d962e825002168dedaa88b445e435db7bacffd3",
|
||||
},
|
||||
{
|
||||
name: "with-description-2-and-failure-threshold-9",
|
||||
alert: Alert{
|
||||
Type: TypeSlack,
|
||||
FailureThreshold: 9,
|
||||
Description: &description2,
|
||||
},
|
||||
expected: "5abdfce5236e344996d264d526e769c07cb0d3d329a999769a1ff84b157ca6f1",
|
||||
},
|
||||
{
|
||||
name: "with-description-2-and-success-threshold-5",
|
||||
alert: Alert{
|
||||
Type: TypeSlack,
|
||||
SuccessThreshold: 7,
|
||||
Description: &description2,
|
||||
},
|
||||
expected: "c0000e73626b80e212cfc24830de7094568f648e37f3e16f9e68c7f8ef75c34c",
|
||||
},
|
||||
{
|
||||
name: "with-description-2-and-success-threshold-1",
|
||||
alert: Alert{
|
||||
Type: TypeSlack,
|
||||
SuccessThreshold: 1,
|
||||
Description: &description2,
|
||||
},
|
||||
expected: "5c28963b3a76104cfa4a0d79c89dd29ec596c8cfa4b1af210ec83d6d41587b5f",
|
||||
},
|
||||
}
|
||||
for _, scenario := range scenarios {
|
||||
t.Run(scenario.name, func(t *testing.T) {
|
||||
scenario.alert.ValidateAndSetDefaults()
|
||||
if checksum := scenario.alert.Checksum(); checksum != scenario.expected {
|
||||
t.Errorf("expected checksum %v, got %v", scenario.expected, checksum)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
|
@ -64,6 +64,12 @@ func TestCreateExternalEndpointResult(t *testing.T) {
|
|||
AuthorizationHeaderBearerToken: "Bearer token",
|
||||
ExpectedCode: 404,
|
||||
},
|
||||
{
|
||||
Name: "bad-success-value",
|
||||
Path: "/api/v1/endpoints/g_n/external?success=invalid",
|
||||
AuthorizationHeaderBearerToken: "Bearer token",
|
||||
ExpectedCode: 400,
|
||||
},
|
||||
{
|
||||
Name: "good-token-success-true",
|
||||
Path: "/api/v1/endpoints/g_n/external?success=true",
|
||||
|
|
|
@ -245,16 +245,13 @@ func parseAndValidateConfigBytes(yamlBytes []byte) (config *Config, err error) {
|
|||
if config == nil || config.Endpoints == nil || len(config.Endpoints) == 0 {
|
||||
err = ErrNoEndpointInConfig
|
||||
} else {
|
||||
validateAlertingConfig(config.Alerting, config.Endpoints, config.Debug)
|
||||
validateAlertingConfig(config.Alerting, config.Endpoints, config.ExternalEndpoints, config.Debug)
|
||||
if err := validateSecurityConfig(config); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := validateEndpointsConfig(config); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := validateExternalEndpointsConfig(config); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := validateWebConfig(config); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
@ -338,28 +335,37 @@ func validateWebConfig(config *Config) error {
|
|||
}
|
||||
|
||||
func validateEndpointsConfig(config *Config) error {
|
||||
duplicateValidationMap := make(map[string]bool)
|
||||
// Validate endpoints
|
||||
for _, ep := range config.Endpoints {
|
||||
if config.Debug {
|
||||
log.Printf("[config.validateEndpointsConfig] Validating endpoint '%s'", ep.Name)
|
||||
}
|
||||
if endpointKey := ep.Key(); duplicateValidationMap[endpointKey] {
|
||||
return fmt.Errorf("invalid endpoint %s: name and group combination must be unique", ep.Key())
|
||||
} else {
|
||||
duplicateValidationMap[endpointKey] = true
|
||||
}
|
||||
if err := ep.ValidateAndSetDefaults(); err != nil {
|
||||
return fmt.Errorf("invalid endpoint %s: %w", ep.DisplayName(), err)
|
||||
return fmt.Errorf("invalid endpoint %s: %w", ep.Key(), err)
|
||||
}
|
||||
}
|
||||
log.Printf("[config.validateEndpointsConfig] Validated %d endpoints", len(config.Endpoints))
|
||||
return nil
|
||||
}
|
||||
|
||||
func validateExternalEndpointsConfig(config *Config) error {
|
||||
// Validate external endpoints
|
||||
for _, ee := range config.ExternalEndpoints {
|
||||
if config.Debug {
|
||||
log.Printf("[config.validateExternalEndpointsConfig] Validating external endpoint '%s'", ee.Name)
|
||||
log.Printf("[config.validateEndpointsConfig] Validating external endpoint '%s'", ee.Name)
|
||||
}
|
||||
if endpointKey := ee.Key(); duplicateValidationMap[endpointKey] {
|
||||
return fmt.Errorf("invalid external endpoint %s: name and group combination must be unique", ee.Key())
|
||||
} else {
|
||||
duplicateValidationMap[endpointKey] = true
|
||||
}
|
||||
if err := ee.ValidateAndSetDefaults(); err != nil {
|
||||
return fmt.Errorf("invalid external endpoint %s: %w", ee.DisplayName(), err)
|
||||
return fmt.Errorf("invalid external endpoint %s: %w", ee.Key(), err)
|
||||
}
|
||||
}
|
||||
log.Printf("[config.validateExternalEndpointsConfig] Validated %d external endpoints", len(config.ExternalEndpoints))
|
||||
log.Printf("[config.validateEndpointsConfig] Validated %d external endpoints", len(config.ExternalEndpoints))
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@ -382,7 +388,7 @@ func validateSecurityConfig(config *Config) error {
|
|||
// Note that the alerting configuration has to be validated before the endpoint configuration, because the default alert
|
||||
// returned by provider.AlertProvider.GetDefaultAlert() must be parsed before endpoint.Endpoint.ValidateAndSetDefaults()
|
||||
// sets the default alert values when none are set.
|
||||
func validateAlertingConfig(alertingConfig *alerting.Config, endpoints []*endpoint.Endpoint, debug bool) {
|
||||
func validateAlertingConfig(alertingConfig *alerting.Config, endpoints []*endpoint.Endpoint, externalEndpoints []*endpoint.ExternalEndpoint, debug bool) {
|
||||
if alertingConfig == nil {
|
||||
log.Printf("[config.validateAlertingConfig] Alerting is not configured")
|
||||
return
|
||||
|
@ -391,12 +397,12 @@ func validateAlertingConfig(alertingConfig *alerting.Config, endpoints []*endpoi
|
|||
alert.TypeAWSSES,
|
||||
alert.TypeCustom,
|
||||
alert.TypeDiscord,
|
||||
alert.TypeEmail,
|
||||
alert.TypeGitHub,
|
||||
alert.TypeGitLab,
|
||||
alert.TypeGoogleChat,
|
||||
alert.TypeGotify,
|
||||
alert.TypeJetBrainsSpace,
|
||||
alert.TypeEmail,
|
||||
alert.TypeMatrix,
|
||||
alert.TypeMattermost,
|
||||
alert.TypeMessagebird,
|
||||
|
@ -420,7 +426,17 @@ func validateAlertingConfig(alertingConfig *alerting.Config, endpoints []*endpoi
|
|||
for alertIndex, endpointAlert := range ep.Alerts {
|
||||
if alertType == endpointAlert.Type {
|
||||
if debug {
|
||||
log.Printf("[config.validateAlertingConfig] Parsing alert %d with provider's default alert for provider=%s in endpoint=%s", alertIndex, alertType, ep.Name)
|
||||
log.Printf("[config.validateAlertingConfig] Parsing alert %d with default alert for provider=%s in endpoint with key=%s", alertIndex, alertType, ep.Key())
|
||||
}
|
||||
provider.ParseWithDefaultAlert(alertProvider.GetDefaultAlert(), endpointAlert)
|
||||
}
|
||||
}
|
||||
}
|
||||
for _, ee := range externalEndpoints {
|
||||
for alertIndex, endpointAlert := range ee.Alerts {
|
||||
if alertType == endpointAlert.Type {
|
||||
if debug {
|
||||
log.Printf("[config.validateAlertingConfig] Parsing alert %d with default alert for provider=%s in endpoint with key=%s", alertIndex, alertType, ee.Key())
|
||||
}
|
||||
provider.ParseWithDefaultAlert(alertProvider.GetDefaultAlert(), endpointAlert)
|
||||
}
|
||||
|
|
|
@ -16,6 +16,7 @@ import (
|
|||
"github.com/TwiN/gatus/v5/alerting/provider/email"
|
||||
"github.com/TwiN/gatus/v5/alerting/provider/github"
|
||||
"github.com/TwiN/gatus/v5/alerting/provider/googlechat"
|
||||
"github.com/TwiN/gatus/v5/alerting/provider/gotify"
|
||||
"github.com/TwiN/gatus/v5/alerting/provider/jetbrainsspace"
|
||||
"github.com/TwiN/gatus/v5/alerting/provider/matrix"
|
||||
"github.com/TwiN/gatus/v5/alerting/provider/mattermost"
|
||||
|
@ -36,6 +37,7 @@ import (
|
|||
)
|
||||
|
||||
func TestLoadConfiguration(t *testing.T) {
|
||||
yes := true
|
||||
dir := t.TempDir()
|
||||
scenarios := []struct {
|
||||
name string
|
||||
|
@ -165,6 +167,8 @@ metrics: true
|
|||
alerting:
|
||||
slack:
|
||||
webhook-url: https://hooks.slack.com/services/xxx/yyy/zzz
|
||||
default-alert:
|
||||
enabled: true
|
||||
|
||||
endpoints:
|
||||
- name: example
|
||||
|
@ -179,6 +183,12 @@ alerting:
|
|||
discord:
|
||||
webhook-url: https://discord.com/api/webhooks/xxx/yyy
|
||||
|
||||
external-endpoints:
|
||||
- name: ext-ep-test
|
||||
token: "potato"
|
||||
alerts:
|
||||
- type: slack
|
||||
|
||||
endpoints:
|
||||
- name: frontend
|
||||
url: https://example.com
|
||||
|
@ -190,7 +200,20 @@ endpoints:
|
|||
Metrics: true,
|
||||
Alerting: &alerting.Config{
|
||||
Discord: &discord.AlertProvider{WebhookURL: "https://discord.com/api/webhooks/xxx/yyy"},
|
||||
Slack: &slack.AlertProvider{WebhookURL: "https://hooks.slack.com/services/xxx/yyy/zzz"},
|
||||
Slack: &slack.AlertProvider{WebhookURL: "https://hooks.slack.com/services/xxx/yyy/zzz", DefaultAlert: &alert.Alert{Enabled: &yes}},
|
||||
},
|
||||
ExternalEndpoints: []*endpoint.ExternalEndpoint{
|
||||
{
|
||||
Name: "ext-ep-test",
|
||||
Token: "potato",
|
||||
Alerts: []*alert.Alert{
|
||||
{
|
||||
Type: alert.TypeSlack,
|
||||
FailureThreshold: 3,
|
||||
SuccessThreshold: 2,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
Endpoints: []*endpoint.Endpoint{
|
||||
{
|
||||
|
@ -325,10 +348,6 @@ external-endpoints:
|
|||
- name: ext-ep-test
|
||||
group: core
|
||||
token: "potato"
|
||||
alerts:
|
||||
- type: discord
|
||||
description: "healthcheck failed"
|
||||
send-on-resolved: true
|
||||
|
||||
endpoints:
|
||||
- name: website
|
||||
|
@ -382,18 +401,7 @@ endpoints:
|
|||
if config.ExternalEndpoints[0].Token != "potato" {
|
||||
t.Errorf("Token should have been %s", "potato")
|
||||
}
|
||||
if len(config.ExternalEndpoints[0].Alerts) != 1 {
|
||||
t.Error("Should have returned one alert")
|
||||
}
|
||||
if config.ExternalEndpoints[0].Alerts[0].Type != alert.TypeDiscord {
|
||||
t.Errorf("Type should have been %s", alert.TypeDiscord)
|
||||
}
|
||||
if config.ExternalEndpoints[0].Alerts[0].FailureThreshold != 3 {
|
||||
t.Errorf("FailureThreshold should have been %d, got %d", 3, config.ExternalEndpoints[0].Alerts[0].FailureThreshold)
|
||||
}
|
||||
if config.ExternalEndpoints[0].Alerts[0].SuccessThreshold != 2 {
|
||||
t.Errorf("SuccessThreshold should have been %d, got %d", 2, config.ExternalEndpoints[0].Alerts[0].SuccessThreshold)
|
||||
}
|
||||
|
||||
if len(config.Endpoints) != 3 {
|
||||
t.Error("Should have returned two endpoints")
|
||||
}
|
||||
|
@ -439,7 +447,6 @@ endpoints:
|
|||
if len(config.Endpoints[1].Conditions) != 2 {
|
||||
t.Errorf("There should have been %d conditions", 2)
|
||||
}
|
||||
|
||||
if config.Endpoints[2].URL != "https://example.com/" {
|
||||
t.Errorf("URL should have been %s", "https://example.com/")
|
||||
}
|
||||
|
@ -977,6 +984,22 @@ alerting:
|
|||
enabled: true
|
||||
failure-threshold: 5
|
||||
success-threshold: 3
|
||||
email:
|
||||
from: "from@example.com"
|
||||
username: "from@example.com"
|
||||
password: "hunter2"
|
||||
host: "mail.example.com"
|
||||
port: 587
|
||||
to: "recipient1@example.com,recipient2@example.com"
|
||||
client:
|
||||
insecure: false
|
||||
default-alert:
|
||||
enabled: true
|
||||
gotify:
|
||||
server-url: "https://gotify.example"
|
||||
token: "**************"
|
||||
default-alert:
|
||||
enabled: true
|
||||
|
||||
endpoints:
|
||||
- name: website
|
||||
|
@ -993,6 +1016,8 @@ endpoints:
|
|||
- type: teams
|
||||
- type: pushover
|
||||
- type: jetbrainsspace
|
||||
- type: email
|
||||
- type: gotify
|
||||
conditions:
|
||||
- "[STATUS] == 200"
|
||||
`))
|
||||
|
@ -1107,6 +1132,7 @@ endpoints:
|
|||
if config.Alerting.JetBrainsSpace == nil || !config.Alerting.JetBrainsSpace.IsValid() {
|
||||
t.Fatal("JetBrainsSpace alerting config should've been valid")
|
||||
}
|
||||
|
||||
if config.Alerting.JetBrainsSpace.GetDefaultAlert() == nil {
|
||||
t.Fatal("JetBrainsSpace.GetDefaultAlert() shouldn't have returned nil")
|
||||
}
|
||||
|
@ -1120,6 +1146,50 @@ endpoints:
|
|||
t.Errorf("JetBrainsSpace webhook should've been %s, but was %s", "baz", config.Alerting.JetBrainsSpace.Token)
|
||||
}
|
||||
|
||||
if config.Alerting.Email == nil || !config.Alerting.Email.IsValid() {
|
||||
t.Fatal("Email alerting config should've been valid")
|
||||
}
|
||||
if config.Alerting.Email.GetDefaultAlert() == nil {
|
||||
t.Fatal("Email.GetDefaultAlert() shouldn't have returned nil")
|
||||
}
|
||||
if config.Alerting.Email.From != "from@example.com" {
|
||||
t.Errorf("Email from should've been %s, but was %s", "from@example.com", config.Alerting.Email.From)
|
||||
}
|
||||
if config.Alerting.Email.Username != "from@example.com" {
|
||||
t.Errorf("Email username should've been %s, but was %s", "from@example.com", config.Alerting.Email.Username)
|
||||
}
|
||||
if config.Alerting.Email.Password != "hunter2" {
|
||||
t.Errorf("Email password should've been %s, but was %s", "hunter2", config.Alerting.Email.Password)
|
||||
}
|
||||
if config.Alerting.Email.Host != "mail.example.com" {
|
||||
t.Errorf("Email host should've been %s, but was %s", "mail.example.com", config.Alerting.Email.Host)
|
||||
}
|
||||
if config.Alerting.Email.Port != 587 {
|
||||
t.Errorf("Email port should've been %d, but was %d", 587, config.Alerting.Email.Port)
|
||||
}
|
||||
if config.Alerting.Email.To != "recipient1@example.com,recipient2@example.com" {
|
||||
t.Errorf("Email to should've been %s, but was %s", "recipient1@example.com,recipient2@example.com", config.Alerting.Email.To)
|
||||
}
|
||||
if config.Alerting.Email.ClientConfig == nil {
|
||||
t.Fatal("Email client config should've been set")
|
||||
}
|
||||
if config.Alerting.Email.ClientConfig.Insecure {
|
||||
t.Error("Email client config should've been secure")
|
||||
}
|
||||
|
||||
if config.Alerting.Gotify == nil || !config.Alerting.Gotify.IsValid() {
|
||||
t.Fatal("Gotify alerting config should've been valid")
|
||||
}
|
||||
if config.Alerting.Gotify.GetDefaultAlert() == nil {
|
||||
t.Fatal("Gotify.GetDefaultAlert() shouldn't have returned nil")
|
||||
}
|
||||
if config.Alerting.Gotify.ServerURL != "https://gotify.example" {
|
||||
t.Errorf("Gotify server URL should've been %s, but was %s", "https://gotify.example", config.Alerting.Gotify.ServerURL)
|
||||
}
|
||||
if config.Alerting.Gotify.Token != "**************" {
|
||||
t.Errorf("Gotify token should've been %s, but was %s", "**************", config.Alerting.Gotify.Token)
|
||||
}
|
||||
|
||||
// Endpoints
|
||||
if len(config.Endpoints) != 1 {
|
||||
t.Error("There should've been 1 endpoint")
|
||||
|
@ -1130,8 +1200,8 @@ endpoints:
|
|||
if config.Endpoints[0].Interval != 60*time.Second {
|
||||
t.Errorf("Interval should have been %s, because it is the default value", 60*time.Second)
|
||||
}
|
||||
if len(config.Endpoints[0].Alerts) != 10 {
|
||||
t.Fatal("There should've been 10 alerts configured")
|
||||
if len(config.Endpoints[0].Alerts) != 12 {
|
||||
t.Fatalf("There should've been 12 alerts configured, got %d", len(config.Endpoints[0].Alerts))
|
||||
}
|
||||
|
||||
if config.Endpoints[0].Alerts[0].Type != alert.TypeSlack {
|
||||
|
@ -1255,10 +1325,36 @@ endpoints:
|
|||
t.Error("The alert should've been enabled")
|
||||
}
|
||||
if config.Endpoints[0].Alerts[9].FailureThreshold != 5 {
|
||||
t.Errorf("The default failure threshold of the alert should've been %d, but it was %d", 3, config.Endpoints[0].Alerts[9].FailureThreshold)
|
||||
t.Errorf("The default failure threshold of the alert should've been %d, but it was %d", 5, config.Endpoints[0].Alerts[9].FailureThreshold)
|
||||
}
|
||||
if config.Endpoints[0].Alerts[9].SuccessThreshold != 3 {
|
||||
t.Errorf("The default success threshold of the alert should've been %d, but it was %d", 2, config.Endpoints[0].Alerts[9].SuccessThreshold)
|
||||
t.Errorf("The default success threshold of the alert should've been %d, but it was %d", 3, config.Endpoints[0].Alerts[9].SuccessThreshold)
|
||||
}
|
||||
|
||||
if config.Endpoints[0].Alerts[10].Type != alert.TypeEmail {
|
||||
t.Errorf("The type of the alert should've been %s, but it was %s", alert.TypeEmail, config.Endpoints[0].Alerts[10].Type)
|
||||
}
|
||||
if !config.Endpoints[0].Alerts[10].IsEnabled() {
|
||||
t.Error("The alert should've been enabled")
|
||||
}
|
||||
if config.Endpoints[0].Alerts[10].FailureThreshold != 3 {
|
||||
t.Errorf("The default failure threshold of the alert should've been %d, but it was %d", 3, config.Endpoints[0].Alerts[10].FailureThreshold)
|
||||
}
|
||||
if config.Endpoints[0].Alerts[10].SuccessThreshold != 2 {
|
||||
t.Errorf("The default success threshold of the alert should've been %d, but it was %d", 2, config.Endpoints[0].Alerts[10].SuccessThreshold)
|
||||
}
|
||||
|
||||
if config.Endpoints[0].Alerts[11].Type != alert.TypeGotify {
|
||||
t.Errorf("The type of the alert should've been %s, but it was %s", alert.TypeGotify, config.Endpoints[0].Alerts[11].Type)
|
||||
}
|
||||
if !config.Endpoints[0].Alerts[11].IsEnabled() {
|
||||
t.Error("The alert should've been enabled")
|
||||
}
|
||||
if config.Endpoints[0].Alerts[11].FailureThreshold != 3 {
|
||||
t.Errorf("The default failure threshold of the alert should've been %d, but it was %d", 3, config.Endpoints[0].Alerts[11].FailureThreshold)
|
||||
}
|
||||
if config.Endpoints[0].Alerts[11].SuccessThreshold != 2 {
|
||||
t.Errorf("The default success threshold of the alert should've been %d, but it was %d", 2, config.Endpoints[0].Alerts[11].SuccessThreshold)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1532,6 +1628,99 @@ endpoints:
|
|||
}
|
||||
}
|
||||
|
||||
func TestParseAndValidateConfigBytesWithDuplicateEndpointName(t *testing.T) {
|
||||
scenarios := []struct {
|
||||
name string
|
||||
shouldError bool
|
||||
config string
|
||||
}{
|
||||
{
|
||||
name: "same-name-no-group",
|
||||
shouldError: true,
|
||||
config: `
|
||||
endpoints:
|
||||
- name: ep1
|
||||
url: https://twin.sh/health
|
||||
conditions:
|
||||
- "[STATUS] == 200"
|
||||
- name: ep1
|
||||
url: https://twin.sh/health
|
||||
conditions:
|
||||
- "[STATUS] == 200"`,
|
||||
},
|
||||
{
|
||||
name: "same-name-different-group",
|
||||
shouldError: false,
|
||||
config: `
|
||||
endpoints:
|
||||
- name: ep1
|
||||
url: https://twin.sh/health
|
||||
conditions:
|
||||
- "[STATUS] == 200"
|
||||
- name: ep1
|
||||
group: g1
|
||||
url: https://twin.sh/health
|
||||
conditions:
|
||||
- "[STATUS] == 200"`,
|
||||
},
|
||||
{
|
||||
name: "same-name-same-group",
|
||||
shouldError: true,
|
||||
config: `
|
||||
endpoints:
|
||||
- name: ep1
|
||||
group: g1
|
||||
url: https://twin.sh/health
|
||||
conditions:
|
||||
- "[STATUS] == 200"
|
||||
- name: ep1
|
||||
group: g1
|
||||
url: https://twin.sh/health
|
||||
conditions:
|
||||
- "[STATUS] == 200"`,
|
||||
},
|
||||
{
|
||||
name: "same-name-different-endpoint-type",
|
||||
shouldError: true,
|
||||
config: `
|
||||
external-endpoints:
|
||||
- name: ep1
|
||||
token: "12345678"
|
||||
|
||||
endpoints:
|
||||
- name: ep1
|
||||
url: https://twin.sh/health
|
||||
conditions:
|
||||
- "[STATUS] == 200"`,
|
||||
},
|
||||
{
|
||||
name: "same-name-different-group-different-endpoint-type",
|
||||
shouldError: false,
|
||||
config: `
|
||||
external-endpoints:
|
||||
- name: ep1
|
||||
group: gr1
|
||||
token: "12345678"
|
||||
|
||||
endpoints:
|
||||
- name: ep1
|
||||
url: https://twin.sh/health
|
||||
conditions:
|
||||
- "[STATUS] == 200"`,
|
||||
},
|
||||
}
|
||||
for _, scenario := range scenarios {
|
||||
t.Run(scenario.name, func(t *testing.T) {
|
||||
_, err := parseAndValidateConfigBytes([]byte(scenario.config))
|
||||
if scenario.shouldError && err == nil {
|
||||
t.Error("should've returned an error")
|
||||
} else if !scenario.shouldError && err != nil {
|
||||
t.Error("shouldn't have returned an error")
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseAndValidateConfigBytesWithInvalidStorageConfig(t *testing.T) {
|
||||
_, err := parseAndValidateConfigBytes([]byte(`
|
||||
storage:
|
||||
|
@ -1645,7 +1834,7 @@ endpoints:
|
|||
|
||||
func TestParseAndValidateConfigBytesWithNoEndpoints(t *testing.T) {
|
||||
_, err := parseAndValidateConfigBytes([]byte(``))
|
||||
if err != ErrNoEndpointInConfig {
|
||||
if !errors.Is(err, ErrNoEndpointInConfig) {
|
||||
t.Error("The error returned should have been of type ErrNoEndpointInConfig")
|
||||
}
|
||||
}
|
||||
|
@ -1657,6 +1846,7 @@ func TestGetAlertingProviderByAlertType(t *testing.T) {
|
|||
Email: &email.AlertProvider{},
|
||||
GitHub: &github.AlertProvider{},
|
||||
GoogleChat: &googlechat.AlertProvider{},
|
||||
Gotify: &gotify.AlertProvider{},
|
||||
JetBrainsSpace: &jetbrainsspace.AlertProvider{},
|
||||
Matrix: &matrix.AlertProvider{},
|
||||
Mattermost: &mattermost.AlertProvider{},
|
||||
|
@ -1679,6 +1869,7 @@ func TestGetAlertingProviderByAlertType(t *testing.T) {
|
|||
{alertType: alert.TypeEmail, expected: alertingConfig.Email},
|
||||
{alertType: alert.TypeGitHub, expected: alertingConfig.GitHub},
|
||||
{alertType: alert.TypeGoogleChat, expected: alertingConfig.GoogleChat},
|
||||
{alertType: alert.TypeGotify, expected: alertingConfig.Gotify},
|
||||
{alertType: alert.TypeJetBrainsSpace, expected: alertingConfig.JetBrainsSpace},
|
||||
{alertType: alert.TypeMatrix, expected: alertingConfig.Matrix},
|
||||
{alertType: alert.TypeMattermost, expected: alertingConfig.Mattermost},
|
||||
|
|
58
main.go
58
main.go
|
@ -83,13 +83,67 @@ func initializeStorage(cfg *config.Config) {
|
|||
for _, ep := range cfg.Endpoints {
|
||||
keys = append(keys, ep.Key())
|
||||
}
|
||||
for _, externalEndpoint := range cfg.ExternalEndpoints {
|
||||
keys = append(keys, externalEndpoint.Key())
|
||||
for _, ee := range cfg.ExternalEndpoints {
|
||||
keys = append(keys, ee.Key())
|
||||
}
|
||||
numberOfEndpointStatusesDeleted := store.Get().DeleteAllEndpointStatusesNotInKeys(keys)
|
||||
if numberOfEndpointStatusesDeleted > 0 {
|
||||
log.Printf("[main.initializeStorage] Deleted %d endpoint statuses because their matching endpoints no longer existed", numberOfEndpointStatusesDeleted)
|
||||
}
|
||||
// Clean up the triggered alerts from the storage provider and load valid triggered endpoint alerts
|
||||
numberOfPersistedTriggeredAlertsLoaded := 0
|
||||
for _, ep := range cfg.Endpoints {
|
||||
var checksums []string
|
||||
for _, alert := range ep.Alerts {
|
||||
if alert.IsEnabled() {
|
||||
checksums = append(checksums, alert.Checksum())
|
||||
}
|
||||
}
|
||||
numberOfTriggeredAlertsDeleted := store.Get().DeleteAllTriggeredAlertsNotInChecksumsByEndpoint(ep, checksums)
|
||||
if cfg.Debug && numberOfTriggeredAlertsDeleted > 0 {
|
||||
log.Printf("[main.initializeStorage] Deleted %d triggered alerts for endpoint with key=%s because their configurations have been changed or deleted", numberOfTriggeredAlertsDeleted, ep.Key())
|
||||
}
|
||||
for _, alert := range ep.Alerts {
|
||||
exists, resolveKey, numberOfSuccessesInARow, err := store.Get().GetTriggeredEndpointAlert(ep, alert)
|
||||
if err != nil {
|
||||
log.Printf("[main.initializeStorage] Failed to get triggered alert for endpoint with key=%s: %s", ep.Key(), err.Error())
|
||||
continue
|
||||
}
|
||||
if exists {
|
||||
alert.Triggered, alert.ResolveKey = true, resolveKey
|
||||
ep.NumberOfSuccessesInARow, ep.NumberOfFailuresInARow = numberOfSuccessesInARow, alert.FailureThreshold
|
||||
numberOfPersistedTriggeredAlertsLoaded++
|
||||
}
|
||||
}
|
||||
}
|
||||
for _, ee := range cfg.ExternalEndpoints {
|
||||
var checksums []string
|
||||
for _, alert := range ee.Alerts {
|
||||
if alert.IsEnabled() {
|
||||
checksums = append(checksums, alert.Checksum())
|
||||
}
|
||||
}
|
||||
convertedEndpoint := ee.ToEndpoint()
|
||||
numberOfTriggeredAlertsDeleted := store.Get().DeleteAllTriggeredAlertsNotInChecksumsByEndpoint(convertedEndpoint, checksums)
|
||||
if cfg.Debug && numberOfTriggeredAlertsDeleted > 0 {
|
||||
log.Printf("[main.initializeStorage] Deleted %d triggered alerts for endpoint with key=%s because their configurations have been changed or deleted", numberOfTriggeredAlertsDeleted, ee.Key())
|
||||
}
|
||||
for _, alert := range ee.Alerts {
|
||||
exists, resolveKey, numberOfSuccessesInARow, err := store.Get().GetTriggeredEndpointAlert(convertedEndpoint, alert)
|
||||
if err != nil {
|
||||
log.Printf("[main.initializeStorage] Failed to get triggered alert for endpoint with key=%s: %s", ee.Key(), err.Error())
|
||||
continue
|
||||
}
|
||||
if exists {
|
||||
alert.Triggered, alert.ResolveKey = true, resolveKey
|
||||
ee.NumberOfSuccessesInARow, ee.NumberOfFailuresInARow = numberOfSuccessesInARow, alert.FailureThreshold
|
||||
numberOfPersistedTriggeredAlertsLoaded++
|
||||
}
|
||||
}
|
||||
}
|
||||
if numberOfPersistedTriggeredAlertsLoaded > 0 {
|
||||
log.Printf("[main.initializeStorage] Loaded %d persisted triggered alerts", numberOfPersistedTriggeredAlertsLoaded)
|
||||
}
|
||||
}
|
||||
|
||||
func listenToConfigurationFileChanges(cfg *config.Config) {
|
||||
|
|
|
@ -5,6 +5,7 @@ import (
|
|||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/TwiN/gatus/v5/alerting/alert"
|
||||
"github.com/TwiN/gatus/v5/config/endpoint"
|
||||
"github.com/TwiN/gatus/v5/storage/store/common"
|
||||
"github.com/TwiN/gatus/v5/storage/store/common/paging"
|
||||
|
@ -174,6 +175,37 @@ func (s *Store) DeleteAllEndpointStatusesNotInKeys(keys []string) int {
|
|||
return s.cache.DeleteAll(keysToDelete)
|
||||
}
|
||||
|
||||
// GetTriggeredEndpointAlert returns whether the triggered alert for the specified endpoint as well as the necessary information to resolve it
|
||||
//
|
||||
// Always returns that the alert does not exist for the in-memory store since it does not support persistence across restarts
|
||||
func (s *Store) GetTriggeredEndpointAlert(ep *endpoint.Endpoint, alert *alert.Alert) (exists bool, resolveKey string, numberOfSuccessesInARow int, err error) {
|
||||
return false, "", 0, nil
|
||||
}
|
||||
|
||||
// UpsertTriggeredEndpointAlert inserts/updates a triggered alert for an endpoint
|
||||
// Used for persistence of triggered alerts across application restarts
|
||||
//
|
||||
// Does nothing for the in-memory store since it does not support persistence across restarts
|
||||
func (s *Store) UpsertTriggeredEndpointAlert(ep *endpoint.Endpoint, triggeredAlert *alert.Alert) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// DeleteTriggeredEndpointAlert deletes a triggered alert for an endpoint
|
||||
//
|
||||
// Does nothing for the in-memory store since it does not support persistence across restarts
|
||||
func (s *Store) DeleteTriggeredEndpointAlert(ep *endpoint.Endpoint, triggeredAlert *alert.Alert) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// DeleteAllTriggeredAlertsNotInChecksumsByEndpoint removes all triggered alerts owned by an endpoint whose alert
|
||||
// configurations are not provided in the checksums list.
|
||||
// This prevents triggered alerts that have been removed or modified from lingering in the database.
|
||||
//
|
||||
// Does nothing for the in-memory store since it does not support persistence across restarts
|
||||
func (s *Store) DeleteAllTriggeredAlertsNotInChecksumsByEndpoint(ep *endpoint.Endpoint, checksums []string) int {
|
||||
return 0
|
||||
}
|
||||
|
||||
// Clear deletes everything from the store
|
||||
func (s *Store) Clear() {
|
||||
s.cache.Clear()
|
||||
|
|
|
@ -16,7 +16,7 @@ func (s *Store) createPostgresSchema() error {
|
|||
_, err = s.db.Exec(`
|
||||
CREATE TABLE IF NOT EXISTS endpoint_events (
|
||||
endpoint_event_id BIGSERIAL PRIMARY KEY,
|
||||
endpoint_id INTEGER NOT NULL REFERENCES endpoints(endpoint_id) ON DELETE CASCADE,
|
||||
endpoint_id BIGINT NOT NULL REFERENCES endpoints(endpoint_id) ON DELETE CASCADE,
|
||||
event_type TEXT NOT NULL,
|
||||
event_timestamp TIMESTAMP NOT NULL
|
||||
)
|
||||
|
@ -66,7 +66,20 @@ func (s *Store) createPostgresSchema() error {
|
|||
UNIQUE(endpoint_id, hour_unix_timestamp)
|
||||
)
|
||||
`)
|
||||
// Silent table modifications
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
_, err = s.db.Exec(`
|
||||
CREATE TABLE IF NOT EXISTS endpoint_alerts_triggered (
|
||||
endpoint_alert_trigger_id BIGSERIAL PRIMARY KEY,
|
||||
endpoint_id BIGINT NOT NULL REFERENCES endpoints(endpoint_id) ON DELETE CASCADE,
|
||||
configuration_checksum TEXT NOT NULL,
|
||||
resolve_key TEXT NOT NULL,
|
||||
number_of_successes_in_a_row INTEGER NOT NULL,
|
||||
UNIQUE(endpoint_id, configuration_checksum)
|
||||
)
|
||||
`)
|
||||
// Silent table modifications TODO: Remove this in v6.0.0
|
||||
_, _ = s.db.Exec(`ALTER TABLE endpoint_results ADD IF NOT EXISTS domain_expiration BIGINT NOT NULL DEFAULT 0`)
|
||||
return err
|
||||
}
|
||||
|
|
|
@ -66,7 +66,20 @@ func (s *Store) createSQLiteSchema() error {
|
|||
UNIQUE(endpoint_id, hour_unix_timestamp)
|
||||
)
|
||||
`)
|
||||
// Silent table modifications TODO: Remove this
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
_, err = s.db.Exec(`
|
||||
CREATE TABLE IF NOT EXISTS endpoint_alerts_triggered (
|
||||
endpoint_alert_trigger_id INTEGER PRIMARY KEY,
|
||||
endpoint_id INTEGER NOT NULL REFERENCES endpoints(endpoint_id) ON DELETE CASCADE,
|
||||
configuration_checksum TEXT NOT NULL,
|
||||
resolve_key TEXT NOT NULL,
|
||||
number_of_successes_in_a_row INTEGER NOT NULL,
|
||||
UNIQUE(endpoint_id, configuration_checksum)
|
||||
)
|
||||
`)
|
||||
// Silent table modifications TODO: Remove this in v6.0.0
|
||||
_, _ = s.db.Exec(`ALTER TABLE endpoint_results ADD domain_expiration INTEGER NOT NULL DEFAULT 0`)
|
||||
return err
|
||||
}
|
||||
|
|
|
@ -9,6 +9,7 @@ import (
|
|||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/TwiN/gatus/v5/alerting/alert"
|
||||
"github.com/TwiN/gatus/v5/config/endpoint"
|
||||
"github.com/TwiN/gatus/v5/storage/store/common"
|
||||
"github.com/TwiN/gatus/v5/storage/store/common/paging"
|
||||
|
@ -27,9 +28,9 @@ const (
|
|||
// for aesthetic purposes, I deemed it wasn't worth the performance impact of yet another one-to-many table.
|
||||
arraySeparator = "|~|"
|
||||
|
||||
uptimeCleanUpThreshold = 10 * 24 * time.Hour // Maximum uptime age before triggering a clean up
|
||||
eventsCleanUpThreshold = common.MaximumNumberOfEvents + 10 // Maximum number of events before triggering a clean up
|
||||
resultsCleanUpThreshold = common.MaximumNumberOfResults + 10 // Maximum number of results before triggering a clean up
|
||||
uptimeCleanUpThreshold = 10 * 24 * time.Hour // Maximum uptime age before triggering a cleanup
|
||||
eventsCleanUpThreshold = common.MaximumNumberOfEvents + 10 // Maximum number of events before triggering a cleanup
|
||||
resultsCleanUpThreshold = common.MaximumNumberOfResults + 10 // Maximum number of results before triggering a cleanup
|
||||
|
||||
uptimeRetention = 7 * 24 * time.Hour
|
||||
|
||||
|
@ -234,12 +235,12 @@ func (s *Store) Insert(ep *endpoint.Endpoint, result *endpoint.Result) error {
|
|||
// Endpoint doesn't exist in the database, insert it
|
||||
if endpointID, err = s.insertEndpoint(tx, ep); err != nil {
|
||||
_ = tx.Rollback()
|
||||
log.Printf("[sql.Insert] Failed to create endpoint with group=%s; endpoint=%s: %s", ep.Group, ep.Name, err.Error())
|
||||
log.Printf("[sql.Insert] Failed to create endpoint with key=%s: %s", ep.Key(), err.Error())
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
_ = tx.Rollback()
|
||||
log.Printf("[sql.Insert] Failed to retrieve id of endpoint with group=%s; endpoint=%s: %s", ep.Group, ep.Name, err.Error())
|
||||
log.Printf("[sql.Insert] Failed to retrieve id of endpoint with key=%s: %s", ep.Key(), err.Error())
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
@ -255,7 +256,7 @@ func (s *Store) Insert(ep *endpoint.Endpoint, result *endpoint.Result) error {
|
|||
numberOfEvents, err := s.getNumberOfEventsByEndpointID(tx, endpointID)
|
||||
if err != nil {
|
||||
// Silently fail
|
||||
log.Printf("[sql.Insert] Failed to retrieve total number of events for group=%s; endpoint=%s: %s", ep.Group, ep.Name, err.Error())
|
||||
log.Printf("[sql.Insert] Failed to retrieve total number of events for endpoint with key=%s: %s", ep.Key(), err.Error())
|
||||
}
|
||||
if numberOfEvents == 0 {
|
||||
// There's no events yet, which means we need to add the EventStart and the first healthy/unhealthy event
|
||||
|
@ -265,18 +266,18 @@ func (s *Store) Insert(ep *endpoint.Endpoint, result *endpoint.Result) error {
|
|||
})
|
||||
if err != nil {
|
||||
// Silently fail
|
||||
log.Printf("[sql.Insert] Failed to insert event=%s for group=%s; endpoint=%s: %s", endpoint.EventStart, ep.Group, ep.Name, err.Error())
|
||||
log.Printf("[sql.Insert] Failed to insert event=%s for endpoint with key=%s: %s", endpoint.EventStart, ep.Key(), err.Error())
|
||||
}
|
||||
event := endpoint.NewEventFromResult(result)
|
||||
if err = s.insertEndpointEvent(tx, endpointID, event); err != nil {
|
||||
// Silently fail
|
||||
log.Printf("[sql.Insert] Failed to insert event=%s for group=%s; endpoint=%s: %s", event.Type, ep.Group, ep.Name, err.Error())
|
||||
log.Printf("[sql.Insert] Failed to insert event=%s for endpoint with key=%s: %s", event.Type, ep.Key(), err.Error())
|
||||
}
|
||||
} else {
|
||||
// Get the success value of the previous result
|
||||
var lastResultSuccess bool
|
||||
if lastResultSuccess, err = s.getLastEndpointResultSuccessValue(tx, endpointID); err != nil {
|
||||
log.Printf("[sql.Insert] Failed to retrieve outcome of previous result for group=%s; endpoint=%s: %s", ep.Group, ep.Name, err.Error())
|
||||
log.Printf("[sql.Insert] Failed to retrieve outcome of previous result for endpoint with key=%s: %s", ep.Key(), err.Error())
|
||||
} else {
|
||||
// If we managed to retrieve the outcome of the previous result, we'll compare it with the new result.
|
||||
// If the final outcome (success or failure) of the previous and the new result aren't the same, it means
|
||||
|
@ -286,7 +287,7 @@ func (s *Store) Insert(ep *endpoint.Endpoint, result *endpoint.Result) error {
|
|||
event := endpoint.NewEventFromResult(result)
|
||||
if err = s.insertEndpointEvent(tx, endpointID, event); err != nil {
|
||||
// Silently fail
|
||||
log.Printf("[sql.Insert] Failed to insert event=%s for group=%s; endpoint=%s: %s", event.Type, ep.Group, ep.Name, err.Error())
|
||||
log.Printf("[sql.Insert] Failed to insert event=%s for endpoint with key=%s: %s", event.Type, ep.Key(), err.Error())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -295,40 +296,40 @@ func (s *Store) Insert(ep *endpoint.Endpoint, result *endpoint.Result) error {
|
|||
// (since we're only deleting MaximumNumberOfEvents at a time instead of 1)
|
||||
if numberOfEvents > eventsCleanUpThreshold {
|
||||
if err = s.deleteOldEndpointEvents(tx, endpointID); err != nil {
|
||||
log.Printf("[sql.Insert] Failed to delete old events for group=%s; endpoint=%s: %s", ep.Group, ep.Name, err.Error())
|
||||
log.Printf("[sql.Insert] Failed to delete old events for endpoint with key=%s: %s", ep.Key(), err.Error())
|
||||
}
|
||||
}
|
||||
}
|
||||
// Second, we need to insert the result.
|
||||
if err = s.insertEndpointResult(tx, endpointID, result); err != nil {
|
||||
log.Printf("[sql.Insert] Failed to insert result for group=%s; endpoint=%s: %s", ep.Group, ep.Name, err.Error())
|
||||
log.Printf("[sql.Insert] Failed to insert result for endpoint with key=%s: %s", ep.Key(), err.Error())
|
||||
_ = tx.Rollback() // If we can't insert the result, we'll rollback now since there's no point continuing
|
||||
return err
|
||||
}
|
||||
// Clean up old results
|
||||
numberOfResults, err := s.getNumberOfResultsByEndpointID(tx, endpointID)
|
||||
if err != nil {
|
||||
log.Printf("[sql.Insert] Failed to retrieve total number of results for group=%s; endpoint=%s: %s", ep.Group, ep.Name, err.Error())
|
||||
log.Printf("[sql.Insert] Failed to retrieve total number of results for endpoint with key=%s: %s", ep.Key(), err.Error())
|
||||
} else {
|
||||
if numberOfResults > resultsCleanUpThreshold {
|
||||
if err = s.deleteOldEndpointResults(tx, endpointID); err != nil {
|
||||
log.Printf("[sql.Insert] Failed to delete old results for group=%s; endpoint=%s: %s", ep.Group, ep.Name, err.Error())
|
||||
log.Printf("[sql.Insert] Failed to delete old results for endpoint with key=%s: %s", ep.Key(), err.Error())
|
||||
}
|
||||
}
|
||||
}
|
||||
// Finally, we need to insert the uptime data.
|
||||
// Because the uptime data significantly outlives the results, we can't rely on the results for determining the uptime
|
||||
if err = s.updateEndpointUptime(tx, endpointID, result); err != nil {
|
||||
log.Printf("[sql.Insert] Failed to update uptime for group=%s; endpoint=%s: %s", ep.Group, ep.Name, err.Error())
|
||||
log.Printf("[sql.Insert] Failed to update uptime for endpoint with key=%s: %s", ep.Key(), err.Error())
|
||||
}
|
||||
// Clean up old uptime entries
|
||||
ageOfOldestUptimeEntry, err := s.getAgeOfOldestEndpointUptimeEntry(tx, endpointID)
|
||||
if err != nil {
|
||||
log.Printf("[sql.Insert] Failed to retrieve oldest endpoint uptime entry for group=%s; endpoint=%s: %s", ep.Group, ep.Name, err.Error())
|
||||
log.Printf("[sql.Insert] Failed to retrieve oldest endpoint uptime entry for endpoint with key=%s: %s", ep.Key(), err.Error())
|
||||
} else {
|
||||
if ageOfOldestUptimeEntry > uptimeCleanUpThreshold {
|
||||
if err = s.deleteOldUptimeEntries(tx, endpointID, time.Now().Add(-(uptimeRetention + time.Hour))); err != nil {
|
||||
log.Printf("[sql.Insert] Failed to delete old uptime entries for group=%s; endpoint=%s: %s", ep.Group, ep.Name, err.Error())
|
||||
log.Printf("[sql.Insert] Failed to delete old uptime entries for endpoint with key=%s: %s", ep.Key(), err.Error())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -374,6 +375,8 @@ func (s *Store) DeleteAllEndpointStatusesNotInKeys(keys []string) int {
|
|||
}
|
||||
if s.writeThroughCache != nil {
|
||||
// It's easier to just wipe out the entire cache than to try to find all keys that are not in the keys list
|
||||
// This only happens on start and during tests, so it's fine for us to just clear the cache without worrying
|
||||
// about performance
|
||||
_ = s.writeThroughCache.DeleteKeysByPattern("*")
|
||||
}
|
||||
// Return number of rows deleted
|
||||
|
@ -381,6 +384,111 @@ func (s *Store) DeleteAllEndpointStatusesNotInKeys(keys []string) int {
|
|||
return int(rowsAffects)
|
||||
}
|
||||
|
||||
// GetTriggeredEndpointAlert returns whether the triggered alert for the specified endpoint as well as the necessary information to resolve it
|
||||
func (s *Store) GetTriggeredEndpointAlert(ep *endpoint.Endpoint, alert *alert.Alert) (exists bool, resolveKey string, numberOfSuccessesInARow int, err error) {
|
||||
//log.Printf("[sql.GetTriggeredEndpointAlert] Getting triggered alert with checksum=%s for endpoint with key=%s", alert.Checksum(), ep.Key())
|
||||
err = s.db.QueryRow(
|
||||
"SELECT resolve_key, number_of_successes_in_a_row FROM endpoint_alerts_triggered WHERE endpoint_id = (SELECT endpoint_id FROM endpoints WHERE endpoint_key = $1 LIMIT 1) AND configuration_checksum = $2",
|
||||
ep.Key(),
|
||||
alert.Checksum(),
|
||||
).Scan(&resolveKey, &numberOfSuccessesInARow)
|
||||
if err != nil {
|
||||
if errors.Is(err, sql.ErrNoRows) {
|
||||
return false, "", 0, nil
|
||||
}
|
||||
return false, "", 0, err
|
||||
}
|
||||
return true, resolveKey, numberOfSuccessesInARow, nil
|
||||
}
|
||||
|
||||
// UpsertTriggeredEndpointAlert inserts/updates a triggered alert for an endpoint
|
||||
// Used for persistence of triggered alerts across application restarts
|
||||
func (s *Store) UpsertTriggeredEndpointAlert(ep *endpoint.Endpoint, triggeredAlert *alert.Alert) error {
|
||||
//log.Printf("[sql.UpsertTriggeredEndpointAlert] Upserting triggered alert with checksum=%s for endpoint with key=%s", triggeredAlert.Checksum(), ep.Key())
|
||||
tx, err := s.db.Begin()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
endpointID, err := s.getEndpointID(tx, ep)
|
||||
if err != nil {
|
||||
if errors.Is(err, common.ErrEndpointNotFound) {
|
||||
// Endpoint doesn't exist in the database, insert it
|
||||
// This shouldn't happen, but we'll handle it anyway
|
||||
if endpointID, err = s.insertEndpoint(tx, ep); err != nil {
|
||||
_ = tx.Rollback()
|
||||
log.Printf("[sql.UpsertTriggeredEndpointAlert] Failed to create endpoint with key=%s: %s", ep.Key(), err.Error())
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
_ = tx.Rollback()
|
||||
log.Printf("[sql.UpsertTriggeredEndpointAlert] Failed to retrieve id of endpoint with key=%s: %s", ep.Key(), err.Error())
|
||||
return err
|
||||
}
|
||||
}
|
||||
_, err = tx.Exec(
|
||||
`
|
||||
INSERT INTO endpoint_alerts_triggered (endpoint_id, configuration_checksum, resolve_key, number_of_successes_in_a_row)
|
||||
VALUES ($1, $2, $3, $4)
|
||||
ON CONFLICT(endpoint_id, configuration_checksum) DO UPDATE SET
|
||||
resolve_key = $3,
|
||||
number_of_successes_in_a_row = $4
|
||||
`,
|
||||
endpointID,
|
||||
triggeredAlert.Checksum(),
|
||||
triggeredAlert.ResolveKey,
|
||||
ep.NumberOfSuccessesInARow, // We only persist NumberOfSuccessesInARow, because all alerts in this table are already triggered
|
||||
)
|
||||
if err != nil {
|
||||
_ = tx.Rollback()
|
||||
log.Printf("[sql.UpsertTriggeredEndpointAlert] Failed to persist triggered alert for endpoint with key=%s: %s", ep.Key(), err.Error())
|
||||
return err
|
||||
}
|
||||
if err = tx.Commit(); err != nil {
|
||||
_ = tx.Rollback()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// DeleteTriggeredEndpointAlert deletes a triggered alert for an endpoint
|
||||
func (s *Store) DeleteTriggeredEndpointAlert(ep *endpoint.Endpoint, triggeredAlert *alert.Alert) error {
|
||||
//log.Printf("[sql.DeleteTriggeredEndpointAlert] Deleting triggered alert with checksum=%s for endpoint with key=%s", triggeredAlert.Checksum(), ep.Key())
|
||||
_, err := s.db.Exec("DELETE FROM endpoint_alerts_triggered WHERE configuration_checksum = $1 AND endpoint_id = (SELECT endpoint_id FROM endpoints WHERE endpoint_key = $2 LIMIT 1)", triggeredAlert.Checksum(), ep.Key())
|
||||
return err
|
||||
}
|
||||
|
||||
// DeleteAllTriggeredAlertsNotInChecksumsByEndpoint removes all triggered alerts owned by an endpoint whose alert
|
||||
// configurations are not provided in the checksums list.
|
||||
// This prevents triggered alerts that have been removed or modified from lingering in the database.
|
||||
func (s *Store) DeleteAllTriggeredAlertsNotInChecksumsByEndpoint(ep *endpoint.Endpoint, checksums []string) int {
|
||||
//log.Printf("[sql.DeleteAllTriggeredAlertsNotInChecksumsByEndpoint] Deleting triggered alerts for endpoint with key=%s that do not belong to any of checksums=%v", ep.Key(), checksums)
|
||||
var err error
|
||||
var result sql.Result
|
||||
if len(checksums) == 0 {
|
||||
// No checksums? Then it means there are no (enabled) alerts configured for that endpoint, so we can get rid of all
|
||||
// persisted triggered alerts for that endpoint
|
||||
result, err = s.db.Exec("DELETE FROM endpoint_alerts_triggered WHERE endpoint_id = (SELECT endpoint_id FROM endpoints WHERE endpoint_key = $1 LIMIT 1)", ep.Key())
|
||||
} else {
|
||||
args := make([]interface{}, 0, len(checksums)+1)
|
||||
args = append(args, ep.Key())
|
||||
query := `DELETE FROM endpoint_alerts_triggered
|
||||
WHERE endpoint_id = (SELECT endpoint_id FROM endpoints WHERE endpoint_key = $1 LIMIT 1)
|
||||
AND configuration_checksum NOT IN (`
|
||||
for i := range checksums {
|
||||
query += fmt.Sprintf("$%d,", i+2)
|
||||
args = append(args, checksums[i])
|
||||
}
|
||||
query = query[:len(query)-1] + ")" // Remove the last comma and add the closing parenthesis
|
||||
result, err = s.db.Exec(query, args...)
|
||||
}
|
||||
if err != nil {
|
||||
log.Printf("[sql.DeleteAllTriggeredAlertsNotInChecksumsByEndpoint] Failed to delete rows for endpoint with key=%s that do not belong to any of checksums=%v: %s", ep.Key(), checksums, err.Error())
|
||||
return 0
|
||||
}
|
||||
// Return number of rows deleted
|
||||
rowsAffects, _ := result.RowsAffected()
|
||||
return int(rowsAffects)
|
||||
}
|
||||
|
||||
// Clear deletes everything from the store
|
||||
func (s *Store) Clear() {
|
||||
_, _ = s.db.Exec("DELETE FROM endpoints")
|
||||
|
|
|
@ -1,9 +1,11 @@
|
|||
package sql
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/TwiN/gatus/v5/alerting/alert"
|
||||
"github.com/TwiN/gatus/v5/config/endpoint"
|
||||
"github.com/TwiN/gatus/v5/storage/store/common"
|
||||
"github.com/TwiN/gatus/v5/storage/store/common/paging"
|
||||
|
@ -81,13 +83,13 @@ var (
|
|||
)
|
||||
|
||||
func TestNewStore(t *testing.T) {
|
||||
if _, err := NewStore("", "TestNewStore.db", false); err != ErrDatabaseDriverNotSpecified {
|
||||
if _, err := NewStore("", t.TempDir()+"/TestNewStore.db", false); !errors.Is(err, ErrDatabaseDriverNotSpecified) {
|
||||
t.Error("expected error due to blank driver parameter")
|
||||
}
|
||||
if _, err := NewStore("sqlite", "", false); err != ErrPathNotSpecified {
|
||||
if _, err := NewStore("sqlite", "", false); !errors.Is(err, ErrPathNotSpecified) {
|
||||
t.Error("expected error due to blank path parameter")
|
||||
}
|
||||
if store, err := NewStore("sqlite", t.TempDir()+"/TestNewStore.db", false); err != nil {
|
||||
if store, err := NewStore("sqlite", t.TempDir()+"/TestNewStore.db", true); err != nil {
|
||||
t.Error("shouldn't have returned any error, got", err.Error())
|
||||
} else {
|
||||
_ = store.db.Close()
|
||||
|
@ -168,6 +170,40 @@ func TestStore_InsertCleansUpEventsAndResultsProperly(t *testing.T) {
|
|||
store.Clear()
|
||||
}
|
||||
|
||||
func TestStore_InsertWithCaching(t *testing.T) {
|
||||
store, _ := NewStore("sqlite", t.TempDir()+"/TestStore_InsertWithCaching.db", true)
|
||||
defer store.Close()
|
||||
// Add 2 results
|
||||
store.Insert(&testEndpoint, &testSuccessfulResult)
|
||||
store.Insert(&testEndpoint, &testSuccessfulResult)
|
||||
// Verify that they exist
|
||||
endpointStatuses, _ := store.GetAllEndpointStatuses(paging.NewEndpointStatusParams().WithResults(1, 20))
|
||||
if numberOfEndpointStatuses := len(endpointStatuses); numberOfEndpointStatuses != 1 {
|
||||
t.Fatalf("expected 1 EndpointStatus, got %d", numberOfEndpointStatuses)
|
||||
}
|
||||
if len(endpointStatuses[0].Results) != 2 {
|
||||
t.Fatalf("expected 2 results, got %d", len(endpointStatuses[0].Results))
|
||||
}
|
||||
// Add 2 more results
|
||||
store.Insert(&testEndpoint, &testUnsuccessfulResult)
|
||||
store.Insert(&testEndpoint, &testUnsuccessfulResult)
|
||||
// Verify that they exist
|
||||
endpointStatuses, _ = store.GetAllEndpointStatuses(paging.NewEndpointStatusParams().WithResults(1, 20))
|
||||
if numberOfEndpointStatuses := len(endpointStatuses); numberOfEndpointStatuses != 1 {
|
||||
t.Fatalf("expected 1 EndpointStatus, got %d", numberOfEndpointStatuses)
|
||||
}
|
||||
if len(endpointStatuses[0].Results) != 4 {
|
||||
t.Fatalf("expected 4 results, got %d", len(endpointStatuses[0].Results))
|
||||
}
|
||||
// Clear the store, which should also clear the cache
|
||||
store.Clear()
|
||||
// Verify that they no longer exist
|
||||
endpointStatuses, _ = store.GetAllEndpointStatuses(paging.NewEndpointStatusParams().WithResults(1, 20))
|
||||
if numberOfEndpointStatuses := len(endpointStatuses); numberOfEndpointStatuses != 0 {
|
||||
t.Fatalf("expected 0 EndpointStatus, got %d", numberOfEndpointStatuses)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStore_Persistence(t *testing.T) {
|
||||
path := t.TempDir() + "/TestStore_Persistence.db"
|
||||
store, _ := NewStore("sqlite", path, false)
|
||||
|
@ -368,10 +404,10 @@ func TestStore_NoRows(t *testing.T) {
|
|||
defer store.Close()
|
||||
tx, _ := store.db.Begin()
|
||||
defer tx.Rollback()
|
||||
if _, err := store.getLastEndpointResultSuccessValue(tx, 1); err != errNoRowsReturned {
|
||||
if _, err := store.getLastEndpointResultSuccessValue(tx, 1); !errors.Is(err, errNoRowsReturned) {
|
||||
t.Errorf("should've %v, got %v", errNoRowsReturned, err)
|
||||
}
|
||||
if _, err := store.getAgeOfOldestEndpointUptimeEntry(tx, 1); err != errNoRowsReturned {
|
||||
if _, err := store.getAgeOfOldestEndpointUptimeEntry(tx, 1); !errors.Is(err, errNoRowsReturned) {
|
||||
t.Errorf("should've %v, got %v", errNoRowsReturned, err)
|
||||
}
|
||||
}
|
||||
|
@ -564,3 +600,131 @@ func TestCacheKey(t *testing.T) {
|
|||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestTriggeredEndpointAlertsPersistence(t *testing.T) {
|
||||
store, _ := NewStore("sqlite", t.TempDir()+"/TestTriggeredEndpointAlertsPersistence.db", false)
|
||||
defer store.Close()
|
||||
yes, desc := false, "description"
|
||||
ep := testEndpoint
|
||||
ep.NumberOfSuccessesInARow = 0
|
||||
alrt := &alert.Alert{
|
||||
Type: alert.TypePagerDuty,
|
||||
Enabled: &yes,
|
||||
FailureThreshold: 4,
|
||||
SuccessThreshold: 2,
|
||||
Description: &desc,
|
||||
SendOnResolved: &yes,
|
||||
Triggered: true,
|
||||
ResolveKey: "1234567",
|
||||
}
|
||||
// Alert just triggered, so NumberOfSuccessesInARow is 0
|
||||
if err := store.UpsertTriggeredEndpointAlert(&ep, alrt); err != nil {
|
||||
t.Fatal("expected no error, got", err.Error())
|
||||
}
|
||||
exists, resolveKey, numberOfSuccessesInARow, err := store.GetTriggeredEndpointAlert(&ep, alrt)
|
||||
if err != nil {
|
||||
t.Fatal("expected no error, got", err.Error())
|
||||
}
|
||||
if !exists {
|
||||
t.Error("expected triggered alert to exist")
|
||||
}
|
||||
if resolveKey != alrt.ResolveKey {
|
||||
t.Errorf("expected resolveKey %s, got %s", alrt.ResolveKey, resolveKey)
|
||||
}
|
||||
if numberOfSuccessesInARow != ep.NumberOfSuccessesInARow {
|
||||
t.Errorf("expected persisted NumberOfSuccessesInARow to be %d, got %d", ep.NumberOfSuccessesInARow, numberOfSuccessesInARow)
|
||||
}
|
||||
// Endpoint just had a successful evaluation, so NumberOfSuccessesInARow is now 1
|
||||
ep.NumberOfSuccessesInARow++
|
||||
if err := store.UpsertTriggeredEndpointAlert(&ep, alrt); err != nil {
|
||||
t.Fatal("expected no error, got", err.Error())
|
||||
}
|
||||
exists, resolveKey, numberOfSuccessesInARow, err = store.GetTriggeredEndpointAlert(&ep, alrt)
|
||||
if err != nil {
|
||||
t.Error("expected no error, got", err.Error())
|
||||
}
|
||||
if !exists {
|
||||
t.Error("expected triggered alert to exist")
|
||||
}
|
||||
if resolveKey != alrt.ResolveKey {
|
||||
t.Errorf("expected resolveKey %s, got %s", alrt.ResolveKey, resolveKey)
|
||||
}
|
||||
if numberOfSuccessesInARow != ep.NumberOfSuccessesInARow {
|
||||
t.Errorf("expected persisted NumberOfSuccessesInARow to be %d, got %d", ep.NumberOfSuccessesInARow, numberOfSuccessesInARow)
|
||||
}
|
||||
// Simulate the endpoint having another successful evaluation, which means the alert is now resolved,
|
||||
// and we should delete the triggered alert from the store
|
||||
ep.NumberOfSuccessesInARow++
|
||||
if err := store.DeleteTriggeredEndpointAlert(&ep, alrt); err != nil {
|
||||
t.Fatal("expected no error, got", err.Error())
|
||||
}
|
||||
exists, _, _, err = store.GetTriggeredEndpointAlert(&ep, alrt)
|
||||
if err != nil {
|
||||
t.Error("expected no error, got", err.Error())
|
||||
}
|
||||
if exists {
|
||||
t.Error("expected triggered alert to no longer exist as it has been deleted")
|
||||
}
|
||||
}
|
||||
|
||||
func TestStore_DeleteAllTriggeredAlertsNotInChecksumsByEndpoint(t *testing.T) {
|
||||
store, _ := NewStore("sqlite", t.TempDir()+"/TestStore_DeleteAllTriggeredAlertsNotInChecksumsByEndpoint.db", false)
|
||||
defer store.Close()
|
||||
yes, desc := false, "description"
|
||||
ep1 := testEndpoint
|
||||
ep1.Name = "ep1"
|
||||
ep2 := testEndpoint
|
||||
ep2.Name = "ep2"
|
||||
alert1 := alert.Alert{
|
||||
Type: alert.TypePagerDuty,
|
||||
Enabled: &yes,
|
||||
FailureThreshold: 4,
|
||||
SuccessThreshold: 2,
|
||||
Description: &desc,
|
||||
SendOnResolved: &yes,
|
||||
Triggered: true,
|
||||
ResolveKey: "1234567",
|
||||
}
|
||||
alert2 := alert1
|
||||
alert2.Type, alert2.ResolveKey = alert.TypeSlack, ""
|
||||
alert3 := alert2
|
||||
if err := store.UpsertTriggeredEndpointAlert(&ep1, &alert1); err != nil {
|
||||
t.Fatal("expected no error, got", err.Error())
|
||||
}
|
||||
if err := store.UpsertTriggeredEndpointAlert(&ep1, &alert2); err != nil {
|
||||
t.Fatal("expected no error, got", err.Error())
|
||||
}
|
||||
if err := store.UpsertTriggeredEndpointAlert(&ep2, &alert3); err != nil {
|
||||
t.Fatal("expected no error, got", err.Error())
|
||||
}
|
||||
if exists, _, _, _ := store.GetTriggeredEndpointAlert(&ep1, &alert1); !exists {
|
||||
t.Error("expected alert1 to have been deleted")
|
||||
}
|
||||
if exists, _, _, _ := store.GetTriggeredEndpointAlert(&ep1, &alert2); !exists {
|
||||
t.Error("expected alert2 to exist for ep1")
|
||||
}
|
||||
if exists, _, _, _ := store.GetTriggeredEndpointAlert(&ep2, &alert3); !exists {
|
||||
t.Error("expected alert3 to exist for ep2")
|
||||
}
|
||||
// Now we simulate the alert configuration being updated, and the alert being resolved
|
||||
if deleted := store.DeleteAllTriggeredAlertsNotInChecksumsByEndpoint(&ep1, []string{alert2.Checksum()}); deleted != 1 {
|
||||
t.Errorf("expected 1 triggered alert to be deleted, got %d", deleted)
|
||||
}
|
||||
if exists, _, _, _ := store.GetTriggeredEndpointAlert(&ep1, &alert1); exists {
|
||||
t.Error("expected alert1 to have been deleted")
|
||||
}
|
||||
if exists, _, _, _ := store.GetTriggeredEndpointAlert(&ep1, &alert2); !exists {
|
||||
t.Error("expected alert2 to exist for ep1")
|
||||
}
|
||||
if exists, _, _, _ := store.GetTriggeredEndpointAlert(&ep2, &alert3); !exists {
|
||||
t.Error("expected alert3 to exist for ep2")
|
||||
}
|
||||
// Now let's just assume all alerts for ep1 were removed
|
||||
if deleted := store.DeleteAllTriggeredAlertsNotInChecksumsByEndpoint(&ep1, []string{}); deleted != 1 {
|
||||
t.Errorf("expected 1 triggered alert to be deleted, got %d", deleted)
|
||||
}
|
||||
// Make sure the alert for ep2 still exists
|
||||
if exists, _, _, _ := store.GetTriggeredEndpointAlert(&ep2, &alert3); !exists {
|
||||
t.Error("expected alert3 to exist for ep2")
|
||||
}
|
||||
}
|
||||
|
|
|
@ -5,6 +5,7 @@ import (
|
|||
"log"
|
||||
"time"
|
||||
|
||||
"github.com/TwiN/gatus/v5/alerting/alert"
|
||||
"github.com/TwiN/gatus/v5/config/endpoint"
|
||||
"github.com/TwiN/gatus/v5/storage"
|
||||
"github.com/TwiN/gatus/v5/storage/store/common/paging"
|
||||
|
@ -41,6 +42,21 @@ type Store interface {
|
|||
// Used to delete endpoints that have been persisted but are no longer part of the configured endpoints
|
||||
DeleteAllEndpointStatusesNotInKeys(keys []string) int
|
||||
|
||||
// GetTriggeredEndpointAlert returns whether the triggered alert for the specified endpoint as well as the necessary information to resolve it
|
||||
GetTriggeredEndpointAlert(ep *endpoint.Endpoint, alert *alert.Alert) (exists bool, resolveKey string, numberOfSuccessesInARow int, err error)
|
||||
|
||||
// UpsertTriggeredEndpointAlert inserts/updates a triggered alert for an endpoint
|
||||
// Used for persistence of triggered alerts across application restarts
|
||||
UpsertTriggeredEndpointAlert(ep *endpoint.Endpoint, triggeredAlert *alert.Alert) error
|
||||
|
||||
// DeleteTriggeredEndpointAlert deletes a triggered alert for an endpoint
|
||||
DeleteTriggeredEndpointAlert(ep *endpoint.Endpoint, triggeredAlert *alert.Alert) error
|
||||
|
||||
// DeleteAllTriggeredAlertsNotInChecksumsByEndpoint removes all triggered alerts owned by an endpoint whose alert
|
||||
// configurations are not provided in the checksums list.
|
||||
// This prevents triggered alerts that have been removed or modified from lingering in the database.
|
||||
DeleteAllTriggeredAlertsNotInChecksumsByEndpoint(ep *endpoint.Endpoint, checksums []string) int
|
||||
|
||||
// Clear deletes everything from the store
|
||||
Clear()
|
||||
|
||||
|
|
|
@ -7,6 +7,7 @@ import (
|
|||
|
||||
"github.com/TwiN/gatus/v5/alerting"
|
||||
"github.com/TwiN/gatus/v5/config/endpoint"
|
||||
"github.com/TwiN/gatus/v5/storage/store"
|
||||
)
|
||||
|
||||
// HandleAlerting takes care of alerts to resolve and alerts to trigger based on result success or failure
|
||||
|
@ -50,9 +51,12 @@ func handleAlertsToTrigger(ep *endpoint.Endpoint, result *endpoint.Result, alert
|
|||
log.Printf("[watchdog.handleAlertsToTrigger] Failed to send an alert for endpoint=%s: %s", ep.Name, err.Error())
|
||||
} else {
|
||||
endpointAlert.Triggered = true
|
||||
if err := store.Get().UpsertTriggeredEndpointAlert(ep, endpointAlert); err != nil {
|
||||
log.Printf("[watchdog.handleAlertsToTrigger] Failed to persist triggered endpoint alert for endpoint with key=%s: %s", ep.Key(), err.Error())
|
||||
}
|
||||
}
|
||||
} else {
|
||||
log.Printf("[watchdog.handleAlertsToResolve] Not sending alert of type=%s despite being TRIGGERED, because the provider wasn't configured properly", endpointAlert.Type)
|
||||
log.Printf("[watchdog.handleAlertsToTrigger] Not sending alert of type=%s despite being TRIGGERED, because the provider wasn't configured properly", endpointAlert.Type)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -60,21 +64,31 @@ func handleAlertsToTrigger(ep *endpoint.Endpoint, result *endpoint.Result, alert
|
|||
func handleAlertsToResolve(ep *endpoint.Endpoint, result *endpoint.Result, alertingConfig *alerting.Config, debug bool) {
|
||||
ep.NumberOfSuccessesInARow++
|
||||
for _, endpointAlert := range ep.Alerts {
|
||||
if !endpointAlert.IsEnabled() || !endpointAlert.Triggered || endpointAlert.SuccessThreshold > ep.NumberOfSuccessesInARow {
|
||||
isStillBelowSuccessThreshold := endpointAlert.SuccessThreshold > ep.NumberOfSuccessesInARow
|
||||
if isStillBelowSuccessThreshold && endpointAlert.IsEnabled() {
|
||||
// Persist NumberOfSuccessesInARow
|
||||
if err := store.Get().UpsertTriggeredEndpointAlert(ep, endpointAlert); err != nil {
|
||||
log.Printf("[watchdog.handleAlertsToResolve] Failed to update triggered endpoint alert for endpoint with key=%s: %s", ep.Key(), err.Error())
|
||||
}
|
||||
}
|
||||
if !endpointAlert.IsEnabled() || !endpointAlert.Triggered || isStillBelowSuccessThreshold {
|
||||
continue
|
||||
}
|
||||
// Even if the alert provider returns an error, we still set the alert's Triggered variable to false.
|
||||
// Further explanation can be found on Alert's Triggered field.
|
||||
endpointAlert.Triggered = false
|
||||
if err := store.Get().DeleteTriggeredEndpointAlert(ep, endpointAlert); err != nil {
|
||||
log.Printf("[watchdog.handleAlertsToResolve] Failed to delete persisted triggered endpoint alert for endpoint with key=%s: %s", ep.Key(), err.Error())
|
||||
}
|
||||
if !endpointAlert.IsSendingOnResolved() {
|
||||
continue
|
||||
}
|
||||
alertProvider := alertingConfig.GetAlertingProviderByAlertType(endpointAlert.Type)
|
||||
if alertProvider != nil {
|
||||
log.Printf("[watchdog.handleAlertsToResolve] Sending %s alert because alert for endpoint=%s with description='%s' has been RESOLVED", endpointAlert.Type, ep.Name, endpointAlert.GetDescription())
|
||||
log.Printf("[watchdog.handleAlertsToResolve] Sending %s alert because alert for endpoint with key=%s with description='%s' has been RESOLVED", endpointAlert.Type, ep.Key(), endpointAlert.GetDescription())
|
||||
err := alertProvider.Send(ep, endpointAlert, result, true)
|
||||
if err != nil {
|
||||
log.Printf("[watchdog.handleAlertsToResolve] Failed to send an alert for endpoint=%s: %s", ep.Name, err.Error())
|
||||
log.Printf("[watchdog.handleAlertsToResolve] Failed to send an alert for endpoint with key=%s: %s", ep.Key(), err.Error())
|
||||
}
|
||||
} else {
|
||||
log.Printf("[watchdog.handleAlertsToResolve] Not sending alert of type=%s despite being RESOLVED, because the provider wasn't configured properly", endpointAlert.Type)
|
||||
|
|
Loading…
Reference in a new issue