twin-gatus/watchdog/watchdog.go

package watchdog

import (
	"context"
	"log"
	"sync"
	"time"

	"github.com/TwiN/gatus/v5/alerting"
	"github.com/TwiN/gatus/v5/config"
	"github.com/TwiN/gatus/v5/config/maintenance"
	"github.com/TwiN/gatus/v5/core"
	"github.com/TwiN/gatus/v5/metrics"
	"github.com/TwiN/gatus/v5/storage/store"
)

var (
	// monitoringMutex is used to prevent multiple endpoints from being evaluated at the same time.
	// Without this, conditions using response time may become inaccurate.
	// It is only acquired by execute when the monitoring lock is not disabled.
	monitoringMutex sync.Mutex

	// ctx is the context created by Monitor; it is handed to the monitoring goroutines so they
	// can be told to stop.
	ctx context.Context
	// cancelFunc cancels ctx. It is assigned by Monitor and invoked by Shutdown.
	cancelFunc context.CancelFunc
)

// Monitor loops over each endpoint in cfg.Endpoints and starts a goroutine to monitor each enabled
// endpoint separately.
//
// Every call creates a fresh cancellable context and publishes it, together with its cancel
// function, through the package-level ctx and cancelFunc variables so that Shutdown can stop the
// goroutines started here. The context is also kept in a local variable for the duration of the
// call: start-up is staggered and can take a while, and reading the package-level variable on each
// iteration would pick up a context created by a later call to Monitor.
//
// If the context is cancelled while endpoints are still being started, Monitor returns without
// starting the remaining ones.
func Monitor(cfg *config.Config) {
	runCtx, cancel := context.WithCancel(context.Background())
	ctx, cancelFunc = runCtx, cancel
	for _, endpoint := range cfg.Endpoints {
		if !endpoint.IsEnabled() {
			continue
		}
		// To prevent multiple requests from running at the same time, we'll wait for a little before each iteration.
		// The wait is abandoned as soon as monitoring is shut down so that no new goroutine is started afterwards.
		select {
		case <-runCtx.Done():
			return
		case <-time.After(777 * time.Millisecond):
		}
		go monitor(endpoint, cfg.Alerting, cfg.Maintenance, cfg.DisableMonitoringLock, cfg.Metrics, cfg.Debug, runCtx)
	}
}

// monitor evaluates a single endpoint in a loop until ctx is cancelled.
//
// The endpoint is executed once immediately, then again every time endpoint.Interval has elapsed
// since the previous execution finished. All other arguments are forwarded unchanged to execute.
// When ctx is cancelled, a message is logged and the function returns.
func monitor(endpoint *core.Endpoint, alertingConfig *alerting.Config, maintenanceConfig *maintenance.Config, disableMonitoringLock, enabledMetrics, debug bool, ctx context.Context) {
	// Run it immediately on start
	execute(endpoint, alertingConfig, maintenanceConfig, disableMonitoringLock, enabledMetrics, debug)
	// A single timer is reused for every wait instead of allocating one per iteration, and it is
	// stopped on return so that a pending wait does not outlive this goroutine.
	timer := time.NewTimer(endpoint.Interval)
	defer timer.Stop()
	// Loop for the next executions
	for {
		select {
		case <-ctx.Done():
			log.Printf("[watchdog][monitor] Canceling current execution of group=%s; endpoint=%s", endpoint.Group, endpoint.Name)
			return
		case <-timer.C:
			execute(endpoint, alertingConfig, maintenanceConfig, disableMonitoringLock, enabledMetrics, debug)
			// The timer has fired and its channel has been drained, so Reset is safe here.
			// Re-arming after execute keeps the interval measured from the end of the execution.
			timer.Reset(endpoint.Interval)
		}
	}
}

// execute performs one monitoring pass for the given endpoint.
//
// It evaluates the endpoint's health, optionally publishes metrics (enabledMetrics), stores the
// result through UpdateEndpointStatuses, logs a summary line and, unless maintenanceConfig reports
// an ongoing maintenance window, hands the result to HandleAlerting.
//
// Unless disableMonitoringLock is true, the whole pass runs while holding monitoringMutex so that
// only one endpoint is processed at a time. When debug is true, additional log lines are emitted.
func execute(endpoint *core.Endpoint, alertingConfig *alerting.Config, maintenanceConfig *maintenance.Config, disableMonitoringLock, enabledMetrics, debug bool) {
	if !disableMonitoringLock {
		// By placing the lock here, we prevent multiple endpoints from being monitored at the exact same time, which
		// could cause performance issues and return inaccurate results
		monitoringMutex.Lock()
		// Deferring the unlock guarantees the mutex is released on every exit path of this function.
		defer monitoringMutex.Unlock()
	}
	if debug {
		log.Printf("[watchdog][execute] Monitoring group=%s; endpoint=%s", endpoint.Group, endpoint.Name)
	}
	result := endpoint.EvaluateHealth()
	if enabledMetrics {
		metrics.PublishMetricsForEndpoint(endpoint, result)
	}
	UpdateEndpointStatuses(endpoint, result)
	log.Printf(
		"[watchdog][execute] Monitored group=%s; endpoint=%s; success=%v; errors=%d; duration=%s",
		endpoint.Group,
		endpoint.Name,
		result.Success,
		len(result.Errors),
		result.Duration.Round(time.Millisecond),
	)
	if !maintenanceConfig.IsUnderMaintenance() {
		// TODO: Consider moving this after the monitoring lock is unlocked? I mean, how much noise can a single alerting provider cause...
		HandleAlerting(endpoint, result, alertingConfig, debug)
	} else if debug {
		log.Println("[watchdog][execute] Not handling alerting because currently in the maintenance window")
	}
	if debug {
		log.Printf("[watchdog][execute] Waiting for interval=%s before monitoring group=%s endpoint=%s again", endpoint.Interval, endpoint.Group, endpoint.Name)
	}
}

// UpdateEndpointStatuses records the given result for the given endpoint in the configured store.
// A failure to insert is logged and otherwise ignored; nothing is returned to the caller.
func UpdateEndpointStatuses(endpoint *core.Endpoint, result *core.Result) {
	insertErr := store.Get().Insert(endpoint, result)
	if insertErr == nil {
		return
	}
	log.Println("[watchdog][UpdateEndpointStatuses] Failed to insert data in storage:", insertErr.Error())
}

// Shutdown stops monitoring all endpoints by invoking the cancel function stored by Monitor.
// It does nothing if no cancel function has been stored yet, i.e. if Monitor has not been called,
// instead of panicking on a nil function call.
func Shutdown() {
	if cancelFunc == nil {
		return
	}
	cancelFunc()
}
Add watchdog package 2019-09-04 23:37:13 +00:00			`package watchdog`
Add request handlers and move monitoring to watchdog package 2019-09-07 01:59:50 +00:00
			`import (`
#29: Automatically reload on configuration file update 2021-05-19 02:29:15 +00:00			`"context"`
Implement interval + Add timestamp to Result struct 2019-09-09 01:07:08 +00:00			`"log"`
Add request handlers and move monitoring to watchdog package 2019-09-07 01:59:50 +00:00			`"sync"`
			`"time"`
(feat) Add auto-discovery in k8s \| Adarsh 2020-10-30 15:30:03 +00:00
chore: Bump module version to v5 2022-12-06 06:41:09 +00:00			`"github.com/TwiN/gatus/v5/alerting"`
			`"github.com/TwiN/gatus/v5/config"`
			`"github.com/TwiN/gatus/v5/config/maintenance"`
			`"github.com/TwiN/gatus/v5/core"`
			`"github.com/TwiN/gatus/v5/metrics"`
			`"github.com/TwiN/gatus/v5/storage/store"`
Add request handlers and move monitoring to watchdog package 2019-09-07 01:59:50 +00:00			`)`

			`var (`
Rename Service to Endpoint (#192) * Add clarifications in comments * #191: Rename Service to Endpoint 2021-10-23 20:47:12 +00:00			`// monitoringMutex is used to prevent multiple endpoint from being evaluated at the same time.`
Support sending notifications when alert is resolved Add debug parameter for those wishing to filter some noise from the logs 2020-09-05 01:31:28 +00:00			`// Without this, conditions using response time may become inaccurate.`
			`monitoringMutex sync.Mutex`
#29: Automatically reload on configuration file update 2021-05-19 02:29:15 +00:00
			`ctx context.Context`
			`cancelFunc context.CancelFunc`
Add request handlers and move monitoring to watchdog package 2019-09-07 01:59:50 +00:00			`)`

Rename Service to Endpoint (#192) * Add clarifications in comments * #191: Rename Service to Endpoint 2021-10-23 20:47:12 +00:00			`// Monitor loops over each endpoint and starts a goroutine to monitor each endpoint separately`
Allow configuration file to be passed as parameter 2019-12-04 21:44:35 +00:00			`func Monitor(cfg *config.Config) {`
#29: Automatically reload on configuration file update 2021-05-19 02:29:15 +00:00			`ctx, cancelFunc = context.WithCancel(context.Background())`
Rename Service to Endpoint (#192) * Add clarifications in comments * #191: Rename Service to Endpoint 2021-10-23 20:47:12 +00:00			`for _, endpoint := range cfg.Endpoints {`
			`if endpoint.IsEnabled() {`
Close #74: Add maintenance window 2021-09-22 04:04:51 +00:00			`// To prevent multiple requests from running at the same time, we'll wait for a little before each iteration`
Move store initialization to store package This will allow importing storage.Config without importing every SQL drivers in the known universe 2021-10-28 23:35:46 +00:00			`time.Sleep(777 * time.Millisecond)`
Rename Service to Endpoint (#192) * Add clarifications in comments * #191: Rename Service to Endpoint 2021-10-23 20:47:12 +00:00			`go monitor(endpoint, cfg.Alerting, cfg.Maintenance, cfg.DisableMonitoringLock, cfg.Metrics, cfg.Debug, ctx)`
Add enabled parameter to service (#175) * feat: Add enabled flag to service * Add IsEnabled method Co-authored-by: 1newsr <1newsr@users.noreply.github.com> 2021-09-18 15:52:11 +00:00			`}`
Prevent multiple services from being evaluated at the same time 2020-04-06 22:58:13 +00:00			`}`
			`}`

Move store initialization to store package This will allow importing storage.Config without importing every SQL drivers in the known universe 2021-10-28 23:35:46 +00:00			`// monitor a single endpoint in a loop`
Rename Service to Endpoint (#192) * Add clarifications in comments * #191: Rename Service to Endpoint 2021-10-23 20:47:12 +00:00			`func monitor(endpoint core.Endpoint, alertingConfig alerting.Config, maintenanceConfig *maintenance.Config, disableMonitoringLock, enabledMetrics, debug bool, ctx context.Context) {`
#29: Automatically reload on configuration file update 2021-05-19 02:29:15 +00:00			`// Run it immediately on start`
Rename Service to Endpoint (#192) * Add clarifications in comments * #191: Rename Service to Endpoint 2021-10-23 20:47:12 +00:00			`execute(endpoint, alertingConfig, maintenanceConfig, disableMonitoringLock, enabledMetrics, debug)`
#29: Automatically reload on configuration file update 2021-05-19 02:29:15 +00:00			`// Loop for the next executions`
Prevent multiple services from being evaluated at the same time 2020-04-06 22:58:13 +00:00			`for {`
#29: Automatically reload on configuration file update 2021-05-19 02:29:15 +00:00			`select {`
			`case <-ctx.Done():`
Rename Service to Endpoint (#192) * Add clarifications in comments * #191: Rename Service to Endpoint 2021-10-23 20:47:12 +00:00			`log.Printf("[watchdog][monitor] Canceling current execution of group=%s; endpoint=%s", endpoint.Group, endpoint.Name)`
#29: Automatically reload on configuration file update 2021-05-19 02:29:15 +00:00			`return`
Rename Service to Endpoint (#192) * Add clarifications in comments * #191: Rename Service to Endpoint 2021-10-23 20:47:12 +00:00			`case <-time.After(endpoint.Interval):`
			`execute(endpoint, alertingConfig, maintenanceConfig, disableMonitoringLock, enabledMetrics, debug)`
Support sending notifications when alert is resolved Add debug parameter for those wishing to filter some noise from the logs 2020-09-05 01:31:28 +00:00			`}`
#29: Automatically reload on configuration file update 2021-05-19 02:29:15 +00:00			`}`
			`}`

Rename Service to Endpoint (#192) * Add clarifications in comments * #191: Rename Service to Endpoint 2021-10-23 20:47:12 +00:00			`func execute(endpoint core.Endpoint, alertingConfig alerting.Config, maintenanceConfig *maintenance.Config, disableMonitoringLock, enabledMetrics, debug bool) {`
#29: Automatically reload on configuration file update 2021-05-19 02:29:15 +00:00			`if !disableMonitoringLock {`
Rename Service to Endpoint (#192) * Add clarifications in comments * #191: Rename Service to Endpoint 2021-10-23 20:47:12 +00:00			`// By placing the lock here, we prevent multiple endpoints from being monitored at the exact same time, which`
#29: Automatically reload on configuration file update 2021-05-19 02:29:15 +00:00			`// could cause performance issues and return inaccurate results`
			`monitoringMutex.Lock()`
			`}`
			`if debug {`
Rename Service to Endpoint (#192) * Add clarifications in comments * #191: Rename Service to Endpoint 2021-10-23 20:47:12 +00:00			`log.Printf("[watchdog][execute] Monitoring group=%s; endpoint=%s", endpoint.Group, endpoint.Name)`
#29: Automatically reload on configuration file update 2021-05-19 02:29:15 +00:00			`}`
Rename Service to Endpoint (#192) * Add clarifications in comments * #191: Rename Service to Endpoint 2021-10-23 20:47:12 +00:00			`result := endpoint.EvaluateHealth()`
#29: Automatically reload on configuration file update 2021-05-19 02:29:15 +00:00			`if enabledMetrics {`
refactor(metrics): Rename metric to metrics 2022-06-16 22:55:51 +00:00			`metrics.PublishMetricsForEndpoint(endpoint, result)`
#29: Automatically reload on configuration file update 2021-05-19 02:29:15 +00:00			`}`
Rename Service to Endpoint (#192) * Add clarifications in comments * #191: Rename Service to Endpoint 2021-10-23 20:47:12 +00:00			`UpdateEndpointStatuses(endpoint, result)`
#29: Automatically reload on configuration file update 2021-05-19 02:29:15 +00:00			`log.Printf(`
Rename Service to Endpoint (#192) * Add clarifications in comments * #191: Rename Service to Endpoint 2021-10-23 20:47:12 +00:00			`"[watchdog][execute] Monitored group=%s; endpoint=%s; success=%v; errors=%d; duration=%s",`
			`endpoint.Group,`
			`endpoint.Name,`
#29: Automatically reload on configuration file update 2021-05-19 02:29:15 +00:00			`result.Success,`
			`len(result.Errors),`
			`result.Duration.Round(time.Millisecond),`
			`)`
Close #74: Add maintenance window 2021-09-22 04:04:51 +00:00			`if !maintenanceConfig.IsUnderMaintenance() {`
Fix #117: Implement email alerts 2021-12-03 02:05:17 +00:00			`// TODO: Consider moving this after the monitoring lock is unlocked? I mean, how much noise can a single alerting provider cause...`
Rename Service to Endpoint (#192) * Add clarifications in comments * #191: Rename Service to Endpoint 2021-10-23 20:47:12 +00:00			`HandleAlerting(endpoint, result, alertingConfig, debug)`
Close #74: Add maintenance window 2021-09-22 04:04:51 +00:00			`} else if debug {`
			`log.Println("[watchdog][execute] Not handling alerting because currently in the maintenance window")`
			`}`
#29: Automatically reload on configuration file update 2021-05-19 02:29:15 +00:00			`if debug {`
Rename Service to Endpoint (#192) * Add clarifications in comments * #191: Rename Service to Endpoint 2021-10-23 20:47:12 +00:00			`log.Printf("[watchdog][execute] Waiting for interval=%s before monitoring group=%s endpoint=%s again", endpoint.Interval, endpoint.Group, endpoint.Name)`
#29: Automatically reload on configuration file update 2021-05-19 02:29:15 +00:00			`}`
			`if !disableMonitoringLock {`
			`monitoringMutex.Unlock()`
Start working on notifications when service is back to healthy (#9) 2020-09-04 22:23:56 +00:00			`}`
			`}`
Start working on #13: Service groups 2020-11-26 23:09:01 +00:00
Rename Service to Endpoint (#192) * Add clarifications in comments * #191: Rename Service to Endpoint 2021-10-23 20:47:12 +00:00			`// UpdateEndpointStatuses updates the slice of endpoint statuses`
			`func UpdateEndpointStatuses(endpoint core.Endpoint, result core.Result) {`
Move store initialization to store package This will allow importing storage.Config without importing every SQL drivers in the known universe 2021-10-28 23:35:46 +00:00			`if err := store.Get().Insert(endpoint, result); err != nil {`
Rename Service to Endpoint (#192) * Add clarifications in comments * #191: Rename Service to Endpoint 2021-10-23 20:47:12 +00:00			`log.Println("[watchdog][UpdateEndpointStatuses] Failed to insert data in storage:", err.Error())`
Close #74: Add maintenance window 2021-09-22 04:04:51 +00:00			`}`
Start working on #13: Service groups 2020-11-26 23:09:01 +00:00			`}`
#29: Automatically reload on configuration file update 2021-05-19 02:29:15 +00:00
Rename Service to Endpoint (#192) * Add clarifications in comments * #191: Rename Service to Endpoint 2021-10-23 20:47:12 +00:00			`// Shutdown stops monitoring all endpoints`
#29: Automatically reload on configuration file update 2021-05-19 02:29:15 +00:00			`func Shutdown() {`
			`cancelFunc()`
			`}`