From b8f8a47d8d7365eb6bf26a77d2ace1a236ed08c3 Mon Sep 17 00:00:00 2001
From: Yashvardhan Kukreja <yash.kukreja.98@gmail.com>
Date: Sat, 15 May 2021 21:34:26 +0530
Subject: [PATCH] feat: added
 kyverno_policy_rule_execution_latency_milliseconds metric

Signed-off-by: Yashvardhan Kukreja <yash.kukreja.98@gmail.com>
---
 pkg/metrics/metrics.go                        |   4 +-
 .../policyruleexecutionlatency/parsers.go     |  40 ++++++
 .../policyRuleExecutionLatency.go             | 117 ++++++++++++++++++
 .../policyruleexecutionlatency/types.go       |   7 ++
 pkg/policy/existing.go                        |   9 ++
 pkg/webhooks/generation.go                    |  14 +++
 pkg/webhooks/mutation.go                      |  16 +++
 pkg/webhooks/validation.go                    |  13 ++
 8 files changed, 218 insertions(+), 2 deletions(-)
 create mode 100644 pkg/metrics/policyruleexecutionlatency/parsers.go
 create mode 100644 pkg/metrics/policyruleexecutionlatency/policyRuleExecutionLatency.go
 create mode 100644 pkg/metrics/policyruleexecutionlatency/types.go

diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go
index 7e183b6b21..6875a84839 100644
--- a/pkg/metrics/metrics.go
+++ b/pkg/metrics/metrics.go
@@ -66,8 +66,8 @@ func NewPromConfig() *PromConfig {
 	}
 	policyRuleExecutionLatencyMetric := prom.NewGaugeVec(
 		prom.GaugeOpts{
-			Name: "kyverno_policy_rule_execution_latency",
-			Help: "can be used to track the latencies associated with the execution/processing of the individual rules under Kyverno policies whenever they evaluate incoming resource requests.",
+			Name: "kyverno_policy_rule_execution_latency_milliseconds",
+			Help: "can be used to track the latencies (in milliseconds) associated with the execution/processing of the individual rules under Kyverno policies whenever they evaluate incoming resource requests.",
 		},
 		policyRuleExecutionLatencyLabels,
 	)
diff --git a/pkg/metrics/policyruleexecutionlatency/parsers.go b/pkg/metrics/policyruleexecutionlatency/parsers.go
new file mode 100644
index 0000000000..7b963935d5
--- /dev/null
+++ b/pkg/metrics/policyruleexecutionlatency/parsers.go
@@ -0,0 +1,40 @@
+package policyruleexecutionlatency
+
+import (
+	"fmt"
+
+	"github.com/kyverno/kyverno/pkg/engine/response"
+	"github.com/kyverno/kyverno/pkg/metrics"
+)
+
+func ParsePromMetrics(pm metrics.PromMetrics) PromMetrics {
+	return PromMetrics(pm)
+}
+
+func ParseRuleTypeFromEngineRuleResponse(rule response.RuleResponse) metrics.RuleType {
+	switch rule.Type {
+	case "Validation":
+		return metrics.Validate
+	case "Mutation":
+		return metrics.Mutate
+	case "Generation":
+		return metrics.Generate
+	default:
+		return metrics.EmptyRuleType
+	}
+}
+
+func ParseResourceRequestOperation(requestOperationStr string) (metrics.ResourceRequestOperation, error) {
+	switch requestOperationStr {
+	case "CREATE":
+		return metrics.ResourceCreated, nil
+	case "UPDATE":
+		return metrics.ResourceUpdated, nil
+	case "DELETE":
+		return metrics.ResourceDeleted, nil
+	case "CONNECT":
+		return metrics.ResourceConnected, nil
+	default:
+		return "", fmt.Errorf("Unknown request operation made by resource: %s. Allowed requests: 'CREATE', 'UPDATE', 'DELETE', 'CONNECT'", requestOperationStr)
+	}
+}
diff --git a/pkg/metrics/policyruleexecutionlatency/policyRuleExecutionLatency.go b/pkg/metrics/policyruleexecutionlatency/policyRuleExecutionLatency.go
new file mode 100644
index 0000000000..381fbeadb8
--- /dev/null
+++ b/pkg/metrics/policyruleexecutionlatency/policyRuleExecutionLatency.go
@@ -0,0 +1,117 @@
+package policyruleexecutionlatency
+
+import (
+	"fmt"
+	"time"
+
+	kyverno "github.com/kyverno/kyverno/pkg/api/kyverno/v1"
+	"github.com/kyverno/kyverno/pkg/engine/response"
+	"github.com/kyverno/kyverno/pkg/metrics"
+	prom "github.com/prometheus/client_golang/prometheus"
+)
+
+func (pm PromMetrics) registerPolicyRuleResultsMetric(
+	policyValidationMode metrics.PolicyValidationMode,
+	policyType metrics.PolicyType,
+	policyBackgroundMode metrics.PolicyBackgroundMode,
+	policyNamespace, policyName string,
+	resourceName, resourceKind, resourceNamespace string,
+	resourceRequestOperation metrics.ResourceRequestOperation,
+	ruleName string,
+	ruleResult metrics.RuleResult,
+	ruleType metrics.RuleType,
+	ruleExecutionCause metrics.RuleExecutionCause,
+	ruleResponse string,
+	mainRequestTriggerTimestamp, policyExecutionTimestamp, ruleExecutionTimestamp int64,
+	generateRuleLatencyType string,
+	ruleExecutionLatencyInMs float64,
+) error {
+	if policyType == metrics.Cluster {
+		policyNamespace = "-"
+	}
+	if ruleType != metrics.Generate || generateRuleLatencyType == "" {
+		generateRuleLatencyType = "-"
+	}
+	pm.PolicyRuleExecutionLatency.With(prom.Labels{
+		"policy_validation_mode":         string(policyValidationMode),
+		"policy_type":                    string(policyType),
+		"policy_background_mode":         string(policyBackgroundMode),
+		"policy_namespace":               policyNamespace,
+		"policy_name":                    policyName,
+		"resource_name":                  resourceName,
+		"resource_kind":                  resourceKind,
+		"resource_namespace":             resourceNamespace,
+		"resource_request_operation":     string(resourceRequestOperation),
+		"rule_name":                      ruleName,
+		"rule_result":                    string(ruleResult),
+		"rule_type":                      string(ruleType),
+		"rule_execution_cause":           string(ruleExecutionCause),
+		"rule_response":                  ruleResponse,
+		"main_request_trigger_timestamp": fmt.Sprintf("%+v", time.Unix(mainRequestTriggerTimestamp, 0)),
+		"policy_execution_timestamp":     fmt.Sprintf("%+v", time.Unix(policyExecutionTimestamp, 0)),
+		"rule_execution_timestamp":       fmt.Sprintf("%+v", time.Unix(ruleExecutionTimestamp, 0)),
+		"generate_rule_latency_type":     generateRuleLatencyType,
+	}).Set(ruleExecutionLatencyInMs)
+	return nil
+}
+
+//policy - policy related data
+//engineResponse - resource and rule related data
+func (pm PromMetrics) ProcessEngineResponse(policy kyverno.ClusterPolicy, engineResponse response.EngineResponse, executionCause metrics.RuleExecutionCause, generateRuleLatencyType string, resourceRequestOperation metrics.ResourceRequestOperation, mainRequestTriggerTimestamp int64) error {
+
+	policyValidationMode, err := metrics.ParsePolicyValidationMode(policy.Spec.ValidationFailureAction)
+	if err != nil {
+		return err
+	}
+	policyType := metrics.Namespaced
+	policyBackgroundMode := metrics.ParsePolicyBackgroundMode(*policy.Spec.Background)
+	policyNamespace := policy.ObjectMeta.Namespace
+	if policyNamespace == "" {
+		policyNamespace = "-"
+		policyType = metrics.Cluster
+	}
+	policyName := policy.ObjectMeta.Name
+
+	policyExecutionTimestamp := engineResponse.PolicyResponse.PolicyExecutionTimestamp
+
+	resourceSpec := engineResponse.PolicyResponse.Resource
+
+	resourceName := resourceSpec.Name
+	resourceKind := resourceSpec.Kind
+	resourceNamespace := resourceSpec.Namespace
+
+	ruleResponses := engineResponse.PolicyResponse.Rules
+
+	for _, rule := range ruleResponses {
+		ruleName := rule.Name
+		ruleType := ParseRuleTypeFromEngineRuleResponse(rule)
+		ruleResponse := rule.Message
+		ruleResult := metrics.Fail
+		if rule.Success {
+			ruleResult = metrics.Pass
+		}
+
+		ruleExecutionTimestamp := rule.RuleStats.RuleExecutionTimestamp
+		ruleExecutionLatencyInMs := float64(rule.RuleStats.ProcessingTime) / float64(1000*1000)
+
+		if err := pm.registerPolicyRuleResultsMetric(
+			policyValidationMode,
+			policyType,
+			policyBackgroundMode,
+			policyNamespace, policyName,
+			resourceName, resourceKind, resourceNamespace,
+			resourceRequestOperation,
+			ruleName,
+			ruleResult,
+			ruleType,
+			executionCause,
+			ruleResponse,
+			mainRequestTriggerTimestamp, policyExecutionTimestamp, ruleExecutionTimestamp,
+			generateRuleLatencyType,
+			ruleExecutionLatencyInMs,
+		); err != nil {
+			return err
+		}
+	}
+	return nil
+}
diff --git a/pkg/metrics/policyruleexecutionlatency/types.go b/pkg/metrics/policyruleexecutionlatency/types.go
new file mode 100644
index 0000000000..afca7cce17
--- /dev/null
+++ b/pkg/metrics/policyruleexecutionlatency/types.go
@@ -0,0 +1,7 @@
+package policyruleexecutionlatency
+
+import (
+	"github.com/kyverno/kyverno/pkg/metrics"
+)
+
+type PromMetrics metrics.PromMetrics
diff --git a/pkg/policy/existing.go b/pkg/policy/existing.go
index c12c43a04b..4b601b2f07 100644
--- a/pkg/policy/existing.go
+++ b/pkg/policy/existing.go
@@ -12,6 +12,7 @@ import (
 	"github.com/kyverno/kyverno/pkg/engine"
 	"github.com/kyverno/kyverno/pkg/engine/response"
 	"github.com/kyverno/kyverno/pkg/metrics"
+	policyRuleExecutionLatency "github.com/kyverno/kyverno/pkg/metrics/policyruleexecutionlatency"
 	policyRuleResults "github.com/kyverno/kyverno/pkg/metrics/policyruleresults"
 	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
 )
@@ -88,6 +89,8 @@ func (pc *PolicyController) applyAndReportPerNamespace(policy *kyverno.ClusterPo
 		for _, engineResponse := range engineResponses {
 			// registering the kyverno_policy_rule_results_info metric concurrently
 			go pc.registerPolicyRuleResultsMetricValidationBS(logger, *policy, *engineResponse, backgroundScanTimestamp)
+			// registering the kyverno_policy_rule_execution_latency_milliseconds metric concurrently
+			go pc.registerPolicyRuleExecutionLatencyMetricValidateBS(logger, *policy, *engineResponse, backgroundScanTimestamp)
 		}
 		*metricAlreadyRegistered = true
 	}
@@ -101,6 +104,12 @@ func (pc *PolicyController) registerPolicyRuleResultsMetricValidationBS(logger l
 	}
 }
 
+func (pc *PolicyController) registerPolicyRuleExecutionLatencyMetricValidateBS(logger logr.Logger, policy kyverno.ClusterPolicy, engineResponse response.EngineResponse, backgroundScanTimestamp int64) {
+	if err := policyRuleExecutionLatency.ParsePromMetrics(*pc.promConfig.Metrics).ProcessEngineResponse(policy, engineResponse, metrics.BackgroundScan, "", metrics.ResourceCreated, backgroundScanTimestamp); err != nil {
+		logger.Error(err, "error occurred while registering kyverno_policy_rule_execution_latency_milliseconds metrics for the above policy", "name", policy.Name)
+	}
+}
+
 func (pc *PolicyController) applyPolicy(policy *kyverno.ClusterPolicy, resource unstructured.Unstructured, logger logr.Logger) (engineResponses []*response.EngineResponse) {
 	// pre-processing, check if the policy and resource version has been processed before
 	if !pc.rm.ProcessResource(policy.Name, policy.ResourceVersion, resource.GetKind(), resource.GetNamespace(), resource.GetName(), resource.GetResourceVersion()) {
diff --git a/pkg/webhooks/generation.go b/pkg/webhooks/generation.go
index 36c408b182..10182cf779 100644
--- a/pkg/webhooks/generation.go
+++ b/pkg/webhooks/generation.go
@@ -25,6 +25,7 @@ import (
 	"github.com/kyverno/kyverno/pkg/event"
 	gen "github.com/kyverno/kyverno/pkg/generate"
 	"github.com/kyverno/kyverno/pkg/metrics"
+	policyRuleExecutionLatency "github.com/kyverno/kyverno/pkg/metrics/policyruleexecutionlatency"
 	policyRuleResults "github.com/kyverno/kyverno/pkg/metrics/policyruleresults"
 	kyvernoutils "github.com/kyverno/kyverno/pkg/utils"
 	"github.com/kyverno/kyverno/pkg/webhooks/generate"
@@ -85,6 +86,9 @@ func (ws *WebhookServer) HandleGenerate(request *v1beta1.AdmissionRequest, polic
 			}
 			// registering the kyverno_policy_rule_results_info metric concurrently
 			go ws.registerPolicyRuleResultsMetricGeneration(logger, string(request.Operation), *policy, *engineResponse, admissionRequestTimestamp)
+
+			// registering the kyverno_policy_rule_execution_latency_milliseconds metric concurrently
+			go ws.registerPolicyRuleExecutionLatencyMetricGenerate(logger, string(request.Operation), *policy, *engineResponse, admissionRequestTimestamp)
 		}
 
 		// Adds Generate Request to a channel(queue size 1000) to generators
@@ -112,6 +116,16 @@ func (ws *WebhookServer) registerPolicyRuleResultsMetricGeneration(logger logr.L
 	}
 }
 
+func (ws *WebhookServer) registerPolicyRuleExecutionLatencyMetricGenerate(logger logr.Logger, resourceRequestOperation string, policy kyverno.ClusterPolicy, engineResponse response.EngineResponse, admissionRequestTimestamp int64) {
+	resourceRequestOperationPromAlias, err := policyRuleExecutionLatency.ParseResourceRequestOperation(resourceRequestOperation)
+	if err != nil {
+		logger.Error(err, "error occurred while registering kyverno_policy_rule_execution_latency_milliseconds metrics for the above policy", "name", policy.Name)
+	}
+	if err := policyRuleExecutionLatency.ParsePromMetrics(*ws.promConfig.Metrics).ProcessEngineResponse(policy, engineResponse, metrics.AdmissionRequest, "", resourceRequestOperationPromAlias, admissionRequestTimestamp); err != nil {
+		logger.Error(err, "error occurred while registering kyverno_policy_rule_execution_latency_milliseconds metrics for the above policy", "name", policy.Name)
+	}
+}
+
 //handleUpdate handles admission-requests for update
 func (ws *WebhookServer) handleUpdate(request *v1beta1.AdmissionRequest, policies []*kyverno.ClusterPolicy) {
 	logger := ws.log.WithValues("action", "generation", "uid", request.UID, "kind", request.Kind, "namespace", request.Namespace, "name", request.Name, "operation", request.Operation, "gvk", request.Kind.String())
diff --git a/pkg/webhooks/mutation.go b/pkg/webhooks/mutation.go
index 2f07a79ab1..fed392f542 100644
--- a/pkg/webhooks/mutation.go
+++ b/pkg/webhooks/mutation.go
@@ -15,6 +15,7 @@ import (
 	"github.com/kyverno/kyverno/pkg/engine/response"
 	engineutils "github.com/kyverno/kyverno/pkg/engine/utils"
 	"github.com/kyverno/kyverno/pkg/metrics"
+	policyRuleExecutionLatency "github.com/kyverno/kyverno/pkg/metrics/policyruleexecutionlatency"
 	policyRuleResults "github.com/kyverno/kyverno/pkg/metrics/policyruleresults"
 	v1beta1 "k8s.io/api/admission/v1beta1"
 	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
@@ -43,6 +44,7 @@ func (ws *WebhookServer) HandleMutation(
 
 	var patches [][]byte
 	var engineResponses []*response.EngineResponse
+	var triggeredPolicies []kyverno.ClusterPolicy
 	policyContext := &engine.PolicyContext{
 		NewResource:         resource,
 		AdmissionInfo:       userRequestInfo,
@@ -94,6 +96,10 @@ func (ws *WebhookServer) HandleMutation(
 
 		// registering the kyverno_policy_rule_results_info metric concurrently
 		go ws.registerPolicyRuleResultsMetricMutation(logger, string(request.Operation), *policy, *engineResponse, admissionRequestTimestamp)
+		triggeredPolicies = append(triggeredPolicies, *policy)
+
+		// registering the kyverno_policy_rule_execution_latency_milliseconds metric concurrently
+		go ws.registerPolicyRuleExecutionLatencyMetricMutate(logger, string(request.Operation), *policy, *engineResponse, admissionRequestTimestamp)
 	}
 
 	// generate annotations
@@ -138,6 +144,16 @@ func (ws *WebhookServer) registerPolicyRuleResultsMetricMutation(logger logr.Log
 	}
 }
 
+func (ws *WebhookServer) registerPolicyRuleExecutionLatencyMetricMutate(logger logr.Logger, resourceRequestOperation string, policy kyverno.ClusterPolicy, engineResponse response.EngineResponse, admissionRequestTimestamp int64) {
+	resourceRequestOperationPromAlias, err := policyRuleExecutionLatency.ParseResourceRequestOperation(resourceRequestOperation)
+	if err != nil {
+		logger.Error(err, "error occurred while registering kyverno_policy_rule_execution_latency_milliseconds metrics for the above policy", "name", policy.Name)
+	}
+	if err := policyRuleExecutionLatency.ParsePromMetrics(*ws.promConfig.Metrics).ProcessEngineResponse(policy, engineResponse, metrics.AdmissionRequest, "", resourceRequestOperationPromAlias, admissionRequestTimestamp); err != nil {
+		logger.Error(err, "error occurred while registering kyverno_policy_rule_execution_latency_milliseconds metrics for the above policy", "name", policy.Name)
+	}
+}
+
 type mutateStats struct {
 	resp      *response.EngineResponse
 	namespace string
diff --git a/pkg/webhooks/validation.go b/pkg/webhooks/validation.go
index e51fa2526c..b082f72d8c 100644
--- a/pkg/webhooks/validation.go
+++ b/pkg/webhooks/validation.go
@@ -24,6 +24,7 @@ import (
 	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
 
 	"github.com/kyverno/kyverno/pkg/metrics"
+	policyRuleExecutionLatency "github.com/kyverno/kyverno/pkg/metrics/policyruleexecutionlatency"
 	policyRuleResults "github.com/kyverno/kyverno/pkg/metrics/policyruleresults"
 )
 
@@ -106,6 +107,8 @@ func HandleValidation(
 
 		// registering the kyverno_policy_rule_results_info metric concurrently
 		go registerPolicyRuleResultsMetricValidation(promConfig, logger, string(request.Operation), policyContext.Policy, *engineResponse, admissionRequestTimestamp)
+		// registering the kyverno_policy_rule_execution_latency_milliseconds metric concurrently
+		go registerPolicyRuleExecutionLatencyMetricValidate(promConfig, logger, string(request.Operation), policyContext.Policy, *engineResponse, admissionRequestTimestamp)
 
 		engineResponses = append(engineResponses, engineResponse)
 		statusListener.Update(validateStats{
@@ -165,6 +168,16 @@ func registerPolicyRuleResultsMetricValidation(promConfig *metrics.PromConfig, l
 	}
 }
 
+func registerPolicyRuleExecutionLatencyMetricValidate(promConfig *metrics.PromConfig, logger logr.Logger, requestOperation string, policy kyverno.ClusterPolicy, engineResponse response.EngineResponse, admissionRequestTimestamp int64) {
+	resourceRequestOperationPromAlias, err := policyRuleExecutionLatency.ParseResourceRequestOperation(requestOperation)
+	if err != nil {
+		logger.Error(err, "error occurred while registering kyverno_policy_rule_execution_latency_milliseconds metrics for the above policy", "name", policy.Name)
+	}
+	if err := policyRuleExecutionLatency.ParsePromMetrics(*promConfig.Metrics).ProcessEngineResponse(policy, engineResponse, metrics.AdmissionRequest, "", resourceRequestOperationPromAlias, admissionRequestTimestamp); err != nil {
+		logger.Error(err, "error occurred while registering kyverno_policy_rule_execution_latency_milliseconds metrics for the above policy", "name", policy.Name)
+	}
+}
+
 func buildDeletionPrInfo(oldR unstructured.Unstructured) policyreport.Info {
 	return policyreport.Info{
 		Namespace: oldR.GetNamespace(),