2021-05-05 00:41:13 +05:30
package metrics
import (
2022-07-11 23:19:47 +05:30
"context"
"net/http"
"time"
2021-09-11 03:09:12 +05:30
2022-07-11 23:19:47 +05:30
"github.com/go-logr/logr"
kconfig "github.com/kyverno/kyverno/pkg/config"
"github.com/kyverno/kyverno/pkg/utils/kube"
2022-07-25 14:55:26 +05:30
"go.opentelemetry.io/otel"
2022-07-11 23:19:47 +05:30
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/exporters/otlp/otlpmetric"
"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc"
"go.opentelemetry.io/otel/exporters/prometheus"
"go.opentelemetry.io/otel/metric/global"
"go.opentelemetry.io/otel/metric/instrument"
"go.opentelemetry.io/otel/metric/instrument/asyncfloat64"
"go.opentelemetry.io/otel/metric/instrument/syncfloat64"
"go.opentelemetry.io/otel/metric/instrument/syncint64"
controller "go.opentelemetry.io/otel/sdk/metric/controller/basic"
"go.opentelemetry.io/otel/sdk/metric/export/aggregation"
processor "go.opentelemetry.io/otel/sdk/metric/processor/basic"
"go.opentelemetry.io/otel/sdk/metric/selector/simple"
"go.opentelemetry.io/otel/sdk/resource"
semconv "go.opentelemetry.io/otel/semconv/v1.4.0"
"k8s.io/client-go/kubernetes"
2021-05-05 00:41:13 +05:30
)
2022-07-11 23:19:47 +05:30
// meterName is the name under which Kyverno requests its meter from the
// global meter provider.
const meterName = "kyverno"
type MetricsConfig struct {
// instruments
policyChangesMetric syncint64 . Counter
policyResultsMetric syncint64 . Counter
policyRuleInfoMetric asyncfloat64 . Gauge
policyExecutionDurationMetric syncfloat64 . Histogram
admissionRequestsMetric syncint64 . Counter
admissionReviewDurationMetric syncfloat64 . Histogram
// config
Config * kconfig . MetricsConfigData
Log logr . Logger
2021-05-05 00:41:13 +05:30
}
2022-07-11 23:19:47 +05:30
func initializeMetrics ( m * MetricsConfig ) ( * MetricsConfig , error ) {
var err error
meter := global . MeterProvider ( ) . Meter ( meterName )
m . policyResultsMetric , err = meter . SyncInt64 ( ) . Counter ( "kyverno_policy_results_total" , instrument . WithDescription ( "can be used to track the results associated with the policies applied in the user’ s cluster, at the level from rule to policy to admission requests" ) )
if err != nil {
m . Log . Error ( err , "Failed to create instrument" )
return nil , err
}
m . policyChangesMetric , err = meter . SyncInt64 ( ) . Counter ( "kyverno_policy_changes_total" , instrument . WithDescription ( "can be used to track all the changes associated with the Kyverno policies present on the cluster such as creation, updates and deletions" ) )
if err != nil {
m . Log . Error ( err , "Failed to create instrument" )
return nil , err
}
m . admissionRequestsMetric , err = meter . SyncInt64 ( ) . Counter ( "kyverno_admission_requests_total" , instrument . WithDescription ( "can be used to track the number of admission requests encountered by Kyverno in the cluster" ) )
if err != nil {
m . Log . Error ( err , "Failed to create instrument" )
return nil , err
}
m . policyExecutionDurationMetric , err = meter . SyncFloat64 ( ) . Histogram ( "kyverno_policy_execution_duration_seconds" , instrument . WithDescription ( "can be used to track the latencies (in seconds) associated with the execution/processing of the individual rules under Kyverno policies whenever they evaluate incoming resource requests" ) )
if err != nil {
m . Log . Error ( err , "Failed to create instrument" )
return nil , err
}
m . admissionReviewDurationMetric , err = meter . SyncFloat64 ( ) . Histogram ( "kyverno_admission_review_duration_seconds" , instrument . WithDescription ( "can be used to track the latencies (in seconds) associated with the entire individual admission review. For example, if an incoming request trigger, say, five policies, this metric will track the e2e latency associated with the execution of all those policies" ) )
if err != nil {
m . Log . Error ( err , "Failed to create instrument" )
return nil , err
}
// Register Async Callbacks
m . policyRuleInfoMetric , err = meter . AsyncFloat64 ( ) . Gauge ( "kyverno_policy_rule_info_total" , instrument . WithDescription ( "can be used to track the info of the rules or/and policies present in the cluster. 0 means the rule doesn't exist and has been deleted, 1 means the rule is currently existent in the cluster" ) )
if err != nil {
m . Log . Error ( err , "Failed to create instrument" )
return nil , err
}
return m , nil
2021-05-05 00:41:13 +05:30
}
2022-07-25 14:55:26 +05:30
// ShutDownController stops the given metrics controller, which pushes any last
// exports to the receiver. A nil pusher is ignored. An error returned by Stop
// is forwarded to the OpenTelemetry global error handler via otel.Handle.
func ShutDownController(ctx context.Context, pusher *controller.Controller) {
	// NewOTLPGRPCConfig returns a nil controller on every failure path; guard
	// here instead of relying on Stop tolerating a nil receiver.
	if pusher == nil {
		return
	}
	// pushes any last exports to the receiver
	if err := pusher.Stop(ctx); err != nil {
		otel.Handle(err)
	}
}
2022-07-11 23:19:47 +05:30
func NewOTLPGRPCConfig ( endpoint string ,
metricsConfigData * kconfig . MetricsConfigData ,
certs string ,
kubeClient kubernetes . Interface ,
log logr . Logger ,
2022-07-25 14:55:26 +05:30
) ( * MetricsConfig , * controller . Controller , error ) {
2022-07-11 23:19:47 +05:30
ctx := context . Background ( )
var client otlpmetric . Client
2021-05-05 00:41:13 +05:30
2022-07-11 23:19:47 +05:30
if certs != "" {
// here the certificates are stored as configmaps
transportCreds , err := kube . FetchCert ( ctx , certs , kubeClient )
if err != nil {
log . Error ( err , "Error fetching certificate from secret" )
2022-07-25 14:55:26 +05:30
return nil , nil , err
2022-07-11 23:19:47 +05:30
}
client = otlpmetricgrpc . NewClient (
otlpmetricgrpc . WithEndpoint ( endpoint ) ,
otlpmetricgrpc . WithTLSCredentials ( transportCreds ) ,
)
} else {
client = otlpmetricgrpc . NewClient (
otlpmetricgrpc . WithEndpoint ( endpoint ) ,
otlpmetricgrpc . WithInsecure ( ) ,
)
2021-05-05 00:41:13 +05:30
}
2022-07-11 23:19:47 +05:30
// create New Exporter for exporting metrics
metricExp , err := otlpmetric . New ( ctx , client )
if err != nil {
log . Error ( err , "Failed to create the collector exporter" )
2022-07-25 14:55:26 +05:30
return nil , nil , err
2021-05-05 00:41:13 +05:30
}
2022-07-11 23:19:47 +05:30
res , err := resource . New ( context . Background ( ) ,
resource . WithAttributes ( semconv . ServiceNameKey . String ( "kyverno_metrics" ) ) ,
resource . WithSchemaURL ( semconv . SchemaURL ) ,
2021-05-05 00:41:13 +05:30
)
2022-07-11 23:19:47 +05:30
if err != nil {
log . Error ( err , "failed creating resource" )
2022-07-25 14:55:26 +05:30
return nil , nil , err
2022-07-11 23:19:47 +05:30
}
2021-05-05 00:41:13 +05:30
2022-07-11 23:19:47 +05:30
// create controller and bind the exporter with it
pusher := controller . New (
processor . NewFactory (
simple . NewWithHistogramDistribution ( ) ,
aggregation . CumulativeTemporalitySelector ( ) ,
processor . WithMemory ( true ) ,
) ,
controller . WithExporter ( metricExp ) ,
controller . WithResource ( res ) ,
controller . WithCollectPeriod ( 2 * time . Second ) ,
2021-05-05 00:41:13 +05:30
)
2022-07-11 23:19:47 +05:30
global . SetMeterProvider ( pusher )
m := new ( MetricsConfig )
m . Log = log
m . Config = metricsConfigData
2021-05-05 00:41:13 +05:30
2022-07-11 23:19:47 +05:30
m , err = initializeMetrics ( m )
if err != nil {
log . Error ( err , "Failed initializing metrics" )
2022-07-25 14:55:26 +05:30
return nil , nil , err
2021-05-05 00:41:13 +05:30
}
2021-07-23 21:46:50 +05:30
2022-07-11 23:19:47 +05:30
if err := pusher . Start ( ctx ) ; err != nil {
log . Error ( err , "could not start metric exporter" )
2022-07-25 14:55:26 +05:30
return nil , nil , err
2021-07-23 21:46:50 +05:30
}
2022-07-11 23:19:47 +05:30
2022-07-25 14:55:26 +05:30
return m , pusher , nil
2022-07-11 23:19:47 +05:30
}
func NewPrometheusConfig ( metricsConfigData * kconfig . MetricsConfigData ,
log logr . Logger ,
) ( * MetricsConfig , * http . ServeMux , error ) {
config := prometheus . Config { }
res , err := resource . New ( context . Background ( ) ,
resource . WithAttributes ( semconv . ServiceNameKey . String ( "kyverno-svc-metrics" ) ) ,
resource . WithAttributes ( semconv . ServiceNamespaceKey . String ( kconfig . KyvernoNamespace ( ) ) ) ,
resource . WithSchemaURL ( semconv . SchemaURL ) ,
2021-05-05 00:41:13 +05:30
)
2022-07-11 23:19:47 +05:30
if err != nil {
log . Error ( err , "failed creating resource" )
return nil , nil , err
}
2021-05-05 00:41:13 +05:30
2022-07-11 23:19:47 +05:30
c := controller . New (
processor . NewFactory (
simple . NewWithHistogramDistribution ( ) ,
aggregation . CumulativeTemporalitySelector ( ) ,
processor . WithMemory ( true ) ,
) ,
controller . WithResource ( res ) ,
)
exporter , err := prometheus . New ( config , c )
if err != nil {
log . Error ( err , "failed to initialize prometheus exporter" )
return nil , nil , err
}
global . SetMeterProvider ( exporter . MeterProvider ( ) )
// Create new config object and attach metricsConfig to it
m := new ( MetricsConfig )
m . Config = metricsConfigData
// Initialize metrics logger
m . Log = log
m , err = initializeMetrics ( m )
if err != nil {
log . Error ( err , "failed to initialize metrics config" )
return nil , nil , err
}
metricsServerMux := http . NewServeMux ( )
metricsServerMux . HandleFunc ( "/metrics" , exporter . ServeHTTP )
return m , metricsServerMux , nil
}
// RecordPolicyResults adds 1 to the policy-results counter, attaching the
// policy, resource and rule details passed in as string attributes.
func (m *MetricsConfig) RecordPolicyResults(policyValidationMode PolicyValidationMode, policyType PolicyType, policyBackgroundMode PolicyBackgroundMode, policyNamespace string, policyName string,
	resourceKind string, resourceNamespace string, resourceRequestOperation ResourceRequestOperation, ruleName string, ruleResult RuleResult, ruleType RuleType,
	ruleExecutionCause RuleExecutionCause) {
	// Attributes are handed to Add directly as variadic arguments.
	m.policyResultsMetric.Add(
		context.Background(),
		1,
		attribute.String("policy_validation_mode", string(policyValidationMode)),
		attribute.String("policy_type", string(policyType)),
		attribute.String("policy_background_mode", string(policyBackgroundMode)),
		attribute.String("policy_namespace", policyNamespace),
		attribute.String("policy_name", policyName),
		attribute.String("resource_kind", resourceKind),
		attribute.String("resource_namespace", resourceNamespace),
		attribute.String("resource_request_operation", string(resourceRequestOperation)),
		attribute.String("rule_name", ruleName),
		attribute.String("rule_result", string(ruleResult)),
		attribute.String("rule_type", string(ruleType)),
		attribute.String("rule_execution_cause", string(ruleExecutionCause)),
	)
}
// RecordPolicyChanges adds 1 to the policy-changes counter, attaching the
// policy details and the kind of change as string attributes.
func (m *MetricsConfig) RecordPolicyChanges(policyValidationMode PolicyValidationMode, policyType PolicyType, policyBackgroundMode PolicyBackgroundMode, policyNamespace string, policyName string, policyChangeType string) {
	attrs := []attribute.KeyValue{
		attribute.String("policy_validation_mode", string(policyValidationMode)),
		attribute.String("policy_type", string(policyType)),
		attribute.String("policy_background_mode", string(policyBackgroundMode)),
		attribute.String("policy_namespace", policyNamespace),
		attribute.String("policy_name", policyName),
		attribute.String("policy_change_type", policyChangeType),
	}

	m.policyChangesMetric.Add(context.Background(), 1, attrs...)
}
func ( m * MetricsConfig ) RecordPolicyRuleInfo ( policyValidationMode PolicyValidationMode , policyType PolicyType , policyBackgroundMode PolicyBackgroundMode , policyNamespace string , policyName string ,
ruleName string , ruleType RuleType , status string , metricValue float64 ) {
ctx := context . Background ( )
commonLabels := [ ] attribute . KeyValue {
attribute . String ( "policy_validation_mode" , string ( policyValidationMode ) ) ,
attribute . String ( "policy_type" , string ( policyType ) ) ,
attribute . String ( "policy_background_mode" , string ( policyBackgroundMode ) ) ,
attribute . String ( "policy_namespace" , policyNamespace ) ,
attribute . String ( "policy_name" , policyName ) ,
attribute . String ( "rule_name" , ruleName ) ,
attribute . String ( "rule_type" , string ( ruleType ) ) ,
attribute . String ( "status_ready" , status ) ,
2021-09-11 03:09:12 +05:30
}
2022-07-11 23:19:47 +05:30
m . policyRuleInfoMetric . Observe ( ctx , metricValue , commonLabels ... )
}
// RecordAdmissionRequests adds 1 to the admission-requests counter, attaching
// the resource kind, resource namespace and request operation as string
// attributes.
//
// It uses a pointer receiver like the other MetricsConfig methods, so the
// struct is not copied on each call.
func (m *MetricsConfig) RecordAdmissionRequests(resourceKind string, resourceNamespace string, resourceRequestOperation ResourceRequestOperation) {
	ctx := context.Background()

	commonLabels := []attribute.KeyValue{
		attribute.String("resource_kind", resourceKind),
		attribute.String("resource_namespace", resourceNamespace),
		attribute.String("resource_request_operation", string(resourceRequestOperation)),
	}

	m.admissionRequestsMetric.Add(ctx, 1, commonLabels...)
}
// RecordPolicyExecutionDuration records ruleExecutionLatency on the
// policy-execution-duration histogram, attaching the policy, resource and rule
// details plus the latency type as string attributes.
func (m *MetricsConfig) RecordPolicyExecutionDuration(policyValidationMode PolicyValidationMode, policyType PolicyType, policyBackgroundMode PolicyBackgroundMode, policyNamespace string, policyName string,
	resourceKind string, resourceNamespace string, resourceRequestOperation ResourceRequestOperation, ruleName string, ruleResult RuleResult, ruleType RuleType,
	ruleExecutionCause RuleExecutionCause, generalRuleLatencyType string, ruleExecutionLatency float64) {
	// Attributes are handed to Record directly as variadic arguments.
	m.policyExecutionDurationMetric.Record(
		context.Background(),
		ruleExecutionLatency,
		attribute.String("policy_validation_mode", string(policyValidationMode)),
		attribute.String("policy_type", string(policyType)),
		attribute.String("policy_background_mode", string(policyBackgroundMode)),
		attribute.String("policy_namespace", policyNamespace),
		attribute.String("policy_name", policyName),
		attribute.String("resource_kind", resourceKind),
		attribute.String("resource_namespace", resourceNamespace),
		attribute.String("resource_request_operation", string(resourceRequestOperation)),
		attribute.String("rule_name", ruleName),
		attribute.String("rule_result", string(ruleResult)),
		attribute.String("rule_type", string(ruleType)),
		attribute.String("rule_execution_cause", string(ruleExecutionCause)),
		attribute.String("general_rule_latency_type", generalRuleLatencyType),
	)
}
func ( m * MetricsConfig ) RecordAdmissionReviewDuration ( resourceKind string , resourceNamespace string , resourceRequestOperation string , admissionRequestLatency float64 ) {
ctx := context . Background ( )
commonLabels := [ ] attribute . KeyValue {
attribute . String ( "resource_kind" , resourceKind ) ,
attribute . String ( "resource_namespace" , resourceNamespace ) ,
attribute . String ( "resource_request_operation" , resourceRequestOperation ) ,
}
m . admissionReviewDurationMetric . Record ( ctx , admissionRequestLatency , commonLabels ... )
2021-05-05 00:41:13 +05:30
}