
fix throttling (#1341)

* fix policy status and generate controller issues

* shorten ACTION column name

* update logs

Co-authored-by: Shuting Zhao <shutting06@gmail.com>
Jim Bugwadia 2020-11-30 11:22:20 -08:00 committed by GitHub
parent 36615e21a7
commit 2344b2c305
20 changed files with 162 additions and 122 deletions
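
The root cause is visible in the controller diffs below: workqueue.NewItemExponentialFailureRateLimiter(1, 30) passes untyped constants where time.Duration parameters are expected, so the base and maximum retry delays come out as 1ns and 30ns, which is effectively no backoff at all; failed items get retried fast enough to trip client-side API throttling. The sketch below is not part of the commit; it assumes k8s.io/client-go, whose DefaultControllerRateLimiter combines a 5ms to 1000s per-item exponential backoff with a 10 qps, burst-100 token bucket.

package main

import (
	"fmt"

	"k8s.io/client-go/util/workqueue"
)

func main() {
	// Old: the untyped constants become time.Duration values, i.e. a 1ns
	// base delay capped at 30ns -- retries are effectively unthrottled.
	old := workqueue.NewItemExponentialFailureRateLimiter(1, 30)

	// New: 5ms..1000s per-item exponential backoff, bounded overall by
	// a 10 qps, burst-100 token bucket.
	def := workqueue.DefaultControllerRateLimiter()

	for i := 0; i < 3; i++ {
		fmt.Printf("retry %d: old=%v new=%v\n", i, old.When("gr-1"), def.When("gr-1"))
	}

	// The commit rebuilds both controller queues on the default limiter:
	q := workqueue.NewNamedRateLimitingQueue(def, "generate-request")
	q.ShutDown()
}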

View file

@@ -21,7 +21,7 @@ spec:
name: Background
type: string
- jsonPath: .spec.validationFailureAction
name: Validation Failure Action
name: Action
type: string
name: v1
schema:

View file

@@ -105,7 +105,7 @@ func main() {
// DYNAMIC CLIENT
// - client for all registered resources
client, err := dclient.NewClient(clientConfig, 5*time.Minute, stopCh, log.Log)
client, err := dclient.NewClient(clientConfig, 15*time.Minute, stopCh, log.Log)
if err != nil {
setupLog.Error(err, "Failed to create client")
os.Exit(1)
@@ -146,7 +146,6 @@ func main() {
// Resource Mutating Webhook Watcher
webhookMonitor := webhookconfig.NewMonitor(log.Log.WithName("WebhookMonitor"))
// KYVERNO CRD INFORMER
// watches CRD resources:
// - ClusterPolicy, Policy
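
The resync interval handed to dclient.NewClient drives the client's informer re-list/re-deliver cycle, so tripling it from 5 to 15 minutes cuts that steady-state traffic to a third. A minimal sketch of the same knob in plain client-go (dclient is Kyverno's wrapper; the kubeconfig loading here is illustrative):

package main

import (
	"time"

	"k8s.io/client-go/dynamic"
	"k8s.io/client-go/dynamic/dynamicinformer"
	"k8s.io/client-go/tools/clientcmd"
)

func main() {
	config, err := clientcmd.BuildConfigFromFlags("", clientcmd.RecommendedHomeFile)
	if err != nil {
		panic(err)
	}

	client, err := dynamic.NewForConfig(config)
	if err != nil {
		panic(err)
	}

	// defaultResync bounds how often cached objects are re-delivered to
	// handlers even when nothing changed; 15m instead of 5m leaves a
	// third of the periodic churn.
	factory := dynamicinformer.NewDynamicSharedInformerFactory(client, 15*time.Minute)
	_ = factory
}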

View file

@@ -23,7 +23,7 @@ spec:
name: Background
type: string
- jsonPath: .spec.validationFailureAction
name: Validation Failure Action
name: Action
type: string
name: v1
schema:

View file

@@ -26,7 +26,7 @@ spec:
name: Background
type: string
- jsonPath: .spec.validationFailureAction
name: Validation Failure Action
name: Action
type: string
name: v1
schema:

View file

@@ -26,7 +26,7 @@ spec:
name: Background
type: string
- jsonPath: .spec.validationFailureAction
name: Validation Failure Action
name: Action
type: string
name: v1
schema:

View file

@@ -12,7 +12,7 @@ import (
// +kubebuilder:subresource:status
// +kubebuilder:resource:path=clusterpolicies,scope="Cluster",shortName=cpol
// +kubebuilder:printcolumn:name="Background",type="string",JSONPath=".spec.background"
// +kubebuilder:printcolumn:name="Validation Failure Action",type="string",JSONPath=".spec.validationFailureAction"
// +kubebuilder:printcolumn:name="Action",type="string",JSONPath=".spec.validationFailureAction"
type ClusterPolicy struct {
metav1.TypeMeta `json:",inline,omitempty" yaml:",inline,omitempty"`
metav1.ObjectMeta `json:"metadata,omitempty" yaml:"metadata,omitempty"`

View file

@@ -69,12 +69,11 @@ func NewController(
c := Controller{
kyvernoClient: kyvernoclient,
client: client,
//TODO: do the math for worst case back off and make sure cleanup runs after that
// as we dont want a deleted GR to be re-queue
queue: workqueue.NewNamedRateLimitingQueue(workqueue.NewItemExponentialFailureRateLimiter(1, 30), "generate-request-cleanup"),
queue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "generate-request-cleanup"),
dynamicInformer: dynamicInformer,
log: log,
}
c.control = Control{client: kyvernoclient}
c.enqueueGR = c.enqueue
c.syncHandler = c.syncGenerateRequest
@@ -85,22 +84,23 @@ func NewController(
c.pSynced = pInformer.Informer().HasSynced
c.grSynced = grInformer.Informer().HasSynced
pInformer.Informer().AddEventHandlerWithResyncPeriod(cache.ResourceEventHandlerFuncs{
pInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
DeleteFunc: c.deletePolicy, // we only clean up if the policy is deleted
}, 2*time.Minute)
})
grInformer.Informer().AddEventHandlerWithResyncPeriod(cache.ResourceEventHandlerFuncs{
grInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: c.addGR,
UpdateFunc: c.updateGR,
DeleteFunc: c.deleteGR,
}, 2*time.Minute)
})
//TODO: dynamic registration
// Only supported for namespaces
nsInformer := dynamicInformer.ForResource(client.DiscoveryClient.GetGVRFromKind("Namespace"))
c.nsInformer = nsInformer
c.nsInformer.Informer().AddEventHandlerWithResyncPeriod(cache.ResourceEventHandlerFuncs{
c.nsInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
DeleteFunc: c.deleteGenericResource,
}, 2*time.Minute)
})
return &c
}
@@ -134,6 +134,7 @@ func (c *Controller) deletePolicy(obj interface{}) {
return
}
}
logger.V(4).Info("deleting policy", "name", p.Name)
// clean up the GR
// Get the corresponding GR
@@ -143,6 +144,7 @@ func (c *Controller) deletePolicy(obj interface{}) {
logger.Error(err, "failed to generate request CR for the policy", "name", p.Name)
return
}
for _, gr := range grs {
c.addGR(gr)
}
@@ -167,12 +169,14 @@ func (c *Controller) deleteGR(obj interface{}) {
logger.Info("Couldn't get object from tombstone", "obj", obj)
return
}
_, ok = tombstone.Obj.(*kyverno.GenerateRequest)
if !ok {
logger.Info("ombstone contained object that is not a Generate Request", "obj", obj)
return
}
}
for _, resource := range gr.Status.GeneratedResources {
r, err := c.client.GetResource(resource.APIVersion, resource.Kind, resource.Namespace, resource.Name)
if err != nil && !apierrors.IsNotFound(err) {
@@ -187,6 +191,7 @@ func (c *Controller) deleteGR(obj interface{}) {
}
}
}
logger.V(4).Info("deleting Generate Request CR", "name", gr.Name)
// sync Handler will remove it from the queue
c.enqueueGR(gr)
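
Alongside the rate-limiter swap, this controller (and the generate controller below) replaces AddEventHandlerWithResyncPeriod(..., 2*time.Minute) with plain AddEventHandler. A per-handler resync replays the informer's entire store to the handler every two minutes, re-enqueueing every object even when nothing changed; dropping it leaves genuine watch events plus the factory-wide default resync. A sketch of the contrast, assuming client-go and stand-in names:

package sketch

import (
	"time"

	"k8s.io/apimachinery/pkg/runtime/schema"
	"k8s.io/client-go/dynamic/dynamicinformer"
	"k8s.io/client-go/tools/cache"
)

// registerHandlers contrasts the two registration styles touched by the
// commit; factory, gvr and onUpdate are stand-ins for Kyverno's wiring.
func registerHandlers(factory dynamicinformer.DynamicSharedInformerFactory,
	gvr schema.GroupVersionResource, onUpdate func(oldObj, newObj interface{})) {
	informer := factory.ForResource(gvr).Informer()

	// Before: the whole store is replayed to this handler every two
	// minutes, re-enqueueing every object even when nothing changed.
	informer.AddEventHandlerWithResyncPeriod(cache.ResourceEventHandlerFuncs{
		UpdateFunc: onUpdate,
	}, 2*time.Minute)

	// After: the handler inherits the factory's default resync period
	// and otherwise sees only genuine watch events.
	informer.AddEventHandler(cache.ResourceEventHandlerFuncs{
		UpdateFunc: onUpdate,
	})
}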

View file

@@ -201,8 +201,9 @@ func (c *Controller) applyGeneratePolicy(log logr.Logger, policyContext engine.P
genResources = append(genResources, genResource)
}
if gr.Status.State == "" {
c.policyStatusListener.Send(generateSyncStats{
if gr.Status.State == "" && len(genResources) > 0 {
log.V(3).Info("updating policy status", "policy", policy.Name, "data", ruleNameToProcessingTime)
c.policyStatusListener.Update(generateSyncStats{
policyName: policy.Name,
ruleNameToProcessingTime: ruleNameToProcessingTime,
})

View file

@@ -27,13 +27,14 @@ import (
const (
maxRetries = 5
resyncPeriod = 15 * time.Minute
)
// Controller manages the life-cycle for Generate-Requests and applies generate rules
type Controller struct {
// dyanmic client implementation
// dynamic client implementation
client *dclient.Client
// typed client for kyverno CRDs
// typed client for Kyverno CRDs
kyvernoClient *kyvernoclient.Clientset
// event generator interface
eventGen event.Interface
@@ -54,7 +55,7 @@ type Controller struct {
pSynced cache.InformerSynced
// grSynced returns true if the Generate Request store has been synced at least once
grSynced cache.InformerSynced
// dyanmic sharedinformer factory
// dynamic shared informer factory
dynamicInformer dynamicinformer.DynamicSharedInformerFactory
//TODO: list of generic informers
// only support Namespaces for re-evaluation on resource updates
@@ -83,9 +84,7 @@ func NewController(
client: client,
kyvernoClient: kyvernoclient,
eventGen: eventGen,
//TODO: do the math for worst case back off and make sure cleanup runs after that
// as we dont want a deleted GR to be re-queue
queue: workqueue.NewNamedRateLimitingQueue(workqueue.NewItemExponentialFailureRateLimiter(1, 30), "generate-request"),
queue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "generate-request"),
dynamicInformer: dynamicInformer,
log: log,
policyStatusListener: policyStatus,
@@ -94,16 +93,16 @@ func NewController(
}
c.statusControl = StatusControl{client: kyvernoclient}
pInformer.Informer().AddEventHandlerWithResyncPeriod(cache.ResourceEventHandlerFuncs{
pInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
UpdateFunc: c.updatePolicy, // We only handle updates to policy
// Deletion of policy will be handled by cleanup controller
}, 2*time.Minute)
})
grInformer.Informer().AddEventHandlerWithResyncPeriod(cache.ResourceEventHandlerFuncs{
grInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: c.addGR,
UpdateFunc: c.updateGR,
DeleteFunc: c.deleteGR,
}, 2*time.Minute)
})
c.enqueueGR = c.enqueue
c.syncHandler = c.syncGenerateRequest
@@ -118,9 +117,10 @@ func NewController(
// Only supported for namespaces
nsInformer := dynamicInformer.ForResource(client.DiscoveryClient.GetGVRFromKind("Namespace"))
c.nsInformer = nsInformer
c.nsInformer.Informer().AddEventHandlerWithResyncPeriod(cache.ResourceEventHandlerFuncs{
c.nsInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
UpdateFunc: c.updateGenericResource,
}, 2*time.Minute)
})
return &c
}
@@ -306,6 +306,7 @@ func (c *Controller) syncGenerateRequest(key string) error {
defer func() {
logger.V(4).Info("finished sync", "key", key, "processingTime", time.Since(startTime).String())
}()
_, grName, err := cache.SplitMetaNamespaceKey(key)
if err != nil {
return err
@@ -320,5 +321,6 @@ func (c *Controller) syncGenerateRequest(key string) error {
logger.Error(err, "failed to list generate requests")
return err
}
return c.processGR(gr)
}

View file

@@ -147,7 +147,7 @@ To apply policy with variables:
if err != nil {
return err
}
dClient, err = client.NewClient(restConfig, 5*time.Minute, make(chan struct{}), log.Log)
dClient, err = client.NewClient(restConfig, 15*time.Minute, make(chan struct{}), log.Log)
if err != nil {
return err
}

View file

@@ -267,7 +267,7 @@ func (gen *Generator) syncHandler(info Info) error {
func (gen *Generator) sync(reportReq *unstructured.Unstructured, info Info) error {
defer func() {
if val := reportReq.GetAnnotations()["fromSync"]; val == "true" {
gen.policyStatusListener.Send(violationCount{
gen.policyStatusListener.Update(violationCount{
policyName: info.PolicyName,
violatedRules: info.Rules,
})

View file

@@ -3,7 +3,7 @@ package policystatus
import (
"context"
"encoding/json"
"fmt"
"github.com/go-logr/logr"
"strings"
"sync"
"time"
@@ -17,9 +17,8 @@ import (
)
// Policy status implementation works in the following way,
//Currently policy status maintains a cache of the status of
//each policy.
//Every x unit of time the status of policy is updated using
// Currently policy status maintains a cache of the status of each policy.
// Every x unit of time the status of policy is updated using
//the data from the cache.
//The sync exposes a listener which accepts a statusUpdater
//interface which dictates how the status should be updated.
@@ -27,21 +26,21 @@ import (
//on a channel.
//The worker then updates the current status using the methods
//exposed by the interface.
//Current implementation is designed to be threadsafe with optimised
//Current implementation is designed to be thread safe with optimised
//locking for each policy.
// statusUpdater defines a type to have a method which
//updates the given status
// updates the given status
type statusUpdater interface {
PolicyName() string
UpdateStatus(status v1.PolicyStatus) v1.PolicyStatus
}
// Listener ...
// Listener is a channel of statusUpdater instances
type Listener chan statusUpdater
// Send sends an update request
func (l Listener) Send(s statusUpdater) {
// Update queues a status update request
func (l Listener) Update(s statusUpdater) {
l <- s
}
@@ -55,6 +54,7 @@ type Sync struct {
client *versioned.Clientset
lister kyvernolister.ClusterPolicyLister
nsLister kyvernolister.PolicyLister
log logr.Logger
}
type cache struct {
@@ -63,7 +63,7 @@ type cache struct {
keyToMutex *keyToMutex
}
// NewSync ...
// NewSync creates a new Sync instance
func NewSync(c *versioned.Clientset, lister kyvernolister.ClusterPolicyLister, nsLister kyvernolister.PolicyLister) *Sync {
return &Sync{
cache: &cache{
@@ -75,16 +75,17 @@ func NewSync(c *versioned.Clientset, lister kyvernolister.ClusterPolicyLister, n
lister: lister,
nsLister: nsLister,
Listener: make(chan statusUpdater, 20),
log: log.Log.WithName("PolicyStatus"),
}
}
// Run ...
// Run starts workers and periodically flushes the cached status
func (s *Sync) Run(workers int, stopCh <-chan struct{}) {
for i := 0; i < workers; i++ {
go s.updateStatusCache(stopCh)
}
wait.Until(s.updatePolicyStatus, 10*time.Second, stopCh)
wait.Until(s.updatePolicyStatus, 60*time.Second, stopCh)
<-stopCh
}
@@ -94,7 +95,10 @@ func (s *Sync) updateStatusCache(stopCh <-chan struct{}) {
for {
select {
case statusUpdater := <-s.Listener:
s.cache.keyToMutex.Get(statusUpdater.PolicyName()).Lock()
name := statusUpdater.PolicyName()
s.log.V(3).Info("received policy status update request", "policy", name)
s.cache.keyToMutex.Get(name).Lock()
s.cache.dataMu.RLock()
status, exist := s.cache.data[statusUpdater.PolicyName()]
@@ -105,6 +109,7 @@ func (s *Sync) updateStatusCache(stopCh <-chan struct{}) {
status = policy.Status
}
}
updatedStatus := statusUpdater.UpdateStatus(status)
s.cache.dataMu.Lock()
@@ -114,7 +119,10 @@ func (s *Sync) updateStatusCache(stopCh <-chan struct{}) {
s.cache.keyToMutex.Get(statusUpdater.PolicyName()).Unlock()
oldStatus, _ := json.Marshal(status)
newStatus, _ := json.Marshal(updatedStatus)
log.Log.V(4).Info(fmt.Sprintf("\nupdated status of policy - %v\noldStatus:\n%v\nnewStatus:\n%v\n", statusUpdater.PolicyName(), string(oldStatus), string(newStatus)))
s.log.V(4).Info("updated policy status", "policy", statusUpdater.PolicyName(),
"oldStatus", string(oldStatus), "newStatus", string(newStatus))
case <-stopCh:
return
}
@@ -122,59 +130,79 @@ func (s *Sync) updateStatusCache(stopCh <-chan struct{}) {
}
// updatePolicyStatus updates the status in the policy resource definition
//from the status cache, syncing them
// from the status cache, syncing them
func (s *Sync) updatePolicyStatus() {
s.cache.dataMu.Lock()
var nameToStatus = make(map[string]v1.PolicyStatus, len(s.cache.data))
for k, v := range s.cache.data {
nameToStatus[k] = v
for key, status := range s.getCachedStatus() {
s.log.V(2).Info("updating policy status", "policy", key)
namespace, policyName := s.parseStatusKey(key)
if namespace == "" {
s.updateClusterPolicy(policyName, key, status)
} else {
s.updateNamespacedPolicyStatus(policyName, namespace, key, status)
}
s.cache.dataMu.Unlock()
}
}
for policyName, status := range nameToStatus {
// Identify Policy and ClusterPolicy based on the namespace in the key:
// key = <namespace>/<name> for a namespaced Policy, key = <name> for a ClusterPolicy,
// and update the respective policies
func (s *Sync) parseStatusKey(key string) (string, string) {
namespace := ""
isNamespacedPolicy := false
key := policyName
index := strings.Index(policyName, "/")
policyName := key
index := strings.Index(key, "/")
if index != -1 {
namespace = policyName[:index]
isNamespacedPolicy = true
policyName = policyName[index+1:]
namespace = key[:index]
policyName = key[index+1:]
}
if !isNamespacedPolicy {
return namespace, policyName
}
func (s *Sync) updateClusterPolicy(policyName, key string, status v1.PolicyStatus) {
defer s.deleteCachedStatus(key)
policy, err := s.lister.Get(policyName)
if err != nil {
continue
s.log.Error(err, "failed to update policy status", "policy", policyName)
return
}
policy.Status = status
_, err = s.client.KyvernoV1().ClusterPolicies().UpdateStatus(context.TODO(), policy, metav1.UpdateOptions{})
if err != nil {
s.cache.dataMu.Lock()
delete(s.cache.data, policyName)
s.cache.dataMu.Unlock()
log.Log.Error(err, "failed to update policy status")
s.log.Error(err, "failed to update policy status", "policy", policyName)
}
} else {
}
func (s *Sync) updateNamespacedPolicyStatus(policyName, namespace, key string, status v1.PolicyStatus) {
defer s.deleteCachedStatus(key)
policy, err := s.nsLister.Policies(namespace).Get(policyName)
if err != nil {
s.cache.dataMu.Lock()
delete(s.cache.data, key)
s.cache.dataMu.Unlock()
continue
s.log.Error(err, "failed to update policy status", "policy", policyName)
return
}
policy.Status = status
_, err = s.client.KyvernoV1().Policies(namespace).UpdateStatus(context.TODO(), policy, metav1.UpdateOptions{})
if err != nil {
s.cache.dataMu.Lock()
delete(s.cache.data, key)
s.cache.dataMu.Unlock()
log.Log.Error(err, "failed to update namespace policy status")
}
}
s.log.Error(err, "failed to update namespaced policy status", "policy", policyName)
}
}
func (s *Sync) deleteCachedStatus(policyName string) {
s.cache.dataMu.Lock()
defer s.cache.dataMu.Unlock()
delete(s.cache.data, policyName)
}
func (s *Sync) getCachedStatus() map[string]v1.PolicyStatus {
s.cache.dataMu.Lock()
defer s.cache.dataMu.Unlock()
var nameToStatus = make(map[string]v1.PolicyStatus, len(s.cache.data))
for k, v := range s.cache.data {
nameToStatus[k] = v
}
return nameToStatus
}
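
The refactor keeps the design from the package comment, a buffered Listener channel drained by workers into a per-policy cache that a timer flushes (now every 60s instead of 10s), but splits the flush into small helpers so each cache access is locked narrowly. A stand-alone sketch of the pattern, with illustrative names and a single mutex in place of Kyverno's per-policy keyToMutex:

package main

import (
	"fmt"
	"sync"
	"time"
)

// PolicyStatus stands in for v1.PolicyStatus.
type PolicyStatus struct{ RulesApplied int }

// statusUpdater mirrors the interface in the diff: it names a policy
// and folds an update into that policy's current status.
type statusUpdater interface {
	PolicyName() string
	UpdateStatus(PolicyStatus) PolicyStatus
}

type syncer struct {
	listener chan statusUpdater
	mu       sync.Mutex
	data     map[string]PolicyStatus
}

// worker drains the listener into the cache; several run concurrently.
func (s *syncer) worker(stopCh <-chan struct{}) {
	for {
		select {
		case u := <-s.listener:
			s.mu.Lock()
			s.data[u.PolicyName()] = u.UpdateStatus(s.data[u.PolicyName()])
			s.mu.Unlock()
		case <-stopCh:
			return
		}
	}
}

// flush writes out and clears the cached statuses; in the real code this
// is updatePolicyStatus, driven by wait.Until every 60 seconds.
func (s *syncer) flush() {
	s.mu.Lock()
	defer s.mu.Unlock()
	for name, st := range s.data {
		fmt.Printf("updating %s: %+v\n", name, st)
		delete(s.data, name)
	}
}

type ruleApplied struct{ policy string }

func (r ruleApplied) PolicyName() string { return r.policy }
func (r ruleApplied) UpdateStatus(s PolicyStatus) PolicyStatus {
	s.RulesApplied++
	return s
}

func main() {
	s := &syncer{listener: make(chan statusUpdater, 20), data: map[string]PolicyStatus{}}
	stopCh := make(chan struct{})
	for i := 0; i < 2; i++ {
		go s.worker(stopCh)
	}
	for i := 0; i < 5; i++ {
		s.listener <- ruleApplied{policy: "require-labels"}
	}
	time.Sleep(100 * time.Millisecond) // let the workers drain
	s.flush()
	close(stopCh)
}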

View file

@@ -73,7 +73,7 @@ func TestKeyToMutex(t *testing.T) {
}
for i := 0; i < 100; i++ {
go s.Listener.Send(dummyStatusUpdater{})
go s.Listener.Update(dummyStatusUpdater{})
}
<-time.After(time.Second * 3)

View file

@@ -12,7 +12,7 @@ import (
//maxRetryCount defines the max deadline count
const (
tickerInterval time.Duration = 10 * time.Second
tickerInterval time.Duration = 30 * time.Second
idleCheckInterval time.Duration = 60 * time.Second
idleDeadline time.Duration = idleCheckInterval * 2
)
@@ -70,9 +70,9 @@ func (t *Monitor) Run(register *Register, eventGen event.Interface, client *dcli
case <-ticker.C:
if err := register.Check(); err != nil {
t.log.Error(err,"missing webhooks")
t.log.Error(err, "missing webhooks")
if err := register.Register(); err != nil {
logger.Error(err,"failed to register webhooks")
logger.Error(err, "failed to register webhooks")
}
continue
@@ -90,7 +90,7 @@ func (t *Monitor) Run(register *Register, eventGen event.Interface, client *dcli
register.Remove(cleanUp)
<-cleanUp
if err:= register.Register(); err != nil {
if err := register.Register(); err != nil {
logger.Error(err, "Failed to register webhooks")
}
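
The monitor's probe interval triples from 10s to 30s, cutting the periodic Check/Register traffic against the webhook configurations by two thirds. A minimal sketch of the loop's shape (the interface and names are stand-ins, not Kyverno's types):

package sketch

import (
	"log"
	"time"
)

// register abstracts the two calls the monitor makes.
type register interface {
	Check() error
	Register() error
}

// run probes the webhook configuration on each tick and re-registers on
// failure, mirroring the loop in Monitor.Run.
func run(r register, stopCh <-chan struct{}) {
	ticker := time.NewTicker(30 * time.Second) // was 10s: fewer probe cycles
	defer ticker.Stop()
	for {
		select {
		case <-ticker.C:
			if err := r.Check(); err != nil {
				log.Println("missing webhooks:", err)
				if err := r.Register(); err != nil {
					log.Println("failed to register webhooks:", err)
				}
			}
		case <-stopCh:
			return
		}
	}
}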

View file

@@ -60,7 +60,6 @@ func (vc statusControl) setStatus(status string) error {
deployStatus, ok := ann[annWebhookStatus]
if ok {
// annotatiaion is present
if deployStatus == status {
logger.V(4).Info(fmt.Sprintf("annotation %s already set to '%s'", annWebhookStatus, status))
return nil

View file

@@ -52,7 +52,7 @@ func NewGenerator(client *kyvernoclient.Clientset, stopCh <-chan struct{}, log l
func (g *Generator) Apply(gr kyverno.GenerateRequestSpec, action v1beta1.Operation) error {
logger := g.log
logger.V(4).Info("creating Generate Request", "request", gr)
// Send to channel
// Update to channel
message := GeneratorChannel{
action: action,
spec: gr,

View file

@@ -77,7 +77,7 @@ func (ws *WebhookServer) HandleGenerate(request *v1beta1.AdmissionRequest, polic
engineResponse.PolicyResponse.Rules = rules
// some generate rules do apply to the resource
engineResponses = append(engineResponses, engineResponse)
ws.statusListener.Send(generateStats{
ws.statusListener.Update(generateStats{
resp: engineResponse,
})
}

View file

@@ -56,8 +56,12 @@ func (ws *WebhookServer) HandleMutation(
policyContext.Policy = *policy
engineResponse := engine.Mutate(policyContext)
policyPatches := engineResponse.GetPatches()
if engineResponse.PolicyResponse.RulesAppliedCount > 0 && len(policyPatches) > 0 {
ws.statusListener.Update(mutateStats{resp: engineResponse, namespace: policy.Namespace})
}
ws.statusListener.Send(mutateStats{resp: engineResponse, namespace: policy.Namespace})
if !engineResponse.IsSuccessful() && len(engineResponse.GetFailedRules()) > 0 {
logger.Info("failed to apply policy", "policy", policy.Name, "failed rules", engineResponse.GetFailedRules())
continue
@@ -69,8 +73,6 @@ func (ws *WebhookServer) HandleMutation(
continue
}
// gather patches
policyPatches := engineResponse.GetPatches()
if len(policyPatches) > 0 {
patches = append(patches, policyPatches...)
rules := engineResponse.GetSuccessRules()
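
The mutation path now queues a status update only when the policy both applied rules and produced patches, matching the len(genResources) > 0 guard added to applyGeneratePolicy above; idle admission traffic stops feeding the status channel. A tiny sketch of the guard with stand-in field names:

package main

import "fmt"

type result struct {
	rulesApplied int
	patches      [][]byte
}

// shouldReportStatus mirrors the new guard: if a policy did no work,
// nothing is queued on the status channel.
func shouldReportStatus(r result) bool {
	return r.rulesApplied > 0 && len(r.patches) > 0
}

func main() {
	fmt.Println(shouldReportStatus(result{})) // false
	fmt.Println(shouldReportStatus(result{
		rulesApplied: 1,
		patches:      [][]byte{[]byte(`{"op":"add","path":"/metadata/labels/app","value":"x"}`)},
	})) // true
}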

View file

@@ -82,6 +82,7 @@ func HandleValidation(
var engineResponses []response.EngineResponse
for _, policy := range policies {
logger.V(3).Info("evaluating policy", "policy", policy.Name)
policyContext.Policy = *policy
engineResponse := engine.Validate(policyContext)
@@ -90,11 +91,13 @@ func HandleValidation(
// allow updates if the resource update doesn't change the policy evaluation
continue
}
engineResponses = append(engineResponses, engineResponse)
statusListener.Send(validateStats{
statusListener.Update(validateStats{
resp: engineResponse,
namespace: policy.Namespace,
})
if !engineResponse.IsSuccessful() {
logger.V(4).Info("failed to apply policy", "policy", policy.Name, "failed rules", engineResponse.GetFailedRules())
continue
@@ -102,6 +105,7 @@ func HandleValidation(
logger.Info("validation rules from policy applied successfully", "policy", policy.Name)
}
// If Validation fails then reject the request
// no violations will be created on "enforce"
blocked := toBlockResource(engineResponses, logger)