Integrate LitmusChaos - Pod Memory Hog experiment (#2014)
* updating readme
* Updating GetWithRetry function
* Updating GetWithRetry function
* Updating GetWithRetry function
* removing update
* Update utils.go

Signed-off-by: Mahfuza Humayra Mohona <mhmohona@gmail.com>
Signed-off-by: Mahfuza Mohona <mahfuza.mohona@LEADSOFT.BIZ>
parent 9efd58f667
commit 9e769d1fd0

4 changed files with 471 additions and 0 deletions
litmuschaos/README.md (new file, +29)
@@ -0,0 +1,29 @@
# Integration of Kyverno with Litmus

Kyverno is a policy engine designed for Kubernetes. It can validate, mutate, and generate configurations using admission controls and background scans. Litmus provides a large number of experiments for testing containers, pods, and nodes, as well as specific platforms and tools. The advantage of chaos engineering is that it quickly surfaces issues that other testing layers cannot easily capture. This saves significant time later on and helps uncover loopholes in the system so they can be fixed.

## Steps to Execute LitmusChaos Experiment

### Prerequisites

* Ensure that the Kubernetes version is greater than 1.15.
* Ensure that Kyverno is running by executing `kubectl get pods` in the operator namespace (typically `kyverno`). If not, install it from [here](https://kyverno.io/docs/installation/).
* Update the Kyverno Deployment to use the `ghcr.io/kyverno/kyverno:test-litmuschaos` image (see the verification sketch after this list). Note that this image is built specifically to run LitmusChaos experiments, per the [CHAOS_KILL_COMMAND](https://docs.litmuschaos.io/docs/pod-cpu-hog/#prepare-chaosengine) requirement. The official Kyverno images will adopt this soon.
* Ensure that the Litmus Chaos Operator is running by executing `kubectl get pods` in the operator namespace (typically `litmus`). If not, install it from [here](https://docs.litmuschaos.io/docs/getstarted/#install-litmus).
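A minimal shell sketch for checking these prerequisites and switching the image. The Deployment and container names (`deployment/kyverno`, container `kyverno`) are assumptions based on a default Kyverno installation; adjust them to your setup:

```sh
# Verify the Kyverno and Litmus operators are running
kubectl get pods -n kyverno
kubectl get pods -n litmus

# Point the Kyverno Deployment at the LitmusChaos test image
# ("deployment/kyverno" and the container name "kyverno" are assumed defaults)
kubectl -n kyverno set image deployment/kyverno kyverno=ghcr.io/kyverno/kyverno:test-litmuschaos
```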
### Running experiment

After setting up the Docker image, run a LitmusChaos experiment with the following steps (combined into a shell sketch below):

- First, execute `eval export E2E="ok"`.
- Run the chaos experiment test command: `go test ./litmuschaos/pod_cpu_hog -v`.
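The same sequence as a runnable shell sketch, assuming it is executed from the repository root:

```sh
# Enable the E2E gate; the test skips itself when E2E is unset
eval export E2E="ok"

# Run the Pod CPU Hog chaos experiment test with verbose output
go test ./litmuschaos/pod_cpu_hog -v
```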
The test passes if the enforce policy shows its expected behaviour.

# Experiments

| Experiment name | LitmusChaos experiment - Pod CPU Hog |
| :-------------: | ------------- |
| Test command | `go test ./litmuschaos/pod_cpu_hog -v` |
| Goal | See how the overall application stack behaves when Kyverno pods experience CPU spikes, whether caused by expected or undesired processes |
| Performed tests | <li>Deploy the enforce policy.</li><li>Run the chaos test to consume CPU resources on the application container.</li><li>Verify the enforce policy behaviour.</li> |
| Expected result | The Kyverno pod remains responsive after running the Pod CPU Hog experiment |
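The test determines success by polling the ChaosResult verdict. It can also be inspected manually; in this sketch, the resource name `kind-chaos-pod-cpu-hog` and the `status.experimentStatus.verdict` path are taken from the test code below:

```sh
# Read the verdict recorded by the Litmus chaos runner
kubectl -n test-litmus get chaosresult kind-chaos-pod-cpu-hog \
  -o jsonpath='{.status.experimentStatus.verdict}'
```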
litmuschaos/pod_cpu_hog/config.go (new file, +26)
@@ -0,0 +1,26 @@
package e2e

type testData struct {
	testResourceName, group, version, resource, namespace string
	manifest                                              []byte
}

// Pod CPU hog test
var PodCPUHogTest = struct {
	// TestName - Name of the Test
	TestName string
	TestData []testData
}{
	TestName: "test-litmus-chaos-experiment",
	TestData: []testData{
		{
			testResourceName: "add-new-capabilities",
			group:            "",
			version:          "v1",
			resource:         "Pod",
			namespace:        "test-litmus",
			manifest:         KyvernoTestResourcesYaml,
		},
	},
}
litmuschaos/pod_cpu_hog/pod_cpu_hog_test.go (new file, +147)
@@ -0,0 +1,147 @@
package e2e

import (
	"errors"
	"fmt"
	"os"
	"testing"
	"time"

	"github.com/kyverno/kyverno/test/e2e"
	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"

	. "github.com/onsi/ginkgo"
	. "github.com/onsi/gomega"
)

var (
	// Namespace GVR
	nsGVR = e2e.GetGVR("", "v1", "namespaces")
	// Chaos service account GVR
	saGVR = e2e.GetGVR("", "v1", "serviceaccounts")
	// Role GVR
	rGVR = e2e.GetGVR("rbac.authorization.k8s.io", "v1", "roles")
	// RoleBinding GVR
	rbGVR = e2e.GetGVR("rbac.authorization.k8s.io", "v1", "rolebindings")
	// PodCPUHogExperiment GVR
	cpuGVR = e2e.GetGVR("litmuschaos.io", "v1alpha1", "chaosexperiments")
	// ChaosEngine GVR
	ceGVR = e2e.GetGVR("litmuschaos.io", "v1alpha1", "chaosengines")
	// ChaosResult GVR
	crGVR = e2e.GetGVR("litmuschaos.io", "v1alpha1", "chaosresults")
	// ClusterPolicy GVR
	clPolGVR = e2e.GetGVR("kyverno.io", "v1", "clusterpolicies")
	// Pod GVR, used for the test resource that must violate the policy
	dcsmPolGVR = e2e.GetGVR("", "v1", "pods")

	// ClusterPolicy Namespace
	clPolNS = ""
	// Namespace Name
	// Hardcoded in YAML Definition
	nspace = "test-litmus"
)

func Test_Pod_CPU_Hog(t *testing.T) {
	RegisterTestingT(t)
	if os.Getenv("E2E") == "" {
		t.Skip("Skipping E2E Test")
	}

	// Generate E2E Client
	e2eClient, err := e2e.NewE2EClient()
	Expect(err).To(BeNil())

	for _, resource := range PodCPUHogTest.TestData {

		// CleanUp Resources
		By(fmt.Sprintf("Cleaning Cluster Policies in %s", nspace))
		e2eClient.CleanClusterPolicies(clPolGVR) // Clean Cluster Policy
		By(fmt.Sprintf("Deleting Namespace : %s", nspace))
		e2eClient.DeleteClusteredResource(nsGVR, nspace) // Clear Namespace
		e2eClient.DeleteNamespacedResource(dcsmPolGVR, nspace, resource.testResourceName)
		e2e.GetWithRetry(time.Duration(1), 15, func() error { // Wait till deletion of the namespace
			_, err := e2eClient.GetClusteredResource(nsGVR, nspace)
			if err != nil {
				return nil
			}
			return errors.New("Deleting Namespace")
		})

		// Create Namespace
		By(fmt.Sprintf("Creating Namespace %s", nspace))
		_, err = e2eClient.CreateClusteredResourceYaml(nsGVR, LitmusChaosnamespaceYaml)
		Expect(err).NotTo(HaveOccurred())
		e2e.GetWithRetry(time.Duration(1), 15, func() error { // Wait till creation of the namespace
			_, err := e2eClient.GetClusteredResource(nsGVR, resource.namespace)
			if err != nil {
				return err
			}
			return nil
		})

		// ================== Litmus Chaos Experiment ==================
		// Prepare chaosServiceAccount
		By(fmt.Sprintf("\nPreparing Chaos Service Account in %s", nspace))
		_, err = e2eClient.CreateNamespacedResourceYaml(saGVR, nspace, ChaosServiceAccountYaml)
		Expect(err).NotTo(HaveOccurred())
		_, err = e2eClient.CreateNamespacedResourceYaml(rGVR, nspace, ChaosRoleYaml)
		Expect(err).NotTo(HaveOccurred())
		_, err = e2eClient.CreateNamespacedResourceYaml(rbGVR, nspace, ChaosRoleBindingYaml)
		Expect(err).NotTo(HaveOccurred())

		// Deploy Pod CPU Hog Experiment
		By(fmt.Sprintf("\nInstalling Litmus Chaos Experiment in %s", nspace))
		_, err = e2eClient.CreateNamespacedResourceYaml(cpuGVR, nspace, PodCPUHogExperimentYaml)
		Expect(err).NotTo(HaveOccurred())

		// Prepare Chaos Engine
		By(fmt.Sprintf("\nCreating ChaosEngine Resource in %s", nspace))
		_, err = e2eClient.CreateNamespacedResourceYaml(ceGVR, nspace, ChaosEngineYaml)
		Expect(err).NotTo(HaveOccurred())

		By(fmt.Sprintf("\nMonitoring status from ChaosResult in %s", nspace))

		e2e.GetWithRetry(time.Duration(30), 5, func() error { // Wait till the chaos experiment records a verdict
			chaosresult, err := e2eClient.GetNamespacedResource(crGVR, nspace, "kind-chaos-pod-cpu-hog")
			if err != nil {
				return fmt.Errorf("Unable to fetch ChaosResult: %v", err)
			}
			chaosVerdict, _, err := unstructured.NestedString(chaosresult.UnstructuredContent(), "status", "experimentStatus", "verdict")
			if err != nil {
				By(fmt.Sprintf("\nUnable to fetch the status.verdict from ChaosResult: %v", err))
			}

			By(fmt.Sprintf("\nChaos verdict %s", chaosVerdict))

			if chaosVerdict == "Pass" {
				return nil
			}
			return errors.New("Chaos result is not passed")
		})

		// Create the disallow-add-capabilities enforce policy
		By(fmt.Sprintf("\nCreating Enforce Policy in %s", clPolNS))
		_, err = e2eClient.CreateNamespacedResourceYaml(clPolGVR, clPolNS, DisallowAddingCapabilitiesYaml)
		Expect(err).NotTo(HaveOccurred())

		// Deploy a Pod that violates the policy; creation must be blocked
		By(fmt.Sprintf("\nDeploying Enforce Policy in %s", nspace))
		_, err = e2eClient.CreateNamespacedResourceYaml(dcsmPolGVR, nspace, resource.manifest)
		Expect(err).To(HaveOccurred())

		// CleanUp Resources
		e2eClient.CleanClusterPolicies(clPolGVR) // Clean Cluster Policy
		e2eClient.CleanClusterPolicies(saGVR)
		e2eClient.DeleteClusteredResource(nsGVR, nspace) // Clear Namespace
		e2e.GetWithRetry(time.Duration(1), 15, func() error { // Wait till deletion of the namespace
			_, err := e2eClient.GetClusteredResource(nsGVR, nspace)
			if err != nil {
				return nil
			}
			return errors.New("Deleting Namespace")
		})

		By(fmt.Sprintf("Test %s Completed. \n\n\n", PodCPUHogTest.TestName))
	}

}
litmuschaos/pod_cpu_hog/resources.go (new file, +269)
@@ -0,0 +1,269 @@
package e2e

// Namespace Description
var LitmusChaosnamespaceYaml = []byte(`
apiVersion: v1
kind: Namespace
metadata:
  name: test-litmus
`)

// Litmus Chaos Service Account
var ChaosServiceAccountYaml = []byte(`
apiVersion: v1
kind: ServiceAccount
metadata:
  name: pod-cpu-hog-sa
  namespace: test-litmus
  labels:
    name: pod-cpu-hog-sa
    app.kubernetes.io/part-of: litmus
`)

var ChaosRoleYaml = []byte(`
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: pod-cpu-hog-sa
  namespace: test-litmus
  labels:
    name: pod-cpu-hog-sa
    app.kubernetes.io/part-of: litmus
rules:
- apiGroups: [""]
  resources: ["pods","events"]
  verbs: ["create","list","get","patch","update","delete","deletecollection"]
- apiGroups: [""]
  resources: ["pods/exec","pods/log","replicationcontrollers"]
  verbs: ["create","list","get"]
- apiGroups: ["batch"]
  resources: ["jobs"]
  verbs: ["create","list","get","delete","deletecollection"]
- apiGroups: ["apps"]
  resources: ["deployments","statefulsets","daemonsets","replicasets"]
  verbs: ["list","get"]
- apiGroups: ["apps.openshift.io"]
  resources: ["deploymentconfigs"]
  verbs: ["list","get"]
- apiGroups: ["argoproj.io"]
  resources: ["rollouts"]
  verbs: ["list","get"]
- apiGroups: ["litmuschaos.io"]
  resources: ["chaosengines","chaosexperiments","chaosresults"]
  verbs: ["create","list","get","patch","update"]
`)

var ChaosRoleBindingYaml = []byte(`
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: pod-cpu-hog-sa
  namespace: test-litmus
  labels:
    name: pod-cpu-hog-sa
    app.kubernetes.io/part-of: litmus
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: pod-cpu-hog-sa
subjects:
- kind: ServiceAccount
  name: pod-cpu-hog-sa
  namespace: test-litmus
`)

// Pod CPU Hog Experiment
var PodCPUHogExperimentYaml = []byte(`
apiVersion: litmuschaos.io/v1alpha1
description:
  message: |
    Injects cpu consumption on pods belonging to an app deployment
kind: ChaosExperiment
metadata:
  name: pod-cpu-hog
  labels:
    name: pod-cpu-hog
    app.kubernetes.io/part-of: litmus
    app.kubernetes.io/component: chaosexperiment
    app.kubernetes.io/version: 1.13.3
spec:
  definition:
    scope: Namespaced
    permissions:
      - apiGroups:
          - ""
          - "batch"
          - "apps"
          - "apps.openshift.io"
          - "argoproj.io"
          - "litmuschaos.io"
        resources:
          - "jobs"
          - "pods"
          - "pods/log"
          - "events"
          - "replicationcontrollers"
          - "deployments"
          - "statefulsets"
          - "daemonsets"
          - "replicasets"
          - "deploymentconfigs"
          - "rollouts"
          - "pods/exec"
          - "chaosengines"
          - "chaosexperiments"
          - "chaosresults"
        verbs:
          - "create"
          - "list"
          - "get"
          - "patch"
          - "update"
          - "delete"
          - "deletecollection"
    image: "litmuschaos/go-runner:1.13.3"
    imagePullPolicy: Always
    args:
    - -c
    - ./experiments -name pod-cpu-hog
    command:
    - /bin/bash
    env:
    - name: TOTAL_CHAOS_DURATION
      value: '60'

    ## Number of CPU cores to stress
    - name: CPU_CORES
      value: '1'

    ## Percentage of total pods to target
    - name: PODS_AFFECTED_PERC
      value: ''

    ## Period to wait before and after injection of chaos in sec
    - name: RAMP_TIME
      value: ''

    ## env var that describes the library used to execute the chaos
    ## default: litmus. Supported values: litmus, pumba
    - name: LIB
      value: 'litmus'

    ## It is used in pumba lib only
    - name: LIB_IMAGE
      value: 'litmuschaos/go-runner:1.13.3'

    ## It is used in pumba lib only
    - name: STRESS_IMAGE
      value: 'alexeiled/stress-ng:latest-ubuntu'

    # provide the socket file path
    # it is used in pumba lib
    - name: SOCKET_PATH
      value: '/var/run/docker.sock'

    - name: TARGET_PODS
      value: ''

    ## it defines the sequence of chaos execution for multiple target pods
    ## supported values: serial, parallel
    - name: SEQUENCE
      value: 'parallel'

    labels:
      name: pod-cpu-hog
      app.kubernetes.io/part-of: litmus
      app.kubernetes.io/component: experiment-job
      app.kubernetes.io/version: 1.13.3
`)

// ChaosEngine Manifest
var ChaosEngineYaml = []byte(`
apiVersion: litmuschaos.io/v1alpha1
kind: ChaosEngine
metadata:
  name: kind-chaos
  namespace: test-litmus
spec:
  # It can be active/stop
  engineState: 'active'
  appinfo:
    appns: 'kyverno'
    applabel: 'app.kubernetes.io/name=kyverno'
    appkind: 'deployment'
  chaosServiceAccount: pod-cpu-hog-sa
  # It can be delete/retain
  jobCleanUpPolicy: 'delete'
  experiments:
  - name: pod-cpu-hog
    spec:
      components:
        env:
          # number of cpu cores to be consumed
          # verify the resources the app has been launched with
          - name: CPU_CORES
            value: '1'

          - name: TOTAL_CHAOS_DURATION
            value: '60' # in seconds
`)

// disallow-add-capabilities enforce policy
var DisallowAddingCapabilitiesYaml = []byte(`
apiVersion: kyverno.io/v1
kind: ClusterPolicy
metadata:
  name: disallow-add-capabilities
  annotations:
    policies.kyverno.io/category: Pod Security Standards (Baseline)
    policies.kyverno.io/severity: medium
    policies.kyverno.io/subject: Pod
    policies.kyverno.io/description: >-
      Capabilities permit privileged actions without giving full root access.
      Adding capabilities beyond the default set must not be allowed.
spec:
  validationFailureAction: enforce
  background: true
  rules:
  - name: capabilities
    match:
      resources:
        kinds:
        - Pod
    validate:
      message: >-
        Adding of additional capabilities beyond the default set is not allowed.
        The fields spec.containers[*].securityContext.capabilities.add and
        spec.initContainers[*].securityContext.capabilities.add must be empty.
      pattern:
        spec:
          containers:
          - =(securityContext):
              =(capabilities):
                X(add): null
          =(initContainers):
          - =(securityContext):
              =(capabilities):
                X(add): null
`)

// Test Pod that adds a capability beyond the default set, so the enforce policy must block it
var KyvernoTestResourcesYaml = []byte(`
apiVersion: v1
kind: Pod
metadata:
  name: add-new-capabilities
spec:
  containers:
  - name: add-new-capabilities
    image: "ubuntu:18.04"
    command:
    - /bin/sleep
    - "300"
    securityContext:
      capabilities:
        add:
        - NET_ADMIN
`)