
Integrate LitmusChaos - Pod Memory Hog experiment (#2014)

* updating readme

Signed-off-by: Mahfuza Humayra Mohona <mhmohona@gmail.com>
Signed-off-by: Mahfuza Mohona <mahfuza.mohona@LEADSOFT.BIZ>

* Updating GetWithRetry function

Signed-off-by: Mahfuza Humayra Mohona <mhmohona@gmail.com>
Signed-off-by: Mahfuza Mohona <mahfuza.mohona@LEADSOFT.BIZ>

* Updating GetWithRetry function

Signed-off-by: Mahfuza Humayra Mohona <mhmohona@gmail.com>
Signed-off-by: Mahfuza Mohona <mahfuza.mohona@LEADSOFT.BIZ>

* Updating GetWithRetry function

Signed-off-by: Mahfuza Humayra Mohona <mhmohona@gmail.com>
Signed-off-by: Mahfuza Mohona <mahfuza.mohona@LEADSOFT.BIZ>

* removing update

Signed-off-by: Mahfuza Mohona <mahfuza.mohona@LEADSOFT.BIZ>

* Update utils.go

Signed-off-by: Mahfuza Mohona <mahfuza.mohona@LEADSOFT.BIZ>
Authored by Mahfuza Humayra Mohona on 2021-06-24 03:16:49 +06:00, committed by GitHub
commit 9e769d1fd0 (parent 9efd58f667)
4 changed files with 471 additions and 0 deletions

litmuschaos/README.md (new file)

@@ -0,0 +1,29 @@
# Integration of Kyverno with Litmus
Kyverno is a policy engine designed for Kubernetes. It can validate, mutate, and generate configurations using admission controls and background scans. Litmus provides a large number of experiments for testing containers, pods, and nodes, as well as specific platforms and tools. The advantage of chaos engineering is that it quickly surfaces issues that other testing layers cannot easily capture, saving time down the road and exposing loopholes in the system so they can be fixed.
## Steps to Execute LitmusChaos Experiment
### Prerequisites
* Ensure that the Kubernetes version is > 1.15.
* Ensure that Kyverno is running by executing `kubectl get pods` in its operator namespace (typically `kyverno`). If not, install it from [here](https://kyverno.io/docs/installation/).
* Update the Kyverno Deployment to use the `ghcr.io/kyverno/kyverno:test-litmuschaos` image. Note that this image is built specifically for running LitmusChaos experiments, per the [CHAOS_KILL_COMMAND](https://docs.litmuschaos.io/docs/pod-cpu-hog/#prepare-chaosengine) requirement; the official Kyverno images will adopt this soon.
* Ensure that the Litmus Chaos Operator is running by executing `kubectl get pods` in its operator namespace (typically `litmus`). If not, install it from [here](https://docs.litmuschaos.io/docs/getstarted/#install-litmus).
### Running experiment
After setting up the images, run a LitmusChaos experiment as follows:
- First, execute `eval export E2E="ok"` to enable the E2E test suite.
- Run the chaos experiment test command: `go test ./litmuschaos/pod_cpu_hog -v`.

The test passes if the enforce policy shows its expected behaviour.
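The `E2E` variable gates the whole suite: each experiment test begins with a guard like the sketch below, mirroring the check in the pod_cpu_hog test further down this page (the test name here is only illustrative).

```go
package e2e

import (
	"os"
	"testing"
)

// Illustrative guard: the experiment body runs only when E2E is set,
// e.g. after `eval export E2E="ok"`.
func Test_E2E_Guard_Example(t *testing.T) {
	if os.Getenv("E2E") == "" {
		t.Skip("Skipping E2E Test")
	}
	// ... chaos experiment steps run here ...
}
```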
# Experiments
| Experiment name | LitmusChaos experiment - Pod CPU Hog |
| :-------------: | ------------- |
| Test command | `go test ./litmuschaos/pod_cpu_hog -v` |
| Goal | See how the overall application stack behaves when Kyverno pods experience CPU spikes, whether due to expected or undesired processes |
| Performed tests | <li>Deploy the enforce policy.</li><li>Run the chaos test to consume CPU resources on the application container.</li><li>Verify the enforce policy behaviour.</li> |
| Expected result | The Kyverno pod remains responsive after the Pod CPU Hog experiment runs |


@@ -0,0 +1,26 @@
package e2e

type testData struct {
	testResourceName, group, version, resource, namespace string
	manifest                                              []byte
}

// Pod CPU hog test
var PodCPUHogTest = struct {
	// TestName - Name of the Test
	TestName string
	TestData []testData
}{
	TestName: "test-litmus-chaos-experiment",
	TestData: []testData{
		{
			testResourceName: "add-new-capabilities",
			group:            "",
			version:          "v1",
			resource:         "Pod",
			namespace:        "test-litmus",
			manifest:         KyvernoTestResourcesYaml,
		},
	},
}
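Each `testData` entry addresses a resource by group/version/resource. As an aside (hypothetical, not part of the commit), that tuple maps directly onto `schema.GroupVersionResource` from `k8s.io/apimachinery`, which is presumably what the `e2e.GetGVR` helper used in the test below wraps:

```go
package e2e

import "k8s.io/apimachinery/pkg/runtime/schema"

// Hypothetical illustration: turning a testData entry into the GVR a
// dynamic client needs. Assumes e2e.GetGVR is a thin wrapper over
// schema.GroupVersionResource.
func gvrFor(td testData) schema.GroupVersionResource {
	return schema.GroupVersionResource{
		Group:    td.group,
		Version:  td.version,
		Resource: td.resource,
	}
}
```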


@@ -0,0 +1,147 @@
package e2e

import (
	"errors"
	"fmt"
	"os"
	"testing"
	"time"

	"github.com/kyverno/kyverno/test/e2e"
	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"

	. "github.com/onsi/ginkgo"
	. "github.com/onsi/gomega"
)

var (
	// Namespace GVR
	nsGVR = e2e.GetGVR("", "v1", "namespaces")
	// Chaos service account GVR
	saGVR = e2e.GetGVR("", "v1", "serviceaccounts")
	// Role GVR
	rGVR = e2e.GetGVR("rbac.authorization.k8s.io", "v1", "roles")
	// RoleBinding GVR
	rbGVR = e2e.GetGVR("rbac.authorization.k8s.io", "v1", "rolebindings")
	// PodCPUHogExperiment GVR
	cpuGVR = e2e.GetGVR("litmuschaos.io", "v1alpha1", "chaosexperiments")
	// ChaosEngine GVR
	ceGVR = e2e.GetGVR("litmuschaos.io", "v1alpha1", "chaosengines")
	// ChaosResult GVR
	crGVR = e2e.GetGVR("litmuschaos.io", "v1alpha1", "chaosresults")
	// ClusterPolicy GVR
	clPolGVR = e2e.GetGVR("kyverno.io", "v1", "clusterpolicies")
	// Pod GVR, used to create the test resource the policy should block
	dcsmPolGVR = e2e.GetGVR("", "v1", "pods")

	// ClusterPolicy Namespace (cluster-scoped, so empty)
	clPolNS = ""
	// Namespace Name
	// Hardcoded in YAML Definition
	nspace = "test-litmus"
)
func Test_Pod_CPU_Hog(t *testing.T) {
	RegisterTestingT(t)
	if os.Getenv("E2E") == "" {
		t.Skip("Skipping E2E Test")
	}

	// Generate E2E Client
	e2eClient, err := e2e.NewE2EClient()
	Expect(err).To(BeNil())

	for _, resource := range PodCPUHogTest.TestData {
		// CleanUp Resources
		By(fmt.Sprintf("Cleaning Cluster Policies in %s", nspace))
		e2eClient.CleanClusterPolicies(clPolGVR) // Clean Cluster Policy
		By(fmt.Sprintf("Deleting Namespace : %s", nspace))
		e2eClient.DeleteClusteredResource(nsGVR, nspace) // Clear Namespace
		e2eClient.DeleteNamespacedResource(dcsmPolGVR, nspace, resource.testResourceName)
		e2e.GetWithRetry(time.Duration(1), 15, func() error { // Wait Till Deletion of Namespace
			_, err := e2eClient.GetClusteredResource(nsGVR, nspace)
			if err != nil {
				return nil
			}
			return errors.New("deleting Namespace")
		})

		// Create Namespace
		By(fmt.Sprintf("Creating Namespace %s", nspace))
		_, err = e2eClient.CreateClusteredResourceYaml(nsGVR, LitmusChaosnamespaceYaml)
		Expect(err).NotTo(HaveOccurred())
		e2e.GetWithRetry(time.Duration(1), 15, func() error { // Wait Till Creation of Namespace
			_, err := e2eClient.GetClusteredResource(nsGVR, resource.namespace)
			if err != nil {
				return err
			}
			return nil
		})

		// ================== Litmus Chaos Experiment ==================
		// Prepare chaosServiceAccount
		By(fmt.Sprintf("\nPreparing Chaos Service Account in %s", nspace))
		_, err = e2eClient.CreateNamespacedResourceYaml(saGVR, nspace, ChaosServiceAccountYaml)
		Expect(err).NotTo(HaveOccurred())
		_, err = e2eClient.CreateNamespacedResourceYaml(rGVR, nspace, ChaosRoleYaml)
		Expect(err).NotTo(HaveOccurred())
		_, err = e2eClient.CreateNamespacedResourceYaml(rbGVR, nspace, ChaosRoleBindingYaml)
		Expect(err).NotTo(HaveOccurred())

		// Deploy Pod CPU Hog Experiment
		By(fmt.Sprintf("\nInstalling Litmus Chaos Experiment in %s", nspace))
		_, err = e2eClient.CreateNamespacedResourceYaml(cpuGVR, nspace, PodCPUHogExperimentYaml)
		Expect(err).NotTo(HaveOccurred())

		// Prepare Chaos Engine
		By(fmt.Sprintf("\nCreating ChaosEngine Resource in %s", nspace))
		_, err = e2eClient.CreateNamespacedResourceYaml(ceGVR, nspace, ChaosEngineYaml)
		Expect(err).NotTo(HaveOccurred())

		By(fmt.Sprintf("\nMonitoring status from ChaosResult in %s", nspace))
		e2e.GetWithRetry(time.Duration(30), 5, func() error { // Wait Till Chaos engine finishes
			chaosresult, err := e2eClient.GetNamespacedResource(crGVR, nspace, "kind-chaos-pod-cpu-hog")
			if err != nil {
				return fmt.Errorf("unable to fetch ChaosResult: %v", err)
			}
			chaosVerdict, _, err := unstructured.NestedString(chaosresult.UnstructuredContent(), "status", "experimentStatus", "verdict")
			if err != nil {
				By(fmt.Sprintf("\nUnable to fetch the status.verdict from ChaosResult: %v", err))
			}
			By(fmt.Sprintf("\nChaos verdict %s", chaosVerdict))
			if chaosVerdict == "Pass" {
				return nil
			}
			return errors.New("chaos result is not Pass")
		})

		// Create the disallow-add-capabilities enforce policy
		By(fmt.Sprintf("\nCreating Enforce Policy in %s", clPolNS))
		_, err = e2eClient.CreateNamespacedResourceYaml(clPolGVR, clPolNS, DisallowAddingCapabilitiesYaml)
		Expect(err).NotTo(HaveOccurred())

		// Deploy the test Pod; the enforce policy must reject it
		By(fmt.Sprintf("\nDeploying Test Resource in %s", nspace))
		_, err = e2eClient.CreateNamespacedResourceYaml(dcsmPolGVR, nspace, resource.manifest)
		Expect(err).To(HaveOccurred())

		// CleanUp Resources
		e2eClient.CleanClusterPolicies(clPolGVR) // Clean Cluster Policy
		e2eClient.CleanClusterPolicies(saGVR)
		e2eClient.DeleteClusteredResource(nsGVR, nspace) // Clear Namespace
		e2e.GetWithRetry(time.Duration(1), 15, func() error { // Wait Till Deletion of Namespace
			_, err := e2eClient.GetClusteredResource(nsGVR, nspace)
			if err != nil {
				return nil
			}
			return errors.New("deleting Namespace")
		})

		By(fmt.Sprintf("Test %s Completed. \n\n\n", PodCPUHogTest.TestName))
	}
}
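The polling above leans on `e2e.GetWithRetry`, which the commit message mentions updating. A minimal sketch of such a helper, assuming it retries the supplied function up to `count` times with a pause between attempts (the parameter names and the seconds interpretation are assumptions, not the actual `test/e2e` implementation):

```go
package e2e

import (
	"fmt"
	"time"
)

// GetWithRetry-style helper (hypothetical sketch): polls getResource until
// it returns nil, or gives up after count attempts.
func getWithRetrySketch(sleepInterval time.Duration, count int, getResource func() error) error {
	var err error
	for i := 0; i < count; i++ {
		if err = getResource(); err == nil {
			return nil
		}
		// Assumption: the interval is interpreted in seconds, so
		// time.Duration(1) means a one-second pause between attempts.
		time.Sleep(sleepInterval * time.Second)
	}
	return fmt.Errorf("operation failed after %d retries: %v", count, err)
}
```

Note how the callers invert the error for deletion checks: the closure returns nil once `GetClusteredResource` fails (the namespace is gone), so the retry loop stops exactly when the resource disappears.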


@@ -0,0 +1,269 @@
package e2e

// Namespace Description
var LitmusChaosnamespaceYaml = []byte(`
apiVersion: v1
kind: Namespace
metadata:
  name: test-litmus
`)
// Litmus Chaos Service Account
var ChaosServiceAccountYaml = []byte(`
apiVersion: v1
kind: ServiceAccount
metadata:
  name: pod-cpu-hog-sa
  namespace: test-litmus
  labels:
    name: pod-cpu-hog-sa
    app.kubernetes.io/part-of: litmus
`)
var ChaosRoleYaml = []byte(`
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: pod-cpu-hog-sa
  namespace: test-litmus
  labels:
    name: pod-cpu-hog-sa
    app.kubernetes.io/part-of: litmus
rules:
- apiGroups: [""]
  resources: ["pods","events"]
  verbs: ["create","list","get","patch","update","delete","deletecollection"]
- apiGroups: [""]
  resources: ["pods/exec","pods/log","replicationcontrollers"]
  verbs: ["create","list","get"]
- apiGroups: ["batch"]
  resources: ["jobs"]
  verbs: ["create","list","get","delete","deletecollection"]
- apiGroups: ["apps"]
  resources: ["deployments","statefulsets","daemonsets","replicasets"]
  verbs: ["list","get"]
- apiGroups: ["apps.openshift.io"]
  resources: ["deploymentconfigs"]
  verbs: ["list","get"]
- apiGroups: ["argoproj.io"]
  resources: ["rollouts"]
  verbs: ["list","get"]
- apiGroups: ["litmuschaos.io"]
  resources: ["chaosengines","chaosexperiments","chaosresults"]
  verbs: ["create","list","get","patch","update"]
`)
var ChaosRoleBindingYaml = []byte(`
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: pod-cpu-hog-sa
  namespace: test-litmus
  labels:
    name: pod-cpu-hog-sa
    app.kubernetes.io/part-of: litmus
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: pod-cpu-hog-sa
subjects:
- kind: ServiceAccount
  name: pod-cpu-hog-sa
  namespace: test-litmus
`)
// Pod CPU Hog Experiment
var PodCPUHogExperimentYaml = []byte(`
apiVersion: litmuschaos.io/v1alpha1
description:
  message: |
    Injects cpu consumption on pods belonging to an app deployment
kind: ChaosExperiment
metadata:
  name: pod-cpu-hog
  labels:
    name: pod-cpu-hog
    app.kubernetes.io/part-of: litmus
    app.kubernetes.io/component: chaosexperiment
    app.kubernetes.io/version: 1.13.3
spec:
  definition:
    scope: Namespaced
    permissions:
    - apiGroups:
      - ""
      - "batch"
      - "apps"
      - "apps.openshift.io"
      - "argoproj.io"
      - "litmuschaos.io"
      resources:
      - "jobs"
      - "pods"
      - "pods/log"
      - "events"
      - "replicationcontrollers"
      - "deployments"
      - "statefulsets"
      - "daemonsets"
      - "replicasets"
      - "deploymentconfigs"
      - "rollouts"
      - "pods/exec"
      - "chaosengines"
      - "chaosexperiments"
      - "chaosresults"
      verbs:
      - "create"
      - "list"
      - "get"
      - "patch"
      - "update"
      - "delete"
      - "deletecollection"
    image: "litmuschaos/go-runner:1.13.3"
    imagePullPolicy: Always
    args:
    - -c
    - ./experiments -name pod-cpu-hog
    command:
    - /bin/bash
    env:
    - name: TOTAL_CHAOS_DURATION
      value: '60'

    ## Number of CPU cores to stress
    - name: CPU_CORES
      value: '1'

    ## Percentage of total pods to target
    - name: PODS_AFFECTED_PERC
      value: ''

    ## Period to wait before and after injection of chaos in sec
    - name: RAMP_TIME
      value: ''

    ## env var that describes the library used to execute the chaos
    ## default: litmus. Supported values: litmus, pumba
    - name: LIB
      value: 'litmus'

    ## It is used in pumba lib only
    - name: LIB_IMAGE
      value: 'litmuschaos/go-runner:1.13.3'

    ## It is used in pumba lib only
    - name: STRESS_IMAGE
      value: 'alexeiled/stress-ng:latest-ubuntu'

    # provide the socket file path
    # it is used in pumba lib
    - name: SOCKET_PATH
      value: '/var/run/docker.sock'

    - name: TARGET_PODS
      value: ''

    ## it defines the sequence of chaos execution for multiple target pods
    ## supported values: serial, parallel
    - name: SEQUENCE
      value: 'parallel'
    labels:
      name: pod-cpu-hog
      app.kubernetes.io/part-of: litmus
      app.kubernetes.io/component: experiment-job
      app.kubernetes.io/version: 1.13.3
`)
// ChaosEngine Manifest
var ChaosEngineYaml = []byte(`
apiVersion: litmuschaos.io/v1alpha1
kind: ChaosEngine
metadata:
  name: kind-chaos
  namespace: test-litmus
spec:
  # It can be active/stop
  engineState: 'active'
  appinfo:
    appns: 'kyverno'
    applabel: 'app.kubernetes.io/name=kyverno'
    appkind: 'deployment'
  chaosServiceAccount: pod-cpu-hog-sa
  # It can be delete/retain
  jobCleanUpPolicy: 'delete'
  experiments:
  - name: pod-cpu-hog
    spec:
      components:
        env:
        # number of cpu cores to be consumed
        # verify the resources the app has been launched with
        - name: CPU_CORES
          value: '1'
        - name: TOTAL_CHAOS_DURATION
          value: '60' # in seconds
`)
// disallow-add-capabilities enforce policy
var DisallowAddingCapabilitiesYaml = []byte(`
apiVersion: kyverno.io/v1
kind: ClusterPolicy
metadata:
  name: disallow-add-capabilities
  annotations:
    policies.kyverno.io/category: Pod Security Standards (Baseline)
    policies.kyverno.io/severity: medium
    policies.kyverno.io/subject: Pod
    policies.kyverno.io/description: >-
      Capabilities permit privileged actions without giving full root access.
      Adding capabilities beyond the default set must not be allowed.
spec:
  validationFailureAction: enforce
  background: true
  rules:
  - name: capabilities
    match:
      resources:
        kinds:
        - Pod
    validate:
      message: >-
        Adding of additional capabilities beyond the default set is not allowed.
        The fields spec.containers[*].securityContext.capabilities.add and
        spec.initContainers[*].securityContext.capabilities.add must be empty.
      pattern:
        spec:
          containers:
          - =(securityContext):
              =(capabilities):
                X(add): null
          =(initContainers):
          - =(securityContext):
              =(capabilities):
                X(add): null
`)
// Test Pod that adds a capability and should be rejected by the enforce policy
var KyvernoTestResourcesYaml = []byte(`
apiVersion: v1
kind: Pod
metadata:
  name: add-new-capabilities
spec:
  containers:
  - name: add-new-capabilities
    image: "ubuntu:18.04"
    command:
    - /bin/sleep
    - "300"
    securityContext:
      capabilities:
        add:
        - NET_ADMIN
`)
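For contrast, here is a hypothetical compliant counterpart (not part of the commit): the same Pod without a `capabilities.add` stanza satisfies the `disallow-add-capabilities` pattern, so the enforce policy would admit it rather than reject it.

```go
package e2e

// Hypothetical compliant resource, for illustration only: no capabilities
// are added, so the disallow-add-capabilities policy admits this Pod.
var CompliantPodYaml = []byte(`
apiVersion: v1
kind: Pod
metadata:
  name: no-new-capabilities
spec:
  containers:
  - name: no-new-capabilities
    image: "ubuntu:18.04"
    command:
    - /bin/sleep
    - "300"
`)
```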