1
0
Fork 0
mirror of https://github.com/arangodb/kube-arangodb.git synced 2024-12-14 11:57:37 +00:00

Adding chaos-monkey for deployments

This commit is contained in:
Ewout Prangsma 2018-03-30 15:40:11 +02:00
parent fd86e4e5b8
commit 056aabb515
No known key found for this signature in database
GPG key ID: 4DBAD380D93D0698
16 changed files with 391 additions and 5 deletions

View file

@ -65,6 +65,10 @@ ifndef ENTERPRISEIMAGE
ENTERPRISEIMAGE := $(DEFAULTENTERPRISEIMAGE)
endif
ifndef ALLOWCHAOS
ALLOWCHAOS := true
endif
BINNAME := $(PROJECT)
BIN := $(BINDIR)/$(BINNAME)
TESTBINNAME := $(PROJECT)_test
@ -200,7 +204,8 @@ manifests: $(GOBUILDDIR)
--output-suffix=$(MANIFESTSUFFIX) \
--image=$(OPERATORIMAGE) \
--image-sha256=$(IMAGESHA256) \
--namespace=$(DEPLOYMENTNAMESPACE)
--namespace=$(DEPLOYMENTNAMESPACE) \
--allow-chaos=$(ALLOWCHAOS)
# Testing

View file

@ -10,3 +10,5 @@ spec:
coordinators:
args:
- --log.level=true
chaos:
enabled: true

View file

@ -78,7 +78,9 @@ var (
operatorOptions struct {
enableDeployment bool // Run deployment operator
enableStorage bool // Run deployment operator
createCRD bool
}
chaosOptions struct {
allowed bool
}
)
@ -89,7 +91,7 @@ func init() {
f.StringVar(&logLevel, "log.level", defaultLogLevel, "Set initial log level")
f.BoolVar(&operatorOptions.enableDeployment, "operator.deployment", false, "Enable to run the ArangoDeployment operator")
f.BoolVar(&operatorOptions.enableStorage, "operator.storage", false, "Enable to run the ArangoLocalStorage operator")
f.BoolVar(&operatorOptions.createCRD, "operator.create-crd", true, "Disable to avoid create the custom resource definition")
f.BoolVar(&chaosOptions.allowed, "chaos.allowed", false, "Set to allow chaos in deployments. Only activated when allowed and enabled in deployment")
}
func main() {
@ -183,7 +185,7 @@ func newOperatorConfigAndDeps(id, namespace, name string) (operator.Config, oper
ServiceAccount: serviceAccount,
EnableDeployment: operatorOptions.enableDeployment,
EnableStorage: operatorOptions.enableStorage,
CreateCRD: operatorOptions.createCRD,
AllowChaos: chaosOptions.allowed,
}
deps := operator.Dependencies{
LogService: logService,

View file

@ -17,6 +17,7 @@ spec:
image: {{ .Image }}
args:
- --operator.deployment
- --chaos.allowed={{ .Deployment.AllowChaos }}
env:
- name: MY_POD_NAMESPACE
valueFrom:

View file

@ -0,0 +1,91 @@
//
// DISCLAIMER
//
// Copyright 2018 ArangoDB GmbH, Cologne, Germany
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Copyright holder is ArangoDB GmbH, Cologne, Germany
//
// Author Ewout Prangsma
//
package v1alpha
import (
time "time"
"github.com/arangodb/kube-arangodb/pkg/util"
"github.com/pkg/errors"
)
// ChaosSpec holds configuration for the deployment chaos monkey.
// Every field is an optional pointer, so an unset field (nil) can be told
// apart from an explicitly configured value. Use the accessor methods
// (IsEnabled, GetInterval, GetKillPodProbability) to read the values.
type ChaosSpec struct {
// Enabled switches the chaos monkey for a deployment on or off.
Enabled *bool `json:"enabled,omitempty"`
// Interval is the time between chaos events.
// SetDefaults supplies one minute when this is unset.
Interval *time.Duration `json:"interval,omitempty"`
// KillPodProbability is the chance, as a percentage, of a pod being killed
// during an event. SetDefaults supplies 50 when this is unset.
KillPodProbability *Percent `json:"kill-pod-probability,omitempty"`
}
// IsEnabled reports whether the chaos monkey is switched on for the deployment.
// The Enabled field is resolved through util.BoolOrDefault
// (presumably false when the field is unset; confirm in pkg/util).
func (s ChaosSpec) IsEnabled() bool {
	enabled := util.BoolOrDefault(s.Enabled)
	return enabled
}
// GetInterval yields the configured time between chaos events.
// The Interval field is resolved through util.DurationOrDefault
// (presumably zero when the field is unset; confirm in pkg/util).
func (s ChaosSpec) GetInterval() time.Duration {
	interval := util.DurationOrDefault(s.Interval)
	return interval
}
// GetKillPodProbability yields the configured chance of a pod being killed
// during an event. An unset field is reported as 0, because no explicit
// default is handed to PercentOrDefault.
func (s ChaosSpec) GetKillPodProbability() Percent {
	probability := PercentOrDefault(s.KillPodProbability)
	return probability
}
// Validate checks the chaos settings for consistency.
// A spec that is not enabled is always accepted. When enabled, the interval
// must be strictly positive and the kill-pod probability must pass
// Percent.Validate. Any failure is returned wrapped by maskAny.
func (s ChaosSpec) Validate() error {
	if !s.IsEnabled() {
		// Settings of a disabled chaos monkey are not inspected.
		return nil
	}
	if interval := s.GetInterval(); interval <= 0 {
		return maskAny(errors.Wrapf(ValidationError, "Interval must be > 0"))
	}
	probability := s.GetKillPodProbability()
	if err := probability.Validate(); err != nil {
		return maskAny(err)
	}
	return nil
}
// SetDefaults fills in missing defaults.
//
// Interval falls back to one minute when it is unset or zero; zero is never a
// valid interval for an enabled chaos monkey (see Validate).
//
// KillPodProbability falls back to 50 only when it is unset. An explicit 0 is
// a valid percentage meaning "never kill a pod" and must not be silently
// replaced by the default, so the pointer itself is checked rather than the
// dereferenced value.
func (s *ChaosSpec) SetDefaults() {
	if s.GetInterval() == 0 {
		s.Interval = util.NewDuration(time.Minute)
	}
	if s.KillPodProbability == nil {
		s.KillPodProbability = NewPercent(50)
	}
}
// SetDefaultsFrom copies every field that is still unset (nil) in s from the
// given source spec. Fields that already hold a value are left untouched.
// A field that is nil in both specs stays nil.
func (s *ChaosSpec) SetDefaultsFrom(source ChaosSpec) {
	// The three fields are independent of each other.
	if s.KillPodProbability == nil {
		// NewPercentOrNil clones the value, so s never aliases source.
		s.KillPodProbability = NewPercentOrNil(source.KillPodProbability)
	}
	if s.Interval == nil {
		s.Interval = util.NewDurationOrNil(source.Interval)
	}
	if s.Enabled == nil {
		s.Enabled = util.NewBoolOrNil(source.Enabled)
	}
}

View file

@ -62,6 +62,8 @@ type DeploymentSpec struct {
Coordinators ServerGroupSpec `json:"coordinators"`
SyncMasters ServerGroupSpec `json:"syncmasters"`
SyncWorkers ServerGroupSpec `json:"syncworkers"`
Chaos ChaosSpec `json:"chaos"`
}
// GetMode returns the value of mode.
@ -147,6 +149,7 @@ func (s *DeploymentSpec) SetDefaults(deploymentName string) {
s.Coordinators.SetDefaults(ServerGroupCoordinators, s.GetMode().HasCoordinators(), s.GetMode())
s.SyncMasters.SetDefaults(ServerGroupSyncMasters, s.Sync.IsEnabled(), s.GetMode())
s.SyncWorkers.SetDefaults(ServerGroupSyncWorkers, s.Sync.IsEnabled(), s.GetMode())
s.Chaos.SetDefaults()
}
// SetDefaultsFrom fills unspecified fields with a value from given source spec.
@ -176,6 +179,7 @@ func (s *DeploymentSpec) SetDefaultsFrom(source DeploymentSpec) {
s.Coordinators.SetDefaultsFrom(source.Coordinators)
s.SyncMasters.SetDefaultsFrom(source.SyncMasters)
s.SyncWorkers.SetDefaultsFrom(source.SyncWorkers)
s.Chaos.SetDefaultsFrom(source.Chaos)
}
// Validate the specification.
@ -226,6 +230,9 @@ func (s *DeploymentSpec) Validate() error {
if err := s.SyncWorkers.Validate(ServerGroupSyncWorkers, s.Sync.IsEnabled(), s.GetMode(), s.GetEnvironment()); err != nil {
return maskAny(err)
}
if err := s.Chaos.Validate(); err != nil {
return maskAny(errors.Wrap(err, "spec.chaos"))
}
return nil
}

View file

@ -0,0 +1,62 @@
//
// DISCLAIMER
//
// Copyright 2018 ArangoDB GmbH, Cologne, Germany
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Copyright holder is ArangoDB GmbH, Cologne, Germany
//
// Author Ewout Prangsma
//
package v1alpha
import (
"github.com/pkg/errors"
)
// Percent is a percentage between 0 and 100.
// The range is not enforced by the type itself; call Validate to check it.
type Percent int
// Validate checks that the percentage lies in the inclusive range 0..100.
// It returns nil when in range, otherwise a ValidationError (wrapped by
// maskAny) whose message includes the offending value.
func (p Percent) Validate() error {
	if p >= 0 && p <= 100 {
		return nil
	}
	return maskAny(errors.Wrapf(ValidationError, "Percentage must be between 0 and 100, got %d", int(p)))
}
// NewPercent returns a pointer to a fresh Percent holding the given value.
// Each call yields a distinct pointer, so the result can be stored in an
// optional spec field without aliasing anything the caller owns.
func NewPercent(input Percent) *Percent {
	value := input
	return &value
}
// NewPercentOrNil clones an optional percentage.
// A nil input yields nil; otherwise a new pointer to a copy of the referenced
// value is returned.
func NewPercentOrNil(input *Percent) *Percent {
	if input != nil {
		return NewPercent(*input)
	}
	return nil
}
// PercentOrDefault resolves an optional percentage to a plain value.
// A non-nil input yields the referenced value. A nil input yields the first
// element of defaultValue when one is given, and 0 otherwise. Any further
// elements of defaultValue are ignored.
func PercentOrDefault(input *Percent, defaultValue ...Percent) Percent {
	switch {
	case input != nil:
		return *input
	case len(defaultValue) > 0:
		return defaultValue[0]
	default:
		return 0
	}
}

View file

@ -144,6 +144,49 @@ func (in *AuthenticationSpec) DeepCopy() *AuthenticationSpec {
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
// After the call, out holds its own copies of all pointer fields and shares
// no memory with in.
func (in *ChaosSpec) DeepCopyInto(out *ChaosSpec) {
// Start with a shallow copy; the pointer fields still alias in at this point
// and are re-allocated one by one below.
*out = *in
if in.Enabled != nil {
// in and out are shadowed here by pointers to the respective fields.
in, out := &in.Enabled, &out.Enabled
if *in == nil {
*out = nil
} else {
*out = new(bool)
**out = **in
}
}
if in.Interval != nil {
// Same pattern as above, for the Interval field.
in, out := &in.Interval, &out.Interval
if *in == nil {
*out = nil
} else {
*out = new(time.Duration)
**out = **in
}
}
if in.KillPodProbability != nil {
// Same pattern as above, for the KillPodProbability field.
in, out := &in.KillPodProbability, &out.KillPodProbability
if *in == nil {
*out = nil
} else {
*out = new(Percent)
**out = **in
}
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ChaosSpec.
// A nil receiver yields nil; otherwise the copy is produced by DeepCopyInto.
func (in *ChaosSpec) DeepCopy() *ChaosSpec {
if in == nil {
return nil
}
out := new(ChaosSpec)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *Condition) DeepCopyInto(out *Condition) {
*out = *in
@ -220,6 +263,7 @@ func (in *DeploymentSpec) DeepCopyInto(out *DeploymentSpec) {
in.Coordinators.DeepCopyInto(&out.Coordinators)
in.SyncMasters.DeepCopyInto(&out.SyncMasters)
in.SyncWorkers.DeepCopyInto(&out.SyncWorkers)
in.Chaos.DeepCopyInto(&out.Chaos)
return
}

View file

@ -0,0 +1,40 @@
//
// DISCLAIMER
//
// Copyright 2018 ArangoDB GmbH, Cologne, Germany
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Copyright holder is ArangoDB GmbH, Cologne, Germany
//
// Author Ewout Prangsma
//
package chaos
import (
"k8s.io/api/core/v1"
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1alpha"
)
// Context provides methods to the chaos package.
// It is the chaos monkey's only view of the deployment: the monkey reads the
// spec on every iteration and uses the pod methods to pick and delete a pod.
type Context interface {
// GetSpec returns the current specification of the deployment.
GetSpec() api.DeploymentSpec
// DeletePod deletes a pod with given name in the namespace
// of the deployment. If the pod does not exist, the error is ignored.
DeletePod(podName string) error
// GetOwnedPods returns a list of all pods owned by the deployment.
GetOwnedPods() ([]v1.Pod, error)
}

View file

@ -0,0 +1,29 @@
//
// DISCLAIMER
//
// Copyright 2018 ArangoDB GmbH, Cologne, Germany
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Copyright holder is ArangoDB GmbH, Cologne, Germany
//
// Author Ewout Prangsma
//
package chaos
import "github.com/pkg/errors"
var (
// maskAny is applied to errors before they are returned from this package.
// It is an alias for errors.WithStack from github.com/pkg/errors.
maskAny = errors.WithStack
)

View file

@ -0,0 +1,90 @@
//
// DISCLAIMER
//
// Copyright 2018 ArangoDB GmbH, Cologne, Germany
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Copyright holder is ArangoDB GmbH, Cologne, Germany
//
// Author Ewout Prangsma
//
package chaos
import (
"math/rand"
"time"
"github.com/rs/zerolog"
"github.com/rs/zerolog/log"
)
// Monkey is the service that introduces chaos in the deployment
// if allowed and enabled.
// Create instances with NewMonkey and start them with Run.
type Monkey struct {
// log is the logger handed to NewMonkey, tagged with component "chaos-monkey".
log zerolog.Logger
// context gives access to the deployment spec and the pods it owns.
context Context
}
// NewMonkey builds a chaos monkey that operates on the deployment reachable
// through the given context. The given logger is extended with a
// "component" field set to "chaos-monkey" before it is stored.
// The monkey does nothing until Run is called.
func NewMonkey(log zerolog.Logger, context Context) *Monkey {
	monkey := &Monkey{context: context}
	monkey.log = log.With().Str("component", "chaos-monkey").Logger()
	return monkey
}
// Run the monkey until the given channel is closed.
//
// Each iteration fetches the current deployment spec. When chaos is enabled
// in that spec, it gambles with the configured kill-pod probability and, on a
// hit, tries to kill one random pod. It then waits for the configured
// interval or for stopCh to be closed, whichever comes first.
//
// Run blocks the calling goroutine until stopCh is closed.
//
// NOTE(review): a failed kill is reported through the global zerolog logger
// and not through m.log, so the "chaos-monkey" component field is missing
// from that message. Switching to m.log would leave this file's
// "github.com/rs/zerolog/log" import unused, so it must be done together
// with removing that import.
func (m Monkey) Run(stopCh <-chan struct{}) {
	// fallbackInterval is used when the spec carries no usable interval. It
	// equals the default applied by ChaosSpec.SetDefaults. Without it, a zero
	// or negative interval would make the wait below return immediately and
	// turn this loop into a busy spin.
	const fallbackInterval = time.Minute

	for {
		spec := m.context.GetSpec()
		if spec.Chaos.IsEnabled() {
			// Gamble to see if we must introduce chaos
			chance := float64(spec.Chaos.GetKillPodProbability()) / 100.0
			if rand.Float64() < chance {
				// Let's introduce pod chaos
				if err := m.killRandomPod(); err != nil {
					log.Info().Err(err).Msg("Failed to kill random pod")
				}
			}
		}

		interval := spec.Chaos.GetInterval()
		if interval <= 0 {
			interval = fallbackInterval
		}
		// Use an explicit timer so it can be released as soon as we are asked
		// to stop, instead of lingering until the interval has elapsed.
		timer := time.NewTimer(interval)
		select {
		case <-timer.C:
			// Continue
		case <-stopCh:
			// We're done
			timer.Stop()
			return
		}
	}
}
// killRandomPod fetches all pods owned by the deployment, picks one of them
// at random and asks the context to delete it.
// When fewer than two pods are owned, nothing is killed and nil is returned.
// Errors from listing or deleting pods are returned wrapped by maskAny.
func (m Monkey) killRandomPod() error {
	owned, err := m.context.GetOwnedPods()
	if err != nil {
		return maskAny(err)
	}
	if len(owned) < 2 {
		// Not enough pods
		return nil
	}
	victim := owned[rand.Intn(len(owned))]
	victimName := victim.GetName()
	m.log.Info().Str("pod-name", victimName).Msg("Killing pod")
	if err := m.context.DeletePod(victim.GetName()); err != nil {
		return maskAny(err)
	}
	return nil
}

View file

@ -35,6 +35,7 @@ import (
corev1 "k8s.io/client-go/kubernetes/typed/core/v1"
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1alpha"
"github.com/arangodb/kube-arangodb/pkg/deployment/chaos"
"github.com/arangodb/kube-arangodb/pkg/deployment/reconcile"
"github.com/arangodb/kube-arangodb/pkg/deployment/resources"
"github.com/arangodb/kube-arangodb/pkg/generated/clientset/versioned"
@ -46,6 +47,7 @@ import (
// Config holds configuration settings for a Deployment
type Config struct {
ServiceAccount string
AllowChaos bool
}
// Dependencies holds dependent services for a Deployment
@ -93,6 +95,7 @@ type Deployment struct {
clusterScalingIntegration *clusterScalingIntegration
reconciler *reconcile.Reconciler
resources *resources.Resources
chaosMonkey *chaos.Monkey
}
// New creates a new Deployment from the given API object.
@ -127,6 +130,10 @@ func New(config Config, deps Dependencies, apiObject *api.ArangoDeployment) (*De
d.clusterScalingIntegration = ci
go ci.ListenForClusterEvents(d.stopCh)
}
if config.AllowChaos {
d.chaosMonkey = chaos.NewMonkey(deps.Log, d)
d.chaosMonkey.Run(d.stopCh)
}
return d, nil
}

View file

@ -67,7 +67,7 @@ type Config struct {
ServiceAccount string
EnableDeployment bool
EnableStorage bool
CreateCRD bool
AllowChaos bool
}
type Dependencies struct {

View file

@ -193,6 +193,7 @@ func (o *Operator) handleDeploymentEvent(event *Event) error {
func (o *Operator) makeDeploymentConfigAndDeps(apiObject *api.ArangoDeployment) (deployment.Config, deployment.Dependencies) {
cfg := deployment.Config{
ServiceAccount: o.Config.ServiceAccount,
AllowChaos: o.Config.AllowChaos,
}
deps := deployment.Dependencies{
Log: o.Dependencies.LogService.MustGetLogger("deployment").With().

View file

@ -48,6 +48,7 @@ var (
DeploymentOperatorName string
StorageOperatorName string
RBAC bool
AllowChaos bool
}
deploymentTemplateNames = []string{
"rbac.yaml",
@ -72,6 +73,7 @@ func init() {
pflag.StringVar(&options.DeploymentOperatorName, "deployment-operator-name", "arango-deployment-operator", "Name of the ArangoDeployment operator deployment")
pflag.StringVar(&options.StorageOperatorName, "storage-operator-name", "arango-storage-operator", "Name of the ArangoLocalStorage operator deployment")
pflag.BoolVar(&options.RBAC, "rbac", true, "Use role based access control")
pflag.BoolVar(&options.AllowChaos, "allow-chaos", false, "If set, allows chaos in deployments")
pflag.Parse()
}
@ -96,6 +98,7 @@ type ResourceOptions struct {
User CommonOptions
Operator CommonOptions
OperatorDeploymentName string
AllowChaos bool
}
func main() {
@ -149,6 +152,7 @@ func main() {
ServiceAccountName: "default",
},
OperatorDeploymentName: "arango-deployment-operator",
AllowChaos: options.AllowChaos,
},
Storage: ResourceOptions{
User: CommonOptions{

View file

@ -58,6 +58,7 @@ func main() {
version := bumpVersion(releaseType)
make("clean", nil)
make("all", map[string]string{
"ALLOWCHAOS": "false",
"DOCKERNAMESPACE": "arangodb",
"IMAGETAG": version,
"MANIFESTSUFFIX": "-",