//
// DISCLAIMER
//
// Copyright 2016-2024 ArangoDB GmbH, Cologne, Germany
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Copyright holder is ArangoDB GmbH, Cologne, Germany
//
package cmd
import (
"context"
goflag "flag"
"fmt"
"net"
"net/http"
"os"
2022-09-19 14:48:54 +00:00
"reflect"
2022-01-04 18:46:53 +00:00
"strconv"
"strings"
"time"
2022-06-14 07:26:07 +00:00
"github.com/gin-gonic/gin"
2022-07-11 11:49:47 +00:00
"github.com/rs/zerolog"
"github.com/spf13/cobra"
flag "github.com/spf13/pflag"
2022-07-14 16:22:16 +00:00
apps "k8s.io/api/apps/v1"
2022-07-11 11:49:47 +00:00
core "k8s.io/api/core/v1"
2022-09-19 19:55:37 +00:00
apiErrors "k8s.io/apimachinery/pkg/api/errors"
2022-07-11 11:49:47 +00:00
meta "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/kubernetes"
typedCore "k8s.io/client-go/kubernetes/typed/core/v1"
"k8s.io/client-go/tools/record"
2022-06-14 07:26:07 +00:00
2022-08-15 08:23:25 +00:00
"github.com/arangodb/kube-arangodb/pkg/api"
2024-02-23 13:13:52 +00:00
shared "github.com/arangodb/kube-arangodb/pkg/apis/shared"
2022-06-13 12:44:58 +00:00
"github.com/arangodb/kube-arangodb/pkg/crd"
2023-09-29 09:45:22 +00:00
agencyConfig "github.com/arangodb/kube-arangodb/pkg/deployment/agency/config"
2022-07-11 11:49:47 +00:00
"github.com/arangodb/kube-arangodb/pkg/deployment/features"
2023-08-15 18:22:15 +00:00
"github.com/arangodb/kube-arangodb/pkg/deployment/reconcile"
2022-06-13 12:44:58 +00:00
"github.com/arangodb/kube-arangodb/pkg/generated/clientset/versioned/scheme"
"github.com/arangodb/kube-arangodb/pkg/logging"
2022-12-08 08:59:13 +00:00
"github.com/arangodb/kube-arangodb/pkg/metrics/collector"
2022-06-13 12:44:58 +00:00
"github.com/arangodb/kube-arangodb/pkg/operator"
2022-07-11 11:49:47 +00:00
"github.com/arangodb/kube-arangodb/pkg/operator/scope"
2022-06-13 12:44:58 +00:00
"github.com/arangodb/kube-arangodb/pkg/server"
2022-01-04 18:46:53 +00:00
"github.com/arangodb/kube-arangodb/pkg/util"
2022-06-13 12:44:58 +00:00
"github.com/arangodb/kube-arangodb/pkg/util/constants"
2024-02-08 14:25:48 +00:00
"github.com/arangodb/kube-arangodb/pkg/util/errors"
2022-07-11 11:49:47 +00:00
"github.com/arangodb/kube-arangodb/pkg/util/globals"
operatorHTTP "github.com/arangodb/kube-arangodb/pkg/util/http"
2022-06-13 12:44:58 +00:00
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil"
"github.com/arangodb/kube-arangodb/pkg/util/kclient"
2022-12-08 08:59:13 +00:00
"github.com/arangodb/kube-arangodb/pkg/util/metrics"
2022-06-13 12:44:58 +00:00
"github.com/arangodb/kube-arangodb/pkg/util/probe"
"github.com/arangodb/kube-arangodb/pkg/util/retry"
2024-04-29 09:56:10 +00:00
"github.com/arangodb/kube-arangodb/pkg/util/shutdown"
2022-07-11 11:49:47 +00:00
"github.com/arangodb/kube-arangodb/pkg/version"
2022-01-04 18:46:53 +00:00
)
// Default values used when registering the operator's command line flags.
const (
	// defaultServerHost is the default of --server.host.
	defaultServerHost = "0.0.0.0"
	// defaultServerPort is the default of --server.port.
	defaultServerPort = 8528
	// defaultAPIHTTPPort is the default of --api.http-port.
	defaultAPIHTTPPort = 8628
	// defaultAPIGRPCPort is the default of --api.grpc-port.
	defaultAPIGRPCPort = 8728
	// defaultAdminSecretName is the default of --server.admin-secret-name.
	defaultAdminSecretName = "arangodb-operator-dashboard"
	// defaultAPIJWTSecretName is the default of --api.jwt-secret-name.
	defaultAPIJWTSecretName = "arangodb-operator-api-jwt"
	// defaultAPIJWTKeySecretName is the default of --api.jwt-key-secret-name.
	defaultAPIJWTKeySecretName = "arangodb-operator-api-jwt-key"
	// defaultShutdownDelay is the default of --shutdown.delay.
	defaultShutdownDelay = 2 * time.Second
	// defaultShutdownTimeout is the default of --shutdown.timeout.
	defaultShutdownTimeout = 30 * time.Second
)
var (
2022-06-14 07:26:07 +00:00
logger = logging . Global ( ) . RegisterAndGetLogger ( "root" , logging . Info )
eventRecorder = logging . Global ( ) . RegisterAndGetLogger ( "root-event-recorder" , logging . Info )
2022-01-04 18:46:53 +00:00
cmdMain = cobra . Command {
Use : "arangodb_operator" ,
Run : executeMain ,
}
2022-11-28 14:47:38 +00:00
memoryLimit struct {
hardLimit uint64
}
2022-01-04 18:46:53 +00:00
serverOptions struct {
host string
port int
tlsSecretName string
adminSecretName string // Name of basic authentication secret containing the admin username+password of the dashboard
allowAnonymous bool // If set, anonymous access to dashboard is allowed
}
2022-08-15 08:23:25 +00:00
apiOptions struct {
enabled bool
httpPort int
grpcPort int
jwtSecretName string
jwtKeySecretName string
tlsSecretName string
}
2022-01-04 18:46:53 +00:00
operatorOptions struct {
enableDeployment bool // Run deployment operator
enableDeploymentReplication bool // Run deployment-replication operator
enableStorage bool // Run local-storage operator
enableBackup bool // Run backup operator
enableApps bool // Run apps operator
2023-11-17 11:07:30 +00:00
enableML bool // Run ml operator
2024-05-08 10:20:14 +00:00
enableAnalytics bool // Run analytics operator
2022-01-04 18:46:53 +00:00
versionOnly bool // Run only version endpoint, explicitly disabled with other
2022-01-06 20:27:01 +00:00
enableK2KClusterSync bool // Run k2kClusterSync operator
2022-01-04 18:46:53 +00:00
2022-09-19 14:48:54 +00:00
operatorFeatureConfigMap string // ConfigMap name
2022-01-04 18:46:53 +00:00
scalingIntegrationEnabled bool
2023-09-02 00:17:01 +00:00
reconciliationDelay time . Duration
2022-01-04 18:46:53 +00:00
singleMode bool
scope string
}
2022-05-26 08:47:55 +00:00
shutdownOptions struct {
delay time . Duration
timeout time . Duration
}
2022-03-02 23:49:49 +00:00
crdOptions struct {
2023-11-15 09:20:18 +00:00
install bool
validationSchema [ ] string
2022-03-02 23:49:49 +00:00
}
2022-01-04 18:46:53 +00:00
operatorKubernetesOptions struct {
maxBatchSize int64
2022-02-28 18:53:01 +00:00
qps float32
burst int
2022-01-04 18:46:53 +00:00
}
operatorBackup struct {
concurrentUploads int
}
operatorTimeouts struct {
2024-03-25 12:57:16 +00:00
k8s time . Duration
arangoD time . Duration
arangoDCheck time . Duration
reconciliation time . Duration
agency time . Duration
shardRebuild time . Duration
shardRebuildRetry time . Duration
backupArangoD time . Duration
backupUploadArangoD time . Duration
forcePodDeletionGracePeriod time . Duration
2024-04-08 11:03:01 +00:00
podSchedulingGracePeriod time . Duration
2022-01-04 18:46:53 +00:00
}
2024-02-26 13:28:18 +00:00
operatorImageDiscovery struct {
timeout time . Duration
defaultStatusDiscovery bool
}
2023-11-23 09:58:34 +00:00
operatorReconciliationRetry struct {
delay time . Duration
count int
}
2022-01-04 18:46:53 +00:00
chaosOptions struct {
allowed bool
}
2022-12-08 08:59:13 +00:00
metricsOptions struct {
excludedMetricPrefixes [ ] string
}
2022-01-04 18:46:53 +00:00
livenessProbe probe . LivenessProbe
deploymentProbe probe . ReadyProbe
deploymentReplicationProbe probe . ReadyProbe
storageProbe probe . ReadyProbe
backupProbe probe . ReadyProbe
appsProbe probe . ReadyProbe
2023-11-17 11:07:30 +00:00
mlProbe probe . ReadyProbe
2024-05-08 10:20:14 +00:00
analyticsProbe probe . ReadyProbe
2022-01-06 20:27:01 +00:00
k2KClusterSyncProbe probe . ReadyProbe
2022-01-04 18:46:53 +00:00
)
func init ( ) {
2023-09-29 10:27:46 +00:00
var deprecatedStr string
2022-01-04 18:46:53 +00:00
f := cmdMain . Flags ( )
2024-02-14 08:14:32 +00:00
2022-01-04 18:46:53 +00:00
f . StringVar ( & serverOptions . host , "server.host" , defaultServerHost , "Host to listen on" )
f . IntVar ( & serverOptions . port , "server.port" , defaultServerPort , "Port to listen on" )
f . StringVar ( & serverOptions . tlsSecretName , "server.tls-secret-name" , "" , "Name of secret containing tls.crt & tls.key for HTTPS server (if empty, self-signed certificate is used)" )
f . StringVar ( & serverOptions . adminSecretName , "server.admin-secret-name" , defaultAdminSecretName , "Name of secret containing username + password for login to the dashboard" )
f . BoolVar ( & serverOptions . allowAnonymous , "server.allow-anonymous-access" , false , "Allow anonymous access to the dashboard" )
2022-08-15 08:23:25 +00:00
f . BoolVar ( & apiOptions . enabled , "api.enabled" , true , "Enable operator HTTP and gRPC API" )
f . IntVar ( & apiOptions . httpPort , "api.http-port" , defaultAPIHTTPPort , "HTTP API port to listen on" )
f . IntVar ( & apiOptions . grpcPort , "api.grpc-port" , defaultAPIGRPCPort , "gRPC API port to listen on" )
f . StringVar ( & apiOptions . tlsSecretName , "api.tls-secret-name" , "" , "Name of secret containing tls.crt & tls.key for HTTPS API (if empty, self-signed certificate is used)" )
f . StringVar ( & apiOptions . jwtSecretName , "api.jwt-secret-name" , defaultAPIJWTSecretName , "Name of secret which will contain JWT to authenticate API requests." )
f . StringVar ( & apiOptions . jwtKeySecretName , "api.jwt-key-secret-name" , defaultAPIJWTKeySecretName , "Name of secret containing key used to sign JWT. If there is no such secret present, value will be saved here" )
2022-01-04 18:46:53 +00:00
f . BoolVar ( & operatorOptions . enableDeployment , "operator.deployment" , false , "Enable to run the ArangoDeployment operator" )
f . BoolVar ( & operatorOptions . enableDeploymentReplication , "operator.deployment-replication" , false , "Enable to run the ArangoDeploymentReplication operator" )
f . BoolVar ( & operatorOptions . enableStorage , "operator.storage" , false , "Enable to run the ArangoLocalStorage operator" )
f . BoolVar ( & operatorOptions . enableBackup , "operator.backup" , false , "Enable to run the ArangoBackup operator" )
f . BoolVar ( & operatorOptions . enableApps , "operator.apps" , false , "Enable to run the ArangoApps operator" )
2023-11-17 11:07:30 +00:00
f . BoolVar ( & operatorOptions . enableML , "operator.ml" , false , "Enable to run the ArangoML operator" )
2024-05-08 10:20:14 +00:00
f . BoolVar ( & operatorOptions . enableAnalytics , "operator.analytics" , false , "Enable to run the Analytics operator" )
2022-03-23 22:19:36 +00:00
f . BoolVar ( & operatorOptions . enableK2KClusterSync , "operator.k2k-cluster-sync" , false , "Enable to run the ListSimple operator" )
2022-06-14 07:26:07 +00:00
f . MarkDeprecated ( "operator.k2k-cluster-sync" , "Enabled within deployment operator" )
2022-01-04 18:46:53 +00:00
f . BoolVar ( & operatorOptions . versionOnly , "operator.version" , false , "Enable only version endpoint in Operator" )
2023-09-29 10:27:46 +00:00
f . StringVar ( & deprecatedStr , "operator.alpine-image" , "alpine:3.7" , "Docker image used for alpine containers" )
2022-01-04 18:46:53 +00:00
f . MarkDeprecated ( "operator.alpine-image" , "Value is not used anymore" )
2023-09-29 10:27:46 +00:00
f . StringVar ( & deprecatedStr , "operator.metrics-exporter-image" , "arangodb/arangodb-exporter:0.1.6" , "Docker image used for metrics containers by default" )
f . MarkDeprecated ( "operator.metrics-exporter-image" , "Value is not used anymore" )
f . StringVar ( & deprecatedStr , "operator.arango-image" , "arangodb/arangodb:latest" , "Docker image used for arango by default" )
f . MarkDeprecated ( "operator.arango-image" , "Value is not used anymore" )
2022-01-04 18:46:53 +00:00
f . BoolVar ( & chaosOptions . allowed , "chaos.allowed" , false , "Set to allow chaos in deployments. Only activated when allowed and enabled in deployment" )
f . BoolVar ( & operatorOptions . singleMode , "mode.single" , false , "Enable single mode in Operator. WARNING: There should be only one replica of Operator, otherwise Operator can take unexpected actions" )
f . StringVar ( & operatorOptions . scope , "scope" , scope . DefaultScope . String ( ) , "Define scope on which Operator works. Legacy - pre 1.1.0 scope with limited cluster access" )
f . DurationVar ( & operatorTimeouts . k8s , "timeout.k8s" , globals . DefaultKubernetesTimeout , "The request timeout to the kubernetes" )
f . DurationVar ( & operatorTimeouts . arangoD , "timeout.arangod" , globals . DefaultArangoDTimeout , "The request timeout to the ArangoDB" )
2022-02-03 23:03:12 +00:00
f . DurationVar ( & operatorTimeouts . arangoDCheck , "timeout.arangod-check" , globals . DefaultArangoDCheckTimeout , "The version check request timeout to the ArangoDB" )
2022-04-14 10:49:31 +00:00
f . DurationVar ( & operatorTimeouts . agency , "timeout.agency" , globals . DefaultArangoDAgencyTimeout , "The Agency read timeout" )
2022-01-04 18:46:53 +00:00
f . DurationVar ( & operatorTimeouts . reconciliation , "timeout.reconciliation" , globals . DefaultReconciliationTimeout , "The reconciliation timeout to the ArangoDB CR" )
2023-04-25 15:19:13 +00:00
f . DurationVar ( & operatorTimeouts . shardRebuild , "timeout.shard-rebuild" , globals . DefaultOutSyncedShardRebuildTimeout , "Timeout after which particular out-synced shard is considered as failed and rebuild is triggered" )
f . DurationVar ( & operatorTimeouts . shardRebuildRetry , "timeout.shard-rebuild-retry" , globals . DefaultOutSyncedShardRebuildRetryTimeout , "Timeout after which rebuild shards retry flow is triggered" )
2023-07-05 13:58:33 +00:00
f . DurationVar ( & operatorTimeouts . backupArangoD , "timeout.backup-arangod" , globals . BackupDefaultArangoClientTimeout , "The request timeout to the ArangoDB during backup calls" )
f . DurationVar ( & operatorTimeouts . backupUploadArangoD , "timeout.backup-upload" , globals . BackupUploadArangoClientTimeout , "The request timeout to the ArangoDB during uploading files" )
2024-03-25 12:57:16 +00:00
f . DurationVar ( & operatorTimeouts . forcePodDeletionGracePeriod , "timeout.force-delete-pod-grace-period" , globals . DefaultForcePodDeletionGracePeriodTimeout , "Default period when ArangoDB Pod should be forcefully removed after all containers were stopped - set to 0 to disable forceful removals" )
2024-04-08 11:03:01 +00:00
f . DurationVar ( & operatorTimeouts . podSchedulingGracePeriod , "timeout.pod-scheduling-grace-period" , globals . DefaultPodSchedulingGracePeriod , "Default period when ArangoDB Pod should be deleted in case of scheduling info change - set to 0 to disable" )
2022-05-26 08:47:55 +00:00
f . DurationVar ( & shutdownOptions . delay , "shutdown.delay" , defaultShutdownDelay , "The delay before running shutdown handlers" )
f . DurationVar ( & shutdownOptions . timeout , "shutdown.timeout" , defaultShutdownTimeout , "Timeout for shutdown handlers" )
2023-11-23 09:58:34 +00:00
f . DurationVar ( & operatorReconciliationRetry . delay , "operator.reconciliation.retry.delay" , globals . DefaultOperatorUpdateRetryDelay , "Delay between Object Update operations in the Reconciliation loop" )
f . IntVar ( & operatorReconciliationRetry . count , "operator.reconciliation.retry.count" , globals . DefaultOperatorUpdateRetryCount , "Count of retries during Object Update operations in the Reconciliation loop" )
2022-08-25 11:44:28 +00:00
f . BoolVar ( & operatorOptions . scalingIntegrationEnabled , "internal.scaling-integration" , false , "Enable Scaling Integration" )
2023-09-02 00:17:01 +00:00
f . DurationVar ( & operatorOptions . reconciliationDelay , "reconciliation.delay" , 0 , "Delay between reconciliation loops (<= 0 -> Disabled)" )
2022-01-04 18:46:53 +00:00
f . Int64Var ( & operatorKubernetesOptions . maxBatchSize , "kubernetes.max-batch-size" , globals . DefaultKubernetesRequestBatchSize , "Size of batch during objects read" )
2022-02-28 18:53:01 +00:00
f . Float32Var ( & operatorKubernetesOptions . qps , "kubernetes.qps" , kclient . DefaultQPS , "Number of queries per second for k8s API" )
f . IntVar ( & operatorKubernetesOptions . burst , "kubernetes.burst" , kclient . DefaultBurst , "Burst for the k8s API" )
2022-03-02 23:49:49 +00:00
f . BoolVar ( & crdOptions . install , "crd.install" , true , "Install missing CRD if access is possible" )
2023-11-15 09:20:18 +00:00
f . StringArrayVar ( & crdOptions . validationSchema , "crd.validation-schema" , defaultValidationSchemaEnabled , "Overrides default set of CRDs which should have validation schema enabled <crd-name>=<true/false>." )
2022-01-04 18:46:53 +00:00
f . IntVar ( & operatorBackup . concurrentUploads , "backup-concurrent-uploads" , globals . DefaultBackupConcurrentUploads , "Number of concurrent uploads per deployment" )
2022-11-28 14:47:38 +00:00
f . Uint64Var ( & memoryLimit . hardLimit , "memory-limit" , 0 , "Define memory limit for hard shutdown and the dump of goroutines. Used for testing" )
2022-12-08 08:59:13 +00:00
f . StringArrayVar ( & metricsOptions . excludedMetricPrefixes , "metrics.excluded-prefixes" , nil , "List of the excluded metrics prefixes" )
2024-02-26 13:28:18 +00:00
f . BoolVar ( & operatorImageDiscovery . defaultStatusDiscovery , "image.discovery.status" , true , "Discover Operator Image from Pod Status by default. When disabled Pod Spec is used." )
f . DurationVar ( & operatorImageDiscovery . timeout , "image.discovery.timeout" , time . Minute , "Timeout for image discovery process" )
2024-03-25 12:15:07 +00:00
if err := logging . Init ( & cmdMain ) ; err != nil {
panic ( err . Error ( ) )
}
2022-08-09 10:38:55 +00:00
if err := features . Init ( & cmdMain ) ; err != nil {
panic ( err . Error ( ) )
}
2023-09-29 09:45:22 +00:00
if err := agencyConfig . Init ( & cmdMain ) ; err != nil {
2023-06-06 08:27:08 +00:00
panic ( err . Error ( ) )
}
2023-08-15 18:22:15 +00:00
if err := reconcile . ActionsConfigGlobal . Init ( & cmdMain ) ; err != nil {
panic ( err . Error ( ) )
}
2022-01-04 18:46:53 +00:00
}
2024-05-21 09:55:06 +00:00
// Command returns a pointer to the root cobra command of the operator.
func Command() *cobra.Command {
	root := &cmdMain
	return root
}
2022-01-04 18:46:53 +00:00
func Execute ( ) int {
flag . CommandLine . AddGoFlagSet ( goflag . CommandLine )
if err := cmdMain . Execute ( ) ; err != nil {
2023-07-06 12:33:39 +00:00
if v , ok := err . ( CommandExitCode ) ; ok {
return v . ExitCode
}
2022-01-04 18:46:53 +00:00
return 1
}
return 0
}
// executeUsage is a cobra run handler that prints the usage of cmd.
func executeUsage(cmd *cobra.Command, args []string) {
	// The error returned by Usage is deliberately discarded, as before.
	_ = cmd.Usage()
}
// Run the operator
func executeMain ( cmd * cobra . Command , args [ ] string ) {
// Get environment
namespace := os . Getenv ( constants . EnvOperatorPodNamespace )
name := os . Getenv ( constants . EnvOperatorPodName )
ip := os . Getenv ( constants . EnvOperatorPodIP )
2022-11-28 14:47:38 +00:00
go monitorMemoryLimit ( )
2022-01-04 18:46:53 +00:00
globals . GetGlobalTimeouts ( ) . Kubernetes ( ) . Set ( operatorTimeouts . k8s )
globals . GetGlobalTimeouts ( ) . ArangoD ( ) . Set ( operatorTimeouts . arangoD )
2022-04-14 10:49:31 +00:00
globals . GetGlobalTimeouts ( ) . Agency ( ) . Set ( operatorTimeouts . agency )
2022-02-03 23:03:12 +00:00
globals . GetGlobalTimeouts ( ) . ArangoDCheck ( ) . Set ( operatorTimeouts . arangoDCheck )
2022-01-04 18:46:53 +00:00
globals . GetGlobalTimeouts ( ) . Reconciliation ( ) . Set ( operatorTimeouts . reconciliation )
2023-04-25 15:19:13 +00:00
globals . GetGlobalTimeouts ( ) . ShardRebuild ( ) . Set ( operatorTimeouts . shardRebuild )
globals . GetGlobalTimeouts ( ) . ShardRebuildRetry ( ) . Set ( operatorTimeouts . shardRebuildRetry )
2023-07-05 13:58:33 +00:00
globals . GetGlobalTimeouts ( ) . BackupArangoClientTimeout ( ) . Set ( operatorTimeouts . backupArangoD )
globals . GetGlobalTimeouts ( ) . BackupArangoClientUploadTimeout ( ) . Set ( operatorTimeouts . backupUploadArangoD )
2024-03-25 12:57:16 +00:00
globals . GetGlobalTimeouts ( ) . ForcePodDeletionGracePeriodTimeout ( ) . Set ( operatorTimeouts . forcePodDeletionGracePeriod )
2024-04-08 11:03:01 +00:00
globals . GetGlobalTimeouts ( ) . PodSchedulingGracePeriod ( ) . Set ( operatorTimeouts . podSchedulingGracePeriod )
2023-07-05 13:58:33 +00:00
2023-11-23 09:58:34 +00:00
globals . GetGlobals ( ) . Retry ( ) . OperatorUpdateRetryDelay ( ) . Set ( operatorReconciliationRetry . delay )
globals . GetGlobals ( ) . Retry ( ) . OperatorUpdateRetryCount ( ) . Set ( operatorReconciliationRetry . count )
2022-01-04 18:46:53 +00:00
globals . GetGlobals ( ) . Kubernetes ( ) . RequestBatchSize ( ) . Set ( operatorKubernetesOptions . maxBatchSize )
globals . GetGlobals ( ) . Backup ( ) . ConcurrentUploads ( ) . Set ( operatorBackup . concurrentUploads )
2022-12-08 08:59:13 +00:00
collector . GetCollector ( ) . SetFilter ( metrics . NegateMetricPushFilter ( metrics . NewPrefixMetricPushFilter ( metricsOptions . excludedMetricPrefixes ... ) ) )
2022-02-28 18:53:01 +00:00
kclient . SetDefaultQPS ( operatorKubernetesOptions . qps )
kclient . SetDefaultBurst ( operatorKubernetesOptions . burst )
2022-01-04 18:46:53 +00:00
// Prepare log service
2024-03-25 12:15:07 +00:00
if err := logging . Enable ( ) ; err != nil {
logger . Err ( err ) . Fatal ( "Unable to enable logger" )
2022-01-04 18:46:53 +00:00
}
2022-03-23 22:19:36 +00:00
2022-06-14 07:26:07 +00:00
podNameParts := strings . Split ( name , "-" )
operatorID := podNameParts [ len ( podNameParts ) - 1 ]
2023-07-18 10:57:33 +00:00
if operatorID != "" {
logging . Global ( ) . RegisterWrappers ( func ( in * zerolog . Event ) * zerolog . Event {
return in . Str ( "operator-id" , operatorID )
} )
}
2022-01-04 18:46:53 +00:00
2023-02-23 10:55:06 +00:00
logger . Info ( "nice to meet you" )
2022-01-04 18:46:53 +00:00
2023-05-24 12:23:10 +00:00
// Print all enabled featured
features . Iterate ( func ( name string , feature features . Feature ) {
logger . Info ( "Operator Feature %s (%s) is %s." , name , features . GetFeatureArgName ( name ) , util . BoolSwitch ( feature . Enabled ( ) , "enabled" , "disabled" ) )
} )
2022-01-04 18:46:53 +00:00
// Check operating mode
2022-01-06 20:27:01 +00:00
if ! operatorOptions . enableDeployment && ! operatorOptions . enableDeploymentReplication && ! operatorOptions . enableStorage &&
2024-05-08 10:20:14 +00:00
! operatorOptions . enableBackup && ! operatorOptions . enableApps && ! operatorOptions . enableK2KClusterSync && ! operatorOptions . enableML && ! operatorOptions . enableAnalytics {
2022-01-04 18:46:53 +00:00
if ! operatorOptions . versionOnly {
2023-11-17 11:07:30 +00:00
if version . GetVersionV1 ( ) . IsEnterprise ( ) {
2024-05-08 10:20:14 +00:00
logger . Fatal ( "Turn on --operator.deployment, --operator.deployment-replication, --operator.storage, --operator.backup, --operator.apps, --operator.k2k-cluster-sync, --operator.ml, --operator.analytics or any combination of these" )
2023-11-17 11:07:30 +00:00
} else {
2024-03-25 12:15:07 +00:00
logger . Fatal ( "Turn on --operator.deployment, --operator.deployment-replication, --operator.storage, --operator.backup, --operator.apps, --operator.k2k-cluster-sync or any combination of these" )
2023-11-17 11:07:30 +00:00
}
2022-01-04 18:46:53 +00:00
}
} else if operatorOptions . versionOnly {
2024-05-08 10:20:14 +00:00
logger . Fatal ( "Options --operator.deployment, --operator.deployment-replication, --operator.storage, --operator.backup, --operator.apps, --operator.k2k-cluster-sync, --operator.ml, --operator.analytics cannot be enabled together with --operator.version" )
2023-11-17 11:07:30 +00:00
} else if ! version . GetVersionV1 ( ) . IsEnterprise ( ) {
2024-05-08 10:20:14 +00:00
if operatorOptions . enableML || operatorOptions . enableAnalytics {
logger . Fatal ( "Options --operator.ml, --operator.analytics can be enabled only on the Enterprise Operator" )
2023-11-17 11:07:30 +00:00
}
2022-01-04 18:46:53 +00:00
}
// Log version
2022-06-14 07:26:07 +00:00
logger .
2022-01-04 18:46:53 +00:00
Str ( "pod-name" , name ) .
Str ( "pod-namespace" , namespace ) .
2022-06-14 07:26:07 +00:00
Info ( "Starting arangodb-operator (%s), version %s build %s" , version . GetVersionV1 ( ) . Edition . Title ( ) , version . GetVersionV1 ( ) . Version , version . GetVersionV1 ( ) . Build )
2022-01-04 18:46:53 +00:00
// Check environment
if ! operatorOptions . versionOnly {
if len ( namespace ) == 0 {
2022-06-14 07:26:07 +00:00
logger . Fatal ( "%s environment variable missing" , constants . EnvOperatorPodNamespace )
2022-01-04 18:46:53 +00:00
}
if len ( name ) == 0 {
2022-06-14 07:26:07 +00:00
logger . Fatal ( "%s environment variable missing" , constants . EnvOperatorPodName )
2022-01-04 18:46:53 +00:00
}
if len ( ip ) == 0 {
2022-06-14 07:26:07 +00:00
logger . Fatal ( "%s environment variable missing" , constants . EnvOperatorPodIP )
2022-01-04 18:46:53 +00:00
}
// Get host name
id , err := os . Hostname ( )
if err != nil {
2022-06-14 07:26:07 +00:00
logger . Err ( err ) . Fatal ( "Failed to get hostname" )
2022-01-04 18:46:53 +00:00
}
2022-02-28 18:53:01 +00:00
client , ok := kclient . GetDefaultFactory ( ) . Client ( )
if ! ok {
2022-06-14 07:26:07 +00:00
logger . Fatal ( "Failed to get client" )
2022-01-04 18:46:53 +00:00
}
2022-02-28 18:53:01 +00:00
2022-03-02 23:49:49 +00:00
if crdOptions . install {
2024-04-29 09:56:10 +00:00
ctx , cancel := context . WithTimeout ( shutdown . Context ( ) , time . Minute )
2022-03-02 23:49:49 +00:00
defer cancel ( )
2023-11-15 09:20:18 +00:00
crdOpts , err := prepareCRDOptions ( crdOptions . validationSchema )
if err != nil {
logger . Fatal ( "Invalid --crd.validation-schema args: %s" , err )
}
_ = crd . EnsureCRDWithOptions ( ctx , client , crd . EnsureCRDOptions { IgnoreErrors : true , CRDOptions : crdOpts } )
2022-03-02 23:49:49 +00:00
}
2022-02-28 18:53:01 +00:00
secrets := client . Kubernetes ( ) . CoreV1 ( ) . Secrets ( namespace )
2022-01-04 18:46:53 +00:00
// Create operator
cfg , deps , err := newOperatorConfigAndDeps ( id + "-" + name , namespace , name )
if err != nil {
2022-06-14 07:26:07 +00:00
logger . Err ( err ) . Fatal ( "Failed to create operator config & deps" )
2022-01-04 18:46:53 +00:00
}
2022-09-19 14:48:54 +00:00
if err := ensureFeaturesConfigMap ( context . Background ( ) , client . Kubernetes ( ) . CoreV1 ( ) . ConfigMaps ( namespace ) , cfg ) ; err != nil {
logger . Err ( err ) . Error ( "Failed to create features config map" )
}
2022-01-04 18:46:53 +00:00
o , err := operator . NewOperator ( cfg , deps )
if err != nil {
2022-06-14 07:26:07 +00:00
logger . Err ( err ) . Fatal ( "Failed to create operator" )
2022-01-04 18:46:53 +00:00
}
2022-08-15 08:23:25 +00:00
if apiOptions . enabled {
apiServerCfg := api . ServerConfig {
Namespace : namespace ,
ServerName : name ,
ServerAltNames : [ ] string { ip } ,
HTTPAddress : net . JoinHostPort ( "0.0.0.0" , strconv . Itoa ( apiOptions . httpPort ) ) ,
GRPCAddress : net . JoinHostPort ( "0.0.0.0" , strconv . Itoa ( apiOptions . grpcPort ) ) ,
TLSSecretName : apiOptions . tlsSecretName ,
JWTSecretName : apiOptions . jwtSecretName ,
JWTKeySecretName : apiOptions . jwtKeySecretName ,
LivelinessProbe : & livenessProbe ,
ProbeDeployment : api . ReadinessProbeConfig {
Enabled : cfg . EnableDeployment ,
Probe : & deploymentProbe ,
} ,
ProbeDeploymentReplication : api . ReadinessProbeConfig {
Enabled : cfg . EnableDeploymentReplication ,
Probe : & deploymentReplicationProbe ,
} ,
ProbeStorage : api . ReadinessProbeConfig {
Enabled : cfg . EnableStorage ,
Probe : & storageProbe ,
} ,
}
apiServer , err := api . NewServer ( client . Kubernetes ( ) . CoreV1 ( ) , apiServerCfg )
if err != nil {
logger . Err ( err ) . Fatal ( "Failed to create API server" )
}
2024-03-25 12:15:07 +00:00
go func ( ) {
if err := apiServer . Run ( ) ; err != nil {
logger . Err ( err ) . Error ( "while running API server" )
}
} ( )
2022-08-15 08:23:25 +00:00
}
2022-01-04 18:46:53 +00:00
listenAddr := net . JoinHostPort ( serverOptions . host , strconv . Itoa ( serverOptions . port ) )
2022-02-28 18:53:01 +00:00
if svr , err := server . NewServer ( client . Kubernetes ( ) . CoreV1 ( ) , server . Config {
2022-01-04 18:46:53 +00:00
Namespace : namespace ,
Address : listenAddr ,
TLSSecretName : serverOptions . tlsSecretName ,
TLSSecretNamespace : namespace ,
PodName : name ,
PodIP : ip ,
AdminSecretName : serverOptions . adminSecretName ,
AllowAnonymous : serverOptions . allowAnonymous ,
} , server . Dependencies {
LivenessProbe : & livenessProbe ,
Deployment : server . OperatorDependency {
Enabled : cfg . EnableDeployment ,
Probe : & deploymentProbe ,
} ,
DeploymentReplication : server . OperatorDependency {
Enabled : cfg . EnableDeploymentReplication ,
Probe : & deploymentReplicationProbe ,
} ,
Storage : server . OperatorDependency {
Enabled : cfg . EnableStorage ,
Probe : & storageProbe ,
} ,
Backup : server . OperatorDependency {
Enabled : cfg . EnableBackup ,
Probe : & backupProbe ,
} ,
Apps : server . OperatorDependency {
Enabled : cfg . EnableApps ,
Probe : & appsProbe ,
} ,
2023-11-17 11:07:30 +00:00
ML : server . OperatorDependency {
Enabled : cfg . EnableML ,
Probe : & mlProbe ,
} ,
2024-05-08 10:20:14 +00:00
Analytics : server . OperatorDependency {
Enabled : cfg . EnableAnalytics ,
Probe : & analyticsProbe ,
} ,
2022-01-06 20:27:01 +00:00
ClusterSync : server . OperatorDependency {
Enabled : cfg . EnableK2KClusterSync ,
Probe : & k2KClusterSyncProbe ,
} ,
2022-01-04 18:46:53 +00:00
Operators : o ,
Secrets : secrets ,
} ) ; err != nil {
2022-06-14 07:26:07 +00:00
logger . Err ( err ) . Fatal ( "Failed to create HTTP server" )
2022-01-04 18:46:53 +00:00
} else {
2024-03-25 12:15:07 +00:00
go func ( ) {
if err := svr . Run ( ) ; err != nil {
logger . Err ( err ) . Error ( "error while starting server" )
}
} ( )
2022-01-04 18:46:53 +00:00
}
// startChaos(context.Background(), cfg.KubeCli, cfg.Namespace, chaosLevel)
// Start operator
o . Run ( )
} else {
if err := startVersionProcess ( ) ; err != nil {
2022-06-14 07:26:07 +00:00
logger . Err ( err ) . Fatal ( "Failed to create HTTP server" )
2022-01-04 18:46:53 +00:00
}
}
}
func startVersionProcess ( ) error {
// Just expose version
listenAddr := net . JoinHostPort ( serverOptions . host , strconv . Itoa ( serverOptions . port ) )
2022-06-14 07:26:07 +00:00
logger . Str ( "addr" , listenAddr ) . Info ( "Starting version endpoint" )
2022-01-04 18:46:53 +00:00
gin . SetMode ( gin . ReleaseMode )
r := gin . New ( )
r . Use ( gin . Recovery ( ) )
versionV1Responser , err := operatorHTTP . NewSimpleJSONResponse ( version . GetVersionV1 ( ) )
if err != nil {
return errors . WithStack ( err )
}
r . GET ( "/_api/version" , gin . WrapF ( versionV1Responser . ServeHTTP ) )
r . GET ( "/api/v1/version" , gin . WrapF ( versionV1Responser . ServeHTTP ) )
s := http . Server {
Addr : listenAddr ,
Handler : r ,
}
return s . ListenAndServe ( )
}
// newOperatorConfigAndDeps creates operator config & dependencies.
func newOperatorConfigAndDeps ( id , namespace , name string ) ( operator . Config , operator . Dependencies , error ) {
2022-02-28 18:53:01 +00:00
client , ok := kclient . GetDefaultFactory ( ) . Client ( )
if ! ok {
return operator . Config { } , operator . Dependencies { } , errors . Errorf ( "Failed to get client" )
2022-01-04 18:46:53 +00:00
}
2022-02-28 18:53:01 +00:00
image , serviceAccount , err := getMyPodInfo ( client . Kubernetes ( ) , namespace , name )
2022-01-04 18:46:53 +00:00
if err != nil {
2022-02-28 18:53:01 +00:00
return operator . Config { } , operator . Dependencies { } , errors . WithStack ( fmt . Errorf ( "Failed to get my pod's service account: %s" , err ) )
2022-01-04 18:46:53 +00:00
}
2022-06-14 07:26:07 +00:00
eventRecorder := createRecorder ( client . Kubernetes ( ) , name , namespace )
2022-01-04 18:46:53 +00:00
scope , ok := scope . AsScope ( operatorOptions . scope )
if ! ok {
2022-02-28 18:53:01 +00:00
return operator . Config { } , operator . Dependencies { } , errors . WithStack ( fmt . Errorf ( "Scope %s is not known by Operator" , operatorOptions . scope ) )
2022-01-04 18:46:53 +00:00
}
cfg := operator . Config {
ID : id ,
Namespace : namespace ,
PodName : name ,
ServiceAccount : serviceAccount ,
OperatorImage : image ,
EnableDeployment : operatorOptions . enableDeployment ,
EnableDeploymentReplication : operatorOptions . enableDeploymentReplication ,
EnableStorage : operatorOptions . enableStorage ,
EnableBackup : operatorOptions . enableBackup ,
EnableApps : operatorOptions . enableApps ,
2023-11-17 11:07:30 +00:00
EnableML : operatorOptions . enableML ,
2024-05-08 10:20:14 +00:00
EnableAnalytics : operatorOptions . enableAnalytics ,
2022-01-06 20:27:01 +00:00
EnableK2KClusterSync : operatorOptions . enableK2KClusterSync ,
2022-01-04 18:46:53 +00:00
AllowChaos : chaosOptions . allowed ,
ScalingIntegrationEnabled : operatorOptions . scalingIntegrationEnabled ,
SingleMode : operatorOptions . singleMode ,
Scope : scope ,
2023-09-02 00:17:01 +00:00
ReconciliationDelay : operatorOptions . reconciliationDelay ,
2022-05-26 08:47:55 +00:00
ShutdownDelay : shutdownOptions . delay ,
ShutdownTimeout : shutdownOptions . timeout ,
2022-01-04 18:46:53 +00:00
}
deps := operator . Dependencies {
2022-02-28 18:53:01 +00:00
Client : client ,
2022-01-04 18:46:53 +00:00
EventRecorder : eventRecorder ,
LivenessProbe : & livenessProbe ,
DeploymentProbe : & deploymentProbe ,
DeploymentReplicationProbe : & deploymentReplicationProbe ,
StorageProbe : & storageProbe ,
BackupProbe : & backupProbe ,
AppsProbe : & appsProbe ,
2023-11-17 11:07:30 +00:00
MlProbe : & mlProbe ,
2024-05-08 10:20:14 +00:00
AnalyticsProbe : & analyticsProbe ,
2022-01-06 20:27:01 +00:00
K2KClusterSyncProbe : & k2KClusterSyncProbe ,
2022-01-04 18:46:53 +00:00
}
return cfg , deps , nil
}
// getMyPodInfo looks up the image & service account of the pod with given name in given namespace
// Returns image, serviceAccount, error.
func getMyPodInfo ( kubecli kubernetes . Interface , namespace , name string ) ( string , string , error ) {
2024-02-26 13:28:18 +00:00
if image , sa , ok := getMyPodInfoWrap ( kubecli , namespace , name , getMyImageInfoFunc ( operatorImageDiscovery . defaultStatusDiscovery ) ) ; ok {
return image , sa , nil
}
logger . Warn ( "Unable to discover image, fallback to second method" )
if image , sa , ok := getMyPodInfoWrap ( kubecli , namespace , name , getMyImageInfoFunc ( ! operatorImageDiscovery . defaultStatusDiscovery ) ) ; ok {
return image , sa , nil
}
return "" , "" , errors . Errorf ( "Unable to discover image" )
}
func getMyPodInfoWrap ( kubecli kubernetes . Interface , namespace , name string , imageFunc func ( in * core . Pod ) ( string , bool ) ) ( string , string , bool ) {
2022-01-04 18:46:53 +00:00
var image , sa string
op := func ( ) error {
2022-06-30 18:39:07 +00:00
pod , err := kubecli . CoreV1 ( ) . Pods ( namespace ) . Get ( context . Background ( ) , name , meta . GetOptions { } )
2022-01-04 18:46:53 +00:00
if err != nil {
2022-06-14 07:26:07 +00:00
logger .
2022-01-04 18:46:53 +00:00
Err ( err ) .
Str ( "name" , name ) .
2022-06-14 07:26:07 +00:00
Error ( "Failed to get operator pod" )
2022-02-28 18:53:01 +00:00
return errors . WithStack ( err )
2022-01-04 18:46:53 +00:00
}
sa = pod . Spec . ServiceAccountName
2024-02-26 13:28:18 +00:00
if i , ok := imageFunc ( pod ) ; ! ok {
2024-02-29 10:19:44 +00:00
return errors . Errorf ( "failed to get image ID from pod" )
2024-02-26 13:28:18 +00:00
} else {
image = i
2022-01-21 08:22:49 +00:00
}
2022-01-04 18:46:53 +00:00
return nil
}
2024-02-26 13:28:18 +00:00
if err := retry . Retry ( op , operatorImageDiscovery . timeout / 2 ) ; err == nil {
return image , sa , true
}
return "" , "" , false
}
func getMyImageInfoFunc ( status bool ) func ( pod * core . Pod ) ( string , bool ) {
return func ( pod * core . Pod ) ( string , bool ) {
if status {
return k8sutil . GetArangoDBImageIDFromContainerStatuses ( pod . Status . ContainerStatuses , shared . ServerContainerName , shared . OperatorContainerName , constants . MyContainerNameEnv . GetOrDefault ( shared . OperatorContainerName ) )
}
2024-02-29 10:19:44 +00:00
return k8sutil . GetArangoDBImageFromContainers ( pod . Spec . Containers , shared . ServerContainerName , shared . OperatorContainerName , constants . MyContainerNameEnv . GetOrDefault ( shared . OperatorContainerName ) )
2022-01-04 18:46:53 +00:00
}
}
2022-06-14 07:26:07 +00:00
func createRecorder ( kubecli kubernetes . Interface , name , namespace string ) record . EventRecorder {
2022-01-04 18:46:53 +00:00
eventBroadcaster := record . NewBroadcaster ( )
eventBroadcaster . StartLogging ( func ( format string , args ... interface { } ) {
2022-06-14 07:26:07 +00:00
eventRecorder . Info ( format , args ... )
2022-01-04 18:46:53 +00:00
} )
2022-07-11 11:49:47 +00:00
eventBroadcaster . StartRecordingToSink ( & typedCore . EventSinkImpl { Interface : typedCore . New ( kubecli . CoreV1 ( ) . RESTClient ( ) ) . Events ( namespace ) } )
2022-01-04 18:46:53 +00:00
combinedScheme := runtime . NewScheme ( )
scheme . AddToScheme ( combinedScheme )
2022-06-30 18:39:07 +00:00
core . AddToScheme ( combinedScheme )
2022-07-14 16:22:16 +00:00
apps . AddToScheme ( combinedScheme )
2022-06-30 18:39:07 +00:00
return eventBroadcaster . NewRecorder ( combinedScheme , core . EventSource { Component : name } )
2022-01-04 18:46:53 +00:00
}
2022-09-19 14:48:54 +00:00
// ensureFeaturesConfigMap creates or updates config map with enabled features.
func ensureFeaturesConfigMap ( ctx context . Context , client typedCore . ConfigMapInterface , cfg operator . Config ) error {
ft := features . GetFeatureMap ( )
featuresCM := make ( map [ string ] string , len ( ft ) )
for k , v := range ft {
if v {
featuresCM [ k ] = features . Enabled
} else {
featuresCM [ k ] = features . Disabled
}
}
nctx , c := globals . GetGlobalTimeouts ( ) . Kubernetes ( ) . WithTimeout ( ctx )
defer c ( )
if cm , err := client . Get ( nctx , features . ConfigMapName ( ) , meta . GetOptions { } ) ; err != nil {
2022-09-19 19:55:37 +00:00
if ! apiErrors . IsNotFound ( err ) {
2022-09-19 14:48:54 +00:00
return err
}
nctx , c := globals . GetGlobalTimeouts ( ) . Kubernetes ( ) . WithTimeout ( ctx )
defer c ( )
if _ , err := client . Create ( nctx , & core . ConfigMap {
ObjectMeta : meta . ObjectMeta {
Name : features . ConfigMapName ( ) ,
Namespace : cfg . Namespace ,
} ,
2022-09-20 06:05:25 +00:00
Data : featuresCM ,
2022-09-19 14:48:54 +00:00
} , meta . CreateOptions { } ) ; err != nil {
return err
}
return nil
} else if ! reflect . DeepEqual ( cm . Data , featuresCM ) {
q := cm . DeepCopy ( )
q . Data = featuresCM
nctx , c := globals . GetGlobalTimeouts ( ) . Kubernetes ( ) . WithTimeout ( ctx )
defer c ( )
if _ , err := client . Update ( nctx , q , meta . UpdateOptions { } ) ; err != nil {
return err
}
return nil
}
return nil
}