prometheus-operator/pkg/prometheus/server/statefulset.go
Arthur Silva Sens cc47b1e160
Prometheus Agent support (#5385)

* Introduce the PrometheusAgent CRD. The operator can run with PrometheusAgent resources in the cluster but doesn't act on them yet; this is the first step toward the Prometheus Agent Operator.
* Re-enable configmap and secret informers.
* Implement Resolve for the Agent operator.
* Create the Agent StatefulSet from the operator.
* Create the configuration secret from ServiceMonitors, PodMonitors, and Probes.
* Configure remote-write and additionalScrapeConfigs.
* Implement UpdateStatus and add resource handlers.
* Run make format and make --always-make format generate.
* Only start the Agent operator if there are sufficient permissions.
* Remove node endpoint synchronization from the Agent operator; the server operator already handles it.
* Move the PrometheusAgent API from v1 to v1alpha1.
* pkg/prometheus/agent/statefulset.go: fix image concatenation.
* Avoid name collisions between Prometheus Agents and Servers.
* agent/createOrUpdateConfigurationSecret: do not handle the case where the ServiceMonitor and PodMonitor selectors are empty.
* Remove unused fields from the Operator struct.
* Add the deployment mode as a new selector label for agent/server StatefulSets.
* Fix the OperatorUpgrade e2e test.
* Panic if type-casting PrometheusInterface doesn't return a Prometheus or PrometheusAgent.
* Detect whether the PrometheusAgent CRD is installed. If the operator's service account has full cluster permissions but the CRD isn't installed, the PrometheusAgent controller would otherwise run and fail because the CRD is absent.
* Create a dedicated governing service for the Prometheus agent.

Co-authored-by: Simon Pasquier <spasquie@redhat.com>
2023-03-27 12:30:01 +02:00


// Copyright 2016 The prometheus-operator Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package prometheus

import (
	"fmt"
	"net/url"
	"path"
	"strings"

	"github.com/blang/semver/v4"
	"github.com/go-kit/log"
	"github.com/pkg/errors"
	appsv1 "k8s.io/api/apps/v1"
	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/intstr"

	monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
	"github.com/prometheus-operator/prometheus-operator/pkg/k8sutil"
	"github.com/prometheus-operator/prometheus-operator/pkg/operator"
	prompkg "github.com/prometheus-operator/prometheus-operator/pkg/prometheus"
	"github.com/prometheus-operator/prometheus-operator/pkg/webconfig"
)
const (
defaultRetention = "24h"
defaultQueryLogVolume = "query-log-file"
prometheusMode = "server"
governingServiceName = "prometheus-operated"
)
// TODO(ArthurSens): generalize it enough to be used by both server and agent.
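// makeStatefulSetService builds the headless governing service ("prometheus-operated")
// referenced by the StatefulSet's ServiceName. It exposes the Prometheus web port and,
// when a Thanos sidecar is configured, the Thanos gRPC port.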
func makeStatefulSetService(p *monitoringv1.Prometheus, config operator.Config) *v1.Service {
p = p.DeepCopy()
if p.Spec.PortName == "" {
p.Spec.PortName = prompkg.DefaultPortName
}
svc := &v1.Service{
ObjectMeta: metav1.ObjectMeta{
Name: governingServiceName,
OwnerReferences: []metav1.OwnerReference{
{
Name: p.GetName(),
Kind: p.Kind,
APIVersion: p.APIVersion,
UID: p.GetUID(),
},
},
Labels: config.Labels.Merge(map[string]string{
"operated-prometheus": "true",
}),
},
Spec: v1.ServiceSpec{
ClusterIP: "None",
Ports: []v1.ServicePort{
{
Name: p.Spec.PortName,
Port: 9090,
TargetPort: intstr.FromString(p.Spec.PortName),
},
},
Selector: map[string]string{
"app.kubernetes.io/name": "prometheus",
},
},
}
if p.Spec.Thanos != nil {
svc.Spec.Ports = append(svc.Spec.Ports, v1.ServicePort{
Name: "grpc",
Port: 10901,
TargetPort: intstr.FromString("grpc"),
})
}
return svc
}
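
// makeStatefulSet assembles the server StatefulSet: it defaults the port name and
// replica count, builds the spec via makeStatefulSetSpec, carries over labels and
// non-kubectl annotations from the Prometheus object, records the input hash
// annotation, and attaches storage as an emptyDir, an ephemeral volume, or a
// persistent volume claim template.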
func makeStatefulSet(
logger log.Logger,
name string,
p monitoringv1.PrometheusInterface,
baseImage, tag, sha string,
retention monitoringv1.Duration,
retentionSize monitoringv1.ByteSize,
rules monitoringv1.Rules,
query *monitoringv1.QuerySpec,
allowOverlappingBlocks bool,
enableAdminAPI bool,
queryLogFile string,
thanos *monitoringv1.ThanosSpec,
disableCompaction bool,
config *operator.Config,
cg *prompkg.ConfigGenerator,
ruleConfigMapNames []string,
inputHash string,
shard int32,
tlsAssetSecrets []string,
) (*appsv1.StatefulSet, error) {
cpf := p.GetCommonPrometheusFields()
objMeta := p.GetObjectMeta()
typeMeta := p.GetTypeMeta()
if cpf.PortName == "" {
cpf.PortName = prompkg.DefaultPortName
}
if cpf.Replicas == nil {
cpf.Replicas = &prompkg.MinReplicas
}
intZero := int32(0)
if *cpf.Replicas < 0 {
cpf.Replicas = &intZero
}
// We need to re-set the common fields because cpf is only a copy of the original object.
// We set some defaults if some fields are not present, and we want those fields set in the original Prometheus object before building the StatefulSetSpec.
p.SetCommonPrometheusFields(cpf)
spec, err := makeStatefulSetSpec(logger, baseImage, tag, sha, retention, retentionSize, rules, query, allowOverlappingBlocks, enableAdminAPI, queryLogFile, thanos, disableCompaction, p, config, cg, shard, ruleConfigMapNames, tlsAssetSecrets)
if err != nil {
return nil, errors.Wrap(err, "make StatefulSet spec")
}
boolTrue := true
// Do not transfer kubectl annotations to the StatefulSet so that it is not
// pruned by kubectl
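// (e.g. "kubectl.kubernetes.io/last-applied-configuration").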
annotations := make(map[string]string)
for key, value := range objMeta.GetAnnotations() {
if !strings.HasPrefix(key, "kubectl.kubernetes.io/") {
annotations[key] = value
}
}
labels := make(map[string]string)
for key, value := range objMeta.GetLabels() {
labels[key] = value
}
labels[prompkg.ShardLabelName] = fmt.Sprintf("%d", shard)
labels[prompkg.PrometheusNameLabelName] = objMeta.GetName()
labels[prompkg.PrometheusModeLabeLName] = prometheusMode
statefulset := &appsv1.StatefulSet{
ObjectMeta: metav1.ObjectMeta{
Name: name,
Labels: config.Labels.Merge(labels),
Annotations: annotations,
OwnerReferences: []metav1.OwnerReference{
{
APIVersion: typeMeta.APIVersion,
BlockOwnerDeletion: &boolTrue,
Controller: &boolTrue,
Kind: typeMeta.Kind,
Name: objMeta.GetName(),
UID: objMeta.GetUID(),
},
},
},
Spec: *spec,
}
if statefulset.ObjectMeta.Annotations == nil {
statefulset.ObjectMeta.Annotations = map[string]string{
prompkg.SSetInputHashName: inputHash,
}
} else {
statefulset.ObjectMeta.Annotations[prompkg.SSetInputHashName] = inputHash
}
if len(cpf.ImagePullSecrets) > 0 {
statefulset.Spec.Template.Spec.ImagePullSecrets = cpf.ImagePullSecrets
}
storageSpec := cpf.Storage
if storageSpec == nil {
statefulset.Spec.Template.Spec.Volumes = append(statefulset.Spec.Template.Spec.Volumes, v1.Volume{
Name: prompkg.VolumeName(p),
VolumeSource: v1.VolumeSource{
EmptyDir: &v1.EmptyDirVolumeSource{},
},
})
} else if storageSpec.EmptyDir != nil {
emptyDir := storageSpec.EmptyDir
statefulset.Spec.Template.Spec.Volumes = append(statefulset.Spec.Template.Spec.Volumes, v1.Volume{
Name: prompkg.VolumeName(p),
VolumeSource: v1.VolumeSource{
EmptyDir: emptyDir,
},
})
} else if storageSpec.Ephemeral != nil {
ephemeral := storageSpec.Ephemeral
statefulset.Spec.Template.Spec.Volumes = append(statefulset.Spec.Template.Spec.Volumes, v1.Volume{
Name: prompkg.VolumeName(p),
VolumeSource: v1.VolumeSource{
Ephemeral: ephemeral,
},
})
} else {
pvcTemplate := operator.MakeVolumeClaimTemplate(storageSpec.VolumeClaimTemplate)
if pvcTemplate.Name == "" {
pvcTemplate.Name = prompkg.VolumeName(p)
}
if storageSpec.VolumeClaimTemplate.Spec.AccessModes == nil {
pvcTemplate.Spec.AccessModes = []v1.PersistentVolumeAccessMode{v1.ReadWriteOnce}
} else {
pvcTemplate.Spec.AccessModes = storageSpec.VolumeClaimTemplate.Spec.AccessModes
}
pvcTemplate.Spec.Resources = storageSpec.VolumeClaimTemplate.Spec.Resources
pvcTemplate.Spec.Selector = storageSpec.VolumeClaimTemplate.Spec.Selector
statefulset.Spec.VolumeClaimTemplates = append(statefulset.Spec.VolumeClaimTemplates, *pvcTemplate)
}
statefulset.Spec.Template.Spec.Volumes = append(statefulset.Spec.Template.Spec.Volumes, cpf.Volumes...)
if cpf.HostNetwork {
statefulset.Spec.Template.Spec.DNSPolicy = v1.DNSClusterFirstWithHostNet
}
return statefulset, nil
}
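
// makeStatefulSetSpec builds the pod template for the server: the prometheus
// container with its arguments and probes, the config-reloader init and sidecar
// containers, the optional Thanos sidecar, and the selector labels that must
// stay stable across updates.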
func makeStatefulSetSpec(
logger log.Logger,
baseImage, tag, sha string,
retention monitoringv1.Duration,
retentionSize monitoringv1.ByteSize,
rules monitoringv1.Rules,
query *monitoringv1.QuerySpec,
allowOverlappingBlocks bool,
enableAdminAPI bool,
queryLogFile string,
thanos *monitoringv1.ThanosSpec,
disableCompaction bool,
p monitoringv1.PrometheusInterface,
c *operator.Config,
cg *prompkg.ConfigGenerator,
shard int32,
ruleConfigMapNames []string,
tlsAssetSecrets []string,
) (*appsv1.StatefulSetSpec, error) {
// Prometheus may take quite long to shut down to checkpoint existing data.
// Allow up to 10 minutes for clean termination.
terminationGracePeriod := int64(600)
cpf := p.GetCommonPrometheusFields()
promName := p.GetObjectMeta().GetName()
pImagePath, err := operator.BuildImagePath(
operator.StringPtrValOrDefault(cpf.Image, ""),
operator.StringValOrDefault(baseImage, c.PrometheusDefaultBaseImage),
operator.StringValOrDefault(cpf.Version, operator.DefaultPrometheusVersion),
operator.StringValOrDefault(tag, ""),
operator.StringValOrDefault(sha, ""),
)
if err != nil {
return nil, err
}
webRoutePrefix := "/"
if cpf.RoutePrefix != "" {
webRoutePrefix = cpf.RoutePrefix
}
promArgs := prompkg.BuildCommonPrometheusArgs(cpf, cg, webRoutePrefix)
promArgs = appendServerArgs(promArgs, cg, retention, retentionSize, rules, query, allowOverlappingBlocks, enableAdminAPI)
var ports []v1.ContainerPort
if !cpf.ListenLocal {
ports = []v1.ContainerPort{
{
Name: cpf.PortName,
ContainerPort: 9090,
Protocol: v1.ProtocolTCP,
},
}
}
volumes, promVolumeMounts, err := prompkg.BuildCommonVolumes(p, tlsAssetSecrets)
if err != nil {
return nil, err
}
volumes, promVolumeMounts = appendServerVolumes(volumes, promVolumeMounts, queryLogFile, ruleConfigMapNames)
// Mount web config and web TLS credentials as volumes.
// We always mount the web config file for versions 2.24.0 and above.
// This avoids redeploying Prometheus when reconfiguring between
// HTTP and HTTPS and vice versa.
webConfigGenerator := cg.WithMinimumVersion("2.24.0")
if webConfigGenerator.IsCompatible() {
var fields monitoringv1.WebConfigFileFields
if cpf.Web != nil {
fields = cpf.Web.WebConfigFileFields
}
webConfig, err := webconfig.New(prompkg.WebConfigDir, prompkg.WebConfigSecretName(p), fields)
if err != nil {
return nil, err
}
confArg, configVol, configMount, err := webConfig.GetMountParameters()
if err != nil {
return nil, err
}
promArgs = append(promArgs, confArg)
volumes = append(volumes, configVol...)
promVolumeMounts = append(promVolumeMounts, configMount...)
} else if cpf.Web != nil {
webConfigGenerator.Warn("web.config.file")
}
// The /-/ready handler returns OK only after the TSDB initialization has
// completed. The WAL replay can take a significant time for large setups
// hence we enable the startup probe with a generous failure threshold (15
// minutes) to ensure that the readiness probe only comes into effect once
// Prometheus is effectively ready.
// We don't want to use the /-/healthy handler here because it returns OK as
// soon as the web server is started (irrespective of the WAL replay).
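// With PeriodSeconds=15 and FailureThreshold=60 below, the startup probe allows
// up to 15s * 60 = 900s (15 minutes) for the WAL replay to complete.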
readyProbeHandler := prompkg.ProbeHandler("/-/ready", cpf, webConfigGenerator, webRoutePrefix)
startupProbe := &v1.Probe{
ProbeHandler: readyProbeHandler,
TimeoutSeconds: prompkg.ProbeTimeoutSeconds,
PeriodSeconds: 15,
FailureThreshold: 60,
}
readinessProbe := &v1.Probe{
ProbeHandler: readyProbeHandler,
TimeoutSeconds: prompkg.ProbeTimeoutSeconds,
PeriodSeconds: 5,
FailureThreshold: 3,
}
livenessProbe := &v1.Probe{
ProbeHandler: prompkg.ProbeHandler("/-/healthy", cpf, webConfigGenerator, webRoutePrefix),
TimeoutSeconds: prompkg.ProbeTimeoutSeconds,
PeriodSeconds: 5,
FailureThreshold: 6,
}
podAnnotations, podLabels := prompkg.BuildPodMetadata(cpf, cg)
// When an existing selector label is modified or a new one is added, the new
// StatefulSet cannot match the existing pods. We should avoid changing such
// immutable fields whenever possible since doing so forces us into the
// 'recreate cycle' and can potentially lead to downtime. Any change here
// should be carefully evaluated.
podSelectorLabels := map[string]string{
"app.kubernetes.io/name": "prometheus",
"app.kubernetes.io/managed-by": "prometheus-operator",
"app.kubernetes.io/instance": promName,
"prometheus": promName,
prompkg.ShardLabelName: fmt.Sprintf("%d", shard),
prompkg.PrometheusNameLabelName: promName,
}
for k, v := range podSelectorLabels {
podLabels[k] = v
}
finalSelectorLabels := c.Labels.Merge(podSelectorLabels)
finalLabels := c.Labels.Merge(podLabels)
var additionalContainers, operatorInitContainers []v1.Container
prometheusURIScheme := "http"
if cpf.Web != nil && cpf.Web.TLSConfig != nil {
prometheusURIScheme = "https"
}
thanosContainer, err := createThanosContainer(&disableCompaction, p, thanos, c, prometheusURIScheme, webRoutePrefix)
if err != nil {
return nil, err
}
if thanosContainer != nil {
additionalContainers = append(additionalContainers, *thanosContainer)
}
if disableCompaction {
thanosBlockDuration := "2h"
if thanos != nil {
thanosBlockDuration = operator.StringValOrDefault(string(thanos.BlockDuration), thanosBlockDuration)
}
promArgs = append(promArgs, monitoringv1.Argument{Name: "storage.tsdb.max-block-duration", Value: thanosBlockDuration})
promArgs = append(promArgs, monitoringv1.Argument{Name: "storage.tsdb.min-block-duration", Value: thanosBlockDuration})
}
var watchedDirectories []string
configReloaderVolumeMounts := []v1.VolumeMount{
{
Name: "config",
MountPath: prompkg.ConfDir,
},
{
Name: "config-out",
MountPath: prompkg.ConfOutDir,
},
}
if len(ruleConfigMapNames) != 0 {
for _, name := range ruleConfigMapNames {
mountPath := prompkg.RulesDir + "/" + name
configReloaderVolumeMounts = append(configReloaderVolumeMounts, v1.VolumeMount{
Name: name,
MountPath: mountPath,
})
watchedDirectories = append(watchedDirectories, mountPath)
}
}
var minReadySeconds int32
if cpf.MinReadySeconds != nil {
minReadySeconds = int32(*cpf.MinReadySeconds)
}
operatorInitContainers = append(operatorInitContainers,
operator.CreateConfigReloader(
"init-config-reloader",
operator.ReloaderConfig(c.ReloaderConfig),
operator.ReloaderRunOnce(),
operator.LogFormat(cpf.LogFormat),
operator.LogLevel(cpf.LogLevel),
operator.VolumeMounts(configReloaderVolumeMounts),
operator.ConfigFile(path.Join(prompkg.ConfDir, prompkg.ConfigFilename)),
operator.ConfigEnvsubstFile(path.Join(prompkg.ConfOutDir, prompkg.ConfigEnvsubstFilename)),
operator.WatchedDirectories(watchedDirectories),
operator.Shard(shard),
operator.ImagePullPolicy(cpf.ImagePullPolicy),
),
)
initContainers, err := k8sutil.MergePatchContainers(operatorInitContainers, cpf.InitContainers)
if err != nil {
return nil, errors.Wrap(err, "failed to merge init containers spec")
}
containerArgs, err := operator.BuildArgs(promArgs, cpf.AdditionalArgs)
if err != nil {
return nil, err
}
boolFalse := false
boolTrue := true
operatorContainers := append([]v1.Container{
{
Name: "prometheus",
Image: pImagePath,
ImagePullPolicy: cpf.ImagePullPolicy,
Ports: ports,
Args: containerArgs,
VolumeMounts: promVolumeMounts,
StartupProbe: startupProbe,
LivenessProbe: livenessProbe,
ReadinessProbe: readinessProbe,
Resources: cpf.Resources,
TerminationMessagePolicy: v1.TerminationMessageFallbackToLogsOnError,
SecurityContext: &v1.SecurityContext{
ReadOnlyRootFilesystem: &boolTrue,
AllowPrivilegeEscalation: &boolFalse,
Capabilities: &v1.Capabilities{
Drop: []v1.Capability{"ALL"},
},
},
},
operator.CreateConfigReloader(
"config-reloader",
operator.ReloaderConfig(c.ReloaderConfig),
operator.ReloaderURL(url.URL{
Scheme: prometheusURIScheme,
Host: c.LocalHost + ":9090",
Path: path.Clean(webRoutePrefix + "/-/reload"),
}),
operator.ListenLocal(cpf.ListenLocal),
operator.LocalHost(c.LocalHost),
operator.LogFormat(cpf.LogFormat),
operator.LogLevel(cpf.LogLevel),
operator.ConfigFile(path.Join(prompkg.ConfDir, prompkg.ConfigFilename)),
operator.ConfigEnvsubstFile(path.Join(prompkg.ConfOutDir, prompkg.ConfigEnvsubstFilename)),
operator.WatchedDirectories(watchedDirectories), operator.VolumeMounts(configReloaderVolumeMounts),
operator.Shard(shard),
operator.ImagePullPolicy(cpf.ImagePullPolicy),
),
}, additionalContainers...)
containers, err := k8sutil.MergePatchContainers(operatorContainers, cpf.Containers)
if err != nil {
return nil, errors.Wrap(err, "failed to merge containers spec")
}
// PodManagementPolicy is set to Parallel to mitigate issues in kubernetes: https://github.com/kubernetes/kubernetes/issues/60164
// This is also mentioned as one of limitations of StatefulSets: https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#limitations
return &appsv1.StatefulSetSpec{
ServiceName: governingServiceName,
Replicas: cpf.Replicas,
PodManagementPolicy: appsv1.ParallelPodManagement,
UpdateStrategy: appsv1.StatefulSetUpdateStrategy{
Type: appsv1.RollingUpdateStatefulSetStrategyType,
},
MinReadySeconds: minReadySeconds,
Selector: &metav1.LabelSelector{
MatchLabels: finalSelectorLabels,
},
Template: v1.PodTemplateSpec{
ObjectMeta: metav1.ObjectMeta{
Labels: finalLabels,
Annotations: podAnnotations,
},
Spec: v1.PodSpec{
Containers: containers,
InitContainers: initContainers,
SecurityContext: cpf.SecurityContext,
ServiceAccountName: cpf.ServiceAccountName,
AutomountServiceAccountToken: &boolTrue,
NodeSelector: cpf.NodeSelector,
PriorityClassName: cpf.PriorityClassName,
TerminationGracePeriodSeconds: &terminationGracePeriod,
Volumes: volumes,
Tolerations: cpf.Tolerations,
Affinity: cpf.Affinity,
TopologySpreadConstraints: cpf.TopologySpreadConstraints,
HostAliases: operator.MakeHostAliases(cpf.HostAliases),
HostNetwork: cpf.HostNetwork,
},
},
}, nil
}
// appendServerArgs appends arguments that are only valid for the Prometheus server.
func appendServerArgs(
promArgs []monitoringv1.Argument,
cg *prompkg.ConfigGenerator,
retention monitoringv1.Duration,
retentionSize monitoringv1.ByteSize,
rules monitoringv1.Rules,
query *monitoringv1.QuerySpec,
allowOverlappingBlocks, enableAdminAPI bool) []monitoringv1.Argument {
var (
retentionTimeFlagName = "storage.tsdb.retention.time"
retentionTimeFlagValue = string(retention)
)
if cg.WithMaximumVersion("2.7.0").IsCompatible() {
retentionTimeFlagName = "storage.tsdb.retention"
if retention == "" {
retentionTimeFlagValue = defaultRetention
}
} else if retention == "" && retentionSize == "" {
retentionTimeFlagValue = defaultRetention
}
if retentionTimeFlagValue != "" {
promArgs = append(promArgs, monitoringv1.Argument{Name: retentionTimeFlagName, Value: retentionTimeFlagValue})
}
if retentionSize != "" {
retentionSizeFlag := monitoringv1.Argument{Name: "storage.tsdb.retention.size", Value: string(retentionSize)}
promArgs = cg.WithMinimumVersion("2.7.0").AppendCommandlineArgument(promArgs, retentionSizeFlag)
}
promArgs = append(promArgs,
monitoringv1.Argument{Name: "storage.tsdb.path", Value: prompkg.StorageDir},
)
if enableAdminAPI {
promArgs = append(promArgs, monitoringv1.Argument{Name: "web.enable-admin-api"})
}
if rules.Alert.ForOutageTolerance != "" {
promArgs = cg.WithMinimumVersion("2.4.0").AppendCommandlineArgument(promArgs, monitoringv1.Argument{Name: "rules.alert.for-outage-tolerance", Value: rules.Alert.ForOutageTolerance})
}
if rules.Alert.ForGracePeriod != "" {
promArgs = cg.WithMinimumVersion("2.4.0").AppendCommandlineArgument(promArgs, monitoringv1.Argument{Name: "rules.alert.for-grace-period", Value: rules.Alert.ForGracePeriod})
}
if rules.Alert.ResendDelay != "" {
promArgs = cg.WithMinimumVersion("2.4.0").AppendCommandlineArgument(promArgs, monitoringv1.Argument{Name: "rules.alert.resend-delay", Value: rules.Alert.ResendDelay})
}
if query != nil {
if query.LookbackDelta != nil {
promArgs = append(promArgs, monitoringv1.Argument{Name: "query.lookback-delta", Value: *query.LookbackDelta})
}
if query.MaxSamples != nil && *query.MaxSamples > 0 {
promArgs = cg.WithMinimumVersion("2.5.0").AppendCommandlineArgument(promArgs, monitoringv1.Argument{Name: "query.max-samples", Value: fmt.Sprintf("%d", *query.MaxSamples)})
}
if query.MaxConcurrency != nil && *query.MaxConcurrency > 1 {
promArgs = append(promArgs, monitoringv1.Argument{Name: "query.max-concurrency", Value: fmt.Sprintf("%d", *query.MaxConcurrency)})
}
if query.Timeout != nil {
promArgs = append(promArgs, monitoringv1.Argument{Name: "query.timeout", Value: string(*query.Timeout)})
}
}
if allowOverlappingBlocks {
promArgs = cg.WithMinimumVersion("2.11.0").WithMaximumVersion("2.39.0").AppendCommandlineArgument(promArgs, monitoringv1.Argument{Name: "storage.tsdb.allow-overlapping-blocks"})
}
return promArgs
}
// appendServerVolumes returns the volumes and volume mounts of the StatefulSet spec that are specific to the Prometheus server.
func appendServerVolumes(volumes []v1.Volume, volumeMounts []v1.VolumeMount, queryLogFile string, ruleConfigMapNames []string) ([]v1.Volume, []v1.VolumeMount) {
if volume, ok := queryLogFileVolume(queryLogFile); ok {
volumes = append(volumes, volume)
}
for _, name := range ruleConfigMapNames {
volumes = append(volumes, v1.Volume{
Name: name,
VolumeSource: v1.VolumeSource{
ConfigMap: &v1.ConfigMapVolumeSource{
LocalObjectReference: v1.LocalObjectReference{
Name: name,
},
},
},
})
}
for _, name := range ruleConfigMapNames {
volumeMounts = append(volumeMounts, v1.VolumeMount{
Name: name,
MountPath: prompkg.RulesDir + "/" + name,
})
}
if vmount, ok := queryLogFileVolumeMount(queryLogFile); ok {
volumeMounts = append(volumeMounts, vmount)
}
return volumes, volumeMounts
}
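
// createThanosContainer returns the Thanos sidecar container, or nil when no
// Thanos spec is given. When object storage uploads are configured, it also
// sets disableCompaction to true so that Prometheus compaction can't race
// with the sidecar's uploads.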
func createThanosContainer(
disableCompaction *bool,
p monitoringv1.PrometheusInterface,
thanos *monitoringv1.ThanosSpec,
c *operator.Config,
prometheusURIScheme, webRoutePrefix string) (*v1.Container, error) {
var container *v1.Container
cpf := p.GetCommonPrometheusFields()
if thanos != nil {
thanosImage, err := operator.BuildImagePath(
operator.StringPtrValOrDefault(thanos.Image, ""),
operator.StringPtrValOrDefault(thanos.BaseImage, c.ThanosDefaultBaseImage),
operator.StringPtrValOrDefault(thanos.Version, operator.DefaultThanosVersion),
operator.StringPtrValOrDefault(thanos.Tag, ""),
operator.StringPtrValOrDefault(thanos.SHA, ""),
)
if err != nil {
return nil, errors.Wrap(err, "failed to build image path")
}
var grpcBindAddress, httpBindAddress string
if thanos.ListenLocal || thanos.GRPCListenLocal {
grpcBindAddress = "127.0.0.1"
}
if thanos.ListenLocal || thanos.HTTPListenLocal {
httpBindAddress = "127.0.0.1"
}
thanosArgs := []monitoringv1.Argument{
{Name: "prometheus.url", Value: fmt.Sprintf("%s://%s:9090%s", prometheusURIScheme, c.LocalHost, path.Clean(webRoutePrefix))},
{Name: "prometheus.http-client", Value: `{"tls_config": {"insecure_skip_verify":true}}`},
{Name: "grpc-address", Value: fmt.Sprintf("%s:10901", grpcBindAddress)},
{Name: "http-address", Value: fmt.Sprintf("%s:10902", httpBindAddress)},
}
if thanos.GRPCServerTLSConfig != nil {
tls := thanos.GRPCServerTLSConfig
if tls.CertFile != "" {
thanosArgs = append(thanosArgs, monitoringv1.Argument{Name: "grpc-server-tls-cert", Value: tls.CertFile})
}
if tls.KeyFile != "" {
thanosArgs = append(thanosArgs, monitoringv1.Argument{Name: "grpc-server-tls-key", Value: tls.KeyFile})
}
if tls.CAFile != "" {
thanosArgs = append(thanosArgs, monitoringv1.Argument{Name: "grpc-server-tls-client-ca", Value: tls.CAFile})
}
}
boolFalse := false
boolTrue := true
container = &v1.Container{
Name: "thanos-sidecar",
Image: thanosImage,
ImagePullPolicy: cpf.ImagePullPolicy,
TerminationMessagePolicy: v1.TerminationMessageFallbackToLogsOnError,
SecurityContext: &v1.SecurityContext{
AllowPrivilegeEscalation: &boolFalse,
ReadOnlyRootFilesystem: &boolTrue,
Capabilities: &v1.Capabilities{
Drop: []v1.Capability{"ALL"},
},
},
Ports: []v1.ContainerPort{
{
Name: "http",
ContainerPort: 10902,
},
{
Name: "grpc",
ContainerPort: 10901,
},
},
Resources: thanos.Resources,
}
for _, thanosSideCarVM := range thanos.VolumeMounts {
container.VolumeMounts = append(container.VolumeMounts, v1.VolumeMount{
Name: thanosSideCarVM.Name,
MountPath: thanosSideCarVM.MountPath,
})
}
if thanos.ObjectStorageConfig != nil || thanos.ObjectStorageConfigFile != nil {
if thanos.ObjectStorageConfigFile != nil {
thanosArgs = append(thanosArgs, monitoringv1.Argument{Name: "objstore.config-file", Value: *thanos.ObjectStorageConfigFile})
} else {
thanosArgs = append(thanosArgs, monitoringv1.Argument{Name: "objstore.config", Value: "$(OBJSTORE_CONFIG)"})
container.Env = append(container.Env, v1.EnvVar{
Name: "OBJSTORE_CONFIG",
ValueFrom: &v1.EnvVarSource{
SecretKeyRef: thanos.ObjectStorageConfig,
},
})
}
volName := prompkg.VolumeName(p)
thanosArgs = append(thanosArgs, monitoringv1.Argument{Name: "tsdb.path", Value: prompkg.StorageDir})
container.VolumeMounts = append(
container.VolumeMounts,
v1.VolumeMount{
Name: volName,
MountPath: prompkg.StorageDir,
SubPath: prompkg.SubPathForStorage(cpf.Storage),
},
)
// NOTE(bwplotka): As described in https://thanos.io/components/sidecar.md/, we have to turn off
// Prometheus compaction to avoid races during upload when uploads are configured.
*disableCompaction = true
}
if thanos.TracingConfig != nil || len(thanos.TracingConfigFile) > 0 {
if len(thanos.TracingConfigFile) > 0 {
thanosArgs = append(thanosArgs, monitoringv1.Argument{Name: "tracing.config-file", Value: thanos.TracingConfigFile})
} else {
thanosArgs = append(thanosArgs, monitoringv1.Argument{Name: "tracing.config", Value: "$(TRACING_CONFIG)"})
container.Env = append(container.Env, v1.EnvVar{
Name: "TRACING_CONFIG",
ValueFrom: &v1.EnvVarSource{
SecretKeyRef: thanos.TracingConfig,
},
})
}
}
if thanos.LogLevel != "" {
thanosArgs = append(thanosArgs, monitoringv1.Argument{Name: "log.level", Value: thanos.LogLevel})
} else if cpf.LogLevel != "" {
thanosArgs = append(thanosArgs, monitoringv1.Argument{Name: "log.level", Value: cpf.LogLevel})
}
if thanos.LogFormat != "" {
thanosArgs = append(thanosArgs, monitoringv1.Argument{Name: "log.format", Value: thanos.LogFormat})
} else if cpf.LogFormat != "" {
thanosArgs = append(thanosArgs, monitoringv1.Argument{Name: "log.format", Value: cpf.LogFormat})
}
if thanos.MinTime != "" {
thanosArgs = append(thanosArgs, monitoringv1.Argument{Name: "min-time", Value: thanos.MinTime})
}
if thanos.ReadyTimeout != "" {
thanosArgs = append(thanosArgs, monitoringv1.Argument{Name: "prometheus.ready_timeout", Value: string(thanos.ReadyTimeout)})
}
thanosVersion, err := semver.ParseTolerant(operator.StringPtrValOrDefault(thanos.Version, operator.DefaultThanosVersion))
if err != nil {
return nil, errors.Wrap(err, "failed to parse Thanos version")
}
if thanos.GetConfigTimeout != "" && thanosVersion.GTE(semver.MustParse("0.29.0")) {
thanosArgs = append(thanosArgs, monitoringv1.Argument{Name: "prometheus.get_config_timeout", Value: string(thanos.GetConfigTimeout)})
}
if thanos.GetConfigInterval != "" && thanosVersion.GTE(semver.MustParse("0.29.0")) {
thanosArgs = append(thanosArgs, monitoringv1.Argument{Name: "prometheus.get_config_interval", Value: string(thanos.GetConfigInterval)})
}
containerArgs, err := operator.BuildArgs(thanosArgs, thanos.AdditionalArgs)
if err != nil {
return nil, err
}
container.Args = append([]string{"sidecar"}, containerArgs...)
}
return container, nil
}
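
// queryLogFileVolumeMount returns the mount for the default query log volume;
// the boolean is false when the query log file doesn't rely on the default volume.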
func queryLogFileVolumeMount(queryLogFile string) (v1.VolumeMount, bool) {
if !prompkg.UsesDefaultQueryLogVolume(queryLogFile) {
return v1.VolumeMount{}, false
}
return v1.VolumeMount{
Name: defaultQueryLogVolume,
ReadOnly: false,
MountPath: prompkg.DefaultQueryLogDirectory,
}, true
}
func queryLogFileVolume(queryLogFile string) (v1.Volume, bool) {
if !prompkg.UsesDefaultQueryLogVolume(queryLogFile) {
return v1.Volume{}, false
}
return v1.Volume{
Name: defaultQueryLogVolume,
VolumeSource: v1.VolumeSource{
EmptyDir: &v1.EmptyDirVolumeSource{},
},
}, true
}