1
0
Fork 0
mirror of https://github.com/kubernetes-sigs/node-feature-discovery.git synced 2024-12-14 11:57:51 +00:00

Merge pull request #1913 from tobiasgiese/helm-probes-fix

Add separate helm values for the liveness and readiness probes
This commit is contained in:
Kubernetes Prow Robot 2024-10-23 12:34:52 +01:00 committed by GitHub
commit 83ba1ffba9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 282 additions and 150 deletions

View file

@ -48,9 +48,38 @@ spec:
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
imagePullPolicy: {{ .Values.image.pullPolicy }}
livenessProbe:
{{- toYaml .Values.master.livenessProbe | nindent 12 }}
grpc:
port: {{ .Values.master.healthPort | default "8082" }}
{{- with .Values.master.livenessProbe.initialDelaySeconds }}
initialDelaySeconds: {{ . }}
{{- end }}
{{- with .Values.master.livenessProbe.failureThreshold }}
failureThreshold: {{ . }}
{{- end }}
{{- with .Values.master.livenessProbe.periodSeconds }}
periodSeconds: {{ . }}
{{- end }}
{{- with .Values.master.livenessProbe.timeoutSeconds }}
timeoutSeconds: {{ . }}
{{- end }}
readinessProbe:
{{- toYaml .Values.master.readinessProbe | nindent 12 }}
grpc:
port: {{ .Values.master.healthPort | default "8082" }}
{{- with .Values.master.readinessProbe.initialDelaySeconds }}
initialDelaySeconds: {{ . }}
{{- end }}
{{- with .Values.master.readinessProbe.failureThreshold }}
failureThreshold: {{ . }}
{{- end }}
{{- with .Values.master.readinessProbe.periodSeconds }}
periodSeconds: {{ . }}
{{- end }}
{{- with .Values.master.readinessProbe.timeoutSeconds }}
timeoutSeconds: {{ . }}
{{- end }}
{{- with .Values.master.readinessProbe.successThreshold }}
successThreshold: {{ . }}
{{- end }}
ports:
- containerPort: {{ .Values.master.port | default "8080" }}
name: grpc

View file

@ -45,9 +45,38 @@ spec:
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
imagePullPolicy: "{{ .Values.image.pullPolicy }}"
livenessProbe:
{{- toYaml .Values.topologyUpdater.livenessProbe | nindent 10 }}
grpc:
port: {{ .Values.topologyUpdater.healthPort | default "8082" }}
{{- with .Values.topologyUpdater.livenessProbe.initialDelaySeconds }}
initialDelaySeconds: {{ . }}
{{- end }}
{{- with .Values.topologyUpdater.livenessProbe.failureThreshold }}
failureThreshold: {{ . }}
{{- end }}
{{- with .Values.topologyUpdater.livenessProbe.periodSeconds }}
periodSeconds: {{ . }}
{{- end }}
{{- with .Values.topologyUpdater.livenessProbe.timeoutSeconds }}
timeoutSeconds: {{ . }}
{{- end }}
readinessProbe:
{{- toYaml .Values.topologyUpdater.readinessProbe | nindent 10 }}
grpc:
port: {{ .Values.topologyUpdater.healthPort | default "8082" }}
{{- with .Values.topologyUpdater.readinessProbe.initialDelaySeconds }}
initialDelaySeconds: {{ . }}
{{- end }}
{{- with .Values.topologyUpdater.readinessProbe.failureThreshold }}
failureThreshold: {{ . }}
{{- end }}
{{- with .Values.topologyUpdater.readinessProbe.periodSeconds }}
periodSeconds: {{ . }}
{{- end }}
{{- with .Values.topologyUpdater.readinessProbe.timeoutSeconds }}
timeoutSeconds: {{ . }}
{{- end }}
{{- with .Values.topologyUpdater.readinessProbe.successThreshold }}
successThreshold: {{ . }}
{{- end }}
env:
- name: NODE_NAME
valueFrom:

View file

@ -47,9 +47,38 @@ spec:
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
imagePullPolicy: {{ .Values.image.pullPolicy }}
livenessProbe:
{{- toYaml .Values.worker.livenessProbe | nindent 12 }}
grpc:
port: {{ .Values.worker.healthPort | default "8082" }}
{{- with .Values.worker.livenessProbe.initialDelaySeconds }}
initialDelaySeconds: {{ . }}
{{- end }}
{{- with .Values.worker.livenessProbe.failureThreshold }}
failureThreshold: {{ . }}
{{- end }}
{{- with .Values.worker.livenessProbe.periodSeconds }}
periodSeconds: {{ . }}
{{- end }}
{{- with .Values.worker.livenessProbe.timeoutSeconds }}
timeoutSeconds: {{ . }}
{{- end }}
readinessProbe:
{{- toYaml .Values.worker.readinessProbe | nindent 12 }}
grpc:
port: {{ .Values.worker.healthPort | default "8082" }}
{{- with .Values.worker.readinessProbe.initialDelaySeconds }}
initialDelaySeconds: {{ . }}
{{- end }}
{{- with .Values.worker.readinessProbe.failureThreshold }}
failureThreshold: {{ . }}
{{- end }}
{{- with .Values.worker.readinessProbe.periodSeconds }}
periodSeconds: {{ . }}
{{- end }}
{{- with .Values.worker.readinessProbe.timeoutSeconds }}
timeoutSeconds: {{ . }}
{{- end }}
{{- with .Values.worker.readinessProbe.successThreshold }}
successThreshold: {{ . }}
{{- end }}
env:
- name: NODE_NAME
valueFrom:

View file

@ -150,12 +150,15 @@ master:
initialDelaySeconds: 10
# failureThreshold: 3
# periodSeconds: 10
# timeoutSeconds: 1
readinessProbe:
grpc:
port: 8082
initialDelaySeconds: 5
failureThreshold: 10
# periodSeconds: 10
# timeoutSeconds: 1
# successThreshold: 1
worker:
enable: true
@ -426,12 +429,15 @@ worker:
initialDelaySeconds: 10
# failureThreshold: 3
# periodSeconds: 10
# timeoutSeconds: 1
readinessProbe:
grpc:
port: 8082
initialDelaySeconds: 5
failureThreshold: 10
# periodSeconds: 10
# timeoutSeconds: 1
# successThreshold: 1
serviceAccount:
# Specifies whether a service account should be created.
@ -520,12 +526,15 @@ topologyUpdater:
initialDelaySeconds: 10
# failureThreshold: 3
# periodSeconds: 10
# timeoutSeconds: 1
readinessProbe:
grpc:
port: 8082
initialDelaySeconds: 5
failureThreshold: 10
# periodSeconds: 10
# timeoutSeconds: 1
# successThreshold: 1
resources:
limits:

View file

@ -5,9 +5,11 @@ sort: 3
---
# Deployment with Helm
{: .no_toc}
## Table of contents
{: .no_toc .text-delta}
1. TOC
@ -71,7 +73,8 @@ To upgrade the `node-feature-discovery` deployment to {{ site.release }} via Hel
### From v0.7 and older
Please see the [uninstallation guide](https://kubernetes-sigs.github.io/node-feature-discovery/v0.7/get-started/deployment-and-usage.html#uninstallation).
Please see
the [uninstallation guide](https://kubernetes-sigs.github.io/node-feature-discovery/v0.7/get-started/deployment-and-usage.html#uninstallation).
And then follow the standard [deployment instructions](#deployment).
### From v0.8 - v0.11
@ -148,11 +151,11 @@ Chart parameters are available.
### General parameters
| Name | Type | Default | Description |
| --------------------------------------------------- | ------ | --------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|-----------------------------------------------------|--------|-----------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `image.repository` | string | `{{ site.container_image \| split: ":" \| first }}` | NFD image repository |
| `image.tag` | string | `{{ site.release }}` | NFD image tag |
| `image.pullPolicy` | string | `Always` | Image pull policy |
| `imagePullSecrets` | array | [] | ImagePullSecrets is an optional list of references to secrets in the same namespace to use for pulling any of the images used by this PodSpec. If specified, these secrets will be passed to individual puller implementations for them to use. For example, in the case of docker, only DockerConfig type secrets are honored. [More info](https://kubernetes.io/docs/concepts/containers/images#specifying-imagepullsecrets-on-a-pod). |
| `imagePullSecrets` | array | [] | ImagePullSecrets is an optional list of references to secrets in the same namespace to use for pulling any of the images used by this PodSpec. [More info](https://kubernetes.io/docs/concepts/containers/images#specifying-imagepullsecrets-on-a-pod). |
| `nameOverride` | string | | Override the name of the chart |
| `fullnameOverride` | string | | Override a default fully qualified app name |
| `tls.enable` | bool | false | Specifies whether to use TLS for communications between components. **NOTE**: this parameter is related to the deprecated gRPC API and will be removed with it in a future release. |
@ -174,13 +177,13 @@ API's you need to install the prometheus operator in your cluster.
### Master pod parameters
| Name | Type | Default | Description |
| ----------------------------------- | ------- | -------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|---------------------------------------------|---------|----------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `master.*` | dict | | NFD master deployment configuration |
| `master.enable` | bool | true | Specifies whether nfd-master should be deployed |
| `master.hostNetwork` | bool | false | Specifies whether to enable or disable running the container in the host's network namespace |
| `master.port` | integer | | Specifies the TCP port that nfd-master listens for incoming requests. **NOTE**: this parameter is related to the deprecated gRPC API and will be removed with it in a future release |
| `master.metricsPort` | integer | 8081 | Port on which to expose metrics from components to prometheus operator |
| `master.healthPort` | integer | 8082 | Port on which to expose the grpc health endpoint |
| `master.healthPort` | integer | 8082 | Port on which to expose the grpc health endpoint, will be also used for the probes |
| `master.instance` | string | | Instance name. Used to separate annotation namespaces for multiple parallel deployments |
| `master.resyncPeriod` | string | | NFD API controller resync period. |
| `master.extraLabelNs` | array | [] | List of allowed extra label namespaces |
@ -198,7 +201,7 @@ API's you need to install the prometheus operator in your cluster.
| `master.service.type` | string | ClusterIP | NFD master service type. **NOTE**: this parameter is related to the deprecated gRPC API and will be removed with it in a future release |
| `master.service.port` | integer | 8080 | NFD master service port. **NOTE**: this parameter is related to the deprecated gRPC API and will be removed with it in a future release |
| `master.resources.limits` | dict | {memory: 4Gi} | NFD master pod [resources limits](https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/#requests-and-limits) |
| `master.resources.requests` | dict | {cpu: 100m, memory: 128Mi} | NFD master pod [resources requests](https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/#requests-and-limits). You may want to use the same value for `requests.memory` and `limits.memory`. The “requests” value affects scheduling to accommodate pods on nodes. If there is a large difference between “requests” and “limits” and nodes experience memory pressure, the kernel may invoke the OOM Killer, even if the memory does not exceed the “limits” threshold. This can cause unexpected pod evictions. Memory cannot be compressed and once allocated to a pod, it can only be reclaimed by killing the pod. [Natan Yellin 22/09/2022](https://home.robusta.dev/blog/kubernetes-memory-limit) that discusses this issue. |
| `master.resources.requests` | dict | {cpu: 100m, memory: 128Mi} | NFD master pod [resources requests](https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/#requests-and-limits). See `[0]` for more info |
| `master.tolerations` | dict | _Schedule to control-plane node_ | NFD master pod [tolerations](https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/) |
| `master.annotations` | dict | {} | NFD master pod [annotations](https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/) |
| `master.affinity` | dict | | NFD master pod required [node affinity](https://kubernetes.io/docs/tasks/configure-pod-container/assign-pods-nodes-using-node-affinity/) |
@ -208,18 +211,36 @@ API's you need to install the prometheus operator in your cluster.
| `master.extraArgs` | array | [] | Additional [command line arguments](../reference/master-commandline-reference.md) to pass to nfd-master |
| `master.extraEnvs` | array | [] | Additional environment variables to pass to nfd-master |
| `master.revisionHistoryLimit` | integer | | Specify how many old ReplicaSets for this Deployment you want to retain. [revisionHistoryLimit](https://kubernetes.io/docs/concepts/workloads/controllers/deployment/#revision-history-limit) |
| `master.livenessProbe` | dict | {"grpc":{"port":8082},"initialDelaySeconds":10} | NFD master pod [liveness probe](https://kubernetes.io/docs/concepts/configuration/liveness-readiness-startup-probes/#liveness-probe) |
| `master.readinessProbe` | dict | {"grpc":{"port":8082},"initialDelaySeconds":5,"failureThreshold": 10} | NFD master pod [readiness probe](https://kubernetes.io/docs/concepts/configuration/liveness-readiness-startup-probes/#readiness-probe)|
| `master.livenessProbe.initialDelaySeconds` | integer | 10 | Specifies the number of seconds after the container has started before liveness probes are initiated. |
| `master.livenessProbe.failureThreshold` | integer | 3 (by Kubernetes) | Specifies the number of consecutive failures of liveness probes before considering the pod as not ready. |
| `master.livenessProbe.periodSeconds` | integer | 10 (by Kubernetes) | Specifies how often (in seconds) to perform the liveness probe. |
| `master.livenessProbe.timeoutSeconds` | integer | 1 (by Kubernetes) | Specifies the number of seconds after which the probe times out. |
| `master.readinessProbe.initialDelaySeconds` | integer | 5 | Specifies the number of seconds after the container has started before readiness probes are initiated. |
| `master.readinessProbe.failureThreshold` | integer | 10 | Specifies the number of consecutive failures of readiness probes before considering the pod as not ready. |
| `master.readinessProbe.periodSeconds` | integer | 10 (by Kubernetes) | Specifies how often (in seconds) to perform the readiness probe. |
| `master.readinessProbe.timeoutSeconds` | integer | 1 (by Kubernetes) | Specifies the number of seconds after which the probe times out. |
| `master.readinessProbe.successThreshold` | integer | 1 (by Kubernetes) | Specifies the number of consecutive successes of readiness probes before considering the pod as ready. |
> `[0]` Additional info for `master.resources.requests`: \
> You may want to use the same value for `requests.memory` and `limits.memory`.
> The “requests” value affects scheduling to accommodate pods on nodes.
> If there is a large difference between “requests” and “limits” and nodes
> experience memory pressure, the kernel may invoke the OOM Killer, even if
> the memory does not exceed the “limits” threshold.
> This can cause unexpected pod evictions. Memory cannot be compressed and
> once allocated to a pod, it can only be reclaimed by killing the pod.
> [Natan Yellin 22/09/2022](https://home.robusta.dev/blog/kubernetes-memory-limit)
> that discusses this issue.
### Worker pod parameters
| Name | Type | Default | Description |
| ----------------------------------- | ------ | ----------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|---------------------------------------------|---------|-------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `worker.*` | dict | | NFD worker daemonset configuration |
| `worker.enable` | bool | true | Specifies whether nfd-worker should be deployed |
| `worker.hostNetwork` | bool | false | Specifies whether to enable or disable running the container in the host's network namespace |
| `worker.metricsPort` | int | 8081 | Port on which to expose metrics from components to prometheus operator |
| `worker.healthPort` | int | 8082 | Port on which to expose the grpc health endpoint |
| `worker.healthPort` | int | 8082 | Port on which to expose the grpc health endpoint, will be also used for the probes |
| `worker.config` | dict | | NFD worker [configuration](../reference/worker-configuration-reference) |
| `worker.podSecurityContext` | dict | {} | [PodSecurityContext](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-pod) holds pod-level security attributes and common container settins |
| `worker.securityContext` | dict | {} | Container [security settings](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-container) |
@ -238,13 +259,20 @@ API's you need to install the prometheus operator in your cluster.
| `worker.extraArgs` | array | [] | Additional [command line arguments](../reference/worker-commandline-reference.md) to pass to nfd-worker |
| `worker.extraEnvs` | array | [] | Additional environment variables to pass to nfd-worker |
| `worker.revisionHistoryLimit` | integer | | Specify how many old ControllerRevisions for this DaemonSet you want to retain. [revisionHistoryLimit](https://kubernetes.io/docs/reference/kubernetes-api/workload-resources/daemon-set-v1/ #DaemonSetSpec) |
| `worker.livenessProbe` | dict | {"grpc":{"port":8082},"initialDelaySeconds":10} | NFD worker pod [liveness probe](https://kubernetes.io/docs/concepts/configuration/liveness-readiness-startup-probes/#liveness-probe) |
| `worker.readinessProbe` | dict | {"grpc":{"port":8082},"initialDelaySeconds":5,"failureThreshold": 10} | NFD worker pod [readiness probe](https://kubernetes.io/docs/concepts/configuration/liveness-readiness-startup-probes/#readiness-probe)|
| `worker.livenessProbe.initialDelaySeconds` | integer | 10 | Specifies the number of seconds after the container has started before liveness probes are initiated. |
| `worker.livenessProbe.failureThreshold` | integer | 3 (by Kubernetes) | Specifies the number of consecutive failures of liveness probes before considering the pod as not ready. |
| `worker.livenessProbe.periodSeconds` | integer | 10 (by Kubernetes) | Specifies how often (in seconds) to perform the liveness probe. |
| `worker.livenessProbe.timeoutSeconds` | integer | 1 (by Kubernetes) | Specifies the number of seconds after which the probe times out. |
| `worker.readinessProbe.initialDelaySeconds` | integer | 5 | Specifies the number of seconds after the container has started before readiness probes are initiated. |
| `worker.readinessProbe.failureThreshold` | integer | 10 | Specifies the number of consecutive failures of readiness probes before considering the pod as not ready. |
| `worker.readinessProbe.periodSeconds` | integer | 10 (by Kubernetes) | Specifies how often (in seconds) to perform the readiness probe. |
| `worker.readinessProbe.timeoutSeconds` | integer | 1 (by Kubernetes) | Specifies the number of seconds after which the probe times out. |
| `worker.readinessProbe.successThreshold` | integer | 1 (by Kubernetes) | Specifies the number of consecutive successes of readiness probes before considering the pod as ready. |
### Topology updater parameters
| Name | Type | Default | Description |
| --------------------------------------------- | ------- | ------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|------------------------------------------------------|---------|--------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `topologyUpdater.*` | dict | | NFD Topology Updater configuration |
| `topologyUpdater.enable` | bool | false | Specifies whether the NFD Topology Updater should be created |
| `topologyUpdater.hostNetwork` | bool | false | Specifies whether to enable or disable running the container in the host's network namespace |
@ -254,7 +282,7 @@ API's you need to install the prometheus operator in your cluster.
| `topologyUpdater.serviceAccount.name` | string | | The name of the service account for topology updater to use. If not set and create is true, a name is generated using the fullname template and `-topology-updater` suffix |
| `topologyUpdater.rbac.create` | bool | true | Specifies whether to create [RBAC][rbac] configuration for topology updater |
| `topologyUpdater.metricsPort` | integer | 8081 | Port on which to expose prometheus metrics |
| `topologyUpdater.healthPort` | integer | 8082 | Port on which to expose the grpc health endpoint |
| `topologyUpdater.healthPort` | integer | 8082 | Port on which to expose the grpc health endpoint, will be also used for the probes |
| `topologyUpdater.kubeletConfigPath` | string | "" | Specifies the kubelet config host path |
| `topologyUpdater.kubeletPodResourcesSockPath` | string | "" | Specifies the kubelet sock path to read pod resources |
| `topologyUpdater.updateInterval` | string | 60s | Time to sleep between CR updates. Non-positive value implies no CR update. |
@ -274,13 +302,20 @@ API's you need to install the prometheus operator in your cluster.
| `topologyUpdater.extraArgs` | array | [] | Additional [command line arguments](../reference/topology-updater-commandline-reference.md) to pass to nfd-topology-updater |
| `topologyUpdater.extraEnvs` | array | [] | Additional environment variables to pass to nfd-topology-updater |
| `topologyUpdater.revisionHistoryLimit` | integer | | Specify how many old ControllerRevisions for this DaemonSet you want to retain. [revisionHistoryLimit](https://kubernetes.io/docs/reference/kubernetes-api/workload-resources/daemon-set-v1/#DaemonSetSpec) |
| `topologyUpdater.livenessProbe` | dict | {"grpc":{"port":8082},"initialDelaySeconds":10} | Topology updater pod [liveness probe](https://kubernetes.io/docs/concepts/configuration/liveness-readiness-startup-probes/#liveness-probe) |
| `topologyUpdater.readinessProbe` | dict | {"grpc":{"port":8082},"initialDelaySeconds":5,"failureThreshold": 10} | Topology updater pod [readiness probe](https://kubernetes.io/docs/concepts/configuration/liveness-readiness-startup-probes/#readiness-probe)|
| `topologyUpdater.livenessProbe.initialDelaySeconds` | integer | 10 | Specifies the number of seconds after the container has started before liveness probes are initiated. |
| `topologyUpdater.livenessProbe.failureThreshold` | integer | 3 (by Kubernetes) | Specifies the number of consecutive failures of liveness probes before considering the pod as not ready. |
| `topologyUpdater.livenessProbe.periodSeconds` | integer | 10 (by Kubernetes) | Specifies how often (in seconds) to perform the liveness probe. |
| `topologyUpdater.livenessProbe.timeoutSeconds` | integer | 1 (by Kubernetes) | Specifies the number of seconds after which the probe times out. |
| `topologyUpdater.readinessProbe.initialDelaySeconds` | integer | 5 | Specifies the number of seconds after the container has started before readiness probes are initiated. |
| `topologyUpdater.readinessProbe.failureThreshold` | integer | 10 | Specifies the number of consecutive failures of readiness probes before considering the pod as not ready. |
| `topologyUpdater.readinessProbe.periodSeconds` | integer | 10 (by Kubernetes) | Specifies how often (in seconds) to perform the readiness probe. |
| `topologyUpdater.readinessProbe.timeoutSeconds` | integer | 1 (by Kubernetes) | Specifies the number of seconds after which the probe times out. |
| `topologyUpdater.readinessProbe.successThreshold` | integer | 1 (by Kubernetes) | Specifies the number of consecutive successes of readiness probes before considering the pod as ready. |
### Garbage collector parameters
| Name | Type | Default | Description |
| ------------------------------------- | ------- | ------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|---------------------------------|---------|---------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `gc.*` | dict | | NFD Garbage Collector configuration |
| `gc.enable` | bool | true | Specifies whether the NFD Garbage Collector should be created |
| `gc.hostNetwork` | bool | false | Specifies whether to enable or disable running the container in the host's network namespace |
@ -303,4 +338,5 @@ API's you need to install the prometheus operator in your cluster.
| `gc.revisionHistoryLimit` | integer | | Specify how many old ReplicaSets for this Deployment you want to retain. [revisionHistoryLimit](https://kubernetes.io/docs/concepts/workloads/controllers/deployment/#revision-history-limit) |
<!-- Links -->
[rbac]: https://kubernetes.io/docs/reference/access-authn-authz/rbac/