1
0
Fork 0
mirror of https://github.com/kubernetes-sigs/node-feature-discovery.git synced 2024-12-14 11:57:51 +00:00

Add separate helm values for the liveness and readiness probes

Signed-off-by: Tobias Giese <tgiese@nvidia.com>
This commit is contained in:
Tobias Giese 2024-10-18 12:42:42 +02:00
parent 901fbe2866
commit 52c2fc6498
No known key found for this signature in database
GPG key ID: 9BBE28558D698365
5 changed files with 226 additions and 109 deletions

View file

@ -48,9 +48,38 @@ spec:
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
imagePullPolicy: {{ .Values.image.pullPolicy }} imagePullPolicy: {{ .Values.image.pullPolicy }}
livenessProbe: livenessProbe:
{{- toYaml .Values.master.livenessProbe | nindent 12 }} grpc:
port: {{ .Values.master.healthPort | default "8082" }}
{{- with .Values.master.livenessProbe.initialDelaySeconds }}
initialDelaySeconds: {{ . }}
{{- end }}
{{- with .Values.master.livenessProbe.failureThreshold }}
failureThreshold: {{ . }}
{{- end }}
{{- with .Values.master.livenessProbe.periodSeconds }}
periodSeconds: {{ . }}
{{- end }}
{{- with .Values.master.livenessProbe.timeoutSeconds }}
timeoutSeconds: {{ . }}
{{- end }}
readinessProbe: readinessProbe:
{{- toYaml .Values.master.readinessProbe | nindent 12 }} grpc:
port: {{ .Values.master.healthPort | default "8082" }}
{{- with .Values.master.readinessProbe.initialDelaySeconds }}
initialDelaySeconds: {{ . }}
{{- end }}
{{- with .Values.master.readinessProbe.failureThreshold }}
failureThreshold: {{ . }}
{{- end }}
{{- with .Values.master.readinessProbe.periodSeconds }}
periodSeconds: {{ . }}
{{- end }}
{{- with .Values.master.readinessProbe.timeoutSeconds }}
timeoutSeconds: {{ . }}
{{- end }}
{{- with .Values.master.readinessProbe.successThreshold }}
successThreshold: {{ . }}
{{- end }}
ports: ports:
- containerPort: {{ .Values.master.port | default "8080" }} - containerPort: {{ .Values.master.port | default "8080" }}
name: grpc name: grpc

View file

@ -45,9 +45,38 @@ spec:
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
imagePullPolicy: "{{ .Values.image.pullPolicy }}" imagePullPolicy: "{{ .Values.image.pullPolicy }}"
livenessProbe: livenessProbe:
{{- toYaml .Values.topologyUpdater.livenessProbe | nindent 10 }} grpc:
port: {{ .Values.topologyUpdater.healthPort | default "8082" }}
{{- with .Values.topologyUpdater.livenessProbe.initialDelaySeconds }}
initialDelaySeconds: {{ . }}
{{- end }}
{{- with .Values.topologyUpdater.livenessProbe.failureThreshold }}
failureThreshold: {{ . }}
{{- end }}
{{- with .Values.topologyUpdater.livenessProbe.periodSeconds }}
periodSeconds: {{ . }}
{{- end }}
{{- with .Values.topologyUpdater.livenessProbe.timeoutSeconds }}
timeoutSeconds: {{ . }}
{{- end }}
readinessProbe: readinessProbe:
{{- toYaml .Values.topologyUpdater.readinessProbe | nindent 10 }} grpc:
port: {{ .Values.topologyUpdater.healthPort | default "8082" }}
{{- with .Values.topologyUpdater.readinessProbe.initialDelaySeconds }}
initialDelaySeconds: {{ . }}
{{- end }}
{{- with .Values.topologyUpdater.readinessProbe.failureThreshold }}
failureThreshold: {{ . }}
{{- end }}
{{- with .Values.topologyUpdater.readinessProbe.periodSeconds }}
periodSeconds: {{ . }}
{{- end }}
{{- with .Values.topologyUpdater.readinessProbe.timeoutSeconds }}
timeoutSeconds: {{ . }}
{{- end }}
{{- with .Values.topologyUpdater.readinessProbe.successThreshold }}
successThreshold: {{ . }}
{{- end }}
env: env:
- name: NODE_NAME - name: NODE_NAME
valueFrom: valueFrom:

View file

@ -47,9 +47,38 @@ spec:
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
imagePullPolicy: {{ .Values.image.pullPolicy }} imagePullPolicy: {{ .Values.image.pullPolicy }}
livenessProbe: livenessProbe:
{{- toYaml .Values.worker.livenessProbe | nindent 12 }} grpc:
port: {{ .Values.worker.healthPort | default "8082" }}
{{- with .Values.worker.livenessProbe.initialDelaySeconds }}
initialDelaySeconds: {{ . }}
{{- end }}
{{- with .Values.worker.livenessProbe.failureThreshold }}
failureThreshold: {{ . }}
{{- end }}
{{- with .Values.worker.livenessProbe.periodSeconds }}
periodSeconds: {{ . }}
{{- end }}
{{- with .Values.worker.livenessProbe.timeoutSeconds }}
timeoutSeconds: {{ . }}
{{- end }}
readinessProbe: readinessProbe:
{{- toYaml .Values.worker.readinessProbe | nindent 12 }} grpc:
port: {{ .Values.worker.healthPort | default "8082" }}
{{- with .Values.worker.readinessProbe.initialDelaySeconds }}
initialDelaySeconds: {{ . }}
{{- end }}
{{- with .Values.worker.readinessProbe.failureThreshold }}
failureThreshold: {{ . }}
{{- end }}
{{- with .Values.worker.readinessProbe.periodSeconds }}
periodSeconds: {{ . }}
{{- end }}
{{- with .Values.worker.readinessProbe.timeoutSeconds }}
timeoutSeconds: {{ . }}
{{- end }}
{{- with .Values.worker.readinessProbe.successThreshold }}
successThreshold: {{ . }}
{{- end }}
env: env:
- name: NODE_NAME - name: NODE_NAME
valueFrom: valueFrom:

View file

@ -150,12 +150,15 @@ master:
initialDelaySeconds: 10 initialDelaySeconds: 10
# failureThreshold: 3 # failureThreshold: 3
# periodSeconds: 10 # periodSeconds: 10
# timeoutSeconds: 1
readinessProbe: readinessProbe:
grpc: grpc:
port: 8082 port: 8082
initialDelaySeconds: 5 initialDelaySeconds: 5
failureThreshold: 10 failureThreshold: 10
# periodSeconds: 10 # periodSeconds: 10
# timeoutSeconds: 1
# successThreshold: 1
worker: worker:
enable: true enable: true
@ -426,12 +429,15 @@ worker:
initialDelaySeconds: 10 initialDelaySeconds: 10
# failureThreshold: 3 # failureThreshold: 3
# periodSeconds: 10 # periodSeconds: 10
# timeoutSeconds: 1
readinessProbe: readinessProbe:
grpc: grpc:
port: 8082 port: 8082
initialDelaySeconds: 5 initialDelaySeconds: 5
failureThreshold: 10 failureThreshold: 10
# periodSeconds: 10 # periodSeconds: 10
# timeoutSeconds: 1
# successThreshold: 1
serviceAccount: serviceAccount:
# Specifies whether a service account should be created. # Specifies whether a service account should be created.
@ -520,12 +526,15 @@ topologyUpdater:
initialDelaySeconds: 10 initialDelaySeconds: 10
# failureThreshold: 3 # failureThreshold: 3
# periodSeconds: 10 # periodSeconds: 10
# timeoutSeconds: 1
readinessProbe: readinessProbe:
grpc: grpc:
port: 8082 port: 8082
initialDelaySeconds: 5 initialDelaySeconds: 5
failureThreshold: 10 failureThreshold: 10
# periodSeconds: 10 # periodSeconds: 10
# timeoutSeconds: 1
# successThreshold: 1
resources: resources:
limits: limits:

View file

@ -177,13 +177,13 @@ API's you need to install the prometheus operator in your cluster.
### Master pod parameters ### Master pod parameters
| Name | Type | Default | Description | | Name | Type | Default | Description |
|-------------------------------------|---------|-----------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| |---------------------------------------------|---------|----------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `master.*` | dict | | NFD master deployment configuration | | `master.*` | dict | | NFD master deployment configuration |
| `master.enable` | bool | true | Specifies whether nfd-master should be deployed | | `master.enable` | bool | true | Specifies whether nfd-master should be deployed |
| `master.hostNetwork` | bool | false | Specifies whether to enable or disable running the container in the host's network namespace | | `master.hostNetwork` | bool | false | Specifies whether to enable or disable running the container in the host's network namespace |
| `master.port` | integer | | Specifies the TCP port that nfd-master listens for incoming requests. **NOTE**: this parameter is related to the deprecated gRPC API and will be removed with it in a future release | | `master.port` | integer | | Specifies the TCP port that nfd-master listens for incoming requests. **NOTE**: this parameter is related to the deprecated gRPC API and will be removed with it in a future release |
| `master.metricsPort` | integer | 8081 | Port on which to expose metrics from components to prometheus operator | | `master.metricsPort` | integer | 8081 | Port on which to expose metrics from components to prometheus operator |
| `master.healthPort` | integer | 8082 | Port on which to expose the grpc health endpoint | | `master.healthPort` | integer | 8082 | Port on which to expose the grpc health endpoint, will be also used for the probes |
| `master.instance` | string | | Instance name. Used to separate annotation namespaces for multiple parallel deployments | | `master.instance` | string | | Instance name. Used to separate annotation namespaces for multiple parallel deployments |
| `master.resyncPeriod` | string | | NFD API controller resync period. | | `master.resyncPeriod` | string | | NFD API controller resync period. |
| `master.extraLabelNs` | array | [] | List of allowed extra label namespaces | | `master.extraLabelNs` | array | [] | List of allowed extra label namespaces |
@ -211,8 +211,15 @@ API's you need to install the prometheus operator in your cluster.
| `master.extraArgs` | array | [] | Additional [command line arguments](../reference/master-commandline-reference.md) to pass to nfd-master | | `master.extraArgs` | array | [] | Additional [command line arguments](../reference/master-commandline-reference.md) to pass to nfd-master |
| `master.extraEnvs` | array | [] | Additional environment variables to pass to nfd-master | | `master.extraEnvs` | array | [] | Additional environment variables to pass to nfd-master |
| `master.revisionHistoryLimit` | integer | | Specify how many old ReplicaSets for this Deployment you want to retain. [revisionHistoryLimit](https://kubernetes.io/docs/concepts/workloads/controllers/deployment/#revision-history-limit) | | `master.revisionHistoryLimit` | integer | | Specify how many old ReplicaSets for this Deployment you want to retain. [revisionHistoryLimit](https://kubernetes.io/docs/concepts/workloads/controllers/deployment/#revision-history-limit) |
| `master.livenessProbe` | dict | {"grpc":{"port":8082},"initialDelaySeconds":10} | NFD master pod [liveness probe](https://kubernetes.io/docs/concepts/configuration/liveness-readiness-startup-probes/#liveness-probe) | | `master.livenessProbe.initialDelaySeconds` | integer | 10 | Specifies the number of seconds after the container has started before liveness probes are initiated. |
| `master.readinessProbe` | dict | {"grpc":{"port":8082},"initialDelaySeconds":5,"failureThreshold": 10} | NFD master pod [readiness probe](https://kubernetes.io/docs/concepts/configuration/liveness-readiness-startup-probes/#readiness-probe) | | `master.livenessProbe.failureThreshold` | integer | 3 (by Kubernetes) | Specifies the number of consecutive failures of liveness probes before considering the pod as not ready. |
| `master.livenessProbe.periodSeconds` | integer | 10 (by Kubernetes) | Specifies how often (in seconds) to perform the liveness probe. |
| `master.livenessProbe.timeoutSeconds` | integer | 1 (by Kubernetes) | Specifies the number of seconds after which the probe times out. |
| `master.readinessProbe.initialDelaySeconds` | integer | 5 | Specifies the number of seconds after the container has started before readiness probes are initiated. |
| `master.readinessProbe.failureThreshold` | integer | 10 | Specifies the number of consecutive failures of readiness probes before considering the pod as not ready. |
| `master.readinessProbe.periodSeconds` | integer | 10 (by Kubernetes) | Specifies how often (in seconds) to perform the readiness probe. |
| `master.readinessProbe.timeoutSeconds` | integer | 1 (by Kubernetes) | Specifies the number of seconds after which the probe times out. |
| `master.readinessProbe.successThreshold` | integer | 1 (by Kubernetes) | Specifies the number of consecutive successes of readiness probes before considering the pod as ready. |
> `[0]` Additional info for `master.resources.requests`: \ > `[0]` Additional info for `master.resources.requests`: \
> You may want to use the same value for `requests.memory` and `limits.memory`. > You may want to use the same value for `requests.memory` and `limits.memory`.
@ -228,12 +235,12 @@ API's you need to install the prometheus operator in your cluster.
### Worker pod parameters ### Worker pod parameters
| Name | Type | Default | Description | | Name | Type | Default | Description |
|-------------------------------------|---------|-----------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| |---------------------------------------------|---------|-------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `worker.*` | dict | | NFD worker daemonset configuration | | `worker.*` | dict | | NFD worker daemonset configuration |
| `worker.enable` | bool | true | Specifies whether nfd-worker should be deployed | | `worker.enable` | bool | true | Specifies whether nfd-worker should be deployed |
| `worker.hostNetwork` | bool | false | Specifies whether to enable or disable running the container in the host's network namespace | | `worker.hostNetwork` | bool | false | Specifies whether to enable or disable running the container in the host's network namespace |
| `worker.metricsPort` | int | 8081 | Port on which to expose metrics from components to prometheus operator | | `worker.metricsPort` | int | 8081 | Port on which to expose metrics from components to prometheus operator |
| `worker.healthPort` | int | 8082 | Port on which to expose the grpc health endpoint | | `worker.healthPort` | int | 8082 | Port on which to expose the grpc health endpoint, will be also used for the probes |
| `worker.config` | dict | | NFD worker [configuration](../reference/worker-configuration-reference) | | `worker.config` | dict | | NFD worker [configuration](../reference/worker-configuration-reference) |
| `worker.podSecurityContext` | dict | {} | [PodSecurityContext](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-pod) holds pod-level security attributes and common container settins | | `worker.podSecurityContext` | dict | {} | [PodSecurityContext](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-pod) holds pod-level security attributes and common container settins |
| `worker.securityContext` | dict | {} | Container [security settings](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-container) | | `worker.securityContext` | dict | {} | Container [security settings](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-container) |
@ -252,13 +259,20 @@ API's you need to install the prometheus operator in your cluster.
| `worker.extraArgs` | array | [] | Additional [command line arguments](../reference/worker-commandline-reference.md) to pass to nfd-worker | | `worker.extraArgs` | array | [] | Additional [command line arguments](../reference/worker-commandline-reference.md) to pass to nfd-worker |
| `worker.extraEnvs` | array | [] | Additional environment variables to pass to nfd-worker | | `worker.extraEnvs` | array | [] | Additional environment variables to pass to nfd-worker |
| `worker.revisionHistoryLimit` | integer | | Specify how many old ControllerRevisions for this DaemonSet you want to retain. [revisionHistoryLimit](https://kubernetes.io/docs/reference/kubernetes-api/workload-resources/daemon-set-v1/ #DaemonSetSpec) | | `worker.revisionHistoryLimit` | integer | | Specify how many old ControllerRevisions for this DaemonSet you want to retain. [revisionHistoryLimit](https://kubernetes.io/docs/reference/kubernetes-api/workload-resources/daemon-set-v1/ #DaemonSetSpec) |
| `worker.livenessProbe` | dict | {"grpc":{"port":8082},"initialDelaySeconds":10} | NFD worker pod [liveness probe](https://kubernetes.io/docs/concepts/configuration/liveness-readiness-startup-probes/#liveness-probe) | | `worker.livenessProbe.initialDelaySeconds` | integer | 10 | Specifies the number of seconds after the container has started before liveness probes are initiated. |
| `worker.readinessProbe` | dict | {"grpc":{"port":8082},"initialDelaySeconds":5,"failureThreshold": 10} | NFD worker pod [readiness probe](https://kubernetes.io/docs/concepts/configuration/liveness-readiness-startup-probes/#readiness-probe) | | `worker.livenessProbe.failureThreshold` | integer | 3 (by Kubernetes) | Specifies the number of consecutive failures of liveness probes before considering the pod as not ready. |
| `worker.livenessProbe.periodSeconds` | integer | 10 (by Kubernetes) | Specifies how often (in seconds) to perform the liveness probe. |
| `worker.livenessProbe.timeoutSeconds` | integer | 1 (by Kubernetes) | Specifies the number of seconds after which the probe times out. |
| `worker.readinessProbe.initialDelaySeconds` | integer | 5 | Specifies the number of seconds after the container has started before readiness probes are initiated. |
| `worker.readinessProbe.failureThreshold` | integer | 10 | Specifies the number of consecutive failures of readiness probes before considering the pod as not ready. |
| `worker.readinessProbe.periodSeconds` | integer | 10 (by Kubernetes) | Specifies how often (in seconds) to perform the readiness probe. |
| `worker.readinessProbe.timeoutSeconds` | integer | 1 (by Kubernetes) | Specifies the number of seconds after which the probe times out. |
| `worker.readinessProbe.successThreshold` | integer | 1 (by Kubernetes) | Specifies the number of consecutive successes of readiness probes before considering the pod as ready. |
### Topology updater parameters ### Topology updater parameters
| Name | Type | Default | Description | | Name | Type | Default | Description |
|-----------------------------------------------|---------|-----------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| |------------------------------------------------------|---------|--------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `topologyUpdater.*` | dict | | NFD Topology Updater configuration | | `topologyUpdater.*` | dict | | NFD Topology Updater configuration |
| `topologyUpdater.enable` | bool | false | Specifies whether the NFD Topology Updater should be created | | `topologyUpdater.enable` | bool | false | Specifies whether the NFD Topology Updater should be created |
| `topologyUpdater.hostNetwork` | bool | false | Specifies whether to enable or disable running the container in the host's network namespace | | `topologyUpdater.hostNetwork` | bool | false | Specifies whether to enable or disable running the container in the host's network namespace |
@ -268,7 +282,7 @@ API's you need to install the prometheus operator in your cluster.
| `topologyUpdater.serviceAccount.name` | string | | The name of the service account for topology updater to use. If not set and create is true, a name is generated using the fullname template and `-topology-updater` suffix | | `topologyUpdater.serviceAccount.name` | string | | The name of the service account for topology updater to use. If not set and create is true, a name is generated using the fullname template and `-topology-updater` suffix |
| `topologyUpdater.rbac.create` | bool | true | Specifies whether to create [RBAC][rbac] configuration for topology updater | | `topologyUpdater.rbac.create` | bool | true | Specifies whether to create [RBAC][rbac] configuration for topology updater |
| `topologyUpdater.metricsPort` | integer | 8081 | Port on which to expose prometheus metrics | | `topologyUpdater.metricsPort` | integer | 8081 | Port on which to expose prometheus metrics |
| `topologyUpdater.healthPort` | integer | 8082 | Port on which to expose the grpc health endpoint | | `topologyUpdater.healthPort` | integer | 8082 | Port on which to expose the grpc health endpoint, will be also used for the probes |
| `topologyUpdater.kubeletConfigPath` | string | "" | Specifies the kubelet config host path | | `topologyUpdater.kubeletConfigPath` | string | "" | Specifies the kubelet config host path |
| `topologyUpdater.kubeletPodResourcesSockPath` | string | "" | Specifies the kubelet sock path to read pod resources | | `topologyUpdater.kubeletPodResourcesSockPath` | string | "" | Specifies the kubelet sock path to read pod resources |
| `topologyUpdater.updateInterval` | string | 60s | Time to sleep between CR updates. Non-positive value implies no CR update. | | `topologyUpdater.updateInterval` | string | 60s | Time to sleep between CR updates. Non-positive value implies no CR update. |
@ -288,8 +302,15 @@ API's you need to install the prometheus operator in your cluster.
| `topologyUpdater.extraArgs` | array | [] | Additional [command line arguments](../reference/topology-updater-commandline-reference.md) to pass to nfd-topology-updater | | `topologyUpdater.extraArgs` | array | [] | Additional [command line arguments](../reference/topology-updater-commandline-reference.md) to pass to nfd-topology-updater |
| `topologyUpdater.extraEnvs` | array | [] | Additional environment variables to pass to nfd-topology-updater | | `topologyUpdater.extraEnvs` | array | [] | Additional environment variables to pass to nfd-topology-updater |
| `topologyUpdater.revisionHistoryLimit` | integer | | Specify how many old ControllerRevisions for this DaemonSet you want to retain. [revisionHistoryLimit](https://kubernetes.io/docs/reference/kubernetes-api/workload-resources/daemon-set-v1/#DaemonSetSpec) | | `topologyUpdater.revisionHistoryLimit` | integer | | Specify how many old ControllerRevisions for this DaemonSet you want to retain. [revisionHistoryLimit](https://kubernetes.io/docs/reference/kubernetes-api/workload-resources/daemon-set-v1/#DaemonSetSpec) |
| `topologyUpdater.livenessProbe` | dict | {"grpc":{"port":8082},"initialDelaySeconds":10} | Topology updater pod [liveness probe](https://kubernetes.io/docs/concepts/configuration/liveness-readiness-startup-probes/#liveness-probe) | | `topologyUpdater.livenessProbe.initialDelaySeconds` | integer | 10 | Specifies the number of seconds after the container has started before liveness probes are initiated. |
| `topologyUpdater.readinessProbe` | dict | {"grpc":{"port":8082},"initialDelaySeconds":5,"failureThreshold": 10} | Topology updater pod [readiness probe](https://kubernetes.io/docs/concepts/configuration/liveness-readiness-startup-probes/#readiness-probe) | | `topologyUpdater.livenessProbe.failureThreshold` | integer | 3 (by Kubernetes) | Specifies the number of consecutive failures of liveness probes before considering the pod as not ready. |
| `topologyUpdater.livenessProbe.periodSeconds` | integer | 10 (by Kubernetes) | Specifies how often (in seconds) to perform the liveness probe. |
| `topologyUpdater.livenessProbe.timeoutSeconds` | integer | 1 (by Kubernetes) | Specifies the number of seconds after which the probe times out. |
| `topologyUpdater.readinessProbe.initialDelaySeconds` | integer | 5 | Specifies the number of seconds after the container has started before readiness probes are initiated. |
| `topologyUpdater.readinessProbe.failureThreshold` | integer | 10 | Specifies the number of consecutive failures of readiness probes before considering the pod as not ready. |
| `topologyUpdater.readinessProbe.periodSeconds` | integer | 10 (by Kubernetes) | Specifies how often (in seconds) to perform the readiness probe. |
| `topologyUpdater.readinessProbe.timeoutSeconds` | integer | 1 (by Kubernetes) | Specifies the number of seconds after which the probe times out. |
| `topologyUpdater.readinessProbe.successThreshold` | integer | 1 (by Kubernetes) | Specifies the number of consecutive successes of readiness probes before considering the pod as ready. |
### Garbage collector parameters ### Garbage collector parameters