mirror of
https://github.com/kubernetes-sigs/node-feature-discovery.git
synced 2024-12-14 11:57:51 +00:00
nfd-worker: Add liveness probe
Signed-off-by: Oleg Zhurakivskyy <oleg.zhurakivskyy@intel.com>
This commit is contained in:
parent
35cc81969f
commit
8b63d17af7
5 changed files with 82 additions and 1 deletions
|
@ -32,7 +32,8 @@ import (
|
||||||
|
|
||||||
const (
|
const (
|
||||||
// ProgramName is the canonical name of this program
|
// ProgramName is the canonical name of this program
|
||||||
ProgramName = "nfd-worker"
|
ProgramName = "nfd-worker"
|
||||||
|
GrpcHealthPort = 8082
|
||||||
)
|
)
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
|
@ -81,6 +82,7 @@ func main() {
|
||||||
utils.ConfigureGrpcKlog()
|
utils.ConfigureGrpcKlog()
|
||||||
|
|
||||||
// Get new NfdWorker instance
|
// Get new NfdWorker instance
|
||||||
|
args.GrpcHealthPort = GrpcHealthPort
|
||||||
instance, err := worker.NewNfdWorker(args)
|
instance, err := worker.NewNfdWorker(args)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
klog.ErrorS(err, "failed to initialize NfdWorker instance")
|
klog.ErrorS(err, "failed to initialize NfdWorker instance")
|
||||||
|
|
|
@ -19,6 +19,17 @@ spec:
|
||||||
- name: nfd-worker
|
- name: nfd-worker
|
||||||
image: gcr.io/k8s-staging-nfd/node-feature-discovery:master
|
image: gcr.io/k8s-staging-nfd/node-feature-discovery:master
|
||||||
imagePullPolicy: Always
|
imagePullPolicy: Always
|
||||||
|
livenessProbe:
|
||||||
|
grpc:
|
||||||
|
port: 8082
|
||||||
|
initialDelaySeconds: 10
|
||||||
|
periodSeconds: 10
|
||||||
|
readinessProbe:
|
||||||
|
grpc:
|
||||||
|
port: 8082
|
||||||
|
initialDelaySeconds: 5
|
||||||
|
periodSeconds: 10
|
||||||
|
failureThreshold: 10
|
||||||
command:
|
command:
|
||||||
- "nfd-worker"
|
- "nfd-worker"
|
||||||
args:
|
args:
|
||||||
|
|
|
@ -43,6 +43,17 @@ spec:
|
||||||
{{- toYaml .Values.worker.securityContext | nindent 12 }}
|
{{- toYaml .Values.worker.securityContext | nindent 12 }}
|
||||||
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
|
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
|
||||||
imagePullPolicy: {{ .Values.image.pullPolicy }}
|
imagePullPolicy: {{ .Values.image.pullPolicy }}
|
||||||
|
livenessProbe:
|
||||||
|
grpc:
|
||||||
|
port: 8082
|
||||||
|
initialDelaySeconds: 10
|
||||||
|
periodSeconds: 10
|
||||||
|
readinessProbe:
|
||||||
|
grpc:
|
||||||
|
port: 8082
|
||||||
|
initialDelaySeconds: 5
|
||||||
|
periodSeconds: 10
|
||||||
|
failureThreshold: 10
|
||||||
env:
|
env:
|
||||||
- name: NODE_NAME
|
- name: NODE_NAME
|
||||||
valueFrom:
|
valueFrom:
|
||||||
|
|
|
@ -394,6 +394,20 @@ worker:
|
||||||
runAsNonRoot: true
|
runAsNonRoot: true
|
||||||
# runAsUser: 1000
|
# runAsUser: 1000
|
||||||
|
|
||||||
|
# livenessProbe: {}
|
||||||
|
## NOTE: Currently not configurable, defaults are provided for the sake of extra documentation.
|
||||||
|
# grpc:
|
||||||
|
# port: 8082
|
||||||
|
# initialDelaySeconds: 10
|
||||||
|
# periodSeconds: 10
|
||||||
|
# readinessProbe: {}
|
||||||
|
## NOTE: Currently not configurable, defaults are provided for the sake of extra documentation.
|
||||||
|
# grpc:
|
||||||
|
# port: 8082
|
||||||
|
# initialDelaySeconds: 5
|
||||||
|
# periodSeconds: 10
|
||||||
|
# failureThreshold: 10
|
||||||
|
|
||||||
serviceAccount:
|
serviceAccount:
|
||||||
# Specifies whether a service account should be created.
|
# Specifies whether a service account should be created.
|
||||||
# We create this by default to make it easier for downstream users to apply PodSecurityPolicies.
|
# We create this by default to make it easier for downstream users to apply PodSecurityPolicies.
|
||||||
|
|
|
@ -21,6 +21,7 @@ import (
|
||||||
"crypto/x509"
|
"crypto/x509"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"net"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"regexp"
|
"regexp"
|
||||||
|
@ -33,6 +34,8 @@ import (
|
||||||
"google.golang.org/grpc"
|
"google.golang.org/grpc"
|
||||||
"google.golang.org/grpc/credentials"
|
"google.golang.org/grpc/credentials"
|
||||||
"google.golang.org/grpc/credentials/insecure"
|
"google.golang.org/grpc/credentials/insecure"
|
||||||
|
"google.golang.org/grpc/health"
|
||||||
|
"google.golang.org/grpc/health/grpc_health_v1"
|
||||||
"k8s.io/apimachinery/pkg/api/errors"
|
"k8s.io/apimachinery/pkg/api/errors"
|
||||||
"k8s.io/apimachinery/pkg/types"
|
"k8s.io/apimachinery/pkg/types"
|
||||||
"k8s.io/apimachinery/pkg/util/validation"
|
"k8s.io/apimachinery/pkg/util/validation"
|
||||||
|
@ -104,6 +107,7 @@ type Args struct {
|
||||||
Server string
|
Server string
|
||||||
ServerNameOverride string
|
ServerNameOverride string
|
||||||
MetricsPort int
|
MetricsPort int
|
||||||
|
GrpcHealthPort int
|
||||||
|
|
||||||
Overrides ConfigOverrideArgs
|
Overrides ConfigOverrideArgs
|
||||||
}
|
}
|
||||||
|
@ -124,6 +128,7 @@ type nfdWorker struct {
|
||||||
config *NFDConfig
|
config *NFDConfig
|
||||||
kubernetesNamespace string
|
kubernetesNamespace string
|
||||||
grpcClient pb.LabelerClient
|
grpcClient pb.LabelerClient
|
||||||
|
healthServer *grpc.Server
|
||||||
nfdClient *nfdclient.Clientset
|
nfdClient *nfdclient.Clientset
|
||||||
stop chan struct{} // channel for signaling stop
|
stop chan struct{} // channel for signaling stop
|
||||||
featureSources []source.FeatureSource
|
featureSources []source.FeatureSource
|
||||||
|
@ -187,6 +192,29 @@ func (i *infiniteTicker) Reset(d time.Duration) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (w *nfdWorker) startGrpcHealthServer(errChan chan<- error) error {
|
||||||
|
lis, err := net.Listen("tcp", fmt.Sprintf(":%d", w.args.GrpcHealthPort))
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to listen: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
s := grpc.NewServer()
|
||||||
|
grpc_health_v1.RegisterHealthServer(s, health.NewServer())
|
||||||
|
klog.InfoS("gRPC health server serving", "port", w.args.GrpcHealthPort)
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
defer func() {
|
||||||
|
lis.Close()
|
||||||
|
}()
|
||||||
|
if err := s.Serve(lis); err != nil {
|
||||||
|
errChan <- fmt.Errorf("gRPC health server exited with an error: %w", err)
|
||||||
|
}
|
||||||
|
klog.InfoS("gRPC health server stopped")
|
||||||
|
}()
|
||||||
|
w.healthServer = s
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
// Run feature discovery.
|
// Run feature discovery.
|
||||||
func (w *nfdWorker) runFeatureDiscovery() error {
|
func (w *nfdWorker) runFeatureDiscovery() error {
|
||||||
discoveryStart := time.Now()
|
discoveryStart := time.Now()
|
||||||
|
@ -262,8 +290,20 @@ func (w *nfdWorker) Run() error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
grpcErr := make(chan error, 1)
|
||||||
|
|
||||||
|
// Start gRPC server for liveness probe (at this point we're "live")
|
||||||
|
if w.args.GrpcHealthPort != 0 {
|
||||||
|
if err := w.startGrpcHealthServer(grpcErr); err != nil {
|
||||||
|
return fmt.Errorf("failed to start gRPC health server: %w", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
|
case err := <-grpcErr:
|
||||||
|
return fmt.Errorf("error in serving gRPC: %w", err)
|
||||||
|
|
||||||
case <-labelTrigger.C:
|
case <-labelTrigger.C:
|
||||||
err = w.runFeatureDiscovery()
|
err = w.runFeatureDiscovery()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -294,6 +334,9 @@ func (w *nfdWorker) Run() error {
|
||||||
|
|
||||||
case <-w.stop:
|
case <-w.stop:
|
||||||
klog.InfoS("shutting down nfd-worker")
|
klog.InfoS("shutting down nfd-worker")
|
||||||
|
if w.healthServer != nil {
|
||||||
|
w.healthServer.GracefulStop()
|
||||||
|
}
|
||||||
configWatch.Close()
|
configWatch.Close()
|
||||||
w.certWatch.Close()
|
w.certWatch.Close()
|
||||||
return nil
|
return nil
|
||||||
|
|
Loading…
Reference in a new issue