1
0
Fork 0
mirror of https://github.com/kubernetes-sigs/node-feature-discovery.git synced 2025-03-15 04:57:56 +00:00

Merge pull request #1535 from marquiz/devel/grpc-probe

nfd-master: run a separate gRPC health server
This commit is contained in:
Kubernetes Prow Robot 2024-01-05 15:24:28 +01:00 committed by GitHub
commit 4501bedd61
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 56 additions and 15 deletions

View file

@ -33,6 +33,7 @@ import (
const (
// ProgramName is the canonical name of this program
ProgramName = "nfd-master"
// GrpcHealthPort is the port number that main assigns to args.GrpcHealthPort
// before creating the NfdMaster instance.
GrpcHealthPort = 8082
)
func main() {
@ -100,6 +101,7 @@ func main() {
utils.ConfigureGrpcKlog()
// Get new NfdMaster instance
args.GrpcHealthPort = GrpcHealthPort
instance, err := master.NewNfdMaster(args)
if err != nil {
klog.ErrorS(err, "failed to initialize NfdMaster instance")

View file

@ -23,12 +23,12 @@ spec:
imagePullPolicy: Always
livenessProbe:
grpc:
port: 8080
port: 8082
initialDelaySeconds: 10
periodSeconds: 10
readinessProbe:
grpc:
port: 8080
port: 8082
initialDelaySeconds: 5
periodSeconds: 10
failureThreshold: 10
@ -37,5 +37,3 @@ spec:
ports:
- name: metrics
containerPort: 8081
- name: grpc
containerPort: 8080

View file

@ -43,12 +43,12 @@ spec:
imagePullPolicy: {{ .Values.image.pullPolicy }}
livenessProbe:
grpc:
port: 8080
port: 8082
initialDelaySeconds: 10
periodSeconds: 10
readinessProbe:
grpc:
port: 8080
port: 8082
initialDelaySeconds: 5
periodSeconds: 10
failureThreshold: 10

View file

@ -116,6 +116,9 @@ type Args struct {
CrdController bool
EnableNodeFeatureApi bool
Port int
// GrpcHealthPort is only needed to avoid races between tests (by skipping the health server).
// Could be removed when the gRPC labeler service is dropped (when nfd-worker tests stop running nfd-master).
GrpcHealthPort int
Prune bool
VerifyNodeName bool
Options string
@ -144,6 +147,7 @@ type nfdMaster struct {
nodeName string
configFilePath string
server *grpc.Server
healthServer *grpc.Server
stop chan struct{}
ready chan bool
apihelper apihelper.APIHelpers
@ -270,7 +274,11 @@ func (m *nfdMaster) Run() error {
// Run gRPC server
grpcErr := make(chan error, 1)
// If the NodeFeature API is enabled, don't register the labeler API
// server. Otherwise, register the labeler server.
if !m.args.EnableNodeFeatureApi {
go m.runGrpcServer(grpcErr)
}
// Run updater that handles events from the nfd CRD API.
if m.nfdController != nil {
@ -281,6 +289,13 @@ func (m *nfdMaster) Run() error {
}
}
// Start gRPC server for liveness probe (at this point we're "live")
if m.args.GrpcHealthPort != 0 {
if err := m.startGrpcHealthServer(grpcErr); err != nil {
return fmt.Errorf("failed to start gRPC health server: %w", err)
}
}
// Notify that we're ready to accept connections
m.ready <- true
close(m.ready)
@ -323,6 +338,32 @@ func (m *nfdMaster) Run() error {
}
}
// startGrpcHealthServer brings up a dedicated gRPC server that exposes only
// the standard gRPC health service, intended for Kubernetes readiness and
// liveness probes. It listens on the TCP port given by m.args.GrpcHealthPort.
//
// Binding the port happens synchronously, so a listen failure is returned to
// the caller. Serving happens in a background goroutine; if Serve returns a
// non-nil error it is wrapped and sent to errChan. On success the server is
// stored in m.healthServer.
//
// TODO: improve status checking e.g. with watchdog in the main event loop and
// checking that node updater pool is alive.
func (m *nfdMaster) startGrpcHealthServer(errChan chan<- error) error {
	addr := fmt.Sprintf(":%d", m.args.GrpcHealthPort)
	listener, err := net.Listen("tcp", addr)
	if err != nil {
		return fmt.Errorf("failed to listen: %w", err)
	}

	healthSrv := grpc.NewServer()
	grpc_health_v1.RegisterHealthServer(healthSrv, health.NewServer())
	klog.InfoS("gRPC health server serving", "port", m.args.GrpcHealthPort)

	go func() {
		// Release the listener once serving ends, whatever the reason.
		defer listener.Close()

		serveErr := healthSrv.Serve(listener)
		if serveErr != nil {
			errChan <- fmt.Errorf("gRPC health server exited with an error: %w", serveErr)
		}
		klog.InfoS("gRPC health server stopped")
	}()

	m.healthServer = healthSrv
	return nil
}
func (m *nfdMaster) runGrpcServer(errChan chan<- error) {
// Create server listening for TCP connections
lis, err := net.Listen("tcp", fmt.Sprintf(":%d", m.args.Port))
@ -352,13 +393,8 @@ func (m *nfdMaster) runGrpcServer(errChan chan<- error) {
}
m.server = grpc.NewServer(serverOpts...)
// If the NodeFeature API is enabled, don't register the labeler API
// server. Otherwise, register the labeler server.
if !m.args.EnableNodeFeatureApi {
pb.RegisterLabelerServer(m.server, m)
}
grpc_health_v1.RegisterHealthServer(m.server, health.NewServer())
klog.InfoS("gRPC server serving", "port", m.args.Port)
// Run gRPC server
@ -421,7 +457,12 @@ func (m *nfdMaster) nfdAPIUpdateHandler() {
// Stop NfdMaster
func (m *nfdMaster) Stop() {
if m.server != nil {
m.server.GracefulStop()
}
if m.healthServer != nil {
m.healthServer.GracefulStop()
}
if m.nfdController != nil {
m.nfdController.stop()