mirror of
https://github.com/kubernetes-sigs/node-feature-discovery.git
synced 2025-03-15 04:57:56 +00:00
Merge pull request #1535 from marquiz/devel/grpc-probe
nfd-master: run a separate gRPC health server
This commit is contained in:
commit
4501bedd61
4 changed files with 56 additions and 15 deletions
|
@ -32,7 +32,8 @@ import (
|
||||||
|
|
||||||
const (
|
const (
|
||||||
// ProgramName is the canonical name of this program
|
// ProgramName is the canonical name of this program
|
||||||
ProgramName = "nfd-master"
|
ProgramName = "nfd-master"
|
||||||
|
GrpcHealthPort = 8082
|
||||||
)
|
)
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
|
@ -100,6 +101,7 @@ func main() {
|
||||||
utils.ConfigureGrpcKlog()
|
utils.ConfigureGrpcKlog()
|
||||||
|
|
||||||
// Get new NfdMaster instance
|
// Get new NfdMaster instance
|
||||||
|
args.GrpcHealthPort = GrpcHealthPort
|
||||||
instance, err := master.NewNfdMaster(args)
|
instance, err := master.NewNfdMaster(args)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
klog.ErrorS(err, "failed to initialize NfdMaster instance")
|
klog.ErrorS(err, "failed to initialize NfdMaster instance")
|
||||||
|
|
|
@ -23,12 +23,12 @@ spec:
|
||||||
imagePullPolicy: Always
|
imagePullPolicy: Always
|
||||||
livenessProbe:
|
livenessProbe:
|
||||||
grpc:
|
grpc:
|
||||||
port: 8080
|
port: 8082
|
||||||
initialDelaySeconds: 10
|
initialDelaySeconds: 10
|
||||||
periodSeconds: 10
|
periodSeconds: 10
|
||||||
readinessProbe:
|
readinessProbe:
|
||||||
grpc:
|
grpc:
|
||||||
port: 8080
|
port: 8082
|
||||||
initialDelaySeconds: 5
|
initialDelaySeconds: 5
|
||||||
periodSeconds: 10
|
periodSeconds: 10
|
||||||
failureThreshold: 10
|
failureThreshold: 10
|
||||||
|
@ -37,5 +37,3 @@ spec:
|
||||||
ports:
|
ports:
|
||||||
- name: metrics
|
- name: metrics
|
||||||
containerPort: 8081
|
containerPort: 8081
|
||||||
- name: grpc
|
|
||||||
containerPort: 8080
|
|
||||||
|
|
|
@ -43,12 +43,12 @@ spec:
|
||||||
imagePullPolicy: {{ .Values.image.pullPolicy }}
|
imagePullPolicy: {{ .Values.image.pullPolicy }}
|
||||||
livenessProbe:
|
livenessProbe:
|
||||||
grpc:
|
grpc:
|
||||||
port: 8080
|
port: 8082
|
||||||
initialDelaySeconds: 10
|
initialDelaySeconds: 10
|
||||||
periodSeconds: 10
|
periodSeconds: 10
|
||||||
readinessProbe:
|
readinessProbe:
|
||||||
grpc:
|
grpc:
|
||||||
port: 8080
|
port: 8082
|
||||||
initialDelaySeconds: 5
|
initialDelaySeconds: 5
|
||||||
periodSeconds: 10
|
periodSeconds: 10
|
||||||
failureThreshold: 10
|
failureThreshold: 10
|
||||||
|
|
|
@ -116,6 +116,9 @@ type Args struct {
|
||||||
CrdController bool
|
CrdController bool
|
||||||
EnableNodeFeatureApi bool
|
EnableNodeFeatureApi bool
|
||||||
Port int
|
Port int
|
||||||
|
// GrpcHealthPort is only needed to avoid races between tests (by skipping the health server).
|
||||||
|
// Could be removed when gRPC labeler service is dropped (when nfd-worker tests stop running nfd-master).
|
||||||
|
GrpcHealthPort int
|
||||||
Prune bool
|
Prune bool
|
||||||
VerifyNodeName bool
|
VerifyNodeName bool
|
||||||
Options string
|
Options string
|
||||||
|
@ -144,6 +147,7 @@ type nfdMaster struct {
|
||||||
nodeName string
|
nodeName string
|
||||||
configFilePath string
|
configFilePath string
|
||||||
server *grpc.Server
|
server *grpc.Server
|
||||||
|
healthServer *grpc.Server
|
||||||
stop chan struct{}
|
stop chan struct{}
|
||||||
ready chan bool
|
ready chan bool
|
||||||
apihelper apihelper.APIHelpers
|
apihelper apihelper.APIHelpers
|
||||||
|
@ -270,7 +274,11 @@ func (m *nfdMaster) Run() error {
|
||||||
|
|
||||||
// Run gRPC server
|
// Run gRPC server
|
||||||
grpcErr := make(chan error, 1)
|
grpcErr := make(chan error, 1)
|
||||||
go m.runGrpcServer(grpcErr)
|
// If the NodeFeature API is enabled, don't register the labeler API
|
||||||
|
// server. Otherwise, register the labeler server.
|
||||||
|
if !m.args.EnableNodeFeatureApi {
|
||||||
|
go m.runGrpcServer(grpcErr)
|
||||||
|
}
|
||||||
|
|
||||||
// Run updater that handles events from the nfd CRD API.
|
// Run updater that handles events from the nfd CRD API.
|
||||||
if m.nfdController != nil {
|
if m.nfdController != nil {
|
||||||
|
@ -281,6 +289,13 @@ func (m *nfdMaster) Run() error {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Start gRPC server for liveness probe (at this point we're "live")
|
||||||
|
if m.args.GrpcHealthPort != 0 {
|
||||||
|
if err := m.startGrpcHealthServer(grpcErr); err != nil {
|
||||||
|
return fmt.Errorf("failed to start gRPC health server: %w", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Notify that we're ready to accept connections
|
// Notify that we're ready to accept connections
|
||||||
m.ready <- true
|
m.ready <- true
|
||||||
close(m.ready)
|
close(m.ready)
|
||||||
|
@ -323,6 +338,32 @@ func (m *nfdMaster) Run() error {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// startGrpcHealthServer starts a gRPC health server for Kubernetes readiness/liveness probes.
|
||||||
|
// TODO: improve status checking e.g. with watchdog in the main event loop and
|
||||||
|
// checking that node updater pool is alive.
|
||||||
|
func (m *nfdMaster) startGrpcHealthServer(errChan chan<- error) error {
|
||||||
|
lis, err := net.Listen("tcp", fmt.Sprintf(":%d", m.args.GrpcHealthPort))
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to listen: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
s := grpc.NewServer()
|
||||||
|
grpc_health_v1.RegisterHealthServer(s, health.NewServer())
|
||||||
|
klog.InfoS("gRPC health server serving", "port", m.args.GrpcHealthPort)
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
defer func() {
|
||||||
|
lis.Close()
|
||||||
|
}()
|
||||||
|
if err := s.Serve(lis); err != nil {
|
||||||
|
errChan <- fmt.Errorf("gRPC health server exited with an error: %w", err)
|
||||||
|
}
|
||||||
|
klog.InfoS("gRPC health server stopped")
|
||||||
|
}()
|
||||||
|
m.healthServer = s
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
func (m *nfdMaster) runGrpcServer(errChan chan<- error) {
|
func (m *nfdMaster) runGrpcServer(errChan chan<- error) {
|
||||||
// Create server listening for TCP connections
|
// Create server listening for TCP connections
|
||||||
lis, err := net.Listen("tcp", fmt.Sprintf(":%d", m.args.Port))
|
lis, err := net.Listen("tcp", fmt.Sprintf(":%d", m.args.Port))
|
||||||
|
@ -352,13 +393,8 @@ func (m *nfdMaster) runGrpcServer(errChan chan<- error) {
|
||||||
}
|
}
|
||||||
m.server = grpc.NewServer(serverOpts...)
|
m.server = grpc.NewServer(serverOpts...)
|
||||||
|
|
||||||
// If the NodeFeature API is enabled, don't register the labeler API
|
pb.RegisterLabelerServer(m.server, m)
|
||||||
// server. Otherwise, register the labeler server.
|
|
||||||
if !m.args.EnableNodeFeatureApi {
|
|
||||||
pb.RegisterLabelerServer(m.server, m)
|
|
||||||
}
|
|
||||||
|
|
||||||
grpc_health_v1.RegisterHealthServer(m.server, health.NewServer())
|
|
||||||
klog.InfoS("gRPC server serving", "port", m.args.Port)
|
klog.InfoS("gRPC server serving", "port", m.args.Port)
|
||||||
|
|
||||||
// Run gRPC server
|
// Run gRPC server
|
||||||
|
@ -421,7 +457,12 @@ func (m *nfdMaster) nfdAPIUpdateHandler() {
|
||||||
|
|
||||||
// Stop NfdMaster
|
// Stop NfdMaster
|
||||||
func (m *nfdMaster) Stop() {
|
func (m *nfdMaster) Stop() {
|
||||||
m.server.GracefulStop()
|
if m.server != nil {
|
||||||
|
m.server.GracefulStop()
|
||||||
|
}
|
||||||
|
if m.healthServer != nil {
|
||||||
|
m.healthServer.GracefulStop()
|
||||||
|
}
|
||||||
|
|
||||||
if m.nfdController != nil {
|
if m.nfdController != nil {
|
||||||
m.nfdController.stop()
|
m.nfdController.stop()
|
||||||
|
|
Loading…
Add table
Reference in a new issue