mirror of
https://github.com/kubernetes-sigs/node-feature-discovery.git
synced 2025-03-16 21:38:23 +00:00
Merge pull request #1643 from ozhuraki/topology-health
nfd-topology-updater: Add liveness probe
This commit is contained in:
commit
fcf819ad9f
5 changed files with 82 additions and 0 deletions
|
@ -36,6 +36,7 @@ const (
|
||||||
// ProgramName is the canonical name of this program
|
// ProgramName is the canonical name of this program
|
||||||
ProgramName = "nfd-topology-updater"
|
ProgramName = "nfd-topology-updater"
|
||||||
kubeletSecurePort = 10250
|
kubeletSecurePort = 10250
|
||||||
|
GrpcHealthPort = 8082
|
||||||
)
|
)
|
||||||
|
|
||||||
var DefaultKubeletStateDir = path.Join(string(hostpath.VarDir), "lib", "kubelet")
|
var DefaultKubeletStateDir = path.Join(string(hostpath.VarDir), "lib", "kubelet")
|
||||||
|
@ -54,6 +55,7 @@ func main() {
|
||||||
utils.ConfigureGrpcKlog()
|
utils.ConfigureGrpcKlog()
|
||||||
|
|
||||||
// Get new TopologyUpdater instance
|
// Get new TopologyUpdater instance
|
||||||
|
args.GrpcHealthPort = GrpcHealthPort
|
||||||
instance, err := topology.NewTopologyUpdater(*args, *resourcemonitorArgs)
|
instance, err := topology.NewTopologyUpdater(*args, *resourcemonitorArgs)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
klog.ErrorS(err, "failed to initialize topology updater instance")
|
klog.ErrorS(err, "failed to initialize topology updater instance")
|
||||||
|
|
|
@ -19,6 +19,17 @@ spec:
|
||||||
- name: nfd-topology-updater
|
- name: nfd-topology-updater
|
||||||
image: gcr.io/k8s-staging-nfd/node-feature-discovery:master
|
image: gcr.io/k8s-staging-nfd/node-feature-discovery:master
|
||||||
imagePullPolicy: Always
|
imagePullPolicy: Always
|
||||||
|
livenessProbe:
|
||||||
|
grpc:
|
||||||
|
port: 8082
|
||||||
|
initialDelaySeconds: 10
|
||||||
|
periodSeconds: 10
|
||||||
|
readinessProbe:
|
||||||
|
grpc:
|
||||||
|
port: 8082
|
||||||
|
initialDelaySeconds: 5
|
||||||
|
periodSeconds: 10
|
||||||
|
failureThreshold: 10
|
||||||
command:
|
command:
|
||||||
- "nfd-topology-updater"
|
- "nfd-topology-updater"
|
||||||
args: []
|
args: []
|
||||||
|
|
|
@ -41,6 +41,17 @@ spec:
|
||||||
- name: topology-updater
|
- name: topology-updater
|
||||||
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
|
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
|
||||||
imagePullPolicy: "{{ .Values.image.pullPolicy }}"
|
imagePullPolicy: "{{ .Values.image.pullPolicy }}"
|
||||||
|
livenessProbe:
|
||||||
|
grpc:
|
||||||
|
port: 8082
|
||||||
|
initialDelaySeconds: 10
|
||||||
|
periodSeconds: 10
|
||||||
|
readinessProbe:
|
||||||
|
grpc:
|
||||||
|
port: 8082
|
||||||
|
initialDelaySeconds: 5
|
||||||
|
periodSeconds: 10
|
||||||
|
failureThreshold: 10
|
||||||
env:
|
env:
|
||||||
- name: NODE_NAME
|
- name: NODE_NAME
|
||||||
valueFrom:
|
valueFrom:
|
||||||
|
|
|
@ -475,6 +475,20 @@ topologyUpdater:
|
||||||
readOnlyRootFilesystem: true
|
readOnlyRootFilesystem: true
|
||||||
runAsUser: 0
|
runAsUser: 0
|
||||||
|
|
||||||
|
# livenessProbe: {}
|
||||||
|
## NOTE: Currently not configurable, defaults are provided for the sake of extra documentation.
|
||||||
|
# grpc:
|
||||||
|
# port: 8082
|
||||||
|
# initialDelaySeconds: 10
|
||||||
|
# periodSeconds: 10
|
||||||
|
# readinessProbe: {}
|
||||||
|
## NOTE: Currently not configurable, defaults are provided for the sake of extra documentation.
|
||||||
|
# grpc:
|
||||||
|
# port: 8082
|
||||||
|
# initialDelaySeconds: 5
|
||||||
|
# periodSeconds: 10
|
||||||
|
# failureThreshold: 10
|
||||||
|
|
||||||
resources:
|
resources:
|
||||||
limits:
|
limits:
|
||||||
cpu: 100m
|
cpu: 100m
|
||||||
|
|
|
@ -18,12 +18,16 @@ package nfdtopologyupdater
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"net"
|
||||||
"net/url"
|
"net/url"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
|
||||||
"golang.org/x/net/context"
|
"golang.org/x/net/context"
|
||||||
|
|
||||||
|
"google.golang.org/grpc"
|
||||||
|
"google.golang.org/grpc/health"
|
||||||
|
"google.golang.org/grpc/health/grpc_health_v1"
|
||||||
"k8s.io/apimachinery/pkg/api/errors"
|
"k8s.io/apimachinery/pkg/api/errors"
|
||||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||||
"k8s.io/apimachinery/pkg/types"
|
"k8s.io/apimachinery/pkg/types"
|
||||||
|
@ -58,6 +62,7 @@ type Args struct {
|
||||||
KubeConfigFile string
|
KubeConfigFile string
|
||||||
ConfigFile string
|
ConfigFile string
|
||||||
KubeletStateDir string
|
KubeletStateDir string
|
||||||
|
GrpcHealthPort int
|
||||||
|
|
||||||
Klog map[string]*utils.KlogFlagVal
|
Klog map[string]*utils.KlogFlagVal
|
||||||
}
|
}
|
||||||
|
@ -85,6 +90,7 @@ type nfdTopologyUpdater struct {
|
||||||
ownerRefs []metav1.OwnerReference
|
ownerRefs []metav1.OwnerReference
|
||||||
k8sClient k8sclient.Interface
|
k8sClient k8sclient.Interface
|
||||||
kubeletConfigFunc func() (*kubeletconfigv1beta1.KubeletConfiguration, error)
|
kubeletConfigFunc func() (*kubeletconfigv1beta1.KubeletConfiguration, error)
|
||||||
|
healthServer *grpc.Server
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewTopologyUpdater creates a new NfdTopologyUpdater instance.
|
// NewTopologyUpdater creates a new NfdTopologyUpdater instance.
|
||||||
|
@ -128,6 +134,29 @@ func (w *nfdTopologyUpdater) detectTopologyPolicyAndScope() (string, string, err
|
||||||
return klConfig.TopologyManagerPolicy, klConfig.TopologyManagerScope, nil
|
return klConfig.TopologyManagerPolicy, klConfig.TopologyManagerScope, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (w *nfdTopologyUpdater) startGrpcHealthServer(errChan chan<- error) error {
|
||||||
|
lis, err := net.Listen("tcp", fmt.Sprintf(":%d", w.args.GrpcHealthPort))
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to listen: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
s := grpc.NewServer()
|
||||||
|
grpc_health_v1.RegisterHealthServer(s, health.NewServer())
|
||||||
|
klog.InfoS("gRPC health server serving", "port", w.args.GrpcHealthPort)
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
defer func() {
|
||||||
|
lis.Close()
|
||||||
|
}()
|
||||||
|
if err := s.Serve(lis); err != nil {
|
||||||
|
errChan <- fmt.Errorf("gRPC health server exited with an error: %w", err)
|
||||||
|
}
|
||||||
|
klog.InfoS("gRPC health server stopped")
|
||||||
|
}()
|
||||||
|
w.healthServer = s
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
// Run nfdTopologyUpdater. Returns if a fatal error is encountered, or, after
|
// Run nfdTopologyUpdater. Returns if a fatal error is encountered, or, after
|
||||||
// one request if OneShot is set to 'true' in the updater args.
|
// one request if OneShot is set to 'true' in the updater args.
|
||||||
func (w *nfdTopologyUpdater) Run() error {
|
func (w *nfdTopologyUpdater) Run() error {
|
||||||
|
@ -187,8 +216,20 @@ func (w *nfdTopologyUpdater) Run() error {
|
||||||
return fmt.Errorf("failed to obtain node resource information: %w", err)
|
return fmt.Errorf("failed to obtain node resource information: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
grpcErr := make(chan error, 1)
|
||||||
|
|
||||||
|
// Start gRPC server for liveness probe (at this point we're "live")
|
||||||
|
if w.args.GrpcHealthPort != 0 {
|
||||||
|
if err := w.startGrpcHealthServer(grpcErr); err != nil {
|
||||||
|
return fmt.Errorf("failed to start gRPC health server: %w", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
|
case err := <-grpcErr:
|
||||||
|
return fmt.Errorf("error in serving gRPC: %w", err)
|
||||||
|
|
||||||
case info := <-w.eventSource:
|
case info := <-w.eventSource:
|
||||||
klog.V(4).InfoS("event received, scanning...", "event", info.Event)
|
klog.V(4).InfoS("event received, scanning...", "event", info.Event)
|
||||||
scanResponse, err := resScan.Scan()
|
scanResponse, err := resScan.Scan()
|
||||||
|
@ -217,6 +258,9 @@ func (w *nfdTopologyUpdater) Run() error {
|
||||||
|
|
||||||
case <-w.stop:
|
case <-w.stop:
|
||||||
klog.InfoS("shutting down nfd-topology-updater")
|
klog.InfoS("shutting down nfd-topology-updater")
|
||||||
|
if w.healthServer != nil {
|
||||||
|
w.healthServer.GracefulStop()
|
||||||
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Reference in a new issue