/*
Copyright 2019-2021 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package nfdworker

import (
	"crypto/tls"
	"crypto/x509"
	"encoding/json"
	"fmt"
	"net"
	"os"
	"path/filepath"
	"regexp"
	"sort"
	"strings"
	"time"

	"golang.org/x/exp/maps"
	"golang.org/x/net/context"
	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials"
	"google.golang.org/grpc/credentials/insecure"
	"google.golang.org/grpc/health"
	"google.golang.org/grpc/health/grpc_health_v1"
	"k8s.io/apimachinery/pkg/api/errors"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/apimachinery/pkg/util/validation"
	"k8s.io/klog/v2"

	klogutils "sigs.k8s.io/node-feature-discovery/pkg/utils/klog"
	"sigs.k8s.io/yaml"

	apiequality "k8s.io/apimachinery/pkg/api/equality"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	nfdv1alpha1 "sigs.k8s.io/node-feature-discovery/pkg/apis/nfd/v1alpha1"
	"sigs.k8s.io/node-feature-discovery/pkg/features"
	nfdclient "sigs.k8s.io/node-feature-discovery/pkg/generated/clientset/versioned"
	pb "sigs.k8s.io/node-feature-discovery/pkg/labeler"
	"sigs.k8s.io/node-feature-discovery/pkg/utils"
	"sigs.k8s.io/node-feature-discovery/pkg/version"
	"sigs.k8s.io/node-feature-discovery/source"

	// Register all source packages
	_ "sigs.k8s.io/node-feature-discovery/source/cpu"
	_ "sigs.k8s.io/node-feature-discovery/source/custom"
	_ "sigs.k8s.io/node-feature-discovery/source/fake"
	_ "sigs.k8s.io/node-feature-discovery/source/kernel"
	_ "sigs.k8s.io/node-feature-discovery/source/local"
	_ "sigs.k8s.io/node-feature-discovery/source/memory"
	_ "sigs.k8s.io/node-feature-discovery/source/network"
	_ "sigs.k8s.io/node-feature-discovery/source/pci"
	_ "sigs.k8s.io/node-feature-discovery/source/storage"
	_ "sigs.k8s.io/node-feature-discovery/source/system"
	_ "sigs.k8s.io/node-feature-discovery/source/usb"
)

// NfdWorker is the interface for the nfd-worker daemon.
type NfdWorker interface {
	Run() error
	Stop()
}

// NFDConfig contains the configuration settings of NfdWorker.
type NFDConfig struct {
	Core    coreConfig
	Sources sourcesConfig
}
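
// coreConfig holds the "core" section of the nfd-worker configuration.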
type coreConfig struct {
	Klog           klogutils.KlogConfigOpts
	LabelWhiteList utils.RegexpVal
	NoPublish      bool
	FeatureSources []string
	Sources        *[]string
	LabelSources   []string
	SleepInterval  utils.DurationVal
}
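
// sourcesConfig holds the per-source configuration, keyed by source name.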
type sourcesConfig map[string]source.Config

// Labels are a Kubernetes representation of discovered features.
type Labels map[string]string

// Args are the command line arguments of NfdWorker.
type Args struct {
	CaFile             string
	CertFile           string
	ConfigFile         string
	KeyFile            string
	Klog               map[string]*utils.KlogFlagVal
	Kubeconfig         string
	Oneshot            bool
	Options            string
	Server             string
	ServerNameOverride string
	MetricsPort        int
	GrpcHealthPort     int

	Overrides ConfigOverrideArgs
}

// ConfigOverrideArgs are args that override config file options
type ConfigOverrideArgs struct {
	NoPublish      *bool
	FeatureSources *utils.StringSliceVal
	LabelSources   *utils.StringSliceVal
}
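
// nfdWorker implements the NfdWorker interface.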
type nfdWorker struct {
	args                Args
	certWatch           *utils.FsWatcher
	clientConn          *grpc.ClientConn
	configFilePath      string
	config              *NFDConfig
	kubernetesNamespace string
	grpcClient          pb.LabelerClient
	healthServer        *grpc.Server
	nfdClient           *nfdclient.Clientset
	stop                chan struct{} // channel for signaling stop
	featureSources      []source.FeatureSource
	labelSources        []source.LabelSource
}

// infiniteTicker is a time.Ticker that can also represent an infinite,
// i.e. never firing, interval.
type infiniteTicker struct {
	*time.Ticker
}

// NewNfdWorker creates a new NfdWorker instance.
func NewNfdWorker(args *Args) (NfdWorker, error) {
	nfd := &nfdWorker{
		args:                *args,
		config:              &NFDConfig{},
		kubernetesNamespace: utils.GetKubernetesNamespace(),
		stop:                make(chan struct{}, 1),
	}

	// Check TLS related args
	if args.CertFile != "" || args.KeyFile != "" || args.CaFile != "" {
		if args.CertFile == "" {
			return nfd, fmt.Errorf("-cert-file needs to be specified alongside -key-file and -ca-file")
		}
		if args.KeyFile == "" {
			return nfd, fmt.Errorf("-key-file needs to be specified alongside -cert-file and -ca-file")
		}
		if args.CaFile == "" {
			return nfd, fmt.Errorf("-ca-file needs to be specified alongside -cert-file and -key-file")
		}
	}

	if args.ConfigFile != "" {
		nfd.configFilePath = filepath.Clean(args.ConfigFile)
	}

	return nfd, nil
}
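
// newDefaultConfig returns a new NFDConfig with default core settings.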
func newDefaultConfig() *NFDConfig {
	return &NFDConfig{
		Core: coreConfig{
			LabelWhiteList: utils.RegexpVal{Regexp: *regexp.MustCompile("")},
			SleepInterval:  utils.DurationVal{Duration: 60 * time.Second},
			FeatureSources: []string{"all"},
			LabelSources:   []string{"all"},
			Klog:           make(map[string]string),
		},
	}
}
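
// Reset resets the underlying ticker to the given interval or, for
// non-positive intervals, stops it so that it never fires.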
func (i *infiniteTicker) Reset(d time.Duration) {
	switch {
	case d > 0:
		i.Ticker.Reset(d)
	default:
		// If the sleep interval is not a positive number the ticker will act
		// as if it was set to an infinite duration by not ticking.
		i.Ticker.Stop()
	}
}
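
// startGrpcHealthServer starts a gRPC health server used as the liveness
// probe endpoint. Serve errors are reported through errChan.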
func (w *nfdWorker) startGrpcHealthServer(errChan chan<- error) error {
	lis, err := net.Listen("tcp", fmt.Sprintf(":%d", w.args.GrpcHealthPort))
	if err != nil {
		return fmt.Errorf("failed to listen: %w", err)
	}

	s := grpc.NewServer()
	grpc_health_v1.RegisterHealthServer(s, health.NewServer())
	klog.InfoS("gRPC health server serving", "port", w.args.GrpcHealthPort)

	go func() {
		defer func() {
			lis.Close()
		}()
		if err := s.Serve(lis); err != nil {
			errChan <- fmt.Errorf("gRPC health server exited with an error: %w", err)
		}
		klog.InfoS("gRPC health server stopped")
	}()
	w.healthServer = s
	return nil
}

// runFeatureDiscovery runs feature discovery and, unless NoPublish is set,
// advertises the resulting feature labels.
func (w *nfdWorker) runFeatureDiscovery() error {
	discoveryStart := time.Now()
	for _, s := range w.featureSources {
		currentSourceStart := time.Now()
		if err := s.Discover(); err != nil {
			klog.ErrorS(err, "feature discovery failed", "source", s.Name())
		}
		klog.V(3).InfoS("feature discovery completed", "featureSource", s.Name(), "duration", time.Since(currentSourceStart))
	}

	discoveryDuration := time.Since(discoveryStart)
	klog.V(2).InfoS("feature discovery of all sources completed", "duration", discoveryDuration)
	featureDiscoveryDuration.WithLabelValues(utils.NodeName()).Observe(discoveryDuration.Seconds())
	if w.config.Core.SleepInterval.Duration > 0 && discoveryDuration > w.config.Core.SleepInterval.Duration/2 {
		klog.InfoS("feature discovery sources took over half of sleep interval", "duration", discoveryDuration, "sleepInterval", w.config.Core.SleepInterval.Duration)
	}

	// Get the set of feature labels.
	labels := createFeatureLabels(w.labelSources, w.config.Core.LabelWhiteList.Regexp)

	// Update the node with the feature labels.
	if !w.config.Core.NoPublish {
		return w.advertiseFeatures(labels)
	}

	return nil
}

// Run NfdWorker client. Returns if a fatal error is encountered, or after
// one request if Oneshot is set to 'true' in the worker args.
func (w *nfdWorker) Run() error {
	klog.InfoS("Node Feature Discovery Worker", "version", version.Get(), "nodeName", utils.NodeName(), "namespace", w.kubernetesNamespace)

	// Create watcher for config file and read initial configuration
	configWatch, err := utils.CreateFsWatcher(time.Second, w.configFilePath)
	if err != nil {
		return err
	}
	if err := w.configure(w.configFilePath, w.args.Options); err != nil {
		return err
	}

	// Create watcher for TLS certificates
	w.certWatch, err = utils.CreateFsWatcher(time.Second, w.args.CaFile, w.args.CertFile, w.args.KeyFile)
	if err != nil {
		return err
	}

	defer w.grpcDisconnect()

	// Create ticker for feature discovery and run feature discovery once before the loop.
	labelTrigger := infiniteTicker{Ticker: time.NewTicker(1)}
	labelTrigger.Reset(w.config.Core.SleepInterval.Duration)
	defer labelTrigger.Stop()

	// Register to metrics server
	if w.args.MetricsPort > 0 {
		m := utils.CreateMetricsServer(w.args.MetricsPort,
			buildInfo,
			featureDiscoveryDuration)
		go m.Run()
		registerVersion(version.Get())
		defer m.Stop()
	}

	err = w.runFeatureDiscovery()
	if err != nil {
		return err
	}

	// Only run feature discovery once if Oneshot is set to 'true'.
	if w.args.Oneshot {
		return nil
	}

	grpcErr := make(chan error, 1)
	// Start gRPC server for liveness probe (at this point we're "live")
	if w.args.GrpcHealthPort != 0 {
		if err := w.startGrpcHealthServer(grpcErr); err != nil {
			return fmt.Errorf("failed to start gRPC health server: %w", err)
		}
	}

	for {
		select {
		case err := <-grpcErr:
			return fmt.Errorf("error in serving gRPC: %w", err)

		case <-labelTrigger.C:
			err = w.runFeatureDiscovery()
			if err != nil {
				return err
			}

		case <-configWatch.Events:
			klog.InfoS("reloading configuration")
			if err := w.configure(w.configFilePath, w.args.Options); err != nil {
				return err
			}
			// Manage connection to master
			if w.config.Core.NoPublish || !features.NFDFeatureGate.Enabled(features.NodeFeatureAPI) {
				w.grpcDisconnect()
			}

			// Always re-label after a re-config event. This way the new config
			// comes into effect even if the sleep interval is long (or infinite)
			labelTrigger.Reset(w.config.Core.SleepInterval.Duration)
			err = w.runFeatureDiscovery()
			if err != nil {
				return err
			}

		case <-w.certWatch.Events:
			klog.InfoS("TLS certificate update, renewing connection to nfd-master")
			w.grpcDisconnect()

		case <-w.stop:
			klog.InfoS("shutting down nfd-worker")
			if w.healthServer != nil {
				w.healthServer.GracefulStop()
			}
			configWatch.Close()
			w.certWatch.Close()
			return nil
		}
	}
}

// Stop NfdWorker
func (w *nfdWorker) Stop() {
	close(w.stop)
}

// getGrpcClient returns a client connection to the NFD gRPC server. It creates
// a connection if one hasn't yet been established.
func (w *nfdWorker) getGrpcClient() (pb.LabelerClient, error) {
	if w.grpcClient != nil {
		return w.grpcClient, nil
	}

	// Check if a connection already exists
	if w.clientConn != nil {
		return nil, fmt.Errorf("client connection already exists")
	}

	// Dial and create a client
	dialCtx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
	defer cancel()
	dialOpts := []grpc.DialOption{grpc.WithBlock()}
	if w.args.CaFile != "" || w.args.CertFile != "" || w.args.KeyFile != "" {
		// Load client cert for client authentication
		cert, err := tls.LoadX509KeyPair(w.args.CertFile, w.args.KeyFile)
		if err != nil {
			return nil, fmt.Errorf("failed to load client certificate: %v", err)
		}
		// Load CA cert for server cert verification
		caCert, err := os.ReadFile(w.args.CaFile)
		if err != nil {
			return nil, fmt.Errorf("failed to read root certificate file: %v", err)
		}
		caPool := x509.NewCertPool()
		if ok := caPool.AppendCertsFromPEM(caCert); !ok {
			return nil, fmt.Errorf("failed to add certificate from '%s'", w.args.CaFile)
		}
		// Create TLS config
		tlsConfig := &tls.Config{
			Certificates: []tls.Certificate{cert},
			RootCAs:      caPool,
			ServerName:   w.args.ServerNameOverride,
			MinVersion:   tls.VersionTLS13,
		}
		dialOpts = append(dialOpts, grpc.WithTransportCredentials(credentials.NewTLS(tlsConfig)))
	} else {
		dialOpts = append(dialOpts, grpc.WithTransportCredentials(insecure.NewCredentials()))
	}
	klog.InfoS("connecting to nfd-master", "address", w.args.Server)
	conn, err := grpc.DialContext(dialCtx, w.args.Server, dialOpts...)
	if err != nil {
		return nil, err
	}
	w.clientConn = conn

	w.grpcClient = pb.NewLabelerClient(w.clientConn)

	return w.grpcClient, nil
}

// grpcDisconnect closes the gRPC connection to NFD master
func (w *nfdWorker) grpcDisconnect() {
	if w.clientConn != nil {
		klog.InfoS("closing connection to nfd-master")
		w.clientConn.Close()
	}
	w.clientConn = nil
	w.grpcClient = nil
}
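
// sanitize enforces sane values for the core configuration.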
func (c *coreConfig) sanitize() {
	if c.SleepInterval.Duration > 0 && c.SleepInterval.Duration < time.Second {
		klog.InfoS("too short sleep interval specified, forcing to 1s",
			"sleepInterval", c.SleepInterval.Duration.String())
		c.SleepInterval = utils.DurationVal{Duration: time.Second}
	}
}
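
// configureCore applies the core configuration: klog settings and the sets of
// enabled feature and label sources.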
func (w *nfdWorker) configureCore(c coreConfig) error {
	// Handle klog
	err := klogutils.MergeKlogConfiguration(w.args.Klog, c.Klog)
	if err != nil {
		return err
	}

	// Determine enabled feature sources
	featureSources := make(map[string]source.FeatureSource)
	for _, name := range c.FeatureSources {
		if name == "all" {
			for n, s := range source.GetAllFeatureSources() {
				if ts, ok := s.(source.SupplementalSource); !ok || !ts.DisableByDefault() {
					featureSources[n] = s
				}
			}
		} else {
			disable := false
			strippedName := name
			if strings.HasPrefix(name, "-") {
				strippedName = name[1:]
				disable = true
			}
			if s := source.GetFeatureSource(strippedName); s != nil {
				if !disable {
					featureSources[name] = s
				} else {
					delete(featureSources, strippedName)
				}
			} else {
				klog.InfoS("skipping unknown source specified in core.featureSources", "featureSource", name)
			}
		}
	}

	w.featureSources = maps.Values(featureSources)

	sort.Slice(w.featureSources, func(i, j int) bool { return w.featureSources[i].Name() < w.featureSources[j].Name() })

	// Determine enabled label sources
	labelSources := make(map[string]source.LabelSource)
	for _, name := range c.LabelSources {
		if name == "all" {
			for n, s := range source.GetAllLabelSources() {
				if ts, ok := s.(source.SupplementalSource); !ok || !ts.DisableByDefault() {
					labelSources[n] = s
				}
			}
		} else {
			disable := false
			strippedName := name
			if strings.HasPrefix(name, "-") {
				strippedName = name[1:]
				disable = true
			}
			if s := source.GetLabelSource(strippedName); s != nil {
				if !disable {
					labelSources[name] = s
				} else {
					delete(labelSources, strippedName)
				}
			} else {
				klog.InfoS("skipping unknown source specified in core.labelSources (or -label-sources)", "labelSource", name)
			}
		}
	}

	w.labelSources = maps.Values(labelSources)

	sort.Slice(w.labelSources, func(i, j int) bool {
		iP, jP := w.labelSources[i].Priority(), w.labelSources[j].Priority()
		if iP != jP {
			return iP < jP
		}
		return w.labelSources[i].Name() < w.labelSources[j].Name()
	})

	if klogV := klog.V(1); klogV.Enabled() {
		n := make([]string, len(w.featureSources))
		for i, s := range w.featureSources {
			n[i] = s.Name()
		}
		klogV.InfoS("enabled feature sources", "featureSources", n)

		n = make([]string, len(w.labelSources))
		for i, s := range w.labelSources {
			n[i] = s.Name()
		}
		klogV.InfoS("enabled label sources", "labelSources", n)
	}

	return nil
}
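
// For reference, configure() below accepts YAML of roughly the following
// shape, both from the config file and inline through the -options flag.
// The values shown are illustrative, not a definitive example:
//
//	core:
//	  sleepInterval: 60s
//	  featureSources: ["all"]
//	  labelSources: ["all"]
//	sources: {}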

// Parse configuration options
func (w *nfdWorker) configure(filepath string, overrides string) error {
	// Create a new default config
	c := newDefaultConfig()

	confSources := source.GetAllConfigurableSources()
	c.Sources = make(map[string]source.Config, len(confSources))
	for _, s := range confSources {
		c.Sources[s.Name()] = s.NewConfig()
	}

	// Try to read and parse config file
	if filepath != "" {
		data, err := os.ReadFile(filepath)
		if err != nil {
			if os.IsNotExist(err) {
				klog.InfoS("config file not found, using defaults", "path", filepath)
			} else {
				return fmt.Errorf("error reading config file: %s", err)
			}
		} else {
			err = yaml.Unmarshal(data, c)
			if err != nil {
				return fmt.Errorf("failed to parse config file: %s", err)
			}

			if c.Core.Sources != nil {
				klog.InfoS("usage of deprecated 'core.sources' config file option, please use 'core.labelSources' instead")
				c.Core.LabelSources = *c.Core.Sources
			}

			klog.InfoS("configuration file parsed", "path", filepath)
		}
	}

	// Parse config overrides
	if err := yaml.Unmarshal([]byte(overrides), c); err != nil {
		return fmt.Errorf("failed to parse -options: %s", err)
	}

	if w.args.Overrides.NoPublish != nil {
		c.Core.NoPublish = *w.args.Overrides.NoPublish
	}
	if w.args.Overrides.FeatureSources != nil {
		c.Core.FeatureSources = *w.args.Overrides.FeatureSources
	}
	if w.args.Overrides.LabelSources != nil {
		c.Core.LabelSources = *w.args.Overrides.LabelSources
	}

	c.Core.sanitize()

	w.config = c

	if err := w.configureCore(c.Core); err != nil {
		return err
	}

	// (Re-)configure sources
	for _, s := range confSources {
		s.SetConfig(c.Sources[s.Name()])
	}

	klog.InfoS("configuration successfully updated", "configuration", w.config)

	return nil
}

// createFeatureLabels returns the set of feature labels from the enabled
// sources and the whitelist argument.
func createFeatureLabels(sources []source.LabelSource, labelWhiteList regexp.Regexp) (labels Labels) {
	labels = Labels{}

	// Get labels from all enabled label sources
	klog.InfoS("starting feature discovery...")
	for _, source := range sources {
		labelsFromSource, err := getFeatureLabels(source, labelWhiteList)
		if err != nil {
			klog.ErrorS(err, "discovery failed", "source", source.Name())
			continue
		}

		maps.Copy(labels, labelsFromSource)
	}

	if klogV := klog.V(1); klogV.Enabled() {
		klogV.InfoS("feature discovery completed", "labels", utils.DelayedDumper(labels))
	} else {
		klog.InfoS("feature discovery completed")
	}
	return labels
}

// getFeatureLabels returns node labels for features discovered by the
// supplied source.
func getFeatureLabels(source source.LabelSource, labelWhiteList regexp.Regexp) (labels Labels, err error) {
	labels = Labels{}

	features, err := source.GetLabels()
	if err != nil {
		return nil, err
	}

	for k, v := range features {
		name := k
		switch sourceName := source.Name(); sourceName {
		case "local", "custom":
			// No mangling of labels from the custom rules, hooks or feature files
		default:
			// Prefix for labels from other sources
			if !strings.Contains(name, "/") {
				name = nfdv1alpha1.FeatureLabelNs + "/" + sourceName + "-" + name
			}
		}

		// Split label name into namespace and name components
		split := strings.SplitN(name, "/", 2)

		nameForWhiteListing := name
		if len(split) == 2 {
			nameForWhiteListing = split[1]
		}

		// Validate label name.
		errs := validation.IsQualifiedName(name)
		if len(errs) > 0 {
			klog.InfoS("ignoring label with invalid name", "labelKey", name, "errors", errs)
			continue
		}

		value := fmt.Sprintf("%v", v)
		// Validate label value
		errs = validation.IsValidLabelValue(value)
		if len(errs) > 0 {
			klog.InfoS("ignoring label with invalid value", "labelKey", name, "labelValue", value, "errors", errs)
			continue
		}

		// Skip if label doesn't match labelWhiteList
		if !labelWhiteList.MatchString(nameForWhiteListing) {
			klog.InfoS("label does not match the whitelist and will not be published.", "labelKey", nameForWhiteListing, "regexp", labelWhiteList.String())
			continue
		}
		labels[name] = value
	}

	return labels, nil
}

// advertiseFeatures advertises the features of a Kubernetes node
func (w *nfdWorker) advertiseFeatures(labels Labels) error {
	if features.NFDFeatureGate.Enabled(features.NodeFeatureAPI) {
		// Create/update NodeFeature CR object
		if err := w.updateNodeFeatureObject(labels); err != nil {
			return fmt.Errorf("failed to advertise features (via CRD API): %w", err)
		}
	} else {
		// Create/update feature labels through gRPC connection to nfd-master
		if err := w.advertiseFeatureLabels(labels); err != nil {
			return fmt.Errorf("failed to advertise features (via gRPC): %w", err)
		}
	}
	return nil
}

// advertiseFeatureLabels advertises the feature labels to a Kubernetes node
// via the NFD server.
func (w *nfdWorker) advertiseFeatureLabels(labels Labels) error {
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	klog.InfoS("sending labeling request to nfd-master")

	labelReq := pb.SetLabelsRequest{Labels: labels,
		Features:   source.GetAllFeatures(),
		NfdVersion: version.Get(),
		NodeName:   utils.NodeName()}

	cli, err := w.getGrpcClient()
	if err != nil {
		return err
	}
	_, err = cli.SetLabels(ctx, &labelReq)
	if err != nil {
		klog.ErrorS(err, "failed to label node")
		return err
	}

	return nil
}

// updateNodeFeatureObject creates/updates the node-specific NodeFeature custom resource.
func (m *nfdWorker) updateNodeFeatureObject(labels Labels) error {
	cli, err := m.getNfdClient()
	if err != nil {
		return err
	}
	nodename := utils.NodeName()
	namespace := m.kubernetesNamespace

	features := source.GetAllFeatures()

	// Create owner ref
	ownerRefs := []metav1.OwnerReference{}
	podName := os.Getenv("POD_NAME")
	podUID := os.Getenv("POD_UID")
	if podName != "" && podUID != "" {
		isTrue := true
		ownerRefs = []metav1.OwnerReference{
			{
				APIVersion: "v1",
				Kind:       "Pod",
				Name:       podName,
				UID:        types.UID(podUID),
				Controller: &isTrue,
			},
		}
	} else {
		klog.InfoS("Cannot set NodeFeature owner reference, POD_NAME and/or POD_UID not specified")
	}

	// TODO: we could implement some simple caching of the object, only get it
	// every 10 minutes or so because nobody else should really be modifying it
	if nfr, err := cli.NfdV1alpha1().NodeFeatures(namespace).Get(context.TODO(), nodename, metav1.GetOptions{}); errors.IsNotFound(err) {
		klog.InfoS("creating NodeFeature object", "nodefeature", klog.KObj(nfr))
		nfr = &nfdv1alpha1.NodeFeature{
			ObjectMeta: metav1.ObjectMeta{
				Name:            nodename,
				Annotations:     map[string]string{nfdv1alpha1.WorkerVersionAnnotation: version.Get()},
				Labels:          map[string]string{nfdv1alpha1.NodeFeatureObjNodeNameLabel: nodename},
				OwnerReferences: ownerRefs,
			},
			Spec: nfdv1alpha1.NodeFeatureSpec{
				Features: *features,
				Labels:   labels,
			},
		}

		nfrCreated, err := cli.NfdV1alpha1().NodeFeatures(namespace).Create(context.TODO(), nfr, metav1.CreateOptions{})
		if err != nil {
			return fmt.Errorf("failed to create NodeFeature object %q: %w", nfr.Name, err)
		}

		klog.V(4).InfoS("NodeFeature object created", "nodeFeature", utils.DelayedDumper(nfrCreated))
	} else if err != nil {
		return fmt.Errorf("failed to get NodeFeature object: %w", err)
	} else {
		nfrUpdated := nfr.DeepCopy()
		nfrUpdated.Annotations = map[string]string{nfdv1alpha1.WorkerVersionAnnotation: version.Get()}
		nfrUpdated.Labels = map[string]string{nfdv1alpha1.NodeFeatureObjNodeNameLabel: nodename}
		nfrUpdated.OwnerReferences = ownerRefs
		nfrUpdated.Spec = nfdv1alpha1.NodeFeatureSpec{
			Features: *features,
			Labels:   labels,
		}

		if !apiequality.Semantic.DeepEqual(nfr, nfrUpdated) {
			klog.InfoS("updating NodeFeature object", "nodefeature", klog.KObj(nfr))
			nfrUpdated, err = cli.NfdV1alpha1().NodeFeatures(namespace).Update(context.TODO(), nfrUpdated, metav1.UpdateOptions{})
			if err != nil {
				return fmt.Errorf("failed to update NodeFeature object %q: %w", nfr.Name, err)
			}
			klog.V(4).InfoS("NodeFeature object updated", "nodeFeature", utils.DelayedDumper(nfrUpdated))
		} else {
			klog.V(1).InfoS("no changes in NodeFeature object, not updating", "nodefeature", klog.KObj(nfr))
		}
	}
	return nil
}

// getNfdClient returns the clientset for using the nfd CRD api
func (m *nfdWorker) getNfdClient() (*nfdclient.Clientset, error) {
	if m.nfdClient != nil {
		return m.nfdClient, nil
	}

	kubeconfig, err := utils.GetKubeconfig(m.args.Kubeconfig)
	if err != nil {
		return nil, err
	}

	c, err := nfdclient.NewForConfig(kubeconfig)
	if err != nil {
		return nil, err
	}

	m.nfdClient = c

	return c, nil
}

// UnmarshalJSON implements the Unmarshaler interface from "encoding/json"
func (c *sourcesConfig) UnmarshalJSON(data []byte) error {
	// First do a raw parse to get the per-source data
	raw := map[string]json.RawMessage{}
	err := yaml.Unmarshal(data, &raw)
	if err != nil {
		return err
	}

	// Then parse each source-specific data structure
	// NOTE: we expect 'c' to be pre-populated with correct per-source data
	// types. Non-pre-populated keys are ignored.
	for k, rawv := range raw {
		if v, ok := (*c)[k]; ok {
			err := yaml.Unmarshal(rawv, &v)
			if err != nil {
				return fmt.Errorf("failed to parse %q source config: %v", k, err)
			}
		}
	}

	return nil
}