1
0
Fork 0
mirror of https://github.com/kubernetes-sigs/node-feature-discovery.git synced 2024-12-14 11:57:51 +00:00

nfd-worker: support creating NodeFeatures object

Support the new NodeFeatures object of the NFD CRD api. Add two new
command line options to nfd-worker:

 -kubeconfig               specifies the kubeconfig to use for
                           connecting k8s api (defaults to empty which
                           implies in-cluster config)
 -enable-nodefeature-api   enable the NodeFeature CRD API for
                           communicating node features to nfd-master,
                           will also automatically disable gRPC
                           (defaults to false)

No config file option for selecting the API is available as there should
be no need for dynamically selecting between gRPC and CRD. The
nfd-master configuration must be changed in tandem and it is safer (and
avoids awkward configuration races) to configure the whole NFD deployment
at once.

Default behavior of nfd-worker is not changed i.e. NodeFeatures object
creation is not enabled by default (but must be enabled with the command
line flag).

The patch also updates the kustomize and Helm deployment, adding RBAC
rules for nfd-worker and updating the example worker configuration.
This commit is contained in:
Markus Lehtonen 2022-08-12 13:10:48 +03:00
parent d1c91e129a
commit 237494463b
16 changed files with 243 additions and 14 deletions

View file

@ -101,6 +101,10 @@ func initFlags(flagset *flag.FlagSet) (*worker.Args, *worker.ConfigOverrideArgs)
"Config file to use.")
flagset.StringVar(&args.KeyFile, "key-file", "",
"Private key matching -cert-file")
flagset.BoolVar(&args.EnableNodeFeatureApi, "enable-nodefeature-api", false,
"Enable the NodeFeature CRD API for communicating with nfd-master. This will automatically disable the gRPC communication.")
flagset.StringVar(&args.Kubeconfig, "kubeconfig", "",
"Kubeconfig to use")
flagset.BoolVar(&args.Oneshot, "oneshot", false,
"Do not publish feature labels")
flagset.StringVar(&args.Options, "options", "",
@ -119,7 +123,7 @@ func initFlags(flagset *flag.FlagSet) (*worker.Args, *worker.ConfigOverrideArgs)
LabelSources: &utils.StringSliceVal{},
}
overrides.NoPublish = flagset.Bool("no-publish", false,
"Do not publish discovered features, disable connection to nfd-master.")
"Do not publish discovered features, disable connection to nfd-master and don't create NodeFeature object.")
flagset.Var(overrides.FeatureSources, "feature-sources",
"Comma separated list of feature sources. Special value 'all' enables all sources. "+
"Prefix the source name with '-' to disable it.")

View file

@ -7,3 +7,6 @@ resources:
- master-serviceaccount.yaml
- master-clusterrole.yaml
- master-clusterrolebinding.yaml
- worker-serviceaccount.yaml
- worker-role.yaml
- worker-rolebinding.yaml

View file

@ -0,0 +1,13 @@
# Role for nfd-worker: grants create, get and update access to the
# "nodefeatures" resource in the nfd.k8s-sigs.io API group.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: nfd-worker
rules:
- apiGroups:
- nfd.k8s-sigs.io
resources:
- nodefeatures
verbs:
- create
- get
- update

View file

@ -0,0 +1,12 @@
# RoleBinding for nfd-worker: binds the "nfd-worker" Role to the
# "nfd-worker" ServiceAccount (subject namespace is set to "default" here).
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: nfd-worker
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: nfd-worker
subjects:
- kind: ServiceAccount
name: nfd-worker
namespace: default

View file

@ -0,0 +1,4 @@
# ServiceAccount named "nfd-worker".
apiVersion: v1
kind: ServiceAccount
metadata:
name: nfd-worker

View file

@ -13,6 +13,7 @@ spec:
labels:
app: nfd-worker
spec:
serviceAccount: nfd-worker
dnsPolicy: ClusterFirstWithHostNet
containers:
- name: nfd-worker

View file

@ -12,6 +12,7 @@ spec:
labels:
app: nfd-worker
spec:
serviceAccount: nfd-worker
dnsPolicy: ClusterFirstWithHostNet
restartPolicy: Never
affinity:

View file

@ -0,0 +1,18 @@
{{- if .Values.worker.rbac.create }}
# Role for nfd-worker, rendered only when .Values.worker.rbac.create is set.
# Grants create, get and update access to the "nodefeatures" resource in the
# nfd.k8s-sigs.io API group.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: {{ include "node-feature-discovery.fullname" . }}-worker
labels:
{{- include "node-feature-discovery.labels" . | nindent 4 }}
rules:
- apiGroups:
- nfd.k8s-sigs.io
resources:
- nodefeatures
verbs:
- create
- get
- update
{{- end }}

View file

@ -0,0 +1,17 @@
{{- if .Values.worker.rbac.create }}
# RoleBinding for nfd-worker, rendered only when .Values.worker.rbac.create is
# set. Binds the "<fullname>-worker" Role to the worker ServiceAccount (name
# and namespace come from the chart's helper templates).
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: {{ include "node-feature-discovery.fullname" . }}-worker
labels:
{{- include "node-feature-discovery.labels" . | nindent 4 }}
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: {{ include "node-feature-discovery.fullname" . }}-worker
subjects:
- kind: ServiceAccount
name: {{ include "node-feature-discovery.worker.serviceAccountName" . }}
namespace: {{ include "node-feature-discovery.namespace" . }}
{{- end }}

View file

@ -46,6 +46,9 @@ spec:
- "nfd-worker"
args:
- "--server={{ include "node-feature-discovery.fullname" . }}-master:{{ .Values.master.service.port }}"
{{- if .Values.enableNodeFeatureApi }}
- "-enable-nodefeature-api"
{{- end }}
{{- if .Values.tls.enable }}
- "--ca-file=/etc/kubernetes/node-feature-discovery/certs/ca.crt"
- "--key-file=/etc/kubernetes/node-feature-discovery/certs/tls.key"

View file

@ -10,6 +10,8 @@ nameOverride: ""
fullnameOverride: ""
namespaceOverride: ""
enableNodeFeatureApi: false
master:
instance:
extraLabelNs: []
@ -338,6 +340,9 @@ worker:
# If not set and create is true, a name is generated using the fullname template
name:
rbac:
create: true
# Allow users to mount the hostPath /usr/src, useful for RHCOS on s390x
# Does not work on systems without /usr/src AND a read-only /usr, such as Talos
mountUsrSrc: false

View file

@ -97,6 +97,7 @@ We have introduced the following Chart parameters.
| `fullnameOverride` | string | | Override a default fully qualified app name |
| `tls.enable` | bool | false | Specifies whether to use TLS for communications between components |
| `tls.certManager` | bool | false | If enabled, requires [cert-manager](https://cert-manager.io/docs/) to be installed and will automatically create the required TLS certificates |
| `enableNodeFeatureApi` | bool | false | Enable the NodeFeature CRD API for communicating node features. This will automatically disable the gRPC communication. |
### Master pod parameters
@ -134,6 +135,7 @@ We have introduced the following Chart parameters.
| `worker.serviceAccount.create` | bool | true | Specifies whether a service account for nfd-worker should be created
| `worker.serviceAccount.annotations` | dict | {} | Annotations to add to the service account for nfd-worker
| `worker.serviceAccount.name` | string | | The name of the service account to use for nfd-worker. If not set and create is true, a name is generated using the fullname template (suffixed with `-worker`)
| `worker.rbac.create` | bool | true | Specifies whether to create [RBAC][rbac] configuration for nfd-worker
| `worker.mountUsrSrc` | bool | false | Specifies whether to allow users to mount the hostpath /usr/src. Does not work on systems without /usr/src AND a read-only /usr |
| `worker.resources` | dict | {} | NFD worker pod [resources management](https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/) |
| `worker.nodeSelector` | dict | {} | NFD worker pod [node selector](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#nodeselector) |

View file

@ -122,6 +122,22 @@ Example:
nfd-worker -key-file=/opt/nfd/worker.key -cert-file=/opt/nfd/worker.crt -ca-file=/opt/nfd/ca.crt
```
### -kubeconfig
The `-kubeconfig` flag specifies the kubeconfig to use for connecting to the
Kubernetes API server. It is only needed for manipulating NodeFeature
objects, and thus the flag only takes effect when
[`-enable-nodefeature-api`](#-enable-nodefeature-api) is specified. An empty
value (which is also the default) implies in-cluster kubeconfig.
Default: *empty*
Example:
```bash
nfd-worker -kubeconfig ${HOME}/.kube/config
```
### -server-name-override
The `-server-name-override` flag specifies the common name (CN) which to
@ -178,11 +194,33 @@ Example:
nfd-worker -label-sources=kernel,system,local
```
### -enable-nodefeature-api
The `-enable-nodefeature-api` flag enables the experimental NodeFeature CRD API
for communicating with nfd-master. This will also automatically disable the
gRPC communication to nfd-master. When enabled, nfd-worker will create per-node
NodeFeature objects that contain all discovered node features and the set of
feature labels to be created.
Default: false
Example:
```bash
nfd-worker -enable-nodefeature-api
```
### -no-publish
The `-no-publish` flag disables all communication with the nfd-master, making
it a "dry-run" flag for nfd-worker. NFD-Worker runs feature detection normally,
but no labeling requests are sent to nfd-master.
The `-no-publish` flag disables all communication with the nfd-master and the
Kubernetes API server. It is effectively a "dry-run" flag for nfd-worker.
NFD-Worker runs feature detection normally, but no labeling requests are sent
to nfd-master and no NodeFeature objects are created or updated in the API
server.
Note: This flag takes precedence over the
[`core.noPublish`](worker-configuration-reference#corenopublish)
configuration file option.
Default: *false*

View file

@ -131,10 +131,14 @@ core:
### core.noPublish
Setting `core.noPublish` to `true` disables all communication with the
nfd-master. It is effectively a "dry-run" flag: nfd-worker runs feature
detection normally, but no labeling requests are sent to nfd-master.
nfd-master and the Kubernetes API server. It is effectively a "dry-run" option.
NFD-Worker runs feature detection normally, but no labeling requests are sent
to nfd-master and no NodeFeature objects are created or updated in the API
server.
Note: Overridden by the `-no-publish` command line flag (if specified).
Note: Overridden by the
[`-no-publish`](worker-commandline-reference#-no-publish) command line flag (if
specified).
Default: `false`

View file

@ -49,6 +49,7 @@ type Args struct {
CaFile string
CertFile string
KeyFile string
Kubeconfig string
Server string
ServerNameOverride string

View file

@ -27,10 +27,16 @@ import (
"time"
"golang.org/x/net/context"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/util/validation"
"k8s.io/klog/v2"
"sigs.k8s.io/yaml"
apiequality "k8s.io/apimachinery/pkg/api/equality"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"sigs.k8s.io/node-feature-discovery/pkg/apihelper"
nfdv1alpha1 "sigs.k8s.io/node-feature-discovery/pkg/apis/nfd/v1alpha1"
nfdclient "sigs.k8s.io/node-feature-discovery/pkg/generated/clientset/versioned"
pb "sigs.k8s.io/node-feature-discovery/pkg/labeler"
clientcommon "sigs.k8s.io/node-feature-discovery/pkg/nfd-client"
"sigs.k8s.io/node-feature-discovery/pkg/utils"
@ -76,9 +82,10 @@ type Labels map[string]string
type Args struct {
clientcommon.Args
ConfigFile string
Oneshot bool
Options string
ConfigFile string
EnableNodeFeatureApi bool
Oneshot bool
Options string
Klog map[string]*utils.KlogFlagVal
Overrides ConfigOverrideArgs
@ -101,6 +108,7 @@ type nfdWorker struct {
config *NFDConfig
kubernetesNamespace string
grpcClient pb.LabelerClient
nfdClient *nfdclient.Clientset
stop chan struct{} // channel for signaling stop
featureSources []source.FeatureSource
labelSources []source.LabelSource
@ -150,6 +158,7 @@ func newDefaultConfig() *NFDConfig {
func (w *nfdWorker) Run() error {
klog.Infof("Node Feature Discovery Worker %s", version.Get())
klog.Infof("NodeName: '%s'", clientcommon.NodeName())
klog.Infof("Kubernetes namespace: '%s'", w.kubernetesNamespace)
// Create watcher for config file and read initial configuration
configWatch, err := utils.CreateFsWatcher(time.Second, w.configFilePath)
@ -185,9 +194,8 @@ func (w *nfdWorker) Run() error {
// Update the node with the feature labels.
if !w.config.Core.NoPublish {
err := w.advertiseFeatureLabels(labels)
if err != nil {
return fmt.Errorf("failed to advertise labels: %s", err.Error())
if err := w.advertiseFeatures(labels); err != nil {
return err
}
}
@ -205,7 +213,7 @@ func (w *nfdWorker) Run() error {
return err
}
// Manage connection to master
if w.config.Core.NoPublish {
if w.config.Core.NoPublish || !w.args.EnableNodeFeatureApi {
w.GrpcDisconnect()
}
@ -524,6 +532,22 @@ func getFeatureLabels(source source.LabelSource, labelWhiteList regexp.Regexp) (
return labels, nil
}
// advertiseFeatures publishes the discovered features of this node. When the
// NodeFeature API is enabled (args.EnableNodeFeatureApi) the node-specific
// NodeFeature object is created/updated; otherwise the feature labels are
// sent through the gRPC connection to nfd-master. Any failure is returned
// wrapped with a message telling which of the two paths was used.
func (w *nfdWorker) advertiseFeatures(labels Labels) error {
	if !w.args.EnableNodeFeatureApi {
		// Create/update feature labels through gRPC connection to nfd-master
		err := w.advertiseFeatureLabels(labels)
		if err != nil {
			return fmt.Errorf("failed to advertise features (via gRPC): %w", err)
		}
		return nil
	}

	// Create/update NodeFeature CR object
	err := w.updateNodeFeatureObject(labels)
	if err != nil {
		return fmt.Errorf("failed to advertise features (via CRD API): %w", err)
	}
	return nil
}
// advertiseFeatureLabels advertises the feature labels to a Kubernetes node
// via the NFD server.
func (w *nfdWorker) advertiseFeatureLabels(labels Labels) error {
@ -551,6 +575,85 @@ func (w *nfdWorker) advertiseFeatureLabels(labels Labels) error {
return nil
}
// updateNodeFeatureObject creates or updates the node-specific NodeFeature
// custom resource. The object is named after the node (clientcommon.NodeName)
// and is looked up in the worker's Kubernetes namespace. Its spec carries all
// discovered features (source.GetAllFeatures) together with the given feature
// labels.
//
// If the object does not exist yet it is created. Otherwise the annotations,
// labels and spec of a copy are replaced with freshly generated ones, and an
// Update request is sent only when the copy differs from the fetched object.
//
// An error is returned if the clientset cannot be obtained or if any of the
// get/create/update requests fail.
func (w *nfdWorker) updateNodeFeatureObject(labels Labels) error {
	cli, err := w.getNfdClient()
	if err != nil {
		return err
	}
	nodename := clientcommon.NodeName()
	namespace := w.kubernetesNamespace

	features := source.GetAllFeatures()

	// TODO: we could implement some simple caching of the object, only get it
	// every 10 minutes or so because nobody else should really be modifying it
	nfr, err := cli.NfdV1alpha1().NodeFeatures(namespace).Get(context.TODO(), nodename, metav1.GetOptions{})
	if errors.IsNotFound(err) {
		// No object for this node yet: build it from scratch and create it.
		klog.Infof("creating NodeFeature object %q", nodename)
		nfr = &nfdv1alpha1.NodeFeature{
			ObjectMeta: metav1.ObjectMeta{
				Name:        nodename,
				Annotations: map[string]string{nfdv1alpha1.WorkerVersionAnnotation: version.Get()},
				Labels:      map[string]string{nfdv1alpha1.NodeFeatureObjNodeNameLabel: nodename},
			},
			Spec: nfdv1alpha1.NodeFeatureSpec{
				Features: *features,
				Labels:   labels,
			},
		}

		nfrCreated, err := cli.NfdV1alpha1().NodeFeatures(namespace).Create(context.TODO(), nfr, metav1.CreateOptions{})
		if err != nil {
			return fmt.Errorf("failed to create NodeFeature object %q: %w", nfr.Name, err)
		}

		utils.KlogDump(4, "NodeFeature object created:", "  ", nfrCreated)
		return nil
	}
	if err != nil {
		return fmt.Errorf("failed to get NodeFeature object: %w", err)
	}

	// The object exists: work on a copy so that the fetched object stays
	// untouched and can serve as the baseline for the comparison below.
	nfrUpdated := nfr.DeepCopy()
	nfrUpdated.Annotations = map[string]string{nfdv1alpha1.WorkerVersionAnnotation: version.Get()}
	nfrUpdated.Labels = map[string]string{nfdv1alpha1.NodeFeatureObjNodeNameLabel: nodename}
	nfrUpdated.Spec = nfdv1alpha1.NodeFeatureSpec{
		Features: *features,
		Labels:   labels,
	}

	// Skip the API request entirely when nothing has changed.
	if apiequality.Semantic.DeepEqual(nfr, nfrUpdated) {
		klog.V(1).Info("no changes in NodeFeature object, not updating")
		return nil
	}

	klog.Infof("updating NodeFeature object %q", nodename)
	nfrUpdated, err = cli.NfdV1alpha1().NodeFeatures(namespace).Update(context.TODO(), nfrUpdated, metav1.UpdateOptions{})
	if err != nil {
		return fmt.Errorf("failed to update NodeFeature object %q: %w", nfr.Name, err)
	}
	utils.KlogDump(4, "NodeFeature object updated:", "  ", nfrUpdated)
	return nil
}
// getNfdClient returns the clientset for using the nfd CRD api. The clientset
// is created on the first call, from the kubeconfig resolved by
// apihelper.GetKubeconfig for args.Kubeconfig, and is then stored in the
// nfdClient field so that later calls return the same instance.
//
// An error is returned, and nothing is cached, if resolving the kubeconfig or
// constructing the clientset fails.
func (w *nfdWorker) getNfdClient() (*nfdclient.Clientset, error) {
	// Reuse the previously created clientset, if any.
	if w.nfdClient != nil {
		return w.nfdClient, nil
	}

	kubeconfig, err := apihelper.GetKubeconfig(w.args.Kubeconfig)
	if err != nil {
		return nil, err
	}

	c, err := nfdclient.NewForConfig(kubeconfig)
	if err != nil {
		return nil, err
	}

	w.nfdClient = c
	return c, nil
}
// UnmarshalJSON implements the Unmarshaler interface from "encoding/json"
func (d *duration) UnmarshalJSON(data []byte) error {
var v interface{}