1
0
Fork 0
mirror of https://github.com/kubernetes-sigs/node-feature-discovery.git synced 2025-03-15 04:57:56 +00:00

Merge pull request #910 from fmuyassarov/taint/feruz

Allow optionally setting node taints defined on the NodeFeatureRule CR
This commit is contained in:
Kubernetes Prow Robot 2022-12-06 07:28:37 -08:00 committed by GitHub
commit 9f68f6c93a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 271 additions and 17 deletions

View file

@ -96,6 +96,8 @@ func initFlags(flagset *flag.FlagSet) *master.Args {
"NB: the label namespace is omitted i.e. the filter is only applied to the name part after '/'.") "NB: the label namespace is omitted i.e. the filter is only applied to the name part after '/'.")
flagset.BoolVar(&args.NoPublish, "no-publish", false, flagset.BoolVar(&args.NoPublish, "no-publish", false,
"Do not publish feature labels") "Do not publish feature labels")
flagset.BoolVar(&args.EnableTaints, "enable-taints", false,
"Enable node tainting feature")
flagset.BoolVar(&args.FeatureRulesController, "featurerules-controller", true, flagset.BoolVar(&args.FeatureRulesController, "featurerules-controller", true,
"Enable controller for NodeFeatureRule objects. Generates node labels based on the rules in these CRs.") "Enable controller for NodeFeatureRule objects. Generates node labels based on the rules in these CRs.")
flagset.IntVar(&args.Port, "port", 8080, flagset.IntVar(&args.Port, "port", 8080,

View file

@ -7,6 +7,12 @@ spec:
# The following feature demonstrates the capabilities of the matchFeatures and # The following feature demonstrates the capabilities of the matchFeatures and
# matchAny matchers. # matchAny matchers.
- name: "my feature rule" - name: "my feature rule"
taints:
- effect: PreferNoSchedule
key: "feature.node.kubernetes.io/special-node"
value: "true"
- effect: NoExecute
key: "feature.node.kubernetes.io/dedicated-node"
labels: labels:
"my-complex-feature": "my-value" "my-complex-feature": "my-value"
# matchFeatures implements a logical AND over feature matchers. # matchFeatures implements a logical AND over feature matchers.

View file

@ -189,6 +189,35 @@ spec:
name: name:
description: Name of the rule. description: Name of the rule.
type: string type: string
taints:
description: Taints to create if the rule matches.
items:
description: The node this Taint is attached to has the "effect"
on any pod that does not tolerate the Taint.
properties:
effect:
description: Required. The effect of the taint on pods
that do not tolerate the taint. Valid effects are NoSchedule,
PreferNoSchedule and NoExecute.
type: string
key:
description: Required. The taint key to be applied to
a node.
type: string
timeAdded:
description: TimeAdded represents the time at which the
taint was added. It is only written for NoExecute taints.
format: date-time
type: string
value:
description: The taint value corresponding to the taint
key.
type: string
required:
- effect
- key
type: object
type: array
vars: vars:
additionalProperties: additionalProperties:
type: string type: string

View file

@ -189,6 +189,35 @@ spec:
name: name:
description: Name of the rule. description: Name of the rule.
type: string type: string
taints:
description: Taints to create if the rule matches.
items:
description: The node this Taint is attached to has the "effect"
on any pod that does not tolerate the Taint.
properties:
effect:
description: Required. The effect of the taint on pods
that do not tolerate the taint. Valid effects are NoSchedule,
PreferNoSchedule and NoExecute.
type: string
key:
description: Required. The taint key to be applied to
a node.
type: string
timeAdded:
description: TimeAdded represents the time at which the
taint was added. It is only written for NoExecute taints.
format: date-time
type: string
value:
description: The taint value corresponding to the taint
key.
type: string
required:
- effect
- key
type: object
type: array
vars: vars:
additionalProperties: additionalProperties:
type: string type: string

View file

@ -99,6 +99,18 @@ Example:
nfd-master -cert-file=/opt/nfd/master.crt -key-file=/opt/nfd/master.key -ca-file=/opt/nfd/ca.crt nfd-master -cert-file=/opt/nfd/master.crt -key-file=/opt/nfd/master.key -ca-file=/opt/nfd/ca.crt
``` ```
### -enable-taints
The `-enable-taints` flag enables or disables the node tainting feature of NFD.
Default: *false*
Example:
```bash
nfd-master -enable-taints=true
```
### -key-file ### -key-file
The `-key-file` is one of the three flags (together with `-ca-file` and The `-key-file` is one of the three flags (together with `-ca-file` and

View file

@ -30,8 +30,8 @@ labeling:
## NodeFeatureRule custom resource ## NodeFeatureRule custom resource
`NodeFeatureRule` objects provide an easy way to create vendor or application `NodeFeatureRule` objects provide an easy way to create vendor or application
specific labels. It uses a flexible rule-based mechanism for creating labels specific labels and taints. It uses a flexible rule-based mechanism for creating
based on node feature. labels and optionally taints based on node features.
### A NodeFeatureRule example ### A NodeFeatureRule example
@ -76,6 +76,54 @@ re-labeling delay up to the sleep-interval of nfd-worker (1 minute by default).
See [Label rule format](#label-rule-format) for detailed description of See [Label rule format](#label-rule-format) for detailed description of
available fields and how to write labeling rules. available fields and how to write labeling rules.
### NodeFeatureRule tainting feature
This feature is experimental.
In some circumstances it is desirable to keep nodes with specialized hardware away from
running general workloads and instead leave them for workloads that need the specialized
hardware. One way to achieve this is to taint the nodes with the specialized hardware
and add a corresponding toleration to the pods that require the special hardware. NFD
offers node tainting functionality, which is disabled by default. Users can define
one or more custom taints via the `taints` field of the NodeFeatureRule CR. The
same rule-based mechanism is applied here, and NFD taints only the nodes that match the rule.
To enable the tainting feature, `--enable-taints` flag needs to be set to `true`.
If the flag `--enable-taints` is set to `false` (i.e. disabled), taints defined in
the NodeFeatureRule CR have no effect and will be ignored by the NFD master.
**NOTE**: Before enabling any taints, make sure to edit the nfd-worker daemonset to
tolerate the taints to be created. Otherwise, already running pods that do not
tolerate a `NoExecute` taint are evicted immediately from the node, including the
nfd-worker pod.
Example NodeFeatureRule with custom taints:
```yaml
apiVersion: nfd.k8s-sigs.io/v1alpha1
kind: NodeFeatureRule
metadata:
name: my-sample-rule-object
spec:
rules:
- name: "my sample taint rule"
taints:
- effect: PreferNoSchedule
key: "feature.node.kubernetes.io/special-node"
value: "true"
- effect: NoExecute
key: "feature.node.kubernetes.io/dedicated-node"
matchFeatures:
- feature: kernel.loadedmodule
matchExpressions:
dummy: {op: Exists}
- feature: kernel.config
matchExpressions:
X86: {op: In, value: ["y"]}
```
In this example, if the `my sample taint rule` rule is matched, the `feature.node.kubernetes.io/special-node=true:PreferNoSchedule`
and `feature.node.kubernetes.io/dedicated-node:NoExecute` taints are set on the node.
### NodeFeatureRule controller ### NodeFeatureRule controller
@ -365,6 +413,15 @@ details.
labels specified in the `labels` field will override anything labels specified in the `labels` field will override anything
originating from `labelsTemplate`. originating from `labelsTemplate`.
### Taints
*taints* is a list of taint entries. Each entry has a `key`, a `value` and an `effect`,
where the `value` is optional. The effect can be `NoSchedule`, `PreferNoSchedule`
or `NoExecute`. To learn more about the meaning of these effects, check out the Kubernetes [documentation](https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/).
**NOTE**: the `taints` field is not available for the custom rules of nfd-worker;
it is only supported in NodeFeatureRule objects.
#### Vars #### Vars
The `.vars` field is a map of values (key-value pairs) to store for subsequent The `.vars` field is a map of values (key-value pairs) to store for subsequent

View file

@ -43,4 +43,7 @@ const (
// WorkerVersionAnnotation is the annotation that holds the version of nfd-worker running on the node // WorkerVersionAnnotation is the annotation that holds the version of nfd-worker running on the node
WorkerVersionAnnotation = AnnotationNs + "/worker.version" WorkerVersionAnnotation = AnnotationNs + "/worker.version"
// NodeTaintsAnnotation is the annotation that holds the taints that nfd-master set on the node
NodeTaintsAnnotation = AnnotationNs + "/taints"
) )

View file

@ -22,8 +22,8 @@ import (
"strings" "strings"
"text/template" "text/template"
corev1 "k8s.io/api/core/v1"
"k8s.io/klog/v2" "k8s.io/klog/v2"
"sigs.k8s.io/node-feature-discovery/pkg/utils" "sigs.k8s.io/node-feature-discovery/pkg/utils"
) )
@ -32,6 +32,7 @@ import (
type RuleOutput struct { type RuleOutput struct {
Labels map[string]string Labels map[string]string
Vars map[string]string Vars map[string]string
Taints []corev1.Taint
} }
// Execute the rule against a set of input features. // Execute the rule against a set of input features.
@ -94,9 +95,8 @@ func (r *Rule) Execute(features *Features) (RuleOutput, error) {
vars[k] = v vars[k] = v
} }
ret := RuleOutput{Labels: labels, Vars: vars} ret := RuleOutput{Labels: labels, Vars: vars, Taints: r.Taints}
utils.KlogDump(2, fmt.Sprintf("rule %q matched with: ", r.Name), " ", ret) utils.KlogDump(2, fmt.Sprintf("rule %q matched with: ", r.Name), " ", ret)
return ret, nil return ret, nil
} }

View file

@ -17,6 +17,7 @@ limitations under the License.
package v1alpha1 package v1alpha1
import ( import (
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
) )
@ -120,6 +121,10 @@ type Rule struct {
// +optional // +optional
VarsTemplate string `json:"varsTemplate"` VarsTemplate string `json:"varsTemplate"`
// Taints to create if the rule matches.
// +optional
Taints []corev1.Taint `json:"taints,omitempty"`
// MatchFeatures specifies a set of matcher terms all of which must match. // MatchFeatures specifies a set of matcher terms all of which must match.
// +optional // +optional
MatchFeatures FeatureMatcher `json:"matchFeatures"` MatchFeatures FeatureMatcher `json:"matchFeatures"`

View file

@ -6,6 +6,7 @@
package v1alpha1 package v1alpha1
import ( import (
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime"
) )
@ -438,6 +439,13 @@ func (in *Rule) DeepCopyInto(out *Rule) {
(*out)[key] = val (*out)[key] = val
} }
} }
if in.Taints != nil {
in, out := &in.Taints, &out.Taints
*out = make([]v1.Taint, len(*in))
for i := range *in {
(*in)[i].DeepCopyInto(&(*out)[i])
}
}
if in.MatchFeatures != nil { if in.MatchFeatures != nil {
in, out := &in.MatchFeatures, &out.MatchFeatures in, out := &in.MatchFeatures, &out.MatchFeatures
*out = make(FeatureMatcher, len(*in)) *out = make(FeatureMatcher, len(*in))

View file

@ -39,10 +39,12 @@ import (
corev1 "k8s.io/api/core/v1" corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels" label "k8s.io/apimachinery/pkg/labels"
"k8s.io/client-go/kubernetes" "k8s.io/client-go/kubernetes"
restclient "k8s.io/client-go/rest" restclient "k8s.io/client-go/rest"
"k8s.io/klog/v2" "k8s.io/klog/v2"
controller "k8s.io/kubernetes/pkg/controller"
taintutils "k8s.io/kubernetes/pkg/util/taints"
"sigs.k8s.io/node-feature-discovery/pkg/apihelper" "sigs.k8s.io/node-feature-discovery/pkg/apihelper"
nfdv1alpha1 "sigs.k8s.io/node-feature-discovery/pkg/apis/nfd/v1alpha1" nfdv1alpha1 "sigs.k8s.io/node-feature-discovery/pkg/apis/nfd/v1alpha1"
@ -72,6 +74,7 @@ type Args struct {
LabelWhiteList utils.RegexpVal LabelWhiteList utils.RegexpVal
FeatureRulesController bool FeatureRulesController bool
NoPublish bool NoPublish bool
EnableTaints bool
Port int Port int
Prune bool Prune bool
VerifyNodeName bool VerifyNodeName bool
@ -294,6 +297,13 @@ func (m *nfdMaster) prune() error {
return fmt.Errorf("failed to prune labels from node %q: %v", node.Name, err) return fmt.Errorf("failed to prune labels from node %q: %v", node.Name, err)
} }
// Prune taints
err = m.setTaints(cli, []corev1.Taint{}, node.Name)
if err != nil {
return fmt.Errorf("failed to prune taints from node %q: %v", node.Name, err)
}
// Prune annotations // Prune annotations
node, err := m.apihelper.GetNode(cli, node.Name) node, err := m.apihelper.GetNode(cli, node.Name)
if err != nil { if err != nil {
@ -392,14 +402,13 @@ func verifyNodeName(cert *x509.Certificate, nodeName string) error {
err := cert.VerifyHostname(nodeName) err := cert.VerifyHostname(nodeName)
if err != nil { if err != nil {
return fmt.Errorf("Certificate %q not valid for node %q: %v", cert.Subject.CommonName, nodeName, err) return fmt.Errorf("certificate %q not valid for node %q: %v", cert.Subject.CommonName, nodeName, err)
} }
return nil return nil
} }
// SetLabels implements LabelerServer // SetLabels implements LabelerServer
func (m *nfdMaster) SetLabels(c context.Context, r *pb.SetLabelsRequest) (*pb.SetLabelsReply, error) { func (m *nfdMaster) SetLabels(c context.Context, r *pb.SetLabelsRequest) (*pb.SetLabelsReply, error) {
err := authorizeClient(c, m.args.VerifyNodeName, r.NodeName) err := authorizeClient(c, m.args.VerifyNodeName, r.NodeName)
if err != nil { if err != nil {
return &pb.SetLabelsReply{}, err return &pb.SetLabelsReply{}, err
@ -420,7 +429,9 @@ func (m *nfdMaster) SetLabels(c context.Context, r *pb.SetLabelsRequest) (*pb.Se
// NOTE: we effectively mangle the request struct by not creating a deep copy of the map // NOTE: we effectively mangle the request struct by not creating a deep copy of the map
rawLabels = r.Labels rawLabels = r.Labels
} }
for k, v := range m.crLabels(r) { crLabels, crTaints := m.processNodeFeatureRule(r)
for k, v := range crLabels {
rawLabels[k] = v rawLabels[k] = v
} }
@ -440,10 +451,101 @@ func (m *nfdMaster) SetLabels(c context.Context, r *pb.SetLabelsRequest) (*pb.Se
klog.Errorf("failed to advertise labels: %v", err) klog.Errorf("failed to advertise labels: %v", err)
return &pb.SetLabelsReply{}, err return &pb.SetLabelsReply{}, err
} }
// set taints
var taints []corev1.Taint
if m.args.EnableTaints {
taints = crTaints
}
// Call setTaints even though the feature flag is disabled. This
// ensures that we delete NFD owned stale taints when flag got
// turned off.
err = m.setTaints(cli, taints, r.NodeName)
if err != nil {
return &pb.SetLabelsReply{}, err
}
} }
return &pb.SetLabelsReply{}, nil return &pb.SetLabelsReply{}, nil
} }
// setTaints sets node taints and annotations based on the taints passed via
// NodeFeatureRule custom resource. If an empty list of taints is passed, the
// currently NFD-owned taints and the annotation tracking them are removed from
// the node.
//
// The taints owned by NFD are tracked in the NodeTaintsAnnotation annotation of
// the node as a comma-separated list of taint strings. Only taints listed in
// that annotation are candidates for removal.
//
// It returns an error if the node cannot be fetched, the annotation cannot be
// parsed, or either of the node patches (taints, annotations) fails.
func (m *nfdMaster) setTaints(cli *kubernetes.Clientset, taints []corev1.Taint, nodeName string) error {
	// Fetch the node object.
	node, err := m.apihelper.GetNode(cli, nodeName)
	if err != nil {
		return err
	}

	// De-serialize the taints annotation into corev1.Taint type for comparison below.
	// An empty annotation value is treated as "no NFD-owned taints": splitting it
	// would yield a single empty taint spec, which is not a meaningful taint.
	oldTaints := []corev1.Taint{}
	if val, ok := node.Annotations[nfdv1alpha1.NodeTaintsAnnotation]; ok && val != "" {
		sts := strings.Split(val, ",")
		oldTaints, _, err = taintutils.ParseTaints(sts)
		if err != nil {
			return fmt.Errorf("failed to parse %q annotation of node %q: %w", nfdv1alpha1.NodeTaintsAnnotation, nodeName, err)
		}
	}

	// Delete old nfd-managed taints that are not found in the set of new taints.
	// Work on a copy so that the original node can serve as the patch base below.
	taintsUpdated := false
	newNode := node.DeepCopy()
	for i := range oldTaints {
		// Index into the slice instead of taking the address of a range variable.
		taintToRemove := &oldTaints[i]
		if taintutils.TaintExists(taints, taintToRemove) {
			continue
		}

		newTaints, removed := taintutils.DeleteTaint(newNode.Spec.Taints, taintToRemove)
		if !removed {
			klog.V(1).Infof("taint %q already deleted from node", taintToRemove.ToString())
		}
		taintsUpdated = taintsUpdated || removed
		newNode.Spec.Taints = newTaints
	}

	// Add new taints found in the set of new taints.
	for i := range taints {
		var updated bool
		newNode, updated, err = taintutils.AddOrUpdateTaint(newNode, &taints[i])
		if err != nil {
			return fmt.Errorf("failed to add %q taint on node %v: %w", taints[i].ToString(), node.Name, err)
		}
		taintsUpdated = taintsUpdated || updated
	}

	// Only talk to the API server if the set of taints actually changed.
	if taintsUpdated {
		err = controller.PatchNodeTaints(context.TODO(), cli, nodeName, node, newNode)
		if err != nil {
			return fmt.Errorf("failed to patch the node %v: %w", node.Name, err)
		}
		klog.Infof("updated node %q taints", nodeName)
	}

	// Update node annotation that holds the taints managed by us
	newAnnotations := map[string]string{}
	if len(taints) > 0 {
		// Serialize the new taints into string and update the annotation
		// with that string.
		taintStrs := make([]string, 0, len(taints))
		for i := range taints {
			taintStrs = append(taintStrs, taints[i].ToString())
		}
		newAnnotations[nfdv1alpha1.NodeTaintsAnnotation] = strings.Join(taintStrs, ",")
	}

	// With no taints left, newAnnotations is empty; createPatches is given the
	// annotation key so it can presumably emit a removal for it (verify against
	// createPatches).
	patches := createPatches([]string{nfdv1alpha1.NodeTaintsAnnotation}, node.Annotations, newAnnotations, "/metadata/annotations")
	if len(patches) > 0 {
		err = m.apihelper.PatchNode(cli, node.Name, patches)
		if err != nil {
			return fmt.Errorf("error while patching node object: %w", err)
		}
		klog.V(1).Infof("patched node %q annotations for taints", nodeName)
	}

	return nil
}
func authorizeClient(c context.Context, checkNodeName bool, nodeName string) error { func authorizeClient(c context.Context, checkNodeName bool, nodeName string) error {
if checkNodeName { if checkNodeName {
// Client authorization. // Client authorization.
@ -493,20 +595,21 @@ func (m *nfdMaster) UpdateNodeTopology(c context.Context, r *topologypb.NodeTopo
return &topologypb.NodeTopologyResponse{}, nil return &topologypb.NodeTopologyResponse{}, nil
} }
func (m *nfdMaster) crLabels(r *pb.SetLabelsRequest) map[string]string { func (m *nfdMaster) processNodeFeatureRule(r *pb.SetLabelsRequest) (map[string]string, []corev1.Taint) {
if m.nfdController == nil { if m.nfdController == nil {
return nil return nil, nil
} }
l := make(map[string]string) labels := make(map[string]string)
ruleSpecs, err := m.nfdController.ruleLister.List(labels.Everything()) var taints []corev1.Taint
ruleSpecs, err := m.nfdController.ruleLister.List(label.Everything())
sort.Slice(ruleSpecs, func(i, j int) bool { sort.Slice(ruleSpecs, func(i, j int) bool {
return ruleSpecs[i].Name < ruleSpecs[j].Name return ruleSpecs[i].Name < ruleSpecs[j].Name
}) })
if err != nil { if err != nil {
klog.Errorf("failed to list NodeFeatureRule resources: %v", err) klog.Errorf("failed to list NodeFeatureRule resources: %v", err)
return nil return nil, nil
} }
// Helper struct for rule processing // Helper struct for rule processing
@ -527,9 +630,9 @@ func (m *nfdMaster) crLabels(r *pb.SetLabelsRequest) map[string]string {
klog.Errorf("failed to process Rule %q: %v", rule.Name, err) klog.Errorf("failed to process Rule %q: %v", rule.Name, err)
continue continue
} }
taints = append(taints, ruleOut.Taints...)
for k, v := range ruleOut.Labels { for k, v := range ruleOut.Labels {
l[k] = v labels[k] = v
} }
// Feed back rule output to features map for subsequent rules to match // Feed back rule output to features map for subsequent rules to match
@ -538,7 +641,7 @@ func (m *nfdMaster) crLabels(r *pb.SetLabelsRequest) map[string]string {
} }
} }
return l return labels, taints
} }
// updateNodeFeatures ensures the Kubernetes node object is up to date, // updateNodeFeatures ensures the Kubernetes node object is up to date,