1
0
Fork 0
mirror of https://github.com/kubernetes-sigs/node-feature-discovery.git synced 2025-03-14 20:56:42 +00:00

Merge pull request #910 from fmuyassarov/taint/feruz

Allow optionally setting node taints defined on the NodeFeatureRule CR
This commit is contained in:
Kubernetes Prow Robot 2022-12-06 07:28:37 -08:00 committed by GitHub
commit 9f68f6c93a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 271 additions and 17 deletions

View file

@ -96,6 +96,8 @@ func initFlags(flagset *flag.FlagSet) *master.Args {
"NB: the label namespace is omitted i.e. the filter is only applied to the name part after '/'.")
flagset.BoolVar(&args.NoPublish, "no-publish", false,
"Do not publish feature labels")
flagset.BoolVar(&args.EnableTaints, "enable-taints", false,
"Enable node tainting feature")
flagset.BoolVar(&args.FeatureRulesController, "featurerules-controller", true,
"Enable controller for NodeFeatureRule objects. Generates node labels based on the rules in these CRs.")
flagset.IntVar(&args.Port, "port", 8080,

View file

@ -7,6 +7,12 @@ spec:
# The following feature demonstrates the capabilities of the matchFeatures and
# matchAny matchers.
- name: "my feature rule"
taints:
- effect: PreferNoSchedule
key: "feature.node.kubernetes.io/special-node"
value: "true"
- effect: NoExecute
key: "feature.node.kubernetes.io/dedicated-node"
labels:
"my-complex-feature": "my-value"
# matchFeatures implements a logical AND over feature matchers.

View file

@ -189,6 +189,35 @@ spec:
name:
description: Name of the rule.
type: string
taints:
description: Taints to create if the rule matches.
items:
description: The node this Taint is attached to has the "effect"
on any pod that does not tolerate the Taint.
properties:
effect:
description: Required. The effect of the taint on pods
that do not tolerate the taint. Valid effects are NoSchedule,
PreferNoSchedule and NoExecute.
type: string
key:
description: Required. The taint key to be applied to
a node.
type: string
timeAdded:
description: TimeAdded represents the time at which the
taint was added. It is only written for NoExecute taints.
format: date-time
type: string
value:
description: The taint value corresponding to the taint
key.
type: string
required:
- effect
- key
type: object
type: array
vars:
additionalProperties:
type: string

View file

@ -189,6 +189,35 @@ spec:
name:
description: Name of the rule.
type: string
taints:
description: Taints to create if the rule matches.
items:
description: The node this Taint is attached to has the "effect"
on any pod that does not tolerate the Taint.
properties:
effect:
description: Required. The effect of the taint on pods
that do not tolerate the taint. Valid effects are NoSchedule,
PreferNoSchedule and NoExecute.
type: string
key:
description: Required. The taint key to be applied to
a node.
type: string
timeAdded:
description: TimeAdded represents the time at which the
taint was added. It is only written for NoExecute taints.
format: date-time
type: string
value:
description: The taint value corresponding to the taint
key.
type: string
required:
- effect
- key
type: object
type: array
vars:
additionalProperties:
type: string

View file

@ -99,6 +99,18 @@ Example:
nfd-master -cert-file=/opt/nfd/master.crt -key-file=/opt/nfd/master.key -ca-file=/opt/nfd/ca.crt
```
### -enable-taints
The `-enable-taints` flag enables/disables the node tainting feature of NFD.
Default: *false*
Example:
```bash
nfd-master -enable-taints=true
```
### -key-file
The `-key-file` is one of the three flags (together with `-ca-file` and

View file

@ -30,8 +30,8 @@ labeling:
## NodeFeatureRule custom resource
`NodeFeatureRule` objects provide an easy way to create vendor or application
specific labels. It uses a flexible rule-based mechanism for creating labels
based on node feature.
specific labels and taints. It uses a flexible rule-based mechanism for creating
labels and optionally taints based on node features.
### A NodeFeatureRule example
@ -76,6 +76,54 @@ re-labeling delay up to the sleep-interval of nfd-worker (1 minute by default).
See [Label rule format](#label-rule-format) for detailed description of
available fields and how to write labeling rules.
### NodeFeatureRule tainting feature
This feature is experimental.
In some circumstances it is desirable to keep nodes with specialized hardware away from
running general workloads and instead leave them for workloads that need the specialized
hardware. One way to achieve this is to taint the nodes with the specialized hardware
and add corresponding tolerations to pods that require the special hardware. NFD
offers node tainting functionality which is disabled by default. Users can define
one or more custom taints via the `taints` field of the NodeFeatureRule CR. The
same rule-based mechanism is applied here and NFD taints only the nodes that match the rule.
To enable the tainting feature, `--enable-taints` flag needs to be set to `true`.
If the flag `--enable-taints` is set to `false` (i.e. disabled), taints defined in
the NodeFeatureRule CR have no effect and will be ignored by the NFD master.
**NOTE**: Before enabling any taints, make sure to edit nfd-worker daemonset to
tolerate the taints to be created. Otherwise, already running pods that do not
tolerate the taint are evicted immediately from the node including the nfd-worker
pod.
Example NodeFeatureRule with custom taints:
```yaml
apiVersion: nfd.k8s-sigs.io/v1alpha1
kind: NodeFeatureRule
metadata:
name: my-sample-rule-object
spec:
rules:
- name: "my sample taint rule"
taints:
- effect: PreferNoSchedule
key: "feature.node.kubernetes.io/special-node"
value: "true"
- effect: NoExecute
key: "feature.node.kubernetes.io/dedicated-node"
matchFeatures:
- feature: kernel.loadedmodule
matchExpressions:
dummy: {op: Exists}
- feature: kernel.config
matchExpressions:
X86: {op: In, value: ["y"]}
```
In this example, if the `my sample taint rule` rule is matched, `feature.node.kubernetes.io/special-node=true:PreferNoSchedule`
and `feature.node.kubernetes.io/dedicated-node:NoExecute` taints are set on the node.
### NodeFeatureRule controller
@ -365,6 +413,15 @@ details.
labels specified in the `labels` field will override anything
originating from `labelsTemplate`.
#### Taints
*taints* is a list of taint entries. Each entry can have `key`, `value` and `effect`,
where `key` and `effect` are required and `value` is optional. `effect` must be one of
`NoSchedule`, `PreferNoSchedule` or `NoExecute`. To learn more about the meaning of these
effects, see the Kubernetes [documentation](https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/).
**NOTE**: the `taints` field is only available in NodeFeatureRule objects; it is
not supported in the custom rules of nfd-worker.
#### Vars
The `.vars` field is a map of values (key-value pairs) to store for subsequent

View file

@ -43,4 +43,7 @@ const (
// WorkerVersionAnnotation is the annotation that holds the version of nfd-worker running on the node
WorkerVersionAnnotation = AnnotationNs + "/worker.version"
// NodeTaintsAnnotation is the annotation that holds the taints that nfd-master set on the node
NodeTaintsAnnotation = AnnotationNs + "/taints"
)

View file

@ -22,8 +22,8 @@ import (
"strings"
"text/template"
corev1 "k8s.io/api/core/v1"
"k8s.io/klog/v2"
"sigs.k8s.io/node-feature-discovery/pkg/utils"
)
@ -32,6 +32,7 @@ import (
type RuleOutput struct {
// Labels holds the labels produced by the matched rule.
Labels map[string]string
// Vars holds the variables produced by the matched rule.
Vars map[string]string
// Taints holds the taints of the matched rule, taken directly from the
// rule's Taints field.
Taints []corev1.Taint
}
// Execute the rule against a set of input features.
@ -94,9 +95,8 @@ func (r *Rule) Execute(features *Features) (RuleOutput, error) {
vars[k] = v
}
ret := RuleOutput{Labels: labels, Vars: vars}
ret := RuleOutput{Labels: labels, Vars: vars, Taints: r.Taints}
utils.KlogDump(2, fmt.Sprintf("rule %q matched with: ", r.Name), " ", ret)
return ret, nil
}

View file

@ -17,6 +17,7 @@ limitations under the License.
package v1alpha1
import (
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)
@ -120,6 +121,10 @@ type Rule struct {
// +optional
VarsTemplate string `json:"varsTemplate"`
// Taints to create if the rule matches.
// +optional
Taints []corev1.Taint `json:"taints,omitempty"`
// MatchFeatures specifies a set of matcher terms all of which must match.
// +optional
MatchFeatures FeatureMatcher `json:"matchFeatures"`

View file

@ -6,6 +6,7 @@
package v1alpha1
import (
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime"
)
@ -438,6 +439,13 @@ func (in *Rule) DeepCopyInto(out *Rule) {
(*out)[key] = val
}
}
if in.Taints != nil {
in, out := &in.Taints, &out.Taints
*out = make([]v1.Taint, len(*in))
for i := range *in {
(*in)[i].DeepCopyInto(&(*out)[i])
}
}
if in.MatchFeatures != nil {
in, out := &in.MatchFeatures, &out.MatchFeatures
*out = make(FeatureMatcher, len(*in))

View file

@ -39,10 +39,12 @@ import (
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
label "k8s.io/apimachinery/pkg/labels"
"k8s.io/client-go/kubernetes"
restclient "k8s.io/client-go/rest"
"k8s.io/klog/v2"
controller "k8s.io/kubernetes/pkg/controller"
taintutils "k8s.io/kubernetes/pkg/util/taints"
"sigs.k8s.io/node-feature-discovery/pkg/apihelper"
nfdv1alpha1 "sigs.k8s.io/node-feature-discovery/pkg/apis/nfd/v1alpha1"
@ -72,6 +74,7 @@ type Args struct {
LabelWhiteList utils.RegexpVal
FeatureRulesController bool
NoPublish bool
EnableTaints bool
Port int
Prune bool
VerifyNodeName bool
@ -294,6 +297,13 @@ func (m *nfdMaster) prune() error {
return fmt.Errorf("failed to prune labels from node %q: %v", node.Name, err)
}
// Prune taints
err = m.setTaints(cli, []corev1.Taint{}, node.Name)
if err != nil {
return fmt.Errorf("failed to prune taints from node %q: %v", node.Name, err)
}
// Prune annotations
node, err := m.apihelper.GetNode(cli, node.Name)
if err != nil {
@ -392,14 +402,13 @@ func verifyNodeName(cert *x509.Certificate, nodeName string) error {
err := cert.VerifyHostname(nodeName)
if err != nil {
return fmt.Errorf("Certificate %q not valid for node %q: %v", cert.Subject.CommonName, nodeName, err)
return fmt.Errorf("certificate %q not valid for node %q: %v", cert.Subject.CommonName, nodeName, err)
}
return nil
}
// SetLabels implements LabelerServer
func (m *nfdMaster) SetLabels(c context.Context, r *pb.SetLabelsRequest) (*pb.SetLabelsReply, error) {
err := authorizeClient(c, m.args.VerifyNodeName, r.NodeName)
if err != nil {
return &pb.SetLabelsReply{}, err
@ -420,7 +429,9 @@ func (m *nfdMaster) SetLabels(c context.Context, r *pb.SetLabelsRequest) (*pb.Se
// NOTE: we effectively mangle the request struct by not creating a deep copy of the map
rawLabels = r.Labels
}
for k, v := range m.crLabels(r) {
crLabels, crTaints := m.processNodeFeatureRule(r)
for k, v := range crLabels {
rawLabels[k] = v
}
@ -440,10 +451,101 @@ func (m *nfdMaster) SetLabels(c context.Context, r *pb.SetLabelsRequest) (*pb.Se
klog.Errorf("failed to advertise labels: %v", err)
return &pb.SetLabelsReply{}, err
}
// set taints
var taints []corev1.Taint
if m.args.EnableTaints {
taints = crTaints
}
// Call setTaints even though the feature flag is disabled. This
// ensures that we delete NFD owned stale taints when flag got
// turned off.
err = m.setTaints(cli, taints, r.NodeName)
if err != nil {
return &pb.SetLabelsReply{}, err
}
}
return &pb.SetLabelsReply{}, nil
}
// setTaints sets node taints and annotations based on the taints passed via
// the NodeFeatureRule custom resource. If an empty list of taints is passed,
// the taints and the taints annotation currently owned by NFD are removed
// from the node.
//
// The set of NFD-owned taints is tracked in the NodeTaintsAnnotation node
// annotation as a comma-separated list of taint strings. Only taints listed
// in that annotation are ever deleted here.
//
// It returns an error if the node cannot be fetched, the existing annotation
// cannot be parsed, or patching the node taints or annotations fails.
func (m *nfdMaster) setTaints(cli *kubernetes.Clientset, taints []corev1.Taint, nodeName string) error {
	// Fetch the node object.
	node, err := m.apihelper.GetNode(cli, nodeName)
	if err != nil {
		return err
	}

	// De-serialize the taints annotation into corev1.Taint type for comparison below.
	oldTaints := []corev1.Taint{}
	if val, ok := node.Annotations[nfdv1alpha1.NodeTaintsAnnotation]; ok {
		sts := strings.Split(val, ",")
		oldTaints, _, err = taintutils.ParseTaints(sts)
		if err != nil {
			return err
		}
	}

	// Delete old nfd-managed taints that are not found in the set of new taints.
	taintsUpdated := false
	newNode := node.DeepCopy()
	// Index the slice instead of taking the address of the range variable.
	for i := range oldTaints {
		taintToRemove := &oldTaints[i]
		if taintutils.TaintExists(taints, taintToRemove) {
			continue
		}

		newTaints, removed := taintutils.DeleteTaint(newNode.Spec.Taints, taintToRemove)
		if !removed {
			klog.V(1).Infof("taint %q already deleted from node", taintToRemove.ToString())
		}
		taintsUpdated = taintsUpdated || removed
		newNode.Spec.Taints = newTaints
	}

	// Add new taints found in the set of new taints.
	for i := range taints {
		taint := &taints[i]
		var updated bool
		newNode, updated, err = taintutils.AddOrUpdateTaint(newNode, taint)
		if err != nil {
			// Keep the underlying error and print the taint in its string form.
			return fmt.Errorf("failed to add %q taint on node %v: %w", taint.ToString(), node.Name, err)
		}
		taintsUpdated = taintsUpdated || updated
	}

	// Only talk to the API server when the taint set actually changed.
	if taintsUpdated {
		err = controller.PatchNodeTaints(context.TODO(), cli, nodeName, node, newNode)
		if err != nil {
			return fmt.Errorf("failed to patch the node %v: %w", node.Name, err)
		}
		klog.Infof("updated node %q taints", nodeName)
	}

	// Update node annotation that holds the taints managed by us. An empty
	// map of new annotations makes createPatches drop the annotation.
	newAnnotations := map[string]string{}
	if len(taints) > 0 {
		// Serialize the new taints into string and update the annotation
		// with that string.
		taintStrs := make([]string, 0, len(taints))
		for i := range taints {
			taintStrs = append(taintStrs, taints[i].ToString())
		}
		newAnnotations[nfdv1alpha1.NodeTaintsAnnotation] = strings.Join(taintStrs, ",")
	}

	patches := createPatches([]string{nfdv1alpha1.NodeTaintsAnnotation}, node.Annotations, newAnnotations, "/metadata/annotations")
	if len(patches) > 0 {
		err = m.apihelper.PatchNode(cli, node.Name, patches)
		if err != nil {
			return fmt.Errorf("error while patching node object: %w", err)
		}
		klog.V(1).Infof("patched node %q annotations for taints", nodeName)
	}

	return nil
}
func authorizeClient(c context.Context, checkNodeName bool, nodeName string) error {
if checkNodeName {
// Client authorization.
@ -493,20 +595,21 @@ func (m *nfdMaster) UpdateNodeTopology(c context.Context, r *topologypb.NodeTopo
return &topologypb.NodeTopologyResponse{}, nil
}
func (m *nfdMaster) crLabels(r *pb.SetLabelsRequest) map[string]string {
func (m *nfdMaster) processNodeFeatureRule(r *pb.SetLabelsRequest) (map[string]string, []corev1.Taint) {
if m.nfdController == nil {
return nil
return nil, nil
}
l := make(map[string]string)
ruleSpecs, err := m.nfdController.ruleLister.List(labels.Everything())
labels := make(map[string]string)
var taints []corev1.Taint
ruleSpecs, err := m.nfdController.ruleLister.List(label.Everything())
sort.Slice(ruleSpecs, func(i, j int) bool {
return ruleSpecs[i].Name < ruleSpecs[j].Name
})
if err != nil {
klog.Errorf("failed to list NodeFeatureRule resources: %v", err)
return nil
return nil, nil
}
// Helper struct for rule processing
@ -527,9 +630,9 @@ func (m *nfdMaster) crLabels(r *pb.SetLabelsRequest) map[string]string {
klog.Errorf("failed to process Rule %q: %v", rule.Name, err)
continue
}
taints = append(taints, ruleOut.Taints...)
for k, v := range ruleOut.Labels {
l[k] = v
labels[k] = v
}
// Feed back rule output to features map for subsequent rules to match
@ -538,7 +641,7 @@ func (m *nfdMaster) crLabels(r *pb.SetLabelsRequest) map[string]string {
}
}
return l
return labels, taints
}
// updateNodeFeatures ensures the Kubernetes node object is up to date,