mirror of
https://github.com/kubernetes-sigs/node-feature-discovery.git
synced 2025-03-14 20:56:42 +00:00
Merge pull request #910 from fmuyassarov/taint/feruz
Allow optionally setting node taints defined on the NodeFeatureRule CR
This commit is contained in:
commit
9f68f6c93a
11 changed files with 271 additions and 17 deletions
|
@ -96,6 +96,8 @@ func initFlags(flagset *flag.FlagSet) *master.Args {
|
|||
"NB: the label namespace is omitted i.e. the filter is only applied to the name part after '/'.")
|
||||
flagset.BoolVar(&args.NoPublish, "no-publish", false,
|
||||
"Do not publish feature labels")
|
||||
flagset.BoolVar(&args.EnableTaints, "enable-taints", false,
|
||||
"Enable node tainting feature")
|
||||
flagset.BoolVar(&args.FeatureRulesController, "featurerules-controller", true,
|
||||
"Enable controller for NodeFeatureRule objects. Generates node labels based on the rules in these CRs.")
|
||||
flagset.IntVar(&args.Port, "port", 8080,
|
||||
|
|
|
@ -7,6 +7,12 @@ spec:
|
|||
# The following feature demonstrates the capabilities of the matchFeatures and
|
||||
# matchAny matchers.
|
||||
- name: "my feature rule"
|
||||
taints:
|
||||
- effect: PreferNoSchedule
|
||||
key: "feature.node.kubernetes.io/special-node"
|
||||
value: "true"
|
||||
- effect: NoExecute
|
||||
key: "feature.node.kubernetes.io/dedicated-node"
|
||||
labels:
|
||||
"my-complex-feature": "my-value"
|
||||
# matchFeatures implements a logical AND over feature matchers.
|
||||
|
|
|
@ -189,6 +189,35 @@ spec:
|
|||
name:
|
||||
description: Name of the rule.
|
||||
type: string
|
||||
taints:
|
||||
description: Taints to create if the rule matches.
|
||||
items:
|
||||
description: The node this Taint is attached to has the "effect"
|
||||
on any pod that does not tolerate the Taint.
|
||||
properties:
|
||||
effect:
|
||||
description: Required. The effect of the taint on pods
|
||||
that do not tolerate the taint. Valid effects are NoSchedule,
|
||||
PreferNoSchedule and NoExecute.
|
||||
type: string
|
||||
key:
|
||||
description: Required. The taint key to be applied to
|
||||
a node.
|
||||
type: string
|
||||
timeAdded:
|
||||
description: TimeAdded represents the time at which the
|
||||
taint was added. It is only written for NoExecute taints.
|
||||
format: date-time
|
||||
type: string
|
||||
value:
|
||||
description: The taint value corresponding to the taint
|
||||
key.
|
||||
type: string
|
||||
required:
|
||||
- effect
|
||||
- key
|
||||
type: object
|
||||
type: array
|
||||
vars:
|
||||
additionalProperties:
|
||||
type: string
|
||||
|
|
|
@ -189,6 +189,35 @@ spec:
|
|||
name:
|
||||
description: Name of the rule.
|
||||
type: string
|
||||
taints:
|
||||
description: Taints to create if the rule matches.
|
||||
items:
|
||||
description: The node this Taint is attached to has the "effect"
|
||||
on any pod that does not tolerate the Taint.
|
||||
properties:
|
||||
effect:
|
||||
description: Required. The effect of the taint on pods
|
||||
that do not tolerate the taint. Valid effects are NoSchedule,
|
||||
PreferNoSchedule and NoExecute.
|
||||
type: string
|
||||
key:
|
||||
description: Required. The taint key to be applied to
|
||||
a node.
|
||||
type: string
|
||||
timeAdded:
|
||||
description: TimeAdded represents the time at which the
|
||||
taint was added. It is only written for NoExecute taints.
|
||||
format: date-time
|
||||
type: string
|
||||
value:
|
||||
description: The taint value corresponding to the taint
|
||||
key.
|
||||
type: string
|
||||
required:
|
||||
- effect
|
||||
- key
|
||||
type: object
|
||||
type: array
|
||||
vars:
|
||||
additionalProperties:
|
||||
type: string
|
||||
|
|
|
@ -99,6 +99,18 @@ Example:
|
|||
nfd-master -cert-file=/opt/nfd/master.crt -key-file=/opt/nfd/master.key -ca-file=/opt/nfd/ca.crt
|
||||
```
|
||||
|
||||
### -enable-taints
|
||||
|
||||
The `-enable-taints` flag enables/disables the node tainting feature of NFD.
|
||||
|
||||
Default: *false*
|
||||
|
||||
Example:
|
||||
|
||||
```bash
|
||||
nfd-master -enable-taints=true
|
||||
```
|
||||
|
||||
### -key-file
|
||||
|
||||
The `-key-file` is one of the three flags (together with `-ca-file` and
|
||||
|
|
|
@ -30,8 +30,8 @@ labeling:
|
|||
## NodeFeatureRule custom resource
|
||||
|
||||
`NodeFeatureRule` objects provide an easy way to create vendor or application
|
||||
specific labels. It uses a flexible rule-based mechanism for creating labels
|
||||
based on node feature.
|
||||
specific labels and taints. It uses a flexible rule-based mechanism for creating
|
||||
labels and optionally taints based on node features.
|
||||
|
||||
### A NodeFeatureRule example
|
||||
|
||||
|
@ -76,6 +76,54 @@ re-labeling delay up to the sleep-interval of nfd-worker (1 minute by default).
|
|||
|
||||
See [Label rule format](#label-rule-format) for detailed description of
|
||||
available fields and how to write labeling rules.
|
||||
### NodeFeatureRule tainting feature
|
||||
|
||||
This feature is experimental.
|
||||
|
||||
In some circumstances it is desirable to keep nodes with specialized hardware away from
|
||||
running general workload and instead leave them for workloads that need the specialized
|
||||
hardware. One way to achieve it is to taint the nodes with the specialized hardware
|
||||
and add corresponding toleration to pods that require the special hardware. NFD
|
||||
offers node tainting functionality which is disabled by default. Users can define
|
||||
one or more custom taints via the `taints` field of the NodeFeatureRule CR. The
|
||||
same rule-based mechanism is applied here and NFD taints only the nodes that match the rule.
|
||||
|
||||
To enable the tainting feature, the `--enable-taints` flag needs to be set to `true`.
|
||||
If the flag `--enable-taints` is set to `false` (i.e. disabled), taints defined in
|
||||
the NodeFeatureRule CR have no effect and will be ignored by the NFD master.
|
||||
|
||||
**NOTE**: Before enabling any taints, make sure to edit nfd-worker daemonset to
|
||||
tolerate the taints to be created. Otherwise, already running pods that do not
|
||||
tolerate the taint are evicted immediately from the node including the nfd-worker
|
||||
pod.
|
||||
|
||||
Example NodeFeatureRule with custom taints:
|
||||
|
||||
```yaml
|
||||
apiVersion: nfd.k8s-sigs.io/v1alpha1
|
||||
kind: NodeFeatureRule
|
||||
metadata:
|
||||
name: my-sample-rule-object
|
||||
spec:
|
||||
rules:
|
||||
- name: "my sample taint rule"
|
||||
taints:
|
||||
- effect: PreferNoSchedule
|
||||
key: "feature.node.kubernetes.io/special-node"
|
||||
value: "true"
|
||||
- effect: NoExecute
|
||||
key: "feature.node.kubernetes.io/dedicated-node"
|
||||
matchFeatures:
|
||||
- feature: kernel.loadedmodule
|
||||
matchExpressions:
|
||||
dummy: {op: Exists}
|
||||
- feature: kernel.config
|
||||
matchExpressions:
|
||||
X86: {op: In, value: ["y"]}
|
||||
```
|
||||
|
||||
In this example, if the `my sample taint rule` rule is matched, `feature.node.kubernetes.io/special-node=true:PreferNoSchedule`
|
||||
and `feature.node.kubernetes.io/dedicated-node:NoExecute` taints are set on the node.
|
||||
|
||||
### NodeFeatureRule controller
|
||||
|
||||
|
@ -365,6 +413,15 @@ details.
|
|||
labels specified in the `labels` field will override anything
|
||||
originating from `labelsTemplate`.
|
||||
|
||||
### Taints
|
||||
|
||||
*taints* is a list of taint entries and each entry can have `key`, `value` and `effect`,
|
||||
where the `value` is optional. Effect could be `NoSchedule`, `PreferNoSchedule`
|
||||
or `NoExecute`. To learn more about the meaning of these effects, check out k8s [documentation](https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/).
|
||||
|
||||
**NOTE**: the taints field is not available for the custom rules of nfd-worker; it is
|
||||
only supported in NodeFeatureRule objects.
|
||||
|
||||
#### Vars
|
||||
|
||||
The `.vars` field is a map of values (key-value pairs) to store for subsequent
|
||||
|
|
|
@ -43,4 +43,7 @@ const (
|
|||
|
||||
// WorkerVersionAnnotation is the annotation that holds the version of nfd-worker running on the node
|
||||
WorkerVersionAnnotation = AnnotationNs + "/worker.version"
|
||||
|
||||
// NodeTaintsAnnotation is the annotation that holds the taints that nfd-master set on the node
|
||||
NodeTaintsAnnotation = AnnotationNs + "/taints"
|
||||
)
|
||||
|
|
|
@ -22,8 +22,8 @@ import (
|
|||
"strings"
|
||||
"text/template"
|
||||
|
||||
corev1 "k8s.io/api/core/v1"
|
||||
"k8s.io/klog/v2"
|
||||
|
||||
"sigs.k8s.io/node-feature-discovery/pkg/utils"
|
||||
)
|
||||
|
||||
|
@ -32,6 +32,7 @@ import (
|
|||
type RuleOutput struct {
|
||||
// Labels holds the labels generated by executing the rule.
Labels map[string]string
|
||||
// Vars holds the vars generated by executing the rule.
Vars map[string]string
|
||||
// Taints holds the taints taken from the rule's Taints field when the
// rule is executed.
Taints []corev1.Taint
|
||||
}
|
||||
|
||||
// Execute the rule against a set of input features.
|
||||
|
@ -94,9 +95,8 @@ func (r *Rule) Execute(features *Features) (RuleOutput, error) {
|
|||
vars[k] = v
|
||||
}
|
||||
|
||||
ret := RuleOutput{Labels: labels, Vars: vars}
|
||||
ret := RuleOutput{Labels: labels, Vars: vars, Taints: r.Taints}
|
||||
utils.KlogDump(2, fmt.Sprintf("rule %q matched with: ", r.Name), " ", ret)
|
||||
|
||||
return ret, nil
|
||||
}
|
||||
|
||||
|
|
|
@ -17,6 +17,7 @@ limitations under the License.
|
|||
package v1alpha1
|
||||
|
||||
import (
|
||||
corev1 "k8s.io/api/core/v1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
)
|
||||
|
||||
|
@ -120,6 +121,10 @@ type Rule struct {
|
|||
// +optional
|
||||
VarsTemplate string `json:"varsTemplate"`
|
||||
|
||||
// Taints to create if the rule matches.
|
||||
// +optional
|
||||
Taints []corev1.Taint `json:"taints,omitempty"`
|
||||
|
||||
// MatchFeatures specifies a set of matcher terms all of which must match.
|
||||
// +optional
|
||||
MatchFeatures FeatureMatcher `json:"matchFeatures"`
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
package v1alpha1
|
||||
|
||||
import (
|
||||
"k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
)
|
||||
|
||||
|
@ -438,6 +439,13 @@ func (in *Rule) DeepCopyInto(out *Rule) {
|
|||
(*out)[key] = val
|
||||
}
|
||||
}
|
||||
if in.Taints != nil {
|
||||
in, out := &in.Taints, &out.Taints
|
||||
*out = make([]v1.Taint, len(*in))
|
||||
for i := range *in {
|
||||
(*in)[i].DeepCopyInto(&(*out)[i])
|
||||
}
|
||||
}
|
||||
if in.MatchFeatures != nil {
|
||||
in, out := &in.MatchFeatures, &out.MatchFeatures
|
||||
*out = make(FeatureMatcher, len(*in))
|
||||
|
|
|
@ -39,10 +39,12 @@ import (
|
|||
corev1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/api/errors"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/apimachinery/pkg/labels"
|
||||
label "k8s.io/apimachinery/pkg/labels"
|
||||
"k8s.io/client-go/kubernetes"
|
||||
restclient "k8s.io/client-go/rest"
|
||||
"k8s.io/klog/v2"
|
||||
controller "k8s.io/kubernetes/pkg/controller"
|
||||
taintutils "k8s.io/kubernetes/pkg/util/taints"
|
||||
|
||||
"sigs.k8s.io/node-feature-discovery/pkg/apihelper"
|
||||
nfdv1alpha1 "sigs.k8s.io/node-feature-discovery/pkg/apis/nfd/v1alpha1"
|
||||
|
@ -72,6 +74,7 @@ type Args struct {
|
|||
LabelWhiteList utils.RegexpVal
|
||||
FeatureRulesController bool
|
||||
NoPublish bool
|
||||
EnableTaints bool
|
||||
Port int
|
||||
Prune bool
|
||||
VerifyNodeName bool
|
||||
|
@ -294,6 +297,13 @@ func (m *nfdMaster) prune() error {
|
|||
return fmt.Errorf("failed to prune labels from node %q: %v", node.Name, err)
|
||||
}
|
||||
|
||||
// Prune taints
|
||||
err = m.setTaints(cli, []corev1.Taint{}, node.Name)
|
||||
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to prune taints from node %q: %v", node.Name, err)
|
||||
}
|
||||
|
||||
// Prune annotations
|
||||
node, err := m.apihelper.GetNode(cli, node.Name)
|
||||
if err != nil {
|
||||
|
@ -392,14 +402,13 @@ func verifyNodeName(cert *x509.Certificate, nodeName string) error {
|
|||
|
||||
err := cert.VerifyHostname(nodeName)
|
||||
if err != nil {
|
||||
return fmt.Errorf("Certificate %q not valid for node %q: %v", cert.Subject.CommonName, nodeName, err)
|
||||
return fmt.Errorf("certificate %q not valid for node %q: %v", cert.Subject.CommonName, nodeName, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// SetLabels implements LabelerServer
|
||||
func (m *nfdMaster) SetLabels(c context.Context, r *pb.SetLabelsRequest) (*pb.SetLabelsReply, error) {
|
||||
|
||||
err := authorizeClient(c, m.args.VerifyNodeName, r.NodeName)
|
||||
if err != nil {
|
||||
return &pb.SetLabelsReply{}, err
|
||||
|
@ -420,7 +429,9 @@ func (m *nfdMaster) SetLabels(c context.Context, r *pb.SetLabelsRequest) (*pb.Se
|
|||
// NOTE: we effectively mangle the request struct by not creating a deep copy of the map
|
||||
rawLabels = r.Labels
|
||||
}
|
||||
for k, v := range m.crLabels(r) {
|
||||
crLabels, crTaints := m.processNodeFeatureRule(r)
|
||||
|
||||
for k, v := range crLabels {
|
||||
rawLabels[k] = v
|
||||
}
|
||||
|
||||
|
@ -440,10 +451,101 @@ func (m *nfdMaster) SetLabels(c context.Context, r *pb.SetLabelsRequest) (*pb.Se
|
|||
klog.Errorf("failed to advertise labels: %v", err)
|
||||
return &pb.SetLabelsReply{}, err
|
||||
}
|
||||
|
||||
// set taints
|
||||
var taints []corev1.Taint
|
||||
if m.args.EnableTaints {
|
||||
taints = crTaints
|
||||
}
|
||||
|
||||
// Call setTaints even though the feature flag is disabled. This
|
||||
// ensures that we delete NFD owned stale taints when flag got
|
||||
// turned off.
|
||||
err = m.setTaints(cli, taints, r.NodeName)
|
||||
if err != nil {
|
||||
return &pb.SetLabelsReply{}, err
|
||||
}
|
||||
}
|
||||
return &pb.SetLabelsReply{}, nil
|
||||
}
|
||||
|
||||
// setTaints sets node taints and annotations based on the taints passed via
|
||||
// nodeFeatureRule custom resorce. If empty list of taints is passed, currently
|
||||
// NFD owned taints and annotations are removed from the node.
|
||||
func (m *nfdMaster) setTaints(cli *kubernetes.Clientset, taints []corev1.Taint, nodeName string) error {
|
||||
// Fetch the node object.
|
||||
node, err := m.apihelper.GetNode(cli, nodeName)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// De-serialize the taints annotation into corev1.Taint type for comparision below.
|
||||
oldTaints := []corev1.Taint{}
|
||||
if val, ok := node.Annotations[nfdv1alpha1.NodeTaintsAnnotation]; ok {
|
||||
sts := strings.Split(val, ",")
|
||||
oldTaints, _, err = taintutils.ParseTaints(sts)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// Delete old nfd-managed taints that are not found in the set of new taints.
|
||||
taintsUpdated := false
|
||||
newNode := node.DeepCopy()
|
||||
for _, taintToRemove := range oldTaints {
|
||||
if taintutils.TaintExists(taints, &taintToRemove) {
|
||||
continue
|
||||
}
|
||||
|
||||
newTaints, removed := taintutils.DeleteTaint(newNode.Spec.Taints, &taintToRemove)
|
||||
if !removed {
|
||||
klog.V(1).Infof("taint %q already deleted from node", taintToRemove.ToString())
|
||||
}
|
||||
taintsUpdated = taintsUpdated || removed
|
||||
newNode.Spec.Taints = newTaints
|
||||
}
|
||||
|
||||
// Add new taints found in the set of new taints.
|
||||
for _, taint := range taints {
|
||||
var updated bool
|
||||
newNode, updated, err = taintutils.AddOrUpdateTaint(newNode, &taint)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to add %q taint on node %v", taint, node.Name)
|
||||
}
|
||||
taintsUpdated = taintsUpdated || updated
|
||||
}
|
||||
|
||||
if taintsUpdated {
|
||||
err = controller.PatchNodeTaints(context.TODO(), cli, nodeName, node, newNode)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to patch the node %v", node.Name)
|
||||
}
|
||||
klog.Infof("updated node %q taints", nodeName)
|
||||
}
|
||||
|
||||
// Update node annotation that holds the taints managed by us
|
||||
newAnnotations := map[string]string{}
|
||||
if len(taints) > 0 {
|
||||
// Serialize the new taints into string and update the annotation
|
||||
// with that string.
|
||||
taintStrs := make([]string, 0, len(taints))
|
||||
for _, taint := range taints {
|
||||
taintStrs = append(taintStrs, taint.ToString())
|
||||
}
|
||||
newAnnotations[nfdv1alpha1.NodeTaintsAnnotation] = strings.Join(taintStrs, ",")
|
||||
}
|
||||
|
||||
patches := createPatches([]string{nfdv1alpha1.NodeTaintsAnnotation}, node.Annotations, newAnnotations, "/metadata/annotations")
|
||||
if len(patches) > 0 {
|
||||
err = m.apihelper.PatchNode(cli, node.Name, patches)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error while patching node object: %v", err)
|
||||
}
|
||||
klog.V(1).Infof("patched node %q annotations for taints", nodeName)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func authorizeClient(c context.Context, checkNodeName bool, nodeName string) error {
|
||||
if checkNodeName {
|
||||
// Client authorization.
|
||||
|
@ -493,20 +595,21 @@ func (m *nfdMaster) UpdateNodeTopology(c context.Context, r *topologypb.NodeTopo
|
|||
return &topologypb.NodeTopologyResponse{}, nil
|
||||
}
|
||||
|
||||
func (m *nfdMaster) crLabels(r *pb.SetLabelsRequest) map[string]string {
|
||||
func (m *nfdMaster) processNodeFeatureRule(r *pb.SetLabelsRequest) (map[string]string, []corev1.Taint) {
|
||||
if m.nfdController == nil {
|
||||
return nil
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
l := make(map[string]string)
|
||||
ruleSpecs, err := m.nfdController.ruleLister.List(labels.Everything())
|
||||
labels := make(map[string]string)
|
||||
var taints []corev1.Taint
|
||||
ruleSpecs, err := m.nfdController.ruleLister.List(label.Everything())
|
||||
sort.Slice(ruleSpecs, func(i, j int) bool {
|
||||
return ruleSpecs[i].Name < ruleSpecs[j].Name
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
klog.Errorf("failed to list NodeFeatureRule resources: %v", err)
|
||||
return nil
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// Helper struct for rule processing
|
||||
|
@ -527,9 +630,9 @@ func (m *nfdMaster) crLabels(r *pb.SetLabelsRequest) map[string]string {
|
|||
klog.Errorf("failed to process Rule %q: %v", rule.Name, err)
|
||||
continue
|
||||
}
|
||||
|
||||
taints = append(taints, ruleOut.Taints...)
|
||||
for k, v := range ruleOut.Labels {
|
||||
l[k] = v
|
||||
labels[k] = v
|
||||
}
|
||||
|
||||
// Feed back rule output to features map for subsequent rules to match
|
||||
|
@ -538,7 +641,7 @@ func (m *nfdMaster) crLabels(r *pb.SetLabelsRequest) map[string]string {
|
|||
}
|
||||
}
|
||||
|
||||
return l
|
||||
return labels, taints
|
||||
}
|
||||
|
||||
// updateNodeFeatures ensures the Kubernetes node object is up to date,
|
||||
|
|
Loading…
Add table
Reference in a new issue