1
0
Fork 0
mirror of https://github.com/kubernetes-sigs/node-feature-discovery.git synced 2024-12-14 11:57:51 +00:00

Create extended resources with NodeFeatureRule

Add support for management of Extended Resources via the
NodeFeatureRule CRD API.

There are usage scenarios where users want to advertise features
as extended resources instead of labels (or annotations).

This patch enables the discovery of extended resources via the
NodeFeatureRule API, by means of a node annotation and a patch of
node.status.capacity and node.status.allocatable.

Co-authored-by: Carlos Eduardo Arango Gutierrez <eduardoa@nvidia.com>
Co-authored-by: Markus Lehtonen <markus.lehtonen@intel.com>
Co-authored-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
Signed-off-by: Carlos Eduardo Arango Gutierrez <eduardoa@nvidia.com>
This commit is contained in:
Fabiano Fidêncio 2023-03-10 11:39:41 +01:00 committed by Carlos Eduardo Arango Gutierrez
parent ec014f118b
commit 250aea4741
No known key found for this signature in database
GPG key ID: 5697017E44D90737
15 changed files with 312 additions and 47 deletions

View file

@ -155,6 +155,11 @@ spec:
description: Rule defines a rule for node customization such as
labeling.
properties:
extendedResources:
additionalProperties:
type: string
description: ExtendedResources to create if the rule matches.
type: object
labels:
additionalProperties:
type: string

View file

@ -7,6 +7,7 @@ rules:
- ""
resources:
- nodes
- nodes/status
verbs:
- get
- patch

View file

@ -155,6 +155,11 @@ spec:
description: Rule defines a rule for node customization such as
labeling.
properties:
extendedResources:
additionalProperties:
type: string
description: ExtendedResources to create if the rule matches.
type: object
labels:
additionalProperties:
type: string

View file

@ -10,9 +10,7 @@ rules:
- ""
resources:
- nodes
{{- if .Values.master.resourceLabels | empty | not }}
- nodes/status
{{- end }}
verbs:
- get
- patch

View file

@ -485,7 +485,7 @@ details.
labels specified in the `labels` field will override anything
originating from `labelsTemplate`.
### Taints
#### Taints
*taints* is a list of taint entries and each entry can have `key`, `value` and `effect`,
where the `value` is optional. Effect could be `NoSchedule`, `PreferNoSchedule`
@ -501,6 +501,65 @@ rules to use. In other words, these are variables that are not advertised as
node labels. See [backreferences](#backreferences) for more details on the
usage of vars.
#### Extended resources
The `.extendedResources` field is a map of extended resources to advertise, keyed by resource name.
See [extended resources](#extended-resources) for more details.
Take this rule as a referential example:
```yaml
apiVersion: nfd.k8s-sigs.io/v1alpha1
kind: NodeFeatureRule
metadata:
name: my-extended-resource-rule
spec:
rules:
- name: "my extended resource rule"
extendedResources:
vendor.io/dynamic: "@kernel.version.major"
vendor.io/static: "123"
matchFeatures:
- feature: kernel.version
matchExpressions:
major: {op: Exists}
```
The extended resource `vendor.io/dynamic` is defined in the form `@feature.attribute`.
The value of the extended resource will be the value of the attribute `major`
of the feature `kernel.version`.
The `@<feature-name>.<element-name>` format can be used to inject values of
detected features to the extended resource. See
[available features](#available-features) for possible values to use. Note that
the value must be eligible as a
Kubernetes resource quantity.
This will yield the following node status:
```yaml
allocatable:
...
vendor.io/dynamic: "5"
vendor.io/static: "123"
...
capacity:
...
vendor.io/dynamic: "5"
vendor.io/static: "123"
...
```
There are some limitations to the namespace part (i.e. prefix) of the Extended
Resource names:
- `kubernetes.io/` and its sub-namespaces (like `sub.ns.kubernetes.io/`) cannot
generally be used
- the only exception is `feature.node.kubernetes.io/` and its sub-namespaces
(like `sub.ns.feature.node.kubernetes.io/`)
- unprefixed names will get prefixed with `feature.node.kubernetes.io/`
automatically (e.g. `foo` becomes `feature.node.kubernetes.io/foo`)
#### Vars template
The `.varsTemplate` field specifies a text template for dynamically creating

View file

@ -14,3 +14,18 @@ spec:
- feature: kernel.config
matchExpressions:
X86: {op: In, value: ["y"]}
---
apiVersion: nfd.k8s-sigs.io/v1alpha1
kind: NodeFeatureRule
metadata:
name: my-sample-extened-resource
spec:
rules:
- name: "my sample rule"
extendedResources:
vendor.io/dynamic: "@kernel.version.major"
vendor.io/static: "123"
matchFeatures:
- feature: kernel.version
matchExpressions:
major: {op: Exists}

View file

@ -38,6 +38,12 @@ const (
// AnnotationNs namespace for all NFD-related annotations.
AnnotationNs = "nfd.node.kubernetes.io"
// ExtendedResourceNs is the namespace for extended resources.
ExtendedResourceNs = "feature.node.kubernetes.io"
// ExtendedResourceSubNsSuffix is the suffix for allowed extended resources sub-namespaces.
ExtendedResourceSubNsSuffix = "." + ExtendedResourceNs
// ExtendedResourceAnnotation is the annotation that holds all extended resources managed by NFD.
ExtendedResourceAnnotation = AnnotationNs + "/extended-resources"

View file

@ -30,13 +30,15 @@ import (
// RuleOutput contains the output of rule execution.
// +k8s:deepcopy-gen=false
type RuleOutput struct {
Labels map[string]string
Vars map[string]string
Taints []corev1.Taint
ExtendedResources map[string]string
Labels map[string]string
Vars map[string]string
Taints []corev1.Taint
}
// Execute the rule against a set of input features.
func (r *Rule) Execute(features *Features) (RuleOutput, error) {
extendedResources := make(map[string]string)
labels := make(map[string]string)
vars := make(map[string]string)
@ -88,6 +90,10 @@ func (r *Rule) Execute(features *Features) (RuleOutput, error) {
}
}
for k, v := range r.ExtendedResources {
extendedResources[k] = v
}
for k, v := range r.Labels {
labels[k] = v
}
@ -95,7 +101,7 @@ func (r *Rule) Execute(features *Features) (RuleOutput, error) {
vars[k] = v
}
ret := RuleOutput{Labels: labels, Vars: vars, Taints: r.Taints}
ret := RuleOutput{ExtendedResources: extendedResources, Labels: labels, Vars: vars, Taints: r.Taints}
utils.KlogDump(2, fmt.Sprintf("rule %q matched with: ", r.Name), " ", ret)
return ret, nil
}

View file

@ -163,6 +163,10 @@ type Rule struct {
// +optional
Taints []corev1.Taint `json:"taints,omitempty"`
// ExtendedResources to create if the rule matches.
// +optional
ExtendedResources map[string]string `json:"extendedResources"`
// MatchFeatures specifies a set of matcher terms all of which must match.
// +optional
MatchFeatures FeatureMatcher `json:"matchFeatures"`

View file

@ -527,6 +527,13 @@ func (in *Rule) DeepCopyInto(out *Rule) {
(*in)[i].DeepCopyInto(&(*out)[i])
}
}
if in.ExtendedResources != nil {
in, out := &in.ExtendedResources, &out.ExtendedResources
*out = make(map[string]string, len(*in))
for key, val := range *in {
(*out)[key] = val
}
}
if in.MatchFeatures != nil {
in, out := &in.MatchFeatures, &out.MatchFeatures
*out = make(FeatureMatcher, len(*in))

View file

@ -360,7 +360,8 @@ func TestSetLabels(t *testing.T) {
instance := "foo"
vendorFeatureLabel := "vendor." + nfdv1alpha1.FeatureLabelNs + "/feature-4"
vendorProfileLabel := "vendor." + nfdv1alpha1.ProfileLabelNs + "/feature-5"
mockLabels := map[string]string{"feature-1": "val-1",
mockLabels := map[string]string{
"feature-1": "val-1",
"valid.ns/feature-2": "val-2",
"random.denied.ns/feature-3": "val-3",
"kubernetes.io/feature-4": "val-4",

View file

@ -37,8 +37,8 @@ import (
"google.golang.org/grpc/health/grpc_health_v1"
"google.golang.org/grpc/peer"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/labels"
label "k8s.io/apimachinery/pkg/labels"
k8sQuantity "k8s.io/apimachinery/pkg/api/resource"
k8sLabels "k8s.io/apimachinery/pkg/labels"
"k8s.io/client-go/kubernetes"
restclient "k8s.io/client-go/rest"
"k8s.io/klog/v2"
@ -447,7 +447,6 @@ func (m *nfdMaster) updateMasterNode() error {
// arriving through the gRPC API.
func (m *nfdMaster) filterFeatureLabels(labels Labels) (Labels, ExtendedResources) {
outLabels := Labels{}
for label, value := range labels {
// Add possibly missing default ns
label := addNs(label, nfdv1alpha1.FeatureLabelNs)
@ -541,6 +540,20 @@ func isNamespaceDenied(labelNs string, wildcardDeniedNs map[string]struct{}, nor
return false
}
// isNamespaceAllowed reports whether labelNs is explicitly allowed. A
// namespace is allowed when it is exactly one of the keys of normalAllowedNs,
// or when it ends with one of the suffixes that are the keys of
// wildcardAllowedNs.
func isNamespaceAllowed(labelNs string, wildcardAllowedNs map[string]struct{}, normalAllowedNs map[string]struct{}) bool {
	// Exact match: a set lookup is equivalent to comparing against every key.
	if _, exact := normalAllowedNs[labelNs]; exact {
		return true
	}

	// Wildcard match: any registered suffix qualifies.
	for suffix := range wildcardAllowedNs {
		if strings.HasSuffix(labelNs, suffix) {
			return true
		}
	}

	return false
}
// SetLabels implements LabelerServer
func (m *nfdMaster) SetLabels(c context.Context, r *pb.SetLabelsRequest) (*pb.SetLabelsReply, error) {
err := authorizeClient(c, m.args.VerifyNodeName, r.NodeName)
@ -596,7 +609,7 @@ func (m *nfdMaster) nfdAPIUpdateAllNodes() error {
}
func (m *nfdMaster) nfdAPIUpdateOneNode(nodeName string) error {
sel := labels.SelectorFromSet(labels.Set{nfdv1alpha1.NodeFeatureObjNodeNameLabel: nodeName})
sel := k8sLabels.SelectorFromSet(k8sLabels.Set{nfdv1alpha1.NodeFeatureObjNodeNameLabel: nodeName})
objs, err := m.nfdController.featureLister.List(sel)
if err != nil {
return fmt.Errorf("failed to get NodeFeature resources for node %q: %w", nodeName, err)
@ -662,20 +675,87 @@ func (m *nfdMaster) nfdAPIUpdateOneNode(nodeName string) error {
return nil
}
func (m *nfdMaster) refreshNodeFeatures(cli *kubernetes.Clientset, nodeName string, annotations, labels map[string]string, features *nfdv1alpha1.Features) error {
// filterExtendedResources filters extended resources and returns a map
// of valid extended resources.
//
// For every requested resource the following is done:
//   - Names without a namespace are prefixed with
//     nfdv1alpha1.ExtendedResourceNs.
//   - Names in "kubernetes.io" or one of its sub-namespaces are dropped,
//     unless the namespace is nfdv1alpha1.ExtendedResourceNs or one of its
//     sub-namespaces.
//   - A value starting with "@" is a dynamic value in the form
//     "@<domain>.<feature>.<element>"; it is replaced by the value of that
//     element looked up in the attribute features.
//   - The resulting value must parse as a Kubernetes resource quantity; the
//     canonical string form of the quantity is stored in the result.
//
// Invalid entries are logged and skipped, they never abort the processing of
// the remaining entries.
func (m *nfdMaster) filterExtendedResources(features *nfdv1alpha1.Features, extendedResources ExtendedResources) ExtendedResources {
	outExtendedResources := ExtendedResources{}
	deniedNs := map[string]struct{}{"kubernetes.io": {}}
	deniedWildCardNs := map[string]struct{}{".kubernetes.io": {}}
	allowedNs := map[string]struct{}{nfdv1alpha1.ExtendedResourceNs: {}}
	allowedWildCardNs := map[string]struct{}{nfdv1alpha1.ExtendedResourceSubNsSuffix: {}}

	for extendedResource, capacity := range extendedResources {
		if strings.Contains(extendedResource, "/") {
			// Check if given NS is allowed
			ns, _ := splitNs(extendedResource)
			if isNamespaceDenied(ns, deniedWildCardNs, deniedNs) && !isNamespaceAllowed(ns, allowedWildCardNs, allowedNs) {
				klog.Errorf("namespace %q is not allowed. Ignoring Extended Resource %q", ns, extendedResource)
				continue
			}
		} else {
			// Add possibly missing default ns
			extendedResource = path.Join(nfdv1alpha1.ExtendedResourceNs, extendedResource)
		}

		// Static value (pre-defined in the NodeFeatureRule) unless overridden
		// by the dynamic lookup below.
		value := capacity

		// Dynamic Value
		if strings.HasPrefix(capacity, "@") {
			// capacity is a string in the form of @domain.feature.element;
			// anything after the second dot belongs to the element name.
			split := strings.SplitN(capacity[1:], ".", 3)
			if len(split) != 3 {
				// Guard against malformed references (e.g. "@foo") which
				// would otherwise cause an index-out-of-range panic.
				klog.Errorf("capacity %q is not in the form of '@domain.feature.element'. Ignoring Extended Resource %q", capacity, extendedResource)
				continue
			}
			featureName := split[0] + "." + split[1]
			elementName := split[2]

			// A nil features pointer cannot contain the feature; treat it
			// as "not found" instead of dereferencing it.
			if features == nil {
				klog.Errorf("feature %s not found. Ignoring Extended Resource %q", featureName, extendedResource)
				continue
			}
			attrFeatureSet, ok := features.Attributes[featureName]
			if !ok {
				klog.Errorf("feature %s not found. Ignoring Extended Resource %q", featureName, extendedResource)
				continue
			}
			element, ok := attrFeatureSet.Elements[elementName]
			if !ok {
				klog.Errorf("element %s not found on feature %s. Ignoring Extended Resource %q", elementName, featureName, extendedResource)
				continue
			}
			value = element
		}

		q, err := k8sQuantity.ParseQuantity(value)
		if err != nil {
			// Log the offending input, not the zero quantity of the failed parse.
			klog.Errorf("bad value %q encountered for extended resource %q: %v", value, extendedResource, err)
			continue
		}
		outExtendedResources[extendedResource] = q.String()
	}

	return outExtendedResources
}
func (m *nfdMaster) refreshNodeFeatures(cli *kubernetes.Clientset, nodeName string, annotations Annotations, labels map[string]string, features *nfdv1alpha1.Features) error {
if labels == nil {
labels = make(map[string]string)
}
crLabels, crTaints := m.processNodeFeatureRule(features)
crLabels, crExtendedResources, crTaints := m.processNodeFeatureRule(features)
// Mix in CR-originated labels
for k, v := range crLabels {
labels[k] = v
}
// Remove labels which are intended to be extended resources via
// -resource-labels or their NS is not whitelisted
labels, extendedResources := m.filterFeatureLabels(labels)
// Mix in CR-originated extended resources with -resource-labels
for k, v := range crExtendedResources {
extendedResources[k] = v
}
extendedResources = m.filterExtendedResources(features, extendedResources)
var taints []corev1.Taint
if m.config.EnableTaints {
taints = filterTaints(crTaints)
@ -795,21 +875,22 @@ func authorizeClient(c context.Context, checkNodeName bool, nodeName string) err
return nil
}
func (m *nfdMaster) processNodeFeatureRule(features *nfdv1alpha1.Features) (map[string]string, []corev1.Taint) {
func (m *nfdMaster) processNodeFeatureRule(features *nfdv1alpha1.Features) (Labels, ExtendedResources, []corev1.Taint) {
if m.nfdController == nil {
return nil, nil
return nil, nil, nil
}
extendedResources := ExtendedResources{}
labels := make(map[string]string)
var taints []corev1.Taint
ruleSpecs, err := m.nfdController.ruleLister.List(label.Everything())
ruleSpecs, err := m.nfdController.ruleLister.List(k8sLabels.Everything())
sort.Slice(ruleSpecs, func(i, j int) bool {
return ruleSpecs[i].Name < ruleSpecs[j].Name
})
if err != nil {
klog.Errorf("failed to list NodeFeatureRule resources: %v", err)
return nil, nil
return nil, nil, nil
}
// Process all rule CRs
@ -831,6 +912,9 @@ func (m *nfdMaster) processNodeFeatureRule(features *nfdv1alpha1.Features) (map[
for k, v := range ruleOut.Labels {
labels[k] = v
}
for k, v := range ruleOut.ExtendedResources {
extendedResources[k] = v
}
// Feed back rule output to features map for subsequent rules to match
features.InsertAttributeFeatures(nfdv1alpha1.RuleBackrefDomain, nfdv1alpha1.RuleBackrefFeature, ruleOut.Labels)
@ -838,7 +922,7 @@ func (m *nfdMaster) processNodeFeatureRule(features *nfdv1alpha1.Features) (map[
}
}
return labels, taints
return labels, extendedResources, taints
}
// updateNodeObject ensures the Kubernetes node object is up to date,

View file

@ -0,0 +1,32 @@
apiVersion: nfd.k8s-sigs.io/v1alpha1
kind: NodeFeatureRule
metadata:
name: e2e-extened-resource-test
spec:
rules:
- name: "e2e no ns rule"
extendedResources:
nons: "123"
matchFeatures:
- feature: "fake.attribute"
matchExpressions:
"attr_1": {op: IsTrue}
"attr_2": {op: IsFalse}
- name: "e2e Dynamic rule"
extendedResources:
vendor.io/dynamic: "@fake.attribute.attr_3"
matchFeatures:
- feature: "fake.attribute"
matchExpressions:
"attr_3": {op: Exists}
- name: "e2e static rule"
extendedResources:
vendor.io/static: "123"
matchFeatures:
- name: "e2e not allowed rule"
extendedResources:
bad.kubernetes.io/malo: "999"
matchFeatures:

View file

@ -31,6 +31,7 @@ import (
corev1 "k8s.io/api/core/v1"
apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
extclient "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset"
resourcev1 "k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
clientset "k8s.io/client-go/kubernetes"
taintutils "k8s.io/kubernetes/pkg/util/taints"
@ -805,6 +806,35 @@ core:
Expect(waitForNfdNodeTaints(f.ClientSet, expectedTaintsUpdated, nodes)).NotTo(HaveOccurred())
Expect(waitForNfdNodeAnnotations(f.ClientSet, expectedAnnotationUpdated)).NotTo(HaveOccurred())
By("Deleting NodeFeatureRule object")
err = nfdClient.NfdV1alpha1().NodeFeatureRules().Delete(context.TODO(), "e2e-test-3", metav1.DeleteOptions{})
Expect(err).NotTo(HaveOccurred())
expectedERAnnotation := map[string]string{
"nfd.node.kubernetes.io/extended-resources": "nons,vendor.io/dynamic,vendor.io/static"}
expectedCapacity := corev1.ResourceList{
"feature.node.kubernetes.io/nons": resourcev1.MustParse("123"),
"vendor.io/dynamic": resourcev1.MustParse("10"),
"vendor.io/static": resourcev1.MustParse("123"),
}
By("Creating NodeFeatureRules #4")
Expect(testutils.CreateNodeFeatureRulesFromFile(nfdClient, "nodefeaturerule-4.yaml")).NotTo(HaveOccurred())
By("Verifying node annotations from NodeFeatureRules #4")
Expect(waitForNfdNodeAnnotations(f.ClientSet, expectedERAnnotation)).NotTo(HaveOccurred())
By("Verfiying node status capacity from NodeFeatureRules #4")
Expect(waitForCapacity(f.ClientSet, expectedCapacity, nodes)).NotTo(HaveOccurred())
By("Deleting NodeFeatureRule object")
err = nfdClient.NfdV1alpha1().NodeFeatureRules().Delete(context.TODO(), "e2e-extened-resource-test", metav1.DeleteOptions{})
Expect(err).NotTo(HaveOccurred())
By("Verfiying node status capacity from NodeFeatureRules #4")
Expect(waitForCapacity(f.ClientSet, nil, nodes)).NotTo(HaveOccurred())
By("Deleting nfd-worker daemonset")
err = f.ClientSet.AppsV1().DaemonSets(f.Namespace.Name).Delete(context.TODO(), workerDS.Name, metav1.DeleteOptions{})
Expect(err).NotTo(HaveOccurred())
@ -893,6 +923,42 @@ denyLabelNs: []
})
// simplePoll is a simple and stupid re-try loop. It calls poll up to three
// times and returns nil as soon as one call succeeds. If every attempt fails,
// the error of the last attempt is returned.
//
// wait is the pause between two attempts expressed as a number of seconds
// (it is multiplied by time.Second), so callers pass plain numbers such as 2
// or 10.
func simplePoll(poll func() error, wait time.Duration) error {
	const attempts = 3

	var err error
	for retry := 0; retry < attempts; retry++ {
		if err = poll(); err == nil {
			return nil
		}
		// Only pause between attempts: sleeping after the final failure
		// would just delay reporting the error to the caller.
		if retry < attempts-1 {
			time.Sleep(wait * time.Second)
		}
	}
	return err
}
// waitForCapacity waits for the capacity to be updated in the node status.
//
// For each node returned by getNonControlPlaneNodes the expected capacity is
// built from a copy of the capacity recorded for that node in oldNodes, with
// the entries of expectedNewERs added on top (overriding equal keys). The
// check is retried through simplePoll and the error of the last attempt is
// returned if the node capacities never match.
func waitForCapacity(cli clientset.Interface, expectedNewERs corev1.ResourceList, oldNodes []corev1.Node) error {
	checkCapacity := func() error {
		currentNodes, err := getNonControlPlaneNodes(cli)
		if err != nil {
			return err
		}

		for _, current := range currentNodes {
			// Work on a deep copy so that the snapshot in oldNodes is not
			// modified when the expected resources are mixed in.
			baseline := getNode(oldNodes, current.Name)
			want := baseline.Status.DeepCopy().Capacity
			for name, quantity := range expectedNewERs {
				want[name] = quantity
			}

			got := current.Status.Capacity
			if cmp.Equal(want, got) {
				continue
			}
			return fmt.Errorf("node %q capacity does not match expected, diff (expected vs. received): %s", current.Name, cmp.Diff(want, got))
		}
		return nil
	}

	return simplePoll(checkCapacity, 10)
}
// waitForNfdNodeAnnotations waits for node to be annotated as expected.
func waitForNfdNodeAnnotations(cli clientset.Interface, expected map[string]string) error {
poll := func() error {
@ -910,15 +976,7 @@ func waitForNfdNodeAnnotations(cli clientset.Interface, expected map[string]stri
return nil
}
// Simple and stupid re-try loop
var err error
for retry := 0; retry < 3; retry++ {
if err = poll(); err == nil {
return nil
}
time.Sleep(2 * time.Second)
}
return err
return simplePoll(poll, 2)
}
type k8sLabels map[string]string
@ -951,15 +1009,7 @@ func checkForNodeLabels(cli clientset.Interface, expectedNewLabels map[string]k8
return nil
}
// Simple and stupid re-try loop
var err error
for retry := 0; retry < 3; retry++ {
if err = poll(); err == nil {
return nil
}
time.Sleep(2 * time.Second)
}
return err
return simplePoll(poll, 3)
}
// waitForNfdNodeTaints waits for node to be tainted as expected.
@ -981,15 +1031,7 @@ func waitForNfdNodeTaints(cli clientset.Interface, expectedNewTaints []corev1.Ta
return nil
}
// Simple and stupid re-try loop
var err error
for retry := 0; retry < 3; retry++ {
if err = poll(); err == nil {
return nil
}
time.Sleep(10 * time.Second)
}
return err
return simplePoll(poll, 10)
}
// getNonControlPlaneNodes gets the nodes that are not tainted for exclusive control-plane usage

View file

@ -141,7 +141,7 @@ func createClusterRoleMaster(cs clientset.Interface) (*rbacv1.ClusterRole, error
Rules: []rbacv1.PolicyRule{
{
APIGroups: []string{""},
Resources: []string{"nodes"},
Resources: []string{"nodes", "nodes/status"},
Verbs: []string{"get", "list", "patch", "update"},
},
{