mirror of
https://github.com/kubernetes-sigs/node-feature-discovery.git
synced 2024-12-14 11:57:51 +00:00
metrics: counters for rejected labels, extended resources and taints
Add counters for labels, extended resources and taints rejected/filtered out by nfd-master.
This commit is contained in:
parent
a8a29e6df2
commit
4b24cc1afa
3 changed files with 27 additions and 1 deletions
|
@ -19,6 +19,9 @@ The exposed metrics are
|
|||
| `nfd_worker_build_info` | Gauge | Version from which nfd-worker was built
|
||||
| `nfd_node_updates_total` | Counter | Number of nodes updated
|
||||
| `nfd_node_update_failures_total` | Counter | Number of node update failures
|
||||
| `nfd_node_labels_rejected_total` | Counter | Number of node labels rejected by nfd-master
|
||||
| `nfd_node_extendedresources_rejected_total` | Counter | Number of node extended resources rejected by nfd-master
|
||||
| `nfd_node_taints_rejected_total` | Counter | Number of node taints rejected by nfd-master
|
||||
| `nfd_nodefeaturerule_processing_duration_seconds` | Histogram | Time taken to process NodeFeatureRule objects
|
||||
| `nfd_nodefeaturerule_processing_errors_total` | Counter | Number of errors encountered while processing NodeFeatureRule objects
|
||||
| `nfd_feature_discovery_duration_seconds` | Histogram | Time taken to discover features on a node
|
||||
|
|
|
@ -31,6 +31,9 @@ const (
|
|||
buildInfoQuery = "nfd_master_build_info"
|
||||
nodeUpdatesQuery = "nfd_node_updates_total"
|
||||
nodeUpdateFailuresQuery = "nfd_node_update_failures_total"
|
||||
nodeLabelsRejectedQuery = "nfd_node_labels_rejected_total"
|
||||
nodeERsRejectedQuery = "nfd_node_extendedresources_rejected_total"
|
||||
nodeTaintsRejectedQuery = "nfd_node_taints_rejected_total"
|
||||
nfrProcessingTimeQuery = "nfd_nodefeaturerule_processing_duration_seconds"
|
||||
nfrProcessingErrorsQuery = "nfd_nodefeaturerule_processing_errors_total"
|
||||
)
|
||||
|
@ -53,6 +56,18 @@ var (
|
|||
Name: nodeUpdateFailuresQuery,
|
||||
Help: "Number of node update failures.",
|
||||
})
|
||||
nodeLabelsRejected = prometheus.NewCounter(prometheus.CounterOpts{
|
||||
Name: nodeLabelsRejectedQuery,
|
||||
Help: "Number of node labels that were rejected by nfd-master.",
|
||||
})
|
||||
nodeERsRejected = prometheus.NewCounter(prometheus.CounterOpts{
|
||||
Name: nodeERsRejectedQuery,
|
||||
Help: "Number of node extended resources that were rejected by nfd-master.",
|
||||
})
|
||||
nodeTaintsRejected = prometheus.NewCounter(prometheus.CounterOpts{
|
||||
Name: nodeTaintsRejectedQuery,
|
||||
Help: "Number of node taints that were rejected by nfd-master.",
|
||||
})
|
||||
nfrProcessingTime = prometheus.NewHistogramVec(
|
||||
prometheus.HistogramOpts{
|
||||
Name: nfrProcessingTimeQuery,
|
||||
|
@ -78,9 +93,13 @@ func registerVersion(version string) {
|
|||
// runMetricsServer starts a http server to expose metrics
|
||||
func runMetricsServer(port int) {
|
||||
r := prometheus.NewRegistry()
|
||||
r.MustRegister(buildInfo,
|
||||
r.MustRegister(
|
||||
buildInfo,
|
||||
nodeUpdates,
|
||||
nodeUpdateFailures,
|
||||
nodeLabelsRejected,
|
||||
nodeERsRejected,
|
||||
nodeTaintsRejected,
|
||||
nfrProcessingTime,
|
||||
nfrProcessingErrors)
|
||||
|
||||
|
|
|
@ -510,6 +510,7 @@ func (m *nfdMaster) filterFeatureLabels(labels Labels, features *nfdv1alpha1.Fea
|
|||
|
||||
if value, err := m.filterFeatureLabel(name, value, features); err != nil {
|
||||
klog.ErrorS(err, "ignoring label", "labelKey", name, "labelValue", value)
|
||||
nodeLabelsRejected.Inc()
|
||||
} else {
|
||||
outLabels[name] = value
|
||||
}
|
||||
|
@ -523,6 +524,7 @@ func (m *nfdMaster) filterFeatureLabels(labels Labels, features *nfdv1alpha1.Fea
|
|||
if value, ok := outLabels[extendedResourceName]; ok {
|
||||
if _, err := strconv.Atoi(value); err != nil {
|
||||
klog.ErrorS(err, "bad label value encountered for extended resource", "labelKey", extendedResourceName, "labelValue", value)
|
||||
nodeERsRejected.Inc()
|
||||
continue // non-numeric label can't be used
|
||||
}
|
||||
|
||||
|
@ -603,6 +605,7 @@ func filterTaints(taints []corev1.Taint) []corev1.Taint {
|
|||
for _, taint := range taints {
|
||||
if err := filterTaint(&taint); err != nil {
|
||||
klog.ErrorS(err, "ignoring taint", "taint", taint)
|
||||
nodeTaintsRejected.Inc()
|
||||
} else {
|
||||
outTaints = append(outTaints, taint)
|
||||
}
|
||||
|
@ -786,6 +789,7 @@ func filterExtendedResources(features *nfdv1alpha1.Features, extendedResources E
|
|||
capacity, err := filterExtendedResource(name, value, features)
|
||||
if err != nil {
|
||||
klog.ErrorS(err, "failed to create extended resources", "extendedResourceName", name, "extendedResourceValue", value)
|
||||
nodeERsRejected.Inc()
|
||||
} else {
|
||||
outExtendedResources[name] = capacity
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue