1
0
Fork 0
mirror of https://github.com/kubernetes-sigs/node-feature-discovery.git synced 2025-03-30 19:54:46 +00:00

metrics: add nfd_nodefeaturerule_processing_errors_total counter

Add a counter for errors encountered when processing NodeFeatureRules.
Another simple counter without any additional prometheus labels -
nfd-master logs can provide further details.
This commit is contained in:
Markus Lehtonen 2023-08-01 15:50:03 +03:00
parent b90f2c318e
commit a8a29e6df2
3 changed files with 13 additions and 5 deletions

View file

@ -20,6 +20,7 @@ The exposed metrics are
| `nfd_node_updates_total` | Counter | Number of nodes updated
| `nfd_node_update_failures_total` | Counter | Number of nodes update failures
| `nfd_nodefeaturerule_processing_duration_seconds` | Histogram | Time taken to process NodeFeatureRule objects
| `nfd_nodefeaturerule_processing_errors_total` | Counter | Number of errors encountered while processing NodeFeatureRule objects
| `nfd_feature_discovery_duration_seconds` | Histogram | Time taken to discover features on a node
## Via Kustomize

View file

@ -28,10 +28,11 @@ import (
// When adding metric names, see https://prometheus.io/docs/practices/naming/#metric-names
const (
buildInfoQuery = "nfd_master_build_info"
nodeUpdatesQuery = "nfd_node_updates_total"
nodeUpdateFailuresQuery = "nfd_node_update_failures_total"
nfrProcessingTimeQuery = "nfd_nodefeaturerule_processing_duration_seconds"
buildInfoQuery = "nfd_master_build_info"
nodeUpdatesQuery = "nfd_node_updates_total"
nodeUpdateFailuresQuery = "nfd_node_update_failures_total"
nfrProcessingTimeQuery = "nfd_nodefeaturerule_processing_duration_seconds"
nfrProcessingErrorsQuery = "nfd_nodefeaturerule_processing_errors_total"
)
var (
@ -63,6 +64,10 @@ var (
"node",
},
)
nfrProcessingErrors = prometheus.NewCounter(prometheus.CounterOpts{
Name: nfrProcessingErrorsQuery,
Help: "Number of errors encountered while processing NodeFeatureRule objects.",
})
)
// registerVersion exposes the Operator build version.
@ -76,7 +81,8 @@ func runMetricsServer(port int) {
r.MustRegister(buildInfo,
nodeUpdates,
nodeUpdateFailures,
nfrProcessingTime)
nfrProcessingTime,
nfrProcessingErrors)
mux := http.NewServeMux()
mux.Handle("/metrics", promhttp.HandlerFor(r, promhttp.HandlerOpts{}))

View file

@ -991,6 +991,7 @@ func (m *nfdMaster) processNodeFeatureRule(nodeName string, features *nfdv1alpha
ruleOut, err := rule.Execute(features)
if err != nil {
klog.ErrorS(err, "failed to process rule", "ruleName", rule.Name, "nodefeaturerule", klog.KObj(spec), "nodeName", nodeName)
nfrProcessingErrors.Inc()
continue
}
taints = append(taints, ruleOut.Taints...)