mirror of
https://github.com/kubernetes-sigs/node-feature-discovery.git
synced 2025-03-09 18:27:01 +00:00
Expose metrics via prometheus.monitoring.coreos.com/v1

The exposed metrics are:

| Metric | Type | Meaning |
| --------------- | ---------------- | ---------------- |
| `nfd_master_build_info` | Gauge | Version from which nfd-master was built. |
| `nfd_worker_build_info` | Gauge | Version from which nfd-worker was built. |
| `nfd_updated_nodes` | Counter | Time taken to label a node |
| `nfd_crd_processing_time` | Gauge | Time taken to process a NodeFeatureRule CRD |
| `nfd_feature_discovery_duration_seconds` | HistogramVec | Time taken to discover features on a node |

Signed-off-by: Carlos Eduardo Arango Gutierrez <eduardoa@nvidia.com>
Co-authored-by: Markus Lehtonen <markus.lehtonen@intel.com>
80 lines
2.3 KiB
Go
80 lines
2.3 KiB
Go
/*
|
|
Copyright 2023 The Kubernetes Authors.
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
*/
|
|
|
|
package nfdworker
|
|
|
|
import (
|
|
"fmt"
|
|
"net/http"
|
|
|
|
"github.com/prometheus/client_golang/prometheus"
|
|
"github.com/prometheus/client_golang/prometheus/promhttp"
|
|
"k8s.io/klog/v2"
|
|
"sigs.k8s.io/node-feature-discovery/pkg/version"
|
|
)
|
|
|
|
// Names of the Prometheus metrics defined in this file.
//
// When adding metric names, see https://prometheus.io/docs/practices/naming/#metric-names
const (
	// buildInfoQuery is the metric name used for the buildInfo gauge.
	buildInfoQuery = "nfd_worker_build_info"

	// featureDiscoveryDurationQuery is the metric name used for the
	// featureDiscoveryDuration histogram.
	featureDiscoveryDurationQuery = "nfd_feature_discovery_duration_seconds"
)
|
|
|
|
var (
|
|
srv *http.Server
|
|
|
|
featureDiscoveryDuration = prometheus.NewHistogramVec(
|
|
prometheus.HistogramOpts{
|
|
Name: featureDiscoveryDurationQuery,
|
|
Help: "Time taken to discover features",
|
|
},
|
|
[]string{"NodeName"},
|
|
)
|
|
buildInfo = prometheus.NewGauge(prometheus.GaugeOpts{
|
|
Name: buildInfoQuery,
|
|
Help: "Version from which Node Feature Discovery was built.",
|
|
ConstLabels: map[string]string{
|
|
"version": version.Get(),
|
|
},
|
|
})
|
|
)
|
|
|
|
// registerVersion exposes the Operator build version.
|
|
func registerVersion(version string) {
|
|
buildInfo.SetToCurrentTime()
|
|
}
|
|
|
|
// runMetricsServer starts a http server to expose metrics
|
|
func runMetricsServer(port int) {
|
|
r := prometheus.NewRegistry()
|
|
r.MustRegister(featureDiscoveryDuration)
|
|
r.MustRegister(buildInfo)
|
|
|
|
mux := http.NewServeMux()
|
|
mux.Handle("/metrics", promhttp.HandlerFor(r, promhttp.HandlerOpts{}))
|
|
|
|
klog.InfoS("metrics server starting", "port", port)
|
|
srv = &http.Server{Addr: fmt.Sprintf(":%d", port), Handler: mux}
|
|
klog.InfoS("metrics server stopped", "exit code", srv.ListenAndServe())
|
|
}
|
|
|
|
// stopMetricsServer stops the metrics server
|
|
func stopMetricsServer() {
|
|
if srv != nil {
|
|
klog.InfoS("stopping metrics server", "port", srv.Addr)
|
|
srv.Close()
|
|
}
|
|
}
|