mirror of
https://github.com/element-hq/synapse.git
synced 2024-12-14 11:57:44 +00:00
Remove legacy Prometheus metrics names. They were deprecated in Synapse v1.69.0 and disabled by default in Synapse v1.71.0. (#14538)
This commit is contained in:
parent
3b4e150868
commit
9af2be192a
11 changed files with 70 additions and 338 deletions
1
changelog.d/14538.removal
Normal file
1
changelog.d/14538.removal
Normal file
|
@ -0,0 +1 @@
|
|||
Remove legacy Prometheus metrics names. They were deprecated in Synapse v1.69.0 and disabled by default in Synapse v1.71.0.
|
|
@ -88,6 +88,28 @@ process, for example:
|
|||
dpkg -i matrix-synapse-py3_1.3.0+stretch1_amd64.deb
|
||||
```
|
||||
|
||||
# Upgrading to v1.73.0
|
||||
|
||||
## Legacy Prometheus metric names have now been removed
|
||||
|
||||
Synapse v1.69.0 included the deprecation of legacy Prometheus metric names
|
||||
and offered an option to disable them.
|
||||
Synapse v1.71.0 disabled legacy Prometheus metric names by default.
|
||||
|
||||
This version, v1.73.0, removes those legacy Prometheus metric names entirely.
|
||||
This also means that the `enable_legacy_metrics` configuration option has been
|
||||
removed; it will no longer be possible to re-enable the legacy metric names.
|
||||
|
||||
If you use metrics and have not yet updated your Grafana dashboard(s),
|
||||
Prometheus console(s) or alerting rule(s), please consider doing so when upgrading
|
||||
to this version.
|
||||
Note that the included Grafana dashboard was updated in v1.72.0 to correct some
|
||||
metric names which were missed when legacy metrics were disabled by default.
|
||||
|
||||
See [v1.69.0: Deprecation of legacy Prometheus metric names](#deprecation-of-legacy-prometheus-metric-names)
|
||||
for more context.
|
||||
|
||||
|
||||
# Upgrading to v1.72.0
|
||||
|
||||
## Dropping support for PostgreSQL 10
|
||||
|
|
|
@ -2437,31 +2437,6 @@ Example configuration:
|
|||
enable_metrics: true
|
||||
```
|
||||
---
|
||||
### `enable_legacy_metrics`
|
||||
|
||||
Set to `true` to publish both legacy and non-legacy Prometheus metric names,
|
||||
or to `false` to only publish non-legacy Prometheus metric names.
|
||||
Defaults to `false`. Has no effect if `enable_metrics` is `false`.
|
||||
**In Synapse v1.67.0 up to and including Synapse v1.70.1, this defaulted to `true`.**
|
||||
|
||||
Legacy metric names include:
|
||||
- metrics containing colons in the name, such as `synapse_util_caches_response_cache:hits`, because colons are supposed to be reserved for user-defined recording rules;
|
||||
- counters that don't end with the `_total` suffix, such as `synapse_federation_client_sent_edus`, therefore not adhering to the OpenMetrics standard.
|
||||
|
||||
These legacy metric names are unconventional and not compliant with OpenMetrics standards.
|
||||
They are included for backwards compatibility.
|
||||
|
||||
Example configuration:
|
||||
```yaml
|
||||
enable_legacy_metrics: false
|
||||
```
|
||||
|
||||
See https://github.com/matrix-org/synapse/issues/11106 for context.
|
||||
|
||||
*Since v1.67.0.*
|
||||
|
||||
**Will be removed in v1.73.0.**
|
||||
---
|
||||
### `sentry`
|
||||
|
||||
Use this option to enable sentry integration. Provide the DSN assigned to you by sentry
|
||||
|
|
|
@ -266,26 +266,18 @@ def register_start(
|
|||
reactor.callWhenRunning(lambda: defer.ensureDeferred(wrapper()))
|
||||
|
||||
|
||||
def listen_metrics(
|
||||
bind_addresses: Iterable[str], port: int, enable_legacy_metric_names: bool
|
||||
) -> None:
|
||||
def listen_metrics(bind_addresses: Iterable[str], port: int) -> None:
|
||||
"""
|
||||
Start Prometheus metrics server.
|
||||
"""
|
||||
from prometheus_client import start_http_server as start_http_server_prometheus
|
||||
|
||||
from synapse.metrics import (
|
||||
RegistryProxy,
|
||||
start_http_server as start_http_server_legacy,
|
||||
)
|
||||
from synapse.metrics import RegistryProxy
|
||||
|
||||
for host in bind_addresses:
|
||||
logger.info("Starting metrics listener on %s:%d", host, port)
|
||||
if enable_legacy_metric_names:
|
||||
start_http_server_legacy(port, addr=host, registry=RegistryProxy)
|
||||
else:
|
||||
_set_prometheus_client_use_created_metrics(False)
|
||||
start_http_server_prometheus(port, addr=host, registry=RegistryProxy)
|
||||
_set_prometheus_client_use_created_metrics(False)
|
||||
start_http_server_prometheus(port, addr=host, registry=RegistryProxy)
|
||||
|
||||
|
||||
def _set_prometheus_client_use_created_metrics(new_value: bool) -> None:
|
||||
|
|
|
@ -320,7 +320,6 @@ class GenericWorkerServer(HomeServer):
|
|||
_base.listen_metrics(
|
||||
listener.bind_addresses,
|
||||
listener.port,
|
||||
enable_legacy_metric_names=self.config.metrics.enable_legacy_metrics,
|
||||
)
|
||||
else:
|
||||
logger.warning("Unsupported listener type: %s", listener.type)
|
||||
|
|
|
@ -265,7 +265,6 @@ class SynapseHomeServer(HomeServer):
|
|||
_base.listen_metrics(
|
||||
listener.bind_addresses,
|
||||
listener.port,
|
||||
enable_legacy_metric_names=self.config.metrics.enable_legacy_metrics,
|
||||
)
|
||||
else:
|
||||
# this shouldn't happen, as the listener type should have been checked
|
||||
|
|
|
@ -43,8 +43,6 @@ class MetricsConfig(Config):
|
|||
def read_config(self, config: JsonDict, **kwargs: Any) -> None:
|
||||
self.enable_metrics = config.get("enable_metrics", False)
|
||||
|
||||
self.enable_legacy_metrics = config.get("enable_legacy_metrics", False)
|
||||
|
||||
self.report_stats = config.get("report_stats", None)
|
||||
self.report_stats_endpoint = config.get(
|
||||
"report_stats_endpoint", "https://matrix.org/report-usage-stats/push"
|
||||
|
|
|
@ -47,11 +47,7 @@ from twisted.python.threadpool import ThreadPool
|
|||
# This module is imported for its side effects; flake8 needn't warn that it's unused.
|
||||
import synapse.metrics._reactor_metrics # noqa: F401
|
||||
from synapse.metrics._gc import MIN_TIME_BETWEEN_GCS, install_gc_manager
|
||||
from synapse.metrics._legacy_exposition import (
|
||||
MetricsResource,
|
||||
generate_latest,
|
||||
start_http_server,
|
||||
)
|
||||
from synapse.metrics._twisted_exposition import MetricsResource, generate_latest
|
||||
from synapse.metrics._types import Collector
|
||||
from synapse.util import SYNAPSE_VERSION
|
||||
|
||||
|
@ -474,7 +470,6 @@ __all__ = [
|
|||
"Collector",
|
||||
"MetricsResource",
|
||||
"generate_latest",
|
||||
"start_http_server",
|
||||
"LaterGauge",
|
||||
"InFlightGauge",
|
||||
"GaugeBucketCollector",
|
||||
|
|
|
@ -1,288 +0,0 @@
|
|||
# Copyright 2015-2019 Prometheus Python Client Developers
|
||||
# Copyright 2019 Matrix.org Foundation C.I.C.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""
|
||||
This code is based off `prometheus_client/exposition.py` from version 0.7.1.
|
||||
|
||||
Due to the renaming of metrics in prometheus_client 0.4.0, this customised
|
||||
vendoring of the code will emit both the old versions that Synapse dashboards
|
||||
expect, and the newer "best practice" version of the up-to-date official client.
|
||||
"""
|
||||
import logging
|
||||
import math
|
||||
import threading
|
||||
from http.server import BaseHTTPRequestHandler, HTTPServer
|
||||
from socketserver import ThreadingMixIn
|
||||
from typing import Any, Dict, List, Type, Union
|
||||
from urllib.parse import parse_qs, urlparse
|
||||
|
||||
from prometheus_client import REGISTRY, CollectorRegistry
|
||||
from prometheus_client.core import Sample
|
||||
|
||||
from twisted.web.resource import Resource
|
||||
from twisted.web.server import Request
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
CONTENT_TYPE_LATEST = "text/plain; version=0.0.4; charset=utf-8"
|
||||
|
||||
|
||||
def floatToGoString(d: Union[int, float]) -> str:
|
||||
d = float(d)
|
||||
if d == math.inf:
|
||||
return "+Inf"
|
||||
elif d == -math.inf:
|
||||
return "-Inf"
|
||||
elif math.isnan(d):
|
||||
return "NaN"
|
||||
else:
|
||||
s = repr(d)
|
||||
dot = s.find(".")
|
||||
# Go switches to exponents sooner than Python.
|
||||
# We only need to care about positive values for le/quantile.
|
||||
if d > 0 and dot > 6:
|
||||
mantissa = f"{s[0]}.{s[1:dot]}{s[dot + 1 :]}".rstrip("0.")
|
||||
return f"{mantissa}e+0{dot - 1}"
|
||||
return s
|
||||
|
||||
|
||||
def sample_line(line: Sample, name: str) -> str:
|
||||
if line.labels:
|
||||
labelstr = "{{{0}}}".format(
|
||||
",".join(
|
||||
[
|
||||
'{}="{}"'.format(
|
||||
k,
|
||||
v.replace("\\", r"\\").replace("\n", r"\n").replace('"', r"\""),
|
||||
)
|
||||
for k, v in sorted(line.labels.items())
|
||||
]
|
||||
)
|
||||
)
|
||||
else:
|
||||
labelstr = ""
|
||||
timestamp = ""
|
||||
if line.timestamp is not None:
|
||||
# Convert to milliseconds.
|
||||
timestamp = f" {int(float(line.timestamp) * 1000):d}"
|
||||
return "{}{} {}{}\n".format(name, labelstr, floatToGoString(line.value), timestamp)
|
||||
|
||||
|
||||
# Mapping from new metric names to legacy metric names.
|
||||
# We translate these back to their old names when exposing them through our
|
||||
# legacy vendored exporter.
|
||||
# Only this legacy exposition module applies these name changes.
|
||||
LEGACY_METRIC_NAMES = {
|
||||
"synapse_util_caches_cache_hits": "synapse_util_caches_cache:hits",
|
||||
"synapse_util_caches_cache_size": "synapse_util_caches_cache:size",
|
||||
"synapse_util_caches_cache_evicted_size": "synapse_util_caches_cache:evicted_size",
|
||||
"synapse_util_caches_cache": "synapse_util_caches_cache:total",
|
||||
"synapse_util_caches_response_cache_size": "synapse_util_caches_response_cache:size",
|
||||
"synapse_util_caches_response_cache_hits": "synapse_util_caches_response_cache:hits",
|
||||
"synapse_util_caches_response_cache_evicted_size": "synapse_util_caches_response_cache:evicted_size",
|
||||
"synapse_util_caches_response_cache": "synapse_util_caches_response_cache:total",
|
||||
"synapse_federation_client_sent_pdu_destinations": "synapse_federation_client_sent_pdu_destinations:total",
|
||||
"synapse_federation_client_sent_pdu_destinations_count": "synapse_federation_client_sent_pdu_destinations:count",
|
||||
"synapse_admin_mau_current": "synapse_admin_mau:current",
|
||||
"synapse_admin_mau_max": "synapse_admin_mau:max",
|
||||
"synapse_admin_mau_registered_reserved_users": "synapse_admin_mau:registered_reserved_users",
|
||||
}
|
||||
|
||||
|
||||
def generate_latest(registry: CollectorRegistry, emit_help: bool = False) -> bytes:
|
||||
"""
|
||||
Generate metrics in legacy format. Modern metrics are generated directly
|
||||
by prometheus-client.
|
||||
"""
|
||||
|
||||
output = []
|
||||
|
||||
for metric in registry.collect():
|
||||
if not metric.samples:
|
||||
# No samples, don't bother.
|
||||
continue
|
||||
|
||||
# Translate to legacy metric name if it has one.
|
||||
mname = LEGACY_METRIC_NAMES.get(metric.name, metric.name)
|
||||
mnewname = metric.name
|
||||
mtype = metric.type
|
||||
|
||||
# OpenMetrics -> Prometheus
|
||||
if mtype == "counter":
|
||||
mnewname = mnewname + "_total"
|
||||
elif mtype == "info":
|
||||
mtype = "gauge"
|
||||
mnewname = mnewname + "_info"
|
||||
elif mtype == "stateset":
|
||||
mtype = "gauge"
|
||||
elif mtype == "gaugehistogram":
|
||||
mtype = "histogram"
|
||||
elif mtype == "unknown":
|
||||
mtype = "untyped"
|
||||
|
||||
# Output in the old format for compatibility.
|
||||
if emit_help:
|
||||
output.append(
|
||||
"# HELP {} {}\n".format(
|
||||
mname,
|
||||
metric.documentation.replace("\\", r"\\").replace("\n", r"\n"),
|
||||
)
|
||||
)
|
||||
output.append(f"# TYPE {mname} {mtype}\n")
|
||||
|
||||
om_samples: Dict[str, List[str]] = {}
|
||||
for s in metric.samples:
|
||||
for suffix in ["_created", "_gsum", "_gcount"]:
|
||||
if s.name == mname + suffix:
|
||||
# OpenMetrics specific sample, put in a gauge at the end.
|
||||
# (these come from gaugehistograms which don't get renamed,
|
||||
# so no need to faff with mnewname)
|
||||
om_samples.setdefault(suffix, []).append(sample_line(s, s.name))
|
||||
break
|
||||
else:
|
||||
newname = s.name.replace(mnewname, mname)
|
||||
if ":" in newname and newname.endswith("_total"):
|
||||
newname = newname[: -len("_total")]
|
||||
output.append(sample_line(s, newname))
|
||||
|
||||
for suffix, lines in sorted(om_samples.items()):
|
||||
if emit_help:
|
||||
output.append(
|
||||
"# HELP {}{} {}\n".format(
|
||||
mname,
|
||||
suffix,
|
||||
metric.documentation.replace("\\", r"\\").replace("\n", r"\n"),
|
||||
)
|
||||
)
|
||||
output.append(f"# TYPE {mname}{suffix} gauge\n")
|
||||
output.extend(lines)
|
||||
|
||||
# Get rid of the weird colon things while we're at it
|
||||
if mtype == "counter":
|
||||
mnewname = mnewname.replace(":total", "")
|
||||
mnewname = mnewname.replace(":", "_")
|
||||
|
||||
if mname == mnewname:
|
||||
continue
|
||||
|
||||
# Also output in the new format, if it's different.
|
||||
if emit_help:
|
||||
output.append(
|
||||
"# HELP {} {}\n".format(
|
||||
mnewname,
|
||||
metric.documentation.replace("\\", r"\\").replace("\n", r"\n"),
|
||||
)
|
||||
)
|
||||
output.append(f"# TYPE {mnewname} {mtype}\n")
|
||||
|
||||
for s in metric.samples:
|
||||
# Get rid of the OpenMetrics specific samples (we should already have
|
||||
# dealt with them above anyway.)
|
||||
for suffix in ["_created", "_gsum", "_gcount"]:
|
||||
if s.name == mname + suffix:
|
||||
break
|
||||
else:
|
||||
sample_name = LEGACY_METRIC_NAMES.get(s.name, s.name)
|
||||
output.append(
|
||||
sample_line(s, sample_name.replace(":total", "").replace(":", "_"))
|
||||
)
|
||||
|
||||
return "".join(output).encode("utf-8")
|
||||
|
||||
|
||||
class MetricsHandler(BaseHTTPRequestHandler):
|
||||
"""HTTP handler that gives metrics from ``REGISTRY``."""
|
||||
|
||||
registry = REGISTRY
|
||||
|
||||
def do_GET(self) -> None:
|
||||
registry = self.registry
|
||||
params = parse_qs(urlparse(self.path).query)
|
||||
|
||||
if "help" in params:
|
||||
emit_help = True
|
||||
else:
|
||||
emit_help = False
|
||||
|
||||
try:
|
||||
output = generate_latest(registry, emit_help=emit_help)
|
||||
except Exception:
|
||||
self.send_error(500, "error generating metric output")
|
||||
raise
|
||||
try:
|
||||
self.send_response(200)
|
||||
self.send_header("Content-Type", CONTENT_TYPE_LATEST)
|
||||
self.send_header("Content-Length", str(len(output)))
|
||||
self.end_headers()
|
||||
self.wfile.write(output)
|
||||
except BrokenPipeError as e:
|
||||
logger.warning(
|
||||
"BrokenPipeError when serving metrics (%s). Did Prometheus restart?", e
|
||||
)
|
||||
|
||||
def log_message(self, format: str, *args: Any) -> None:
|
||||
"""Log nothing."""
|
||||
|
||||
@classmethod
|
||||
def factory(cls, registry: CollectorRegistry) -> Type:
|
||||
"""Returns a dynamic MetricsHandler class tied
|
||||
to the passed registry.
|
||||
"""
|
||||
# This implementation relies on MetricsHandler.registry
|
||||
# (defined above and defaulted to REGISTRY).
|
||||
|
||||
# As we have unicode_literals, we need to create a str()
|
||||
# object for type().
|
||||
cls_name = str(cls.__name__)
|
||||
MyMetricsHandler = type(cls_name, (cls, object), {"registry": registry})
|
||||
return MyMetricsHandler
|
||||
|
||||
|
||||
class _ThreadingSimpleServer(ThreadingMixIn, HTTPServer):
|
||||
"""Thread per request HTTP server."""
|
||||
|
||||
# Make worker threads "fire and forget". Beginning with Python 3.7 this
|
||||
# prevents a memory leak because ``ThreadingMixIn`` starts to gather all
|
||||
# non-daemon threads in a list in order to join on them at server close.
|
||||
# Enabling daemon threads virtually makes ``_ThreadingSimpleServer`` the
|
||||
# same as Python 3.7's ``ThreadingHTTPServer``.
|
||||
daemon_threads = True
|
||||
|
||||
|
||||
def start_http_server(
|
||||
port: int, addr: str = "", registry: CollectorRegistry = REGISTRY
|
||||
) -> None:
|
||||
"""Starts an HTTP server for prometheus metrics as a daemon thread"""
|
||||
CustomMetricsHandler = MetricsHandler.factory(registry)
|
||||
httpd = _ThreadingSimpleServer((addr, port), CustomMetricsHandler)
|
||||
t = threading.Thread(target=httpd.serve_forever)
|
||||
t.daemon = True
|
||||
t.start()
|
||||
|
||||
|
||||
class MetricsResource(Resource):
|
||||
"""
|
||||
Twisted ``Resource`` that serves prometheus metrics.
|
||||
"""
|
||||
|
||||
isLeaf = True
|
||||
|
||||
def __init__(self, registry: CollectorRegistry = REGISTRY):
|
||||
self.registry = registry
|
||||
|
||||
def render_GET(self, request: Request) -> bytes:
|
||||
request.setHeader(b"Content-Type", CONTENT_TYPE_LATEST.encode("ascii"))
|
||||
response = generate_latest(self.registry)
|
||||
request.setHeader(b"Content-Length", str(len(response)))
|
||||
return response
|
38
synapse/metrics/_twisted_exposition.py
Normal file
38
synapse/metrics/_twisted_exposition.py
Normal file
|
@ -0,0 +1,38 @@
|
|||
# Copyright 2015-2019 Prometheus Python Client Developers
|
||||
# Copyright 2019 Matrix.org Foundation C.I.C.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from prometheus_client import REGISTRY, CollectorRegistry, generate_latest
|
||||
|
||||
from twisted.web.resource import Resource
|
||||
from twisted.web.server import Request
|
||||
|
||||
CONTENT_TYPE_LATEST = "text/plain; version=0.0.4; charset=utf-8"
|
||||
|
||||
|
||||
class MetricsResource(Resource):
|
||||
"""
|
||||
Twisted ``Resource`` that serves prometheus metrics.
|
||||
"""
|
||||
|
||||
isLeaf = True
|
||||
|
||||
def __init__(self, registry: CollectorRegistry = REGISTRY):
|
||||
self.registry = registry
|
||||
|
||||
def render_GET(self, request: Request) -> bytes:
|
||||
request.setHeader(b"Content-Type", CONTENT_TYPE_LATEST.encode("ascii"))
|
||||
response = generate_latest(self.registry)
|
||||
request.setHeader(b"Content-Length", str(len(response)))
|
||||
return response
|
|
@ -11,8 +11,9 @@
|
|||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
from prometheus_client import generate_latest
|
||||
|
||||
from synapse.metrics import REGISTRY, generate_latest
|
||||
from synapse.metrics import REGISTRY
|
||||
from synapse.types import UserID, create_requester
|
||||
|
||||
from tests.unittest import HomeserverTestCase
|
||||
|
@ -53,8 +54,8 @@ class ExtremStatisticsTestCase(HomeserverTestCase):
|
|||
|
||||
items = list(
|
||||
filter(
|
||||
lambda x: b"synapse_forward_extremities_" in x,
|
||||
generate_latest(REGISTRY, emit_help=False).split(b"\n"),
|
||||
lambda x: b"synapse_forward_extremities_" in x and b"# HELP" not in x,
|
||||
generate_latest(REGISTRY).split(b"\n"),
|
||||
)
|
||||
)
|
||||
|
||||
|
|
Loading…
Reference in a new issue