mirror of
https://github.com/element-hq/synapse.git
synced 2024-12-14 11:57:44 +00:00
Allow for the configuration of max request retries and min/max retry delays in the matrix federation client (#12504)
Co-authored-by: Mathieu Velten <mathieuv@matrix.org> Co-authored-by: Erik Johnston <erik@matrix.org>
This commit is contained in:
parent
f6321e386c
commit
d84e66144d
5 changed files with 68 additions and 10 deletions
1
changelog.d/12504.misc
Normal file
1
changelog.d/12504.misc
Normal file
|
@ -0,0 +1 @@
|
|||
Allow for the configuration of max request retries and min/max retry delays in the matrix federation client.
|
|
@ -1196,6 +1196,32 @@ Example configuration:
|
|||
allow_device_name_lookup_over_federation: true
|
||||
```
|
||||
---
|
||||
### `federation`
|
||||
|
||||
The federation section defines some sub-options related to federation.
|
||||
|
||||
The following options are related to configuring timeout and retry logic for one request,
|
||||
independently of the others.
|
||||
Short retry algorithm is used when something or someone will wait for the request to have an
|
||||
answer, while long retry is used for requests that happen in the background,
|
||||
like sending a federation transaction.
|
||||
|
||||
* `client_timeout`: timeout for the federation requests in seconds. Default to 60s.
|
||||
* `max_short_retry_delay`: maximum delay to be used for the short retry algo in seconds. Default to 2s.
|
||||
* `max_long_retry_delay`: maximum delay to be used for the short retry algo in seconds. Default to 60s.
|
||||
* `max_short_retries`: maximum number of retries for the short retry algo. Default to 3 attempts.
|
||||
* `max_long_retries`: maximum number of retries for the long retry algo. Default to 10 attempts.
|
||||
|
||||
Example configuration:
|
||||
```yaml
|
||||
federation:
|
||||
client_timeout: 180
|
||||
max_short_retry_delay: 7
|
||||
max_long_retry_delay: 100
|
||||
max_short_retries: 5
|
||||
max_long_retries: 20
|
||||
```
|
||||
---
|
||||
## Caching
|
||||
|
||||
Options related to caching.
|
||||
|
|
|
@ -22,6 +22,8 @@ class FederationConfig(Config):
|
|||
section = "federation"
|
||||
|
||||
def read_config(self, config: JsonDict, **kwargs: Any) -> None:
|
||||
federation_config = config.setdefault("federation", {})
|
||||
|
||||
# FIXME: federation_domain_whitelist needs sytests
|
||||
self.federation_domain_whitelist: Optional[dict] = None
|
||||
federation_domain_whitelist = config.get("federation_domain_whitelist", None)
|
||||
|
@ -49,5 +51,13 @@ class FederationConfig(Config):
|
|||
"allow_device_name_lookup_over_federation", False
|
||||
)
|
||||
|
||||
# Allow for the configuration of timeout, max request retries
|
||||
# and min/max retry delays in the matrix federation client.
|
||||
self.client_timeout = federation_config.get("client_timeout", 60)
|
||||
self.max_long_retry_delay = federation_config.get("max_long_retry_delay", 60)
|
||||
self.max_short_retry_delay = federation_config.get("max_short_retry_delay", 2)
|
||||
self.max_long_retries = federation_config.get("max_long_retries", 10)
|
||||
self.max_short_retries = federation_config.get("max_short_retries", 3)
|
||||
|
||||
|
||||
_METRICS_FOR_DOMAINS_SCHEMA = {"type": "array", "items": {"type": "string"}}
|
||||
|
|
|
@ -95,8 +95,6 @@ incoming_responses_counter = Counter(
|
|||
)
|
||||
|
||||
|
||||
MAX_LONG_RETRIES = 10
|
||||
MAX_SHORT_RETRIES = 3
|
||||
MAXINT = sys.maxsize
|
||||
|
||||
|
||||
|
@ -406,7 +404,12 @@ class MatrixFederationHttpClient:
|
|||
self.clock = hs.get_clock()
|
||||
self._store = hs.get_datastores().main
|
||||
self.version_string_bytes = hs.version_string.encode("ascii")
|
||||
self.default_timeout = 60
|
||||
self.default_timeout = hs.config.federation.client_timeout
|
||||
|
||||
self.max_long_retry_delay = hs.config.federation.max_long_retry_delay
|
||||
self.max_short_retry_delay = hs.config.federation.max_short_retry_delay
|
||||
self.max_long_retries = hs.config.federation.max_long_retries
|
||||
self.max_short_retries = hs.config.federation.max_short_retries
|
||||
|
||||
self._cooperator = Cooperator(scheduler=_make_scheduler(self.reactor))
|
||||
|
||||
|
@ -583,9 +586,9 @@ class MatrixFederationHttpClient:
|
|||
# XXX: Would be much nicer to retry only at the transaction-layer
|
||||
# (once we have reliable transactions in place)
|
||||
if long_retries:
|
||||
retries_left = MAX_LONG_RETRIES
|
||||
retries_left = self.max_long_retries
|
||||
else:
|
||||
retries_left = MAX_SHORT_RETRIES
|
||||
retries_left = self.max_short_retries
|
||||
|
||||
url_bytes = request.uri
|
||||
url_str = url_bytes.decode("ascii")
|
||||
|
@ -730,12 +733,12 @@ class MatrixFederationHttpClient:
|
|||
|
||||
if retries_left and not timeout:
|
||||
if long_retries:
|
||||
delay = 4 ** (MAX_LONG_RETRIES + 1 - retries_left)
|
||||
delay = min(delay, 60)
|
||||
delay = 4 ** (self.max_long_retries + 1 - retries_left)
|
||||
delay = min(delay, self.max_long_retry_delay)
|
||||
delay *= random.uniform(0.8, 1.4)
|
||||
else:
|
||||
delay = 0.5 * 2 ** (MAX_SHORT_RETRIES - retries_left)
|
||||
delay = min(delay, 2)
|
||||
delay = 0.5 * 2 ** (self.max_short_retries - retries_left)
|
||||
delay = min(delay, self.max_short_retry_delay)
|
||||
delay *= random.uniform(0.8, 1.4)
|
||||
|
||||
logger.debug(
|
||||
|
|
|
@ -40,7 +40,7 @@ from synapse.server import HomeServer
|
|||
from synapse.util import Clock
|
||||
|
||||
from tests.server import FakeTransport
|
||||
from tests.unittest import HomeserverTestCase
|
||||
from tests.unittest import HomeserverTestCase, override_config
|
||||
|
||||
|
||||
def check_logcontext(context: LoggingContextOrSentinel) -> None:
|
||||
|
@ -640,3 +640,21 @@ class FederationClientTests(HomeserverTestCase):
|
|||
self.cl.build_auth_headers(
|
||||
b"", b"GET", b"https://example.com", destination_is=b""
|
||||
)
|
||||
|
||||
@override_config(
|
||||
{
|
||||
"federation": {
|
||||
"client_timeout": 180,
|
||||
"max_long_retry_delay": 100,
|
||||
"max_short_retry_delay": 7,
|
||||
"max_long_retries": 20,
|
||||
"max_short_retries": 5,
|
||||
}
|
||||
}
|
||||
)
|
||||
def test_configurable_retry_and_delay_values(self) -> None:
|
||||
self.assertEqual(self.cl.default_timeout, 180)
|
||||
self.assertEqual(self.cl.max_long_retry_delay, 100)
|
||||
self.assertEqual(self.cl.max_short_retry_delay, 7)
|
||||
self.assertEqual(self.cl.max_long_retries, 20)
|
||||
self.assertEqual(self.cl.max_short_retries, 5)
|
||||
|
|
Loading…
Reference in a new issue