1
0
Fork 0
mirror of https://github.com/element-hq/synapse.git synced 2025-03-07 00:16:57 +00:00
synapse/tests/utils.py

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

437 lines
14 KiB
Python
Raw Normal View History

#
2023-11-21 15:29:58 -05:00
# This file is licensed under the Affero General Public License (AGPL) version 3.
#
# Copyright 2014-2016 OpenMarket Ltd
2023-11-21 15:29:58 -05:00
# Copyright (C) 2023 New Vector, Ltd
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# See the GNU Affero General Public License for more details:
# <https://www.gnu.org/licenses/agpl-3.0.html>.
#
# Originally licensed under the Apache License, Version 2.0:
# <http://www.apache.org/licenses/LICENSE-2.0>.
#
# [This file includes modifications made by New Vector Limited]
#
#
2018-08-13 16:47:46 +10:00
import atexit
import os
import signal
from types import FrameType, TracebackType
from typing import (
Any,
Callable,
Dict,
List,
Literal,
Optional,
Tuple,
Type,
TypeVar,
Union,
overload,
)
import attr
from typing_extensions import ParamSpec
from synapse.api.constants import EventTypes
from synapse.api.room_versions import RoomVersions
from synapse.config.homeserver import HomeServerConfig
from synapse.config.server import DEFAULT_ROOM_VERSION
from synapse.logging.context import current_context, set_current_context
from synapse.server import HomeServer
from synapse.storage.database import LoggingDatabaseConnection
from synapse.storage.engines import create_engine
from synapse.storage.prepare_database import prepare_database
try:
import authlib # noqa: F401
HAS_AUTHLIB = True
except ImportError:
HAS_AUTHLIB = False
# set this to True to run the tests against postgres instead of sqlite.
#
# When running under postgres, we first create a base database with the name
# POSTGRES_BASE_DB and update it to the current schema. Then, for each test case, we
# create another unique database, using the base database as a template.
2018-08-13 16:47:46 +10:00
USE_POSTGRES_FOR_TESTS = os.environ.get("SYNAPSE_POSTGRES", False)
2018-09-04 02:21:48 +10:00
LEAVE_DB = os.environ.get("SYNAPSE_LEAVE_DB", False)
POSTGRES_USER = os.environ.get("SYNAPSE_POSTGRES_USER", None)
POSTGRES_HOST = os.environ.get("SYNAPSE_POSTGRES_HOST", None)
POSTGRES_PASSWORD = os.environ.get("SYNAPSE_POSTGRES_PASSWORD", None)
POSTGRES_PORT = (
int(os.environ["SYNAPSE_POSTGRES_PORT"])
if "SYNAPSE_POSTGRES_PORT" in os.environ
else None
)
2018-08-13 16:47:46 +10:00
POSTGRES_BASE_DB = "_synapse_unit_tests_base_%s" % (os.getpid(),)
# When debugging a specific test, it's occasionally useful to write the
# DB to disk and query it with the sqlite CLI.
SQLITE_PERSIST_DB = os.environ.get("SYNAPSE_TEST_PERSIST_SQLITE_DB") is not None
# the dbname we will connect to in order to create the base database.
POSTGRES_DBNAME_FOR_INITIAL_CREATE = "postgres"
2018-08-13 16:47:46 +10:00
def setupdb() -> None:
2018-08-13 16:47:46 +10:00
# If we're using PostgreSQL, set up the db once
if USE_POSTGRES_FOR_TESTS:
# create a PostgresEngine
db_engine = create_engine({"name": "psycopg2", "args": {}})
# connect to postgres to create the base database.
db_conn = db_engine.module.connect(
2019-05-10 00:12:11 -05:00
user=POSTGRES_USER,
host=POSTGRES_HOST,
port=POSTGRES_PORT,
2019-05-10 00:12:11 -05:00
password=POSTGRES_PASSWORD,
dbname=POSTGRES_DBNAME_FOR_INITIAL_CREATE,
)
db_engine.attempt_to_set_autocommit(db_conn, autocommit=True)
2018-08-13 16:47:46 +10:00
cur = db_conn.cursor()
cur.execute("DROP DATABASE IF EXISTS %s;" % (POSTGRES_BASE_DB,))
cur.execute(
"CREATE DATABASE %s ENCODING 'UTF8' LC_COLLATE='C' LC_CTYPE='C' "
"template=template0;" % (POSTGRES_BASE_DB,)
)
2018-08-13 16:47:46 +10:00
cur.close()
db_conn.close()
# Set up in the db
db_conn = db_engine.module.connect(
dbname=POSTGRES_BASE_DB,
user=POSTGRES_USER,
host=POSTGRES_HOST,
port=POSTGRES_PORT,
password=POSTGRES_PASSWORD,
2018-08-13 16:47:46 +10:00
)
logging_conn = LoggingDatabaseConnection(db_conn, db_engine, "tests")
prepare_database(logging_conn, db_engine, None)
logging_conn.close()
2018-08-13 16:47:46 +10:00
def _cleanup() -> None:
db_conn = db_engine.module.connect(
2019-05-10 00:12:11 -05:00
user=POSTGRES_USER,
host=POSTGRES_HOST,
port=POSTGRES_PORT,
2019-05-10 00:12:11 -05:00
password=POSTGRES_PASSWORD,
dbname=POSTGRES_DBNAME_FOR_INITIAL_CREATE,
)
db_engine.attempt_to_set_autocommit(db_conn, autocommit=True)
2018-08-13 16:47:46 +10:00
cur = db_conn.cursor()
cur.execute("DROP DATABASE IF EXISTS %s;" % (POSTGRES_BASE_DB,))
cur.close()
db_conn.close()
atexit.register(_cleanup)
@overload
def default_config(name: str, parse: Literal[False] = ...) -> Dict[str, object]: ...
@overload
def default_config(name: str, parse: Literal[True]) -> HomeServerConfig: ...
def default_config(
name: str, parse: bool = False
) -> Union[Dict[str, object], HomeServerConfig]:
2018-10-02 00:11:58 +10:00
"""
Create a reasonable test config.
"""
config_dict = {
"server_name": name,
# Setting this to an empty list turns off federation sending.
"federation_sender_instances": [],
"media_store_path": "media",
# the test signing key is just an arbitrary ed25519 key to keep the config
# parser happy
"signing_key": "ed25519 a_lPym qvioDNmfExFBRPgdTU+wtFYKq4JfwFRv7sYVgWvmgJg",
# Disable trusted key servers, otherwise unit tests might try to actually
# reach out to matrix.org.
"trusted_key_servers": [],
"event_cache_size": 1,
"enable_registration": True,
"enable_registration_captcha": False,
"macaroon_secret_key": "not even a little secret",
"password_providers": [],
"worker_app": None,
"block_non_admin_invites": False,
"federation_domain_whitelist": None,
"filter_timeline_limit": 5000,
"user_directory_search_all_users": False,
"user_consent_server_notice_content": None,
"block_events_without_consent_error": None,
"user_consent_at_registration": False,
"user_consent_policy_name": "Privacy Policy",
"media_storage_providers": [],
"autocreate_auto_join_rooms": True,
"auto_join_rooms": [],
"limit_usage_by_mau": False,
"hs_disabled": False,
"hs_disabled_message": "",
"max_mau_value": 50,
"mau_trial_days": 0,
"mau_stats_only": False,
"mau_limits_reserved_threepids": [],
"admin_contact": None,
"rc_message": {"per_second": 10000, "burst_count": 10000},
"rc_registration": {"per_second": 10000, "burst_count": 10000},
"rc_login": {
"address": {"per_second": 10000, "burst_count": 10000},
"account": {"per_second": 10000, "burst_count": 10000},
"failed_attempts": {"per_second": 10000, "burst_count": 10000},
},
2020-07-31 14:34:42 +01:00
"rc_joins": {
"local": {"per_second": 10000, "burst_count": 10000},
"remote": {"per_second": 10000, "burst_count": 10000},
},
2022-07-19 12:45:17 +01:00
"rc_joins_per_room": {"per_second": 10000, "burst_count": 10000},
"rc_invites": {
"per_room": {"per_second": 10000, "burst_count": 10000},
"per_user": {"per_second": 10000, "burst_count": 10000},
},
"rc_3pid_validation": {"per_second": 10000, "burst_count": 10000},
2025-01-24 20:58:01 +01:00
"rc_presence": {"per_user": {"per_second": 10000, "burst_count": 10000}},
"saml2_enabled": False,
"public_baseurl": None,
"default_identity_server": None,
"key_refresh_interval": 24 * 60 * 60 * 1000,
"old_signing_keys": {},
"tls_fingerprints": [],
"use_frozen_dicts": False,
# We need a sane default_room_version, otherwise attempts to create
# rooms will fail.
"default_room_version": DEFAULT_ROOM_VERSION,
# disable user directory updates, because they get done in the
# background, which upsets the test runner. Setting this to an
# (obviously) fake worker name disables updating the user directory.
"update_user_directory_from_worker": "does_not_exist_worker_name",
"caches": {"global_factor": 1, "sync_response_cache_duration": 0},
"listeners": [{"port": 0, "type": "http"}],
}
if parse:
config = HomeServerConfig()
config.parse_config_dict(config_dict, "", "")
return config
return config_dict
2018-10-02 00:11:58 +10:00
def mock_getRawHeaders(headers=None): # type: ignore[no-untyped-def]
2016-09-12 10:46:02 +01:00
headers = headers if headers is not None else {}
def getRawHeaders(name, default=None): # type: ignore[no-untyped-def]
# If the requested header is present, the real twisted function returns
# List[str] if name is a str and List[bytes] if name is a bytes.
# This mock doesn't support that behaviour.
# Fortunately, none of the current callers of mock_getRawHeaders() provide a
# headers dict, so we don't encounter this discrepancy in practice.
2016-09-12 10:46:02 +01:00
return headers.get(name, default)
return getRawHeaders
P = ParamSpec("P")
@attr.s(slots=True, auto_attribs=True)
class Timer:
absolute_time: float
callback: Callable[[], None]
expired: bool
# TODO: Make this generic over a ParamSpec?
@attr.s(slots=True, auto_attribs=True)
class Looper:
func: Callable[..., Any]
interval: float # seconds
last: float
args: Tuple[object, ...]
kwargs: Dict[str, object]
2020-09-04 06:54:56 -04:00
class MockClock:
now = 1000.0
def __init__(self) -> None:
# Timers in no particular order
self.timers: List[Timer] = []
self.loopers: List[Looper] = []
2014-12-10 19:24:12 +00:00
def time(self) -> float:
return self.now
def time_msec(self) -> int:
return int(self.time() * 1000)
def call_later(
self,
delay: float,
callback: Callable[P, object],
*args: P.args,
**kwargs: P.kwargs,
) -> Timer:
ctx = current_context()
2014-12-10 19:24:12 +00:00
def wrapped_callback() -> None:
set_current_context(ctx)
2016-02-15 17:10:40 +00:00
callback(*args, **kwargs)
t = Timer(self.now + delay, wrapped_callback, False)
self.timers.append(t)
return t
2014-12-10 19:24:12 +00:00
def looping_call(
self,
function: Callable[P, object],
interval: float,
*args: P.args,
**kwargs: P.kwargs,
) -> None:
self.loopers.append(Looper(function, interval / 1000.0, self.now, args, kwargs))
def cancel_call_later(self, timer: Timer, ignore_errs: bool = False) -> None:
if timer.expired:
if not ignore_errs:
raise Exception("Cannot cancel an expired timer")
timer.expired = True
self.timers = [t for t in self.timers if t != timer]
2014-12-10 19:24:12 +00:00
2014-08-13 19:17:30 +01:00
# For unit testing
def advance_time(self, secs: float) -> None:
2014-08-13 19:17:30 +01:00
self.now += secs
2014-12-10 19:24:12 +00:00
timers = self.timers
self.timers = []
for t in timers:
if t.expired:
raise Exception("Timer already expired")
if self.now >= t.absolute_time:
t.expired = True
t.callback()
2014-12-10 19:24:12 +00:00
else:
self.timers.append(t)
2014-12-10 19:24:12 +00:00
2016-09-23 13:56:14 +01:00
for looped in self.loopers:
if looped.last + looped.interval < self.now:
looped.func(*looped.args, **looped.kwargs)
looped.last = self.now
2016-09-23 13:56:14 +01:00
def advance_time_msec(self, ms: float) -> None:
self.advance_time(ms / 1000.0)
2015-11-10 15:51:40 +00:00
async def create_room(hs: HomeServer, room_id: str, creator_id: str) -> None:
2018-08-09 14:33:49 +01:00
"""Creates and persist a creation event for the given room"""
2022-05-31 13:17:50 +01:00
persistence_store = hs.get_storage_controllers().persistence
assert persistence_store is not None
store = hs.get_datastores().main
2018-08-09 14:33:49 +01:00
event_builder_factory = hs.get_event_builder_factory()
event_creation_handler = hs.get_event_creation_handler()
await store.store_room(
room_id=room_id,
room_creator_user_id=creator_id,
is_public=False,
room_version=RoomVersions.V1,
)
builder = event_builder_factory.for_room_version(
2019-01-24 09:28:16 +00:00
RoomVersions.V1,
2018-09-07 02:58:18 +10:00
{
"type": EventTypes.Create,
"state_key": "",
"sender": creator_id,
"room_id": room_id,
"content": {},
},
2018-08-09 14:33:49 +01:00
)
event, unpersisted_context = await event_creation_handler.create_new_client_event(
builder
)
context = await unpersisted_context.persist(event)
2018-09-07 02:58:18 +10:00
await persistence_store.persist_event(event, context)
Use mypy 1.0 (#15052) * Update mypy and mypy-zope * Remove unused ignores These used to suppress ``` synapse/storage/engines/__init__.py:28: error: "__new__" must return a class instance (got "NoReturn") [misc] ``` and ``` synapse/http/matrixfederationclient.py:1270: error: "BaseException" has no attribute "reasons" [attr-defined] ``` (note that we check `hasattr(e, "reasons")` above) * Avoid empty body warnings, sometimes by marking methods as abstract E.g. ``` tests/handlers/test_register.py:58: error: Missing return statement [empty-body] tests/handlers/test_register.py:108: error: Missing return statement [empty-body] ``` * Suppress false positive about `JaegerConfig` Complaint was ``` synapse/logging/opentracing.py:450: error: Function "Type[Config]" could always be true in boolean context [truthy-function] ``` * Fix not calling `is_state()` Oops! ``` tests/rest/client/test_third_party_rules.py:428: error: Function "Callable[[], bool]" could always be true in boolean context [truthy-function] ``` * Suppress false positives from ParamSpecs ```` synapse/logging/opentracing.py:971: error: Argument 2 to "_custom_sync_async_decorator" has incompatible type "Callable[[Arg(Callable[P, R], 'func'), **P], _GeneratorContextManager[None]]"; expected "Callable[[Callable[P, R], **P], _GeneratorContextManager[None]]" [arg-type] synapse/logging/opentracing.py:1017: error: Argument 2 to "_custom_sync_async_decorator" has incompatible type "Callable[[Arg(Callable[P, R], 'func'), **P], _GeneratorContextManager[None]]"; expected "Callable[[Callable[P, R], **P], _GeneratorContextManager[None]]" [arg-type] ```` * Drive-by improvement to `wrapping_logic` annotation * Workaround false "unreachable" positives See https://github.com/Shoobx/mypy-zope/issues/91 ``` tests/http/test_proxyagent.py:626: error: Statement is unreachable [unreachable] tests/http/test_proxyagent.py:762: error: Statement is unreachable [unreachable] tests/http/test_proxyagent.py:826: error: Statement is unreachable [unreachable] tests/http/test_proxyagent.py:838: error: Statement is unreachable [unreachable] tests/http/test_proxyagent.py:845: error: Statement is unreachable [unreachable] tests/http/federation/test_matrix_federation_agent.py:151: error: Statement is unreachable [unreachable] tests/http/federation/test_matrix_federation_agent.py:452: error: Statement is unreachable [unreachable] tests/logging/test_remote_handler.py:60: error: Statement is unreachable [unreachable] tests/logging/test_remote_handler.py:93: error: Statement is unreachable [unreachable] tests/logging/test_remote_handler.py:127: error: Statement is unreachable [unreachable] tests/logging/test_remote_handler.py:152: error: Statement is unreachable [unreachable] ``` * Changelog * Tweak DBAPI2 Protocol to be accepted by mypy 1.0 Some extra context in: - https://github.com/matrix-org/python-canonicaljson/pull/57 - https://github.com/python/mypy/issues/6002 - https://mypy.readthedocs.io/en/latest/common_issues.html#covariant-subtyping-of-mutable-protocol-members-is-rejected * Pull in updated canonicaljson lib so the protocol check just works * Improve comments in opentracing I tried to workaround the ignores but found it too much trouble. I think the corresponding issue is https://github.com/python/mypy/issues/12909. The mypy repo has a PR claiming to fix this (https://github.com/python/mypy/pull/14677) which might mean this gets resolved soon? * Better annotation for INTERACTIVE_AUTH_CHECKERS * Drive-by AUTH_TYPE annotation, to remove an ignore
2023-02-16 16:09:11 +00:00
T = TypeVar("T")
def checked_cast(type: Type[T], x: object) -> T:
"""A version of typing.cast that is checked at runtime.
We have our own function for this for two reasons:
1. typing.cast itself is deliberately a no-op at runtime, see
https://docs.python.org/3/library/typing.html#typing.cast
2. To help workaround a mypy-zope bug https://github.com/Shoobx/mypy-zope/issues/91
where mypy would erroneously consider `isinstance(x, type)` to be false in all
circumstances.
For this to make sense, `T` needs to be something that `isinstance` can check; see
https://docs.python.org/3/library/functions.html?highlight=isinstance#isinstance
https://docs.python.org/3/glossary.html#term-abstract-base-class
https://docs.python.org/3/library/typing.html#typing.runtime_checkable
for more details.
"""
assert isinstance(x, type)
return x
class TestTimeout(Exception):
pass
class test_timeout:
Fix join being denied after being invited over federation (#18075) This also happens for rejecting an invite. Basically, any out-of-band membership transition where we first get the membership as an `outlier` and then rely on federation filling us in to de-outlier it. This PR mainly addresses automated test flakiness, bots/scripts, and options within Synapse like [`auto_accept_invites`](https://element-hq.github.io/synapse/v1.122/usage/configuration/config_documentation.html#auto_accept_invites) that are able to react quickly (before federation is able to push us events), but also helps in generic scenarios where federation is lagging. I initially thought this might be a Synapse consistency issue (see issues labeled with [`Z-Read-After-Write`](https://github.com/matrix-org/synapse/labels/Z-Read-After-Write)) but it seems to be an event auth logic problem. Workers probably do increase the number of possible race condition scenarios that make this visible though (replication and cache invalidation lag). Fix https://github.com/element-hq/synapse/issues/15012 (probably fixes https://github.com/matrix-org/synapse/issues/15012 (https://github.com/element-hq/synapse/issues/15012)) Related to https://github.com/matrix-org/matrix-spec/issues/2062 Problems: 1. We don't consider [out-of-band membership](https://github.com/element-hq/synapse/blob/develop/docs/development/room-dag-concepts.md#out-of-band-membership-events) (outliers) in our `event_auth` logic even though we expose them in `/sync`. 1. (This PR doesn't address this point) Perhaps we should consider authing events in the persistence queue as events already in the queue could allow subsequent events to be allowed (events come through many channels: federation transaction, remote invite, remote join, local send). But this doesn't save us in the case where the event is more delayed over federation. ### What happened before? I wrote some Complement test that stresses this exact scenario and reproduces the problem: https://github.com/matrix-org/complement/pull/757 ``` COMPLEMENT_ALWAYS_PRINT_SERVER_LOGS=1 COMPLEMENT_DIR=../complement ./scripts-dev/complement.sh -run TestSynapseConsistency ``` We have `hs1` and `hs2` running in monolith mode (no workers): 1. `@charlie1:hs2` is invited and joins the room: 1. `hs1` invites `@charlie1:hs2` to a room which we receive on `hs2` as `PUT /_matrix/federation/v1/invite/{roomId}/{eventId}` (`on_invite_request(...)`) and the invite membership is persisted as an outlier. The `room_memberships` and `local_current_membership` database tables are also updated which means they are visible down `/sync` at this point. 1. `@charlie1:hs2` decides to join because it saw the invite down `/sync`. Because `hs2` is not yet in the room, this happens as a remote join `make_join`/`send_join` which comes back with all of the auth events needed to auth successfully and now `@charlie1:hs2` is successfully joined to the room. 1. `@charlie2:hs2` is invited and and tries to join the room: 1. `hs1` invites `@charlie2:hs2` to the room which we receive on `hs2` as `PUT /_matrix/federation/v1/invite/{roomId}/{eventId}` (`on_invite_request(...)`) and the invite membership is persisted as an outlier. The `room_memberships` and `local_current_membership` database tables are also updated which means they are visible down `/sync` at this point. 1. Because `hs2` is already participating in the room, we also see the invite come over federation in a transaction and we start processing it (not done yet, see below) 1. `@charlie2:hs2` decides to join because it saw the invite down `/sync`. Because `hs2`, is already in the room, this happens as a local join but we deny the event because our `event_auth` logic thinks that we have no membership in the room :x: (expected to be able to join because we saw the invite down `/sync`) 1. We finally finish processing the `@charlie2:hs2` invite event from and de-outlier it. - If this finished before we tried to join we would have been fine but this is the race condition that makes this situation visible. Logs for `hs2`: ``` 🗳️ on_invite_request: handling event <FrozenEventV3 event_id=$PRPCvdXdcqyjdUKP_NxGF2CcukmwOaoK0ZR1WiVOZVk, type=m.room.member, state_key=@user-2-charlie1:hs2, membership=invite, outlier=False> 🔦 _store_room_members_txn update room_memberships: <FrozenEventV3 event_id=$PRPCvdXdcqyjdUKP_NxGF2CcukmwOaoK0ZR1WiVOZVk, type=m.room.member, state_key=@user-2-charlie1:hs2, membership=invite, outlier=True> 🔦 _store_room_members_txn update local_current_membership: <FrozenEventV3 event_id=$PRPCvdXdcqyjdUKP_NxGF2CcukmwOaoK0ZR1WiVOZVk, type=m.room.member, state_key=@user-2-charlie1:hs2, membership=invite, outlier=True> 📨 Notifying about new event <FrozenEventV3 event_id=$PRPCvdXdcqyjdUKP_NxGF2CcukmwOaoK0ZR1WiVOZVk, type=m.room.member, state_key=@user-2-charlie1:hs2, membership=invite, outlier=True> ✅ on_invite_request: handled event <FrozenEventV3 event_id=$PRPCvdXdcqyjdUKP_NxGF2CcukmwOaoK0ZR1WiVOZVk, type=m.room.member, state_key=@user-2-charlie1:hs2, membership=invite, outlier=True> 🧲 do_invite_join for @user-2-charlie1:hs2 in !sfZVBdLUezpPWetrol:hs1 🔦 _store_room_members_txn update room_memberships: <FrozenEventV3 event_id=$bwv8LxFnqfpsw_rhR7OrTjtz09gaJ23MqstKOcs7ygA, type=m.room.member, state_key=@user-1-alice:hs1, membership=join, outlier=True> 🔦 _store_room_members_txn update room_memberships: <FrozenEventV3 event_id=$oju1ts3G3pz5O62IesrxX5is4LxAwU3WPr4xvid5ijI, type=m.room.member, state_key=@user-2-charlie1:hs2, membership=join, outlier=False> 📨 Notifying about new event <FrozenEventV3 event_id=$oju1ts3G3pz5O62IesrxX5is4LxAwU3WPr4xvid5ijI, type=m.room.member, state_key=@user-2-charlie1:hs2, membership=join, outlier=False> ... 🗳️ on_invite_request: handling event <FrozenEventV3 event_id=$O_54j7O--6xMsegY5EVZ9SA-mI4_iHJOIoRwYyeWIPY, type=m.room.member, state_key=@user-3-charlie2:hs2, membership=invite, outlier=False> 🔦 _store_room_members_txn update room_memberships: <FrozenEventV3 event_id=$O_54j7O--6xMsegY5EVZ9SA-mI4_iHJOIoRwYyeWIPY, type=m.room.member, state_key=@user-3-charlie2:hs2, membership=invite, outlier=True> 🔦 _store_room_members_txn update local_current_membership: <FrozenEventV3 event_id=$O_54j7O--6xMsegY5EVZ9SA-mI4_iHJOIoRwYyeWIPY, type=m.room.member, state_key=@user-3-charlie2:hs2, membership=invite, outlier=True> 📨 Notifying about new event <FrozenEventV3 event_id=$O_54j7O--6xMsegY5EVZ9SA-mI4_iHJOIoRwYyeWIPY, type=m.room.member, state_key=@user-3-charlie2:hs2, membership=invite, outlier=True> ✅ on_invite_request: handled event <FrozenEventV3 event_id=$O_54j7O--6xMsegY5EVZ9SA-mI4_iHJOIoRwYyeWIPY, type=m.room.member, state_key=@user-3-charlie2:hs2, membership=invite, outlier=True> 📬 handling received PDU in room !sfZVBdLUezpPWetrol:hs1: <FrozenEventV3 event_id=$O_54j7O--6xMsegY5EVZ9SA-mI4_iHJOIoRwYyeWIPY, type=m.room.member, state_key=@user-3-charlie2:hs2, membership=invite, outlier=False> 📮 handle_new_client_event: handling <FrozenEventV3 event_id=$WNVDTQrxy5tCdPQHMyHyIn7tE4NWqKsZ8Bn8R4WbBSA, type=m.room.member, state_key=@user-3-charlie2:hs2, membership=join, outlier=False> ❌ Denying new event <FrozenEventV3 event_id=$WNVDTQrxy5tCdPQHMyHyIn7tE4NWqKsZ8Bn8R4WbBSA, type=m.room.member, state_key=@user-3-charlie2:hs2, membership=join, outlier=False> because 403: You are not invited to this room. synapse.http.server - 130 - INFO - POST-16 - <SynapseRequest at 0x7f460c91fbf0 method='POST' uri='/_matrix/client/v3/join/%21sfZVBdLUezpPWetrol:hs1?server_name=hs1' clientproto='HTTP/1.0' site='8080'> SynapseError: 403 - You are not invited to this room. 📨 Notifying about new event <FrozenEventV3 event_id=$O_54j7O--6xMsegY5EVZ9SA-mI4_iHJOIoRwYyeWIPY, type=m.room.member, state_key=@user-3-charlie2:hs2, membership=invite, outlier=False> ✅ handled received PDU in room !sfZVBdLUezpPWetrol:hs1: <FrozenEventV3 event_id=$O_54j7O--6xMsegY5EVZ9SA-mI4_iHJOIoRwYyeWIPY, type=m.room.member, state_key=@user-3-charlie2:hs2, membership=invite, outlier=False> ```
2025-01-27 11:21:10 -06:00
"""
FIXME: This implementation is not robust against other code tight-looping and
preventing the signals propagating and timing out the test. You may need to add
`time.sleep(0.1)` to your code in order to allow this timeout to work correctly.
```py
with test_timeout(3):
while True:
my_checking_func()
time.sleep(0.1)
```
"""
def __init__(self, seconds: int, error_message: Optional[str] = None) -> None:
Fix join being denied after being invited over federation (#18075) This also happens for rejecting an invite. Basically, any out-of-band membership transition where we first get the membership as an `outlier` and then rely on federation filling us in to de-outlier it. This PR mainly addresses automated test flakiness, bots/scripts, and options within Synapse like [`auto_accept_invites`](https://element-hq.github.io/synapse/v1.122/usage/configuration/config_documentation.html#auto_accept_invites) that are able to react quickly (before federation is able to push us events), but also helps in generic scenarios where federation is lagging. I initially thought this might be a Synapse consistency issue (see issues labeled with [`Z-Read-After-Write`](https://github.com/matrix-org/synapse/labels/Z-Read-After-Write)) but it seems to be an event auth logic problem. Workers probably do increase the number of possible race condition scenarios that make this visible though (replication and cache invalidation lag). Fix https://github.com/element-hq/synapse/issues/15012 (probably fixes https://github.com/matrix-org/synapse/issues/15012 (https://github.com/element-hq/synapse/issues/15012)) Related to https://github.com/matrix-org/matrix-spec/issues/2062 Problems: 1. We don't consider [out-of-band membership](https://github.com/element-hq/synapse/blob/develop/docs/development/room-dag-concepts.md#out-of-band-membership-events) (outliers) in our `event_auth` logic even though we expose them in `/sync`. 1. (This PR doesn't address this point) Perhaps we should consider authing events in the persistence queue as events already in the queue could allow subsequent events to be allowed (events come through many channels: federation transaction, remote invite, remote join, local send). But this doesn't save us in the case where the event is more delayed over federation. ### What happened before? I wrote some Complement test that stresses this exact scenario and reproduces the problem: https://github.com/matrix-org/complement/pull/757 ``` COMPLEMENT_ALWAYS_PRINT_SERVER_LOGS=1 COMPLEMENT_DIR=../complement ./scripts-dev/complement.sh -run TestSynapseConsistency ``` We have `hs1` and `hs2` running in monolith mode (no workers): 1. `@charlie1:hs2` is invited and joins the room: 1. `hs1` invites `@charlie1:hs2` to a room which we receive on `hs2` as `PUT /_matrix/federation/v1/invite/{roomId}/{eventId}` (`on_invite_request(...)`) and the invite membership is persisted as an outlier. The `room_memberships` and `local_current_membership` database tables are also updated which means they are visible down `/sync` at this point. 1. `@charlie1:hs2` decides to join because it saw the invite down `/sync`. Because `hs2` is not yet in the room, this happens as a remote join `make_join`/`send_join` which comes back with all of the auth events needed to auth successfully and now `@charlie1:hs2` is successfully joined to the room. 1. `@charlie2:hs2` is invited and and tries to join the room: 1. `hs1` invites `@charlie2:hs2` to the room which we receive on `hs2` as `PUT /_matrix/federation/v1/invite/{roomId}/{eventId}` (`on_invite_request(...)`) and the invite membership is persisted as an outlier. The `room_memberships` and `local_current_membership` database tables are also updated which means they are visible down `/sync` at this point. 1. Because `hs2` is already participating in the room, we also see the invite come over federation in a transaction and we start processing it (not done yet, see below) 1. `@charlie2:hs2` decides to join because it saw the invite down `/sync`. Because `hs2`, is already in the room, this happens as a local join but we deny the event because our `event_auth` logic thinks that we have no membership in the room :x: (expected to be able to join because we saw the invite down `/sync`) 1. We finally finish processing the `@charlie2:hs2` invite event from and de-outlier it. - If this finished before we tried to join we would have been fine but this is the race condition that makes this situation visible. Logs for `hs2`: ``` 🗳️ on_invite_request: handling event <FrozenEventV3 event_id=$PRPCvdXdcqyjdUKP_NxGF2CcukmwOaoK0ZR1WiVOZVk, type=m.room.member, state_key=@user-2-charlie1:hs2, membership=invite, outlier=False> 🔦 _store_room_members_txn update room_memberships: <FrozenEventV3 event_id=$PRPCvdXdcqyjdUKP_NxGF2CcukmwOaoK0ZR1WiVOZVk, type=m.room.member, state_key=@user-2-charlie1:hs2, membership=invite, outlier=True> 🔦 _store_room_members_txn update local_current_membership: <FrozenEventV3 event_id=$PRPCvdXdcqyjdUKP_NxGF2CcukmwOaoK0ZR1WiVOZVk, type=m.room.member, state_key=@user-2-charlie1:hs2, membership=invite, outlier=True> 📨 Notifying about new event <FrozenEventV3 event_id=$PRPCvdXdcqyjdUKP_NxGF2CcukmwOaoK0ZR1WiVOZVk, type=m.room.member, state_key=@user-2-charlie1:hs2, membership=invite, outlier=True> ✅ on_invite_request: handled event <FrozenEventV3 event_id=$PRPCvdXdcqyjdUKP_NxGF2CcukmwOaoK0ZR1WiVOZVk, type=m.room.member, state_key=@user-2-charlie1:hs2, membership=invite, outlier=True> 🧲 do_invite_join for @user-2-charlie1:hs2 in !sfZVBdLUezpPWetrol:hs1 🔦 _store_room_members_txn update room_memberships: <FrozenEventV3 event_id=$bwv8LxFnqfpsw_rhR7OrTjtz09gaJ23MqstKOcs7ygA, type=m.room.member, state_key=@user-1-alice:hs1, membership=join, outlier=True> 🔦 _store_room_members_txn update room_memberships: <FrozenEventV3 event_id=$oju1ts3G3pz5O62IesrxX5is4LxAwU3WPr4xvid5ijI, type=m.room.member, state_key=@user-2-charlie1:hs2, membership=join, outlier=False> 📨 Notifying about new event <FrozenEventV3 event_id=$oju1ts3G3pz5O62IesrxX5is4LxAwU3WPr4xvid5ijI, type=m.room.member, state_key=@user-2-charlie1:hs2, membership=join, outlier=False> ... 🗳️ on_invite_request: handling event <FrozenEventV3 event_id=$O_54j7O--6xMsegY5EVZ9SA-mI4_iHJOIoRwYyeWIPY, type=m.room.member, state_key=@user-3-charlie2:hs2, membership=invite, outlier=False> 🔦 _store_room_members_txn update room_memberships: <FrozenEventV3 event_id=$O_54j7O--6xMsegY5EVZ9SA-mI4_iHJOIoRwYyeWIPY, type=m.room.member, state_key=@user-3-charlie2:hs2, membership=invite, outlier=True> 🔦 _store_room_members_txn update local_current_membership: <FrozenEventV3 event_id=$O_54j7O--6xMsegY5EVZ9SA-mI4_iHJOIoRwYyeWIPY, type=m.room.member, state_key=@user-3-charlie2:hs2, membership=invite, outlier=True> 📨 Notifying about new event <FrozenEventV3 event_id=$O_54j7O--6xMsegY5EVZ9SA-mI4_iHJOIoRwYyeWIPY, type=m.room.member, state_key=@user-3-charlie2:hs2, membership=invite, outlier=True> ✅ on_invite_request: handled event <FrozenEventV3 event_id=$O_54j7O--6xMsegY5EVZ9SA-mI4_iHJOIoRwYyeWIPY, type=m.room.member, state_key=@user-3-charlie2:hs2, membership=invite, outlier=True> 📬 handling received PDU in room !sfZVBdLUezpPWetrol:hs1: <FrozenEventV3 event_id=$O_54j7O--6xMsegY5EVZ9SA-mI4_iHJOIoRwYyeWIPY, type=m.room.member, state_key=@user-3-charlie2:hs2, membership=invite, outlier=False> 📮 handle_new_client_event: handling <FrozenEventV3 event_id=$WNVDTQrxy5tCdPQHMyHyIn7tE4NWqKsZ8Bn8R4WbBSA, type=m.room.member, state_key=@user-3-charlie2:hs2, membership=join, outlier=False> ❌ Denying new event <FrozenEventV3 event_id=$WNVDTQrxy5tCdPQHMyHyIn7tE4NWqKsZ8Bn8R4WbBSA, type=m.room.member, state_key=@user-3-charlie2:hs2, membership=join, outlier=False> because 403: You are not invited to this room. synapse.http.server - 130 - INFO - POST-16 - <SynapseRequest at 0x7f460c91fbf0 method='POST' uri='/_matrix/client/v3/join/%21sfZVBdLUezpPWetrol:hs1?server_name=hs1' clientproto='HTTP/1.0' site='8080'> SynapseError: 403 - You are not invited to this room. 📨 Notifying about new event <FrozenEventV3 event_id=$O_54j7O--6xMsegY5EVZ9SA-mI4_iHJOIoRwYyeWIPY, type=m.room.member, state_key=@user-3-charlie2:hs2, membership=invite, outlier=False> ✅ handled received PDU in room !sfZVBdLUezpPWetrol:hs1: <FrozenEventV3 event_id=$O_54j7O--6xMsegY5EVZ9SA-mI4_iHJOIoRwYyeWIPY, type=m.room.member, state_key=@user-3-charlie2:hs2, membership=invite, outlier=False> ```
2025-01-27 11:21:10 -06:00
self.error_message = f"Test timed out after {seconds}s"
if error_message is not None:
self.error_message += f": {error_message}"
self.seconds = seconds
def handle_timeout(self, signum: int, frame: Optional[FrameType]) -> None:
raise TestTimeout(self.error_message)
def __enter__(self) -> None:
signal.signal(signal.SIGALRM, self.handle_timeout)
signal.alarm(self.seconds)
def __exit__(
self,
exc_type: Optional[Type[BaseException]],
exc_val: Optional[BaseException],
exc_tb: Optional[TracebackType],
) -> None:
signal.alarm(0)