From 61b7c31772034fe63b311bd63d7c3d7e24551cdf Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 18 Sep 2024 13:12:14 -0500 Subject: [PATCH 1/4] Sliding Sync: Shortcut for checking if certain background updates have completed (#17724) Shortcut for checking if certain background updates have completed Pulling this change out from one of @erikjohnston's branches (https://github.com/element-hq/synapse/compare/develop...erikj/ss_perf) --------- Co-authored-by: Erik Johnston --- changelog.d/17724.misc | 1 + synapse/storage/background_updates.py | 6 ++++++ 2 files changed, 7 insertions(+) create mode 100644 changelog.d/17724.misc diff --git a/changelog.d/17724.misc b/changelog.d/17724.misc new file mode 100644 index 0000000000..630443f179 --- /dev/null +++ b/changelog.d/17724.misc @@ -0,0 +1 @@ +Shortcut for checking if certain background updates have completed (utilized in Sliding Sync). diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py index 1194b58ffb..34139f580d 100644 --- a/synapse/storage/background_updates.py +++ b/synapse/storage/background_updates.py @@ -490,6 +490,12 @@ class BackgroundUpdater: if self._all_done: return True + # We now check if we have completed all pending background updates. We + # do this as once this returns True then it will set `self._all_done` + # and we can skip checking the database in future. + if await self.has_completed_background_updates(): + return True + rows = await self.db_pool.simple_select_many_batch( table="background_updates", column="update_name", From af998e6c660986da8385d26d724c70b5d0f77c67 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 18 Sep 2024 18:09:23 -0500 Subject: [PATCH 2/4] Sliding sync: Ignore invites from ignored users (#17729) `m.ignored_user_list` in account data --- changelog.d/17729.bugfix | 1 + synapse/handlers/sliding_sync/room_lists.py | 30 ++++- .../client/sliding_sync/test_sliding_sync.py | 113 +++++++++++++++++- 3 files changed, 142 insertions(+), 2 deletions(-) create mode 100644 changelog.d/17729.bugfix diff --git a/changelog.d/17729.bugfix b/changelog.d/17729.bugfix new file mode 100644 index 0000000000..4ba4e551c6 --- /dev/null +++ b/changelog.d/17729.bugfix @@ -0,0 +1 @@ +Ignore invites from ignored users in Sliding Sync. diff --git a/synapse/handlers/sliding_sync/room_lists.py b/synapse/handlers/sliding_sync/room_lists.py index 475bfbbbcb..8457526a45 100644 --- a/synapse/handlers/sliding_sync/room_lists.py +++ b/synapse/handlers/sliding_sync/room_lists.py @@ -224,15 +224,31 @@ class SlidingSyncRoomLists: user_id ) + # Remove invites from ignored users + ignored_users = await self.store.ignored_users(user_id) + if ignored_users: + # TODO: It would be nice to avoid these copies + room_membership_for_user_map = dict(room_membership_for_user_map) + # Make a copy so we don't run into an error: `dictionary changed size during + # iteration`, when we remove items + for room_id in list(room_membership_for_user_map.keys()): + room_for_user_sliding_sync = room_membership_for_user_map[room_id] + if ( + room_for_user_sliding_sync.membership == Membership.INVITE + and room_for_user_sliding_sync.sender in ignored_users + ): + room_membership_for_user_map.pop(room_id, None) + changes = await self._get_rewind_changes_to_current_membership_to_token( sync_config.user, room_membership_for_user_map, to_token=to_token ) if changes: + # TODO: It would be nice to avoid these copies room_membership_for_user_map = dict(room_membership_for_user_map) for room_id, change in changes.items(): if change is None: # Remove rooms that the user joined after the `to_token` - room_membership_for_user_map.pop(room_id) + room_membership_for_user_map.pop(room_id, None) continue existing_room = room_membership_for_user_map.get(room_id) @@ -926,6 +942,18 @@ class SlidingSyncRoomLists: excluded_rooms=self.rooms_to_exclude_globally, ) + # Remove invites from ignored users + ignored_users = await self.store.ignored_users(user_id) + if ignored_users: + room_for_user_list = [ + room_for_user + for room_for_user in room_for_user_list + if not ( + room_for_user.membership == Membership.INVITE + and room_for_user.sender in ignored_users + ) + ] + # If the user has never joined any rooms before, we can just return an empty list if not room_for_user_list: return {}, set(), set() diff --git a/tests/rest/client/sliding_sync/test_sliding_sync.py b/tests/rest/client/sliding_sync/test_sliding_sync.py index fe35cbb532..1126258c43 100644 --- a/tests/rest/client/sliding_sync/test_sliding_sync.py +++ b/tests/rest/client/sliding_sync/test_sliding_sync.py @@ -29,7 +29,7 @@ from synapse.api.constants import ( from synapse.api.room_versions import RoomVersions from synapse.events import EventBase, StrippedStateEvent, make_event_from_dict from synapse.events.snapshot import EventContext -from synapse.rest.client import devices, login, receipts, room, sync +from synapse.rest.client import account_data, devices, login, receipts, room, sync from synapse.server import HomeServer from synapse.types import ( JsonDict, @@ -413,6 +413,7 @@ class SlidingSyncTestCase(SlidingSyncBase): sync.register_servlets, devices.register_servlets, receipts.register_servlets, + account_data.register_servlets, ] def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: @@ -670,6 +671,116 @@ class SlidingSyncTestCase(SlidingSyncBase): exact=True, ) + def test_ignored_user_invites_initial_sync(self) -> None: + """ + Make sure we ignore invites if they are from one of the `m.ignored_user_list` on + initial sync. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + # Create a room that user1 is already in + room_id1 = self.helper.create_room_as(user1_id, tok=user1_tok) + + # Create a room that user2 is already in + room_id2 = self.helper.create_room_as(user2_id, tok=user2_tok) + + # User1 is invited to room_id2 + self.helper.invite(room_id2, src=user2_id, targ=user1_id, tok=user2_tok) + + # Sync once before we ignore to make sure the rooms can show up + sync_body = { + "lists": { + "foo-list": { + "ranges": [[0, 99]], + "required_state": [], + "timeline_limit": 0, + }, + } + } + response_body, _ = self.do_sync(sync_body, tok=user1_tok) + # room_id2 shows up because we haven't ignored the user yet + self.assertIncludes( + set(response_body["lists"]["foo-list"]["ops"][0]["room_ids"]), + {room_id1, room_id2}, + exact=True, + ) + + # User1 ignores user2 + channel = self.make_request( + "PUT", + f"/_matrix/client/v3/user/{user1_id}/account_data/{AccountDataTypes.IGNORED_USER_LIST}", + content={"ignored_users": {user2_id: {}}}, + access_token=user1_tok, + ) + self.assertEqual(channel.code, 200, channel.result) + + # Sync again (initial sync) + response_body, _ = self.do_sync(sync_body, tok=user1_tok) + # The invite for room_id2 should no longer show up because user2 is ignored + self.assertIncludes( + set(response_body["lists"]["foo-list"]["ops"][0]["room_ids"]), + {room_id1}, + exact=True, + ) + + def test_ignored_user_invites_incremental_sync(self) -> None: + """ + Make sure we ignore invites if they are from one of the `m.ignored_user_list` on + incremental sync. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + # Create a room that user1 is already in + room_id1 = self.helper.create_room_as(user1_id, tok=user1_tok) + + # Create a room that user2 is already in + room_id2 = self.helper.create_room_as(user2_id, tok=user2_tok) + + # User1 ignores user2 + channel = self.make_request( + "PUT", + f"/_matrix/client/v3/user/{user1_id}/account_data/{AccountDataTypes.IGNORED_USER_LIST}", + content={"ignored_users": {user2_id: {}}}, + access_token=user1_tok, + ) + self.assertEqual(channel.code, 200, channel.result) + + # Initial sync + sync_body = { + "lists": { + "foo-list": { + "ranges": [[0, 99]], + "required_state": [], + "timeline_limit": 0, + }, + } + } + response_body, from_token = self.do_sync(sync_body, tok=user1_tok) + # User1 only has membership in room_id1 at this point + self.assertIncludes( + set(response_body["lists"]["foo-list"]["ops"][0]["room_ids"]), + {room_id1}, + exact=True, + ) + + # User1 is invited to room_id2 after the initial sync + self.helper.invite(room_id2, src=user2_id, targ=user1_id, tok=user2_tok) + + # Sync again (incremental sync) + response_body, _ = self.do_sync(sync_body, since=from_token, tok=user1_tok) + # The invite for room_id2 doesn't show up because user2 is ignored + self.assertIncludes( + set(response_body["lists"]["foo-list"]["ops"][0]["room_ids"]), + {room_id1}, + exact=True, + ) + def test_sort_list(self) -> None: """ Test that the `lists` are sorted by `stream_ordering` From faf5b40520345d5e772ddfc71cb3f814a6509a17 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 19 Sep 2024 03:32:16 -0500 Subject: [PATCH 3/4] Sliding Sync: Fix `_bulk_get_max_event_pos(...)` being inefficient (#17728) Fix `_bulk_get_max_event_pos(...)` being inefficient. It kept adding all of the `batch_results` to the `results` over and over every time we checked a single room in the batch. I think we still ended up with the right answer before because we accumulate `recheck_rooms` and actually recheck them to overwrite the bad data we wrote to the `results` before. Introduced in https://github.com/element-hq/synapse/pull/17606/files#diff-cbd54e4b5a2a1646299d659a2d5884d6cb14e608efd2e1658e72b465bb66e31bR1481 --- changelog.d/17728.misc | 1 + synapse/storage/databases/main/stream.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 changelog.d/17728.misc diff --git a/changelog.d/17728.misc b/changelog.d/17728.misc new file mode 100644 index 0000000000..5ab241e9df --- /dev/null +++ b/changelog.d/17728.misc @@ -0,0 +1 @@ +Fix `_bulk_get_max_event_pos` being inefficient. diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index 03b4aa3381..0ab7cb8dbd 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -1584,7 +1584,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): ) for room_id, stream_ordering in batch_results.items(): if stream_ordering <= now_token.stream: - results.update(batch_results) + results[room_id] = stream_ordering else: recheck_rooms.add(room_id) From a9c0e27eb760c0d042e84e6f7beda167611ef148 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 19 Sep 2024 03:33:34 -0500 Subject: [PATCH 4/4] Sliding Sync: No need to sort if the range is large enough to cover all of the rooms (#17731) No need to sort if the range is large enough to cover all of the rooms in the list. Previously, we would only do this optimization if the range was exactly large enough. Follow-up to https://github.com/element-hq/synapse/pull/17672 --- changelog.d/17731.misc | 1 + synapse/handlers/sliding_sync/room_lists.py | 14 +++- .../client/sliding_sync/test_lists_filters.py | 66 ++++++++----------- .../client/sliding_sync/test_rooms_meta.py | 3 +- .../client/sliding_sync/test_sliding_sync.py | 61 ++++++++++++----- 5 files changed, 87 insertions(+), 58 deletions(-) create mode 100644 changelog.d/17731.misc diff --git a/changelog.d/17731.misc b/changelog.d/17731.misc new file mode 100644 index 0000000000..d5df74b4c9 --- /dev/null +++ b/changelog.d/17731.misc @@ -0,0 +1 @@ +Small performance improvement in speeding up Sliding Sync. diff --git a/synapse/handlers/sliding_sync/room_lists.py b/synapse/handlers/sliding_sync/room_lists.py index 8457526a45..a423de74bf 100644 --- a/synapse/handlers/sliding_sync/room_lists.py +++ b/synapse/handlers/sliding_sync/room_lists.py @@ -373,8 +373,18 @@ class SlidingSyncRoomLists: ops: List[SlidingSyncResult.SlidingWindowList.Operation] = [] if list_config.ranges: - if list_config.ranges == [(0, len(filtered_sync_room_map) - 1)]: - # If we are asking for the full range, we don't need to sort the list. + # Optimization: If we are asking for the full range, we don't + # need to sort the list. + if ( + # We're looking for a single range that covers the entire list + len(list_config.ranges) == 1 + # Range starts at 0 + and list_config.ranges[0][0] == 0 + # And the range extends to the end of the list or more. Each + # side is inclusive. + and list_config.ranges[0][1] + >= len(filtered_sync_room_map) - 1 + ): sorted_room_info: List[RoomsForUserType] = list( filtered_sync_room_map.values() ) diff --git a/tests/rest/client/sliding_sync/test_lists_filters.py b/tests/rest/client/sliding_sync/test_lists_filters.py index 16e4e8edbc..c59f6aedc4 100644 --- a/tests/rest/client/sliding_sync/test_lists_filters.py +++ b/tests/rest/client/sliding_sync/test_lists_filters.py @@ -230,32 +230,21 @@ class SlidingSyncFiltersTestCase(SlidingSyncBase): response_body, from_token = self.do_sync(sync_body, tok=user1_tok) # Make sure the response has the lists we requested - self.assertListEqual( - list(response_body["lists"].keys()), - ["all-list", "foo-list"], + self.assertIncludes( response_body["lists"].keys(), + {"all-list", "foo-list"}, ) # Make sure the lists have the correct rooms - self.assertListEqual( - list(response_body["lists"]["all-list"]["ops"]), - [ - { - "op": "SYNC", - "range": [0, 99], - "room_ids": [space_room_id, room_id], - } - ], + self.assertIncludes( + set(response_body["lists"]["all-list"]["ops"][0]["room_ids"]), + {space_room_id, room_id}, + exact=True, ) - self.assertListEqual( - list(response_body["lists"]["foo-list"]["ops"]), - [ - { - "op": "SYNC", - "range": [0, 99], - "room_ids": [space_room_id], - } - ], + self.assertIncludes( + set(response_body["lists"]["foo-list"]["ops"][0]["room_ids"]), + {space_room_id}, + exact=True, ) # Everyone leaves the encrypted space room @@ -284,26 +273,23 @@ class SlidingSyncFiltersTestCase(SlidingSyncBase): } response_body, _ = self.do_sync(sync_body, since=from_token, tok=user1_tok) - # Make sure the lists have the correct rooms even though we `newly_left` - self.assertListEqual( - list(response_body["lists"]["all-list"]["ops"]), - [ - { - "op": "SYNC", - "range": [0, 99], - "room_ids": [space_room_id, room_id], - } - ], + # Make sure the response has the lists we requested + self.assertIncludes( + response_body["lists"].keys(), + {"all-list", "foo-list"}, + exact=True, ) - self.assertListEqual( - list(response_body["lists"]["foo-list"]["ops"]), - [ - { - "op": "SYNC", - "range": [0, 99], - "room_ids": [space_room_id], - } - ], + + # Make sure the lists have the correct rooms even though we `newly_left` + self.assertIncludes( + set(response_body["lists"]["all-list"]["ops"][0]["room_ids"]), + {space_room_id, room_id}, + exact=True, + ) + self.assertIncludes( + set(response_body["lists"]["foo-list"]["ops"][0]["room_ids"]), + {space_room_id}, + exact=True, ) def test_filters_is_dm(self) -> None: diff --git a/tests/rest/client/sliding_sync/test_rooms_meta.py b/tests/rest/client/sliding_sync/test_rooms_meta.py index 40743d17eb..8d6039715f 100644 --- a/tests/rest/client/sliding_sync/test_rooms_meta.py +++ b/tests/rest/client/sliding_sync/test_rooms_meta.py @@ -935,7 +935,8 @@ class SlidingSyncRoomsMetaTestCase(SlidingSyncBase): op = response_body["lists"]["foo-list"]["ops"][0] self.assertEqual(op["op"], "SYNC") self.assertEqual(op["range"], [0, 1]) - # Note that we don't order the ops anymore, so we need to compare sets. + # Note that we don't sort the rooms when the range includes all of the rooms, so + # we just assert that the rooms are included self.assertIncludes(set(op["room_ids"]), {room_id1, room_id2}, exact=True) # The `bump_stamp` for room1 should point at the latest message (not the diff --git a/tests/rest/client/sliding_sync/test_sliding_sync.py b/tests/rest/client/sliding_sync/test_sliding_sync.py index 1126258c43..c2cfb29866 100644 --- a/tests/rest/client/sliding_sync/test_sliding_sync.py +++ b/tests/rest/client/sliding_sync/test_sliding_sync.py @@ -797,7 +797,38 @@ class SlidingSyncTestCase(SlidingSyncBase): self.helper.send(room_id1, "activity in room1", tok=user1_tok) self.helper.send(room_id2, "activity in room2", tok=user1_tok) - # Make the Sliding Sync request + # Make the Sliding Sync request where the range includes *some* of the rooms + sync_body = { + "lists": { + "foo-list": { + "ranges": [[0, 1]], + "required_state": [], + "timeline_limit": 1, + } + } + } + response_body, _ = self.do_sync(sync_body, tok=user1_tok) + + # Make sure it has the foo-list we requested + self.assertIncludes( + response_body["lists"].keys(), + {"foo-list"}, + ) + # Make sure the list is sorted in the way we expect (we only sort when the range + # doesn't include all of the room) + self.assertListEqual( + list(response_body["lists"]["foo-list"]["ops"]), + [ + { + "op": "SYNC", + "range": [0, 1], + "room_ids": [room_id2, room_id1], + } + ], + response_body["lists"]["foo-list"], + ) + + # Make the Sliding Sync request where the range includes *all* of the rooms sync_body = { "lists": { "foo-list": { @@ -810,24 +841,24 @@ class SlidingSyncTestCase(SlidingSyncBase): response_body, _ = self.do_sync(sync_body, tok=user1_tok) # Make sure it has the foo-list we requested - self.assertListEqual( - list(response_body["lists"].keys()), - ["foo-list"], + self.assertIncludes( response_body["lists"].keys(), + {"foo-list"}, ) - - # Make sure the list is sorted in the way we expect - self.assertListEqual( - list(response_body["lists"]["foo-list"]["ops"]), - [ - { - "op": "SYNC", - "range": [0, 99], - "room_ids": [room_id2, room_id1, room_id3], - } - ], + # Since the range includes all of the rooms, we don't sort the list + self.assertEqual( + len(response_body["lists"]["foo-list"]["ops"]), + 1, response_body["lists"]["foo-list"], ) + op = response_body["lists"]["foo-list"]["ops"][0] + self.assertEqual(op["op"], "SYNC") + self.assertEqual(op["range"], [0, 99]) + # Note that we don't sort the rooms when the range includes all of the rooms, so + # we just assert that the rooms are included + self.assertIncludes( + set(op["room_ids"]), {room_id1, room_id2, room_id3}, exact=True + ) def test_sliced_windows(self) -> None: """