Improve /sync performance when passing filters with empty arrays. (#14786)

This has two related changes:

* It enables fast-path processing for an empty filter (`[]`) which was
  previously only used for wildcard not-filters (`["*"]`).
* It special-cases a `/sync` filter with no rooms to skip all room
  processing. Previously we would partially skip processing, but would
  generally still calculate intermediate values for each room, which were
  then unused.

Future changes might consider further optimizations:

* Skip calculating per-room account data when all rooms are filtered (currently
  this is thrown away).
* Make similar improvements to other endpoints which support filters.
This commit is contained in:
Patrick Cloke 2023-01-09 08:43:50 -05:00 committed by GitHub
parent 5e0888076f
commit 7e582a25f8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 21 additions and 9 deletions

View file

@ -0,0 +1 @@
Improve performance of `/sync` when filtering all rooms, message types, or senders.

View file

@ -283,6 +283,9 @@ class FilterCollection:
await self._room_filter.filter(events)
)
def blocks_all_rooms(self) -> bool:
return self._room_filter.filters_all_rooms()
def blocks_all_presence(self) -> bool:
return (
self._presence_filter.filters_all_types()
@ -351,13 +354,13 @@ class Filter:
self.not_rel_types = filter_json.get("org.matrix.msc3874.not_rel_types", [])
def filters_all_types(self) -> bool:
return "*" in self.not_types
return self.types == [] or "*" in self.not_types
def filters_all_senders(self) -> bool:
return "*" in self.not_senders
return self.senders == [] or "*" in self.not_senders
def filters_all_rooms(self) -> bool:
return "*" in self.not_rooms
return self.rooms == [] or "*" in self.not_rooms
def _check(self, event: FilterEvent) -> bool:
"""Checks whether the filter matches the given event.
@ -450,8 +453,8 @@ class Filter:
if any(map(match_func, disallowed_values)):
return False
# Other the event does not match at least one of the allowed values,
# reject it.
# Otherwise if the event does not match at least one of the allowed
# values, reject it.
allowed_values = getattr(self, name)
if allowed_values is not None:
if not any(map(match_func, allowed_values)):

View file

@ -275,7 +275,7 @@ class SearchHandler:
)
room_ids = {r.room_id for r in rooms}
# If doing a subset of all rooms seearch, check if any of the rooms
# If doing a subset of all rooms search, check if any of the rooms
# are from an upgraded room, and search their contents as well
if search_filter.rooms:
historical_room_ids: List[str] = []

View file

@ -1403,11 +1403,14 @@ class SyncHandler:
logger.debug("Fetching room data")
res = await self._generate_sync_entry_for_rooms(
(
newly_joined_rooms,
newly_joined_or_invited_or_knocked_users,
newly_left_rooms,
newly_left_users,
) = await self._generate_sync_entry_for_rooms(
sync_result_builder, account_data_by_room
)
newly_joined_rooms, newly_joined_or_invited_or_knocked_users, _, _ = res
_, _, newly_left_rooms, newly_left_users = res
block_all_presence_data = (
since_token is None and sync_config.filter_collection.blocks_all_presence()
@ -1789,6 +1792,11 @@ class SyncHandler:
- newly_left_rooms
- newly_left_users
"""
# If the request doesn't care about rooms then nothing to do!
if sync_result_builder.sync_config.filter_collection.blocks_all_rooms():
return set(), set(), set(), set()
since_token = sync_result_builder.since_token
# 1. Start by fetching all ephemeral events in rooms we've joined (if required).