Batch look-ups to see if rooms are partial stated. (#14917)

* Batch look-ups to see if rooms are partial stated.

* Fix issues found in linting.

* Fix typo.

* Apply suggestions from code review

Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com>

* Clarify comments.

Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com>

* Also improve the cache size while we're at it

* is_partial_state_rooms -> is_partial_state_room_batched

* Run `black`

* Improve annotation for `simple_select_many_batch`

* Fix is_partial_state_room_batched impl

* Okay, _actually_ fix impl

* Update description.

* Update synapse/storage/databases/main/room.py

Co-authored-by: Patrick Cloke <clokep@users.noreply.github.com>

* Run black.

Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com>
Co-authored-by: David Robertson <davidr@element.io>
This commit is contained in:
Patrick Cloke 2023-01-26 12:15:36 -05:00 committed by GitHub
parent cf66d712c6
commit 8a05d5de21
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 43 additions and 11 deletions

1
changelog.d/14917.misc Normal file
View file

@ -0,0 +1 @@
Faster joins: Improve performance of looking up partial-state status of rooms.

View file

@ -1383,16 +1383,21 @@ class SyncHandler:
if not sync_config.filter_collection.lazy_load_members():
# Non-lazy syncs should never include partially stated rooms.
# Exclude all partially stated rooms from this sync.
for room_id in mutable_joined_room_ids:
if await self.store.is_partial_state_room(room_id):
mutable_rooms_to_exclude.add(room_id)
results = await self.store.is_partial_state_room_batched(
mutable_joined_room_ids
)
mutable_rooms_to_exclude.update(
room_id
for room_id, is_partial_state in results.items()
if is_partial_state
)
# Incremental eager syncs should additionally include rooms that
# - we are joined to
# - are full-stated
# - became fully-stated at some point during the sync period
# (These rooms will have been omitted during a previous eager sync.)
forced_newly_joined_room_ids = set()
forced_newly_joined_room_ids: Set[str] = set()
if since_token and not sync_config.filter_collection.lazy_load_members():
un_partial_stated_rooms = (
await self.store.get_un_partial_stated_rooms_between(
@ -1401,9 +1406,14 @@ class SyncHandler:
mutable_joined_room_ids,
)
)
for room_id in un_partial_stated_rooms:
if not await self.store.is_partial_state_room(room_id):
forced_newly_joined_room_ids.add(room_id)
results = await self.store.is_partial_state_room_batched(
un_partial_stated_rooms
)
forced_newly_joined_room_ids.update(
room_id
for room_id, is_partial_state in results.items()
if not is_partial_state
)
# Now we have our list of joined room IDs, exclude as configured and freeze
joined_room_ids = frozenset(

View file

@ -1819,7 +1819,7 @@ class DatabasePool:
keyvalues: Optional[Dict[str, Any]] = None,
desc: str = "simple_select_many_batch",
batch_size: int = 100,
) -> List[Any]:
) -> List[Dict[str, Any]]:
"""Executes a SELECT query on the named table, which may return zero or
more rows, returning the result as a list of dicts.

View file

@ -60,9 +60,9 @@ from synapse.storage.util.id_generators import (
MultiWriterIdGenerator,
StreamIdGenerator,
)
from synapse.types import JsonDict, RetentionPolicy, ThirdPartyInstanceID
from synapse.types import JsonDict, RetentionPolicy, StrCollection, ThirdPartyInstanceID
from synapse.util import json_encoder
from synapse.util.caches.descriptors import cached
from synapse.util.caches.descriptors import cached, cachedList
from synapse.util.stringutils import MXC_REGEX
if TYPE_CHECKING:
@ -1255,7 +1255,7 @@ class RoomWorkerStore(CacheInvalidationWorkerStore):
return room_servers
@cached()
@cached(max_entries=10000)
async def is_partial_state_room(self, room_id: str) -> bool:
"""Checks if this room has partial state.
@ -1274,6 +1274,27 @@ class RoomWorkerStore(CacheInvalidationWorkerStore):
return entry is not None
@cachedList(cached_method_name="is_partial_state_room", list_name="room_ids")
async def is_partial_state_room_batched(
self, room_ids: StrCollection
) -> Mapping[str, bool]:
"""Checks if the given rooms have partial state.
Returns true for "partial-state" rooms, which means that the state
at events in the room, and `current_state_events`, may not yet be
complete.
"""
rows: List[Dict[str, str]] = await self.db_pool.simple_select_many_batch(
table="partial_state_rooms",
column="room_id",
iterable=room_ids,
retcols=("room_id",),
desc="is_partial_state_room_batched",
)
partial_state_rooms = {row_dict["room_id"] for row_dict in rows}
return {room_id: room_id in partial_state_rooms for room_id in room_ids}
async def get_join_event_id_and_device_lists_stream_id_for_partial_state(
self, room_id: str
) -> Tuple[str, int]: