Add some miscellaneous comments around sync (#13474)

Add some miscellaneous comments to document sync, especially around
`compute_state_delta`.

Signed-off-by: Sean Quah <seanq@matrix.org>

Co-authored-by: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>
This commit is contained in:
Sean Quah 2022-08-10 12:58:20 +01:00 committed by GitHub
parent 1b09b0832e
commit 51c01d450a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 81 additions and 40 deletions

1
changelog.d/13474.misc Normal file
View file

@ -0,0 +1 @@
Add some miscellaneous comments to document sync, especially around `compute_state_delta`.

View file

@ -13,7 +13,17 @@
# limitations under the License.
import itertools
import logging
from typing import TYPE_CHECKING, Any, Dict, FrozenSet, List, Optional, Set, Tuple
from typing import (
TYPE_CHECKING,
Any,
Dict,
FrozenSet,
List,
Optional,
Sequence,
Set,
Tuple,
)
import attr
from prometheus_client import Counter
@ -89,7 +99,7 @@ class SyncConfig:
@attr.s(slots=True, frozen=True, auto_attribs=True)
class TimelineBatch:
prev_batch: StreamToken
events: List[EventBase]
events: Sequence[EventBase]
limited: bool
# A mapping of event ID to the bundled aggregations for the above events.
# This is only calculated if limited is true.
@ -852,16 +862,26 @@ class SyncHandler:
now_token: StreamToken,
full_state: bool,
) -> MutableStateMap[EventBase]:
"""Works out the difference in state between the start of the timeline
and the previous sync.
"""Works out the difference in state between the end of the previous sync and
the start of the timeline.
Args:
room_id:
batch: The timeline batch for the room that will be sent to the user.
sync_config:
since_token: Token of the end of the previous batch. May be None.
since_token: Token of the end of the previous batch. May be `None`.
now_token: Token of the end of the current batch.
full_state: Whether to force returning the full state.
`lazy_load_members` still applies when `full_state` is `True`.
Returns:
The state to return in the sync response for the room.
Clients will overlay this onto the state at the end of the previous sync to
arrive at the state at the start of the timeline.
Clients will then overlay state events in the timeline to arrive at the
state at the end of the timeline, in preparation for the next sync.
"""
# TODO(mjark) Check if the state events were received by the server
# after the previous sync, since we need to include those state
@ -869,7 +889,8 @@ class SyncHandler:
# TODO(mjark) Check for new redactions in the state events.
with Measure(self.clock, "compute_state_delta"):
# The memberships needed for events in the timeline.
# Only calculated when `lazy_load_members` is on.
members_to_fetch = None
lazy_load_members = sync_config.filter_collection.lazy_load_members()
@ -897,38 +918,46 @@ class SyncHandler:
else:
state_filter = StateFilter.all()
# The contribution to the room state from state events in the timeline.
# Only contains the last event for any given state key.
timeline_state = {
(event.type, event.state_key): event.event_id
for event in batch.events
if event.is_state()
}
# Now calculate the state to return in the sync response for the room.
# This is more or less the change in state between the end of the previous
# sync's timeline and the start of the current sync's timeline.
# See the docstring above for details.
state_ids: StateMap[str]
if full_state:
if batch:
current_state_ids = (
state_at_timeline_end = (
await self._state_storage_controller.get_state_ids_for_event(
batch.events[-1].event_id, state_filter=state_filter
)
)
state_ids = (
state_at_timeline_start = (
await self._state_storage_controller.get_state_ids_for_event(
batch.events[0].event_id, state_filter=state_filter
)
)
else:
current_state_ids = await self.get_state_at(
state_at_timeline_end = await self.get_state_at(
room_id, stream_position=now_token, state_filter=state_filter
)
state_ids = current_state_ids
state_at_timeline_start = state_at_timeline_end
state_ids = _calculate_state(
timeline_contains=timeline_state,
timeline_start=state_ids,
previous={},
current=current_state_ids,
timeline_start=state_at_timeline_start,
timeline_end=state_at_timeline_end,
previous_timeline_end={},
lazy_load_members=lazy_load_members,
)
elif batch.limited:
@ -968,24 +997,23 @@ class SyncHandler:
)
if batch:
current_state_ids = (
state_at_timeline_end = (
await self._state_storage_controller.get_state_ids_for_event(
batch.events[-1].event_id, state_filter=state_filter
)
)
else:
# Its not clear how we get here, but empirically we do
# (#5407). Logging has been added elsewhere to try and
# figure out where this state comes from.
current_state_ids = await self.get_state_at(
# We can get here if the user has ignored the senders of all
# the recent events.
state_at_timeline_end = await self.get_state_at(
room_id, stream_position=now_token, state_filter=state_filter
)
state_ids = _calculate_state(
timeline_contains=timeline_state,
timeline_start=state_at_timeline_start,
previous=state_at_previous_sync,
current=current_state_ids,
timeline_end=state_at_timeline_end,
previous_timeline_end=state_at_previous_sync,
# we have to include LL members in case LL initial sync missed them
lazy_load_members=lazy_load_members,
)
@ -1010,6 +1038,13 @@ class SyncHandler:
),
)
# At this point, if `lazy_load_members` is enabled, `state_ids` includes
# the memberships of all event senders in the timeline. This is because we
# may not have sent the memberships in a previous sync.
# When `include_redundant_members` is on, we send all the lazy-loaded
# memberships of event senders. Otherwise we make an effort to limit the set
# of memberships we send to those that we have not already sent to this client.
if lazy_load_members and not include_redundant_members:
cache_key = (sync_config.user.to_string(), sync_config.device_id)
cache = self.get_lazy_loaded_members_cache(cache_key)
@ -2216,8 +2251,8 @@ def _action_has_highlight(actions: List[JsonDict]) -> bool:
def _calculate_state(
timeline_contains: StateMap[str],
timeline_start: StateMap[str],
previous: StateMap[str],
current: StateMap[str],
timeline_end: StateMap[str],
previous_timeline_end: StateMap[str],
lazy_load_members: bool,
) -> StateMap[str]:
"""Works out what state to include in a sync response.
@ -2225,45 +2260,50 @@ def _calculate_state(
Args:
timeline_contains: state in the timeline
timeline_start: state at the start of the timeline
previous: state at the end of the previous sync (or empty dict
timeline_end: state at the end of the timeline
previous_timeline_end: state at the end of the previous sync (or empty dict
if this is an initial sync)
current: state at the end of the timeline
lazy_load_members: whether to return members from timeline_start
or not. assumes that timeline_start has already been filtered to
include only the members the client needs to know about.
"""
event_id_to_key = {
e: key
for key, e in itertools.chain(
event_id_to_state_key = {
event_id: state_key
for state_key, event_id in itertools.chain(
timeline_contains.items(),
previous.items(),
timeline_start.items(),
current.items(),
timeline_end.items(),
previous_timeline_end.items(),
)
}
c_ids = set(current.values())
ts_ids = set(timeline_start.values())
p_ids = set(previous.values())
tc_ids = set(timeline_contains.values())
timeline_end_ids = set(timeline_end.values())
timeline_start_ids = set(timeline_start.values())
previous_timeline_end_ids = set(previous_timeline_end.values())
timeline_contains_ids = set(timeline_contains.values())
# If we are lazyloading room members, we explicitly add the membership events
# for the senders in the timeline into the state block returned by /sync,
# as we may not have sent them to the client before. We find these membership
# events by filtering them out of timeline_start, which has already been filtered
# to only include membership events for the senders in the timeline.
# In practice, we can do this by removing them from the p_ids list,
# which is the list of relevant state we know we have already sent to the client.
# In practice, we can do this by removing them from the previous_timeline_end_ids
# list, which is the list of relevant state we know we have already sent to the
# client.
# see https://github.com/matrix-org/synapse/pull/2970/files/efcdacad7d1b7f52f879179701c7e0d9b763511f#r204732809
if lazy_load_members:
p_ids.difference_update(
previous_timeline_end_ids.difference_update(
e for t, e in timeline_start.items() if t[0] == EventTypes.Member
)
state_ids = ((c_ids | ts_ids) - p_ids) - tc_ids
state_ids = (
(timeline_end_ids | timeline_start_ids)
- previous_timeline_end_ids
- timeline_contains_ids
)
return {event_id_to_key[e]: e for e in state_ids}
return {event_id_to_state_key[e]: e for e in state_ids}
@attr.s(slots=True, auto_attribs=True)

View file

@ -73,8 +73,8 @@ async def filter_events_for_client(
* the user is not currently a member of the room, and:
* the user has not been a member of the room since the given
events
always_include_ids: set of event ids to specifically
include (unless sender is ignored)
always_include_ids: set of event ids to specifically include, if present
in events (unless sender is ignored)
filter_send_to_client: Whether we're checking an event that's going to be
sent to a client. This might not always be the case since this function can
also be called to check whether a user can see the state at a given point.