Prevent join->join membership transitions changing member count (#7977)

`StatsHandler` handles updates to the `current_state_delta_stream`, and updates room stats such as the amount of state events, joined users, etc.

However, it counts every new join membership as a new user entering a room (and that user being in another room), whereas it's possible for a user's membership status to go from join -> join, for instance when they change their per-room profile information.

This PR adds a check for join->join membership transitions, and bails out early, as none of the further checks are necessary at that point.

Due to this bug, membership stats in many rooms have ended up being wildly larger than their true values. I am not sure if we also want to include a migration step which recalculates these statistics (possibly using the `_populate_stats_process_rooms` bg update).

Bug introduced in the initial implementation https://github.com/matrix-org/synapse/pull/4338.
This commit is contained in:
Andrew Morgan 2020-08-03 13:54:24 -07:00 committed by GitHub
parent 6812509807
commit 5d92a1428c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 126 additions and 13 deletions

1
changelog.d/7977.bugfix Normal file
View file

@ -0,0 +1 @@
Fix a bug introduced in Synapse v1.7.2 which caused inaccurate membership counts in the room directory.

View file

@ -232,7 +232,7 @@ class StatsHandler:
if membership == prev_membership:
pass # noop
if membership == Membership.JOIN:
elif membership == Membership.JOIN:
room_stats_delta["joined_members"] += 1
elif membership == Membership.INVITE:
room_stats_delta["invited_members"] += 1

View file

@ -0,0 +1,32 @@
/* Copyright 2020 The Matrix.org Foundation C.I.C.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-- Recalculate the stats for all rooms after the fix to joined_members erroneously
-- incrementing on per-room profile changes.
-- Note that the populate_stats_process_rooms background update is already set to
-- run if you're upgrading from Synapse <1.0.0.
-- Additionally, if you've upgraded to v1.18.0 (which doesn't include this fix),
-- this bg job runs, and then update to v1.19.0, you'd end up with only half of
-- your rooms having room stats recalculated after this fix was in place.
-- So we've switched the old `populate_stats_process_rooms` background job to a
-- no-op, and then kick off a bg job with a new name, but with the same
-- functionality as the old one. This effectively restarts the background job
-- from the beginning, without running it twice in a row, supporting both
-- upgrade usecases.
INSERT INTO background_updates (update_name, progress_json) VALUES
('populate_stats_process_rooms_2', '{}');

View file

@ -72,6 +72,9 @@ class StatsStore(StateDeltasStore):
self.db.updates.register_background_update_handler(
"populate_stats_process_rooms", self._populate_stats_process_rooms
)
self.db.updates.register_background_update_handler(
"populate_stats_process_rooms_2", self._populate_stats_process_rooms_2
)
self.db.updates.register_background_update_handler(
"populate_stats_process_users", self._populate_stats_process_users
)
@ -140,11 +143,30 @@ class StatsStore(StateDeltasStore):
return len(users_to_work_on)
async def _populate_stats_process_rooms(self, progress, batch_size):
"""
This was a background update which regenerated statistics for rooms.
It has been replaced by StatsStore._populate_stats_process_rooms_2. This background
job has been scheduled to run as part of Synapse v1.0.0, and again now. To ensure
someone upgrading from <v1.0.0, this background task has been turned into a no-op
so that the potentially expensive task is not run twice.
Further context: https://github.com/matrix-org/synapse/pull/7977
"""
await self.db.updates._end_background_update("populate_stats_process_rooms")
return 1
async def _populate_stats_process_rooms_2(self, progress, batch_size):
"""
This is a background update which regenerates statistics for rooms.
It replaces StatsStore._populate_stats_process_rooms. See its docstring for the
reasoning.
"""
if not self.stats_enabled:
await self.db.updates._end_background_update("populate_stats_process_rooms")
await self.db.updates._end_background_update(
"populate_stats_process_rooms_2"
)
return 1
last_room_id = progress.get("last_room_id", "")
@ -160,12 +182,14 @@ class StatsStore(StateDeltasStore):
return [r for r, in txn]
rooms_to_work_on = await self.db.runInteraction(
"populate_stats_rooms_get_batch", _get_next_batch
"populate_stats_rooms_2_get_batch", _get_next_batch
)
# No more rooms -- complete the transaction.
if not rooms_to_work_on:
await self.db.updates._end_background_update("populate_stats_process_rooms")
await self.db.updates._end_background_update(
"populate_stats_process_rooms_2"
)
return 1
for room_id in rooms_to_work_on:
@ -173,9 +197,9 @@ class StatsStore(StateDeltasStore):
progress["last_room_id"] = room_id
await self.db.runInteraction(
"_populate_stats_process_rooms",
"_populate_stats_process_rooms_2",
self.db.updates._background_update_progress_txn,
"populate_stats_process_rooms",
"populate_stats_process_rooms_2",
progress,
)

View file

@ -54,7 +54,7 @@ class StatsRoomTests(unittest.HomeserverTestCase):
self.store.db.simple_insert(
"background_updates",
{
"update_name": "populate_stats_process_rooms",
"update_name": "populate_stats_process_rooms_2",
"progress_json": "{}",
"depends_on": "populate_stats_prepare",
},
@ -66,7 +66,7 @@ class StatsRoomTests(unittest.HomeserverTestCase):
{
"update_name": "populate_stats_process_users",
"progress_json": "{}",
"depends_on": "populate_stats_process_rooms",
"depends_on": "populate_stats_process_rooms_2",
},
)
)
@ -219,7 +219,10 @@ class StatsRoomTests(unittest.HomeserverTestCase):
self.get_success(
self.store.db.simple_insert(
"background_updates",
{"update_name": "populate_stats_process_rooms", "progress_json": "{}"},
{
"update_name": "populate_stats_process_rooms_2",
"progress_json": "{}",
},
)
)
self.get_success(
@ -228,7 +231,7 @@ class StatsRoomTests(unittest.HomeserverTestCase):
{
"update_name": "populate_stats_cleanup",
"progress_json": "{}",
"depends_on": "populate_stats_process_rooms",
"depends_on": "populate_stats_process_rooms_2",
},
)
)
@ -346,6 +349,37 @@ class StatsRoomTests(unittest.HomeserverTestCase):
self.assertEqual(r1stats_post["total_events"] - r1stats_ante["total_events"], 1)
def test_updating_profile_information_does_not_increase_joined_members_count(self):
"""
Check that the joined_members count does not increase when a user changes their
profile information (which is done by sending another join membership event into
the room.
"""
self._perform_background_initial_update()
# Create a user and room
u1 = self.register_user("u1", "pass")
u1token = self.login("u1", "pass")
r1 = self.helper.create_room_as(u1, tok=u1token)
# Get the current room stats
r1stats_ante = self._get_current_stats("room", r1)
# Send a profile update into the room
new_profile = {"displayname": "bob"}
self.helper.change_membership(
r1, u1, u1, "join", extra_data=new_profile, tok=u1token
)
# Get the new room stats
r1stats_post = self._get_current_stats("room", r1)
# Ensure that the user count did not changed
self.assertEqual(r1stats_post["joined_members"], r1stats_ante["joined_members"])
self.assertEqual(
r1stats_post["local_users_in_room"], r1stats_ante["local_users_in_room"]
)
def test_send_state_event_nonoverwriting(self):
"""
When we send a non-overwriting state event, it increments total_events AND current_state_events
@ -694,7 +728,7 @@ class StatsRoomTests(unittest.HomeserverTestCase):
self.store.db.simple_insert(
"background_updates",
{
"update_name": "populate_stats_process_rooms",
"update_name": "populate_stats_process_rooms_2",
"progress_json": "{}",
"depends_on": "populate_stats_prepare",
},
@ -706,7 +740,7 @@ class StatsRoomTests(unittest.HomeserverTestCase):
{
"update_name": "populate_stats_process_users",
"progress_json": "{}",
"depends_on": "populate_stats_process_rooms",
"depends_on": "populate_stats_process_rooms_2",
},
)
)

View file

@ -88,7 +88,28 @@ class RestHelper(object):
expect_code=expect_code,
)
def change_membership(self, room, src, targ, membership, tok=None, expect_code=200):
def change_membership(
self,
room: str,
src: str,
targ: str,
membership: str,
extra_data: dict = {},
tok: Optional[str] = None,
expect_code: int = 200,
) -> None:
"""
Send a membership state event into a room.
Args:
room: The ID of the room to send to
src: The mxid of the event sender
targ: The mxid of the event's target. The state key
membership: The type of membership event
extra_data: Extra information to include in the content of the event
tok: The user access token to use
expect_code: The expected HTTP response code
"""
temp_id = self.auth_user_id
self.auth_user_id = src
@ -97,6 +118,7 @@ class RestHelper(object):
path = path + "?access_token=%s" % tok
data = {"membership": membership}
data.update(extra_data)
request, channel = make_request(
self.hs.get_reactor(), "PUT", path, json.dumps(data).encode("utf8")