1
0
Fork 0
mirror of https://github.com/element-hq/synapse.git synced 2025-03-31 03:45:13 +00:00

Add unique index right away for sliding_sync_joined_rooms_to_recalculate

This makes it so we can always `upsert` to avoid duplicates otherwise
I'm not sure of how to not insert duplicates in certain situations
(see FIXME in the diff) which would cause problems down the line
for the unique index being added later.
This commit is contained in:
Eric Eastwood 2024-08-28 00:50:33 -05:00
parent 8468401a97
commit da463fb102
4 changed files with 21 additions and 78 deletions

View file

@ -91,9 +91,6 @@ class _BackgroundUpdates:
SLIDING_SYNC_PREFILL_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE = (
"sliding_sync_prefill_joined_rooms_to_recalculate_table_bg_update"
)
SLIDING_SYNC_INDEX_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE = (
"sliding_sync_index_joined_rooms_to_recalculate_table_bg_update"
)
SLIDING_SYNC_JOINED_ROOMS_BG_UPDATE = "sliding_sync_joined_rooms_bg_update"
SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BG_UPDATE = (
"sliding_sync_membership_snapshots_bg_update"
@ -318,13 +315,6 @@ class EventsBackgroundUpdatesStore(StreamWorkerStore, StateDeltasStore, SQLBaseS
_BackgroundUpdates.SLIDING_SYNC_PREFILL_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE,
self._sliding_sync_prefill_joined_rooms_to_recalculate_table_bg_update,
)
self.db_pool.updates.register_background_index_update(
_BackgroundUpdates.SLIDING_SYNC_INDEX_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE,
index_name="sliding_sync_joined_rooms_to_recalculate_room_id_idx",
table="sliding_sync_joined_rooms_to_recalculate",
columns=["room_id"],
unique=True,
)
# Add some background updates to populate the sliding sync tables
self.db_pool.updates.register_background_update_handler(
_BackgroundUpdates.SLIDING_SYNC_JOINED_ROOMS_BG_UPDATE,
@ -1579,38 +1569,25 @@ class EventsBackgroundUpdatesStore(StreamWorkerStore, StateDeltasStore, SQLBaseS
"""
Prefill `sliding_sync_joined_rooms_to_recalculate` table with all rooms we know about already.
"""
initial_insert = progress.get("initial_insert", False)
def _txn(txn: LoggingTransaction) -> None:
# We do this as one big bulk insert. This has been tested on a bigger
# homeserver with ~10M rooms and took 11s. There is potential for this to
# starve disk usage while this goes on.
if initial_insert:
txn.execute(
"""
INSERT INTO sliding_sync_joined_rooms_to_recalculate
(room_id)
SELECT room_id FROM rooms;
""",
)
else:
# We can only upsert once the unique index has been added to the table
# (see
# `_BackgroundUpdates.SLIDING_SYNC_INDEX_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE`)
#
# We upsert in case we have to run this multiple times.
#
# The `WHERE TRUE` clause is to avoid "Parsing Ambiguity"
txn.execute(
"""
INSERT INTO sliding_sync_joined_rooms_to_recalculate
(room_id)
SELECT room_id FROM rooms WHERE ?
ON CONFLICT (room_id)
DO NOTHING;
""",
(True,),
)
#
# We upsert in case we have to run this multiple times.
#
# The `WHERE TRUE` clause is to avoid "Parsing Ambiguity"
txn.execute(
"""
INSERT INTO sliding_sync_joined_rooms_to_recalculate
(room_id)
SELECT room_id FROM rooms WHERE ?
ON CONFLICT (room_id)
DO NOTHING;
""",
(True,),
)
await self.db_pool.runInteraction(
"_sliding_sync_prefill_joined_rooms_to_recalculate_table_bg_update",

View file

@ -679,10 +679,6 @@ def _resolve_stale_data_in_sliding_sync_joined_rooms_table(
# Update the `sliding_sync_joined_rooms_to_recalculate` table with the rooms
# that went stale and now need to be recalculated.
#
# FIXME: There is potentially a race where the unique index (added via
# `_BackgroundUpdates.SLIDING_SYNC_INDEX_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE`)
# hasn't been added at this point so we won't be able to upsert
DatabasePool.simple_upsert_many_txn_native_upsert(
txn,
table="sliding_sync_joined_rooms_to_recalculate",
@ -707,8 +703,6 @@ def _resolve_stale_data_in_sliding_sync_joined_rooms_table(
# we're already working on it
insertion_values={
"progress_json": "{}",
# Since we're going to upsert, we need to make sure the unique index is in place
"depends_on": _BackgroundUpdates.SLIDING_SYNC_INDEX_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE,
},
)
depends_on = (

View file

@ -16,11 +16,9 @@
-- during the background update to populate `sliding_sync_joined_rooms` which works but
-- it takes a lot of work for the database to grab `DISTINCT` room_ids given how many
-- state events there are for each room.
--
-- This table doesn't have any indexes at this point. We add the indexes in a separate
-- step to avoid the extra calculations during the bulk one-shot prefill insert.
CREATE TABLE IF NOT EXISTS sliding_sync_joined_rooms_to_recalculate(
room_id TEXT NOT NULL REFERENCES rooms(room_id)
room_id TEXT NOT NULL REFERENCES rooms(room_id),
PRIMARY KEY (room_id)
);
-- A table for storing room meta data (current state relevant to sliding sync) that the
@ -143,18 +141,12 @@ CREATE UNIQUE INDEX IF NOT EXISTS sliding_sync_membership_snapshots_event_stream
--
-- 1. Add a background update to prefill `sliding_sync_joined_rooms_to_recalculate`.
-- We do a one-shot bulk insert from the `rooms` table to prefill.
-- 2. Add a background update to add indexes to the
-- `sliding_sync_joined_rooms_to_recalculate` table after the one-shot bulk insert.
-- We add the index in a separate step after to avoid the extra calculations during
-- the one-shot bulk insert.
-- 3. Add a background update to populate the new `sliding_sync_joined_rooms` table
-- 2. Add a background update to populate the new `sliding_sync_joined_rooms` table
--
INSERT INTO background_updates (ordering, update_name, progress_json) VALUES
(8701, 'sliding_sync_prefill_joined_rooms_to_recalculate_table_bg_update', '{ "initial_insert": true }');
(8701, 'sliding_sync_prefill_joined_rooms_to_recalculate_table_bg_update', '{}');
INSERT INTO background_updates (ordering, update_name, progress_json, depends_on) VALUES
(8701, 'sliding_sync_index_joined_rooms_to_recalculate_table_bg_update', '{}', 'sliding_sync_prefill_joined_rooms_to_recalculate_table_bg_update');
INSERT INTO background_updates (ordering, update_name, progress_json, depends_on) VALUES
(8701, 'sliding_sync_joined_rooms_bg_update', '{}', 'sliding_sync_index_joined_rooms_to_calculate_table_bg_update');
(8701, 'sliding_sync_joined_rooms_bg_update', '{}', 'sliding_sync_prefill_joined_rooms_to_recalculate_table_bg_update');
-- Add a background updates to populate the new `sliding_sync_membership_snapshots` table
INSERT INTO background_updates (ordering, update_name, progress_json) VALUES

View file

@ -2669,23 +2669,13 @@ class SlidingSyncTablesBackgroundUpdatesTestCase(SlidingSyncTablesTestCaseBase):
},
)
)
self.get_success(
self.store.db_pool.simple_insert(
"background_updates",
{
"update_name": _BackgroundUpdates.SLIDING_SYNC_INDEX_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE,
"progress_json": "{}",
"depends_on": _BackgroundUpdates.SLIDING_SYNC_PREFILL_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE,
},
)
)
self.get_success(
self.store.db_pool.simple_insert(
"background_updates",
{
"update_name": _BackgroundUpdates.SLIDING_SYNC_JOINED_ROOMS_BG_UPDATE,
"progress_json": "{}",
"depends_on": _BackgroundUpdates.SLIDING_SYNC_INDEX_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE,
"depends_on": _BackgroundUpdates.SLIDING_SYNC_PREFILL_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE,
},
)
)
@ -2836,23 +2826,13 @@ class SlidingSyncTablesBackgroundUpdatesTestCase(SlidingSyncTablesTestCaseBase):
},
)
)
self.get_success(
self.store.db_pool.simple_insert(
"background_updates",
{
"update_name": _BackgroundUpdates.SLIDING_SYNC_INDEX_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE,
"progress_json": "{}",
"depends_on": _BackgroundUpdates.SLIDING_SYNC_PREFILL_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE,
},
)
)
self.get_success(
self.store.db_pool.simple_insert(
"background_updates",
{
"update_name": _BackgroundUpdates.SLIDING_SYNC_JOINED_ROOMS_BG_UPDATE,
"progress_json": "{}",
"depends_on": _BackgroundUpdates.SLIDING_SYNC_INDEX_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE,
"depends_on": _BackgroundUpdates.SLIDING_SYNC_PREFILL_JOINED_ROOMS_TO_RECALCULATE_TABLE_BG_UPDATE,
},
)
)