mirror of
https://github.com/element-hq/synapse.git
synced 2024-12-14 11:57:44 +00:00
Preparatory work for tweaking performance of auth chain lookups (#16833)
This commit is contained in:
parent
fa2700f001
commit
14c725f73b
5 changed files with 163 additions and 27 deletions
1
changelog.d/16833.misc
Normal file
1
changelog.d/16833.misc
Normal file
|
@ -0,0 +1 @@
|
||||||
|
Preparatory work for tweaking performance of auth chain lookups.
|
|
@ -159,6 +159,13 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas
|
||||||
unique_columns=("event_id", "room_id"),
|
unique_columns=("event_id", "room_id"),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
self.db_pool.updates.register_background_index_update(
|
||||||
|
update_name="event_auth_chain_links_origin_index",
|
||||||
|
index_name="event_auth_chain_links_origin_index",
|
||||||
|
table="event_auth_chain_links",
|
||||||
|
columns=("origin_chain_id", "origin_sequence_number"),
|
||||||
|
)
|
||||||
|
|
||||||
async def get_auth_chain(
|
async def get_auth_chain(
|
||||||
self, room_id: str, event_ids: Collection[str], include_given: bool = False
|
self, room_id: str, event_ids: Collection[str], include_given: bool = False
|
||||||
) -> List[EventBase]:
|
) -> List[EventBase]:
|
||||||
|
@ -271,38 +278,63 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas
|
||||||
|
|
||||||
# Now we look up all links for the chains we have, adding chains that
|
# Now we look up all links for the chains we have, adding chains that
|
||||||
# are reachable from any event.
|
# are reachable from any event.
|
||||||
|
#
|
||||||
|
# This query is structured to first get all chain IDs reachable, and
|
||||||
|
# then pull out all links from those chains. This does pull out more
|
||||||
|
# rows than is strictly necessary, however there isn't a way of
|
||||||
|
# structuring the recursive part of the query to pull out the links without
|
||||||
|
# also returning large quantities of redundant data (which can make it a
|
||||||
|
# lot slower).
|
||||||
sql = """
|
sql = """
|
||||||
|
WITH RECURSIVE links(chain_id) AS (
|
||||||
|
SELECT
|
||||||
|
DISTINCT origin_chain_id
|
||||||
|
FROM event_auth_chain_links WHERE %s
|
||||||
|
UNION
|
||||||
|
SELECT
|
||||||
|
target_chain_id
|
||||||
|
FROM event_auth_chain_links
|
||||||
|
INNER JOIN links ON (chain_id = origin_chain_id)
|
||||||
|
)
|
||||||
SELECT
|
SELECT
|
||||||
origin_chain_id, origin_sequence_number,
|
origin_chain_id, origin_sequence_number,
|
||||||
target_chain_id, target_sequence_number
|
target_chain_id, target_sequence_number
|
||||||
FROM event_auth_chain_links
|
FROM links
|
||||||
WHERE %s
|
INNER JOIN event_auth_chain_links ON (chain_id = origin_chain_id)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# A map from chain ID to max sequence number *reachable* from any event ID.
|
# A map from chain ID to max sequence number *reachable* from any event ID.
|
||||||
chains: Dict[int, int] = {}
|
chains: Dict[int, int] = {}
|
||||||
|
|
||||||
# Add all linked chains reachable from initial set of chains.
|
# Add all linked chains reachable from initial set of chains.
|
||||||
for batch2 in batch_iter(event_chains, 1000):
|
chains_to_fetch = set(event_chains.keys())
|
||||||
|
while chains_to_fetch:
|
||||||
|
batch2 = tuple(itertools.islice(chains_to_fetch, 100))
|
||||||
|
chains_to_fetch.difference_update(batch2)
|
||||||
clause, args = make_in_list_sql_clause(
|
clause, args = make_in_list_sql_clause(
|
||||||
txn.database_engine, "origin_chain_id", batch2
|
txn.database_engine, "origin_chain_id", batch2
|
||||||
)
|
)
|
||||||
txn.execute(sql % (clause,), args)
|
txn.execute(sql % (clause,), args)
|
||||||
|
|
||||||
|
links: Dict[int, List[Tuple[int, int, int]]] = {}
|
||||||
|
|
||||||
for (
|
for (
|
||||||
origin_chain_id,
|
origin_chain_id,
|
||||||
origin_sequence_number,
|
origin_sequence_number,
|
||||||
target_chain_id,
|
target_chain_id,
|
||||||
target_sequence_number,
|
target_sequence_number,
|
||||||
) in txn:
|
) in txn:
|
||||||
# chains are only reachable if the origin sequence number of
|
links.setdefault(origin_chain_id, []).append(
|
||||||
# the link is less than the max sequence number in the
|
(origin_sequence_number, target_chain_id, target_sequence_number)
|
||||||
# origin chain.
|
)
|
||||||
if origin_sequence_number <= event_chains.get(origin_chain_id, 0):
|
|
||||||
chains[target_chain_id] = max(
|
for chain_id in links:
|
||||||
target_sequence_number,
|
if chain_id not in event_chains:
|
||||||
chains.get(target_chain_id, 0),
|
continue
|
||||||
)
|
|
||||||
|
_materialize(chain_id, event_chains[chain_id], links, chains)
|
||||||
|
|
||||||
|
chains_to_fetch.difference_update(chains)
|
||||||
|
|
||||||
# Add the initial set of chains, excluding the sequence corresponding to
|
# Add the initial set of chains, excluding the sequence corresponding to
|
||||||
# initial event.
|
# initial event.
|
||||||
|
@ -529,41 +561,64 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas
|
||||||
|
|
||||||
chains[chain_id] = max(seq_no, chains.get(chain_id, 0))
|
chains[chain_id] = max(seq_no, chains.get(chain_id, 0))
|
||||||
|
|
||||||
# Now we look up all links for the chains we have, adding chains to
|
# Now we look up all links for the chains we have, adding chains that
|
||||||
# set_to_chain that are reachable from each set.
|
# are reachable from any event.
|
||||||
|
#
|
||||||
|
# This query is structured to first get all chain IDs reachable, and
|
||||||
|
# then pull out all links from those chains. This does pull out more
|
||||||
|
# rows than is strictly necessary, however there isn't a way of
|
||||||
|
# structuring the recursive part of the query to pull out the links without
|
||||||
|
# also returning large quantities of redundant data (which can make it a
|
||||||
|
# lot slower).
|
||||||
sql = """
|
sql = """
|
||||||
|
WITH RECURSIVE links(chain_id) AS (
|
||||||
|
SELECT
|
||||||
|
DISTINCT origin_chain_id
|
||||||
|
FROM event_auth_chain_links WHERE %s
|
||||||
|
UNION
|
||||||
|
SELECT
|
||||||
|
target_chain_id
|
||||||
|
FROM event_auth_chain_links
|
||||||
|
INNER JOIN links ON (chain_id = origin_chain_id)
|
||||||
|
)
|
||||||
SELECT
|
SELECT
|
||||||
origin_chain_id, origin_sequence_number,
|
origin_chain_id, origin_sequence_number,
|
||||||
target_chain_id, target_sequence_number
|
target_chain_id, target_sequence_number
|
||||||
FROM event_auth_chain_links
|
FROM links
|
||||||
WHERE %s
|
INNER JOIN event_auth_chain_links ON (chain_id = origin_chain_id)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# (We need to take a copy of `seen_chains` as we want to mutate it in
|
# (We need to take a copy of `seen_chains` as we want to mutate it in
|
||||||
# the loop)
|
# the loop)
|
||||||
for batch2 in batch_iter(set(seen_chains), 1000):
|
chains_to_fetch = set(seen_chains)
|
||||||
|
while chains_to_fetch:
|
||||||
|
batch2 = tuple(itertools.islice(chains_to_fetch, 100))
|
||||||
clause, args = make_in_list_sql_clause(
|
clause, args = make_in_list_sql_clause(
|
||||||
txn.database_engine, "origin_chain_id", batch2
|
txn.database_engine, "origin_chain_id", batch2
|
||||||
)
|
)
|
||||||
txn.execute(sql % (clause,), args)
|
txn.execute(sql % (clause,), args)
|
||||||
|
|
||||||
|
links: Dict[int, List[Tuple[int, int, int]]] = {}
|
||||||
|
|
||||||
for (
|
for (
|
||||||
origin_chain_id,
|
origin_chain_id,
|
||||||
origin_sequence_number,
|
origin_sequence_number,
|
||||||
target_chain_id,
|
target_chain_id,
|
||||||
target_sequence_number,
|
target_sequence_number,
|
||||||
) in txn:
|
) in txn:
|
||||||
for chains in set_to_chain:
|
links.setdefault(origin_chain_id, []).append(
|
||||||
# chains are only reachable if the origin sequence number of
|
(origin_sequence_number, target_chain_id, target_sequence_number)
|
||||||
# the link is less than the max sequence number in the
|
)
|
||||||
# origin chain.
|
|
||||||
if origin_sequence_number <= chains.get(origin_chain_id, 0):
|
|
||||||
chains[target_chain_id] = max(
|
|
||||||
target_sequence_number,
|
|
||||||
chains.get(target_chain_id, 0),
|
|
||||||
)
|
|
||||||
|
|
||||||
seen_chains.add(target_chain_id)
|
for chains in set_to_chain:
|
||||||
|
for chain_id in links:
|
||||||
|
if chain_id not in chains:
|
||||||
|
continue
|
||||||
|
|
||||||
|
_materialize(chain_id, chains[chain_id], links, chains)
|
||||||
|
|
||||||
|
chains_to_fetch.difference_update(chains)
|
||||||
|
seen_chains.update(chains)
|
||||||
|
|
||||||
# Now for each chain we figure out the maximum sequence number reachable
|
# Now for each chain we figure out the maximum sequence number reachable
|
||||||
# from *any* state set and the minimum sequence number reachable from
|
# from *any* state set and the minimum sequence number reachable from
|
||||||
|
@ -2103,3 +2158,49 @@ class EventFederationStore(EventFederationWorkerStore):
|
||||||
)
|
)
|
||||||
|
|
||||||
return batch_size
|
return batch_size
|
||||||
|
|
||||||
|
|
||||||
|
def _materialize(
|
||||||
|
origin_chain_id: int,
|
||||||
|
origin_sequence_number: int,
|
||||||
|
links: Dict[int, List[Tuple[int, int, int]]],
|
||||||
|
materialized: Dict[int, int],
|
||||||
|
) -> None:
|
||||||
|
"""Helper function for fetching auth chain links. For a given origin chain
|
||||||
|
ID / sequence number and a dictionary of links, updates the materialized
|
||||||
|
dict with the reachable chains.
|
||||||
|
|
||||||
|
To get a dict of all chains reachable from a set of chains this function can
|
||||||
|
be called in a loop, once per origin chain with the same links and
|
||||||
|
materialized args. The materialized dict will the result.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
origin_chain_id, origin_sequence_number
|
||||||
|
links: map of the links between chains as a dict from origin chain ID
|
||||||
|
to list of 3-tuples of origin sequence number, target chain ID and
|
||||||
|
target sequence number.
|
||||||
|
materialized: dict to update with new reachability information, as a
|
||||||
|
map from chain ID to max sequence number reachable.
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Do a standard graph traversal.
|
||||||
|
stack = [(origin_chain_id, origin_sequence_number)]
|
||||||
|
|
||||||
|
while stack:
|
||||||
|
c, s = stack.pop()
|
||||||
|
|
||||||
|
chain_links = links.get(c, [])
|
||||||
|
for (
|
||||||
|
sequence_number,
|
||||||
|
target_chain_id,
|
||||||
|
target_sequence_number,
|
||||||
|
) in chain_links:
|
||||||
|
# Ignore any links that are higher up the chain
|
||||||
|
if sequence_number > s:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Check if we have already visited the target chain before, if so we
|
||||||
|
# can skip it.
|
||||||
|
if materialized.get(target_chain_id, 0) < target_sequence_number:
|
||||||
|
stack.append((target_chain_id, target_sequence_number))
|
||||||
|
materialized[target_chain_id] = target_sequence_number
|
||||||
|
|
|
@ -18,7 +18,7 @@
|
||||||
#
|
#
|
||||||
#
|
#
|
||||||
|
|
||||||
SCHEMA_VERSION = 83 # remember to update the list below when updating
|
SCHEMA_VERSION = 84 # remember to update the list below when updating
|
||||||
"""Represents the expectations made by the codebase about the database schema
|
"""Represents the expectations made by the codebase about the database schema
|
||||||
|
|
||||||
This should be incremented whenever the codebase changes its requirements on the
|
This should be incremented whenever the codebase changes its requirements on the
|
||||||
|
|
|
@ -0,0 +1,18 @@
|
||||||
|
--
|
||||||
|
-- This file is licensed under the Affero General Public License (AGPL) version 3.
|
||||||
|
--
|
||||||
|
-- Copyright (C) 2023 New Vector, Ltd
|
||||||
|
--
|
||||||
|
-- This program is free software: you can redistribute it and/or modify
|
||||||
|
-- it under the terms of the GNU Affero General Public License as
|
||||||
|
-- published by the Free Software Foundation, either version 3 of the
|
||||||
|
-- License, or (at your option) any later version.
|
||||||
|
--
|
||||||
|
-- See the GNU Affero General Public License for more details:
|
||||||
|
-- <https://www.gnu.org/licenses/agpl-3.0.html>.
|
||||||
|
|
||||||
|
-- Force the statistics for these columns to show that the number of distinct
|
||||||
|
-- chain IDs is proportional to the total rows, as postgres has trouble
|
||||||
|
-- figuring that out by itself.
|
||||||
|
ALTER TABLE event_auth_chain_links ALTER origin_chain_id SET (n_distinct = -0.5);
|
||||||
|
ALTER TABLE event_auth_chain_links ALTER target_chain_id SET (n_distinct = -0.5);
|
16
synapse/storage/schema/main/delta/84/02_auth_links_index.sql
Normal file
16
synapse/storage/schema/main/delta/84/02_auth_links_index.sql
Normal file
|
@ -0,0 +1,16 @@
|
||||||
|
--
|
||||||
|
-- This file is licensed under the Affero General Public License (AGPL) version 3.
|
||||||
|
--
|
||||||
|
-- Copyright (C) 2023 New Vector, Ltd
|
||||||
|
--
|
||||||
|
-- This program is free software: you can redistribute it and/or modify
|
||||||
|
-- it under the terms of the GNU Affero General Public License as
|
||||||
|
-- published by the Free Software Foundation, either version 3 of the
|
||||||
|
-- License, or (at your option) any later version.
|
||||||
|
--
|
||||||
|
-- See the GNU Affero General Public License for more details:
|
||||||
|
-- <https://www.gnu.org/licenses/agpl-3.0.html>.
|
||||||
|
|
||||||
|
|
||||||
|
INSERT INTO background_updates (ordering, update_name, progress_json) VALUES
|
||||||
|
(8402, 'event_auth_chain_links_origin_index', '{}');
|
Loading…
Reference in a new issue