
Merge branch 'develop' into add_login_hint

Olivier D, 2025-03-25 14:10:20 +01:00, committed via GitHub
commit 6f8706905d
23 changed files with 732 additions and 74 deletions


@@ -11,12 +11,12 @@ with open("poetry.lock", "rb") as f:
try:
lock_version = lockfile["metadata"]["lock-version"]
assert lock_version == "2.0"
assert lock_version == "2.1"
except Exception:
print(
"""\
Lockfile is not version 2.0. You probably need to upgrade poetry on your local box
and re-run `poetry lock --no-update`. See the Poetry cheat sheet at
Lockfile is not version 2.1. You probably need to upgrade poetry on your local box
and re-run `poetry lock`. See the Poetry cheat sheet at
https://element-hq.github.io/synapse/develop/development/dependencies.html
"""
)
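In practice, the fix the new error message points at is just upgrading Poetry and re-locking. A minimal sketch (the pinned version mirrors the `pipx install poetry==2.1.1` used in the workflow changes below):

```sh
# Poetry 2.x writes lock files with lock-version 2.1.
pipx install poetry==2.1.1

# Regenerate poetry.lock. Poetry 2.x dropped `--no-update`; a plain
# `poetry lock` now keeps the locked package versions by default.
poetry lock
```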


@@ -25,9 +25,10 @@ jobs:
- uses: Swatinem/rust-cache@9d47c6ad4b02e050fd481d890b2ea34778fd09d6 # v2.7.8
- name: Setup Poetry
uses: matrix-org/setup-python-poetry@4421c92b6223f03ae55560e29aa8ebd39cf6314a # v1.2.4
uses: matrix-org/setup-python-poetry@5bbf6603c5c930615ec8a29f1b5d7d258d905aa4 # v2.0.0
with:
install-project: "false"
poetry-version: "2.1.1"
- name: Run ruff check
continue-on-error: true


@@ -46,10 +46,10 @@ jobs:
# The dev dependencies aren't exposed in the wheel metadata (at least with current
# poetry-core versions), so we install with poetry.
- uses: matrix-org/setup-python-poetry@4421c92b6223f03ae55560e29aa8ebd39cf6314a # v1.2.4
- uses: matrix-org/setup-python-poetry@5bbf6603c5c930615ec8a29f1b5d7d258d905aa4 # v2.0.0
with:
python-version: "3.x"
poetry-version: "1.3.2"
poetry-version: "2.1.1"
extras: "all"
# Dump installed versions for debugging.
- run: poetry run pip list > before.txt


@@ -87,10 +87,10 @@ jobs:
- name: Install Rust
uses: dtolnay/rust-toolchain@e05ebb0e73db581a4877c6ce762e29fe1e0b5073 # 1.66.0
- uses: Swatinem/rust-cache@9d47c6ad4b02e050fd481d890b2ea34778fd09d6 # v2.7.8
- uses: matrix-org/setup-python-poetry@4421c92b6223f03ae55560e29aa8ebd39cf6314a # v1.2.4
- uses: matrix-org/setup-python-poetry@5bbf6603c5c930615ec8a29f1b5d7d258d905aa4 # v2.0.0
with:
python-version: "3.x"
poetry-version: "1.3.2"
poetry-version: "2.1.1"
extras: "all"
- run: poetry run scripts-dev/generate_sample_config.sh --check
- run: poetry run scripts-dev/config-lint.sh
@@ -127,8 +127,9 @@ jobs:
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Setup Poetry
uses: matrix-org/setup-python-poetry@4421c92b6223f03ae55560e29aa8ebd39cf6314a # v1.2.4
uses: matrix-org/setup-python-poetry@5bbf6603c5c930615ec8a29f1b5d7d258d905aa4 # v2.0.0
with:
poetry-version: "2.1.1"
install-project: "false"
- name: Run ruff check
@@ -152,7 +153,7 @@ jobs:
- uses: Swatinem/rust-cache@9d47c6ad4b02e050fd481d890b2ea34778fd09d6 # v2.7.8
- name: Setup Poetry
uses: matrix-org/setup-python-poetry@4421c92b6223f03ae55560e29aa8ebd39cf6314a # v1.2.4
uses: matrix-org/setup-python-poetry@5bbf6603c5c930615ec8a29f1b5d7d258d905aa4 # v2.0.0
with:
# We want to make use of type hints in optional dependencies too.
extras: all
@@ -161,6 +162,7 @@ jobs:
# https://github.com/matrix-org/synapse/pull/15376#issuecomment-1498983775
# To make CI green, err towards caution and install the project.
install-project: "true"
poetry-version: "2.1.1"
# Cribbed from
# https://github.com/AustinScola/mypy-cache-github-action/blob/85ea4f2972abed39b33bd02c36e341b28ca59213/src/restore.ts#L10-L17
@@ -210,9 +212,9 @@ jobs:
- name: Install Rust
uses: dtolnay/rust-toolchain@e05ebb0e73db581a4877c6ce762e29fe1e0b5073 # 1.66.0
- uses: Swatinem/rust-cache@9d47c6ad4b02e050fd481d890b2ea34778fd09d6 # v2.7.8
- uses: matrix-org/setup-python-poetry@4421c92b6223f03ae55560e29aa8ebd39cf6314a # v1.2.4
- uses: matrix-org/setup-python-poetry@5bbf6603c5c930615ec8a29f1b5d7d258d905aa4 # v2.0.0
with:
poetry-version: "1.3.2"
poetry-version: "2.1.1"
extras: "all"
- run: poetry run scripts-dev/check_pydantic_models.py
@@ -363,10 +365,10 @@ jobs:
uses: dtolnay/rust-toolchain@e05ebb0e73db581a4877c6ce762e29fe1e0b5073 # 1.66.0
- uses: Swatinem/rust-cache@9d47c6ad4b02e050fd481d890b2ea34778fd09d6 # v2.7.8
- uses: matrix-org/setup-python-poetry@4421c92b6223f03ae55560e29aa8ebd39cf6314a # v1.2.4
- uses: matrix-org/setup-python-poetry@5bbf6603c5c930615ec8a29f1b5d7d258d905aa4 # v2.0.0
with:
python-version: ${{ matrix.job.python-version }}
poetry-version: "1.3.2"
poetry-version: "2.1.1"
extras: ${{ matrix.job.extras }}
- name: Await PostgreSQL
if: ${{ matrix.job.postgres-version }}
@@ -465,10 +467,10 @@ jobs:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
# Install libs necessary for PyPy to build binary wheels for dependencies
- run: sudo apt-get -qq install xmlsec1 libxml2-dev libxslt-dev
- uses: matrix-org/setup-python-poetry@4421c92b6223f03ae55560e29aa8ebd39cf6314a # v1.2.4
- uses: matrix-org/setup-python-poetry@5bbf6603c5c930615ec8a29f1b5d7d258d905aa4 # v2.0.0
with:
python-version: ${{ matrix.python-version }}
poetry-version: "1.3.2"
poetry-version: "2.1.1"
extras: ${{ matrix.extras }}
- run: poetry run trial --jobs=2 tests
- name: Dump logs
@@ -559,9 +561,9 @@ jobs:
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- run: sudo apt-get -qq install xmlsec1 postgresql-client
- uses: matrix-org/setup-python-poetry@4421c92b6223f03ae55560e29aa8ebd39cf6314a # v1.2.4
- uses: matrix-org/setup-python-poetry@5bbf6603c5c930615ec8a29f1b5d7d258d905aa4 # v2.0.0
with:
poetry-version: "1.3.2"
poetry-version: "2.1.1"
extras: "postgres"
- run: .ci/scripts/test_export_data_command.sh
env:
@@ -612,10 +614,10 @@ jobs:
wget --quiet -O - https://www.postgresql.org/media/keys/ACCC4CF8.asc | sudo apt-key add -
sudo apt-get update
- run: sudo apt-get -qq install xmlsec1 postgresql-client
- uses: matrix-org/setup-python-poetry@4421c92b6223f03ae55560e29aa8ebd39cf6314a # v1.2.4
- uses: matrix-org/setup-python-poetry@5bbf6603c5c930615ec8a29f1b5d7d258d905aa4 # v2.0.0
with:
python-version: ${{ matrix.python-version }}
poetry-version: "1.3.2"
poetry-version: "2.1.1"
extras: "postgres"
- run: .ci/scripts/test_synapse_port_db.sh
id: run_tester_script


@@ -46,10 +46,11 @@ jobs:
uses: dtolnay/rust-toolchain@fcf085fcb4b4b8f63f96906cd713eb52181b5ea4 # stable (rust 1.85.1)
- uses: Swatinem/rust-cache@9d47c6ad4b02e050fd481d890b2ea34778fd09d6 # v2.7.8
- uses: matrix-org/setup-python-poetry@4421c92b6223f03ae55560e29aa8ebd39cf6314a # v1.2.4
- uses: matrix-org/setup-python-poetry@5bbf6603c5c930615ec8a29f1b5d7d258d905aa4 # v2.0.0
with:
python-version: "3.x"
extras: "all"
poetry-version: "2.1.1"
- run: |
poetry remove twisted
poetry add --extras tls git+https://github.com/twisted/twisted.git#${{ inputs.twisted_ref || 'trunk' }}
@@ -71,10 +72,11 @@ jobs:
uses: dtolnay/rust-toolchain@fcf085fcb4b4b8f63f96906cd713eb52181b5ea4 # stable (rust 1.85.1)
- uses: Swatinem/rust-cache@9d47c6ad4b02e050fd481d890b2ea34778fd09d6 # v2.7.8
- uses: matrix-org/setup-python-poetry@4421c92b6223f03ae55560e29aa8ebd39cf6314a # v1.2.4
- uses: matrix-org/setup-python-poetry@5bbf6603c5c930615ec8a29f1b5d7d258d905aa4 # v2.0.0
with:
python-version: "3.x"
extras: "all test"
poetry-version: "2.1.1"
- run: |
poetry remove twisted
poetry add --extras tls git+https://github.com/twisted/twisted.git#trunk
@@ -181,11 +183,11 @@ jobs:
run: |
set -x
DEBIAN_FRONTEND=noninteractive sudo apt-get install -yqq python3 pipx
pipx install poetry==1.3.2
pipx install poetry==2.1.1
poetry remove -n twisted
poetry add -n --extras tls git+https://github.com/twisted/twisted.git#trunk
poetry lock --no-update
poetry lock
working-directory: synapse
- run: |

changelog.d/18251.misc (new file)

@@ -0,0 +1 @@
Update Poetry to 2.1.1, including updating the lock file version.


@@ -0,0 +1 @@
Add background job to clear unreferenced state groups.

changelog.d/18276.doc (new file)

@@ -0,0 +1 @@
Correct a small typo in the SSO mapping providers documentation.


@@ -35,7 +35,7 @@ TEMP_VENV="$(mktemp -d)"
python3 -m venv "$TEMP_VENV"
source "$TEMP_VENV/bin/activate"
pip install -U pip
pip install poetry==1.3.2
pip install poetry==2.1.1 poetry-plugin-export==1.9.0
poetry export \
--extras all \
--extras test \

debian/changelog (vendored)

@@ -1,3 +1,9 @@
matrix-synapse-py3 (1.127.0~rc1+nmu1) UNRELEASED; urgency=medium

  * Update Poetry to 2.1.1.

 -- Synapse Packaging team <packages@matrix.org>  Wed, 19 Mar 2025 17:38:49 +0000

matrix-synapse-py3 (1.127.0~rc1) stable; urgency=medium

  * New Synapse release 1.127.0rc1.


@@ -22,7 +22,7 @@
ARG DEBIAN_VERSION=bookworm
ARG PYTHON_VERSION=3.12
ARG POETRY_VERSION=1.8.3
ARG POETRY_VERSION=2.1.1
###
### Stage 0: generate requirements.txt
@@ -56,7 +56,7 @@ ENV UV_LINK_MODE=copy
ARG POETRY_VERSION
RUN --mount=type=cache,target=/root/.cache/uv \
if [ -z "$TEST_ONLY_IGNORE_POETRY_LOCKFILE" ]; then \
uvx --with poetry-plugin-export==1.8.0 \
uvx --with poetry-plugin-export==1.9.0 \
poetry@${POETRY_VERSION} export --extras all -o /synapse/requirements.txt ${TEST_ONLY_SKIP_DEP_HASH_VERIFICATION:+--without-hashes}; \
else \
touch /synapse/requirements.txt; \


@@ -162,7 +162,7 @@ by a unique name, the current status (stored in JSON), and some dependency information
* Whether the update requires a previous update to be complete.
* A rough ordering for which to complete updates.
A new background updates needs to be added to the `background_updates` table:
A new background update needs to be added to the `background_updates` table:
```sql
INSERT INTO background_updates (ordering, update_name, depends_on, progress_json) VALUES
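-- For reference, the schema delta added later in this commit performs exactly
-- this kind of insert (values copied from the new delta file further down):
INSERT INTO background_updates (ordering, update_name, progress_json) VALUES
    (9002, 'mark_unreferenced_state_groups_for_deletion_bg_update', '{}');
```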


@@ -187,7 +187,7 @@ useful.
## ...add a new dependency?
Either:
- manually update `pyproject.toml`; then `poetry lock --no-update`; or else
- manually update `pyproject.toml`; then `poetry lock`; or else
- `poetry add packagename`. See `poetry add --help`; note the `--dev`,
`--extras` and `--optional` flags in particular.
@@ -202,12 +202,12 @@ poetry remove packagename
```
ought to do the trick. Alternatively, manually update `pyproject.toml` and
`poetry lock --no-update`. Include the updated `pyproject.toml` and `poetry.lock`
`poetry lock`. Include the updated `pyproject.toml` and `poetry.lock`
files in your commit.
## ...update the version range for an existing dependency?
Best done by manually editing `pyproject.toml`, then `poetry lock --no-update`.
Best done by manually editing `pyproject.toml`, then `poetry lock`.
Include the updated `pyproject.toml` and `poetry.lock` in your commit.
## ...update a dependency in the locked environment?
@@ -233,7 +233,7 @@ poetry add packagename==1.2.3
# Get poetry to recompute the content-hash of pyproject.toml without changing
# the locked package versions.
poetry lock --no-update
poetry lock
```
Either way, include the updated `poetry.lock` file in your commit.


@@ -10,7 +10,7 @@ As an example, an SSO service may return the email address
to turn that into a displayname when creating a Matrix user for this individual.
It may choose `John Smith`, or `Smith, John [Example.com]` or any number of
variations. As each Synapse configuration may want something different, this is
where SAML mapping providers come into play.
where SSO mapping providers come into play.
SSO mapping providers are currently supported for OpenID and SAML SSO
configurations. Please see the details below for how to implement your own.
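To make the above concrete, here is a toy illustration (plain Python, not the Synapse mapping-provider API; the function name is hypothetical) of the kind of attribute-to-displayname transformation a mapping provider performs:

```python
# Toy example: derive a "Smith, John [Example.com]"-style displayname from an
# SSO-provided email address. Not Synapse code; names are illustrative only.
def email_to_displayname(email: str) -> str:
    local, _, domain = email.partition("@")
    first, _, last = local.partition(".")
    return f"{last.capitalize()}, {first.capitalize()} [{domain.capitalize()}]"

print(email_to_displayname("john.smith@example.com"))  # Smith, John [Example.com]
```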

poetry.lock (generated)

File diff suppressed because it is too large.


@@ -192,6 +192,11 @@ APPEND_ONLY_TABLES = [
IGNORED_TABLES = {
# Porting the auto generated sequence in this table is non-trivial.
# None of the entries in this list are mandatory for Synapse to keep working.
# If state group disk space is an issue after the port, the
# `mark_unreferenced_state_groups_for_deletion_bg_update` background task can be run again.
"state_groups_pending_deletion",
# We don't port these tables, as they're a faff and we can regenerate
# them anyway.
"user_directory",
@@ -217,6 +222,15 @@ IGNORED_TABLES = {
}
# These background updates will not be applied upon creation of the postgres database.
IGNORED_BACKGROUND_UPDATES = {
# Reapplying this background update to the postgres database is unnecessary after
# already having waited for the SQLite database to complete all running background
# updates.
"mark_unreferenced_state_groups_for_deletion_bg_update",
}
# Error returned by the run function. Used at the top-level part of the script to
# handle errors and return codes.
end_error: Optional[str] = None
@@ -688,6 +702,20 @@ class Porter:
# 0 means off. 1 means full. 2 means incremental.
return autovacuum_setting != 0
async def remove_ignored_background_updates_from_database(self) -> None:
def _remove_delete_unreferenced_state_groups_bg_updates(
txn: LoggingTransaction,
) -> None:
txn.execute(
"DELETE FROM background_updates WHERE update_name = ANY(?)",
(list(IGNORED_BACKGROUND_UPDATES),),
)
await self.postgres_store.db_pool.runInteraction(
"remove_delete_unreferenced_state_groups_bg_updates",
_remove_delete_unreferenced_state_groups_bg_updates,
)
async def run(self) -> None:
"""Ports the SQLite database to a PostgreSQL database.
@@ -733,6 +761,8 @@ class Porter:
self.hs_config.database.get_single_database()
)
await self.remove_ignored_background_updates_from_database()
await self.run_background_updates_on_postgres()
self.progress.set_state("Creating port tables")


@@ -21,11 +21,19 @@
import itertools
import logging
from typing import TYPE_CHECKING, Collection, Mapping, Set
from typing import (
TYPE_CHECKING,
Collection,
Mapping,
Optional,
Set,
)
from synapse.logging.context import nested_logging_context
from synapse.metrics.background_process_metrics import wrap_as_background_process
from synapse.storage.database import LoggingTransaction
from synapse.storage.databases import Databases
from synapse.types.storage import _BackgroundUpdates
if TYPE_CHECKING:
from synapse.server import HomeServer
@@ -44,6 +52,11 @@ class PurgeEventsStorageController:
self._delete_state_groups_loop, 60 * 1000
)
self.stores.state.db_pool.updates.register_background_update_handler(
_BackgroundUpdates.MARK_UNREFERENCED_STATE_GROUPS_FOR_DELETION_BG_UPDATE,
self._background_delete_unreferenced_state_groups,
)
async def purge_room(self, room_id: str) -> None:
"""Deletes all record of a room"""
@@ -81,7 +94,8 @@
)
async def _find_unreferenced_groups(
self, state_groups: Collection[int]
self,
state_groups: Collection[int],
) -> Set[int]:
"""Used when purging history to figure out which state groups can be
deleted.
@@ -203,3 +217,232 @@
room_id,
groups_to_sequences,
)
async def _background_delete_unreferenced_state_groups(
self, progress: dict, batch_size: int
) -> int:
"""This background update will slowly delete any unreferenced state groups"""
last_checked_state_group = progress.get("last_checked_state_group")
if last_checked_state_group is None:
# This is the first run.
last_checked_state_group = (
await self.stores.state.db_pool.simple_select_one_onecol(
table="state_groups",
keyvalues={},
retcol="MAX(id)",
allow_none=True,
desc="get_max_state_group",
)
)
if last_checked_state_group is None:
# There are no state groups so the background process is finished.
await self.stores.state.db_pool.updates._end_background_update(
_BackgroundUpdates.MARK_UNREFERENCED_STATE_GROUPS_FOR_DELETION_BG_UPDATE
)
return batch_size
last_checked_state_group += 1
(
last_checked_state_group,
final_batch,
) = await self._delete_unreferenced_state_groups_batch(
last_checked_state_group,
batch_size,
)
if not final_batch:
# There are more state groups to check.
progress = {
"last_checked_state_group": last_checked_state_group,
}
await self.stores.state.db_pool.updates._background_update_progress(
_BackgroundUpdates.MARK_UNREFERENCED_STATE_GROUPS_FOR_DELETION_BG_UPDATE,
progress,
)
else:
# This background process is finished.
await self.stores.state.db_pool.updates._end_background_update(
_BackgroundUpdates.MARK_UNREFERENCED_STATE_GROUPS_FOR_DELETION_BG_UPDATE
)
return batch_size
async def _delete_unreferenced_state_groups_batch(
self,
last_checked_state_group: int,
batch_size: int,
) -> tuple[int, bool]:
"""Looks for unreferenced state groups starting from the last state group
checked and marks them for deletion.
Args:
last_checked_state_group: The last state group that was checked.
batch_size: How many state groups to process in this iteration.
Returns:
(last_checked_state_group, final_batch)
"""
# Find all state groups that can be deleted if any of the original set are deleted.
(
to_delete,
last_checked_state_group,
final_batch,
) = await self._find_unreferenced_groups_for_background_deletion(
last_checked_state_group, batch_size
)
if len(to_delete) == 0:
return last_checked_state_group, final_batch
await self.stores.state_deletion.mark_state_groups_as_pending_deletion(
to_delete
)
return last_checked_state_group, final_batch
async def _find_unreferenced_groups_for_background_deletion(
self,
last_checked_state_group: int,
batch_size: int,
) -> tuple[Set[int], int, bool]:
"""Used when deleting unreferenced state groups in the background to figure out
which state groups can be deleted.
To avoid increased DB usage due to de-deltaing state groups, this returns only
state groups which are free-standing (i.e. have no shared edges with referenced groups) or
state groups which do not share edges that lead to a future referenced group.
The following scenarios outline the possibilities based on state group data in
the DB.
i.e. free-standing -> state groups 1-N would be returned:
SG_1
|
...
|
SG_N
i.e. previous reference -> state groups 2-N would be returned:
SG_1 <- referenced by event
|
SG_2
|
...
|
SG_N
i.e. future reference -> none of the following state groups would be returned:
SG_1
|
SG_2
|
...
|
SG_N <- referenced by event
Args:
last_checked_state_group: The last state group that was checked.
batch_size: How many state groups to process in this iteration.
Returns:
(to_delete, last_checked_state_group, final_batch)
"""
# If a state group's next edge is not pending deletion then we don't delete the state group.
# If there is no next edge or the next edges are all marked for deletion, then delete
# the state group.
# This holds since we walk backwards from the latest state groups, ensuring that
# we've already checked newer state groups for event references along the way.
def get_next_state_groups_marked_for_deletion_txn(
txn: LoggingTransaction,
) -> tuple[dict[int, bool], dict[int, int]]:
state_group_sql = """
SELECT s.id, e.state_group, d.state_group
FROM (
SELECT id FROM state_groups
WHERE id < ? ORDER BY id DESC LIMIT ?
) as s
LEFT JOIN state_group_edges AS e ON (s.id = e.prev_state_group)
LEFT JOIN state_groups_pending_deletion AS d ON (e.state_group = d.state_group)
"""
txn.execute(state_group_sql, (last_checked_state_group, batch_size))
# Mapping from state group to whether we should delete it.
state_groups_to_deletion: dict[int, bool] = {}
# Mapping from state group to prev state group.
state_groups_to_prev: dict[int, int] = {}
for row in txn:
state_group = row[0]
next_edge = row[1]
pending_deletion = row[2]
if next_edge is not None:
state_groups_to_prev[next_edge] = state_group
if next_edge is not None and not pending_deletion:
# We have found an edge not marked for deletion.
# Check previous results to see if this group is part of a chain
# within this batch that qualifies for deletion.
# i.e. the batch contains:
# SG_1 -> SG_2 -> SG_3
# If SG_3 is a candidate for deletion, then SG_2 & SG_1 should also
# be, even though they have edges which may not be marked for
# deletion.
# This relies on SQL results being sorted in DESC order to work.
next_is_deletion_candidate = state_groups_to_deletion.get(next_edge)
if (
next_is_deletion_candidate is None
or not next_is_deletion_candidate
):
state_groups_to_deletion[state_group] = False
else:
state_groups_to_deletion.setdefault(state_group, True)
else:
# This state group may be a candidate for deletion
state_groups_to_deletion.setdefault(state_group, True)
return state_groups_to_deletion, state_groups_to_prev
(
state_groups_to_deletion,
state_group_edges,
) = await self.stores.state.db_pool.runInteraction(
"get_next_state_groups_marked_for_deletion",
get_next_state_groups_marked_for_deletion_txn,
)
deletion_candidates = {
state_group
for state_group, deletion in state_groups_to_deletion.items()
if deletion
}
final_batch = False
state_groups = state_groups_to_deletion.keys()
if len(state_groups) < batch_size:
final_batch = True
else:
last_checked_state_group = min(state_groups)
if len(state_groups) == 0:
return set(), last_checked_state_group, final_batch
# Determine if any of the remaining state groups are directly referenced.
referenced = await self.stores.main.get_referenced_state_groups(
deletion_candidates
)
# Remove state groups from deletion_candidates which are directly referenced or share a
# future edge with a referenced state group within this batch.
def filter_reference_chains(group: Optional[int]) -> None:
while group is not None:
deletion_candidates.discard(group)
group = state_group_edges.get(group)
for referenced_group in referenced:
filter_reference_chains(referenced_group)
return deletion_candidates, last_checked_state_group, final_batch
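The chain-walking step above is easiest to see on toy data. A minimal standalone sketch (not Synapse code; all names and values are hypothetical) of the same idea:

```python
# Sketch of filter_reference_chains: walk prev-group edges backwards from each
# directly referenced state group and keep (i.e. do not delete) every group on
# the way, mirroring the batch diagrams in the docstring above.
from typing import Dict, Optional, Set

def filter_reference_chains(
    deletion_candidates: Set[int],
    state_group_edges: Dict[int, int],  # state group -> its prev state group
    referenced: Set[int],
) -> Set[int]:
    remaining = set(deletion_candidates)
    for group in referenced:
        g: Optional[int] = group
        while g is not None:
            remaining.discard(g)  # anything reaching a referenced group is kept
            g = state_group_edges.get(g)
    return remaining

# SG_1 -> SG_2 -> SG_3 with SG_3 referenced by an event: the whole chain is
# kept, while the free-standing SG_4 remains a deletion candidate.
edges = {3: 2, 2: 1}
print(filter_reference_chains({1, 2, 3, 4}, edges, referenced={3}))  # {4}
```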


@@ -20,7 +20,15 @@
#
import logging
from typing import TYPE_CHECKING, Dict, List, Mapping, Optional, Tuple, Union
from typing import (
TYPE_CHECKING,
Dict,
List,
Mapping,
Optional,
Tuple,
Union,
)
from synapse.logging.opentracing import tag_args, trace
from synapse.storage._base import SQLBaseStore


@@ -321,18 +321,42 @@ class StateDeletionDataStore:
async def mark_state_groups_as_pending_deletion(
self, state_groups: Collection[int]
) -> None:
"""Mark the given state groups as pending deletion"""
"""Mark the given state groups as pending deletion.
If any of the state groups are already pending deletion, then those records are
left as is.
"""
await self.db_pool.runInteraction(
"mark_state_groups_as_pending_deletion",
self._mark_state_groups_as_pending_deletion_txn,
state_groups,
)
def _mark_state_groups_as_pending_deletion_txn(
self,
txn: LoggingTransaction,
state_groups: Collection[int],
) -> None:
sql = """
INSERT INTO state_groups_pending_deletion (state_group, insertion_ts)
VALUES %s
ON CONFLICT (state_group)
DO NOTHING
"""
now = self._clock.time_msec()
await self.db_pool.simple_upsert_many(
table="state_groups_pending_deletion",
key_names=("state_group",),
key_values=[(state_group,) for state_group in state_groups],
value_names=("insertion_ts",),
value_values=[(now,) for _ in state_groups],
desc="mark_state_groups_as_pending_deletion",
)
rows = [
(
state_group,
now,
)
for state_group in state_groups
]
if isinstance(txn.database_engine, PostgresEngine):
txn.execute_values(sql % ("?",), rows, fetch=False)
else:
txn.execute_batch(sql % ("(?, ?)",), rows)
async def mark_state_groups_as_used(self, state_groups: Collection[int]) -> None:
"""Mark the given state groups as now being referenced"""


@@ -161,6 +161,7 @@ Changes in SCHEMA_VERSION = 89
Changes in SCHEMA_VERSION = 90
- Add a column `participant` to `room_memberships` table
- Add background update to delete unreferenced state groups.
"""


@@ -0,0 +1,16 @@
--
-- This file is licensed under the Affero General Public License (AGPL) version 3.
--
-- Copyright (C) 2025 New Vector, Ltd
--
-- This program is free software: you can redistribute it and/or modify
-- it under the terms of the GNU Affero General Public License as
-- published by the Free Software Foundation, either version 3 of the
-- License, or (at your option) any later version.
--
-- See the GNU Affero General Public License for more details:
-- <https://www.gnu.org/licenses/agpl-3.0.html>.
-- Add a background update to delete any unreferenced state groups
INSERT INTO background_updates (ordering, update_name, progress_json) VALUES
(9002, 'mark_unreferenced_state_groups_for_deletion_bg_update', '{}');


@@ -48,3 +48,7 @@ class _BackgroundUpdates:
SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_FIX_FORGOTTEN_COLUMN_BG_UPDATE = (
"sliding_sync_membership_snapshots_fix_forgotten_column_bg_update"
)
MARK_UNREFERENCED_STATE_GROUPS_FOR_DELETION_BG_UPDATE = (
"mark_unreferenced_state_groups_for_deletion_bg_update"
)


@@ -24,6 +24,7 @@ from synapse.api.errors import NotFoundError, SynapseError
from synapse.rest.client import room
from synapse.server import HomeServer
from synapse.types.state import StateFilter
from synapse.types.storage import _BackgroundUpdates
from synapse.util import Clock
from tests.unittest import HomeserverTestCase
@@ -303,3 +304,156 @@
)
)
self.assertEqual(len(state_groups), 1)
def test_clear_unreferenced_state_groups(self) -> None:
"""Test that any unreferenced state groups are automatically cleaned up."""
self.helper.send(self.room_id, body="test1")
state1 = self.helper.send_state(
self.room_id, "org.matrix.test", body={"number": 2}
)
# Create enough state events to require multiple batches of
# mark_unreferenced_state_groups_for_deletion_bg_update to be run.
for i in range(200):
self.helper.send_state(self.room_id, "org.matrix.test", body={"number": i})
self.helper.send(self.room_id, body="test4")
last = self.helper.send(self.room_id, body="test5")
# Create an unreferenced state group that has no prev group.
unreferenced_free_state_group = self.get_success(
self.state_store.store_state_group(
event_id=last["event_id"],
room_id=self.room_id,
prev_group=None,
delta_ids={("org.matrix.test", ""): state1["event_id"]},
current_state_ids={("org.matrix.test", ""): ""},
)
)
# Create some unreferenced state groups that have a prev group of one of the
# existing state groups.
prev_group = self.get_success(
self.store._get_state_group_for_event(state1["event_id"])
)
unreferenced_end_state_group = self.get_success(
self.state_store.store_state_group(
event_id=last["event_id"],
room_id=self.room_id,
prev_group=prev_group,
delta_ids={("org.matrix.test", ""): state1["event_id"]},
current_state_ids=None,
)
)
another_unreferenced_end_state_group = self.get_success(
self.state_store.store_state_group(
event_id=last["event_id"],
room_id=self.room_id,
prev_group=unreferenced_end_state_group,
delta_ids={("org.matrix.test", ""): state1["event_id"]},
current_state_ids=None,
)
)
# Add some other unreferenced state groups which lead to a referenced state
# group.
# These state groups should not get deleted.
chain_state_group = self.get_success(
self.state_store.store_state_group(
event_id=last["event_id"],
room_id=self.room_id,
prev_group=None,
delta_ids={("org.matrix.test", ""): ""},
current_state_ids={("org.matrix.test", ""): ""},
)
)
chain_state_group_2 = self.get_success(
self.state_store.store_state_group(
event_id=last["event_id"],
room_id=self.room_id,
prev_group=chain_state_group,
delta_ids={("org.matrix.test", ""): ""},
current_state_ids=None,
)
)
referenced_chain_state_group = self.get_success(
self.state_store.store_state_group(
event_id=last["event_id"],
room_id=self.room_id,
prev_group=chain_state_group_2,
delta_ids={("org.matrix.test", ""): ""},
current_state_ids=None,
)
)
self.get_success(
self.store.db_pool.simple_insert(
"event_to_state_groups",
{
"event_id": "$new_event",
"state_group": referenced_chain_state_group,
},
)
)
# Insert and run the background update.
self.get_success(
self.store.db_pool.simple_insert(
"background_updates",
{
"update_name": _BackgroundUpdates.MARK_UNREFERENCED_STATE_GROUPS_FOR_DELETION_BG_UPDATE,
"progress_json": "{}",
},
)
)
self.store.db_pool.updates._all_done = False
self.wait_for_background_updates()
# Advance so that the background job to delete the state groups runs
self.reactor.advance(
1 + self.state_deletion_store.DELAY_BEFORE_DELETION_MS / 1000
)
# We expect that the unreferenced free state group has been deleted.
row = self.get_success(
self.state_store.db_pool.simple_select_one_onecol(
table="state_groups",
keyvalues={"id": unreferenced_free_state_group},
retcol="id",
allow_none=True,
desc="test_purge_unreferenced_state_group",
)
)
self.assertIsNone(row)
# We expect that both unreferenced end state groups have been deleted.
row = self.get_success(
self.state_store.db_pool.simple_select_one_onecol(
table="state_groups",
keyvalues={"id": unreferenced_end_state_group},
retcol="id",
allow_none=True,
desc="test_purge_unreferenced_state_group",
)
)
self.assertIsNone(row)
row = self.get_success(
self.state_store.db_pool.simple_select_one_onecol(
table="state_groups",
keyvalues={"id": another_unreferenced_end_state_group},
retcol="id",
allow_none=True,
desc="test_purge_unreferenced_state_group",
)
)
self.assertIsNone(row)
# We expect all of the unreferenced state groups to have been deleted,
# leaving only the state groups that are still referenced, directly or via
# a chain ending in a referenced state group.
state_groups = self.get_success(
self.state_store.db_pool.simple_select_onecol(
table="state_groups",
keyvalues={"room_id": self.room_id},
retcol="id",
desc="test_purge_unreferenced_state_group",
)
)
self.assertEqual(len(state_groups), 210)