Merge branch 'develop' into add_login_hint
Commit 6f8706905d

23 changed files with 732 additions and 74 deletions
@@ -11,12 +11,12 @@ with open("poetry.lock", "rb") as f:
 try:
     lock_version = lockfile["metadata"]["lock-version"]
-    assert lock_version == "2.0"
+    assert lock_version == "2.1"
 except Exception:
     print(
         """\
-Lockfile is not version 2.0. You probably need to upgrade poetry on your local box
-and re-run `poetry lock --no-update`. See the Poetry cheat sheet at
+Lockfile is not version 2.1. You probably need to upgrade poetry on your local box
+and re-run `poetry lock`. See the Poetry cheat sheet at
 https://element-hq.github.io/synapse/develop/development/dependencies.html
 """
     )
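For context, this check gates CI on the lock file's version. A minimal standalone sketch of the same idea (assuming Python 3.11+ for the stdlib `tomllib`; not the exact script):

    import sys
    import tomllib  # stdlib TOML parser, Python 3.11+

    with open("poetry.lock", "rb") as f:
        lockfile = tomllib.load(f)

    lock_version = lockfile["metadata"]["lock-version"]
    if lock_version != "2.1":
        sys.exit(
            f"poetry.lock has lock-version {lock_version!r}; "
            "upgrade Poetry and re-run `poetry lock`"
        )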
.github/workflows/fix_lint.yaml (vendored): 3 lines changed
@@ -25,9 +25,10 @@ jobs:
       - uses: Swatinem/rust-cache@9d47c6ad4b02e050fd481d890b2ea34778fd09d6 # v2.7.8

       - name: Setup Poetry
-        uses: matrix-org/setup-python-poetry@4421c92b6223f03ae55560e29aa8ebd39cf6314a # v1.2.4
+        uses: matrix-org/setup-python-poetry@5bbf6603c5c930615ec8a29f1b5d7d258d905aa4 # v2.0.0
         with:
           install-project: "false"
+          poetry-version: "2.1.1"

       - name: Run ruff check
         continue-on-error: true
.github/workflows/latest_deps.yml (vendored): 4 lines changed
@@ -46,10 +46,10 @@ jobs:

       # The dev dependencies aren't exposed in the wheel metadata (at least with current
       # poetry-core versions), so we install with poetry.
-      - uses: matrix-org/setup-python-poetry@4421c92b6223f03ae55560e29aa8ebd39cf6314a # v1.2.4
+      - uses: matrix-org/setup-python-poetry@5bbf6603c5c930615ec8a29f1b5d7d258d905aa4 # v2.0.0
         with:
           python-version: "3.x"
-          poetry-version: "1.3.2"
+          poetry-version: "2.1.1"
           extras: "all"
       # Dump installed versions for debugging.
       - run: poetry run pip list > before.txt
.github/workflows/tests.yml (vendored): 30 lines changed
@@ -87,10 +87,10 @@ jobs:
       - name: Install Rust
         uses: dtolnay/rust-toolchain@e05ebb0e73db581a4877c6ce762e29fe1e0b5073 # 1.66.0
       - uses: Swatinem/rust-cache@9d47c6ad4b02e050fd481d890b2ea34778fd09d6 # v2.7.8
-      - uses: matrix-org/setup-python-poetry@4421c92b6223f03ae55560e29aa8ebd39cf6314a # v1.2.4
+      - uses: matrix-org/setup-python-poetry@5bbf6603c5c930615ec8a29f1b5d7d258d905aa4 # v2.0.0
         with:
           python-version: "3.x"
-          poetry-version: "1.3.2"
+          poetry-version: "2.1.1"
           extras: "all"
       - run: poetry run scripts-dev/generate_sample_config.sh --check
       - run: poetry run scripts-dev/config-lint.sh
@@ -127,8 +127,9 @@ jobs:
         uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

       - name: Setup Poetry
-        uses: matrix-org/setup-python-poetry@4421c92b6223f03ae55560e29aa8ebd39cf6314a # v1.2.4
+        uses: matrix-org/setup-python-poetry@5bbf6603c5c930615ec8a29f1b5d7d258d905aa4 # v2.0.0
         with:
+          poetry-version: "2.1.1"
           install-project: "false"

       - name: Run ruff check
@@ -152,7 +153,7 @@ jobs:
       - uses: Swatinem/rust-cache@9d47c6ad4b02e050fd481d890b2ea34778fd09d6 # v2.7.8

       - name: Setup Poetry
-        uses: matrix-org/setup-python-poetry@4421c92b6223f03ae55560e29aa8ebd39cf6314a # v1.2.4
+        uses: matrix-org/setup-python-poetry@5bbf6603c5c930615ec8a29f1b5d7d258d905aa4 # v2.0.0
         with:
           # We want to make use of type hints in optional dependencies too.
           extras: all
@@ -161,6 +162,7 @@ jobs:
           # https://github.com/matrix-org/synapse/pull/15376#issuecomment-1498983775
           # To make CI green, err towards caution and install the project.
           install-project: "true"
+          poetry-version: "2.1.1"

       # Cribbed from
       # https://github.com/AustinScola/mypy-cache-github-action/blob/85ea4f2972abed39b33bd02c36e341b28ca59213/src/restore.ts#L10-L17
@@ -210,9 +212,9 @@ jobs:
       - name: Install Rust
         uses: dtolnay/rust-toolchain@e05ebb0e73db581a4877c6ce762e29fe1e0b5073 # 1.66.0
       - uses: Swatinem/rust-cache@9d47c6ad4b02e050fd481d890b2ea34778fd09d6 # v2.7.8
-      - uses: matrix-org/setup-python-poetry@4421c92b6223f03ae55560e29aa8ebd39cf6314a # v1.2.4
+      - uses: matrix-org/setup-python-poetry@5bbf6603c5c930615ec8a29f1b5d7d258d905aa4 # v2.0.0
         with:
-          poetry-version: "1.3.2"
+          poetry-version: "2.1.1"
           extras: "all"
       - run: poetry run scripts-dev/check_pydantic_models.py
@@ -363,10 +365,10 @@ jobs:
         uses: dtolnay/rust-toolchain@e05ebb0e73db581a4877c6ce762e29fe1e0b5073 # 1.66.0
       - uses: Swatinem/rust-cache@9d47c6ad4b02e050fd481d890b2ea34778fd09d6 # v2.7.8

-      - uses: matrix-org/setup-python-poetry@4421c92b6223f03ae55560e29aa8ebd39cf6314a # v1.2.4
+      - uses: matrix-org/setup-python-poetry@5bbf6603c5c930615ec8a29f1b5d7d258d905aa4 # v2.0.0
         with:
           python-version: ${{ matrix.job.python-version }}
-          poetry-version: "1.3.2"
+          poetry-version: "2.1.1"
           extras: ${{ matrix.job.extras }}
       - name: Await PostgreSQL
         if: ${{ matrix.job.postgres-version }}
@@ -465,10 +467,10 @@ jobs:
       - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
       # Install libs necessary for PyPy to build binary wheels for dependencies
       - run: sudo apt-get -qq install xmlsec1 libxml2-dev libxslt-dev
-      - uses: matrix-org/setup-python-poetry@4421c92b6223f03ae55560e29aa8ebd39cf6314a # v1.2.4
+      - uses: matrix-org/setup-python-poetry@5bbf6603c5c930615ec8a29f1b5d7d258d905aa4 # v2.0.0
         with:
           python-version: ${{ matrix.python-version }}
-          poetry-version: "1.3.2"
+          poetry-version: "2.1.1"
           extras: ${{ matrix.extras }}
       - run: poetry run trial --jobs=2 tests
       - name: Dump logs
@@ -559,9 +561,9 @@ jobs:
     steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
      - run: sudo apt-get -qq install xmlsec1 postgresql-client
-      - uses: matrix-org/setup-python-poetry@4421c92b6223f03ae55560e29aa8ebd39cf6314a # v1.2.4
+      - uses: matrix-org/setup-python-poetry@5bbf6603c5c930615ec8a29f1b5d7d258d905aa4 # v2.0.0
        with:
-          poetry-version: "1.3.2"
+          poetry-version: "2.1.1"
          extras: "postgres"
      - run: .ci/scripts/test_export_data_command.sh
        env:
@@ -612,10 +614,10 @@ jobs:
           wget --quiet -O - https://www.postgresql.org/media/keys/ACCC4CF8.asc | sudo apt-key add -
           sudo apt-get update
       - run: sudo apt-get -qq install xmlsec1 postgresql-client
-      - uses: matrix-org/setup-python-poetry@4421c92b6223f03ae55560e29aa8ebd39cf6314a # v1.2.4
+      - uses: matrix-org/setup-python-poetry@5bbf6603c5c930615ec8a29f1b5d7d258d905aa4 # v2.0.0
         with:
           python-version: ${{ matrix.python-version }}
-          poetry-version: "1.3.2"
+          poetry-version: "2.1.1"
           extras: "postgres"
       - run: .ci/scripts/test_synapse_port_db.sh
         id: run_tester_script
.github/workflows/twisted_trunk.yml (vendored): 10 lines changed
@@ -46,10 +46,11 @@ jobs:
         uses: dtolnay/rust-toolchain@fcf085fcb4b4b8f63f96906cd713eb52181b5ea4 # stable (rust 1.85.1)
       - uses: Swatinem/rust-cache@9d47c6ad4b02e050fd481d890b2ea34778fd09d6 # v2.7.8

-      - uses: matrix-org/setup-python-poetry@4421c92b6223f03ae55560e29aa8ebd39cf6314a # v1.2.4
+      - uses: matrix-org/setup-python-poetry@5bbf6603c5c930615ec8a29f1b5d7d258d905aa4 # v2.0.0
         with:
           python-version: "3.x"
           extras: "all"
+          poetry-version: "2.1.1"
       - run: |
           poetry remove twisted
           poetry add --extras tls git+https://github.com/twisted/twisted.git#${{ inputs.twisted_ref || 'trunk' }}
@@ -71,10 +72,11 @@ jobs:
         uses: dtolnay/rust-toolchain@fcf085fcb4b4b8f63f96906cd713eb52181b5ea4 # stable (rust 1.85.1)
       - uses: Swatinem/rust-cache@9d47c6ad4b02e050fd481d890b2ea34778fd09d6 # v2.7.8

-      - uses: matrix-org/setup-python-poetry@4421c92b6223f03ae55560e29aa8ebd39cf6314a # v1.2.4
+      - uses: matrix-org/setup-python-poetry@5bbf6603c5c930615ec8a29f1b5d7d258d905aa4 # v2.0.0
         with:
           python-version: "3.x"
           extras: "all test"
+          poetry-version: "2.1.1"
       - run: |
           poetry remove twisted
           poetry add --extras tls git+https://github.com/twisted/twisted.git#trunk
@@ -181,11 +183,11 @@ jobs:
         run: |
           set -x
           DEBIAN_FRONTEND=noninteractive sudo apt-get install -yqq python3 pipx
-          pipx install poetry==1.3.2
+          pipx install poetry==2.1.1

           poetry remove -n twisted
           poetry add -n --extras tls git+https://github.com/twisted/twisted.git#trunk
-          poetry lock --no-update
+          poetry lock
         working-directory: synapse

       - run: |
changelog.d/18251.misc (new file): 1 line
@@ -0,0 +1 @@
+Update Poetry to 2.1.1, including updating the lock file version.

changelog.d/18254.feature (new file): 1 line
@@ -0,0 +1 @@
+Add background job to clear unreferenced state groups.

changelog.d/18276.doc (new file): 1 line
@@ -0,0 +1 @@
+Correct a small typo in the SSO mapping providers documentation.
debian/build_virtualenv (vendored): 2 lines changed
@@ -35,7 +35,7 @@ TEMP_VENV="$(mktemp -d)"
 python3 -m venv "$TEMP_VENV"
 source "$TEMP_VENV/bin/activate"
 pip install -U pip
-pip install poetry==1.3.2
+pip install poetry==2.1.1 poetry-plugin-export==1.9.0
 poetry export \
     --extras all \
     --extras test \
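Side note: Poetry 2.x splits `poetry export` out into the separate `poetry-plugin-export` package, which is why the plugin is now installed explicitly here (and version-bumped to 1.9.0 in the Dockerfile hunk further down).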
debian/changelog (vendored): 6 lines changed
@@ -1,3 +1,9 @@
+matrix-synapse-py3 (1.127.0~rc1+nmu1) UNRELEASED; urgency=medium
+
+  * Update Poetry to 2.1.1.
+
+ -- Synapse Packaging team <packages@matrix.org>  Wed, 19 Mar 2025 17:38:49 +0000
+
 matrix-synapse-py3 (1.127.0~rc1) stable; urgency=medium

   * New Synapse release 1.127.0rc1.
@@ -22,7 +22,7 @@

 ARG DEBIAN_VERSION=bookworm
 ARG PYTHON_VERSION=3.12
-ARG POETRY_VERSION=1.8.3
+ARG POETRY_VERSION=2.1.1

 ###
 ### Stage 0: generate requirements.txt
@@ -56,7 +56,7 @@ ENV UV_LINK_MODE=copy
 ARG POETRY_VERSION
 RUN --mount=type=cache,target=/root/.cache/uv \
     if [ -z "$TEST_ONLY_IGNORE_POETRY_LOCKFILE" ]; then \
-      uvx --with poetry-plugin-export==1.8.0 \
+      uvx --with poetry-plugin-export==1.9.0 \
       poetry@${POETRY_VERSION} export --extras all -o /synapse/requirements.txt ${TEST_ONLY_SKIP_DEP_HASH_VERIFICATION:+--without-hashes}; \
     else \
       touch /synapse/requirements.txt; \
@@ -162,7 +162,7 @@ by a unique name, the current status (stored in JSON), and some dependency infor
 * Whether the update requires a previous update to be complete.
 * A rough ordering for which to complete updates.

-A new background updates needs to be added to the `background_updates` table:
+A new background update needs to be added to the `background_updates` table:

 ```sql
 INSERT INTO background_updates (ordering, update_name, depends_on, progress_json) VALUES
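The other half of this pattern is a Python handler registered under the same name as the table row; the purge_events.py hunk later in this diff shows the real registration. As a self-contained sketch of the driving loop, with illustrative names rather than Synapse's actual API:

    from typing import Callable, Dict, Optional, Tuple

    # Toy model: a handler is called repeatedly with its saved progress and a
    # batch size, and returns new progress (or None when finished) plus how
    # much work it performed in this batch.
    Handler = Callable[[Dict, int], Tuple[Optional[Dict], int]]

    def run_background_update(handler: Handler, batch_size: int = 100) -> None:
        progress: Optional[Dict] = {}
        while progress is not None:
            progress, _work_done = handler(progress, batch_size)

    def example_handler(progress: Dict, batch_size: int) -> Tuple[Optional[Dict], int]:
        last = progress.get("last_processed", 0)
        new_last = min(last + batch_size, 1000)  # pretend there are 1000 rows total
        done = new_last >= 1000
        return (None if done else {"last_processed": new_last}), new_last - last

    run_background_update(example_handler)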
@@ -187,7 +187,7 @@ useful.
 ## ...add a new dependency?

 Either:
-- manually update `pyproject.toml`; then `poetry lock --no-update`; or else
+- manually update `pyproject.toml`; then `poetry lock`; or else
 - `poetry add packagename`. See `poetry add --help`; note the `--dev`,
   `--extras` and `--optional` flags in particular.

@@ -202,12 +202,12 @@ poetry remove packagename
 ```

 ought to do the trick. Alternatively, manually update `pyproject.toml` and
-`poetry lock --no-update`. Include the updated `pyproject.toml` and `poetry.lock`
+`poetry lock`. Include the updated `pyproject.toml` and `poetry.lock`
 files in your commit.

 ## ...update the version range for an existing dependency?

-Best done by manually editing `pyproject.toml`, then `poetry lock --no-update`.
+Best done by manually editing `pyproject.toml`, then `poetry lock`.
 Include the updated `pyproject.toml` and `poetry.lock` in your commit.

 ## ...update a dependency in the locked environment?

@@ -233,7 +233,7 @@ poetry add packagename==1.2.3

 # Get poetry to recompute the content-hash of pyproject.toml without changing
 # the locked package versions.
-poetry lock --no-update
+poetry lock
 ```

 Either way, include the updated `poetry.lock` file in your commit.
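These repeated substitutions track a default-behaviour change in Poetry 2.x: `poetry lock` now refreshes the lock file without bumping locked versions, which is what `poetry lock --no-update` requested in Poetry 1.x.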
@@ -10,7 +10,7 @@ As an example, a SSO service may return the email address
 to turn that into a displayname when creating a Matrix user for this individual.
 It may choose `John Smith`, or `Smith, John [Example.com]` or any number of
 variations. As each Synapse configuration may want something different, this is
-where SAML mapping providers come into play.
+where SSO mapping providers come into play.

 SSO mapping providers are currently supported for OpenID and SAML SSO
 configurations. Please see the details below for how to implement your own.
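To make the documentation's example concrete, here is a pure-function sketch of the kind of transformation a mapping provider might apply. Illustrative only; this is not Synapse's mapping-provider interface, which involves provider classes and request context:

    def email_to_displayname(email: str) -> str:
        """Illustrative only: derive a display name from an SSO email address."""
        local, _, domain = email.partition("@")
        # "john.smith" -> "John Smith"
        name = " ".join(part.capitalize() for part in local.split("."))
        return f"{name} [{domain.capitalize()}]"

    assert email_to_displayname("john.smith@example.com") == "John Smith [Example.com]"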
poetry.lock (generated): 220 lines changed
File diff suppressed because it is too large
@@ -192,6 +192,11 @@ APPEND_ONLY_TABLES = [


 IGNORED_TABLES = {
+    # Porting the auto generated sequence in this table is non-trivial.
+    # None of the entries in this list are mandatory for Synapse to keep working.
+    # If state group disk space is an issue after the port, the
+    # `mark_unreferenced_state_groups_for_deletion_bg_update` background task can be run again.
+    "state_groups_pending_deletion",
     # We don't port these tables, as they're a faff and we can regenerate
     # them anyway.
     "user_directory",
@@ -217,6 +222,15 @@ IGNORED_TABLES = {
 }


+# These background updates will not be applied upon creation of the postgres database.
+IGNORED_BACKGROUND_UPDATES = {
+    # Reapplying this background update to the postgres database is unnecessary after
+    # already having waited for the SQLite database to complete all running background
+    # updates.
+    "mark_unreferenced_state_groups_for_deletion_bg_update",
+}
+
+
 # Error returned by the run function. Used at the top-level part of the script to
 # handle errors and return codes.
 end_error: Optional[str] = None
@@ -688,6 +702,20 @@ class Porter:
         # 0 means off. 1 means full. 2 means incremental.
         return autovacuum_setting != 0

+    async def remove_ignored_background_updates_from_database(self) -> None:
+        def _remove_delete_unreferenced_state_groups_bg_updates(
+            txn: LoggingTransaction,
+        ) -> None:
+            txn.execute(
+                "DELETE FROM background_updates WHERE update_name = ANY(?)",
+                (list(IGNORED_BACKGROUND_UPDATES),),
+            )
+
+        await self.postgres_store.db_pool.runInteraction(
+            "remove_delete_unreferenced_state_groups_bg_updates",
+            _remove_delete_unreferenced_state_groups_bg_updates,
+        )
+
     async def run(self) -> None:
         """Ports the SQLite database to a PostgreSQL database.
@@ -733,6 +761,8 @@ class Porter:
             self.hs_config.database.get_single_database()
         )

+        await self.remove_ignored_background_updates_from_database()
+
         await self.run_background_updates_on_postgres()

         self.progress.set_state("Creating port tables")
@@ -21,11 +21,19 @@

 import itertools
 import logging
-from typing import TYPE_CHECKING, Collection, Mapping, Set
+from typing import (
+    TYPE_CHECKING,
+    Collection,
+    Mapping,
+    Optional,
+    Set,
+)

 from synapse.logging.context import nested_logging_context
 from synapse.metrics.background_process_metrics import wrap_as_background_process
+from synapse.storage.database import LoggingTransaction
 from synapse.storage.databases import Databases
+from synapse.types.storage import _BackgroundUpdates

 if TYPE_CHECKING:
     from synapse.server import HomeServer
@@ -44,6 +52,11 @@ class PurgeEventsStorageController:
             self._delete_state_groups_loop, 60 * 1000
         )

+        self.stores.state.db_pool.updates.register_background_update_handler(
+            _BackgroundUpdates.MARK_UNREFERENCED_STATE_GROUPS_FOR_DELETION_BG_UPDATE,
+            self._background_delete_unrefereneced_state_groups,
+        )
+
     async def purge_room(self, room_id: str) -> None:
         """Deletes all record of a room"""
@@ -81,7 +94,8 @@ class PurgeEventsStorageController:
         )

     async def _find_unreferenced_groups(
-        self, state_groups: Collection[int]
+        self,
+        state_groups: Collection[int],
     ) -> Set[int]:
         """Used when purging history to figure out which state groups can be
         deleted.
@@ -203,3 +217,232 @@ class PurgeEventsStorageController:
             room_id,
             groups_to_sequences,
         )
+
+    async def _background_delete_unrefereneced_state_groups(
+        self, progress: dict, batch_size: int
+    ) -> int:
+        """This background update will slowly delete any unreferenced state groups"""
+
+        last_checked_state_group = progress.get("last_checked_state_group")
+
+        if last_checked_state_group is None:
+            # This is the first run.
+            last_checked_state_group = (
+                await self.stores.state.db_pool.simple_select_one_onecol(
+                    table="state_groups",
+                    keyvalues={},
+                    retcol="MAX(id)",
+                    allow_none=True,
+                    desc="get_max_state_group",
+                )
+            )
+            if last_checked_state_group is None:
+                # There are no state groups so the background process is finished.
+                await self.stores.state.db_pool.updates._end_background_update(
+                    _BackgroundUpdates.MARK_UNREFERENCED_STATE_GROUPS_FOR_DELETION_BG_UPDATE
+                )
+                return batch_size
+            last_checked_state_group += 1
+
+        (
+            last_checked_state_group,
+            final_batch,
+        ) = await self._delete_unreferenced_state_groups_batch(
+            last_checked_state_group,
+            batch_size,
+        )
+
+        if not final_batch:
+            # There are more state groups to check.
+            progress = {
+                "last_checked_state_group": last_checked_state_group,
+            }
+            await self.stores.state.db_pool.updates._background_update_progress(
+                _BackgroundUpdates.MARK_UNREFERENCED_STATE_GROUPS_FOR_DELETION_BG_UPDATE,
+                progress,
+            )
+        else:
+            # This background process is finished.
+            await self.stores.state.db_pool.updates._end_background_update(
+                _BackgroundUpdates.MARK_UNREFERENCED_STATE_GROUPS_FOR_DELETION_BG_UPDATE
+            )
+
+        return batch_size
+
+    async def _delete_unreferenced_state_groups_batch(
+        self,
+        last_checked_state_group: int,
+        batch_size: int,
+    ) -> tuple[int, bool]:
+        """Looks for unreferenced state groups starting from the last state group
+        checked and marks them for deletion.
+
+        Args:
+            last_checked_state_group: The last state group that was checked.
+            batch_size: How many state groups to process in this iteration.
+
+        Returns:
+            (last_checked_state_group, final_batch)
+        """
+
+        # Find all state groups that can be deleted if any of the original set are deleted.
+        (
+            to_delete,
+            last_checked_state_group,
+            final_batch,
+        ) = await self._find_unreferenced_groups_for_background_deletion(
+            last_checked_state_group, batch_size
+        )
+
+        if len(to_delete) == 0:
+            return last_checked_state_group, final_batch
+
+        await self.stores.state_deletion.mark_state_groups_as_pending_deletion(
+            to_delete
+        )
+
+        return last_checked_state_group, final_batch
+
+    async def _find_unreferenced_groups_for_background_deletion(
+        self,
+        last_checked_state_group: int,
+        batch_size: int,
+    ) -> tuple[Set[int], int, bool]:
+        """Used when deleting unreferenced state groups in the background to figure out
+        which state groups can be deleted.
+        To avoid increased DB usage due to de-deltaing state groups, this returns only
+        state groups which are free standing (ie. no shared edges with referenced groups) or
+        state groups which do not share edges which result in a future referenced group.
+
+        The following scenarios outline the possibilities based on state group data in
+        the DB.
+
+        ie. Free standing -> state groups 1-N would be returned:
+        SG_1
+        |
+        ...
+        |
+        SG_N
+
+        ie. Previous reference -> state groups 2-N would be returned:
+        SG_1 <- referenced by event
+        |
+        SG_2
+        |
+        ...
+        |
+        SG_N
+
+        ie. Future reference -> none of the following state groups would be returned:
+        SG_1
+        |
+        SG_2
+        |
+        ...
+        |
+        SG_N <- referenced by event
+
+        Args:
+            last_checked_state_group: The last state group that was checked.
+            batch_size: How many state groups to process in this iteration.
+
+        Returns:
+            (to_delete, last_checked_state_group, final_batch)
+        """
+
+        # If a state group's next edge is not pending deletion then we don't delete the state group.
+        # If there is no next edge or the next edges are all marked for deletion, then delete
+        # the state group.
+        # This holds since we walk backwards from the latest state groups, ensuring that
+        # we've already checked newer state groups for event references along the way.
+        def get_next_state_groups_marked_for_deletion_txn(
+            txn: LoggingTransaction,
+        ) -> tuple[dict[int, bool], dict[int, int]]:
+            state_group_sql = """
+                SELECT s.id, e.state_group, d.state_group
+                FROM (
+                    SELECT id FROM state_groups
+                    WHERE id < ? ORDER BY id DESC LIMIT ?
+                ) as s
+                LEFT JOIN state_group_edges AS e ON (s.id = e.prev_state_group)
+                LEFT JOIN state_groups_pending_deletion AS d ON (e.state_group = d.state_group)
+            """
+            txn.execute(state_group_sql, (last_checked_state_group, batch_size))
+
+            # Mapping from state group to whether we should delete it.
+            state_groups_to_deletion: dict[int, bool] = {}
+
+            # Mapping from state group to prev state group.
+            state_groups_to_prev: dict[int, int] = {}
+
+            for row in txn:
+                state_group = row[0]
+                next_edge = row[1]
+                pending_deletion = row[2]
+
+                if next_edge is not None:
+                    state_groups_to_prev[next_edge] = state_group
+
+                if next_edge is not None and not pending_deletion:
+                    # We have found an edge not marked for deletion.
+                    # Check previous results to see if this group is part of a chain
+                    # within this batch that qualifies for deletion.
+                    # ie. batch contains:
+                    # SG_1 -> SG_2 -> SG_3
+                    # If SG_3 is a candidate for deletion, then SG_2 & SG_1 should also
+                    # be, even though they have edges which may not be marked for
+                    # deletion.
+                    # This relies on SQL results being sorted in DESC order to work.
+                    next_is_deletion_candidate = state_groups_to_deletion.get(next_edge)
+                    if (
+                        next_is_deletion_candidate is None
+                        or not next_is_deletion_candidate
+                    ):
+                        state_groups_to_deletion[state_group] = False
+                    else:
+                        state_groups_to_deletion.setdefault(state_group, True)
+                else:
+                    # This state group may be a candidate for deletion
+                    state_groups_to_deletion.setdefault(state_group, True)
+
+            return state_groups_to_deletion, state_groups_to_prev
+
+        (
+            state_groups_to_deletion,
+            state_group_edges,
+        ) = await self.stores.state.db_pool.runInteraction(
+            "get_next_state_groups_marked_for_deletion",
+            get_next_state_groups_marked_for_deletion_txn,
+        )
+        deletion_candidates = {
+            state_group
+            for state_group, deletion in state_groups_to_deletion.items()
+            if deletion
+        }
+
+        final_batch = False
+        state_groups = state_groups_to_deletion.keys()
+        if len(state_groups) < batch_size:
+            final_batch = True
+        else:
+            last_checked_state_group = min(state_groups)
+
+        if len(state_groups) == 0:
+            return set(), last_checked_state_group, final_batch
+
+        # Determine if any of the remaining state groups are directly referenced.
+        referenced = await self.stores.main.get_referenced_state_groups(
+            deletion_candidates
+        )
+
+        # Remove state groups from deletion_candidates which are directly referenced or share a
+        # future edge with a referenced state group within this batch.
+        def filter_reference_chains(group: Optional[int]) -> None:
+            while group is not None:
+                deletion_candidates.discard(group)
+                group = state_group_edges.get(group)
+
+        for referenced_group in referenced:
+            filter_reference_chains(referenced_group)
+
+        return deletion_candidates, last_checked_state_group, final_batch
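As a quick, self-contained model of the chain-filtering step at the end of `_find_unreferenced_groups_for_background_deletion` above (illustrative names; the real code walks `state_group_edges` the same way):

    from typing import Dict, Optional, Set

    def filter_candidates(
        candidates: Set[int],
        next_to_prev: Dict[int, int],  # maps a group to its previous group in the chain
        referenced: Set[int],
    ) -> Set[int]:
        """Drop every candidate on a chain that leads to a referenced group."""
        remaining = set(candidates)
        for start in referenced:
            group: Optional[int] = start
            while group is not None:
                remaining.discard(group)
                group = next_to_prev.get(group)
        return remaining

    # Chain SG_1 -> SG_2 -> SG_3 with SG_3 referenced: the whole chain survives deletion.
    assert filter_candidates({1, 2, 3}, {3: 2, 2: 1}, {3}) == set()
    # A free-standing group with no referenced descendants stays deletable.
    assert filter_candidates({10}, {}, set()) == {10}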
@@ -20,7 +20,15 @@
 #

 import logging
-from typing import TYPE_CHECKING, Dict, List, Mapping, Optional, Tuple, Union
+from typing import (
+    TYPE_CHECKING,
+    Dict,
+    List,
+    Mapping,
+    Optional,
+    Tuple,
+    Union,
+)

 from synapse.logging.opentracing import tag_args, trace
 from synapse.storage._base import SQLBaseStore
@@ -321,18 +321,42 @@ class StateDeletionDataStore:
     async def mark_state_groups_as_pending_deletion(
         self, state_groups: Collection[int]
     ) -> None:
-        """Mark the given state groups as pending deletion"""
+        """Mark the given state groups as pending deletion.
+
+        If any of the state groups are already pending deletion, then those records are
+        left as is.
+        """
+
+        await self.db_pool.runInteraction(
+            "mark_state_groups_as_pending_deletion",
+            self._mark_state_groups_as_pending_deletion_txn,
+            state_groups,
+        )
+
+    def _mark_state_groups_as_pending_deletion_txn(
+        self,
+        txn: LoggingTransaction,
+        state_groups: Collection[int],
+    ) -> None:
+        sql = """
+            INSERT INTO state_groups_pending_deletion (state_group, insertion_ts)
+            VALUES %s
+            ON CONFLICT (state_group)
+            DO NOTHING
+        """

         now = self._clock.time_msec()

-        await self.db_pool.simple_upsert_many(
-            table="state_groups_pending_deletion",
-            key_names=("state_group",),
-            key_values=[(state_group,) for state_group in state_groups],
-            value_names=("insertion_ts",),
-            value_values=[(now,) for _ in state_groups],
-            desc="mark_state_groups_as_pending_deletion",
-        )
+        rows = [
+            (
+                state_group,
+                now,
+            )
+            for state_group in state_groups
+        ]
+        if isinstance(txn.database_engine, PostgresEngine):
+            txn.execute_values(sql % ("?",), rows, fetch=False)
+        else:
+            txn.execute_batch(sql % ("(?, ?)",), rows)

     async def mark_state_groups_as_used(self, state_groups: Collection[int]) -> None:
         """Mark the given state groups as now being referenced"""
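The raw `INSERT ... ON CONFLICT DO NOTHING` replaces the previous upsert so that a row already pending deletion keeps its original `insertion_ts`. A self-contained sqlite3 demonstration of that semantic (assumes a SQLite build with UPSERT support, 3.24+):

    import sqlite3

    conn = sqlite3.connect(":memory:")
    conn.execute(
        "CREATE TABLE state_groups_pending_deletion "
        "(state_group INTEGER PRIMARY KEY, insertion_ts INTEGER)"
    )
    upsert = (
        "INSERT INTO state_groups_pending_deletion (state_group, insertion_ts) "
        "VALUES (?, ?) ON CONFLICT (state_group) DO NOTHING"
    )
    conn.executemany(upsert, [(1, 100), (2, 100)])
    # Group 1 is already pending: its timestamp stays 100 rather than moving to 200.
    conn.executemany(upsert, [(1, 200), (3, 200)])
    rows = conn.execute(
        "SELECT state_group, insertion_ts FROM state_groups_pending_deletion "
        "ORDER BY state_group"
    ).fetchall()
    assert rows == [(1, 100), (2, 100), (3, 200)]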
@@ -161,6 +161,7 @@ Changes in SCHEMA_VERSION = 89

 Changes in SCHEMA_VERSION = 90
     - Add a column `participant` to `room_memberships` table
+    - Add background update to delete unreferenced state groups.
 """
@@ -0,0 +1,16 @@
+--
+-- This file is licensed under the Affero General Public License (AGPL) version 3.
+--
+-- Copyright (C) 2025 New Vector, Ltd
+--
+-- This program is free software: you can redistribute it and/or modify
+-- it under the terms of the GNU Affero General Public License as
+-- published by the Free Software Foundation, either version 3 of the
+-- License, or (at your option) any later version.
+--
+-- See the GNU Affero General Public License for more details:
+-- <https://www.gnu.org/licenses/agpl-3.0.html>.
+
+-- Add a background update to delete any unreferenced state groups
+INSERT INTO background_updates (ordering, update_name, progress_json) VALUES
+  (9002, 'mark_unreferenced_state_groups_for_deletion_bg_update', '{}');
@@ -48,3 +48,7 @@ class _BackgroundUpdates:
     SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_FIX_FORGOTTEN_COLUMN_BG_UPDATE = (
         "sliding_sync_membership_snapshots_fix_forgotten_column_bg_update"
     )
+
+    MARK_UNREFERENCED_STATE_GROUPS_FOR_DELETION_BG_UPDATE = (
+        "mark_unreferenced_state_groups_for_deletion_bg_update"
+    )
@@ -24,6 +24,7 @@ from synapse.api.errors import NotFoundError, SynapseError
 from synapse.rest.client import room
 from synapse.server import HomeServer
 from synapse.types.state import StateFilter
+from synapse.types.storage import _BackgroundUpdates
 from synapse.util import Clock

 from tests.unittest import HomeserverTestCase
@@ -303,3 +304,156 @@ class PurgeTests(HomeserverTestCase):
             )
         )
         self.assertEqual(len(state_groups), 1)
+
+    def test_clear_unreferenced_state_groups(self) -> None:
+        """Test that any unreferenced state groups are automatically cleaned up."""
+
+        self.helper.send(self.room_id, body="test1")
+        state1 = self.helper.send_state(
+            self.room_id, "org.matrix.test", body={"number": 2}
+        )
+        # Create enough state events to require multiple batches of
+        # mark_unreferenced_state_groups_for_deletion_bg_update to be run.
+        for i in range(200):
+            self.helper.send_state(self.room_id, "org.matrix.test", body={"number": i})
+        self.helper.send(self.room_id, body="test4")
+        last = self.helper.send(self.room_id, body="test5")
+
+        # Create an unreferenced state group that has no prev group.
+        unreferenced_free_state_group = self.get_success(
+            self.state_store.store_state_group(
+                event_id=last["event_id"],
+                room_id=self.room_id,
+                prev_group=None,
+                delta_ids={("org.matrix.test", ""): state1["event_id"]},
+                current_state_ids={("org.matrix.test", ""): ""},
+            )
+        )
+
+        # Create some unreferenced state groups that have a prev group of one of the
+        # existing state groups.
+        prev_group = self.get_success(
+            self.store._get_state_group_for_event(state1["event_id"])
+        )
+        unreferenced_end_state_group = self.get_success(
+            self.state_store.store_state_group(
+                event_id=last["event_id"],
+                room_id=self.room_id,
+                prev_group=prev_group,
+                delta_ids={("org.matrix.test", ""): state1["event_id"]},
+                current_state_ids=None,
+            )
+        )
+        another_unreferenced_end_state_group = self.get_success(
+            self.state_store.store_state_group(
+                event_id=last["event_id"],
+                room_id=self.room_id,
+                prev_group=unreferenced_end_state_group,
+                delta_ids={("org.matrix.test", ""): state1["event_id"]},
+                current_state_ids=None,
+            )
+        )
+
+        # Add some other unreferenced state groups which lead to a referenced state
+        # group.
+        # These state groups should not get deleted.
+        chain_state_group = self.get_success(
+            self.state_store.store_state_group(
+                event_id=last["event_id"],
+                room_id=self.room_id,
+                prev_group=None,
+                delta_ids={("org.matrix.test", ""): ""},
+                current_state_ids={("org.matrix.test", ""): ""},
+            )
+        )
+        chain_state_group_2 = self.get_success(
+            self.state_store.store_state_group(
+                event_id=last["event_id"],
+                room_id=self.room_id,
+                prev_group=chain_state_group,
+                delta_ids={("org.matrix.test", ""): ""},
+                current_state_ids=None,
+            )
+        )
+        referenced_chain_state_group = self.get_success(
+            self.state_store.store_state_group(
+                event_id=last["event_id"],
+                room_id=self.room_id,
+                prev_group=chain_state_group_2,
+                delta_ids={("org.matrix.test", ""): ""},
+                current_state_ids=None,
+            )
+        )
+        self.get_success(
+            self.store.db_pool.simple_insert(
+                "event_to_state_groups",
+                {
+                    "event_id": "$new_event",
+                    "state_group": referenced_chain_state_group,
+                },
+            )
+        )
+
+        # Insert and run the background update.
+        self.get_success(
+            self.store.db_pool.simple_insert(
+                "background_updates",
+                {
+                    "update_name": _BackgroundUpdates.MARK_UNREFERENCED_STATE_GROUPS_FOR_DELETION_BG_UPDATE,
+                    "progress_json": "{}",
+                },
+            )
+        )
+        self.store.db_pool.updates._all_done = False
+        self.wait_for_background_updates()
+
+        # Advance so that the background job to delete the state groups runs
+        self.reactor.advance(
+            1 + self.state_deletion_store.DELAY_BEFORE_DELETION_MS / 1000
+        )
+
+        # We expect that the unreferenced free state group has been deleted.
+        row = self.get_success(
+            self.state_store.db_pool.simple_select_one_onecol(
+                table="state_groups",
+                keyvalues={"id": unreferenced_free_state_group},
+                retcol="id",
+                allow_none=True,
+                desc="test_purge_unreferenced_state_group",
+            )
+        )
+        self.assertIsNone(row)
+
+        # We expect that both unreferenced end state groups have been deleted.
+        row = self.get_success(
+            self.state_store.db_pool.simple_select_one_onecol(
+                table="state_groups",
+                keyvalues={"id": unreferenced_end_state_group},
+                retcol="id",
+                allow_none=True,
+                desc="test_purge_unreferenced_state_group",
+            )
+        )
+        self.assertIsNone(row)
+        row = self.get_success(
+            self.state_store.db_pool.simple_select_one_onecol(
+                table="state_groups",
+                keyvalues={"id": another_unreferenced_end_state_group},
+                retcol="id",
+                allow_none=True,
+                desc="test_purge_unreferenced_state_group",
+            )
+        )
+        self.assertIsNone(row)
+
+        # We expect every other state group for the room to survive: the room's
+        # ordinary state groups plus the chain leading to the referenced group.
+        state_groups = self.get_success(
+            self.state_store.db_pool.simple_select_onecol(
+                table="state_groups",
+                keyvalues={"room_id": self.room_id},
+                retcol="id",
+                desc="test_purge_unreferenced_state_group",
+            )
+        )
+        self.assertEqual(len(state_groups), 210)