From 526f09b6e8b4179a70bb1840eb2a4dc6c9db9c94 Mon Sep 17 00:00:00 2001
From: Olivier 'reivilibre <oliverw@matrix.org>
Date: Thu, 6 Mar 2025 17:54:52 +0000
Subject: [PATCH 1/2] Docstring is a contradiction. Can we make it make sense?

---
 synapse/storage/databases/state/store.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/synapse/storage/databases/state/store.py b/synapse/storage/databases/state/store.py
index 90d7beb92f..df82c77468 100644
--- a/synapse/storage/databases/state/store.py
+++ b/synapse/storage/databases/state/store.py
@@ -739,8 +739,8 @@ class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore):
         room_id: str,
         state_groups_to_sequence_numbers: Mapping[int, int],
     ) -> bool:
-        """Deletes no longer referenced state groups and de-deltas any state
-        groups that reference them.
+        """Deletes state groups that are no longer referenced by events <<<TODO confirm this is the intended meaning>>>
+        and de-deltas any state groups that reference them.
 
         Args:
             room_id: The room the state groups belong to (must all be in the

From 82a789f8c00fe90407e2727d02d9790c51a140ba Mon Sep 17 00:00:00 2001
From: Olivier 'reivilibre <oliverw@matrix.org>
Date: Thu, 6 Mar 2025 17:54:57 +0000
Subject: [PATCH 2/2] Demonstration of table explosion

---
 tests/storage/test_state.py | 89 +++++++++++++++++++++++++++++++++++++
 1 file changed, 89 insertions(+)

diff --git a/tests/storage/test_state.py b/tests/storage/test_state.py
index 48f8d1c340..07aea01067 100644
--- a/tests/storage/test_state.py
+++ b/tests/storage/test_state.py
@@ -643,3 +643,92 @@ class StateStoreTestCase(HomeserverTestCase):
                     ),
                 )
                 self.assertEqual(context.state_group_before_event, groups[0][0])
+
+
+class StateGroupDataStoreTestCase(HomeserverTestCase):
+    """Tests for the StateGroupDataStore"""
+
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
+        self.store = hs.get_datastores().main
+        self.storage = hs.get_storage_controllers()
+        self.state_datastore = self.storage.state.stores.state
+
+    def test_purge_unreferenced_state_groups__dedelta(self) -> None:
+        """
+        Tests the de-delta-ing behaviour of event-unreferenced state group purging.
+
+        Set up a state group DAG like:
+
+        1 {m.room.member/@a:h} -> 2 {m.room.member/@b:h} -> 3 {m.room.member/@c:h}
+
+        Then delete state group 2, supposing it is no longer referenced by any events.
+
+        The end state group DAG should look like ...
+        """
+
+        self.get_success(
+            self.state_datastore.db_pool.simple_insert_many(
+                table="state_groups_state",
+                keys=("state_group", "room_id", "type", "state_key", "event_id"),
+                values=[
+                    (1, "!r:h", "m.room.member", "@a:h", "$abc"),
+                    (2, "!r:h", "m.room.member", "@b:h", "$def"),
+                    (3, "!r:h", "m.room.member", "@c:h", "$ghi"),
+                ],
+                desc="_",
+            )
+        )
+
+        self.get_success(
+            self.state_datastore.db_pool.simple_insert_many(
+                table="state_group_edges",
+                keys=("state_group", "prev_state_group"),
+                values=[
+                    (2, 1),
+                    (3, 2),
+                ],
+                desc="_",
+            )
+        )
+
+        self.get_success(
+            self.state_datastore.db_pool.simple_insert_many(
+                table="state_groups_pending_deletion",
+                keys=("state_group", "sequence_number", "insertion_ts"),
+                values=[
+                    (2, 1, -90000000),
+                ],
+                desc="_",
+            )
+        )
+        self.assertTrue(
+            self.get_success(
+                self.state_datastore.purge_unreferenced_state_groups("!r:h", {2: 1})
+            )
+        )
+
+        snapshot_rows = sorted(
+            self.get_success(
+                self.state_datastore.db_pool.simple_select_list(
+                    "state_groups_state",
+                    {},
+                    ("state_group", "type", "state_key", "event_id"),
+                    desc="_",
+                )
+            )
+        )
+
+        self.assertEqual(
+            snapshot_rows,
+            [
+                (1, "m.room.member", "@a:h", "$abc"),
+                (
+                    3,
+                    "m.room.member",
+                    "@a:h",
+                    "$abc",
+                ),  # <--- BOOM, table increased in size!!!
+                (3, "m.room.member", "@b:h", "$def"),
+                (3, "m.room.member", "@c:h", "$ghi"),
+            ],
+        )