diff --git a/changelog.d/18253.feature b/changelog.d/18253.feature
new file mode 100644
index 0000000000..f44767eb2b
--- /dev/null
+++ b/changelog.d/18253.feature
@@ -0,0 +1 @@
+Add admin API for fetching (paginated) room reports.
\ No newline at end of file
diff --git a/docs/admin_api/room_reports.md b/docs/admin_api/room_reports.md
new file mode 100644
index 0000000000..77d8fdfa7a
--- /dev/null
+++ b/docs/admin_api/room_reports.md
@@ -0,0 +1,76 @@
+# Show reported rooms
+
+This API returns information about reported rooms.
+
+To use it, you will need to authenticate by providing an `access_token`
+for a server admin: see [Admin API](../usage/administration/admin_api/).
+
+The API is:
+```
+GET /_synapse/admin/v1/room_reports?from=0&limit=2
+```
+
+It returns a JSON body like the following:
+
+```json
+{
+    "room_reports": [
+        {
+            "id": 2,
+            "reason": "foo",
+            "received_ts": 1570897107409,
+            "canonical_alias": "#alias1:matrix.org",
+            "room_id": "!ERAgBpSOcCCuTJqQPk:matrix.org",
+            "name": "Matrix HQ",
+            "user_id": "@foo:matrix.org"
+        },
+        {
+            "id": 3,
+            "reason": "bar",
+            "received_ts": 1598889612059,
+            "canonical_alias": "#alias2:matrix.org",
+            "room_id": "!eGvUQuTCkHGVwNMOjv:matrix.org",
+            "name": "Your room name here",
+            "user_id": "@bar:matrix.org"
+        }
+    ],
+    "next_token": 2,
+    "total": 4
+}
+```
+
+To paginate, check for `next_token` and if present, call the endpoint again with `from`
+set to the value of `next_token`. This will return a new page.
+
+If the endpoint does not return a `next_token` then there are no more reports to
+paginate through.
+
+**URL parameters:**
+
+* `limit`: integer - Is optional but is used for pagination, denoting the maximum number
+  of items to return in this call. Defaults to `100`.
+* `from`: integer - Is optional but used for pagination, denoting the offset in the
+  returned results. This should be treated as an opaque value and not explicitly set to
+  anything other than the return value of `next_token` from a previous call. Defaults to `0`.
+* `dir`: string - Direction of room report order. Whether to fetch the most recent
+  first (`b`) or the oldest first (`f`). Defaults to `b`.
+* `user_id`: optional string - Filter by the user ID of the reporter. This is the user who reported the room
+  and wrote the reason.
+* `room_id`: optional string - Filter by (reported) room id.
+
+**Response**
+
+The following fields are returned in the JSON response body:
+
+* `id`: integer - ID of room report.
+* `received_ts`: integer - The timestamp (in milliseconds since the unix epoch) when this
+  report was sent.
+* `room_id`: string - The ID of the room being reported.
+* `name`: string - The name of the room.
+* `user_id`: string - This is the user who reported the room and wrote the reason.
+* `reason`: string - Comment made by the `user_id` in this report. May be blank or `null`.
+* `canonical_alias`: string - The canonical alias of the room. `null` if the room does not
+  have a canonical alias set.
+* `next_token`: integer - Indication for pagination. See above.
+* `total`: integer - Total number of room reports related to the query
+  (`user_id` and `room_id`).
diff --git a/synapse/rest/admin/__init__.py b/synapse/rest/admin/__init__.py
index f3c99663e8..0b100c1374 100644
--- a/synapse/rest/admin/__init__.py
+++ b/synapse/rest/admin/__init__.py
@@ -70,6 +70,7 @@ from synapse.rest.admin.registration_tokens import (
     NewRegistrationTokenRestServlet,
     RegistrationTokenRestServlet,
 )
+from synapse.rest.admin.room_reports import RoomReportsRestServlet
 from synapse.rest.admin.rooms import (
     BlockRoomRestServlet,
     DeleteRoomStatusByDeleteIdRestServlet,
@@ -302,6 +303,7 @@ def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
     LargestRoomsStatistics(hs).register(http_server)
     EventReportDetailRestServlet(hs).register(http_server)
     EventReportsRestServlet(hs).register(http_server)
+    RoomReportsRestServlet(hs).register(http_server)
     AccountDataRestServlet(hs).register(http_server)
     PushersRestServlet(hs).register(http_server)
     MakeRoomAdminRestServlet(hs).register(http_server)
diff --git a/synapse/rest/admin/room_reports.py b/synapse/rest/admin/room_reports.py
new file mode 100644
index 0000000000..e2f69b64cd
--- /dev/null
+++ b/synapse/rest/admin/room_reports.py
@@ -0,0 +1,96 @@
+#
+# This file is licensed under the Affero General Public License (AGPL) version 3.
+#
+# Copyright (C) 2025 New Vector, Ltd
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# See the GNU Affero General Public License for more details:
+# .
+#
+# Originally licensed under the Apache License, Version 2.0:
+# .
+#
+# [This file includes modifications made by New Vector Limited]
+#
+#
+
+import logging
+from http import HTTPStatus
+from typing import TYPE_CHECKING, Tuple
+
+from synapse.api.constants import Direction
+from synapse.api.errors import Codes, SynapseError
+from synapse.http.servlet import RestServlet, parse_enum, parse_integer, parse_string
+from synapse.http.site import SynapseRequest
+from synapse.rest.admin._base import admin_patterns, assert_requester_is_admin
+from synapse.types import JsonDict
+
+if TYPE_CHECKING:
+    from synapse.server import HomeServer
+
+logger = logging.getLogger(__name__)
+
+
+# Based upon EventReportsRestServlet
+class RoomReportsRestServlet(RestServlet):
+    """
+    List all reported rooms that are known to the homeserver. Results are returned
+    in a dictionary containing report information. Supports pagination.
+    The requester must have administrator access in Synapse.
+
+    GET /_synapse/admin/v1/room_reports
+    returns:
+        200 OK with list of reports if success otherwise an error.
+
+    Args:
+        The parameters `from` and `limit` are required only for pagination.
+        By default, a `limit` of 100 is used.
+        The parameter `dir` can be used to define the order of results.
+        The `user_id` query parameter filters by the user ID of the reporter of the room.
+        The `room_id` query parameter filters by room id.
+    Returns:
+        A list of reported rooms and an integer representing the total number of
+        reported rooms that exist given this query
+    """
+
+    PATTERNS = admin_patterns("/room_reports$")
+
+    def __init__(self, hs: "HomeServer"):
+        self._auth = hs.get_auth()
+        self._store = hs.get_datastores().main
+
+    async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]:
+        await assert_requester_is_admin(self._auth, request)
+
+        start = parse_integer(request, "from", default=0)
+        limit = parse_integer(request, "limit", default=100)
+        direction = parse_enum(request, "dir", Direction, Direction.BACKWARDS)
+        user_id = parse_string(request, "user_id")
+        room_id = parse_string(request, "room_id")
+
+        if start < 0:
+            raise SynapseError(
+                HTTPStatus.BAD_REQUEST,
+                "The start parameter must be a positive integer.",
+                errcode=Codes.INVALID_PARAM,
+            )
+
+        if limit < 0:
+            raise SynapseError(
+                HTTPStatus.BAD_REQUEST,
+                "The limit parameter must be a positive integer.",
+                errcode=Codes.INVALID_PARAM,
+            )
+
+        room_reports, total = await self._store.get_room_reports_paginate(
+            start, limit, direction, user_id, room_id
+        )
+        ret = {"room_reports": room_reports, "total": total}
+        if (start + limit) < total:
+            ret["next_token"] = start + len(room_reports)
+
+        return HTTPStatus.OK, ret
diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py
index 56217fccdf..32497f642f 100644
--- a/synapse/storage/databases/main/room.py
+++ b/synapse/storage/databases/main/room.py
@@ -1860,6 +1860,107 @@ class RoomWorkerStore(CacheInvalidationWorkerStore):
             "get_event_reports_paginate", _get_event_reports_paginate_txn
         )
 
+    async def get_room_reports_paginate(
+        self,
+        start: int,
+        limit: int,
+        direction: Direction = Direction.BACKWARDS,
+        user_id: Optional[str] = None,
+        room_id: Optional[str] = None,
+    ) -> Tuple[List[Dict[str, Any]], int]:
+        """Retrieve a paginated list of room reports
+
+        Args:
+            start: room offset to begin the query from
+            limit: number of rows to retrieve
+            direction: Whether to fetch the most recent first (backwards) or the
+                oldest first (forwards)
+            user_id: search for user_id. Ignored if user_id is None
+            room_id: search for room_id. Ignored if room_id is None
+        Returns:
+            Tuple of:
+                json list of room reports
+                total number of room reports matching the filter criteria
+        """
+
+        def _get_room_reports_paginate_txn(
+            txn: LoggingTransaction,
+        ) -> Tuple[List[Dict[str, Any]], int]:
+            filters = []
+            args: List[object] = []
+
+            if user_id:
+                filters.append("rr.user_id LIKE ?")
+                args.extend(["%" + user_id + "%"])
+            if room_id:
+                filters.append("rr.room_id LIKE ?")
+                args.extend(["%" + room_id + "%"])
+
+            if direction == Direction.BACKWARDS:
+                order = "DESC"
+            else:
+                order = "ASC"
+
+            where_clause = "WHERE " + " AND ".join(filters) if len(filters) > 0 else ""
+
+            # We join on room_stats_state despite not using any columns from it
+            # because the join can influence the number of rows returned;
+            # e.g. a room that doesn't have state, maybe because it was deleted.
+            # The query returning the total count should be consistent with
+            # the query returning the results.
+            sql = """
+                SELECT COUNT(*) as total_room_reports
+                FROM room_reports AS rr
+                JOIN room_stats_state ON room_stats_state.room_id = rr.room_id
+                {}
+                """.format(where_clause)
+            txn.execute(sql, args)
+            count = cast(Tuple[int], txn.fetchone())[0]
+
+            sql = """
+                SELECT
+                    rr.id,
+                    rr.received_ts,
+                    rr.room_id,
+                    rr.user_id,
+                    rr.reason,
+                    room_stats_state.canonical_alias,
+                    room_stats_state.name
+                FROM room_reports AS rr
+                JOIN room_stats_state
+                    ON room_stats_state.room_id = rr.room_id
+                {where_clause}
+                ORDER BY rr.received_ts {order}
+                LIMIT ?
+                OFFSET ?
+            """.format(
+                where_clause=where_clause,
+                order=order,
+            )
+
+            args += [limit, start]
+            txn.execute(sql, args)
+
+            room_reports = []
+            for row in txn:
+                room_reports.append(
+                    {
+                        "id": row[0],
+                        "received_ts": row[1],
+                        "room_id": row[2],
+                        "user_id": row[3],
+                        "reason": row[4],
+                        "canonical_alias": row[5],
+                        "name": row[6],
+                    }
+                )
+
+            return room_reports, count
+
+        return await self.db_pool.runInteraction(
+            "get_room_reports_paginate", _get_room_reports_paginate_txn
+        )
+
     async def delete_event_report(self, report_id: int) -> bool:
         """Remove an event report from database.
 
diff --git a/tests/rest/admin/test_room_reports.py b/tests/rest/admin/test_room_reports.py
new file mode 100644
index 0000000000..80b6b0ed88
--- /dev/null
+++ b/tests/rest/admin/test_room_reports.py
@@ -0,0 +1,421 @@
+#
+# This file is licensed under the Affero General Public License (AGPL) version 3.
+#
+# Copyright (C) 2025 New Vector, Ltd
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# See the GNU Affero General Public License for more details:
+# .
+#
+# Originally licensed under the Apache License, Version 2.0:
+# .
+#
+# [This file includes modifications made by New Vector Limited]
+#
+#
+from typing import List
+
+from twisted.test.proto_helpers import MemoryReactor
+
+import synapse.rest.admin
+from synapse.api.errors import Codes
+from synapse.rest.client import login, reporting, room
+from synapse.server import HomeServer
+from synapse.types import JsonDict
+from synapse.util import Clock
+
+from tests import unittest
+
+
+# Based upon EventReportsTestCase
+class RoomReportsTestCase(unittest.HomeserverTestCase):
+    servlets = [
+        synapse.rest.admin.register_servlets,
+        login.register_servlets,
+        room.register_servlets,
+        reporting.register_servlets,
+    ]
+
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
+        self.admin_user = self.register_user("admin", "pass", admin=True)
+        self.admin_user_tok = self.login("admin", "pass")
+
+        self.other_user = self.register_user("user", "pass")
+        self.other_user_tok = self.login("user", "pass")
+
+        self.room_id1 = self.helper.create_room_as(
+            self.other_user, tok=self.other_user_tok, is_public=True
+        )
+        self.helper.join(self.room_id1, user=self.admin_user, tok=self.admin_user_tok)
+
+        self.room_id2 = self.helper.create_room_as(
+            self.other_user, tok=self.other_user_tok, is_public=True
+        )
+        self.helper.join(self.room_id2, user=self.admin_user, tok=self.admin_user_tok)
+
+        # Every user reports both rooms
+        self._report_room(self.room_id1, self.other_user_tok)
+        self._report_room(self.room_id2, self.other_user_tok)
+        self._report_room_without_parameters(self.room_id1, self.admin_user_tok)
+        self._report_room_without_parameters(self.room_id2, self.admin_user_tok)
+
+        self.url = "/_synapse/admin/v1/room_reports"
+
+    def test_no_auth(self) -> None:
+        """
+        Try to get the room reports without authentication.
+        """
+        channel = self.make_request("GET", self.url, {})
+
+        self.assertEqual(401, channel.code, msg=channel.json_body)
+        self.assertEqual(Codes.MISSING_TOKEN, channel.json_body["errcode"])
+
+    def test_requester_is_no_admin(self) -> None:
+        """
+        If the user is not a server admin, an error 403 is returned.
+        """
+
+        channel = self.make_request(
+            "GET",
+            self.url,
+            access_token=self.other_user_tok,
+        )
+
+        self.assertEqual(403, channel.code, msg=channel.json_body)
+        self.assertEqual(Codes.FORBIDDEN, channel.json_body["errcode"])
+
+    def test_default_success(self) -> None:
+        """
+        Testing list of reported rooms
+        """
+
+        channel = self.make_request(
+            "GET",
+            self.url,
+            access_token=self.admin_user_tok,
+        )
+
+        self.assertEqual(200, channel.code, msg=channel.json_body)
+        self.assertEqual(channel.json_body["total"], 4)
+        self.assertEqual(len(channel.json_body["room_reports"]), 4)
+        self.assertNotIn("next_token", channel.json_body)
+        self._check_fields(channel.json_body["room_reports"])
+
+    def test_limit(self) -> None:
+        """
+        Testing list of reported rooms with limit
+        """
+
+        channel = self.make_request(
+            "GET",
+            self.url + "?limit=2",
+            access_token=self.admin_user_tok,
+        )
+
+        self.assertEqual(200, channel.code, msg=channel.json_body)
+        self.assertEqual(channel.json_body["total"], 4)
+        self.assertEqual(len(channel.json_body["room_reports"]), 2)
+        self.assertEqual(channel.json_body["next_token"], 2)
+        self._check_fields(channel.json_body["room_reports"])
+
+    def test_from(self) -> None:
+        """
+        Testing list of reported rooms with a defined starting point (from)
+        """
+
+        channel = self.make_request(
+            "GET",
+            self.url + "?from=2",
+            access_token=self.admin_user_tok,
+        )
+
+        self.assertEqual(200, channel.code, msg=channel.json_body)
+        self.assertEqual(channel.json_body["total"], 4)
+        self.assertEqual(len(channel.json_body["room_reports"]), 2)
+        self.assertNotIn("next_token", channel.json_body)
+        self._check_fields(channel.json_body["room_reports"])
+
+    def test_limit_and_from(self) -> None:
+        """
+        Testing list of reported rooms with a defined starting point and limit
+        """
+
+        channel = self.make_request(
+            "GET",
+            self.url + "?from=2&limit=1",
+            access_token=self.admin_user_tok,
+        )
+
+        self.assertEqual(200, channel.code, msg=channel.json_body)
+        self.assertEqual(channel.json_body["total"], 4)
+        self.assertEqual(channel.json_body["next_token"], 3)
+        self.assertEqual(len(channel.json_body["room_reports"]), 1)
+        self._check_fields(channel.json_body["room_reports"])
+
+    def test_filter_room(self) -> None:
+        """
+        Testing list of reported rooms with a filter of room
+        """
+
+        channel = self.make_request(
+            "GET",
+            self.url + "?room_id=%s" % self.room_id1,
+            access_token=self.admin_user_tok,
+        )
+
+        self.assertEqual(200, channel.code, msg=channel.json_body)
+        self.assertEqual(channel.json_body["total"], 2)
+        self.assertEqual(len(channel.json_body["room_reports"]), 2)
+        self.assertNotIn("next_token", channel.json_body)
+        self._check_fields(channel.json_body["room_reports"])
+
+        for report in channel.json_body["room_reports"]:
+            self.assertEqual(report["room_id"], self.room_id1)
+
+    def test_filter_user(self) -> None:
+        """
+        Testing list of reported rooms with a filter of user
+        """
+
+        channel = self.make_request(
+            "GET",
+            self.url + "?user_id=%s" % self.other_user,
+            access_token=self.admin_user_tok,
+        )
+
+        self.assertEqual(200, channel.code, msg=channel.json_body)
+        self.assertEqual(channel.json_body["total"], 2)
+        self.assertEqual(len(channel.json_body["room_reports"]), 2)
+        self.assertNotIn("next_token", channel.json_body)
+        self._check_fields(channel.json_body["room_reports"])
+
+        for report in channel.json_body["room_reports"]:
+            self.assertEqual(report["user_id"], self.other_user)
+
+    def test_filter_user_and_room(self) -> None:
+        """
+        Testing list of reported rooms with a filter of user and room
+        """
+
+        channel = self.make_request(
+            "GET",
+            self.url + "?user_id=%s&room_id=%s" % (self.other_user, self.room_id1),
+            access_token=self.admin_user_tok,
+        )
+
+        self.assertEqual(200, channel.code, msg=channel.json_body)
+        self.assertEqual(channel.json_body["total"], 1)
+        self.assertEqual(len(channel.json_body["room_reports"]), 1)
+        self.assertNotIn("next_token", channel.json_body)
+        self._check_fields(channel.json_body["room_reports"])
+
+        for report in channel.json_body["room_reports"]:
+            self.assertEqual(report["user_id"], self.other_user)
+            self.assertEqual(report["room_id"], self.room_id1)
+
+    def test_valid_search_order(self) -> None:
+        """
+        Testing search order. Order by timestamps.
+        """
+
+        # fetch the most recent first, largest timestamp
+        channel = self.make_request(
+            "GET",
+            self.url + "?dir=b",
+            access_token=self.admin_user_tok,
+        )
+
+        self.assertEqual(200, channel.code, msg=channel.json_body)
+        self.assertEqual(channel.json_body["total"], 4)
+        self.assertEqual(len(channel.json_body["room_reports"]), 4)
+        report = 1
+        while report < len(channel.json_body["room_reports"]):
+            self.assertGreaterEqual(
+                channel.json_body["room_reports"][report - 1]["received_ts"],
+                channel.json_body["room_reports"][report]["received_ts"],
+            )
+            report += 1
+
+        # fetch the oldest first, smallest timestamp
+        channel = self.make_request(
+            "GET",
+            self.url + "?dir=f",
+            access_token=self.admin_user_tok,
+        )
+
+        self.assertEqual(200, channel.code, msg=channel.json_body)
+        self.assertEqual(channel.json_body["total"], 4)
+        self.assertEqual(len(channel.json_body["room_reports"]), 4)
+        report = 1
+        while report < len(channel.json_body["room_reports"]):
+            self.assertLessEqual(
+                channel.json_body["room_reports"][report - 1]["received_ts"],
+                channel.json_body["room_reports"][report]["received_ts"],
+            )
+            report += 1
+
+    def test_invalid_search_order(self) -> None:
+        """
+        Testing that a invalid search order returns a 400
+        """
+
+        channel = self.make_request(
+            "GET",
+            self.url + "?dir=bar",
+            access_token=self.admin_user_tok,
+        )
+
+        self.assertEqual(400, channel.code, msg=channel.json_body)
+        self.assertEqual(Codes.INVALID_PARAM, channel.json_body["errcode"])
+        self.assertEqual(
+            "Query parameter 'dir' must be one of ['b', 'f']",
+            channel.json_body["error"],
+        )
+
+    def test_limit_is_negative(self) -> None:
+        """
+        Testing that a negative limit parameter returns a 400
+        """
+
+        channel = self.make_request(
+            "GET",
+            self.url + "?limit=-5",
+            access_token=self.admin_user_tok,
+        )
+
+        self.assertEqual(400, channel.code, msg=channel.json_body)
+        self.assertEqual(Codes.INVALID_PARAM, channel.json_body["errcode"])
+
+    def test_from_is_negative(self) -> None:
+        """
+        Testing that a negative from parameter returns a 400
+        """
+
+        channel = self.make_request(
+            "GET",
+            self.url + "?from=-5",
+            access_token=self.admin_user_tok,
+        )
+
+        self.assertEqual(400, channel.code, msg=channel.json_body)
+        self.assertEqual(Codes.INVALID_PARAM, channel.json_body["errcode"])
+
+    def test_next_token(self) -> None:
+        """
+        Testing that `next_token` appears at the right place
+        """
+
+        # `next_token` does not appear
+        # Number of results is the number of entries
+        channel = self.make_request(
+            "GET",
+            self.url + "?limit=4",
+            access_token=self.admin_user_tok,
+        )
+
+        self.assertEqual(200, channel.code, msg=channel.json_body)
+        self.assertEqual(channel.json_body["total"], 4)
+        self.assertEqual(len(channel.json_body["room_reports"]), 4)
+        self.assertNotIn("next_token", channel.json_body)
+
+        # `next_token` does not appear
+        # Number of max results is larger than the number of entries
+        channel = self.make_request(
+            "GET",
+            self.url + "?limit=5",
+            access_token=self.admin_user_tok,
+        )
+
+        self.assertEqual(200, channel.code, msg=channel.json_body)
+        self.assertEqual(channel.json_body["total"], 4)
+        self.assertEqual(len(channel.json_body["room_reports"]), 4)
+        self.assertNotIn("next_token", channel.json_body)
+
+        # `next_token` does appear
+        # Number of max results is smaller than the number of entries
+        channel = self.make_request(
+            "GET",
+            self.url + "?limit=3",
+            access_token=self.admin_user_tok,
+        )
+
+        self.assertEqual(200, channel.code, msg=channel.json_body)
+        self.assertEqual(channel.json_body["total"], 4)
+        self.assertEqual(len(channel.json_body["room_reports"]), 3)
+        self.assertEqual(channel.json_body["next_token"], 3)
+
+        # Check
+        # Set `from` to value of `next_token` for request remaining entries
+        # `next_token` does not appear
+        channel = self.make_request(
+            "GET",
+            self.url + "?from=3",
+            access_token=self.admin_user_tok,
+        )
+
+        self.assertEqual(200, channel.code, msg=channel.json_body)
+        self.assertEqual(channel.json_body["total"], 4)
+        self.assertEqual(len(channel.json_body["room_reports"]), 1)
+        self.assertNotIn("next_token", channel.json_body)
+
+    def _report_room(self, room_id: str, user_tok: str) -> None:
+        """Report a room"""
+        channel = self.make_request(
+            "POST",
+            "rooms/%s/report" % room_id,
+            {"reason": "this makes me sad"},
+            access_token=user_tok,
+        )
+        self.assertEqual(200, channel.code, msg=channel.json_body)
+
+    def _report_room_without_parameters(self, room_id: str, user_tok: str) -> None:
+        """Report a room, but omit reason"""
+        channel = self.make_request(
+            "POST",
+            "rooms/%s/report" % room_id,
+            {},
+            access_token=user_tok,
+        )
+        self.assertEqual(200, channel.code, msg=channel.json_body)
+
+    def _check_fields(self, content: List[JsonDict]) -> None:
+        """Checks that all attributes are present in a room report"""
+        for c in content:
+            self.assertIn("id", c)
+            self.assertIn("received_ts", c)
+            self.assertIn("room_id", c)
+            self.assertIn("user_id", c)
+            self.assertIn("canonical_alias", c)
+            self.assertIn("name", c)
+            self.assertIn("reason", c)
+
+    def test_count_correct_despite_table_deletions(self) -> None:
+        """
+        Tests that the count matches the number of rows, even if rows in joined tables
+        are missing.
+        """
+
+        # Delete rows from room_stats_state for one of our rooms.
+        self.get_success(
+            self.hs.get_datastores().main.db_pool.simple_delete(
+                "room_stats_state", {"room_id": self.room_id1}, desc="_"
+            )
+        )
+
+        channel = self.make_request(
+            "GET",
+            self.url,
+            access_token=self.admin_user_tok,
+        )
+
+        self.assertEqual(200, channel.code, msg=channel.json_body)
+        # The 'total' field is 2 because only 2 reports will actually
+        # be retrievable since we deleted the rows in the room_stats_state
+        # table.
+        self.assertEqual(channel.json_body["total"], 2)
+        # This is consistent with the number of rows actually returned.
+        self.assertEqual(len(channel.json_body["room_reports"]), 2)