Include method in thumbnail media name (#7124)

This fixes an issue where different methods (crop/scale) overwrite each other.

This first tries the new path. If that fails and we are looking for a
remote thumbnail, it tries the old path. If that still isn't found, it
continues as normal.

This should probably be removed in the future, after some of the newer
thumbnails were generated with the new path on most deployments. Then
the overhead should be minimal if the other thumbnails need to be
regenerated.

Signed-off-by: Nicolas Werner <nicolas.werner@hotmail.de>
This commit is contained in:
DeepBlueV7.X 2020-09-08 16:19:50 +00:00 committed by GitHub
parent 63c0e9e195
commit 560f3b8609
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 181 additions and 1 deletions

1
changelog.d/7124.bugfix Normal file
View file

@ -0,0 +1 @@
Fix a bug in the media repository where remote thumbnails with the same size but different crop methods would overwrite each other. Contributed by @deepbluev7.

View file

@ -80,7 +80,7 @@ class MediaFilePaths:
self, server_name, file_id, width, height, content_type, method
):
top_level_type, sub_type = content_type.split("/")
file_name = "%i-%i-%s-%s" % (width, height, top_level_type, sub_type)
file_name = "%i-%i-%s-%s-%s" % (width, height, top_level_type, sub_type, method)
return os.path.join(
"remote_thumbnail",
server_name,
@ -92,6 +92,23 @@ class MediaFilePaths:
remote_media_thumbnail = _wrap_in_base_path(remote_media_thumbnail_rel)
# Legacy path that was used to store thumbnails previously.
# Should be removed after some time, when most of the thumbnails are stored
# using the new path.
def remote_media_thumbnail_rel_legacy(
self, server_name, file_id, width, height, content_type
):
top_level_type, sub_type = content_type.split("/")
file_name = "%i-%i-%s-%s" % (width, height, top_level_type, sub_type)
return os.path.join(
"remote_thumbnail",
server_name,
file_id[0:2],
file_id[2:4],
file_id[4:],
file_name,
)
def remote_media_thumbnail_dir(self, server_name, file_id):
return os.path.join(
self.base_path,

View file

@ -147,6 +147,20 @@ class MediaStorage:
if os.path.exists(local_path):
return FileResponder(open(local_path, "rb"))
# Fallback for paths without method names
# Should be removed in the future
if file_info.thumbnail and file_info.server_name:
legacy_path = self.filepaths.remote_media_thumbnail_rel_legacy(
server_name=file_info.server_name,
file_id=file_info.file_id,
width=file_info.thumbnail_width,
height=file_info.thumbnail_height,
content_type=file_info.thumbnail_type,
)
legacy_local_path = os.path.join(self.local_media_directory, legacy_path)
if os.path.exists(legacy_local_path):
return FileResponder(open(legacy_local_path, "rb"))
for provider in self.storage_providers:
res = await provider.fetch(path, file_info) # type: Any
if res:
@ -170,6 +184,20 @@ class MediaStorage:
if os.path.exists(local_path):
return local_path
# Fallback for paths without method names
# Should be removed in the future
if file_info.thumbnail and file_info.server_name:
legacy_path = self.filepaths.remote_media_thumbnail_rel_legacy(
server_name=file_info.server_name,
file_id=file_info.file_id,
width=file_info.thumbnail_width,
height=file_info.thumbnail_height,
content_type=file_info.thumbnail_type,
)
legacy_local_path = os.path.join(self.local_media_directory, legacy_path)
if os.path.exists(legacy_local_path):
return legacy_local_path
dirname = os.path.dirname(local_path)
if not os.path.exists(dirname):
os.makedirs(dirname)

View file

@ -17,6 +17,10 @@ from typing import Any, Dict, Iterable, List, Optional, Tuple
from synapse.storage._base import SQLBaseStore
from synapse.storage.database import DatabasePool
BG_UPDATE_REMOVE_MEDIA_REPO_INDEX_WITHOUT_METHOD = (
"media_repository_drop_index_wo_method"
)
class MediaRepositoryBackgroundUpdateStore(SQLBaseStore):
def __init__(self, database: DatabasePool, db_conn, hs):
@ -32,6 +36,59 @@ class MediaRepositoryBackgroundUpdateStore(SQLBaseStore):
where_clause="url_cache IS NOT NULL",
)
# The following the updates add the method to the unique constraint of
# the thumbnail databases. That fixes an issue, where thumbnails of the
# same resolution, but different methods could overwrite one another.
# This can happen with custom thumbnail configs or with dynamic thumbnailing.
self.db_pool.updates.register_background_index_update(
update_name="local_media_repository_thumbnails_method_idx",
index_name="local_media_repository_thumbn_media_id_width_height_method_key",
table="local_media_repository_thumbnails",
columns=[
"media_id",
"thumbnail_width",
"thumbnail_height",
"thumbnail_type",
"thumbnail_method",
],
unique=True,
)
self.db_pool.updates.register_background_index_update(
update_name="remote_media_repository_thumbnails_method_idx",
index_name="remote_media_repository_thumbn_media_origin_id_width_height_method_key",
table="remote_media_cache_thumbnails",
columns=[
"media_origin",
"media_id",
"thumbnail_width",
"thumbnail_height",
"thumbnail_type",
"thumbnail_method",
],
unique=True,
)
self.db_pool.updates.register_background_update_handler(
BG_UPDATE_REMOVE_MEDIA_REPO_INDEX_WITHOUT_METHOD,
self._drop_media_index_without_method,
)
async def _drop_media_index_without_method(self, progress, batch_size):
def f(txn):
txn.execute(
"ALTER TABLE local_media_repository_thumbnails DROP CONSTRAINT IF EXISTS local_media_repository_thumbn_media_id_thumbnail_width_thum_key"
)
txn.execute(
"ALTER TABLE remote_media_cache_thumbnails DROP CONSTRAINT IF EXISTS remote_media_repository_thumbn_media_id_thumbnail_width_thum_key"
)
await self.db_pool.runInteraction("drop_media_indices_without_method", f)
await self.db_pool.updates._end_background_update(
BG_UPDATE_REMOVE_MEDIA_REPO_INDEX_WITHOUT_METHOD
)
return 1
class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore):
"""Persistence for attachments and avatars"""

View file

@ -0,0 +1,33 @@
/* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* This adds the method to the unique key constraint of the thumbnail databases.
* Otherwise you can't have a scaled and a cropped thumbnail with the same
* resolution, which happens quite often with dynamic thumbnailing.
* This is the postgres specific migration modifying the table with a background
* migration.
*/
-- add new index that includes method to local media
INSERT INTO background_updates (update_name, progress_json) VALUES
('local_media_repository_thumbnails_method_idx', '{}');
-- add new index that includes method to remote media
INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES
('remote_media_repository_thumbnails_method_idx', '{}', 'local_media_repository_thumbnails_method_idx');
-- drop old index
INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES
('media_repository_drop_index_wo_method', '{}', 'remote_media_repository_thumbnails_method_idx');

View file

@ -0,0 +1,44 @@
/* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* This adds the method to the unique key constraint of the thumbnail databases.
* Otherwise you can't have a scaled and a cropped thumbnail with the same
* resolution, which happens quite often with dynamic thumbnailing.
* This is a sqlite specific migration, since sqlite can't modify the unique
* constraint of a table without recreating it.
*/
CREATE TABLE local_media_repository_thumbnails_new ( media_id TEXT, thumbnail_width INTEGER, thumbnail_height INTEGER, thumbnail_type TEXT, thumbnail_method TEXT, thumbnail_length INTEGER, UNIQUE ( media_id, thumbnail_width, thumbnail_height, thumbnail_type, thumbnail_method ) );
INSERT INTO local_media_repository_thumbnails_new
SELECT media_id, thumbnail_width, thumbnail_height, thumbnail_type, thumbnail_method, thumbnail_length
FROM local_media_repository_thumbnails;
DROP TABLE local_media_repository_thumbnails;
ALTER TABLE local_media_repository_thumbnails_new RENAME TO local_media_repository_thumbnails;
CREATE INDEX local_media_repository_thumbnails_media_id ON local_media_repository_thumbnails (media_id);
CREATE TABLE IF NOT EXISTS remote_media_cache_thumbnails_new ( media_origin TEXT, media_id TEXT, thumbnail_width INTEGER, thumbnail_height INTEGER, thumbnail_method TEXT, thumbnail_type TEXT, thumbnail_length INTEGER, filesystem_id TEXT, UNIQUE ( media_origin, media_id, thumbnail_width, thumbnail_height, thumbnail_type, thumbnail_method ) );
INSERT INTO remote_media_cache_thumbnails_new
SELECT media_origin, media_id, thumbnail_width, thumbnail_height, thumbnail_method, thumbnail_type, thumbnail_length, filesystem_id
FROM remote_media_cache_thumbnails;
DROP TABLE remote_media_cache_thumbnails;
ALTER TABLE remote_media_cache_thumbnails_new RENAME TO remote_media_cache_thumbnails;