mirror of
https://github.com/element-hq/synapse.git
synced 2024-12-14 11:57:44 +00:00
Merge pull request #721 from matrix-org/erikj/spider
Sanitize the optional dependencies for spider API
This commit is contained in:
commit
ceeb5b909f
4 changed files with 42 additions and 39 deletions
|
@ -13,10 +13,24 @@
|
|||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from ._base import Config
|
||||
from ._base import Config, ConfigError
|
||||
from collections import namedtuple
|
||||
|
||||
import sys
|
||||
|
||||
MISSING_NETADDR = (
|
||||
"Missing netaddr library. This is required for URL preview API."
|
||||
)
|
||||
|
||||
MISSING_LXML = (
|
||||
"""Missing lxml library. This is required for URL preview API.
|
||||
|
||||
Install by running:
|
||||
pip install lxml
|
||||
|
||||
Requires libxslt1-dev system package.
|
||||
"""
|
||||
)
|
||||
|
||||
|
||||
ThumbnailRequirement = namedtuple(
|
||||
"ThumbnailRequirement", ["width", "height", "method", "media_type"]
|
||||
|
@ -62,18 +76,32 @@ class ContentRepositoryConfig(Config):
|
|||
self.thumbnail_requirements = parse_thumbnail_requirements(
|
||||
config["thumbnail_sizes"]
|
||||
)
|
||||
self.url_preview_enabled = config["url_preview_enabled"]
|
||||
self.url_preview_enabled = config.get("url_preview_enabled", False)
|
||||
if self.url_preview_enabled:
|
||||
try:
|
||||
from netaddr import IPSet
|
||||
if "url_preview_ip_range_blacklist" in config:
|
||||
self.url_preview_ip_range_blacklist = IPSet(
|
||||
config["url_preview_ip_range_blacklist"]
|
||||
)
|
||||
if "url_preview_url_blacklist" in config:
|
||||
self.url_preview_url_blacklist = config["url_preview_url_blacklist"]
|
||||
import lxml
|
||||
lxml # To stop unused lint.
|
||||
except ImportError:
|
||||
sys.stderr.write("\nmissing netaddr dep - disabling preview_url API\n")
|
||||
raise ConfigError(MISSING_LXML)
|
||||
|
||||
try:
|
||||
from netaddr import IPSet
|
||||
except ImportError:
|
||||
raise ConfigError(MISSING_NETADDR)
|
||||
|
||||
if "url_preview_ip_range_blacklist" in config:
|
||||
self.url_preview_ip_range_blacklist = IPSet(
|
||||
config["url_preview_ip_range_blacklist"]
|
||||
)
|
||||
else:
|
||||
raise ConfigError(
|
||||
"For security, you must specify an explicit target IP address "
|
||||
"blacklist in url_preview_ip_range_blacklist for url previewing "
|
||||
"to work"
|
||||
)
|
||||
|
||||
if "url_preview_url_blacklist" in config:
|
||||
self.url_preview_url_blacklist = config["url_preview_url_blacklist"]
|
||||
|
||||
def default_config(self, **kwargs):
|
||||
media_store = self.default_path("media_store")
|
||||
|
|
|
@ -43,7 +43,6 @@ CONDITIONAL_REQUIREMENTS = {
|
|||
"matrix_angular_sdk>=0.6.8": ["syweb>=0.6.8"],
|
||||
},
|
||||
"preview_url": {
|
||||
"lxml>=3.6.0": ["lxml"],
|
||||
"netaddr>=0.7.18": ["netaddr"],
|
||||
},
|
||||
}
|
||||
|
|
|
@ -80,8 +80,4 @@ class MediaRepositoryResource(Resource):
|
|||
self.putChild("thumbnail", ThumbnailResource(hs, filepaths))
|
||||
self.putChild("identicon", IdenticonResource())
|
||||
if hs.config.url_preview_enabled:
|
||||
try:
|
||||
self.putChild("preview_url", PreviewUrlResource(hs, filepaths))
|
||||
except Exception as e:
|
||||
logger.warn("Failed to mount preview_url")
|
||||
logger.exception(e)
|
||||
self.putChild("preview_url", PreviewUrlResource(hs, filepaths))
|
||||
|
|
|
@ -40,33 +40,11 @@ import ujson as json
|
|||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
try:
|
||||
from lxml import html
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
|
||||
class PreviewUrlResource(BaseMediaResource):
|
||||
isLeaf = True
|
||||
|
||||
def __init__(self, hs, filepaths):
|
||||
try:
|
||||
if html:
|
||||
pass
|
||||
except:
|
||||
raise RuntimeError("Disabling PreviewUrlResource as lxml not available")
|
||||
|
||||
if not hasattr(hs.config, "url_preview_ip_range_blacklist"):
|
||||
logger.warn(
|
||||
"For security, you must specify an explicit target IP address "
|
||||
"blacklist in url_preview_ip_range_blacklist for url previewing "
|
||||
"to work"
|
||||
)
|
||||
raise RuntimeError(
|
||||
"Disabling PreviewUrlResource as "
|
||||
"url_preview_ip_range_blacklist not specified"
|
||||
)
|
||||
|
||||
BaseMediaResource.__init__(self, hs, filepaths)
|
||||
self.client = SpiderHttpClient(hs)
|
||||
if hasattr(hs.config, "url_preview_url_blacklist"):
|
||||
|
@ -201,6 +179,8 @@ class PreviewUrlResource(BaseMediaResource):
|
|||
elif self._is_html(media_info['media_type']):
|
||||
# TODO: somehow stop a big HTML tree from exploding synapse's RAM
|
||||
|
||||
from lxml import html
|
||||
|
||||
try:
|
||||
tree = html.parse(media_info['filename'])
|
||||
og = yield self._calc_og(tree, media_info, requester)
|
||||
|
|
Loading…
Reference in a new issue