From: Thorsten Date: Thu, 11 Feb 2021 16:40:46 +0000 (+0100) Subject: tokenization did require changes as httpsyoutu is not exactly the searched token X-Git-Url: https://git.aero2k.de/?a=commitdiff_plain;h=1f5b8fbb96e10ca0c45fd099aa037dbfb58695ee;p=urlbot-v3.git tokenization did require changes as httpsyoutu is not exactly the searched token --- diff --git a/distbot/plugins/url.py b/distbot/plugins/url.py index 6b41dc9..802dee2 100644 --- a/distbot/plugins/url.py +++ b/distbot/plugins/url.py @@ -89,6 +89,9 @@ class URLResolver(Worker): out = [] for url in result[:10]: + if "https://youtu" in url: + # does not work, another plugin does the job. + continue if any([re.match(b, url) for b in url_blacklist]): logger.info('url blacklist match for ' + url) break diff --git a/distbot/plugins/youtube.py b/distbot/plugins/youtube.py index 0587a8f..4ea3aa0 100644 --- a/distbot/plugins/youtube.py +++ b/distbot/plugins/youtube.py @@ -1,3 +1,4 @@ +import logging import re import requests @@ -5,16 +6,11 @@ import requests from distbot.bot.worker import Worker from distbot.common.action import Action +log = logging.getLogger(__name__) + class Youtube(Worker): - """ - # approach 1: https://www.googleapis.com/youtube/v3/videos?part=snippet&id={YOUTUBE_VIDEO_ID}&fields=items(id,snippet)&key={YOUR_API_KEY} - # approach 2 (without key): https://www.youtube.com/oembed?url=http://youtube.com/watch?v={YOUTUBE_VIDEO_ID}&format=json - """ - binding_keys = [ - "#.youtube.#", "#.youtu.#", - "#.youtube.#.nospoiler.#", "#.youtu.#.nospoiler.#", - ] + binding_keys = Worker.CATCH_ALL description = "resolves titles of posted youtube URLs" URL_TEMPLATE = "https://www.youtube.com/oembed?url=http://youtube.com/watch?v={YOUTUBE_VIDEO_ID}&format=json" @@ -26,6 +22,7 @@ class Youtube(Worker): regex = re.compile(regex) result = regex.search(body) if not result: + log.warning("Could not extract youtube video ID from: %s", body) return None else: return result.groupdict().get("youtubeid", None) @@ -36,6 
+33,11 @@ class Youtube(Worker): return response.json().get("title") def parse_body(self, msg): + + result = re.findall(r'(https?://youtu[^\s>]+)', msg["body"]) + if not result or "(nospoiler)" in msg["body"]: + return + try: youtube_id = self.get_youtube_id_from_url(msg["body"]) if not youtube_id: