From: Thorsten <mail@aero2k.de>
Date: Thu, 11 Feb 2021 16:40:46 +0000 (+0100)
Subject: tokenization did require changes as httpsyoutu is not exactly the searched token
X-Git-Url: https://git.aero2k.de/?a=commitdiff_plain;h=1f5b8fbb96e10ca0c45fd099aa037dbfb58695ee;p=urlbot-v3.git

tokenization did require changes as httpsyoutu is not exactly the searched token
---

diff --git a/distbot/plugins/url.py b/distbot/plugins/url.py
index 6b41dc9..802dee2 100644
--- a/distbot/plugins/url.py
+++ b/distbot/plugins/url.py
@@ -89,6 +89,9 @@ class URLResolver(Worker):
 
         out = []
         for url in result[:10]:
+            if "https://youtu" in url:
+                # does not work here; another plugin does the job.
+                continue
             if any([re.match(b, url) for b in url_blacklist]):
                 logger.info('url blacklist match for ' + url)
                 break
diff --git a/distbot/plugins/youtube.py b/distbot/plugins/youtube.py
index 0587a8f..4ea3aa0 100644
--- a/distbot/plugins/youtube.py
+++ b/distbot/plugins/youtube.py
@@ -1,3 +1,4 @@
+import logging
 import re
 
 import requests
@@ -5,16 +6,11 @@ import requests
 from distbot.bot.worker import Worker
 from distbot.common.action import Action
 
+log = logging.getLogger(__name__)
+
 
 class Youtube(Worker):
-    """
-    # approach 1: https://www.googleapis.com/youtube/v3/videos?part=snippet&id={YOUTUBE_VIDEO_ID}&fields=items(id,snippet)&key={YOUR_API_KEY}
-    # approach 2 (without key): https://www.youtube.com/oembed?url=http://youtube.com/watch?v={YOUTUBE_VIDEO_ID}&format=json
-    """
-    binding_keys = [
-        "#.youtube.#", "#.youtu.#",
-        "#.youtube.#.nospoiler.#", "#.youtu.#.nospoiler.#",
-    ]
+    binding_keys = Worker.CATCH_ALL
     description = "resolves titles of posted youtube URLs"
 
     URL_TEMPLATE = "https://www.youtube.com/oembed?url=http://youtube.com/watch?v={YOUTUBE_VIDEO_ID}&format=json"
@@ -26,6 +22,7 @@ class Youtube(Worker):
         regex = re.compile(regex)
         result = regex.search(body)
         if not result:
+            log.warning("Could not extract youtube video ID from: %s", body)
             return None
         else:
             return result.groupdict().get("youtubeid", None)
@@ -36,6 +33,11 @@ class Youtube(Worker):
         return response.json().get("title")
 
     def parse_body(self, msg):
+
+        result = re.findall(r'(https?://youtu[^\s>]+)', msg["body"])
+        if not result or "(nospoiler)" in msg["body"]:
+            return
+
         try:
             youtube_id = self.get_youtube_id_from_url(msg["body"])
             if not youtube_id: