git.aero2k.de Git - urlbot-v3.git/commitdiff
fix searx after stats page has moved
authorThorsten <mail@aero2k.de>
Tue, 25 Feb 2020 18:53:55 +0000 (19:53 +0100)
committerThorsten <mail@aero2k.de>
Tue, 25 Feb 2020 18:53:55 +0000 (19:53 +0100)
distbot/plugins/searx.py

index 3c1ca1aada082f6e29e899f8d4bb7691d82ccda9..90b36705951aae7d6a71971c162e59b45bcd57af 100644 (file)
@@ -14,7 +14,6 @@ from distbot.common.utils import retry
 
 logger = logging.getLogger(__name__)
 
-
 if not hasattr(json, 'JSONDecodeError'):
     json.JSONDecodeError = ValueError
 
@@ -37,19 +36,19 @@ class Searx(Worker):
 
     def __init__(self, actionqueue):
         super().__init__(actionqueue)
-        self.search_list = self.fetch_all_searx_engines()
+        try:
+            self.search_list = self.fetch_all_searx_engines()
+        except RuntimeError as e:
+            # will eventually fix itself
+            logger.exception(e)
+            pass
 
     @staticmethod
     def fetch_all_searx_engines():
-        # response = requests.get("http://stats.searx.oe5tpo.com")
-        response = requests.get("https://stats.searx.xyz")
+        response = requests.get("https://searx.space/data/instances.json")
         response.raise_for_status()
-        tree = etree.XML(
-            response.content,
-            parser=html.HTMLParser()
-        )
-        searxes = [str(x) for x in tree.xpath('//span[text()[contains(.,"200 - OK")]]/../..//a/text()') if
-                   str(x).startswith("http")]
+        searxes = [url for (url, details) in response.json().get("instances").items() if
+                   details.get("error") is None and details.get("network_type") != "tor"]
         logger.info("Registered {} searxes".format(len(searxes)))
         if not searxes:
             raise RuntimeError("not a single searx discovered... " + str(response.content))