]> git.aero2k.de Git - dfde/trace-search.git/commitdiff
init master
authorThorsten <mail@aero2k.de>
Sat, 11 Jan 2025 16:32:42 +0000 (17:32 +0100)
committerThorsten <mail@aero2k.de>
Sat, 11 Jan 2025 16:32:42 +0000 (17:32 +0100)
.gitignore [new file with mode: 0644]
Readme.md [new file with mode: 0644]
main.py [new file with mode: 0644]
pyproject.toml [new file with mode: 0644]
session_data.json.example [new file with mode: 0644]

diff --git a/.gitignore b/.gitignore
new file mode 100644 (file)
index 0000000..2397633
--- /dev/null
@@ -0,0 +1,5 @@
+.idea
+session_data.json
+posts.csv
+threads.csv
+*.egg-info
diff --git a/Readme.md b/Readme.md
new file mode 100644 (file)
index 0000000..f0a3689
--- /dev/null
+++ b/Readme.md
@@ -0,0 +1,18 @@
+# Install
+
+```
+mkvirtualenv dfde-trace-search
+pip install -e .
+```
+
+# Run
+
+```
+cp session_data.json.example session_data.json
+vim session_data.json # fill in values from cookie as seen in browser devtools, phpbb3_6cb66_k=83472398472472384723...
+python main.py
+```
+
+# Inspect
+
+TODO: document analysis steps. For now, open `threads.csv` and `posts.csv` directly (e.g. in a spreadsheet) to inspect the logged counts over time.
diff --git a/main.py b/main.py
new file mode 100644 (file)
index 0000000..ec61187
--- /dev/null
+++ b/main.py
@@ -0,0 +1,217 @@
+import json
+from dataclasses import dataclass
+from enum import Enum
+from typing import Optional
+
+import requests
+import re
+import csv
+from bs4 import BeautifulSoup
+from datetime import datetime
+import sys
+# import logging
+# logging.basicConfig(stream=sys.stderr, level=logging.DEBUG)
+
@dataclass
class Session:
    """phpBB session cookie values, loaded from session_data.json."""
    u: str    # user id cookie value
    k: str    # "remember me" key cookie value
    sid: str  # session id cookie value

    def cookies(self):
        """Return the cookie dict (with the board's cookie prefix) for requests."""
        prefix = "phpbb3_6cb66_"
        return {f"{prefix}{field}": getattr(self, field) for field in ("k", "sid", "u")}
+
+
@dataclass
class Member:
    """A forum member to search for."""
    # Display name, sent as the `author` search parameter.
    name: str
    # Numeric phpBB user id, sent as the `author_id` search parameter.
    uid: int
+
+
class Mode(Enum):
    """Search flavours supported by search_phpbb_member.

    egosearch finds threads the logged-in user participated in; the other
    modes search a given member's posts or threads, by user id or by name.
    """
    egosearch = 1
    search_posts_by_id = 2
    search_posts_by_name = 3
    search_threads_by_id = 4
    search_threads_by_name = 5
+
+
+date_pattern = re.compile(r"(\d{2}.\d{2}.\d{4}, \d{1,2}:\d{2})")
+
+
def search_phpbb_member(session: Session, member_to_search: Optional[Member], mode: Mode, searcher_username, forum_url):
    """
    Search a phpBB forum for a member's posts/threads and append one result
    row to a CSV log (threads.csv for thread-style modes, posts.csv for
    post-style modes).

    Args:
        session: The phpBB session attributes (cookie values).
        member_to_search: The member to search for; None for Mode.egosearch.
        mode: Which search flavour to run (see Mode).
        searcher_username: The username performing the search.
        forum_url: The base URL of the phpBB forum, without trailing slash.

    Raises:
        ValueError: if mode is not a known Mode, or a thread result row has
            no parseable date.
        requests.HTTPError: on a non-2xx response.
    """
    search_url = f"{forum_url}/forum/search.php"
    cookies = session.cookies()

    thread_modes = (Mode.egosearch, Mode.search_threads_by_id, Mode.search_threads_by_name)
    post_modes = (Mode.search_posts_by_id, Mode.search_posts_by_name)

    # Pick the output file and its header row for the result flavour.
    # NOTE: renamed to csv_headers -- the original bound this to `headers` and
    # later silently shadowed it with the HTTP header dict.
    if mode in thread_modes:
        csv_headers = [
            "Searched User", "Searcher", "Thread Count", "Last Post ID", "Last Thread Date/Time", "Mode", "Search Timestamp"
        ]
        csv_filename = "threads.csv"
    elif mode in post_modes:
        csv_headers = [
            "Searched User", "Searcher", "Post Count", "Last Post ID", "Last Post Date/Time", "Mode", "Search Timestamp"
        ]
        csv_filename = "posts.csv"
    else:
        raise ValueError("invalid Mode")

    # Build the query string for the chosen search flavour.
    if mode == Mode.egosearch:
        params = {"search_id": "egosearch"}
    elif mode == Mode.search_posts_by_id:
        params = {"author_id": member_to_search.uid, "sr": "posts"}
    elif mode == Mode.search_posts_by_name:
        params = {"author": member_to_search.name, "sr": "posts"}
    elif mode == Mode.search_threads_by_id:
        params = {"author_id": member_to_search.uid, "sr": "threads"}
    else:  # Mode.search_threads_by_name -- mode was validated above
        params = {"author": member_to_search.name, "sr": "threads"}

    # Create CSV file with header row if it doesn't exist yet.
    ensure_csv_file(csv_filename, csv_headers)

    http_headers = {
        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:133.0) Gecko/20100101 Firefox/133.0",
    }

    response = requests.get(search_url, cookies=cookies, headers=http_headers, params=params)
    response.raise_for_status()
    # requests already buffers the full body; the original re-assembled it
    # from iter_content chunks for no benefit.
    soup = BeautifulSoup(response.content, 'html.parser')

    # Extract the result count from the pagination line ("N Treffer").
    count = 0
    pagination = soup.find('div', class_='pagination')
    if pagination:
        match = re.search(r"(\d+) Treffer", pagination.text)
        if match:
            count = int(match.group(1))

    # Defaults for the zero-result case; the original crashed with an
    # AttributeError on select_one(...) returning None.
    last_item_id = ""
    last_post_createtime = ""

    if mode in thread_modes:
        # Thread view: the first topic-list row carries the newest activity.
        search_element = soup.select_one(".topiclist .row:nth-child(1)")
        if search_element is not None:
            m = date_pattern.search(search_element.select_one(".lastpost").text)
            if not m:
                raise ValueError("invalid date/date not found")
            last_post_createtime = datetime.strptime(m.group(1), "%d.%m.%Y, %H:%M")
            last_item_id = search_element.select_one(".lastpost a[title='Gehe zum letzten Beitrag']")["href"].split("p")[-1]
    else:
        # Post view: the first search result is the 4th child of #page-body.
        # (Assumes the forum's search template keeps that layout -- TODO confirm.)
        search_element = soup.select_one("#page-body .post.search:nth-child(4)")
        if search_element is not None:
            last_post_createtime = datetime.strptime(search_element.select_one(".search-result-date").text,
                                                     '%d.%m.%Y, %H:%M')
            last_item_id = search_element.select_one(".postbody h3 a[href]")["href"].split("p")[-1]

    # For egosearch there is no target member; log the searcher themselves.
    searched_name = searcher_username if mode == Mode.egosearch else member_to_search.name

    # Append one observation row.
    with open(csv_filename, 'a', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(
            [searched_name, searcher_username, count, last_item_id, last_post_createtime,
             mode, datetime.now().isoformat()])

    print(f"Search for {member_to_search} by {searcher_username} (mode {mode}) complete. Results appended to {csv_filename}")
+
+
def main():
    """Run an egosearch plus four per-member searches and log results to CSV."""
    with open("session_data.json") as fh:
        session = Session(**json.load(fh))
    forum_url = "https://debianforum.de"  # base URL of the target forum
    searcher_username = "TRex"  # account the session cookies belong to

    members_to_search = [
        Member("cosinus", 41132),
        Member("Meillo", 13014),
        Member("TRex", 18865),
        Member("feltel", 2),
    ]

    # Baseline: threads the searcher themselves participated in.
    search_phpbb_member(
        session=session,
        member_to_search=None,
        mode=Mode.egosearch,
        searcher_username=searcher_username,
        forum_url=forum_url,
    )

    # For each member, run every remaining mode (posts/threads, by id/name).
    per_member_modes = (
        Mode.search_posts_by_id,
        Mode.search_posts_by_name,
        Mode.search_threads_by_id,
        Mode.search_threads_by_name,
    )
    for member in members_to_search:
        for mode in per_member_modes:
            search_phpbb_member(
                session=session,
                member_to_search=member,
                mode=mode,
                searcher_username=searcher_username,
                forum_url=forum_url,
            )
+
+
def ensure_csv_file(csv_filename, headers):
    """Create csv_filename with a header row unless it already exists."""
    try:
        # 'x' = exclusive creation: an existing file is never truncated.
        with open(csv_filename, 'x', newline='', encoding='utf-8') as out:
            csv.writer(out).writerow(headers)
    except FileExistsError:
        # Already initialized on a previous run; keep accumulated rows.
        pass
+
+
+if __name__ == "__main__":
+    main()
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644 (file)
index 0000000..e979d05
--- /dev/null
@@ -0,0 +1,37 @@
+[build-system]
+requires = ["setuptools>=61.0"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "phpbb_member_search"  # Choose a suitable name
+version = "0.1.0"  # Initial version
+description = "A script to search for members in a phpBB forum and log results."
+readme = "Readme.md"  # must match the actual filename's case (file is Readme.md, not README.md)
+authors = [{ name = "Your Name", email = "your.email@example.com" }]
+classifiers = [
+    "Programming Language :: Python :: 3",
+    "License :: OSI Approved :: MIT License",  # Or your preferred license
+    "Operating System :: OS Independent",
+]
+dependencies = [
+    "requests",
+    "beautifulsoup4",
+]
+
+[project.optional-dependencies]
+dev = [
+    "flake8",
+    "mypy",
+    "pytest",
+]
+
+[project.scripts]
+phpbb-search = "main:main" # entry point: main() in main.py
+
+[tool.setuptools]
+# You can add more setuptools specific options here if needed.
+# For example, to include data files:
+# package-data = {"your_package": ["*.txt", "*.dat"]}
+
+[tool.setuptools.packages]
+find = {} # Automatically find packages in your project
\ No newline at end of file
diff --git a/session_data.json.example b/session_data.json.example
new file mode 100644 (file)
index 0000000..7facbb9
--- /dev/null
@@ -0,0 +1,5 @@
+{
+  "k": "fillme",
+  "sid": "fillme",
+  "u": "fillme"
+}
\ No newline at end of file