"""Log phpBB search results (hit count and newest post) for forum members.

Each search appends one row to ``threads.csv`` (thread-oriented searches,
including the "egosearch" of the logged-in user) or ``posts.csv``
(post-oriented searches).

Install::

    mkvirtualenv dfde-trace-search
    pip install -e .

Run::

    cp session_data.json.example session_data.json
    vim session_data.json  # fill in values from cookie as seen in browser
                           # devtools, phpbb3_6cb66_k=83472398472472384723...
    python main.py

``session_data.json`` has this shape::

    {
      "k": "fillme",
      "sid": "fillme",
      "u": "fillme"
    }

Inspect: TODO. Read threads.csv, posts.csv

Packaging notes: runtime dependencies are ``requests`` and
``beautifulsoup4``; the dev extras are ``flake8``, ``mypy`` and ``pytest``.
``.idea``, ``session_data.json``, ``posts.csv``, ``threads.csv`` and
``*.egg-info`` are local artefacts that are kept out of version control.
"""
import csv
import json
import re
import sys
from dataclasses import dataclass
from datetime import datetime
from enum import Enum
from typing import Dict, List, Optional, Tuple

import requests
from bs4 import BeautifulSoup

# Uncomment to trace the HTTP traffic while debugging:
# import logging
# logging.basicConfig(stream=sys.stderr, level=logging.DEBUG)


@dataclass
class Session:
    """Cookie values of an authenticated phpBB browser session."""

    u: str
    k: str
    sid: str

    def cookies(self):
        """Return the session values keyed by their phpBB cookie names."""
        prefix = "phpbb3_6cb66_"
        return {
            prefix + "k": self.k,
            prefix + "sid": self.sid,
            prefix + "u": self.u,
        }


@dataclass
class Member:
    """A forum member, identified by display name and numeric user id."""

    name: str
    uid: int


class Mode(Enum):
    """The kinds of search that can be issued against ``search.php``."""

    egosearch = 1
    search_posts_by_id = 2
    search_posts_by_name = 3
    search_threads_by_id = 4
    search_threads_by_name = 5


# Forum timestamps look like "11.01.2025, 16:32". The dots are escaped so
# that only literal dots are accepted as separators.
date_pattern = re.compile(r"(\d{2}\.\d{2}\.\d{4}, \d{1,2}:\d{2})")

_DATE_FORMAT = "%d.%m.%Y, %H:%M"
_HIT_COUNT_PATTERN = re.compile(r"(\d+) Treffer")
# Matches the post id in "?p=123", "&p=123" or the "#p123" anchor of a link.
_POST_ID_PATTERN = re.compile(r"(?:[?&]p=|#p)(\d+)")
_USER_AGENT = "Mozilla/5.0 (X11; Linux x86_64; rv:133.0) Gecko/20100101 Firefox/133.0"

# Modes whose result page is rendered as a topic list.
_THREAD_MODES = frozenset(
    {Mode.egosearch, Mode.search_threads_by_id, Mode.search_threads_by_name}
)
# Modes whose result page is rendered as a list of individual posts.
_POST_MODES = frozenset({Mode.search_posts_by_id, Mode.search_posts_by_name})

_THREAD_CSV_HEADERS = [
    "Searched User", "Searcher", "Thread Count", "Last Post ID",
    "Last Thread Date/Time", "Mode", "Search Timestamp",
]
_POST_CSV_HEADERS = [
    "Searched User", "Searcher", "Post Count", "Last Post ID",
    "Last Post Date/Time", "Mode", "Search Timestamp",
]

# mode -> (query parameter naming the author, Member attribute, "sr" value)
_AUTHOR_SEARCHES = {
    Mode.search_posts_by_id: ("author_id", "uid", "posts"),
    Mode.search_posts_by_name: ("author", "name", "posts"),
    Mode.search_threads_by_id: ("author_id", "uid", "threads"),
    Mode.search_threads_by_name: ("author", "name", "threads"),
}


def ensure_csv_file(csv_filename, headers):
    """Create ``csv_filename`` with a header row unless it already exists.

    Args:
        csv_filename: Path of the CSV file.
        headers: Column names written as the first row of a new file.
    """
    try:
        # "x" is exclusive creation: it fails instead of truncating a file
        # that already holds earlier results.
        with open(csv_filename, 'x', newline='', encoding='utf-8') as csvfile:
            csv.writer(csvfile).writerow(headers)
    except FileExistsError:
        pass  # File exists, so the header row is already there.


def _csv_target(mode: Mode) -> Tuple[str, List[str]]:
    """Return the CSV file name and header row used for ``mode``."""
    if mode in _THREAD_MODES:
        return "threads.csv", _THREAD_CSV_HEADERS
    if mode in _POST_MODES:
        return "posts.csv", _POST_CSV_HEADERS
    raise ValueError("invalid Mode")


def _build_search_params(mode: Mode, member: Optional[Member]) -> Dict[str, object]:
    """Return the ``search.php`` query parameters for ``mode``."""
    if mode == Mode.egosearch:
        return {"search_id": "egosearch"}
    if mode not in _AUTHOR_SEARCHES:
        raise ValueError("invalid Mode")
    if member is None:
        raise ValueError(f"member_to_search is required for mode {mode}")
    author_key, member_attr, result_kind = _AUTHOR_SEARCHES[mode]
    return {author_key: getattr(member, member_attr), "sr": result_kind}


def _parse_hit_count(soup) -> int:
    """Return the number in "<n> Treffer" from the pagination box, else 0."""
    pagination = soup.find('div', class_='pagination')
    if pagination is None:
        return 0
    match = _HIT_COUNT_PATTERN.search(pagination.text)
    return int(match.group(1)) if match else 0


def _parse_forum_date(text: str) -> datetime:
    """Parse a forum timestamp found in (or making up) ``text``.

    Raises:
        ValueError: If ``text`` holds no timestamp in the forum's format.
    """
    match = date_pattern.search(text)
    # Fall back to the whole text so input that strptime accepts but the
    # stricter pattern does not is still parsed.
    candidate = match.group(1) if match else text.strip()
    try:
        return datetime.strptime(candidate, _DATE_FORMAT)
    except ValueError as err:
        raise ValueError("invalid date/date not found") from err


def _extract_post_id(href: str) -> str:
    """Return the post id referenced by a ``viewtopic.php`` link."""
    ids = _POST_ID_PATTERN.findall(href)
    # Fall back to the historic "text after the last 'p'" rule for link
    # shapes the pattern does not know.
    return ids[-1] if ids else href.split("p")[-1]


def _require(element, what: str):
    """Return ``element``, or raise a descriptive error if it is missing."""
    if element is None:
        raise ValueError(
            f"{what} not found in search result page "
            "(no hits, expired session or changed page layout?)"
        )
    return element


def _latest_from_thread_view(soup) -> Tuple[str, datetime]:
    """Return (last post id, last post time) of the first listed topic."""
    row = _require(soup.select_one(".topiclist .row:nth-child(1)"), "first topic row")
    last_post = _require(row.select_one(".lastpost"), "last-post cell")
    created = _parse_forum_date(last_post.text)
    link = _require(
        row.select_one(".lastpost a[title='Gehe zum letzten Beitrag']"),
        "last-post link",
    )
    return _extract_post_id(link["href"]), created


def _latest_from_post_view(soup) -> Tuple[str, datetime]:
    """Return (post id, post time) of the first listed post."""
    post = _require(
        soup.select_one("#page-body .post.search:nth-child(4)"), "first post entry"
    )
    date_element = _require(post.select_one(".search-result-date"), "post date")
    created = _parse_forum_date(date_element.text)
    link = _require(post.select_one(".postbody h3 a[href]"), "post link")
    return _extract_post_id(link["href"]), created


def search_phpbb_member(session: Session, member_to_search: Optional[Member], mode: Mode,
                        searcher_username, forum_url, timeout: float = 30.0):
    """
    Searches for a member in a phpBB forum and logs the results.

    One row (searched user, searcher, hit count, id and time of the newest
    post, mode, search timestamp) is appended to ``threads.csv`` or
    ``posts.csv``, depending on ``mode``.

    Args:
        session: The phpbb session attributes
        member_to_search: The member to search for. May be None for
            ``Mode.egosearch``, which searches for the logged-in user.
        mode: Which kind of search to run.
        searcher_username: The username performing the search.
        forum_url: The base URL of the phpBB forum.
        timeout: Seconds to wait for the forum before giving up.

    Raises:
        ValueError: If ``mode`` is invalid, ``member_to_search`` is missing
            for a member search, or the result page cannot be parsed.
        requests.RequestException: If the HTTP request fails or times out.
    """
    csv_filename, csv_headers = _csv_target(mode)
    params = _build_search_params(mode, member_to_search)
    searched_name = searcher_username if mode == Mode.egosearch else member_to_search.name

    # Create CSV file with header if it doesn't exist
    ensure_csv_file(csv_filename, csv_headers)

    # Tolerate a trailing slash on the configured base URL.
    search_url = f"{forum_url.rstrip('/')}/forum/search.php"
    response = requests.get(
        search_url,
        cookies=session.cookies(),
        headers={"User-Agent": _USER_AGENT},
        params=params,
        timeout=timeout,  # without it a stalled server blocks forever
    )
    response.raise_for_status()

    soup = BeautifulSoup(response.content, 'html.parser')
    count = _parse_hit_count(soup)

    if mode in _THREAD_MODES:
        last_item_id, last_post_createtime = _latest_from_thread_view(soup)
    else:
        last_item_id, last_post_createtime = _latest_from_post_view(soup)

    # Append to CSV
    with open(csv_filename, 'a', newline='', encoding='utf-8') as csvfile:
        csv.writer(csvfile).writerow([
            searched_name, searcher_username, count, last_item_id,
            last_post_createtime, mode, datetime.now().isoformat(),
        ])

    print(f"Search for {searched_name} by {searcher_username} (mode {mode}) complete. Results appended to {csv_filename}")


def main():
    """Run the egosearch, then every member search mode for each member."""
    with open("session_data.json", encoding="utf-8") as session_file:
        session = Session(**json.load(session_file))
    forum_url = "https://debianforum.de"  # Replace with your forum URL
    searcher_username = "TRex"  # Username of the script/bot doing the search

    members_to_search = [
        Member("cosinus", 41132),
        Member("Meillo", 13014),
        Member("TRex", 18865),
        Member("feltel", 2),
    ]
    member_modes = (
        Mode.search_posts_by_id,
        Mode.search_posts_by_name,
        Mode.search_threads_by_id,
        Mode.search_threads_by_name,
    )

    search_phpbb_member(
        session=session,
        member_to_search=None,
        mode=Mode.egosearch,
        searcher_username=searcher_username,
        forum_url=forum_url,
    )
    for member in members_to_search:
        for mode in member_modes:
            search_phpbb_member(
                session=session,
                member_to_search=member,
                mode=mode,
                searcher_username=searcher_username,
                forum_url=forum_url,
            )


if __name__ == "__main__":
    main()