"""Select root notes from ``graph.db`` that satisfy the configured criteria.

Workflow, as performed by this script when run:

1. Execute ``config.py`` and read two names from it: ``connect`` (called with
   no arguments to obtain the database connection) and ``criteria`` (a
   callable taking a ``FilterableNote`` and returning a bool).
2. Parse ``graph.db``: one record per line, four tab-separated fields
   (id, replies, quotes, flags); the last three are comma-separated lists and
   may be empty.
3. For every record flagged ``root``, build a ``FilterableNote`` tree from the
   database and keep the record's id when ``criteria`` accepts it.
4. Write the kept ids, newline-separated, to ``filtered.list``.
"""

from dataclasses import dataclass
from pathlib import Path
from typing import Callable, List, Optional

import psycopg

from ty import FilterableNote, Visibility

try:
    import progressbar2 as progressbar
except ImportError:
    import progressbar


def _split_field(field: str) -> list:
    """Split a comma-separated field, mapping the empty string to ``[]``."""
    return field.split(",") if field else []


print("configuring")
config = {}
# SECURITY NOTE: config.py is executed as arbitrary Python with this process's
# privileges. It must be a trusted, locally controlled file.
exec(Path("config.py").read_text(), config)
conn: psycopg.Connection = config["connect"]()
criteria: Callable[[FilterableNote], bool] = config["criteria"]

# Maps note id -> {"id", "replies", "quotes", "flags"} as parsed from graph.db.
intermediate = {}

print("parsing")
for line in Path("graph.db").read_text().splitlines():
    if not line:
        # A blank line carries no record; unpacking it would raise ValueError.
        continue
    note_id, replies, quotes, flags = line.split("\t")
    intermediate[note_id] = {
        "id": note_id,
        "replies": _split_field(replies),
        "quotes": _split_field(quotes),
        "flags": _split_field(flags),
    }


def _transform_all(note_ids: list) -> Optional[list]:
    """Transform every id in *note_ids*, or return ``None`` if any is missing.

    Stops at the first missing note so no further database queries are issued
    for a subtree whose result would be discarded anyway.
    """
    notes = []
    for child_id in note_ids:
        child = transform(intermediate[child_id])
        if child is None:
            return None
        notes.append(child)
    return notes


def transform(entry: dict) -> Optional[FilterableNote]:
    """Build the ``FilterableNote`` tree rooted at *entry*.

    Args:
        entry: A record from ``intermediate`` (keys ``id``, ``replies``,
            ``quotes``, ``flags``).

    Returns:
        The note with its replies and quotes transformed recursively, or
        ``None`` when the note itself or any note beneath it has no row in
        the ``note`` table.

    Raises:
        KeyError: If a reply or quote id is not present in ``intermediate``.
    """
    note = conn.execute(
        'select "createdAt", reactions, "renoteCount", visibility from note where id = %s',
        [entry["id"]],
    ).fetchone()
    if note is None:
        return None  # part of thread disappeared during processing
    when, reactions, renotes, visibility = note

    # A missing descendant invalidates the whole thread, so bubble None up.
    replies = _transform_all(entry["replies"])
    if replies is None:
        return None
    quotes = _transform_all(entry["quotes"])
    if quotes is None:
        return None

    return FilterableNote(
        entry["id"],
        "self" in entry["flags"],
        replies,
        quotes,
        when.astimezone(),
        sum(reactions.values()),
        renotes,
        Visibility.from_db(visibility),
    )


root_count = sum(1 for entry in intermediate.values() if "root" in entry["flags"])

pb = progressbar.ProgressBar(
    0,
    root_count,
    prefix="processing ",
)

targets = []
for entry in intermediate.values():
    if "root" not in entry["flags"]:
        continue
    transformed = transform(entry)
    # A root whose thread could not be fully loaded is left out of this run's
    # output (original note: "we'll get to it next cycle").
    if transformed is not None and criteria(transformed):
        targets.append(entry["id"])
    # Count every root that was handled, including skipped ones, so the bar
    # tracks root_count accurately.
    pb.increment()
pb.finish()

Path("filtered.list").write_text("\n".join(targets))