diff options
author | mia <mia@mia.jetzt> | 2024-07-26 09:36:56 -0700 |
---|---|---|
committer | mia <mia@mia.jetzt> | 2024-07-26 09:36:56 -0700 |
commit | 81071e8feefdf815e29318226c668664e1706da2 (patch) | |
tree | bda31195ca8018b8c5fe2d6f0286f97fe6bde4c6 /2_filter.py | |
download | scrubber-81071e8feefdf815e29318226c668664e1706da2.tar.gz scrubber-81071e8feefdf815e29318226c668664e1706da2.zip |
initial commit
Diffstat (limited to '2_filter.py')
-rw-r--r-- | 2_filter.py | 84 |
1 files changed, 84 insertions, 0 deletions
# Contents of 2_filter.py as added by this commit (new file, mode 100644,
# blob 816e762), reconstructed from the collapsed diff.
"""Select root notes whose thread satisfies the configured criteria.

Reads the tab-separated note graph from ``graph.db``, looks up each note of
every thread flagged ``root`` in the database, builds a ``FilterableNote``
tree for it, and writes the ids of the roots accepted by ``criteria`` to
``filtered.list`` (one id per line).

``config.py`` must define ``connect`` (called with no arguments to obtain the
database connection) and ``criteria`` (called with a ``FilterableNote``).
"""

from dataclasses import dataclass
from pathlib import Path
from typing import Callable, Dict, List, Optional

import psycopg

from ty import FilterableNote, Visibility

try:
    import progressbar2 as progressbar
except ImportError:
    import progressbar


print("configuring")
config = {}
# NOTE(review): exec() runs config.py as arbitrary code. That is acceptable
# only while config.py is a trusted, locally-authored file.
exec(Path("config.py").read_text(), config)
conn: psycopg.Connection = config["connect"]()
criteria: Callable[[FilterableNote], bool] = config["criteria"]

# Maps note id -> {"id", "replies", "quotes", "flags"} as parsed from graph.db.
intermediate: Dict[str, dict] = {}


def _split_list(field: str) -> List[str]:
    """Split a comma-separated field, mapping the empty string to ``[]``."""
    return field.split(",") if field else []


print("parsing")
with Path("graph.db").open() as graph:
    for raw_line in graph:
        # Strip only the newline: a trailing tab marks an empty last field.
        line = raw_line.rstrip("\n")
        if not line:
            continue  # tolerate blank lines instead of failing the unpack
        note_id, replies, quotes, flags = line.split("\t")
        intermediate[note_id] = {
            "id": note_id,
            "replies": _split_list(replies),
            "quotes": _split_list(quotes),
            "flags": _split_list(flags),
        }


def _transform_all(note_ids: List[str]) -> Optional[List[FilterableNote]]:
    """Transform every listed note, or return ``None`` if any is missing.

    Stops at the first missing note so no further queries are issued for a
    thread that is going to be discarded anyway.
    """
    notes = []
    for note_id in note_ids:
        note = transform(intermediate[note_id])
        if note is None:
            return None
        notes.append(note)
    return notes


def transform(entry: dict) -> Optional[FilterableNote]:
    """Build the ``FilterableNote`` tree rooted at ``entry``.

    Args:
        entry: A value of ``intermediate`` (keys ``id``, ``replies``,
            ``quotes``, ``flags``).

    Returns:
        The note with its replies and quotes transformed recursively, or
        ``None`` when this note or any note beneath it no longer has a row
        in the ``note`` table.

    Raises:
        KeyError: If a reply or quote id is not present in ``intermediate``.
    """
    row = conn.execute(
        'select "createdAt", reactions, "renoteCount", visibility from note where id = %s',
        [entry["id"]],
    ).fetchone()
    if row is None:
        return None  # part of thread disappeared during processing
    when, reactions, renotes, visibility = row

    # A missing descendant invalidates the whole thread, so bubble None up.
    replies = _transform_all(entry["replies"])
    if replies is None:
        return None
    quotes = _transform_all(entry["quotes"])
    if quotes is None:
        return None

    return FilterableNote(
        entry["id"],
        "self" in entry["flags"],
        replies,
        quotes,
        when.astimezone(),
        sum(reactions.values()),
        renotes,
        Visibility.from_db(visibility),
    )


roots = [entry for entry in intermediate.values() if "root" in entry["flags"]]

pb = progressbar.ProgressBar(
    0,
    len(roots),
    prefix="processing ",
)
targets: List[str] = []
for entry in roots:
    transformed = transform(entry)
    # A None result means part of the thread vanished; the root is skipped
    # here (the original comment expects it to be handled on a later run).
    if transformed is not None and criteria(transformed):
        targets.append(entry["id"])
    # Advance for every root, skipped or not, so the bar reaches its maximum.
    pb.increment()
pb.finish()


Path("filtered.list").write_text("\n".join(targets))