🗝
summary refs log tree commit diff
path: root/2_filter.py
diff options
context:
space:
mode:
author mia <mia@mia.jetzt> 2024-07-26 09:36:56 -0700
committer mia <mia@mia.jetzt> 2024-07-26 09:36:56 -0700
commit 81071e8feefdf815e29318226c668664e1706da2 (patch)
tree bda31195ca8018b8c5fe2d6f0286f97fe6bde4c6 /2_filter.py
download scrubber-81071e8feefdf815e29318226c668664e1706da2.tar.gz
scrubber-81071e8feefdf815e29318226c668664e1706da2.zip
initial commit
Diffstat (limited to '2_filter.py')
-rw-r--r-- 2_filter.py 84
1 files changed, 84 insertions, 0 deletions
diff --git a/2_filter.py b/2_filter.py
new file mode 100644
index 0000000..816e762
--- /dev/null
+++ b/2_filter.py
@@ -0,0 +1,84 @@
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Callable, List
+
+import psycopg
+
+from ty import FilterableNote, Visibility
+
+try:
+    import progressbar2 as progressbar
+except ImportError:
+    import progressbar
+
+
+print("configuring")
+config = {}  # globals namespace that config.py is executed into
+exec(Path("config.py").read_text(), config)  # NOTE(review): runs config.py as-is; it must be trusted
+conn: psycopg.Connection = config["connect"]()  # config.py's connect() is called once, here
+criteria: Callable[[FilterableNote], bool] = config["criteria"]  # predicate applied to each transformed root
+
+# graph.db holds one note per line as four tab-separated fields:
+# id, replies, quotes, flags. The last three are comma-separated
+# lists, where an empty field means an empty list.
+print("parsing")
+intermediate = {}
+for row in Path("graph.db").read_text().splitlines():
+    note_id, *lists = row.split("\t")
+    replies, quotes, flags = (field.split(",") if field else [] for field in lists)
+    intermediate[note_id] = {
+        "id": note_id, "replies": replies, "quotes": quotes, "flags": flags,
+    }
+
+
+def transform(entry: dict) -> "FilterableNote | None":
+    """Build the FilterableNote tree rooted at `entry` (a value of `intermediate`).
+
+    Returns None if this note, or any reply/quote beneath it, has no database row.
+    """
+    note = conn.execute(
+        'select "createdAt", reactions, "renoteCount", visibility from note where id = %s',
+        [entry["id"]],
+    ).fetchone()
+    if note is None:
+        return None # part of thread disappeared during processing
+    when, reactions, renotes, visibility = note
+    # every child costs a query, so stop at the first missing one
+    replies, quotes = [], []
+    for child_ids, found in ((entry["replies"], replies), (entry["quotes"], quotes)):
+        for child_id in child_ids:
+            child = transform(intermediate[child_id])
+            if child is None:
+                return None # bubble up, buttercup
+            found.append(child)
+    return FilterableNote(
+        entry["id"], "self" in entry["flags"], replies, quotes,
+        when.astimezone(), sum(reactions.values()), renotes, Visibility.from_db(visibility),
+    )
+
+
+# number of entries flagged "root"; passed to the progress bar as its maximum
+root_count = sum(
+    1 for candidate in intermediate.values() if "root" in candidate["flags"]
+)
+
+
+# Transform every thread root and collect the ids that satisfy `criteria`.
+pb = progressbar.ProgressBar(
+    0,
+    root_count,
+    prefix="processing ",
+)
+targets = []
+for entry in intermediate.values():
+    if "root" not in entry["flags"]:
+        continue
+    transformed = transform(entry)
+    # None: part of the thread vanished mid-run -- we'll get to it next cycle
+    if transformed is not None and criteria(transformed):
+        targets.append(entry["id"])
+    pb.increment()  # skipped roots count too, so the bar can reach root_count
+pb.finish()
+
+# one matching root id per line, no trailing newline
+Path("filtered.list").write_text("\n".join(targets))