🗝
summary refs log tree commit diff
path: root/1_graph.py
diff options
context:
space:
mode:
author mia <mia@mia.jetzt> 2024-07-26 09:36:56 -0700
committer mia <mia@mia.jetzt> 2024-07-26 09:36:56 -0700
commit 81071e8feefdf815e29318226c668664e1706da2 (patch)
tree bda31195ca8018b8c5fe2d6f0286f97fe6bde4c6 /1_graph.py
download scrubber-81071e8feefdf815e29318226c668664e1706da2.tar.gz
scrubber-81071e8feefdf815e29318226c668664e1706da2.zip
initial commit
Diffstat (limited to '1_graph.py')
-rw-r--r-- 1_graph.py 159
1 files changed, 159 insertions, 0 deletions
diff --git a/1_graph.py b/1_graph.py
new file mode 100644
index 0000000..bc8116c
--- /dev/null
+++ b/1_graph.py
@@ -0,0 +1,159 @@
+import json
+import sys
+from collections import namedtuple
+from functools import cache
+from pathlib import Path
+
+import psycopg
+
+try:
+    import progressbar2 as progressbar
+except ImportError:
+    import progressbar
+
+
+# Relation columns of one note row ("renoteId", "replyId", "userId"), as
+# returned by get_note().
+Note = namedtuple("Note", ["renote_id", "reply_id", "user_id"])
+# Graph node: a note id plus two lists of child Tree nodes (replies, renotes).
+Tree = namedtuple("Tree", ["id", "replies", "renotes"])
+
+# Status messages go to stderr, matching the script's other status output,
+# so stdout stays free of progress chatter.
+print("configuring", file=sys.stderr)
+# NOTE(review): config.py is executed as arbitrary Python with this process's
+# privileges; it must be a trusted, locally controlled file.
+config = {}
+exec(Path("config.py").read_text(), config)
+# config.py must define connect() (called with no arguments to obtain the
+# database connection) and user_id; early_exit is optional.
+conn: psycopg.Connection = config["connect"]()
+user_id: str = config["user_id"]
+early_exit = config.get("early_exit")
+
+
+print("fetching note ids", file=sys.stderr)
+# Select the ids of the configured user's notes, skipping rows that have a
+# "renoteId" but no text.
+cur = conn.execute(
+    'select id from note where "userId" = %s and not ("renoteId" is not null and text is null)',
+    [user_id],
+)
+note_ids = set()
+while True:
+    batch = cur.fetchmany(0xFF)
+    if not batch:
+        break
+    note_ids.update(record[0] for record in batch)
+    # Optional cap from config: stop once more than early_exit ids are held.
+    if early_exit and len(note_ids) > early_exit:
+        break
+
+
+@cache
+def get_note(id: str) -> Note:
+    """Fetch the relation columns of the note with the given id.
+
+    Queries "renoteId", "replyId" and "userId" from the note table and wraps
+    them in a Note. Successful lookups are memoised by functools.cache, so a
+    repeated id does not hit the database again.
+
+    Raises:
+        TypeError: if no row matches (fetchone() returns None).
+    """
+    cursor = conn.execute(
+        'select "renoteId", "replyId", "userId" from note where id = %s', [id]
+    )
+    columns = cursor.fetchone()
+    return Note(*columns)
+
+
+# Trees for notes that are neither a reply nor a renote, keyed by note id.
+roots = {}
+# Every Tree registered so far, keyed by note id.
+trees = {}
+
+
+def tree_init(id: str, seek: bool = True) -> Tree:
+    """Return the Tree for a note, creating and linking it on first use.
+
+    A new Tree is attached to its parent's ``replies`` list when the note is
+    a reply, and to the renoted note's ``renotes`` list when it is a renote;
+    those ancestors are initialised recursively. A note that is neither is
+    recorded in ``roots``. Every new Tree is registered in ``trees``.
+
+    Args:
+        id: Id of the note to look up or create a Tree for.
+        seek: Accepted for callers, but not used by this function.
+
+    Returns:
+        The existing or newly created Tree for ``id``.
+    """
+    if id in trees:
+        return trees[id]
+
+    node = Tree(id, [], [])
+    note = get_note(id)
+    parent_id = note.reply_id
+    renoted_id = note.renote_id
+
+    if not (parent_id or renoted_id):
+        roots[id] = node
+    if parent_id:
+        tree_init(parent_id).replies.append(node)
+    if renoted_id:
+        tree_init(renoted_id, False).renotes.append(node)
+
+    # Registered only after the ancestors have been linked.
+    trees[id] = node
+    return node
+
+
+def make_widgets(msg, trees, roots):
+    """Build the widget list for one progress bar.
+
+    Args:
+        msg: Label shown at the start of the bar.
+        trees: When truthy, include a "trees" variable widget.
+        roots: When truthy, include a "roots" variable widget.
+
+    Returns:
+        A list of progressbar widgets and separator strings, ending in an ETA.
+    """
+    layout = [
+        f"{msg} ",
+        progressbar.Percentage(),
+        " ",
+        progressbar.Bar(),
+        " ",
+        progressbar.SimpleProgress("%(value_s)s/%(max_value_s)s"),
+        " ",
+    ]
+    # Optional counters, in display order, each followed by a separator.
+    for enabled, variable_name in ((trees, "trees"), (roots, "roots")):
+        if enabled:
+            layout.extend((progressbar.Variable(variable_name), " "))
+    layout.append(progressbar.ETA())
+    return layout
+
+
+# Build the graph: initialise a Tree for every fetched note id, reporting the
+# running number of trees and roots on the progress bar.
+pb = progressbar.ProgressBar(
+    0,
+    len(note_ids),
+    widgets=make_widgets("building trees", True, True),
+)
+for note_id in note_ids:
+    tree_init(note_id)
+    pb.increment(trees=len(trees), roots=len(roots))
+pb.finish()
+
+
+def traverse(tree: Tree):
+    """Descend through replies until a note by the configured user is found.
+
+    A tree whose note belongs to ``user_id`` is handed to expand() and not
+    descended further here; otherwise each reply subtree is searched in turn.
+    """
+    if get_note(tree.id).user_id == user_id:
+        expand(tree)
+        return
+    for reply in tree.replies:
+        traverse(reply)
+
+
+def expand(tree: Tree):
+    """Attach not-yet-known replies and renotes of a tree, then recurse.
+
+    Ids come from the database function ``note_replies``; its two numeric
+    arguments are presumably a depth of 1 and a limit of 1000 (confirm
+    against the function's definition). Ids already in ``trees`` are skipped.
+    Each remaining note is linked under ``tree`` as a reply and/or a renote
+    if it points at ``tree.id``, and registered in ``trees``. Every reply of
+    ``tree`` is then expanded recursively.
+    """
+    found = conn.execute(
+        "select id from note_replies(%s, 1, 1000)", [tree.id]
+    ).fetchall()
+    for record in found:
+        child_id = record[0]
+        if child_id in trees:
+            continue
+        note = get_note(child_id)
+        is_reply = note.reply_id == tree.id
+        is_renote = note.renote_id == tree.id
+        if not (is_reply or is_renote):
+            # Not directly related to this tree; left unregistered.
+            continue
+        node = Tree(child_id, [], [])
+        if is_reply:
+            tree.replies.append(node)
+        if is_renote:
+            tree.renotes.append(node)
+        trees[child_id] = node
+    for reply in tree.replies:
+        expand(reply)
+
+
+# Expand phase: one progress tick per root, showing the running tree count.
+roots_len = len(roots)
+pb = progressbar.ProgressBar(
+    0, roots_len, widgets=make_widgets("expanding roots", True, False)
+)
+
+# traverse() may add entries to trees (via expand) but does not touch roots.
+for root in roots.values():
+    traverse(root)
+    pb.increment(trees=len(trees))
+pb.finish()
+
+
+# Save the graph as text, one line per tree with four tab-separated fields:
+# note id, reply ids, renote ids, flags. The id lists and the flags
+# ("root", "self") are comma-separated and may be empty.
+with Path("graph.db").open("w") as f:
+    pb = progressbar.ProgressBar(
+        0, len(trees), widgets=make_widgets("saving graph", False, False)
+    )
+    for tree in trees.values():
+        flags = []
+        if tree.id in roots:
+            flags.append("root")
+        # "self" marks notes written by the configured user.
+        if get_note(tree.id).user_id == user_id:
+            flags.append("self")
+        fields = (
+            tree.id,
+            ",".join(reply.id for reply in tree.replies),
+            ",".join(renote.id for renote in tree.renotes),
+            ",".join(flags),
+        )
+        # One write per record instead of seven small ones.
+        f.write("\t".join(fields) + "\n")
+        pb.increment()
+    pb.finish()