# Recovered from the patch for commit 81071e8feefdf815e29318226c668664e1706da2
# ("initial commit", which adds 1_graph.py).
"""Build a reply/renote graph around one user's notes and dump it to graph.db.

Steps, in order:

1. Execute ``config.py`` to obtain a database connection factory
   (``connect``), the target ``user_id`` and an optional ``early_exit`` cap.
2. Collect the ids of that user's notes, skipping rows that have a
   ``renoteId`` but no ``text``.
3. Link every collected note to its reply parent / renoted note, following
   parents upwards; notes with neither become roots.
4. Walk each root downwards and, from the first note authored by the user on
   each branch, pull in further replies/renotes via the ``note_replies``
   database function.
5. Write one tab-separated line per known note to ``graph.db``:
   ``id<TAB>reply ids<TAB>renote ids<TAB>flags`` (comma-separated lists;
   flags are ``root`` and/or ``self``).
"""

import json
import sys
from collections import namedtuple
from functools import cache
from pathlib import Path

import psycopg

try:
    import progressbar2 as progressbar
except ImportError:
    import progressbar


# One row of the ``note`` table, reduced to the columns used for linking.
Note = namedtuple("Note", ["renote_id", "reply_id", "user_id"])
# Graph node: ``replies`` and ``renotes`` are lists of child ``Tree`` objects
# that are mutated in place while the graph is built.
Tree = namedtuple("Tree", ["id", "replies", "renotes"])

print("configuring")
# NOTE: config.py is executed as arbitrary Python code. That is acceptable for
# a local, trusted configuration file; never point this at untrusted input.
config = {}
exec(Path("config.py").read_text(), config)
conn: psycopg.Connection = config["connect"]()
user_id: str = config["user_id"]
early_exit = config.get("early_exit")


def fetch_note_ids() -> set:
    """Return the ids of the configured user's notes.

    Rows that have a ``renoteId`` but a null ``text`` are excluded by the
    query. When ``early_exit`` is set, fetching stops as soon as more than
    ``early_exit`` ids have been collected (the threshold is strictly
    greater-than, so the result holds ``early_exit + 1`` ids in that case).
    """
    found = set()
    cur = conn.execute(
        'select id from note where "userId" = %s and not ("renoteId" is not null and text is null)',
        [user_id],
    )
    while rows := cur.fetchmany(0xFF):
        for row in rows:
            found.add(row[0])
            if early_exit and len(found) > early_exit:
                # Returning (rather than ``break``) leaves the batch loop as
                # well; a bare ``break`` would only end the current batch and
                # the next ``fetchmany`` would keep adding ids.
                return found
    return found


print("fetching note ids", file=sys.stderr)
note_ids = fetch_note_ids()


@cache
def get_note(id: str) -> Note:
    """Load the linking columns of note ``id``; results are memoised.

    Raises:
        LookupError: if no row with that id exists.
    """
    row = conn.execute(
        'select "renoteId", "replyId", "userId" from note where id = %s', [id]
    ).fetchone()
    if row is None:
        # Without this check ``Note(*None)`` fails with an opaque TypeError.
        raise LookupError(f"note {id!r} not found")
    return Note(*row)


# Notes that have neither a reply parent nor a renote target, keyed by id.
roots = {}
# Every note seen so far, keyed by id.
trees = {}


def tree_init(id: str, seek: bool = True) -> Tree:
    """Return the ``Tree`` for note ``id``, creating and linking it if needed.

    A new node is appended to the ``replies`` list of its reply parent and/or
    the ``renotes`` list of the note it renotes; those ancestors are created
    recursively. A note with neither link is recorded in ``roots``.

    ``seek`` is accepted for call compatibility but is not used.

    NOTE(review): this recurses once per ancestor, so a very long reply chain
    could hit Python's recursion limit.
    """
    if tree := trees.get(id):
        return tree
    tree = Tree(id, [], [])
    note = get_note(id)
    if note.reply_id or note.renote_id:
        if note.reply_id:
            tree_init(note.reply_id).replies.append(tree)
        if note.renote_id:
            tree_init(note.renote_id, False).renotes.append(tree)
    else:
        roots[id] = tree
    # Registered only after the ancestors so that ``trees`` never exposes a
    # node whose parents are still missing.
    trees[id] = tree
    return tree


def make_widgets(msg, trees, roots):
    """Build the progress bar widget list.

    Args:
        msg: label shown in front of the bar.
        trees: truthy to include a ``trees`` variable widget.
        roots: truthy to include a ``roots`` variable widget.
    """
    widgets = [
        f"{msg} ",
        progressbar.Percentage(),
        " ",
        progressbar.Bar(),
        " ",
        progressbar.SimpleProgress("%(value_s)s/%(max_value_s)s"),
        " ",
    ]
    if trees:
        widgets += [progressbar.Variable("trees"), " "]
    if roots:
        widgets += [progressbar.Variable("roots"), " "]
    widgets += [progressbar.ETA()]
    return widgets


pb = progressbar.ProgressBar(
    0,
    len(note_ids),
    widgets=make_widgets("building trees", True, True),
)
for note_id in note_ids:
    tree_init(note_id)
    pb.increment(trees=len(trees), roots=len(roots))
pb.finish()


def traverse(tree: Tree):
    """Descend through replies until a note by ``user_id`` is found, then expand it.

    Branches are only followed through ``replies``; once a note authored by
    the configured user is reached, ``expand`` takes over for that subtree.
    """
    note = get_note(tree.id)
    if note.user_id == user_id:
        expand(tree)
    else:
        for child in tree.replies:
            traverse(child)


def expand(tree: Tree):
    """Attach notes returned by ``note_replies`` to ``tree``, then recurse.

    Each returned id that is not already in ``trees`` becomes a new node. It
    is added to ``tree.replies`` if its ``replyId`` is this note and to
    ``tree.renotes`` if its ``renoteId`` is this note; ids matching neither
    are dropped. All reply children (old and new) are then expanded in turn.

    NOTE(review): the meaning of the ``1, 1000`` arguments to ``note_replies``
    is defined by the database function and is not visible here.
    """
    rows = conn.execute(
        "select id from note_replies(%s, 1, 1000)", [tree.id]
    ).fetchall()
    for row in rows:
        child_id = row[0]
        if child_id in trees:
            continue
        note = get_note(child_id)
        new = Tree(child_id, [], [])
        if note.reply_id == tree.id:
            # is a reply
            tree.replies.append(new)
            trees[child_id] = new
        if note.renote_id == tree.id:
            # is a renote
            tree.renotes.append(new)
            trees[child_id] = new
    for child in tree.replies:
        expand(child)


roots_len = len(roots)
pb = progressbar.ProgressBar(
    0, roots_len, widgets=make_widgets("expanding roots", True, False)
)

for root in roots.values():
    traverse(root)
    pb.increment(trees=len(trees))
pb.finish()


# Explicit encoding keeps the output independent of the platform default.
with Path("graph.db").open("w", encoding="utf-8") as f:
    pb = progressbar.ProgressBar(
        0, len(trees), widgets=make_widgets("saving graph", False, False)
    )
    for tree in trees.values():
        note = get_note(tree.id)
        flags = []
        if tree.id in roots:
            flags.append("root")
        if note.user_id == user_id:
            flags.append("self")
        fields = (
            tree.id,
            ",".join(reply.id for reply in tree.replies),
            ",".join(renote.id for renote in tree.renotes),
            ",".join(flags),
        )
        # One write per record: id, replies, renotes, flags.
        f.write("\t".join(fields) + "\n")
        pb.increment()
    pb.finish()

conn.close()