🗝
summary refs log tree commit diff
path: root/2_filter.py
blob: 816e762f56ece72f98a39a2217cddbf3e8495c13 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
from dataclasses import dataclass
from pathlib import Path
from typing import Callable, List

import psycopg

from ty import FilterableNote, Visibility

try:
    import progressbar2 as progressbar
except ImportError:
    import progressbar


# Load the run configuration. config.py must define `connect` (a callable
# returning the database connection) and `criteria` (the predicate deciding
# whether a transformed thread root is kept).
print("configuring")
config = {}
# NOTE(review): config.py is executed as ordinary Python code with this
# process's privileges -- only run this script with a config file you trust.
exec(Path("config.py").read_text(), config)
conn: psycopg.Connection = config["connect"]()
criteria: Callable[[FilterableNote], bool] = config["criteria"]


def _split_field(field: str) -> List[str]:
    """Split a comma-separated field; an empty field yields an empty list."""
    return field.split(",") if field else []


# Maps note id -> {"id", "replies", "quotes", "flags"} as read from graph.db.
intermediate = {}

# graph.db holds one record per line with four tab-separated columns:
# id, replies, quotes, flags. The last three are comma-separated lists.
print("parsing")
for line in Path("graph.db").read_text().splitlines():
    if not line:
        continue  # a blank line holds no record; unpacking it would raise
    note_id, replies, quotes, flags = line.split("\t")
    intermediate[note_id] = {
        "id": note_id,
        "replies": _split_field(replies),
        "quotes": _split_field(quotes),
        "flags": _split_field(flags),
    }


def transform(entry: dict) -> "FilterableNote | None":
    """Build the FilterableNote tree rooted at ``entry``.

    Looks the note up in the ``note`` table and recursively transforms every
    reply and quote listed for it, resolving their ids through
    ``intermediate``.

    Args:
        entry: One record of ``intermediate``; its ``"id"``, ``"replies"``,
            ``"quotes"`` and ``"flags"`` keys are read.

    Returns:
        The populated ``FilterableNote``, or ``None`` when this note or any
        note beneath it has no row in the ``note`` table.

    Raises:
        KeyError: If a reply or quote id is not a key of ``intermediate``.
    """
    note = conn.execute(
        'select "createdAt", reactions, "renoteCount", visibility from note where id = %s',
        [entry["id"]],
    ).fetchone()
    if note is None:
        return None  # part of thread disappeared during processing
    when, reactions, renotes, visibility = note

    # A single missing descendant invalidates the whole thread, so stop at
    # the first one instead of querying the remaining children for nothing.
    replies = _transform_children(entry["replies"])
    if replies is None:
        return None  # bubble up
    quotes = _transform_children(entry["quotes"])
    if quotes is None:
        return None  # bubble up

    return FilterableNote(
        entry["id"],
        "self" in entry["flags"],
        replies,
        quotes,
        # NOTE(review): presumably a datetime; astimezone() without arguments
        # converts it to the system's local timezone -- confirm.
        when.astimezone(),
        # NOTE(review): assumes reactions is a mapping of reaction -> count.
        sum(reactions.values()),
        renotes,
        Visibility.from_db(visibility),
    )


def _transform_children(ids: List[str]) -> "List[FilterableNote] | None":
    """Transform each id in order; return None as soon as one is missing."""
    children = []
    for child_id in ids:
        child = transform(intermediate[child_id])
        if child is None:
            return None
        children.append(child)
    return children


# Collect the thread roots once: the progress bar needs their count up front
# and the processing loop below only ever looks at roots.
roots = [entry for entry in intermediate.values() if "root" in entry["flags"]]
root_count = len(roots)


pb = progressbar.ProgressBar(
    0,
    root_count,
    prefix="processing ",
)
# Ids of the root notes whose transformed thread satisfies `criteria`.
targets = []
for entry in roots:
    transformed = transform(entry)
    # Count every root that has been examined, including the ones skipped
    # below, so the bar actually advances to root_count.
    pb.increment()
    if transformed is None:
        continue  # thread is incomplete; we'll get to it next cycle
    if criteria(transformed):
        targets.append(entry["id"])
pb.finish()


# One matching root id per line, without a trailing newline.
Path("filtered.list").write_text("\n".join(targets))