diff options
field | value | date |
---|---|---|
author | mia <mia@mia.jetzt> | 2024-11-20 06:52:59 -0800 |
committer | mia <mia@mia.jetzt> | 2024-11-20 06:52:59 -0800 |
commit | 88394c7d829c5e25be56f3efffbc9805b64736a5 (patch) | |
tree | 7121e65d652643589857819b5123214306346f32 | |
parent | 7e060e5cf2656a0a53d41ea0ff42b753316cd441 (diff) | |
download | scrubber-main.tar.gz scrubber-main.zip | |
mode | file | lines changed |
---|---|---|
-rw-r--r-- | 2_filter.py | 5 |
-rw-r--r-- | 3_archive.py | 5 |
-rw-r--r-- | 4_delete.py | 26 |
-rw-r--r-- | com.py | 2 |
-rw-r--r-- | conf_mia.py | 6 |

5 files changed, 31 insertions, 13 deletions
diff --git a/2_filter.py b/2_filter.py index 89311d2..62ca8e3 100644 --- a/2_filter.py +++ b/2_filter.py @@ -17,12 +17,12 @@ intermediate = parse_graph() def transform(entry: dict) -> FilterableNote: time.sleep(0.0001) note = conn.execute( - 'select "createdAt", reactions, "renoteCount", visibility from note where id = %s', + 'select "createdAt", reactions, "renoteCount", visibility, cw from note where id = %s', [entry["id"]], ).fetchone() if note is None: return None # part of thread disappeared during processing - when, reactions, renotes, visibility = note + when, reactions, renotes, visibility, cw = note replies = [transform(intermediate[reply]) for reply in entry["replies"]] quotes = [transform(intermediate[quote]) for quote in entry["quotes"]] @@ -38,6 +38,7 @@ def transform(entry: dict) -> FilterableNote: sum(reactions.values()), renotes, Visibility.from_db(visibility), + cw, ) diff --git a/3_archive.py b/3_archive.py index 39affdd..258392b 100644 --- a/3_archive.py +++ b/3_archive.py @@ -74,7 +74,10 @@ def collect_note(id: str): output["attachments"] = [] for file_id in file_ids: time.sleep(0.0005) - name, type_, comment, url = conn.execute('select name, type, comment, url from drive_file where id = %s', [file_id]).fetchone() + row = conn.execute('select name, type, comment, url from drive_file where id = %s', [file_id]).fetchone() + if row is None: + continue + name, type_, comment, url = row attachment = { "id": file_id, "type": type_, diff --git a/4_delete.py b/4_delete.py index 615fbab..0e47e52 100644 --- a/4_delete.py +++ b/4_delete.py @@ -3,9 +3,10 @@ import time from pathlib import Path import httpx +import psutil import psycopg -from com import eval_config, parse_graph, progressbar, FilterAction +from com import FilterAction, eval_config, parse_graph, progressbar config = eval_config() conn: psycopg.Connection = config["connect"]() @@ -94,17 +95,25 @@ for note, action in queue: pb.update(message="down") time.sleep(1) continue + 
Path('queue-stats.dump').write_text(f"status:{resp.status_code}\nbody:\n{resp.text}") deliver_waiting = resp.json()["deliver"]["waiting"] obliterate_waiting = resp.json()["obliterate"]["waiting"] - obliterate_delayed = resp.json()["obliterate"]["delayed"] - if deliver_waiting < 100 and obliterate_waiting + obliterate_delayed< 50000: + if deliver_waiting < 100 and obliterate_waiting < 50000: break - pb.update(message=f"queue ({deliver_waiting}/{obliterate_waiting + obliterate_delayed})") + pb.update(message=f"queue ({deliver_waiting}/{obliterate_waiting})") + time.sleep(10) + + # make sure there's enough memory for new jobs + while True: + vmem = psutil.virtual_memory() + if vmem.available > (512 * 1024 * 1024): + break + pb.update(message="memory") time.sleep(10) # prevent api rate limiting req_delay = time.time() - last_req - if req_delay < 15: + if req_delay < 30: pb.update(message="delaying") time.sleep(req_delay) @@ -122,16 +131,19 @@ for note, action in queue: continue elif resp.status_code == 502: pb.update(status="down") - continue time.sleep(1) + continue elif resp.status_code >= 400: body = resp.json() if body["error"]["code"] == "NO_SUCH_NOTE": pb.increment(message="seeking") seeking = True break + elif body["error"]["code"] == "QUEUE_FULL": + print("\nobliterate queue overflowed, exiting to save server") + break err += 1 - if err > 10: + if err > 3: raise Exception(f"{body['error']['code']}: {body['error']['message']}") sys.stdout.write("\r") print(f"err {body['error']['code']} {body['error']['message']} ") diff --git a/com.py b/com.py index 3ebb948..6bc9a63 100644 --- a/com.py +++ b/com.py @@ -44,6 +44,7 @@ class FilterableNote: reactions: int renotes: int visibility: Visibility + cw: str def thread(self) -> List["FilterableNote"]: acc = [] @@ -73,6 +74,7 @@ class FilterableNote: "when": self.when.isoformat(), "reactions": self.reactions, "renotes": self.renotes, + "cw": self.cw, } diff --git a/conf_mia.py b/conf_mia.py index a32255f..9eb0e08 100644 --- 
a/conf_mia.py +++ b/conf_mia.py @@ -10,7 +10,7 @@ api = "https://void.rehab/api" early_exit = 0xFFF now = datetime.now(UTC) -threshold = 0.1 +threshold = 2.0 def criteria(root: FilterableNote) -> FilterAction: thread = root.thread() @@ -34,7 +34,7 @@ def criteria(root: FilterableNote) -> FilterAction: # get my... most_recent_post = max(thread_self, key=lambda note: note.when) # ...most recent post... - score = lambda note: note.reactions + note.renotes*5 + score = lambda note: note.reactions + note.renotes*5 + 1 high_score_post = max(thread_self, key=score) # ...highest scoring post... # ...and their values... most_recent = most_recent_post.when @@ -45,7 +45,7 @@ def criteria(root: FilterableNote) -> FilterAction: # ...and check it against a threshold if weighted_score < threshold: if any(map( - lambda note: note.visibility in [Visibility.public, Visibility.unlisted], + lambda note: note.visibility in [Visibility.public, Visibility.unlisted] or note.cw, thread_self, )): return FilterAction.Obliterate |