🗝
summary refs log tree commit diff
diff options
context:
space:
mode:
authormia <mia@mia.jetzt>2024-11-20 06:52:59 -0800
committermia <mia@mia.jetzt>2024-11-20 06:52:59 -0800
commit88394c7d829c5e25be56f3efffbc9805b64736a5 (patch)
tree7121e65d652643589857819b5123214306346f32
parent7e060e5cf2656a0a53d41ea0ff42b753316cd441 (diff)
downloadscrubber-main.tar.gz
scrubber-main.zip
changes and alterations HEAD main
-rw-r--r--2_filter.py5
-rw-r--r--3_archive.py5
-rw-r--r--4_delete.py26
-rw-r--r--com.py2
-rw-r--r--conf_mia.py6
5 files changed, 31 insertions, 13 deletions
diff --git a/2_filter.py b/2_filter.py
index 89311d2..62ca8e3 100644
--- a/2_filter.py
+++ b/2_filter.py
@@ -17,12 +17,12 @@ intermediate = parse_graph()
 def transform(entry: dict) -> FilterableNote:
     time.sleep(0.0001)
     note = conn.execute(
-        'select "createdAt", reactions, "renoteCount", visibility from note where id = %s',
+        'select "createdAt", reactions, "renoteCount", visibility, cw from note where id = %s',
         [entry["id"]],
     ).fetchone()
     if note is None:
         return None # part of thread disappeared during processing
-    when, reactions, renotes, visibility = note
+    when, reactions, renotes, visibility, cw = note
 
     replies = [transform(intermediate[reply]) for reply in entry["replies"]]
     quotes = [transform(intermediate[quote]) for quote in entry["quotes"]]
@@ -38,6 +38,7 @@ def transform(entry: dict) -> FilterableNote:
         sum(reactions.values()),
         renotes,
         Visibility.from_db(visibility),
+        cw,
     )
 
 
diff --git a/3_archive.py b/3_archive.py
index 39affdd..258392b 100644
--- a/3_archive.py
+++ b/3_archive.py
@@ -74,7 +74,10 @@ def collect_note(id: str):
     output["attachments"] = []
     for file_id in file_ids:
         time.sleep(0.0005)
-        name, type_, comment, url = conn.execute('select name, type, comment, url from drive_file where id = %s', [file_id]).fetchone()
+        row = conn.execute('select name, type, comment, url from drive_file where id = %s', [file_id]).fetchone()
+        if row is None:
+            continue
+        name, type_, comment, url = row
         attachment = {
             "id": file_id,
             "type": type_,
diff --git a/4_delete.py b/4_delete.py
index 615fbab..0e47e52 100644
--- a/4_delete.py
+++ b/4_delete.py
@@ -3,9 +3,10 @@ import time
 from pathlib import Path
 
 import httpx
+import psutil
 import psycopg
 
-from com import eval_config, parse_graph, progressbar, FilterAction
+from com import FilterAction, eval_config, parse_graph, progressbar
 
 config = eval_config()
 conn: psycopg.Connection = config["connect"]()
@@ -94,17 +95,25 @@ for note, action in queue:
             pb.update(message="down")
             time.sleep(1)
             continue
+        Path('queue-stats.dump').write_text(f"status:{resp.status_code}\nbody:\n{resp.text}")
         deliver_waiting = resp.json()["deliver"]["waiting"]
         obliterate_waiting = resp.json()["obliterate"]["waiting"]
-        obliterate_delayed = resp.json()["obliterate"]["delayed"]
-        if deliver_waiting < 100 and obliterate_waiting + obliterate_delayed< 50000:
+        if deliver_waiting < 100 and obliterate_waiting < 50000:
             break
-        pb.update(message=f"queue ({deliver_waiting}/{obliterate_waiting + obliterate_delayed})")
+        pb.update(message=f"queue ({deliver_waiting}/{obliterate_waiting})")
+        time.sleep(10)
+
+    # make sure there's enough memory for new jobs
+    while True:
+        vmem = psutil.virtual_memory()
+        if vmem.available > (512 * 1024 * 1024):
+            break
+        pb.update(message="memory")
         time.sleep(10)
 
     # prevent api rate limiting
     req_delay = time.time() - last_req
-    if req_delay < 15:
+    if req_delay < 30:
         pb.update(message="delaying")
         time.sleep(req_delay)
 
@@ -122,16 +131,19 @@ for note, action in queue:
             continue
         elif resp.status_code == 502:
             pb.update(status="down")
-            continue
             time.sleep(1)
+            continue
         elif resp.status_code >= 400:
             body = resp.json()
             if body["error"]["code"] == "NO_SUCH_NOTE":
                 pb.increment(message="seeking")
                 seeking = True
                 break
+            elif body["error"]["code"] == "QUEUE_FULL":
+                print("\nobliterate queue overflowed, exiting to save server")
+                break
             err += 1
-            if err > 10:
+            if err > 3:
                 raise Exception(f"{body['error']['code']}: {body['error']['message']}")
             sys.stdout.write("\r")
             print(f"err {body['error']['code']} {body['error']['message']} ")
diff --git a/com.py b/com.py
index 3ebb948..6bc9a63 100644
--- a/com.py
+++ b/com.py
@@ -44,6 +44,7 @@ class FilterableNote:
     reactions: int
     renotes: int
     visibility: Visibility
+    cw: str
 
     def thread(self) -> List["FilterableNote"]:
         acc = []
@@ -73,6 +74,7 @@ class FilterableNote:
             "when": self.when.isoformat(),
             "reactions": self.reactions,
             "renotes": self.renotes,
+            "cw": self.cw,
         }
 
 
diff --git a/conf_mia.py b/conf_mia.py
index a32255f..9eb0e08 100644
--- a/conf_mia.py
+++ b/conf_mia.py
@@ -10,7 +10,7 @@ api = "https://void.rehab/api"
 early_exit = 0xFFF
 
 now = datetime.now(UTC)
-threshold = 0.1
+threshold = 2.0
 
 def criteria(root: FilterableNote) -> FilterAction:
     thread = root.thread()
@@ -34,7 +34,7 @@ def criteria(root: FilterableNote) -> FilterAction:
 
     # get my...
     most_recent_post = max(thread_self, key=lambda note: note.when) # ...most recent post...
-    score = lambda note: note.reactions + note.renotes*5
+    score = lambda note: note.reactions + note.renotes*5 + 1
     high_score_post = max(thread_self, key=score) # ...highest scoring post...
     # ...and their values...
     most_recent = most_recent_post.when
@@ -45,7 +45,7 @@ def criteria(root: FilterableNote) -> FilterAction:
     # ...and check it against a threshold
     if weighted_score < threshold:
         if any(map(
-            lambda note: note.visibility in [Visibility.public, Visibility.unlisted],
+            lambda note: note.visibility in [Visibility.public, Visibility.unlisted] or note.cw,
             thread_self,
         )):
             return FilterAction.Obliterate