From 5db6a0248c9592f74a95c60a9a924df245ce0c43 Mon Sep 17 00:00:00 2001
From: m3taversal <m3taversal@gmail.com>
Date: Fri, 8 May 2026 12:52:12 -0400
Subject: [PATCH] fix(substantive_fixer): SQL-side actionable-tag filter,
 eliminate head-of-line
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Step 4 of the stuck-PR triage. Push the FIXABLE/CONVERTIBLE/UNFIXABLE_TAGS
intersection from a post-fetch Python loop into the SELECT WHERE clause via
json_each + EXISTS. LIMIT 3 now always returns 3 actionable rows (or fewer if
that's all there are), eliminating the head-of-line block where 3 oldest
empty-eval_issues PRs occupied the slots forever.

Background: 11 hours of post-deploy logs showed substantive_fix_cycle stuck
emitting "0 actionable from 3 candidate(s) — head-of-line: [(3922, []), (3926,
[]), (3940, [])]" every cycle. Reaper closed those three on schedule, then a
new triple of empty-eval_issues PRs took their place. Reaper-as-primary-clearance
worked but is defense-in-depth, not the right architecture. Source of the block
is upstream in this SELECT.

Implementation choice: json_each + EXISTS over LIKE. Robust against tag-name
substring overlap, future-proof against tag renames, and SQLite 3.45.1 on VPS
fully supports it. Verified live: returns 13 of 28 currently-stuck PRs as
actionable, 15 fall through to reaper as before.

Tag list builds from the routing constants at runtime so adding a new tag
auto-updates the SELECT filter — no two-place edit footgun.

WARN-on-corrupt-JSON path retained as defense-in-depth (json_each and
json.loads use different parsers; technically possible for a row to pass one
but not the other).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 lib/substantive_fixer.py | 46 ++++++++++++++++++++--------------------
 1 file changed, 23 insertions(+), 23 deletions(-)

diff --git a/lib/substantive_fixer.py b/lib/substantive_fixer.py
index 4772944..6280e55 100644
--- a/lib/substantive_fixer.py
+++ b/lib/substantive_fixer.py
@@ -522,53 +522,53 @@ async def substantive_fix_cycle(conn, max_workers=None) -> tuple[int, int]:
     Finds PRs with substantive issue tags that haven't exceeded fix budget.
     Processes up to 3 per cycle (Rhea: 180s interval, don't overwhelm eval).
     """
+    # Build the actionable-tag list from the routing constants so adding a new
+    # tag to FIXABLE_TAGS / CONVERTIBLE_TAGS / UNFIXABLE_TAGS auto-updates the
+    # SELECT filter — no two-place edit footgun.
+    actionable_tags = sorted(FIXABLE_TAGS | CONVERTIBLE_TAGS | UNFIXABLE_TAGS)
+    placeholders = ",".join(["?"] * len(actionable_tags))
+
+    # Push the actionable-tag filter into SQL (was a post-fetch Python loop).
+    # The old shape selected the 3 oldest request_changes PRs and then dropped
+    # ones without actionable tags, so empty-eval_issues rows occupied LIMIT-3
+    # forever (head-of-line). Now LIMIT-3 always returns 3 actionable rows.
+    # Reaper handles the empty-tag PRs after their 24h cooldown.
     rows = conn.execute(
-        """SELECT number, eval_issues FROM prs
+        f"""SELECT number, eval_issues FROM prs
            WHERE status = 'open'
            AND tier0_pass = 1
            AND (domain_verdict = 'request_changes' OR leo_verdict = 'request_changes')
            AND COALESCE(fix_attempts, 0) < ?
            AND (last_attempt IS NULL OR last_attempt < datetime('now', '-3 minutes'))
+           AND EXISTS (
+               SELECT 1 FROM json_each(eval_issues)
+               WHERE value IN ({placeholders})
+           )
            ORDER BY created_at ASC
            LIMIT 3""",
-        (MAX_SUBSTANTIVE_FIXES + config.MAX_FIX_ATTEMPTS,),  # Total budget: mechanical + substantive
+        (MAX_SUBSTANTIVE_FIXES + config.MAX_FIX_ATTEMPTS, *actionable_tags),
     ).fetchall()
 
     if not rows:
         return 0, 0
 
-    # Filter to only PRs with substantive issues (not just mechanical)
+    # Defense-in-depth: corrupt eval_issues JSON shouldn't reach here (json_each
+    # would error in the SELECT and SQLite would skip the row), but the WARN log
+    # stays so we catch any edge case where a row's JSON parses for json_each
+    # but not for json.loads (different parsers, technically).
     substantive_rows = []
-    skipped_no_tags = []
     for row in rows:
         try:
-            issues = json.loads(row["eval_issues"] or "[]")
+            json.loads(row["eval_issues"] or "[]")
         except (json.JSONDecodeError, TypeError):
-            # Corrupt JSON in eval_issues is abnormal (post-merge column drift,
-            # hand-edited row, partial write during crash). WARN so ops can chase
-            # the upstream column-write path. Without this, the row drops out of
-            # both substantive_rows and skipped_no_tags — the third silent path.
             logger.warning(
                 "PR #%d: corrupt eval_issues JSON — skipping in substantive fix cycle",
                 row["number"],
             )
             continue
-        if set(issues) & (FIXABLE_TAGS | CONVERTIBLE_TAGS | UNFIXABLE_TAGS):
-            substantive_rows.append(row)
-        else:
-            skipped_no_tags.append((row["number"], issues))
+        substantive_rows.append(row)
 
     if not substantive_rows:
-        # Visibility for the LIMIT-3 head-of-line block: if the oldest
-        # candidates have no fixer-actionable tags (e.g. eval_issues=[],
-        # broken_wiki_links only), the cycle silently returns 0 — and the
-        # next cycle picks the same head-of-line, forever. Log the eval_issues
-        # of skipped candidates so the journal makes the block visible.
-        if skipped_no_tags:
-            logger.info(
-                "Substantive fix cycle: 0 actionable from %d candidate(s) — head-of-line: %s",
-                len(rows), skipped_no_tags,
-            )
         return 0, 0
 
     fixed = 0