fix(reaper): verdict-deadlock reaper — close stuck PRs after 24h

Defense-in-depth for PRs that substantive_fixer can't make progress on.
Targets two stuck-verdict shapes empirically observed in production:

  1. leo:request_changes + domain:approve
     Leo asked for substantive fix; fixer either failed silently
     (no_claim_files / no_review_comments / etc.) or the issue tag isn't
     in FIXABLE | CONVERTIBLE | UNFIXABLE.

  2. leo:skipped + domain:request_changes
     Eval bypassed Leo (eval_attempts >= MAX). Domain rejected with no
     structured eval_issues. fixer can't classify the issue.

92 PRs match this gate today, oldest at 2026-04-24 (13d stuck).

Behavior:
- Hourly throttle via audit_log sentinel ('verdict_deadlock_reaper_run').
- REAPER_DRY_RUN=True default — first deploy emits 'would_close' audit
  events only. No DB writes. No Forgejo writes. (Ship Apr 24 directive.)
- 24h cooldown, oldest-first, capped at 50 per run.
- Heartbeat audit fires whether dry-run or live, so throttle works.
- Live mode: posts comment + closes Forgejo PR + close_pr() in DB.
  Audits 'verdict_deadlock_closed' per PR.
- Forgejo PATCH None → skip DB close (avoid drift).

Wired into fix_cycle() in teleo-pipeline.py. Runs after mechanical and
substantive fixes, never blocks them.

Followup (post first-run audit verification):
- Operator inspects 'verdict_deadlock_would_close' audit rows
- Flips REAPER_DRY_RUN to False, redeploys
- Reaper actually closes on next hourly tick
2026-05-07 12:03:29 -04:00 · 2026-05-07 12:03:29 -04:00 · e63d27d259
commit e63d27d259
parent 517e9884cc
2 changed files with 159 additions and 3 deletions
--- a/lib/substantive_fixer.py
+++ b/lib/substantive_fixer.py
@ -596,3 +596,154 @@ async def substantive_fix_cycle(conn, max_workers=None) -> tuple[int, int]:
        logger.info("Substantive fix cycle: %d fixed, %d errors", fixed, errors)

    return fixed, errors
+
+
+# ─── Verdict-deadlock reaper ──────────────────────────────────────────────
+#
+# Defense-in-depth for PRs that substantive_fixer can't make progress on.
+# Targets two stuck-verdict shapes empirically observed in production:
+#
+#   1. leo:request_changes + domain:approve
+#      Leo asked for substantive fix; fixer either failed silently
+#      (no_claim_files / no_review_comments / etc.) or the issue tag isn't
+#      in FIXABLE | CONVERTIBLE | UNFIXABLE. PR sits forever.
+#
+#   2. leo:skipped + domain:request_changes
+#      Eval bypassed Leo (eval_attempts >= MAX). Domain rejected with no
+#      structured eval_issues. fixer can't classify → silent skip → forever.
+#
+# Both shapes need a clearance path. Reaper closes them after a 24h cooldown
+# with audit_log breadcrumbs for forensics. First deploy runs in DRY_RUN
+# mode (audit "would_close" events only — no Forgejo writes, no DB closes).
+# Operator reviews dry-run output, flips REAPER_DRY_RUN to False, redeploys.
+
+REAPER_DEADLOCK_AGE_HOURS = 24  # cooldown: only reap PRs whose last_attempt is older
+REAPER_INTERVAL_SECONDS = 3600  # at most once per hour
+REAPER_MAX_PER_RUN = 50  # safety cap so a single cycle can't close everything
+REAPER_DRY_RUN = True  # FIRST-DEPLOY DEFAULT — flip to False after audit verification
+
+
+async def verdict_deadlock_reaper_cycle(conn) -> int:
+    """Close (or, in dry-run, only audit) PRs stuck in a verdict deadlock >24h.
+
+    At most one run per REAPER_INTERVAL_SECONDS (audit_log sentinel). Returns
+    the number of PRs closed, or flagged "would-close" in dry-run mode.
+    """
+    # Throttle: skip if last reaper run was within REAPER_INTERVAL_SECONDS.
+    # Uses audit_log as the rate-limit ledger so no schema/state needed.
+    last_run = conn.execute(
+        "SELECT MAX(timestamp) FROM audit_log "
+        "WHERE event = 'verdict_deadlock_reaper_run'"
+    ).fetchone()[0]
+    if last_run:
+        ran_recently = conn.execute(
+            "SELECT (julianday('now') - julianday(?)) * 86400 < ?",
+            (last_run, REAPER_INTERVAL_SECONDS),
+        ).fetchone()[0]
+        if ran_recently:
+            return 0
+
+    # Two stuck-verdict shapes: leo:rc+domain:approve, leo:skipped+domain:rc.
+    rows = conn.execute(
+        """SELECT number, branch, eval_issues, leo_verdict, domain_verdict,
+                  last_attempt, fix_attempts
+           FROM prs
+           WHERE status = 'open'
+           AND tier0_pass = 1
+           AND last_attempt IS NOT NULL
+           AND last_attempt < datetime('now', ? || ' hours')
+           AND (
+               (leo_verdict = 'request_changes' AND domain_verdict = 'approve')
+               OR (leo_verdict = 'skipped' AND domain_verdict = 'request_changes')
+           )
+           ORDER BY last_attempt ASC
+           LIMIT ?""",
+        (f"-{REAPER_DEADLOCK_AGE_HOURS}", REAPER_MAX_PER_RUN),
+    ).fetchall()
+
+    mode = "dryrun" if REAPER_DRY_RUN else "live"
+
+    if not rows:
+        # Heartbeat anyway so throttle ticks even when nothing to reap.
+        db.audit(conn, "reaper", "verdict_deadlock_reaper_run", json.dumps({
+            "candidates": 0, "closed": 0, "mode": mode,
+        }))
+        return 0
+
+    logger.info("Verdict-deadlock reaper [%s]: %d candidate(s) in deadlock >%dh",
+                mode, len(rows), REAPER_DEADLOCK_AGE_HOURS)
+
+    closed = errors = 0
+    for row in rows:
+        pr = row["number"]
+        reason_detail = {
+            "pr": pr,
+            "branch": row["branch"],
+            "leo_verdict": row["leo_verdict"],
+            "domain_verdict": row["domain_verdict"],
+            "eval_issues": row["eval_issues"],
+            "last_attempt": row["last_attempt"],
+            "fix_attempts": row["fix_attempts"],
+        }
+
+        if REAPER_DRY_RUN:
+            # Audit only — do NOT touch DB row or Forgejo state.
+            db.audit(conn, "reaper", "verdict_deadlock_would_close",
+                     json.dumps(reason_detail))
+            logger.info(
+                "Reaper [dryrun]: would close PR #%d (leo=%s domain=%s issues=%s)",
+                pr, row["leo_verdict"], row["domain_verdict"], row["eval_issues"],
+            )
+            closed += 1
+            continue
+
+        try:
+            # Close on Forgejo BEFORE commenting: a PATCH that keeps failing is
+            # retried every run and must not leave a duplicate comment each time.
+            patch_result = await forgejo_api(
+                "PATCH", repo_path(f"pulls/{pr}"), {"state": "closed"},
+                token=get_agent_token("leo"),
+            )
+            if patch_result is None:
+                logger.warning(
+                    "Reaper: PR #%d Forgejo close failed — skipping DB close to "
+                    "avoid drift", pr,
+                )
+                errors += 1
+                continue
+            await close_pr(
+                conn, pr,
+                last_error=(
+                    f"verdict_deadlock_reaper: leo={row['leo_verdict']} "
+                    f"domain={row['domain_verdict']} age>{REAPER_DEADLOCK_AGE_HOURS}h"
+                ),
+            )
+            db.audit(conn, "reaper", "verdict_deadlock_closed",
+                     json.dumps(reason_detail))
+            closed += 1
+            # Explanatory comment goes last, once Forgejo and the DB both say closed.
+            comment_body = (
+                "Closed by verdict-deadlock reaper.\n\n"
+                f"This PR sat for >{REAPER_DEADLOCK_AGE_HOURS}h with conflicting "
+                f"verdicts (leo={row['leo_verdict']}, domain={row['domain_verdict']}) "
+                f"that the substantive fixer couldn't auto-resolve.\n\n"
+                f"Eval issues: `{row['eval_issues']}`\n"
+                f"Last attempt: {row['last_attempt']}\n\n"
+                "_Automated message from the LivingIP pipeline._"
+            )
+            await forgejo_api(
+                "POST", repo_path(f"issues/{pr}/comments"), {"body": comment_body},
+            )
+        except Exception:
+            logger.exception("Reaper: PR #%d close failed", pr)
+            errors += 1
+
+    db.audit(conn, "reaper", "verdict_deadlock_reaper_run", json.dumps({
+        "candidates": len(rows), "closed": closed, "errors": errors, "mode": mode,
+    }))
+    if errors:
+        logger.warning("Verdict-deadlock reaper [%s]: %d closed, %d errors",
+                       mode, closed, errors)
+    else:
+        logger.info("Verdict-deadlock reaper [%s]: %d closed", mode, closed)
+    return closed
--- a/teleo-pipeline.py
+++ b/teleo-pipeline.py
@ -20,7 +20,7 @@ from lib import log as logmod
 from lib.breaker import CircuitBreaker
 from lib.evaluate import evaluate_cycle
 from lib.fixer import fix_cycle as mechanical_fix_cycle
-from lib.substantive_fixer import substantive_fix_cycle
+from lib.substantive_fixer import substantive_fix_cycle, verdict_deadlock_reaper_cycle
 from lib.health import start_health_server, stop_health_server
 from lib.llm import kill_active_subprocesses
 from lib.merge import merge_cycle
@ -91,14 +91,19 @@ async def ingest_cycle(conn, max_workers=None):


 async def fix_cycle(conn, max_workers=None):
-    """Combined fix stage: mechanical fixes first, then substantive fixes.
+    """Combined fix stage: mechanical fixes first, then substantive fixes,
+    finally the verdict-deadlock reaper.

    Mechanical (fixer.py): wiki link bracket stripping, $0
    Substantive (substantive_fixer.py): confidence/title/scope fixes via LLM, $0.001
+    Reaper (substantive_fixer.verdict_deadlock_reaper_cycle): defense-in-depth
+        for stuck-verdict PRs that the substantive fixer can't progress on.
+        Hourly throttle, dry-run by default. Cost $0.
    """
    m_fixed, m_errors = await mechanical_fix_cycle(conn, max_workers=max_workers)
    s_fixed, s_errors = await substantive_fix_cycle(conn, max_workers=max_workers)
-    return m_fixed + s_fixed, m_errors + s_errors
+    r_closed = await verdict_deadlock_reaper_cycle(conn)
+    return m_fixed + s_fixed + r_closed, m_errors + s_errors


 async def snapshot_cycle(conn, max_workers=None):