From e63d27d259d2a3d0eecffdbd0d9cad80fcec9ca4 Mon Sep 17 00:00:00 2001 From: m3taversal Date: Thu, 7 May 2026 12:03:29 -0400 Subject: [PATCH] =?UTF-8?q?fix(reaper):=20verdict-deadlock=20reaper=20?= =?UTF-8?q?=E2=80=94=20close=20stuck=20PRs=20after=2024h?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Defense-in-depth for PRs that substantive_fixer can't make progress on. Targets two stuck-verdict shapes empirically observed in production: 1. leo:request_changes + domain:approve Leo asked for substantive fix; fixer either failed silently (no_claim_files / no_review_comments / etc.) or the issue tag isn't in FIXABLE | CONVERTIBLE | UNFIXABLE. 2. leo:skipped + domain:request_changes Eval bypassed Leo (eval_attempts >= MAX). Domain rejected with no structured eval_issues. fixer can't classify the issue. 92 PRs match this gate today, oldest at 2026-04-24 (13d stuck). Behavior: - Hourly throttle via audit_log sentinel ('verdict_deadlock_reaper_run'). - REAPER_DRY_RUN=True default — first deploy emits 'would_close' audit events only. No DB writes. No Forgejo writes. (Ship Apr 24 directive.) - 24h cooldown, oldest-first, capped at 50 per run. - Heartbeat audit fires whether dry-run or live, so throttle works. - Live mode: posts comment + closes Forgejo PR + close_pr() in DB. Audits 'verdict_deadlock_closed' per PR. - Forgejo PATCH None → skip DB close (avoid drift). Wired into fix_cycle() in teleo-pipeline.py. Runs after mechanical and substantive fixes, never blocks them. 
Followup (post first-run audit verification): - Operator inspects 'verdict_deadlock_would_close' audit rows - Flips REAPER_DRY_RUN to False, redeploys - Reaper actually closes on next hourly tick --- lib/substantive_fixer.py | 151 +++++++++++++++++++++++++++++++++++++++ teleo-pipeline.py | 11 ++- 2 files changed, 159 insertions(+), 3 deletions(-) diff --git a/lib/substantive_fixer.py b/lib/substantive_fixer.py index 9bc7dbc..29f69e1 100644 --- a/lib/substantive_fixer.py +++ b/lib/substantive_fixer.py @@ -596,3 +596,154 @@ async def substantive_fix_cycle(conn, max_workers=None) -> tuple[int, int]: logger.info("Substantive fix cycle: %d fixed, %d errors", fixed, errors) return fixed, errors + + +# ─── Verdict-deadlock reaper ────────────────────────────────────────────── +# +# Defense-in-depth for PRs that substantive_fixer can't make progress on. +# Targets two stuck-verdict shapes empirically observed in production: +# +# 1. leo:request_changes + domain:approve +# Leo asked for substantive fix; fixer either failed silently +# (no_claim_files / no_review_comments / etc.) or the issue tag isn't +# in FIXABLE | CONVERTIBLE | UNFIXABLE. PR sits forever. +# +# 2. leo:skipped + domain:request_changes +# Eval bypassed Leo (eval_attempts >= MAX). Domain rejected with no +# structured eval_issues. fixer can't classify → silent skip → forever. +# +# Both shapes need a clearance path. Reaper closes them after a 24h cooldown +# with audit_log breadcrumbs for forensics. First deploy runs in DRY_RUN +# mode (audit "would_close" events only — no Forgejo writes, no DB closes). +# Operator reviews dry-run output, flips REAPER_DRY_RUN to False, redeploys. 
# ── lib/substantive_fixer.py ──────────────────────────────────────────────
# Verdict-deadlock reaper tunables.

REAPER_DEADLOCK_AGE_HOURS = 24  # cooldown before a stuck PR becomes a candidate
REAPER_INTERVAL_SECONDS = 3600  # at most once per hour
REAPER_MAX_PER_RUN = 50  # safety cap so a single cycle can't close everything
REAPER_DRY_RUN = True  # FIRST-DEPLOY DEFAULT — flip to False after audit verification


def _reaper_is_throttled(conn) -> bool:
    """Return True if a reaper heartbeat was audited within the throttle window.

    audit_log doubles as the rate-limit ledger, so no extra schema or state is
    needed. When there is no heartbeat row, or SQLite cannot parse the stored
    timestamp, the expression evaluates to NULL and the run is NOT throttled.
    """
    elapsed_seconds = conn.execute(
        "SELECT (julianday('now') - julianday(MAX(timestamp))) * 86400 "
        "FROM audit_log "
        "WHERE event = 'verdict_deadlock_reaper_run'"
    ).fetchone()[0]
    return elapsed_seconds is not None and elapsed_seconds < REAPER_INTERVAL_SECONDS


def _select_deadlocked_prs(conn) -> list:
    """Fetch open PRs in either stuck-verdict shape, oldest first, capped per run."""
    # Two stuck-verdict shapes: leo:rc+domain:approve, leo:skipped+domain:rc.
    # NOTE(review): the age filter compares last_attempt against datetime()
    # output as text — presumably last_attempt is stored in the same
    # 'YYYY-MM-DD HH:MM:SS' form; confirm against the writer.
    return conn.execute(
        """SELECT number, branch, eval_issues, leo_verdict, domain_verdict,
                  last_attempt, fix_attempts
           FROM prs
           WHERE status = 'open'
             AND tier0_pass = 1
             AND last_attempt IS NOT NULL
             AND last_attempt < datetime('now', ? || ' hours')
             AND (
                  (leo_verdict = 'request_changes' AND domain_verdict = 'approve')
               OR (leo_verdict = 'skipped' AND domain_verdict = 'request_changes')
             )
           ORDER BY last_attempt ASC
           LIMIT ?""",
        (f"-{REAPER_DEADLOCK_AGE_HOURS}", REAPER_MAX_PER_RUN),
    ).fetchall()


async def _close_deadlocked_pr(conn, row, reason_detail) -> bool:
    """Close one PR on Forgejo and in the DB; return False if Forgejo refused.

    Order matters: the Forgejo close goes first and the explanatory comment
    goes last. Posting the comment before the close (as originally written)
    means a failed close leaves the PR a candidate, and every later run posts
    the same comment again.
    """
    pr = row["number"]

    patch_result = await forgejo_api(
        "PATCH", repo_path(f"pulls/{pr}"), {"state": "closed"},
        token=get_agent_token("leo"),
    )
    if patch_result is None:
        logger.warning(
            "Reaper: PR #%d Forgejo close failed — skipping DB close to "
            "avoid drift", pr,
        )
        return False

    # Mirror the Forgejo state in the DB right away to keep the drift window small.
    await close_pr(
        conn, pr,
        last_error=(
            f"verdict_deadlock_reaper: leo={row['leo_verdict']} "
            f"domain={row['domain_verdict']} age>{REAPER_DEADLOCK_AGE_HOURS}h"
        ),
    )
    db.audit(conn, "reaper", "verdict_deadlock_closed",
             json.dumps(reason_detail))

    # The comment is best-effort: the PR is already closed in both places, so
    # a failure here must not be reported as a failed close.
    comment_body = (
        "Closed by verdict-deadlock reaper.\n\n"
        f"This PR sat for >{REAPER_DEADLOCK_AGE_HOURS}h with conflicting "
        f"verdicts (leo={row['leo_verdict']}, domain={row['domain_verdict']}) "
        f"that the substantive fixer couldn't auto-resolve.\n\n"
        f"Eval issues: `{row['eval_issues']}`\n"
        f"Last attempt: {row['last_attempt']}\n\n"
        "_Automated message from the LivingIP pipeline._"
    )
    try:
        comment_result = await forgejo_api(
            "POST", repo_path(f"issues/{pr}/comments"), {"body": comment_body},
        )
    except Exception:
        logger.exception("Reaper: PR #%d closed but comment post raised", pr)
    else:
        if comment_result is None:
            logger.warning("Reaper: PR #%d closed but comment was not posted", pr)
    return True


async def verdict_deadlock_reaper_cycle(conn) -> int:
    """Reap PRs stuck in conflicting-verdict deadlock for >24h.

    Args:
        conn: DB connection exposing ``execute``; rows must support access by
            column name (``row["number"]``).

    Returns:
        Count of PRs closed (or "would-close" in dry-run mode). Returns 0
        without doing any work when throttled.

    Throttled to once per REAPER_INTERVAL_SECONDS via the
    'verdict_deadlock_reaper_run' sentinel audit event, which is written on
    every non-throttled run — dry-run or live, with or without candidates.
    Per-PR failures are logged and counted; they never abort the batch.
    """
    if _reaper_is_throttled(conn):
        return 0

    rows = _select_deadlocked_prs(conn)
    mode = "dryrun" if REAPER_DRY_RUN else "live"

    if not rows:
        # Heartbeat anyway so throttle ticks even when nothing to reap.
        db.audit(conn, "reaper", "verdict_deadlock_reaper_run", json.dumps({
            "candidates": 0, "closed": 0, "errors": 0, "mode": mode,
        }))
        return 0

    logger.info(
        "Verdict-deadlock reaper [%s]: %d candidate(s) in deadlock >%dh",
        mode, len(rows), REAPER_DEADLOCK_AGE_HOURS,
    )

    closed = 0
    errors = 0
    for row in rows:
        pr = row["number"]
        reason_detail = {
            "pr": pr,
            "branch": row["branch"],
            "leo_verdict": row["leo_verdict"],
            "domain_verdict": row["domain_verdict"],
            "eval_issues": row["eval_issues"],
            "last_attempt": row["last_attempt"],
            "fix_attempts": row["fix_attempts"],
        }

        try:
            if REAPER_DRY_RUN:
                # Audit only — do NOT touch DB row or Forgejo state.
                db.audit(conn, "reaper", "verdict_deadlock_would_close",
                         json.dumps(reason_detail))
                logger.info(
                    "Reaper [dryrun]: would close PR #%d (leo=%s domain=%s issues=%s)",
                    pr, row["leo_verdict"], row["domain_verdict"], row["eval_issues"],
                )
                reaped = True
            else:
                reaped = await _close_deadlocked_pr(conn, row, reason_detail)
        except Exception:
            # Per-PR boundary: one bad PR must not stop the rest of the batch
            # or prevent the heartbeat below from being written.
            logger.exception("Reaper: PR #%d close failed", pr)
            reaped = False

        if reaped:
            closed += 1
        else:
            errors += 1

    db.audit(conn, "reaper", "verdict_deadlock_reaper_run", json.dumps({
        "candidates": len(rows), "closed": closed, "errors": errors, "mode": mode,
    }))
    if errors:
        logger.warning("Verdict-deadlock reaper [%s]: %d closed, %d errors",
                       mode, closed, errors)
    else:
        logger.info("Verdict-deadlock reaper [%s]: %d closed", mode, closed)
    return closed


# ── teleo-pipeline.py ─────────────────────────────────────────────────────
# The pipeline module brings the reaper into scope with:
#   from lib.substantive_fixer import substantive_fix_cycle, verdict_deadlock_reaper_cycle


async def fix_cycle(conn, max_workers=None):
    """Combined fix stage: mechanical fixes first, then substantive fixes,
    finally the verdict-deadlock reaper.

    Mechanical (fixer.py): wiki link bracket stripping, $0
    Substantive (substantive_fixer.py): confidence/title/scope fixes via LLM, $0.001
    Reaper (substantive_fixer.verdict_deadlock_reaper_cycle): defense-in-depth
    for stuck-verdict PRs that the substantive fixer can't progress on.
    Hourly throttle, dry-run by default. Cost $0.

    Returns:
        ``(succeeded, errors)``. ``succeeded`` sums the mechanical and
        substantive fix counts with the reaper's return value; ``errors``
        sums the two fixers' error counts, plus 1 if the reaper itself raised.

    The reaper runs last and is isolated: if it raises, the failure is logged
    and counted, and the results of the two fixers are still returned.
    """
    import logging  # local import: this module's own logger is not visible here

    m_fixed, m_errors = await mechanical_fix_cycle(conn, max_workers=max_workers)
    s_fixed, s_errors = await substantive_fix_cycle(conn, max_workers=max_workers)

    try:
        r_closed = await verdict_deadlock_reaper_cycle(conn)
        r_errors = 0
    except Exception:
        logging.getLogger(__name__).exception(
            "Verdict-deadlock reaper failed; fix results are unaffected"
        )
        r_closed, r_errors = 0, 1

    # NOTE(review): in dry-run mode the reaper returns its "would-close" count,
    # so `succeeded` can include PRs that were not actually touched — confirm
    # that whatever consumes this tuple is fine with that.
    return m_fixed + s_fixed + r_closed, m_errors + s_errors + r_errors


# (teleo-pipeline.py continues with `async def snapshot_cycle(conn, max_workers=None)`,
# which lies outside this hunk and is unchanged.)