fix(reaper): verdict-deadlock reaper — close stuck PRs after 24h
Defense-in-depth for PRs that substantive_fixer can't make progress on.
Targets two stuck-verdict shapes empirically observed in production:
1. leo:request_changes + domain:approve
Leo asked for substantive fix; fixer either failed silently
(no_claim_files / no_review_comments / etc.) or the issue tag isn't
in FIXABLE | CONVERTIBLE | UNFIXABLE.
2. leo:skipped + domain:request_changes
Eval bypassed Leo (eval_attempts >= MAX). Domain rejected with no
structured eval_issues. fixer can't classify the issue.
92 PRs match this gate today, oldest at 2026-04-24 (13d stuck).
Behavior:
- Hourly throttle via audit_log sentinel ('verdict_deadlock_reaper_run').
- REAPER_DRY_RUN=True default — first deploy emits 'verdict_deadlock_would_close'
audit events only. No PR-row DB writes (audit_log rows only). No Forgejo writes. (Ship Apr 24 directive.)
- 24h cooldown, oldest-first, capped at 50 per run.
- Heartbeat audit fires whether dry-run or live, so throttle works.
- Live mode: posts comment + closes Forgejo PR + close_pr() in DB.
Audits 'verdict_deadlock_closed' per PR.
- If the Forgejo PATCH returns None (close failed), skip the DB close to avoid drift.
Wired into fix_cycle() in teleo-pipeline.py. Runs after mechanical
and substantive fixes, never blocks them.
Followup (post first-run audit verification):
- Operator inspects 'verdict_deadlock_would_close' audit rows
- Flips REAPER_DRY_RUN to False, redeploys
- Reaper actually closes on next hourly tick
This commit is contained in:
parent
87f97eb4fa
commit
f97dd15349
2 changed files with 159 additions and 3 deletions
|
|
@ -569,3 +569,154 @@ async def substantive_fix_cycle(conn, max_workers=None) -> tuple[int, int]:
|
||||||
logger.info("Substantive fix cycle: %d fixed, %d errors", fixed, errors)
|
logger.info("Substantive fix cycle: %d fixed, %d errors", fixed, errors)
|
||||||
|
|
||||||
return fixed, errors
|
return fixed, errors
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Verdict-deadlock reaper ──────────────────────────────────────────────
|
||||||
|
#
|
||||||
|
# Defense-in-depth for PRs that substantive_fixer can't make progress on.
|
||||||
|
# Targets two stuck-verdict shapes empirically observed in production:
|
||||||
|
#
|
||||||
|
# 1. leo:request_changes + domain:approve
|
||||||
|
# Leo asked for substantive fix; fixer either failed silently
|
||||||
|
# (no_claim_files / no_review_comments / etc.) or the issue tag isn't
|
||||||
|
# in FIXABLE | CONVERTIBLE | UNFIXABLE. PR sits forever.
|
||||||
|
#
|
||||||
|
# 2. leo:skipped + domain:request_changes
|
||||||
|
# Eval bypassed Leo (eval_attempts >= MAX). Domain rejected with no
|
||||||
|
# structured eval_issues. fixer can't classify → silent skip → forever.
|
||||||
|
#
|
||||||
|
# Both shapes need a clearance path. Reaper closes them after a 24h cooldown
|
||||||
|
# with audit_log breadcrumbs for forensics. First deploy runs in DRY_RUN
|
||||||
|
# mode (audit "would_close" events only — no Forgejo writes, no DB closes).
|
||||||
|
# Operator reviews dry-run output, flips REAPER_DRY_RUN to False, redeploys.
|
||||||
|
|
||||||
|
# Tunables for verdict_deadlock_reaper_cycle().

# Minimum age of a PR's last_attempt, in hours, before the reaper selects it.
REAPER_DEADLOCK_AGE_HOURS = 24
# Minimum spacing between reaper runs, measured against the newest
# 'verdict_deadlock_reaper_run' row in audit_log.
REAPER_INTERVAL_SECONDS = 3600  # at most once per hour
# Used as the SQL LIMIT on the candidate query.
REAPER_MAX_PER_RUN = 50  # safety cap so a single cycle can't close everything
# When True the reaper only writes 'verdict_deadlock_would_close' audit rows;
# it does not call Forgejo or close the PR row.
REAPER_DRY_RUN = True  # FIRST-DEPLOY DEFAULT — flip to False after audit verification
|
||||||
|
|
||||||
|
|
||||||
|
async def verdict_deadlock_reaper_cycle(conn) -> int:
    """Reap PRs stuck in conflicting-verdict deadlock for >24h.

    Candidates are open PRs with ``tier0_pass = 1`` whose ``last_attempt`` is
    older than REAPER_DEADLOCK_AGE_HOURS and whose verdicts match either
    ``leo=request_changes + domain=approve`` or
    ``leo=skipped + domain=request_changes``. They are processed oldest
    first, at most REAPER_MAX_PER_RUN per run.

    Dry-run mode (REAPER_DRY_RUN) only writes a
    'verdict_deadlock_would_close' audit row per candidate. Live mode closes
    the PR on Forgejo, then closes it in the DB, audits
    'verdict_deadlock_closed', and finally posts an explanatory comment.

    Args:
        conn: DB connection; rows must be addressable by column name.

    Returns:
        Count of PRs closed (or "would-close" in dry-run mode). Returns 0
        when throttled or when there are no candidates.

    Throttled to once per REAPER_INTERVAL_SECONDS via the
    'verdict_deadlock_reaper_run' sentinel audit event, which is written on
    every non-throttled run (dry-run or live, with or without candidates).
    """
    # Throttle: skip if last reaper run was within REAPER_INTERVAL_SECONDS.
    # Uses audit_log as the rate-limit ledger so no schema/state needed.
    last_run = conn.execute(
        "SELECT MAX(timestamp) FROM audit_log "
        "WHERE event = 'verdict_deadlock_reaper_run'"
    ).fetchone()[0]
    if last_run:
        ran_recently = conn.execute(
            "SELECT (julianday('now') - julianday(?)) * 86400 < ?",
            (last_run, REAPER_INTERVAL_SECONDS),
        ).fetchone()[0]
        if ran_recently:
            return 0

    # Two stuck-verdict shapes: leo:rc+domain:approve, leo:skipped+domain:rc.
    rows = conn.execute(
        """SELECT number, branch, eval_issues, leo_verdict, domain_verdict,
                  last_attempt, fix_attempts
           FROM prs
           WHERE status = 'open'
             AND tier0_pass = 1
             AND last_attempt IS NOT NULL
             AND last_attempt < datetime('now', ? || ' hours')
             AND (
               (leo_verdict = 'request_changes' AND domain_verdict = 'approve')
               OR (leo_verdict = 'skipped' AND domain_verdict = 'request_changes')
             )
           ORDER BY last_attempt ASC
           LIMIT ?""",
        (f"-{REAPER_DEADLOCK_AGE_HOURS}", REAPER_MAX_PER_RUN),
    ).fetchall()

    mode = "dryrun" if REAPER_DRY_RUN else "live"

    if not rows:
        # Heartbeat anyway so throttle ticks even when nothing to reap.
        # Same keys as the end-of-run heartbeat so audit consumers see one schema.
        db.audit(conn, "reaper", "verdict_deadlock_reaper_run", json.dumps({
            "candidates": 0, "closed": 0, "errors": 0, "mode": mode,
        }))
        return 0

    logger.info(
        "Verdict-deadlock reaper [%s]: %d candidate(s) in deadlock >%dh",
        mode, len(rows), REAPER_DEADLOCK_AGE_HOURS,
    )

    closed = 0
    errors = 0
    for row in rows:
        pr = row["number"]
        reason_detail = {
            "pr": pr,
            "branch": row["branch"],
            "leo_verdict": row["leo_verdict"],
            "domain_verdict": row["domain_verdict"],
            "eval_issues": row["eval_issues"],
            "last_attempt": row["last_attempt"],
            "fix_attempts": row["fix_attempts"],
        }

        if REAPER_DRY_RUN:
            # Audit only — do NOT touch DB row or Forgejo state.
            db.audit(conn, "reaper", "verdict_deadlock_would_close",
                     json.dumps(reason_detail))
            logger.info(
                "Reaper [dryrun]: would close PR #%d (leo=%s domain=%s issues=%s)",
                pr, row["leo_verdict"], row["domain_verdict"], row["eval_issues"],
            )
            closed += 1
            continue

        try:
            # Close on Forgejo BEFORE commenting. If the close fails the PR is
            # re-selected on the next hourly run; commenting first would leave
            # (and keep re-posting) a "Closed by ..." note on a still-open PR.
            patch_result = await forgejo_api(
                "PATCH", repo_path(f"pulls/{pr}"), {"state": "closed"},
                token=get_agent_token("leo"),
            )
            if patch_result is None:
                logger.warning(
                    "Reaper: PR #%d Forgejo close failed — skipping DB close to "
                    "avoid drift", pr,
                )
                errors += 1
                continue
            await close_pr(
                conn, pr,
                last_error=(
                    f"verdict_deadlock_reaper: leo={row['leo_verdict']} "
                    f"domain={row['domain_verdict']} age>{REAPER_DEADLOCK_AGE_HOURS}h"
                ),
            )
            db.audit(conn, "reaper", "verdict_deadlock_closed",
                     json.dumps(reason_detail))
            closed += 1
        except Exception:
            logger.exception("Reaper: PR #%d close failed", pr)
            errors += 1
        else:
            # The explanatory comment is best-effort: Forgejo and the DB already
            # agree that the PR is closed, so a failure here must not be
            # reported as a failed close.
            comment_body = (
                "Closed by verdict-deadlock reaper.\n\n"
                f"This PR sat for >{REAPER_DEADLOCK_AGE_HOURS}h with conflicting "
                f"verdicts (leo={row['leo_verdict']}, domain={row['domain_verdict']}) "
                f"that the substantive fixer couldn't auto-resolve.\n\n"
                f"Eval issues: `{row['eval_issues']}`\n"
                f"Last attempt: {row['last_attempt']}\n\n"
                "_Automated message from the LivingIP pipeline._"
            )
            try:
                await forgejo_api(
                    "POST", repo_path(f"issues/{pr}/comments"), {"body": comment_body},
                )
            except Exception:
                logger.exception("Reaper: PR #%d closed but comment failed", pr)

    db.audit(conn, "reaper", "verdict_deadlock_reaper_run", json.dumps({
        "candidates": len(rows), "closed": closed, "errors": errors, "mode": mode,
    }))
    if errors:
        logger.warning("Verdict-deadlock reaper [%s]: %d closed, %d errors",
                       mode, closed, errors)
    else:
        logger.info("Verdict-deadlock reaper [%s]: %d closed", mode, closed)
    return closed
|
||||||
|
|
|
||||||
|
|
@ -20,7 +20,7 @@ from lib import log as logmod
|
||||||
from lib.breaker import CircuitBreaker
|
from lib.breaker import CircuitBreaker
|
||||||
from lib.evaluate import evaluate_cycle
|
from lib.evaluate import evaluate_cycle
|
||||||
from lib.fixer import fix_cycle as mechanical_fix_cycle
|
from lib.fixer import fix_cycle as mechanical_fix_cycle
|
||||||
from lib.substantive_fixer import substantive_fix_cycle
|
from lib.substantive_fixer import substantive_fix_cycle, verdict_deadlock_reaper_cycle
|
||||||
from lib.health import start_health_server, stop_health_server
|
from lib.health import start_health_server, stop_health_server
|
||||||
from lib.llm import kill_active_subprocesses
|
from lib.llm import kill_active_subprocesses
|
||||||
from lib.merge import merge_cycle
|
from lib.merge import merge_cycle
|
||||||
|
|
@ -91,14 +91,19 @@ async def ingest_cycle(conn, max_workers=None):
|
||||||
|
|
||||||
|
|
||||||
async def fix_cycle(conn, max_workers=None):
    """Run the combined fix stage and return ``(fixed, errors)`` totals.

    Three sub-stages run strictly in this order:

    1. Mechanical (fixer.py): wiki link bracket stripping, $0
    2. Substantive (substantive_fixer.py): confidence/title/scope fixes via LLM, $0.001
    3. Reaper (substantive_fixer.verdict_deadlock_reaper_cycle): defense-in-depth
       for stuck-verdict PRs that the substantive fixer can't progress on.
       Hourly throttle, dry-run by default. Cost $0.

    The reaper's return value is added to the "fixed" total. The reaper
    returns no error count, so only mechanical and substantive errors are
    summed.
    """
    mech_fixed, mech_errors = await mechanical_fix_cycle(conn, max_workers=max_workers)
    subst_fixed, subst_errors = await substantive_fix_cycle(conn, max_workers=max_workers)
    reaped = await verdict_deadlock_reaper_cycle(conn)

    total_fixed = mech_fixed + subst_fixed + reaped
    total_errors = mech_errors + subst_errors
    return total_fixed, total_errors
|
||||||
|
|
||||||
|
|
||||||
async def snapshot_cycle(conn, max_workers=None):
|
async def snapshot_cycle(conn, max_workers=None):
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue