"""Stale extraction PR cleanup — closes extraction PRs that produce no claims. When an extraction PR sits open >30 min with claims_count=0, it indicates: - Extraction failed (model couldn't extract anything useful) - Batch job stalled (no claims written) - Source material is empty/junk Auto-closing prevents zombie PRs from blocking the pipeline. Logs each close for root cause analysis (model failures, bad sources, etc.). Epimetheus owns this module. """ import json import logging from datetime import datetime, timezone from . import config, db from .forgejo import api, repo_path logger = logging.getLogger("pipeline.stale_pr") STALE_THRESHOLD_MINUTES = 30 async def check_stale_prs(conn) -> tuple[int, int]: """Auto-close extraction PRs open >30 min with zero claims. Returns (stale_closed, stale_errors) — count of closed PRs and close failures. """ stale_closed = 0 stale_errors = 0 # Find extraction PRs: open >30 min, source has 0 claims stale_prs = conn.execute( """SELECT p.number, p.branch, p.source_path, p.created_at FROM prs p LEFT JOIN sources s ON p.source_path = s.path WHERE p.status = 'open' AND p.commit_type = 'extract' AND datetime(p.created_at) < datetime('now', '-' || ? || ' minutes') AND COALESCE(s.claims_count, 0) = 0""", (STALE_THRESHOLD_MINUTES,), ).fetchall() for pr in stale_prs: pr_num = pr["number"] source_path = pr["source_path"] or "unknown" try: # Close the PR via Forgejo result = await api( "PATCH", repo_path(f"pulls/{pr_num}"), body={"state": "closed"}, ) if result is None: stale_errors += 1 logger.warning( "Failed to close stale extraction PR #%d (%s, %s)", pr_num, source_path, pr["branch"], ) continue # Update local DB status conn.execute( "UPDATE prs SET status = 'closed' WHERE number = ?", (pr_num,), ) db.audit( conn, "watchdog", "stale_pr_closed", json.dumps({ "pr": pr_num, "branch": pr["branch"], "source": source_path, "open_minutes": STALE_THRESHOLD_MINUTES, }), ) stale_closed += 1 logger.info( "WATCHDOG: closed stale extraction PR #%d (no claims after %d min): %s", pr_num, STALE_THRESHOLD_MINUTES, source_path, ) except Exception as e: stale_errors += 1 logger.warning( "Stale PR close exception for #%d: %s", pr_num, e, ) return stale_closed, stale_errors