Some checks are pending
CI / lint-and-test (push) Waiting to run
Critical bug fix: close_pr now checks forgejo_api return value and skips DB update on Forgejo failure, preventing ghost PRs (DB closed, Forgejo open). Returns bool so callers can handle failures. _terminate_pr checks return value — skips source requeue on failure. stale_pr.py migrated from raw Forgejo+DB to close_pr (last raw close transition eliminated). eval_parse.py: 15 pure parsing functions extracted from evaluate.py (~370 lines removed). Zero I/O, zero async, independently testable. evaluate.py drops from ~1510 to ~1140 lines. Tests: 295 passed (42 new eval_parse + 2 new close_pr), zero regressions. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
86 lines
2.7 KiB
Python
86 lines
2.7 KiB
Python
"""Stale extraction PR cleanup — closes extraction PRs that produce no claims.
|
|
|
|
When an extraction PR sits open longer than STALE_THRESHOLD_MINUTES (currently 45 min) with claims_count=0, it indicates:
|
|
- Extraction failed (model couldn't extract anything useful)
|
|
- Batch job stalled (no claims written)
|
|
- Source material is empty/junk
|
|
|
|
Auto-closing prevents zombie PRs from blocking the pipeline.
|
|
Logs each close for root cause analysis (model failures, bad sources, etc.).
|
|
|
|
Epimetheus owns this module.
|
|
"""
|
|
|
|
import json
|
|
import logging
|
|
from datetime import datetime, timezone
|
|
|
|
from . import config, db
|
|
from .forgejo import api, repo_path
|
|
from .pr_state import close_pr
|
|
|
|
# Module-level logger, registered under the explicit name "pipeline.stale_pr".
logger = logging.getLogger("pipeline.stale_pr")
|
|
|
|
# Age threshold in minutes: check_stale_prs selects open extraction PRs whose
# created_at is older than this and whose source has zero claims.
STALE_THRESHOLD_MINUTES = 45
|
|
|
|
|
|
async def check_stale_prs(conn) -> tuple[int, int]:
    """Auto-close extraction PRs that stayed open too long with zero claims.

    Selects rows from ``prs`` with ``status = 'open'`` and
    ``commit_type = 'extract'`` whose ``created_at`` is more than
    ``STALE_THRESHOLD_MINUTES`` minutes in the past and whose source (joined
    on ``sources.path``) has a ``claims_count`` of zero; a missing source row
    is treated as zero. Each match is closed through ``close_pr``; every
    successful close is recorded with ``db.audit`` and logged.

    Args:
        conn: Database connection. Must support ``execute(sql, params)``
            returning a cursor with ``fetchall()``, and yield rows that can
            be indexed by column name. The query uses SQLite date functions.

    Returns:
        ``(stale_closed, stale_errors)``: the number of PRs closed and the
        number of PRs for which closing failed (``close_pr`` returned a falsy
        value, or an exception was raised while handling that PR).
    """
    stale_closed = 0
    stale_errors = 0

    # Candidates: open extraction PRs older than the threshold whose source
    # has no claims. LEFT JOIN + COALESCE makes a missing source row count as 0.
    stale_prs = conn.execute(
        """SELECT p.number, p.branch, p.source_path, p.created_at
           FROM prs p
           LEFT JOIN sources s ON p.source_path = s.path
           WHERE p.status = 'open'
             AND p.commit_type = 'extract'
             AND datetime(p.created_at) < datetime('now', '-' || ? || ' minutes')
             AND COALESCE(s.claims_count, 0) = 0""",
        (STALE_THRESHOLD_MINUTES,),
    ).fetchall()

    for pr in stale_prs:
        pr_num = pr["number"]
        source_path = pr["source_path"] or "unknown"

        try:
            closed = await close_pr(
                conn,
                pr_num,
                last_error=f"stale: no claims after {STALE_THRESHOLD_MINUTES} min",
            )
            if not closed:
                # close_pr reported failure: count it and move on without
                # writing an audit entry for a PR that was not closed.
                stale_errors += 1
                logger.warning(
                    "Failed to close stale extraction PR #%d (%s, %s)",
                    pr_num, source_path, pr["branch"],
                )
                continue

            db.audit(
                conn,
                "watchdog",
                "stale_pr_closed",
                json.dumps({
                    "pr": pr_num,
                    "branch": pr["branch"],
                    "source": source_path,
                    # NOTE: this records the configured threshold, not the
                    # PR's measured age (created_at is selected but unused).
                    "open_minutes": STALE_THRESHOLD_MINUTES,
                }),
            )
            stale_closed += 1
            logger.info(
                "WATCHDOG: closed stale extraction PR #%d (no claims after %d min): %s",
                pr_num, STALE_THRESHOLD_MINUTES, source_path,
            )

        except Exception as e:
            # Per-PR boundary: one failing PR must not stop the sweep. Keep the
            # traceback so the failure can be diagnosed from the logs.
            stale_errors += 1
            logger.warning(
                "Stale PR close exception for #%d: %s",
                pr_num, e,
                exc_info=True,
            )

    return stale_closed, stale_errors
|