feat(eval): auto-close near-duplicate PRs when merged sibling exists

Prevents the Apr 22 runaway-damage pattern (44 open PRs manually bulk-closed)
where a source was extracted 20+ times before the cooldown gate landed, each
leaving an orphan 'open' PR after eval correctly rejected it as a near-duplicate.

Gate fires in dispose_rejected_pr before attempt-count branches:
  all_issues == ["near_duplicate"]  (exact match — compound carries signal)
  AND sibling PR exists with same source_path in status='merged'
  AND diff contains "new file mode"  (not enrichment-only)
  → close on Forgejo + DB with audit, post explanation comment.

Ganymede review — 5 must-fix/warnings applied + 1 must-add:
- Exact match on single-issue near_duplicate (compound rejections preserved)
- Enrichment guard via diff scan (eval_parse regex can flag enrichment prose)
- 10s timeout on get_pr_diff — conservative fallback on Forgejo wedge
- Forgejo comment with canned explanation (best-effort, try/except)
- Partial index idx_prs_source_path + migration v23
- Explicit p1.source_path IS NOT NULL in WHERE

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
m3taversal 2026-04-23 11:17:29 +01:00
parent a053a8ebf9
commit 33c17f87a8
2 changed files with 97 additions and 2 deletions

View file

@ -9,7 +9,7 @@ from . import config
logger = logging.getLogger("pipeline.db")
SCHEMA_VERSION = 22
SCHEMA_VERSION = 23
SCHEMA_SQL = """
CREATE TABLE IF NOT EXISTS schema_version (
@ -157,6 +157,7 @@ CREATE TABLE IF NOT EXISTS response_audit (
CREATE INDEX IF NOT EXISTS idx_sources_status ON sources(status);
CREATE INDEX IF NOT EXISTS idx_prs_status ON prs(status);
CREATE INDEX IF NOT EXISTS idx_prs_domain ON prs(domain);
CREATE INDEX IF NOT EXISTS idx_prs_source_path ON prs(source_path) WHERE source_path IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_costs_date ON costs(date);
CREATE INDEX IF NOT EXISTS idx_audit_stage ON audit_log(stage);
CREATE INDEX IF NOT EXISTS idx_response_audit_ts ON response_audit(timestamp);
@ -617,6 +618,13 @@ def migrate(conn: sqlite3.Connection):
conn.commit()
logger.info("Migration v22: added source_channel to prs + backfilled from branch prefix")
if current < 23:
conn.execute(
"CREATE INDEX IF NOT EXISTS idx_prs_source_path ON prs(source_path) WHERE source_path IS NOT NULL"
)
conn.commit()
logger.info("Migration v23: added idx_prs_source_path for auto-close dedup lookup")
if current < SCHEMA_VERSION:
conn.execute(
"INSERT OR REPLACE INTO schema_version (version) VALUES (?)",

View file

@ -11,13 +11,14 @@ All functions are async (Forgejo API calls). Dependencies: forgejo, db, config,
pr_state, feedback, eval_parse.
"""
import asyncio
import json
import logging
from . import config, db
from .eval_parse import classify_issues
from .feedback import format_rejection_comment
from .forgejo import api as forgejo_api, get_agent_token, repo_path
from .forgejo import api as forgejo_api, get_agent_token, get_pr_diff, repo_path
from .github_feedback import on_closed, on_eval_complete
from .pr_state import close_pr
@ -114,12 +115,98 @@ async def terminate_pr(conn, pr_number: int, reason: str):
async def dispose_rejected_pr(conn, pr_number: int, eval_attempts: int, all_issues: list[str]):
"""Disposition logic for rejected PRs on attempt 2+.
Auto-close gate (all attempts): near-duplicate of an already-merged PR for
the same source close immediately. Avoids the Apr 22 runaway-damage
pattern where a source extracted 20+ times in a short window produced
dozens of open PRs that all had to be closed manually.
Attempt 1: normal back to open, wait for fix.
Attempt 2: check issue classification.
- Mechanical only: keep open for one more attempt (auto-fix future).
- Substantive or mixed: close PR, requeue source.
Attempt 3+: terminal.
"""
# Auto-close near-duplicate when a merged sibling for the same source exists.
# Runs before the attempt-count branches so it catches the common runaway
# case on attempt 1 instead of waiting for attempt 2's terminate path.
#
# Exact-match requirement (Ganymede review): compound rejections like
# ["near_duplicate", "factual_discrepancy"] carry signal about the merged
# sibling being wrong or limited — we want humans to see those. Only the
# pure single-issue case is safe to auto-close.
if all_issues == ["near_duplicate"]:
existing_merged = conn.execute(
"""SELECT p2.number, p1.source_path FROM prs p1
JOIN prs p2 ON p2.source_path = p1.source_path
WHERE p1.number = ?
AND p1.source_path IS NOT NULL
AND p2.number != p1.number
AND p2.status = 'merged'
LIMIT 1""",
(pr_number,),
).fetchone()
if existing_merged:
sibling = existing_merged[0]
source_path = existing_merged[1]
# Enrichment guard: LLM reviewers can flag enrichment prose as
# "redundant" via eval_parse regex, tagging near_duplicate even
# though validate.py's structural check only fires on NEW files.
# If the PR only MODIFIES existing files (no "new file mode" in
# diff), it's an enrichment — skip auto-close so a human reviews.
#
# 10s timeout bounds damage when Forgejo is wedged (Apr 22 incident:
# hung for 2.5h). Conservative fallback: skip auto-close on any
# failure — fall through to normal rejection path.
try:
diff = await asyncio.wait_for(get_pr_diff(pr_number), timeout=10)
except (asyncio.TimeoutError, Exception):
logger.warning(
"PR #%d: diff fetch failed/timed out for near-dup guard — skipping auto-close",
pr_number, exc_info=True,
)
diff = None
if diff is None:
# Conservative: fall through to attempt-count branches below
pass
elif diff and "new file mode" not in diff:
logger.info(
"PR #%d: near_duplicate but modifies-only (enrichment) — skipping auto-close",
pr_number,
)
else:
logger.info(
"PR #%d: auto-closing near-duplicate of merged PR #%d (same source)",
pr_number, sibling,
)
# Post a brief explanation before closing (best-effort — non-fatal)
try:
await forgejo_api(
"POST",
repo_path(f"issues/{pr_number}/comments"),
{"body": (
f"Auto-closed: near-duplicate of already-merged PR "
f"#{sibling} (same source: `{source_path}`)."
)},
)
except Exception:
logger.debug("PR #%d: auto-close comment failed (non-fatal)", pr_number, exc_info=True)
await close_pr(
conn, pr_number,
last_error=f"auto_closed_near_duplicate: merged sibling #{sibling}",
)
db.audit(
conn, "evaluate", "auto_closed_near_duplicate",
json.dumps({
"pr": pr_number,
"merged_sibling": sibling,
"source_path": source_path,
"eval_attempts": eval_attempts,
}),
)
return
if eval_attempts < 2:
# Attempt 1: post structured feedback so agent learns, but don't close
if all_issues: