diff --git a/lib/substantive_fixer.py b/lib/substantive_fixer.py index 4772944..6280e55 100644 --- a/lib/substantive_fixer.py +++ b/lib/substantive_fixer.py @@ -522,53 +522,53 @@ async def substantive_fix_cycle(conn, max_workers=None) -> tuple[int, int]: Finds PRs with substantive issue tags that haven't exceeded fix budget. Processes up to 3 per cycle (Rhea: 180s interval, don't overwhelm eval). """ + # Build the actionable-tag list from the routing constants so adding a new + # tag to FIXABLE_TAGS / CONVERTIBLE_TAGS / UNFIXABLE_TAGS auto-updates the + # SELECT filter — no two-place edit footgun. + actionable_tags = sorted(FIXABLE_TAGS | CONVERTIBLE_TAGS | UNFIXABLE_TAGS) + placeholders = ",".join(["?"] * len(actionable_tags)) + + # Push the actionable-tag filter into SQL (was a post-fetch Python loop). + # The old shape selected the 3 oldest request_changes PRs and then dropped + # ones without actionable tags, so empty-eval_issues rows occupied LIMIT-3 + # forever (head-of-line). Now LIMIT-3 always returns 3 actionable rows. + # Reaper handles the empty-tag PRs after their 24h cooldown. rows = conn.execute( - """SELECT number, eval_issues FROM prs + f"""SELECT number, eval_issues FROM prs WHERE status = 'open' AND tier0_pass = 1 AND (domain_verdict = 'request_changes' OR leo_verdict = 'request_changes') AND COALESCE(fix_attempts, 0) < ? 
AND (last_attempt IS NULL OR last_attempt < datetime('now', '-3 minutes')) + AND EXISTS ( + SELECT 1 FROM json_each(eval_issues) + WHERE value IN ({placeholders}) + ) ORDER BY created_at ASC LIMIT 3""", - (MAX_SUBSTANTIVE_FIXES + config.MAX_FIX_ATTEMPTS,), # Total budget: mechanical + substantive + (MAX_SUBSTANTIVE_FIXES + config.MAX_FIX_ATTEMPTS, *actionable_tags), ).fetchall() if not rows: return 0, 0 - # Filter to only PRs with substantive issues (not just mechanical) + # Defense-in-depth: corrupt eval_issues JSON shouldn't reach here (json_each + # raises on malformed JSON, which fails the whole SELECT rather than skipping the row. NOTE(review): consider a json_valid(eval_issues) guard in the query), but the WARN log + # stays so we catch any edge case where a row's JSON parses for json_each + # but not for json.loads (different parsers, technically). substantive_rows = [] - skipped_no_tags = [] for row in rows: try: - issues = json.loads(row["eval_issues"] or "[]") + json.loads(row["eval_issues"] or "[]") except (json.JSONDecodeError, TypeError): - # Corrupt JSON in eval_issues is abnormal (post-merge column drift, - # hand-edited row, partial write during crash). WARN so ops can chase - # the upstream column-write path. Without this, the row drops out of - # both substantive_rows and skipped_no_tags — the third silent path. logger.warning( "PR #%d: corrupt eval_issues JSON — skipping in substantive fix cycle", row["number"], ) continue - if set(issues) & (FIXABLE_TAGS | CONVERTIBLE_TAGS | UNFIXABLE_TAGS): - substantive_rows.append(row) - else: - skipped_no_tags.append((row["number"], issues)) + substantive_rows.append(row) if not substantive_rows: - # Visibility for the LIMIT-3 head-of-line block: if the oldest - # candidates have no fixer-actionable tags (e.g. eval_issues=[], - # broken_wiki_links only), the cycle silently returns 0 — and the - # next cycle picks the same head-of-line, forever. Log the eval_issues - # of skipped candidates so the journal makes the block visible. 
- if skipped_no_tags: - logger.info( - "Substantive fix cycle: 0 actionable from %d candidate(s) — head-of-line: %s", - len(rows), skipped_no_tags, - ) return 0, 0 fixed = 0