diff --git a/lib/extract.py b/lib/extract.py index 17d3e7a..1da8515 100644 --- a/lib/extract.py +++ b/lib/extract.py @@ -776,10 +776,21 @@ async def extract_cycle(conn, max_workers=None) -> tuple[int, int]: if skipped: logger.info("Skipped %d source(s) with existing open PRs", skipped) - if not unprocessed: + # ── Check for re-extraction sources (must run even when queue is empty) ── + reextract_rows = conn.execute( + """SELECT path, feedback FROM sources + WHERE status = 'needs_reextraction' AND feedback IS NOT NULL + ORDER BY updated_at ASC LIMIT ?""", + (max(1, MAX_SOURCES - len(unprocessed)),), + ).fetchall() + + if not unprocessed and not reextract_rows: return 0, 0 - logger.info("Extract cycle: %d unprocessed source(s) found, processing up to %d", len(unprocessed), MAX_SOURCES) + if unprocessed: + logger.info("Extract cycle: %d unprocessed source(s) found, processing up to %d", len(unprocessed), MAX_SOURCES) + if reextract_rows: + logger.info("Extract cycle: %d source(s) queued for re-extraction", len(reextract_rows)) # Load existing claims for dedup existing_claims = load_existing_claims_from_repo(str(main)) @@ -792,14 +803,6 @@ async def extract_cycle(conn, max_workers=None) -> tuple[int, int]: total_ok = 0 total_err = 0 - # ── Re-extraction: pick up sources that failed eval and have feedback ── - reextract_rows = conn.execute( - """SELECT path, feedback FROM sources - WHERE status = 'needs_reextraction' AND feedback IS NOT NULL - ORDER BY updated_at ASC LIMIT ?""", - (max(1, MAX_SOURCES - len(unprocessed)),), - ).fetchall() - for row in reextract_rows: reex_path = row["path"] # Source was archived — read from archive location