fix: re-extraction loop runs even when queue is empty
Some checks are pending
CI / lint-and-test (push) Waiting to run

The re-extraction check was below an early return that fires when the
unprocessed queue is empty. Sources in the needs_reextraction state were
never picked up unless new sources happened to arrive simultaneously.
Move the re-extraction query above the gate so both paths run independently.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
m3taversal 2026-04-16 14:04:49 +01:00
parent 4c3ce265e4
commit c763c99910

View file

@ -776,10 +776,21 @@ async def extract_cycle(conn, max_workers=None) -> tuple[int, int]:
if skipped:
logger.info("Skipped %d source(s) with existing open PRs", skipped)
if not unprocessed:
# ── Check for re-extraction sources (must run even when queue is empty) ──
reextract_rows = conn.execute(
"""SELECT path, feedback FROM sources
WHERE status = 'needs_reextraction' AND feedback IS NOT NULL
ORDER BY updated_at ASC LIMIT ?""",
(max(1, MAX_SOURCES - len(unprocessed)),),
).fetchall()
if not unprocessed and not reextract_rows:
return 0, 0
if unprocessed:
logger.info("Extract cycle: %d unprocessed source(s) found, processing up to %d", len(unprocessed), MAX_SOURCES)
if reextract_rows:
logger.info("Extract cycle: %d source(s) queued for re-extraction", len(reextract_rows))
# Load existing claims for dedup
existing_claims = load_existing_claims_from_repo(str(main))
@ -792,14 +803,6 @@ async def extract_cycle(conn, max_workers=None) -> tuple[int, int]:
total_ok = 0
total_err = 0
# ── Re-extraction: pick up sources that failed eval and have feedback ──
reextract_rows = conn.execute(
"""SELECT path, feedback FROM sources
WHERE status = 'needs_reextraction' AND feedback IS NOT NULL
ORDER BY updated_at ASC LIMIT ?""",
(max(1, MAX_SOURCES - len(unprocessed)),),
).fetchall()
for row in reextract_rows:
reex_path = row["path"]
# Source was archived — read from archive location