fix: re-extraction loop runs even when queue is empty
Some checks are pending
CI / lint-and-test (push) Waiting to run
Some checks are pending
CI / lint-and-test (push) Waiting to run
The re-extraction check was below an early return that fires when unprocessed queue is empty. Sources in needs_reextraction state were never picked up unless new sources happened to arrive simultaneously. Move re-extraction query above the gate so both paths run independently. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
4c3ce265e4
commit
c763c99910
1 changed files with 13 additions and 10 deletions
|
|
@ -776,10 +776,21 @@ async def extract_cycle(conn, max_workers=None) -> tuple[int, int]:
|
|||
if skipped:
|
||||
logger.info("Skipped %d source(s) with existing open PRs", skipped)
|
||||
|
||||
if not unprocessed:
|
||||
# ── Check for re-extraction sources (must run even when queue is empty) ──
|
||||
reextract_rows = conn.execute(
|
||||
"""SELECT path, feedback FROM sources
|
||||
WHERE status = 'needs_reextraction' AND feedback IS NOT NULL
|
||||
ORDER BY updated_at ASC LIMIT ?""",
|
||||
(max(1, MAX_SOURCES - len(unprocessed)),),
|
||||
).fetchall()
|
||||
|
||||
if not unprocessed and not reextract_rows:
|
||||
return 0, 0
|
||||
|
||||
if unprocessed:
|
||||
logger.info("Extract cycle: %d unprocessed source(s) found, processing up to %d", len(unprocessed), MAX_SOURCES)
|
||||
if reextract_rows:
|
||||
logger.info("Extract cycle: %d source(s) queued for re-extraction", len(reextract_rows))
|
||||
|
||||
# Load existing claims for dedup
|
||||
existing_claims = load_existing_claims_from_repo(str(main))
|
||||
|
|
@ -792,14 +803,6 @@ async def extract_cycle(conn, max_workers=None) -> tuple[int, int]:
|
|||
total_ok = 0
|
||||
total_err = 0
|
||||
|
||||
# ── Re-extraction: pick up sources that failed eval and have feedback ──
|
||||
reextract_rows = conn.execute(
|
||||
"""SELECT path, feedback FROM sources
|
||||
WHERE status = 'needs_reextraction' AND feedback IS NOT NULL
|
||||
ORDER BY updated_at ASC LIMIT ?""",
|
||||
(max(1, MAX_SOURCES - len(unprocessed)),),
|
||||
).fetchall()
|
||||
|
||||
for row in reextract_rows:
|
||||
reex_path = row["path"]
|
||||
# Source was archived — read from archive location
|
||||
|
|
|
|||
Loading…
Reference in a new issue