fix: re-extraction loop runs even when queue is empty
Some checks are pending
CI / lint-and-test (push) Waiting to run
Some checks are pending
CI / lint-and-test (push) Waiting to run
The re-extraction check was below an early return that fires when unprocessed queue is empty. Sources in needs_reextraction state were never picked up unless new sources happened to arrive simultaneously. Move re-extraction query above the gate so both paths run independently. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
4c3ce265e4
commit
c763c99910
1 changed files with 13 additions and 10 deletions
|
|
@ -776,10 +776,21 @@ async def extract_cycle(conn, max_workers=None) -> tuple[int, int]:
|
||||||
if skipped:
|
if skipped:
|
||||||
logger.info("Skipped %d source(s) with existing open PRs", skipped)
|
logger.info("Skipped %d source(s) with existing open PRs", skipped)
|
||||||
|
|
||||||
if not unprocessed:
|
# ── Check for re-extraction sources (must run even when queue is empty) ──
|
||||||
|
reextract_rows = conn.execute(
|
||||||
|
"""SELECT path, feedback FROM sources
|
||||||
|
WHERE status = 'needs_reextraction' AND feedback IS NOT NULL
|
||||||
|
ORDER BY updated_at ASC LIMIT ?""",
|
||||||
|
(max(1, MAX_SOURCES - len(unprocessed)),),
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
if not unprocessed and not reextract_rows:
|
||||||
return 0, 0
|
return 0, 0
|
||||||
|
|
||||||
|
if unprocessed:
|
||||||
logger.info("Extract cycle: %d unprocessed source(s) found, processing up to %d", len(unprocessed), MAX_SOURCES)
|
logger.info("Extract cycle: %d unprocessed source(s) found, processing up to %d", len(unprocessed), MAX_SOURCES)
|
||||||
|
if reextract_rows:
|
||||||
|
logger.info("Extract cycle: %d source(s) queued for re-extraction", len(reextract_rows))
|
||||||
|
|
||||||
# Load existing claims for dedup
|
# Load existing claims for dedup
|
||||||
existing_claims = load_existing_claims_from_repo(str(main))
|
existing_claims = load_existing_claims_from_repo(str(main))
|
||||||
|
|
@ -792,14 +803,6 @@ async def extract_cycle(conn, max_workers=None) -> tuple[int, int]:
|
||||||
total_ok = 0
|
total_ok = 0
|
||||||
total_err = 0
|
total_err = 0
|
||||||
|
|
||||||
# ── Re-extraction: pick up sources that failed eval and have feedback ──
|
|
||||||
reextract_rows = conn.execute(
|
|
||||||
"""SELECT path, feedback FROM sources
|
|
||||||
WHERE status = 'needs_reextraction' AND feedback IS NOT NULL
|
|
||||||
ORDER BY updated_at ASC LIMIT ?""",
|
|
||||||
(max(1, MAX_SOURCES - len(unprocessed)),),
|
|
||||||
).fetchall()
|
|
||||||
|
|
||||||
for row in reextract_rows:
|
for row in reextract_rows:
|
||||||
reex_path = row["path"]
|
reex_path = row["path"]
|
||||||
# Source was archived — read from archive location
|
# Source was archived — read from archive location
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue