diff --git a/ops/extract-cron.sh b/ops/extract-cron.sh index 53827cf..a08789d 100755 --- a/ops/extract-cron.sh +++ b/ops/extract-cron.sh @@ -2,10 +2,22 @@ # Extract claims from unprocessed sources in inbox/archive/ # Runs via cron on VPS every 15 minutes. # +# Concurrency model: +# - Lockfile prevents overlapping runs +# - MAX_SOURCES=5 per cycle (works through backlog over multiple runs) +# - Sequential processing (one source at a time) +# - 50 sources landing at once = ~10 cron cycles to clear, not 50 parallel agents +# +# Domain routing: +# - Reads domain: field from source frontmatter +# - Maps to the domain agent (rio, clay, theseus, vida, astra, leo) +# - Runs extraction AS that agent — their territory, their extraction +# - Skips sources with status: processing (the agent is handling them itself) +# # Flow: # 1. Pull latest main -# 2. Find sources with status: unprocessed -# 3. For each: run Claude headless to extract claims +# 2. Find sources with status: unprocessed (skip processing/processed/null-result) +# 3. For each: run Claude headless to extract claims as the domain agent # 4. Commit extractions, push, open PR # 5. Update source status to processed #