diff --git a/.gitignore b/.gitignore index e062cc7fe..3fe9a7869 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,7 @@ .DS_Store *.DS_Store ops/sessions/ -ops/__pycache__/ +__pycache__/ **/.extraction-debug/ pipeline.db *.excalidraw diff --git a/ops/auto-deploy.sh b/ops/auto-deploy.sh index a96a3d7fa..fa57b762f 100755 --- a/ops/auto-deploy.sh +++ b/ops/auto-deploy.sh @@ -69,7 +69,7 @@ RSYNC_FLAGS="-az --exclude='__pycache__' --exclude='*.pyc' --exclude='*.bak*'" rsync $RSYNC_FLAGS ops/pipeline-v2/lib/ "$PIPELINE_DIR/lib/" -for f in teleo-pipeline.py reweave.py batch-extract-50.sh; do +for f in teleo-pipeline.py reweave.py; do [ -f "ops/pipeline-v2/$f" ] && rsync $RSYNC_FLAGS "ops/pipeline-v2/$f" "$PIPELINE_DIR/$f" done diff --git a/ops/deploy-manifest.md b/ops/deploy-manifest.md index a5a68bc85..92cb69946 100644 --- a/ops/deploy-manifest.md +++ b/ops/deploy-manifest.md @@ -36,7 +36,7 @@ Copy this into your PR description and fill it in: | File type | Example | Needs manifest? | |-----------|---------|-----------------| | Python application code | bot.py, app.py, alerting.py | Yes | -| Shell scripts on VPS | extract-cron.sh, evaluate-trigger.sh | Yes | +| Shell scripts on VPS | research-session.sh, auto-deploy.sh | Yes | | systemd service/timer files | teleo-bot.service | Yes | | Database migrations | ALTER TABLE, new tables | Yes | | HTML/CSS/JS served by app | dashboard.html, teleo-app | Yes | diff --git a/ops/deploy.sh b/ops/deploy.sh index 861ec9bfe..fa7a091a5 100755 --- a/ops/deploy.sh +++ b/ops/deploy.sh @@ -66,7 +66,7 @@ rsync $RSYNC_FLAGS "$REPO_ROOT/ops/pipeline-v2/lib/" "$VPS_HOST:$VPS_PIPELINE/li echo "" echo "=== Pipeline top-level ===" -for f in teleo-pipeline.py reweave.py batch-extract-50.sh; do +for f in teleo-pipeline.py reweave.py; do [ -f "$REPO_ROOT/ops/pipeline-v2/$f" ] || continue rsync $RSYNC_FLAGS "$REPO_ROOT/ops/pipeline-v2/$f" "$VPS_HOST:$VPS_PIPELINE/$f" done diff --git a/ops/evaluate-trigger.sh b/ops/evaluate-trigger.sh deleted file mode 100755 index 078fae861..000000000 --- a/ops/evaluate-trigger.sh +++ /dev/null @@ -1,621 +0,0 @@ -#!/usr/bin/env bash -# evaluate-trigger.sh — Find unreviewed PRs, run 2-agent review, auto-merge if approved. -# -# Reviews each PR with up to THREE agents: -# 1. Leo (evaluator) — quality gates, cross-domain connections, coherence -# 2. Domain agent — domain expertise, duplicate check, technical accuracy -# 3. Ganymede (code reviewer) — code quality, correctness, safety (code PRs only) -# -# Ganymede reviews any PR that touches code files (ops/, diagnostics/, .py, .sh, etc.) -# -# After all reviews, auto-merges if: -# - Leo's comment contains "**Verdict:** approve" -# - Domain agent's comment contains "**Verdict:** approve" (if applicable) -# - Ganymede's comment contains "**Verdict:** approve" (if code PR) -# - No territory violations (files outside proposer's domain) -# -# Usage: -# ./ops/evaluate-trigger.sh # review + auto-merge approved PRs -# ./ops/evaluate-trigger.sh 47 # review a specific PR by number -# ./ops/evaluate-trigger.sh --dry-run # show what would be reviewed, don't run -# ./ops/evaluate-trigger.sh --leo-only # skip domain agent, just run Leo -# ./ops/evaluate-trigger.sh --no-merge # review only, don't auto-merge (old behavior) -# -# Requirements: -# - claude CLI (claude -p for headless mode) -# - gh CLI authenticated with repo access -# - Run from the teleo-codex repo root -# -# Safety: -# - Lockfile prevents concurrent runs -# - Auto-merge requires ALL reviewers to approve + no territory violations -# - Each PR runs sequentially to avoid branch conflicts -# - Timeout: 20 minutes per agent per PR -# - Pre-flight checks: clean working tree, gh auth -# -# Verdict protocol: -# All agents use `gh pr comment` (NOT `gh pr review`) because all agents -# share the m3taversal GitHub account — `gh pr review --approve` fails -# when the PR author and reviewer are the same user. The merge check -# parses issue comments for structured verdict markers instead. - -set -euo pipefail - -# Allow nested Claude Code sessions (headless spawned from interactive) -unset CLAUDECODE 2>/dev/null || true - -REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)" -cd "$REPO_ROOT" - -LOCKFILE="/tmp/evaluate-trigger.lock" -LOG_DIR="$REPO_ROOT/ops/sessions" -TIMEOUT_SECONDS=1200 -DRY_RUN=false -LEO_ONLY=false -NO_MERGE=false -SPECIFIC_PR="" - -# --- Code PR detection --- -# Returns "true" if the PR touches code files (ops/, diagnostics/, scripts, .py, .sh, .js, .html) -# These PRs need Ganymede code review in addition to Leo's quality review. -detect_code_pr() { - local pr_number="$1" - local files - - files=$(gh pr view "$pr_number" --json files --jq '.files[].path' 2>/dev/null || echo "") - - if echo "$files" | grep -qE "^ops/|\.py$|\.sh$|\.js$|\.html$|\.css$|\.json$"; then - echo "true" - else - echo "false" - fi -} - -# --- Domain routing map --- -# Maps branch prefix or domain directory to agent name and identity path -detect_domain_agent() { - local pr_number="$1" - local branch files domain agent - - branch=$(gh pr view "$pr_number" --json headRefName --jq '.headRefName' 2>/dev/null || echo "") - files=$(gh pr view "$pr_number" --json files --jq '.files[].path' 2>/dev/null || echo "") - - # Try branch prefix first - case "$branch" in - rio/*|*/internet-finance*) agent="rio"; domain="internet-finance" ;; - clay/*|*/entertainment*) agent="clay"; domain="entertainment" ;; - theseus/*|*/ai-alignment*) agent="theseus"; domain="ai-alignment" ;; - vida/*|*/health*) agent="vida"; domain="health" ;; - astra/*|*/space-development*) agent="astra"; domain="space-development" ;; - leo/*|*/grand-strategy*) agent="leo"; domain="grand-strategy" ;; - contrib/*) - # External contributor — detect domain from changed files (fall through to file check) - agent=""; domain="" - ;; - *) - agent=""; domain="" - ;; - esac - - # If no agent detected from branch prefix, check changed files - if [ -z "$agent" ]; then - if echo "$files" | grep -q "domains/internet-finance/"; then - agent="rio"; domain="internet-finance" - elif echo "$files" | grep -q "domains/entertainment/"; then - agent="clay"; domain="entertainment" - elif echo "$files" | grep -q "domains/ai-alignment/"; then - agent="theseus"; domain="ai-alignment" - elif echo "$files" | grep -q "domains/health/"; then - agent="vida"; domain="health" - elif echo "$files" | grep -q "domains/space-development/"; then - agent="astra"; domain="space-development" - fi - fi - - echo "$agent $domain" -} - -# --- Parse arguments --- -for arg in "$@"; do - case "$arg" in - --dry-run) DRY_RUN=true ;; - --leo-only) LEO_ONLY=true ;; - --no-merge) NO_MERGE=true ;; - [0-9]*) SPECIFIC_PR="$arg" ;; - --help|-h) - head -23 "$0" | tail -21 - exit 0 - ;; - *) - echo "Unknown argument: $arg" - exit 1 - ;; - esac -done - -# --- Pre-flight checks --- -if ! gh auth status >/dev/null 2>&1; then - echo "ERROR: gh CLI not authenticated. Run 'gh auth login' first." - exit 1 -fi - -if ! command -v claude >/dev/null 2>&1; then - echo "ERROR: claude CLI not found. Install it first." - exit 1 -fi - -# Check for dirty working tree (ignore ops/, .claude/, .github/ which may contain local-only files) -DIRTY_FILES=$(git status --porcelain | grep -v '^?? ops/' | grep -v '^ M ops/' | grep -v '^?? \.claude/' | grep -v '^ M \.claude/' | grep -v '^?? \.github/' | grep -v '^ M \.github/' || true) -if [ -n "$DIRTY_FILES" ]; then - echo "ERROR: Working tree is dirty. Clean up before running." - echo "$DIRTY_FILES" - exit 1 -fi - -# --- Lockfile (prevent concurrent runs) --- -if [ -f "$LOCKFILE" ]; then - LOCK_PID=$(cat "$LOCKFILE" 2>/dev/null || echo "") - if [ -n "$LOCK_PID" ] && kill -0 "$LOCK_PID" 2>/dev/null; then - echo "Another evaluate-trigger is running (PID $LOCK_PID). Exiting." - exit 1 - else - echo "Stale lockfile found. Removing." - rm -f "$LOCKFILE" - fi -fi -echo $$ > "$LOCKFILE" -trap 'rm -f "$LOCKFILE"' EXIT - -# --- Ensure log directory exists --- -mkdir -p "$LOG_DIR" - -# --- Find PRs to review --- -if [ -n "$SPECIFIC_PR" ]; then - PR_STATE=$(gh pr view "$SPECIFIC_PR" --json state --jq '.state' 2>/dev/null || echo "NOT_FOUND") - if [ "$PR_STATE" != "OPEN" ]; then - echo "PR #$SPECIFIC_PR is $PR_STATE (not OPEN). Reviewing anyway for testing." - fi - PRS_TO_REVIEW="$SPECIFIC_PR" -else - # NOTE: gh pr list silently returns empty in some worktree configs; use gh api instead - OPEN_PRS=$(gh api repos/:owner/:repo/pulls --jq '.[].number' 2>/dev/null || echo "") - - if [ -z "$OPEN_PRS" ]; then - echo "No open PRs found. Nothing to review." - exit 0 - fi - - PRS_TO_REVIEW="" - for pr in $OPEN_PRS; do - # Check if this PR already has a Leo verdict comment (avoid re-reviewing) - LEO_COMMENTED=$(gh pr view "$pr" --json comments \ - --jq '[.comments[] | select(.body | test("VERDICT:LEO:(APPROVE|REQUEST_CHANGES)"))] | length' 2>/dev/null || echo "0") - LAST_COMMIT_DATE=$(gh pr view "$pr" --json commits --jq '.commits[-1].committedDate' 2>/dev/null || echo "") - - if [ "$LEO_COMMENTED" = "0" ]; then - PRS_TO_REVIEW="$PRS_TO_REVIEW $pr" - else - # Check if new commits since last Leo review - LAST_LEO_DATE=$(gh pr view "$pr" --json comments \ - --jq '[.comments[] | select(.body | test("VERDICT:LEO:")) | .createdAt] | last' 2>/dev/null || echo "") - if [ -n "$LAST_COMMIT_DATE" ] && [ -n "$LAST_LEO_DATE" ] && [[ "$LAST_COMMIT_DATE" > "$LAST_LEO_DATE" ]]; then - echo "PR #$pr: New commits since last review. Queuing for re-review." - PRS_TO_REVIEW="$PRS_TO_REVIEW $pr" - else - echo "PR #$pr: Already reviewed. Skipping." - fi - fi - done - - PRS_TO_REVIEW=$(echo "$PRS_TO_REVIEW" | xargs) - - if [ -z "$PRS_TO_REVIEW" ]; then - echo "All open PRs are up to date. Nothing to do." - exit 0 - fi -fi - -echo "PRs to review: $PRS_TO_REVIEW" - -if [ "$DRY_RUN" = true ]; then - for pr in $PRS_TO_REVIEW; do - read -r agent domain <<< "$(detect_domain_agent "$pr")" - is_code=$(detect_code_pr "$pr") - reviewers="Leo + ${agent:-unknown} (${domain:-unknown domain})" - [ "$is_code" = "true" ] && reviewers="$reviewers + Ganymede (code)" - echo "[DRY RUN] PR #$pr — $reviewers" - done - exit 0 -fi - -# --- Run headless reviews on each PR --- -run_agent_review() { - local pr="$1" agent_name="$2" prompt="$3" model="$4" - local timestamp log_file review_file - - timestamp=$(date +%Y%m%d-%H%M%S) - log_file="$LOG_DIR/${agent_name}-review-pr${pr}-${timestamp}.log" - review_file="/tmp/${agent_name}-review-pr${pr}.md" - - echo " Running ${agent_name} (model: ${model})..." - echo " Log: $log_file" - - if perl -e "alarm $TIMEOUT_SECONDS; exec @ARGV" claude -p \ - --model "$model" \ - --allowedTools "Read,Write,Edit,Bash,Glob,Grep" \ - --permission-mode bypassPermissions \ - "$prompt" \ - > "$log_file" 2>&1; then - echo " ${agent_name}: Review posted." - rm -f "$review_file" - return 0 - else - local exit_code=$? - if [ "$exit_code" -eq 142 ] || [ "$exit_code" -eq 124 ]; then - echo " ${agent_name}: TIMEOUT after ${TIMEOUT_SECONDS}s." - else - echo " ${agent_name}: FAILED (exit code $exit_code)." - fi - rm -f "$review_file" - return 1 - fi -} - -# --- Territory violation check --- -# Verifies all changed files are within the proposer's expected territory -check_territory_violations() { - local pr_number="$1" - local branch files proposer violations - - branch=$(gh pr view "$pr_number" --json headRefName --jq '.headRefName' 2>/dev/null || echo "") - files=$(gh pr view "$pr_number" --json files --jq '.files[].path' 2>/dev/null || echo "") - - # Determine proposer from branch prefix - proposer=$(echo "$branch" | cut -d'/' -f1) - - # Map proposer to allowed directories - local allowed_domains="" - case "$proposer" in - rio) allowed_domains="domains/internet-finance/" ;; - clay) allowed_domains="domains/entertainment/" ;; - theseus) allowed_domains="domains/ai-alignment/" ;; - vida) allowed_domains="domains/health/" ;; - astra) allowed_domains="domains/space-development/" ;; - leo) allowed_domains="core/|foundations/" ;; - contrib) echo ""; return 0 ;; # External contributors — skip territory check - *) echo ""; return 0 ;; # Unknown proposer — skip check - esac - - # Check each file — allow inbox/archive/, agents/{proposer}/, schemas/, foundations/, and the agent's domain - violations="" - while IFS= read -r file; do - [ -z "$file" ] && continue - # Always allowed: inbox/archive, own agent dir, maps/, foundations/ (any agent can propose foundation claims) - if echo "$file" | grep -qE "^inbox/archive/|^agents/${proposer}/|^maps/|^foundations/"; then - continue - fi - # Check against allowed domain directories - if echo "$file" | grep -qE "^${allowed_domains}"; then - continue - fi - violations="${violations} - ${file}\n" - done <<< "$files" - - if [ -n "$violations" ]; then - echo -e "$violations" - else - echo "" - fi -} - -# --- Auto-merge check --- -# Parses issue comments for structured verdict markers. -# Verdict protocol: agents post `` or -# `` as HTML comments in their review. -# This is machine-parseable and invisible in the rendered comment. -check_merge_eligible() { - local pr_number="$1" - local domain_agent="$2" - local leo_passed="$3" - local is_code_pr="${4:-false}" - local ganymede_passed="${5:-true}" - - # Gate 1: Leo must have completed without timeout/error - if [ "$leo_passed" != "true" ]; then - echo "BLOCK: Leo review failed or timed out" - return 1 - fi - - # Gate 2: Check Leo's verdict from issue comments - local leo_verdict - leo_verdict=$(gh pr view "$pr_number" --json comments \ - --jq '[.comments[] | select(.body | test("VERDICT:LEO:")) | .body] | last' 2>/dev/null || echo "") - - if echo "$leo_verdict" | grep -q "VERDICT:LEO:APPROVE"; then - echo "Leo: APPROVED" - elif echo "$leo_verdict" | grep -q "VERDICT:LEO:REQUEST_CHANGES"; then - echo "BLOCK: Leo requested changes" - return 1 - else - echo "BLOCK: Could not find Leo's verdict marker in PR comments" - return 1 - fi - - # Gate 3: Check domain agent verdict (if applicable) - if [ -n "$domain_agent" ] && [ "$domain_agent" != "leo" ]; then - local domain_key - domain_key=$(echo "$domain_agent" | tr '[:lower:]' '[:upper:]') - local domain_verdict - domain_verdict=$(gh pr view "$pr_number" --json comments \ - --jq "[.comments[] | select(.body | test(\"VERDICT:${domain_key}:\")) | .body] | last" 2>/dev/null || echo "") - - if echo "$domain_verdict" | grep -q "VERDICT:${domain_key}:APPROVE"; then - echo "Domain agent ($domain_agent): APPROVED" - elif echo "$domain_verdict" | grep -q "VERDICT:${domain_key}:REQUEST_CHANGES"; then - echo "BLOCK: $domain_agent requested changes" - return 1 - else - echo "BLOCK: No verdict marker found for $domain_agent" - return 1 - fi - else - echo "Domain agent: N/A (leo-only or grand-strategy)" - fi - - # Gate 4: Ganymede code review (for code PRs) - if [ "$is_code_pr" = "true" ]; then - if [ "$ganymede_passed" != "true" ]; then - echo "BLOCK: Ganymede code review failed or timed out" - return 1 - fi - - local ganymede_verdict - ganymede_verdict=$(gh pr view "$pr_number" --json comments \ - --jq '[.comments[] | select(.body | test("VERDICT:GANYMEDE:")) | .body] | last' 2>/dev/null || echo "") - - if echo "$ganymede_verdict" | grep -q "VERDICT:GANYMEDE:APPROVE"; then - echo "Ganymede (code review): APPROVED" - elif echo "$ganymede_verdict" | grep -q "VERDICT:GANYMEDE:REQUEST_CHANGES"; then - echo "BLOCK: Ganymede requested code changes" - return 1 - else - echo "BLOCK: No verdict marker found for Ganymede code review" - return 1 - fi - fi - - # Gate 5: Territory violations - local violations - violations=$(check_territory_violations "$pr_number") - - if [ -n "$violations" ]; then - echo "BLOCK: Territory violations detected:" - echo -e "$violations" - return 1 - else - echo "Territory: clean" - fi - - return 0 -} - -REVIEWED=0 -FAILED=0 -MERGED=0 - -for pr in $PRS_TO_REVIEW; do - echo "" - echo "=== PR #$pr ===" - echo "Started: $(date)" - - # Detect which domain agent should review - read -r DOMAIN_AGENT DOMAIN <<< "$(detect_domain_agent "$pr")" - echo "Domain: ${DOMAIN:-unknown} | Agent: ${DOMAIN_AGENT:-none detected}" - - # --- Review 1: Leo (evaluator) --- - LEO_REVIEW_FILE="/tmp/leo-review-pr${pr}.md" - LEO_PROMPT="You are Leo. Read agents/leo/identity.md, agents/leo/beliefs.md, agents/leo/reasoning.md, and skills/evaluate.md. - -Review PR #${pr} on this repo. - -First, run: gh pr view ${pr} --json title,body,files,additions,deletions -Then checkout the PR branch: gh pr checkout ${pr} -Read every changed file completely. - -Before evaluating, scan the existing knowledge base for duplicate and contradiction checks: -- List claim files in the relevant domain directory (e.g., domains/${DOMAIN}/) -- Read titles to check for semantic duplicates -- Check for contradictions with existing claims in that domain and in foundations/ - -For each proposed claim, evaluate against these 11 quality criteria from CLAUDE.md: -1. Specificity — Is this specific enough to disagree with? -2. Evidence — Is there traceable evidence in the body? -3. Description quality — Does the description add info beyond the title? -4. Confidence calibration — Does the confidence level match the evidence? -5. Duplicate check — Does this already exist in the knowledge base? -6. Contradiction check — Does this contradict an existing claim? If so, is the contradiction explicit? -7. Value add — Does this genuinely expand what the knowledge base knows? -8. Wiki links — Do all [[links]] point to real files? -9. Scope qualification — Does the claim specify structural vs functional, micro vs macro, causal vs correlational? -10. Universal quantifier check — Does the title use unwarranted universals (all, always, never, the only)? -11. Counter-evidence acknowledgment — For likely or higher: is opposing evidence acknowledged? - -Also check: -- Source archive updated correctly (status field) -- Commit messages follow conventions -- Files are in the correct domain directory -- Cross-domain connections that the proposer may have missed - -Write your complete review to ${LEO_REVIEW_FILE} - -CRITICAL — Verdict format: Your review MUST end with exactly one of these verdict markers (as an HTML comment on its own line): - - - -Then post the review as an issue comment: - gh pr comment ${pr} --body-file ${LEO_REVIEW_FILE} - -IMPORTANT: Use 'gh pr comment' NOT 'gh pr review'. We use a shared GitHub account so gh pr review --approve fails. -DO NOT merge — the orchestrator handles merge decisions after all reviews are posted. -Work autonomously. Do not ask for confirmation." - - if run_agent_review "$pr" "leo" "$LEO_PROMPT" "opus"; then - LEO_PASSED=true - else - LEO_PASSED=false - fi - - # Return to main between reviews - git checkout main 2>/dev/null || git checkout -f main - PR_BRANCH=$(gh pr view "$pr" --json headRefName --jq '.headRefName' 2>/dev/null || echo "") - [ -n "$PR_BRANCH" ] && git branch -D "$PR_BRANCH" 2>/dev/null || true - - # --- Review 2: Domain agent --- - if [ "$LEO_ONLY" = true ]; then - echo " Skipping domain agent review (--leo-only)." - elif [ -z "$DOMAIN_AGENT" ]; then - echo " Could not detect domain agent. Skipping domain review." - elif [ "$DOMAIN_AGENT" = "leo" ]; then - echo " Domain is grand-strategy (Leo's territory). Single review sufficient." - else - DOMAIN_REVIEW_FILE="/tmp/${DOMAIN_AGENT}-review-pr${pr}.md" - AGENT_NAME_UPPER=$(echo "${DOMAIN_AGENT}" | awk '{print toupper(substr($0,1,1)) substr($0,2)}') - AGENT_KEY_UPPER=$(echo "${DOMAIN_AGENT}" | tr '[:lower:]' '[:upper:]') - DOMAIN_PROMPT="You are ${AGENT_NAME_UPPER}. Read agents/${DOMAIN_AGENT}/identity.md, agents/${DOMAIN_AGENT}/beliefs.md, and skills/evaluate.md. - -You are reviewing PR #${pr} as the domain expert for ${DOMAIN}. - -First, run: gh pr view ${pr} --json title,body,files,additions,deletions -Then checkout the PR branch: gh pr checkout ${pr} -Read every changed file completely. - -Your review focuses on DOMAIN EXPERTISE — things only a ${DOMAIN} specialist would catch: - -1. **Technical accuracy** — Are the claims factually correct within the ${DOMAIN} domain? -2. **Domain duplicates** — Do any claims duplicate existing knowledge in domains/${DOMAIN}/? - Scan the directory and read titles carefully. -3. **Missing context** — What important nuance from the ${DOMAIN} domain is the claim missing? -4. **Belief impact** — Do any claims affect your current beliefs? Read agents/${DOMAIN_AGENT}/beliefs.md - and flag if any belief needs updating. -5. **Connections** — What existing claims in your domain should be wiki-linked? -6. **Confidence calibration** — From your domain expertise, is the confidence level right? - -Write your review to ${DOMAIN_REVIEW_FILE} - -CRITICAL — Verdict format: Your review MUST end with exactly one of these verdict markers (as an HTML comment on its own line): - - - -Then post the review as an issue comment: - gh pr comment ${pr} --body-file ${DOMAIN_REVIEW_FILE} - -IMPORTANT: Use 'gh pr comment' NOT 'gh pr review'. We use a shared GitHub account so gh pr review --approve fails. -Sign your review as ${AGENT_NAME_UPPER} (domain reviewer for ${DOMAIN}). -DO NOT duplicate Leo's quality gate checks — he covers those. -DO NOT merge — the orchestrator handles merge decisions after all reviews are posted. -Work autonomously. Do not ask for confirmation." - - run_agent_review "$pr" "$DOMAIN_AGENT" "$DOMAIN_PROMPT" "sonnet" - - # Clean up branch again - git checkout main 2>/dev/null || git checkout -f main - [ -n "$PR_BRANCH" ] && git branch -D "$PR_BRANCH" 2>/dev/null || true - fi - - # --- Review 3: Ganymede code review (for PRs touching code files) --- - IS_CODE_PR=$(detect_code_pr "$pr") - GANYMEDE_PASSED=true - - if [ "$IS_CODE_PR" = "true" ] && [ "$LEO_ONLY" != true ]; then - echo " Code files detected — running Ganymede code review." - GANYMEDE_REVIEW_FILE="/tmp/ganymede-review-pr${pr}.md" - GANYMEDE_PROMPT="You are Ganymede, the code quality reviewer for the Teleo collective. - -Review PR #${pr} for code quality, correctness, and safety. - -First, run: gh pr view ${pr} --json title,body,files,additions,deletions -Then checkout the PR branch: gh pr checkout ${pr} -Read every changed file completely. Also read the existing versions of modified files on main for comparison. - -Your review focuses on CODE QUALITY — things a code reviewer catches: - -1. **Correctness** — Does the code do what it claims? Are there logic errors, off-by-one bugs, or unhandled edge cases? -2. **Safety** — Any security issues? SQL injection, path traversal, unchecked inputs, secrets in code? -3. **Breaking changes** — Does this change file formats, API responses, DB schemas, or config structures that other agents depend on? If so, is there a migration path? -4. **Error handling** — Will failures be visible or silent? Are there bare excepts, missing error messages, or swallowed exceptions? -5. **Integration** — Does the code work with the existing system? Are imports correct, paths valid, dependencies present? -6. **Simplicity** — Is this more complex than it needs to be? Could it be simpler? - -Also check: -- systemd ReadWritePaths if new file write paths are introduced -- Path format consistency (absolute vs relative) -- Concurrent edit risk on shared files (app.py, bot.py, etc.) - -Write your review to ${GANYMEDE_REVIEW_FILE} - -CRITICAL — Verdict format: Your review MUST end with exactly one of these verdict markers (as an HTML comment on its own line): - - - -Then post the review as an issue comment: - gh pr comment ${pr} --body-file ${GANYMEDE_REVIEW_FILE} - -IMPORTANT: Use 'gh pr comment' NOT 'gh pr review'. We use a shared GitHub account so gh pr review --approve fails. -Sign your review as Ganymede (code reviewer). -DO NOT duplicate Leo's knowledge quality checks — he covers those. You cover code. -DO NOT merge — the orchestrator handles merge decisions after all reviews are posted. -Work autonomously. Do not ask for confirmation." - - if run_agent_review "$pr" "ganymede" "$GANYMEDE_PROMPT" "sonnet"; then - GANYMEDE_PASSED=true - else - GANYMEDE_PASSED=false - fi - - # Clean up branch - git checkout main 2>/dev/null || git checkout -f main - [ -n "$PR_BRANCH" ] && git branch -D "$PR_BRANCH" 2>/dev/null || true - elif [ "$IS_CODE_PR" = "true" ] && [ "$LEO_ONLY" = true ]; then - echo " Code files detected but skipping Ganymede review (--leo-only)." - fi - - if [ "$LEO_PASSED" = true ]; then - REVIEWED=$((REVIEWED + 1)) - else - FAILED=$((FAILED + 1)) - fi - - # --- Auto-merge decision --- - if [ "$NO_MERGE" = true ]; then - echo " Auto-merge: skipped (--no-merge)" - elif [ "$LEO_PASSED" != "true" ]; then - echo " Auto-merge: skipped (Leo review failed)" - else - echo "" - echo " --- Merge eligibility check ---" - MERGE_LOG=$(check_merge_eligible "$pr" "$DOMAIN_AGENT" "$LEO_PASSED" "$IS_CODE_PR" "$GANYMEDE_PASSED") - MERGE_RESULT=$? - echo "$MERGE_LOG" | sed 's/^/ /' - - if [ "$MERGE_RESULT" -eq 0 ]; then - echo " Auto-merge: ALL GATES PASSED — merging PR #$pr" - if gh pr merge "$pr" --squash 2>&1; then - echo " PR #$pr: MERGED successfully." - MERGED=$((MERGED + 1)) - else - echo " PR #$pr: Merge FAILED. May need manual intervention." - fi - else - echo " Auto-merge: BLOCKED — see reasons above" - fi - fi - - echo "Finished: $(date)" -done - -echo "" -echo "=== Summary ===" -echo "Reviewed: $REVIEWED" -echo "Failed: $FAILED" -echo "Merged: $MERGED" -echo "Logs: $LOG_DIR" diff --git a/ops/extract-cron.sh b/ops/extract-cron.sh deleted file mode 100755 index a08789d82..000000000 --- a/ops/extract-cron.sh +++ /dev/null @@ -1,179 +0,0 @@ -#!/bin/bash -# Extract claims from unprocessed sources in inbox/archive/ -# Runs via cron on VPS every 15 minutes. -# -# Concurrency model: -# - Lockfile prevents overlapping runs -# - MAX_SOURCES=5 per cycle (works through backlog over multiple runs) -# - Sequential processing (one source at a time) -# - 50 sources landing at once = ~10 cron cycles to clear, not 50 parallel agents -# -# Domain routing: -# - Reads domain: field from source frontmatter -# - Maps to the domain agent (rio, clay, theseus, vida, astra, leo) -# - Runs extraction AS that agent — their territory, their extraction -# - Skips sources with status: processing (agent handling it themselves) -# -# Flow: -# 1. Pull latest main -# 2. Find sources with status: unprocessed (skip processing/processed/null-result) -# 3. For each: run Claude headless to extract claims as the domain agent -# 4. Commit extractions, push, open PR -# 5. Update source status to processed -# -# The eval pipeline (webhook.py) handles review and merge separately. - -set -euo pipefail - -REPO_DIR="/opt/teleo-eval/workspaces/extract" -REPO_URL="http://m3taversal:$(cat /opt/teleo-eval/secrets/forgejo-admin-token)@localhost:3000/teleo/teleo-codex.git" -CLAUDE_BIN="/home/teleo/.local/bin/claude" -LOG_DIR="/opt/teleo-eval/logs" -LOG="$LOG_DIR/extract-cron.log" -LOCKFILE="/tmp/extract-cron.lock" -MAX_SOURCES=5 # Process at most 5 sources per run to limit cost - -log() { echo "[$(date -Iseconds)] $*" >> "$LOG"; } - -# --- Lock --- -if [ -f "$LOCKFILE" ]; then - pid=$(cat "$LOCKFILE" 2>/dev/null) - if kill -0 "$pid" 2>/dev/null; then - log "SKIP: already running (pid $pid)" - exit 0 - fi - log "WARN: stale lockfile, removing" - rm -f "$LOCKFILE" -fi -echo $$ > "$LOCKFILE" -trap 'rm -f "$LOCKFILE"' EXIT - -# --- Ensure repo clone --- -if [ ! -d "$REPO_DIR/.git" ]; then - log "Cloning repo..." - git clone "$REPO_URL" "$REPO_DIR" >> "$LOG" 2>&1 -fi - -cd "$REPO_DIR" - -# --- Pull latest main --- -git checkout main >> "$LOG" 2>&1 -git pull --rebase >> "$LOG" 2>&1 - -# --- Find unprocessed sources --- -UNPROCESSED=$(grep -rl '^status: unprocessed' inbox/archive/ 2>/dev/null | head -n "$MAX_SOURCES" || true) - -if [ -z "$UNPROCESSED" ]; then - log "No unprocessed sources found" - exit 0 -fi - -COUNT=$(echo "$UNPROCESSED" | wc -l | tr -d ' ') -log "Found $COUNT unprocessed source(s)" - -# --- Process each source --- -for SOURCE_FILE in $UNPROCESSED; do - SLUG=$(basename "$SOURCE_FILE" .md) - BRANCH="extract/$SLUG" - - log "Processing: $SOURCE_FILE → branch $BRANCH" - - # Create branch from main - git checkout main >> "$LOG" 2>&1 - git branch -D "$BRANCH" 2>/dev/null || true - git checkout -b "$BRANCH" >> "$LOG" 2>&1 - - # Read domain from frontmatter - DOMAIN=$(grep '^domain:' "$SOURCE_FILE" | head -1 | sed 's/domain: *//' | tr -d '"' | tr -d "'" | xargs) - - # Map domain to agent - case "$DOMAIN" in - internet-finance) AGENT="rio" ;; - entertainment) AGENT="clay" ;; - ai-alignment) AGENT="theseus" ;; - health) AGENT="vida" ;; - space-development) AGENT="astra" ;; - *) AGENT="leo" ;; - esac - - AGENT_TOKEN=$(cat "/opt/teleo-eval/secrets/forgejo-${AGENT}-token" 2>/dev/null || cat /opt/teleo-eval/secrets/forgejo-leo-token) - - log "Domain: $DOMAIN, Agent: $AGENT" - - # Run Claude headless to extract claims - EXTRACT_PROMPT="You are $AGENT, a Teleo knowledge base agent. Extract claims from this source. - -READ these files first: -- skills/extract.md (extraction process) -- schemas/claim.md (claim format) -- $SOURCE_FILE (the source to extract from) - -Then scan domains/$DOMAIN/ to check for duplicate claims. - -EXTRACT claims following the process in skills/extract.md: -1. Read the source completely -2. Separate evidence from interpretation -3. Extract candidate claims (specific, disagreeable, evidence-backed) -4. Check for duplicates against existing claims in domains/$DOMAIN/ -5. Write claim files to domains/$DOMAIN/ with proper YAML frontmatter -6. Update $SOURCE_FILE: set status to 'processed', add processed_by: $AGENT, processed_date: $(date +%Y-%m-%d), and claims_extracted list - -If no claims can be extracted, update $SOURCE_FILE: set status to 'null-result' and add notes explaining why. - -IMPORTANT: Use the Edit tool to update the source file status. Use the Write tool to create new claim files. Do not create claims that duplicate existing ones." - - # Run extraction with timeout (10 minutes) - timeout 600 "$CLAUDE_BIN" -p "$EXTRACT_PROMPT" \ - --allowedTools 'Read,Write,Edit,Glob,Grep' \ - --model sonnet \ - >> "$LOG" 2>&1 || { - log "WARN: Claude extraction failed or timed out for $SOURCE_FILE" - git checkout main >> "$LOG" 2>&1 - continue - } - - # Check if any files were created/modified - CHANGES=$(git status --porcelain | wc -l | tr -d ' ') - if [ "$CHANGES" -eq 0 ]; then - log "No changes produced for $SOURCE_FILE" - git checkout main >> "$LOG" 2>&1 - continue - fi - - # Stage and commit - git add inbox/archive/ "domains/$DOMAIN/" >> "$LOG" 2>&1 - git commit -m "$AGENT: extract claims from $(basename "$SOURCE_FILE") - -- Source: $SOURCE_FILE -- Domain: $DOMAIN -- Extracted by: headless extraction cron - -Pentagon-Agent: $(echo "$AGENT" | sed 's/./\U&/') " >> "$LOG" 2>&1 - - # Push branch - git push -u "$REPO_URL" "$BRANCH" --force >> "$LOG" 2>&1 - - # Open PR - PR_TITLE="$AGENT: extract claims from $(basename "$SOURCE_FILE" .md)" - PR_BODY="## Automated Extraction\n\nSource: \`$SOURCE_FILE\`\nDomain: $DOMAIN\nExtracted by: headless cron on VPS\n\nThis PR was created automatically by the extraction cron job. Claims were extracted using \`skills/extract.md\` process via Claude headless." - - curl -s -X POST "http://localhost:3000/api/v1/repos/teleo/teleo-codex/pulls" \ - -H "Authorization: token $AGENT_TOKEN" \ - -H "Content-Type: application/json" \ - -d "{ - \"title\": \"$PR_TITLE\", - \"body\": \"$PR_BODY\", - \"base\": \"main\", - \"head\": \"$BRANCH\" - }" >> "$LOG" 2>&1 - - log "PR opened for $SOURCE_FILE" - - # Back to main for next source - git checkout main >> "$LOG" 2>&1 - - # Brief pause between extractions - sleep 5 -done - -log "Extraction run complete: processed $COUNT source(s)" diff --git a/ops/pipeline-v2/batch-extract-50.sh b/ops/pipeline-v2/batch-extract-50.sh deleted file mode 100755 index c4499029f..000000000 --- a/ops/pipeline-v2/batch-extract-50.sh +++ /dev/null @@ -1,283 +0,0 @@ -#!/bin/bash -# Batch extract sources from inbox/queue/ — v3 with two-gate skip logic -# -# Uses separate extract/ worktree (not main/ — prevents daemon race condition). -# Skip logic uses two checks instead of local marker files (Ganymede v3 review): -# Gate 1: Is source already in archive/{domain}/? → already processed, dedup -# Gate 2: Does extraction branch exist on Forgejo? → extraction in progress -# Gate 3: Does pipeline.db show ≥3 closed PRs for this source? → zombie, skip -# Gate 4: Does pipeline.db show active OR recently closed PR? → skip (4h cooldown) -# All gates pass → extract -# -# Architecture: Ganymede (two-gate) + Rhea (separate worktrees) - -REPO=/opt/teleo-eval/workspaces/extract -MAIN_REPO=/opt/teleo-eval/workspaces/main -EXTRACT=/opt/teleo-eval/openrouter-extract-v2.py -CLEANUP=/opt/teleo-eval/post-extract-cleanup.py -LOG=/opt/teleo-eval/logs/batch-extract-50.log -DB=/opt/teleo-eval/pipeline/pipeline.db -TOKEN=$(cat /opt/teleo-eval/secrets/forgejo-leo-token) -FORGEJO_URL="http://localhost:3000" -MAX=50 -MAX_CLOSED=3 # zombie retry limit: skip source after this many closed PRs -COUNT=0 -SUCCESS=0 -FAILED=0 -SKIPPED=0 - -# Lockfile to prevent concurrent runs -LOCKFILE="/tmp/batch-extract.lock" -if [ -f "$LOCKFILE" ]; then - pid=$(cat "$LOCKFILE" 2>/dev/null) - if kill -0 "$pid" 2>/dev/null; then - echo "[$(date)] SKIP: batch extract already running (pid $pid)" >> $LOG - exit 0 - fi - rm -f "$LOCKFILE" -fi -echo $$ > "$LOCKFILE" -trap 'rm -f "$LOCKFILE"' EXIT - -echo "[$(date)] Starting batch extraction of $MAX sources" >> $LOG - -cd $REPO || exit 1 - -# Bug fix: don't swallow errors on critical git commands (Ganymede review) -git fetch origin main >> $LOG 2>&1 || { echo "[$(date)] FATAL: fetch origin main failed" >> $LOG; exit 1; } -git checkout -f main >> $LOG 2>&1 || { echo "[$(date)] FATAL: checkout main failed" >> $LOG; exit 1; } -git reset --hard origin/main >> $LOG 2>&1 || { echo "[$(date)] FATAL: reset --hard failed" >> $LOG; exit 1; } - -# SHA canary: verify extract worktree matches origin/main (Ganymede review) -LOCAL_SHA=$(git rev-parse HEAD) -REMOTE_SHA=$(git rev-parse origin/main) -if [ "$LOCAL_SHA" != "$REMOTE_SHA" ]; then - echo "[$(date)] FATAL: extract worktree diverged from main ($LOCAL_SHA vs $REMOTE_SHA)" >> $LOG - exit 1 -fi - -# Pre-extraction cleanup: remove queue files that already exist in archive -# This runs on the MAIN worktree (not extract/) so deletions are committed to git. -# Prevents the "queue duplicate reappears after reset --hard" problem. -CLEANED=0 -for qfile in $MAIN_REPO/inbox/queue/*.md; do - [ -f "$qfile" ] || continue - qbase=$(basename "$qfile") - if find "$MAIN_REPO/inbox/archive" -name "$qbase" 2>/dev/null | grep -q .; then - rm -f "$qfile" - CLEANED=$((CLEANED + 1)) - fi -done -if [ "$CLEANED" -gt 0 ]; then - echo "[$(date)] Cleaned $CLEANED stale queue duplicates" >> $LOG - cd $MAIN_REPO - git add -A inbox/queue/ 2>/dev/null - git commit -m "pipeline: clean $CLEANED stale queue duplicates - -Pentagon-Agent: Epimetheus <3D35839A-7722-4740-B93D-51157F7D5E70>" 2>/dev/null - # Push with retry - for attempt in 1 2 3; do - git pull --rebase origin main 2>/dev/null - git push origin main 2>/dev/null && break - sleep 2 - done - cd $REPO - git fetch origin main 2>/dev/null - git reset --hard origin/main 2>/dev/null -fi - -# Get sources in queue -SOURCES=$(ls inbox/queue/*.md 2>/dev/null | head -$MAX) - -# Batch fetch all remote branches once (Ganymede: 1 call instead of 84) -REMOTE_BRANCHES=$(git ls-remote --heads origin 2>/dev/null) -if [ $? -ne 0 ]; then - echo "[$(date)] ABORT: git ls-remote failed — remote unreachable, skipping cycle" >> $LOG - exit 0 -fi - -for SOURCE in $SOURCES; do - COUNT=$((COUNT + 1)) - BASENAME=$(basename "$SOURCE" .md) - BRANCH="extract/$BASENAME" - - # Skip conversation archives — valuable content enters through standalone sources, - # inline tags (SOURCE:/CLAIM:), and transcript review. Raw conversations produce - # low-quality claims with schema failures. (Epimetheus session 4) - if grep -q "^format: conversation" "$SOURCE" 2>/dev/null; then - # Move to archive instead of leaving in queue (prevents re-processing) - mv "$SOURCE" "$MAIN_REPO/inbox/archive/telegram/" 2>/dev/null - echo "[$(date)] [$COUNT/$MAX] ARCHIVE $BASENAME (conversation — skipped extraction)" >> $LOG - SKIPPED=$((SKIPPED + 1)) - continue - fi - - # Gate 1: Already in archive? Source was already processed — dedup (Ganymede) - if find "$MAIN_REPO/inbox/archive" -name "$BASENAME.md" 2>/dev/null | grep -q .; then - echo "[$(date)] [$COUNT/$MAX] SKIP $BASENAME (already in archive)" >> $LOG - # Delete the queue duplicate - rm -f "$MAIN_REPO/inbox/queue/$BASENAME.md" 2>/dev/null - SKIPPED=$((SKIPPED + 1)) - continue - fi - - # Gate 2: Branch exists on Forgejo? Extraction already in progress (cached lookup) - # Enhancement: 2-hour staleness check (Ganymede review) — if branch is >2h old - # and PR is unmergeable, close PR + delete branch and re-extract - if echo "$REMOTE_BRANCHES" | grep -q "refs/heads/$BRANCH$"; then - # Check branch age - BRANCH_SHA=$(echo "$REMOTE_BRANCHES" | grep "refs/heads/$BRANCH$" | awk '{print $1}') - BRANCH_AGE_EPOCH=$(git log -1 --format='%ct' "$BRANCH_SHA" 2>/dev/null || echo 0) - NOW_EPOCH=$(date +%s) - AGE_HOURS=$(( (NOW_EPOCH - BRANCH_AGE_EPOCH) / 3600 )) - - if [ "$AGE_HOURS" -ge 2 ]; then - # Branch is stale — check if PR is mergeable - # Note: Forgejo head= filter is unreliable. Fetch all open PRs and filter locally. - PR_NUM=$(curl -sf "$FORGEJO_URL/api/v1/repos/teleo/teleo-codex/pulls?state=open&limit=50" \ - -H "Authorization: token $TOKEN" | python3 -c " -import sys,json -prs=json.load(sys.stdin) -branch='$BRANCH' -matches=[p for p in prs if p['head']['ref']==branch] -print(matches[0]['number'] if matches else '') -" 2>/dev/null) - if [ -n "$PR_NUM" ]; then - PR_MERGEABLE=$(curl -sf "$FORGEJO_URL/api/v1/repos/teleo/teleo-codex/pulls/$PR_NUM" \ - -H "Authorization: token $TOKEN" | python3 -c 'import sys,json; print(json.load(sys.stdin).get("mergeable","true"))' 2>/dev/null) - if [ "$PR_MERGEABLE" = "False" ] || [ "$PR_MERGEABLE" = "false" ]; then - echo "[$(date)] [$COUNT/$MAX] STALE: $BASENAME (${AGE_HOURS}h old, unmergeable PR #$PR_NUM) — closing + re-extracting" >> $LOG - # Close PR with audit comment - curl -sf -X POST "$FORGEJO_URL/api/v1/repos/teleo/teleo-codex/issues/$PR_NUM/comments" \ - -H "Authorization: token $TOKEN" -H "Content-Type: application/json" \ - -d '{"body":"Auto-closed: extraction branch stale >2h, conflict unresolvable. Source will be re-extracted from current main."}' > /dev/null 2>&1 - curl -sf -X PATCH "$FORGEJO_URL/api/v1/repos/teleo/teleo-codex/pulls/$PR_NUM" \ - -H "Authorization: token $TOKEN" -H "Content-Type: application/json" \ - -d '{"state":"closed"}' > /dev/null 2>&1 - # Delete remote branch - git push origin --delete "$BRANCH" 2>/dev/null - # Fall through to extraction below - else - echo "[$(date)] [$COUNT/$MAX] SKIP $BASENAME (branch exists ${AGE_HOURS}h, PR #$PR_NUM mergeable — waiting)" >> $LOG - SKIPPED=$((SKIPPED + 1)) - continue - fi - else - # No PR found but branch exists — orphan branch, clean up - echo "[$(date)] [$COUNT/$MAX] STALE: $BASENAME (orphan branch ${AGE_HOURS}h, no PR) — deleting" >> $LOG - git push origin --delete "$BRANCH" 2>/dev/null - # Fall through to extraction - fi - else - echo "[$(date)] [$COUNT/$MAX] SKIP $BASENAME (branch exists — in progress, ${AGE_HOURS}h old)" >> $LOG - SKIPPED=$((SKIPPED + 1)) - continue - fi - fi - - # Gate 3: Check pipeline.db for zombie sources — too many closed PRs means - # the source keeps failing eval. Skip after MAX_CLOSED rejections. (Epimetheus) - if [ -f "$DB" ]; then - CLOSED_COUNT=$(sqlite3 "$DB" "SELECT COUNT(*) FROM prs WHERE branch = 'extract/$BASENAME' AND status = 'closed'" 2>/dev/null || echo 0) - if [ "$CLOSED_COUNT" -ge "$MAX_CLOSED" ]; then - echo "[$(date)] [$COUNT/$MAX] SKIP $BASENAME (zombie: $CLOSED_COUNT closed PRs >= $MAX_CLOSED limit)" >> $LOG - SKIPPED=$((SKIPPED + 1)) - continue - fi - fi - - # Gate 4: Check pipeline.db for active or recently closed PRs — prevents - # re-extraction waste when eval closes a PR and batch-extract runs again - # before the source is manually reviewed. 4h cooldown after closure. - if [ -f "$DB" ]; then - ACTIVE_COUNT=$(sqlite3 "$DB" "SELECT COUNT(*) FROM prs WHERE branch = 'extract/$BASENAME' AND status IN ('extracting','approved','merging')" 2>/dev/null || echo 0) - if [ "$ACTIVE_COUNT" -ge 1 ]; then - echo "[$(date)] [$COUNT/$MAX] SKIP $BASENAME (active PR exists)" >> $LOG - SKIPPED=$((SKIPPED + 1)) - continue - fi - RECENT_CLOSED=$(sqlite3 "$DB" "SELECT COUNT(*) FROM prs WHERE branch = 'extract/$BASENAME' AND status = 'closed' AND created_at > datetime('now', '-4 hours')" 2>/dev/null || echo 0) - if [ "$RECENT_CLOSED" -ge 1 ]; then - echo "[$(date)] [$COUNT/$MAX] SKIP $BASENAME (recently closed PR — 4h cooldown)" >> $LOG - SKIPPED=$((SKIPPED + 1)) - continue - fi - fi - - echo "[$(date)] [$COUNT/$MAX] Processing $BASENAME" >> $LOG - - # Reset to main (log errors — don't swallow) - git checkout -f main >> $LOG 2>&1 || { echo " -> SKIP (checkout main failed)" >> $LOG; SKIPPED=$((SKIPPED + 1)); continue; } - git fetch origin main >> $LOG 2>&1 - git reset --hard origin/main >> $LOG 2>&1 || { echo " -> SKIP (reset failed)" >> $LOG; SKIPPED=$((SKIPPED + 1)); continue; } - - # Clean stale remote branch (Leo's catch — prevents checkout conflicts) - git push origin --delete "$BRANCH" 2>/dev/null - - # Create fresh branch - git branch -D "$BRANCH" 2>/dev/null - git checkout -b "$BRANCH" 2>/dev/null - if [ $? -ne 0 ]; then - echo " -> SKIP (branch creation failed)" >> $LOG - SKIPPED=$((SKIPPED + 1)) - continue - fi - - # Run extraction - python3 $EXTRACT "$SOURCE" --no-review >> $LOG 2>&1 - EXTRACT_RC=$? - - - - if [ $EXTRACT_RC -ne 0 ]; then - FAILED=$((FAILED + 1)) - echo " -> FAILED (extract rc=$EXTRACT_RC)" >> $LOG - continue - fi - - # Post-extraction cleanup - python3 $CLEANUP $REPO >> $LOG 2>&1 - - # Check if any files were created/modified - CHANGED=$(git status --porcelain | wc -l | tr -d " ") - if [ "$CHANGED" -eq 0 ]; then - echo " -> No changes (enrichment/null-result only)" >> $LOG - continue - fi - - # Commit - git add -A - git commit -m "extract: $BASENAME - -Pentagon-Agent: Epimetheus <3D35839A-7722-4740-B93D-51157F7D5E70>" >> $LOG 2>&1 - - # Push - git push "http://leo:${TOKEN}@localhost:3000/teleo/teleo-codex.git" "$BRANCH" --force >> $LOG 2>&1 - - # Create PR (include prior art sidecar if available) - PRIOR_ART_FILE="${SOURCE}.prior-art" - PR_BODY="" - if [ -f "$PRIOR_ART_FILE" ]; then - # Escape JSON special chars in prior art content - PR_BODY=$(cat "$PRIOR_ART_FILE" | python3 -c 'import sys,json; print(json.dumps(sys.stdin.read()))') - PR_BODY=${PR_BODY:1:-1} # Strip outer quotes from json.dumps - fi - curl -sf -X POST "http://localhost:3000/api/v1/repos/teleo/teleo-codex/pulls" \ - -H "Authorization: token $TOKEN" \ - -H "Content-Type: application/json" \ - -d "{\"title\":\"extract: $BASENAME\",\"head\":\"$BRANCH\",\"base\":\"main\",\"body\":\"$PR_BODY\"}" >> /dev/null 2>&1 - - SUCCESS=$((SUCCESS + 1)) - echo " -> SUCCESS ($CHANGED files)" >> $LOG - - # Back to main - git checkout -f main >> $LOG 2>&1 - - # Rate limit - sleep 2 -done - -echo "[$(date)] Batch complete: $SUCCESS success, $FAILED failed, $SKIPPED skipped (already attempted)" >> $LOG - -git checkout -f main >> $LOG 2>&1 -git reset --hard origin/main >> $LOG 2>&1 diff --git a/ops/schema-change-protocol.md b/ops/schema-change-protocol.md index a9827b600..ef584a8ae 100644 --- a/ops/schema-change-protocol.md +++ b/ops/schema-change-protocol.md @@ -37,7 +37,7 @@ When any agent changes a file format, database table, API response shape, or ser | Format | Schema | Producers | Consumers | Pipeline | |---|---|---|---|---| | Claim | `schemas/claim.md` | All proposers (Rio, Clay, Theseus, Vida, Astra) | Leo (eval), all agents (beliefs), visitors | `extract-graph-data.py` | -| Source | `schemas/source.md` | All proposers, Epimetheus (pipeline) | Proposers (extraction), Epimetheus (pipeline) | `extract-cron.sh` | +| Source | `schemas/source.md` | All proposers, Epimetheus (pipeline) | Proposers (extraction), Epimetheus (pipeline) | `lib/extract.py` | | Entity | `schemas/entity.md` | Domain agents | All agents (references), visitors | `extract-graph-data.py` | | Belief | `schemas/belief.md` | Each agent (own file) | Leo (review), other agents (cross-ref) | None currently | | Position | `schemas/position.md` | Each agent (own file) | Leo (review), visitors | None currently |