#!/usr/bin/env bash
# evaluate-trigger.sh — Find unreviewed PRs, run 2-agent review, auto-merge if approved.
#
# Reviews each PR with TWO agents:
#   1. Leo (evaluator) — quality gates, cross-domain connections, coherence
#   2. Domain agent — domain expertise, duplicate check, technical accuracy
#
# After both reviews, auto-merges if:
#   - Leo's comment contains the marker <!-- VERDICT:LEO:APPROVE -->
#   - Domain agent's comment contains the marker <!-- VERDICT:<AGENT>:APPROVE -->
#   - No territory violations (files outside proposer's domain)
#
# Usage:
#   ./ops/evaluate-trigger.sh              # review + auto-merge approved PRs
#   ./ops/evaluate-trigger.sh 47           # review a specific PR by number
#   ./ops/evaluate-trigger.sh --dry-run    # show what would be reviewed, don't run
#   ./ops/evaluate-trigger.sh --leo-only   # skip domain agent, just run Leo
#   ./ops/evaluate-trigger.sh --no-merge   # review only, don't auto-merge (old behavior)
#
# Requirements:
#   - claude CLI (claude -p for headless mode)
#   - gh CLI authenticated with repo access
#   - Run from the teleo-codex repo root
#
# Safety:
#   - Lockfile prevents concurrent runs
#   - Auto-merge requires ALL reviewers to approve + no territory violations
#   - Each PR runs sequentially to avoid branch conflicts
#   - Timeout: 20 minutes per agent per PR
#   - Pre-flight checks: clean working tree, gh auth
#
# Verdict protocol:
#   All agents use `gh pr comment` (NOT `gh pr review`) because all agents
#   share the m3taversal GitHub account — `gh pr review --approve` fails
#   when the PR author and reviewer are the same user. The merge check
#   parses issue comments for structured verdict markers instead.

set -euo pipefail

# Allow nested Claude Code sessions (headless spawned from interactive)
unset CLAUDECODE 2>/dev/null || true

REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
cd "$REPO_ROOT"

LOCKFILE="/tmp/evaluate-trigger.lock"
LOG_DIR="$REPO_ROOT/ops/sessions"
TIMEOUT_SECONDS=1200   # per agent, per PR (20 minutes)
DRY_RUN=false
LEO_ONLY=false
NO_MERGE=false
SPECIFIC_PR=""

# --- Domain routing map ---
#######################################
# Decide which domain agent should review a PR.
# The branch name is tried first; if it gives no match, the changed file
# paths are searched for a domains/<domain>/ directory.
# Arguments: $1 - PR number
# Outputs:   "<agent> <domain>" on stdout (both empty when nothing matched)
# Returns:   0 always; gh failures are treated as "no information"
#######################################
detect_domain_agent() {
  local pr_number="$1"
  local branch files domain agent

  branch=$(gh pr view "$pr_number" --json headRefName --jq '.headRefName' 2>/dev/null || echo "")
  files=$(gh pr view "$pr_number" --json files --jq '.files[].path' 2>/dev/null || echo "")

  # Try branch prefix first
  case "$branch" in
    rio/*|*/internet-finance*) agent="rio"; domain="internet-finance" ;;
    clay/*|*/entertainment*) agent="clay"; domain="entertainment" ;;
    theseus/*|logos/*|*/ai-alignment*) agent="theseus"; domain="ai-alignment" ;;
    vida/*|*/health*) agent="vida"; domain="health" ;;
    astra/*|*/space-development*) agent="astra"; domain="space-development" ;;
    leo/*|*/grand-strategy*) agent="leo"; domain="grand-strategy" ;;
    contrib/*)
      # External contributor — detect domain from changed files (fall through to file check)
      agent=""; domain=""
      ;;
    *) agent=""; domain="" ;;
  esac

  # If no agent detected from branch prefix, check changed files.
  # Here-strings are used instead of `echo | grep -q`: under pipefail an early
  # grep exit can SIGPIPE the echo and turn a match into a false negative.
  if [ -z "$agent" ]; then
    if grep -q "domains/internet-finance/" <<< "$files"; then
      agent="rio"; domain="internet-finance"
    elif grep -q "domains/entertainment/" <<< "$files"; then
      agent="clay"; domain="entertainment"
    elif grep -q "domains/ai-alignment/" <<< "$files"; then
      agent="theseus"; domain="ai-alignment"
    elif grep -q "domains/health/" <<< "$files"; then
      agent="vida"; domain="health"
    elif grep -q "domains/space-development/" <<< "$files"; then
      agent="astra"; domain="space-development"
    fi
  fi

  echo "$agent $domain"
}

# --- Parse arguments ---
for arg in "$@"; do
  case "$arg" in
    --dry-run) DRY_RUN=true ;;
    --leo-only) LEO_ONLY=true ;;
    --no-merge) NO_MERGE=true ;;
    --help|-h)
      # Prints header lines 3-23 of this file (description through
      # Requirements); keep the header layout in sync with these numbers.
      head -23 "$0" | tail -21
      exit 0
      ;;
    ""|*[!0-9]*)
      # Anything that is not purely digits is rejected, so values such as
      # "47abc" never reach gh or the agent prompts as a PR number.
      echo "Unknown argument: $arg"
      exit 1
      ;;
    *) SPECIFIC_PR="$arg" ;;
  esac
done

# --- Pre-flight checks ---
if ! gh auth status >/dev/null 2>&1; then
  echo "ERROR: gh CLI not authenticated. Run 'gh auth login' first."
  exit 1
fi

if ! command -v claude >/dev/null 2>&1; then
  echo "ERROR: claude CLI not found. Install it first."
  exit 1
fi

# Check for dirty working tree (ignore ops/, .claude/, .github/ which may contain local-only files).
# Only untracked (??) and unstaged-modified ( M) entries under those directories are ignored.
# `|| true`: grep exits 1 when every line is filtered out, which is the clean case.
DIRTY_FILES=$(git status --porcelain \
  | grep -vE '^(\?\?| M) (ops|\.claude|\.github)/' || true)
if [ -n "$DIRTY_FILES" ]; then
  echo "ERROR: Working tree is dirty. Clean up before running."
  echo "$DIRTY_FILES"
  exit 1
fi

# --- Lockfile (prevent concurrent runs) ---
if [ -f "$LOCKFILE" ]; then
  LOCK_PID=$(cat "$LOCKFILE" 2>/dev/null || echo "")
  # kill -0 only probes whether the recorded PID is still alive.
  if [ -n "$LOCK_PID" ] && kill -0 "$LOCK_PID" 2>/dev/null; then
    echo "Another evaluate-trigger is running (PID $LOCK_PID). Exiting."
    exit 1
  else
    echo "Stale lockfile found. Removing."
    rm -f "$LOCKFILE"
  fi
fi
echo $$ > "$LOCKFILE"
trap 'rm -f "$LOCKFILE"' EXIT

# --- Ensure log directory exists ---
mkdir -p "$LOG_DIR"

# --- Find PRs to review ---
if [ -n "$SPECIFIC_PR" ]; then
  PR_STATE=$(gh pr view "$SPECIFIC_PR" --json state --jq '.state' 2>/dev/null || echo "NOT_FOUND")
  if [ "$PR_STATE" != "OPEN" ]; then
    echo "PR #$SPECIFIC_PR is $PR_STATE (not OPEN). Reviewing anyway for testing."
  fi
  PRS_TO_REVIEW="$SPECIFIC_PR"
else
  # NOTE: gh pr list silently returns empty in some worktree configs; use gh api instead.
  # --paginate: without it only the first page of open PRs is returned.
  OPEN_PRS=$(gh api repos/:owner/:repo/pulls --paginate --jq '.[].number' 2>/dev/null || echo "")

  if [ -z "$OPEN_PRS" ]; then
    echo "No open PRs found. Nothing to review."
    exit 0
  fi

  PRS_TO_REVIEW=""
  for pr in $OPEN_PRS; do
    # Check if this PR already has a Leo verdict comment (avoid re-reviewing)
    LEO_COMMENTED=$(gh pr view "$pr" --json comments \
      --jq '[.comments[] | select(.body | test("VERDICT:LEO:(APPROVE|REQUEST_CHANGES)"))] | length' 2>/dev/null || echo "0")

    if [ "$LEO_COMMENTED" = "0" ]; then
      PRS_TO_REVIEW="$PRS_TO_REVIEW $pr"
      continue
    fi

    # Check if new commits since last Leo review. Both values come from gh as
    # timestamps and are compared as plain strings.
    LAST_COMMIT_DATE=$(gh pr view "$pr" --json commits --jq '.commits[-1].committedDate' 2>/dev/null || echo "")
    LAST_LEO_DATE=$(gh pr view "$pr" --json comments \
      --jq '[.comments[] | select(.body | test("VERDICT:LEO:")) | .createdAt] | last' 2>/dev/null || echo "")
    if [ -n "$LAST_COMMIT_DATE" ] && [ -n "$LAST_LEO_DATE" ] && [[ "$LAST_COMMIT_DATE" > "$LAST_LEO_DATE" ]]; then
      echo "PR #$pr: New commits since last review. Queuing for re-review."
      PRS_TO_REVIEW="$PRS_TO_REVIEW $pr"
    else
      echo "PR #$pr: Already reviewed. Skipping."
    fi
  done

  # xargs with no command trims and squeezes the whitespace of the list.
  PRS_TO_REVIEW=$(echo "$PRS_TO_REVIEW" | xargs)

  if [ -z "$PRS_TO_REVIEW" ]; then
    echo "All open PRs are up to date. Nothing to do."
    exit 0
  fi
fi

echo "PRs to review: $PRS_TO_REVIEW"

if [ "$DRY_RUN" = true ]; then
  for pr in $PRS_TO_REVIEW; do
    read -r agent domain <<< "$(detect_domain_agent "$pr")"
    echo "[DRY RUN] PR #$pr — Leo + ${agent:-unknown} (${domain:-unknown domain})"
  done
  exit 0
fi

# --- Run headless reviews on each PR ---
#######################################
# Run one headless claude review, bounded by TIMEOUT_SECONDS.
# Globals:   LOG_DIR, TIMEOUT_SECONDS (read)
# Arguments: $1 - PR number
#            $2 - agent name (used in the log and review file names)
#            $3 - full prompt text
#            $4 - model name passed to claude --model
# Outputs:   progress messages on stdout; claude output goes to the log file
# Returns:   0 if claude exited 0, 1 on timeout or any other failure
#######################################
run_agent_review() {
  local pr="$1" agent_name="$2" prompt="$3" model="$4"
  local timestamp log_file review_file exit_code

  timestamp=$(date +%Y%m%d-%H%M%S)
  log_file="$LOG_DIR/${agent_name}-review-pr${pr}-${timestamp}.log"
  review_file="/tmp/${agent_name}-review-pr${pr}.md"

  echo " Running ${agent_name} (model: ${model})..."
  echo " Log: $log_file"

  # perl arms an alarm and then execs claude, so SIGALRM terminates claude
  # itself once the timeout elapses. `|| exit_code=$?` records the status
  # without tripping set -e.
  exit_code=0
  perl -e 'alarm shift; exec @ARGV or die "exec failed: $!\n"' \
    "$TIMEOUT_SECONDS" claude -p \
    --model "$model" \
    --allowedTools "Read,Write,Edit,Bash,Glob,Grep" \
    --permission-mode bypassPermissions \
    "$prompt" \
    > "$log_file" 2>&1 || exit_code=$?

  # The review file is scratch space the agent writes before posting.
  rm -f "$review_file"

  if [ "$exit_code" -eq 0 ]; then
    echo " ${agent_name}: Review posted."
    return 0
  fi

  # 142 = 128 + SIGALRM(14); 124 is also treated as a timeout.
  if [ "$exit_code" -eq 142 ] || [ "$exit_code" -eq 124 ]; then
    echo " ${agent_name}: TIMEOUT after ${TIMEOUT_SECONDS}s."
  else
    echo " ${agent_name}: FAILED (exit code $exit_code)."
  fi
  return 1
}

# --- Territory violation check ---
#######################################
# Verify that all changed files are within the proposer's expected territory.
# The proposer is the first path segment of the PR branch name.
# Arguments: $1 - PR number
# Outputs:   one " - <path>" line per violating file; an empty line when
#            there are none or when the check is skipped
# Returns:   0 always
#######################################
check_territory_violations() {
  local pr_number="$1"
  local branch files proposer violations file
  local allowed_domains=""

  branch=$(gh pr view "$pr_number" --json headRefName --jq '.headRefName' 2>/dev/null || echo "")
  files=$(gh pr view "$pr_number" --json files --jq '.files[].path' 2>/dev/null || echo "")

  # Determine proposer from branch prefix
  proposer=${branch%%/*}

  # Map proposer to allowed directories (an ERE alternation of path prefixes)
  case "$proposer" in
    rio) allowed_domains="domains/internet-finance/" ;;
    clay) allowed_domains="domains/entertainment/" ;;
    theseus) allowed_domains="domains/ai-alignment/" ;;
    vida) allowed_domains="domains/health/" ;;
    astra) allowed_domains="domains/space-development/" ;;
    leo) allowed_domains="core/|foundations/" ;;
    contrib) echo ""; return 0 ;;  # External contributors — skip territory check
    *) echo ""; return 0 ;;        # Unknown proposer — skip check
  esac

  violations=""
  while IFS= read -r file; do
    [ -z "$file" ] && continue
    case "$file" in
      # Always allowed: inbox/archive, own agent dir, maps/, foundations/
      # (any agent can propose foundation claims)
      inbox/archive/*|agents/"$proposer"/*|maps/*|foundations/*) continue ;;
    esac
    # The group is required: without it "^a|b" anchors only the first
    # alternative and the second would match anywhere in the path.
    if grep -qE "^(${allowed_domains})" <<< "$file"; then
      continue
    fi
    violations+=" - ${file}"$'\n'
  done <<< "$files"

  if [ -n "$violations" ]; then
    # printf '%s' keeps backslashes in file names literal (echo -e would not).
    printf '%s' "$violations"
  else
    echo ""
  fi
}

# --- Auto-merge check ---
# Parses issue comments for structured verdict markers.
# Verdict protocol: agents post `<!-- VERDICT:AGENT_KEY:APPROVE -->` or
# `<!-- VERDICT:AGENT_KEY:REQUEST_CHANGES -->` as HTML comments in their review.
# This is machine-parseable and invisible in the rendered comment.
#######################################
# Decide whether a PR may be auto-merged.
# Arguments: $1 - PR number
#            $2 - domain agent name (empty or "leo" skips the domain gate)
#            $3 - "true" if Leo's review run completed successfully
# Outputs:   one status line per gate on stdout; "BLOCK: ..." on refusal
# Returns:   0 if every gate passed, 1 at the first gate that blocks
#######################################
check_merge_eligible() {
  local pr_number="$1"
  local domain_agent="$2"
  local leo_passed="$3"
  local leo_verdict domain_key domain_verdict violations

  # Gate 1: Leo must have completed without timeout/error
  if [ "$leo_passed" != "true" ]; then
    echo "BLOCK: Leo review failed or timed out"
    return 1
  fi

  # Gate 2: Check Leo's verdict from issue comments (the latest comment that
  # carries a Leo marker wins).
  leo_verdict=$(gh pr view "$pr_number" --json comments \
    --jq '[.comments[] | select(.body | test("VERDICT:LEO:")) | .body] | last' 2>/dev/null || echo "")

  # Here-strings instead of `echo | grep -q`: under pipefail an early grep
  # exit can SIGPIPE the echo and make a real match look like a miss.
  if grep -q "VERDICT:LEO:APPROVE" <<< "$leo_verdict"; then
    echo "Leo: APPROVED"
  elif grep -q "VERDICT:LEO:REQUEST_CHANGES" <<< "$leo_verdict"; then
    echo "BLOCK: Leo requested changes"
    return 1
  else
    echo "BLOCK: Could not find Leo's verdict marker in PR comments"
    return 1
  fi

  # Gate 3: Check domain agent verdict (if applicable)
  if [ -n "$domain_agent" ] && [ "$domain_agent" != "leo" ]; then
    domain_key=$(echo "$domain_agent" | tr '[:lower:]' '[:upper:]')
    domain_verdict=$(gh pr view "$pr_number" --json comments \
      --jq "[.comments[] | select(.body | test(\"VERDICT:${domain_key}:\")) | .body] | last" 2>/dev/null || echo "")

    if grep -q "VERDICT:${domain_key}:APPROVE" <<< "$domain_verdict"; then
      echo "Domain agent ($domain_agent): APPROVED"
    elif grep -q "VERDICT:${domain_key}:REQUEST_CHANGES" <<< "$domain_verdict"; then
      echo "BLOCK: $domain_agent requested changes"
      return 1
    else
      echo "BLOCK: No verdict marker found for $domain_agent"
      return 1
    fi
  else
    echo "Domain agent: N/A (leo-only or grand-strategy)"
  fi

  # Gate 4: Territory violations
  violations=$(check_territory_violations "$pr_number")

  if [ -n "$violations" ]; then
    echo "BLOCK: Territory violations detected:"
    printf '%s\n' "$violations"
    return 1
  else
    echo "Territory: clean"
  fi

  return 0
}

REVIEWED=0
FAILED=0
MERGED=0

for pr in $PRS_TO_REVIEW; do
  echo ""
  echo "=== PR #$pr ==="
  echo "Started: $(date)"

  # Detect which domain agent should review
  read -r DOMAIN_AGENT DOMAIN <<< "$(detect_domain_agent "$pr")"
  echo "Domain: ${DOMAIN:-unknown} | Agent: ${DOMAIN_AGENT:-none detected}"

  # --- Review 1: Leo (evaluator) ---
  LEO_REVIEW_FILE="/tmp/leo-review-pr${pr}.md"
  LEO_PROMPT="You are Leo. Read agents/leo/identity.md, agents/leo/beliefs.md, agents/leo/reasoning.md, and skills/evaluate.md.

Review PR #${pr} on this repo.

First, run: gh pr view ${pr} --json title,body,files,additions,deletions
Then checkout the PR branch: gh pr checkout ${pr}
Read every changed file completely.

Before evaluating, scan the existing knowledge base for duplicate and contradiction checks:
- List claim files in the relevant domain directory (e.g., domains/${DOMAIN}/)
- Read titles to check for semantic duplicates
- Check for contradictions with existing claims in that domain and in foundations/

For each proposed claim, evaluate against these 11 quality criteria from CLAUDE.md:
1. Specificity — Is this specific enough to disagree with?
2. Evidence — Is there traceable evidence in the body?
3. Description quality — Does the description add info beyond the title?
4. Confidence calibration — Does the confidence level match the evidence?
5. Duplicate check — Does this already exist in the knowledge base?
6. Contradiction check — Does this contradict an existing claim? If so, is the contradiction explicit?
7. Value add — Does this genuinely expand what the knowledge base knows?
8. Wiki links — Do all [[links]] point to real files?
9. Scope qualification — Does the claim specify structural vs functional, micro vs macro, causal vs correlational?
10. Universal quantifier check — Does the title use unwarranted universals (all, always, never, the only)?
11. Counter-evidence acknowledgment — For likely or higher: is opposing evidence acknowledged?

Also check:
- Source archive updated correctly (status field)
- Commit messages follow conventions
- Files are in the correct domain directory
- Cross-domain connections that the proposer may have missed

Write your complete review to ${LEO_REVIEW_FILE}

CRITICAL — Verdict format: Your review MUST end with exactly one of these verdict markers (as an HTML comment on its own line):
<!-- VERDICT:LEO:APPROVE -->
<!-- VERDICT:LEO:REQUEST_CHANGES -->

Then post the review as an issue comment:
gh pr comment ${pr} --body-file ${LEO_REVIEW_FILE}

IMPORTANT: Use 'gh pr comment' NOT 'gh pr review'. We use a shared GitHub account so gh pr review --approve fails.
DO NOT merge — the orchestrator handles merge decisions after all reviews are posted.
Work autonomously. Do not ask for confirmation."

  if run_agent_review "$pr" "leo" "$LEO_PROMPT" "opus"; then
    LEO_PASSED=true
  else
    LEO_PASSED=false
  fi

  # Return to main between reviews and drop the local PR branch the agent
  # checked out (best effort).
  git checkout main 2>/dev/null || git checkout -f main
  PR_BRANCH=$(gh pr view "$pr" --json headRefName --jq '.headRefName' 2>/dev/null || echo "")
  if [ -n "$PR_BRANCH" ]; then
    git branch -D "$PR_BRANCH" 2>/dev/null || true
  fi

  # --- Review 2: Domain agent ---
  # Stays true when the domain review is skipped; set to false only when a
  # domain review was attempted and failed or timed out.
  DOMAIN_PASSED=true
  if [ "$LEO_ONLY" = true ]; then
    echo " Skipping domain agent review (--leo-only)."
  elif [ -z "$DOMAIN_AGENT" ]; then
    echo " Could not detect domain agent. Skipping domain review."
  elif [ "$DOMAIN_AGENT" = "leo" ]; then
    echo " Domain is grand-strategy (Leo's territory). Single review sufficient."
  else
    DOMAIN_REVIEW_FILE="/tmp/${DOMAIN_AGENT}-review-pr${pr}.md"
    # Capitalised name for the prose ("Rio"); all-caps key for the marker ("RIO").
    AGENT_NAME_UPPER=$(echo "${DOMAIN_AGENT}" | awk '{print toupper(substr($0,1,1)) substr($0,2)}')
    AGENT_KEY_UPPER=$(echo "${DOMAIN_AGENT}" | tr '[:lower:]' '[:upper:]')
    DOMAIN_PROMPT="You are ${AGENT_NAME_UPPER}. Read agents/${DOMAIN_AGENT}/identity.md, agents/${DOMAIN_AGENT}/beliefs.md, and skills/evaluate.md.

You are reviewing PR #${pr} as the domain expert for ${DOMAIN}.

First, run: gh pr view ${pr} --json title,body,files,additions,deletions
Then checkout the PR branch: gh pr checkout ${pr}
Read every changed file completely.

Your review focuses on DOMAIN EXPERTISE — things only a ${DOMAIN} specialist would catch:

1. **Technical accuracy** — Are the claims factually correct within the ${DOMAIN} domain?
2. **Domain duplicates** — Do any claims duplicate existing knowledge in domains/${DOMAIN}/? Scan the directory and read titles carefully.
3. **Missing context** — What important nuance from the ${DOMAIN} domain is the claim missing?
4. **Belief impact** — Do any claims affect your current beliefs? Read agents/${DOMAIN_AGENT}/beliefs.md and flag if any belief needs updating.
5. **Connections** — What existing claims in your domain should be wiki-linked?
6. **Confidence calibration** — From your domain expertise, is the confidence level right?

Write your review to ${DOMAIN_REVIEW_FILE}

CRITICAL — Verdict format: Your review MUST end with exactly one of these verdict markers (as an HTML comment on its own line):
<!-- VERDICT:${AGENT_KEY_UPPER}:APPROVE -->
<!-- VERDICT:${AGENT_KEY_UPPER}:REQUEST_CHANGES -->

Then post the review as an issue comment:
gh pr comment ${pr} --body-file ${DOMAIN_REVIEW_FILE}

IMPORTANT: Use 'gh pr comment' NOT 'gh pr review'. We use a shared GitHub account so gh pr review --approve fails.
Sign your review as ${AGENT_NAME_UPPER} (domain reviewer for ${DOMAIN}).
DO NOT duplicate Leo's quality gate checks — he covers those.
DO NOT merge — the orchestrator handles merge decisions after all reviews are posted.
Work autonomously. Do not ask for confirmation."

    # Called inside `if` so that a failed or timed-out domain review does not
    # abort the whole run under set -e.
    if ! run_agent_review "$pr" "$DOMAIN_AGENT" "$DOMAIN_PROMPT" "sonnet"; then
      DOMAIN_PASSED=false
    fi

    # Clean up branch again
    git checkout main 2>/dev/null || git checkout -f main
    if [ -n "$PR_BRANCH" ]; then
      git branch -D "$PR_BRANCH" 2>/dev/null || true
    fi
  fi

  if [ "$LEO_PASSED" = true ]; then
    REVIEWED=$((REVIEWED + 1))
  else
    FAILED=$((FAILED + 1))
  fi

  # --- Auto-merge decision ---
  if [ "$NO_MERGE" = true ]; then
    echo " Auto-merge: skipped (--no-merge)"
  elif [ "$LEO_PASSED" != "true" ]; then
    echo " Auto-merge: skipped (Leo review failed)"
  elif [ "$DOMAIN_PASSED" != "true" ]; then
    # A verdict marker left by an earlier review must not be trusted when
    # this run's domain review did not complete.
    echo " Auto-merge: skipped (domain agent review failed)"
  else
    echo ""
    echo " --- Merge eligibility check ---"
    # `|| MERGE_RESULT=$?` is required: a bare VAR=$(cmd) with a non-zero
    # status would terminate the script under set -e before the reasons
    # are printed.
    MERGE_RESULT=0
    MERGE_LOG=$(check_merge_eligible "$pr" "$DOMAIN_AGENT" "$LEO_PASSED") || MERGE_RESULT=$?
    echo "$MERGE_LOG" | sed 's/^/ /'

    if [ "$MERGE_RESULT" -eq 0 ]; then
      echo " Auto-merge: ALL GATES PASSED — merging PR #$pr"
      if gh pr merge "$pr" --squash 2>&1; then
        echo " PR #$pr: MERGED successfully."
        MERGED=$((MERGED + 1))
      else
        echo " PR #$pr: Merge FAILED. May need manual intervention."
      fi
    else
      echo " Auto-merge: BLOCKED — see reasons above"
    fi
  fi

  echo "Finished: $(date)"
done

echo ""
echo "=== Summary ==="
echo "Reviewed: $REVIEWED"
echo "Failed: $FAILED"
echo "Merged: $MERGED"
echo "Logs: $LOG_DIR"