From b5927c55d5b2367a77f9a33af414e77ba98d0ce8 Mon Sep 17 00:00:00 2001 From: m3taversal Date: Sat, 28 Mar 2026 21:01:34 +0000 Subject: [PATCH] theseus: add Ganymede pre-merge code review gate to evaluate trigger - What: PRs touching code files (ops/, diagnostics/, .py, .sh, etc.) now get Ganymede code review in addition to Leo + domain agent - Why: Ganymede was reviewing ~30% of code PRs after deploy, not before. This makes code review 100% pre-merge, matching how Leo already gates claims. - How: detect_code_pr() checks file patterns, runs Ganymede with code-focused prompt, adds VERDICT:GANYMEDE gate to merge eligibility check Pentagon-Agent: Theseus <24DE7DA0-E4D5-4023-B1A2-3F736AFF4EEE> --- ops/evaluate-trigger.sh | 116 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 110 insertions(+), 6 deletions(-) diff --git a/ops/evaluate-trigger.sh b/ops/evaluate-trigger.sh index 1a893089..fc1f24e0 100755 --- a/ops/evaluate-trigger.sh +++ b/ops/evaluate-trigger.sh @@ -1,13 +1,17 @@ #!/usr/bin/env bash # evaluate-trigger.sh — Find unreviewed PRs, run 2-agent review, auto-merge if approved. # -# Reviews each PR with TWO agents: +# Reviews each PR with up to THREE agents: # 1. Leo (evaluator) — quality gates, cross-domain connections, coherence # 2. Domain agent — domain expertise, duplicate check, technical accuracy +# 3. Ganymede (code reviewer) — code quality, correctness, safety (code PRs only) # -# After both reviews, auto-merges if: +# Ganymede reviews any PR that touches code files (ops/, diagnostics/, .py, .sh, etc.) 
+# +# After all reviews, auto-merges if: # - Leo's comment contains "**Verdict:** approve" -# - Domain agent's comment contains "**Verdict:** approve" +# - Domain agent's comment contains "**Verdict:** approve" (if applicable) +# - Ganymede's comment contains the "VERDICT:GANYMEDE:APPROVE" marker (if code PR) # - No territory violations (files outside proposer's domain) # # Usage: @@ -51,6 +55,22 @@ LEO_ONLY=false NO_MERGE=false SPECIFIC_PR="" +# --- Code PR detection --- +# Returns "true" if the PR touches code files (ops/, diagnostics/, or any .py, .sh, .js, .html, .css, .json file) +# These PRs need Ganymede code review in addition to Leo's quality review. +detect_code_pr() { + local pr_number="$1" + local files + + files=$(gh pr view "$pr_number" --json files --jq '.files[].path' 2>/dev/null || echo "") + + if echo "$files" | grep -qE "^ops/|^diagnostics/|\.py$|\.sh$|\.js$|\.html$|\.css$|\.json$"; then + echo "true" + else + echo "false" + fi +} + # --- Domain routing map --- # Maps branch prefix or domain directory to agent name and identity path detect_domain_agent() { @@ -200,7 +220,10 @@ echo "PRs to review: $PRS_TO_REVIEW" if [ "$DRY_RUN" = true ]; then for pr in $PRS_TO_REVIEW; do read -r agent domain <<< "$(detect_domain_agent "$pr")" - echo "[DRY RUN] PR #$pr — Leo + ${agent:-unknown} (${domain:-unknown domain})" + is_code=$(detect_code_pr "$pr") + reviewers="Leo + ${agent:-unknown} (${domain:-unknown domain})" + [ "$is_code" = "true" ] && reviewers="$reviewers + Ganymede (code)" + echo "[DRY RUN] PR #$pr — $reviewers" done exit 0 fi @@ -294,6 +317,8 @@ check_merge_eligible() { local pr_number="$1" local domain_agent="$2" local leo_passed="$3" + local is_code_pr="${4:-false}" + local ganymede_passed="${5:-true}" # Gate 1: Leo must have completed without timeout/error if [ "$leo_passed" != "true" ]; then @@ -337,7 +362,29 @@ check_merge_eligible() { echo "Domain agent: N/A (leo-only or grand-strategy)" fi - # Gate 4: Territory violations + # Gate 4: Ganymede code review (for code PRs) 
+ if [ "$is_code_pr" = "true" ]; then + if [ "$ganymede_passed" != "true" ]; then + echo "BLOCK: Ganymede code review failed or timed out" + return 1 + fi + + local ganymede_verdict + ganymede_verdict=$(gh pr view "$pr_number" --json comments \ + --jq '[.comments[] | select(.body | test("VERDICT:GANYMEDE:")) | .body] | last' 2>/dev/null || echo "") + + if echo "$ganymede_verdict" | grep -q "VERDICT:GANYMEDE:APPROVE"; then + echo "Ganymede (code review): APPROVED" + elif echo "$ganymede_verdict" | grep -q "VERDICT:GANYMEDE:REQUEST_CHANGES"; then + echo "BLOCK: Ganymede requested code changes" + return 1 + else + echo "BLOCK: No verdict marker found for Ganymede code review" + return 1 + fi + fi + + # Gate 5: Territory violations local violations violations=$(check_territory_violations "$pr_number") @@ -475,6 +522,63 @@ Work autonomously. Do not ask for confirmation." [ -n "$PR_BRANCH" ] && git branch -D "$PR_BRANCH" 2>/dev/null || true fi + # --- Review 3: Ganymede code review (for PRs touching code files) --- + IS_CODE_PR=$(detect_code_pr "$pr") + GANYMEDE_PASSED=true + + if [ "$IS_CODE_PR" = "true" ] && [ "$LEO_ONLY" != true ]; then + echo " Code files detected — running Ganymede code review." + GANYMEDE_REVIEW_FILE="/tmp/ganymede-review-pr${pr}.md" + GANYMEDE_PROMPT="You are Ganymede, the code quality reviewer for the Teleo collective. + +Review PR #${pr} for code quality, correctness, and safety. + +First, run: gh pr view ${pr} --json title,body,files,additions,deletions +Then checkout the PR branch: gh pr checkout ${pr} +Read every changed file completely. Also read the existing versions of modified files on main for comparison. + +Your review focuses on CODE QUALITY — things a code reviewer catches: + +1. **Correctness** — Does the code do what it claims? Are there logic errors, off-by-one bugs, or unhandled edge cases? +2. **Safety** — Any security issues? SQL injection, path traversal, unchecked inputs, secrets in code? +3. 
**Breaking changes** — Does this change file formats, API responses, DB schemas, or config structures that other agents depend on? If so, is there a migration path? +4. **Error handling** — Will failures be visible or silent? Are there bare excepts, missing error messages, or swallowed exceptions? +5. **Integration** — Does the code work with the existing system? Are imports correct, paths valid, dependencies present? +6. **Simplicity** — Is this more complex than it needs to be? Could it be simpler? + +Also check: +- systemd ReadWritePaths if new file write paths are introduced +- Path format consistency (absolute vs relative) +- Concurrent edit risk on shared files (app.py, bot.py, etc.) + +Write your review to ${GANYMEDE_REVIEW_FILE} + +CRITICAL — Verdict format: Your review MUST end with exactly one of these verdict markers (as an HTML comment on its own line): +<!-- VERDICT:GANYMEDE:APPROVE --> +<!-- VERDICT:GANYMEDE:REQUEST_CHANGES --> + +Then post the review as an issue comment: + gh pr comment ${pr} --body-file ${GANYMEDE_REVIEW_FILE} + +IMPORTANT: Use 'gh pr comment' NOT 'gh pr review'. We use a shared GitHub account so gh pr review --approve fails. +Sign your review as Ganymede (code reviewer). +DO NOT duplicate Leo's knowledge quality checks — he covers those. You cover code. +DO NOT merge — the orchestrator handles merge decisions after all reviews are posted. +Work autonomously. Do not ask for confirmation." + + if run_agent_review "$pr" "ganymede" "$GANYMEDE_PROMPT" "sonnet"; then + GANYMEDE_PASSED=true + else + GANYMEDE_PASSED=false + fi + + # Clean up branch + git checkout main 2>/dev/null || git checkout -f main + [ -n "$PR_BRANCH" ] && git branch -D "$PR_BRANCH" 2>/dev/null || true + elif [ "$IS_CODE_PR" = "true" ] && [ "$LEO_ONLY" = true ]; then + echo " Code files detected but skipping Ganymede review (--leo-only)." + fi + if [ "$LEO_PASSED" = true ]; then REVIEWED=$((REVIEWED + 1)) else @@ -489,7 +593,7 @@ Work autonomously. Do not ask for confirmation." 
else echo "" echo " --- Merge eligibility check ---" - MERGE_LOG=$(check_merge_eligible "$pr" "$DOMAIN_AGENT" "$LEO_PASSED") + MERGE_LOG=$(check_merge_eligible "$pr" "$DOMAIN_AGENT" "$LEO_PASSED" "$IS_CODE_PR" "$GANYMEDE_PASSED") MERGE_RESULT=$? echo "$MERGE_LOG" | sed 's/^/ /'