theseus: add Ganymede pre-merge code review gate to evaluate trigger

- What: PRs touching code files (ops/, diagnostics/, .py, .sh, etc.) now
  get Ganymede code review in addition to Leo + domain agent
- Why: Ganymede was reviewing ~30% of code PRs after deploy, not before.
  This makes code review 100% pre-merge, matching how Leo already gates claims.
- How: detect_code_pr() checks file patterns, runs Ganymede with code-focused
  prompt, adds VERDICT:GANYMEDE gate to merge eligibility check

Pentagon-Agent: Theseus <24DE7DA0-E4D5-4023-B1A2-3F736AFF4EEE>
This commit is contained in:
m3taversal 2026-03-28 21:01:34 +00:00
parent 2542c1f20d
commit b5927c55d5

View file

@ -1,13 +1,17 @@
#!/usr/bin/env bash
# evaluate-trigger.sh — Find unreviewed PRs, run up to 3 agent reviews, auto-merge if approved.
#
# Reviews each PR with TWO agents:
# Reviews each PR with up to THREE agents:
# 1. Leo (evaluator) — quality gates, cross-domain connections, coherence
# 2. Domain agent — domain expertise, duplicate check, technical accuracy
# 3. Ganymede (code reviewer) — code quality, correctness, safety (code PRs only)
#
# After both reviews, auto-merges if:
# Ganymede reviews any PR that touches code files (ops/, diagnostics/, .py, .sh, etc.)
#
# After all reviews, auto-merges if:
# - Leo's comment contains "**Verdict:** approve"
# - Domain agent's comment contains "**Verdict:** approve"
# - Domain agent's comment contains "**Verdict:** approve" (if applicable)
# - Ganymede's comment contains "**Verdict:** approve" (if code PR)
# - No territory violations (files outside proposer's domain)
#
# Usage:
@ -51,6 +55,22 @@ LEO_ONLY=false
NO_MERGE=false
SPECIFIC_PR=""
# --- Code PR detection ---
# Prints "true" if the PR touches code files, "false" otherwise.
# A path counts as code when it is under ops/ or diagnostics/, or ends in
# .py, .sh, .js, .html, .css, or .json.
# These PRs need Ganymede code review in addition to Leo's quality review.
#
# Arguments: $1 - PR number, passed to `gh pr view`
# Outputs:   "true" or "false" on stdout; a warning on stderr if the file
#            list cannot be fetched
#
# Fails closed: if the file list cannot be fetched, the PR is reported as a
# code PR so a gh/API failure cannot silently skip the code review gate.
detect_code_pr() {
  local pr_number="$1"
  local files
  if ! files=$(gh pr view "$pr_number" --json files --jq '.files[].path' 2>/dev/null); then
    echo "WARN: could not list files for PR #$pr_number; treating it as a code PR" >&2
    echo "true"
    return 0
  fi
  # Directory prefixes are anchored at the start of the path, extensions at the end.
  if grep -qE '^ops/|^diagnostics/|\.(py|sh|js|html|css|json)$' <<< "$files"; then
    echo "true"
  else
    echo "false"
  fi
}
# --- Domain routing map ---
# Maps branch prefix or domain directory to agent name and identity path
detect_domain_agent() {
@ -200,7 +220,10 @@ echo "PRs to review: $PRS_TO_REVIEW"
if [ "$DRY_RUN" = true ]; then
for pr in $PRS_TO_REVIEW; do
read -r agent domain <<< "$(detect_domain_agent "$pr")"
echo "[DRY RUN] PR #$pr — Leo + ${agent:-unknown} (${domain:-unknown domain})"
# Dry-run preview: list the reviewers this PR would get without running them.
is_code=$(detect_code_pr "$pr")
reviewers="Leo + ${agent:-unknown} (${domain:-unknown domain})"
# Ganymede is listed only when detect_code_pr reports code files.
if [ "$is_code" = "true" ]; then
  reviewers="$reviewers + Ganymede (code)"
fi
# Keep the " — " separator so the PR number does not run into the reviewer list.
echo "[DRY RUN] PR #$pr — $reviewers"
done
exit 0
fi
@ -294,6 +317,8 @@ check_merge_eligible() {
local pr_number="$1"
local domain_agent="$2"
local leo_passed="$3"
local is_code_pr="${4:-false}"
local ganymede_passed="${5:-true}"
# Gate 1: Leo must have completed without timeout/error
if [ "$leo_passed" != "true" ]; then
@ -337,7 +362,29 @@ check_merge_eligible() {
echo "Domain agent: N/A (leo-only or grand-strategy)"
fi
# Gate 4: Territory violations
# Gate 4: Ganymede code review (for code PRs)
if [ "$is_code_pr" = "true" ]; then
if [ "$ganymede_passed" != "true" ]; then
echo "BLOCK: Ganymede code review failed or timed out"
return 1
fi
local ganymede_verdict
ganymede_verdict=$(gh pr view "$pr_number" --json comments \
--jq '[.comments[] | select(.body | test("VERDICT:GANYMEDE:")) | .body] | last' 2>/dev/null || echo "")
if echo "$ganymede_verdict" | grep -q "VERDICT:GANYMEDE:APPROVE"; then
echo "Ganymede (code review): APPROVED"
elif echo "$ganymede_verdict" | grep -q "VERDICT:GANYMEDE:REQUEST_CHANGES"; then
echo "BLOCK: Ganymede requested code changes"
return 1
else
echo "BLOCK: No verdict marker found for Ganymede code review"
return 1
fi
fi
# Gate 5: Territory violations
local violations
violations=$(check_territory_violations "$pr_number")
@ -475,6 +522,63 @@ Work autonomously. Do not ask for confirmation."
[ -n "$PR_BRANCH" ] && git branch -D "$PR_BRANCH" 2>/dev/null || true
fi
# --- Review 3: Ganymede code review (for PRs touching code files) ---
IS_CODE_PR=$(detect_code_pr "$pr")
GANYMEDE_PASSED=true
if [ "$IS_CODE_PR" = "true" ] && [ "$LEO_ONLY" != true ]; then
echo " Code files detected — running Ganymede code review."
GANYMEDE_REVIEW_FILE="/tmp/ganymede-review-pr${pr}.md"
GANYMEDE_PROMPT="You are Ganymede, the code quality reviewer for the Teleo collective.
Review PR #${pr} for code quality, correctness, and safety.
First, run: gh pr view ${pr} --json title,body,files,additions,deletions
Then checkout the PR branch: gh pr checkout ${pr}
Read every changed file completely. Also read the existing versions of modified files on main for comparison.
Your review focuses on CODE QUALITY — things a code reviewer catches:
1. **Correctness** — Does the code do what it claims? Are there logic errors, off-by-one bugs, or unhandled edge cases?
2. **Safety** — Any security issues? SQL injection, path traversal, unchecked inputs, secrets in code?
3. **Breaking changes** — Does this change file formats, API responses, DB schemas, or config structures that other agents depend on? If so, is there a migration path?
4. **Error handling** — Will failures be visible or silent? Are there bare excepts, missing error messages, or swallowed exceptions?
5. **Integration** — Does the code work with the existing system? Are imports correct, paths valid, dependencies present?
6. **Simplicity** — Is this more complex than it needs to be? Could it be simpler?
Also check:
- systemd ReadWritePaths if new file write paths are introduced
- Path format consistency (absolute vs relative)
- Concurrent edit risk on shared files (app.py, bot.py, etc.)
Write your review to ${GANYMEDE_REVIEW_FILE}
CRITICAL — Verdict format: Your review MUST end with exactly one of these verdict markers (as an HTML comment on its own line):
<!-- VERDICT:GANYMEDE:APPROVE -->
<!-- VERDICT:GANYMEDE:REQUEST_CHANGES -->
Then post the review as an issue comment:
gh pr comment ${pr} --body-file ${GANYMEDE_REVIEW_FILE}
IMPORTANT: Use 'gh pr comment' NOT 'gh pr review'. We use a shared GitHub account so gh pr review --approve fails.
Sign your review as Ganymede (code reviewer).
DO NOT duplicate Leo's knowledge quality checks — he covers those. You cover code.
DO NOT merge — the orchestrator handles merge decisions after all reviews are posted.
Work autonomously. Do not ask for confirmation."
if run_agent_review "$pr" "ganymede" "$GANYMEDE_PROMPT" "sonnet"; then
GANYMEDE_PASSED=true
else
GANYMEDE_PASSED=false
fi
# Clean up branch
git checkout main 2>/dev/null || git checkout -f main
[ -n "$PR_BRANCH" ] && git branch -D "$PR_BRANCH" 2>/dev/null || true
elif [ "$IS_CODE_PR" = "true" ] && [ "$LEO_ONLY" = true ]; then
echo " Code files detected but skipping Ganymede review (--leo-only)."
fi
if [ "$LEO_PASSED" = true ]; then
REVIEWED=$((REVIEWED + 1))
else
@ -489,7 +593,7 @@ Work autonomously. Do not ask for confirmation."
else
echo ""
echo " --- Merge eligibility check ---"
MERGE_LOG=$(check_merge_eligible "$pr" "$DOMAIN_AGENT" "$LEO_PASSED")
MERGE_LOG=$(check_merge_eligible "$pr" "$DOMAIN_AGENT" "$LEO_PASSED" "$IS_CODE_PR" "$GANYMEDE_PASSED")
MERGE_RESULT=$?
echo "$MERGE_LOG" | sed 's/^/ /'