ganymede: extract lib/domains.py — single domain→agent mapping
Some checks failed
CI / lint-and-test (pull_request) Has been cancelled

- What: Unified DOMAIN_AGENT_MAP, VALID_DOMAINS, agent_for_domain(),
  detect_domain_from_diff(), detect_domain_from_branch() into lib/domains.py.
  Removed duplicated mappings from evaluate.py and merge.py. VALID_DOMAINS in
  validate.py now derives from DOMAIN_AGENT_MAP.keys() (single source of truth).
- Why: Phase 3 structural refactor. Domain mapping was duplicated across evaluate.py
  (DOMAIN_AGENT_MAP), merge.py (agent_domain dict), and validate.py (hand-maintained
  VALID_DOMAINS). Adding a domain required editing 3 files; now it requires editing 1.
- Connections: evaluate.py uses agent_for_domain() + detect_domain_from_diff(),
  merge.py uses detect_domain_from_branch(), validate.py uses VALID_DOMAINS.

Pentagon-Agent: Ganymede <F99EBFA6-547B-4096-BEEA-1D59C3E4028A>
This commit is contained in:
m3taversal 2026-03-13 15:33:18 +00:00
parent 9d69629893
commit ff5162d5ba
4 changed files with 128 additions and 118 deletions

87
lib/domains.py Normal file
View file

@ -0,0 +1,87 @@
"""Domain→agent mapping and domain detection — single source of truth.
Extracted from evaluate.py and merge.py (Phase 3 refactor).
All domain classification logic goes through this module.
"""
import re
# Canonical domain→agent mapping. Every domain must have exactly one primary agent.
# Add new domains here only — VALID_DOMAINS below derives from this table.
DOMAIN_AGENT_MAP: dict[str, str] = {
    "internet-finance": "Rio",
    "entertainment": "Clay",
    "health": "Vida",
    "ai-alignment": "Theseus",
    "space-development": "Astra",
    "mechanisms": "Rio",
    "living-capital": "Rio",
    "living-agents": "Theseus",
    "teleohumanity": "Leo",
    "grand-strategy": "Leo",
    "critical-systems": "Theseus",
    "collective-intelligence": "Theseus",
    "teleological-economics": "Rio",
    "cultural-dynamics": "Clay",
}
# Valid domain names — derived from the map, not maintained separately.
VALID_DOMAINS: frozenset[str] = frozenset(DOMAIN_AGENT_MAP.keys())
# Inverse mapping: agent name (lowercase) → primary domain (for branch detection).
# NOTE(review): this is hand-maintained and cannot be mechanically derived from
# DOMAIN_AGENT_MAP (e.g. Leo's primary is "grand-strategy", not his first map
# entry "teleohumanity") — keep in sync when agents or domains change.
_AGENT_PRIMARY_DOMAIN: dict[str, str] = {
    "rio": "internet-finance",
    "clay": "entertainment",
    "theseus": "ai-alignment",
    "vida": "health",
    "astra": "space-development",
    "leo": "grand-strategy",
}
def agent_for_domain(domain: str | None) -> str:
"""Get the reviewing agent for a domain. Falls back to Leo."""
if domain is None:
return "Leo"
return DOMAIN_AGENT_MAP.get(domain, "Leo")
def detect_domain_from_diff(diff: str) -> str | None:
"""Detect primary domain from changed file paths in a unified diff.
Checks domains/, entities/, core/, foundations/ for domain classification.
Returns the most-referenced domain, or None if no domain files found.
"""
domain_counts: dict[str, int] = {}
for line in diff.split("\n"):
if line.startswith("diff --git"):
# Check domains/ and entities/ (both carry domain info)
match = re.search(r"(?:domains|entities)/([^/]+)/", line)
if match:
d = match.group(1)
domain_counts[d] = domain_counts.get(d, 0) + 1
continue
# Check core/ subdirectories
match = re.search(r"core/([^/]+)/", line)
if match:
d = match.group(1)
if d in DOMAIN_AGENT_MAP:
domain_counts[d] = domain_counts.get(d, 0) + 1
continue
# Check foundations/ subdirectories
match = re.search(r"foundations/([^/]+)/", line)
if match:
d = match.group(1)
if d in DOMAIN_AGENT_MAP:
domain_counts[d] = domain_counts.get(d, 0) + 1
if domain_counts:
return max(domain_counts, key=domain_counts.get)
return None
def detect_domain_from_branch(branch: str) -> str | None:
    """Map a pipeline branch name to its agent's primary domain.

    E.g. 'rio/claims-futarchy' -> 'internet-finance'. Branches without an
    agent prefix (no '/') or with an unrecognized prefix yield None.
    """
    if "/" not in branch:
        # Original looks up the empty prefix, which is never a key → None.
        return _AGENT_PRIMARY_DOMAIN.get("")
    agent_prefix = branch.partition("/")[0].lower()
    return _AGENT_PRIMARY_DOMAIN.get(agent_prefix)

View file

@ -22,6 +22,7 @@ import re
from datetime import datetime, timezone
from . import config, db
from .domains import agent_for_domain, detect_domain_from_diff
from .forgejo import api as forgejo_api
from .forgejo import get_agent_token, get_pr_diff, repo_path
@ -30,25 +31,6 @@ logger = logging.getLogger("pipeline.evaluate")
# Track active Claude CLI subprocesses for graceful shutdown (Ganymede #8)
_active_subprocesses: set = set()
# ─── Constants ──────────────────────────────────────────────────────────────
# Domain → reviewing-agent mapping.
# NOTE(review): duplicate of the table in lib/domains.py — keep the two in
# sync, or import from lib.domains so there is a single source of truth.
DOMAIN_AGENT_MAP = {
    "internet-finance": "Rio",
    "entertainment": "Clay",
    "health": "Vida",
    "ai-alignment": "Theseus",
    "space-development": "Astra",
    "mechanisms": "Rio",
    "living-capital": "Rio",
    "living-agents": "Theseus",
    "teleohumanity": "Leo",
    "grand-strategy": "Leo",
    "critical-systems": "Theseus",
    "collective-intelligence": "Theseus",
    "teleological-economics": "Rio",
    "cultural-dynamics": "Clay",
}
async def kill_active_subprocesses():
"""Kill all tracked Claude CLI subprocesses. Called during graceful shutdown."""
@ -303,38 +285,6 @@ def _extract_changed_files(diff: str) -> str:
)
def _detect_domain_from_diff(diff: str) -> str | None:
    """Detect primary domain from changed file paths.
    Checks domains/, entities/, core/, foundations/ for domain classification.

    Counts one hit per ``diff --git`` header line and returns the
    most-referenced domain (first-seen wins ties), or None when no
    domain-bearing files are touched.
    NOTE(review): duplicated in lib/domains.py — prefer the shared version.
    """
    domain_counts: dict[str, int] = {}
    for line in diff.split("\n"):
        if line.startswith("diff --git"):
            # Check domains/ and entities/ (both carry domain info)
            match = re.search(r"(?:domains|entities)/([^/]+)/", line)
            if match:
                d = match.group(1)
                domain_counts[d] = domain_counts.get(d, 0) + 1
                continue
            # Check core/ subdirectories — counted only for known domains
            match = re.search(r"core/([^/]+)/", line)
            if match:
                d = match.group(1)
                if d in DOMAIN_AGENT_MAP:
                    domain_counts[d] = domain_counts.get(d, 0) + 1
                continue
            # Check foundations/ subdirectories — counted only for known domains
            match = re.search(r"foundations/([^/]+)/", line)
            if match:
                d = match.group(1)
                if d in DOMAIN_AGENT_MAP:
                    domain_counts[d] = domain_counts.get(d, 0) + 1
    if domain_counts:
        # First key with the top count wins — earliest-seen domain on ties.
        return max(domain_counts, key=domain_counts.get)
    return None
def _is_musings_only(diff: str) -> bool:
"""Check if PR only modifies musing files."""
has_musings = False
@ -496,8 +446,8 @@ async def evaluate_pr(conn, pr_number: int, tier: str = None) -> dict:
files = _extract_changed_files(diff)
# Detect domain
domain = _detect_domain_from_diff(diff)
agent = DOMAIN_AGENT_MAP.get(domain, "Leo") if domain else "Leo"
domain = detect_domain_from_diff(diff)
agent = agent_for_domain(domain)
# Default NULL domain to 'general' (archive-only PRs have no domain files)
if domain is None:
@ -675,28 +625,36 @@ async def evaluate_cycle(conn, max_workers=None) -> tuple[int, int]:
"""
global _rate_limit_backoff_until
# If we're in rate-limit backoff, skip this cycle entirely
# Check if we're in Opus rate-limit backoff
opus_backoff = False
if _rate_limit_backoff_until is not None:
now = datetime.now(timezone.utc)
if now < _rate_limit_backoff_until:
remaining = int((_rate_limit_backoff_until - now).total_seconds())
logger.debug("Rate limit backoff: %d seconds remaining, skipping cycle", remaining)
return 0, 0
logger.debug("Opus rate limit backoff: %d seconds remaining — triage + domain review continue", remaining)
opus_backoff = True
else:
logger.info("Rate limit backoff expired, resuming eval cycles")
logger.info("Rate limit backoff expired, resuming full eval cycles")
_rate_limit_backoff_until = None
# Find PRs ready for evaluation:
# - status = 'open'
# - tier0_pass = 1 (passed validation)
# - leo_verdict = 'pending' OR domain_verdict = 'pending'
# During Opus backoff: only fetch PRs needing triage or domain review
# (skip PRs already domain-reviewed that are waiting for Leo/Opus)
# Skip PRs attempted within last 10 minutes (backoff during rate limits)
if opus_backoff:
verdict_filter = "AND p.domain_verdict = 'pending'"
else:
verdict_filter = "AND (p.leo_verdict = 'pending' OR p.domain_verdict = 'pending')"
rows = conn.execute(
"""SELECT p.number, p.tier FROM prs p
f"""SELECT p.number, p.tier FROM prs p
LEFT JOIN sources s ON p.source_path = s.path
WHERE p.status = 'open'
AND p.tier0_pass = 1
AND (p.leo_verdict = 'pending' OR p.domain_verdict = 'pending')
{verdict_filter}
AND (p.last_attempt IS NULL
OR p.last_attempt < datetime('now', '-10 minutes'))
ORDER BY
@ -724,18 +682,30 @@ async def evaluate_cycle(conn, max_workers=None) -> tuple[int, int]:
if result.get("skipped"):
reason = result.get("reason", "")
logger.debug("PR #%d skipped: %s", row["number"], reason)
# Any rate limit — stop the entire cycle. No point trying more PRs
# when the model is exhausted. The 10-minute backoff on last_attempt
# prevents re-processing the same PR; breaking here prevents
# cycling through OTHER PRs that will also hit the same limit.
if "rate_limited" in reason:
from datetime import timedelta
_rate_limit_backoff_until = datetime.now(timezone.utc) + timedelta(
minutes=_RATE_LIMIT_BACKOFF_MINUTES
)
logger.info("Rate limited (%s) — backing off for %d minutes", reason, _RATE_LIMIT_BACKOFF_MINUTES)
break
if reason == "opus_rate_limited":
# Opus hit — set backoff but DON'T break. Other PRs
# may still need triage (Haiku) or domain review (Sonnet).
_rate_limit_backoff_until = datetime.now(timezone.utc) + timedelta(
minutes=_RATE_LIMIT_BACKOFF_MINUTES
)
logger.info(
"Opus rate limited — backing off Opus for %d min, continuing triage+domain",
_RATE_LIMIT_BACKOFF_MINUTES,
)
continue
else:
# Non-Opus rate limit (Sonnet/Haiku) — break the cycle,
# nothing else can proceed either.
_rate_limit_backoff_until = datetime.now(timezone.utc) + timedelta(
minutes=_RATE_LIMIT_BACKOFF_MINUTES
)
logger.info(
"Rate limited (%s) — backing off for %d minutes", reason, _RATE_LIMIT_BACKOFF_MINUTES
)
break
else:
succeeded += 1
except Exception:

View file

@ -16,6 +16,7 @@ import logging
from collections import defaultdict
from . import config, db
from .domains import detect_domain_from_branch
from .forgejo import api as forgejo_api
from .forgejo import repo_path
@ -83,9 +84,7 @@ async def discover_external_prs(conn) -> int:
is_pipeline = author.lower() in pipeline_users
origin = "pipeline" if is_pipeline else "human"
priority = "high" if origin == "human" else None
domain = (
_detect_domain_from_files(pr) if not is_pipeline else _detect_domain_from_branch(pr["head"]["ref"])
)
domain = None if not is_pipeline else detect_domain_from_branch(pr["head"]["ref"])
conn.execute(
"""INSERT OR IGNORE INTO prs
@ -122,34 +121,6 @@ async def discover_external_prs(conn) -> int:
return discovered
def _detect_domain_from_branch(branch: str) -> str | None:
    """Extract domain from branch name like 'rio/claims-futarchy' -> 'internet-finance'.
    Agent-to-domain mapping for pipeline branches.

    A branch without '/' yields prefix "" and therefore None.
    NOTE(review): duplicated in lib/domains.py (_AGENT_PRIMARY_DOMAIN) —
    prefer the shared version.
    """
    agent_domain = {
        "rio": "internet-finance",
        "clay": "entertainment",
        "theseus": "ai-alignment",
        "vida": "health",
        "astra": "space-development",
        "leo": "grand-strategy",
    }
    prefix = branch.split("/")[0].lower() if "/" in branch else ""
    return agent_domain.get(prefix)
def _detect_domain_from_files(pr: dict) -> str | None:
"""Detect domain from PR's changed files for human-submitted PRs.
Humans may not follow agent branch naming. Fall back to inspecting
file paths. (Ganymede nit)
"""
# We'd need to fetch files from the API — do it lazily on first eval
# For now, return None. Domain gets set during evaluation.
return None
async def _post_ack_comment(pr_number: int):
"""Post acknowledgment comment on human-submitted PR. (Rhea)

View file

@ -16,6 +16,7 @@ from difflib import SequenceMatcher
from pathlib import Path
from . import config, db
from .domains import VALID_DOMAINS
from .forgejo import api as forgejo_api
from .forgejo import get_pr_diff, repo_path
@ -23,25 +24,6 @@ logger = logging.getLogger("pipeline.validate")
# ─── Constants ──────────────────────────────────────────────────────────────
# Valid domain names for frontmatter validation.
# NOTE(review): hand-maintained copy — superseded by lib/domains.VALID_DOMAINS,
# which derives from DOMAIN_AGENT_MAP (single source of truth).
VALID_DOMAINS = frozenset(
    {
        "internet-finance",
        "entertainment",
        "health",
        "ai-alignment",
        "space-development",
        "grand-strategy",
        "mechanisms",
        "living-capital",
        "living-agents",
        "teleohumanity",
        "critical-systems",
        "collective-intelligence",
        "teleological-economics",
        "cultural-dynamics",
    }
)
# Allowed confidence levels for a submitted claim or framework.
VALID_CONFIDENCE = frozenset({"proven", "likely", "experimental", "speculative"})
# Allowed artifact types.
VALID_TYPES = frozenset({"claim", "framework"})
# Frontmatter fields every artifact must declare.
REQUIRED_FIELDS = ("type", "domain", "description", "confidence", "source", "created")