ganymede: extract lib/domains.py — single domain→agent mapping
Some checks failed: CI / lint-and-test (pull_request) has been cancelled
- What: Unified DOMAIN_AGENT_MAP, VALID_DOMAINS, agent_for_domain(), detect_domain_from_diff(), and detect_domain_from_branch() into lib/domains.py. Removed the duplicated mappings from evaluate.py and merge.py. VALID_DOMAINS in validate.py now derives from DOMAIN_AGENT_MAP.keys() (single source of truth).
- Why: Phase 3 structural refactor. The domain mapping was duplicated across evaluate.py (DOMAIN_AGENT_MAP) and merge.py (agent_domain dict). Adding a domain required editing three files; now it requires editing one.
- Connections: evaluate.py uses agent_for_domain() + detect_domain_from_diff(), merge.py uses detect_domain_from_branch(), validate.py uses VALID_DOMAINS.

Pentagon-Agent: Ganymede <F99EBFA6-547B-4096-BEEA-1D59C3E4028A>
parent 9d69629893
commit ff5162d5ba
4 changed files with 128 additions and 118 deletions
lib/domains.py | 87 (new file)
@@ -0,0 +1,87 @@
"""Domain→agent mapping and domain detection — single source of truth.

Extracted from evaluate.py and merge.py (Phase 3 refactor).
All domain classification logic goes through this module.
"""

import re

# Canonical domain→agent mapping. Every domain must have exactly one primary agent.
DOMAIN_AGENT_MAP: dict[str, str] = {
    "internet-finance": "Rio",
    "entertainment": "Clay",
    "health": "Vida",
    "ai-alignment": "Theseus",
    "space-development": "Astra",
    "mechanisms": "Rio",
    "living-capital": "Rio",
    "living-agents": "Theseus",
    "teleohumanity": "Leo",
    "grand-strategy": "Leo",
    "critical-systems": "Theseus",
    "collective-intelligence": "Theseus",
    "teleological-economics": "Rio",
    "cultural-dynamics": "Clay",
}

# Valid domain names — derived from the map, not maintained separately.
VALID_DOMAINS: frozenset[str] = frozenset(DOMAIN_AGENT_MAP.keys())

# Inverse mapping: agent name (lowercase) → primary domain (for branch detection).
_AGENT_PRIMARY_DOMAIN: dict[str, str] = {
    "rio": "internet-finance",
    "clay": "entertainment",
    "theseus": "ai-alignment",
    "vida": "health",
    "astra": "space-development",
    "leo": "grand-strategy",
}


def agent_for_domain(domain: str | None) -> str:
    """Get the reviewing agent for a domain. Falls back to Leo."""
    if domain is None:
        return "Leo"
    return DOMAIN_AGENT_MAP.get(domain, "Leo")


def detect_domain_from_diff(diff: str) -> str | None:
    """Detect primary domain from changed file paths in a unified diff.

    Checks domains/, entities/, core/, foundations/ for domain classification.
    Returns the most-referenced domain, or None if no domain files found.
    """
    domain_counts: dict[str, int] = {}
    for line in diff.split("\n"):
        if line.startswith("diff --git"):
            # Check domains/ and entities/ (both carry domain info)
            match = re.search(r"(?:domains|entities)/([^/]+)/", line)
            if match:
                d = match.group(1)
                domain_counts[d] = domain_counts.get(d, 0) + 1
                continue
            # Check core/ subdirectories
            match = re.search(r"core/([^/]+)/", line)
            if match:
                d = match.group(1)
                if d in DOMAIN_AGENT_MAP:
                    domain_counts[d] = domain_counts.get(d, 0) + 1
                continue
            # Check foundations/ subdirectories
            match = re.search(r"foundations/([^/]+)/", line)
            if match:
                d = match.group(1)
                if d in DOMAIN_AGENT_MAP:
                    domain_counts[d] = domain_counts.get(d, 0) + 1
    if domain_counts:
        return max(domain_counts, key=domain_counts.get)
    return None


def detect_domain_from_branch(branch: str) -> str | None:
    """Extract domain from branch name like 'rio/claims-futarchy' → 'internet-finance'.

    Uses agent prefix → primary domain mapping for pipeline branches.
    """
    prefix = branch.split("/")[0].lower() if "/" in branch else ""
    return _AGENT_PRIMARY_DOMAIN.get(prefix)
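For orientation, a minimal usage sketch of the module's public surface. The expected values follow directly from the mappings above; the "lib" package name is an assumption based on the relative imports elsewhere in this commit.

    # Sketch only; assumes the package is importable as "lib"
    from lib.domains import (
        VALID_DOMAINS,
        agent_for_domain,
        detect_domain_from_branch,
        detect_domain_from_diff,
    )

    # Domain → reviewing agent, with Leo as the fallback reviewer
    assert agent_for_domain("health") == "Vida"
    assert agent_for_domain(None) == "Leo"
    assert agent_for_domain("not-a-domain") == "Leo"

    # Branch prefix → the agent's primary domain (pipeline branches only)
    assert detect_domain_from_branch("rio/claims-futarchy") == "internet-finance"
    assert detect_domain_from_branch("main") is None

    # Diff headers vote; the most-referenced domain wins
    diff = "diff --git a/domains/health/claim.md b/domains/health/claim.md\n"
    assert detect_domain_from_diff(diff) == "health"
    assert "health" in VALID_DOMAINS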
lib/evaluate.py | 106
@@ -22,6 +22,7 @@ import re
 from datetime import datetime, timezone

 from . import config, db
+from .domains import agent_for_domain, detect_domain_from_diff
 from .forgejo import api as forgejo_api
 from .forgejo import get_agent_token, get_pr_diff, repo_path
@@ -30,25 +31,6 @@ logger = logging.getLogger("pipeline.evaluate")
 # Track active Claude CLI subprocesses for graceful shutdown (Ganymede #8)
 _active_subprocesses: set = set()

-# ─── Constants ──────────────────────────────────────────────────────────────
-
-DOMAIN_AGENT_MAP = {
-    "internet-finance": "Rio",
-    "entertainment": "Clay",
-    "health": "Vida",
-    "ai-alignment": "Theseus",
-    "space-development": "Astra",
-    "mechanisms": "Rio",
-    "living-capital": "Rio",
-    "living-agents": "Theseus",
-    "teleohumanity": "Leo",
-    "grand-strategy": "Leo",
-    "critical-systems": "Theseus",
-    "collective-intelligence": "Theseus",
-    "teleological-economics": "Rio",
-    "cultural-dynamics": "Clay",
-}
-
-
 async def kill_active_subprocesses():
     """Kill all tracked Claude CLI subprocesses. Called during graceful shutdown."""
@@ -303,38 +285,6 @@ def _extract_changed_files(diff: str) -> str:
     )


-def _detect_domain_from_diff(diff: str) -> str | None:
-    """Detect primary domain from changed file paths.
-
-    Checks domains/, entities/, core/, foundations/ for domain classification.
-    """
-    domain_counts: dict[str, int] = {}
-    for line in diff.split("\n"):
-        if line.startswith("diff --git"):
-            # Check domains/ and entities/ (both carry domain info)
-            match = re.search(r"(?:domains|entities)/([^/]+)/", line)
-            if match:
-                d = match.group(1)
-                domain_counts[d] = domain_counts.get(d, 0) + 1
-                continue
-            # Check core/ subdirectories
-            match = re.search(r"core/([^/]+)/", line)
-            if match:
-                d = match.group(1)
-                if d in DOMAIN_AGENT_MAP:
-                    domain_counts[d] = domain_counts.get(d, 0) + 1
-                continue
-            # Check foundations/ subdirectories
-            match = re.search(r"foundations/([^/]+)/", line)
-            if match:
-                d = match.group(1)
-                if d in DOMAIN_AGENT_MAP:
-                    domain_counts[d] = domain_counts.get(d, 0) + 1
-    if domain_counts:
-        return max(domain_counts, key=domain_counts.get)
-    return None
-
-
 def _is_musings_only(diff: str) -> bool:
     """Check if PR only modifies musing files."""
     has_musings = False
@@ -496,8 +446,8 @@ async def evaluate_pr(conn, pr_number: int, tier: str = None) -> dict:
     files = _extract_changed_files(diff)

     # Detect domain
-    domain = _detect_domain_from_diff(diff)
-    agent = DOMAIN_AGENT_MAP.get(domain, "Leo") if domain else "Leo"
+    domain = detect_domain_from_diff(diff)
+    agent = agent_for_domain(domain)

     # Default NULL domain to 'general' (archive-only PRs have no domain files)
     if domain is None:
@@ -675,28 +625,36 @@ async def evaluate_cycle(conn, max_workers=None) -> tuple[int, int]:
     """
     global _rate_limit_backoff_until

-    # If we're in rate-limit backoff, skip this cycle entirely
+    # Check if we're in Opus rate-limit backoff
+    opus_backoff = False
     if _rate_limit_backoff_until is not None:
         now = datetime.now(timezone.utc)
         if now < _rate_limit_backoff_until:
             remaining = int((_rate_limit_backoff_until - now).total_seconds())
-            logger.debug("Rate limit backoff: %d seconds remaining, skipping cycle", remaining)
-            return 0, 0
+            logger.debug("Opus rate limit backoff: %d seconds remaining — triage + domain review continue", remaining)
+            opus_backoff = True
         else:
-            logger.info("Rate limit backoff expired, resuming eval cycles")
+            logger.info("Rate limit backoff expired, resuming full eval cycles")
             _rate_limit_backoff_until = None

     # Find PRs ready for evaluation:
     # - status = 'open'
     # - tier0_pass = 1 (passed validation)
     # - leo_verdict = 'pending' OR domain_verdict = 'pending'
+    # During Opus backoff: only fetch PRs needing triage or domain review
+    # (skip PRs already domain-reviewed that are waiting for Leo/Opus)
     # Skip PRs attempted within last 10 minutes (backoff during rate limits)
+    if opus_backoff:
+        verdict_filter = "AND p.domain_verdict = 'pending'"
+    else:
+        verdict_filter = "AND (p.leo_verdict = 'pending' OR p.domain_verdict = 'pending')"
+
     rows = conn.execute(
-        """SELECT p.number, p.tier FROM prs p
+        f"""SELECT p.number, p.tier FROM prs p
         LEFT JOIN sources s ON p.source_path = s.path
         WHERE p.status = 'open'
         AND p.tier0_pass = 1
-        AND (p.leo_verdict = 'pending' OR p.domain_verdict = 'pending')
+        {verdict_filter}
         AND (p.last_attempt IS NULL
             OR p.last_attempt < datetime('now', '-10 minutes'))
         ORDER BY
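To make the f-string interpolation concrete, here is what the assembled filter looks like in each mode. A condensed sketch using only names from the hunk above, not the full query:

    opus_backoff = True  # pretend Opus is rate-limited
    verdict_filter = (
        "AND p.domain_verdict = 'pending'"
        if opus_backoff
        else "AND (p.leo_verdict = 'pending' OR p.domain_verdict = 'pending')"
    )
    query = f"""SELECT p.number, p.tier FROM prs p
        WHERE p.status = 'open'
        AND p.tier0_pass = 1
        {verdict_filter}"""
    # During backoff only domain-review work is fetched; PRs waiting on
    # Leo's verdict stay queued until the backoff expires.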
@@ -724,18 +682,30 @@ async def evaluate_cycle(conn, max_workers=None) -> tuple[int, int]:
             if result.get("skipped"):
                 reason = result.get("reason", "")
                 logger.debug("PR #%d skipped: %s", row["number"], reason)
-                # Any rate limit — stop the entire cycle. No point trying more PRs
-                # when the model is exhausted. The 10-minute backoff on last_attempt
-                # prevents re-processing the same PR; breaking here prevents
-                # cycling through OTHER PRs that will also hit the same limit.
                 if "rate_limited" in reason:
                     from datetime import timedelta

-                    _rate_limit_backoff_until = datetime.now(timezone.utc) + timedelta(
-                        minutes=_RATE_LIMIT_BACKOFF_MINUTES
-                    )
-                    logger.info("Rate limited (%s) — backing off for %d minutes", reason, _RATE_LIMIT_BACKOFF_MINUTES)
-                    break
+                    if reason == "opus_rate_limited":
+                        # Opus hit — set backoff but DON'T break. Other PRs
+                        # may still need triage (Haiku) or domain review (Sonnet).
+                        _rate_limit_backoff_until = datetime.now(timezone.utc) + timedelta(
+                            minutes=_RATE_LIMIT_BACKOFF_MINUTES
+                        )
+                        logger.info(
+                            "Opus rate limited — backing off Opus for %d min, continuing triage+domain",
+                            _RATE_LIMIT_BACKOFF_MINUTES,
+                        )
+                        continue
+                    else:
+                        # Non-Opus rate limit (Sonnet/Haiku) — break the cycle,
+                        # nothing else can proceed either.
+                        _rate_limit_backoff_until = datetime.now(timezone.utc) + timedelta(
+                            minutes=_RATE_LIMIT_BACKOFF_MINUTES
+                        )
+                        logger.info(
+                            "Rate limited (%s) — backing off for %d minutes", reason, _RATE_LIMIT_BACKOFF_MINUTES
+                        )
+                        break
             else:
                 succeeded += 1
         except Exception:
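Condensed, the new skip handling is a three-way decision per PR. The helper below is hypothetical (not part of evaluate.py) and exists only to spell out the control flow; reason strings other than "opus_rate_limited" are illustrative:

    def backoff_action(reason: str) -> str:
        """Hypothetical illustration of the cycle's rate-limit decision."""
        if "rate_limited" not in reason:
            return "skip"      # ordinary skip; keep processing other PRs
        if reason == "opus_rate_limited":
            return "continue"  # Opus exhausted; triage + domain review go on
        return "break"         # Sonnet/Haiku exhausted; nothing can proceed

    assert backoff_action("some_other_skip") == "skip"
    assert backoff_action("opus_rate_limited") == "continue"
    assert backoff_action("rate_limited") == "break"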
lib/merge.py | 33
@@ -16,6 +16,7 @@ import logging
 from collections import defaultdict

 from . import config, db
+from .domains import detect_domain_from_branch
 from .forgejo import api as forgejo_api
 from .forgejo import repo_path
@@ -83,9 +84,7 @@ async def discover_external_prs(conn) -> int:
         is_pipeline = author.lower() in pipeline_users
         origin = "pipeline" if is_pipeline else "human"
         priority = "high" if origin == "human" else None
-        domain = (
-            _detect_domain_from_files(pr) if not is_pipeline else _detect_domain_from_branch(pr["head"]["ref"])
-        )
+        domain = None if not is_pipeline else detect_domain_from_branch(pr["head"]["ref"])

         conn.execute(
             """INSERT OR IGNORE INTO prs
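In effect, discovery now only classifies pipeline branches. A brief sketch (the pr dict is truncated to the shape used in the hunk above):

    from lib.domains import detect_domain_from_branch

    pr = {"head": {"ref": "rio/claims-futarchy"}}

    # Pipeline PR: branch prefix resolves to the agent's primary domain
    assert detect_domain_from_branch(pr["head"]["ref"]) == "internet-finance"

    # Human PR: domain stays None at discovery and is filled in later,
    # when evaluation inspects the diff via detect_domain_from_diff
    is_pipeline = False
    domain = None if not is_pipeline else detect_domain_from_branch(pr["head"]["ref"])
    assert domain is None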
@@ -122,34 +121,6 @@ async def discover_external_prs(conn) -> int:
     return discovered


-def _detect_domain_from_branch(branch: str) -> str | None:
-    """Extract domain from branch name like 'rio/claims-futarchy' → 'internet-finance'.
-
-    Agent-to-domain mapping for pipeline branches.
-    """
-    agent_domain = {
-        "rio": "internet-finance",
-        "clay": "entertainment",
-        "theseus": "ai-alignment",
-        "vida": "health",
-        "astra": "space-development",
-        "leo": "grand-strategy",
-    }
-    prefix = branch.split("/")[0].lower() if "/" in branch else ""
-    return agent_domain.get(prefix)
-
-
-def _detect_domain_from_files(pr: dict) -> str | None:
-    """Detect domain from PR's changed files for human-submitted PRs.
-
-    Humans may not follow agent branch naming. Fall back to inspecting
-    file paths. (Ganymede nit)
-    """
-    # We'd need to fetch files from the API — do it lazily on first eval
-    # For now, return None. Domain gets set during evaluation.
-    return None
-
-
 async def _post_ack_comment(pr_number: int):
     """Post acknowledgment comment on human-submitted PR. (Rhea)
lib/validate.py
@@ -16,6 +16,7 @@ from difflib import SequenceMatcher
 from pathlib import Path

 from . import config, db
+from .domains import VALID_DOMAINS
 from .forgejo import api as forgejo_api
 from .forgejo import get_pr_diff, repo_path
@@ -23,25 +24,6 @@ logger = logging.getLogger("pipeline.validate")

 # ─── Constants ──────────────────────────────────────────────────────────────

-VALID_DOMAINS = frozenset(
-    {
-        "internet-finance",
-        "entertainment",
-        "health",
-        "ai-alignment",
-        "space-development",
-        "grand-strategy",
-        "mechanisms",
-        "living-capital",
-        "living-agents",
-        "teleohumanity",
-        "critical-systems",
-        "collective-intelligence",
-        "teleological-economics",
-        "cultural-dynamics",
-    }
-)
-
 VALID_CONFIDENCE = frozenset({"proven", "likely", "experimental", "speculative"})
 VALID_TYPES = frozenset({"claim", "framework"})
 REQUIRED_FIELDS = ("type", "domain", "description", "confidence", "source", "created")
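A quick sanity check that the derived set is equivalent to the deleted literal; both lists name the same 14 domains. Illustrative only, with the "lib" package name assumed as above:

    from lib.domains import DOMAIN_AGENT_MAP, VALID_DOMAINS

    assert VALID_DOMAINS == frozenset(DOMAIN_AGENT_MAP)
    assert len(VALID_DOMAINS) == 14  # one entry per domain in DOMAIN_AGENT_MAP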