teleo-codex/ops/diagnostics/review_queue.py
m3taversal 05d74d5e32 sync: import all VPS pipeline + diagnostics code as baseline
Imports 67 files from VPS (/opt/teleo-eval/) into repo as the single source
of truth. Previously only 8 of 67 files existed in repo — the rest were
deployed directly to VPS via SCP, causing massive drift.

Includes:
- pipeline/lib/: 33 Python modules (daemon core, extraction, evaluation, merge, cascade, cross-domain, costs, attribution, etc.)
- pipeline/: main daemon (teleo-pipeline.py), reweave.py, batch-extract-50.sh
- diagnostics/: 19 files (4-page dashboard, alerting, daily digest, review queue, tier1 metrics)
- agent-state/: bootstrap, lib-state, cascade inbox processor, schema
- systemd/: service unit files for reference
- deploy.sh: rsync-based deploy with --dry-run, syntax checks, dirty-tree gate
- research-session.sh: updated with Step 8.5 digest + cascade inbox processing

No new code written — all files are exact copies from VPS as of 2026-04-06.
From this point forward: edit in repo, commit, then deploy.sh.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-07 00:00:00 +01:00

222 lines
8 KiB
Python

"""Review queue: fetches open PRs from Forgejo, classifies and enriches them.
Data sources:
- Forgejo API (git.livingip.xyz) for PR metadata, reviews, changed files
- pipeline.db prs table for eval status cross-reference
Display priority: broken > needs-review (by age) > approved-awaiting-merge > changes-requested
"""
import asyncio
import logging
from datetime import datetime, timezone
from typing import Any
import aiohttp
logger = logging.getLogger("argus.review_queue")
FORGEJO_BASE = "https://git.livingip.xyz/api/v1"  # Forgejo REST API root
REPO = "teleo/teleo-codex"  # owner/repo slug interpolated into every API path
# Domain detection from branch prefixes or path patterns
DOMAIN_KEYWORDS = {
"internet-finance": ["internet-finance", "defi", "dao", "prediction-market"],
"entertainment": ["entertainment", "clay", "media", "ip-"],
"ai-alignment": ["ai-alignment", "alignment", "theseus"],
"health": ["health", "vida", "biotech", "glp"],
"space-development": ["space", "astra", "orbital", "lunar"],
"energy": ["energy", "solar", "nuclear", "fusion"],
"grand-strategy": ["grand-strategy", "leo", "strategy"],
"collective-intelligence": ["collective-intelligence", "coordination"],
"critical-systems": ["critical-systems", "complexity", "emergence"],
"teleological-economics": ["teleological-economics", "disruption", "attractor"],
"cultural-dynamics": ["cultural-dynamics", "memetics", "narrative"],
"mechanisms": ["mechanisms", "futarchy", "governance"],
"living-capital": ["living-capital", "investment"],
"living-agents": ["living-agents", "agent-architecture"],
"teleohumanity": ["teleohumanity", "worldview"],
"general": ["general"],
}
def _detect_domain(branch: str, title: str, files: list[dict]) -> str:
"""Detect domain from branch name, title, or changed file paths."""
text = f"{branch} {title}".lower()
# Check branch/title
for domain, keywords in DOMAIN_KEYWORDS.items():
for kw in keywords:
if kw in text:
return domain
# Check file paths
for f in files:
path = f.get("filename", "")
if path.startswith("domains/") or path.startswith("foundations/") or path.startswith("core/"):
parts = path.split("/")
if len(parts) >= 2:
return parts[1]
return "unknown"
def _classify_files(files: list[dict]) -> dict[str, int]:
"""Count claim, enrichment, and challenge files from changed files list."""
counts = {"claim_count": 0, "enrichment_count": 0, "challenge_count": 0}
for f in files:
path = f.get("filename", "")
status = f.get("status", "") # added, modified, removed
if not path.startswith("domains/") and not path.startswith("foundations/") and not path.startswith("core/"):
continue
name = path.split("/")[-1].lower()
if "challenge" in name or "divergence" in name:
counts["challenge_count"] += 1
elif status == "modified":
counts["enrichment_count"] += 1
else:
counts["claim_count"] += 1
return counts
def _classify_status(
changed_files: int,
reviews: list[dict],
requested_reviewers: list[dict],
) -> str:
"""Classify PR status: broken, needs-review, approved-awaiting-merge, changes-requested."""
if changed_files == 0:
return "broken"
has_changes_requested = any(r["state"] == "REQUEST_CHANGES" for r in reviews)
if has_changes_requested:
# Check if there's a newer approval after the changes request
last_change_req = max(
(r["submitted_at"] for r in reviews if r["state"] == "REQUEST_CHANGES"),
default="",
)
later_approvals = [
r for r in reviews
if r["state"] == "APPROVED" and r["submitted_at"] > last_change_req
]
if not later_approvals:
return "changes-requested"
approvals = [r for r in reviews if r["state"] == "APPROVED"]
if len(approvals) >= 2:
return "approved-awaiting-merge"
return "needs-review"
def _days_open(created_at: str) -> int:
"""Calculate days since PR was opened."""
created = datetime.fromisoformat(created_at.replace("Z", "+00:00"))
now = datetime.now(timezone.utc)
return (now - created).days
# Display ordering for the review queue: lower value sorts first.
# Unknown statuses sort last (callers use .get(status, 99)).
_STATUS_PRIORITY = {
    "broken": 0,
    "needs-review": 1,
    "approved-awaiting-merge": 2,
    "changes-requested": 3,
}
async def fetch_review_queue(
    forgejo_token: str | None = None,
    timeout_s: int = 15,
) -> list[dict[str, Any]]:
    """Fetch open PRs from Forgejo and return an enriched review queue.

    For each open PR, reviews and changed files are fetched concurrently,
    then the PR is classified (status, domain, claim/enrichment/challenge
    counts) and enriched.

    Args:
        forgejo_token: optional API token, sent as "token <...>" auth header.
        timeout_s: per-request timeout in seconds.

    Returns:
        List of PR dicts sorted by display priority (broken first, then
        needs-review by age descending, then the rest).  Returns an empty
        list if the PR listing itself cannot be fetched.
    """
    headers = {"Accept": "application/json"}
    if forgejo_token:
        headers["Authorization"] = f"token {forgejo_token}"
    # SECURITY: ssl=False disables TLS certificate verification for
    # git.livingip.xyz.  If the server uses an internal CA, prefer passing
    # an ssl.SSLContext loaded with that CA instead of disabling checks.
    connector = aiohttp.TCPConnector(ssl=False)
    timeout = aiohttp.ClientTimeout(total=timeout_s)
    async with aiohttp.ClientSession(headers=headers, connector=connector) as session:
        # Fetch the open-PR list; all sub-requests hang off this result.
        url = f"{FORGEJO_BASE}/repos/{REPO}/pulls?state=open&limit=50&sort=oldest"
        try:
            async with session.get(url, timeout=timeout) as resp:
                if resp.status != 200:
                    logger.error("Forgejo PR list returned %d", resp.status)
                    return []
                prs = await resp.json()
        except Exception as e:
            logger.error("Failed to fetch PRs from Forgejo: %s", e)
            return []

        async def _fetch_json(session, url, label=""):
            """Best-effort GET returning parsed JSON, or [] on any failure."""
            try:
                async with session.get(url, timeout=timeout) as resp:
                    if resp.status == 200:
                        return await resp.json()
                    # Non-200s were previously dropped silently; log them so
                    # missing reviews/files are diagnosable.
                    logger.warning("Fetch %s returned %d", label, resp.status)
            except Exception as e:
                logger.warning("Failed to fetch %s: %s", label, e)
            return []

        # Two sub-requests per PR, gathered in parallel.  gather() preserves
        # submission order, so PR i owns indices 2*i (reviews) and 2*i+1 (files).
        sub_tasks = []
        for pr in prs:
            n = pr["number"]
            sub_tasks.append(_fetch_json(session, f"{FORGEJO_BASE}/repos/{REPO}/pulls/{n}/reviews", f"reviews PR#{n}"))
            sub_tasks.append(_fetch_json(session, f"{FORGEJO_BASE}/repos/{REPO}/pulls/{n}/files", f"files PR#{n}"))
        sub_results = await asyncio.gather(*sub_tasks)

        queue = []
        for i, pr in enumerate(prs):
            reviews = sub_results[i * 2]
            files = sub_results[i * 2 + 1]
            # Build enriched PR record.  Use `(x or {})` throughout: the API
            # can return explicit nulls, in which case .get(key, {}) yields
            # None (the key exists) and a chained .get would raise.
            branch = (pr.get("head") or {}).get("ref", "")
            title = pr.get("title", "")
            author = (pr.get("user") or {}).get("login", "unknown")
            created_at = pr.get("created_at", "")
            # Fall back to the fetched file list length if the PR payload
            # lacks changed_files.
            changed_files = pr.get("changed_files", len(files))
            requested_reviewers = pr.get("requested_reviewers", [])
            domain = _detect_domain(branch, title, files)
            file_counts = _classify_files(files)
            status = _classify_status(changed_files, reviews, requested_reviewers)
            days = _days_open(created_at) if created_at else 0
            review_list = [
                {
                    "reviewer": (r.get("user") or {}).get("login", "unknown"),
                    "outcome": r.get("state", "PENDING").lower(),
                    "date": r.get("submitted_at", ""),
                    "summary": (r.get("body") or "")[:200],
                }
                for r in reviews
                if r.get("state") and r["state"] != "PENDING"
            ]
            queue.append({
                "pr_number": pr["number"],
                "title": title,
                "author": author,
                "domain": domain,
                "branch": branch,
                "created_at": created_at,
                "days_open": days,
                "status": status,
                "changed_files": changed_files,
                **file_counts,
                "reviews": review_list,
                "url": pr.get("html_url", ""),
            })
        # Sort: broken first, then needs-review by days_open desc, then rest.
        queue.sort(key=lambda x: (_STATUS_PRIORITY.get(x["status"], 99), -x["days_open"]))
        return queue