teleo-codex/ops/pipeline-v2/lib/digest.py

"""Daily digest — sends Cory a summary of all Tier 3 activity at 8am London time.

Aggregates: merged claims (with insight summaries), pipeline metrics, agent activity,
pending review items. Runs as a scheduled job in bot.py.

Epimetheus owns this module.
"""

import logging
import sqlite3
from datetime import datetime, timezone, timedelta
from zoneinfo import ZoneInfo

# Module logger. The name is the fixed string "telegram.digest", not __name__.
logger = logging.getLogger("telegram.digest")

# Timezone in which DIGEST_HOUR_LONDON is interpreted by next_digest_time().
LONDON_TZ = ZoneInfo("Europe/London")
# Local (wall-clock) hour of the digest slot. Since it is expressed in London
# time, the matching UTC hour differs between BST and GMT.
DIGEST_HOUR_LONDON = 8  # 8am London time (auto-adjusts for BST/GMT)


def next_digest_time() -> datetime:
    """Return the next digest slot as a timezone-aware UTC datetime.

    The slot is DIGEST_HOUR_LONDON:00:00 on the London wall clock. It is
    computed in LONDON_TZ and converted to UTC only at the very end, so the
    UTC offset applied is the one in force on the slot's own date.

    Returns:
        The first slot strictly after the current moment, expressed in UTC.
    """
    london_now = datetime.now(LONDON_TZ)
    slot = london_now.replace(
        hour=DIGEST_HOUR_LONDON,
        minute=0,
        second=0,
        microsecond=0,
    )
    # Today's slot is now or already behind us: roll over to tomorrow's.
    if london_now >= slot:
        slot = slot + timedelta(days=1)
    return slot.astimezone(timezone.utc)


def _get_merged_claims_24h(conn: sqlite3.Connection) -> list[dict]:
    """Get PRs merged in the last 24 hours with domain and branch info."""
    rows = conn.execute(
        """SELECT number, branch, domain, agent, commit_type, merged_at, description
           FROM prs
           WHERE merged_at > datetime('now', '-24 hours')
             AND status = 'merged'
           ORDER BY merged_at DESC""",
    ).fetchall()
    return [dict(r) for r in rows]


def _get_pipeline_metrics_24h(conn: sqlite3.Connection) -> dict:
    """Get pipeline activity metrics for the last 24 hours."""
    total_merged = conn.execute(
        "SELECT COUNT(*) FROM prs WHERE merged_at > datetime('now', '-24 hours') AND status = 'merged'"
    ).fetchone()[0]

    total_closed = conn.execute(
        "SELECT COUNT(*) FROM prs WHERE status = 'closed' AND created_at > datetime('now', '-24 hours')"
    ).fetchone()[0]

    total_conflict = conn.execute(
        "SELECT COUNT(*) FROM prs WHERE status IN ('conflict', 'conflict_permanent') AND created_at > datetime('now', '-24 hours')"
    ).fetchone()[0]

    total_open = conn.execute(
        "SELECT COUNT(*) FROM prs WHERE status IN ('open', 'reviewing', 'approved', 'merging')"
    ).fetchone()[0]

    # Approval rate (last 24h)
    evaluated = conn.execute(
        "SELECT COUNT(*) FROM prs WHERE leo_verdict IN ('approve', 'request_changes') AND created_at > datetime('now', '-24 hours')"
    ).fetchone()[0]
    approved = conn.execute(
        "SELECT COUNT(*) FROM prs WHERE leo_verdict = 'approve' AND created_at > datetime('now', '-24 hours')"
    ).fetchone()[0]
    approval_rate = (approved / evaluated * 100) if evaluated > 0 else 0

    return {
        "merged": total_merged,
        "closed": total_closed,
        "conflict": total_conflict,
        "open": total_open,
        "evaluated": evaluated,
        "approved": approved,
        "approval_rate": approval_rate,
    }


def _get_agent_activity_24h(conn: sqlite3.Connection) -> dict[str, int]:
    """Get PR count by agent for the last 24 hours."""
    rows = conn.execute(
        """SELECT agent, COUNT(*) as cnt
           FROM prs
           WHERE created_at > datetime('now', '-24 hours')
             AND agent IS NOT NULL
           GROUP BY agent
           ORDER BY cnt DESC""",
    ).fetchall()
    return {r["agent"]: r["cnt"] for r in rows}


def _get_pending_review_count(conn: sqlite3.Connection) -> int:
    """Count PRs awaiting review."""
    return conn.execute(
        "SELECT COUNT(*) FROM prs WHERE status IN ('open', 'reviewing')"
    ).fetchone()[0]


def _extract_claim_title(branch: str) -> str:
    """Extract a human-readable claim title from a branch name.

    Branch format: extract/source-slug or agent/description
    """
    # Strip prefix (extract/, research/, theseus/, etc.)
    parts = branch.split("/", 1)
    slug = parts[1] if len(parts) > 1 else parts[0]
    # Convert slug to readable title
    return slug.replace("-", " ").replace("_", " ").title()


def format_digest(
    merged_claims: list[dict],
    metrics: dict,
    agent_activity: dict[str, int],
    pending_review: int,
) -> str:
    """Format the daily digest message.

    Args:
        merged_claims: One dict per merged PR. The ``domain``, ``description``,
            ``branch`` and ``commit_type`` keys are read; each may be missing
            or ``None`` (a NULL database column arrives as ``None``).
        metrics: Must contain ``merged``, ``closed``, ``conflict``, ``open``
            and ``approval_rate``.
        agent_activity: Agent name -> PR count, rendered in iteration order.
        pending_review: Number of PRs awaiting review.

    Returns:
        The digest as newline-joined plain text, headed by today's UTC date.
    """
    date_str = datetime.now(timezone.utc).strftime("%Y-%m-%d")

    parts = [f"DAILY DIGEST — {date_str}", ""]

    # Merged claims section
    if merged_claims:
        # Group by domain so each domain's claims are listed together.
        by_domain: dict[str, list] = {}
        for claim in merged_claims:
            domain = claim.get("domain") or "unknown"
            by_domain.setdefault(domain, []).append(claim)

        parts.append(f"CLAIMS MERGED ({len(merged_claims)})")
        for domain, claims in sorted(by_domain.items()):
            for c in claims:
                # Use real description from frontmatter if available, fall back to slug title
                desc = c.get("description")
                if desc:
                    # Take first description if multiple (pipe-delimited)
                    display = desc.split(" | ")[0]
                    if len(display) > 120:
                        display = display[:117] + "..."
                else:
                    # A NULL branch column is present in the dict as None, so a
                    # dict.get() default would never apply; test the value
                    # itself instead of crashing in the title helper.
                    branch = c.get("branch")
                    display = _extract_claim_title(branch) if branch else "Unknown"
                commit_type = c.get("commit_type")
                type_tag = f"[{commit_type}] " if commit_type else ""
                parts.append(f"  {type_tag}{display} ({domain})")
        parts.append("")
    else:
        parts.extend(["CLAIMS MERGED (0)", "  No claims merged in the last 24h", ""])

    # Pipeline metrics: success rate is merged / (merged + closed + conflict).
    success_rate = 0
    total_attempted = metrics["merged"] + metrics["closed"] + metrics["conflict"]
    if total_attempted > 0:
        success_rate = metrics["merged"] / total_attempted * 100

    parts.append("PIPELINE")
    parts.append(f"  Merged: {metrics['merged']} | Closed: {metrics['closed']} | Conflicts: {metrics['conflict']}")
    parts.append(f"  Success rate: {success_rate:.0f}% | Approval rate: {metrics['approval_rate']:.0f}%")
    parts.append(f"  Open PRs: {metrics['open']}")
    parts.append("")

    # Agent activity
    if agent_activity:
        parts.append("AGENTS")
        for agent, count in agent_activity.items():
            parts.append(f"  {agent}: {count} PRs")
        parts.append("")
    else:
        parts.extend(["AGENTS", "  No agent activity in the last 24h", ""])

    # Pending review: anything that is not a positive count is shown as 0.
    parts.append(f"PENDING YOUR REVIEW: {max(pending_review, 0)}")

    return "\n".join(parts)


# Telegram's Bot API documents a 4096-character cap on sendMessage text.
# NOTE(review): the cap may be counted in UTF-16 units rather than code
# points; lower this if digests heavy in non-BMP characters get rejected.
_MAX_MESSAGE_CHARS = 4096


def _split_message(text: str, limit: int = _MAX_MESSAGE_CHARS) -> list[str]:
    """Split *text* into chunks of at most *limit* characters, on line breaks."""
    if len(text) <= limit:
        return [text]

    chunks: list[str] = []
    current = ""
    for line in text.split("\n"):
        rest = line
        # A single line longer than the limit cannot stay whole: hard-split it.
        while len(rest) > limit:
            if current:
                chunks.append(current)
                current = ""
            chunks.append(rest[:limit])
            rest = rest[limit:]
        candidate = f"{current}\n{rest}" if current else rest
        if len(candidate) > limit:
            # Only reachable with a non-empty `current`, since `rest` fits alone.
            chunks.append(current)
            current = rest
        else:
            current = candidate
    if current:
        chunks.append(current)
    return chunks


async def send_daily_digest(context):
    """Send daily digest to admin chat. Scheduled job.

    Reads ``approval_conn`` (SQLite connection) and ``admin_chat_id`` from
    ``context.bot_data`` and skips silently when either is missing. A digest
    longer than one message allows is sent as several consecutive messages
    instead of being rejected outright.

    Args:
        context: Job callback context exposing ``bot_data`` and ``bot``
            (presumably python-telegram-bot's ``CallbackContext``).

    Failures are logged with a traceback and never propagate, so a bad run
    cannot take down the scheduler.
    """
    conn = context.bot_data.get("approval_conn")
    admin_chat_id = context.bot_data.get("admin_chat_id")

    if not conn or not admin_chat_id:
        logger.debug("Digest skipped — no DB connection or admin chat ID")
        return

    try:
        merged = _get_merged_claims_24h(conn)
        metrics = _get_pipeline_metrics_24h(conn)
        activity = _get_agent_activity_24h(conn)
        pending = _get_pending_review_count(conn)

        text = format_digest(merged, metrics, activity, pending)

        # A short digest yields exactly one chunk, i.e. a single send as before.
        for chunk in _split_message(text):
            await context.bot.send_message(
                chat_id=admin_chat_id,
                text=chunk,
            )
        logger.info("Daily digest sent (%d claims, %d agents active)",
                    len(merged), len(activity))
    except Exception as e:
        # Top-level job boundary: log with traceback rather than re-raise.
        logger.error("Failed to send daily digest: %s", e, exc_info=True)