teleo-codex/ops/diagnostics/tier1_metrics.py
m3taversal 05d74d5e32 sync: import all VPS pipeline + diagnostics code as baseline
Imports 67 files from VPS (/opt/teleo-eval/) into repo as the single source
of truth. Previously only 8 of 67 files existed in repo — the rest were
deployed directly to VPS via SCP, causing massive drift.

Includes:
- pipeline/lib/: 33 Python modules (daemon core, extraction, evaluation, merge, cascade, cross-domain, costs, attribution, etc.)
- pipeline/: main daemon (teleo-pipeline.py), reweave.py, batch-extract-50.sh
- diagnostics/: 19 files (4-page dashboard, alerting, daily digest, review queue, tier1 metrics)
- agent-state/: bootstrap, lib-state, cascade inbox processor, schema
- systemd/: service unit files for reference
- deploy.sh: rsync-based deploy with --dry-run, syntax checks, dirty-tree gate
- research-session.sh: updated with Step 8.5 digest + cascade inbox processing

No new code written — all files are exact copies from VPS as of 2026-04-06.
From this point forward: edit in repo, commit, then deploy.sh.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-07 00:00:00 +01:00


"""Tier 1 Metrics — The three numbers that matter most for knowledge production.
1. Extraction yield: claims merged / claims evaluated, per agent, per week
2. Cost per merged claim: total spend / merged claims, per week
3. Fix success rate by rejection tag: which rejection reasons are fixable vs terminal
These queries run against pipeline.db (read-only) and power the /api/yield,
/api/cost-per-claim, and /api/fix-rates endpoints.
Owner: Argus <69AF7290-758F-464B-B472-04AFCA4AB340>
"""
import sqlite3
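
# NOTE: every query below indexes result rows by column name, so callers must
# set conn.row_factory = sqlite3.Row before passing the connection in.
# A minimal sketch of the expected setup (the database path is illustrative,
# not fixed by this module):
#
#     conn = sqlite3.connect("file:pipeline.db?mode=ro", uri=True)
#     conn.row_factory = sqlite3.Row

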
def extraction_yield(conn: sqlite3.Connection, days: int = 30) -> dict:
    """Extraction yield = merged / evaluated, trended per agent per day.

    Returns:
        {
            "daily": [{"day": "2026-04-01", "agent": "rio", "evaluated": 20, "merged": 8, "yield": 0.4}, ...],
            "totals": [{"agent": "rio", "evaluated": 100, "merged": 40, "yield": 0.4}, ...],
            "system": {"evaluated": 500, "merged": 200, "yield": 0.4}
        }
    """
    # Daily yield per agent
    # evaluated = approved + rejected (all terminal eval events)
    # merged = approved events only
    daily_rows = conn.execute(
        """
        SELECT date(timestamp) as day,
               json_extract(detail, '$.agent') as agent,
               COUNT(*) as evaluated,
               SUM(CASE WHEN event = 'approved' THEN 1 ELSE 0 END) as merged
        FROM audit_log
        WHERE stage = 'evaluate'
          AND event IN ('approved', 'changes_requested', 'domain_rejected', 'tier05_rejected')
          AND timestamp > datetime('now', ? || ' days')
        GROUP BY day, agent
        ORDER BY day DESC, agent
        """,
        (f"-{days}",),
    ).fetchall()
    daily_data = []
    for r in daily_rows:
        ev = r["evaluated"] or 0
        mg = r["merged"] or 0
        daily_data.append({
            "day": r["day"],
            "agent": r["agent"] or "unknown",
            "evaluated": ev,
            "merged": mg,
            "yield": round(mg / ev, 3) if ev else 0,
        })
    # Per-agent totals (same window)
    totals = conn.execute(
        """
        SELECT json_extract(detail, '$.agent') as agent,
               COUNT(*) as evaluated,
               SUM(CASE WHEN event = 'approved' THEN 1 ELSE 0 END) as merged
        FROM audit_log
        WHERE stage = 'evaluate'
          AND event IN ('approved', 'changes_requested', 'domain_rejected', 'tier05_rejected')
          AND timestamp > datetime('now', ? || ' days')
        GROUP BY agent
        ORDER BY merged DESC
        """,
        (f"-{days}",),
    ).fetchall()
    totals_data = []
    for r in totals:
        ev = r["evaluated"] or 0
        mg = r["merged"] or 0
        totals_data.append({
            "agent": r["agent"] or "unknown",
            "evaluated": ev,
            "merged": mg,
            "yield": round(mg / ev, 3) if ev else 0,
        })
    # System-wide total
    sys_row = conn.execute(
        """
        SELECT COUNT(*) as evaluated,
               SUM(CASE WHEN event = 'approved' THEN 1 ELSE 0 END) as merged
        FROM audit_log
        WHERE stage = 'evaluate'
          AND event IN ('approved', 'changes_requested', 'domain_rejected', 'tier05_rejected')
          AND timestamp > datetime('now', ? || ' days')
        """,
        (f"-{days}",),
    ).fetchone()
    sys_ev = sys_row["evaluated"] or 0
    sys_mg = sys_row["merged"] or 0
    return {
        "days": days,
        "daily": daily_data,
        "totals": totals_data,
        "system": {
            "evaluated": sys_ev,
            "merged": sys_mg,
            "yield": round(sys_mg / sys_ev, 3) if sys_ev else 0,
        },
    }
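

# A minimal sketch of how the /api/yield endpoint named in the module
# docstring might expose this query layer. The web framework and wiring are
# assumptions; only the route path and function come from this file:
#
#     from fastapi import FastAPI
#     app = FastAPI()
#
#     @app.get("/api/yield")
#     def api_yield(days: int = 30):
#         conn = sqlite3.connect("file:pipeline.db?mode=ro", uri=True)
#         conn.row_factory = sqlite3.Row
#         try:
#             return extraction_yield(conn, days)
#         finally:
#             conn.close()

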
def cost_per_merged_claim(conn: sqlite3.Connection, days: int = 30) -> dict:
    """Cost and compute per merged claim, trended per day.

    Uses the costs table for spend + tokens and the prs table for merge counts.
    Breaks down by stage. Separates API spend (dollars) from subscription
    compute (tokens only — Claude Max is flat-rate, so dollars are meaningless).

    Returns:
        {
            "daily": [{"day": "2026-04-01", "actual_spend": 1.50,
                       "estimated_cost": 1.50, "merged": 8,
                       "cost_per_claim": 0.19, "input_tokens": 50000,
                       "output_tokens": 5000, "total_tokens": 55000,
                       "tokens_per_claim": 6875}, ...],
            "by_stage": [{"stage": "eval_leo:openrouter", "api_cost": 1.50,
                          "estimated_cost": 1.50, "input_tokens": 300000,
                          "output_tokens": 50000, "calls": 100,
                          "billing": "api"}, ...],
            "system": {"actual_spend": 2.36, "estimated_cost": 2.36,
                       "merged": 80, "cost_per_claim": 0.03,
                       "total_tokens": 1200000, "tokens_per_claim": 15000,
                       "subscription_tokens": 0, "api_tokens": 1200000}
        }
    """
    # Daily: cost + tokens from the costs table, merged count from the prs table
    daily_cost = conn.execute(
        """
        SELECT date as day,
               SUM(cost_usd) as api_cost,
               SUM(cost_estimate_usd) as estimated_cost,
               SUM(input_tokens) as input_tokens,
               SUM(output_tokens) as output_tokens
        FROM costs
        WHERE date > date('now', ? || ' days')
        GROUP BY day
        ORDER BY day DESC
        """,
        (f"-{days}",),
    ).fetchall()
    daily_merges = conn.execute(
        """
        SELECT date(merged_at) as day,
               COUNT(*) as merged
        FROM prs
        WHERE status = 'merged'
          AND merged_at > datetime('now', ? || ' days')
        GROUP BY day
        ORDER BY day DESC
        """,
        (f"-{days}",),
    ).fetchall()
    # Merge into a combined daily view
    merge_map = {r["day"]: r["merged"] for r in daily_merges}
    cost_map = {}
    for r in daily_cost:
        cost_map[r["day"]] = {
            "api_cost": r["api_cost"] or 0,
            "estimated_cost": r["estimated_cost"] or 0,
            "input_tokens": r["input_tokens"] or 0,
            "output_tokens": r["output_tokens"] or 0,
        }
    all_days = sorted(set(merge_map) | set(cost_map), reverse=True)
    daily_data = []
    for day in all_days:
        c = cost_map.get(day, {"api_cost": 0, "estimated_cost": 0, "input_tokens": 0, "output_tokens": 0})
        merged = merge_map.get(day, 0) or 0
        total_tokens = c["input_tokens"] + c["output_tokens"]
        daily_data.append({
            "day": day,
            "actual_spend": round(c["api_cost"], 4),
            "estimated_cost": round(c["estimated_cost"], 4),
            "merged": merged,
            "cost_per_claim": round(c["estimated_cost"] / merged, 4) if merged else None,
            "input_tokens": c["input_tokens"],
            "output_tokens": c["output_tokens"],
            "total_tokens": total_tokens,
            "tokens_per_claim": round(total_tokens / merged) if merged else None,
        })
    # By stage with billing type (full window)
    by_stage = conn.execute(
        """
        SELECT stage,
               SUM(cost_usd) as api_cost,
               SUM(cost_estimate_usd) as estimated_cost,
               SUM(input_tokens) as input_tokens,
               SUM(output_tokens) as output_tokens,
               SUM(calls) as calls
        FROM costs
        WHERE date > date('now', ? || ' days')
        GROUP BY stage
        ORDER BY SUM(input_tokens + output_tokens) DESC
        """,
        (f"-{days}",),
    ).fetchall()
    stage_data = []
    total_api_cost = 0
    total_estimated_cost = 0
    total_input = 0
    total_output = 0
    subscription_tokens = 0
    api_tokens = 0
    for r in by_stage:
        cost = r["api_cost"] or 0
        est = r["estimated_cost"] or 0
        inp = r["input_tokens"] or 0
        out = r["output_tokens"] or 0
        calls = r["calls"] or 0
        stage_name = r["stage"]
        # :max suffix = subscription, :openrouter suffix = API
        billing = "subscription" if ":max" in stage_name else "api"
        total_api_cost += cost
        total_estimated_cost += est
        total_input += inp
        total_output += out
        if billing == "subscription":
            subscription_tokens += inp + out
        else:
            api_tokens += inp + out
        stage_data.append({
            "stage": stage_name,
            "api_cost": round(cost, 4),
            "estimated_cost": round(est, 4),
            "input_tokens": inp,
            "output_tokens": out,
            "calls": calls,
            "billing": billing,
        })
    # System totals
    sys_merged = conn.execute(
        "SELECT COUNT(*) as n FROM prs WHERE status='merged' AND merged_at > datetime('now', ? || ' days')",
        (f"-{days}",),
    ).fetchone()["n"] or 0
    total_tokens = total_input + total_output
    return {
        "days": days,
        "daily": daily_data,
        "by_stage": stage_data,
        "system": {
            "actual_spend": round(total_api_cost, 4),
            "estimated_cost": round(total_estimated_cost, 4),
            "merged": sys_merged,
            "cost_per_claim": round(total_estimated_cost / sys_merged, 4) if sys_merged else None,
            "total_tokens": total_tokens,
            "tokens_per_claim": round(total_tokens / sys_merged) if sys_merged else None,
            "subscription_tokens": subscription_tokens,
            "api_tokens": api_tokens,
            "note": "estimated_cost = API-rate equivalent for all calls (unified metric). actual_spend = real dollars charged to OpenRouter.",
        },
    }
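

# Billing convention used above and in compute_profile(): a ':max' suffix on
# the stage name marks flat-rate Claude Max subscription calls; anything else
# (e.g. the ':openrouter' suffix) is metered API spend. Illustrative mapping
# (the second stage name is hypothetical):
#     "eval_leo:openrouter" -> "api"
#     "eval_leo:max"        -> "subscription"

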
def fix_success_by_tag(conn: sqlite3.Connection, days: int = 30) -> dict:
    """Fix success rate broken down by rejection reason.

    For each rejection tag: how many PRs got that rejection, how many eventually
    merged (successful fix), how many are still open (in progress), and how many
    were abandoned (closed/zombie without merge). Rates are computed over
    resolved PRs only (fixed + terminal), excluding those still in progress.

    Returns:
        {
            "tags": [
                {
                    "tag": "insufficient_evidence",
                    "total": 50,
                    "fixed": 10,
                    "in_progress": 5,
                    "terminal": 35,
                    "fix_rate": 0.222,
                    "terminal_rate": 0.778
                }, ...
            ]
        }
    """
    # Get all rejection events with their tags and PR numbers,
    # then join with the prs table to see the final outcome
    rows = conn.execute(
        """
        SELECT value as tag,
               json_extract(al.detail, '$.pr') as pr_number
        FROM audit_log al, json_each(json_extract(al.detail, '$.issues'))
        WHERE al.stage = 'evaluate'
          AND al.event IN ('changes_requested', 'domain_rejected', 'tier05_rejected')
          AND al.timestamp > datetime('now', ? || ' days')
        """,
        (f"-{days}",),
    ).fetchall()
    # Collect unique PRs per tag
    tag_prs: dict[str, set] = {}
    for r in rows:
        tag = r["tag"]
        pr = r["pr_number"]
        if tag not in tag_prs:
            tag_prs[tag] = set()
        if pr is not None:
            tag_prs[tag].add(pr)
    if not tag_prs:
        return {"days": days, "tags": []}
    # Get status for all referenced PRs in one query
    all_prs = set()
    for prs in tag_prs.values():
        all_prs.update(prs)
    if not all_prs:
        return {"days": days, "tags": []}
    placeholders = ",".join("?" for _ in all_prs)
    pr_statuses = conn.execute(
        f"SELECT number, status FROM prs WHERE number IN ({placeholders})",
        list(all_prs),
    ).fetchall()
    status_map = {r["number"]: r["status"] for r in pr_statuses}
    # Compute per-tag outcomes, largest tag first
    tag_data = []
    for tag, prs in sorted(tag_prs.items(), key=lambda x: -len(x[1])):
        fixed = 0
        in_progress = 0
        terminal = 0
        for pr in prs:
            st = status_map.get(pr, "unknown")
            if st == "merged":
                fixed += 1
            elif st in ("open", "validating", "reviewing", "merging"):
                in_progress += 1
            else:
                # closed, zombie, conflict, unknown
                terminal += 1
        total = len(prs)
        # Fix rate excludes in-progress PRs (only counts resolved ones),
        # e.g. fixed=10, terminal=35 -> resolved=45, fix_rate=10/45=0.222
        resolved = fixed + terminal
        tag_data.append({
            "tag": tag,
            "total": total,
            "fixed": fixed,
            "in_progress": in_progress,
            "terminal": terminal,
            "fix_rate": round(fixed / resolved, 3) if resolved else None,
            "terminal_rate": round(terminal / resolved, 3) if resolved else None,
        })
    return {"days": days, "tags": tag_data}


def compute_profile(conn: sqlite3.Connection, days: int = 30) -> dict:
    """Compute profile — Max subscription telemetry alongside API usage.

    Surfaces: cache hit rates, latency, cost estimates (API-equivalent),
    and token breakdown by billing type.
    """
    rows = conn.execute(
        """
        SELECT stage, model,
               SUM(calls) as calls,
               SUM(input_tokens) as input_tokens,
               SUM(output_tokens) as output_tokens,
               SUM(cost_usd) as api_cost,
               SUM(duration_ms) as duration_ms,
               SUM(cache_read_tokens) as cache_read_tokens,
               SUM(cache_write_tokens) as cache_write_tokens,
               SUM(cost_estimate_usd) as cost_estimate_usd
        FROM costs
        WHERE date > date('now', ? || ' days')
        GROUP BY stage, model
        ORDER BY SUM(input_tokens + output_tokens) DESC
        """,
        (f"-{days}",),
    ).fetchall()
    stage_data = []
    total_calls = 0
    total_tokens = 0
    total_duration = 0
    total_cache_read = 0
    total_cache_write = 0
    api_calls = 0
    sub_calls = 0
    api_spend = 0.0
    sub_estimate = 0.0
    sub_input_tokens = 0
    for r in rows:
        calls = r["calls"] or 0
        inp = r["input_tokens"] or 0
        out = r["output_tokens"] or 0
        dur = r["duration_ms"] or 0
        cr = r["cache_read_tokens"] or 0
        cw = r["cache_write_tokens"] or 0
        cost = r["api_cost"] or 0
        est = r["cost_estimate_usd"] or 0
        stage_name = r["stage"]
        # :max suffix = subscription, everything else = metered API
        billing = "subscription" if ":max" in stage_name else "api"
        total_calls += calls
        total_tokens += inp + out
        total_duration += dur
        total_cache_read += cr
        total_cache_write += cw
        if billing == "subscription":
            sub_calls += calls
            sub_estimate += est
            sub_input_tokens += inp
        else:
            api_calls += calls
            api_spend += cost
        stage_data.append({
            "stage": stage_name,
            "model": r["model"],
            "calls": calls,
            "input_tokens": inp,
            "output_tokens": out,
            "total_tokens": inp + out,
            "duration_ms": dur,
            "avg_latency_ms": round(dur / calls) if calls else 0,
            "cache_read_tokens": cr,
            "cache_write_tokens": cw,
            "cache_hit_rate": round(cr / (cr + inp), 3) if (cr + inp) else 0,
            "api_cost": round(cost, 4),
            "cost_estimate_usd": round(est, 4),
            "billing": billing,
        })
    # Cache summary (only meaningful for subscription/Max calls)
    total_cacheable = total_cache_read + total_cache_write + sub_input_tokens
    cache_hit_rate = round(total_cache_read / total_cacheable, 3) if total_cacheable else 0
    return {
        "days": days,
        "by_stage": stage_data,
        "cache": {
            "read_tokens": total_cache_read,
            "write_tokens": total_cache_write,
            "hit_rate": cache_hit_rate,
            "note": "Cache hits are prompt tokens served from cache (cheaper/faster)",
        },
        "latency": {
            "total_ms": total_duration,
            "avg_ms_per_call": round(total_duration / total_calls) if total_calls else 0,
            "note": "Wall-clock time including network. Only populated for Claude Max calls.",
        },
        "subscription_estimate": {
            "total_cost_usd": round(sub_estimate, 4),
            "note": "What subscription calls would cost at API rates. Actual cost: $0 (flat-rate Max plan).",
        },
        "system": {
            "total_calls": total_calls,
            "total_tokens": total_tokens,
            "api_calls": api_calls,
            "subscription_calls": sub_calls,
            "api_spend": round(api_spend, 4),
            "subscription_estimate": round(sub_estimate, 4),
            "cache_hit_rate": cache_hit_rate,
        },
    }
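

# Hedged smoke test: run all four queries against a local pipeline.db copy and
# print the system-level summaries. The default DB path is illustrative; pass
# the real path as argv[1].
if __name__ == "__main__":
    import json
    import sys

    db_path = sys.argv[1] if len(sys.argv) > 1 else "pipeline.db"
    conn = sqlite3.connect(f"file:{db_path}?mode=ro", uri=True)
    conn.row_factory = sqlite3.Row
    try:
        print("yield:", json.dumps(extraction_yield(conn)["system"]))
        print("cost:", json.dumps(cost_per_merged_claim(conn)["system"]))
        print("fix rates:", json.dumps(fix_success_by_tag(conn)["tags"][:5]))
        print("compute:", json.dumps(compute_profile(conn)["system"]))
    finally:
        conn.close()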