"""Tier 1 Metrics — The three numbers that matter most for knowledge production.

1. Extraction yield: claims merged / claims evaluated, per agent, per day
2. Cost per merged claim: total spend / merged claims, per day
3. Fix success rate by rejection tag: which rejection reasons are fixable
   vs terminal

These queries run against pipeline.db (read-only) and power the /api/yield,
/api/cost-per-claim, and /api/fix-rates endpoints.

NOTE: all trends are grouped by calendar day via date(timestamp) — earlier
docs claimed ISO-week grouping, which the SQL never did.

Owner: Argus <69AF7290-758F-464B-B472-04AFCA4AB340>
"""

import sqlite3


def extraction_yield(conn: sqlite3.Connection, days: int = 30) -> dict:
    """Extraction yield = merged / evaluated, trended per agent per day.

    An "evaluated" claim is any terminal evaluation event (approved or one
    of the three rejection flavors); a "merged" claim is an 'approved'
    event only.

    Args:
        conn: Read-only connection to pipeline.db. row_factory must be
            sqlite3.Row (rows are accessed by column name).
        days: Trailing window size in days.

    Returns:
        {
          "days": 30,
          "daily": [{"day": "2026-03-27", "agent": "rio", "evaluated": 20,
                     "merged": 8, "yield": 0.4}, ...],
          "totals": [{"agent": "rio", "evaluated": 100, "merged": 40,
                      "yield": 0.4}, ...],
          "system": {"evaluated": 500, "merged": 200, "yield": 0.4}
        }
    """
    # Daily yield per agent within the window.
    # evaluated = approved + rejected (all terminal eval events)
    # merged = approved events only
    daily_rows = conn.execute(
        """
        SELECT date(timestamp) as day,
               json_extract(detail, '$.agent') as agent,
               COUNT(*) as evaluated,
               SUM(CASE WHEN event = 'approved' THEN 1 ELSE 0 END) as merged
        FROM audit_log
        WHERE stage = 'evaluate'
          AND event IN ('approved', 'changes_requested', 'domain_rejected', 'tier05_rejected')
          AND timestamp > datetime('now', ? || ' days')
        GROUP BY day, agent
        ORDER BY day DESC, agent
        """,
        (f"-{days}",),
    ).fetchall()
    daily_data = []
    for r in daily_rows:
        ev = r["evaluated"] or 0
        mg = r["merged"] or 0
        daily_data.append({
            "day": r["day"],
            "agent": r["agent"] or "unknown",
            "evaluated": ev,
            "merged": mg,
            "yield": round(mg / ev, 3) if ev else 0,
        })

    # Per-agent totals over the same window.
    totals_rows = conn.execute(
        """
        SELECT json_extract(detail, '$.agent') as agent,
               COUNT(*) as evaluated,
               SUM(CASE WHEN event = 'approved' THEN 1 ELSE 0 END) as merged
        FROM audit_log
        WHERE stage = 'evaluate'
          AND event IN ('approved', 'changes_requested', 'domain_rejected', 'tier05_rejected')
          AND timestamp > datetime('now', ? || ' days')
        GROUP BY agent
        ORDER BY merged DESC
        """,
        (f"-{days}",),
    ).fetchall()
    totals_data = []
    for r in totals_rows:
        ev = r["evaluated"] or 0
        mg = r["merged"] or 0
        totals_data.append({
            "agent": r["agent"] or "unknown",
            "evaluated": ev,
            "merged": mg,
            "yield": round(mg / ev, 3) if ev else 0,
        })

    # System-wide total (same window).
    sys_row = conn.execute(
        """
        SELECT COUNT(*) as evaluated,
               SUM(CASE WHEN event = 'approved' THEN 1 ELSE 0 END) as merged
        FROM audit_log
        WHERE stage = 'evaluate'
          AND event IN ('approved', 'changes_requested', 'domain_rejected', 'tier05_rejected')
          AND timestamp > datetime('now', ? || ' days')
        """,
        (f"-{days}",),
    ).fetchone()
    sys_ev = sys_row["evaluated"] or 0
    sys_mg = sys_row["merged"] or 0
    return {
        "days": days,
        "daily": daily_data,
        "totals": totals_data,
        "system": {
            "evaluated": sys_ev,
            "merged": sys_mg,
            "yield": round(sys_mg / sys_ev, 3) if sys_ev else 0,
        },
    }


def cost_per_merged_claim(conn: sqlite3.Connection, days: int = 30) -> dict:
    """Cost and compute per merged claim, trended per day.

    Uses the costs table for spend + tokens and the prs table for merge
    counts. Breaks down by stage. Separates API spend (dollars) from
    subscription compute (tokens only — Claude Max is flat-rate, so dollars
    are meaningless).

    Args:
        conn: Read-only connection to pipeline.db (sqlite3.Row row_factory).
        days: Trailing window size in days.

    Returns:
        {
          "days": 30,
          "daily": [{"day": "2026-03-27", "actual_spend": 1.5,
                     "estimated_cost": 1.9, "merged": 8,
                     "cost_per_claim": 0.2375, "input_tokens": 50000,
                     "output_tokens": 5000, "total_tokens": 55000,
                     "tokens_per_claim": 6875}, ...],
          "by_stage": [{"stage": "eval_leo:openrouter", "api_cost": 1.5,
                        "estimated_cost": 1.9, "input_tokens": 300000,
                        "output_tokens": 50000, "calls": 100,
                        "billing": "api"}, ...],
          "system": {"actual_spend": 2.36, "estimated_cost": 2.4,
                     "merged": 80, "cost_per_claim": 0.03,
                     "total_tokens": 1200000, "tokens_per_claim": 15000,
                     "subscription_tokens": 0, "api_tokens": 1200000,
                     "note": "..."}
        }
    """
    # Daily cost + tokens from the costs table.
    daily_cost = conn.execute(
        """
        SELECT date as day,
               SUM(cost_usd) as api_cost,
               SUM(cost_estimate_usd) as estimated_cost,
               SUM(input_tokens) as input_tokens,
               SUM(output_tokens) as output_tokens
        FROM costs
        WHERE date > date('now', ? || ' days')
        GROUP BY day
        ORDER BY day DESC
        """,
        (f"-{days}",),
    ).fetchall()
    # Daily merge counts from the prs table.
    daily_merges = conn.execute(
        """
        SELECT date(merged_at) as day, COUNT(*) as merged
        FROM prs
        WHERE status = 'merged'
          AND merged_at > datetime('now', ? || ' days')
        GROUP BY day
        ORDER BY day DESC
        """,
        (f"-{days}",),
    ).fetchall()

    # Join the two daily series into one combined view, keyed by day.
    merge_map = {r["day"]: r["merged"] for r in daily_merges}
    cost_map = {}
    for r in daily_cost:
        cost_map[r["day"]] = {
            "api_cost": r["api_cost"] or 0,
            "estimated_cost": r["estimated_cost"] or 0,
            "input_tokens": r["input_tokens"] or 0,
            "output_tokens": r["output_tokens"] or 0,
        }
    all_days = sorted(set(merge_map) | set(cost_map), reverse=True)
    daily_data = []
    for d in all_days:
        c = cost_map.get(d, {"api_cost": 0, "estimated_cost": 0,
                             "input_tokens": 0, "output_tokens": 0})
        merged = merge_map.get(d, 0) or 0
        total_tokens = c["input_tokens"] + c["output_tokens"]
        daily_data.append({
            "day": d,
            "actual_spend": round(c["api_cost"], 4),
            "estimated_cost": round(c["estimated_cost"], 4),
            "merged": merged,
            # The unified metric divides the API-rate *estimate* (not actual
            # spend) by merges; None when nothing merged that day.
            "cost_per_claim": round(c["estimated_cost"] / merged, 4) if merged else None,
            "input_tokens": c["input_tokens"],
            "output_tokens": c["output_tokens"],
            "total_tokens": total_tokens,
            "tokens_per_claim": round(total_tokens / merged) if merged else None,
        })

    # By stage with billing type (full window).
    by_stage = conn.execute(
        """
        SELECT stage,
               SUM(cost_usd) as api_cost,
               SUM(cost_estimate_usd) as estimated_cost,
               SUM(input_tokens) as input_tokens,
               SUM(output_tokens) as output_tokens,
               SUM(calls) as calls
        FROM costs
        WHERE date > date('now', ? || ' days')
        GROUP BY stage
        ORDER BY SUM(input_tokens + output_tokens) DESC
        """,
        (f"-{days}",),
    ).fetchall()
    stage_data = []
    total_api_cost = 0
    total_estimated_cost = 0
    total_input = 0
    total_output = 0
    subscription_tokens = 0
    api_tokens = 0
    for r in by_stage:
        cost = r["api_cost"] or 0
        est = r["estimated_cost"] or 0
        inp = r["input_tokens"] or 0
        out = r["output_tokens"] or 0
        calls = r["calls"] or 0
        stage_name = r["stage"]
        # :max suffix = subscription, :openrouter suffix = API
        billing = "subscription" if ":max" in stage_name else "api"
        total_api_cost += cost
        total_estimated_cost += est
        total_input += inp
        total_output += out
        if billing == "subscription":
            subscription_tokens += inp + out
        else:
            api_tokens += inp + out
        stage_data.append({
            "stage": stage_name,
            "api_cost": round(cost, 4),
            "estimated_cost": round(est, 4),
            "input_tokens": inp,
            "output_tokens": out,
            "calls": calls,
            "billing": billing,
        })

    # System totals.
    sys_merged = conn.execute(
        "SELECT COUNT(*) as n FROM prs WHERE status='merged' AND merged_at > datetime('now', ? || ' days')",
        (f"-{days}",),
    ).fetchone()["n"] or 0
    total_tokens = total_input + total_output
    return {
        "days": days,
        "daily": daily_data,
        "by_stage": stage_data,
        "system": {
            "actual_spend": round(total_api_cost, 4),
            "estimated_cost": round(total_estimated_cost, 4),
            "merged": sys_merged,
            "cost_per_claim": round(total_estimated_cost / sys_merged, 4) if sys_merged else None,
            "total_tokens": total_tokens,
            "tokens_per_claim": round(total_tokens / sys_merged) if sys_merged else None,
            "subscription_tokens": subscription_tokens,
            "api_tokens": api_tokens,
            "note": "estimated_cost = API-rate equivalent for all calls (unified metric). actual_spend = real dollars charged to OpenRouter.",
        },
    }


def fix_success_by_tag(conn: sqlite3.Connection, days: int = 30) -> dict:
    """Fix success rate broken down by rejection reason.

    For each rejection tag: how many PRs got that rejection, how many
    eventually merged (successful fix), how many are still open (in
    progress), how many were abandoned (closed/zombie without merge).

    Args:
        conn: Read-only connection to pipeline.db (sqlite3.Row row_factory).
        days: Trailing window size in days.

    Returns:
        {
          "days": 30,
          "tags": [
            {"tag": "insufficient_evidence", "total": 50, "fixed": 10,
             "in_progress": 5, "terminal": 35,
             "fix_rate": 0.2, "terminal_rate": 0.7},
            ...
          ]
        }
    """
    # Each rejection event carries a JSON array of issue tags at
    # detail.$.issues; json_each fans one event out to one row per tag.
    # Final outcomes are resolved afterwards via the prs table.
    rows = conn.execute(
        """
        SELECT value as tag,
               json_extract(al.detail, '$.pr') as pr_number
        FROM audit_log al, json_each(json_extract(al.detail, '$.issues'))
        WHERE al.stage = 'evaluate'
          AND al.event IN ('changes_requested', 'domain_rejected', 'tier05_rejected')
          AND al.timestamp > datetime('now', ? || ' days')
        """,
        (f"-{days}",),
    ).fetchall()

    # Collect unique PR numbers per tag (a PR may be rejected repeatedly
    # for the same reason; count it once).
    tag_prs: dict[str, set[int]] = {}
    for r in rows:
        tag = r["tag"]
        pr = r["pr_number"]
        if tag not in tag_prs:
            tag_prs[tag] = set()
        if pr is not None:
            tag_prs[tag].add(pr)
    if not tag_prs:
        return {"days": days, "tags": []}

    # Fetch the status of every referenced PR in one query.
    all_prs: set[int] = set()
    for prs in tag_prs.values():
        all_prs.update(prs)
    if not all_prs:
        return {"days": days, "tags": []}
    placeholders = ",".join("?" for _ in all_prs)
    pr_statuses = conn.execute(
        f"SELECT number, status FROM prs WHERE number IN ({placeholders})",
        list(all_prs),
    ).fetchall()
    status_map = {r["number"]: r["status"] for r in pr_statuses}

    # Compute per-tag outcomes, most-rejected tag first.
    tag_data = []
    for tag, prs in sorted(tag_prs.items(), key=lambda x: -len(x[1])):
        fixed = 0
        in_progress = 0
        terminal = 0
        for pr in prs:
            st = status_map.get(pr, "unknown")
            if st == "merged":
                fixed += 1
            elif st in ("open", "validating", "reviewing", "merging"):
                in_progress += 1
            else:
                # closed, zombie, conflict, unknown
                terminal += 1
        total = len(prs)
        # Fix rate excludes in-progress PRs (only counts resolved ones).
        resolved = fixed + terminal
        tag_data.append({
            "tag": tag,
            "total": total,
            "fixed": fixed,
            "in_progress": in_progress,
            "terminal": terminal,
            "fix_rate": round(fixed / resolved, 3) if resolved else None,
            "terminal_rate": round(terminal / resolved, 3) if resolved else None,
        })
    return {"days": days, "tags": tag_data}


def compute_profile(conn: sqlite3.Connection, days: int = 30) -> dict:
    """Compute profile — Max subscription telemetry alongside API usage.

    Surfaces: cache hit rates, latency, cost estimates (API-equivalent),
    token breakdown by billing type.

    Args:
        conn: Read-only connection to pipeline.db (sqlite3.Row row_factory).
        days: Trailing window size in days.

    Returns:
        Dict with "by_stage" (per stage/model rollups), "cache", "latency",
        "subscription_estimate", and "system" summary sections.
    """
    rows = conn.execute(
        """
        SELECT stage, model,
               SUM(calls) as calls,
               SUM(input_tokens) as input_tokens,
               SUM(output_tokens) as output_tokens,
               SUM(cost_usd) as api_cost,
               SUM(duration_ms) as duration_ms,
               SUM(cache_read_tokens) as cache_read_tokens,
               SUM(cache_write_tokens) as cache_write_tokens,
               SUM(cost_estimate_usd) as cost_estimate_usd
        FROM costs
        WHERE date > date('now', ? || ' days')
        GROUP BY stage, model
        ORDER BY SUM(input_tokens + output_tokens) DESC
        """,
        (f"-{days}",),
    ).fetchall()
    stage_data = []
    total_calls = 0
    total_tokens = 0
    total_duration = 0
    total_cache_read = 0
    total_cache_write = 0
    api_calls = 0
    sub_calls = 0
    api_spend = 0.0
    sub_estimate = 0.0
    sub_input_tokens = 0
    for r in rows:
        calls = r["calls"] or 0
        inp = r["input_tokens"] or 0
        out = r["output_tokens"] or 0
        dur = r["duration_ms"] or 0
        cr = r["cache_read_tokens"] or 0
        cw = r["cache_write_tokens"] or 0
        cost = r["api_cost"] or 0
        est = r["cost_estimate_usd"] or 0
        stage_name = r["stage"]
        # :max suffix = subscription, :openrouter suffix = API
        billing = "subscription" if ":max" in stage_name else "api"
        total_calls += calls
        total_tokens += inp + out
        total_duration += dur
        total_cache_read += cr
        total_cache_write += cw
        if billing == "subscription":
            sub_calls += calls
            sub_estimate += est
            sub_input_tokens += inp
        else:
            api_calls += calls
            api_spend += cost
        stage_data.append({
            "stage": stage_name,
            "model": r["model"],
            "calls": calls,
            "input_tokens": inp,
            "output_tokens": out,
            "total_tokens": inp + out,
            "duration_ms": dur,
            "avg_latency_ms": round(dur / calls) if calls else 0,
            "cache_read_tokens": cr,
            "cache_write_tokens": cw,
            # Hit rate = cached prompt tokens / (cached + uncached prompt).
            "cache_hit_rate": round(cr / (cr + inp), 3) if (cr + inp) else 0,
            "api_cost": round(cost, 4),
            "cost_estimate_usd": round(est, 4),
            "billing": billing,
        })

    # Cache summary (only meaningful for subscription/Max calls).
    total_cacheable = total_cache_read + total_cache_write + sub_input_tokens
    cache_hit_rate = round(total_cache_read / total_cacheable, 3) if total_cacheable else 0
    return {
        "days": days,
        "by_stage": stage_data,
        "cache": {
            "read_tokens": total_cache_read,
            "write_tokens": total_cache_write,
            "hit_rate": cache_hit_rate,
            "note": "Cache hits are prompt tokens served from cache (cheaper/faster)",
        },
        "latency": {
            "total_ms": total_duration,
            "avg_ms_per_call": round(total_duration / total_calls) if total_calls else 0,
            "note": "Wall-clock time including network. Only populated for Claude Max calls.",
        },
        "subscription_estimate": {
            "total_cost_usd": round(sub_estimate, 4),
            "note": "What subscription calls would cost at API rates. Actual cost: $0 (flat-rate Max plan).",
        },
        "system": {
            "total_calls": total_calls,
            "total_tokens": total_tokens,
            "api_calls": api_calls,
            "subscription_calls": sub_calls,
            "api_spend": round(api_spend, 4),
            "subscription_estimate": round(sub_estimate, 4),
            "cache_hit_rate": cache_hit_rate,
        },
    }