# Imported from VPS (/opt/teleo-eval/) on 2026-04-06 as part of the 67-file
# repo import that made the repo the single source of truth for deployed code.
# From this point forward: edit in repo, commit, then deploy via deploy.sh.
"""Response audit API routes — agent cost tracking, reasoning traces, unified activity.
|
|
|
|
Endpoints:
|
|
GET /api/response-audit — paginated response list with cost columns
|
|
GET /api/response-audit/{id} — single response detail with full tool_calls
|
|
GET /api/agent-costs — aggregated cost view from response_audit
|
|
GET /api/unified-activity — merged prs + response_audit timeline
|
|
|
|
Data source: response_audit table in pipeline.db (written by Epimetheus's Telegram bot).
|
|
|
|
Owner: Argus
|
|
"""
|
|
|
|
import json
import logging
import sqlite3

from aiohttp import web

logger = logging.getLogger("argus.response_audit_routes")
|
def _conn(app):
|
|
"""Read-only connection to pipeline.db."""
|
|
db_path = app["db_path"]
|
|
conn = sqlite3.connect(f"file:{db_path}?mode=ro", uri=True)
|
|
conn.row_factory = sqlite3.Row
|
|
return conn
|
|
|
|
|
|
# ─── GET /api/response-audit ─────────────────────────────────────────────
|
|
|
|
async def handle_response_audit_list(request):
    """Paginated response audit list with cost and model data.

    Query params:
        agent — filter by agent name
        hours — lookback window (default 24, max 168)
        limit — max results (default 50, max 200)
        offset — pagination offset (default 0)
        model — filter by model name (substring match)
    """
    def _int_param(name, default):
        # Any non-numeric value silently falls back to the default.
        try:
            return int(request.query.get(name, default))
        except (ValueError, TypeError):
            return default

    agent = request.query.get("agent")
    model_filter = request.query.get("model")
    hours = min(_int_param("hours", 24), 168)
    limit = min(_int_param("limit", 50), 200)
    offset = max(_int_param("offset", 0), 0)

    conn = _conn(request.app)
    try:
        clauses = ["timestamp > datetime('now', ?)"]
        args: list = [f"-{hours} hours"]
        if agent:
            clauses.append("agent = ?")
            args.append(agent)
        if model_filter:
            clauses.append("model LIKE ?")
            args.append(f"%{model_filter}%")
        where_clause = " AND ".join(clauses)

        # Count total matching
        total = conn.execute(
            f"SELECT COUNT(*) as cnt FROM response_audit WHERE {where_clause}",
            args,
        ).fetchone()["cnt"]

        # Fetch page — exclude large text fields for list view
        rows = conn.execute(
            f"""SELECT id, timestamp, agent, model, query,
                       prompt_tokens, completion_tokens,
                       generation_cost, embedding_cost, total_cost,
                       confidence_score, response_time_ms, query_type,
                       CASE WHEN tool_calls IS NOT NULL AND tool_calls != '[]'
                            THEN json_array_length(tool_calls)
                            ELSE 0 END as tool_call_count,
                       LENGTH(display_response) as response_length
                FROM response_audit
                WHERE {where_clause}
                ORDER BY timestamp DESC
                LIMIT ? OFFSET ?""",
            args + [limit, offset],
        ).fetchall()

        responses = [
            {
                "id": r["id"],
                "timestamp": r["timestamp"],
                "agent": r["agent"],
                "model": r["model"],
                "query": r["query"],
                "query_type": r["query_type"],
                "prompt_tokens": r["prompt_tokens"],
                "completion_tokens": r["completion_tokens"],
                "generation_cost": r["generation_cost"],
                "embedding_cost": r["embedding_cost"],
                "total_cost": r["total_cost"],
                "confidence": r["confidence_score"],
                "response_time_ms": r["response_time_ms"],
                "tool_call_count": r["tool_call_count"],
                "response_length": r["response_length"],
            }
            for r in rows
        ]

        return web.json_response({
            "total": total,
            "limit": limit,
            "offset": offset,
            "hours": hours,
            "responses": responses,
        })
    finally:
        conn.close()
|
|
|
|
|
|
# ─── GET /api/response-audit/{id} ────────────────────────────────────────
|
|
|
|
async def handle_response_audit_detail(request):
    """Full response detail including reasoning trace and tool calls.

    Returns the complete response_audit row with JSON columns parsed, plus a
    computed ``trace_summary``. Responds 400 on a non-integer id and 404 when
    no row matches.
    """
    try:
        audit_id = int(request.match_info["id"])
    except (ValueError, TypeError):
        return web.json_response({"error": "Invalid ID"}, status=400)

    conn = _conn(request.app)
    try:
        row = conn.execute(
            """SELECT id, timestamp, chat_id, user, agent, model,
                      query, query_type, conversation_window,
                      entities_matched, claims_matched,
                      retrieval_layers_hit, retrieval_gap,
                      market_data, research_context,
                      tool_calls, raw_response, display_response,
                      confidence_score, response_time_ms,
                      prompt_tokens, completion_tokens,
                      generation_cost, embedding_cost, total_cost,
                      blocked, block_reason
               FROM response_audit WHERE id = ?""",
            (audit_id,),
        ).fetchone()

        if not row:
            return web.json_response({"error": "Response not found"}, status=404)

        # Parse JSON fields — malformed JSON is returned as the raw string
        # rather than failing the whole request.
        def parse_json(val):
            if val is None:
                return None
            try:
                return json.loads(val)
            except (json.JSONDecodeError, TypeError):
                return val

        result = {
            "id": row["id"],
            "timestamp": row["timestamp"],
            "chat_id": row["chat_id"],
            "user": row["user"],
            "agent": row["agent"],
            "model": row["model"],
            "query": row["query"],
            "query_type": row["query_type"],
            "conversation_window": parse_json(row["conversation_window"]),
            "entities_matched": parse_json(row["entities_matched"]),
            "claims_matched": parse_json(row["claims_matched"]),
            "retrieval_layers_hit": parse_json(row["retrieval_layers_hit"]),
            "retrieval_gap": row["retrieval_gap"],
            "market_data": parse_json(row["market_data"]),
            "research_context": row["research_context"],
            "tool_calls": parse_json(row["tool_calls"]),
            "display_response": row["display_response"],
            "raw_response": row["raw_response"],
            "confidence_score": row["confidence_score"],
            "response_time_ms": row["response_time_ms"],
            "prompt_tokens": row["prompt_tokens"],
            "completion_tokens": row["completion_tokens"],
            "generation_cost": row["generation_cost"],
            "embedding_cost": row["embedding_cost"],
            "total_cost": row["total_cost"],
            "blocked": bool(row["blocked"]) if row["blocked"] is not None else None,
            "block_reason": row["block_reason"],
        }

        # Compute iteration summary from tool_calls
        tool_calls = result["tool_calls"] or []
        if isinstance(tool_calls, list):
            reasoning_steps = [t for t in tool_calls if isinstance(t, dict) and t.get("type") == "reasoning"]
            tool_steps = [t for t in tool_calls if isinstance(t, dict) and t.get("type") == "tool_call"]
            result["trace_summary"] = {
                "total_steps": len(tool_calls),
                "reasoning_steps": len(reasoning_steps),
                "tool_steps": len(tool_steps),
                # sorted() for deterministic output (set order is arbitrary);
                # key=str keeps the sort safe if a "tool" value is non-string.
                "tools_used": sorted({t.get("tool", "unknown") for t in tool_steps}, key=str),
                # `or 0` guards against steps that store duration_ms as null —
                # .get("duration_ms", 0) would return None and break sum().
                "total_duration_ms": sum(t.get("duration_ms") or 0 for t in tool_steps),
            }
        else:
            # tool_calls column held something other than a JSON array
            result["trace_summary"] = None

        return web.json_response(result)
    finally:
        conn.close()
|
|
|
|
|
|
# ─── GET /api/agent-costs ─────────────────────────────────────────────────
|
|
|
|
async def handle_agent_costs(request):
    """Aggregated agent cost data from response_audit.

    Query params:
        days — lookback window (default 7, max 30)
        by — grouping: agent, model, day (default agent)
    """
    try:
        days = min(int(request.query.get("days", 7)), 30)
    except (ValueError, TypeError):
        days = 7
    group_by = request.query.get("by", "agent")
    agent = request.query.get("agent")

    conn = _conn(request.app)
    try:
        # Map the requested grouping onto a SQL expression; anything
        # unrecognized falls back to per-agent grouping.
        if group_by == "model":
            group_expr = "model"
        elif group_by == "day":
            group_expr = "date(timestamp)"
        else:
            group_expr = "agent"
            group_by = "agent"

        filters = ["timestamp > datetime('now', ?)"]
        sql_args: list = [f"-{days} days"]
        if agent:
            filters.append("agent = ?")
            sql_args.append(agent)
        where_clause = " AND ".join(filters)

        rows = conn.execute(
            f"""SELECT {group_expr} as grp,
                       COUNT(*) as responses,
                       SUM(prompt_tokens) as total_prompt_tokens,
                       SUM(completion_tokens) as total_completion_tokens,
                       SUM(COALESCE(total_cost, generation_cost, 0)) as total_cost,
                       AVG(COALESCE(total_cost, generation_cost, 0)) as avg_cost,
                       AVG(response_time_ms) as avg_response_ms,
                       AVG(confidence_score) as avg_confidence
                FROM response_audit
                WHERE {where_clause}
                GROUP BY grp
                ORDER BY total_cost DESC""",
            sql_args,
        ).fetchall()

        breakdown = [
            {
                group_by: r["grp"],
                "responses": r["responses"],
                "prompt_tokens": r["total_prompt_tokens"] or 0,
                "completion_tokens": r["total_completion_tokens"] or 0,
                "total_cost": round(r["total_cost"] or 0, 4),
                "avg_cost_per_response": round(r["avg_cost"] or 0, 4),
                "avg_response_ms": round(r["avg_response_ms"] or 0, 0),
                "avg_confidence": round(r["avg_confidence"] or 0, 3) if r["avg_confidence"] else None,
            }
            for r in rows
        ]

        grand_total = sum(b["total_cost"] for b in breakdown)
        total_responses = sum(b["responses"] for b in breakdown)

        # Daily trend (always included regardless of grouping) — the same
        # lookback/agent filters apply, so the WHERE clause and bound
        # arguments are reused verbatim.
        daily = conn.execute(
            f"""SELECT date(timestamp) as day,
                       COUNT(*) as responses,
                       SUM(COALESCE(total_cost, generation_cost, 0)) as cost
                FROM response_audit
                WHERE {where_clause}
                GROUP BY day ORDER BY day""",
            sql_args,
        ).fetchall()

        daily_trend = [
            {"date": r["day"], "responses": r["responses"],
             "cost": round(r["cost"] or 0, 4)}
            for r in daily
        ]

        return web.json_response({
            "period_days": days,
            "grand_total": round(grand_total, 4),
            "total_responses": total_responses,
            "avg_cost_per_response": round(grand_total / total_responses, 4) if total_responses else 0,
            f"by_{group_by}": breakdown,
            "daily_trend": daily_trend,
        })
    finally:
        conn.close()
|
|
|
|
|
|
# ─── GET /api/unified-activity ────────────────────────────────────────────
|
|
|
|
async def handle_unified_activity(request):
    """Unified activity feed merging pipeline ops (prs) + agent responses (response_audit).

    Query params:
        hours — lookback window (default 24, max 168)
        limit — max results (default 100, max 500)
        agent — filter by agent name
        type — filter: pipeline, response, or all (default all)
    """
    # Clamp numeric params to their caps; non-numeric input falls back
    # to the default.
    try:
        hours = min(int(request.query.get("hours", 24)), 168)
    except (ValueError, TypeError):
        hours = 24
    try:
        limit = min(int(request.query.get("limit", 100)), 500)
    except (ValueError, TypeError):
        limit = 100
    agent = request.query.get("agent")
    activity_type = request.query.get("type", "all")

    conn = _conn(request.app)
    try:
        entries = []

        # Pipeline events from prs table
        if activity_type in ("all", "pipeline"):
            # A PR counts at merge time when merged, otherwise at creation.
            pr_where = ["COALESCE(merged_at, created_at) > datetime('now', ?)"]
            pr_params: list = [f"-{hours} hours"]
            if agent:
                pr_where.append("agent = ?")
                pr_params.append(agent)

            prs = conn.execute(
                f"""SELECT number, branch, status, domain, agent, tier,
                           commit_type, cost_usd,
                           created_at, merged_at,
                           leo_verdict, domain_verdict
                    FROM prs
                    WHERE {' AND '.join(pr_where)}
                    ORDER BY COALESCE(merged_at, created_at) DESC""",
                pr_params,
            ).fetchall()

            for pr in prs:
                ts = pr["merged_at"] or pr["created_at"]
                # Derive action description from status
                if pr["status"] == "merged":
                    action = f"Merged {pr['commit_type'] or 'PR'}"
                elif pr["status"] == "closed":
                    action = f"Closed {pr['commit_type'] or 'PR'}"
                elif pr["status"] in ("approved", "reviewing"):
                    action = f"{pr['commit_type'] or 'PR'} awaiting merge"
                else:
                    # Any other status is surfaced verbatim.
                    action = f"{pr['commit_type'] or 'PR'} {pr['status']}"

                entries.append({
                    "timestamp": ts,
                    "type": "pipeline",
                    "agent": pr["agent"],
                    "action": action,
                    "domain": pr["domain"],
                    "pr_number": pr["number"],
                    "branch": pr["branch"],
                    "status": pr["status"],
                    "commit_type": pr["commit_type"],
                    "cost": pr["cost_usd"],
                    "detail": {
                        "tier": pr["tier"],
                        "leo_verdict": pr["leo_verdict"],
                        "domain_verdict": pr["domain_verdict"],
                    },
                })

        # Agent responses from response_audit
        if activity_type in ("all", "response"):
            ra_where = ["timestamp > datetime('now', ?)"]
            ra_params: list = [f"-{hours} hours"]
            if agent:
                ra_where.append("agent = ?")
                ra_params.append(agent)

            responses = conn.execute(
                f"""SELECT id, timestamp, agent, model, query,
                           generation_cost, response_time_ms,
                           confidence_score,
                           CASE WHEN tool_calls IS NOT NULL AND tool_calls != '[]'
                                THEN json_array_length(tool_calls)
                                ELSE 0 END as tool_call_count
                    FROM response_audit
                    WHERE {' AND '.join(ra_where)}
                    ORDER BY timestamp DESC""",
                ra_params,
            ).fetchall()

            for r in responses:
                # Truncate query for feed display
                query_preview = (r["query"] or "")[:120]
                if len(r["query"] or "") > 120:
                    query_preview += "..."

                entries.append({
                    "timestamp": r["timestamp"],
                    "type": "response",
                    "agent": r["agent"],
                    "action": f"Responded to query ({r['tool_call_count']} tool calls)",
                    # domain/pr_number kept as None so both entry types share
                    # a common shape for the feed consumer.
                    "domain": None,
                    "pr_number": None,
                    "audit_id": r["id"],
                    "query_preview": query_preview,
                    "model": r["model"],
                    "cost": r["generation_cost"],
                    "detail": {
                        "response_time_ms": r["response_time_ms"],
                        "confidence": r["confidence_score"],
                        "tool_call_count": r["tool_call_count"],
                    },
                })

        # Sort combined entries by timestamp descending
        # NOTE(review): this relies on lexicographic ordering of the timestamp
        # strings — assumes both tables store a uniform sortable text format
        # (e.g. ISO-8601); confirm against the writers of prs/response_audit.
        entries.sort(key=lambda e: e["timestamp"] or "", reverse=True)
        entries = entries[:limit]

        # Summary stats (computed over the truncated page, not the full window)
        pipeline_count = sum(1 for e in entries if e["type"] == "pipeline")
        response_count = sum(1 for e in entries if e["type"] == "response")
        total_cost = sum(e.get("cost") or 0 for e in entries)

        return web.json_response({
            "hours": hours,
            "total_entries": len(entries),
            "pipeline_events": pipeline_count,
            "response_events": response_count,
            "total_cost": round(total_cost, 4),
            "entries": entries,
        })
    finally:
        conn.close()
|
|
|
|
|
|
# ─── Registration ─────────────────────────────────────────────────────────
|
|
|
|
def register_response_audit_routes(app):
    """Register response audit API routes. Call from create_app()."""
    route_table = (
        ("/api/response-audit", handle_response_audit_list),
        ("/api/response-audit/{id}", handle_response_audit_detail),
        ("/api/agent-costs", handle_agent_costs),
        ("/api/unified-activity", handle_unified_activity),
    )
    for path, handler in route_table:
        app.router.add_get(path, handler)
|
|
|
|
|
|
# Exact-match paths exposed without auth by the middleware.
# /api/response-audit/{id} is not listed: it needs prefix matching in the
# auth middleware.
RESPONSE_AUDIT_PUBLIC_PATHS = frozenset((
    "/api/response-audit",
    "/api/agent-costs",
    "/api/unified-activity",
))