teleo-codex/ops/diagnostics/response_audit_routes.py
m3taversal 05d74d5e32 sync: import all VPS pipeline + diagnostics code as baseline
Imports 67 files from VPS (/opt/teleo-eval/) into repo as the single source
of truth. Previously only 8 of 67 files existed in repo — the rest were
deployed directly to VPS via SCP, causing massive drift.

Includes:
- pipeline/lib/: 33 Python modules (daemon core, extraction, evaluation, merge, cascade, cross-domain, costs, attribution, etc.)
- pipeline/: main daemon (teleo-pipeline.py), reweave.py, batch-extract-50.sh
- diagnostics/: 19 files (4-page dashboard, alerting, daily digest, review queue, tier1 metrics)
- agent-state/: bootstrap, lib-state, cascade inbox processor, schema
- systemd/: service unit files for reference
- deploy.sh: rsync-based deploy with --dry-run, syntax checks, dirty-tree gate
- research-session.sh: updated with Step 8.5 digest + cascade inbox processing

No new code written — all files are exact copies from VPS as of 2026-04-06.
From this point forward: edit in repo, commit, then deploy.sh.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-07 00:00:00 +01:00

475 lines
18 KiB
Python

"""Response audit API routes — agent cost tracking, reasoning traces, unified activity.
Endpoints:
GET /api/response-audit — paginated response list with cost columns
GET /api/response-audit/{id} — single response detail with full tool_calls
GET /api/agent-costs — aggregated cost view from response_audit
GET /api/unified-activity — merged prs + response_audit timeline
Data source: response_audit table in pipeline.db (written by Epimetheus's Telegram bot).
Owner: Argus
"""
import json
import logging
import sqlite3
from aiohttp import web
logger = logging.getLogger("argus.response_audit_routes")
def _conn(app):
"""Read-only connection to pipeline.db."""
db_path = app["db_path"]
conn = sqlite3.connect(f"file:{db_path}?mode=ro", uri=True)
conn.row_factory = sqlite3.Row
return conn
# ─── GET /api/response-audit ─────────────────────────────────────────────
async def handle_response_audit_list(request):
    """Paginated response audit list with cost and model data.

    Query params:
        agent  — filter by agent name (exact match)
        hours  — lookback window (default 24, clamped to 1..168)
        limit  — max results (default 50, clamped to 1..200)
        offset — pagination offset (default 0, clamped to >= 0)
        model  — filter by model name (substring match)
    """
    agent = request.query.get("agent")
    model_filter = request.query.get("model")

    def _int_param(name, default, lo, hi=None):
        # Clamp both ends. Previously only the upper bound was enforced:
        # a negative `hours` rendered as a malformed SQLite modifier
        # ("--5 hours" -> datetime() returns NULL, silently matching
        # nothing), and a negative `limit` means "no limit" in SQLite,
        # defeating pagination entirely.
        try:
            val = int(request.query.get(name, default))
        except (ValueError, TypeError):
            return default
        if hi is not None:
            val = min(val, hi)
        return max(val, lo)

    hours = _int_param("hours", 24, 1, 168)
    limit = _int_param("limit", 50, 1, 200)
    offset = _int_param("offset", 0, 0)

    conn = _conn(request.app)
    try:
        where = ["timestamp > datetime('now', ?)"]
        params: list = [f"-{hours} hours"]
        if agent:
            where.append("agent = ?")
            params.append(agent)
        if model_filter:
            where.append("model LIKE ?")
            params.append(f"%{model_filter}%")
        where_clause = " AND ".join(where)

        # Count total matching rows for pagination metadata.
        total = conn.execute(
            f"SELECT COUNT(*) as cnt FROM response_audit WHERE {where_clause}",
            params,
        ).fetchone()["cnt"]

        # Fetch page — exclude large text fields for list view; only the
        # length of the response and the tool-call count are surfaced.
        rows = conn.execute(
            f"""SELECT id, timestamp, agent, model, query,
                       prompt_tokens, completion_tokens,
                       generation_cost, embedding_cost, total_cost,
                       confidence_score, response_time_ms, query_type,
                       CASE WHEN tool_calls IS NOT NULL AND tool_calls != '[]'
                            THEN json_array_length(tool_calls)
                            ELSE 0 END as tool_call_count,
                       LENGTH(display_response) as response_length
                FROM response_audit
                WHERE {where_clause}
                ORDER BY timestamp DESC
                LIMIT ? OFFSET ?""",
            params + [limit, offset],
        ).fetchall()

        responses = []
        for r in rows:
            responses.append({
                "id": r["id"],
                "timestamp": r["timestamp"],
                "agent": r["agent"],
                "model": r["model"],
                "query": r["query"],
                "query_type": r["query_type"],
                "prompt_tokens": r["prompt_tokens"],
                "completion_tokens": r["completion_tokens"],
                "generation_cost": r["generation_cost"],
                "embedding_cost": r["embedding_cost"],
                "total_cost": r["total_cost"],
                "confidence": r["confidence_score"],
                "response_time_ms": r["response_time_ms"],
                "tool_call_count": r["tool_call_count"],
                "response_length": r["response_length"],
            })

        return web.json_response({
            "total": total,
            "limit": limit,
            "offset": offset,
            "hours": hours,
            "responses": responses,
        })
    finally:
        conn.close()
# ─── GET /api/response-audit/{id} ────────────────────────────────────────
async def handle_response_audit_detail(request):
    """Full response detail including reasoning trace and tool calls.

    Returns the complete response_audit row with JSON columns decoded,
    plus a derived ``trace_summary`` computed from tool_calls.
    """
    try:
        audit_id = int(request.match_info["id"])
    except (ValueError, TypeError):
        return web.json_response({"error": "Invalid ID"}, status=400)

    conn = _conn(request.app)
    try:
        row = conn.execute(
            """SELECT id, timestamp, chat_id, user, agent, model,
                      query, query_type, conversation_window,
                      entities_matched, claims_matched,
                      retrieval_layers_hit, retrieval_gap,
                      market_data, research_context,
                      tool_calls, raw_response, display_response,
                      confidence_score, response_time_ms,
                      prompt_tokens, completion_tokens,
                      generation_cost, embedding_cost, total_cost,
                      blocked, block_reason
               FROM response_audit WHERE id = ?""",
            (audit_id,),
        ).fetchone()
        if row is None:
            return web.json_response({"error": "Response not found"}, status=404)

        def _decode(raw):
            # Best-effort JSON decode; malformed payloads pass through verbatim.
            if raw is None:
                return None
            try:
                return json.loads(raw)
            except (json.JSONDecodeError, TypeError):
                return raw

        # Columns stored as serialized JSON in the table.
        json_cols = {
            "conversation_window", "entities_matched", "claims_matched",
            "retrieval_layers_hit", "market_data", "tool_calls",
        }
        # Key order of the response payload (display_response intentionally
        # precedes raw_response in the API output).
        ordered_keys = (
            "id", "timestamp", "chat_id", "user", "agent", "model",
            "query", "query_type", "conversation_window", "entities_matched",
            "claims_matched", "retrieval_layers_hit", "retrieval_gap",
            "market_data", "research_context", "tool_calls",
            "display_response", "raw_response", "confidence_score",
            "response_time_ms", "prompt_tokens", "completion_tokens",
            "generation_cost", "embedding_cost", "total_cost",
            "blocked", "block_reason",
        )
        result = {}
        for key in ordered_keys:
            value = row[key]
            if key in json_cols:
                value = _decode(value)
            elif key == "blocked":
                # Stored as integer flag; keep NULL as None.
                value = bool(value) if value is not None else None
            result[key] = value

        # Compute iteration summary from the decoded tool_calls trace.
        trace = result["tool_calls"] or []
        if isinstance(trace, list):
            reasoning = []
            tool_steps = []
            for step in trace:
                if not isinstance(step, dict):
                    continue
                kind = step.get("type")
                if kind == "reasoning":
                    reasoning.append(step)
                elif kind == "tool_call":
                    tool_steps.append(step)
            result["trace_summary"] = {
                "total_steps": len(trace),
                "reasoning_steps": len(reasoning),
                "tool_steps": len(tool_steps),
                "tools_used": list({s.get("tool", "unknown") for s in tool_steps}),
                "total_duration_ms": sum(s.get("duration_ms", 0) for s in tool_steps),
            }
        else:
            # tool_calls decoded to a non-list (e.g. a dict) — no summary.
            result["trace_summary"] = None
        return web.json_response(result)
    finally:
        conn.close()
# ─── GET /api/agent-costs ─────────────────────────────────────────────────
async def handle_agent_costs(request):
    """Aggregated agent cost data from response_audit.

    Query params:
        days  — lookback window (default 7, clamped to 1..30)
        by    — grouping: agent, model, day (default agent)
        agent — optional filter by agent name
    """
    try:
        # Clamp to 1..30. Previously only the upper bound was enforced:
        # a negative value rendered as "--5 days", a malformed SQLite
        # datetime modifier that evaluates to NULL and silently matches
        # nothing.
        days = max(min(int(request.query.get("days", 7)), 30), 1)
    except (ValueError, TypeError):
        days = 7
    group_by = request.query.get("by", "agent")
    agent = request.query.get("agent")

    conn = _conn(request.app)
    try:
        if group_by == "model":
            group_col = "model"
        elif group_by == "day":
            group_col = "date(timestamp)"
        else:
            # Unknown grouping values fall back to per-agent.
            group_col = "agent"
            group_by = "agent"

        where = ["timestamp > datetime('now', ?)"]
        params: list = [f"-{days} days"]
        if agent:
            where.append("agent = ?")
            params.append(agent)
        where_clause = " AND ".join(where)

        rows = conn.execute(
            f"""SELECT {group_col} as grp,
                       COUNT(*) as responses,
                       SUM(prompt_tokens) as total_prompt_tokens,
                       SUM(completion_tokens) as total_completion_tokens,
                       SUM(COALESCE(total_cost, generation_cost, 0)) as total_cost,
                       AVG(COALESCE(total_cost, generation_cost, 0)) as avg_cost,
                       AVG(response_time_ms) as avg_response_ms,
                       AVG(confidence_score) as avg_confidence
                FROM response_audit
                WHERE {where_clause}
                GROUP BY grp
                ORDER BY total_cost DESC""",
            params,
        ).fetchall()

        breakdown = []
        for r in rows:
            breakdown.append({
                group_by: r["grp"],
                "responses": r["responses"],
                "prompt_tokens": r["total_prompt_tokens"] or 0,
                "completion_tokens": r["total_completion_tokens"] or 0,
                "total_cost": round(r["total_cost"] or 0, 4),
                "avg_cost_per_response": round(r["avg_cost"] or 0, 4),
                "avg_response_ms": round(r["avg_response_ms"] or 0, 0),
                "avg_confidence": round(r["avg_confidence"] or 0, 3) if r["avg_confidence"] else None,
            })

        grand_total = sum(b["total_cost"] for b in breakdown)
        total_responses = sum(b["responses"] for b in breakdown)

        # Daily trend (always included regardless of grouping). Reuses the
        # filter built above — the original rebuilt an identical WHERE
        # clause and parameter list from scratch.
        daily = conn.execute(
            f"""SELECT date(timestamp) as day,
                       COUNT(*) as responses,
                       SUM(COALESCE(total_cost, generation_cost, 0)) as cost
                FROM response_audit
                WHERE {where_clause}
                GROUP BY day ORDER BY day""",
            params,
        ).fetchall()
        daily_trend = [
            {"date": r["day"], "responses": r["responses"],
             "cost": round(r["cost"] or 0, 4)}
            for r in daily
        ]

        return web.json_response({
            "period_days": days,
            "grand_total": round(grand_total, 4),
            "total_responses": total_responses,
            "avg_cost_per_response": round(grand_total / total_responses, 4) if total_responses else 0,
            f"by_{group_by}": breakdown,
            "daily_trend": daily_trend,
        })
    finally:
        conn.close()
# ─── GET /api/unified-activity ────────────────────────────────────────────
async def handle_unified_activity(request):
    """Unified activity feed merging pipeline ops (prs) + agent responses (response_audit).

    Query params:
        hours — lookback window (default 24, clamped to 1..168)
        limit — max results (default 100, clamped to 1..500)
        agent — filter by agent name
        type  — filter: pipeline, response, or all (default all)
    """
    def _clamped(name, default, hi):
        # Clamp to 1..hi. Previously only the upper bound was enforced: a
        # negative `hours` produced the malformed modifier "--5 hours"
        # (datetime -> NULL, matching nothing), and a negative `limit`
        # sliced nonsensically.
        try:
            return max(min(int(request.query.get(name, default)), hi), 1)
        except (ValueError, TypeError):
            return default

    hours = _clamped("hours", 24, 168)
    limit = _clamped("limit", 100, 500)
    agent = request.query.get("agent")
    activity_type = request.query.get("type", "all")

    conn = _conn(request.app)
    try:
        entries = []

        # Pipeline events from prs table.
        if activity_type in ("all", "pipeline"):
            pr_where = ["COALESCE(merged_at, created_at) > datetime('now', ?)"]
            pr_params: list = [f"-{hours} hours"]
            if agent:
                pr_where.append("agent = ?")
                pr_params.append(agent)
            # LIMIT bounds the fetch: each source is already sorted
            # descending, so the top `limit` from each is sufficient to
            # produce the merged top `limit` (original fetched all rows
            # in the window and sliced in Python).
            prs = conn.execute(
                f"""SELECT number, branch, status, domain, agent, tier,
                           commit_type, cost_usd,
                           created_at, merged_at,
                           leo_verdict, domain_verdict
                    FROM prs
                    WHERE {' AND '.join(pr_where)}
                    ORDER BY COALESCE(merged_at, created_at) DESC
                    LIMIT ?""",
                pr_params + [limit],
            ).fetchall()
            for pr in prs:
                ts = pr["merged_at"] or pr["created_at"]
                # Derive a human-readable action description from status.
                if pr["status"] == "merged":
                    action = f"Merged {pr['commit_type'] or 'PR'}"
                elif pr["status"] == "closed":
                    action = f"Closed {pr['commit_type'] or 'PR'}"
                elif pr["status"] in ("approved", "reviewing"):
                    action = f"{pr['commit_type'] or 'PR'} awaiting merge"
                else:
                    action = f"{pr['commit_type'] or 'PR'} {pr['status']}"
                entries.append({
                    "timestamp": ts,
                    "type": "pipeline",
                    "agent": pr["agent"],
                    "action": action,
                    "domain": pr["domain"],
                    "pr_number": pr["number"],
                    "branch": pr["branch"],
                    "status": pr["status"],
                    "commit_type": pr["commit_type"],
                    "cost": pr["cost_usd"],
                    "detail": {
                        "tier": pr["tier"],
                        "leo_verdict": pr["leo_verdict"],
                        "domain_verdict": pr["domain_verdict"],
                    },
                })

        # Agent responses from response_audit.
        if activity_type in ("all", "response"):
            ra_where = ["timestamp > datetime('now', ?)"]
            ra_params: list = [f"-{hours} hours"]
            if agent:
                ra_where.append("agent = ?")
                ra_params.append(agent)
            responses = conn.execute(
                f"""SELECT id, timestamp, agent, model, query,
                           generation_cost, response_time_ms,
                           confidence_score,
                           CASE WHEN tool_calls IS NOT NULL AND tool_calls != '[]'
                                THEN json_array_length(tool_calls)
                                ELSE 0 END as tool_call_count
                    FROM response_audit
                    WHERE {' AND '.join(ra_where)}
                    ORDER BY timestamp DESC
                    LIMIT ?""",
                ra_params + [limit],
            ).fetchall()
            for r in responses:
                # Truncate query for feed display.
                query_preview = (r["query"] or "")[:120]
                if len(r["query"] or "") > 120:
                    query_preview += "..."
                entries.append({
                    "timestamp": r["timestamp"],
                    "type": "response",
                    "agent": r["agent"],
                    "action": f"Responded to query ({r['tool_call_count']} tool calls)",
                    "domain": None,
                    "pr_number": None,
                    "audit_id": r["id"],
                    "query_preview": query_preview,
                    "model": r["model"],
                    "cost": r["generation_cost"],
                    "detail": {
                        "response_time_ms": r["response_time_ms"],
                        "confidence": r["confidence_score"],
                        "tool_call_count": r["tool_call_count"],
                    },
                })

        # Merge both sources by timestamp descending, then take the page.
        entries.sort(key=lambda e: e["timestamp"] or "", reverse=True)
        entries = entries[:limit]

        # Summary stats over the returned page only.
        pipeline_count = sum(1 for e in entries if e["type"] == "pipeline")
        response_count = sum(1 for e in entries if e["type"] == "response")
        total_cost = sum(e.get("cost") or 0 for e in entries)

        return web.json_response({
            "hours": hours,
            "total_entries": len(entries),
            "pipeline_events": pipeline_count,
            "response_events": response_count,
            "total_cost": round(total_cost, 4),
            "entries": entries,
        })
    finally:
        conn.close()
# ─── Registration ─────────────────────────────────────────────────────────
def register_response_audit_routes(app):
    """Register response audit API routes. Call from create_app()."""
    route_table = (
        ("/api/response-audit", handle_response_audit_list),
        ("/api/response-audit/{id}", handle_response_audit_detail),
        ("/api/agent-costs", handle_agent_costs),
        ("/api/unified-activity", handle_unified_activity),
    )
    for path, handler in route_table:
        app.router.add_get(path, handler)
# Public paths for auth middleware
# Exact-match paths the auth middleware should treat as public
# (presumably consumed by a path allowlist check in the middleware —
# verify against the middleware implementation).
RESPONSE_AUDIT_PUBLIC_PATHS: frozenset = frozenset({
    "/api/response-audit",
    "/api/agent-costs",
    "/api/unified-activity",
})
# /api/response-audit/{id} needs prefix matching in auth middleware