# Imported from VPS (/opt/teleo-eval/) on 2026-04-06 as part of the 67-file
# repo import that made the repo the single source of truth for deployed code.
# From this point forward: edit in repo, commit, then deploy via deploy.sh.
"""Response audit API routes — agent cost tracking, reasoning traces, unified activity.
|
|
|
|
Endpoints:
|
|
GET /api/response-audit — paginated response list with cost columns
|
|
GET /api/response-audit/{id} — single response detail with full tool_calls
|
|
GET /api/agent-costs — aggregated cost view from response_audit
|
|
GET /api/unified-activity — merged prs + response_audit timeline
|
|
|
|
Data source: response_audit table in pipeline.db (written by Epimetheus's Telegram bot).
|
|
|
|
Owner: Argus
|
|
"""
|
|
|
|
import json
import logging
import sqlite3

from aiohttp import web

logger = logging.getLogger("argus.response_audit_routes")
|
def _conn(app):
|
|
"""Read-only connection to pipeline.db."""
|
|
db_path = app["db_path"]
|
|
conn = sqlite3.connect(f"file:{db_path}?mode=ro", uri=True)
|
|
conn.row_factory = sqlite3.Row
|
|
return conn
|
|
|
|
|
|
# ─── GET /api/response-audit ─────────────────────────────────────────────
|
|
|
|
async def handle_response_audit_list(request):
    """Paginated response audit list with cost and model data.

    Query params:
        agent — filter by agent name
        hours — lookback window (default 24, max 168)
        limit — max results (default 50, max 200)
        offset — pagination offset (default 0)
        model — filter by model name (substring match)
    """
    def _int_param(name, default):
        # Any non-numeric value silently falls back to the default.
        try:
            return int(request.query.get(name, default))
        except (ValueError, TypeError):
            return default

    agent = request.query.get("agent")
    model_filter = request.query.get("model")
    hours = min(_int_param("hours", 24), 168)
    limit = min(_int_param("limit", 50), 200)
    offset = max(_int_param("offset", 0), 0)

    conn = _conn(request.app)
    try:
        clauses = ["timestamp > datetime('now', ?)"]
        args: list = [f"-{hours} hours"]
        if agent:
            clauses.append("agent = ?")
            args.append(agent)
        if model_filter:
            clauses.append("model LIKE ?")
            args.append(f"%{model_filter}%")
        where_clause = " AND ".join(clauses)

        # Count total matching
        total = conn.execute(
            f"SELECT COUNT(*) as cnt FROM response_audit WHERE {where_clause}",
            args,
        ).fetchone()["cnt"]

        # Fetch page — exclude large text fields for list view
        rows = conn.execute(
            f"""SELECT id, timestamp, agent, model, query,
                       prompt_tokens, completion_tokens,
                       generation_cost, embedding_cost, total_cost,
                       confidence_score, response_time_ms, query_type,
                       CASE WHEN tool_calls IS NOT NULL AND tool_calls != '[]'
                            THEN json_array_length(tool_calls)
                            ELSE 0 END as tool_call_count,
                       LENGTH(display_response) as response_length
                FROM response_audit
                WHERE {where_clause}
                ORDER BY timestamp DESC
                LIMIT ? OFFSET ?""",
            args + [limit, offset],
        ).fetchall()

        responses = [
            {
                "id": r["id"],
                "timestamp": r["timestamp"],
                "agent": r["agent"],
                "model": r["model"],
                "query": r["query"],
                "query_type": r["query_type"],
                "prompt_tokens": r["prompt_tokens"],
                "completion_tokens": r["completion_tokens"],
                "generation_cost": r["generation_cost"],
                "embedding_cost": r["embedding_cost"],
                "total_cost": r["total_cost"],
                "confidence": r["confidence_score"],
                "response_time_ms": r["response_time_ms"],
                "tool_call_count": r["tool_call_count"],
                "response_length": r["response_length"],
            }
            for r in rows
        ]

        return web.json_response({
            "total": total,
            "limit": limit,
            "offset": offset,
            "hours": hours,
            "responses": responses,
        })
    finally:
        conn.close()
|
|
|
|
|
|
# ─── GET /api/response-audit/{id} ────────────────────────────────────────
|
|
|
|
async def handle_response_audit_detail(request):
    """Full response detail including reasoning trace and tool calls.

    Returns the complete response_audit row with JSON columns parsed, plus a
    computed ``trace_summary``. Responds 400 on a non-integer id and 404 when
    no row matches.
    """
    try:
        audit_id = int(request.match_info["id"])
    except (ValueError, TypeError):
        return web.json_response({"error": "Invalid ID"}, status=400)

    conn = _conn(request.app)
    try:
        row = conn.execute(
            """SELECT id, timestamp, chat_id, user, agent, model,
                      query, query_type, conversation_window,
                      entities_matched, claims_matched,
                      retrieval_layers_hit, retrieval_gap,
                      market_data, research_context,
                      tool_calls, raw_response, display_response,
                      confidence_score, response_time_ms,
                      prompt_tokens, completion_tokens,
                      generation_cost, embedding_cost, total_cost,
                      blocked, block_reason
               FROM response_audit WHERE id = ?""",
            (audit_id,),
        ).fetchone()

        if not row:
            return web.json_response({"error": "Response not found"}, status=404)

        # Parse JSON fields — malformed JSON is returned as the raw string
        # rather than failing the whole request.
        def parse_json(val):
            if val is None:
                return None
            try:
                return json.loads(val)
            except (json.JSONDecodeError, TypeError):
                return val

        result = {
            "id": row["id"],
            "timestamp": row["timestamp"],
            "chat_id": row["chat_id"],
            "user": row["user"],
            "agent": row["agent"],
            "model": row["model"],
            "query": row["query"],
            "query_type": row["query_type"],
            "conversation_window": parse_json(row["conversation_window"]),
            "entities_matched": parse_json(row["entities_matched"]),
            "claims_matched": parse_json(row["claims_matched"]),
            "retrieval_layers_hit": parse_json(row["retrieval_layers_hit"]),
            "retrieval_gap": row["retrieval_gap"],
            "market_data": parse_json(row["market_data"]),
            "research_context": row["research_context"],
            "tool_calls": parse_json(row["tool_calls"]),
            "display_response": row["display_response"],
            "raw_response": row["raw_response"],
            "confidence_score": row["confidence_score"],
            "response_time_ms": row["response_time_ms"],
            "prompt_tokens": row["prompt_tokens"],
            "completion_tokens": row["completion_tokens"],
            "generation_cost": row["generation_cost"],
            "embedding_cost": row["embedding_cost"],
            "total_cost": row["total_cost"],
            "blocked": bool(row["blocked"]) if row["blocked"] is not None else None,
            "block_reason": row["block_reason"],
        }

        # Compute iteration summary from tool_calls
        tool_calls = result["tool_calls"] or []
        if isinstance(tool_calls, list):
            reasoning_steps = [t for t in tool_calls if isinstance(t, dict) and t.get("type") == "reasoning"]
            tool_steps = [t for t in tool_calls if isinstance(t, dict) and t.get("type") == "tool_call"]
            result["trace_summary"] = {
                "total_steps": len(tool_calls),
                "reasoning_steps": len(reasoning_steps),
                "tool_steps": len(tool_steps),
                # sorted() for deterministic output (set order is arbitrary);
                # key=str keeps the sort safe if a "tool" value is non-string.
                "tools_used": sorted({t.get("tool", "unknown") for t in tool_steps}, key=str),
                # `or 0` guards against steps that store duration_ms as null —
                # .get("duration_ms", 0) would return None and break sum().
                "total_duration_ms": sum(t.get("duration_ms") or 0 for t in tool_steps),
            }
        else:
            # tool_calls column held something other than a JSON array
            result["trace_summary"] = None

        return web.json_response(result)
    finally:
        conn.close()
|
|
|
|
|
|
# ─── GET /api/agent-costs ─────────────────────────────────────────────────
|
|
|
|
async def handle_agent_costs(request):
    """Aggregated agent cost data from response_audit.

    Query params:
        days — lookback window (default 7, max 30)
        by — grouping: agent, model, day (default agent)
    """
    try:
        days = min(int(request.query.get("days", 7)), 30)
    except (ValueError, TypeError):
        days = 7
    group_by = request.query.get("by", "agent")
    agent = request.query.get("agent")

    conn = _conn(request.app)
    try:
        # Map the requested grouping onto a SQL expression; anything
        # unrecognized falls back to per-agent grouping.
        if group_by == "model":
            group_expr = "model"
        elif group_by == "day":
            group_expr = "date(timestamp)"
        else:
            group_expr = "agent"
            group_by = "agent"

        filters = ["timestamp > datetime('now', ?)"]
        sql_args: list = [f"-{days} days"]
        if agent:
            filters.append("agent = ?")
            sql_args.append(agent)
        where_clause = " AND ".join(filters)

        rows = conn.execute(
            f"""SELECT {group_expr} as grp,
                       COUNT(*) as responses,
                       SUM(prompt_tokens) as total_prompt_tokens,
                       SUM(completion_tokens) as total_completion_tokens,
                       SUM(COALESCE(total_cost, generation_cost, 0)) as total_cost,
                       AVG(COALESCE(total_cost, generation_cost, 0)) as avg_cost,
                       AVG(response_time_ms) as avg_response_ms,
                       AVG(confidence_score) as avg_confidence
                FROM response_audit
                WHERE {where_clause}
                GROUP BY grp
                ORDER BY total_cost DESC""",
            sql_args,
        ).fetchall()

        breakdown = [
            {
                group_by: r["grp"],
                "responses": r["responses"],
                "prompt_tokens": r["total_prompt_tokens"] or 0,
                "completion_tokens": r["total_completion_tokens"] or 0,
                "total_cost": round(r["total_cost"] or 0, 4),
                "avg_cost_per_response": round(r["avg_cost"] or 0, 4),
                "avg_response_ms": round(r["avg_response_ms"] or 0, 0),
                "avg_confidence": round(r["avg_confidence"] or 0, 3) if r["avg_confidence"] else None,
            }
            for r in rows
        ]

        grand_total = sum(b["total_cost"] for b in breakdown)
        total_responses = sum(b["responses"] for b in breakdown)

        # Daily trend (always included regardless of grouping) — the same
        # lookback/agent filters apply, so the WHERE clause and bound
        # arguments are reused verbatim.
        daily = conn.execute(
            f"""SELECT date(timestamp) as day,
                       COUNT(*) as responses,
                       SUM(COALESCE(total_cost, generation_cost, 0)) as cost
                FROM response_audit
                WHERE {where_clause}
                GROUP BY day ORDER BY day""",
            sql_args,
        ).fetchall()

        daily_trend = [
            {"date": r["day"], "responses": r["responses"],
             "cost": round(r["cost"] or 0, 4)}
            for r in daily
        ]

        return web.json_response({
            "period_days": days,
            "grand_total": round(grand_total, 4),
            "total_responses": total_responses,
            "avg_cost_per_response": round(grand_total / total_responses, 4) if total_responses else 0,
            f"by_{group_by}": breakdown,
            "daily_trend": daily_trend,
        })
    finally:
        conn.close()
|
|
|
|
|
|
# ─── GET /api/unified-activity ────────────────────────────────────────────
|
|
|
|
async def handle_unified_activity(request):
    """Unified activity feed merging pipeline ops (prs) + agent responses (response_audit).

    Query params:
        hours — lookback window (default 24, max 168)
        limit — max results (default 100, max 500)
        agent — filter by agent name
        type — filter: pipeline, response, or all (default all)
    """
    # Clamp numeric params to their caps; non-numeric input falls back
    # to the default.
    try:
        hours = min(int(request.query.get("hours", 24)), 168)
    except (ValueError, TypeError):
        hours = 24
    try:
        limit = min(int(request.query.get("limit", 100)), 500)
    except (ValueError, TypeError):
        limit = 100
    agent = request.query.get("agent")
    activity_type = request.query.get("type", "all")

    conn = _conn(request.app)
    try:
        entries = []

        # Pipeline events from prs table
        if activity_type in ("all", "pipeline"):
            # A PR counts at merge time when merged, otherwise at creation.
            pr_where = ["COALESCE(merged_at, created_at) > datetime('now', ?)"]
            pr_params: list = [f"-{hours} hours"]
            if agent:
                pr_where.append("agent = ?")
                pr_params.append(agent)

            prs = conn.execute(
                f"""SELECT number, branch, status, domain, agent, tier,
                           commit_type, cost_usd,
                           created_at, merged_at,
                           leo_verdict, domain_verdict
                    FROM prs
                    WHERE {' AND '.join(pr_where)}
                    ORDER BY COALESCE(merged_at, created_at) DESC""",
                pr_params,
            ).fetchall()

            for pr in prs:
                ts = pr["merged_at"] or pr["created_at"]
                # Derive action description from status
                if pr["status"] == "merged":
                    action = f"Merged {pr['commit_type'] or 'PR'}"
                elif pr["status"] == "closed":
                    action = f"Closed {pr['commit_type'] or 'PR'}"
                elif pr["status"] in ("approved", "reviewing"):
                    action = f"{pr['commit_type'] or 'PR'} awaiting merge"
                else:
                    # Any other status is surfaced verbatim.
                    action = f"{pr['commit_type'] or 'PR'} {pr['status']}"

                entries.append({
                    "timestamp": ts,
                    "type": "pipeline",
                    "agent": pr["agent"],
                    "action": action,
                    "domain": pr["domain"],
                    "pr_number": pr["number"],
                    "branch": pr["branch"],
                    "status": pr["status"],
                    "commit_type": pr["commit_type"],
                    "cost": pr["cost_usd"],
                    "detail": {
                        "tier": pr["tier"],
                        "leo_verdict": pr["leo_verdict"],
                        "domain_verdict": pr["domain_verdict"],
                    },
                })

        # Agent responses from response_audit
        if activity_type in ("all", "response"):
            ra_where = ["timestamp > datetime('now', ?)"]
            ra_params: list = [f"-{hours} hours"]
            if agent:
                ra_where.append("agent = ?")
                ra_params.append(agent)

            responses = conn.execute(
                f"""SELECT id, timestamp, agent, model, query,
                           generation_cost, response_time_ms,
                           confidence_score,
                           CASE WHEN tool_calls IS NOT NULL AND tool_calls != '[]'
                                THEN json_array_length(tool_calls)
                                ELSE 0 END as tool_call_count
                    FROM response_audit
                    WHERE {' AND '.join(ra_where)}
                    ORDER BY timestamp DESC""",
                ra_params,
            ).fetchall()

            for r in responses:
                # Truncate query for feed display
                query_preview = (r["query"] or "")[:120]
                if len(r["query"] or "") > 120:
                    query_preview += "..."

                entries.append({
                    "timestamp": r["timestamp"],
                    "type": "response",
                    "agent": r["agent"],
                    "action": f"Responded to query ({r['tool_call_count']} tool calls)",
                    # domain/pr_number kept as None so both entry types share
                    # a common shape for the feed consumer.
                    "domain": None,
                    "pr_number": None,
                    "audit_id": r["id"],
                    "query_preview": query_preview,
                    "model": r["model"],
                    "cost": r["generation_cost"],
                    "detail": {
                        "response_time_ms": r["response_time_ms"],
                        "confidence": r["confidence_score"],
                        "tool_call_count": r["tool_call_count"],
                    },
                })

        # Sort combined entries by timestamp descending
        # NOTE(review): this relies on lexicographic ordering of the timestamp
        # strings — assumes both tables store a uniform sortable text format
        # (e.g. ISO-8601); confirm against the writers of prs/response_audit.
        entries.sort(key=lambda e: e["timestamp"] or "", reverse=True)
        entries = entries[:limit]

        # Summary stats (computed over the truncated page, not the full window)
        pipeline_count = sum(1 for e in entries if e["type"] == "pipeline")
        response_count = sum(1 for e in entries if e["type"] == "response")
        total_cost = sum(e.get("cost") or 0 for e in entries)

        return web.json_response({
            "hours": hours,
            "total_entries": len(entries),
            "pipeline_events": pipeline_count,
            "response_events": response_count,
            "total_cost": round(total_cost, 4),
            "entries": entries,
        })
    finally:
        conn.close()
|
|
|
|
|
|
# ─── Registration ─────────────────────────────────────────────────────────
|
|
|
|
def register_response_audit_routes(app):
    """Register response audit API routes. Call from create_app()."""
    route_table = (
        ("/api/response-audit", handle_response_audit_list),
        ("/api/response-audit/{id}", handle_response_audit_detail),
        ("/api/agent-costs", handle_agent_costs),
        ("/api/unified-activity", handle_unified_activity),
    )
    for path, handler in route_table:
        app.router.add_get(path, handler)
|
|
|
|
|
|
# Exact-match paths exposed without auth by the middleware.
# /api/response-audit/{id} is not listed: it needs prefix matching in the
# auth middleware.
RESPONSE_AUDIT_PUBLIC_PATHS = frozenset((
    "/api/response-audit",
    "/api/agent-costs",
    "/api/unified-activity",
))