teleo-codex/ops/pipeline-v2/lib/analytics.py
m3taversal 05d74d5e32 sync: import all VPS pipeline + diagnostics code as baseline
Imports 67 files from VPS (/opt/teleo-eval/) into repo as the single source
of truth. Previously only 8 of 67 files existed in repo — the rest were
deployed directly to VPS via SCP, causing massive drift.

Includes:
- pipeline/lib/: 33 Python modules (daemon core, extraction, evaluation, merge, cascade, cross-domain, costs, attribution, etc.)
- pipeline/: main daemon (teleo-pipeline.py), reweave.py, batch-extract-50.sh
- diagnostics/: 19 files (4-page dashboard, alerting, daily digest, review queue, tier1 metrics)
- agent-state/: bootstrap, lib-state, cascade inbox processor, schema
- systemd/: service unit files for reference
- deploy.sh: rsync-based deploy with --dry-run, syntax checks, dirty-tree gate
- research-session.sh: updated with Step 8.5 digest + cascade inbox processing

No new code written — all files are exact copies from VPS as of 2026-04-06.
From this point forward: edit in repo, commit, then deploy.sh.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-07 00:00:00 +01:00

210 lines
7.9 KiB
Python

"""Analytics module — time-series metrics snapshots + chart data endpoints.
Records pipeline metrics every 15 minutes. Serves historical data for
Chart.js dashboard. Tracks source origin (agent/human/scraper) for
pipeline funnel visualization.
Priority 1 from Cory via Ganymede.
Epimetheus owns this module.
"""
import json
import logging
import re
from collections import Counter
from datetime import datetime, timezone

from . import config, db
logger = logging.getLogger("pipeline.analytics")
# ─── Snapshot recording ────────────────────────────────────────────────────
def record_snapshot(conn) -> dict:
    """Record a metrics snapshot. Called every 15 minutes by the pipeline daemon.

    Gathers hourly throughput, 24h approval/evaluation stats, fix success
    rate, rejection-reason tag counts, source-origin counts, and the current
    model/prompt/pipeline versions, then inserts one row into
    ``metrics_snapshots``.

    Args:
        conn: DB connection whose rows support mapping-style access
            (e.g. ``sqlite3.Row``) — TODO confirm against daemon setup.

    Returns:
        The snapshot dict that was inserted, for logging/debugging.
    """
    # Throughput: review events recorded in the last hour.
    throughput = conn.execute(
        """SELECT COUNT(*) as n FROM audit_log
        WHERE timestamp > datetime('now', '-1 hour')
        AND event IN ('approved', 'changes_requested', 'merged')"""
    ).fetchone()

    # PR status counts (all time).
    statuses = conn.execute(
        "SELECT status, COUNT(*) as n FROM prs GROUP BY status"
    ).fetchall()
    status_map = {r["status"]: r["n"] for r in statuses}

    # Approval rate over the last 24 hours; None when nothing was attempted.
    verdicts = conn.execute(
        """SELECT COUNT(*) as total,
        SUM(CASE WHEN status IN ('merged', 'approved') THEN 1 ELSE 0 END) as passed
        FROM prs WHERE last_attempt > datetime('now', '-24 hours')"""
    ).fetchone()
    total = verdicts["total"] or 0
    passed = verdicts["passed"] or 0
    approval_rate = round(passed / total, 3) if total > 0 else None

    # PRs actually evaluated (verdict no longer pending) in the last 24 hours.
    evaluated = conn.execute(
        """SELECT COUNT(*) as n FROM prs
        WHERE last_attempt > datetime('now', '-24 hours')
        AND domain_verdict != 'pending'"""
    ).fetchone()

    # Fix success rate: of PRs with at least one fix attempt, how many landed.
    fix_stats = conn.execute(
        """SELECT COUNT(*) as attempted,
        SUM(CASE WHEN status IN ('merged', 'approved') THEN 1 ELSE 0 END) as succeeded
        FROM prs WHERE fix_attempts > 0"""
    ).fetchone()
    if fix_stats["attempted"]:
        fix_rate = round((fix_stats["succeeded"] or 0) / fix_stats["attempted"], 3)
    else:
        fix_rate = None

    # Rejection reason tags (24h). eval_issues holds a JSON list of tag strings.
    issue_rows = conn.execute(
        """SELECT eval_issues FROM prs
        WHERE eval_issues IS NOT NULL AND eval_issues != '[]'
        AND last_attempt > datetime('now', '-24 hours')"""
    ).fetchall()
    tag_counts: Counter[str] = Counter()
    for row in issue_rows:
        try:
            tags = json.loads(row["eval_issues"])
            tag_counts.update(t for t in tags if isinstance(t, str))
        except (json.JSONDecodeError, TypeError):
            # Malformed eval_issues payload — best-effort, skip the row.
            pass

    # Source origin counts (24h) — agent vs human vs scraper.
    source_origins = _count_source_origins(conn)

    # Tags with dedicated columns; everything else rolls into rejection_other.
    known_tags = ("broken_wiki_links", "frontmatter_schema",
                  "near_duplicate", "confidence_miscalibration")
    snapshot = {
        "throughput_1h": throughput["n"] if throughput else 0,
        "approval_rate": approval_rate,
        "open_prs": status_map.get("open", 0),
        "merged_total": status_map.get("merged", 0),
        "closed_total": status_map.get("closed", 0),
        "conflict_total": status_map.get("conflict", 0),
        "evaluated_24h": evaluated["n"] if evaluated else 0,
        "fix_success_rate": fix_rate,
        "rejection_broken_wiki_links": tag_counts["broken_wiki_links"],
        "rejection_frontmatter_schema": tag_counts["frontmatter_schema"],
        "rejection_near_duplicate": tag_counts["near_duplicate"],
        "rejection_confidence": tag_counts["confidence_miscalibration"],
        "rejection_other": sum(n for tag, n in tag_counts.items()
                               if tag not in known_tags),
        "extraction_model": config.EXTRACT_MODEL,
        "eval_domain_model": config.EVAL_DOMAIN_MODEL,
        "eval_leo_model": config.EVAL_LEO_STANDARD_MODEL,
        "prompt_version": config.PROMPT_VERSION,
        "pipeline_version": config.PIPELINE_VERSION,
        "source_origin_agent": source_origins.get("agent", 0),
        "source_origin_human": source_origins.get("human", 0),
        "source_origin_scraper": source_origins.get("scraper", 0),
    }

    # Persist the snapshot. Named-parameter INSERT keyed by the dict above.
    conn.execute(
        """INSERT INTO metrics_snapshots (
        throughput_1h, approval_rate, open_prs, merged_total, closed_total,
        conflict_total, evaluated_24h, fix_success_rate,
        rejection_broken_wiki_links, rejection_frontmatter_schema,
        rejection_near_duplicate, rejection_confidence, rejection_other,
        extraction_model, eval_domain_model, eval_leo_model,
        prompt_version, pipeline_version,
        source_origin_agent, source_origin_human, source_origin_scraper
        ) VALUES (
        :throughput_1h, :approval_rate, :open_prs, :merged_total, :closed_total,
        :conflict_total, :evaluated_24h, :fix_success_rate,
        :rejection_broken_wiki_links, :rejection_frontmatter_schema,
        :rejection_near_duplicate, :rejection_confidence, :rejection_other,
        :extraction_model, :eval_domain_model, :eval_leo_model,
        :prompt_version, :pipeline_version,
        :source_origin_agent, :source_origin_human, :source_origin_scraper
        )""",
        snapshot,
    )
    logger.debug("Recorded metrics snapshot: approval=%.1f%%, throughput=%d/h",
                 (approval_rate or 0) * 100, snapshot["throughput_1h"])
    return snapshot
def _count_source_origins(conn) -> dict[str, int]:
"""Count source origins from recent PRs. Returns {agent: N, human: N, scraper: N}."""
counts = {"agent": 0, "human": 0, "scraper": 0}
rows = conn.execute(
"""SELECT origin, COUNT(*) as n FROM prs
WHERE created_at > datetime('now', '-24 hours')
GROUP BY origin"""
).fetchall()
for row in rows:
origin = row["origin"] or "pipeline"
if origin == "human":
counts["human"] += row["n"]
elif origin == "pipeline":
counts["agent"] += row["n"]
else:
counts["scraper"] += row["n"]
return counts
# ─── Chart data endpoints ─────────────────────────────────────────────────
def get_snapshot_history(conn, days: int = 7) -> list[dict]:
    """Get snapshot history for charting. Returns list of snapshot dicts."""
    # Negative day offset is passed as a bound parameter into datetime().
    cursor = conn.execute(
        """SELECT * FROM metrics_snapshots
        WHERE ts > datetime('now', ? || ' days')
        ORDER BY ts ASC""",
        (f"-{days}",),
    )
    return [dict(r) for r in cursor.fetchall()]
def get_version_changes(conn, days: int = 30) -> list[dict]:
    """Get points where prompt_version or pipeline_version changed.

    Used for chart annotations — vertical lines marking deployments.
    """
    history = conn.execute(
        """SELECT ts, prompt_version, pipeline_version
        FROM metrics_snapshots
        WHERE ts > datetime('now', ? || ' days')
        ORDER BY ts ASC""",
        (f"-{days}",),
    ).fetchall()
    annotations: list[dict] = []
    # Track the previous value per version kind; the first snapshot only
    # seeds the baseline and never produces an annotation.
    previous = {"prompt": None, "pipeline": None}
    for snap in history:
        for kind in ("prompt", "pipeline"):
            current = snap[f"{kind}_version"]
            if previous[kind] is not None and current != previous[kind]:
                annotations.append({
                    "ts": snap["ts"],
                    "type": kind,
                    "from": previous[kind],
                    "to": current,
                })
            previous[kind] = current
    return annotations