Add vitality modules + upgrade alerting with SQL injection protection

- vitality.py (25K): 10-dimension vitality scoring (Ship + Argus, Leo-approved) - vitality_routes.py (10K): Dashboard routes for vitality endpoints - alerting.py: Updated with _ALLOWED_DIM_EXPRS SQL injection protection, stricter dim_expr validation - alerting_routes.py: Added proper try/finally/conn.close, ValueError catch on hours param - Diff log documenting multi-copy resolution decisions Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-13 10:12:53 +02:00 · 2026-04-13 10:12:53 +02:00 · c0cc4ef090
commit c0cc4ef090
parent 681afad506
5 changed files with 998 additions and 13 deletions
--- a/diagnostics/CONSOLIDATION-DIFF-LOG.md
+++ b/diagnostics/CONSOLIDATION-DIFF-LOG.md
@ -0,0 +1,47 @@
+# Diagnostics Consolidation Diff Log
+# Branch: epimetheus/consolidate-infra
+# Date: 2026-04-13
+
+## Files with multiple copies — resolution
+
+### alerting.py
+- ROOT diagnostics/alerting.py (22320 bytes) — KEPT (newer: has _ALLOWED_DIM_EXPRS SQL injection protection, stricter dim_expr validation)
+- ops/diagnostics/alerting.py (22039 bytes) — OVERWRITTEN (missing SQL injection guards)
+- VPS /opt/teleo-eval/diagnostics/alerting.py (22039 bytes) — matches ops/ version, needs deploy
+
+### alerting_routes.py
+- ROOT diagnostics/alerting_routes.py (4216 bytes) — KEPT (newer: proper try/finally/conn.close, ValueError catch on hours param)
+- ops/diagnostics/alerting_routes.py (4043 bytes) — OVERWRITTEN (missing error handling, missing conn.close)
+- VPS /opt/teleo-eval/diagnostics/alerting_routes.py (4043 bytes) — matches ops/ version, needs deploy
+
+### vitality.py
+- ROOT diagnostics/vitality.py (25548 bytes) — KEPT (only copy in repo, larger than VPS)
+- VPS /opt/teleo-eval/diagnostics/vitality.py (18539 bytes) — older version, needs deploy
+- MOVED TO: ops/diagnostics/vitality.py
+
+### vitality_routes.py
+- ROOT diagnostics/vitality_routes.py (10824 bytes) — KEPT (only copy in repo, larger than VPS)
+- VPS /opt/teleo-eval/diagnostics/vitality_routes.py (9729 bytes) — older version, needs deploy
+- MOVED TO: ops/diagnostics/vitality_routes.py
+
+## Files moved
+
+| From | To | Reason |
+|------|-----|--------|
+| diagnostics/vitality.py | ops/diagnostics/vitality.py | Consolidate to canonical location |
+| diagnostics/vitality_routes.py | ops/diagnostics/vitality_routes.py | Consolidate to canonical location |
+| diagnostics/alerting.py | ops/diagnostics/alerting.py | Newer version overwrites older |
+| diagnostics/alerting_routes.py | ops/diagnostics/alerting_routes.py | Newer version overwrites older |
+
+## Root diagnostics/ after consolidation
+- PATCH_INSTRUCTIONS.md — kept (documentation, not code)
+- evolution.md — kept (documentation)
+- weekly/2026-03-25-week3.md — kept (report)
+- ops/sessions/*.json — kept (session data)
+- All .py files REMOVED from root diagnostics/
+
+## VPS .bak files inventory (30+ files)
+All in /opt/teleo-eval/diagnostics/. Git is the backup now. Safe to delete after consolidation verified.
+
+## VPS deploy needed after merge
+alerting.py, alerting_routes.py, vitality.py, vitality_routes.py — all local versions are newer than VPS.
--- a/diagnostics/alerting.py
+++ b/diagnostics/alerting.py
@ -157,8 +157,17 @@ def check_quality_regression(conn: sqlite3.Connection) -> list[dict]:
    return alerts


+# Closed allowlist of SQL fragments that _check_approval_by_dimension may
+# interpolate into its f-string queries. Membership is an exact string match,
+# so any new dimension expression must be added here verbatim.
+_ALLOWED_DIM_EXPRS = frozenset({
+    "json_extract(detail, '$.agent')",
+    "json_extract(detail, '$.domain')",
+    "COALESCE(json_extract(detail, '$.agent'), json_extract(detail, '$.domain_agent'))",
+})
+
+
 def _check_approval_by_dimension(conn, alerts, dim_name, dim_expr):
-    """Check approval rate regression grouped by a dimension (agent or domain)."""
+    """Check approval rate regression grouped by a dimension. dim_expr must be in _ALLOWED_DIM_EXPRS."""
+    if dim_expr not in _ALLOWED_DIM_EXPRS:
+        raise ValueError(f"untrusted dim_expr: {dim_expr}")
    # 7-day baseline per dimension
    baseline_rows = conn.execute(
        f"""SELECT {dim_expr} as dim_val,
@ -468,7 +477,7 @@ def generate_failure_report(conn: sqlite3.Connection, agent: str, hours: int = 2
           FROM audit_log, json_each(json_extract(detail, '$.issues'))
           WHERE stage='evaluate'
           AND event IN ('changes_requested','domain_rejected','tier05_rejected')
-           AND COALESCE(json_extract(detail, '$.agent'), json_extract(detail, '$.domain_agent')) = ?
+           AND json_extract(detail, '$.agent') = ?
           AND timestamp > datetime('now', ? || ' hours')
           GROUP BY tag ORDER BY cnt DESC
           LIMIT 5""",
--- a/diagnostics/alerting_routes.py
+++ b/diagnostics/alerting_routes.py
@ -26,22 +26,24 @@ async def handle_check(request):
    conn = request.app["_alerting_conn_func"]()
    try:
        alerts = run_all_checks(conn)
+
+        # Generate failure reports for agents with stuck loops
+        failure_reports = {}
+        stuck_agents = {a["agent"] for a in alerts if a["category"] == "health" and "stuck" in a["id"] and a["agent"]}
+        for agent in stuck_agents:
+            report = generate_failure_report(conn, agent)
+            if report:
+                failure_reports[agent] = report
    except Exception as e:
        logger.error("Check failed: %s", e)
        return web.json_response({"error": str(e)}, status=500)
+    finally:
+        conn.close()

    global _active_alerts, _last_check
    _active_alerts = alerts
    _last_check = datetime.now(timezone.utc).isoformat()

-    # Generate failure reports for agents with stuck loops
-    failure_reports = {}
-    stuck_agents = {a["agent"] for a in alerts if a["category"] == "health" and "stuck" in a["id"] and a["agent"]}
-    for agent in stuck_agents:
-        report = generate_failure_report(conn, agent)
-        if report:
-            failure_reports[agent] = report
-
    result = {
        "checked_at": _last_check,
        "alert_count": len(alerts),
@ -104,10 +106,15 @@ async def handle_api_failure_report(request):
      hours: lookback window (default 24)
    """
    agent = request.match_info["agent"]
-    hours = int(request.query.get("hours", "24"))
+    try:
+        hours = min(int(request.query.get("hours", "24")), 168)
+    except ValueError:
+        hours = 24
    conn = request.app["_alerting_conn_func"]()
-
-    report = generate_failure_report(conn, agent, hours)
+    try:
+        report = generate_failure_report(conn, agent, hours)
+    finally:
+        conn.close()
    if not report:
        return web.json_response({"agent": agent, "status": "no_rejections", "period_hours": hours})

--- a/diagnostics/vitality.py
+++ b/diagnostics/vitality.py
@ -0,0 +1,629 @@
+"""Agent Vitality Diagnostics — data collection and schema.
+
+Records daily vitality snapshots per agent across 10 dimensions.
+Designed as the objective function for agent "aliveness" ranking.
+
+Owner: Ship (data collection) + Argus (storage, API, dashboard)
+Data sources: pipeline.db (collectors use a read-only connection; snapshots are written back to the same database file), claim-index API, agent-state filesystem, review_records
+
+Dimension keys (agreed with Leo 2026-04-08):
+  knowledge_output, knowledge_quality, contributor_engagement,
+  review_performance, spend_efficiency, autonomy,
+  infrastructure_health, social_reach, capital, external_impact
+"""
+
+import json
+import logging
+import os
+import sqlite3
+import urllib.request
+from datetime import datetime, timezone
+from pathlib import Path
+
+logger = logging.getLogger("vitality")
+
+# Known agents and their primary domains. An empty list marks an agent with no
+# knowledge domain; collect_knowledge_quality then has only AGENT_PATHS to match on.
+AGENT_DOMAINS = {
+    "rio": ["internet-finance"],
+    "theseus": ["collective-intelligence", "living-agents"],
+    "astra": ["space-development", "energy", "manufacturing", "robotics"],
+    "vida": ["health"],
+    "clay": ["entertainment", "cultural-dynamics"],
+    "leo": ["grand-strategy", "teleohumanity"],
+    "hermes": [],      # communications, no domain
+    "rhea": [],        # infrastructure ops, no domain
+    "ganymede": [],    # code review, no domain
+    "epimetheus": [],  # pipeline, no domain
+    "oberon": [],      # dashboard, no domain
+    "argus": [],       # diagnostics, no domain
+    "ship": [],        # engineering, no domain
+}
+
+# Agent file path prefixes — for matching claims by location, not just domain field.
+# Handles claims in core/ and foundations/ that may not have a standard domain field
+# in the claim-index (domain derived from directory path).
+# Agents without an entry here get no path-based matching: collect_knowledge_quality
+# looks them up with AGENT_PATHS.get(agent, []).
+# NOTE(review): theseus maps "domains/ai-alignment/" here, but AGENT_DOMAINS lists no
+# "ai-alignment" domain for theseus — confirm whether that asymmetry is intended.
+AGENT_PATHS = {
+    "rio": ["domains/internet-finance/"],
+    "theseus": ["domains/ai-alignment/", "core/living-agents/", "core/collective-intelligence/",
+                "foundations/collective-intelligence/"],
+    "astra": ["domains/space-development/", "domains/energy/",
+              "domains/manufacturing/", "domains/robotics/"],
+    "vida": ["domains/health/"],
+    "clay": ["domains/entertainment/", "foundations/cultural-dynamics/"],
+    "leo": ["core/grand-strategy/", "core/teleohumanity/", "core/mechanisms/",
+            "core/living-capital/", "foundations/teleological-economics/",
+            "foundations/critical-systems/"],
+}
+
+# Every agent that record_snapshot iterates over, in AGENT_DOMAINS insertion order.
+ALL_AGENTS = list(AGENT_DOMAINS.keys())
+
+# Agent-state directory (VPS filesystem); override with the AGENT_STATE_DIR env var.
+AGENT_STATE_DIR = Path(os.environ.get(
+    "AGENT_STATE_DIR", "/opt/teleo-eval/agent-state"
+))
+
+# Schema for vitality snapshots: one row per (agent, dimension, metric, timestamp).
+# The UNIQUE constraint is what lets record_snapshot's INSERT OR REPLACE overwrite
+# rows that share the same recorded_at value instead of duplicating them.
+MIGRATION_SQL = """
+CREATE TABLE IF NOT EXISTS vitality_snapshots (
+    id INTEGER PRIMARY KEY AUTOINCREMENT,
+    agent_name TEXT NOT NULL,
+    dimension TEXT NOT NULL,
+    metric TEXT NOT NULL,
+    value REAL NOT NULL DEFAULT 0,
+    unit TEXT NOT NULL DEFAULT '',
+    source TEXT,
+    recorded_at TEXT NOT NULL DEFAULT (datetime('now')),
+    UNIQUE(agent_name, dimension, metric, recorded_at)
+);
+CREATE INDEX IF NOT EXISTS idx_vitality_agent_time
+    ON vitality_snapshots(agent_name, recorded_at);
+CREATE INDEX IF NOT EXISTS idx_vitality_dimension
+    ON vitality_snapshots(dimension, recorded_at);
+"""
+
+# Add source column if missing (upgrade from v1 schema). The statement is not
+# idempotent by itself: SQLite raises OperationalError when the column already
+# exists, and ensure_schema() tolerates that error.
+UPGRADE_SQL = """
+ALTER TABLE vitality_snapshots ADD COLUMN source TEXT;
+"""
+
+
+def ensure_schema(db_path: str):
+    """Create or upgrade the vitality_snapshots table in the database at db_path.
+
+    Runs MIGRATION_SQL (CREATE ... IF NOT EXISTS), then adds the ``source``
+    column when an older table lacks it. The connection is always closed.
+
+    Raises:
+        sqlite3.OperationalError: for any failure other than the ``source``
+            column already being present (e.g. a locked or read-only database).
+    """
+    conn = sqlite3.connect(db_path, timeout=30)
+    try:
+        conn.executescript(MIGRATION_SQL)
+        try:
+            conn.execute(UPGRADE_SQL)
+        except sqlite3.OperationalError as exc:
+            # Only "duplicate column name" means the upgrade was already
+            # applied; any other operational error is a real failure and
+            # must not be hidden behind a success log line.
+            if "duplicate column name" not in str(exc).lower():
+                raise
+        conn.commit()
+        logger.info("vitality_snapshots schema ensured")
+    finally:
+        conn.close()
+
+
+def _fetch_claim_index(url: str = "http://localhost:8080/claim-index") -> dict | None:
+    """Download and decode the claim-index JSON document from url.
+
+    Best-effort: any exception while building the request, fetching, or
+    decoding is logged as a warning and reported to the caller as None.
+    """
+    payload = None
+    try:
+        request = urllib.request.Request(url, headers={"Accept": "application/json"})
+        with urllib.request.urlopen(request, timeout=10) as response:
+            payload = json.loads(response.read())
+    except Exception as err:
+        logger.warning("claim-index fetch failed: %s", err)
+        payload = None
+    return payload
+
+
+def _ro_conn(db_path: str) -> sqlite3.Connection:
+    """Open db_path read-only and return a connection that yields sqlite3.Row rows.
+
+    The path is converted with Path.as_uri() so characters that are special in
+    a URI ('?', '#', '%', spaces) are percent-encoded instead of being parsed
+    as query/fragment delimiters, which could silently drop ``mode=ro``.
+    """
+    # as_uri() requires an absolute path; absolute() does not resolve symlinks.
+    uri = f"{Path(db_path).absolute().as_uri()}?mode=ro"
+    conn = sqlite3.connect(uri, uri=True, timeout=30)
+    conn.row_factory = sqlite3.Row
+    return conn
+
+
+# ---------------------------------------------------------------------------
+# Dimension 1: knowledge_output — "How much has this agent produced?"
+# ---------------------------------------------------------------------------
+
+def collect_knowledge_output(conn: sqlite3.Connection, agent: str) -> list[dict]:
+    """Count merged PRs, distinct merged domains, and PRs opened in the last 7 days.
+
+    Each query selects a single ``cnt`` column from ``prs`` filtered by agent;
+    the results are returned as metric dicts in a fixed order.
+    """
+    # (metric name, unit, SQL) — executed in this order.
+    specs = (
+        (
+            "claims_merged",
+            "claims",
+            "SELECT COUNT(*) as cnt FROM prs WHERE agent = ? AND status = 'merged'",
+        ),
+        (
+            "domains_contributed",
+            "domains",
+            "SELECT COUNT(DISTINCT domain) as cnt FROM prs "
+            "WHERE agent = ? AND domain IS NOT NULL AND status = 'merged'",
+        ),
+        (
+            "prs_7d",
+            "PRs",
+            "SELECT COUNT(*) as cnt FROM prs WHERE agent = ? AND created_at > datetime('now', '-7 days')",
+        ),
+    )
+    collected = []
+    for metric_name, unit, sql in specs:
+        count = conn.execute(sql, (agent,)).fetchone()["cnt"]
+        collected.append({"metric": metric_name, "value": count, "unit": unit})
+    return collected
+
+
+# ---------------------------------------------------------------------------
+# Dimension 2: knowledge_quality — "How good is the output?"
+# ---------------------------------------------------------------------------
+
+def collect_knowledge_quality(
+    conn: sqlite3.Connection, claim_index: dict | None, agent: str
+) -> list[dict]:
+    """Challenge rate, activity breadth, evidence density, cross-domain links, domain coverage.
+
+    Args:
+        conn: connection whose rows support access by column name.
+        claim_index: decoded claim-index document, or None when unavailable.
+        agent: agent name used to filter ``prs`` and to look up
+            AGENT_DOMAINS / AGENT_PATHS.
+
+    Returns:
+        Metric dicts in a fixed order. The three claim-index metrics are
+        reported as 0 when there is no claim index or the agent has neither
+        domains nor path prefixes.
+    """
+    metrics = []
+    agent_domains = AGENT_DOMAINS.get(agent, [])
+
+    # Challenge rate = challenge PRs / total PRs
+    rows = conn.execute(
+        "SELECT commit_type, COUNT(*) as cnt FROM prs "
+        "WHERE agent = ? AND commit_type IS NOT NULL GROUP BY commit_type",
+        (agent,),
+    ).fetchall()
+    total = sum(r["cnt"] for r in rows)
+    type_counts = {r["commit_type"]: r["cnt"] for r in rows}
+    challenge_rate = type_counts.get("challenge", 0) / total if total > 0 else 0
+    metrics.append({"metric": "challenge_rate", "value": round(challenge_rate, 4), "unit": "ratio"})
+
+    # Activity breadth (distinct commit types)
+    metrics.append({"metric": "activity_breadth", "value": len(type_counts), "unit": "types"})
+
+    # Evidence density + cross-domain links from claim-index
+    # Match by domain field OR file path prefix (catches core/, foundations/ claims)
+    # str.startswith accepts a tuple, so all prefixes are tested in one call.
+    agent_paths = tuple(AGENT_PATHS.get(agent, ()))
+    if claim_index and (agent_domains or agent_paths):
+        claims = claim_index.get("claims", [])
+        domains_data = claim_index.get("domains", {})
+        agent_claims = [
+            c for c in claims
+            if c.get("domain") in agent_domains
+            # `or ""` tolerates an explicit "file": null entry, which would
+            # otherwise raise and void the whole dimension.
+            or (c.get("file") or "").startswith(agent_paths)
+        ]
+        total_claims = len(agent_claims)
+
+        # Evidence density: claims with incoming links / total claims
+        linked = sum(1 for c in agent_claims if c.get("incoming_count", 0) > 0)
+        density = linked / total_claims if total_claims > 0 else 0
+        metrics.append({"metric": "evidence_density", "value": round(density, 4), "unit": "ratio"})
+
+        # Cross-domain links: a link counts once if its text contains the name
+        # of any domain that is not one of the agent's own. The foreign-domain
+        # list does not depend on the link, so it is built once up front.
+        foreign_domains = [d for d in domains_data if d not in agent_domains]
+        cross_domain = sum(
+            1 for c in agent_claims
+            for link in c.get("outgoing_links", [])
+            if any(d in link for d in foreign_domains)
+        )
+        metrics.append({"metric": "cross_domain_links", "value": cross_domain, "unit": "links"})
+
+        # Domain coverage: agent's claims / average domain size, capped at 1.0
+        agent_claim_count = sum(domains_data.get(d, 0) for d in agent_domains)
+        avg_domain_size = (sum(domains_data.values()) / len(domains_data)) if domains_data else 1
+        coverage = min(agent_claim_count / avg_domain_size, 1.0) if avg_domain_size > 0 else 0
+        metrics.append({"metric": "domain_coverage", "value": round(coverage, 4), "unit": "ratio"})
+    else:
+        metrics.append({"metric": "evidence_density", "value": 0, "unit": "ratio"})
+        metrics.append({"metric": "cross_domain_links", "value": 0, "unit": "links"})
+        metrics.append({"metric": "domain_coverage", "value": 0, "unit": "ratio"})
+
+    return metrics
+
+
+# ---------------------------------------------------------------------------
+# Dimension 3: contributor_engagement — "Who contributes to this agent's domain?"
+# ---------------------------------------------------------------------------
+
+def collect_contributor_engagement(conn: sqlite3.Connection, agent: str) -> list[dict]:
+    """Count distinct non-empty ``submitted_by`` values on the agent's PRs."""
+    sql = (
+        "SELECT COUNT(DISTINCT submitted_by) as cnt FROM prs "
+        "WHERE agent = ? AND submitted_by IS NOT NULL AND submitted_by != ''"
+    )
+    submitter_count = conn.execute(sql, (agent,)).fetchone()["cnt"]
+    metric = {"metric": "unique_submitters", "value": submitter_count, "unit": "contributors"}
+    return [metric]
+
+
+# ---------------------------------------------------------------------------
+# Dimension 4: review_performance — "How good is the evaluator feedback loop?"
+# ---------------------------------------------------------------------------
+
+def collect_review_performance(conn: sqlite3.Connection, agent: str) -> list[dict]:
+    """Summarise review outcomes for the agent's PRs.
+
+    Returns zeroed ``approval_rate`` / ``total_reviews`` when the
+    ``review_records`` table is absent. Otherwise reports totals, the approval
+    rate (approved plus approved-with-changes over all reviews), and one
+    ``rejection_<reason>`` metric per rejection reason seen in the last 30 days.
+    Agent names are compared case-insensitively.
+    """
+    has_table = conn.execute(
+        "SELECT name FROM sqlite_master WHERE type='table' AND name='review_records'"
+    ).fetchone()
+    if not has_table:
+        return [
+            {"metric": "approval_rate", "value": 0, "unit": "ratio"},
+            {"metric": "total_reviews", "value": 0, "unit": "reviews"},
+        ]
+
+    # Outcome totals for this agent's claims (join through prs table).
+    totals = conn.execute(
+        "SELECT COUNT(*) as total, "
+        "SUM(CASE WHEN r.outcome = 'approved' THEN 1 ELSE 0 END) as approved, "
+        "SUM(CASE WHEN r.outcome = 'approved-with-changes' THEN 1 ELSE 0 END) as with_changes, "
+        "SUM(CASE WHEN r.outcome = 'rejected' THEN 1 ELSE 0 END) as rejected "
+        "FROM review_records r "
+        "JOIN prs p ON r.pr_number = p.pr_number "
+        "WHERE LOWER(p.agent) = LOWER(?)",
+        (agent,),
+    ).fetchone()
+    # SUM() yields NULL when no rows match, hence the `or 0` fallbacks.
+    review_count = totals["total"] or 0
+    plain_approved = totals["approved"] or 0
+    changed_approved = totals["with_changes"] or 0
+    rejected_count = totals["rejected"] or 0
+    if review_count > 0:
+        approval_rate = (plain_approved + changed_approved) / review_count
+    else:
+        approval_rate = 0
+
+    metrics = [
+        {"metric": "total_reviews", "value": review_count, "unit": "reviews"},
+        {"metric": "approval_rate", "value": round(approval_rate, 4), "unit": "ratio"},
+        {"metric": "approved", "value": plain_approved, "unit": "reviews"},
+        {"metric": "approved_with_changes", "value": changed_approved, "unit": "reviews"},
+        {"metric": "rejected", "value": rejected_count, "unit": "reviews"},
+    ]
+
+    # Rejection reasons (last 30 days), most frequent first.
+    reason_rows = conn.execute(
+        "SELECT r.rejection_reason, COUNT(*) as cnt FROM review_records r "
+        "JOIN prs p ON r.pr_number = p.pr_number "
+        "WHERE LOWER(p.agent) = LOWER(?) AND r.outcome = 'rejected' "
+        "AND r.rejection_reason IS NOT NULL "
+        "AND r.review_date > datetime('now', '-30 days') "
+        "GROUP BY r.rejection_reason ORDER BY cnt DESC",
+        (agent,),
+    ).fetchall()
+    for reason_row in reason_rows:
+        metrics.append({
+            "metric": f"rejection_{reason_row['rejection_reason']}",
+            "value": reason_row["cnt"],
+            "unit": "rejections",
+        })
+
+    return metrics
+
+
+# ---------------------------------------------------------------------------
+# Dimension 5: spend_efficiency — "What does it cost per merged claim?"
+# ---------------------------------------------------------------------------
+
+def collect_spend_efficiency(conn: sqlite3.Connection, agent: str) -> list[dict]:
+    """Report pipeline cost, cost per merged PR, and response-audit spend.
+
+    Pipeline figures come from merged rows in ``prs`` (``cost_usd``); response
+    figures come from ``response_audit`` (``generation_cost``), both for all
+    time and for the last 24 hours. Costs are rounded to 4 decimal places.
+    """
+    params = (agent,)
+
+    # Pipeline cost attributed to this agent (from prs.cost_usd)
+    pipeline = conn.execute(
+        "SELECT COALESCE(SUM(cost_usd), 0) as cost, COUNT(*) as merged "
+        "FROM prs WHERE agent = ? AND status = 'merged'",
+        params,
+    ).fetchone()
+    pipeline_cost = pipeline["cost"] or 0
+    merged_count = pipeline["merged"] or 0
+    if merged_count > 0:
+        per_claim = pipeline_cost / merged_count
+    else:
+        per_claim = 0
+
+    # Response audit costs (Telegram bot) — per-agent, all time
+    lifetime = conn.execute(
+        "SELECT COALESCE(SUM(generation_cost), 0) as cost, COUNT(*) as cnt "
+        "FROM response_audit WHERE agent = ?",
+        params,
+    ).fetchone()
+
+    # 24h spend snapshot
+    last_day = conn.execute(
+        "SELECT COALESCE(SUM(generation_cost), 0) as cost "
+        "FROM response_audit WHERE agent = ? AND timestamp > datetime('now', '-24 hours')",
+        params,
+    ).fetchone()
+
+    return [
+        {"metric": "total_pipeline_cost", "value": round(pipeline_cost, 4), "unit": "USD"},
+        {"metric": "cost_per_merged_claim", "value": round(per_claim, 4), "unit": "USD"},
+        {"metric": "response_cost_total", "value": round(lifetime["cost"], 4), "unit": "USD"},
+        {"metric": "total_responses", "value": lifetime["cnt"], "unit": "responses"},
+        {"metric": "response_cost_24h", "value": round(last_day["cost"], 4), "unit": "USD"},
+    ]
+
+
+# ---------------------------------------------------------------------------
+# Dimension 6: autonomy — "How independently does this agent act?"
+# ---------------------------------------------------------------------------
+
+def collect_autonomy(conn: sqlite3.Connection, agent: str) -> list[dict]:
+    """Count the agent's response_audit rows in the last 24h and PR-active days in the last 7."""
+    params = (agent,)
+
+    # Responses logged in the last 24 hours
+    recent_responses = conn.execute(
+        "SELECT COUNT(*) as cnt FROM response_audit "
+        "WHERE agent = ? AND timestamp > datetime('now', '-24 hours')",
+        params,
+    ).fetchone()["cnt"]
+
+    # Distinct calendar dates with at least one PR in the last 7 days
+    active_days = conn.execute(
+        "SELECT COUNT(DISTINCT date(created_at)) as days FROM prs "
+        "WHERE agent = ? AND created_at > datetime('now', '-7 days')",
+        params,
+    ).fetchone()["days"]
+
+    return [
+        {"metric": "autonomous_responses_24h", "value": recent_responses, "unit": "actions"},
+        {"metric": "active_days_7d", "value": active_days, "unit": "days"},
+    ]
+
+
+# ---------------------------------------------------------------------------
+# Dimension 7: infrastructure_health — "Is the agent's machinery working?"
+# ---------------------------------------------------------------------------
+
+def collect_infrastructure_health(conn: sqlite3.Connection, agent: str) -> list[dict]:
+    """Circuit breakers, 7-day PR merge rate, and session counters from agent state.
+
+    Args:
+        conn: connection whose rows support access by column name.
+        agent: agent name; also the sub-directory of AGENT_STATE_DIR that is
+            read for ``metrics.json``.
+
+    Returns:
+        ``open_circuit_breakers`` and ``merge_rate_7d`` always; the three
+        ``sessions_*`` metrics only when ``metrics.json`` exists and parses to
+        an object. A missing file is silent; an unreadable or malformed file
+        is logged as a warning and never discards the database metrics.
+    """
+    metrics = []
+
+    # Circuit breakers whose name contains the agent name
+    rows = conn.execute(
+        "SELECT name, state FROM circuit_breakers WHERE name LIKE ?",
+        (f"%{agent}%",),
+    ).fetchall()
+    open_breakers = sum(1 for r in rows if r["state"] != "closed")
+    metrics.append({"metric": "open_circuit_breakers", "value": open_breakers, "unit": "breakers"})
+
+    # PR success rate last 7 days
+    row = conn.execute(
+        "SELECT COUNT(*) as total, "
+        "SUM(CASE WHEN status='merged' THEN 1 ELSE 0 END) as merged "
+        "FROM prs WHERE agent = ? AND created_at > datetime('now', '-7 days')",
+        (agent,),
+    ).fetchone()
+    total = row["total"]
+    # SUM() is NULL when there are no rows; the total > 0 guard avoids using it.
+    rate = row["merged"] / total if total > 0 else 0
+    metrics.append({"metric": "merge_rate_7d", "value": round(rate, 4), "unit": "ratio"})
+
+    # Agent-state liveness (read metrics.json from filesystem)
+    state_file = AGENT_STATE_DIR / agent / "metrics.json"
+    session_metrics = []
+    try:
+        with open(state_file, encoding="utf-8") as f:
+            state = json.load(f)
+        lifetime = state.get("lifetime", {})
+        session_metrics = [
+            {"metric": key, "value": lifetime.get(key, 0), "unit": "sessions"}
+            for key in ("sessions_total", "sessions_timeout", "sessions_error")
+        ]
+    except FileNotFoundError:
+        # No state recorded for this agent — expected, not an error.
+        pass
+    except (ValueError, OSError, AttributeError) as e:
+        # ValueError covers JSONDecodeError and bad UTF-8; AttributeError
+        # covers JSON that is valid but not an object (e.g. a list).
+        logger.warning("Failed to read agent-state for %s: %s", agent, e)
+    metrics.extend(session_metrics)
+
+    return metrics
+
+
+# ---------------------------------------------------------------------------
+# Dimensions 8-10: Stubs (no data sources yet)
+# ---------------------------------------------------------------------------
+
+def collect_social_reach(agent: str) -> list[dict]:
+    """Social dimension placeholder: every metric is 0; the agent argument is unused."""
+    placeholders = (
+        ("followers", "followers"),
+        ("impressions_7d", "impressions"),
+        ("engagement_rate", "ratio"),
+    )
+    return [{"metric": name, "value": 0, "unit": unit} for name, unit in placeholders]
+
+
+def collect_capital(agent: str) -> list[dict]:
+    """Capital dimension placeholder: every metric is 0 USD; the agent argument is unused."""
+    return [
+        {"metric": name, "value": 0, "unit": "USD"}
+        for name in ("aum", "treasury")
+    ]
+
+
+def collect_external_impact(agent: str) -> list[dict]:
+    """External-impact placeholder: every metric is 0; the agent argument is unused."""
+    placeholders = (
+        ("decisions_informed", "decisions"),
+        ("deals_sourced", "deals"),
+    )
+    return [{"metric": name, "value": 0, "unit": unit} for name, unit in placeholders]
+
+
+# ---------------------------------------------------------------------------
+# Orchestration
+# ---------------------------------------------------------------------------
+
+# Maps each dimension key to a collector invoked as collector(conn, claim_index, agent).
+# The lambdas adapt collectors that take fewer arguments to that common call shape;
+# collect_knowledge_quality already has it and is referenced directly.
+DIMENSION_MAP = {
+    "knowledge_output": lambda conn, ci, agent: collect_knowledge_output(conn, agent),
+    "knowledge_quality": collect_knowledge_quality,
+    "contributor_engagement": lambda conn, ci, agent: collect_contributor_engagement(conn, agent),
+    "review_performance": lambda conn, ci, agent: collect_review_performance(conn, agent),
+    "spend_efficiency": lambda conn, ci, agent: collect_spend_efficiency(conn, agent),
+    "autonomy": lambda conn, ci, agent: collect_autonomy(conn, agent),
+    "infrastructure_health": lambda conn, ci, agent: collect_infrastructure_health(conn, agent),
+    "social_reach": lambda conn, ci, agent: collect_social_reach(agent),
+    "capital": lambda conn, ci, agent: collect_capital(agent),
+    "external_impact": lambda conn, ci, agent: collect_external_impact(agent),
+}
+
+
+def collect_all_for_agent(
+    db_path: str,
+    agent: str,
+    claim_index_url: str = "http://localhost:8080/claim-index",
+) -> dict:
+    """Run every collector in DIMENSION_MAP for one agent.
+
+    Fetches the claim index once, opens a read-only connection, and returns
+    {dimension: [metrics]}. A collector that raises is logged and contributes
+    an empty list, so one failing dimension does not abort the others.
+    """
+    claim_index = _fetch_claim_index(claim_index_url)
+    conn = _ro_conn(db_path)
+    collected = {}
+    try:
+        for dim_key, collector in DIMENSION_MAP.items():
+            try:
+                dimension_metrics = collector(conn, claim_index, agent)
+            except Exception as e:
+                logger.error("collector %s failed for %s: %s", dim_key, agent, e)
+                dimension_metrics = []
+            collected[dim_key] = dimension_metrics
+    finally:
+        conn.close()
+    return collected
+
+
+def collect_system_aggregate(
+    db_path: str,
+    claim_index_url: str = "http://localhost:8080/claim-index",
+) -> dict:
+    """System-level aggregate vitality metrics.
+
+    Args:
+        db_path: path of the SQLite database, opened read-only.
+        claim_index_url: URL the claim index is fetched from.
+
+    Returns:
+        {dimension: [metrics]} for knowledge_output, infrastructure_health,
+        spend_efficiency, social_reach and capital. knowledge_quality is
+        included only when the claim index could be fetched.
+    """
+    claim_index = _fetch_claim_index(claim_index_url)
+    conn = _ro_conn(db_path)
+    try:
+        metrics = {}
+
+        # Knowledge totals. Every field is read with .get so a claim index
+        # that lacks one key degrades to 0 instead of raising KeyError.
+        total_claims = claim_index.get("total_claims", 0) if claim_index else 0
+        orphan_ratio = claim_index.get("orphan_ratio", 0) if claim_index else 0
+        domain_count = len(claim_index.get("domains", {})) if claim_index else 0
+
+        metrics["knowledge_output"] = [
+            {"metric": "total_claims", "value": total_claims, "unit": "claims"},
+            {"metric": "total_domains", "value": domain_count, "unit": "domains"},
+            {"metric": "orphan_ratio", "value": round(orphan_ratio, 4), "unit": "ratio"},
+        ]
+
+        # Cross-domain citation rate
+        if claim_index:
+            claims = claim_index.get("claims", [])
+            total_links = sum(c.get("outgoing_count", 0) for c in claims)
+            cross_domain = 0
+            for c in claims:
+                src_domain = c.get("domain")
+                for link in c.get("outgoing_links", []):
+                    # A link may match several claims; each match in a
+                    # different domain is counted.
+                    for candidate in claims:
+                        stem = candidate.get("stem")
+                        # A claim without a stem cannot match by stem;
+                        # `None in link` would raise TypeError.
+                        matches = (stem is not None and stem in link) or (
+                            (candidate.get("file") or "").endswith(link + ".md")
+                        )
+                        if matches and candidate.get("domain") != src_domain:
+                            cross_domain += 1
+            metrics["knowledge_quality"] = [
+                {"metric": "cross_domain_citation_rate",
+                 "value": round(cross_domain / max(total_links, 1), 4),
+                 "unit": "ratio"},
+            ]
+
+        # Pipeline throughput
+        row = conn.execute(
+            "SELECT COUNT(*) as merged FROM prs "
+            "WHERE status='merged' AND merged_at > datetime('now', '-24 hours')"
+        ).fetchone()
+        row2 = conn.execute("SELECT COUNT(*) as total FROM sources").fetchone()
+        row3 = conn.execute(
+            "SELECT COUNT(*) as pending FROM prs "
+            "WHERE status NOT IN ('merged','rejected','closed')"
+        ).fetchone()
+
+        metrics["infrastructure_health"] = [
+            {"metric": "prs_merged_24h", "value": row["merged"], "unit": "PRs/day"},
+            {"metric": "total_sources", "value": row2["total"], "unit": "sources"},
+            {"metric": "queue_depth", "value": row3["pending"], "unit": "PRs"},
+        ]
+
+        # Total spend
+        row = conn.execute(
+            "SELECT COALESCE(SUM(cost_usd), 0) as cost "
+            "FROM costs WHERE date > date('now', '-1 day')"
+        ).fetchone()
+        row2 = conn.execute(
+            "SELECT COALESCE(SUM(generation_cost), 0) as cost FROM response_audit "
+            "WHERE timestamp > datetime('now', '-24 hours')"
+        ).fetchone()
+        metrics["spend_efficiency"] = [
+            {"metric": "pipeline_cost_24h", "value": round(row["cost"], 4), "unit": "USD"},
+            {"metric": "response_cost_24h", "value": round(row2["cost"], 4), "unit": "USD"},
+            {"metric": "total_cost_24h",
+             "value": round(row["cost"] + row2["cost"], 4), "unit": "USD"},
+        ]
+
+        # Stubs
+        metrics["social_reach"] = [{"metric": "total_followers", "value": 0, "unit": "followers"}]
+        metrics["capital"] = [{"metric": "total_aum", "value": 0, "unit": "USD"}]
+
+        return metrics
+    finally:
+        conn.close()
+
+
+def record_snapshot(
+    db_path: str,
+    claim_index_url: str = "http://localhost:8080/claim-index",
+):
+    """Collect metrics for every agent plus the system aggregate and store them.
+
+    All rows share one UTC timestamp. A failure while collecting for one agent
+    (or for the system aggregate) is logged and skipped; rows gathered so far
+    are kept. Returns a summary dict with ``rows_written``, ``agents`` and
+    ``recorded_at``.
+    """
+    recorded_at = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
+    pending = []
+
+    def queue(owner, dimension, source, entry):
+        # One vitality_snapshots row, in the column order of the INSERT below.
+        pending.append((
+            owner, dimension, entry["metric"], entry["value"],
+            entry["unit"], source, recorded_at,
+        ))
+
+    # Per-agent snapshots
+    for agent in ALL_AGENTS:
+        try:
+            per_dimension = collect_all_for_agent(db_path, agent, claim_index_url)
+            for dimension, entries in per_dimension.items():
+                source = f"{dimension}_collector"
+                for entry in entries:
+                    queue(agent, dimension, source, entry)
+        except Exception as e:
+            logger.error("vitality collection failed for %s: %s", agent, e)
+
+    # System aggregate, stored under the pseudo-agent "_system"
+    try:
+        aggregate = collect_system_aggregate(db_path, claim_index_url)
+        for dimension, entries in aggregate.items():
+            for entry in entries:
+                queue("_system", dimension, "system_aggregate", entry)
+    except Exception as e:
+        logger.error("vitality system aggregate failed: %s", e)
+
+    # Write all rows
+    ensure_schema(db_path)
+    agent_total = len(ALL_AGENTS)
+    conn = sqlite3.connect(db_path, timeout=30)
+    try:
+        conn.executemany(
+            "INSERT OR REPLACE INTO vitality_snapshots "
+            "(agent_name, dimension, metric, value, unit, source, recorded_at) "
+            "VALUES (?, ?, ?, ?, ?, ?, ?)",
+            pending,
+        )
+        conn.commit()
+        logger.info(
+            "vitality snapshot recorded: %d rows for %d agents + system",
+            len(pending), agent_total,
+        )
+        return {"rows_written": len(pending), "agents": agent_total, "recorded_at": recorded_at}
+    finally:
+        conn.close()
+
+
+if __name__ == "__main__":
+    # CLI: python3 vitality.py [db_path] — runs a snapshot and prints the
+    # summary as JSON. (A bare string here would be a no-op expression, not a
+    # docstring, so the usage note is kept as a comment.)
+    import sys
+
+    logging.basicConfig(level=logging.INFO)
+    db = sys.argv[1] if len(sys.argv) > 1 else "/opt/teleo-eval/pipeline/pipeline.db"
+    result = record_snapshot(db)
+    print(json.dumps(result, indent=2))
--- a/diagnostics/vitality_routes.py
+++ b/diagnostics/vitality_routes.py
@ -0,0 +1,293 @@
+"""Vitality API routes for Argus diagnostics dashboard.
+
+Endpoints:
+  GET /api/vitality              — latest snapshot + time-series for all agents or one
+  GET /api/vitality/snapshot     — trigger a new snapshot; requires ?confirm=1 (POST-like via GET for cron curl)
+  GET /api/vitality/leaderboard  — agents ranked by composite vitality score
+
+Owner: Argus
+"""
+
+import json
+import logging
+import sqlite3
+from pathlib import Path
+
+from aiohttp import web
+
+from vitality import (
+    ALL_AGENTS,
+    MIGRATION_SQL,
+    collect_all_for_agent,
+    collect_system_aggregate,
+    record_snapshot,
+)
+
+# Module-level logger for the vitality route handlers.
+logger = logging.getLogger("argus.vitality")
+
+# Composite vitality weights — Leo-approved 2026-04-08
+# Dimension keys match Ship's refactored vitality.py DIMENSION_MAP
+# Each weight is multiplied by the matching 0-1 dimension score in
+# handle_vitality_leaderboard and the products are summed in the order listed
+# here, so keep the key order stable.
+# NOTE(review): the weights below sum to 0.90 (four dimensions are parked at
+# 0.00), so with every dimension score at 1.0 the composite is 0.90, not 1.0 —
+# confirm this is intended.
+VITALITY_WEIGHTS = {
+    "knowledge_output": 0.30,       # primary output — highest weight
+    "knowledge_quality": 0.20,      # was "diversity" — quality of output
+    "contributor_engagement": 0.15, # attracting external contributors
+    "review_performance": 0.00,     # new dim, zero until review_records populated
+    "autonomy": 0.15,               # independent action
+    "infrastructure_health": 0.05,  # machinery working
+    "spend_efficiency": 0.05,       # cost discipline
+    "social_reach": 0.00,           # zero until accounts active
+    "capital": 0.00,                # zero until treasury exists
+    "external_impact": 0.00,        # zero until measurable
+}
+
+# Public paths (no auth required)
+# Lists all three vitality endpoints, including /api/vitality/snapshot, which
+# triggers a database write when called with ?confirm=1.
+# NOTE(review): presumably consumed by the app's auth middleware, which is not
+# visible in this file — confirm the write-triggering route should be public.
+VITALITY_PUBLIC_PATHS = frozenset({
+    "/api/vitality",
+    "/api/vitality/snapshot",
+    "/api/vitality/leaderboard",
+})
+
+
+def _ro_conn(db_path: str) -> sqlite3.Connection:
+    """Open a read-only SQLite connection whose rows support access by column name.
+
+    Args:
+        db_path: Filesystem path of the database file (a plain path, not a
+            ``file:`` URI).
+
+    Returns:
+        A connection opened with ``mode=ro``, a 30-second busy timeout, and
+        ``sqlite3.Row`` as the row factory.
+
+    Raises:
+        sqlite3.OperationalError: If the database file cannot be opened.
+    """
+    # Build the URI through pathlib so characters that are special inside a URI
+    # ("?", "#", "%", spaces) are percent-encoded instead of truncating or
+    # corrupting the file name. as_uri() needs an absolute path; absolute() is
+    # used rather than resolve() so symlinks are left untouched.
+    uri = f"{Path(db_path).absolute().as_uri()}?mode=ro"
+    conn = sqlite3.connect(uri, uri=True, timeout=30)
+    conn.row_factory = sqlite3.Row
+    return conn
+
+
+async def handle_vitality(request: web.Request) -> web.Response:
+    """GET /api/vitality?agent=<name>&days=7
+
+    Returns the most recent snapshot plus time-series data for trend charts.
+
+    Query parameters:
+        agent: Optional agent name. When given, both the snapshot and the
+            time-series are limited to that agent. When omitted, the snapshot
+            covers every agent in ALL_AGENTS plus "_system", and the
+            time-series covers "_system" only.
+        days: Time-series look-back window in days. Defaults to 7, is clamped
+            to the range 1-90, and falls back to 7 when not an integer.
+
+    Response JSON:
+        {"has_data": False, ...} when the table is missing or empty; otherwise
+        {"latest_snapshot": ts, "agents": {agent: {dimension: [metric, ...]}},
+         "time_series": {"dimension.metric": [{"t": ts, "v": value}, ...]},
+         "has_data": True}.
+    """
+    db_path = request.app["db_path"]
+    agent = request.query.get("agent")
+    try:
+        days = int(request.query.get("days", "7"))
+    except ValueError:
+        days = 7
+    # Clamp on both sides: zero or negative values would build a modifier such
+    # as "-0 days" or "--3 days", which silently yields an empty time-series.
+    days = max(1, min(days, 90))
+
+    conn = _ro_conn(db_path)
+    try:
+        # Check if table exists
+        table_check = conn.execute(
+            "SELECT name FROM sqlite_master WHERE type='table' AND name='vitality_snapshots'"
+        ).fetchone()
+        if not table_check:
+            return web.json_response({
+                "error": "No vitality data yet. Trigger a snapshot first via /api/vitality/snapshot",
+                "has_data": False
+            })
+
+        # Latest snapshot timestamp (NULL when the table exists but is empty)
+        latest = conn.execute(
+            "SELECT MAX(recorded_at) as ts FROM vitality_snapshots"
+        ).fetchone()
+        latest_ts = latest["ts"] if latest else None
+
+        if not latest_ts:
+            return web.json_response({"has_data": False})
+
+        # Unpacking works whether ALL_AGENTS is a list or any other iterable.
+        agents_filter = [agent] if agent else [*ALL_AGENTS, "_system"]
+
+        result = {"latest_snapshot": latest_ts, "agents": {}}
+
+        for a in agents_filter:
+            rows = conn.execute(
+                "SELECT dimension, metric, value, unit FROM vitality_snapshots "
+                "WHERE agent_name = ? AND recorded_at = ?",
+                (a, latest_ts)
+            ).fetchall()
+
+            # Agents with no rows in the latest snapshot are left out entirely.
+            if not rows:
+                continue
+
+            # Group the flat metric rows by dimension.
+            dimensions = {}
+            for r in rows:
+                dimensions.setdefault(r["dimension"], []).append({
+                    "metric": r["metric"],
+                    "value": r["value"],
+                    "unit": r["unit"],
+                })
+            result["agents"][a] = dimensions
+
+        # Time-series for trend charts (one data point per snapshot)
+        # NOTE(review): recorded_at is compared as text against SQLite's
+        # datetime() output — confirm the stored timestamp format sorts
+        # compatibly with "YYYY-MM-DD HH:MM:SS".
+        ts_query_agent = agent if agent else "_system"
+        ts_rows = conn.execute(
+            "SELECT recorded_at, dimension, metric, value "
+            "FROM vitality_snapshots "
+            "WHERE agent_name = ? AND recorded_at > datetime('now', ?) "
+            "ORDER BY recorded_at",
+            (ts_query_agent, f"-{days} days")
+        ).fetchall()
+
+        # One series per "dimension.metric" key, in chronological order.
+        time_series = {}
+        for r in ts_rows:
+            key = f"{r['dimension']}.{r['metric']}"
+            time_series.setdefault(key, []).append({
+                "t": r["recorded_at"],
+                "v": r["value"],
+            })
+        result["time_series"] = time_series
+        result["has_data"] = True
+
+        return web.json_response(result)
+    finally:
+        conn.close()
+
+
+async def handle_vitality_snapshot(request: web.Request) -> web.Response:
+    """GET /api/vitality/snapshot?confirm=1 — trigger a new snapshot collection.
+
+    Used by cron: curl "http://localhost:8081/api/vitality/snapshot?confirm=1"
+    Without confirm=1 the request is rejected with HTTP 400 and nothing is
+    written, to prevent accidental triggers from crawlers/prefetch.
+
+    Returns:
+        200 with {"status": "ok", ...record_snapshot result...} on success,
+        400 with {"status": "noop", ...} when confirm=1 is missing,
+        500 with {"status": "error", "error": <message>} if collection fails.
+    """
+    # Imported locally so the handler does not rely on a module-level import.
+    import asyncio
+
+    if request.query.get("confirm") != "1":
+        return web.json_response(
+            {"status": "noop", "error": "Add ?confirm=1 to trigger a snapshot write"},
+            status=400,
+        )
+    db_path = request.app["db_path"]
+    claim_index_url = request.app.get("claim_index_url", "http://localhost:8080/claim-index")
+
+    try:
+        # record_snapshot is synchronous (collectors plus SQLite writes); run it
+        # in the default executor so the event loop keeps serving other requests
+        # while the snapshot is being collected.
+        loop = asyncio.get_running_loop()
+        result = await loop.run_in_executor(
+            None, record_snapshot, db_path, claim_index_url
+        )
+        return web.json_response({"status": "ok", **result})
+    except Exception as e:
+        # logger.exception logs at ERROR level and keeps the traceback.
+        logger.exception("vitality snapshot failed: %s", e)
+        return web.json_response({"status": "error", "error": str(e)}, status=500)
+
+
+def _metric(metrics: dict, name: str):
+    """Return metrics[name], treating a missing key or a NULL value as 0."""
+    value = metrics.get(name)
+    return 0 if value is None else value
+
+
+def _clamp01(score):
+    """Limit score to [0, 1]; in-range values are returned unchanged."""
+    if score < 0:
+        return 0
+    if score > 1:
+        return 1.0
+    return score
+
+
+def _score_dimensions(dims: dict) -> dict:
+    """Turn raw per-dimension metrics into 0-1 scores keyed by dimension name.
+
+    Args:
+        dims: Mapping of dimension name to {metric name: value}.
+
+    Returns:
+        Dict of dimension name to score, in the fixed order used by the API.
+    """
+    ko = dims.get("knowledge_output", {})
+    kq = dims.get("knowledge_quality", {})
+    ce = dims.get("contributor_engagement", {})
+    rp = dims.get("review_performance", {})
+    am = dims.get("autonomy", {})
+    ih = dims.get("infrastructure_health", {})
+    se = dims.get("spend_efficiency", {})
+
+    # knowledge_quality blends four metrics: a challenge_rate of 0.1 and an
+    # activity_breadth of 4 each earn their full 0.2 share; evidence_density
+    # and domain_coverage are used as-is with a 0.3 share each. Only the sum is
+    # capped, not the individual terms.
+    quality = (
+        _metric(kq, "challenge_rate") / 0.1 * 0.2
+        + _metric(kq, "activity_breadth") / 4 * 0.2
+        + _metric(kq, "evidence_density") * 0.3
+        + _metric(kq, "domain_coverage") * 0.3
+    )
+
+    return {
+        # claims_merged (cap at 100 = 1.0)
+        "knowledge_output": _clamp01(_metric(ko, "claims_merged") / 100),
+        "knowledge_quality": _clamp01(quality),
+        # unique_submitters (cap at 5 = 1.0)
+        "contributor_engagement": _clamp01(_metric(ce, "unique_submitters") / 5),
+        # approval_rate from review_records (0 until populated)
+        "review_performance": _clamp01(_metric(rp, "approval_rate")),
+        # active_days_7d (7 = 1.0)
+        "autonomy": _clamp01(_metric(am, "active_days_7d") / 7),
+        # merge_rate_7d is expected to be a 0-1 ratio already
+        "infrastructure_health": _clamp01(_metric(ih, "merge_rate_7d")),
+        # Inverted: a lower response_cost_24h is better and a cost of 10 or more
+        # scores 0. An agent with no recorded cost gets the full 1.0.
+        "spend_efficiency": _clamp01(1.0 - _metric(se, "response_cost_24h") / 10.0),
+        # Social/Capital/External: stubbed at 0
+        "social_reach": 0,
+        "capital": 0,
+        "external_impact": 0,
+    }
+
+
+async def handle_vitality_leaderboard(request: web.Request) -> web.Response:
+    """GET /api/vitality/leaderboard — agents ranked by composite vitality score.
+
+    Scoring approach:
+    - Each dimension gets a 0-1 normalized score based on the metric values
+      (missing or NULL metrics count as 0)
+    - Weighted sum over VITALITY_WEIGHTS produces the composite score
+    - Agents ranked by composite score descending; agents with no rows in the
+      latest snapshot are omitted
+
+    Response JSON:
+        {"has_data": False, ...} when the table is missing or empty; otherwise
+        {"has_data": True, "snapshot_at": ts, "leaderboard": [
+            {"agent", "composite_score", "dimension_scores", "raw_highlights"}, ...]}.
+    """
+    db_path = request.app["db_path"]
+    conn = _ro_conn(db_path)
+    try:
+        table_check = conn.execute(
+            "SELECT name FROM sqlite_master WHERE type='table' AND name='vitality_snapshots'"
+        ).fetchone()
+        if not table_check:
+            return web.json_response({"error": "No vitality data yet", "has_data": False})
+
+        latest = conn.execute(
+            "SELECT MAX(recorded_at) as ts FROM vitality_snapshots"
+        ).fetchone()
+        if not latest or not latest["ts"]:
+            return web.json_response({"has_data": False})
+
+        latest_ts = latest["ts"]
+
+        # Collect all agents' latest data
+        agent_scores = []
+        for agent in ALL_AGENTS:
+            rows = conn.execute(
+                "SELECT dimension, metric, value FROM vitality_snapshots "
+                "WHERE agent_name = ? AND recorded_at = ?",
+                (agent, latest_ts)
+            ).fetchall()
+            if not rows:
+                continue
+
+            # Reshape the flat rows into {dimension: {metric: value}}.
+            dims = {}
+            for r in rows:
+                dims.setdefault(r["dimension"], {})[r["metric"]] = r["value"]
+
+            dim_scores = _score_dimensions(dims)
+
+            # Composite weighted score
+            composite = sum(
+                dim_scores.get(dim, 0) * weight
+                for dim, weight in VITALITY_WEIGHTS.items()
+            )
+
+            kq = dims.get("knowledge_quality", {})
+            agent_scores.append({
+                "agent": agent,
+                "composite_score": round(composite, 4),
+                "dimension_scores": {k: round(v, 4) for k, v in dim_scores.items()},
+                # Unnormalized values for display; the rates are shown as percentages.
+                "raw_highlights": {
+                    "claims_merged": int(_metric(dims.get("knowledge_output", {}), "claims_merged")),
+                    "merge_rate": round(
+                        _metric(dims.get("infrastructure_health", {}), "merge_rate_7d") * 100, 1
+                    ),
+                    "active_days": int(_metric(dims.get("autonomy", {}), "active_days_7d")),
+                    "challenge_rate": round(_metric(kq, "challenge_rate") * 100, 1),
+                    "evidence_density": round(_metric(kq, "evidence_density") * 100, 1),
+                },
+            })
+
+        # Sort by composite score descending (stable, so ties keep ALL_AGENTS order)
+        agent_scores.sort(key=lambda x: x["composite_score"], reverse=True)
+
+        return web.json_response({
+            "has_data": True,
+            "snapshot_at": latest_ts,
+            "leaderboard": agent_scores,
+        })
+    finally:
+        conn.close()
+
+
+def register_vitality_routes(app: web.Application):
+    """Attach the three vitality GET handlers to the aiohttp app's router."""
+    # (path, handler) pairs, registered in the order listed.
+    routes = (
+        ("/api/vitality", handle_vitality),
+        ("/api/vitality/snapshot", handle_vitality_snapshot),
+        ("/api/vitality/leaderboard", handle_vitality_leaderboard),
+    )
+    for path, handler in routes:
+        app.router.add_get(path, handler)