epimetheus: merge root/diagnostics fixes into canonical ops/diagnostics

dashboard_routes.py — root copy is superset:
  - Extraction yield query: source_url→path, s.url→s.path (truth audit)
  - insufficient_data flag on cascade-coverage endpoint
  - Rejection reasons fallback to prs.eval_issues when review_records empty
  - rejection_source field replaces disagreement_types in review-summary
  - New /api/agent-scorecard endpoint (Argus truth audit)
  - Route registration for agent-scorecard

alerting.py — merged from both copies:
  - FROM ROOT: "unknown" agent filter in check_agent_health (bug #3)
  - FROM ROOT: prs.eval_issues queries in check_rejection_spike,
    check_stuck_loops, check_domain_rejection_patterns,
    generate_failure_report (truth audit correction Apr 2)
  - FROM CANONICAL: _ALLOWED_DIM_EXPRS SQL whitelist + validation
    in _check_approval_by_dimension (Ganymede security fix)

Files verified canonical=newer (no changes needed):
  IDENTICAL: dashboard_prs.py, shared_ui.py, dashboard_ops.py,
    dashboard_health.py, research_tracking.py, response_audit_routes.py
  CANONICAL WINS: dashboard_epistemic.py, tier1_metrics.py,
    dashboard_agents.py, alerting_routes.py, tier1_routes.py

NOTE: dashboard_routes.py review-summary API no longer returns
disagreement_types, but canonical dashboard_epistemic.py still renders
it — UI will show empty data. Flag for Ganymede review.

Root /diagnostics/ copies are now safe to delete for these 2 files.
Remaining root files already match or are older than canonical.

Pentagon-Agent: Epimetheus <0144398E-4ED3-4FE2-95A3-3D72E1ABF887>
This commit is contained in:
m3taversal 2026-04-14 11:37:12 +01:00
parent 70e774fa32
commit 143adb09e9
2 changed files with 164 additions and 47 deletions

View file

@ -67,6 +67,8 @@ def check_agent_health(conn: sqlite3.Connection) -> list[dict]:
now = datetime.now(timezone.utc) now = datetime.now(timezone.utc)
for r in rows: for r in rows:
agent = r["agent"] agent = r["agent"]
if agent in ("unknown", None):
continue
latest = r["latest"] latest = r["latest"]
if not latest: if not latest:
continue continue
@ -266,24 +268,22 @@ def check_rejection_spike(conn: sqlite3.Connection) -> list[dict]:
"""Detect single rejection reason exceeding REJECTION_SPIKE_RATIO of recent rejections.""" """Detect single rejection reason exceeding REJECTION_SPIKE_RATIO of recent rejections."""
alerts = [] alerts = []
# Total rejections in 24h # Total rejected PRs in 24h (prs.eval_issues is the canonical source — Epimetheus 2026-04-02)
total = conn.execute( total = conn.execute(
"""SELECT COUNT(*) as n FROM audit_log """SELECT COUNT(*) as n FROM prs
WHERE stage='evaluate' WHERE eval_issues IS NOT NULL AND eval_issues != '[]'
AND event IN ('changes_requested','domain_rejected','tier05_rejected') AND created_at > datetime('now', '-24 hours')"""
AND timestamp > datetime('now', '-24 hours')"""
).fetchone()["n"] ).fetchone()["n"]
if total < 10: if total < 10:
return alerts # Not enough data return alerts # Not enough data
# Count by rejection tag # Count by rejection tag from prs.eval_issues
tags = conn.execute( tags = conn.execute(
"""SELECT value as tag, COUNT(*) as cnt """SELECT value as tag, COUNT(*) as cnt
FROM audit_log, json_each(json_extract(detail, '$.issues')) FROM prs, json_each(prs.eval_issues)
WHERE stage='evaluate' WHERE eval_issues IS NOT NULL AND eval_issues != '[]'
AND event IN ('changes_requested','domain_rejected','tier05_rejected') AND created_at > datetime('now', '-24 hours')
AND timestamp > datetime('now', '-24 hours')
GROUP BY tag ORDER BY cnt DESC""" GROUP BY tag ORDER BY cnt DESC"""
).fetchall() ).fetchall()
@ -315,16 +315,13 @@ def check_stuck_loops(conn: sqlite3.Connection) -> list[dict]:
"""Detect agents repeatedly failing on the same rejection reason.""" """Detect agents repeatedly failing on the same rejection reason."""
alerts = [] alerts = []
# COALESCE: rejection events use $.agent, eval events use $.domain_agent (Epimetheus 2026-03-28) # Agent + rejection reason from prs table directly (Epimetheus correction 2026-04-02)
rows = conn.execute( rows = conn.execute(
"""SELECT COALESCE(json_extract(detail, '$.agent'), json_extract(detail, '$.domain_agent')) as agent, """SELECT agent, value as tag, COUNT(*) as cnt
value as tag, FROM prs, json_each(prs.eval_issues)
COUNT(*) as cnt WHERE eval_issues IS NOT NULL AND eval_issues != '[]'
FROM audit_log, json_each(json_extract(detail, '$.issues')) AND agent IS NOT NULL
WHERE stage='evaluate' AND created_at > datetime('now', '-6 hours')
AND event IN ('changes_requested','domain_rejected','tier05_rejected')
AND timestamp > datetime('now', '-6 hours')
AND COALESCE(json_extract(detail, '$.agent'), json_extract(detail, '$.domain_agent')) IS NOT NULL
GROUP BY agent, tag GROUP BY agent, tag
HAVING cnt > ?""", HAVING cnt > ?""",
(STUCK_LOOP_THRESHOLD,), (STUCK_LOOP_THRESHOLD,),
@ -412,16 +409,13 @@ def check_domain_rejection_patterns(conn: sqlite3.Connection) -> list[dict]:
"""Track rejection reason shift per domain — surfaces domain maturity issues.""" """Track rejection reason shift per domain — surfaces domain maturity issues."""
alerts = [] alerts = []
# Per-domain rejection breakdown in 24h # Per-domain rejection breakdown in 24h from prs table (Epimetheus correction 2026-04-02)
rows = conn.execute( rows = conn.execute(
"""SELECT json_extract(detail, '$.domain') as domain, """SELECT domain, value as tag, COUNT(*) as cnt
value as tag, FROM prs, json_each(prs.eval_issues)
COUNT(*) as cnt WHERE eval_issues IS NOT NULL AND eval_issues != '[]'
FROM audit_log, json_each(json_extract(detail, '$.issues')) AND domain IS NOT NULL
WHERE stage='evaluate' AND created_at > datetime('now', '-24 hours')
AND event IN ('changes_requested','domain_rejected','tier05_rejected')
AND timestamp > datetime('now', '-24 hours')
AND json_extract(detail, '$.domain') IS NOT NULL
GROUP BY domain, tag GROUP BY domain, tag
ORDER BY domain, cnt DESC""" ORDER BY domain, cnt DESC"""
).fetchall() ).fetchall()
@ -473,12 +467,11 @@ def generate_failure_report(conn: sqlite3.Connection, agent: str, hours: int = 2
hours = int(hours) # defensive — callers should pass int, but enforce it hours = int(hours) # defensive — callers should pass int, but enforce it
rows = conn.execute( rows = conn.execute(
"""SELECT value as tag, COUNT(*) as cnt, """SELECT value as tag, COUNT(*) as cnt,
GROUP_CONCAT(DISTINCT json_extract(detail, '$.pr')) as pr_numbers GROUP_CONCAT(DISTINCT number) as pr_numbers
FROM audit_log, json_each(json_extract(detail, '$.issues')) FROM prs, json_each(prs.eval_issues)
WHERE stage='evaluate' WHERE eval_issues IS NOT NULL AND eval_issues != '[]'
AND event IN ('changes_requested','domain_rejected','tier05_rejected') AND agent = ?
AND json_extract(detail, '$.agent') = ? AND created_at > datetime('now', ? || ' hours')
AND timestamp > datetime('now', ? || ' hours')
GROUP BY tag ORDER BY cnt DESC GROUP BY tag ORDER BY cnt DESC
LIMIT 5""", LIMIT 5""",
(agent, f"-{hours}"), (agent, f"-{hours}"),

View file

@ -237,9 +237,9 @@ async def handle_extraction_yield_by_domain(request):
# Sources per domain (approximate from PR source_path domain) # Sources per domain (approximate from PR source_path domain)
source_counts = conn.execute( source_counts = conn.execute(
"""SELECT domain, COUNT(DISTINCT source_url) as sources """SELECT domain, COUNT(DISTINCT path) as sources
FROM sources s FROM sources s
JOIN prs p ON p.source_path LIKE '%' || s.url || '%' JOIN prs p ON p.source_path LIKE '%' || s.path || '%'
WHERE s.created_at > datetime('now', ? || ' days') WHERE s.created_at > datetime('now', ? || ' days')
GROUP BY domain""", GROUP BY domain""",
(f"-{days}",), (f"-{days}",),
@ -444,6 +444,8 @@ async def handle_cascade_coverage(request):
for r in triggered for r in triggered
] ]
insufficient_data = total_triggered < 5
return web.json_response({ return web.json_response({
"days": days, "days": days,
"total_triggered": total_triggered, "total_triggered": total_triggered,
@ -452,6 +454,7 @@ async def handle_cascade_coverage(request):
"total_notifications": summaries["total_notifications"] if summaries else 0, "total_notifications": summaries["total_notifications"] if summaries else 0,
"merges_with_cascade": summaries["total_merges_with_cascade"] if summaries else 0, "merges_with_cascade": summaries["total_merges_with_cascade"] if summaries else 0,
"by_agent": by_agent, "by_agent": by_agent,
"insufficient_data": insufficient_data,
}) })
finally: finally:
conn.close() conn.close()
@ -490,7 +493,7 @@ async def handle_review_summary(request):
(f"-{days}",), (f"-{days}",),
).fetchall() ).fetchall()
# Rejection reasons # Rejection reasons — try review_records first, fall back to prs.eval_issues
reasons = conn.execute( reasons = conn.execute(
"""SELECT rejection_reason, COUNT(*) as cnt """SELECT rejection_reason, COUNT(*) as cnt
FROM review_records FROM review_records
@ -500,15 +503,17 @@ async def handle_review_summary(request):
(f"-{days}",), (f"-{days}",),
).fetchall() ).fetchall()
# Disagreement types rejection_source = "review_records"
disagreements = conn.execute( if not reasons:
"""SELECT disagreement_type, COUNT(*) as cnt reasons = conn.execute(
FROM review_records """SELECT value AS rejection_reason, COUNT(*) as cnt
WHERE disagreement_type IS NOT NULL FROM prs, json_each(prs.eval_issues)
AND reviewed_at > datetime('now', ? || ' days') WHERE eval_issues IS NOT NULL AND eval_issues != '[]'
GROUP BY disagreement_type ORDER BY cnt DESC""", AND created_at > datetime('now', ? || ' days')
GROUP BY value ORDER BY cnt DESC""",
(f"-{days}",), (f"-{days}",),
).fetchall() ).fetchall()
rejection_source = "prs.eval_issues"
# Per-reviewer breakdown # Per-reviewer breakdown
reviewers = conn.execute( reviewers = conn.execute(
@ -541,7 +546,7 @@ async def handle_review_summary(request):
"total": total, "total": total,
"outcomes": {r["outcome"]: r["cnt"] for r in outcomes}, "outcomes": {r["outcome"]: r["cnt"] for r in outcomes},
"rejection_reasons": [{"reason": r["rejection_reason"], "count": r["cnt"]} for r in reasons], "rejection_reasons": [{"reason": r["rejection_reason"], "count": r["cnt"]} for r in reasons],
"disagreement_types": [{"type": r["disagreement_type"], "count": r["cnt"]} for r in disagreements], "rejection_source": rejection_source,
"reviewers": [ "reviewers": [
{"reviewer": r["reviewer"], "approved": r["approved"], "approved_with_changes": r["approved_with_changes"], {"reviewer": r["reviewer"], "approved": r["approved"], "approved_with_changes": r["approved_with_changes"],
"rejected": r["rejected"], "total": r["total"]} "rejected": r["rejected"], "total": r["total"]}
@ -557,6 +562,124 @@ async def handle_review_summary(request):
conn.close() conn.close()
# ─── GET /api/agent-scorecard ──────────────────────────────────────────────
async def handle_agent_scorecard(request):
    """Build a per-agent scorecard for the last N days.

    Combines three signals:
      * submission volume from the ``prs`` table,
      * review outcomes from ``review_records`` (falling back to
        ``audit_log`` eval events when no structured reviews exist yet),
      * rejection reasons from ``prs.eval_issues`` (canonical source).

    Query params:
        days: lookback window, default 30, capped at 90.

    Returns:
        JSON ``{"days": int, "scorecards": [...]}`` sorted by review volume.
    """
    conn = request.app["_get_conn"]()
    try:
        try:
            window = min(int(request.query.get("days", "30")), 90)
        except ValueError:
            window = 30
        window_expr = f"-{window}"

        # PRs submitted per agent within the window.
        submitted = {
            row["agent"]: row["cnt"]
            for row in conn.execute(
                """SELECT agent, COUNT(*) as cnt FROM prs
                   WHERE agent IS NOT NULL
                   AND created_at > datetime('now', ? || ' days')
                   GROUP BY agent""",
                (window_expr,),
            ).fetchall()
        }

        # Review outcomes keyed by agent. The review_records table may not
        # exist yet on older databases — treat that as "no data".
        blank = {"approved": 0, "approved_with_changes": 0, "rejected": 0, "total": 0}
        outcomes = {}
        try:
            for row in conn.execute(
                """SELECT reviewer as agent, outcome, COUNT(*) as cnt
                   FROM review_records
                   WHERE reviewed_at > datetime('now', ? || ' days')
                   GROUP BY reviewer, outcome""",
                (window_expr,),
            ).fetchall():
                bucket = outcomes.setdefault(row["agent"], dict(blank))
                bucket[row["outcome"].replace("-", "_")] = row["cnt"]
                bucket["total"] += row["cnt"]
        except sqlite3.OperationalError:
            pass

        if not outcomes:
            # Pre-review_records era: reconstruct outcomes from audit_log
            # eval events instead.
            for row in conn.execute(
                """SELECT
                COALESCE(json_extract(detail, '$.agent'), json_extract(detail, '$.domain_agent')) as agent,
                event, COUNT(*) as cnt
                FROM audit_log
                WHERE stage='evaluate'
                AND event IN ('approved','changes_requested','domain_rejected','tier05_rejected')
                AND timestamp > datetime('now', ? || ' days')
                GROUP BY agent, event""",
                (window_expr,),
            ).fetchall():
                who = row["agent"]
                if not who:
                    continue
                bucket = outcomes.setdefault(who, dict(blank))
                if row["event"] == "approved":
                    key = "approved"
                elif row["event"] == "changes_requested":
                    # fixer auto-remediated; equivalent in pre-review_records era
                    key = "approved_with_changes"
                else:
                    key = "rejected"
                bucket[key] += row["cnt"]
                bucket["total"] += row["cnt"]

        # Rejection reasons per agent, straight from prs.eval_issues.
        rejection_breakdown = {}
        for row in conn.execute(
            """SELECT agent, value as reason, COUNT(*) as cnt
               FROM prs, json_each(prs.eval_issues)
               WHERE eval_issues IS NOT NULL AND eval_issues != '[]'
               AND agent IS NOT NULL
               AND created_at > datetime('now', ? || ' days')
               GROUP BY agent, reason ORDER BY agent, cnt DESC""",
            (window_expr,),
        ).fetchall():
            rejection_breakdown.setdefault(row["agent"], {})[row["reason"]] = row["cnt"]

        # Assemble one card per agent seen in either data source.
        cards = []
        for who in sorted(set(submitted) | set(outcomes)):
            if who in ("unknown", None):
                continue
            stats = outcomes.get(who, blank)
            n_reviews = stats["total"]
            ok = stats["approved"]
            ok_wc = stats["approved_with_changes"]
            rate = round((ok + ok_wc) / n_reviews * 100, 1) if n_reviews else 0
            cards.append({
                "agent": who,
                "total_prs": submitted.get(who, 0),
                "total_reviews": n_reviews,
                "approved": ok,
                "approved_with_changes": ok_wc,
                "rejected": stats["rejected"],
                "approval_rate": rate,
                "rejection_reasons": rejection_breakdown.get(who, {}),
            })
        cards.sort(key=lambda c: c["total_reviews"], reverse=True)
        return web.json_response({"days": window, "scorecards": cards})
    finally:
        conn.close()
# ─── Trace endpoint ──────────────────────────────────────────────────────── # ─── Trace endpoint ────────────────────────────────────────────────────────
@ -998,6 +1121,7 @@ def register_dashboard_routes(app: web.Application, get_conn):
app.router.add_get("/api/agents-dashboard", handle_agents_dashboard) app.router.add_get("/api/agents-dashboard", handle_agents_dashboard)
app.router.add_get("/api/cascade-coverage", handle_cascade_coverage) app.router.add_get("/api/cascade-coverage", handle_cascade_coverage)
app.router.add_get("/api/review-summary", handle_review_summary) app.router.add_get("/api/review-summary", handle_review_summary)
app.router.add_get("/api/agent-scorecard", handle_agent_scorecard)
app.router.add_get("/api/trace/{trace_id}", handle_trace) app.router.add_get("/api/trace/{trace_id}", handle_trace)
app.router.add_get("/api/growth", handle_growth) app.router.add_get("/api/growth", handle_growth)
app.router.add_get("/api/pr-lifecycle", handle_pr_lifecycle) app.router.add_get("/api/pr-lifecycle", handle_pr_lifecycle)