sync: import all VPS pipeline + diagnostics code as baseline
Imports 67 files from VPS (/opt/teleo-eval/) into repo as the single source of truth. Previously only 8 of 67 files existed in repo — the rest were deployed directly to VPS via SCP, causing massive drift. Includes: - pipeline/lib/: 33 Python modules (daemon core, extraction, evaluation, merge, cascade, cross-domain, costs, attribution, etc.) - pipeline/: main daemon (teleo-pipeline.py), reweave.py, batch-extract-50.sh - diagnostics/: 19 files (4-page dashboard, alerting, daily digest, review queue, tier1 metrics) - agent-state/: bootstrap, lib-state, cascade inbox processor, schema - systemd/: service unit files for reference - deploy.sh: rsync-based deploy with --dry-run, syntax checks, dirty-tree gate - research-session.sh: updated with Step 8.5 digest + cascade inbox processing No new code written — all files are exact copies from VPS as of 2026-04-06. From this point forward: edit in repo, commit, then deploy.sh. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
68bed4bda5
commit
05d74d5e32
59 changed files with 19652 additions and 182 deletions
99
ops/deploy.sh
Executable file
99
ops/deploy.sh
Executable file
|
|
@ -0,0 +1,99 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
# deploy.sh — Deploy pipeline and diagnostics to VPS from repo
|
||||||
|
# Usage: ./deploy.sh [--dry-run] [--restart]
|
||||||
|
#
|
||||||
|
# Requires: committed, clean working tree. Enforces repo-first workflow.
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
VPS_HOST="teleo@77.42.65.182"
|
||||||
|
VPS_PIPELINE="/opt/teleo-eval/pipeline"
|
||||||
|
VPS_DIAGNOSTICS="/opt/teleo-eval/diagnostics"
|
||||||
|
VPS_AGENT_STATE="/opt/teleo-eval/ops/agent-state"
|
||||||
|
REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
|
||||||
|
|
||||||
|
DRY_RUN=false
|
||||||
|
RESTART=false
|
||||||
|
|
||||||
|
for arg in "$@"; do
|
||||||
|
case "$arg" in
|
||||||
|
--dry-run) DRY_RUN=true ;;
|
||||||
|
--restart) RESTART=true ;;
|
||||||
|
--help|-h)
|
||||||
|
echo "Usage: $0 [--dry-run] [--restart]"
|
||||||
|
echo " --dry-run Show what would be deployed without doing it"
|
||||||
|
echo " --restart Restart services after deploy"
|
||||||
|
exit 0
|
||||||
|
;;
|
||||||
|
*) echo "Unknown arg: $arg"; exit 1 ;;
|
||||||
|
esac
|
||||||
|
done
|
||||||
|
|
||||||
|
# Gate: working tree must be clean
|
||||||
|
if [ -n "$(git -C "$REPO_ROOT" status --porcelain)" ]; then
|
||||||
|
echo "ERROR: Uncommitted changes. Commit first, deploy second."
|
||||||
|
git -C "$REPO_ROOT" status --short
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "Deploying from commit: $(git -C "$REPO_ROOT" log --oneline -1)"
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
# Syntax check all Python files before deploying
|
||||||
|
echo "=== Pre-deploy syntax check ==="
|
||||||
|
ERRORS=0
|
||||||
|
for f in "$REPO_ROOT/ops/pipeline-v2/lib/"*.py "$REPO_ROOT/ops/pipeline-v2/"*.py "$REPO_ROOT/ops/diagnostics/"*.py; do
|
||||||
|
[ -f "$f" ] || continue
|
||||||
|
if ! python3 -c "import ast; ast.parse(open('$f').read())" 2>/dev/null; then
|
||||||
|
echo "SYNTAX ERROR: $f"
|
||||||
|
ERRORS=$((ERRORS + 1))
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
if [ "$ERRORS" -gt 0 ]; then
|
||||||
|
echo "ERROR: $ERRORS files have syntax errors. Fix before deploying."
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
echo "All files pass syntax check."
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
RSYNC_FLAGS="-avz --exclude='__pycache__' --exclude='*.pyc' --exclude='*.bak*'"
|
||||||
|
if $DRY_RUN; then
|
||||||
|
RSYNC_FLAGS="$RSYNC_FLAGS --dry-run"
|
||||||
|
echo "=== DRY RUN ==="
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "=== Pipeline lib/ ==="
|
||||||
|
rsync $RSYNC_FLAGS "$REPO_ROOT/ops/pipeline-v2/lib/" "$VPS_HOST:$VPS_PIPELINE/lib/"
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
echo "=== Pipeline top-level ==="
|
||||||
|
for f in teleo-pipeline.py reweave.py batch-extract-50.sh; do
|
||||||
|
[ -f "$REPO_ROOT/ops/pipeline-v2/$f" ] || continue
|
||||||
|
rsync $RSYNC_FLAGS "$REPO_ROOT/ops/pipeline-v2/$f" "$VPS_HOST:$VPS_PIPELINE/$f"
|
||||||
|
done
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
echo "=== Diagnostics ==="
|
||||||
|
rsync $RSYNC_FLAGS "$REPO_ROOT/ops/diagnostics/" "$VPS_HOST:$VPS_DIAGNOSTICS/"
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
echo "=== Agent state ==="
|
||||||
|
rsync $RSYNC_FLAGS "$REPO_ROOT/ops/agent-state/" "$VPS_HOST:$VPS_AGENT_STATE/"
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
echo "=== Research session ==="
|
||||||
|
rsync $RSYNC_FLAGS "$REPO_ROOT/ops/research-session.sh" "$VPS_HOST:/opt/teleo-eval/research-session.sh"
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
if $DRY_RUN; then
|
||||||
|
echo "Dry run complete. No changes made."
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "Deploy complete."
|
||||||
|
|
||||||
|
if $RESTART; then
|
||||||
|
echo ""
|
||||||
|
echo "=== Restarting services ==="
|
||||||
|
ssh "$VPS_HOST" "sudo systemctl restart teleo-pipeline teleo-diagnostics"
|
||||||
|
echo "Services restarted."
|
||||||
|
fi
|
||||||
262
ops/diagnostics/activity_endpoint.py
Normal file
262
ops/diagnostics/activity_endpoint.py
Normal file
|
|
@ -0,0 +1,262 @@
|
||||||
|
"""
|
||||||
|
/api/activity endpoint for diagnostics service.
|
||||||
|
|
||||||
|
Serves per-operation events for the dashboard v2 timeline hero panel.
|
||||||
|
Derives events from the prs table (per-PR granularity) and audit_log
|
||||||
|
(pipeline-level ops). Cursor-based pagination via timestamp.
|
||||||
|
|
||||||
|
Integration: add route and handler to app.py:
|
||||||
|
app.router.add_get('/api/activity', handle_activity)
|
||||||
|
|
||||||
|
Contract (endpoint #7):
|
||||||
|
GET /api/activity?limit=100&cursor=<ISO-timestamp>
|
||||||
|
Response: {
|
||||||
|
events: [{timestamp, agent, operation, target, domain, description, status, pr_number}],
|
||||||
|
limit: int,
|
||||||
|
cursor: string|null,
|
||||||
|
has_more: bool
|
||||||
|
}
|
||||||
|
|
||||||
|
Data sources:
|
||||||
|
- prs table: number, status, domain, agent, created_at, merged_at, branch, source_path
|
||||||
|
- audit_log table: timestamp, stage, event, detail
|
||||||
|
- contributors table: handle, display_name (for agent name resolution)
|
||||||
|
"""
|
||||||
|
|
||||||
|
from aiohttp import web
|
||||||
|
import sqlite3
|
||||||
|
import json
|
||||||
|
|
||||||
|
|
||||||
|
# Map PR status to Clay's operation color palette
|
||||||
|
# extract (cyan), new (green), enrich (amber), challenge (red-orange),
|
||||||
|
# decision (violet), infra (grey)
|
||||||
|
STATUS_TO_OPERATION = {
|
||||||
|
'merged': 'new', # green — new knowledge merged
|
||||||
|
'approved': 'enrich', # amber — approved, enriching KB
|
||||||
|
'open': 'extract', # cyan — new extraction in progress
|
||||||
|
'validating': 'extract', # cyan — being validated
|
||||||
|
'reviewing': 'extract', # cyan — under review
|
||||||
|
'merging': 'new', # green — merge in progress
|
||||||
|
'closed': 'infra', # grey — closed/rejected
|
||||||
|
'zombie': 'infra', # grey — stale
|
||||||
|
'conflict': 'challenge', # red-orange — conflict detected
|
||||||
|
}
|
||||||
|
|
||||||
|
# Map audit_log stage to operation type
|
||||||
|
STAGE_TO_OPERATION = {
|
||||||
|
'ingest': 'extract',
|
||||||
|
'extract': 'extract',
|
||||||
|
'validate': 'infra',
|
||||||
|
'evaluate': 'infra',
|
||||||
|
'merge': 'new',
|
||||||
|
'reject': 'infra',
|
||||||
|
'breaker': 'challenge',
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def pr_description(row):
|
||||||
|
"""Generate human-readable description from a PR row."""
|
||||||
|
status = row['status']
|
||||||
|
domain = row['domain'] or 'unknown'
|
||||||
|
branch = row['branch'] or ''
|
||||||
|
|
||||||
|
# Extract a meaningful target from the branch name
|
||||||
|
# Branch format is typically: agent-name/claims-description
|
||||||
|
target = branch.split('/')[-1] if '/' in branch else branch
|
||||||
|
|
||||||
|
# Infer agent from branch prefix if not in the row
|
||||||
|
branch_agent = branch.split('/')[0] if '/' in branch else None
|
||||||
|
|
||||||
|
# Build a richer description with domain context
|
||||||
|
domain_tag = f" [{domain}]" if domain and domain != 'unknown' and domain != 'general' else ''
|
||||||
|
|
||||||
|
templates = {
|
||||||
|
'merged': f"Merged{domain_tag}: {target}",
|
||||||
|
'approved': f"Approved{domain_tag}: {target}",
|
||||||
|
'open': f"Opened{domain_tag}: {target}",
|
||||||
|
'validating': f"Validating{domain_tag}: {target}",
|
||||||
|
'reviewing': f"Reviewing{domain_tag}: {target}",
|
||||||
|
'merging': f"Merging{domain_tag}: {target}",
|
||||||
|
'closed': f"Closed{domain_tag}: {target}",
|
||||||
|
'zombie': f"Stale{domain_tag}: {target}",
|
||||||
|
'conflict': f"Conflict{domain_tag}: {target}",
|
||||||
|
}
|
||||||
|
|
||||||
|
return templates.get(status, f"PR #{row['number']}{domain_tag}: {target}")
|
||||||
|
|
||||||
|
|
||||||
|
def audit_description(row):
|
||||||
|
"""Generate human-readable description from an audit_log row."""
|
||||||
|
stage = row['stage'] or ''
|
||||||
|
event = row['event'] or ''
|
||||||
|
detail = row['detail'] or ''
|
||||||
|
|
||||||
|
# Try to parse detail as JSON
|
||||||
|
if detail:
|
||||||
|
try:
|
||||||
|
detail_obj = json.loads(detail)
|
||||||
|
if isinstance(detail_obj, dict):
|
||||||
|
msg = detail_obj.get('message') or detail_obj.get('reason', '')
|
||||||
|
if msg:
|
||||||
|
return f"[{stage}] {msg}"[:150]
|
||||||
|
except (json.JSONDecodeError, TypeError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
if event:
|
||||||
|
desc = f"[{stage}] {event}"
|
||||||
|
if detail and len(detail) < 80:
|
||||||
|
desc += f" — {detail}"
|
||||||
|
return desc[:150]
|
||||||
|
|
||||||
|
return f"[{stage}] pipeline event"
|
||||||
|
|
||||||
|
|
||||||
|
async def handle_activity(request):
|
||||||
|
"""Handler for GET /api/activity.
|
||||||
|
|
||||||
|
Query params:
|
||||||
|
limit (int, default 100, max 500): number of events to return
|
||||||
|
cursor (ISO timestamp): return events older than this timestamp
|
||||||
|
|
||||||
|
Derives events from two sources:
|
||||||
|
1. prs table — per-PR events with domain, agent, status
|
||||||
|
2. audit_log — pipeline-level operational events
|
||||||
|
|
||||||
|
Events are merged and sorted by timestamp descending (most recent first).
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
limit = min(int(request.query.get('limit', 100)), 500)
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
limit = 100
|
||||||
|
|
||||||
|
cursor = request.query.get('cursor')
|
||||||
|
db_path = request.app['db_path']
|
||||||
|
|
||||||
|
try:
|
||||||
|
conn = sqlite3.connect(f'file:{db_path}?mode=ro', uri=True)
|
||||||
|
conn.row_factory = sqlite3.Row
|
||||||
|
|
||||||
|
events = []
|
||||||
|
|
||||||
|
# Source 1: PR events (primary — these have the granularity we need)
|
||||||
|
# Each PR generates events at created_at and merged_at timestamps
|
||||||
|
pr_query = """
|
||||||
|
SELECT number, status, domain, agent, branch, source_path,
|
||||||
|
created_at, merged_at
|
||||||
|
FROM prs
|
||||||
|
WHERE {where_clause}
|
||||||
|
ORDER BY COALESCE(merged_at, created_at) DESC
|
||||||
|
LIMIT ?
|
||||||
|
"""
|
||||||
|
|
||||||
|
if cursor:
|
||||||
|
rows = conn.execute(
|
||||||
|
pr_query.format(where_clause="COALESCE(merged_at, created_at) < ?"),
|
||||||
|
(cursor, limit + 1)
|
||||||
|
).fetchall()
|
||||||
|
else:
|
||||||
|
rows = conn.execute(
|
||||||
|
pr_query.format(where_clause="1=1"),
|
||||||
|
(limit + 1,)
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
# Known knowledge agents for branch-prefix inference
|
||||||
|
knowledge_agents = {'rio', 'clay', 'theseus', 'vida', 'astra', 'leo'}
|
||||||
|
|
||||||
|
for row in rows:
|
||||||
|
row_dict = dict(row)
|
||||||
|
operation = STATUS_TO_OPERATION.get(row_dict['status'], 'infra')
|
||||||
|
description = pr_description(row_dict)
|
||||||
|
|
||||||
|
# Use merged_at if available (more interesting event), else created_at
|
||||||
|
timestamp = row_dict['merged_at'] or row_dict['created_at']
|
||||||
|
|
||||||
|
# Infer agent from branch prefix if DB column is null
|
||||||
|
# Branch format: agent-name/claims-description
|
||||||
|
agent = row_dict['agent']
|
||||||
|
if not agent and row_dict.get('branch'):
|
||||||
|
prefix = row_dict['branch'].split('/')[0].lower()
|
||||||
|
if prefix in knowledge_agents:
|
||||||
|
agent = prefix
|
||||||
|
|
||||||
|
events.append({
|
||||||
|
'timestamp': timestamp,
|
||||||
|
'agent': agent,
|
||||||
|
'operation': operation,
|
||||||
|
'target': (row_dict['branch'] or '').split('/')[-1] if row_dict['branch'] else None,
|
||||||
|
'domain': row_dict['domain'],
|
||||||
|
'description': description,
|
||||||
|
'status': row_dict['status'],
|
||||||
|
'pr_number': row_dict['number'],
|
||||||
|
})
|
||||||
|
|
||||||
|
# Source 2: Audit log events (secondary — pipeline-level)
|
||||||
|
# Only include if we haven't hit our limit from PRs alone
|
||||||
|
if len(events) < limit:
|
||||||
|
remaining = limit - len(events) + 1
|
||||||
|
audit_query = """
|
||||||
|
SELECT timestamp, stage, event, detail
|
||||||
|
FROM audit_log
|
||||||
|
WHERE {where_clause}
|
||||||
|
ORDER BY timestamp DESC
|
||||||
|
LIMIT ?
|
||||||
|
"""
|
||||||
|
|
||||||
|
if cursor:
|
||||||
|
audit_rows = conn.execute(
|
||||||
|
audit_query.format(where_clause="timestamp < ?"),
|
||||||
|
(cursor, remaining)
|
||||||
|
).fetchall()
|
||||||
|
else:
|
||||||
|
audit_rows = conn.execute(
|
||||||
|
audit_query.format(where_clause="1=1"),
|
||||||
|
(remaining,)
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
for row in audit_rows:
|
||||||
|
row_dict = dict(row)
|
||||||
|
operation = STAGE_TO_OPERATION.get(row_dict['stage'], 'infra')
|
||||||
|
description = audit_description(row_dict)
|
||||||
|
|
||||||
|
events.append({
|
||||||
|
'timestamp': row_dict['timestamp'],
|
||||||
|
'agent': None, # audit_log has no agent column
|
||||||
|
'operation': operation,
|
||||||
|
'target': None,
|
||||||
|
'domain': None,
|
||||||
|
'description': description,
|
||||||
|
'status': None,
|
||||||
|
'pr_number': None,
|
||||||
|
})
|
||||||
|
|
||||||
|
conn.close()
|
||||||
|
except sqlite3.Error as e:
|
||||||
|
return web.json_response({'error': f'Database error: {e}'}, status=500)
|
||||||
|
|
||||||
|
# Sort all events by timestamp descending
|
||||||
|
events.sort(key=lambda e: e['timestamp'] or '', reverse=True)
|
||||||
|
|
||||||
|
# Apply limit and check for more
|
||||||
|
has_more = len(events) > limit
|
||||||
|
events = events[:limit]
|
||||||
|
|
||||||
|
# Cursor is the timestamp of the last event returned
|
||||||
|
next_cursor = events[-1]['timestamp'] if events else None
|
||||||
|
|
||||||
|
return web.json_response({
|
||||||
|
'events': events,
|
||||||
|
'limit': limit,
|
||||||
|
'cursor': next_cursor,
|
||||||
|
'has_more': has_more,
|
||||||
|
})
|
||||||
|
|
||||||
|
|
||||||
|
# --- Integration snippet for app.py ---
|
||||||
|
# Add to your route setup:
|
||||||
|
#
|
||||||
|
# from activity_endpoint import handle_activity
|
||||||
|
# app.router.add_get('/api/activity', handle_activity)
|
||||||
|
#
|
||||||
|
# Requires: app['db_path'] set to the pipeline.db path
|
||||||
|
# e.g.: app['db_path'] = '/opt/teleo-eval/pipeline/pipeline.db'
|
||||||
537
ops/diagnostics/alerting.py
Normal file
537
ops/diagnostics/alerting.py
Normal file
|
|
@ -0,0 +1,537 @@
|
||||||
|
"""Argus active monitoring — health watchdog, quality regression, throughput anomaly detection.
|
||||||
|
|
||||||
|
Provides check functions that detect problems and return structured alerts.
|
||||||
|
Called by /check endpoint (periodic cron) or on-demand.
|
||||||
|
|
||||||
|
Alert schema:
|
||||||
|
{
|
||||||
|
"id": str, # unique key for dedup (e.g. "dormant:ganymede")
|
||||||
|
"severity": str, # "critical" | "warning" | "info"
|
||||||
|
"category": str, # "health" | "quality" | "throughput" | "failure_pattern"
|
||||||
|
"title": str, # human-readable headline
|
||||||
|
"detail": str, # actionable description
|
||||||
|
"agent": str|None, # affected agent (if applicable)
|
||||||
|
"domain": str|None, # affected domain (if applicable)
|
||||||
|
"detected_at": str, # ISO timestamp
|
||||||
|
"auto_resolve": bool, # clears when condition clears
|
||||||
|
}
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import sqlite3
|
||||||
|
import statistics
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Agent-domain mapping (static config, maintained by Argus) ──────────────
|
||||||
|
|
||||||
|
AGENT_DOMAINS = {
|
||||||
|
"rio": ["internet-finance"],
|
||||||
|
"clay": ["creative-industries"],
|
||||||
|
"ganymede": None, # reviewer — cross-domain
|
||||||
|
"epimetheus": None, # infra
|
||||||
|
"leo": None, # standards
|
||||||
|
"oberon": None, # evolution tracking
|
||||||
|
"vida": None, # health monitoring
|
||||||
|
"hermes": None, # comms
|
||||||
|
"astra": None, # research
|
||||||
|
}
|
||||||
|
|
||||||
|
# Thresholds
|
||||||
|
DORMANCY_HOURS = 48
|
||||||
|
APPROVAL_DROP_THRESHOLD = 15 # percentage points below 7-day baseline
|
||||||
|
THROUGHPUT_DROP_RATIO = 0.5 # alert if today < 50% of 7-day SMA
|
||||||
|
REJECTION_SPIKE_RATIO = 0.20 # single reason > 20% of recent rejections
|
||||||
|
STUCK_LOOP_THRESHOLD = 3 # same agent + same rejection reason > N times in 6h
|
||||||
|
COST_SPIKE_RATIO = 2.0 # daily cost > 2x 7-day average
|
||||||
|
|
||||||
|
|
||||||
|
def _now_iso() -> str:
|
||||||
|
return datetime.now(timezone.utc).isoformat()
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Check: Agent Health (dormancy detection) ───────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def check_agent_health(conn: sqlite3.Connection) -> list[dict]:
|
||||||
|
"""Detect agents with no PR activity in the last DORMANCY_HOURS hours."""
|
||||||
|
alerts = []
|
||||||
|
|
||||||
|
# Get last activity per agent
|
||||||
|
rows = conn.execute(
|
||||||
|
"""SELECT agent, MAX(last_attempt) as latest, COUNT(*) as total_prs
|
||||||
|
FROM prs WHERE agent IS NOT NULL
|
||||||
|
GROUP BY agent"""
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
now = datetime.now(timezone.utc)
|
||||||
|
for r in rows:
|
||||||
|
agent = r["agent"]
|
||||||
|
latest = r["latest"]
|
||||||
|
if not latest:
|
||||||
|
continue
|
||||||
|
|
||||||
|
last_dt = datetime.fromisoformat(latest)
|
||||||
|
if last_dt.tzinfo is None:
|
||||||
|
last_dt = last_dt.replace(tzinfo=timezone.utc)
|
||||||
|
|
||||||
|
hours_since = (now - last_dt).total_seconds() / 3600
|
||||||
|
|
||||||
|
if hours_since > DORMANCY_HOURS:
|
||||||
|
alerts.append({
|
||||||
|
"id": f"dormant:{agent}",
|
||||||
|
"severity": "warning",
|
||||||
|
"category": "health",
|
||||||
|
"title": f"Agent '{agent}' dormant for {int(hours_since)}h",
|
||||||
|
"detail": (
|
||||||
|
f"No PR activity since {latest}. "
|
||||||
|
f"Last seen {int(hours_since)}h ago (threshold: {DORMANCY_HOURS}h). "
|
||||||
|
f"Total historical PRs: {r['total_prs']}."
|
||||||
|
),
|
||||||
|
"agent": agent,
|
||||||
|
"domain": None,
|
||||||
|
"detected_at": _now_iso(),
|
||||||
|
"auto_resolve": True,
|
||||||
|
})
|
||||||
|
|
||||||
|
return alerts
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Check: Quality Regression (approval rate drop) ─────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def check_quality_regression(conn: sqlite3.Connection) -> list[dict]:
|
||||||
|
"""Detect approval rate drops vs 7-day baseline, per agent and per domain."""
|
||||||
|
alerts = []
|
||||||
|
|
||||||
|
# 7-day baseline approval rate (overall)
|
||||||
|
baseline = conn.execute(
|
||||||
|
"""SELECT
|
||||||
|
COUNT(CASE WHEN event='approved' THEN 1 END) as approved,
|
||||||
|
COUNT(*) as total
|
||||||
|
FROM audit_log
|
||||||
|
WHERE stage='evaluate'
|
||||||
|
AND event IN ('approved','changes_requested','domain_rejected','tier05_rejected')
|
||||||
|
AND timestamp > datetime('now', '-7 days')"""
|
||||||
|
).fetchone()
|
||||||
|
baseline_rate = (baseline["approved"] / baseline["total"] * 100) if baseline["total"] else None
|
||||||
|
|
||||||
|
# 24h approval rate (overall)
|
||||||
|
recent = conn.execute(
|
||||||
|
"""SELECT
|
||||||
|
COUNT(CASE WHEN event='approved' THEN 1 END) as approved,
|
||||||
|
COUNT(*) as total
|
||||||
|
FROM audit_log
|
||||||
|
WHERE stage='evaluate'
|
||||||
|
AND event IN ('approved','changes_requested','domain_rejected','tier05_rejected')
|
||||||
|
AND timestamp > datetime('now', '-24 hours')"""
|
||||||
|
).fetchone()
|
||||||
|
recent_rate = (recent["approved"] / recent["total"] * 100) if recent["total"] else None
|
||||||
|
|
||||||
|
if baseline_rate is not None and recent_rate is not None:
|
||||||
|
drop = baseline_rate - recent_rate
|
||||||
|
if drop > APPROVAL_DROP_THRESHOLD:
|
||||||
|
alerts.append({
|
||||||
|
"id": "quality_regression:overall",
|
||||||
|
"severity": "critical",
|
||||||
|
"category": "quality",
|
||||||
|
"title": f"Approval rate dropped {drop:.0f}pp (24h: {recent_rate:.0f}% vs 7d: {baseline_rate:.0f}%)",
|
||||||
|
"detail": (
|
||||||
|
f"24h approval rate ({recent_rate:.1f}%) is {drop:.1f} percentage points below "
|
||||||
|
f"7-day baseline ({baseline_rate:.1f}%). "
|
||||||
|
f"Evaluated {recent['total']} PRs in last 24h."
|
||||||
|
),
|
||||||
|
"agent": None,
|
||||||
|
"domain": None,
|
||||||
|
"detected_at": _now_iso(),
|
||||||
|
"auto_resolve": True,
|
||||||
|
})
|
||||||
|
|
||||||
|
# Per-agent approval rate (24h vs 7d) — only for agents with >=5 evals in each window
|
||||||
|
# COALESCE: rejection events use $.agent, eval events use $.domain_agent (Epimetheus 2026-03-28)
|
||||||
|
_check_approval_by_dimension(conn, alerts, "agent", "COALESCE(json_extract(detail, '$.agent'), json_extract(detail, '$.domain_agent'))")
|
||||||
|
|
||||||
|
# Per-domain approval rate (24h vs 7d) — Theseus addition
|
||||||
|
_check_approval_by_dimension(conn, alerts, "domain", "json_extract(detail, '$.domain')")
|
||||||
|
|
||||||
|
return alerts
|
||||||
|
|
||||||
|
|
||||||
|
def _check_approval_by_dimension(conn, alerts, dim_name, dim_expr):
|
||||||
|
"""Check approval rate regression grouped by a dimension (agent or domain)."""
|
||||||
|
# 7-day baseline per dimension
|
||||||
|
baseline_rows = conn.execute(
|
||||||
|
f"""SELECT {dim_expr} as dim_val,
|
||||||
|
COUNT(CASE WHEN event='approved' THEN 1 END) as approved,
|
||||||
|
COUNT(*) as total
|
||||||
|
FROM audit_log
|
||||||
|
WHERE stage='evaluate'
|
||||||
|
AND event IN ('approved','changes_requested','domain_rejected','tier05_rejected')
|
||||||
|
AND timestamp > datetime('now', '-7 days')
|
||||||
|
AND {dim_expr} IS NOT NULL
|
||||||
|
GROUP BY dim_val HAVING total >= 5"""
|
||||||
|
).fetchall()
|
||||||
|
baselines = {r["dim_val"]: (r["approved"] / r["total"] * 100) for r in baseline_rows}
|
||||||
|
|
||||||
|
# 24h per dimension
|
||||||
|
recent_rows = conn.execute(
|
||||||
|
f"""SELECT {dim_expr} as dim_val,
|
||||||
|
COUNT(CASE WHEN event='approved' THEN 1 END) as approved,
|
||||||
|
COUNT(*) as total
|
||||||
|
FROM audit_log
|
||||||
|
WHERE stage='evaluate'
|
||||||
|
AND event IN ('approved','changes_requested','domain_rejected','tier05_rejected')
|
||||||
|
AND timestamp > datetime('now', '-24 hours')
|
||||||
|
AND {dim_expr} IS NOT NULL
|
||||||
|
GROUP BY dim_val HAVING total >= 5"""
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
for r in recent_rows:
|
||||||
|
val = r["dim_val"]
|
||||||
|
if val not in baselines:
|
||||||
|
continue
|
||||||
|
recent_rate = r["approved"] / r["total"] * 100
|
||||||
|
base_rate = baselines[val]
|
||||||
|
drop = base_rate - recent_rate
|
||||||
|
if drop > APPROVAL_DROP_THRESHOLD:
|
||||||
|
alerts.append({
|
||||||
|
"id": f"quality_regression:{dim_name}:{val}",
|
||||||
|
"severity": "warning",
|
||||||
|
"category": "quality",
|
||||||
|
"title": f"{dim_name.title()} '{val}' approval dropped {drop:.0f}pp",
|
||||||
|
"detail": (
|
||||||
|
f"24h: {recent_rate:.1f}% vs 7d baseline: {base_rate:.1f}% "
|
||||||
|
f"({r['total']} evals in 24h)."
|
||||||
|
),
|
||||||
|
"agent": val if dim_name == "agent" else None,
|
||||||
|
"domain": val if dim_name == "domain" else None,
|
||||||
|
"detected_at": _now_iso(),
|
||||||
|
"auto_resolve": True,
|
||||||
|
})
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Check: Throughput Anomaly ──────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def check_throughput(conn: sqlite3.Connection) -> list[dict]:
|
||||||
|
"""Detect throughput stalling — today vs 7-day SMA."""
|
||||||
|
alerts = []
|
||||||
|
|
||||||
|
# Daily merged counts for last 7 days
|
||||||
|
rows = conn.execute(
|
||||||
|
"""SELECT date(merged_at) as day, COUNT(*) as n
|
||||||
|
FROM prs WHERE merged_at > datetime('now', '-7 days')
|
||||||
|
GROUP BY day ORDER BY day"""
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
if len(rows) < 2:
|
||||||
|
return alerts # Not enough data
|
||||||
|
|
||||||
|
daily_counts = [r["n"] for r in rows]
|
||||||
|
sma = statistics.mean(daily_counts[:-1]) if len(daily_counts) > 1 else daily_counts[0]
|
||||||
|
today_count = daily_counts[-1]
|
||||||
|
|
||||||
|
if sma > 0 and today_count < sma * THROUGHPUT_DROP_RATIO:
|
||||||
|
alerts.append({
|
||||||
|
"id": "throughput:stalling",
|
||||||
|
"severity": "warning",
|
||||||
|
"category": "throughput",
|
||||||
|
"title": f"Throughput stalling: {today_count} merges today vs {sma:.0f}/day avg",
|
||||||
|
"detail": (
|
||||||
|
f"Today's merge count ({today_count}) is below {THROUGHPUT_DROP_RATIO:.0%} of "
|
||||||
|
f"7-day average ({sma:.1f}/day). Daily counts: {daily_counts}."
|
||||||
|
),
|
||||||
|
"agent": None,
|
||||||
|
"domain": None,
|
||||||
|
"detected_at": _now_iso(),
|
||||||
|
"auto_resolve": True,
|
||||||
|
})
|
||||||
|
|
||||||
|
return alerts
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Check: Rejection Reason Spike ─────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def check_rejection_spike(conn: sqlite3.Connection) -> list[dict]:
|
||||||
|
"""Detect single rejection reason exceeding REJECTION_SPIKE_RATIO of recent rejections."""
|
||||||
|
alerts = []
|
||||||
|
|
||||||
|
# Total rejections in 24h
|
||||||
|
total = conn.execute(
|
||||||
|
"""SELECT COUNT(*) as n FROM audit_log
|
||||||
|
WHERE stage='evaluate'
|
||||||
|
AND event IN ('changes_requested','domain_rejected','tier05_rejected')
|
||||||
|
AND timestamp > datetime('now', '-24 hours')"""
|
||||||
|
).fetchone()["n"]
|
||||||
|
|
||||||
|
if total < 10:
|
||||||
|
return alerts # Not enough data
|
||||||
|
|
||||||
|
# Count by rejection tag
|
||||||
|
tags = conn.execute(
|
||||||
|
"""SELECT value as tag, COUNT(*) as cnt
|
||||||
|
FROM audit_log, json_each(json_extract(detail, '$.issues'))
|
||||||
|
WHERE stage='evaluate'
|
||||||
|
AND event IN ('changes_requested','domain_rejected','tier05_rejected')
|
||||||
|
AND timestamp > datetime('now', '-24 hours')
|
||||||
|
GROUP BY tag ORDER BY cnt DESC"""
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
for t in tags:
|
||||||
|
ratio = t["cnt"] / total
|
||||||
|
if ratio > REJECTION_SPIKE_RATIO:
|
||||||
|
alerts.append({
|
||||||
|
"id": f"rejection_spike:{t['tag']}",
|
||||||
|
"severity": "warning",
|
||||||
|
"category": "quality",
|
||||||
|
"title": f"Rejection reason '{t['tag']}' at {ratio:.0%} of rejections",
|
||||||
|
"detail": (
|
||||||
|
f"'{t['tag']}' accounts for {t['cnt']}/{total} rejections in 24h "
|
||||||
|
f"({ratio:.1%}). Threshold: {REJECTION_SPIKE_RATIO:.0%}."
|
||||||
|
),
|
||||||
|
"agent": None,
|
||||||
|
"domain": None,
|
||||||
|
"detected_at": _now_iso(),
|
||||||
|
"auto_resolve": True,
|
||||||
|
})
|
||||||
|
|
||||||
|
return alerts
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Check: Stuck Loops ────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def check_stuck_loops(conn: sqlite3.Connection) -> list[dict]:
|
||||||
|
"""Detect agents repeatedly failing on the same rejection reason."""
|
||||||
|
alerts = []
|
||||||
|
|
||||||
|
# COALESCE: rejection events use $.agent, eval events use $.domain_agent (Epimetheus 2026-03-28)
|
||||||
|
rows = conn.execute(
|
||||||
|
"""SELECT COALESCE(json_extract(detail, '$.agent'), json_extract(detail, '$.domain_agent')) as agent,
|
||||||
|
value as tag,
|
||||||
|
COUNT(*) as cnt
|
||||||
|
FROM audit_log, json_each(json_extract(detail, '$.issues'))
|
||||||
|
WHERE stage='evaluate'
|
||||||
|
AND event IN ('changes_requested','domain_rejected','tier05_rejected')
|
||||||
|
AND timestamp > datetime('now', '-6 hours')
|
||||||
|
AND COALESCE(json_extract(detail, '$.agent'), json_extract(detail, '$.domain_agent')) IS NOT NULL
|
||||||
|
GROUP BY agent, tag
|
||||||
|
HAVING cnt > ?""",
|
||||||
|
(STUCK_LOOP_THRESHOLD,),
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
for r in rows:
|
||||||
|
alerts.append({
|
||||||
|
"id": f"stuck_loop:{r['agent']}:{r['tag']}",
|
||||||
|
"severity": "critical",
|
||||||
|
"category": "health",
|
||||||
|
"title": f"Agent '{r['agent']}' stuck: '{r['tag']}' failed {r['cnt']}x in 6h",
|
||||||
|
"detail": (
|
||||||
|
f"Agent '{r['agent']}' has been rejected for '{r['tag']}' "
|
||||||
|
f"{r['cnt']} times in the last 6 hours (threshold: {STUCK_LOOP_THRESHOLD}). "
|
||||||
|
f"Stop and reassess."
|
||||||
|
),
|
||||||
|
"agent": r["agent"],
|
||||||
|
"domain": None,
|
||||||
|
"detected_at": _now_iso(),
|
||||||
|
"auto_resolve": True,
|
||||||
|
})
|
||||||
|
|
||||||
|
return alerts
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Check: Cost Spikes ────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def check_cost_spikes(conn: sqlite3.Connection) -> list[dict]:
|
||||||
|
"""Detect daily cost exceeding 2x of 7-day average per agent."""
|
||||||
|
alerts = []
|
||||||
|
|
||||||
|
# Check if costs table exists and has agent column
|
||||||
|
try:
|
||||||
|
cols = conn.execute("PRAGMA table_info(costs)").fetchall()
|
||||||
|
col_names = {c["name"] for c in cols}
|
||||||
|
except sqlite3.Error:
|
||||||
|
return alerts
|
||||||
|
|
||||||
|
if "agent" not in col_names or "cost_usd" not in col_names:
|
||||||
|
# Fall back to per-PR cost tracking
|
||||||
|
rows = conn.execute(
|
||||||
|
"""SELECT agent,
|
||||||
|
SUM(CASE WHEN created_at > datetime('now', '-1 day') THEN cost_usd ELSE 0 END) as today_cost,
|
||||||
|
SUM(CASE WHEN created_at > datetime('now', '-7 days') THEN cost_usd ELSE 0 END) / 7.0 as avg_daily
|
||||||
|
FROM prs WHERE agent IS NOT NULL AND cost_usd > 0
|
||||||
|
GROUP BY agent
|
||||||
|
HAVING avg_daily > 0"""
|
||||||
|
).fetchall()
|
||||||
|
else:
|
||||||
|
rows = conn.execute(
|
||||||
|
"""SELECT agent,
|
||||||
|
SUM(CASE WHEN timestamp > datetime('now', '-1 day') THEN cost_usd ELSE 0 END) as today_cost,
|
||||||
|
SUM(CASE WHEN timestamp > datetime('now', '-7 days') THEN cost_usd ELSE 0 END) / 7.0 as avg_daily
|
||||||
|
FROM costs WHERE agent IS NOT NULL
|
||||||
|
GROUP BY agent
|
||||||
|
HAVING avg_daily > 0"""
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
for r in rows:
|
||||||
|
if r["avg_daily"] and r["today_cost"] > r["avg_daily"] * COST_SPIKE_RATIO:
|
||||||
|
ratio = r["today_cost"] / r["avg_daily"]
|
||||||
|
alerts.append({
|
||||||
|
"id": f"cost_spike:{r['agent']}",
|
||||||
|
"severity": "warning",
|
||||||
|
"category": "health",
|
||||||
|
"title": f"Agent '{r['agent']}' cost spike: ${r['today_cost']:.2f} today ({ratio:.1f}x avg)",
|
||||||
|
"detail": (
|
||||||
|
f"Today's cost (${r['today_cost']:.2f}) is {ratio:.1f}x the 7-day daily average "
|
||||||
|
f"(${r['avg_daily']:.2f}). Threshold: {COST_SPIKE_RATIO}x."
|
||||||
|
),
|
||||||
|
"agent": r["agent"],
|
||||||
|
"domain": None,
|
||||||
|
"detected_at": _now_iso(),
|
||||||
|
"auto_resolve": True,
|
||||||
|
})
|
||||||
|
|
||||||
|
return alerts
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Check: Domain Rejection Patterns (Theseus addition) ───────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def check_domain_rejection_patterns(conn: sqlite3.Connection) -> list[dict]:
|
||||||
|
"""Track rejection reason shift per domain — surfaces domain maturity issues."""
|
||||||
|
alerts = []
|
||||||
|
|
||||||
|
# Per-domain rejection breakdown in 24h
|
||||||
|
rows = conn.execute(
|
||||||
|
"""SELECT json_extract(detail, '$.domain') as domain,
|
||||||
|
value as tag,
|
||||||
|
COUNT(*) as cnt
|
||||||
|
FROM audit_log, json_each(json_extract(detail, '$.issues'))
|
||||||
|
WHERE stage='evaluate'
|
||||||
|
AND event IN ('changes_requested','domain_rejected','tier05_rejected')
|
||||||
|
AND timestamp > datetime('now', '-24 hours')
|
||||||
|
AND json_extract(detail, '$.domain') IS NOT NULL
|
||||||
|
GROUP BY domain, tag
|
||||||
|
ORDER BY domain, cnt DESC"""
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
# Group by domain
|
||||||
|
domain_tags = {}
|
||||||
|
for r in rows:
|
||||||
|
d = r["domain"]
|
||||||
|
if d not in domain_tags:
|
||||||
|
domain_tags[d] = []
|
||||||
|
domain_tags[d].append({"tag": r["tag"], "count": r["cnt"]})
|
||||||
|
|
||||||
|
# Flag if a domain has >50% of rejections from a single reason (concentrated failure)
|
||||||
|
for domain, tags in domain_tags.items():
|
||||||
|
total = sum(t["count"] for t in tags)
|
||||||
|
if total < 5:
|
||||||
|
continue
|
||||||
|
top = tags[0]
|
||||||
|
ratio = top["count"] / total
|
||||||
|
if ratio > 0.5:
|
||||||
|
alerts.append({
|
||||||
|
"id": f"domain_rejection_pattern:{domain}:{top['tag']}",
|
||||||
|
"severity": "info",
|
||||||
|
"category": "failure_pattern",
|
||||||
|
"title": f"Domain '{domain}': {ratio:.0%} of rejections are '{top['tag']}'",
|
||||||
|
"detail": (
|
||||||
|
f"In domain '{domain}', {top['count']}/{total} rejections (24h) are for "
|
||||||
|
f"'{top['tag']}'. This may indicate a systematic issue with evidence standards "
|
||||||
|
f"or schema compliance in this domain."
|
||||||
|
),
|
||||||
|
"agent": None,
|
||||||
|
"domain": domain,
|
||||||
|
"detected_at": _now_iso(),
|
||||||
|
"auto_resolve": True,
|
||||||
|
})
|
||||||
|
|
||||||
|
return alerts
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Failure Report Generator ───────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def generate_failure_report(conn: sqlite3.Connection, agent: str, hours: int = 24) -> dict | None:
|
||||||
|
"""Compile a failure report for a specific agent.
|
||||||
|
|
||||||
|
Returns top rejection reasons, example PRs, and suggested fixes.
|
||||||
|
Designed to be sent directly to the agent via Pentagon messaging.
|
||||||
|
"""
|
||||||
|
hours = int(hours) # defensive — callers should pass int, but enforce it
|
||||||
|
rows = conn.execute(
|
||||||
|
"""SELECT value as tag, COUNT(*) as cnt,
|
||||||
|
GROUP_CONCAT(DISTINCT json_extract(detail, '$.pr')) as pr_numbers
|
||||||
|
FROM audit_log, json_each(json_extract(detail, '$.issues'))
|
||||||
|
WHERE stage='evaluate'
|
||||||
|
AND event IN ('changes_requested','domain_rejected','tier05_rejected')
|
||||||
|
AND COALESCE(json_extract(detail, '$.agent'), json_extract(detail, '$.domain_agent')) = ?
|
||||||
|
AND timestamp > datetime('now', ? || ' hours')
|
||||||
|
GROUP BY tag ORDER BY cnt DESC
|
||||||
|
LIMIT 5""",
|
||||||
|
(agent, f"-{hours}"),
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
if not rows:
|
||||||
|
return None
|
||||||
|
|
||||||
|
total_rejections = sum(r["cnt"] for r in rows)
|
||||||
|
top_reasons = []
|
||||||
|
for r in rows:
|
||||||
|
prs = r["pr_numbers"].split(",")[:3] if r["pr_numbers"] else []
|
||||||
|
top_reasons.append({
|
||||||
|
"reason": r["tag"],
|
||||||
|
"count": r["cnt"],
|
||||||
|
"pct": round(r["cnt"] / total_rejections * 100, 1),
|
||||||
|
"example_prs": prs,
|
||||||
|
"suggestion": _suggest_fix(r["tag"]),
|
||||||
|
})
|
||||||
|
|
||||||
|
return {
|
||||||
|
"agent": agent,
|
||||||
|
"period_hours": hours,
|
||||||
|
"total_rejections": total_rejections,
|
||||||
|
"top_reasons": top_reasons,
|
||||||
|
"generated_at": _now_iso(),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _suggest_fix(rejection_tag: str) -> str:
|
||||||
|
"""Map known rejection reasons to actionable suggestions."""
|
||||||
|
suggestions = {
|
||||||
|
"broken_wiki_links": "Check that all [[wiki links]] in claims resolve to existing files. Run link validation before submitting.",
|
||||||
|
"near_duplicate": "Search existing claims before creating new ones. Use semantic search to find similar claims.",
|
||||||
|
"frontmatter_schema": "Validate YAML frontmatter against the claim schema. Required fields: title, domain, confidence, type.",
|
||||||
|
"weak_evidence": "Add concrete sources, data points, or citations. Claims need evidence that can be independently verified.",
|
||||||
|
"missing_confidence": "Every claim needs a confidence level: proven, likely, experimental, or speculative.",
|
||||||
|
"domain_mismatch": "Ensure claims are filed under the correct domain. Check domain definitions if unsure.",
|
||||||
|
"too_broad": "Break broad claims into specific, testable sub-claims.",
|
||||||
|
"missing_links": "Claims should link to related claims, entities, or sources. Isolated claims are harder to verify.",
|
||||||
|
}
|
||||||
|
return suggestions.get(rejection_tag, f"Review rejection reason '{rejection_tag}' and adjust extraction accordingly.")
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Run All Checks ────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def run_all_checks(conn: sqlite3.Connection) -> list[dict]:
|
||||||
|
"""Execute all check functions and return combined alerts."""
|
||||||
|
alerts = []
|
||||||
|
alerts.extend(check_agent_health(conn))
|
||||||
|
alerts.extend(check_quality_regression(conn))
|
||||||
|
alerts.extend(check_throughput(conn))
|
||||||
|
alerts.extend(check_rejection_spike(conn))
|
||||||
|
alerts.extend(check_stuck_loops(conn))
|
||||||
|
alerts.extend(check_cost_spikes(conn))
|
||||||
|
alerts.extend(check_domain_rejection_patterns(conn))
|
||||||
|
return alerts
|
||||||
|
|
||||||
|
|
||||||
|
def format_alert_message(alert: dict) -> str:
|
||||||
|
"""Format an alert for Pentagon messaging."""
|
||||||
|
severity_icon = {"critical": "!!", "warning": "!", "info": "~"}
|
||||||
|
icon = severity_icon.get(alert["severity"], "?")
|
||||||
|
return f"[{icon}] {alert['title']}\n{alert['detail']}"
|
||||||
125
ops/diagnostics/alerting_routes.py
Normal file
125
ops/diagnostics/alerting_routes.py
Normal file
|
|
@ -0,0 +1,125 @@
|
||||||
|
"""Route handlers for /check and /api/alerts endpoints.
|
||||||
|
|
||||||
|
Import into app.py and register routes in create_app().
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
|
from aiohttp import web
|
||||||
|
from alerting import run_all_checks, generate_failure_report, format_alert_message # requires CWD = deploy dir; switch to relative import if packaged
|
||||||
|
|
||||||
|
logger = logging.getLogger("argus.alerting")
|
||||||
|
|
||||||
|
# In-memory alert store (replaced each /check cycle, persists between requests)
|
||||||
|
_active_alerts: list[dict] = []
|
||||||
|
_last_check: str | None = None
|
||||||
|
|
||||||
|
|
||||||
|
async def handle_check(request):
|
||||||
|
"""GET /check — run all monitoring checks, update active alerts, return results.
|
||||||
|
|
||||||
|
Designed to be called by systemd timer every 5 minutes.
|
||||||
|
Returns JSON summary of all detected issues.
|
||||||
|
"""
|
||||||
|
conn = request.app["_alerting_conn_func"]()
|
||||||
|
try:
|
||||||
|
alerts = run_all_checks(conn)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error("Check failed: %s", e)
|
||||||
|
return web.json_response({"error": str(e)}, status=500)
|
||||||
|
|
||||||
|
global _active_alerts, _last_check
|
||||||
|
_active_alerts = alerts
|
||||||
|
_last_check = datetime.now(timezone.utc).isoformat()
|
||||||
|
|
||||||
|
# Generate failure reports for agents with stuck loops
|
||||||
|
failure_reports = {}
|
||||||
|
stuck_agents = {a["agent"] for a in alerts if a["category"] == "health" and "stuck" in a["id"] and a["agent"]}
|
||||||
|
for agent in stuck_agents:
|
||||||
|
report = generate_failure_report(conn, agent)
|
||||||
|
if report:
|
||||||
|
failure_reports[agent] = report
|
||||||
|
|
||||||
|
result = {
|
||||||
|
"checked_at": _last_check,
|
||||||
|
"alert_count": len(alerts),
|
||||||
|
"critical": sum(1 for a in alerts if a["severity"] == "critical"),
|
||||||
|
"warning": sum(1 for a in alerts if a["severity"] == "warning"),
|
||||||
|
"info": sum(1 for a in alerts if a["severity"] == "info"),
|
||||||
|
"alerts": alerts,
|
||||||
|
"failure_reports": failure_reports,
|
||||||
|
}
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
"Check complete: %d alerts (%d critical, %d warning)",
|
||||||
|
len(alerts),
|
||||||
|
result["critical"],
|
||||||
|
result["warning"],
|
||||||
|
)
|
||||||
|
|
||||||
|
return web.json_response(result)
|
||||||
|
|
||||||
|
|
||||||
|
async def handle_api_alerts(request):
|
||||||
|
"""GET /api/alerts — return current active alerts.
|
||||||
|
|
||||||
|
Query params:
|
||||||
|
severity: filter by severity (critical, warning, info)
|
||||||
|
category: filter by category (health, quality, throughput, failure_pattern)
|
||||||
|
agent: filter by agent name
|
||||||
|
domain: filter by domain
|
||||||
|
"""
|
||||||
|
alerts = list(_active_alerts)
|
||||||
|
|
||||||
|
# Filters
|
||||||
|
severity = request.query.get("severity")
|
||||||
|
if severity:
|
||||||
|
alerts = [a for a in alerts if a["severity"] == severity]
|
||||||
|
|
||||||
|
category = request.query.get("category")
|
||||||
|
if category:
|
||||||
|
alerts = [a for a in alerts if a["category"] == category]
|
||||||
|
|
||||||
|
agent = request.query.get("agent")
|
||||||
|
if agent:
|
||||||
|
alerts = [a for a in alerts if a.get("agent") == agent]
|
||||||
|
|
||||||
|
domain = request.query.get("domain")
|
||||||
|
if domain:
|
||||||
|
alerts = [a for a in alerts if a.get("domain") == domain]
|
||||||
|
|
||||||
|
return web.json_response({
|
||||||
|
"alerts": alerts,
|
||||||
|
"total": len(alerts),
|
||||||
|
"last_check": _last_check,
|
||||||
|
})
|
||||||
|
|
||||||
|
|
||||||
|
async def handle_api_failure_report(request):
|
||||||
|
"""GET /api/failure-report/{agent} — generate failure report for an agent.
|
||||||
|
|
||||||
|
Query params:
|
||||||
|
hours: lookback window (default 24)
|
||||||
|
"""
|
||||||
|
agent = request.match_info["agent"]
|
||||||
|
hours = int(request.query.get("hours", "24"))
|
||||||
|
conn = request.app["_alerting_conn_func"]()
|
||||||
|
|
||||||
|
report = generate_failure_report(conn, agent, hours)
|
||||||
|
if not report:
|
||||||
|
return web.json_response({"agent": agent, "status": "no_rejections", "period_hours": hours})
|
||||||
|
|
||||||
|
return web.json_response(report)
|
||||||
|
|
||||||
|
|
||||||
|
def register_alerting_routes(app, get_conn_func):
|
||||||
|
"""Register alerting routes on the app.
|
||||||
|
|
||||||
|
get_conn_func: callable that returns a read-only sqlite3.Connection
|
||||||
|
"""
|
||||||
|
app["_alerting_conn_func"] = get_conn_func
|
||||||
|
app.router.add_get("/check", handle_check)
|
||||||
|
app.router.add_get("/api/alerts", handle_api_alerts)
|
||||||
|
app.router.add_get("/api/failure-report/{agent}", handle_api_failure_report)
|
||||||
2299
ops/diagnostics/app.py
Normal file
2299
ops/diagnostics/app.py
Normal file
File diff suppressed because it is too large
Load diff
312
ops/diagnostics/daily_digest.py
Normal file
312
ops/diagnostics/daily_digest.py
Normal file
|
|
@ -0,0 +1,312 @@
|
||||||
|
"""Daily digest: aggregates 24h activity for Telegram bot consumption.
|
||||||
|
|
||||||
|
Data sources:
|
||||||
|
- pipeline.db: merged PRs, audit events, contributor activity
|
||||||
|
- Forgejo API: PR descriptions for claim summaries
|
||||||
|
- claim-index: total claims, domain breakdown
|
||||||
|
- review queue: pending approval counts
|
||||||
|
|
||||||
|
Endpoint: GET /api/daily-digest?hours=24
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import logging
|
||||||
|
import sqlite3
|
||||||
|
from datetime import datetime, timezone, timedelta
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
import aiohttp
|
||||||
|
|
||||||
|
logger = logging.getLogger("argus.daily_digest")
|
||||||
|
|
||||||
|
FORGEJO_BASE = "https://git.livingip.xyz/api/v1"
|
||||||
|
REPO = "teleo/teleo-codex"
|
||||||
|
CLAIM_INDEX_URL = "http://localhost:8080/claim-index"
|
||||||
|
|
||||||
|
|
||||||
|
async def fetch_daily_digest(
|
||||||
|
db_path: str,
|
||||||
|
forgejo_token: str | None = None,
|
||||||
|
hours: int = 24,
|
||||||
|
timeout_s: int = 15,
|
||||||
|
) -> dict[str, Any]:
|
||||||
|
"""Build the daily digest payload.
|
||||||
|
|
||||||
|
Returns structured data for Epimetheus's Telegram bot to format and send.
|
||||||
|
"""
|
||||||
|
cutoff = (datetime.now(timezone.utc) - timedelta(hours=hours)).isoformat()
|
||||||
|
|
||||||
|
# Parallel: DB queries + HTTP fetches
|
||||||
|
db_data = _query_db(db_path, cutoff, hours)
|
||||||
|
|
||||||
|
headers = {"Accept": "application/json"}
|
||||||
|
if forgejo_token:
|
||||||
|
headers["Authorization"] = f"token {forgejo_token}"
|
||||||
|
|
||||||
|
connector = aiohttp.TCPConnector(ssl=False)
|
||||||
|
async with aiohttp.ClientSession(headers=headers, connector=connector) as session:
|
||||||
|
# Fetch claim-index, merged PR details from Forgejo, and open PR count in parallel
|
||||||
|
merged_numbers = [pr["number"] for pr in db_data["merged_prs"]]
|
||||||
|
|
||||||
|
tasks = [
|
||||||
|
_fetch_claim_index(session, timeout_s),
|
||||||
|
_fetch_merged_pr_details(session, merged_numbers, timeout_s),
|
||||||
|
_fetch_open_pr_count(session, timeout_s),
|
||||||
|
]
|
||||||
|
claim_index, pr_details, open_pr_count = await asyncio.gather(*tasks)
|
||||||
|
|
||||||
|
# Enrich merged PRs with Forgejo descriptions
|
||||||
|
merged_claims = _build_merged_claims(db_data["merged_prs"], pr_details)
|
||||||
|
|
||||||
|
return {
|
||||||
|
"period_hours": hours,
|
||||||
|
"generated_at": datetime.now(timezone.utc).isoformat(),
|
||||||
|
"claims_merged": merged_claims,
|
||||||
|
"pipeline_stats": {
|
||||||
|
"prs_merged": db_data["prs_merged"],
|
||||||
|
"prs_opened": db_data["prs_opened"],
|
||||||
|
"prs_rejected": db_data["prs_rejected"],
|
||||||
|
"approval_rate": db_data["approval_rate"],
|
||||||
|
"top_rejection_reasons": db_data["top_rejection_reasons"],
|
||||||
|
},
|
||||||
|
"agent_activity": db_data["agent_activity"],
|
||||||
|
"pending_review": {
|
||||||
|
"open_prs": open_pr_count,
|
||||||
|
},
|
||||||
|
"knowledge_base": {
|
||||||
|
"total_claims": claim_index.get("total_claims", 0),
|
||||||
|
"domains": claim_index.get("domains", {}),
|
||||||
|
"orphan_ratio": claim_index.get("orphan_ratio", 0),
|
||||||
|
"cross_domain_links": claim_index.get("cross_domain_links", 0),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _query_db(db_path: str, cutoff: str, hours: int) -> dict[str, Any]:
|
||||||
|
"""Run all DB queries synchronously (SQLite is fast enough for digest)."""
|
||||||
|
conn = sqlite3.connect(f"file:{db_path}?mode=ro", uri=True)
|
||||||
|
conn.row_factory = sqlite3.Row
|
||||||
|
try:
|
||||||
|
# Merged PRs in period
|
||||||
|
merged_prs = conn.execute(
|
||||||
|
"""SELECT number, branch, domain, agent, commit_type, merged_at, cost_usd
|
||||||
|
FROM prs WHERE status = 'merged' AND merged_at >= ?
|
||||||
|
ORDER BY merged_at DESC""",
|
||||||
|
(cutoff,),
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
prs_merged = len(merged_prs)
|
||||||
|
|
||||||
|
# PRs opened in period
|
||||||
|
prs_opened = conn.execute(
|
||||||
|
"SELECT COUNT(*) FROM prs WHERE created_at >= ?", (cutoff,)
|
||||||
|
).fetchone()[0]
|
||||||
|
|
||||||
|
# Rejected PRs in period (closed/zombie with rejection events)
|
||||||
|
prs_rejected = conn.execute(
|
||||||
|
"""SELECT COUNT(DISTINCT json_extract(detail, '$.pr'))
|
||||||
|
FROM audit_log
|
||||||
|
WHERE stage = 'evaluate'
|
||||||
|
AND event IN ('domain_rejected', 'tier05_rejected')
|
||||||
|
AND timestamp >= ?""",
|
||||||
|
(cutoff,),
|
||||||
|
).fetchone()[0]
|
||||||
|
|
||||||
|
# Approval rate
|
||||||
|
total_evaluated = prs_merged + prs_rejected
|
||||||
|
approval_rate = round(prs_merged / total_evaluated * 100, 1) if total_evaluated > 0 else 0.0
|
||||||
|
|
||||||
|
# Top rejection reasons
|
||||||
|
rejection_rows = conn.execute(
|
||||||
|
"""SELECT json_extract(detail, '$.issues') as issues
|
||||||
|
FROM audit_log
|
||||||
|
WHERE stage = 'evaluate'
|
||||||
|
AND event IN ('domain_rejected', 'tier05_rejected')
|
||||||
|
AND timestamp >= ?
|
||||||
|
AND json_valid(detail)""",
|
||||||
|
(cutoff,),
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
reason_counts: dict[str, int] = {}
|
||||||
|
import json
|
||||||
|
for row in rejection_rows:
|
||||||
|
if row["issues"]:
|
||||||
|
try:
|
||||||
|
issues = json.loads(row["issues"])
|
||||||
|
if isinstance(issues, list):
|
||||||
|
for issue in issues:
|
||||||
|
reason_counts[issue] = reason_counts.get(issue, 0) + 1
|
||||||
|
except (json.JSONDecodeError, TypeError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
top_rejection_reasons = sorted(reason_counts.items(), key=lambda x: -x[1])[:5]
|
||||||
|
top_rejection_reasons = [{"reason": r, "count": c} for r, c in top_rejection_reasons]
|
||||||
|
|
||||||
|
# Agent activity — who contributed what
|
||||||
|
agent_rows = conn.execute(
|
||||||
|
"""SELECT agent,
|
||||||
|
COUNT(*) as total,
|
||||||
|
SUM(CASE WHEN status = 'merged' THEN 1 ELSE 0 END) as merged,
|
||||||
|
SUM(CASE WHEN commit_type = 'extract' OR commit_type = 'research' THEN 1 ELSE 0 END) as extractions,
|
||||||
|
SUM(CASE WHEN commit_type = 'challenge' THEN 1 ELSE 0 END) as challenges,
|
||||||
|
SUM(CASE WHEN commit_type = 'enrich' OR commit_type = 'reweave' THEN 1 ELSE 0 END) as enrichments,
|
||||||
|
SUM(CASE WHEN commit_type = 'synthesize' THEN 1 ELSE 0 END) as syntheses
|
||||||
|
FROM prs
|
||||||
|
WHERE created_at >= ? AND agent IS NOT NULL AND agent != ''
|
||||||
|
GROUP BY agent
|
||||||
|
ORDER BY merged DESC""",
|
||||||
|
(cutoff,),
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
agent_activity = [
|
||||||
|
{
|
||||||
|
"agent": row["agent"],
|
||||||
|
"prs_total": row["total"],
|
||||||
|
"prs_merged": row["merged"],
|
||||||
|
"extractions": row["extractions"],
|
||||||
|
"challenges": row["challenges"],
|
||||||
|
"enrichments": row["enrichments"],
|
||||||
|
"syntheses": row["syntheses"],
|
||||||
|
}
|
||||||
|
for row in agent_rows
|
||||||
|
]
|
||||||
|
|
||||||
|
return {
|
||||||
|
"merged_prs": [dict(pr) for pr in merged_prs],
|
||||||
|
"prs_merged": prs_merged,
|
||||||
|
"prs_opened": prs_opened,
|
||||||
|
"prs_rejected": prs_rejected,
|
||||||
|
"approval_rate": approval_rate,
|
||||||
|
"top_rejection_reasons": top_rejection_reasons,
|
||||||
|
"agent_activity": agent_activity,
|
||||||
|
}
|
||||||
|
finally:
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
async def _fetch_claim_index(session: aiohttp.ClientSession, timeout_s: int) -> dict:
|
||||||
|
"""Fetch claim-index summary stats."""
|
||||||
|
try:
|
||||||
|
async with session.get(
|
||||||
|
CLAIM_INDEX_URL,
|
||||||
|
timeout=aiohttp.ClientTimeout(total=timeout_s),
|
||||||
|
) as resp:
|
||||||
|
if resp.status == 200:
|
||||||
|
data = await resp.json()
|
||||||
|
return {
|
||||||
|
"total_claims": data.get("total_claims", 0),
|
||||||
|
"domains": data.get("domains", {}),
|
||||||
|
"orphan_ratio": data.get("orphan_ratio", 0),
|
||||||
|
"cross_domain_links": data.get("cross_domain_links", 0),
|
||||||
|
}
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning("Failed to fetch claim-index: %s", e)
|
||||||
|
return {}
|
||||||
|
|
||||||
|
|
||||||
|
async def _fetch_merged_pr_details(
|
||||||
|
session: aiohttp.ClientSession,
|
||||||
|
pr_numbers: list[int],
|
||||||
|
timeout_s: int,
|
||||||
|
) -> dict[int, dict]:
|
||||||
|
"""Fetch PR details from Forgejo for merged PRs (parallel)."""
|
||||||
|
if not pr_numbers:
|
||||||
|
return {}
|
||||||
|
|
||||||
|
async def _fetch_one(n: int) -> tuple[int, dict]:
|
||||||
|
url = f"{FORGEJO_BASE}/repos/{REPO}/pulls/{n}"
|
||||||
|
try:
|
||||||
|
async with session.get(url, timeout=aiohttp.ClientTimeout(total=timeout_s)) as resp:
|
||||||
|
if resp.status == 200:
|
||||||
|
return n, await resp.json()
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning("Failed to fetch PR #%d: %s", n, e)
|
||||||
|
return n, {}
|
||||||
|
|
||||||
|
results = await asyncio.gather(*[_fetch_one(n) for n in pr_numbers])
|
||||||
|
return {n: data for n, data in results}
|
||||||
|
|
||||||
|
|
||||||
|
async def _fetch_open_pr_count(session: aiohttp.ClientSession, timeout_s: int) -> int:
|
||||||
|
"""Get count of open PRs from Forgejo."""
|
||||||
|
url = f"{FORGEJO_BASE}/repos/{REPO}/pulls?state=open&limit=1"
|
||||||
|
try:
|
||||||
|
async with session.get(url, timeout=aiohttp.ClientTimeout(total=timeout_s)) as resp:
|
||||||
|
if resp.status == 200:
|
||||||
|
# Forgejo returns X-Total-Count header
|
||||||
|
total = resp.headers.get("X-Total-Count")
|
||||||
|
if total is not None:
|
||||||
|
return int(total)
|
||||||
|
# Fallback: fetch all and count
|
||||||
|
data = await resp.json()
|
||||||
|
return len(data)
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning("Failed to fetch open PR count: %s", e)
|
||||||
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
def _build_merged_claims(
|
||||||
|
merged_prs: list[dict],
|
||||||
|
pr_details: dict[int, dict],
|
||||||
|
) -> list[dict]:
|
||||||
|
"""Build claim summaries from merged PRs + Forgejo PR bodies."""
|
||||||
|
claims = []
|
||||||
|
for pr in merged_prs:
|
||||||
|
number = pr["number"]
|
||||||
|
detail = pr_details.get(number, {})
|
||||||
|
|
||||||
|
# Extract summary from PR body (first paragraph or first 200 chars)
|
||||||
|
body = detail.get("body", "") or ""
|
||||||
|
summary = _extract_summary(body)
|
||||||
|
|
||||||
|
claims.append({
|
||||||
|
"pr_number": number,
|
||||||
|
"title": detail.get("title", pr.get("branch", f"PR #{number}")),
|
||||||
|
"agent": pr.get("agent", "unknown"),
|
||||||
|
"domain": pr.get("domain", "unknown"),
|
||||||
|
"commit_type": pr.get("commit_type", "knowledge"),
|
||||||
|
"summary": summary,
|
||||||
|
"merged_at": pr.get("merged_at", ""),
|
||||||
|
"cost_usd": pr.get("cost_usd", 0.0),
|
||||||
|
"url": detail.get("html_url", ""),
|
||||||
|
})
|
||||||
|
|
||||||
|
return claims
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_summary(body: str) -> str:
|
||||||
|
"""Extract a 1-2 sentence summary from PR body markdown.
|
||||||
|
|
||||||
|
Looks for a Summary section first, then falls back to first non-header paragraph.
|
||||||
|
"""
|
||||||
|
if not body:
|
||||||
|
return ""
|
||||||
|
|
||||||
|
lines = body.strip().split("\n")
|
||||||
|
|
||||||
|
# Look for ## Summary section
|
||||||
|
in_summary = False
|
||||||
|
summary_lines = []
|
||||||
|
for line in lines:
|
||||||
|
if line.strip().lower().startswith("## summary"):
|
||||||
|
in_summary = True
|
||||||
|
continue
|
||||||
|
if in_summary:
|
||||||
|
if line.startswith("##"):
|
||||||
|
break
|
||||||
|
stripped = line.strip()
|
||||||
|
if stripped and not stripped.startswith("- ["): # skip checklists
|
||||||
|
summary_lines.append(stripped)
|
||||||
|
if len(summary_lines) >= 3:
|
||||||
|
break
|
||||||
|
|
||||||
|
if summary_lines:
|
||||||
|
return " ".join(summary_lines)[:300]
|
||||||
|
|
||||||
|
# Fallback: first non-header, non-empty paragraph
|
||||||
|
for line in lines:
|
||||||
|
stripped = line.strip()
|
||||||
|
if stripped and not stripped.startswith("#") and not stripped.startswith("- ["):
|
||||||
|
return stripped[:300]
|
||||||
|
|
||||||
|
return ""
|
||||||
62
ops/diagnostics/daily_digest_routes.py
Normal file
62
ops/diagnostics/daily_digest_routes.py
Normal file
|
|
@ -0,0 +1,62 @@
|
||||||
|
"""Route handlers for /api/daily-digest endpoint.
|
||||||
|
|
||||||
|
Import into app.py and register routes in create_app().
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
|
||||||
|
from aiohttp import web
|
||||||
|
from daily_digest import fetch_daily_digest
|
||||||
|
|
||||||
|
logger = logging.getLogger("argus.daily_digest")
|
||||||
|
|
||||||
|
|
||||||
|
async def handle_daily_digest(request):
|
||||||
|
"""GET /api/daily-digest — structured data for Telegram daily digest.
|
||||||
|
|
||||||
|
Query params:
|
||||||
|
hours: lookback period in hours (default: 24, max: 168)
|
||||||
|
|
||||||
|
Returns JSON with:
|
||||||
|
claims_merged: merged claims with summaries
|
||||||
|
pipeline_stats: PRs merged/opened/rejected, approval rate, rejection reasons
|
||||||
|
agent_activity: per-agent contribution breakdown
|
||||||
|
pending_review: open PR count
|
||||||
|
knowledge_base: total claims, domain breakdown, orphan ratio
|
||||||
|
"""
|
||||||
|
# Validate hours param
|
||||||
|
try:
|
||||||
|
hours = int(request.query.get("hours", 24))
|
||||||
|
hours = max(1, min(hours, 168)) # clamp to 1h-7d
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
hours = 24
|
||||||
|
|
||||||
|
db_path = request.app.get("_db_path")
|
||||||
|
if not db_path:
|
||||||
|
return web.json_response({"error": "database not configured"}, status=500)
|
||||||
|
|
||||||
|
token = request.app.get("_forgejo_token")
|
||||||
|
|
||||||
|
try:
|
||||||
|
digest = await fetch_daily_digest(
|
||||||
|
db_path=db_path,
|
||||||
|
forgejo_token=token,
|
||||||
|
hours=hours,
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error("Daily digest fetch failed: %s", e)
|
||||||
|
return web.json_response({"error": str(e)}, status=500)
|
||||||
|
|
||||||
|
return web.json_response(digest)
|
||||||
|
|
||||||
|
|
||||||
|
def register_daily_digest_routes(app, db_path: str, forgejo_token: str | None = None):
|
||||||
|
"""Register daily digest routes on the app.
|
||||||
|
|
||||||
|
db_path: path to pipeline.db
|
||||||
|
forgejo_token: optional Forgejo API token
|
||||||
|
"""
|
||||||
|
app["_db_path"] = db_path
|
||||||
|
if forgejo_token:
|
||||||
|
app["_forgejo_token"] = forgejo_token
|
||||||
|
app.router.add_get("/api/daily-digest", handle_daily_digest)
|
||||||
1424
ops/diagnostics/dashboard-v2.html
Normal file
1424
ops/diagnostics/dashboard-v2.html
Normal file
File diff suppressed because one or more lines are too long
348
ops/diagnostics/dashboard_agents.py
Normal file
348
ops/diagnostics/dashboard_agents.py
Normal file
|
|
@ -0,0 +1,348 @@
|
||||||
|
"""Page 3: Agent Performance — "Who's contributing what?"
|
||||||
|
|
||||||
|
Slim version v2 per Cory feedback (2026-04-03):
|
||||||
|
- Hero: total merged, rejection rate, claims/week — 3 numbers
|
||||||
|
- Table: agent, merged, rejection rate, last active, inbox depth — 5 columns
|
||||||
|
- One chart: weekly contributions by agent (stacked bar)
|
||||||
|
- No CI scores, no yield (redundant with rejection rate), no top issue (too granular)
|
||||||
|
|
||||||
|
Fetches /api/agents-dashboard + /api/agent-state, merges client-side.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
from shared_ui import render_page
|
||||||
|
|
||||||
|
|
||||||
|
def render_agents_page(contributors_principal: list, contributors_agent: list, now: datetime) -> str:
|
||||||
|
"""Render the slim Agent Performance page."""
|
||||||
|
|
||||||
|
body = """
|
||||||
|
<!-- Hero Metrics (filled by JS) -->
|
||||||
|
<div class="grid" id="hero-metrics">
|
||||||
|
<div class="card" style="text-align:center;color:#8b949e">Loading...</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Per-Agent Table -->
|
||||||
|
<div class="section">
|
||||||
|
<div class="section-title">Agent Breakdown (30d)</div>
|
||||||
|
<div class="card">
|
||||||
|
<table id="agent-table">
|
||||||
|
<tr>
|
||||||
|
<th>Agent</th>
|
||||||
|
<th style="text-align:right">Merged</th>
|
||||||
|
<th style="text-align:right">Rejection Rate</th>
|
||||||
|
<th style="text-align:right">Last Active</th>
|
||||||
|
<th style="text-align:right">Inbox</th>
|
||||||
|
</tr>
|
||||||
|
<tr><td colspan="5" style="color:#8b949e;text-align:center">Loading...</td></tr>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Weekly Contributions Chart -->
|
||||||
|
<div class="section">
|
||||||
|
<div class="chart-container" style="max-width:100%">
|
||||||
|
<h2>Claims Merged per Week by Agent</h2>
|
||||||
|
<canvas id="trendChart"></canvas>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Agent Scorecard (from review_records) -->
|
||||||
|
<div class="section">
|
||||||
|
<div class="section-title">Agent Scorecard (Structured Reviews)</div>
|
||||||
|
<div class="card">
|
||||||
|
<table id="scorecard-table">
|
||||||
|
<tr><td colspan="7" style="color:#8b949e;text-align:center">Loading...</td></tr>
|
||||||
|
</table>
|
||||||
|
<div id="scorecard-rejections" style="margin-top:12px"></div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Latest Session Digests -->
|
||||||
|
<div class="section">
|
||||||
|
<div class="section-title">Latest Session Digests</div>
|
||||||
|
<div id="digest-container">
|
||||||
|
<div class="card" style="text-align:center;color:#8b949e">Loading...</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
"""
|
||||||
|
|
||||||
|
scripts = """<script>
|
||||||
|
Promise.all([
|
||||||
|
fetch('/api/agents-dashboard?days=30').then(r => r.json()),
|
||||||
|
fetch('/api/agent-state').then(r => r.json()).catch(() => ({agents: {}}))
|
||||||
|
])
|
||||||
|
.then(([data, stateData]) => {
|
||||||
|
const agents = data.agents || {};
|
||||||
|
const agentState = stateData.agents || {};
|
||||||
|
|
||||||
|
// Sort by approved desc, filter to agents with evals
|
||||||
|
const sorted = Object.entries(agents)
|
||||||
|
.filter(([_, a]) => a.evaluated > 0)
|
||||||
|
.sort((a, b) => (b[1].approved || 0) - (a[1].approved || 0));
|
||||||
|
|
||||||
|
// --- Hero metrics ---
|
||||||
|
let totalMerged = 0, totalRejected = 0, totalEval = 0;
|
||||||
|
const weekMerged = {};
|
||||||
|
for (const [_, a] of sorted) {
|
||||||
|
totalMerged += a.approved || 0;
|
||||||
|
totalRejected += a.rejected || 0;
|
||||||
|
totalEval += a.evaluated || 0;
|
||||||
|
if (a.weekly_trend) {
|
||||||
|
a.weekly_trend.forEach(w => {
|
||||||
|
weekMerged[w.week] = (weekMerged[w.week] || 0) + (w.merged || 0);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const weeks = Object.keys(weekMerged).sort();
|
||||||
|
const recentWeeks = weeks.slice(-4);
|
||||||
|
const claimsPerWeek = recentWeeks.length > 0
|
||||||
|
? Math.round(recentWeeks.reduce((s, w) => s + weekMerged[w], 0) / recentWeeks.length)
|
||||||
|
: 0;
|
||||||
|
const rejRate = totalEval > 0 ? ((totalRejected / totalEval) * 100).toFixed(1) : '0';
|
||||||
|
|
||||||
|
document.getElementById('hero-metrics').innerHTML =
|
||||||
|
'<div class="card" style="text-align:center">' +
|
||||||
|
'<div class="label">Claims Merged (30d)</div>' +
|
||||||
|
'<div style="font-size:32px;font-weight:700;color:#3fb950">' + totalMerged + '</div>' +
|
||||||
|
'</div>' +
|
||||||
|
'<div class="card" style="text-align:center">' +
|
||||||
|
'<div class="label">Rejection Rate</div>' +
|
||||||
|
'<div style="font-size:32px;font-weight:700;color:' + (parseFloat(rejRate) > 30 ? '#f85149' : '#e3b341') + '">' + rejRate + '%</div>' +
|
||||||
|
'</div>' +
|
||||||
|
'<div class="card" style="text-align:center">' +
|
||||||
|
'<div class="label">Claims/Week (avg last 4w)</div>' +
|
||||||
|
'<div style="font-size:32px;font-weight:700;color:#58a6ff">' + claimsPerWeek + '</div>' +
|
||||||
|
'</div>';
|
||||||
|
|
||||||
|
// --- Per-agent table ---
|
||||||
|
if (sorted.length === 0) {
|
||||||
|
document.getElementById('agent-table').innerHTML =
|
||||||
|
'<tr><th>Agent</th><th>Merged</th><th>Rejection Rate</th><th>Last Active</th><th>Inbox</th></tr>' +
|
||||||
|
'<tr><td colspan="5" style="color:#8b949e;text-align:center">No evaluation data yet</td></tr>';
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Helper: format relative time
|
||||||
|
function timeAgo(isoStr) {
|
||||||
|
if (!isoStr) return '<span style="color:#484f58">unknown</span>';
|
||||||
|
const diff = (Date.now() - new Date(isoStr).getTime()) / 1000;
|
||||||
|
if (diff < 3600) return Math.round(diff / 60) + 'm ago';
|
||||||
|
if (diff < 86400) return Math.round(diff / 3600) + 'h ago';
|
||||||
|
return Math.round(diff / 86400) + 'd ago';
|
||||||
|
}
|
||||||
|
|
||||||
|
let tableHtml = '<tr><th>Agent</th><th style="text-align:right">Merged</th>' +
|
||||||
|
'<th style="text-align:right">Rejection Rate</th>' +
|
||||||
|
'<th style="text-align:right">Last Active</th>' +
|
||||||
|
'<th style="text-align:right">Inbox</th></tr>';
|
||||||
|
|
||||||
|
for (const [name, a] of sorted) {
|
||||||
|
const color = agentColor(name);
|
||||||
|
const rr = a.evaluated > 0 ? ((a.rejected / a.evaluated) * 100).toFixed(1) + '%' : '-';
|
||||||
|
const rrColor = a.rejection_rate > 0.3 ? '#f85149' : a.rejection_rate > 0.15 ? '#e3b341' : '#3fb950';
|
||||||
|
|
||||||
|
// Agent state lookup (case-insensitive match)
|
||||||
|
const stateKey = Object.keys(agentState).find(k => k.toLowerCase() === name.toLowerCase()) || '';
|
||||||
|
const state = agentState[stateKey] || {};
|
||||||
|
const lastActive = timeAgo(state.last_active);
|
||||||
|
const inboxDepth = state.inbox_depth != null ? state.inbox_depth : '-';
|
||||||
|
const inboxColor = inboxDepth > 10 ? '#f85149' : inboxDepth > 5 ? '#d29922' : inboxDepth > 0 ? '#58a6ff' : '#3fb950';
|
||||||
|
|
||||||
|
tableHtml += '<tr>' +
|
||||||
|
'<td><span style="display:inline-block;width:8px;height:8px;border-radius:50%;background:' + color + ';margin-right:6px"></span>' + esc(name) + '</td>' +
|
||||||
|
'<td style="text-align:right;font-weight:600;color:#3fb950">' + (a.approved || 0) + '</td>' +
|
||||||
|
'<td style="text-align:right;color:' + rrColor + '">' + rr + '</td>' +
|
||||||
|
'<td style="text-align:right">' + lastActive + '</td>' +
|
||||||
|
'<td style="text-align:right;color:' + inboxColor + '">' + inboxDepth + '</td>' +
|
||||||
|
'</tr>';
|
||||||
|
}
|
||||||
|
|
||||||
|
document.getElementById('agent-table').innerHTML = tableHtml;
|
||||||
|
|
||||||
|
// --- Weekly trend chart ---
|
||||||
|
const allWeeks = new Set();
|
||||||
|
const agentNames = [];
|
||||||
|
for (const [name, a] of sorted) {
|
||||||
|
if (a.weekly_trend && a.weekly_trend.length > 0) {
|
||||||
|
agentNames.push(name);
|
||||||
|
a.weekly_trend.forEach(w => allWeeks.add(w.week));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
const sortedWeeks = [...allWeeks].sort();
|
||||||
|
|
||||||
|
if (sortedWeeks.length > 0 && agentNames.length > 0) {
|
||||||
|
const trendMap = {};
|
||||||
|
for (const [name, a] of sorted) {
|
||||||
|
if (a.weekly_trend) {
|
||||||
|
trendMap[name] = {};
|
||||||
|
a.weekly_trend.forEach(w => { trendMap[name][w.week] = w.merged; });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
new Chart(document.getElementById('trendChart'), {
|
||||||
|
type: 'bar',
|
||||||
|
data: {
|
||||||
|
labels: sortedWeeks,
|
||||||
|
datasets: agentNames.map(name => ({
|
||||||
|
label: name,
|
||||||
|
data: sortedWeeks.map(w => (trendMap[name] || {})[w] || 0),
|
||||||
|
backgroundColor: agentColor(name),
|
||||||
|
})),
|
||||||
|
},
|
||||||
|
options: {
|
||||||
|
responsive: true,
|
||||||
|
scales: {
|
||||||
|
x: { stacked: true, grid: { display: false } },
|
||||||
|
y: { stacked: true, title: { display: true, text: 'Claims Merged' }, min: 0 },
|
||||||
|
},
|
||||||
|
plugins: { legend: { labels: { boxWidth: 12 } } },
|
||||||
|
},
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}).catch(err => {
|
||||||
|
document.getElementById('hero-metrics').innerHTML =
|
||||||
|
'<div class="card" style="grid-column:1/-1;text-align:center;color:#f85149">Failed to load: ' + err.message + '</div>';
|
||||||
|
});
|
||||||
|
|
||||||
|
// --- Agent Scorecard ---
|
||||||
|
fetch('/api/agent-scorecard')
|
||||||
|
.then(r => r.json())
|
||||||
|
.then(data => {
|
||||||
|
const cards = data.scorecards || [];
|
||||||
|
if (cards.length === 0 || cards.every(c => c.total_reviews === 0)) {
|
||||||
|
document.getElementById('scorecard-table').innerHTML =
|
||||||
|
'<tr><td colspan="7" style="color:#8b949e;text-align:center">No structured review data yet (review_records table is empty)</td></tr>';
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
let html = '<tr><th>Agent</th><th style="text-align:right">PRs</th><th style="text-align:right">Reviews</th>' +
|
||||||
|
'<th style="text-align:right">Approved</th><th style="text-align:right">w/ Changes</th>' +
|
||||||
|
'<th style="text-align:right">Rejected</th><th style="text-align:right">Approval Rate</th></tr>';
|
||||||
|
|
||||||
|
const allReasons = {};
|
||||||
|
for (const c of cards) {
|
||||||
|
const arColor = c.approval_rate >= 80 ? '#3fb950' : c.approval_rate >= 60 ? '#d29922' : '#f85149';
|
||||||
|
html += '<tr>' +
|
||||||
|
'<td><span style="display:inline-block;width:8px;height:8px;border-radius:50%;background:' + agentColor(c.agent) + ';margin-right:6px"></span>' + esc(c.agent) + '</td>' +
|
||||||
|
'<td style="text-align:right">' + c.total_prs + '</td>' +
|
||||||
|
'<td style="text-align:right">' + c.total_reviews + '</td>' +
|
||||||
|
'<td style="text-align:right;color:#3fb950">' + c.approved + '</td>' +
|
||||||
|
'<td style="text-align:right;color:#d29922">' + c.approved_with_changes + '</td>' +
|
||||||
|
'<td style="text-align:right;color:#f85149">' + c.rejected + '</td>' +
|
||||||
|
'<td style="text-align:right;font-weight:600;color:' + arColor + '">' + c.approval_rate.toFixed(1) + '%</td>' +
|
||||||
|
'</tr>';
|
||||||
|
if (c.rejection_reasons) {
|
||||||
|
for (const [reason, cnt] of Object.entries(c.rejection_reasons)) {
|
||||||
|
allReasons[reason] = (allReasons[reason] || 0) + cnt;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
document.getElementById('scorecard-table').innerHTML = html;
|
||||||
|
|
||||||
|
// Top rejection reasons across all agents
|
||||||
|
const sortedReasons = Object.entries(allReasons).sort((a, b) => b[1] - a[1]);
|
||||||
|
if (sortedReasons.length > 0) {
|
||||||
|
let rHtml = '<div style="font-size:12px;font-weight:600;color:#8b949e;margin-bottom:6px;text-transform:uppercase">Top Rejection Reasons</div>';
|
||||||
|
rHtml += sortedReasons.map(([reason, cnt]) =>
|
||||||
|
'<span style="display:inline-block;margin:2px 4px;padding:3px 10px;background:#f8514922;border:1px solid #f8514944;border-radius:12px;font-size:12px;color:#f85149">' +
|
||||||
|
esc(reason) + ' <strong>' + cnt + '</strong></span>'
|
||||||
|
).join('');
|
||||||
|
rHtml += '<div style="margin-top:8px;font-size:11px;color:#484f58">Target: 80% approval rate. Too high = too conservative, too low = wasting pipeline compute.</div>';
|
||||||
|
document.getElementById('scorecard-rejections').innerHTML = rHtml;
|
||||||
|
}
|
||||||
|
}).catch(() => {
|
||||||
|
document.getElementById('scorecard-table').innerHTML =
|
||||||
|
'<tr><td colspan="7" style="color:#8b949e;text-align:center">Failed to load scorecard</td></tr>';
|
||||||
|
});
|
||||||
|
|
||||||
|
// --- Latest Session Digests ---
|
||||||
|
fetch('/api/session-digest?latest=true')
|
||||||
|
.then(r => r.json())
|
||||||
|
.then(data => {
|
||||||
|
const digests = data.digests || [];
|
||||||
|
if (digests.length === 0) {
|
||||||
|
document.getElementById('digest-container').innerHTML =
|
||||||
|
'<div class="card" style="text-align:center;color:#8b949e">No session digests yet. Data starts flowing when agents complete research sessions.</div>';
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
let html = '<div class="grid" style="grid-template-columns:repeat(auto-fit, minmax(320px, 1fr))">';
|
||||||
|
for (const d of digests) {
|
||||||
|
const color = agentColor(d.agent);
|
||||||
|
const dateStr = d.date || d.timestamp || '';
|
||||||
|
|
||||||
|
html += '<div class="card" style="border-left:3px solid ' + color + '">' +
|
||||||
|
'<div style="display:flex;justify-content:space-between;align-items:center;margin-bottom:8px">' +
|
||||||
|
'<strong style="color:' + color + '">' + esc(d.agent || 'unknown') + '</strong>' +
|
||||||
|
'<span style="font-size:11px;color:#484f58">' + esc(dateStr) + '</span>' +
|
||||||
|
'</div>';
|
||||||
|
|
||||||
|
if (d.research_question) {
|
||||||
|
html += '<div style="font-size:13px;font-style:italic;color:#c9d1d9;margin-bottom:8px">' + esc(d.research_question) + '</div>';
|
||||||
|
}
|
||||||
|
|
||||||
|
if (d.key_findings && d.key_findings.length > 0) {
|
||||||
|
html += '<div style="font-size:11px;color:#8b949e;text-transform:uppercase;margin-bottom:4px">Key Findings</div><ul style="margin:0 0 8px 16px;font-size:12px">';
|
||||||
|
for (const f of d.key_findings) html += '<li>' + esc(f) + '</li>';
|
||||||
|
html += '</ul>';
|
||||||
|
}
|
||||||
|
|
||||||
|
if (d.surprises && d.surprises.length > 0) {
|
||||||
|
html += '<div style="font-size:11px;color:#8b949e;text-transform:uppercase;margin-bottom:4px">Surprises</div><ul style="margin:0 0 8px 16px;font-size:12px">';
|
||||||
|
for (const s of d.surprises) html += '<li>' + esc(s) + '</li>';
|
||||||
|
html += '</ul>';
|
||||||
|
}
|
||||||
|
|
||||||
|
if (d.confidence_shifts && d.confidence_shifts.length > 0) {
|
||||||
|
html += '<div style="font-size:11px;color:#8b949e;text-transform:uppercase;margin-bottom:4px">Confidence Shifts</div>';
|
||||||
|
for (const cs of d.confidence_shifts) {
|
||||||
|
const arrow = cs.direction === 'up' ? '▲' : cs.direction === 'down' ? '▼' : '▶';
|
||||||
|
const arrowColor = cs.direction === 'up' ? '#3fb950' : cs.direction === 'down' ? '#f85149' : '#d29922';
|
||||||
|
html += '<div style="font-size:12px;margin-left:16px"><span style="color:' + arrowColor + '">' + arrow + '</span> ' + esc(cs.claim || cs.topic || '') + '</div>';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Expandable details
|
||||||
|
const detailId = 'digest-detail-' + Math.random().toString(36).substr(2, 6);
|
||||||
|
const hasDetails = (d.sources_archived && d.sources_archived.length > 0) ||
|
||||||
|
(d.prs_submitted && d.prs_submitted.length > 0) ||
|
||||||
|
(d.follow_ups && d.follow_ups.length > 0);
|
||||||
|
if (hasDetails) {
|
||||||
|
html += '<a style="color:#58a6ff;cursor:pointer;font-size:11px;display:block;margin-top:6px" ' +
|
||||||
|
'onclick="var e=document.getElementById(\\x27' + detailId + '\\x27);e.style.display=e.style.display===\\x27none\\x27?\\x27block\\x27:\\x27none\\x27">Details</a>';
|
||||||
|
html += '<div id="' + detailId + '" style="display:none;margin-top:6px;font-size:12px">';
|
||||||
|
if (d.sources_archived && d.sources_archived.length > 0) {
|
||||||
|
html += '<div style="color:#8b949e;font-size:11px">Sources: ' + d.sources_archived.length + '</div>';
|
||||||
|
}
|
||||||
|
if (d.prs_submitted && d.prs_submitted.length > 0) {
|
||||||
|
html += '<div style="color:#8b949e;font-size:11px">PRs: ' + d.prs_submitted.map(p => '#' + p).join(', ') + '</div>';
|
||||||
|
}
|
||||||
|
if (d.follow_ups && d.follow_ups.length > 0) {
|
||||||
|
html += '<div style="color:#8b949e;font-size:11px;margin-top:4px">Follow-ups:</div><ul style="margin:2px 0 0 16px">';
|
||||||
|
for (const fu of d.follow_ups) html += '<li>' + esc(fu) + '</li>';
|
||||||
|
html += '</ul>';
|
||||||
|
}
|
||||||
|
html += '</div>';
|
||||||
|
}
|
||||||
|
|
||||||
|
html += '</div>';
|
||||||
|
}
|
||||||
|
html += '</div>';
|
||||||
|
document.getElementById('digest-container').innerHTML = html;
|
||||||
|
}).catch(() => {
|
||||||
|
document.getElementById('digest-container').innerHTML =
|
||||||
|
'<div class="card" style="text-align:center;color:#8b949e">Failed to load session digests</div>';
|
||||||
|
});
|
||||||
|
</script>"""
|
||||||
|
|
||||||
|
return render_page(
|
||||||
|
title="Agent Performance",
|
||||||
|
subtitle="Who's contributing what?",
|
||||||
|
active_path="/agents",
|
||||||
|
body_html=body,
|
||||||
|
scripts=scripts,
|
||||||
|
timestamp=now.strftime("%Y-%m-%d %H:%M UTC"),
|
||||||
|
)
|
||||||
239
ops/diagnostics/dashboard_epistemic.py
Normal file
239
ops/diagnostics/dashboard_epistemic.py
Normal file
|
|
@ -0,0 +1,239 @@
|
||||||
|
"""Page 4: Epistemic Integrity — "Can we trust what we know?"
|
||||||
|
|
||||||
|
Live sections:
|
||||||
|
- Confidence calibration (from claim-index via vital signs)
|
||||||
|
- Cascade coverage (from audit_log stage='cascade')
|
||||||
|
- Review quality (from review_records table)
|
||||||
|
|
||||||
|
Placeholder sections:
|
||||||
|
- Multi-model agreement (needs model_evals table)
|
||||||
|
- Belief staleness (needs cascade tracking to give it meaning)
|
||||||
|
- Divergence tracking (needs divergence events)
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
from shared_ui import render_page
|
||||||
|
|
||||||
|
|
||||||
|
def render_epistemic_page(vital_signs: dict, now: datetime) -> str:
|
||||||
|
"""Render the Epistemic Integrity page."""
|
||||||
|
|
||||||
|
vs_conf = vital_signs.get("confidence_distribution", {})
|
||||||
|
total_claims = sum(vs_conf.values()) if vs_conf else 0
|
||||||
|
|
||||||
|
# Confidence calibration table
|
||||||
|
conf_rows = ""
|
||||||
|
for level in ["proven", "likely", "experimental", "speculative"]:
|
||||||
|
count = vs_conf.get(level, 0)
|
||||||
|
pct = round(count / total_claims * 100, 1) if total_claims else 0
|
||||||
|
conf_rows += f'<tr><td>{level}</td><td>{count}</td><td>{pct}%</td></tr>'
|
||||||
|
|
||||||
|
body = f"""
|
||||||
|
<!-- Confidence Calibration (LIVE) -->
|
||||||
|
<div class="section">
|
||||||
|
<div class="section-title">Confidence Calibration</div>
|
||||||
|
<div class="row">
|
||||||
|
<div class="card">
|
||||||
|
<table>
|
||||||
|
<tr><th>Level</th><th>Claims</th><th>Share</th></tr>
|
||||||
|
{conf_rows}
|
||||||
|
</table>
|
||||||
|
<div style="margin-top:12px;font-size:12px;color:#8b949e">
|
||||||
|
Total claims: {total_claims}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="chart-container">
|
||||||
|
<h2>Confidence Distribution</h2>
|
||||||
|
<canvas id="confPieChart"></canvas>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Cascade Coverage (LIVE — from audit_log) -->
|
||||||
|
<div class="section">
|
||||||
|
<div class="section-title">Cascade Coverage</div>
|
||||||
|
<div id="cascade-container">
|
||||||
|
<div class="card" style="text-align:center;color:#8b949e">Loading cascade data...</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Review Quality (LIVE — from review_records table) -->
|
||||||
|
<div class="section">
|
||||||
|
<div class="section-title">Review Quality</div>
|
||||||
|
<div id="review-container">
|
||||||
|
<div class="card" style="text-align:center;color:#8b949e">Loading review data...</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Multi-Model Agreement — Placeholder -->
|
||||||
|
<div class="section">
|
||||||
|
<div class="section-title">Multi-Model Agreement</div>
|
||||||
|
<div class="card" style="text-align:center;padding:40px">
|
||||||
|
<div style="font-size:40px;margin-bottom:12px;opacity:0.3">⚙</div>
|
||||||
|
<div style="color:#8b949e">
|
||||||
|
Multi-model agreement rate requires the <code>model_evals</code> table.<br>
|
||||||
|
<span style="font-size:12px">Blocked on: model_evals table creation (Theseus 2 Phase 3)</span>
|
||||||
|
</div>
|
||||||
|
<div style="margin-top:16px;font-size:12px;color:#8b949e">
|
||||||
|
Current eval models: Haiku (triage), GPT-4o (domain), Sonnet/Opus (Leo).<br>
|
||||||
|
Agreement tracking needs per-model verdicts stored separately.
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Belief Staleness — Placeholder -->
|
||||||
|
<div class="section">
|
||||||
|
<div class="section-title">Belief Staleness</div>
|
||||||
|
<div class="card" style="text-align:center;padding:40px">
|
||||||
|
<div style="font-size:40px;margin-bottom:12px;opacity:0.3">⏲</div>
|
||||||
|
<div style="color:#8b949e">
|
||||||
|
Belief staleness scan will compare belief file <code>depends_on</code> frontmatter<br>
|
||||||
|
against claim <code>merged_at</code> timestamps.<br>
|
||||||
|
<span style="font-size:12px">Ready to implement once cascade tracking accumulates data</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
"""
|
||||||
|
|
||||||
|
scripts = f"""<script>
|
||||||
|
// Confidence pie chart
|
||||||
|
const confData = {json.dumps(vs_conf)};
|
||||||
|
const confLabels = Object.keys(confData);
|
||||||
|
const confValues = Object.values(confData);
|
||||||
|
if (confLabels.length > 0) {{
|
||||||
|
const confColors = {{ 'proven': '#3fb950', 'likely': '#58a6ff', 'experimental': '#d29922', 'speculative': '#f85149', 'unknown': '#8b949e' }};
|
||||||
|
new Chart(document.getElementById('confPieChart'), {{
|
||||||
|
type: 'doughnut',
|
||||||
|
data: {{
|
||||||
|
labels: confLabels,
|
||||||
|
datasets: [{{
|
||||||
|
data: confValues,
|
||||||
|
backgroundColor: confLabels.map(l => confColors[l] || '#8b949e'),
|
||||||
|
borderColor: '#161b22',
|
||||||
|
borderWidth: 2,
|
||||||
|
}}],
|
||||||
|
}},
|
||||||
|
options: {{
|
||||||
|
responsive: true,
|
||||||
|
plugins: {{
|
||||||
|
legend: {{ position: 'right', labels: {{ boxWidth: 12 }} }},
|
||||||
|
}},
|
||||||
|
}},
|
||||||
|
}});
|
||||||
|
}}
|
||||||
|
|
||||||
|
// --- Cascade Coverage (live) ---
|
||||||
|
fetch('/api/cascade-coverage?days=30')
|
||||||
|
.then(r => r.json())
|
||||||
|
.then(data => {{
|
||||||
|
const el = document.getElementById('cascade-container');
|
||||||
|
if (data.total_triggered === 0) {{
|
||||||
|
el.innerHTML = `
|
||||||
|
<div class="card" style="text-align:center;padding:30px">
|
||||||
|
<div style="font-size:14px;color:#d29922">No cascade events recorded yet</div>
|
||||||
|
<div style="font-size:12px;color:#8b949e;margin-top:8px">
|
||||||
|
Cascade instrumentation is deployed. Events will appear as new PRs flow through eval and trigger belief/position reviews.
|
||||||
|
</div>
|
||||||
|
</div>`;
|
||||||
|
return;
|
||||||
|
}}
|
||||||
|
|
||||||
|
const compRate = data.completion_rate != null ? (data.completion_rate * 100).toFixed(1) + '%' : '--';
|
||||||
|
const compColor = data.completion_rate >= 0.7 ? '#3fb950' : data.completion_rate >= 0.4 ? '#d29922' : '#f85149';
|
||||||
|
|
||||||
|
let agentRows = '';
|
||||||
|
for (const a of (data.by_agent || [])) {{
|
||||||
|
agentRows += '<tr><td>' + esc(a.agent) + '</td><td>' + a.triggered + '</td><td>' + a.claims_affected + '</td></tr>';
|
||||||
|
}}
|
||||||
|
|
||||||
|
el.innerHTML = `
|
||||||
|
<div class="grid">
|
||||||
|
<div class="card"><div class="label">Cascades Triggered</div><div class="hero-value">${{data.total_triggered}}</div></div>
|
||||||
|
<div class="card"><div class="label">Cascades Reviewed</div><div class="hero-value">${{data.total_reviewed}}</div></div>
|
||||||
|
<div class="card"><div class="label">Completion Rate</div><div class="hero-value" style="color:${{compColor}}">${{compRate}}</div></div>
|
||||||
|
<div class="card"><div class="label">Merges w/ Cascade</div><div class="hero-value">${{data.merges_with_cascade}}</div></div>
|
||||||
|
</div>
|
||||||
|
<div class="card" style="margin-top:12px">
|
||||||
|
<table>
|
||||||
|
<tr><th>Agent</th><th>Cascades Triggered</th><th>Claims Affected</th></tr>
|
||||||
|
${{agentRows || '<tr><td colspan="3" style="color:#8b949e">No per-agent data</td></tr>'}}
|
||||||
|
</table>
|
||||||
|
</div>`;
|
||||||
|
}}).catch(() => {{
|
||||||
|
document.getElementById('cascade-container').innerHTML =
|
||||||
|
'<div class="card" style="color:#f85149">Failed to load cascade data</div>';
|
||||||
|
}});
|
||||||
|
|
||||||
|
// --- Review Quality (live from review_records) ---
|
||||||
|
fetch('/api/review-summary?days=30')
|
||||||
|
.then(r => r.json())
|
||||||
|
.then(data => {{
|
||||||
|
const el = document.getElementById('review-container');
|
||||||
|
if (!data.populated) {{
|
||||||
|
el.innerHTML = `
|
||||||
|
<div class="card" style="text-align:center;padding:30px">
|
||||||
|
<div style="font-size:14px;color:#d29922">Review records table is empty</div>
|
||||||
|
<div style="font-size:12px;color:#8b949e;margin-top:8px">
|
||||||
|
review_records (migration v12) is deployed. Structured review data will populate as new PRs are evaluated.
|
||||||
|
</div>
|
||||||
|
</div>`;
|
||||||
|
return;
|
||||||
|
}}
|
||||||
|
|
||||||
|
const outcomes = data.outcomes || {{}};
|
||||||
|
const approved = (outcomes['approved'] || 0) + (outcomes['approved-with-changes'] || 0);
|
||||||
|
const rejected = outcomes['rejected'] || 0;
|
||||||
|
const approvalRate = data.total > 0 ? ((approved / data.total) * 100).toFixed(1) : '--';
|
||||||
|
const approvalColor = approved / data.total >= 0.7 ? '#3fb950' : approved / data.total >= 0.5 ? '#d29922' : '#f85149';
|
||||||
|
|
||||||
|
// Rejection reasons
|
||||||
|
let reasonRows = '';
|
||||||
|
for (const r of (data.rejection_reasons || [])) {{
|
||||||
|
reasonRows += '<tr><td><code>' + esc(r.reason) + '</code></td><td>' + r.count + '</td></tr>';
|
||||||
|
}}
|
||||||
|
|
||||||
|
// Disagreement types
|
||||||
|
let disagreeRows = '';
|
||||||
|
for (const d of (data.disagreement_types || [])) {{
|
||||||
|
disagreeRows += '<tr><td>' + esc(d.type) + '</td><td>' + d.count + '</td></tr>';
|
||||||
|
}}
|
||||||
|
|
||||||
|
el.innerHTML = `
|
||||||
|
<div class="grid">
|
||||||
|
<div class="card"><div class="label">Total Reviews</div><div class="hero-value">${{data.total}}</div></div>
|
||||||
|
<div class="card"><div class="label">Approval Rate</div><div class="hero-value" style="color:${{approvalColor}}">${{approvalRate}}%</div></div>
|
||||||
|
<div class="card"><div class="label">Approved w/ Changes</div><div class="hero-value" style="color:#d29922">${{outcomes['approved-with-changes'] || 0}}</div></div>
|
||||||
|
<div class="card"><div class="label">Rejected</div><div class="hero-value" style="color:#f85149">${{rejected}}</div></div>
|
||||||
|
</div>
|
||||||
|
<div class="row" style="margin-top:12px">
|
||||||
|
<div class="card">
|
||||||
|
<div style="font-weight:600;margin-bottom:8px">Rejection Reasons</div>
|
||||||
|
<table>
|
||||||
|
<tr><th>Reason</th><th>Count</th></tr>
|
||||||
|
${{reasonRows || '<tr><td colspan="2" style="color:#8b949e">No rejections</td></tr>'}}
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
<div class="card">
|
||||||
|
<div style="font-weight:600;margin-bottom:8px">Disagreement Types</div>
|
||||||
|
<table>
|
||||||
|
<tr><th>Type</th><th>Count</th></tr>
|
||||||
|
${{disagreeRows || '<tr><td colspan="2" style="color:#8b949e">No disagreements</td></tr>'}}
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
</div>`;
|
||||||
|
}}).catch(() => {{
|
||||||
|
document.getElementById('review-container').innerHTML =
|
||||||
|
'<div class="card" style="color:#f85149">Failed to load review data</div>';
|
||||||
|
}});
|
||||||
|
</script>"""
|
||||||
|
|
||||||
|
return render_page(
|
||||||
|
title="Epistemic Integrity",
|
||||||
|
subtitle="Can we trust what we know?",
|
||||||
|
active_path="/epistemic",
|
||||||
|
body_html=body,
|
||||||
|
scripts=scripts,
|
||||||
|
timestamp=now.strftime("%Y-%m-%d %H:%M UTC"),
|
||||||
|
)
|
||||||
223
ops/diagnostics/dashboard_health.py
Normal file
223
ops/diagnostics/dashboard_health.py
Normal file
|
|
@ -0,0 +1,223 @@
|
||||||
|
"""Page 2: Knowledge Health — "What do we know and how good is it?"
|
||||||
|
|
||||||
|
Renders: claims by domain, Herfindahl index, evidence freshness,
|
||||||
|
orphan ratio, link density, confidence distribution, extraction yield.
|
||||||
|
|
||||||
|
Data sources: /api/vital-signs, /api/herfindahl, /api/extraction-yield-by-domain,
|
||||||
|
/api/domains, claim-index (cached).
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
from shared_ui import render_page
|
||||||
|
|
||||||
|
|
||||||
|
def render_health_page(vital_signs: dict, domain_breakdown: dict, now: datetime) -> str:
|
||||||
|
"""Render the Knowledge Health page."""
|
||||||
|
|
||||||
|
# --- Vital signs data ---
|
||||||
|
vs_orphan = vital_signs.get("orphan_ratio", {})
|
||||||
|
orphan_ratio_val = vs_orphan.get("ratio")
|
||||||
|
orphan_color = {"healthy": "green", "warning": "yellow", "critical": "red"}.get(vs_orphan.get("status", ""), "")
|
||||||
|
orphan_display = f"{orphan_ratio_val:.1%}" if orphan_ratio_val is not None else "—"
|
||||||
|
|
||||||
|
vs_linkage = vital_signs.get("linkage_density") or {}
|
||||||
|
linkage_display = f'{vs_linkage.get("avg_outgoing_links", "—")}'
|
||||||
|
cross_domain_ratio = vs_linkage.get("cross_domain_ratio")
|
||||||
|
cross_domain_color = "green" if cross_domain_ratio and cross_domain_ratio >= 0.15 else (
|
||||||
|
"yellow" if cross_domain_ratio and cross_domain_ratio >= 0.05 else "red"
|
||||||
|
) if cross_domain_ratio is not None else ""
|
||||||
|
|
||||||
|
vs_fresh = vital_signs.get("evidence_freshness") or {}
|
||||||
|
fresh_display = f'{vs_fresh.get("median_age_days", "—")}' if vs_fresh.get("median_age_days") else "—"
|
||||||
|
fresh_pct = vs_fresh.get("fresh_30d_pct", 0)
|
||||||
|
|
||||||
|
vs_conf = vital_signs.get("confidence_distribution", {})
|
||||||
|
|
||||||
|
# Domain activity
|
||||||
|
stagnant = vital_signs.get("domain_activity", {}).get("stagnant", [])
|
||||||
|
active_domains = vital_signs.get("domain_activity", {}).get("active", [])
|
||||||
|
|
||||||
|
claim_status = vital_signs.get("claim_index_status", "unavailable")
|
||||||
|
|
||||||
|
# Domain breakdown table
|
||||||
|
domain_rows = ""
|
||||||
|
for domain, stats in sorted(domain_breakdown.items(), key=lambda x: x[1].get("knowledge_prs", 0), reverse=True):
|
||||||
|
if stats.get("knowledge_prs", 0) > 0:
|
||||||
|
top_contribs = ", ".join(f'{c["handle"]} ({c["claims"]})' for c in stats.get("contributors", [])[:3])
|
||||||
|
domain_rows += f"""<tr>
|
||||||
|
<td style="color:#58a6ff">{domain}</td>
|
||||||
|
<td>{stats["knowledge_prs"]}</td>
|
||||||
|
<td>{stats["total_prs"]}</td>
|
||||||
|
<td style="font-size:12px;color:#8b949e">{top_contribs}</td>
|
||||||
|
</tr>"""
|
||||||
|
|
||||||
|
body = f"""
|
||||||
|
<!-- Vital Signs Cards -->
|
||||||
|
<div class="grid">
|
||||||
|
<div class="card">
|
||||||
|
<div class="label">Orphan Ratio</div>
|
||||||
|
<div class="value {orphan_color}">{orphan_display}</div>
|
||||||
|
<div class="detail">{vs_orphan.get("count", "?")} / {vs_orphan.get("total", "?")} claims · target <15%</div>
|
||||||
|
</div>
|
||||||
|
<div class="card">
|
||||||
|
<div class="label">Avg Links/Claim</div>
|
||||||
|
<div class="value">{linkage_display}</div>
|
||||||
|
<div class="detail">cross-domain: <span class="{cross_domain_color}">{f"{cross_domain_ratio:.1%}" if cross_domain_ratio is not None else "—"}</span> · target 15-30%</div>
|
||||||
|
</div>
|
||||||
|
<div class="card">
|
||||||
|
<div class="label">Evidence Freshness</div>
|
||||||
|
<div class="value">{fresh_display}<span style="font-size:14px;color:#8b949e">d median</span></div>
|
||||||
|
<div class="detail">{vs_fresh.get("fresh_30d_count", "?")} claims <30d old · {fresh_pct:.0f}% fresh</div>
|
||||||
|
</div>
|
||||||
|
<div class="card">
|
||||||
|
<div class="label">Confidence Spread</div>
|
||||||
|
<div class="value" style="font-size:16px">{" / ".join(f"{vs_conf.get(k, 0)}" for k in ["proven", "likely", "experimental", "speculative"])}</div>
|
||||||
|
<div class="detail">proven / likely / experimental / speculative</div>
|
||||||
|
</div>
|
||||||
|
<div class="card">
|
||||||
|
<div class="label">Claim Index</div>
|
||||||
|
<div class="value {'green' if claim_status == 'live' else 'red'}">{claim_status}</div>
|
||||||
|
<div class="detail">{vs_orphan.get("total", "?")} claims indexed</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Herfindahl + Domain Yield (loaded via JS) -->
|
||||||
|
<div class="row">
|
||||||
|
<div class="section">
|
||||||
|
<div class="section-title">Domain Concentration</div>
|
||||||
|
<div id="herfindahl-container" class="card" style="text-align:center;padding:24px">
|
||||||
|
<div class="label">Loading...</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="section">
|
||||||
|
<div class="section-title">Extraction Yield by Domain</div>
|
||||||
|
<div id="yield-domain-container" class="card">
|
||||||
|
<div style="color:#8b949e;text-align:center;padding:16px">Loading...</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Charts -->
|
||||||
|
<div class="row">
|
||||||
|
<div class="chart-container">
|
||||||
|
<h2>Claims by Domain</h2>
|
||||||
|
<canvas id="domainChart"></canvas>
|
||||||
|
</div>
|
||||||
|
<div class="chart-container">
|
||||||
|
<h2>Confidence Distribution</h2>
|
||||||
|
<canvas id="confidenceChart"></canvas>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Domain Breakdown Table -->
|
||||||
|
<div class="section">
|
||||||
|
<div class="section-title">Contributions by Domain</div>
|
||||||
|
<div class="card">
|
||||||
|
<table>
|
||||||
|
<tr><th>Domain</th><th>Knowledge PRs</th><th>Total PRs</th><th>Top Contributors</th></tr>
|
||||||
|
{domain_rows if domain_rows else "<tr><td colspan='4' style='color:#8b949e'>No domain data</td></tr>"}
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Stagnation Alerts -->
|
||||||
|
{"" if not stagnant else f'''
|
||||||
|
<div class="section">
|
||||||
|
<div class="section-title" style="color:#d29922">Stagnation Alerts</div>
|
||||||
|
<div class="card">
|
||||||
|
<p style="color:#d29922">Domains with no PR activity in 7 days: <strong>{", ".join(stagnant)}</strong></p>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
'''}
|
||||||
|
"""
|
||||||
|
|
||||||
|
scripts = f"""<script>
|
||||||
|
// --- Herfindahl index ---
|
||||||
|
fetch('/api/herfindahl?days=30')
|
||||||
|
.then(r => r.json())
|
||||||
|
.then(data => {{
|
||||||
|
const container = document.getElementById('herfindahl-container');
|
||||||
|
const statusColor = data.status === 'diverse' ? 'green' : data.status === 'moderate' ? 'yellow' : 'red';
|
||||||
|
let domainsHtml = data.domains.map(d =>
|
||||||
|
'<div style="display:flex;justify-content:space-between;padding:4px 0;border-bottom:1px solid #21262d">' +
|
||||||
|
'<span>' + esc(d.domain) + '</span>' +
|
||||||
|
'<span style="color:#8b949e">' + d.count + ' (' + (d.share * 100).toFixed(1) + '%)</span></div>'
|
||||||
|
).join('');
|
||||||
|
container.innerHTML =
|
||||||
|
'<div class="value ' + statusColor + '">' + data.hhi.toFixed(4) + '</div>' +
|
||||||
|
'<div class="detail">' + data.status + ' · ' + data.total_merged + ' merged (30d)</div>' +
|
||||||
|
'<div style="margin-top:12px;text-align:left">' + domainsHtml + '</div>';
|
||||||
|
}}).catch(() => {{}});
|
||||||
|
|
||||||
|
// --- Extraction yield by domain ---
|
||||||
|
fetch('/api/extraction-yield-by-domain?days=30')
|
||||||
|
.then(r => r.json())
|
||||||
|
.then(data => {{
|
||||||
|
const container = document.getElementById('yield-domain-container');
|
||||||
|
if (!data.domains || data.domains.length === 0) {{
|
||||||
|
container.innerHTML = '<div style="color:#8b949e;text-align:center;padding:16px">No yield data</div>';
|
||||||
|
return;
|
||||||
|
}}
|
||||||
|
let html = '<table><tr><th>Domain</th><th>PRs</th><th>Merged</th><th>Yield</th></tr>';
|
||||||
|
data.domains.forEach(d => {{
|
||||||
|
const yieldColor = d.yield >= 0.5 ? 'green' : d.yield >= 0.3 ? 'yellow' : 'red';
|
||||||
|
html += '<tr><td>' + esc(d.domain) + '</td><td>' + d.total_prs + '</td>' +
|
||||||
|
'<td>' + d.merged + '</td><td class="' + yieldColor + '">' + (d.yield * 100).toFixed(1) + '%</td></tr>';
|
||||||
|
}});
|
||||||
|
html += '</table>';
|
||||||
|
container.innerHTML = html;
|
||||||
|
}}).catch(() => {{}});
|
||||||
|
|
||||||
|
// --- Domain distribution chart ---
|
||||||
|
const domainData = {json.dumps({d: s.get("knowledge_prs", 0) for d, s in domain_breakdown.items() if s.get("knowledge_prs", 0) > 0})};
|
||||||
|
const domainLabels = Object.keys(domainData);
|
||||||
|
const domainValues = Object.values(domainData);
|
||||||
|
if (domainLabels.length > 0) {{
|
||||||
|
const colors = ['#58a6ff', '#3fb950', '#d29922', '#f0883e', '#bc8cff', '#f85149', '#8b949e', '#ec4899'];
|
||||||
|
new Chart(document.getElementById('domainChart'), {{
|
||||||
|
type: 'doughnut',
|
||||||
|
data: {{
|
||||||
|
labels: domainLabels,
|
||||||
|
datasets: [{{ data: domainValues, backgroundColor: domainLabels.map((_, i) => colors[i % colors.length]), borderColor: '#161b22', borderWidth: 2 }}],
|
||||||
|
}},
|
||||||
|
options: {{
|
||||||
|
responsive: true,
|
||||||
|
plugins: {{ legend: {{ position: 'right', labels: {{ boxWidth: 12, font: {{ size: 11 }} }} }} }},
|
||||||
|
}},
|
||||||
|
}});
|
||||||
|
}}
|
||||||
|
|
||||||
|
// --- Confidence distribution chart ---
|
||||||
|
const confData = {json.dumps(vs_conf)};
|
||||||
|
const confLabels = Object.keys(confData);
|
||||||
|
const confValues = Object.values(confData);
|
||||||
|
if (confLabels.length > 0) {{
|
||||||
|
const confColors = {{ 'proven': '#3fb950', 'likely': '#58a6ff', 'experimental': '#d29922', 'speculative': '#f85149', 'unknown': '#8b949e' }};
|
||||||
|
new Chart(document.getElementById('confidenceChart'), {{
|
||||||
|
type: 'bar',
|
||||||
|
data: {{
|
||||||
|
labels: confLabels,
|
||||||
|
datasets: [{{ data: confValues, backgroundColor: confLabels.map(l => confColors[l] || '#8b949e') }}],
|
||||||
|
}},
|
||||||
|
options: {{
|
||||||
|
responsive: true,
|
||||||
|
plugins: {{ legend: {{ display: false }} }},
|
||||||
|
scales: {{
|
||||||
|
y: {{ title: {{ display: true, text: 'Claims' }}, min: 0 }},
|
||||||
|
x: {{ grid: {{ display: false }} }},
|
||||||
|
}},
|
||||||
|
}},
|
||||||
|
}});
|
||||||
|
}}
|
||||||
|
</script>"""
|
||||||
|
|
||||||
|
return render_page(
|
||||||
|
title="Knowledge Health",
|
||||||
|
subtitle="What do we know and how good is it?",
|
||||||
|
active_path="/health",
|
||||||
|
body_html=body,
|
||||||
|
scripts=scripts,
|
||||||
|
timestamp=now.strftime("%Y-%m-%d %H:%M UTC"),
|
||||||
|
)
|
||||||
464
ops/diagnostics/dashboard_ops.py
Normal file
464
ops/diagnostics/dashboard_ops.py
Normal file
|
|
@ -0,0 +1,464 @@
|
||||||
|
"""Page 1: Pipeline Operations — "Is the machine running?"
|
||||||
|
|
||||||
|
Renders: queue depth, throughput, error rate, stage flow, breakers,
|
||||||
|
funnel, rejection reasons, fix cycle, time-series charts.
|
||||||
|
|
||||||
|
All data comes from existing endpoints: /api/metrics, /api/snapshots,
|
||||||
|
/api/stage-times, /api/alerts, /api/fix-rates.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
|
from shared_ui import render_page
|
||||||
|
|
||||||
|
|
||||||
|
def render_ops_page(metrics: dict, snapshots: list, changes: list,
|
||||||
|
vital_signs: dict, now: datetime) -> str:
|
||||||
|
"""Render the Pipeline Operations page."""
|
||||||
|
|
||||||
|
# --- Prepare chart data ---
|
||||||
|
timestamps = [s["ts"] for s in snapshots]
|
||||||
|
throughput_data = [s.get("throughput_1h", 0) for s in snapshots]
|
||||||
|
approval_data = [(s.get("approval_rate") or 0) * 100 for s in snapshots]
|
||||||
|
open_prs_data = [s.get("open_prs", 0) for s in snapshots]
|
||||||
|
merged_data = [s.get("merged_total", 0) for s in snapshots]
|
||||||
|
|
||||||
|
rej_wiki = [s.get("rejection_broken_wiki_links", 0) for s in snapshots]
|
||||||
|
rej_schema = [s.get("rejection_frontmatter_schema", 0) for s in snapshots]
|
||||||
|
rej_dup = [s.get("rejection_near_duplicate", 0) for s in snapshots]
|
||||||
|
rej_conf = [s.get("rejection_confidence", 0) for s in snapshots]
|
||||||
|
rej_other = [s.get("rejection_other", 0) for s in snapshots]
|
||||||
|
|
||||||
|
# origin_agent/origin_human removed — replaced by /api/growth chart
|
||||||
|
|
||||||
|
annotations_js = json.dumps([
|
||||||
|
{
|
||||||
|
"type": "line", "xMin": c["ts"], "xMax": c["ts"],
|
||||||
|
"borderColor": "#d29922" if c["type"] == "prompt" else "#58a6ff",
|
||||||
|
"borderWidth": 1, "borderDash": [4, 4],
|
||||||
|
"label": {"display": True, "content": f"{c['type']}: {c.get('to', '?')}",
|
||||||
|
"position": "start", "backgroundColor": "#161b22",
|
||||||
|
"color": "#8b949e", "font": {"size": 10}},
|
||||||
|
}
|
||||||
|
for c in changes
|
||||||
|
])
|
||||||
|
|
||||||
|
# --- Status helpers ---
|
||||||
|
sm = metrics["status_map"]
|
||||||
|
ar = metrics["approval_rate"]
|
||||||
|
ar_color = "green" if ar > 0.5 else ("yellow" if ar > 0.2 else "red")
|
||||||
|
fr_color = "green" if metrics["fix_rate"] > 0.3 else ("yellow" if metrics["fix_rate"] > 0.1 else "red")
|
||||||
|
|
||||||
|
vs_review = vital_signs["review_throughput"]
|
||||||
|
vs_status_color = {"healthy": "green", "warning": "yellow", "critical": "red"}.get(vs_review["status"], "yellow")
|
||||||
|
|
||||||
|
# --- Rejection reasons table ---
|
||||||
|
reason_rows = "".join(
|
||||||
|
f'<tr><td><code>{r["tag"]}</code></td><td>{r["unique_prs"]}</td>'
|
||||||
|
f'<td style="color:#8b949e">{r["count"]}</td></tr>'
|
||||||
|
for r in metrics["rejection_reasons"]
|
||||||
|
)
|
||||||
|
|
||||||
|
# --- Breaker rows ---
|
||||||
|
breaker_rows = ""
|
||||||
|
for name, info in metrics["breakers"].items():
|
||||||
|
state = info["state"]
|
||||||
|
color = "green" if state == "closed" else ("red" if state == "open" else "yellow")
|
||||||
|
age = f'{info.get("age_s", "?")}s ago' if "age_s" in info else "-"
|
||||||
|
breaker_rows += f'<tr><td>{name}</td><td class="{color}">{state}</td><td>{info["failures"]}</td><td>{age}</td></tr>'
|
||||||
|
|
||||||
|
# --- Funnel ---
|
||||||
|
funnel = vital_signs["funnel"]
|
||||||
|
|
||||||
|
# --- Queue staleness ---
|
||||||
|
qs = vital_signs.get("queue_staleness", {})
|
||||||
|
stale_count = qs.get("stale_count", 0)
|
||||||
|
stale_status = qs.get("status", "healthy")
|
||||||
|
stale_color = {"healthy": "green", "warning": "yellow", "critical": "red"}.get(stale_status, "")
|
||||||
|
|
||||||
|
body = f"""
|
||||||
|
<!-- Hero Cards -->
|
||||||
|
<div class="grid">
|
||||||
|
<div class="card">
|
||||||
|
<div class="label">Throughput</div>
|
||||||
|
<div class="value">{metrics["throughput_1h"]}<span style="font-size:14px;color:#8b949e">/hr</span></div>
|
||||||
|
<div class="detail">merged last hour</div>
|
||||||
|
</div>
|
||||||
|
<div class="card">
|
||||||
|
<div class="label">Approval Rate (24h)</div>
|
||||||
|
<div class="value {ar_color}">{ar:.1%}</div>
|
||||||
|
<div class="detail">{metrics["approved_24h"]}/{metrics["evaluated_24h"]} evaluated</div>
|
||||||
|
</div>
|
||||||
|
<div class="card">
|
||||||
|
<div class="label">Review Backlog</div>
|
||||||
|
<div class="value {vs_status_color}">{vs_review["backlog"]}</div>
|
||||||
|
<div class="detail">{vs_review["open_prs"]} open + {vs_review["reviewing_prs"]} reviewing + {vs_review["approved_waiting"]} approved</div>
|
||||||
|
</div>
|
||||||
|
<div class="card">
|
||||||
|
<div class="label">Merged Total</div>
|
||||||
|
<div class="value green">{sm.get("merged", 0)}</div>
|
||||||
|
<div class="detail">{sm.get("closed", 0)} closed</div>
|
||||||
|
</div>
|
||||||
|
<div class="card">
|
||||||
|
<div class="label">Fix Success</div>
|
||||||
|
<div class="value {fr_color}">{metrics["fix_rate"]:.1%}</div>
|
||||||
|
<div class="detail">{metrics["fix_succeeded"]}/{metrics["fix_attempted"]} fixed</div>
|
||||||
|
</div>
|
||||||
|
<div class="card">
|
||||||
|
<div class="label">Time to Merge</div>
|
||||||
|
<div class="value">{f"{metrics['median_ttm_minutes']:.0f}" if metrics["median_ttm_minutes"] else "—"}<span style="font-size:14px;color:#8b949e">min</span></div>
|
||||||
|
<div class="detail">median (24h)</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Alert Banner (loaded via JS) -->
|
||||||
|
<div id="alert-banner"></div>
|
||||||
|
|
||||||
|
<!-- Pipeline Funnel -->
|
||||||
|
<div class="section">
|
||||||
|
<div class="section-title">Pipeline Funnel</div>
|
||||||
|
<div class="funnel">
|
||||||
|
<div class="funnel-step"><div class="num">{funnel["sources_total"]}</div><div class="lbl">Sources</div></div>
|
||||||
|
<div class="funnel-arrow">→</div>
|
||||||
|
<div class="funnel-step"><div class="num" style="color:#f0883e">{funnel["sources_queued"]}</div><div class="lbl">In Queue</div></div>
|
||||||
|
<div class="funnel-arrow">→</div>
|
||||||
|
<div class="funnel-step"><div class="num">{funnel["sources_extracted"]}</div><div class="lbl">Extracted</div></div>
|
||||||
|
<div class="funnel-arrow">→</div>
|
||||||
|
<div class="funnel-step"><div class="num">{funnel["prs_total"]}</div><div class="lbl">PRs Created</div></div>
|
||||||
|
<div class="funnel-arrow">→</div>
|
||||||
|
<div class="funnel-step"><div class="num green">{funnel["prs_merged"]}</div><div class="lbl">Merged</div></div>
|
||||||
|
<div class="funnel-arrow">→</div>
|
||||||
|
<div class="funnel-step"><div class="num blue">{funnel["conversion_rate"]:.1%}</div><div class="lbl">Conversion</div></div>
|
||||||
|
</div>
|
||||||
|
<div style="margin-top:8px;font-size:12px;color:#8b949e">
|
||||||
|
Queue staleness: <span class="{stale_color}">{stale_count} stale</span>
|
||||||
|
{f'(oldest: {qs.get("oldest_age_days", "?")}d)' if stale_count > 0 else ""}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Stage Dwell Times (loaded via JS) -->
|
||||||
|
<div class="section">
|
||||||
|
<div class="section-title">Stage Dwell Times</div>
|
||||||
|
<div id="stage-times-container" class="grid"></div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Charts -->
|
||||||
|
<div id="no-chart-data" class="card" style="text-align:center;padding:40px;margin:16px 0;display:none">
|
||||||
|
<p style="color:#8b949e">No time-series data yet.</p>
|
||||||
|
</div>
|
||||||
|
<div id="chart-section">
|
||||||
|
<div class="row">
|
||||||
|
<div class="chart-container">
|
||||||
|
<h2>Throughput & Approval Rate</h2>
|
||||||
|
<canvas id="throughputChart"></canvas>
|
||||||
|
</div>
|
||||||
|
<div class="chart-container">
|
||||||
|
<h2>Rejection Reasons Over Time</h2>
|
||||||
|
<canvas id="rejectionChart"></canvas>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="row">
|
||||||
|
<div class="chart-container">
|
||||||
|
<h2>PR Backlog</h2>
|
||||||
|
<canvas id="backlogChart"></canvas>
|
||||||
|
</div>
|
||||||
|
<div class="chart-container">
|
||||||
|
<h2>Cumulative Growth</h2>
|
||||||
|
<canvas id="growthChart"></canvas>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- PR Trace Lookup -->
|
||||||
|
<div class="section">
|
||||||
|
<div class="section-title">PR Trace Lookup</div>
|
||||||
|
<div class="card">
|
||||||
|
<div style="display:flex;gap:8px;align-items:center">
|
||||||
|
<input id="trace-pr-input" type="number" placeholder="Enter PR number"
|
||||||
|
style="background:#0d1117;border:1px solid #30363d;color:#c9d1d9;padding:8px 12px;border-radius:6px;width:180px;font-size:14px">
|
||||||
|
<button onclick="loadTrace()" style="background:#238636;color:#fff;border:none;padding:8px 16px;border-radius:6px;cursor:pointer;font-size:13px;font-weight:600">Trace</button>
|
||||||
|
</div>
|
||||||
|
<div id="trace-result" style="margin-top:12px"></div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Tables -->
|
||||||
|
<div class="row">
|
||||||
|
<div class="section">
|
||||||
|
<div class="section-title">Top Rejection Reasons (24h)</div>
|
||||||
|
<div class="card">
|
||||||
|
<table>
|
||||||
|
<tr><th>Issue</th><th>PRs</th><th style="color:#8b949e">Events</th></tr>
|
||||||
|
{reason_rows if reason_rows else "<tr><td colspan='3' style='color:#8b949e'>No rejections in 24h</td></tr>"}
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="section">
|
||||||
|
<div class="section-title">Circuit Breakers</div>
|
||||||
|
<div class="card">
|
||||||
|
<table>
|
||||||
|
<tr><th>Stage</th><th>State</th><th>Failures</th><th>Last Success</th></tr>
|
||||||
|
{breaker_rows if breaker_rows else "<tr><td colspan='4' style='color:#8b949e'>No breaker data</td></tr>"}
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
"""
|
||||||
|
|
||||||
|
scripts = f"""<script>
|
||||||
|
const timestamps = {json.dumps(timestamps)};
|
||||||
|
|
||||||
|
// --- Alerts banner ---
|
||||||
|
fetch('/api/alerts')
|
||||||
|
.then(r => r.json())
|
||||||
|
.then(data => {{
|
||||||
|
if (data.alerts && data.alerts.length > 0) {{
|
||||||
|
const critical = data.alerts.filter(a => a.severity === 'critical');
|
||||||
|
const warning = data.alerts.filter(a => a.severity === 'warning');
|
||||||
|
let html = '';
|
||||||
|
if (critical.length > 0) {{
|
||||||
|
html += '<div class="alert-banner alert-critical">' +
|
||||||
|
critical.map(a => '!! ' + esc(a.title)).join('<br>') + '</div>';
|
||||||
|
}}
|
||||||
|
if (warning.length > 0) {{
|
||||||
|
html += '<div class="alert-banner alert-warning">' +
|
||||||
|
warning.map(a => '! ' + esc(a.title)).join('<br>') + '</div>';
|
||||||
|
}}
|
||||||
|
document.getElementById('alert-banner').innerHTML = html;
|
||||||
|
}}
|
||||||
|
}}).catch(() => {{}});
|
||||||
|
|
||||||
|
// --- Stage dwell times ---
|
||||||
|
fetch('/api/stage-times?hours=24')
|
||||||
|
.then(r => r.json())
|
||||||
|
.then(data => {{
|
||||||
|
const container = document.getElementById('stage-times-container');
|
||||||
|
const stages = data.stages || {{}};
|
||||||
|
if (Object.keys(stages).length === 0) {{
|
||||||
|
container.innerHTML = '<div class="card" style="grid-column:1/-1;text-align:center;color:#8b949e">No stage timing data yet</div>';
|
||||||
|
return;
|
||||||
|
}}
|
||||||
|
let html = '';
|
||||||
|
for (const [label, info] of Object.entries(stages)) {{
|
||||||
|
const color = info.median_minutes < 5 ? 'green' : info.median_minutes < 30 ? 'yellow' : 'red';
|
||||||
|
html += '<div class="card"><div class="label">' + esc(label) + '</div>' +
|
||||||
|
'<div class="value ' + color + '">' + info.median_minutes.toFixed(1) + '<span style="font-size:14px;color:#8b949e">min</span></div>' +
|
||||||
|
'<div class="detail">median (' + info.count + ' PRs)' +
|
||||||
|
(info.p90_minutes ? ' · p90: ' + info.p90_minutes.toFixed(1) + 'min' : '') +
|
||||||
|
'</div></div>';
|
||||||
|
}}
|
||||||
|
container.innerHTML = html;
|
||||||
|
}}).catch(() => {{}});
|
||||||
|
|
||||||
|
// --- Time-series charts ---
|
||||||
|
if (timestamps.length === 0) {{
|
||||||
|
document.getElementById('chart-section').style.display = 'none';
|
||||||
|
document.getElementById('no-chart-data').style.display = 'block';
|
||||||
|
}} else {{
|
||||||
|
|
||||||
|
const throughputData = {json.dumps(throughput_data)};
|
||||||
|
const approvalData = {json.dumps(approval_data)};
|
||||||
|
const openPrsData = {json.dumps(open_prs_data)};
|
||||||
|
const mergedData = {json.dumps(merged_data)};
|
||||||
|
const rejWiki = {json.dumps(rej_wiki)};
|
||||||
|
const rejSchema = {json.dumps(rej_schema)};
|
||||||
|
const rejDup = {json.dumps(rej_dup)};
|
||||||
|
const rejConf = {json.dumps(rej_conf)};
|
||||||
|
const rejOther = {json.dumps(rej_other)};
|
||||||
|
const annotations = {annotations_js};
|
||||||
|
|
||||||
|
new Chart(document.getElementById('throughputChart'), {{
|
||||||
|
type: 'line',
|
||||||
|
data: {{
|
||||||
|
labels: timestamps,
|
||||||
|
datasets: [
|
||||||
|
{{ label: 'Throughput/hr', data: throughputData, borderColor: '#58a6ff', backgroundColor: 'rgba(88,166,255,0.1)', fill: true, tension: 0.3, yAxisID: 'y', pointRadius: 1 }},
|
||||||
|
{{ label: 'Approval %', data: approvalData, borderColor: '#3fb950', borderDash: [4,2], tension: 0.3, yAxisID: 'y1', pointRadius: 1 }},
|
||||||
|
],
|
||||||
|
}},
|
||||||
|
options: {{
|
||||||
|
responsive: true,
|
||||||
|
interaction: {{ mode: 'index', intersect: false }},
|
||||||
|
scales: {{
|
||||||
|
x: {{ type: 'time', time: {{ unit: 'hour', displayFormats: {{ hour: 'MMM d HH:mm' }} }}, grid: {{ display: false }} }},
|
||||||
|
y: {{ position: 'left', title: {{ display: true, text: 'PRs/hr' }}, min: 0 }},
|
||||||
|
y1: {{ position: 'right', title: {{ display: true, text: 'Approval %' }}, min: 0, max: 100, grid: {{ drawOnChartArea: false }} }},
|
||||||
|
}},
|
||||||
|
plugins: {{ annotation: {{ annotations }}, legend: {{ labels: {{ boxWidth: 12 }} }} }},
|
||||||
|
}},
|
||||||
|
}});
|
||||||
|
|
||||||
|
new Chart(document.getElementById('rejectionChart'), {{
|
||||||
|
type: 'line',
|
||||||
|
data: {{
|
||||||
|
labels: timestamps,
|
||||||
|
datasets: [
|
||||||
|
{{ label: 'Wiki Links', data: rejWiki, borderColor: '#f85149', backgroundColor: 'rgba(248,81,73,0.2)', fill: true, tension: 0.3, pointRadius: 0 }},
|
||||||
|
{{ label: 'Schema', data: rejSchema, borderColor: '#d29922', backgroundColor: 'rgba(210,153,34,0.2)', fill: true, tension: 0.3, pointRadius: 0 }},
|
||||||
|
{{ label: 'Duplicate', data: rejDup, borderColor: '#8b949e', backgroundColor: 'rgba(139,148,158,0.2)', fill: true, tension: 0.3, pointRadius: 0 }},
|
||||||
|
{{ label: 'Confidence', data: rejConf, borderColor: '#bc8cff', backgroundColor: 'rgba(188,140,255,0.2)', fill: true, tension: 0.3, pointRadius: 0 }},
|
||||||
|
{{ label: 'Other', data: rejOther, borderColor: '#6e7681', backgroundColor: 'rgba(110,118,129,0.15)', fill: true, tension: 0.3, pointRadius: 0 }},
|
||||||
|
],
|
||||||
|
}},
|
||||||
|
options: {{
|
||||||
|
responsive: true,
|
||||||
|
scales: {{
|
||||||
|
x: {{ type: 'time', time: {{ unit: 'hour', displayFormats: {{ hour: 'MMM d HH:mm' }} }}, grid: {{ display: false }} }},
|
||||||
|
y: {{ stacked: true, min: 0, title: {{ display: true, text: 'Count (24h)' }} }},
|
||||||
|
}},
|
||||||
|
plugins: {{ annotation: {{ annotations }}, legend: {{ labels: {{ boxWidth: 12 }} }} }},
|
||||||
|
}},
|
||||||
|
}});
|
||||||
|
|
||||||
|
new Chart(document.getElementById('backlogChart'), {{
|
||||||
|
type: 'line',
|
||||||
|
data: {{
|
||||||
|
labels: timestamps,
|
||||||
|
datasets: [
|
||||||
|
{{ label: 'Open PRs', data: openPrsData, borderColor: '#d29922', backgroundColor: 'rgba(210,153,34,0.15)', fill: true, tension: 0.3, pointRadius: 1 }},
|
||||||
|
{{ label: 'Merged (total)', data: mergedData, borderColor: '#3fb950', tension: 0.3, pointRadius: 1 }},
|
||||||
|
],
|
||||||
|
}},
|
||||||
|
options: {{
|
||||||
|
responsive: true,
|
||||||
|
scales: {{
|
||||||
|
x: {{ type: 'time', time: {{ unit: 'hour', displayFormats: {{ hour: 'MMM d HH:mm' }} }}, grid: {{ display: false }} }},
|
||||||
|
y: {{ min: 0, title: {{ display: true, text: 'PRs' }} }},
|
||||||
|
}},
|
||||||
|
plugins: {{ legend: {{ labels: {{ boxWidth: 12 }} }} }},
|
||||||
|
}},
|
||||||
|
}});
|
||||||
|
|
||||||
|
}} // end if timestamps
|
||||||
|
|
||||||
|
// Growth chart loaded async from /api/growth (independent of snapshots)
|
||||||
|
fetch('/api/growth?days=90')
|
||||||
|
.then(r => r.json())
|
||||||
|
.then(data => {{
|
||||||
|
if (!data.dates || data.dates.length === 0) return;
|
||||||
|
new Chart(document.getElementById('growthChart'), {{
|
||||||
|
type: 'line',
|
||||||
|
data: {{
|
||||||
|
labels: data.dates,
|
||||||
|
datasets: [
|
||||||
|
{{ label: 'Sources', data: data.sources, borderColor: '#58a6ff', backgroundColor: 'rgba(88,166,255,0.1)', fill: true, tension: 0.3, pointRadius: 1 }},
|
||||||
|
{{ label: 'PRs Created', data: data.prs, borderColor: '#d29922', backgroundColor: 'rgba(210,153,34,0.1)', fill: false, tension: 0.3, pointRadius: 1 }},
|
||||||
|
{{ label: 'Merged', data: data.merged, borderColor: '#3fb950', backgroundColor: 'rgba(63,185,80,0.1)', fill: false, tension: 0.3, pointRadius: 1 }},
|
||||||
|
],
|
||||||
|
}},
|
||||||
|
options: {{
|
||||||
|
responsive: true,
|
||||||
|
interaction: {{ mode: 'index', intersect: false }},
|
||||||
|
scales: {{
|
||||||
|
x: {{ type: 'time', time: {{ unit: 'day', displayFormats: {{ day: 'MMM d' }} }}, grid: {{ display: false }} }},
|
||||||
|
y: {{ min: 0, title: {{ display: true, text: 'Cumulative Count' }} }},
|
||||||
|
}},
|
||||||
|
plugins: {{ legend: {{ labels: {{ boxWidth: 12 }} }} }},
|
||||||
|
}},
|
||||||
|
}});
|
||||||
|
}}).catch(() => {{}});
|
||||||
|
|
||||||
|
// --- PR Trace Lookup ---
|
||||||
|
document.getElementById('trace-pr-input').addEventListener('keydown', e => {{ if (e.key === 'Enter') loadTrace(); }});
|
||||||
|
|
||||||
|
function loadTrace() {{
|
||||||
|
const pr = document.getElementById('trace-pr-input').value.trim();
|
||||||
|
const container = document.getElementById('trace-result');
|
||||||
|
if (!pr) {{ container.innerHTML = '<p style="color:#8b949e">Enter a PR number</p>'; return; }}
|
||||||
|
container.innerHTML = '<p style="color:#8b949e">Loading...</p>';
|
||||||
|
|
||||||
|
fetch('/api/trace/' + encodeURIComponent(pr))
|
||||||
|
.then(r => r.json())
|
||||||
|
.then(data => {{
|
||||||
|
if (!data.pr && data.timeline.length === 0) {{
|
||||||
|
container.innerHTML = '<p style="color:#8b949e">No trace found for PR ' + esc(pr) + '</p>';
|
||||||
|
return;
|
||||||
|
}}
|
||||||
|
|
||||||
|
const stageColors = {{
|
||||||
|
ingest: '#58a6ff', validate: '#d29922', evaluate: '#f0883e',
|
||||||
|
merge: '#3fb950', cascade: '#bc8cff', cross_domain: '#79c0ff'
|
||||||
|
}};
|
||||||
|
|
||||||
|
let html = '';
|
||||||
|
|
||||||
|
// PR summary
|
||||||
|
if (data.pr) {{
|
||||||
|
const p = data.pr;
|
||||||
|
html += '<div style="margin-bottom:12px;padding:8px 12px;background:#21262d;border-radius:6px;font-size:13px">' +
|
||||||
|
'<strong>PR #' + esc(String(p.number)) + '</strong> · ' +
|
||||||
|
'<span style="color:' + (p.status === 'merged' ? '#3fb950' : '#d29922') + '">' + esc(p.status) + '</span>' +
|
||||||
|
' · ' + esc(p.domain || 'general') +
|
||||||
|
' · ' + esc(p.agent || '?') +
|
||||||
|
' · ' + esc(p.tier || '?') +
|
||||||
|
' · created ' + esc(p.created_at || '') +
|
||||||
|
(p.merged_at ? ' · merged ' + esc(p.merged_at) : '') +
|
||||||
|
'</div>';
|
||||||
|
}}
|
||||||
|
|
||||||
|
// Timeline
|
||||||
|
if (data.timeline.length > 0) {{
|
||||||
|
html += '<div style="font-size:12px;font-weight:600;color:#8b949e;margin-bottom:6px;text-transform:uppercase">Timeline</div>';
|
||||||
|
html += '<table style="font-size:12px"><tr><th>Time</th><th>Stage</th><th>Event</th><th>Details</th></tr>';
|
||||||
|
for (const evt of data.timeline) {{
|
||||||
|
const sc = stageColors[evt.stage] || '#8b949e';
|
||||||
|
const detail = evt.detail || {{}};
|
||||||
|
// Show key fields inline, expandable full JSON
|
||||||
|
const keyFields = [];
|
||||||
|
if (detail.issues) keyFields.push('issues: ' + detail.issues.join(', '));
|
||||||
|
if (detail.agent) keyFields.push('agent: ' + detail.agent);
|
||||||
|
if (detail.tier) keyFields.push('tier: ' + detail.tier);
|
||||||
|
if (detail.leo) keyFields.push('leo: ' + detail.leo);
|
||||||
|
if (detail.domain) keyFields.push('domain: ' + detail.domain);
|
||||||
|
if (detail.pass != null) keyFields.push('pass: ' + detail.pass);
|
||||||
|
if (detail.attempt) keyFields.push('attempt: ' + detail.attempt);
|
||||||
|
const summary = keyFields.length > 0 ? esc(keyFields.join(' | ')) : '';
|
||||||
|
const fullJson = JSON.stringify(detail, null, 2);
|
||||||
|
const detailId = 'trace-detail-' + Math.random().toString(36).substr(2, 6);
|
||||||
|
|
||||||
|
html += '<tr>' +
|
||||||
|
'<td style="white-space:nowrap;color:#8b949e">' + esc(evt.timestamp) + '</td>' +
|
||||||
|
'<td><span style="color:' + sc + ';font-weight:600">' + esc(evt.stage) + '</span></td>' +
|
||||||
|
'<td>' + esc(evt.event) + '</td>' +
|
||||||
|
'<td>' + summary +
|
||||||
|
(Object.keys(detail).length > 0
|
||||||
|
? ' <a style="color:#58a6ff;cursor:pointer;font-size:11px" onclick="document.getElementById(\\\'' + detailId + '\\\').style.display=document.getElementById(\\\'' + detailId + '\\\').style.display===\\\'none\\\'?\\\'block\\\':\\\'none\\\'">[json]</a>' +
|
||||||
|
'<pre id="' + detailId + '" style="display:none;margin-top:4px;background:#0d1117;padding:6px;border-radius:4px;font-size:11px;overflow-x:auto;max-width:500px">' + esc(fullJson) + '</pre>'
|
||||||
|
: '') +
|
||||||
|
'</td></tr>';
|
||||||
|
}}
|
||||||
|
html += '</table>';
|
||||||
|
}}
|
||||||
|
|
||||||
|
// Reviews
|
||||||
|
if (data.reviews && data.reviews.length > 0) {{
|
||||||
|
html += '<div style="font-size:12px;font-weight:600;color:#8b949e;margin:12px 0 6px;text-transform:uppercase">Reviews</div>';
|
||||||
|
html += '<table style="font-size:12px"><tr><th>Claim</th><th>Outcome</th><th>Reviewer</th><th>Reason</th></tr>';
|
||||||
|
for (const rv of data.reviews) {{
|
||||||
|
const outColor = rv.outcome === 'approved' ? '#3fb950' : rv.outcome === 'rejected' ? '#f85149' : '#d29922';
|
||||||
|
html += '<tr>' +
|
||||||
|
'<td style="max-width:250px;overflow:hidden;text-overflow:ellipsis">' + esc(rv.claim_path || '-') + '</td>' +
|
||||||
|
'<td><span class="badge" style="background:' + outColor + '33;color:' + outColor + '">' + esc(rv.outcome || '-') + '</span></td>' +
|
||||||
|
'<td>' + esc(rv.reviewer || '-') + '</td>' +
|
||||||
|
'<td>' + esc(rv.rejection_reason || '') + '</td></tr>';
|
||||||
|
}}
|
||||||
|
html += '</table>';
|
||||||
|
}}
|
||||||
|
|
||||||
|
container.innerHTML = html;
|
||||||
|
}})
|
||||||
|
.catch(err => {{
|
||||||
|
container.innerHTML = '<p style="color:#f85149">Error: ' + esc(err.message) + '</p>';
|
||||||
|
}});
|
||||||
|
}}
|
||||||
|
</script>"""
|
||||||
|
|
||||||
|
return render_page(
|
||||||
|
title="Pipeline Operations",
|
||||||
|
subtitle="Is the machine running?",
|
||||||
|
active_path="/ops",
|
||||||
|
body_html=body,
|
||||||
|
scripts=scripts,
|
||||||
|
timestamp=now.strftime("%Y-%m-%d %H:%M UTC"),
|
||||||
|
)
|
||||||
492
ops/diagnostics/dashboard_prs.py
Normal file
492
ops/diagnostics/dashboard_prs.py
Normal file
|
|
@ -0,0 +1,492 @@
|
||||||
|
"""PR Lifecycle dashboard — single-page view of every PR through the pipeline.
|
||||||
|
|
||||||
|
Sortable table: PR#, summary, agent, domain, outcome, TTM, date.
|
||||||
|
Click any row to expand the full trace (triage reasoning, review text, cascade).
|
||||||
|
Hero cards: total PRs, merge rate, median TTM, median eval rounds.
|
||||||
|
|
||||||
|
Data sources: prs table, audit_log (eval rounds), review_records.
|
||||||
|
Owner: Ship
|
||||||
|
"""
|
||||||
|
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
from shared_ui import render_page
|
||||||
|
|
||||||
|
|
||||||
|
EXTRA_CSS = """
|
||||||
|
.filters { display: flex; gap: 12px; flex-wrap: wrap; margin-bottom: 16px; }
|
||||||
|
.filters select, .filters input {
|
||||||
|
background: #161b22; color: #c9d1d9; border: 1px solid #30363d;
|
||||||
|
border-radius: 6px; padding: 6px 10px; font-size: 12px; }
|
||||||
|
.filters select:focus, .filters input:focus { border-color: #58a6ff; outline: none; }
|
||||||
|
.pr-table { width: 100%; border-collapse: collapse; font-size: 13px; table-layout: fixed; }
|
||||||
|
.pr-table th:nth-child(1) { width: 60px; } /* PR# */
|
||||||
|
.pr-table th:nth-child(2) { width: 38%; } /* Summary */
|
||||||
|
.pr-table th:nth-child(3) { width: 10%; } /* Agent */
|
||||||
|
.pr-table th:nth-child(4) { width: 14%; } /* Domain */
|
||||||
|
.pr-table th:nth-child(5) { width: 10%; } /* Outcome */
|
||||||
|
.pr-table th:nth-child(6) { width: 7%; } /* TTM */
|
||||||
|
.pr-table th:nth-child(7) { width: 10%; } /* Date */
|
||||||
|
.pr-table td { overflow: hidden; text-overflow: ellipsis; white-space: nowrap; padding: 8px 6px; }
|
||||||
|
.pr-table td:nth-child(2) { white-space: normal; overflow: visible; line-height: 1.4; }
|
||||||
|
.pr-table th { cursor: pointer; user-select: none; position: relative; padding: 8px 18px 8px 6px; }
|
||||||
|
.pr-table th:hover { color: #58a6ff; }
|
||||||
|
.pr-table th .sort-arrow { position: absolute; right: 4px; top: 50%; transform: translateY(-50%); font-size: 10px; opacity: 0.5; }
|
||||||
|
.pr-table th.sorted .sort-arrow { opacity: 1; color: #58a6ff; }
|
||||||
|
.pr-table tr { cursor: pointer; transition: background 0.1s; }
|
||||||
|
.pr-table tbody tr:hover { background: #161b22; }
|
||||||
|
.pr-table .outcome-merged { color: #3fb950; }
|
||||||
|
.pr-table .outcome-closed { color: #f85149; }
|
||||||
|
.pr-table .outcome-open { color: #d29922; }
|
||||||
|
.pr-table .tier-deep { color: #bc8cff; font-weight: 600; }
|
||||||
|
.pr-table .tier-standard { color: #58a6ff; }
|
||||||
|
.pr-table .tier-light { color: #8b949e; }
|
||||||
|
.pr-table .pr-link { color: #58a6ff; text-decoration: none; }
|
||||||
|
.pr-table .pr-link:hover { text-decoration: underline; }
|
||||||
|
.pr-table td .summary-text { font-size: 12px; color: #c9d1d9; }
|
||||||
|
.pr-table td .review-snippet { font-size: 11px; color: #f85149; margin-top: 2px; opacity: 0.8; }
|
||||||
|
.pr-table td .model-tag { font-size: 10px; color: #6e7681; background: #161b22; border-radius: 3px; padding: 1px 4px; }
|
||||||
|
.pr-table td .expand-chevron { display: inline-block; width: 12px; color: #484f58; font-size: 10px; transition: transform 0.2s; }
|
||||||
|
.pr-table tr.expanded .expand-chevron { transform: rotate(90deg); color: #58a6ff; }
|
||||||
|
.trace-panel { background: #0d1117; border: 1px solid #30363d; border-radius: 8px;
|
||||||
|
padding: 16px; margin: 4px 0 8px 0; font-size: 12px; display: none; }
|
||||||
|
.trace-panel.open { display: block; }
|
||||||
|
.trace-timeline { list-style: none; padding: 0; }
|
||||||
|
.trace-timeline li { padding: 4px 0; border-left: 2px solid #30363d; padding-left: 12px; margin-left: 8px; }
|
||||||
|
.trace-timeline li .ts { color: #484f58; font-size: 11px; }
|
||||||
|
.trace-timeline li .ev { font-weight: 600; }
|
||||||
|
.trace-timeline li.ev-approved .ev { color: #3fb950; }
|
||||||
|
.trace-timeline li.ev-rejected .ev { color: #f85149; }
|
||||||
|
.trace-timeline li.ev-changes .ev { color: #d29922; }
|
||||||
|
.review-text { background: #161b22; padding: 8px 12px; border-radius: 4px;
|
||||||
|
margin: 4px 0; white-space: pre-wrap; font-size: 11px; color: #8b949e; max-height: 200px; overflow-y: auto; }
|
||||||
|
.pagination { display: flex; gap: 8px; align-items: center; justify-content: center; margin-top: 16px; }
|
||||||
|
.pagination button { background: #161b22; color: #c9d1d9; border: 1px solid #30363d;
|
||||||
|
border-radius: 4px; padding: 4px 12px; cursor: pointer; font-size: 12px; }
|
||||||
|
.pagination button:hover { border-color: #58a6ff; }
|
||||||
|
.pagination button:disabled { opacity: 0.4; cursor: default; }
|
||||||
|
.pagination .page-info { color: #8b949e; font-size: 12px; }
|
||||||
|
.stat-row { display: flex; gap: 6px; flex-wrap: wrap; margin-top: 4px; }
|
||||||
|
.stat-row .mini-stat { font-size: 11px; color: #8b949e; }
|
||||||
|
.stat-row .mini-stat span { color: #c9d1d9; font-weight: 600; }
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
def render_prs_page(now: datetime) -> str:
|
||||||
|
"""Render the PR lifecycle page. All data loaded client-side via /api/pr-lifecycle."""
|
||||||
|
|
||||||
|
body = """
|
||||||
|
<!-- Hero cards (populated by JS) -->
|
||||||
|
<div class="grid" id="hero-cards">
|
||||||
|
<div class="card"><div class="label">Total PRs</div><div class="value blue" id="kpi-total">--</div><div class="detail" id="kpi-total-detail"></div></div>
|
||||||
|
<div class="card"><div class="label">Merge Rate</div><div class="value green" id="kpi-merge-rate">--</div><div class="detail" id="kpi-merge-detail"></div></div>
|
||||||
|
<div class="card"><div class="label">Median Time-to-Merge</div><div class="value" id="kpi-ttm">--</div><div class="detail" id="kpi-ttm-detail"></div></div>
|
||||||
|
<div class="card"><div class="label">Median Eval Rounds</div><div class="value" id="kpi-rounds">--</div><div class="detail" id="kpi-rounds-detail"></div></div>
|
||||||
|
<div class="card"><div class="label">Total Claims</div><div class="value blue" id="kpi-claims">--</div><div class="detail" id="kpi-claims-detail"></div></div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Filters -->
|
||||||
|
<div class="filters">
|
||||||
|
<select id="filter-domain"><option value="">All Domains</option></select>
|
||||||
|
<select id="filter-agent"><option value="">All Agents</option></select>
|
||||||
|
<select id="filter-outcome">
|
||||||
|
<option value="">All Outcomes</option>
|
||||||
|
<option value="merged">Merged</option>
|
||||||
|
<option value="closed">Rejected</option>
|
||||||
|
<option value="open">Open</option>
|
||||||
|
</select>
|
||||||
|
<select id="filter-tier">
|
||||||
|
<option value="">All Tiers</option>
|
||||||
|
<option value="DEEP">Deep</option>
|
||||||
|
<option value="STANDARD">Standard</option>
|
||||||
|
<option value="LIGHT">Light</option>
|
||||||
|
</select>
|
||||||
|
<select id="filter-days">
|
||||||
|
<option value="7">Last 7 days</option>
|
||||||
|
<option value="30" selected>Last 30 days</option>
|
||||||
|
<option value="90">Last 90 days</option>
|
||||||
|
<option value="0">All time</option>
|
||||||
|
</select>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- PR table -->
|
||||||
|
<div class="card" style="padding: 0; overflow: hidden;">
|
||||||
|
<table class="pr-table">
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th data-col="number">PR# <span class="sort-arrow">▲</span></th>
|
||||||
|
<th data-col="summary">Summary <span class="sort-arrow">▲</span></th>
|
||||||
|
<th data-col="agent">Agent <span class="sort-arrow">▲</span></th>
|
||||||
|
<th data-col="domain">Domain <span class="sort-arrow">▲</span></th>
|
||||||
|
<th data-col="status">Outcome <span class="sort-arrow">▲</span></th>
|
||||||
|
<th data-col="ttm_minutes">TTM <span class="sort-arrow">▲</span></th>
|
||||||
|
<th data-col="created_at">Date <span class="sort-arrow">▲</span></th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody id="pr-tbody"></tbody>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Pagination -->
|
||||||
|
<div class="pagination">
|
||||||
|
<button id="pg-prev" disabled>« Prev</button>
|
||||||
|
<span class="page-info" id="pg-info">--</span>
|
||||||
|
<button id="pg-next" disabled>Next »</button>
|
||||||
|
</div>
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Use single-quoted JS strings throughout to avoid Python/HTML escaping issues
|
||||||
|
scripts = """<script>
|
||||||
|
const PAGE_SIZE = 50;
|
||||||
|
const FORGEJO = 'https://git.livingip.xyz/teleo/teleo-codex/pulls/';
|
||||||
|
let allData = [];
|
||||||
|
let filtered = [];
|
||||||
|
let sortCol = 'number';
|
||||||
|
let sortAsc = false;
|
||||||
|
let page = 0;
|
||||||
|
let expandedPr = null;
|
||||||
|
|
||||||
|
function loadData() {
|
||||||
|
var days = document.getElementById('filter-days').value;
|
||||||
|
var url = '/api/pr-lifecycle' + (days !== '0' ? '?days=' + days : '?days=9999');
|
||||||
|
fetch(url).then(function(r) { return r.json(); }).then(function(data) {
|
||||||
|
allData = data.prs || [];
|
||||||
|
populateFilters(allData);
|
||||||
|
updateKPIs(data);
|
||||||
|
applyFilters();
|
||||||
|
}).catch(function() {
|
||||||
|
document.getElementById('pr-tbody').innerHTML =
|
||||||
|
'<tr><td colspan="7" style="text-align:center;color:#f85149;">Failed to load data</td></tr>';
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
function populateFilters(prs) {
|
||||||
|
var domains = [], agents = [], seenD = {}, seenA = {};
|
||||||
|
prs.forEach(function(p) {
|
||||||
|
if (p.domain && !seenD[p.domain]) { seenD[p.domain] = 1; domains.push(p.domain); }
|
||||||
|
if (p.agent && !seenA[p.agent]) { seenA[p.agent] = 1; agents.push(p.agent); }
|
||||||
|
});
|
||||||
|
domains.sort(); agents.sort();
|
||||||
|
var domSel = document.getElementById('filter-domain');
|
||||||
|
var agSel = document.getElementById('filter-agent');
|
||||||
|
var curDom = domSel.value, curAg = agSel.value;
|
||||||
|
domSel.innerHTML = '<option value="">All Domains</option>' +
|
||||||
|
domains.map(function(d) { return '<option value="' + esc(d) + '">' + esc(d) + '</option>'; }).join('');
|
||||||
|
agSel.innerHTML = '<option value="">All Agents</option>' +
|
||||||
|
agents.map(function(a) { return '<option value="' + esc(a) + '">' + esc(a) + '</option>'; }).join('');
|
||||||
|
domSel.value = curDom; agSel.value = curAg;
|
||||||
|
}
|
||||||
|
|
||||||
|
function updateKPIs(data) {
|
||||||
|
document.getElementById('kpi-total').textContent = fmtNum(data.total);
|
||||||
|
document.getElementById('kpi-total-detail').textContent =
|
||||||
|
fmtNum(data.merged) + ' merged, ' + fmtNum(data.closed) + ' rejected';
|
||||||
|
|
||||||
|
var rate = data.total > 0 ? data.merged / (data.merged + data.closed) : 0;
|
||||||
|
document.getElementById('kpi-merge-rate').textContent = fmtPct(rate);
|
||||||
|
document.getElementById('kpi-merge-detail').textContent = fmtNum(data.open) + ' open';
|
||||||
|
|
||||||
|
document.getElementById('kpi-ttm').textContent =
|
||||||
|
data.median_ttm != null ? fmtDuration(data.median_ttm) : '--';
|
||||||
|
document.getElementById('kpi-ttm-detail').textContent =
|
||||||
|
data.p90_ttm != null ? 'p90: ' + fmtDuration(data.p90_ttm) : '';
|
||||||
|
|
||||||
|
document.getElementById('kpi-rounds').textContent =
|
||||||
|
data.median_rounds != null ? data.median_rounds.toFixed(1) : '--';
|
||||||
|
document.getElementById('kpi-rounds-detail').textContent =
|
||||||
|
data.max_rounds != null ? 'max: ' + data.max_rounds : '';
|
||||||
|
|
||||||
|
var totalClaims = 0, mergedClaims = 0;
|
||||||
|
(data.prs || []).forEach(function(p) {
|
||||||
|
totalClaims += (p.claims_count || 1);
|
||||||
|
if (p.status === 'merged') mergedClaims += (p.claims_count || 1);
|
||||||
|
});
|
||||||
|
document.getElementById('kpi-claims').textContent = fmtNum(totalClaims);
|
||||||
|
document.getElementById('kpi-claims-detail').textContent = fmtNum(mergedClaims) + ' merged';
|
||||||
|
}
|
||||||
|
|
||||||
|
function fmtDuration(mins) {
|
||||||
|
if (mins < 60) return mins.toFixed(0) + 'm';
|
||||||
|
if (mins < 1440) return (mins / 60).toFixed(1) + 'h';
|
||||||
|
return (mins / 1440).toFixed(1) + 'd';
|
||||||
|
}
|
||||||
|
|
||||||
|
function applyFilters() {
|
||||||
|
var dom = document.getElementById('filter-domain').value;
|
||||||
|
var ag = document.getElementById('filter-agent').value;
|
||||||
|
var out = document.getElementById('filter-outcome').value;
|
||||||
|
var tier = document.getElementById('filter-tier').value;
|
||||||
|
|
||||||
|
filtered = allData.filter(function(p) {
|
||||||
|
if (dom && p.domain !== dom) return false;
|
||||||
|
if (ag && p.agent !== ag) return false;
|
||||||
|
if (out && p.status !== out) return false;
|
||||||
|
if (tier && p.tier !== tier) return false;
|
||||||
|
return true;
|
||||||
|
});
|
||||||
|
|
||||||
|
sortData();
|
||||||
|
page = 0;
|
||||||
|
renderTable();
|
||||||
|
}
|
||||||
|
|
||||||
|
function sortData() {
|
||||||
|
filtered.sort(function(a, b) {
|
||||||
|
var va = a[sortCol], vb = b[sortCol];
|
||||||
|
if (va == null) va = '';
|
||||||
|
if (vb == null) vb = '';
|
||||||
|
if (typeof va === 'number' && typeof vb === 'number') {
|
||||||
|
return sortAsc ? va - vb : vb - va;
|
||||||
|
}
|
||||||
|
va = String(va).toLowerCase();
|
||||||
|
vb = String(vb).toLowerCase();
|
||||||
|
return sortAsc ? va.localeCompare(vb) : vb.localeCompare(va);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
function truncate(s, n) {
|
||||||
|
if (!s) return '';
|
||||||
|
return s.length > n ? s.substring(0, n) + '...' : s;
|
||||||
|
}
|
||||||
|
|
||||||
|
function renderTable() {
|
||||||
|
var tbody = document.getElementById('pr-tbody');
|
||||||
|
var start = page * PAGE_SIZE;
|
||||||
|
var slice = filtered.slice(start, start + PAGE_SIZE);
|
||||||
|
var totalPages = Math.ceil(filtered.length / PAGE_SIZE);
|
||||||
|
|
||||||
|
if (slice.length === 0) {
|
||||||
|
tbody.innerHTML = '<tr><td colspan="7" style="text-align:center;color:#8b949e;">No PRs match filters</td></tr>';
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
var rows = [];
|
||||||
|
slice.forEach(function(p) {
|
||||||
|
var outClass = p.status === 'merged' ? 'outcome-merged' :
|
||||||
|
p.status === 'closed' ? 'outcome-closed' : 'outcome-open';
|
||||||
|
var tierClass = (p.tier || '').toLowerCase() === 'deep' ? 'tier-deep' :
|
||||||
|
(p.tier || '').toLowerCase() === 'standard' ? 'tier-standard' : 'tier-light';
|
||||||
|
var ttm = p.ttm_minutes != null ? fmtDuration(p.ttm_minutes) : '--';
|
||||||
|
var date = p.created_at ? p.created_at.substring(0, 10) : '--';
|
||||||
|
var agent = p.agent || '--';
|
||||||
|
|
||||||
|
// Summary: first claim title from description
|
||||||
|
var summary = '--';
|
||||||
|
if (p.summary) {
|
||||||
|
summary = p.summary;
|
||||||
|
} else if (p.description) {
|
||||||
|
var parts = p.description.split('|');
|
||||||
|
summary = truncate(parts[0].trim(), 80);
|
||||||
|
if (parts.length > 1) summary += ' (+' + (parts.length - 1) + ' more)';
|
||||||
|
}
|
||||||
|
|
||||||
|
// Outcome label with eval rounds
|
||||||
|
var outcomeLabel = esc(p.status || '--');
|
||||||
|
if (p.eval_rounds > 1) {
|
||||||
|
outcomeLabel += ' <span style="color:#6e7681;font-size:11px;">(' + p.eval_rounds + ' evals)</span>';
|
||||||
|
}
|
||||||
|
|
||||||
|
// Review snippet for closed/changes PRs
|
||||||
|
var reviewSnippet = '';
|
||||||
|
if (p.status === 'closed' && p.review_snippet) {
|
||||||
|
reviewSnippet = '<div class="review-snippet">' + esc(truncate(p.review_snippet, 120)) + '</div>';
|
||||||
|
}
|
||||||
|
|
||||||
|
// Tier badge inline with outcome
|
||||||
|
var tierBadge = p.tier ? ' <span class="' + tierClass + '" style="font-size:10px;">' + esc(p.tier) + '</span>' : '';
|
||||||
|
|
||||||
|
rows.push(
|
||||||
|
'<tr data-pr="' + p.number + '">' +
|
||||||
|
'<td><span class="expand-chevron">▶</span> ' +
|
||||||
|
'<a class="pr-link" href="' + FORGEJO + p.number + '" target="_blank" rel="noopener" onclick="event.stopPropagation();">#' + p.number + '</a></td>' +
|
||||||
|
'<td style="white-space:normal;"><span class="summary-text">' + esc(summary) + '</span>' + reviewSnippet + '</td>' +
|
||||||
|
'<td>' + esc(agent) + '</td>' +
|
||||||
|
'<td>' + esc(p.domain || '--') + '</td>' +
|
||||||
|
'<td class="' + outClass + '">' + outcomeLabel + tierBadge + '</td>' +
|
||||||
|
'<td>' + ttm + '</td>' +
|
||||||
|
'<td>' + date + '</td>' +
|
||||||
|
'</tr>' +
|
||||||
|
'<tr id="trace-' + p.number + '" style="display:none;"><td colspan="7" style="padding:0;">' +
|
||||||
|
'<div class="trace-panel" id="panel-' + p.number + '">Loading trace...</div>' +
|
||||||
|
'</td></tr>'
|
||||||
|
);
|
||||||
|
});
|
||||||
|
tbody.innerHTML = rows.join('');
|
||||||
|
|
||||||
|
// Pagination
|
||||||
|
document.getElementById('pg-info').textContent =
|
||||||
|
'Page ' + (totalPages > 0 ? page + 1 : 0) + ' of ' + totalPages +
|
||||||
|
' (' + filtered.length + ' PRs)';
|
||||||
|
document.getElementById('pg-prev').disabled = page <= 0;
|
||||||
|
document.getElementById('pg-next').disabled = page >= totalPages - 1;
|
||||||
|
|
||||||
|
// Update sort arrows
|
||||||
|
document.querySelectorAll('.pr-table th').forEach(function(th) {
|
||||||
|
th.classList.toggle('sorted', th.dataset.col === sortCol);
|
||||||
|
var arrow = th.querySelector('.sort-arrow');
|
||||||
|
if (arrow) arrow.innerHTML = (th.dataset.col === sortCol && sortAsc) ? '▲' : '▼';
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sort click
|
||||||
|
document.querySelectorAll('.pr-table th').forEach(function(th) {
|
||||||
|
th.addEventListener('click', function() {
|
||||||
|
var col = th.dataset.col;
|
||||||
|
if (col === sortCol) { sortAsc = !sortAsc; }
|
||||||
|
else { sortCol = col; sortAsc = col === 'number' ? false : true; }
|
||||||
|
sortData();
|
||||||
|
renderTable();
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
// Row click -> trace expand
|
||||||
|
document.getElementById('pr-tbody').addEventListener('click', function(e) {
|
||||||
|
// Don't expand if clicking a link
|
||||||
|
if (e.target.closest('a')) return;
|
||||||
|
var row = e.target.closest('tr[data-pr]');
|
||||||
|
if (!row) return;
|
||||||
|
var pr = row.dataset.pr;
|
||||||
|
var traceRow = document.getElementById('trace-' + pr);
|
||||||
|
var panel = document.getElementById('panel-' + pr);
|
||||||
|
if (!traceRow) return;
|
||||||
|
|
||||||
|
if (traceRow.style.display === 'none') {
|
||||||
|
if (expandedPr && expandedPr !== pr) {
|
||||||
|
var prev = document.getElementById('trace-' + expandedPr);
|
||||||
|
if (prev) prev.style.display = 'none';
|
||||||
|
var prevRow = document.querySelector('tr[data-pr="' + expandedPr + '"]');
|
||||||
|
if (prevRow) prevRow.classList.remove('expanded');
|
||||||
|
}
|
||||||
|
traceRow.style.display = '';
|
||||||
|
panel.classList.add('open');
|
||||||
|
row.classList.add('expanded');
|
||||||
|
expandedPr = pr;
|
||||||
|
loadTrace(pr, panel);
|
||||||
|
} else {
|
||||||
|
traceRow.style.display = 'none';
|
||||||
|
panel.classList.remove('open');
|
||||||
|
row.classList.remove('expanded');
|
||||||
|
expandedPr = null;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
function loadTrace(pr, panel) {
|
||||||
|
fetch('/api/trace/' + pr).then(function(r) { return r.json(); }).then(function(data) {
|
||||||
|
var html = '';
|
||||||
|
|
||||||
|
// PR metadata
|
||||||
|
if (data.pr) {
|
||||||
|
html += '<div class="stat-row" style="gap:16px;">';
|
||||||
|
html += '<div class="mini-stat">Source: <span>' + esc(data.pr.source_path || '--') + '</span></div>';
|
||||||
|
if (data.pr.agent) html += '<div class="mini-stat">Agent: <span>' + esc(data.pr.agent) + '</span></div>';
|
||||||
|
if (data.pr.tier) html += '<div class="mini-stat">Tier: <span>' + esc(data.pr.tier) + '</span></div>';
|
||||||
|
html += '<div class="mini-stat"><a class="pr-link" href="' + FORGEJO + pr + '" target="_blank">View on Forgejo</a></div>';
|
||||||
|
html += '</div>';
|
||||||
|
}
|
||||||
|
|
||||||
|
// Eval chain models
|
||||||
|
var models = {};
|
||||||
|
if (data.timeline) {
|
||||||
|
data.timeline.forEach(function(ev) {
|
||||||
|
if (ev.detail) {
|
||||||
|
if (ev.detail.model) models[ev.stage + '.' + ev.event] = ev.detail.model;
|
||||||
|
if (ev.detail.domain_model) models['domain_review'] = ev.detail.domain_model;
|
||||||
|
if (ev.detail.leo_model) models['leo_review'] = ev.detail.leo_model;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
if (Object.keys(models).length > 0) {
|
||||||
|
html += '<div style="background:#161b22;border-radius:6px;padding:8px 12px;margin:4px 0 8px;font-size:12px;">';
|
||||||
|
html += '<strong style="color:#58a6ff;">Eval Chain:</strong> ';
|
||||||
|
var parts = [];
|
||||||
|
if (models['triage.haiku_triage']) parts.push('Triage: ' + models['triage.haiku_triage']);
|
||||||
|
if (models['domain_review']) parts.push('Domain: ' + models['domain_review']);
|
||||||
|
if (models['leo_review']) parts.push('Leo: ' + models['leo_review']);
|
||||||
|
html += parts.length > 0 ? parts.join(' → ') : '<span style="color:#484f58;">No model data</span>';
|
||||||
|
html += '</div>';
|
||||||
|
}
|
||||||
|
|
||||||
|
// Timeline
|
||||||
|
if (data.timeline && data.timeline.length > 0) {
|
||||||
|
html += '<h4 style="color:#58a6ff;font-size:12px;margin:8px 0 4px;">Timeline</h4>';
|
||||||
|
html += '<ul class="trace-timeline">';
|
||||||
|
data.timeline.forEach(function(ev) {
|
||||||
|
var cls = ev.event === 'approved' ? 'ev-approved' :
|
||||||
|
(ev.event === 'domain_rejected' || ev.event === 'tier05_rejected') ? 'ev-rejected' :
|
||||||
|
ev.event === 'changes_requested' ? 'ev-changes' : '';
|
||||||
|
var ts = ev.timestamp ? ev.timestamp.substring(0, 19).replace('T', ' ') : '';
|
||||||
|
var detail = '';
|
||||||
|
if (ev.detail) {
|
||||||
|
if (ev.detail.tier) detail += ' tier=' + ev.detail.tier;
|
||||||
|
if (ev.detail.reason) detail += ' — ' + esc(ev.detail.reason);
|
||||||
|
if (ev.detail.model) detail += ' [' + esc(ev.detail.model) + ']';
|
||||||
|
if (ev.detail.review_text) {
|
||||||
|
detail += '<div class="review-text">' + esc(ev.detail.review_text).substring(0, 2000) + '</div>';
|
||||||
|
}
|
||||||
|
if (ev.detail.domain_review_text) {
|
||||||
|
detail += '<div class="review-text"><strong>Domain review:</strong><br>' + esc(ev.detail.domain_review_text).substring(0, 2000) + '</div>';
|
||||||
|
}
|
||||||
|
if (ev.detail.leo_review_text) {
|
||||||
|
detail += '<div class="review-text"><strong>Leo review:</strong><br>' + esc(ev.detail.leo_review_text).substring(0, 2000) + '</div>';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
html += '<li class="' + cls + '">' +
|
||||||
|
'<span class="ts">' + ts + '</span> ' +
|
||||||
|
'<span class="ev">' + esc(ev.stage + '.' + ev.event) + '</span>' +
|
||||||
|
detail + '</li>';
|
||||||
|
});
|
||||||
|
html += '</ul>';
|
||||||
|
} else {
|
||||||
|
html += '<div style="color:#484f58;font-size:12px;">No timeline events</div>';
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reviews
|
||||||
|
if (data.reviews && data.reviews.length > 0) {
|
||||||
|
html += '<h4 style="color:#58a6ff;font-size:12px;margin:8px 0 4px;">Reviews</h4>';
|
||||||
|
data.reviews.forEach(function(r) {
|
||||||
|
var cls = r.outcome === 'approved' ? 'badge-green' :
|
||||||
|
r.outcome === 'rejected' ? 'badge-red' : 'badge-yellow';
|
||||||
|
html += '<div style="margin:4px 0;">' +
|
||||||
|
'<span class="badge ' + cls + '">' + esc(r.outcome) + '</span> ' +
|
||||||
|
'<span style="color:#8b949e;font-size:11px;">' + esc(r.reviewer || '') + ' ' +
|
||||||
|
(r.model ? '[' + esc(r.model) + ']' : '') + ' ' +
|
||||||
|
(r.reviewed_at || '').substring(0, 19) + '</span>';
|
||||||
|
if (r.rejection_reason) {
|
||||||
|
html += ' <code>' + esc(r.rejection_reason) + '</code>';
|
||||||
|
}
|
||||||
|
if (r.notes) {
|
||||||
|
html += '<div class="review-text">' + esc(r.notes) + '</div>';
|
||||||
|
}
|
||||||
|
html += '</div>';
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
panel.innerHTML = html || '<div style="color:#484f58;font-size:12px;">No trace data</div>';
|
||||||
|
}).catch(function() {
|
||||||
|
panel.innerHTML = '<div style="color:#f85149;font-size:12px;">Failed to load trace</div>';
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// Filter listeners
|
||||||
|
['filter-domain', 'filter-agent', 'filter-outcome', 'filter-tier'].forEach(function(id) {
|
||||||
|
document.getElementById(id).addEventListener('change', applyFilters);
|
||||||
|
});
|
||||||
|
document.getElementById('filter-days').addEventListener('change', loadData);
|
||||||
|
|
||||||
|
// Pagination
|
||||||
|
document.getElementById('pg-prev').addEventListener('click', function() { page--; renderTable(); });
|
||||||
|
document.getElementById('pg-next').addEventListener('click', function() { page++; renderTable(); });
|
||||||
|
|
||||||
|
// Init
|
||||||
|
loadData();
|
||||||
|
</script>"""
|
||||||
|
|
||||||
|
return render_page(
|
||||||
|
title="PR Lifecycle",
|
||||||
|
subtitle="Every PR through the pipeline — triage to merge",
|
||||||
|
active_path="/prs",
|
||||||
|
body_html=body,
|
||||||
|
scripts=scripts,
|
||||||
|
extra_css=EXTRA_CSS,
|
||||||
|
timestamp=now.strftime("%Y-%m-%d %H:%M UTC"),
|
||||||
|
)
|
||||||
929
ops/diagnostics/dashboard_routes.py
Normal file
929
ops/diagnostics/dashboard_routes.py
Normal file
|
|
@ -0,0 +1,929 @@
|
||||||
|
"""New API endpoints for the 4-page dashboard.
|
||||||
|
|
||||||
|
Endpoints:
|
||||||
|
GET /api/stage-times — median dwell time per pipeline stage
|
||||||
|
GET /api/herfindahl — domain concentration index
|
||||||
|
GET /api/agent-state — live agent-state from filesystem
|
||||||
|
GET /api/extraction-yield-by-domain — sources→claims conversion per domain
|
||||||
|
GET /api/agents-dashboard — batched agent performance payload
|
||||||
|
|
||||||
|
Owner: Argus
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import sqlite3
|
||||||
|
import statistics
|
||||||
|
import time
|
||||||
|
import urllib.request
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from aiohttp import web
|
||||||
|
|
||||||
|
logger = logging.getLogger("argus.dashboard_routes")
|
||||||
|
|
||||||
|
# ─── Claim-index cache (60s TTL) ───────────────────────────────────────────
|
||||||
|
|
||||||
|
_claim_index_cache: dict | None = None
|
||||||
|
_claim_index_ts: float = 0
|
||||||
|
CLAIM_INDEX_TTL = 60 # seconds
|
||||||
|
|
||||||
|
CLAIM_INDEX_URL = os.environ.get("CLAIM_INDEX_URL", "http://localhost:8080/claim-index")
|
||||||
|
AGENT_STATE_DIR = Path(os.environ.get("AGENT_STATE_DIR", "/opt/teleo-eval/agent-state"))
|
||||||
|
|
||||||
|
|
||||||
|
def get_claim_index() -> dict | None:
|
||||||
|
"""Fetch claim-index with 60s cache."""
|
||||||
|
global _claim_index_cache, _claim_index_ts
|
||||||
|
now = time.monotonic()
|
||||||
|
if _claim_index_cache is not None and (now - _claim_index_ts) < CLAIM_INDEX_TTL:
|
||||||
|
return _claim_index_cache
|
||||||
|
try:
|
||||||
|
with urllib.request.urlopen(CLAIM_INDEX_URL, timeout=5) as resp:
|
||||||
|
data = json.loads(resp.read())
|
||||||
|
_claim_index_cache = data
|
||||||
|
_claim_index_ts = now
|
||||||
|
return data
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning("Failed to fetch claim-index: %s", e)
|
||||||
|
# Return stale cache if available
|
||||||
|
return _claim_index_cache
|
||||||
|
|
||||||
|
|
||||||
|
# ─── GET /api/stage-times ──────────────────────────────────────────────────
|
||||||
|
|
||||||
|
async def handle_stage_times(request):
|
||||||
|
"""Median dwell time per pipeline stage from audit_log timestamps.
|
||||||
|
|
||||||
|
Stages: discover → validate → evaluate → merge
|
||||||
|
Returns median minutes between consecutive stages.
|
||||||
|
"""
|
||||||
|
conn = request.app["_get_conn"]()
|
||||||
|
hours = int(request.query.get("hours", "24"))
|
||||||
|
|
||||||
|
# Get per-PR event timestamps
|
||||||
|
rows = conn.execute(
|
||||||
|
"""SELECT json_extract(detail, '$.pr') as pr, event, timestamp
|
||||||
|
FROM audit_log
|
||||||
|
WHERE timestamp > datetime('now', ? || ' hours')
|
||||||
|
AND json_extract(detail, '$.pr') IS NOT NULL
|
||||||
|
ORDER BY json_extract(detail, '$.pr'), timestamp""",
|
||||||
|
(f"-{hours}",),
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
# Group by PR
|
||||||
|
pr_events: dict[int, list] = {}
|
||||||
|
for r in rows:
|
||||||
|
pr = r["pr"]
|
||||||
|
if pr not in pr_events:
|
||||||
|
pr_events[pr] = []
|
||||||
|
pr_events[pr].append({"event": r["event"], "ts": r["timestamp"]})
|
||||||
|
|
||||||
|
# Compute stage dwell times
|
||||||
|
stage_pairs = [
|
||||||
|
("pr_discovered", "tier0_complete", "Ingest → Validate"),
|
||||||
|
("tier0_complete", "approved", "Validate → Approve"),
|
||||||
|
("tier0_complete", "domain_rejected", "Validate → Reject"),
|
||||||
|
("approved", "merged", "Approve → Merge"),
|
||||||
|
]
|
||||||
|
|
||||||
|
stage_times = {}
|
||||||
|
for start_event, end_event, label in stage_pairs:
|
||||||
|
durations = []
|
||||||
|
for pr, events in pr_events.items():
|
||||||
|
start_ts = None
|
||||||
|
end_ts = None
|
||||||
|
for e in events:
|
||||||
|
if e["event"] == start_event and start_ts is None:
|
||||||
|
start_ts = e["ts"]
|
||||||
|
if e["event"] == end_event and end_ts is None:
|
||||||
|
end_ts = e["ts"]
|
||||||
|
if start_ts and end_ts:
|
||||||
|
try:
|
||||||
|
s = datetime.fromisoformat(start_ts)
|
||||||
|
e = datetime.fromisoformat(end_ts)
|
||||||
|
mins = (e - s).total_seconds() / 60
|
||||||
|
if mins >= 0:
|
||||||
|
durations.append(mins)
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
pass
|
||||||
|
if durations:
|
||||||
|
stage_times[label] = {
|
||||||
|
"median_minutes": round(statistics.median(durations), 1),
|
||||||
|
"p90_minutes": round(sorted(durations)[int(len(durations) * 0.9)], 1) if len(durations) >= 5 else None,
|
||||||
|
"count": len(durations),
|
||||||
|
}
|
||||||
|
|
||||||
|
return web.json_response({"hours": hours, "stages": stage_times})
|
||||||
|
|
||||||
|
|
||||||
|
# ─── GET /api/herfindahl ──────────────────────────────────────────────────
|
||||||
|
|
||||||
|
async def handle_herfindahl(request):
|
||||||
|
"""Domain concentration index (Herfindahl-Hirschman).
|
||||||
|
|
||||||
|
HHI = sum of (domain_share^2). 1.0 = single domain, lower = more diverse.
|
||||||
|
"""
|
||||||
|
conn = request.app["_get_conn"]()
|
||||||
|
days = int(request.query.get("days", "30"))
|
||||||
|
|
||||||
|
rows = conn.execute(
|
||||||
|
"""SELECT domain, COUNT(*) as cnt
|
||||||
|
FROM prs WHERE status='merged' AND domain IS NOT NULL
|
||||||
|
AND merged_at > datetime('now', ? || ' days')
|
||||||
|
GROUP BY domain""",
|
||||||
|
(f"-{days}",),
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
if not rows:
|
||||||
|
return web.json_response({"hhi": 0, "domains": [], "days": days})
|
||||||
|
|
||||||
|
total = sum(r["cnt"] for r in rows)
|
||||||
|
domains = []
|
||||||
|
hhi = 0
|
||||||
|
for r in rows:
|
||||||
|
share = r["cnt"] / total
|
||||||
|
hhi += share ** 2
|
||||||
|
domains.append({
|
||||||
|
"domain": r["domain"],
|
||||||
|
"count": r["cnt"],
|
||||||
|
"share": round(share, 4),
|
||||||
|
})
|
||||||
|
|
||||||
|
domains.sort(key=lambda x: x["count"], reverse=True)
|
||||||
|
|
||||||
|
# Interpret: HHI < 0.15 = diverse, 0.15-0.25 = moderate, >0.25 = concentrated
|
||||||
|
status = "diverse" if hhi < 0.15 else ("moderate" if hhi < 0.25 else "concentrated")
|
||||||
|
|
||||||
|
return web.json_response({
|
||||||
|
"hhi": round(hhi, 4),
|
||||||
|
"status": status,
|
||||||
|
"domains": domains,
|
||||||
|
"total_merged": total,
|
||||||
|
"days": days,
|
||||||
|
})
|
||||||
|
|
||||||
|
|
||||||
|
# ─── GET /api/agent-state ─────────────────────────────────────────────────
|
||||||
|
|
||||||
|
async def handle_agent_state(request):
|
||||||
|
"""Read live agent-state from filesystem. 6 agents, ~1KB each."""
|
||||||
|
if not AGENT_STATE_DIR.exists():
|
||||||
|
return web.json_response({"error": "agent-state directory not found", "path": str(AGENT_STATE_DIR)}, status=404)
|
||||||
|
|
||||||
|
agents = {}
|
||||||
|
for agent_dir in sorted(AGENT_STATE_DIR.iterdir()):
|
||||||
|
if not agent_dir.is_dir():
|
||||||
|
continue
|
||||||
|
name = agent_dir.name
|
||||||
|
state = {"name": name}
|
||||||
|
|
||||||
|
# metrics.json
|
||||||
|
metrics_file = agent_dir / "metrics.json"
|
||||||
|
if metrics_file.exists():
|
||||||
|
try:
|
||||||
|
m = json.loads(metrics_file.read_text())
|
||||||
|
state["last_active"] = m.get("updated_at")
|
||||||
|
state["metrics"] = m
|
||||||
|
except (json.JSONDecodeError, OSError):
|
||||||
|
state["metrics_error"] = True
|
||||||
|
|
||||||
|
# tasks.json
|
||||||
|
tasks_file = agent_dir / "tasks.json"
|
||||||
|
if tasks_file.exists():
|
||||||
|
try:
|
||||||
|
t = json.loads(tasks_file.read_text())
|
||||||
|
state["tasks"] = t if isinstance(t, list) else []
|
||||||
|
state["task_count"] = len(state["tasks"])
|
||||||
|
except (json.JSONDecodeError, OSError):
|
||||||
|
state["tasks"] = []
|
||||||
|
|
||||||
|
# session.json
|
||||||
|
session_file = agent_dir / "session.json"
|
||||||
|
if session_file.exists():
|
||||||
|
try:
|
||||||
|
s = json.loads(session_file.read_text())
|
||||||
|
state["session"] = s
|
||||||
|
except (json.JSONDecodeError, OSError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
# inbox depth
|
||||||
|
inbox_dir = agent_dir / "inbox"
|
||||||
|
if inbox_dir.exists() and inbox_dir.is_dir():
|
||||||
|
state["inbox_depth"] = len(list(inbox_dir.iterdir()))
|
||||||
|
else:
|
||||||
|
state["inbox_depth"] = 0
|
||||||
|
|
||||||
|
agents[name] = state
|
||||||
|
|
||||||
|
return web.json_response({"agents": agents, "agent_count": len(agents)})
|
||||||
|
|
||||||
|
|
||||||
|
# ─── GET /api/extraction-yield-by-domain ──────────────────────────────────
|
||||||
|
|
||||||
|
async def handle_extraction_yield_by_domain(request):
|
||||||
|
"""Sources → claims conversion rate per domain."""
|
||||||
|
conn = request.app["_get_conn"]()
|
||||||
|
days = int(request.query.get("days", "30"))
|
||||||
|
|
||||||
|
# Sources per domain (approximate from PR source_path domain)
|
||||||
|
source_counts = conn.execute(
|
||||||
|
"""SELECT domain, COUNT(DISTINCT source_url) as sources
|
||||||
|
FROM sources s
|
||||||
|
JOIN prs p ON p.source_path LIKE '%' || s.url || '%'
|
||||||
|
WHERE s.created_at > datetime('now', ? || ' days')
|
||||||
|
GROUP BY domain""",
|
||||||
|
(f"-{days}",),
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
# Fallback: simpler query if the join doesn't work well
|
||||||
|
merged_by_domain = conn.execute(
|
||||||
|
"""SELECT domain, COUNT(*) as merged
|
||||||
|
FROM prs WHERE status='merged' AND domain IS NOT NULL
|
||||||
|
AND merged_at > datetime('now', ? || ' days')
|
||||||
|
GROUP BY domain""",
|
||||||
|
(f"-{days}",),
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
sources_by_domain = conn.execute(
|
||||||
|
"""SELECT domain, COUNT(*) as total_prs,
|
||||||
|
SUM(CASE WHEN status='merged' THEN 1 ELSE 0 END) as merged
|
||||||
|
FROM prs WHERE domain IS NOT NULL
|
||||||
|
AND created_at > datetime('now', ? || ' days')
|
||||||
|
GROUP BY domain""",
|
||||||
|
(f"-{days}",),
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
domains = []
|
||||||
|
for r in sources_by_domain:
|
||||||
|
total = r["total_prs"] or 0
|
||||||
|
merged = r["merged"] or 0
|
||||||
|
domains.append({
|
||||||
|
"domain": r["domain"],
|
||||||
|
"total_prs": total,
|
||||||
|
"merged": merged,
|
||||||
|
"yield": round(merged / total, 3) if total else 0,
|
||||||
|
})
|
||||||
|
|
||||||
|
domains.sort(key=lambda x: x["merged"], reverse=True)
|
||||||
|
return web.json_response({"days": days, "domains": domains})
|
||||||
|
|
||||||
|
|
||||||
|
# ─── GET /api/agents-dashboard ─────────────────────────────────────────────
|
||||||
|
|
||||||
|
async def handle_agents_dashboard(request):
|
||||||
|
"""Batched agent performance payload for Page 3.
|
||||||
|
|
||||||
|
Returns per-agent: merged count, rejection rate, yield, CI score,
|
||||||
|
top rejection reasons, contribution trend (weekly).
|
||||||
|
All in one response to avoid N client-side fetches.
|
||||||
|
"""
|
||||||
|
conn = request.app["_get_conn"]()
|
||||||
|
days = int(request.query.get("days", "30"))
|
||||||
|
|
||||||
|
# Per-agent merged + rejected counts
|
||||||
|
agent_stats = conn.execute(
|
||||||
|
"""SELECT
|
||||||
|
COALESCE(json_extract(detail, '$.agent'), json_extract(detail, '$.domain_agent')) as agent,
|
||||||
|
COUNT(*) as evaluated,
|
||||||
|
SUM(CASE WHEN event='approved' THEN 1 ELSE 0 END) as approved,
|
||||||
|
SUM(CASE WHEN event IN ('changes_requested','domain_rejected','tier05_rejected') THEN 1 ELSE 0 END) as rejected
|
||||||
|
FROM audit_log
|
||||||
|
WHERE stage='evaluate'
|
||||||
|
AND event IN ('approved','changes_requested','domain_rejected','tier05_rejected')
|
||||||
|
AND timestamp > datetime('now', ? || ' days')
|
||||||
|
AND COALESCE(json_extract(detail, '$.agent'), json_extract(detail, '$.domain_agent')) IS NOT NULL
|
||||||
|
GROUP BY agent""",
|
||||||
|
(f"-{days}",),
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
agents = {}
|
||||||
|
for r in agent_stats:
|
||||||
|
name = r["agent"]
|
||||||
|
ev = r["evaluated"] or 0
|
||||||
|
ap = r["approved"] or 0
|
||||||
|
rj = r["rejected"] or 0
|
||||||
|
agents[name] = {
|
||||||
|
"evaluated": ev,
|
||||||
|
"approved": ap,
|
||||||
|
"rejected": rj,
|
||||||
|
"yield": round(ap / ev, 3) if ev else 0,
|
||||||
|
"rejection_rate": round(rj / ev, 3) if ev else 0,
|
||||||
|
}
|
||||||
|
|
||||||
|
# Per-agent top rejection reasons from prs.eval_issues (Epimetheus correction 2026-04-02)
|
||||||
|
tag_rows = conn.execute(
|
||||||
|
"""SELECT agent, value as tag, COUNT(*) as cnt
|
||||||
|
FROM prs, json_each(prs.eval_issues)
|
||||||
|
WHERE eval_issues IS NOT NULL AND eval_issues != '[]'
|
||||||
|
AND agent IS NOT NULL
|
||||||
|
AND created_at > datetime('now', ? || ' days')
|
||||||
|
GROUP BY agent, tag
|
||||||
|
ORDER BY agent, cnt DESC""",
|
||||||
|
(f"-{days}",),
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
for r in tag_rows:
|
||||||
|
name = r["agent"]
|
||||||
|
if name in agents:
|
||||||
|
if "top_rejections" not in agents[name]:
|
||||||
|
agents[name]["top_rejections"] = []
|
||||||
|
if len(agents[name]["top_rejections"]) < 5:
|
||||||
|
agents[name]["top_rejections"].append({"tag": r["tag"], "count": r["cnt"]})
|
||||||
|
|
||||||
|
# Weekly contribution trend per agent
|
||||||
|
weekly = conn.execute(
|
||||||
|
"""SELECT
|
||||||
|
COALESCE(json_extract(detail, '$.agent'), json_extract(detail, '$.domain_agent')) as agent,
|
||||||
|
strftime('%Y-W%W', timestamp) as week,
|
||||||
|
SUM(CASE WHEN event='approved' THEN 1 ELSE 0 END) as merged,
|
||||||
|
COUNT(*) as evaluated
|
||||||
|
FROM audit_log
|
||||||
|
WHERE stage='evaluate'
|
||||||
|
AND event IN ('approved','changes_requested','domain_rejected','tier05_rejected')
|
||||||
|
AND timestamp > datetime('now', ? || ' days')
|
||||||
|
AND COALESCE(json_extract(detail, '$.agent'), json_extract(detail, '$.domain_agent')) IS NOT NULL
|
||||||
|
GROUP BY agent, week
|
||||||
|
ORDER BY agent, week""",
|
||||||
|
(f"-{days}",),
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
for r in weekly:
|
||||||
|
name = r["agent"]
|
||||||
|
if name in agents:
|
||||||
|
if "weekly_trend" not in agents[name]:
|
||||||
|
agents[name]["weekly_trend"] = []
|
||||||
|
agents[name]["weekly_trend"].append({
|
||||||
|
"week": r["week"],
|
||||||
|
"merged": r["merged"] or 0,
|
||||||
|
"evaluated": r["evaluated"] or 0,
|
||||||
|
})
|
||||||
|
|
||||||
|
# CI scores from contributors table
|
||||||
|
weights = {"sourcer": 0.15, "extractor": 0.05, "challenger": 0.35, "synthesizer": 0.25, "reviewer": 0.20}
|
||||||
|
try:
|
||||||
|
contribs = conn.execute(
|
||||||
|
"SELECT handle, sourcer_count, extractor_count, challenger_count, "
|
||||||
|
"synthesizer_count, reviewer_count, claims_merged, tier FROM contributors"
|
||||||
|
).fetchall()
|
||||||
|
for c in contribs:
|
||||||
|
name = c["handle"]
|
||||||
|
if name not in agents:
|
||||||
|
agents[name] = {}
|
||||||
|
ci = sum((c[f"{role}_count"] or 0) * w for role, w in weights.items())
|
||||||
|
agents[name]["ci_score"] = round(ci, 2)
|
||||||
|
agents[name]["claims_merged"] = c["claims_merged"] or 0
|
||||||
|
agents[name]["tier"] = c["tier"]
|
||||||
|
except sqlite3.Error:
|
||||||
|
pass
|
||||||
|
|
||||||
|
return web.json_response({"days": days, "agents": agents})
|
||||||
|
|
||||||
|
|
||||||
|
# ─── GET /api/cascade-coverage ────────────────────────────────────────────
|
||||||
|
|
||||||
|
async def handle_cascade_coverage(request):
|
||||||
|
"""Cascade coverage from audit_log stage='cascade' events.
|
||||||
|
|
||||||
|
Returns: triggered count, by-agent breakdown, claims affected.
|
||||||
|
"""
|
||||||
|
conn = request.app["_get_conn"]()
|
||||||
|
days = int(request.query.get("days", "30"))
|
||||||
|
|
||||||
|
triggered = conn.execute(
|
||||||
|
"""SELECT
|
||||||
|
json_extract(detail, '$.agent') as agent,
|
||||||
|
COUNT(*) as cnt,
|
||||||
|
SUM(json_array_length(json_extract(detail, '$.source_claims'))) as claims_affected
|
||||||
|
FROM audit_log
|
||||||
|
WHERE stage='cascade' AND event='cascade_triggered'
|
||||||
|
AND timestamp > datetime('now', ? || ' days')
|
||||||
|
GROUP BY agent""",
|
||||||
|
(f"-{days}",),
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
summaries = conn.execute(
|
||||||
|
"""SELECT
|
||||||
|
SUM(json_extract(detail, '$.notifications_sent')) as total_notifications,
|
||||||
|
COUNT(*) as total_merges_with_cascade
|
||||||
|
FROM audit_log
|
||||||
|
WHERE stage='cascade' AND event='cascade_summary'
|
||||||
|
AND timestamp > datetime('now', ? || ' days')""",
|
||||||
|
(f"-{days}",),
|
||||||
|
).fetchone()
|
||||||
|
|
||||||
|
reviewed = conn.execute(
|
||||||
|
"""SELECT COUNT(*) as cnt
|
||||||
|
FROM audit_log
|
||||||
|
WHERE stage='cascade' AND event='cascade_reviewed'
|
||||||
|
AND timestamp > datetime('now', ? || ' days')""",
|
||||||
|
(f"-{days}",),
|
||||||
|
).fetchone()
|
||||||
|
|
||||||
|
total_triggered = sum(r["cnt"] for r in triggered)
|
||||||
|
total_reviewed = reviewed["cnt"] if reviewed else 0
|
||||||
|
completion_rate = round(total_reviewed / total_triggered, 3) if total_triggered else None
|
||||||
|
|
||||||
|
by_agent = [
|
||||||
|
{"agent": r["agent"], "triggered": r["cnt"], "claims_affected": r["claims_affected"] or 0}
|
||||||
|
for r in triggered
|
||||||
|
]
|
||||||
|
|
||||||
|
return web.json_response({
|
||||||
|
"days": days,
|
||||||
|
"total_triggered": total_triggered,
|
||||||
|
"total_reviewed": total_reviewed,
|
||||||
|
"completion_rate": completion_rate,
|
||||||
|
"total_notifications": summaries["total_notifications"] if summaries else 0,
|
||||||
|
"merges_with_cascade": summaries["total_merges_with_cascade"] if summaries else 0,
|
||||||
|
"by_agent": by_agent,
|
||||||
|
})
|
||||||
|
|
||||||
|
|
||||||
|
# ─── GET /api/review-summary ─────────────────────────────────────────────
|
||||||
|
|
||||||
|
async def handle_review_summary(request):
|
||||||
|
"""Structured review data from review_records table (migration v12).
|
||||||
|
|
||||||
|
Cleaner than audit_log parsing — structured outcome, rejection_reason,
|
||||||
|
disagreement_type columns.
|
||||||
|
"""
|
||||||
|
conn = request.app["_get_conn"]()
|
||||||
|
days = int(request.query.get("days", "30"))
|
||||||
|
|
||||||
|
# Check if table exists and has data
|
||||||
|
try:
|
||||||
|
total = conn.execute(
|
||||||
|
"SELECT COUNT(*) as cnt FROM review_records WHERE reviewed_at > datetime('now', ? || ' days')",
|
||||||
|
(f"-{days}",),
|
||||||
|
).fetchone()["cnt"]
|
||||||
|
except Exception:
|
||||||
|
return web.json_response({"error": "review_records table not available", "populated": False})
|
||||||
|
|
||||||
|
if total == 0:
|
||||||
|
return web.json_response({"populated": False, "total": 0, "days": days})
|
||||||
|
|
||||||
|
# Outcome breakdown
|
||||||
|
outcomes = conn.execute(
|
||||||
|
"""SELECT outcome, COUNT(*) as cnt
|
||||||
|
FROM review_records
|
||||||
|
WHERE reviewed_at > datetime('now', ? || ' days')
|
||||||
|
GROUP BY outcome""",
|
||||||
|
(f"-{days}",),
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
# Rejection reasons
|
||||||
|
reasons = conn.execute(
|
||||||
|
"""SELECT rejection_reason, COUNT(*) as cnt
|
||||||
|
FROM review_records
|
||||||
|
WHERE rejection_reason IS NOT NULL
|
||||||
|
AND reviewed_at > datetime('now', ? || ' days')
|
||||||
|
GROUP BY rejection_reason ORDER BY cnt DESC""",
|
||||||
|
(f"-{days}",),
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
# Disagreement types
|
||||||
|
disagreements = conn.execute(
|
||||||
|
"""SELECT disagreement_type, COUNT(*) as cnt
|
||||||
|
FROM review_records
|
||||||
|
WHERE disagreement_type IS NOT NULL
|
||||||
|
AND reviewed_at > datetime('now', ? || ' days')
|
||||||
|
GROUP BY disagreement_type ORDER BY cnt DESC""",
|
||||||
|
(f"-{days}",),
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
# Per-reviewer breakdown
|
||||||
|
reviewers = conn.execute(
|
||||||
|
"""SELECT reviewer,
|
||||||
|
SUM(CASE WHEN outcome='approved' THEN 1 ELSE 0 END) as approved,
|
||||||
|
SUM(CASE WHEN outcome='approved-with-changes' THEN 1 ELSE 0 END) as approved_with_changes,
|
||||||
|
SUM(CASE WHEN outcome='rejected' THEN 1 ELSE 0 END) as rejected,
|
||||||
|
COUNT(*) as total
|
||||||
|
FROM review_records
|
||||||
|
WHERE reviewed_at > datetime('now', ? || ' days')
|
||||||
|
GROUP BY reviewer ORDER BY total DESC""",
|
||||||
|
(f"-{days}",),
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
# Per-domain breakdown
|
||||||
|
domains = conn.execute(
|
||||||
|
"""SELECT domain,
|
||||||
|
SUM(CASE WHEN outcome='rejected' THEN 1 ELSE 0 END) as rejected,
|
||||||
|
COUNT(*) as total
|
||||||
|
FROM review_records
|
||||||
|
WHERE domain IS NOT NULL
|
||||||
|
AND reviewed_at > datetime('now', ? || ' days')
|
||||||
|
GROUP BY domain ORDER BY total DESC""",
|
||||||
|
(f"-{days}",),
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
return web.json_response({
|
||||||
|
"populated": True,
|
||||||
|
"days": days,
|
||||||
|
"total": total,
|
||||||
|
"outcomes": {r["outcome"]: r["cnt"] for r in outcomes},
|
||||||
|
"rejection_reasons": [{"reason": r["rejection_reason"], "count": r["cnt"]} for r in reasons],
|
||||||
|
"disagreement_types": [{"type": r["disagreement_type"], "count": r["cnt"]} for r in disagreements],
|
||||||
|
"reviewers": [
|
||||||
|
{"reviewer": r["reviewer"], "approved": r["approved"], "approved_with_changes": r["approved_with_changes"],
|
||||||
|
"rejected": r["rejected"], "total": r["total"]}
|
||||||
|
for r in reviewers
|
||||||
|
],
|
||||||
|
"domains": [
|
||||||
|
{"domain": r["domain"], "rejected": r["rejected"], "total": r["total"],
|
||||||
|
"rejection_rate": round(r["rejected"] / r["total"], 3) if r["total"] else 0}
|
||||||
|
for r in domains
|
||||||
|
],
|
||||||
|
})
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Trace endpoint ────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
async def handle_trace(request: web.Request) -> web.Response:
|
||||||
|
"""Return the full lifecycle of a source/PR through the pipeline.
|
||||||
|
|
||||||
|
GET /api/trace/1234 → all audit_log + review_records + costs for PR 1234.
|
||||||
|
One thread, every stage, chronological.
|
||||||
|
"""
|
||||||
|
trace_id = request.match_info["trace_id"]
|
||||||
|
get_conn = request.app["_get_conn"]
|
||||||
|
conn = get_conn()
|
||||||
|
|
||||||
|
# Audit log events (the backbone)
|
||||||
|
# Try trace_id first, fall back to PR number in detail JSON
|
||||||
|
events = conn.execute(
|
||||||
|
"""SELECT timestamp, stage, event, detail
|
||||||
|
FROM audit_log
|
||||||
|
WHERE trace_id = ?
|
||||||
|
ORDER BY timestamp""",
|
||||||
|
(trace_id,),
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
if not events:
|
||||||
|
# Fallback: match by PR number in detail JSON (for rows without trace_id)
|
||||||
|
events = conn.execute(
|
||||||
|
"""SELECT timestamp, stage, event, detail
|
||||||
|
FROM audit_log
|
||||||
|
WHERE CAST(json_extract(detail, '$.pr') AS TEXT) = ?
|
||||||
|
ORDER BY timestamp""",
|
||||||
|
(trace_id,),
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
# Review records for this PR
|
||||||
|
reviews = conn.execute(
|
||||||
|
"""SELECT reviewed_at, reviewer, reviewer_model, outcome,
|
||||||
|
rejection_reason, disagreement_type, notes, claim_path
|
||||||
|
FROM review_records
|
||||||
|
WHERE pr_number = ?
|
||||||
|
ORDER BY reviewed_at""",
|
||||||
|
(trace_id,),
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
# PR metadata
|
||||||
|
pr = conn.execute(
|
||||||
|
"""SELECT number, source_path, domain, agent, tier, status,
|
||||||
|
origin, created_at, merged_at
|
||||||
|
FROM prs
|
||||||
|
WHERE number = ?""",
|
||||||
|
(trace_id,),
|
||||||
|
).fetchone()
|
||||||
|
|
||||||
|
result = {
|
||||||
|
"trace_id": trace_id,
|
||||||
|
"pr": dict(pr) if pr else None,
|
||||||
|
"timeline": [
|
||||||
|
{"timestamp": r[0], "stage": r[1], "event": r[2],
|
||||||
|
"detail": json.loads(r[3]) if r[3] else None}
|
||||||
|
for r in events
|
||||||
|
],
|
||||||
|
"reviews": [
|
||||||
|
{"reviewed_at": r[0], "reviewer": r[1], "model": r[2],
|
||||||
|
"outcome": r[3], "rejection_reason": r[4],
|
||||||
|
"disagreement_type": r[5], "notes": r[6], "claim_path": r[7]}
|
||||||
|
for r in reviews
|
||||||
|
],
|
||||||
|
}
|
||||||
|
|
||||||
|
return web.json_response(result)
|
||||||
|
|
||||||
|
|
||||||
|
# ─── GET /api/growth ──────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
async def handle_growth(request):
|
||||||
|
"""Cumulative growth of sources, PRs, and merged claims over time.
|
||||||
|
|
||||||
|
Returns daily data points with running totals for each series.
|
||||||
|
"""
|
||||||
|
conn = request.app["_get_conn"]()
|
||||||
|
days = int(request.query.get("days", "90"))
|
||||||
|
|
||||||
|
# Daily new sources
|
||||||
|
source_rows = conn.execute(
|
||||||
|
"""SELECT date(created_at) as day, COUNT(*) as cnt
|
||||||
|
FROM sources
|
||||||
|
WHERE created_at > datetime('now', ? || ' days')
|
||||||
|
GROUP BY day ORDER BY day""",
|
||||||
|
(f"-{days}",),
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
# Daily new PRs
|
||||||
|
pr_rows = conn.execute(
|
||||||
|
"""SELECT date(created_at) as day, COUNT(*) as cnt
|
||||||
|
FROM prs
|
||||||
|
WHERE created_at > datetime('now', ? || ' days')
|
||||||
|
GROUP BY day ORDER BY day""",
|
||||||
|
(f"-{days}",),
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
# Daily merged PRs
|
||||||
|
merged_rows = conn.execute(
|
||||||
|
"""SELECT date(merged_at) as day, COUNT(*) as cnt
|
||||||
|
FROM prs
|
||||||
|
WHERE status = 'merged' AND merged_at IS NOT NULL
|
||||||
|
AND merged_at > datetime('now', ? || ' days')
|
||||||
|
GROUP BY day ORDER BY day""",
|
||||||
|
(f"-{days}",),
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
# Get totals BEFORE the window for correct cumulative baseline
|
||||||
|
source_base = conn.execute(
|
||||||
|
"SELECT COUNT(*) as cnt FROM sources WHERE created_at <= datetime('now', ? || ' days')",
|
||||||
|
(f"-{days}",),
|
||||||
|
).fetchone()["cnt"]
|
||||||
|
|
||||||
|
pr_base = conn.execute(
|
||||||
|
"SELECT COUNT(*) as cnt FROM prs WHERE created_at <= datetime('now', ? || ' days')",
|
||||||
|
(f"-{days}",),
|
||||||
|
).fetchone()["cnt"]
|
||||||
|
|
||||||
|
merged_base = conn.execute(
|
||||||
|
"""SELECT COUNT(*) as cnt FROM prs
|
||||||
|
WHERE status = 'merged' AND merged_at IS NOT NULL
|
||||||
|
AND merged_at <= datetime('now', ? || ' days')""",
|
||||||
|
(f"-{days}",),
|
||||||
|
).fetchone()["cnt"]
|
||||||
|
|
||||||
|
# Collect all unique dates
|
||||||
|
all_dates = sorted(set(
|
||||||
|
[r["day"] for r in source_rows] +
|
||||||
|
[r["day"] for r in pr_rows] +
|
||||||
|
[r["day"] for r in merged_rows]
|
||||||
|
))
|
||||||
|
|
||||||
|
# Build lookup dicts
|
||||||
|
src_by_day = {r["day"]: r["cnt"] for r in source_rows}
|
||||||
|
pr_by_day = {r["day"]: r["cnt"] for r in pr_rows}
|
||||||
|
mrg_by_day = {r["day"]: r["cnt"] for r in merged_rows}
|
||||||
|
|
||||||
|
# Build cumulative arrays
|
||||||
|
dates = []
|
||||||
|
sources_cum = []
|
||||||
|
prs_cum = []
|
||||||
|
merged_cum = []
|
||||||
|
|
||||||
|
s_total = source_base
|
||||||
|
p_total = pr_base
|
||||||
|
m_total = merged_base
|
||||||
|
|
||||||
|
for day in all_dates:
|
||||||
|
s_total += src_by_day.get(day, 0)
|
||||||
|
p_total += pr_by_day.get(day, 0)
|
||||||
|
m_total += mrg_by_day.get(day, 0)
|
||||||
|
dates.append(day)
|
||||||
|
sources_cum.append(s_total)
|
||||||
|
prs_cum.append(p_total)
|
||||||
|
merged_cum.append(m_total)
|
||||||
|
|
||||||
|
return web.json_response({
|
||||||
|
"days": days,
|
||||||
|
"dates": dates,
|
||||||
|
"sources": sources_cum,
|
||||||
|
"prs": prs_cum,
|
||||||
|
"merged": merged_cum,
|
||||||
|
"current": {
|
||||||
|
"sources": s_total,
|
||||||
|
"prs": p_total,
|
||||||
|
"merged": m_total,
|
||||||
|
},
|
||||||
|
})
|
||||||
|
|
||||||
|
|
||||||
|
import re
|
||||||
|
_DATE_PREFIX_RE = re.compile(r"^\d{4}-\d{2}-\d{2}-?")
|
||||||
|
|
||||||
|
# ─── GET /api/pr-lifecycle ────────────────────────────────────────────────
|
||||||
|
|
||||||
|
async def handle_pr_lifecycle(request):
|
||||||
|
"""All PRs with eval rounds, reviews, and time-to-merge in one payload.
|
||||||
|
|
||||||
|
Returns: summary KPIs + per-PR array for the table.
|
||||||
|
Joins prs + audit_log (eval rounds) + review_records.
|
||||||
|
"""
|
||||||
|
conn = request.app["_get_conn"]()
|
||||||
|
days = int(request.query.get("days", "30"))
|
||||||
|
|
||||||
|
day_clause = "AND p.created_at > datetime('now', ? || ' days')" if days < 9999 else ""
|
||||||
|
params = (f"-{days}",) if days < 9999 else ()
|
||||||
|
|
||||||
|
# Base PR data
|
||||||
|
pr_rows = conn.execute(
|
||||||
|
f"""SELECT p.number, p.agent, p.domain, p.tier, p.status,
|
||||||
|
p.created_at, p.merged_at, p.leo_verdict, p.description,
|
||||||
|
p.domain_agent, p.domain_model, p.branch
|
||||||
|
FROM prs p
|
||||||
|
WHERE 1=1 {day_clause}
|
||||||
|
ORDER BY p.number DESC""",
|
||||||
|
params,
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
# Eval round counts per PR (from audit_log)
|
||||||
|
eval_rows = conn.execute(
|
||||||
|
f"""SELECT CAST(json_extract(detail, '$.pr') AS INTEGER) as pr,
|
||||||
|
COUNT(*) as rounds
|
||||||
|
FROM audit_log
|
||||||
|
WHERE stage = 'evaluate'
|
||||||
|
AND event IN ('approved', 'changes_requested', 'domain_rejected', 'tier05_rejected')
|
||||||
|
AND json_extract(detail, '$.pr') IS NOT NULL
|
||||||
|
GROUP BY pr""",
|
||||||
|
).fetchall()
|
||||||
|
eval_map = {r["pr"]: r["rounds"] for r in eval_rows}
|
||||||
|
|
||||||
|
# Review outcomes per PR (from review_records)
|
||||||
|
review_rows = conn.execute(
|
||||||
|
"""SELECT pr_number, outcome,
|
||||||
|
GROUP_CONCAT(DISTINCT reviewer) as reviewers,
|
||||||
|
COUNT(*) as review_count
|
||||||
|
FROM review_records
|
||||||
|
GROUP BY pr_number, outcome""",
|
||||||
|
).fetchall()
|
||||||
|
review_map = {}
|
||||||
|
for r in review_rows:
|
||||||
|
pr = r["pr_number"]
|
||||||
|
if pr not in review_map:
|
||||||
|
review_map[pr] = {"outcomes": [], "reviewers": set(), "count": 0}
|
||||||
|
review_map[pr]["outcomes"].append(r["outcome"])
|
||||||
|
if r["reviewers"]:
|
||||||
|
review_map[pr]["reviewers"].update(r["reviewers"].split(","))
|
||||||
|
review_map[pr]["count"] += r["review_count"]
|
||||||
|
|
||||||
|
# Review snippets for closed PRs — from review_text or issues list
|
||||||
|
snippet_rows = conn.execute(
|
||||||
|
"""SELECT CAST(json_extract(detail, '$.pr') AS INTEGER) as pr,
|
||||||
|
COALESCE(
|
||||||
|
json_extract(detail, '$.review_text'),
|
||||||
|
json_extract(detail, '$.domain_review_text'),
|
||||||
|
json_extract(detail, '$.leo_review_text')
|
||||||
|
) as review_text,
|
||||||
|
json_extract(detail, '$.issues') as issues,
|
||||||
|
json_extract(detail, '$.leo') as leo_verdict
|
||||||
|
FROM audit_log
|
||||||
|
WHERE stage = 'evaluate'
|
||||||
|
AND event IN ('domain_rejected', 'changes_requested')
|
||||||
|
AND json_extract(detail, '$.pr') IS NOT NULL
|
||||||
|
ORDER BY timestamp DESC""",
|
||||||
|
).fetchall()
|
||||||
|
snippet_map = {}
|
||||||
|
for r in snippet_rows:
|
||||||
|
pr = r["pr"]
|
||||||
|
if pr not in snippet_map:
|
||||||
|
if r["review_text"]:
|
||||||
|
text = r["review_text"].strip()
|
||||||
|
lines = [ln.strip() for ln in text.split("\n") if ln.strip() and not ln.strip().startswith("#")]
|
||||||
|
snippet_map[pr] = lines[0][:200] if lines else text[:200]
|
||||||
|
elif r["issues"]:
|
||||||
|
try:
|
||||||
|
issues = json.loads(r["issues"]) if isinstance(r["issues"], str) else r["issues"]
|
||||||
|
if isinstance(issues, list) and issues:
|
||||||
|
snippet_map[pr] = "Issues: " + ", ".join(str(i).replace("_", " ") for i in issues)
|
||||||
|
except (json.JSONDecodeError, TypeError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Build PR list
|
||||||
|
prs = []
|
||||||
|
ttm_values = []
|
||||||
|
round_values = []
|
||||||
|
merged_count = 0
|
||||||
|
closed_count = 0
|
||||||
|
open_count = 0
|
||||||
|
|
||||||
|
for r in pr_rows:
|
||||||
|
pr_num = r["number"]
|
||||||
|
ttm = None
|
||||||
|
if r["merged_at"] and r["created_at"]:
|
||||||
|
try:
|
||||||
|
created = datetime.fromisoformat(r["created_at"])
|
||||||
|
merged = datetime.fromisoformat(r["merged_at"])
|
||||||
|
ttm = (merged - created).total_seconds() / 60
|
||||||
|
if ttm >= 0:
|
||||||
|
ttm_values.append(ttm)
|
||||||
|
else:
|
||||||
|
ttm = None
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
rounds = eval_map.get(pr_num, 0)
|
||||||
|
if rounds > 0:
|
||||||
|
round_values.append(rounds)
|
||||||
|
|
||||||
|
review_info = review_map.get(pr_num)
|
||||||
|
|
||||||
|
status = r["status"] or "unknown"
|
||||||
|
if status == "merged":
|
||||||
|
merged_count += 1
|
||||||
|
elif status == "closed":
|
||||||
|
closed_count += 1
|
||||||
|
elif status == "open":
|
||||||
|
open_count += 1
|
||||||
|
|
||||||
|
# Claims count from pipe-separated description titles
|
||||||
|
desc = r["description"] or ""
|
||||||
|
claims_count = desc.count("|") + 1 if desc.strip() else 1
|
||||||
|
|
||||||
|
# Summary: first claim title from description, fallback to branch name
|
||||||
|
summary = None
|
||||||
|
if desc.strip():
|
||||||
|
first_title = desc.split("|")[0].strip()
|
||||||
|
summary = first_title[:120] if first_title else None
|
||||||
|
if not summary:
|
||||||
|
branch = r["branch"] or ""
|
||||||
|
# Use prefix as category if present: "extract/...", "reweave/...", etc.
|
||||||
|
prefix = ""
|
||||||
|
if "/" in branch:
|
||||||
|
prefix = branch.split("/", 1)[0]
|
||||||
|
branch = branch.split("/", 1)[1]
|
||||||
|
# Strip date prefix like "2026-04-06-" or "2026-02-00-"
|
||||||
|
branch = _DATE_PREFIX_RE.sub("", branch)
|
||||||
|
# Strip trailing hash suffix like "-116d" or "-2cb1"
|
||||||
|
branch = re.sub(r"-[0-9a-f]{4}$", "", branch)
|
||||||
|
if branch:
|
||||||
|
summary = branch.replace("-", " ").replace("_", " ").strip()[:120]
|
||||||
|
elif prefix:
|
||||||
|
summary = prefix # "reweave", "ingestion", etc.
|
||||||
|
|
||||||
|
prs.append({
|
||||||
|
"number": pr_num,
|
||||||
|
"agent": r["agent"],
|
||||||
|
"domain": r["domain"],
|
||||||
|
"tier": r["tier"],
|
||||||
|
"status": status,
|
||||||
|
"claims_count": claims_count,
|
||||||
|
"eval_rounds": rounds,
|
||||||
|
"ttm_minutes": round(ttm, 1) if ttm is not None else None,
|
||||||
|
"created_at": r["created_at"],
|
||||||
|
"merged_at": r["merged_at"],
|
||||||
|
"leo_verdict": r["leo_verdict"],
|
||||||
|
"review_count": review_info["count"] if review_info else 0,
|
||||||
|
"summary": summary,
|
||||||
|
"description": desc if desc.strip() else None,
|
||||||
|
"review_snippet": snippet_map.get(pr_num),
|
||||||
|
})
|
||||||
|
|
||||||
|
# Summary KPIs
|
||||||
|
ttm_values.sort()
|
||||||
|
round_values.sort()
|
||||||
|
|
||||||
|
def median(vals):
|
||||||
|
if not vals:
|
||||||
|
return None
|
||||||
|
n = len(vals)
|
||||||
|
if n % 2 == 0:
|
||||||
|
return (vals[n // 2 - 1] + vals[n // 2]) / 2
|
||||||
|
return vals[n // 2]
|
||||||
|
|
||||||
|
def p90(vals):
|
||||||
|
if len(vals) < 5:
|
||||||
|
return None
|
||||||
|
return vals[int(len(vals) * 0.9)]
|
||||||
|
|
||||||
|
return web.json_response({
|
||||||
|
"days": days,
|
||||||
|
"total": len(prs),
|
||||||
|
"merged": merged_count,
|
||||||
|
"closed": closed_count,
|
||||||
|
"open": open_count,
|
||||||
|
"median_ttm": round(median(ttm_values), 1) if median(ttm_values) is not None else None,
|
||||||
|
"p90_ttm": round(p90(ttm_values), 1) if p90(ttm_values) is not None else None,
|
||||||
|
"median_rounds": round(median(round_values), 1) if median(round_values) is not None else None,
|
||||||
|
"max_rounds": max(round_values) if round_values else None,
|
||||||
|
"prs": prs,
|
||||||
|
})
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Registration ──────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def register_dashboard_routes(app: web.Application, get_conn):
|
||||||
|
"""Register new dashboard API routes."""
|
||||||
|
app["_get_conn"] = get_conn
|
||||||
|
app.router.add_get("/api/stage-times", handle_stage_times)
|
||||||
|
app.router.add_get("/api/herfindahl", handle_herfindahl)
|
||||||
|
app.router.add_get("/api/agent-state", handle_agent_state)
|
||||||
|
app.router.add_get("/api/extraction-yield-by-domain", handle_extraction_yield_by_domain)
|
||||||
|
app.router.add_get("/api/agents-dashboard", handle_agents_dashboard)
|
||||||
|
app.router.add_get("/api/cascade-coverage", handle_cascade_coverage)
|
||||||
|
app.router.add_get("/api/review-summary", handle_review_summary)
|
||||||
|
app.router.add_get("/api/trace/{trace_id}", handle_trace)
|
||||||
|
app.router.add_get("/api/growth", handle_growth)
|
||||||
|
app.router.add_get("/api/pr-lifecycle", handle_pr_lifecycle)
|
||||||
475
ops/diagnostics/response_audit_routes.py
Normal file
475
ops/diagnostics/response_audit_routes.py
Normal file
|
|
@ -0,0 +1,475 @@
|
||||||
|
"""Response audit API routes — agent cost tracking, reasoning traces, unified activity.
|
||||||
|
|
||||||
|
Endpoints:
|
||||||
|
GET /api/response-audit — paginated response list with cost columns
|
||||||
|
GET /api/response-audit/{id} — single response detail with full tool_calls
|
||||||
|
GET /api/agent-costs — aggregated cost view from response_audit
|
||||||
|
GET /api/unified-activity — merged prs + response_audit timeline
|
||||||
|
|
||||||
|
Data source: response_audit table in pipeline.db (written by Epimetheus's Telegram bot).
|
||||||
|
|
||||||
|
Owner: Argus
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import sqlite3
|
||||||
|
|
||||||
|
from aiohttp import web
|
||||||
|
|
||||||
|
logger = logging.getLogger("argus.response_audit_routes")
|
||||||
|
|
||||||
|
|
||||||
|
def _conn(app):
|
||||||
|
"""Read-only connection to pipeline.db."""
|
||||||
|
db_path = app["db_path"]
|
||||||
|
conn = sqlite3.connect(f"file:{db_path}?mode=ro", uri=True)
|
||||||
|
conn.row_factory = sqlite3.Row
|
||||||
|
return conn
|
||||||
|
|
||||||
|
|
||||||
|
# ─── GET /api/response-audit ─────────────────────────────────────────────
|
||||||
|
|
||||||
|
async def handle_response_audit_list(request):
|
||||||
|
"""Paginated response audit list with cost and model data.
|
||||||
|
|
||||||
|
Query params:
|
||||||
|
agent — filter by agent name
|
||||||
|
hours — lookback window (default 24, max 168)
|
||||||
|
limit — max results (default 50, max 200)
|
||||||
|
offset — pagination offset (default 0)
|
||||||
|
model — filter by model name (substring match)
|
||||||
|
"""
|
||||||
|
agent = request.query.get("agent")
|
||||||
|
model_filter = request.query.get("model")
|
||||||
|
try:
|
||||||
|
hours = min(int(request.query.get("hours", 24)), 168)
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
hours = 24
|
||||||
|
try:
|
||||||
|
limit = min(int(request.query.get("limit", 50)), 200)
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
limit = 50
|
||||||
|
try:
|
||||||
|
offset = max(int(request.query.get("offset", 0)), 0)
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
offset = 0
|
||||||
|
|
||||||
|
conn = _conn(request.app)
|
||||||
|
try:
|
||||||
|
where = ["timestamp > datetime('now', ?)"]
|
||||||
|
params: list = [f"-{hours} hours"]
|
||||||
|
|
||||||
|
if agent:
|
||||||
|
where.append("agent = ?")
|
||||||
|
params.append(agent)
|
||||||
|
if model_filter:
|
||||||
|
where.append("model LIKE ?")
|
||||||
|
params.append(f"%{model_filter}%")
|
||||||
|
|
||||||
|
where_clause = " AND ".join(where)
|
||||||
|
|
||||||
|
# Count total matching
|
||||||
|
total = conn.execute(
|
||||||
|
f"SELECT COUNT(*) as cnt FROM response_audit WHERE {where_clause}",
|
||||||
|
params,
|
||||||
|
).fetchone()["cnt"]
|
||||||
|
|
||||||
|
# Fetch page — exclude large text fields for list view
|
||||||
|
rows = conn.execute(
|
||||||
|
f"""SELECT id, timestamp, agent, model, query,
|
||||||
|
prompt_tokens, completion_tokens,
|
||||||
|
generation_cost, embedding_cost, total_cost,
|
||||||
|
confidence_score, response_time_ms, query_type,
|
||||||
|
CASE WHEN tool_calls IS NOT NULL AND tool_calls != '[]'
|
||||||
|
THEN json_array_length(tool_calls)
|
||||||
|
ELSE 0 END as tool_call_count,
|
||||||
|
LENGTH(display_response) as response_length
|
||||||
|
FROM response_audit
|
||||||
|
WHERE {where_clause}
|
||||||
|
ORDER BY timestamp DESC
|
||||||
|
LIMIT ? OFFSET ?""",
|
||||||
|
params + [limit, offset],
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
responses = []
|
||||||
|
for r in rows:
|
||||||
|
responses.append({
|
||||||
|
"id": r["id"],
|
||||||
|
"timestamp": r["timestamp"],
|
||||||
|
"agent": r["agent"],
|
||||||
|
"model": r["model"],
|
||||||
|
"query": r["query"],
|
||||||
|
"query_type": r["query_type"],
|
||||||
|
"prompt_tokens": r["prompt_tokens"],
|
||||||
|
"completion_tokens": r["completion_tokens"],
|
||||||
|
"generation_cost": r["generation_cost"],
|
||||||
|
"embedding_cost": r["embedding_cost"],
|
||||||
|
"total_cost": r["total_cost"],
|
||||||
|
"confidence": r["confidence_score"],
|
||||||
|
"response_time_ms": r["response_time_ms"],
|
||||||
|
"tool_call_count": r["tool_call_count"],
|
||||||
|
"response_length": r["response_length"],
|
||||||
|
})
|
||||||
|
|
||||||
|
return web.json_response({
|
||||||
|
"total": total,
|
||||||
|
"limit": limit,
|
||||||
|
"offset": offset,
|
||||||
|
"hours": hours,
|
||||||
|
"responses": responses,
|
||||||
|
})
|
||||||
|
finally:
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
# ─── GET /api/response-audit/{id} ────────────────────────────────────────
|
||||||
|
|
||||||
|
async def handle_response_audit_detail(request):
|
||||||
|
"""Full response detail including reasoning trace and tool calls.
|
||||||
|
|
||||||
|
Returns the complete response_audit row with tool_calls parsed as JSON.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
audit_id = int(request.match_info["id"])
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
return web.json_response({"error": "Invalid ID"}, status=400)
|
||||||
|
|
||||||
|
conn = _conn(request.app)
|
||||||
|
try:
|
||||||
|
row = conn.execute(
|
||||||
|
"""SELECT id, timestamp, chat_id, user, agent, model,
|
||||||
|
query, query_type, conversation_window,
|
||||||
|
entities_matched, claims_matched,
|
||||||
|
retrieval_layers_hit, retrieval_gap,
|
||||||
|
market_data, research_context,
|
||||||
|
tool_calls, raw_response, display_response,
|
||||||
|
confidence_score, response_time_ms,
|
||||||
|
prompt_tokens, completion_tokens,
|
||||||
|
generation_cost, embedding_cost, total_cost,
|
||||||
|
blocked, block_reason
|
||||||
|
FROM response_audit WHERE id = ?""",
|
||||||
|
(audit_id,),
|
||||||
|
).fetchone()
|
||||||
|
|
||||||
|
if not row:
|
||||||
|
return web.json_response({"error": "Response not found"}, status=404)
|
||||||
|
|
||||||
|
# Parse JSON fields
|
||||||
|
def parse_json(val):
|
||||||
|
if val is None:
|
||||||
|
return None
|
||||||
|
try:
|
||||||
|
return json.loads(val)
|
||||||
|
except (json.JSONDecodeError, TypeError):
|
||||||
|
return val
|
||||||
|
|
||||||
|
result = {
|
||||||
|
"id": row["id"],
|
||||||
|
"timestamp": row["timestamp"],
|
||||||
|
"chat_id": row["chat_id"],
|
||||||
|
"user": row["user"],
|
||||||
|
"agent": row["agent"],
|
||||||
|
"model": row["model"],
|
||||||
|
"query": row["query"],
|
||||||
|
"query_type": row["query_type"],
|
||||||
|
"conversation_window": parse_json(row["conversation_window"]),
|
||||||
|
"entities_matched": parse_json(row["entities_matched"]),
|
||||||
|
"claims_matched": parse_json(row["claims_matched"]),
|
||||||
|
"retrieval_layers_hit": parse_json(row["retrieval_layers_hit"]),
|
||||||
|
"retrieval_gap": row["retrieval_gap"],
|
||||||
|
"market_data": parse_json(row["market_data"]),
|
||||||
|
"research_context": row["research_context"],
|
||||||
|
"tool_calls": parse_json(row["tool_calls"]),
|
||||||
|
"display_response": row["display_response"],
|
||||||
|
"raw_response": row["raw_response"],
|
||||||
|
"confidence_score": row["confidence_score"],
|
||||||
|
"response_time_ms": row["response_time_ms"],
|
||||||
|
"prompt_tokens": row["prompt_tokens"],
|
||||||
|
"completion_tokens": row["completion_tokens"],
|
||||||
|
"generation_cost": row["generation_cost"],
|
||||||
|
"embedding_cost": row["embedding_cost"],
|
||||||
|
"total_cost": row["total_cost"],
|
||||||
|
"blocked": bool(row["blocked"]) if row["blocked"] is not None else None,
|
||||||
|
"block_reason": row["block_reason"],
|
||||||
|
}
|
||||||
|
|
||||||
|
# Compute iteration summary from tool_calls
|
||||||
|
tool_calls = result["tool_calls"] or []
|
||||||
|
if isinstance(tool_calls, list):
|
||||||
|
reasoning_steps = [t for t in tool_calls if isinstance(t, dict) and t.get("type") == "reasoning"]
|
||||||
|
tool_steps = [t for t in tool_calls if isinstance(t, dict) and t.get("type") == "tool_call"]
|
||||||
|
result["trace_summary"] = {
|
||||||
|
"total_steps": len(tool_calls),
|
||||||
|
"reasoning_steps": len(reasoning_steps),
|
||||||
|
"tool_steps": len(tool_steps),
|
||||||
|
"tools_used": list({t.get("tool", "unknown") for t in tool_steps}),
|
||||||
|
"total_duration_ms": sum(t.get("duration_ms", 0) for t in tool_steps),
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
result["trace_summary"] = None
|
||||||
|
|
||||||
|
return web.json_response(result)
|
||||||
|
finally:
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
# ─── GET /api/agent-costs ─────────────────────────────────────────────────
|
||||||
|
|
||||||
|
async def handle_agent_costs(request):
|
||||||
|
"""Aggregated agent cost data from response_audit.
|
||||||
|
|
||||||
|
Query params:
|
||||||
|
days — lookback window (default 7, max 30)
|
||||||
|
by — grouping: agent, model, day (default agent)
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
days = min(int(request.query.get("days", 7)), 30)
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
days = 7
|
||||||
|
group_by = request.query.get("by", "agent")
|
||||||
|
agent = request.query.get("agent")
|
||||||
|
|
||||||
|
conn = _conn(request.app)
|
||||||
|
try:
|
||||||
|
if group_by == "model":
|
||||||
|
group_col = "model"
|
||||||
|
elif group_by == "day":
|
||||||
|
group_col = "date(timestamp)"
|
||||||
|
else:
|
||||||
|
group_col = "agent"
|
||||||
|
group_by = "agent"
|
||||||
|
|
||||||
|
where = ["timestamp > datetime('now', ?)"]
|
||||||
|
params: list = [f"-{days} days"]
|
||||||
|
if agent:
|
||||||
|
where.append("agent = ?")
|
||||||
|
params.append(agent)
|
||||||
|
|
||||||
|
where_clause = " AND ".join(where)
|
||||||
|
|
||||||
|
rows = conn.execute(
|
||||||
|
f"""SELECT {group_col} as grp,
|
||||||
|
COUNT(*) as responses,
|
||||||
|
SUM(prompt_tokens) as total_prompt_tokens,
|
||||||
|
SUM(completion_tokens) as total_completion_tokens,
|
||||||
|
SUM(COALESCE(total_cost, generation_cost, 0)) as total_cost,
|
||||||
|
AVG(COALESCE(total_cost, generation_cost, 0)) as avg_cost,
|
||||||
|
AVG(response_time_ms) as avg_response_ms,
|
||||||
|
AVG(confidence_score) as avg_confidence
|
||||||
|
FROM response_audit
|
||||||
|
WHERE {where_clause}
|
||||||
|
GROUP BY grp
|
||||||
|
ORDER BY total_cost DESC""",
|
||||||
|
params,
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
breakdown = []
|
||||||
|
for r in rows:
|
||||||
|
breakdown.append({
|
||||||
|
group_by: r["grp"],
|
||||||
|
"responses": r["responses"],
|
||||||
|
"prompt_tokens": r["total_prompt_tokens"] or 0,
|
||||||
|
"completion_tokens": r["total_completion_tokens"] or 0,
|
||||||
|
"total_cost": round(r["total_cost"] or 0, 4),
|
||||||
|
"avg_cost_per_response": round(r["avg_cost"] or 0, 4),
|
||||||
|
"avg_response_ms": round(r["avg_response_ms"] or 0, 0),
|
||||||
|
"avg_confidence": round(r["avg_confidence"] or 0, 3) if r["avg_confidence"] else None,
|
||||||
|
})
|
||||||
|
|
||||||
|
grand_total = sum(b["total_cost"] for b in breakdown)
|
||||||
|
total_responses = sum(b["responses"] for b in breakdown)
|
||||||
|
|
||||||
|
# Daily trend (always included regardless of grouping)
|
||||||
|
daily_where = ["timestamp > datetime('now', ?)"]
|
||||||
|
daily_params: list = [f"-{days} days"]
|
||||||
|
if agent:
|
||||||
|
daily_where.append("agent = ?")
|
||||||
|
daily_params.append(agent)
|
||||||
|
|
||||||
|
daily = conn.execute(
|
||||||
|
f"""SELECT date(timestamp) as day,
|
||||||
|
COUNT(*) as responses,
|
||||||
|
SUM(COALESCE(total_cost, generation_cost, 0)) as cost
|
||||||
|
FROM response_audit
|
||||||
|
WHERE {' AND '.join(daily_where)}
|
||||||
|
GROUP BY day ORDER BY day""",
|
||||||
|
daily_params,
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
daily_trend = [
|
||||||
|
{"date": r["day"], "responses": r["responses"],
|
||||||
|
"cost": round(r["cost"] or 0, 4)}
|
||||||
|
for r in daily
|
||||||
|
]
|
||||||
|
|
||||||
|
return web.json_response({
|
||||||
|
"period_days": days,
|
||||||
|
"grand_total": round(grand_total, 4),
|
||||||
|
"total_responses": total_responses,
|
||||||
|
"avg_cost_per_response": round(grand_total / total_responses, 4) if total_responses else 0,
|
||||||
|
f"by_{group_by}": breakdown,
|
||||||
|
"daily_trend": daily_trend,
|
||||||
|
})
|
||||||
|
finally:
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
# ─── GET /api/unified-activity ────────────────────────────────────────────
|
||||||
|
|
||||||
|
async def handle_unified_activity(request):
|
||||||
|
"""Unified activity feed merging pipeline ops (prs) + agent responses (response_audit).
|
||||||
|
|
||||||
|
Query params:
|
||||||
|
hours — lookback window (default 24, max 168)
|
||||||
|
limit — max results (default 100, max 500)
|
||||||
|
agent — filter by agent name
|
||||||
|
type — filter: pipeline, response, or all (default all)
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
hours = min(int(request.query.get("hours", 24)), 168)
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
hours = 24
|
||||||
|
try:
|
||||||
|
limit = min(int(request.query.get("limit", 100)), 500)
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
limit = 100
|
||||||
|
agent = request.query.get("agent")
|
||||||
|
activity_type = request.query.get("type", "all")
|
||||||
|
|
||||||
|
conn = _conn(request.app)
|
||||||
|
try:
|
||||||
|
entries = []
|
||||||
|
|
||||||
|
# Pipeline events from prs table
|
||||||
|
if activity_type in ("all", "pipeline"):
|
||||||
|
pr_where = ["COALESCE(merged_at, created_at) > datetime('now', ?)"]
|
||||||
|
pr_params: list = [f"-{hours} hours"]
|
||||||
|
if agent:
|
||||||
|
pr_where.append("agent = ?")
|
||||||
|
pr_params.append(agent)
|
||||||
|
|
||||||
|
prs = conn.execute(
|
||||||
|
f"""SELECT number, branch, status, domain, agent, tier,
|
||||||
|
commit_type, cost_usd,
|
||||||
|
created_at, merged_at,
|
||||||
|
leo_verdict, domain_verdict
|
||||||
|
FROM prs
|
||||||
|
WHERE {' AND '.join(pr_where)}
|
||||||
|
ORDER BY COALESCE(merged_at, created_at) DESC""",
|
||||||
|
pr_params,
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
for pr in prs:
|
||||||
|
ts = pr["merged_at"] or pr["created_at"]
|
||||||
|
# Derive action description from status
|
||||||
|
if pr["status"] == "merged":
|
||||||
|
action = f"Merged {pr['commit_type'] or 'PR'}"
|
||||||
|
elif pr["status"] == "closed":
|
||||||
|
action = f"Closed {pr['commit_type'] or 'PR'}"
|
||||||
|
elif pr["status"] in ("approved", "reviewing"):
|
||||||
|
action = f"{pr['commit_type'] or 'PR'} awaiting merge"
|
||||||
|
else:
|
||||||
|
action = f"{pr['commit_type'] or 'PR'} {pr['status']}"
|
||||||
|
|
||||||
|
entries.append({
|
||||||
|
"timestamp": ts,
|
||||||
|
"type": "pipeline",
|
||||||
|
"agent": pr["agent"],
|
||||||
|
"action": action,
|
||||||
|
"domain": pr["domain"],
|
||||||
|
"pr_number": pr["number"],
|
||||||
|
"branch": pr["branch"],
|
||||||
|
"status": pr["status"],
|
||||||
|
"commit_type": pr["commit_type"],
|
||||||
|
"cost": pr["cost_usd"],
|
||||||
|
"detail": {
|
||||||
|
"tier": pr["tier"],
|
||||||
|
"leo_verdict": pr["leo_verdict"],
|
||||||
|
"domain_verdict": pr["domain_verdict"],
|
||||||
|
},
|
||||||
|
})
|
||||||
|
|
||||||
|
# Agent responses from response_audit
|
||||||
|
if activity_type in ("all", "response"):
|
||||||
|
ra_where = ["timestamp > datetime('now', ?)"]
|
||||||
|
ra_params: list = [f"-{hours} hours"]
|
||||||
|
if agent:
|
||||||
|
ra_where.append("agent = ?")
|
||||||
|
ra_params.append(agent)
|
||||||
|
|
||||||
|
responses = conn.execute(
|
||||||
|
f"""SELECT id, timestamp, agent, model, query,
|
||||||
|
generation_cost, response_time_ms,
|
||||||
|
confidence_score,
|
||||||
|
CASE WHEN tool_calls IS NOT NULL AND tool_calls != '[]'
|
||||||
|
THEN json_array_length(tool_calls)
|
||||||
|
ELSE 0 END as tool_call_count
|
||||||
|
FROM response_audit
|
||||||
|
WHERE {' AND '.join(ra_where)}
|
||||||
|
ORDER BY timestamp DESC""",
|
||||||
|
ra_params,
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
for r in responses:
|
||||||
|
# Truncate query for feed display
|
||||||
|
query_preview = (r["query"] or "")[:120]
|
||||||
|
if len(r["query"] or "") > 120:
|
||||||
|
query_preview += "..."
|
||||||
|
|
||||||
|
entries.append({
|
||||||
|
"timestamp": r["timestamp"],
|
||||||
|
"type": "response",
|
||||||
|
"agent": r["agent"],
|
||||||
|
"action": f"Responded to query ({r['tool_call_count']} tool calls)",
|
||||||
|
"domain": None,
|
||||||
|
"pr_number": None,
|
||||||
|
"audit_id": r["id"],
|
||||||
|
"query_preview": query_preview,
|
||||||
|
"model": r["model"],
|
||||||
|
"cost": r["generation_cost"],
|
||||||
|
"detail": {
|
||||||
|
"response_time_ms": r["response_time_ms"],
|
||||||
|
"confidence": r["confidence_score"],
|
||||||
|
"tool_call_count": r["tool_call_count"],
|
||||||
|
},
|
||||||
|
})
|
||||||
|
|
||||||
|
# Sort combined entries by timestamp descending
|
||||||
|
entries.sort(key=lambda e: e["timestamp"] or "", reverse=True)
|
||||||
|
entries = entries[:limit]
|
||||||
|
|
||||||
|
# Summary stats
|
||||||
|
pipeline_count = sum(1 for e in entries if e["type"] == "pipeline")
|
||||||
|
response_count = sum(1 for e in entries if e["type"] == "response")
|
||||||
|
total_cost = sum(e.get("cost") or 0 for e in entries)
|
||||||
|
|
||||||
|
return web.json_response({
|
||||||
|
"hours": hours,
|
||||||
|
"total_entries": len(entries),
|
||||||
|
"pipeline_events": pipeline_count,
|
||||||
|
"response_events": response_count,
|
||||||
|
"total_cost": round(total_cost, 4),
|
||||||
|
"entries": entries,
|
||||||
|
})
|
||||||
|
finally:
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Registration ─────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def register_response_audit_routes(app):
|
||||||
|
"""Register response audit API routes. Call from create_app()."""
|
||||||
|
app.router.add_get("/api/response-audit", handle_response_audit_list)
|
||||||
|
app.router.add_get("/api/response-audit/{id}", handle_response_audit_detail)
|
||||||
|
app.router.add_get("/api/agent-costs", handle_agent_costs)
|
||||||
|
app.router.add_get("/api/unified-activity", handle_unified_activity)
|
||||||
|
|
||||||
|
|
||||||
|
# Public paths for auth middleware
|
||||||
|
RESPONSE_AUDIT_PUBLIC_PATHS = frozenset({
|
||||||
|
"/api/response-audit",
|
||||||
|
"/api/agent-costs",
|
||||||
|
"/api/unified-activity",
|
||||||
|
})
|
||||||
|
# /api/response-audit/{id} needs prefix matching in auth middleware
|
||||||
222
ops/diagnostics/review_queue.py
Normal file
222
ops/diagnostics/review_queue.py
Normal file
|
|
@ -0,0 +1,222 @@
|
||||||
|
"""Review queue: fetches open PRs from Forgejo, classifies and enriches them.
|
||||||
|
|
||||||
|
Data sources:
|
||||||
|
- Forgejo API (git.livingip.xyz) for PR metadata, reviews, changed files
|
||||||
|
- pipeline.db prs table for eval status cross-reference
|
||||||
|
|
||||||
|
Display priority: broken > needs-review (by age) > approved-awaiting-merge > changes-requested
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import logging
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
import aiohttp
|
||||||
|
|
||||||
|
logger = logging.getLogger("argus.review_queue")
|
||||||
|
|
||||||
|
FORGEJO_BASE = "https://git.livingip.xyz/api/v1"
|
||||||
|
REPO = "teleo/teleo-codex"
|
||||||
|
|
||||||
|
# Domain detection from branch prefixes or path patterns
|
||||||
|
DOMAIN_KEYWORDS = {
|
||||||
|
"internet-finance": ["internet-finance", "defi", "dao", "prediction-market"],
|
||||||
|
"entertainment": ["entertainment", "clay", "media", "ip-"],
|
||||||
|
"ai-alignment": ["ai-alignment", "alignment", "theseus"],
|
||||||
|
"health": ["health", "vida", "biotech", "glp"],
|
||||||
|
"space-development": ["space", "astra", "orbital", "lunar"],
|
||||||
|
"energy": ["energy", "solar", "nuclear", "fusion"],
|
||||||
|
"grand-strategy": ["grand-strategy", "leo", "strategy"],
|
||||||
|
"collective-intelligence": ["collective-intelligence", "coordination"],
|
||||||
|
"critical-systems": ["critical-systems", "complexity", "emergence"],
|
||||||
|
"teleological-economics": ["teleological-economics", "disruption", "attractor"],
|
||||||
|
"cultural-dynamics": ["cultural-dynamics", "memetics", "narrative"],
|
||||||
|
"mechanisms": ["mechanisms", "futarchy", "governance"],
|
||||||
|
"living-capital": ["living-capital", "investment"],
|
||||||
|
"living-agents": ["living-agents", "agent-architecture"],
|
||||||
|
"teleohumanity": ["teleohumanity", "worldview"],
|
||||||
|
"general": ["general"],
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _detect_domain(branch: str, title: str, files: list[dict]) -> str:
|
||||||
|
"""Detect domain from branch name, title, or changed file paths."""
|
||||||
|
text = f"{branch} {title}".lower()
|
||||||
|
|
||||||
|
# Check branch/title
|
||||||
|
for domain, keywords in DOMAIN_KEYWORDS.items():
|
||||||
|
for kw in keywords:
|
||||||
|
if kw in text:
|
||||||
|
return domain
|
||||||
|
|
||||||
|
# Check file paths
|
||||||
|
for f in files:
|
||||||
|
path = f.get("filename", "")
|
||||||
|
if path.startswith("domains/") or path.startswith("foundations/") or path.startswith("core/"):
|
||||||
|
parts = path.split("/")
|
||||||
|
if len(parts) >= 2:
|
||||||
|
return parts[1]
|
||||||
|
|
||||||
|
return "unknown"
|
||||||
|
|
||||||
|
|
||||||
|
def _classify_files(files: list[dict]) -> dict[str, int]:
|
||||||
|
"""Count claim, enrichment, and challenge files from changed files list."""
|
||||||
|
counts = {"claim_count": 0, "enrichment_count": 0, "challenge_count": 0}
|
||||||
|
for f in files:
|
||||||
|
path = f.get("filename", "")
|
||||||
|
status = f.get("status", "") # added, modified, removed
|
||||||
|
|
||||||
|
if not path.startswith("domains/") and not path.startswith("foundations/") and not path.startswith("core/"):
|
||||||
|
continue
|
||||||
|
|
||||||
|
name = path.split("/")[-1].lower()
|
||||||
|
|
||||||
|
if "challenge" in name or "divergence" in name:
|
||||||
|
counts["challenge_count"] += 1
|
||||||
|
elif status == "modified":
|
||||||
|
counts["enrichment_count"] += 1
|
||||||
|
else:
|
||||||
|
counts["claim_count"] += 1
|
||||||
|
|
||||||
|
return counts
|
||||||
|
|
||||||
|
|
||||||
|
def _classify_status(
|
||||||
|
changed_files: int,
|
||||||
|
reviews: list[dict],
|
||||||
|
requested_reviewers: list[dict],
|
||||||
|
) -> str:
|
||||||
|
"""Classify PR status: broken, needs-review, approved-awaiting-merge, changes-requested."""
|
||||||
|
if changed_files == 0:
|
||||||
|
return "broken"
|
||||||
|
|
||||||
|
has_changes_requested = any(r["state"] == "REQUEST_CHANGES" for r in reviews)
|
||||||
|
if has_changes_requested:
|
||||||
|
# Check if there's a newer approval after the changes request
|
||||||
|
last_change_req = max(
|
||||||
|
(r["submitted_at"] for r in reviews if r["state"] == "REQUEST_CHANGES"),
|
||||||
|
default="",
|
||||||
|
)
|
||||||
|
later_approvals = [
|
||||||
|
r for r in reviews
|
||||||
|
if r["state"] == "APPROVED" and r["submitted_at"] > last_change_req
|
||||||
|
]
|
||||||
|
if not later_approvals:
|
||||||
|
return "changes-requested"
|
||||||
|
|
||||||
|
approvals = [r for r in reviews if r["state"] == "APPROVED"]
|
||||||
|
if len(approvals) >= 2:
|
||||||
|
return "approved-awaiting-merge"
|
||||||
|
|
||||||
|
return "needs-review"
|
||||||
|
|
||||||
|
|
||||||
|
def _days_open(created_at: str) -> int:
|
||||||
|
"""Calculate days since PR was opened."""
|
||||||
|
created = datetime.fromisoformat(created_at.replace("Z", "+00:00"))
|
||||||
|
now = datetime.now(timezone.utc)
|
||||||
|
return (now - created).days
|
||||||
|
|
||||||
|
|
||||||
|
_STATUS_PRIORITY = {
|
||||||
|
"broken": 0,
|
||||||
|
"needs-review": 1,
|
||||||
|
"approved-awaiting-merge": 2,
|
||||||
|
"changes-requested": 3,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
async def fetch_review_queue(
|
||||||
|
forgejo_token: str | None = None,
|
||||||
|
timeout_s: int = 15,
|
||||||
|
) -> list[dict[str, Any]]:
|
||||||
|
"""Fetch open PRs from Forgejo and return enriched review queue.
|
||||||
|
|
||||||
|
Returns list sorted by display priority (broken first, then needs-review by age).
|
||||||
|
"""
|
||||||
|
headers = {"Accept": "application/json"}
|
||||||
|
if forgejo_token:
|
||||||
|
headers["Authorization"] = f"token {forgejo_token}"
|
||||||
|
|
||||||
|
connector = aiohttp.TCPConnector(ssl=False)
|
||||||
|
async with aiohttp.ClientSession(headers=headers, connector=connector) as session:
|
||||||
|
# Fetch open PRs
|
||||||
|
url = f"{FORGEJO_BASE}/repos/{REPO}/pulls?state=open&limit=50&sort=oldest"
|
||||||
|
try:
|
||||||
|
async with session.get(url, timeout=aiohttp.ClientTimeout(total=timeout_s)) as resp:
|
||||||
|
if resp.status != 200:
|
||||||
|
logger.error("Forgejo PR list returned %d", resp.status)
|
||||||
|
return []
|
||||||
|
prs = await resp.json()
|
||||||
|
except Exception as e:
|
||||||
|
logger.error("Failed to fetch PRs from Forgejo: %s", e)
|
||||||
|
return []
|
||||||
|
|
||||||
|
# Fetch reviews and files for all PRs in parallel
|
||||||
|
async def _fetch_json(session, url, label=""):
|
||||||
|
try:
|
||||||
|
async with session.get(url, timeout=aiohttp.ClientTimeout(total=timeout_s)) as resp:
|
||||||
|
if resp.status == 200:
|
||||||
|
return await resp.json()
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning("Failed to fetch %s: %s", label, e)
|
||||||
|
return []
|
||||||
|
|
||||||
|
sub_tasks = []
|
||||||
|
for pr in prs:
|
||||||
|
n = pr["number"]
|
||||||
|
sub_tasks.append(_fetch_json(session, f"{FORGEJO_BASE}/repos/{REPO}/pulls/{n}/reviews", f"reviews PR#{n}"))
|
||||||
|
sub_tasks.append(_fetch_json(session, f"{FORGEJO_BASE}/repos/{REPO}/pulls/{n}/files", f"files PR#{n}"))
|
||||||
|
|
||||||
|
sub_results = await asyncio.gather(*sub_tasks)
|
||||||
|
|
||||||
|
queue = []
|
||||||
|
for i, pr in enumerate(prs):
|
||||||
|
reviews = sub_results[i * 2]
|
||||||
|
files = sub_results[i * 2 + 1]
|
||||||
|
|
||||||
|
# Build enriched PR record
|
||||||
|
branch = pr.get("head", {}).get("ref", "") if pr.get("head") else ""
|
||||||
|
title = pr.get("title", "")
|
||||||
|
author = pr.get("user", {}).get("login", "unknown")
|
||||||
|
created_at = pr.get("created_at", "")
|
||||||
|
changed_files = pr.get("changed_files", len(files))
|
||||||
|
requested_reviewers = pr.get("requested_reviewers", [])
|
||||||
|
|
||||||
|
domain = _detect_domain(branch, title, files)
|
||||||
|
file_counts = _classify_files(files)
|
||||||
|
status = _classify_status(changed_files, reviews, requested_reviewers)
|
||||||
|
days = _days_open(created_at) if created_at else 0
|
||||||
|
|
||||||
|
review_list = [
|
||||||
|
{
|
||||||
|
"reviewer": r.get("user", {}).get("login", "unknown"),
|
||||||
|
"outcome": r.get("state", "PENDING").lower(),
|
||||||
|
"date": r.get("submitted_at", ""),
|
||||||
|
"summary": r.get("body", "")[:200],
|
||||||
|
}
|
||||||
|
for r in reviews
|
||||||
|
if r.get("state") and r["state"] != "PENDING"
|
||||||
|
]
|
||||||
|
|
||||||
|
queue.append({
|
||||||
|
"pr_number": pr["number"],
|
||||||
|
"title": title,
|
||||||
|
"author": author,
|
||||||
|
"domain": domain,
|
||||||
|
"branch": branch,
|
||||||
|
"created_at": created_at,
|
||||||
|
"days_open": days,
|
||||||
|
"status": status,
|
||||||
|
"changed_files": changed_files,
|
||||||
|
**file_counts,
|
||||||
|
"reviews": review_list,
|
||||||
|
"url": pr.get("html_url", ""),
|
||||||
|
})
|
||||||
|
|
||||||
|
# Sort: broken first, then needs-review by days_open desc, then rest
|
||||||
|
queue.sort(key=lambda x: (_STATUS_PRIORITY.get(x["status"], 99), -x["days_open"]))
|
||||||
|
|
||||||
|
return queue
|
||||||
64
ops/diagnostics/review_queue_routes.py
Normal file
64
ops/diagnostics/review_queue_routes.py
Normal file
|
|
@ -0,0 +1,64 @@
|
||||||
|
"""Route handlers for /api/review-queue endpoint.
|
||||||
|
|
||||||
|
Import into app.py and register routes in create_app().
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
|
||||||
|
from aiohttp import web
|
||||||
|
from review_queue import fetch_review_queue
|
||||||
|
|
||||||
|
logger = logging.getLogger("argus.review_queue")
|
||||||
|
|
||||||
|
|
||||||
|
async def handle_review_queue(request):
|
||||||
|
"""GET /api/review-queue — PR review pipeline view.
|
||||||
|
|
||||||
|
Query params:
|
||||||
|
status: filter by status (broken, needs-review, approved-awaiting-merge, changes-requested)
|
||||||
|
author: filter by agent/author name
|
||||||
|
domain: filter by domain
|
||||||
|
|
||||||
|
Returns JSON with queue items sorted by display priority:
|
||||||
|
broken (flagged) > needs-review (by age) > approved-awaiting-merge
|
||||||
|
"""
|
||||||
|
token = request.app.get("_forgejo_token")
|
||||||
|
|
||||||
|
try:
|
||||||
|
queue = await fetch_review_queue(forgejo_token=token)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error("Review queue fetch failed: %s", e)
|
||||||
|
return web.json_response({"error": str(e)}, status=500)
|
||||||
|
|
||||||
|
# Apply filters
|
||||||
|
status_filter = request.query.get("status")
|
||||||
|
if status_filter:
|
||||||
|
queue = [item for item in queue if item["status"] == status_filter]
|
||||||
|
|
||||||
|
author_filter = request.query.get("author")
|
||||||
|
if author_filter:
|
||||||
|
queue = [item for item in queue if item["author"] == author_filter]
|
||||||
|
|
||||||
|
domain_filter = request.query.get("domain")
|
||||||
|
if domain_filter:
|
||||||
|
queue = [item for item in queue if item["domain"] == domain_filter]
|
||||||
|
|
||||||
|
# Summary stats
|
||||||
|
status_counts = {}
|
||||||
|
for item in queue:
|
||||||
|
status_counts[item["status"]] = status_counts.get(item["status"], 0) + 1
|
||||||
|
|
||||||
|
return web.json_response({
|
||||||
|
"queue": queue,
|
||||||
|
"total": len(queue),
|
||||||
|
"status_counts": status_counts,
|
||||||
|
})
|
||||||
|
|
||||||
|
|
||||||
|
def register_review_queue_routes(app, forgejo_token=None):
|
||||||
|
"""Register review queue routes on the app.
|
||||||
|
|
||||||
|
forgejo_token: optional Forgejo API token for authenticated requests
|
||||||
|
"""
|
||||||
|
app["_forgejo_token"] = forgejo_token
|
||||||
|
app.router.add_get("/api/review-queue", handle_review_queue)
|
||||||
149
ops/diagnostics/shared_ui.py
Normal file
149
ops/diagnostics/shared_ui.py
Normal file
|
|
@ -0,0 +1,149 @@
|
||||||
|
"""Shared UI components for the 4-page Argus dashboard.
|
||||||
|
|
||||||
|
Provides: nav bar, CSS, page skeleton, Chart.js imports, shared JS helpers.
|
||||||
|
All pages import render_page() and pass their body HTML + page-specific scripts.
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Page definitions — used by nav bar
|
||||||
|
PAGES = [
|
||||||
|
{"path": "/prs", "label": "PRs", "icon": "✎"},
|
||||||
|
{"path": "/ops", "label": "Operations", "icon": "⚙"},
|
||||||
|
{"path": "/health", "label": "Knowledge Health", "icon": "♥"},
|
||||||
|
{"path": "/agents", "label": "Agents", "icon": "★"},
|
||||||
|
{"path": "/epistemic", "label": "Epistemic", "icon": "⚖"},
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def _nav_html(active_path: str) -> str:
|
||||||
|
"""Render the shared navigation bar."""
|
||||||
|
links = []
|
||||||
|
for p in PAGES:
|
||||||
|
cls = "nav-active" if p["path"] == active_path else ""
|
||||||
|
links.append(
|
||||||
|
f'<a href="{p["path"]}" class="nav-link {cls}">'
|
||||||
|
f'{p["icon"]} {p["label"]}</a>'
|
||||||
|
)
|
||||||
|
return f"""<nav class="top-nav">
|
||||||
|
<div class="nav-brand">Argus</div>
|
||||||
|
<div class="nav-links">{"".join(links)}</div>
|
||||||
|
<div class="nav-aux">
|
||||||
|
<a href="/audit" class="nav-link">Audit</a>
|
||||||
|
<a href="/api/metrics" class="nav-link">API</a>
|
||||||
|
</div>
|
||||||
|
</nav>"""
|
||||||
|
|
||||||
|
|
||||||
|
SHARED_CSS = """
|
||||||
|
* { box-sizing: border-box; margin: 0; padding: 0; }
|
||||||
|
body { font-family: -apple-system, system-ui, 'Segoe UI', sans-serif; background: #0d1117; color: #c9d1d9; }
|
||||||
|
.top-nav { display: flex; align-items: center; gap: 16px; padding: 12px 24px;
|
||||||
|
background: #161b22; border-bottom: 1px solid #30363d; position: sticky; top: 0; z-index: 100; }
|
||||||
|
.nav-brand { color: #58a6ff; font-weight: 700; font-size: 18px; }
|
||||||
|
.nav-links { display: flex; gap: 4px; flex: 1; }
|
||||||
|
.nav-aux { display: flex; gap: 4px; }
|
||||||
|
.nav-link { color: #8b949e; text-decoration: none; padding: 6px 12px; border-radius: 6px;
|
||||||
|
font-size: 13px; transition: all 0.15s; white-space: nowrap; }
|
||||||
|
.nav-link:hover { color: #c9d1d9; background: #21262d; }
|
||||||
|
.nav-active { color: #58a6ff !important; background: #0d1117; font-weight: 600; }
|
||||||
|
.page-content { padding: 24px; max-width: 1400px; margin: 0 auto; }
|
||||||
|
.page-header { margin-bottom: 20px; }
|
||||||
|
.page-header h1 { color: #58a6ff; font-size: 22px; }
|
||||||
|
.page-header .subtitle { color: #8b949e; font-size: 13px; margin-top: 4px; }
|
||||||
|
.grid { display: grid; grid-template-columns: repeat(auto-fit, minmax(160px, 1fr)); gap: 12px; margin: 16px 0; }
|
||||||
|
.card { background: #161b22; border: 1px solid #30363d; border-radius: 8px; padding: 16px; }
|
||||||
|
.card .label { color: #8b949e; font-size: 11px; text-transform: uppercase; letter-spacing: 0.5px; }
|
||||||
|
.card .value { font-size: 28px; font-weight: 700; margin-top: 2px; }
|
||||||
|
.card .detail { color: #8b949e; font-size: 11px; margin-top: 2px; }
|
||||||
|
.green { color: #3fb950; }
|
||||||
|
.yellow { color: #d29922; }
|
||||||
|
.red { color: #f85149; }
|
||||||
|
.blue { color: #58a6ff; }
|
||||||
|
.purple { color: #bc8cff; }
|
||||||
|
.chart-container { background: #161b22; border: 1px solid #30363d; border-radius: 8px; padding: 16px; margin: 16px 0; }
|
||||||
|
.chart-container h2 { color: #c9d1d9; font-size: 14px; margin-bottom: 12px; }
|
||||||
|
canvas { max-height: 260px; }
|
||||||
|
.row { display: grid; grid-template-columns: 1fr 1fr; gap: 16px; }
|
||||||
|
@media (max-width: 800px) { .row { grid-template-columns: 1fr; } }
|
||||||
|
table { width: 100%; border-collapse: collapse; font-size: 13px; }
|
||||||
|
th { color: #8b949e; font-size: 11px; text-transform: uppercase; text-align: left; padding: 6px 10px; border-bottom: 1px solid #30363d; }
|
||||||
|
td { padding: 6px 10px; border-bottom: 1px solid #21262d; }
|
||||||
|
code { background: #21262d; padding: 2px 6px; border-radius: 3px; font-size: 12px; }
|
||||||
|
.section { margin-top: 28px; }
|
||||||
|
.section-title { color: #58a6ff; font-size: 15px; font-weight: 600; margin-bottom: 12px; padding-bottom: 6px; border-bottom: 1px solid #21262d; }
|
||||||
|
.funnel { display: flex; align-items: center; gap: 8px; flex-wrap: wrap; }
|
||||||
|
.funnel-step { text-align: center; flex: 1; min-width: 100px; }
|
||||||
|
.funnel-step .num { font-size: 24px; font-weight: 700; }
|
||||||
|
.funnel-step .lbl { font-size: 11px; color: #8b949e; text-transform: uppercase; }
|
||||||
|
.funnel-arrow { color: #30363d; font-size: 20px; }
|
||||||
|
.footer { margin-top: 40px; padding: 16px 24px; border-top: 1px solid #21262d; color: #484f58; font-size: 11px; text-align: center; }
|
||||||
|
.footer a { color: #484f58; text-decoration: none; }
|
||||||
|
.footer a:hover { color: #8b949e; }
|
||||||
|
.alert-banner { padding: 8px 16px; font-size: 12px; border-radius: 6px; margin-bottom: 12px; }
|
||||||
|
.alert-critical { background: #f8514922; border: 1px solid #f85149; color: #f85149; }
|
||||||
|
.alert-warning { background: #d2992222; border: 1px solid #d29922; color: #d29922; }
|
||||||
|
.alert-info { background: #58a6ff22; border: 1px solid #58a6ff; color: #58a6ff; }
|
||||||
|
.badge { display: inline-block; padding: 2px 8px; border-radius: 4px; font-size: 11px; font-weight: 600; }
|
||||||
|
.badge-green { background: #23863633; color: #3fb950; }
|
||||||
|
.badge-yellow { background: #d2992233; color: #d29922; }
|
||||||
|
.badge-red { background: #f8514933; color: #f85149; }
|
||||||
|
.badge-blue { background: #1f6feb33; color: #58a6ff; }
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
CHART_JS_IMPORTS = """<script src="https://cdn.jsdelivr.net/npm/chart.js@4.4.6"></script>
|
||||||
|
<script src="https://cdn.jsdelivr.net/npm/chartjs-adapter-date-fns@3.0.0"></script>
|
||||||
|
<script src="https://cdn.jsdelivr.net/npm/chartjs-plugin-annotation@3.1.0"></script>"""
|
||||||
|
|
||||||
|
|
||||||
|
SHARED_JS = """
|
||||||
|
const AGENT_COLORS = {
|
||||||
|
'rio': '#58a6ff', 'clay': '#3fb950', 'astra': '#bc8cff',
|
||||||
|
'leo': '#d29922', 'vida': '#f0883e', 'theseus': '#f85149',
|
||||||
|
'epimetheus': '#79c0ff', 'ganymede': '#8b949e', 'oberon': '#ec4899',
|
||||||
|
};
|
||||||
|
function agentColor(name) {
|
||||||
|
return AGENT_COLORS[name?.toLowerCase()] ||
|
||||||
|
'#' + ((name||'').split('').reduce((a,c) => (a*31+c.charCodeAt(0))&0xFFFFFF, 0x556677)).toString(16).padStart(6,'0');
|
||||||
|
}
|
||||||
|
Chart.defaults.color = '#8b949e';
|
||||||
|
Chart.defaults.borderColor = '#21262d';
|
||||||
|
Chart.defaults.font.family = '-apple-system, system-ui, sans-serif';
|
||||||
|
Chart.defaults.font.size = 11;
|
||||||
|
|
||||||
|
function esc(s) { const d = document.createElement('div'); d.textContent = s; return d.innerHTML; }
|
||||||
|
function fmtPct(v) { return v != null ? (v * 100).toFixed(1) + '%' : '--'; }
|
||||||
|
function fmtNum(v) { return v != null ? v.toLocaleString() : '--'; }
|
||||||
|
function fmtDollars(v) { return v != null ? '$' + v.toFixed(2) : '--'; }
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
def render_page(title: str, subtitle: str, active_path: str, body_html: str,
|
||||||
|
scripts: str = "", extra_css: str = "", timestamp: str = "") -> str:
|
||||||
|
"""Render a complete page with nav, content, and footer."""
|
||||||
|
ts_display = f" · {timestamp}" if timestamp else ""
|
||||||
|
return f"""<!DOCTYPE html>
|
||||||
|
<html lang="en"><head>
|
||||||
|
<meta charset="utf-8">
|
||||||
|
<title>Argus - {title}</title>
|
||||||
|
<meta http-equiv="refresh" content="60">
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||||
|
{CHART_JS_IMPORTS}
|
||||||
|
<style>{SHARED_CSS}{extra_css}</style>
|
||||||
|
</head><body>
|
||||||
|
{_nav_html(active_path)}
|
||||||
|
<div class="page-content">
|
||||||
|
<div class="page-header">
|
||||||
|
<h1>{title}</h1>
|
||||||
|
<div class="subtitle">{subtitle}{ts_display} · auto-refresh 60s</div>
|
||||||
|
</div>
|
||||||
|
{body_html}
|
||||||
|
</div>
|
||||||
|
<div class="footer">
|
||||||
|
Argus · Teleo Pipeline Diagnostics ·
|
||||||
|
<a href="/api/metrics">Metrics API</a> ·
|
||||||
|
<a href="/api/vital-signs">Vital Signs API</a> ·
|
||||||
|
<a href="/api/contributors">Contributors API</a>
|
||||||
|
</div>
|
||||||
|
<script>{SHARED_JS}</script>
|
||||||
|
{scripts}
|
||||||
|
</body></html>"""
|
||||||
476
ops/diagnostics/tier1_metrics.py
Normal file
476
ops/diagnostics/tier1_metrics.py
Normal file
|
|
@ -0,0 +1,476 @@
|
||||||
|
"""Tier 1 Metrics — The three numbers that matter most for knowledge production.
|
||||||
|
|
||||||
|
1. Extraction yield: claims merged / claims evaluated, per agent, per week
|
||||||
|
2. Cost per merged claim: total spend / merged claims, per week
|
||||||
|
3. Fix success rate by rejection tag: which rejection reasons are fixable vs terminal
|
||||||
|
|
||||||
|
These queries run against pipeline.db (read-only) and power the /api/yield,
|
||||||
|
/api/cost-per-claim, and /api/fix-rates endpoints.
|
||||||
|
|
||||||
|
Owner: Argus <69AF7290-758F-464B-B472-04AFCA4AB340>
|
||||||
|
"""
|
||||||
|
|
||||||
|
import sqlite3
|
||||||
|
|
||||||
|
|
||||||
|
def extraction_yield(conn: sqlite3.Connection, days: int = 30) -> dict:
|
||||||
|
"""Extraction yield = merged / evaluated, trended per agent per week.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
{
|
||||||
|
"daily": [{"day": "2026-W13", "agent": "rio", "evaluated": 20, "merged": 8, "yield": 0.4}, ...],
|
||||||
|
"totals": [{"agent": "rio", "evaluated": 100, "merged": 40, "yield": 0.4}, ...],
|
||||||
|
"system": {"evaluated": 500, "merged": 200, "yield": 0.4}
|
||||||
|
}
|
||||||
|
"""
|
||||||
|
# Weekly yield per agent
|
||||||
|
# Uses strftime('%Y-W%W') for ISO week grouping
|
||||||
|
# evaluated = approved + rejected (all terminal eval events)
|
||||||
|
# merged = approved events only
|
||||||
|
weekly = conn.execute(
|
||||||
|
"""
|
||||||
|
SELECT date(timestamp) as day,
|
||||||
|
json_extract(detail, '$.agent') as agent,
|
||||||
|
COUNT(*) as evaluated,
|
||||||
|
SUM(CASE WHEN event = 'approved' THEN 1 ELSE 0 END) as merged
|
||||||
|
FROM audit_log
|
||||||
|
WHERE stage = 'evaluate'
|
||||||
|
AND event IN ('approved', 'changes_requested', 'domain_rejected', 'tier05_rejected')
|
||||||
|
AND timestamp > datetime('now', ? || ' days')
|
||||||
|
GROUP BY day, agent
|
||||||
|
ORDER BY day DESC, agent
|
||||||
|
""",
|
||||||
|
(f"-{days}",),
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
daily_data = []
|
||||||
|
for r in weekly:
|
||||||
|
ev = r["evaluated"] or 0
|
||||||
|
mg = r["merged"] or 0
|
||||||
|
daily_data.append({
|
||||||
|
"day": r["day"],
|
||||||
|
"agent": r["agent"] or "unknown",
|
||||||
|
"evaluated": ev,
|
||||||
|
"merged": mg,
|
||||||
|
"yield": round(mg / ev, 3) if ev else 0,
|
||||||
|
})
|
||||||
|
|
||||||
|
# Per-agent totals (same window)
|
||||||
|
totals = conn.execute(
|
||||||
|
"""
|
||||||
|
SELECT json_extract(detail, '$.agent') as agent,
|
||||||
|
COUNT(*) as evaluated,
|
||||||
|
SUM(CASE WHEN event = 'approved' THEN 1 ELSE 0 END) as merged
|
||||||
|
FROM audit_log
|
||||||
|
WHERE stage = 'evaluate'
|
||||||
|
AND event IN ('approved', 'changes_requested', 'domain_rejected', 'tier05_rejected')
|
||||||
|
AND timestamp > datetime('now', ? || ' days')
|
||||||
|
GROUP BY agent
|
||||||
|
ORDER BY merged DESC
|
||||||
|
""",
|
||||||
|
(f"-{days}",),
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
totals_data = []
|
||||||
|
for r in totals:
|
||||||
|
ev = r["evaluated"] or 0
|
||||||
|
mg = r["merged"] or 0
|
||||||
|
totals_data.append({
|
||||||
|
"agent": r["agent"] or "unknown",
|
||||||
|
"evaluated": ev,
|
||||||
|
"merged": mg,
|
||||||
|
"yield": round(mg / ev, 3) if ev else 0,
|
||||||
|
})
|
||||||
|
|
||||||
|
# System-wide total
|
||||||
|
sys_row = conn.execute(
|
||||||
|
"""
|
||||||
|
SELECT COUNT(*) as evaluated,
|
||||||
|
SUM(CASE WHEN event = 'approved' THEN 1 ELSE 0 END) as merged
|
||||||
|
FROM audit_log
|
||||||
|
WHERE stage = 'evaluate'
|
||||||
|
AND event IN ('approved', 'changes_requested', 'domain_rejected', 'tier05_rejected')
|
||||||
|
AND timestamp > datetime('now', ? || ' days')
|
||||||
|
""",
|
||||||
|
(f"-{days}",),
|
||||||
|
).fetchone()
|
||||||
|
|
||||||
|
sys_ev = sys_row["evaluated"] or 0
|
||||||
|
sys_mg = sys_row["merged"] or 0
|
||||||
|
|
||||||
|
return {
|
||||||
|
"days": days,
|
||||||
|
"daily": daily_data,
|
||||||
|
"totals": totals_data,
|
||||||
|
"system": {
|
||||||
|
"evaluated": sys_ev,
|
||||||
|
"merged": sys_mg,
|
||||||
|
"yield": round(sys_mg / sys_ev, 3) if sys_ev else 0,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def cost_per_merged_claim(conn: sqlite3.Connection, days: int = 30) -> dict:
|
||||||
|
"""Cost and compute per merged claim, trended per week.
|
||||||
|
|
||||||
|
Uses costs table for spend + tokens and prs table for merge counts.
|
||||||
|
Breaks down by stage. Separates API spend (dollars) from subscription
|
||||||
|
compute (tokens only — Claude Max is flat-rate, so dollars are meaningless).
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
{
|
||||||
|
"daily": [{"day": "2026-W13", "api_cost": 1.50, "merged": 8,
|
||||||
|
"cost_per_claim": 0.19, "input_tokens": 50000,
|
||||||
|
"output_tokens": 5000, "total_tokens": 55000,
|
||||||
|
"tokens_per_claim": 6875}, ...],
|
||||||
|
"by_stage": [{"stage": "eval_leo:openrouter", "api_cost": 1.50,
|
||||||
|
"input_tokens": 300000, "output_tokens": 50000,
|
||||||
|
"calls": 100, "billing": "api"}, ...],
|
||||||
|
"system": {"api_cost": 2.36, "merged": 80, "cost_per_claim": 0.03,
|
||||||
|
"total_tokens": 1200000, "tokens_per_claim": 15000,
|
||||||
|
"subscription_tokens": 0, "api_tokens": 1200000}
|
||||||
|
}
|
||||||
|
"""
|
||||||
|
# Weekly: cost + tokens from costs table, merged count from prs table
|
||||||
|
daily_cost = conn.execute(
|
||||||
|
"""
|
||||||
|
SELECT date as day,
|
||||||
|
SUM(cost_usd) as api_cost,
|
||||||
|
SUM(cost_estimate_usd) as estimated_cost,
|
||||||
|
SUM(input_tokens) as input_tokens,
|
||||||
|
SUM(output_tokens) as output_tokens
|
||||||
|
FROM costs
|
||||||
|
WHERE date > date('now', ? || ' days')
|
||||||
|
GROUP BY day
|
||||||
|
ORDER BY day DESC
|
||||||
|
""",
|
||||||
|
(f"-{days}",),
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
daily_merges = conn.execute(
|
||||||
|
"""
|
||||||
|
SELECT date(merged_at) as day,
|
||||||
|
COUNT(*) as merged
|
||||||
|
FROM prs
|
||||||
|
WHERE status = 'merged'
|
||||||
|
AND merged_at > datetime('now', ? || ' days')
|
||||||
|
GROUP BY day
|
||||||
|
ORDER BY day DESC
|
||||||
|
""",
|
||||||
|
(f"-{days}",),
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
# Merge into combined weekly view
|
||||||
|
merge_map = {r["day"]: r["merged"] for r in daily_merges}
|
||||||
|
cost_map = {}
|
||||||
|
for r in daily_cost:
|
||||||
|
cost_map[r["day"]] = {
|
||||||
|
"api_cost": r["api_cost"] or 0,
|
||||||
|
"estimated_cost": r["estimated_cost"] or 0,
|
||||||
|
"input_tokens": r["input_tokens"] or 0,
|
||||||
|
"output_tokens": r["output_tokens"] or 0,
|
||||||
|
}
|
||||||
|
|
||||||
|
all_days = sorted(set(list(merge_map.keys()) + list(cost_map.keys())), reverse=True)
|
||||||
|
daily_data = []
|
||||||
|
for w in all_days:
|
||||||
|
c = cost_map.get(w, {"api_cost": 0, "estimated_cost": 0, "input_tokens": 0, "output_tokens": 0})
|
||||||
|
merged = merge_map.get(w, 0) or 0
|
||||||
|
total_tokens = c["input_tokens"] + c["output_tokens"]
|
||||||
|
daily_data.append({
|
||||||
|
"day": w,
|
||||||
|
"actual_spend": round(c["api_cost"], 4),
|
||||||
|
"estimated_cost": round(c["estimated_cost"], 4),
|
||||||
|
"merged": merged,
|
||||||
|
"cost_per_claim": round(c["estimated_cost"] / merged, 4) if merged else None,
|
||||||
|
"input_tokens": c["input_tokens"],
|
||||||
|
"output_tokens": c["output_tokens"],
|
||||||
|
"total_tokens": total_tokens,
|
||||||
|
"tokens_per_claim": round(total_tokens / merged) if merged else None,
|
||||||
|
})
|
||||||
|
|
||||||
|
# By stage with billing type (full window)
|
||||||
|
by_stage = conn.execute(
|
||||||
|
"""
|
||||||
|
SELECT stage,
|
||||||
|
SUM(cost_usd) as api_cost,
|
||||||
|
SUM(cost_estimate_usd) as estimated_cost,
|
||||||
|
SUM(input_tokens) as input_tokens,
|
||||||
|
SUM(output_tokens) as output_tokens,
|
||||||
|
SUM(calls) as calls
|
||||||
|
FROM costs
|
||||||
|
WHERE date > date('now', ? || ' days')
|
||||||
|
GROUP BY stage
|
||||||
|
ORDER BY SUM(input_tokens + output_tokens) DESC
|
||||||
|
""",
|
||||||
|
(f"-{days}",),
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
stage_data = []
|
||||||
|
total_api_cost = 0
|
||||||
|
total_estimated_cost = 0
|
||||||
|
total_input = 0
|
||||||
|
total_output = 0
|
||||||
|
subscription_tokens = 0
|
||||||
|
api_tokens = 0
|
||||||
|
for r in by_stage:
|
||||||
|
cost = r["api_cost"] or 0
|
||||||
|
est = r["estimated_cost"] or 0
|
||||||
|
inp = r["input_tokens"] or 0
|
||||||
|
out = r["output_tokens"] or 0
|
||||||
|
calls = r["calls"] or 0
|
||||||
|
stage_name = r["stage"]
|
||||||
|
# :max suffix = subscription, :openrouter suffix = API
|
||||||
|
billing = "subscription" if ":max" in stage_name else "api"
|
||||||
|
total_api_cost += cost
|
||||||
|
total_estimated_cost += est
|
||||||
|
total_input += inp
|
||||||
|
total_output += out
|
||||||
|
if billing == "subscription":
|
||||||
|
subscription_tokens += inp + out
|
||||||
|
else:
|
||||||
|
api_tokens += inp + out
|
||||||
|
stage_data.append({
|
||||||
|
"stage": stage_name,
|
||||||
|
"api_cost": round(cost, 4),
|
||||||
|
"estimated_cost": round(est, 4),
|
||||||
|
"input_tokens": inp,
|
||||||
|
"output_tokens": out,
|
||||||
|
"calls": calls,
|
||||||
|
"billing": billing,
|
||||||
|
})
|
||||||
|
|
||||||
|
# System totals
|
||||||
|
sys_merged = conn.execute(
|
||||||
|
"SELECT COUNT(*) as n FROM prs WHERE status='merged' AND merged_at > datetime('now', ? || ' days')",
|
||||||
|
(f"-{days}",),
|
||||||
|
).fetchone()["n"] or 0
|
||||||
|
|
||||||
|
total_tokens = total_input + total_output
|
||||||
|
|
||||||
|
return {
|
||||||
|
"days": days,
|
||||||
|
"daily": daily_data,
|
||||||
|
"by_stage": stage_data,
|
||||||
|
"system": {
|
||||||
|
"actual_spend": round(total_api_cost, 4),
|
||||||
|
"estimated_cost": round(total_estimated_cost, 4),
|
||||||
|
"merged": sys_merged,
|
||||||
|
"cost_per_claim": round(total_estimated_cost / sys_merged, 4) if sys_merged else None,
|
||||||
|
"total_tokens": total_tokens,
|
||||||
|
"tokens_per_claim": round(total_tokens / sys_merged) if sys_merged else None,
|
||||||
|
"subscription_tokens": subscription_tokens,
|
||||||
|
"api_tokens": api_tokens,
|
||||||
|
"note": "estimated_cost = API-rate equivalent for all calls (unified metric). actual_spend = real dollars charged to OpenRouter.",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def fix_success_by_tag(conn: sqlite3.Connection, days: int = 30) -> dict:
|
||||||
|
"""Fix success rate broken down by rejection reason.
|
||||||
|
|
||||||
|
For each rejection tag: how many PRs got that rejection, how many eventually
|
||||||
|
merged (successful fix), how many are still open (in progress), how many
|
||||||
|
were abandoned (closed/zombie without merge).
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
{
|
||||||
|
"tags": [
|
||||||
|
{
|
||||||
|
"tag": "insufficient_evidence",
|
||||||
|
"total": 50,
|
||||||
|
"fixed": 10,
|
||||||
|
"in_progress": 5,
|
||||||
|
"terminal": 35,
|
||||||
|
"fix_rate": 0.2,
|
||||||
|
"terminal_rate": 0.7
|
||||||
|
}, ...
|
||||||
|
]
|
||||||
|
}
|
||||||
|
"""
|
||||||
|
# Get all rejection events with their tags and PR numbers
|
||||||
|
# Then join with prs table to see final outcome
|
||||||
|
rows = conn.execute(
|
||||||
|
"""
|
||||||
|
SELECT value as tag,
|
||||||
|
json_extract(al.detail, '$.pr') as pr_number
|
||||||
|
FROM audit_log al, json_each(json_extract(al.detail, '$.issues'))
|
||||||
|
WHERE al.stage = 'evaluate'
|
||||||
|
AND al.event IN ('changes_requested', 'domain_rejected', 'tier05_rejected')
|
||||||
|
AND al.timestamp > datetime('now', ? || ' days')
|
||||||
|
""",
|
||||||
|
(f"-{days}",),
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
# Collect unique PRs per tag
|
||||||
|
tag_prs: dict[str, set] = {}
|
||||||
|
for r in rows:
|
||||||
|
tag = r["tag"]
|
||||||
|
pr = r["pr_number"]
|
||||||
|
if tag not in tag_prs:
|
||||||
|
tag_prs[tag] = set()
|
||||||
|
if pr is not None:
|
||||||
|
tag_prs[tag].add(pr)
|
||||||
|
|
||||||
|
if not tag_prs:
|
||||||
|
return {"days": days, "tags": []}
|
||||||
|
|
||||||
|
# Get status for all referenced PRs in one query
|
||||||
|
all_prs = set()
|
||||||
|
for prs in tag_prs.values():
|
||||||
|
all_prs.update(prs)
|
||||||
|
|
||||||
|
if not all_prs:
|
||||||
|
return {"days": days, "tags": []}
|
||||||
|
|
||||||
|
placeholders = ",".join("?" for _ in all_prs)
|
||||||
|
pr_statuses = conn.execute(
|
||||||
|
f"SELECT number, status FROM prs WHERE number IN ({placeholders})",
|
||||||
|
list(all_prs),
|
||||||
|
).fetchall()
|
||||||
|
status_map = {r["number"]: r["status"] for r in pr_statuses}
|
||||||
|
|
||||||
|
# Compute per-tag outcomes
|
||||||
|
tag_data = []
|
||||||
|
for tag, prs in sorted(tag_prs.items(), key=lambda x: -len(x[1])):
|
||||||
|
fixed = 0
|
||||||
|
in_progress = 0
|
||||||
|
terminal = 0
|
||||||
|
for pr in prs:
|
||||||
|
st = status_map.get(pr, "unknown")
|
||||||
|
if st == "merged":
|
||||||
|
fixed += 1
|
||||||
|
elif st in ("open", "validating", "reviewing", "merging"):
|
||||||
|
in_progress += 1
|
||||||
|
else:
|
||||||
|
# closed, zombie, conflict, unknown
|
||||||
|
terminal += 1
|
||||||
|
|
||||||
|
total = len(prs)
|
||||||
|
# Fix rate excludes in-progress (only counts resolved PRs)
|
||||||
|
resolved = fixed + terminal
|
||||||
|
tag_data.append({
|
||||||
|
"tag": tag,
|
||||||
|
"total": total,
|
||||||
|
"fixed": fixed,
|
||||||
|
"in_progress": in_progress,
|
||||||
|
"terminal": terminal,
|
||||||
|
"fix_rate": round(fixed / resolved, 3) if resolved else None,
|
||||||
|
"terminal_rate": round(terminal / resolved, 3) if resolved else None,
|
||||||
|
})
|
||||||
|
|
||||||
|
return {"days": days, "tags": tag_data}
|
||||||
|
|
||||||
|
|
||||||
|
def compute_profile(conn: "sqlite3.Connection", days: int = 30) -> dict:
|
||||||
|
"""Compute profile — Max subscription telemetry alongside API usage.
|
||||||
|
|
||||||
|
Surfaces: cache hit rates, latency, cost estimates (API-equivalent),
|
||||||
|
token breakdown by billing type.
|
||||||
|
"""
|
||||||
|
rows = conn.execute(
|
||||||
|
"""
|
||||||
|
SELECT stage, model,
|
||||||
|
SUM(calls) as calls,
|
||||||
|
SUM(input_tokens) as input_tokens,
|
||||||
|
SUM(output_tokens) as output_tokens,
|
||||||
|
SUM(cost_usd) as api_cost,
|
||||||
|
SUM(duration_ms) as duration_ms,
|
||||||
|
SUM(cache_read_tokens) as cache_read_tokens,
|
||||||
|
SUM(cache_write_tokens) as cache_write_tokens,
|
||||||
|
SUM(cost_estimate_usd) as cost_estimate_usd
|
||||||
|
FROM costs
|
||||||
|
WHERE date > date('now', ? || ' days')
|
||||||
|
GROUP BY stage, model
|
||||||
|
ORDER BY SUM(input_tokens + output_tokens) DESC
|
||||||
|
""",
|
||||||
|
(f"-{days}",),
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
stage_data = []
|
||||||
|
total_calls = 0
|
||||||
|
total_tokens = 0
|
||||||
|
total_duration = 0
|
||||||
|
total_cache_read = 0
|
||||||
|
total_cache_write = 0
|
||||||
|
api_calls = 0
|
||||||
|
sub_calls = 0
|
||||||
|
api_spend = 0.0
|
||||||
|
sub_estimate = 0.0
|
||||||
|
sub_input_tokens = 0
|
||||||
|
|
||||||
|
for r in rows:
|
||||||
|
calls = r["calls"] or 0
|
||||||
|
inp = r["input_tokens"] or 0
|
||||||
|
out = r["output_tokens"] or 0
|
||||||
|
dur = r["duration_ms"] or 0
|
||||||
|
cr = r["cache_read_tokens"] or 0
|
||||||
|
cw = r["cache_write_tokens"] or 0
|
||||||
|
cost = r["api_cost"] or 0
|
||||||
|
est = r["cost_estimate_usd"] or 0
|
||||||
|
stage_name = r["stage"]
|
||||||
|
billing = "subscription" if ":max" in stage_name else "api"
|
||||||
|
|
||||||
|
total_calls += calls
|
||||||
|
total_tokens += inp + out
|
||||||
|
total_duration += dur
|
||||||
|
total_cache_read += cr
|
||||||
|
total_cache_write += cw
|
||||||
|
|
||||||
|
if billing == "subscription":
|
||||||
|
sub_calls += calls
|
||||||
|
sub_estimate += est
|
||||||
|
sub_input_tokens += inp
|
||||||
|
else:
|
||||||
|
api_calls += calls
|
||||||
|
api_spend += cost
|
||||||
|
|
||||||
|
stage_data.append({
|
||||||
|
"stage": stage_name,
|
||||||
|
"model": r["model"],
|
||||||
|
"calls": calls,
|
||||||
|
"input_tokens": inp,
|
||||||
|
"output_tokens": out,
|
||||||
|
"total_tokens": inp + out,
|
||||||
|
"duration_ms": dur,
|
||||||
|
"avg_latency_ms": round(dur / calls) if calls else 0,
|
||||||
|
"cache_read_tokens": cr,
|
||||||
|
"cache_write_tokens": cw,
|
||||||
|
"cache_hit_rate": round(cr / (cr + inp), 3) if (cr + inp) else 0,
|
||||||
|
"api_cost": round(cost, 4),
|
||||||
|
"cost_estimate_usd": round(est, 4),
|
||||||
|
"billing": billing,
|
||||||
|
})
|
||||||
|
|
||||||
|
# Cache summary (only meaningful for subscription/Max calls)
|
||||||
|
total_cacheable = total_cache_read + total_cache_write + sub_input_tokens
|
||||||
|
cache_hit_rate = round(total_cache_read / total_cacheable, 3) if total_cacheable else 0
|
||||||
|
|
||||||
|
return {
|
||||||
|
"days": days,
|
||||||
|
"by_stage": stage_data,
|
||||||
|
"cache": {
|
||||||
|
"read_tokens": total_cache_read,
|
||||||
|
"write_tokens": total_cache_write,
|
||||||
|
"hit_rate": cache_hit_rate,
|
||||||
|
"note": "Cache hits are prompt tokens served from cache (cheaper/faster)",
|
||||||
|
},
|
||||||
|
"latency": {
|
||||||
|
"total_ms": total_duration,
|
||||||
|
"avg_ms_per_call": round(total_duration / total_calls) if total_calls else 0,
|
||||||
|
"note": "Wall-clock time including network. Only populated for Claude Max calls.",
|
||||||
|
},
|
||||||
|
"subscription_estimate": {
|
||||||
|
"total_cost_usd": round(sub_estimate, 4),
|
||||||
|
"note": "What subscription calls would cost at API rates. Actual cost: $0 (flat-rate Max plan).",
|
||||||
|
},
|
||||||
|
"system": {
|
||||||
|
"total_calls": total_calls,
|
||||||
|
"total_tokens": total_tokens,
|
||||||
|
"api_calls": api_calls,
|
||||||
|
"subscription_calls": sub_calls,
|
||||||
|
"api_spend": round(api_spend, 4),
|
||||||
|
"subscription_estimate": round(sub_estimate, 4),
|
||||||
|
"cache_hit_rate": cache_hit_rate,
|
||||||
|
},
|
||||||
|
}
|
||||||
57
ops/diagnostics/tier1_routes.py
Normal file
57
ops/diagnostics/tier1_routes.py
Normal file
|
|
@ -0,0 +1,57 @@
|
||||||
|
"""Tier 1 Metrics — API routes for Argus dashboard.
|
||||||
|
|
||||||
|
Four endpoints:
|
||||||
|
GET /api/yield — extraction yield per agent per day
|
||||||
|
GET /api/cost-per-claim — cost per merged claim per day + stage breakdown
|
||||||
|
GET /api/fix-rates — fix success rate by rejection tag
|
||||||
|
GET /api/compute-profile — full compute telemetry (cache, latency, cost estimates)
|
||||||
|
|
||||||
|
All accept ?days=N (default 30) to control lookback window.
|
||||||
|
|
||||||
|
Owner: Argus <69AF7290-758F-464B-B472-04AFCA4AB340>
|
||||||
|
"""
|
||||||
|
|
||||||
|
from aiohttp import web
|
||||||
|
|
||||||
|
from tier1_metrics import cost_per_merged_claim, compute_profile, extraction_yield, fix_success_by_tag
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_days(request, default=30):
|
||||||
|
"""Parse and clamp ?days= parameter. Returns 1..365."""
|
||||||
|
try:
|
||||||
|
days = int(request.query.get("days", str(default)))
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
days = default
|
||||||
|
return max(1, min(days, 365))
|
||||||
|
|
||||||
|
|
||||||
|
async def handle_yield(request):
|
||||||
|
conn = request.app["_get_conn"]()
|
||||||
|
days = _parse_days(request)
|
||||||
|
return web.json_response(extraction_yield(conn, days))
|
||||||
|
|
||||||
|
|
||||||
|
async def handle_cost_per_claim(request):
|
||||||
|
conn = request.app["_get_conn"]()
|
||||||
|
days = _parse_days(request)
|
||||||
|
return web.json_response(cost_per_merged_claim(conn, days))
|
||||||
|
|
||||||
|
|
||||||
|
async def handle_fix_rates(request):
|
||||||
|
conn = request.app["_get_conn"]()
|
||||||
|
days = _parse_days(request)
|
||||||
|
return web.json_response(fix_success_by_tag(conn, days))
|
||||||
|
|
||||||
|
|
||||||
|
async def handle_compute_profile(request):
|
||||||
|
conn = request.app["_get_conn"]()
|
||||||
|
days = _parse_days(request)
|
||||||
|
return web.json_response(compute_profile(conn, days))
|
||||||
|
|
||||||
|
|
||||||
|
def register_tier1_routes(app: web.Application, get_conn):
|
||||||
|
app["_get_conn"] = get_conn
|
||||||
|
app.router.add_get("/api/yield", handle_yield)
|
||||||
|
app.router.add_get("/api/cost-per-claim", handle_cost_per_claim)
|
||||||
|
app.router.add_get("/api/fix-rates", handle_fix_rates)
|
||||||
|
app.router.add_get("/api/compute-profile", handle_compute_profile)
|
||||||
283
ops/pipeline-v2/batch-extract-50.sh
Executable file
283
ops/pipeline-v2/batch-extract-50.sh
Executable file
|
|
@ -0,0 +1,283 @@
|
||||||
|
#!/bin/bash
|
||||||
|
# Batch extract sources from inbox/queue/ — v3 with two-gate skip logic
|
||||||
|
#
|
||||||
|
# Uses separate extract/ worktree (not main/ — prevents daemon race condition).
|
||||||
|
# Skip logic uses two checks instead of local marker files (Ganymede v3 review):
|
||||||
|
# Gate 1: Is source already in archive/{domain}/? → already processed, dedup
|
||||||
|
# Gate 2: Does extraction branch exist on Forgejo? → extraction in progress
|
||||||
|
# Gate 3: Does pipeline.db show ≥3 closed PRs for this source? → zombie, skip
|
||||||
|
# Gate 4: Does pipeline.db show active OR recently closed PR? → skip (4h cooldown)
|
||||||
|
# All gates pass → extract
|
||||||
|
#
|
||||||
|
# Architecture: Ganymede (two-gate) + Rhea (separate worktrees)
|
||||||
|
|
||||||
|
REPO=/opt/teleo-eval/workspaces/extract
|
||||||
|
MAIN_REPO=/opt/teleo-eval/workspaces/main
|
||||||
|
EXTRACT=/opt/teleo-eval/openrouter-extract-v2.py
|
||||||
|
CLEANUP=/opt/teleo-eval/post-extract-cleanup.py
|
||||||
|
LOG=/opt/teleo-eval/logs/batch-extract-50.log
|
||||||
|
DB=/opt/teleo-eval/pipeline/pipeline.db
|
||||||
|
TOKEN=$(cat /opt/teleo-eval/secrets/forgejo-leo-token)
|
||||||
|
FORGEJO_URL="http://localhost:3000"
|
||||||
|
MAX=50
|
||||||
|
MAX_CLOSED=3 # zombie retry limit: skip source after this many closed PRs
|
||||||
|
COUNT=0
|
||||||
|
SUCCESS=0
|
||||||
|
FAILED=0
|
||||||
|
SKIPPED=0
|
||||||
|
|
||||||
|
# Lockfile to prevent concurrent runs
|
||||||
|
LOCKFILE="/tmp/batch-extract.lock"
|
||||||
|
if [ -f "$LOCKFILE" ]; then
|
||||||
|
pid=$(cat "$LOCKFILE" 2>/dev/null)
|
||||||
|
if kill -0 "$pid" 2>/dev/null; then
|
||||||
|
echo "[$(date)] SKIP: batch extract already running (pid $pid)" >> $LOG
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
rm -f "$LOCKFILE"
|
||||||
|
fi
|
||||||
|
echo $$ > "$LOCKFILE"
|
||||||
|
trap 'rm -f "$LOCKFILE"' EXIT
|
||||||
|
|
||||||
|
echo "[$(date)] Starting batch extraction of $MAX sources" >> $LOG
|
||||||
|
|
||||||
|
cd $REPO || exit 1
|
||||||
|
|
||||||
|
# Bug fix: don't swallow errors on critical git commands (Ganymede review)
|
||||||
|
git fetch origin main >> $LOG 2>&1 || { echo "[$(date)] FATAL: fetch origin main failed" >> $LOG; exit 1; }
|
||||||
|
git checkout -f main >> $LOG 2>&1 || { echo "[$(date)] FATAL: checkout main failed" >> $LOG; exit 1; }
|
||||||
|
git reset --hard origin/main >> $LOG 2>&1 || { echo "[$(date)] FATAL: reset --hard failed" >> $LOG; exit 1; }
|
||||||
|
|
||||||
|
# SHA canary: verify extract worktree matches origin/main (Ganymede review)
|
||||||
|
LOCAL_SHA=$(git rev-parse HEAD)
|
||||||
|
REMOTE_SHA=$(git rev-parse origin/main)
|
||||||
|
if [ "$LOCAL_SHA" != "$REMOTE_SHA" ]; then
|
||||||
|
echo "[$(date)] FATAL: extract worktree diverged from main ($LOCAL_SHA vs $REMOTE_SHA)" >> $LOG
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Pre-extraction cleanup: remove queue files that already exist in archive
|
||||||
|
# This runs on the MAIN worktree (not extract/) so deletions are committed to git.
|
||||||
|
# Prevents the "queue duplicate reappears after reset --hard" problem.
|
||||||
|
CLEANED=0
|
||||||
|
for qfile in $MAIN_REPO/inbox/queue/*.md; do
|
||||||
|
[ -f "$qfile" ] || continue
|
||||||
|
qbase=$(basename "$qfile")
|
||||||
|
if find "$MAIN_REPO/inbox/archive" -name "$qbase" 2>/dev/null | grep -q .; then
|
||||||
|
rm -f "$qfile"
|
||||||
|
CLEANED=$((CLEANED + 1))
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
if [ "$CLEANED" -gt 0 ]; then
|
||||||
|
echo "[$(date)] Cleaned $CLEANED stale queue duplicates" >> $LOG
|
||||||
|
cd $MAIN_REPO
|
||||||
|
git add -A inbox/queue/ 2>/dev/null
|
||||||
|
git commit -m "pipeline: clean $CLEANED stale queue duplicates
|
||||||
|
|
||||||
|
Pentagon-Agent: Epimetheus <3D35839A-7722-4740-B93D-51157F7D5E70>" 2>/dev/null
|
||||||
|
# Push with retry
|
||||||
|
for attempt in 1 2 3; do
|
||||||
|
git pull --rebase origin main 2>/dev/null
|
||||||
|
git push origin main 2>/dev/null && break
|
||||||
|
sleep 2
|
||||||
|
done
|
||||||
|
cd $REPO
|
||||||
|
git fetch origin main 2>/dev/null
|
||||||
|
git reset --hard origin/main 2>/dev/null
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Get sources in queue
|
||||||
|
SOURCES=$(ls inbox/queue/*.md 2>/dev/null | head -$MAX)
|
||||||
|
|
||||||
|
# Batch fetch all remote branches once (Ganymede: 1 call instead of 84)
|
||||||
|
REMOTE_BRANCHES=$(git ls-remote --heads origin 2>/dev/null)
|
||||||
|
if [ $? -ne 0 ]; then
|
||||||
|
echo "[$(date)] ABORT: git ls-remote failed — remote unreachable, skipping cycle" >> $LOG
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
|
||||||
|
for SOURCE in $SOURCES; do
|
||||||
|
COUNT=$((COUNT + 1))
|
||||||
|
BASENAME=$(basename "$SOURCE" .md)
|
||||||
|
BRANCH="extract/$BASENAME"
|
||||||
|
|
||||||
|
# Skip conversation archives — valuable content enters through standalone sources,
|
||||||
|
# inline tags (SOURCE:/CLAIM:), and transcript review. Raw conversations produce
|
||||||
|
# low-quality claims with schema failures. (Epimetheus session 4)
|
||||||
|
if grep -q "^format: conversation" "$SOURCE" 2>/dev/null; then
|
||||||
|
# Move to archive instead of leaving in queue (prevents re-processing)
|
||||||
|
mv "$SOURCE" "$MAIN_REPO/inbox/archive/telegram/" 2>/dev/null
|
||||||
|
echo "[$(date)] [$COUNT/$MAX] ARCHIVE $BASENAME (conversation — skipped extraction)" >> $LOG
|
||||||
|
SKIPPED=$((SKIPPED + 1))
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Gate 1: Already in archive? Source was already processed — dedup (Ganymede)
|
||||||
|
if find "$MAIN_REPO/inbox/archive" -name "$BASENAME.md" 2>/dev/null | grep -q .; then
|
||||||
|
echo "[$(date)] [$COUNT/$MAX] SKIP $BASENAME (already in archive)" >> $LOG
|
||||||
|
# Delete the queue duplicate
|
||||||
|
rm -f "$MAIN_REPO/inbox/queue/$BASENAME.md" 2>/dev/null
|
||||||
|
SKIPPED=$((SKIPPED + 1))
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Gate 2: Branch exists on Forgejo? Extraction already in progress (cached lookup)
|
||||||
|
# Enhancement: 2-hour staleness check (Ganymede review) — if branch is >2h old
|
||||||
|
# and PR is unmergeable, close PR + delete branch and re-extract
|
||||||
|
if echo "$REMOTE_BRANCHES" | grep -q "refs/heads/$BRANCH$"; then
|
||||||
|
# Check branch age
|
||||||
|
BRANCH_SHA=$(echo "$REMOTE_BRANCHES" | grep "refs/heads/$BRANCH$" | awk '{print $1}')
|
||||||
|
BRANCH_AGE_EPOCH=$(git log -1 --format='%ct' "$BRANCH_SHA" 2>/dev/null || echo 0)
|
||||||
|
NOW_EPOCH=$(date +%s)
|
||||||
|
AGE_HOURS=$(( (NOW_EPOCH - BRANCH_AGE_EPOCH) / 3600 ))
|
||||||
|
|
||||||
|
if [ "$AGE_HOURS" -ge 2 ]; then
|
||||||
|
# Branch is stale — check if PR is mergeable
|
||||||
|
# Note: Forgejo head= filter is unreliable. Fetch all open PRs and filter locally.
|
||||||
|
PR_NUM=$(curl -sf "$FORGEJO_URL/api/v1/repos/teleo/teleo-codex/pulls?state=open&limit=50" \
|
||||||
|
-H "Authorization: token $TOKEN" | python3 -c "
|
||||||
|
import sys,json
|
||||||
|
prs=json.load(sys.stdin)
|
||||||
|
branch='$BRANCH'
|
||||||
|
matches=[p for p in prs if p['head']['ref']==branch]
|
||||||
|
print(matches[0]['number'] if matches else '')
|
||||||
|
" 2>/dev/null)
|
||||||
|
if [ -n "$PR_NUM" ]; then
|
||||||
|
PR_MERGEABLE=$(curl -sf "$FORGEJO_URL/api/v1/repos/teleo/teleo-codex/pulls/$PR_NUM" \
|
||||||
|
-H "Authorization: token $TOKEN" | python3 -c 'import sys,json; print(json.load(sys.stdin).get("mergeable","true"))' 2>/dev/null)
|
||||||
|
if [ "$PR_MERGEABLE" = "False" ] || [ "$PR_MERGEABLE" = "false" ]; then
|
||||||
|
echo "[$(date)] [$COUNT/$MAX] STALE: $BASENAME (${AGE_HOURS}h old, unmergeable PR #$PR_NUM) — closing + re-extracting" >> $LOG
|
||||||
|
# Close PR with audit comment
|
||||||
|
curl -sf -X POST "$FORGEJO_URL/api/v1/repos/teleo/teleo-codex/issues/$PR_NUM/comments" \
|
||||||
|
-H "Authorization: token $TOKEN" -H "Content-Type: application/json" \
|
||||||
|
-d '{"body":"Auto-closed: extraction branch stale >2h, conflict unresolvable. Source will be re-extracted from current main."}' > /dev/null 2>&1
|
||||||
|
curl -sf -X PATCH "$FORGEJO_URL/api/v1/repos/teleo/teleo-codex/pulls/$PR_NUM" \
|
||||||
|
-H "Authorization: token $TOKEN" -H "Content-Type: application/json" \
|
||||||
|
-d '{"state":"closed"}' > /dev/null 2>&1
|
||||||
|
# Delete remote branch
|
||||||
|
git push origin --delete "$BRANCH" 2>/dev/null
|
||||||
|
# Fall through to extraction below
|
||||||
|
else
|
||||||
|
echo "[$(date)] [$COUNT/$MAX] SKIP $BASENAME (branch exists ${AGE_HOURS}h, PR #$PR_NUM mergeable — waiting)" >> $LOG
|
||||||
|
SKIPPED=$((SKIPPED + 1))
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
# No PR found but branch exists — orphan branch, clean up
|
||||||
|
echo "[$(date)] [$COUNT/$MAX] STALE: $BASENAME (orphan branch ${AGE_HOURS}h, no PR) — deleting" >> $LOG
|
||||||
|
git push origin --delete "$BRANCH" 2>/dev/null
|
||||||
|
# Fall through to extraction
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
echo "[$(date)] [$COUNT/$MAX] SKIP $BASENAME (branch exists — in progress, ${AGE_HOURS}h old)" >> $LOG
|
||||||
|
SKIPPED=$((SKIPPED + 1))
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Gate 3: Check pipeline.db for zombie sources — too many closed PRs means
|
||||||
|
# the source keeps failing eval. Skip after MAX_CLOSED rejections. (Epimetheus)
|
||||||
|
if [ -f "$DB" ]; then
|
||||||
|
CLOSED_COUNT=$(sqlite3 "$DB" "SELECT COUNT(*) FROM prs WHERE branch = 'extract/$BASENAME' AND status = 'closed'" 2>/dev/null || echo 0)
|
||||||
|
if [ "$CLOSED_COUNT" -ge "$MAX_CLOSED" ]; then
|
||||||
|
echo "[$(date)] [$COUNT/$MAX] SKIP $BASENAME (zombie: $CLOSED_COUNT closed PRs >= $MAX_CLOSED limit)" >> $LOG
|
||||||
|
SKIPPED=$((SKIPPED + 1))
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Gate 4: Check pipeline.db for active or recently closed PRs — prevents
|
||||||
|
# re-extraction waste when eval closes a PR and batch-extract runs again
|
||||||
|
# before the source is manually reviewed. 4h cooldown after closure.
|
||||||
|
if [ -f "$DB" ]; then
|
||||||
|
ACTIVE_COUNT=$(sqlite3 "$DB" "SELECT COUNT(*) FROM prs WHERE branch = 'extract/$BASENAME' AND status IN ('extracting','approved','merging')" 2>/dev/null || echo 0)
|
||||||
|
if [ "$ACTIVE_COUNT" -ge 1 ]; then
|
||||||
|
echo "[$(date)] [$COUNT/$MAX] SKIP $BASENAME (active PR exists)" >> $LOG
|
||||||
|
SKIPPED=$((SKIPPED + 1))
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
RECENT_CLOSED=$(sqlite3 "$DB" "SELECT COUNT(*) FROM prs WHERE branch = 'extract/$BASENAME' AND status = 'closed' AND created_at > datetime('now', '-4 hours')" 2>/dev/null || echo 0)
|
||||||
|
if [ "$RECENT_CLOSED" -ge 1 ]; then
|
||||||
|
echo "[$(date)] [$COUNT/$MAX] SKIP $BASENAME (recently closed PR — 4h cooldown)" >> $LOG
|
||||||
|
SKIPPED=$((SKIPPED + 1))
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "[$(date)] [$COUNT/$MAX] Processing $BASENAME" >> $LOG
|
||||||
|
|
||||||
|
# Reset to main (log errors — don't swallow)
|
||||||
|
git checkout -f main >> $LOG 2>&1 || { echo " -> SKIP (checkout main failed)" >> $LOG; SKIPPED=$((SKIPPED + 1)); continue; }
|
||||||
|
git fetch origin main >> $LOG 2>&1
|
||||||
|
git reset --hard origin/main >> $LOG 2>&1 || { echo " -> SKIP (reset failed)" >> $LOG; SKIPPED=$((SKIPPED + 1)); continue; }
|
||||||
|
|
||||||
|
# Clean stale remote branch (Leo's catch — prevents checkout conflicts)
|
||||||
|
git push origin --delete "$BRANCH" 2>/dev/null
|
||||||
|
|
||||||
|
# Create fresh branch
|
||||||
|
git branch -D "$BRANCH" 2>/dev/null
|
||||||
|
git checkout -b "$BRANCH" 2>/dev/null
|
||||||
|
if [ $? -ne 0 ]; then
|
||||||
|
echo " -> SKIP (branch creation failed)" >> $LOG
|
||||||
|
SKIPPED=$((SKIPPED + 1))
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Run extraction
|
||||||
|
python3 $EXTRACT "$SOURCE" --no-review >> $LOG 2>&1
|
||||||
|
EXTRACT_RC=$?
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
if [ $EXTRACT_RC -ne 0 ]; then
|
||||||
|
FAILED=$((FAILED + 1))
|
||||||
|
echo " -> FAILED (extract rc=$EXTRACT_RC)" >> $LOG
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Post-extraction cleanup
|
||||||
|
python3 $CLEANUP $REPO >> $LOG 2>&1
|
||||||
|
|
||||||
|
# Check if any files were created/modified
|
||||||
|
CHANGED=$(git status --porcelain | wc -l | tr -d " ")
|
||||||
|
if [ "$CHANGED" -eq 0 ]; then
|
||||||
|
echo " -> No changes (enrichment/null-result only)" >> $LOG
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Commit
|
||||||
|
git add -A
|
||||||
|
git commit -m "extract: $BASENAME
|
||||||
|
|
||||||
|
Pentagon-Agent: Epimetheus <3D35839A-7722-4740-B93D-51157F7D5E70>" >> $LOG 2>&1
|
||||||
|
|
||||||
|
# Push
|
||||||
|
git push "http://leo:${TOKEN}@localhost:3000/teleo/teleo-codex.git" "$BRANCH" --force >> $LOG 2>&1
|
||||||
|
|
||||||
|
# Create PR (include prior art sidecar if available)
|
||||||
|
PRIOR_ART_FILE="${SOURCE}.prior-art"
|
||||||
|
PR_BODY=""
|
||||||
|
if [ -f "$PRIOR_ART_FILE" ]; then
|
||||||
|
# Escape JSON special chars in prior art content
|
||||||
|
PR_BODY=$(cat "$PRIOR_ART_FILE" | python3 -c 'import sys,json; print(json.dumps(sys.stdin.read()))')
|
||||||
|
PR_BODY=${PR_BODY:1:-1} # Strip outer quotes from json.dumps
|
||||||
|
fi
|
||||||
|
curl -sf -X POST "http://localhost:3000/api/v1/repos/teleo/teleo-codex/pulls" \
|
||||||
|
-H "Authorization: token $TOKEN" \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d "{\"title\":\"extract: $BASENAME\",\"head\":\"$BRANCH\",\"base\":\"main\",\"body\":\"$PR_BODY\"}" >> /dev/null 2>&1
|
||||||
|
|
||||||
|
SUCCESS=$((SUCCESS + 1))
|
||||||
|
echo " -> SUCCESS ($CHANGED files)" >> $LOG
|
||||||
|
|
||||||
|
# Back to main
|
||||||
|
git checkout -f main >> $LOG 2>&1
|
||||||
|
|
||||||
|
# Rate limit
|
||||||
|
sleep 2
|
||||||
|
done
|
||||||
|
|
||||||
|
echo "[$(date)] Batch complete: $SUCCESS success, $FAILED failed, $SKIPPED skipped (already attempted)" >> $LOG
|
||||||
|
|
||||||
|
git checkout -f main >> $LOG 2>&1
|
||||||
|
git reset --hard origin/main >> $LOG 2>&1
|
||||||
0
ops/pipeline-v2/lib/__init__.py
Normal file
0
ops/pipeline-v2/lib/__init__.py
Normal file
210
ops/pipeline-v2/lib/analytics.py
Normal file
210
ops/pipeline-v2/lib/analytics.py
Normal file
|
|
@ -0,0 +1,210 @@
|
||||||
|
"""Analytics module — time-series metrics snapshots + chart data endpoints.
|
||||||
|
|
||||||
|
Records pipeline metrics every 15 minutes. Serves historical data for
|
||||||
|
Chart.js dashboard. Tracks source origin (agent/human/scraper) for
|
||||||
|
pipeline funnel visualization.
|
||||||
|
|
||||||
|
Priority 1 from Cory via Ganymede.
|
||||||
|
Epimetheus owns this module.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
|
from . import config, db
|
||||||
|
|
||||||
|
logger = logging.getLogger("pipeline.analytics")
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Snapshot recording ────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def record_snapshot(conn) -> dict:
|
||||||
|
"""Record a metrics snapshot. Called every 15 minutes by the pipeline daemon.
|
||||||
|
|
||||||
|
Returns the snapshot dict for logging/debugging.
|
||||||
|
"""
|
||||||
|
# Throughput (last hour)
|
||||||
|
throughput = conn.execute(
|
||||||
|
"""SELECT COUNT(*) as n FROM audit_log
|
||||||
|
WHERE timestamp > datetime('now', '-1 hour')
|
||||||
|
AND event IN ('approved', 'changes_requested', 'merged')"""
|
||||||
|
).fetchone()
|
||||||
|
|
||||||
|
# PR status counts
|
||||||
|
statuses = conn.execute("SELECT status, COUNT(*) as n FROM prs GROUP BY status").fetchall()
|
||||||
|
status_map = {r["status"]: r["n"] for r in statuses}
|
||||||
|
|
||||||
|
# Approval rate (24h)
|
||||||
|
verdicts = conn.execute(
|
||||||
|
"""SELECT COUNT(*) as total,
|
||||||
|
SUM(CASE WHEN status IN ('merged', 'approved') THEN 1 ELSE 0 END) as passed
|
||||||
|
FROM prs WHERE last_attempt > datetime('now', '-24 hours')"""
|
||||||
|
).fetchone()
|
||||||
|
total = verdicts["total"] or 0
|
||||||
|
passed = verdicts["passed"] or 0
|
||||||
|
approval_rate = round(passed / total, 3) if total > 0 else None
|
||||||
|
|
||||||
|
# Evaluated in 24h
|
||||||
|
evaluated = conn.execute(
|
||||||
|
"""SELECT COUNT(*) as n FROM prs
|
||||||
|
WHERE last_attempt > datetime('now', '-24 hours')
|
||||||
|
AND domain_verdict != 'pending'"""
|
||||||
|
).fetchone()
|
||||||
|
|
||||||
|
# Fix success rate
|
||||||
|
fix_stats = conn.execute(
|
||||||
|
"""SELECT COUNT(*) as attempted,
|
||||||
|
SUM(CASE WHEN status IN ('merged', 'approved') THEN 1 ELSE 0 END) as succeeded
|
||||||
|
FROM prs WHERE fix_attempts > 0"""
|
||||||
|
).fetchone()
|
||||||
|
fix_rate = round((fix_stats["succeeded"] or 0) / fix_stats["attempted"], 3) if fix_stats["attempted"] else None
|
||||||
|
|
||||||
|
# Rejection reasons (24h)
|
||||||
|
issue_rows = conn.execute(
|
||||||
|
"""SELECT eval_issues FROM prs
|
||||||
|
WHERE eval_issues IS NOT NULL AND eval_issues != '[]'
|
||||||
|
AND last_attempt > datetime('now', '-24 hours')"""
|
||||||
|
).fetchall()
|
||||||
|
tag_counts = {}
|
||||||
|
for row in issue_rows:
|
||||||
|
try:
|
||||||
|
tags = json.loads(row["eval_issues"])
|
||||||
|
for tag in tags:
|
||||||
|
if isinstance(tag, str):
|
||||||
|
tag_counts[tag] = tag_counts.get(tag, 0) + 1
|
||||||
|
except (json.JSONDecodeError, TypeError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Source origin counts (24h) — agent vs human vs scraper
|
||||||
|
source_origins = _count_source_origins(conn)
|
||||||
|
|
||||||
|
snapshot = {
|
||||||
|
"throughput_1h": throughput["n"] if throughput else 0,
|
||||||
|
"approval_rate": approval_rate,
|
||||||
|
"open_prs": status_map.get("open", 0),
|
||||||
|
"merged_total": status_map.get("merged", 0),
|
||||||
|
"closed_total": status_map.get("closed", 0),
|
||||||
|
"conflict_total": status_map.get("conflict", 0),
|
||||||
|
"evaluated_24h": evaluated["n"] if evaluated else 0,
|
||||||
|
"fix_success_rate": fix_rate,
|
||||||
|
"rejection_broken_wiki_links": tag_counts.get("broken_wiki_links", 0),
|
||||||
|
"rejection_frontmatter_schema": tag_counts.get("frontmatter_schema", 0),
|
||||||
|
"rejection_near_duplicate": tag_counts.get("near_duplicate", 0),
|
||||||
|
"rejection_confidence": tag_counts.get("confidence_miscalibration", 0),
|
||||||
|
"rejection_other": sum(v for k, v in tag_counts.items()
|
||||||
|
if k not in ("broken_wiki_links", "frontmatter_schema",
|
||||||
|
"near_duplicate", "confidence_miscalibration")),
|
||||||
|
"extraction_model": config.EXTRACT_MODEL,
|
||||||
|
"eval_domain_model": config.EVAL_DOMAIN_MODEL,
|
||||||
|
"eval_leo_model": config.EVAL_LEO_STANDARD_MODEL,
|
||||||
|
"prompt_version": config.PROMPT_VERSION,
|
||||||
|
"pipeline_version": config.PIPELINE_VERSION,
|
||||||
|
"source_origin_agent": source_origins.get("agent", 0),
|
||||||
|
"source_origin_human": source_origins.get("human", 0),
|
||||||
|
"source_origin_scraper": source_origins.get("scraper", 0),
|
||||||
|
}
|
||||||
|
|
||||||
|
# Write to DB
|
||||||
|
conn.execute(
|
||||||
|
"""INSERT INTO metrics_snapshots (
|
||||||
|
throughput_1h, approval_rate, open_prs, merged_total, closed_total,
|
||||||
|
conflict_total, evaluated_24h, fix_success_rate,
|
||||||
|
rejection_broken_wiki_links, rejection_frontmatter_schema,
|
||||||
|
rejection_near_duplicate, rejection_confidence, rejection_other,
|
||||||
|
extraction_model, eval_domain_model, eval_leo_model,
|
||||||
|
prompt_version, pipeline_version,
|
||||||
|
source_origin_agent, source_origin_human, source_origin_scraper
|
||||||
|
) VALUES (
|
||||||
|
:throughput_1h, :approval_rate, :open_prs, :merged_total, :closed_total,
|
||||||
|
:conflict_total, :evaluated_24h, :fix_success_rate,
|
||||||
|
:rejection_broken_wiki_links, :rejection_frontmatter_schema,
|
||||||
|
:rejection_near_duplicate, :rejection_confidence, :rejection_other,
|
||||||
|
:extraction_model, :eval_domain_model, :eval_leo_model,
|
||||||
|
:prompt_version, :pipeline_version,
|
||||||
|
:source_origin_agent, :source_origin_human, :source_origin_scraper
|
||||||
|
)""",
|
||||||
|
snapshot,
|
||||||
|
)
|
||||||
|
|
||||||
|
logger.debug("Recorded metrics snapshot: approval=%.1f%%, throughput=%d/h",
|
||||||
|
(approval_rate or 0) * 100, snapshot["throughput_1h"])
|
||||||
|
|
||||||
|
return snapshot
|
||||||
|
|
||||||
|
|
||||||
|
def _count_source_origins(conn) -> dict[str, int]:
|
||||||
|
"""Count source origins from recent PRs. Returns {agent: N, human: N, scraper: N}."""
|
||||||
|
counts = {"agent": 0, "human": 0, "scraper": 0}
|
||||||
|
|
||||||
|
rows = conn.execute(
|
||||||
|
"""SELECT origin, COUNT(*) as n FROM prs
|
||||||
|
WHERE created_at > datetime('now', '-24 hours')
|
||||||
|
GROUP BY origin"""
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
for row in rows:
|
||||||
|
origin = row["origin"] or "pipeline"
|
||||||
|
if origin == "human":
|
||||||
|
counts["human"] += row["n"]
|
||||||
|
elif origin == "pipeline":
|
||||||
|
counts["agent"] += row["n"]
|
||||||
|
else:
|
||||||
|
counts["scraper"] += row["n"]
|
||||||
|
|
||||||
|
return counts
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Chart data endpoints ─────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def get_snapshot_history(conn, days: int = 7) -> list[dict]:
|
||||||
|
"""Get snapshot history for charting. Returns list of snapshot dicts."""
|
||||||
|
rows = conn.execute(
|
||||||
|
"""SELECT * FROM metrics_snapshots
|
||||||
|
WHERE ts > datetime('now', ? || ' days')
|
||||||
|
ORDER BY ts ASC""",
|
||||||
|
(f"-{days}",),
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
return [dict(row) for row in rows]
|
||||||
|
|
||||||
|
|
||||||
|
def get_version_changes(conn, days: int = 30) -> list[dict]:
|
||||||
|
"""Get points where prompt_version or pipeline_version changed.
|
||||||
|
|
||||||
|
Used for chart annotations — vertical lines marking deployments.
|
||||||
|
"""
|
||||||
|
rows = conn.execute(
|
||||||
|
"""SELECT ts, prompt_version, pipeline_version
|
||||||
|
FROM metrics_snapshots
|
||||||
|
WHERE ts > datetime('now', ? || ' days')
|
||||||
|
ORDER BY ts ASC""",
|
||||||
|
(f"-{days}",),
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
changes = []
|
||||||
|
prev_prompt = None
|
||||||
|
prev_pipeline = None
|
||||||
|
|
||||||
|
for row in rows:
|
||||||
|
if row["prompt_version"] != prev_prompt and prev_prompt is not None:
|
||||||
|
changes.append({
|
||||||
|
"ts": row["ts"],
|
||||||
|
"type": "prompt",
|
||||||
|
"from": prev_prompt,
|
||||||
|
"to": row["prompt_version"],
|
||||||
|
})
|
||||||
|
if row["pipeline_version"] != prev_pipeline and prev_pipeline is not None:
|
||||||
|
changes.append({
|
||||||
|
"ts": row["ts"],
|
||||||
|
"type": "pipeline",
|
||||||
|
"from": prev_pipeline,
|
||||||
|
"to": row["pipeline_version"],
|
||||||
|
})
|
||||||
|
prev_prompt = row["prompt_version"]
|
||||||
|
prev_pipeline = row["pipeline_version"]
|
||||||
|
|
||||||
|
return changes
|
||||||
190
ops/pipeline-v2/lib/attribution.py
Normal file
190
ops/pipeline-v2/lib/attribution.py
Normal file
|
|
@ -0,0 +1,190 @@
|
||||||
|
"""Attribution module — shared between post_extract.py and merge.py.
|
||||||
|
|
||||||
|
Owns: parsing attribution from YAML frontmatter, validating role entries,
|
||||||
|
computing role counts for contributor upserts, building attribution blocks.
|
||||||
|
|
||||||
|
Avoids circular dependency between post_extract.py (validates attribution at
|
||||||
|
extraction time) and merge.py (records attribution at merge time). Both
|
||||||
|
import from this shared module.
|
||||||
|
|
||||||
|
Schema reference: schemas/attribution.md
|
||||||
|
Weights reference: schemas/contribution-weights.yaml
|
||||||
|
|
||||||
|
Epimetheus owns this module. Leo reviews changes.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
logger = logging.getLogger("pipeline.attribution")
|
||||||
|
|
||||||
|
VALID_ROLES = frozenset({"sourcer", "extractor", "challenger", "synthesizer", "reviewer"})
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Parse attribution from claim content ──────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def parse_attribution(fm: dict) -> dict[str, list[dict]]:
|
||||||
|
"""Extract attribution block from claim frontmatter.
|
||||||
|
|
||||||
|
Returns {role: [{"handle": str, "agent_id": str|None, "context": str|None}]}
|
||||||
|
Handles both nested YAML format and flat field format.
|
||||||
|
"""
|
||||||
|
result = {role: [] for role in VALID_ROLES}
|
||||||
|
|
||||||
|
attribution = fm.get("attribution")
|
||||||
|
if isinstance(attribution, dict):
|
||||||
|
# Nested format (from schema spec)
|
||||||
|
for role in VALID_ROLES:
|
||||||
|
entries = attribution.get(role, [])
|
||||||
|
if isinstance(entries, list):
|
||||||
|
for entry in entries:
|
||||||
|
if isinstance(entry, dict) and "handle" in entry:
|
||||||
|
result[role].append({
|
||||||
|
"handle": entry["handle"].strip().lower().lstrip("@"),
|
||||||
|
"agent_id": entry.get("agent_id"),
|
||||||
|
"context": entry.get("context"),
|
||||||
|
})
|
||||||
|
elif isinstance(entry, str):
|
||||||
|
result[role].append({"handle": entry.strip().lower().lstrip("@"), "agent_id": None, "context": None})
|
||||||
|
elif isinstance(entries, str):
|
||||||
|
# Single entry as string
|
||||||
|
result[role].append({"handle": entries.strip().lower().lstrip("@"), "agent_id": None, "context": None})
|
||||||
|
return result
|
||||||
|
|
||||||
|
# Flat format fallback (attribution_sourcer, attribution_extractor, etc.)
|
||||||
|
for role in VALID_ROLES:
|
||||||
|
flat_val = fm.get(f"attribution_{role}")
|
||||||
|
if flat_val:
|
||||||
|
if isinstance(flat_val, str):
|
||||||
|
result[role].append({"handle": flat_val.strip().lower().lstrip("@"), "agent_id": None, "context": None})
|
||||||
|
elif isinstance(flat_val, list):
|
||||||
|
for v in flat_val:
|
||||||
|
if isinstance(v, str):
|
||||||
|
result[role].append({"handle": v.strip().lower().lstrip("@"), "agent_id": None, "context": None})
|
||||||
|
|
||||||
|
# Legacy fallback: infer from source field
|
||||||
|
if not any(result[r] for r in VALID_ROLES):
|
||||||
|
source = fm.get("source", "")
|
||||||
|
if isinstance(source, str) and source:
|
||||||
|
# Try to extract author handle from source string
|
||||||
|
# Patterns: "@handle", "Author Name", "org, description"
|
||||||
|
handle_match = re.search(r"@(\w+)", source)
|
||||||
|
if handle_match:
|
||||||
|
result["sourcer"].append({"handle": handle_match.group(1).lower(), "agent_id": None, "context": source})
|
||||||
|
else:
|
||||||
|
# Use first word/phrase before comma as sourcer handle
|
||||||
|
author = source.split(",")[0].strip().lower().replace(" ", "-")
|
||||||
|
if author and len(author) > 1:
|
||||||
|
result["sourcer"].append({"handle": author, "agent_id": None, "context": source})
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
def parse_attribution_from_file(filepath: str) -> dict[str, list[dict]]:
|
||||||
|
"""Read a claim file and extract attribution. Returns role→entries dict."""
|
||||||
|
try:
|
||||||
|
content = Path(filepath).read_text()
|
||||||
|
except (FileNotFoundError, PermissionError):
|
||||||
|
return {role: [] for role in VALID_ROLES}
|
||||||
|
|
||||||
|
from .post_extract import parse_frontmatter
|
||||||
|
fm, _ = parse_frontmatter(content)
|
||||||
|
if fm is None:
|
||||||
|
return {role: [] for role in VALID_ROLES}
|
||||||
|
|
||||||
|
return parse_attribution(fm)
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Validate attribution ──────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def validate_attribution(fm: dict, agent: str | None = None) -> list[str]:
|
||||||
|
"""Validate attribution block in claim frontmatter.
|
||||||
|
|
||||||
|
Returns list of issues. Block on missing extractor, warn on missing sourcer.
|
||||||
|
(Leo: extractor is always known, sourcer is best-effort.)
|
||||||
|
|
||||||
|
If agent is provided and extractor is missing, auto-fix by setting the
|
||||||
|
agent as extractor (same pattern as created-date auto-fix).
|
||||||
|
|
||||||
|
Only validates if an attribution block is explicitly present. Legacy claims
|
||||||
|
without attribution blocks are not blocked — they'll get attribution when
|
||||||
|
enriched. New claims from v2 extraction always have attribution.
|
||||||
|
"""
|
||||||
|
issues = []
|
||||||
|
|
||||||
|
# Only validate if attribution block exists (don't break legacy claims)
|
||||||
|
has_attribution = (
|
||||||
|
fm.get("attribution") is not None
|
||||||
|
or any(fm.get(f"attribution_{role}") for role in VALID_ROLES)
|
||||||
|
)
|
||||||
|
if not has_attribution:
|
||||||
|
return [] # No attribution block = legacy claim, not an error
|
||||||
|
|
||||||
|
attribution = parse_attribution(fm)
|
||||||
|
|
||||||
|
if not attribution["extractor"]:
|
||||||
|
if agent:
|
||||||
|
# Auto-fix: set the processing agent as extractor
|
||||||
|
attr = fm.get("attribution")
|
||||||
|
if isinstance(attr, dict):
|
||||||
|
attr["extractor"] = [{"handle": agent}]
|
||||||
|
else:
|
||||||
|
fm["attribution"] = {"extractor": [{"handle": agent}]}
|
||||||
|
issues.append("fixed_missing_extractor")
|
||||||
|
else:
|
||||||
|
issues.append("missing_attribution_extractor")
|
||||||
|
|
||||||
|
return issues
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Build attribution block ──────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def build_attribution_block(
|
||||||
|
agent: str,
|
||||||
|
agent_id: str | None = None,
|
||||||
|
source_handle: str | None = None,
|
||||||
|
source_context: str | None = None,
|
||||||
|
) -> dict:
|
||||||
|
"""Build an attribution dict for a newly extracted claim.
|
||||||
|
|
||||||
|
Called by openrouter-extract-v2.py when reconstructing claim content.
|
||||||
|
"""
|
||||||
|
attribution = {
|
||||||
|
"extractor": [{"handle": agent}],
|
||||||
|
"sourcer": [],
|
||||||
|
"challenger": [],
|
||||||
|
"synthesizer": [],
|
||||||
|
"reviewer": [],
|
||||||
|
}
|
||||||
|
|
||||||
|
if agent_id:
|
||||||
|
attribution["extractor"][0]["agent_id"] = agent_id
|
||||||
|
|
||||||
|
if source_handle:
|
||||||
|
entry = {"handle": source_handle.strip().lower().lstrip("@")}
|
||||||
|
if source_context:
|
||||||
|
entry["context"] = source_context
|
||||||
|
attribution["sourcer"].append(entry)
|
||||||
|
|
||||||
|
return attribution
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Compute role counts for contributor upserts ──────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def role_counts_from_attribution(attribution: dict[str, list[dict]]) -> dict[str, list[str]]:
|
||||||
|
"""Extract {role: [handle, ...]} for contributor table upserts.
|
||||||
|
|
||||||
|
Returns a dict mapping each role to the list of contributor handles.
|
||||||
|
Used by merge.py to credit contributors after merge.
|
||||||
|
"""
|
||||||
|
counts: dict[str, list[str]] = {}
|
||||||
|
for role in VALID_ROLES:
|
||||||
|
handles = [entry["handle"] for entry in attribution.get(role, []) if entry.get("handle")]
|
||||||
|
if handles:
|
||||||
|
counts[role] = handles
|
||||||
|
return counts
|
||||||
150
ops/pipeline-v2/lib/breaker.py
Normal file
150
ops/pipeline-v2/lib/breaker.py
Normal file
|
|
@ -0,0 +1,150 @@
|
||||||
|
"""Circuit breaker state machine — per-stage, backed by SQLite."""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
|
from . import config
|
||||||
|
|
||||||
|
logger = logging.getLogger("pipeline.breaker")
|
||||||
|
|
||||||
|
# States
|
||||||
|
CLOSED = "closed"
|
||||||
|
OPEN = "open"
|
||||||
|
HALFOPEN = "halfopen"
|
||||||
|
|
||||||
|
|
||||||
|
class CircuitBreaker:
|
||||||
|
"""Per-stage circuit breaker.
|
||||||
|
|
||||||
|
CLOSED: normal operation
|
||||||
|
OPEN: stage paused (threshold consecutive failures reached)
|
||||||
|
HALFOPEN: cooldown expired, try 1 worker to probe recovery
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, name: str, conn):
|
||||||
|
self.name = name
|
||||||
|
self.conn = conn
|
||||||
|
self._ensure_row()
|
||||||
|
|
||||||
|
def _ensure_row(self):
|
||||||
|
self.conn.execute(
|
||||||
|
"INSERT OR IGNORE INTO circuit_breakers (name) VALUES (?)",
|
||||||
|
(self.name,),
|
||||||
|
)
|
||||||
|
|
||||||
|
def _get_state(self) -> dict:
|
||||||
|
row = self.conn.execute(
|
||||||
|
"SELECT state, failures, successes, tripped_at, last_success_at FROM circuit_breakers WHERE name = ?",
|
||||||
|
(self.name,),
|
||||||
|
).fetchone()
|
||||||
|
return (
|
||||||
|
dict(row)
|
||||||
|
if row
|
||||||
|
else {"state": CLOSED, "failures": 0, "successes": 0, "tripped_at": None, "last_success_at": None}
|
||||||
|
)
|
||||||
|
|
||||||
|
def _set_state(
|
||||||
|
self,
|
||||||
|
state: str,
|
||||||
|
failures: int = None,
|
||||||
|
successes: int = None,
|
||||||
|
tripped_at: str = None,
|
||||||
|
last_success_at: str = None,
|
||||||
|
):
|
||||||
|
updates = ["state = ?", "last_update = datetime('now')"]
|
||||||
|
params = [state]
|
||||||
|
if failures is not None:
|
||||||
|
updates.append("failures = ?")
|
||||||
|
params.append(failures)
|
||||||
|
if successes is not None:
|
||||||
|
updates.append("successes = ?")
|
||||||
|
params.append(successes)
|
||||||
|
if tripped_at is not None:
|
||||||
|
updates.append("tripped_at = ?")
|
||||||
|
params.append(tripped_at)
|
||||||
|
if last_success_at is not None:
|
||||||
|
updates.append("last_success_at = ?")
|
||||||
|
params.append(last_success_at)
|
||||||
|
params.append(self.name)
|
||||||
|
self.conn.execute(
|
||||||
|
f"UPDATE circuit_breakers SET {', '.join(updates)} WHERE name = ?",
|
||||||
|
params,
|
||||||
|
)
|
||||||
|
|
||||||
|
def allow_request(self) -> bool:
|
||||||
|
"""Check if requests are allowed. Returns True if CLOSED or HALFOPEN."""
|
||||||
|
s = self._get_state()
|
||||||
|
|
||||||
|
if s["state"] == CLOSED:
|
||||||
|
return True
|
||||||
|
|
||||||
|
if s["state"] == OPEN:
|
||||||
|
# Check cooldown
|
||||||
|
if s["tripped_at"]:
|
||||||
|
tripped = datetime.fromisoformat(s["tripped_at"])
|
||||||
|
if tripped.tzinfo is None:
|
||||||
|
tripped = tripped.replace(tzinfo=timezone.utc)
|
||||||
|
elapsed = (datetime.now(timezone.utc) - tripped).total_seconds()
|
||||||
|
if elapsed >= config.BREAKER_COOLDOWN:
|
||||||
|
logger.info("Breaker %s: cooldown expired, entering HALFOPEN", self.name)
|
||||||
|
self._set_state(HALFOPEN, successes=0)
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
# HALFOPEN — allow one probe
|
||||||
|
return True
|
||||||
|
|
||||||
|
def max_workers(self) -> int:
|
||||||
|
"""Return max workers allowed in current state."""
|
||||||
|
s = self._get_state()
|
||||||
|
if s["state"] == HALFOPEN:
|
||||||
|
return 1 # probe with single worker
|
||||||
|
return None # no restriction from breaker
|
||||||
|
|
||||||
|
def record_success(self):
|
||||||
|
"""Record a successful cycle. Updates last_success_at for stall detection (Vida)."""
|
||||||
|
s = self._get_state()
|
||||||
|
now = datetime.now(timezone.utc).isoformat()
|
||||||
|
|
||||||
|
if s["state"] == HALFOPEN:
|
||||||
|
logger.info("Breaker %s: HALFOPEN probe succeeded, closing", self.name)
|
||||||
|
self._set_state(CLOSED, failures=0, successes=0, last_success_at=now)
|
||||||
|
elif s["state"] == CLOSED:
|
||||||
|
if s["failures"] > 0:
|
||||||
|
self._set_state(CLOSED, failures=0, last_success_at=now)
|
||||||
|
else:
|
||||||
|
self._set_state(CLOSED, last_success_at=now)
|
||||||
|
|
||||||
|
def record_failure(self):
|
||||||
|
"""Record a failed cycle."""
|
||||||
|
s = self._get_state()
|
||||||
|
|
||||||
|
if s["state"] == HALFOPEN:
|
||||||
|
logger.warning("Breaker %s: HALFOPEN probe failed, reopening", self.name)
|
||||||
|
self._set_state(
|
||||||
|
OPEN,
|
||||||
|
failures=s["failures"] + 1,
|
||||||
|
tripped_at=datetime.now(timezone.utc).isoformat(),
|
||||||
|
)
|
||||||
|
elif s["state"] == CLOSED:
|
||||||
|
new_failures = s["failures"] + 1
|
||||||
|
if new_failures >= config.BREAKER_THRESHOLD:
|
||||||
|
logger.warning(
|
||||||
|
"Breaker %s: threshold reached (%d failures), opening",
|
||||||
|
self.name,
|
||||||
|
new_failures,
|
||||||
|
)
|
||||||
|
self._set_state(
|
||||||
|
OPEN,
|
||||||
|
failures=new_failures,
|
||||||
|
tripped_at=datetime.now(timezone.utc).isoformat(),
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
self._set_state(CLOSED, failures=new_failures)
|
||||||
|
elif s["state"] == OPEN:
|
||||||
|
self._set_state(OPEN, failures=s["failures"] + 1)
|
||||||
|
|
||||||
|
def reset(self):
|
||||||
|
"""Force reset to CLOSED."""
|
||||||
|
logger.info("Breaker %s: force reset to CLOSED", self.name)
|
||||||
|
self._set_state(CLOSED, failures=0, successes=0)
|
||||||
|
|
@ -230,6 +230,7 @@ async def cascade_after_merge(
|
||||||
|
|
||||||
# 3. Scan all beliefs and positions
|
# 3. Scan all beliefs and positions
|
||||||
notifications = 0
|
notifications = 0
|
||||||
|
notification_details = [] # Per-agent reasoning for audit trail
|
||||||
agents_dir = main_worktree / "agents"
|
agents_dir = main_worktree / "agents"
|
||||||
if not agents_dir.exists():
|
if not agents_dir.exists():
|
||||||
logger.warning("cascade: no agents/ dir in worktree")
|
logger.warning("cascade: no agents/ dir in worktree")
|
||||||
|
|
@ -251,6 +252,12 @@ async def cascade_after_merge(
|
||||||
body = _format_cascade_body(md_file.name, file_type, matched, pr_num)
|
body = _format_cascade_body(md_file.name, file_type, matched, pr_num)
|
||||||
if _write_inbox_message(agent_name, f"claim-changed-affects-{file_type}", body):
|
if _write_inbox_message(agent_name, f"claim-changed-affects-{file_type}", body):
|
||||||
notifications += 1
|
notifications += 1
|
||||||
|
notification_details.append({
|
||||||
|
"agent": agent_name,
|
||||||
|
"file_type": file_type,
|
||||||
|
"file": md_file.stem,
|
||||||
|
"matched_claims": matched,
|
||||||
|
})
|
||||||
logger.info("cascade: notified %s — %s '%s' affected by %s",
|
logger.info("cascade: notified %s — %s '%s' affected by %s",
|
||||||
agent_name, file_type, md_file.stem, matched)
|
agent_name, file_type, md_file.stem, matched)
|
||||||
|
|
||||||
|
|
@ -266,6 +273,7 @@ async def cascade_after_merge(
|
||||||
"pr": pr_num,
|
"pr": pr_num,
|
||||||
"claims_changed": list(changed_claims)[:20],
|
"claims_changed": list(changed_claims)[:20],
|
||||||
"notifications_sent": notifications,
|
"notifications_sent": notifications,
|
||||||
|
"details": notification_details[:50],
|
||||||
})),
|
})),
|
||||||
)
|
)
|
||||||
except Exception:
|
except Exception:
|
||||||
|
|
|
||||||
196
ops/pipeline-v2/lib/claim_index.py
Normal file
196
ops/pipeline-v2/lib/claim_index.py
Normal file
|
|
@ -0,0 +1,196 @@
|
||||||
|
"""Claim index generator — structured index of all KB claims.
|
||||||
|
|
||||||
|
Produces claim-index.json: every claim with title, domain, confidence,
|
||||||
|
wiki links (outgoing + incoming counts), created date, word count,
|
||||||
|
challenged_by status. Consumed by:
|
||||||
|
- Argus (diagnostics dashboard — charts, vital signs)
|
||||||
|
- Vida (KB health diagnostics — orphan ratio, linkage density, freshness)
|
||||||
|
- Extraction prompt (KB index for dedup — could replace /tmp/kb-indexes/)
|
||||||
|
|
||||||
|
Generated after each merge (post-merge hook) or on demand.
|
||||||
|
Served via GET /claim-index on the health API.
|
||||||
|
|
||||||
|
Epimetheus owns this module.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
from datetime import date, datetime
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from . import config
|
||||||
|
|
||||||
|
logger = logging.getLogger("pipeline.claim_index")
|
||||||
|
|
||||||
|
WIKI_LINK_RE = re.compile(r"\[\[([^\]]+)\]\]")
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_frontmatter(text: str) -> dict | None:
|
||||||
|
"""Quick YAML frontmatter parser."""
|
||||||
|
if not text.startswith("---"):
|
||||||
|
return None
|
||||||
|
end = text.find("---", 3)
|
||||||
|
if end == -1:
|
||||||
|
return None
|
||||||
|
raw = text[3:end]
|
||||||
|
|
||||||
|
try:
|
||||||
|
import yaml
|
||||||
|
fm = yaml.safe_load(raw)
|
||||||
|
return fm if isinstance(fm, dict) else None
|
||||||
|
except ImportError:
|
||||||
|
pass
|
||||||
|
except Exception:
|
||||||
|
return None
|
||||||
|
|
||||||
|
# Fallback parser
|
||||||
|
fm = {}
|
||||||
|
for line in raw.strip().split("\n"):
|
||||||
|
line = line.strip()
|
||||||
|
if not line or line.startswith("#"):
|
||||||
|
continue
|
||||||
|
if ":" not in line:
|
||||||
|
continue
|
||||||
|
key, _, val = line.partition(":")
|
||||||
|
key = key.strip()
|
||||||
|
val = val.strip().strip('"').strip("'")
|
||||||
|
if val.lower() == "null" or val == "":
|
||||||
|
val = None
|
||||||
|
fm[key] = val
|
||||||
|
return fm if fm else None
|
||||||
|
|
||||||
|
|
||||||
|
def build_claim_index(repo_root: str | None = None) -> dict:
|
||||||
|
"""Build the full claim index from the repo.
|
||||||
|
|
||||||
|
Returns {generated_at, total_claims, claims: [...], domains: {...}}
|
||||||
|
"""
|
||||||
|
base = Path(repo_root) if repo_root else config.MAIN_WORKTREE
|
||||||
|
claims = []
|
||||||
|
all_stems: dict[str, str] = {} # stem → filepath (for incoming link counting)
|
||||||
|
|
||||||
|
# Phase 1: Collect all claims with outgoing links
|
||||||
|
for subdir in ["domains", "core", "foundations", "decisions"]:
|
||||||
|
full = base / subdir
|
||||||
|
if not full.is_dir():
|
||||||
|
continue
|
||||||
|
for f in full.rglob("*.md"):
|
||||||
|
if f.name.startswith("_"):
|
||||||
|
continue
|
||||||
|
|
||||||
|
try:
|
||||||
|
content = f.read_text()
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
|
|
||||||
|
fm = _parse_frontmatter(content)
|
||||||
|
if fm is None:
|
||||||
|
continue
|
||||||
|
|
||||||
|
ftype = fm.get("type")
|
||||||
|
if ftype not in ("claim", "framework", None):
|
||||||
|
continue # Skip entities, sources, etc.
|
||||||
|
|
||||||
|
# Extract wiki links
|
||||||
|
body_start = content.find("---", 3)
|
||||||
|
body = content[body_start + 3:] if body_start > 0 else content
|
||||||
|
outgoing_links = [link.strip() for link in WIKI_LINK_RE.findall(body) if link.strip()]
|
||||||
|
|
||||||
|
# Relative path from repo root
|
||||||
|
rel_path = str(f.relative_to(base))
|
||||||
|
|
||||||
|
# Word count (body only, not frontmatter)
|
||||||
|
body_text = re.sub(r"^# .+\n", "", body).strip()
|
||||||
|
body_text = re.split(r"\n---\n", body_text)[0] # Before Relevant Notes
|
||||||
|
word_count = len(body_text.split())
|
||||||
|
|
||||||
|
# Check for challenged_by
|
||||||
|
has_challenged_by = bool(fm.get("challenged_by"))
|
||||||
|
|
||||||
|
# Created date
|
||||||
|
created = fm.get("created")
|
||||||
|
if isinstance(created, date):
|
||||||
|
created = created.isoformat()
|
||||||
|
|
||||||
|
claim = {
|
||||||
|
"file": rel_path,
|
||||||
|
"stem": f.stem,
|
||||||
|
"title": f.stem.replace("-", " "),
|
||||||
|
"domain": fm.get("domain", subdir),
|
||||||
|
"confidence": fm.get("confidence"),
|
||||||
|
"created": created,
|
||||||
|
"outgoing_links": outgoing_links,
|
||||||
|
"outgoing_count": len(outgoing_links),
|
||||||
|
"incoming_count": 0, # Computed in phase 2
|
||||||
|
"has_challenged_by": has_challenged_by,
|
||||||
|
"word_count": word_count,
|
||||||
|
"type": ftype or "claim",
|
||||||
|
}
|
||||||
|
claims.append(claim)
|
||||||
|
all_stems[f.stem] = rel_path
|
||||||
|
|
||||||
|
# Phase 2: Count incoming links
|
||||||
|
incoming_counts: dict[str, int] = {}
|
||||||
|
for claim in claims:
|
||||||
|
for link in claim["outgoing_links"]:
|
||||||
|
if link in all_stems:
|
||||||
|
incoming_counts[link] = incoming_counts.get(link, 0) + 1
|
||||||
|
|
||||||
|
for claim in claims:
|
||||||
|
claim["incoming_count"] = incoming_counts.get(claim["stem"], 0)
|
||||||
|
|
||||||
|
# Domain summary
|
||||||
|
domain_counts: dict[str, int] = {}
|
||||||
|
for claim in claims:
|
||||||
|
d = claim["domain"]
|
||||||
|
domain_counts[d] = domain_counts.get(d, 0) + 1
|
||||||
|
|
||||||
|
# Orphan detection (0 incoming links)
|
||||||
|
orphans = sum(1 for c in claims if c["incoming_count"] == 0)
|
||||||
|
|
||||||
|
# Cross-domain links
|
||||||
|
cross_domain_links = 0
|
||||||
|
for claim in claims:
|
||||||
|
claim_domain = claim["domain"]
|
||||||
|
for link in claim["outgoing_links"]:
|
||||||
|
if link in all_stems:
|
||||||
|
# Find the linked claim's domain
|
||||||
|
for other in claims:
|
||||||
|
if other["stem"] == link and other["domain"] != claim_domain:
|
||||||
|
cross_domain_links += 1
|
||||||
|
break
|
||||||
|
|
||||||
|
index = {
|
||||||
|
"generated_at": datetime.utcnow().isoformat() + "Z",
|
||||||
|
"total_claims": len(claims),
|
||||||
|
"domains": domain_counts,
|
||||||
|
"orphan_count": orphans,
|
||||||
|
"orphan_ratio": round(orphans / len(claims), 3) if claims else 0,
|
||||||
|
"cross_domain_links": cross_domain_links,
|
||||||
|
"claims": claims,
|
||||||
|
}
|
||||||
|
|
||||||
|
return index
|
||||||
|
|
||||||
|
|
||||||
|
def write_claim_index(repo_root: str | None = None, output_path: str | None = None) -> str:
|
||||||
|
"""Build and write claim-index.json. Returns the output path."""
|
||||||
|
index = build_claim_index(repo_root)
|
||||||
|
|
||||||
|
if output_path is None:
|
||||||
|
output_path = str(Path.home() / ".pentagon" / "workspace" / "collective" / "claim-index.json")
|
||||||
|
|
||||||
|
Path(output_path).parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
# Atomic write
|
||||||
|
tmp = output_path + ".tmp"
|
||||||
|
with open(tmp, "w") as f:
|
||||||
|
json.dump(index, f, indent=2)
|
||||||
|
import os
|
||||||
|
os.rename(tmp, output_path)
|
||||||
|
|
||||||
|
logger.info("Wrote claim-index.json: %d claims, %d orphans, %d cross-domain links",
|
||||||
|
index["total_claims"], index["orphan_count"], index["cross_domain_links"])
|
||||||
|
|
||||||
|
return output_path
|
||||||
219
ops/pipeline-v2/lib/config.py
Normal file
219
ops/pipeline-v2/lib/config.py
Normal file
|
|
@ -0,0 +1,219 @@
|
||||||
|
"""Pipeline v2 configuration — all constants and thresholds."""
|
||||||
|
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# --- Paths ---
|
||||||
|
BASE_DIR = Path(os.environ.get("PIPELINE_BASE", "/opt/teleo-eval"))
|
||||||
|
REPO_DIR = BASE_DIR / "workspaces" / "teleo-codex.git"
|
||||||
|
MAIN_WORKTREE = BASE_DIR / "workspaces" / "main"
|
||||||
|
SECRETS_DIR = BASE_DIR / "secrets"
|
||||||
|
LOG_DIR = BASE_DIR / "logs"
|
||||||
|
DB_PATH = BASE_DIR / "pipeline" / "pipeline.db"
|
||||||
|
# File-based worktree lock path — used by all processes that write to main worktree
|
||||||
|
# (pipeline daemon stages + telegram bot). Ganymede: one lock, one mechanism.
|
||||||
|
MAIN_WORKTREE_LOCKFILE = BASE_DIR / "workspaces" / ".main-worktree.lock"
|
||||||
|
|
||||||
|
INBOX_QUEUE = "inbox/queue"
|
||||||
|
INBOX_ARCHIVE = "inbox/archive"
|
||||||
|
INBOX_NULL_RESULT = "inbox/null-result"
|
||||||
|
|
||||||
|
# --- Forgejo ---
|
||||||
|
FORGEJO_URL = os.environ.get("FORGEJO_URL", "http://localhost:3000")
|
||||||
|
FORGEJO_OWNER = "teleo"
|
||||||
|
FORGEJO_REPO = "teleo-codex"
|
||||||
|
FORGEJO_TOKEN_FILE = SECRETS_DIR / "forgejo-admin-token"
|
||||||
|
FORGEJO_PIPELINE_USER = "teleo" # git user for pipeline commits
|
||||||
|
|
||||||
|
# --- Models ---
|
||||||
|
CLAUDE_CLI = os.environ.get("CLAUDE_CLI", "/home/teleo/.local/bin/claude")
|
||||||
|
OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
|
||||||
|
|
||||||
|
# Model IDs
|
||||||
|
MODEL_OPUS = "opus"
|
||||||
|
MODEL_SONNET = "sonnet"
|
||||||
|
MODEL_HAIKU = "anthropic/claude-3.5-haiku"
|
||||||
|
MODEL_GPT4O = "openai/gpt-4o" # legacy, kept for reference
|
||||||
|
MODEL_GEMINI_FLASH = "google/gemini-2.5-flash" # was -preview, removed by OpenRouter
|
||||||
|
MODEL_SONNET_OR = "anthropic/claude-sonnet-4.5" # OpenRouter Sonnet (paid, not Claude Max)
|
||||||
|
|
||||||
|
# --- Model assignment per stage ---
|
||||||
|
# Principle: Opus is scarce (Claude Max). Reserve for DEEP eval + overnight research.
|
||||||
|
# Model diversity: domain (GPT-4o) + Leo (Sonnet) = two model families, no correlated blindspots.
|
||||||
|
# Both on OpenRouter = Claude Max rate limit untouched for Opus.
|
||||||
|
#
|
||||||
|
# Pipeline eval ordering (domain-first, Leo-last):
|
||||||
|
# 1. Domain review → GPT-4o (OpenRouter) — different family from Leo
|
||||||
|
# 2. Leo STANDARD → Sonnet (OpenRouter) — different family from domain
|
||||||
|
# 3. Leo DEEP → Opus (Claude Max) — highest judgment, scarce
|
||||||
|
EXTRACT_MODEL = MODEL_SONNET # extraction: structured output, volume work (Claude Max)
|
||||||
|
TRIAGE_MODEL = MODEL_HAIKU # triage: routing decision, cheapest (OpenRouter)
|
||||||
|
EVAL_DOMAIN_MODEL = MODEL_GEMINI_FLASH # domain review: Gemini 2.5 Flash (was GPT-4o — 16x cheaper, different family from Sonnet)
|
||||||
|
EVAL_LEO_MODEL = MODEL_OPUS # Leo DEEP review: Claude Max Opus
|
||||||
|
EVAL_LEO_STANDARD_MODEL = MODEL_SONNET_OR # Leo STANDARD review: OpenRouter Sonnet
|
||||||
|
EVAL_DEEP_MODEL = MODEL_GEMINI_FLASH # DEEP cross-family: paid, adversarial
|
||||||
|
|
||||||
|
# --- Model backends ---
|
||||||
|
# Each model can run on Claude Max (subscription, base load) or API (overflow/spikes).
|
||||||
|
# Claude Max: free but rate-limited. API: paid but unlimited.
|
||||||
|
# When Claude Max is rate-limited, behavior per stage:
|
||||||
|
# "queue" — wait for capacity (preferred for non-urgent work)
|
||||||
|
# "overflow" — fall back to API (for time-sensitive work)
|
||||||
|
# "skip" — skip this cycle (for optional stages like sample audit)
|
||||||
|
OVERFLOW_POLICY = {
|
||||||
|
"extract": "queue", # extraction can wait
|
||||||
|
"triage": "overflow", # triage is cheap on API anyway
|
||||||
|
"eval_domain": "overflow", # domain review is the volume filter — don't let it bottleneck (Rhea)
|
||||||
|
"eval_leo": "queue", # Leo review is the bottleneck we protect
|
||||||
|
"eval_deep": "overflow", # DEEP is already on API
|
||||||
|
"sample_audit": "skip", # optional, skip if constrained
|
||||||
|
}
|
||||||
|
|
||||||
|
# OpenRouter cost rates per 1K tokens (only applies when using API, not Claude Max)
|
||||||
|
MODEL_COSTS = {
|
||||||
|
"opus": {"input": 0.015, "output": 0.075},
|
||||||
|
"sonnet": {"input": 0.003, "output": 0.015},
|
||||||
|
MODEL_HAIKU: {"input": 0.0008, "output": 0.004},
|
||||||
|
MODEL_GPT4O: {"input": 0.0025, "output": 0.01},
|
||||||
|
MODEL_GEMINI_FLASH: {"input": 0.00015, "output": 0.0006},
|
||||||
|
MODEL_SONNET_OR: {"input": 0.003, "output": 0.015},
|
||||||
|
}
|
||||||
|
|
||||||
|
# --- Concurrency ---
|
||||||
|
MAX_EXTRACT_WORKERS = int(os.environ.get("MAX_EXTRACT_WORKERS", "5"))
|
||||||
|
MAX_EVAL_WORKERS = int(os.environ.get("MAX_EVAL_WORKERS", "7"))
|
||||||
|
MAX_MERGE_WORKERS = 1 # domain-serialized, but one merge at a time per domain
|
||||||
|
|
||||||
|
# --- Timeouts (seconds) ---
|
||||||
|
EXTRACT_TIMEOUT = 600 # 10 min
|
||||||
|
EVAL_TIMEOUT = 120 # 2 min — routine Sonnet/Gemini Flash calls (was 600, caused 10-min stalls)
|
||||||
|
EVAL_TIMEOUT_OPUS = 600 # 10 min — Opus DEEP eval needs more time for complex reasoning
|
||||||
|
MERGE_TIMEOUT = 300 # 5 min — force-reset to conflict if exceeded (Rhea)
|
||||||
|
CLAUDE_MAX_PROBE_TIMEOUT = 15
|
||||||
|
|
||||||
|
# --- Backpressure ---
|
||||||
|
BACKPRESSURE_HIGH = 40 # pause extraction above this
|
||||||
|
BACKPRESSURE_LOW = 20 # throttle extraction above this
|
||||||
|
BACKPRESSURE_THROTTLE_WORKERS = 2 # workers when throttled
|
||||||
|
|
||||||
|
# --- Retry budgets ---
|
||||||
|
TRANSIENT_RETRY_MAX = 5 # API timeouts, rate limits
|
||||||
|
SUBSTANTIVE_RETRY_STANDARD = 2 # reviewer request_changes
|
||||||
|
SUBSTANTIVE_RETRY_DEEP = 3
|
||||||
|
MAX_EVAL_ATTEMPTS = 3 # Hard cap on eval cycles per PR before terminal
|
||||||
|
MAX_FIX_ATTEMPTS = 2 # Hard cap on auto-fix cycles per PR before giving up
|
||||||
|
MAX_FIX_PER_CYCLE = 15 # PRs to fix per cycle — bumped from 5 to clear backlog (Cory, Mar 14)
|
||||||
|
|
||||||
|
# Issue tags that can be fixed mechanically (Python fixer or Haiku)
|
||||||
|
# broken_wiki_links removed — downgraded to warning, not a gate. Links to claims
|
||||||
|
# in other open PRs resolve naturally as the dependency chain merges. (Cory, Mar 14)
|
||||||
|
MECHANICAL_ISSUE_TAGS = {"frontmatter_schema", "near_duplicate"}
|
||||||
|
# Issue tags that require re-extraction (substantive quality problems)
|
||||||
|
SUBSTANTIVE_ISSUE_TAGS = {"factual_discrepancy", "confidence_miscalibration", "scope_error", "title_overclaims"}
|
||||||
|
|
||||||
|
# --- Content type schemas ---
|
||||||
|
# Registry of content types. validate.py branches on type to apply the right
|
||||||
|
# required fields, confidence rules, and title checks. Adding a new type is a
|
||||||
|
# dict entry here — no code changes in validate.py needed.
|
||||||
|
TYPE_SCHEMAS = {
|
||||||
|
"claim": {
|
||||||
|
"required": ("type", "domain", "description", "confidence", "source", "created"),
|
||||||
|
"valid_confidence": ("proven", "likely", "experimental", "speculative"),
|
||||||
|
"needs_proposition_title": True,
|
||||||
|
},
|
||||||
|
"framework": {
|
||||||
|
"required": ("type", "domain", "description", "source", "created"),
|
||||||
|
"valid_confidence": None,
|
||||||
|
"needs_proposition_title": True,
|
||||||
|
},
|
||||||
|
"entity": {
|
||||||
|
"required": ("type", "domain", "description"),
|
||||||
|
"valid_confidence": None,
|
||||||
|
"needs_proposition_title": False,
|
||||||
|
},
|
||||||
|
"decision": {
|
||||||
|
"required": ("type", "domain", "description", "parent_entity", "status"),
|
||||||
|
"valid_confidence": None,
|
||||||
|
"needs_proposition_title": False,
|
||||||
|
"valid_status": ("active", "passed", "failed", "expired", "cancelled"),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
# --- Content directories ---
|
||||||
|
ENTITY_DIR_TEMPLATE = "entities/{domain}" # centralized path (Rhea: don't hardcode across 5 files)
|
||||||
|
DECISION_DIR_TEMPLATE = "decisions/{domain}"
|
||||||
|
|
||||||
|
# --- Contributor tiers ---
|
||||||
|
# Auto-promotion rules. CI is computed, not stored.
|
||||||
|
CONTRIBUTOR_TIER_RULES = {
|
||||||
|
"contributor": {
|
||||||
|
"claims_merged": 1,
|
||||||
|
},
|
||||||
|
"veteran": {
|
||||||
|
"claims_merged": 10,
|
||||||
|
"min_days_since_first": 30,
|
||||||
|
"challenges_survived": 1,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
# Role weights for CI computation (must match schemas/contribution-weights.yaml)
|
||||||
|
CONTRIBUTION_ROLE_WEIGHTS = {
|
||||||
|
"sourcer": 0.15,
|
||||||
|
"extractor": 0.40,
|
||||||
|
"challenger": 0.20,
|
||||||
|
"synthesizer": 0.15,
|
||||||
|
"reviewer": 0.10,
|
||||||
|
}
|
||||||
|
|
||||||
|
# --- Circuit breakers ---
|
||||||
|
BREAKER_THRESHOLD = 5
|
||||||
|
BREAKER_COOLDOWN = 900 # 15 min
|
||||||
|
|
||||||
|
# --- Cost budgets ---
|
||||||
|
OPENROUTER_DAILY_BUDGET = 20.0 # USD
|
||||||
|
OPENROUTER_WARN_THRESHOLD = 0.8 # 80% of budget
|
||||||
|
|
||||||
|
# --- Quality ---
|
||||||
|
SAMPLE_AUDIT_RATE = 0.15 # 15% of LIGHT merges get pre-merge promotion to STANDARD (Rio)
|
||||||
|
SAMPLE_AUDIT_DISAGREEMENT_THRESHOLD = 0.10 # 10% disagreement → tighten LIGHT criteria
|
||||||
|
SAMPLE_AUDIT_MODEL = MODEL_OPUS # Opus for audit — different family from Haiku triage (Leo)
|
||||||
|
|
||||||
|
# --- Batch eval ---
|
||||||
|
# Batch domain review: group STANDARD PRs by domain, one LLM call per batch.
|
||||||
|
# Leo review stays individual (safety net for cross-contamination).
|
||||||
|
BATCH_EVAL_MAX_PRS = int(os.environ.get("BATCH_EVAL_MAX_PRS", "5"))
|
||||||
|
BATCH_EVAL_MAX_DIFF_BYTES = int(os.environ.get("BATCH_EVAL_MAX_DIFF_BYTES", "100000")) # 100KB
|
||||||
|
|
||||||
|
# --- Tier logic ---
|
||||||
|
# LIGHT_SKIP_LLM: when True, LIGHT PRs skip domain+Leo review entirely (auto-approve on Tier 0 pass).
|
||||||
|
# Set False for shadow mode (domain review runs but logs only). Flip True after 24h validation (Rhea).
|
||||||
|
LIGHT_SKIP_LLM = os.environ.get("LIGHT_SKIP_LLM", "false").lower() == "true"
|
||||||
|
# Random pre-merge promotion: fraction of LIGHT PRs upgraded to STANDARD before eval (Rio).
|
||||||
|
# Makes gaming unpredictable — extraction agents can't know which LIGHT PRs get full review.
|
||||||
|
LIGHT_PROMOTION_RATE = float(os.environ.get("LIGHT_PROMOTION_RATE", "0.15"))
|
||||||
|
|
||||||
|
# --- Polling intervals (seconds) ---
|
||||||
|
INGEST_INTERVAL = 60
|
||||||
|
VALIDATE_INTERVAL = 30
|
||||||
|
EVAL_INTERVAL = 30
|
||||||
|
MERGE_INTERVAL = 30
|
||||||
|
FIX_INTERVAL = 60
|
||||||
|
HEALTH_CHECK_INTERVAL = 60
|
||||||
|
|
||||||
|
# --- Retrieval (Telegram bot) ---
|
||||||
|
RETRIEVAL_RRF_K = 20 # RRF smoothing constant — tuned for 5-10 results per source
|
||||||
|
RETRIEVAL_ENTITY_BOOST = 1.5 # RRF score multiplier for claims wiki-linked from matched entities
|
||||||
|
RETRIEVAL_MAX_RESULTS = 10 # Max claims shown to LLM after RRF merge
|
||||||
|
RETRIEVAL_MIN_CLAIM_SCORE = 3.0 # Floor for keyword claim scoring — filters single-stopword matches
|
||||||
|
|
||||||
|
# --- Health API ---
|
||||||
|
HEALTH_PORT = 8080
|
||||||
|
|
||||||
|
# --- Logging ---
|
||||||
|
LOG_FILE = LOG_DIR / "pipeline.jsonl"
|
||||||
|
LOG_ROTATION_MAX_BYTES = 50 * 1024 * 1024 # 50MB per file
|
||||||
|
LOG_ROTATION_BACKUP_COUNT = 7 # keep 7 days
|
||||||
|
|
||||||
|
# --- Versioning (tracked in metrics_snapshots for chart annotations) ---
|
||||||
|
PROMPT_VERSION = "v2-lean-directed" # bump on every prompt change
|
||||||
|
PIPELINE_VERSION = "2.2" # bump on every significant pipeline change
|
||||||
200
ops/pipeline-v2/lib/connect.py
Normal file
200
ops/pipeline-v2/lib/connect.py
Normal file
|
|
@ -0,0 +1,200 @@
|
||||||
|
"""Atomic extract-and-connect — wire new claims to the KB at extraction time.
|
||||||
|
|
||||||
|
After extraction writes claim files to disk, this module:
|
||||||
|
1. Embeds each new claim (title + description + body snippet)
|
||||||
|
2. Searches Qdrant for semantically similar existing claims
|
||||||
|
3. Adds found neighbors as `related` edges on the NEW claim's frontmatter
|
||||||
|
|
||||||
|
Key design decision: edges are written on the NEW claim, not on existing claims.
|
||||||
|
Writing on existing claims would cause merge conflicts (same reason entities are
|
||||||
|
queued, not written on branches). When the PR merges, embed-on-merge adds the
|
||||||
|
new claim to Qdrant, and reweave can later add reciprocal edges on neighbors.
|
||||||
|
|
||||||
|
Cost: ~$0.0001 per claim (embedding only). No LLM classification — defaults to
|
||||||
|
"related". Reweave handles supports/challenges classification in a separate pass.
|
||||||
|
|
||||||
|
Owner: Epimetheus
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
logger = logging.getLogger("pipeline.connect")
|
||||||
|
|
||||||
|
# Similarity threshold for auto-connecting — below reweave's 0.70 but above
|
||||||
|
# the noise floor (~0.55). "related" still means actually related, not vaguely topical.
|
||||||
|
CONNECT_THRESHOLD = 0.65
|
||||||
|
CONNECT_MAX_NEIGHBORS = 5
|
||||||
|
|
||||||
|
# --- Import search functions ---
|
||||||
|
# This module is called from openrouter-extract-v2.py which may not have lib/ on path
|
||||||
|
# via the package, so handle both import paths.
|
||||||
|
try:
|
||||||
|
from .search import embed_query, search_qdrant
|
||||||
|
from .post_extract import parse_frontmatter, _rebuild_content
|
||||||
|
except ImportError:
|
||||||
|
sys.path.insert(0, os.path.dirname(__file__))
|
||||||
|
from search import embed_query, search_qdrant
|
||||||
|
from post_extract import parse_frontmatter, _rebuild_content
|
||||||
|
|
||||||
|
|
||||||
|
def _build_search_text(content: str) -> str:
|
||||||
|
"""Extract title + description + first 500 chars of body for embedding."""
|
||||||
|
fm, body = parse_frontmatter(content)
|
||||||
|
parts = []
|
||||||
|
if fm:
|
||||||
|
desc = fm.get("description", "")
|
||||||
|
if isinstance(desc, str) and desc:
|
||||||
|
parts.append(desc.strip('"').strip("'"))
|
||||||
|
# Get H1 title from body
|
||||||
|
h1_match = re.search(r"^# (.+)$", body, re.MULTILINE) if body else None
|
||||||
|
if h1_match:
|
||||||
|
parts.append(h1_match.group(1).strip())
|
||||||
|
# Add body snippet (skip H1 line)
|
||||||
|
if body:
|
||||||
|
body_text = re.sub(r"^# .+\n*", "", body).strip()
|
||||||
|
# Stop at "Relevant Notes" or "Topics" sections
|
||||||
|
body_text = re.split(r"\n---\n", body_text)[0].strip()
|
||||||
|
if body_text:
|
||||||
|
parts.append(body_text[:500])
|
||||||
|
return " ".join(parts)
|
||||||
|
|
||||||
|
|
||||||
|
def _add_related_edges(claim_path: str, neighbor_titles: list[str]) -> bool:
|
||||||
|
"""Add related edges to a claim's frontmatter. Returns True if modified."""
|
||||||
|
try:
|
||||||
|
with open(claim_path) as f:
|
||||||
|
content = f.read()
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning("Cannot read %s: %s", claim_path, e)
|
||||||
|
return False
|
||||||
|
|
||||||
|
fm, body = parse_frontmatter(content)
|
||||||
|
if fm is None:
|
||||||
|
return False
|
||||||
|
|
||||||
|
# Get existing related edges to avoid duplicates
|
||||||
|
existing = fm.get("related", [])
|
||||||
|
if isinstance(existing, str):
|
||||||
|
existing = [existing]
|
||||||
|
elif not isinstance(existing, list):
|
||||||
|
existing = []
|
||||||
|
|
||||||
|
existing_lower = {str(e).strip().lower() for e in existing}
|
||||||
|
|
||||||
|
# Add new edges
|
||||||
|
added = []
|
||||||
|
for title in neighbor_titles:
|
||||||
|
if title.strip().lower() not in existing_lower:
|
||||||
|
added.append(title)
|
||||||
|
existing_lower.add(title.strip().lower())
|
||||||
|
|
||||||
|
if not added:
|
||||||
|
return False
|
||||||
|
|
||||||
|
fm["related"] = existing + added
|
||||||
|
|
||||||
|
# Rebuild and write
|
||||||
|
new_content = _rebuild_content(fm, body)
|
||||||
|
with open(claim_path, "w") as f:
|
||||||
|
f.write(new_content)
|
||||||
|
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
def connect_new_claims(
|
||||||
|
claim_paths: list[str],
|
||||||
|
threshold: float = CONNECT_THRESHOLD,
|
||||||
|
max_neighbors: int = CONNECT_MAX_NEIGHBORS,
|
||||||
|
) -> dict:
|
||||||
|
"""Connect newly-written claims to the existing KB via vector search.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
claim_paths: List of file paths to newly-written claim files.
|
||||||
|
threshold: Minimum cosine similarity for connection.
|
||||||
|
max_neighbors: Maximum edges to add per claim.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
{
|
||||||
|
"total": int,
|
||||||
|
"connected": int,
|
||||||
|
"edges_added": int,
|
||||||
|
"skipped_embed_failed": int,
|
||||||
|
"skipped_no_neighbors": int,
|
||||||
|
"connections": [{"claim": str, "neighbors": [str]}],
|
||||||
|
}
|
||||||
|
"""
|
||||||
|
stats = {
|
||||||
|
"total": len(claim_paths),
|
||||||
|
"connected": 0,
|
||||||
|
"edges_added": 0,
|
||||||
|
"skipped_embed_failed": 0,
|
||||||
|
"skipped_no_neighbors": 0,
|
||||||
|
"connections": [],
|
||||||
|
}
|
||||||
|
|
||||||
|
for claim_path in claim_paths:
|
||||||
|
try:
|
||||||
|
with open(claim_path) as f:
|
||||||
|
content = f.read()
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Build search text from claim content
|
||||||
|
search_text = _build_search_text(content)
|
||||||
|
if not search_text or len(search_text) < 20:
|
||||||
|
stats["skipped_no_neighbors"] += 1
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Embed the claim
|
||||||
|
vector = embed_query(search_text)
|
||||||
|
if vector is None:
|
||||||
|
stats["skipped_embed_failed"] += 1
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Search Qdrant for neighbors (exclude nothing — new claim isn't in Qdrant yet)
|
||||||
|
hits = search_qdrant(
|
||||||
|
vector,
|
||||||
|
limit=max_neighbors,
|
||||||
|
domain=None, # Cross-domain connections are valuable
|
||||||
|
score_threshold=threshold,
|
||||||
|
)
|
||||||
|
|
||||||
|
if not hits:
|
||||||
|
stats["skipped_no_neighbors"] += 1
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Extract neighbor titles
|
||||||
|
neighbor_titles = []
|
||||||
|
for hit in hits:
|
||||||
|
payload = hit.get("payload", {})
|
||||||
|
title = payload.get("claim_title", "")
|
||||||
|
if title:
|
||||||
|
neighbor_titles.append(title)
|
||||||
|
|
||||||
|
if not neighbor_titles:
|
||||||
|
stats["skipped_no_neighbors"] += 1
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Add edges to the new claim's frontmatter
|
||||||
|
if _add_related_edges(claim_path, neighbor_titles):
|
||||||
|
stats["connected"] += 1
|
||||||
|
stats["edges_added"] += len(neighbor_titles)
|
||||||
|
stats["connections"].append({
|
||||||
|
"claim": os.path.basename(claim_path),
|
||||||
|
"neighbors": neighbor_titles,
|
||||||
|
})
|
||||||
|
logger.info("Connected %s → %d neighbors", os.path.basename(claim_path), len(neighbor_titles))
|
||||||
|
else:
|
||||||
|
stats["skipped_no_neighbors"] += 1
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
"Extract-and-connect: %d/%d claims connected (%d edges added, %d embed failed, %d no neighbors)",
|
||||||
|
stats["connected"], stats["total"], stats["edges_added"],
|
||||||
|
stats["skipped_embed_failed"], stats["skipped_no_neighbors"],
|
||||||
|
)
|
||||||
|
|
||||||
|
return stats
|
||||||
110
ops/pipeline-v2/lib/costs.py
Normal file
110
ops/pipeline-v2/lib/costs.py
Normal file
|
|
@ -0,0 +1,110 @@
|
||||||
|
"""Cost tracking — per-model per-day with budget enforcement."""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from datetime import date
|
||||||
|
|
||||||
|
from . import config
|
||||||
|
|
||||||
|
logger = logging.getLogger("pipeline.costs")
|
||||||
|
|
||||||
|
|
||||||
|
def record_usage(
|
||||||
|
conn,
|
||||||
|
model: str,
|
||||||
|
stage: str,
|
||||||
|
input_tokens: int = 0,
|
||||||
|
output_tokens: int = 0,
|
||||||
|
backend: str = "api",
|
||||||
|
duration_ms: int = 0,
|
||||||
|
cache_read_tokens: int = 0,
|
||||||
|
cache_write_tokens: int = 0,
|
||||||
|
cost_estimate_usd: float = 0.0,
|
||||||
|
):
|
||||||
|
"""Record usage and compute cost. Returns cost in USD.
|
||||||
|
|
||||||
|
backend: "max" (Claude Max subscription, free) or "api" (paid).
|
||||||
|
Claude Max calls are tracked for volume metrics but cost $0. (Ganymede)
|
||||||
|
"""
|
||||||
|
# Always compute estimated cost from tokens × published rates
|
||||||
|
rates = config.MODEL_COSTS.get(model)
|
||||||
|
if rates and (input_tokens or output_tokens):
|
||||||
|
estimated = (input_tokens * rates["input"] + output_tokens * rates["output"]) / 1000
|
||||||
|
# Cache reads are ~90% cheaper than regular input
|
||||||
|
if cache_read_tokens and rates:
|
||||||
|
estimated += (cache_read_tokens * rates["input"] * 0.1) / 1000
|
||||||
|
if cache_write_tokens and rates:
|
||||||
|
estimated += (cache_write_tokens * rates["input"] * 1.25) / 1000
|
||||||
|
else:
|
||||||
|
estimated = 0.0
|
||||||
|
# Use caller-provided estimate if we can't compute (e.g. CLI gives its own)
|
||||||
|
if cost_estimate_usd > 0 and estimated == 0:
|
||||||
|
estimated = cost_estimate_usd
|
||||||
|
cost_estimate_usd = estimated
|
||||||
|
|
||||||
|
if backend == "max":
|
||||||
|
cost = 0.0 # subscription — no actual spend
|
||||||
|
else:
|
||||||
|
cost = estimated if estimated > 0 else 0.0
|
||||||
|
|
||||||
|
today = date.today().isoformat()
|
||||||
|
# Include backend in the stage key so max vs api are tracked separately
|
||||||
|
stage_key = f"{stage}:{backend}" if backend != "api" else stage
|
||||||
|
conn.execute(
|
||||||
|
"""INSERT INTO costs (date, model, stage, calls, input_tokens, output_tokens, cost_usd,
|
||||||
|
duration_ms, cache_read_tokens, cache_write_tokens, cost_estimate_usd)
|
||||||
|
VALUES (?, ?, ?, 1, ?, ?, ?, ?, ?, ?, ?)
|
||||||
|
ON CONFLICT (date, model, stage) DO UPDATE SET
|
||||||
|
calls = calls + 1,
|
||||||
|
input_tokens = input_tokens + excluded.input_tokens,
|
||||||
|
output_tokens = output_tokens + excluded.output_tokens,
|
||||||
|
cost_usd = cost_usd + excluded.cost_usd,
|
||||||
|
duration_ms = duration_ms + excluded.duration_ms,
|
||||||
|
cache_read_tokens = cache_read_tokens + excluded.cache_read_tokens,
|
||||||
|
cache_write_tokens = cache_write_tokens + excluded.cache_write_tokens,
|
||||||
|
cost_estimate_usd = cost_estimate_usd + excluded.cost_estimate_usd""",
|
||||||
|
(today, model, stage_key, input_tokens, output_tokens, cost,
|
||||||
|
duration_ms, cache_read_tokens, cache_write_tokens, cost_estimate_usd),
|
||||||
|
)
|
||||||
|
return cost
|
||||||
|
|
||||||
|
|
||||||
|
def get_daily_spend(conn, day: str = None) -> float:
|
||||||
|
"""Get total OpenRouter spend for a given day (default: today)."""
|
||||||
|
if day is None:
|
||||||
|
day = date.today().isoformat()
|
||||||
|
row = conn.execute(
|
||||||
|
"SELECT COALESCE(SUM(cost_usd), 0) as total FROM costs WHERE date = ?",
|
||||||
|
(day,),
|
||||||
|
).fetchone()
|
||||||
|
return row["total"]
|
||||||
|
|
||||||
|
|
||||||
|
def get_daily_breakdown(conn, day: str = None) -> list:
|
||||||
|
"""Get per-model per-stage breakdown for a day."""
|
||||||
|
if day is None:
|
||||||
|
day = date.today().isoformat()
|
||||||
|
rows = conn.execute(
|
||||||
|
"""SELECT model, stage, calls, input_tokens, output_tokens, cost_usd,
|
||||||
|
duration_ms, cache_read_tokens, cache_write_tokens, cost_estimate_usd
|
||||||
|
FROM costs WHERE date = ? ORDER BY cost_usd DESC""",
|
||||||
|
(day,),
|
||||||
|
).fetchall()
|
||||||
|
return [dict(r) for r in rows]
|
||||||
|
|
||||||
|
|
||||||
|
def check_budget(conn) -> dict:
|
||||||
|
"""Check budget status. Returns {ok, spend, budget, pct}."""
|
||||||
|
spend = get_daily_spend(conn)
|
||||||
|
pct = spend / config.OPENROUTER_DAILY_BUDGET if config.OPENROUTER_DAILY_BUDGET > 0 else 0
|
||||||
|
return {
|
||||||
|
"ok": pct < 1.0,
|
||||||
|
"warn": pct >= config.OPENROUTER_WARN_THRESHOLD,
|
||||||
|
"spend": round(spend, 4),
|
||||||
|
"budget": config.OPENROUTER_DAILY_BUDGET,
|
||||||
|
"pct": round(pct * 100, 1),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def budget_allows(conn) -> bool:
|
||||||
|
"""Quick check: is spending under daily budget?"""
|
||||||
|
return check_budget(conn)["ok"]
|
||||||
|
|
@ -9,7 +9,7 @@ from . import config
|
||||||
|
|
||||||
logger = logging.getLogger("pipeline.db")
|
logger = logging.getLogger("pipeline.db")
|
||||||
|
|
||||||
SCHEMA_VERSION = 12
|
SCHEMA_VERSION = 17
|
||||||
|
|
||||||
SCHEMA_SQL = """
|
SCHEMA_SQL = """
|
||||||
CREATE TABLE IF NOT EXISTS schema_version (
|
CREATE TABLE IF NOT EXISTS schema_version (
|
||||||
|
|
@ -69,6 +69,7 @@ CREATE TABLE IF NOT EXISTS prs (
|
||||||
last_error TEXT,
|
last_error TEXT,
|
||||||
last_attempt TEXT,
|
last_attempt TEXT,
|
||||||
cost_usd REAL DEFAULT 0,
|
cost_usd REAL DEFAULT 0,
|
||||||
|
auto_merge INTEGER DEFAULT 0,
|
||||||
created_at TEXT DEFAULT (datetime('now')),
|
created_at TEXT DEFAULT (datetime('now')),
|
||||||
merged_at TEXT
|
merged_at TEXT
|
||||||
);
|
);
|
||||||
|
|
@ -468,58 +469,28 @@ def migrate(conn: sqlite3.Connection):
|
||||||
conn.commit()
|
conn.commit()
|
||||||
logger.info("Migration v10: added eval pipeline columns to response_audit")
|
logger.info("Migration v10: added eval pipeline columns to response_audit")
|
||||||
|
|
||||||
|
|
||||||
if current < 11:
|
if current < 11:
|
||||||
# Phase 11: compute tracking — extended costs table columns
|
# Add auto_merge flag for agent PR auto-merge (eval-approved agent branches)
|
||||||
# (May already exist on VPS from manual deploy — idempotent ALTERs)
|
try:
|
||||||
for col_def in [
|
conn.execute("ALTER TABLE prs ADD COLUMN auto_merge INTEGER DEFAULT 0")
|
||||||
("duration_ms", "INTEGER DEFAULT 0"),
|
except sqlite3.OperationalError:
|
||||||
("cache_read_tokens", "INTEGER DEFAULT 0"),
|
pass # Column already exists (VPS may be ahead of repo schema)
|
||||||
("cache_write_tokens", "INTEGER DEFAULT 0"),
|
conn.commit()
|
||||||
("cost_estimate_usd", "REAL DEFAULT 0"),
|
logger.info("Migration v11: added auto_merge column to prs table")
|
||||||
|
|
||||||
|
|
||||||
|
if current < 17:
|
||||||
|
# Add prompt/pipeline version tracking per PR
|
||||||
|
for col, default in [
|
||||||
|
("prompt_version", None),
|
||||||
|
("pipeline_version", None),
|
||||||
]:
|
]:
|
||||||
try:
|
try:
|
||||||
conn.execute(f"ALTER TABLE costs ADD COLUMN {col_def[0]} {col_def[1]}")
|
conn.execute(f"ALTER TABLE prs ADD COLUMN {col} TEXT")
|
||||||
except sqlite3.OperationalError:
|
except sqlite3.OperationalError:
|
||||||
pass # Column already exists
|
pass # Column already exists
|
||||||
conn.commit()
|
conn.commit()
|
||||||
logger.info("Migration v11: added compute tracking columns to costs")
|
logger.info("Migration v17: added prompt_version, pipeline_version to prs table")
|
||||||
|
|
||||||
if current < 12:
|
|
||||||
# Phase 12: structured review records — captures all evaluation outcomes
|
|
||||||
# including rejections, disagreements, and approved-with-changes.
|
|
||||||
# Schema locked with Leo (2026-04-01).
|
|
||||||
conn.executescript("""
|
|
||||||
CREATE TABLE IF NOT EXISTS review_records (
|
|
||||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
||||||
pr_number INTEGER NOT NULL,
|
|
||||||
claim_path TEXT,
|
|
||||||
domain TEXT,
|
|
||||||
agent TEXT,
|
|
||||||
reviewer TEXT NOT NULL,
|
|
||||||
reviewer_model TEXT,
|
|
||||||
outcome TEXT NOT NULL
|
|
||||||
CHECK (outcome IN ('approved', 'approved-with-changes', 'rejected')),
|
|
||||||
rejection_reason TEXT
|
|
||||||
CHECK (rejection_reason IS NULL OR rejection_reason IN (
|
|
||||||
'fails-standalone-test', 'duplicate', 'scope-mismatch',
|
|
||||||
'evidence-insufficient', 'framing-poor', 'other'
|
|
||||||
)),
|
|
||||||
disagreement_type TEXT
|
|
||||||
CHECK (disagreement_type IS NULL OR disagreement_type IN (
|
|
||||||
'factual', 'scope', 'framing', 'evidence'
|
|
||||||
)),
|
|
||||||
notes TEXT,
|
|
||||||
batch_id TEXT,
|
|
||||||
claims_in_batch INTEGER DEFAULT 1,
|
|
||||||
reviewed_at TEXT DEFAULT (datetime('now'))
|
|
||||||
);
|
|
||||||
CREATE INDEX IF NOT EXISTS idx_review_records_pr ON review_records(pr_number);
|
|
||||||
CREATE INDEX IF NOT EXISTS idx_review_records_outcome ON review_records(outcome);
|
|
||||||
CREATE INDEX IF NOT EXISTS idx_review_records_domain ON review_records(domain);
|
|
||||||
CREATE INDEX IF NOT EXISTS idx_review_records_reviewer ON review_records(reviewer);
|
|
||||||
""")
|
|
||||||
logger.info("Migration v12: created review_records table")
|
|
||||||
|
|
||||||
if current < SCHEMA_VERSION:
|
if current < SCHEMA_VERSION:
|
||||||
conn.execute(
|
conn.execute(
|
||||||
|
|
@ -540,30 +511,6 @@ def audit(conn: sqlite3.Connection, stage: str, event: str, detail: str = None):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def record_review(conn, pr_number: int, reviewer: str, outcome: str, *,
|
|
||||||
claim_path: str = None, domain: str = None, agent: str = None,
|
|
||||||
reviewer_model: str = None, rejection_reason: str = None,
|
|
||||||
disagreement_type: str = None, notes: str = None,
|
|
||||||
claims_in_batch: int = 1):
|
|
||||||
"""Record a structured review outcome.
|
|
||||||
|
|
||||||
Called from evaluate stage after Leo/domain reviewer returns a verdict.
|
|
||||||
outcome must be: approved, approved-with-changes, or rejected.
|
|
||||||
"""
|
|
||||||
batch_id = str(pr_number)
|
|
||||||
conn.execute(
|
|
||||||
"""INSERT INTO review_records
|
|
||||||
(pr_number, claim_path, domain, agent, reviewer, reviewer_model,
|
|
||||||
outcome, rejection_reason, disagreement_type, notes,
|
|
||||||
batch_id, claims_in_batch)
|
|
||||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
|
|
||||||
(pr_number, claim_path, domain, agent, reviewer, reviewer_model,
|
|
||||||
outcome, rejection_reason, disagreement_type, notes,
|
|
||||||
batch_id, claims_in_batch),
|
|
||||||
)
|
|
||||||
|
|
||||||
def append_priority_log(conn: sqlite3.Connection, path: str, stage: str, priority: str, reasoning: str):
|
def append_priority_log(conn: sqlite3.Connection, path: str, stage: str, priority: str, reasoning: str):
|
||||||
"""Append a priority assessment to a source's priority_log.
|
"""Append a priority assessment to a source's priority_log.
|
||||||
|
|
||||||
|
|
|
||||||
113
ops/pipeline-v2/lib/dedup.py
Normal file
113
ops/pipeline-v2/lib/dedup.py
Normal file
|
|
@ -0,0 +1,113 @@
|
||||||
|
"""Evidence block deduplication for enrichment idempotency.
|
||||||
|
|
||||||
|
Removes duplicate '### Additional Evidence' and '### Auto-enrichment' blocks
|
||||||
|
that arise from rebase of enrichment branches. (Leo: PRs #1751, #1752)
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
|
||||||
|
logger = logging.getLogger("pipeline.dedup")
|
||||||
|
|
||||||
|
# Matches start of an evidence block header
|
||||||
|
_EVIDENCE_HEADER = re.compile(
|
||||||
|
r'^### (?:Additional Evidence|Auto-enrichment) \(',
|
||||||
|
re.MULTILINE,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Extracts source key from the *Source: ...* line
|
||||||
|
_SOURCE_LINE = re.compile(r'^\*Source: (.+)\*', re.MULTILINE)
|
||||||
|
|
||||||
|
|
||||||
|
def dedup_evidence_blocks(content: str) -> str:
|
||||||
|
"""Remove duplicate evidence blocks from a claim file.
|
||||||
|
|
||||||
|
After rebase, two enrichment branches can produce duplicate
|
||||||
|
evidence blocks with the same source reference. Keeps the first
|
||||||
|
occurrence of each source, removes subsequent duplicates.
|
||||||
|
"""
|
||||||
|
# Find all evidence block start positions
|
||||||
|
headers = list(_EVIDENCE_HEADER.finditer(content))
|
||||||
|
if len(headers) < 2:
|
||||||
|
return content
|
||||||
|
|
||||||
|
# Parse each block: find its extent and source key
|
||||||
|
blocks = [] # (start, end, source_key)
|
||||||
|
for i, hdr in enumerate(headers):
|
||||||
|
block_start = hdr.start()
|
||||||
|
# Block extends to just before the next evidence header
|
||||||
|
# (or to end of file for the last block).
|
||||||
|
# But we need to be careful: content after the last evidence
|
||||||
|
# block that ISN'T evidence (Relevant Notes, ---, etc.) should
|
||||||
|
# NOT be considered part of the block.
|
||||||
|
if i + 1 < len(headers):
|
||||||
|
block_end = headers[i + 1].start()
|
||||||
|
else:
|
||||||
|
# Last block: find where evidence content ends.
|
||||||
|
# Look for the next non-evidence section marker after the
|
||||||
|
# source line and evidence body.
|
||||||
|
rest = content[block_start:]
|
||||||
|
# Find end of this evidence block's text by looking for
|
||||||
|
# a section boundary: ---, ## heading, Relevant Notes, Topics
|
||||||
|
# Skip the first line (the ### header itself)
|
||||||
|
lines = rest.split("\n")
|
||||||
|
end_offset = len(rest)
|
||||||
|
past_source = False
|
||||||
|
past_body = False
|
||||||
|
line_pos = 0
|
||||||
|
for j, line in enumerate(lines):
|
||||||
|
if j == 0:
|
||||||
|
line_pos += len(line) + 1
|
||||||
|
continue
|
||||||
|
if line.startswith("*Source:"):
|
||||||
|
past_source = True
|
||||||
|
line_pos += len(line) + 1
|
||||||
|
continue
|
||||||
|
if past_source and line.strip() == "":
|
||||||
|
# Blank line after source — start of body
|
||||||
|
line_pos += len(line) + 1
|
||||||
|
continue
|
||||||
|
if past_source and line.strip():
|
||||||
|
past_body = True
|
||||||
|
# After we've seen body content, a blank line followed by
|
||||||
|
# a section marker means the block is done
|
||||||
|
if past_body and (
|
||||||
|
line.startswith("---")
|
||||||
|
or line.startswith("## ")
|
||||||
|
or line.startswith("### ") # next evidence or other heading
|
||||||
|
or re.match(r'^(?:Relevant Notes|Topics)\s*:?', line)
|
||||||
|
):
|
||||||
|
end_offset = line_pos
|
||||||
|
break
|
||||||
|
line_pos += len(line) + 1
|
||||||
|
|
||||||
|
block_end = block_start + end_offset
|
||||||
|
|
||||||
|
# Extract source key
|
||||||
|
block_text = content[block_start:block_end]
|
||||||
|
src_match = _SOURCE_LINE.search(block_text)
|
||||||
|
source_key = src_match.group(1).strip() if src_match else f"_unknown_{i}"
|
||||||
|
|
||||||
|
blocks.append((block_start, block_end, source_key))
|
||||||
|
|
||||||
|
# Now rebuild content, skipping duplicate sources
|
||||||
|
seen: set[str] = set()
|
||||||
|
result_parts = [content[:blocks[0][0]]]
|
||||||
|
removed = 0
|
||||||
|
|
||||||
|
for start, end, source_key in blocks:
|
||||||
|
if source_key in seen:
|
||||||
|
removed += 1
|
||||||
|
continue
|
||||||
|
seen.add(source_key)
|
||||||
|
result_parts.append(content[start:end])
|
||||||
|
|
||||||
|
# Append any content after the last block
|
||||||
|
last_end = blocks[-1][1]
|
||||||
|
if last_end < len(content):
|
||||||
|
result_parts.append(content[last_end:])
|
||||||
|
|
||||||
|
if removed > 0:
|
||||||
|
logger.info("Deduped %d duplicate evidence block(s)", removed)
|
||||||
|
|
||||||
|
return "".join(result_parts)
|
||||||
208
ops/pipeline-v2/lib/digest.py
Normal file
208
ops/pipeline-v2/lib/digest.py
Normal file
|
|
@ -0,0 +1,208 @@
|
||||||
|
"""Daily digest — sends Cory a summary of all Tier 3 activity at 8am London time.
|
||||||
|
|
||||||
|
Aggregates: merged claims (with insight summaries), pipeline metrics, agent activity,
|
||||||
|
pending review items. Runs as a scheduled job in bot.py.
|
||||||
|
|
||||||
|
Epimetheus owns this module.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import sqlite3
|
||||||
|
from datetime import datetime, timezone, timedelta
|
||||||
|
from zoneinfo import ZoneInfo
|
||||||
|
|
||||||
|
logger = logging.getLogger("telegram.digest")
|
||||||
|
|
||||||
|
LONDON_TZ = ZoneInfo("Europe/London")
|
||||||
|
DIGEST_HOUR_LONDON = 8 # 8am London time (auto-adjusts for BST/GMT)
|
||||||
|
|
||||||
|
|
||||||
|
def next_digest_time() -> datetime:
|
||||||
|
"""Calculate the next 8am London time as a UTC datetime.
|
||||||
|
|
||||||
|
Handles BST/GMT transitions automatically via zoneinfo.
|
||||||
|
"""
|
||||||
|
now = datetime.now(LONDON_TZ)
|
||||||
|
target = now.replace(hour=DIGEST_HOUR_LONDON, minute=0, second=0, microsecond=0)
|
||||||
|
if target <= now:
|
||||||
|
target += timedelta(days=1)
|
||||||
|
return target.astimezone(timezone.utc)
|
||||||
|
|
||||||
|
|
||||||
|
def _get_merged_claims_24h(conn: sqlite3.Connection) -> list[dict]:
|
||||||
|
"""Get PRs merged in the last 24 hours with domain and branch info."""
|
||||||
|
rows = conn.execute(
|
||||||
|
"""SELECT number, branch, domain, agent, commit_type, merged_at, description
|
||||||
|
FROM prs
|
||||||
|
WHERE merged_at > datetime('now', '-24 hours')
|
||||||
|
AND status = 'merged'
|
||||||
|
ORDER BY merged_at DESC""",
|
||||||
|
).fetchall()
|
||||||
|
return [dict(r) for r in rows]
|
||||||
|
|
||||||
|
|
||||||
|
def _get_pipeline_metrics_24h(conn: sqlite3.Connection) -> dict:
|
||||||
|
"""Get pipeline activity metrics for the last 24 hours."""
|
||||||
|
total_merged = conn.execute(
|
||||||
|
"SELECT COUNT(*) FROM prs WHERE merged_at > datetime('now', '-24 hours') AND status = 'merged'"
|
||||||
|
).fetchone()[0]
|
||||||
|
|
||||||
|
total_closed = conn.execute(
|
||||||
|
"SELECT COUNT(*) FROM prs WHERE status = 'closed' AND created_at > datetime('now', '-24 hours')"
|
||||||
|
).fetchone()[0]
|
||||||
|
|
||||||
|
total_conflict = conn.execute(
|
||||||
|
"SELECT COUNT(*) FROM prs WHERE status IN ('conflict', 'conflict_permanent') AND created_at > datetime('now', '-24 hours')"
|
||||||
|
).fetchone()[0]
|
||||||
|
|
||||||
|
total_open = conn.execute(
|
||||||
|
"SELECT COUNT(*) FROM prs WHERE status IN ('open', 'reviewing', 'approved', 'merging')"
|
||||||
|
).fetchone()[0]
|
||||||
|
|
||||||
|
# Approval rate (last 24h)
|
||||||
|
evaluated = conn.execute(
|
||||||
|
"SELECT COUNT(*) FROM prs WHERE leo_verdict IN ('approve', 'request_changes') AND created_at > datetime('now', '-24 hours')"
|
||||||
|
).fetchone()[0]
|
||||||
|
approved = conn.execute(
|
||||||
|
"SELECT COUNT(*) FROM prs WHERE leo_verdict = 'approve' AND created_at > datetime('now', '-24 hours')"
|
||||||
|
).fetchone()[0]
|
||||||
|
approval_rate = (approved / evaluated * 100) if evaluated > 0 else 0
|
||||||
|
|
||||||
|
return {
|
||||||
|
"merged": total_merged,
|
||||||
|
"closed": total_closed,
|
||||||
|
"conflict": total_conflict,
|
||||||
|
"open": total_open,
|
||||||
|
"evaluated": evaluated,
|
||||||
|
"approved": approved,
|
||||||
|
"approval_rate": approval_rate,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _get_agent_activity_24h(conn: sqlite3.Connection) -> dict[str, int]:
|
||||||
|
"""Get PR count by agent for the last 24 hours."""
|
||||||
|
rows = conn.execute(
|
||||||
|
"""SELECT agent, COUNT(*) as cnt
|
||||||
|
FROM prs
|
||||||
|
WHERE created_at > datetime('now', '-24 hours')
|
||||||
|
AND agent IS NOT NULL
|
||||||
|
GROUP BY agent
|
||||||
|
ORDER BY cnt DESC""",
|
||||||
|
).fetchall()
|
||||||
|
return {r["agent"]: r["cnt"] for r in rows}
|
||||||
|
|
||||||
|
|
||||||
|
def _get_pending_review_count(conn: sqlite3.Connection) -> int:
|
||||||
|
"""Count PRs awaiting review."""
|
||||||
|
return conn.execute(
|
||||||
|
"SELECT COUNT(*) FROM prs WHERE status IN ('open', 'reviewing')"
|
||||||
|
).fetchone()[0]
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_claim_title(branch: str) -> str:
|
||||||
|
"""Extract a human-readable claim title from a branch name.
|
||||||
|
|
||||||
|
Branch format: extract/source-slug or agent/description
|
||||||
|
"""
|
||||||
|
# Strip prefix (extract/, research/, theseus/, etc.)
|
||||||
|
parts = branch.split("/", 1)
|
||||||
|
slug = parts[1] if len(parts) > 1 else parts[0]
|
||||||
|
# Convert slug to readable title
|
||||||
|
return slug.replace("-", " ").replace("_", " ").title()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def format_digest(
|
||||||
|
merged_claims: list[dict],
|
||||||
|
metrics: dict,
|
||||||
|
agent_activity: dict[str, int],
|
||||||
|
pending_review: int,
|
||||||
|
) -> str:
|
||||||
|
"""Format the daily digest message."""
|
||||||
|
now = datetime.now(timezone.utc)
|
||||||
|
date_str = now.strftime("%Y-%m-%d")
|
||||||
|
|
||||||
|
parts = [f"DAILY DIGEST — {date_str}", ""]
|
||||||
|
|
||||||
|
# Merged claims section
|
||||||
|
if merged_claims:
|
||||||
|
# Group by domain
|
||||||
|
by_domain: dict[str, list] = {}
|
||||||
|
for claim in merged_claims:
|
||||||
|
domain = claim.get("domain") or "unknown"
|
||||||
|
by_domain.setdefault(domain, []).append(claim)
|
||||||
|
|
||||||
|
parts.append(f"CLAIMS MERGED ({len(merged_claims)})")
|
||||||
|
for domain, claims in sorted(by_domain.items()):
|
||||||
|
for c in claims:
|
||||||
|
# Use real description from frontmatter if available, fall back to slug title
|
||||||
|
desc = c.get("description")
|
||||||
|
if desc:
|
||||||
|
# Take first description if multiple (pipe-delimited)
|
||||||
|
display = desc.split(" | ")[0]
|
||||||
|
if len(display) > 120:
|
||||||
|
display = display[:117] + "..."
|
||||||
|
else:
|
||||||
|
display = _extract_claim_title(c.get("branch", "unknown"))
|
||||||
|
commit_type = c.get("commit_type", "")
|
||||||
|
type_tag = f"[{commit_type}] " if commit_type else ""
|
||||||
|
parts.append(f" {type_tag}{display} ({domain})")
|
||||||
|
parts.append("")
|
||||||
|
else:
|
||||||
|
parts.extend(["CLAIMS MERGED (0)", " No claims merged in the last 24h", ""])
|
||||||
|
|
||||||
|
# Pipeline metrics
|
||||||
|
success_rate = 0
|
||||||
|
total_attempted = metrics["merged"] + metrics["closed"] + metrics["conflict"]
|
||||||
|
if total_attempted > 0:
|
||||||
|
success_rate = metrics["merged"] / total_attempted * 100
|
||||||
|
|
||||||
|
parts.append("PIPELINE")
|
||||||
|
parts.append(f" Merged: {metrics['merged']} | Closed: {metrics['closed']} | Conflicts: {metrics['conflict']}")
|
||||||
|
parts.append(f" Success rate: {success_rate:.0f}% | Approval rate: {metrics['approval_rate']:.0f}%")
|
||||||
|
parts.append(f" Open PRs: {metrics['open']}")
|
||||||
|
parts.append("")
|
||||||
|
|
||||||
|
# Agent activity
|
||||||
|
if agent_activity:
|
||||||
|
parts.append("AGENTS")
|
||||||
|
for agent, count in agent_activity.items():
|
||||||
|
parts.append(f" {agent}: {count} PRs")
|
||||||
|
parts.append("")
|
||||||
|
else:
|
||||||
|
parts.extend(["AGENTS", " No agent activity in the last 24h", ""])
|
||||||
|
|
||||||
|
# Pending review
|
||||||
|
if pending_review > 0:
|
||||||
|
parts.append(f"PENDING YOUR REVIEW: {pending_review}")
|
||||||
|
else:
|
||||||
|
parts.append("PENDING YOUR REVIEW: 0")
|
||||||
|
|
||||||
|
return "\n".join(parts)
|
||||||
|
|
||||||
|
|
||||||
|
async def send_daily_digest(context):
|
||||||
|
"""Send daily digest to admin chat. Scheduled job."""
|
||||||
|
conn = context.bot_data.get("approval_conn")
|
||||||
|
admin_chat_id = context.bot_data.get("admin_chat_id")
|
||||||
|
|
||||||
|
if not conn or not admin_chat_id:
|
||||||
|
logger.debug("Digest skipped — no DB connection or admin chat ID")
|
||||||
|
return
|
||||||
|
|
||||||
|
try:
|
||||||
|
merged = _get_merged_claims_24h(conn)
|
||||||
|
metrics = _get_pipeline_metrics_24h(conn)
|
||||||
|
activity = _get_agent_activity_24h(conn)
|
||||||
|
pending = _get_pending_review_count(conn)
|
||||||
|
|
||||||
|
text = format_digest(merged, metrics, activity, pending)
|
||||||
|
|
||||||
|
await context.bot.send_message(
|
||||||
|
chat_id=admin_chat_id,
|
||||||
|
text=text,
|
||||||
|
)
|
||||||
|
logger.info("Daily digest sent (%d claims, %d agents active)",
|
||||||
|
len(merged), len(activity))
|
||||||
|
except Exception as e:
|
||||||
|
logger.error("Failed to send daily digest: %s", e)
|
||||||
87
ops/pipeline-v2/lib/domains.py
Normal file
87
ops/pipeline-v2/lib/domains.py
Normal file
|
|
@ -0,0 +1,87 @@
|
||||||
|
"""Domain→agent mapping and domain detection — single source of truth.
|
||||||
|
|
||||||
|
Extracted from evaluate.py and merge.py (Phase 3 refactor).
|
||||||
|
All domain classification logic goes through this module.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
# Canonical domain→agent mapping. Every domain must have exactly one primary agent.
|
||||||
|
DOMAIN_AGENT_MAP: dict[str, str] = {
|
||||||
|
"internet-finance": "Rio",
|
||||||
|
"entertainment": "Clay",
|
||||||
|
"health": "Vida",
|
||||||
|
"ai-alignment": "Theseus",
|
||||||
|
"space-development": "Astra",
|
||||||
|
"mechanisms": "Rio",
|
||||||
|
"living-capital": "Rio",
|
||||||
|
"living-agents": "Theseus",
|
||||||
|
"teleohumanity": "Leo",
|
||||||
|
"grand-strategy": "Leo",
|
||||||
|
"critical-systems": "Theseus",
|
||||||
|
"collective-intelligence": "Theseus",
|
||||||
|
"teleological-economics": "Rio",
|
||||||
|
"cultural-dynamics": "Clay",
|
||||||
|
}
|
||||||
|
|
||||||
|
# Valid domain names — derived from the map, not maintained separately.
|
||||||
|
VALID_DOMAINS: frozenset[str] = frozenset(DOMAIN_AGENT_MAP.keys())
|
||||||
|
|
||||||
|
# Inverse mapping: agent name (lowercase) → primary domain (for branch detection).
|
||||||
|
_AGENT_PRIMARY_DOMAIN: dict[str, str] = {
|
||||||
|
"rio": "internet-finance",
|
||||||
|
"clay": "entertainment",
|
||||||
|
"theseus": "ai-alignment",
|
||||||
|
"vida": "health",
|
||||||
|
"astra": "space-development",
|
||||||
|
"leo": "grand-strategy",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def agent_for_domain(domain: str | None) -> str:
|
||||||
|
"""Get the reviewing agent for a domain. Falls back to Leo."""
|
||||||
|
if domain is None:
|
||||||
|
return "Leo"
|
||||||
|
return DOMAIN_AGENT_MAP.get(domain, "Leo")
|
||||||
|
|
||||||
|
|
||||||
|
def detect_domain_from_diff(diff: str) -> str | None:
|
||||||
|
"""Detect primary domain from changed file paths in a unified diff.
|
||||||
|
|
||||||
|
Checks domains/, entities/, core/, foundations/ for domain classification.
|
||||||
|
Returns the most-referenced domain, or None if no domain files found.
|
||||||
|
"""
|
||||||
|
domain_counts: dict[str, int] = {}
|
||||||
|
for line in diff.split("\n"):
|
||||||
|
if line.startswith("diff --git"):
|
||||||
|
# Check domains/ and entities/ (both carry domain info)
|
||||||
|
match = re.search(r"(?:domains|entities)/([^/]+)/", line)
|
||||||
|
if match:
|
||||||
|
d = match.group(1)
|
||||||
|
domain_counts[d] = domain_counts.get(d, 0) + 1
|
||||||
|
continue
|
||||||
|
# Check core/ subdirectories
|
||||||
|
match = re.search(r"core/([^/]+)/", line)
|
||||||
|
if match:
|
||||||
|
d = match.group(1)
|
||||||
|
if d in DOMAIN_AGENT_MAP:
|
||||||
|
domain_counts[d] = domain_counts.get(d, 0) + 1
|
||||||
|
continue
|
||||||
|
# Check foundations/ subdirectories
|
||||||
|
match = re.search(r"foundations/([^/]+)/", line)
|
||||||
|
if match:
|
||||||
|
d = match.group(1)
|
||||||
|
if d in DOMAIN_AGENT_MAP:
|
||||||
|
domain_counts[d] = domain_counts.get(d, 0) + 1
|
||||||
|
if domain_counts:
|
||||||
|
return max(domain_counts, key=domain_counts.get)
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def detect_domain_from_branch(branch: str) -> str | None:
|
||||||
|
"""Extract domain from branch name like 'rio/claims-futarchy' → 'internet-finance'.
|
||||||
|
|
||||||
|
Uses agent prefix → primary domain mapping for pipeline branches.
|
||||||
|
"""
|
||||||
|
prefix = branch.split("/")[0].lower() if "/" in branch else ""
|
||||||
|
return _AGENT_PRIMARY_DOMAIN.get(prefix)
|
||||||
358
ops/pipeline-v2/lib/entity_batch.py
Normal file
358
ops/pipeline-v2/lib/entity_batch.py
Normal file
|
|
@ -0,0 +1,358 @@
|
||||||
|
"""Entity batch processor — applies queued entity operations to main.
|
||||||
|
|
||||||
|
Reads from entity_queue, applies creates/updates to the main worktree,
|
||||||
|
commits directly to main. No PR needed for entity timeline appends —
|
||||||
|
they're factual, commutative, and low-risk.
|
||||||
|
|
||||||
|
Entity creates (new entity files) go through PR review like claims.
|
||||||
|
Entity updates (timeline appends) commit directly — they're additive
|
||||||
|
and recoverable from source archives if wrong.
|
||||||
|
|
||||||
|
Runs as part of the pipeline's ingest stage or as a standalone cron.
|
||||||
|
|
||||||
|
Epimetheus owns this module. Leo reviews changes. Rhea deploys.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
from datetime import date
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from . import config, db
|
||||||
|
from .entity_queue import cleanup, dequeue, mark_failed, mark_processed
|
||||||
|
|
||||||
|
logger = logging.getLogger("pipeline.entity_batch")
|
||||||
|
|
||||||
|
|
||||||
|
def _read_file(path: str) -> str:
|
||||||
|
try:
|
||||||
|
with open(path) as f:
|
||||||
|
return f.read()
|
||||||
|
except FileNotFoundError:
|
||||||
|
return ""
|
||||||
|
|
||||||
|
|
||||||
|
async def _git(*args, cwd: str = None, timeout: int = 60) -> tuple[int, str]:
|
||||||
|
"""Run a git command async."""
|
||||||
|
proc = await asyncio.create_subprocess_exec(
|
||||||
|
"git", *args,
|
||||||
|
cwd=cwd or str(config.MAIN_WORKTREE),
|
||||||
|
stdout=asyncio.subprocess.PIPE,
|
||||||
|
stderr=asyncio.subprocess.PIPE,
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=timeout)
|
||||||
|
except asyncio.TimeoutError:
|
||||||
|
proc.kill()
|
||||||
|
await proc.wait()
|
||||||
|
return -1, f"git {args[0]} timed out after {timeout}s"
|
||||||
|
output = (stdout or b"").decode().strip()
|
||||||
|
if stderr:
|
||||||
|
output += "\n" + stderr.decode().strip()
|
||||||
|
return proc.returncode, output
|
||||||
|
|
||||||
|
|
||||||
|
def _apply_timeline_entry(entity_path: str, timeline_entry: str) -> tuple[bool, str]:
|
||||||
|
"""Append a timeline entry to an existing entity file.
|
||||||
|
|
||||||
|
Returns (success, message).
|
||||||
|
"""
|
||||||
|
if not os.path.exists(entity_path):
|
||||||
|
return False, f"entity file not found: {entity_path}"
|
||||||
|
|
||||||
|
content = _read_file(entity_path)
|
||||||
|
if not content:
|
||||||
|
return False, f"entity file empty: {entity_path}"
|
||||||
|
|
||||||
|
# Check for duplicate timeline entry
|
||||||
|
if timeline_entry.strip() in content:
|
||||||
|
return False, "duplicate timeline entry"
|
||||||
|
|
||||||
|
# Find or create Timeline section
|
||||||
|
if "## Timeline" in content:
|
||||||
|
lines = content.split("\n")
|
||||||
|
insert_idx = len(lines)
|
||||||
|
in_timeline = False
|
||||||
|
for i, line in enumerate(lines):
|
||||||
|
if line.strip().startswith("## Timeline"):
|
||||||
|
in_timeline = True
|
||||||
|
continue
|
||||||
|
if in_timeline and line.strip().startswith("## "):
|
||||||
|
insert_idx = i
|
||||||
|
break
|
||||||
|
lines.insert(insert_idx, timeline_entry)
|
||||||
|
updated = "\n".join(lines)
|
||||||
|
else:
|
||||||
|
updated = content.rstrip() + "\n\n## Timeline\n\n" + timeline_entry + "\n"
|
||||||
|
|
||||||
|
with open(entity_path, "w") as f:
|
||||||
|
f.write(updated)
|
||||||
|
|
||||||
|
return True, "timeline entry appended"
|
||||||
|
|
||||||
|
|
||||||
|
def _apply_claim_enrichment(claim_path: str, evidence: str, pr_number: int,
|
||||||
|
original_title: str, similarity: float) -> tuple[bool, str]:
|
||||||
|
"""Append auto-enrichment evidence to an existing claim file.
|
||||||
|
|
||||||
|
Used for near-duplicate auto-conversion. (Ganymede: route through entity_batch)
|
||||||
|
"""
|
||||||
|
if not os.path.exists(claim_path):
|
||||||
|
return False, f"target claim not found: {claim_path}"
|
||||||
|
|
||||||
|
content = _read_file(claim_path)
|
||||||
|
if not content:
|
||||||
|
return False, f"target claim empty: {claim_path}"
|
||||||
|
|
||||||
|
# Dedup: skip if this PR already enriched this claim (idempotency)
|
||||||
|
if f"PR #{pr_number}" in content:
|
||||||
|
return False, f"already enriched by PR #{pr_number}"
|
||||||
|
|
||||||
|
enrichment_block = (
|
||||||
|
f"\n\n### Auto-enrichment (near-duplicate conversion, similarity={similarity:.2f})\n"
|
||||||
|
f"*Source: PR #{pr_number} — \"{original_title}\"*\n"
|
||||||
|
f"*Auto-converted by substantive fixer. Review: revert if this evidence doesn't belong here.*\n\n"
|
||||||
|
f"{evidence}\n"
|
||||||
|
)
|
||||||
|
|
||||||
|
if "\n---\n" in content:
|
||||||
|
parts = content.rsplit("\n---\n", 1)
|
||||||
|
updated = parts[0] + enrichment_block + "\n---\n" + parts[1]
|
||||||
|
else:
|
||||||
|
updated = content + enrichment_block
|
||||||
|
|
||||||
|
with open(claim_path, "w") as f:
|
||||||
|
f.write(updated)
|
||||||
|
|
||||||
|
return True, "enrichment appended"
|
||||||
|
|
||||||
|
|
||||||
|
def _apply_entity_create(entity_path: str, content: str) -> tuple[bool, str]:
|
||||||
|
"""Create a new entity file. Returns (success, message)."""
|
||||||
|
if os.path.exists(entity_path):
|
||||||
|
return False, f"entity already exists: {entity_path}"
|
||||||
|
|
||||||
|
os.makedirs(os.path.dirname(entity_path), exist_ok=True)
|
||||||
|
with open(entity_path, "w") as f:
|
||||||
|
f.write(content)
|
||||||
|
|
||||||
|
return True, "entity created"
|
||||||
|
|
||||||
|
|
||||||
|
async def apply_batch(conn=None, max_entries: int = 50) -> tuple[int, int]:
|
||||||
|
"""Process the entity queue. Returns (applied, failed).
|
||||||
|
|
||||||
|
1. Pull latest main
|
||||||
|
2. Read pending queue entries
|
||||||
|
3. Apply each operation to the main worktree
|
||||||
|
4. Commit all changes in one batch commit
|
||||||
|
5. Push to origin
|
||||||
|
"""
|
||||||
|
main_wt = str(config.MAIN_WORKTREE)
|
||||||
|
|
||||||
|
# Ensure we're on main branch — batch script may have left worktree on an extract branch
|
||||||
|
await _git("checkout", "main", cwd=main_wt)
|
||||||
|
|
||||||
|
# Pull latest main
|
||||||
|
rc, out = await _git("fetch", "origin", "main", cwd=main_wt)
|
||||||
|
if rc != 0:
|
||||||
|
logger.error("Failed to fetch main: %s", out)
|
||||||
|
return 0, 0
|
||||||
|
rc, out = await _git("reset", "--hard", "origin/main", cwd=main_wt)
|
||||||
|
if rc != 0:
|
||||||
|
logger.error("Failed to reset main: %s", out)
|
||||||
|
return 0, 0
|
||||||
|
|
||||||
|
# Read queue
|
||||||
|
entries = dequeue(limit=max_entries)
|
||||||
|
if not entries:
|
||||||
|
return 0, 0
|
||||||
|
|
||||||
|
logger.info("Processing %d entity queue entries", len(entries))
|
||||||
|
|
||||||
|
applied_entries: list[dict] = [] # Track for post-push marking (Ganymede review)
|
||||||
|
failed = 0
|
||||||
|
files_changed: set[str] = set()
|
||||||
|
|
||||||
|
for entry in entries:
|
||||||
|
# Handle enrichments (from substantive fixer near-duplicate conversion)
|
||||||
|
if entry.get("type") == "enrichment":
|
||||||
|
target = entry.get("target_claim", "")
|
||||||
|
evidence = entry.get("evidence", "")
|
||||||
|
domain = entry.get("domain", "")
|
||||||
|
if not target or not evidence:
|
||||||
|
mark_failed(entry, "enrichment missing target or evidence")
|
||||||
|
failed += 1
|
||||||
|
continue
|
||||||
|
claim_path = os.path.join(main_wt, "domains", domain, os.path.basename(target))
|
||||||
|
rel_path = os.path.join("domains", domain, os.path.basename(target))
|
||||||
|
try:
|
||||||
|
ok, msg = _apply_claim_enrichment(
|
||||||
|
claim_path, evidence, entry.get("pr_number", 0),
|
||||||
|
entry.get("original_title", ""), entry.get("similarity", 0),
|
||||||
|
)
|
||||||
|
if ok:
|
||||||
|
files_changed.add(rel_path)
|
||||||
|
applied_entries.append(entry)
|
||||||
|
logger.info("Applied enrichment to %s: %s", target, msg)
|
||||||
|
else:
|
||||||
|
mark_failed(entry, msg)
|
||||||
|
failed += 1
|
||||||
|
except Exception as e:
|
||||||
|
logger.exception("Failed enrichment on %s", target)
|
||||||
|
mark_failed(entry, str(e))
|
||||||
|
failed += 1
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Handle entity operations
|
||||||
|
entity = entry.get("entity", {})
|
||||||
|
filename = entity.get("filename", "")
|
||||||
|
domain = entity.get("domain", "")
|
||||||
|
action = entity.get("action", "")
|
||||||
|
|
||||||
|
if not filename or not domain:
|
||||||
|
mark_failed(entry, "missing filename or domain")
|
||||||
|
failed += 1
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Sanitize filename — prevent path traversal (Ganymede review)
|
||||||
|
filename = os.path.basename(filename)
|
||||||
|
|
||||||
|
entity_dir = os.path.join(main_wt, "entities", domain)
|
||||||
|
entity_path = os.path.join(entity_dir, filename)
|
||||||
|
rel_path = os.path.join("entities", domain, filename)
|
||||||
|
|
||||||
|
try:
|
||||||
|
if action == "update":
|
||||||
|
timeline = entity.get("timeline_entry", "")
|
||||||
|
if not timeline:
|
||||||
|
mark_failed(entry, "update with no timeline_entry")
|
||||||
|
failed += 1
|
||||||
|
continue
|
||||||
|
|
||||||
|
ok, msg = _apply_timeline_entry(entity_path, timeline)
|
||||||
|
if ok:
|
||||||
|
files_changed.add(rel_path)
|
||||||
|
applied_entries.append(entry)
|
||||||
|
logger.debug("Applied update to %s: %s", filename, msg)
|
||||||
|
else:
|
||||||
|
mark_failed(entry, msg)
|
||||||
|
failed += 1
|
||||||
|
|
||||||
|
elif action == "create":
|
||||||
|
content = entity.get("content", "")
|
||||||
|
if not content:
|
||||||
|
mark_failed(entry, "create with no content")
|
||||||
|
failed += 1
|
||||||
|
continue
|
||||||
|
|
||||||
|
# If entity already exists, try to apply as timeline update instead
|
||||||
|
if os.path.exists(entity_path):
|
||||||
|
timeline = entity.get("timeline_entry", "")
|
||||||
|
if timeline:
|
||||||
|
ok, msg = _apply_timeline_entry(entity_path, timeline)
|
||||||
|
if ok:
|
||||||
|
files_changed.add(rel_path)
|
||||||
|
applied_entries.append(entry)
|
||||||
|
else:
|
||||||
|
mark_failed(entry, f"create→update fallback: {msg}")
|
||||||
|
failed += 1
|
||||||
|
else:
|
||||||
|
mark_failed(entry, "entity exists, no timeline to append")
|
||||||
|
failed += 1
|
||||||
|
continue
|
||||||
|
|
||||||
|
ok, msg = _apply_entity_create(entity_path, content)
|
||||||
|
if ok:
|
||||||
|
files_changed.add(rel_path)
|
||||||
|
applied_entries.append(entry)
|
||||||
|
logger.debug("Created entity %s", filename)
|
||||||
|
else:
|
||||||
|
mark_failed(entry, msg)
|
||||||
|
failed += 1
|
||||||
|
|
||||||
|
else:
|
||||||
|
mark_failed(entry, f"unknown action: {action}")
|
||||||
|
failed += 1
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.exception("Failed to apply entity %s", filename)
|
||||||
|
mark_failed(entry, str(e))
|
||||||
|
failed += 1
|
||||||
|
|
||||||
|
applied = len(applied_entries)
|
||||||
|
|
||||||
|
# Commit and push if any files changed
|
||||||
|
if files_changed:
|
||||||
|
# Stage changed files
|
||||||
|
for f in files_changed:
|
||||||
|
await _git("add", f, cwd=main_wt)
|
||||||
|
|
||||||
|
# Commit
|
||||||
|
commit_msg = (
|
||||||
|
f"entity-batch: update {len(files_changed)} entities\n\n"
|
||||||
|
f"- Applied {applied} entity operations from queue\n"
|
||||||
|
f"- Files: {', '.join(sorted(files_changed)[:10])}"
|
||||||
|
f"{'...' if len(files_changed) > 10 else ''}\n\n"
|
||||||
|
f"Pentagon-Agent: Epimetheus <968B2991-E2DF-4006-B962-F5B0A0CC8ACA>"
|
||||||
|
)
|
||||||
|
rc, out = await _git("commit", "-m", commit_msg, cwd=main_wt)
|
||||||
|
if rc != 0:
|
||||||
|
logger.error("Entity batch commit failed: %s", out)
|
||||||
|
return applied, failed
|
||||||
|
|
||||||
|
# Push with retry — main advances frequently from merge module.
|
||||||
|
# Pull-rebase before each attempt to catch up with remote.
|
||||||
|
push_ok = False
|
||||||
|
for attempt in range(3):
|
||||||
|
# Always pull-rebase before pushing to catch up with remote main
|
||||||
|
rc, out = await _git("pull", "--rebase", "origin", "main", cwd=main_wt, timeout=30)
|
||||||
|
if rc != 0:
|
||||||
|
logger.warning("Entity batch pull-rebase failed (attempt %d): %s", attempt + 1, out)
|
||||||
|
await _git("rebase", "--abort", cwd=main_wt)
|
||||||
|
await _git("reset", "--hard", "origin/main", cwd=main_wt)
|
||||||
|
return 0, failed + applied
|
||||||
|
|
||||||
|
rc, out = await _git("push", "origin", "main", cwd=main_wt, timeout=30)
|
||||||
|
if rc == 0:
|
||||||
|
push_ok = True
|
||||||
|
break
|
||||||
|
logger.warning("Entity batch push failed (attempt %d), retrying: %s", attempt + 1, out[:100])
|
||||||
|
await asyncio.sleep(2) # Brief pause before retry
|
||||||
|
|
||||||
|
if not push_ok:
|
||||||
|
logger.error("Entity batch push failed after 3 attempts")
|
||||||
|
await _git("reset", "--hard", "origin/main", cwd=main_wt)
|
||||||
|
return 0, failed + applied
|
||||||
|
|
||||||
|
# Push succeeded — NOW mark entries as processed (Ganymede review)
|
||||||
|
for entry in applied_entries:
|
||||||
|
mark_processed(entry)
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
"Entity batch: committed %d file changes (%d applied, %d failed)",
|
||||||
|
len(files_changed), applied, failed,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Audit
|
||||||
|
if conn:
|
||||||
|
db.audit(
|
||||||
|
conn, "entity_batch", "batch_applied",
|
||||||
|
json.dumps({
|
||||||
|
"applied": applied, "failed": failed,
|
||||||
|
"files": sorted(files_changed)[:20],
|
||||||
|
}),
|
||||||
|
)
|
||||||
|
|
||||||
|
# Cleanup old entries
|
||||||
|
cleanup(max_age_hours=24)
|
||||||
|
|
||||||
|
return applied, failed
|
||||||
|
|
||||||
|
|
||||||
|
async def entity_batch_cycle(conn, max_workers=None) -> tuple[int, int]:
|
||||||
|
"""Pipeline stage entry point. Called by teleo-pipeline.py's ingest stage."""
|
||||||
|
return await apply_batch(conn)
|
||||||
206
ops/pipeline-v2/lib/entity_queue.py
Normal file
206
ops/pipeline-v2/lib/entity_queue.py
Normal file
|
|
@ -0,0 +1,206 @@
|
||||||
|
"""Entity enrichment queue — decouple entity writes from extraction branches.
|
||||||
|
|
||||||
|
Problem: Entity updates on extraction branches cause merge conflicts because
|
||||||
|
multiple extraction branches modify the same entity file (e.g., metadao.md).
|
||||||
|
83% of near_duplicate false positives come from entity file modifications.
|
||||||
|
|
||||||
|
Solution: Extraction writes entity operations to a JSON queue file on the VPS.
|
||||||
|
A separate batch process reads the queue and applies operations to main.
|
||||||
|
Entity operations are commutative (timeline appends are order-independent),
|
||||||
|
so parallel extractions never conflict.
|
||||||
|
|
||||||
|
Flow:
|
||||||
|
1. openrouter-extract-v2.py → entity_queue.enqueue() instead of direct file writes
|
||||||
|
2. entity_batch.py (cron or pipeline stage) → entity_queue.dequeue() + apply to main
|
||||||
|
3. Commit entity changes to main directly (no PR needed for timeline appends)
|
||||||
|
|
||||||
|
Epimetheus owns this module. Leo reviews changes.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
from datetime import date, datetime
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
logger = logging.getLogger("pipeline.entity_queue")
|
||||||
|
|
||||||
|
# Default queue location (VPS)
|
||||||
|
DEFAULT_QUEUE_DIR = "/opt/teleo-eval/entity-queue"
|
||||||
|
|
||||||
|
|
||||||
|
def _queue_dir() -> Path:
|
||||||
|
"""Get the queue directory, creating it if needed."""
|
||||||
|
d = Path(os.environ.get("ENTITY_QUEUE_DIR", DEFAULT_QUEUE_DIR))
|
||||||
|
d.mkdir(parents=True, exist_ok=True)
|
||||||
|
return d
|
||||||
|
|
||||||
|
|
||||||
|
def enqueue(entity: dict, source_file: str, agent: str) -> str:
|
||||||
|
"""Add an entity operation to the queue. Returns the queue entry ID.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
entity: dict with keys: filename, domain, action (create|update),
|
||||||
|
entity_type, content (for creates), timeline_entry (for updates)
|
||||||
|
source_file: path to the source that produced this entity
|
||||||
|
agent: agent name performing extraction
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Queue entry filename (for tracking)
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
ValueError: if entity dict is missing required fields or has invalid action
|
||||||
|
"""
|
||||||
|
# Validate required fields (Ganymede review)
|
||||||
|
for field in ("filename", "domain", "action"):
|
||||||
|
if not entity.get(field):
|
||||||
|
raise ValueError(f"Entity missing required field: {field}")
|
||||||
|
if entity["action"] not in ("create", "update"):
|
||||||
|
raise ValueError(f"Invalid entity action: {entity['action']}")
|
||||||
|
|
||||||
|
# Sanitize filename — prevent path traversal (Ganymede review)
|
||||||
|
entity["filename"] = os.path.basename(entity["filename"])
|
||||||
|
|
||||||
|
entry_id = f"{int(time.time() * 1000)}-{entity['filename'].replace('.md', '')}"
|
||||||
|
entry = {
|
||||||
|
"id": entry_id,
|
||||||
|
"entity": entity,
|
||||||
|
"source_file": os.path.basename(source_file),
|
||||||
|
"agent": agent,
|
||||||
|
"enqueued_at": datetime.now(tz=__import__('datetime').timezone.utc).isoformat(),
|
||||||
|
"status": "pending",
|
||||||
|
}
|
||||||
|
|
||||||
|
queue_file = _queue_dir() / f"{entry_id}.json"
|
||||||
|
with open(queue_file, "w") as f:
|
||||||
|
json.dump(entry, f, indent=2)
|
||||||
|
|
||||||
|
logger.info("Enqueued entity operation: %s (%s)", entity["filename"], entity.get("action", "?"))
|
||||||
|
return entry_id
|
||||||
|
|
||||||
|
|
||||||
|
def dequeue(limit: int = 50) -> list[dict]:
|
||||||
|
"""Read pending queue entries, oldest first. Returns list of entry dicts.
|
||||||
|
|
||||||
|
Does NOT remove entries — caller marks them processed after successful apply.
|
||||||
|
"""
|
||||||
|
qdir = _queue_dir()
|
||||||
|
entries = []
|
||||||
|
|
||||||
|
for f in sorted(qdir.glob("*.json")):
|
||||||
|
try:
|
||||||
|
with open(f) as fh:
|
||||||
|
entry = json.load(fh)
|
||||||
|
if entry.get("status") == "pending":
|
||||||
|
entry["_queue_path"] = str(f)
|
||||||
|
entries.append(entry)
|
||||||
|
if len(entries) >= limit:
|
||||||
|
break
|
||||||
|
except (json.JSONDecodeError, KeyError) as e:
|
||||||
|
logger.warning("Skipping malformed queue entry %s: %s", f.name, e)
|
||||||
|
|
||||||
|
return entries
|
||||||
|
|
||||||
|
|
||||||
|
def mark_processed(entry: dict, result: str = "applied"):
|
||||||
|
"""Mark a queue entry as processed (or failed).
|
||||||
|
|
||||||
|
Uses atomic write (tmp + rename) to prevent race conditions. (Ganymede review)
|
||||||
|
"""
|
||||||
|
queue_path = entry.get("_queue_path")
|
||||||
|
if not queue_path or not os.path.exists(queue_path):
|
||||||
|
return
|
||||||
|
|
||||||
|
entry["status"] = result
|
||||||
|
entry["processed_at"] = datetime.now(tz=__import__('datetime').timezone.utc).isoformat()
|
||||||
|
# Remove internal tracking field before writing
|
||||||
|
path_backup = queue_path
|
||||||
|
entry.pop("_queue_path", None)
|
||||||
|
|
||||||
|
# Atomic write: tmp file + rename (Ganymede review — prevents race condition)
|
||||||
|
tmp_path = queue_path + ".tmp"
|
||||||
|
with open(tmp_path, "w") as f:
|
||||||
|
json.dump(entry, f, indent=2)
|
||||||
|
os.rename(tmp_path, queue_path)
|
||||||
|
|
||||||
|
|
||||||
|
def mark_failed(entry: dict, error: str):
|
||||||
|
"""Mark a queue entry as failed with error message."""
|
||||||
|
entry["last_error"] = error
|
||||||
|
mark_processed(entry, result="failed")
|
||||||
|
|
||||||
|
|
||||||
|
def queue_enrichment(
|
||||||
|
target_claim: str,
|
||||||
|
evidence: str,
|
||||||
|
pr_number: int,
|
||||||
|
original_title: str,
|
||||||
|
similarity: float,
|
||||||
|
domain: str,
|
||||||
|
) -> str:
|
||||||
|
"""Queue an enrichment for an existing claim. Applied by entity_batch alongside entity updates.
|
||||||
|
|
||||||
|
Used by the substantive fixer for near-duplicate auto-conversion.
|
||||||
|
Single writer pattern — avoids race conditions with direct main writes. (Ganymede)
|
||||||
|
"""
|
||||||
|
entry_id = f"{int(time.time() * 1000)}-enrichment-{os.path.basename(target_claim).replace('.md', '')}"
|
||||||
|
entry = {
|
||||||
|
"id": entry_id,
|
||||||
|
"type": "enrichment",
|
||||||
|
"target_claim": target_claim,
|
||||||
|
"evidence": evidence,
|
||||||
|
"pr_number": pr_number,
|
||||||
|
"original_title": original_title,
|
||||||
|
"similarity": similarity,
|
||||||
|
"domain": domain,
|
||||||
|
"enqueued_at": datetime.now(tz=__import__('datetime').timezone.utc).isoformat(),
|
||||||
|
"status": "pending",
|
||||||
|
}
|
||||||
|
|
||||||
|
queue_file = _queue_dir() / f"{entry_id}.json"
|
||||||
|
with open(queue_file, "w") as f:
|
||||||
|
json.dump(entry, f, indent=2)
|
||||||
|
|
||||||
|
logger.info("Enqueued enrichment: PR #%d → %s (sim=%.2f)", pr_number, target_claim, similarity)
|
||||||
|
return entry_id
|
||||||
|
|
||||||
|
|
||||||
|
def cleanup(max_age_hours: int = 24):
|
||||||
|
"""Remove processed/failed entries older than max_age_hours."""
|
||||||
|
qdir = _queue_dir()
|
||||||
|
cutoff = time.time() - (max_age_hours * 3600)
|
||||||
|
removed = 0
|
||||||
|
|
||||||
|
for f in qdir.glob("*.json"):
|
||||||
|
try:
|
||||||
|
with open(f) as fh:
|
||||||
|
entry = json.load(fh)
|
||||||
|
if entry.get("status") in ("applied", "failed"):
|
||||||
|
if f.stat().st_mtime < cutoff:
|
||||||
|
f.unlink()
|
||||||
|
removed += 1
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
if removed:
|
||||||
|
logger.info("Cleaned up %d old queue entries", removed)
|
||||||
|
return removed
|
||||||
|
|
||||||
|
|
||||||
|
def queue_stats() -> dict:
|
||||||
|
"""Get queue statistics for health monitoring."""
|
||||||
|
qdir = _queue_dir()
|
||||||
|
stats = {"pending": 0, "applied": 0, "failed": 0, "total": 0}
|
||||||
|
|
||||||
|
for f in qdir.glob("*.json"):
|
||||||
|
try:
|
||||||
|
with open(f) as fh:
|
||||||
|
entry = json.load(fh)
|
||||||
|
status = entry.get("status", "unknown")
|
||||||
|
stats[status] = stats.get(status, 0) + 1
|
||||||
|
stats["total"] += 1
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
return stats
|
||||||
|
|
@ -25,9 +25,10 @@ import re
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
from . import config, db
|
from . import config, db
|
||||||
from .domains import agent_for_domain, detect_domain_from_diff
|
from .domains import agent_for_domain, detect_domain_from_branch, detect_domain_from_diff
|
||||||
from .forgejo import api as forgejo_api
|
from .forgejo import api as forgejo_api
|
||||||
from .forgejo import get_agent_token, get_pr_diff, repo_path
|
from .forgejo import get_agent_token, get_pr_diff, repo_path
|
||||||
|
from .merge import PIPELINE_OWNED_PREFIXES
|
||||||
from .llm import run_batch_domain_review, run_domain_review, run_leo_review, triage_pr
|
from .llm import run_batch_domain_review, run_domain_review, run_leo_review, triage_pr
|
||||||
from .feedback import format_rejection_comment
|
from .feedback import format_rejection_comment
|
||||||
from .validate import load_existing_claims
|
from .validate import load_existing_claims
|
||||||
|
|
@ -547,6 +548,31 @@ async def evaluate_pr(conn, pr_number: int, tier: str = None) -> dict:
|
||||||
)
|
)
|
||||||
return {"pr": pr_number, "auto_approved": True, "reason": "musings_only"}
|
return {"pr": pr_number, "auto_approved": True, "reason": "musings_only"}
|
||||||
|
|
||||||
|
# Reweave bypass — reweave PRs only add frontmatter edges (supports/challenges/
|
||||||
|
# related/depends_on/challenged_by). The eval LLM has no context for judging
|
||||||
|
# edge correctness and consistently flags factual_discrepancy on valid edges.
|
||||||
|
# Leo's manual PR review is the real quality gate for reweave.
|
||||||
|
branch_row = conn.execute("SELECT branch FROM prs WHERE number = ?", (pr_number,)).fetchone()
|
||||||
|
branch_name = branch_row["branch"] if branch_row else ""
|
||||||
|
if branch_name.startswith("reweave/"):
|
||||||
|
logger.info("PR #%d is reweave (branch=%s) — auto-approving, Leo reviews manually", pr_number, branch_name)
|
||||||
|
await forgejo_api(
|
||||||
|
"POST",
|
||||||
|
repo_path(f"issues/{pr_number}/comments"),
|
||||||
|
{"body": "Auto-approved: reweave structural update (frontmatter edges only). Leo reviews manually."},
|
||||||
|
)
|
||||||
|
conn.execute(
|
||||||
|
"""UPDATE prs SET status = 'approved', leo_verdict = 'skipped',
|
||||||
|
domain_verdict = 'skipped', auto_merge = 1,
|
||||||
|
domain = COALESCE(domain, 'cross-domain') WHERE number = ?""",
|
||||||
|
(pr_number,),
|
||||||
|
)
|
||||||
|
db.audit(
|
||||||
|
conn, "evaluate", "reweave_bypass",
|
||||||
|
json.dumps({"pr": pr_number, "branch": branch_name}),
|
||||||
|
)
|
||||||
|
return {"pr": pr_number, "auto_approved": True, "reason": "reweave_bypass"}
|
||||||
|
|
||||||
# NOTE: Tier 0.5 mechanical checks now run in validate stage (before eval).
|
# NOTE: Tier 0.5 mechanical checks now run in validate stage (before eval).
|
||||||
# tier0_pass=1 guarantees all mechanical checks passed. No Tier 0.5 here.
|
# tier0_pass=1 guarantees all mechanical checks passed. No Tier 0.5 here.
|
||||||
|
|
||||||
|
|
@ -556,13 +582,15 @@ async def evaluate_pr(conn, pr_number: int, tier: str = None) -> dict:
|
||||||
review_diff = diff
|
review_diff = diff
|
||||||
files = _extract_changed_files(diff)
|
files = _extract_changed_files(diff)
|
||||||
|
|
||||||
# Detect domain
|
# Detect domain — try diff paths first, then branch prefix, then 'general'
|
||||||
domain = detect_domain_from_diff(diff)
|
domain = detect_domain_from_diff(diff)
|
||||||
agent = agent_for_domain(domain)
|
if domain is None:
|
||||||
|
pr_row = conn.execute("SELECT branch FROM prs WHERE number = ?", (pr_number,)).fetchone()
|
||||||
# Default NULL domain to 'general' (archive-only PRs have no domain files)
|
if pr_row and pr_row["branch"]:
|
||||||
|
domain = detect_domain_from_branch(pr_row["branch"])
|
||||||
if domain is None:
|
if domain is None:
|
||||||
domain = "general"
|
domain = "general"
|
||||||
|
agent = agent_for_domain(domain)
|
||||||
|
|
||||||
# Update PR domain if not set
|
# Update PR domain if not set
|
||||||
conn.execute(
|
conn.execute(
|
||||||
|
|
@ -678,16 +706,6 @@ async def evaluate_pr(conn, pr_number: int, tier: str = None) -> dict:
|
||||||
conn, "evaluate", "domain_rejected", json.dumps({"pr": pr_number, "agent": agent, "issues": domain_issues})
|
conn, "evaluate", "domain_rejected", json.dumps({"pr": pr_number, "agent": agent, "issues": domain_issues})
|
||||||
)
|
)
|
||||||
|
|
||||||
# Record structured review outcome
|
|
||||||
claim_files = [f for f in files if any(f.startswith(d) for d in ("domains/", "core/", "foundations/", "decisions/"))]
|
|
||||||
db.record_review(
|
|
||||||
conn, pr_number, reviewer=agent, outcome="rejected",
|
|
||||||
domain=domain, agent=agent, reviewer_model=config.EVAL_DOMAIN_MODEL,
|
|
||||||
rejection_reason=None, # TODO: parse from domain_issues when Leo starts tagging
|
|
||||||
notes=json.dumps(domain_issues) if domain_issues else None,
|
|
||||||
claims_in_batch=max(len(claim_files), 1),
|
|
||||||
)
|
|
||||||
|
|
||||||
# Disposition: check if this PR should be terminated or kept open
|
# Disposition: check if this PR should be terminated or kept open
|
||||||
await _dispose_rejected_pr(conn, pr_number, eval_attempts, domain_issues)
|
await _dispose_rejected_pr(conn, pr_number, eval_attempts, domain_issues)
|
||||||
|
|
||||||
|
|
@ -741,26 +759,27 @@ async def evaluate_pr(conn, pr_number: int, tier: str = None) -> dict:
|
||||||
# Submit formal Forgejo reviews (required for merge)
|
# Submit formal Forgejo reviews (required for merge)
|
||||||
await _post_formal_approvals(pr_number, pr_author)
|
await _post_formal_approvals(pr_number, pr_author)
|
||||||
|
|
||||||
|
# Auto-merge agent PRs: if branch is NOT pipeline-owned, set auto_merge=1
|
||||||
|
# so the merge cycle picks it up without manual intervention.
|
||||||
|
branch_row = conn.execute("SELECT branch FROM prs WHERE number = ?", (pr_number,)).fetchone()
|
||||||
|
branch_name = branch_row["branch"] if branch_row else ""
|
||||||
|
is_agent_pr = not branch_name.startswith(PIPELINE_OWNED_PREFIXES)
|
||||||
|
|
||||||
conn.execute(
|
conn.execute(
|
||||||
"UPDATE prs SET status = 'approved' WHERE number = ?",
|
"UPDATE prs SET status = 'approved', auto_merge = ? WHERE number = ?",
|
||||||
(pr_number,),
|
(1 if is_agent_pr else 0, pr_number),
|
||||||
)
|
)
|
||||||
db.audit(
|
db.audit(
|
||||||
conn,
|
conn,
|
||||||
"evaluate",
|
"evaluate",
|
||||||
"approved",
|
"approved",
|
||||||
json.dumps({"pr": pr_number, "tier": tier, "domain": domain, "leo": leo_verdict, "domain_agent": agent}),
|
json.dumps({"pr": pr_number, "tier": tier, "domain": domain, "leo": leo_verdict, "domain_agent": agent,
|
||||||
|
"auto_merge": is_agent_pr}),
|
||||||
)
|
)
|
||||||
|
if is_agent_pr:
|
||||||
|
logger.info("PR #%d: APPROVED + auto_merge (agent branch %s)", pr_number, branch_name)
|
||||||
|
else:
|
||||||
logger.info("PR #%d: APPROVED (tier=%s, leo=%s, domain=%s)", pr_number, tier, leo_verdict, domain_verdict)
|
logger.info("PR #%d: APPROVED (tier=%s, leo=%s, domain=%s)", pr_number, tier, leo_verdict, domain_verdict)
|
||||||
|
|
||||||
# Record structured review outcome
|
|
||||||
claim_files = [f for f in files if any(f.startswith(d) for d in ("domains/", "core/", "foundations/", "decisions/"))]
|
|
||||||
db.record_review(
|
|
||||||
conn, pr_number, reviewer="leo", outcome="approved",
|
|
||||||
domain=domain, agent=agent,
|
|
||||||
reviewer_model=config.MODEL_SONNET if tier == "STANDARD" else "opus",
|
|
||||||
claims_in_batch=max(len(claim_files), 1),
|
|
||||||
)
|
|
||||||
else:
|
else:
|
||||||
# Collect all issue tags from both reviews
|
# Collect all issue tags from both reviews
|
||||||
all_issues = []
|
all_issues = []
|
||||||
|
|
@ -787,17 +806,6 @@ async def evaluate_pr(conn, pr_number: int, tier: str = None) -> dict:
|
||||||
{"pr": pr_number, "tier": tier, "leo": leo_verdict, "domain": domain_verdict, "issues": all_issues}
|
{"pr": pr_number, "tier": tier, "leo": leo_verdict, "domain": domain_verdict, "issues": all_issues}
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
# Record structured review outcome for Leo rejection
|
|
||||||
claim_files = [f for f in files if any(f.startswith(d) for d in ("domains/", "core/", "foundations/", "decisions/"))]
|
|
||||||
reviewer = "leo" if leo_verdict == "request_changes" else agent
|
|
||||||
db.record_review(
|
|
||||||
conn, pr_number, reviewer=reviewer, outcome="rejected",
|
|
||||||
domain=domain, agent=agent,
|
|
||||||
reviewer_model=config.MODEL_SONNET if tier == "STANDARD" else "opus",
|
|
||||||
notes=json.dumps(all_issues) if all_issues else None,
|
|
||||||
claims_in_batch=max(len(claim_files), 1),
|
|
||||||
)
|
|
||||||
logger.info(
|
logger.info(
|
||||||
"PR #%d: CHANGES REQUESTED (leo=%s, domain=%s, issues=%s)",
|
"PR #%d: CHANGES REQUESTED (leo=%s, domain=%s, issues=%s)",
|
||||||
pr_number,
|
pr_number,
|
||||||
|
|
@ -821,16 +829,7 @@ async def evaluate_pr(conn, pr_number: int, tier: str = None) -> dict:
|
||||||
)
|
)
|
||||||
if leo_verdict not in ("skipped",):
|
if leo_verdict not in ("skipped",):
|
||||||
if tier == "DEEP":
|
if tier == "DEEP":
|
||||||
costs.record_usage(
|
costs.record_usage(conn, config.EVAL_LEO_MODEL, "eval_leo", backend="max")
|
||||||
conn, config.EVAL_LEO_MODEL, "eval_leo",
|
|
||||||
input_tokens=leo_usage.get("prompt_tokens", 0),
|
|
||||||
output_tokens=leo_usage.get("completion_tokens", 0),
|
|
||||||
backend="max",
|
|
||||||
duration_ms=leo_usage.get("duration_ms", 0),
|
|
||||||
cache_read_tokens=leo_usage.get("cache_read_tokens", 0),
|
|
||||||
cache_write_tokens=leo_usage.get("cache_write_tokens", 0),
|
|
||||||
cost_estimate_usd=leo_usage.get("cost_estimate_usd", 0.0),
|
|
||||||
)
|
|
||||||
else:
|
else:
|
||||||
costs.record_usage(
|
costs.record_usage(
|
||||||
conn, config.EVAL_LEO_STANDARD_MODEL, "eval_leo",
|
conn, config.EVAL_LEO_STANDARD_MODEL, "eval_leo",
|
||||||
|
|
@ -1311,7 +1310,7 @@ def _build_domain_batches(
|
||||||
individual.append(row)
|
individual.append(row)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
domain = existing["domain"] if existing and existing["domain"] else "general"
|
domain = existing["domain"] if existing and existing["domain"] and existing["domain"] != "general" else "general"
|
||||||
domain_candidates.setdefault(domain, []).append(row)
|
domain_candidates.setdefault(domain, []).append(row)
|
||||||
|
|
||||||
# Build sized batches per domain
|
# Build sized batches per domain
|
||||||
|
|
|
||||||
756
ops/pipeline-v2/lib/extract.py
Normal file
756
ops/pipeline-v2/lib/extract.py
Normal file
|
|
@ -0,0 +1,756 @@
|
||||||
|
"""Extraction stage — automated claim extraction from queued sources.
|
||||||
|
|
||||||
|
Replaces extract-cron.sh with a Python module inside the pipeline daemon.
|
||||||
|
Processes unprocessed sources in inbox/queue/, extracts claims via LLM,
|
||||||
|
creates PRs on Forgejo, and archives sources on main.
|
||||||
|
|
||||||
|
Flow per source:
|
||||||
|
1. Read source frontmatter (domain, author, rationale)
|
||||||
|
2. Pre-screen: Haiku identifies themes, Qdrant finds prior art
|
||||||
|
3. Build KB index for dedup
|
||||||
|
4. Build extraction prompt (extraction_prompt.py)
|
||||||
|
5. Call Sonnet via OpenRouter
|
||||||
|
6. Parse JSON response
|
||||||
|
7. Post-extraction validation (post_extract.py)
|
||||||
|
8. Create branch, write claim/entity files, commit, push
|
||||||
|
9. Create PR on Forgejo via agent token
|
||||||
|
10. Archive source on main (worktree lock)
|
||||||
|
|
||||||
|
Design: one source at a time (sequential), up to MAX_SOURCES per cycle.
|
||||||
|
Uses the main worktree for reading + archival, extract worktree for branches.
|
||||||
|
|
||||||
|
Epimetheus owns this module. Leo reviews changes.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import secrets
|
||||||
|
from datetime import date
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from . import config
|
||||||
|
from .costs import record_usage
|
||||||
|
from .domains import agent_for_domain
|
||||||
|
from .extraction_prompt import build_extraction_prompt
|
||||||
|
from .forgejo import api as forgejo_api
|
||||||
|
from .llm import openrouter_call
|
||||||
|
from .post_extract import load_existing_claims_from_repo, validate_and_fix_claims
|
||||||
|
from .worktree_lock import async_main_worktree_lock
|
||||||
|
|
||||||
|
logger = logging.getLogger("pipeline.extract")
|
||||||
|
|
||||||
|
# Extraction worktree (separate from main to avoid conflicts)
|
||||||
|
EXTRACT_WORKTREE = config.BASE_DIR / "workspaces" / "extract"
|
||||||
|
|
||||||
|
# Max sources per cycle
|
||||||
|
MAX_SOURCES = int(os.environ.get("MAX_EXTRACT_SOURCES", "3"))
|
||||||
|
|
||||||
|
# KB index cache (rebuilt once per cycle, not per source)
|
||||||
|
_kb_index_cache: dict[str, str] = {}
|
||||||
|
_kb_index_timestamp: float = 0
|
||||||
|
KB_INDEX_TTL = 300 # 5 minutes
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_source_frontmatter(content: str) -> dict:
|
||||||
|
"""Parse source file frontmatter. Returns dict of fields."""
|
||||||
|
if not content.startswith("---"):
|
||||||
|
return {}
|
||||||
|
end = content.find("---", 3)
|
||||||
|
if end == -1:
|
||||||
|
return {}
|
||||||
|
raw = content[3:end]
|
||||||
|
|
||||||
|
fm = {}
|
||||||
|
for line in raw.strip().split("\n"):
|
||||||
|
line = line.strip()
|
||||||
|
if not line or ":" not in line:
|
||||||
|
continue
|
||||||
|
key, _, val = line.partition(":")
|
||||||
|
key = key.strip()
|
||||||
|
val = val.strip().strip('"').strip("'")
|
||||||
|
if val.lower() == "null" or val == "":
|
||||||
|
val = None
|
||||||
|
fm[key] = val
|
||||||
|
return fm
|
||||||
|
|
||||||
|
|
||||||
|
def _get_kb_index(domain: str) -> str:
|
||||||
|
"""Get KB index text for a domain. Uses cached /tmp/kb-indexes/ files."""
|
||||||
|
import time
|
||||||
|
|
||||||
|
global _kb_index_cache, _kb_index_timestamp
|
||||||
|
|
||||||
|
now = time.time()
|
||||||
|
if now - _kb_index_timestamp > KB_INDEX_TTL:
|
||||||
|
_kb_index_cache.clear()
|
||||||
|
_kb_index_timestamp = now
|
||||||
|
|
||||||
|
if domain in _kb_index_cache:
|
||||||
|
return _kb_index_cache[domain]
|
||||||
|
|
||||||
|
# Try pre-generated index files first
|
||||||
|
index_file = Path(f"/tmp/kb-indexes/{domain}.txt")
|
||||||
|
if index_file.exists():
|
||||||
|
text = index_file.read_text(encoding="utf-8")
|
||||||
|
_kb_index_cache[domain] = text
|
||||||
|
return text
|
||||||
|
|
||||||
|
# Fallback: build from repo
|
||||||
|
main = config.MAIN_WORKTREE
|
||||||
|
claims = []
|
||||||
|
domain_dir = main / "domains" / domain
|
||||||
|
if domain_dir.is_dir():
|
||||||
|
for f in domain_dir.glob("*.md"):
|
||||||
|
if not f.name.startswith("_"):
|
||||||
|
claims.append(f"- {f.name}")
|
||||||
|
|
||||||
|
text = f"## Claims in domains/{domain}/\n" + "\n".join(sorted(claims))
|
||||||
|
_kb_index_cache[domain] = text
|
||||||
|
return text
|
||||||
|
|
||||||
|
|
||||||
|
async def _git(*args, cwd: str = None, timeout: int = 60) -> tuple[int, str]:
|
||||||
|
"""Run a git command async. Returns (returncode, stdout+stderr)."""
|
||||||
|
proc = await asyncio.create_subprocess_exec(
|
||||||
|
"git", *args,
|
||||||
|
cwd=cwd or str(EXTRACT_WORKTREE),
|
||||||
|
stdout=asyncio.subprocess.PIPE,
|
||||||
|
stderr=asyncio.subprocess.PIPE,
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=timeout)
|
||||||
|
except asyncio.TimeoutError:
|
||||||
|
proc.kill()
|
||||||
|
await proc.wait()
|
||||||
|
return -1, f"git {args[0]} timed out after {timeout}s"
|
||||||
|
output = (stdout or b"").decode().strip()
|
||||||
|
if stderr:
|
||||||
|
output += "\n" + stderr.decode().strip()
|
||||||
|
return proc.returncode, output
|
||||||
|
|
||||||
|
|
||||||
|
async def _pre_screen(source_content: str, source_title: str) -> str | None:
|
||||||
|
"""Run pre-screening: identify themes and find prior art.
|
||||||
|
|
||||||
|
Returns formatted prior art text, or None if pre-screening fails/unavailable.
|
||||||
|
Non-fatal — extraction proceeds without prior art if this fails.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
from .pre_screen import identify_themes, PRIOR_ART_THRESHOLD
|
||||||
|
from .search import search
|
||||||
|
|
||||||
|
key_file = config.SECRETS_DIR / "openrouter-key"
|
||||||
|
if not key_file.exists():
|
||||||
|
return None
|
||||||
|
|
||||||
|
api_key = key_file.read_text().strip()
|
||||||
|
themes = identify_themes(source_content, api_key, source_title)
|
||||||
|
if not themes:
|
||||||
|
return None
|
||||||
|
|
||||||
|
# Search each theme against Qdrant
|
||||||
|
results = []
|
||||||
|
search_queries = themes + ([source_title] if source_title else [])
|
||||||
|
|
||||||
|
for query in search_queries[:5]:
|
||||||
|
try:
|
||||||
|
hits = search(query, limit=3, score_threshold=PRIOR_ART_THRESHOLD)
|
||||||
|
for hit in hits:
|
||||||
|
title = hit.get("title", hit.get("filename", ""))
|
||||||
|
score = hit.get("score", 0)
|
||||||
|
domain = hit.get("domain", "")
|
||||||
|
if title and score >= PRIOR_ART_THRESHOLD:
|
||||||
|
results.append(f"- [{score:.2f}] {title} (domain: {domain})")
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
|
|
||||||
|
if not results:
|
||||||
|
return None
|
||||||
|
|
||||||
|
# Deduplicate
|
||||||
|
seen = set()
|
||||||
|
unique = []
|
||||||
|
for r in results:
|
||||||
|
if r not in seen:
|
||||||
|
seen.add(r)
|
||||||
|
unique.append(r)
|
||||||
|
|
||||||
|
return "\n".join(unique[:15])
|
||||||
|
|
||||||
|
except Exception:
|
||||||
|
logger.debug("Pre-screening failed (non-fatal)", exc_info=True)
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_extraction_json(text: str) -> dict | None:
|
||||||
|
"""Parse extraction JSON from LLM response. Handles markdown fencing."""
|
||||||
|
if not text:
|
||||||
|
return None
|
||||||
|
|
||||||
|
# Strip markdown code fences
|
||||||
|
text = text.strip()
|
||||||
|
if text.startswith("```"):
|
||||||
|
# Remove opening fence (```json or ```)
|
||||||
|
first_newline = text.index("\n") if "\n" in text else len(text)
|
||||||
|
text = text[first_newline + 1:]
|
||||||
|
if text.endswith("```"):
|
||||||
|
text = text[:-3]
|
||||||
|
text = text.strip()
|
||||||
|
|
||||||
|
try:
|
||||||
|
return json.loads(text)
|
||||||
|
except json.JSONDecodeError as e:
|
||||||
|
logger.warning("Failed to parse extraction JSON: %s", e)
|
||||||
|
# Try to find JSON object in text
|
||||||
|
match = re.search(r"\{[\s\S]+\}", text)
|
||||||
|
if match:
|
||||||
|
try:
|
||||||
|
return json.loads(match.group())
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
pass
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _build_claim_content(claim: dict, agent: str) -> str:
|
||||||
|
"""Build claim markdown file content from extraction JSON."""
|
||||||
|
today = date.today().isoformat()
|
||||||
|
domain = claim.get("domain", "")
|
||||||
|
title = claim.get("title", claim.get("filename", "").replace("-", " ").replace(".md", ""))
|
||||||
|
description = claim.get("description", "")
|
||||||
|
confidence = claim.get("confidence", "experimental")
|
||||||
|
source_ref = claim.get("source", "")
|
||||||
|
body = claim.get("body", "")
|
||||||
|
scope = claim.get("scope", "")
|
||||||
|
sourcer = claim.get("sourcer", "")
|
||||||
|
related = claim.get("related_claims", [])
|
||||||
|
|
||||||
|
lines = [
|
||||||
|
"---",
|
||||||
|
"type: claim",
|
||||||
|
f"domain: {domain}",
|
||||||
|
f'title: "{title}"',
|
||||||
|
f'description: "{description}"',
|
||||||
|
f"confidence: {confidence}",
|
||||||
|
f'source: "{source_ref}"',
|
||||||
|
f"created: {today}",
|
||||||
|
f"agent: {agent}",
|
||||||
|
]
|
||||||
|
if scope:
|
||||||
|
lines.append(f"scope: {scope}")
|
||||||
|
if sourcer:
|
||||||
|
lines.append(f'sourcer: "{sourcer}"')
|
||||||
|
if related:
|
||||||
|
lines.append("related_claims:")
|
||||||
|
for r in related:
|
||||||
|
lines.append(f' - "[[{r}]]"')
|
||||||
|
lines.append("---")
|
||||||
|
lines.append("")
|
||||||
|
lines.append(f"# {title}")
|
||||||
|
lines.append("")
|
||||||
|
if body:
|
||||||
|
lines.append(body)
|
||||||
|
lines.append("")
|
||||||
|
|
||||||
|
return "\n".join(lines)
|
||||||
|
|
||||||
|
|
||||||
|
def _build_entity_content(entity: dict, domain: str) -> str:
|
||||||
|
"""Build entity markdown file content from extraction JSON."""
|
||||||
|
today = date.today().isoformat()
|
||||||
|
entity_type = entity.get("entity_type", "company")
|
||||||
|
description = entity.get("content", "")
|
||||||
|
|
||||||
|
if description:
|
||||||
|
return description
|
||||||
|
|
||||||
|
name = entity.get("filename", "").replace("-", " ").replace(".md", "").title()
|
||||||
|
return f"""---
|
||||||
|
type: entity
|
||||||
|
entity_type: {entity_type}
|
||||||
|
domain: {domain}
|
||||||
|
description: ""
|
||||||
|
created: {today}
|
||||||
|
---
|
||||||
|
|
||||||
|
# {name}
|
||||||
|
|
||||||
|
## Timeline
|
||||||
|
|
||||||
|
{entity.get("timeline_entry", "")}
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
async def _extract_one_source(
|
||||||
|
conn,
|
||||||
|
source_path: str,
|
||||||
|
source_content: str,
|
||||||
|
fm: dict,
|
||||||
|
existing_claims: set[str],
|
||||||
|
feedback: dict | None = None,
|
||||||
|
) -> tuple[int, int]:
|
||||||
|
"""Extract claims from a single source. Returns (succeeded, errors)."""
|
||||||
|
source_file = os.path.basename(source_path)
|
||||||
|
domain = fm.get("domain", "")
|
||||||
|
agent_name = agent_for_domain(domain)
|
||||||
|
agent_lower = agent_name.lower()
|
||||||
|
title = fm.get("title", source_file)
|
||||||
|
rationale = fm.get("rationale")
|
||||||
|
intake_tier = fm.get("intake_tier")
|
||||||
|
proposed_by = fm.get("proposed_by")
|
||||||
|
|
||||||
|
logger.info("Extracting: %s (domain: %s, agent: %s)", source_file, domain, agent_name)
|
||||||
|
|
||||||
|
# 1. Pre-screen (non-fatal)
|
||||||
|
prior_art = await _pre_screen(source_content, title)
|
||||||
|
if prior_art:
|
||||||
|
logger.info("Pre-screening found %d prior art items", prior_art.count("\n") + 1)
|
||||||
|
|
||||||
|
# 2. Build KB index
|
||||||
|
kb_index = _get_kb_index(domain)
|
||||||
|
|
||||||
|
# 3. Build extraction prompt
|
||||||
|
prompt = build_extraction_prompt(
|
||||||
|
source_file=source_path,
|
||||||
|
source_content=source_content,
|
||||||
|
domain=domain,
|
||||||
|
agent=agent_name,
|
||||||
|
kb_index=kb_index,
|
||||||
|
rationale=rationale,
|
||||||
|
intake_tier=intake_tier,
|
||||||
|
proposed_by=proposed_by,
|
||||||
|
prior_art=prior_art,
|
||||||
|
previous_feedback=feedback,
|
||||||
|
)
|
||||||
|
|
||||||
|
# 4. Call LLM (OpenRouter — not Claude Max CLI)
|
||||||
|
# EXTRACT_MODEL is "sonnet" (CLI name), use MODEL_SONNET_OR for OpenRouter
|
||||||
|
extract_model = config.MODEL_SONNET_OR
|
||||||
|
response, usage = await openrouter_call(
|
||||||
|
model=extract_model,
|
||||||
|
prompt=prompt,
|
||||||
|
timeout_sec=config.EXTRACT_TIMEOUT,
|
||||||
|
max_tokens=8192,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Record usage
|
||||||
|
try:
|
||||||
|
record_usage(
|
||||||
|
conn,
|
||||||
|
model=extract_model,
|
||||||
|
stage="extract",
|
||||||
|
input_tokens=usage.get("prompt_tokens", 0),
|
||||||
|
output_tokens=usage.get("completion_tokens", 0),
|
||||||
|
backend="api",
|
||||||
|
)
|
||||||
|
except Exception:
|
||||||
|
logger.debug("Failed to record extraction usage", exc_info=True)
|
||||||
|
|
||||||
|
if not response:
|
||||||
|
logger.error("LLM extraction failed for %s — no response", source_file)
|
||||||
|
return 0, 1
|
||||||
|
|
||||||
|
# 5. Parse JSON
|
||||||
|
extraction = _parse_extraction_json(response)
|
||||||
|
if not extraction:
|
||||||
|
logger.error("Failed to parse extraction JSON for %s", source_file)
|
||||||
|
return 0, 1
|
||||||
|
|
||||||
|
claims_raw = extraction.get("claims", [])
|
||||||
|
entities_raw = extraction.get("entities", [])
|
||||||
|
enrichments = extraction.get("enrichments", [])
|
||||||
|
decisions = extraction.get("decisions", [])
|
||||||
|
facts = extraction.get("facts", [])
|
||||||
|
notes = extraction.get("extraction_notes", "")
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
"Extraction result for %s: %d claims, %d enrichments, %d entities, %d decisions",
|
||||||
|
source_file, len(claims_raw), len(enrichments), len(entities_raw), len(decisions),
|
||||||
|
)
|
||||||
|
|
||||||
|
# 6. Build claim file contents
|
||||||
|
claim_files = []
|
||||||
|
for c in claims_raw:
|
||||||
|
filename = c.get("filename", "")
|
||||||
|
if not filename:
|
||||||
|
continue
|
||||||
|
if not filename.endswith(".md"):
|
||||||
|
filename += ".md"
|
||||||
|
content = _build_claim_content(c, agent_lower)
|
||||||
|
claim_files.append({"filename": filename, "domain": c.get("domain", domain), "content": content})
|
||||||
|
|
||||||
|
# Build entity file contents
|
||||||
|
entity_files = []
|
||||||
|
for e in entities_raw:
|
||||||
|
filename = e.get("filename", "")
|
||||||
|
if not filename:
|
||||||
|
continue
|
||||||
|
if not filename.endswith(".md"):
|
||||||
|
filename += ".md"
|
||||||
|
action = e.get("action", "create")
|
||||||
|
if action == "create":
|
||||||
|
content = _build_entity_content(e, domain)
|
||||||
|
entity_files.append({"filename": filename, "domain": domain, "content": content})
|
||||||
|
|
||||||
|
# 7. Post-extraction validation
|
||||||
|
if claim_files:
|
||||||
|
kept_claims, rejected_claims, stats = validate_and_fix_claims(
|
||||||
|
claim_files, domain, agent_lower, existing_claims,
|
||||||
|
repo_root=str(config.MAIN_WORKTREE),
|
||||||
|
)
|
||||||
|
if rejected_claims:
|
||||||
|
logger.info(
|
||||||
|
"Post-extract rejected %d/%d claims for %s: %s",
|
||||||
|
len(rejected_claims), len(claim_files), source_file,
|
||||||
|
stats.get("rejections", [])[:5],
|
||||||
|
)
|
||||||
|
claim_files = kept_claims
|
||||||
|
|
||||||
|
if not claim_files and not entity_files:
|
||||||
|
logger.info("No valid claims/entities after validation for %s — archiving as null-result", source_file)
|
||||||
|
await _archive_source(source_path, domain, "null-result")
|
||||||
|
return 0, 0
|
||||||
|
|
||||||
|
# 8. Create branch, write files, commit, push
|
||||||
|
slug = Path(source_file).stem
|
||||||
|
branch = f"extract/{slug}-{secrets.token_hex(2)}"
|
||||||
|
|
||||||
|
# Prepare extract worktree
|
||||||
|
rc, _ = await _git("fetch", "origin", "main", cwd=str(EXTRACT_WORKTREE))
|
||||||
|
rc, _ = await _git("checkout", "main", cwd=str(EXTRACT_WORKTREE))
|
||||||
|
rc, _ = await _git("reset", "--hard", "origin/main", cwd=str(EXTRACT_WORKTREE))
|
||||||
|
rc, _ = await _git("checkout", "-b", branch, cwd=str(EXTRACT_WORKTREE))
|
||||||
|
if rc != 0:
|
||||||
|
# Branch might already exist
|
||||||
|
await _git("branch", "-D", branch, cwd=str(EXTRACT_WORKTREE))
|
||||||
|
rc, out = await _git("checkout", "-b", branch, cwd=str(EXTRACT_WORKTREE))
|
||||||
|
if rc != 0:
|
||||||
|
logger.error("Failed to create branch %s: %s", branch, out)
|
||||||
|
return 0, 1
|
||||||
|
|
||||||
|
# Write claim files
|
||||||
|
worktree = EXTRACT_WORKTREE
|
||||||
|
files_written = []
|
||||||
|
for cf in claim_files:
|
||||||
|
domain_dir = worktree / "domains" / cf["domain"]
|
||||||
|
domain_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
fpath = domain_dir / cf["filename"]
|
||||||
|
fpath.write_text(cf["content"], encoding="utf-8")
|
||||||
|
files_written.append(f"domains/{cf['domain']}/{cf['filename']}")
|
||||||
|
|
||||||
|
for ef in entity_files:
|
||||||
|
entity_dir = worktree / "entities" / domain
|
||||||
|
entity_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
fpath = entity_dir / ef["filename"]
|
||||||
|
fpath.write_text(ef["content"], encoding="utf-8")
|
||||||
|
files_written.append(f"entities/{domain}/{ef['filename']}")
|
||||||
|
|
||||||
|
if not files_written:
|
||||||
|
logger.info("No files written for %s — cleaning up", source_file)
|
||||||
|
await _git("checkout", "main", cwd=str(EXTRACT_WORKTREE))
|
||||||
|
await _git("branch", "-D", branch, cwd=str(EXTRACT_WORKTREE))
|
||||||
|
await _archive_source(source_path, domain, "null-result")
|
||||||
|
return 0, 0
|
||||||
|
|
||||||
|
# Stage and commit
|
||||||
|
for f in files_written:
|
||||||
|
await _git("add", f, cwd=str(EXTRACT_WORKTREE))
|
||||||
|
|
||||||
|
commit_msg = (
|
||||||
|
f"{agent_lower}: extract claims from {slug}\n\n"
|
||||||
|
f"- Source: {source_path}\n"
|
||||||
|
f"- Domain: {domain}\n"
|
||||||
|
f"- Claims: {len(claim_files)}, Entities: {len(entity_files)}\n"
|
||||||
|
f"- Enrichments: {len(enrichments)}\n"
|
||||||
|
f"- Extracted by: pipeline ingest (OpenRouter {extract_model})\n\n"
|
||||||
|
f"Pentagon-Agent: {agent_name} <PIPELINE>"
|
||||||
|
)
|
||||||
|
|
||||||
|
rc, out = await _git("commit", "-m", commit_msg, cwd=str(EXTRACT_WORKTREE))
|
||||||
|
if rc != 0:
|
||||||
|
logger.error("Commit failed for %s: %s", branch, out)
|
||||||
|
await _git("checkout", "main", cwd=str(EXTRACT_WORKTREE))
|
||||||
|
await _git("branch", "-D", branch, cwd=str(EXTRACT_WORKTREE))
|
||||||
|
return 0, 1
|
||||||
|
|
||||||
|
# Push branch
|
||||||
|
rc, out = await _git("push", "-u", "origin", branch, cwd=str(EXTRACT_WORKTREE))
|
||||||
|
if rc != 0:
|
||||||
|
logger.error("Push failed for %s: %s", branch, out)
|
||||||
|
await _git("checkout", "main", cwd=str(EXTRACT_WORKTREE))
|
||||||
|
await _git("branch", "-D", branch, cwd=str(EXTRACT_WORKTREE))
|
||||||
|
return 0, 1
|
||||||
|
|
||||||
|
# 9. Create PR on Forgejo
|
||||||
|
agent_token_file = config.SECRETS_DIR / f"forgejo-{agent_lower}-token"
|
||||||
|
if not agent_token_file.exists():
|
||||||
|
agent_token_file = config.SECRETS_DIR / "forgejo-leo-token"
|
||||||
|
agent_token = agent_token_file.read_text().strip()
|
||||||
|
|
||||||
|
pr_title = f"{agent_lower}: extract claims from {slug}"
|
||||||
|
pr_body = (
|
||||||
|
f"## Automated Extraction\n\n"
|
||||||
|
f"**Source:** `{source_path}`\n"
|
||||||
|
f"**Domain:** {domain}\n"
|
||||||
|
f"**Agent:** {agent_name}\n"
|
||||||
|
f"**Model:** {extract_model}\n\n"
|
||||||
|
f"### Extraction Summary\n"
|
||||||
|
f"- **Claims:** {len(claim_files)}\n"
|
||||||
|
f"- **Entities:** {len(entity_files)}\n"
|
||||||
|
f"- **Enrichments:** {len(enrichments)}\n"
|
||||||
|
f"- **Decisions:** {len(decisions)}\n"
|
||||||
|
f"- **Facts:** {len(facts)}\n\n"
|
||||||
|
f"{notes}\n\n"
|
||||||
|
f"---\n"
|
||||||
|
f"*Extracted by pipeline ingest stage (replaces extract-cron.sh)*"
|
||||||
|
)
|
||||||
|
|
||||||
|
pr_result = await forgejo_api(
|
||||||
|
"POST",
|
||||||
|
f"/repos/{config.FORGEJO_OWNER}/{config.FORGEJO_REPO}/pulls",
|
||||||
|
body={"title": pr_title, "body": pr_body, "base": "main", "head": branch},
|
||||||
|
token=agent_token,
|
||||||
|
)
|
||||||
|
|
||||||
|
if pr_result and pr_result.get("number"):
|
||||||
|
pr_num = pr_result["number"]
|
||||||
|
logger.info("PR #%d created for %s (%d claims, %d entities)", pr_num, source_file, len(claim_files), len(entity_files))
|
||||||
|
else:
|
||||||
|
logger.warning("PR creation may have failed for %s — response: %s", source_file, pr_result)
|
||||||
|
|
||||||
|
# Clean up extract worktree
|
||||||
|
await _git("checkout", "main", cwd=str(EXTRACT_WORKTREE))
|
||||||
|
|
||||||
|
# 10. Archive source on main
|
||||||
|
await _archive_source(source_path, domain, "processed", agent_lower)
|
||||||
|
|
||||||
|
return 1, 0
|
||||||
|
|
||||||
|
|
||||||
|
async def _archive_source(
|
||||||
|
source_path: str,
|
||||||
|
domain: str,
|
||||||
|
status: str,
|
||||||
|
agent: str | None = None,
|
||||||
|
) -> None:
|
||||||
|
"""Move source from inbox/queue/ to archive (or null-result) on main.
|
||||||
|
|
||||||
|
Uses worktree lock to avoid conflicts with other main-writing processes.
|
||||||
|
"""
|
||||||
|
source_file = os.path.basename(source_path)
|
||||||
|
main = str(config.MAIN_WORKTREE)
|
||||||
|
|
||||||
|
try:
|
||||||
|
async with async_main_worktree_lock():
|
||||||
|
# Pull latest
|
||||||
|
await _git("pull", "--rebase", "origin", "main", cwd=main, timeout=30)
|
||||||
|
|
||||||
|
queue_path = Path(main) / "inbox" / "queue" / source_file
|
||||||
|
if not queue_path.exists():
|
||||||
|
logger.warning("Source %s not found in queue — may have been archived already", source_file)
|
||||||
|
return
|
||||||
|
|
||||||
|
if status == "null-result":
|
||||||
|
dest_dir = Path(main) / "inbox" / "null-result"
|
||||||
|
else:
|
||||||
|
dest_dir = Path(main) / "inbox" / "archive" / (domain or "unknown")
|
||||||
|
|
||||||
|
dest_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
dest_path = dest_dir / source_file
|
||||||
|
|
||||||
|
# Read and update frontmatter
|
||||||
|
content = queue_path.read_text(encoding="utf-8")
|
||||||
|
today = date.today().isoformat()
|
||||||
|
|
||||||
|
content = re.sub(r"^status: unprocessed", f"status: {status}", content, flags=re.MULTILINE)
|
||||||
|
if agent and "processed_by:" not in content:
|
||||||
|
content = re.sub(
|
||||||
|
r"(^status: \w+)",
|
||||||
|
rf"\1\nprocessed_by: {agent}\nprocessed_date: {today}",
|
||||||
|
content,
|
||||||
|
count=1,
|
||||||
|
flags=re.MULTILINE,
|
||||||
|
)
|
||||||
|
if "extraction_model:" not in content:
|
||||||
|
content = re.sub(
|
||||||
|
r"(^status: \w+.*?)(\n---)",
|
||||||
|
rf'\1\nextraction_model: "{config.MODEL_SONNET_OR}"\2',
|
||||||
|
content,
|
||||||
|
count=1,
|
||||||
|
flags=re.MULTILINE | re.DOTALL,
|
||||||
|
)
|
||||||
|
|
||||||
|
dest_path.write_text(content, encoding="utf-8")
|
||||||
|
queue_path.unlink()
|
||||||
|
|
||||||
|
# Git add, commit, push
|
||||||
|
await _git("add", "inbox/", cwd=main)
|
||||||
|
commit_msg = (
|
||||||
|
f"source: {source_file} → {status}\n\n"
|
||||||
|
f"Pentagon-Agent: Epimetheus <PIPELINE>"
|
||||||
|
)
|
||||||
|
await _git("commit", "-m", commit_msg, cwd=main)
|
||||||
|
|
||||||
|
# Push with retry
|
||||||
|
for attempt in range(3):
|
||||||
|
rc, out = await _git("push", "origin", "main", cwd=main, timeout=30)
|
||||||
|
if rc == 0:
|
||||||
|
break
|
||||||
|
logger.warning("Push attempt %d failed: %s", attempt + 1, out)
|
||||||
|
await _git("pull", "--rebase", "origin", "main", cwd=main, timeout=30)
|
||||||
|
else:
|
||||||
|
logger.error("Failed to push source archival after 3 attempts")
|
||||||
|
|
||||||
|
except Exception:
|
||||||
|
logger.exception("Failed to archive source %s", source_file)
|
||||||
|
|
||||||
|
|
||||||
|
async def extract_cycle(conn, max_workers=None) -> tuple[int, int]:
|
||||||
|
"""Main extraction cycle — called by the pipeline daemon's ingest stage.
|
||||||
|
|
||||||
|
Finds unprocessed sources in inbox/queue/, extracts claims, creates PRs.
|
||||||
|
Returns (succeeded, errors) for circuit breaker tracking.
|
||||||
|
"""
|
||||||
|
main = config.MAIN_WORKTREE
|
||||||
|
|
||||||
|
# Find unprocessed sources
|
||||||
|
queue_dir = main / "inbox" / "queue"
|
||||||
|
if not queue_dir.exists():
|
||||||
|
return 0, 0
|
||||||
|
|
||||||
|
unprocessed = []
|
||||||
|
for f in sorted(queue_dir.glob("*.md")):
|
||||||
|
try:
|
||||||
|
content = f.read_text(encoding="utf-8")
|
||||||
|
fm = _parse_source_frontmatter(content)
|
||||||
|
if fm.get("status") == "unprocessed":
|
||||||
|
unprocessed.append((str(f.relative_to(main)), content, fm))
|
||||||
|
except Exception:
|
||||||
|
logger.debug("Failed to read source %s", f, exc_info=True)
|
||||||
|
|
||||||
|
if not unprocessed:
|
||||||
|
return 0, 0
|
||||||
|
|
||||||
|
# Filter out sources that already have open extraction PRs
|
||||||
|
open_pr_slugs = set()
|
||||||
|
try:
|
||||||
|
prs = await forgejo_api(
|
||||||
|
"GET",
|
||||||
|
f"/repos/{config.FORGEJO_OWNER}/{config.FORGEJO_REPO}/pulls?state=open&limit=50",
|
||||||
|
)
|
||||||
|
if prs:
|
||||||
|
for pr in prs:
|
||||||
|
head = pr.get("head", {}).get("ref", "")
|
||||||
|
if head.startswith("extract/"):
|
||||||
|
# Extract the source slug from branch name (extract/{slug}-{nonce})
|
||||||
|
slug_part = head[len("extract/"):]
|
||||||
|
# Remove the random suffix (last 5 chars: -{4-hex-chars})
|
||||||
|
if len(slug_part) > 5 and slug_part[-5] == "-":
|
||||||
|
slug_part = slug_part[:-5]
|
||||||
|
open_pr_slugs.add(slug_part)
|
||||||
|
except Exception:
|
||||||
|
logger.debug("Failed to check open PRs for dedup", exc_info=True)
|
||||||
|
|
||||||
|
if open_pr_slugs:
|
||||||
|
before = len(unprocessed)
|
||||||
|
unprocessed = [
|
||||||
|
(sp, c, f) for sp, c, f in unprocessed
|
||||||
|
if Path(sp).stem not in open_pr_slugs
|
||||||
|
]
|
||||||
|
skipped = before - len(unprocessed)
|
||||||
|
if skipped:
|
||||||
|
logger.info("Skipped %d source(s) with existing open PRs", skipped)
|
||||||
|
|
||||||
|
if not unprocessed:
|
||||||
|
return 0, 0
|
||||||
|
|
||||||
|
logger.info("Extract cycle: %d unprocessed source(s) found, processing up to %d", len(unprocessed), MAX_SOURCES)
|
||||||
|
|
||||||
|
# Load existing claims for dedup
|
||||||
|
existing_claims = load_existing_claims_from_repo(str(main))
|
||||||
|
|
||||||
|
# Ensure extract worktree exists and is clean
|
||||||
|
if not EXTRACT_WORKTREE.exists():
|
||||||
|
logger.error("Extract worktree not found at %s", EXTRACT_WORKTREE)
|
||||||
|
return 0, 1
|
||||||
|
|
||||||
|
total_ok = 0
|
||||||
|
total_err = 0
|
||||||
|
|
||||||
|
# ── Re-extraction: pick up sources that failed eval and have feedback ──
|
||||||
|
reextract_rows = conn.execute(
|
||||||
|
"""SELECT path, feedback FROM sources
|
||||||
|
WHERE status = 'needs_reextraction' AND feedback IS NOT NULL
|
||||||
|
ORDER BY updated_at ASC LIMIT ?""",
|
||||||
|
(max(1, MAX_SOURCES - len(unprocessed)),),
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
for row in reextract_rows:
|
||||||
|
reex_path = row["path"]
|
||||||
|
# Source was archived — read from archive location
|
||||||
|
archive_base = main / "inbox" / "archive"
|
||||||
|
# Try to find the file in archive subdirs
|
||||||
|
reex_file = None
|
||||||
|
for subdir in archive_base.iterdir():
|
||||||
|
candidate = subdir / Path(reex_path).name
|
||||||
|
if candidate.exists():
|
||||||
|
reex_file = candidate
|
||||||
|
break
|
||||||
|
if not reex_file:
|
||||||
|
# Try original path as fallback
|
||||||
|
candidate = main / reex_path
|
||||||
|
if candidate.exists():
|
||||||
|
reex_file = candidate
|
||||||
|
|
||||||
|
if not reex_file:
|
||||||
|
logger.warning("Re-extraction: source %s not found on disk — skipping", reex_path)
|
||||||
|
continue
|
||||||
|
|
||||||
|
try:
|
||||||
|
reex_content = reex_file.read_text(encoding="utf-8")
|
||||||
|
reex_fm = _parse_source_frontmatter(reex_content)
|
||||||
|
reex_feedback = json.loads(row["feedback"]) if row["feedback"] else {}
|
||||||
|
|
||||||
|
logger.info("Re-extracting %s with feedback: %s", reex_path, list(reex_feedback.get("issues", [])))
|
||||||
|
|
||||||
|
conn.execute(
|
||||||
|
"UPDATE sources SET status = 'extracting', updated_at = datetime('now') WHERE path = ?",
|
||||||
|
(reex_path,),
|
||||||
|
)
|
||||||
|
conn.commit()
|
||||||
|
|
||||||
|
ok, err = await _extract_one_source(conn, reex_path, reex_content, reex_fm, existing_claims, feedback=reex_feedback)
|
||||||
|
total_ok += ok
|
||||||
|
total_err += err
|
||||||
|
|
||||||
|
if ok:
|
||||||
|
conn.execute(
|
||||||
|
"UPDATE sources SET status = 'extracted', updated_at = datetime('now') WHERE path = ?",
|
||||||
|
(reex_path,),
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
conn.execute(
|
||||||
|
"UPDATE sources SET status = 'error', last_error = 're-extraction failed', updated_at = datetime('now') WHERE path = ?",
|
||||||
|
(reex_path,),
|
||||||
|
)
|
||||||
|
conn.commit()
|
||||||
|
except Exception:
|
||||||
|
logger.exception("Re-extraction failed for %s", reex_path)
|
||||||
|
total_err += 1
|
||||||
|
|
||||||
|
for source_path, content, fm in unprocessed[:MAX_SOURCES]:
|
||||||
|
try:
|
||||||
|
ok, err = await _extract_one_source(conn, source_path, content, fm, existing_claims)
|
||||||
|
total_ok += ok
|
||||||
|
total_err += err
|
||||||
|
except Exception:
|
||||||
|
logger.exception("Unhandled error extracting %s", source_path)
|
||||||
|
total_err += 1
|
||||||
|
|
||||||
|
# Brief pause between sources
|
||||||
|
await asyncio.sleep(2)
|
||||||
|
|
||||||
|
logger.info("Extract cycle complete: %d succeeded, %d errors", total_ok, total_err)
|
||||||
|
return total_ok, total_err
|
||||||
326
ops/pipeline-v2/lib/extraction_prompt.py
Normal file
326
ops/pipeline-v2/lib/extraction_prompt.py
Normal file
|
|
@ -0,0 +1,326 @@
|
||||||
|
"""Lean extraction prompt — judgment only, mechanical rules in code.
|
||||||
|
|
||||||
|
The extraction prompt focuses on WHAT to extract:
|
||||||
|
- Separate facts from claims from enrichments
|
||||||
|
- Classify confidence honestly
|
||||||
|
- Identify entity data
|
||||||
|
- Check for duplicates against KB index
|
||||||
|
|
||||||
|
Mechanical enforcement (frontmatter format, wiki links, dates, filenames)
|
||||||
|
is handled by post_extract.py AFTER the LLM returns.
|
||||||
|
|
||||||
|
Design principle (Leo): mechanical rules in code, judgment in prompts.
|
||||||
|
Epimetheus owns this module. Leo reviews changes.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from datetime import date
|
||||||
|
|
||||||
|
|
||||||
|
def build_extraction_prompt(
|
||||||
|
source_file: str,
|
||||||
|
source_content: str,
|
||||||
|
domain: str,
|
||||||
|
agent: str,
|
||||||
|
kb_index: str,
|
||||||
|
*,
|
||||||
|
today: str | None = None,
|
||||||
|
rationale: str | None = None,
|
||||||
|
intake_tier: str | None = None,
|
||||||
|
proposed_by: str | None = None,
|
||||||
|
prior_art: list[dict] | None = None,
|
||||||
|
previous_feedback: dict | None = None,
|
||||||
|
) -> str:
|
||||||
|
"""Build the lean extraction prompt.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
source_file: Path to the source being extracted
|
||||||
|
source_content: Full text of the source
|
||||||
|
domain: Primary domain for this source
|
||||||
|
agent: Agent name performing extraction
|
||||||
|
kb_index: Pre-generated KB index text (claim titles for dedup)
|
||||||
|
today: Override date for testing (default: today)
|
||||||
|
rationale: Contributor's natural-language thesis about the source (optional)
|
||||||
|
intake_tier: undirected | directed | challenge (optional)
|
||||||
|
proposed_by: Contributor handle who submitted the source (optional)
|
||||||
|
prior_art: Qdrant search results — existing claims semantically similar to this source.
|
||||||
|
Each dict has: claim_title, claim_path, description, score.
|
||||||
|
Injected as connection candidates for extract-time linking.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
The complete prompt string
|
||||||
|
"""
|
||||||
|
today = today or date.today().isoformat()
|
||||||
|
|
||||||
|
# Build contributor directive section (if rationale provided)
|
||||||
|
if rationale and rationale.strip():
|
||||||
|
contributor_name = proposed_by or "a contributor"
|
||||||
|
tier_label = intake_tier or "directed"
|
||||||
|
contributor_directive = f"""
|
||||||
|
## Contributor Directive (intake_tier: {tier_label})
|
||||||
|
|
||||||
|
**{contributor_name}** submitted this source and said:
|
||||||
|
|
||||||
|
> {rationale.strip()}
|
||||||
|
|
||||||
|
This is an extraction directive — use it to focus your extraction:
|
||||||
|
- Extract claims that relate to the contributor's thesis
|
||||||
|
- If the source SUPPORTS their thesis, extract the supporting evidence as claims
|
||||||
|
- If the source CONTRADICTS their thesis, extract the contradiction — that's even more valuable
|
||||||
|
- Evaluate whether the contributor's own thesis is extractable as a standalone claim
|
||||||
|
- If specific enough to disagree with and supported by the source: extract it with `source: "{contributor_name}, original analysis"`
|
||||||
|
- If too vague or already in the KB: use it as a directive only
|
||||||
|
- If the contributor references existing claims ("I disagree with X"), identify those claims by filename from the KB index and include them in the `challenges` field
|
||||||
|
- ALSO extract anything else valuable in the source — the directive is a spotlight, not a filter
|
||||||
|
|
||||||
|
Set `contributor_thesis_extractable: true` if you extracted the contributor's thesis as a claim, `false` otherwise.
|
||||||
|
"""
|
||||||
|
else:
|
||||||
|
contributor_directive = ""
|
||||||
|
|
||||||
|
# Build previous feedback section (for re-extraction after eval rejection)
|
||||||
|
if previous_feedback:
|
||||||
|
issues = previous_feedback.get("issues", [])
|
||||||
|
leo_verdict = previous_feedback.get("leo", "")
|
||||||
|
domain_verdict = previous_feedback.get("domain", "")
|
||||||
|
feedback_lines = [
|
||||||
|
"\n## Previous Extraction Feedback\n",
|
||||||
|
"A previous extraction from this source was **rejected** by the evaluation pipeline.",
|
||||||
|
"Learn from these issues and avoid repeating them:\n",
|
||||||
|
]
|
||||||
|
if issues:
|
||||||
|
for issue in issues:
|
||||||
|
issue_guidance = {
|
||||||
|
"frontmatter_schema": "Fix frontmatter format — ensure all required fields are present and correctly typed.",
|
||||||
|
"title_overclaims": "Make titles more precise — avoid broad generalizations. The title must be specific enough to disagree with.",
|
||||||
|
"confidence_miscalibration": "Calibrate confidence honestly — single source = experimental at most. Don't mark speculative claims as likely.",
|
||||||
|
"factual_discrepancy": "Check facts carefully — verify dates, numbers, and attributions against the source text.",
|
||||||
|
"near_duplicate": "Check the KB index more carefully — this claim may already exist. Prefer enrichment over duplication.",
|
||||||
|
"scope_error": "Scope claims correctly — don't mix structural, functional, and causal claims in one.",
|
||||||
|
"broken_wiki_links": "Ensure wiki links reference real entities/claims in the KB.",
|
||||||
|
}
|
||||||
|
guidance = issue_guidance.get(issue, f"Address: {issue}")
|
||||||
|
feedback_lines.append(f"- **{issue}**: {guidance}")
|
||||||
|
feedback_lines.append("")
|
||||||
|
if leo_verdict == "request_changes":
|
||||||
|
feedback_lines.append("The lead reviewer requested changes. Extract fewer, higher-quality claims.")
|
||||||
|
if domain_verdict == "request_changes":
|
||||||
|
feedback_lines.append("The domain reviewer requested changes. Pay closer attention to domain-specific standards.")
|
||||||
|
feedback_lines.append("")
|
||||||
|
previous_feedback_section = "\n".join(feedback_lines)
|
||||||
|
else:
|
||||||
|
previous_feedback_section = ""
|
||||||
|
|
||||||
|
# Build connection candidates section (if prior art found via Qdrant)
|
||||||
|
if prior_art:
|
||||||
|
pa_lines = [
|
||||||
|
"\n## Connection Candidates (semantically similar existing claims)\n",
|
||||||
|
"These existing claims are topically related to this source. For each NEW claim you extract,",
|
||||||
|
"check this list and specify connections in the `connections` array.\n",
|
||||||
|
]
|
||||||
|
for i, pa in enumerate(prior_art[:10], 1):
|
||||||
|
title = pa.get("claim_title", "untitled")
|
||||||
|
path = pa.get("claim_path", "")
|
||||||
|
desc = pa.get("description", "")
|
||||||
|
score = pa.get("score", 0)
|
||||||
|
filename = path.rsplit("/", 1)[-1].replace(".md", "") if path else title
|
||||||
|
pa_lines.append(f"{i}. **{title}** (`{filename}`, similarity: {score:.2f})")
|
||||||
|
if desc:
|
||||||
|
pa_lines.append(f" {desc}")
|
||||||
|
pa_lines.append("")
|
||||||
|
connection_candidates = "\n".join(pa_lines)
|
||||||
|
else:
|
||||||
|
connection_candidates = ""
|
||||||
|
|
||||||
|
return f"""You are {agent}, extracting knowledge from a source for TeleoHumanity's collective knowledge base.
|
||||||
|
|
||||||
|
## Your Task
|
||||||
|
|
||||||
|
Read the source below. Be SELECTIVE — extract only what genuinely expands the KB's understanding. Most sources produce 0-3 claims. A source that produces 5+ claims is almost certainly over-extracting.
|
||||||
|
|
||||||
|
For each insight, classify it as one of:
|
||||||
|
|
||||||
|
**CLAIM** — An arguable proposition someone could disagree with. Must name a specific mechanism.
|
||||||
|
- Good: "futarchy is manipulation-resistant because attack attempts create profitable opportunities for defenders"
|
||||||
|
- Bad: "futarchy has interesting governance properties"
|
||||||
|
- Test: "This note argues that [title]" must work as a sentence.
|
||||||
|
- MAXIMUM 3-5 claims per source. If you find more, keep only the most novel and surprising.
|
||||||
|
|
||||||
|
**ENRICHMENT** — New evidence that strengthens, challenges, or extends an existing claim in the KB.
|
||||||
|
- If an insight supports something already in the KB index below, it's an enrichment, NOT a new claim.
|
||||||
|
- Enrichment over duplication: ALWAYS prefer adding evidence to an existing claim.
|
||||||
|
- Most sources should produce more enrichments than new claims.
|
||||||
|
|
||||||
|
**ENTITY** — Factual data about a company, protocol, person, organization, or market. Not arguable.
|
||||||
|
- Entity types: company, person, protocol, organization, market (core). Domain-specific: lab, fund, token, exchange, therapy, research_program, benchmark.
|
||||||
|
- One file per entity. If the entity already exists, append a timeline entry — don't create a new file.
|
||||||
|
- New entities: raised real capital (>$10K), launched a product, or discussed by 2+ sources.
|
||||||
|
- Skip: test proposals, spam, trivial projects.
|
||||||
|
- Filing: `entities/{{domain}}/{{entity-name}}.md`
|
||||||
|
|
||||||
|
**DECISION** — A governance decision, futarchic proposal, funding vote, or policy action. Separate from entities.
|
||||||
|
- Decisions are events with terminal states (passed/failed/expired). Entities are persistent objects.
|
||||||
|
- Each significant decision gets its own file in `decisions/{{domain}}/`.
|
||||||
|
- ALSO output a timeline entry for the parent entity: `- **YYYY-MM-DD** — [[decision-filename]] Outcome: one-line summary`
|
||||||
|
- Only extract a CLAIM from a decision if it reveals a novel MECHANISM INSIGHT (~1 per 10-15 decisions).
|
||||||
|
- Routine decisions (minor budgets, operational tweaks, uncontested votes) → timeline entry on parent entity only, no decision file.
|
||||||
|
- Filing: `decisions/{{domain}}/{{parent}}-{{slug}}.md`
|
||||||
|
|
||||||
|
**FACT** — A verifiable data point no one would disagree with. Store in source notes, not as a claim.
|
||||||
|
- "Jupiter DAO vote reached 75% support" is a fact, not a claim.
|
||||||
|
- Individual data points about specific events are facts. Generalizable patterns from multiple data points are claims.
|
||||||
|
|
||||||
|
## Selectivity Rules
|
||||||
|
|
||||||
|
**Novelty gate — argument, not topic:** Before extracting a claim, check the KB index below. The question is NOT "does the KB cover this topic?" but "does the KB already make THIS SPECIFIC ARGUMENT?" A new argument in a well-covered topic IS a new claim. A new data point supporting an existing argument is an enrichment.
|
||||||
|
- New data point for existing argument → ENRICHMENT (add evidence to existing claim)
|
||||||
|
- New argument the KB doesn't have yet → CLAIM (even if the topic is well-covered)
|
||||||
|
- Same argument with different wording → ENRICHMENT (don't create near-duplicates)
|
||||||
|
|
||||||
|
**Challenge premium:** A single well-evidenced claim that challenges an existing KB position is worth more than 10 claims that confirm what we already know. Prioritize extraction of counter-evidence and boundary conditions.
|
||||||
|
|
||||||
|
**What would change an agent's mind?** Ask this for every potential claim. If the answer is "nothing — this is more evidence for what we already believe," it's an enrichment. If the answer is "this introduces a mechanism or argument we haven't considered," it's a claim.
|
||||||
|
|
||||||
|
## Confidence Calibration
|
||||||
|
|
||||||
|
Be honest about uncertainty:
|
||||||
|
- **proven**: Multiple independent confirmations, tested against challenges
|
||||||
|
- **likely**: 3+ corroborating sources with empirical data
|
||||||
|
- **experimental**: 1-2 sources with data, or strong theoretical argument
|
||||||
|
- **speculative**: Theory without data, single anecdote, or self-reported company claims
|
||||||
|
|
||||||
|
Single source = experimental at most. Pitch rhetoric or marketing copy = speculative.
|
||||||
|
|
||||||
|
## Source
|
||||||
|
|
||||||
|
**File:** {source_file}
|
||||||
|
|
||||||
|
{source_content}
|
||||||
|
{contributor_directive}{previous_feedback_section}{connection_candidates}
|
||||||
|
## KB Index (existing claims — check for duplicates and enrichment targets)
|
||||||
|
|
||||||
|
{kb_index}
|
||||||
|
|
||||||
|
## Output Format
|
||||||
|
|
||||||
|
Return valid JSON. The post-processor handles frontmatter formatting, wiki links, and dates — focus on the intellectual content.
|
||||||
|
|
||||||
|
```json
|
||||||
|
{{
|
||||||
|
"claims": [
|
||||||
|
{{
|
||||||
|
"filename": "descriptive-slug-matching-the-claim.md",
|
||||||
|
"domain": "{domain}",
|
||||||
|
"title": "Prose claim title that is specific enough to disagree with",
|
||||||
|
"description": "One sentence adding context beyond the title",
|
||||||
|
"confidence": "experimental",
|
||||||
|
"source": "author/org, key evidence reference",
|
||||||
|
"body": "Argument with evidence. Cite specific data, quotes, studies from the source. Explain WHY the claim is supported. This must be a real argument, not a restatement of the title.",
|
||||||
|
"related_claims": ["existing-claim-stem-from-kb-index"],
|
||||||
|
"connections": [
|
||||||
|
{{
|
||||||
|
"target": "existing-claim-filename-from-connection-candidates-or-kb-index",
|
||||||
|
"relationship": "supports|challenges|related",
|
||||||
|
"reason": "One sentence: WHY does this claim support/challenge/relate to the target?"
|
||||||
|
}}
|
||||||
|
],
|
||||||
|
"scope": "structural|functional|causal|correlational",
|
||||||
|
"sourcer": "handle or name of the original author/source (e.g., @theiaresearch, Pine Analytics)"
|
||||||
|
}}
|
||||||
|
],
|
||||||
|
"enrichments": [
|
||||||
|
{{
|
||||||
|
"target_file": "existing-claim-filename.md",
|
||||||
|
"type": "confirm|challenge|extend",
|
||||||
|
"evidence": "The new evidence from this source",
|
||||||
|
"source_ref": "Brief source reference"
|
||||||
|
}}
|
||||||
|
],
|
||||||
|
"entities": [
|
||||||
|
{{
|
||||||
|
"filename": "entity-name.md",
|
||||||
|
"domain": "{domain}",
|
||||||
|
"action": "create|update",
|
||||||
|
"entity_type": "company|person|protocol|organization|market|lab|fund|research_program",
|
||||||
|
"content": "Full markdown for new entities. For updates, leave empty.",
|
||||||
|
"timeline_entry": "- **YYYY-MM-DD** — Event with specifics"
|
||||||
|
}}
|
||||||
|
],
|
||||||
|
"decisions": [
|
||||||
|
{{
|
||||||
|
"filename": "parent-slug-decision-slug.md",
|
||||||
|
"domain": "{domain}",
|
||||||
|
"parent_entity": "parent-entity-filename.md",
|
||||||
|
"status": "passed|failed|active",
|
||||||
|
"category": "treasury|fundraise|hiring|mechanism|liquidation|grants|strategy",
|
||||||
|
"summary": "One-sentence description of the decision",
|
||||||
|
"content": "Full markdown for significant decisions. Empty for routine ones.",
|
||||||
|
"parent_timeline_entry": "- **YYYY-MM-DD** — [[decision-filename]] Passed: one-line summary"
|
||||||
|
}}
|
||||||
|
],
|
||||||
|
"facts": [
|
||||||
|
"Verifiable data points to store in source archive notes"
|
||||||
|
],
|
||||||
|
"extraction_notes": "Brief summary: N claims, N enrichments, N entities, N decisions. What was most interesting.",
|
||||||
|
"contributor_thesis_extractable": false
|
||||||
|
}}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Rules
|
||||||
|
|
||||||
|
1. **Quality over quantity.** 0-3 precise claims beats 8 vague ones. If you can't name the specific mechanism in the title, don't extract it. Empty claims arrays are fine — not every source produces novel claims.
|
||||||
|
2. **Enrichment over duplication.** Check the KB index FIRST. If something similar exists, add evidence to it. New claims are only for genuinely novel propositions.
|
||||||
|
3. **Facts are not claims.** Individual data points go in `facts`. Only generalized patterns from multiple data points become claims.
|
||||||
|
4. **Proposals are entities, not claims.** A governance proposal, token launch, or funding event is structured data (entity). Only extract a claim if the event reveals a novel mechanism insight that generalizes beyond this specific case.
|
||||||
|
5. **Scope your claims.** Say whether you're claiming a structural, functional, causal, or correlational relationship.
|
||||||
|
6. **Connect your claims.** For every new claim, check the Connection Candidates list. If a candidate is related, add it to the `connections` array with the relationship type and a one-sentence reason. Use `supports` when your claim provides evidence for the target, `challenges` when it contradicts, `related` only as a last resort. Unconnected claims are orphans — connect them at birth.
|
||||||
|
7. **OPSEC.** Never extract specific dollar amounts, valuations, equity percentages, or deal terms for LivingIP/Teleo. General market data is fine.
|
||||||
|
8. **Read the Agent Notes.** If the source has "Agent Notes" or "Curator Notes" sections, they contain context about why this source matters.
|
||||||
|
|
||||||
|
Return valid JSON only. No markdown fencing, no explanation outside the JSON.
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
def build_entity_enrichment_prompt(
|
||||||
|
entity_file: str,
|
||||||
|
entity_content: str,
|
||||||
|
new_data: list[dict],
|
||||||
|
domain: str,
|
||||||
|
) -> str:
|
||||||
|
"""Build prompt for batch entity enrichment (runs on main, not extraction branch).
|
||||||
|
|
||||||
|
This is separate from claim extraction to avoid merge conflicts.
|
||||||
|
Entity enrichments are additive timeline entries — commutative, auto-mergeable.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
entity_file: Path to the entity being enriched
|
||||||
|
entity_content: Current content of the entity file
|
||||||
|
new_data: List of timeline entries from recent extractions
|
||||||
|
domain: Entity domain
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Prompt for entity enrichment
|
||||||
|
"""
|
||||||
|
entries_text = "\n".join(
|
||||||
|
f"- Source: {d.get('source', '?')}\n Entry: {d.get('timeline_entry', '')}"
|
||||||
|
for d in new_data
|
||||||
|
)
|
||||||
|
|
||||||
|
return f"""You are a Teleo knowledge base agent. Merge these new timeline entries into an existing entity.
|
||||||
|
|
||||||
|
## Current Entity: {entity_file}
|
||||||
|
|
||||||
|
{entity_content}
|
||||||
|
|
||||||
|
## New Data Points
|
||||||
|
|
||||||
|
{entries_text}
|
||||||
|
|
||||||
|
## Rules
|
||||||
|
|
||||||
|
1. Append new entries to the Timeline section in chronological order
|
||||||
|
2. Deduplicate: skip entries that describe events already in the timeline
|
||||||
|
3. Preserve all existing content — append only
|
||||||
|
4. If a new data point updates a metric (revenue, valuation, user count), add it as a new timeline entry, don't modify existing entries
|
||||||
|
|
||||||
|
Return the complete updated entity file content.
|
||||||
|
"""
|
||||||
273
ops/pipeline-v2/lib/feedback.py
Normal file
273
ops/pipeline-v2/lib/feedback.py
Normal file
|
|
@ -0,0 +1,273 @@
|
||||||
|
"""Structured rejection feedback — closes the loop for proposer agents.
|
||||||
|
|
||||||
|
Maps issue tags to CLAUDE.md quality gates with actionable guidance.
|
||||||
|
Tracks per-agent error patterns. Provides agent-queryable rejection history.
|
||||||
|
|
||||||
|
Problem: Proposer agents (Rio, Clay, etc.) get generic PR comments when
|
||||||
|
claims are rejected. They can't tell what specifically failed, so they
|
||||||
|
repeat the same mistakes. Rio: "I have to read the full review comment
|
||||||
|
and infer what to fix."
|
||||||
|
|
||||||
|
Solution: Machine-readable rejection codes in PR comments + per-agent
|
||||||
|
error pattern tracking on /metrics + agent feedback endpoint.
|
||||||
|
|
||||||
|
Epimetheus owns this module. Leo reviews changes.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
|
logger = logging.getLogger("pipeline.feedback")
|
||||||
|
|
||||||
|
# ─── Quality Gate Mapping ──────────────────────────────────────────────────
|
||||||
|
#
|
||||||
|
# Maps each issue tag to its CLAUDE.md quality gate, with actionable guidance
|
||||||
|
# for the proposer agent. The "gate" field references the specific checklist
|
||||||
|
# item in CLAUDE.md. The "fix" field tells the agent exactly what to change.
|
||||||
|
|
||||||
|
QUALITY_GATES: dict[str, dict] = {
|
||||||
|
"frontmatter_schema": {
|
||||||
|
"gate": "Schema compliance",
|
||||||
|
"description": "Missing or invalid YAML frontmatter fields",
|
||||||
|
"fix": "Ensure all 6 required fields: type, domain, description, confidence, source, created. "
|
||||||
|
"Use exact field names (not source_archive, not claim).",
|
||||||
|
"severity": "blocking",
|
||||||
|
"auto_fixable": True,
|
||||||
|
},
|
||||||
|
"broken_wiki_links": {
|
||||||
|
"gate": "Wiki link validity",
|
||||||
|
"description": "[[wiki links]] reference files that don't exist in the KB",
|
||||||
|
"fix": "Only link to files listed in the KB index. If a claim doesn't exist yet, "
|
||||||
|
"omit the link or use <!-- claim pending: description -->.",
|
||||||
|
"severity": "warning",
|
||||||
|
"auto_fixable": True,
|
||||||
|
},
|
||||||
|
"title_overclaims": {
|
||||||
|
"gate": "Title precision",
|
||||||
|
"description": "Title asserts more than the evidence supports",
|
||||||
|
"fix": "Scope the title to match the evidence strength. Single source = "
|
||||||
|
"'X suggests Y' not 'X proves Y'. Name the specific mechanism.",
|
||||||
|
"severity": "blocking",
|
||||||
|
"auto_fixable": False,
|
||||||
|
},
|
||||||
|
"confidence_miscalibration": {
|
||||||
|
"gate": "Confidence calibration",
|
||||||
|
"description": "Confidence level doesn't match evidence strength",
|
||||||
|
"fix": "Single source = experimental max. 3+ corroborating sources with data = likely. "
|
||||||
|
"Pitch rhetoric or self-reported metrics = speculative. "
|
||||||
|
"proven requires multiple independent confirmations.",
|
||||||
|
"severity": "blocking",
|
||||||
|
"auto_fixable": False,
|
||||||
|
},
|
||||||
|
"date_errors": {
|
||||||
|
"gate": "Date accuracy",
|
||||||
|
"description": "Invalid or incorrect date format in created field",
|
||||||
|
"fix": "created = extraction date (today), not source publication date. Format: YYYY-MM-DD.",
|
||||||
|
"severity": "blocking",
|
||||||
|
"auto_fixable": True,
|
||||||
|
},
|
||||||
|
"factual_discrepancy": {
|
||||||
|
"gate": "Factual accuracy",
|
||||||
|
"description": "Claim contains factual errors or misrepresents source material",
|
||||||
|
"fix": "Re-read the source. Verify specific numbers, names, dates. "
|
||||||
|
"If source X quotes source Y, attribute to Y.",
|
||||||
|
"severity": "blocking",
|
||||||
|
"auto_fixable": False,
|
||||||
|
},
|
||||||
|
"near_duplicate": {
|
||||||
|
"gate": "Duplicate check",
|
||||||
|
"description": "Substantially similar claim already exists in KB",
|
||||||
|
"fix": "Check KB index before extracting. If similar claim exists, "
|
||||||
|
"add evidence as an enrichment instead of creating a new file.",
|
||||||
|
"severity": "warning",
|
||||||
|
"auto_fixable": False,
|
||||||
|
},
|
||||||
|
"scope_error": {
|
||||||
|
"gate": "Scope qualification",
|
||||||
|
"description": "Claim uses unscoped universals or is too vague to disagree with",
|
||||||
|
"fix": "Specify: structural vs functional, micro vs macro, causal vs correlational. "
|
||||||
|
"Replace 'always/never/the fundamental' with scoped language.",
|
||||||
|
"severity": "blocking",
|
||||||
|
"auto_fixable": False,
|
||||||
|
},
|
||||||
|
"opsec_internal_deal_terms": {
|
||||||
|
"gate": "OPSEC",
|
||||||
|
"description": "Claim contains internal LivingIP/Teleo deal terms",
|
||||||
|
"fix": "Never extract specific dollar amounts, valuations, equity percentages, "
|
||||||
|
"or deal terms for LivingIP/Teleo. General market data is fine.",
|
||||||
|
"severity": "blocking",
|
||||||
|
"auto_fixable": False,
|
||||||
|
},
|
||||||
|
"body_too_thin": {
|
||||||
|
"gate": "Evidence quality",
|
||||||
|
"description": "Claim body lacks substantive argument or evidence",
|
||||||
|
"fix": "The body must explain WHY the claim is supported with specific data, "
|
||||||
|
"quotes, or studies from the source. A body that restates the title is not enough.",
|
||||||
|
"severity": "blocking",
|
||||||
|
"auto_fixable": False,
|
||||||
|
},
|
||||||
|
"title_too_few_words": {
|
||||||
|
"gate": "Title precision",
|
||||||
|
"description": "Title is too short to be a specific, disagreeable proposition",
|
||||||
|
"fix": "Minimum 4 words. Name the specific mechanism and outcome. "
|
||||||
|
"Bad: 'futarchy works'. Good: 'futarchy is manipulation-resistant because "
|
||||||
|
"attack attempts create profitable opportunities for defenders'.",
|
||||||
|
"severity": "blocking",
|
||||||
|
"auto_fixable": False,
|
||||||
|
},
|
||||||
|
"title_not_proposition": {
|
||||||
|
"gate": "Title precision",
|
||||||
|
"description": "Title reads as a label, not an arguable proposition",
|
||||||
|
"fix": "The title must contain a verb and read as a complete sentence. "
|
||||||
|
"Test: 'This note argues that [title]' must work grammatically.",
|
||||||
|
"severity": "blocking",
|
||||||
|
"auto_fixable": False,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Feedback Formatting ──────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def format_rejection_comment(
|
||||||
|
issues: list[str],
|
||||||
|
source: str = "validator",
|
||||||
|
) -> str:
|
||||||
|
"""Format a structured rejection comment for a PR.
|
||||||
|
|
||||||
|
Includes machine-readable tags AND human-readable guidance.
|
||||||
|
Agents can parse the <!-- REJECTION: --> block programmatically.
|
||||||
|
"""
|
||||||
|
lines = []
|
||||||
|
|
||||||
|
# Machine-readable block (agents parse this)
|
||||||
|
rejection_data = {
|
||||||
|
"issues": issues,
|
||||||
|
"source": source,
|
||||||
|
"ts": datetime.now(timezone.utc).isoformat(),
|
||||||
|
}
|
||||||
|
lines.append(f"<!-- REJECTION: {json.dumps(rejection_data)} -->")
|
||||||
|
lines.append("")
|
||||||
|
|
||||||
|
# Human-readable summary
|
||||||
|
blocking = [i for i in issues if QUALITY_GATES.get(i, {}).get("severity") == "blocking"]
|
||||||
|
warnings = [i for i in issues if QUALITY_GATES.get(i, {}).get("severity") == "warning"]
|
||||||
|
|
||||||
|
if blocking:
|
||||||
|
lines.append(f"**Rejected** — {len(blocking)} blocking issue{'s' if len(blocking) > 1 else ''}\n")
|
||||||
|
elif warnings:
|
||||||
|
lines.append(f"**Warnings** — {len(warnings)} non-blocking issue{'s' if len(warnings) > 1 else ''}\n")
|
||||||
|
|
||||||
|
# Per-issue guidance
|
||||||
|
for tag in issues:
|
||||||
|
gate = QUALITY_GATES.get(tag, {})
|
||||||
|
severity = gate.get("severity", "unknown")
|
||||||
|
icon = "BLOCK" if severity == "blocking" else "WARN"
|
||||||
|
gate_name = gate.get("gate", tag)
|
||||||
|
description = gate.get("description", tag)
|
||||||
|
fix = gate.get("fix", "See CLAUDE.md quality gates.")
|
||||||
|
auto = " (auto-fixable)" if gate.get("auto_fixable") else ""
|
||||||
|
|
||||||
|
lines.append(f"**[{icon}] {gate_name}**: {description}{auto}")
|
||||||
|
lines.append(f" - Fix: {fix}")
|
||||||
|
lines.append("")
|
||||||
|
|
||||||
|
return "\n".join(lines)
|
||||||
|
|
||||||
|
|
||||||
|
def parse_rejection_comment(comment_body: str) -> dict | None:
|
||||||
|
"""Parse a structured rejection comment. Returns rejection data or None."""
|
||||||
|
match = re.search(r"<!-- REJECTION: ({.+?}) -->", comment_body)
|
||||||
|
if match:
|
||||||
|
try:
|
||||||
|
return json.loads(match.group(1))
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
return None
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Per-Agent Error Tracking ──────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def get_agent_error_patterns(conn, agent: str, hours: int = 168) -> dict:
|
||||||
|
"""Get rejection patterns for a specific agent over the last N hours.
|
||||||
|
|
||||||
|
Returns {total_prs, rejected_prs, top_issues, issue_breakdown, trend}.
|
||||||
|
Default 168 hours = 7 days.
|
||||||
|
"""
|
||||||
|
# Get PRs by this agent in the time window
|
||||||
|
rows = conn.execute(
|
||||||
|
"""SELECT number, status, eval_issues, domain_verdict, leo_verdict,
|
||||||
|
tier, created_at, last_attempt
|
||||||
|
FROM prs
|
||||||
|
WHERE agent = ?
|
||||||
|
AND last_attempt > datetime('now', ? || ' hours')
|
||||||
|
ORDER BY last_attempt DESC""",
|
||||||
|
(agent, f"-{hours}"),
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
total = len(rows)
|
||||||
|
if total == 0:
|
||||||
|
return {"total_prs": 0, "rejected_prs": 0, "approval_rate": None,
|
||||||
|
"top_issues": [], "issue_breakdown": {}, "trend": "no_data"}
|
||||||
|
|
||||||
|
rejected = 0
|
||||||
|
issue_counts: dict[str, int] = {}
|
||||||
|
|
||||||
|
for row in rows:
|
||||||
|
status = row["status"]
|
||||||
|
if status in ("closed", "zombie"):
|
||||||
|
rejected += 1
|
||||||
|
|
||||||
|
issues_raw = row["eval_issues"]
|
||||||
|
if issues_raw and issues_raw != "[]":
|
||||||
|
try:
|
||||||
|
tags = json.loads(issues_raw)
|
||||||
|
for tag in tags:
|
||||||
|
if isinstance(tag, str):
|
||||||
|
issue_counts[tag] = issue_counts.get(tag, 0) + 1
|
||||||
|
except (json.JSONDecodeError, TypeError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
approval_rate = round((total - rejected) / total, 3) if total > 0 else None
|
||||||
|
top_issues = sorted(issue_counts.items(), key=lambda x: x[1], reverse=True)[:5]
|
||||||
|
|
||||||
|
# Add guidance for top issues
|
||||||
|
top_with_guidance = []
|
||||||
|
for tag, count in top_issues:
|
||||||
|
gate = QUALITY_GATES.get(tag, {})
|
||||||
|
top_with_guidance.append({
|
||||||
|
"tag": tag,
|
||||||
|
"count": count,
|
||||||
|
"pct": round(count / total * 100, 1),
|
||||||
|
"gate": gate.get("gate", tag),
|
||||||
|
"fix": gate.get("fix", "See CLAUDE.md"),
|
||||||
|
"auto_fixable": gate.get("auto_fixable", False),
|
||||||
|
})
|
||||||
|
|
||||||
|
return {
|
||||||
|
"agent": agent,
|
||||||
|
"period_hours": hours,
|
||||||
|
"total_prs": total,
|
||||||
|
"rejected_prs": rejected,
|
||||||
|
"approval_rate": approval_rate,
|
||||||
|
"top_issues": top_with_guidance,
|
||||||
|
"issue_breakdown": issue_counts,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def get_all_agent_patterns(conn, hours: int = 168) -> dict:
|
||||||
|
"""Get rejection patterns for all agents. Returns {agent: patterns}."""
|
||||||
|
agents = conn.execute(
|
||||||
|
"""SELECT DISTINCT agent FROM prs
|
||||||
|
WHERE agent IS NOT NULL
|
||||||
|
AND last_attempt > datetime('now', ? || ' hours')""",
|
||||||
|
(f"-{hours}",),
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
return {
|
||||||
|
row["agent"]: get_agent_error_patterns(conn, row["agent"], hours)
|
||||||
|
for row in agents
|
||||||
|
}
|
||||||
295
ops/pipeline-v2/lib/fixer.py
Normal file
295
ops/pipeline-v2/lib/fixer.py
Normal file
|
|
@ -0,0 +1,295 @@
|
||||||
|
"""Auto-fixer stage — mechanical fixes for known issue types.
|
||||||
|
|
||||||
|
Currently fixes:
|
||||||
|
- broken_wiki_links: strips [[ ]] brackets from links that don't resolve
|
||||||
|
|
||||||
|
Runs as a pipeline stage on FIX_INTERVAL. Only fixes mechanical issues
|
||||||
|
that don't require content understanding. Does NOT fix frontmatter_schema,
|
||||||
|
near_duplicate, or any substantive issues.
|
||||||
|
|
||||||
|
Key design decisions (Ganymede):
|
||||||
|
- Only fix files in the PR diff (not the whole worktree/repo)
|
||||||
|
- Add intra-PR file stems to valid set (avoids stripping cross-references
|
||||||
|
between new claims in the same PR)
|
||||||
|
- Atomic claim via status='fixing' (same pattern as eval's 'reviewing')
|
||||||
|
- fix_attempts cap prevents infinite fix loops
|
||||||
|
- Reset eval_attempts + tier0_pass on successful fix for re-evaluation
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from . import config, db
|
||||||
|
from .validate import WIKI_LINK_RE, load_existing_claims
|
||||||
|
|
||||||
|
logger = logging.getLogger("pipeline.fixer")
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Git helper (async subprocess, same pattern as merge.py) ─────────────
|
||||||
|
|
||||||
|
|
||||||
|
async def _git(*args, cwd: str = None, timeout: int = 60) -> tuple[int, str]:
|
||||||
|
"""Run a git command async. Returns (returncode, combined output)."""
|
||||||
|
proc = await asyncio.create_subprocess_exec(
|
||||||
|
"git",
|
||||||
|
*args,
|
||||||
|
cwd=cwd or str(config.REPO_DIR),
|
||||||
|
stdout=asyncio.subprocess.PIPE,
|
||||||
|
stderr=asyncio.subprocess.PIPE,
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=timeout)
|
||||||
|
except asyncio.TimeoutError:
|
||||||
|
proc.kill()
|
||||||
|
await proc.wait()
|
||||||
|
return -1, f"git {args[0]} timed out after {timeout}s"
|
||||||
|
output = (stdout or b"").decode().strip()
|
||||||
|
if stderr:
|
||||||
|
output += "\n" + stderr.decode().strip()
|
||||||
|
return proc.returncode, output
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Wiki link fixer ─────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
async def _fix_wiki_links_in_pr(conn, pr_number: int) -> dict:
|
||||||
|
"""Fix broken wiki links in a single PR by stripping brackets.
|
||||||
|
|
||||||
|
Only processes files in the PR diff (not the whole repo).
|
||||||
|
Adds intra-PR file stems to the valid set so cross-references
|
||||||
|
between new claims in the same PR are preserved.
|
||||||
|
"""
|
||||||
|
# Atomic claim — prevent concurrent fixers and evaluators
|
||||||
|
cursor = conn.execute(
|
||||||
|
"UPDATE prs SET status = 'fixing', last_attempt = datetime('now') WHERE number = ? AND status = 'open'",
|
||||||
|
(pr_number,),
|
||||||
|
)
|
||||||
|
if cursor.rowcount == 0:
|
||||||
|
return {"pr": pr_number, "skipped": True, "reason": "not_open"}
|
||||||
|
|
||||||
|
# Increment fix_attempts
|
||||||
|
conn.execute(
|
||||||
|
"UPDATE prs SET fix_attempts = COALESCE(fix_attempts, 0) + 1 WHERE number = ?",
|
||||||
|
(pr_number,),
|
||||||
|
)
|
||||||
|
|
||||||
|
# Get PR branch from DB first, fall back to Forgejo API
|
||||||
|
row = conn.execute("SELECT branch FROM prs WHERE number = ?", (pr_number,)).fetchone()
|
||||||
|
branch = row["branch"] if row and row["branch"] else None
|
||||||
|
|
||||||
|
if not branch:
|
||||||
|
from .forgejo import api as forgejo_api
|
||||||
|
from .forgejo import repo_path
|
||||||
|
|
||||||
|
pr_info = await forgejo_api("GET", repo_path(f"pulls/{pr_number}"))
|
||||||
|
if pr_info:
|
||||||
|
branch = pr_info.get("head", {}).get("ref")
|
||||||
|
|
||||||
|
if not branch:
|
||||||
|
conn.execute("UPDATE prs SET status = 'open' WHERE number = ?", (pr_number,))
|
||||||
|
return {"pr": pr_number, "skipped": True, "reason": "no_branch"}
|
||||||
|
|
||||||
|
# Fetch latest refs
|
||||||
|
await _git("fetch", "origin", branch, timeout=30)
|
||||||
|
|
||||||
|
# Create worktree
|
||||||
|
worktree_path = str(config.BASE_DIR / "workspaces" / f"fix-{pr_number}")
|
||||||
|
|
||||||
|
rc, out = await _git("worktree", "add", "--detach", worktree_path, f"origin/{branch}")
|
||||||
|
if rc != 0:
|
||||||
|
logger.error("PR #%d: worktree creation failed: %s", pr_number, out)
|
||||||
|
conn.execute("UPDATE prs SET status = 'open' WHERE number = ?", (pr_number,))
|
||||||
|
return {"pr": pr_number, "skipped": True, "reason": "worktree_failed"}
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Checkout the actual branch (so we can push)
|
||||||
|
rc, out = await _git("checkout", "-B", branch, f"origin/{branch}", cwd=worktree_path)
|
||||||
|
if rc != 0:
|
||||||
|
logger.error("PR #%d: checkout failed: %s", pr_number, out)
|
||||||
|
conn.execute("UPDATE prs SET status = 'open' WHERE number = ?", (pr_number,))
|
||||||
|
return {"pr": pr_number, "skipped": True, "reason": "checkout_failed"}
|
||||||
|
|
||||||
|
# Get files changed in PR (only fix these, not the whole repo)
|
||||||
|
rc, out = await _git("diff", "--name-only", "origin/main...HEAD", cwd=worktree_path)
|
||||||
|
if rc != 0:
|
||||||
|
conn.execute("UPDATE prs SET status = 'open' WHERE number = ?", (pr_number,))
|
||||||
|
return {"pr": pr_number, "skipped": True, "reason": "diff_failed"}
|
||||||
|
|
||||||
|
pr_files = [f for f in out.split("\n") if f.strip() and f.endswith(".md")]
|
||||||
|
|
||||||
|
if not pr_files:
|
||||||
|
conn.execute("UPDATE prs SET status = 'open' WHERE number = ?", (pr_number,))
|
||||||
|
return {"pr": pr_number, "skipped": True, "reason": "no_md_files"}
|
||||||
|
|
||||||
|
# Load existing claims from main + add intra-PR stems
|
||||||
|
# (avoids stripping cross-references between new claims in same PR)
|
||||||
|
existing_claims = load_existing_claims()
|
||||||
|
for f in pr_files:
|
||||||
|
existing_claims.add(Path(f).stem)
|
||||||
|
|
||||||
|
# Fix broken links in each PR file
|
||||||
|
total_fixed = 0
|
||||||
|
|
||||||
|
for filepath in pr_files:
|
||||||
|
full_path = Path(worktree_path) / filepath
|
||||||
|
if not full_path.is_file():
|
||||||
|
continue
|
||||||
|
|
||||||
|
content = full_path.read_text(encoding="utf-8")
|
||||||
|
file_fixes = 0
|
||||||
|
|
||||||
|
def replace_broken_link(match):
|
||||||
|
nonlocal file_fixes
|
||||||
|
link_text = match.group(1)
|
||||||
|
if link_text.strip() not in existing_claims:
|
||||||
|
file_fixes += 1
|
||||||
|
return link_text # Strip brackets, keep text
|
||||||
|
return match.group(0) # Keep valid link
|
||||||
|
|
||||||
|
new_content = WIKI_LINK_RE.sub(replace_broken_link, content)
|
||||||
|
if new_content != content:
|
||||||
|
full_path.write_text(new_content, encoding="utf-8")
|
||||||
|
total_fixed += file_fixes
|
||||||
|
|
||||||
|
if total_fixed == 0:
|
||||||
|
# No broken links found — issue might be something else
|
||||||
|
conn.execute("UPDATE prs SET status = 'open' WHERE number = ?", (pr_number,))
|
||||||
|
return {"pr": pr_number, "skipped": True, "reason": "no_broken_links"}
|
||||||
|
|
||||||
|
# Commit and push
|
||||||
|
rc, out = await _git("add", *pr_files, cwd=worktree_path)
|
||||||
|
if rc != 0:
|
||||||
|
conn.execute("UPDATE prs SET status = 'open' WHERE number = ?", (pr_number,))
|
||||||
|
return {"pr": pr_number, "skipped": True, "reason": "git_add_failed"}
|
||||||
|
|
||||||
|
commit_msg = (
|
||||||
|
f"auto-fix: strip {total_fixed} broken wiki links\n\n"
|
||||||
|
f"Pipeline auto-fixer: removed [[ ]] brackets from links\n"
|
||||||
|
f"that don't resolve to existing claims in the knowledge base."
|
||||||
|
)
|
||||||
|
rc, out = await _git("commit", "-m", commit_msg, cwd=worktree_path)
|
||||||
|
if rc != 0:
|
||||||
|
conn.execute("UPDATE prs SET status = 'open' WHERE number = ?", (pr_number,))
|
||||||
|
return {"pr": pr_number, "skipped": True, "reason": "commit_failed"}
|
||||||
|
|
||||||
|
# Reset eval state BEFORE push — if daemon crashes between push and
|
||||||
|
# reset, the PR would be permanently stuck at max eval_attempts.
|
||||||
|
# Reset-first: worst case is one wasted eval cycle on old content.
|
||||||
|
conn.execute(
|
||||||
|
"""UPDATE prs SET
|
||||||
|
status = 'open',
|
||||||
|
eval_attempts = 0,
|
||||||
|
eval_issues = '[]',
|
||||||
|
tier0_pass = NULL,
|
||||||
|
domain_verdict = 'pending',
|
||||||
|
leo_verdict = 'pending',
|
||||||
|
last_error = NULL
|
||||||
|
WHERE number = ?""",
|
||||||
|
(pr_number,),
|
||||||
|
)
|
||||||
|
|
||||||
|
rc, out = await _git("push", "origin", branch, cwd=worktree_path, timeout=30)
|
||||||
|
if rc != 0:
|
||||||
|
logger.error("PR #%d: push failed: %s", pr_number, out)
|
||||||
|
# Eval state already reset — PR will re-evaluate old content,
|
||||||
|
# find same issues, and fixer will retry next cycle. No harm.
|
||||||
|
return {"pr": pr_number, "skipped": True, "reason": "push_failed"}
|
||||||
|
|
||||||
|
db.audit(
|
||||||
|
conn,
|
||||||
|
"fixer",
|
||||||
|
"wiki_links_fixed",
|
||||||
|
json.dumps({"pr": pr_number, "links_fixed": total_fixed}),
|
||||||
|
)
|
||||||
|
logger.info("PR #%d: fixed %d broken wiki links, reset for re-evaluation", pr_number, total_fixed)
|
||||||
|
|
||||||
|
return {"pr": pr_number, "fixed": True, "links_fixed": total_fixed}
|
||||||
|
|
||||||
|
finally:
|
||||||
|
# Always cleanup worktree
|
||||||
|
await _git("worktree", "remove", "--force", worktree_path)
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Stage entry point ───────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
async def fix_cycle(conn, max_workers=None) -> tuple[int, int]:
|
||||||
|
"""Run one fix cycle. Returns (fixed, errors).
|
||||||
|
|
||||||
|
Finds PRs with broken_wiki_links issues (from eval or tier0) that
|
||||||
|
haven't exceeded fix_attempts cap. Processes up to 5 per cycle
|
||||||
|
to avoid overlapping with eval.
|
||||||
|
"""
|
||||||
|
# Garbage collection: close PRs with exhausted fix budget that are stuck in open.
|
||||||
|
# These were evaluated, rejected, fixer couldn't help, nobody closes them.
|
||||||
|
# (Epimetheus session 2 — prevents zombie PR accumulation)
|
||||||
|
# Bug fix: must also close on Forgejo + delete branch, not just DB update.
|
||||||
|
# DB-only close caused Forgejo/DB state divergence — branches stayed alive,
|
||||||
|
# blocking Gate 2 in batch-extract for 5 days. (Epimetheus session 4)
|
||||||
|
gc_rows = conn.execute(
|
||||||
|
"""SELECT number, branch FROM prs
|
||||||
|
WHERE status = 'open'
|
||||||
|
AND fix_attempts >= ?
|
||||||
|
AND (domain_verdict = 'request_changes' OR leo_verdict = 'request_changes')""",
|
||||||
|
(config.MAX_FIX_ATTEMPTS + 2,),
|
||||||
|
).fetchall()
|
||||||
|
if gc_rows:
|
||||||
|
from .forgejo import api as _gc_forgejo, repo_path as _gc_repo_path
|
||||||
|
for row in gc_rows:
|
||||||
|
pr_num, branch = row["number"], row["branch"]
|
||||||
|
try:
|
||||||
|
await _gc_forgejo("POST", _gc_repo_path(f"issues/{pr_num}/comments"),
|
||||||
|
{"body": "Auto-closed: fix budget exhausted. Source will be re-extracted."})
|
||||||
|
await _gc_forgejo("PATCH", _gc_repo_path(f"pulls/{pr_num}"), {"state": "closed"})
|
||||||
|
if branch:
|
||||||
|
await _gc_forgejo("DELETE", _gc_repo_path(f"branches/{branch}"))
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning("GC: failed to close PR #%d on Forgejo: %s", pr_num, e)
|
||||||
|
conn.execute(
|
||||||
|
"UPDATE prs SET status = 'closed', last_error = 'fix budget exhausted — auto-closed' WHERE number = ?",
|
||||||
|
(pr_num,),
|
||||||
|
)
|
||||||
|
logger.info("GC: closed %d exhausted PRs (DB + Forgejo + branch cleanup)", len(gc_rows))
|
||||||
|
|
||||||
|
batch_limit = min(max_workers or config.MAX_FIX_PER_CYCLE, config.MAX_FIX_PER_CYCLE)
|
||||||
|
|
||||||
|
# Only fix PRs that passed tier0 but have broken_wiki_links from eval.
|
||||||
|
# Do NOT fix PRs with tier0_pass=0 where the only issue is wiki links —
|
||||||
|
# wiki links are warnings, not gates. Fixing them creates an infinite
|
||||||
|
# fixer→validate→fixer loop. (Epimetheus session 2 — root cause of overnight stall)
|
||||||
|
rows = conn.execute(
|
||||||
|
"""SELECT number FROM prs
|
||||||
|
WHERE status = 'open'
|
||||||
|
AND tier0_pass = 1
|
||||||
|
AND eval_issues LIKE '%broken_wiki_links%'
|
||||||
|
AND COALESCE(fix_attempts, 0) < ?
|
||||||
|
AND (last_attempt IS NULL OR last_attempt < datetime('now', '-5 minutes'))
|
||||||
|
ORDER BY created_at ASC
|
||||||
|
LIMIT ?""",
|
||||||
|
(config.MAX_FIX_ATTEMPTS, batch_limit),
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
if not rows:
|
||||||
|
return 0, 0
|
||||||
|
|
||||||
|
fixed = 0
|
||||||
|
errors = 0
|
||||||
|
|
||||||
|
for row in rows:
|
||||||
|
try:
|
||||||
|
result = await _fix_wiki_links_in_pr(conn, row["number"])
|
||||||
|
if result.get("fixed"):
|
||||||
|
fixed += 1
|
||||||
|
elif result.get("skipped"):
|
||||||
|
logger.debug("PR #%d fix skipped: %s", row["number"], result.get("reason"))
|
||||||
|
except Exception:
|
||||||
|
logger.exception("Failed to fix PR #%d", row["number"])
|
||||||
|
errors += 1
|
||||||
|
conn.execute("UPDATE prs SET status = 'open' WHERE number = ?", (row["number"],))
|
||||||
|
|
||||||
|
if fixed or errors:
|
||||||
|
logger.info("Fix cycle: %d fixed, %d errors", fixed, errors)
|
||||||
|
|
||||||
|
return fixed, errors
|
||||||
89
ops/pipeline-v2/lib/forgejo.py
Normal file
89
ops/pipeline-v2/lib/forgejo.py
Normal file
|
|
@ -0,0 +1,89 @@
|
||||||
|
"""Forgejo API client — single shared module for all pipeline stages.
|
||||||
|
|
||||||
|
Extracted from evaluate.py, merge.py, validate.py (Phase 3 refactor).
|
||||||
|
All Forgejo HTTP calls go through this module.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
|
||||||
|
import aiohttp
|
||||||
|
|
||||||
|
from . import config
|
||||||
|
|
||||||
|
logger = logging.getLogger("pipeline.forgejo")
|
||||||
|
|
||||||
|
|
||||||
|
async def api(method: str, path: str, body: dict = None, token: str = None):
|
||||||
|
"""Call Forgejo API. Returns parsed JSON, {} for 204, or None on error.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
method: HTTP method (GET, POST, DELETE, etc.)
|
||||||
|
path: API path after /api/v1 (e.g. "/repos/teleo/teleo-codex/pulls")
|
||||||
|
body: JSON body for POST/PUT/PATCH
|
||||||
|
token: Override token. If None, reads from FORGEJO_TOKEN_FILE (admin token).
|
||||||
|
"""
|
||||||
|
url = f"{config.FORGEJO_URL}/api/v1{path}"
|
||||||
|
if token is None:
|
||||||
|
token = config.FORGEJO_TOKEN_FILE.read_text().strip() if config.FORGEJO_TOKEN_FILE.exists() else ""
|
||||||
|
headers = {"Authorization": f"token {token}", "Content-Type": "application/json"}
|
||||||
|
|
||||||
|
try:
|
||||||
|
async with aiohttp.ClientSession() as session:
|
||||||
|
async with session.request(
|
||||||
|
method, url, headers=headers, json=body, timeout=aiohttp.ClientTimeout(total=60)
|
||||||
|
) as resp:
|
||||||
|
if resp.status >= 400:
|
||||||
|
text = await resp.text()
|
||||||
|
logger.error("Forgejo API %s %s → %d: %s", method, path, resp.status, text[:200])
|
||||||
|
return None
|
||||||
|
if resp.status == 204:
|
||||||
|
return {}
|
||||||
|
# Forgejo sometimes returns 200 with HTML (not JSON) on merge success.
|
||||||
|
# Treat 200 with non-JSON content-type as success rather than error.
|
||||||
|
content_type = resp.content_type or ""
|
||||||
|
if "json" not in content_type:
|
||||||
|
logger.debug("Forgejo API %s %s → %d (non-JSON: %s), treating as success", method, path, resp.status, content_type)
|
||||||
|
return {}
|
||||||
|
return await resp.json()
|
||||||
|
except Exception as e:
|
||||||
|
logger.error("Forgejo API error: %s %s → %s", method, path, e)
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
async def get_pr_diff(pr_number: int) -> str:
|
||||||
|
"""Fetch PR diff via Forgejo API. Returns diff text or empty string."""
|
||||||
|
url = f"{config.FORGEJO_URL}/api/v1/repos/{config.FORGEJO_OWNER}/{config.FORGEJO_REPO}/pulls/{pr_number}.diff"
|
||||||
|
token = config.FORGEJO_TOKEN_FILE.read_text().strip() if config.FORGEJO_TOKEN_FILE.exists() else ""
|
||||||
|
|
||||||
|
try:
|
||||||
|
async with aiohttp.ClientSession() as session:
|
||||||
|
async with session.get(
|
||||||
|
url,
|
||||||
|
headers={"Authorization": f"token {token}", "Accept": "text/plain"},
|
||||||
|
timeout=aiohttp.ClientTimeout(total=60),
|
||||||
|
) as resp:
|
||||||
|
if resp.status >= 400:
|
||||||
|
return ""
|
||||||
|
diff = await resp.text()
|
||||||
|
if len(diff) > 2_000_000:
|
||||||
|
return ""
|
||||||
|
return diff
|
||||||
|
except Exception as e:
|
||||||
|
logger.error("Failed to fetch diff for PR #%d: %s", pr_number, e)
|
||||||
|
return ""
|
||||||
|
|
||||||
|
|
||||||
|
def get_agent_token(agent_name: str) -> str | None:
|
||||||
|
"""Read Forgejo token for a named agent. Returns token string or None."""
|
||||||
|
token_file = config.SECRETS_DIR / f"forgejo-{agent_name.lower()}-token"
|
||||||
|
if token_file.exists():
|
||||||
|
return token_file.read_text().strip()
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def repo_path(subpath: str = "") -> str:
|
||||||
|
"""Build standard repo API path: /repos/{owner}/{repo}/{subpath}."""
|
||||||
|
base = f"/repos/{config.FORGEJO_OWNER}/{config.FORGEJO_REPO}"
|
||||||
|
if subpath:
|
||||||
|
return f"{base}/{subpath}"
|
||||||
|
return base
|
||||||
838
ops/pipeline-v2/lib/health.py
Normal file
838
ops/pipeline-v2/lib/health.py
Normal file
|
|
@ -0,0 +1,838 @@
|
||||||
|
"""Health API — HTTP server on configurable port for monitoring."""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import statistics
|
||||||
|
from datetime import date, datetime, timezone
|
||||||
|
|
||||||
|
from aiohttp import web
|
||||||
|
|
||||||
|
from . import config, costs, db
|
||||||
|
from .analytics import get_snapshot_history, get_version_changes
|
||||||
|
from .claim_index import build_claim_index, write_claim_index
|
||||||
|
from .feedback import get_agent_error_patterns, get_all_agent_patterns
|
||||||
|
from .search import check_duplicate
|
||||||
|
|
||||||
|
logger = logging.getLogger("pipeline.health")
|
||||||
|
|
||||||
|
|
||||||
|
def _conn(request):
|
||||||
|
"""Get the persistent readonly connection from app state."""
|
||||||
|
return request.app["db"]
|
||||||
|
|
||||||
|
|
||||||
|
async def handle_health(request):
|
||||||
|
"""GET /health — overall pipeline health."""
|
||||||
|
conn = _conn(request)
|
||||||
|
|
||||||
|
# Stage status from circuit breakers
|
||||||
|
breakers = conn.execute(
|
||||||
|
"SELECT name, state, failures, last_success_at, last_update FROM circuit_breakers"
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
# Queue depths
|
||||||
|
sources_by_status = conn.execute("SELECT status, COUNT(*) as n FROM sources GROUP BY status").fetchall()
|
||||||
|
prs_by_status = conn.execute("SELECT status, COUNT(*) as n FROM prs GROUP BY status").fetchall()
|
||||||
|
|
||||||
|
# Per-domain merge queue depth (Vida)
|
||||||
|
merge_queue = conn.execute(
|
||||||
|
"SELECT domain, COUNT(*) as n FROM prs WHERE status = 'approved' GROUP BY domain"
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
# Cost
|
||||||
|
budget = costs.check_budget(conn)
|
||||||
|
|
||||||
|
# Metabolic metrics (Vida)
|
||||||
|
null_rate = conn.execute(
|
||||||
|
"""SELECT
|
||||||
|
CAST(SUM(CASE WHEN status = 'null_result' THEN 1 ELSE 0 END) AS REAL) /
|
||||||
|
NULLIF(COUNT(*), 0) as rate
|
||||||
|
FROM sources
|
||||||
|
WHERE updated_at > datetime('now', '-24 hours')
|
||||||
|
AND status IN ('extracted', 'null_result', 'error')"""
|
||||||
|
).fetchone()
|
||||||
|
|
||||||
|
approval_rate = conn.execute(
|
||||||
|
"""SELECT
|
||||||
|
CAST(SUM(CASE WHEN domain_verdict = 'approve' THEN 1 ELSE 0 END) AS REAL) /
|
||||||
|
NULLIF(COUNT(*), 0) as domain_rate,
|
||||||
|
CAST(SUM(CASE WHEN leo_verdict = 'approve' THEN 1 ELSE 0 END) AS REAL) /
|
||||||
|
NULLIF(COUNT(*), 0) as leo_rate
|
||||||
|
FROM prs
|
||||||
|
WHERE last_attempt > datetime('now', '-24 hours')
|
||||||
|
AND domain_verdict != 'pending'"""
|
||||||
|
).fetchone()
|
||||||
|
|
||||||
|
# Recent activity (last hour)
|
||||||
|
recent = conn.execute(
|
||||||
|
"""SELECT stage, event, COUNT(*) as n
|
||||||
|
FROM audit_log
|
||||||
|
WHERE timestamp > datetime('now', '-1 hour')
|
||||||
|
GROUP BY stage, event"""
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
body = {
|
||||||
|
"status": "healthy",
|
||||||
|
"breakers": {},
|
||||||
|
"sources": {r["status"]: r["n"] for r in sources_by_status},
|
||||||
|
"prs": {r["status"]: r["n"] for r in prs_by_status},
|
||||||
|
"merge_queue_by_domain": {r["domain"]: r["n"] for r in merge_queue},
|
||||||
|
"budget": budget,
|
||||||
|
"metabolic": {
|
||||||
|
"null_result_rate_24h": round(null_rate["rate"], 3)
|
||||||
|
if null_rate and null_rate["rate"] is not None
|
||||||
|
else None,
|
||||||
|
"domain_approval_rate_24h": round(approval_rate["domain_rate"], 3)
|
||||||
|
if approval_rate and approval_rate["domain_rate"] is not None
|
||||||
|
else None,
|
||||||
|
"leo_approval_rate_24h": round(approval_rate["leo_rate"], 3)
|
||||||
|
if approval_rate and approval_rate["leo_rate"] is not None
|
||||||
|
else None,
|
||||||
|
},
|
||||||
|
"recent_activity": [{"stage": r["stage"], "event": r["event"], "count": r["n"]} for r in recent],
|
||||||
|
}
|
||||||
|
|
||||||
|
# Breaker state + stall detection (Vida: last_success_at heartbeat)
|
||||||
|
for r in breakers:
|
||||||
|
breaker_info = {"state": r["state"], "failures": r["failures"]}
|
||||||
|
if r["last_success_at"]:
|
||||||
|
last = datetime.fromisoformat(r["last_success_at"])
|
||||||
|
if last.tzinfo is None:
|
||||||
|
last = last.replace(tzinfo=timezone.utc)
|
||||||
|
age_s = (datetime.now(timezone.utc) - last).total_seconds()
|
||||||
|
breaker_info["last_success_age_s"] = round(age_s)
|
||||||
|
# Stall detection: no success in 2x the stage's interval
|
||||||
|
intervals = {
|
||||||
|
"ingest": config.INGEST_INTERVAL,
|
||||||
|
"validate": config.VALIDATE_INTERVAL,
|
||||||
|
"evaluate": config.EVAL_INTERVAL,
|
||||||
|
"merge": config.MERGE_INTERVAL,
|
||||||
|
}
|
||||||
|
threshold = intervals.get(r["name"], 60) * 2
|
||||||
|
if age_s > threshold:
|
||||||
|
breaker_info["stalled"] = True
|
||||||
|
body["breakers"][r["name"]] = breaker_info
|
||||||
|
|
||||||
|
# Overall status
|
||||||
|
if any(b.get("stalled") for b in body["breakers"].values()):
|
||||||
|
body["status"] = "stalled"
|
||||||
|
if any(b["state"] == "open" for b in body["breakers"].values()):
|
||||||
|
body["status"] = "degraded"
|
||||||
|
if not budget["ok"]:
|
||||||
|
body["status"] = "budget_exhausted"
|
||||||
|
# Rubber-stamp warning (Vida)
|
||||||
|
if approval_rate and approval_rate["domain_rate"] is not None and approval_rate["domain_rate"] > 0.95:
|
||||||
|
body["metabolic"]["warning"] = "domain approval rate >95% — possible rubber-stamping"
|
||||||
|
|
||||||
|
status_code = 200 if body["status"] == "healthy" else 503
|
||||||
|
return web.json_response(body, status=status_code)
|
||||||
|
|
||||||
|
|
||||||
|
async def handle_costs(request):
|
||||||
|
"""GET /costs — daily cost breakdown."""
|
||||||
|
conn = _conn(request)
|
||||||
|
day = request.query.get("date", date.today().isoformat())
|
||||||
|
breakdown = costs.get_daily_breakdown(conn, day)
|
||||||
|
budget = costs.check_budget(conn)
|
||||||
|
return web.json_response({"date": day, "budget": budget, "breakdown": breakdown})
|
||||||
|
|
||||||
|
|
||||||
|
async def handle_sources(request):
|
||||||
|
"""GET /sources — source pipeline status."""
|
||||||
|
conn = _conn(request)
|
||||||
|
status_filter = request.query.get("status")
|
||||||
|
if status_filter:
|
||||||
|
rows = conn.execute(
|
||||||
|
"SELECT path, status, priority, claims_count, transient_retries, substantive_retries, updated_at FROM sources WHERE status = ? ORDER BY updated_at DESC LIMIT 50",
|
||||||
|
(status_filter,),
|
||||||
|
).fetchall()
|
||||||
|
else:
|
||||||
|
rows = conn.execute(
|
||||||
|
"SELECT path, status, priority, claims_count, transient_retries, substantive_retries, updated_at FROM sources ORDER BY updated_at DESC LIMIT 50"
|
||||||
|
).fetchall()
|
||||||
|
return web.json_response({"sources": [dict(r) for r in rows]})
|
||||||
|
|
||||||
|
|
||||||
|
async def handle_prs(request):
|
||||||
|
"""GET /prs — PR pipeline status."""
|
||||||
|
conn = _conn(request)
|
||||||
|
status_filter = request.query.get("status")
|
||||||
|
if status_filter:
|
||||||
|
rows = conn.execute(
|
||||||
|
"SELECT number, source_path, status, domain, tier, leo_verdict, domain_verdict, transient_retries, substantive_retries FROM prs WHERE status = ? ORDER BY number DESC LIMIT 50",
|
||||||
|
(status_filter,),
|
||||||
|
).fetchall()
|
||||||
|
else:
|
||||||
|
rows = conn.execute(
|
||||||
|
"SELECT number, source_path, status, domain, tier, leo_verdict, domain_verdict, transient_retries, substantive_retries FROM prs ORDER BY number DESC LIMIT 50"
|
||||||
|
).fetchall()
|
||||||
|
return web.json_response({"prs": [dict(r) for r in rows]})
|
||||||
|
|
||||||
|
|
||||||
|
async def handle_breakers(request):
|
||||||
|
"""GET /breakers — circuit breaker states."""
|
||||||
|
conn = _conn(request)
|
||||||
|
rows = conn.execute("SELECT * FROM circuit_breakers").fetchall()
|
||||||
|
return web.json_response({"breakers": [dict(r) for r in rows]})
|
||||||
|
|
||||||
|
|
||||||
|
async def handle_calibration(request):
|
||||||
|
"""GET /calibration — priority calibration analysis (Vida)."""
|
||||||
|
conn = _conn(request)
|
||||||
|
# Find sources where eval disagreed with ingest priority
|
||||||
|
# Focus on upgrades (Theseus: upgrades are the learnable signal)
|
||||||
|
rows = conn.execute(
|
||||||
|
"""SELECT path, priority, priority_log FROM sources
|
||||||
|
WHERE json_array_length(priority_log) >= 2"""
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
upgrades = []
|
||||||
|
downgrades = []
|
||||||
|
for r in rows:
|
||||||
|
import json
|
||||||
|
|
||||||
|
log = json.loads(r["priority_log"] or "[]")
|
||||||
|
if len(log) < 2:
|
||||||
|
continue
|
||||||
|
first = log[0]["priority"]
|
||||||
|
last = log[-1]["priority"]
|
||||||
|
levels = {"critical": 4, "high": 3, "medium": 2, "low": 1, "skip": 0}
|
||||||
|
if levels.get(last, 2) > levels.get(first, 2):
|
||||||
|
upgrades.append({"path": r["path"], "from": first, "to": last})
|
||||||
|
elif levels.get(last, 2) < levels.get(first, 2):
|
||||||
|
downgrades.append({"path": r["path"], "from": first, "to": last})
|
||||||
|
|
||||||
|
return web.json_response(
|
||||||
|
{
|
||||||
|
"upgrades": upgrades[:20],
|
||||||
|
"downgrades_count": len(downgrades),
|
||||||
|
"upgrades_count": len(upgrades),
|
||||||
|
"note": "Focus on upgrades — downgrades are expected (downstream has more context)",
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
async def handle_metrics(request):
|
||||||
|
"""GET /metrics — operational health metrics (Rhea).
|
||||||
|
|
||||||
|
Leo's three numbers plus rejection reasons, time-to-merge, and fix effectiveness.
|
||||||
|
Data from audit_log + prs tables. Curl-friendly JSON.
|
||||||
|
"""
|
||||||
|
conn = _conn(request)
|
||||||
|
|
||||||
|
# --- 1. Throughput: PRs processed in last hour ---
|
||||||
|
throughput = conn.execute(
|
||||||
|
"""SELECT COUNT(*) as n FROM audit_log
|
||||||
|
WHERE timestamp > datetime('now', '-1 hour')
|
||||||
|
AND event IN ('approved', 'changes_requested', 'merged')"""
|
||||||
|
).fetchone()
|
||||||
|
prs_per_hour = throughput["n"] if throughput else 0
|
||||||
|
|
||||||
|
# --- 2. Approval rate (24h) ---
|
||||||
|
verdicts_24h = conn.execute(
|
||||||
|
"""SELECT
|
||||||
|
COUNT(*) as total,
|
||||||
|
SUM(CASE WHEN status = 'merged' THEN 1 ELSE 0 END) as merged,
|
||||||
|
SUM(CASE WHEN status = 'approved' THEN 1 ELSE 0 END) as approved,
|
||||||
|
SUM(CASE WHEN status = 'closed' THEN 1 ELSE 0 END) as closed
|
||||||
|
FROM prs
|
||||||
|
WHERE last_attempt > datetime('now', '-24 hours')"""
|
||||||
|
).fetchone()
|
||||||
|
total_24h = verdicts_24h["total"] if verdicts_24h else 0
|
||||||
|
passed_24h = (verdicts_24h["merged"] or 0) + (verdicts_24h["approved"] or 0)
|
||||||
|
approval_rate_24h = round(passed_24h / total_24h, 3) if total_24h > 0 else None
|
||||||
|
|
||||||
|
# --- 3. Backlog depth by status ---
|
||||||
|
backlog_rows = conn.execute(
|
||||||
|
"SELECT status, COUNT(*) as n FROM prs GROUP BY status"
|
||||||
|
).fetchall()
|
||||||
|
backlog = {r["status"]: r["n"] for r in backlog_rows}
|
||||||
|
|
||||||
|
# --- 4. Rejection reasons (top 10) ---
|
||||||
|
issue_rows = conn.execute(
|
||||||
|
"""SELECT eval_issues FROM prs
|
||||||
|
WHERE eval_issues IS NOT NULL AND eval_issues != '[]'
|
||||||
|
AND last_attempt > datetime('now', '-24 hours')"""
|
||||||
|
).fetchall()
|
||||||
|
tag_counts: dict[str, int] = {}
|
||||||
|
for row in issue_rows:
|
||||||
|
try:
|
||||||
|
tags = json.loads(row["eval_issues"])
|
||||||
|
except (json.JSONDecodeError, TypeError):
|
||||||
|
continue
|
||||||
|
for tag in tags:
|
||||||
|
if isinstance(tag, str):
|
||||||
|
tag_counts[tag] = tag_counts.get(tag, 0) + 1
|
||||||
|
rejection_reasons = sorted(tag_counts.items(), key=lambda x: x[1], reverse=True)[:10]
|
||||||
|
|
||||||
|
# --- 5. Median time-to-merge (24h, in minutes) ---
|
||||||
|
merge_times = conn.execute(
|
||||||
|
"""SELECT
|
||||||
|
(julianday(merged_at) - julianday(created_at)) * 24 * 60 as minutes
|
||||||
|
FROM prs
|
||||||
|
WHERE merged_at IS NOT NULL
|
||||||
|
AND merged_at > datetime('now', '-24 hours')"""
|
||||||
|
).fetchall()
|
||||||
|
durations = [r["minutes"] for r in merge_times if r["minutes"] is not None and r["minutes"] > 0]
|
||||||
|
median_ttm_minutes = round(statistics.median(durations), 1) if durations else None
|
||||||
|
|
||||||
|
# --- 6. Fix cycle effectiveness ---
|
||||||
|
fix_stats = conn.execute(
|
||||||
|
"""SELECT
|
||||||
|
COUNT(*) as attempted,
|
||||||
|
SUM(CASE WHEN status IN ('merged', 'approved') THEN 1 ELSE 0 END) as succeeded
|
||||||
|
FROM prs
|
||||||
|
WHERE fix_attempts > 0"""
|
||||||
|
).fetchone()
|
||||||
|
fix_attempted = fix_stats["attempted"] if fix_stats else 0
|
||||||
|
fix_succeeded = fix_stats["succeeded"] or 0 if fix_stats else 0
|
||||||
|
fix_rate = round(fix_succeeded / fix_attempted, 3) if fix_attempted > 0 else None
|
||||||
|
|
||||||
|
# --- 7. Cost summary (today) ---
|
||||||
|
budget = costs.check_budget(conn)
|
||||||
|
|
||||||
|
return web.json_response({
|
||||||
|
"throughput_prs_per_hour": prs_per_hour,
|
||||||
|
"approval_rate_24h": approval_rate_24h,
|
||||||
|
"backlog": backlog,
|
||||||
|
"rejection_reasons_24h": [{"tag": t, "count": c} for t, c in rejection_reasons],
|
||||||
|
"median_time_to_merge_minutes_24h": median_ttm_minutes,
|
||||||
|
"fix_cycle": {
|
||||||
|
"attempted": fix_attempted,
|
||||||
|
"succeeded": fix_succeeded,
|
||||||
|
"success_rate": fix_rate,
|
||||||
|
},
|
||||||
|
"cost_today": budget,
|
||||||
|
"prs_with_merge_times_24h": len(durations),
|
||||||
|
"prs_evaluated_24h": total_24h,
|
||||||
|
})
|
||||||
|
|
||||||
|
|
||||||
|
def pr_status(conn, pr_number: int | None = None, branch: str | None = None) -> dict:
|
||||||
|
"""Get PR status for agent consumption.
|
||||||
|
|
||||||
|
Look up by PR number or branch name. Returns state, eval verdicts,
|
||||||
|
merge status, time in queue, and rejection reasons.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
conn: SQLite connection with row_factory=sqlite3.Row
|
||||||
|
pr_number: PR number to look up
|
||||||
|
branch: Branch name to look up (fallback if no pr_number)
|
||||||
|
|
||||||
|
Returns dict with PR state or {"error": "not_found"}.
|
||||||
|
"""
|
||||||
|
if pr_number is not None:
|
||||||
|
row = conn.execute(
|
||||||
|
"""SELECT number, branch, source_path, status, domain, agent,
|
||||||
|
commit_type, tier, leo_verdict, domain_verdict,
|
||||||
|
domain_agent, eval_issues, priority, origin,
|
||||||
|
cost_usd, created_at, merged_at, last_attempt, last_error,
|
||||||
|
transient_retries, substantive_retries, description
|
||||||
|
FROM prs WHERE number = ?""",
|
||||||
|
(pr_number,),
|
||||||
|
).fetchone()
|
||||||
|
elif branch:
|
||||||
|
row = conn.execute(
|
||||||
|
"""SELECT number, branch, source_path, status, domain, agent,
|
||||||
|
commit_type, tier, leo_verdict, domain_verdict,
|
||||||
|
domain_agent, eval_issues, priority, origin,
|
||||||
|
cost_usd, created_at, merged_at, last_attempt, last_error,
|
||||||
|
transient_retries, substantive_retries, description
|
||||||
|
FROM prs WHERE branch = ?
|
||||||
|
ORDER BY number DESC LIMIT 1""",
|
||||||
|
(branch,),
|
||||||
|
).fetchone()
|
||||||
|
else:
|
||||||
|
return {"error": "pr_number or branch required"}
|
||||||
|
|
||||||
|
if not row:
|
||||||
|
return {"error": "not_found"}
|
||||||
|
|
||||||
|
# Parse eval issues
|
||||||
|
issues = []
|
||||||
|
try:
|
||||||
|
issues = json.loads(row["eval_issues"] or "[]")
|
||||||
|
except (json.JSONDecodeError, TypeError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Time in queue (created → now or merged)
|
||||||
|
time_in_queue_minutes = None
|
||||||
|
if row["created_at"]:
|
||||||
|
try:
|
||||||
|
created = datetime.fromisoformat(row["created_at"])
|
||||||
|
if created.tzinfo is None:
|
||||||
|
created = created.replace(tzinfo=timezone.utc)
|
||||||
|
if row["merged_at"]:
|
||||||
|
end = datetime.fromisoformat(row["merged_at"])
|
||||||
|
if end.tzinfo is None:
|
||||||
|
end = end.replace(tzinfo=timezone.utc)
|
||||||
|
else:
|
||||||
|
end = datetime.now(timezone.utc)
|
||||||
|
time_in_queue_minutes = round((end - created).total_seconds() / 60, 1)
|
||||||
|
except ValueError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
return {
|
||||||
|
"pr": row["number"],
|
||||||
|
"branch": row["branch"],
|
||||||
|
"source": row["source_path"],
|
||||||
|
"status": row["status"],
|
||||||
|
"domain": row["domain"],
|
||||||
|
"agent": row["agent"],
|
||||||
|
"commit_type": row["commit_type"],
|
||||||
|
"tier": row["tier"],
|
||||||
|
"leo_verdict": row["leo_verdict"],
|
||||||
|
"domain_verdict": row["domain_verdict"],
|
||||||
|
"domain_agent": row["domain_agent"],
|
||||||
|
"eval_issues": issues,
|
||||||
|
"priority": row["priority"],
|
||||||
|
"origin": row["origin"],
|
||||||
|
"cost_usd": row["cost_usd"],
|
||||||
|
"created_at": row["created_at"],
|
||||||
|
"merged_at": row["merged_at"],
|
||||||
|
"last_attempt": row["last_attempt"],
|
||||||
|
"last_error": row["last_error"],
|
||||||
|
"retries": {
|
||||||
|
"transient": row["transient_retries"],
|
||||||
|
"substantive": row["substantive_retries"],
|
||||||
|
},
|
||||||
|
"description": row["description"],
|
||||||
|
"time_in_queue_minutes": time_in_queue_minutes,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
async def handle_pr_status(request):
|
||||||
|
"""GET /pr/{number} — single PR status for agent consumption."""
|
||||||
|
conn = _conn(request)
|
||||||
|
try:
|
||||||
|
pr_number = int(request.match_info["number"])
|
||||||
|
except (KeyError, ValueError):
|
||||||
|
return web.json_response({"error": "invalid pr number"}, status=400)
|
||||||
|
result = pr_status(conn, pr_number=pr_number)
|
||||||
|
status_code = 200 if "error" not in result else 404
|
||||||
|
return web.json_response(result, status=status_code)
|
||||||
|
|
||||||
|
|
||||||
|
async def handle_check_duplicate(request):
|
||||||
|
"""GET /check-duplicate?text=...&domain=... — near-duplicate detection."""
|
||||||
|
text = request.query.get("text", "")
|
||||||
|
if not text:
|
||||||
|
return web.json_response({"error": "text parameter required"}, status=400)
|
||||||
|
domain = request.query.get("domain")
|
||||||
|
result = check_duplicate(text, domain=domain)
|
||||||
|
return web.json_response(result)
|
||||||
|
|
||||||
|
|
||||||
|
async def handle_activity(request):
|
||||||
|
"""GET /activity — condensed PR activity feed (Rhea).
|
||||||
|
|
||||||
|
Recent PR outcomes at a glance. Optional ?hours=N (default 1).
|
||||||
|
Summary line at top, then individual PRs sorted most-recent-first.
|
||||||
|
"""
|
||||||
|
conn = _conn(request)
|
||||||
|
hours = int(request.query.get("hours", "1"))
|
||||||
|
|
||||||
|
# Recent PRs with activity
|
||||||
|
rows = conn.execute(
|
||||||
|
"""SELECT number, source_path, domain, status, tier,
|
||||||
|
domain_verdict, leo_verdict, eval_issues,
|
||||||
|
eval_attempts, fix_attempts, last_attempt, merged_at
|
||||||
|
FROM prs
|
||||||
|
WHERE last_attempt > datetime('now', ? || ' hours')
|
||||||
|
ORDER BY last_attempt DESC
|
||||||
|
LIMIT 50""",
|
||||||
|
(f"-{hours}",),
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
# Summary counts
|
||||||
|
counts: dict[str, int] = {}
|
||||||
|
prs = []
|
||||||
|
for r in rows:
|
||||||
|
s = r["status"]
|
||||||
|
counts[s] = counts.get(s, 0) + 1
|
||||||
|
|
||||||
|
# Parse issues
|
||||||
|
issues = []
|
||||||
|
try:
|
||||||
|
issues = json.loads(r["eval_issues"] or "[]")
|
||||||
|
except (json.JSONDecodeError, TypeError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Build reviewer string
|
||||||
|
reviewers = []
|
||||||
|
if r["domain_verdict"] and r["domain_verdict"] != "pending":
|
||||||
|
reviewers.append(f"domain:{r['domain_verdict']}")
|
||||||
|
if r["leo_verdict"] and r["leo_verdict"] != "pending":
|
||||||
|
reviewers.append(f"leo:{r['leo_verdict']}")
|
||||||
|
|
||||||
|
# Time since last activity
|
||||||
|
age = ""
|
||||||
|
if r["last_attempt"]:
|
||||||
|
try:
|
||||||
|
last = datetime.fromisoformat(r["last_attempt"])
|
||||||
|
if last.tzinfo is None:
|
||||||
|
last = last.replace(tzinfo=timezone.utc)
|
||||||
|
delta = datetime.now(timezone.utc) - last
|
||||||
|
mins = int(delta.total_seconds() / 60)
|
||||||
|
age = f"{mins}m" if mins < 60 else f"{mins // 60}h{mins % 60}m"
|
||||||
|
except ValueError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Source name — strip the long path prefix
|
||||||
|
source = r["source_path"] or ""
|
||||||
|
if "/" in source:
|
||||||
|
source = source.rsplit("/", 1)[-1]
|
||||||
|
if source.endswith(".md"):
|
||||||
|
source = source[:-3]
|
||||||
|
|
||||||
|
prs.append({
|
||||||
|
"pr": r["number"],
|
||||||
|
"source": source,
|
||||||
|
"domain": r["domain"],
|
||||||
|
"status": r["status"],
|
||||||
|
"tier": r["tier"],
|
||||||
|
"issues": issues if issues else None,
|
||||||
|
"reviewers": ", ".join(reviewers) if reviewers else None,
|
||||||
|
"fixes": r["fix_attempts"] if r["fix_attempts"] else None,
|
||||||
|
"age": age,
|
||||||
|
})
|
||||||
|
|
||||||
|
return web.json_response({
|
||||||
|
"window": f"{hours}h",
|
||||||
|
"summary": counts,
|
||||||
|
"prs": prs,
|
||||||
|
})
|
||||||
|
|
||||||
|
|
||||||
|
async def handle_contributor(request):
|
||||||
|
"""GET /contributor/{handle} — contributor profile. ?detail=card|summary|full"""
|
||||||
|
conn = _conn(request)
|
||||||
|
handle = request.match_info["handle"].lower().lstrip("@")
|
||||||
|
detail = request.query.get("detail", "card")
|
||||||
|
|
||||||
|
row = conn.execute(
|
||||||
|
"SELECT * FROM contributors WHERE handle = ?", (handle,)
|
||||||
|
).fetchone()
|
||||||
|
|
||||||
|
if not row:
|
||||||
|
return web.json_response({"error": f"contributor '{handle}' not found"}, status=404)
|
||||||
|
|
||||||
|
# Card (~50 tokens)
|
||||||
|
card = {
|
||||||
|
"handle": row["handle"],
|
||||||
|
"tier": row["tier"],
|
||||||
|
"claims_merged": row["claims_merged"] or 0,
|
||||||
|
"domains": json.loads(row["domains"]) if row["domains"] else [],
|
||||||
|
"last_contribution": row["last_contribution"],
|
||||||
|
}
|
||||||
|
|
||||||
|
if detail == "card":
|
||||||
|
return web.json_response(card)
|
||||||
|
|
||||||
|
# Summary (~200 tokens) — add role counts + CI
|
||||||
|
roles = {
|
||||||
|
"sourcer": row["sourcer_count"] or 0,
|
||||||
|
"extractor": row["extractor_count"] or 0,
|
||||||
|
"challenger": row["challenger_count"] or 0,
|
||||||
|
"synthesizer": row["synthesizer_count"] or 0,
|
||||||
|
"reviewer": row["reviewer_count"] or 0,
|
||||||
|
}
|
||||||
|
|
||||||
|
# Compute CI from role counts × weights
|
||||||
|
ci_components = {}
|
||||||
|
ci_total = 0.0
|
||||||
|
for role, count in roles.items():
|
||||||
|
weight = config.CONTRIBUTION_ROLE_WEIGHTS.get(role, 0)
|
||||||
|
score = round(count * weight, 2)
|
||||||
|
ci_components[role] = score
|
||||||
|
ci_total += score
|
||||||
|
|
||||||
|
summary = {
|
||||||
|
**card,
|
||||||
|
"first_contribution": row["first_contribution"],
|
||||||
|
"agent_id": row["agent_id"],
|
||||||
|
"roles": roles,
|
||||||
|
"challenges_survived": row["challenges_survived"] or 0,
|
||||||
|
"highlights": json.loads(row["highlights"]) if row["highlights"] else [],
|
||||||
|
"ci": {
|
||||||
|
**ci_components,
|
||||||
|
"total": round(ci_total, 2),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
if detail == "summary":
|
||||||
|
return web.json_response(summary)
|
||||||
|
|
||||||
|
# Full — add everything
|
||||||
|
full = {
|
||||||
|
**summary,
|
||||||
|
"identities": json.loads(row["identities"]) if row["identities"] else {},
|
||||||
|
"display_name": row["display_name"],
|
||||||
|
"created_at": row["created_at"],
|
||||||
|
"updated_at": row["updated_at"],
|
||||||
|
}
|
||||||
|
return web.json_response(full)
|
||||||
|
|
||||||
|
|
||||||
|
async def handle_contributors_list(request):
|
||||||
|
"""GET /contributors — list all contributors, sorted by CI."""
|
||||||
|
conn = _conn(request)
|
||||||
|
rows = conn.execute(
|
||||||
|
"SELECT handle, tier, claims_merged, sourcer_count, extractor_count, "
|
||||||
|
"challenger_count, synthesizer_count, reviewer_count, last_contribution "
|
||||||
|
"FROM contributors ORDER BY claims_merged DESC"
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
contributors = []
|
||||||
|
for row in rows:
|
||||||
|
ci_total = sum(
|
||||||
|
(row[f"{role}_count"] or 0) * config.CONTRIBUTION_ROLE_WEIGHTS.get(role, 0)
|
||||||
|
for role in ("sourcer", "extractor", "challenger", "synthesizer", "reviewer")
|
||||||
|
)
|
||||||
|
contributors.append({
|
||||||
|
"handle": row["handle"],
|
||||||
|
"tier": row["tier"],
|
||||||
|
"claims_merged": row["claims_merged"] or 0,
|
||||||
|
"ci": round(ci_total, 2),
|
||||||
|
"last_contribution": row["last_contribution"],
|
||||||
|
})
|
||||||
|
|
||||||
|
return web.json_response({"contributors": contributors, "total": len(contributors)})
|
||||||
|
|
||||||
|
|
||||||
|
async def handle_dashboard(request):
|
||||||
|
"""GET /dashboard — human-readable HTML metrics page."""
|
||||||
|
conn = _conn(request)
|
||||||
|
|
||||||
|
# Gather same data as /metrics
|
||||||
|
now = datetime.now(timezone.utc)
|
||||||
|
today_str = now.strftime("%Y-%m-%d")
|
||||||
|
|
||||||
|
statuses = conn.execute("SELECT status, COUNT(*) as n FROM prs GROUP BY status").fetchall()
|
||||||
|
status_map = {r["status"]: r["n"] for r in statuses}
|
||||||
|
|
||||||
|
# Approval rate (24h)
|
||||||
|
evaluated = conn.execute(
|
||||||
|
"SELECT COUNT(*) as n FROM audit_log WHERE stage='evaluate' AND event IN ('approved','changes_requested','domain_rejected') AND timestamp > datetime('now','-24 hours')"
|
||||||
|
).fetchone()["n"]
|
||||||
|
approved = conn.execute(
|
||||||
|
"SELECT COUNT(*) as n FROM audit_log WHERE stage='evaluate' AND event='approved' AND timestamp > datetime('now','-24 hours')"
|
||||||
|
).fetchone()["n"]
|
||||||
|
approval_rate = round(approved / evaluated, 3) if evaluated else 0
|
||||||
|
|
||||||
|
# Throughput
|
||||||
|
merged_1h = conn.execute(
|
||||||
|
"SELECT COUNT(*) as n FROM prs WHERE merged_at > datetime('now','-1 hour')"
|
||||||
|
).fetchone()["n"]
|
||||||
|
|
||||||
|
# Rejection reasons
|
||||||
|
reasons = conn.execute(
|
||||||
|
"""SELECT value as tag, COUNT(*) as cnt
|
||||||
|
FROM audit_log, json_each(json_extract(detail, '$.issues'))
|
||||||
|
WHERE stage='evaluate' AND event IN ('changes_requested','domain_rejected','tier05_rejected')
|
||||||
|
AND timestamp > datetime('now','-24 hours')
|
||||||
|
GROUP BY tag ORDER BY cnt DESC LIMIT 10"""
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
# Fix cycle
|
||||||
|
fix_attempted = conn.execute(
|
||||||
|
"SELECT COUNT(*) as n FROM prs WHERE fix_attempts > 0"
|
||||||
|
).fetchone()["n"]
|
||||||
|
fix_succeeded = conn.execute(
|
||||||
|
"SELECT COUNT(*) as n FROM prs WHERE fix_attempts > 0 AND status = 'merged'"
|
||||||
|
).fetchone()["n"]
|
||||||
|
fix_rate = round(fix_succeeded / fix_attempted, 3) if fix_attempted else 0
|
||||||
|
|
||||||
|
# Build HTML
|
||||||
|
status_rows = "".join(
|
||||||
|
f"<tr><td>{s}</td><td><strong>{status_map.get(s, 0)}</strong></td></tr>"
|
||||||
|
for s in ["open", "merged", "closed", "approved", "conflict", "reviewing"]
|
||||||
|
if status_map.get(s, 0) > 0
|
||||||
|
)
|
||||||
|
|
||||||
|
reason_rows = "".join(
|
||||||
|
f"<tr><td>{r['tag']}</td><td>{r['cnt']}</td></tr>"
|
||||||
|
for r in reasons
|
||||||
|
)
|
||||||
|
|
||||||
|
html = f"""<!DOCTYPE html>
|
||||||
|
<html><head>
|
||||||
|
<meta charset="utf-8"><title>Pipeline Dashboard</title>
|
||||||
|
<meta http-equiv="refresh" content="30">
|
||||||
|
<style>
|
||||||
|
body {{ font-family: -apple-system, system-ui, sans-serif; max-width: 900px; margin: 40px auto; padding: 0 20px; background: #0d1117; color: #c9d1d9; }}
|
||||||
|
h1 {{ color: #58a6ff; margin-bottom: 5px; }}
|
||||||
|
.subtitle {{ color: #8b949e; margin-bottom: 30px; }}
|
||||||
|
.grid {{ display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 16px; margin-bottom: 30px; }}
|
||||||
|
.card {{ background: #161b22; border: 1px solid #30363d; border-radius: 8px; padding: 20px; }}
|
||||||
|
.card .label {{ color: #8b949e; font-size: 13px; text-transform: uppercase; letter-spacing: 0.5px; }}
|
||||||
|
.card .value {{ font-size: 32px; font-weight: 700; margin-top: 4px; }}
|
||||||
|
.green {{ color: #3fb950; }}
|
||||||
|
.yellow {{ color: #d29922; }}
|
||||||
|
.red {{ color: #f85149; }}
|
||||||
|
table {{ width: 100%; border-collapse: collapse; margin-top: 10px; }}
|
||||||
|
th, td {{ text-align: left; padding: 8px 12px; border-bottom: 1px solid #21262d; }}
|
||||||
|
th {{ color: #8b949e; font-size: 12px; text-transform: uppercase; }}
|
||||||
|
h2 {{ color: #58a6ff; margin-top: 30px; font-size: 16px; }}
|
||||||
|
</style>
|
||||||
|
</head><body>
|
||||||
|
<h1>Teleo Pipeline</h1>
|
||||||
|
<p class="subtitle">Auto-refreshes every 30s · {now.strftime("%Y-%m-%d %H:%M UTC")}</p>
|
||||||
|
|
||||||
|
<div class="grid">
|
||||||
|
<div class="card">
|
||||||
|
<div class="label">Throughput</div>
|
||||||
|
<div class="value">{merged_1h}<span style="font-size:16px;color:#8b949e">/hr</span></div>
|
||||||
|
</div>
|
||||||
|
<div class="card">
|
||||||
|
<div class="label">Approval Rate (24h)</div>
|
||||||
|
<div class="value {'green' if approval_rate > 0.3 else 'yellow' if approval_rate > 0.15 else 'red'}">{approval_rate:.1%}</div>
|
||||||
|
</div>
|
||||||
|
<div class="card">
|
||||||
|
<div class="label">Open PRs</div>
|
||||||
|
<div class="value">{status_map.get('open', 0)}</div>
|
||||||
|
</div>
|
||||||
|
<div class="card">
|
||||||
|
<div class="label">Merged</div>
|
||||||
|
<div class="value green">{status_map.get('merged', 0)}</div>
|
||||||
|
</div>
|
||||||
|
<div class="card">
|
||||||
|
<div class="label">Fix Success</div>
|
||||||
|
<div class="value {'red' if fix_rate < 0.1 else 'yellow'}">{fix_rate:.1%}</div>
|
||||||
|
</div>
|
||||||
|
<div class="card">
|
||||||
|
<div class="label">Evaluated (24h)</div>
|
||||||
|
<div class="value">{evaluated}</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<h2>Backlog</h2>
|
||||||
|
<table>{status_rows}</table>
|
||||||
|
|
||||||
|
<h2>Top Rejection Reasons (24h)</h2>
|
||||||
|
<table><tr><th>Issue</th><th>Count</th></tr>{reason_rows}</table>
|
||||||
|
|
||||||
|
<p style="margin-top:40px;color:#484f58;font-size:12px;">
|
||||||
|
<a href="/metrics" style="color:#484f58;">JSON API</a> ·
|
||||||
|
<a href="/health" style="color:#484f58;">Health</a> ·
|
||||||
|
<a href="/activity" style="color:#484f58;">Activity</a>
|
||||||
|
</p>
|
||||||
|
</body></html>"""
|
||||||
|
|
||||||
|
return web.Response(text=html, content_type="text/html")
|
||||||
|
|
||||||
|
|
||||||
|
async def handle_feedback(request):
|
||||||
|
"""GET /feedback/{agent} — per-agent rejection patterns with actionable guidance.
|
||||||
|
|
||||||
|
Returns top rejection reasons, approval rate, and fix instructions.
|
||||||
|
Agents query this to learn from their mistakes. (Epimetheus)
|
||||||
|
|
||||||
|
Optional ?hours=N (default 168 = 7 days).
|
||||||
|
"""
|
||||||
|
conn = _conn(request)
|
||||||
|
agent = request.match_info["agent"]
|
||||||
|
hours = int(request.query.get("hours", "168"))
|
||||||
|
result = get_agent_error_patterns(conn, agent, hours)
|
||||||
|
return web.json_response(result)
|
||||||
|
|
||||||
|
|
||||||
|
async def handle_feedback_all(request):
|
||||||
|
"""GET /feedback — rejection patterns for all agents.
|
||||||
|
|
||||||
|
Optional ?hours=N (default 168 = 7 days).
|
||||||
|
"""
|
||||||
|
conn = _conn(request)
|
||||||
|
hours = int(request.query.get("hours", "168"))
|
||||||
|
result = get_all_agent_patterns(conn, hours)
|
||||||
|
return web.json_response(result)
|
||||||
|
|
||||||
|
|
||||||
|
async def handle_claim_index(request):
|
||||||
|
"""GET /claim-index — structured index of all KB claims.
|
||||||
|
|
||||||
|
Returns full claim index with titles, domains, confidence, wiki links,
|
||||||
|
incoming/outgoing counts, orphan ratio, cross-domain link count.
|
||||||
|
Consumed by Argus (dashboard), Vida (vital signs).
|
||||||
|
|
||||||
|
Also writes to disk for file-based consumers.
|
||||||
|
"""
|
||||||
|
repo_root = str(config.MAIN_WORKTREE)
|
||||||
|
index = build_claim_index(repo_root)
|
||||||
|
|
||||||
|
# Also write to disk (atomic)
|
||||||
|
try:
|
||||||
|
write_claim_index(repo_root)
|
||||||
|
except Exception:
|
||||||
|
pass # Non-fatal — API response is primary
|
||||||
|
|
||||||
|
return web.json_response(index)
|
||||||
|
|
||||||
|
|
||||||
|
async def handle_analytics_data(request):
|
||||||
|
"""GET /analytics/data — time-series snapshot history for Chart.js.
|
||||||
|
|
||||||
|
Returns snapshot array + version change annotations.
|
||||||
|
Optional ?days=N (default 7).
|
||||||
|
"""
|
||||||
|
conn = _conn(request)
|
||||||
|
days = int(request.query.get("days", "7"))
|
||||||
|
snapshots = get_snapshot_history(conn, days)
|
||||||
|
changes = get_version_changes(conn, days)
|
||||||
|
|
||||||
|
return web.json_response({
|
||||||
|
"snapshots": snapshots,
|
||||||
|
"version_changes": changes,
|
||||||
|
"days": days,
|
||||||
|
"count": len(snapshots),
|
||||||
|
})
|
||||||
|
|
||||||
|
|
||||||
|
def create_app() -> web.Application:
|
||||||
|
"""Create the health API application."""
|
||||||
|
app = web.Application()
|
||||||
|
# Persistent readonly connection — one connection, no churn (Ganymede)
|
||||||
|
app["db"] = db.get_connection(readonly=True)
|
||||||
|
app.router.add_get("/health", handle_health)
|
||||||
|
app.router.add_get("/costs", handle_costs)
|
||||||
|
app.router.add_get("/sources", handle_sources)
|
||||||
|
app.router.add_get("/prs", handle_prs)
|
||||||
|
app.router.add_get("/breakers", handle_breakers)
|
||||||
|
app.router.add_get("/metrics", handle_metrics)
|
||||||
|
app.router.add_get("/dashboard", handle_dashboard)
|
||||||
|
app.router.add_get("/contributor/{handle}", handle_contributor)
|
||||||
|
app.router.add_get("/contributors", handle_contributors_list)
|
||||||
|
app.router.add_get("/", handle_dashboard)
|
||||||
|
app.router.add_get("/activity", handle_activity)
|
||||||
|
app.router.add_get("/pr/{number}", handle_pr_status)
|
||||||
|
app.router.add_get("/check-duplicate", handle_check_duplicate)
|
||||||
|
app.router.add_get("/calibration", handle_calibration)
|
||||||
|
app.router.add_get("/feedback/{agent}", handle_feedback)
|
||||||
|
app.router.add_get("/feedback", handle_feedback_all)
|
||||||
|
app.router.add_get("/analytics/data", handle_analytics_data)
|
||||||
|
app.router.add_get("/claim-index", handle_claim_index)
|
||||||
|
app.on_cleanup.append(_cleanup)
|
||||||
|
return app
|
||||||
|
|
||||||
|
|
||||||
|
async def _cleanup(app):
|
||||||
|
app["db"].close()
|
||||||
|
|
||||||
|
|
||||||
|
async def start_health_server(runner_ref: list):
|
||||||
|
"""Start the health HTTP server. Stores runner in runner_ref for shutdown."""
|
||||||
|
app = create_app()
|
||||||
|
runner = web.AppRunner(app)
|
||||||
|
await runner.setup()
|
||||||
|
# Bind to all interfaces — metrics are read-only, no sensitive data (Cory, Mar 14)
|
||||||
|
site = web.TCPSite(runner, "0.0.0.0", config.HEALTH_PORT)
|
||||||
|
await site.start()
|
||||||
|
runner_ref.append(runner)
|
||||||
|
logger.info("Health API listening on 0.0.0.0:%d", config.HEALTH_PORT)
|
||||||
|
|
||||||
|
|
||||||
|
async def stop_health_server(runner_ref: list):
|
||||||
|
"""Stop the health HTTP server."""
|
||||||
|
for runner in runner_ref:
|
||||||
|
await runner.cleanup()
|
||||||
|
logger.info("Health API stopped")
|
||||||
451
ops/pipeline-v2/lib/llm.py
Normal file
451
ops/pipeline-v2/lib/llm.py
Normal file
|
|
@ -0,0 +1,451 @@
|
||||||
|
"""LLM transport and review prompts — shared by all evaluation stages.
|
||||||
|
|
||||||
|
Extracted from evaluate.py (Phase 3c refactor). This module owns:
|
||||||
|
- Prompt templates (triage, domain, Leo)
|
||||||
|
- OpenRouter API transport
|
||||||
|
- Claude CLI transport with subprocess tracking
|
||||||
|
- Review runner functions (triage, domain, Leo)
|
||||||
|
|
||||||
|
Orchestration (PR lifecycle, SQLite state, Forgejo posting) stays in evaluate.py.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
|
||||||
|
import aiohttp
|
||||||
|
|
||||||
|
from . import config
|
||||||
|
|
||||||
|
logger = logging.getLogger("pipeline.llm")
|
||||||
|
|
||||||
|
# Track active Claude CLI subprocesses for graceful shutdown (Ganymede #8)
|
||||||
|
_active_subprocesses: set = set()
|
||||||
|
|
||||||
|
|
||||||
|
async def kill_active_subprocesses():
|
||||||
|
"""Kill all tracked Claude CLI subprocesses. Called during graceful shutdown."""
|
||||||
|
for proc in list(_active_subprocesses):
|
||||||
|
if proc.returncode is None:
|
||||||
|
logger.warning("Killing lingering Claude CLI subprocess PID %d", proc.pid)
|
||||||
|
try:
|
||||||
|
proc.kill()
|
||||||
|
await proc.wait()
|
||||||
|
except ProcessLookupError:
|
||||||
|
pass
|
||||||
|
_active_subprocesses.clear()
|
||||||
|
|
||||||
|
|
||||||
|
REVIEW_STYLE_GUIDE = (
|
||||||
|
"You MUST show your work. For each criterion, write one sentence with your finding. "
|
||||||
|
"Do not summarize what the PR does — evaluate it. "
|
||||||
|
"If a criterion passes, say what you checked and why it passes. "
|
||||||
|
"If a criterion fails, explain the specific problem. "
|
||||||
|
"Responses like 'Everything passes' with no evidence of checking will be treated as review failures. "
|
||||||
|
"Be concise but substantive — one sentence per criterion, not one sentence total."
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Prompt templates ──────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
TRIAGE_PROMPT = """Classify this pull request diff into exactly one tier: DEEP, STANDARD, or LIGHT.
|
||||||
|
|
||||||
|
DEEP — use ONLY when the PR could change the knowledge graph structure:
|
||||||
|
- PR modifies files in core/ or foundations/ (structural KB changes)
|
||||||
|
- PR challenges an existing claim (has "challenged_by" field or explicitly argues against an existing claim)
|
||||||
|
- PR modifies axiom-level beliefs in agents/*/beliefs.md
|
||||||
|
- PR is a cross-domain synthesis claim that draws conclusions across 2+ domains
|
||||||
|
|
||||||
|
DEEP is rare — most new claims are STANDARD even if they have high confidence or cross-domain wiki links. Adding a new "likely" claim about futarchy is STANDARD. Arguing that an existing claim is wrong is DEEP.
|
||||||
|
|
||||||
|
STANDARD — the DEFAULT for most PRs:
|
||||||
|
- New claims in any domain at any confidence level
|
||||||
|
- Enrichments to existing claims (adding evidence, extending arguments)
|
||||||
|
- New hypothesis-level beliefs
|
||||||
|
- Source archives with extraction results
|
||||||
|
- Claims with cross-domain wiki links (this is normal, not exceptional)
|
||||||
|
|
||||||
|
LIGHT — use ONLY when ALL changes fit these categories:
|
||||||
|
- Entity attribute updates (factual corrections, new data points)
|
||||||
|
- Source archiving without extraction
|
||||||
|
- Formatting fixes, typo corrections
|
||||||
|
- Status field changes
|
||||||
|
|
||||||
|
IMPORTANT: When uncertain between DEEP and STANDARD, choose STANDARD. Most claims are STANDARD. DEEP is reserved for structural changes to the knowledge base, not for complex or important-sounding claims.
|
||||||
|
|
||||||
|
Respond with ONLY the tier name (DEEP, STANDARD, or LIGHT) on the first line, followed by a one-line reason on the second line.
|
||||||
|
|
||||||
|
--- PR DIFF ---
|
||||||
|
{diff}"""
|
||||||
|
|
||||||
|
DOMAIN_PROMPT = """You are {agent}, the {domain} domain expert for TeleoHumanity's knowledge base.
|
||||||
|
|
||||||
|
IMPORTANT — This PR may contain different content types:
|
||||||
|
- **Claims** (type: claim): arguable assertions with confidence levels. Review fully.
|
||||||
|
- **Entities** (type: entity, files in entities/): descriptive records of projects, people, protocols. Do NOT reject entities for missing confidence or source fields — they have a different schema.
|
||||||
|
- **Sources** (files in inbox/): archive metadata. Auto-approve these.
|
||||||
|
|
||||||
|
Review this PR. For EACH criterion below, write one sentence stating what you found:
|
||||||
|
|
||||||
|
1. **Factual accuracy** — Are the claims/entities factually correct? Name any specific errors.
|
||||||
|
2. **Intra-PR duplicates** — Do multiple changes in THIS PR add the same evidence to different claims with near-identical wording? Only flag if the same paragraph of evidence is copy-pasted across files. Shared entity files (like metadao.md or futardio.md) appearing in multiple PRs are NOT duplicates — they are expected enrichments.
|
||||||
|
3. **Confidence calibration** — For claims only. Is the confidence level right for the evidence? Entities don't have confidence levels.
|
||||||
|
4. **Wiki links** — Note any broken [[wiki links]], but do NOT let them affect your verdict. Broken links are expected — linked claims often exist in other open PRs that haven't merged yet. ALWAYS APPROVE even if wiki links are broken.
|
||||||
|
|
||||||
|
VERDICT RULES — read carefully:
|
||||||
|
- APPROVE if claims are factually correct and evidence supports them, even if minor improvements are possible.
|
||||||
|
- APPROVE entity files (type: entity) unless they contain factual errors.
|
||||||
|
- APPROVE even if wiki links are broken — this is NEVER a reason to REQUEST_CHANGES.
|
||||||
|
- REQUEST_CHANGES only for these BLOCKING issues: factual errors, copy-pasted duplicate evidence, or confidence that is clearly wrong (e.g. "proven" with no evidence).
|
||||||
|
- If the ONLY issues you find are broken wiki links: you MUST APPROVE.
|
||||||
|
- Do NOT invent problems. If a criterion passes, say it passes.
|
||||||
|
|
||||||
|
{style_guide}
|
||||||
|
|
||||||
|
If requesting changes, tag the specific issues using ONLY these tags (do not invent new tags):
|
||||||
|
<!-- ISSUES: tag1, tag2 -->
|
||||||
|
|
||||||
|
Valid tags: frontmatter_schema, title_overclaims, confidence_miscalibration, date_errors, factual_discrepancy, near_duplicate, scope_error
|
||||||
|
|
||||||
|
End your review with exactly one of:
|
||||||
|
<!-- VERDICT:{agent_upper}:APPROVE -->
|
||||||
|
<!-- VERDICT:{agent_upper}:REQUEST_CHANGES -->
|
||||||
|
|
||||||
|
--- PR DIFF ---
|
||||||
|
{diff}
|
||||||
|
|
||||||
|
--- CHANGED FILES ---
|
||||||
|
{files}"""
|
||||||
|
|
||||||
|
LEO_PROMPT_STANDARD = """You are Leo, the lead evaluator for TeleoHumanity's knowledge base.
|
||||||
|
|
||||||
|
IMPORTANT — Content types have DIFFERENT schemas:
|
||||||
|
- **Claims** (type: claim): require type, domain, confidence, source, created, description. Title must be a prose proposition.
|
||||||
|
- **Entities** (type: entity, files in entities/): require ONLY type, domain, description. NO confidence, NO source, NO created date. Short filenames like "metadao.md" are correct — entities are NOT claims.
|
||||||
|
- **Sources** (files in inbox/): different schema entirely. Do NOT flag sources for missing claim fields.
|
||||||
|
|
||||||
|
Do NOT flag entity files for missing confidence, source, or created fields. Do NOT flag entity filenames for being too short or not prose propositions. These are different content types with different rules.
|
||||||
|
|
||||||
|
Review this PR. For EACH criterion below, write one sentence stating what you found:
|
||||||
|
|
||||||
|
1. **Schema** — Does each file have valid frontmatter FOR ITS TYPE? (Claims need full schema. Entities need only type+domain+description.)
|
||||||
|
2. **Duplicate/redundancy** — Do multiple enrichments in this PR inject the same evidence into different claims? Is the enrichment actually new vs already present in the claim?
|
||||||
|
3. **Confidence** — For claims only: name the confidence level. Does the evidence justify it?
|
||||||
|
4. **Wiki links** — Note any broken [[links]], but do NOT let them affect your verdict. Broken links are expected — linked claims often exist in other open PRs. ALWAYS APPROVE even if wiki links are broken.
|
||||||
|
5. **Source quality** — Is the source credible for this claim?
|
||||||
|
6. **Specificity** — For claims only: could someone disagree? If it's too vague to be wrong, flag it.
|
||||||
|
|
||||||
|
VERDICT: APPROVE if the claims are factually correct and evidence supports them. Broken wiki links are NEVER a reason to REQUEST_CHANGES. If broken links are the ONLY issue, you MUST APPROVE.
|
||||||
|
|
||||||
|
{style_guide}
|
||||||
|
|
||||||
|
If requesting changes, tag the specific issues using ONLY these tags (do not invent new tags):
|
||||||
|
<!-- ISSUES: tag1, tag2 -->
|
||||||
|
|
||||||
|
Valid tags: frontmatter_schema, title_overclaims, confidence_miscalibration, date_errors, factual_discrepancy, near_duplicate, scope_error
|
||||||
|
|
||||||
|
End your review with exactly one of:
|
||||||
|
<!-- VERDICT:LEO:APPROVE -->
|
||||||
|
<!-- VERDICT:LEO:REQUEST_CHANGES -->
|
||||||
|
|
||||||
|
--- PR DIFF ---
|
||||||
|
{diff}
|
||||||
|
|
||||||
|
--- CHANGED FILES ---
|
||||||
|
{files}"""
|
||||||
|
|
||||||
|
LEO_PROMPT_DEEP = """You are Leo, the lead evaluator for TeleoHumanity's knowledge base.
|
||||||
|
|
||||||
|
Review this PR with MAXIMUM scrutiny. This PR may trigger belief cascades. Check:
|
||||||
|
1. Cross-domain implications — does this claim affect beliefs in other domains?
|
||||||
|
2. Confidence calibration — is the confidence level justified by the evidence?
|
||||||
|
3. Contradiction check — does this contradict any existing claims without explicit argument?
|
||||||
|
4. Wiki link validity — note any broken links, but do NOT let them affect your verdict. Broken links are expected (linked claims may be in other PRs). NEVER REQUEST_CHANGES for broken wiki links alone.
|
||||||
|
5. Axiom integrity — if touching axiom-level beliefs, is the justification extraordinary?
|
||||||
|
6. Source quality — is the source credible for the claim being made?
|
||||||
|
7. Duplicate check — does a substantially similar claim already exist?
|
||||||
|
8. Enrichment vs new claim — should this be an enrichment to an existing claim instead?
|
||||||
|
9. Domain assignment — is the claim in the correct domain?
|
||||||
|
10. Schema compliance — YAML frontmatter, prose-as-title format, required fields
|
||||||
|
11. Epistemic hygiene — is the claim specific enough to be wrong?
|
||||||
|
|
||||||
|
{style_guide}
|
||||||
|
|
||||||
|
If requesting changes, tag the specific issues using ONLY these tags (do not invent new tags):
|
||||||
|
<!-- ISSUES: tag1, tag2 -->
|
||||||
|
|
||||||
|
Valid tags: frontmatter_schema, title_overclaims, confidence_miscalibration, date_errors, factual_discrepancy, near_duplicate, scope_error
|
||||||
|
|
||||||
|
End your review with exactly one of:
|
||||||
|
<!-- VERDICT:LEO:APPROVE -->
|
||||||
|
<!-- VERDICT:LEO:REQUEST_CHANGES -->
|
||||||
|
|
||||||
|
--- PR DIFF ---
|
||||||
|
{diff}
|
||||||
|
|
||||||
|
--- CHANGED FILES ---
|
||||||
|
{files}"""
|
||||||
|
|
||||||
|
|
||||||
|
BATCH_DOMAIN_PROMPT = """You are {agent}, the {domain} domain expert for TeleoHumanity's knowledge base.
|
||||||
|
|
||||||
|
You are reviewing {n_prs} PRs in a single batch. For EACH PR, apply all criteria INDEPENDENTLY. Do not mix content between PRs. Each PR is a separate evaluation.
|
||||||
|
|
||||||
|
For EACH PR, check these criteria (one sentence each):
|
||||||
|
|
||||||
|
1. **Factual accuracy** — Are the claims factually correct? Name any specific errors.
|
||||||
|
2. **Intra-PR duplicates** — Do multiple changes in THIS PR add the same evidence to different claims with near-identical wording?
|
||||||
|
3. **Confidence calibration** — Is the confidence level right for the evidence provided?
|
||||||
|
4. **Wiki links** — Do [[wiki links]] in the diff reference files that exist?
|
||||||
|
|
||||||
|
VERDICT RULES — read carefully:
|
||||||
|
- APPROVE if claims are factually correct and evidence supports them, even if minor improvements are possible.
|
||||||
|
- REQUEST_CHANGES only for BLOCKING issues: factual errors, genuinely broken wiki links, copy-pasted duplicate evidence across files, or confidence that is clearly wrong.
|
||||||
|
- Missing context, style preferences, and "could be better" observations are NOT blocking. Note them but still APPROVE.
|
||||||
|
- Do NOT invent problems. If a criterion passes, say it passes.
|
||||||
|
|
||||||
|
{style_guide}
|
||||||
|
|
||||||
|
For EACH PR, write your full review, then end that PR's section with the verdict tag.
|
||||||
|
If requesting changes, tag the specific issues:
|
||||||
|
<!-- ISSUES: tag1, tag2 -->
|
||||||
|
|
||||||
|
Valid tags: frontmatter_schema, title_overclaims, confidence_miscalibration, date_errors, factual_discrepancy, near_duplicate, scope_error
|
||||||
|
|
||||||
|
{pr_sections}
|
||||||
|
|
||||||
|
IMPORTANT: You MUST provide a verdict for every PR listed above. For each PR, end with exactly one of:
|
||||||
|
<!-- PR:NUMBER VERDICT:{agent_upper}:APPROVE -->
|
||||||
|
<!-- PR:NUMBER VERDICT:{agent_upper}:REQUEST_CHANGES -->
|
||||||
|
where NUMBER is the PR number shown in the section header."""
|
||||||
|
|
||||||
|
|
||||||
|
# ─── API helpers ───────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
async def openrouter_call(
|
||||||
|
model: str, prompt: str, timeout_sec: int = 120, max_tokens: int = 4096,
|
||||||
|
) -> tuple[str | None, dict]:
|
||||||
|
"""Call OpenRouter API. Returns (response_text, usage_dict).
|
||||||
|
|
||||||
|
usage_dict has keys: prompt_tokens, completion_tokens (0 on failure).
|
||||||
|
"""
|
||||||
|
empty_usage = {"prompt_tokens": 0, "completion_tokens": 0}
|
||||||
|
key_file = config.SECRETS_DIR / "openrouter-key"
|
||||||
|
if not key_file.exists():
|
||||||
|
logger.error("OpenRouter key file not found")
|
||||||
|
return None, empty_usage
|
||||||
|
key = key_file.read_text().strip()
|
||||||
|
|
||||||
|
payload = {
|
||||||
|
"model": model,
|
||||||
|
"messages": [{"role": "user", "content": prompt}],
|
||||||
|
"max_tokens": max_tokens,
|
||||||
|
"temperature": 0.2,
|
||||||
|
}
|
||||||
|
|
||||||
|
try:
|
||||||
|
async with aiohttp.ClientSession() as session:
|
||||||
|
async with session.post(
|
||||||
|
config.OPENROUTER_URL,
|
||||||
|
headers={"Authorization": f"Bearer {key}", "Content-Type": "application/json"},
|
||||||
|
json=payload,
|
||||||
|
timeout=aiohttp.ClientTimeout(total=timeout_sec),
|
||||||
|
) as resp:
|
||||||
|
if resp.status >= 400:
|
||||||
|
text = await resp.text()
|
||||||
|
logger.error("OpenRouter %s → %d: %s", model, resp.status, text[:200])
|
||||||
|
return None, empty_usage
|
||||||
|
data = await resp.json()
|
||||||
|
usage = data.get("usage", empty_usage)
|
||||||
|
content = data.get("choices", [{}])[0].get("message", {}).get("content")
|
||||||
|
return content, usage
|
||||||
|
except Exception as e:
|
||||||
|
logger.error("OpenRouter error: %s → %s", model, e)
|
||||||
|
return None, empty_usage
|
||||||
|
|
||||||
|
|
||||||
|
async def claude_cli_call(model: str, prompt: str, timeout_sec: int = 600, cwd: str = None) -> tuple[str | None, dict]:
|
||||||
|
"""Call Claude via CLI (Claude Max subscription). Returns (response, usage).
|
||||||
|
|
||||||
|
Uses --output-format json to capture token usage. Subscription calls cost $0
|
||||||
|
but tokens are tracked for compute metrics (Cory: capture tokens/time, note subscription).
|
||||||
|
"""
|
||||||
|
empty_usage = {
|
||||||
|
"prompt_tokens": 0, "completion_tokens": 0,
|
||||||
|
"cache_read_tokens": 0, "cache_write_tokens": 0,
|
||||||
|
"duration_ms": 0, "duration_api_ms": 0,
|
||||||
|
"cost_estimate_usd": 0.0,
|
||||||
|
"stop_reason": "", "num_turns": 0,
|
||||||
|
"service_tier": "", "speed": "",
|
||||||
|
}
|
||||||
|
proc = await asyncio.create_subprocess_exec(
|
||||||
|
str(config.CLAUDE_CLI),
|
||||||
|
"-p",
|
||||||
|
"--model",
|
||||||
|
model,
|
||||||
|
"--output-format",
|
||||||
|
"json",
|
||||||
|
cwd=cwd or str(config.REPO_DIR),
|
||||||
|
stdin=asyncio.subprocess.PIPE,
|
||||||
|
stdout=asyncio.subprocess.PIPE,
|
||||||
|
stderr=asyncio.subprocess.PIPE,
|
||||||
|
)
|
||||||
|
_active_subprocesses.add(proc) # Track for graceful shutdown (Ganymede #8)
|
||||||
|
try:
|
||||||
|
stdout, stderr = await asyncio.wait_for(
|
||||||
|
proc.communicate(input=prompt.encode()),
|
||||||
|
timeout=timeout_sec,
|
||||||
|
)
|
||||||
|
except asyncio.TimeoutError:
|
||||||
|
proc.kill()
|
||||||
|
await proc.wait()
|
||||||
|
logger.error("Claude CLI timed out after %ds", timeout_sec)
|
||||||
|
return None, empty_usage
|
||||||
|
finally:
|
||||||
|
_active_subprocesses.discard(proc)
|
||||||
|
|
||||||
|
out_text = (stdout or b"").decode()
|
||||||
|
err_text = (stderr or b"").decode()
|
||||||
|
|
||||||
|
# Check for rate limit REGARDLESS of exit code — CLI sometimes exits 0 with limit message
|
||||||
|
combined_lower = (out_text + err_text).lower()
|
||||||
|
if "hit your limit" in combined_lower or "rate limit" in combined_lower:
|
||||||
|
logger.warning("Claude Max rate limited (rc=%d, stdout: %s)", proc.returncode, out_text[:200])
|
||||||
|
return "RATE_LIMITED", empty_usage
|
||||||
|
|
||||||
|
if proc.returncode != 0:
|
||||||
|
logger.error("Claude CLI failed (rc=%d): stderr=%s stdout=%s", proc.returncode, err_text[:200], out_text[:200])
|
||||||
|
return None, empty_usage
|
||||||
|
|
||||||
|
# Parse JSON output to extract full usage telemetry
|
||||||
|
usage = empty_usage.copy()
|
||||||
|
try:
|
||||||
|
data = json.loads(out_text)
|
||||||
|
text = data.get("result", "")
|
||||||
|
raw_usage = data.get("usage", {})
|
||||||
|
usage = {
|
||||||
|
"prompt_tokens": raw_usage.get("input_tokens", 0),
|
||||||
|
"completion_tokens": raw_usage.get("output_tokens", 0),
|
||||||
|
"cache_read_tokens": raw_usage.get("cache_read_input_tokens", 0),
|
||||||
|
"cache_write_tokens": raw_usage.get("cache_creation_input_tokens", 0),
|
||||||
|
"duration_ms": data.get("duration_ms", 0),
|
||||||
|
"duration_api_ms": data.get("duration_api_ms", 0),
|
||||||
|
"cost_estimate_usd": data.get("total_cost_usd", 0.0),
|
||||||
|
"stop_reason": data.get("stop_reason", ""),
|
||||||
|
"num_turns": data.get("num_turns", 0),
|
||||||
|
"service_tier": raw_usage.get("service_tier", ""),
|
||||||
|
"speed": raw_usage.get("speed", ""),
|
||||||
|
}
|
||||||
|
except (json.JSONDecodeError, KeyError):
|
||||||
|
logger.warning("Claude CLI returned non-JSON output, token tracking unavailable")
|
||||||
|
text = out_text.strip()
|
||||||
|
|
||||||
|
return text, usage
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Review execution ─────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
async def triage_pr(diff: str) -> tuple[str, dict, str]:
|
||||||
|
"""Triage PR via Haiku → (tier, usage, reason). tier is DEEP/STANDARD/LIGHT."""
|
||||||
|
prompt = TRIAGE_PROMPT.format(diff=diff[:50000]) # Cap diff size for triage
|
||||||
|
result, usage = await openrouter_call(config.TRIAGE_MODEL, prompt, timeout_sec=30)
|
||||||
|
if not result:
|
||||||
|
logger.warning("Triage failed, defaulting to STANDARD")
|
||||||
|
return "STANDARD", usage, "triage failed, default"
|
||||||
|
|
||||||
|
tier = result.split("\n")[0].strip().upper()
|
||||||
|
if tier in ("DEEP", "STANDARD", "LIGHT"):
|
||||||
|
reason = result.split("\n")[1].strip() if "\n" in result else ""
|
||||||
|
logger.info("Triage: %s — %s", tier, reason[:100])
|
||||||
|
return tier, usage, reason[:500]
|
||||||
|
|
||||||
|
logger.warning("Triage returned unparseable '%s', defaulting to STANDARD", tier[:20])
|
||||||
|
return "STANDARD", usage, f"unparseable response, default (got: {tier[:20]})"
|
||||||
|
|
||||||
|
|
||||||
|
async def run_batch_domain_review(
|
||||||
|
pr_diffs: list[dict], domain: str, agent: str,
|
||||||
|
) -> tuple[str | None, dict]:
|
||||||
|
"""Run batched domain review for multiple PRs in one LLM call.
|
||||||
|
|
||||||
|
pr_diffs: list of {"number": int, "label": str, "diff": str, "files": str}
|
||||||
|
Returns (raw_response_text, usage) or (None, usage) on failure.
|
||||||
|
"""
|
||||||
|
# Build per-PR sections with anchoring labels
|
||||||
|
sections = []
|
||||||
|
for pr in pr_diffs:
|
||||||
|
sections.append(
|
||||||
|
f"=== PR #{pr['number']}: {pr['label']} ({pr['file_count']} files) ===\n"
|
||||||
|
f"--- PR DIFF ---\n{pr['diff']}\n\n"
|
||||||
|
f"--- CHANGED FILES ---\n{pr['files']}\n"
|
||||||
|
)
|
||||||
|
|
||||||
|
prompt = BATCH_DOMAIN_PROMPT.format(
|
||||||
|
agent=agent,
|
||||||
|
agent_upper=agent.upper(),
|
||||||
|
domain=domain,
|
||||||
|
n_prs=len(pr_diffs),
|
||||||
|
style_guide=REVIEW_STYLE_GUIDE,
|
||||||
|
pr_sections="\n".join(sections),
|
||||||
|
)
|
||||||
|
|
||||||
|
# Scale max_tokens with batch size: ~3K tokens per PR review
|
||||||
|
max_tokens = min(3000 * len(pr_diffs), 16384)
|
||||||
|
result, usage = await openrouter_call(
|
||||||
|
config.EVAL_DOMAIN_MODEL, prompt,
|
||||||
|
timeout_sec=config.EVAL_TIMEOUT, max_tokens=max_tokens,
|
||||||
|
)
|
||||||
|
return result, usage
|
||||||
|
|
||||||
|
|
||||||
|
async def run_domain_review(diff: str, files: str, domain: str, agent: str) -> tuple[str | None, dict]:
|
||||||
|
"""Run domain review via OpenRouter.
|
||||||
|
|
||||||
|
Decoupled from Claude Max to avoid account-level rate limits blocking
|
||||||
|
domain reviews. Different model lineage also reduces correlated blind spots.
|
||||||
|
Returns (review_text, usage).
|
||||||
|
"""
|
||||||
|
prompt = DOMAIN_PROMPT.format(
|
||||||
|
agent=agent,
|
||||||
|
agent_upper=agent.upper(),
|
||||||
|
domain=domain,
|
||||||
|
style_guide=REVIEW_STYLE_GUIDE,
|
||||||
|
diff=diff,
|
||||||
|
files=files,
|
||||||
|
)
|
||||||
|
|
||||||
|
result, usage = await openrouter_call(config.EVAL_DOMAIN_MODEL, prompt, timeout_sec=config.EVAL_TIMEOUT)
|
||||||
|
return result, usage
|
||||||
|
|
||||||
|
|
||||||
|
async def run_leo_review(diff: str, files: str, tier: str) -> tuple[str | None, dict]:
|
||||||
|
"""Run Leo review. DEEP → Opus (Claude Max, queue if limited). STANDARD → GPT-4o (OpenRouter).
|
||||||
|
|
||||||
|
Opus is scarce — reserved for DEEP eval and overnight research sessions.
|
||||||
|
STANDARD goes straight to GPT-4o. Domain review is the primary gate;
|
||||||
|
Leo review is a quality check that doesn't need Opus for routine claims.
|
||||||
|
Returns (review_text, usage).
|
||||||
|
"""
|
||||||
|
prompt_template = LEO_PROMPT_DEEP if tier == "DEEP" else LEO_PROMPT_STANDARD
|
||||||
|
prompt = prompt_template.format(style_guide=REVIEW_STYLE_GUIDE, diff=diff, files=files)
|
||||||
|
|
||||||
|
if tier == "DEEP":
|
||||||
|
# Opus skipped — route all Leo reviews through Sonnet until backlog clears.
|
||||||
|
# Opus via Claude Max CLI is consistently unavailable (rate limited or hanging).
|
||||||
|
# Re-enable by removing this block and uncommenting the try-then-overflow below.
|
||||||
|
# (Cory, Mar 14: "yes lets skip opus")
|
||||||
|
#
|
||||||
|
# --- Re-enable Opus later (uses EVAL_TIMEOUT_OPUS for longer reasoning): ---
|
||||||
|
# result, usage = await claude_cli_call(config.EVAL_LEO_MODEL, prompt, timeout_sec=config.EVAL_TIMEOUT_OPUS)
|
||||||
|
# if result == "RATE_LIMITED" or result is None:
|
||||||
|
# logger.info("Opus unavailable for DEEP Leo review — overflowing to Sonnet")
|
||||||
|
# result, usage = await openrouter_call(config.EVAL_LEO_STANDARD_MODEL, prompt, timeout_sec=config.EVAL_TIMEOUT_OPUS)
|
||||||
|
# return result, usage
|
||||||
|
result, usage = await openrouter_call(config.EVAL_LEO_STANDARD_MODEL, prompt, timeout_sec=config.EVAL_TIMEOUT)
|
||||||
|
return result, usage
|
||||||
|
else:
|
||||||
|
# STANDARD/LIGHT: Sonnet via OpenRouter — 120s timeout (routine calls)
|
||||||
|
result, usage = await openrouter_call(config.EVAL_LEO_STANDARD_MODEL, prompt, timeout_sec=config.EVAL_TIMEOUT)
|
||||||
|
return result, usage
|
||||||
48
ops/pipeline-v2/lib/log.py
Normal file
48
ops/pipeline-v2/lib/log.py
Normal file
|
|
@ -0,0 +1,48 @@
|
||||||
|
"""Structured JSON logging with rotation."""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import logging.handlers
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
|
from . import config
|
||||||
|
|
||||||
|
|
||||||
|
class JSONFormatter(logging.Formatter):
|
||||||
|
"""Format log records as JSON lines."""
|
||||||
|
|
||||||
|
def format(self, record):
|
||||||
|
entry = {
|
||||||
|
"ts": datetime.now(timezone.utc).isoformat(),
|
||||||
|
"level": record.levelname,
|
||||||
|
"logger": record.name,
|
||||||
|
"msg": record.getMessage(),
|
||||||
|
}
|
||||||
|
if record.exc_info and record.exc_info[0]:
|
||||||
|
entry["exception"] = self.formatException(record.exc_info)
|
||||||
|
# Include extra fields if present
|
||||||
|
for key in ("stage", "source", "pr", "model", "cost", "event"):
|
||||||
|
if hasattr(record, key):
|
||||||
|
entry[key] = getattr(record, key)
|
||||||
|
return json.dumps(entry)
|
||||||
|
|
||||||
|
|
||||||
|
def setup_logging():
|
||||||
|
"""Configure structured JSON logging with rotation."""
|
||||||
|
config.LOG_DIR.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
handler = logging.handlers.RotatingFileHandler(
|
||||||
|
str(config.LOG_FILE),
|
||||||
|
maxBytes=config.LOG_ROTATION_MAX_BYTES,
|
||||||
|
backupCount=config.LOG_ROTATION_BACKUP_COUNT,
|
||||||
|
)
|
||||||
|
handler.setFormatter(JSONFormatter())
|
||||||
|
|
||||||
|
# Also log to stderr for systemd journal
|
||||||
|
console = logging.StreamHandler()
|
||||||
|
console.setFormatter(logging.Formatter("%(name)s [%(levelname)s] %(message)s"))
|
||||||
|
|
||||||
|
root = logging.getLogger()
|
||||||
|
root.setLevel(logging.INFO)
|
||||||
|
root.addHandler(handler)
|
||||||
|
root.addHandler(console)
|
||||||
|
|
@ -23,17 +23,23 @@ from . import config, db
|
||||||
from .db import classify_branch
|
from .db import classify_branch
|
||||||
from .dedup import dedup_evidence_blocks
|
from .dedup import dedup_evidence_blocks
|
||||||
from .domains import detect_domain_from_branch
|
from .domains import detect_domain_from_branch
|
||||||
from .cascade import cascade_after_merge
|
|
||||||
from .forgejo import api as forgejo_api
|
from .forgejo import api as forgejo_api
|
||||||
|
|
||||||
# Pipeline-owned branch prefixes — these get auto-merged via cherry-pick.
|
# Pipeline-owned branch prefixes — only these get auto-merged.
|
||||||
# Originally restricted to pipeline-only branches because rebase orphaned agent commits.
|
# Agent branches (theseus/*, rio/*, astra/*, etc.) stay approved but are NOT
|
||||||
# Now safe for all branches: cherry-pick creates a fresh branch from main, never
|
# rebased/force-pushed/auto-merged. Agents merge their own PRs.
|
||||||
# rewrites the source branch. (Original issue: Leo directive, PRs #2141, #157, #2142, #2180)
|
# Derived from BRANCH_PREFIX_MAP where agent in ("pipeline", "epimetheus").
|
||||||
PIPELINE_OWNED_PREFIXES = (
|
# (Leo directive: PRs #2141, #157, #2142, #2180 were orphaned by pipeline rebase)
|
||||||
"extract/", "ingestion/", "epimetheus/", "reweave/", "fix/",
|
PIPELINE_OWNED_PREFIXES = ("extract/", "ingestion/", "epimetheus/", "reweave/", "fix/")
|
||||||
"theseus/", "rio/", "astra/", "vida/", "clay/", "leo/", "argus/", "oberon/",
|
|
||||||
)
|
# Safety assertion: agent branches MUST NOT be in PIPELINE_OWNED_PREFIXES.
|
||||||
|
# Auto-merge on eval approval bypasses Leo's review gate.
|
||||||
|
# Agent PRs use auto_merge flag instead (set by evaluate.py after two-reviewer approval).
|
||||||
|
_AGENT_NAMES = ("theseus", "rio", "astra", "vida", "clay", "leo", "argus", "oberon", "rhea", "ganymede")
|
||||||
|
for _prefix in PIPELINE_OWNED_PREFIXES:
|
||||||
|
for _agent in _AGENT_NAMES:
|
||||||
|
assert not _prefix.startswith(f"{_agent}/"), \
|
||||||
|
f"FATAL: Agent prefix '{_agent}/' found in PIPELINE_OWNED_PREFIXES — this bypasses Leo's review gate"
|
||||||
|
|
||||||
# Import worktree lock — file at /opt/teleo-eval/pipeline/lib/worktree_lock.py
|
# Import worktree lock — file at /opt/teleo-eval/pipeline/lib/worktree_lock.py
|
||||||
try:
|
try:
|
||||||
|
|
@ -113,9 +119,10 @@ async def discover_external_prs(conn) -> int:
|
||||||
|
|
||||||
conn.execute(
|
conn.execute(
|
||||||
"""INSERT OR IGNORE INTO prs
|
"""INSERT OR IGNORE INTO prs
|
||||||
(number, branch, status, origin, priority, domain, agent, commit_type)
|
(number, branch, status, origin, priority, domain, agent, commit_type,
|
||||||
VALUES (?, ?, 'open', ?, ?, ?, ?, ?)""",
|
prompt_version, pipeline_version)
|
||||||
(pr["number"], pr["head"]["ref"], origin, priority, domain, agent, commit_type),
|
VALUES (?, ?, 'open', ?, ?, ?, ?, ?, ?, ?)""",
|
||||||
|
(pr["number"], pr["head"]["ref"], origin, priority, domain, agent, commit_type, config.PROMPT_VERSION, config.PIPELINE_VERSION),
|
||||||
)
|
)
|
||||||
db.audit(
|
db.audit(
|
||||||
conn,
|
conn,
|
||||||
|
|
@ -190,7 +197,7 @@ async def _claim_next_pr(conn, domain: str) -> dict | None:
|
||||||
LEFT JOIN sources s ON p.source_path = s.path
|
LEFT JOIN sources s ON p.source_path = s.path
|
||||||
WHERE p.status = 'approved'
|
WHERE p.status = 'approved'
|
||||||
AND p.domain = ?
|
AND p.domain = ?
|
||||||
AND ({prefix_clauses})
|
AND ({prefix_clauses} OR p.auto_merge = 1)
|
||||||
AND NOT EXISTS (
|
AND NOT EXISTS (
|
||||||
SELECT 1 FROM prs p2
|
SELECT 1 FROM prs p2
|
||||||
WHERE p2.domain = p.domain
|
WHERE p2.domain = p.domain
|
||||||
|
|
@ -313,20 +320,7 @@ async def _cherry_pick_onto_main(branch: str) -> tuple[bool, str]:
|
||||||
dropped_entities: set[str] = set()
|
dropped_entities: set[str] = set()
|
||||||
picked_count = 0
|
picked_count = 0
|
||||||
for commit_sha in commit_list:
|
for commit_sha in commit_list:
|
||||||
# Detect merge commits — cherry-pick needs -m 1 to pick first-parent diff
|
rc, out = await _git("cherry-pick", commit_sha, cwd=worktree_path, timeout=60)
|
||||||
rc_parents, parents_out = await _git(
|
|
||||||
"cat-file", "-p", commit_sha, cwd=worktree_path, timeout=5,
|
|
||||||
)
|
|
||||||
parent_count = parents_out.count("\nparent ") + (1 if parents_out.startswith("parent ") else 0)
|
|
||||||
is_merge = parent_count >= 2
|
|
||||||
|
|
||||||
pick_args = ["cherry-pick"]
|
|
||||||
if is_merge:
|
|
||||||
pick_args.extend(["-m", "1"])
|
|
||||||
logger.info("Cherry-pick %s: merge commit, using -m 1", commit_sha[:8])
|
|
||||||
pick_args.append(commit_sha)
|
|
||||||
|
|
||||||
rc, out = await _git(*pick_args, cwd=worktree_path, timeout=60)
|
|
||||||
if rc != 0 and "empty" in out.lower():
|
if rc != 0 and "empty" in out.lower():
|
||||||
# Content already on main — skip this commit
|
# Content already on main — skip this commit
|
||||||
await _git("cherry-pick", "--skip", cwd=worktree_path)
|
await _git("cherry-pick", "--skip", cwd=worktree_path)
|
||||||
|
|
@ -406,6 +400,281 @@ async def _cherry_pick_onto_main(branch: str) -> tuple[bool, str]:
|
||||||
await _git("branch", "-D", clean_branch)
|
await _git("branch", "-D", clean_branch)
|
||||||
|
|
||||||
|
|
||||||
|
REWEAVE_EDGE_FIELDS = ("supports", "challenges", "challenged_by", "depends_on", "related", "reweave_edges")
|
||||||
|
|
||||||
|
# When A supports B, B also supports A (approximately symmetric).
|
||||||
|
# When A challenges B, B is challenged_by A (NOT symmetric — direction matters).
|
||||||
|
RECIPROCAL_EDGE_MAP = {
|
||||||
|
"supports": "supports",
|
||||||
|
"challenges": "challenged_by",
|
||||||
|
"related": "related",
|
||||||
|
"depends_on": "related", # A depends_on B → B is related to A (not symmetric)
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_yaml_frontmatter(text: str) -> tuple[dict | None, str, str]:
|
||||||
|
"""Parse YAML frontmatter from markdown text.
|
||||||
|
|
||||||
|
Returns (frontmatter_dict, raw_fm_text, body_text_including_closing_delimiter).
|
||||||
|
Returns (None, "", text) if no valid frontmatter found.
|
||||||
|
raw_fm_text is the text between the --- delimiters (no delimiters, no leading newline).
|
||||||
|
"""
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
if not text.startswith("---"):
|
||||||
|
return None, "", text
|
||||||
|
end = text.find("\n---", 3)
|
||||||
|
if end == -1:
|
||||||
|
return None, "", text
|
||||||
|
try:
|
||||||
|
raw_fm_text = text[4:end] # skip "---\n", stop before "\n---"
|
||||||
|
fm = yaml.safe_load(raw_fm_text)
|
||||||
|
body = text[end:] # includes closing \n--- and body
|
||||||
|
return (fm if isinstance(fm, dict) else None), raw_fm_text, body
|
||||||
|
except Exception:
|
||||||
|
return None, "", text
|
||||||
|
|
||||||
|
|
||||||
|
def _union_edge_lists(main_edges: list, branch_edges: list) -> list:
|
||||||
|
"""Union two edge lists, preserving order from main (append new at end).
|
||||||
|
|
||||||
|
Deduplicates by lowercase slug. Main's order is preserved; branch-only
|
||||||
|
edges are appended in their original order.
|
||||||
|
"""
|
||||||
|
seen = set()
|
||||||
|
result = []
|
||||||
|
for edge in main_edges:
|
||||||
|
key = str(edge).strip().lower()
|
||||||
|
if key not in seen:
|
||||||
|
seen.add(key)
|
||||||
|
result.append(edge)
|
||||||
|
for edge in branch_edges:
|
||||||
|
key = str(edge).strip().lower()
|
||||||
|
if key not in seen:
|
||||||
|
seen.add(key)
|
||||||
|
result.append(edge)
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
def _serialize_edge_fields(raw_fm_text: str, merged_edges: dict[str, list]) -> str:
|
||||||
|
"""Splice merged edge fields into raw frontmatter text, preserving all other fields byte-identical.
|
||||||
|
|
||||||
|
Only modifies REWEAVE_EDGE_FIELDS lines. All other frontmatter (title, confidence, type, etc.)
|
||||||
|
stays exactly as it was in the source text — no yaml.dump reformatting.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
raw_fm_text: The raw YAML text between the --- delimiters (no delimiters included).
|
||||||
|
merged_edges: {field_name: [edge_values]} for each edge field that should be present.
|
||||||
|
"""
|
||||||
|
import re
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
lines = raw_fm_text.split("\n")
|
||||||
|
result_lines = []
|
||||||
|
i = 0
|
||||||
|
fields_written = set()
|
||||||
|
|
||||||
|
while i < len(lines):
|
||||||
|
line = lines[i]
|
||||||
|
# Check if this line starts an edge field
|
||||||
|
matched_field = None
|
||||||
|
for field in REWEAVE_EDGE_FIELDS:
|
||||||
|
if line.startswith(f"{field}:"):
|
||||||
|
matched_field = field
|
||||||
|
break
|
||||||
|
|
||||||
|
if matched_field:
|
||||||
|
fields_written.add(matched_field)
|
||||||
|
# Skip the old field and its list items (may be indented with spaces)
|
||||||
|
i += 1
|
||||||
|
while i < len(lines) and lines[i] and (lines[i][0] in (' ', '-')):
|
||||||
|
i += 1
|
||||||
|
# Write the merged version
|
||||||
|
edges = merged_edges.get(matched_field, [])
|
||||||
|
if edges:
|
||||||
|
result_lines.append(f"{matched_field}:")
|
||||||
|
for edge in edges:
|
||||||
|
result_lines.append(f"- {edge}")
|
||||||
|
# Don't increment i — it's already past the old field
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
result_lines.append(line)
|
||||||
|
i += 1
|
||||||
|
|
||||||
|
# Append any new edge fields that didn't exist in the original
|
||||||
|
for field in REWEAVE_EDGE_FIELDS:
|
||||||
|
if field not in fields_written:
|
||||||
|
edges = merged_edges.get(field, [])
|
||||||
|
if edges:
|
||||||
|
result_lines.append(f"{field}:")
|
||||||
|
for edge in edges:
|
||||||
|
result_lines.append(f"- {edge}")
|
||||||
|
|
||||||
|
return "\n".join(result_lines)
|
||||||
|
|
||||||
|
|
||||||
|
def _serialize_frontmatter(raw_fm_text: str, merged_edges: dict[str, list], body: str) -> str:
|
||||||
|
"""Rebuild markdown file: splice merged edges into raw frontmatter, append body.
|
||||||
|
|
||||||
|
Uses string-level surgery — only edge fields are modified. All other frontmatter
|
||||||
|
stays byte-identical to the source. No yaml.dump reformatting.
|
||||||
|
"""
|
||||||
|
spliced = _serialize_edge_fields(raw_fm_text, merged_edges)
|
||||||
|
# body starts with \n--- (closing delimiter + body text)
|
||||||
|
if body.startswith("\n"):
|
||||||
|
return f"---\n{spliced}{body}"
|
||||||
|
return f"---\n{spliced}\n{body}"
|
||||||
|
|
||||||
|
|
||||||
|
async def _merge_reweave_pr(branch: str) -> tuple[bool, str]:
|
||||||
|
"""Merge a reweave PR using per-file frontmatter union instead of cherry-pick.
|
||||||
|
|
||||||
|
Reweave branches MODIFY existing files (appending YAML frontmatter edges).
|
||||||
|
Cherry-pick fails when main moved since branch creation (~75% failure rate).
|
||||||
|
|
||||||
|
This function:
|
||||||
|
1. Gets the list of files changed by the reweave branch
|
||||||
|
2. For each file, reads frontmatter from BOTH main HEAD and branch HEAD
|
||||||
|
3. Unions the edge arrays (order-preserving, main first, branch-new appended)
|
||||||
|
4. Asserts branch edges are a superset of main edges (reweave is append-only)
|
||||||
|
5. Writes merged content to a worktree, commits, pushes as the branch
|
||||||
|
|
||||||
|
Approved by Ganymede (manifest approach) and Theseus (superset assertion + order-preserving dedup).
|
||||||
|
"""
|
||||||
|
worktree_path = f"/tmp/teleo-merge-{branch.replace('/', '-')}"
|
||||||
|
clean_branch = f"_clean/{branch.replace('/', '-')}"
|
||||||
|
|
||||||
|
# Fetch latest state
|
||||||
|
rc, out = await _git("fetch", "origin", "main", timeout=15)
|
||||||
|
if rc != 0:
|
||||||
|
return False, f"fetch main failed: {out}"
|
||||||
|
rc, out = await _git("fetch", "origin", branch, timeout=15)
|
||||||
|
if rc != 0:
|
||||||
|
return False, f"fetch branch failed: {out}"
|
||||||
|
|
||||||
|
# Get files changed by the reweave branch
|
||||||
|
rc, diff_out = await _git(
|
||||||
|
"diff", "--name-only", f"origin/main...origin/{branch}", timeout=10,
|
||||||
|
)
|
||||||
|
if rc != 0 or not diff_out.strip():
|
||||||
|
return False, f"no changed files found on {branch}"
|
||||||
|
|
||||||
|
changed_files = [f.strip() for f in diff_out.strip().split("\n") if f.strip() and f.strip().endswith(".md")]
|
||||||
|
if not changed_files:
|
||||||
|
return False, "no .md files changed"
|
||||||
|
|
||||||
|
# Pre-cleanup: remove stale worktree/branch from prior crash (SIGKILL, OOM, etc.)
|
||||||
|
await _git("worktree", "remove", "--force", worktree_path)
|
||||||
|
await _git("branch", "-D", clean_branch)
|
||||||
|
rc, out = await _git("worktree", "add", "-b", clean_branch, worktree_path, "origin/main")
|
||||||
|
if rc != 0:
|
||||||
|
return False, f"worktree add failed: {out}"
|
||||||
|
|
||||||
|
try:
|
||||||
|
merged_count = 0
|
||||||
|
skipped_non_superset = []
|
||||||
|
|
||||||
|
for fpath in changed_files:
|
||||||
|
# Read file content from main HEAD and branch HEAD
|
||||||
|
rc_main, main_content = await _git("show", f"origin/main:{fpath}", timeout=5)
|
||||||
|
rc_branch, branch_content = await _git("show", f"origin/{branch}:{fpath}", timeout=5)
|
||||||
|
|
||||||
|
if rc_branch != 0:
|
||||||
|
logger.warning("Reweave merge: cannot read %s from branch %s", fpath, branch)
|
||||||
|
continue
|
||||||
|
|
||||||
|
if rc_main != 0:
|
||||||
|
# File only exists on branch (new file) — just write it
|
||||||
|
full_path = os.path.join(worktree_path, fpath)
|
||||||
|
os.makedirs(os.path.dirname(full_path), exist_ok=True)
|
||||||
|
with open(full_path, "w") as f:
|
||||||
|
f.write(branch_content)
|
||||||
|
await _git("add", fpath, cwd=worktree_path)
|
||||||
|
merged_count += 1
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Parse frontmatter from both versions
|
||||||
|
main_fm, main_raw_fm, main_body = _parse_yaml_frontmatter(main_content)
|
||||||
|
branch_fm, _branch_raw_fm, branch_body = _parse_yaml_frontmatter(branch_content)
|
||||||
|
|
||||||
|
if main_fm is None or branch_fm is None:
|
||||||
|
# Parse failure = something unexpected. Fail the merge, don't fallback
|
||||||
|
# to cherry-pick. (Theseus: loud failure, not silent retry)
|
||||||
|
return False, f"frontmatter parse failed on {fpath} — manual review needed"
|
||||||
|
|
||||||
|
# Superset assertion + merge in one pass.
|
||||||
|
# Reweave only adds edges. If branch is missing an edge that main has,
|
||||||
|
# the branch was based on stale main — union is safe (adds both).
|
||||||
|
merged_edges = {}
|
||||||
|
for field in REWEAVE_EDGE_FIELDS:
|
||||||
|
main_list = main_fm.get(field, [])
|
||||||
|
branch_list = branch_fm.get(field, [])
|
||||||
|
if not isinstance(main_list, list):
|
||||||
|
main_list = [main_list] if main_list else []
|
||||||
|
if not isinstance(branch_list, list):
|
||||||
|
branch_list = [branch_list] if branch_list else []
|
||||||
|
|
||||||
|
# Superset check
|
||||||
|
main_keys = {str(v).strip().lower() for v in main_list if v}
|
||||||
|
branch_keys = {str(v).strip().lower() for v in branch_list if v}
|
||||||
|
missing = main_keys - branch_keys
|
||||||
|
if missing:
|
||||||
|
logger.warning(
|
||||||
|
"Reweave merge: %s field '%s' — branch missing edges from main: %s",
|
||||||
|
fpath, field, missing,
|
||||||
|
)
|
||||||
|
skipped_non_superset.append(f"{fpath}:{field}")
|
||||||
|
|
||||||
|
# Collect merged edges for string-level splicing
|
||||||
|
if main_list or branch_list:
|
||||||
|
merged_edges[field] = _union_edge_lists(main_list, branch_list)
|
||||||
|
|
||||||
|
# Write merged file — splice edges into main's raw frontmatter, use main's body
|
||||||
|
full_path = os.path.join(worktree_path, fpath)
|
||||||
|
os.makedirs(os.path.dirname(full_path), exist_ok=True)
|
||||||
|
with open(full_path, "w") as f:
|
||||||
|
f.write(_serialize_frontmatter(main_raw_fm, merged_edges, main_body))
|
||||||
|
await _git("add", fpath, cwd=worktree_path)
|
||||||
|
merged_count += 1
|
||||||
|
|
||||||
|
if merged_count == 0:
|
||||||
|
return False, "no files merged (all skipped)"
|
||||||
|
|
||||||
|
# Commit the merged changes
|
||||||
|
commit_msg = f"reweave: merge {merged_count} files via frontmatter union [auto]"
|
||||||
|
rc, out = await _git(
|
||||||
|
"commit", "-m", commit_msg, cwd=worktree_path, timeout=30,
|
||||||
|
)
|
||||||
|
if rc != 0:
|
||||||
|
return False, f"commit failed: {out}"
|
||||||
|
|
||||||
|
# Force-push as the branch (for the ff-push step in _merge_domain_queue)
|
||||||
|
rc, expected_sha = await _git("rev-parse", f"origin/{branch}")
|
||||||
|
if rc != 0:
|
||||||
|
return False, f"rev-parse origin/{branch} failed: {expected_sha}"
|
||||||
|
expected_sha = expected_sha.strip().split("\n")[0]
|
||||||
|
|
||||||
|
rc, out = await _git(
|
||||||
|
"push",
|
||||||
|
f"--force-with-lease={branch}:{expected_sha}",
|
||||||
|
"origin",
|
||||||
|
f"HEAD:{branch}",
|
||||||
|
cwd=worktree_path,
|
||||||
|
timeout=30,
|
||||||
|
)
|
||||||
|
if rc != 0:
|
||||||
|
return False, f"push rejected: {out}"
|
||||||
|
|
||||||
|
result_msg = f"frontmatter-union merged {merged_count} files"
|
||||||
|
if skipped_non_superset:
|
||||||
|
result_msg += f" (non-superset warnings: {len(skipped_non_superset)})"
|
||||||
|
return True, result_msg
|
||||||
|
|
||||||
|
finally:
|
||||||
|
await _git("worktree", "remove", "--force", worktree_path)
|
||||||
|
await _git("branch", "-D", clean_branch)
|
||||||
|
|
||||||
|
|
||||||
async def _resubmit_approvals(pr_number: int):
|
async def _resubmit_approvals(pr_number: int):
|
||||||
"""Re-submit 2 formal Forgejo approvals after force-push invalidated them.
|
"""Re-submit 2 formal Forgejo approvals after force-push invalidated them.
|
||||||
|
|
||||||
|
|
@ -852,6 +1121,179 @@ async def _embed_merged_claims(main_sha: str, branch_sha: str):
|
||||||
logger.exception("embed: post-merge embedding failed (non-fatal)")
|
logger.exception("embed: post-merge embedding failed (non-fatal)")
|
||||||
|
|
||||||
|
|
||||||
|
async def _reciprocal_edges(main_sha: str, branch_sha: str):
|
||||||
|
"""Add reciprocal edges on existing claims after a PR merges.
|
||||||
|
|
||||||
|
When a new claim A has `supports: [B]` in its frontmatter, B should have
|
||||||
|
`supports: [A]` added to its own frontmatter. This gives A an incoming link,
|
||||||
|
preventing it from being an orphan.
|
||||||
|
|
||||||
|
Runs on main after cherry-pick merge. Non-fatal — orphans are recoverable.
|
||||||
|
Only processes new files (diff-filter=A), not modified files.
|
||||||
|
"""
|
||||||
|
EDGE_FIELDS = ("supports", "challenges", "related")
|
||||||
|
# Inverse mapping: if A supports B, then B is supported-by A.
|
||||||
|
# For simplicity, we use the same edge type (bidirectional "supports" means
|
||||||
|
# both claims support each other's argument). This matches reweave behavior.
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Find newly added claim files
|
||||||
|
rc, diff_out = await _git(
|
||||||
|
"diff", "--name-only", "--diff-filter=A",
|
||||||
|
main_sha, branch_sha,
|
||||||
|
cwd=str(config.MAIN_WORKTREE),
|
||||||
|
timeout=10,
|
||||||
|
)
|
||||||
|
if rc != 0:
|
||||||
|
logger.warning("reciprocal_edges: diff failed (rc=%d), skipping", rc)
|
||||||
|
return
|
||||||
|
|
||||||
|
claim_dirs = {"domains/", "core/", "foundations/"}
|
||||||
|
new_claims = [
|
||||||
|
f for f in diff_out.strip().split("\n")
|
||||||
|
if f.endswith(".md")
|
||||||
|
and any(f.startswith(d) for d in claim_dirs)
|
||||||
|
and not f.split("/")[-1].startswith("_")
|
||||||
|
and "/entities/" not in f
|
||||||
|
and "/decisions/" not in f
|
||||||
|
]
|
||||||
|
|
||||||
|
if not new_claims:
|
||||||
|
return
|
||||||
|
|
||||||
|
reciprocals_added = 0
|
||||||
|
modified_files = set()
|
||||||
|
for claim_path in new_claims:
|
||||||
|
full_path = config.MAIN_WORKTREE / claim_path
|
||||||
|
if not full_path.exists():
|
||||||
|
continue
|
||||||
|
|
||||||
|
try:
|
||||||
|
content = full_path.read_text()
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
|
|
||||||
|
fm, raw_fm, body = _parse_yaml_frontmatter(content)
|
||||||
|
if fm is None:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Get the new claim's slug (filename without .md)
|
||||||
|
claim_slug = claim_path.rsplit("/", 1)[-1].replace(".md", "")
|
||||||
|
|
||||||
|
# Collect all edge targets from this new claim
|
||||||
|
for field in EDGE_FIELDS:
|
||||||
|
targets = fm.get(field, [])
|
||||||
|
if isinstance(targets, str):
|
||||||
|
targets = [targets]
|
||||||
|
if not isinstance(targets, list):
|
||||||
|
continue
|
||||||
|
|
||||||
|
for target_slug in targets:
|
||||||
|
target_slug = str(target_slug).strip()
|
||||||
|
if not target_slug:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Find the target file on disk
|
||||||
|
target_file = _find_claim_file(target_slug)
|
||||||
|
if target_file is None:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Add reciprocal edge: target now has field: [new_claim_slug]
|
||||||
|
reciprocal_type = RECIPROCAL_EDGE_MAP.get(field, "related")
|
||||||
|
if _add_edge_to_file(target_file, reciprocal_type, claim_slug):
|
||||||
|
reciprocals_added += 1
|
||||||
|
modified_files.add(str(target_file))
|
||||||
|
|
||||||
|
if reciprocals_added > 0:
|
||||||
|
# Stage only the files we modified (never git add -A in automation)
|
||||||
|
for f in modified_files:
|
||||||
|
await _git("add", f, cwd=str(config.MAIN_WORKTREE))
|
||||||
|
rc, out = await _git(
|
||||||
|
"commit", "-m", f"reciprocal edges: {reciprocals_added} edges from {len(new_claims)} new claims",
|
||||||
|
cwd=str(config.MAIN_WORKTREE),
|
||||||
|
)
|
||||||
|
if rc == 0:
|
||||||
|
# Push immediately — batch-extract-50.sh does reset --hard origin/main
|
||||||
|
# every 15 min, which destroys unpushed local commits
|
||||||
|
push_rc, push_out = await _git(
|
||||||
|
"push", "origin", "main",
|
||||||
|
cwd=str(config.MAIN_WORKTREE),
|
||||||
|
timeout=30,
|
||||||
|
)
|
||||||
|
if push_rc == 0:
|
||||||
|
logger.info("reciprocal_edges: %d edges pushed to main (%d new claims)", reciprocals_added, len(new_claims))
|
||||||
|
else:
|
||||||
|
logger.warning("reciprocal_edges: push failed (commit is local only): %s", push_out[:200])
|
||||||
|
else:
|
||||||
|
logger.warning("reciprocal_edges: commit failed: %s", out[:200])
|
||||||
|
|
||||||
|
except Exception:
|
||||||
|
logger.exception("reciprocal_edges: failed (non-fatal)")
|
||||||
|
|
||||||
|
|
||||||
|
def _find_claim_file(slug: str) -> "Path | None":
|
||||||
|
"""Find a claim file on disk by its slug. Searches domains/, core/, foundations/."""
|
||||||
|
from pathlib import Path as _Path
|
||||||
|
|
||||||
|
worktree = config.MAIN_WORKTREE
|
||||||
|
for search_dir in ("domains", "core", "foundations"):
|
||||||
|
base = worktree / search_dir
|
||||||
|
if not base.is_dir():
|
||||||
|
continue
|
||||||
|
# Direct match
|
||||||
|
for md in base.rglob(f"{slug}.md"):
|
||||||
|
if not md.name.startswith("_"):
|
||||||
|
return md
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _add_edge_to_file(file_path, edge_type: str, target_slug: str) -> bool:
|
||||||
|
"""Add a single edge to a file's frontmatter. Returns True if modified."""
|
||||||
|
try:
|
||||||
|
content = file_path.read_text()
|
||||||
|
except Exception:
|
||||||
|
return False
|
||||||
|
|
||||||
|
fm, raw_fm, body = _parse_yaml_frontmatter(content)
|
||||||
|
if fm is None:
|
||||||
|
return False
|
||||||
|
|
||||||
|
# Check for existing edge (dedup)
|
||||||
|
existing = fm.get(edge_type, [])
|
||||||
|
if isinstance(existing, str):
|
||||||
|
existing = [existing]
|
||||||
|
if not isinstance(existing, list):
|
||||||
|
existing = []
|
||||||
|
|
||||||
|
if any(str(e).strip().lower() == target_slug.lower() for e in existing):
|
||||||
|
return False # Already exists
|
||||||
|
|
||||||
|
# Build merged edges (all edge fields, only modifying the target one)
|
||||||
|
merged_edges = {}
|
||||||
|
for field in REWEAVE_EDGE_FIELDS:
|
||||||
|
vals = fm.get(field, [])
|
||||||
|
if isinstance(vals, str):
|
||||||
|
vals = [vals]
|
||||||
|
if not isinstance(vals, list):
|
||||||
|
vals = []
|
||||||
|
merged_edges[field] = list(vals)
|
||||||
|
|
||||||
|
merged_edges.setdefault(edge_type, []).append(target_slug)
|
||||||
|
|
||||||
|
# Serialize using the same string-surgery approach as reweave
|
||||||
|
new_fm = _serialize_edge_fields(raw_fm, merged_edges)
|
||||||
|
if body.startswith("\n"):
|
||||||
|
new_content = f"---\n{new_fm}{body}"
|
||||||
|
else:
|
||||||
|
new_content = f"---\n{new_fm}\n{body}"
|
||||||
|
|
||||||
|
try:
|
||||||
|
file_path.write_text(new_content)
|
||||||
|
return True
|
||||||
|
except Exception:
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
def _archive_source_for_pr(branch: str, domain: str, merged: bool = True):
|
def _archive_source_for_pr(branch: str, domain: str, merged: bool = True):
|
||||||
"""Move source from queue/ to archive/{domain}/ after PR merge or close.
|
"""Move source from queue/ to archive/{domain}/ after PR merge or close.
|
||||||
|
|
||||||
|
|
@ -960,11 +1402,19 @@ async def _merge_domain_queue(conn, domain: str) -> tuple[int, int]:
|
||||||
logger.info("Merging PR #%d (%s) in domain %s", pr_num, branch, domain)
|
logger.info("Merging PR #%d (%s) in domain %s", pr_num, branch, domain)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Cherry-pick onto fresh main (replaces rebase-retry — Leo+Cory directive)
|
# Route reweave branches to frontmatter-union merge.
|
||||||
# Extraction commits ADD new files, so cherry-pick applies cleanly.
|
# Reweave MODIFIES existing files (appending YAML edges) — cherry-pick
|
||||||
# Rebase failed ~23% of the time due to main moving during replay.
|
# fails ~75% when main moved. Frontmatter union reads current main HEAD,
|
||||||
|
# unions edge lists, commits. No conflicts possible.
|
||||||
|
# (Ganymede: manifest approach, Theseus: superset assertion + order-preserving dedup)
|
||||||
|
if branch.startswith("reweave/"):
|
||||||
|
merge_fn = _merge_reweave_pr(branch)
|
||||||
|
else:
|
||||||
|
# Extraction commits ADD new files — cherry-pick applies cleanly.
|
||||||
|
merge_fn = _cherry_pick_onto_main(branch)
|
||||||
|
|
||||||
pick_ok, pick_msg = await asyncio.wait_for(
|
pick_ok, pick_msg = await asyncio.wait_for(
|
||||||
_cherry_pick_onto_main(branch),
|
merge_fn,
|
||||||
timeout=MERGE_TIMEOUT_SECONDS,
|
timeout=MERGE_TIMEOUT_SECONDS,
|
||||||
)
|
)
|
||||||
except asyncio.TimeoutError:
|
except asyncio.TimeoutError:
|
||||||
|
|
@ -1062,14 +1512,10 @@ async def _merge_domain_queue(conn, domain: str) -> tuple[int, int]:
|
||||||
# Embed new/changed claims into Qdrant (non-fatal)
|
# Embed new/changed claims into Qdrant (non-fatal)
|
||||||
await _embed_merged_claims(main_sha, branch_sha)
|
await _embed_merged_claims(main_sha, branch_sha)
|
||||||
|
|
||||||
|
# Add reciprocal edges on existing claims (non-fatal)
|
||||||
|
# New claim A with supports:[B] → add supports:[A] on B's frontmatter
|
||||||
|
await _reciprocal_edges(main_sha, branch_sha)
|
||||||
|
|
||||||
# Cascade: notify agents whose beliefs/positions depend on changed claims
|
|
||||||
try:
|
|
||||||
cascaded = await cascade_after_merge(main_sha, branch_sha, pr_num, config.MAIN_WORKTREE)
|
|
||||||
if cascaded:
|
|
||||||
logger.info("PR #%d: %d cascade notifications sent", pr_num, cascaded)
|
|
||||||
except Exception:
|
|
||||||
logger.exception("PR #%d: cascade check failed (non-fatal)", pr_num)
|
|
||||||
# Delete remote branch immediately (Ganymede Q4)
|
# Delete remote branch immediately (Ganymede Q4)
|
||||||
await _delete_remote_branch(branch)
|
await _delete_remote_branch(branch)
|
||||||
|
|
||||||
|
|
@ -1092,7 +1538,7 @@ async def _reconcile_db_state(conn):
|
||||||
Run at the start of each merge cycle.
|
Run at the start of each merge cycle.
|
||||||
"""
|
"""
|
||||||
stale = conn.execute(
|
stale = conn.execute(
|
||||||
"SELECT number, branch, status FROM prs WHERE status IN ('conflict', 'open', 'reviewing', 'approved')"
|
"SELECT number, branch, status FROM prs WHERE status IN ('conflict', 'open', 'reviewing')"
|
||||||
).fetchall()
|
).fetchall()
|
||||||
|
|
||||||
if not stale:
|
if not stale:
|
||||||
|
|
@ -1121,28 +1567,6 @@ async def _reconcile_db_state(conn):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if forgejo_state == "closed" and not is_merged and db_status not in ("closed",):
|
if forgejo_state == "closed" and not is_merged and db_status not in ("closed",):
|
||||||
# Agent PRs get merged via git push (not Forgejo merge API), so
|
|
||||||
# Forgejo shows merged=False. Check if branch content is on main.
|
|
||||||
if db_status == "approved" and branch:
|
|
||||||
# Agent merges are ff-push — no merge commit exists.
|
|
||||||
# Check if branch tip is an ancestor of main (content is on main).
|
|
||||||
rc, branch_sha = await _git(
|
|
||||||
"rev-parse", f"origin/{branch}", timeout=10,
|
|
||||||
)
|
|
||||||
if rc == 0 and branch_sha.strip():
|
|
||||||
rc2, _ = await _git(
|
|
||||||
"merge-base", "--is-ancestor",
|
|
||||||
branch_sha.strip(), "origin/main",
|
|
||||||
timeout=10,
|
|
||||||
)
|
|
||||||
if rc2 == 0:
|
|
||||||
conn.execute(
|
|
||||||
"UPDATE prs SET status = 'merged', merged_at = datetime('now') WHERE number = ?",
|
|
||||||
(pr_number,),
|
|
||||||
)
|
|
||||||
logger.info("Reconciled PR #%d: agent-merged (branch tip on main)", pr_number)
|
|
||||||
reconciled += 1
|
|
||||||
continue
|
|
||||||
conn.execute(
|
conn.execute(
|
||||||
"UPDATE prs SET status = 'closed', last_error = 'reconciled: closed on Forgejo' WHERE number = ?",
|
"UPDATE prs SET status = 'closed', last_error = 'reconciled: closed on Forgejo' WHERE number = ?",
|
||||||
(pr_number,),
|
(pr_number,),
|
||||||
|
|
|
||||||
551
ops/pipeline-v2/lib/post_extract.py
Normal file
551
ops/pipeline-v2/lib/post_extract.py
Normal file
|
|
@ -0,0 +1,551 @@
|
||||||
|
"""Post-extraction validator — deterministic fixes and quality gate.
|
||||||
|
|
||||||
|
Runs AFTER LLM extraction, BEFORE git commit. Pure Python, $0 cost.
|
||||||
|
Catches the mechanical issues that account for 73% of eval rejections:
|
||||||
|
- Frontmatter schema violations (missing/invalid fields)
|
||||||
|
- Broken wiki links (strips brackets, keeps text)
|
||||||
|
- Date errors (wrong format, source date instead of today)
|
||||||
|
- Filename convention violations
|
||||||
|
- Title precision (too short, not a proposition)
|
||||||
|
- Duplicate detection against existing KB
|
||||||
|
|
||||||
|
Design principles (Leo):
|
||||||
|
- Mechanical rules belong in code, not prompts
|
||||||
|
- Fix what's fixable, reject what's not
|
||||||
|
- Never silently drop content — log everything
|
||||||
|
|
||||||
|
Epimetheus owns this module. Leo reviews changes.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
from datetime import date, datetime
|
||||||
|
from difflib import SequenceMatcher
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
logger = logging.getLogger("pipeline.post_extract")
|
||||||
|
|
||||||
|
# ─── Constants ──────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
VALID_DOMAINS = frozenset({
|
||||||
|
"internet-finance", "entertainment", "health", "ai-alignment",
|
||||||
|
"space-development", "grand-strategy", "mechanisms", "living-capital",
|
||||||
|
"living-agents", "teleohumanity", "critical-systems",
|
||||||
|
"collective-intelligence", "teleological-economics", "cultural-dynamics",
|
||||||
|
})
|
||||||
|
|
||||||
|
VALID_CONFIDENCE = frozenset({"proven", "likely", "experimental", "speculative"})
|
||||||
|
|
||||||
|
REQUIRED_CLAIM_FIELDS = ("type", "domain", "description", "confidence", "source", "created")
|
||||||
|
REQUIRED_ENTITY_FIELDS = ("type", "domain", "description")
|
||||||
|
|
||||||
|
WIKI_LINK_RE = re.compile(r"\[\[([^\]]+)\]\]")
|
||||||
|
|
||||||
|
# Minimum title word count for claims (Leo: titles must name specific mechanism)
|
||||||
|
MIN_TITLE_WORDS = 8
|
||||||
|
|
||||||
|
DEDUP_THRESHOLD = 0.85
|
||||||
|
|
||||||
|
|
||||||
|
# ─── YAML parsing ──────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def parse_frontmatter(text: str) -> tuple[dict | None, str]:
|
||||||
|
"""Extract YAML frontmatter from markdown. Returns (frontmatter_dict, body)."""
|
||||||
|
if not text.startswith("---"):
|
||||||
|
return None, text
|
||||||
|
end = text.find("---", 3)
|
||||||
|
if end == -1:
|
||||||
|
return None, text
|
||||||
|
raw = text[3:end]
|
||||||
|
body = text[end + 3:].strip()
|
||||||
|
|
||||||
|
try:
|
||||||
|
import yaml
|
||||||
|
fm = yaml.safe_load(raw)
|
||||||
|
if not isinstance(fm, dict):
|
||||||
|
return None, body
|
||||||
|
return fm, body
|
||||||
|
except ImportError:
|
||||||
|
pass
|
||||||
|
except Exception:
|
||||||
|
return None, body
|
||||||
|
|
||||||
|
# Fallback: simple key-value parser
|
||||||
|
fm = {}
|
||||||
|
for line in raw.strip().split("\n"):
|
||||||
|
line = line.strip()
|
||||||
|
if not line or line.startswith("#"):
|
||||||
|
continue
|
||||||
|
if ":" not in line:
|
||||||
|
continue
|
||||||
|
key, _, val = line.partition(":")
|
||||||
|
key = key.strip()
|
||||||
|
val = val.strip().strip('"').strip("'")
|
||||||
|
if val.lower() == "null" or val == "":
|
||||||
|
val = None
|
||||||
|
elif val.startswith("["):
|
||||||
|
val = [v.strip().strip('"').strip("'") for v in val.strip("[]").split(",") if v.strip()]
|
||||||
|
fm[key] = val
|
||||||
|
return fm if fm else None, body
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Fixers (modify content, return fixed version) ─────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def fix_frontmatter(content: str, domain: str, agent: str) -> tuple[str, list[str]]:
|
||||||
|
"""Fix common frontmatter issues. Returns (fixed_content, list_of_fixes_applied)."""
|
||||||
|
fixes = []
|
||||||
|
fm, body = parse_frontmatter(content)
|
||||||
|
if fm is None:
|
||||||
|
return content, ["unfixable:no_frontmatter"]
|
||||||
|
|
||||||
|
changed = False
|
||||||
|
ftype = fm.get("type", "claim")
|
||||||
|
|
||||||
|
# Fix 1: created = extraction date, always today. No parsing, no comparison.
|
||||||
|
# "created" means "when this was extracted," period. Source publication date
|
||||||
|
# belongs in a separate field if needed. (Ganymede review)
|
||||||
|
today_str = date.today().isoformat()
|
||||||
|
if ftype == "claim":
|
||||||
|
old_created = fm.get("created")
|
||||||
|
fm["created"] = today_str
|
||||||
|
if old_created != today_str:
|
||||||
|
fixes.append(f"set_created:{today_str}")
|
||||||
|
changed = True
|
||||||
|
|
||||||
|
# Fix 2: type field
|
||||||
|
if "type" not in fm:
|
||||||
|
fm["type"] = "claim"
|
||||||
|
fixes.append("added_type:claim")
|
||||||
|
changed = True
|
||||||
|
|
||||||
|
# Fix 3: domain field
|
||||||
|
if "domain" not in fm or fm["domain"] not in VALID_DOMAINS:
|
||||||
|
fm["domain"] = domain
|
||||||
|
fixes.append(f"fixed_domain:{fm.get('domain', 'missing')}->{domain}")
|
||||||
|
changed = True
|
||||||
|
|
||||||
|
# Fix 4: confidence field (claims only)
|
||||||
|
if ftype == "claim":
|
||||||
|
conf = fm.get("confidence")
|
||||||
|
if conf is None:
|
||||||
|
fm["confidence"] = "experimental"
|
||||||
|
fixes.append("added_confidence:experimental")
|
||||||
|
changed = True
|
||||||
|
elif conf not in VALID_CONFIDENCE:
|
||||||
|
fm["confidence"] = "experimental"
|
||||||
|
fixes.append(f"fixed_confidence:{conf}->experimental")
|
||||||
|
changed = True
|
||||||
|
|
||||||
|
# Fix 5: description field
|
||||||
|
if "description" not in fm or not fm["description"]:
|
||||||
|
# Try to derive from body's first sentence
|
||||||
|
first_sentence = body.split(".")[0].strip().lstrip("# ") if body else ""
|
||||||
|
if first_sentence and len(first_sentence) > 10:
|
||||||
|
fm["description"] = first_sentence[:200]
|
||||||
|
fixes.append("derived_description_from_body")
|
||||||
|
changed = True
|
||||||
|
|
||||||
|
# Fix 6: source field (claims only)
|
||||||
|
if ftype == "claim" and ("source" not in fm or not fm["source"]):
|
||||||
|
fm["source"] = f"extraction by {agent}"
|
||||||
|
fixes.append("added_default_source")
|
||||||
|
changed = True
|
||||||
|
|
||||||
|
if not changed:
|
||||||
|
return content, []
|
||||||
|
|
||||||
|
# Reconstruct frontmatter
|
||||||
|
return _rebuild_content(fm, body), fixes
|
||||||
|
|
||||||
|
|
||||||
|
def fix_wiki_links(content: str, existing_claims: set[str]) -> tuple[str, list[str]]:
|
||||||
|
"""Fix or strip broken wiki links. Resolves slug→space mismatches before stripping.
|
||||||
|
|
||||||
|
The LLM often generates wiki links as slugs (hyphens) but KB filenames use spaces.
|
||||||
|
Try normalizing hyphens→spaces before giving up and stripping brackets.
|
||||||
|
"""
|
||||||
|
fixes = []
|
||||||
|
# Build a lookup: normalized (lowercased, hyphens→spaces) → original stem
|
||||||
|
_normalized_lookup: dict[str, str] = {}
|
||||||
|
for stem in existing_claims:
|
||||||
|
_normalized_lookup[stem.lower().replace("-", " ")] = stem
|
||||||
|
|
||||||
|
def replace_broken(match):
|
||||||
|
link = match.group(1).strip()
|
||||||
|
if link in existing_claims:
|
||||||
|
return match.group(0) # Exact match — keep as-is
|
||||||
|
# Try normalizing slug to spaces
|
||||||
|
normalized = link.lower().replace("-", " ")
|
||||||
|
if normalized in _normalized_lookup:
|
||||||
|
resolved = _normalized_lookup[normalized]
|
||||||
|
fixes.append(f"resolved_wiki_link:{link[:40]}->{resolved[:40]}")
|
||||||
|
return f"[[{resolved}]]"
|
||||||
|
fixes.append(f"stripped_wiki_link:{link[:60]}")
|
||||||
|
return link # Keep text, remove brackets
|
||||||
|
|
||||||
|
fixed = WIKI_LINK_RE.sub(replace_broken, content)
|
||||||
|
return fixed, fixes
|
||||||
|
|
||||||
|
|
||||||
|
def fix_trailing_newline(content: str) -> tuple[str, list[str]]:
|
||||||
|
"""Ensure file ends with exactly one newline."""
|
||||||
|
if not content.endswith("\n"):
|
||||||
|
return content + "\n", ["added_trailing_newline"]
|
||||||
|
return content, []
|
||||||
|
|
||||||
|
|
||||||
|
def fix_h1_title_match(content: str, filename: str) -> tuple[str, list[str]]:
|
||||||
|
"""Ensure the content has an H1 title. Does NOT replace existing H1s.
|
||||||
|
|
||||||
|
The H1 title in the content is authoritative — the filename is derived from it
|
||||||
|
and may be truncated or slightly different. We only add a missing H1, never
|
||||||
|
overwrite an existing one.
|
||||||
|
"""
|
||||||
|
expected_title = Path(filename).stem.replace("-", " ")
|
||||||
|
fm, body = parse_frontmatter(content)
|
||||||
|
if fm is None:
|
||||||
|
return content, []
|
||||||
|
|
||||||
|
# Find existing H1
|
||||||
|
h1_match = re.search(r"^# (.+)$", body, re.MULTILINE)
|
||||||
|
if h1_match:
|
||||||
|
# H1 exists — leave it alone. The content's H1 is authoritative.
|
||||||
|
return content, []
|
||||||
|
elif body and not body.startswith("#"):
|
||||||
|
# No H1 at all — add one derived from filename
|
||||||
|
body = f"# {expected_title}\n\n{body}"
|
||||||
|
return _rebuild_content(fm, body), ["added_h1_title"]
|
||||||
|
|
||||||
|
return content, []
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Validators (check without modifying, return issues) ──────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def validate_claim(filename: str, content: str, existing_claims: set[str], agent: str | None = None) -> list[str]:
|
||||||
|
"""Validate a claim file. Returns list of issues (empty = pass)."""
|
||||||
|
issues = []
|
||||||
|
fm, body = parse_frontmatter(content)
|
||||||
|
|
||||||
|
if fm is None:
|
||||||
|
return ["no_frontmatter"]
|
||||||
|
|
||||||
|
ftype = fm.get("type", "claim")
|
||||||
|
|
||||||
|
# Schema check
|
||||||
|
required = REQUIRED_CLAIM_FIELDS if ftype == "claim" else REQUIRED_ENTITY_FIELDS
|
||||||
|
for field in required:
|
||||||
|
if field not in fm or fm[field] is None:
|
||||||
|
issues.append(f"missing_field:{field}")
|
||||||
|
|
||||||
|
# Domain check
|
||||||
|
domain = fm.get("domain")
|
||||||
|
if domain and domain not in VALID_DOMAINS:
|
||||||
|
issues.append(f"invalid_domain:{domain}")
|
||||||
|
|
||||||
|
# Confidence check (claims only)
|
||||||
|
if ftype == "claim":
|
||||||
|
conf = fm.get("confidence")
|
||||||
|
if conf and conf not in VALID_CONFIDENCE:
|
||||||
|
issues.append(f"invalid_confidence:{conf}")
|
||||||
|
|
||||||
|
# Title checks (claims only, not entities)
|
||||||
|
# Use H1 from body if available (authoritative), fall back to filename
|
||||||
|
if ftype in ("claim", "framework"):
|
||||||
|
h1_match = re.search(r"^# (.+)$", body, re.MULTILINE)
|
||||||
|
title = h1_match.group(1).strip() if h1_match else Path(filename).stem.replace("-", " ")
|
||||||
|
words = title.split()
|
||||||
|
# Always enforce minimum 4 words — a 2-3 word title is never specific
|
||||||
|
# enough to disagree with. (Ganymede review)
|
||||||
|
if len(words) < 4:
|
||||||
|
issues.append("title_too_few_words")
|
||||||
|
elif len(words) < 8:
|
||||||
|
# For 4-7 word titles, also require a verb/connective
|
||||||
|
has_verb = bool(re.search(
|
||||||
|
r"\b(is|are|was|were|will|would|can|could|should|must|has|have|had|"
|
||||||
|
r"does|did|do|may|might|shall|"
|
||||||
|
r"because|therefore|however|although|despite|since|through|by|"
|
||||||
|
r"when|where|while|if|unless|"
|
||||||
|
r"rather than|instead of|not just|more than|"
|
||||||
|
r"\w+(?:s|ed|ing|es|tes|ses|zes|ves|cts|pts|nts|rns))\b",
|
||||||
|
title, re.IGNORECASE,
|
||||||
|
))
|
||||||
|
if not has_verb:
|
||||||
|
issues.append("title_not_proposition")
|
||||||
|
|
||||||
|
# Description quality
|
||||||
|
desc = fm.get("description", "")
|
||||||
|
if isinstance(desc, str) and len(desc.strip()) < 10:
|
||||||
|
issues.append("description_too_short")
|
||||||
|
|
||||||
|
# Attribution check: extractor must be identified. (Leo: block extractor, warn sourcer)
|
||||||
|
if ftype == "claim":
|
||||||
|
from .attribution import validate_attribution
|
||||||
|
issues.extend(validate_attribution(fm, agent=agent))
|
||||||
|
|
||||||
|
# OPSEC check: flag claims containing dollar amounts + internal entity references.
|
||||||
|
# Rio's rule: never extract LivingIP/Teleo deal terms to public codex. (Ganymede review)
|
||||||
|
if ftype == "claim":
|
||||||
|
combined_text = (title + " " + desc + " " + body).lower()
|
||||||
|
has_dollar = bool(re.search(r"\$[\d,.]+[mkb]?\b", combined_text, re.IGNORECASE))
|
||||||
|
has_internal = bool(re.search(
|
||||||
|
r"\b(livingip|teleo|internal|deal terms?|valuation|equity percent)",
|
||||||
|
combined_text, re.IGNORECASE,
|
||||||
|
))
|
||||||
|
if has_dollar and has_internal:
|
||||||
|
issues.append("opsec_internal_deal_terms")
|
||||||
|
|
||||||
|
# Body substance check (claims only)
|
||||||
|
if ftype == "claim" and body:
|
||||||
|
# Strip the H1 title line and check remaining content
|
||||||
|
body_no_h1 = re.sub(r"^# .+\n*", "", body).strip()
|
||||||
|
# Remove "Relevant Notes" and "Topics" sections
|
||||||
|
body_content = re.split(r"\n---\n", body_no_h1)[0].strip()
|
||||||
|
if len(body_content) < 50:
|
||||||
|
issues.append("body_too_thin")
|
||||||
|
|
||||||
|
# Near-duplicate check (claims only, not entities)
|
||||||
|
if ftype != "entity":
|
||||||
|
title_lower = Path(filename).stem.replace("-", " ").lower()
|
||||||
|
title_words = set(title_lower.split()[:6])
|
||||||
|
for existing in existing_claims:
|
||||||
|
# Normalize existing stem: hyphens → spaces for consistent comparison
|
||||||
|
existing_normalized = existing.replace("-", " ").lower()
|
||||||
|
if len(title_words & set(existing_normalized.split()[:6])) < 2:
|
||||||
|
continue
|
||||||
|
ratio = SequenceMatcher(None, title_lower, existing_normalized).ratio()
|
||||||
|
if ratio >= DEDUP_THRESHOLD:
|
||||||
|
issues.append(f"near_duplicate:{existing[:80]}")
|
||||||
|
break # One is enough to flag
|
||||||
|
|
||||||
|
return issues
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Main entry point ──────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def validate_and_fix_claims(
|
||||||
|
claims: list[dict],
|
||||||
|
domain: str,
|
||||||
|
agent: str,
|
||||||
|
existing_claims: set[str],
|
||||||
|
repo_root: str = ".",
|
||||||
|
) -> tuple[list[dict], list[dict], dict]:
|
||||||
|
"""Validate and fix extracted claims. Returns (kept_claims, rejected_claims, stats).
|
||||||
|
|
||||||
|
Each claim dict has: filename, domain, content
|
||||||
|
Returned claims have content fixed where possible.
|
||||||
|
|
||||||
|
Stats: {total, kept, fixed, rejected, fixes_applied: [...], rejections: [...]}
|
||||||
|
"""
|
||||||
|
kept = []
|
||||||
|
rejected = []
|
||||||
|
all_fixes = []
|
||||||
|
all_rejections = []
|
||||||
|
|
||||||
|
# Add intra-batch stems to existing claims (avoid false positive duplicates within same extraction)
|
||||||
|
batch_stems = {Path(c["filename"]).stem for c in claims}
|
||||||
|
existing_plus_batch = existing_claims | batch_stems
|
||||||
|
|
||||||
|
for claim in claims:
|
||||||
|
filename = claim.get("filename", "")
|
||||||
|
content = claim.get("content", "")
|
||||||
|
claim_domain = claim.get("domain", domain)
|
||||||
|
|
||||||
|
if not filename or not content:
|
||||||
|
rejected.append(claim)
|
||||||
|
all_rejections.append(f"{filename or '?'}:missing_filename_or_content")
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Phase 1: Apply fixers
|
||||||
|
content, fixes1 = fix_frontmatter(content, claim_domain, agent)
|
||||||
|
content, fixes2 = fix_wiki_links(content, existing_plus_batch)
|
||||||
|
content, fixes3 = fix_trailing_newline(content)
|
||||||
|
content, fixes4 = fix_h1_title_match(content, filename)
|
||||||
|
|
||||||
|
fixes = fixes1 + fixes2 + fixes3 + fixes4
|
||||||
|
if fixes:
|
||||||
|
all_fixes.extend([f"{filename}:{f}" for f in fixes])
|
||||||
|
|
||||||
|
# Phase 2: Validate (after fixes)
|
||||||
|
issues = validate_claim(filename, content, existing_claims, agent=agent)
|
||||||
|
|
||||||
|
# Separate hard failures from warnings
|
||||||
|
hard_failures = [i for i in issues if not i.startswith("near_duplicate")]
|
||||||
|
warnings = [i for i in issues if i.startswith("near_duplicate")]
|
||||||
|
|
||||||
|
if hard_failures:
|
||||||
|
rejected.append({**claim, "content": content, "issues": hard_failures})
|
||||||
|
all_rejections.extend([f"{filename}:{i}" for i in hard_failures])
|
||||||
|
else:
|
||||||
|
if warnings:
|
||||||
|
all_fixes.extend([f"{filename}:WARN:{w}" for w in warnings])
|
||||||
|
kept.append({**claim, "content": content})
|
||||||
|
|
||||||
|
stats = {
|
||||||
|
"total": len(claims),
|
||||||
|
"kept": len(kept),
|
||||||
|
"fixed": len([f for f in all_fixes if ":WARN:" not in f]),
|
||||||
|
"rejected": len(rejected),
|
||||||
|
"fixes_applied": all_fixes,
|
||||||
|
"rejections": all_rejections,
|
||||||
|
}
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
"Post-extraction: %d/%d claims kept (%d fixed, %d rejected)",
|
||||||
|
stats["kept"], stats["total"], stats["fixed"], stats["rejected"],
|
||||||
|
)
|
||||||
|
|
||||||
|
return kept, rejected, stats
|
||||||
|
|
||||||
|
|
||||||
|
def validate_and_fix_entities(
|
||||||
|
entities: list[dict],
|
||||||
|
domain: str,
|
||||||
|
existing_claims: set[str],
|
||||||
|
) -> tuple[list[dict], list[dict], dict]:
|
||||||
|
"""Validate and fix extracted entities. Returns (kept, rejected, stats).
|
||||||
|
|
||||||
|
Lighter validation than claims — entities are factual records, not arguable propositions.
|
||||||
|
"""
|
||||||
|
kept = []
|
||||||
|
rejected = []
|
||||||
|
all_issues = []
|
||||||
|
|
||||||
|
for ent in entities:
|
||||||
|
filename = ent.get("filename", "")
|
||||||
|
content = ent.get("content", "")
|
||||||
|
action = ent.get("action", "create")
|
||||||
|
|
||||||
|
if not filename:
|
||||||
|
rejected.append(ent)
|
||||||
|
all_issues.append("missing_filename")
|
||||||
|
continue
|
||||||
|
|
||||||
|
issues = []
|
||||||
|
|
||||||
|
if action == "create" and content:
|
||||||
|
fm, body = parse_frontmatter(content)
|
||||||
|
if fm is None:
|
||||||
|
issues.append("no_frontmatter")
|
||||||
|
else:
|
||||||
|
if fm.get("type") != "entity":
|
||||||
|
issues.append("wrong_type")
|
||||||
|
if "entity_type" not in fm:
|
||||||
|
issues.append("missing_entity_type")
|
||||||
|
if "domain" not in fm:
|
||||||
|
issues.append("missing_domain")
|
||||||
|
|
||||||
|
# decision_market specific checks
|
||||||
|
if fm.get("entity_type") == "decision_market":
|
||||||
|
for field in ("parent_entity", "platform", "category", "status"):
|
||||||
|
if field not in fm:
|
||||||
|
issues.append(f"dm_missing:{field}")
|
||||||
|
|
||||||
|
# Fix trailing newline
|
||||||
|
if content and not content.endswith("\n"):
|
||||||
|
ent["content"] = content + "\n"
|
||||||
|
|
||||||
|
elif action == "update":
|
||||||
|
timeline = ent.get("timeline_entry", "")
|
||||||
|
if not timeline:
|
||||||
|
issues.append("update_no_timeline")
|
||||||
|
|
||||||
|
if issues:
|
||||||
|
rejected.append({**ent, "issues": issues})
|
||||||
|
all_issues.extend([f"{filename}:{i}" for i in issues])
|
||||||
|
else:
|
||||||
|
kept.append(ent)
|
||||||
|
|
||||||
|
stats = {
|
||||||
|
"total": len(entities),
|
||||||
|
"kept": len(kept),
|
||||||
|
"rejected": len(rejected),
|
||||||
|
"issues": all_issues,
|
||||||
|
}
|
||||||
|
|
||||||
|
return kept, rejected, stats
|
||||||
|
|
||||||
|
|
||||||
|
def load_existing_claims_from_repo(repo_root: str) -> set[str]:
|
||||||
|
"""Build set of known claim/entity stems from the repo."""
|
||||||
|
claims: set[str] = set()
|
||||||
|
base = Path(repo_root)
|
||||||
|
for subdir in ["domains", "core", "foundations", "maps", "agents", "schemas", "entities"]:
|
||||||
|
full = base / subdir
|
||||||
|
if not full.is_dir():
|
||||||
|
continue
|
||||||
|
for f in full.rglob("*.md"):
|
||||||
|
claims.add(f.stem)
|
||||||
|
return claims
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Helpers ────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def _rebuild_content(fm: dict, body: str) -> str:
|
||||||
|
"""Rebuild markdown content from frontmatter dict and body."""
|
||||||
|
# Order frontmatter fields consistently
|
||||||
|
field_order = ["type", "entity_type", "name", "domain", "description",
|
||||||
|
"confidence", "source", "created", "status", "parent_entity",
|
||||||
|
"platform", "proposer", "proposal_url", "proposal_date",
|
||||||
|
"resolution_date", "category", "summary", "tracked_by",
|
||||||
|
"secondary_domains", "challenged_by"]
|
||||||
|
|
||||||
|
lines = ["---"]
|
||||||
|
written = set()
|
||||||
|
for field in field_order:
|
||||||
|
if field in fm and fm[field] is not None:
|
||||||
|
lines.append(_yaml_line(field, fm[field]))
|
||||||
|
written.add(field)
|
||||||
|
# Write remaining fields not in the order list
|
||||||
|
for key, val in fm.items():
|
||||||
|
if key not in written and val is not None:
|
||||||
|
lines.append(_yaml_line(key, val))
|
||||||
|
lines.append("---")
|
||||||
|
lines.append("")
|
||||||
|
lines.append(body)
|
||||||
|
|
||||||
|
content = "\n".join(lines)
|
||||||
|
if not content.endswith("\n"):
|
||||||
|
content += "\n"
|
||||||
|
return content
|
||||||
|
|
||||||
|
|
||||||
|
def _yaml_line(key: str, val) -> str:
|
||||||
|
"""Format a single YAML key-value line."""
|
||||||
|
if isinstance(val, dict):
|
||||||
|
# Nested YAML block (e.g. attribution with sub-keys)
|
||||||
|
lines = [f"{key}:"]
|
||||||
|
for sub_key, sub_val in val.items():
|
||||||
|
if isinstance(sub_val, list) and sub_val:
|
||||||
|
lines.append(f" {sub_key}:")
|
||||||
|
for item in sub_val:
|
||||||
|
if isinstance(item, dict):
|
||||||
|
first = True
|
||||||
|
for ik, iv in item.items():
|
||||||
|
prefix = " - " if first else " "
|
||||||
|
lines.append(f'{prefix}{ik}: "{iv}"')
|
||||||
|
first = False
|
||||||
|
else:
|
||||||
|
lines.append(f' - "{item}"')
|
||||||
|
else:
|
||||||
|
lines.append(f" {sub_key}: []")
|
||||||
|
return "\n".join(lines)
|
||||||
|
if isinstance(val, list):
|
||||||
|
return f"{key}: {json.dumps(val)}"
|
||||||
|
if isinstance(val, bool):
|
||||||
|
return f"{key}: {'true' if val else 'false'}"
|
||||||
|
if isinstance(val, (int, float)):
|
||||||
|
return f"{key}: {val}"
|
||||||
|
if isinstance(val, date):
|
||||||
|
return f"{key}: {val.isoformat()}"
|
||||||
|
# String — quote if it contains special chars
|
||||||
|
s = str(val)
|
||||||
|
if any(c in s for c in ":#{}[]|>&*!%@`"):
|
||||||
|
return f'{key}: "{s}"'
|
||||||
|
return f"{key}: {s}"
|
||||||
221
ops/pipeline-v2/lib/pre_screen.py
Normal file
221
ops/pipeline-v2/lib/pre_screen.py
Normal file
|
|
@ -0,0 +1,221 @@
|
||||||
|
"""Pre-screening: identify themes from source, fetch prior art from Qdrant.
|
||||||
|
|
||||||
|
Runs before extraction to show the extractor what the KB already knows.
|
||||||
|
Reduces near-duplicates (our #1 rejection cause) by turning semantic
|
||||||
|
pre-screening from a manual discipline into a pipeline feature.
|
||||||
|
|
||||||
|
Design: Leo (approved 2026-03-30). Owner: Epimetheus.
|
||||||
|
|
||||||
|
Flow:
|
||||||
|
1. Haiku identifies 3-5 themes from source text
|
||||||
|
2. Each theme + title (with author-stripped variant) → Tier 1 search
|
||||||
|
3. Results injected into extraction prompt as "Prior Art"
|
||||||
|
4. Extractor classifies extractions as NEW / ENRICHMENT / CHALLENGE
|
||||||
|
5. ENRICHMENT/CHALLENGE must cite specific target claim (hard gate)
|
||||||
|
|
||||||
|
Cost: ~$0.002/source (Haiku theme pass) + free Qdrant queries.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
|
# Search library (same Tier 1 path used by Argus + Telegram bot)
|
||||||
|
from pathlib import Path
|
||||||
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||||
|
from lib.search import search
|
||||||
|
|
||||||
|
OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
|
||||||
|
THEME_MODEL = "anthropic/claude-haiku-4.5"
|
||||||
|
|
||||||
|
# Regex to strip leading author/entity patterns from titles
|
||||||
|
# e.g. "Shapiro: How Far Will AI Video Go" → "How Far Will AI Video Go"
|
||||||
|
# "Aschenbrenner — Situational Awareness" → "Situational Awareness"
|
||||||
|
# Prior art threshold — only show results above this score to the extractor.
|
||||||
|
# 0.50 catches mechanism-level matches where compound themes dilute embeddings.
|
||||||
|
# Was 0.65 but Haiku compound themes score 0.50-0.60 even on exact matches.
|
||||||
|
# False positives cost nothing (extractor sees irrelevant prior art, ignores it).
|
||||||
|
# False negatives cost wasted extraction + review + rejection.
|
||||||
|
PRIOR_ART_THRESHOLD = 0.50
|
||||||
|
|
||||||
|
AUTHOR_PREFIX_RE = re.compile(
|
||||||
|
r"^[A-Za-z\-']+(?:\s+[A-Za-z\-']+)?\s*[:–—\-]\s*", re.UNICODE
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def identify_themes(source_content: str, api_key: str, source_title: str = "") -> list[str]:
|
||||||
|
"""Use Haiku to identify 3-5 major themes from source text.
|
||||||
|
|
||||||
|
Returns a list of theme strings suitable as search queries.
|
||||||
|
Falls back to [source_title] on API failure.
|
||||||
|
"""
|
||||||
|
# Truncate source to keep Haiku costs minimal
|
||||||
|
snippet = source_content[:3000]
|
||||||
|
|
||||||
|
prompt = f"""Identify the 3-5 major themes or topics in this text.
|
||||||
|
Return ONLY a JSON array of short search queries (3-8 words each).
|
||||||
|
Keep queries SHORT — 3-5 words is ideal. Compound phrases score poorly in vector search.
|
||||||
|
|
||||||
|
Example good output: ["futarchy governance", "semaglutide kidney outcomes", "ICO oversubscription"]
|
||||||
|
Example bad output: ["futarchy governance mechanisms detecting revenue misrepresentation token launches", "prediction market accuracy identifying fraudulent financial claims"]
|
||||||
|
|
||||||
|
Text:
|
||||||
|
{snippet}
|
||||||
|
|
||||||
|
Return JSON array only, no explanation."""
|
||||||
|
|
||||||
|
try:
|
||||||
|
headers = {
|
||||||
|
"Authorization": f"Bearer {api_key}",
|
||||||
|
"Content-Type": "application/json",
|
||||||
|
"HTTP-Referer": "https://livingip.xyz",
|
||||||
|
"X-Title": "Teleo Pre-Screen",
|
||||||
|
}
|
||||||
|
payload = {
|
||||||
|
"model": THEME_MODEL,
|
||||||
|
"messages": [{"role": "user", "content": prompt}],
|
||||||
|
"temperature": 0.1,
|
||||||
|
"max_tokens": 500,
|
||||||
|
}
|
||||||
|
resp = requests.post(OPENROUTER_URL, headers=headers, json=payload, timeout=30)
|
||||||
|
resp.raise_for_status()
|
||||||
|
content = resp.json()["choices"][0]["message"]["content"].strip()
|
||||||
|
|
||||||
|
# Strip markdown fencing if present
|
||||||
|
if content.startswith("```"):
|
||||||
|
content = re.sub(r"^```(?:json)?\s*\n?", "", content)
|
||||||
|
content = re.sub(r"\n?```\s*$", "", content)
|
||||||
|
|
||||||
|
themes = json.loads(content)
|
||||||
|
if isinstance(themes, list) and all(isinstance(t, str) for t in themes):
|
||||||
|
return themes[:5]
|
||||||
|
except Exception as e:
|
||||||
|
print(f" WARN: Theme identification failed: {e}", file=sys.stderr)
|
||||||
|
|
||||||
|
# Fallback: use title as the only theme
|
||||||
|
return [source_title] if source_title else []
|
||||||
|
|
||||||
|
|
||||||
|
def _strip_author(title: str) -> str:
|
||||||
|
"""Strip leading author/entity prefix from a title.
|
||||||
|
|
||||||
|
"Shapiro: How Far Will AI Video Go" → "How Far Will AI Video Go"
|
||||||
|
"Noah Smith — AI and Jobs" → "AI and Jobs"
|
||||||
|
"""
|
||||||
|
stripped = AUTHOR_PREFIX_RE.sub("", title).strip()
|
||||||
|
# Only use stripped version if it's meaningfully different
|
||||||
|
if stripped and len(stripped) > 10 and stripped != title:
|
||||||
|
return stripped
|
||||||
|
return ""
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_title_from_source(source_content: str, source_file: str) -> str:
|
||||||
|
"""Get a usable title from source frontmatter or filename."""
|
||||||
|
# Try frontmatter title
|
||||||
|
match = re.search(r"^title:\s*[\"']?(.+?)[\"']?\s*$", source_content, re.MULTILINE)
|
||||||
|
if match:
|
||||||
|
return match.group(1).strip()
|
||||||
|
|
||||||
|
# Fall back to filename
|
||||||
|
basename = os.path.basename(source_file).replace(".md", "")
|
||||||
|
# Strip date prefix (e.g., "2026-03-15-article-name" → "article-name")
|
||||||
|
basename = re.sub(r"^\d{4}-\d{2}-\d{2}-", "", basename)
|
||||||
|
return basename.replace("-", " ")
|
||||||
|
|
||||||
|
|
||||||
|
def pre_screen(source_content: str, source_file: str, api_key: str,
|
||||||
|
domain: str | None = None) -> dict:
|
||||||
|
"""Run full pre-screening: themes → search → prior art.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
{
|
||||||
|
"themes": ["theme1", "theme2", ...],
|
||||||
|
"prior_art": [
|
||||||
|
{"claim_path": str, "title": str, "score": float, "query": str},
|
||||||
|
...
|
||||||
|
],
|
||||||
|
"search_queries": ["query1", "query2", ...], # for audit trail
|
||||||
|
}
|
||||||
|
"""
|
||||||
|
title = _extract_title_from_source(source_content, source_file)
|
||||||
|
|
||||||
|
# Step 1: Identify themes
|
||||||
|
themes = identify_themes(source_content, api_key, source_title=title)
|
||||||
|
|
||||||
|
# Step 2: Build search queries (themes + title + author-stripped title)
|
||||||
|
queries = list(themes)
|
||||||
|
if title and title not in queries:
|
||||||
|
queries.append(title)
|
||||||
|
stripped = _strip_author(title)
|
||||||
|
if stripped and stripped not in queries:
|
||||||
|
queries.append(stripped)
|
||||||
|
|
||||||
|
# Step 3: Search Qdrant for each query (Tier 1: expand=False)
|
||||||
|
seen_paths: set[str] = set()
|
||||||
|
prior_art: list[dict] = []
|
||||||
|
|
||||||
|
for query in queries:
|
||||||
|
try:
|
||||||
|
results = search(query, expand=False, domain=None) # cross-domain on purpose
|
||||||
|
for hit in results.get("direct_results", []):
|
||||||
|
path = hit.get("claim_path", "")
|
||||||
|
if path and path not in seen_paths:
|
||||||
|
seen_paths.add(path)
|
||||||
|
prior_art.append({
|
||||||
|
"claim_path": path,
|
||||||
|
"title": hit.get("title", os.path.basename(path).replace(".md", "").replace("-", " ")),
|
||||||
|
"score": round(hit.get("score", 0), 3),
|
||||||
|
"query": query,
|
||||||
|
})
|
||||||
|
except Exception as e:
|
||||||
|
print(f" WARN: Pre-screen search failed for '{query[:50]}': {e}", file=sys.stderr)
|
||||||
|
|
||||||
|
# Filter below threshold, sort by score descending, cap at 25
|
||||||
|
prior_art = [p for p in prior_art if p["score"] >= PRIOR_ART_THRESHOLD]
|
||||||
|
prior_art.sort(key=lambda x: x["score"], reverse=True)
|
||||||
|
prior_art = prior_art[:25]
|
||||||
|
|
||||||
|
return {
|
||||||
|
"themes": themes,
|
||||||
|
"prior_art": prior_art,
|
||||||
|
"search_queries": queries,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def format_prior_art_for_prompt(prior_art: list[dict]) -> str:
|
||||||
|
"""Format prior art results for injection into the extraction prompt.
|
||||||
|
|
||||||
|
Leo's required format:
|
||||||
|
- [claim-slug](path) — similarity: 0.82 — query: "theme that matched"
|
||||||
|
"""
|
||||||
|
if not prior_art:
|
||||||
|
return "No similar claims found in the KB. This source likely covers novel territory."
|
||||||
|
|
||||||
|
lines = []
|
||||||
|
for item in prior_art:
|
||||||
|
slug = os.path.basename(item["claim_path"]).replace(".md", "")
|
||||||
|
lines.append(
|
||||||
|
f"- [{slug}]({item['claim_path']}) — similarity: {item['score']:.2f} — query: \"{item['query'][:60]}\""
|
||||||
|
)
|
||||||
|
return "\n".join(lines)
|
||||||
|
|
||||||
|
|
||||||
|
def format_prior_art_for_pr(prior_art: list[dict]) -> str:
|
||||||
|
"""Format prior art for PR body (structured, reviewable by Leo).
|
||||||
|
|
||||||
|
Shows similarity score + which query matched for verification.
|
||||||
|
"""
|
||||||
|
if not prior_art:
|
||||||
|
return "No prior art found — source covers novel territory.\n"
|
||||||
|
|
||||||
|
lines = ["## Prior Art (automated pre-screening)\n"]
|
||||||
|
for item in prior_art:
|
||||||
|
slug = os.path.basename(item["claim_path"]).replace(".md", "")
|
||||||
|
lines.append(
|
||||||
|
f"- [{slug}]({item['claim_path']}) — similarity: {item['score']:.2f} — matched query: \"{item['query'][:80]}\""
|
||||||
|
)
|
||||||
|
lines.append("")
|
||||||
|
return "\n".join(lines)
|
||||||
480
ops/pipeline-v2/lib/search.py
Normal file
480
ops/pipeline-v2/lib/search.py
Normal file
|
|
@ -0,0 +1,480 @@
|
||||||
|
"""Shared Qdrant vector search library for the Teleo knowledge base.
|
||||||
|
|
||||||
|
Provides embed + search + graph expansion as a reusable library.
|
||||||
|
Any consumer (Argus dashboard, Telegram bot, agent research) imports from here.
|
||||||
|
|
||||||
|
Layer 1: Qdrant vector search (semantic similarity)
|
||||||
|
Layer 2: Graph expansion (1-hop via frontmatter edges)
|
||||||
|
Layer 3: Left to the caller (agent context, domain filtering)
|
||||||
|
|
||||||
|
Owner: Epimetheus
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import urllib.request
|
||||||
|
|
||||||
|
from . import config
|
||||||
|
|
||||||
|
logger = logging.getLogger("pipeline.search")
|
||||||
|
|
||||||
|
# --- Config (all from environment or config.py defaults) ---
|
||||||
|
QDRANT_URL = os.environ.get("QDRANT_URL", "http://localhost:6333")
|
||||||
|
QDRANT_COLLECTION = os.environ.get("QDRANT_COLLECTION", "teleo-claims")
|
||||||
|
EMBEDDING_MODEL = "text-embedding-3-small"
|
||||||
|
|
||||||
|
_OPENROUTER_KEY: str | None = None
|
||||||
|
|
||||||
|
WIKI_LINK_RE = re.compile(r"\[\[([^\]]+)\]\]")
|
||||||
|
|
||||||
|
# Structural files that should never be included in graph expansion results.
|
||||||
|
# These are indexes/MOCs, not claims — expanding them pulls entire domains.
|
||||||
|
STRUCTURAL_FILES = {"_map.md", "_overview.md"}
|
||||||
|
|
||||||
|
|
||||||
|
def _get_api_key() -> str | None:
|
||||||
|
"""Load OpenRouter API key (cached after first read)."""
|
||||||
|
global _OPENROUTER_KEY
|
||||||
|
if _OPENROUTER_KEY:
|
||||||
|
return _OPENROUTER_KEY
|
||||||
|
key_file = config.SECRETS_DIR / "openrouter-key"
|
||||||
|
if key_file.exists():
|
||||||
|
_OPENROUTER_KEY = key_file.read_text().strip()
|
||||||
|
return _OPENROUTER_KEY
|
||||||
|
_OPENROUTER_KEY = os.environ.get("OPENROUTER_API_KEY")
|
||||||
|
return _OPENROUTER_KEY
|
||||||
|
|
||||||
|
|
||||||
|
# --- Layer 1: Vector search ---
|
||||||
|
|
||||||
|
|
||||||
|
def embed_query(text: str) -> list[float] | None:
|
||||||
|
"""Embed a query string via OpenRouter (OpenAI-compatible endpoint).
|
||||||
|
|
||||||
|
Returns 1536-dim vector or None on failure.
|
||||||
|
"""
|
||||||
|
api_key = _get_api_key()
|
||||||
|
if not api_key:
|
||||||
|
logger.error("No OpenRouter API key available for embedding")
|
||||||
|
return None
|
||||||
|
|
||||||
|
payload = json.dumps({
|
||||||
|
"model": f"openai/{EMBEDDING_MODEL}",
|
||||||
|
"input": text[:8000],
|
||||||
|
}).encode()
|
||||||
|
req = urllib.request.Request(
|
||||||
|
"https://openrouter.ai/api/v1/embeddings",
|
||||||
|
data=payload,
|
||||||
|
headers={
|
||||||
|
"Authorization": f"Bearer {api_key}",
|
||||||
|
"Content-Type": "application/json",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
with urllib.request.urlopen(req, timeout=15) as resp:
|
||||||
|
data = json.loads(resp.read())
|
||||||
|
return data["data"][0]["embedding"]
|
||||||
|
except Exception as e:
|
||||||
|
logger.error("Embedding failed: %s", e)
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def search_qdrant(vector: list[float], limit: int = 10,
|
||||||
|
domain: str | None = None, confidence: str | None = None,
|
||||||
|
exclude: list[str] | None = None,
|
||||||
|
score_threshold: float = 0.3,
|
||||||
|
offset: int = 0) -> list[dict]:
|
||||||
|
"""Search Qdrant collection for nearest claims.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
offset: Skip first N results (Qdrant native offset for pagination).
|
||||||
|
|
||||||
|
Returns list of hits: [{id, score, payload: {claim_path, claim_title, ...}}]
|
||||||
|
"""
|
||||||
|
must_filters = []
|
||||||
|
if domain:
|
||||||
|
must_filters.append({"key": "domain", "match": {"value": domain}})
|
||||||
|
if confidence:
|
||||||
|
must_filters.append({"key": "confidence", "match": {"value": confidence}})
|
||||||
|
|
||||||
|
must_not_filters = []
|
||||||
|
if exclude:
|
||||||
|
for path in exclude:
|
||||||
|
must_not_filters.append({"key": "claim_path", "match": {"value": path}})
|
||||||
|
|
||||||
|
body = {
|
||||||
|
"vector": vector,
|
||||||
|
"limit": limit,
|
||||||
|
"with_payload": True,
|
||||||
|
"score_threshold": score_threshold,
|
||||||
|
}
|
||||||
|
if offset > 0:
|
||||||
|
body["offset"] = offset
|
||||||
|
if must_filters or must_not_filters:
|
||||||
|
body["filter"] = {}
|
||||||
|
if must_filters:
|
||||||
|
body["filter"]["must"] = must_filters
|
||||||
|
if must_not_filters:
|
||||||
|
body["filter"]["must_not"] = must_not_filters
|
||||||
|
|
||||||
|
req = urllib.request.Request(
|
||||||
|
f"{QDRANT_URL}/collections/{QDRANT_COLLECTION}/points/search",
|
||||||
|
data=json.dumps(body).encode(),
|
||||||
|
headers={"Content-Type": "application/json"},
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
with urllib.request.urlopen(req, timeout=10) as resp:
|
||||||
|
data = json.loads(resp.read())
|
||||||
|
return data.get("result", [])
|
||||||
|
except Exception as e:
|
||||||
|
logger.error("Qdrant search failed: %s", e)
|
||||||
|
return []
|
||||||
|
|
||||||
|
|
||||||
|
# --- Layer 2: Graph expansion ---
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_frontmatter_edges(path: Path) -> dict:
|
||||||
|
"""Extract relationship edges from a claim's frontmatter.
|
||||||
|
|
||||||
|
Handles both YAML formats:
|
||||||
|
depends_on: ["item1", "item2"] (inline list)
|
||||||
|
depends_on: (multi-line list)
|
||||||
|
- item1
|
||||||
|
- item2
|
||||||
|
|
||||||
|
Returns {supports: [...], challenges: [...], depends_on: [...], related: [...], wiki_links: [...]}.
|
||||||
|
wiki_links are separated from explicit related edges for differential weighting.
|
||||||
|
"""
|
||||||
|
edges = {"supports": [], "challenges": [], "depends_on": [], "related": [], "wiki_links": []}
|
||||||
|
try:
|
||||||
|
text = path.read_text(errors="replace")
|
||||||
|
except Exception:
|
||||||
|
return edges
|
||||||
|
|
||||||
|
if not text.startswith("---"):
|
||||||
|
return edges
|
||||||
|
end = text.find("\n---", 3)
|
||||||
|
if end == -1:
|
||||||
|
return edges
|
||||||
|
|
||||||
|
fm_text = text[3:end]
|
||||||
|
|
||||||
|
# Use YAML parser for reliable edge extraction
|
||||||
|
try:
|
||||||
|
import yaml
|
||||||
|
fm = yaml.safe_load(fm_text)
|
||||||
|
if isinstance(fm, dict):
|
||||||
|
for field in ("supports", "challenges", "depends_on", "related"):
|
||||||
|
val = fm.get(field)
|
||||||
|
if isinstance(val, list):
|
||||||
|
edges[field] = [str(v).strip() for v in val if v]
|
||||||
|
elif isinstance(val, str) and val.strip():
|
||||||
|
edges[field] = [val.strip()]
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Extract wiki links from body as separate edge type (lower weight)
|
||||||
|
body = text[end + 4:]
|
||||||
|
all_explicit = set()
|
||||||
|
for field in ("supports", "challenges", "depends_on", "related"):
|
||||||
|
all_explicit.update(edges[field])
|
||||||
|
|
||||||
|
wiki_links = WIKI_LINK_RE.findall(body)
|
||||||
|
for link in wiki_links:
|
||||||
|
link = link.strip()
|
||||||
|
if link and link not in all_explicit and link not in edges["wiki_links"]:
|
||||||
|
edges["wiki_links"].append(link)
|
||||||
|
|
||||||
|
return edges
|
||||||
|
|
||||||
|
|
||||||
|
def _resolve_claim_path(name: str, repo_root: Path) -> Path | None:
|
||||||
|
"""Resolve a claim name (from frontmatter edge or wiki link) to a file path.
|
||||||
|
|
||||||
|
Handles both naming conventions:
|
||||||
|
- "GLP-1 receptor agonists are..." → "GLP-1 receptor agonists are....md" (spaces)
|
||||||
|
- "glp-1-persistence-drops..." → "glp-1-persistence-drops....md" (slugified)
|
||||||
|
|
||||||
|
Checks domains/, core/, foundations/, decisions/ subdirectories.
|
||||||
|
"""
|
||||||
|
# Try exact name first (spaces in filename), then slugified
|
||||||
|
candidates = [name]
|
||||||
|
slug = name.lower().replace(" ", "-").replace("_", "-")
|
||||||
|
if slug != name:
|
||||||
|
candidates.append(slug)
|
||||||
|
|
||||||
|
for subdir in ["domains", "core", "foundations", "decisions"]:
|
||||||
|
base = repo_root / subdir
|
||||||
|
if not base.is_dir():
|
||||||
|
continue
|
||||||
|
for candidate_name in candidates:
|
||||||
|
for md in base.rglob(f"{candidate_name}.md"):
|
||||||
|
return md
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def graph_expand(seed_paths: list[str], repo_root: Path | None = None,
|
||||||
|
max_expanded: int = 30,
|
||||||
|
challenge_weight: float = 1.5,
|
||||||
|
seen: set[str] | None = None) -> list[dict]:
|
||||||
|
"""Layer 2: Expand seed claims 1-hop through knowledge graph edges.
|
||||||
|
|
||||||
|
Traverses supports/challenges/depends_on/related/wiki_links edges in frontmatter.
|
||||||
|
Edge weights: challenges 1.5x, depends_on 1.25x, supports/related 1.0x, wiki_links 0.5x.
|
||||||
|
Results sorted by weight descending so cap cuts low-value edges first.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
seen: Optional set of paths already matched (e.g. from keyword search) to exclude.
|
||||||
|
|
||||||
|
Returns list of {claim_path, claim_title, edge_type, edge_weight, from_claim}.
|
||||||
|
Excludes claims already in seed_paths or seen set.
|
||||||
|
"""
|
||||||
|
EDGE_WEIGHTS = {
|
||||||
|
"challenges": 1.5,
|
||||||
|
"challenged_by": 1.5,
|
||||||
|
"depends_on": 1.25,
|
||||||
|
"supports": 1.0,
|
||||||
|
"related": 1.0,
|
||||||
|
"wiki_links": 0.5,
|
||||||
|
}
|
||||||
|
|
||||||
|
root = repo_root or config.MAIN_WORKTREE
|
||||||
|
all_expanded = []
|
||||||
|
visited = set(seed_paths)
|
||||||
|
if seen:
|
||||||
|
visited.update(seen)
|
||||||
|
|
||||||
|
for seed_path in seed_paths:
|
||||||
|
full_path = root / seed_path
|
||||||
|
if not full_path.exists():
|
||||||
|
continue
|
||||||
|
|
||||||
|
edges = _parse_frontmatter_edges(full_path)
|
||||||
|
|
||||||
|
for edge_type, targets in edges.items():
|
||||||
|
weight = EDGE_WEIGHTS.get(edge_type, 1.0)
|
||||||
|
|
||||||
|
for target_name in targets:
|
||||||
|
target_path = _resolve_claim_path(target_name, root)
|
||||||
|
if target_path is None:
|
||||||
|
continue
|
||||||
|
|
||||||
|
rel_path = str(target_path.relative_to(root))
|
||||||
|
if rel_path in visited:
|
||||||
|
continue
|
||||||
|
# Skip structural files (MOCs/indexes) — they pull entire domains
|
||||||
|
if target_path.name in STRUCTURAL_FILES:
|
||||||
|
continue
|
||||||
|
visited.add(rel_path)
|
||||||
|
|
||||||
|
# Read title from frontmatter
|
||||||
|
title = target_name
|
||||||
|
try:
|
||||||
|
text = target_path.read_text(errors="replace")
|
||||||
|
if text.startswith("---"):
|
||||||
|
end = text.find("\n---", 3)
|
||||||
|
if end > 0:
|
||||||
|
import yaml
|
||||||
|
fm = yaml.safe_load(text[3:end])
|
||||||
|
if isinstance(fm, dict):
|
||||||
|
title = fm.get("name", fm.get("title", target_name))
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
all_expanded.append({
|
||||||
|
"claim_path": rel_path,
|
||||||
|
"claim_title": str(title),
|
||||||
|
"edge_type": edge_type,
|
||||||
|
"edge_weight": weight,
|
||||||
|
"from_claim": seed_path,
|
||||||
|
})
|
||||||
|
|
||||||
|
# Sort by weight descending so cap cuts lowest-value edges first
|
||||||
|
all_expanded.sort(key=lambda x: x["edge_weight"], reverse=True)
|
||||||
|
return all_expanded[:max_expanded]
|
||||||
|
|
||||||
|
|
||||||
|
# --- Combined search (Layer 1 + Layer 2) ---
|
||||||
|
|
||||||
|
# Default thresholds — lowered Apr 5 after production audit showed 0 vector hits.
|
||||||
|
# text-embedding-3-small scores 0.50-0.60 on conceptual matches (e.g. "risks in
|
||||||
|
# investing" vs specific claims). 0.70 rejected every result. 0.50/0.40 lets
|
||||||
|
# relevant claims through while still filtering noise.
|
||||||
|
PASS1_LIMIT = 5
|
||||||
|
PASS1_THRESHOLD = 0.50
|
||||||
|
PASS2_LIMIT = 5
|
||||||
|
PASS2_THRESHOLD = 0.40
|
||||||
|
HARD_CAP = 10
|
||||||
|
|
||||||
|
|
||||||
|
def _dedup_hits(hits: list[dict], seen: set[str]) -> list[dict]:
|
||||||
|
"""Filter Qdrant hits: dedup by claim_path, exclude structural files."""
|
||||||
|
results = []
|
||||||
|
for hit in hits:
|
||||||
|
payload = hit.get("payload", {})
|
||||||
|
claim_path = payload.get("claim_path", "")
|
||||||
|
if claim_path in seen:
|
||||||
|
continue
|
||||||
|
if claim_path.split("/")[-1] in STRUCTURAL_FILES:
|
||||||
|
continue
|
||||||
|
seen.add(claim_path)
|
||||||
|
results.append({
|
||||||
|
"claim_title": payload.get("claim_title", ""),
|
||||||
|
"claim_path": claim_path,
|
||||||
|
"score": round(hit.get("score", 0), 4),
|
||||||
|
"domain": payload.get("domain", ""),
|
||||||
|
"confidence": payload.get("confidence", ""),
|
||||||
|
"snippet": payload.get("snippet", "")[:200],
|
||||||
|
"type": payload.get("type", "claim"),
|
||||||
|
})
|
||||||
|
return results
|
||||||
|
|
||||||
|
|
||||||
|
def _sort_results(direct: list[dict], expanded: list[dict]) -> list[dict]:
|
||||||
|
"""Sort combined results: similarity desc → challenged_by → other expansion.
|
||||||
|
|
||||||
|
Sort order is load-bearing: LLMs have primacy bias, so best claims first.
|
||||||
|
"""
|
||||||
|
# Direct results already sorted by Qdrant (cosine desc)
|
||||||
|
sorted_direct = sorted(direct, key=lambda x: x.get("score", 0), reverse=True)
|
||||||
|
|
||||||
|
# Expansion: challenged_by first (counterpoints), then rest by weight
|
||||||
|
challenged = [e for e in expanded if e.get("edge_type") == "challenges"]
|
||||||
|
other_expanded = [e for e in expanded if e.get("edge_type") != "challenges"]
|
||||||
|
challenged.sort(key=lambda x: x.get("edge_weight", 0), reverse=True)
|
||||||
|
other_expanded.sort(key=lambda x: x.get("edge_weight", 0), reverse=True)
|
||||||
|
|
||||||
|
return sorted_direct + challenged + other_expanded
|
||||||
|
|
||||||
|
|
||||||
|
def search(query: str, expand: bool = False,
|
||||||
|
domain: str | None = None, confidence: str | None = None,
|
||||||
|
exclude: list[str] | None = None) -> dict:
|
||||||
|
"""Two-pass semantic search: embed query, search Qdrant, optionally expand.
|
||||||
|
|
||||||
|
Pass 1 (expand=False, default): Top 5 claims from Qdrant, score >= 0.70.
|
||||||
|
Sufficient for ~80% of queries. Fast and focused.
|
||||||
|
|
||||||
|
Pass 2 (expand=True): Next 5 claims (offset=5, score >= 0.60) plus
|
||||||
|
graph-expanded claims (challenged_by, related edges). Hard cap 10 total.
|
||||||
|
Agent calls this only when pass 1 didn't answer the question.
|
||||||
|
|
||||||
|
Returns {
|
||||||
|
"query": str,
|
||||||
|
"direct_results": [...], # Layer 1 Qdrant hits (sorted by score desc)
|
||||||
|
"expanded_results": [...], # Layer 2 graph expansion (challenges first)
|
||||||
|
"total": int,
|
||||||
|
}
|
||||||
|
"""
|
||||||
|
vector = embed_query(query)
|
||||||
|
if vector is None:
|
||||||
|
return {"query": query, "direct_results": [], "expanded_results": [],
|
||||||
|
"total": 0, "error": "embedding_failed"}
|
||||||
|
|
||||||
|
# --- Pass 1: Top 5, high threshold ---
|
||||||
|
hits = search_qdrant(vector, limit=PASS1_LIMIT, domain=domain,
|
||||||
|
confidence=confidence, exclude=exclude,
|
||||||
|
score_threshold=PASS1_THRESHOLD)
|
||||||
|
|
||||||
|
seen_paths: set[str] = set()
|
||||||
|
if exclude:
|
||||||
|
seen_paths.update(exclude)
|
||||||
|
direct = _dedup_hits(hits, seen_paths)
|
||||||
|
|
||||||
|
expanded = []
|
||||||
|
if expand:
|
||||||
|
# --- Pass 2: Next 5 from Qdrant (lower threshold, offset) ---
|
||||||
|
pass2_hits = search_qdrant(vector, limit=PASS2_LIMIT, domain=domain,
|
||||||
|
confidence=confidence, exclude=exclude,
|
||||||
|
score_threshold=PASS2_THRESHOLD,
|
||||||
|
offset=PASS1_LIMIT)
|
||||||
|
pass2_direct = _dedup_hits(pass2_hits, seen_paths)
|
||||||
|
direct.extend(pass2_direct)
|
||||||
|
|
||||||
|
# Graph expansion on all direct results (pass 1 + pass 2 seeds)
|
||||||
|
seed_paths = [r["claim_path"] for r in direct]
|
||||||
|
remaining_cap = HARD_CAP - len(direct)
|
||||||
|
if remaining_cap > 0:
|
||||||
|
expanded = graph_expand(seed_paths, max_expanded=remaining_cap,
|
||||||
|
seen=seen_paths)
|
||||||
|
|
||||||
|
# Enforce hard cap across all results
|
||||||
|
all_sorted = _sort_results(direct, expanded)[:HARD_CAP]
|
||||||
|
|
||||||
|
# Split back into direct vs expanded for backward compat
|
||||||
|
direct_paths = {r["claim_path"] for r in direct}
|
||||||
|
final_direct = [r for r in all_sorted if r.get("claim_path") in direct_paths]
|
||||||
|
final_expanded = [r for r in all_sorted if r.get("claim_path") not in direct_paths]
|
||||||
|
|
||||||
|
return {
|
||||||
|
"query": query,
|
||||||
|
"direct_results": final_direct,
|
||||||
|
"expanded_results": final_expanded,
|
||||||
|
"total": len(all_sorted),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# --- Duplicate detection ---
|
||||||
|
|
||||||
|
|
||||||
|
def check_duplicate(text: str, threshold: float = 0.85,
|
||||||
|
domain: str | None = None) -> dict:
|
||||||
|
"""Check if a claim/text is a near-duplicate of existing KB content.
|
||||||
|
|
||||||
|
Embeds the text, searches Qdrant, returns top-3 matches with scores.
|
||||||
|
Thresholds: >=0.85 likely duplicate, 0.70-0.85 check manually, <0.70 novel.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
text: The claim text to check.
|
||||||
|
threshold: Minimum score to flag as potential duplicate (default 0.85).
|
||||||
|
domain: Optional domain filter.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
{
|
||||||
|
"query": str,
|
||||||
|
"is_duplicate": bool, # True if any match >= threshold
|
||||||
|
"highest_score": float, # Best match score
|
||||||
|
"verdict": str, # "duplicate" | "check_manually" | "novel"
|
||||||
|
"matches": [ # Top 3 matches
|
||||||
|
{"score": float, "claim_path": str, "claim_title": str, "domain": str}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
"""
|
||||||
|
vector = embed_query(text)
|
||||||
|
if vector is None:
|
||||||
|
return {"query": text[:100], "is_duplicate": False, "highest_score": 0,
|
||||||
|
"verdict": "error", "matches": [], "error": "embedding_failed"}
|
||||||
|
|
||||||
|
hits = search_qdrant(vector, limit=3, domain=domain, score_threshold=0.3)
|
||||||
|
|
||||||
|
matches = []
|
||||||
|
for hit in hits:
|
||||||
|
payload = hit.get("payload", {})
|
||||||
|
matches.append({
|
||||||
|
"score": round(hit.get("score", 0), 4),
|
||||||
|
"claim_path": payload.get("claim_path", ""),
|
||||||
|
"claim_title": payload.get("claim_title", ""),
|
||||||
|
"domain": payload.get("domain", ""),
|
||||||
|
})
|
||||||
|
|
||||||
|
highest = matches[0]["score"] if matches else 0.0
|
||||||
|
|
||||||
|
if highest >= threshold:
|
||||||
|
verdict = "duplicate"
|
||||||
|
elif highest >= 0.70:
|
||||||
|
verdict = "check_manually"
|
||||||
|
else:
|
||||||
|
verdict = "novel"
|
||||||
|
|
||||||
|
return {
|
||||||
|
"query": text[:100],
|
||||||
|
"is_duplicate": highest >= threshold,
|
||||||
|
"highest_score": highest,
|
||||||
|
"verdict": verdict,
|
||||||
|
"matches": matches,
|
||||||
|
}
|
||||||
601
ops/pipeline-v2/lib/substantive_fixer.py
Normal file
601
ops/pipeline-v2/lib/substantive_fixer.py
Normal file
|
|
@ -0,0 +1,601 @@
|
||||||
|
"""Substantive fixer — acts on reviewer feedback for non-mechanical issues.
|
||||||
|
|
||||||
|
When Leo or a domain agent requests changes with substantive issues
|
||||||
|
(confidence_miscalibration, title_overclaims, scope_error, near_duplicate),
|
||||||
|
this module reads the claim + reviewer comment + original source material,
|
||||||
|
sends to an LLM, pushes the fix, and resets eval.
|
||||||
|
|
||||||
|
Issue routing:
|
||||||
|
FIXABLE (confidence, title, scope) → LLM edits the claim
|
||||||
|
CONVERTIBLE (near_duplicate) → flag for Leo to pick target, then convert
|
||||||
|
UNFIXABLE (factual_discrepancy) → close PR, re-extract with feedback
|
||||||
|
DROPPABLE (low-value, reviewer explicitly closed) → close PR
|
||||||
|
|
||||||
|
Design reviewed by Ganymede (architecture), Rhea (ops), Leo (quality).
|
||||||
|
Epimetheus owns this module. Leo reviews changes.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from . import config, db
|
||||||
|
from .forgejo import api as forgejo_api, get_agent_token, get_pr_diff, repo_path
|
||||||
|
from .llm import openrouter_call
|
||||||
|
|
||||||
|
logger = logging.getLogger("pipeline.substantive_fixer")
|
||||||
|
|
||||||
|
# Issue type routing
|
||||||
|
FIXABLE_TAGS = {"confidence_miscalibration", "title_overclaims", "scope_error", "frontmatter_schema"}
|
||||||
|
CONVERTIBLE_TAGS = {"near_duplicate"}
|
||||||
|
UNFIXABLE_TAGS = {"factual_discrepancy"}
|
||||||
|
|
||||||
|
# Max substantive fix attempts per PR (Rhea: prevent infinite loops)
|
||||||
|
MAX_SUBSTANTIVE_FIXES = 2
|
||||||
|
|
||||||
|
# Model for fixes — Gemini Flash: cheap ($0.001/fix), different family from Sonnet reviewer
|
||||||
|
FIX_MODEL = config.MODEL_GEMINI_FLASH
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Fix prompt ────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def _build_fix_prompt(
|
||||||
|
claim_content: str,
|
||||||
|
review_comment: str,
|
||||||
|
issue_tags: list[str],
|
||||||
|
source_content: str | None,
|
||||||
|
domain_index: str | None = None,
|
||||||
|
) -> str:
|
||||||
|
"""Build the targeted fix prompt.
|
||||||
|
|
||||||
|
Includes claim + reviewer feedback + source material.
|
||||||
|
Does NOT re-extract — makes targeted edits based on specific feedback.
|
||||||
|
"""
|
||||||
|
source_section = ""
|
||||||
|
if source_content:
|
||||||
|
# Truncate source to keep prompt manageable
|
||||||
|
source_section = f"""
|
||||||
|
## Original Source Material
|
||||||
|
{source_content[:8000]}
|
||||||
|
"""
|
||||||
|
|
||||||
|
index_section = ""
|
||||||
|
if domain_index and "near_duplicate" in issue_tags:
|
||||||
|
index_section = f"""
|
||||||
|
## Existing Claims in Domain (for near-duplicate resolution)
|
||||||
|
{domain_index[:4000]}
|
||||||
|
"""
|
||||||
|
|
||||||
|
issue_descriptions = []
|
||||||
|
for tag in issue_tags:
|
||||||
|
if tag == "confidence_miscalibration":
|
||||||
|
issue_descriptions.append("CONFIDENCE: Reviewer says the confidence level doesn't match the evidence.")
|
||||||
|
elif tag == "title_overclaims":
|
||||||
|
issue_descriptions.append("TITLE: Reviewer says the title asserts more than the evidence supports.")
|
||||||
|
elif tag == "scope_error":
|
||||||
|
issue_descriptions.append("SCOPE: Reviewer says the claim needs explicit scope qualification.")
|
||||||
|
elif tag == "near_duplicate":
|
||||||
|
issue_descriptions.append("DUPLICATE: Reviewer says this substantially duplicates an existing claim.")
|
||||||
|
|
||||||
|
return f"""You are fixing a knowledge base claim based on reviewer feedback. Make targeted edits — do NOT rewrite from scratch.
|
||||||
|
|
||||||
|
## The Claim (current version)
|
||||||
|
{claim_content}
|
||||||
|
|
||||||
|
## Reviewer Feedback
|
||||||
|
{review_comment}
|
||||||
|
|
||||||
|
## Issues to Fix
|
||||||
|
{chr(10).join(issue_descriptions)}
|
||||||
|
|
||||||
|
{source_section}
|
||||||
|
{index_section}
|
||||||
|
|
||||||
|
## Rules
|
||||||
|
|
||||||
|
1. **Implement the reviewer's explicit instructions.** If the reviewer says "change confidence to experimental," do that. If the reviewer says "confidence seems high" without a specific target, set it to one level below current.
|
||||||
|
2. **For title_overclaims:** Scope the title down to match evidence. Add qualifiers. Keep the mechanism but bound the claim.
|
||||||
|
3. **For scope_error:** Add explicit scope (structural/functional/causal/correlational) to the title. Add scoping language to the body.
|
||||||
|
4. **For near_duplicate:** Do NOT fix. Instead, identify the top 3 most similar existing claims from the domain index and output them in your response. The reviewer will pick the target.
|
||||||
|
5. **Preserve the claim's core argument.** You're adjusting precision, not changing what the claim says.
|
||||||
|
6. **Keep all frontmatter fields.** Do not remove or rename fields. Only modify the values the reviewer flagged.
|
||||||
|
|
||||||
|
## Output
|
||||||
|
|
||||||
|
For FIXABLE issues (confidence, title, scope):
|
||||||
|
Return the complete fixed claim file content (full markdown with frontmatter).
|
||||||
|
|
||||||
|
For near_duplicate:
|
||||||
|
Return JSON:
|
||||||
|
```json
|
||||||
|
{{"action": "flag_duplicate", "candidates": ["existing-claim-1.md", "existing-claim-2.md", "existing-claim-3.md"], "reasoning": "Why each candidate matches"}}
|
||||||
|
```
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Git helpers ───────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
async def _git(*args, cwd: str = None, timeout: int = 60) -> tuple[int, str]:
|
||||||
|
proc = await asyncio.create_subprocess_exec(
|
||||||
|
"git", *args,
|
||||||
|
cwd=cwd or str(config.REPO_DIR),
|
||||||
|
stdout=asyncio.subprocess.PIPE,
|
||||||
|
stderr=asyncio.subprocess.PIPE,
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=timeout)
|
||||||
|
except asyncio.TimeoutError:
|
||||||
|
proc.kill()
|
||||||
|
await proc.wait()
|
||||||
|
return -1, f"git {args[0]} timed out"
|
||||||
|
output = (stdout or b"").decode().strip()
|
||||||
|
if stderr:
|
||||||
|
output += "\n" + stderr.decode().strip()
|
||||||
|
return proc.returncode, output
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Source and review retrieval ───────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def _read_source_content(source_path: str) -> str | None:
|
||||||
|
"""Read source archive from main worktree."""
|
||||||
|
if not source_path:
|
||||||
|
return None
|
||||||
|
full_path = config.MAIN_WORKTREE / source_path
|
||||||
|
try:
|
||||||
|
return full_path.read_text()
|
||||||
|
except (FileNotFoundError, PermissionError):
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
async def _get_review_comments(pr_number: int) -> str:
|
||||||
|
"""Get all review comments for a PR, concatenated."""
|
||||||
|
comments = []
|
||||||
|
page = 1
|
||||||
|
while True:
|
||||||
|
result = await forgejo_api(
|
||||||
|
"GET",
|
||||||
|
repo_path(f"issues/{pr_number}/comments?limit=50&page={page}"),
|
||||||
|
)
|
||||||
|
if not result:
|
||||||
|
break
|
||||||
|
for c in result:
|
||||||
|
body = c.get("body", "")
|
||||||
|
# Skip tier0 validation comments and pipeline ack comments
|
||||||
|
if "TIER0-VALIDATION" in body or "queued for evaluation" in body:
|
||||||
|
continue
|
||||||
|
if "VERDICT:" in body or "REJECTION:" in body:
|
||||||
|
comments.append(body)
|
||||||
|
if len(result) < 50:
|
||||||
|
break
|
||||||
|
page += 1
|
||||||
|
return "\n\n---\n\n".join(comments)
|
||||||
|
|
||||||
|
|
||||||
|
async def _get_claim_files_from_pr(pr_number: int) -> dict[str, str]:
|
||||||
|
"""Get claim file contents from a PR's diff."""
|
||||||
|
diff = await get_pr_diff(pr_number)
|
||||||
|
if not diff:
|
||||||
|
return {}
|
||||||
|
|
||||||
|
from .validate import extract_claim_files_from_diff
|
||||||
|
return extract_claim_files_from_diff(diff)
|
||||||
|
|
||||||
|
|
||||||
|
def _get_domain_index(domain: str) -> str | None:
|
||||||
|
"""Get domain-filtered KB index for near-duplicate resolution."""
|
||||||
|
index_file = f"/tmp/kb-indexes/{domain}.txt"
|
||||||
|
if os.path.exists(index_file):
|
||||||
|
return Path(index_file).read_text()
|
||||||
|
# Fallback: list domain claim files
|
||||||
|
domain_dir = config.MAIN_WORKTREE / "domains" / domain
|
||||||
|
if not domain_dir.is_dir():
|
||||||
|
return None
|
||||||
|
lines = []
|
||||||
|
for f in sorted(domain_dir.glob("*.md")):
|
||||||
|
if not f.name.startswith("_"):
|
||||||
|
lines.append(f"- {f.name}: {f.stem.replace('-', ' ')}")
|
||||||
|
return "\n".join(lines[:150]) if lines else None
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Issue classification ──────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def _classify_substantive(issues: list[str]) -> str:
|
||||||
|
"""Classify issue list as fixable/convertible/unfixable/droppable."""
|
||||||
|
issue_set = set(issues)
|
||||||
|
if issue_set & UNFIXABLE_TAGS:
|
||||||
|
return "unfixable"
|
||||||
|
if issue_set & CONVERTIBLE_TAGS and not (issue_set & FIXABLE_TAGS):
|
||||||
|
return "convertible"
|
||||||
|
if issue_set & FIXABLE_TAGS:
|
||||||
|
return "fixable"
|
||||||
|
return "droppable"
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Fix execution ────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
async def _fix_pr(conn, pr_number: int) -> dict:
|
||||||
|
"""Attempt a substantive fix on a single PR. Returns result dict."""
|
||||||
|
# Atomic claim
|
||||||
|
cursor = conn.execute(
|
||||||
|
"UPDATE prs SET status = 'fixing', last_attempt = datetime('now') WHERE number = ? AND status = 'open'",
|
||||||
|
(pr_number,),
|
||||||
|
)
|
||||||
|
if cursor.rowcount == 0:
|
||||||
|
return {"pr": pr_number, "skipped": True, "reason": "not_open"}
|
||||||
|
|
||||||
|
# Increment fix attempts
|
||||||
|
conn.execute(
|
||||||
|
"UPDATE prs SET fix_attempts = COALESCE(fix_attempts, 0) + 1 WHERE number = ?",
|
||||||
|
(pr_number,),
|
||||||
|
)
|
||||||
|
|
||||||
|
row = conn.execute(
|
||||||
|
"SELECT branch, source_path, domain, eval_issues, fix_attempts FROM prs WHERE number = ?",
|
||||||
|
(pr_number,),
|
||||||
|
).fetchone()
|
||||||
|
|
||||||
|
branch = row["branch"]
|
||||||
|
source_path = row["source_path"]
|
||||||
|
domain = row["domain"]
|
||||||
|
fix_attempts = row["fix_attempts"] or 0
|
||||||
|
|
||||||
|
# Parse issue tags
|
||||||
|
try:
|
||||||
|
issues = json.loads(row["eval_issues"] or "[]")
|
||||||
|
except (json.JSONDecodeError, TypeError):
|
||||||
|
issues = []
|
||||||
|
|
||||||
|
# Check fix budget
|
||||||
|
if fix_attempts > MAX_SUBSTANTIVE_FIXES:
|
||||||
|
conn.execute("UPDATE prs SET status = 'open' WHERE number = ?", (pr_number,))
|
||||||
|
return {"pr": pr_number, "skipped": True, "reason": "fix_budget_exhausted"}
|
||||||
|
|
||||||
|
# Classify
|
||||||
|
classification = _classify_substantive(issues)
|
||||||
|
|
||||||
|
if classification == "unfixable":
|
||||||
|
# Close and re-extract
|
||||||
|
logger.info("PR #%d: unfixable (%s) — closing, source re-queued", pr_number, issues)
|
||||||
|
await _close_and_reextract(conn, pr_number, issues)
|
||||||
|
return {"pr": pr_number, "action": "closed_reextract", "issues": issues}
|
||||||
|
|
||||||
|
if classification == "droppable":
|
||||||
|
logger.info("PR #%d: droppable (%s) — closing", pr_number, issues)
|
||||||
|
conn.execute(
|
||||||
|
"UPDATE prs SET status = 'closed', last_error = ? WHERE number = ?",
|
||||||
|
(f"droppable: {issues}", pr_number),
|
||||||
|
)
|
||||||
|
return {"pr": pr_number, "action": "closed_droppable", "issues": issues}
|
||||||
|
|
||||||
|
# Refresh main worktree for source read (Ganymede: ensure freshness)
|
||||||
|
await _git("fetch", "origin", "main", cwd=str(config.MAIN_WORKTREE))
|
||||||
|
await _git("reset", "--hard", "origin/main", cwd=str(config.MAIN_WORKTREE))
|
||||||
|
|
||||||
|
# Gather context
|
||||||
|
review_text = await _get_review_comments(pr_number)
|
||||||
|
claim_files = await _get_claim_files_from_pr(pr_number)
|
||||||
|
source_content = _read_source_content(source_path)
|
||||||
|
domain_index = _get_domain_index(domain) if "near_duplicate" in issues else None
|
||||||
|
|
||||||
|
if not claim_files:
|
||||||
|
conn.execute("UPDATE prs SET status = 'open' WHERE number = ?", (pr_number,))
|
||||||
|
return {"pr": pr_number, "skipped": True, "reason": "no_claim_files"}
|
||||||
|
|
||||||
|
if not review_text:
|
||||||
|
conn.execute("UPDATE prs SET status = 'open' WHERE number = ?", (pr_number,))
|
||||||
|
return {"pr": pr_number, "skipped": True, "reason": "no_review_comments"}
|
||||||
|
|
||||||
|
if classification == "convertible":
|
||||||
|
# Near-duplicate: auto-convert to enrichment if high-confidence match (>= 0.90).
|
||||||
|
# Below threshold: flag for Leo. (Leo approved: "evidence loss > wrong target risk")
|
||||||
|
result = await _auto_convert_near_duplicate(
|
||||||
|
conn, pr_number, claim_files, domain,
|
||||||
|
)
|
||||||
|
if result.get("converted"):
|
||||||
|
conn.execute(
|
||||||
|
"UPDATE prs SET status = 'closed', last_error = ? WHERE number = ?",
|
||||||
|
(f"auto-enriched: {result['target_claim']} (sim={result['similarity']:.2f})", pr_number),
|
||||||
|
)
|
||||||
|
await forgejo_api("PATCH", repo_path(f"pulls/{pr_number}"), {"state": "closed"})
|
||||||
|
await forgejo_api("POST", repo_path(f"issues/{pr_number}/comments"), {
|
||||||
|
"body": (
|
||||||
|
f"**Auto-converted:** Evidence from this PR enriched "
|
||||||
|
f"`{result['target_claim']}` (similarity: {result['similarity']:.2f}).\n\n"
|
||||||
|
f"Leo: review if wrong target. Enrichment labeled "
|
||||||
|
f"`### Auto-enrichment (near-duplicate conversion)` in the target file."
|
||||||
|
),
|
||||||
|
})
|
||||||
|
db.audit(conn, "substantive_fixer", "auto_enrichment", json.dumps({
|
||||||
|
"pr": pr_number, "target_claim": result["target_claim"],
|
||||||
|
"similarity": round(result["similarity"], 3), "domain": domain,
|
||||||
|
}))
|
||||||
|
logger.info("PR #%d: auto-enriched on %s (sim=%.2f)",
|
||||||
|
pr_number, result["target_claim"], result["similarity"])
|
||||||
|
return {"pr": pr_number, "action": "auto_enriched", "target": result["target_claim"]}
|
||||||
|
else:
|
||||||
|
# Below 0.90 threshold — flag for Leo
|
||||||
|
logger.info("PR #%d: near_duplicate, best match %.2f < 0.90 — flagging Leo",
|
||||||
|
pr_number, result.get("best_similarity", 0))
|
||||||
|
await _flag_for_leo_review(conn, pr_number, claim_files, review_text, domain_index)
|
||||||
|
conn.execute("UPDATE prs SET status = 'open' WHERE number = ?", (pr_number,))
|
||||||
|
return {"pr": pr_number, "action": "flagged_duplicate", "issues": issues}
|
||||||
|
|
||||||
|
# FIXABLE: send to LLM
|
||||||
|
# Fix each claim file individually
|
||||||
|
fixed_any = False
|
||||||
|
for filepath, content in claim_files.items():
|
||||||
|
prompt = _build_fix_prompt(content, review_text, issues, source_content, domain_index)
|
||||||
|
result, _usage = await openrouter_call(FIX_MODEL, prompt, timeout_sec=120, max_tokens=4096)
|
||||||
|
|
||||||
|
if not result:
|
||||||
|
logger.warning("PR #%d: fix LLM call failed for %s", pr_number, filepath)
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Check if result is a duplicate flag (JSON) or fixed content (markdown)
|
||||||
|
if result.strip().startswith("{"):
|
||||||
|
try:
|
||||||
|
parsed = json.loads(result)
|
||||||
|
if parsed.get("action") == "flag_duplicate":
|
||||||
|
await _flag_for_leo_review(conn, pr_number, claim_files, review_text, domain_index)
|
||||||
|
conn.execute("UPDATE prs SET status = 'open' WHERE number = ?", (pr_number,))
|
||||||
|
return {"pr": pr_number, "action": "flagged_duplicate_by_llm"}
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Write fixed content to worktree and push
|
||||||
|
fixed_any = True
|
||||||
|
logger.info("PR #%d: fixed %s for %s", pr_number, filepath, issues)
|
||||||
|
|
||||||
|
if not fixed_any:
|
||||||
|
conn.execute("UPDATE prs SET status = 'open' WHERE number = ?", (pr_number,))
|
||||||
|
return {"pr": pr_number, "skipped": True, "reason": "no_fixes_applied"}
|
||||||
|
|
||||||
|
# Push fix and reset for re-eval
|
||||||
|
# Create worktree, apply fix, commit, push
|
||||||
|
worktree_path = str(config.BASE_DIR / "workspaces" / f"subfix-{pr_number}")
|
||||||
|
|
||||||
|
await _git("fetch", "origin", branch, timeout=30)
|
||||||
|
rc, out = await _git("worktree", "add", "--detach", worktree_path, f"origin/{branch}")
|
||||||
|
if rc != 0:
|
||||||
|
conn.execute("UPDATE prs SET status = 'open' WHERE number = ?", (pr_number,))
|
||||||
|
return {"pr": pr_number, "skipped": True, "reason": "worktree_failed"}
|
||||||
|
|
||||||
|
try:
|
||||||
|
rc, out = await _git("checkout", "-B", branch, f"origin/{branch}", cwd=worktree_path)
|
||||||
|
if rc != 0:
|
||||||
|
conn.execute("UPDATE prs SET status = 'open' WHERE number = ?", (pr_number,))
|
||||||
|
return {"pr": pr_number, "skipped": True, "reason": "checkout_failed"}
|
||||||
|
|
||||||
|
# Write fixed files
|
||||||
|
for filepath, content in claim_files.items():
|
||||||
|
prompt = _build_fix_prompt(content, review_text, issues, source_content, domain_index)
|
||||||
|
fixed_content, _usage = await openrouter_call(FIX_MODEL, prompt, timeout_sec=120, max_tokens=4096)
|
||||||
|
if fixed_content and not fixed_content.strip().startswith("{"):
|
||||||
|
full_path = Path(worktree_path) / filepath
|
||||||
|
full_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
full_path.write_text(fixed_content)
|
||||||
|
|
||||||
|
# Commit and push
|
||||||
|
rc, _ = await _git("add", "-A", cwd=worktree_path)
|
||||||
|
commit_msg = f"substantive-fix: address reviewer feedback ({', '.join(issues)})"
|
||||||
|
rc, _ = await _git("commit", "-m", commit_msg, cwd=worktree_path)
|
||||||
|
if rc != 0:
|
||||||
|
conn.execute("UPDATE prs SET status = 'open' WHERE number = ?", (pr_number,))
|
||||||
|
return {"pr": pr_number, "skipped": True, "reason": "nothing_to_commit"}
|
||||||
|
|
||||||
|
# Reset eval state BEFORE push (same pattern as fixer.py)
|
||||||
|
conn.execute(
|
||||||
|
"""UPDATE prs SET
|
||||||
|
status = 'open',
|
||||||
|
eval_attempts = 0,
|
||||||
|
eval_issues = '[]',
|
||||||
|
tier0_pass = NULL,
|
||||||
|
domain_verdict = 'pending',
|
||||||
|
leo_verdict = 'pending',
|
||||||
|
last_error = NULL
|
||||||
|
WHERE number = ?""",
|
||||||
|
(pr_number,),
|
||||||
|
)
|
||||||
|
|
||||||
|
rc, out = await _git("push", "origin", branch, cwd=worktree_path, timeout=30)
|
||||||
|
if rc != 0:
|
||||||
|
logger.error("PR #%d: push failed: %s", pr_number, out)
|
||||||
|
return {"pr": pr_number, "skipped": True, "reason": "push_failed"}
|
||||||
|
|
||||||
|
db.audit(
|
||||||
|
conn, "substantive_fixer", "fixed",
|
||||||
|
json.dumps({"pr": pr_number, "issues": issues, "attempt": fix_attempts}),
|
||||||
|
)
|
||||||
|
logger.info("PR #%d: substantive fix pushed, reset for re-eval", pr_number)
|
||||||
|
return {"pr": pr_number, "action": "fixed", "issues": issues}
|
||||||
|
|
||||||
|
finally:
|
||||||
|
await _git("worktree", "remove", "--force", worktree_path)
|
||||||
|
|
||||||
|
|
||||||
|
async def _auto_convert_near_duplicate(
|
||||||
|
conn, pr_number: int, claim_files: dict, domain: str,
|
||||||
|
) -> dict:
|
||||||
|
"""Auto-convert a near-duplicate claim into an enrichment on the best-match existing claim.
|
||||||
|
|
||||||
|
Returns {"converted": True, "target_claim": "...", "similarity": 0.95} on success.
|
||||||
|
Returns {"converted": False, "best_similarity": 0.80} when no match >= 0.90.
|
||||||
|
|
||||||
|
Threshold 0.90 (Leo: conservative, lower later based on false-positive rate).
|
||||||
|
"""
|
||||||
|
from difflib import SequenceMatcher
|
||||||
|
|
||||||
|
SIMILARITY_THRESHOLD = 0.90
|
||||||
|
main_wt = str(config.MAIN_WORKTREE)
|
||||||
|
|
||||||
|
# Get the duplicate claim's title and body
|
||||||
|
first_filepath = next(iter(claim_files.keys()), "")
|
||||||
|
first_content = next(iter(claim_files.values()), "")
|
||||||
|
dup_title = Path(first_filepath).stem.replace("-", " ").lower()
|
||||||
|
|
||||||
|
# Extract the body (evidence) from the duplicate — this is what we preserve
|
||||||
|
from .post_extract import parse_frontmatter
|
||||||
|
fm, body = parse_frontmatter(first_content)
|
||||||
|
if not body:
|
||||||
|
body = first_content # Fallback: use full content
|
||||||
|
|
||||||
|
# Strip the H1 and Relevant Notes sections — keep just the argument
|
||||||
|
evidence = re.sub(r"^# .+\n*", "", body).strip()
|
||||||
|
evidence = re.split(r"\n---\n", evidence)[0].strip()
|
||||||
|
|
||||||
|
if not evidence or len(evidence) < 20:
|
||||||
|
return {"converted": False, "best_similarity": 0, "reason": "no_evidence_to_preserve"}
|
||||||
|
|
||||||
|
# Find best-match existing claim in the domain
|
||||||
|
domain_dir = Path(main_wt) / "domains" / (domain or "")
|
||||||
|
best_match = None
|
||||||
|
best_similarity = 0.0
|
||||||
|
|
||||||
|
if domain_dir.is_dir():
|
||||||
|
for f in domain_dir.glob("*.md"):
|
||||||
|
if f.name.startswith("_"):
|
||||||
|
continue
|
||||||
|
existing_title = f.stem.replace("-", " ").lower()
|
||||||
|
sim = SequenceMatcher(None, dup_title, existing_title).ratio()
|
||||||
|
if sim > best_similarity:
|
||||||
|
best_similarity = sim
|
||||||
|
best_match = f
|
||||||
|
|
||||||
|
if best_similarity < SIMILARITY_THRESHOLD or best_match is None:
|
||||||
|
return {"converted": False, "best_similarity": best_similarity}
|
||||||
|
|
||||||
|
# Queue the enrichment — entity_batch handles the actual write to main.
|
||||||
|
# Single writer pattern prevents race conditions. (Ganymede)
|
||||||
|
from .entity_queue import queue_enrichment
|
||||||
|
try:
|
||||||
|
queue_enrichment(
|
||||||
|
target_claim=best_match.name,
|
||||||
|
evidence=evidence,
|
||||||
|
pr_number=pr_number,
|
||||||
|
original_title=dup_title,
|
||||||
|
similarity=best_similarity,
|
||||||
|
domain=domain or "",
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error("PR #%d: failed to queue enrichment: %s", pr_number, e)
|
||||||
|
return {"converted": False, "best_similarity": best_similarity, "reason": f"queue_failed: {e}"}
|
||||||
|
|
||||||
|
return {
|
||||||
|
"converted": True,
|
||||||
|
"target_claim": best_match.name,
|
||||||
|
"similarity": best_similarity,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
async def _close_and_reextract(conn, pr_number: int, issues: list[str]):
|
||||||
|
"""Close PR and mark source for re-extraction with feedback."""
|
||||||
|
await forgejo_api(
|
||||||
|
"PATCH", repo_path(f"pulls/{pr_number}"), {"state": "closed"},
|
||||||
|
)
|
||||||
|
conn.execute(
|
||||||
|
"UPDATE prs SET status = 'closed', last_error = ? WHERE number = ?",
|
||||||
|
(f"unfixable: {', '.join(issues)}", pr_number),
|
||||||
|
)
|
||||||
|
conn.execute(
|
||||||
|
"""UPDATE sources SET status = 'needs_reextraction', feedback = ?,
|
||||||
|
updated_at = datetime('now')
|
||||||
|
WHERE path = (SELECT source_path FROM prs WHERE number = ?)""",
|
||||||
|
(json.dumps({"issues": issues, "pr": pr_number}), pr_number),
|
||||||
|
)
|
||||||
|
db.audit(conn, "substantive_fixer", "closed_reextract",
|
||||||
|
json.dumps({"pr": pr_number, "issues": issues}))
|
||||||
|
|
||||||
|
|
||||||
|
async def _flag_for_leo_review(
|
||||||
|
conn, pr_number: int, claim_files: dict, review_text: str, domain_index: str | None,
|
||||||
|
):
|
||||||
|
"""Flag a near-duplicate PR for Leo to pick the enrichment target."""
|
||||||
|
# Get first claim content for matching
|
||||||
|
first_claim = next(iter(claim_files.values()), "")
|
||||||
|
|
||||||
|
# Use LLM to identify candidate matches
|
||||||
|
if domain_index:
|
||||||
|
prompt = _build_fix_prompt(first_claim, review_text, ["near_duplicate"], None, domain_index)
|
||||||
|
result, _usage = await openrouter_call(FIX_MODEL, prompt, timeout_sec=60, max_tokens=1024)
|
||||||
|
candidates_text = result or "Could not identify candidates."
|
||||||
|
else:
|
||||||
|
candidates_text = "No domain index available."
|
||||||
|
|
||||||
|
comment = (
|
||||||
|
f"**Substantive fixer: near-duplicate detected**\n\n"
|
||||||
|
f"This PR's claims may duplicate existing KB content. "
|
||||||
|
f"Leo: please pick the enrichment target or close if not worth converting.\n\n"
|
||||||
|
f"**Candidate matches:**\n{candidates_text}\n\n"
|
||||||
|
f"_Reply with the target claim filename to convert, or close the PR._"
|
||||||
|
)
|
||||||
|
await forgejo_api(
|
||||||
|
"POST", repo_path(f"issues/{pr_number}/comments"), {"body": comment},
|
||||||
|
)
|
||||||
|
db.audit(conn, "substantive_fixer", "flagged_duplicate",
|
||||||
|
json.dumps({"pr": pr_number}))
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Stage entry point ─────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
async def substantive_fix_cycle(conn, max_workers=None) -> tuple[int, int]:
|
||||||
|
"""Run one substantive fix cycle. Called by the fixer stage after mechanical fixes.
|
||||||
|
|
||||||
|
Finds PRs with substantive issue tags that haven't exceeded fix budget.
|
||||||
|
Processes up to 3 per cycle (Rhea: 180s interval, don't overwhelm eval).
|
||||||
|
"""
|
||||||
|
rows = conn.execute(
|
||||||
|
"""SELECT number, eval_issues FROM prs
|
||||||
|
WHERE status = 'open'
|
||||||
|
AND tier0_pass = 1
|
||||||
|
AND (domain_verdict = 'request_changes' OR leo_verdict = 'request_changes')
|
||||||
|
AND COALESCE(fix_attempts, 0) < ?
|
||||||
|
AND (last_attempt IS NULL OR last_attempt < datetime('now', '-3 minutes'))
|
||||||
|
ORDER BY created_at ASC
|
||||||
|
LIMIT 3""",
|
||||||
|
(MAX_SUBSTANTIVE_FIXES + config.MAX_FIX_ATTEMPTS,), # Total budget: mechanical + substantive
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
if not rows:
|
||||||
|
return 0, 0
|
||||||
|
|
||||||
|
# Filter to only PRs with substantive issues (not just mechanical)
|
||||||
|
substantive_rows = []
|
||||||
|
for row in rows:
|
||||||
|
try:
|
||||||
|
issues = json.loads(row["eval_issues"] or "[]")
|
||||||
|
except (json.JSONDecodeError, TypeError):
|
||||||
|
continue
|
||||||
|
if set(issues) & (FIXABLE_TAGS | CONVERTIBLE_TAGS | UNFIXABLE_TAGS):
|
||||||
|
substantive_rows.append(row)
|
||||||
|
|
||||||
|
if not substantive_rows:
|
||||||
|
return 0, 0
|
||||||
|
|
||||||
|
fixed = 0
|
||||||
|
errors = 0
|
||||||
|
|
||||||
|
for row in substantive_rows:
|
||||||
|
try:
|
||||||
|
result = await _fix_pr(conn, row["number"])
|
||||||
|
if result.get("action"):
|
||||||
|
fixed += 1
|
||||||
|
elif result.get("skipped"):
|
||||||
|
logger.debug("PR #%d: substantive fix skipped: %s", row["number"], result.get("reason"))
|
||||||
|
except Exception:
|
||||||
|
logger.exception("PR #%d: substantive fix failed", row["number"])
|
||||||
|
errors += 1
|
||||||
|
conn.execute("UPDATE prs SET status = 'open' WHERE number = ?", (row["number"],))
|
||||||
|
|
||||||
|
if fixed or errors:
|
||||||
|
logger.info("Substantive fix cycle: %d fixed, %d errors", fixed, errors)
|
||||||
|
|
||||||
|
return fixed, errors
|
||||||
753
ops/pipeline-v2/lib/validate.py
Normal file
753
ops/pipeline-v2/lib/validate.py
Normal file
|
|
@ -0,0 +1,753 @@
|
||||||
|
"""Validate stage — Tier 0 deterministic validation gate.
|
||||||
|
|
||||||
|
Ported from tier0-gate.py + validate_claims.py. Pure Python, no LLM calls.
|
||||||
|
Validates claim frontmatter, title format, wiki links, domain-directory match,
|
||||||
|
proposition heuristic, universal quantifiers, near-duplicate detection.
|
||||||
|
|
||||||
|
Runs against PRs with status 'open' that have tier0_pass IS NULL.
|
||||||
|
Posts results as PR comments. In gate mode, sets tier0_pass = 0/1.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
from datetime import date, datetime, timezone
|
||||||
|
from difflib import SequenceMatcher
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from . import config, db
|
||||||
|
from .domains import VALID_DOMAINS
|
||||||
|
from .forgejo import api as forgejo_api
|
||||||
|
from .forgejo import get_pr_diff, repo_path
|
||||||
|
|
||||||
|
logger = logging.getLogger("pipeline.validate")
|
||||||
|
|
||||||
|
# ─── Constants ──────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
VALID_TYPES = frozenset(config.TYPE_SCHEMAS.keys())
|
||||||
|
# Default confidence values (union of all types that define them)
|
||||||
|
VALID_CONFIDENCE = frozenset(
|
||||||
|
c for schema in config.TYPE_SCHEMAS.values()
|
||||||
|
if schema.get("valid_confidence") for c in schema["valid_confidence"]
|
||||||
|
)
|
||||||
|
DATE_MIN = date(2020, 1, 1)
|
||||||
|
WIKI_LINK_RE = re.compile(r"\[\[([^\]]+)\]\]")
|
||||||
|
DEDUP_THRESHOLD = 0.85
|
||||||
|
|
||||||
|
# Proposition heuristic patterns
|
||||||
|
_STRONG_SIGNALS = re.compile(
|
||||||
|
r"\b(because|therefore|however|although|despite|since|"
|
||||||
|
r"rather than|instead of|not just|more than|less than|"
|
||||||
|
r"by\b|through\b|via\b|without\b|"
|
||||||
|
r"when\b|where\b|while\b|if\b|unless\b|"
|
||||||
|
r"which\b|that\b|"
|
||||||
|
r"is\b|are\b|was\b|were\b|will\b|would\b|"
|
||||||
|
r"can\b|could\b|should\b|must\b|"
|
||||||
|
r"has\b|have\b|had\b|does\b|did\b)",
|
||||||
|
re.IGNORECASE,
|
||||||
|
)
|
||||||
|
|
||||||
|
_VERB_ENDINGS = re.compile(
|
||||||
|
r"\b\w{2,}(ed|ing|es|tes|ses|zes|ves|cts|pts|nts|rns|ps|ts|rs|ns|ds)\b",
|
||||||
|
re.IGNORECASE,
|
||||||
|
)
|
||||||
|
|
||||||
|
_UNIVERSAL_QUANTIFIERS = re.compile(
|
||||||
|
r"\b(all|every|always|never|no one|nobody|nothing|none of|"
|
||||||
|
r"the only|the fundamental|the sole|the single|"
|
||||||
|
r"universally|invariably|without exception|in every case)\b",
|
||||||
|
re.IGNORECASE,
|
||||||
|
)
|
||||||
|
|
||||||
|
_SCOPING_LANGUAGE = re.compile(
|
||||||
|
r"\b(when|if|under|given|assuming|provided|in cases where|"
|
||||||
|
r"for .+ that|among|within|across|during|between|"
|
||||||
|
r"approximately|roughly|nearly|most|many|often|typically|"
|
||||||
|
r"tends? to|generally|usually|frequently)\b",
|
||||||
|
re.IGNORECASE,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# ─── YAML frontmatter parser ───────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def parse_frontmatter(text: str) -> tuple[dict | None, str]:
|
||||||
|
"""Extract YAML frontmatter and body from markdown text."""
|
||||||
|
if not text.startswith("---"):
|
||||||
|
return None, text
|
||||||
|
end = text.find("---", 3)
|
||||||
|
if end == -1:
|
||||||
|
return None, text
|
||||||
|
raw = text[3:end]
|
||||||
|
body = text[end + 3 :].strip()
|
||||||
|
|
||||||
|
try:
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
fm = yaml.safe_load(raw)
|
||||||
|
if not isinstance(fm, dict):
|
||||||
|
return None, body
|
||||||
|
return fm, body
|
||||||
|
except ImportError:
|
||||||
|
pass
|
||||||
|
except Exception:
|
||||||
|
return None, body
|
||||||
|
|
||||||
|
# Fallback: simple key-value parser
|
||||||
|
fm = {}
|
||||||
|
for line in raw.strip().split("\n"):
|
||||||
|
line = line.strip()
|
||||||
|
if not line or line.startswith("#"):
|
||||||
|
continue
|
||||||
|
if ":" not in line:
|
||||||
|
continue
|
||||||
|
key, _, val = line.partition(":")
|
||||||
|
key = key.strip()
|
||||||
|
val = val.strip().strip('"').strip("'")
|
||||||
|
if val.lower() == "null" or val == "":
|
||||||
|
val = None
|
||||||
|
elif val.startswith("["):
|
||||||
|
val = [v.strip().strip('"').strip("'") for v in val.strip("[]").split(",") if v.strip()]
|
||||||
|
fm[key] = val
|
||||||
|
return fm if fm else None, body
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Validators ─────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def validate_schema(fm: dict) -> list[str]:
|
||||||
|
"""Check required fields and valid enums, branching on content type."""
|
||||||
|
violations = []
|
||||||
|
|
||||||
|
ftype = fm.get("type")
|
||||||
|
if not ftype:
|
||||||
|
violations.append("missing_field:type")
|
||||||
|
schema = config.TYPE_SCHEMAS["claim"] # strictest default
|
||||||
|
elif ftype not in config.TYPE_SCHEMAS:
|
||||||
|
violations.append(f"invalid_type:{ftype}")
|
||||||
|
schema = config.TYPE_SCHEMAS["claim"]
|
||||||
|
else:
|
||||||
|
schema = config.TYPE_SCHEMAS[ftype]
|
||||||
|
|
||||||
|
for field in schema["required"]:
|
||||||
|
if field not in fm or fm[field] is None:
|
||||||
|
violations.append(f"missing_field:{field}")
|
||||||
|
|
||||||
|
domain = fm.get("domain")
|
||||||
|
if domain and domain not in VALID_DOMAINS:
|
||||||
|
violations.append(f"invalid_domain:{domain}")
|
||||||
|
|
||||||
|
valid_conf = schema.get("valid_confidence")
|
||||||
|
confidence = fm.get("confidence")
|
||||||
|
if valid_conf and confidence and confidence not in valid_conf:
|
||||||
|
violations.append(f"invalid_confidence:{confidence}")
|
||||||
|
|
||||||
|
desc = fm.get("description")
|
||||||
|
if isinstance(desc, str) and len(desc.strip()) < 10:
|
||||||
|
violations.append("description_too_short")
|
||||||
|
|
||||||
|
source = fm.get("source")
|
||||||
|
if "source" in schema["required"] and isinstance(source, str) and len(source.strip()) < 3:
|
||||||
|
violations.append("source_too_short")
|
||||||
|
|
||||||
|
return violations
|
||||||
|
|
||||||
|
|
||||||
|
def validate_date(date_val) -> list[str]:
|
||||||
|
"""Validate created date."""
|
||||||
|
violations = []
|
||||||
|
if date_val is None:
|
||||||
|
return ["missing_field:created"]
|
||||||
|
|
||||||
|
parsed = None
|
||||||
|
if isinstance(date_val, date):
|
||||||
|
parsed = date_val
|
||||||
|
elif isinstance(date_val, str):
|
||||||
|
try:
|
||||||
|
parsed = datetime.strptime(date_val, "%Y-%m-%d").date()
|
||||||
|
except ValueError:
|
||||||
|
return [f"invalid_date_format:{date_val}"]
|
||||||
|
else:
|
||||||
|
return [f"invalid_date_type:{type(date_val).__name__}"]
|
||||||
|
|
||||||
|
today = date.today()
|
||||||
|
if parsed > today:
|
||||||
|
violations.append(f"future_date:{parsed}")
|
||||||
|
if parsed < DATE_MIN:
|
||||||
|
violations.append(f"date_before_2020:{parsed}")
|
||||||
|
return violations
|
||||||
|
|
||||||
|
|
||||||
|
def validate_title(filepath: str) -> list[str]:
|
||||||
|
"""Check filename follows prose-as-claim convention."""
|
||||||
|
violations = []
|
||||||
|
name = Path(filepath).stem
|
||||||
|
normalized = name.replace("-", " ")
|
||||||
|
|
||||||
|
if len(normalized) < 20:
|
||||||
|
violations.append("title_too_short")
|
||||||
|
|
||||||
|
words = normalized.split()
|
||||||
|
if len(words) < 4:
|
||||||
|
violations.append("title_too_few_words")
|
||||||
|
|
||||||
|
cleaned = re.sub(r"[a-zA-Z0-9\s\-\.,'()%]", "", name)
|
||||||
|
if cleaned:
|
||||||
|
violations.append(f"title_special_chars:{cleaned[:20]}")
|
||||||
|
|
||||||
|
return violations
|
||||||
|
|
||||||
|
|
||||||
|
def validate_wiki_links(body: str, existing_claims: set[str]) -> list[str]:
|
||||||
|
"""Check that [[wiki links]] resolve to known claims."""
|
||||||
|
violations = []
|
||||||
|
for link in WIKI_LINK_RE.findall(body):
|
||||||
|
if link.strip() and link.strip() not in existing_claims:
|
||||||
|
violations.append(f"broken_wiki_link:{link.strip()[:80]}")
|
||||||
|
return violations
|
||||||
|
|
||||||
|
|
||||||
|
def validate_proposition(title: str) -> list[str]:
|
||||||
|
"""Check title reads as a proposition, not a label."""
|
||||||
|
normalized = title.replace("-", " ")
|
||||||
|
words = normalized.split()
|
||||||
|
n = len(words)
|
||||||
|
|
||||||
|
if n < 4:
|
||||||
|
return ["title_not_proposition:too short to be a disagreeable sentence"]
|
||||||
|
|
||||||
|
if _STRONG_SIGNALS.search(normalized):
|
||||||
|
return []
|
||||||
|
if _VERB_ENDINGS.search(normalized):
|
||||||
|
return []
|
||||||
|
if n >= 8:
|
||||||
|
return []
|
||||||
|
|
||||||
|
return ["title_not_proposition:no verb or connective found"]
|
||||||
|
|
||||||
|
|
||||||
|
def validate_universal_quantifiers(title: str) -> list[str]:
|
||||||
|
"""Flag unscoped universal quantifiers (warning, not gate)."""
|
||||||
|
universals = _UNIVERSAL_QUANTIFIERS.findall(title)
|
||||||
|
if universals and not _SCOPING_LANGUAGE.search(title):
|
||||||
|
return [f"unscoped_universal:{','.join(universals)}"]
|
||||||
|
return []
|
||||||
|
|
||||||
|
|
||||||
|
def validate_domain_directory_match(filepath: str, fm: dict) -> list[str]:
|
||||||
|
"""Check file's directory matches its domain field."""
|
||||||
|
domain = fm.get("domain")
|
||||||
|
if not domain:
|
||||||
|
return []
|
||||||
|
|
||||||
|
parts = Path(filepath).parts
|
||||||
|
for i, part in enumerate(parts):
|
||||||
|
if part == "domains" and i + 1 < len(parts):
|
||||||
|
dir_domain = parts[i + 1]
|
||||||
|
if dir_domain != domain:
|
||||||
|
secondary = fm.get("secondary_domains", [])
|
||||||
|
if isinstance(secondary, str):
|
||||||
|
secondary = [secondary]
|
||||||
|
if dir_domain not in (secondary or []):
|
||||||
|
return [f"domain_directory_mismatch:file in domains/{dir_domain}/ but domain field says '{domain}'"]
|
||||||
|
break
|
||||||
|
return []
|
||||||
|
|
||||||
|
|
||||||
|
def validate_description_not_title(title: str, description: str) -> list[str]:
|
||||||
|
"""Check description adds info beyond the title."""
|
||||||
|
if not description:
|
||||||
|
return []
|
||||||
|
title_lower = title.lower().strip()
|
||||||
|
desc_lower = description.lower().strip().rstrip(".")
|
||||||
|
|
||||||
|
if desc_lower in title_lower or title_lower in desc_lower:
|
||||||
|
return ["description_echoes_title"]
|
||||||
|
|
||||||
|
ratio = SequenceMatcher(None, title_lower, desc_lower).ratio()
|
||||||
|
if ratio > 0.75:
|
||||||
|
return [f"description_too_similar:{ratio:.0%}"]
|
||||||
|
return []
|
||||||
|
|
||||||
|
|
||||||
|
def find_near_duplicates(title: str, existing_claims: set[str]) -> list[str]:
|
||||||
|
"""Find near-duplicate titles using SequenceMatcher with word pre-filter."""
|
||||||
|
title_lower = title.lower()
|
||||||
|
title_words = set(title_lower.split()[:6])
|
||||||
|
warnings = []
|
||||||
|
for existing in existing_claims:
|
||||||
|
existing_lower = existing.lower()
|
||||||
|
if len(title_words & set(existing_lower.split()[:6])) < 2:
|
||||||
|
continue
|
||||||
|
ratio = SequenceMatcher(None, title_lower, existing_lower).ratio()
|
||||||
|
if ratio >= DEDUP_THRESHOLD:
|
||||||
|
warnings.append(f"near_duplicate:{existing[:80]} (similarity={ratio:.2f})")
|
||||||
|
return warnings
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Full Tier 0 validation ────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def tier0_validate_claim(filepath: str, content: str, existing_claims: set[str]) -> dict:
|
||||||
|
"""Run full Tier 0 validation. Returns {filepath, passes, violations, warnings}.
|
||||||
|
|
||||||
|
Branches on content type (claim/framework/entity) via TYPE_SCHEMAS.
|
||||||
|
Entities skip proposition title check, date validation, and confidence —
|
||||||
|
they're factual records, not arguable claims.
|
||||||
|
"""
|
||||||
|
violations = []
|
||||||
|
warnings = []
|
||||||
|
|
||||||
|
fm, body = parse_frontmatter(content)
|
||||||
|
if fm is None:
|
||||||
|
return {"filepath": filepath, "passes": False, "violations": ["no_frontmatter"], "warnings": []}
|
||||||
|
|
||||||
|
violations.extend(validate_schema(fm))
|
||||||
|
|
||||||
|
# Type-aware checks
|
||||||
|
ftype = fm.get("type", "claim")
|
||||||
|
schema = config.TYPE_SCHEMAS.get(ftype, config.TYPE_SCHEMAS["claim"])
|
||||||
|
|
||||||
|
if "created" in schema["required"]:
|
||||||
|
violations.extend(validate_date(fm.get("created")))
|
||||||
|
|
||||||
|
title = Path(filepath).stem
|
||||||
|
if schema.get("needs_proposition_title", True):
|
||||||
|
# Title length/format checks only for claims/frameworks — entity filenames
|
||||||
|
# like "metadao.md" are intentionally short (Ganymede review)
|
||||||
|
violations.extend(validate_title(filepath))
|
||||||
|
violations.extend(validate_proposition(title))
|
||||||
|
warnings.extend(validate_universal_quantifiers(title))
|
||||||
|
|
||||||
|
# Wiki links are warnings, not violations — broken links usually point to
|
||||||
|
# claims in other open PRs that haven't merged yet. (Cory, Mar 14)
|
||||||
|
warnings.extend(validate_wiki_links(body, existing_claims))
|
||||||
|
|
||||||
|
violations.extend(validate_domain_directory_match(filepath, fm))
|
||||||
|
|
||||||
|
desc = fm.get("description", "")
|
||||||
|
if isinstance(desc, str):
|
||||||
|
warnings.extend(validate_description_not_title(title, desc))
|
||||||
|
|
||||||
|
# Skip near_duplicate for entities — entity updates matching existing entities
|
||||||
|
# is correct behavior, not duplication. 83% false positive rate on entities. (Leo/Rhea)
|
||||||
|
if ftype != "entity" and not filepath.startswith("entities/"):
|
||||||
|
warnings.extend(find_near_duplicates(title, existing_claims))
|
||||||
|
|
||||||
|
return {"filepath": filepath, "passes": len(violations) == 0, "violations": violations, "warnings": warnings}
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Diff parsing ──────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def extract_claim_files_from_diff(diff: str) -> dict[str, str]:
|
||||||
|
"""Parse unified diff to extract new/modified claim file contents."""
|
||||||
|
claim_dirs = ("domains/", "core/", "foundations/")
|
||||||
|
files = {}
|
||||||
|
current_file = None
|
||||||
|
current_lines = []
|
||||||
|
is_deletion = False
|
||||||
|
|
||||||
|
for line in diff.split("\n"):
|
||||||
|
if line.startswith("diff --git"):
|
||||||
|
if current_file and not is_deletion:
|
||||||
|
files[current_file] = "\n".join(current_lines)
|
||||||
|
current_file = None
|
||||||
|
current_lines = []
|
||||||
|
is_deletion = False
|
||||||
|
elif line.startswith("deleted file mode") or line.startswith("+++ /dev/null"):
|
||||||
|
is_deletion = True
|
||||||
|
current_file = None
|
||||||
|
elif line.startswith("+++ b/") and not is_deletion:
|
||||||
|
path = line[6:]
|
||||||
|
basename = path.rsplit("/", 1)[-1] if "/" in path else path
|
||||||
|
if any(path.startswith(d) for d in claim_dirs) and path.endswith(".md") and not basename.startswith("_"):
|
||||||
|
current_file = path
|
||||||
|
elif current_file and line.startswith("+") and not line.startswith("+++"):
|
||||||
|
current_lines.append(line[1:])
|
||||||
|
|
||||||
|
if current_file and not is_deletion:
|
||||||
|
files[current_file] = "\n".join(current_lines)
|
||||||
|
|
||||||
|
return files
|
||||||
|
|
||||||
|
|
||||||
|
async def _get_pr_head_sha(pr_number: int) -> str:
|
||||||
|
"""Get HEAD SHA of PR's branch."""
|
||||||
|
pr_info = await forgejo_api(
|
||||||
|
"GET",
|
||||||
|
repo_path(f"pulls/{pr_number}"),
|
||||||
|
)
|
||||||
|
if pr_info:
|
||||||
|
return pr_info.get("head", {}).get("sha", "")
|
||||||
|
return ""
|
||||||
|
|
||||||
|
|
||||||
|
async def _has_tier0_comment(pr_number: int, head_sha: str) -> bool:
|
||||||
|
"""Check if we already validated this exact commit."""
|
||||||
|
if not head_sha:
|
||||||
|
return False
|
||||||
|
# Paginate comments (Ganymede standing rule)
|
||||||
|
page = 1
|
||||||
|
while True:
|
||||||
|
comments = await forgejo_api(
|
||||||
|
"GET",
|
||||||
|
repo_path(f"issues/{pr_number}/comments?limit=50&page={page}"),
|
||||||
|
)
|
||||||
|
if not comments:
|
||||||
|
break
|
||||||
|
marker = f"<!-- TIER0-VALIDATION:{head_sha} -->"
|
||||||
|
for c in comments:
|
||||||
|
if marker in c.get("body", ""):
|
||||||
|
return True
|
||||||
|
if len(comments) < 50:
|
||||||
|
break
|
||||||
|
page += 1
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
async def _post_validation_comment(
|
||||||
|
pr_number: int, results: list[dict], head_sha: str,
|
||||||
|
t05_issues: list[str] | None = None, t05_details: list[str] | None = None,
|
||||||
|
):
|
||||||
|
"""Post Tier 0 + Tier 0.5 validation results as PR comment."""
|
||||||
|
tier0_pass = all(r["passes"] for r in results)
|
||||||
|
t05_pass = not t05_issues # empty list = pass
|
||||||
|
all_pass = tier0_pass and t05_pass
|
||||||
|
total = len(results)
|
||||||
|
passing = sum(1 for r in results if r["passes"])
|
||||||
|
|
||||||
|
marker = f"<!-- TIER0-VALIDATION:{head_sha} -->" if head_sha else "<!-- TIER0-VALIDATION -->"
|
||||||
|
status = "PASS" if all_pass else "FAIL"
|
||||||
|
lines = [
|
||||||
|
marker,
|
||||||
|
f"**Validation: {status}** — {passing}/{total} claims pass\n",
|
||||||
|
]
|
||||||
|
|
||||||
|
for r in results:
|
||||||
|
icon = "pass" if r["passes"] else "FAIL"
|
||||||
|
short_path = r["filepath"].split("/", 1)[-1] if "/" in r["filepath"] else r["filepath"]
|
||||||
|
lines.append(f"**[{icon}]** `{short_path}`")
|
||||||
|
for v in r["violations"]:
|
||||||
|
lines.append(f" - {v}")
|
||||||
|
for w in r["warnings"]:
|
||||||
|
lines.append(f" - (warn) {w}")
|
||||||
|
lines.append("")
|
||||||
|
|
||||||
|
# Tier 0.5 results (diff-level checks)
|
||||||
|
if t05_issues:
|
||||||
|
lines.append("**Tier 0.5 — mechanical pre-check: FAIL**\n")
|
||||||
|
for detail in (t05_details or []):
|
||||||
|
lines.append(f" - {detail}")
|
||||||
|
lines.append("")
|
||||||
|
|
||||||
|
if not all_pass:
|
||||||
|
lines.append("---")
|
||||||
|
lines.append("Fix the violations above and push to trigger re-validation.")
|
||||||
|
lines.append("LLM review will run after all mechanical checks pass.")
|
||||||
|
|
||||||
|
lines.append(f"\n*tier0-gate v2 | {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M UTC')}*")
|
||||||
|
|
||||||
|
await forgejo_api(
|
||||||
|
"POST",
|
||||||
|
repo_path(f"issues/{pr_number}/comments"),
|
||||||
|
{"body": "\n".join(lines)},
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Existing claims index ─────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def load_existing_claims() -> set[str]:
|
||||||
|
"""Build set of known claim titles from the main worktree."""
|
||||||
|
claims: set[str] = set()
|
||||||
|
base = config.MAIN_WORKTREE
|
||||||
|
for subdir in ["domains", "core", "foundations", "maps", "agents", "schemas", "entities", "decisions"]:
|
||||||
|
full = base / subdir
|
||||||
|
if not full.is_dir():
|
||||||
|
continue
|
||||||
|
for f in full.rglob("*.md"):
|
||||||
|
claims.add(f.stem)
|
||||||
|
return claims
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Main entry point ──────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_all_md_added_content(diff: str) -> dict[str, str]:
|
||||||
|
"""Extract added content from ALL .md files in diff (not just claim dirs).
|
||||||
|
|
||||||
|
Used for wiki link validation on agent files, musings, etc. that
|
||||||
|
extract_claim_files_from_diff skips. Returns {filepath: added_lines}.
|
||||||
|
"""
|
||||||
|
files: dict[str, str] = {}
|
||||||
|
current_file = None
|
||||||
|
current_lines: list[str] = []
|
||||||
|
is_deletion = False
|
||||||
|
|
||||||
|
for line in diff.split("\n"):
|
||||||
|
if line.startswith("diff --git"):
|
||||||
|
if current_file and not is_deletion:
|
||||||
|
files[current_file] = "\n".join(current_lines)
|
||||||
|
current_file = None
|
||||||
|
current_lines = []
|
||||||
|
is_deletion = False
|
||||||
|
elif line.startswith("deleted file mode") or line.startswith("+++ /dev/null"):
|
||||||
|
is_deletion = True
|
||||||
|
current_file = None
|
||||||
|
elif line.startswith("+++ b/") and not is_deletion:
|
||||||
|
path = line[6:]
|
||||||
|
if path.endswith(".md"):
|
||||||
|
current_file = path
|
||||||
|
elif current_file and line.startswith("+") and not line.startswith("+++"):
|
||||||
|
current_lines.append(line[1:])
|
||||||
|
|
||||||
|
if current_file and not is_deletion:
|
||||||
|
files[current_file] = "\n".join(current_lines)
|
||||||
|
|
||||||
|
return files
|
||||||
|
|
||||||
|
|
||||||
|
def _new_files_in_diff(diff: str) -> set[str]:
|
||||||
|
"""Extract paths of newly added files from a unified diff."""
|
||||||
|
new_files: set[str] = set()
|
||||||
|
lines = diff.split("\n")
|
||||||
|
for i, line in enumerate(lines):
|
||||||
|
if line.startswith("--- /dev/null") and i + 1 < len(lines) and lines[i + 1].startswith("+++ b/"):
|
||||||
|
new_files.add(lines[i + 1][6:])
|
||||||
|
return new_files
|
||||||
|
|
||||||
|
|
||||||
|
def tier05_mechanical_check(diff: str, existing_claims: set[str] | None = None) -> tuple[bool, list[str], list[str]]:
|
||||||
|
"""Tier 0.5: mechanical pre-check for frontmatter schema + wiki links.
|
||||||
|
|
||||||
|
Runs deterministic Python checks ($0) to catch issues that LLM reviewers
|
||||||
|
rubber-stamp or reject without structured issue tags. Moved from evaluate.py
|
||||||
|
to validate.py so that mechanical issues are caught BEFORE eval, not during.
|
||||||
|
|
||||||
|
Only checks NEW files for frontmatter (modified files have partial content
|
||||||
|
from diff — Bug 2). Wiki links checked on ALL .md files.
|
||||||
|
|
||||||
|
Returns (passes, issue_tags, detail_messages).
|
||||||
|
"""
|
||||||
|
claim_files = extract_claim_files_from_diff(diff)
|
||||||
|
all_md_files = _extract_all_md_added_content(diff)
|
||||||
|
|
||||||
|
if not claim_files and not all_md_files:
|
||||||
|
return True, [], []
|
||||||
|
|
||||||
|
if existing_claims is None:
|
||||||
|
existing_claims = load_existing_claims()
|
||||||
|
|
||||||
|
new_files = _new_files_in_diff(diff)
|
||||||
|
|
||||||
|
issues: list[str] = []
|
||||||
|
details: list[str] = []
|
||||||
|
gate_failed = False
|
||||||
|
|
||||||
|
# Pass 1: Claim-specific checks (frontmatter, schema, near-duplicate)
|
||||||
|
for filepath, content in claim_files.items():
|
||||||
|
is_new = filepath in new_files
|
||||||
|
|
||||||
|
if is_new:
|
||||||
|
fm, body = parse_frontmatter(content)
|
||||||
|
if fm is None:
|
||||||
|
issues.append("frontmatter_schema")
|
||||||
|
details.append(f"{filepath}: no valid YAML frontmatter")
|
||||||
|
gate_failed = True
|
||||||
|
continue
|
||||||
|
|
||||||
|
schema_errors = validate_schema(fm)
|
||||||
|
if schema_errors:
|
||||||
|
issues.append("frontmatter_schema")
|
||||||
|
details.append(f"{filepath}: {', '.join(schema_errors)}")
|
||||||
|
gate_failed = True
|
||||||
|
|
||||||
|
# Near-duplicate (warning only — tagged but doesn't gate)
|
||||||
|
# Skip for entities — entity updates matching existing entities is expected.
|
||||||
|
title = Path(filepath).stem
|
||||||
|
ftype_check = fm.get("type", "claim")
|
||||||
|
if ftype_check != "entity" and not filepath.startswith("entities/"):
|
||||||
|
dup_warnings = find_near_duplicates(title, existing_claims)
|
||||||
|
if dup_warnings:
|
||||||
|
issues.append("near_duplicate")
|
||||||
|
details.append(f"{filepath}: {', '.join(w[:60] for w in dup_warnings[:2])}")
|
||||||
|
|
||||||
|
# Pass 2: Wiki link check on ALL .md files
|
||||||
|
# Broken wiki links are a WARNING, not a gate. Most broken links point to claims
|
||||||
|
# in other open PRs that haven't merged yet — they resolve naturally as the
|
||||||
|
# dependency chain merges. LLM reviewers catch genuinely missing references.
|
||||||
|
# (Cory directive, Mar 14: "they'll likely merge")
|
||||||
|
for filepath, content in all_md_files.items():
|
||||||
|
link_errors = validate_wiki_links(content, existing_claims)
|
||||||
|
if link_errors:
|
||||||
|
issues.append("broken_wiki_links")
|
||||||
|
details.append(f"{filepath}: (warn) {', '.join(e[:60] for e in link_errors[:3])}")
|
||||||
|
# NOT gate_failed — wiki links are warnings, not blockers
|
||||||
|
|
||||||
|
unique_issues = list(dict.fromkeys(issues))
|
||||||
|
return not gate_failed, unique_issues, details
|
||||||
|
|
||||||
|
|
||||||
|
async def validate_pr(conn, pr_number: int) -> dict:
|
||||||
|
"""Run Tier 0 + Tier 0.5 validation on a single PR.
|
||||||
|
|
||||||
|
Tier 0: per-claim validation (schema, date, title, wiki links, proposition).
|
||||||
|
Tier 0.5: diff-level mechanical checks (frontmatter schema on new files, wiki links on all .md).
|
||||||
|
|
||||||
|
Both must pass for tier0_pass = 1. If either fails, eval won't touch this PR.
|
||||||
|
Fixer handles wiki links; non-fixable issues exhaust fix_attempts → terminal.
|
||||||
|
|
||||||
|
Returns {pr, all_pass, total, passing, skipped, reason, tier05_issues}.
|
||||||
|
"""
|
||||||
|
# Get HEAD SHA for idempotency
|
||||||
|
head_sha = await _get_pr_head_sha(pr_number)
|
||||||
|
|
||||||
|
# Skip if already validated for this commit
|
||||||
|
if await _has_tier0_comment(pr_number, head_sha):
|
||||||
|
logger.debug("PR #%d already validated at %s", pr_number, head_sha[:8])
|
||||||
|
return {"pr": pr_number, "skipped": True, "reason": "already_validated"}
|
||||||
|
|
||||||
|
# Fetch diff
|
||||||
|
diff = await get_pr_diff(pr_number)
|
||||||
|
if not diff:
|
||||||
|
logger.debug("PR #%d: empty or oversized diff", pr_number)
|
||||||
|
return {"pr": pr_number, "skipped": True, "reason": "no_diff"}
|
||||||
|
|
||||||
|
# Load existing claims index (shared between Tier 0 and Tier 0.5)
|
||||||
|
existing_claims = load_existing_claims()
|
||||||
|
|
||||||
|
# Extract claim files (domains/, core/, foundations/)
|
||||||
|
claim_files = extract_claim_files_from_diff(diff)
|
||||||
|
|
||||||
|
# ── Tier 0: per-claim validation ──
|
||||||
|
# Only validates NEW files (not modified). Modified files have partial content
|
||||||
|
# from diffs (only + lines) — frontmatter parsing fails on partial content,
|
||||||
|
# producing false no_frontmatter violations. Enrichment PRs that modify
|
||||||
|
# existing claim files were getting stuck here. (Epimetheus session 2)
|
||||||
|
new_files = _new_files_in_diff(diff)
|
||||||
|
results = []
|
||||||
|
for filepath, content in claim_files.items():
|
||||||
|
if filepath not in new_files:
|
||||||
|
continue # Skip modified files — partial diff content can't be validated
|
||||||
|
result = tier0_validate_claim(filepath, content, existing_claims)
|
||||||
|
results.append(result)
|
||||||
|
status = "PASS" if result["passes"] else "FAIL"
|
||||||
|
logger.debug("PR #%d: %s %s v=%s w=%s", pr_number, status, filepath, result["violations"], result["warnings"])
|
||||||
|
|
||||||
|
tier0_pass = all(r["passes"] for r in results) if results else True
|
||||||
|
total = len(results)
|
||||||
|
passing = sum(1 for r in results if r["passes"])
|
||||||
|
|
||||||
|
# ── Tier 0.5: diff-level mechanical checks ──
|
||||||
|
# Always runs — catches broken wiki links in ALL .md files including entities.
|
||||||
|
t05_pass, t05_issues, t05_details = tier05_mechanical_check(diff, existing_claims)
|
||||||
|
|
||||||
|
if not claim_files and t05_pass:
|
||||||
|
# Entity/source-only PR with no wiki link issues — pass through
|
||||||
|
logger.debug("PR #%d: no claim files, Tier 0.5 passed — auto-pass", pr_number)
|
||||||
|
elif not claim_files and not t05_pass:
|
||||||
|
logger.info("PR #%d: no claim files but Tier 0.5 failed: %s", pr_number, t05_issues)
|
||||||
|
|
||||||
|
# Combined result: both tiers must pass
|
||||||
|
all_pass = tier0_pass and t05_pass
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
"PR #%d: Tier 0 — %d/%d pass | Tier 0.5 — %s (issues: %s) | combined: %s",
|
||||||
|
pr_number, passing, total, "PASS" if t05_pass else "FAIL", t05_issues, all_pass,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Post combined comment
|
||||||
|
await _post_validation_comment(pr_number, results, head_sha, t05_issues, t05_details)
|
||||||
|
|
||||||
|
# Update PR record — reset eval state on new commits
|
||||||
|
# WARNING-ONLY issue tags (broken_wiki_links, near_duplicate) should NOT
|
||||||
|
# prevent tier0_pass. Only blocking tags (frontmatter_schema, etc.) gate.
|
||||||
|
# This was causing an infinite fixer→validate loop where wiki link warnings
|
||||||
|
# kept resetting tier0_pass=0. (Epimetheus, session 2 fix)
|
||||||
|
# Determine effective pass: per-claim violations always gate. Tier 0.5 warnings don't.
|
||||||
|
# (Ganymede: verify this doesn't accidentally pass real schema failures)
|
||||||
|
WARNING_ONLY_TAGS = {"broken_wiki_links", "near_duplicate"}
|
||||||
|
blocking_t05_issues = set(t05_issues) - WARNING_ONLY_TAGS if t05_issues else set()
|
||||||
|
# Pass if: per-claim checks pass AND no blocking Tier 0.5 issues
|
||||||
|
effective_pass = tier0_pass and not blocking_t05_issues
|
||||||
|
|
||||||
|
conn.execute(
|
||||||
|
"""UPDATE prs SET tier0_pass = ?,
|
||||||
|
eval_attempts = 0, eval_issues = ?,
|
||||||
|
domain_verdict = 'pending', leo_verdict = 'pending',
|
||||||
|
last_error = NULL
|
||||||
|
WHERE number = ?""",
|
||||||
|
(1 if effective_pass else 0, json.dumps(t05_issues) if t05_issues else "[]", pr_number),
|
||||||
|
)
|
||||||
|
db.audit(
|
||||||
|
conn,
|
||||||
|
"validate",
|
||||||
|
"tier0_complete",
|
||||||
|
json.dumps({
|
||||||
|
"pr": pr_number, "pass": all_pass,
|
||||||
|
"tier0_pass": tier0_pass, "tier05_pass": t05_pass,
|
||||||
|
"passing": passing, "total": total,
|
||||||
|
"tier05_issues": t05_issues,
|
||||||
|
}),
|
||||||
|
)
|
||||||
|
|
||||||
|
return {
|
||||||
|
"pr": pr_number, "all_pass": all_pass,
|
||||||
|
"total": total, "passing": passing,
|
||||||
|
"tier05_issues": t05_issues,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
async def validate_cycle(conn, max_workers=None) -> tuple[int, int]:
|
||||||
|
"""Run one validation cycle.
|
||||||
|
|
||||||
|
Finds PRs with status='open' and tier0_pass IS NULL, validates them.
|
||||||
|
"""
|
||||||
|
# Find unvalidated PRs (priority ordered)
|
||||||
|
rows = conn.execute(
|
||||||
|
"""SELECT p.number FROM prs p
|
||||||
|
LEFT JOIN sources s ON p.source_path = s.path
|
||||||
|
WHERE p.status = 'open'
|
||||||
|
AND p.tier0_pass IS NULL
|
||||||
|
ORDER BY
|
||||||
|
CASE COALESCE(p.priority, s.priority, 'medium')
|
||||||
|
WHEN 'critical' THEN 0
|
||||||
|
WHEN 'high' THEN 1
|
||||||
|
WHEN 'medium' THEN 2
|
||||||
|
WHEN 'low' THEN 3
|
||||||
|
ELSE 4
|
||||||
|
END,
|
||||||
|
p.created_at ASC
|
||||||
|
LIMIT ?""",
|
||||||
|
(max_workers or 10,),
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
if not rows:
|
||||||
|
return 0, 0
|
||||||
|
|
||||||
|
succeeded = 0
|
||||||
|
failed = 0
|
||||||
|
|
||||||
|
for row in rows:
|
||||||
|
try:
|
||||||
|
result = await validate_pr(conn, row["number"])
|
||||||
|
if result.get("skipped"):
|
||||||
|
# Mark as validated even if skipped (no claims = pass)
|
||||||
|
conn.execute(
|
||||||
|
"UPDATE prs SET tier0_pass = 1 WHERE number = ? AND tier0_pass IS NULL",
|
||||||
|
(row["number"],),
|
||||||
|
)
|
||||||
|
succeeded += 1
|
||||||
|
elif result.get("all_pass"):
|
||||||
|
succeeded += 1
|
||||||
|
else:
|
||||||
|
succeeded += 1 # Validation ran successfully, even if claims failed
|
||||||
|
except Exception:
|
||||||
|
logger.exception("Failed to validate PR #%d", row["number"])
|
||||||
|
failed += 1
|
||||||
|
|
||||||
|
if succeeded or failed:
|
||||||
|
logger.info("Validate cycle: %d validated, %d errors", succeeded, failed)
|
||||||
|
|
||||||
|
return succeeded, failed
|
||||||
138
ops/pipeline-v2/lib/watchdog.py
Normal file
138
ops/pipeline-v2/lib/watchdog.py
Normal file
|
|
@ -0,0 +1,138 @@
|
||||||
|
"""Pipeline health watchdog — detects stalls and model failures fast.
|
||||||
|
|
||||||
|
Runs every 60 seconds (inside the existing health check or as its own stage).
|
||||||
|
Checks for conditions that have caused pipeline stalls:
|
||||||
|
|
||||||
|
1. Eval stall: open PRs with tier0_pass=1 but no eval event in 5 minutes
|
||||||
|
2. Breaker open: any circuit breaker in open state
|
||||||
|
3. Model API failure: 400/401 errors indicating invalid model ID or auth failure
|
||||||
|
4. Zombie accumulation: PRs with exhausted fix budget sitting in open
|
||||||
|
|
||||||
|
When a condition is detected, logs a WARNING with specific diagnosis.
|
||||||
|
Future: could trigger Pentagon notification or webhook.
|
||||||
|
|
||||||
|
Epimetheus owns this module. Born from 3 stall incidents in 2 sessions.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
|
from . import config, db
|
||||||
|
|
||||||
|
logger = logging.getLogger("pipeline.watchdog")
|
||||||
|
|
||||||
|
|
||||||
|
async def watchdog_check(conn) -> dict:
|
||||||
|
"""Run all health checks. Returns {healthy: bool, issues: [...]}.
|
||||||
|
|
||||||
|
Called every 60 seconds by the pipeline daemon.
|
||||||
|
"""
|
||||||
|
issues = []
|
||||||
|
|
||||||
|
# 1. Eval stall: open PRs ready for eval but no eval event in 5 minutes
|
||||||
|
eval_ready = conn.execute(
|
||||||
|
"""SELECT COUNT(*) as n FROM prs
|
||||||
|
WHERE status = 'open' AND tier0_pass = 1
|
||||||
|
AND domain_verdict = 'pending' AND eval_attempts < ?""",
|
||||||
|
(config.MAX_EVAL_ATTEMPTS,),
|
||||||
|
).fetchone()["n"]
|
||||||
|
|
||||||
|
if eval_ready > 0:
|
||||||
|
last_eval = conn.execute(
|
||||||
|
"SELECT MAX(timestamp) as ts FROM audit_log WHERE stage = 'evaluate'"
|
||||||
|
).fetchone()
|
||||||
|
if last_eval and last_eval["ts"]:
|
||||||
|
try:
|
||||||
|
last_ts = datetime.fromisoformat(last_eval["ts"].replace("Z", "+00:00"))
|
||||||
|
age_seconds = (datetime.now(timezone.utc) - last_ts).total_seconds()
|
||||||
|
if age_seconds > 300: # 5 minutes
|
||||||
|
issues.append({
|
||||||
|
"type": "eval_stall",
|
||||||
|
"severity": "critical",
|
||||||
|
"detail": f"{eval_ready} PRs ready for eval but no eval event in {int(age_seconds)}s",
|
||||||
|
"action": "Check eval breaker state and model API availability",
|
||||||
|
})
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
# 2. Breaker open
|
||||||
|
breakers = conn.execute(
|
||||||
|
"SELECT name, state, failures FROM circuit_breakers WHERE state = 'open'"
|
||||||
|
).fetchall()
|
||||||
|
for b in breakers:
|
||||||
|
issues.append({
|
||||||
|
"type": "breaker_open",
|
||||||
|
"severity": "critical",
|
||||||
|
"detail": f"Breaker '{b['name']}' is OPEN ({b['failures']} failures)",
|
||||||
|
"action": f"Check {b['name']} stage logs for root cause",
|
||||||
|
})
|
||||||
|
|
||||||
|
# 3. Model API failure pattern: 5+ recent errors from same model
|
||||||
|
recent_errors = conn.execute(
|
||||||
|
"""SELECT detail FROM audit_log
|
||||||
|
WHERE stage = 'evaluate' AND event IN ('error', 'domain_rejected')
|
||||||
|
AND timestamp > datetime('now', '-10 minutes')
|
||||||
|
ORDER BY id DESC LIMIT 10"""
|
||||||
|
).fetchall()
|
||||||
|
error_count = 0
|
||||||
|
for row in recent_errors:
|
||||||
|
detail = row["detail"] or ""
|
||||||
|
if "400" in detail or "not a valid model" in detail or "401" in detail:
|
||||||
|
error_count += 1
|
||||||
|
if error_count >= 3:
|
||||||
|
issues.append({
|
||||||
|
"type": "model_api_failure",
|
||||||
|
"severity": "critical",
|
||||||
|
"detail": f"{error_count} model API errors in last 10 minutes — possible invalid model ID or auth failure",
|
||||||
|
"action": "Check OpenRouter model IDs in config.py and API key validity",
|
||||||
|
})
|
||||||
|
|
||||||
|
# 4. Zombie PRs: open with exhausted fix budget and request_changes
|
||||||
|
zombies = conn.execute(
|
||||||
|
"""SELECT COUNT(*) as n FROM prs
|
||||||
|
WHERE status = 'open' AND fix_attempts >= ?
|
||||||
|
AND (domain_verdict = 'request_changes' OR leo_verdict = 'request_changes')""",
|
||||||
|
(config.MAX_FIX_ATTEMPTS,),
|
||||||
|
).fetchone()["n"]
|
||||||
|
if zombies > 0:
|
||||||
|
issues.append({
|
||||||
|
"type": "zombie_prs",
|
||||||
|
"severity": "warning",
|
||||||
|
"detail": f"{zombies} PRs with exhausted fix budget still open",
|
||||||
|
"action": "GC should auto-close these — check fixer.py GC logic",
|
||||||
|
})
|
||||||
|
|
||||||
|
# 5. Tier0 blockage: many PRs with tier0_pass=0 (potential validation bug)
|
||||||
|
tier0_blocked = conn.execute(
|
||||||
|
"SELECT COUNT(*) as n FROM prs WHERE status = 'open' AND tier0_pass = 0"
|
||||||
|
).fetchone()["n"]
|
||||||
|
if tier0_blocked >= 5:
|
||||||
|
issues.append({
|
||||||
|
"type": "tier0_blockage",
|
||||||
|
"severity": "warning",
|
||||||
|
"detail": f"{tier0_blocked} PRs blocked at tier0_pass=0",
|
||||||
|
"action": "Check validate.py — may be the modified-file or wiki-link bug recurring",
|
||||||
|
})
|
||||||
|
|
||||||
|
# Log issues
|
||||||
|
healthy = len(issues) == 0
|
||||||
|
if not healthy:
|
||||||
|
for issue in issues:
|
||||||
|
if issue["severity"] == "critical":
|
||||||
|
logger.warning("WATCHDOG CRITICAL: %s — %s", issue["type"], issue["detail"])
|
||||||
|
else:
|
||||||
|
logger.info("WATCHDOG: %s — %s", issue["type"], issue["detail"])
|
||||||
|
|
||||||
|
return {"healthy": healthy, "issues": issues, "checks_run": 5}
|
||||||
|
|
||||||
|
|
||||||
|
async def watchdog_cycle(conn, max_workers=None) -> tuple[int, int]:
|
||||||
|
"""Pipeline stage entry point. Returns (1, 0) on success."""
|
||||||
|
result = await watchdog_check(conn)
|
||||||
|
if not result["healthy"]:
|
||||||
|
db.audit(
|
||||||
|
conn, "watchdog", "issues_detected",
|
||||||
|
json.dumps({"issues": result["issues"]}),
|
||||||
|
)
|
||||||
|
return 1, 0
|
||||||
85
ops/pipeline-v2/lib/worktree_lock.py
Normal file
85
ops/pipeline-v2/lib/worktree_lock.py
Normal file
|
|
@ -0,0 +1,85 @@
|
||||||
|
"""File-based lock for ALL processes writing to the main worktree.
|
||||||
|
|
||||||
|
One lock, one mechanism (Ganymede: Option C). Used by:
|
||||||
|
- Pipeline daemon stages (entity_batch, source archiver, substantive_fixer) via async wrapper
|
||||||
|
- Telegram bot (sync context manager)
|
||||||
|
|
||||||
|
Protects: /opt/teleo-eval/workspaces/main/
|
||||||
|
|
||||||
|
flock auto-releases on process exit (even crash/kill). No stale lock cleanup needed.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import fcntl
|
||||||
|
import logging
|
||||||
|
import time
|
||||||
|
from contextlib import asynccontextmanager, contextmanager
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
logger = logging.getLogger("worktree-lock")
|
||||||
|
|
||||||
|
LOCKFILE = Path("/opt/teleo-eval/workspaces/.main-worktree.lock")
|
||||||
|
|
||||||
|
|
||||||
|
@contextmanager
|
||||||
|
def main_worktree_lock(timeout: float = 10.0):
|
||||||
|
"""Sync context manager — use in telegram bot and other external processes.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
with main_worktree_lock():
|
||||||
|
# write to inbox/queue/, git add/commit/push, etc.
|
||||||
|
"""
|
||||||
|
LOCKFILE.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
fp = open(LOCKFILE, "w")
|
||||||
|
start = time.monotonic()
|
||||||
|
while True:
|
||||||
|
try:
|
||||||
|
fcntl.flock(fp, fcntl.LOCK_EX | fcntl.LOCK_NB)
|
||||||
|
break
|
||||||
|
except BlockingIOError:
|
||||||
|
if time.monotonic() - start > timeout:
|
||||||
|
fp.close()
|
||||||
|
logger.warning("Main worktree lock timeout after %.0fs", timeout)
|
||||||
|
raise TimeoutError(f"Could not acquire main worktree lock in {timeout}s")
|
||||||
|
time.sleep(0.1)
|
||||||
|
try:
|
||||||
|
yield
|
||||||
|
finally:
|
||||||
|
fcntl.flock(fp, fcntl.LOCK_UN)
|
||||||
|
fp.close()
|
||||||
|
|
||||||
|
|
||||||
|
@asynccontextmanager
|
||||||
|
async def async_main_worktree_lock(timeout: float = 10.0):
|
||||||
|
"""Async context manager — use in pipeline daemon stages.
|
||||||
|
|
||||||
|
Acquires the same file lock via run_in_executor (Ganymede: <1ms overhead).
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
async with async_main_worktree_lock():
|
||||||
|
await _git("fetch", "origin", "main", cwd=main_dir)
|
||||||
|
await _git("reset", "--hard", "origin/main", cwd=main_dir)
|
||||||
|
# ... write files, commit, push ...
|
||||||
|
"""
|
||||||
|
loop = asyncio.get_event_loop()
|
||||||
|
LOCKFILE.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
fp = open(LOCKFILE, "w")
|
||||||
|
|
||||||
|
def _acquire():
|
||||||
|
start = time.monotonic()
|
||||||
|
while True:
|
||||||
|
try:
|
||||||
|
fcntl.flock(fp, fcntl.LOCK_EX | fcntl.LOCK_NB)
|
||||||
|
return
|
||||||
|
except BlockingIOError:
|
||||||
|
if time.monotonic() - start > timeout:
|
||||||
|
fp.close()
|
||||||
|
raise TimeoutError(f"Could not acquire main worktree lock in {timeout}s")
|
||||||
|
time.sleep(0.1)
|
||||||
|
|
||||||
|
await loop.run_in_executor(None, _acquire)
|
||||||
|
try:
|
||||||
|
yield
|
||||||
|
finally:
|
||||||
|
fcntl.flock(fp, fcntl.LOCK_UN)
|
||||||
|
fp.close()
|
||||||
972
ops/pipeline-v2/reweave.py
Normal file
972
ops/pipeline-v2/reweave.py
Normal file
|
|
@ -0,0 +1,972 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Orphan Reweave — connect isolated claims via vector similarity + Haiku classification.
|
||||||
|
|
||||||
|
Finds claims with zero incoming links (orphans), uses Qdrant to find semantically
|
||||||
|
similar neighbors, classifies the relationship with Haiku, and writes edges on the
|
||||||
|
neighbor's frontmatter pointing TO the orphan.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python3 reweave.py --dry-run # Show what would be connected
|
||||||
|
python3 reweave.py --max-orphans 50 # Process up to 50 orphans
|
||||||
|
python3 reweave.py --threshold 0.72 # Override similarity floor
|
||||||
|
|
||||||
|
Design:
|
||||||
|
- Orphan = zero incoming links (no other claim's supports/challenges/related/depends_on points to it)
|
||||||
|
- Write edge on NEIGHBOR (not orphan) so orphan gains an incoming link
|
||||||
|
- Haiku classifies: supports | challenges | related (>=0.85 confidence for supports/challenges)
|
||||||
|
- reweave_edges parallel field for tooling-readable provenance
|
||||||
|
- Single PR per run for Leo review
|
||||||
|
|
||||||
|
Pentagon-Agent: Epimetheus <0144398e-4ed3-4fe2-95a3-3d72e1abf887>
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import datetime
|
||||||
|
import hashlib
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
import urllib.request
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
|
||||||
|
logger = logging.getLogger("reweave")
|
||||||
|
|
||||||
|
# --- Config ---
|
||||||
|
REPO_DIR = Path(os.environ.get("REPO_DIR", "/opt/teleo-eval/workspaces/main"))
|
||||||
|
SECRETS_DIR = Path(os.environ.get("SECRETS_DIR", "/opt/teleo-eval/secrets"))
|
||||||
|
QDRANT_URL = os.environ.get("QDRANT_URL", "http://localhost:6333")
|
||||||
|
QDRANT_COLLECTION = os.environ.get("QDRANT_COLLECTION", "teleo-claims")
|
||||||
|
FORGEJO_URL = os.environ.get("FORGEJO_URL", "http://localhost:3000")
|
||||||
|
|
||||||
|
EMBED_DIRS = ["domains", "core", "foundations", "decisions", "entities"]
|
||||||
|
EDGE_FIELDS = ("supports", "challenges", "challenged_by", "depends_on", "related")
|
||||||
|
WIKI_LINK_RE = re.compile(r"\[\[([^\]]+)\]\]")
|
||||||
|
|
||||||
|
# Thresholds (from calibration data — Mar 28)
|
||||||
|
DEFAULT_THRESHOLD = 0.70 # Elbow in score distribution
|
||||||
|
DEFAULT_MAX_ORPHANS = 50 # Keep PRs reviewable
|
||||||
|
DEFAULT_MAX_NEIGHBORS = 3 # Don't over-connect
|
||||||
|
HAIKU_CONFIDENCE_FLOOR = 0.85 # Below this → default to "related"
|
||||||
|
PER_FILE_EDGE_CAP = 10 # Max total reweave edges per neighbor file
|
||||||
|
|
||||||
|
# Domain processing order: diversity first, internet-finance last (Leo)
|
||||||
|
DOMAIN_PRIORITY = [
|
||||||
|
"ai-alignment", "health", "space-development", "entertainment",
|
||||||
|
"creative-industries", "collective-intelligence", "governance",
|
||||||
|
# internet-finance last — batch-imported futarchy cluster, lower cross-domain value
|
||||||
|
"internet-finance",
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Orphan Detection ────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_frontmatter(path: Path) -> dict | None:
|
||||||
|
"""Parse YAML frontmatter from a markdown file. Returns dict or None."""
|
||||||
|
try:
|
||||||
|
text = path.read_text(errors="replace")
|
||||||
|
except Exception:
|
||||||
|
return None
|
||||||
|
if not text.startswith("---"):
|
||||||
|
return None
|
||||||
|
end = text.find("\n---", 3)
|
||||||
|
if end == -1:
|
||||||
|
return None
|
||||||
|
try:
|
||||||
|
fm = yaml.safe_load(text[3:end])
|
||||||
|
return fm if isinstance(fm, dict) else None
|
||||||
|
except Exception:
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _get_body(path: Path) -> str:
|
||||||
|
"""Get body text (after frontmatter) from a markdown file."""
|
||||||
|
try:
|
||||||
|
text = path.read_text(errors="replace")
|
||||||
|
except Exception:
|
||||||
|
return ""
|
||||||
|
if not text.startswith("---"):
|
||||||
|
return text
|
||||||
|
end = text.find("\n---", 3)
|
||||||
|
if end == -1:
|
||||||
|
return text
|
||||||
|
return text[end + 4:].strip()
|
||||||
|
|
||||||
|
|
||||||
|
def _get_edge_targets(path: Path) -> list[str]:
|
||||||
|
"""Extract all outgoing edge targets from a claim's frontmatter + wiki links."""
|
||||||
|
targets = []
|
||||||
|
fm = _parse_frontmatter(path)
|
||||||
|
if fm:
|
||||||
|
for field in EDGE_FIELDS:
|
||||||
|
val = fm.get(field)
|
||||||
|
if isinstance(val, list):
|
||||||
|
targets.extend(str(v).strip().lower() for v in val if v)
|
||||||
|
elif isinstance(val, str) and val.strip():
|
||||||
|
targets.append(val.strip().lower())
|
||||||
|
# Also check reweave_edges (from previous runs)
|
||||||
|
rw = fm.get("reweave_edges")
|
||||||
|
if isinstance(rw, list):
|
||||||
|
targets.extend(str(v).strip().lower() for v in rw if v)
|
||||||
|
|
||||||
|
# Wiki links in body
|
||||||
|
try:
|
||||||
|
text = path.read_text(errors="replace")
|
||||||
|
end = text.find("\n---", 3)
|
||||||
|
if end > 0:
|
||||||
|
body = text[end + 4:]
|
||||||
|
for link in WIKI_LINK_RE.findall(body):
|
||||||
|
targets.append(link.strip().lower())
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
return targets
|
||||||
|
|
||||||
|
|
||||||
|
def _claim_name_variants(path: Path, repo_root: Path = None) -> list[str]:
|
||||||
|
"""Generate name variants for a claim file (used for incoming link matching).
|
||||||
|
|
||||||
|
A claim at domains/ai-alignment/rlhf-reward-hacking.md could be referenced as:
|
||||||
|
- "rlhf-reward-hacking"
|
||||||
|
- "rlhf reward hacking"
|
||||||
|
- "RLHF reward hacking" (title case)
|
||||||
|
- The actual 'name' or 'title' from frontmatter
|
||||||
|
- "domains/ai-alignment/rlhf-reward-hacking" (relative path without .md)
|
||||||
|
"""
|
||||||
|
variants = set()
|
||||||
|
stem = path.stem
|
||||||
|
variants.add(stem.lower())
|
||||||
|
variants.add(stem.lower().replace("-", " "))
|
||||||
|
|
||||||
|
# Also match by relative path (Ganymede Q1: some edges use path references)
|
||||||
|
if repo_root:
|
||||||
|
try:
|
||||||
|
rel = str(path.relative_to(repo_root)).removesuffix(".md")
|
||||||
|
variants.add(rel.lower())
|
||||||
|
except ValueError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
fm = _parse_frontmatter(path)
|
||||||
|
if fm:
|
||||||
|
for key in ("name", "title"):
|
||||||
|
val = fm.get(key)
|
||||||
|
if isinstance(val, str) and val.strip():
|
||||||
|
variants.add(val.strip().lower())
|
||||||
|
|
||||||
|
return list(variants)
|
||||||
|
|
||||||
|
|
||||||
|
def _is_entity(path: Path) -> bool:
|
||||||
|
"""Check if a file is an entity (not a claim). Entities need different edge vocabulary."""
|
||||||
|
fm = _parse_frontmatter(path)
|
||||||
|
if fm and fm.get("type") == "entity":
|
||||||
|
return True
|
||||||
|
# Check path parts — avoids false positives on paths like "domains/entities-overview/"
|
||||||
|
return "entities" in Path(path).parts
|
||||||
|
|
||||||
|
|
||||||
|
def _same_source(path_a: Path, path_b: Path) -> bool:
|
||||||
|
"""Check if two claims derive from the same source material.
|
||||||
|
|
||||||
|
Prevents self-referential edges where N claims about the same paper
|
||||||
|
all "support" each other — inflates graph density without adding information.
|
||||||
|
"""
|
||||||
|
fm_a = _parse_frontmatter(path_a)
|
||||||
|
fm_b = _parse_frontmatter(path_b)
|
||||||
|
if not fm_a or not fm_b:
|
||||||
|
return False
|
||||||
|
|
||||||
|
# Check source field
|
||||||
|
src_a = fm_a.get("source") or fm_a.get("source_file") or ""
|
||||||
|
src_b = fm_b.get("source") or fm_b.get("source_file") or ""
|
||||||
|
if src_a and src_b and str(src_a).strip() == str(src_b).strip():
|
||||||
|
return True
|
||||||
|
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def find_all_claims(repo_root: Path) -> list[Path]:
|
||||||
|
"""Find all knowledge files (claim, framework, entity, decision) in the KB."""
|
||||||
|
claims = []
|
||||||
|
for d in EMBED_DIRS:
|
||||||
|
base = repo_root / d
|
||||||
|
if not base.is_dir():
|
||||||
|
continue
|
||||||
|
for md in base.rglob("*.md"):
|
||||||
|
if md.name.startswith("_"):
|
||||||
|
continue
|
||||||
|
fm = _parse_frontmatter(md)
|
||||||
|
if fm and fm.get("type") not in ("source", "musing", None):
|
||||||
|
claims.append(md)
|
||||||
|
return claims
|
||||||
|
|
||||||
|
|
||||||
|
def build_reverse_link_index(claims: list[Path]) -> dict[str, set[Path]]:
|
||||||
|
"""Build a reverse index: claim_name_variant → set of files that link TO it.
|
||||||
|
|
||||||
|
For each claim, extract all outgoing edges. For each target name, record
|
||||||
|
the source claim as an incoming link for that target.
|
||||||
|
"""
|
||||||
|
# name_variant → set of source paths that point to it
|
||||||
|
incoming: dict[str, set[Path]] = {}
|
||||||
|
|
||||||
|
for claim_path in claims:
|
||||||
|
targets = _get_edge_targets(claim_path)
|
||||||
|
for target in targets:
|
||||||
|
if target not in incoming:
|
||||||
|
incoming[target] = set()
|
||||||
|
incoming[target].add(claim_path)
|
||||||
|
|
||||||
|
return incoming
|
||||||
|
|
||||||
|
|
||||||
|
def find_orphans(claims: list[Path], incoming: dict[str, set[Path]],
|
||||||
|
repo_root: Path = None) -> list[Path]:
|
||||||
|
"""Find claims with zero incoming links."""
|
||||||
|
orphans = []
|
||||||
|
for claim_path in claims:
|
||||||
|
variants = _claim_name_variants(claim_path, repo_root)
|
||||||
|
has_incoming = any(
|
||||||
|
len(incoming.get(v, set()) - {claim_path}) > 0
|
||||||
|
for v in variants
|
||||||
|
)
|
||||||
|
if not has_incoming:
|
||||||
|
orphans.append(claim_path)
|
||||||
|
return orphans
|
||||||
|
|
||||||
|
|
||||||
|
def sort_orphans_by_domain(orphans: list[Path], repo_root: Path) -> list[Path]:
|
||||||
|
"""Sort orphans by domain priority (diversity first, internet-finance last)."""
|
||||||
|
def domain_key(path: Path) -> tuple[int, str]:
|
||||||
|
rel = path.relative_to(repo_root)
|
||||||
|
parts = rel.parts
|
||||||
|
domain = ""
|
||||||
|
if len(parts) >= 2 and parts[0] in ("domains", "entities", "decisions"):
|
||||||
|
domain = parts[1]
|
||||||
|
elif parts[0] == "foundations" and len(parts) >= 2:
|
||||||
|
domain = parts[1]
|
||||||
|
elif parts[0] == "core":
|
||||||
|
domain = "core"
|
||||||
|
|
||||||
|
try:
|
||||||
|
priority = DOMAIN_PRIORITY.index(domain)
|
||||||
|
except ValueError:
|
||||||
|
# Unknown domain goes before internet-finance but after known ones
|
||||||
|
priority = len(DOMAIN_PRIORITY) - 1
|
||||||
|
|
||||||
|
return (priority, path.stem)
|
||||||
|
|
||||||
|
return sorted(orphans, key=domain_key)
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Qdrant Search ───────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def _get_api_key() -> str:
|
||||||
|
"""Load OpenRouter API key."""
|
||||||
|
key_file = SECRETS_DIR / "openrouter-key"
|
||||||
|
if key_file.exists():
|
||||||
|
return key_file.read_text().strip()
|
||||||
|
key = os.environ.get("OPENROUTER_API_KEY", "")
|
||||||
|
if key:
|
||||||
|
return key
|
||||||
|
logger.error("No OpenRouter API key found")
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
|
||||||
|
def make_point_id(rel_path: str) -> str:
|
||||||
|
"""Deterministic point ID from repo-relative path (matches embed-claims.py)."""
|
||||||
|
return hashlib.md5(rel_path.encode()).hexdigest()
|
||||||
|
|
||||||
|
|
||||||
|
def get_vector_from_qdrant(rel_path: str) -> list[float] | None:
|
||||||
|
"""Retrieve a claim's existing vector from Qdrant by its point ID."""
|
||||||
|
point_id = make_point_id(rel_path)
|
||||||
|
body = json.dumps({"ids": [point_id], "with_vector": True}).encode()
|
||||||
|
req = urllib.request.Request(
|
||||||
|
f"{QDRANT_URL}/collections/{QDRANT_COLLECTION}/points",
|
||||||
|
data=body,
|
||||||
|
headers={"Content-Type": "application/json"},
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
with urllib.request.urlopen(req, timeout=10) as resp:
|
||||||
|
data = json.loads(resp.read())
|
||||||
|
points = data.get("result", [])
|
||||||
|
if points and points[0].get("vector"):
|
||||||
|
return points[0]["vector"]
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning("Qdrant point lookup failed for %s: %s", rel_path, e)
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def search_neighbors(vector: list[float], exclude_path: str,
|
||||||
|
threshold: float, limit: int) -> list[dict]:
|
||||||
|
"""Search Qdrant for nearest neighbors above threshold, excluding self."""
|
||||||
|
body = {
|
||||||
|
"vector": vector,
|
||||||
|
"limit": limit + 5, # over-fetch to account for self + filtered
|
||||||
|
"with_payload": True,
|
||||||
|
"score_threshold": threshold,
|
||||||
|
"filter": {
|
||||||
|
"must_not": [{"key": "claim_path", "match": {"value": exclude_path}}]
|
||||||
|
},
|
||||||
|
}
|
||||||
|
req = urllib.request.Request(
|
||||||
|
f"{QDRANT_URL}/collections/{QDRANT_COLLECTION}/points/search",
|
||||||
|
data=json.dumps(body).encode(),
|
||||||
|
headers={"Content-Type": "application/json"},
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
with urllib.request.urlopen(req, timeout=10) as resp:
|
||||||
|
data = json.loads(resp.read())
|
||||||
|
hits = data.get("result", [])
|
||||||
|
return hits[:limit]
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning("Qdrant search failed: %s", e)
|
||||||
|
return []
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Haiku Edge Classification ───────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
CLASSIFY_PROMPT = """You are classifying the relationship between two knowledge claims.
|
||||||
|
|
||||||
|
CLAIM A (the orphan — needs to be connected):
|
||||||
|
Title: {orphan_title}
|
||||||
|
Body: {orphan_body}
|
||||||
|
|
||||||
|
CLAIM B (the neighbor — already connected in the knowledge graph):
|
||||||
|
Title: {neighbor_title}
|
||||||
|
Body: {neighbor_body}
|
||||||
|
|
||||||
|
What is the relationship FROM Claim B TO Claim A?
|
||||||
|
|
||||||
|
Options:
|
||||||
|
- "supports" — Claim B provides evidence, reasoning, or examples that strengthen Claim A
|
||||||
|
- "challenges" — Claim B contradicts, undermines, or provides counter-evidence to Claim A. NOTE: "challenges" is underused — if one claim says X works and another says X fails, or they propose incompatible mechanisms, that IS a challenge. Use it.
|
||||||
|
- "related" — Claims are topically connected but neither supports nor challenges the other. This is the WEAKEST edge — prefer supports/challenges when the relationship has directionality.
|
||||||
|
|
||||||
|
Respond with EXACTLY this JSON format, nothing else:
|
||||||
|
{{"edge_type": "supports|challenges|related", "confidence": 0.0-1.0, "reason": "one sentence explanation"}}
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
def classify_edge(orphan_title: str, orphan_body: str,
|
||||||
|
neighbor_title: str, neighbor_body: str,
|
||||||
|
api_key: str) -> dict:
|
||||||
|
"""Use Haiku to classify the edge type between two claims.
|
||||||
|
|
||||||
|
Returns {"edge_type": str, "confidence": float, "reason": str}.
|
||||||
|
Falls back to "related" on any failure.
|
||||||
|
"""
|
||||||
|
default = {"edge_type": "related", "confidence": 0.5, "reason": "classification failed"}
|
||||||
|
|
||||||
|
prompt = CLASSIFY_PROMPT.format(
|
||||||
|
orphan_title=orphan_title,
|
||||||
|
orphan_body=orphan_body[:500],
|
||||||
|
neighbor_title=neighbor_title,
|
||||||
|
neighbor_body=neighbor_body[:500],
|
||||||
|
)
|
||||||
|
|
||||||
|
payload = json.dumps({
|
||||||
|
"model": "anthropic/claude-3.5-haiku",
|
||||||
|
"messages": [{"role": "user", "content": prompt}],
|
||||||
|
"max_tokens": 200,
|
||||||
|
"temperature": 0.3,
|
||||||
|
}).encode()
|
||||||
|
|
||||||
|
req = urllib.request.Request(
|
||||||
|
"https://openrouter.ai/api/v1/chat/completions",
|
||||||
|
data=payload,
|
||||||
|
headers={
|
||||||
|
"Authorization": f"Bearer {api_key}",
|
||||||
|
"Content-Type": "application/json",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
with urllib.request.urlopen(req, timeout=15) as resp:
|
||||||
|
data = json.loads(resp.read())
|
||||||
|
content = data["choices"][0]["message"]["content"].strip()
|
||||||
|
|
||||||
|
# Parse JSON from response (handle markdown code blocks)
|
||||||
|
if content.startswith("```"):
|
||||||
|
content = content.split("\n", 1)[-1].rsplit("```", 1)[0].strip()
|
||||||
|
|
||||||
|
result = json.loads(content)
|
||||||
|
edge_type = result.get("edge_type", "related")
|
||||||
|
confidence = float(result.get("confidence", 0.5))
|
||||||
|
|
||||||
|
# Enforce confidence floor for supports/challenges
|
||||||
|
if edge_type in ("supports", "challenges") and confidence < HAIKU_CONFIDENCE_FLOOR:
|
||||||
|
edge_type = "related"
|
||||||
|
|
||||||
|
return {
|
||||||
|
"edge_type": edge_type,
|
||||||
|
"confidence": confidence,
|
||||||
|
"reason": result.get("reason", ""),
|
||||||
|
}
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning("Haiku classification failed: %s", e)
|
||||||
|
return default
|
||||||
|
|
||||||
|
|
||||||
|
# ─── YAML Frontmatter Editing ────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def _count_reweave_edges(path: Path) -> int:
|
||||||
|
"""Count existing reweave_edges in a file's frontmatter."""
|
||||||
|
fm = _parse_frontmatter(path)
|
||||||
|
if not fm:
|
||||||
|
return 0
|
||||||
|
rw = fm.get("reweave_edges")
|
||||||
|
if isinstance(rw, list):
|
||||||
|
return len(rw)
|
||||||
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
def write_edge(neighbor_path: Path, orphan_title: str, edge_type: str,
|
||||||
|
date_str: str, dry_run: bool = False) -> bool:
|
||||||
|
"""Write a reweave edge on the neighbor's frontmatter.
|
||||||
|
|
||||||
|
Adds to both the edge_type list (related/supports/challenges) and
|
||||||
|
the parallel reweave_edges list for provenance tracking.
|
||||||
|
|
||||||
|
Uses ruamel.yaml for round-trip YAML preservation.
|
||||||
|
"""
|
||||||
|
# Check per-file cap
|
||||||
|
if _count_reweave_edges(neighbor_path) >= PER_FILE_EDGE_CAP:
|
||||||
|
logger.info(" Skip %s — per-file edge cap (%d) reached", neighbor_path.name, PER_FILE_EDGE_CAP)
|
||||||
|
return False
|
||||||
|
|
||||||
|
try:
|
||||||
|
text = neighbor_path.read_text(errors="replace")
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(" Cannot read %s: %s", neighbor_path, e)
|
||||||
|
return False
|
||||||
|
|
||||||
|
if not text.startswith("---"):
|
||||||
|
logger.warning(" No frontmatter in %s", neighbor_path.name)
|
||||||
|
return False
|
||||||
|
|
||||||
|
end = text.find("\n---", 3)
|
||||||
|
if end == -1:
|
||||||
|
return False
|
||||||
|
|
||||||
|
fm_text = text[3:end]
|
||||||
|
body_text = text[end:] # includes the closing ---
|
||||||
|
|
||||||
|
# Try ruamel.yaml for round-trip editing
|
||||||
|
try:
|
||||||
|
from ruamel.yaml import YAML
|
||||||
|
ry = YAML()
|
||||||
|
ry.preserve_quotes = True
|
||||||
|
ry.width = 4096 # prevent line wrapping
|
||||||
|
|
||||||
|
import io
|
||||||
|
fm = ry.load(fm_text)
|
||||||
|
if not isinstance(fm, dict):
|
||||||
|
return False
|
||||||
|
|
||||||
|
# Add to edge_type list (related/supports/challenges)
|
||||||
|
# Clean value only — provenance tracked in reweave_edges (Ganymede: comment-in-string bug)
|
||||||
|
if edge_type not in fm:
|
||||||
|
fm[edge_type] = []
|
||||||
|
elif not isinstance(fm[edge_type], list):
|
||||||
|
fm[edge_type] = [fm[edge_type]]
|
||||||
|
|
||||||
|
# Check for duplicate
|
||||||
|
existing = [str(v).strip().lower() for v in fm[edge_type] if v]
|
||||||
|
if orphan_title.strip().lower() in existing:
|
||||||
|
logger.info(" Skip duplicate edge: %s → %s", neighbor_path.name, orphan_title)
|
||||||
|
return False
|
||||||
|
|
||||||
|
fm[edge_type].append(orphan_title)
|
||||||
|
|
||||||
|
# Add to reweave_edges with provenance (edge_type + date for audit trail)
|
||||||
|
if "reweave_edges" not in fm:
|
||||||
|
fm["reweave_edges"] = []
|
||||||
|
elif not isinstance(fm["reweave_edges"], list):
|
||||||
|
fm["reweave_edges"] = [fm["reweave_edges"]]
|
||||||
|
fm["reweave_edges"].append(f"{orphan_title}|{edge_type}|{date_str}")
|
||||||
|
|
||||||
|
# Serialize back
|
||||||
|
buf = io.StringIO()
|
||||||
|
ry.dump(fm, buf)
|
||||||
|
new_fm = buf.getvalue().rstrip("\n")
|
||||||
|
|
||||||
|
new_text = f"---\n{new_fm}{body_text}"
|
||||||
|
|
||||||
|
if not dry_run:
|
||||||
|
neighbor_path.write_text(new_text)
|
||||||
|
return True
|
||||||
|
|
||||||
|
except ImportError:
|
||||||
|
# Fallback: regex-based editing (no ruamel.yaml installed)
|
||||||
|
logger.info(" ruamel.yaml not available, using regex fallback")
|
||||||
|
return _write_edge_regex(neighbor_path, fm_text, body_text, orphan_title,
|
||||||
|
edge_type, date_str, dry_run)
|
||||||
|
|
||||||
|
|
||||||
|
def _write_edge_regex(neighbor_path: Path, fm_text: str, body_text: str,
|
||||||
|
orphan_title: str, edge_type: str, date_str: str,
|
||||||
|
dry_run: bool) -> bool:
|
||||||
|
"""Fallback: add edge via regex when ruamel.yaml is unavailable."""
|
||||||
|
# Strip leading newline from fm_text (text[3:end] includes \n after ---)
|
||||||
|
fm_text = fm_text.lstrip("\n")
|
||||||
|
|
||||||
|
# Check for duplicate before writing
|
||||||
|
existing_re = re.compile(
|
||||||
|
rf'^\s*-\s*["\']?{re.escape(orphan_title)}["\']?\s*$',
|
||||||
|
re.MULTILINE | re.IGNORECASE,
|
||||||
|
)
|
||||||
|
if existing_re.search(fm_text):
|
||||||
|
logger.info(" Skip duplicate edge (regex): %s → %s", neighbor_path.name, orphan_title)
|
||||||
|
return False
|
||||||
|
|
||||||
|
# Check if edge_type field exists
|
||||||
|
field_re = re.compile(rf"^{edge_type}:\s*$", re.MULTILINE)
|
||||||
|
inline_re = re.compile(rf'^{edge_type}:\s*\[', re.MULTILINE)
|
||||||
|
|
||||||
|
entry_line = f' - "{orphan_title}"'
|
||||||
|
rw_line = f' - "{orphan_title}|{edge_type}|{date_str}"'
|
||||||
|
|
||||||
|
if field_re.search(fm_text):
|
||||||
|
# Multi-line list exists — find end of list, append
|
||||||
|
lines = fm_text.split("\n")
|
||||||
|
new_lines = []
|
||||||
|
in_field = False
|
||||||
|
inserted = False
|
||||||
|
for line in lines:
|
||||||
|
new_lines.append(line)
|
||||||
|
if re.match(rf"^{edge_type}:\s*$", line):
|
||||||
|
in_field = True
|
||||||
|
elif in_field and not line.startswith(" -"):
|
||||||
|
# End of list — insert before this line
|
||||||
|
new_lines.insert(-1, entry_line)
|
||||||
|
in_field = False
|
||||||
|
inserted = True
|
||||||
|
if in_field and not inserted:
|
||||||
|
# Field was last in frontmatter
|
||||||
|
new_lines.append(entry_line)
|
||||||
|
fm_text = "\n".join(new_lines)
|
||||||
|
|
||||||
|
elif inline_re.search(fm_text):
|
||||||
|
# Inline list — skip, too complex for regex
|
||||||
|
logger.warning(" Inline list format for %s in %s, skipping", edge_type, neighbor_path.name)
|
||||||
|
return False
|
||||||
|
else:
|
||||||
|
# Field doesn't exist — add at end of frontmatter
|
||||||
|
fm_text = fm_text.rstrip("\n") + f"\n{edge_type}:\n{entry_line}"
|
||||||
|
|
||||||
|
# Add reweave_edges field
|
||||||
|
if "reweave_edges:" in fm_text:
|
||||||
|
lines = fm_text.split("\n")
|
||||||
|
new_lines = []
|
||||||
|
in_rw = False
|
||||||
|
inserted_rw = False
|
||||||
|
for line in lines:
|
||||||
|
new_lines.append(line)
|
||||||
|
if re.match(r"^reweave_edges:\s*$", line):
|
||||||
|
in_rw = True
|
||||||
|
elif in_rw and not line.startswith(" -"):
|
||||||
|
new_lines.insert(-1, rw_line)
|
||||||
|
in_rw = False
|
||||||
|
inserted_rw = True
|
||||||
|
if in_rw and not inserted_rw:
|
||||||
|
new_lines.append(rw_line)
|
||||||
|
fm_text = "\n".join(new_lines)
|
||||||
|
else:
|
||||||
|
fm_text = fm_text.rstrip("\n") + f"\nreweave_edges:\n{rw_line}"
|
||||||
|
|
||||||
|
new_text = f"---\n{fm_text}{body_text}"
|
||||||
|
|
||||||
|
if not dry_run:
|
||||||
|
neighbor_path.write_text(new_text)
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Git + PR ────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def create_branch(repo_root: Path, branch_name: str) -> bool:
|
||||||
|
"""Create and checkout a new branch. Cleans up stale local/remote branches from prior failed runs."""
|
||||||
|
# Delete stale local branch if it exists (e.g., from a failed earlier run today)
|
||||||
|
subprocess.run(["git", "branch", "-D", branch_name],
|
||||||
|
cwd=str(repo_root), capture_output=True) # ignore errors if branch doesn't exist
|
||||||
|
|
||||||
|
# Delete stale remote branch if it exists
|
||||||
|
token_file = SECRETS_DIR / "forgejo-admin-token"
|
||||||
|
if token_file.exists():
|
||||||
|
token = token_file.read_text().strip()
|
||||||
|
push_url = f"http://teleo:{token}@localhost:3000/teleo/teleo-codex.git"
|
||||||
|
subprocess.run(["git", "push", push_url, "--delete", branch_name],
|
||||||
|
cwd=str(repo_root), capture_output=True) # ignore errors if branch doesn't exist
|
||||||
|
|
||||||
|
try:
|
||||||
|
subprocess.run(["git", "checkout", "-b", branch_name],
|
||||||
|
cwd=str(repo_root), check=True, capture_output=True)
|
||||||
|
return True
|
||||||
|
except subprocess.CalledProcessError as e:
|
||||||
|
logger.error("Failed to create branch %s: %s", branch_name, e.stderr.decode())
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def commit_and_push(repo_root: Path, branch_name: str, modified_files: list[Path],
|
||||||
|
orphan_count: int) -> bool:
|
||||||
|
"""Stage modified files, commit, and push."""
|
||||||
|
# Stage only modified files
|
||||||
|
for f in modified_files:
|
||||||
|
subprocess.run(["git", "add", str(f)], cwd=str(repo_root),
|
||||||
|
check=True, capture_output=True)
|
||||||
|
|
||||||
|
# Check if anything staged
|
||||||
|
result = subprocess.run(["git", "diff", "--cached", "--name-only"],
|
||||||
|
cwd=str(repo_root), capture_output=True, text=True)
|
||||||
|
if not result.stdout.strip():
|
||||||
|
logger.info("No files staged — nothing to commit")
|
||||||
|
return False
|
||||||
|
|
||||||
|
msg = (
|
||||||
|
f"reweave: connect {orphan_count} orphan claims via vector similarity\n\n"
|
||||||
|
f"Threshold: {DEFAULT_THRESHOLD}, Haiku classification, {len(modified_files)} files modified.\n\n"
|
||||||
|
f"Pentagon-Agent: Epimetheus <0144398e-4ed3-4fe2-95a3-3d72e1abf887>"
|
||||||
|
)
|
||||||
|
subprocess.run(["git", "commit", "-m", msg], cwd=str(repo_root),
|
||||||
|
check=True, capture_output=True)
|
||||||
|
|
||||||
|
# Push — inject token
|
||||||
|
token_file = SECRETS_DIR / "forgejo-admin-token"
|
||||||
|
if not token_file.exists():
|
||||||
|
logger.error("No Forgejo token found at %s", token_file)
|
||||||
|
return False
|
||||||
|
token = token_file.read_text().strip()
|
||||||
|
push_url = f"http://teleo:{token}@localhost:3000/teleo/teleo-codex.git"
|
||||||
|
|
||||||
|
subprocess.run(["git", "push", "-u", push_url, branch_name],
|
||||||
|
cwd=str(repo_root), check=True, capture_output=True)
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
def create_pr(branch_name: str, orphan_count: int, summary_lines: list[str]) -> str | None:
|
||||||
|
"""Create a Forgejo PR for the reweave batch."""
|
||||||
|
token_file = SECRETS_DIR / "forgejo-admin-token"
|
||||||
|
if not token_file.exists():
|
||||||
|
return None
|
||||||
|
token = token_file.read_text().strip()
|
||||||
|
|
||||||
|
summary = "\n".join(f"- {line}" for line in summary_lines[:30])
|
||||||
|
body = (
|
||||||
|
f"## Orphan Reweave\n\n"
|
||||||
|
f"Connected **{orphan_count}** orphan claims to the knowledge graph "
|
||||||
|
f"via vector similarity (threshold {DEFAULT_THRESHOLD}) + Haiku edge classification.\n\n"
|
||||||
|
f"### Edges Added\n{summary}\n\n"
|
||||||
|
f"### Review Guide\n"
|
||||||
|
f"- Each edge has a `# reweave:YYYY-MM-DD` comment — strip after review\n"
|
||||||
|
f"- `reweave_edges` field tracks automated edges for tooling (graph_expand weights them 0.75x)\n"
|
||||||
|
f"- Upgrade `related` → `supports`/`challenges` where you have better judgment\n"
|
||||||
|
f"- Delete any edges that don't make sense\n\n"
|
||||||
|
f"Pentagon-Agent: Epimetheus"
|
||||||
|
)
|
||||||
|
|
||||||
|
payload = json.dumps({
|
||||||
|
"title": f"reweave: connect {orphan_count} orphan claims",
|
||||||
|
"body": body,
|
||||||
|
"head": branch_name,
|
||||||
|
"base": "main",
|
||||||
|
}).encode()
|
||||||
|
|
||||||
|
req = urllib.request.Request(
|
||||||
|
f"{FORGEJO_URL}/api/v1/repos/teleo/teleo-codex/pulls",
|
||||||
|
data=payload,
|
||||||
|
headers={
|
||||||
|
"Authorization": f"token {token}",
|
||||||
|
"Content-Type": "application/json",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
with urllib.request.urlopen(req, timeout=30) as resp:
|
||||||
|
data = json.loads(resp.read())
|
||||||
|
return data.get("html_url", "")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error("PR creation failed: %s", e)
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Worktree Lock ───────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
_lock_fd = None # Module-level to prevent GC and avoid function-attribute fragility
|
||||||
|
|
||||||
|
|
||||||
|
def acquire_lock(lock_path: Path, timeout: int = 30) -> bool:
|
||||||
|
"""Acquire file lock for worktree access. Returns True if acquired."""
|
||||||
|
global _lock_fd
|
||||||
|
import fcntl
|
||||||
|
try:
|
||||||
|
lock_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
_lock_fd = open(lock_path, "w")
|
||||||
|
fcntl.flock(_lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
|
||||||
|
_lock_fd.write(f"reweave:{os.getpid()}\n")
|
||||||
|
_lock_fd.flush()
|
||||||
|
return True
|
||||||
|
except (IOError, OSError):
|
||||||
|
logger.warning("Could not acquire worktree lock at %s — another process has it", lock_path)
|
||||||
|
_lock_fd = None
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def release_lock(lock_path: Path):
|
||||||
|
"""Release worktree lock."""
|
||||||
|
global _lock_fd
|
||||||
|
import fcntl
|
||||||
|
fd = _lock_fd
|
||||||
|
_lock_fd = None
|
||||||
|
if fd:
|
||||||
|
try:
|
||||||
|
fcntl.flock(fd, fcntl.LOCK_UN)
|
||||||
|
fd.close()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
try:
|
||||||
|
lock_path.unlink(missing_ok=True)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Main ────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
global REPO_DIR, DEFAULT_THRESHOLD
|
||||||
|
|
||||||
|
parser = argparse.ArgumentParser(description="Orphan Reweave — connect isolated claims")
|
||||||
|
parser.add_argument("--dry-run", action="store_true",
|
||||||
|
help="Show what would be connected without modifying files")
|
||||||
|
parser.add_argument("--max-orphans", type=int, default=DEFAULT_MAX_ORPHANS,
|
||||||
|
help=f"Max orphans to process (default {DEFAULT_MAX_ORPHANS})")
|
||||||
|
parser.add_argument("--max-neighbors", type=int, default=DEFAULT_MAX_NEIGHBORS,
|
||||||
|
help=f"Max neighbors per orphan (default {DEFAULT_MAX_NEIGHBORS})")
|
||||||
|
parser.add_argument("--threshold", type=float, default=DEFAULT_THRESHOLD,
|
||||||
|
help=f"Minimum cosine similarity (default {DEFAULT_THRESHOLD})")
|
||||||
|
parser.add_argument("--repo-dir", type=str, default=None,
|
||||||
|
help="Override repo directory")
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
if args.repo_dir:
|
||||||
|
REPO_DIR = Path(args.repo_dir)
|
||||||
|
DEFAULT_THRESHOLD = args.threshold
|
||||||
|
|
||||||
|
date_str = datetime.date.today().isoformat()
|
||||||
|
branch_name = f"reweave/{date_str}"
|
||||||
|
|
||||||
|
logger.info("=== Orphan Reweave ===")
|
||||||
|
logger.info("Repo: %s", REPO_DIR)
|
||||||
|
logger.info("Threshold: %.2f, Max orphans: %d, Max neighbors: %d",
|
||||||
|
args.threshold, args.max_orphans, args.max_neighbors)
|
||||||
|
if args.dry_run:
|
||||||
|
logger.info("DRY RUN — no files will be modified")
|
||||||
|
|
||||||
|
# Step 1: Find all claims and build reverse-link index
|
||||||
|
logger.info("Step 1: Scanning KB for claims...")
|
||||||
|
claims = find_all_claims(REPO_DIR)
|
||||||
|
logger.info(" Found %d knowledge files", len(claims))
|
||||||
|
|
||||||
|
logger.info("Step 2: Building reverse-link index...")
|
||||||
|
incoming = build_reverse_link_index(claims)
|
||||||
|
|
||||||
|
logger.info("Step 3: Finding orphans...")
|
||||||
|
orphans = find_orphans(claims, incoming, REPO_DIR)
|
||||||
|
orphans = sort_orphans_by_domain(orphans, REPO_DIR)
|
||||||
|
logger.info(" Found %d orphans (%.1f%% of %d claims)",
|
||||||
|
len(orphans), 100 * len(orphans) / max(len(claims), 1), len(claims))
|
||||||
|
|
||||||
|
if not orphans:
|
||||||
|
logger.info("No orphans found — KB is fully connected!")
|
||||||
|
return
|
||||||
|
|
||||||
|
# Cap to max_orphans
|
||||||
|
batch = orphans[:args.max_orphans]
|
||||||
|
logger.info(" Processing batch of %d orphans", len(batch))
|
||||||
|
|
||||||
|
# Step 4: For each orphan, find neighbors and classify edges
|
||||||
|
api_key = _get_api_key()
|
||||||
|
edges_to_write: list[dict] = [] # {neighbor_path, orphan_title, edge_type, reason, score}
|
||||||
|
skipped_no_vector = 0
|
||||||
|
skipped_no_neighbors = 0
|
||||||
|
skipped_entity_pair = 0
|
||||||
|
skipped_same_source = 0
|
||||||
|
|
||||||
|
for i, orphan_path in enumerate(batch):
|
||||||
|
rel_path = str(orphan_path.relative_to(REPO_DIR))
|
||||||
|
fm = _parse_frontmatter(orphan_path)
|
||||||
|
orphan_title = fm.get("name", fm.get("title", orphan_path.stem.replace("-", " "))) if fm else orphan_path.stem
|
||||||
|
orphan_body = _get_body(orphan_path)
|
||||||
|
|
||||||
|
logger.info("[%d/%d] %s", i + 1, len(batch), orphan_title[:80])
|
||||||
|
|
||||||
|
# Get vector from Qdrant
|
||||||
|
vector = get_vector_from_qdrant(rel_path)
|
||||||
|
if not vector:
|
||||||
|
logger.info(" No vector in Qdrant — skipping (not embedded yet)")
|
||||||
|
skipped_no_vector += 1
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Find neighbors
|
||||||
|
hits = search_neighbors(vector, rel_path, args.threshold, args.max_neighbors)
|
||||||
|
if not hits:
|
||||||
|
logger.info(" No neighbors above threshold %.2f", args.threshold)
|
||||||
|
skipped_no_neighbors += 1
|
||||||
|
continue
|
||||||
|
|
||||||
|
for hit in hits:
|
||||||
|
payload = hit.get("payload", {})
|
||||||
|
neighbor_rel = payload.get("claim_path", "")
|
||||||
|
neighbor_title = payload.get("claim_title", "")
|
||||||
|
score = hit.get("score", 0)
|
||||||
|
|
||||||
|
if not neighbor_rel:
|
||||||
|
continue
|
||||||
|
|
||||||
|
neighbor_path = REPO_DIR / neighbor_rel
|
||||||
|
if not neighbor_path.exists():
|
||||||
|
logger.info(" Neighbor %s not found on disk — skipping", neighbor_rel)
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Entity-to-entity exclusion: entities need different vocabulary
|
||||||
|
# (founded_by, competes_with, etc.) not supports/challenges
|
||||||
|
if _is_entity(orphan_path) and _is_entity(neighbor_path):
|
||||||
|
logger.info(" Skip entity-entity pair: %s ↔ %s", orphan_path.name, neighbor_path.name)
|
||||||
|
skipped_entity_pair += 1
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Same-source exclusion: N claims from one paper all "supporting" each other
|
||||||
|
# inflates graph density without adding information
|
||||||
|
if _same_source(orphan_path, neighbor_path):
|
||||||
|
logger.info(" Skip same-source pair: %s ↔ %s", orphan_path.name, neighbor_path.name)
|
||||||
|
skipped_same_source += 1
|
||||||
|
continue
|
||||||
|
|
||||||
|
neighbor_body = _get_body(neighbor_path)
|
||||||
|
|
||||||
|
# Classify with Haiku
|
||||||
|
result = classify_edge(orphan_title, orphan_body,
|
||||||
|
neighbor_title, neighbor_body, api_key)
|
||||||
|
edge_type = result["edge_type"]
|
||||||
|
confidence = result["confidence"]
|
||||||
|
reason = result["reason"]
|
||||||
|
|
||||||
|
logger.info(" → %s (%.3f) %s [%.2f]: %s",
|
||||||
|
neighbor_title[:50], score, edge_type, confidence, reason[:60])
|
||||||
|
|
||||||
|
edges_to_write.append({
|
||||||
|
"neighbor_path": neighbor_path,
|
||||||
|
"neighbor_rel": neighbor_rel,
|
||||||
|
"neighbor_title": neighbor_title,
|
||||||
|
"orphan_title": str(orphan_title),
|
||||||
|
"orphan_rel": rel_path,
|
||||||
|
"edge_type": edge_type,
|
||||||
|
"score": score,
|
||||||
|
"confidence": confidence,
|
||||||
|
"reason": reason,
|
||||||
|
})
|
||||||
|
|
||||||
|
# Rate limit courtesy
|
||||||
|
if not args.dry_run and i < len(batch) - 1:
|
||||||
|
time.sleep(0.3)
|
||||||
|
|
||||||
|
logger.info("\n=== Summary ===")
|
||||||
|
logger.info("Orphans processed: %d", len(batch))
|
||||||
|
logger.info("Edges to write: %d", len(edges_to_write))
|
||||||
|
logger.info("Skipped (no vector): %d", skipped_no_vector)
|
||||||
|
logger.info("Skipped (no neighbors): %d", skipped_no_neighbors)
|
||||||
|
logger.info("Skipped (entity-entity): %d", skipped_entity_pair)
|
||||||
|
logger.info("Skipped (same-source): %d", skipped_same_source)
|
||||||
|
|
||||||
|
if not edges_to_write:
|
||||||
|
logger.info("Nothing to write.")
|
||||||
|
return
|
||||||
|
|
||||||
|
if args.dry_run:
|
||||||
|
logger.info("\n=== Dry Run — Edges That Would Be Written ===")
|
||||||
|
for e in edges_to_write:
|
||||||
|
logger.info(" %s → [%s] → %s (score=%.3f, conf=%.2f)",
|
||||||
|
e["neighbor_title"][:40], e["edge_type"],
|
||||||
|
e["orphan_title"][:40], e["score"], e["confidence"])
|
||||||
|
return
|
||||||
|
|
||||||
|
# Step 5: Acquire lock, create branch, write edges, commit, push, create PR
|
||||||
|
lock_path = REPO_DIR.parent / ".main-worktree.lock"
|
||||||
|
if not acquire_lock(lock_path):
|
||||||
|
logger.error("Cannot acquire worktree lock — aborting")
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Create branch
|
||||||
|
if not create_branch(REPO_DIR, branch_name):
|
||||||
|
logger.error("Failed to create branch %s", branch_name)
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
# Write edges
|
||||||
|
modified_files = set()
|
||||||
|
written = 0
|
||||||
|
summary_lines = []
|
||||||
|
|
||||||
|
for e in edges_to_write:
|
||||||
|
ok = write_edge(
|
||||||
|
e["neighbor_path"], e["orphan_title"], e["edge_type"],
|
||||||
|
date_str, dry_run=False,
|
||||||
|
)
|
||||||
|
if ok:
|
||||||
|
modified_files.add(e["neighbor_path"])
|
||||||
|
written += 1
|
||||||
|
summary_lines.append(
|
||||||
|
f"`{e['neighbor_title'][:50]}` → [{e['edge_type']}] → "
|
||||||
|
f"`{e['orphan_title'][:50]}` (score={e['score']:.3f})"
|
||||||
|
)
|
||||||
|
|
||||||
|
logger.info("Wrote %d edges across %d files", written, len(modified_files))
|
||||||
|
|
||||||
|
if not modified_files:
|
||||||
|
logger.info("No edges written — cleaning up branch")
|
||||||
|
subprocess.run(["git", "checkout", "main"], cwd=str(REPO_DIR),
|
||||||
|
capture_output=True)
|
||||||
|
subprocess.run(["git", "branch", "-d", branch_name], cwd=str(REPO_DIR),
|
||||||
|
capture_output=True)
|
||||||
|
return
|
||||||
|
|
||||||
|
# Commit and push
|
||||||
|
orphan_count = len(set(e["orphan_title"] for e in edges_to_write if e["neighbor_path"] in modified_files))
|
||||||
|
if commit_and_push(REPO_DIR, branch_name, list(modified_files), orphan_count):
|
||||||
|
logger.info("Pushed branch %s", branch_name)
|
||||||
|
|
||||||
|
# Create PR
|
||||||
|
pr_url = create_pr(branch_name, orphan_count, summary_lines)
|
||||||
|
if pr_url:
|
||||||
|
logger.info("PR created: %s", pr_url)
|
||||||
|
else:
|
||||||
|
logger.warning("PR creation failed — branch is pushed, create manually")
|
||||||
|
else:
|
||||||
|
logger.error("Commit/push failed")
|
||||||
|
|
||||||
|
finally:
|
||||||
|
# Always return to main — even on exception (Ganymede: branch cleanup)
|
||||||
|
try:
|
||||||
|
subprocess.run(["git", "checkout", "main"], cwd=str(REPO_DIR),
|
||||||
|
capture_output=True)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
release_lock(lock_path)
|
||||||
|
|
||||||
|
logger.info("Done.")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
296
ops/pipeline-v2/teleo-pipeline.py
Normal file
296
ops/pipeline-v2/teleo-pipeline.py
Normal file
|
|
@ -0,0 +1,296 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Teleo Pipeline v2 — single async daemon replacing 7 cron scripts.
|
||||||
|
|
||||||
|
Four stages: Ingest → Validate → Evaluate → Merge
|
||||||
|
SQLite WAL state store. systemd-managed. Graceful shutdown.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import logging
|
||||||
|
import signal
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# Add parent dir to path so lib/ is importable
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
sys.path.insert(0, str(Path(__file__).parent))
|
||||||
|
|
||||||
|
from lib import config, db
|
||||||
|
from lib import log as logmod
|
||||||
|
from lib.breaker import CircuitBreaker
|
||||||
|
from lib.evaluate import evaluate_cycle
|
||||||
|
from lib.fixer import fix_cycle as mechanical_fix_cycle
|
||||||
|
from lib.substantive_fixer import substantive_fix_cycle
|
||||||
|
from lib.health import start_health_server, stop_health_server
|
||||||
|
from lib.llm import kill_active_subprocesses
|
||||||
|
from lib.merge import merge_cycle
|
||||||
|
from lib.analytics import record_snapshot
|
||||||
|
from lib.entity_batch import entity_batch_cycle
|
||||||
|
from lib.extract import extract_cycle as source_extract_cycle
|
||||||
|
from lib.validate import validate_cycle
|
||||||
|
from lib.watchdog import watchdog_cycle
|
||||||
|
|
||||||
|
logger = logging.getLogger("pipeline")
|
||||||
|
|
||||||
|
# Global shutdown event — stages check this between iterations
|
||||||
|
shutdown_event = asyncio.Event()
|
||||||
|
|
||||||
|
|
||||||
|
async def stage_loop(name: str, interval: int, func, conn, breaker: CircuitBreaker):
|
||||||
|
"""Generic stage loop with interval, shutdown check, and circuit breaker."""
|
||||||
|
logger.info("Stage %s started (interval=%ds)", name, interval)
|
||||||
|
while not shutdown_event.is_set():
|
||||||
|
try:
|
||||||
|
if not breaker.allow_request():
|
||||||
|
logger.debug("Stage %s: breaker OPEN, skipping cycle", name)
|
||||||
|
else:
|
||||||
|
workers = breaker.max_workers()
|
||||||
|
succeeded, failed = await func(conn, max_workers=workers)
|
||||||
|
if failed > 0 and succeeded == 0:
|
||||||
|
breaker.record_failure()
|
||||||
|
elif succeeded > 0:
|
||||||
|
breaker.record_success()
|
||||||
|
except Exception:
|
||||||
|
logger.exception("Stage %s: unhandled error in cycle", name)
|
||||||
|
breaker.record_failure()
|
||||||
|
|
||||||
|
# Wait for interval or shutdown, whichever comes first
|
||||||
|
try:
|
||||||
|
await asyncio.wait_for(shutdown_event.wait(), timeout=interval)
|
||||||
|
break # shutdown_event was set
|
||||||
|
except asyncio.TimeoutError:
|
||||||
|
pass # interval elapsed, continue loop
|
||||||
|
|
||||||
|
logger.info("Stage %s stopped", name)
|
||||||
|
|
||||||
|
|
||||||
|
# --- Stage stubs (Phase 1 — replaced in later phases) ---
|
||||||
|
|
||||||
|
|
||||||
|
async def ingest_cycle(conn, max_workers=None):
|
||||||
|
"""Stage 1: Entity batch + source extraction."""
|
||||||
|
# Entity batch first (fast, local-only operations)
|
||||||
|
eb_ok, eb_err = await entity_batch_cycle(conn, max_workers=max_workers)
|
||||||
|
# Source extraction (slower, LLM calls)
|
||||||
|
try:
|
||||||
|
ex_ok, ex_err = await source_extract_cycle(conn, max_workers=max_workers)
|
||||||
|
except Exception:
|
||||||
|
import logging
|
||||||
|
logging.getLogger("pipeline").exception("Extract cycle failed (non-fatal)")
|
||||||
|
ex_ok, ex_err = 0, 0
|
||||||
|
return eb_ok + ex_ok, eb_err + ex_err
|
||||||
|
|
||||||
|
|
||||||
|
async def fix_cycle(conn, max_workers=None):
|
||||||
|
"""Combined fix stage: mechanical fixes first, then substantive fixes.
|
||||||
|
|
||||||
|
Mechanical (fixer.py): wiki link bracket stripping, $0
|
||||||
|
Substantive (substantive_fixer.py): confidence/title/scope fixes via LLM, $0.001
|
||||||
|
"""
|
||||||
|
m_fixed, m_errors = await mechanical_fix_cycle(conn, max_workers=max_workers)
|
||||||
|
s_fixed, s_errors = await substantive_fix_cycle(conn, max_workers=max_workers)
|
||||||
|
return m_fixed + s_fixed, m_errors + s_errors
|
||||||
|
|
||||||
|
|
||||||
|
async def snapshot_cycle(conn, max_workers=None):
|
||||||
|
"""Record metrics snapshot every cycle (runs on 15-min interval).
|
||||||
|
|
||||||
|
Populates metrics_snapshots table for Argus analytics dashboard.
|
||||||
|
Lightweight — just SQL queries, no LLM calls, no git ops.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
record_snapshot(conn)
|
||||||
|
return 1, 0
|
||||||
|
except Exception:
|
||||||
|
logger.exception("Snapshot recording failed")
|
||||||
|
return 0, 1
|
||||||
|
|
||||||
|
|
||||||
|
# validate_cycle imported from lib.validate
|
||||||
|
|
||||||
|
|
||||||
|
# evaluate_cycle imported from lib.evaluate
|
||||||
|
|
||||||
|
|
||||||
|
# merge_cycle imported from lib.merge
|
||||||
|
|
||||||
|
|
||||||
|
# --- Shutdown ---
|
||||||
|
|
||||||
|
|
||||||
|
def handle_signal(sig):
|
||||||
|
"""Signal handler — sets shutdown event."""
|
||||||
|
logger.info("Received %s, initiating graceful shutdown...", sig.name)
|
||||||
|
shutdown_event.set()
|
||||||
|
|
||||||
|
|
||||||
|
async def kill_subprocesses():
|
||||||
|
"""Kill any lingering Claude CLI subprocesses (delegates to evaluate module)."""
|
||||||
|
await kill_active_subprocesses()
|
||||||
|
|
||||||
|
|
||||||
|
async def cleanup_orphan_worktrees():
|
||||||
|
"""Remove any orphan worktrees from previous crashes."""
|
||||||
|
import glob
|
||||||
|
import shutil
|
||||||
|
|
||||||
|
# Use specific prefix to avoid colliding with other /tmp users (Ganymede)
|
||||||
|
orphans = glob.glob("/tmp/teleo-extract-*") + glob.glob("/tmp/teleo-merge-*")
|
||||||
|
# Fixer worktrees live under BASE_DIR/workspaces/fix-*
|
||||||
|
orphans += glob.glob(str(config.BASE_DIR / "workspaces" / "fix-*"))
|
||||||
|
for path in orphans:
|
||||||
|
logger.warning("Cleaning orphan worktree: %s", path)
|
||||||
|
try:
|
||||||
|
proc = await asyncio.create_subprocess_exec(
|
||||||
|
"git",
|
||||||
|
"worktree",
|
||||||
|
"remove",
|
||||||
|
"--force",
|
||||||
|
path,
|
||||||
|
cwd=str(config.REPO_DIR),
|
||||||
|
stdout=asyncio.subprocess.DEVNULL,
|
||||||
|
stderr=asyncio.subprocess.DEVNULL,
|
||||||
|
)
|
||||||
|
await asyncio.wait_for(proc.wait(), timeout=10)
|
||||||
|
except Exception:
|
||||||
|
shutil.rmtree(path, ignore_errors=True)
|
||||||
|
# Prune stale worktree metadata entries from bare repo (Ganymede)
|
||||||
|
try:
|
||||||
|
proc = await asyncio.create_subprocess_exec(
|
||||||
|
"git",
|
||||||
|
"worktree",
|
||||||
|
"prune",
|
||||||
|
cwd=str(config.REPO_DIR),
|
||||||
|
stdout=asyncio.subprocess.DEVNULL,
|
||||||
|
stderr=asyncio.subprocess.DEVNULL,
|
||||||
|
)
|
||||||
|
await asyncio.wait_for(proc.wait(), timeout=10)
|
||||||
|
except Exception:
|
||||||
|
logger.warning("git worktree prune failed, continuing")
|
||||||
|
|
||||||
|
|
||||||
|
# --- Main ---
|
||||||
|
|
||||||
|
|
||||||
|
async def main():
|
||||||
|
logmod.setup_logging()
|
||||||
|
logger.info("Teleo Pipeline v2 starting")
|
||||||
|
|
||||||
|
# Clean orphan worktrees from prior crashes (Ganymede's requirement)
|
||||||
|
await cleanup_orphan_worktrees()
|
||||||
|
|
||||||
|
# Initialize database
|
||||||
|
conn = db.get_connection()
|
||||||
|
db.migrate(conn)
|
||||||
|
logger.info("Database ready at %s", config.DB_PATH)
|
||||||
|
|
||||||
|
# Initialize circuit breakers
|
||||||
|
breakers = {
|
||||||
|
"ingest": CircuitBreaker("ingest", conn),
|
||||||
|
"validate": CircuitBreaker("validate", conn),
|
||||||
|
"evaluate": CircuitBreaker("evaluate", conn),
|
||||||
|
"merge": CircuitBreaker("merge", conn),
|
||||||
|
"fix": CircuitBreaker("fix", conn),
|
||||||
|
"snapshot": CircuitBreaker("snapshot", conn),
|
||||||
|
"watchdog": CircuitBreaker("watchdog", conn),
|
||||||
|
}
|
||||||
|
|
||||||
|
# Recover interrupted state from crashes
|
||||||
|
# Atomic recovery: all three resets in one transaction (Ganymede)
|
||||||
|
# Increment transient_retries on recovered sources to prevent infinite cycling (Vida)
|
||||||
|
with db.transaction(conn):
|
||||||
|
# Sources stuck in 'extracting' — increment retry counter, move to error if exhausted
|
||||||
|
c1 = conn.execute(
|
||||||
|
"""UPDATE sources SET
|
||||||
|
transient_retries = transient_retries + 1,
|
||||||
|
status = CASE
|
||||||
|
WHEN transient_retries + 1 >= ? THEN 'error'
|
||||||
|
ELSE 'unprocessed'
|
||||||
|
END,
|
||||||
|
last_error = CASE
|
||||||
|
WHEN transient_retries + 1 >= ? THEN 'crash recovery: retry budget exhausted'
|
||||||
|
ELSE last_error
|
||||||
|
END,
|
||||||
|
updated_at = datetime('now')
|
||||||
|
WHERE status = 'extracting'""",
|
||||||
|
(config.TRANSIENT_RETRY_MAX, config.TRANSIENT_RETRY_MAX),
|
||||||
|
)
|
||||||
|
# PRs stuck in 'merging' → approved (Ganymede's Q4 answer)
|
||||||
|
c2 = conn.execute("UPDATE prs SET status = 'approved' WHERE status = 'merging'")
|
||||||
|
# PRs stuck in 'reviewing' → open
|
||||||
|
c3 = conn.execute("UPDATE prs SET status = 'open', merge_cycled = 0 WHERE status = 'reviewing'")
|
||||||
|
# PRs stuck in 'fixing' → open (fixer crashed mid-fix)
|
||||||
|
c4 = conn.execute("UPDATE prs SET status = 'open' WHERE status = 'fixing'")
|
||||||
|
recovered = c1.rowcount + c2.rowcount + c3.rowcount + c4.rowcount
|
||||||
|
if recovered:
|
||||||
|
logger.info("Recovered %d interrupted rows from prior crash", recovered)
|
||||||
|
|
||||||
|
# Register signal handlers
|
||||||
|
loop = asyncio.get_running_loop()
|
||||||
|
for sig in (signal.SIGTERM, signal.SIGINT):
|
||||||
|
loop.add_signal_handler(sig, handle_signal, sig)
|
||||||
|
|
||||||
|
# Start health API
|
||||||
|
health_runners = []
|
||||||
|
await start_health_server(health_runners)
|
||||||
|
|
||||||
|
# Start stage loops
|
||||||
|
stages = [
|
||||||
|
asyncio.create_task(
|
||||||
|
stage_loop("ingest", config.INGEST_INTERVAL, ingest_cycle, conn, breakers["ingest"]),
|
||||||
|
name="ingest",
|
||||||
|
),
|
||||||
|
asyncio.create_task(
|
||||||
|
stage_loop("validate", config.VALIDATE_INTERVAL, validate_cycle, conn, breakers["validate"]),
|
||||||
|
name="validate",
|
||||||
|
),
|
||||||
|
asyncio.create_task(
|
||||||
|
stage_loop("evaluate", config.EVAL_INTERVAL, evaluate_cycle, conn, breakers["evaluate"]),
|
||||||
|
name="evaluate",
|
||||||
|
),
|
||||||
|
asyncio.create_task(
|
||||||
|
stage_loop("merge", config.MERGE_INTERVAL, merge_cycle, conn, breakers["merge"]),
|
||||||
|
name="merge",
|
||||||
|
),
|
||||||
|
asyncio.create_task(
|
||||||
|
stage_loop("fix", config.FIX_INTERVAL, fix_cycle, conn, breakers["fix"]),
|
||||||
|
name="fix",
|
||||||
|
),
|
||||||
|
asyncio.create_task(
|
||||||
|
stage_loop("snapshot", 900, snapshot_cycle, conn, breakers["snapshot"]),
|
||||||
|
name="snapshot",
|
||||||
|
),
|
||||||
|
asyncio.create_task(
|
||||||
|
stage_loop("watchdog", 60, watchdog_cycle, conn, breakers["watchdog"]),
|
||||||
|
name="watchdog",
|
||||||
|
),
|
||||||
|
]
|
||||||
|
|
||||||
|
logger.info("All stages running")
|
||||||
|
|
||||||
|
# Wait for shutdown signal
|
||||||
|
await shutdown_event.wait()
|
||||||
|
logger.info("Shutdown event received, waiting for stages to finish...")
|
||||||
|
|
||||||
|
# Give stages time to finish current work
|
||||||
|
try:
|
||||||
|
await asyncio.wait_for(asyncio.gather(*stages, return_exceptions=True), timeout=60)
|
||||||
|
except asyncio.TimeoutError:
|
||||||
|
logger.warning("Stages did not finish within 60s, force-cancelling")
|
||||||
|
for task in stages:
|
||||||
|
task.cancel()
|
||||||
|
await asyncio.gather(*stages, return_exceptions=True)
|
||||||
|
|
||||||
|
# Kill lingering subprocesses
|
||||||
|
await kill_subprocesses()
|
||||||
|
|
||||||
|
# Stop health API
|
||||||
|
await stop_health_server(health_runners)
|
||||||
|
|
||||||
|
# Close DB
|
||||||
|
conn.close()
|
||||||
|
logger.info("Teleo Pipeline v2 shut down cleanly")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
asyncio.run(main())
|
||||||
|
|
@ -324,6 +324,41 @@ Format:
|
||||||
|
|
||||||
The journal accumulates session over session. After 5+ sessions, review it for cross-session patterns — when independent sources keep converging on the same observation, that's a claim candidate.
|
The journal accumulates session over session. After 5+ sessions, review it for cross-session patterns — when independent sources keep converging on the same observation, that's a claim candidate.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
### Step 8.5: Write Session Digest (2 min)
|
||||||
|
Write a JSON session digest to /opt/teleo-eval/agent-state/${AGENT}/sessions/${DATE}.json
|
||||||
|
|
||||||
|
This is a structured summary for human review. Be honest about what surprised you and where your confidence shifted. Format:
|
||||||
|
|
||||||
|
{
|
||||||
|
\"agent\": \"${AGENT}\",
|
||||||
|
\"date\": \"${DATE}\",
|
||||||
|
\"research_question\": \"[the question you investigated]\",
|
||||||
|
\"belief_targeted\": \"[which keystone belief you tried to disconfirm]\",
|
||||||
|
\"disconfirmation_result\": \"[what you found — did the belief hold, weaken, or get complicated?]\",
|
||||||
|
\"sources_archived\": [number],
|
||||||
|
\"key_findings\": [
|
||||||
|
\"[most important thing you learned — be specific, not generic]\",
|
||||||
|
\"[second most important, if any]\"
|
||||||
|
],
|
||||||
|
\"surprises\": [
|
||||||
|
\"[what you did NOT expect to find — or expected to find but didn't]\"
|
||||||
|
],
|
||||||
|
\"confidence_shifts\": [
|
||||||
|
{\"belief\": \"[belief title]\", \"direction\": \"stronger|weaker|unchanged\", \"reason\": \"[one sentence why]\"}
|
||||||
|
],
|
||||||
|
\"prs_submitted\": [\"[branch name if you created one, empty array if not]\"],
|
||||||
|
\"follow_ups\": [\"[specific next research directions]\"]
|
||||||
|
}
|
||||||
|
|
||||||
|
Rules:
|
||||||
|
- Be concrete. \"Found interesting data\" is useless. \"MetaDAO pass rate dropped from 78% to 52%\" is useful.
|
||||||
|
- Surprises should be genuine — things that updated your model of the world, not things you already expected.
|
||||||
|
- If nothing surprised you, say so honestly — that itself is informative (you may be in a filter bubble).
|
||||||
|
- Confidence shifts: only list beliefs that actually moved. No shift is fine — report \"unchanged\" with why.
|
||||||
|
- This file is for Cory to read each morning. Write for a human who wants to know what you learned.
|
||||||
|
|
||||||
### Step 9: Stop
|
### Step 9: Stop
|
||||||
When you've finished archiving sources, updating your musing, and writing the research journal entry, STOP. Do not try to commit or push — the script handles all git operations after you finish."
|
When you've finished archiving sources, updating your musing, and writing the research journal entry, STOP. Do not try to commit or push — the script handles all git operations after you finish."
|
||||||
|
|
||||||
|
|
|
||||||
38
ops/systemd/teleo-agent@.service
Normal file
38
ops/systemd/teleo-agent@.service
Normal file
|
|
@ -0,0 +1,38 @@
|
||||||
|
[Unit]
|
||||||
|
Description=Teleo Agent %i
|
||||||
|
After=network.target
|
||||||
|
Wants=network.target
|
||||||
|
|
||||||
|
[Service]
|
||||||
|
Type=simple
|
||||||
|
User=teleo
|
||||||
|
Group=teleo
|
||||||
|
WorkingDirectory=/opt/teleo-eval/telegram
|
||||||
|
|
||||||
|
# Touch required paths before startup (prevents namespace crash on missing files)
|
||||||
|
ExecStartPre=/bin/bash -c 'touch /opt/teleo-eval/workspaces/.main-worktree.lock'
|
||||||
|
# Validate config before starting (fail fast on bad config)
|
||||||
|
ExecStartPre=/opt/teleo-eval/pipeline/.venv/bin/python3 /opt/teleo-eval/telegram/agent_runner.py --agent %i --validate
|
||||||
|
|
||||||
|
ExecStart=/opt/teleo-eval/pipeline/.venv/bin/python3 /opt/teleo-eval/telegram/agent_runner.py --agent %i
|
||||||
|
|
||||||
|
Restart=on-failure
|
||||||
|
RestartSec=10
|
||||||
|
|
||||||
|
# Filesystem protection (Rhea-approved)
|
||||||
|
ProtectSystem=strict
|
||||||
|
ReadWritePaths=/opt/teleo-eval/logs
|
||||||
|
ReadWritePaths=/opt/teleo-eval/telegram-archives
|
||||||
|
ReadWritePaths=/opt/teleo-eval/workspaces/main/inbox
|
||||||
|
ReadWritePaths=/opt/teleo-eval/workspaces/.main-worktree.lock
|
||||||
|
ReadWritePaths=/opt/teleo-eval/pipeline/pipeline.db
|
||||||
|
ReadWritePaths=/opt/teleo-eval/pipeline/pipeline.db-wal
|
||||||
|
ReadWritePaths=/opt/teleo-eval/pipeline/pipeline.db-shm
|
||||||
|
|
||||||
|
# Agent-specific learnings (all agents share the worktree write path)
|
||||||
|
ReadWritePaths=/opt/teleo-eval/workspaces/main/agents
|
||||||
|
|
||||||
|
Environment=PYTHONUNBUFFERED=1
|
||||||
|
|
||||||
|
[Install]
|
||||||
|
WantedBy=multi-user.target
|
||||||
21
ops/systemd/teleo-diagnostics.service
Normal file
21
ops/systemd/teleo-diagnostics.service
Normal file
|
|
@ -0,0 +1,21 @@
|
||||||
|
[Unit]
|
||||||
|
Description=Argus — Teleo Pipeline Diagnostics Dashboard
|
||||||
|
After=teleo-pipeline.service
|
||||||
|
Wants=teleo-pipeline.service
|
||||||
|
|
||||||
|
[Service]
|
||||||
|
Type=simple
|
||||||
|
User=teleo
|
||||||
|
Group=teleo
|
||||||
|
WorkingDirectory=/opt/teleo-eval/diagnostics
|
||||||
|
ExecStart=/usr/bin/python3 /opt/teleo-eval/diagnostics/app.py
|
||||||
|
Environment=PIPELINE_DB=/opt/teleo-eval/pipeline/pipeline.db
|
||||||
|
Environment=ARGUS_PORT=8081
|
||||||
|
Environment=REPO_DIR=/opt/teleo-eval/workspaces/main
|
||||||
|
Restart=on-failure
|
||||||
|
RestartSec=5
|
||||||
|
StandardOutput=journal
|
||||||
|
StandardError=journal
|
||||||
|
|
||||||
|
[Install]
|
||||||
|
WantedBy=multi-user.target
|
||||||
37
ops/systemd/teleo-pipeline.service
Normal file
37
ops/systemd/teleo-pipeline.service
Normal file
|
|
@ -0,0 +1,37 @@
|
||||||
|
[Unit]
|
||||||
|
Description=Teleo Pipeline v2 — extraction/eval/merge daemon
|
||||||
|
After=network.target
|
||||||
|
Wants=network.target
|
||||||
|
|
||||||
|
[Service]
|
||||||
|
Type=simple
|
||||||
|
User=teleo
|
||||||
|
Group=teleo
|
||||||
|
WorkingDirectory=/opt/teleo-eval
|
||||||
|
ExecStartPre=/opt/teleo-eval/pipeline/fix-ownership.sh
|
||||||
|
ExecStart=/opt/teleo-eval/pipeline/.venv/bin/python3 /opt/teleo-eval/pipeline/teleo-pipeline.py
|
||||||
|
Restart=on-failure
|
||||||
|
RestartSec=30
|
||||||
|
|
||||||
|
# Graceful shutdown: SIGTERM → 60s drain → force-cancel → kill subprocesses
|
||||||
|
# 180s buffer handles in-flight extractions (up to 10 min each) (Ganymede)
|
||||||
|
KillSignal=SIGTERM
|
||||||
|
TimeoutStopSec=180
|
||||||
|
|
||||||
|
# Environment
|
||||||
|
Environment=PIPELINE_BASE=/opt/teleo-eval
|
||||||
|
EnvironmentFile=-/opt/teleo-eval/secrets/pipeline.env
|
||||||
|
|
||||||
|
# Logging goes to journal + pipeline.jsonl
|
||||||
|
StandardOutput=journal
|
||||||
|
StandardError=journal
|
||||||
|
|
||||||
|
# Security hardening
|
||||||
|
NoNewPrivileges=yes
|
||||||
|
ProtectSystem=strict
|
||||||
|
ReadWritePaths=/opt/teleo-eval /tmp
|
||||||
|
# PrivateTmp=no: daemon uses /tmp/teleo-extract-* worktrees shared with git (Ganymede)
|
||||||
|
PrivateTmp=no
|
||||||
|
|
||||||
|
[Install]
|
||||||
|
WantedBy=multi-user.target
|
||||||
Loading…
Reference in a new issue