Imports 67 files from VPS (/opt/teleo-eval/) into repo as the single source of truth. Previously only 8 of 67 files existed in repo — the rest were deployed directly to VPS via SCP, causing massive drift. Includes: - pipeline/lib/: 33 Python modules (daemon core, extraction, evaluation, merge, cascade, cross-domain, costs, attribution, etc.) - pipeline/: main daemon (teleo-pipeline.py), reweave.py, batch-extract-50.sh - diagnostics/: 19 files (4-page dashboard, alerting, daily digest, review queue, tier1 metrics) - agent-state/: bootstrap, lib-state, cascade inbox processor, schema - systemd/: service unit files for reference - deploy.sh: rsync-based deploy with --dry-run, syntax checks, dirty-tree gate - research-session.sh: updated with Step 8.5 digest + cascade inbox processing No new code written — all files are exact copies from VPS as of 2026-04-06. From this point forward: edit in repo, commit, then deploy.sh. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
312 lines
11 KiB
Python
"""Daily digest: aggregates 24h activity for Telegram bot consumption.
|
|
|
|
Data sources:
|
|
- pipeline.db: merged PRs, audit events, contributor activity
|
|
- Forgejo API: PR descriptions for claim summaries
|
|
- claim-index: total claims, domain breakdown
|
|
- review queue: pending approval counts
|
|
|
|
Endpoint: GET /api/daily-digest?hours=24
|
|
"""
|
|
|
|
import asyncio
import json
import logging
import sqlite3
from collections import Counter
from datetime import datetime, timezone, timedelta
from typing import Any

import aiohttp
|
|
|
|
logger = logging.getLogger("argus.daily_digest")
|
|
|
|
FORGEJO_BASE = "https://git.livingip.xyz/api/v1"
|
|
REPO = "teleo/teleo-codex"
|
|
CLAIM_INDEX_URL = "http://localhost:8080/claim-index"
|
|
|
|
|
|
async def fetch_daily_digest(
|
|
db_path: str,
|
|
forgejo_token: str | None = None,
|
|
hours: int = 24,
|
|
timeout_s: int = 15,
|
|
) -> dict[str, Any]:
|
|
"""Build the daily digest payload.
|
|
|
|
Returns structured data for Epimetheus's Telegram bot to format and send.
|
|
"""
|
|
cutoff = (datetime.now(timezone.utc) - timedelta(hours=hours)).isoformat()
|
|
|
|
# Parallel: DB queries + HTTP fetches
|
|
db_data = _query_db(db_path, cutoff, hours)
|
|
|
|
headers = {"Accept": "application/json"}
|
|
if forgejo_token:
|
|
headers["Authorization"] = f"token {forgejo_token}"
|
|
|
|
connector = aiohttp.TCPConnector(ssl=False)
|
|
async with aiohttp.ClientSession(headers=headers, connector=connector) as session:
|
|
# Fetch claim-index, merged PR details from Forgejo, and open PR count in parallel
|
|
merged_numbers = [pr["number"] for pr in db_data["merged_prs"]]
|
|
|
|
tasks = [
|
|
_fetch_claim_index(session, timeout_s),
|
|
_fetch_merged_pr_details(session, merged_numbers, timeout_s),
|
|
_fetch_open_pr_count(session, timeout_s),
|
|
]
|
|
claim_index, pr_details, open_pr_count = await asyncio.gather(*tasks)
|
|
|
|
# Enrich merged PRs with Forgejo descriptions
|
|
merged_claims = _build_merged_claims(db_data["merged_prs"], pr_details)
|
|
|
|
return {
|
|
"period_hours": hours,
|
|
"generated_at": datetime.now(timezone.utc).isoformat(),
|
|
"claims_merged": merged_claims,
|
|
"pipeline_stats": {
|
|
"prs_merged": db_data["prs_merged"],
|
|
"prs_opened": db_data["prs_opened"],
|
|
"prs_rejected": db_data["prs_rejected"],
|
|
"approval_rate": db_data["approval_rate"],
|
|
"top_rejection_reasons": db_data["top_rejection_reasons"],
|
|
},
|
|
"agent_activity": db_data["agent_activity"],
|
|
"pending_review": {
|
|
"open_prs": open_pr_count,
|
|
},
|
|
"knowledge_base": {
|
|
"total_claims": claim_index.get("total_claims", 0),
|
|
"domains": claim_index.get("domains", {}),
|
|
"orphan_ratio": claim_index.get("orphan_ratio", 0),
|
|
"cross_domain_links": claim_index.get("cross_domain_links", 0),
|
|
},
|
|
}
|
|
|
|
|
|
def _query_db(db_path: str, cutoff: str, hours: int) -> dict[str, Any]:
|
|
"""Run all DB queries synchronously (SQLite is fast enough for digest)."""
|
|
conn = sqlite3.connect(f"file:{db_path}?mode=ro", uri=True)
|
|
conn.row_factory = sqlite3.Row
|
|
try:
|
|
# Merged PRs in period
|
|
merged_prs = conn.execute(
|
|
"""SELECT number, branch, domain, agent, commit_type, merged_at, cost_usd
|
|
FROM prs WHERE status = 'merged' AND merged_at >= ?
|
|
ORDER BY merged_at DESC""",
|
|
(cutoff,),
|
|
).fetchall()
|
|
|
|
prs_merged = len(merged_prs)
|
|
|
|
# PRs opened in period
|
|
prs_opened = conn.execute(
|
|
"SELECT COUNT(*) FROM prs WHERE created_at >= ?", (cutoff,)
|
|
).fetchone()[0]
|
|
|
|
# Rejected PRs in period (closed/zombie with rejection events)
|
|
prs_rejected = conn.execute(
|
|
"""SELECT COUNT(DISTINCT json_extract(detail, '$.pr'))
|
|
FROM audit_log
|
|
WHERE stage = 'evaluate'
|
|
AND event IN ('domain_rejected', 'tier05_rejected')
|
|
AND timestamp >= ?""",
|
|
(cutoff,),
|
|
).fetchone()[0]
|
|
|
|
# Approval rate
|
|
total_evaluated = prs_merged + prs_rejected
|
|
approval_rate = round(prs_merged / total_evaluated * 100, 1) if total_evaluated > 0 else 0.0
|
|
|
|
# Top rejection reasons
|
|
rejection_rows = conn.execute(
|
|
"""SELECT json_extract(detail, '$.issues') as issues
|
|
FROM audit_log
|
|
WHERE stage = 'evaluate'
|
|
AND event IN ('domain_rejected', 'tier05_rejected')
|
|
AND timestamp >= ?
|
|
AND json_valid(detail)""",
|
|
(cutoff,),
|
|
).fetchall()
|
|
|
|
reason_counts: dict[str, int] = {}
|
|
import json
|
|
for row in rejection_rows:
|
|
if row["issues"]:
|
|
try:
|
|
issues = json.loads(row["issues"])
|
|
if isinstance(issues, list):
|
|
for issue in issues:
|
|
reason_counts[issue] = reason_counts.get(issue, 0) + 1
|
|
except (json.JSONDecodeError, TypeError):
|
|
pass
|
|
|
|
top_rejection_reasons = sorted(reason_counts.items(), key=lambda x: -x[1])[:5]
|
|
top_rejection_reasons = [{"reason": r, "count": c} for r, c in top_rejection_reasons]
|
|
|
|
# Agent activity — who contributed what
|
|
agent_rows = conn.execute(
|
|
"""SELECT agent,
|
|
COUNT(*) as total,
|
|
SUM(CASE WHEN status = 'merged' THEN 1 ELSE 0 END) as merged,
|
|
SUM(CASE WHEN commit_type = 'extract' OR commit_type = 'research' THEN 1 ELSE 0 END) as extractions,
|
|
SUM(CASE WHEN commit_type = 'challenge' THEN 1 ELSE 0 END) as challenges,
|
|
SUM(CASE WHEN commit_type = 'enrich' OR commit_type = 'reweave' THEN 1 ELSE 0 END) as enrichments,
|
|
SUM(CASE WHEN commit_type = 'synthesize' THEN 1 ELSE 0 END) as syntheses
|
|
FROM prs
|
|
WHERE created_at >= ? AND agent IS NOT NULL AND agent != ''
|
|
GROUP BY agent
|
|
ORDER BY merged DESC""",
|
|
(cutoff,),
|
|
).fetchall()
|
|
|
|
agent_activity = [
|
|
{
|
|
"agent": row["agent"],
|
|
"prs_total": row["total"],
|
|
"prs_merged": row["merged"],
|
|
"extractions": row["extractions"],
|
|
"challenges": row["challenges"],
|
|
"enrichments": row["enrichments"],
|
|
"syntheses": row["syntheses"],
|
|
}
|
|
for row in agent_rows
|
|
]
|
|
|
|
return {
|
|
"merged_prs": [dict(pr) for pr in merged_prs],
|
|
"prs_merged": prs_merged,
|
|
"prs_opened": prs_opened,
|
|
"prs_rejected": prs_rejected,
|
|
"approval_rate": approval_rate,
|
|
"top_rejection_reasons": top_rejection_reasons,
|
|
"agent_activity": agent_activity,
|
|
}
|
|
finally:
|
|
conn.close()
|
|
|
|
|
|
async def _fetch_claim_index(session: aiohttp.ClientSession, timeout_s: int) -> dict:
|
|
"""Fetch claim-index summary stats."""
|
|
try:
|
|
async with session.get(
|
|
CLAIM_INDEX_URL,
|
|
timeout=aiohttp.ClientTimeout(total=timeout_s),
|
|
) as resp:
|
|
if resp.status == 200:
|
|
data = await resp.json()
|
|
return {
|
|
"total_claims": data.get("total_claims", 0),
|
|
"domains": data.get("domains", {}),
|
|
"orphan_ratio": data.get("orphan_ratio", 0),
|
|
"cross_domain_links": data.get("cross_domain_links", 0),
|
|
}
|
|
except Exception as e:
|
|
logger.warning("Failed to fetch claim-index: %s", e)
|
|
return {}
|
|
|
|
|
|
async def _fetch_merged_pr_details(
|
|
session: aiohttp.ClientSession,
|
|
pr_numbers: list[int],
|
|
timeout_s: int,
|
|
) -> dict[int, dict]:
|
|
"""Fetch PR details from Forgejo for merged PRs (parallel)."""
|
|
if not pr_numbers:
|
|
return {}
|
|
|
|
async def _fetch_one(n: int) -> tuple[int, dict]:
|
|
url = f"{FORGEJO_BASE}/repos/{REPO}/pulls/{n}"
|
|
try:
|
|
async with session.get(url, timeout=aiohttp.ClientTimeout(total=timeout_s)) as resp:
|
|
if resp.status == 200:
|
|
return n, await resp.json()
|
|
except Exception as e:
|
|
logger.warning("Failed to fetch PR #%d: %s", n, e)
|
|
return n, {}
|
|
|
|
results = await asyncio.gather(*[_fetch_one(n) for n in pr_numbers])
|
|
return {n: data for n, data in results}
|
|
|
|
|
|
async def _fetch_open_pr_count(session: aiohttp.ClientSession, timeout_s: int) -> int:
|
|
"""Get count of open PRs from Forgejo."""
|
|
url = f"{FORGEJO_BASE}/repos/{REPO}/pulls?state=open&limit=1"
|
|
try:
|
|
async with session.get(url, timeout=aiohttp.ClientTimeout(total=timeout_s)) as resp:
|
|
if resp.status == 200:
|
|
# Forgejo returns X-Total-Count header
|
|
total = resp.headers.get("X-Total-Count")
|
|
if total is not None:
|
|
return int(total)
|
|
# Fallback: fetch all and count
|
|
data = await resp.json()
|
|
return len(data)
|
|
except Exception as e:
|
|
logger.warning("Failed to fetch open PR count: %s", e)
|
|
return 0
|
|
|
|
|
|
def _build_merged_claims(
|
|
merged_prs: list[dict],
|
|
pr_details: dict[int, dict],
|
|
) -> list[dict]:
|
|
"""Build claim summaries from merged PRs + Forgejo PR bodies."""
|
|
claims = []
|
|
for pr in merged_prs:
|
|
number = pr["number"]
|
|
detail = pr_details.get(number, {})
|
|
|
|
# Extract summary from PR body (first paragraph or first 200 chars)
|
|
body = detail.get("body", "") or ""
|
|
summary = _extract_summary(body)
|
|
|
|
claims.append({
|
|
"pr_number": number,
|
|
"title": detail.get("title", pr.get("branch", f"PR #{number}")),
|
|
"agent": pr.get("agent", "unknown"),
|
|
"domain": pr.get("domain", "unknown"),
|
|
"commit_type": pr.get("commit_type", "knowledge"),
|
|
"summary": summary,
|
|
"merged_at": pr.get("merged_at", ""),
|
|
"cost_usd": pr.get("cost_usd", 0.0),
|
|
"url": detail.get("html_url", ""),
|
|
})
|
|
|
|
return claims
|
|
|
|
|
|
def _extract_summary(body: str) -> str:
|
|
"""Extract a 1-2 sentence summary from PR body markdown.
|
|
|
|
Looks for a Summary section first, then falls back to first non-header paragraph.
|
|
"""
|
|
if not body:
|
|
return ""
|
|
|
|
lines = body.strip().split("\n")
|
|
|
|
# Look for ## Summary section
|
|
in_summary = False
|
|
summary_lines = []
|
|
for line in lines:
|
|
if line.strip().lower().startswith("## summary"):
|
|
in_summary = True
|
|
continue
|
|
if in_summary:
|
|
if line.startswith("##"):
|
|
break
|
|
stripped = line.strip()
|
|
if stripped and not stripped.startswith("- ["): # skip checklists
|
|
summary_lines.append(stripped)
|
|
if len(summary_lines) >= 3:
|
|
break
|
|
|
|
if summary_lines:
|
|
return " ".join(summary_lines)[:300]
|
|
|
|
# Fallback: first non-header, non-empty paragraph
|
|
for line in lines:
|
|
stripped = line.strip()
|
|
if stripped and not stripped.startswith("#") and not stripped.startswith("- ["):
|
|
return stripped[:300]
|
|
|
|
return ""
|