"""Daily digest: aggregates 24h activity for Telegram bot consumption.

Data sources:
- pipeline.db: merged PRs, audit events, contributor activity
- Forgejo API: PR descriptions for claim summaries
- claim-index: total claims, domain breakdown
- review queue: pending approval counts

Endpoint: GET /api/daily-digest?hours=24
"""

import asyncio
import json
import logging
import sqlite3
from collections import Counter
from datetime import datetime, timezone, timedelta
from typing import Any

import aiohttp

logger = logging.getLogger("argus.daily_digest")

FORGEJO_BASE = "https://git.livingip.xyz/api/v1"
REPO = "teleo/teleo-codex"
CLAIM_INDEX_URL = "http://localhost:8080/claim-index"

# Paging limits used only when Forgejo does not send X-Total-Count.
_OPEN_PR_PAGE_SIZE = 50
_OPEN_PR_MAX_PAGES = 20


async def fetch_daily_digest(
    db_path: str,
    forgejo_token: str | None = None,
    hours: int = 24,
    timeout_s: int = 15,
) -> dict[str, Any]:
    """Build the daily digest payload.

    Returns structured data for Epimetheus's Telegram bot to format and send.

    Args:
        db_path: Filesystem path of the pipeline SQLite database (opened
            read-only).
        forgejo_token: Optional Forgejo API token; sent as an
            ``Authorization: token ...`` header when provided.
        hours: Size of the reporting window, counted back from now (UTC).
        timeout_s: Total timeout, in seconds, applied to each HTTP request.

    Returns:
        A dict with the keys ``period_hours``, ``generated_at``,
        ``claims_merged``, ``pipeline_stats``, ``agent_activity``,
        ``pending_review`` and ``knowledge_base``.

    Raises:
        sqlite3.Error: If the database cannot be opened or queried. HTTP
            failures are logged by the helpers and degrade to empty values.
    """
    cutoff = (datetime.now(timezone.utc) - timedelta(hours=hours)).isoformat()

    # sqlite3 is blocking, so run the queries in a worker thread instead of
    # stalling the event loop. They must finish before the HTTP fan-out
    # because the merged PR numbers drive the Forgejo detail requests.
    db_data = await asyncio.to_thread(_query_db, db_path, cutoff, hours)

    headers = {"Accept": "application/json"}
    if forgejo_token:
        headers["Authorization"] = f"token {forgejo_token}"

    # NOTE(review): ssl=False turns off TLS certificate verification for every
    # request on this session, including the Forgejo calls that carry the
    # token. Kept as-is because the deployment may rely on it -- confirm.
    # NOTE(review): these are session-wide default headers, so the token is
    # also sent to CLAIM_INDEX_URL.
    connector = aiohttp.TCPConnector(ssl=False)
    async with aiohttp.ClientSession(headers=headers, connector=connector) as session:
        # Fetch claim-index, merged PR details and open PR count concurrently.
        merged_numbers = [pr["number"] for pr in db_data["merged_prs"]]
        claim_index, pr_details, open_pr_count = await asyncio.gather(
            _fetch_claim_index(session, timeout_s),
            _fetch_merged_pr_details(session, merged_numbers, timeout_s),
            _fetch_open_pr_count(session, timeout_s),
        )

    # Enrich merged PRs with Forgejo titles/descriptions.
    merged_claims = _build_merged_claims(db_data["merged_prs"], pr_details)

    return {
        "period_hours": hours,
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "claims_merged": merged_claims,
        "pipeline_stats": {
            "prs_merged": db_data["prs_merged"],
            "prs_opened": db_data["prs_opened"],
            "prs_rejected": db_data["prs_rejected"],
            "approval_rate": db_data["approval_rate"],
            "top_rejection_reasons": db_data["top_rejection_reasons"],
        },
        "agent_activity": db_data["agent_activity"],
        "pending_review": {
            "open_prs": open_pr_count,
        },
        "knowledge_base": {
            "total_claims": claim_index.get("total_claims", 0),
            "domains": claim_index.get("domains", {}),
            "orphan_ratio": claim_index.get("orphan_ratio", 0),
            "cross_domain_links": claim_index.get("cross_domain_links", 0),
        },
    }


def _query_db(db_path: str, cutoff: str, hours: int) -> dict[str, Any]:
    """Run all digest queries against the pipeline DB (synchronous).

    Args:
        db_path: Path of the SQLite database; opened with ``mode=ro``.
        cutoff: Lower bound of the window, compared as a string against the
            ``merged_at`` / ``created_at`` / ``timestamp`` columns.
        hours: Unused here; kept so existing callers keep working.

    Returns:
        A dict with ``merged_prs`` (list of row dicts), ``prs_merged``,
        ``prs_opened``, ``prs_rejected``, ``approval_rate`` (percent, one
        decimal), ``top_rejection_reasons`` and ``agent_activity``.

    Raises:
        sqlite3.Error: If the database cannot be opened or a query fails.
    """
    # NOTE(review): cutoff is an ISO-8601 string with a 'T' separator and a
    # '+00:00' suffix; the >= comparisons are only correct if the columns use
    # the same text format -- confirm against the writer.
    conn = sqlite3.connect(f"file:{db_path}?mode=ro", uri=True)
    conn.row_factory = sqlite3.Row
    try:
        # Merged PRs in period
        merged_prs = conn.execute(
            """SELECT number, branch, domain, agent, commit_type, merged_at, cost_usd
               FROM prs
               WHERE status = 'merged' AND merged_at >= ?
               ORDER BY merged_at DESC""",
            (cutoff,),
        ).fetchall()
        prs_merged = len(merged_prs)

        # PRs opened in period
        prs_opened = conn.execute(
            "SELECT COUNT(*) FROM prs WHERE created_at >= ?", (cutoff,)
        ).fetchone()[0]

        # Distinct PRs with a rejection event in period. json_valid() keeps a
        # single malformed detail row from making json_extract() raise and
        # aborting the whole digest (same guard as the reasons query below).
        prs_rejected = conn.execute(
            """SELECT COUNT(DISTINCT json_extract(detail, '$.pr'))
               FROM audit_log
               WHERE stage = 'evaluate'
                 AND event IN ('domain_rejected', 'tier05_rejected')
                 AND timestamp >= ?
                 AND json_valid(detail)""",
            (cutoff,),
        ).fetchone()[0]

        # Approval rate: merged / (merged + rejected), as a percentage.
        total_evaluated = prs_merged + prs_rejected
        approval_rate = (
            round(prs_merged / total_evaluated * 100, 1) if total_evaluated > 0 else 0.0
        )

        # Top rejection reasons
        rejection_rows = conn.execute(
            """SELECT json_extract(detail, '$.issues') as issues
               FROM audit_log
               WHERE stage = 'evaluate'
                 AND event IN ('domain_rejected', 'tier05_rejected')
                 AND timestamp >= ?
                 AND json_valid(detail)""",
            (cutoff,),
        ).fetchall()

        reason_counts: Counter[str] = Counter()
        for row in rejection_rows:
            raw_issues = row["issues"]
            if not raw_issues:
                continue
            try:
                issues = json.loads(raw_issues)
            except (json.JSONDecodeError, TypeError):
                continue
            if not isinstance(issues, list):
                continue
            # Count only string reasons; a non-string entry must not discard
            # the remaining reasons of the same row.
            reason_counts.update(issue for issue in issues if isinstance(issue, str))

        top_rejection_reasons = [
            {"reason": reason, "count": count}
            for reason, count in reason_counts.most_common(5)
        ]

        # Agent activity -- who contributed what
        agent_rows = conn.execute(
            """SELECT agent,
                      COUNT(*) as total,
                      SUM(CASE WHEN status = 'merged' THEN 1 ELSE 0 END) as merged,
                      SUM(CASE WHEN commit_type = 'extract' OR commit_type = 'research' THEN 1 ELSE 0 END) as extractions,
                      SUM(CASE WHEN commit_type = 'challenge' THEN 1 ELSE 0 END) as challenges,
                      SUM(CASE WHEN commit_type = 'enrich' OR commit_type = 'reweave' THEN 1 ELSE 0 END) as enrichments,
                      SUM(CASE WHEN commit_type = 'synthesize' THEN 1 ELSE 0 END) as syntheses
               FROM prs
               WHERE created_at >= ? AND agent IS NOT NULL AND agent != ''
               GROUP BY agent
               ORDER BY merged DESC""",
            (cutoff,),
        ).fetchall()

        agent_activity = [
            {
                "agent": row["agent"],
                "prs_total": row["total"],
                "prs_merged": row["merged"],
                "extractions": row["extractions"],
                "challenges": row["challenges"],
                "enrichments": row["enrichments"],
                "syntheses": row["syntheses"],
            }
            for row in agent_rows
        ]

        return {
            "merged_prs": [dict(pr) for pr in merged_prs],
            "prs_merged": prs_merged,
            "prs_opened": prs_opened,
            "prs_rejected": prs_rejected,
            "approval_rate": approval_rate,
            "top_rejection_reasons": top_rejection_reasons,
            "agent_activity": agent_activity,
        }
    finally:
        conn.close()


async def _fetch_claim_index(session: aiohttp.ClientSession, timeout_s: int) -> dict:
    """Fetch claim-index summary stats.

    Returns a dict with ``total_claims``, ``domains``, ``orphan_ratio`` and
    ``cross_domain_links``, or an empty dict when the request fails or the
    response is not HTTP 200. Never raises; failures are logged.
    """
    try:
        async with session.get(
            CLAIM_INDEX_URL,
            timeout=aiohttp.ClientTimeout(total=timeout_s),
        ) as resp:
            if resp.status == 200:
                data = await resp.json()
                return {
                    "total_claims": data.get("total_claims", 0),
                    "domains": data.get("domains", {}),
                    "orphan_ratio": data.get("orphan_ratio", 0),
                    "cross_domain_links": data.get("cross_domain_links", 0),
                }
            logger.warning("claim-index returned HTTP %s", resp.status)
    except Exception as e:
        logger.warning("Failed to fetch claim-index: %s", e)
    return {}


async def _fetch_merged_pr_details(
    session: aiohttp.ClientSession,
    pr_numbers: list[int],
    timeout_s: int,
) -> dict[int, dict]:
    """Fetch PR details from Forgejo for merged PRs (concurrently).

    Returns a mapping of PR number to the decoded JSON body. A PR whose
    request fails or does not return HTTP 200 maps to an empty dict.
    """
    if not pr_numbers:
        return {}

    async def _fetch_one(n: int) -> tuple[int, dict]:
        url = f"{FORGEJO_BASE}/repos/{REPO}/pulls/{n}"
        try:
            async with session.get(
                url, timeout=aiohttp.ClientTimeout(total=timeout_s)
            ) as resp:
                if resp.status == 200:
                    return n, await resp.json()
        except Exception as e:
            logger.warning("Failed to fetch PR #%d: %s", n, e)
        return n, {}

    results = await asyncio.gather(*(_fetch_one(n) for n in pr_numbers))
    return dict(results)


async def _count_open_prs_by_paging(
    session: aiohttp.ClientSession,
    timeout: aiohttp.ClientTimeout,
) -> int:
    """Count open PRs by walking the paginated list endpoint.

    Stops at the first empty, non-list or non-200 page, and after
    ``_OPEN_PR_MAX_PAGES`` pages so a misbehaving server cannot loop forever.
    """
    count = 0
    for page in range(1, _OPEN_PR_MAX_PAGES + 1):
        url = (
            f"{FORGEJO_BASE}/repos/{REPO}/pulls"
            f"?state=open&limit={_OPEN_PR_PAGE_SIZE}&page={page}"
        )
        async with session.get(url, timeout=timeout) as resp:
            if resp.status != 200:
                break
            data = await resp.json()
        # Stop on an empty page rather than a short one: the server may cap
        # the page size below what was requested.
        if not isinstance(data, list) or not data:
            break
        count += len(data)
    return count


async def _fetch_open_pr_count(session: aiohttp.ClientSession, timeout_s: int) -> int:
    """Get count of open PRs from Forgejo.

    Prefers the ``X-Total-Count`` response header. If the header is absent,
    pages through the open PR list instead; the first request uses
    ``limit=1``, so its body alone can never hold more than one PR.

    Returns 0 when the request fails or does not return HTTP 200.
    """
    url = f"{FORGEJO_BASE}/repos/{REPO}/pulls?state=open&limit=1"
    timeout = aiohttp.ClientTimeout(total=timeout_s)
    try:
        async with session.get(url, timeout=timeout) as resp:
            if resp.status != 200:
                logger.warning("Open PR count request returned HTTP %s", resp.status)
                return 0
            total = resp.headers.get("X-Total-Count")
            if total is not None:
                return int(total)
        # Header missing: count by paging through the list.
        return await _count_open_prs_by_paging(session, timeout)
    except Exception as e:
        logger.warning("Failed to fetch open PR count: %s", e)
    return 0


def _build_merged_claims(
    merged_prs: list[dict],
    pr_details: dict[int, dict],
) -> list[dict]:
    """Build claim summaries from merged PRs + Forgejo PR bodies.

    Args:
        merged_prs: Row dicts from the ``prs`` table; each must contain
            ``number``.
        pr_details: Forgejo PR JSON keyed by PR number (may be missing or
            empty for a PR).

    Returns:
        One dict per merged PR, in input order. NULL/missing DB values and
        missing Forgejo fields fall back to readable defaults.
    """
    claims = []
    for pr in merged_prs:
        number = pr["number"]
        detail = pr_details.get(number) or {}

        # Summary comes from the PR body (Summary section or first paragraph).
        summary = _extract_summary(detail.get("body") or "")

        # Use `or` instead of .get(key, default): rows built from sqlite3.Row
        # always contain the key, so NULL columns arrive as None and a
        # .get() default would never apply.
        claims.append({
            "pr_number": number,
            "title": detail.get("title") or pr.get("branch") or f"PR #{number}",
            "agent": pr.get("agent") or "unknown",
            "domain": pr.get("domain") or "unknown",
            "commit_type": pr.get("commit_type") or "knowledge",
            "summary": summary,
            "merged_at": pr.get("merged_at") or "",
            "cost_usd": pr.get("cost_usd") or 0.0,
            "url": detail.get("html_url") or "",
        })

    return claims


def _extract_summary(body: str) -> str:
    """Extract a short summary from PR body markdown.

    Looks for a ``## Summary`` section first and joins up to three of its
    non-empty, non-checklist lines; otherwise falls back to the first
    non-header, non-checklist line. The result is capped at 300 characters.
    Returns an empty string when nothing suitable is found.
    """
    if not body:
        return ""

    lines = body.strip().split("\n")

    # Look for ## Summary section
    in_summary = False
    summary_lines: list[str] = []
    for line in lines:
        stripped = line.strip()
        if stripped.lower().startswith("## summary"):
            in_summary = True
            continue
        if not in_summary:
            continue
        # The next header ends the section. Test the stripped line so an
        # indented header is recognised the same way the section start is.
        if stripped.startswith("##"):
            break
        if stripped and not stripped.startswith("- ["):  # skip checklists
            summary_lines.append(stripped)
            if len(summary_lines) >= 3:
                break

    if summary_lines:
        return " ".join(summary_lines)[:300]

    # Fallback: first non-header, non-empty, non-checklist line
    for line in lines:
        stripped = line.strip()
        if stripped and not stripped.startswith(("#", "- [")):
            return stripped[:300]

    return ""