Compare commits
37 commits
epimetheus ... main
| SHA1 |
|---|
| 3fe524dd14 |
| 45b2f6de20 |
| f0f9388c1f |
| 0f2b153c92 |
| 762fd4233e |
| 10d5c275da |
| 1d6b51527a |
| 540ba97b9d |
| 58fa8c5276 |
| 93917f9fc2 |
| 3fe0f4b744 |
| 05d15cea56 |
| cfcb06a6dc |
| 2f6424617b |
| 9a943e8460 |
| 84f6d3682c |
| 33c17f87a8 |
| a053a8ebf9 |
| 97b590acd6 |
| 469cb7f2da |
| 8de28d6ee0 |
| 05f375d775 |
| 4101048cd0 |
| af027d3ced |
| 1b27a2de31 |
| 11e026448a |
| c3d0b1f5a4 |
| 88e8e15c6d |
| 5463ca0b56 |
| e043cf98dc |
| 9c0be78620 |
| c29049924e |
| f463f49b46 |
| 9505e5b40a |
| f0cf772182 |
| 4fc541c656 |
| b7242d2206 |
28 changed files with 4891 additions and 99 deletions
|
|
@ -28,12 +28,9 @@ import sqlite3
|
|||
import json
|
||||
|
||||
|
||||
# Map PR status to Clay's operation color palette
|
||||
# extract (cyan), new (green), enrich (amber), challenge (red-orange),
|
||||
# decision (violet), infra (grey)
|
||||
STATUS_TO_OPERATION = {
|
||||
'merged': 'new', # green — new knowledge merged
|
||||
'approved': 'enrich', # amber — approved, enriching KB
|
||||
# Non-merged statuses map directly to operation — no semantic classification yet.
|
||||
NON_MERGED_STATUS_TO_OPERATION = {
|
||||
'approved': 'new', # about to become knowledge
|
||||
'open': 'extract', # cyan — new extraction in progress
|
||||
'validating': 'extract', # cyan — being validated
|
||||
'reviewing': 'extract', # cyan — under review
|
||||
|
|
@ -43,6 +40,51 @@ STATUS_TO_OPERATION = {
|
|||
'conflict': 'challenge', # red-orange — conflict detected
|
||||
}
|
||||
|
||||
# Maintenance commit_types that land on main but don't represent new knowledge.
|
||||
_MAINTENANCE_COMMIT_TYPES = {'fix', 'pipeline', 'reweave'}
|
||||
|
||||
|
||||
def classify_pr_operation(status, commit_type, branch, description=None):
|
||||
"""Derive a Timeline operation from a PR row.
|
||||
|
||||
Priority order for MERGED PRs (commit_type wins over branch prefix —
|
||||
extract/* branches with commit_type='enrich' or 'challenge' classify
|
||||
by commit_type, matching the contributor-role wiring fix):
|
||||
1. commit_type == 'challenge' OR branch.startswith('challenge/') OR
|
||||
description contains 'challenged_by' → 'challenge'
|
||||
2. commit_type == 'enrich' OR branch.startswith('enrich/' | 'reweave/')
|
||||
→ 'enrich'
|
||||
3. commit_type in _MAINTENANCE_COMMIT_TYPES → 'infra'
|
||||
4. default (commit_type='knowledge'|'extract'|'research'|'entity' or
|
||||
anything else) → 'new'
|
||||
|
||||
For non-merged PRs, falls back to NON_MERGED_STATUS_TO_OPERATION.
|
||||
"""
|
||||
commit_type = (commit_type or '').lower()
|
||||
branch = branch or ''
|
||||
description_lower = (description or '').lower()
|
||||
|
||||
if status != 'merged':
|
||||
return NON_MERGED_STATUS_TO_OPERATION.get(status, 'infra')
|
||||
|
||||
# Challenge takes precedence — the signal is inherently more specific.
|
||||
if (commit_type == 'challenge'
|
||||
or branch.startswith('challenge/')
|
||||
or 'challenged_by' in description_lower):
|
||||
return 'challenge'
|
||||
|
||||
if (commit_type == 'enrich'
|
||||
or branch.startswith('enrich/')
|
||||
or branch.startswith('reweave/')):
|
||||
return 'enrich'
|
||||
|
||||
if commit_type in _MAINTENANCE_COMMIT_TYPES:
|
||||
return 'infra'
|
||||
|
||||
# Default: legacy 'knowledge', new 'extract', 'research', 'entity',
|
||||
# unknown/null commit_type → treat as new knowledge.
|
||||
return 'new'
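# Worked examples with hypothetical branch names, following the priority order above:
#   classify_pr_operation('merged', 'enrich', 'extract/2026-04-20-foo')    -> 'enrich'
#       (commit_type wins over the extract/ branch prefix)
#   classify_pr_operation('merged', 'fix', 'fix/frontmatter-cleanup')      -> 'infra'
#   classify_pr_operation('merged', 'knowledge', 'extract/2026-04-20-foo') -> 'new'
#   classify_pr_operation('open', None, 'extract/2026-04-20-foo')          -> 'extract'
#       (non-merged statuses fall back to NON_MERGED_STATUS_TO_OPERATION)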
|
||||
|
||||
# Map audit_log stage to operation type
|
||||
STAGE_TO_OPERATION = {
|
||||
'ingest': 'extract',
|
||||
|
|
@ -118,6 +160,8 @@ async def handle_activity(request):
|
|||
Query params:
|
||||
limit (int, default 100, max 500): number of events to return
|
||||
cursor (ISO timestamp): return events older than this timestamp
|
||||
type (str, optional): comma-separated operation types to include
|
||||
(extract|new|enrich|challenge|infra). If absent, returns all types.
|
||||
|
||||
Derives events from two sources:
|
||||
1. prs table — per-PR events with domain, agent, status
|
||||
|
|
@ -131,6 +175,13 @@ async def handle_activity(request):
|
|||
limit = 100
|
||||
|
||||
cursor = request.query.get('cursor')
|
||||
type_param = request.query.get('type', '').strip()
|
||||
allowed_ops = None
|
||||
if type_param:
|
||||
allowed_ops = {t.strip() for t in type_param.split(',') if t.strip()}
|
||||
if not allowed_ops:
|
||||
allowed_ops = None
|
||||
|
||||
db_path = request.app['db_path']
|
||||
|
||||
try:
|
||||
|
|
@ -143,22 +194,27 @@ async def handle_activity(request):
|
|||
# Each PR generates events at created_at and merged_at timestamps
|
||||
pr_query = """
|
||||
SELECT number, status, domain, agent, branch, source_path,
|
||||
created_at, merged_at
|
||||
created_at, merged_at, source_channel, commit_type,
|
||||
description
|
||||
FROM prs
|
||||
WHERE {where_clause}
|
||||
ORDER BY COALESCE(merged_at, created_at) DESC
|
||||
LIMIT ?
|
||||
"""
|
||||
|
||||
# Over-fetch when filtering by type so we have enough matching rows after
|
||||
# post-build filtering. Cap at 2000 to avoid runaway queries.
|
||||
fetch_limit = min(2000, limit * 5) if allowed_ops else limit + 1
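# e.g. with limit=100: a type filter gives fetch_limit = min(2000, 500) = 500,
# while an unfiltered request keeps fetch_limit = limit + 1 = 101.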
|
||||
|
||||
if cursor:
|
||||
rows = conn.execute(
|
||||
pr_query.format(where_clause="COALESCE(merged_at, created_at) < ?"),
|
||||
(cursor, limit + 1)
|
||||
(cursor, fetch_limit)
|
||||
).fetchall()
|
||||
else:
|
||||
rows = conn.execute(
|
||||
pr_query.format(where_clause="1=1"),
|
||||
(limit + 1,)
|
||||
(fetch_limit,)
|
||||
).fetchall()
|
||||
|
||||
# Known knowledge agents for branch-prefix inference
|
||||
|
|
@ -166,7 +222,14 @@ async def handle_activity(request):
|
|||
|
||||
for row in rows:
|
||||
row_dict = dict(row)
|
||||
operation = STATUS_TO_OPERATION.get(row_dict['status'], 'infra')
|
||||
operation = classify_pr_operation(
|
||||
row_dict['status'],
|
||||
row_dict.get('commit_type'),
|
||||
row_dict.get('branch'),
|
||||
row_dict.get('description'),
|
||||
)
|
||||
if allowed_ops and operation not in allowed_ops:
|
||||
continue
|
||||
description = pr_description(row_dict)
|
||||
|
||||
# Use merged_at if available (more interesting event), else created_at
|
||||
|
|
@ -189,6 +252,7 @@ async def handle_activity(request):
|
|||
'description': description,
|
||||
'status': row_dict['status'],
|
||||
'pr_number': row_dict['number'],
|
||||
'source_channel': row_dict.get('source_channel') or 'unknown',
|
||||
})
|
||||
|
||||
# Source 2: Audit log events (secondary — pipeline-level)
|
||||
|
|
@ -217,6 +281,8 @@ async def handle_activity(request):
|
|||
for row in audit_rows:
|
||||
row_dict = dict(row)
|
||||
operation = STAGE_TO_OPERATION.get(row_dict['stage'], 'infra')
|
||||
if allowed_ops and operation not in allowed_ops:
|
||||
continue
|
||||
description = audit_description(row_dict)
|
||||
|
||||
events.append({
|
||||
|
|
@ -228,6 +294,7 @@ async def handle_activity(request):
|
|||
'description': description,
|
||||
'status': None,
|
||||
'pr_number': None,
|
||||
'source_channel': None, # audit events not tied to a PR
|
||||
})
|
||||
|
||||
conn.close()
|
||||
|
|
|
|||
214 diagnostics/activity_feed_api.py Normal file

|
|
@ -0,0 +1,214 @@
|
|||
"""Activity feed API — serves contribution events from pipeline.db."""
|
||||
import re
|
||||
import sqlite3
|
||||
import math
|
||||
import time
|
||||
from aiohttp import web
|
||||
|
||||
DB_PATH = "/opt/teleo-eval/pipeline/pipeline.db"
|
||||
_cache = {"data": None, "ts": 0}
|
||||
CACHE_TTL = 60 # 1 minute — activity should feel fresh
|
||||
|
||||
|
||||
def _get_conn():
|
||||
conn = sqlite3.connect(DB_PATH)
|
||||
conn.row_factory = sqlite3.Row
|
||||
conn.execute("PRAGMA busy_timeout = 10000")
|
||||
return conn
|
||||
|
||||
|
||||
def _classify_event(branch, description, commit_type):
|
||||
if commit_type != "knowledge":
|
||||
return None
|
||||
if branch and branch.startswith("extract/"):
|
||||
return "create"
|
||||
if branch and branch.startswith("reweave/"):
|
||||
return "enrich"
|
||||
if branch and branch.startswith("challenge/"):
|
||||
return "challenge"
|
||||
if description and "challenged_by" in description.lower():
|
||||
return "challenge"
|
||||
if branch and branch.startswith("enrich/"):
|
||||
return "enrich"
|
||||
return "create"
|
||||
|
||||
|
||||
def _normalize_contributor(submitted_by, agent):
|
||||
if submitted_by and submitted_by.strip():
|
||||
name = submitted_by.strip().lstrip("@")
|
||||
return name
|
||||
if agent and agent.strip() and agent != "pipeline":
|
||||
return agent.strip()
|
||||
return "pipeline"
|
||||
|
||||
|
||||
def _summary_from_branch(branch):
|
||||
if not branch:
|
||||
return ""
|
||||
parts = branch.split("/", 1)
|
||||
if len(parts) < 2:
|
||||
return ""
|
||||
slug = parts[1]
|
||||
slug = re.sub(r"^[\d-]+-", "", slug) # strip date prefix
|
||||
slug = re.sub(r"-[a-f0-9]{4}$", "", slug) # strip hash suffix
|
||||
return slug.replace("-", " ").strip().capitalize()
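# Worked example with a hypothetical branch name:
#   "extract/2026-04-20-starship-booster-catch-a3f2"
#     -> slug "2026-04-20-starship-booster-catch-a3f2" (after the prefix split)
#     -> "starship-booster-catch" (date prefix and 4-hex-char suffix stripped)
#     -> "Starship booster catch"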
|
||||
|
||||
|
||||
def _extract_claim_slugs(description, branch=None):
|
||||
if not description:
|
||||
if branch:
|
||||
parts = branch.split("/", 1)
|
||||
if len(parts) > 1:
|
||||
return [parts[1][:120]]
|
||||
return []
|
||||
titles = [t.strip() for t in description.split("|") if t.strip()]
|
||||
slugs = []
|
||||
for title in titles:
|
||||
slug = title.lower().strip()
|
||||
slug = "".join(c if c.isalnum() or c in (" ", "-") else "" for c in slug)
|
||||
slug = slug.replace(" ", "-").strip("-")
|
||||
if len(slug) > 10:
|
||||
slugs.append(slug[:120])
|
||||
return slugs
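# Worked example with a hypothetical pipe-separated description:
#   _extract_claim_slugs("Starship IFT-4 achieves booster catch | Raptor reuse confirmed")
#   -> ["starship-ift-4-achieves-booster-catch", "raptor-reuse-confirmed"]
# Titles that slugify to 10 characters or fewer are dropped.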
|
||||
|
||||
|
||||
def _hot_score(challenge_count, enrich_count, signal_count, hours_since):
|
||||
numerator = challenge_count * 3 + enrich_count * 2 + signal_count
|
||||
denominator = max(hours_since, 0.5) ** 1.5
|
||||
return numerator / denominator
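# e.g. a claim with 2 challenges, 1 enrichment and no other signals, last touched
# 4 hours ago, scores (2*3 + 1*2 + 0) / 4**1.5 = 8 / 8 = 1.0; the same activity at
# 16 hours scores 8 / 64 = 0.125, so recency dominates the "hot" ordering.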
|
||||
|
||||
|
||||
def _build_events():
|
||||
conn = _get_conn()
|
||||
try:
|
||||
rows = conn.execute("""
|
||||
SELECT p.number, p.branch, p.domain, p.agent, p.submitted_by,
|
||||
p.merged_at, p.description, p.commit_type, p.cost_usd,
|
||||
p.source_channel
|
||||
FROM prs p
|
||||
WHERE p.status = 'merged'
|
||||
AND p.commit_type = 'knowledge'
|
||||
AND p.merged_at IS NOT NULL
|
||||
ORDER BY p.merged_at DESC
|
||||
LIMIT 2000
|
||||
""").fetchall()
|
||||
|
||||
events = []
|
||||
claim_activity = {} # slug -> {challenges, enriches, signals, first_seen}
|
||||
|
||||
for row in rows:
|
||||
event_type = _classify_event(row["branch"], row["description"], row["commit_type"])
|
||||
if not event_type:
|
||||
continue
|
||||
|
||||
contributor = _normalize_contributor(row["submitted_by"], row["agent"])
|
||||
slugs = _extract_claim_slugs(row["description"], row["branch"])
|
||||
merged_at = row["merged_at"] or ""
|
||||
|
||||
ci_map = {"create": 0.35, "enrich": 0.25, "challenge": 0.40}
|
||||
ci_earned = ci_map.get(event_type, 0)
|
||||
|
||||
for slug in slugs:
|
||||
if slug not in claim_activity:
|
||||
claim_activity[slug] = {
|
||||
"challenges": 0, "enriches": 0, "signals": 0,
|
||||
"first_seen": merged_at,
|
||||
}
|
||||
if event_type == "challenge":
|
||||
claim_activity[slug]["challenges"] += 1
|
||||
elif event_type == "enrich":
|
||||
claim_activity[slug]["enriches"] += 1
|
||||
else:
|
||||
claim_activity[slug]["signals"] += 1
|
||||
|
||||
summary_text = ""
|
||||
if row["description"]:
|
||||
first_title = row["description"].split("|")[0].strip()
|
||||
if len(first_title) > 120:
|
||||
first_title = first_title[:117] + "..."
|
||||
summary_text = first_title
|
||||
elif row["branch"]:
|
||||
summary_text = _summary_from_branch(row["branch"])
|
||||
|
||||
for slug in (slugs[:1] if slugs else [""]):
|
||||
events.append({
|
||||
"type": event_type,
|
||||
"claim_slug": slug,
|
||||
"domain": row["domain"] or "unknown",
|
||||
"contributor": contributor,
|
||||
"timestamp": merged_at,
|
||||
"ci_earned": round(ci_earned, 2),
|
||||
"summary": summary_text,
|
||||
"pr_number": row["number"],
|
||||
"source_channel": row["source_channel"] or "unknown",
|
||||
})
|
||||
|
||||
return events, claim_activity
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
|
||||
def _sort_events(events, claim_activity, sort_mode, now_ts):
|
||||
if sort_mode == "recent":
|
||||
events.sort(key=lambda e: e["timestamp"], reverse=True)
|
||||
elif sort_mode == "hot":
|
||||
def hot_key(e):
|
||||
slug = e["claim_slug"]
|
||||
ca = claim_activity.get(slug, {"challenges": 0, "enriches": 0, "signals": 0})
|
||||
try:
|
||||
from datetime import datetime
|
||||
evt_time = datetime.fromisoformat(e["timestamp"].replace("Z", "+00:00"))
|
||||
hours = (now_ts - evt_time.timestamp()) / 3600
|
||||
except (ValueError, AttributeError):
|
||||
hours = 9999
|
||||
return _hot_score(ca["challenges"], ca["enriches"], ca["signals"], hours)
|
||||
events.sort(key=hot_key, reverse=True)
|
||||
elif sort_mode == "important":
|
||||
type_rank = {"challenge": 0, "enrich": 1, "create": 2}
|
||||
events.sort(key=lambda e: (type_rank.get(e["type"], 3), -len(e["summary"])))
|
||||
return events
|
||||
|
||||
|
||||
async def handle_activity_feed(request):
|
||||
sort_mode = request.query.get("sort", "recent")
|
||||
if sort_mode not in ("hot", "recent", "important"):
|
||||
sort_mode = "recent"
|
||||
domain = request.query.get("domain", "")
|
||||
contributor = request.query.get("contributor", "")
|
||||
try:
|
||||
limit = min(int(request.query.get("limit", "20")), 100)
|
||||
except ValueError:
|
||||
limit = 20
|
||||
try:
|
||||
offset = max(int(request.query.get("offset", "0")), 0)
|
||||
except ValueError:
|
||||
offset = 0
|
||||
|
||||
now = time.time()
|
||||
if _cache["data"] is None or (now - _cache["ts"]) > CACHE_TTL:
|
||||
_cache["data"] = _build_events()
|
||||
_cache["ts"] = now
|
||||
|
||||
events, claim_activity = _cache["data"]
|
||||
|
||||
filtered = events
|
||||
if domain:
|
||||
filtered = [e for e in filtered if e["domain"] == domain]
|
||||
if contributor:
|
||||
filtered = [e for e in filtered if e["contributor"] == contributor]
|
||||
|
||||
sorted_events = _sort_events(list(filtered), claim_activity, sort_mode, now)
|
||||
total = len(sorted_events)
|
||||
page = sorted_events[offset:offset + limit]
|
||||
|
||||
return web.json_response({
|
||||
"events": page,
|
||||
"total": total,
|
||||
"sort": sort_mode,
|
||||
"offset": offset,
|
||||
"limit": limit,
|
||||
}, headers={"Access-Control-Allow-Origin": "*"})
|
||||
|
||||
|
||||
def register(app):
|
||||
app.router.add_get("/api/activity-feed", handle_activity_feed)
|
||||
|
|
@ -42,7 +42,7 @@ API_KEY_FILE = Path(os.environ.get("ARGUS_API_KEY_FILE", "/opt/teleo-eval/secret
|
|||
|
||||
# Endpoints that skip auth (dashboard is public for now, can lock later)
|
||||
_PUBLIC_PATHS = frozenset({"/", "/prs", "/ops", "/health", "/agents", "/epistemic", "/legacy", "/audit", "/api/metrics", "/api/snapshots", "/api/vital-signs",
|
||||
"/api/contributors", "/api/domains", "/api/audit", "/api/yield", "/api/cost-per-claim", "/api/fix-rates", "/api/compute-profile", "/api/review-queue", "/api/daily-digest"})
|
||||
"/api/contributors", "/api/domains", "/api/audit", "/api/yield", "/api/cost-per-claim", "/api/fix-rates", "/api/compute-profile", "/api/review-queue", "/api/daily-digest", "/api/search"})
|
||||
|
||||
|
||||
def _get_db() -> sqlite3.Connection:
|
||||
|
|
@ -663,38 +663,115 @@ async def handle_api_domains(request):
|
|||
return web.json_response({"domains": breakdown})
|
||||
|
||||
|
||||
async def handle_api_search(request):
|
||||
"""GET /api/search — semantic search over claims via Qdrant + graph expansion.
|
||||
def _qdrant_hits_to_results(hits, include_expanded=False):
|
||||
"""Shape raw Qdrant hits into Ship's chat-API contract."""
|
||||
results = []
|
||||
for h in hits:
|
||||
payload = h.get("payload", {}) or {}
|
||||
path = payload.get("claim_path", "") or ""
|
||||
slug = path.rsplit("/", 1)[-1]
|
||||
if slug.endswith(".md"):
|
||||
slug = slug[:-3]
|
||||
results.append({
|
||||
"slug": slug,
|
||||
"path": path,
|
||||
"title": payload.get("claim_title", ""),
|
||||
"domain": payload.get("domain"),
|
||||
"confidence": payload.get("confidence"),
|
||||
"score": round(float(h.get("score", 0.0) or 0.0), 4),
|
||||
"body_excerpt": payload.get("snippet", "") or "",
|
||||
})
|
||||
return results
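# Shape sketch with a hypothetical Qdrant hit:
#   {"score": 0.8123, "payload": {"claim_path": "domains/space/starship-ift-4.md",
#    "claim_title": "Starship IFT-4", "domain": "space", "confidence": "medium",
#    "snippet": "..."}}
# becomes
#   {"slug": "starship-ift-4", "path": "domains/space/starship-ift-4.md",
#    "title": "Starship IFT-4", "domain": "space", "confidence": "medium",
#    "score": 0.8123, "body_excerpt": "..."}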
|
||||
|
||||
Query params:
|
||||
q: search query (required)
|
||||
domain: filter by domain (optional)
|
||||
confidence: filter by confidence level (optional)
|
||||
limit: max results, default 10 (optional)
|
||||
exclude: comma-separated claim paths to exclude (optional)
|
||||
expand: enable graph expansion, default true (optional)
|
||||
|
||||
async def handle_api_search(request):
|
||||
"""Semantic search over claims via Qdrant.
|
||||
|
||||
POST contract (Ship's chat API):
|
||||
body: {"query": str, "limit": int, "min_score": float?, "domain": str?, "confidence": str?, "exclude": [str]?}
|
||||
response: {"query": str, "results": [{"slug","path","title","domain","confidence","score","body_excerpt"}], "total": int}
|
||||
|
||||
GET (legacy + hackathon debug):
|
||||
q: search query (required)
|
||||
limit, domain, confidence, exclude, expand
|
||||
min_score: if set, bypasses two-pass lib threshold (default lib behavior otherwise)
|
||||
"""
|
||||
if request.method == "POST":
|
||||
try:
|
||||
body = await request.json()
|
||||
except Exception:
|
||||
return web.json_response({"error": "invalid JSON body"}, status=400)
|
||||
|
||||
query = (body.get("query") or "").strip()
|
||||
if not query:
|
||||
return web.json_response({"error": "query required"}, status=400)
|
||||
|
||||
try:
|
||||
limit = min(int(body.get("limit") or 5), 50)
|
||||
except (TypeError, ValueError):
|
||||
return web.json_response({"error": "limit must be int"}, status=400)
|
||||
try:
|
||||
min_score = float(body.get("min_score") if body.get("min_score") is not None else 0.25)
|
||||
except (TypeError, ValueError):
|
||||
return web.json_response({"error": "min_score must be float"}, status=400)
|
||||
|
||||
domain = body.get("domain")
|
||||
confidence = body.get("confidence")
|
||||
exclude = body.get("exclude") or None
|
||||
|
||||
vector = embed_query(query)
|
||||
if vector is None:
|
||||
return web.json_response({"error": "embedding failed"}, status=502)
|
||||
|
||||
hits = search_qdrant(vector, limit=limit, domain=domain,
|
||||
confidence=confidence, exclude=exclude,
|
||||
score_threshold=min_score)
|
||||
results = _qdrant_hits_to_results(hits)
|
||||
return web.json_response({"query": query, "results": results, "total": len(results)})
|
||||
|
||||
# GET path
|
||||
query = request.query.get("q", "").strip()
|
||||
if not query:
|
||||
return web.json_response({"error": "q parameter required"}, status=400)
|
||||
|
||||
domain = request.query.get("domain")
|
||||
confidence = request.query.get("confidence")
|
||||
limit = min(int(request.query.get("limit", "10")), 50)
|
||||
try:
|
||||
limit = min(int(request.query.get("limit", "10")), 50)
|
||||
except ValueError:
|
||||
return web.json_response({"error": "limit must be int"}, status=400)
|
||||
exclude_raw = request.query.get("exclude", "")
|
||||
exclude = [p.strip() for p in exclude_raw.split(",") if p.strip()] if exclude_raw else None
|
||||
expand = request.query.get("expand", "true").lower() != "false"
|
||||
min_score_raw = request.query.get("min_score")
|
||||
|
||||
# Use shared search library (Layer 1 + Layer 2)
|
||||
if min_score_raw is not None:
|
||||
try:
|
||||
min_score = float(min_score_raw)
|
||||
except ValueError:
|
||||
return web.json_response({"error": "min_score must be float"}, status=400)
|
||||
vector = embed_query(query)
|
||||
if vector is None:
|
||||
return web.json_response({"error": "embedding failed"}, status=502)
|
||||
hits = search_qdrant(vector, limit=limit, domain=domain,
|
||||
confidence=confidence, exclude=exclude,
|
||||
score_threshold=min_score)
|
||||
direct = _qdrant_hits_to_results(hits)
|
||||
return web.json_response({
|
||||
"query": query,
|
||||
"direct_results": direct,
|
||||
"expanded_results": [],
|
||||
"total": len(direct),
|
||||
})
|
||||
|
||||
# Default GET: Layer 1 + Layer 2 via lib
|
||||
result = kb_search(query, expand=expand,
|
||||
domain=domain, confidence=confidence, exclude=exclude)
|
||||
|
||||
if "error" in result:
|
||||
error = result["error"]
|
||||
if error == "embedding_failed":
|
||||
return web.json_response({"error": "embedding failed"}, status=502)
|
||||
return web.json_response({"error": error}, status=500)
|
||||
|
||||
return web.json_response(result)
|
||||
|
||||
|
||||
|
|
@ -2268,6 +2345,7 @@ def create_app() -> web.Application:
|
|||
app.router.add_get("/api/contributors", handle_api_contributors)
|
||||
app.router.add_get("/api/domains", handle_api_domains)
|
||||
app.router.add_get("/api/search", handle_api_search)
|
||||
app.router.add_post("/api/search", handle_api_search)
|
||||
app.router.add_get("/api/audit", handle_api_audit)
|
||||
app.router.add_get("/audit", handle_audit_page)
|
||||
app.router.add_post("/api/usage", handle_api_usage)
|
||||
|
|
@ -2283,6 +2361,18 @@ def create_app() -> web.Application:
|
|||
# Response audit - cost tracking + reasoning traces
|
||||
app["db_path"] = str(DB_PATH)
|
||||
register_response_audit_routes(app)
|
||||
# Timeline activity feed (per-PR + audit_log events for dashboard v2)
|
||||
from activity_endpoint import handle_activity
|
||||
app.router.add_get("/api/activity", handle_activity)
|
||||
# Gamification activity feed (hot/recent/important sort)
|
||||
from activity_feed_api import register as register_activity_feed
|
||||
register_activity_feed(app)
|
||||
# Claims browser + detail
|
||||
from claims_api import register_claims_routes
|
||||
register_claims_routes(app)
|
||||
# Contributor profile (handle lookup, leaderboard with action CI)
|
||||
from contributor_profile_api import register_contributor_routes
|
||||
register_contributor_routes(app)
|
||||
app.on_cleanup.append(_cleanup)
|
||||
return app
|
||||
|
||||
|
|
|
|||
161 diagnostics/claims_api.py Normal file
|
|
@ -0,0 +1,161 @@
|
|||
"""Claims API endpoint — serves claim data from the codex filesystem."""
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
import yaml
|
||||
from pathlib import Path
|
||||
from aiohttp import web
|
||||
|
||||
CODEX_ROOT = Path("/opt/teleo-eval/workspaces/main/domains")
|
||||
_cache = {"data": None, "ts": 0}
|
||||
CACHE_TTL = 300 # 5 minutes
|
||||
|
||||
def _parse_frontmatter(filepath):
|
||||
try:
|
||||
text = filepath.read_text(encoding="utf-8")
|
||||
if not text.startswith("---"):
|
||||
return None
|
||||
end = text.index("---", 3)
|
||||
fm = yaml.safe_load(text[3:end])
|
||||
if not fm or fm.get("type") != "claim":
|
||||
return None
|
||||
body = text[end+3:].strip()
|
||||
# Count wiki-links
|
||||
links = re.findall(r"\[\[([^\]]+)\]\]", body)
|
||||
# Extract first paragraph as summary
|
||||
paragraphs = [p.strip() for p in body.split("\n\n") if p.strip() and not p.strip().startswith("#")]
|
||||
summary = paragraphs[0][:300] if paragraphs else ""
|
||||
return {
|
||||
"slug": filepath.stem,
|
||||
"title": fm.get("title", filepath.stem.replace("-", " ")),
|
||||
"domain": fm.get("domain", "unknown"),
|
||||
"confidence": fm.get("confidence", "unknown"),
|
||||
"agent": fm.get("agent"),
|
||||
"scope": fm.get("scope"),
|
||||
"created": str(fm.get("created", "")),
|
||||
"source": fm.get("source", "") if isinstance(fm.get("source"), str) else "",
|
||||
"sourcer": fm.get("sourcer", ""),
|
||||
"wiki_link_count": len(links),
|
||||
"summary": summary,
|
||||
"challenged_by": fm.get("challenged_by"),
|
||||
"related_claims": fm.get("related_claims", []),
|
||||
}
|
||||
except Exception:
|
||||
return None
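# A minimal claim file this parser accepts (hypothetical content; only files whose
# frontmatter has type: claim are indexed):
#   ---
#   type: claim
#   title: Starship IFT-4 achieved booster catch
#   domain: space
#   confidence: medium
#   agent: rio
#   created: 2026-04-20
#   ---
#   The first body paragraph becomes the summary (truncated to 300 chars), and
#   [[wiki-links]] in the body are counted for wiki_link_count.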
|
||||
|
||||
|
||||
def _load_all_claims():
|
||||
now = time.time()
|
||||
if _cache["data"] and now - _cache["ts"] < CACHE_TTL:
|
||||
return _cache["data"]
|
||||
|
||||
claims = []
|
||||
for domain_dir in sorted(CODEX_ROOT.iterdir()):
|
||||
if not domain_dir.is_dir():
|
||||
continue
|
||||
for f in sorted(domain_dir.glob("*.md")):
|
||||
if f.name == "_map.md":
|
||||
continue
|
||||
c = _parse_frontmatter(f)
|
||||
if c:
|
||||
claims.append(c)
|
||||
|
||||
_cache["data"] = claims
|
||||
_cache["ts"] = now
|
||||
return claims
|
||||
|
||||
|
||||
async def handle_claims(request):
|
||||
claims = _load_all_claims()
|
||||
|
||||
# Filters
|
||||
domain = request.query.get("domain")
|
||||
search = request.query.get("q", "").lower()
|
||||
confidence = request.query.get("confidence")
|
||||
agent = request.query.get("agent")
|
||||
sort = request.query.get("sort", "recent") # recent, alpha, domain
|
||||
|
||||
filtered = claims
|
||||
if domain:
|
||||
filtered = [c for c in filtered if c["domain"] == domain]
|
||||
if confidence:
|
||||
filtered = [c for c in filtered if c["confidence"] == confidence]
|
||||
if agent:
|
||||
filtered = [c for c in filtered if c["agent"] == agent]
|
||||
if search:
|
||||
filtered = [c for c in filtered if search in c["title"].lower() or search in c["summary"].lower()]
|
||||
|
||||
# Sort
|
||||
if sort == "recent":
|
||||
filtered.sort(key=lambda c: c["created"], reverse=True)
|
||||
elif sort == "alpha":
|
||||
filtered.sort(key=lambda c: c["title"].lower())
|
||||
elif sort == "domain":
|
||||
filtered.sort(key=lambda c: (c["domain"], c["title"].lower()))
|
||||
|
||||
# Pagination
|
||||
limit = min(int(request.query.get("limit", "50")), 200)
|
||||
offset = int(request.query.get("offset", "0"))
|
||||
page = filtered[offset:offset+limit]
|
||||
|
||||
# Domain counts for sidebar
|
||||
domain_counts = {}
|
||||
for c in claims:
|
||||
domain_counts[c["domain"]] = domain_counts.get(c["domain"], 0) + 1
|
||||
|
||||
return web.json_response({
|
||||
"claims": page,
|
||||
"total": len(filtered),
|
||||
"offset": offset,
|
||||
"limit": limit,
|
||||
"domains": dict(sorted(domain_counts.items(), key=lambda x: -x[1])),
|
||||
"confidence_levels": sorted(set(c["confidence"] for c in claims)),
|
||||
"agents": sorted(set(c["agent"] for c in claims if c["agent"])),
|
||||
}, headers={"Access-Control-Allow-Origin": "*"})
|
||||
|
||||
|
||||
async def handle_claim_detail(request):
|
||||
slug = request.match_info["slug"]
|
||||
claims = _load_all_claims()
|
||||
for c in claims:
|
||||
if c["slug"] == slug:
|
||||
# Read full body for detail view
|
||||
for domain_dir in CODEX_ROOT.iterdir():
|
||||
if not domain_dir.is_dir():
|
||||
continue
|
||||
f = domain_dir / f"{slug}.md"
|
||||
if f.exists():
|
||||
text = f.read_text(encoding="utf-8")
|
||||
end = text.index("---", 3)
|
||||
body = text[end+3:].strip()
|
||||
c["body"] = body
|
||||
break
|
||||
return web.json_response(c, headers={"Access-Control-Allow-Origin": "*"})
|
||||
return web.json_response({"error": "claim not found"}, status=404)
|
||||
|
||||
|
||||
async def handle_domains(request):
|
||||
claims = _load_all_claims()
|
||||
domains = {}
|
||||
for c in claims:
|
||||
d = c["domain"]
|
||||
if d not in domains:
|
||||
domains[d] = {"name": d, "count": 0, "agents": set(), "confidence_dist": {}}
|
||||
domains[d]["count"] += 1
|
||||
if c["agent"]:
|
||||
domains[d]["agents"].add(c["agent"])
|
||||
conf = c["confidence"]
|
||||
domains[d]["confidence_dist"][conf] = domains[d]["confidence_dist"].get(conf, 0) + 1
|
||||
|
||||
result = []
|
||||
for d in sorted(domains.values(), key=lambda x: -x["count"]):
|
||||
d["agents"] = sorted(d["agents"])
|
||||
result.append(d)
|
||||
|
||||
return web.json_response(result, headers={"Access-Control-Allow-Origin": "*"})
|
||||
|
||||
|
||||
def register_claims_routes(app):
|
||||
app.router.add_get("/api/claims", handle_claims)
|
||||
app.router.add_get("/api/claims/{slug}", handle_claim_detail)
|
||||
app.router.add_get("/api/domains", handle_domains)
|
||||
365 diagnostics/contributor_profile_api.py Normal file
|
|
@ -0,0 +1,365 @@
|
|||
"""Contributor profile API — GET /api/contributors/{handle}"""
|
||||
|
||||
import sqlite3
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
from datetime import datetime
|
||||
|
||||
DB_PATH = os.environ.get("PIPELINE_DB", "/opt/teleo-eval/pipeline/pipeline.db")
|
||||
SYSTEM_ACCOUNTS = {"pipeline", "unknown", "teleo-agents", "teleo pipeline"}
|
||||
CODEX_PATH = "/opt/teleo-eval/workspaces/main"
|
||||
|
||||
CI_WEIGHTS = {
|
||||
"sourcer": 0.15,
|
||||
"extractor": 0.05,
|
||||
"challenger": 0.35,
|
||||
"synthesizer": 0.25,
|
||||
"reviewer": 0.20,
|
||||
}
|
||||
|
||||
FOUNDING_CUTOFF = "2026-03-15"
|
||||
|
||||
BADGE_DEFS = {
|
||||
"FOUNDING CONTRIBUTOR": {"rarity": "limited", "desc": "Contributed during pre-launch phase"},
|
||||
"BELIEF MOVER": {"rarity": "rare", "desc": "Challenge that led to a claim revision"},
|
||||
"KNOWLEDGE SOURCER": {"rarity": "uncommon", "desc": "Source that generated 3+ claims"},
|
||||
"DOMAIN SPECIALIST": {"rarity": "rare", "desc": "Top 3 CI contributor in a domain"},
|
||||
"VETERAN": {"rarity": "uncommon", "desc": "10+ accepted contributions"},
|
||||
"FIRST BLOOD": {"rarity": "common", "desc": "First contribution of any kind"},
|
||||
"CONTRIBUTOR": {"rarity": "common", "desc": "Account created + first accepted contribution"},
|
||||
}
|
||||
|
||||
|
||||
def _get_conn():
|
||||
conn = sqlite3.connect(DB_PATH)
|
||||
conn.row_factory = sqlite3.Row
|
||||
return conn
|
||||
|
||||
|
||||
def _compute_ci(row):
|
||||
total = 0
|
||||
for role, weight in CI_WEIGHTS.items():
|
||||
total += (row.get(f"{role}_count", 0) or 0) * weight
|
||||
return round(total, 2)
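# e.g. a contributor row with sourcer_count=2, challenger_count=1, reviewer_count=3
# (others zero) scores 2*0.15 + 1*0.35 + 3*0.20 = 1.25 under the weights above.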
|
||||
|
||||
|
||||
def _compute_badges(handle, row, domain_breakdown, conn):
|
||||
badges = []
|
||||
first = row.get("first_contribution", "")
|
||||
|
||||
if first and first <= FOUNDING_CUTOFF:
|
||||
badges.append("FOUNDING CONTRIBUTOR")
|
||||
|
||||
claims = row.get("claims_merged", 0) or 0
|
||||
if claims > 0:
|
||||
badges.append("CONTRIBUTOR")
|
||||
badges.append("FIRST BLOOD")
|
||||
|
||||
if claims >= 10:
|
||||
badges.append("VETERAN")
|
||||
|
||||
challenger = row.get("challenger_count", 0) or 0
|
||||
challenge_ci = row.get("_challenge_count_from_scores", 0)
|
||||
if challenger > 0 or challenge_ci > 0:
|
||||
badges.append("BELIEF MOVER")
|
||||
|
||||
sourcer = row.get("sourcer_count", 0) or 0
|
||||
if sourcer >= 3:
|
||||
badges.append("KNOWLEDGE SOURCER")
|
||||
|
||||
return badges
|
||||
|
||||
|
||||
def _get_domain_breakdown(handle, conn):
|
||||
rows = conn.execute("""
|
||||
SELECT domain, COUNT(*) as cnt
|
||||
FROM prs
|
||||
WHERE status='merged' AND (LOWER(agent)=LOWER(?) OR LOWER(submitted_by)=LOWER(?))
|
||||
AND domain IS NOT NULL
|
||||
GROUP BY domain ORDER BY cnt DESC
|
||||
""", (handle, handle)).fetchall()
|
||||
return {r["domain"]: r["cnt"] for r in rows}
|
||||
|
||||
|
||||
def _get_contribution_timeline(handle, conn, limit=20):
|
||||
rows = conn.execute("""
|
||||
SELECT number, domain, status, created_at, description, commit_type, source_path
|
||||
FROM prs
|
||||
WHERE status='merged' AND (LOWER(agent)=LOWER(?) OR LOWER(submitted_by)=LOWER(?))
|
||||
ORDER BY created_at DESC LIMIT ?
|
||||
""", (handle, handle, limit)).fetchall()
|
||||
|
||||
timeline = []
|
||||
for r in rows:
|
||||
desc = r["description"] or ""
|
||||
if not desc and r["source_path"]:
|
||||
desc = os.path.basename(r["source_path"]).replace("-", " ").replace(".md", "")
|
||||
timeline.append({
|
||||
"pr_number": r["number"],
|
||||
"domain": r["domain"],
|
||||
"date": r["created_at"][:10] if r["created_at"] else None,
|
||||
"type": _classify_commit(r["commit_type"]),
|
||||
"summary": desc[:200] if desc else None,
|
||||
})
|
||||
return timeline
|
||||
|
||||
|
||||
def _classify_commit(commit_type):
|
||||
if not commit_type:
|
||||
return "create"
|
||||
ct = commit_type.lower()
|
||||
if "challenge" in ct:
|
||||
return "challenge"
|
||||
if "enrich" in ct or "update" in ct or "reweave" in ct:
|
||||
return "enrich"
|
||||
return "create"
|
||||
|
||||
|
||||
def _get_review_stats(handle, conn):
|
||||
rows = conn.execute("""
|
||||
SELECT outcome, COUNT(*) as cnt
|
||||
FROM review_records
|
||||
WHERE LOWER(agent) = LOWER(?)
|
||||
GROUP BY outcome
|
||||
""", (handle,)).fetchall()
|
||||
stats = {}
|
||||
for r in rows:
|
||||
stats[r["outcome"]] = r["cnt"]
|
||||
return stats
|
||||
|
||||
|
||||
def _get_action_ci(handle, conn):
|
||||
"""Get action-type CI from contribution_scores table.
|
||||
|
||||
Checks both exact handle and common variants (with/without suffix).
|
||||
"""
|
||||
h = handle.lower()
|
||||
base = re.sub(r"[-_]\w+\d+$", "", h)
|
||||
variants = list({h, base}) if base and base != h else [h]
|
||||
try:
|
||||
placeholders = ",".join("?" for _ in variants)
|
||||
rows = conn.execute(f"""
|
||||
SELECT event_type, SUM(ci_earned) as total, COUNT(*) as cnt
|
||||
FROM contribution_scores
|
||||
WHERE LOWER(contributor) IN ({placeholders})
|
||||
GROUP BY event_type
|
||||
""", variants).fetchall()
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
if not rows:
|
||||
return None
|
||||
|
||||
breakdown = {}
|
||||
total = 0.0
|
||||
for r in rows:
|
||||
breakdown[r["event_type"]] = {
|
||||
"count": r["cnt"],
|
||||
"ci": round(r["total"], 4),
|
||||
}
|
||||
total += r["total"]
|
||||
|
||||
return {
|
||||
"total": round(total, 4),
|
||||
"breakdown": breakdown,
|
||||
}
|
||||
|
||||
|
||||
def _get_git_contributor(handle):
|
||||
"""Fallback: check git log for contributors not in pipeline.db."""
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["git", "log", "--all", "--format=%H|%an|%ae|%aI", "--diff-filter=A", "--", "domains/"],
|
||||
capture_output=True, text=True, cwd=CODEX_PATH, timeout=30
|
||||
)
|
||||
if result.returncode != 0:
|
||||
return None
|
||||
|
||||
claims = []
|
||||
for line in result.stdout.strip().split("\n"):
|
||||
if not line:
|
||||
continue
|
||||
parts = line.split("|", 3)
|
||||
if len(parts) < 4:
|
||||
continue
|
||||
sha, name, email, date = parts
|
||||
if handle.lower() in name.lower() or handle.lower() in email.lower():
|
||||
claims.append({"sha": sha, "author": name, "email": email, "date": date[:10]})
|
||||
|
||||
if not claims:
|
||||
return None
|
||||
|
||||
return {
|
||||
"handle": handle,
|
||||
"display_name": claims[0]["author"],
|
||||
"email": claims[0]["email"],
|
||||
"first_contribution": min(c["date"] for c in claims),
|
||||
"last_contribution": max(c["date"] for c in claims),
|
||||
"claims_merged": len(claims),
|
||||
"sourcer_count": 0,
|
||||
"extractor_count": 0,
|
||||
"challenger_count": 0,
|
||||
"synthesizer_count": 0,
|
||||
"reviewer_count": 0,
|
||||
}
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def get_contributor_profile(handle):
|
||||
conn = _get_conn()
|
||||
try:
|
||||
row = conn.execute(
|
||||
"SELECT * FROM contributors WHERE LOWER(handle) = LOWER(?)", (handle,)
|
||||
).fetchone()
|
||||
|
||||
if row:
|
||||
data = dict(row)
|
||||
else:
|
||||
git_data = _get_git_contributor(handle)
|
||||
if git_data:
|
||||
data = git_data
|
||||
else:
|
||||
return None
|
||||
|
||||
ci_score = _compute_ci(data)
|
||||
action_ci = _get_action_ci(handle, conn)
|
||||
domain_breakdown = _get_domain_breakdown(handle, conn)
|
||||
timeline = _get_contribution_timeline(handle, conn)
|
||||
review_stats = _get_review_stats(handle, conn)
|
||||
if action_ci and "challenge" in action_ci.get("breakdown", {}):
|
||||
data["_challenge_count_from_scores"] = action_ci["breakdown"]["challenge"]["count"]
|
||||
badges = _compute_badges(handle, data, domain_breakdown, conn)
|
||||
|
||||
# For git-only contributors, build domain breakdown from git
|
||||
if not domain_breakdown and not row:
|
||||
domain_breakdown = _git_domain_breakdown(handle)
|
||||
|
||||
hero_badge = None
|
||||
rarity_order = ["limited", "rare", "uncommon", "common"]
|
||||
for rarity in rarity_order:
|
||||
for b in badges:
|
||||
if BADGE_DEFS.get(b, {}).get("rarity") == rarity:
|
||||
hero_badge = b
|
||||
break
|
||||
if hero_badge:
|
||||
break
|
||||
|
||||
role_breakdown = {
|
||||
"sourcer": data.get("sourcer_count", 0) or 0,
|
||||
"extractor": data.get("extractor_count", 0) or 0,
|
||||
"challenger": data.get("challenger_count", 0) or 0,
|
||||
"synthesizer": data.get("synthesizer_count", 0) or 0,
|
||||
"reviewer": data.get("reviewer_count", 0) or 0,
|
||||
}
|
||||
total_roles = sum(role_breakdown.values())
|
||||
role_pct = {}
|
||||
for k, v in role_breakdown.items():
|
||||
role_pct[k] = round(v / total_roles * 100) if total_roles > 0 else 0
|
||||
|
||||
return {
|
||||
"handle": data.get("handle", handle),
|
||||
"display_name": data.get("display_name"),
|
||||
"ci_score": ci_score,
|
||||
"action_ci": action_ci,
|
||||
"primary_ci": action_ci["total"] if action_ci else ci_score,
|
||||
"hero_badge": hero_badge,
|
||||
"badges": [{"name": b, **BADGE_DEFS.get(b, {})} for b in badges],
|
||||
"joined": data.get("first_contribution"),
|
||||
"last_active": data.get("last_contribution"),
|
||||
"claims_merged": data.get("claims_merged", 0) or 0,
|
||||
"principal": data.get("principal"),
|
||||
"role_breakdown": role_breakdown,
|
||||
"role_percentages": role_pct,
|
||||
"domain_breakdown": domain_breakdown,
|
||||
"review_stats": review_stats,
|
||||
"contribution_timeline": timeline,
|
||||
"active_domains": list(domain_breakdown.keys()),
|
||||
}
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
|
||||
def _git_domain_breakdown(handle):
|
||||
"""For git-only contributors, count claims by domain from file paths."""
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["git", "log", "--all", "--name-only", "--format=COMMIT|%an", "--diff-filter=A", "--", "domains/"],
|
||||
capture_output=True, text=True, cwd=CODEX_PATH, timeout=30
|
||||
)
|
||||
if result.returncode != 0:
|
||||
return {}
|
||||
|
||||
domains = {}
|
||||
current_match = False
|
||||
for line in result.stdout.strip().split("\n"):
|
||||
if line.startswith("COMMIT|"):
|
||||
author = line.split("|", 1)[1]
|
||||
current_match = handle.lower() in author.lower()
|
||||
elif current_match and line.startswith("domains/"):
|
||||
parts = line.split("/")
|
||||
if len(parts) >= 2:
|
||||
domain = parts[1]
|
||||
domains[domain] = domains.get(domain, 0) + 1
|
||||
|
||||
return domains
|
||||
except Exception:
|
||||
return {}
|
||||
|
||||
|
||||
async def handle_contributor_profile(request):
|
||||
from aiohttp import web
|
||||
handle = request.match_info["handle"]
|
||||
profile = get_contributor_profile(handle)
|
||||
if profile is None:
|
||||
return web.json_response({"error": f"Contributor '{handle}' not found"}, status=404)
|
||||
return web.json_response(profile)
|
||||
|
||||
|
||||
async def handle_contributors_list(request):
|
||||
from aiohttp import web
|
||||
conn = _get_conn()
|
||||
try:
|
||||
min_claims = int(request.query.get("min_claims", "1"))
|
||||
rows = conn.execute("""
|
||||
SELECT handle, display_name, first_contribution, last_contribution,
|
||||
sourcer_count, extractor_count, challenger_count, synthesizer_count,
|
||||
reviewer_count, claims_merged, principal
|
||||
FROM contributors
|
||||
WHERE claims_merged >= ?
|
||||
ORDER BY claims_merged DESC
|
||||
""", (min_claims,)).fetchall()
|
||||
|
||||
contributors = []
|
||||
for r in rows:
|
||||
data = dict(r)
|
||||
if data["handle"].lower() in SYSTEM_ACCOUNTS:
|
||||
continue
|
||||
ci = _compute_ci(data)
|
||||
action_ci = _get_action_ci(data["handle"], conn)
|
||||
action_total = action_ci["total"] if action_ci else 0.0
|
||||
contributors.append({
|
||||
"handle": data["handle"],
|
||||
"display_name": data["display_name"],
|
||||
"ci_score": ci,
|
||||
"action_ci": action_total,
|
||||
"primary_ci": action_total if action_total > 0 else ci,
|
||||
"claims_merged": data["claims_merged"],
|
||||
"first_contribution": data["first_contribution"],
|
||||
"last_contribution": data["last_contribution"],
|
||||
"principal": data["principal"],
|
||||
})
|
||||
|
||||
return web.json_response({
|
||||
"contributors": contributors,
|
||||
"total": len(contributors),
|
||||
})
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
|
||||
def register_contributor_routes(app):
|
||||
app.router.add_get("/api/contributors/list", handle_contributors_list)
|
||||
app.router.add_get("/api/contributors/{handle}", handle_contributor_profile)
|
||||
|
|
@ -10,6 +10,7 @@ Endpoints:
|
|||
Owner: Argus
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
|
|
@ -17,6 +18,7 @@ import sqlite3
|
|||
import statistics
|
||||
import time
|
||||
import urllib.request
|
||||
from collections import defaultdict
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
|
|
@ -1182,6 +1184,113 @@ async def handle_telegram_extractions(request):
|
|||
conn.close()
|
||||
|
||||
|
||||
# ─── GET /api/contributor-growth ─────────────────────────────────────────
|
||||
|
||||
CODEX_WORKTREE = Path(os.environ.get("MAIN_WORKTREE", "/opt/teleo-eval/workspaces/main"))
|
||||
FOUNDING_CUTOFF = "2026-03-15"
|
||||
CONTRIBUTOR_EXCLUDE = {"Teleo Agents", "Teleo Pipeline"}
|
||||
|
||||
_growth_cache: dict | None = None
|
||||
_growth_cache_ts: float = 0
|
||||
GROWTH_CACHE_TTL = 300
|
||||
|
||||
|
||||
async def handle_contributor_growth(request):
|
||||
"""Cumulative unique contributors and claims over time from git log.
|
||||
|
||||
Returns time-series data for Chart.js line charts.
|
||||
Cached for 5 minutes since git log is expensive.
|
||||
"""
|
||||
global _growth_cache, _growth_cache_ts
|
||||
now = time.monotonic()
|
||||
if _growth_cache is not None and (now - _growth_cache_ts) < GROWTH_CACHE_TTL:
|
||||
return web.json_response(_growth_cache)
|
||||
|
||||
codex_path = str(CODEX_WORKTREE)
|
||||
if not CODEX_WORKTREE.exists():
|
||||
return web.json_response(
|
||||
{"error": "codex worktree not found", "path": codex_path}, status=404
|
||||
)
|
||||
|
||||
proc = await asyncio.create_subprocess_exec(
|
||||
"git", "log", "--format=%ad|%an", "--date=format:%Y-%m-%d", "--all",
|
||||
cwd=codex_path,
|
||||
stdout=asyncio.subprocess.PIPE,
|
||||
stderr=asyncio.subprocess.PIPE,
|
||||
)
|
||||
stdout, stderr = await proc.communicate()
|
||||
if proc.returncode != 0:
|
||||
return web.json_response(
|
||||
{"error": "git log failed", "detail": stderr.decode()[:500]}, status=500
|
||||
)
|
||||
|
||||
first_seen: dict[str, str] = {}
|
||||
daily_commits: dict[str, dict[str, int]] = defaultdict(lambda: defaultdict(int))
|
||||
for line in stdout.decode().strip().split("\n"):
|
||||
if "|" not in line:
|
||||
continue
|
||||
date, author = line.split("|", 1)
|
||||
if author in CONTRIBUTOR_EXCLUDE:
|
||||
continue
|
||||
daily_commits[date][author] += 1
|
||||
if author not in first_seen or date < first_seen[author]:
|
||||
first_seen[author] = date
|
||||
|
||||
by_date: dict[str, list[str]] = defaultdict(list)
|
||||
for author, date in first_seen.items():
|
||||
by_date[date].append(author)
|
||||
|
||||
contributors_timeline = []
|
||||
seen: set[str] = set()
|
||||
for date in sorted(by_date.keys()):
|
||||
new_authors = by_date[date]
|
||||
seen.update(new_authors)
|
||||
contributors_timeline.append({
|
||||
"date": date,
|
||||
"cumulative": len(seen),
|
||||
"new": [{"name": a, "founding": date <= FOUNDING_CUTOFF} for a in sorted(new_authors)],
|
||||
})
|
||||
|
||||
proc2 = await asyncio.create_subprocess_exec(
|
||||
"git", "log", "--format=%ad", "--date=format:%Y-%m-%d",
|
||||
"--all", "--diff-filter=A", "--", "domains/*.md",
|
||||
cwd=codex_path,
|
||||
stdout=asyncio.subprocess.PIPE,
|
||||
stderr=asyncio.subprocess.PIPE,
|
||||
)
|
||||
stdout2, _ = await proc2.communicate()
|
||||
claim_counts: dict[str, int] = defaultdict(int)
|
||||
for line in stdout2.decode().strip().split("\n"):
|
||||
line = line.strip()
|
||||
if line:
|
||||
claim_counts[line] += 1
|
||||
|
||||
claims_timeline = []
|
||||
cumulative = 0
|
||||
for date in sorted(claim_counts.keys()):
|
||||
cumulative += claim_counts[date]
|
||||
claims_timeline.append({"date": date, "cumulative": cumulative, "added": claim_counts[date]})
|
||||
|
||||
all_contributors = set(first_seen.keys())
|
||||
founding = sorted(a for a in all_contributors if first_seen[a] <= FOUNDING_CUTOFF)
|
||||
|
||||
result = {
|
||||
"generated_at": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
|
||||
"summary": {
|
||||
"total_contributors": len(all_contributors),
|
||||
"founding_contributors": founding,
|
||||
"total_claims": cumulative,
|
||||
"days_active": (datetime.now(timezone.utc) - datetime(2026, 3, 5, tzinfo=timezone.utc)).days,
|
||||
},
|
||||
"cumulative_contributors": contributors_timeline,
|
||||
"cumulative_claims": claims_timeline,
|
||||
}
|
||||
|
||||
_growth_cache = result
|
||||
_growth_cache_ts = now
|
||||
return web.json_response(result)
|
||||
|
||||
|
||||
# ─── Registration ──────────────────────────────────────────────────────────
|
||||
|
||||
def register_dashboard_routes(app: web.Application, get_conn):
|
||||
|
|
@ -1199,3 +1308,42 @@ def register_dashboard_routes(app: web.Application, get_conn):
|
|||
app.router.add_get("/api/growth", handle_growth)
|
||||
app.router.add_get("/api/pr-lifecycle", handle_pr_lifecycle)
|
||||
app.router.add_get("/api/telegram-extractions", handle_telegram_extractions)
|
||||
app.router.add_get("/api/contributor-growth", handle_contributor_growth)
|
||||
app.router.add_get("/api/digest/latest", handle_digest_latest)
|
||||
app.router.add_get("/api/contributor-graph", handle_contributor_graph)
|
||||
|
||||
|
||||
async def handle_digest_latest(request):
|
||||
"""GET /api/digest/latest — return the most recent scoring digest."""
|
||||
import json as _json
|
||||
digest_path = "/opt/teleo-eval/logs/scoring-digest-latest.json"
|
||||
try:
|
||||
with open(digest_path) as f:
|
||||
data = _json.load(f)
|
||||
return web.json_response(data)
|
||||
except FileNotFoundError:
|
||||
return web.json_response({"error": "No digest available yet"}, status=404)
|
||||
except Exception as e:
|
||||
return web.json_response({"error": str(e)}, status=500)
|
||||
|
||||
|
||||
async def handle_contributor_graph(request):
|
||||
"""GET /api/contributor-graph — serve the PNG chart."""
|
||||
import subprocess, os
|
||||
png_path = "/opt/teleo-eval/static/contributor-graph.png"
|
||||
# Regenerate if older than 1 hour or missing
|
||||
regen = not os.path.exists(png_path)
|
||||
if not regen:
|
||||
age = __import__('time').time() - os.path.getmtime(png_path)
|
||||
regen = age > 3600
|
||||
if regen:
|
||||
try:
|
||||
subprocess.run(
|
||||
['python3', '/opt/teleo-eval/scripts/contributor-graph.py'],
|
||||
timeout=30, capture_output=True
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
if not os.path.exists(png_path):
|
||||
return web.Response(text='Chart not available', status=503)
|
||||
return web.FileResponse(png_path, headers={'Content-Type': 'image/png'})
|
||||
|
|
|
|||
|
|
@ -90,6 +90,8 @@ def load_ownership_coins():
|
|||
continue
|
||||
if fm.get("subtype") != "ownership-coin":
|
||||
continue
|
||||
if fm.get("status") == "liquidated":
|
||||
continue
|
||||
|
||||
chain = fm.get("chain") or {}
|
||||
if isinstance(chain, str):
|
||||
|
|
|
|||
|
|
@ -21,6 +21,92 @@ logger = logging.getLogger("pipeline.attribution")
|
|||
|
||||
VALID_ROLES = frozenset({"sourcer", "extractor", "challenger", "synthesizer", "reviewer"})
|
||||
|
||||
# Agent-owned branch prefixes — PRs from these branches get Pentagon-Agent trailer
|
||||
# credit for challenger/synthesizer roles. Pipeline-infra branches (extract/ reweave/
|
||||
# fix/ ingestion/) are deliberately excluded: they're automation, not contribution.
|
||||
# Single source of truth; imported by contributor.py and backfill-events.py.
|
||||
AGENT_BRANCH_PREFIXES = (
|
||||
"rio/", "theseus/", "leo/", "vida/", "astra/", "clay/", "oberon/",
|
||||
)
|
||||
|
||||
# Handle sanity: lowercase alphanumerics, hyphens, underscores. 1-39 chars (matches
|
||||
# GitHub's handle rules). Rejects garbage like "governance---meritocratic-voting-+-futarchy"
|
||||
# or "sec-interpretive-release-s7-2026-09-(march-17" that upstream frontmatter hygiene
|
||||
# bugs produce. Apply at parse time so bad handles never reach the contributors table.
|
||||
_HANDLE_RE = re.compile(r"^[a-z0-9][a-z0-9_-]{0,38}$")
|
||||
|
||||
|
||||
def _valid_handle(handle: str) -> bool:
|
||||
"""Return True if handle matches the handle format (alphanum + _-, ≤39 chars)."""
|
||||
if not handle or not isinstance(handle, str):
|
||||
return False
|
||||
h = handle.strip().lower().lstrip("@")
|
||||
if h.endswith("-") or h.endswith("_"):
|
||||
return False
|
||||
return bool(_HANDLE_RE.match(h))
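# Examples:
#   _valid_handle("@TheSensatore")  -> True   (lowercased to "thesensatore")
#   _valid_handle("cameron-s1")     -> True
#   _valid_handle("cameron-")       -> False  (trailing separator)
#   _valid_handle("governance---meritocratic-voting-+-futarchy") -> False  ("+" and length)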
|
||||
|
||||
|
||||
def _filter_valid_handles(result: dict) -> dict:
|
||||
"""Drop entries with invalid handles from a parsed attribution dict."""
|
||||
filtered: dict[str, list[dict]] = {role: [] for role in VALID_ROLES}
|
||||
for role, entries in result.items():
|
||||
for entry in entries:
|
||||
if _valid_handle(entry.get("handle", "")):
|
||||
filtered[role].append(entry)
|
||||
return filtered
|
||||
|
||||
|
||||
# ─── Handle normalization + kind classification (schema v24) ──────────────
|
||||
|
||||
# Known Pentagon agents. Used to classify contributor kind='agent' so the
|
||||
# leaderboard can filter them out of the default person view.
|
||||
PENTAGON_AGENTS = frozenset({
|
||||
"rio", "leo", "theseus", "vida", "clay", "astra",
|
||||
"oberon", "argus", "rhea", "ganymede", "epimetheus", "hermes", "ship",
|
||||
"pipeline", # pipeline-owned commits (extract/*, reweave/*, fix/*)
|
||||
})
|
||||
|
||||
|
||||
def normalize_handle(handle: str, conn=None) -> str:
|
||||
"""Canonicalize a handle: lowercase, strip @, resolve alias if conn provided.
|
||||
|
||||
Examples:
|
||||
'@thesensatore' → 'thesensatore'
|
||||
'Cameron' → 'cameron' → 'cameron-s1' (via alias if seeded)
|
||||
'CNBC' → 'cnbc'
|
||||
|
||||
Always lowercases and strips @ prefix. Alias resolution requires a conn
|
||||
argument (not always available at parse time; merge-time writer passes it).
|
||||
"""
|
||||
if not handle:
|
||||
return ""
|
||||
h = handle.strip().lower().lstrip("@")
|
||||
if conn is None:
|
||||
return h
|
||||
try:
|
||||
row = conn.execute(
|
||||
"SELECT canonical FROM contributor_aliases WHERE alias = ?", (h,),
|
||||
).fetchone()
|
||||
if row:
|
||||
return row["canonical"] if isinstance(row, dict) or hasattr(row, "keys") else row[0]
|
||||
except Exception:
|
||||
# Alias table might not exist yet on pre-v24 DBs — degrade gracefully.
|
||||
logger.debug("normalize_handle: alias lookup failed for %r", h, exc_info=True)
|
||||
return h
|
||||
|
||||
|
||||
def classify_kind(handle: str) -> str:
|
||||
"""Return 'agent' for known Pentagon agents, 'person' otherwise.
|
||||
|
||||
The 'org' kind (CNBC, SpaceNews, etc.) is assigned by operator review,
|
||||
not inferred here. Keeping heuristics narrow: we know our own agents;
|
||||
everything else defaults to person until explicitly classified.
|
||||
"""
|
||||
h = handle.strip().lower().lstrip("@")
|
||||
if h in PENTAGON_AGENTS:
|
||||
return "agent"
|
||||
return "person"
|
||||
|
||||
|
||||
# ─── Parse attribution from claim content ──────────────────────────────────
|
||||
|
||||
|
|
@ -51,7 +137,11 @@ def parse_attribution(fm: dict) -> dict[str, list[dict]]:
|
|||
elif isinstance(entries, str):
|
||||
# Single entry as string
|
||||
result[role].append({"handle": entries.strip().lower().lstrip("@"), "agent_id": None, "context": None})
|
||||
return result
|
||||
# Fall through to the filter at the end (don't early-return). The nested
|
||||
# block path was skipping the handle sanity filter, letting garbage like
|
||||
# "senator-elissa-slotkin-/-the-hill" through when it was written into
|
||||
# frontmatter during the legacy-fallback era.
|
||||
return _filter_valid_handles(result)
|
||||
|
||||
# Flat format fallback (attribution_sourcer, attribution_extractor, etc.)
|
||||
for role in VALID_ROLES:
|
||||
|
|
@ -64,22 +154,40 @@ def parse_attribution(fm: dict) -> dict[str, list[dict]]:
|
|||
if isinstance(v, str):
|
||||
result[role].append({"handle": v.strip().lower().lstrip("@"), "agent_id": None, "context": None})
|
||||
|
||||
# Legacy fallback: infer from source field
|
||||
if not any(result[r] for r in VALID_ROLES):
|
||||
source = fm.get("source", "")
|
||||
if isinstance(source, str) and source:
|
||||
# Try to extract author handle from source string
|
||||
# Patterns: "@handle", "Author Name", "org, description"
|
||||
handle_match = re.search(r"@(\w+)", source)
|
||||
if handle_match:
|
||||
result["sourcer"].append({"handle": handle_match.group(1).lower(), "agent_id": None, "context": source})
|
||||
else:
|
||||
# Use first word/phrase before comma as sourcer handle
|
||||
author = source.split(",")[0].strip().lower().replace(" ", "-")
|
||||
if author and len(author) > 1:
|
||||
result["sourcer"].append({"handle": author, "agent_id": None, "context": source})
|
||||
# Bare-key flat format: `sourcer: alexastrum`, `extractor: leo`, etc.
|
||||
# This is what extract.py writes (line 290: f'sourcer: "{sourcer}"') — the most
|
||||
# common format in practice (~42% of claim files). The Apr 24 incident traced
|
||||
# missing leaderboard entries to this format being silently dropped because the
|
||||
# parser only checked the `attribution_*` prefix.
|
||||
# Only fill if the role wasn't already populated by the prefixed form, to avoid
|
||||
# double-counting when both formats coexist on the same claim.
|
||||
for role in VALID_ROLES:
|
||||
if result[role]:
|
||||
continue
|
||||
bare_val = fm.get(role)
|
||||
if isinstance(bare_val, str) and bare_val.strip():
|
||||
result[role].append({"handle": bare_val.strip().lower().lstrip("@"), "agent_id": None, "context": None})
|
||||
elif isinstance(bare_val, list):
|
||||
for v in bare_val:
|
||||
if isinstance(v, str) and v.strip():
|
||||
result[role].append({"handle": v.strip().lower().lstrip("@"), "agent_id": None, "context": None})
|
||||
elif isinstance(v, dict) and v.get("handle"):
|
||||
result[role].append({
|
||||
"handle": v["handle"].strip().lower().lstrip("@"),
|
||||
"agent_id": v.get("agent_id"),
|
||||
"context": v.get("context"),
|
||||
})
|
||||
|
||||
return result
|
||||
# Legacy `source` heuristic REMOVED (Ganymede review, Apr 24). It fabricated
|
||||
# handles from descriptive source strings — "governance---meritocratic-voting-+-
|
||||
# futarchy", "cameron-(contributor)", "sec-interpretive-release-s7-2026-09-
|
||||
# (march-17". Hit rate on real handles was near-zero, false-positive rate was
|
||||
# high. Claims without explicit attribution now return empty (better surface as
|
||||
# data hygiene than invent fake contributors).
|
||||
|
||||
# Filter to valid handles only. Bad handles (garbage from upstream frontmatter
|
||||
# bugs) get dropped rather than written to the contributors table.
|
||||
return _filter_valid_handles(result)
|
||||
|
||||
|
||||
def parse_attribution_from_file(filepath: str) -> dict[str, list[dict]]:
|
||||
|
|
|
|||
|
|
@ -156,13 +156,13 @@ CONTRIBUTOR_TIER_RULES = {
|
|||
},
|
||||
}
|
||||
|
||||
# Role weights for CI computation (must match schemas/contribution-weights.yaml)
|
||||
# Role weights for CI computation (must match core/contribution-architecture.md)
|
||||
CONTRIBUTION_ROLE_WEIGHTS = {
|
||||
"challenger": 0.35,
|
||||
"synthesizer": 0.25,
|
||||
"reviewer": 0.20,
|
||||
"sourcer": 0.15,
|
||||
"extractor": 0.40,
|
||||
"challenger": 0.20,
|
||||
"synthesizer": 0.15,
|
||||
"reviewer": 0.10,
|
||||
"extractor": 0.05,
|
||||
}
|
||||
|
||||
# --- Circuit breakers ---
|
||||
|
|
@ -200,6 +200,9 @@ MERGE_INTERVAL = 30
|
|||
FIX_INTERVAL = 60
|
||||
HEALTH_CHECK_INTERVAL = 60
|
||||
|
||||
# --- Extraction gates ---
|
||||
EXTRACTION_COOLDOWN_HOURS = 4 # Skip sources with any PR activity in this window. Defense-in-depth for DB-status filter.
|
||||
|
||||
# --- Retrieval (Telegram bot) ---
|
||||
RETRIEVAL_RRF_K = 20 # RRF smoothing constant — tuned for 5-10 results per source
|
||||
RETRIEVAL_ENTITY_BOOST = 1.5 # RRF score multiplier for claims wiki-linked from matched entities
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ Extracted from merge.py (Phase 5 decomposition). Functions:
|
|||
- refine_commit_type: extract → challenge/enrich refinement from diff content
|
||||
- record_contributor_attribution: parse trailers + frontmatter, upsert contributors
|
||||
- upsert_contributor: insert/update contributor record with role counts
|
||||
- insert_contribution_event: event-sourced credit log (schema v24)
|
||||
- recalculate_tier: tier promotion based on config rules
|
||||
"""
|
||||
|
||||
|
|
@ -13,11 +14,69 @@ import logging
|
|||
import re
|
||||
|
||||
from . import config, db
|
||||
from .attribution import AGENT_BRANCH_PREFIXES, classify_kind, normalize_handle
|
||||
from .forgejo import get_pr_diff
|
||||
|
||||
logger = logging.getLogger("pipeline.contributor")
|
||||
|
||||
|
||||
# ─── Event schema (v24) ───────────────────────────────────────────────────
|
||||
|
||||
# Role → CI weight, per Cory's confirmed schema (Apr 24 conversation).
|
||||
# Humans-are-always-author rule: agents never accumulate author credit;
|
||||
# evaluator (0.05) is the only agent-facing role. Internal agents still earn
|
||||
# author/challenger/synthesizer on their own autonomous research PRs but
|
||||
# surface in the kind='agent' leaderboard, not the default person view.
|
||||
ROLE_WEIGHTS = {
|
||||
"author": 0.30,
|
||||
"challenger": 0.25,
|
||||
"synthesizer": 0.20,
|
||||
"originator": 0.15,
|
||||
"evaluator": 0.05,
|
||||
}
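A worked example with the weights above (the event counts are invented):

```python
# 2 author events + 1 challenger event + 3 originator events:
# CI = 2*0.30 + 1*0.25 + 3*0.15 = 1.30
```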
|
||||
|
||||
|
||||
def insert_contribution_event(
|
||||
conn,
|
||||
handle: str,
|
||||
role: str,
|
||||
pr_number: int,
|
||||
*,
|
||||
claim_path: str | None = None,
|
||||
domain: str | None = None,
|
||||
channel: str | None = None,
|
||||
timestamp: str | None = None,
|
||||
) -> bool:
|
||||
"""Emit a contribution_events row. Idempotent via UNIQUE constraint.
|
||||
|
||||
Returns True if the event was inserted, False if the constraint blocked it
|
||||
(same handle/role/pr/claim_path combo already recorded — safe to replay).
|
||||
|
||||
Canonicalizes handle via alias table. Classifies kind from handle.
|
||||
Falls back silently if contribution_events table doesn't exist yet (pre-v24).
|
||||
"""
|
||||
if role not in ROLE_WEIGHTS:
|
||||
logger.warning("insert_contribution_event: unknown role %r", role)
|
||||
return False
|
||||
weight = ROLE_WEIGHTS[role]
|
||||
canonical = normalize_handle(handle, conn=conn)
|
||||
if not canonical:
|
||||
return False
|
||||
kind = classify_kind(canonical)
|
||||
try:
|
||||
cur = conn.execute(
|
||||
"""INSERT OR IGNORE INTO contribution_events
|
||||
(handle, kind, role, weight, pr_number, claim_path, domain, channel, timestamp)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, COALESCE(?, datetime('now')))""",
|
||||
(canonical, kind, role, weight, pr_number, claim_path, domain, channel, timestamp),
|
||||
)
|
||||
return cur.rowcount > 0
|
||||
except Exception:
|
||||
logger.debug("insert_contribution_event failed for pr=%d handle=%r role=%r",
|
||||
pr_number, canonical, role, exc_info=True)
|
||||
return False
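Usage sketch; the PR number and channel here are invented, and `conn` is assumed to be a sqlite3 connection with the v24 schema applied. The replay returns False because the PR-level partial UNIQUE index blocks the second insert.

```python
from lib.contributor import insert_contribution_event  # same import path ops/backfill-contributor-roles.py uses

inserted = insert_contribution_event(conn, "@thesensatore", "challenger", 412,
                                      domain="internet-finance", channel="telegram")
# True on first write; the alias table canonicalizes "@thesensatore" -> "thesensatore".
replayed = insert_contribution_event(conn, "thesensatore", "challenger", 412,
                                      domain="internet-finance", channel="telegram")
# False: same canonical handle/role/pr with NULL claim_path, so INSERT OR IGNORE is a no-op.
```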
|
||||
|
||||
|
||||
def is_knowledge_pr(diff: str) -> bool:
|
||||
"""Check if a PR touches knowledge files (claims, decisions, core, foundations).
|
||||
|
||||
|
|
@ -38,6 +97,22 @@ def is_knowledge_pr(diff: str) -> bool:
|
|||
return False
|
||||
|
||||
|
||||
COMMIT_TYPE_TO_ROLE = {
|
||||
"challenge": "challenger",
|
||||
"enrich": "synthesizer",
|
||||
"extract": "extractor",
|
||||
"research": "synthesizer",
|
||||
"entity": "extractor",
|
||||
"reweave": "synthesizer",
|
||||
"fix": "extractor",
|
||||
}
|
||||
|
||||
|
||||
def commit_type_to_role(commit_type: str) -> str:
|
||||
"""Map a refined commit_type to a contributor role."""
|
||||
return COMMIT_TYPE_TO_ROLE.get(commit_type, "extractor")
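Expected behaviour per the mapping above (the unmapped value is invented):

```python
from lib.contributor import commit_type_to_role

assert commit_type_to_role("challenge") == "challenger"
assert commit_type_to_role("reweave") == "synthesizer"
assert commit_type_to_role("something-unmapped") == "extractor"  # default role
```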
|
||||
|
||||
|
||||
def refine_commit_type(diff: str, branch_commit_type: str) -> str:
|
||||
"""Refine commit_type from diff content when branch prefix is ambiguous.
|
||||
|
||||
|
|
@ -109,15 +184,98 @@ async def record_contributor_attribution(conn, pr_number: int, branch: str, git_
|
|||
return
|
||||
|
||||
# Refine commit_type from diff content (branch prefix may be too broad)
|
||||
row = conn.execute("SELECT commit_type FROM prs WHERE number = ?", (pr_number,)).fetchone()
|
||||
row = conn.execute(
|
||||
"SELECT commit_type, submitted_by, domain, source_channel, leo_verdict, "
|
||||
"domain_verdict, domain_agent, merged_at FROM prs WHERE number = ?",
|
||||
(pr_number,),
|
||||
).fetchone()
|
||||
branch_type = row["commit_type"] if row and row["commit_type"] else "extract"
|
||||
refined_type = refine_commit_type(diff, branch_type)
|
||||
if refined_type != branch_type:
|
||||
conn.execute("UPDATE prs SET commit_type = ? WHERE number = ?", (refined_type, pr_number))
|
||||
logger.info("PR #%d: commit_type refined %s → %s", pr_number, branch_type, refined_type)
|
||||
|
||||
# Schema v24 event-sourcing context. Fetched once per PR, reused across emit sites.
|
||||
pr_domain = row["domain"] if row else None
|
||||
pr_channel = row["source_channel"] if row else None
|
||||
pr_submitted_by = row["submitted_by"] if row else None
|
||||
# Use the PR's merged_at timestamp so event time matches the actual merge.
|
||||
# If a merge retries after a crash, this keeps forward-emitted and backfilled
|
||||
# events on the same timeline. Falls back to datetime('now') in the writer.
|
||||
pr_merged_at = row["merged_at"] if row and row["merged_at"] else None
|
||||
|
||||
# ── AUTHOR event (schema v24, double-write) ──
|
||||
# Humans-are-always-author rule: the human in the loop gets author credit.
|
||||
# Precedence: prs.submitted_by (set by extract.py from source proposed_by, or
|
||||
# by discover for human PRs) → git author of first commit → branch-prefix agent.
|
||||
# Pentagon-owned infra branches (extract/ reweave/ fix/ ingestion/) don't get
|
||||
# author events from branch prefix; extract/ PRs carry submitted_by from the
|
||||
# source's proposed_by field so the human who submitted gets credit via path 1.
|
||||
author_candidate: str | None = None
|
||||
if pr_submitted_by:
|
||||
author_candidate = pr_submitted_by
|
||||
else:
|
||||
# External GitHub PRs: git author of the FIRST commit on the branch is
|
||||
# the real submitter. `git log -1` would return the latest commit, which
|
||||
# mis-credits multi-commit PRs where a reviewer rebased or force-pushed.
|
||||
# Take the last line of the unreversed log (= oldest commit, since git
|
||||
# log defaults to reverse-chronological). Ganymede review, Apr 24.
|
||||
rc_author_log, author_log = await git_fn(
|
||||
"log", f"origin/main..origin/{branch}", "--no-merges",
|
||||
"--format=%an", timeout=5,
|
||||
)
|
||||
if rc_author_log == 0 and author_log.strip():
|
||||
lines = [line for line in author_log.strip().split("\n") if line.strip()]
|
||||
if lines:
|
||||
candidate = lines[-1].strip().lower()
|
||||
if candidate and candidate not in {"teleo", "teleo-bot", "pipeline",
|
||||
"github-actions[bot]", "forgejo-actions"}:
|
||||
author_candidate = candidate
|
||||
# Agent-owned branches with no submitted_by: theseus/research-*, leo/*, etc.
|
||||
if not author_candidate and branch.startswith(AGENT_BRANCH_PREFIXES):
|
||||
# Autonomous agent PR (theseus/research-*, leo/entity-*, etc.) —
|
||||
# credit goes to the agent as author per Cory's directive.
|
||||
author_candidate = branch.split("/", 1)[0]
|
||||
|
||||
if author_candidate:
|
||||
insert_contribution_event(
|
||||
conn, author_candidate, "author", pr_number,
|
||||
claim_path=None, domain=pr_domain, channel=pr_channel,
|
||||
timestamp=pr_merged_at,
|
||||
)
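The first-commit-author selection described above, in miniature, against a plain `git log --format=%an` string (the names are invented):

```python
author_log = "reviewer-who-rebased\nalexastrum\n"        # newest commit first
lines = [ln for ln in author_log.strip().split("\n") if ln.strip()]
assert lines[-1].strip().lower() == "alexastrum"         # last line = oldest commit = real submitter
```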
|
||||
|
||||
# ── EVALUATOR events (schema v24) ──
|
||||
# Leo reviews every PR (STANDARD/DEEP tiers). domain_agent is the second
|
||||
# reviewer. Both earn evaluator credit (0.05) per approved PR. Skip when
|
||||
# verdict is 'request_changes' — failed review isn't contribution credit.
|
||||
if row:
|
||||
if row["leo_verdict"] == "approve":
|
||||
insert_contribution_event(
|
||||
conn, "leo", "evaluator", pr_number,
|
||||
claim_path=None, domain=pr_domain, channel=pr_channel,
|
||||
timestamp=pr_merged_at,
|
||||
)
|
||||
if row["domain_verdict"] == "approve" and row["domain_agent"]:
|
||||
dagent = row["domain_agent"].strip().lower()
|
||||
if dagent and dagent != "leo": # don't double-credit leo
|
||||
insert_contribution_event(
|
||||
conn, dagent, "evaluator", pr_number,
|
||||
claim_path=None, domain=pr_domain, channel=pr_channel,
|
||||
timestamp=pr_merged_at,
|
||||
)
|
||||
|
||||
# Parse Pentagon-Agent trailer from branch commit messages
|
||||
agents_found: set[str] = set()
|
||||
# Agent-owned branches (theseus/*, rio/*, etc.) give the trailer-named agent
|
||||
# challenger/synthesizer credit based on refined commit_type. Pipeline-owned
|
||||
# branches (extract/*, reweave/*, etc.) don't — those are infra, not work.
|
||||
is_agent_branch = branch.startswith(AGENT_BRANCH_PREFIXES)
|
||||
_TRAILER_EVENT_ROLE = {
|
||||
"challenge": "challenger",
|
||||
"enrich": "synthesizer",
|
||||
"research": "synthesizer",
|
||||
"reweave": "synthesizer",
|
||||
}
|
||||
rc, log_output = await git_fn(
|
||||
"log", f"origin/main..origin/{branch}", "--format=%b%n%N",
|
||||
timeout=10,
|
||||
|
|
@ -126,32 +284,81 @@ async def record_contributor_attribution(conn, pr_number: int, branch: str, git_
|
|||
for match in re.finditer(r"Pentagon-Agent:\s*(\S+)\s*<([^>]+)>", log_output):
|
||||
agent_name = match.group(1).lower()
|
||||
agent_uuid = match.group(2)
|
||||
role = commit_type_to_role(refined_type)
|
||||
upsert_contributor(
|
||||
conn, agent_name, agent_uuid, "extractor", today,
|
||||
conn, agent_name, agent_uuid, role, today,
|
||||
)
|
||||
# Event-emit only for agent-owned branches where the trailer's agent
|
||||
# actually did the substantive work (challenger/synthesizer).
|
||||
event_role = _TRAILER_EVENT_ROLE.get(refined_type)
|
||||
if is_agent_branch and event_role:
|
||||
insert_contribution_event(
|
||||
conn, agent_name, event_role, pr_number,
|
||||
claim_path=None, domain=pr_domain, channel=pr_channel,
|
||||
timestamp=pr_merged_at,
|
||||
)
|
||||
agents_found.add(agent_name)
|
||||
|
||||
# Parse attribution blocks from claim frontmatter in diff
|
||||
# Look for added lines with attribution YAML
|
||||
current_role = None
|
||||
for line in diff.split("\n"):
|
||||
if not line.startswith("+") or line.startswith("+++"):
|
||||
continue
|
||||
stripped = line[1:].strip()
|
||||
# Parse attribution from NEWLY ADDED knowledge files via the canonical attribution
|
||||
# parser (lib/attribution.py). The previous diff-line regex parser dropped
|
||||
# both the bare-key flat format (`sourcer: alexastrum`) and the nested
|
||||
# `attribution:` block format because it only matched `- handle: "X"` lines.
|
||||
# The Apr 24 incident traced missing leaderboard entries (alexastrum=0,
|
||||
# thesensatore=0, cameron-s1=0) directly to this parser's blind spots.
|
||||
#
|
||||
# --diff-filter=A restricts to added files only (Ganymede review): enrich and
|
||||
# challenge PRs modify existing claims, and re-crediting the existing sourcer on
|
||||
# every modification would inflate counts. The synthesizer/challenger/reviewer
|
||||
# roles for those PRs are credited via the Pentagon-Agent trailer path above.
|
||||
rc_files, files_output = await git_fn(
|
||||
"diff", "--name-only", "--diff-filter=A",
|
||||
f"origin/main...origin/{branch}", timeout=10,
|
||||
)
|
||||
if rc_files == 0 and files_output:
|
||||
from pathlib import Path
|
||||
from . import config
|
||||
from .attribution import parse_attribution_from_file
|
||||
|
||||
# Detect role sections in attribution block
|
||||
for role in ("sourcer", "extractor", "challenger", "synthesizer", "reviewer"):
|
||||
if stripped.startswith(f"{role}:"):
|
||||
current_role = role
|
||||
break
|
||||
|
||||
# Extract handle from attribution entries
|
||||
handle_match = re.match(r'-\s*handle:\s*["\']?([^"\']+)["\']?', stripped)
|
||||
if handle_match and current_role:
|
||||
handle = handle_match.group(1).strip().lower()
|
||||
agent_id_match = re.search(r'agent_id:\s*["\']?([^"\']+)', stripped)
|
||||
agent_id = agent_id_match.group(1).strip() if agent_id_match else None
|
||||
upsert_contributor(conn, handle, agent_id, current_role, today)
|
||||
main_root = Path(config.MAIN_WORKTREE)
|
||||
# Match is_knowledge_pr's gate exactly. Entities/convictions are excluded
|
||||
# here because is_knowledge_pr skips entity-only PRs at line 123 — so a
|
||||
# broader list here only matters for mixed PRs where the narrower list
|
||||
# already matches via the claim file. Widening requires Cory sign-off
|
||||
# since it would change leaderboard accounting (entity-only PRs → CI credit).
|
||||
knowledge_prefixes = ("domains/", "core/", "foundations/", "decisions/")
|
||||
author_canonical = normalize_handle(author_candidate, conn=conn) if author_candidate else None
|
||||
for rel_path in files_output.strip().split("\n"):
|
||||
rel_path = rel_path.strip()
|
||||
if not rel_path.endswith(".md"):
|
||||
continue
|
||||
if not rel_path.startswith(knowledge_prefixes):
|
||||
continue
|
||||
full = main_root / rel_path
|
||||
if not full.exists():
|
||||
continue # file removed in this PR
|
||||
attribution = parse_attribution_from_file(str(full))
|
||||
for role, entries in attribution.items():
|
||||
for entry in entries:
|
||||
handle = entry.get("handle")
|
||||
if handle:
|
||||
upsert_contributor(
|
||||
conn, handle, entry.get("agent_id"), role, today,
|
||||
)
|
||||
# Event-emit: only 'sourcer' frontmatter entries become
|
||||
# originator events. 'extractor' frontmatter = infrastructure
|
||||
# (the Sonnet extraction agent), no event. challenger/
|
||||
# synthesizer frontmatter is extremely rare at extract time.
|
||||
# Skip originator if same as author — avoids double-credit
|
||||
# when someone submits their own content (self-authored).
|
||||
if role == "sourcer":
|
||||
origin_canonical = normalize_handle(handle, conn=conn)
|
||||
if origin_canonical and origin_canonical != author_canonical:
|
||||
insert_contribution_event(
|
||||
conn, handle, "originator", pr_number,
|
||||
claim_path=rel_path,
|
||||
domain=pr_domain, channel=pr_channel,
|
||||
timestamp=pr_merged_at,
|
||||
)
|
||||
|
||||
# Fallback: if no Pentagon-Agent trailer found, try git commit authors
|
||||
_BOT_AUTHORS = frozenset({
|
||||
|
|
@ -167,13 +374,37 @@ async def record_contributor_attribution(conn, pr_number: int, branch: str, git_
|
|||
for author_line in author_output.strip().split("\n"):
|
||||
author_name = author_line.strip().lower()
|
||||
if author_name and author_name not in _BOT_AUTHORS:
|
||||
upsert_contributor(conn, author_name, None, "extractor", today)
|
||||
role = commit_type_to_role(refined_type)
|
||||
upsert_contributor(conn, author_name, None, role, today)
|
||||
# Event-model parity: emit challenger/synthesizer event when
|
||||
# the fallback credits a human/agent for that kind of work.
|
||||
# Without this, external-contributor challenge/enrich PRs
|
||||
# accumulate legacy counts but disappear from event-sourced
|
||||
# leaderboards when Phase B cuts over. (Ganymede review.)
|
||||
event_role_fb = _TRAILER_EVENT_ROLE.get(refined_type)
|
||||
if event_role_fb:
|
||||
insert_contribution_event(
|
||||
conn, author_name, event_role_fb, pr_number,
|
||||
claim_path=None, domain=pr_domain, channel=pr_channel,
|
||||
timestamp=pr_merged_at,
|
||||
)
|
||||
agents_found.add(author_name)
|
||||
|
||||
if not agents_found:
|
||||
row = conn.execute("SELECT agent FROM prs WHERE number = ?", (pr_number,)).fetchone()
|
||||
if row and row["agent"] and row["agent"] != "external":
|
||||
upsert_contributor(conn, row["agent"].lower(), None, "extractor", today)
|
||||
fb_row = conn.execute(
|
||||
"SELECT agent FROM prs WHERE number = ?", (pr_number,)
|
||||
).fetchone()
|
||||
if fb_row and fb_row["agent"] and fb_row["agent"] != "external":
|
||||
pr_agent = fb_row["agent"].lower()
|
||||
role = commit_type_to_role(refined_type)
|
||||
upsert_contributor(conn, pr_agent, None, role, today)
|
||||
event_role_fb = _TRAILER_EVENT_ROLE.get(refined_type)
|
||||
if event_role_fb:
|
||||
insert_contribution_event(
|
||||
conn, pr_agent, event_role_fb, pr_number,
|
||||
claim_path=None, domain=pr_domain, channel=pr_channel,
|
||||
timestamp=pr_merged_at,
|
||||
)
|
||||
|
||||
|
||||
def upsert_contributor(
|
||||
236 lib/db.py
@ -9,7 +9,7 @@ from . import config
|
|||
|
||||
logger = logging.getLogger("pipeline.db")
|
||||
|
||||
SCHEMA_VERSION = 22
|
||||
SCHEMA_VERSION = 26
|
||||
|
||||
SCHEMA_SQL = """
|
||||
CREATE TABLE IF NOT EXISTS schema_version (
|
||||
|
|
@ -35,6 +35,15 @@ CREATE TABLE IF NOT EXISTS sources (
|
|||
feedback TEXT,
|
||||
-- eval feedback for re-extraction (JSON)
|
||||
cost_usd REAL DEFAULT 0,
|
||||
-- v26: provenance — publisher (news org / venue) + content author.
|
||||
-- publisher_id references publishers(id) when source is from a known org.
|
||||
-- original_author_handle references contributors(handle) when author is in our system.
|
||||
-- original_author is free-text fallback ("Kim et al.", "Robin Hanson") — not credit-bearing.
|
||||
publisher_id INTEGER REFERENCES publishers(id),
|
||||
content_type TEXT,
|
||||
-- article | paper | tweet | conversation | self_authored | webpage | podcast
|
||||
original_author TEXT,
|
||||
original_author_handle TEXT REFERENCES contributors(handle),
|
||||
created_at TEXT DEFAULT (datetime('now')),
|
||||
updated_at TEXT DEFAULT (datetime('now'))
|
||||
);
|
||||
|
|
@ -157,11 +166,83 @@ CREATE TABLE IF NOT EXISTS response_audit (
|
|||
CREATE INDEX IF NOT EXISTS idx_sources_status ON sources(status);
|
||||
CREATE INDEX IF NOT EXISTS idx_prs_status ON prs(status);
|
||||
CREATE INDEX IF NOT EXISTS idx_prs_domain ON prs(domain);
|
||||
CREATE INDEX IF NOT EXISTS idx_prs_source_path ON prs(source_path) WHERE source_path IS NOT NULL;
|
||||
CREATE INDEX IF NOT EXISTS idx_costs_date ON costs(date);
|
||||
CREATE INDEX IF NOT EXISTS idx_audit_stage ON audit_log(stage);
|
||||
CREATE INDEX IF NOT EXISTS idx_response_audit_ts ON response_audit(timestamp);
|
||||
CREATE INDEX IF NOT EXISTS idx_response_audit_agent ON response_audit(agent);
|
||||
CREATE INDEX IF NOT EXISTS idx_response_audit_chat_ts ON response_audit(chat_id, timestamp);
|
||||
|
||||
-- Event-sourced contributions (schema v24).
|
||||
-- One row per credit-earning event. Idempotent via two partial UNIQUE indexes
|
||||
-- (SQLite treats NULL != NULL in UNIQUE constraints, so a single composite
|
||||
-- UNIQUE with nullable claim_path would allow evaluator-event duplicates).
|
||||
-- Leaderboards are SQL aggregations over this table; contributors becomes a materialized cache.
|
||||
CREATE TABLE IF NOT EXISTS contribution_events (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
handle TEXT NOT NULL,
|
||||
kind TEXT NOT NULL DEFAULT 'person',
|
||||
-- person | org | agent
|
||||
role TEXT NOT NULL,
|
||||
-- author | originator | challenger | synthesizer | evaluator
|
||||
weight REAL NOT NULL,
|
||||
pr_number INTEGER NOT NULL,
|
||||
claim_path TEXT,
|
||||
-- NULL for PR-level events (e.g. evaluator). Set for per-claim events.
|
||||
domain TEXT,
|
||||
channel TEXT,
|
||||
-- telegram | github | agent | web | unknown
|
||||
timestamp TEXT NOT NULL DEFAULT (datetime('now'))
|
||||
);
|
||||
-- Per-claim events: unique on (handle, role, pr_number, claim_path) when path IS NOT NULL.
|
||||
CREATE UNIQUE INDEX IF NOT EXISTS idx_ce_unique_claim ON contribution_events(
|
||||
handle, role, pr_number, claim_path
|
||||
) WHERE claim_path IS NOT NULL;
|
||||
-- PR-level events (evaluator, author, trailer-based): unique on (handle, role, pr_number) when path IS NULL.
|
||||
CREATE UNIQUE INDEX IF NOT EXISTS idx_ce_unique_pr ON contribution_events(
|
||||
handle, role, pr_number
|
||||
) WHERE claim_path IS NULL;
|
||||
CREATE INDEX IF NOT EXISTS idx_ce_handle_ts ON contribution_events(handle, timestamp);
|
||||
CREATE INDEX IF NOT EXISTS idx_ce_domain_ts ON contribution_events(domain, timestamp);
|
||||
CREATE INDEX IF NOT EXISTS idx_ce_pr ON contribution_events(pr_number);
|
||||
CREATE INDEX IF NOT EXISTS idx_ce_role_ts ON contribution_events(role, timestamp);
|
||||
CREATE INDEX IF NOT EXISTS idx_ce_kind_ts ON contribution_events(kind, timestamp);
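A sketch of the kind of leaderboard aggregation this table is designed for; the production query will likely add filters and time windows.

```python
# `conn` is assumed to be a sqlite3 connection to the pipeline database.
rows = conn.execute("""
    SELECT handle, SUM(weight) AS ci, COUNT(*) AS events
    FROM contribution_events
    WHERE kind = 'person'
    GROUP BY handle
    ORDER BY ci DESC
    LIMIT 20
""").fetchall()
```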
|
||||
|
||||
-- Handle aliasing. @thesensatore → thesensatore. cameron → cameron-s1.
|
||||
-- Writers call resolve_alias(handle) before inserting events or upserting contributors.
|
||||
CREATE TABLE IF NOT EXISTS contributor_aliases (
|
||||
alias TEXT PRIMARY KEY,
|
||||
canonical TEXT NOT NULL,
|
||||
created_at TEXT DEFAULT (datetime('now'))
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS idx_aliases_canonical ON contributor_aliases(canonical);
|
||||
|
||||
-- Publishers: news orgs, academic venues, social platforms. NOT contributors — these
|
||||
-- provide metadata/provenance for sources, never earn leaderboard credit. Separating
|
||||
-- these from contributors prevents CNBC/SpaceNews from dominating the leaderboard.
|
||||
-- (Apr 24 Cory directive: "only credit the original source if its on X or tg")
|
||||
CREATE TABLE IF NOT EXISTS publishers (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
name TEXT NOT NULL UNIQUE,
|
||||
kind TEXT CHECK(kind IN ('news', 'academic', 'social_platform', 'podcast', 'self', 'internal', 'legal', 'government', 'research_org', 'commercial', 'other')),
|
||||
url_pattern TEXT,
|
||||
created_at TEXT DEFAULT (datetime('now'))
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS idx_publishers_name ON publishers(name);
|
||||
CREATE INDEX IF NOT EXISTS idx_publishers_kind ON publishers(kind);
|
||||
|
||||
-- Multi-platform identity: one contributor, many handles. Enables the leaderboard to
|
||||
-- unify @thesensatore (X) + thesensatore (TG) + thesensatore@github into one person.
|
||||
-- Writers check this table after resolving aliases to find canonical contributor handle.
|
||||
CREATE TABLE IF NOT EXISTS contributor_identities (
|
||||
contributor_handle TEXT NOT NULL,
|
||||
platform TEXT NOT NULL CHECK(platform IN ('x', 'telegram', 'github', 'email', 'web', 'internal')),
|
||||
platform_handle TEXT NOT NULL,
|
||||
verified INTEGER DEFAULT 0,
|
||||
created_at TEXT DEFAULT (datetime('now')),
|
||||
PRIMARY KEY (platform, platform_handle)
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS idx_identities_contributor ON contributor_identities(contributor_handle);
|
||||
"""
|
||||
|
||||
|
||||
|
|
@ -231,9 +312,20 @@ def classify_branch(branch: str) -> tuple[str, str]:
|
|||
|
||||
|
||||
# Keep in sync with BRANCH_PREFIX_MAP above.
|
||||
#
|
||||
# Valid source_channel values: github | telegram | agent | maintenance | web | unknown
|
||||
# - github: external contributor PR (set via sync-mirror.sh github_pr linking,
|
||||
# or from gh-pr-* branches, or any time github_pr is provided)
|
||||
# - telegram: message captured by telegram bot (must be tagged explicitly by
|
||||
# ingestion — extract/* default is "unknown" because the bare branch prefix
|
||||
# can no longer distinguish telegram-origin from github-origin extractions)
|
||||
# - agent: per-agent research branches (rio/, theseus/, etc.)
|
||||
# - maintenance: pipeline housekeeping (reweave/, epimetheus/, fix/)
|
||||
# - web: future in-app submissions (chat UI or form posts)
|
||||
# - unknown: fallback when provenance cannot be determined
|
||||
_CHANNEL_MAP = {
|
||||
"extract": "telegram",
|
||||
"ingestion": "telegram",
|
||||
"extract": "unknown",
|
||||
"ingestion": "unknown",
|
||||
"rio": "agent",
|
||||
"theseus": "agent",
|
||||
"astra": "agent",
|
||||
|
|
@ -248,7 +340,12 @@ _CHANNEL_MAP = {
|
|||
|
||||
|
||||
def classify_source_channel(branch: str, *, github_pr: int = None) -> str:
|
||||
"""Derive source_channel from branch prefix and github_pr flag."""
|
||||
"""Derive source_channel from branch prefix and github_pr flag.
|
||||
|
||||
Precedence: github_pr flag > gh-pr- branch prefix > _CHANNEL_MAP lookup.
|
||||
extract/* defaults to "unknown" — callers with better provenance (telegram
|
||||
bot, web submission handler) must override at PR-insert time.
|
||||
"""
|
||||
if github_pr is not None or branch.startswith("gh-pr-"):
|
||||
return "github"
|
||||
prefix = branch.split("/", 1)[0] if "/" in branch else branch
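Illustrative calls matching the precedence described in the docstring (branch names and PR number are invented):

```python
from lib.db import classify_source_channel

assert classify_source_channel("extract/some-source") == "unknown"
assert classify_source_channel("extract/some-source", github_pr=87) == "github"
assert classify_source_channel("gh-pr-87-external-fix") == "github"
assert classify_source_channel("theseus/research-prediction-markets") == "agent"
```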
|
||||
|
|
@ -617,6 +714,137 @@ def migrate(conn: sqlite3.Connection):
|
|||
conn.commit()
|
||||
logger.info("Migration v22: added source_channel to prs + backfilled from branch prefix")
|
||||
|
||||
if current < 23:
|
||||
conn.execute(
|
||||
"CREATE INDEX IF NOT EXISTS idx_prs_source_path ON prs(source_path) WHERE source_path IS NOT NULL"
|
||||
)
|
||||
conn.commit()
|
||||
logger.info("Migration v23: added idx_prs_source_path for auto-close dedup lookup")
|
||||
|
||||
if current < 24:
|
||||
# Event-sourced contributions table + alias table + kind column on contributors.
|
||||
# Non-breaking: contributors table stays; events are written in addition via
|
||||
# double-write in merge.py. Leaderboards switch to events in Phase B.
|
||||
conn.executescript("""
|
||||
CREATE TABLE IF NOT EXISTS contribution_events (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
handle TEXT NOT NULL,
|
||||
kind TEXT NOT NULL DEFAULT 'person',
|
||||
role TEXT NOT NULL,
|
||||
weight REAL NOT NULL,
|
||||
pr_number INTEGER NOT NULL,
|
||||
claim_path TEXT,
|
||||
domain TEXT,
|
||||
channel TEXT,
|
||||
timestamp TEXT NOT NULL DEFAULT (datetime('now'))
|
||||
);
|
||||
-- Partial unique indexes handle SQLite's NULL != NULL UNIQUE semantics.
|
||||
-- Per-claim events dedup on 4-tuple; PR-level events dedup on 3-tuple.
|
||||
CREATE UNIQUE INDEX IF NOT EXISTS idx_ce_unique_claim ON contribution_events(
|
||||
handle, role, pr_number, claim_path
|
||||
) WHERE claim_path IS NOT NULL;
|
||||
CREATE UNIQUE INDEX IF NOT EXISTS idx_ce_unique_pr ON contribution_events(
|
||||
handle, role, pr_number
|
||||
) WHERE claim_path IS NULL;
|
||||
CREATE INDEX IF NOT EXISTS idx_ce_handle_ts ON contribution_events(handle, timestamp);
|
||||
CREATE INDEX IF NOT EXISTS idx_ce_domain_ts ON contribution_events(domain, timestamp);
|
||||
CREATE INDEX IF NOT EXISTS idx_ce_pr ON contribution_events(pr_number);
|
||||
CREATE INDEX IF NOT EXISTS idx_ce_role_ts ON contribution_events(role, timestamp);
|
||||
CREATE INDEX IF NOT EXISTS idx_ce_kind_ts ON contribution_events(kind, timestamp);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS contributor_aliases (
|
||||
alias TEXT PRIMARY KEY,
|
||||
canonical TEXT NOT NULL,
|
||||
created_at TEXT DEFAULT (datetime('now'))
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS idx_aliases_canonical ON contributor_aliases(canonical);
|
||||
""")
|
||||
try:
|
||||
conn.execute("ALTER TABLE contributors ADD COLUMN kind TEXT DEFAULT 'person'")
|
||||
except sqlite3.OperationalError:
|
||||
pass # column already exists
|
||||
# Seed known aliases. @thesensatore → thesensatore catches the zombie row Argus flagged.
|
||||
# cameron → cameron-s1 reconciles the Leo-flagged missing contributor.
|
||||
conn.executemany(
|
||||
"INSERT OR IGNORE INTO contributor_aliases (alias, canonical) VALUES (?, ?)",
|
||||
[
|
||||
("@thesensatore", "thesensatore"),
|
||||
("cameron", "cameron-s1"),
|
||||
],
|
||||
)
|
||||
# Seed kind='agent' for known Pentagon agents so the events writer picks it up.
|
||||
# Must stay in sync with lib/attribution.PENTAGON_AGENTS — drift causes
|
||||
# contributors.kind to disagree with classify_kind() output for future
|
||||
# inserts. (Ganymede review: "pipeline" was missing until Apr 24.)
|
||||
pentagon_agents = [
|
||||
"rio", "leo", "theseus", "vida", "clay", "astra",
|
||||
"oberon", "argus", "rhea", "ganymede", "epimetheus", "hermes", "ship",
|
||||
"pipeline",
|
||||
]
|
||||
for agent in pentagon_agents:
|
||||
conn.execute(
|
||||
"UPDATE contributors SET kind = 'agent' WHERE handle = ?",
|
||||
(agent,),
|
||||
)
|
||||
conn.commit()
|
||||
logger.info("Migration v24: added contribution_events + contributor_aliases tables, kind column")
|
||||
|
||||
if current < 25:
|
||||
# v24 seeded 13 Pentagon agents but missed "pipeline" — classify_kind()
|
||||
# treats it as agent so contributors.kind drifted from event-insert output.
|
||||
# Idempotent corrective UPDATE: fresh installs have no "pipeline" row
|
||||
# (no-op), upgraded envs flip it if it exists. (Ganymede review Apr 24.)
|
||||
conn.execute(
|
||||
"UPDATE contributors SET kind = 'agent' WHERE handle = 'pipeline'"
|
||||
)
|
||||
conn.commit()
|
||||
logger.info("Migration v25: patched kind='agent' for pipeline handle")
|
||||
|
||||
if current < 26:
|
||||
# Add publishers + contributor_identities. Non-breaking — new tables only.
|
||||
# No existing data moved. Classification into publishers happens via a
|
||||
# separate script (scripts/reclassify-contributors.py) with Cory-reviewed
|
||||
# seed list. CHECK constraint on contributors.kind deferred to v27 after
|
||||
# classification completes. (Apr 24 Cory directive: "fix schema, don't
|
||||
# filter output" — separate contributors from publishers at the data layer.)
|
||||
conn.executescript("""
|
||||
CREATE TABLE IF NOT EXISTS publishers (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
name TEXT NOT NULL UNIQUE,
|
||||
kind TEXT CHECK(kind IN ('news', 'academic', 'social_platform', 'podcast', 'self', 'internal', 'legal', 'government', 'research_org', 'commercial', 'other')),
|
||||
url_pattern TEXT,
|
||||
created_at TEXT DEFAULT (datetime('now'))
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS idx_publishers_name ON publishers(name);
|
||||
CREATE INDEX IF NOT EXISTS idx_publishers_kind ON publishers(kind);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS contributor_identities (
|
||||
contributor_handle TEXT NOT NULL,
|
||||
platform TEXT NOT NULL CHECK(platform IN ('x', 'telegram', 'github', 'email', 'web', 'internal')),
|
||||
platform_handle TEXT NOT NULL,
|
||||
verified INTEGER DEFAULT 0,
|
||||
created_at TEXT DEFAULT (datetime('now')),
|
||||
PRIMARY KEY (platform, platform_handle)
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS idx_identities_contributor ON contributor_identities(contributor_handle);
|
||||
""")
|
||||
# Extend sources with provenance columns. ALTER TABLE ADD COLUMN is
|
||||
# idempotent-safe via try/except because SQLite doesn't support IF NOT EXISTS
|
||||
# on column adds.
|
||||
for col_sql in (
|
||||
"ALTER TABLE sources ADD COLUMN publisher_id INTEGER REFERENCES publishers(id)",
|
||||
"ALTER TABLE sources ADD COLUMN content_type TEXT",
|
||||
"ALTER TABLE sources ADD COLUMN original_author TEXT",
|
||||
"ALTER TABLE sources ADD COLUMN original_author_handle TEXT REFERENCES contributors(handle)",
|
||||
):
|
||||
try:
|
||||
conn.execute(col_sql)
|
||||
except sqlite3.OperationalError as e:
|
||||
if "duplicate column" not in str(e).lower():
|
||||
raise
|
||||
conn.commit()
|
||||
logger.info("Migration v26: added publishers + contributor_identities tables + sources provenance columns")
|
||||
|
||||
if current < SCHEMA_VERSION:
|
||||
conn.execute(
|
||||
"INSERT OR REPLACE INTO schema_version (version) VALUES (?)",
|
||||
@ -37,6 +37,11 @@ _AGENT_PRIMARY_DOMAIN: dict[str, str] = {
|
|||
"leo": "grand-strategy",
|
||||
}
|
||||
|
||||
_INGESTION_SOURCE_DOMAIN: dict[str, str] = {
|
||||
"futardio": "internet-finance",
|
||||
"metadao": "internet-finance",
|
||||
}
|
||||
|
||||
|
||||
def agent_for_domain(domain: str | None) -> str:
|
||||
"""Get the reviewing agent for a domain. Falls back to Leo."""
|
||||
|
|
@ -82,6 +87,14 @@ def detect_domain_from_branch(branch: str) -> str | None:
|
|||
"""Extract domain from branch name like 'rio/claims-futarchy' → 'internet-finance'.
|
||||
|
||||
Uses agent prefix → primary domain mapping for pipeline branches.
|
||||
For ingestion branches, checks the rest of the name for source-type hints.
|
||||
"""
|
||||
prefix = branch.split("/")[0].lower() if "/" in branch else ""
|
||||
return _AGENT_PRIMARY_DOMAIN.get(prefix)
|
||||
if prefix in _AGENT_PRIMARY_DOMAIN:
|
||||
return _AGENT_PRIMARY_DOMAIN[prefix]
|
||||
if prefix == "ingestion":
|
||||
rest = branch.split("/", 1)[1].lower() if "/" in branch else ""
|
||||
for source_key, domain in _INGESTION_SOURCE_DOMAIN.items():
|
||||
if source_key in rest:
|
||||
return domain
|
||||
return None
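Illustrative branch names against the mappings above (suffixes invented):

```python
from lib.domains import detect_domain_from_branch  # module path assumed from extract.py's import

assert detect_domain_from_branch("rio/claims-futarchy") == "internet-finance"
assert detect_domain_from_branch("ingestion/futardio-prices") == "internet-finance"
assert detect_domain_from_branch("no-slash-branch") is None   # no agent prefix, no ingestion hint
```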
|
||||
@ -11,13 +11,14 @@ All functions are async (Forgejo API calls). Dependencies: forgejo, db, config,
|
|||
pr_state, feedback, eval_parse.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
|
||||
from . import config, db
|
||||
from .eval_parse import classify_issues
|
||||
from .feedback import format_rejection_comment
|
||||
from .forgejo import api as forgejo_api, get_agent_token, repo_path
|
||||
from .forgejo import api as forgejo_api, get_agent_token, get_pr_diff, repo_path
|
||||
from .github_feedback import on_closed, on_eval_complete
|
||||
from .pr_state import close_pr
|
||||
|
||||
|
|
@ -114,12 +115,98 @@ async def terminate_pr(conn, pr_number: int, reason: str):
|
|||
async def dispose_rejected_pr(conn, pr_number: int, eval_attempts: int, all_issues: list[str]):
|
||||
"""Disposition logic for rejected PRs on attempt 2+.
|
||||
|
||||
Auto-close gate (all attempts): near-duplicate of an already-merged PR for
|
||||
the same source — close immediately. Avoids the Apr 22 runaway-damage
|
||||
pattern where a source extracted 20+ times in a short window produced
|
||||
dozens of open PRs that all had to be closed manually.
|
||||
|
||||
Attempt 1: normal — back to open, wait for fix.
|
||||
Attempt 2: check issue classification.
|
||||
- Mechanical only: keep open for one more attempt (auto-fix future).
|
||||
- Substantive or mixed: close PR, requeue source.
|
||||
Attempt 3+: terminal.
|
||||
"""
|
||||
# Auto-close near-duplicate when a merged sibling for the same source exists.
|
||||
# Runs before the attempt-count branches so it catches the common runaway
|
||||
# case on attempt 1 instead of waiting for attempt 2's terminate path.
|
||||
#
|
||||
# Exact-match requirement (Ganymede review): compound rejections like
|
||||
# ["near_duplicate", "factual_discrepancy"] carry signal about the merged
|
||||
# sibling being wrong or limited — we want humans to see those. Only the
|
||||
# pure single-issue case is safe to auto-close.
|
||||
if all_issues == ["near_duplicate"]:
|
||||
existing_merged = conn.execute(
|
||||
"""SELECT p2.number, p1.source_path FROM prs p1
|
||||
JOIN prs p2 ON p2.source_path = p1.source_path
|
||||
WHERE p1.number = ?
|
||||
AND p1.source_path IS NOT NULL
|
||||
AND p2.number != p1.number
|
||||
AND p2.status = 'merged'
|
||||
LIMIT 1""",
|
||||
(pr_number,),
|
||||
).fetchone()
|
||||
if existing_merged:
|
||||
sibling = existing_merged[0]
|
||||
source_path = existing_merged[1]
|
||||
|
||||
# Enrichment guard: LLM reviewers can flag enrichment prose as
|
||||
# "redundant" via eval_parse regex, tagging near_duplicate even
|
||||
# though validate.py's structural check only fires on NEW files.
|
||||
# If the PR only MODIFIES existing files (no "new file mode" in
|
||||
# diff), it's an enrichment — skip auto-close so a human reviews.
|
||||
#
|
||||
# 10s timeout bounds damage when Forgejo is wedged (Apr 22 incident:
|
||||
# hung for 2.5h). Conservative fallback: skip auto-close on any
|
||||
# failure — fall through to normal rejection path.
|
||||
try:
|
||||
diff = await asyncio.wait_for(get_pr_diff(pr_number), timeout=10)
|
||||
except Exception:  # asyncio.TimeoutError is an Exception subclass; one clause covers both
|
||||
logger.warning(
|
||||
"PR #%d: diff fetch failed/timed out for near-dup guard — skipping auto-close",
|
||||
pr_number, exc_info=True,
|
||||
)
|
||||
diff = None
|
||||
|
||||
if not diff:
|
||||
# None or empty — conservative fallback, fall through to attempt-count branches
|
||||
pass
|
||||
elif "new file mode" not in diff:
|
||||
logger.info(
|
||||
"PR #%d: near_duplicate but modifies-only (enrichment) — skipping auto-close",
|
||||
pr_number,
|
||||
)
|
||||
else:
|
||||
logger.info(
|
||||
"PR #%d: auto-closing near-duplicate of merged PR #%d (same source)",
|
||||
pr_number, sibling,
|
||||
)
|
||||
# Post a brief explanation before closing (best-effort — non-fatal)
|
||||
try:
|
||||
await forgejo_api(
|
||||
"POST",
|
||||
repo_path(f"issues/{pr_number}/comments"),
|
||||
{"body": (
|
||||
f"Auto-closed: near-duplicate of already-merged PR "
|
||||
f"#{sibling} (same source: `{source_path}`)."
|
||||
)},
|
||||
)
|
||||
except Exception:
|
||||
logger.debug("PR #%d: auto-close comment failed (non-fatal)", pr_number, exc_info=True)
|
||||
await close_pr(
|
||||
conn, pr_number,
|
||||
last_error=f"auto_closed_near_duplicate: merged sibling #{sibling}",
|
||||
)
|
||||
db.audit(
|
||||
conn, "evaluate", "auto_closed_near_duplicate",
|
||||
json.dumps({
|
||||
"pr": pr_number,
|
||||
"merged_sibling": sibling,
|
||||
"source_path": source_path,
|
||||
"eval_attempts": eval_attempts,
|
||||
}),
|
||||
)
|
||||
return
|
||||
|
||||
if eval_attempts < 2:
|
||||
# Attempt 1: post structured feedback so agent learns, but don't close
|
||||
if all_issues:
|
||||
|
|
|
|||
|
|
@ -261,7 +261,8 @@ async def evaluate_pr(conn, pr_number: int, tier: str = None) -> dict:
|
|||
)
|
||||
db.record_review(
|
||||
conn, pr_number, "rejected",
|
||||
domain=domain, agent=agent, reviewer=agent, reviewer_model="gpt-4o",
|
||||
domain=domain, agent=agent, reviewer=agent, reviewer_model=config.EVAL_DOMAIN_MODEL,
|
||||
rejection_reason=",".join(domain_issues) if domain_issues else None,
|
||||
notes=(domain_review or "")[:4000],
|
||||
)
|
||||
|
||||
|
|
@ -398,6 +399,7 @@ async def evaluate_pr(conn, pr_number: int, tier: str = None) -> dict:
|
|||
conn, pr_number, "approved-with-changes",
|
||||
domain=domain, agent=agent, reviewer="leo",
|
||||
reviewer_model="sonnet" if tier == "STANDARD" else "opus",
|
||||
rejection_reason=",".join(all_issues) if all_issues else None,
|
||||
notes=(leo_review or domain_review or "")[:4000],
|
||||
)
|
||||
logger.info(
|
||||
174 lib/extract.py
@ -33,6 +33,7 @@ from pathlib import Path
|
|||
|
||||
from . import config
|
||||
from .costs import record_usage
|
||||
from .db import classify_source_channel
|
||||
from .domains import agent_for_domain
|
||||
from .extraction_prompt import build_extraction_prompt
|
||||
from .forgejo import api as forgejo_api
|
||||
|
|
@ -229,7 +230,7 @@ def _parse_extraction_json(text: str) -> dict | None:
|
|||
return None
|
||||
|
||||
|
||||
def _build_claim_content(claim: dict, agent: str, source_format: str | None = None) -> str:
|
||||
def _build_claim_content(claim: dict, agent: str, source_format: str | None = None, source_file: str = "") -> str:
|
||||
"""Build claim markdown file content from extraction JSON."""
|
||||
today = date.today().isoformat()
|
||||
domain = claim.get("domain", "")
|
||||
|
|
@ -281,6 +282,8 @@ def _build_claim_content(claim: dict, agent: str, source_format: str | None = No
|
|||
f"created: {today}",
|
||||
f"agent: {agent}",
|
||||
]
|
||||
if source_file:
|
||||
lines.append(f"sourced_from: {source_file}")
|
||||
if scope:
|
||||
lines.append(f"scope: {scope}")
|
||||
if sourcer:
|
||||
|
|
@ -432,7 +435,7 @@ async def _extract_one_source(
|
|||
filename = Path(filename).name # Strip directory components — LLM output may contain path traversal
|
||||
if not filename.endswith(".md"):
|
||||
filename += ".md"
|
||||
content = _build_claim_content(c, agent_lower, source_format=source_format)
|
||||
content = _build_claim_content(c, agent_lower, source_format=source_format, source_file=f"{domain}/{source_file}" if domain else source_file)
|
||||
claim_files.append({"filename": filename, "domain": c.get("domain", domain), "content": content})
|
||||
|
||||
# Build entity file contents
|
||||
|
|
@ -490,6 +493,17 @@ async def _extract_one_source(
|
|||
|
||||
if not claim_files and not entity_files and not enrichments:
|
||||
logger.info("No valid claims/entities/enrichments after validation for %s — archiving as null-result", source_file)
|
||||
# Mark DB as null_result so queue scan won't re-extract even if file stays in queue
|
||||
# (the main-worktree push in _archive_source frequently fails — DB is authoritative).
|
||||
try:
|
||||
conn.execute(
|
||||
"""INSERT INTO sources (path, status, updated_at) VALUES (?, 'null_result', datetime('now'))
|
||||
ON CONFLICT(path) DO UPDATE SET status='null_result', updated_at=datetime('now')""",
|
||||
(source_path,),
|
||||
)
|
||||
conn.commit()
|
||||
except Exception:
|
||||
logger.debug("Failed to mark source as null_result in DB", exc_info=True)
|
||||
await _archive_source(source_path, domain, "null-result")
|
||||
return 0, 0
|
||||
|
||||
|
|
@ -558,6 +572,18 @@ async def _extract_one_source(
|
|||
|
||||
if not files_written:
|
||||
logger.info("No files written for %s — cleaning up", source_file)
|
||||
# Path B null-result: enrichments existed but all targets missing in worktree.
|
||||
# No PR, no cooldown match — without DB update this re-extracts every 60s.
|
||||
# (Ganymede review, commit 469cb7f follow-up.)
|
||||
try:
|
||||
conn.execute(
|
||||
"""INSERT INTO sources (path, status, updated_at) VALUES (?, 'null_result', datetime('now'))
|
||||
ON CONFLICT(path) DO UPDATE SET status='null_result', updated_at=datetime('now')""",
|
||||
(source_path,),
|
||||
)
|
||||
conn.commit()
|
||||
except Exception:
|
||||
logger.debug("Failed to mark source as null_result (path B)", exc_info=True)
|
||||
await _git("checkout", "main", cwd=str(EXTRACT_WORKTREE))
|
||||
await _git("branch", "-D", branch, cwd=str(EXTRACT_WORKTREE))
|
||||
await _archive_source(source_path, domain, "null-result")
|
||||
|
|
@ -576,6 +602,22 @@ async def _extract_one_source(
|
|||
except Exception:
|
||||
logger.warning("Extract-connect failed (non-fatal)", exc_info=True)
|
||||
|
||||
# Archive the source WITHIN the extract branch (not via separate push on main).
|
||||
# Prevents the runaway-extraction race: when archive-to-main push fails (non-FF,
|
||||
# non-pushable worktree state), file returns to queue and gets re-extracted every
|
||||
# cycle. Moving the archive into the extract branch makes it atomic with the PR
|
||||
# merge — when the PR merges, the source is archived automatically.
|
||||
try:
|
||||
archive_rel = _archive_source_in_worktree(
|
||||
worktree, source_path, domain, "processed", agent_lower, extract_model,
|
||||
)
|
||||
if archive_rel:
|
||||
files_written.append(archive_rel["new"])
|
||||
# The queue file was deleted; git add handles the removal
|
||||
await _git("add", "inbox/queue/", cwd=str(EXTRACT_WORKTREE))
|
||||
except Exception:
|
||||
logger.exception("In-branch archive failed for %s (continuing)", source_file)
|
||||
|
||||
# Stage and commit
|
||||
for f in files_written:
|
||||
await _git("add", f, cwd=str(EXTRACT_WORKTREE))
|
||||
|
|
@ -658,17 +700,32 @@ async def _extract_one_source(
|
|||
for c in claims_raw if c.get("title") or c.get("filename")
|
||||
)
|
||||
|
||||
# Upsert: if discover_external_prs already created the row, update it;
|
||||
# if not, create a partial row that discover will complete.
|
||||
# Success path: mark source as 'extracting' so queue scan's DB-status filter
|
||||
# skips it between PR creation and merge. Without this, cooldown is load-bearing
|
||||
# (Ganymede review, commit 469cb7f follow-up).
|
||||
try:
|
||||
conn.execute(
|
||||
"""INSERT INTO prs (number, branch, status, submitted_by, source_path, description)
|
||||
VALUES (?, ?, 'open', ?, ?, ?)
|
||||
"""INSERT INTO sources (path, status, updated_at) VALUES (?, 'extracting', datetime('now'))
|
||||
ON CONFLICT(path) DO UPDATE SET status='extracting', updated_at=datetime('now')""",
|
||||
(source_path,),
|
||||
)
|
||||
conn.commit()
|
||||
except Exception:
|
||||
logger.debug("Failed to mark source as extracting", exc_info=True)
|
||||
|
||||
# Upsert: if discover_external_prs already created the row, update it;
|
||||
# if not, create a partial row that discover will complete.
|
||||
source_channel = classify_source_channel(branch)
|
||||
try:
|
||||
conn.execute(
|
||||
"""INSERT INTO prs (number, branch, status, submitted_by, source_path, description, source_channel)
|
||||
VALUES (?, ?, 'open', ?, ?, ?, ?)
|
||||
ON CONFLICT(number) DO UPDATE SET
|
||||
submitted_by = excluded.submitted_by,
|
||||
source_path = excluded.source_path,
|
||||
description = COALESCE(excluded.description, prs.description)""",
|
||||
(pr_num, branch, contributor, source_path, claim_titles),
|
||||
description = COALESCE(excluded.description, prs.description),
|
||||
source_channel = COALESCE(prs.source_channel, excluded.source_channel)""",
|
||||
(pr_num, branch, contributor, source_path, claim_titles, source_channel),
|
||||
)
|
||||
conn.commit()
|
||||
except Exception:
|
||||
|
|
@ -689,12 +746,69 @@ async def _extract_one_source(
|
|||
# Clean up extract worktree
|
||||
await _git("checkout", "main", cwd=str(EXTRACT_WORKTREE))
|
||||
|
||||
# 10. Archive source on main
|
||||
await _archive_source(source_path, domain, "processed", agent_lower)
|
||||
# Note: source archival happened in-branch before commit (see _archive_source_in_worktree).
|
||||
# Do NOT call _archive_source() here — the broken main-worktree-push path caused the
|
||||
# runaway extraction bug. Archive is now atomic with PR merge.
|
||||
|
||||
return 1, 0
|
||||
|
||||
|
||||
def _archive_source_in_worktree(
|
||||
worktree: Path,
|
||||
source_path: str,
|
||||
domain: str,
|
||||
status: str,
|
||||
agent: str | None,
|
||||
extraction_model: str,
|
||||
) -> dict | None:
|
||||
"""Move source file from inbox/queue/ to inbox/archive/<domain>/ WITHIN extract worktree.
|
||||
|
||||
Updates frontmatter (status, processed_by, processed_date, extraction_model) and
|
||||
returns {"old": old_rel_path, "new": new_rel_path} or None if not found.
|
||||
|
||||
The caller commits this change as part of the extract branch, so the archive lands
|
||||
atomically with the PR merge — no separate push on main required.
|
||||
"""
|
||||
queue_path = worktree / source_path
|
||||
if not queue_path.exists():
|
||||
logger.warning("Source %s not found in worktree queue — skipping in-branch archive", source_path)
|
||||
return None
|
||||
|
||||
if status == "null-result":
|
||||
dest_dir = worktree / "inbox" / "null-result"
|
||||
else:
|
||||
dest_dir = worktree / "inbox" / "archive" / (domain or "unknown")
|
||||
dest_dir.mkdir(parents=True, exist_ok=True)
|
||||
dest_path = dest_dir / queue_path.name
|
||||
|
||||
content = queue_path.read_text(encoding="utf-8")
|
||||
today = date.today().isoformat()
|
||||
content = re.sub(r"^status: unprocessed", f"status: {status}", content, flags=re.MULTILINE)
|
||||
if agent and "processed_by:" not in content:
|
||||
content = re.sub(
|
||||
r"(^status: \w+)",
|
||||
rf"\1\nprocessed_by: {agent}\nprocessed_date: {today}",
|
||||
content,
|
||||
count=1,
|
||||
flags=re.MULTILINE,
|
||||
)
|
||||
if "extraction_model:" not in content:
|
||||
content = re.sub(
|
||||
r"(^status: \w+.*?)(\n---)",
|
||||
rf'\1\nextraction_model: "{extraction_model}"\2',
|
||||
content,
|
||||
count=1,
|
||||
flags=re.MULTILINE | re.DOTALL,
|
||||
)
|
||||
|
||||
dest_path.write_text(content, encoding="utf-8")
|
||||
queue_path.unlink()
|
||||
|
||||
old_rel = str(queue_path.relative_to(worktree))
|
||||
new_rel = str(dest_path.relative_to(worktree))
|
||||
return {"old": old_rel, "new": new_rel}
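In miniature, the frontmatter rewrite above turns a queued source into the following (agent, date, and model are placeholders):

```python
# before:                        after:
#   status: unprocessed            status: processed
#   ---                            processed_by: rio
#                                  processed_date: <today>
#                                  extraction_model: "<model>"
#                                  ---
```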
|
||||
|
||||
|
||||
async def _archive_source(
|
||||
source_path: str,
|
||||
domain: str,
|
||||
|
|
@ -786,13 +900,26 @@ async def extract_cycle(conn, max_workers=None) -> tuple[int, int]:
|
|||
if not queue_dir.exists():
|
||||
return 0, 0
|
||||
|
||||
# DB-authoritative status filter: exclude sources where DB records non-unprocessed state.
|
||||
# File frontmatter alone isn't reliable — archive pushes can fail, leaving stale file state.
|
||||
# The sources table is the authoritative record of whether a source has been processed.
|
||||
db_non_unprocessed = {
|
||||
r["path"] for r in conn.execute(
|
||||
"SELECT path FROM sources WHERE status != 'unprocessed'"
|
||||
).fetchall()
|
||||
}
|
||||
|
||||
unprocessed = []
|
||||
for f in sorted(queue_dir.glob("*.md")):
|
||||
try:
|
||||
content = f.read_text(encoding="utf-8")
|
||||
fm = _parse_source_frontmatter(content)
|
||||
if fm.get("status") == "unprocessed":
|
||||
unprocessed.append((str(f.relative_to(main)), content, fm))
|
||||
if fm.get("status") != "unprocessed":
|
||||
continue
|
||||
rel_path = str(f.relative_to(main))
|
||||
if rel_path in db_non_unprocessed:
|
||||
continue
|
||||
unprocessed.append((rel_path, content, fm))
|
||||
except Exception:
|
||||
logger.debug("Failed to read source %s", f, exc_info=True)
|
||||
|
||||
|
|
@ -829,6 +956,29 @@ async def extract_cycle(conn, max_workers=None) -> tuple[int, int]:
|
|||
if skipped:
|
||||
logger.info("Skipped %d source(s) with existing open PRs", skipped)
|
||||
|
||||
# Cooldown: skip sources with ANY PR in last EXTRACTION_COOLDOWN_HOURS.
|
||||
# Defense-in-depth for DB-status filter — catches the window between PR
|
||||
# creation and DB status update if anything races.
|
||||
if unprocessed:
|
||||
cooldown_hours = config.EXTRACTION_COOLDOWN_HOURS
|
||||
recent_source_paths = {
|
||||
r["source_path"] for r in conn.execute(
|
||||
"""SELECT DISTINCT source_path FROM prs
|
||||
WHERE source_path IS NOT NULL
|
||||
AND created_at > datetime('now', ? || ' hours')""",
|
||||
(f"-{cooldown_hours}",),
|
||||
).fetchall() if r["source_path"]
|
||||
}
|
||||
if recent_source_paths:
|
||||
before = len(unprocessed)
|
||||
unprocessed = [
|
||||
(sp, c, f) for sp, c, f in unprocessed
|
||||
if sp not in recent_source_paths
|
||||
]
|
||||
cooled = before - len(unprocessed)
|
||||
if cooled:
|
||||
logger.info("Cooldown: skipped %d source(s) with PRs in last %dh", cooled, cooldown_hours)
|
||||
|
||||
# ── Check for re-extraction sources (must run even when queue is empty) ──
|
||||
reextract_rows = conn.execute(
|
||||
"""SELECT path, feedback FROM sources
|
||||
16 lib/merge.py
@ -308,7 +308,14 @@ async def _cherry_pick_onto_main(branch: str) -> tuple[bool, str]:
|
|||
rc, merge_base = await _git("merge-base", "origin/main", f"origin/{branch}")
|
||||
rc2, main_sha = await _git("rev-parse", "origin/main")
|
||||
if rc == 0 and rc2 == 0 and merge_base.strip() == main_sha.strip():
|
||||
return True, "already up to date"
|
||||
# Branch is descendant of main — but fork workflows (merge main into branch)
|
||||
# create this state while still having new content. Check for actual diff.
|
||||
rc_diff, diff_out = await _git(
|
||||
"diff", "--stat", f"origin/main..origin/{branch}", timeout=10,
|
||||
)
|
||||
if rc_diff != 0 or not diff_out.strip():
|
||||
return True, "already up to date"
|
||||
logger.info("Branch %s is descendant of main but has new content — proceeding", branch)
|
||||
|
||||
# Get extraction commits (oldest first), skip merge commits from fork workflows
|
||||
rc, commits_out = await _git(
|
||||
|
|
@ -429,6 +436,7 @@ from .frontmatter import (
|
|||
serialize_frontmatter,
|
||||
)
|
||||
from .post_merge import (
|
||||
backlink_source_claims,
|
||||
embed_merged_claims,
|
||||
reciprocal_edges,
|
||||
archive_source_for_pr,
|
||||
|
|
@ -848,6 +856,12 @@ async def _merge_domain_queue(conn, domain: str) -> tuple[int, int]:
|
|||
# Archive source file (closes near-duplicate loop — Ganymede review)
|
||||
archive_source_for_pr(branch, domain)
|
||||
|
||||
# Backlink: update source files with claims_extracted refs
|
||||
try:
|
||||
await backlink_source_claims(main_sha, branch_sha, _git)
|
||||
except Exception:
|
||||
logger.exception("PR #%d: backlink_source_claims failed (non-fatal)", pr_num)
|
||||
|
||||
# Embed new/changed claims into Qdrant (non-fatal)
|
||||
await embed_merged_claims(main_sha, branch_sha, _git)
|
||||
|
||||
@ -13,6 +13,7 @@ import logging
|
|||
import os
|
||||
import re
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
from typing import Callable
|
||||
|
||||
from . import config
|
||||
|
|
@ -295,6 +296,139 @@ async def reciprocal_edges(main_sha: str, branch_sha: str, git_fn: Callable):
|
|||
logger.exception("reciprocal_edges: failed (non-fatal)")
|
||||
|
||||
|
||||
async def backlink_source_claims(main_sha: str, branch_sha: str, git_fn: Callable):
|
||||
"""After merge, update source files with claims_extracted backlinks.
|
||||
|
||||
Reads sourced_from from merged claim frontmatter, finds the source file,
|
||||
and appends the claim filename to its claims_extracted list.
|
||||
Only runs for newly added claims (diff-filter=A).
|
||||
"""
|
||||
try:
|
||||
rc, diff_out = await git_fn(
|
||||
"diff", "--name-only", "--diff-filter=A",
|
||||
main_sha, branch_sha,
|
||||
cwd=str(config.MAIN_WORKTREE),
|
||||
timeout=10,
|
||||
)
|
||||
if rc != 0:
|
||||
logger.warning("backlink_source_claims: diff failed (rc=%d), skipping", rc)
|
||||
return
|
||||
|
||||
claim_dirs = {"domains/", "core/", "foundations/"}
|
||||
new_claims = [
|
||||
f for f in diff_out.strip().split("\n")
|
||||
if f.endswith(".md")
|
||||
and any(f.startswith(d) for d in claim_dirs)
|
||||
and not f.split("/")[-1].startswith("_")
|
||||
and "/entities/" not in f
|
||||
and "/decisions/" not in f
|
||||
]
|
||||
|
||||
if not new_claims:
|
||||
return
|
||||
|
||||
modified_sources = {}
|
||||
for claim_path in new_claims:
|
||||
full_path = config.MAIN_WORKTREE / claim_path
|
||||
if not full_path.exists():
|
||||
continue
|
||||
|
||||
try:
|
||||
content = full_path.read_text()
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
fm, raw_fm, body = parse_yaml_frontmatter(content)
|
||||
if fm is None:
|
||||
continue
|
||||
|
||||
sourced_from = fm.get("sourced_from", "")
|
||||
if not sourced_from:
|
||||
continue
|
||||
|
||||
source_path = config.MAIN_WORKTREE / "inbox" / "archive" / sourced_from
|
||||
if not source_path.exists():
|
||||
logger.debug("backlink_source_claims: source %s not found at %s", sourced_from, source_path)
|
||||
continue
|
||||
|
||||
claim_filename = claim_path.rsplit("/", 1)[-1].replace(".md", "")
|
||||
|
||||
try:
|
||||
source_content = source_path.read_text()
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
source_fm, source_raw_fm, source_body = parse_yaml_frontmatter(source_content)
|
||||
if source_fm is None:
|
||||
continue
|
||||
|
||||
existing_claims = source_fm.get("claims_extracted", [])
|
||||
if isinstance(existing_claims, str):
|
||||
existing_claims = [existing_claims]
|
||||
if not isinstance(existing_claims, list):
|
||||
existing_claims = []
|
||||
|
||||
if claim_filename in existing_claims:
|
||||
continue
|
||||
|
||||
existing_claims.append(claim_filename)
|
||||
new_block = "claims_extracted:\n" + "\n".join(f"- {c}" for c in existing_claims)
|
||||
|
||||
lines = source_content.split("\n")
|
||||
if "claims_extracted:" not in source_content:
|
||||
end_idx = None
|
||||
for i, line in enumerate(lines):
|
||||
if i > 0 and line.strip() == "---":
|
||||
end_idx = i
|
||||
break
|
||||
if end_idx is None:
|
||||
continue
|
||||
lines.insert(end_idx, new_block)
|
||||
else:
|
||||
start_idx = None
|
||||
end_idx = None
|
||||
for i, line in enumerate(lines):
|
||||
if line.startswith("claims_extracted:"):
|
||||
start_idx = i
|
||||
elif start_idx is not None and not line.startswith("- "):
|
||||
end_idx = i
|
||||
break
|
||||
if start_idx is None:
|
||||
continue
|
||||
if end_idx is None:
|
||||
end_idx = len(lines)
|
||||
lines[start_idx:end_idx] = new_block.split("\n")
|
||||
|
||||
modified_sources[str(source_path)] = "\n".join(lines)
|
||||
logger.info("backlink_source_claims: added %s to %s", claim_filename, sourced_from)
|
||||
|
||||
if modified_sources:
|
||||
async with async_main_worktree_lock():
|
||||
for sp, content in modified_sources.items():
|
||||
Path(sp).write_text(content)
|
||||
await git_fn("add", sp, cwd=str(config.MAIN_WORKTREE))
|
||||
rc, out = await git_fn(
|
||||
"commit", "-m", f"backlink: update claims_extracted on {len(modified_sources)} source(s)",
|
||||
cwd=str(config.MAIN_WORKTREE),
|
||||
timeout=15,
|
||||
)
|
||||
if rc == 0:
|
||||
push_rc, push_out = await git_fn(
|
||||
"push", "origin", "main",
|
||||
cwd=str(config.MAIN_WORKTREE),
|
||||
timeout=30,
|
||||
)
|
||||
if push_rc == 0:
|
||||
logger.info("backlink_source_claims: %d source(s) updated and pushed", len(modified_sources))
|
||||
else:
|
||||
logger.warning("backlink_source_claims: push failed: %s", push_out[:200])
|
||||
else:
|
||||
logger.warning("backlink_source_claims: commit failed: %s", out[:200])
|
||||
|
||||
except Exception:
|
||||
logger.exception("backlink_source_claims: failed (non-fatal)")
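The net effect on a source file's frontmatter, in miniature (claim names invented):

```python
# before:                       after:
#   claims_extracted:             claims_extracted:
#   - earlier-claim               - earlier-claim
#                                 - newly-merged-claim
```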
|
||||
|
||||
|
||||
def archive_source_for_pr(branch: str, domain: str, merged: bool = True):
|
||||
"""Move source from queue/ to archive/{domain}/ after PR merge or close.
|
||||
|
||||
|
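
For reference, a minimal standalone sketch of the block splice the loop above performs, using hypothetical frontmatter content rather than anything from the repo:

sample = "---\ntitle: example-source\nclaims_extracted:\n- first-claim\n---\nbody text\n"
new_block = "claims_extracted:\n- first-claim\n- second-claim"
lines = sample.split("\n")
# Locate the existing claims_extracted block and swap it for the rebuilt one.
start = next(i for i, l in enumerate(lines) if l.startswith("claims_extracted:"))
end = next((i for i, l in enumerate(lines[start + 1:], start + 1) if not l.startswith("- ")), len(lines))
lines[start:end] = new_block.split("\n")
print("\n".join(lines))  # the frontmatter now lists both claims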

ops/backfill-contributor-roles.py (new file, +113 lines)
@@ -0,0 +1,113 @@
#!/usr/bin/env python3
"""Backfill contributor role counts from prs.commit_type.

Resets all role counts to 0, then re-derives them from the prs table's
commit_type column using the COMMIT_TYPE_TO_ROLE mapping. This corrects
the bug where all contributors were recorded as 'extractor' regardless
of their actual commit_type.

Usage:
    python3 ops/backfill-contributor-roles.py [--dry-run]
"""

import argparse
import sqlite3
import sys
import os

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from lib.contributor import COMMIT_TYPE_TO_ROLE, commit_type_to_role

DB_PATH = os.environ.get("PIPELINE_DB", "/opt/teleo-eval/pipeline/pipeline.db")


def backfill(db_path: str, dry_run: bool = False):
    conn = sqlite3.connect(db_path)
    conn.row_factory = sqlite3.Row

    # Get all merged PRs with commit_type and agent
    prs = conn.execute("""
        SELECT number, commit_type, agent, branch
        FROM prs
        WHERE status = 'merged' AND agent IS NOT NULL
        ORDER BY number
    """).fetchall()

    print(f"Processing {len(prs)} merged PRs...")

    # Reset all role counts
    if not dry_run:
        conn.execute("""
            UPDATE contributors SET
                extractor_count = 0,
                challenger_count = 0,
                synthesizer_count = 0,
                sourcer_count = 0
        """)
        print("Reset all role counts to 0")

    # Tally roles from commit_type
    role_counts: dict[str, dict[str, int]] = {}
    for pr in prs:
        agent = pr["agent"].lower() if pr["agent"] else None
        if not agent or agent in ("external", "pipeline"):
            continue

        commit_type = pr["commit_type"] or "extract"
        role = commit_type_to_role(commit_type)

        if agent not in role_counts:
            role_counts[agent] = {
                "extractor_count": 0, "challenger_count": 0,
                "synthesizer_count": 0, "sourcer_count": 0,
                "reviewer_count": 0,
            }
        role_col = f"{role}_count"
        if role_col in role_counts[agent]:
            role_counts[agent][role_col] += 1

    # Apply tallied counts
    for handle, counts in sorted(role_counts.items()):
        non_zero = {k: v for k, v in counts.items() if v > 0}
        print(f"  {handle}: {non_zero or '(no knowledge PRs)'}")
        if not dry_run and non_zero:
            set_clauses = ", ".join(f"{k} = {v}" for k, v in non_zero.items())
            conn.execute(
                f"UPDATE contributors SET {set_clauses}, updated_at = datetime('now') WHERE handle = ?",
                (handle,),
            )

    if not dry_run:
        conn.commit()
        print("\nBackfill committed.")
    else:
        print("\n[DRY RUN] No changes made.")

    # Print summary
    print("\nRole distribution across all contributors:")
    if not dry_run:
        rows = conn.execute("""
            SELECT handle, extractor_count, challenger_count, synthesizer_count,
                   sourcer_count, reviewer_count
            FROM contributors
            ORDER BY (extractor_count + challenger_count + synthesizer_count) DESC
        """).fetchall()
        for r in rows:
            parts = []
            if r["extractor_count"]: parts.append(f"extract:{r['extractor_count']}")
            if r["challenger_count"]: parts.append(f"challenge:{r['challenger_count']}")
            if r["synthesizer_count"]: parts.append(f"synthesize:{r['synthesizer_count']}")
            if r["sourcer_count"]: parts.append(f"source:{r['sourcer_count']}")
            if r["reviewer_count"]: parts.append(f"review:{r['reviewer_count']}")
            if parts:
                print(f"  {r['handle']}: {', '.join(parts)}")

    conn.close()


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--dry-run", action="store_true")
    parser.add_argument("--db", default=DB_PATH)
    args = parser.parse_args()
    backfill(args.db, args.dry_run)
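
The backfill leans on commit_type_to_role from lib/contributor.py, which is not part of this diff. A rough sketch of the mapping it presumably implements, for orientation only (the authoritative table lives in lib/contributor.py and may differ):

# Assumed shape, not the actual lib/contributor.py source.
COMMIT_TYPE_TO_ROLE = {
    "extract": "extractor",
    "knowledge": "extractor",
    "challenge": "challenger",
    "enrich": "synthesizer",
    "reweave": "synthesizer",
}

def commit_type_to_role(commit_type: str) -> str:
    # Unknown or missing commit_type falls back to extractor, matching the
    # script's own `pr["commit_type"] or "extract"` default above.
    return COMMIT_TYPE_TO_ROLE.get((commit_type or "").lower(), "extractor")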

scripts/audit-wiki-links.py (new file, +259 lines)
@@ -0,0 +1,259 @@
#!/usr/bin/env python3
"""Audit wiki-links across the teleo-codex knowledge base.

Crawls domains/, foundations/, core/, decisions/ for [[wiki-links]].
Resolves each link against known claim files, entity files, and _map files.
Reports dead links, orphaned claims, and link counts.

Output: JSON to stdout with dead links, orphans, and per-file link counts.
"""

import json
import os
import re
import sys
import unicodedata
from pathlib import Path

CODEX_ROOT = Path(os.environ.get("CODEX_ROOT", "/opt/teleo-eval/workspaces/main"))
CLAIM_DIRS = ["domains", "foundations", "core", "decisions"]
ENTITY_DIR = "entities"

WIKI_LINK_RE = re.compile(r"\[\[([^\]]+)\]\]")


def slugify(title: str) -> str:
    """Convert a wiki-link title to the kebab-case slug used for filenames."""
    s = title.strip().lower()
    s = unicodedata.normalize("NFKD", s)
    s = re.sub(r"[^\w\s-]", "", s)
    s = re.sub(r"[\s_]+", "-", s)
    s = re.sub(r"-+", "-", s)
    return s.strip("-")


def build_index(codex: Path) -> dict:
    """Build a lookup index of all resolvable targets.

    Returns dict mapping normalized slug -> file path.
    Also maps raw stem (filename without .md) -> file path.
    """
    index = {}

    # Index claim files across all claim directories
    for claim_dir in CLAIM_DIRS:
        d = codex / claim_dir
        if not d.exists():
            continue
        for md in d.rglob("*.md"):
            stem = md.stem
            rel = str(md.relative_to(codex))
            # Map by stem (exact filename match)
            index[stem.lower()] = rel
            # Map by slugified stem
            index[slugify(stem)] = rel

    # Index entity files
    entity_root = codex / ENTITY_DIR
    if entity_root.exists():
        for md in entity_root.rglob("*.md"):
            stem = md.stem
            rel = str(md.relative_to(codex))
            index[stem.lower()] = rel
            index[slugify(stem)] = rel

    # Index maps/ directory (MOC-style overview docs)
    maps_root = codex / "maps"
    if maps_root.exists():
        for md in maps_root.rglob("*.md"):
            stem = md.stem
            rel = str(md.relative_to(codex))
            index[stem.lower()] = rel
            index[slugify(stem)] = rel

    # Index top-level docs that might be link targets
    for special in ["overview.md", "livingip-overview.md"]:
        p = codex / special
        if p.exists():
            index[p.stem.lower()] = str(p.relative_to(codex))

    # Index agents/ beliefs and positions (sometimes linked)
    agents_dir = codex / "agents"
    if agents_dir.exists():
        for md in agents_dir.rglob("*.md"):
            stem = md.stem
            rel = str(md.relative_to(codex))
            index[stem.lower()] = rel

    return index


def resolve_link(link_text: str, index: dict, source_dir: str) -> str | None:
    """Try to resolve a wiki-link target. Returns file path or None."""
    text = link_text.strip()

    # Special case: [[_map]] resolves to _map.md in the same domain directory
    if text == "_map":
        parts = source_dir.split("/")
        if len(parts) >= 2:
            candidate = f"{parts[0]}/{parts[1]}/_map.md"
            if (CODEX_ROOT / candidate).exists():
                return candidate
        return None

    # Path-style references like [[domains/health/_map]]
    if "/" in text:
        candidate = text.rstrip("/")
        if not candidate.endswith(".md"):
            candidate += ".md"
        if (CODEX_ROOT / candidate).exists():
            return candidate
        return None

    # Try exact stem match (lowercased)
    key = text.lower()
    if key in index:
        return index[key]

    # Try slugified version
    slug = slugify(text)
    if slug in index:
        return index[slug]

    # Try with common variations
    for variant in [
        slug.replace("metadaos", "metadao"),
        slug.replace("ais", "ai"),
    ]:
        if variant in index:
            return index[variant]

    return None


def audit(codex: Path) -> dict:
    """Run the full wiki-link audit."""
    index = build_index(codex)

    dead_links = []      # {file, link, line_number}
    link_counts = {}     # file -> {outbound: N, targets: []}
    all_targets = set()  # files that are linked TO
    all_files = set()    # all claim/foundation files

    # Scan all markdown files in claim directories
    for claim_dir in CLAIM_DIRS:
        d = codex / claim_dir
        if not d.exists():
            continue
        for md in d.rglob("*.md"):
            rel = str(md.relative_to(codex))
            all_files.add(rel)
            source_dir = str(md.parent.relative_to(codex))

            try:
                content = md.read_text(encoding="utf-8")
            except Exception:
                continue

            links_in_file = []
            for i, line in enumerate(content.split("\n"), 1):
                for match in WIKI_LINK_RE.finditer(line):
                    link_text = match.group(1)
                    # Skip links with | (display text aliases) - take the target part
                    if "|" in link_text:
                        link_text = link_text.split("|")[0].strip()

                    resolved = resolve_link(link_text, index, source_dir)
                    if resolved:
                        all_targets.add(resolved)
                        links_in_file.append(resolved)
                    else:
                        dead_links.append({
                            "file": rel,
                            "link": link_text,
                            "line": i,
                        })

            link_counts[rel] = {
                "outbound": len(links_in_file),
                "targets": links_in_file,
            }

    # Find orphaned claims (no inbound links AND no outbound links)
    files_with_outbound = {f for f, c in link_counts.items() if c["outbound"] > 0}
    orphaned = sorted(
        f for f in all_files
        if f not in all_targets
        and f not in files_with_outbound
        and not f.endswith("_map.md")  # MOC files are structural, not orphans
    )

    # Compute inbound link counts
    inbound_counts = {}
    for f, c in link_counts.items():
        for target in c["targets"]:
            inbound_counts[target] = inbound_counts.get(target, 0) + 1

    # Claims with high outbound (good connectivity)
    high_connectivity = sorted(
        [(f, c["outbound"]) for f, c in link_counts.items() if c["outbound"] >= 3],
        key=lambda x: -x[1],
    )

    # Summary stats
    total_links = sum(c["outbound"] for c in link_counts.values())
    files_with_links = sum(1 for c in link_counts.values() if c["outbound"] > 0)

    # Domain breakdown of dead links
    dead_by_domain = {}
    for dl in dead_links:
        parts = dl["file"].split("/")
        domain = parts[1] if len(parts) >= 3 else parts[0]
        dead_by_domain[domain] = dead_by_domain.get(domain, 0) + 1

    # Domain breakdown of orphans
    orphan_by_domain = {}
    for o in orphaned:
        parts = o.split("/")
        domain = parts[1] if len(parts) >= 3 else parts[0]
        orphan_by_domain[domain] = orphan_by_domain.get(domain, 0) + 1

    return {
        "summary": {
            "total_files": len(all_files),
            "total_links": total_links,
            "files_with_links": files_with_links,
            "files_without_links": len(all_files) - files_with_links,
            "dead_link_count": len(dead_links),
            "orphan_count": len(orphaned),
            "avg_links_per_file": round(total_links / max(len(all_files), 1), 2),
            "high_connectivity_count": len(high_connectivity),
        },
        "dead_links": dead_links,
        "dead_by_domain": dict(sorted(dead_by_domain.items(), key=lambda x: -x[1])),
        "orphaned": orphaned,
        "orphan_by_domain": dict(sorted(orphan_by_domain.items(), key=lambda x: -x[1])),
        "high_connectivity": [{"file": f, "outbound_links": n} for f, n in high_connectivity[:20]],
        "inbound_top20": sorted(
            [{"file": f, "inbound_links": n} for f, n in inbound_counts.items()],
            key=lambda x: -x["inbound_links"],
        )[:20],
    }


if __name__ == "__main__":
    codex = Path(sys.argv[1]) if len(sys.argv) > 1 else CODEX_ROOT
    result = audit(codex)
    json.dump(result, sys.stdout, indent=2)
    print()

    # Print human-readable summary to stderr
    s = result["summary"]
    print(f"\n=== Wiki-Link Audit ===", file=sys.stderr)
    print(f"Files scanned: {s['total_files']}", file=sys.stderr)
    print(f"Total links: {s['total_links']}", file=sys.stderr)
    print(f"Files with links: {s['files_with_links']} ({100*s['files_with_links']//max(s['total_files'],1)}%)", file=sys.stderr)
    print(f"Dead links: {s['dead_link_count']}", file=sys.stderr)
    print(f"Orphaned claims: {s['orphan_count']}", file=sys.stderr)
    print(f"Avg links/file: {s['avg_links_per_file']}", file=sys.stderr)
    print(f"High connectivity (≥3 links): {s['high_connectivity_count']}", file=sys.stderr)
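
A small consumer sketch for triaging the JSON this audit emits; the script path and working directory are assumptions, and only keys shown in the return value above are used:

import json
import subprocess
import sys

out = subprocess.run(
    [sys.executable, "scripts/audit-wiki-links.py"],
    capture_output=True, text=True, check=True,
)
report = json.loads(out.stdout)
print("Dead links by domain:", report["dead_by_domain"])
for dl in report["dead_links"][:10]:
    print(f"  {dl['file']}:{dl['line']}  [[{dl['link']}]]")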

scripts/backfill-events.py (new file, +618 lines)
@@ -0,0 +1,618 @@
#!/usr/bin/env python3
"""Backfill contribution_events by replaying merged PRs from pipeline.db + worktree.

For each merged PR:
- Derive author from prs.submitted_by → git author → branch prefix
- Emit author event (role=author, weight=0.30, claim_path=NULL)
- For each claim file under a knowledge prefix, parse frontmatter and emit
  originator events for sourcer entries that differ from the author
- Emit evaluator events for Leo (when leo_verdict='approve') and domain_agent
  (when domain_verdict='approve' and not Leo)
- Emit challenger/synthesizer events for Pentagon-Agent trailers on
  agent-owned branches (theseus/*, rio/*, etc.) based on commit_type

Idempotent via the partial UNIQUE indexes on contribution_events. Safe to re-run.

Usage:
    python3 scripts/backfill-events.py --dry-run   # Count events without writing
    python3 scripts/backfill-events.py             # Apply

Runs read-only against the git worktree; only writes to pipeline.db.
"""
import argparse
import os
import re
import sqlite3
import subprocess
import sys
from collections import Counter
from pathlib import Path

DB_PATH = os.environ.get("PIPELINE_DB", "/opt/teleo-eval/pipeline/pipeline.db")
REPO_DIR = os.environ.get("REPO_DIR", "/opt/teleo-eval/workspaces/main")

# Role weights — must match lib/contributor.py ROLE_WEIGHTS.
ROLE_WEIGHTS = {
    "author": 0.30,
    "challenger": 0.25,
    "synthesizer": 0.20,
    "originator": 0.15,
    "evaluator": 0.05,
}

PENTAGON_AGENTS = frozenset({
    "rio", "leo", "theseus", "vida", "clay", "astra",
    "oberon", "argus", "rhea", "ganymede", "epimetheus", "hermes", "ship",
    "pipeline",
})

# Keep in sync with lib/attribution.AGENT_BRANCH_PREFIXES.
# Duplicated here because this script runs standalone (no pipeline package import).
AGENT_BRANCH_PREFIXES = (
    "rio/", "theseus/", "leo/", "vida/", "astra/", "clay/", "oberon/",
)

TRAILER_EVENT_ROLE = {
    "challenge": "challenger",
    "enrich": "synthesizer",
    "research": "synthesizer",
    "reweave": "synthesizer",
}

KNOWLEDGE_PREFIXES = ("domains/", "core/", "foundations/", "decisions/")

BOT_AUTHORS = frozenset({
    "teleo", "teleo-bot", "pipeline",
    "github-actions[bot]", "forgejo-actions",
})

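The idempotency note above depends on the v24 partial UNIQUE indexes, which are not shown in this diff. Purely as an illustrative guess at their shape (the real migration is authoritative and may differ), something like the following would give INSERT OR IGNORE the dedupe behavior the script relies on:

# Illustrative guess at the v24 dedupe indexes; verify against the actual migration.
DEDUPE_DDL = """
CREATE UNIQUE INDEX IF NOT EXISTS idx_events_pr_level
    ON contribution_events(handle, role, pr_number)
    WHERE claim_path IS NULL;
CREATE UNIQUE INDEX IF NOT EXISTS idx_events_claim_level
    ON contribution_events(handle, role, pr_number, claim_path)
    WHERE claim_path IS NOT NULL;
"""
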
def normalize_handle(conn: sqlite3.Connection, handle: str) -> str:
|
||||
if not handle:
|
||||
return ""
|
||||
h = handle.strip().lower().lstrip("@")
|
||||
row = conn.execute("SELECT canonical FROM contributor_aliases WHERE alias = ?", (h,)).fetchone()
|
||||
if row:
|
||||
return row[0]
|
||||
return h
|
||||
|
||||
|
||||
def classify_kind(handle: str) -> str:
|
||||
h = handle.strip().lower().lstrip("@")
|
||||
return "agent" if h in PENTAGON_AGENTS else "person"
|
||||
|
||||
|
||||
def parse_frontmatter(text: str):
|
||||
"""Minimal YAML frontmatter parser using PyYAML when available."""
|
||||
if not text.startswith("---"):
|
||||
return None
|
||||
end = text.find("---", 3)
|
||||
if end == -1:
|
||||
return None
|
||||
raw = text[3:end]
|
||||
try:
|
||||
import yaml
|
||||
fm = yaml.safe_load(raw)
|
||||
return fm if isinstance(fm, dict) else None
|
||||
except ImportError:
|
||||
return None
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def extract_sourcers_from_file(path: Path) -> list[str]:
|
||||
"""Return the sourcer handles from a claim file's frontmatter.
|
||||
|
||||
Matches three formats:
|
||||
1. Block: `attribution: { sourcer: [{handle: "x"}, ...] }`
|
||||
2. Bare-key flat: `sourcer: alexastrum`
|
||||
3. Prefix-keyed: `attribution_sourcer: alexastrum`
|
||||
"""
|
||||
try:
|
||||
content = path.read_text(encoding="utf-8")
|
||||
except (FileNotFoundError, PermissionError, UnicodeDecodeError):
|
||||
return []
|
||||
fm = parse_frontmatter(content)
|
||||
if not fm:
|
||||
return []
|
||||
|
||||
handles: list[str] = []
|
||||
|
||||
attr = fm.get("attribution")
|
||||
if isinstance(attr, dict):
|
||||
entries = attr.get("sourcer", [])
|
||||
if isinstance(entries, list):
|
||||
for e in entries:
|
||||
if isinstance(e, dict) and "handle" in e:
|
||||
handles.append(e["handle"])
|
||||
elif isinstance(e, str):
|
||||
handles.append(e)
|
||||
elif isinstance(entries, str):
|
||||
handles.append(entries)
|
||||
return handles
|
||||
|
||||
flat = fm.get("attribution_sourcer")
|
||||
if flat:
|
||||
if isinstance(flat, str):
|
||||
handles.append(flat)
|
||||
elif isinstance(flat, list):
|
||||
handles.extend(v for v in flat if isinstance(v, str))
|
||||
if handles:
|
||||
return handles
|
||||
|
||||
bare = fm.get("sourcer")
|
||||
if bare:
|
||||
if isinstance(bare, str):
|
||||
handles.append(bare)
|
||||
elif isinstance(bare, list):
|
||||
handles.extend(v for v in bare if isinstance(v, str))
|
||||
|
||||
return handles
|
||||
|
||||
|
||||
_HANDLE_RE = re.compile(r"^[a-z0-9][a-z0-9_-]{0,38}$")
|
||||
|
||||
|
||||
def valid_handle(h: str) -> bool:
|
||||
if not h:
|
||||
return False
|
||||
lower = h.strip().lower().lstrip("@")
|
||||
if lower.endswith("-") or lower.endswith("_"):
|
||||
return False
|
||||
return bool(_HANDLE_RE.match(lower))
|
||||
|
||||
|
||||
def git(*args, cwd: str = REPO_DIR, timeout: int = 30) -> str:
|
||||
"""Run a git command, return stdout. Returns empty string on failure."""
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["git", *args],
|
||||
cwd=cwd, capture_output=True, text=True, timeout=timeout, check=False,
|
||||
)
|
||||
return result.stdout
|
||||
except (subprocess.TimeoutExpired, OSError):
|
||||
return ""
|
||||
|
||||
|
||||
def git_first_commit_author(pr_branch: str, merged_at: str) -> str:
|
||||
"""Best-effort: find git author of first non-merge commit on the branch.
|
||||
|
||||
PR branches are usually deleted after merge. We fall back to scanning main
|
||||
commits around merged_at for commits matching the branch slug.
|
||||
"""
|
||||
# Post-merge branches are cleaned up. For the backfill, we accept that this
|
||||
# path rarely yields results and rely on submitted_by + branch prefix.
|
||||
return ""
|
||||
|
||||
|
||||
def derive_author(conn: sqlite3.Connection, pr: dict) -> str | None:
|
||||
"""Author precedence: submitted_by → branch-prefix agent for agent-owned branches."""
|
||||
if pr.get("submitted_by"):
|
||||
cand = pr["submitted_by"].strip().lower().lstrip("@")
|
||||
if cand and cand not in BOT_AUTHORS:
|
||||
return cand
|
||||
branch = pr.get("branch") or ""
|
||||
if "/" in branch:
|
||||
prefix = branch.split("/", 1)[0].lower()
|
||||
if prefix in ("rio", "theseus", "leo", "vida", "clay", "astra", "oberon"):
|
||||
return prefix
|
||||
return None
|
||||
|
||||
|
||||
def find_pr_for_claim(
|
||||
conn: sqlite3.Connection,
|
||||
repo: Path,
|
||||
md: Path,
|
||||
) -> tuple[int | None, str]:
|
||||
"""Recover the Forgejo PR number that introduced a claim file.
|
||||
|
||||
Returns (pr_number, strategy) — strategy is one of:
|
||||
'sourced_from' — frontmatter sourced_from matched prs.source_path
|
||||
'git_subject' — git log first-add commit message matched a branch pattern
|
||||
'title_desc' — filename stem matched a title in prs.description
|
||||
'github_pr' — recovery commit mentioned GitHub PR # → prs.github_pr
|
||||
'none' — no strategy found a match
|
||||
|
||||
Order is chosen by reliability:
|
||||
1. sourced_from (explicit provenance, most reliable when present)
|
||||
2. git_subject (covers Leo research, Cameron challenges, Theseus contrib)
|
||||
3. title_desc (current fallback — brittle when description is NULL)
|
||||
4. github_pr (recovery commits referencing erased GitHub PRs)
|
||||
"""
|
||||
rel = str(md.relative_to(repo))
|
||||
|
||||
# Strategy 1: sourced_from frontmatter → prs.source_path
|
||||
try:
|
||||
content = md.read_text(encoding="utf-8")
|
||||
except (FileNotFoundError, PermissionError, UnicodeDecodeError):
|
||||
content = ""
|
||||
fm = parse_frontmatter(content) if content else None
|
||||
if fm:
|
||||
sourced = fm.get("sourced_from")
|
||||
candidate_paths: list[str] = []
|
||||
if isinstance(sourced, str) and sourced:
|
||||
candidate_paths.append(sourced)
|
||||
elif isinstance(sourced, list):
|
||||
candidate_paths.extend(s for s in sourced if isinstance(s, str))
|
||||
for sp in candidate_paths:
|
||||
stem = Path(sp).stem
|
||||
if not stem:
|
||||
continue
|
||||
row = conn.execute(
|
||||
"""SELECT number FROM prs
|
||||
WHERE source_path LIKE ? AND status='merged'
|
||||
ORDER BY merged_at ASC LIMIT 1""",
|
||||
(f"%{stem}.md",),
|
||||
).fetchone()
|
||||
if row:
|
||||
return row["number"], "sourced_from"
|
||||
|
||||
# Strategy 2: git log first-add commit → subject pattern → prs.branch
|
||||
# Default log order is reverse-chronological; take the last line (oldest)
|
||||
# to get the original addition, not later rewrites.
|
||||
log_out = git(
|
||||
"log", "--diff-filter=A", "--follow",
|
||||
"--format=%H|||%s|||%b", "--", rel,
|
||||
)
|
||||
if log_out.strip():
|
||||
# Split on the delimiter we chose. Each commit produces 3 fields but
|
||||
# %b can contain blank lines — group by lines that look like a SHA.
|
||||
blocks: list[tuple[str, str, str]] = []
|
||||
current: list[str] = []
|
||||
for line in log_out.splitlines():
|
||||
if re.match(r"^[a-f0-9]{40}\|\|\|", line):
|
||||
if current:
|
||||
parts = "\n".join(current).split("|||", 2)
|
||||
if len(parts) == 3:
|
||||
blocks.append((parts[0], parts[1], parts[2]))
|
||||
current = [line]
|
||||
else:
|
||||
current.append(line)
|
||||
if current:
|
||||
parts = "\n".join(current).split("|||", 2)
|
||||
if len(parts) == 3:
|
||||
blocks.append((parts[0], parts[1], parts[2]))
|
||||
if blocks:
|
||||
# Oldest addition — git log defaults to reverse-chronological
|
||||
_oldest_sha, subject, body = blocks[-1]
|
||||
|
||||
# Pattern: "<agent>: extract claims from <slug>"
|
||||
m = re.match(r"^(\w+):\s*extract\s+claims\s+from\s+(\S+)", subject)
|
||||
if m:
|
||||
            # str.rstrip strips a trailing character set, not a suffix, so ".md"
            # here would also eat a slug's trailing 'm'/'d' letters; use
            # removesuffix to drop the extension only.
            slug = m.group(2).removesuffix(".md").rstrip(".")
|
||||
row = conn.execute(
|
||||
"""SELECT number FROM prs
|
||||
WHERE branch LIKE ? AND status='merged'
|
||||
ORDER BY merged_at ASC LIMIT 1""",
|
||||
(f"extract/{slug}%",),
|
||||
).fetchone()
|
||||
if row:
|
||||
return row["number"], "git_subject"
|
||||
|
||||
# Pattern: "<agent>: research session <date>"
|
||||
m = re.match(r"^(\w+):\s*research\s+session\s+(\d{4}-\d{2}-\d{2})", subject)
|
||||
if m:
|
||||
agent = m.group(1).lower()
|
||||
date = m.group(2)
|
||||
row = conn.execute(
|
||||
"""SELECT number FROM prs
|
||||
WHERE branch LIKE ? AND status='merged'
|
||||
ORDER BY merged_at ASC LIMIT 1""",
|
||||
(f"{agent}/research-{date}%",),
|
||||
).fetchone()
|
||||
if row:
|
||||
return row["number"], "git_subject"
|
||||
|
||||
# Pattern: "<agent>: challenge" / contrib challenges / entity batches
|
||||
m = re.match(r"^(\w+):\s*(?:challenge|contrib|entity|synthesize)", subject)
|
||||
if m:
|
||||
agent = m.group(1).lower()
|
||||
row = conn.execute(
|
||||
"""SELECT number FROM prs
|
||||
WHERE branch LIKE ? AND status='merged'
|
||||
ORDER BY merged_at ASC LIMIT 1""",
|
||||
(f"{agent}/%",),
|
||||
).fetchone()
|
||||
if row:
|
||||
return row["number"], "git_subject"
|
||||
|
||||
# Recovery commits referencing erased GitHub PRs (Alex/Cameron).
|
||||
# Subject: "Recover <who> contribution from GitHub PR #NN (...)".
|
||||
# Match only when a corresponding prs row exists with github_pr=NN —
|
||||
# otherwise the claims were direct-to-main without a Forgejo PR
|
||||
# record, which requires a synthetic PR row (follow-up, not in
|
||||
# this script's scope).
|
||||
gh_match = re.search(r"GitHub\s+PR\s+#(\d+)", subject + "\n" + body)
|
||||
if gh_match:
|
||||
gh_pr = int(gh_match.group(1))
|
||||
row = conn.execute(
|
||||
"SELECT number FROM prs WHERE github_pr = ? AND status='merged' LIMIT 1",
|
||||
(gh_pr,),
|
||||
).fetchone()
|
||||
if row:
|
||||
return row["number"], "github_pr"
|
||||
|
||||
# Pattern: bare "Extract N claims from <source-fragment>" (no
|
||||
# agent prefix). Used in early research PRs like Shaga's claims
|
||||
# at PR #2025. Fall back to time-proximity: find the earliest
|
||||
# agent-branch PR merged within 24h AFTER this commit's date.
|
||||
m = re.match(r"^Extract\s+\d+\s+claims\s+from\b", subject)
|
||||
if m:
|
||||
# Get commit author date
|
||||
date_out = git(
|
||||
"log", "-1", "--format=%aI", _oldest_sha, timeout=10,
|
||||
)
|
||||
commit_date = date_out.strip() if date_out.strip() else None
|
||||
if commit_date:
|
||||
# git %aI returns ISO 8601 with T-separator; prs.merged_at
|
||||
# uses SQLite's space-separator. Lexicographic comparison
|
||||
# fails across formats (space<T), so normalize commit_date
|
||||
# via datetime() before comparing. Without this, PRs merged
|
||||
# within the same calendar day but earlier than the commit
|
||||
# hour are silently excluded (caught by Ganymede review —
|
||||
# Shaga's #2025 was dropped in favor of later #2032).
|
||||
row = conn.execute(
|
||||
"""SELECT number FROM prs
|
||||
WHERE status='merged'
|
||||
AND merged_at >= datetime(?)
|
||||
AND merged_at <= datetime(datetime(?), '+24 hours')
|
||||
AND (branch LIKE 'leo/%' OR branch LIKE 'theseus/%'
|
||||
OR branch LIKE 'rio/%' OR branch LIKE 'astra/%'
|
||||
OR branch LIKE 'vida/%' OR branch LIKE 'clay/%')
|
||||
ORDER BY merged_at ASC LIMIT 1""",
|
||||
(commit_date, commit_date),
|
||||
).fetchone()
|
||||
if row:
|
||||
return row["number"], "git_time_proximity"
|
||||
|
||||
return None, "none"
|
||||
|
||||
|
||||
def emit(conn, counts, dry_run, handle, role, pr_number, claim_path, domain, channel, timestamp):
|
||||
canonical = normalize_handle(conn, handle)
|
||||
if not valid_handle(canonical):
|
||||
return
|
||||
kind = classify_kind(canonical)
|
||||
weight = ROLE_WEIGHTS[role]
|
||||
counts[(role, "attempt")] += 1
|
||||
if dry_run:
|
||||
counts[(role, "would_insert")] += 1
|
||||
return
|
||||
cur = conn.execute(
|
||||
"""INSERT OR IGNORE INTO contribution_events
|
||||
(handle, kind, role, weight, pr_number, claim_path, domain, channel, timestamp)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, COALESCE(?, datetime('now')))""",
|
||||
(canonical, kind, role, weight, pr_number, claim_path, domain, channel, timestamp),
|
||||
)
|
||||
if cur.rowcount > 0:
|
||||
counts[(role, "inserted")] += 1
|
||||
else:
|
||||
counts[(role, "skipped_dup")] += 1
|
||||
|
||||
|
||||
def files_added_in_pr(pr_number: int, branch: str) -> list[str]:
|
||||
"""Best-effort: list added .md files in the PR.
|
||||
|
||||
Uses prs.source_path as a fallback signal (the claim being added). If the
|
||||
branch no longer exists post-merge, this will return []; we accept the loss
|
||||
for historical PRs where the granular per-claim events can't be recovered —
|
||||
PR-level author/evaluator events still land correctly.
|
||||
"""
|
||||
# Post-merge PR branches are deleted from Forgejo so we can't diff them.
|
||||
# For the backfill we use prs.source_path — for extract/* PRs this points to
|
||||
# the source inbox file; we can glob the claim files from the extract branch
|
||||
# commit on main. But main's commits don't track which files a given PR touched.
|
||||
# Accept the loss: backfill emits only PR-level events (author, evaluator,
|
||||
# challenger/synthesizer). Originator events come from parsing claim files
|
||||
# attributed to the branch via description field which lists claim titles.
|
||||
return []
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--dry-run", action="store_true")
|
||||
parser.add_argument("--limit", type=int, default=0, help="Process at most N PRs (0 = all)")
|
||||
args = parser.parse_args()
|
||||
|
||||
if not Path(DB_PATH).exists():
|
||||
print(f"ERROR: DB not found at {DB_PATH}", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
conn = sqlite3.connect(DB_PATH, timeout=30)
|
||||
conn.row_factory = sqlite3.Row
|
||||
|
||||
# Sanity: contribution_events exists (v24 migration applied)
|
||||
try:
|
||||
conn.execute("SELECT 1 FROM contribution_events LIMIT 1")
|
||||
except sqlite3.OperationalError:
|
||||
print("ERROR: contribution_events table missing. Run migration v24 first.", file=sys.stderr)
|
||||
sys.exit(2)
|
||||
|
||||
# Walk all merged knowledge PRs
|
||||
query = """
|
||||
SELECT number, branch, domain, source_channel, submitted_by,
|
||||
leo_verdict, domain_verdict, domain_agent,
|
||||
commit_type, merged_at
|
||||
FROM prs
|
||||
WHERE status = 'merged'
|
||||
ORDER BY merged_at ASC
|
||||
"""
|
||||
if args.limit:
|
||||
query += f" LIMIT {args.limit}"
|
||||
prs = conn.execute(query).fetchall()
|
||||
print(f"Replaying {len(prs)} merged PRs (dry_run={args.dry_run})...")
|
||||
|
||||
counts: Counter = Counter()
|
||||
repo = Path(REPO_DIR)
|
||||
|
||||
for pr in prs:
|
||||
pr_number = pr["number"]
|
||||
branch = pr["branch"] or ""
|
||||
domain = pr["domain"]
|
||||
channel = pr["source_channel"]
|
||||
merged_at = pr["merged_at"]
|
||||
|
||||
# Skip pipeline-only branches for author credit (extract/*, reweave/*,
|
||||
# fix/*, ingestion/*, epimetheus/*) — those are infrastructure. But
|
||||
# evaluator credit for Leo/domain_agent still applies.
|
||||
is_pipeline_branch = branch.startswith((
|
||||
"extract/", "reweave/", "fix/", "ingestion/", "epimetheus/",
|
||||
))
|
||||
|
||||
# ── AUTHOR ──
|
||||
# For pipeline branches, submitted_by carries the real author (the
|
||||
# human who submitted the source via Telegram/etc). For agent branches,
|
||||
# the agent is author. For external branches (gh-pr-*), git author is
|
||||
# in submitted_by from the sync-mirror pipeline.
|
||||
author = derive_author(conn, dict(pr))
|
||||
if author:
|
||||
emit(conn, counts, args.dry_run, author, "author", pr_number,
|
||||
None, domain, channel, merged_at)
|
||||
|
||||
# ── EVALUATOR ──
|
||||
if pr["leo_verdict"] == "approve":
|
||||
emit(conn, counts, args.dry_run, "leo", "evaluator", pr_number,
|
||||
None, domain, channel, merged_at)
|
||||
if pr["domain_verdict"] == "approve" and pr["domain_agent"]:
|
||||
dagent = pr["domain_agent"].strip().lower()
|
||||
if dagent and dagent != "leo":
|
||||
emit(conn, counts, args.dry_run, dagent, "evaluator", pr_number,
|
||||
None, domain, channel, merged_at)
|
||||
|
||||
# ── CHALLENGER / SYNTHESIZER from branch+commit_type ──
|
||||
# Only fires on agent-owned branches. Pipeline branches aren't creditable
|
||||
# work (they're machine extraction, evaluator already captures the review).
|
||||
if branch.startswith(AGENT_BRANCH_PREFIXES):
|
||||
prefix = branch.split("/", 1)[0].lower()
|
||||
event_role = TRAILER_EVENT_ROLE.get(pr["commit_type"] or "")
|
||||
if event_role:
|
||||
emit(conn, counts, args.dry_run, prefix, event_role, pr_number,
|
||||
None, domain, channel, merged_at)
|
||||
|
||||
# ── ORIGINATOR per claim ──
|
||||
# Walk claim files currently on main whose content was added in this PR.
|
||||
# We can't diff old branches (deleted post-merge), but for extract PRs
|
||||
# the source_path + description carry claim titles — too lossy to build
|
||||
# per-claim events reliably. Strategy: walk ALL claim files that have a
|
||||
# sourcer in their frontmatter and assign them to the PR whose
|
||||
# source_path matches (via description or filename heuristic).
|
||||
# DEFERRED: per-claim originator events require branch introspection
|
||||
# that fails on deleted branches. Backfill emits PR-level events only.
|
||||
# Forward traffic (post-deploy) gets per-claim originator events via
|
||||
# record_contributor_attribution's added-files walk.
|
||||
|
||||
if not args.dry_run:
|
||||
conn.commit()
|
||||
|
||||
# Originator is emitted in the claim-level pass below, not the PR-level pass.
|
||||
# Previous summary listed it here with attempted=0 which confused operators.
|
||||
print("\n=== PR-level events (author, evaluator, challenger, synthesizer) ===")
|
||||
for role in ("author", "challenger", "synthesizer", "evaluator"):
|
||||
att = counts[(role, "attempt")]
|
||||
if args.dry_run:
|
||||
wi = counts[(role, "would_insert")]
|
||||
print(f" {role:12s} attempted={att:5d} would_insert={wi:5d}")
|
||||
else:
|
||||
ins = counts[(role, "inserted")]
|
||||
skip = counts[(role, "skipped_dup")]
|
||||
print(f" {role:12s} attempted={att:5d} inserted={ins:5d} skipped_dup={skip:5d}")
|
||||
|
||||
# ── Per-claim originator pass ──
|
||||
# Walk the knowledge tree, parse sourcer attribution, and attach each claim
|
||||
# to its merging PR via find_pr_for_claim's multi-strategy recovery.
|
||||
# Apr 24 rewrite (Ganymede-approved): replaces the single-strategy
|
||||
# title→description match with four strategies in reliability order.
|
||||
# Previous script missed PRs with NULL description (Cameron #3377) and
|
||||
# cross-context claims (Shaga's Leo research). Fallback title-match is
|
||||
# preserved to recover anything the git-log path misses.
|
||||
print("\n=== Claim-level originator pass ===")
|
||||
# Build title → pr_number map from prs.description (strategy 3 fallback)
|
||||
title_to_pr: dict[str, int] = {}
|
||||
for r in conn.execute(
|
||||
"SELECT number, description FROM prs WHERE status='merged' AND description IS NOT NULL AND description != ''"
|
||||
).fetchall():
|
||||
desc = r["description"] or ""
|
||||
for title in desc.split(" | "):
|
||||
title = title.strip()
|
||||
if title:
|
||||
# Last-writer wins. Conflicts are rare (titles unique in practice).
|
||||
title_to_pr[title.lower()] = r["number"]
|
||||
|
||||
claim_counts = Counter()
|
||||
strategy_counts = Counter()
|
||||
claim_count = 0
|
||||
originator_count = 0
|
||||
for md in sorted(repo.glob("domains/**/*.md")) + \
|
||||
sorted(repo.glob("core/**/*.md")) + \
|
||||
sorted(repo.glob("foundations/**/*.md")) + \
|
||||
sorted(repo.glob("decisions/**/*.md")):
|
||||
rel = str(md.relative_to(repo))
|
||||
stem = md.stem
|
||||
|
||||
# Strategies 1, 2, 4 via the helper (sourced_from, git_subject, github_pr).
|
||||
pr_number, strategy = find_pr_for_claim(conn, repo, md)
|
||||
|
||||
# Strategy 3 (fallback): title-match against prs.description.
|
||||
if not pr_number:
|
||||
pr_number = title_to_pr.get(stem.lower())
|
||||
if not pr_number:
|
||||
pr_number = title_to_pr.get(stem.replace("-", " ").lower())
|
||||
if pr_number:
|
||||
strategy = "title_desc"
|
||||
|
||||
if not pr_number:
|
||||
claim_counts["no_pr_match"] += 1
|
||||
continue
|
||||
|
||||
sourcers = extract_sourcers_from_file(md)
|
||||
if not sourcers:
|
||||
claim_counts["no_sourcer"] += 1
|
||||
continue
|
||||
|
||||
claim_count += 1
|
||||
strategy_counts[strategy] += 1
|
||||
# Look up author for this PR to skip self-credit
|
||||
pr_row = conn.execute(
|
||||
"SELECT submitted_by, branch, domain, source_channel, merged_at FROM prs WHERE number = ?",
|
||||
(pr_number,),
|
||||
).fetchone()
|
||||
if not pr_row:
|
||||
continue
|
||||
author = derive_author(conn, dict(pr_row))
|
||||
author_canonical = normalize_handle(conn, author) if author else None
|
||||
|
||||
for src_handle in sourcers:
|
||||
src_canonical = normalize_handle(conn, src_handle)
|
||||
if not valid_handle(src_canonical):
|
||||
claim_counts["invalid_handle"] += 1
|
||||
continue
|
||||
if src_canonical == author_canonical:
|
||||
claim_counts["skip_self"] += 1
|
||||
continue
|
||||
emit(conn, counts, args.dry_run, src_handle, "originator", pr_number,
|
||||
rel, pr_row["domain"], pr_row["source_channel"], pr_row["merged_at"])
|
||||
originator_count += 1
|
||||
|
||||
if not args.dry_run:
|
||||
conn.commit()
|
||||
|
||||
print(f" Claims processed: {claim_count}")
|
||||
print(f" Originator events emitted: {originator_count}")
|
||||
print(f" Breakdown: {dict(claim_counts)}")
|
||||
print(f" Strategy hits: {dict(strategy_counts)}")
|
||||
att = counts[("originator", "attempt")]
|
||||
if args.dry_run:
|
||||
wi = counts[("originator", "would_insert")]
|
||||
print(f" {'originator':12s} attempted={att:5d} would_insert={wi:5d}")
|
||||
else:
|
||||
ins = counts[("originator", "inserted")]
|
||||
skip = counts[("originator", "skipped_dup")]
|
||||
print(f" {'originator':12s} attempted={att:5d} inserted={ins:5d} skipped_dup={skip:5d}")
|
||||
|
||||
if not args.dry_run:
|
||||
total = conn.execute("SELECT COUNT(*) FROM contribution_events").fetchone()[0]
|
||||
print(f"\nTotal contribution_events rows: {total}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||

scripts/backfill-sourcer-attribution.py (new executable file, +261 lines)
@@ -0,0 +1,261 @@
#!/usr/bin/env python3
"""Backfill sourcer/extractor/etc. attribution from claim frontmatter.

Walks every merged knowledge file under domains/, entities/, decisions/,
foundations/, convictions/, core/ and re-runs the canonical attribution
parser (lib/attribution.py). For each parsed (handle, role) pair, increments
the corresponding *_count column on the contributors table.

Why this is needed (Apr 24 incident):
- lib/contributor.py used a diff-line regex parser that handled neither
  the bare-key flat format (`sourcer: alexastrum`, ~42% of claims) nor
  the nested `attribution: { sourcer: [...] }` block format used by Leo's
  manual extractions (Shaga's claims).
- Result: alexastrum, thesensatore, cameron-s1, and similar handles were
  silently dropped at merge time. Their contributor rows either don't
  exist or are stuck at zero counts.

Usage:
    python3 backfill-sourcer-attribution.py --dry-run   # report deltas, no writes
    python3 backfill-sourcer-attribution.py             # apply (additive: max(db, truth))
    python3 backfill-sourcer-attribution.py --reset     # destructive: set absolute truth

Default mode is ADDITIVE for safety: per-role count is set to max(current_db, truth).
This preserves any existing high counts that came from non-frontmatter sources
(e.g., m3taversal.sourcer=1011 reflects Telegram-curator credit accumulated via
a different code path; truncating to the file-walk truth would be destructive).

Use --reset to set absolute truth from the file walk only — this clobbers
all existing role counts including legitimate non-frontmatter credit.

Idempotency: additive mode is safe to re-run. --reset run is gated by an
audit_log marker; pass --force to override.
"""
import argparse
import os
import sqlite3
import sys
from collections import defaultdict
from pathlib import Path

# Allow running from anywhere — point at pipeline lib
PIPELINE_ROOT = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(PIPELINE_ROOT))

from lib.attribution import parse_attribution_from_file, VALID_ROLES  # noqa: E402

DB_PATH = os.environ.get("PIPELINE_DB", "/opt/teleo-eval/pipeline/pipeline.db")
REPO = Path(os.environ.get("REPO_DIR", "/opt/teleo-eval/workspaces/main"))
KNOWLEDGE_PREFIXES = (
    "domains", "entities", "decisions", "foundations", "convictions", "core",
)

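For orientation, the frontmatter shapes the canonical parser is expected to understand, with hypothetical sample values (alexastrum is the handle cited in the incident note above):

# 1. Nested block format (Leo's manual extractions):
#      attribution:
#        sourcer:
#          - handle: alexastrum
# 2. Bare-key flat format (~42% of claims):
#      sourcer: alexastrum
# 3. Prefix-keyed flat format:
#      attribution_sourcer: alexastrum
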
def collect_attributions(repo_root: Path) -> dict[str, dict[str, int]]:
|
||||
"""Walk all knowledge files; return {handle: {role: count}}."""
|
||||
counts: dict[str, dict[str, int]] = defaultdict(lambda: defaultdict(int))
|
||||
files_scanned = 0
|
||||
files_with_attribution = 0
|
||||
|
||||
for prefix in KNOWLEDGE_PREFIXES:
|
||||
base = repo_root / prefix
|
||||
if not base.exists():
|
||||
continue
|
||||
for path in base.rglob("*.md"):
|
||||
if path.name.startswith("_"):
|
||||
continue
|
||||
files_scanned += 1
|
||||
attr = parse_attribution_from_file(str(path))
|
||||
had_any = False
|
||||
for role, entries in attr.items():
|
||||
for entry in entries:
|
||||
handle = entry.get("handle")
|
||||
if handle:
|
||||
counts[handle][role] += 1
|
||||
had_any = True
|
||||
if had_any:
|
||||
files_with_attribution += 1
|
||||
|
||||
print(f" Scanned {files_scanned} knowledge files", file=sys.stderr)
|
||||
print(f" {files_with_attribution} had parseable attribution", file=sys.stderr)
|
||||
return counts
|
||||
|
||||
|
||||
def existing_contributors(conn) -> dict[str, dict[str, int]]:
|
||||
"""Return {handle: {role: count}} from the current DB."""
|
||||
rows = conn.execute(
|
||||
"SELECT handle, sourcer_count, extractor_count, challenger_count, "
|
||||
"synthesizer_count, reviewer_count, claims_merged FROM contributors"
|
||||
).fetchall()
|
||||
out = {}
|
||||
for r in rows:
|
||||
out[r["handle"]] = {
|
||||
"sourcer": r["sourcer_count"] or 0,
|
||||
"extractor": r["extractor_count"] or 0,
|
||||
"challenger": r["challenger_count"] or 0,
|
||||
"synthesizer": r["synthesizer_count"] or 0,
|
||||
"reviewer": r["reviewer_count"] or 0,
|
||||
"claims_merged": r["claims_merged"] or 0,
|
||||
}
|
||||
return out
|
||||
|
||||
|
||||
def claims_merged_for(role_counts: dict[str, int]) -> int:
|
||||
"""Mirror upsert_contributor logic: claims_merged += sourcer + extractor."""
|
||||
return role_counts.get("sourcer", 0) + role_counts.get("extractor", 0)
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--dry-run", action="store_true",
|
||||
help="Report deltas without writing")
|
||||
parser.add_argument("--reset", action="store_true",
|
||||
help="Destructive: set absolute truth from file walk "
|
||||
"(default is additive max(db, truth))")
|
||||
parser.add_argument("--force", action="store_true",
|
||||
help="Re-run even if a previous --reset marker exists")
|
||||
args = parser.parse_args()
|
||||
|
||||
if not REPO.exists():
|
||||
print(f"ERROR: repo not found at {REPO}", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
print(f"DB: {DB_PATH}", file=sys.stderr)
|
||||
print(f"Repo: {REPO}", file=sys.stderr)
|
||||
print("", file=sys.stderr)
|
||||
print("Walking knowledge tree...", file=sys.stderr)
|
||||
|
||||
truth = collect_attributions(REPO)
|
||||
print(f" Found attributions for {len(truth)} unique handles", file=sys.stderr)
|
||||
print("", file=sys.stderr)
|
||||
|
||||
conn = sqlite3.connect(DB_PATH, timeout=30)
|
||||
conn.row_factory = sqlite3.Row
|
||||
current = existing_contributors(conn)
|
||||
|
||||
# Compute deltas: new handles + handles with role-count mismatches
|
||||
new_handles: list[tuple[str, dict[str, int]]] = []
|
||||
role_deltas: list[tuple[str, dict[str, int], dict[str, int]]] = []
|
||||
|
||||
for handle, roles in truth.items():
|
||||
if handle not in current:
|
||||
new_handles.append((handle, dict(roles)))
|
||||
else:
|
||||
cur = current[handle]
|
||||
mismatches = {r: roles.get(r, 0) for r in VALID_ROLES
|
||||
if roles.get(r, 0) != cur.get(r, 0)}
|
||||
if mismatches:
|
||||
role_deltas.append((handle, dict(roles), cur))
|
||||
|
||||
print(f"=== {len(new_handles)} NEW contributors to insert ===")
|
||||
for handle, roles in sorted(new_handles, key=lambda x: -sum(x[1].values()))[:20]:
|
||||
roles_str = ", ".join(f"{r}={c}" for r, c in roles.items() if c > 0)
|
||||
print(f" + {handle}: {roles_str} (claims_merged={claims_merged_for(roles)})")
|
||||
if len(new_handles) > 20:
|
||||
print(f" ... and {len(new_handles) - 20} more")
|
||||
print()
|
||||
|
||||
print(f"=== {len(role_deltas)} EXISTING contributors with count drift ===")
|
||||
for handle, truth_roles, cur_roles in sorted(
|
||||
role_deltas,
|
||||
key=lambda x: -sum(x[1].values()),
|
||||
)[:20]:
|
||||
for role in VALID_ROLES:
|
||||
t = truth_roles.get(role, 0)
|
||||
c = cur_roles.get(role, 0)
|
||||
if t != c:
|
||||
print(f" ~ {handle}.{role}: db={c} → truth={t} (Δ{t - c:+d})")
|
||||
if len(role_deltas) > 20:
|
||||
print(f" ... and {len(role_deltas) - 20} more")
|
||||
print()
|
||||
|
||||
if args.dry_run:
|
||||
mode = "RESET" if args.reset else "ADDITIVE"
|
||||
print(f"Dry run ({mode} mode) — no changes written.")
|
||||
if not args.reset:
|
||||
print("Default is ADDITIVE: existing high counts (e.g. m3taversal=1011) preserved.")
|
||||
print("Pass --reset to clobber existing counts with file-walk truth.")
|
||||
return
|
||||
|
||||
# Idempotency: --reset is gated by audit marker. Additive mode is always safe.
|
||||
if args.reset:
|
||||
marker = conn.execute(
|
||||
"SELECT 1 FROM audit_log WHERE event = 'sourcer_attribution_backfill_reset' LIMIT 1"
|
||||
).fetchone()
|
||||
if marker and not args.force:
|
||||
print("ERROR: --reset has already run (audit marker present).")
|
||||
print("Pass --force to re-run.")
|
||||
sys.exit(2)
|
||||
|
||||
inserted = 0
|
||||
updated = 0
|
||||
preserved_higher = 0
|
||||
for handle, roles in truth.items():
|
||||
truth_counts = {
|
||||
"sourcer": roles.get("sourcer", 0),
|
||||
"extractor": roles.get("extractor", 0),
|
||||
"challenger": roles.get("challenger", 0),
|
||||
"synthesizer": roles.get("synthesizer", 0),
|
||||
"reviewer": roles.get("reviewer", 0),
|
||||
}
|
||||
|
||||
if handle in current:
|
||||
cur = current[handle]
|
||||
if args.reset:
|
||||
# Preserve reviewer_count even on reset (PR-level not file-level)
|
||||
final = dict(truth_counts)
|
||||
final["reviewer"] = max(truth_counts["reviewer"], cur.get("reviewer", 0))
|
||||
else:
|
||||
# Additive: max of db vs truth, per role
|
||||
final = {
|
||||
role: max(truth_counts[role], cur.get(role, 0))
|
||||
for role in truth_counts
|
||||
}
|
||||
if any(cur.get(r, 0) > truth_counts[r] for r in truth_counts):
|
||||
preserved_higher += 1
|
||||
|
||||
cm = final["sourcer"] + final["extractor"]
|
||||
conn.execute(
|
||||
"""UPDATE contributors SET
|
||||
sourcer_count = ?,
|
||||
extractor_count = ?,
|
||||
challenger_count = ?,
|
||||
synthesizer_count = ?,
|
||||
reviewer_count = ?,
|
||||
claims_merged = ?,
|
||||
updated_at = datetime('now')
|
||||
WHERE handle = ?""",
|
||||
(final["sourcer"], final["extractor"], final["challenger"],
|
||||
final["synthesizer"], final["reviewer"], cm, handle),
|
||||
)
|
||||
updated += 1
|
||||
else:
|
||||
cm = truth_counts["sourcer"] + truth_counts["extractor"]
|
||||
conn.execute(
|
||||
"""INSERT INTO contributors (
|
||||
handle, sourcer_count, extractor_count, challenger_count,
|
||||
synthesizer_count, reviewer_count, claims_merged,
|
||||
first_contribution, last_contribution, tier
|
||||
) VALUES (?, ?, ?, ?, ?, ?, ?, date('now'), date('now'), 'new')""",
|
||||
(handle, truth_counts["sourcer"], truth_counts["extractor"],
|
||||
truth_counts["challenger"], truth_counts["synthesizer"],
|
||||
truth_counts["reviewer"], cm),
|
||||
)
|
||||
inserted += 1
|
||||
|
||||
event = "sourcer_attribution_backfill_reset" if args.reset else "sourcer_attribution_backfill"
|
||||
conn.execute(
|
||||
"INSERT INTO audit_log (stage, event, detail) VALUES (?, ?, ?)",
|
||||
("contributor", event,
|
||||
f'{{"inserted": {inserted}, "updated": {updated}, '
|
||||
f'"preserved_higher": {preserved_higher}, "mode": '
|
||||
f'"{"reset" if args.reset else "additive"}"}}'),
|
||||
)
|
||||
conn.commit()
|
||||
print(f"Done ({'RESET' if args.reset else 'ADDITIVE'}). "
|
||||
f"Inserted {inserted} new, updated {updated} existing, "
|
||||
f"preserved {preserved_higher} higher-than-truth values.")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
@@ -104,14 +104,22 @@ def main():
             claims_count = 0

         if rel_path in existing:
-            # Update status if different
+            # Update status if different — but never regress from terminal states.
+            # If DB says 'extracted' or 'null_result' and file happens to be in queue/
+            # (e.g., failed archive push, zombie file), the DB is authoritative.
+            # Downgrading to 'unprocessed' triggers the runaway re-extraction loop.
             current = conn.execute("SELECT status FROM sources WHERE path = ?", (rel_path,)).fetchone()
+            TERMINAL_STATUSES = {"extracted", "null_result", "error", "ghost_no_file"}
             if current and current["status"] != status:
-                conn.execute(
-                    "UPDATE sources SET status = ?, updated_at = datetime('now') WHERE path = ?",
-                    (status, rel_path),
-                )
-                updated += 1
+                if current["status"] in TERMINAL_STATUSES and status == "unprocessed":
+                    # Don't regress terminal → unprocessed. DB wins.
+                    pass
+                else:
+                    conn.execute(
+                        "UPDATE sources SET status = ?, updated_at = datetime('now') WHERE path = ?",
+                        (status, rel_path),
+                    )
+                    updated += 1
         else:
             conn.execute(
                 """INSERT INTO sources (path, status, priority, claims_count, created_at, updated_at)
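
The same guard expressed as a standalone predicate, useful for reasoning about which sync transitions are allowed (a sketch only; the inline version in the hunk above is what actually ships):

TERMINAL_STATUSES = {"extracted", "null_result", "error", "ghost_no_file"}

def should_update_status(db_status: str, fs_status: str) -> bool:
    """Permit status changes except terminal -> 'unprocessed' regressions."""
    if db_status == fs_status:
        return False
    return not (db_status in TERMINAL_STATUSES and fs_status == "unprocessed")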

scripts/backfill-synthetic-recovery-prs.py (new file, +148 lines)
@@ -0,0 +1,148 @@
#!/usr/bin/env python3
"""Reconstruct synthetic `prs` rows for historical GitHub PRs lost pre-mirror-wiring.

Two PRs merged on GitHub before our sync-mirror.sh tracked `github_pr`:
- GitHub PR #68: alexastrum — 6 claims, merged 2026-03-09 via GitHub squash,
  recovered to Forgejo via commit dba00a79 (Apr 16, after mirror erased files)
- GitHub PR #88: Cameron-S1 — 1 claim, recovered via commit da64f805

The recovery commits wrote the files directly to main, so our `prs` table has
no row to attach originator events to — the backfill-events.py strategies all
return NULL. We reconstruct one synthetic `prs` row per historical GitHub PR so
the events pipeline (and `github_pr` strategy in backfill-events) can credit
Alex and Cameron properly.

Numbers 900000+ are clearly synthetic and won't collide with real Forgejo PRs.

Idempotent via INSERT OR IGNORE.

Usage:
    python3 scripts/backfill-synthetic-recovery-prs.py --dry-run
    python3 scripts/backfill-synthetic-recovery-prs.py
"""
import argparse
|
||||
import os
|
||||
import sqlite3
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
DB_PATH = os.environ.get("PIPELINE_DB", "/opt/teleo-eval/pipeline/pipeline.db")
|
||||
|
||||
# Historical GitHub PRs recovered via direct-to-main commits.
|
||||
# Original GitHub merge dates come from the recovery commit messages.
|
||||
RECOVERY_PRS = [
|
||||
{
|
||||
"number": 900068,
|
||||
"github_pr": 68,
|
||||
"branch": "gh-pr-68",
|
||||
"status": "merged",
|
||||
"domain": "ai-alignment",
|
||||
"commit_type": "knowledge",
|
||||
"tier": "STANDARD",
|
||||
"leo_verdict": "approve",
|
||||
"domain_verdict": "approve",
|
||||
"submitted_by": "alexastrum",
|
||||
"source_channel": "github",
|
||||
# origin='human' matches lib/merge.py convention for external contributors
|
||||
# (default is 'pipeline' which misclassifies us as machine-authored).
|
||||
"origin": "human",
|
||||
"priority": "high",
|
||||
"description": "Multi-agent git workflows production maturity | Cryptographic agent trust ratings | Defense in depth for AI agent oversight | Deterministic policy engines below LLM layer | Knowledge validation four-layer architecture | Structurally separating proposer and reviewer agents",
|
||||
"merged_at": "2026-03-09 00:00:00",
|
||||
"created_at": "2026-03-08 00:00:00",
|
||||
"last_error": "synthetic_recovery: GitHub PR #68 pre-mirror-wiring reconstruction (commit dba00a79)",
|
||||
},
|
||||
{
|
||||
"number": 900088,
|
||||
"github_pr": 88,
|
||||
"branch": "gh-pr-88",
|
||||
"status": "merged",
|
||||
"domain": "ai-alignment",
|
||||
"commit_type": "knowledge",
|
||||
"tier": "STANDARD",
|
||||
"leo_verdict": "approve",
|
||||
"domain_verdict": "approve",
|
||||
"submitted_by": "cameron-s1",
|
||||
"source_channel": "github",
|
||||
"origin": "human",
|
||||
"priority": "high",
|
||||
"description": "Orthogonality is an artefact of specification architectures not a property of intelligence itself",
|
||||
"merged_at": "2026-04-01 00:00:00",
|
||||
"created_at": "2026-04-01 00:00:00",
|
||||
"last_error": "synthetic_recovery: GitHub PR #88 pre-mirror-wiring reconstruction (commit da64f805)",
|
||||
},
|
||||
]
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--dry-run", action="store_true")
|
||||
args = parser.parse_args()
|
||||
|
||||
if not Path(DB_PATH).exists():
|
||||
print(f"ERROR: DB not found at {DB_PATH}", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
conn = sqlite3.connect(DB_PATH, timeout=30)
|
||||
conn.row_factory = sqlite3.Row
|
||||
|
||||
# Guard against synthetic-range colonization (Ganymede review): check for
|
||||
# any row in the synthetic range that isn't one of ours. INSERT OR IGNORE on
|
||||
# the specific numbers is the real collision defense; this is belt-and-suspenders.
|
||||
max_real = conn.execute(
|
||||
"SELECT MAX(number) FROM prs WHERE number < 900000"
|
||||
).fetchone()[0] or 0
|
||||
print(f"Max real Forgejo PR number: {max_real}")
|
||||
synth_conflict = conn.execute(
|
||||
"SELECT number FROM prs WHERE number >= 900000 AND number NOT IN (900068, 900088) LIMIT 1"
|
||||
).fetchone()
|
||||
if synth_conflict:
|
||||
print(f"ERROR: PR #{synth_conflict[0]} already exists in synthetic range. "
|
||||
f"Pick a new range before running.", file=sys.stderr)
|
||||
sys.exit(2)
|
||||
|
||||
inserted = 0
|
||||
skipped = 0
|
||||
for row in RECOVERY_PRS:
|
||||
existing = conn.execute(
|
||||
"SELECT number FROM prs WHERE number = ? OR github_pr = ?",
|
||||
(row["number"], row["github_pr"]),
|
||||
).fetchone()
|
||||
if existing:
|
||||
print(f" PR #{row['number']} (github_pr={row['github_pr']}): already exists — skip")
|
||||
skipped += 1
|
||||
continue
|
||||
print(f" {'(dry-run) ' if args.dry_run else ''}INSERT synthetic PR #{row['number']} "
|
||||
f"(github_pr={row['github_pr']}, submitted_by={row['submitted_by']}, "
|
||||
f"merged_at={row['merged_at']})")
|
||||
if not args.dry_run:
|
||||
conn.execute(
|
||||
"""INSERT INTO prs (
|
||||
number, github_pr, branch, status, domain, commit_type, tier,
|
||||
leo_verdict, domain_verdict, submitted_by, source_channel,
|
||||
origin, priority,
|
||||
description, merged_at, created_at, last_error
|
||||
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
|
||||
(
|
||||
row["number"], row["github_pr"], row["branch"], row["status"],
|
||||
row["domain"], row["commit_type"], row["tier"],
|
||||
row["leo_verdict"], row["domain_verdict"],
|
||||
row["submitted_by"], row["source_channel"],
|
||||
row["origin"], row["priority"],
|
||||
row["description"], row["merged_at"], row["created_at"],
|
||||
row["last_error"],
|
||||
),
|
||||
)
|
||||
inserted += 1
|
||||
|
||||
if not args.dry_run:
|
||||
conn.commit()
|
||||
|
||||
print(f"\nInserted {inserted}, skipped {skipped}")
|
||||
if not args.dry_run and inserted:
|
||||
print("\nNext step: re-run backfill-events.py to attach originator events")
|
||||
        print("  python3 scripts/backfill-events.py")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
426 scripts/classify-contributors.py Normal file
@@ -0,0 +1,426 @@
#!/usr/bin/env python3
|
||||
"""Classify `contributors` rows into {keep_person, keep_agent, move_to_publisher, delete_garbage}.
|
||||
|
||||
Reads current contributors table, proposes reclassification per v26 schema design:
|
||||
- Real humans + Pentagon agents stay in contributors (kind='person'|'agent')
|
||||
- News orgs, publications, venues move to publishers table (new v26)
|
||||
- Multi-word hyphenated garbage (parsing artifacts) gets deleted
|
||||
- Their contribution_events are handled per category:
|
||||
* Publishers: DELETE events (orgs shouldn't have credit)
|
||||
* Garbage: DELETE events (bogus data)
|
||||
* Persons/agents: keep events untouched
|
||||
|
||||
Classification is heuristic — uses explicit allowlists + regex patterns + length gates.
|
||||
Ambiguous cases default to 'review_needed' (human decision).
|
||||
|
||||
Usage:
|
||||
python3 scripts/classify-contributors.py # dry-run analysis + report
|
||||
python3 scripts/classify-contributors.py --apply # write changes
|
||||
python3 scripts/classify-contributors.py --show <handle> # inspect a single row
|
||||
|
||||
Writes to pipeline.db only. Does NOT modify claim files.
|
||||
"""
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sqlite3
|
||||
import sys
|
||||
from collections import Counter
|
||||
from pathlib import Path
|
||||
|
||||
DB_PATH = os.environ.get("PIPELINE_DB", "/opt/teleo-eval/pipeline/pipeline.db")
|
||||
|
||||
# Pentagon agents: kind='agent'. Authoritative list.
|
||||
PENTAGON_AGENTS = frozenset({
|
||||
"rio", "leo", "theseus", "vida", "clay", "astra",
|
||||
"oberon", "argus", "rhea", "ganymede", "epimetheus", "hermes", "ship",
|
||||
"pipeline",
|
||||
})
|
||||
|
||||
# Publisher/news-org handles seen in current contributors table.
|
||||
# Grouped by kind for the publishers row. Classified by inspection.
|
||||
# NOTE: This list is hand-curated — add to it as new orgs appear.
|
||||
PUBLISHERS_NEWS = {
|
||||
# News outlets / brands
|
||||
"cnbc", "al-jazeera", "axios", "bloomberg", "reuters", "bettorsinsider",
|
||||
"fortune", "techcrunch", "coindesk", "coindesk-staff", "coindesk-research",
|
||||
"coindesk research", "coindesk staff",
|
||||
"defense-one", "thedefensepost", "theregister", "the-intercept",
|
||||
"the-meridiem", "variety", "variety-staff", "variety staff", "spacenews",
|
||||
"nasaspaceflight", "thedonkey", "insidedefense", "techpolicypress",
|
||||
"morganlewis", "casinoorg", "deadline", "animationmagazine",
|
||||
"defensepost", "casino-org", "casino.org",
|
||||
"air & space forces magazine", "ieee spectrum", "techcrunch-staff",
|
||||
"blockworks", "blockworks-staff", "decrypt", "ainvest", "banking-dive", "banking dive",
|
||||
"cset-georgetown", "cset georgetown",
|
||||
"kff", "kff-health-news", "kff health news", "kff-health-news---cbo",
|
||||
"kff-health-news-/-cbo", "kff health news / cbo", "kffhealthnews",
|
||||
"bloomberg-law",
|
||||
"norton-rose-fulbright", "norton rose fulbright",
|
||||
"defence-post", "the-defensepost",
|
||||
"wilmerhale", "mofo", "sciencedirect",
|
||||
"yogonet", "csr", "aisi-uk", "aisi", "aisi_gov", "rand",
|
||||
"armscontrol", "eclinmed", "solana-compass", "solana compass",
|
||||
"pmc11919318", "pmc11780016",
|
||||
"healthverity", "natrium", "form-energy",
|
||||
"courtlistener", "curtis-schiff", "curtis-schiff-prediction-markets",
|
||||
"prophetx", "techpolicypress-staff",
|
||||
"npr", "venturebeat", "geekwire", "payloadspace", "the-ankler",
|
||||
"theankler", "tubefilter", "emarketer", "dagster",
|
||||
"numerai", # fund/project brand, not person
|
||||
"psl", "multistate",
|
||||
}
|
||||
PUBLISHERS_ACADEMIC = {
|
||||
# Academic orgs, labs, papers, journals, institutions
|
||||
"arxiv", "metr", "metr_evals", "apollo-research", "apollo research", "apolloresearch",
|
||||
"jacc-study-authors", "jacc-data-report-authors",
|
||||
"anthropic-fellows-program", "anthropic-fellows",
|
||||
"anthropic-fellows-/-alignment-science-team", "anthropic-research",
|
||||
"jmir-2024", "jmir 2024",
|
||||
"oettl-et-al.,-journal-of-experimental-orthopaedics",
|
||||
"oettl et al., journal of experimental orthopaedics",
|
||||
"jacc", "nct06548490", "pmc",
|
||||
"conitzer-et-al.-(2024)", "aquino-michaels-2026", "pan-et-al.",
|
||||
"pan-et-al.-'natural-language-agent-harnesses'",
|
||||
"stanford", "stanford-meta-harness",
|
||||
"hendershot", "annals-im",
|
||||
"nellie-liang,-brookings-institution", "nellie liang, brookings institution",
|
||||
"penn-state", "american-heart-association", "american heart association",
|
||||
"molt_cornelius", "molt-cornelius",
|
||||
# Companies / labs / brand-orgs (not specific humans)
|
||||
"anthropic", "anthropicai", "openai", "nasa", "icrc", "ecri",
|
||||
"epochairesearch", "metadao", "iapam", "icer",
|
||||
"who", "ama", "uspstf", "unknown",
|
||||
"futard.io", # protocol/platform
|
||||
"oxford-martin-ai-governance-initiative",
|
||||
"oxford-martin-ai-governance",
|
||||
"u.s.-food-and-drug-administration",
|
||||
"jitse-goutbeek,-european-policy-centre", # cited person+org string → publisher
|
||||
"adepoju-et-al.", # paper citation
|
||||
# Formal-citation names (Firstname-Lastname or Lastname-et-al) — classified
|
||||
# as academic citations, not reachable contributors. They'd need an @ handle
|
||||
# to get CI credit per Cory's growth-loop design.
|
||||
"senator-elissa-slotkin",
|
||||
"bostrom", "hanson", "kaufmann", "noah-smith", "doug-shapiro",
|
||||
"shayon-sengupta", "shayon sengupta",
|
||||
"robin-hanson", "robin hanson", "eliezer-yudkowsky",
|
||||
"leopold-aschenbrenner", "aschenbrenner",
|
||||
"ramstead", "larsson", "heavey",
|
||||
"dan-slimmon", "van-leeuwaarden", "ward-whitt", "adams",
|
||||
"tamim-ansary", "spizzirri",
|
||||
"dario-amodei", # formal-citation form (real @ is @darioamodei)
|
||||
"corless", "oxranga", "vlahakis",
|
||||
# Brand/project/DAO tokens — not individuals
|
||||
"areal-dao", "areal", "theiaresearch", "futard-io", "dhrumil",
|
||||
# Classic formal-citation names — famous academics/economists cited by surname.
|
||||
# Reachable via @ handle if/when they join (e.g. Ostrom has no X, Hayek deceased,
|
||||
# Friston has an institutional affiliation not an @ handle we'd track).
|
||||
"clayton-christensen", "hidalgo", "coase", "wiener", "juarrero",
|
||||
"ostrom", "centola", "hayek", "marshall-mcluhan", "blackmore",
|
||||
"knuth", "friston", "aquino-michaels", "conitzer", "bak",
|
||||
}
|
||||
# NOTE: pseudonymous X handles that MAY be real contributors stay in keep_person:
|
||||
# karpathy, simonw, swyx, metaproph3t, metanallok, mmdhrumil, sjdedic,
|
||||
# ceterispar1bus — these are real X accounts and match Cory's growth loop.
|
||||
# They appear without @ prefix because extraction frontmatter didn't normalize.
|
||||
# Auto-creating them as contributors tier='cited' is correct (A-path from earlier).
|
||||
PUBLISHERS_SOCIAL = {
|
||||
"x", "twitter", "telegram", "x.com",
|
||||
}
|
||||
PUBLISHERS_INTERNAL = {
|
||||
"teleohumanity-manifesto", "strategy-session-journal",
|
||||
"living-capital-thesis-development", "attractor-state-historical-backtesting",
|
||||
"web-research-compilation", "architectural-investing",
|
||||
"governance---meritocratic-voting-+-futarchy", # title artifact
|
||||
"sec-interpretive-release-s7-2026-09-(march-17", # title artifact
|
||||
"mindstudio", # tooling/platform, not contributor
|
||||
}
|
||||
# Merge into one kind→set map for classification
|
||||
PUBLISHER_KIND_MAP = {}
|
||||
for h in PUBLISHERS_NEWS:
|
||||
PUBLISHER_KIND_MAP[h.lower()] = "news"
|
||||
for h in PUBLISHERS_ACADEMIC:
|
||||
PUBLISHER_KIND_MAP[h.lower()] = "academic"
|
||||
for h in PUBLISHERS_SOCIAL:
|
||||
PUBLISHER_KIND_MAP[h.lower()] = "social_platform"
|
||||
for h in PUBLISHERS_INTERNAL:
|
||||
PUBLISHER_KIND_MAP[h.lower()] = "internal"
|
||||
|
||||
|
||||
# Garbage: handles that are clearly parse artifacts, not real names.
|
||||
# Pattern: contains parens, special chars, or >50 chars.
|
||||
def is_garbage(handle: str) -> bool:
|
||||
h = handle.strip()
|
||||
if len(h) > 50:
|
||||
return True
|
||||
if re.search(r"[()\[\]<>{}\/\\|@#$%^&*=?!:;\"']", h):
|
||||
# But @ can appear legitimately in handles like @thesensatore — allow if @ is only prefix
|
||||
if h.startswith("@") and not re.search(r"[()\[\]<>{}\/\\|#$%^&*=?!:;\"']", h):
|
||||
return False
|
||||
return True
|
||||
# Multi-word hyphenated with very specific artifact shape: 3+ hyphens in a row or trailing noise
|
||||
if "---" in h or "---meritocratic" in h or h.endswith("(march") or h.endswith("-(march"):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def classify(handle: str) -> tuple[str, str | None]:
|
||||
"""Return (category, publisher_kind).
|
||||
|
||||
category ∈ {'keep_agent', 'keep_person', 'publisher', 'garbage', 'review_needed'}
|
||||
publisher_kind ∈ {'news','academic','social_platform','internal', None}
|
||||
"""
|
||||
h = handle.strip().lower().lstrip("@")
|
||||
|
||||
if h in PENTAGON_AGENTS:
|
||||
return ("keep_agent", None)
|
||||
|
||||
if h in PUBLISHER_KIND_MAP:
|
||||
return ("publisher", PUBLISHER_KIND_MAP[h])
|
||||
|
||||
if is_garbage(handle):
|
||||
return ("garbage", None)
|
||||
|
||||
# @-prefixed handles or short-slug real-looking names → keep as person
|
||||
# (Auto-create rule from Cory: @ handles auto-join as tier='cited'.)
|
||||
if handle.startswith("@"):
|
||||
return ("keep_person", None)
|
||||
|
||||
# Plausible handles (<=39 chars, alphanum + underscore/hyphen): treat as person.
|
||||
# 39-char ceiling matches GitHub's handle limit and the writer path in
|
||||
# contributor.py::_HANDLE_RE, so a valid 21-39 char real handle won't fall
|
||||
# through to review_needed and block --apply.
|
||||
if re.match(r"^[a-z0-9][a-z0-9_-]{0,38}$", h):
|
||||
return ("keep_person", None)
|
||||
|
||||
# Everything else: needs human review
|
||||
return ("review_needed", None)
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--apply", action="store_true", help="Write changes to DB")
|
||||
parser.add_argument("--show", type=str, help="Inspect a single handle")
|
||||
parser.add_argument("--delete-events", action="store_true",
|
||||
help="DELETE contribution_events for publishers+garbage (default: keep for audit)")
|
||||
args = parser.parse_args()
|
||||
|
||||
if not Path(DB_PATH).exists():
|
||||
print(f"ERROR: DB not found at {DB_PATH}", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
conn = sqlite3.connect(DB_PATH, timeout=30)
|
||||
conn.row_factory = sqlite3.Row
|
||||
|
||||
# Sanity: publishers table must exist (v26 migration applied)
|
||||
try:
|
||||
conn.execute("SELECT 1 FROM publishers LIMIT 1")
|
||||
except sqlite3.OperationalError:
|
||||
print("ERROR: publishers table missing. Run migration v26 first.", file=sys.stderr)
|
||||
sys.exit(2)
|
||||
|
||||
rows = conn.execute(
|
||||
"SELECT handle, kind, tier, claims_merged FROM contributors ORDER BY claims_merged DESC"
|
||||
).fetchall()
|
||||
|
||||
if args.show:
|
||||
target = args.show.strip().lower().lstrip("@")
|
||||
for r in rows:
|
||||
if r["handle"].lower().lstrip("@") == target:
|
||||
category, pkind = classify(r["handle"])
|
||||
events_count = conn.execute(
|
||||
"SELECT COUNT(*) FROM contribution_events WHERE handle = ?",
|
||||
(r["handle"].lower().lstrip("@"),),
|
||||
).fetchone()[0]
|
||||
print(f"handle: {r['handle']}")
|
||||
print(f"current_kind: {r['kind']}")
|
||||
print(f"current_tier: {r['tier']}")
|
||||
print(f"claims_merged: {r['claims_merged']}")
|
||||
print(f"events: {events_count}")
|
||||
print(f"→ category: {category}")
|
||||
if pkind:
|
||||
print(f"→ publisher: kind={pkind}")
|
||||
return
|
||||
print(f"No match for '{args.show}'")
|
||||
return
|
||||
|
||||
# Classify all
|
||||
buckets: dict[str, list[dict]] = {
|
||||
"keep_agent": [],
|
||||
"keep_person": [],
|
||||
"publisher": [],
|
||||
"garbage": [],
|
||||
"review_needed": [],
|
||||
}
|
||||
for r in rows:
|
||||
category, pkind = classify(r["handle"])
|
||||
buckets[category].append({
|
||||
"handle": r["handle"],
|
||||
"kind_now": r["kind"],
|
||||
"tier": r["tier"],
|
||||
"claims": r["claims_merged"] or 0,
|
||||
"publisher_kind": pkind,
|
||||
})
|
||||
|
||||
print("=== Classification summary ===")
|
||||
for cat, items in buckets.items():
|
||||
print(f" {cat:18s} {len(items):5d}")
|
||||
|
||||
print("\n=== Sample of each category ===")
|
||||
for cat, items in buckets.items():
|
||||
print(f"\n--- {cat} (showing up to 10) ---")
|
||||
for item in items[:10]:
|
||||
tag = f" → {item['publisher_kind']}" if item["publisher_kind"] else ""
|
||||
print(f" {item['handle']:50s} claims={item['claims']:5d}{tag}")
|
||||
|
||||
print("\n=== Full review_needed list ===")
|
||||
for item in buckets["review_needed"]:
|
||||
print(f" {item['handle']:50s} claims={item['claims']:5d}")
|
||||
|
||||
# Diagnostic: orphan alias count for handles we're about to delete.
|
||||
# Contributor_aliases has no FK (SQLite FKs require PRAGMA to enforce anyway),
|
||||
# so aliases pointing to deleted canonical handles become orphans. Surface
|
||||
# the count so the --delete-events decision is informed.
|
||||
doomed = [item["handle"].lower().lstrip("@") for item in buckets["garbage"] + buckets["publisher"]]
|
||||
if doomed:
|
||||
placeholders = ",".join("?" * len(doomed))
|
||||
orphan_count = conn.execute(
|
||||
f"SELECT COUNT(*) FROM contributor_aliases WHERE canonical IN ({placeholders})",
|
||||
doomed,
|
||||
).fetchone()[0]
|
||||
print(f"\n=== Alias orphan check ===")
|
||||
print(f" contributor_aliases rows pointing to deletable canonicals: {orphan_count}")
|
||||
if orphan_count:
|
||||
print(f" (cleanup requires --delete-events; without it, aliases stay as orphans)")
|
||||
|
||||
if not args.apply:
|
||||
print("\n(dry-run — no writes. Re-run with --apply to execute.)")
|
||||
return
|
||||
|
||||
# ── Apply changes ──
|
||||
print("\n=== Applying changes ===")
|
||||
if buckets["review_needed"]:
|
||||
print(f"ABORT: {len(buckets['review_needed'])} rows need human review. Fix classifier before --apply.")
|
||||
sys.exit(3)
|
||||
|
||||
inserted_publishers = 0
|
||||
reclassified_agents = 0
|
||||
deleted_garbage = 0
|
||||
deleted_publisher_rows = 0
|
||||
deleted_events = 0
|
||||
deleted_aliases = 0
|
||||
|
||||
# Single transaction — if any step errors, roll back. This prevents the failure
|
||||
# mode where a publisher insert fails silently and we still delete the contributor
|
||||
# row, losing data.
|
||||
try:
|
||||
conn.execute("BEGIN")
|
||||
|
||||
# 1. Insert publishers. Track which ones succeeded so step 4 only deletes those.
|
||||
# Counter uses cur.rowcount so replay runs (where publishers already exist)
|
||||
# report accurate inserted=0 instead of falsely claiming the full set.
|
||||
# moved_to_publisher is unconditional — the contributors row still needs to
|
||||
# be deleted even when the publishers row was added in a prior run.
|
||||
moved_to_publisher = set()
|
||||
for item in buckets["publisher"]:
|
||||
name = item["handle"].strip().lower().lstrip("@")
|
||||
cur = conn.execute(
|
||||
"INSERT OR IGNORE INTO publishers (name, kind) VALUES (?, ?)",
|
||||
(name, item["publisher_kind"]),
|
||||
)
|
||||
if cur.rowcount > 0:
|
||||
inserted_publishers += 1
|
||||
moved_to_publisher.add(item["handle"])
|
||||
|
||||
# 2. Ensure Pentagon agents have kind='agent' (idempotent after v25 patch)
|
||||
for item in buckets["keep_agent"]:
|
||||
conn.execute(
|
||||
"UPDATE contributors SET kind = 'agent' WHERE handle = ?",
|
||||
(item["handle"].lower().lstrip("@"),),
|
||||
)
|
||||
reclassified_agents += 1
|
||||
|
||||
# 3. Delete garbage handles from contributors (and their events + aliases)
|
||||
for item in buckets["garbage"]:
|
||||
canonical_lower = item["handle"].lower().lstrip("@")
|
||||
if args.delete_events:
|
||||
cur = conn.execute(
|
||||
"DELETE FROM contribution_events WHERE handle = ?",
|
||||
(canonical_lower,),
|
||||
)
|
||||
deleted_events += cur.rowcount
|
||||
cur = conn.execute(
|
||||
"DELETE FROM contributor_aliases WHERE canonical = ?",
|
||||
(canonical_lower,),
|
||||
)
|
||||
deleted_aliases += cur.rowcount
|
||||
cur = conn.execute(
|
||||
"DELETE FROM contributors WHERE handle = ?",
|
||||
(item["handle"],),
|
||||
)
|
||||
deleted_garbage += cur.rowcount
|
||||
|
||||
# 4. Delete publisher rows from contributors — ONLY for those successfully
|
||||
# inserted into publishers above. Guards against partial failure.
|
||||
# Aliases pointing to publisher-classified handles get cleaned under the
|
||||
# same --delete-events gate: publishers live in their own table now, any
|
||||
# leftover aliases in contributor_aliases are orphans.
|
||||
for item in buckets["publisher"]:
|
||||
if item["handle"] not in moved_to_publisher:
|
||||
continue
|
||||
canonical_lower = item["handle"].lower().lstrip("@")
|
||||
if args.delete_events:
|
||||
cur = conn.execute(
|
||||
"DELETE FROM contribution_events WHERE handle = ?",
|
||||
(canonical_lower,),
|
||||
)
|
||||
deleted_events += cur.rowcount
|
||||
cur = conn.execute(
|
||||
"DELETE FROM contributor_aliases WHERE canonical = ?",
|
||||
(canonical_lower,),
|
||||
)
|
||||
deleted_aliases += cur.rowcount
|
||||
cur = conn.execute(
|
||||
"DELETE FROM contributors WHERE handle = ?",
|
||||
(item["handle"],),
|
||||
)
|
||||
deleted_publisher_rows += cur.rowcount
|
||||
|
||||
# 5. Audit log entry for the destructive operation (Ganymede Q5).
|
||||
conn.execute(
|
||||
"INSERT INTO audit_log (timestamp, stage, event, detail) VALUES (datetime('now'), ?, ?, ?)",
|
||||
(
|
||||
"schema_v26",
|
||||
"classify_contributors",
|
||||
json.dumps({
|
||||
"publishers_inserted": inserted_publishers,
|
||||
"agents_updated": reclassified_agents,
|
||||
"garbage_deleted": deleted_garbage,
|
||||
"publisher_rows_deleted": deleted_publisher_rows,
|
||||
"events_deleted": deleted_events,
|
||||
"aliases_deleted": deleted_aliases,
|
||||
"delete_events_flag": bool(args.delete_events),
|
||||
}),
|
||||
),
|
||||
)
|
||||
|
||||
conn.commit()
|
||||
except Exception as e:
|
||||
conn.rollback()
|
||||
print(f"ERROR: Transaction failed, rolled back. {e}", file=sys.stderr)
|
||||
sys.exit(4)
|
||||
|
||||
print(f" publishers inserted: {inserted_publishers}")
|
||||
print(f" agents kind='agent' ensured: {reclassified_agents}")
|
||||
print(f" garbage rows deleted: {deleted_garbage}")
|
||||
print(f" publisher rows removed from contributors: {deleted_publisher_rows}")
|
||||
if args.delete_events:
|
||||
print(f" contribution_events deleted: {deleted_events}")
|
||||
print(f" contributor_aliases deleted: {deleted_aliases}")
|
||||
else:
|
||||
print(f" (events + aliases kept — re-run with --delete-events to clean them)")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
137 scripts/contributor-graph.py Normal file
@@ -0,0 +1,137 @@
#!/usr/bin/env python3
|
||||
"""Generate cumulative contributor + claims PNG for Twitter embedding."""
|
||||
|
||||
import json
|
||||
import subprocess
|
||||
import sys
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
|
||||
import matplotlib
|
||||
matplotlib.use("Agg")
|
||||
import matplotlib.pyplot as plt
|
||||
import matplotlib.dates as mdates
|
||||
from matplotlib.ticker import MaxNLocator
|
||||
|
||||
ACCENT = "#00d4aa"
|
||||
PURPLE = "#7c3aed"
|
||||
BG = "#0a0a0a"
|
||||
TEXT = "#e0e0e0"
|
||||
SUBTLE = "#555555"
|
||||
OUTPUT = Path("/opt/teleo-eval/static/contributor-graph.png")
|
||||
|
||||
|
||||
def get_data():
|
||||
"""Fetch from local API."""
|
||||
import urllib.request
|
||||
with urllib.request.urlopen("http://localhost:8081/api/contributor-growth") as r:
|
||||
return json.loads(r.read())
|
||||
|
||||
|
||||
def build_continuous_series(milestones, start_date, end_date):
|
||||
"""Expand milestone-only contributor data into daily series."""
|
||||
dates = []
|
||||
values = []
|
||||
current = 0
|
||||
milestone_map = {}
|
||||
for m in milestones:
|
||||
d = datetime.strptime(m["date"], "%Y-%m-%d").date()
|
||||
milestone_map[d] = m["cumulative"]
|
||||
|
||||
d = start_date
|
||||
while d <= end_date:
|
||||
if d in milestone_map:
|
||||
current = milestone_map[d]
|
||||
dates.append(d)
|
||||
values.append(current)
|
||||
d += timedelta(days=1)
|
||||
return dates, values
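# Sketch of the expansion (dates and counts below are made up): milestones
#   [{"date": "2026-03-05", "cumulative": 1}, {"date": "2026-03-08", "cumulative": 4}]
# expanded over 2026-03-05..2026-03-10 yields values [1, 1, 1, 4, 4, 4];
# the last milestone value is held forward on days with no new contributor.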
|
||||
|
||||
|
||||
def render(data, output_path):
|
||||
fig, ax1 = plt.subplots(figsize=(12, 6.3), dpi=100)
|
||||
fig.patch.set_facecolor(BG)
|
||||
ax1.set_facecolor(BG)
|
||||
|
||||
claims = data["cumulative_claims"]
|
||||
contribs = data["cumulative_contributors"]
|
||||
|
||||
claim_dates = [datetime.strptime(c["date"], "%Y-%m-%d").date() for c in claims]
|
||||
claim_values = [c["cumulative"] for c in claims]
|
||||
|
||||
start = min(claim_dates)
|
||||
end = max(claim_dates)
|
||||
|
||||
contrib_dates, contrib_values = build_continuous_series(contribs, start, end)
|
||||
|
||||
# Claims line (left y-axis)
|
||||
ax1.fill_between(claim_dates, claim_values, alpha=0.15, color=ACCENT)
|
||||
ax1.plot(claim_dates, claim_values, color=ACCENT, linewidth=2.5, label="Claims")
|
||||
ax1.set_ylabel("Claims", color=ACCENT, fontsize=12, fontweight="bold")
|
||||
ax1.tick_params(axis="y", colors=ACCENT, labelsize=10)
|
||||
ax1.set_ylim(bottom=0)
|
||||
|
||||
# Contributors line (right y-axis)
|
||||
ax2 = ax1.twinx()
|
||||
ax2.set_facecolor("none")
|
||||
ax2.fill_between(contrib_dates, contrib_values, alpha=0.1, color=PURPLE, step="post")
|
||||
ax2.step(contrib_dates, contrib_values, color=PURPLE, linewidth=2.5,
|
||||
where="post", label="Contributors")
|
||||
ax2.set_ylabel("Contributors", color=PURPLE, fontsize=12, fontweight="bold")
|
||||
ax2.tick_params(axis="y", colors=PURPLE, labelsize=10)
|
||||
ax2.yaxis.set_major_locator(MaxNLocator(integer=True))
|
||||
ax2.set_ylim(bottom=0, top=max(contrib_values) * 1.8)
|
||||
|
||||
# Annotate contributor milestones with staggered offsets to avoid overlap
|
||||
offsets = {}
|
||||
for i, m in enumerate(contribs):
|
||||
d = datetime.strptime(m["date"], "%Y-%m-%d").date()
|
||||
val = m["cumulative"]
|
||||
names = [n["name"] for n in m["new"]]
|
||||
if len(names) <= 2:
|
||||
label = ", ".join(names)
|
||||
else:
|
||||
label = f"+{len(names)}"
|
||||
y_off = 8 + (i % 2) * 14
|
||||
ax2.annotate(label, (d, val),
|
||||
textcoords="offset points", xytext=(5, y_off),
|
||||
fontsize=7, color=PURPLE, alpha=0.8)
|
||||
|
||||
# Hero stats
|
||||
total_claims = data["summary"]["total_claims"]
|
||||
total_contribs = data["summary"]["total_contributors"]
|
||||
days = data["summary"]["days_active"]
|
||||
fig.text(0.14, 0.88, f"{total_claims:,} claims", fontsize=22,
|
||||
color=ACCENT, fontweight="bold", ha="left")
|
||||
fig.text(0.14, 0.82, f"{total_contribs} contributors · {days} days",
|
||||
fontsize=13, color=TEXT, ha="left", alpha=0.7)
|
||||
|
||||
# X-axis
|
||||
ax1.xaxis.set_major_formatter(mdates.DateFormatter("%b %d"))
|
||||
ax1.xaxis.set_major_locator(mdates.WeekdayLocator(interval=2))
|
||||
ax1.tick_params(axis="x", colors=SUBTLE, labelsize=9, rotation=0)
|
||||
|
||||
# Remove spines
|
||||
for ax in [ax1, ax2]:
|
||||
for spine in ax.spines.values():
|
||||
spine.set_visible(False)
|
||||
|
||||
# Subtle grid on claims axis only
|
||||
ax1.grid(axis="y", color=SUBTLE, alpha=0.2, linewidth=0.5)
|
||||
ax1.set_axisbelow(True)
|
||||
|
||||
# Branding
|
||||
fig.text(0.98, 0.02, "livingip.xyz", fontsize=9, color=SUBTLE,
|
||||
ha="right", style="italic")
|
||||
|
||||
plt.tight_layout(rect=[0, 0.03, 1, 0.78])
|
||||
output_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
fig.savefig(output_path, facecolor=BG, bbox_inches="tight", pad_inches=0.3)
|
||||
plt.close(fig)
|
||||
print(f"Saved to {output_path} ({output_path.stat().st_size:,} bytes)")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
out = Path(sys.argv[1]) if len(sys.argv) > 1 else OUTPUT
|
||||
data = get_data()
|
||||
render(data, out)
|
||||
223 scripts/cumulative-growth.py Normal file
@@ -0,0 +1,223 @@
#!/usr/bin/env python3
|
||||
"""Generate cumulative growth time-series data for public dashboard.
|
||||
|
||||
Produces JSON with three series:
|
||||
- cumulative_contributors: unique git authors over time
|
||||
- cumulative_claims: domain claim files added over time
|
||||
- github_stars: star count snapshots (requires GitHub API)
|
||||
|
||||
Data sources: git log (codex repo), GitHub API.
|
||||
Output: JSON to stdout or file, suitable for Chart.js line charts.
|
||||
|
||||
Usage:
|
||||
python3 cumulative-growth.py --codex-path /path/to/teleo-codex [--output /path/to/output.json]
|
||||
python3 cumulative-growth.py --codex-path /path/to/teleo-codex --format csv
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import subprocess
|
||||
import sys
|
||||
from collections import defaultdict
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
# Map bot/service accounts to their human principal or exclude them.
|
||||
# "Teleo Agents" and "Teleo Pipeline" are bot accounts — attribute to system.
|
||||
CONTRIBUTOR_ALIASES = {
|
||||
"Teleo Agents": None, # system automation, not a contributor
|
||||
"Teleo Pipeline": None, # pipeline bot
|
||||
}
|
||||
|
||||
# Founding contributors get a badge — anyone who contributed before this date.
|
||||
FOUNDING_CUTOFF = "2026-03-15"
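# ISO "YYYY-MM-DD" strings sort lexicographically in calendar order, so the
# plain string comparisons against this cutoff below are date-correct.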
|
||||
|
||||
|
||||
def git_log_contributors(codex_path: str) -> list[dict]:
|
||||
"""Extract per-commit author and date from git log."""
|
||||
result = subprocess.run(
|
||||
["git", "log", "--format=%ad|%an", "--date=format:%Y-%m-%d", "--all"],
|
||||
capture_output=True, text=True, cwd=codex_path
|
||||
)
|
||||
if result.returncode != 0:
|
||||
print(f"git log failed: {result.stderr}", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
entries = []
|
||||
for line in result.stdout.strip().split("\n"):
|
||||
if "|" not in line:
|
||||
continue
|
||||
date, author = line.split("|", 1)
|
||||
canonical = CONTRIBUTOR_ALIASES.get(author, author)
|
||||
if canonical is None:
|
||||
continue
|
||||
entries.append({"date": date, "author": canonical})
|
||||
return entries
|
||||
|
||||
|
||||
def git_log_claims(codex_path: str) -> list[dict]:
|
||||
"""Extract claim file additions over time from git log."""
|
||||
result = subprocess.run(
|
||||
["git", "log", "--format=%ad", "--date=format:%Y-%m-%d",
|
||||
"--all", "--diff-filter=A", "--", "domains/*.md"],
|
||||
capture_output=True, text=True, cwd=codex_path
|
||||
)
|
||||
if result.returncode != 0:
|
||||
print(f"git log failed: {result.stderr}", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
counts = defaultdict(int)
|
||||
for line in result.stdout.strip().split("\n"):
|
||||
line = line.strip()
|
||||
if line:
|
||||
counts[line] += 1
|
||||
return [{"date": d, "count": c} for d, c in sorted(counts.items())]
|
||||
|
||||
|
||||
def github_stars(repo: str = "living-ip/teleo-codex") -> int | None:
|
||||
"""Fetch current star count from GitHub API. Returns None on failure."""
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["gh", "api", f"repos/{repo}", "--jq", ".stargazers_count"],
|
||||
capture_output=True, text=True, timeout=10
|
||||
)
|
||||
if result.returncode == 0:
|
||||
return int(result.stdout.strip())
|
||||
except (subprocess.TimeoutExpired, ValueError):
|
||||
pass
|
||||
return None
|
||||
|
||||
|
||||
def build_cumulative_contributors(entries: list[dict]) -> list[dict]:
|
||||
"""Build cumulative unique contributor count by date."""
|
||||
first_seen = {}
|
||||
for e in entries:
|
||||
author, date = e["author"], e["date"]
|
||||
if author not in first_seen or date < first_seen[author]:
|
||||
first_seen[author] = date
|
||||
|
||||
by_date = defaultdict(list)
|
||||
for author, date in first_seen.items():
|
||||
by_date[date].append(author)
|
||||
|
||||
timeline = []
|
||||
seen = set()
|
||||
for date in sorted(by_date.keys()):
|
||||
new_authors = by_date[date]
|
||||
seen.update(new_authors)
|
||||
is_founding = date <= FOUNDING_CUTOFF
|
||||
timeline.append({
|
||||
"date": date,
|
||||
"cumulative": len(seen),
|
||||
"new": [
|
||||
{"name": a, "founding": is_founding}
|
||||
for a in sorted(new_authors)
|
||||
],
|
||||
})
|
||||
return timeline
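# Example shape of the returned timeline (hypothetical authors and dates):
#   [{"date": "2026-03-05", "cumulative": 2,
#     "new": [{"name": "alice", "founding": True}, {"name": "bob", "founding": True}]},
#    {"date": "2026-03-20", "cumulative": 3,
#     "new": [{"name": "carol", "founding": False}]}]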
|
||||
|
||||
|
||||
def build_cumulative_claims(claim_entries: list[dict]) -> list[dict]:
|
||||
"""Build cumulative claim count by date."""
|
||||
timeline = []
|
||||
cumulative = 0
|
||||
for entry in claim_entries:
|
||||
cumulative += entry["count"]
|
||||
timeline.append({
|
||||
"date": entry["date"],
|
||||
"cumulative": cumulative,
|
||||
"added": entry["count"],
|
||||
})
|
||||
return timeline
|
||||
|
||||
|
||||
def build_daily_commits(entries: list[dict]) -> list[dict]:
|
||||
"""Build daily commit volume by contributor."""
|
||||
daily = defaultdict(lambda: defaultdict(int))
|
||||
for e in entries:
|
||||
daily[e["date"]][e["author"]] += 1
|
||||
|
||||
timeline = []
|
||||
for date in sorted(daily.keys()):
|
||||
authors = daily[date]
|
||||
timeline.append({
|
||||
"date": date,
|
||||
"total": sum(authors.values()),
|
||||
"by_contributor": dict(sorted(authors.items())),
|
||||
})
|
||||
return timeline
|
||||
|
||||
|
||||
def generate_report(codex_path: str) -> dict:
|
||||
entries = git_log_contributors(codex_path)
|
||||
claim_entries = git_log_claims(codex_path)
|
||||
stars = github_stars()
|
||||
|
||||
contributors_timeline = build_cumulative_contributors(entries)
|
||||
claims_timeline = build_cumulative_claims(claim_entries)
|
||||
commits_timeline = build_daily_commits(entries)
|
||||
|
||||
all_contributors = set(e["author"] for e in entries)
|
||||
founding = [
|
||||
a for a in all_contributors
|
||||
if any(
|
||||
e["date"] <= FOUNDING_CUTOFF and e["author"] == a
|
||||
for e in entries
|
||||
)
|
||||
]
|
||||
|
||||
return {
|
||||
"generated_at": datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"),
|
||||
"summary": {
|
||||
"total_contributors": len(all_contributors),
|
||||
"founding_contributors": sorted(founding),
|
||||
"total_claims": claims_timeline[-1]["cumulative"] if claims_timeline else 0,
|
||||
"github_stars": stars,
|
||||
"codex_start_date": "2026-03-05",
|
||||
"days_active": (datetime.utcnow() - datetime(2026, 3, 5)).days,
|
||||
},
|
||||
"cumulative_contributors": contributors_timeline,
|
||||
"cumulative_claims": claims_timeline,
|
||||
"daily_activity": commits_timeline,
|
||||
}
|
||||
|
||||
|
||||
def format_csv(report: dict) -> str:
|
||||
lines = ["date,cumulative_contributors,cumulative_claims"]
|
||||
contrib_map = {e["date"]: e["cumulative"] for e in report["cumulative_contributors"]}
|
||||
claims_map = {e["date"]: e["cumulative"] for e in report["cumulative_claims"]}
|
||||
|
||||
all_dates = sorted(set(list(contrib_map.keys()) + list(claims_map.keys())))
|
||||
|
||||
last_contrib = 0
|
||||
last_claims = 0
|
||||
for d in all_dates:
|
||||
last_contrib = contrib_map.get(d, last_contrib)
|
||||
last_claims = claims_map.get(d, last_claims)
|
||||
lines.append(f"{d},{last_contrib},{last_claims}")
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Generate cumulative growth data")
|
||||
parser.add_argument("--codex-path", required=True, help="Path to teleo-codex repo")
|
||||
parser.add_argument("--output", help="Output file path (default: stdout)")
|
||||
parser.add_argument("--format", choices=["json", "csv"], default="json")
|
||||
args = parser.parse_args()
|
||||
|
||||
report = generate_report(args.codex_path)
|
||||
|
||||
if args.format == "csv":
|
||||
output = format_csv(report)
|
||||
else:
|
||||
output = json.dumps(report, indent=2)
|
||||
|
||||
if args.output:
|
||||
with open(args.output, "w") as f:
|
||||
f.write(output)
|
||||
print(f"Written to {args.output}", file=sys.stderr)
|
||||
else:
|
||||
print(output)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
561 scripts/scoring_digest.py Normal file
@@ -0,0 +1,561 @@
#!/usr/bin/env python3
|
||||
"""Daily scoring digest — classify, score, and broadcast KB contributions.
|
||||
|
||||
Runs daily at 8:07 AM London via cron.
|
||||
Queries pipeline.db for merged PRs in last 24h, classifies each as
|
||||
CREATE/ENRICH/CHALLENGE, scores with importance multiplier and connectivity
|
||||
bonus, updates contributors table, posts summary to Telegram.
|
||||
|
||||
Spec: Pentagon/sprints/contribution-scoring-algorithm.md
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import sqlite3
|
||||
import subprocess
|
||||
import sys
|
||||
import urllib.request
|
||||
from datetime import datetime, timezone, timedelta
|
||||
from pathlib import Path
|
||||
from zoneinfo import ZoneInfo
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format="%(asctime)s [%(levelname)s] %(message)s",
|
||||
)
|
||||
log = logging.getLogger("scoring_digest")
|
||||
|
||||
# --- Configuration ---
|
||||
BASE_DIR = Path(os.environ.get("PIPELINE_BASE", "/opt/teleo-eval"))
|
||||
DB_PATH = BASE_DIR / "pipeline" / "pipeline.db"
|
||||
CODEX_DIR = BASE_DIR / "workspaces" / "main"
|
||||
TELEGRAM_TOKEN_FILE = BASE_DIR / "secrets" / "telegram-bot-token"
|
||||
TELEGRAM_CHAT_ID = 2091295364
|
||||
DIGEST_JSON_PATH = BASE_DIR / "logs" / "scoring-digest-latest.json"
|
||||
LONDON_TZ = ZoneInfo("Europe/London")
|
||||
|
||||
# --- Action weights (Leo spec Apr 20) ---
|
||||
ACTION_WEIGHTS = {
|
||||
"challenge": 0.40,
|
||||
"create": 0.35,
|
||||
"enrich": 0.25,
|
||||
}
|
||||
|
||||
# --- Confidence → base importance mapping ---
|
||||
CONFIDENCE_BASE = {
|
||||
"proven": 2.0,
|
||||
"likely": 1.5,
|
||||
"experimental": 1.0,
|
||||
"speculative": 1.0,
|
||||
"possible": 1.0,
|
||||
"plausible": 1.0,
|
||||
"medium": 1.5,
|
||||
}
|
||||
|
||||
DOMAIN_CLAIM_COUNTS: dict[str, int] = {}
|
||||
ENTITY_SLUGS: set[str] = set()
|
||||
CLAIM_SLUGS: set[str] = set()
|
||||
MAP_FILES: set[str] = set()
|
||||
|
||||
|
||||
def _slugify(title: str) -> str:
|
||||
s = title.lower().strip()
|
||||
s = re.sub(r"[^\w\s-]", "", s)
|
||||
s = re.sub(r"[\s_]+", "-", s)
|
||||
return s.strip("-")
|
||||
|
||||
|
||||
def _init_link_index():
|
||||
"""Build indexes for wiki-link resolution."""
|
||||
global ENTITY_SLUGS, CLAIM_SLUGS, MAP_FILES
|
||||
|
||||
entities_dir = CODEX_DIR / "entities"
|
||||
if entities_dir.exists():
|
||||
for f in entities_dir.glob("*.md"):
|
||||
ENTITY_SLUGS.add(f.stem.lower())
|
||||
|
||||
domains_root = CODEX_DIR / "domains"
for domain_dir in (domains_root.iterdir() if domains_root.exists() else []):
|
||||
if not domain_dir.is_dir():
|
||||
continue
|
||||
for f in domain_dir.glob("*.md"):
|
||||
CLAIM_SLUGS.add(f.stem.lower())
|
||||
map_file = domain_dir / "_map.md"
|
||||
if map_file.exists():
|
||||
MAP_FILES.add("_map")
|
||||
MAP_FILES.add(f"domains/{domain_dir.name}/_map")
|
||||
|
||||
for f in (CODEX_DIR / "foundations").glob("*.md") if (CODEX_DIR / "foundations").exists() else []:
|
||||
CLAIM_SLUGS.add(f.stem.lower())
|
||||
for f in (CODEX_DIR / "core").glob("*.md") if (CODEX_DIR / "core").exists() else []:
|
||||
CLAIM_SLUGS.add(f.stem.lower())
|
||||
for f in (CODEX_DIR / "decisions").glob("*.md") if (CODEX_DIR / "decisions").exists() else []:
|
||||
CLAIM_SLUGS.add(f.stem.lower())
|
||||
|
||||
|
||||
def _resolve_link(link_text: str) -> bool:
|
||||
"""Check if a [[wiki-link]] resolves to a known entity, claim, or map."""
|
||||
slug = _slugify(link_text)
|
||||
return (
|
||||
slug in ENTITY_SLUGS
|
||||
or slug in CLAIM_SLUGS
|
||||
or slug in MAP_FILES
|
||||
or link_text.lower() in MAP_FILES
|
||||
)
|
||||
|
||||
|
||||
def _count_resolved_wiki_links(file_path: Path) -> int:
|
||||
"""Count wiki-links in a claim file that resolve to real targets."""
|
||||
if not file_path.exists():
|
||||
return 0
|
||||
try:
|
||||
text = file_path.read_text(encoding="utf-8")
|
||||
except Exception:
|
||||
return 0
|
||||
|
||||
links = re.findall(r"\[\[([^\]]+)\]\]", text)
|
||||
return sum(1 for link in links if _resolve_link(link))
|
||||
|
||||
|
||||
def _get_confidence(file_path: Path) -> str:
|
||||
"""Extract confidence field from claim frontmatter."""
|
||||
if not file_path.exists():
|
||||
return "experimental"
|
||||
try:
|
||||
text = file_path.read_text(encoding="utf-8")
|
||||
except Exception:
|
||||
return "experimental"
|
||||
|
||||
m = re.search(r"^confidence:\s*(\S+)", text, re.MULTILINE)
|
||||
return m.group(1).strip() if m else "experimental"
|
||||
|
||||
|
||||
def _has_cross_domain_ref(file_path: Path) -> bool:
|
||||
"""Check if claim references another domain via secondary_domains or cross-domain links."""
|
||||
if not file_path.exists():
|
||||
return False
|
||||
try:
|
||||
text = file_path.read_text(encoding="utf-8")
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
if re.search(r"^secondary_domains:\s*\[.+\]", text, re.MULTILINE):
|
||||
return True
|
||||
if re.search(r"^depends_on:", text, re.MULTILINE):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _has_challenged_by(file_path: Path) -> bool:
|
||||
"""Check if claim has challenged_by field."""
|
||||
if not file_path.exists():
|
||||
return False
|
||||
try:
|
||||
text = file_path.read_text(encoding="utf-8")
|
||||
except Exception:
|
||||
return False
|
||||
return bool(re.search(r"^challenged_by:", text, re.MULTILINE))
|
||||
|
||||
|
||||
def _get_domain_weight(domain: str) -> float:
|
||||
"""Domain maturity weight: sparse domains get bonus, mature domains get discount."""
|
||||
count = DOMAIN_CLAIM_COUNTS.get(domain, 0)
|
||||
if count < 20:
|
||||
return 1.5
|
||||
elif count > 50:
|
||||
return 0.8
|
||||
return 1.0
|
||||
|
||||
|
||||
def _init_domain_counts():
|
||||
"""Count claims per domain."""
|
||||
global DOMAIN_CLAIM_COUNTS
|
||||
domains_dir = CODEX_DIR / "domains"
|
||||
if not domains_dir.exists():
|
||||
return
|
||||
for domain_dir in domains_dir.iterdir():
|
||||
if domain_dir.is_dir():
|
||||
count = sum(1 for f in domain_dir.glob("*.md") if f.name != "_map.md")
|
||||
DOMAIN_CLAIM_COUNTS[domain_dir.name] = count
|
||||
|
||||
|
||||
def _normalize_contributor(submitted_by: str | None, agent: str | None, branch: str | None = None) -> str:
|
||||
"""Normalize contributor handle — strip @, map agent self-directed to agent name.
|
||||
|
||||
For fork PRs (contrib/NAME/...), extract contributor from branch name.
|
||||
"""
|
||||
if branch and branch.startswith("contrib/"):
|
||||
parts = branch.split("/")
|
||||
if len(parts) >= 2 and parts[1]:
|
||||
return parts[1].lower()
|
||||
|
||||
raw = submitted_by or agent or "unknown"
|
||||
raw = raw.strip()
|
||||
if raw.startswith("@"):
|
||||
raw = raw[1:]
|
||||
if " (self-directed)" in raw:
|
||||
raw = raw.replace(" (self-directed)", "")
|
||||
if raw in ("pipeline", ""):
|
||||
return agent.strip() if agent and agent.strip() not in ("pipeline", "") else "pipeline"
|
||||
return raw
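# Illustrative normalizations (hypothetical inputs):
#   branch="contrib/alice/prediction-markets"     -> "alice"
#   submitted_by="@cameron-s1"                    -> "cameron-s1"
#   submitted_by="rio (self-directed)"            -> "rio"
#   submitted_by="pipeline", agent="theseus"      -> "theseus"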
|
||||
|
||||
|
||||
def classify_pr(pr: dict) -> str | None:
|
||||
"""Classify a merged PR as create/enrich/challenge or None (skip).
|
||||
|
||||
Uses branch name pattern + commit_type as primary signal.
|
||||
Falls back to file-level analysis for ambiguous cases.
|
||||
"""
|
||||
branch = pr.get("branch", "")
|
||||
commit_type = pr.get("commit_type", "")
|
||||
|
||||
if commit_type in ("pipeline", "entity"):
|
||||
return None
|
||||
|
||||
if "challenge" in branch.lower():
|
||||
return "challenge"
|
||||
|
||||
if branch.startswith("extract/") or branch.startswith("research-"):
|
||||
return "create"
|
||||
|
||||
if "reweave" in branch.lower() or "enrich" in branch.lower():
|
||||
return "enrich"
|
||||
|
||||
if commit_type == "research":
|
||||
return "create"
|
||||
|
||||
if commit_type == "reweave":
|
||||
return "enrich"
|
||||
|
||||
if commit_type == "fix":
|
||||
return "enrich"
|
||||
|
||||
if commit_type == "knowledge":
|
||||
return "create"
|
||||
|
||||
return "create"
|
||||
|
||||
|
||||
def _find_claim_file(pr: dict) -> Path | None:
|
||||
"""Find the claim file for a merged PR."""
|
||||
domain = pr.get("domain")
|
||||
branch = pr.get("branch", "")
|
||||
|
||||
if not domain:
|
||||
return None
|
||||
|
||||
domain_dir = CODEX_DIR / "domains" / domain
|
||||
if not domain_dir.exists():
|
||||
return None
|
||||
|
||||
slug_part = branch.split("/")[-1] if "/" in branch else branch
|
||||
slug_part = re.sub(r"-[a-f0-9]{4}$", "", slug_part)
|
||||
|
||||
for claim_file in domain_dir.glob("*.md"):
|
||||
if claim_file.name == "_map.md":
|
||||
continue
|
||||
claim_slug = _slugify(claim_file.stem)
|
||||
if slug_part and slug_part in claim_slug:
|
||||
return claim_file
|
||||
|
||||
return None
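# Illustrative match (hypothetical names): branch "extract/ai-alignment/orthogonality-artefact-3f2a"
# gives slug_part "orthogonality-artefact" after the trailing 4-hex suffix is stripped,
# which matches a file like domains/ai-alignment/orthogonality-artefact-of-specification.md
# because slug_part is tested by substring containment against each claim slug.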
|
||||
|
||||
|
||||
def score_contribution(action_type: str, claim_file: Path | None, domain: str) -> tuple[float, dict]:
|
||||
"""Compute CI points for a single contribution.
|
||||
|
||||
Returns (score, breakdown_dict) for transparency.
|
||||
"""
|
||||
weight = ACTION_WEIGHTS[action_type]
|
||||
|
||||
confidence = _get_confidence(claim_file) if claim_file else "experimental"
|
||||
base = CONFIDENCE_BASE.get(confidence, 1.0)
|
||||
|
||||
if action_type == "challenge" and claim_file and _has_challenged_by(claim_file):
|
||||
base = 3.0 if confidence in ("proven",) else 2.5
|
||||
|
||||
domain_weight = _get_domain_weight(domain)
|
||||
|
||||
connectivity = 0.0
|
||||
if claim_file and _has_cross_domain_ref(claim_file):
|
||||
connectivity += 0.2
|
||||
|
||||
create_multiplier = 1.0
|
||||
resolved_links = 0
|
||||
if action_type == "create" and claim_file:
|
||||
resolved_links = _count_resolved_wiki_links(claim_file)
|
||||
if resolved_links >= 3:
|
||||
create_multiplier = 1.5
|
||||
|
||||
importance = base * domain_weight + connectivity
|
||||
score = weight * importance * create_multiplier
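# Worked examples using the constants above (numbers are illustrative only):
#   create of a 'likely' claim in a sparse domain (<20 claims), with a
#   cross-domain ref and 3+ resolved links:
#     importance = 1.5 * 1.5 + 0.2 = 2.45; score = 0.35 * 2.45 * 1.5 ≈ 1.29 CI
#   challenge that lands challenged_by on a 'proven' claim, 20-50 claim domain:
#     importance = 3.0 * 1.0 + 0.0 = 3.0;  score = 0.40 * 3.0 * 1.0 = 1.20 CI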
|
||||
|
||||
return score, {
|
||||
"action": action_type,
|
||||
"weight": weight,
|
||||
"confidence": confidence,
|
||||
"base": base,
|
||||
"domain_weight": domain_weight,
|
||||
"connectivity_bonus": connectivity,
|
||||
"create_multiplier": create_multiplier,
|
||||
"resolved_links": resolved_links,
|
||||
"importance": importance,
|
||||
"score": round(score, 4),
|
||||
}
|
||||
|
||||
|
||||
def collect_and_score(hours: int = 24) -> dict:
|
||||
"""Main scoring pipeline: collect merged PRs, classify, score."""
|
||||
_init_domain_counts()
|
||||
_init_link_index()
|
||||
|
||||
cutoff = (datetime.now(timezone.utc) - timedelta(hours=hours)).isoformat()
|
||||
|
||||
conn = sqlite3.connect(str(DB_PATH))
|
||||
conn.row_factory = sqlite3.Row
|
||||
try:
|
||||
rows = conn.execute(
|
||||
"""SELECT number, branch, domain, agent, commit_type, merged_at,
|
||||
submitted_by, description
|
||||
FROM prs
|
||||
WHERE status = 'merged' AND merged_at >= ?
|
||||
ORDER BY merged_at DESC""",
|
||||
(cutoff,),
|
||||
).fetchall()
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
contributions = []
|
||||
contributor_deltas: dict[str, float] = {}
|
||||
domain_activity: dict[str, int] = {}
|
||||
action_counts = {"create": 0, "enrich": 0, "challenge": 0}
|
||||
|
||||
for row in rows:
|
||||
pr = dict(row)
|
||||
action_type = classify_pr(pr)
|
||||
if action_type is None:
|
||||
continue
|
||||
|
||||
claim_file = _find_claim_file(pr)
|
||||
domain = pr.get("domain", "unknown")
|
||||
score, breakdown = score_contribution(action_type, claim_file, domain)
|
||||
|
||||
contributor = _normalize_contributor(
|
||||
pr.get("submitted_by"), pr.get("agent"), pr.get("branch")
|
||||
)
|
||||
contributor_deltas[contributor] = contributor_deltas.get(contributor, 0) + score
|
||||
domain_activity[domain] = domain_activity.get(domain, 0) + 1
|
||||
action_counts[action_type] = action_counts.get(action_type, 0) + 1
|
||||
|
||||
contributions.append({
|
||||
"pr_number": pr["number"],
|
||||
"contributor": contributor,
|
||||
"agent": pr.get("agent", ""),
|
||||
"domain": domain,
|
||||
"action": action_type,
|
||||
"score": round(score, 4),
|
||||
"breakdown": breakdown,
|
||||
"description": pr.get("description", ""),
|
||||
"merged_at": pr.get("merged_at", ""),
|
||||
})
|
||||
|
||||
total_claims = sum(DOMAIN_CLAIM_COUNTS.values())
|
||||
|
||||
return {
|
||||
"period_hours": hours,
|
||||
"generated_at": datetime.now(timezone.utc).isoformat(),
|
||||
"date": datetime.now(LONDON_TZ).strftime("%B %d, %Y"),
|
||||
"contributions": contributions,
|
||||
"contributor_deltas": {k: round(v, 4) for k, v in sorted(
|
||||
contributor_deltas.items(), key=lambda x: -x[1]
|
||||
)},
|
||||
"domain_activity": dict(sorted(domain_activity.items(), key=lambda x: -x[1])),
|
||||
"action_counts": action_counts,
|
||||
"total_contributions": len(contributions),
|
||||
"total_ci_awarded": round(sum(c["score"] for c in contributions), 4),
|
||||
"kb_state": {
|
||||
"total_claims": total_claims,
|
||||
"domains": len(DOMAIN_CLAIM_COUNTS),
|
||||
"domain_breakdown": dict(DOMAIN_CLAIM_COUNTS),
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def update_contributors(digest: dict):
|
||||
"""Write CI deltas to contributors table."""
|
||||
if not digest["contributor_deltas"]:
|
||||
return
|
||||
|
||||
conn = sqlite3.connect(str(DB_PATH))
|
||||
try:
|
||||
for handle, delta in digest["contributor_deltas"].items():
|
||||
conn.execute(
|
||||
"""INSERT INTO contributors (handle, claims_merged, created_at, updated_at)
|
||||
VALUES (?, 0, datetime('now'), datetime('now'))
|
||||
ON CONFLICT(handle) DO UPDATE SET updated_at = datetime('now')""",
|
||||
(handle,),
|
||||
)
|
||||
conn.commit()
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
log.info("Updated %d contributor records", len(digest["contributor_deltas"]))
|
||||
|
||||
|
||||
def save_scores_to_db(digest: dict):
|
||||
"""Write individual contribution scores to contribution_scores table."""
|
||||
conn = sqlite3.connect(str(DB_PATH))
|
||||
try:
|
||||
conn.execute("""CREATE TABLE IF NOT EXISTS contribution_scores (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
pr_number INTEGER UNIQUE,
|
||||
contributor TEXT NOT NULL,
|
||||
event_type TEXT CHECK(event_type IN ('create','enrich','challenge')),
|
||||
ci_earned REAL,
|
||||
claim_slug TEXT,
|
||||
domain TEXT,
|
||||
scored_at TEXT NOT NULL
|
||||
)""")
|
||||
for c in digest["contributions"]:
|
||||
slug = (c.get("description") or "")[:200] or c.get("breakdown", {}).get("action", "")
|
||||
conn.execute(
|
||||
"""INSERT INTO contribution_scores (pr_number, contributor, event_type, ci_earned, claim_slug, domain, scored_at)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?)
|
||||
ON CONFLICT(pr_number) DO UPDATE SET
|
||||
contributor = excluded.contributor,
|
||||
ci_earned = excluded.ci_earned,
|
||||
event_type = excluded.event_type,
|
||||
scored_at = excluded.scored_at""",
|
||||
(c["pr_number"], c["contributor"], c["action"], c["score"], slug, c["domain"], c["merged_at"]),
|
||||
)
|
||||
conn.commit()
|
||||
log.info("Wrote %d contribution scores to DB", len(digest["contributions"]))
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
|
||||
def save_digest_json(digest: dict):
|
||||
"""Save latest digest as JSON for API consumption."""
|
||||
DIGEST_JSON_PATH.parent.mkdir(parents=True, exist_ok=True)
|
||||
with open(DIGEST_JSON_PATH, "w") as f:
|
||||
json.dump(digest, f, indent=2, default=str)
|
||||
log.info("Saved digest to %s", DIGEST_JSON_PATH)
|
||||
|
||||
|
||||
def send_telegram(digest: dict):
|
||||
"""Post digest summary to Telegram."""
|
||||
token_file = TELEGRAM_TOKEN_FILE
|
||||
if not token_file.exists():
|
||||
log.warning("Telegram token not found at %s", token_file)
|
||||
return
|
||||
|
||||
token = token_file.read_text().strip()
|
||||
|
||||
lines = [f"📊 *Daily KB Digest — {digest['date']}*", ""]
|
||||
|
||||
if digest["contributions"]:
|
||||
lines.append(f"*NEW CONTRIBUTIONS* (last {digest['period_hours']}h):")
|
||||
action_emoji = {"challenge": "⚔️", "create": "🆕", "enrich": "📚"}
|
||||
|
||||
by_contributor: dict[str, list] = {}
|
||||
for c in digest["contributions"]:
|
||||
name = c["contributor"]
|
||||
by_contributor.setdefault(name, []).append(c)
|
||||
|
||||
for name, contribs in sorted(by_contributor.items(), key=lambda x: -sum(c["score"] for c in x[1])):
|
||||
total_score = sum(c["score"] for c in contribs)
|
||||
actions = {}
|
||||
for c in contribs:
|
||||
actions[c["action"]] = actions.get(c["action"], 0) + 1
|
||||
|
||||
action_summary = ", ".join(
|
||||
f"{action_emoji.get(a, '•')} {n} {a}" for a, n in sorted(actions.items(), key=lambda x: -x[1])
|
||||
)
|
||||
lines.append(f" {name}: {action_summary} → +{total_score:.2f} CI")
|
||||
|
||||
lines.append("")
|
||||
|
||||
lines.append("*KB STATE:*")
|
||||
kb = digest["kb_state"]
|
||||
ac = digest["action_counts"]
|
||||
lines.append(
|
||||
f"Claims: {kb['total_claims']} (+{digest['total_contributions']}) | "
|
||||
f"Domains: {kb['domains']}"
|
||||
)
|
||||
lines.append(
|
||||
f"Creates: {ac.get('create', 0)} | "
|
||||
f"Enrichments: {ac.get('enrich', 0)} | "
|
||||
f"Challenges: {ac.get('challenge', 0)}"
|
||||
)
|
||||
|
||||
if digest["domain_activity"]:
|
||||
top_domain = max(digest["domain_activity"], key=digest["domain_activity"].get)
|
||||
lines.append(f"Most active: {top_domain} ({digest['domain_activity'][top_domain]} events)")
|
||||
|
||||
if digest["contributor_deltas"]:
|
||||
lines.append("")
|
||||
lines.append("*LEADERBOARD CHANGE:*")
|
||||
for i, (name, delta) in enumerate(digest["contributor_deltas"].items(), 1):
|
||||
if i > 5:
|
||||
break
|
||||
lines.append(f" #{i} {name} +{delta:.2f} CI")
|
||||
|
||||
text = "\n".join(lines)
|
||||
|
||||
url = f"https://api.telegram.org/bot{token}/sendMessage"
|
||||
payload = json.dumps({
|
||||
"chat_id": TELEGRAM_CHAT_ID,
|
||||
"text": text,
|
||||
"parse_mode": "Markdown",
|
||||
}).encode("utf-8")
|
||||
|
||||
req = urllib.request.Request(url, data=payload, headers={"Content-Type": "application/json"})
|
||||
try:
|
||||
with urllib.request.urlopen(req, timeout=15) as resp:
|
||||
result = json.loads(resp.read())
|
||||
if result.get("ok"):
|
||||
log.info("Telegram digest sent successfully")
|
||||
else:
|
||||
log.error("Telegram API error: %s", result)
|
||||
except Exception as e:
|
||||
log.error("Failed to send Telegram message: %s", e)
|
||||
|
||||
|
||||
def main():
|
||||
hours = int(sys.argv[1]) if len(sys.argv) > 1 else 24
|
||||
dry_run = "--dry-run" in sys.argv
|
||||
no_telegram = "--no-telegram" in sys.argv
|
||||
|
||||
log.info("Running scoring digest for last %dh (dry_run=%s)", hours, dry_run)
|
||||
|
||||
digest = collect_and_score(hours)
|
||||
|
||||
log.info(
|
||||
"Scored %d contributions: %d create, %d enrich, %d challenge → %.2f total CI",
|
||||
digest["total_contributions"],
|
||||
digest["action_counts"]["create"],
|
||||
digest["action_counts"]["enrich"],
|
||||
digest["action_counts"]["challenge"],
|
||||
digest["total_ci_awarded"],
|
||||
)
|
||||
|
||||
for name, delta in digest["contributor_deltas"].items():
|
||||
log.info(" %s: +%.4f CI", name, delta)
|
||||
|
||||
if dry_run:
|
||||
print(json.dumps(digest, indent=2, default=str))
|
||||
return
|
||||
|
||||
save_digest_json(digest)
|
||||
save_scores_to_db(digest)
|
||||
update_contributors(digest)
|
||||
|
||||
if not no_telegram:
|
||||
send_telegram(digest)
|
||||
|
||||
log.info("Digest complete")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -34,13 +34,34 @@ class TestParseAttribution:
assert result["extractor"][0]["handle"] == "rio"
|
||||
assert result["sourcer"][0]["handle"] == "theiaresearch"
|
||||
|
||||
def test_legacy_source_fallback(self):
|
||||
def test_legacy_source_fallback_removed(self):
|
||||
"""Legacy `source` heuristic removed (Ganymede review, Apr 24).
|
||||
|
||||
It fabricated handles from descriptive strings (garbage like
|
||||
'sec-interpretive-release-s7-2026-09-(march-17'). Claims without
|
||||
explicit attribution now return empty — better to surface as data
|
||||
hygiene than invent contributors.
|
||||
"""
|
||||
fm = {
|
||||
"type": "claim",
|
||||
"source": "@pineanalytics, Q4 2025 report",
|
||||
}
|
||||
result = parse_attribution(fm)
|
||||
assert result["sourcer"][0]["handle"] == "pineanalytics"
|
||||
assert all(len(v) == 0 for v in result.values())
|
||||
|
||||
def test_bad_handles_filtered(self):
|
||||
"""Handles with spaces, parens, or garbage chars are dropped."""
|
||||
fm = {
|
||||
"sourcer": "governance---meritocratic-voting-+-futarchy",
|
||||
}
|
||||
result = parse_attribution(fm)
|
||||
assert len(result["sourcer"]) == 0
|
||||
|
||||
def test_valid_handle_with_hyphen_passes(self):
|
||||
"""Legitimate handles like 'cameron-s1' survive the filter."""
|
||||
fm = {"sourcer": "cameron-s1"}
|
||||
result = parse_attribution(fm)
|
||||
assert result["sourcer"][0]["handle"] == "cameron-s1"
|
||||
|
||||
def test_empty_attribution(self):
|
||||
fm = {"type": "claim"}
|