From af027d3ced277638f2c8c75b95aa3bf9af267325 Mon Sep 17 00:00:00 2001
From: m3taversal <m3taversal@gmail.com>
Date: Tue, 21 Apr 2026 11:22:13 +0100
Subject: [PATCH] feat: add contributor profile API endpoint
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

GET /api/contributors/{handle} — returns CI score, badges, domain
breakdown, role percentages, contribution timeline, review stats.
GET /api/contributors/list — leaderboard with min_claims filter.

Git-log fallback for contributors not in pipeline.db (Cameron, Alex).
Badge system: FOUNDING CONTRIBUTOR, BELIEF MOVER, KNOWLEDGE SOURCER,
VETERAN, FIRST BLOOD, CONTRIBUTOR. DOMAIN SPECIALIST is defined in
BADGE_DEFS but is not awarded yet.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 diagnostics/contributor_profile_api.py | 314 +++++++++++++++++++++++++
 1 file changed, 314 insertions(+)
 create mode 100644 diagnostics/contributor_profile_api.py

diff --git a/diagnostics/contributor_profile_api.py b/diagnostics/contributor_profile_api.py
new file mode 100644
index 0000000..ac80e23
--- /dev/null
+++ b/diagnostics/contributor_profile_api.py
@@ -0,0 +1,314 @@
+"""Contributor profile API — GET /api/contributors/{handle}"""
+
+import sqlite3
+import json
+import os
+import subprocess
+from datetime import datetime
+
+DB_PATH = os.environ.get("PIPELINE_DB", "/opt/teleo-eval/pipeline/pipeline.db")  # PIPELINE_DB env var overrides
+CODEX_PATH = "/opt/teleo-eval/workspaces/main"  # cwd for the git-log fallback subprocesses
+
+CI_WEIGHTS = {  # weight applied to each "<role>_count" value when _compute_ci sums the score
+    "sourcer": 0.15,
+    "extractor": 0.05,
+    "challenger": 0.35,
+    "synthesizer": 0.25,
+    "reviewer": 0.20,
+}
+
+FOUNDING_CUTOFF = "2026-03-15"  # compared as a string (<=) with first_contribution in _compute_badges
+
+BADGE_DEFS = {  # badge name -> metadata; "rarity" ranks the hero badge, all fields are merged into the response
+    "FOUNDING CONTRIBUTOR": {"rarity": "limited", "desc": "Contributed during pre-launch phase"},
+    "BELIEF MOVER": {"rarity": "rare", "desc": "Challenge that led to a claim revision"},
+    "KNOWLEDGE SOURCER": {"rarity": "uncommon", "desc": "Source that generated 3+ claims"},
+    "DOMAIN SPECIALIST": {"rarity": "rare", "desc": "Top 3 CI contributor in a domain"},
+    "VETERAN": {"rarity": "uncommon", "desc": "10+ accepted contributions"},
+    "FIRST BLOOD": {"rarity": "common", "desc": "First contribution of any kind"},
+    "CONTRIBUTOR": {"rarity": "common", "desc": "Account created + first accepted contribution"},
+}
+
+
+def _get_conn():  # opens a new sqlite3 connection to DB_PATH on every call
+    conn = sqlite3.connect(DB_PATH)
+    conn.row_factory = sqlite3.Row  # rows support lookup by column name, e.g. r["domain"]
+    return conn
+
+
+def _compute_ci(row):
+    """Return the CI score: sum of "<role>_count" * CI_WEIGHTS[role], rounded to 2 dp."""
+    return round(
+        sum((row.get(f"{role}_count", 0) or 0) * w for role, w in CI_WEIGHTS.items()), 2
+    )
+
+
+def _compute_badges(handle, row, domain_breakdown, conn):
+    """Return the list of badge names earned by a contributor record.
+
+    row is a dict (read via .get); missing or NULL counts are treated as 0.
+    handle, domain_breakdown and conn are accepted but not used yet, so
+    DOMAIN SPECIALIST is never awarded by this function.
+    """
+    badges = []
+
+    # Compare only the YYYY-MM-DD prefix: a full ISO timestamp from the cutoff
+    # day would otherwise sort after the bare cutoff date and miss the badge.
+    first = row.get("first_contribution") or ""
+    if first and str(first)[:10] <= FOUNDING_CUTOFF:
+        badges.append("FOUNDING CONTRIBUTOR")
+    claims = row.get("claims_merged", 0) or 0
+    if claims > 0:
+        badges += ["CONTRIBUTOR", "FIRST BLOOD"]
+    if claims >= 10:
+        badges.append("VETERAN")
+    if (row.get("challenger_count", 0) or 0) > 0:
+        badges.append("BELIEF MOVER")
+    if (row.get("sourcer_count", 0) or 0) >= 3:
+        badges.append("KNOWLEDGE SOURCER")
+    return badges
+
+
+def _get_domain_breakdown(handle, conn):
+    """Map each non-NULL domain to the handle's count of merged PRs in it."""
+    return {rec["domain"]: rec["cnt"] for rec in conn.execute("""
+        SELECT domain, COUNT(*) as cnt
+        FROM prs
+        WHERE status='merged' AND (LOWER(agent)=LOWER(?) OR LOWER(submitted_by)=LOWER(?))
+        AND domain IS NOT NULL
+        GROUP BY domain ORDER BY cnt DESC
+    """, (handle, handle)).fetchall()}
+
+
+def _get_contribution_timeline(handle, conn, limit=20):
+    """Return up to `limit` merged PRs for the handle, newest first, as dicts.
+
+    A PR without a description gets a summary derived from its source_path name.
+    """
+    merged = conn.execute("""
+        SELECT number, domain, status, created_at, description, commit_type, source_path
+        FROM prs
+        WHERE status='merged' AND (LOWER(agent)=LOWER(?) OR LOWER(submitted_by)=LOWER(?))
+        ORDER BY created_at DESC LIMIT ?
+    """, (handle, handle, limit)).fetchall()
+
+    def entry(pr):
+        text = pr["description"] or ""
+        if not text and pr["source_path"]:
+            text = os.path.basename(pr["source_path"]).replace("-", " ").replace(".md", "")
+        return {"pr_number": pr["number"], "domain": pr["domain"],
+                "date": pr["created_at"][:10] if pr["created_at"] else None,
+                "type": _classify_commit(pr["commit_type"]),
+                "summary": text[:200] if text else None}
+    return [entry(pr) for pr in merged]
+
+
+def _classify_commit(commit_type):
+    """Bucket a commit_type string into "challenge", "enrich" or "create".
+
+    Matching is by case-insensitive substring; empty or None yields "create".
+    """
+    lowered = (commit_type or "").lower()
+    if "challenge" in lowered:
+        return "challenge"
+    return "enrich" if any(k in lowered for k in ("enrich", "update", "reweave")) else "create"
+
+
+def _get_review_stats(handle, conn):
+    """Return {outcome: count} over the handle's review_records rows."""
+    # LOWER() on both sides makes the agent match case-insensitive.
+    per_outcome = conn.execute("""
+        SELECT outcome, COUNT(*) as cnt
+        FROM review_records
+        WHERE LOWER(agent) = LOWER(?)
+        GROUP BY outcome
+    """, (handle,)).fetchall()
+
+    return {rec["outcome"]: rec["cnt"] for rec in per_outcome}
+
+
+def _get_git_contributor(handle):
+    """Fallback: build a contributor record from git log when pipeline.db has none.
+
+    Scans commits (all refs) under CODEX_PATH that added files below domains/
+    and keeps those whose author name or email contains handle as a
+    case-insensitive substring. Returns a dict shaped like a contributors row
+    (claims_merged = number of matching commits, every role count 0), or None
+    when nothing matches or git cannot be run.
+    """
+    sep = "\x1f"  # ASCII unit separator, so a "|" in an author name cannot shift fields
+    try:
+        result = subprocess.run(
+            ["git", "log", "--all", "--format=%H%x1f%an%x1f%ae%x1f%aI",
+             "--diff-filter=A", "--", "domains/"],
+            capture_output=True, text=True, errors="replace", cwd=CODEX_PATH, timeout=30,
+        )
+    except (OSError, subprocess.SubprocessError):
+        return None  # best-effort: a missing repo/git binary or a timeout means "no data"
+    if result.returncode != 0:
+        return None
+
+    needle = handle.lower()
+    claims = []
+    for line in result.stdout.split("\n"):
+        parts = line.split(sep)
+        if len(parts) != 4:
+            continue
+        sha, name, email, date = parts
+        if needle in name.lower() or needle in email.lower():
+            claims.append({"sha": sha, "author": name, "email": email, "date": date[:10]})
+    if not claims:
+        return None
+    dates = [c["date"] for c in claims]
+    return {
+        "handle": handle, "display_name": claims[0]["author"], "email": claims[0]["email"],
+        "first_contribution": min(dates), "last_contribution": max(dates),
+        "claims_merged": len(claims),
+        **{f"{role}_count": 0 for role in CI_WEIGHTS},
+    }
+
+
+def get_contributor_profile(handle):
+    """Assemble the profile dict for one contributor, or None if unknown.
+
+    The handle is matched case-insensitively against the contributors table;
+    when no row exists the record comes from the git-log fallback instead.
+    The result holds the CI score, badges (plus the rarest one as hero_badge),
+    role counts and percentages, domain breakdown, review stats and timeline.
+    """
+    conn = _get_conn()
+    try:
+        db_row = conn.execute(
+            "SELECT * FROM contributors WHERE LOWER(handle) = LOWER(?)", (handle,)
+        ).fetchone()
+        # Prefer the database row; git history is consulted only when it is absent.
+        data = dict(db_row) if db_row else _get_git_contributor(handle)
+        if not data:
+            return None
+
+        ci_score = _compute_ci(data)
+        domain_breakdown = _get_domain_breakdown(handle, conn)
+        timeline = _get_contribution_timeline(handle, conn)
+        review_stats = _get_review_stats(handle, conn)
+        badges = _compute_badges(handle, data, domain_breakdown, conn)
+
+        # No contributors row and no merged PRs with a domain: use git paths.
+        if not (domain_breakdown or db_row):
+            domain_breakdown = _git_domain_breakdown(handle)
+
+        # Hero badge: the first earned badge found in the rarest tier present.
+        hero_badge = None
+        for tier in ("limited", "rare", "uncommon", "common"):
+            hero_badge = next(
+                (b for b in badges if BADGE_DEFS.get(b, {}).get("rarity") == tier), None
+            )
+            if hero_badge:
+                break
+
+        role_breakdown = {
+            role: data.get(f"{role}_count", 0) or 0
+            for role in ("sourcer", "extractor", "challenger", "synthesizer", "reviewer")
+        }
+        total_roles = sum(role_breakdown.values())
+        # Each share is rounded on its own, so the percentages may not sum to 100.
+        role_pct = {
+            role: round(count / total_roles * 100) if total_roles > 0 else 0
+            for role, count in role_breakdown.items()
+        }
+
+        return {
+            "handle": data.get("handle", handle),
+            "display_name": data.get("display_name"),
+            "ci_score": ci_score,
+            "hero_badge": hero_badge,
+            "badges": [{"name": b, **BADGE_DEFS.get(b, {})} for b in badges],
+            "joined": data.get("first_contribution"),
+            "last_active": data.get("last_contribution"),
+            "claims_merged": data.get("claims_merged", 0) or 0,
+            "principal": data.get("principal"),
+            "role_breakdown": role_breakdown,
+            "role_percentages": role_pct,
+            "domain_breakdown": domain_breakdown,
+            "review_stats": review_stats,
+            "contribution_timeline": timeline,
+            "active_domains": list(domain_breakdown.keys()),
+        }
+    finally:
+        conn.close()
+
+
+def _git_domain_breakdown(handle):
+    """For git-only contributors, count added files per domain from git paths.
+
+    A commit matches when handle is a case-insensitive substring of its author
+    name or email (the same rule _get_git_contributor uses). Returns
+    {domain: file_count}, or {} when git cannot be run or fails.
+    """
+    try:
+        result = subprocess.run(
+            ["git", "log", "--all", "--name-only", "--format=COMMIT|%an|%ae",
+             "--diff-filter=A", "--", "domains/"],
+            capture_output=True, text=True, errors="replace", cwd=CODEX_PATH, timeout=30,
+        )
+    except (OSError, subprocess.SubprocessError):
+        return {}  # best-effort fallback: treat an unusable repo as "no data"
+    if result.returncode != 0:
+        return {}
+    needle, domains, current_match = handle.lower(), {}, False
+    for line in result.stdout.split("\n"):
+        if line.startswith("COMMIT|"):
+            current_match = needle in line[len("COMMIT|"):].lower()
+        elif current_match and line.startswith("domains/") and line.count("/") >= 2:
+            domain = line.split("/")[1]  # domains/<domain>/<file...>; skips files directly in domains/
+            domains[domain] = domains.get(domain, 0) + 1
+    return domains
+
+
+async def handle_contributor_profile(request):
+    """GET /api/contributors/{handle}: the profile as JSON, or a 404 error body."""
+    from aiohttp import web
+    handle = request.match_info["handle"]
+    profile = get_contributor_profile(handle)  # NOTE(review): synchronous call inside a coroutine
+    not_found = {"error": f"Contributor '{handle}' not found"}
+    return web.json_response(not_found, status=404) if profile is None else web.json_response(profile)
+
+
+async def handle_contributors_list(request):
+    """GET /api/contributors/list: contributors ordered by claims_merged, descending.
+
+    min_claims (query parameter, default 1) is the minimum claims_merged to include;
+    a non-integer value gets a 400 JSON error. Every entry includes its CI score.
+    """
+    from aiohttp import web
+    try:
+        min_claims = int(request.query.get("min_claims", "1"))
+    except ValueError:
+        return web.json_response({"error": "min_claims must be an integer"}, status=400)
+
+    conn = _get_conn()  # opened only once the query parameter is known to be valid
+    try:
+        rows = conn.execute("""
+            SELECT handle, display_name, first_contribution, last_contribution, 
+                   sourcer_count, extractor_count, challenger_count, synthesizer_count,
+                   reviewer_count, claims_merged, principal
+            FROM contributors
+            WHERE claims_merged >= ?
+            ORDER BY claims_merged DESC
+        """, (min_claims,)).fetchall()
+    finally:
+        conn.close()
+    contributors = [
+        {
+            "handle": d["handle"], "display_name": d["display_name"],
+            "ci_score": _compute_ci(d), "claims_merged": d["claims_merged"],
+            "first_contribution": d["first_contribution"],
+            "last_contribution": d["last_contribution"], "principal": d["principal"],
+        }
+        for d in map(dict, rows)
+    ]
+    return web.json_response({"contributors": contributors, "total": len(contributors)})
+
+
+def register_contributor_routes(app):  # adds both GET routes to app.router
+    app.router.add_get("/api/contributors/list", handle_contributors_list)  # presumably first so "list" is not read as a {handle} - TODO confirm
+    app.router.add_get("/api/contributors/{handle}", handle_contributor_profile)