"""Contributor profile API.

Endpoints registered by :func:`register_contributor_routes`:

* ``GET /api/contributors/{handle}`` -- CI score, badges, domain breakdown,
  role percentages, contribution timeline and review stats.
* ``GET /api/contributors/list`` -- contributor list with a ``min_claims``
  filter, ordered by merged claims.

Contributors that have no row in pipeline.db are looked up in the git log of
the checkout at ``CODEX_PATH`` (the originating commit names Cameron and Alex
as examples of such contributors).
"""

import asyncio
import json
import logging
import os
import sqlite3
import subprocess
from contextlib import closing
from datetime import datetime

logger = logging.getLogger(__name__)

DB_PATH = os.environ.get("PIPELINE_DB", "/opt/teleo-eval/pipeline/pipeline.db")
CODEX_PATH = "/opt/teleo-eval/workspaces/main"

# Weight applied to each ``<role>_count`` column when computing the CI score.
CI_WEIGHTS = {
    "sourcer": 0.15,
    "extractor": 0.05,
    "challenger": 0.35,
    "synthesizer": 0.25,
    "reviewer": 0.20,
}

# Last calendar day (inclusive, YYYY-MM-DD) that earns FOUNDING CONTRIBUTOR.
FOUNDING_CUTOFF = "2026-03-15"

BADGE_DEFS = {
    "FOUNDING CONTRIBUTOR": {"rarity": "limited", "desc": "Contributed during pre-launch phase"},
    "BELIEF MOVER": {"rarity": "rare", "desc": "Challenge that led to a claim revision"},
    "KNOWLEDGE SOURCER": {"rarity": "uncommon", "desc": "Source that generated 3+ claims"},
    "DOMAIN SPECIALIST": {"rarity": "rare", "desc": "Top 3 CI contributor in a domain"},
    "VETERAN": {"rarity": "uncommon", "desc": "10+ accepted contributions"},
    "FIRST BLOOD": {"rarity": "common", "desc": "First contribution of any kind"},
    "CONTRIBUTOR": {"rarity": "common", "desc": "Account created + first accepted contribution"},
}

# Hero-badge selection walks rarities from first to last in this order.
_RARITY_ORDER = ("limited", "rare", "uncommon", "common")

# Role names, in the order they appear in the profile's role breakdown.
_ROLES = tuple(CI_WEIGHTS)

_GIT_TIMEOUT_SECONDS = 30

# ASCII unit separator, emitted by git via ``%x1f``. Unlike "|", it cannot
# plausibly occur inside an author name or e-mail address.
_FIELD_SEP = "\x1f"
_COMMIT_PREFIX = "COMMIT" + _FIELD_SEP


def _get_conn():
    """Open a new connection to ``DB_PATH`` whose rows are ``sqlite3.Row``."""
    conn = sqlite3.connect(DB_PATH)
    conn.row_factory = sqlite3.Row
    return conn


def _compute_ci(row):
    """Return the weighted sum of the ``<role>_count`` values in *row*.

    *row* must support ``.get`` (a dict). Missing or ``None`` counts are
    treated as 0. The result is rounded to two decimals.
    """
    total = sum(
        (row.get(f"{role}_count", 0) or 0) * weight
        for role, weight in CI_WEIGHTS.items()
    )
    return round(total, 2)


def _compute_badges(handle, row, domain_breakdown, conn):
    """Return the list of badge names earned according to *row*.

    *handle*, *domain_breakdown* and *conn* are accepted but currently
    unused.

    NOTE(review): "DOMAIN SPECIALIST" is defined in ``BADGE_DEFS`` but is
    never awarded here. "BELIEF MOVER" is granted for any non-zero
    ``challenger_count`` and "KNOWLEDGE SOURCER" for ``sourcer_count >= 3``,
    which looks looser than their descriptions -- confirm the intended
    criteria.
    """
    badges = []

    first = row.get("first_contribution") or ""
    # Compare the date part only: a full ISO timestamp such as
    # "2026-03-15T10:00:00" sorts *after* "2026-03-15" as a string, which
    # would wrongly exclude contributions made on the cutoff day itself.
    if first and first[:10] <= FOUNDING_CUTOFF:
        badges.append("FOUNDING CONTRIBUTOR")

    claims = row.get("claims_merged", 0) or 0
    if claims > 0:
        badges.extend(("CONTRIBUTOR", "FIRST BLOOD"))
    if claims >= 10:
        badges.append("VETERAN")

    if (row.get("challenger_count", 0) or 0) > 0:
        badges.append("BELIEF MOVER")

    if (row.get("sourcer_count", 0) or 0) >= 3:
        badges.append("KNOWLEDGE SOURCER")

    return badges


def _pick_hero_badge(badges):
    """Return the first badge of the highest rarity present, or ``None``."""
    for rarity in _RARITY_ORDER:
        for badge in badges:
            if BADGE_DEFS.get(badge, {}).get("rarity") == rarity:
                return badge
    return None


def _get_domain_breakdown(handle, conn):
    """Return ``{domain: merged PR count}`` for *handle*, largest first.

    A PR counts when its ``agent`` or ``submitted_by`` equals *handle*
    case-insensitively and its ``domain`` is not NULL.
    """
    rows = conn.execute("""
        SELECT domain, COUNT(*) as cnt
        FROM prs
        WHERE status='merged' AND (LOWER(agent)=LOWER(?) OR LOWER(submitted_by)=LOWER(?))
        AND domain IS NOT NULL
        GROUP BY domain ORDER BY cnt DESC
    """, (handle, handle)).fetchall()
    return {r["domain"]: r["cnt"] for r in rows}


def _classify_commit(commit_type):
    """Map a free-form commit type to "challenge", "enrich" or "create"."""
    if not commit_type:
        return "create"
    lowered = commit_type.lower()
    if "challenge" in lowered:
        return "challenge"
    if any(marker in lowered for marker in ("enrich", "update", "reweave")):
        return "enrich"
    return "create"


def _get_contribution_timeline(handle, conn, limit=20):
    """Return up to *limit* merged PRs for *handle*, newest first.

    Each entry has ``pr_number``, ``domain``, ``date`` (first 10 characters
    of ``created_at``), ``type`` and ``summary``. When a PR has no
    description, the summary falls back to the source file's base name with
    dashes turned into spaces and ".md" removed. Summaries are cut to 200
    characters.
    """
    rows = conn.execute("""
        SELECT number, domain, status, created_at, description, commit_type, source_path
        FROM prs
        WHERE status='merged' AND (LOWER(agent)=LOWER(?) OR LOWER(submitted_by)=LOWER(?))
        ORDER BY created_at DESC LIMIT ?
    """, (handle, handle, limit)).fetchall()

    timeline = []
    for r in rows:
        summary = r["description"] or ""
        if not summary and r["source_path"]:
            summary = os.path.basename(r["source_path"]).replace("-", " ").replace(".md", "")
        created = r["created_at"]
        timeline.append({
            "pr_number": r["number"],
            "domain": r["domain"],
            "date": created[:10] if created else None,
            "type": _classify_commit(r["commit_type"]),
            "summary": summary[:200] if summary else None,
        })
    return timeline


def _get_review_stats(handle, conn):
    """Return ``{outcome: count}`` over ``review_records`` for *handle*."""
    rows = conn.execute("""
        SELECT outcome, COUNT(*) as cnt
        FROM review_records
        WHERE LOWER(agent) = LOWER(?)
        GROUP BY outcome
    """, (handle,)).fetchall()
    return {r["outcome"]: r["cnt"] for r in rows}


def _run_git_log(extra_args):
    """Run ``git log --all <extra_args> --diff-filter=A -- domains/``.

    The command runs in ``CODEX_PATH``. Returns its stdout, or ``None`` if
    git could not be run, timed out, or exited non-zero. Failures are logged
    rather than raised because the git lookup is a best-effort fallback.
    """
    cmd = ["git", "log", "--all", *extra_args, "--diff-filter=A", "--", "domains/"]
    try:
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            cwd=CODEX_PATH,
            timeout=_GIT_TIMEOUT_SECONDS,
        )
    except Exception:
        logger.warning("git log fallback failed in %s", CODEX_PATH, exc_info=True)
        return None
    if result.returncode != 0:
        logger.warning("git log exited with %s: %s", result.returncode, result.stderr.strip())
        return None
    return result.stdout


def _get_git_contributor(handle):
    """Fallback: check git log for contributors not in pipeline.db.

    A commit matches when *handle* is a case-insensitive substring of its
    author name or e-mail. Returns a dict shaped like a ``contributors`` row
    (all role counts 0, ``claims_merged`` = number of matching commits), or
    ``None`` when nothing matches or git fails.
    """
    needle = (handle or "").lower()
    if not needle.strip():
        # An empty substring would match every commit in the repository.
        return None

    fmt = "--format=" + "%x1f".join(("%H", "%an", "%ae", "%aI"))
    stdout = _run_git_log([fmt])
    if stdout is None:
        return None

    claims = []
    for line in stdout.strip().split("\n"):
        parts = line.split(_FIELD_SEP)
        if len(parts) != 4:
            continue
        sha, name, email, date = parts
        if needle in name.lower() or needle in email.lower():
            claims.append({"sha": sha, "author": name, "email": email, "date": date[:10]})

    if not claims:
        return None

    dates = [c["date"] for c in claims]
    profile = {
        "handle": handle,
        "display_name": claims[0]["author"],
        "email": claims[0]["email"],
        "first_contribution": min(dates),
        "last_contribution": max(dates),
        "claims_merged": len(claims),
    }
    profile.update({f"{role}_count": 0 for role in _ROLES})
    return profile


def _git_domain_breakdown(handle):
    """For git-only contributors, count claims by domain from file paths.

    Uses the same name-or-e-mail substring match as
    :func:`_get_git_contributor`. Only files inside a ``domains/<domain>/``
    directory are counted; files sitting directly in ``domains/`` are
    skipped. Returns ``{}`` when git fails or nothing matches.
    """
    needle = (handle or "").lower()
    if not needle.strip():
        return {}

    stdout = _run_git_log(["--name-only", "--format=COMMIT%x1f%an%x1f%ae"])
    if stdout is None:
        return {}

    domains = {}
    current_match = False
    for line in stdout.strip().split("\n"):
        if line.startswith(_COMMIT_PREFIX):
            identity = line[len(_COMMIT_PREFIX):].lower()
            # The separator between name and e-mail prevents a match that
            # straddles the two fields.
            current_match = needle in identity
        elif current_match and line.startswith("domains/"):
            parts = line.split("/")
            if len(parts) >= 3 and parts[1]:
                domains[parts[1]] = domains.get(parts[1], 0) + 1
    return domains


def get_contributor_profile(handle):
    """Build the profile dict for *handle*, or return ``None`` if unknown.

    The ``contributors`` table is consulted first (case-insensitive handle
    match); if there is no row, the git log fallback is tried. Domain
    breakdown, timeline and review stats always come from the database,
    except that a git-only contributor with no merged PRs gets the domain
    breakdown from git.
    """
    if not handle or not handle.strip():
        return None

    with closing(_get_conn()) as conn:
        row = conn.execute(
            "SELECT * FROM contributors WHERE LOWER(handle) = LOWER(?)", (handle,)
        ).fetchone()

        if row is not None:
            data = dict(row)
        else:
            data = _get_git_contributor(handle)
            if data is None:
                return None

        domain_breakdown = _get_domain_breakdown(handle, conn)
        # For git-only contributors, build domain breakdown from git
        if not domain_breakdown and row is None:
            domain_breakdown = _git_domain_breakdown(handle)

        timeline = _get_contribution_timeline(handle, conn)
        review_stats = _get_review_stats(handle, conn)
        badges = _compute_badges(handle, data, domain_breakdown, conn)

    role_breakdown = {role: data.get(f"{role}_count", 0) or 0 for role in _ROLES}
    total_roles = sum(role_breakdown.values())
    role_pct = {
        role: round(count / total_roles * 100) if total_roles > 0 else 0
        for role, count in role_breakdown.items()
    }

    return {
        "handle": data.get("handle", handle),
        "display_name": data.get("display_name"),
        "ci_score": _compute_ci(data),
        "hero_badge": _pick_hero_badge(badges),
        "badges": [{"name": b, **BADGE_DEFS.get(b, {})} for b in badges],
        "joined": data.get("first_contribution"),
        "last_active": data.get("last_contribution"),
        "claims_merged": data.get("claims_merged", 0) or 0,
        "principal": data.get("principal"),
        "role_breakdown": role_breakdown,
        "role_percentages": role_pct,
        "domain_breakdown": domain_breakdown,
        "review_stats": review_stats,
        "contribution_timeline": timeline,
        "active_domains": list(domain_breakdown.keys()),
    }


def _list_contributors(min_claims):
    """Return summary dicts for contributors with at least *min_claims*."""
    with closing(_get_conn()) as conn:
        rows = conn.execute("""
            SELECT handle, display_name, first_contribution, last_contribution,
                   sourcer_count, extractor_count, challenger_count, synthesizer_count,
                   reviewer_count, claims_merged, principal
            FROM contributors
            WHERE claims_merged >= ?
            ORDER BY claims_merged DESC
        """, (min_claims,)).fetchall()

    contributors = []
    for r in rows:
        data = dict(r)
        contributors.append({
            "handle": data["handle"],
            "display_name": data["display_name"],
            "ci_score": _compute_ci(data),
            "claims_merged": data["claims_merged"],
            "first_contribution": data["first_contribution"],
            "last_contribution": data["last_contribution"],
            "principal": data["principal"],
        })
    return contributors


async def handle_contributor_profile(request):
    """aiohttp handler for ``GET /api/contributors/{handle}``.

    Responds with the profile JSON, or a 404 JSON error when the handle is
    unknown.
    """
    from aiohttp import web

    handle = request.match_info["handle"]
    # The lookup does synchronous sqlite work and may shell out to git with a
    # 30 s timeout per call; run it off the event loop so other requests are
    # not stalled meanwhile.
    loop = asyncio.get_running_loop()
    profile = await loop.run_in_executor(None, get_contributor_profile, handle)
    if profile is None:
        return web.json_response({"error": f"Contributor '{handle}' not found"}, status=404)
    return web.json_response(profile)


async def handle_contributors_list(request):
    """aiohttp handler for ``GET /api/contributors/list``.

    Query parameter ``min_claims`` (default 1) filters on ``claims_merged``.
    A non-integer value yields a 400 JSON error instead of an unhandled
    ``ValueError``.
    """
    from aiohttp import web

    raw_min_claims = request.query.get("min_claims", "1")
    try:
        min_claims = int(raw_min_claims)
    except ValueError:
        return web.json_response({"error": "min_claims must be an integer"}, status=400)

    contributors = _list_contributors(min_claims)
    return web.json_response({
        "contributors": contributors,
        "total": len(contributors),
    })


def register_contributor_routes(app):
    """Attach both contributor endpoints to *app*'s router.

    The literal ``/list`` route is added before the ``{handle}`` pattern so
    that it is not captured as a handle.
    """
    app.router.add_get("/api/contributors/list", handle_contributors_list)
    app.router.add_get("/api/contributors/{handle}", handle_contributor_profile)