#!/usr/bin/env python3
"""Backfill reviewer_count in contributors table from prs review data.

Sources of review data:
  1. leo_verdict in prs table (approve/request_changes = Leo reviewed)
  2. domain_verdict + domain_agent in prs table (domain agent reviewed)
  3. Forgejo API reviews (agents that submitted reviews via Forgejo)

Deduplication: If the same agent is both leo_verdict reviewer and domain_agent
on the same PR, count it once per PR.

Command-line flags (looked up in sys.argv):
  --dry-run       print the computed counts without writing to the database
  --skip-forgejo  use only the prs table; do not call the Forgejo API
"""

import sqlite3
import json
import os
import sys
import urllib.request

DB_PATH = os.environ.get("PIPELINE_DB", "/opt/teleo-eval/pipeline/pipeline.db")
FORGEJO_URL = "http://localhost:3000/api/v1"
REPO = "teleo/teleo-codex"


def get_forgejo_token():
    """Return the Forgejo API token, or "" when none is configured.

    The token file takes precedence when it exists; otherwise the
    FORGEJO_TOKEN environment variable is used.
    """
    token_path = "/opt/teleo-eval/secrets/forgejo-admin-token"
    if os.path.exists(token_path):
        # Context manager so the handle is closed deterministically.
        with open(token_path, encoding="utf-8") as token_file:
            return token_file.read().strip()
    return os.environ.get("FORGEJO_TOKEN", "")


def fetch_forgejo_reviews(pr_number, token):
    """Fetch reviews from Forgejo API for a single PR.

    Best-effort: any failure (network error, HTTP error status, invalid JSON)
    is reported on stderr and yields an empty list so one bad PR does not
    abort the whole backfill.

    Args:
        pr_number: PR number used in the request URL.
        token: API token sent in the Authorization header.

    Returns:
        The decoded JSON list of reviews, or [] on failure or when the
        response body is not a JSON list.
    """
    url = f"{FORGEJO_URL}/repos/{REPO}/pulls/{pr_number}/reviews"
    req = urllib.request.Request(url, headers={"Authorization": f"token {token}"})
    try:
        with urllib.request.urlopen(req, timeout=5) as resp:
            payload = json.loads(resp.read())
    except Exception as exc:
        # Deliberately broad: this is the best-effort boundary. The failure is
        # surfaced instead of swallowed, because a silently failed fetch
        # lowers the counts that are later written to the database.
        print(
            f"warning: could not fetch Forgejo reviews for PR {pr_number}: {exc}",
            file=sys.stderr,
        )
        return []
    # Callers iterate over review objects; anything but a list is unusable.
    return payload if isinstance(payload, list) else []


def _collect_db_reviews(conn):
    """Build {reviewer: set of merged PR numbers} from the prs table."""
    reviewer_prs = {}

    # Leo reviews (leo_verdict = approve or request_changes)
    rows = conn.execute("""
        SELECT number FROM prs
        WHERE status='merged'
          AND leo_verdict IN ('approve', 'request_changes')
    """).fetchall()
    leo_prs = {row["number"] for row in rows}
    if leo_prs:
        reviewer_prs["leo"] = leo_prs
    print(f"Leo reviews from leo_verdict: {len(leo_prs)}")

    # Domain agent reviews
    rows = conn.execute("""
        SELECT number, domain_agent FROM prs
        WHERE status='merged'
          AND domain_verdict IN ('approve', 'request_changes')
          AND domain_agent IS NOT NULL
          AND domain_agent != ''
    """).fetchall()
    for row in rows:
        # Agent names are lower-cased so "Leo" as domain_agent lands in the
        # same set as the leo_verdict reviews; the set removes duplicates.
        reviewer_prs.setdefault(row["domain_agent"].lower(), set()).add(row["number"])

    # Print domain agent counts (before dedup with Leo)
    for agent in sorted(reviewer_prs):
        if agent != "leo":
            print(f" {agent} domain reviews: {len(reviewer_prs[agent])}")

    # Informational only: the overlap is already deduplicated by the sets.
    leo_domain = conn.execute("""
        SELECT COUNT(*) as cnt FROM prs
        WHERE status='merged'
          AND domain_agent='Leo'
          AND domain_verdict IN ('approve', 'request_changes')
    """).fetchone()["cnt"]
    print(f" Leo as domain_agent: {leo_domain} (deduplicated into Leo's total)")

    return reviewer_prs


def _reviewer_login(review):
    """Return the lower-cased login of a review's author, or "" if absent."""
    if not isinstance(review, dict):
        return ""
    # "user" can be null/missing (e.g. a deleted account); never index blindly.
    user = review.get("user") or {}
    if not isinstance(user, dict):
        return ""
    return (user.get("login") or "").lower()


def _collect_forgejo_reviews(conn, reviewer_prs, token):
    """Add approving/requesting-changes Forgejo reviews to reviewer_prs in place."""
    merged = conn.execute("SELECT number FROM prs WHERE status='merged'").fetchall()
    merged_numbers = [row["number"] for row in merged]
    print(f"\nFetching Forgejo reviews for {len(merged_numbers)} merged PRs...")

    forgejo_count = 0
    for i, pr_num in enumerate(merged_numbers):
        if i % 100 == 0 and i > 0:
            print(f" ...{i}/{len(merged_numbers)}")
        for review in fetch_forgejo_reviews(pr_num, token):
            login = _reviewer_login(review)
            if not login:
                continue
            if review.get("state") in ("APPROVED", "REQUEST_CHANGES"):
                reviewer_prs.setdefault(login, set()).add(pr_num)
                forgejo_count += 1
    print(f" Forgejo API reviews found: {forgejo_count}")


def _plan_updates(conn, reviewer_prs):
    """Return {handle: new reviewer_count} for reviewers with a contributor row."""
    print("\n--- Final reviewer counts ---")
    existing = {
        row["handle"]: row["reviewer_count"]
        for row in conn.execute("SELECT handle, reviewer_count FROM contributors").fetchall()
    }
    updates = {}
    for reviewer, prs in sorted(reviewer_prs.items()):
        count = len(prs)
        if reviewer not in existing:
            print(f" {reviewer}: {count} reviews (no contributor record, skipping)")
            continue
        # A NULL reviewer_count is an existing record that still needs the
        # backfill; treat it as 0 rather than as a missing contributor.
        current = existing[reviewer] or 0
        updates[reviewer] = count
        print(f" {reviewer}: {current} -> {count} ({count - current:+d})")
    return updates


def main():
    """Recompute reviewer_count for every known contributor and store it.

    Reads review data from the prs table (and, unless --skip-forgejo is given
    or no token is available, from the Forgejo API), prints a report, and
    updates the contributors table unless --dry-run is given.
    """
    dry_run = "--dry-run" in sys.argv
    skip_forgejo = "--skip-forgejo" in sys.argv

    conn = sqlite3.connect(DB_PATH)
    try:
        conn.row_factory = sqlite3.Row

        # Step 1: Collect review events from prs table
        reviewer_prs = _collect_db_reviews(conn)

        # Step 2: Optionally fetch Forgejo API reviews
        if skip_forgejo:
            print("\nSkipping Forgejo API reviews (--skip-forgejo)")
        else:
            token = get_forgejo_token()
            if token:
                _collect_forgejo_reviews(conn, reviewer_prs, token)
            else:
                print("\nNo Forgejo token found, skipping API reviews")

        # Step 3: Compute final counts
        updates = _plan_updates(conn, reviewer_prs)

        # Step 4: Apply updates
        if dry_run:
            print(f"\n[DRY RUN] Would update {len(updates)} contributors")
        else:
            # "with conn" commits on success and rolls back on error, so the
            # backfill is applied all-or-nothing.
            with conn:
                conn.executemany(
                    "UPDATE contributors SET reviewer_count = ?, updated_at = datetime('now') WHERE handle = ?",
                    [(count, handle) for handle, count in updates.items()],
                )
            print(f"\nUpdated {len(updates)} contributors")
    finally:
        # Close the connection even when a query or the API pass fails.
        conn.close()


if __name__ == "__main__":
    main()