teleo-infrastructure/scripts/backfill-reviewer-count.py
m3taversal 81afcd319f
Some checks are pending
CI / lint-and-test (push) Waiting to run
fix: sync all code from VPS — repo is now authoritative source of truth
24 files: 8 pipeline lib modules, 6 diagnostics updates, 4 new diagnostics
modules, telegram bot fix, 5 active operational scripts. Key changes:
- Security: SQL injection prevention (alerting.py), SSL verification
  (review_queue.py), path traversal guard (extract.py)
- Cost tracking: per-PR cost accumulation in evaluate.py
- Auto-recovery: watchdog tier0 reset with retry cap + cooldown
- Extraction: structured edge fields, post-write vector connection
- New modules: vitality, research_tracking, research_routes

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-15 13:18:01 +01:00

143 lines
5.3 KiB
Python

#!/usr/bin/env python3
"""Backfill reviewer_count in contributors table from prs review data.
Sources of review data:
1. leo_verdict in prs table (approve/request_changes = Leo reviewed)
2. domain_verdict + domain_agent in prs table (domain agent reviewed)
3. Forgejo API reviews (agents that submitted reviews via Forgejo)
Deduplication: If the same agent is both leo_verdict reviewer and domain_agent
on the same PR, count it once per PR.
"""
import sqlite3
import json
import os
import sys
import urllib.request
# "owner/name" slug of the repository, interpolated into Forgejo API paths.
REPO = "teleo/teleo-codex"

# Base URL of the Forgejo REST API (v1) that review data is fetched from.
FORGEJO_URL = "http://localhost:3000/api/v1"

# Path of the SQLite pipeline database; the PIPELINE_DB environment variable
# overrides the built-in default.
DB_PATH = os.environ.get("PIPELINE_DB", "/opt/teleo-eval/pipeline/pipeline.db")
def get_forgejo_token():
    """Return the Forgejo API token, or "" when none is configured.

    The token file /opt/teleo-eval/secrets/forgejo-admin-token takes
    precedence. When that file does not exist, the FORGEJO_TOKEN environment
    variable is used instead.

    Returns:
        The file contents with surrounding whitespace stripped, or the value
        of FORGEJO_TOKEN (unmodified), or an empty string if neither source
        is available.
    """
    token_path = "/opt/teleo-eval/secrets/forgejo-admin-token"
    if os.path.exists(token_path):
        # Use a context manager so the handle is closed deterministically
        # instead of being left for the garbage collector.
        with open(token_path, encoding="utf-8") as fh:
            return fh.read().strip()
    return os.environ.get("FORGEJO_TOKEN", "")
def fetch_forgejo_reviews(pr_number, token):
    """Fetch reviews from the Forgejo API for a single PR.

    This is a best-effort lookup: failures are reported on stderr and turned
    into an empty result so one bad PR does not abort a whole backfill run.

    Args:
        pr_number: PR number interpolated into the reviews endpoint URL.
        token: API token, sent as ``Authorization: token <token>``.

    Returns:
        The decoded JSON list of review objects, or [] when the request
        fails, the body is not valid JSON, or the body is not a JSON list.
    """
    # Function-scope import: only the exception type is needed here.
    import http.client

    url = f"{FORGEJO_URL}/repos/{REPO}/pulls/{pr_number}/reviews"
    req = urllib.request.Request(url, headers={"Authorization": f"token {token}"})
    # NOTE(review): only the first response page is read; presumably no PR has
    # more reviews than the server's default page size -- confirm.
    try:
        with urllib.request.urlopen(req, timeout=5) as resp:
            payload = json.loads(resp.read())
    except (OSError, ValueError, http.client.HTTPException) as exc:
        # OSError covers URLError/HTTPError and socket timeouts; ValueError
        # covers malformed JSON and undecodable bytes; HTTPException covers
        # protocol-level failures such as truncated responses.
        print(
            f"  warning: could not fetch reviews for PR #{pr_number}: {exc}",
            file=sys.stderr,
        )
        return []

    # Callers iterate the result as a list of review dicts; anything else
    # (e.g. an error object) is treated as "no reviews".
    if not isinstance(payload, list):
        print(
            f"  warning: unexpected review payload for PR #{pr_number}",
            file=sys.stderr,
        )
        return []
    return payload
def main():
    """Recompute reviewer_count per contributor and write it to the database.

    Flags are read directly from sys.argv:
        --dry-run       Print the planned changes without writing them.
        --skip-forgejo  Use only the prs table; do not call the Forgejo API.

    Steps:
        1. Collect, per reviewer, the set of merged PR numbers reviewed
           according to leo_verdict and domain_verdict/domain_agent in prs.
        2. Unless skipped, add reviewers found via the Forgejo reviews API.
        3. Compare the per-reviewer PR counts with the contributors table.
        4. Update reviewer_count for reviewers that have a contributor row.

    Using a set of PR numbers per reviewer means a reviewer is counted at
    most once per PR regardless of how many sources report the review.
    """
    dry_run = "--dry-run" in sys.argv
    skip_forgejo = "--skip-forgejo" in sys.argv

    conn = sqlite3.connect(DB_PATH)
    try:
        conn.row_factory = sqlite3.Row

        # Step 1: Collect review events from prs table
        # reviewer (lower-cased) -> set of PR numbers they reviewed
        reviewer_prs = {}

        # Leo reviews (leo_verdict = approve or request_changes)
        rows = conn.execute("""
            SELECT number FROM prs
            WHERE status='merged' AND leo_verdict IN ('approve', 'request_changes')
        """).fetchall()
        leo_prs = {r["number"] for r in rows}
        if leo_prs:
            reviewer_prs["leo"] = leo_prs
        print(f"Leo reviews from leo_verdict: {len(leo_prs)}")

        # Domain agent reviews
        rows = conn.execute("""
            SELECT number, domain_agent FROM prs
            WHERE status='merged' AND domain_verdict IN ('approve', 'request_changes')
            AND domain_agent IS NOT NULL AND domain_agent != ''
        """).fetchall()
        # Rows where Leo acted as domain agent, counted with the same
        # case-insensitive normalisation used for the aggregation key so the
        # diagnostic below matches what is actually merged into "leo".
        leo_domain = 0
        for r in rows:
            agent = r["domain_agent"].lower()
            if agent == "leo":
                leo_domain += 1
            reviewer_prs.setdefault(agent, set()).add(r["number"])

        # Print domain agent counts (before dedup with Leo)
        for agent in sorted(reviewer_prs):
            if agent != "leo":
                print(f"  {agent} domain reviews: {len(reviewer_prs[agent])}")

        # Leo as domain_agent overlaps with leo_verdict -- already deduped by
        # using sets.
        print(f"  Leo as domain_agent: {leo_domain} (deduplicated into Leo's total)")

        # Step 2: Optionally fetch Forgejo API reviews
        if skip_forgejo:
            print("\nSkipping Forgejo API reviews (--skip-forgejo)")
        else:
            token = get_forgejo_token()
            if not token:
                print("\nNo Forgejo token found, skipping API reviews")
            else:
                merged = conn.execute(
                    "SELECT number FROM prs WHERE status='merged'"
                ).fetchall()
                merged_numbers = [r["number"] for r in merged]
                print(f"\nFetching Forgejo reviews for {len(merged_numbers)} merged PRs...")

                forgejo_count = 0
                for i, pr_num in enumerate(merged_numbers):
                    if i % 100 == 0 and i > 0:
                        print(f"  ...{i}/{len(merged_numbers)}")
                    for review in fetch_forgejo_reviews(pr_num, token):
                        if not isinstance(review, dict):
                            continue
                        if review.get("state") not in ("APPROVED", "REQUEST_CHANGES"):
                            continue
                        # A review may carry a null/missing user; skip it
                        # rather than crash the whole run on one record.
                        user = review.get("user") or {}
                        login = (user.get("login") or "").lower()
                        if not login:
                            continue
                        reviewer_prs.setdefault(login, set()).add(pr_num)
                        # Counts matching review events, not unique PRs.
                        forgejo_count += 1
                print(f"  Forgejo API reviews found: {forgejo_count}")

        # Step 3: Compute final counts
        print("\n--- Final reviewer counts ---")
        # NOTE(review): reviewer keys are lower-cased above but handles are
        # compared as stored; assumes contributors.handle is lower-case --
        # TODO confirm.
        existing = {
            r["handle"]: r["reviewer_count"]
            for r in conn.execute(
                "SELECT handle, reviewer_count FROM contributors"
            ).fetchall()
        }

        updates = {}
        for reviewer, prs in sorted(reviewer_prs.items()):
            count = len(prs)
            if reviewer not in existing:
                print(f"  {reviewer}: {count} reviews (no contributor record, skipping)")
                continue
            # A NULL reviewer_count means the row exists but was never
            # filled in; treat it as 0 so the backfill still applies.
            current = existing[reviewer] or 0
            updates[reviewer] = count
            print(f"  {reviewer}: {current} -> {count} ({count - current:+d})")

        # Step 4: Apply updates
        if dry_run:
            print(f"\n[DRY RUN] Would update {len(updates)} contributors")
        else:
            for handle, count in updates.items():
                conn.execute(
                    "UPDATE contributors SET reviewer_count = ?, updated_at = datetime('now') WHERE handle = ?",
                    (count, handle),
                )
            conn.commit()
            print(f"\nUpdated {len(updates)} contributors")
    finally:
        # Close the connection even when a query or API step raises.
        conn.close()


if __name__ == "__main__":
    main()