Some checks are pending
CI / lint-and-test (push) Waiting to run
24 files: 8 pipeline lib modules, 6 diagnostics updates, 4 new diagnostics modules, telegram bot fix, 5 active operational scripts. Key changes: - Security: SQL injection prevention (alerting.py), SSL verification (review_queue.py), path traversal guard (extract.py) - Cost tracking: per-PR cost accumulation in evaluate.py - Auto-recovery: watchdog tier0 reset with retry cap + cooldown - Extraction: structured edge fields, post-write vector connection - New modules: vitality, research_tracking, research_routes Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
143 lines
5.3 KiB
Python
143 lines
5.3 KiB
Python
#!/usr/bin/env python3
"""Backfill reviewer_count in the contributors table from PR review data.

Only merged PRs are considered. Sources of review data:
  1. leo_verdict in the prs table (approve/request_changes = Leo reviewed)
  2. domain_verdict + domain_agent in the prs table (domain agent reviewed)
  3. Forgejo API reviews (agents that submitted reviews via Forgejo)

Deduplication: if the same agent is both the leo_verdict reviewer and the
domain_agent on the same PR, that PR is counted once for the agent.

Options:
  --dry-run       Print the planned changes without writing to the database.
  --skip-forgejo  Use only the prs table; do not query the Forgejo API.
"""
|
|
import sqlite3
|
|
import json
|
|
import os
|
|
import sys
|
|
import urllib.request
|
|
|
|
# SQLite pipeline database; the PIPELINE_DB environment variable overrides
# the default location.
DB_PATH = os.environ.get("PIPELINE_DB", "/opt/teleo-eval/pipeline/pipeline.db")
# Base URL that every Forgejo API request in this script is built from.
FORGEJO_URL = "http://localhost:3000/api/v1"
# Repository path segment inserted into the Forgejo API URLs.
REPO = "teleo/teleo-codex"
|
|
|
|
def get_forgejo_token():
    """Return the Forgejo API token, or ``""`` when none is configured.

    The token file ``/opt/teleo-eval/secrets/forgejo-admin-token`` takes
    precedence; when it does not exist, the ``FORGEJO_TOKEN`` environment
    variable is used instead.

    Returns:
        The token with surrounding whitespace stripped (file case), the raw
        value of ``FORGEJO_TOKEN`` (environment case), or an empty string.
    """
    token_path = "/opt/teleo-eval/secrets/forgejo-admin-token"
    if os.path.exists(token_path):
        # Context manager so the file handle is closed deterministically
        # instead of being left to the garbage collector.
        with open(token_path) as token_file:
            return token_file.read().strip()
    return os.environ.get("FORGEJO_TOKEN", "")
|
|
|
|
def fetch_forgejo_reviews(pr_number, token):
    """Fetch the reviews of a single PR from the Forgejo API.

    Args:
        pr_number: Number of the pull request inside ``REPO``.
        token: Forgejo API token, sent as ``Authorization: token <token>``.

    Returns:
        The decoded JSON list of review objects, or ``[]`` when the request
        fails, exceeds the 5 second timeout, or the payload is not a JSON
        list.

    The lookup is deliberately best-effort so that a single bad PR cannot
    abort a long backfill, but every failure is reported on stderr: silently
    dropped reviews would otherwise end up as undercounted reviewer totals.
    """
    url = f"{FORGEJO_URL}/repos/{REPO}/pulls/{pr_number}/reviews"
    req = urllib.request.Request(url, headers={"Authorization": f"token {token}"})
    try:
        with urllib.request.urlopen(req, timeout=5) as resp:
            reviews = json.loads(resp.read())
    except Exception as exc:
        # Broad on purpose (network, HTTP, and decoding errors all mean
        # "no data for this PR"), but no longer silent.
        print(
            f"  warning: could not fetch reviews for PR #{pr_number}: {exc}",
            file=sys.stderr,
        )
        return []

    # Callers iterate the result and call .get() on each item, so anything
    # other than a list (e.g. null or an error object) is treated as no data.
    if not isinstance(reviews, list):
        print(
            f"  warning: unexpected reviews payload for PR #{pr_number}: "
            f"{type(reviews).__name__}",
            file=sys.stderr,
        )
        return []
    return reviews
|
|
|
|
def main():
    """Recompute ``contributors.reviewer_count`` from PR review data.

    Flags read from ``sys.argv``:
        --dry-run       Print the planned changes without writing to the DB.
        --skip-forgejo  Use only the ``prs`` table; do not call the Forgejo API.

    Review events are collected as ``reviewer -> set of PR numbers`` so that a
    reviewer is counted at most once per merged PR, no matter how many
    sources report the review. Only handles that already have a row in
    ``contributors`` are updated; other reviewers are listed and skipped.
    """
    dry_run = "--dry-run" in sys.argv
    skip_forgejo = "--skip-forgejo" in sys.argv

    conn = sqlite3.connect(DB_PATH)
    conn.row_factory = sqlite3.Row
    # try/finally so the connection is released even when a query or the
    # Forgejo loop raises; closing without commit discards partial updates.
    try:
        # Step 1: Collect review events from prs table
        # reviewer -> set of PR numbers they reviewed
        reviewer_prs = {}

        # Leo reviews (leo_verdict = approve or request_changes)
        rows = conn.execute("""
            SELECT number FROM prs
            WHERE status='merged' AND leo_verdict IN ('approve', 'request_changes')
        """).fetchall()
        leo_prs = {r["number"] for r in rows}
        if leo_prs:
            reviewer_prs["leo"] = leo_prs
        print(f"Leo reviews from leo_verdict: {len(leo_prs)}")

        # Domain agent reviews
        rows = conn.execute("""
            SELECT number, domain_agent FROM prs
            WHERE status='merged' AND domain_verdict IN ('approve', 'request_changes')
            AND domain_agent IS NOT NULL AND domain_agent != ''
        """).fetchall()
        for r in rows:
            # Agent names are lower-cased so case variants share one bucket.
            agent = r["domain_agent"].lower()
            reviewer_prs.setdefault(agent, set()).add(r["number"])

        # Print domain agent counts (Leo is reported separately below)
        for agent in sorted(reviewer_prs):
            if agent != "leo":
                print(f" {agent} domain reviews: {len(reviewer_prs[agent])}")

        # Leo as domain_agent overlaps with leo_verdict; the set already
        # deduplicates it. Matched case-insensitively to agree with the
        # lower-casing used when the reviews were bucketed above.
        leo_domain = conn.execute("""
            SELECT COUNT(*) as cnt FROM prs
            WHERE status='merged' AND LOWER(domain_agent)='leo'
            AND domain_verdict IN ('approve', 'request_changes')
        """).fetchone()["cnt"]
        print(f" Leo as domain_agent: {leo_domain} (deduplicated into Leo's total)")

        # Step 2: Optionally fetch Forgejo API reviews
        if not skip_forgejo:
            token = get_forgejo_token()
            if token:
                # Get all merged PR numbers
                merged = conn.execute(
                    "SELECT number FROM prs WHERE status='merged'"
                ).fetchall()
                merged_numbers = [r["number"] for r in merged]

                print(f"\nFetching Forgejo reviews for {len(merged_numbers)} merged PRs...")
                forgejo_count = 0
                for i, pr_num in enumerate(merged_numbers):
                    if i % 100 == 0 and i > 0:
                        print(f" ...{i}/{len(merged_numbers)}")
                    for review in fetch_forgejo_reviews(pr_num, token):
                        if not isinstance(review, dict):
                            continue
                        if review.get("state") not in ("APPROVED", "REQUEST_CHANGES"):
                            continue
                        # The user object can be null or missing; skip such
                        # reviews instead of crashing the whole backfill.
                        user = review.get("user") or {}
                        login = (user.get("login") or "").lower()
                        if not login:
                            continue
                        reviewer_prs.setdefault(login, set()).add(pr_num)
                        forgejo_count += 1
                print(f" Forgejo API reviews found: {forgejo_count}")
            else:
                print("\nNo Forgejo token found, skipping API reviews")
        else:
            print("\nSkipping Forgejo API reviews (--skip-forgejo)")

        # Step 3: Compute final counts
        print("\n--- Final reviewer counts ---")
        # NOTE(review): reviewer names are lower-cased, handles are matched
        # as stored; confirm contributors.handle is always lower-case.
        existing = {
            r["handle"]: r["reviewer_count"]
            for r in conn.execute(
                "SELECT handle, reviewer_count FROM contributors"
            ).fetchall()
        }

        updates = {}
        for reviewer, prs in sorted(reviewer_prs.items()):
            count = len(prs)
            if reviewer not in existing:
                print(f" {reviewer}: {count} reviews (no contributor record, skipping)")
                continue
            current = existing[reviewer]
            updates[reviewer] = count
            # A NULL reviewer_count is still a contributor record: backfill
            # it and treat the previous value as 0 for the displayed delta.
            baseline = current if current is not None else 0
            print(f" {reviewer}: {current} -> {count} ({count - baseline:+d})")

        # Step 4: Apply updates
        if dry_run:
            print(f"\n[DRY RUN] Would update {len(updates)} contributors")
        else:
            for handle, count in updates.items():
                conn.execute(
                    "UPDATE contributors SET reviewer_count = ?, "
                    "updated_at = datetime('now') WHERE handle = ?",
                    (count, handle),
                )
            conn.commit()
            print(f"\nUpdated {len(updates)} contributors")
    finally:
        conn.close()
|
|
|
|
# Run the backfill only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|