#!/usr/bin/env python3
"""Backfill description column for merged PRs that have no description.

Reads claim frontmatter from branches via git show (works on bare repos).

Configuration is taken from the environment:
    REPO_DIR     path of the git repository to read branches from
    PIPELINE_DB  path of the SQLite database holding the ``prs`` table
"""

import contextlib
import os
import sqlite3
import subprocess
import sys
from typing import Optional

import yaml

REPO = os.environ.get("REPO_DIR", "/opt/teleo-eval/workspaces/teleo-codex.git")
DB = os.environ.get("PIPELINE_DB", "/opt/teleo-eval/pipeline/pipeline.db")

# Limits applied while collecting descriptions for one branch.
_MAX_FILES_PER_BRANCH = 10    # changed claim files inspected per branch
_MAX_CONTENT_CHARS = 2000     # leading characters of each file that are parsed
_MIN_DESCRIPTION_LEN = 10     # a description must be LONGER than this to count
_MAX_DESCRIPTIONS = 5         # descriptions joined into the stored value
_COMMIT_EVERY = 50            # commit after this many updated rows


def _git(*args: str) -> Optional[bytes]:
    """Run ``git -C REPO <args>`` and return its raw stdout, or None on failure.

    The command is executed from an argument list, never through a shell, so
    branch names and file paths containing spaces or shell metacharacters are
    passed to git verbatim. stderr is discarded. A non-zero exit status or a
    failure to launch git is reported as None (best-effort lookup).
    """
    try:
        proc = subprocess.run(
            ["git", "-C", REPO, *args],
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
            check=False,
        )
    except OSError:
        # git is not installed / not executable: treat as "no data".
        return None
    if proc.returncode != 0:
        return None
    return proc.stdout


def _frontmatter_description(content: str) -> Optional[str]:
    """Return the usable ``description`` from a file's YAML frontmatter.

    Returns None when the text does not start with ``---``, has no closing
    ``---`` line, the YAML cannot be loaded, the frontmatter is not a mapping,
    the description is missing or not a string, or the cleaned description is
    not longer than ``_MIN_DESCRIPTION_LEN`` characters.
    """
    if not content.startswith("---"):
        return None
    # The closing delimiter sits at the start of a line; requiring the newline
    # keeps a "---" inside a value from truncating the YAML.
    end = content.find("\n---", 3)
    if end < 0:
        return None
    try:
        frontmatter = yaml.safe_load(content[3:end])
    except Exception:
        # Deliberately broad: any failure to load one file's frontmatter only
        # skips that file; it must not abort the whole backfill.
        return None
    if not isinstance(frontmatter, dict):
        return None
    raw = frontmatter.get("description")
    if not isinstance(raw, str):
        # Missing, or a non-string YAML value (list, number, mapping, ...).
        return None
    cleaned = raw.strip().strip('"')
    if len(cleaned) > _MIN_DESCRIPTION_LEN:
        return cleaned
    return None


def extract_description(branch):
    """Build a description for ``branch`` from the claim files it changes.

    Lists the files that differ between ``origin/main`` and ``origin/<branch>``
    (three-dot diff), keeps ``.md`` paths containing ``domains/``, and reads
    the frontmatter ``description`` of up to ``_MAX_FILES_PER_BRANCH`` of them.

    Args:
        branch: Branch name without the ``origin/`` prefix.

    Returns:
        Up to ``_MAX_DESCRIPTIONS`` descriptions joined with " | ", or None
        when the branch is empty/None, git fails, or no usable description is
        found.
    """
    if not branch:
        return None

    # -z yields NUL-separated, unquoted paths, so names with spaces or
    # non-ASCII characters survive intact.
    listing = _git(
        "diff", "--name-only", "-z", f"origin/main...origin/{branch}", "--"
    )
    if not listing:
        return None

    # os.fsdecode round-trips arbitrary path bytes back through subprocess.
    claim_paths = [
        path
        for path in os.fsdecode(listing).split("\0")
        if path.endswith(".md") and "domains/" in path
    ]

    descriptions = []
    for path in claim_paths[:_MAX_FILES_PER_BRANCH]:
        blob = _git("show", f"origin/{branch}:{path}")
        if not blob:
            continue
        content = blob.decode("utf-8", errors="replace")[:_MAX_CONTENT_CHARS]
        description = _frontmatter_description(content)
        if description:
            descriptions.append(description)

    if not descriptions:
        return None
    return " | ".join(descriptions[:_MAX_DESCRIPTIONS])


def main():
    """Fill ``prs.description`` for merged PRs whose description is NULL or ''.

    Commits every ``_COMMIT_EVERY`` updated rows and once more at the end.
    The connection is closed even if an error interrupts the run.
    """
    with contextlib.closing(sqlite3.connect(DB)) as conn:
        rows = conn.execute(
            "SELECT number, branch FROM prs WHERE status='merged' AND (description IS NULL OR description='')"
        ).fetchall()
        print(f"PRs needing descriptions: {len(rows)}")

        updated = 0
        for pr_num, branch in rows:
            desc = extract_description(branch)
            if not desc:
                continue
            conn.execute(
                "UPDATE prs SET description=? WHERE number=?", (desc, pr_num)
            )
            updated += 1
            if updated % _COMMIT_EVERY == 0:
                conn.commit()
                print(f" ...{updated} updated")
        conn.commit()

    print(f"Done. Updated {updated}/{len(rows)} PRs with descriptions.")


if __name__ == "__main__":
    main()