#!/usr/bin/env python3
"""Reconstruct synthetic `prs` rows for historical GitHub PRs lost pre-mirror-wiring.

Two PRs merged on GitHub before our sync-mirror.sh tracked `github_pr`:

- GitHub PR #68: alexastrum — 6 claims, merged 2026-03-09 via GitHub squash,
  recovered to Forgejo via commit dba00a79 (Apr 16, after mirror erased files)
- GitHub PR #88: Cameron-S1 — 1 claim, recovered via commit da64f805

The recovery commits wrote the files directly to main, so our `prs` table has
no row to attach originator events to — the backfill-events.py strategies all
return NULL. We reconstruct one synthetic `prs` row per historical GitHub PR
so the events pipeline (and `github_pr` strategy in backfill-events) can
credit Alex and Cameron properly.

Numbers 900000+ are clearly synthetic and won't collide with real Forgejo
PRs. Idempotent via INSERT OR IGNORE.

Usage:
    python3 scripts/backfill-synthetic-recovery-prs.py --dry-run
    python3 scripts/backfill-synthetic-recovery-prs.py
"""
import argparse
import os
import sqlite3
import sys
from pathlib import Path

DB_PATH = os.environ.get("PIPELINE_DB", "/opt/teleo-eval/pipeline/pipeline.db")

# Historical GitHub PRs recovered via direct-to-main commits.
# Original GitHub merge dates come from the recovery commit messages.
RECOVERY_PRS = [
    {
        "number": 900068,
        "github_pr": 68,
        "branch": "gh-pr-68",
        "status": "merged",
        "domain": "ai-alignment",
        "commit_type": "knowledge",
        "tier": "STANDARD",
        "leo_verdict": "approve",
        "domain_verdict": "approve",
        "submitted_by": "alexastrum",
        "source_channel": "github",
        # origin='human' matches lib/merge.py convention for external contributors
        # (default is 'pipeline' which misclassifies us as machine-authored).
        "origin": "human",
        "priority": "high",
        "description": "Multi-agent git workflows production maturity | Cryptographic agent trust ratings | Defense in depth for AI agent oversight | Deterministic policy engines below LLM layer | Knowledge validation four-layer architecture | Structurally separating proposer and reviewer agents",
        "merged_at": "2026-03-09 00:00:00",
        "created_at": "2026-03-08 00:00:00",
        "last_error": "synthetic_recovery: GitHub PR #68 pre-mirror-wiring reconstruction (commit dba00a79)",
    },
    {
        "number": 900088,
        "github_pr": 88,
        "branch": "gh-pr-88",
        "status": "merged",
        "domain": "ai-alignment",
        "commit_type": "knowledge",
        "tier": "STANDARD",
        "leo_verdict": "approve",
        "domain_verdict": "approve",
        "submitted_by": "cameron-s1",
        "source_channel": "github",
        "origin": "human",
        "priority": "high",
        "description": "Orthogonality is an artefact of specification architectures not a property of intelligence itself",
        "merged_at": "2026-04-01 00:00:00",
        "created_at": "2026-04-01 00:00:00",
        "last_error": "synthetic_recovery: GitHub PR #88 pre-mirror-wiring reconstruction (commit da64f805)",
    },
]

# Single source of truth for the INSERT column order; the SQL statement and
# the per-row value tuple are both derived from this so they cannot skew.
_COLUMNS = (
    "number", "github_pr", "branch", "status", "domain", "commit_type",
    "tier", "leo_verdict", "domain_verdict", "submitted_by", "source_channel",
    "origin", "priority", "description", "merged_at", "created_at",
    "last_error",
)


def main(argv=None, db_path=None):
    """Insert the synthetic recovery rows into the `prs` table.

    Args:
        argv: optional argument list for argparse; defaults to sys.argv[1:].
            Exposed so tests can drive the script without touching sys.argv.
        db_path: optional database path override; defaults to DB_PATH.

    Exits with status 1 when the DB file is missing, status 2 when the
    synthetic number range is already colonized by an unknown row.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--dry-run", action="store_true")
    args = parser.parse_args(argv)

    if db_path is None:
        db_path = DB_PATH
    if not Path(db_path).exists():
        print(f"ERROR: DB not found at {db_path}", file=sys.stderr)
        sys.exit(1)

    conn = sqlite3.connect(db_path, timeout=30)
    try:
        # Guard against synthetic-range colonization (Ganymede review): check for
        # any row in the synthetic range that isn't one of ours. INSERT OR IGNORE on
        # the specific numbers is the real collision defense; this is belt-and-suspenders.
        max_real = conn.execute(
            "SELECT MAX(number) FROM prs WHERE number < 900000"
        ).fetchone()[0] or 0
        print(f"Max real Forgejo PR number: {max_real}")

        synth_conflict = conn.execute(
            "SELECT number FROM prs WHERE number >= 900000 AND number NOT IN (900068, 900088) LIMIT 1"
        ).fetchone()
        if synth_conflict:
            print(f"ERROR: PR #{synth_conflict[0]} already exists in synthetic range. "
                  f"Pick a new range before running.", file=sys.stderr)
            sys.exit(2)

        inserted = 0
        skipped = 0
        for row in RECOVERY_PRS:
            # Fast-path skip: either the synthetic number or the real github_pr
            # already present means a previous run (or real sync) beat us here.
            existing = conn.execute(
                "SELECT number FROM prs WHERE number = ? OR github_pr = ?",
                (row["number"], row["github_pr"]),
            ).fetchone()
            if existing:
                print(f" PR #{row['number']} (github_pr={row['github_pr']}): already exists — skip")
                skipped += 1
                continue

            print(f" {'(dry-run) ' if args.dry_run else ''}INSERT synthetic PR #{row['number']} "
                  f"(github_pr={row['github_pr']}, submitted_by={row['submitted_by']}, "
                  f"merged_at={row['merged_at']})")
            if not args.dry_run:
                # OR IGNORE makes the script idempotent even if two copies race
                # past the SELECT above (matches the module docstring's promise).
                col_list = ", ".join(_COLUMNS)
                placeholders = ", ".join("?" for _ in _COLUMNS)
                conn.execute(
                    f"INSERT OR IGNORE INTO prs ({col_list}) VALUES ({placeholders})",
                    tuple(row[col] for col in _COLUMNS),
                )
            inserted += 1

        if not args.dry_run:
            conn.commit()
    finally:
        # Always release the connection, even on sys.exit(2) above.
        conn.close()

    print(f"\nInserted {inserted}, skipped {skipped}")
    if not args.dry_run and inserted:
        print("\nNext step: re-run backfill-events.py to attach originator events")
        print(" python3 ops/backfill-events.py")


if __name__ == "__main__":
    main()