#!/usr/bin/env python3
"""Reset m3taversal.sourcer_count from inflated legacy value to file-truth count.

Background: pre-Phase-A extract.py had a `submitted_by` fallback that credited
m3taversal as sourcer for every Telegram-ingested source, accumulating to 1011
sourcer_count in the contributors table. The actual file-truth count (sourcer
frontmatter equal to "m3taversal" in claim files) is 21. The 990-row delta is
infrastructure attribution that doesn't reflect content authorship.

The Phase A event-sourced ledger (contribution_events) computed the correct
389.55 CI from author events; /api/leaderboard reads from there directly. But
the legacy /api/contributors endpoint reads contributors.claims_merged which
carries the inflated 1011. Until that endpoint is deprecated, the divergence
shows two different numbers depending on which surface the UI queries.

This script applies the surgical UPDATE that was run on VPS on 2026-04-27
during the leaderboard cutover. Committed as a script per Ganymede review:
"DB mutations go through reviewable code paths matters more than the
convenience of one-shot SQL. The artifact explains what was done and why."

Idempotent — safe to re-run. If sourcer_count and claims_merged are both
already 21, nothing is written (no UPDATE, no audit_log row).

Usage:
    python3 scripts/reset-m3taversal-sourcer.py --dry-run
    python3 scripts/reset-m3taversal-sourcer.py
    python3 scripts/reset-m3taversal-sourcer.py --db /path/to/copy.db --dry-run
"""

import argparse
import json
import os
import sqlite3
import sys
from contextlib import closing
from pathlib import Path
from typing import Optional, Sequence

DB_PATH = os.environ.get("PIPELINE_DB", "/opt/teleo-eval/pipeline/pipeline.db")
TARGET_HANDLE = "m3taversal"
TRUTH_SOURCER_COUNT = 21
TRUTH_CLAIMS_MERGED = 21


def _audit_detail(row: sqlite3.Row) -> str:
    """Return the compact JSON payload stored in audit_log.detail.

    The "before" values are taken from the row that was actually read, so the
    audit trail stays truthful even when the table was not in the exact state
    described in the module docstring (e.g. only one column was off).
    """
    return json.dumps(
        {
            "reason": (
                "Pre-Phase-A submitted_by fallback inflated to 1011; "
                "file-truth is 21"
            ),
            "sourcer_count_before": row["sourcer_count"],
            "sourcer_count_after": TRUTH_SOURCER_COUNT,
            "claims_merged_before": row["claims_merged"],
            "claims_merged_after": TRUTH_CLAIMS_MERGED,
        },
        separators=(",", ":"),
    )


def main(argv: Optional[Sequence[str]] = None) -> None:
    """Reset the target contributor's counters to the file-truth values.

    Args:
        argv: Command-line arguments to parse. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``, which is what the script entry
            point relies on.

    Raises:
        SystemExit: with code 1 when the database file does not exist.
        sqlite3.Error: propagated unchanged if a query fails (for example a
            missing table); the pending UPDATE/INSERT is rolled back.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--dry-run", action="store_true")
    parser.add_argument(
        "--db",
        default=DB_PATH,
        help="path to the pipeline SQLite database (default: %(default)s)",
    )
    args = parser.parse_args(argv)

    # sqlite3.connect() would silently create an empty database at a wrong
    # path, so refuse to continue when the file is not there.
    if not Path(args.db).exists():
        print(f"ERROR: DB not found at {args.db}", file=sys.stderr)
        sys.exit(1)

    # closing() guarantees the connection is released on every exit path,
    # including the early returns below.
    with closing(sqlite3.connect(args.db, timeout=30)) as conn:
        conn.row_factory = sqlite3.Row
        row = conn.execute(
            "SELECT handle, sourcer_count, claims_merged FROM contributors WHERE handle = ?",
            (TARGET_HANDLE,),
        ).fetchone()
        if not row:
            print(f" No contributors row for {TARGET_HANDLE} — nothing to reset.")
            return

        print(
            f" Current: {row['handle']} sourcer_count={row['sourcer_count']} "
            f"claims_merged={row['claims_merged']}"
        )
        print(f" Target: sourcer_count={TRUTH_SOURCER_COUNT} claims_merged={TRUTH_CLAIMS_MERGED}")

        if (
            row["sourcer_count"] == TRUTH_SOURCER_COUNT
            and row["claims_merged"] == TRUTH_CLAIMS_MERGED
        ):
            print(" Already at target values — no-op.")
            return

        if args.dry_run:
            print(" (dry-run) UPDATE would be applied. Re-run without --dry-run.")
            return

        # `with conn` commits both statements together, or rolls both back if
        # either one raises, so the audit row can never exist without the
        # UPDATE (or vice versa).
        with conn:
            conn.execute(
                """UPDATE contributors
                   SET sourcer_count = ?, claims_merged = ?, updated_at = datetime('now')
                   WHERE handle = ?""",
                (TRUTH_SOURCER_COUNT, TRUTH_CLAIMS_MERGED, TARGET_HANDLE),
            )
            conn.execute(
                """INSERT INTO audit_log (stage, event, detail) VALUES (?, ?, ?)""",
                ("manual", "m3taversal_sourcer_reset", _audit_detail(row)),
            )

        # Read the row back so the operator sees what is actually stored.
        after = conn.execute(
            "SELECT sourcer_count, claims_merged FROM contributors WHERE handle = ?",
            (TARGET_HANDLE,),
        ).fetchone()
        print(
            f" Applied. Now: sourcer_count={after['sourcer_count']} "
            f"claims_merged={after['claims_merged']}"
        )


if __name__ == "__main__":
    main()