Some checks are pending
CI / lint-and-test (push) Waiting to run
3 nits from review of d60b6f8 + Q4 ask:
1. test_window_24h_only_today: replace always-true assertion with
concrete `assert handles == ["carol"]`. Push alice's most-recent
event from -1 days to -2 days to eliminate fixture-vs-query
microsecond drift on the 24h boundary.
2. _call helper: asyncio.get_event_loop().run_until_complete →
asyncio.run (deprecation in 3.12, raises in some 3.14 contexts).
3. test_invalid_limit_falls_to_default: dead first call removed,
misleading "7 entries" comment now matches assertion.
Q4: scripts/reset-m3taversal-sourcer.py captures the surgical
UPDATE we ran on VPS as a reviewable artifact. Idempotent (no-op
on already-reset rows), audit_log entry per run. Ganymede's point:
DB mutations should leave a code paper trail, not just an audit
row whose origin lives only in the executor's memory.
30/30 tests pass on VPS hermes venv (aiohttp 3.13.5, py 3.11.15).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
108 lines
3.7 KiB
Python
108 lines
3.7 KiB
Python
#!/usr/bin/env python3
|
|
"""Reset m3taversal.sourcer_count from inflated legacy value to file-truth count.
|
|
|
|
Background: pre-Phase-A extract.py had a `submitted_by` fallback that credited
|
|
m3taversal as sourcer for every Telegram-ingested source, accumulating to 1011
|
|
sourcer_count in the contributors table. The actual file-truth count (sourcer
|
|
frontmatter equal to "m3taversal" in claim files) is 21. The 990-row delta is
|
|
infrastructure attribution that doesn't reflect content authorship.
|
|
|
|
The Phase A event-sourced ledger (contribution_events) computed the correct
|
|
389.55 CI from author events; /api/leaderboard reads from there directly.
|
|
But the legacy /api/contributors endpoint reads contributors.claims_merged
|
|
which carries the inflated 1011. Until that endpoint is deprecated, the
|
|
divergence shows two different numbers depending on which surface the UI
|
|
queries.
|
|
|
|
This script applies the surgical UPDATE that was run on VPS on 2026-04-27
|
|
during the leaderboard cutover. Committed as a script per Ganymede review:
|
|
"DB mutations go through reviewable code paths matters more than the
|
|
convenience of one-shot SQL. The artifact explains what was done and why."
|
|
|
|
Idempotent — safe to re-run. If sourcer_count and claims_merged are both already 21, no change is made.
|
|
|
|
Usage:
|
|
python3 scripts/reset-m3taversal-sourcer.py --dry-run
|
|
python3 scripts/reset-m3taversal-sourcer.py
|
|
"""
|
|
import argparse
|
|
import os
|
|
import sqlite3
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
# Path to the pipeline SQLite database; overridable via the PIPELINE_DB env var.
DB_PATH = os.environ.get("PIPELINE_DB", "/opt/teleo-eval/pipeline/pipeline.db")
# Contributor handle whose legacy counters are corrected by this script.
TARGET_HANDLE = "m3taversal"
# Values written to contributors.sourcer_count / contributors.claims_merged.
TRUTH_SOURCER_COUNT = 21
TRUTH_CLAIMS_MERGED = 21


def _apply_reset(conn, dry_run):
    """Reset the target contributor's counters on an open connection.

    Reads the current ``sourcer_count`` / ``claims_merged`` for
    ``TARGET_HANDLE``. If the row is missing or already at the target values
    nothing is written. Otherwise (and unless ``dry_run`` is true) the UPDATE
    and its ``audit_log`` entry are applied in a single transaction.

    Args:
        conn: Open ``sqlite3.Connection`` whose ``row_factory`` is
            ``sqlite3.Row`` (rows are accessed by column name).
        dry_run: When true, report what would change without writing.
    """
    # Local import keeps this block self-contained; the file's import header
    # does not include json.
    import json

    row = conn.execute(
        "SELECT handle, sourcer_count, claims_merged FROM contributors WHERE handle = ?",
        (TARGET_HANDLE,),
    ).fetchone()
    if row is None:
        print(f" No contributors row for {TARGET_HANDLE} — nothing to reset.")
        return

    print(
        f" Current: {row['handle']} sourcer_count={row['sourcer_count']} "
        f"claims_merged={row['claims_merged']}"
    )
    print(f" Target: sourcer_count={TRUTH_SOURCER_COUNT} claims_merged={TRUTH_CLAIMS_MERGED}")

    already_reset = (
        row["sourcer_count"] == TRUTH_SOURCER_COUNT
        and row["claims_merged"] == TRUTH_CLAIMS_MERGED
    )
    if already_reset:
        print(" Already at target values — no-op.")
        return

    if dry_run:
        print(" (dry-run) UPDATE would be applied. Re-run without --dry-run.")
        return

    # Record the values actually observed before the reset rather than a
    # hard-coded number, so the audit row stays truthful if the script is run
    # against a DB whose counters have drifted from the originally seen 1011.
    detail = json.dumps(
        {
            "reason": "Pre-Phase-A submitted_by fallback inflated to 1011; file-truth is 21",
            "sourcer_count_before": row["sourcer_count"],
            "claims_merged_before": row["claims_merged"],
            "sourcer_count_after": TRUTH_SOURCER_COUNT,
            "claims_merged_after": TRUTH_CLAIMS_MERGED,
        },
        separators=(",", ":"),
    )

    # `with conn` commits on success and rolls back on error, so the counter
    # reset and its audit entry are either both persisted or neither is.
    with conn:
        conn.execute(
            """UPDATE contributors SET
                   sourcer_count = ?,
                   claims_merged = ?,
                   updated_at = datetime('now')
               WHERE handle = ?""",
            (TRUTH_SOURCER_COUNT, TRUTH_CLAIMS_MERGED, TARGET_HANDLE),
        )
        conn.execute(
            """INSERT INTO audit_log (stage, event, detail) VALUES (?, ?, ?)""",
            ("manual", "m3taversal_sourcer_reset", detail),
        )

    after = conn.execute(
        "SELECT sourcer_count, claims_merged FROM contributors WHERE handle = ?",
        (TARGET_HANDLE,),
    ).fetchone()
    print(
        f" Applied. Now: sourcer_count={after['sourcer_count']} "
        f"claims_merged={after['claims_merged']}"
    )


def main(argv=None):
    """Command-line entry point for the one-off counter reset.

    Args:
        argv: Optional list of command-line arguments. ``None`` (the default)
            makes argparse read ``sys.argv[1:]``, which is the original
            behaviour.

    Flags:
        --dry-run: Print current and target values without writing.
        --db PATH: SQLite database to operate on; defaults to ``DB_PATH``.

    Exits with status 1 if the database file does not exist.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--dry-run", action="store_true")
    parser.add_argument(
        "--db",
        default=DB_PATH,
        help="path to the pipeline SQLite DB (default: %(default)s)",
    )
    args = parser.parse_args(argv)

    # sqlite3.connect() would silently create an empty database at a missing
    # path, so fail loudly before connecting.
    if not Path(args.db).exists():
        print(f"ERROR: DB not found at {args.db}", file=sys.stderr)
        sys.exit(1)

    conn = sqlite3.connect(args.db, timeout=30)
    try:
        conn.row_factory = sqlite3.Row
        _apply_reset(conn, args.dry_run)
    finally:
        # Release the connection on every path, including early returns and errors.
        conn.close()


if __name__ == "__main__":
    main()
|