#!/usr/bin/env python3
"""Reset m3taversal.sourcer_count from inflated legacy value to file-truth count.

Background: pre-Phase-A extract.py had a `submitted_by` fallback that credited
m3taversal as sourcer for every Telegram-ingested source, accumulating to 1011
sourcer_count in the contributors table. The actual file-truth count (sourcer
frontmatter equal to "m3taversal" in claim files) is 21. The 990-row delta is
infrastructure attribution that doesn't reflect content authorship.

The Phase A event-sourced ledger (contribution_events) computed the correct
389.55 CI from author events; /api/leaderboard reads from there directly.
But the legacy /api/contributors endpoint reads contributors.claims_merged
which carries the inflated 1011. Until that endpoint is deprecated, the
divergence shows two different numbers depending on which surface the UI
queries.

This script applies the surgical UPDATE that was run on VPS on 2026-04-27
during the leaderboard cutover. Committed as a script per Ganymede review:
"DB mutations go through reviewable code paths matters more than the
convenience of one-shot SQL. The artifact explains what was done and why."

Idempotent — safe to re-run. If the row is already at the target values, no
change is made and no audit_log row is written. When a change is applied, the
audit_log entry records the values actually read from the row beforehand
(not an assumed 1011), so the trail stays truthful if the row has drifted.

Usage:
    python3 scripts/reset-m3taversal-sourcer.py --dry-run
    python3 scripts/reset-m3taversal-sourcer.py
"""
import argparse
import json
import os
import sqlite3
import sys
from contextlib import closing
from pathlib import Path
from typing import Optional, Sequence

DB_PATH = os.environ.get("PIPELINE_DB", "/opt/teleo-eval/pipeline/pipeline.db")
TARGET_HANDLE = "m3taversal"
TRUTH_SOURCER_COUNT = 21
TRUTH_CLAIMS_MERGED = 21


def main(argv: Optional[Sequence[str]] = None) -> None:
    """Reset the target contributor's counters to their file-truth values.

    Args:
        argv: Command-line arguments excluding the program name. ``None``
            (the default) makes argparse read ``sys.argv[1:]``, which is the
            behaviour of the plain ``main()`` call in the entry guard.

    Raises:
        SystemExit: with status 1 when ``DB_PATH`` does not exist; argparse
            also raises it for unknown arguments.
    """
    parser = argparse.ArgumentParser(
        description="Reset m3taversal sourcer_count/claims_merged to file-truth values."
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="report what would change without writing to the database",
    )
    args = parser.parse_args(argv)

    # Checked up front on purpose: sqlite3.connect() would silently create an
    # empty database file at a mistyped path instead of failing.
    if not Path(DB_PATH).exists():
        print(f"ERROR: DB not found at {DB_PATH}", file=sys.stderr)
        sys.exit(1)

    # closing() guarantees the connection is released on every return path;
    # a sqlite3 connection used as a context manager only scopes a transaction.
    with closing(sqlite3.connect(DB_PATH, timeout=30)) as conn:
        conn.row_factory = sqlite3.Row

        row = conn.execute(
            "SELECT handle, sourcer_count, claims_merged FROM contributors WHERE handle = ?",
            (TARGET_HANDLE,),
        ).fetchone()
        if row is None:
            print(f" No contributors row for {TARGET_HANDLE} — nothing to reset.")
            return

        sourcer_before = row["sourcer_count"]
        claims_before = row["claims_merged"]

        print(
            f" Current: {row['handle']} sourcer_count={sourcer_before} "
            f"claims_merged={claims_before}"
        )
        print(f" Target: sourcer_count={TRUTH_SOURCER_COUNT} claims_merged={TRUTH_CLAIMS_MERGED}")

        if sourcer_before == TRUTH_SOURCER_COUNT and claims_before == TRUTH_CLAIMS_MERGED:
            print(" Already at target values — no-op.")
            return

        if args.dry_run:
            print(" (dry-run) UPDATE would be applied. Re-run without --dry-run.")
            return

        # Build the audit payload from the observed row so "before" is a fact,
        # not an assumption baked into a string literal.
        detail = json.dumps(
            {
                "reason": (
                    f"Pre-Phase-A submitted_by fallback inflated to {sourcer_before}; "
                    f"file-truth is {TRUTH_SOURCER_COUNT}"
                ),
                "sourcer_count_before": sourcer_before,
                "sourcer_count_after": TRUTH_SOURCER_COUNT,
                "claims_merged_before": claims_before,
                "claims_merged_after": TRUTH_CLAIMS_MERGED,
            },
            separators=(",", ":"),
        )

        # One transaction: the UPDATE and its audit row commit together, or
        # both roll back if either statement raises.
        with conn:
            conn.execute(
                """UPDATE contributors SET
                    sourcer_count = ?,
                    claims_merged = ?,
                    updated_at = datetime('now')
                WHERE handle = ?""",
                (TRUTH_SOURCER_COUNT, TRUTH_CLAIMS_MERGED, TARGET_HANDLE),
            )
            conn.execute(
                "INSERT INTO audit_log (stage, event, detail) VALUES (?, ?, ?)",
                ("manual", "m3taversal_sourcer_reset", detail),
            )

        after = conn.execute(
            "SELECT sourcer_count, claims_merged FROM contributors WHERE handle = ?",
            (TARGET_HANDLE,),
        ).fetchone()
        print(
            f" Applied. Now: sourcer_count={after['sourcer_count']} "
            f"claims_merged={after['claims_merged']}"
        )


if __name__ == "__main__":
    main()
+ ("alice", "author", 0.30, 100, None, "internet-finance", "now,-2 days"), ("alice", "author", 0.30, 101, None, "internet-finance", "now,-2 days"), ("alice", "author", 0.30, 102, None, "ai-alignment", "now,-3 days"), - ("alice", "originator", 0.15, 103, "domains/internet-finance/x.md", "internet-finance", "now,-1 days"), + ("alice", "originator", 0.15, 103, "domains/internet-finance/x.md", "internet-finance", "now,-2 days"), # bob — 5 author, all 60d ago (outside 30d, inside all_time) ("bob", "author", 0.30, 200, None, "ai-alignment", "now,-60 days"), ("bob", "author", 0.30, 201, None, "ai-alignment", "now,-60 days"), @@ -142,7 +144,8 @@ def db_path(tmp_path): ("cnbc", "originator", 0.15, 600, "domains/internet-finance/y.md", "internet-finance", "now,-5 days"), ("cnbc", "originator", 0.15, 601, "domains/internet-finance/z.md", "internet-finance", "now,-5 days"), # newhandle — handle in events but no contributors row (LEFT JOIN COALESCE → person) - ("newhandle", "author", 0.30, 700, None, "ai-alignment", "now,-1 days"), + # -2 days so 24h-window test exclusion is unambiguous (matches alice). + ("newhandle", "author", 0.30, 700, None, "ai-alignment", "now,-2 days"), ] for handle, role, weight, pr_num, claim_path, domain, ts_modifier in events: # Use SQLite datetime() to compute timestamps relative to "now" so tests @@ -171,7 +174,7 @@ def _call(db_path, **query): req = make_mocked_request("GET", f"/api/leaderboard?{qs}") # make_mocked_request gives us req.app — write db_path into it. req.app["db_path"] = db_path - response = asyncio.get_event_loop().run_until_complete(handle_leaderboard(req)) + response = asyncio.run(handle_leaderboard(req)) return json.loads(response.body.decode()) @@ -283,14 +286,16 @@ class TestLeaderboardEndpoint: assert "bob" not in handles def test_window_24h_only_today(self, db_path): - """24h window picks up today's events only.""" + """24h window picks up today's events only. + + Default kind=person. Within 24h: only carol (events at 'now'). 
+ Excluded: alice/newhandle (events at -2 days), bob (-60d), rio/leo (kind), + cnbc (-5d AND kind=org). + """ body = _call(db_path, window="24h") handles = [r["handle"] for r in body["leaderboard"]] - assert "carol" in handles # today - # alice's events are 1-3d ago → outside 24h window - # leo's evaluator events are 'now' (today) — but kind=person excludes leo - assert "alice" not in handles or any( - e["last_contribution"] >= "0" for e in body["leaderboard"] # not strict check + assert handles == ["carol"], ( + "24h + kind=person should return only carol; got %r" % handles ) def test_kind_agent(self, db_path): @@ -360,12 +365,10 @@ class TestLeaderboardEndpoint: assert body["total"] == 1 def test_invalid_limit_falls_to_default(self, db_path): - """Defensive: garbage limit param falls to default 100.""" - body = _call(db_path, limit="not-a-number") - # 7 entries < 100 → shown should be all of them under kind=all - body2 = _call(db_path, kind="all", limit="not-a-number") - assert body2["shown"] == 7 - assert body2["has_more"] is False + """Defensive: garbage limit param falls to default 100. 7 entries < 100.""" + body = _call(db_path, kind="all", limit="not-a-number") + assert body["shown"] == 7 + assert body["has_more"] is False def test_limit_capped_at_500(self, db_path): """Defensive: limit > 500 silently caps at 500."""