fix(tests): apply Ganymede review nits + add m3taversal reset script
Some checks are pending
CI / lint-and-test (push) Waiting to run
Some checks are pending
CI / lint-and-test (push) Waiting to run
3 nits from review of d60b6f8 + Q4 ask:
1. test_window_24h_only_today: replace always-true assertion with
concrete `assert handles == ["carol"]`. Push alice's most-recent
event from -1 days to -2 days to eliminate fixture-vs-query
microsecond drift on the 24h boundary.
2. _call helper: asyncio.get_event_loop().run_until_complete →
asyncio.run (deprecation in 3.12, raises in some 3.14 contexts).
3. test_invalid_limit_falls_to_default: dead first call removed,
misleading "7 entries" comment now matches assertion.
Q4: scripts/reset-m3taversal-sourcer.py captures the surgical
UPDATE we ran on VPS as a reviewable artifact. Idempotent (no-op
on already-reset rows), audit_log entry per run. Ganymede's point:
DB mutations should leave a code paper trail, not just an audit
row whose origin lives only in the executor's memory.
30/30 tests pass on VPS hermes venv (aiohttp 3.13.5, py 3.11.15).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
d60b6f8bf2
commit
1351db70a9
2 changed files with 128 additions and 17 deletions
108
scripts/reset-m3taversal-sourcer.py
Normal file
108
scripts/reset-m3taversal-sourcer.py
Normal file
|
|
@ -0,0 +1,108 @@
|
|||
#!/usr/bin/env python3
|
||||
"""Reset m3taversal.sourcer_count from inflated legacy value to file-truth count.
|
||||
|
||||
Background: pre-Phase-A extract.py had a `submitted_by` fallback that credited
|
||||
m3taversal as sourcer for every Telegram-ingested source, accumulating to 1011
|
||||
sourcer_count in the contributors table. The actual file-truth count (sourcer
|
||||
frontmatter equal to "m3taversal" in claim files) is 21. The 990-row delta is
|
||||
infrastructure attribution that doesn't reflect content authorship.
|
||||
|
||||
The Phase A event-sourced ledger (contribution_events) computed the correct
|
||||
389.55 CI from author events; /api/leaderboard reads from there directly.
|
||||
But the legacy /api/contributors endpoint reads contributors.claims_merged
|
||||
which carries the inflated 1011. Until that endpoint is deprecated, the
|
||||
divergence shows two different numbers depending on which surface the UI
|
||||
queries.
|
||||
|
||||
This script applies the surgical UPDATE that was run on VPS on 2026-04-27
|
||||
during the leaderboard cutover. Committed as a script per Ganymede review:
|
||||
"DB mutations go through reviewable code paths matters more than the
|
||||
convenience of one-shot SQL. The artifact explains what was done and why."
|
||||
|
||||
Idempotent — safe to re-run. If sourcer_count is already 21, no change.
|
||||
|
||||
Usage:
|
||||
python3 scripts/reset-m3taversal-sourcer.py --dry-run
|
||||
python3 scripts/reset-m3taversal-sourcer.py
|
||||
"""
|
||||
import argparse
import json
import os
import sqlite3
import sys
from pathlib import Path
|
||||
|
||||
DB_PATH = os.environ.get("PIPELINE_DB", "/opt/teleo-eval/pipeline/pipeline.db")
TARGET_HANDLE = "m3taversal"
# File-truth counts: claim files whose sourcer frontmatter equals
# "m3taversal" (see module docstring for how the legacy value got inflated).
TRUTH_SOURCER_COUNT = 21
TRUTH_CLAIMS_MERGED = 21


def main(argv=None, db_path=None):
    """Reset m3taversal's contributors row to the file-truth counts.

    Idempotent: no-op when the row is absent or already at the target
    values. Writes one audit_log entry per applied reset, recording the
    before/after values actually observed in the row (not a hard-coded
    snapshot, so a re-run against a drifted DB still logs the truth).

    Args:
        argv: argument list for argparse; None means sys.argv[1:]. Exposed
            so tests can call main(argv=[]) without patching sys.argv.
        db_path: SQLite database path override; None means the module-level
            DB_PATH (PIPELINE_DB env var or the VPS default).

    Exits with status 1 if the database file does not exist.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--dry-run", action="store_true")
    args = parser.parse_args(argv)

    db_path = DB_PATH if db_path is None else db_path
    if not Path(db_path).exists():
        print(f"ERROR: DB not found at {db_path}", file=sys.stderr)
        sys.exit(1)

    conn = sqlite3.connect(db_path, timeout=30)
    conn.row_factory = sqlite3.Row
    try:
        row = conn.execute(
            "SELECT handle, sourcer_count, claims_merged FROM contributors WHERE handle = ?",
            (TARGET_HANDLE,),
        ).fetchone()
        if not row:
            print(f" No contributors row for {TARGET_HANDLE} — nothing to reset.")
            return

        print(
            f" Current: {row['handle']} sourcer_count={row['sourcer_count']} "
            f"claims_merged={row['claims_merged']}"
        )
        print(f" Target: sourcer_count={TRUTH_SOURCER_COUNT} claims_merged={TRUTH_CLAIMS_MERGED}")

        if (row["sourcer_count"] == TRUTH_SOURCER_COUNT
                and row["claims_merged"] == TRUTH_CLAIMS_MERGED):
            print(" Already at target values — no-op.")
            return

        if args.dry_run:
            print(" (dry-run) UPDATE would be applied. Re-run without --dry-run.")
            return

        conn.execute(
            """UPDATE contributors SET
                   sourcer_count = ?,
                   claims_merged = ?,
                   updated_at = datetime('now')
               WHERE handle = ?""",
            (TRUTH_SOURCER_COUNT, TRUTH_CLAIMS_MERGED, TARGET_HANDLE),
        )
        # Build the audit detail from the row we actually read: the previous
        # version hard-coded "sourcer_count_before":1011 as a literal, which
        # would falsify the audit trail on any re-run after further drift.
        detail = json.dumps(
            {
                "reason": (
                    "Pre-Phase-A submitted_by fallback inflated sourcer_count; "
                    "file-truth is 21"
                ),
                "sourcer_count_before": row["sourcer_count"],
                "claims_merged_before": row["claims_merged"],
                "sourcer_count_after": TRUTH_SOURCER_COUNT,
                "claims_merged_after": TRUTH_CLAIMS_MERGED,
            }
        )
        conn.execute(
            "INSERT INTO audit_log (stage, event, detail) VALUES (?, ?, ?)",
            ("manual", "m3taversal_sourcer_reset", detail),
        )
        conn.commit()

        after = conn.execute(
            "SELECT sourcer_count, claims_merged FROM contributors WHERE handle = ?",
            (TARGET_HANDLE,),
        ).fetchone()
        print(
            f" Applied. Now: sourcer_count={after['sourcer_count']} "
            f"claims_merged={after['claims_merged']}"
        )
    finally:
        # fix: the connection was previously leaked on every exit path.
        conn.close()


if __name__ == "__main__":
    main()
|
||||
|
|
@ -112,11 +112,13 @@ def db_path(tmp_path):
|
|||
|
||||
# (handle, role, weight, pr_number, claim_path, domain, timestamp)
|
||||
events = [
|
||||
# alice — 3 author + 1 originator, recent
|
||||
("alice", "author", 0.30, 100, None, "internet-finance", "now,-1 days"),
|
||||
# alice — 3 author + 1 originator, recent (all >24h ago, all <7d)
|
||||
# Most-recent event at -2 days (not -1 days) so 24h window exclusion is
|
||||
# unambiguous and not subject to fixture-vs-query microsecond drift.
|
||||
("alice", "author", 0.30, 100, None, "internet-finance", "now,-2 days"),
|
||||
("alice", "author", 0.30, 101, None, "internet-finance", "now,-2 days"),
|
||||
("alice", "author", 0.30, 102, None, "ai-alignment", "now,-3 days"),
|
||||
("alice", "originator", 0.15, 103, "domains/internet-finance/x.md", "internet-finance", "now,-1 days"),
|
||||
("alice", "originator", 0.15, 103, "domains/internet-finance/x.md", "internet-finance", "now,-2 days"),
|
||||
# bob — 5 author, all 60d ago (outside 30d, inside all_time)
|
||||
("bob", "author", 0.30, 200, None, "ai-alignment", "now,-60 days"),
|
||||
("bob", "author", 0.30, 201, None, "ai-alignment", "now,-60 days"),
|
||||
|
|
@ -142,7 +144,8 @@ def db_path(tmp_path):
|
|||
("cnbc", "originator", 0.15, 600, "domains/internet-finance/y.md", "internet-finance", "now,-5 days"),
|
||||
("cnbc", "originator", 0.15, 601, "domains/internet-finance/z.md", "internet-finance", "now,-5 days"),
|
||||
# newhandle — handle in events but no contributors row (LEFT JOIN COALESCE → person)
|
||||
("newhandle", "author", 0.30, 700, None, "ai-alignment", "now,-1 days"),
|
||||
# -2 days so 24h-window test exclusion is unambiguous (matches alice).
|
||||
("newhandle", "author", 0.30, 700, None, "ai-alignment", "now,-2 days"),
|
||||
]
|
||||
for handle, role, weight, pr_num, claim_path, domain, ts_modifier in events:
|
||||
# Use SQLite datetime() to compute timestamps relative to "now" so tests
|
||||
|
|
@ -171,7 +174,7 @@ def _call(db_path, **query):
|
|||
req = make_mocked_request("GET", f"/api/leaderboard?{qs}")
|
||||
# make_mocked_request gives us req.app — write db_path into it.
|
||||
req.app["db_path"] = db_path
|
||||
response = asyncio.get_event_loop().run_until_complete(handle_leaderboard(req))
|
||||
response = asyncio.run(handle_leaderboard(req))
|
||||
return json.loads(response.body.decode())
|
||||
|
||||
|
||||
|
|
@ -283,14 +286,16 @@ class TestLeaderboardEndpoint:
|
|||
assert "bob" not in handles
|
||||
|
||||
def test_window_24h_only_today(self, db_path):
|
||||
"""24h window picks up today's events only."""
|
||||
"""24h window picks up today's events only.
|
||||
|
||||
Default kind=person. Within 24h: only carol (events at 'now').
|
||||
Excluded: alice/newhandle (events at -2 days), bob (-60d), rio/leo (kind),
|
||||
cnbc (-5d AND kind=org).
|
||||
"""
|
||||
body = _call(db_path, window="24h")
|
||||
handles = [r["handle"] for r in body["leaderboard"]]
|
||||
assert "carol" in handles # today
|
||||
# alice's events are 1-3d ago → outside 24h window
|
||||
# leo's evaluator events are 'now' (today) — but kind=person excludes leo
|
||||
assert "alice" not in handles or any(
|
||||
e["last_contribution"] >= "0" for e in body["leaderboard"] # not strict check
|
||||
assert handles == ["carol"], (
|
||||
"24h + kind=person should return only carol; got %r" % handles
|
||||
)
|
||||
|
||||
def test_kind_agent(self, db_path):
|
||||
|
|
@ -360,12 +365,10 @@ class TestLeaderboardEndpoint:
|
|||
assert body["total"] == 1
|
||||
|
||||
def test_invalid_limit_falls_to_default(self, db_path):
|
||||
"""Defensive: garbage limit param falls to default 100."""
|
||||
body = _call(db_path, limit="not-a-number")
|
||||
# 7 entries < 100 → shown should be all of them under kind=all
|
||||
body2 = _call(db_path, kind="all", limit="not-a-number")
|
||||
assert body2["shown"] == 7
|
||||
assert body2["has_more"] is False
|
||||
"""Defensive: garbage limit param falls to default 100. 7 entries < 100."""
|
||||
body = _call(db_path, kind="all", limit="not-a-number")
|
||||
assert body["shown"] == 7
|
||||
assert body["has_more"] is False
|
||||
|
||||
def test_limit_capped_at_500(self, db_path):
|
||||
"""Defensive: limit > 500 silently caps at 500."""
|
||||
|
|
|
|||
Loading…
Reference in a new issue