fix(tests): apply Ganymede review nits + add m3taversal reset script
Some checks are pending
CI / lint-and-test (push) Waiting to run
Some checks are pending
CI / lint-and-test (push) Waiting to run
3 nits from review of d60b6f8 + Q4 ask:
1. test_window_24h_only_today: replace always-true assertion with
concrete `assert handles == ["carol"]`. Push alice's most-recent
event from -1 days to -2 days to eliminate fixture-vs-query
microsecond drift on the 24h boundary.
2. _call helper: asyncio.get_event_loop().run_until_complete →
asyncio.run (deprecation in 3.12, raises in some 3.14 contexts).
3. test_invalid_limit_falls_to_default: dead first call removed,
misleading "7 entries" comment now matches assertion.
Q4: scripts/reset-m3taversal-sourcer.py captures the surgical
UPDATE we ran on VPS as a reviewable artifact. Idempotent (no-op
on already-reset rows), audit_log entry per run. Ganymede's point:
DB mutations should leave a code paper trail, not just an audit
row whose origin lives only in the executor's memory.
30/30 tests pass on VPS hermes venv (aiohttp 3.13.5, py 3.11.15).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
d60b6f8bf2
commit
1351db70a9
2 changed files with 128 additions and 17 deletions
108
scripts/reset-m3taversal-sourcer.py
Normal file
108
scripts/reset-m3taversal-sourcer.py
Normal file
|
|
@ -0,0 +1,108 @@
|
|||
#!/usr/bin/env python3
|
||||
"""Reset m3taversal.sourcer_count from inflated legacy value to file-truth count.
|
||||
|
||||
Background: pre-Phase-A extract.py had a `submitted_by` fallback that credited
|
||||
m3taversal as sourcer for every Telegram-ingested source, accumulating to 1011
|
||||
sourcer_count in the contributors table. The actual file-truth count (sourcer
|
||||
frontmatter equal to "m3taversal" in claim files) is 21. The 990-row delta is
|
||||
infrastructure attribution that doesn't reflect content authorship.
|
||||
|
||||
The Phase A event-sourced ledger (contribution_events) computed the correct
|
||||
389.55 CI from author events; /api/leaderboard reads from there directly.
|
||||
But the legacy /api/contributors endpoint reads contributors.claims_merged
|
||||
which carries the inflated 1011. Until that endpoint is deprecated, the
|
||||
divergence shows two different numbers depending on which surface the UI
|
||||
queries.
|
||||
|
||||
This script applies the surgical UPDATE that was run on VPS on 2026-04-27
|
||||
during the leaderboard cutover. Committed as a script per Ganymede review:
|
||||
"DB mutations go through reviewable code paths matters more than the
|
||||
convenience of one-shot SQL. The artifact explains what was done and why."
|
||||
|
||||
Idempotent — safe to re-run. If sourcer_count is already 21, no change.
|
||||
|
||||
Usage:
|
||||
python3 scripts/reset-m3taversal-sourcer.py --dry-run
|
||||
python3 scripts/reset-m3taversal-sourcer.py
|
||||
"""
|
||||
import argparse
import json
import os
import sqlite3
import sys
from pathlib import Path
|
||||
|
||||
DB_PATH = os.environ.get("PIPELINE_DB", "/opt/teleo-eval/pipeline/pipeline.db")
TARGET_HANDLE = "m3taversal"
# File-truth counts: claim files whose sourcer frontmatter equals
# "m3taversal" (see module docstring for how the legacy value got inflated).
TRUTH_SOURCER_COUNT = 21
TRUTH_CLAIMS_MERGED = 21


def main(argv=None, db_path=None):
    """Reset m3taversal's contributors row to the file-truth counts.

    Idempotent: no-op when the row is absent or already at the target
    values. Writes one audit_log entry per applied reset, recording the
    before/after values actually observed in the row (not a hard-coded
    snapshot, so a re-run against a drifted DB still logs the truth).

    Args:
        argv: argument list for argparse; None means sys.argv[1:]. Exposed
            so tests can call main(argv=[]) without patching sys.argv.
        db_path: SQLite database path override; None means the module-level
            DB_PATH (PIPELINE_DB env var or the VPS default).

    Exits with status 1 if the database file does not exist.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--dry-run", action="store_true")
    args = parser.parse_args(argv)

    db_path = DB_PATH if db_path is None else db_path
    if not Path(db_path).exists():
        print(f"ERROR: DB not found at {db_path}", file=sys.stderr)
        sys.exit(1)

    conn = sqlite3.connect(db_path, timeout=30)
    conn.row_factory = sqlite3.Row
    try:
        row = conn.execute(
            "SELECT handle, sourcer_count, claims_merged FROM contributors WHERE handle = ?",
            (TARGET_HANDLE,),
        ).fetchone()
        if not row:
            print(f" No contributors row for {TARGET_HANDLE} — nothing to reset.")
            return

        print(
            f" Current: {row['handle']} sourcer_count={row['sourcer_count']} "
            f"claims_merged={row['claims_merged']}"
        )
        print(f" Target: sourcer_count={TRUTH_SOURCER_COUNT} claims_merged={TRUTH_CLAIMS_MERGED}")

        if (row["sourcer_count"] == TRUTH_SOURCER_COUNT
                and row["claims_merged"] == TRUTH_CLAIMS_MERGED):
            print(" Already at target values — no-op.")
            return

        if args.dry_run:
            print(" (dry-run) UPDATE would be applied. Re-run without --dry-run.")
            return

        conn.execute(
            """UPDATE contributors SET
                   sourcer_count = ?,
                   claims_merged = ?,
                   updated_at = datetime('now')
               WHERE handle = ?""",
            (TRUTH_SOURCER_COUNT, TRUTH_CLAIMS_MERGED, TARGET_HANDLE),
        )
        # Build the audit detail from the row we actually read: the previous
        # version hard-coded "sourcer_count_before":1011 as a literal, which
        # would falsify the audit trail on any re-run after further drift.
        detail = json.dumps(
            {
                "reason": (
                    "Pre-Phase-A submitted_by fallback inflated sourcer_count; "
                    "file-truth is 21"
                ),
                "sourcer_count_before": row["sourcer_count"],
                "claims_merged_before": row["claims_merged"],
                "sourcer_count_after": TRUTH_SOURCER_COUNT,
                "claims_merged_after": TRUTH_CLAIMS_MERGED,
            }
        )
        conn.execute(
            "INSERT INTO audit_log (stage, event, detail) VALUES (?, ?, ?)",
            ("manual", "m3taversal_sourcer_reset", detail),
        )
        conn.commit()

        after = conn.execute(
            "SELECT sourcer_count, claims_merged FROM contributors WHERE handle = ?",
            (TARGET_HANDLE,),
        ).fetchone()
        print(
            f" Applied. Now: sourcer_count={after['sourcer_count']} "
            f"claims_merged={after['claims_merged']}"
        )
    finally:
        # fix: the connection was previously leaked on every exit path.
        conn.close()


if __name__ == "__main__":
    main()
|
||||
|
|
@ -112,11 +112,13 @@ def db_path(tmp_path):
|
|||
|
||||
# (handle, role, weight, pr_number, claim_path, domain, timestamp)
|
||||
events = [
|
||||
# alice — 3 author + 1 originator, recent
|
||||
("alice", "author", 0.30, 100, None, "internet-finance", "now,-1 days"),
|
||||
# alice — 3 author + 1 originator, recent (all >24h ago, all <7d)
|
||||
# Most-recent event at -2 days (not -1 days) so 24h window exclusion is
|
||||
# unambiguous and not subject to fixture-vs-query microsecond drift.
|
||||
("alice", "author", 0.30, 100, None, "internet-finance", "now,-2 days"),
|
||||
("alice", "author", 0.30, 101, None, "internet-finance", "now,-2 days"),
|
||||
("alice", "author", 0.30, 102, None, "ai-alignment", "now,-3 days"),
|
||||
("alice", "originator", 0.15, 103, "domains/internet-finance/x.md", "internet-finance", "now,-1 days"),
|
||||
("alice", "originator", 0.15, 103, "domains/internet-finance/x.md", "internet-finance", "now,-2 days"),
|
||||
# bob — 5 author, all 60d ago (outside 30d, inside all_time)
|
||||
("bob", "author", 0.30, 200, None, "ai-alignment", "now,-60 days"),
|
||||
("bob", "author", 0.30, 201, None, "ai-alignment", "now,-60 days"),
|
||||
|
|
@ -142,7 +144,8 @@ def db_path(tmp_path):
|
|||
("cnbc", "originator", 0.15, 600, "domains/internet-finance/y.md", "internet-finance", "now,-5 days"),
|
||||
("cnbc", "originator", 0.15, 601, "domains/internet-finance/z.md", "internet-finance", "now,-5 days"),
|
||||
# newhandle — handle in events but no contributors row (LEFT JOIN COALESCE → person)
|
||||
("newhandle", "author", 0.30, 700, None, "ai-alignment", "now,-1 days"),
|
||||
# -2 days so 24h-window test exclusion is unambiguous (matches alice).
|
||||
("newhandle", "author", 0.30, 700, None, "ai-alignment", "now,-2 days"),
|
||||
]
|
||||
for handle, role, weight, pr_num, claim_path, domain, ts_modifier in events:
|
||||
# Use SQLite datetime() to compute timestamps relative to "now" so tests
|
||||
|
|
@ -171,7 +174,7 @@ def _call(db_path, **query):
|
|||
req = make_mocked_request("GET", f"/api/leaderboard?{qs}")
|
||||
# make_mocked_request gives us req.app — write db_path into it.
|
||||
req.app["db_path"] = db_path
|
||||
response = asyncio.get_event_loop().run_until_complete(handle_leaderboard(req))
|
||||
response = asyncio.run(handle_leaderboard(req))
|
||||
return json.loads(response.body.decode())
|
||||
|
||||
|
||||
|
|
@ -283,14 +286,16 @@ class TestLeaderboardEndpoint:
|
|||
assert "bob" not in handles
|
||||
|
||||
def test_window_24h_only_today(self, db_path):
|
||||
"""24h window picks up today's events only."""
|
||||
"""24h window picks up today's events only.
|
||||
|
||||
Default kind=person. Within 24h: only carol (events at 'now').
|
||||
Excluded: alice/newhandle (events at -2 days), bob (-60d), rio/leo (kind),
|
||||
cnbc (-5d AND kind=org).
|
||||
"""
|
||||
body = _call(db_path, window="24h")
|
||||
handles = [r["handle"] for r in body["leaderboard"]]
|
||||
assert "carol" in handles # today
|
||||
# alice's events are 1-3d ago → outside 24h window
|
||||
# leo's evaluator events are 'now' (today) — but kind=person excludes leo
|
||||
assert "alice" not in handles or any(
|
||||
e["last_contribution"] >= "0" for e in body["leaderboard"] # not strict check
|
||||
assert handles == ["carol"], (
|
||||
"24h + kind=person should return only carol; got %r" % handles
|
||||
)
|
||||
|
||||
def test_kind_agent(self, db_path):
|
||||
|
|
@ -360,12 +365,10 @@ class TestLeaderboardEndpoint:
|
|||
assert body["total"] == 1
|
||||
|
||||
def test_invalid_limit_falls_to_default(self, db_path):
|
||||
"""Defensive: garbage limit param falls to default 100."""
|
||||
body = _call(db_path, limit="not-a-number")
|
||||
# 7 entries < 100 → shown should be all of them under kind=all
|
||||
body2 = _call(db_path, kind="all", limit="not-a-number")
|
||||
assert body2["shown"] == 7
|
||||
assert body2["has_more"] is False
|
||||
"""Defensive: garbage limit param falls to default 100. 7 entries < 100."""
|
||||
body = _call(db_path, kind="all", limit="not-a-number")
|
||||
assert body["shown"] == 7
|
||||
assert body["has_more"] is False
|
||||
|
||||
def test_limit_capped_at_500(self, db_path):
|
||||
"""Defensive: limit > 500 silently caps at 500."""
|
||||
|
|
|
|||
Loading…
Reference in a new issue