teleo-infrastructure/tests/test_leaderboard.py
m3taversal 1351db70a9
Some checks are pending
CI / lint-and-test (push) Waiting to run
fix(tests): apply Ganymede review nits + add m3taversal reset script
3 nits from review of d60b6f8 + Q4 ask:

1. test_window_24h_only_today: replace always-true assertion with
   concrete `assert handles == ["carol"]`. Push alice's most-recent
   event from -1 days to -2 days to eliminate fixture-vs-query
   microsecond drift on the 24h boundary.
2. _call helper: asyncio.get_event_loop().run_until_complete →
   asyncio.run (deprecation in 3.12, raises in some 3.14 contexts).
3. test_invalid_limit_falls_to_default: dead first call removed,
   misleading "7 entries" comment now matches assertion.

Q4: scripts/reset-m3taversal-sourcer.py captures the surgical
UPDATE we ran on VPS as a reviewable artifact. Idempotent (no-op
on already-reset rows), audit_log entry per run. Ganymede's point:
DB mutations should leave a code paper trail, not just an audit
row whose origin lives only in the executor's memory.

30/30 tests pass on VPS hermes venv (aiohttp 3.13.5, py 3.11.15).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-27 17:35:18 +01:00

437 lines
18 KiB
Python

"""Tests for /api/leaderboard endpoint (diagnostics/leaderboard_routes.py).
Locks behavior for the four slicings consumed by Argus + Oberon:
- window: all_time | Nd | Nh
- domain: per-domain filter
- kind: person | agent | org | all
- limit: pagination + has_more flag
Regression coverage includes the AND-prefix SQL bug (commit 42d35d4): _parse_window
returned clauses prefixed with 'AND ' which produced 'WHERE 1=1 AND AND ...' when
joined into the WHERE clause via " AND ".join(...).
"""
import asyncio
import json
import sqlite3
from pathlib import Path
import pytest
# Skip whole file if aiohttp isn't available (matches test_activity_classify.py pattern)
aiohttp = pytest.importorskip("aiohttp")
# Make diagnostics/ importable
import sys
DIAG_ROOT = Path(__file__).parent.parent / "diagnostics"
sys.path.insert(0, str(DIAG_ROOT))
from leaderboard_routes import ( # noqa: E402
_parse_window,
handle_leaderboard,
KIND_VALUES,
LEADERBOARD_PUBLIC_PATHS,
)
from aiohttp.test_utils import make_mocked_request # noqa: E402
# ─── Schema lifted from lib/db.py:138-209 (v25 minimum) ──────────────────────
# Minimal DDL that the db_path fixture runs (via executescript) to build a
# throwaway SQLite database. It defines two tables:
#   - contributors: one row per handle, with kind/tier and per-role counters.
#   - contribution_events: one row per contribution (handle, kind, role,
#     weight, pr_number, optional claim_path, domain, channel, timestamp).
# The two partial unique indexes enforce uniqueness on
# (handle, role, pr_number, claim_path) for rows that have a claim_path, and on
# (handle, role, pr_number) for rows whose claim_path is NULL.
SCHEMA = """
CREATE TABLE contributors (
handle TEXT PRIMARY KEY,
kind TEXT DEFAULT 'person',
tier TEXT DEFAULT 'new',
claims_merged INTEGER DEFAULT 0,
sourcer_count INTEGER DEFAULT 0,
extractor_count INTEGER DEFAULT 0,
challenger_count INTEGER DEFAULT 0,
synthesizer_count INTEGER DEFAULT 0,
reviewer_count INTEGER DEFAULT 0,
challenges_survived INTEGER DEFAULT 0,
domains TEXT DEFAULT '[]',
first_contribution TEXT,
last_contribution TEXT
);
CREATE TABLE contribution_events (
id INTEGER PRIMARY KEY AUTOINCREMENT,
handle TEXT NOT NULL,
kind TEXT NOT NULL DEFAULT 'person',
role TEXT NOT NULL,
weight REAL NOT NULL,
pr_number INTEGER NOT NULL,
claim_path TEXT,
domain TEXT,
channel TEXT,
timestamp TEXT NOT NULL DEFAULT (datetime('now'))
);
CREATE UNIQUE INDEX idx_ce_unique_claim ON contribution_events(
handle, role, pr_number, claim_path
) WHERE claim_path IS NOT NULL;
CREATE UNIQUE INDEX idx_ce_unique_pr ON contribution_events(
handle, role, pr_number
) WHERE claim_path IS NULL;
"""
# ─── Fixtures ────────────────────────────────────────────────────────────────
@pytest.fixture
def db_path(tmp_path):
    """Create a seeded ``pipeline.db`` under ``tmp_path`` and return its path as ``str``.

    Event timestamps are computed by SQLite relative to ``now`` at insert time,
    so the cohort keeps the same shape regardless of the day the suite runs.

    Cohort:
    - alice (person): 3 author + 1 originator events, 2-3 days old
      (internet-finance, plus one ai-alignment author event)
    - bob (person): 5 author events, 60-63 days old (ai-alignment)
    - carol (person): 1 author + 1 evaluator event at ``now`` (internet-finance)
    - rio (agent): 4 author + 2 evaluator events, 2 days old
      (internet-finance, grand-strategy, ai-alignment)
    - leo (agent): 8 evaluator events at ``now``, alternating
      internet-finance / ai-alignment
    - cnbc (org): 2 originator events, 5 days old (legacy, before classifier moved orgs)
    - newhandle (no contributors row): 1 author event, 2 days old — tests LEFT JOIN COALESCE
    """
    p = tmp_path / "pipeline.db"

    contribs = [
        ("alice", "person"),
        ("bob", "person"),
        ("carol", "person"),
        ("rio", "agent"),
        ("leo", "agent"),
        ("cnbc", "org"),
        # newhandle intentionally absent — tests LEFT JOIN
    ]

    # (handle, role, weight, pr_number, claim_path, domain, timestamp)
    # The last field is a comma-separated list of SQLite datetime() arguments:
    # "now" -> datetime('now'); "now,-2 days" -> datetime('now', '-2 days').
    events = [
        # alice — 3 author + 1 originator, recent (all >24h ago, all <7d)
        # Most-recent event at -2 days (not -1 days) so 24h window exclusion is
        # unambiguous and not subject to fixture-vs-query microsecond drift.
        ("alice", "author", 0.30, 100, None, "internet-finance", "now,-2 days"),
        ("alice", "author", 0.30, 101, None, "internet-finance", "now,-2 days"),
        ("alice", "author", 0.30, 102, None, "ai-alignment", "now,-3 days"),
        ("alice", "originator", 0.15, 103, "domains/internet-finance/x.md", "internet-finance", "now,-2 days"),
        # bob — 5 author, all 60d+ ago (outside 30d, inside all_time)
        ("bob", "author", 0.30, 200, None, "ai-alignment", "now,-60 days"),
        ("bob", "author", 0.30, 201, None, "ai-alignment", "now,-60 days"),
        ("bob", "author", 0.30, 202, None, "ai-alignment", "now,-61 days"),
        ("bob", "author", 0.30, 203, None, "ai-alignment", "now,-62 days"),
        ("bob", "author", 0.30, 204, None, "ai-alignment", "now,-63 days"),
        # carol — 1 author + 1 evaluator, today
        ("carol", "author", 0.30, 300, None, "internet-finance", "now"),
        ("carol", "evaluator", 0.05, 301, None, "internet-finance", "now"),
        # rio agent — 4 author + 2 evaluator
        ("rio", "author", 0.30, 400, None, "internet-finance", "now,-2 days"),
        ("rio", "author", 0.30, 401, None, "grand-strategy", "now,-2 days"),
        ("rio", "author", 0.30, 402, None, "internet-finance", "now,-2 days"),
        ("rio", "author", 0.30, 403, None, "internet-finance", "now,-2 days"),
        ("rio", "evaluator", 0.05, 404, None, "ai-alignment", "now,-2 days"),
        ("rio", "evaluator", 0.05, 405, None, "ai-alignment", "now,-2 days"),
        # leo agent — 8 evaluator
        *[
            ("leo", "evaluator", 0.05, 500 + i, None, "internet-finance" if i % 2 == 0 else "ai-alignment", "now")
            for i in range(8)
        ],
        # cnbc org — 2 originator (legacy data, kept by classifier+gate split)
        ("cnbc", "originator", 0.15, 600, "domains/internet-finance/y.md", "internet-finance", "now,-5 days"),
        ("cnbc", "originator", 0.15, 601, "domains/internet-finance/z.md", "internet-finance", "now,-5 days"),
        # newhandle — handle in events but no contributors row (LEFT JOIN COALESCE → person)
        # -2 days so 24h-window test exclusion is unambiguous (matches alice).
        ("newhandle", "author", 0.30, 700, None, "ai-alignment", "now,-2 days"),
    ]

    # NOTE(review): every non-agent event row is written with kind='person',
    # including cnbc whose contributors row is 'org' — presumably the endpoint
    # classifies via the contributors table; confirm against leaderboard_routes.
    agent_handles = {"rio", "leo"}

    conn = sqlite3.connect(str(p))
    try:
        conn.executescript(SCHEMA)
        conn.executemany(
            "INSERT INTO contributors (handle, kind) VALUES (?, ?)",
            contribs,
        )
        for handle, role, weight, pr_num, claim_path, domain, ts_modifier in events:
            # Use SQLite datetime() to compute timestamps relative to "now" so
            # tests are deterministic across days. The datetime() arguments are
            # bound as parameters; only "?" placeholders are formatted into
            # the statement text.
            ts_args = [part.strip() for part in ts_modifier.split(",")]
            placeholders = ", ".join("?" for _ in ts_args)
            conn.execute(
                f"""INSERT INTO contribution_events
                (handle, kind, role, weight, pr_number, claim_path, domain, timestamp)
                VALUES (?, ?, ?, ?, ?, ?, ?, datetime({placeholders}))""",
                (
                    handle,
                    "agent" if handle in agent_handles else "person",
                    role,
                    weight,
                    pr_num,
                    claim_path,
                    domain,
                    *ts_args,
                ),
            )
        conn.commit()
    finally:
        # Always release the file handle, even when seeding fails part-way.
        conn.close()
    return str(p)
def _call(db_path, **query):
    """Build a mocked request, call handle_leaderboard, return parsed JSON.

    Args:
        db_path: Path of the SQLite database; stored on the mocked app under
            the ``"db_path"`` key before the handler is invoked.
        **query: Query-string parameters for the request (e.g. ``window="7d"``).
            Values are stringified and percent-encoded, in keyword order.

    Returns:
        The handler's response body decoded and parsed with ``json.loads``.
    """
    # Local import keeps this helper self-contained. urlencode escapes
    # characters ("&", "=", spaces, ...) that a plain "k=v" join would corrupt.
    from urllib.parse import urlencode

    qs = urlencode(query)
    req = make_mocked_request("GET", f"/api/leaderboard?{qs}")
    # make_mocked_request gives us req.app — write db_path into it.
    req.app["db_path"] = db_path
    response = asyncio.run(handle_leaderboard(req))
    return json.loads(response.body.decode())
# ─── _parse_window unit tests ────────────────────────────────────────────────
class TestParseWindow:
    """Unit tests for the ``(clause, params, label)`` triple returned by ``_parse_window``."""

    # Clause text expected for every bounded (Nd / Nh) window.
    _BOUNDED_CLAUSE = "ce.timestamp >= datetime('now', ?)"

    def test_default_is_all_time(self):
        """``None`` yields an empty clause, no params and the ``all_time`` label."""
        where, args, tag = _parse_window(None)
        assert where == ""
        assert args == ()
        assert tag == "all_time"

    def test_explicit_all_time(self):
        """The literal ``all_time`` yields an empty clause and the same label."""
        where, _, tag = _parse_window("all_time")
        assert where == ""
        assert tag == "all_time"

    def test_seven_days(self):
        """``7d`` yields the bounded clause with a ``-7 days`` modifier."""
        where, args, tag = _parse_window("7d")
        assert where == self._BOUNDED_CLAUSE
        assert args == ("-7 days",)
        assert tag == "7d"
        # Regression: must NOT begin with "AND " (handle_leaderboard composes via " AND ".join)
        assert not where.startswith("AND")

    def test_thirty_days(self):
        """``30d`` yields a ``-30 days`` modifier."""
        _, args, tag = _parse_window("30d")
        assert args == ("-30 days",)
        assert tag == "30d"

    def test_hours(self):
        """``24h`` yields the bounded clause with a ``-24 hours`` modifier."""
        where, args, tag = _parse_window("24h")
        assert where == self._BOUNDED_CLAUSE
        assert args == ("-24 hours",)
        assert tag == "24h"

    def test_caps_days_at_365(self):
        """Day counts above 365 produce a ``-365 days`` modifier."""
        _, args, _ = _parse_window("9999d")
        assert args == ("-365 days",)

    def test_caps_hours_at_8760(self):
        """Hour counts above 8760 produce a ``-8760 hours`` modifier."""
        _, args, _ = _parse_window("99999h")
        assert args == ("-8760 hours",)

    def test_garbage_falls_to_all_time(self):
        """An unparseable window yields an empty clause and the ``all_time`` label."""
        where, _, tag = _parse_window("foobar")
        assert where == ""
        assert tag == "all_time"

    def test_uppercase_normalized(self):
        """An upper-case unit suffix is reported with a lower-case label."""
        _, _, tag = _parse_window("7D")
        assert tag == "7d"

    def test_zero_days_still_emits_clause(self):
        """``0d`` still produces a datetime clause and keeps its own label."""
        # 0d means "now or later" — empty result, but parse should succeed
        where, _, tag = _parse_window("0d")
        assert "datetime" in where
        assert tag == "0d"
# ─── handle_leaderboard integration tests ────────────────────────────────────
class TestLeaderboardEndpoint:
    """Integration tests: run ``handle_leaderboard`` (through ``_call``) on the seeded DB."""

    @staticmethod
    def _handles(payload):
        """Return the handles of the leaderboard rows, in response order."""
        return [row["handle"] for row in payload["leaderboard"]]

    def test_all_time_default_kind_person(self, db_path):
        """No query params: all_time window, kind 'person', rows sorted by CI descending."""
        payload = _call(db_path)
        assert payload["window"] == "all_time"
        assert payload["kind_filter"] == "person"
        assert payload["domain"] is None
        assert payload["source"] == "contribution_events"
        # Seeded weights per handle:
        #   alice      3*0.30 + 0.15 = 1.05
        #   bob        5*0.30        = 1.50
        #   carol      0.30 + 0.05   = 0.35
        #   newhandle  0.30 (no contributors row → expected to count as 'person')
        #   cnbc       excluded (kind='org')
        #   rio/leo    excluded (kind='agent')
        names = self._handles(payload)
        assert "bob" in names
        assert "alice" in names
        assert "newhandle" in names, "LEFT JOIN COALESCE should default missing contributors to 'person'"
        assert "cnbc" not in names, "kind=person should exclude orgs"
        assert "rio" not in names, "kind=person should exclude agents"
        # Descending by CI
        scores = [row["ci"] for row in payload["leaderboard"]]
        assert scores == sorted(scores, reverse=True)

    def test_window_7d_excludes_old_events(self, db_path):
        """REGRESSION: 7d window must execute (no AND-prefix SQL error).

        Bob's events are all 60+ days old → must not appear in the 7d window.
        Alice's events are 2-3 days old and carol's are from today → both appear.
        """
        payload = _call(db_path, window="7d")
        assert payload["window"] == "7d"
        names = self._handles(payload)
        assert "alice" in names
        assert "bob" not in names, "60d-old events must be excluded from 7d window"
        assert "carol" in names  # today

    def test_window_30d_excludes_60d_events(self, db_path):
        """REGRESSION: 30d window must execute. Bob (60d) excluded; alice/carol included."""
        payload = _call(db_path, window="30d")
        assert payload["window"] == "30d"
        names = self._handles(payload)
        assert "alice" in names
        assert "carol" in names
        assert "bob" not in names

    def test_window_24h_only_today(self, db_path):
        """24h window with the default kind returns exactly carol.

        Carol's events are seeded at 'now'. Everyone else is expected to drop out:
        alice/newhandle (events at -2 days), bob (-60d and older),
        rio/leo (agents), cnbc (-5d and an org).
        """
        payload = _call(db_path, window="24h")
        names = self._handles(payload)
        assert names == ["carol"], (
            "24h + kind=person should return only carol; got %r" % names
        )

    def test_kind_agent(self, db_path):
        """kind=agent lists rio and leo, and neither alice nor bob."""
        payload = _call(db_path, kind="agent")
        names = self._handles(payload)
        assert "rio" in names
        assert "leo" in names
        assert "alice" not in names
        assert "bob" not in names

    def test_kind_org(self, db_path):
        """kind=org returns only cnbc (legacy events still queryable) with CI 0.30."""
        payload = _call(db_path, kind="org")
        names = self._handles(payload)
        assert names == ["cnbc"]
        assert payload["leaderboard"][0]["ci"] == 0.30  # 2 * 0.15

    def test_kind_all_returns_everyone(self, db_path):
        """kind=all returns every seeded handle — persons, agents and the org."""
        payload = _call(db_path, kind="all")
        everyone = {"alice", "bob", "carol", "rio", "leo", "cnbc", "newhandle"}
        assert set(self._handles(payload)) == everyone

    def test_invalid_kind_falls_to_person(self, db_path):
        """Defensive: an unknown kind value is reported back as 'person'."""
        payload = _call(db_path, kind="bogus")
        assert payload["kind_filter"] == "person"

    def test_domain_filter(self, db_path):
        """domain=internet-finance scopes events; the default kind filter still applies."""
        payload = _call(db_path, domain="internet-finance")
        assert payload["domain"] == "internet-finance"
        names = set(self._handles(payload))
        # Seeded internet-finance events among persons:
        #   alice      2 author + 1 originator
        #   carol      1 author + 1 evaluator
        #   bob        none (all ai-alignment)
        #   newhandle  none (ai-alignment only)
        assert "alice" in names
        assert "carol" in names
        assert "bob" not in names
        assert "newhandle" not in names

    def test_composed_window_kind_domain(self, db_path):
        """REGRESSION: composed filters must build SQL correctly.

        7d + person + internet-finance — alice and carol appear; bob and rio do not.
        """
        payload = _call(db_path, window="7d", kind="person", domain="internet-finance")
        names = self._handles(payload)
        assert "alice" in names
        assert "carol" in names
        assert "bob" not in names  # excluded by 7d
        assert "rio" not in names  # excluded by kind=person

    def test_limit_caps_results(self, db_path):
        """limit=3 over 7 handles: 3 rows shown, has_more set, total still 7."""
        payload = _call(db_path, kind="all", limit=3)
        assert payload["shown"] == 3
        assert payload["has_more"] is True
        assert payload["total"] == 7

    def test_no_has_more_when_under_limit(self, db_path):
        """A single matching row reports shown == total == 1 and has_more False."""
        payload = _call(db_path, kind="org")
        assert payload["shown"] == 1
        assert payload["has_more"] is False
        assert payload["total"] == 1

    def test_invalid_limit_falls_to_default(self, db_path):
        """Defensive: a non-numeric limit still returns all 7 entries with has_more False."""
        payload = _call(db_path, kind="all", limit="not-a-number")
        assert payload["shown"] == 7
        assert payload["has_more"] is False

    def test_limit_capped_at_500(self, db_path):
        """Defensive: an oversized limit must not error and shows at most 500 rows."""
        payload = _call(db_path, limit=99999, kind="all")
        # The response is not inspected for the cap value itself — only that
        # the call succeeds and shown <= 500.
        assert payload["shown"] <= 500

    def test_role_breakdown_present(self, db_path):
        """Each row's ci_breakdown has exactly the 5 role keys."""
        expected_roles = {
            "author", "challenger", "synthesizer", "originator", "evaluator",
        }
        payload = _call(db_path)
        for row in payload["leaderboard"]:
            assert set(row["ci_breakdown"]) == expected_roles

    def test_alice_role_breakdown_correct(self, db_path):
        """Alice has 3 author (0.90) + 1 originator (0.15) = 1.05 total."""
        payload = _call(db_path)
        alice = next(row for row in payload["leaderboard"] if row["handle"] == "alice")
        assert alice["ci"] == 1.05
        breakdown = alice["ci_breakdown"]
        assert breakdown["author"] == 0.90
        assert breakdown["originator"] == 0.15
        for unused_role in ("challenger", "synthesizer", "evaluator"):
            assert breakdown[unused_role] == 0
        assert alice["events_count"] == 4
        assert alice["pr_count"] == 4
        assert alice["domain_count"] == 2  # internet-finance + ai-alignment

    def test_empty_window_returns_clean_response(self, db_path):
        """A filter combination with no matching events returns an empty, well-formed body."""
        # 24h window + kind=org → cnbc is 5d ago, so empty
        payload = _call(db_path, window="24h", kind="org")
        assert payload["leaderboard"] == []
        assert payload["total"] == 0
        assert payload["shown"] == 0
        assert payload["has_more"] is False
        assert payload["source"] == "contribution_events"

    def test_left_join_handles_missing_contributors_row(self, db_path):
        """REGRESSION: handle in events but missing from contributors must default to kind='person'.

        Catches the failure mode where a handle classified as cited (auto-create
        deferred to Branch 3) accumulates events but has no contributors row yet.
        """
        payload = _call(db_path)
        matches = (row for row in payload["leaderboard"] if row["handle"] == "newhandle")
        newhandle_row = next(matches, None)
        assert newhandle_row is not None
        assert newhandle_row["kind"] == "person"
        assert newhandle_row["ci"] == 0.30
# ─── Public path constant (auth middleware bypass) ───────────────────────────
def test_public_paths_includes_leaderboard():
    """The leaderboard route must be listed in LEADERBOARD_PUBLIC_PATHS.

    Auth middleware needs this entry to skip the API key for /api/leaderboard.
    """
    route = "/api/leaderboard"
    assert route in LEADERBOARD_PUBLIC_PATHS
def test_kind_values_matches_contract():
    """API contract: KIND_VALUES holds exactly the 4 accepted kind values."""
    accepted = {"person", "agent", "org", "all"}
    assert set(KIND_VALUES) == accepted