Some checks are pending
CI / lint-and-test (push) Waiting to run
3 nits from review of d60b6f8 + Q4 ask:
1. test_window_24h_only_today: replace always-true assertion with
concrete `assert handles == ["carol"]`. Push alice's most-recent
event from -1 days to -2 days to eliminate fixture-vs-query
microsecond drift on the 24h boundary.
2. _call helper: asyncio.get_event_loop().run_until_complete →
asyncio.run (deprecation in 3.12, raises in some 3.14 contexts).
3. test_invalid_limit_falls_to_default: dead first call removed,
misleading "7 entries" comment now matches assertion.
Q4: scripts/reset-m3taversal-sourcer.py captures the surgical
UPDATE we ran on VPS as a reviewable artifact. Idempotent (no-op
on already-reset rows), audit_log entry per run. Ganymede's point:
DB mutations should leave a code paper trail, not just an audit
row whose origin lives only in the executor's memory.
30/30 tests pass on VPS hermes venv (aiohttp 3.13.5, py 3.11.15).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
437 lines
18 KiB
Python
437 lines
18 KiB
Python
"""Tests for /api/leaderboard endpoint (diagnostics/leaderboard_routes.py).
|
|
|
|
Locks behavior for the four slicings consumed by Argus + Oberon:
|
|
- window: all_time | Nd | Nh
|
|
- domain: per-domain filter
|
|
- kind: person | agent | org | all
|
|
- limit: pagination + has_more flag
|
|
|
|
Regression coverage includes the AND-prefix SQL bug (commit 42d35d4): _parse_window
|
|
returned clauses prefixed with 'AND ' which produced 'WHERE 1=1 AND AND ...' when
|
|
joined into the WHERE clause via " AND ".join(...).
|
|
"""
|
|
|
|
import asyncio
|
|
import json
|
|
import sqlite3
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
# Skip whole file if aiohttp isn't available (matches test_activity_classify.py pattern)
|
|
aiohttp = pytest.importorskip("aiohttp")
|
|
|
|
# Make diagnostics/ importable
|
|
import sys
|
|
DIAG_ROOT = Path(__file__).parent.parent / "diagnostics"
|
|
sys.path.insert(0, str(DIAG_ROOT))
|
|
|
|
from leaderboard_routes import ( # noqa: E402
|
|
_parse_window,
|
|
handle_leaderboard,
|
|
KIND_VALUES,
|
|
LEADERBOARD_PUBLIC_PATHS,
|
|
)
|
|
from aiohttp.test_utils import make_mocked_request # noqa: E402
|
|
|
|
|
|
# ─── Schema lifted from lib/db.py:138-209 (v25 minimum) ──────────────────────
|
|
|
|
# DDL executed by the db_path fixture (via executescript) to build the test
# database: the contributors table, the contribution_events table, and two
# partial unique indexes on contribution_events — one keyed on
# (handle, role, pr_number, claim_path) for rows that have a claim_path, and
# one keyed on (handle, role, pr_number) for rows whose claim_path is NULL.
SCHEMA = """
CREATE TABLE contributors (
    handle TEXT PRIMARY KEY,
    kind TEXT DEFAULT 'person',
    tier TEXT DEFAULT 'new',
    claims_merged INTEGER DEFAULT 0,
    sourcer_count INTEGER DEFAULT 0,
    extractor_count INTEGER DEFAULT 0,
    challenger_count INTEGER DEFAULT 0,
    synthesizer_count INTEGER DEFAULT 0,
    reviewer_count INTEGER DEFAULT 0,
    challenges_survived INTEGER DEFAULT 0,
    domains TEXT DEFAULT '[]',
    first_contribution TEXT,
    last_contribution TEXT
);

CREATE TABLE contribution_events (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    handle TEXT NOT NULL,
    kind TEXT NOT NULL DEFAULT 'person',
    role TEXT NOT NULL,
    weight REAL NOT NULL,
    pr_number INTEGER NOT NULL,
    claim_path TEXT,
    domain TEXT,
    channel TEXT,
    timestamp TEXT NOT NULL DEFAULT (datetime('now'))
);
CREATE UNIQUE INDEX idx_ce_unique_claim ON contribution_events(
    handle, role, pr_number, claim_path
) WHERE claim_path IS NOT NULL;
CREATE UNIQUE INDEX idx_ce_unique_pr ON contribution_events(
    handle, role, pr_number
) WHERE claim_path IS NULL;
"""
|
|
|
|
|
|
# ─── Fixtures ────────────────────────────────────────────────────────────────
|
|
|
|
|
|
@pytest.fixture
def db_path(tmp_path):
    """Create a seeded pipeline.db under tmp_path and return its path as a str.

    Event timestamps are computed by SQLite relative to 'now' at insert time,
    so the cohort keeps the same relative ages whenever the suite runs.

    Cohort:
    - alice (person): 3 author + 1 originator, 2-3 days old
      (internet-finance, plus one ai-alignment author event)
    - bob (person): 5 author events, 60-63 days old (ai-alignment)
    - carol (person): 1 author + 1 evaluator, now (internet-finance)
    - rio (agent): 4 author + 2 evaluator, 2 days old
      (internet-finance, grand-strategy, ai-alignment)
    - leo (agent): 8 evaluator events, now (alternating internet-finance / ai-alignment)
    - cnbc (org): 2 originator events, 5 days old (legacy, before classifier moved orgs)
    - newhandle (no contributors row): 1 author event, 2 days old — tests LEFT JOIN COALESCE
    """
    # (handle, kind) rows for the contributors table.
    contribs = [
        ("alice", "person"),
        ("bob", "person"),
        ("carol", "person"),
        ("rio", "agent"),
        ("leo", "agent"),
        ("cnbc", "org"),
        # newhandle intentionally absent — tests LEFT JOIN
    ]

    # (handle, role, weight, pr_number, claim_path, domain, timestamp modifiers)
    # The last field is a comma-separated argument list for SQLite datetime().
    events = [
        # alice — 3 author + 1 originator, recent (all >24h ago, all <7d)
        # Most-recent event at -2 days (not -1 days) so 24h window exclusion is
        # unambiguous and not subject to fixture-vs-query microsecond drift.
        ("alice", "author", 0.30, 100, None, "internet-finance", "now,-2 days"),
        ("alice", "author", 0.30, 101, None, "internet-finance", "now,-2 days"),
        ("alice", "author", 0.30, 102, None, "ai-alignment", "now,-3 days"),
        ("alice", "originator", 0.15, 103, "domains/internet-finance/x.md", "internet-finance", "now,-2 days"),
        # bob — 5 author, all 60d ago (outside 30d, inside all_time)
        ("bob", "author", 0.30, 200, None, "ai-alignment", "now,-60 days"),
        ("bob", "author", 0.30, 201, None, "ai-alignment", "now,-60 days"),
        ("bob", "author", 0.30, 202, None, "ai-alignment", "now,-61 days"),
        ("bob", "author", 0.30, 203, None, "ai-alignment", "now,-62 days"),
        ("bob", "author", 0.30, 204, None, "ai-alignment", "now,-63 days"),
        # carol — 1 author + 1 evaluator, today
        ("carol", "author", 0.30, 300, None, "internet-finance", "now"),
        ("carol", "evaluator", 0.05, 301, None, "internet-finance", "now"),
        # rio agent — 4 author + 2 evaluator
        ("rio", "author", 0.30, 400, None, "internet-finance", "now,-2 days"),
        ("rio", "author", 0.30, 401, None, "grand-strategy", "now,-2 days"),
        ("rio", "author", 0.30, 402, None, "internet-finance", "now,-2 days"),
        ("rio", "author", 0.30, 403, None, "internet-finance", "now,-2 days"),
        ("rio", "evaluator", 0.05, 404, None, "ai-alignment", "now,-2 days"),
        ("rio", "evaluator", 0.05, 405, None, "ai-alignment", "now,-2 days"),
        # leo agent — 8 evaluator
        *[
            ("leo", "evaluator", 0.05, 500 + i, None, "internet-finance" if i % 2 == 0 else "ai-alignment", "now")
            for i in range(8)
        ],
        # cnbc org — 2 originator (legacy data, kept by classifier+gate split)
        ("cnbc", "originator", 0.15, 600, "domains/internet-finance/y.md", "internet-finance", "now,-5 days"),
        ("cnbc", "originator", 0.15, 601, "domains/internet-finance/z.md", "internet-finance", "now,-5 days"),
        # newhandle — handle in events but no contributors row (LEFT JOIN COALESCE → person)
        # -2 days so 24h-window test exclusion is unambiguous (matches alice).
        ("newhandle", "author", 0.30, 700, None, "ai-alignment", "now,-2 days"),
    ]

    p = tmp_path / "pipeline.db"
    conn = sqlite3.connect(str(p))
    try:
        conn.executescript(SCHEMA)
        conn.executemany(
            "INSERT INTO contributors (handle, kind) VALUES (?, ?)",
            contribs,
        )

        for handle, role, weight, pr_num, claim_path, domain, ts_modifier in events:
            # Let SQLite compute the timestamp relative to "now" so tests are
            # deterministic across days. The modifiers are bound as parameters
            # (datetime(?, ?)) rather than spliced into the SQL text.
            ts_args = [part.strip() for part in ts_modifier.split(",")]
            placeholders = ", ".join("?" for _ in ts_args)
            # The event row's own kind column is 'agent' for rio/leo and
            # 'person' for every other handle, cnbc included.
            # NOTE(review): cnbc's contributors row is 'org' while its event
            # rows say 'person' — presumably deliberate legacy shape; confirm.
            event_kind = "agent" if handle in {"rio", "leo"} else "person"
            conn.execute(
                f"""INSERT INTO contribution_events
                    (handle, kind, role, weight, pr_number, claim_path, domain, timestamp)
                    VALUES (?, ?, ?, ?, ?, ?, ?, datetime({placeholders}))""",
                (handle, event_kind, role, weight, pr_num, claim_path, domain, *ts_args),
            )

        conn.commit()
    finally:
        # Always release the file handle, even if seeding fails part-way.
        conn.close()
    return str(p)
|
|
|
|
|
|
def _call(db_path, **query):
    """Build a mocked request, call handle_leaderboard, return parsed JSON.

    Args:
        db_path: Path of the SQLite database, stored on the mocked app under
            the "db_path" key before the handler is invoked.
        **query: Query-string parameters for /api/leaderboard. Values are
            stringified and URL-encoded, so they may safely contain
            characters such as '&', '=' or spaces.

    Returns:
        The handler's response body decoded and parsed with json.loads.
    """
    # Function-scope import keeps this helper self-contained.
    from urllib.parse import urlencode

    qs = urlencode(query)
    req = make_mocked_request("GET", f"/api/leaderboard?{qs}")
    # make_mocked_request gives us req.app — write db_path into it.
    req.app["db_path"] = db_path
    # asyncio.run drives the coroutine to completion on a fresh event loop.
    response = asyncio.run(handle_leaderboard(req))
    return json.loads(response.body.decode())
|
|
|
|
|
|
# ─── _parse_window unit tests ────────────────────────────────────────────────
|
|
|
|
|
|
class TestParseWindow:
    """Unit tests for _parse_window's (clause, params, label) return triple."""

    def test_default_is_all_time(self):
        """A missing window yields no clause, no params and the all_time label."""
        where, binds, tag = _parse_window(None)
        assert where == ""
        assert binds == ()
        assert tag == "all_time"

    def test_explicit_all_time(self):
        """Passing 'all_time' explicitly yields an empty clause."""
        where, _, tag = _parse_window("all_time")
        assert where == ""
        assert tag == "all_time"

    def test_seven_days(self):
        """'7d' produces a parameterized timestamp clause with a -7 days bind."""
        where, binds, tag = _parse_window("7d")
        assert where == "ce.timestamp >= datetime('now', ?)"
        assert binds == ("-7 days",)
        assert tag == "7d"
        # Regression: must NOT begin with "AND " (handle_leaderboard composes via " AND ".join)
        assert not where.startswith("AND")

    def test_thirty_days(self):
        """'30d' binds -30 days and keeps its label."""
        _, binds, tag = _parse_window("30d")
        assert binds == ("-30 days",)
        assert tag == "30d"

    def test_hours(self):
        """'24h' uses the same clause shape with an hours modifier."""
        where, binds, tag = _parse_window("24h")
        assert where == "ce.timestamp >= datetime('now', ?)"
        assert binds == ("-24 hours",)
        assert tag == "24h"

    def test_caps_days_at_365(self):
        """Day counts above 365 are clamped to 365."""
        _, binds, _ = _parse_window("9999d")
        assert binds == ("-365 days",)

    def test_caps_hours_at_8760(self):
        """Hour counts above 8760 are clamped to 8760."""
        _, binds, _ = _parse_window("99999h")
        assert binds == ("-8760 hours",)

    def test_garbage_falls_to_all_time(self):
        """Unparseable input behaves like all_time."""
        where, _, tag = _parse_window("foobar")
        assert where == ""
        assert tag == "all_time"

    def test_uppercase_normalized(self):
        """The unit suffix is case-insensitive; the label comes back lowercase."""
        _, _, tag = _parse_window("7D")
        assert tag == "7d"

    def test_zero_days_still_emits_clause(self):
        """'0d' still parses into a datetime clause."""
        # 0d means "now or later" — empty result, but parse should succeed
        where, _, tag = _parse_window("0d")
        assert "datetime" in where
        assert tag == "0d"
|
|
|
|
|
|
# ─── handle_leaderboard integration tests ────────────────────────────────────
|
|
|
|
|
|
class TestLeaderboardEndpoint:
    """Integration tests: handle_leaderboard run against the seeded db_path fixture via _call."""

    def test_all_time_default_kind_person(self, db_path):
        """Default kind is 'person'. Returns all persons, sorted by CI desc."""
        body = _call(db_path)
        assert body["window"] == "all_time"
        assert body["kind_filter"] == "person"
        assert body["domain"] is None
        assert body["source"] == "contribution_events"
        # alice 3*0.30 + 0.15 = 1.05
        # bob 5*0.30 = 1.50
        # carol 0.30 + 0.05 = 0.35
        # newhandle 0.30 (LEFT JOIN COALESCE → 'person')
        # cnbc excluded (kind='org')
        # rio/leo excluded (kind='agent')
        handles = [r["handle"] for r in body["leaderboard"]]
        assert "bob" in handles
        assert "alice" in handles
        assert "newhandle" in handles, "LEFT JOIN COALESCE should default missing contributors to 'person'"
        assert "cnbc" not in handles, "kind=person should exclude orgs"
        assert "rio" not in handles, "kind=person should exclude agents"
        # Descending by CI
        cis = [r["ci"] for r in body["leaderboard"]]
        assert cis == sorted(cis, reverse=True)

    def test_window_7d_excludes_old_events(self, db_path):
        """REGRESSION: 7d window must execute (no AND-prefix SQL error).

        Bob has all events 60d+ ago → must not appear in 7d window.
        Alice has events 2-3d ago → must appear.
        """
        body = _call(db_path, window="7d")
        assert body["window"] == "7d"
        handles = [r["handle"] for r in body["leaderboard"]]
        assert "alice" in handles
        assert "bob" not in handles, "60d-old events must be excluded from 7d window"
        assert "carol" in handles  # today

    def test_window_30d_excludes_60d_events(self, db_path):
        """REGRESSION: 30d window must execute. Bob (60d) excluded; alice/carol included."""
        body = _call(db_path, window="30d")
        assert body["window"] == "30d"
        handles = [r["handle"] for r in body["leaderboard"]]
        assert "alice" in handles
        assert "carol" in handles
        assert "bob" not in handles

    def test_window_24h_only_today(self, db_path):
        """24h window picks up today's events only.

        Default kind=person. Within 24h: only carol (events at 'now').
        Excluded: alice/newhandle (events at -2 days), bob (-60d), rio/leo (kind),
        cnbc (-5d AND kind=org).
        """
        body = _call(db_path, window="24h")
        handles = [r["handle"] for r in body["leaderboard"]]
        assert handles == ["carol"], (
            "24h + kind=person should return only carol; got %r" % handles
        )

    def test_kind_agent(self, db_path):
        """kind=agent returns only agents."""
        body = _call(db_path, kind="agent")
        handles = [r["handle"] for r in body["leaderboard"]]
        assert "rio" in handles
        assert "leo" in handles
        assert "alice" not in handles
        assert "bob" not in handles

    def test_kind_org(self, db_path):
        """kind=org returns only orgs (legacy events still queryable)."""
        body = _call(db_path, kind="org")
        handles = [r["handle"] for r in body["leaderboard"]]
        assert handles == ["cnbc"]
        assert body["leaderboard"][0]["ci"] == 0.30  # 2 * 0.15

    def test_kind_all_returns_everyone(self, db_path):
        """kind=all returns all kinds — persons + agents + orgs."""
        body = _call(db_path, kind="all")
        handles = {r["handle"] for r in body["leaderboard"]}
        assert handles == {"alice", "bob", "carol", "rio", "leo", "cnbc", "newhandle"}

    def test_invalid_kind_falls_to_person(self, db_path):
        """Defensive: unknown kind value silently falls back to 'person'."""
        body = _call(db_path, kind="bogus")
        assert body["kind_filter"] == "person"

    def test_domain_filter(self, db_path):
        """domain=internet-finance scopes events; kind filter still applies."""
        body = _call(db_path, domain="internet-finance")
        assert body["domain"] == "internet-finance"
        handles = {r["handle"] for r in body["leaderboard"]}
        # alice has 2 internet-finance authors + 1 originator
        # carol has 1 internet-finance author + 1 evaluator
        # bob has 0 (all ai-alignment)
        # newhandle has 0 (ai-alignment only)
        assert "alice" in handles
        assert "carol" in handles
        assert "bob" not in handles
        assert "newhandle" not in handles

    def test_composed_window_kind_domain(self, db_path):
        """REGRESSION: composed filters must build SQL correctly.

        7d + person + internet-finance — alice and carol qualify;
        bob falls outside the window and rio is an agent.
        """
        body = _call(db_path, window="7d", kind="person", domain="internet-finance")
        handles = [r["handle"] for r in body["leaderboard"]]
        assert "alice" in handles
        assert "carol" in handles
        assert "bob" not in handles  # excluded by 7d
        assert "rio" not in handles  # excluded by kind=person

    def test_limit_caps_results(self, db_path):
        """limit caps the returned slice; total still counts every matching handle."""
        body = _call(db_path, kind="all", limit=3)
        assert body["shown"] == 3
        assert body["has_more"] is True
        assert body["total"] == 7

    def test_no_has_more_when_under_limit(self, db_path):
        """has_more is False when every matching row fits in the response."""
        body = _call(db_path, kind="org")
        assert body["shown"] == 1
        assert body["has_more"] is False
        assert body["total"] == 1

    def test_invalid_limit_falls_to_default(self, db_path):
        """Defensive: garbage limit param falls to default 100. 7 entries < 100."""
        body = _call(db_path, kind="all", limit="not-a-number")
        assert body["shown"] == 7
        assert body["has_more"] is False

    def test_limit_capped_at_500(self, db_path):
        """Defensive: limit > 500 silently caps at 500."""
        body = _call(db_path, limit=99999, kind="all")
        # No assertion on the value of the cap from the response — just that
        # it doesn't error and shown <= 500.
        assert body["shown"] <= 500

    def test_role_breakdown_present(self, db_path):
        """Each row includes ci_breakdown with all 5 roles."""
        body = _call(db_path)
        for entry in body["leaderboard"]:
            assert set(entry["ci_breakdown"].keys()) == {
                "author", "challenger", "synthesizer", "originator", "evaluator",
            }

    def test_alice_role_breakdown_correct(self, db_path):
        """Alice has 3 author (0.90) + 1 originator (0.15) = 1.05 total."""
        body = _call(db_path)
        alice = next(r for r in body["leaderboard"] if r["handle"] == "alice")
        assert alice["ci"] == 1.05
        assert alice["ci_breakdown"]["author"] == 0.90
        assert alice["ci_breakdown"]["originator"] == 0.15
        assert alice["ci_breakdown"]["challenger"] == 0
        assert alice["ci_breakdown"]["synthesizer"] == 0
        assert alice["ci_breakdown"]["evaluator"] == 0
        assert alice["events_count"] == 4
        assert alice["pr_count"] == 4
        assert alice["domain_count"] == 2  # internet-finance + ai-alignment

    def test_empty_window_returns_clean_response(self, db_path):
        """Window with no matching events returns shape-correct empty response."""
        # 24h window + kind=org → cnbc is 5d ago, so empty
        body = _call(db_path, window="24h", kind="org")
        assert body["leaderboard"] == []
        assert body["total"] == 0
        assert body["shown"] == 0
        assert body["has_more"] is False
        assert body["source"] == "contribution_events"

    def test_left_join_handles_missing_contributors_row(self, db_path):
        """REGRESSION: handle in events but missing from contributors must default to kind='person'.

        Catches the failure mode where a handle classified as cited (auto-create
        deferred to Branch 3) accumulates events but has no contributors row yet.
        """
        body = _call(db_path)
        newhandle_row = next(
            (r for r in body["leaderboard"] if r["handle"] == "newhandle"), None
        )
        assert newhandle_row is not None
        assert newhandle_row["kind"] == "person"
        assert newhandle_row["ci"] == 0.30
|
|
|
|
|
|
# ─── Public path constant (auth middleware bypass) ───────────────────────────
|
|
|
|
|
|
def test_public_paths_includes_leaderboard():
    """The leaderboard route must be listed in LEADERBOARD_PUBLIC_PATHS.

    The auth middleware consults that constant to skip the API-key check.
    """
    endpoint = "/api/leaderboard"
    assert endpoint in LEADERBOARD_PUBLIC_PATHS
|
|
|
|
|
|
def test_kind_values_matches_contract():
    """KIND_VALUES must contain exactly the four kinds the API contract accepts."""
    accepted = {"person", "agent", "org", "all"}
    assert set(KIND_VALUES) == accepted
|