# teleo-infrastructure/tests/test_leaderboard.py

"""Tests for /api/leaderboard endpoint (diagnostics/leaderboard_routes.py).

Locks behavior for the four slicings consumed by Argus + Oberon:
  - window: all_time | Nd | Nh
  - domain: per-domain filter
  - kind:   person | agent | org | all
  - limit:  pagination + has_more flag

Regression coverage includes the AND-prefix SQL bug (commit 42d35d4): _parse_window
returned clauses prefixed with 'AND ' which produced 'WHERE 1=1 AND AND ...' when
joined into the WHERE clause via " AND ".join(...).
"""

import asyncio
import json
import sqlite3
from pathlib import Path

import pytest

# Skip whole file if aiohttp isn't available (matches test_activity_classify.py pattern)
aiohttp = pytest.importorskip("aiohttp")

# Make diagnostics/ importable
import sys
DIAG_ROOT = Path(__file__).parent.parent / "diagnostics"
sys.path.insert(0, str(DIAG_ROOT))

from leaderboard_routes import (  # noqa: E402
    _parse_window,
    handle_leaderboard,
    KIND_VALUES,
    LEADERBOARD_PUBLIC_PATHS,
)
from aiohttp.test_utils import make_mocked_request  # noqa: E402


# ─── Schema lifted from lib/db.py:138-209 (v25 minimum) ──────────────────────

# Minimal DDL that the db_path fixture runs through executescript(): the
# contributors and contribution_events tables plus two partial unique indexes
# on contribution_events (one applies when claim_path IS NOT NULL, the other
# when claim_path IS NULL).
# NOTE(review): this is a hand-copied subset — confirm it still matches
# lib/db.py whenever the production schema changes.
SCHEMA = """
CREATE TABLE contributors (
    handle TEXT PRIMARY KEY,
    kind TEXT DEFAULT 'person',
    tier TEXT DEFAULT 'new',
    claims_merged INTEGER DEFAULT 0,
    sourcer_count INTEGER DEFAULT 0,
    extractor_count INTEGER DEFAULT 0,
    challenger_count INTEGER DEFAULT 0,
    synthesizer_count INTEGER DEFAULT 0,
    reviewer_count INTEGER DEFAULT 0,
    challenges_survived INTEGER DEFAULT 0,
    domains TEXT DEFAULT '[]',
    first_contribution TEXT,
    last_contribution TEXT
);

CREATE TABLE contribution_events (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    handle TEXT NOT NULL,
    kind TEXT NOT NULL DEFAULT 'person',
    role TEXT NOT NULL,
    weight REAL NOT NULL,
    pr_number INTEGER NOT NULL,
    claim_path TEXT,
    domain TEXT,
    channel TEXT,
    timestamp TEXT NOT NULL DEFAULT (datetime('now'))
);
CREATE UNIQUE INDEX idx_ce_unique_claim ON contribution_events(
    handle, role, pr_number, claim_path
) WHERE claim_path IS NOT NULL;
CREATE UNIQUE INDEX idx_ce_unique_pr ON contribution_events(
    handle, role, pr_number
) WHERE claim_path IS NULL;
"""


# ─── Fixtures ────────────────────────────────────────────────────────────────


@pytest.fixture
def db_path(tmp_path):
    """Create a seeded pipeline.db with deterministic events; return its path as str.

    Cohort (kind as stored in the contributors table):
      - alice (person): 3 author + 1 originator, 2-3d ago
        (internet-finance, plus one ai-alignment author event)
      - bob (person): 5 author events, 60-63d ago, ai-alignment
      - carol (person): 1 author + 1 evaluator, now, internet-finance
      - rio (agent): 4 author + 2 evaluator, 2d ago
        (internet-finance, grand-strategy, ai-alignment)
      - leo (agent): 8 evaluator events, now, alternating
        internet-finance / ai-alignment
      - cnbc (org): 2 originator events, 5d ago, internet-finance
        (legacy, before classifier moved orgs)
      - newhandle (no contributors row): 1 author event, 2d ago —
        tests LEFT JOIN COALESCE

    The contribution_events.kind column is seeded as 'agent' for rio/leo and
    'person' for every other handle (including cnbc).

    The connection is always closed, even when seeding fails, so a broken
    fixture does not leave an open handle on the temporary database file.
    """
    p = tmp_path / "pipeline.db"

    contribs = [
        ("alice", "person"),
        ("bob", "person"),
        ("carol", "person"),
        ("rio", "agent"),
        ("leo", "agent"),
        ("cnbc", "org"),
        # newhandle intentionally absent — tests LEFT JOIN
    ]

    # (handle, role, weight, pr_number, claim_path, domain, timestamp)
    # The timestamp field is a comma-separated argument list for SQLite's
    # datetime(): a time value followed by optional modifiers.
    events = [
        # alice — 3 author + 1 originator, recent (all >24h ago, all <7d)
        # Most-recent event at -2 days (not -1 days) so 24h window exclusion is
        # unambiguous and not subject to fixture-vs-query microsecond drift.
        ("alice", "author", 0.30, 100, None, "internet-finance", "now,-2 days"),
        ("alice", "author", 0.30, 101, None, "internet-finance", "now,-2 days"),
        ("alice", "author", 0.30, 102, None, "ai-alignment", "now,-3 days"),
        ("alice", "originator", 0.15, 103, "domains/internet-finance/x.md", "internet-finance", "now,-2 days"),
        # bob — 5 author, all 60d+ ago (outside 30d, inside all_time)
        ("bob", "author", 0.30, 200, None, "ai-alignment", "now,-60 days"),
        ("bob", "author", 0.30, 201, None, "ai-alignment", "now,-60 days"),
        ("bob", "author", 0.30, 202, None, "ai-alignment", "now,-61 days"),
        ("bob", "author", 0.30, 203, None, "ai-alignment", "now,-62 days"),
        ("bob", "author", 0.30, 204, None, "ai-alignment", "now,-63 days"),
        # carol — 1 author + 1 evaluator, today
        ("carol", "author", 0.30, 300, None, "internet-finance", "now"),
        ("carol", "evaluator", 0.05, 301, None, "internet-finance", "now"),
        # rio agent — 4 author + 2 evaluator
        ("rio", "author", 0.30, 400, None, "internet-finance", "now,-2 days"),
        ("rio", "author", 0.30, 401, None, "grand-strategy", "now,-2 days"),
        ("rio", "author", 0.30, 402, None, "internet-finance", "now,-2 days"),
        ("rio", "author", 0.30, 403, None, "internet-finance", "now,-2 days"),
        ("rio", "evaluator", 0.05, 404, None, "ai-alignment", "now,-2 days"),
        ("rio", "evaluator", 0.05, 405, None, "ai-alignment", "now,-2 days"),
        # leo agent — 8 evaluator
        *[
            ("leo", "evaluator", 0.05, 500 + i, None, "internet-finance" if i % 2 == 0 else "ai-alignment", "now")
            for i in range(8)
        ],
        # cnbc org — 2 originator (legacy data, kept by classifier+gate split)
        ("cnbc", "originator", 0.15, 600, "domains/internet-finance/y.md", "internet-finance", "now,-5 days"),
        ("cnbc", "originator", 0.15, 601, "domains/internet-finance/z.md", "internet-finance", "now,-5 days"),
        # newhandle — handle in events but no contributors row (LEFT JOIN COALESCE → person)
        # -2 days so 24h-window test exclusion is unambiguous (matches alice).
        ("newhandle", "author", 0.30, 700, None, "ai-alignment", "now,-2 days"),
    ]

    agent_handles = {"rio", "leo"}

    conn = sqlite3.connect(str(p))
    try:
        conn.executescript(SCHEMA)

        conn.executemany(
            "INSERT INTO contributors (handle, kind) VALUES (?, ?)",
            contribs,
        )

        for handle, role, weight, pr_num, claim_path, domain, ts_modifier in events:
            # Let SQLite compute timestamps relative to "now" so tests are
            # deterministic across days. The datetime() arguments are bound as
            # parameters; only the "?" placeholders are interpolated, so any
            # number of modifiers is supported without string-building values.
            ts_args = [part.strip() for part in ts_modifier.split(",")]
            ts_placeholders = ", ".join("?" for _ in ts_args)
            conn.execute(
                "INSERT INTO contribution_events"
                " (handle, kind, role, weight, pr_number, claim_path, domain, timestamp)"
                f" VALUES (?, ?, ?, ?, ?, ?, ?, datetime({ts_placeholders}))",
                (
                    handle,
                    "agent" if handle in agent_handles else "person",
                    role,
                    weight,
                    pr_num,
                    claim_path,
                    domain,
                    *ts_args,
                ),
            )

        conn.commit()
    finally:
        # Release the file handle even if schema creation or seeding raised.
        conn.close()
    return str(p)


def _call(db_path, **query):
    """Invoke handle_leaderboard with a mocked GET request and return parsed JSON.

    Args:
        db_path: Path of the SQLite database; stored on the mocked app under
            the "db_path" key before the handler runs.
        **query: Query-string parameters (e.g. window="7d", limit=3). Values
            are stringified and percent-encoded, so values containing
            characters such as "&", "=", "#" or spaces reach the handler intact.

    Returns:
        The handler's response body decoded and parsed with json.loads.
    """
    # Local import keeps this helper self-contained.
    from urllib.parse import urlencode

    url = "/api/leaderboard"
    if query:
        url = f"{url}?{urlencode(query)}"
    req = make_mocked_request("GET", url)
    # make_mocked_request gives us req.app — write db_path into it.
    req.app["db_path"] = db_path
    response = asyncio.run(handle_leaderboard(req))
    return json.loads(response.body.decode())


# ─── _parse_window unit tests ────────────────────────────────────────────────


class TestParseWindow:
    """Unit tests for the (clause, params, label) triple returned by _parse_window."""

    def test_default_is_all_time(self):
        """A missing window yields no clause, no params, and the all_time label."""
        where, bind, name = _parse_window(None)
        assert name == "all_time"
        assert where == ""
        assert bind == ()

    def test_explicit_all_time(self):
        """Passing "all_time" explicitly yields no clause and the all_time label."""
        where, _, name = _parse_window("all_time")
        assert name == "all_time"
        assert where == ""

    def test_seven_days(self):
        """A day window yields a bare timestamp clause with a "-N days" param."""
        where, bind, name = _parse_window("7d")
        # Regression: must NOT begin with "AND " (handle_leaderboard composes via " AND ".join)
        assert not where.startswith("AND")
        assert where == "ce.timestamp >= datetime('now', ?)"
        assert bind == ("-7 days",)
        assert name == "7d"

    def test_thirty_days(self):
        """30d maps to a "-30 days" param and keeps its label."""
        _, bind, name = _parse_window("30d")
        assert name == "30d"
        assert bind == ("-30 days",)

    def test_hours(self):
        """An hour window yields the same clause with a "-N hours" param."""
        where, bind, name = _parse_window("24h")
        assert name == "24h"
        assert bind == ("-24 hours",)
        assert where == "ce.timestamp >= datetime('now', ?)"

    def test_caps_days_at_365(self):
        """Oversized day windows are clamped to 365 days."""
        _, bind, _ = _parse_window("9999d")
        assert bind == ("-365 days",)

    def test_caps_hours_at_8760(self):
        """Oversized hour windows are clamped to 8760 hours."""
        _, bind, _ = _parse_window("99999h")
        assert bind == ("-8760 hours",)

    def test_garbage_falls_to_all_time(self):
        """Unparseable input behaves like all_time."""
        where, _, name = _parse_window("foobar")
        assert name == "all_time"
        assert where == ""

    def test_uppercase_normalized(self):
        """The unit suffix is case-insensitive; the label comes back lowercased."""
        _, _, name = _parse_window("7D")
        assert name == "7d"

    def test_zero_days_still_emits_clause(self):
        """0d still produces a datetime clause and keeps its label."""
        # 0d means "now or later" — empty result, but parse should succeed
        where, _, name = _parse_window("0d")
        assert name == "0d"
        assert "datetime" in where


# ─── handle_leaderboard integration tests ────────────────────────────────────


class TestLeaderboardEndpoint:
    """Integration tests: run handle_leaderboard against the seeded db_path fixture.

    CI values are sums of floating-point weights, so they are compared with
    pytest.approx rather than exact equality.
    """

    def test_all_time_default_kind_person(self, db_path):
        """Default kind is 'person'. Returns all persons, sorted by CI desc."""
        body = _call(db_path)
        assert body["window"] == "all_time"
        assert body["kind_filter"] == "person"
        assert body["domain"] is None
        assert body["source"] == "contribution_events"
        # alice 3*0.30 + 0.15 = 1.05
        # bob 5*0.30 = 1.50
        # carol 0.30 + 0.05 = 0.35
        # newhandle 0.30 (LEFT JOIN COALESCE → 'person')
        # cnbc excluded (kind='org')
        # rio/leo excluded (kind='agent')
        handles = [r["handle"] for r in body["leaderboard"]]
        assert "bob" in handles
        assert "alice" in handles
        assert "carol" in handles
        assert "newhandle" in handles, "LEFT JOIN COALESCE should default missing contributors to 'person'"
        assert "cnbc" not in handles, "kind=person should exclude orgs"
        assert "rio" not in handles, "kind=person should exclude agents"
        # Descending by CI
        cis = [r["ci"] for r in body["leaderboard"]]
        assert cis == sorted(cis, reverse=True)

    def test_window_7d_excludes_old_events(self, db_path):
        """REGRESSION: 7d window must execute (no AND-prefix SQL error).

        Bob has all events 60d+ ago → must not appear in 7d window.
        Alice has events 2-3d ago → must appear.
        """
        body = _call(db_path, window="7d")
        assert body["window"] == "7d"
        handles = [r["handle"] for r in body["leaderboard"]]
        assert "alice" in handles
        assert "bob" not in handles, "60d-old events must be excluded from 7d window"
        assert "carol" in handles  # today

    def test_window_30d_excludes_60d_events(self, db_path):
        """REGRESSION: 30d window must execute. Bob (60d) excluded; alice/carol included."""
        body = _call(db_path, window="30d")
        assert body["window"] == "30d"
        handles = [r["handle"] for r in body["leaderboard"]]
        assert "alice" in handles
        assert "carol" in handles
        assert "bob" not in handles

    def test_window_24h_only_today(self, db_path):
        """24h window picks up today's events only.

        Default kind=person. Within 24h: only carol (events at 'now').
        Excluded: alice/newhandle (events at -2 days), bob (-60d), rio/leo (kind),
        cnbc (-5d AND kind=org).
        """
        body = _call(db_path, window="24h")
        handles = [r["handle"] for r in body["leaderboard"]]
        assert handles == ["carol"], (
            f"24h + kind=person should return only carol; got {handles!r}"
        )

    def test_kind_agent(self, db_path):
        """kind=agent returns only agents."""
        body = _call(db_path, kind="agent")
        handles = [r["handle"] for r in body["leaderboard"]]
        assert "rio" in handles
        assert "leo" in handles
        assert "alice" not in handles
        assert "bob" not in handles

    def test_kind_org(self, db_path):
        """kind=org returns only orgs (legacy events still queryable)."""
        body = _call(db_path, kind="org")
        handles = [r["handle"] for r in body["leaderboard"]]
        assert handles == ["cnbc"]
        assert body["leaderboard"][0]["ci"] == pytest.approx(0.30)  # 2 * 0.15

    def test_kind_all_returns_everyone(self, db_path):
        """kind=all returns all kinds — persons + agents + orgs."""
        body = _call(db_path, kind="all")
        handles = {r["handle"] for r in body["leaderboard"]}
        assert handles == {"alice", "bob", "carol", "rio", "leo", "cnbc", "newhandle"}

    def test_invalid_kind_falls_to_person(self, db_path):
        """Defensive: unknown kind value silently falls back to 'person'."""
        body = _call(db_path, kind="bogus")
        assert body["kind_filter"] == "person"

    def test_domain_filter(self, db_path):
        """domain=internet-finance scopes events; kind filter still applies."""
        body = _call(db_path, domain="internet-finance")
        assert body["domain"] == "internet-finance"
        handles = {r["handle"] for r in body["leaderboard"]}
        # alice has 2 internet-finance authors + 1 originator
        # carol has 1 internet-finance author + 1 evaluator
        # bob has 0 (all ai-alignment)
        # newhandle has 0 (ai-alignment only)
        assert "alice" in handles
        assert "carol" in handles
        assert "bob" not in handles
        assert "newhandle" not in handles

    def test_composed_window_kind_domain(self, db_path):
        """REGRESSION: composed filters must build SQL correctly.

        7d + person + internet-finance — alice and carol; bob and rio excluded.
        """
        body = _call(db_path, window="7d", kind="person", domain="internet-finance")
        handles = [r["handle"] for r in body["leaderboard"]]
        assert "alice" in handles
        assert "carol" in handles
        assert "bob" not in handles  # excluded by 7d
        assert "rio" not in handles  # excluded by kind=person

    def test_limit_caps_results(self, db_path):
        """limit caps the leaderboard slice; total still counts every matching handle."""
        body = _call(db_path, kind="all", limit=3)
        assert body["shown"] == 3
        assert body["has_more"] is True
        assert body["total"] == 7

    def test_no_has_more_when_under_limit(self, db_path):
        """has_more is False when every matching row fits within the limit."""
        body = _call(db_path, kind="org")
        assert body["shown"] == 1
        assert body["has_more"] is False
        assert body["total"] == 1

    def test_invalid_limit_falls_to_default(self, db_path):
        """Defensive: garbage limit param falls to default 100. 7 entries < 100."""
        body = _call(db_path, kind="all", limit="not-a-number")
        assert body["shown"] == 7
        assert body["has_more"] is False

    def test_limit_capped_at_500(self, db_path):
        """Defensive: limit > 500 silently caps at 500."""
        body = _call(db_path, limit=99999, kind="all")
        # No assertion on the value of the cap from the response — just that
        # it doesn't error and shown <= 500.
        assert body["shown"] <= 500

    def test_role_breakdown_present(self, db_path):
        """Each row includes ci_breakdown with all 5 roles."""
        body = _call(db_path)
        for entry in body["leaderboard"]:
            assert set(entry["ci_breakdown"].keys()) == {
                "author", "challenger", "synthesizer", "originator", "evaluator",
            }

    def test_alice_role_breakdown_correct(self, db_path):
        """Alice has 3 author (0.90) + 1 originator (0.15) = 1.05 total."""
        body = _call(db_path)
        alice = next(r for r in body["leaderboard"] if r["handle"] == "alice")
        # Weighted sums are floats (3 * 0.30 is not exactly 0.90 in binary),
        # so compare with a tolerance instead of exact equality.
        assert alice["ci"] == pytest.approx(1.05)
        assert alice["ci_breakdown"]["author"] == pytest.approx(0.90)
        assert alice["ci_breakdown"]["originator"] == pytest.approx(0.15)
        assert alice["ci_breakdown"]["challenger"] == 0
        assert alice["ci_breakdown"]["synthesizer"] == 0
        assert alice["ci_breakdown"]["evaluator"] == 0
        assert alice["events_count"] == 4
        assert alice["pr_count"] == 4
        assert alice["domain_count"] == 2  # internet-finance + ai-alignment

    def test_empty_window_returns_clean_response(self, db_path):
        """Window with no matching events returns shape-correct empty response."""
        # 24h window + kind=org → cnbc is 5d ago, so empty
        body = _call(db_path, window="24h", kind="org")
        assert body["leaderboard"] == []
        assert body["total"] == 0
        assert body["shown"] == 0
        assert body["has_more"] is False
        assert body["source"] == "contribution_events"

    def test_left_join_handles_missing_contributors_row(self, db_path):
        """REGRESSION: handle in events but missing from contributors must default to kind='person'.

        Catches the failure mode where a handle classified as cited (auto-create
        deferred to Branch 3) accumulates events but has no contributors row yet.
        """
        body = _call(db_path)
        newhandle_row = next(
            (r for r in body["leaderboard"] if r["handle"] == "newhandle"), None
        )
        assert newhandle_row is not None
        assert newhandle_row["kind"] == "person"
        assert newhandle_row["ci"] == pytest.approx(0.30)


# ─── Public path constant (auth middleware bypass) ───────────────────────────


def test_public_paths_includes_leaderboard():
    """The leaderboard route must be listed in LEADERBOARD_PUBLIC_PATHS.

    Auth middleware needs this entry to skip the API key for /api/leaderboard.
    """
    leaderboard_route = "/api/leaderboard"
    assert leaderboard_route in LEADERBOARD_PUBLIC_PATHS


def test_kind_values_matches_contract():
    """API contract: KIND_VALUES holds exactly the 4 accepted kind values."""
    accepted_kinds = {"person", "agent", "org", "all"}
    assert set(KIND_VALUES) == accepted_kinds