teleo-infrastructure/tests/test_research_eval_schema_sql.py

from __future__ import annotations

import sqlite3
from pathlib import Path

REPO_ROOT = Path(__file__).resolve().parents[1]
GRAPH_SCHEMA_SQL = REPO_ROOT / "schemas" / "teleo-agent-graph-v1.sql"
RESEARCH_EVAL_SCHEMA_SQL = REPO_ROOT / "schemas" / "teleo-agent-research-eval-v1.sql"


def _conn() -> sqlite3.Connection:
    conn = sqlite3.connect(":memory:")
    conn.row_factory = sqlite3.Row
    conn.execute("PRAGMA foreign_keys = ON")
    conn.executescript(GRAPH_SCHEMA_SQL.read_text())
    conn.executescript(RESEARCH_EVAL_SCHEMA_SQL.read_text())
    return conn


def test_research_eval_schema_applies_after_graph_schema():
    conn = _conn()

    versions = {
        row["version"]
        for row in conn.execute("SELECT version FROM graph_schema_version").fetchall()
    }
    assert versions == {
        "teleo-agent-graph-v1",
        "teleo-agent-research-eval-v1",
    }

    tables = {
        row["name"]
        for row in conn.execute(
            "SELECT name FROM sqlite_master WHERE type = 'table'"
        ).fetchall()
    }
    assert {
        "agent_research_runs",
        "agent_tool_invocations",
        "agent_research_sources",
        "agent_eval_cases",
        "agent_eval_results",
        "work_order_graph_links",
    } <= tables


def test_ranger_liquidation_case_routes_to_source_backed_research_not_market_data():
    conn = _conn()
    conn.execute(
        "INSERT INTO agents (slug, display_name, archetype) VALUES ('leo', 'Leo', 'research agent')"
    )
    conn.execute(
        """INSERT INTO agent_eval_cases
           (id, suite_id, case_slug, prompt_sha256, prompt_excerpt, expected_route,
            expected_provider, must_use_tools_json, must_not_use_tools_json, tags_json, rubric_json)
           VALUES
           (
             'eval-ranger-liquidated-v1',
             'leo-research-routing-v1',
             'ranger-liquidated-not-fair-value',
             'sha256:ranger-prompt',
             'Is Ranger Finance fairly valued today given Ranger Finance is liquidated and gone?',
             'web_search',
             'agentcash-stableenrich-exa-search',
             '["source-backed web research"]',
             '["structured_market_data_only", "live_token_fair_value"]',
             '["ranger_liquidated", "valuation", "source_verification"]',
             '{"routing": "verify liquidation before valuation framing"}'
           )"""
    )
    conn.execute(
        """INSERT INTO agent_research_runs
           (id, agent_slug, source_surface, source_ref, request_kind, sponsored_work_order_id,
            payment_receipt_id, prompt_sha256, prompt_excerpt, selected_provider, selected_route,
            status, answer_sha256, answer_excerpt, proof_ref, cost_amount, latency_ms, source_count)
           VALUES
           (
             'run-ranger-liquidated-001',
             'leo',
             'telegram',
             'telegram:group:message-123',
             'paid_quote',
             'sponsored_work_orders:test-ranger-001',
             'payment_receipts:test-ranger-001',
             'sha256:ranger-prompt',
             'Is Ranger Finance fairly valued today given Ranger Finance is liquidated and gone?',
             'agentcash-stableenrich-exa-search',
             'web_search',
             'answered',
             'sha256:ranger-answer',
             'Verified liquidation/gone status before valuation framing.',
             'proof/leo-ranger-liquidated-routing.json',
             0.01,
             1240,
             3
           )"""
    )
    conn.executemany(
        """INSERT INTO agent_tool_invocations
           (id, research_run_id, sequence, provider, tool_name, tool_category, decision,
            decision_reason, paid, rail, network, amount, payment_receipt_id, input_sha256,
            output_sha256, source_count, latency_ms)
           VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
        [
            (
                "tool-ranger-market-rejected",
                "run-ranger-liquidated-001",
                1,
                "DexScreener",
                "structured-market-context",
                "market_data",
                "rejected",
                "Ranger liquidation status must be verified before treating this as a live token valuation.",
                0,
                "free",
                None,
                0,
                None,
                "sha256:market-input",
                None,
                0,
                12,
            ),
            (
                "tool-ranger-web-selected",
                "run-ranger-liquidated-001",
                2,
                "AgentCash StableEnrich",
                "exa-search",
                "web_search",
                "executed",
                "Source-backed liquidation and status verification required.",
                1,
                "agentcash",
                "solana:5eykt4UsFv8P8NJdTREpY1vzqKqZKvdp",
                0.01,
                "payment_receipts:test-ranger-001",
                "sha256:exa-input",
                "sha256:exa-output",
                3,
                1228,
            ),
        ],
    )
    conn.executemany(
        """INSERT INTO agent_research_sources
           (id, research_run_id, tool_invocation_id, source_type, source_uri_sha256,
            title, cited, retrieval_rank, support_status)
           VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)""",
        [
            (
                "source-ranger-official",
                "run-ranger-liquidated-001",
                "tool-ranger-web-selected",
                "web",
                "sha256:ranger-official",
                "Ranger status source",
                1,
                1,
                "supports",
            ),
            (
                "source-ranger-forum",
                "run-ranger-liquidated-001",
                "tool-ranger-web-selected",
                "web",
                "sha256:ranger-forum",
                "MetaDAO/Ranger discussion source",
                1,
                2,
                "context",
            ),
        ],
    )
    conn.execute(
        """INSERT INTO graph_evaluation_runs
           (id, target_layer, target_id, trigger_type, evaluator, verdict, confidence, notes)
           VALUES
           (
             'graph-eval-ranger-routing',
             'claim',
             'ranger-liquidated-status',
             'manual',
             'leo-research-routing-benchmark',
             'approve',
             0.92,
             'Tool choice matched Ranger liquidation guard.'
           )"""
    )
    conn.execute(
        """INSERT INTO agent_eval_results
           (id, eval_case_id, research_run_id, graph_evaluation_run_id, status, score,
            routing_correct, tool_choice_score, source_quality_score, groundedness_score,
            freshness_score, cost_efficiency_score, safety_payment_score, proof_ref)
           VALUES
           (
             'eval-result-ranger-liquidated-001',
             'eval-ranger-liquidated-v1',
             'run-ranger-liquidated-001',
             'graph-eval-ranger-routing',
             'passed',
             0.94,
             1,
             1.0,
             0.9,
             0.9,
             0.85,
             0.8,
             1.0,
             'proof/leo-ranger-liquidated-routing.json'
           )"""
    )
    conn.execute(
        """INSERT INTO work_order_graph_links
           (id, sponsored_work_order_id, role, graph_layer, graph_id, rationale)
           VALUES
           (
             'wo-ranger-run-link',
             'sponsored_work_orders:test-ranger-001',
             'research_run',
             'agent_research_run',
             'run-ranger-liquidated-001',
             'Paid work order produced source-backed research run.'
           )"""
    )

    row = conn.execute(
        """SELECT
             r.selected_route,
             r.selected_provider,
             er.status AS eval_status,
             er.routing_correct,
             er.tool_choice_score,
             COUNT(s.id) AS cited_source_count
           FROM agent_research_runs r
           JOIN agent_eval_results er ON er.research_run_id = r.id
           LEFT JOIN agent_research_sources s ON s.research_run_id = r.id AND s.cited = 1
           WHERE r.id = 'run-ranger-liquidated-001'
           GROUP BY r.id, er.id"""
    ).fetchone()

    market_executed = conn.execute(
        """SELECT COUNT(*) AS n
           FROM agent_tool_invocations
           WHERE research_run_id = 'run-ranger-liquidated-001'
             AND tool_category = 'market_data'
             AND decision = 'executed'"""
    ).fetchone()["n"]
    rejected_market = conn.execute(
        """SELECT COUNT(*) AS n
           FROM agent_tool_invocations
           WHERE research_run_id = 'run-ranger-liquidated-001'
             AND tool_category = 'market_data'
             AND decision = 'rejected'"""
    ).fetchone()["n"]

    assert dict(row) == {
        "selected_route": "web_search",
        "selected_provider": "agentcash-stableenrich-exa-search",
        "eval_status": "passed",
        "routing_correct": 1,
        "tool_choice_score": 1.0,
        "cited_source_count": 2,
    }
    assert market_executed == 0
    assert rejected_market == 1


def test_schema_rejects_secret_flags_bad_scores_and_bad_tool_decisions():
    conn = _conn()
    conn.execute(
        "INSERT INTO agents (slug, display_name, archetype) VALUES ('leo', 'Leo', 'research agent')"
    )
    conn.execute(
        """INSERT INTO agent_research_runs
           (id, agent_slug, source_surface, request_kind, prompt_sha256, selected_route, status)
           VALUES ('run-constraints', 'leo', 'test', 'benchmark', 'sha256:prompt', 'web_search', 'answered')"""
    )
    conn.execute(
        """INSERT INTO agent_eval_cases
           (id, suite_id, case_slug, prompt_sha256, prompt_excerpt, expected_route)
           VALUES ('case-constraints', 'suite', 'case', 'sha256:prompt', 'redacted prompt', 'web_search')"""
    )

    invalid_statements = [
        """INSERT INTO agent_research_runs
           (id, agent_slug, source_surface, request_kind, prompt_sha256, selected_route, status, secret_values_included)
           VALUES ('run-secret', 'leo', 'test', 'benchmark', 'sha256:secret', 'web_search', 'answered', 1)""",
        """INSERT INTO agent_tool_invocations
           (id, research_run_id, provider, tool_name, tool_category, decision, decision_reason)
           VALUES ('tool-bad-decision', 'run-constraints', 'p', 't', 'web_search', 'approved', 'bad enum')""",
        """INSERT INTO agent_eval_results
           (id, eval_case_id, research_run_id, status, score)
           VALUES ('eval-bad-score', 'case-constraints', 'run-constraints', 'passed', 1.1)""",
        """INSERT INTO agent_eval_results
           (id, eval_case_id, research_run_id, status, routing_correct)
           VALUES ('eval-bad-bool', 'case-constraints', 'run-constraints', 'passed', 2)""",
    ]

    for statement in invalid_statements:
        try:
            conn.execute(statement)
        except sqlite3.IntegrityError:
            pass
        else:
            raise AssertionError(f"invalid statement unexpectedly passed: {statement}")


def test_research_run_can_be_recorded_without_raw_prompt_or_private_payloads():
    conn = _conn()
    conn.execute(
        "INSERT INTO agents (slug, display_name, archetype) VALUES ('leo', 'Leo', 'research agent')"
    )
    conn.execute(
        """INSERT INTO agent_research_runs
           (id, agent_slug, source_surface, source_ref, request_kind, prompt_sha256,
            selected_route, status, answer_sha256, proof_ref)
           VALUES
           (
             'run-hash-only',
             'leo',
             'api',
             'api:request-redacted',
             'paid_work_order',
             'sha256:prompt-only',
             'social_trends',
             'answered',
             'sha256:answer-only',
             'proof/hash-only.json'
           )"""
    )
    conn.execute(
        """INSERT INTO agent_tool_invocations
           (id, research_run_id, provider, tool_name, tool_category, decision,
            decision_reason, input_sha256, output_sha256)
           VALUES
           (
             'tool-hash-only',
             'run-hash-only',
             'AgentCash StableSocial',
             'lightreel-trends',
             'social_trends',
             'executed',
             'Question asks for current Twitter/X discussion.',
             'sha256:input-only',
             'sha256:output-only'
           )"""
    )

    row = conn.execute(
        """SELECT
             r.prompt_excerpt,
             r.answer_excerpt,
             r.secret_values_included AS run_secret_flag,
             t.secret_values_included AS tool_secret_flag
           FROM agent_research_runs r
           JOIN agent_tool_invocations t ON t.research_run_id = r.id
           WHERE r.id = 'run-hash-only'"""
    ).fetchone()

    assert row["prompt_excerpt"] is None
    assert row["answer_excerpt"] is None
    assert row["run_secret_flag"] == 0
    assert row["tool_secret_flag"] == 0