"""Schema tests for the agent research/eval tables layered on the graph schema.

Each test builds a fresh in-memory SQLite database from the two schema files
under ``schemas/`` and exercises it purely through SQL, so no state is shared
between tests.
"""

from __future__ import annotations

import sqlite3
from contextlib import closing
from pathlib import Path

REPO_ROOT = Path(__file__).resolve().parents[1]
GRAPH_SCHEMA_SQL = REPO_ROOT / "schemas" / "teleo-agent-graph-v1.sql"
RESEARCH_EVAL_SCHEMA_SQL = REPO_ROOT / "schemas" / "teleo-agent-research-eval-v1.sql"

# Agent row that the research-run fixtures below reference via ``agent_slug``.
_INSERT_LEO_AGENT_SQL = (
    "INSERT INTO agents (slug, display_name, archetype) "
    "VALUES ('leo', 'Leo', 'research agent')"
)


def _conn() -> sqlite3.Connection:
    """Return a fresh in-memory database with both schemas applied.

    Rows come back as ``sqlite3.Row`` (addressable by column name) and foreign
    key enforcement is switched on before any schema is loaded. The graph
    schema is applied first, then the research/eval schema.

    The caller owns the returned connection and is responsible for closing it.

    Raises:
        OSError: if a schema file cannot be read.
        sqlite3.Error: if a schema script fails to apply.
    """
    conn = sqlite3.connect(":memory:")
    try:
        conn.row_factory = sqlite3.Row
        conn.execute("PRAGMA foreign_keys = ON")
        for schema_path in (GRAPH_SCHEMA_SQL, RESEARCH_EVAL_SCHEMA_SQL):
            # Explicit encoding keeps the read independent of the platform
            # locale. NOTE(review): assumes the schema files are UTF-8.
            conn.executescript(schema_path.read_text(encoding="utf-8"))
    except BaseException:
        # Do not leak the connection when setup fails part-way through.
        conn.close()
        raise
    return conn


def test_research_eval_schema_applies_after_graph_schema():
    """Both schema versions are recorded and the research/eval tables exist."""
    with closing(_conn()) as conn:
        versions = {
            row["version"]
            for row in conn.execute(
                "SELECT version FROM graph_schema_version"
            ).fetchall()
        }
        tables = {
            row["name"]
            for row in conn.execute(
                "SELECT name FROM sqlite_master WHERE type = 'table'"
            ).fetchall()
        }

    assert versions == {
        "teleo-agent-graph-v1",
        "teleo-agent-research-eval-v1",
    }
    # Subset check: the graph schema contributes further tables of its own.
    assert {
        "agent_research_runs",
        "agent_tool_invocations",
        "agent_research_sources",
        "agent_eval_cases",
        "agent_eval_results",
        "work_order_graph_links",
    } <= tables


def test_ranger_liquidation_case_routes_to_source_backed_research_not_market_data():
    """Record a full research run for the Ranger case and read it back.

    The fixture stores an eval case, a research run routed to ``web_search``,
    one rejected ``market_data`` tool invocation, one executed ``web_search``
    invocation with two cited sources, a graph evaluation run, an eval result
    and a work-order link. The assertions check the joined view of that run
    and that no ``market_data`` invocation was recorded as executed.
    """
    with closing(_conn()) as conn:
        conn.execute(_INSERT_LEO_AGENT_SQL)
        conn.execute(
            """INSERT INTO agent_eval_cases
               (id, suite_id, case_slug, prompt_sha256, prompt_excerpt,
                expected_route, expected_provider, must_use_tools_json,
                must_not_use_tools_json, tags_json, rubric_json)
               VALUES (
                 'eval-ranger-liquidated-v1',
                 'leo-research-routing-v1',
                 'ranger-liquidated-not-fair-value',
                 'sha256:ranger-prompt',
                 'Is Ranger Finance fairly valued today given Ranger Finance is liquidated and gone?',
                 'web_search',
                 'agentcash-stableenrich-exa-search',
                 '["source-backed web research"]',
                 '["structured_market_data_only", "live_token_fair_value"]',
                 '["ranger_liquidated", "valuation", "source_verification"]',
                 '{"routing": "verify liquidation before valuation framing"}'
               )"""
        )
        conn.execute(
            """INSERT INTO agent_research_runs
               (id, agent_slug, source_surface, source_ref, request_kind,
                sponsored_work_order_id, payment_receipt_id, prompt_sha256,
                prompt_excerpt, selected_provider, selected_route, status,
                answer_sha256, answer_excerpt, proof_ref, cost_amount,
                latency_ms, source_count)
               VALUES (
                 'run-ranger-liquidated-001',
                 'leo',
                 'telegram',
                 'telegram:group:message-123',
                 'paid_quote',
                 'sponsored_work_orders:test-ranger-001',
                 'payment_receipts:test-ranger-001',
                 'sha256:ranger-prompt',
                 'Is Ranger Finance fairly valued today given Ranger Finance is liquidated and gone?',
                 'agentcash-stableenrich-exa-search',
                 'web_search',
                 'answered',
                 'sha256:ranger-answer',
                 'Verified liquidation/gone status before valuation framing.',
                 'proof/leo-ranger-liquidated-routing.json',
                 0.01,
                 1240,
                 3
               )"""
        )
        conn.executemany(
            """INSERT INTO agent_tool_invocations
               (id, research_run_id, sequence, provider, tool_name,
                tool_category, decision, decision_reason, paid, rail, network,
                amount, payment_receipt_id, input_sha256, output_sha256,
                source_count, latency_ms)
               VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
            [
                # Market-data lookup that was considered but rejected.
                (
                    "tool-ranger-market-rejected",
                    "run-ranger-liquidated-001",
                    1,
                    "DexScreener",
                    "structured-market-context",
                    "market_data",
                    "rejected",
                    "Ranger liquidation status must be verified before treating this as a live token valuation.",
                    0,
                    "free",
                    None,
                    0,
                    None,
                    "sha256:market-input",
                    None,
                    0,
                    12,
                ),
                # Paid web search that was actually executed.
                (
                    "tool-ranger-web-selected",
                    "run-ranger-liquidated-001",
                    2,
                    "AgentCash StableEnrich",
                    "exa-search",
                    "web_search",
                    "executed",
                    "Source-backed liquidation and status verification required.",
                    1,
                    "agentcash",
                    "solana:5eykt4UsFv8P8NJdTREpY1vzqKqZKvdp",
                    0.01,
                    "payment_receipts:test-ranger-001",
                    "sha256:exa-input",
                    "sha256:exa-output",
                    3,
                    1228,
                ),
            ],
        )
        conn.executemany(
            """INSERT INTO agent_research_sources
               (id, research_run_id, tool_invocation_id, source_type,
                source_uri_sha256, title, cited, retrieval_rank, support_status)
               VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)""",
            [
                (
                    "source-ranger-official",
                    "run-ranger-liquidated-001",
                    "tool-ranger-web-selected",
                    "web",
                    "sha256:ranger-official",
                    "Ranger status source",
                    1,
                    1,
                    "supports",
                ),
                (
                    "source-ranger-forum",
                    "run-ranger-liquidated-001",
                    "tool-ranger-web-selected",
                    "web",
                    "sha256:ranger-forum",
                    "MetaDAO/Ranger discussion source",
                    1,
                    2,
                    "context",
                ),
            ],
        )
        conn.execute(
            """INSERT INTO graph_evaluation_runs
               (id, target_layer, target_id, trigger_type, evaluator, verdict,
                confidence, notes)
               VALUES (
                 'graph-eval-ranger-routing',
                 'claim',
                 'ranger-liquidated-status',
                 'manual',
                 'leo-research-routing-benchmark',
                 'approve',
                 0.92,
                 'Tool choice matched Ranger liquidation guard.'
               )"""
        )
        conn.execute(
            """INSERT INTO agent_eval_results
               (id, eval_case_id, research_run_id, graph_evaluation_run_id,
                status, score, routing_correct, tool_choice_score,
                source_quality_score, groundedness_score, freshness_score,
                cost_efficiency_score, safety_payment_score, proof_ref)
               VALUES (
                 'eval-result-ranger-liquidated-001',
                 'eval-ranger-liquidated-v1',
                 'run-ranger-liquidated-001',
                 'graph-eval-ranger-routing',
                 'passed',
                 0.94,
                 1,
                 1.0,
                 0.9,
                 0.9,
                 0.85,
                 0.8,
                 1.0,
                 'proof/leo-ranger-liquidated-routing.json'
               )"""
        )
        conn.execute(
            """INSERT INTO work_order_graph_links
               (id, sponsored_work_order_id, role, graph_layer, graph_id,
                rationale)
               VALUES (
                 'wo-ranger-run-link',
                 'sponsored_work_orders:test-ranger-001',
                 'research_run',
                 'agent_research_run',
                 'run-ranger-liquidated-001',
                 'Paid work order produced source-backed research run.'
               )"""
        )

        # One row per (run, eval result); the LEFT JOIN counts cited sources only.
        row = conn.execute(
            """SELECT r.selected_route,
                      r.selected_provider,
                      er.status AS eval_status,
                      er.routing_correct,
                      er.tool_choice_score,
                      COUNT(s.id) AS cited_source_count
               FROM agent_research_runs r
               JOIN agent_eval_results er ON er.research_run_id = r.id
               LEFT JOIN agent_research_sources s
                 ON s.research_run_id = r.id AND s.cited = 1
               WHERE r.id = 'run-ranger-liquidated-001'
               GROUP BY r.id, er.id"""
        ).fetchone()
        market_executed = conn.execute(
            """SELECT COUNT(*) AS n
               FROM agent_tool_invocations
               WHERE research_run_id = 'run-ranger-liquidated-001'
                 AND tool_category = 'market_data'
                 AND decision = 'executed'"""
        ).fetchone()["n"]
        rejected_market = conn.execute(
            """SELECT COUNT(*) AS n
               FROM agent_tool_invocations
               WHERE research_run_id = 'run-ranger-liquidated-001'
                 AND tool_category = 'market_data'
                 AND decision = 'rejected'"""
        ).fetchone()["n"]

        assert dict(row) == {
            "selected_route": "web_search",
            "selected_provider": "agentcash-stableenrich-exa-search",
            "eval_status": "passed",
            "routing_correct": 1,
            "tool_choice_score": 1.0,
            "cited_source_count": 2,
        }
        assert market_executed == 0
        assert rejected_market == 1


def test_schema_rejects_secret_flags_bad_scores_and_bad_tool_decisions():
    """Each invalid INSERT must fail with ``sqlite3.IntegrityError``.

    The invalid rows are: a research run with ``secret_values_included = 1``,
    a tool invocation whose decision is ``'approved'``, an eval result with
    score ``1.1``, and an eval result with ``routing_correct = 2``.
    """
    with closing(_conn()) as conn:
        conn.execute(_INSERT_LEO_AGENT_SQL)
        # Valid parent rows, so the failures below are not caused by missing
        # foreign-key targets.
        conn.execute(
            """INSERT INTO agent_research_runs
               (id, agent_slug, source_surface, request_kind, prompt_sha256,
                selected_route, status)
               VALUES ('run-constraints', 'leo', 'test', 'benchmark',
                       'sha256:prompt', 'web_search', 'answered')"""
        )
        conn.execute(
            """INSERT INTO agent_eval_cases
               (id, suite_id, case_slug, prompt_sha256, prompt_excerpt,
                expected_route)
               VALUES ('case-constraints', 'suite', 'case', 'sha256:prompt',
                       'redacted prompt', 'web_search')"""
        )

        invalid_statements = [
            """INSERT INTO agent_research_runs
               (id, agent_slug, source_surface, request_kind, prompt_sha256,
                selected_route, status, secret_values_included)
               VALUES ('run-secret', 'leo', 'test', 'benchmark',
                       'sha256:secret', 'web_search', 'answered', 1)""",
            """INSERT INTO agent_tool_invocations
               (id, research_run_id, provider, tool_name, tool_category,
                decision, decision_reason)
               VALUES ('tool-bad-decision', 'run-constraints', 'p', 't',
                       'web_search', 'approved', 'bad enum')""",
            """INSERT INTO agent_eval_results
               (id, eval_case_id, research_run_id, status, score)
               VALUES ('eval-bad-score', 'case-constraints', 'run-constraints',
                       'passed', 1.1)""",
            """INSERT INTO agent_eval_results
               (id, eval_case_id, research_run_id, status, routing_correct)
               VALUES ('eval-bad-bool', 'case-constraints', 'run-constraints',
                       'passed', 2)""",
        ]

        for statement in invalid_statements:
            try:
                conn.execute(statement)
            except sqlite3.IntegrityError:
                continue
            # Reaching this point means the schema accepted an invalid row.
            raise AssertionError(
                f"invalid statement unexpectedly passed: {statement}"
            )


def test_research_run_can_be_recorded_without_raw_prompt_or_private_payloads():
    """A run and tool invocation can be stored with hashes only.

    Neither excerpt column is supplied, so both must read back as NULL, and
    the ``secret_values_included`` flags must read back as 0 on both tables.
    """
    with closing(_conn()) as conn:
        conn.execute(_INSERT_LEO_AGENT_SQL)
        conn.execute(
            """INSERT INTO agent_research_runs
               (id, agent_slug, source_surface, source_ref, request_kind,
                prompt_sha256, selected_route, status, answer_sha256, proof_ref)
               VALUES (
                 'run-hash-only',
                 'leo',
                 'api',
                 'api:request-redacted',
                 'paid_work_order',
                 'sha256:prompt-only',
                 'social_trends',
                 'answered',
                 'sha256:answer-only',
                 'proof/hash-only.json'
               )"""
        )
        conn.execute(
            """INSERT INTO agent_tool_invocations
               (id, research_run_id, provider, tool_name, tool_category,
                decision, decision_reason, input_sha256, output_sha256)
               VALUES (
                 'tool-hash-only',
                 'run-hash-only',
                 'AgentCash StableSocial',
                 'lightreel-trends',
                 'social_trends',
                 'executed',
                 'Question asks for current Twitter/X discussion.',
                 'sha256:input-only',
                 'sha256:output-only'
               )"""
        )

        row = conn.execute(
            """SELECT r.prompt_excerpt,
                      r.answer_excerpt,
                      r.secret_values_included AS run_secret_flag,
                      t.secret_values_included AS tool_secret_flag
               FROM agent_research_runs r
               JOIN agent_tool_invocations t ON t.research_run_id = r.id
               WHERE r.id = 'run-hash-only'"""
        ).fetchone()

        assert row["prompt_excerpt"] is None
        assert row["answer_excerpt"] is None
        assert row["run_secret_flag"] == 0
        assert row["tool_secret_flag"] == 0