-- Adds graph schema prerequisite plus research-eval schema/docs/tests for Leo
-- tool-use benchmarks and x402 research telemetry. Validated by full local
-- pytest and green CI.
-- (247 lines, 10 KiB, SQL)
-- Teleo Agent Research Eval Schema v1
-- Common SQL subset intended for ephemeral SQLite tests and Postgres/Supabase
-- staging. IDs are app-generated text IDs so this can run across engines.
--
-- Apply after teleo-agent-graph-v1.sql.
--
-- Secret policy: store hashes, redacted excerpts, and proof references only.
-- Raw prompts, bearer tokens, API keys, wallet secrets, and private receipts do
-- not belong in these tables.
|
|
|
|
-- Register this schema version in graph_schema_version.
-- ON CONFLICT DO NOTHING replaces SQLite's INSERT OR IGNORE, which PostgreSQL
-- rejects; SQLite (3.24+) and PostgreSQL both accept this form. Re-running the
-- file is then a no-op for an already-registered version, assuming
-- graph_schema_version enforces uniqueness on version (defined outside this
-- file -- TODO confirm).
INSERT INTO graph_schema_version (version, source)
VALUES ('teleo-agent-research-eval-v1', 'leo-x402-research-routing-benchmark')
ON CONFLICT DO NOTHING;
|
|
|
|
-- agent_research_runs: a research run attributed to an agent
-- (agent_slug -> agents.slug). The prompt and answer are stored as a SHA-256
-- column plus an optional length-capped excerpt (prompt <= 1000 characters,
-- answer <= 2000 characters), never as full text.
CREATE TABLE IF NOT EXISTS agent_research_runs (
    id TEXT PRIMARY KEY,
    agent_slug TEXT NOT NULL REFERENCES agents(slug),
    -- Surface the request arrived on.
    source_surface TEXT NOT NULL
        CHECK(source_surface IN ('telegram', 'api', 'checkout', 'web', 'cli', 'test', 'other')),
    source_ref TEXT,
    request_kind TEXT NOT NULL DEFAULT 'free'
        CHECK(request_kind IN ('free', 'paid_quote', 'paid_work_order', 'benchmark', 'system')),
    -- Plain text references; no foreign key is declared for these two columns.
    sponsored_work_order_id TEXT,
    payment_receipt_id TEXT,
    prompt_sha256 TEXT NOT NULL,
    prompt_excerpt TEXT,
    selected_provider TEXT,
    selected_route TEXT NOT NULL DEFAULT 'unknown'
        CHECK(selected_route IN (
            'none',
            'web_search',
            'social_trends',
            'structured_market_data',
            'local_context',
            'mixed',
            'unknown'
        )),
    status TEXT NOT NULL DEFAULT 'running'
        CHECK(status IN (
            'quoted',
            'payment_pending',
            'running',
            'answered',
            'abstained',
            'blocked',
            'failed',
            'cancelled'
        )),
    answer_sha256 TEXT,
    answer_excerpt TEXT,
    proof_ref TEXT,
    -- NOTE(review): REAL is binary floating point; consider NUMERIC if exact
    -- amounts matter. Left unchanged to keep the column type callers see.
    cost_amount REAL NOT NULL DEFAULT 0 CHECK(cost_amount >= 0),
    currency TEXT NOT NULL DEFAULT 'USDC',
    latency_ms INTEGER CHECK(latency_ms IS NULL OR latency_ms >= 0),
    source_count INTEGER NOT NULL DEFAULT 0 CHECK(source_count >= 0),
    -- Constrained to 0: a row flagged as containing secret values is rejected.
    secret_values_included INTEGER NOT NULL DEFAULT 0 CHECK(secret_values_included = 0),
    created_at TEXT DEFAULT CURRENT_TIMESTAMP,
    completed_at TEXT,
    CHECK(prompt_excerpt IS NULL OR length(prompt_excerpt) <= 1000),
    CHECK(answer_excerpt IS NULL OR length(answer_excerpt) <= 2000)
);
|
|
|
|
-- Secondary indexes on agent_research_runs:
--   (agent_slug, created_at)   per-agent lookups ordered/filtered by creation time
--   (sponsored_work_order_id)  lookups by work order reference
--   (status, selected_route)   filtering by run status and chosen route
CREATE INDEX IF NOT EXISTS idx_agent_research_runs_agent_created
    ON agent_research_runs(agent_slug, created_at);

CREATE INDEX IF NOT EXISTS idx_agent_research_runs_work_order
    ON agent_research_runs(sponsored_work_order_id);

CREATE INDEX IF NOT EXISTS idx_agent_research_runs_status_route
    ON agent_research_runs(status, selected_route);
|
|
|
|
-- agent_tool_invocations: tool/provider decisions recorded against a research
-- run. Rows are removed when the parent run is deleted (ON DELETE CASCADE).
-- Inputs, outputs and endpoints are stored as hash columns.
CREATE TABLE IF NOT EXISTS agent_tool_invocations (
    id TEXT PRIMARY KEY,
    research_run_id TEXT NOT NULL REFERENCES agent_research_runs(id) ON DELETE CASCADE,
    -- NOTE: with UNIQUE(research_run_id, sequence) below, at most one row per
    -- run can rely on the DEFAULT 0; writers must supply distinct values.
    sequence INTEGER NOT NULL DEFAULT 0 CHECK(sequence >= 0),
    provider TEXT NOT NULL,
    tool_name TEXT NOT NULL,
    tool_category TEXT NOT NULL
        CHECK(tool_category IN (
            'web_search',
            'social_trends',
            'market_data',
            'page_read',
            'x402_checkout',
            'agentcash',
            'faremeter',
            'database',
            'local_context',
            'other'
        )),
    endpoint_host TEXT,
    endpoint_hash TEXT,
    decision TEXT NOT NULL
        CHECK(decision IN ('candidate', 'selected', 'executed', 'skipped', 'rejected', 'fallback', 'failed')),
    decision_reason TEXT NOT NULL,
    -- Boolean stored as 0/1.
    paid INTEGER NOT NULL DEFAULT 0 CHECK(paid IN (0, 1)),
    rail TEXT CHECK(rail IS NULL OR rail IN ('x402', 'agentcash', 'manual', 'free', 'other')),
    network TEXT,
    -- NOTE(review): REAL is binary floating point; consider NUMERIC if exact
    -- amounts matter. Left unchanged to keep the column type callers see.
    amount REAL CHECK(amount IS NULL OR amount >= 0),
    currency TEXT NOT NULL DEFAULT 'USDC',
    -- Plain text reference; no foreign key is declared.
    payment_receipt_id TEXT,
    input_sha256 TEXT,
    output_sha256 TEXT,
    source_count INTEGER NOT NULL DEFAULT 0 CHECK(source_count >= 0),
    latency_ms INTEGER CHECK(latency_ms IS NULL OR latency_ms >= 0),
    error_class TEXT,
    -- Constrained to 0: a row flagged as containing secret values is rejected.
    secret_values_included INTEGER NOT NULL DEFAULT 0 CHECK(secret_values_included = 0),
    created_at TEXT DEFAULT CURRENT_TIMESTAMP,
    UNIQUE(research_run_id, sequence)
);
|
|
|
|
-- Secondary indexes on agent_tool_invocations:
--   (research_run_id, decision)  invocations of a run filtered by decision
--   (provider, tool_category)    lookups by provider and tool category
--   (payment_receipt_id)         lookups by payment receipt reference
CREATE INDEX IF NOT EXISTS idx_agent_tool_invocations_run_decision
    ON agent_tool_invocations(research_run_id, decision);

CREATE INDEX IF NOT EXISTS idx_agent_tool_invocations_provider_category
    ON agent_tool_invocations(provider, tool_category);

CREATE INDEX IF NOT EXISTS idx_agent_tool_invocations_receipt
    ON agent_tool_invocations(payment_receipt_id);
|
|
|
|
-- agent_research_sources: sources attached to a research run, optionally
-- linked to the tool invocation they are associated with. Deleting the run
-- deletes its sources (CASCADE); deleting the linked invocation only clears
-- tool_invocation_id (SET NULL).
CREATE TABLE IF NOT EXISTS agent_research_sources (
    id TEXT PRIMARY KEY,
    research_run_id TEXT NOT NULL REFERENCES agent_research_runs(id) ON DELETE CASCADE,
    tool_invocation_id TEXT REFERENCES agent_tool_invocations(id) ON DELETE SET NULL,
    source_type TEXT NOT NULL
        CHECK(source_type IN ('web', 'social', 'market', 'db', 'document', 'other')),
    source_uri TEXT,
    source_uri_sha256 TEXT,
    title TEXT,
    -- Boolean stored as 0/1.
    cited INTEGER NOT NULL DEFAULT 0 CHECK(cited IN (0, 1)),
    retrieval_rank INTEGER CHECK(retrieval_rank IS NULL OR retrieval_rank >= 0),
    observed_at TEXT,
    support_status TEXT NOT NULL DEFAULT 'unknown'
        CHECK(support_status IN ('supports', 'context', 'conflicts', 'stale', 'unknown')),
    -- Constrained to 0: a row flagged as containing secret values is rejected.
    secret_values_included INTEGER NOT NULL DEFAULT 0 CHECK(secret_values_included = 0),
    created_at TEXT DEFAULT CURRENT_TIMESTAMP
);
|
|
|
|
-- Secondary indexes on agent_research_sources:
--   (research_run_id, cited)  sources of a run, filterable by cited flag
--   (tool_invocation_id)      sources linked to a given tool invocation
CREATE INDEX IF NOT EXISTS idx_agent_research_sources_run
    ON agent_research_sources(research_run_id, cited);

CREATE INDEX IF NOT EXISTS idx_agent_research_sources_tool
    ON agent_research_sources(tool_invocation_id);
|
|
|
|
-- agent_eval_cases: versioned evaluation cases grouped by suite. A case is
-- unique per (suite_id, case_slug, case_version). The prompt and optional
-- fixture context are stored as a hash column plus a length-capped excerpt
-- (prompt <= 1000 characters, fixture context <= 2000 characters).
CREATE TABLE IF NOT EXISTS agent_eval_cases (
    id TEXT PRIMARY KEY,
    suite_id TEXT NOT NULL,
    case_slug TEXT NOT NULL,
    case_version INTEGER NOT NULL DEFAULT 1 CHECK(case_version >= 1),
    prompt_sha256 TEXT NOT NULL,
    prompt_excerpt TEXT NOT NULL CHECK(length(prompt_excerpt) <= 1000),
    fixture_context_sha256 TEXT,
    fixture_context_excerpt TEXT CHECK(fixture_context_excerpt IS NULL OR length(fixture_context_excerpt) <= 2000),
    -- Same value set as agent_research_runs.selected_route in this schema.
    expected_route TEXT NOT NULL
        CHECK(expected_route IN (
            'none',
            'web_search',
            'social_trends',
            'structured_market_data',
            'local_context',
            'mixed',
            'unknown'
        )),
    expected_provider TEXT,
    -- JSON documents stored as TEXT; the defaults are an empty array/object.
    -- The schema does not validate that the content is well-formed JSON.
    must_use_tools_json TEXT NOT NULL DEFAULT '[]',
    must_not_use_tools_json TEXT NOT NULL DEFAULT '[]',
    tags_json TEXT NOT NULL DEFAULT '[]',
    rubric_json TEXT NOT NULL DEFAULT '{}',
    stale_after TEXT,
    -- Boolean stored as 0/1.
    active INTEGER NOT NULL DEFAULT 1 CHECK(active IN (0, 1)),
    -- Constrained to 0: a row flagged as containing secret values is rejected.
    secret_values_included INTEGER NOT NULL DEFAULT 0 CHECK(secret_values_included = 0),
    created_at TEXT DEFAULT CURRENT_TIMESTAMP,
    UNIQUE(suite_id, case_slug, case_version)
);
|
|
|
|
-- Secondary indexes on agent_eval_cases:
--   (suite_id, active)  cases of a suite, filterable by active flag
--   (expected_route)    cases by expected route
CREATE INDEX IF NOT EXISTS idx_agent_eval_cases_suite_active
    ON agent_eval_cases(suite_id, active);

CREATE INDEX IF NOT EXISTS idx_agent_eval_cases_route
    ON agent_eval_cases(expected_route);
|
|
|
|
-- agent_eval_results: the result of scoring one research run against one
-- eval case; at most one row per (eval_case_id, research_run_id). Deleting
-- the case or the run deletes the result (CASCADE); deleting the linked graph
-- evaluation run only clears graph_evaluation_run_id (SET NULL).
-- Every *_score column, and score itself, is nullable and otherwise
-- constrained to the closed range [0, 1].
CREATE TABLE IF NOT EXISTS agent_eval_results (
    id TEXT PRIMARY KEY,
    eval_case_id TEXT NOT NULL REFERENCES agent_eval_cases(id) ON DELETE CASCADE,
    research_run_id TEXT NOT NULL REFERENCES agent_research_runs(id) ON DELETE CASCADE,
    graph_evaluation_run_id TEXT REFERENCES graph_evaluation_runs(id) ON DELETE SET NULL,
    status TEXT NOT NULL
        CHECK(status IN ('passed', 'failed', 'warning', 'blocked', 'skipped')),
    score REAL CHECK(score IS NULL OR (score >= 0 AND score <= 1)),
    -- Nullable boolean stored as 0/1.
    routing_correct INTEGER CHECK(routing_correct IS NULL OR routing_correct IN (0, 1)),
    tool_choice_score REAL CHECK(tool_choice_score IS NULL OR (tool_choice_score >= 0 AND tool_choice_score <= 1)),
    source_quality_score REAL CHECK(source_quality_score IS NULL OR (source_quality_score >= 0 AND source_quality_score <= 1)),
    groundedness_score REAL CHECK(groundedness_score IS NULL OR (groundedness_score >= 0 AND groundedness_score <= 1)),
    freshness_score REAL CHECK(freshness_score IS NULL OR (freshness_score >= 0 AND freshness_score <= 1)),
    cost_efficiency_score REAL CHECK(cost_efficiency_score IS NULL OR (cost_efficiency_score >= 0 AND cost_efficiency_score <= 1)),
    safety_payment_score REAL CHECK(safety_payment_score IS NULL OR (safety_payment_score >= 0 AND safety_payment_score <= 1)),
    failure_reason TEXT,
    judge TEXT,
    proof_ref TEXT,
    -- Constrained to 0: a row flagged as containing secret values is rejected.
    secret_values_included INTEGER NOT NULL DEFAULT 0 CHECK(secret_values_included = 0),
    created_at TEXT DEFAULT CURRENT_TIMESTAMP,
    UNIQUE(eval_case_id, research_run_id)
);
|
|
|
|
-- Secondary indexes on agent_eval_results:
--   (eval_case_id, status)     results of a case filtered by status
--   (research_run_id)          results for a given research run
--   (graph_evaluation_run_id)  results linked to a graph evaluation run
CREATE INDEX IF NOT EXISTS idx_agent_eval_results_case_status
    ON agent_eval_results(eval_case_id, status);

CREATE INDEX IF NOT EXISTS idx_agent_eval_results_run
    ON agent_eval_results(research_run_id);

CREATE INDEX IF NOT EXISTS idx_agent_eval_results_graph_eval
    ON agent_eval_results(graph_evaluation_run_id);
|
|
|
|
-- work_order_graph_links: links a sponsored work order to a graph object.
-- The target is identified by (graph_layer, graph_id) with no foreign key,
-- so the schema does not enforce that graph_id exists in the named layer.
-- A given (work order, role, layer, id) combination can be recorded once.
CREATE TABLE IF NOT EXISTS work_order_graph_links (
    id TEXT PRIMARY KEY,
    -- Plain text reference; no foreign key is declared.
    sponsored_work_order_id TEXT NOT NULL,
    -- Role the linked object has for the work order.
    role TEXT NOT NULL
        CHECK(role IN (
            'input_context',
            'evaluation_target',
            'created_evidence',
            'created_claim',
            'created_eval_run',
            'research_run',
            'tool_trace',
            'history_trace',
            'outcome_trace'
        )),
    -- Kind of object that graph_id refers to.
    graph_layer TEXT NOT NULL
        CHECK(graph_layer IN (
            'persona',
            'strategy',
            'position',
            'belief',
            'claim',
            'evidence',
            'edge',
            'graph_evaluation_run',
            'cascade_event',
            'graph_history_event',
            'agent_research_run',
            'agent_tool_invocation',
            'agent_eval_result',
            'outcome_observation'
        )),
    graph_id TEXT NOT NULL,
    rationale TEXT,
    -- Constrained to 0: a row flagged as containing secret values is rejected.
    secret_values_included INTEGER NOT NULL DEFAULT 0 CHECK(secret_values_included = 0),
    created_at TEXT DEFAULT CURRENT_TIMESTAMP,
    UNIQUE(sponsored_work_order_id, role, graph_layer, graph_id)
);
|
|
|
|
-- Secondary indexes on work_order_graph_links:
--   (sponsored_work_order_id)  links belonging to a work order
--   (graph_layer, graph_id)    reverse lookup from a graph object to its links
-- NOTE(review): the table's UNIQUE constraint also leads with
-- sponsored_work_order_id, so on engines that back UNIQUE with an index the
-- first index below may be redundant -- confirm before dropping it.
CREATE INDEX IF NOT EXISTS idx_work_order_graph_links_work_order
    ON work_order_graph_links(sponsored_work_order_id);

CREATE INDEX IF NOT EXISTS idx_work_order_graph_links_graph
    ON work_order_graph_links(graph_layer, graph_id);
|