teleo-infrastructure/proof/phase1b-local-e2e-proof.json
2026-05-29 15:08:09 +02:00

930 lines
19 KiB
JSON

{
"agent_review_calls": [
{
"agent": "Leo",
"files": [
"domains/grand-strategy/strategy.md"
],
"route": {
"evidence": [
{
"agent": "Leo",
"signal": "path",
"value": "domains/grand-strategy/strategy.md",
"weight": 8
}
],
"fallback": false,
"primary_agent": "Leo",
"required_agents": [
"Leo"
],
"route_kind": "single",
"scores": {
"Astra": 0,
"Clay": 0,
"Leo": 8,
"Rio": 0,
"Theseus": 0,
"Vida": 0
},
"touched_domains": [
"grand-strategy"
]
},
"tier": "STANDARD",
"verdict": "APPROVE"
},
{
"agent": "Theseus",
"files": [
"domains/ai-alignment/systems.md"
],
"route": {
"evidence": [
{
"agent": "Theseus",
"signal": "path",
"value": "domains/ai-alignment/systems.md",
"weight": 8
}
],
"fallback": false,
"primary_agent": "Theseus",
"required_agents": [
"Theseus"
],
"route_kind": "single",
"scores": {
"Astra": 0,
"Clay": 0,
"Leo": 0,
"Rio": 0,
"Theseus": 8,
"Vida": 0
},
"touched_domains": [
"ai-alignment"
]
},
"tier": "STANDARD",
"verdict": "APPROVE"
},
{
"agent": "Rio",
"files": [
"domains/internet-finance/x402.md"
],
"route": {
"evidence": [
{
"agent": "Rio",
"signal": "path",
"value": "domains/internet-finance/x402.md",
"weight": 8
},
{
"agent": "Rio",
"signal": "keyword",
"value": "x402",
"weight": 2
}
],
"fallback": false,
"primary_agent": "Rio",
"required_agents": [
"Rio"
],
"route_kind": "single",
"scores": {
"Astra": 0,
"Clay": 0,
"Leo": 0,
"Rio": 10,
"Theseus": 0,
"Vida": 0
},
"touched_domains": [
"internet-finance"
]
},
"tier": "STANDARD",
"verdict": "APPROVE"
},
{
"agent": "Vida",
"files": [
"domains/health/clinical.md"
],
"route": {
"evidence": [
{
"agent": "Vida",
"signal": "path",
"value": "domains/health/clinical.md",
"weight": 8
},
{
"agent": "Vida",
"signal": "keyword",
"value": "health",
"weight": 2
},
{
"agent": "Vida",
"signal": "keyword",
"value": "clinical",
"weight": 2
}
],
"fallback": false,
"primary_agent": "Vida",
"required_agents": [
"Vida"
],
"route_kind": "single",
"scores": {
"Astra": 0,
"Clay": 0,
"Leo": 0,
"Rio": 0,
"Theseus": 0,
"Vida": 12
},
"touched_domains": [
"health"
]
},
"tier": "STANDARD",
"verdict": "APPROVE"
},
{
"agent": "Clay",
"files": [
"domains/entertainment/games.md"
],
"route": {
"evidence": [
{
"agent": "Clay",
"signal": "path",
"value": "domains/entertainment/games.md",
"weight": 8
},
{
"agent": "Clay",
"signal": "keyword",
"value": "entertainment",
"weight": 2
},
{
"agent": "Clay",
"signal": "keyword",
"value": "games",
"weight": 2
}
],
"fallback": false,
"primary_agent": "Clay",
"required_agents": [
"Clay"
],
"route_kind": "single",
"scores": {
"Astra": 0,
"Clay": 12,
"Leo": 0,
"Rio": 0,
"Theseus": 0,
"Vida": 0
},
"touched_domains": [
"entertainment"
]
},
"tier": "STANDARD",
"verdict": "APPROVE"
},
{
"agent": "Astra",
"files": [
"domains/space-development/robotics.md"
],
"route": {
"evidence": [
{
"agent": "Astra",
"signal": "path",
"value": "domains/space-development/robotics.md",
"weight": 8
},
{
"agent": "Astra",
"signal": "keyword",
"value": "space",
"weight": 2
},
{
"agent": "Astra",
"signal": "keyword",
"value": "robotics",
"weight": 2
}
],
"fallback": false,
"primary_agent": "Astra",
"required_agents": [
"Astra"
],
"route_kind": "single",
"scores": {
"Astra": 12,
"Clay": 0,
"Leo": 0,
"Rio": 0,
"Theseus": 0,
"Vida": 0
},
"touched_domains": [
"space-development"
]
},
"tier": "STANDARD",
"verdict": "APPROVE"
},
{
"agent": "Rio",
"files": [
"domains/ai-systems/agent-wallets.md",
"domains/internet-finance/x402.md"
],
"route": {
"evidence": [
{
"agent": "Theseus",
"signal": "path",
"value": "domains/ai-systems/agent-wallets.md",
"weight": 8
},
{
"agent": "Rio",
"signal": "path",
"value": "domains/internet-finance/x402.md",
"weight": 8
},
{
"agent": "Rio",
"signal": "keyword",
"value": "x402",
"weight": 2
}
],
"fallback": false,
"primary_agent": "Rio",
"required_agents": [
"Rio",
"Theseus"
],
"route_kind": "multi",
"scores": {
"Astra": 0,
"Clay": 0,
"Leo": 0,
"Rio": 10,
"Theseus": 8,
"Vida": 0
},
"touched_domains": [
"ai-systems",
"internet-finance"
]
},
"tier": "STANDARD",
"verdict": "APPROVE"
},
{
"agent": "Theseus",
"files": [
"domains/ai-systems/agent-wallets.md",
"domains/internet-finance/x402.md"
],
"route": {
"evidence": [
{
"agent": "Theseus",
"signal": "path",
"value": "domains/ai-systems/agent-wallets.md",
"weight": 8
},
{
"agent": "Rio",
"signal": "path",
"value": "domains/internet-finance/x402.md",
"weight": 8
},
{
"agent": "Rio",
"signal": "keyword",
"value": "x402",
"weight": 2
}
],
"fallback": false,
"primary_agent": "Rio",
"required_agents": [
"Rio",
"Theseus"
],
"route_kind": "multi",
"scores": {
"Astra": 0,
"Clay": 0,
"Leo": 0,
"Rio": 10,
"Theseus": 8,
"Vida": 0
},
"touched_domains": [
"ai-systems",
"internet-finance"
]
},
"tier": "STANDARD",
"verdict": "APPROVE"
},
{
"agent": "Vida",
"files": [
"domains/health/incorrect-health-claim.md"
],
"route": {
"evidence": [
{
"agent": "Vida",
"signal": "path",
"value": "domains/health/incorrect-health-claim.md",
"weight": 8
},
{
"agent": "Vida",
"signal": "keyword",
"value": "health",
"weight": 2
}
],
"fallback": false,
"primary_agent": "Vida",
"required_agents": [
"Vida"
],
"route_kind": "single",
"scores": {
"Astra": 0,
"Clay": 0,
"Leo": 0,
"Rio": 0,
"Theseus": 0,
"Vida": 10
},
"touched_domains": [
"health"
]
},
"tier": "STANDARD",
"verdict": "REQUEST_CHANGES"
}
],
"agents_seen": [
"Astra",
"Clay",
"Leo",
"Rio",
"Theseus",
"Vida"
],
"case_results": [
{
"comments": 1,
"domain": "grand-strategy",
"domain_agent": "Leo",
"domain_verdict": "skipped",
"expected_agents": [
"Leo"
],
"markers": [
"<!-- PHASE1B_REVIEW:PR=101:AGENT=LEO -->"
],
"number": 101,
"reviewers": [
"Leo"
],
"status": "approved"
},
{
"comments": 1,
"domain": "ai-alignment",
"domain_agent": "Theseus",
"domain_verdict": "approve",
"expected_agents": [
"Theseus"
],
"markers": [
"<!-- PHASE1B_REVIEW:PR=102:AGENT=THESEUS -->"
],
"number": 102,
"reviewers": [
"Theseus"
],
"status": "approved"
},
{
"comments": 1,
"domain": "internet-finance",
"domain_agent": "Rio",
"domain_verdict": "approve",
"expected_agents": [
"Rio"
],
"markers": [
"<!-- PHASE1B_REVIEW:PR=103:AGENT=RIO -->"
],
"number": 103,
"reviewers": [
"Rio"
],
"status": "approved"
},
{
"comments": 1,
"domain": "health",
"domain_agent": "Vida",
"domain_verdict": "approve",
"expected_agents": [
"Vida"
],
"markers": [
"<!-- PHASE1B_REVIEW:PR=104:AGENT=VIDA -->"
],
"number": 104,
"reviewers": [
"Vida"
],
"status": "approved"
},
{
"comments": 1,
"domain": "entertainment",
"domain_agent": "Clay",
"domain_verdict": "approve",
"expected_agents": [
"Clay"
],
"markers": [
"<!-- PHASE1B_REVIEW:PR=105:AGENT=CLAY -->"
],
"number": 105,
"reviewers": [
"Clay"
],
"status": "approved"
},
{
"comments": 1,
"domain": "space-development",
"domain_agent": "Astra",
"domain_verdict": "approve",
"expected_agents": [
"Astra"
],
"markers": [
"<!-- PHASE1B_REVIEW:PR=106:AGENT=ASTRA -->"
],
"number": 106,
"reviewers": [
"Astra"
],
"status": "approved"
},
{
"comments": 2,
"domain": "cross-ai-finance",
"domain_agent": "Rio",
"domain_verdict": "approve",
"expected_agents": [
"Rio",
"Theseus"
],
"markers": [
"<!-- PHASE1B_REVIEW:PR=107:AGENT=RIO -->",
"<!-- PHASE1B_REVIEW:PR=107:AGENT=THESEUS -->"
],
"number": 107,
"reviewers": [
"Rio",
"Theseus"
],
"status": "approved"
},
{
"comments": 1,
"domain": "health-feedback",
"domain_agent": "Vida",
"domain_verdict": "request_changes",
"expected_agents": [
"Vida"
],
"markers": [
"<!-- PHASE1B_REVIEW:PR=108:AGENT=VIDA -->"
],
"number": 108,
"reviewers": [
"Vida"
],
"status": "open"
}
],
"cases_total": 8,
"eval_feedback": [
{
"issues": [],
"outcome": "approved",
"pr": 101
},
{
"issues": [],
"outcome": "approved",
"pr": 102
},
{
"issues": [],
"outcome": "approved",
"pr": 103
},
{
"issues": [],
"outcome": "approved",
"pr": 104
},
{
"issues": [],
"outcome": "approved",
"pr": 105
},
{
"issues": [],
"outcome": "approved",
"pr": 106
},
{
"issues": [],
"outcome": "approved",
"pr": 107
},
{
"issues": [
"factual_discrepancy"
],
"outcome": "rejected",
"pr": 108
}
],
"failed": 0,
"feature_flag": "PHASE1B_AGENT_ROUTING_ENABLED",
"formal_approvals": [
101,
102,
103,
104,
105,
106,
107
],
"ok": true,
"rejection_dispositions": [
{
"eval_attempts": 1,
"issues": [
"factual_discrepancy"
],
"pr": 108
}
],
"route_events": [
{
"pr": 101,
"route": {
"evidence": [
{
"agent": "Leo",
"signal": "path",
"value": "domains/grand-strategy/strategy.md",
"weight": 8
}
],
"fallback": false,
"primary_agent": "Leo",
"required_agents": [
"Leo"
],
"route_kind": "single",
"scores": {
"Astra": 0,
"Clay": 0,
"Leo": 8,
"Rio": 0,
"Theseus": 0,
"Vida": 0
},
"touched_domains": [
"grand-strategy"
]
},
"tier": "STANDARD"
},
{
"pr": 102,
"route": {
"evidence": [
{
"agent": "Theseus",
"signal": "path",
"value": "domains/ai-alignment/systems.md",
"weight": 8
}
],
"fallback": false,
"primary_agent": "Theseus",
"required_agents": [
"Theseus"
],
"route_kind": "single",
"scores": {
"Astra": 0,
"Clay": 0,
"Leo": 0,
"Rio": 0,
"Theseus": 8,
"Vida": 0
},
"touched_domains": [
"ai-alignment"
]
},
"tier": "STANDARD"
},
{
"pr": 103,
"route": {
"evidence": [
{
"agent": "Rio",
"signal": "path",
"value": "domains/internet-finance/x402.md",
"weight": 8
},
{
"agent": "Rio",
"signal": "keyword",
"value": "x402",
"weight": 2
}
],
"fallback": false,
"primary_agent": "Rio",
"required_agents": [
"Rio"
],
"route_kind": "single",
"scores": {
"Astra": 0,
"Clay": 0,
"Leo": 0,
"Rio": 10,
"Theseus": 0,
"Vida": 0
},
"touched_domains": [
"internet-finance"
]
},
"tier": "STANDARD"
},
{
"pr": 104,
"route": {
"evidence": [
{
"agent": "Vida",
"signal": "path",
"value": "domains/health/clinical.md",
"weight": 8
},
{
"agent": "Vida",
"signal": "keyword",
"value": "health",
"weight": 2
},
{
"agent": "Vida",
"signal": "keyword",
"value": "clinical",
"weight": 2
}
],
"fallback": false,
"primary_agent": "Vida",
"required_agents": [
"Vida"
],
"route_kind": "single",
"scores": {
"Astra": 0,
"Clay": 0,
"Leo": 0,
"Rio": 0,
"Theseus": 0,
"Vida": 12
},
"touched_domains": [
"health"
]
},
"tier": "STANDARD"
},
{
"pr": 105,
"route": {
"evidence": [
{
"agent": "Clay",
"signal": "path",
"value": "domains/entertainment/games.md",
"weight": 8
},
{
"agent": "Clay",
"signal": "keyword",
"value": "entertainment",
"weight": 2
},
{
"agent": "Clay",
"signal": "keyword",
"value": "games",
"weight": 2
}
],
"fallback": false,
"primary_agent": "Clay",
"required_agents": [
"Clay"
],
"route_kind": "single",
"scores": {
"Astra": 0,
"Clay": 12,
"Leo": 0,
"Rio": 0,
"Theseus": 0,
"Vida": 0
},
"touched_domains": [
"entertainment"
]
},
"tier": "STANDARD"
},
{
"pr": 106,
"route": {
"evidence": [
{
"agent": "Astra",
"signal": "path",
"value": "domains/space-development/robotics.md",
"weight": 8
},
{
"agent": "Astra",
"signal": "keyword",
"value": "space",
"weight": 2
},
{
"agent": "Astra",
"signal": "keyword",
"value": "robotics",
"weight": 2
}
],
"fallback": false,
"primary_agent": "Astra",
"required_agents": [
"Astra"
],
"route_kind": "single",
"scores": {
"Astra": 12,
"Clay": 0,
"Leo": 0,
"Rio": 0,
"Theseus": 0,
"Vida": 0
},
"touched_domains": [
"space-development"
]
},
"tier": "STANDARD"
},
{
"pr": 107,
"route": {
"evidence": [
{
"agent": "Theseus",
"signal": "path",
"value": "domains/ai-systems/agent-wallets.md",
"weight": 8
},
{
"agent": "Rio",
"signal": "path",
"value": "domains/internet-finance/x402.md",
"weight": 8
},
{
"agent": "Rio",
"signal": "keyword",
"value": "x402",
"weight": 2
}
],
"fallback": false,
"primary_agent": "Rio",
"required_agents": [
"Rio",
"Theseus"
],
"route_kind": "multi",
"scores": {
"Astra": 0,
"Clay": 0,
"Leo": 0,
"Rio": 10,
"Theseus": 8,
"Vida": 0
},
"touched_domains": [
"ai-systems",
"internet-finance"
]
},
"tier": "STANDARD"
},
{
"pr": 108,
"route": {
"evidence": [
{
"agent": "Vida",
"signal": "path",
"value": "domains/health/incorrect-health-claim.md",
"weight": 8
},
{
"agent": "Vida",
"signal": "keyword",
"value": "health",
"weight": 2
}
],
"fallback": false,
"primary_agent": "Vida",
"required_agents": [
"Vida"
],
"route_kind": "single",
"scores": {
"Astra": 0,
"Clay": 0,
"Leo": 0,
"Rio": 0,
"Theseus": 0,
"Vida": 10
},
"touched_domains": [
"health"
]
},
"tier": "STANDARD"
}
],
"schema_version": 27,
"scope": "local_no_network_phase1b_eval_cycle",
"source_feedback_paths": [
"inbox/archive/phase1b-108.md"
],
"succeeded": 8
}