"""End-to-end local proof for Phase 1b agent routing."""
|
|
|
|
import pytest
|
|
|
|
from scripts.prove_phase1b_local import CROSS_DOMAIN_CASE, FEEDBACK_CASE, SINGLE_DOMAIN_CASES, run_phase1b_local_proof
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_phase1b_local_eval_cycle_routes_reviews_approves_and_feedbacks():
    """Run the local Phase 1b proof once and verify its summary and per-case results.

    The test checks, in order:

    * the reported scope, the succeeded/failed counts and the list of agents seen;
    * that every single-domain case and the cross-domain case is ``approved`` and
      was reviewed by exactly its ``expected_agents`` (compared in sorted order);
    * that the feedback case is still ``open``, reviewed only by ``Vida``, carries
      a ``request_changes`` domain verdict, and that the proof reports a single
      source feedback path under ``inbox/archive``.
    """
    report = await run_phase1b_local_proof()

    # --- run-level summary -------------------------------------------------
    assert report["scope"] == "local_no_network_phase1b_eval_cycle"
    # NOTE(review): the "+ 2" presumably accounts for the cross-domain and
    # feedback cases on top of the single-domain ones -- confirm against
    # scripts.prove_phase1b_local.
    assert report["succeeded"] == len(SINGLE_DOMAIN_CASES) + 2
    assert report["failed"] == 0
    assert report["agents_seen"] == ["Astra", "Clay", "Leo", "Rio", "Theseus", "Vida"]

    # Index the per-case results by case number for direct lookup below.
    outcome_by_number = {}
    for entry in report["case_results"]:
        outcome_by_number[entry["number"]] = entry

    # --- approved cases ----------------------------------------------------
    # Single-domain cases first, then the cross-domain case: both kinds must be
    # approved by exactly their expected reviewers.
    for expected in (*SINGLE_DOMAIN_CASES, CROSS_DOMAIN_CASE):
        outcome = outcome_by_number[expected["number"]]
        assert outcome["status"] == "approved"
        assert outcome["reviewers"] == sorted(expected["expected_agents"])

    # --- feedback case -----------------------------------------------------
    feedback_outcome = outcome_by_number[FEEDBACK_CASE["number"]]
    assert feedback_outcome["status"] == "open"
    assert feedback_outcome["reviewers"] == ["Vida"]
    assert feedback_outcome["domain_verdict"] == "request_changes"
    assert report["source_feedback_paths"] == [f"inbox/archive/phase1b-{FEEDBACK_CASE['number']}.md"]