"""Tests for the decision-engine replay script.

The module under test is loaded by file path from ``scripts/`` at import
time and exposed as ``replay``; the fixtures it evaluates are read from
``fixtures/decision-engine-eval``.
"""

from __future__ import annotations

import importlib.util
import json
from pathlib import Path

# This file sits one directory below the repository root.
REPO_ROOT = Path(__file__).resolve().parents[1]
SCRIPT_PATH = REPO_ROOT / "scripts" / "replay_decision_engine_eval.py"
FIXTURES_DIR = REPO_ROOT / "fixtures" / "decision-engine-eval"

spec = importlib.util.spec_from_file_location("replay_decision_engine_eval", SCRIPT_PATH)
# spec_from_file_location may return None, and a spec may lack a loader.
# Validate both *before* the spec is consumed so a failure names the script
# path instead of surfacing as an AttributeError inside importlib.
assert spec is not None, f"could not build an import spec for {SCRIPT_PATH}"
assert spec.loader is not None, f"import spec for {SCRIPT_PATH} has no loader"
replay = importlib.util.module_from_spec(spec)
spec.loader.exec_module(replay)


def test_default_decision_engine_fixtures_replay_cleanly() -> None:
    """Evaluating the fixtures in FIXTURES_DIR yields a passing proof.

    Checks the overall ``ok`` flag, the fixture count, the route accuracy
    and the per-lane counts reported under ``metrics``.
    """
    fixtures = replay.load_fixtures(FIXTURES_DIR)
    proof = replay.evaluate_fixtures(fixtures)

    assert proof["ok"] is True
    assert proof["fixture_count"] == 3
    assert proof["metrics"]["route_accuracy"] == 1.0
    assert proof["metrics"]["lanes"] == {
        "kb-interop": 1,
        "rio-economics": 1,
        "theseus-model-integrity": 1,
    }


def test_candidate_false_approve_is_caught(tmp_path: Path) -> None:
    """A candidate verdict of "approve" on the reject fixture fails the proof.

    Writes a one-verdict candidate file into ``tmp_path``, loads it through
    the script's own candidate loader, and checks that the evaluation
    reports exactly that fixture as a false approve.
    """
    fixtures = replay.load_fixtures(FIXTURES_DIR)

    candidate_payload = {
        "candidate_name": "bad-single-answer-model",
        "verdicts": [
            {
                "fixture_id": "theseus_live_model_switch_reject",
                "disposition": "approve",
                "issue_tags": [],
                "primary_agent": "Theseus",
                "required_agents": ["Theseus"],
            }
        ],
    }
    candidate_path = tmp_path / "candidate.json"
    # Explicit encoding keeps the bytes on disk independent of the locale.
    candidate_path.write_text(json.dumps(candidate_payload), encoding="utf-8")

    candidate = replay._load_candidate_output(candidate_path)
    proof = replay.evaluate_fixtures(fixtures, candidate=candidate)

    assert proof["ok"] is False
    assert proof["candidate"]["false_approve_count"] == 1
    assert proof["candidate"]["false_approves"] == ["theseus_live_model_switch_reject"]