- Define Rio and Theseus as economics and model-integrity evaluators
- Add DB, Hermes, and OpenClaw skills with no-secret defaults
- Gate CI on LLM refinement contracts; verify with 422-test suite

`.agents/skills/decision-engine-refinement/SKILL.md`
`.agents/skills/nousresearch-hermes-agent/SKILL.md`
`.agents/skills/openclaw-agent/SKILL.md`
`.agents/skills/teleo-db-operator/SKILL.md`
`.crabbox.yaml`
`.github/workflows/ci.yml`
`docs/llm-refinement-decision-engine.md`
`scripts/check_llm_refinement_contract.py`
116 lines
3.9 KiB
Python
Executable file
116 lines
3.9 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
"""Validate the LLM refinement and decision-engine guidance surface."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import json
|
|
import re
|
|
from pathlib import Path
|
|
|
|
# Two levels above this file; every path below is anchored here.
REPO_ROOT = Path(__file__).resolve().parents[1]

# Logical key -> file that the contract check reads. The skill entries all
# follow the same ``.agents/skills/<folder>/SKILL.md`` layout, so they are
# generated from a (key, folder) table rather than spelled out one by one.
REQUIRED_FILES = {
    "program_doc": REPO_ROOT.joinpath("docs", "llm-refinement-decision-engine.md"),
    **{
        key: REPO_ROOT.joinpath(".agents", "skills", folder, "SKILL.md")
        for key, folder in (
            ("decision_skill", "decision-engine-refinement"),
            ("db_skill", "teleo-db-operator"),
            ("hermes_skill", "nousresearch-hermes-agent"),
            ("openclaw_skill", "openclaw-agent"),
        )
    },
}
|
|
|
|
# Phrases that must each appear verbatim (case-sensitive substring match) in
# the program doc; main() raises AssertionError listing any that are absent.
PROGRAM_REQUIRED_PHRASES = [
    # Ownership split between infrastructure and this repository.
    "Pentagon.run should own disposable infrastructure",
    "This repo should own decision quality",
    # Evaluator roles.
    "Rio becomes the economic and incentive-quality evaluator",
    "Theseus becomes the model-integrity and agent-refinement evaluator",
    # Guard-rail statements.
    "No model switch is accepted because it",
    "Default is read-only",
]
|
|
|
|
# REQUIRED_FILES key -> phrases that the corresponding SKILL.md must contain
# verbatim. main() walks this mapping in insertion order and stops at the
# first skill file that is missing any of its phrases.
SKILL_REQUIRED = dict(
    decision_skill=[
        "Rio economics",
        "Theseus model integrity",
        "Do not change live model assignments",
        "baseline verdict output",
    ],
    db_skill=[
        "Default to read-only",
        "BEGIN IMMEDIATE",
        "Do not attach, copy, or commit `pipeline.db`",
        "review_records",
    ],
    hermes_skill=[
        "model switching",
        "fixture-first",
        "Rio Hermes package",
        "Theseus Hermes package",
    ],
    openclaw_skill=[
        "AGENTS.md",
        "SOUL.md",
        "TOOLS.md",
        "Default deny",
    ],
)
|
|
|
|
|
|
def _read(path: Path) -> str:
|
|
if not path.exists():
|
|
raise AssertionError(f"missing file: {path.relative_to(REPO_ROOT)}")
|
|
return path.read_text()
|
|
|
|
|
|
def _assert_frontmatter(path: Path, text: str) -> None:
|
|
match = re.match(r"^---\n(?P<body>.*?)\n---\n", text, flags=re.DOTALL)
|
|
if not match:
|
|
raise AssertionError(f"{path.relative_to(REPO_ROOT)} missing YAML frontmatter")
|
|
body = match.group("body")
|
|
if "name:" not in body or "description:" not in body:
|
|
raise AssertionError(f"{path.relative_to(REPO_ROOT)} frontmatter needs name and description")
|
|
|
|
|
|
def main() -> int:
    """Run every contract check and record a JSON proof of the result.

    Steps:
      1. Parse ``--output`` (a path joined onto ``REPO_ROOT``).
      2. Confirm the program doc contains each entry of
         ``PROGRAM_REQUIRED_PHRASES``.
      3. For each entry of ``SKILL_REQUIRED``, read the skill file, check its
         frontmatter, and confirm it contains each required phrase.
      4. Write the proof JSON to the output path (creating parent
         directories) and print the same JSON to stdout.

    Returns:
        ``0`` once all checks have passed and the proof has been written.

    Raises:
        AssertionError: On the first check that fails.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--output", default=".crabbox-results/llm-refinement-contract.json")
    options = cli.parse_args()

    # --- program doc -------------------------------------------------------
    program_path = REQUIRED_FILES["program_doc"]
    program_text = _read(program_path)
    absent_from_program = [
        wanted for wanted in PROGRAM_REQUIRED_PHRASES if wanted not in program_text
    ]
    if absent_from_program:
        raise AssertionError(f"program doc missing phrases: {absent_from_program}")

    # --- skill files -------------------------------------------------------
    skills_report = {}
    for skill_key, expected in SKILL_REQUIRED.items():
        skill_path = REQUIRED_FILES[skill_key]
        contents = _read(skill_path)
        _assert_frontmatter(skill_path, contents)

        absent = [wanted for wanted in expected if wanted not in contents]
        if absent:
            raise AssertionError(f"{skill_path.relative_to(REPO_ROOT)} missing phrases: {absent}")

        skills_report[skill_key] = {
            "path": str(skill_path.relative_to(REPO_ROOT)),
            "phrases_checked": expected,
        }

    # --- proof artifact ----------------------------------------------------
    ownership = {
        "infra_owner": "Pentagon.run",
        "repo_owner": "decision quality, rubrics, model evals, prompt/tool refinement, DB feedback loops",
    }
    proof = {
        "ok": True,
        "scope": "llm_refinement_decision_engine_contract",
        "program_doc": str(program_path.relative_to(REPO_ROOT)),
        "program_phrases_checked": PROGRAM_REQUIRED_PHRASES,
        "skills": skills_report,
        "pivot": ownership,
    }

    destination = REPO_ROOT / options.output
    destination.parent.mkdir(parents=True, exist_ok=True)
    # Render once; the file gets a trailing newline, stdout relies on print().
    rendered = json.dumps(proof, indent=2, sort_keys=True)
    destination.write_text(rendered + "\n")
    print(rendered)
    return 0
|
|
|
|
|
|
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    exit_status = main()
    raise SystemExit(exit_status)
|