Add Crabbox CI contract gate

This commit is contained in:
twentyOne2x 2026-06-01 15:36:03 +02:00
parent 69b4987415
commit a2620c1f19
8 changed files with 448 additions and 30 deletions

View file

@ -6,16 +6,19 @@ Allowed jobs:
- `crabbox job run unit`
- `crabbox job run lint-phase1b`
- `crabbox job run ci-contract`
- `crabbox job run phase1b-local-proof`
- `crabbox job run sync-smoke`
Default workflow:
1. Run `crabbox job run --dry-run phase1b-local-proof`.
2. Inspect the planned commands and confirm no production secrets or production deploy commands appear.
3. Run `crabbox job run phase1b-local-proof`.
4. Save the run id, lease id, stdout, downloaded proof JSON, and JUnit output.
5. Stop the lease unless the CLI has already stopped it.
1. Run `crabbox job run --dry-run ci-contract`.
2. Run `crabbox job run --dry-run phase1b-local-proof`.
3. Inspect the planned commands and confirm no production secrets or production deploy commands appear.
4. Run `crabbox job run ci-contract`.
5. Run `crabbox job run phase1b-local-proof`.
6. Save the run id, lease id, stdout, downloaded proof JSON, and JUnit output.
7. Stop the lease unless the CLI has already stopped it.
Boundaries:

View file

@ -59,6 +59,29 @@ ssh:
- "22"
jobs:
ci-contract:
provider: hetzner
target: linux
architecture: arm64
class: beast
hydrate:
actions: true
githubRunner: false
waitTimeout: 20m
keepAliveMinutes: 90
actions:
workflow: .github/workflows/crabbox.yml
job: hydrate
shell: true
command: >
python3 -m pip install -e '.[dev]' &&
mkdir -p .crabbox-results &&
python3 scripts/check_crabbox_ci_contract.py
--output .crabbox-results/crabbox-ci-contract.json
downloads:
- .crabbox-results/crabbox-ci-contract.json
stop: always
unit:
provider: hetzner
target: linux
@ -136,6 +159,7 @@ jobs:
junit:
- .crabbox-results/phase1b-pytest.xml
downloads:
- .crabbox-results/crabbox-ci-contract.json
- proof/phase1b-local-e2e-proof.json
- .crabbox-results/phase1b-pytest.xml
- .crabbox-results/phase1b-proof-summary.json

View file

@ -10,20 +10,29 @@ on:
permissions:
contents: read
concurrency:
group: ci-${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
env:
PYTHON_VERSION: "3.11"
CI: "1"
jobs:
python:
lint:
name: Focused lint
runs-on: ubuntu-latest
timeout-minutes: 20
timeout-minutes: 10
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: "3.11"
python-version: ${{ env.PYTHON_VERSION }}
- name: Install
run: |
python -m pip install --upgrade pip
python -m pip install -e ".[dev]"
- name: Focused lint
- name: Ruff focused surface
run: |
python -m ruff check \
lib/agent_routing.py \
@ -33,6 +42,7 @@ jobs:
lib/llm.py \
lib/post_extract.py \
telegram/approvals.py \
scripts/check_crabbox_ci_contract.py \
scripts/prove_phase1b_local.py \
tests/test_agent_routing.py \
tests/test_evaluate_agent_routing.py \
@ -40,21 +50,87 @@ jobs:
tests/test_eval_parse.py \
tests/test_contributor.py \
tests/test_search.py
- name: Unit tests
test:
name: Unit tests
runs-on: ubuntu-latest
timeout-minutes: 20
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: ${{ env.PYTHON_VERSION }}
- name: Install
run: |
python -m pip install --upgrade pip
python -m pip install -e ".[dev]"
- name: Pytest
run: |
mkdir -p .crabbox-results
python -m pytest --junitxml=.crabbox-results/pytest.xml
- name: Phase 1B local proof
env:
PHASE1B_AGENT_ROUTING_ENABLED: "true"
- name: Upload test artifact
if: always()
uses: actions/upload-artifact@v4
with:
name: teleo-infrastructure-pytest
path: .crabbox-results/pytest.xml
if-no-files-found: warn
crabbox-contract:
name: Crabbox and Leo contract
runs-on: ubuntu-latest
timeout-minutes: 10
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: ${{ env.PYTHON_VERSION }}
- name: Install
run: |
python -m pip install --upgrade pip
python -m pip install -e ".[dev]"
- name: Validate repo-owned contract
run: |
python scripts/check_crabbox_ci_contract.py \
--output .crabbox-results/crabbox-ci-contract.json
- name: Upload contract artifact
if: always()
uses: actions/upload-artifact@v4
with:
name: teleo-infrastructure-crabbox-contract
path: .crabbox-results/crabbox-ci-contract.json
if-no-files-found: error
phase1b-local-proof:
name: Phase 1B local proof
runs-on: ubuntu-latest
needs:
- lint
- test
- crabbox-contract
timeout-minutes: 20
env:
PHASE1B_AGENT_ROUTING_ENABLED: "true"
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: ${{ env.PYTHON_VERSION }}
- name: Install
run: |
python -m pip install --upgrade pip
python -m pip install -e ".[dev]"
- name: Run proof wrapper
run: |
scripts/crabbox_phase1b_proof.sh
- name: Upload proof artifacts
if: always()
uses: actions/upload-artifact@v4
with:
name: teleo-infrastructure-ci-proof
name: teleo-infrastructure-phase1b-proof
path: |
.crabbox-results/crabbox-ci-contract.json
proof/phase1b-local-e2e-proof.json
.crabbox-results/*.xml
.crabbox-results/*.json
.crabbox-results/phase1b-pytest.xml
.crabbox-results/phase1b-proof-summary.json
if-no-files-found: warn

View file

@ -2,21 +2,53 @@
"status": "blocked_remote_execution",
"scope": "crabbox remote proof",
"attempted_discovery": [
"installed Crabbox 0.22.1 from the official darwin_arm64 release tarball after Homebrew was blocked by outdated Command Line Tools",
"verified Crabbox CLI is installed at /Users/user/.local/bin/crabbox",
"ran crabbox job list",
"ran crabbox sync-plan",
"ran crabbox job run --dry-run unit",
"ran crabbox job run --dry-run phase1b-local-proof",
"ran crabbox doctor --json",
"checked presence of CRABBOX_COORDINATOR, CRABBOX_COORDINATOR_TOKEN, HCLOUD_TOKEN, and HETZNER_TOKEN without printing values",
"checked Bitwarden CLI status"
"checked presence of CRABBOX_COORDINATOR, CRABBOX_COORDINATOR_TOKEN, HCLOUD_TOKEN, HETZNER_TOKEN, GH_TOKEN, and GITHUB_TOKEN without printing values",
"loaded retained Bitwarden session from /tmp/bw_session without printing the session value",
"ran bw status and bw sync",
"checked Bitwarden organization, collection, and item counts",
"checked visible Bitwarden item names and metadata only",
"scanned visible Bitwarden item names and notes for crabbox, hcloud, hetzner, and coordinator terms without printing note or secret values"
],
"exact_blocker": "Crabbox provider check fails because neither HCLOUD_TOKEN nor HETZNER_TOKEN is present, no Crabbox broker coordinator token is configured, and Bitwarden CLI is unauthenticated.",
"why_it_cannot_be_solved_autonomously": "Remote Crabbox execution requires a real Hetzner or Crabbox broker credential. Fabricating or bypassing that credential would either fail or create an unsafe secret-handling path.",
"next_action": "Authenticate Bitwarden or provide a scoped Hetzner/Crabbox broker credential in the operator environment, then rerun `crabbox doctor --json` and `crabbox job run phase1b-local-proof` from teleo-infrastructure.",
"exact_blocker": "Crabbox provider execution still lacks a real provider credential: HCLOUD_TOKEN, HETZNER_TOKEN, CRABBOX_COORDINATOR, and CRABBOX_COORDINATOR_TOKEN are unset, and the visible Bitwarden org collection contains only Anthropic API Key, Leo twitter, and LivingIPbot Github, with no Crabbox, HCloud, Hetzner, or coordinator metadata match.",
"why_it_cannot_be_solved_autonomously": "A remote Crabbox lease requires a real Hetzner or Crabbox broker credential. The repo can safely commit CI/CD config, dry-run plans, and blocker artifacts, but it cannot fabricate the provider credential or commit secret values.",
"exact_next_action": "Add a scoped Hetzner/Crabbox broker credential to Bitwarden or GitHub environment secrets as HCLOUD_TOKEN, HETZNER_TOKEN, CRABBOX_COORDINATOR, or CRABBOX_COORDINATOR_TOKEN, then rerun crabbox doctor --json and crabbox job run phase1b-local-proof from teleo-infrastructure.",
"safe_local_status": {
"crabbox_cli_installed": "0.22.1",
"job_dry_run": "passes",
"focused_ruff": "passes",
"phase1b_proof_wrapper": "passes",
"full_pytest": "422 passed"
"job_list": "passes",
"sync_plan": "217 files, 2.4 MiB",
"unit_dry_run": "passes",
"phase1b_proof_dry_run": "passes",
"ci_contract_guard": "passes",
"phase1b_proof_wrapper": "131 passed, 8 proof cases succeeded, all six agents seen",
"full_pytest": "422 passed",
"crabbox_doctor": "fails only provider credential check: HCLOUD_TOKEN or HETZNER_TOKEN is required",
"bitwarden_status": "unlocked",
"bitwarden_organizations": 1,
"bitwarden_collections": 1,
"bitwarden_items_visible": 3,
"bitwarden_matching_crabbox_or_hetzner_items": 0
},
"secret_commit_policy": {
"allowed_to_commit": [
"workflow files",
"Crabbox config with secret slot names omitted",
"proof scripts",
"machine-readable blocker artifacts",
"docs and agent skills"
],
"not_allowed_to_commit": [
"Bitwarden item values",
"Bitwarden vault exports",
"provider tokens",
"GitHub bot tokens",
"OpenRouter keys",
"SSH private keys",
"production databases"
]
}
}

View file

@ -5,6 +5,7 @@ Crabbox is the remote execution layer for `teleo-infrastructure`. It is not the
## Goals
- Run Python tests on a disposable or warm remote Linux box.
- Prove the CI/Crabbox contract without network access before remote runs.
- Run the Phase 1B local proof script remotely.
- Retain JUnit and machine-readable proof artifacts.
- Give agents a bounded job list instead of arbitrary cloud shell access.
@ -42,18 +43,32 @@ Do not commit either value.
```bash
crabbox job list
crabbox job run --dry-run ci-contract
crabbox job run --dry-run unit
crabbox job run --dry-run phase1b-local-proof
crabbox job run ci-contract
crabbox job run unit
crabbox job run phase1b-local-proof
```
`ci-contract` writes:
- `.crabbox-results/crabbox-ci-contract.json`
`phase1b-local-proof` writes:
- `.crabbox-results/crabbox-ci-contract.json`
- `proof/phase1b-local-e2e-proof.json`
- `.crabbox-results/phase1b-pytest.xml`
- `.crabbox-results/phase1b-proof-summary.json`
The contract proof checks that:
- Crabbox exposes only the named bounded jobs.
- sync excludes secret/runtime files such as `.env`, `secrets`, DBs, logs, caches, and virtualenvs.
- `.crabbox.yaml` contains no token-bearing env names.
- Leo routes are explicit: Leo-owned domains, fallback routes, and top-2 cross-domain routes that include Leo are covered, while Phase 1B does not silently preserve Leo as a universal second reviewer.
## Secret Boundary
Allowed:
@ -63,6 +78,7 @@ Allowed:
- `PHASE1B_AGENT_ROUTING_ENABLED`
- broker token in user config
- direct `HCLOUD_TOKEN` or `HETZNER_TOKEN` in local operator env
- GitHub environment secrets named `HCLOUD_TOKEN` or `HETZNER_TOKEN` for an explicitly dispatched remote proof workflow
Not allowed:
@ -73,6 +89,8 @@ Not allowed:
- Bitwarden exports
- prod `pipeline.db`
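Bitwarden may be used as the human/operator source of truth for secret lookup and GitHub secret setup, but no Bitwarden item value, vault export, or copied secret belongs in this repo. Committed workflow files and docs may name required secret slots such as `HCLOUD_TOKEN`; `.crabbox.yaml` must not name them (the contract check rejects token-bearing names there), and no committed file may contain the values.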
## Proof Boundary
Crabbox remote proof proves repo behavior on a remote Linux lease. It does not prove production parity unless the lease recreates the production runtime paths, systemd services, timers, DB path, and deploy script behavior.

View file

@ -1,3 +1,7 @@
[build-system]
requires = ["setuptools>=68"]
build-backend = "setuptools.build_meta"
[project]
name = "teleo-pipeline"
version = "2.0.0"
@ -14,6 +18,9 @@ dev = [
"ruff>=0.3",
]
[tool.setuptools]
packages = ["lib"]
[tool.ruff]
target-version = "py311"
line-length = 120

View file

@ -0,0 +1,251 @@
#!/usr/bin/env python3
"""Validate the repo-owned Crabbox and Leo CI contract.
This is intentionally no-network and dependency-free. It checks the local
Crabbox config for bounded jobs/secret hygiene and exercises a small Leo route
contract through the real Phase 1b router.
"""
from __future__ import annotations
import argparse
import json
import re
import sys
from pathlib import Path
from typing import Any
REPO_ROOT = Path(__file__).resolve().parents[1]
if str(REPO_ROOT) not in sys.path:
sys.path.insert(0, str(REPO_ROOT))
from lib.agent_routing import classify_pr_route # noqa: E402
# Repo files whose text is read by _validate_crabbox_contract().
CRABBOX_CONFIG = REPO_ROOT / ".crabbox.yaml"
CRABBOX_DOC = REPO_ROOT / "docs" / "crabbox.md"
CRABBOX_SKILL = REPO_ROOT / ".agents" / "skills" / "crabbox" / "SKILL.md"
CRABBOX_WORKFLOW = REPO_ROOT / ".github" / "workflows" / "crabbox.yml"
CI_WORKFLOW = REPO_ROOT / ".github" / "workflows" / "ci.yml"
# Job names that must be present under `jobs:` in .crabbox.yaml and must also
# be named in the Crabbox skill file.
REQUIRED_JOBS = {
    "unit",
    "lint-phase1b",
    "phase1b-local-proof",
    "sync-smoke",
    "ci-contract",
}
# Entries that must all appear in the `sync` -> `exclude` list of .crabbox.yaml.
REQUIRED_SYNC_EXCLUDES = {
    ".cache",
    ".venv",
    ".pytest_cache",
    ".ruff_cache",
    "__pycache__",
    "*.db",
    "*.db-wal",
    "*.db-shm",
    "*.log",
    "logs",
    "secrets",
    ".env",
    "node_modules",
}
# The `env` -> `allow` list of .crabbox.yaml must equal this set exactly.
ALLOWED_ENV = {"CI", "PYTHONWARNINGS", "PHASE1B_AGENT_ROUTING_ENABLED"}
# Substrings that must not occur anywhere in .crabbox.yaml; the config text is
# upper-cased before the check, so matching is case-insensitive.
FORBIDDEN_CONFIG_TOKENS = {
    "HCLOUD_TOKEN",
    "HETZNER_TOKEN",
    "CRABBOX_COORDINATOR_TOKEN",
    "GITHUB_TOKEN",
    "GH_TOKEN",
    "OPENROUTER",
    "FORGEJO",
    "BITWARDEN",
    "BW_SESSION",
    "SSH_PRIVATE",
}
def _read(path: Path) -> str:
if not path.exists():
raise AssertionError(f"missing required file: {path.relative_to(REPO_ROOT)}")
return path.read_text()
def _list_values_under(text: str, parent: str, child: str) -> list[str]:
lines = text.splitlines()
in_parent = False
in_child = False
values: list[str] = []
for line in lines:
if not in_parent:
if line == f"{parent}:":
in_parent = True
continue
if line and not line.startswith(" "):
break
if not in_child:
if line == f" {child}:":
in_child = True
continue
if line.startswith(" - "):
values.append(line.removeprefix(" - ").strip().strip('"'))
continue
break
return values
def _top_level_job_names(text: str) -> set[str]:
jobs_match = re.search(r"(?ms)^jobs:\n(?P<body>.*?)(?:\n\S|\Z)", text)
if not jobs_match:
return set()
return set(re.findall(r"^ ([A-Za-z0-9_-]+):\s*$", jobs_match.group("body"), flags=re.MULTILINE))
def _diff_for(*paths: str, line: str = "+type: claim") -> str:
return "\n".join(f"diff --git a/{path} b/{path}\n{line}" for path in paths)
def _assert_equal(name: str, actual: Any, expected: Any) -> None:
if actual != expected:
raise AssertionError(f"{name}: expected {expected!r}, got {actual!r}")
def _validate_leo_route_contract() -> dict[str, Any]:
    """Run four synthetic diffs through ``classify_pr_route`` and check the audit output.

    For each case the route's ``to_audit_dict()`` result must match the
    expected ``required_agents``, ``route_kind`` and ``fallback`` values;
    the first mismatch raises ``AssertionError`` via ``_assert_equal``.

    Returns:
        A JSON-serialisable summary with the per-case audit dicts and a
        description of when Leo is required.
    """
    # Each entry: (case name, classified route, required_agents, route_kind, fallback).
    # All routes are classified up front, before any expectation is checked.
    expectations = [
        (
            "leo_owned_domain",
            classify_pr_route(_diff_for("domains/grand-strategy/strategy.md")),
            ["Leo"],
            "single",
            False,
        ),
        (
            "leo_fallback",
            classify_pr_route(_diff_for("docs/readme.md"), branch="misc/update"),
            ["Leo"],
            "fallback",
            True,
        ),
        (
            "leo_cross_domain",
            classify_pr_route(
                _diff_for(
                    "foundations/collective-intelligence/collective-ai-goals.md",
                    line="+Collective AI goals and AI systems self-understanding need review.",
                )
            ),
            ["Leo", "Theseus"],
            "multi",
            False,
        ),
        (
            "non_leo_single_domain",
            classify_pr_route(_diff_for("domains/internet-finance/x402.md")),
            ["Rio"],
            "single",
            False,
        ),
    ]

    audited = []
    for case_name, route, agents, kind, is_fallback in expectations:
        audit = route.to_audit_dict()
        checks = (
            ("required_agents", agents),
            ("route_kind", kind),
            ("fallback", is_fallback),
        )
        for field, wanted in checks:
            _assert_equal(f"{case_name} {field}", audit[field], wanted)
        audited.append({"name": case_name, "route": audit})

    contract = {
        "leo_required_when": [
            "grand-strategy or Leo-owned domain route",
            "no confident route fallback",
            "top-2 cross-domain route where Leo is one of the top owners",
        ],
        "leo_not_universal_second_review": True,
    }
    return {"ok": True, "cases": audited, "contract": contract}
def _validate_crabbox_contract() -> dict[str, Any]:
    """Check the Crabbox config, workflows, skill file and docs against the repo contract.

    The checks are plain text checks on the files' contents: required jobs and
    sync excludes are present, the env allowlist matches ``ALLOWED_ENV``
    exactly, no forbidden token name occurs in ``.crabbox.yaml``, the CI
    workflow and config reference the contract script / proof wrapper / proof
    artifact, the hydration workflow targets a self-hosted runner label, the
    skill names every required job, and the docs keep the production-deploy
    boundary sentence.

    Returns:
        A JSON-serialisable summary of what was checked.

    Raises:
        AssertionError: On the first violated check, or (from ``_read``) when
            a required file is missing.
    """
    config = _read(CRABBOX_CONFIG)
    doc = _read(CRABBOX_DOC)
    skill = _read(CRABBOX_SKILL)
    crabbox_workflow = _read(CRABBOX_WORKFLOW)
    ci_workflow = _read(CI_WORKFLOW)

    # NOTE(review): only *missing* jobs are detected here. Jobs present in the
    # config but absent from REQUIRED_JOBS are not rejected, although
    # docs/crabbox.md says Crabbox exposes "only" the named jobs -- confirm
    # whether extra jobs should fail the gate.
    jobs = _top_level_job_names(config)
    missing_jobs = sorted(REQUIRED_JOBS - jobs)
    if missing_jobs:
        raise AssertionError(f"missing Crabbox jobs: {missing_jobs}")

    sync_excludes = set(_list_values_under(config, "sync", "exclude"))
    missing_excludes = sorted(REQUIRED_SYNC_EXCLUDES - sync_excludes)
    if missing_excludes:
        raise AssertionError(f"missing sync excludes: {missing_excludes}")

    allowed_env = set(_list_values_under(config, "env", "allow"))
    if allowed_env != ALLOWED_ENV:
        raise AssertionError(f"env allowlist must be {sorted(ALLOWED_ENV)}, got {sorted(allowed_env)}")

    # Upper-case once so the token-name scan is case-insensitive.
    upper_config = config.upper()
    leaked_tokens = sorted(token for token in FORBIDDEN_CONFIG_TOKENS if token in upper_config)
    if leaked_tokens:
        raise AssertionError(f"secret-like token names must not appear in .crabbox.yaml: {leaked_tokens}")

    if "scripts/check_crabbox_ci_contract.py" not in ci_workflow:
        raise AssertionError("ci.yml must run scripts/check_crabbox_ci_contract.py")
    if "scripts/crabbox_phase1b_proof.sh" not in ci_workflow:
        raise AssertionError("ci.yml must run scripts/crabbox_phase1b_proof.sh")
    if "crabbox_phase1b_proof.sh" not in config:
        raise AssertionError(".crabbox.yaml must run the Phase 1B proof wrapper")
    if "crabbox-ci-contract.json" not in config:
        raise AssertionError(".crabbox.yaml must download the CI contract proof")
    if "runs-on: [self-hosted" not in crabbox_workflow:
        raise AssertionError("crabbox hydration workflow must target the dynamic self-hosted runner label")

    # Iterate in sorted order so the job named in a failure is the same on
    # every run; iterating the set directly varies with string hash seeding.
    for job in sorted(REQUIRED_JOBS):
        if f"crabbox job run {job}" not in skill and f"`{job}`" not in skill:
            raise AssertionError(f"Crabbox skill must name allowed job {job}")

    # The longer phrase contains "production deploy", so one check covers both.
    if "not the production deploy system" not in doc.lower():
        raise AssertionError("docs/crabbox.md must preserve the production deploy boundary")

    return {
        "ok": True,
        "jobs": sorted(jobs),
        "required_jobs": sorted(REQUIRED_JOBS),
        "sync_excludes_checked": sorted(REQUIRED_SYNC_EXCLUDES),
        "env_allowlist": sorted(allowed_env),
        "secret_token_names_absent": sorted(FORBIDDEN_CONFIG_TOKENS),
    }
def main() -> int:
    """Run both contract validations and emit the proof as JSON.

    The proof is written to ``--output`` (resolved against ``REPO_ROOT``,
    parent directories created as needed) and also printed to stdout.
    A failed validation propagates as ``AssertionError`` before anything is
    written.

    Returns:
        ``0`` when every check passed.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--output", default=".crabbox-results/crabbox-ci-contract.json")
    options = cli.parse_args()

    # Crabbox checks run first, then the Leo route checks.
    crabbox_result = _validate_crabbox_contract()
    leo_result = _validate_leo_route_contract()
    proof = {
        "ok": True,
        "scope": "crabbox_ci_leo_contract",
        "crabbox": crabbox_result,
        "leo_route_contract": leo_result,
    }

    destination = REPO_ROOT / options.output
    destination.parent.mkdir(parents=True, exist_ok=True)
    rendered = json.dumps(proof, indent=2, sort_keys=True)
    destination.write_text(rendered + "\n")
    print(rendered)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())

View file

@ -3,10 +3,14 @@ set -euo pipefail
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
cd "$ROOT"
PYTHON_BIN="${PYTHON:-python3}"
mkdir -p proof .crabbox-results
python3 -m pytest \
"$PYTHON_BIN" scripts/check_crabbox_ci_contract.py \
--output .crabbox-results/crabbox-ci-contract.json
"$PYTHON_BIN" -m pytest \
tests/test_agent_routing.py \
tests/test_evaluate_agent_routing.py \
tests/test_phase1b_end_to_end.py \
@ -16,19 +20,22 @@ python3 -m pytest \
--junitxml=.crabbox-results/phase1b-pytest.xml
PHASE1B_AGENT_ROUTING_ENABLED=true \
python3 scripts/prove_phase1b_local.py \
"$PYTHON_BIN" scripts/prove_phase1b_local.py \
--output proof/phase1b-local-e2e-proof.json
python3 - <<'PY'
"$PYTHON_BIN" - <<'PY'
import json
from pathlib import Path
proof_path = Path("proof/phase1b-local-e2e-proof.json")
proof = json.loads(proof_path.read_text())
contract = json.loads(Path(".crabbox-results/crabbox-ci-contract.json").read_text())
summary = {
"ok": proof.get("ok") is True,
"scope": proof.get("scope"),
"schema_version": proof.get("schema_version"),
"crabbox_ci_contract_ok": contract.get("ok") is True,
"leo_route_contract_ok": contract.get("leo_route_contract", {}).get("ok") is True,
"agents_seen": proof.get("agents_seen", []),
"cases_total": proof.get("cases_total"),
"succeeded": proof.get("succeeded"),