diff --git a/.agents/skills/crabbox/SKILL.md b/.agents/skills/crabbox/SKILL.md new file mode 100644 index 0000000..f4a07ed --- /dev/null +++ b/.agents/skills/crabbox/SKILL.md @@ -0,0 +1,32 @@ +# Crabbox + +Use Crabbox for remote Linux verification and PR proof only. + +Allowed jobs: + +- `crabbox job run unit` +- `crabbox job run lint-phase1b` +- `crabbox job run phase1b-local-proof` +- `crabbox job run sync-smoke` + +Default workflow: + +1. Run `crabbox job run --dry-run phase1b-local-proof`. +2. Inspect the planned commands and confirm no production secrets or production deploy commands appear. +3. Run `crabbox job run phase1b-local-proof`. +4. Save the run id, lease id, stdout, downloaded proof JSON, and JUnit output. +5. Stop the lease unless the CLI has already stopped it. + +Boundaries: + +- Do not run production deploy commands from Crabbox. +- Do not forward production GitHub, Forgejo, OpenRouter, SSH, Bitwarden, or VPS secrets. +- Do not target the production `decision-engine` repo for sandbox proof. +- Do not mutate the production VPS. +- Do not call Crabbox proof equivalent to production proof unless the lease recreates `/opt/teleo-eval`, systemd services, runtime users, DB paths, timers, and deploy scripts. + +Failure handling: + +- If sync sanity fails, stop the lease and retry on a fresh lease. +- If a proof script fails, save the full run output and do not summarize it as a pass. +- If a remote box has unknown state, stop it instead of debugging against reused state. 
diff --git a/.crabbox.yaml b/.crabbox.yaml new file mode 100644 index 0000000..b433ead --- /dev/null +++ b/.crabbox.yaml @@ -0,0 +1,157 @@ +profile: teleo-infrastructure-check +provider: hetzner +target: linux +architecture: arm64 +class: beast +ttl: 90m +idleTimeout: 20m +capacity: + market: spot + strategy: most-available + fallback: on-demand-after-120s +actions: + workflow: .github/workflows/crabbox.yml + job: hydrate + runnerLabels: + - crabbox + runnerVersion: latest + ephemeral: true +sync: + delete: true + checksum: false + gitSeed: true + fingerprint: true + timeout: 15m + warnFiles: 50000 + warnBytes: 5368709120 + failFiles: 150000 + failBytes: 21474836480 + exclude: + - .cache + - .venv + - .pytest_cache + - .ruff_cache + - __pycache__ + - "*.pyc" + - "*.db" + - "*.db-wal" + - "*.db-shm" + - "*.log" + - logs + - secrets + - .env + - htmlcov + - dist + - build + - "*.egg-info" + - .turbo + - node_modules +env: + allow: + - CI + - PYTHONWARNINGS + - PHASE1B_AGENT_ROUTING_ENABLED +ssh: + user: crabbox + port: "2222" + # Ordered fallback ports tried after ssh.port; use [] to disable fallback. 
+ fallbackPorts: + - "22" + +jobs: + unit: + provider: hetzner + target: linux + architecture: arm64 + class: beast + hydrate: + actions: true + githubRunner: false + waitTimeout: 20m + keepAliveMinutes: 90 + actions: + workflow: .github/workflows/crabbox.yml + job: hydrate + shell: true + command: > + python3 -m pip install -e '.[dev]' && + mkdir -p .crabbox-results && + python3 -m pytest --junitxml=.crabbox-results/pytest.xml + junit: + - .crabbox-results/pytest.xml + downloads: + - .crabbox-results/pytest.xml + stop: always + + lint-phase1b: + provider: hetzner + target: linux + architecture: arm64 + class: beast + hydrate: + actions: true + githubRunner: false + waitTimeout: 20m + keepAliveMinutes: 90 + actions: + workflow: .github/workflows/crabbox.yml + job: hydrate + shell: true + command: > + python3 -m pip install -e '.[dev]' && + python3 -m ruff check + lib/agent_routing.py + lib/config.py + lib/db.py + lib/evaluate.py + lib/llm.py + lib/post_extract.py + telegram/approvals.py + scripts/prove_phase1b_local.py + tests/test_agent_routing.py + tests/test_evaluate_agent_routing.py + tests/test_phase1b_end_to_end.py + tests/test_eval_parse.py + tests/test_contributor.py + tests/test_search.py + stop: always + + phase1b-local-proof: + provider: hetzner + target: linux + architecture: arm64 + class: beast + hydrate: + actions: true + githubRunner: false + waitTimeout: 20m + keepAliveMinutes: 90 + actions: + workflow: .github/workflows/crabbox.yml + job: hydrate + shell: true + command: > + python3 -m pip install -e '.[dev]' && + scripts/crabbox_phase1b_proof.sh + junit: + - .crabbox-results/phase1b-pytest.xml + downloads: + - proof/phase1b-local-e2e-proof.json + - .crabbox-results/phase1b-pytest.xml + - .crabbox-results/phase1b-proof-summary.json + stop: always + + sync-smoke: + provider: hetzner + target: linux + architecture: arm64 + class: beast + hydrate: + actions: false + shell: true + command: > + python3 -m compileall + lib + tests + 
scripts/prove_phase1b_local.py + stop: always diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..c56a101 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,60 @@ +name: ci + +on: + pull_request: + push: + branches: + - main + workflow_dispatch: + +permissions: + contents: read + +jobs: + python: + runs-on: ubuntu-latest + timeout-minutes: 20 + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + - name: Install + run: | + python -m pip install --upgrade pip + python -m pip install -e ".[dev]" + - name: Focused lint + run: | + python -m ruff check \ + lib/agent_routing.py \ + lib/config.py \ + lib/db.py \ + lib/evaluate.py \ + lib/llm.py \ + lib/post_extract.py \ + telegram/approvals.py \ + scripts/prove_phase1b_local.py \ + tests/test_agent_routing.py \ + tests/test_evaluate_agent_routing.py \ + tests/test_phase1b_end_to_end.py \ + tests/test_eval_parse.py \ + tests/test_contributor.py \ + tests/test_search.py + - name: Unit tests + run: | + python -m pytest --junitxml=.crabbox-results/pytest.xml + - name: Phase 1B local proof + env: + PHASE1B_AGENT_ROUTING_ENABLED: "true" + run: | + scripts/crabbox_phase1b_proof.sh + - name: Upload proof artifacts + if: always() + uses: actions/upload-artifact@v4 + with: + name: teleo-infrastructure-ci-proof + path: | + proof/phase1b-local-e2e-proof.json + .crabbox-results/*.xml + .crabbox-results/*.json + if-no-files-found: warn diff --git a/.github/workflows/crabbox.yml b/.github/workflows/crabbox.yml new file mode 100644 index 0000000..ff7f971 --- /dev/null +++ b/.github/workflows/crabbox.yml @@ -0,0 +1,109 @@ +name: crabbox + +on: + workflow_dispatch: + inputs: + ref: + description: "Git ref to hydrate" + required: false + type: string + crabbox_id: + description: "Crabbox lease ID" + required: true + type: string + crabbox_runner_label: + description: "Dynamic Crabbox runner label" + required: true + type: string +
crabbox_job: + description: "Hydration job identifier expected by Crabbox" + required: false + default: "hydrate" + type: string + crabbox_keep_alive_minutes: + description: "Minutes to keep the hydrated job alive" + required: false + default: "90" + type: string + +permissions: + contents: read + +jobs: + hydrate: + runs-on: [self-hosted, "${{ inputs.crabbox_runner_label }}"] + timeout-minutes: 120 + steps: + - uses: actions/checkout@v4 + with: + ref: ${{ inputs.ref || github.ref }} + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + - name: Hydrate + run: | + python -m pip install --upgrade pip + python -m pip install -e ".[dev]" + if [ -f package-lock.json ]; then npm ci; fi + if [ -f pnpm-lock.yaml ]; then corepack enable && pnpm install --frozen-lockfile; fi + if [ -f go.mod ]; then go mod download; fi + - name: Mark Crabbox ready + shell: bash + # Inputs are passed through env instead of being interpolated into the script, so input text is never parsed as shell code. + env: + CRABBOX_JOB: ${{ inputs.crabbox_job }} + CRABBOX_ID: ${{ inputs.crabbox_id }} + run: | + job="${CRABBOX_JOB}" + if [ -z "$job" ]; then job=hydrate; fi + mkdir -p "$HOME/.crabbox/actions" + state="$HOME/.crabbox/actions/${CRABBOX_ID}.env" + env_file="$HOME/.crabbox/actions/${CRABBOX_ID}.env.sh" + services_file="$HOME/.crabbox/actions/${CRABBOX_ID}.services" + write_export() { + key="$1" + value="${!key-}" + if [ -n "$value" ]; then + printf 'export %s=%q\n' "$key" "$value" + fi + } + { + for key in CI GITHUB_ACTIONS GITHUB_WORKSPACE GITHUB_REPOSITORY GITHUB_RUN_ID GITHUB_RUN_NUMBER GITHUB_RUN_ATTEMPT GITHUB_REF GITHUB_REF_NAME GITHUB_SHA GITHUB_EVENT_NAME GITHUB_ACTOR GITHUB_JOB RUNNER_OS RUNNER_ARCH RUNNER_TEMP RUNNER_TOOL_CACHE; do + write_export "$key" + done + } > "${env_file}.tmp" + mv "${env_file}.tmp" "$env_file" + { + echo "# Docker containers visible from the hydrated runner" + docker ps --format '{{.Names}}\t{{.Image}}\t{{.Ports}}' 2>/dev/null || true + } > "${services_file}.tmp" + mv "${services_file}.tmp" "$services_file" + tmp="${state}.tmp" + { + echo "WORKSPACE=${GITHUB_WORKSPACE}" + echo "RUN_ID=${GITHUB_RUN_ID}" + echo
"JOB=${job}" + echo "ENV_FILE=${env_file}" + echo "SERVICES_FILE=${services_file}" + echo "READY_AT=$(date -u +%Y-%m-%dT%H:%M:%SZ)" + } > "$tmp" + mv "$tmp" "$state" + - name: Keep Crabbox job alive + shell: bash + # Inputs are passed through env instead of being interpolated into the script, so input text is never parsed as shell code. + env: + CRABBOX_ID: ${{ inputs.crabbox_id }} + CRABBOX_KEEP_ALIVE_MINUTES: ${{ inputs.crabbox_keep_alive_minutes }} + run: | + minutes="${CRABBOX_KEEP_ALIVE_MINUTES}" + case "$minutes" in + ''|*[!0-9]*) minutes=90 ;; + esac + stop="$HOME/.crabbox/actions/${CRABBOX_ID}.stop" + deadline=$(( $(date +%s) + minutes * 60 )) + while [ "$(date +%s)" -lt "$deadline" ]; do + if [ -f "$stop" ]; then + exit 0 + fi + sleep 15 + done diff --git a/.gitignore b/.gitignore index c096ac3..d5191aa 100644 --- a/.gitignore +++ b/.gitignore @@ -20,6 +20,8 @@ logs/ # Test artifacts .pytest_cache/ +.crabbox/ +.crabbox-results/ htmlcov/ .coverage diff --git a/docs/crabbox-remote-blocker.json b/docs/crabbox-remote-blocker.json new file mode 100644 index 0000000..ba04aef --- /dev/null +++ b/docs/crabbox-remote-blocker.json @@ -0,0 +1,22 @@ +{ + "status": "blocked_remote_execution", + "scope": "crabbox remote proof", + "attempted_discovery": [ + "installed Crabbox 0.22.1 from the official darwin_arm64 release tarball after Homebrew was blocked by outdated Command Line Tools", + "ran crabbox job list", + "ran crabbox job run --dry-run phase1b-local-proof", + "ran crabbox doctor --json", + "checked presence of CRABBOX_COORDINATOR, CRABBOX_COORDINATOR_TOKEN, HCLOUD_TOKEN, and HETZNER_TOKEN without printing values", + "checked Bitwarden CLI status" + ], + "exact_blocker": "Crabbox provider check fails because neither HCLOUD_TOKEN nor HETZNER_TOKEN is present, no Crabbox broker coordinator token is configured, and Bitwarden CLI is unauthenticated.", + "why_it_cannot_be_solved_autonomously": "Remote Crabbox execution requires a real Hetzner or Crabbox broker credential.
Fabricating or bypassing that credential would either fail or create an unsafe secret-handling path.", + "next_action": "Authenticate Bitwarden or provide a scoped Hetzner/Crabbox broker credential in the operator environment, then rerun `crabbox doctor --json` and `crabbox job run phase1b-local-proof` from teleo-infrastructure.", + "safe_local_status": { + "crabbox_cli_installed": "0.22.1", + "job_dry_run": "passes", + "focused_ruff": "passes", + "phase1b_proof_wrapper": "passes", + "full_pytest": "422 passed" + } +} diff --git a/docs/crabbox.md b/docs/crabbox.md new file mode 100644 index 0000000..eba7f0d --- /dev/null +++ b/docs/crabbox.md @@ -0,0 +1,78 @@ +# Crabbox Remote Proof + +Crabbox is the remote execution layer for `teleo-infrastructure`. It is not the production deploy system. + +## Goals + +- Run Python tests on a disposable or warm remote Linux box. +- Run the Phase 1B local proof script remotely. +- Retain JUnit and machine-readable proof artifacts. +- Give agents a bounded job list instead of arbitrary cloud shell access. + +## Non-Goals + +- No production deploys. +- No production secrets. +- No production VPS mutation. +- No production `decision-engine` PR comments from Crabbox jobs. + +## Required Local Setup + +Crabbox CLI 0.22.1 or newer: + +```bash +crabbox --version +``` + +One of: + +```bash +crabbox login --url "$CRABBOX_COORDINATOR" +``` + +or a Hetzner API token (`HCLOUD_TOKEN` or `HETZNER_TOKEN`) exported in the local operator env: + +```bash +export HCLOUD_TOKEN="..." +``` + +Do not commit the coordinator URL or the Hetzner token.
+ +## Jobs + +```bash +crabbox job list +crabbox job run --dry-run unit +crabbox job run --dry-run phase1b-local-proof +crabbox job run unit +crabbox job run phase1b-local-proof +``` + +`phase1b-local-proof` writes: + +- `proof/phase1b-local-e2e-proof.json` +- `.crabbox-results/phase1b-pytest.xml` +- `.crabbox-results/phase1b-proof-summary.json` + +## Secret Boundary + +Allowed: + +- `CI` +- `PYTHONWARNINGS` +- `PHASE1B_AGENT_ROUTING_ENABLED` +- broker token in user config +- direct `HCLOUD_TOKEN` or `HETZNER_TOKEN` in local operator env + +Not allowed: + +- production GitHub admin token +- production Forgejo token +- production OpenRouter key +- production SSH keys +- Bitwarden exports +- prod `pipeline.db` + +## Proof Boundary + +Crabbox remote proof proves repo behavior on a remote Linux lease. It does not prove production parity unless the lease recreates the production runtime paths, systemd services, timers, DB path, and deploy script behavior. diff --git a/scripts/crabbox_phase1b_proof.sh b/scripts/crabbox_phase1b_proof.sh new file mode 100755 index 0000000..cf26a83 --- /dev/null +++ b/scripts/crabbox_phase1b_proof.sh @@ -0,0 +1,45 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." 
&& pwd)" +cd "$ROOT" + +mkdir -p proof .crabbox-results + +python3 -m pytest \ + tests/test_agent_routing.py \ + tests/test_evaluate_agent_routing.py \ + tests/test_phase1b_end_to_end.py \ + tests/test_eval_parse.py \ + tests/test_contributor.py \ + tests/test_search.py \ + --junitxml=.crabbox-results/phase1b-pytest.xml + +PHASE1B_AGENT_ROUTING_ENABLED=true \ + python3 scripts/prove_phase1b_local.py \ + --output proof/phase1b-local-e2e-proof.json + +python3 - <<'PY' +import json +from pathlib import Path + +proof_path = Path("proof/phase1b-local-e2e-proof.json") +proof = json.loads(proof_path.read_text()) +summary = { + "ok": proof.get("ok") is True, + "scope": proof.get("scope"), + "schema_version": proof.get("schema_version"), + "agents_seen": proof.get("agents_seen") or [], + "cases_total": proof.get("cases_total"), + "succeeded": proof.get("succeeded"), + "failed": proof.get("failed"), +} +Path(".crabbox-results/phase1b-proof-summary.json").write_text( + json.dumps(summary, indent=2, sort_keys=True) + "\n" +) +print(json.dumps(summary, indent=2, sort_keys=True)) +if not summary["ok"]:  # checked only after the summary is written, so a failing run still leaves the summary artifact + raise SystemExit(f"phase1b proof failed: {summary}") +if len(summary["agents_seen"]) != 6: + raise SystemExit(f"expected six agents, got {summary['agents_seen']}") +PY