teleo-codex/ops/deploy.sh
m3taversal e27f6a7b91 commit pending pipeline changes: watchdog tier0 recovery, stale_pr cleanup, deploy.sh improvements
- watchdog.py: tier0 auto-recovery (3 retries, 1h cooldown, audit trail) — pending Ganymede review
- stale_pr.py: new module, closes extraction PRs open >30 min with zero claims
- deploy.sh: expanded with new deployment features
- validate.py, extract.py, cascade.py, db.py: minor fixes
- backfill-descriptions.py: utility script
- review_queue.py: minor fix

Note: watchdog + stale_pr not yet deployed to VPS (reverted after missing import crash)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-13 10:14:54 +02:00

207 lines
6.1 KiB
Bash
Executable file

#!/usr/bin/env bash
# deploy.sh — push the pipeline and diagnostics code from this repo to the VPS.
#
# Usage: ./deploy.sh [--dry-run] [--restart]
#
# Requires a committed, clean working tree; the repo-first workflow is enforced.

# Strict mode: abort on errors, on unset variables, and on failing pipeline stages.
set -o errexit -o nounset -o pipefail

# Remote login and the target directories on the VPS.
VPS_HOST="teleo@77.42.65.182"
VPS_PIPELINE="/opt/teleo-eval/pipeline"
VPS_DIAGNOSTICS="/opt/teleo-eval/diagnostics"
VPS_AGENT_STATE="/opt/teleo-eval/ops/agent-state"

# Absolute path of the parent of the directory that holds this script.
REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"

# Command-line switches; both default to off.
DRY_RUN=false
RESTART=false

# Walk the positional parameters ("for opt" iterates over "$@" implicitly).
for opt; do
  if [[ "$opt" == "--dry-run" ]]; then
    DRY_RUN=true
  elif [[ "$opt" == "--restart" ]]; then
    RESTART=true
  elif [[ "$opt" == "--help" || "$opt" == "-h" ]]; then
    printf '%s\n' \
      "Usage: $0 [--dry-run] [--restart]" \
      " --dry-run Show what would be deployed without doing it" \
      " --restart Restart services after deploy"
    exit 0
  else
    # Anything unrecognised is fatal so a typo cannot silently change a deploy.
    printf 'Unknown arg: %s\n' "$opt"
    exit 1
  fi
done
# Gate: working tree must be clean.
# The status is captured in a checked assignment rather than inline inside the
# test: if git itself fails (for example REPO_ROOT is not a git checkout) its
# empty output would otherwise be indistinguishable from a clean tree and the
# deploy would proceed.
if ! TREE_STATUS="$(git -C "$REPO_ROOT" status --porcelain)"; then
  echo "ERROR: Could not read git status for $REPO_ROOT."
  exit 1
fi
if [ -n "$TREE_STATUS" ]; then
  echo "ERROR: Uncommitted changes. Commit first, deploy second."
  git -C "$REPO_ROOT" status --short
  exit 1
fi

# Announce which commit is being deployed. Resolved in a checked assignment
# as well, because a failure inside an echo argument would go unnoticed.
if ! HEAD_COMMIT="$(git -C "$REPO_ROOT" log --oneline -1)"; then
  echo "ERROR: Could not read the HEAD commit for $REPO_ROOT."
  exit 1
fi
echo "Deploying from commit: $HEAD_COMMIT"
echo ""
# Syntax check all Python files before deploying.
echo "=== Pre-deploy syntax check ==="

#######################################
# Parse one Python file with the ast module (nothing is executed and no
# bytecode is written).
# Arguments: $1 - path of the file to check
# Outputs:   a one-line reason on stderr when the file cannot be parsed
# Returns:   0 if the file parses, non-zero otherwise
#######################################
py_syntax_ok() {
  # The file is read as bytes so Python applies the source file's own encoding
  # rules instead of the caller's locale, and the handle is closed by "with".
  python3 -c '
import ast, sys
path = sys.argv[1]
try:
    with open(path, "rb") as handle:
        ast.parse(handle.read(), filename=path)
except (SyntaxError, ValueError, OSError) as exc:
    sys.stderr.write("  %s: %s\n" % (type(exc).__name__, exc))
    sys.exit(1)
' "$1"
}

ERRORS=0
for f in "$REPO_ROOT/ops/pipeline-v2/lib/"*.py "$REPO_ROOT/ops/pipeline-v2/"*.py "$REPO_ROOT/ops/diagnostics/"*.py; do
  # An unmatched glob stays as the literal pattern; skip anything that is not a file.
  [ -f "$f" ] || continue
  # stderr is left visible on purpose: it carries the reason and line number,
  # and exposes a missing python3 instead of reporting it as a syntax error.
  if ! py_syntax_ok "$f"; then
    echo "SYNTAX ERROR: $f"
    ERRORS=$((ERRORS + 1))
  fi
done

if [ "$ERRORS" -gt 0 ]; then
  echo "ERROR: $ERRORS files have syntax errors. Fix before deploying."
  exit 1
fi
echo "All files pass syntax check."
echo ""
# rsync options shared by every transfer below. They are held in an array so
# each option reaches rsync as one clean word. As a plain string expanded
# unquoted, the single quotes around the --exclude patterns were passed to
# rsync literally, so the patterns matched nothing and the excluded files
# were deployed anyway.
RSYNC_FLAGS=(-avz --exclude='__pycache__' --exclude='*.pyc' --exclude='*.bak*')
if $DRY_RUN; then
  RSYNC_FLAGS+=(--dry-run)
  echo "=== DRY RUN ==="
fi

# Flipped to true by sync_path when a transfer for the corresponding service
# includes Python files; read by the --restart logic further down.
PIPELINE_CHANGED=false
DIAG_CHANGED=false

#######################################
# Copy one path to the VPS and optionally record whether Python files were
# part of the transfer.
# Globals:   RSYNC_FLAGS (read); the variable named by $3 (written)
# Arguments: $1 - local source path
#            $2 - rsync destination
#            $3 - optional name of a flag variable that is set to "true" when
#                 rsync's output lists at least one file ending in .py
# Outputs:   rsync's stdout, printed once the transfer has finished
# Exits:     with rsync's status (after printing its output) if rsync fails
#######################################
sync_path() {
  local src=$1 dest=$2 flag_var=${3:-}
  local listing rc=0

  # Declared above and assigned here so rsync's exit status is not masked.
  listing="$(rsync "${RSYNC_FLAGS[@]}" "$src" "$dest")" || rc=$?
  printf '%s\n' "$listing"
  if [ "$rc" -ne 0 ]; then
    echo "ERROR: rsync failed (exit $rc) for $src"
    exit "$rc"
  fi

  # Same criterion the restart check has always used (a line ending in .py in
  # rsync's verbose output). The here-string keeps grep out of a pipeline, so
  # pipefail cannot turn an early grep exit into a false result.
  if [ -n "$flag_var" ] && grep -q '\.py$' <<<"$listing"; then
    printf -v "$flag_var" '%s' true
  fi
}

echo "=== Pipeline lib/ ==="
sync_path "$REPO_ROOT/ops/pipeline-v2/lib/" "$VPS_HOST:$VPS_PIPELINE/lib/" PIPELINE_CHANGED
echo ""

echo "=== Pipeline top-level ==="
for f in teleo-pipeline.py reweave.py batch-extract-50.sh; do
  [ -f "$REPO_ROOT/ops/pipeline-v2/$f" ] || continue
  # batch-extract-50.sh does not end in .py, so it never triggers a restart.
  sync_path "$REPO_ROOT/ops/pipeline-v2/$f" "$VPS_HOST:$VPS_PIPELINE/$f" PIPELINE_CHANGED
done
echo ""

echo "=== Diagnostics ==="
sync_path "$REPO_ROOT/ops/diagnostics/" "$VPS_HOST:$VPS_DIAGNOSTICS/" DIAG_CHANGED
echo ""

echo "=== Agent state ==="
sync_path "$REPO_ROOT/ops/agent-state/" "$VPS_HOST:$VPS_AGENT_STATE/"
echo ""

echo "=== Research session ==="
sync_path "$REPO_ROOT/ops/research-session.sh" "$VPS_HOST:/opt/teleo-eval/research-session.sh"
echo ""

if $DRY_RUN; then
  echo "Dry run complete. No changes made."
  exit 0
fi

echo "Deploy complete."

if $RESTART; then
  echo ""
  echo "=== Detecting services to restart ==="
  # Which services need a restart is decided from what the transfers above
  # actually sent:
  #   pipeline-v2/lib/, pipeline-v2/*.py → teleo-pipeline
  #   diagnostics/                       → teleo-diagnostics
  #   agent-state/, research-session.sh  → no restart (not daemons)
  # A fresh dry-run comparison cannot be used at this point: the files are
  # already in sync, so it would always report "no changes".
  RESTART_SVCS=""

  if $PIPELINE_CHANGED; then
    RESTART_SVCS="$RESTART_SVCS teleo-pipeline"
    echo " teleo-pipeline: files changed, will restart"
  else
    echo " teleo-pipeline: no changes, skipping"
  fi

  if $DIAG_CHANGED; then
    RESTART_SVCS="$RESTART_SVCS teleo-diagnostics"
    echo " teleo-diagnostics: files changed, will restart"
  else
    echo " teleo-diagnostics: no changes, skipping"
  fi

  if [ -z "$RESTART_SVCS" ]; then
    echo ""
    echo "No service files changed. Skipping restart."
  else
    echo ""
    echo "=== Restarting:$RESTART_SVCS ==="
    ssh "$VPS_HOST" "sudo systemctl restart $RESTART_SVCS"
    echo "Services restarted. Waiting 5s for startup..."
    sleep 5

    echo ""
    echo "=== Smoke test ==="
    SMOKE_FAIL=0

    # Check systemd unit status for restarted services.
    # RESTART_SVCS is deliberately unquoted: it is a space-separated list of
    # fixed unit names built just above.
    for svc in $RESTART_SVCS; do
      if ssh "$VPS_HOST" "systemctl is-active --quiet $svc"; then
        echo " $svc: active"
      else
        echo " $svc: FAILED"
        # Best effort: the journal excerpt is a diagnostic aid only.
        ssh "$VPS_HOST" "journalctl -u $svc -n 10 --no-pager" || true
        SMOKE_FAIL=1
      fi
    done

    # Hit health endpoints for restarted services. A service is in
    # RESTART_SVCS exactly when its *_CHANGED flag is true.
    if $PIPELINE_CHANGED; then
      if ssh "$VPS_HOST" "curl -sf --connect-timeout 3 http://localhost:8080/health > /dev/null"; then
        echo " pipeline health (8080): OK"
      else
        echo " pipeline health (8080): FAILED"
        SMOKE_FAIL=1
      fi
    fi
    if $DIAG_CHANGED; then
      if ssh "$VPS_HOST" "curl -sf --connect-timeout 3 http://localhost:8081/ops > /dev/null"; then
        echo " diagnostics (8081): OK"
      else
        echo " diagnostics (8081): FAILED"
        SMOKE_FAIL=1
      fi
    fi

    # Tail logs for quick visual check.
    echo ""
    echo "=== Recent logs (10s) ==="
    JOURNAL_UNITS=""
    for svc in $RESTART_SVCS; do
      JOURNAL_UNITS="$JOURNAL_UNITS -u $svc"
    done
    ssh "$VPS_HOST" "journalctl $JOURNAL_UNITS --since '-10s' --no-pager -n 20" || true

    if [ "$SMOKE_FAIL" -gt 0 ]; then
      echo ""
      echo "WARNING: Smoke test detected failures. Check logs above."
      exit 1
    fi
    echo ""
    echo "Smoke test passed."
  fi
fi