diff --git a/.gitignore b/.gitignore
index 96cc2ae..c096ac3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -30,3 +30,6 @@ build/
 
 # OS
 .DS_Store
+
+# Hermes session artifacts
+ops/sessions/
diff --git a/deploy/auto-deploy.sh b/deploy/auto-deploy.sh
new file mode 100755
index 0000000..a392eff
--- /dev/null
+++ b/deploy/auto-deploy.sh
@@ -0,0 +1,183 @@
+#!/usr/bin/env bash
+# auto-deploy.sh — Pull from Forgejo, sync to working dirs, restart if needed.
+# Runs as systemd timer (teleo-auto-deploy.timer) every 2 minutes.
+# Exits silently when nothing has changed.
+#
+# Flow: lock -> fetch origin/main -> compare with stamp -> fast-forward ->
+# syntax-check Python -> rsync to working dirs -> restart changed services ->
+# smoke test -> write stamp. The stamp is only written after a fully
+# successful run, so a failed deploy is retried on the next timer tick.
+set -euo pipefail
+
+# Single-instance guard: hold a non-blocking exclusive flock on fd 9 for the
+# lifetime of the script so overlapping timer runs skip instead of racing.
+LOCK_FILE="/tmp/teleo-auto-deploy.lock"
+exec 9>"$LOCK_FILE"
+if ! flock -n 9; then
+    logger -t "auto-deploy" "Another deploy is already running. Skipping."
+    exit 0
+fi
+
+DEPLOY_CHECKOUT="/opt/teleo-eval/workspaces/deploy-infra"
+PIPELINE_DIR="/opt/teleo-eval/pipeline"
+DIAGNOSTICS_DIR="/opt/teleo-eval/diagnostics"
+AGENT_STATE_DIR="/opt/teleo-eval/ops/agent-state"
+STAMP_FILE="/opt/teleo-eval/.last-deploy-sha"
+LOG_TAG="auto-deploy"
+
+# log MESSAGE — write MESSAGE to syslog (tag $LOG_TAG) and, timestamped, to stdout.
+log() { logger -t "$LOG_TAG" "$1"; echo "$(date '+%Y-%m-%d %H:%M:%S') $1"; }
+
+# py_changed PATH... — succeed when a *.py file under any PATH differs between
+# OLD_SHA and NEW_SHA. The diff is captured before grepping because piping
+# straight into `grep -q` under pipefail can report failure when grep exits
+# early and git is killed by SIGPIPE. A failing diff counts as "changed".
+py_changed() {
+    local changed
+    if ! changed=$(git diff --name-only "$OLD_SHA" "$NEW_SHA" -- "$@"); then
+        log "WARNING: git diff failed for: $*. Assuming Python changes."
+        return 0
+    fi
+    grep -q '\.py$' <<<"$changed"
+}
+
+if [ ! -d "$DEPLOY_CHECKOUT/.git" ]; then
+    log "ERROR: Deploy checkout not found at $DEPLOY_CHECKOUT. Run setup first."
+    exit 1
+fi
+
+cd "$DEPLOY_CHECKOUT"
+if ! git fetch origin main --quiet 2>&1; then
+    log "ERROR: git fetch failed"
+    exit 1
+fi
+
+NEW_SHA=$(git rev-parse origin/main)
+OLD_SHA=$(cat "$STAMP_FILE" 2>/dev/null || echo "none")
+
+if [ "$NEW_SHA" = "$OLD_SHA" ]; then
+    exit 0
+fi
+
+# An empty or unknown stamp cannot be diffed against. Treat it as a first
+# deploy so every service is restarted instead of silently restarting none.
+if [ "$OLD_SHA" != "none" ] && ! git cat-file -e "${OLD_SHA}^{commit}" 2>/dev/null; then
+    log "WARNING: stamp '$OLD_SHA' is not a known commit. Treating as first deploy."
+    OLD_SHA="none"
+fi
+
+log "New commits: ${OLD_SHA:0:8} -> ${NEW_SHA:0:8}"
+
+if ! git checkout main --quiet 2>&1; then
+    log "ERROR: git checkout main failed — dirty tree or corrupted index"
+    exit 1
+fi
+# Fast-forward to the exact commit resolved above instead of pulling again, so
+# the deployed tree always matches the SHA that is written to the stamp file.
+if ! git merge --ff-only --quiet "$NEW_SHA" 2>&1; then
+    log "ERROR: git merge --ff-only failed. Manual intervention needed."
+    exit 1
+fi
+
+# Syntax check all Python files before copying
+ERRORS=0
+for f in lib/*.py *.py diagnostics/*.py telegram/*.py; do
+    [ -f "$f" ] || continue
+    if ! python3 -c "import ast, sys; ast.parse(open(sys.argv[1]).read())" "$f" 2>&1; then
+        log "SYNTAX ERROR: $f"
+        ERRORS=$((ERRORS + 1))
+    fi
+done
+if [ "$ERRORS" -gt 0 ]; then
+    log "ERROR: $ERRORS syntax errors. Deploy aborted. Fix and push again."
+    exit 1
+fi
+log "Syntax check passed"
+
+# Sync to working directories.
+# The flags are an array: in a plain string the single quotes around each
+# pattern reach rsync literally, so none of the excludes would ever match.
+RSYNC_FLAGS=(-az --exclude='__pycache__' --exclude='*.pyc' --exclude='*.bak*')
+
+rsync "${RSYNC_FLAGS[@]}" lib/ "$PIPELINE_DIR/lib/"
+
+for f in teleo-pipeline.py reweave.py; do
+    if [ -f "$f" ]; then
+        rsync "${RSYNC_FLAGS[@]}" "$f" "$PIPELINE_DIR/$f"
+    fi
+done
+
+rsync "${RSYNC_FLAGS[@]}" telegram/ "$PIPELINE_DIR/telegram/"
+rsync "${RSYNC_FLAGS[@]}" diagnostics/ "$DIAGNOSTICS_DIR/"
+rsync "${RSYNC_FLAGS[@]}" agent-state/ "$AGENT_STATE_DIR/"
+if [ -f research/research-session.sh ]; then
+    rsync "${RSYNC_FLAGS[@]}" research/research-session.sh /opt/teleo-eval/research-session.sh
+fi
+
+log "Files synced"
+
+# Restart services only if Python files changed
+RESTART=()
+if [ "$OLD_SHA" != "none" ]; then
+    if py_changed lib/ teleo-pipeline.py reweave.py telegram/; then
+        RESTART+=(teleo-pipeline)
+    fi
+    if py_changed diagnostics/; then
+        RESTART+=(teleo-diagnostics)
+    fi
+else
+    RESTART=(teleo-pipeline teleo-diagnostics)
+fi
+
+if [ "${#RESTART[@]}" -gt 0 ]; then
+    log "Restarting: ${RESTART[*]}"
+    FAIL=0
+    # Handle a failed restart explicitly so set -e does not abort before the
+    # per-service checks below have logged what went wrong.
+    if ! sudo systemctl restart "${RESTART[@]}"; then
+        log "ERROR: systemctl restart exited non-zero"
+        FAIL=1
+    fi
+    sleep 15
+
+    for svc in "${RESTART[@]}"; do
+        if systemctl is-active --quiet "$svc"; then
+            log "$svc: active"
+        else
+            log "ERROR: $svc failed to start"
+            journalctl -u "$svc" -n 5 --no-pager 2>/dev/null || true
+            FAIL=1
+        fi
+    done
+
+    # Space-padded list so each service name can be matched as a whole word.
+    RESTARTED=" ${RESTART[*]} "
+
+    if [[ "$RESTARTED" == *" teleo-pipeline "* ]]; then
+        if curl -sf --connect-timeout 3 http://localhost:8080/health > /dev/null 2>&1; then
+            log "pipeline health: OK"
+        else
+            log "WARNING: pipeline health check failed"
+            FAIL=1
+        fi
+    fi
+
+    if [[ "$RESTARTED" == *" teleo-diagnostics "* ]]; then
+        if curl -sf --connect-timeout 3 http://localhost:8081/ops > /dev/null 2>&1; then
+            log "diagnostics health: OK"
+        else
+            log "WARNING: diagnostics health check failed"
+            FAIL=1
+        fi
+    fi
+
+    if [ "$FAIL" -gt 0 ]; then
+        log "WARNING: Smoke test failures. NOT updating stamp. Will retry next cycle. Push a fix."
+        exit 1
+    fi
+else
+    log "No Python changes — services not restarted"
+fi
+
+echo "$NEW_SHA" > "$STAMP_FILE"
+log "Deploy complete: $(git log --oneline -1 "$NEW_SHA")"
diff --git a/systemd/teleo-auto-deploy.service b/systemd/teleo-auto-deploy.service
new file mode 100644
index 0000000..527b6c0
--- /dev/null
+++ b/systemd/teleo-auto-deploy.service
@@ -0,0 +1,10 @@
+[Unit]
+Description=Auto-deploy teleo-infrastructure from Forgejo to working directories
+After=network.target
+
+[Service]
+Type=oneshot
+User=teleo
+ExecStart=/opt/teleo-eval/workspaces/deploy-infra/deploy/auto-deploy.sh
+StandardOutput=journal
+StandardError=journal
diff --git a/systemd/teleo-auto-deploy.timer b/systemd/teleo-auto-deploy.timer
new file mode 100644
index 0000000..b61cfe8
--- /dev/null
+++ b/systemd/teleo-auto-deploy.timer
@@ -0,0 +1,10 @@
+[Unit]
+Description=Run teleo auto-deploy every 2 minutes
+
+[Timer]
+OnBootSec=30
+OnUnitActiveSec=2min
+AccuracySec=10s
+
+[Install]
+WantedBy=timers.target