#!/usr/bin/env bash
# auto-deploy.sh — Pull from Forgejo, sync to working dirs, restart if needed.
# Runs as systemd timer (teleo-auto-deploy.timer) every 2 minutes.
# Exits silently when nothing has changed.
#
# Flow:
#   1. Take a non-blocking lock so overlapping timer runs skip instead of racing.
#   2. Fetch origin/main and compare its SHA with the stamp file.
#   3. Fast-forward the deploy checkout and syntax-check the Python sources.
#   4. rsync the checkout into the working directories.
#   5. Restart the services whose Python files changed, then smoke-test them.
#   6. Record the deployed SHA in the stamp file (only when every check passed).
#
# Exit status: 0 when there is nothing to do, when another run holds the lock,
# or when the deploy succeeded; non-zero on any failure (stamp is not updated).

set -euo pipefail

# NOTE(review): predictable lock path in a world-writable directory — consider
# moving it under a directory owned by the deploy user.
LOCK_FILE="/tmp/teleo-auto-deploy.lock"
exec 9>"$LOCK_FILE"
if ! flock -n 9; then
  logger -t "auto-deploy" "Another deploy is already running. Skipping."
  exit 0
fi

DEPLOY_CHECKOUT="/opt/teleo-eval/workspaces/deploy"
PIPELINE_DIR="/opt/teleo-eval/pipeline"
DIAGNOSTICS_DIR="/opt/teleo-eval/diagnostics"
AGENT_STATE_DIR="/opt/teleo-eval/ops/agent-state"
STAMP_FILE="/opt/teleo-eval/.last-deploy-sha"
LOG_TAG="auto-deploy"

# Write a message to syslog (tagged $LOG_TAG) and, timestamped, to stdout.
# Arguments: $1 - message text
log() {
  logger -t "$LOG_TAG" "$1"
  printf '%s %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$1"
}

# Succeed when a .py file under path $1 differs between OLD_SHA and NEW_SHA.
# The diff output is captured before grepping: piping straight into `grep -q`
# under `pipefail` can report failure when grep exits early and git gets SIGPIPE.
# If the diff cannot be computed (e.g. the stamped SHA is unknown to this
# checkout) the function also succeeds, so the service is restarted rather
# than left running stale code.
# Globals:   OLD_SHA, NEW_SHA (read)
# Arguments: $1 - path inside the repository
python_changed_in() {
  local changed
  if ! changed=$(git diff --name-only "$OLD_SHA" "$NEW_SHA" -- "$1" 2>/dev/null); then
    log "WARNING: cannot diff ${OLD_SHA:0:8}..${NEW_SHA:0:8} for $1 — assuming Python changed"
    return 0
  fi
  grep -q '\.py$' <<<"$changed"
}

# Probe a health endpoint and log the result.
# Arguments: $1 - label used in the log message, $2 - URL to request
# Returns:   0 when the request succeeded, 1 otherwise
health_check() {
  if curl -sf --connect-timeout 3 "$2" > /dev/null 2>&1; then
    log "$1 health: OK"
    return 0
  fi
  log "WARNING: $1 health check failed"
  return 1
}

if [ ! -d "$DEPLOY_CHECKOUT/.git" ]; then
  log "ERROR: Deploy checkout not found at $DEPLOY_CHECKOUT. Run setup first."
  exit 1
fi

cd "$DEPLOY_CHECKOUT"
if ! git fetch origin main --quiet 2>&1; then
  log "ERROR: git fetch failed"
  exit 1
fi

NEW_SHA=$(git rev-parse origin/main)
OLD_SHA=$(cat "$STAMP_FILE" 2>/dev/null || echo "none")
# An empty stamp file is treated like a missing one (first deploy).
if [ -z "$OLD_SHA" ]; then
  OLD_SHA="none"
fi

if [ "$NEW_SHA" = "$OLD_SHA" ]; then
  exit 0
fi

log "New commits: ${OLD_SHA:0:8} -> ${NEW_SHA:0:8}"

if ! git checkout main --quiet 2>&1; then
  log "ERROR: git checkout main failed — dirty tree or corrupted index"
  exit 1
fi
if ! git pull --ff-only --quiet 2>&1; then
  log "ERROR: git pull --ff-only failed. Manual intervention needed."
  exit 1
fi

# Syntax check all Python files before copying
ERRORS=0
for f in ops/pipeline-v2/lib/*.py ops/pipeline-v2/*.py ops/diagnostics/*.py; do
  # An unmatched glob stays literal; skip anything that is not a real file.
  [ -f "$f" ] || continue
  if ! python3 -c "import ast, sys; ast.parse(open(sys.argv[1]).read())" "$f" 2>&1; then
    log "SYNTAX ERROR: $f"
    ERRORS=$((ERRORS + 1))
  fi
done
if [ "$ERRORS" -gt 0 ]; then
  log "ERROR: $ERRORS syntax errors. Deploy aborted. Fix and push again."
  exit 1
fi
log "Syntax check passed"

# Sync to working directories (mirrors deploy.sh logic)
# Flags live in an array: inside a plain string the quotes around each exclude
# pattern would be passed to rsync literally and the patterns would never match.
RSYNC_FLAGS=(-az --exclude='__pycache__' --exclude='*.pyc' --exclude='*.bak*')

rsync "${RSYNC_FLAGS[@]}" ops/pipeline-v2/lib/ "$PIPELINE_DIR/lib/"
for f in teleo-pipeline.py reweave.py batch-extract-50.sh; do
  if [ -f "ops/pipeline-v2/$f" ]; then
    rsync "${RSYNC_FLAGS[@]}" "ops/pipeline-v2/$f" "$PIPELINE_DIR/$f"
  fi
done
rsync "${RSYNC_FLAGS[@]}" ops/pipeline-v2/telegram/ "$PIPELINE_DIR/telegram/"
rsync "${RSYNC_FLAGS[@]}" ops/diagnostics/ "$DIAGNOSTICS_DIR/"
rsync "${RSYNC_FLAGS[@]}" ops/agent-state/ "$AGENT_STATE_DIR/"
if [ -f ops/research-session.sh ]; then
  rsync "${RSYNC_FLAGS[@]}" ops/research-session.sh /opt/teleo-eval/research-session.sh
fi
log "Files synced"

# Restart services only if Python files changed
SERVICES=()
if [ "$OLD_SHA" != "none" ]; then
  if python_changed_in ops/pipeline-v2/; then
    SERVICES+=(teleo-pipeline)
  fi
  if python_changed_in ops/diagnostics/; then
    SERVICES+=(teleo-diagnostics)
  fi
else
  # First deploy: no baseline to diff against, restart everything.
  SERVICES=(teleo-pipeline teleo-diagnostics)
fi

if [ "${#SERVICES[@]}" -gt 0 ]; then
  log "Restarting: ${SERVICES[*]}"
  FAIL=0
  # Do not let `set -e` abort here: on a failed restart we still want the
  # per-service status and journal excerpt below.
  if ! sudo systemctl restart "${SERVICES[@]}"; then
    log "ERROR: systemctl restart failed"
    FAIL=1
  fi
  sleep 5

  for svc in "${SERVICES[@]}"; do
    if systemctl is-active --quiet "$svc"; then
      log "$svc: active"
    else
      log "ERROR: $svc failed to start"
      # Best effort: the journal excerpt is diagnostic only.
      journalctl -u "$svc" -n 5 --no-pager 2>/dev/null || true
      FAIL=1
    fi
  done

  for svc in "${SERVICES[@]}"; do
    case "$svc" in
      teleo-pipeline)
        health_check "pipeline" http://localhost:8080/health || FAIL=1
        ;;
      teleo-diagnostics)
        health_check "diagnostics" http://localhost:8081/ops || FAIL=1
        ;;
    esac
  done

  if [ "$FAIL" -gt 0 ]; then
    # Code is already synced — push a fix, don't wait for next cycle
    log "WARNING: Smoke test failures. NOT updating stamp. Will retry next cycle. Push a fix."
    exit 1
  fi
else
  log "No Python changes — services not restarted"
fi

printf '%s\n' "$NEW_SHA" > "$STAMP_FILE"
log "Deploy complete: $(git log --oneline -1 "$NEW_SHA")"