#!/usr/bin/env bash
# auto-deploy.sh — Pull main from the deploy remote, sync to working dirs,
# restart services if needed.
# Runs as systemd timer (teleo-auto-deploy.timer) every 2 minutes.
# Exits silently when nothing has changed.
#
# Environment:
#   TELEO_DEPLOY_REMOTE  Optional git remote name to deploy from. When unset,
#                        the "github" remote is used if configured, otherwise
#                        "origin".
#
# Exit status:
#   0  nothing to do, another run holds the lock, or deploy completed
#   1  any failure (fetch/merge/syntax/sync/restart/smoke test); the stamp
#      file is left untouched so the next timer tick retries.

set -euo pipefail

# Single-instance guard: take a non-blocking exclusive flock on fd 9, which
# stays open for the lifetime of the script.
LOCK_FILE="/tmp/teleo-auto-deploy.lock"
exec 9>"$LOCK_FILE"
if ! flock -n 9; then
  logger -t "auto-deploy" "Another deploy is already running. Skipping."
  exit 0
fi

DEPLOY_CHECKOUT="/opt/teleo-eval/workspaces/deploy-infra"
PIPELINE_DIR="/opt/teleo-eval/pipeline"
TELEGRAM_DIR="/opt/teleo-eval/telegram"
DIAGNOSTICS_DIR="/opt/teleo-eval/diagnostics"
AGENT_STATE_DIR="/opt/teleo-eval/ops/agent-state"
STAMP_FILE="/opt/teleo-eval/.last-deploy-sha"
LOG_TAG="auto-deploy"

# Write a message to syslog (tagged $LOG_TAG) and, timestamped, to stdout.
# Arguments: $1 - message text
log() {
  logger -t "$LOG_TAG" "$1"
  echo "$(date '+%Y-%m-%d %H:%M:%S') $1"
}

# Verify the checkout before running any git command against it.
if [ ! -d "$DEPLOY_CHECKOUT/.git" ]; then
  log "ERROR: Deploy checkout not found at $DEPLOY_CHECKOUT. Run setup first."
  exit 1
fi
cd "$DEPLOY_CHECKOUT"

# Resolve the remote: explicit override, else "github" if present, else "origin".
DEPLOY_REMOTE="${TELEO_DEPLOY_REMOTE:-}"
if [ -z "$DEPLOY_REMOTE" ]; then
  if git remote get-url github >/dev/null 2>&1; then
    DEPLOY_REMOTE="github"
  else
    DEPLOY_REMOTE="origin"
  fi
fi

if ! git remote get-url "$DEPLOY_REMOTE" >/dev/null 2>&1; then
  log "ERROR: deploy remote '$DEPLOY_REMOTE' is not configured"
  exit 1
fi

if ! git fetch "$DEPLOY_REMOTE" main --quiet 2>&1; then
  log "ERROR: git fetch failed for $DEPLOY_REMOTE/main"
  exit 1
fi

NEW_SHA=$(git rev-parse "$DEPLOY_REMOTE/main")
# A missing stamp file means "never deployed"; an empty one is treated the same
# so that an empty string is never handed to git as a revision.
OLD_SHA=$(cat "$STAMP_FILE" 2>/dev/null || echo "none")
if [ -z "$OLD_SHA" ]; then
  OLD_SHA="none"
fi

# Nothing new since the last successful deploy: exit silently.
if [ "$NEW_SHA" = "$OLD_SHA" ]; then
  exit 0
fi

log "New commits: ${OLD_SHA:0:8} -> ${NEW_SHA:0:8}"

if ! git checkout main --quiet 2>&1; then
  log "ERROR: git checkout main failed — dirty tree or corrupted index"
  exit 1
fi

if ! git merge --ff-only "$DEPLOY_REMOTE/main" --quiet 2>&1; then
  log "ERROR: git merge --ff-only $DEPLOY_REMOTE/main failed. Manual intervention needed."
  exit 1
fi

# Syntax check all Python files before copying
ERRORS=0
for f in lib/*.py *.py diagnostics/*.py telegram/*.py tests/*.py; do
  # An unmatched glob stays literal; skip anything that is not a real file.
  [ -f "$f" ] || continue
  if ! python3 -c "import ast, sys; ast.parse(open(sys.argv[1]).read())" "$f" 2>&1; then
    log "SYNTAX ERROR: $f"
    ERRORS=$((ERRORS + 1))
  fi
done

if [ "$ERRORS" -gt 0 ]; then
  log "ERROR: $ERRORS syntax errors. Deploy aborted. Fix and push again."
  exit 1
fi
log "Syntax check passed"

# Sync to working directories
RSYNC_OPTS=(-az --exclude __pycache__ --exclude '*.pyc' --exclude '*.bak*')

rsync "${RSYNC_OPTS[@]}" lib/ "$PIPELINE_DIR/lib/"

for f in teleo-pipeline.py reweave.py fetch_coins.py pipeline-health-check.py; do
  if [ -f "$f" ]; then
    rsync "${RSYNC_OPTS[@]}" "$f" "$PIPELINE_DIR/$f"
  fi
done

# telegram/ is copied to two destinations: under the pipeline dir and to its own dir.
rsync "${RSYNC_OPTS[@]}" telegram/ "$PIPELINE_DIR/telegram/"
rsync "${RSYNC_OPTS[@]}" telegram/ "$TELEGRAM_DIR/"
rsync "${RSYNC_OPTS[@]}" diagnostics/ "$DIAGNOSTICS_DIR/"
rsync "${RSYNC_OPTS[@]}" agent-state/ "$AGENT_STATE_DIR/"
rsync "${RSYNC_OPTS[@]}" tests/ "$PIPELINE_DIR/tests/"

if [ -f research/research-session.sh ]; then
  rsync "${RSYNC_OPTS[@]}" research/research-session.sh /opt/teleo-eval/research-session.sh
fi

# Safety net: ensure all .sh files are executable after rsync
find /opt/teleo-eval -maxdepth 3 -name '*.sh' -not -perm -u+x -exec chmod +x {} +

log "Files synced"

# Restart services only if Python files changed
RESTART=""

# Append a unit to the space-separated RESTART list unless already present.
# Arguments: $1 - unit name (without the .service suffix)
add_restart() {
  case " $RESTART " in
    *" $1 "*) ;;
    *) RESTART="$RESTART $1" ;;
  esac
}

# Queue a unit for restart only when systemd lists a matching .service unit.
# The listing is captured first instead of piped into `grep -q`, so an early
# reader exit can never turn into a pipefail false negative.
# Arguments: $1 - unit name (without the .service suffix)
add_restart_if_unit_exists() {
  local listing
  listing=$(systemctl list-units --all --full "$1.service" --no-legend 2>/dev/null) || return 0
  if [ -n "$listing" ]; then
    add_restart "$1"
  fi
}

# Succeeds when a .py file under the given pathspecs differs between OLD_SHA
# and NEW_SHA. The diff is captured before matching: with `pipefail`, piping
# straight into `grep -q` can report "no match" if git is killed by SIGPIPE
# after grep exits on its first hit.
# Globals:   OLD_SHA, NEW_SHA (read)
# Arguments: git pathspecs
# Returns:   0 on match, 1 on no match, 2 if git diff itself failed
py_changed_in() {
  local changed
  changed=$(git diff --name-only "$OLD_SHA" "$NEW_SHA" -- "$@" 2>/dev/null) || return 2
  grep -q '\.py$' <<<"$changed"
}

# If the recorded SHA is not a commit in this checkout, the change set cannot
# be computed. Fall back to restarting everything rather than silently
# restarting nothing.
if [ "$OLD_SHA" != "none" ] && ! git cat-file -e "${OLD_SHA}^{commit}" 2>/dev/null; then
  log "WARNING: previous deploy SHA '${OLD_SHA:0:8}' is not a known commit. Restarting all services."
  OLD_SHA="none"
fi

if [ "$OLD_SHA" != "none" ]; then
  if py_changed_in lib/ teleo-pipeline.py reweave.py telegram/; then
    add_restart teleo-pipeline
  fi
  if py_changed_in telegram/; then
    add_restart teleo-agent@leo
    add_restart_if_unit_exists teleo-agent@leo-wallet-test
  fi
  if py_changed_in diagnostics/; then
    add_restart teleo-diagnostics
  fi
else
  # No usable previous deploy recorded: restart every known service.
  add_restart teleo-pipeline
  add_restart teleo-diagnostics
  add_restart teleo-agent@leo
  add_restart_if_unit_exists teleo-agent@leo-wallet-test
fi

if [ -n "$RESTART" ]; then
  log "Restarting:$RESTART"

  # Split the space-separated list into an array so every expansion is quoted.
  read -r -a restart_units <<<"$RESTART"

  FAIL=0
  # Do not let `set -e` abort here: log the failure and still run the checks
  # below so the journal tail of the broken unit gets printed. FAIL=1 keeps
  # the stamp from being updated even if the old processes are still active.
  if ! sudo systemctl restart "${restart_units[@]}"; then
    log "ERROR: systemctl restart exited non-zero"
    FAIL=1
  fi

  # Wait before probing so units that start and then crash are caught.
  sleep 30

  for svc in "${restart_units[@]}"; do
    if systemctl is-active --quiet "$svc"; then
      log "$svc: active"
    else
      log "ERROR: $svc failed to start"
      journalctl -u "$svc" -n 5 --no-pager 2>/dev/null || true
      FAIL=1
    fi
  done

  case " $RESTART " in
    *" teleo-pipeline "*)
      # curl already prints "000" through -w when no HTTP response arrives, so
      # the fallback must replace the value rather than be appended to it.
      HEALTH_CODE=$(curl -s -o /dev/null -w '%{http_code}' --connect-timeout 3 \
        http://localhost:8080/health 2>/dev/null) || HEALTH_CODE="000"
      HEALTH_CODE=${HEALTH_CODE:-000}
      # NOTE(review): 503 is accepted as healthy — presumably the endpoint
      # answers 503 while up but degraded; confirm against the service.
      if [ "$HEALTH_CODE" = "200" ] || [ "$HEALTH_CODE" = "503" ]; then
        log "pipeline health: OK (HTTP $HEALTH_CODE)"
      else
        log "WARNING: pipeline health check failed (HTTP $HEALTH_CODE)"
        FAIL=1
      fi
      ;;
  esac

  case " $RESTART " in
    *" teleo-diagnostics "*)
      if curl -sf --connect-timeout 3 http://localhost:8081/ops >/dev/null 2>&1; then
        log "diagnostics health: OK"
      else
        log "WARNING: diagnostics health check failed"
        FAIL=1
      fi
      ;;
  esac

  if [ "$FAIL" -gt 0 ]; then
    log "WARNING: Smoke test failures. NOT updating stamp. Will retry next cycle. Push a fix."
    exit 1
  fi
else
  log "No Python changes — services not restarted"
fi

# Record the deployed SHA only after everything above succeeded.
echo "$NEW_SHA" > "$STAMP_FILE"
log "Deploy complete: $(git log --oneline -1 "$NEW_SHA")"