Patch summary: serialize ops/auto-deploy.sh with flock; stop discarding stderr from git checkout, the Python syntax checks and branch deletion; deploy ops/pipeline-v2/telegram/; skip unmerged branches in ops/prune-branches.sh; add two SOP rules.
Review note: RSYNC_FLAGS in ops/auto-deploy.sh is deliberately left unchanged. Quotes stored inside a string variable are not removed when $RSYNC_FLAGS is expanded unquoted, so --exclude='*.pyc' would reach rsync with the quote characters as part of the pattern and the excludes would stop matching.
diff --git a/ops/AGENT-SOP.md b/ops/AGENT-SOP.md index 4a6a3cdf4..3f17e9670 100644 --- a/ops/AGENT-SOP.md +++ b/ops/AGENT-SOP.md @@ -68,9 +68,11 @@ Check auto-deploy status: `journalctl -u teleo-auto-deploy -n 20` ## Shell and Python Safety - Run `bash -n script.sh` after modifying any shell script. +- Never suppress stderr on critical git commands (`2>/dev/null || true`). Log errors, fail hard. - Never interpolate shell variables into Python strings via `'$var'`. Pass values via `os.environ` or `sys.argv`. - Never write credentials to `.git/config`. Use per-command `git -c http.extraHeader`. +- Tunable constants live in `ops/pipeline-v2/lib/config.py`. Don't hardcode numbers in module files. ## Schema Changes diff --git a/ops/auto-deploy.sh b/ops/auto-deploy.sh index 893883445..0117285f5 100755 --- a/ops/auto-deploy.sh +++ b/ops/auto-deploy.sh @@ -4,6 +4,13 @@ # Exits silently when nothing has changed. set -euo pipefail +LOCK_FILE="/tmp/teleo-auto-deploy.lock" +exec 9>"$LOCK_FILE" +if ! flock -n 9; then + logger -t "auto-deploy" "Another deploy is already running. Skipping." + exit 0 +fi + DEPLOY_CHECKOUT="/opt/teleo-eval/workspaces/deploy" PIPELINE_DIR="/opt/teleo-eval/pipeline" DIAGNOSTICS_DIR="/opt/teleo-eval/diagnostics" @@ -33,7 +40,10 @@ fi log "New commits: ${OLD_SHA:0:8} -> ${NEW_SHA:0:8}" -git checkout main --quiet 2>/dev/null || true +if ! git checkout main --quiet 2>&1; then + log "ERROR: git checkout main failed — dirty tree or corrupted index" + exit 1 +fi if ! git pull --ff-only --quiet 2>&1; then log "ERROR: git pull --ff-only failed. Manual intervention needed." exit 1 @@ -43,7 +53,7 @@ fi ERRORS=0 for f in ops/pipeline-v2/lib/*.py ops/pipeline-v2/*.py ops/diagnostics/*.py; do [ -f "$f" ] || continue - if ! python3 -c "import ast, sys; ast.parse(open(sys.argv[1]).read())" "$f" 2>/dev/null; then + if ! 
python3 -c "import ast, sys; ast.parse(open(sys.argv[1]).read())" "$f" 2>&1; then log "SYNTAX ERROR: $f" ERRORS=$((ERRORS + 1)) fi @@ -63,6 +73,7 @@ for f in teleo-pipeline.py reweave.py batch-extract-50.sh; do [ -f "ops/pipeline-v2/$f" ] && rsync $RSYNC_FLAGS "ops/pipeline-v2/$f" "$PIPELINE_DIR/$f" done +rsync $RSYNC_FLAGS ops/pipeline-v2/telegram/ "$PIPELINE_DIR/telegram/" rsync $RSYNC_FLAGS ops/diagnostics/ "$DIAGNOSTICS_DIR/" rsync $RSYNC_FLAGS ops/agent-state/ "$AGENT_STATE_DIR/" [ -f ops/research-session.sh ] && rsync $RSYNC_FLAGS ops/research-session.sh /opt/teleo-eval/research-session.sh @@ -117,7 +128,8 @@ if [ -n "$RESTART" ]; then fi if [ "$FAIL" -gt 0 ]; then - log "WARNING: Smoke test failures. NOT updating stamp. Will retry next cycle." + # Code is already synced — push a fix, don't wait for next cycle + log "WARNING: Smoke test failures. NOT updating stamp. Will retry next cycle. Push a fix." exit 1 fi else diff --git a/ops/deploy.sh b/ops/deploy.sh index c571e9fca..861ec9bfe 100755 --- a/ops/deploy.sh +++ b/ops/deploy.sh @@ -43,7 +43,7 @@ echo "=== Pre-deploy syntax check ===" ERRORS=0 for f in "$REPO_ROOT/ops/pipeline-v2/lib/"*.py "$REPO_ROOT/ops/pipeline-v2/"*.py "$REPO_ROOT/ops/diagnostics/"*.py; do [ -f "$f" ] || continue - if ! python3 -c "import ast, sys; ast.parse(open(sys.argv[1]).read())" "$f" 2>/dev/null; then + if ! 
python3 -c "import ast, sys; ast.parse(open(sys.argv[1]).read())" "$f" 2>&1; then echo "SYNTAX ERROR: $f" ERRORS=$((ERRORS + 1)) fi @@ -76,6 +76,10 @@ echo "=== Diagnostics ===" rsync $RSYNC_FLAGS "$REPO_ROOT/ops/diagnostics/" "$VPS_HOST:$VPS_DIAGNOSTICS/" echo "" +echo "=== Telegram bot ===" +rsync $RSYNC_FLAGS "$REPO_ROOT/ops/pipeline-v2/telegram/" "$VPS_HOST:$VPS_PIPELINE/telegram/" +echo "" + echo "=== Agent state ===" rsync $RSYNC_FLAGS "$REPO_ROOT/ops/agent-state/" "$VPS_HOST:$VPS_AGENT_STATE/" echo "" diff --git a/ops/evaluate-trigger.sh b/ops/evaluate-trigger.sh index aa865cb68..078fae861 100755 --- a/ops/evaluate-trigger.sh +++ b/ops/evaluate-trigger.sh @@ -64,7 +64,7 @@ detect_code_pr() { files=$(gh pr view "$pr_number" --json files --jq '.files[].path' 2>/dev/null || echo "") - if echo "$files" | grep -qE "^ops/|^diagnostics/|\.py$|\.sh$|\.js$|\.html$|\.css$|\.json$"; then + if echo "$files" | grep -qE "^ops/|\.py$|\.sh$|\.js$|\.html$|\.css$|\.json$"; then echo "true" else echo "false" diff --git a/ops/prune-branches.sh b/ops/prune-branches.sh index d563278df..5409af063 100755 --- a/ops/prune-branches.sh +++ b/ops/prune-branches.sh @@ -41,9 +41,13 @@ while IFS= read -r branch; do COUNT=$((COUNT + 1)) if [[ "$last_date" < "$CUTOFF" ]]; then + if ! git merge-base --is-ancestor "$branch" "$REMOTE/main" 2>&1; then + echo " SKIP (unmerged): $short ($last_date)" + continue + fi if $EXECUTE; then echo " DELETE: $short ($last_date)" - git push "$REMOTE" --delete "$short" 2>/dev/null && DELETE_COUNT=$((DELETE_COUNT + 1)) || echo " FAILED: $short" + git push "$REMOTE" --delete "$short" 2>&1 && DELETE_COUNT=$((DELETE_COUNT + 1)) || echo " FAILED: $short" else echo " WOULD DELETE: $short ($last_date)" DELETE_COUNT=$((DELETE_COUNT + 1))