#!/bin/bash
# Bidirectional sync: Forgejo (authoritative) <-> GitHub (public mirror)
# Forgejo wins on conflict. Runs every 2 minutes via cron.
#
# Repos handled (see MIRROR_REPOS below):
#   - teleo-codex (mode=bidirectional): full PR roundtrip — fork PR refs from
#     GitHub, auto-create Forgejo PR mirrors, link github_pr in pipeline.db.
#   - teleo-infrastructure (mode=main_only): one-way sync of main + tags from
#     Forgejo to GitHub. No PR roundtrip — pipeline doesn't process infra PRs;
#     external infra PRs land on GitHub for visibility, get reviewed manually.
#
# Security note: GitHub->Forgejo path is for external contributor convenience.
# Never auto-process branches arriving via this path without a PR.
# Eval pipeline and extract cron only act on PRs, not raw branches.
#
# Error-handling note: the script runs under `set -euo pipefail`, so every
# best-effort step must explicitly absorb its own failure (`|| true`,
# `|| log ...`, or an `if`), otherwise one optional step kills the whole cycle.

set -euo pipefail

LOG="/opt/teleo-eval/logs/sync.log"
LOCKFILE="/tmp/sync-mirror.lock"
PIPELINE_DB="/opt/teleo-eval/pipeline/pipeline.db"
GITHUB_PAT_FILE="/opt/teleo-eval/secrets/github-pat"

# (forgejo_owner_repo, github_owner_repo, bare_path, mode)
# mode: bidirectional | main_only
MIRROR_REPOS=(
  "teleo/teleo-codex living-ip/teleo-codex /opt/teleo-eval/mirror/teleo-codex.git bidirectional"
  "teleo/teleo-infrastructure living-ip/teleo-infrastructure /opt/teleo-eval/mirror/teleo-infrastructure.git main_only"
)

# Log prefix; overwritten per repo by sync_repo.
REPO_TAG="main"

# log MESSAGE — append a timestamped, repo-tagged line to $LOG.
log() { echo "[$(date -Iseconds)] [$REPO_TAG] $1" >> "$LOG"; }

# read_secret FILE — print FILE's content with all whitespace removed.
# Prints nothing and still returns 0 when FILE is missing or unreadable, so
# callers can test for an empty value instead of being killed by errexit.
read_secret() {
  cat "$1" 2>/dev/null | tr -d '[:space:]' || true
}

# Lockfile — prevent concurrent runs (single lock for whole script).
# A lock whose recorded PID is no longer alive is treated as stale.
if [ -f "$LOCKFILE" ]; then
  pid=$(cat "$LOCKFILE" 2>/dev/null || true)
  if [ -n "$pid" ] && kill -0 "$pid" 2>/dev/null; then
    exit 0
  fi
  rm -f "$LOCKFILE"
fi
echo $$ > "$LOCKFILE"
trap 'rm -f "$LOCKFILE"' EXIT

# ─────────────────────────────────────────────────────────────────────────────
# sync_repo: process one mirror entry. Sets module-level FORGEJO_REPO,
# GITHUB_REPO, REPO_DIR, MODE, REPO_TAG used by inner steps.
# Arguments: $1 Forgejo owner/repo, $2 GitHub owner/repo, $3 bare mirror dir,
#            $4 mode (bidirectional | main_only)
# Returns:   0 on every handled error path so the caller's loop keeps going.
# ─────────────────────────────────────────────────────────────────────────────
sync_repo() {
  FORGEJO_REPO="$1"               # e.g. teleo/teleo-codex (path on Forgejo)
  GITHUB_REPO="$2"                # e.g. living-ip/teleo-codex (path on GitHub)
  REPO_DIR="$3"                   # bare mirror dir
  MODE="$4"                       # bidirectional | main_only
  REPO_TAG="${FORGEJO_REPO##*/}"  # short name for log prefix

  local branch

  # Pre-flight: bare repo must exist
  if [ ! -d "$REPO_DIR" ]; then
    log "ERROR: bare repo missing at $REPO_DIR — skipping"
    return 0
  fi

  # Pre-flight: fix permissions if another user touched the mirror dir (Rhea)
  local BAD_PERMS
  BAD_PERMS=$(find "$REPO_DIR" ! -user teleo 2>/dev/null | head -1 || true)
  if [ -n "$BAD_PERMS" ]; then
    log "Fixing mirror permissions (found: $BAD_PERMS)"
    chown -R teleo:teleo "$REPO_DIR" 2>/dev/null || true
  fi

  cd "$REPO_DIR" || { log "ERROR: cannot cd to $REPO_DIR"; return 0; }

  # Step 1: Fetch from Forgejo (must succeed — it's authoritative)
  log "Fetching from Forgejo..."
  if ! git fetch forgejo --prune >> "$LOG" 2>&1; then
    log "ERROR: Forgejo fetch failed — skipping this repo"
    return 0
  fi

  # Step 2: Fetch from GitHub (warn on failure, don't abort)
  log "Fetching from GitHub..."
  git fetch origin --prune >> "$LOG" 2>&1 || log "WARN: GitHub fetch failed"

  # Step 2.1: Fetch GitHub fork PR refs (bidirectional only)
  # Fork-based PRs don't create branches on origin — they create refs/pull/N/head.
  # main_only repos don't accept fork PRs through the mirror path.
  if [ "$MODE" = "bidirectional" ]; then
    local PAT
    PAT=$(read_secret "$GITHUB_PAT_FILE")
    if [ -n "$PAT" ]; then
      local OPEN_PRS
      OPEN_PRS=$(curl -sf "https://api.github.com/repos/$GITHUB_REPO/pulls?state=open&per_page=100" \
        -H "Authorization: token $PAT" 2>/dev/null || echo "[]")
      # Python emits "<pr_number> <head_ref> <head_sha>" for each PR whose head
      # repo differs from the base repo (i.e. a fork PR). The trailing
      # `|| log` keeps a parse failure from aborting the run under pipefail.
      echo "$OPEN_PRS" | python3 -c "
import sys, json
prs = json.load(sys.stdin)
for pr in prs:
    head = pr.get('head', {})
    base_repo = pr.get('base', {}).get('repo', {}).get('full_name', '')
    head_repo = head.get('repo', {}) or {}
    head_full = head_repo.get('full_name', '')
    if head_full and head_full != base_repo:
        print(f\"{pr['number']} {head.get('ref', '')} {head.get('sha', '')}\")
" 2>/dev/null | while read -r pr_num branch_name head_sha; do
        if [ -z "$pr_num" ] || [ -z "$branch_name" ]; then continue; fi
        local PR_BRANCH="gh-pr-${pr_num}/${branch_name}"
        local EXISTING
        EXISTING=$(git rev-parse "refs/heads/$PR_BRANCH" 2>/dev/null || true)
        # Already at the PR's head commit — nothing to fetch.
        if [ "$EXISTING" = "$head_sha" ]; then continue; fi
        # NOTE(review): refspec has no leading '+', so a force-pushed fork PR
        # presumably fails here as non-fast-forward — confirm that is intended.
        if git fetch origin "refs/pull/${pr_num}/head:refs/heads/$PR_BRANCH" >> "$LOG" 2>&1; then
          log "Fetched fork PR #$pr_num -> $PR_BRANCH"
        else
          log "WARN: Failed to fetch fork PR #$pr_num"
        fi
      done || log "WARN: Fork PR ref scan failed"
    fi
  fi

  # Step 2.5: GitHub main -> Forgejo main (ff-only)
  # If a PR was merged on GitHub, GitHub main is ahead of Forgejo main.
  # Fast-forward Forgejo main to match — safe because ff-only guarantees no divergence.
  local GITHUB_MAIN_FF FORGEJO_MAIN_FF
  GITHUB_MAIN_FF=$(git rev-parse refs/remotes/origin/main 2>/dev/null || true)
  FORGEJO_MAIN_FF=$(git rev-parse refs/remotes/forgejo/main 2>/dev/null || true)
  if [ -n "$GITHUB_MAIN_FF" ] && [ -n "$FORGEJO_MAIN_FF" ]; then
    if [ "$GITHUB_MAIN_FF" != "$FORGEJO_MAIN_FF" ]; then
      if git merge-base --is-ancestor "$FORGEJO_MAIN_FF" "$GITHUB_MAIN_FF"; then
        log "GitHub main ($GITHUB_MAIN_FF) ahead of Forgejo main ($FORGEJO_MAIN_FF) — fast-forwarding"
        if git push forgejo "refs/remotes/origin/main:refs/heads/main" >> "$LOG" 2>&1; then
          log "Forgejo main fast-forwarded to $GITHUB_MAIN_FF"
        else
          log "WARN: Failed to fast-forward Forgejo main"
        fi
      fi
    fi
  fi

  # Step 3: Forgejo -> GitHub (primary direction)
  # Point every local branch at its Forgejo remote-tracking counterpart.
  log "Syncing Forgejo -> GitHub..."
  while read -r branch; do
    [ "$branch" = "HEAD" ] && continue
    git update-ref "refs/heads/$branch" "refs/remotes/forgejo/$branch" 2>/dev/null || \
      log "WARN: Failed to update ref $branch"
  done < <(git for-each-ref --format="%(refname:lstrip=3)" refs/remotes/forgejo/)

  # Safety: verify Forgejo main descends from GitHub main before force-pushing
  local GITHUB_MAIN FORGEJO_MAIN PUSH_MAIN
  GITHUB_MAIN=$(git rev-parse refs/remotes/origin/main 2>/dev/null || true)
  FORGEJO_MAIN=$(git rev-parse refs/remotes/forgejo/main 2>/dev/null || true)
  PUSH_MAIN=true
  if [ -n "$GITHUB_MAIN" ] && [ -n "$FORGEJO_MAIN" ]; then
    if ! git merge-base --is-ancestor "$GITHUB_MAIN" "$FORGEJO_MAIN"; then
      log "CRITICAL: Forgejo main is NOT a descendant of GitHub main — skipping main push"
      log "CRITICAL: GitHub main: $GITHUB_MAIN, Forgejo main: $FORGEJO_MAIN"
      PUSH_MAIN=false
    fi
  fi

  if [ "$MODE" = "main_only" ]; then
    # Infra-style mirror: push main + tags ONLY. Pre-review agent branches
    # (epimetheus/*, ganymede/*, etc.) carry internal context — agent UUIDs,
    # in-flight discussion, WIP — and must not land in the public GitHub
    # history. (Ganymede review, finding #1.)
    if [ "$PUSH_MAIN" = true ]; then
      git push origin --force "refs/heads/main:refs/heads/main" >> "$LOG" 2>&1 || \
        log "WARN: main push to GitHub failed"
    fi
  else
    # Bidirectional mirror (codex): push all branches so external
    # contributors can fork from any branch, not just main.
    if [ "$PUSH_MAIN" = true ]; then
      git push origin --all --force >> "$LOG" 2>&1 || log "WARN: Push to GitHub failed"
    else
      # Push all branches except main when main is divergent
      while read -r branch; do
        [ "$branch" = "main" ] && continue
        [ "$branch" = "HEAD" ] && continue
        git push origin --force "refs/heads/$branch:refs/heads/$branch" >> "$LOG" 2>&1 || \
          log "WARN: Failed to push $branch to GitHub"
      done < <(git for-each-ref --format="%(refname:lstrip=2)" refs/heads/)
    fi
  fi
  git push origin --tags --force >> "$LOG" 2>&1 || log "WARN: Tag push to GitHub failed"

  # Step 4: GitHub -> Forgejo + Forgejo PR auto-create (bidirectional only)
  if [ "$MODE" = "bidirectional" ]; then
    sync_github_to_forgejo_with_prs
  fi

  # Step 6: Divergence alerting (applies to both modes)
  check_divergence
}

# ─────────────────────────────────────────────────────────────────────────────
# Step 4 split out: codex-specific GitHub→Forgejo branch push + PR auto-create.
# Reads FORGEJO_REPO, GITHUB_REPO, PIPELINE_DB, REPO_TAG from sync_repo scope.
# Must be called with the bare mirror as the current directory.
# ─────────────────────────────────────────────────────────────────────────────
sync_github_to_forgejo_with_prs() {
  log "Checking GitHub-only branches..."

  local FORGEJO_HOST="http://localhost:3000/api/v1/repos/$FORGEJO_REPO"
  local GITHUB_OWNER="${GITHUB_REPO%%/*}"   # owner part, used in ?head=owner:branch
  local branch

  # Branches present under refs/remotes/origin/ but not refs/remotes/forgejo/.
  local GITHUB_ONLY
  GITHUB_ONLY=$(comm -23 \
    <(git for-each-ref --format="%(refname:lstrip=3)" refs/remotes/origin/ | grep -v HEAD | sort) \
    <(git for-each-ref --format="%(refname:lstrip=3)" refs/remotes/forgejo/ | grep -v HEAD | sort))

  if [ -z "$GITHUB_ONLY" ]; then
    log "No new GitHub-only branches"
    return 0
  fi

  # Both secrets are optional: without the Forgejo token, branches are still
  # pushed but no PR is created; without the PAT, titles and links fall back.
  local FORGEJO_TOKEN GH_PAT
  FORGEJO_TOKEN=$(read_secret /opt/teleo-eval/secrets/forgejo-admin-token)
  GH_PAT=$(read_secret "$GITHUB_PAT_FILE")

  # Word-splitting is intentional: git ref names cannot contain whitespace
  # or glob characters.
  # shellcheck disable=SC2086
  for branch in $GITHUB_ONLY; do
    log "New from GitHub: $branch -> Forgejo"
    # Fork PR branches live as local refs (from Step 2.1), not on origin remote
    if [[ "$branch" == gh-pr-* ]]; then
      git push forgejo "refs/heads/$branch:refs/heads/$branch" >> "$LOG" 2>&1 || {
        log "WARN: Failed to push fork PR branch $branch to Forgejo"
        continue
      }
    else
      git push forgejo "refs/remotes/origin/$branch:refs/heads/$branch" >> "$LOG" 2>&1 || {
        log "WARN: Failed to push $branch to Forgejo"
        continue
      }
    fi

    # Skip pipeline-internal branch prefixes (no PR creation)
    case "$branch" in
      extract/*|ingestion/*) continue ;;
    esac

    if [ -z "$FORGEJO_TOKEN" ]; then continue; fi

    # Check if PR already exists for this branch (open or closed)
    # NOTE: Forgejo ?head= filter is broken (ignores head value, returns all PRs).
    # Workaround: fetch open+closed PRs, pipe to Python, check head.ref.
    # Each API response is expected on its own line; the `echo ""` separates them.
    # sys.exit(0) is deliberately OUTSIDE the try block: a bare `except:` around
    # it would swallow SystemExit and let the trailing print('no') run as well.
    local HAS_PR
    HAS_PR=$( {
      curl -sf "$FORGEJO_HOST/pulls?state=open&limit=50" \
        -H "Authorization: token $FORGEJO_TOKEN" 2>/dev/null || echo "[]"
      echo ""
      curl -sf "$FORGEJO_HOST/pulls?state=closed&sort=created&limit=50" \
        -H "Authorization: token $FORGEJO_TOKEN" 2>/dev/null || echo "[]"
    } | python3 -c "
import sys, json
branch = sys.argv[1]
for line in sys.stdin:
    line = line.strip()
    if not line or line == '[]':
        continue
    try:
        prs = json.loads(line)
    except ValueError:
        continue
    if not isinstance(prs, list):
        continue
    for pr in prs:
        if isinstance(pr, dict) and (pr.get('head') or {}).get('ref') == branch:
            print('yes')
            sys.exit(0)
print('no')
" "$branch" 2>/dev/null || echo "no")

    if [ "$HAS_PR" = "yes" ]; then continue; fi

    # Build PR title — for fork PRs, use the GitHub PR title
    local PR_TITLE PAYLOAD RESULT PR_NUM GH_PR_NUM
    if [[ "$branch" == gh-pr-* ]]; then
      local FORK_GH_NUM
      FORK_GH_NUM=$(echo "$branch" | sed 's|gh-pr-\([0-9]*\)/.*|\1|')
      PR_TITLE=$(curl -sf "https://api.github.com/repos/$GITHUB_REPO/pulls/$FORK_GH_NUM" \
        -H "Authorization: token $GH_PAT" 2>/dev/null | \
        python3 -c "import sys,json; print(json.load(sys.stdin).get('title',''))" 2>/dev/null || true)
      # Fall back to a title derived from the branch name.
      [ -z "$PR_TITLE" ] && PR_TITLE=$(echo "$branch" | sed 's|/|: |;s/-/ /g')
    else
      PR_TITLE=$(echo "$branch" | sed 's|/|: |;s/-/ /g')
    fi

    # json.dumps handles quoting of the (externally supplied) title.
    PAYLOAD=$(python3 -c "import sys,json; print(json.dumps({'title':sys.argv[1],'head':sys.argv[2],'base':'main'}))" "$PR_TITLE" "$branch")

    RESULT=$(curl -sf -X POST "$FORGEJO_HOST/pulls" \
      -H "Authorization: token $FORGEJO_TOKEN" \
      -H "Content-Type: application/json" \
      -d "$PAYLOAD" 2>/dev/null || echo "")

    # Take the top-level "number" via a real JSON parse; empty on any failure.
    PR_NUM=$(printf '%s' "$RESULT" | python3 -c "
import sys, json
num = json.load(sys.stdin).get('number', '')
print(num if isinstance(num, int) else '')
" 2>/dev/null || true)

    if [ -z "$PR_NUM" ]; then
      log "WARN: Failed to auto-create PR for $branch"
      continue
    fi
    log "Auto-created PR #$PR_NUM on Forgejo for $branch"

    # Step 4.5: Link GitHub PR to Forgejo PR in pipeline DB
    if [[ "$branch" == gh-pr-* ]]; then
      GH_PR_NUM=$(echo "$branch" | sed 's|gh-pr-\([0-9]*\)/.*|\1|')
    else
      GH_PR_NUM=""
      if [ -n "$GH_PAT" ]; then
        GH_PR_NUM=$(curl -sf "https://api.github.com/repos/$GITHUB_REPO/pulls?head=$GITHUB_OWNER:$branch&state=all" \
          -H "Authorization: token $GH_PAT" 2>/dev/null | \
          python3 -c "import sys,json; prs=json.load(sys.stdin); print(prs[0]['number'] if prs else '')" 2>/dev/null || true)
      fi
    fi

    # Both values are interpolated into SQL, so require pure integers.
    if [[ "$GH_PR_NUM" =~ ^[0-9]+$ ]] && [[ "$PR_NUM" =~ ^[0-9]+$ ]]; then
      if sqlite3 "$PIPELINE_DB" "UPDATE prs SET github_pr = $GH_PR_NUM, source_channel = 'github' WHERE number = $PR_NUM;" 2>/dev/null; then
        log "Linked GitHub PR #$GH_PR_NUM -> Forgejo PR #$PR_NUM"
      else
        log "WARN: Failed to link GitHub PR #$GH_PR_NUM to Forgejo PR #$PR_NUM in DB"
      fi
    fi
  done
}

# ─────────────────────────────────────────────────────────────────────────────
# Step 6 split out: divergence alerting. Per-repo state file so each repo
# has its own divergence counter and alert state.
# State file content: a cycle count, or the literal "alerted" once the
# Telegram alert has been delivered. The file is removed when mains match.
# ─────────────────────────────────────────────────────────────────────────────
check_divergence() {
  local DIVERGENCE_FILE="/opt/teleo-eval/logs/.divergence-count.${REPO_TAG}"
  git fetch forgejo main --quiet 2>/dev/null || true
  git fetch origin main --quiet 2>/dev/null || true

  local GH_MAIN_FINAL FG_MAIN_FINAL PREV
  GH_MAIN_FINAL=$(git rev-parse refs/remotes/origin/main 2>/dev/null || true)
  FG_MAIN_FINAL=$(git rev-parse refs/remotes/forgejo/main 2>/dev/null || true)

  if [ -n "$GH_MAIN_FINAL" ] && [ -n "$FG_MAIN_FINAL" ] && [ "$GH_MAIN_FINAL" != "$FG_MAIN_FINAL" ]; then
    PREV=$(cat "$DIVERGENCE_FILE" 2>/dev/null || echo "0")

    if [ "$PREV" = "alerted" ]; then
      log "DIVERGENCE: still diverged (already alerted)"
    else
      # A corrupt counter would make the arithmetic below fail under set -u.
      [[ "$PREV" =~ ^[0-9]+$ ]] || PREV=0
      local COUNT=$((PREV + 1))
      echo "$COUNT" > "$DIVERGENCE_FILE"
      log "DIVERGENCE: cycle $COUNT — GitHub=$GH_MAIN_FINAL Forgejo=$FG_MAIN_FINAL"

      # Alert from the second consecutive diverged cycle onwards.
      if [ "$COUNT" -ge 2 ]; then
        local BOT_TOKEN ADMIN_CHAT
        BOT_TOKEN=$(read_secret /opt/teleo-eval/secrets/telegram-bot-token)
        ADMIN_CHAT=$(read_secret /opt/teleo-eval/secrets/admin-chat-id)

        if [ -n "$BOT_TOKEN" ] && [ -n "$ADMIN_CHAT" ]; then
          local ALERT_MSG
          # `\\n` reaches Python as `\n` (bash double-quote unescaping).
          ALERT_MSG=$(python3 -c "
import json, sys
msg = '⚠️ Mirror divergence detected (' + sys.argv[5] + ')\\n\\n'
msg += f'GitHub main: {sys.argv[1][:8]}\\n'
msg += f'Forgejo main: {sys.argv[2][:8]}\\n'
msg += f'Diverged for {sys.argv[3]} consecutive cycles ({int(sys.argv[3])*2} min)\\n\\n'
msg += 'Check sync-mirror.sh logs: /opt/teleo-eval/logs/sync.log'
print(json.dumps({'chat_id': sys.argv[4], 'text': msg, 'parse_mode': 'HTML'}))
" "$GH_MAIN_FINAL" "$FG_MAIN_FINAL" "$COUNT" "$ADMIN_CHAT" "$REPO_TAG")

          if curl -sf -X POST "https://api.telegram.org/bot${BOT_TOKEN}/sendMessage" \
            -H "Content-Type: application/json" \
            -d "$ALERT_MSG" >> "$LOG" 2>&1; then
            log "DIVERGENCE: alert sent to admin"
            echo "alerted" > "$DIVERGENCE_FILE"
          else
            log "WARN: Failed to send divergence alert (will retry next cycle)"
          fi
        else
          log "WARN: Cannot send divergence alert — missing bot token or admin chat ID"
        fi
      fi
    fi
  else
    if [ -f "$DIVERGENCE_FILE" ]; then
      PREV=$(cat "$DIVERGENCE_FILE" 2>/dev/null || echo "0")
      if [ "$PREV" != "0" ]; then
        log "DIVERGENCE: resolved — repos back in sync"
      fi
      rm -f "$DIVERGENCE_FILE"
    fi
  fi
}

# ─────────────────────────────────────────────────────────────────────────────
# Main: process each configured mirror in sequence.
# A failure on one repo doesn't block subsequent repos — sync_repo returns 0
# on most error paths to keep the loop going.
# ─────────────────────────────────────────────────────────────────────────────
REPO_TAG="main"
log "Starting sync cycle"

# Step 0: self-heal any gh-pr-* PR rows missing github_pr.
# Runs FIRST — before per-repo work (branch-mirror loop, auto-create-PR block).
# Recovers from races/transient failures in Step 4.5's one-shot link UPDATE.
# Idempotent: SELECT empty when clean, zero-cost path. Same SELECT/UPDATE
# heals historical orphans (PR 4066 picked up on first cron tick post-deploy)
# and future races on subsequent ticks. The branch name encodes the GitHub PR
# number deterministically (gh-pr-{N}/...) so no API call is required.
# Best-effort: a failing SELECT (locked DB, missing table) is logged and the
# sync continues instead of aborting under errexit+pipefail.
if [ -f "$PIPELINE_DB" ]; then
  sqlite3 -separator '|' "$PIPELINE_DB" \
    "SELECT number, branch FROM prs WHERE branch LIKE 'gh-pr-%' AND github_pr IS NULL;" \
    2>/dev/null | while IFS='|' read -r pr_num branch; do
    # Regex requires >=1 digit — empty/non-numeric branches fail to parse here,
    # not just at the empty-guard below. [0-9][0-9]* is the portable BRE form
    # of [0-9]+, works on both GNU sed (VPS) and BSD sed (dev macs).
    gh_pr_num=$(echo "$branch" | sed -n 's|^gh-pr-\([0-9][0-9]*\)/.*|\1|p')
    [ -z "$gh_pr_num" ] && continue
    # No parametric binding available in bash sqlite3, so both interpolated
    # values must be plain integers: gh_pr_num by the sed regex above, pr_num
    # by this explicit check.
    [[ "$pr_num" =~ ^[0-9]+$ ]] || continue
    if sqlite3 "$PIPELINE_DB" \
      "UPDATE prs SET github_pr = $gh_pr_num, source_channel = 'github' WHERE number = $pr_num;" \
      2>/dev/null; then
      log "self-heal: linked Forgejo PR #$pr_num -> GitHub PR #$gh_pr_num"
    fi
  done || log "WARN: self-heal query failed — continuing with sync"
fi

for entry in "${MIRROR_REPOS[@]}"; do
  # Read the 4 fields. `read` splits on $IFS (whitespace) by default.
  read -r forgejo_repo github_repo bare_path mode <<< "$entry"
  sync_repo "$forgejo_repo" "$github_repo" "$bare_path" "$mode"
done

REPO_TAG="main"
log "Sync cycle complete"