- Mirror: fetch GitHub fork PR refs (refs/pull/*/head), push to Forgejo as gh-pr-N/branch
- Mirror: fork PRs auto-create Forgejo PR with GitHub PR title, link github_pr in DB
- db.py: add contrib + gh-pr-* to classify_branch for external contributor branches
- contributor.py: git commit author as attribution fallback (before branch agent)
- contributor.py: skip bot/generic authors (m3taversal, teleo, pipeline)
- Tests: fix fallback test for new git author path, add external contributor test

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
228 lines
11 KiB
Bash
Executable file
228 lines
11 KiB
Bash
Executable file
#!/bin/bash
# Bidirectional sync: Forgejo (authoritative) <-> GitHub (public mirror)
# Forgejo wins on conflict. Runs every 2 minutes via cron.
#
# Security note: GitHub->Forgejo path is for external contributor convenience.
# Never auto-process branches arriving via this path without a PR.
# Eval pipeline and extract cron only act on PRs, not raw branches.

# Abort on unhandled errors, unset variables, and failures anywhere in a pipeline.
set -euo pipefail

# Local mirror repository; the script uses its "forgejo" and "origin" (GitHub) remotes.
readonly REPO_DIR="/opt/teleo-eval/mirror/teleo-codex.git"
# Append-only log written by log() and by redirected git output.
readonly LOG="/opt/teleo-eval/logs/sync.log"
# PID file used to prevent overlapping runs.
readonly LOCKFILE="/tmp/sync-mirror.lock"
# SQLite database in which Forgejo PRs are linked to GitHub PR numbers.
readonly PIPELINE_DB="/opt/teleo-eval/pipeline/pipeline.db"
# File holding the GitHub personal access token used for API calls.
readonly GITHUB_PAT_FILE="/opt/teleo-eval/secrets/github-pat"
# GitHub repository in "owner/name" form.
readonly GITHUB_REPO="living-ip/teleo-codex"

# Append one timestamped line to the sync log ($LOG).
# Arguments: the message; multiple arguments are joined with single spaces.
# Safe to call with no arguments under `set -u` (logs an empty message).
log() {
  printf '[%s] %s\n' "$(date -Iseconds)" "$*" >> "$LOG"
}

# Lockfile — prevent concurrent runs
# The lock is a PID file. It is created with noclobber so that "does it exist?"
# and "create it" are one atomic step; two runs starting together can no longer
# both pass the check. A lock whose recorded PID is not a live process is
# treated as stale and taken over. (The stale-takeover path still has a small
# window; flock(1) would be needed for a strict guarantee.)

# Try to create $LOCKFILE containing this script's PID. Fails if it already exists.
_acquire_lock() {
  ( set -o noclobber; echo "$$" > "$LOCKFILE" ) 2>/dev/null
}

if ! _acquire_lock; then
  # `|| true`: the holder may remove the file between our check and the read.
  pid=$(cat "$LOCKFILE" 2>/dev/null || true)
  if [[ "$pid" =~ ^[1-9][0-9]*$ ]] && kill -0 "$pid" 2>/dev/null; then
    # Another run is still active.
    exit 0
  fi
  # Stale lock (holder gone, or file empty/unreadable): remove and retry once.
  rm -f "$LOCKFILE"
  if ! _acquire_lock; then
    # File exists again -> another run won the race; otherwise we cannot write it.
    if [ -e "$LOCKFILE" ]; then exit 0; fi
    log "ERROR: cannot create lockfile $LOCKFILE"
    exit 1
  fi
fi
# Only installed once we own the lock, so early exits never delete someone else's.
trap 'rm -f "$LOCKFILE"' EXIT

# Pre-flight: fix permissions if another user touched the mirror dir (Rhea)
# -print -quit stops at the first path not owned by teleo without needing a
# pipe to head (which makes find die of SIGPIPE under pipefail).
BAD_PERMS=$(find "$REPO_DIR" ! -user teleo -print -quit 2>/dev/null || true)
if [ -n "$BAD_PERMS" ]; then
  log "Fixing mirror permissions (found: $BAD_PERMS)"
  # Best effort: under `set -e` an unguarded chown failure would terminate the
  # whole sync without any log line. Record it and let the fetches below decide
  # whether the repository is actually usable.
  chown -R teleo:teleo "$REPO_DIR" 2>/dev/null \
    || log "WARN: could not chown $REPO_DIR to teleo:teleo — continuing"
fi
cd "$REPO_DIR" || { log "ERROR: cannot cd to $REPO_DIR"; exit 1; }

# Step 1: Fetch from Forgejo (must succeed — it's authoritative)
# A failed Forgejo fetch ends the run with status 1; git's output goes to $LOG.
log "Fetching from Forgejo..."
git fetch forgejo --prune >> "$LOG" 2>&1 || {
  log "ERROR: Forgejo fetch failed — aborting"
  exit 1
}

# Step 2: Fetch from GitHub (warn on failure, don't abort)
log "Fetching from GitHub..."
if ! git fetch origin --prune >> "$LOG" 2>&1; then
  log "WARN: GitHub fetch failed"
fi

# Step 2.1: Fetch GitHub fork PR refs
# Fork-based PRs don't create branches on origin — they create refs/pull/N/head
# Fetch these so we can push them to Forgejo for evaluation
#
# Each open fork PR is stored as the local branch gh-pr-<N>/<head branch>.
# Everything here is best effort: a missing token, an API failure or an
# unparsable response skips the step instead of aborting the sync.

# `|| true`: with pipefail a missing token file would otherwise exit the script
# before the -n guard below is ever evaluated.
GITHUB_PAT_STEP2=$(cat "$GITHUB_PAT_FILE" 2>/dev/null | tr -d '[:space:]' || true)
if [ -n "$GITHUB_PAT_STEP2" ]; then
  # NOTE: only the first page (at most 100 open PRs) is requested.
  OPEN_PRS=$(curl -sf "https://api.github.com/repos/$GITHUB_REPO/pulls?state=open&per_page=100" \
    -H "Authorization: token $GITHUB_PAT_STEP2" 2>/dev/null || echo "[]")

  # Emit "<number> <head ref> <head sha>" for every PR whose head repository
  # differs from the base repository (i.e. fork PRs).
  FORK_PRS=$(printf '%s\n' "$OPEN_PRS" | python3 -c '
import json, sys

prs = json.load(sys.stdin)
if not isinstance(prs, list):
    sys.exit(0)
for pr in prs:
    head = pr.get("head") or {}
    base_repo = ((pr.get("base") or {}).get("repo") or {}).get("full_name", "")
    head_repo = head.get("repo") or {}
    head_full = head_repo.get("full_name", "")
    if head_full and head_full != base_repo:
        print("{} {} {}".format(pr["number"], head.get("ref", ""), head.get("sha", "")))
' 2>/dev/null) || {
    log "WARN: Could not parse GitHub PR list — skipping fork PR fetch"
    FORK_PRS=""
  }

  while read -r pr_num branch_name head_sha; do
    if [ -z "$pr_num" ] || [ -z "$branch_name" ]; then continue; fi
    PR_BRANCH="gh-pr-${pr_num}/${branch_name}"
    # Check if we already have this ref at the right SHA.
    # --verify --quiet prints nothing when the branch does not exist yet.
    EXISTING=$(git rev-parse --verify --quiet "refs/heads/$PR_BRANCH" 2>/dev/null || true)
    if [ "$EXISTING" = "$head_sha" ]; then continue; fi
    # Fetch the PR ref and create/update the local branch.
    # "+" forces the update so a force-pushed PR head is not rejected as
    # non-fast-forward; stdin is detached so git (or an ssh transport) cannot
    # swallow the remaining loop input.
    if git fetch origin "+refs/pull/${pr_num}/head:refs/heads/$PR_BRANCH" >> "$LOG" 2>&1 < /dev/null; then
      log "Fetched fork PR #$pr_num -> $PR_BRANCH"
    else
      log "WARN: Failed to fetch fork PR #$pr_num"
    fi
  done <<< "$FORK_PRS"
fi

# Step 2.5: GitHub main -> Forgejo main (ff-only)
# If a PR was merged on GitHub, GitHub main is ahead of Forgejo main.
# Fast-forward Forgejo main to match — safe because ff-only guarantees no divergence.
#
# --verify --quiet makes rev-parse print nothing for a missing ref. Without it
# the unresolved ref name itself is echoed to stdout, so the -n checks below
# would pass even though no commit was resolved.
GITHUB_MAIN_FF=$(git rev-parse --verify --quiet refs/remotes/origin/main 2>/dev/null || true)
FORGEJO_MAIN_FF=$(git rev-parse --verify --quiet refs/remotes/forgejo/main 2>/dev/null || true)

# Act only when both tips resolved, they differ, and Forgejo main is an
# ancestor of GitHub main (i.e. GitHub is strictly ahead).
if [ -n "$GITHUB_MAIN_FF" ] && [ -n "$FORGEJO_MAIN_FF" ] \
  && [ "$GITHUB_MAIN_FF" != "$FORGEJO_MAIN_FF" ] \
  && git merge-base --is-ancestor "$FORGEJO_MAIN_FF" "$GITHUB_MAIN_FF"; then
  log "GitHub main ($GITHUB_MAIN_FF) ahead of Forgejo main ($FORGEJO_MAIN_FF) — fast-forwarding"
  # Plain (non-forced) push: the refspec has no "+" and --force is not used.
  if git push forgejo "refs/remotes/origin/main:refs/heads/main" >> "$LOG" 2>&1; then
    log "Forgejo main fast-forwarded to $GITHUB_MAIN_FF"
  else
    log "WARN: Failed to fast-forward Forgejo main"
  fi
fi

# Step 3: Forgejo -> GitHub (primary direction)
# Update local refs from Forgejo remote refs using process substitution (avoids subshell)
# NOTE(review): this also resets a local gh-pr-*/... branch that Step 2.1 just
# advanced back to Forgejo's copy whenever Forgejo already has that branch —
# confirm that updates to already-mirrored fork PRs are handled elsewhere.
log "Syncing Forgejo -> GitHub..."
while IFS= read -r branch; do
  if [ "$branch" = "HEAD" ]; then continue; fi
  git update-ref "refs/heads/$branch" "refs/remotes/forgejo/$branch" 2>/dev/null || \
    log "WARN: Failed to update ref $branch"
done < <(git for-each-ref --format="%(refname:lstrip=3)" refs/remotes/forgejo/)

# Safety: verify Forgejo main descends from GitHub main before force-pushing
# --verify --quiet yields an empty string for a missing ref. Plain rev-parse
# echoes the unresolved name, which made the check below run merge-base on a
# bogus revision and report "not a descendant" whenever GitHub had no main yet.
GITHUB_MAIN=$(git rev-parse --verify --quiet refs/remotes/origin/main 2>/dev/null || true)
FORGEJO_MAIN=$(git rev-parse --verify --quiet refs/remotes/forgejo/main 2>/dev/null || true)
PUSH_MAIN=true
if [ -n "$GITHUB_MAIN" ] && [ -n "$FORGEJO_MAIN" ]; then
  if ! git merge-base --is-ancestor "$GITHUB_MAIN" "$FORGEJO_MAIN"; then
    log "CRITICAL: Forgejo main is NOT a descendant of GitHub main — skipping main push"
    log "CRITICAL: GitHub main: $GITHUB_MAIN, Forgejo main: $FORGEJO_MAIN"
    PUSH_MAIN=false
  fi
fi

if [ "$PUSH_MAIN" = true ]; then
  # Force-push every local branch (refs/heads/*) to GitHub.
  git push origin --all --force >> "$LOG" 2>&1 || log "WARN: Push to GitHub failed"
else
  # Push all branches except main
  while IFS= read -r branch; do
    if [ "$branch" = "main" ] || [ "$branch" = "HEAD" ]; then continue; fi
    # stdin detached so the transport cannot consume the branch list.
    git push origin --force "refs/heads/$branch:refs/heads/$branch" >> "$LOG" 2>&1 < /dev/null || \
      log "WARN: Failed to push $branch to GitHub"
  done < <(git for-each-ref --format="%(refname:lstrip=2)" refs/heads/)
fi
git push origin --tags --force >> "$LOG" 2>&1 || log "WARN: Tag push to GitHub failed"

# Step 4: GitHub -> Forgejo (external contributions only)
# Only push branches that exist on GitHub but NOT on Forgejo
#
# For every such branch the branch is pushed to Forgejo; unless it is a
# pipeline-internal branch (extract/*, ingestion/*) a Forgejo PR is created when
# none exists yet, and the matching GitHub PR number is stored in the pipeline
# DB (Step 4.5). Every failure is logged and skips to the next branch.

FORGEJO_API="http://localhost:3000/api/v1/repos/teleo/teleo-codex"

# Print a fallback PR title for branch $1: first "/" becomes ": ", every "-" a space.
_title_from_branch() {
  local title=${1/\//: }
  printf '%s\n' "${title//-/ }"
}

# Print N when $1 has the form gh-pr-N/...; print nothing otherwise.
_gh_pr_num_from_branch() {
  if [[ "$1" =~ ^gh-pr-([0-9]+)/ ]]; then
    printf '%s\n' "${BASH_REMATCH[1]}"
  fi
}

# Read PR lists from stdin (one JSON array per line) and print "yes" if any PR
# has head.ref equal to $1, else "no". Unparsable lines are ignored. All input
# is consumed before answering so the producers never see a closed pipe.
_branch_has_pr() {
  python3 -c '
import json, sys

branch = sys.argv[1]
found = False
for line in sys.stdin.read().splitlines():
    line = line.strip()
    if not line:
        continue
    try:
        prs = json.loads(line)
    except ValueError:
        continue
    if not isinstance(prs, list):
        continue
    for pr in prs:
        head = pr.get("head") if isinstance(pr, dict) else None
        if isinstance(head, dict) and head.get("ref") == branch:
            found = True
print("yes" if found else "no")
' "$1"
}

# Print the JSON body for creating a PR titled $1 from head branch $2 into main.
# json.dumps does the escaping, so quotes/backslashes in an externally supplied
# title cannot break or extend the document.
_pr_payload() {
  python3 -c 'import json, sys; print(json.dumps({"title": sys.argv[1], "head": sys.argv[2], "base": "main"}))' "$1" "$2"
}

log "Checking GitHub-only branches..."
# grep -vx drops only the ref named exactly HEAD (not branches containing "HEAD").
GITHUB_ONLY=$(comm -23 \
  <(git for-each-ref --format="%(refname:lstrip=3)" refs/remotes/origin/ | grep -vx HEAD | sort) \
  <(git for-each-ref --format="%(refname:lstrip=3)" refs/remotes/forgejo/ | grep -vx HEAD | sort))

if [ -n "$GITHUB_ONLY" ]; then
  # `|| true`: a missing secret must disable the dependent feature, not abort
  # the script under set -e / pipefail.
  FORGEJO_TOKEN=$(cat /opt/teleo-eval/secrets/forgejo-admin-token 2>/dev/null || true)
  GITHUB_PAT=$(cat "$GITHUB_PAT_FILE" 2>/dev/null | tr -d '[:space:]' || true)

  mapfile -t GITHUB_ONLY_BRANCHES <<< "$GITHUB_ONLY"
  for branch in "${GITHUB_ONLY_BRANCHES[@]}"; do
    log "New from GitHub: $branch -> Forgejo"
    # Fork PR branches live as local refs (from Step 2.1), not on origin remote
    if [[ "$branch" == gh-pr-* ]]; then
      git push forgejo "refs/heads/$branch:refs/heads/$branch" >> "$LOG" 2>&1 || {
        log "WARN: Failed to push fork PR branch $branch to Forgejo"
        continue
      }
    else
      git push forgejo "refs/remotes/origin/$branch:refs/heads/$branch" >> "$LOG" 2>&1 || {
        log "WARN: Failed to push $branch to Forgejo"
        continue
      }
    fi

    # Auto-create PR on Forgejo for mirrored branches (external contributor path)
    # Skip pipeline-internal branches
    case "$branch" in
      extract/*|ingestion/*) continue ;;
    esac
    if [ -z "$FORGEJO_TOKEN" ]; then continue; fi

    # Check if PR already exists for this branch (open or closed)
    # NOTE: Forgejo ?head= filter is broken (ignores head value, returns all PRs).
    # Workaround: fetch open+closed PRs, pipe to Python, check head.ref.
    # NOTE: only the first 50 open and 50 closed PRs are inspected.
    HAS_PR=$( {
      curl -sf "$FORGEJO_API/pulls?state=open&limit=50" \
        -H "Authorization: token $FORGEJO_TOKEN" 2>/dev/null || echo "[]"
      echo ""
      curl -sf "$FORGEJO_API/pulls?state=closed&sort=created&limit=50" \
        -H "Authorization: token $FORGEJO_TOKEN" 2>/dev/null || echo "[]"
    } | _branch_has_pr "$branch" 2>/dev/null || echo "no")
    if [ "$HAS_PR" != "no" ]; then continue; fi

    # Build PR title — for fork PRs, use the GitHub PR title
    FORK_GH_NUM=$(_gh_pr_num_from_branch "$branch")
    PR_TITLE=""
    if [ -n "$FORK_GH_NUM" ] && [ -n "$GITHUB_PAT" ]; then
      PR_TITLE=$(curl -sf "https://api.github.com/repos/$GITHUB_REPO/pulls/$FORK_GH_NUM" \
        -H "Authorization: token $GITHUB_PAT" 2>/dev/null | \
        python3 -c "import sys,json; print(json.load(sys.stdin).get('title') or '')" 2>/dev/null || true)
    fi
    if [ -z "$PR_TITLE" ]; then
      PR_TITLE=$(_title_from_branch "$branch")
    fi

    PAYLOAD=$(_pr_payload "$PR_TITLE" "$branch" 2>/dev/null || true)
    RESULT=""
    if [ -n "$PAYLOAD" ]; then
      RESULT=$(curl -sf -X POST "$FORGEJO_API/pulls" \
        -H "Authorization: token $FORGEJO_TOKEN" \
        -H "Content-Type: application/json" \
        -d "$PAYLOAD" 2>/dev/null || echo "")
    fi
    # Top-level "number" of the created PR; empty when the response is not the
    # expected JSON object.
    PR_NUM=$(printf '%s' "$RESULT" | \
      python3 -c "import sys,json; n=json.load(sys.stdin).get('number'); print(n if isinstance(n,int) else '')" 2>/dev/null || true)
    if [ -z "$PR_NUM" ]; then
      log "WARN: Failed to auto-create PR for $branch"
      continue
    fi
    log "Auto-created PR #$PR_NUM on Forgejo for $branch"

    # Step 4.5: Link GitHub PR to Forgejo PR in pipeline DB
    GH_PR_NUM=""
    if [[ "$branch" == gh-pr-* ]]; then
      GH_PR_NUM="$FORK_GH_NUM"
    elif [ -n "$GITHUB_PAT" ]; then
      # Owner is taken from GITHUB_REPO; -G with --data-urlencode keeps branch
      # names containing URL-special characters from corrupting the query.
      GH_PR_NUM=$(curl -sfG "https://api.github.com/repos/$GITHUB_REPO/pulls" \
        --data-urlencode "head=${GITHUB_REPO%%/*}:$branch" \
        --data-urlencode "state=all" \
        -H "Authorization: token $GITHUB_PAT" 2>/dev/null | \
        python3 -c "import sys,json; prs=json.load(sys.stdin); print(prs[0]['number'] if prs else '')" 2>/dev/null || true)
    fi
    # Only purely numeric values are ever interpolated into the SQL statement.
    if [[ "$GH_PR_NUM" =~ ^[0-9]+$ ]]; then
      # changes() reports how many rows the UPDATE touched; an UPDATE that
      # matches no row still exits 0, so the exit status alone is not enough.
      # NOTE(review): assumes the prs row for the new PR may already exist at
      # this point — confirm against the pipeline's ingestion timing.
      LINKED_ROWS=$(sqlite3 "$PIPELINE_DB" \
        "UPDATE prs SET github_pr = $GH_PR_NUM WHERE number = $PR_NUM; SELECT changes();" 2>/dev/null || true)
      if [[ "$LINKED_ROWS" =~ ^[1-9][0-9]*$ ]]; then
        log "Linked GitHub PR #$GH_PR_NUM -> Forgejo PR #$PR_NUM"
      else
        log "WARN: Failed to link GitHub PR #$GH_PR_NUM to Forgejo PR #$PR_NUM in DB"
      fi
    fi
  done
else
  log "No new GitHub-only branches"
fi

log "Sync complete"