From b57cb41e3168a79d13603aeb6dd25d416ad2e98a Mon Sep 17 00:00:00 2001
From: m3taversal
Date: Tue, 28 Apr 2026 18:21:04 +0100
Subject: [PATCH] fix(mirror): Step 0b self-heals stuck submitted_by via cron retry
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Step 4.5's submitted_by write inherits the same one-shot failure mode that
Phase 1's Step 0 sweep was designed to retire. On transient GitHub API
failure, link-only fallback writes github_pr and permanently closes the
retry window — submitted_by stays stuck on 'm3taversal' (bot identity),
and contribution_events never gets the author event.

Step 0b adds a second sweep alongside Step 0:

  SELECT ... WHERE github_pr IS NOT NULL
    AND (submitted_by IS NULL OR submitted_by = 'm3taversal')

Same idempotent cron-retry shape: SELECT empty when clean, per-row
GitHub API call + UPDATE only when stuck. Targets bidirectional repos
only (gh-pr-* branches don't exist for main_only mirrors). Derives
bidirectional GitHub repo from MIRROR_REPOS at sweep time since Step 0
runs before sync_repo() sets GITHUB_REPO scope.

Doubles as future-proof safety net: any external PR landing during the
deploy window with submitted_by stuck on bot identity gets self-healed
on the next cron tick. No backfill script needed.

Per Ganymede line-level review of 1decf09.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
Review changes relative to the patch as originally posted:
  - pr_num is checked to be all digits before it is interpolated into SQL
    (SQLite does not enforce declared column types).
  - The GitHub API call is bounded with --connect-timeout / --max-time so a
    hung connection cannot stall the cron run.
  - The UPDATE re-checks the stuck predicate and the log line fires only
    when changes() reports a rewritten row, so a PR genuinely authored by
    'm3taversal' is no longer logged as healed on every tick.
  - The index line below still carries the post-image id of the original
    patch; regenerate with git format-patch after applying.
  - Leading whitespace was reconstructed (4-space indent step assumed);
    apply with --ignore-whitespace if the context lines do not match.

 deploy/sync-mirror.sh | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)

diff --git a/deploy/sync-mirror.sh b/deploy/sync-mirror.sh
index 9963a67..766d5bf 100755
--- a/deploy/sync-mirror.sh
+++ b/deploy/sync-mirror.sh
@@ -425,6 +425,66 @@ if [ -f "$PIPELINE_DB" ]; then
             log "self-heal: linked Forgejo PR #$pr_num -> GitHub PR #$gh_pr_num"
         fi
     done
+
+    # Step 0b: self-heal stuck submitted_by on linked gh-pr-* PRs.
+    # Step 4.5 fall-through writes github_pr but leaves submitted_by as 'm3taversal'
+    # (bot identity) on transient GitHub API failures. Without retry, contribution_events
+    # never gets the author event — same one-shot failure mode Step 0 was designed to
+    # retire. Same idempotent cron-retry shape: SELECT empty when clean, per-row API
+    # call only when stuck. Targets bidirectional repos only (gh-pr-* branches don't
+    # exist for main_only mirrors).
+    GH_REPO_BIDIR=""
+    for entry in "${MIRROR_REPOS[@]}"; do
+        read -r _f gh_repo _bare mode <<< "$entry"
+        if [ "$mode" = "bidirectional" ]; then
+            GH_REPO_BIDIR="$gh_repo"
+            break
+        fi
+    done
+    if [ -n "$GH_REPO_BIDIR" ] && [ -f "$GITHUB_PAT_FILE" ]; then
+        sqlite3 -separator '|' "$PIPELINE_DB" \
+            "SELECT number, branch FROM prs
+             WHERE branch LIKE 'gh-pr-%'
+               AND github_pr IS NOT NULL
+               AND (submitted_by IS NULL OR submitted_by = 'm3taversal');" \
+            2>/dev/null | while IFS='|' read -r pr_num branch; do
+            # SQLite does not enforce column types, so do not trust the INTEGER
+            # declaration: require digits before pr_num is spliced into SQL.
+            [[ "$pr_num" =~ ^[0-9]+$ ]] || continue
+            gh_pr_num=$(echo "$branch" | sed -n 's|^gh-pr-\([0-9][0-9]*\)/.*|\1|p')
+            [ -z "$gh_pr_num" ] && continue
+            PAT_S0=$(cat "$GITHUB_PAT_FILE" 2>/dev/null | tr -d '[:space:]')
+            [ -z "$PAT_S0" ] && continue
+            # Bounded timeouts: a hung GitHub connection must not stall the cron
+            # run. A timeout leaves gh_user empty and the row is retried next tick.
+            gh_user=$(curl -sf --connect-timeout 5 --max-time 20 \
+                "https://api.github.com/repos/$GH_REPO_BIDIR/pulls/$gh_pr_num" \
+                -H "Authorization: token $PAT_S0" 2>/dev/null | \
+                python3 -c "import sys,json; print((json.load(sys.stdin).get('user') or {}).get('login',''))" 2>/dev/null || true)
+            # Regex matches Step 4.5: GitHub username spec (anchored, alnum + hyphen,
+            # no consecutive-hyphen check). SQL-injection boundary: char class excludes
+            # quotes/semicolons/backslashes, so single-quoted literal is safe.
+            if [[ "$gh_user" =~ ^[a-zA-Z0-9]([a-zA-Z0-9-]{0,37}[a-zA-Z0-9])?$ ]]; then
+                gh_user_lc=$(echo "$gh_user" | tr '[:upper:]' '[:lower:]')
+                # Re-check the stuck predicate inside the UPDATE and report changes()
+                # so the log fires only when a row was really rewritten. Without this,
+                # a PR genuinely authored by 'm3taversal' is "healed" to the same value
+                # and logged on every cron tick.
+                # NOTE(review): such a row still matches the sweep SELECT, so it costs
+                # one GitHub API call per tick until the schema can mark it resolved.
+                changed=$(sqlite3 "$PIPELINE_DB" \
+                    "UPDATE prs SET submitted_by = '$gh_user_lc'
+                      WHERE number = $pr_num
+                        AND (submitted_by IS NULL
+                             OR (submitted_by = 'm3taversal'
+                                 AND submitted_by != '$gh_user_lc'));
+                     SELECT changes();" 2>/dev/null || true)
+                if [[ "$changed" =~ ^[1-9][0-9]*$ ]]; then
+                    log "self-heal: set submitted_by=$gh_user_lc on Forgejo PR #$pr_num (GitHub PR #$gh_pr_num)"
+                fi
+            fi
+        done
+    fi
 fi
 
 for entry in "${MIRROR_REPOS[@]}"; do