fix(mirror): Step 0b self-heals stuck submitted_by via cron retry

Step 4.5's submitted_by write inherits the same one-shot failure mode
that Phase 1's Step 0 sweep was designed to retire. On transient
GitHub API failure, link-only fallback writes github_pr and permanently
closes the retry window — submitted_by stays stuck on 'm3taversal'
(bot identity), and contribution_events never gets the author event.

Step 0b adds a second sweep alongside Step 0:
  SELECT ... WHERE github_pr IS NOT NULL
              AND (submitted_by IS NULL OR submitted_by = 'm3taversal')

Same idempotent cron-retry shape: SELECT empty when clean, per-row
GitHub API call + UPDATE only when stuck. Targets bidirectional
repos only (gh-pr-* branches don't exist for main_only mirrors).

Derives bidirectional GitHub repo from MIRROR_REPOS at sweep time
since Step 0 runs before sync_repo() sets GITHUB_REPO scope.

Doubles as future-proof safety net: any external PR landing during
the deploy window with submitted_by stuck on bot identity gets
self-healed on the next cron tick. No backfill script needed.

Per Ganymede line-level review of 1decf09.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
m3taversal 2026-04-28 18:21:04 +01:00
parent 1decf09598
commit b57cb41e31

View file

@ -425,6 +425,50 @@ if [ -f "$PIPELINE_DB" ]; then
log "self-heal: linked Forgejo PR #$pr_num -> GitHub PR #$gh_pr_num"
fi
done
# Step 0b: self-heal stuck submitted_by on linked gh-pr-* PRs.
# Step 4.5 fall-through writes github_pr but leaves submitted_by as 'm3taversal'
# (bot identity) on transient GitHub API failures. Without retry, contribution_events
# never gets the author event — same one-shot failure mode Step 0 was designed to
# retire. Same idempotent cron-retry shape: SELECT empty when clean, per-row API
# call only when stuck. Targets bidirectional repos only (gh-pr-* branches don't
# exist for main_only mirrors).
#
# Reads:  MIRROR_REPOS, PIPELINE_DB, GITHUB_PAT_FILE.
# Writes: prs.submitted_by (lower-cased GitHub login) for rows still stuck;
#         shell variables GH_REPO_BIDIR and PAT_S0B.
#
# NOTE(review): only the FIRST bidirectional entry of MIRROR_REPOS is used. If a
# second bidirectional mirror is ever configured, its gh-pr-* rows would be looked
# up against the wrong GitHub repo — confirm there is exactly one.
GH_REPO_BIDIR=""
for entry in "${MIRROR_REPOS[@]}"; do
  # Entries are whitespace-separated; only field 2 (GitHub repo) and field 4
  # (mode) matter here, fields 1 and 3 are discarded.
  read -r _f gh_repo _bare mode <<< "$entry"
  if [ "$mode" = "bidirectional" ]; then
    GH_REPO_BIDIR="$gh_repo"
    break
  fi
done

# Read the token once per sweep instead of once per row. An unreadable or empty
# file leaves PAT_S0B empty, which skips the sweep (and its SELECT) entirely.
PAT_S0B=""
if [ -n "$GH_REPO_BIDIR" ] && [ -f "$GITHUB_PAT_FILE" ]; then
  PAT_S0B=$( { tr -d '[:space:]' < "$GITHUB_PAT_FILE"; } 2>/dev/null || true)
fi

if [ -n "$PAT_S0B" ]; then
  sqlite3 -separator '|' "$PIPELINE_DB" \
    "SELECT number, branch FROM prs
      WHERE branch LIKE 'gh-pr-%'
        AND github_pr IS NOT NULL
        AND (submitted_by IS NULL OR submitted_by = 'm3taversal');" \
    2>/dev/null | while IFS='|' read -r pr_num branch; do
    # pr_num is interpolated into SQL below. SQLite column types are affinities,
    # not guarantees, so insist on plain digits rather than trusting the schema.
    [[ "$pr_num" =~ ^[0-9]+$ ]] || continue
    # Branch shape is gh-pr-<N>/<rest>; anything else is not a mirrored GitHub PR.
    [[ "$branch" =~ ^gh-pr-([0-9]+)/ ]] || continue
    gh_pr_num="${BASH_REMATCH[1]}"
    # Bounded timeouts: this runs from cron, so a stalled connection must not
    # wedge the whole mirror run. Any failure yields an empty gh_user and the row
    # is simply retried on the next tick.
    # NOTE(review): the token is passed on curl's argv (visible in `ps`), kept
    # as-is for parity with the existing API calls in this script.
    gh_user=$(curl -sf --connect-timeout 10 --max-time 30 \
      "https://api.github.com/repos/$GH_REPO_BIDIR/pulls/$gh_pr_num" \
      -H "Authorization: token $PAT_S0B" 2>/dev/null | \
      python3 -c "import sys,json; print((json.load(sys.stdin).get('user') or {}).get('login',''))" 2>/dev/null || true)
    # Regex matches Step 4.5: GitHub username spec (anchored, alnum + hyphen,
    # no consecutive-hyphen check). SQL-injection boundary: char class excludes
    # quotes/semicolons/backslashes, so single-quoted literal is safe.
    if [[ "$gh_user" =~ ^[a-zA-Z0-9]([a-zA-Z0-9-]{0,37}[a-zA-Z0-9])?$ ]]; then
      gh_user_lc=$(printf '%s' "$gh_user" | tr '[:upper:]' '[:lower:]')
      # Guarded UPDATE:
      #  - re-checks the "stuck" predicate so a value written between the SELECT
      #    and this statement is never overwritten;
      #  - skips the write when the resolved author already equals the stored
      #    value (a PR genuinely opened by 'm3taversal'), so the log line below
      #    is emitted only for real repairs instead of on every cron tick.
      # changes() reports the rows modified by the UPDATE on this connection.
      # NOTE(review): a PR authored by 'm3taversal' still matches the SELECT and
      # costs one API call per tick; eliminating that needs a schema-level marker.
      changed=$(sqlite3 "$PIPELINE_DB" \
        "UPDATE prs SET submitted_by = '$gh_user_lc'
          WHERE number = $pr_num
            AND (submitted_by IS NULL OR submitted_by = 'm3taversal')
            AND submitted_by IS NOT '$gh_user_lc';
         SELECT changes();" 2>/dev/null || true)
      if [[ "$changed" =~ ^[1-9][0-9]*$ ]]; then
        log "self-heal: set submitted_by=$gh_user_lc on Forgejo PR #$pr_num (GitHub PR #$gh_pr_num)"
      fi
    fi
  done
fi
fi
for entry in "${MIRROR_REPOS[@]}"; do