diff --git a/deploy/sync-mirror.sh b/deploy/sync-mirror.sh index 7b3a4e2..9963a67 100755 --- a/deploy/sync-mirror.sh +++ b/deploy/sync-mirror.sh @@ -275,9 +275,26 @@ print('no') fi log "Auto-created PR #$PR_NUM on Forgejo for $branch" - # Step 4.5: Link GitHub PR to Forgejo PR in pipeline DB + # Step 4.5: Link GitHub PR to Forgejo PR + populate submitted_by from GitHub user.login. + # + # Why submitted_by here: sync-mirror creates the Forgejo PR using the admin + # token (user=m3taversal), so the Forgejo PR's submitter is bot-identity, not + # the GitHub author (FwazB, etc). Without this fix, contribution_events emits + # zero author events for external PRs because record_contributor_attribution + # treats m3taversal as a bot and finds no other signal. Architectural fix per + # Ship + Cory after the FwazB PR #4066 attribution gap (Apr 28). + local GH_USER + GH_USER="" if [[ "$branch" == gh-pr-* ]]; then GH_PR_NUM=$(echo "$branch" | sed 's|gh-pr-\([0-9]*\)/.*|\1|') + # Look up GitHub PR author for submitted_by attribution. + local PAT_GHU + PAT_GHU=$(cat "$GITHUB_PAT_FILE" 2>/dev/null | tr -d '[:space:]') + if [ -n "$PAT_GHU" ] && [[ "$GH_PR_NUM" =~ ^[0-9]+$ ]]; then + GH_USER=$(curl -sf "https://api.github.com/repos/$GITHUB_REPO/pulls/$GH_PR_NUM" \ + -H "Authorization: token $PAT_GHU" 2>/dev/null | \ + python3 -c "import sys,json; print((json.load(sys.stdin).get('user') or {}).get('login',''))" 2>/dev/null || true) + fi else local PAT PAT=$(cat "$GITHUB_PAT_FILE" 2>/dev/null | tr -d '[:space:]') @@ -289,9 +306,24 @@ print('no') fi fi if [[ "$GH_PR_NUM" =~ ^[0-9]+$ ]] && [[ "$PR_NUM" =~ ^[0-9]+$ ]]; then - sqlite3 "$PIPELINE_DB" "UPDATE prs SET github_pr = $GH_PR_NUM, source_channel = 'github' WHERE number = $PR_NUM;" 2>/dev/null && \ - log "Linked GitHub PR #$GH_PR_NUM -> Forgejo PR #$PR_NUM" || \ - log "WARN: Failed to link GitHub PR #$GH_PR_NUM to Forgejo PR #$PR_NUM in DB" + # GitHub usernames: 1-39 chars, alphanumeric + hyphen, can't start/end with hyphen. + # Strict regex is the SQL-injection safety boundary (no bash sqlite3 parametric binding). + if [[ "$GH_USER" =~ ^[a-zA-Z0-9]([a-zA-Z0-9-]{0,37}[a-zA-Z0-9])?$ ]]; then + # Lowercase for canonical handle storage (matches contributor.py normalize) + local GH_USER_LC + GH_USER_LC=$(echo "$GH_USER" | tr '[:upper:]' '[:lower:]') + sqlite3 "$PIPELINE_DB" "UPDATE prs SET github_pr = $GH_PR_NUM, source_channel = 'github', submitted_by = '$GH_USER_LC' WHERE number = $PR_NUM;" 2>/dev/null && \ + log "Linked GitHub PR #$GH_PR_NUM -> Forgejo PR #$PR_NUM (author: $GH_USER_LC)" || \ + log "WARN: Failed to link GitHub PR #$GH_PR_NUM to Forgejo PR #$PR_NUM in DB" + else + # Fall back to link-only when user.login lookup failed or was malformed. + # Still better than no link; record_contributor_attribution can use + # other signals (commit author trailer, etc) — author event may still + # not emit, but submitted_by stays NULL/whatever discovery set. + sqlite3 "$PIPELINE_DB" "UPDATE prs SET github_pr = $GH_PR_NUM, source_channel = 'github' WHERE number = $PR_NUM;" 2>/dev/null && \ + log "Linked GitHub PR #$GH_PR_NUM -> Forgejo PR #$PR_NUM (author lookup failed)" || \ + log "WARN: Failed to link GitHub PR #$GH_PR_NUM to Forgejo PR #$PR_NUM in DB" + fi fi done }