feat: external contributor pipeline — fork PR handling, attribution, prefix recognition
- Mirror: fetch GitHub fork PR refs (refs/pull/*/head), push to Forgejo as gh-pr-N/branch
- Mirror: fork PRs auto-create Forgejo PR with GitHub PR title, link github_pr in DB
- db.py: add contrib + gh-pr-* to classify_branch for external contributor branches
- contributor.py: git commit author as attribution fallback (before branch agent)
- contributor.py: skip bot/generic authors (m3taversal, teleo, pipeline)
- Tests: fix fallback test for new git author path, add external contributor test

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
0b28c71e11
commit
13f21f7732
4 changed files with 114 additions and 19 deletions
|
|
@ -47,6 +47,37 @@ fi
|
|||
log "Fetching from GitHub..."
|
||||
git fetch origin --prune >> "$LOG" 2>&1 || log "WARN: GitHub fetch failed"
|
||||
|
||||
# Step 2.1: Fetch GitHub fork PR refs
|
||||
# Fork-based PRs don't create branches on origin — they create refs/pull/N/head
|
||||
# Fetch these so we can push them to Forgejo for evaluation
|
||||
GITHUB_PAT_STEP2=$(cat "$GITHUB_PAT_FILE" 2>/dev/null | tr -d '[:space:]')
|
||||
if [ -n "$GITHUB_PAT_STEP2" ]; then
|
||||
OPEN_PRS=$(curl -sf "https://api.github.com/repos/$GITHUB_REPO/pulls?state=open&per_page=100" \
|
||||
-H "Authorization: token $GITHUB_PAT_STEP2" 2>/dev/null || echo "[]")
|
||||
echo "$OPEN_PRS" | python3 -c "
|
||||
import sys, json
|
||||
prs = json.load(sys.stdin)
|
||||
for pr in prs:
|
||||
head = pr.get('head', {})
|
||||
# Only process fork PRs (repo differs from base repo)
|
||||
base_repo = pr.get('base', {}).get('repo', {}).get('full_name', '')
|
||||
head_repo = head.get('repo', {}) or {}
|
||||
head_full = head_repo.get('full_name', '')
|
||||
if head_full and head_full != base_repo:
|
||||
print(f\"{pr['number']} {head.get('ref', '')} {head.get('sha', '')}\")
|
||||
" 2>/dev/null | while read pr_num branch_name head_sha; do
|
||||
if [ -z "$pr_num" ] || [ -z "$branch_name" ]; then continue; fi
|
||||
PR_BRANCH="gh-pr-${pr_num}/${branch_name}"
|
||||
# Check if we already have this ref at the right SHA
|
||||
EXISTING=$(git rev-parse "refs/heads/$PR_BRANCH" 2>/dev/null || true)
|
||||
if [ "$EXISTING" = "$head_sha" ]; then continue; fi
|
||||
# Fetch the PR ref and create a local branch
|
||||
git fetch origin "refs/pull/${pr_num}/head:refs/heads/$PR_BRANCH" >> "$LOG" 2>&1 && \
|
||||
log "Fetched fork PR #$pr_num -> $PR_BRANCH" || \
|
||||
log "WARN: Failed to fetch fork PR #$pr_num"
|
||||
done
|
||||
fi
|
||||
|
||||
# Step 2.5: GitHub main -> Forgejo main (ff-only)
|
||||
# If a PR was merged on GitHub, GitHub main is ahead of Forgejo main.
|
||||
# Fast-forward Forgejo main to match — safe because ff-only guarantees no divergence.
|
||||
|
|
@ -108,10 +139,18 @@ if [ -n "$GITHUB_ONLY" ]; then
|
|||
FORGEJO_TOKEN=$(cat /opt/teleo-eval/secrets/forgejo-admin-token 2>/dev/null)
|
||||
for branch in $GITHUB_ONLY; do
|
||||
log "New from GitHub: $branch -> Forgejo"
|
||||
git push forgejo "refs/remotes/origin/$branch:refs/heads/$branch" >> "$LOG" 2>&1 || {
|
||||
log "WARN: Failed to push $branch to Forgejo"
|
||||
continue
|
||||
}
|
||||
# Fork PR branches live as local refs (from Step 2.1), not on origin remote
|
||||
if [[ "$branch" == gh-pr-* ]]; then
|
||||
git push forgejo "refs/heads/$branch:refs/heads/$branch" >> "$LOG" 2>&1 || {
|
||||
log "WARN: Failed to push fork PR branch $branch to Forgejo"
|
||||
continue
|
||||
}
|
||||
else
|
||||
git push forgejo "refs/remotes/origin/$branch:refs/heads/$branch" >> "$LOG" 2>&1 || {
|
||||
log "WARN: Failed to push $branch to Forgejo"
|
||||
continue
|
||||
}
|
||||
fi
|
||||
# Auto-create PR on Forgejo for mirrored branches (external contributor path)
|
||||
# Skip pipeline-internal branches
|
||||
case "$branch" in
|
||||
|
|
@ -141,7 +180,17 @@ for line in sys.stdin:
|
|||
print('no')
|
||||
" "$branch" 2>/dev/null || echo "no")
|
||||
if [ "$HAS_PR" = "no" ]; then
|
||||
PR_TITLE=$(echo "$branch" | sed 's|/|: |;s/-/ /g')
|
||||
# Build PR title — for fork PRs, use the GitHub PR title
|
||||
if [[ "$branch" == gh-pr-* ]]; then
|
||||
FORK_GH_NUM=$(echo "$branch" | sed 's|gh-pr-\([0-9]*\)/.*|\1|')
|
||||
GITHUB_PAT_T=$(cat "$GITHUB_PAT_FILE" 2>/dev/null | tr -d '[:space:]')
|
||||
PR_TITLE=$(curl -sf "https://api.github.com/repos/$GITHUB_REPO/pulls/$FORK_GH_NUM" \
|
||||
-H "Authorization: token $GITHUB_PAT_T" 2>/dev/null | \
|
||||
python3 -c "import sys,json; print(json.load(sys.stdin).get('title',''))" 2>/dev/null || true)
|
||||
[ -z "$PR_TITLE" ] && PR_TITLE=$(echo "$branch" | sed 's|/|: |;s/-/ /g')
|
||||
else
|
||||
PR_TITLE=$(echo "$branch" | sed 's|/|: |;s/-/ /g')
|
||||
fi
|
||||
RESULT=$(curl -sf -X POST "http://localhost:3000/api/v1/repos/teleo/teleo-codex/pulls" \
|
||||
-H "Authorization: token $FORGEJO_TOKEN" \
|
||||
-H "Content-Type: application/json" \
|
||||
|
|
@ -150,17 +199,22 @@ print('no')
|
|||
if [ -n "$PR_NUM" ]; then
|
||||
log "Auto-created PR #$PR_NUM on Forgejo for $branch"
|
||||
# Step 4.5: Link GitHub PR to Forgejo PR in pipeline DB
|
||||
GITHUB_PAT=$(cat "$GITHUB_PAT_FILE" 2>/dev/null | tr -d '[:space:]')
|
||||
if [ -n "$GITHUB_PAT" ]; then
|
||||
GH_PR_NUM=$(curl -sf "https://api.github.com/repos/$GITHUB_REPO/pulls?head=living-ip:$branch&state=all" \
|
||||
-H "Authorization: token $GITHUB_PAT" 2>/dev/null | \
|
||||
python3 -c "import sys,json; prs=json.load(sys.stdin); print(prs[0]['number'] if prs else '')" 2>/dev/null || true)
|
||||
if [ -n "$GH_PR_NUM" ]; then
|
||||
sqlite3 "$PIPELINE_DB" "UPDATE prs SET github_pr = $GH_PR_NUM WHERE number = $PR_NUM;" 2>/dev/null && \
|
||||
log "Linked GitHub PR #$GH_PR_NUM -> Forgejo PR #$PR_NUM" || \
|
||||
log "WARN: Failed to link GitHub PR #$GH_PR_NUM to Forgejo PR #$PR_NUM in DB"
|
||||
if [[ "$branch" == gh-pr-* ]]; then
|
||||
GH_PR_NUM=$(echo "$branch" | sed 's|gh-pr-\([0-9]*\)/.*|\1|')
|
||||
else
|
||||
GITHUB_PAT=$(cat "$GITHUB_PAT_FILE" 2>/dev/null | tr -d '[:space:]')
|
||||
GH_PR_NUM=""
|
||||
if [ -n "$GITHUB_PAT" ]; then
|
||||
GH_PR_NUM=$(curl -sf "https://api.github.com/repos/$GITHUB_REPO/pulls?head=living-ip:$branch&state=all" \
|
||||
-H "Authorization: token $GITHUB_PAT" 2>/dev/null | \
|
||||
python3 -c "import sys,json; prs=json.load(sys.stdin); print(prs[0]['number'] if prs else '')" 2>/dev/null || true)
|
||||
fi
|
||||
fi
|
||||
if [ -n "$GH_PR_NUM" ]; then
|
||||
sqlite3 "$PIPELINE_DB" "UPDATE prs SET github_pr = $GH_PR_NUM WHERE number = $PR_NUM;" 2>/dev/null && \
|
||||
log "Linked GitHub PR #$GH_PR_NUM -> Forgejo PR #$PR_NUM" || \
|
||||
log "WARN: Failed to link GitHub PR #$GH_PR_NUM to Forgejo PR #$PR_NUM in DB"
|
||||
fi
|
||||
else
|
||||
log "WARN: Failed to auto-create PR for $branch"
|
||||
fi
|
||||
|
|
|
|||
|
|
@ -153,11 +153,24 @@ async def record_contributor_attribution(conn, pr_number: int, branch: str, git_
|
|||
agent_id = agent_id_match.group(1).strip() if agent_id_match else None
|
||||
upsert_contributor(conn, handle, agent_id, current_role, today)
|
||||
|
||||
# Fallback: if no attribution block found, credit the branch agent as extractor
|
||||
# Fallback: if no attribution block found, try git commit author then branch agent
|
||||
if not agents_found:
|
||||
row = conn.execute("SELECT agent FROM prs WHERE number = ?", (pr_number,)).fetchone()
|
||||
if row and row["agent"]:
|
||||
upsert_contributor(conn, row["agent"].lower(), None, "extractor", today)
|
||||
# For external contributors: parse git commit author as attribution source
|
||||
rc_author, author_output = await git_fn(
|
||||
"log", f"origin/main..origin/{branch}", "--format=%an", "-1",
|
||||
timeout=10,
|
||||
)
|
||||
if rc_author == 0 and author_output.strip():
|
||||
author_name = author_output.strip().lower()
|
||||
# Skip generic/bot authors — fall through to branch agent
|
||||
if author_name not in ("m3taversal", "teleo", "pipeline", ""):
|
||||
upsert_contributor(conn, author_name, None, "extractor", today)
|
||||
agents_found.add(author_name)
|
||||
|
||||
if not agents_found:
|
||||
row = conn.execute("SELECT agent FROM prs WHERE number = ?", (pr_number,)).fetchone()
|
||||
if row and row["agent"] and row["agent"] != "external":
|
||||
upsert_contributor(conn, row["agent"].lower(), None, "extractor", today)
|
||||
|
||||
|
||||
def upsert_contributor(
|
||||
|
|
|
|||
|
|
@ -208,6 +208,7 @@ BRANCH_PREFIX_MAP = {
|
|||
"leo": ("leo", "entity"),
|
||||
"reweave": ("pipeline", "reweave"),
|
||||
"fix": ("pipeline", "fix"),
|
||||
"contrib": ("external", "contrib"),
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -217,6 +218,9 @@ def classify_branch(branch: str) -> tuple[str, str]:
|
|||
Returns ('unknown', 'unknown') and logs a warning for unrecognized prefixes.
|
||||
"""
|
||||
prefix = branch.split("/", 1)[0] if "/" in branch else branch
|
||||
# Fork PR branches: gh-pr-N/original-branch
|
||||
if prefix.startswith("gh-pr-"):
|
||||
return ("external", "contrib")
|
||||
result = BRANCH_PREFIX_MAP.get(prefix)
|
||||
if result is None:
|
||||
logger.warning("Unknown branch prefix %r in branch %r — defaulting to ('unknown', 'unknown')", prefix, branch)
|
||||
|
|
|
|||
|
|
@ -200,7 +200,8 @@ def test_record_fallback_to_pr_agent():
|
|||
|
||||
async def run():
|
||||
with patch("lib.contributor.get_pr_diff", new_callable=AsyncMock, return_value=mock_diff):
|
||||
git_fn = AsyncMock(return_value=(0, "no trailers here"))
|
||||
# First call: trailer log (no trailers), Second call: author log (bot name → skipped)
|
||||
git_fn = AsyncMock(side_effect=[(0, "no trailers here"), (0, "m3taversal")])
|
||||
with patch("lib.contributor.config") as mock_config:
|
||||
mock_config.CONTRIBUTOR_TIER_RULES = {
|
||||
"veteran": {"claims_merged": 50, "min_days_since_first": 90, "challenges_survived": 5},
|
||||
|
|
@ -213,6 +214,29 @@ def test_record_fallback_to_pr_agent():
|
|||
assert row is not None
|
||||
assert row["extractor_count"] == 1
|
||||
|
||||
|
||||
def test_record_fallback_to_git_author():
    """Credit an external contributor through the git commit author fallback.

    The PR row is seeded with the values (200, 'contrib', 'external') and the
    mocked git output carries no attribution trailer, so the author name
    returned by the second git call is the only attribution source. The
    contributor is expected to be stored under the lower-cased author name
    with a single extractor credit.
    """
    db = _make_attribution_db()
    db.execute("INSERT INTO prs VALUES (200, 'contrib', 'external')")
    diff_text = "+++ b/domains/ai-alignment/claim.md\n+new content\n"

    tier_rules = {
        "veteran": {"claims_merged": 50, "min_days_since_first": 90, "challenges_survived": 5},
        "contributor": {"claims_merged": 10},
    }
    # Replies are consumed in call order:
    #   1st call -> trailer log (no trailers present)
    #   2nd call -> author log (external contributor's name)
    git_replies = [(0, "no trailers"), (0, "Cameron-S1")]

    async def _exercise():
        with patch("lib.contributor.get_pr_diff", new_callable=AsyncMock, return_value=diff_text), \
                patch("lib.contributor.config") as mock_config:
            git_fn = AsyncMock(side_effect=git_replies)
            mock_config.CONTRIBUTOR_TIER_RULES = tier_rules
            await record_contributor_attribution(db, 200, "contrib/cameron/challenge", git_fn)

    asyncio.run(_exercise())

    # The handle is looked up in lower case even though git reported "Cameron-S1".
    credited = db.execute("SELECT * FROM contributors WHERE handle = 'cameron-s1'").fetchone()
    assert credited is not None
    assert credited["extractor_count"] == 1
|
||||
|
||||
def test_record_parses_pentagon_trailer():
|
||||
conn = _make_attribution_db()
|
||||
mock_diff = "+++ b/domains/crypto/claim.md\n+new file content\n"
|
||||
|
|
|
|||
Loading…
Reference in a new issue