Compare commits
6 commits
epimetheus
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| ed5f7ef6cc | |||
| 7741c1e6de | |||
| 992b4ee36f | |||
| de204db539 | |||
| 1eb259de8a | |||
| 33f6ca9e3f |
4 changed files with 267 additions and 7 deletions
|
|
@ -98,14 +98,15 @@ echo "Fetching from Forgejo..."
|
|||
git fetch forgejo --prune 2>&1 | sed 's/^/ /'
|
||||
|
||||
# Initial push to GitHub (will populate the empty repo)
|
||||
echo "Pushing initial state to GitHub..."
|
||||
# Sync local refs from forgejo remote refs first (mirrors what sync-mirror.sh does)
|
||||
while read branch; do
|
||||
[ "$branch" = "HEAD" ] && continue
|
||||
git update-ref "refs/heads/$branch" "refs/remotes/forgejo/$branch" 2>/dev/null || true
|
||||
done < <(git for-each-ref --format="%(refname:lstrip=3)" refs/remotes/forgejo/)
|
||||
# main_only mode: push ONLY refs/heads/main + tags, mirroring what sync-mirror.sh
|
||||
# does for this repo on the recurring path. Agent review branches stay Forgejo-only.
|
||||
echo "Pushing initial main + tags to GitHub..."
|
||||
git update-ref refs/heads/main refs/remotes/forgejo/main 2>/dev/null || {
|
||||
echo "ERROR: forgejo/main ref missing — fetch may have failed" >&2
|
||||
exit 1
|
||||
}
|
||||
|
||||
git push origin --all 2>&1 | sed 's/^/ /' || {
|
||||
git push origin "refs/heads/main:refs/heads/main" 2>&1 | sed 's/^/ /' || {
|
||||
echo "WARN: initial push failed — you may need to authorize the PAT for $GITHUB_REPO" >&2
|
||||
}
|
||||
git push origin --tags 2>&1 | sed 's/^/ /' || true
|
||||
|
|
|
|||
|
|
@ -367,6 +367,34 @@ print(json.dumps({'chat_id': sys.argv[4], 'text': msg, 'parse_mode': 'HTML'}))
|
|||
REPO_TAG="main"
|
||||
log "Starting sync cycle"
|
||||
|
||||
# Step 0: self-heal any gh-pr-* PR rows missing github_pr.
# Runs FIRST — before per-repo work (branch-mirror loop, auto-create-PR block).
# Recovers from races/transient failures in Step 4.5's one-shot link UPDATE.
# Idempotent: SELECT empty when clean, zero-cost path. Same SELECT/UPDATE
# heals historical orphans (PR 4066 picked up on first cron tick post-deploy)
# and future races on subsequent ticks. The branch name encodes the GitHub PR
# number deterministically (gh-pr-{N}/...) so no API call is required.
if [ -f "$PIPELINE_DB" ]; then
    sqlite3 -separator '|' "$PIPELINE_DB" \
        "SELECT number, branch FROM prs WHERE branch LIKE 'gh-pr-%' AND github_pr IS NULL;" \
        2>/dev/null | while IFS='|' read -r pr_num branch; do
        # pr_num is interpolated into SQL below. A column declared INTEGER in
        # SQLite only has integer *affinity* — it can still hold text unless the
        # table is STRICT — so validate the value here instead of trusting the
        # schema. Anything that is empty or contains a non-digit is skipped.
        case "$pr_num" in
            ''|*[!0-9]*) continue ;;
        esac
        # Regex requires >=1 digit — empty/non-numeric branches fail to parse here,
        # not just at the empty-guard below. [0-9][0-9]* is the portable BRE form
        # of [0-9]+, works on both GNU sed (VPS) and BSD sed (dev macs).
        gh_pr_num=$(echo "$branch" | sed -n 's|^gh-pr-\([0-9][0-9]*\)/.*|\1|p')
        [ -z "$gh_pr_num" ] && continue
        # Both interpolated values are digits-only at this point (pr_num by the
        # case guard, gh_pr_num by the sed capture). No parametric binding is
        # available in bash sqlite3 — safety relies on those two checks.
        if sqlite3 "$PIPELINE_DB" \
            "UPDATE prs SET github_pr = $gh_pr_num, source_channel = 'github' WHERE number = $pr_num;" \
            2>/dev/null; then
            log "self-heal: linked Forgejo PR #$pr_num -> GitHub PR #$gh_pr_num"
        else
            # Do not fail the sync cycle, but make the failure visible: the row
            # keeps github_pr NULL, so the same SELECT will pick it up again.
            log "self-heal: FAILED to link Forgejo PR #$pr_num -> GitHub PR #$gh_pr_num"
        fi
    done
fi
|
||||
|
||||
for entry in "${MIRROR_REPOS[@]}"; do
|
||||
# Read the 4 fields. `read` splits on $IFS (whitespace) by default.
|
||||
read -r forgejo_repo github_repo bare_path mode <<< "$entry"
|
||||
|
|
|
|||
|
|
@ -84,6 +84,14 @@ MAX_EXTRACT_WORKERS = int(os.environ.get("MAX_EXTRACT_WORKERS", "5"))
|
|||
MAX_EVAL_WORKERS = int(os.environ.get("MAX_EVAL_WORKERS", "7"))
|
||||
MAX_MERGE_WORKERS = 1 # domain-serialized, but one merge at a time per domain
|
||||
|
||||
# --- External GitHub PR merge strategy ---
|
||||
# When True, gh-pr-N/* branches merge with --no-ff (preserves contributor SHA in
|
||||
# main's history → GitHub recognizes "merged" badge). When False, fall back to
|
||||
# cherry-pick (the default for all other branches). Default True; flip to False
|
||||
# as an emergency backout if the no-ff path destabilizes merge throughput.
|
||||
# Phase 2 of external contributor merge flow (Ship architecture review Apr 28).
|
||||
EXTERNAL_PR_NO_FF_MERGE = True
|
||||
|
||||
# --- Timeouts (seconds) ---
|
||||
EXTRACT_TIMEOUT = 600 # 10 min
|
||||
EVAL_TIMEOUT = 120 # 2 min — routine Sonnet/Gemini Flash calls (was 600, caused 10-min stalls)
|
||||
|
|
|
|||
223
lib/merge.py
223
lib/merge.py
|
|
@ -429,6 +429,171 @@ async def _cherry_pick_onto_main(branch: str) -> tuple[bool, str]:
|
|||
await _git("branch", "-D", clean_branch)
|
||||
|
||||
|
||||
_GH_PR_BRANCH_RE = re.compile(r"^gh-pr-(\d+)/(.+)$")
|
||||
|
||||
|
||||
async def _merge_no_ff_external(branch: str) -> tuple[bool, str]:
    """Merge an external GitHub fork PR with --no-ff so contributor SHA lands in main.

    Why this differs from _cherry_pick_onto_main:
    - Cherry-pick rewrites the contributor's commit SHA → GitHub's "is PR head SHA
      an ancestor of main?" check returns false → "merged" badge never fires.
    - --no-ff preserves the contributor's commit SHA as a parent of the merge
      commit. After the push to main, GitHub sees the SHA in ancestry and marks
      the PR merged.

    Mechanics:
    1. Fetch origin/main + origin/{branch}
    2. Short-circuit with "already up to date" when there is nothing to merge
       (branch has no diff against main, or branch is already contained in main)
    3. Worktree on local branch _merged-{slug} from origin/main
    4. git merge --no-ff origin/{branch} with verbose message:
       "Merge external GitHub PR #{N}: {branch_slug}"
    5. Push merge commit to origin/_merged/{branch} (synthetic audit ref)
    6. ff-push merge_sha → origin/main directly (function owns the push, NOT
       dispatch — see sentinel return below)

    The merge commit M has parents [main_sha, branch_sha]. M is a fast-forward
    descendant of main_sha (via first-parent chain), so the push to main
    works without --force.

    Synthetic branch (Ship review Apr 28): we deliberately do NOT force-push
    the contributor's gh-pr-N/* branch. Force-pushing it would rewrite the
    branch tip with a merge commit the contributor didn't author, showing as
    a confusing bot force-push in Forgejo's PR UI. The synthetic _merged/*
    audit ref lets us track the merge commit without touching the contributor's
    branch. Mirrors the _clean/* synthetic branch pattern in cherry-pick.

    Sentinel return: function pushes merge_sha → main itself (dispatch's ff-push
    can't, since origin/{branch} is unchanged and not a descendant of main).
    Returns a "merged --no-ff" sentinel string that dispatch detects to skip
    its ff-push step and route directly to PR-close + mark_merged + audit.
    The full 40-char merge SHA is in the return string for dispatch to extract.

    Conflict handling: same auto-resolve pattern as cherry-pick — entity-only
    conflicts take main's version (--ours = current worktree HEAD = main),
    other conflicts abort and return False with detail.

    Phase 2 of external contributor merge flow (Ship architecture review Apr 28).

    Args:
        branch: Remote branch name; must match ``gh-pr-{N}/{slug}``.

    Returns:
        ``(ok, message)``. On a completed merge, ``ok`` is True and ``message``
        starts with ``"merged --no-ff"``. When there is nothing to merge,
        ``ok`` is True and ``message`` is ``"already up to date"``. On any
        failure ``ok`` is False and ``message`` describes the failing step.
    """
    m = _GH_PR_BRANCH_RE.match(branch)
    if not m:
        return False, f"branch {branch} doesn't match gh-pr-N/* format"
    gh_pr_num = m.group(1)
    branch_slug = m.group(2)

    slug = branch.replace("/", "-")
    worktree_path = f"/tmp/teleo-merge-{slug}"
    local_branch = f"_merged-{slug}"  # local working branch in worktree
    audit_ref = f"_merged/{branch}"  # remote synthetic ref (preserves hierarchy)

    # Fetch latest state — separate calls (long branch names break combined refspec)
    rc, out = await _git("fetch", "origin", "main", timeout=15)
    if rc != 0:
        return False, f"fetch main failed: {out}"
    rc, out = await _git("fetch", "origin", branch, timeout=15)
    if rc != 0:
        return False, f"fetch branch failed: {out}"

    # Up-to-date check (mirrors cherry-pick path semantics)
    rc, merge_base = await _git("merge-base", "origin/main", f"origin/{branch}")
    rc2, main_sha = await _git("rev-parse", "origin/main")
    if rc == 0 and rc2 == 0 and merge_base.strip() == main_sha.strip():
        rc_diff, diff_out = await _git(
            "diff", "--stat", f"origin/main..origin/{branch}", timeout=10,
        )
        if rc_diff != 0 or not diff_out.strip():
            return True, "already up to date"
        logger.info("External PR branch %s is descendant of main but has new content — proceeding", branch)

    # Already-merged check. If the branch tip is an ancestor of main (e.g. a
    # retry after main was pushed but a later step failed), `git merge --no-ff`
    # exits 0 with "Already up to date." and creates NO commit. Without this
    # guard HEAD would still be main's tip, the audit ref would be force-
    # overwritten with a non-merge commit, and the sentinel would report a
    # merge SHA that is not a merge. `--is-ancestor` exits 0 only when the
    # first commit is an ancestor of the second; any other exit code (1 = not
    # an ancestor, other = error) falls through to the normal merge path.
    # NOTE(review): this reuses the existing "already up to date" result —
    # confirm dispatch's handling of that message closes out the PR as desired.
    rc_anc, _ = await _git(
        "merge-base", "--is-ancestor", f"origin/{branch}", "origin/main",
    )
    if rc_anc == 0:
        logger.info("External PR branch %s is already contained in main — nothing to merge", branch)
        return True, "already up to date"

    async with _bare_repo_lock:
        # Clean up any stale local branch from a prior failed run
        await _git("branch", "-D", local_branch)
        rc, out = await _git("worktree", "add", "-b", local_branch, worktree_path, "origin/main")
        if rc != 0:
            return False, f"worktree add failed: {out}"

    try:
        merge_msg = f"Merge external GitHub PR #{gh_pr_num}: {branch_slug}"
        rc, out = await _git(
            "merge", "--no-ff", f"origin/{branch}",
            "-m", merge_msg,
            cwd=worktree_path, timeout=60,
        )

        if rc != 0:
            # Identify conflicts
            rc_ls, conflicting = await _git(
                "diff", "--name-only", "--diff-filter=U", cwd=worktree_path,
            )
            conflict_files = [
                f.strip() for f in conflicting.split("\n") if f.strip()
            ] if rc_ls == 0 else []

            if conflict_files and all(f.startswith("entities/") for f in conflict_files):
                # Entity-only conflicts: take main's version (entities are recoverable)
                # In merge: --ours = branch we're ON (worktree HEAD = main)
                #           --theirs = branch merging in (origin/{branch})
                for cf in conflict_files:
                    await _git("checkout", "--ours", cf, cwd=worktree_path)
                    await _git("add", cf, cwd=worktree_path)
                # Complete the merge using the prepared MERGE_MSG (no editor)
                rc_cont, cont_out = await _git(
                    "-c", "core.editor=true",
                    "commit", "--no-edit",
                    cwd=worktree_path, timeout=60,
                )
                if rc_cont != 0:
                    await _git("merge", "--abort", cwd=worktree_path)
                    return False, f"merge entity resolution failed for PR #{gh_pr_num}: {cont_out}"
                logger.info(
                    "External PR #%s merge: entity conflict auto-resolved (dropped %s)",
                    gh_pr_num, ", ".join(sorted(conflict_files)),
                )
            else:
                conflict_detail = ", ".join(conflict_files) if conflict_files else out[:200]
                await _git("merge", "--abort", cwd=worktree_path)
                return False, f"merge conflict on PR #{gh_pr_num}: {conflict_detail}"

        # Capture the merge commit SHA before any pushes
        rc, merge_sha = await _git("rev-parse", "HEAD", cwd=worktree_path)
        if rc != 0:
            return False, f"rev-parse merge HEAD failed: {merge_sha}"
        merge_sha = merge_sha.strip().split("\n")[0]

        # Push to synthetic audit ref _merged/{branch} (does not touch contributor's
        # gh-pr-N/* branch). Plain --force: the audit ref is bot-owned and per-PR;
        # if a prior aborted attempt left a stale ref, overwriting it is the
        # intended behavior, and there's no concurrent writer to lease against.
        rc, out = await _git(
            "push", "--force", "origin", f"HEAD:refs/heads/{audit_ref}",
            cwd=worktree_path, timeout=30,
        )
        if rc != 0:
            return False, f"push to audit ref {audit_ref} failed: {out}"

        # ff-push the merge commit to main. This is a true fast-forward (M is a
        # descendant of origin/main via its first parent), so no --force needed.
        # Forgejo's branch protection allows ff-push to main from authorized users.
        rc, out = await _git(
            "push", "origin", f"{merge_sha}:main",
            cwd=worktree_path, timeout=30,
        )
        if rc != 0:
            # Roll back audit ref if main push failed — keeps state consistent.
            await _git("push", "--delete", "origin", f"refs/heads/{audit_ref}",
                       cwd=worktree_path, timeout=15)
            return False, f"ff-push to main failed: {out}"

        # Sentinel return: "merged --no-ff" prefix triggers dispatch's external-PR
        # close path (skips ff-push, does PR-close + mark_merged + audit).
        # Full 40-char merge SHA in the message so dispatch can parse it for audit.
        return True, f"merged --no-ff (external PR #{gh_pr_num}, M={merge_sha}, audit_ref={audit_ref})"

    finally:
        # Always tear down the temporary worktree and its local branch, on
        # success and on every early return above.
        async with _bare_repo_lock:
            await _git("worktree", "remove", "--force", worktree_path)
            await _git("branch", "-D", local_branch)
|
||||
|
||||
|
||||
from .frontmatter import (
|
||||
REWEAVE_EDGE_FIELDS,
|
||||
parse_yaml_frontmatter,
|
||||
|
|
@ -733,6 +898,12 @@ async def _merge_domain_queue(conn, domain: str) -> tuple[int, int]:
|
|||
# (Ganymede: manifest approach, Theseus: superset assertion + order-preserving dedup)
|
||||
if branch.startswith("reweave/"):
|
||||
merge_fn = _merge_reweave_pr(branch)
|
||||
elif branch.startswith("gh-pr-") and config.EXTERNAL_PR_NO_FF_MERGE:
|
||||
# External GitHub fork PRs: --no-ff merge so contributor SHA lands
|
||||
# in main's history → GitHub recognizes "merged" badge.
|
||||
# Backout via config.EXTERNAL_PR_NO_FF_MERGE = False (falls back to cherry-pick).
|
||||
# Phase 2 of external contributor merge flow (Ship architecture review Apr 28).
|
||||
merge_fn = _merge_no_ff_external(branch)
|
||||
else:
|
||||
# Extraction commits ADD new files — cherry-pick applies cleanly.
|
||||
merge_fn = _cherry_pick_onto_main(branch)
|
||||
|
|
@ -786,6 +957,58 @@ async def _merge_domain_queue(conn, domain: str) -> tuple[int, int]:
|
|||
succeeded += 1
|
||||
continue
|
||||
|
||||
# External GitHub PR (gh-pr-*): _merge_no_ff_external already pushed
|
||||
# the merge commit to origin/main + the synthetic _merged/{branch}
|
||||
# audit ref. Skip dispatch's ff-push (would fail — origin/{branch} is
|
||||
# the contributor's untouched branch, not a descendant of main).
|
||||
# Just close PR + mark_merged + audit, parsing merge SHA from sentinel.
|
||||
if pick_msg.startswith("merged --no-ff"):
|
||||
m = re.search(r"M=([a-f0-9]{40})", pick_msg)
|
||||
merge_sha = m.group(1) if m else None
|
||||
m_ref = re.search(r"audit_ref=(\S+?)\)", pick_msg)
|
||||
audit_ref = m_ref.group(1) if m_ref else None
|
||||
m_pr = re.search(r"external PR #(\d+)", pick_msg)
|
||||
gh_pr_num = m_pr.group(1) if m_pr else None
|
||||
# Surface drift between dispatch and _merge_no_ff_external if the
|
||||
# success-message contract changes. Merge already succeeded; this
|
||||
# is signal-only, not a gate on the close path.
|
||||
if not (m and m_ref and m_pr):
|
||||
logger.warning(
|
||||
"PR #%d sentinel parse incomplete: M=%s, audit_ref=%s, gh_pr=%s, msg=%r",
|
||||
pr_num, bool(m), bool(m_ref), bool(m_pr), pick_msg,
|
||||
)
|
||||
|
||||
leo_token = get_agent_token("leo")
|
||||
comment_body = (
|
||||
f"Merged via --no-ff into main.\n"
|
||||
f"Merge commit: `{merge_sha}`\n"
|
||||
f"Audit ref: `{audit_ref}`\n"
|
||||
f"Branch: `{branch}` (preserved unchanged)"
|
||||
)
|
||||
await forgejo_api("POST", repo_path(f"issues/{pr_num}/comments"),
|
||||
{"body": comment_body})
|
||||
result = await forgejo_api("PATCH", repo_path(f"pulls/{pr_num}"),
|
||||
{"state": "closed"}, token=leo_token)
|
||||
if result is None:
|
||||
logger.error("PR #%d: Forgejo close failed (no-ff path), skipping DB update", pr_num)
|
||||
failed += 1
|
||||
continue
|
||||
mark_merged(conn, pr_num)
|
||||
db.audit(conn, "merge", "merged", json.dumps({
|
||||
"pr": pr_num, "branch": branch, "method": "no-ff",
|
||||
"merge_commit_sha": merge_sha,
|
||||
"audit_ref": audit_ref,
|
||||
"github_pr": gh_pr_num,
|
||||
}))
|
||||
# NOTE: do NOT _delete_remote_branch(branch) here. The contributor's
|
||||
# gh-pr-N/* branch is the mirror of their fork PR head — leaving it
|
||||
# in place lets sync-mirror keep the GitHub PR <-> Forgejo PR link
|
||||
# observable. The synthetic _merged/{branch} ref carries the merge.
|
||||
logger.info("PR #%d merged via --no-ff (M=%s)", pr_num,
|
||||
merge_sha[:8] if merge_sha else "?")
|
||||
succeeded += 1
|
||||
continue
|
||||
|
||||
# Local ff-push: cherry-picked branch is a descendant of origin/main.
|
||||
# Regular push = fast-forward. Non-ff rejected by default (same safety).
|
||||
# --force-with-lease removed: Forgejo categorically blocks it on protected branches.
|
||||
|
|
|
|||
Loading…
Reference in a new issue