diff --git a/lib/config.py b/lib/config.py
index 9f1b401..fb09412 100644
--- a/lib/config.py
+++ b/lib/config.py
@@ -84,6 +84,14 @@ MAX_EXTRACT_WORKERS = int(os.environ.get("MAX_EXTRACT_WORKERS", "5"))
 MAX_EVAL_WORKERS = int(os.environ.get("MAX_EVAL_WORKERS", "7"))
 MAX_MERGE_WORKERS = 1  # domain-serialized, but one merge at a time per domain
 
+# --- External GitHub PR merge strategy ---
+# When True, gh-pr-N/* branches merge with --no-ff (preserves contributor SHA in
+# main's history → GitHub recognizes "merged" badge). When False, fall back to
+# cherry-pick (the default for all other branches). Default True; flip to False
+# as an emergency backout if the no-ff path destabilizes merge throughput.
+# Phase 2 of external contributor merge flow (Ship architecture review Apr 28).
+EXTERNAL_PR_NO_FF_MERGE = True
+
 # --- Timeouts (seconds) ---
 EXTRACT_TIMEOUT = 600  # 10 min
 EVAL_TIMEOUT = 120  # 2 min — routine Sonnet/Gemini Flash calls (was 600, caused 10-min stalls)
diff --git a/lib/merge.py b/lib/merge.py
index 23498bd..49cc61b 100644
--- a/lib/merge.py
+++ b/lib/merge.py
@@ -429,6 +429,157 @@ async def _cherry_pick_onto_main(branch: str) -> tuple[bool, str]:
             await _git("branch", "-D", clean_branch)
 
 
+_GH_PR_BRANCH_RE = re.compile(r"^gh-pr-(\d+)/(.+)$")
+
+
+async def _merge_no_ff_external(branch: str) -> tuple[bool, str]:
+    """Merge an external GitHub fork PR with --no-ff so contributor SHA lands in main.
+
+    Why this differs from _cherry_pick_onto_main:
+    - Cherry-pick rewrites the contributor's commit SHA → GitHub's "is PR head SHA
+      an ancestor of main?" check returns false → "merged" badge never fires.
+    - --no-ff preserves the contributor's commit SHA as a parent of the merge
+      commit. After ff-push to main (the existing dispatch step), GitHub sees
+      the SHA in ancestry and marks the PR merged.
+
+    Mechanics:
+    1. Fetch origin/main + origin/{branch}
+    2. Detached worktree at origin/main, git merge --no-ff origin/{branch}
+       with verbose message: "Merge external GitHub PR #{N}: {branch_slug}"
+    3. Force-push the merge commit as origin/{branch} (replacing the branch tip).
+       Dispatch's existing ff-push to main then produces the merge commit on main.
+
+    The merge commit M has parents [main_sha, branch_sha]. M is a fast-forward
+    descendant of main_sha (via first-parent chain), so the ff-push to main
+    works without --force.
+
+    Conflict handling: same auto-resolve pattern as cherry-pick — entity-only
+    conflicts take main's version (--ours = current worktree HEAD = main),
+    other conflicts abort and return False with detail. If main's version of a
+    conflicted entity cannot be checked out or staged, the merge is aborted
+    rather than committed with whatever is left in the worktree.
+
+    Args:
+        branch: Branch name; must match gh-pr-<number>/<slug>.
+
+    Returns:
+        (ok, detail). ok is True when the merge commit was pushed to
+        origin/{branch} or the branch was already up to date; otherwise False,
+        with the failing step and git output (or conflicting paths) in detail.
+
+    Phase 2 of external contributor merge flow (Ship architecture review Apr 28).
+    """
+    m = _GH_PR_BRANCH_RE.match(branch)
+    if not m:
+        return False, f"branch {branch} doesn't match gh-pr-N/* format"
+    gh_pr_num = m.group(1)
+    branch_slug = m.group(2)
+
+    worktree_path = f"/tmp/teleo-merge-{branch.replace('/', '-')}"
+
+    # Fetch latest state — separate calls (long branch names break combined refspec)
+    rc, out = await _git("fetch", "origin", "main", timeout=15)
+    if rc != 0:
+        return False, f"fetch main failed: {out}"
+    rc, out = await _git("fetch", "origin", branch, timeout=15)
+    if rc != 0:
+        return False, f"fetch branch failed: {out}"
+
+    # Up-to-date check (mirrors cherry-pick path semantics)
+    rc, merge_base = await _git("merge-base", "origin/main", f"origin/{branch}")
+    rc2, main_sha = await _git("rev-parse", "origin/main")
+    if rc == 0 and rc2 == 0 and merge_base.strip() == main_sha.strip():
+        rc_diff, diff_out = await _git(
+            "diff", "--stat", f"origin/main..origin/{branch}", timeout=10,
+        )
+        # NOTE(review): a failed diff is also treated as up to date — confirm.
+        if rc_diff != 0 or not diff_out.strip():
+            return True, "already up to date"
+        logger.info("External PR branch %s is descendant of main but has new content — proceeding", branch)
+
+    async with _bare_repo_lock:
+        rc, out = await _git("worktree", "add", "--detach", worktree_path, "origin/main")
+    if rc != 0:
+        return False, f"worktree add failed: {out}"
+
+    try:
+        merge_msg = f"Merge external GitHub PR #{gh_pr_num}: {branch_slug}"
+        rc, out = await _git(
+            "merge", "--no-ff", f"origin/{branch}",
+            "-m", merge_msg,
+            cwd=worktree_path, timeout=60,
+        )
+
+        if rc != 0:
+            # Identify conflicts
+            rc_ls, conflicting = await _git(
+                "diff", "--name-only", "--diff-filter=U", cwd=worktree_path,
+            )
+            conflict_files = [
+                f.strip() for f in conflicting.split("\n") if f.strip()
+            ] if rc_ls == 0 else []
+
+            if conflict_files and all(f.startswith("entities/") for f in conflict_files):
+                # Entity-only conflicts: take main's version (entities are recoverable)
+                # In merge: --ours   = branch we're ON (worktree HEAD = main)
+                #           --theirs = branch merging in (origin/{branch})
+                for cf in conflict_files:
+                    # Check every step: if `checkout --ours` fails (e.g. main
+                    # deleted the file, so there is no "ours" stage), a blind
+                    # `git add` would stage the contributor's copy instead.
+                    rc_res, res_out = await _git("checkout", "--ours", cf, cwd=worktree_path)
+                    if rc_res == 0:
+                        rc_res, res_out = await _git("add", cf, cwd=worktree_path)
+                    if rc_res != 0:
+                        await _git("merge", "--abort", cwd=worktree_path)
+                        return False, (
+                            f"merge entity resolution failed for PR #{gh_pr_num}: "
+                            f"{cf}: {res_out}"
+                        )
+                # Complete the merge using the prepared MERGE_MSG (no editor)
+                rc_cont, cont_out = await _git(
+                    "-c", "core.editor=true",
+                    "commit", "--no-edit",
+                    cwd=worktree_path, timeout=60,
+                )
+                if rc_cont != 0:
+                    await _git("merge", "--abort", cwd=worktree_path)
+                    return False, f"merge entity resolution failed for PR #{gh_pr_num}: {cont_out}"
+                logger.info(
+                    "External PR #%s merge: entity conflict auto-resolved (dropped %s)",
+                    gh_pr_num, ", ".join(sorted(conflict_files)),
+                )
+            else:
+                conflict_detail = ", ".join(conflict_files) if conflict_files else out[:200]
+                await _git("merge", "--abort", cwd=worktree_path)
+                return False, f"merge conflict on PR #{gh_pr_num}: {conflict_detail}"
+
+        # Force-push the merge commit as origin/{branch}, replacing the contributor's
+        # branch tip. Dispatch then ff-pushes origin/{branch} → main, producing the
+        # merge commit on main. --force-with-lease guards against concurrent updates.
+        rc, expected_sha = await _git("rev-parse", f"origin/{branch}")
+        if rc != 0:
+            return False, f"rev-parse origin/{branch} failed: {expected_sha}"
+        expected_sha = expected_sha.strip().split("\n")[0]
+
+        rc, out = await _git(
+            "push",
+            f"--force-with-lease={branch}:{expected_sha}",
+            "origin",
+            f"HEAD:{branch}",
+            cwd=worktree_path,
+            timeout=30,
+        )
+        if rc != 0:
+            return False, f"push rejected: {out}"
+
+        return True, f"merged --no-ff (external PR #{gh_pr_num})"
+
+    finally:
+        async with _bare_repo_lock:
+            await _git("worktree", "remove", "--force", worktree_path)
+
+
 from .frontmatter import (
     REWEAVE_EDGE_FIELDS,
     parse_yaml_frontmatter,
@@ -733,6 +884,14 @@ async def _merge_domain_queue(conn, domain: str) -> tuple[int, int]:
             # (Ganymede: manifest approach, Theseus: superset assertion + order-preserving dedup)
             if branch.startswith("reweave/"):
                 merge_fn = _merge_reweave_pr(branch)
+            elif config.EXTERNAL_PR_NO_FF_MERGE and _GH_PR_BRANCH_RE.match(branch):
+                # External GitHub fork PRs: --no-ff merge so contributor SHA lands
+                # in main's history → GitHub recognizes "merged" badge.
+                # Gated on the same pattern _merge_no_ff_external validates, so a
+                # malformed gh-pr-* name falls back to cherry-pick instead of failing.
+                # Backout via config.EXTERNAL_PR_NO_FF_MERGE = False (falls back to cherry-pick).
+                # Phase 2 of external contributor merge flow (Ship architecture review Apr 28).
+                merge_fn = _merge_no_ff_external(branch)
             else:
                 # Extraction commits ADD new files — cherry-pick applies cleanly.
                 merge_fn = _cherry_pick_onto_main(branch)