fix: prevent reweave PR flood — freshen base, clean up branches on failure

Three fixes for the reweave merge failure cycle:

1. reweave.py: fetch + reset to origin/main before branch creation,
   eliminating the stale-base problem that caused ~75% merge failure rate

2. merge.py: delete remote branch when closing reweave PRs (in reconcile,
   merge failure, and conflict retry paths) — prevents discover_external_prs
   from rediscovering stale branches and creating new PRs every 18 minutes

3. merge.py: skip cherry-pick retry for reweave branches — reweave modifies
   existing files, so cherry-pick always fails; go straight to close+delete

Pentagon-Agent: Ship <f3064ef4-c330-4809-ad37-39290b2eaa5b>
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
m3taversal 2026-04-07 11:38:25 +01:00 committed by Teleo Agents
parent 0591c4c0df
commit adbe3bd911
2 changed files with 76 additions and 12 deletions

View file

@ -1432,13 +1432,22 @@ async def _merge_domain_queue(conn, domain: str) -> tuple[int, int]:
continue
if not pick_ok:
# Cherry-pick failed — this is a genuine conflict (not a race condition).
# No retry needed: cherry-pick onto fresh main means main can't have moved.
logger.warning("PR #%d cherry-pick failed: %s", pr_num, pick_msg)
conn.execute(
"UPDATE prs SET status = 'conflict', merge_cycled = 1, merge_failures = COALESCE(merge_failures, 0) + 1, last_error = ? WHERE number = ?",
(pick_msg[:500], pr_num),
)
logger.warning("PR #%d merge/cherry-pick failed: %s", pr_num, pick_msg)
# Reweave: close immediately, don't retry (Ship: same rationale as ff-push failure)
if branch.startswith("reweave/"):
conn.execute(
"UPDATE prs SET status = 'closed', merge_cycled = 1, merge_failures = COALESCE(merge_failures, 0) + 1, last_error = ? WHERE number = ?",
(f"reweave merge failed (closed, not retried): {pick_msg[:400]}", pr_num),
)
await forgejo_api("PATCH", repo_path(f"pulls/{pr_num}"), {"state": "closed"})
await forgejo_api("POST", repo_path(f"issues/{pr_num}/comments"),
{"body": f"Reweave merge failed — closing. Next nightly reweave will create a fresh branch.\n\nError: {pick_msg[:200]}"})
await _delete_remote_branch(branch)
else:
conn.execute(
"UPDATE prs SET status = 'conflict', merge_cycled = 1, merge_failures = COALESCE(merge_failures, 0) + 1, last_error = ? WHERE number = ?",
(pick_msg[:500], pr_num),
)
db.audit(conn, "merge", "cherry_pick_failed", json.dumps({"pr": pr_num, "error": pick_msg[:200]}))
failed += 1
continue
@ -1483,10 +1492,24 @@ async def _merge_domain_queue(conn, domain: str) -> tuple[int, int]:
if not merge_ok:
logger.error("PR #%d merge failed: %s", pr_num, merge_msg)
conn.execute(
"UPDATE prs SET status = 'conflict', merge_cycled = 1, merge_failures = COALESCE(merge_failures, 0) + 1, last_error = ? WHERE number = ?",
(merge_msg[:500], pr_num),
)
# Reweave PRs: close immediately on failure. Cherry-pick retry
# will always fail (reweave modifies existing files). Next nightly
# run creates a fresh branch from current main — retry is wasteful.
# (Ship: prevents reweave flood + wasted retry cycles)
if branch.startswith("reweave/"):
conn.execute(
"UPDATE prs SET status = 'closed', merge_cycled = 1, merge_failures = COALESCE(merge_failures, 0) + 1, last_error = ? WHERE number = ?",
(f"reweave merge failed (closed, not retried): {merge_msg[:400]}", pr_num),
)
await forgejo_api("PATCH", repo_path(f"pulls/{pr_num}"), {"state": "closed"})
await forgejo_api("POST", repo_path(f"issues/{pr_num}/comments"),
{"body": f"Reweave merge failed — closing. Next nightly reweave will create a fresh branch.\n\nError: {merge_msg[:200]}"})
await _delete_remote_branch(branch)
else:
conn.execute(
"UPDATE prs SET status = 'conflict', merge_cycled = 1, merge_failures = COALESCE(merge_failures, 0) + 1, last_error = ? WHERE number = ?",
(merge_msg[:500], pr_num),
)
db.audit(conn, "merge", "merge_failed", json.dumps({"pr": pr_num, "error": merge_msg[:200]}))
failed += 1
continue
@ -1583,6 +1606,11 @@ async def _reconcile_db_state(conn):
continue
if forgejo_state == "closed" and not is_merged and db_status not in ("closed",):
# Clean up branch too — stale branches get rediscovered as new PRs
# (Ship: prevents reweave flood where closed PRs leave branches that
# trigger discover_external_prs → new PR → fail → close → repeat)
if branch:
await _delete_remote_branch(branch)
conn.execute(
"UPDATE prs SET status = 'closed', last_error = 'reconciled: closed on Forgejo' WHERE number = ?",
(pr_number,),
@ -1775,6 +1803,22 @@ async def _retry_conflict_prs(conn) -> tuple[int, int]:
branch = row["branch"]
attempts = row["conflict_rebase_attempts"] or 0
# Reweave branches modify existing files — cherry-pick will always fail.
# Close immediately and delete branch. Next nightly reweave creates fresh.
# (Ship: prevents wasting 3 retry cycles on branches that can never cherry-pick)
if branch.startswith("reweave/"):
logger.info("Reweave PR #%d: skipping retry, closing + deleting branch", pr_number)
conn.execute(
"UPDATE prs SET status = 'closed', last_error = 'reweave: closed (retry skipped, next nightly creates fresh)' WHERE number = ?",
(pr_number,),
)
await forgejo_api("PATCH", repo_path(f"pulls/{pr_number}"), {"state": "closed"})
await forgejo_api("POST", repo_path(f"issues/{pr_number}/comments"),
{"body": "Reweave conflict — closing instead of retrying. Cherry-pick always fails on reweave branches (they modify existing files). Next nightly reweave will create a fresh branch from current main."})
await _delete_remote_branch(branch)
failed += 1
continue
logger.info("Conflict retry [%d/%d] PR #%d branch=%s",
attempts + 1, MAX_CONFLICT_REBASE_ATTEMPTS, pr_number, branch)

View file

@ -597,7 +597,14 @@ def _write_edge_regex(neighbor_path: Path, fm_text: str, body_text: str,
def create_branch(repo_root: Path, branch_name: str) -> bool:
"""Create and checkout a new branch. Cleans up stale local/remote branches from prior failed runs."""
"""Create and checkout a new branch from fresh origin/main.
Cleans up stale local/remote branches from prior failed runs, then
fetches + resets to origin/main so the branch is never based on stale state.
(Ship: reduces reweave merge failure rate from ~75% to near-zero by
eliminating the stale-base problem that causes superset assertion failures
and force-with-lease races.)
"""
# Delete stale local branch if it exists (e.g., from a failed earlier run today)
subprocess.run(["git", "branch", "-D", branch_name],
cwd=str(repo_root), capture_output=True) # ignore errors if branch doesn't exist
@ -610,6 +617,19 @@ def create_branch(repo_root: Path, branch_name: str) -> bool:
subprocess.run(["git", "push", push_url, "--delete", branch_name],
cwd=str(repo_root), capture_output=True) # ignore errors if branch doesn't exist
# Freshen to origin/main before branching — ensures branch base matches
# the main HEAD that _merge_reweave_pr will read at merge time.
try:
subprocess.run(["git", "fetch", "origin", "main"],
cwd=str(repo_root), check=True, capture_output=True, timeout=30)
subprocess.run(["git", "checkout", "main"],
cwd=str(repo_root), check=True, capture_output=True)
subprocess.run(["git", "reset", "--hard", "origin/main"],
cwd=str(repo_root), check=True, capture_output=True)
except (subprocess.CalledProcessError, subprocess.TimeoutExpired) as e:
logger.error("Failed to freshen to origin/main: %s", e)
return False
try:
subprocess.run(["git", "checkout", "-b", branch_name],
cwd=str(repo_root), check=True, capture_output=True)