Compare commits

..

No commits in common. "main" and "epimetheus/sync-mirror-self-heal" have entirely different histories.

4 changed files with 0 additions and 307 deletions

View file

@ -204,41 +204,7 @@ sync_github_to_forgejo_with_prs() {
local FORGEJO_TOKEN
FORGEJO_TOKEN=$(cat /opt/teleo-eval/secrets/forgejo-admin-token 2>/dev/null)
# Lazy schema for sync-mirror's auto-create tracker. Records (branch, sha)
# pairs we've already auto-created PRs for, so the loop below can skip
# redundant creates after pipeline merge → _delete_remote_branch →
# GitHub-only re-discovery → re-push. Cheap CREATE IF NOT EXISTS on each
# cycle; no migration needed because this table is private to sync-mirror.
sqlite3 "$PIPELINE_DB" "CREATE TABLE IF NOT EXISTS sync_autocreate_tracker (branch TEXT NOT NULL, sha TEXT NOT NULL, pr_number INTEGER, created_at TEXT DEFAULT (datetime('now')), PRIMARY KEY (branch, sha));" 2>/dev/null || true
for branch in $GITHUB_ONLY; do
# Already-tracked gate: if we've previously auto-created a PR for
# this exact (branch, sha), skip the entire push+create sequence.
# Closes the empty-PR loop (research and reweave both observed):
# pipeline merges PR → _delete_remote_branch on Forgejo → next sync
# sees branch GitHub-only (origin still has it) → re-pushes to
# Forgejo → HAS_PR misses (Forgejo ?head= broken; closed PRs scroll
# past 50-item paginated window) → auto-creates fresh PR → pipeline
# merges (empty no-op via cherry-pick / reweave union) → repeat.
# Tracker keys on SHA, so legitimate new commits on the same branch
# produce a new SHA → tracker miss → auto-create proceeds normally.
local BRANCH_SHA TRACKED_PR
if [[ "$branch" == gh-pr-* ]]; then
BRANCH_SHA=$(git rev-parse "refs/heads/$branch" 2>/dev/null || true)
else
BRANCH_SHA=$(git rev-parse "refs/remotes/origin/$branch" 2>/dev/null || true)
fi
if [ -n "$BRANCH_SHA" ]; then
# stderr → $LOG so sustained sqlite3 contention surfaces in ops logs
# rather than silently falling through to a redundant auto-create.
TRACKED_PR=$(sqlite3 "$PIPELINE_DB" "SELECT pr_number FROM sync_autocreate_tracker WHERE branch=$(printf "'%s'" "${branch//\'/\'\'}") AND sha=$(printf "'%s'" "$BRANCH_SHA") LIMIT 1;" 2>>"$LOG" || echo "")
if [ -n "$TRACKED_PR" ]; then
log "Skip auto-create: $branch SHA $BRANCH_SHA already tracked (PR #$TRACKED_PR)"
continue
fi
fi
log "New from GitHub: $branch -> Forgejo"
# Fork PR branches live as local refs (from Step 2.1), not on origin remote
if [[ "$branch" == gh-pr-* ]]; then
@ -309,18 +275,6 @@ print('no')
fi
log "Auto-created PR #$PR_NUM on Forgejo for $branch"
# Record (branch, sha, pr_number) so the tracker gate above can short-
# circuit the next time we see this exact (branch, sha) combination.
# INSERT OR IGNORE: idempotent if a concurrent run already inserted.
# WARN log on failure: silent INSERT failure under sustained sqlite3
# contention would mask the loop reappearing on the next cycle (HAS_PR
# only saves us while the closed PR is in the 50-item pagination window).
if [ -n "$BRANCH_SHA" ] && [[ "$PR_NUM" =~ ^[0-9]+$ ]]; then
if ! sqlite3 "$PIPELINE_DB" "INSERT OR IGNORE INTO sync_autocreate_tracker (branch, sha, pr_number) VALUES ($(printf "'%s'" "${branch//\'/\'\'}"), $(printf "'%s'" "$BRANCH_SHA"), $PR_NUM);" 2>>"$LOG"; then
log "WARN: tracker insert failed for $branch SHA $BRANCH_SHA (PR #$PR_NUM) — duplicate auto-create possible next cycle"
fi
fi
# Step 4.5: Link GitHub PR to Forgejo PR in pipeline DB
if [[ "$branch" == gh-pr-* ]]; then
GH_PR_NUM=$(echo "$branch" | sed 's|gh-pr-\([0-9]*\)/.*|\1|')

View file

@ -84,14 +84,6 @@ MAX_EXTRACT_WORKERS = int(os.environ.get("MAX_EXTRACT_WORKERS", "5"))
MAX_EVAL_WORKERS = int(os.environ.get("MAX_EVAL_WORKERS", "7"))
MAX_MERGE_WORKERS = 1 # domain-serialized, but one merge at a time per domain
# --- External GitHub PR merge strategy ---
# When True, gh-pr-N/* branches merge with --no-ff (preserves contributor SHA in
# main's history → GitHub recognizes "merged" badge). When False, fall back to
# cherry-pick (the default for all other branches). Default True; flip to False
# as an emergency backout if the no-ff path destabilizes merge throughput.
# Phase 2 of external contributor merge flow (Ship architecture review Apr 28).
EXTERNAL_PR_NO_FF_MERGE = True
# --- Timeouts (seconds) ---
EXTRACT_TIMEOUT = 600 # 10 min
EVAL_TIMEOUT = 120 # 2 min — routine Sonnet/Gemini Flash calls (was 600, caused 10-min stalls)

View file

@ -923,36 +923,6 @@ async def extract_cycle(conn, max_workers=None) -> tuple[int, int]:
except Exception:
logger.debug("Failed to read source %s", f, exc_info=True)
# Archive-basename filter: skip queue files whose basename already exists in
# inbox/archive/. Research-session commits on agent branches occasionally
# re-introduce already-archived queue files when the branch is re-merged,
# producing same-source re-extractions every cooldown cycle. The archive
# copy is the source of truth — if a file with this basename is in archive,
# the source is processed regardless of queue state. Single archive scan
# per cycle, cheap (~1k files).
#
# Assumes basename uniqueness across queue+archive — current naming
# convention (date-prefix + topic-slug) makes collisions vanishingly
# rare. If short generic names like "notes.md" enter the queue, this
# filter silently false-positives.
if unprocessed:
archive_dir = main / "inbox" / "archive"
archived_basenames: set[str] = set()
if archive_dir.exists():
for af in archive_dir.rglob("*.md"):
if af.name.startswith("_"):
continue
archived_basenames.add(af.name)
if archived_basenames:
before = len(unprocessed)
unprocessed = [
(sp, c, f) for sp, c, f in unprocessed
if Path(sp).name not in archived_basenames
]
skipped = before - len(unprocessed)
if skipped:
logger.info("Skipped %d queue source(s) — basename already in inbox/archive/", skipped)
# Don't early-return here — re-extraction sources may exist even when queue is empty
# (the re-extraction check runs after open-PR filtering below)

View file

@ -429,171 +429,6 @@ async def _cherry_pick_onto_main(branch: str) -> tuple[bool, str]:
await _git("branch", "-D", clean_branch)
_GH_PR_BRANCH_RE = re.compile(r"^gh-pr-(\d+)/(.+)$")
async def _merge_no_ff_external(branch: str) -> tuple[bool, str]:
"""Merge an external GitHub fork PR with --no-ff so contributor SHA lands in main.
Why this differs from _cherry_pick_onto_main:
- Cherry-pick rewrites the contributor's commit SHA → GitHub's "is PR head SHA
an ancestor of main?" check returns false → "merged" badge never fires.
- --no-ff preserves the contributor's commit SHA as a parent of the merge
commit. After ff-push to main (the existing dispatch step), GitHub sees
the SHA in ancestry and marks the PR merged.
Mechanics:
1. Fetch origin/main + origin/{branch}
2. Worktree on local branch _merged-{slug} from origin/main
3. git merge --no-ff origin/{branch} with verbose message:
"Merge external GitHub PR #{N}: {branch_slug}"
4. Push merge commit to origin/_merged/{branch} (synthetic audit ref)
5. ff-push merge_sha origin/main directly (function owns the push, NOT
dispatch see sentinel return below)
The merge commit M has parents [main_sha, branch_sha]. M is a fast-forward
descendant of main_sha (via first-parent chain), so the push to main
works without --force.
Synthetic branch (Ship review Apr 28): we deliberately do NOT force-push
the contributor's gh-pr-N/* branch. Force-pushing it would rewrite the
branch tip with a merge commit the contributor didn't author, showing as
a confusing bot force-push in Forgejo's PR UI. The synthetic _merged/*
audit ref lets us track the merge commit without touching the contributor's
branch. Mirrors the _clean/* synthetic branch pattern in cherry-pick.
Sentinel return: function pushes merge_sha main itself (dispatch's ff-push
can't, since origin/{branch} is unchanged and not a descendant of main).
Returns a "merged --no-ff" sentinel string that dispatch detects to skip
its ff-push step and route directly to PR-close + mark_merged + audit.
The full 40-char merge SHA is in the return string for dispatch to extract.
Conflict handling: same auto-resolve pattern as cherry-pick entity-only
conflicts take main's version (--ours = current worktree HEAD = main),
other conflicts abort and return False with detail.
Phase 2 of external contributor merge flow (Ship architecture review Apr 28).
"""
m = _GH_PR_BRANCH_RE.match(branch)
if not m:
return False, f"branch {branch} doesn't match gh-pr-N/* format"
gh_pr_num = m.group(1)
branch_slug = m.group(2)
slug = branch.replace("/", "-")
worktree_path = f"/tmp/teleo-merge-{slug}"
local_branch = f"_merged-{slug}" # local working branch in worktree
audit_ref = f"_merged/{branch}" # remote synthetic ref (preserves hierarchy)
# Fetch latest state — separate calls (long branch names break combined refspec)
rc, out = await _git("fetch", "origin", "main", timeout=15)
if rc != 0:
return False, f"fetch main failed: {out}"
rc, out = await _git("fetch", "origin", branch, timeout=15)
if rc != 0:
return False, f"fetch branch failed: {out}"
# Up-to-date check (mirrors cherry-pick path semantics)
rc, merge_base = await _git("merge-base", "origin/main", f"origin/{branch}")
rc2, main_sha = await _git("rev-parse", "origin/main")
if rc == 0 and rc2 == 0 and merge_base.strip() == main_sha.strip():
rc_diff, diff_out = await _git(
"diff", "--stat", f"origin/main..origin/{branch}", timeout=10,
)
if rc_diff != 0 or not diff_out.strip():
return True, "already up to date"
logger.info("External PR branch %s is descendant of main but has new content — proceeding", branch)
async with _bare_repo_lock:
# Clean up any stale local branch from a prior failed run
await _git("branch", "-D", local_branch)
rc, out = await _git("worktree", "add", "-b", local_branch, worktree_path, "origin/main")
if rc != 0:
return False, f"worktree add failed: {out}"
try:
merge_msg = f"Merge external GitHub PR #{gh_pr_num}: {branch_slug}"
rc, out = await _git(
"merge", "--no-ff", f"origin/{branch}",
"-m", merge_msg,
cwd=worktree_path, timeout=60,
)
if rc != 0:
# Identify conflicts
rc_ls, conflicting = await _git(
"diff", "--name-only", "--diff-filter=U", cwd=worktree_path,
)
conflict_files = [
f.strip() for f in conflicting.split("\n") if f.strip()
] if rc_ls == 0 else []
if conflict_files and all(f.startswith("entities/") for f in conflict_files):
# Entity-only conflicts: take main's version (entities are recoverable)
# In merge: --ours = branch we're ON (worktree HEAD = main)
# --theirs = branch merging in (origin/{branch})
for cf in conflict_files:
await _git("checkout", "--ours", cf, cwd=worktree_path)
await _git("add", cf, cwd=worktree_path)
# Complete the merge using the prepared MERGE_MSG (no editor)
rc_cont, cont_out = await _git(
"-c", "core.editor=true",
"commit", "--no-edit",
cwd=worktree_path, timeout=60,
)
if rc_cont != 0:
await _git("merge", "--abort", cwd=worktree_path)
return False, f"merge entity resolution failed for PR #{gh_pr_num}: {cont_out}"
logger.info(
"External PR #%s merge: entity conflict auto-resolved (dropped %s)",
gh_pr_num, ", ".join(sorted(conflict_files)),
)
else:
conflict_detail = ", ".join(conflict_files) if conflict_files else out[:200]
await _git("merge", "--abort", cwd=worktree_path)
return False, f"merge conflict on PR #{gh_pr_num}: {conflict_detail}"
# Capture the merge commit SHA before any pushes
rc, merge_sha = await _git("rev-parse", "HEAD", cwd=worktree_path)
if rc != 0:
return False, f"rev-parse merge HEAD failed: {merge_sha}"
merge_sha = merge_sha.strip().split("\n")[0]
# Push to synthetic audit ref _merged/{branch} (does not touch contributor's
# gh-pr-N/* branch). Plain --force: the audit ref is bot-owned and per-PR;
# if a prior aborted attempt left a stale ref, overwriting it is the
# intended behavior, and there's no concurrent writer to lease against.
rc, out = await _git(
"push", "--force", "origin", f"HEAD:refs/heads/{audit_ref}",
cwd=worktree_path, timeout=30,
)
if rc != 0:
return False, f"push to audit ref {audit_ref} failed: {out}"
# ff-push the merge commit to main. This is a true fast-forward (M is a
# descendant of origin/main via its first parent), so no --force needed.
# Forgejo's branch protection allows ff-push to main from authorized users.
rc, out = await _git(
"push", "origin", f"{merge_sha}:main",
cwd=worktree_path, timeout=30,
)
if rc != 0:
# Roll back audit ref if main push failed — keeps state consistent.
await _git("push", "--delete", "origin", f"refs/heads/{audit_ref}",
cwd=worktree_path, timeout=15)
return False, f"ff-push to main failed: {out}"
# Sentinel return: "merged --no-ff" prefix triggers dispatch's external-PR
# close path (skips ff-push, does PR-close + mark_merged + audit).
# Full 40-char merge SHA in the message so dispatch can parse it for audit.
return True, f"merged --no-ff (external PR #{gh_pr_num}, M={merge_sha}, audit_ref={audit_ref})"
finally:
async with _bare_repo_lock:
await _git("worktree", "remove", "--force", worktree_path)
await _git("branch", "-D", local_branch)
from .frontmatter import (
REWEAVE_EDGE_FIELDS,
parse_yaml_frontmatter,
@ -898,12 +733,6 @@ async def _merge_domain_queue(conn, domain: str) -> tuple[int, int]:
# (Ganymede: manifest approach, Theseus: superset assertion + order-preserving dedup)
if branch.startswith("reweave/"):
merge_fn = _merge_reweave_pr(branch)
elif branch.startswith("gh-pr-") and config.EXTERNAL_PR_NO_FF_MERGE:
# External GitHub fork PRs: --no-ff merge so contributor SHA lands
# in main's history → GitHub recognizes "merged" badge.
# Backout via config.EXTERNAL_PR_NO_FF_MERGE = False (falls back to cherry-pick).
# Phase 2 of external contributor merge flow (Ship architecture review Apr 28).
merge_fn = _merge_no_ff_external(branch)
else:
# Extraction commits ADD new files — cherry-pick applies cleanly.
merge_fn = _cherry_pick_onto_main(branch)
@ -957,58 +786,6 @@ async def _merge_domain_queue(conn, domain: str) -> tuple[int, int]:
succeeded += 1
continue
# External GitHub PR (gh-pr-*): _merge_no_ff_external already pushed
# the merge commit to origin/main + the synthetic _merged/{branch}
# audit ref. Skip dispatch's ff-push (would fail — origin/{branch} is
# the contributor's untouched branch, not a descendant of main).
# Just close PR + mark_merged + audit, parsing merge SHA from sentinel.
if pick_msg.startswith("merged --no-ff"):
m = re.search(r"M=([a-f0-9]{40})", pick_msg)
merge_sha = m.group(1) if m else None
m_ref = re.search(r"audit_ref=(\S+?)\)", pick_msg)
audit_ref = m_ref.group(1) if m_ref else None
m_pr = re.search(r"external PR #(\d+)", pick_msg)
gh_pr_num = m_pr.group(1) if m_pr else None
# Surface drift between dispatch and _merge_no_ff_external if the
# success-message contract changes. Merge already succeeded; this
# is signal-only, not a gate on the close path.
if not (m and m_ref and m_pr):
logger.warning(
"PR #%d sentinel parse incomplete: M=%s, audit_ref=%s, gh_pr=%s, msg=%r",
pr_num, bool(m), bool(m_ref), bool(m_pr), pick_msg,
)
leo_token = get_agent_token("leo")
comment_body = (
f"Merged via --no-ff into main.\n"
f"Merge commit: `{merge_sha}`\n"
f"Audit ref: `{audit_ref}`\n"
f"Branch: `{branch}` (preserved unchanged)"
)
await forgejo_api("POST", repo_path(f"issues/{pr_num}/comments"),
{"body": comment_body})
result = await forgejo_api("PATCH", repo_path(f"pulls/{pr_num}"),
{"state": "closed"}, token=leo_token)
if result is None:
logger.error("PR #%d: Forgejo close failed (no-ff path), skipping DB update", pr_num)
failed += 1
continue
mark_merged(conn, pr_num)
db.audit(conn, "merge", "merged", json.dumps({
"pr": pr_num, "branch": branch, "method": "no-ff",
"merge_commit_sha": merge_sha,
"audit_ref": audit_ref,
"github_pr": gh_pr_num,
}))
# NOTE: do NOT _delete_remote_branch(branch) here. The contributor's
# gh-pr-N/* branch is the mirror of their fork PR head — leaving it
# in place lets sync-mirror keep the GitHub PR <-> Forgejo PR link
# observable. The synthetic _merged/{branch} ref carries the merge.
logger.info("PR #%d merged via --no-ff (M=%s)", pr_num,
merge_sha[:8] if merge_sha else "?")
succeeded += 1
continue
# Local ff-push: cherry-picked branch is a descendant of origin/main.
# Regular push = fast-forward. Non-ff rejected by default (same safety).
# --force-with-lease removed: Forgejo categorically blocks it on protected branches.