Compare commits
4 commits
epimetheus
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| 87f97eb4fa | |||
| ad1d82f5ee | |||
| 923454c9ea | |||
| ed4af4d72e |
3 changed files with 76 additions and 156 deletions
|
|
@ -204,7 +204,41 @@ sync_github_to_forgejo_with_prs() {
|
||||||
|
|
||||||
local FORGEJO_TOKEN
|
local FORGEJO_TOKEN
|
||||||
FORGEJO_TOKEN=$(cat /opt/teleo-eval/secrets/forgejo-admin-token 2>/dev/null)
|
FORGEJO_TOKEN=$(cat /opt/teleo-eval/secrets/forgejo-admin-token 2>/dev/null)
|
||||||
|
|
||||||
|
# Lazy schema for sync-mirror's auto-create tracker. Records (branch, sha)
|
||||||
|
# pairs we've already auto-created PRs for, so the loop below can skip
|
||||||
|
# redundant creates after pipeline merge → _delete_remote_branch →
|
||||||
|
# GitHub-only re-discovery → re-push. Cheap CREATE IF NOT EXISTS on each
|
||||||
|
# cycle; no migration needed because this table is private to sync-mirror.
|
||||||
|
sqlite3 "$PIPELINE_DB" "CREATE TABLE IF NOT EXISTS sync_autocreate_tracker (branch TEXT NOT NULL, sha TEXT NOT NULL, pr_number INTEGER, created_at TEXT DEFAULT (datetime('now')), PRIMARY KEY (branch, sha));" 2>/dev/null || true
|
||||||
|
|
||||||
for branch in $GITHUB_ONLY; do
|
for branch in $GITHUB_ONLY; do
|
||||||
|
# Already-tracked gate: if we've previously auto-created a PR for
|
||||||
|
# this exact (branch, sha), skip the entire push+create sequence.
|
||||||
|
# Closes the empty-PR loop (research and reweave both observed):
|
||||||
|
# pipeline merges PR → _delete_remote_branch on Forgejo → next sync
|
||||||
|
# sees branch GitHub-only (origin still has it) → re-pushes to
|
||||||
|
# Forgejo → HAS_PR misses (Forgejo ?head= broken; closed PRs scroll
|
||||||
|
# past 50-item paginated window) → auto-creates fresh PR → pipeline
|
||||||
|
# merges (empty no-op via cherry-pick / reweave union) → repeat.
|
||||||
|
# Tracker keys on SHA, so legitimate new commits on the same branch
|
||||||
|
# produce a new SHA → tracker miss → auto-create proceeds normally.
|
||||||
|
local BRANCH_SHA TRACKED_PR
|
||||||
|
if [[ "$branch" == gh-pr-* ]]; then
|
||||||
|
BRANCH_SHA=$(git rev-parse "refs/heads/$branch" 2>/dev/null || true)
|
||||||
|
else
|
||||||
|
BRANCH_SHA=$(git rev-parse "refs/remotes/origin/$branch" 2>/dev/null || true)
|
||||||
|
fi
|
||||||
|
if [ -n "$BRANCH_SHA" ]; then
|
||||||
|
# stderr → $LOG so sustained sqlite3 contention surfaces in ops logs
|
||||||
|
# rather than silently falling through to a redundant auto-create.
|
||||||
|
TRACKED_PR=$(sqlite3 "$PIPELINE_DB" "SELECT pr_number FROM sync_autocreate_tracker WHERE branch=$(printf "'%s'" "${branch//\'/\'\'}") AND sha=$(printf "'%s'" "$BRANCH_SHA") LIMIT 1;" 2>>"$LOG" || echo "")
|
||||||
|
if [ -n "$TRACKED_PR" ]; then
|
||||||
|
log "Skip auto-create: $branch SHA $BRANCH_SHA already tracked (PR #$TRACKED_PR)"
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
log "New from GitHub: $branch -> Forgejo"
|
log "New from GitHub: $branch -> Forgejo"
|
||||||
# Fork PR branches live as local refs (from Step 2.1), not on origin remote
|
# Fork PR branches live as local refs (from Step 2.1), not on origin remote
|
||||||
if [[ "$branch" == gh-pr-* ]]; then
|
if [[ "$branch" == gh-pr-* ]]; then
|
||||||
|
|
@ -275,6 +309,18 @@ print('no')
|
||||||
fi
|
fi
|
||||||
log "Auto-created PR #$PR_NUM on Forgejo for $branch"
|
log "Auto-created PR #$PR_NUM on Forgejo for $branch"
|
||||||
|
|
||||||
|
# Record (branch, sha, pr_number) so the tracker gate above can short-
|
||||||
|
# circuit the next time we see this exact (branch, sha) combination.
|
||||||
|
# INSERT OR IGNORE: idempotent if a concurrent run already inserted.
|
||||||
|
# WARN log on failure: silent INSERT failure under sustained sqlite3
|
||||||
|
# contention would mask the loop reappearing on the next cycle (HAS_PR
|
||||||
|
# only saves us while the closed PR is in the 50-item pagination window).
|
||||||
|
if [ -n "$BRANCH_SHA" ] && [[ "$PR_NUM" =~ ^[0-9]+$ ]]; then
|
||||||
|
if ! sqlite3 "$PIPELINE_DB" "INSERT OR IGNORE INTO sync_autocreate_tracker (branch, sha, pr_number) VALUES ($(printf "'%s'" "${branch//\'/\'\'}"), $(printf "'%s'" "$BRANCH_SHA"), $PR_NUM);" 2>>"$LOG"; then
|
||||||
|
log "WARN: tracker insert failed for $branch SHA $BRANCH_SHA (PR #$PR_NUM) — duplicate auto-create possible next cycle"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
# Step 4.5: Link GitHub PR to Forgejo PR in pipeline DB
|
# Step 4.5: Link GitHub PR to Forgejo PR in pipeline DB
|
||||||
if [[ "$branch" == gh-pr-* ]]; then
|
if [[ "$branch" == gh-pr-* ]]; then
|
||||||
GH_PR_NUM=$(echo "$branch" | sed 's|gh-pr-\([0-9]*\)/.*|\1|')
|
GH_PR_NUM=$(echo "$branch" | sed 's|gh-pr-\([0-9]*\)/.*|\1|')
|
||||||
|
|
|
||||||
|
|
@ -923,6 +923,36 @@ async def extract_cycle(conn, max_workers=None) -> tuple[int, int]:
|
||||||
except Exception:
|
except Exception:
|
||||||
logger.debug("Failed to read source %s", f, exc_info=True)
|
logger.debug("Failed to read source %s", f, exc_info=True)
|
||||||
|
|
||||||
|
# Archive-basename filter: skip queue files whose basename already exists in
|
||||||
|
# inbox/archive/. Research-session commits on agent branches occasionally
|
||||||
|
# re-introduce already-archived queue files when the branch is re-merged,
|
||||||
|
# producing same-source re-extractions every cooldown cycle. The archive
|
||||||
|
# copy is the source of truth — if a file with this basename is in archive,
|
||||||
|
# the source is processed regardless of queue state. Single archive scan
|
||||||
|
# per cycle, cheap (~1k files).
|
||||||
|
#
|
||||||
|
# Assumes basename uniqueness across queue+archive — current naming
|
||||||
|
# convention (date-prefix + topic-slug) makes collisions vanishingly
|
||||||
|
# rare. If short generic names like "notes.md" enter the queue, this
|
||||||
|
# filter silently false-positives.
|
||||||
|
if unprocessed:
|
||||||
|
archive_dir = main / "inbox" / "archive"
|
||||||
|
archived_basenames: set[str] = set()
|
||||||
|
if archive_dir.exists():
|
||||||
|
for af in archive_dir.rglob("*.md"):
|
||||||
|
if af.name.startswith("_"):
|
||||||
|
continue
|
||||||
|
archived_basenames.add(af.name)
|
||||||
|
if archived_basenames:
|
||||||
|
before = len(unprocessed)
|
||||||
|
unprocessed = [
|
||||||
|
(sp, c, f) for sp, c, f in unprocessed
|
||||||
|
if Path(sp).name not in archived_basenames
|
||||||
|
]
|
||||||
|
skipped = before - len(unprocessed)
|
||||||
|
if skipped:
|
||||||
|
logger.info("Skipped %d queue source(s) — basename already in inbox/archive/", skipped)
|
||||||
|
|
||||||
# Don't early-return here — re-extraction sources may exist even when queue is empty
|
# Don't early-return here — re-extraction sources may exist even when queue is empty
|
||||||
# (the re-extraction check runs after open-PR filtering below)
|
# (the re-extraction check runs after open-PR filtering below)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,156 +0,0 @@
|
||||||
#!/usr/bin/env python3
"""One-off cleanup for FwazB GitHub PR #90 (the cherry-pick artifact case).

Phase 3 of the external contributor merge flow rollout.

Background:
    PR #90 was opened by @FwazB on Apr 27 with a single claim about Arcium
    confidential computing. The pipeline merged it via cherry-pick (the path
    that has now been replaced for gh-pr-* branches by _merge_no_ff_external,
    Phase 2). Cherry-pick rewrote the contributor SHA, so the GitHub PR shows
    state=open, merged=false, merge_commit_sha=null forever — looks abandoned.

    The claim IS on main:
        domains/internet-finance/confidential-computing-reshapes-defi-mechanism-design.md
    via two commits:
        f6a59d7d claim: confidential computing reshapes DeFi mechanism design
        d7916d65 auto-fix: strip 2 broken wiki links

    This script posts an explanatory comment on PR #90 and closes it. The PR
    will show "closed" without the merged badge — accepting the artifact for
    this single historical case rather than running a backfilled --no-ff
    re-merge (which would rewrite main's history just to fix the badge).

Idempotent: checks current state, no-ops if already closed.

Usage:
    python3 cleanup-fwazb-pr90.py --dry-run
    python3 cleanup-fwazb-pr90.py
"""
import argparse
import json
import os
import sys
import urllib.request

# Credential + target coordinates. The PAT file lives on the pipeline host;
# main() exits early if it is missing.
GITHUB_PAT_FILE = "/opt/teleo-eval/secrets/github-pat"
GITHUB_REPO = "living-ip/teleo-codex"
PR_NUMBER = 90

# Where the contribution actually landed on main, and the two commits that
# carried it (referenced in the close comment below).
CLAIM_PATH = "domains/internet-finance/confidential-computing-reshapes-defi-mechanism-design.md"
CLAIM_COMMIT = "f6a59d7d"  # the original add commit from cherry-pick
FIX_COMMIT = "d7916d65"  # the auto-fixer wiki-link strip commit

# Markdown body posted on the PR before closing it. Explains why the PR will
# never show GitHub's "merged" badge despite the content being on main.
CLOSE_COMMENT = (
    "This contribution is **merged into the knowledge base** — but you'll "
    "see this PR shows \"open\" with no diff. Two commits explain why:\n"
    "\n"
    f"- [`{CLAIM_COMMIT}`](https://github.com/{GITHUB_REPO}/commit/{CLAIM_COMMIT}) — your claim, added on `main`\n"
    f"- [`{FIX_COMMIT}`](https://github.com/{GITHUB_REPO}/commit/{FIX_COMMIT}) — pipeline auto-fixer (stripped 2 broken wiki-link brackets)\n"
    "\n"
    f"Claim is live at [`{CLAIM_PATH}`](https://github.com/{GITHUB_REPO}/blob/main/{CLAIM_PATH}).\n"
    "\n"
    "Why the PR doesn't show as merged: the pipeline previously used "
    "`git cherry-pick` to land contributions, which rewrote the commit SHA. "
    "GitHub's \"merged\" badge fires when the PR head SHA is in main's history "
    "— after a cherry-pick, the original SHA isn't there.\n"
    "\n"
    "**This has been fixed.** Future external GitHub PRs go through "
    "`git merge --no-ff`, which preserves the contributor SHA so the merged "
    "badge fires correctly. Closing this one as the lone historical artifact.\n"
    "\n"
    "Thank you for the Arcium claim — it's running through the pipeline as "
    "intended.\n"
    "\n"
    "_— Epimetheus, on behalf of the LivingIP pipeline._"
)
|
|
||||||
|
|
||||||
|
|
||||||
def gh_api(method: str, path: str, token: str, body: dict | None = None) -> dict | None:
    """Issue one GitHub REST API call and return the parsed JSON response.

    Any failure — HTTP error status or transport problem — is reported to
    stderr and converted to a ``None`` return, so callers only need a single
    ``is None`` check.
    """
    payload = json.dumps(body).encode() if body else None
    request = urllib.request.Request(
        f"https://api.github.com{path}",
        data=payload,
        method=method,
    )
    request.add_header("Authorization", f"token {token}")
    request.add_header("Accept", "application/vnd.github+json")
    if payload:
        request.add_header("Content-Type", "application/json")
    try:
        with urllib.request.urlopen(request, timeout=15) as resp:
            return json.loads(resp.read())
    except urllib.error.HTTPError as e:
        # Include a bounded slice of the error body for diagnostics.
        snippet = e.read().decode()[:500]
        print(f"ERROR: GitHub API {method} {path} returned {e.code}: {snippet}", file=sys.stderr)
        return None
    except Exception as e:
        print(f"ERROR: GitHub API {method} {path}: {e}", file=sys.stderr)
        return None
|
|
||||||
|
|
||||||
|
|
||||||
def main():
    """Post the explanatory comment on PR #90 and close it.

    Idempotent: no-ops (exit 0) if the PR is already closed. Aborts if the
    PR author is not FwazB, as a guard against acting on the wrong PR.
    Distinct non-zero exit codes mark each failure point for ops triage.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--dry-run", action="store_true")
    args = parser.parse_args()

    # The PAT is the only credential this script uses — fail fast without it.
    if not os.path.exists(GITHUB_PAT_FILE):
        print(f"ERROR: GitHub PAT not found at {GITHUB_PAT_FILE}", file=sys.stderr)
        sys.exit(1)
    token = open(GITHUB_PAT_FILE).read().strip()

    print(f"=== cleanup FwazB GitHub PR #{PR_NUMBER} ===")
    print(f" repo: {GITHUB_REPO}")
    print(f" dry_run: {args.dry_run}")

    pr = gh_api("GET", f"/repos/{GITHUB_REPO}/pulls/{PR_NUMBER}", token)
    if pr is None:
        print("ERROR: PR fetch failed", file=sys.stderr)
        sys.exit(2)

    print(f" state: {pr['state']}")
    print(f" merged: {pr['merged']}")
    print(f" user: {pr['user']['login']}")

    # Idempotence gate: a prior run (or a human) already closed it.
    if pr["state"] == "closed":
        print("\nNO-OP: PR already closed.")
        sys.exit(0)

    # Safety gate: refuse to comment-and-close anyone else's PR.
    if pr["user"]["login"].lower() != "fwazb":
        print(f"\nABORT: PR author is {pr['user']['login']!r}, expected 'FwazB'. "
              "Refusing to act on the wrong PR.", file=sys.stderr)
        sys.exit(3)

    if args.dry_run:
        print("\nWould post comment + close (skipped: --dry-run).")
        print(f"\nComment preview:\n{'-'*70}\n{CLOSE_COMMENT}\n{'-'*70}")
        sys.exit(0)

    # Step 1: post comment
    print("\nPosting comment...")
    posted = gh_api(
        "POST",
        f"/repos/{GITHUB_REPO}/issues/{PR_NUMBER}/comments",
        token,
        {"body": CLOSE_COMMENT},
    )
    if posted is None:
        print("ERROR: comment post failed", file=sys.stderr)
        sys.exit(4)
    print(f" comment posted: {posted.get('html_url', '<no url>')}")

    # Step 2: close PR
    print("Closing PR...")
    closed = gh_api("PATCH", f"/repos/{GITHUB_REPO}/pulls/{PR_NUMBER}", token, {"state": "closed"})
    if closed is None:
        print("ERROR: PR close failed", file=sys.stderr)
        sys.exit(5)
    print(f" PR state: {closed['state']}")

    print("\nDone.")
|
|
||||||
|
|
||||||
|
|
||||||
# Script entry point: run the cleanup when executed directly.
if __name__ == "__main__":
    main()
|
|
||||||
Loading…
Reference in a new issue