fix: decision extractor uses extract worktree + PR flow
Was writing directly to main worktree where daemon race condition wiped files. Now: syncs extract worktree to main, creates branch, writes records, commits, pushes, opens Forgejo PR. Same pattern as batch-extract. Also checks both main and extract worktrees for existing records. Pentagon-Agent: Epimetheus <3D35839A-7722-4740-B93D-51157F7D5E70>
This commit is contained in:
parent
9267351aba
commit
d67d36b409
1 changed file with 89 additions and 13 deletions
|
|
@ -28,9 +28,10 @@ import yaml
|
||||||
# ─── Configuration ──────────────────────────────────────────────────────────
OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
MODEL = "anthropic/claude-sonnet-4.5"
USAGE_CSV = "/opt/teleo-eval/logs/openrouter-usage.csv"

# Two worktrees of the same repo: `main` is canonical, `extract` is the
# scratch worktree where records are written/committed (avoids the daemon
# race condition that wiped files written directly to main).
MAIN_REPO = Path("/opt/teleo-eval/workspaces/main")
REPO_DIR = Path("/opt/teleo-eval/workspaces/extract")

ARCHIVE_DIR = MAIN_REPO / "inbox" / "archive"  # Read sources from main (canonical)
DECISIONS_DIR = REPO_DIR / "decisions"  # Write records to extract worktree
|
||||||
|
|
||||||
|
|
||||||
# ─── LLM Call ───────────────────────────────────────────────────────────────
|
# ─── LLM Call ───────────────────────────────────────────────────────────────
|
||||||
|
|
@ -123,14 +124,13 @@ def find_proposal_sources() -> list[Path]:
|
||||||
# ─── Check if Decision Record Exists ────────────────────────────────────────
|
# ─── Check if Decision Record Exists ────────────────────────────────────────
|
||||||
|
|
||||||
def decision_exists(slug: str, domain: str = "internet-finance") -> bool:
|
def decision_exists(slug: str, domain: str = "internet-finance") -> bool:
|
||||||
"""Check if a decision record already exists."""
|
"""Check if a decision record already exists in main OR extract worktree."""
|
||||||
target_dir = DECISIONS_DIR / domain
|
for repo in [MAIN_REPO, REPO_DIR]:
|
||||||
|
target_dir = repo / "decisions" / domain
|
||||||
if not target_dir.exists():
|
if not target_dir.exists():
|
||||||
return False
|
continue
|
||||||
# Check exact slug match
|
|
||||||
if (target_dir / f"{slug}.md").exists():
|
if (target_dir / f"{slug}.md").exists():
|
||||||
return True
|
return True
|
||||||
# Check partial match (slug might be truncated)
|
|
||||||
for f in target_dir.iterdir():
|
for f in target_dir.iterdir():
|
||||||
if slug[:40] in f.name:
|
if slug[:40] in f.name:
|
||||||
return True
|
return True
|
||||||
|
|
@ -344,6 +344,12 @@ def main():
|
||||||
print(f" {title}")
|
print(f" {title}")
|
||||||
return
|
return
|
||||||
|
|
||||||
|
# Prepare extract worktree: sync to main, create branch
|
||||||
|
branch_name = f"epimetheus/decisions-{date.today().isoformat()}"
|
||||||
|
if not _prepare_branch(branch_name):
|
||||||
|
print("ERROR: Failed to prepare extract worktree branch")
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
processed = 0
|
processed = 0
|
||||||
created = 0
|
created = 0
|
||||||
skipped = 0
|
skipped = 0
|
||||||
|
|
@ -369,6 +375,76 @@ def main():
|
||||||
|
|
||||||
print(f"\nDone: {processed} processed, {created} created, {skipped} skipped, {errors} errors")
|
print(f"\nDone: {processed} processed, {created} created, {skipped} skipped, {errors} errors")
|
||||||
|
|
||||||
|
# Commit and push for PR review
|
||||||
|
if created > 0:
|
||||||
|
_commit_and_push(branch_name, created)
|
||||||
|
|
||||||
|
|
||||||
|
def _prepare_branch(branch_name: str) -> bool:
    """Sync the extract worktree to origin/main and create a fresh branch.

    Steps (all run inside the extract worktree, REPO_DIR):
      1. fetch origin/main
      2. checkout main
      3. hard-reset to origin/main (discards any stale local state)
      4. create the new branch `branch_name`

    Args:
        branch_name: Name of the branch to create (e.g. "epimetheus/decisions-YYYY-MM-DD").

    Returns:
        True on success, False if any git command failed (the error is printed).
    """
    import subprocess
    cwd = str(REPO_DIR)
    steps = [
        ["git", "fetch", "origin", "main"],
        ["git", "checkout", "main"],
        ["git", "reset", "--hard", "origin/main"],
        ["git", "checkout", "-b", branch_name],
    ]
    try:
        for cmd in steps:
            subprocess.run(cmd, cwd=cwd, check=True, capture_output=True)
        print(f"Branch created: {branch_name}")
        return True
    except subprocess.CalledProcessError as e:
        # capture_output=True yields bytes; truncate stderr to keep logs readable.
        print(f"ERROR preparing branch: {e.stderr.decode()[:200] if e.stderr else e}")
        return False
|
||||||
|
|
||||||
|
|
||||||
|
def _commit_and_push(branch_name: str, count: int):
    """Commit decision records in the extract worktree, push the branch, open a PR.

    Stages `decisions/`, commits with a standard message, pushes the branch to
    origin, then (if a Forgejo token is available) opens a pull request against
    main via the local Forgejo API. PR creation is best-effort: a failed or
    unreachable API call is logged as a warning and does not undo the push.

    Args:
        branch_name: Branch to commit on and push (must already be checked out).
        count: Number of decision records created (used in commit/PR text).
    """
    import subprocess
    cwd = str(REPO_DIR)
    token_file = Path("/opt/teleo-eval/secrets/forgejo-leo-token")
    token = token_file.read_text().strip() if token_file.exists() else ""

    try:
        subprocess.run(["git", "add", "decisions/"], cwd=cwd, check=True, capture_output=True)
        # Skip the commit entirely when staging produced no changes.
        result = subprocess.run(["git", "status", "--porcelain"], cwd=cwd, capture_output=True, text=True)
        if not result.stdout.strip():
            print("No changes to commit")
            return

        msg = (f"epimetheus: {count} decision records from proposal extraction\n\n"
               f"Batch extraction of event_type: proposal sources into structured\n"
               f"decision records with full verbatim text + LLM analysis.\n\n"
               f"Pentagon-Agent: Epimetheus <3D35839A-7722-4740-B93D-51157F7D5E70>")
        subprocess.run(["git", "commit", "-m", msg], cwd=cwd, check=True, capture_output=True)
        subprocess.run(["git", "push", "-u", "origin", branch_name], cwd=cwd, check=True, capture_output=True)
        print(f"Pushed branch: {branch_name}")
    except subprocess.CalledProcessError as e:
        print(f"ERROR committing: {e.stderr.decode()[:200] if e.stderr else e}")
        return

    # Create PR via Forgejo API. Best-effort: the branch is already pushed, so
    # a network/API failure here must not crash the run (a PR can be opened
    # manually). The original code let requests exceptions propagate.
    if not token:
        return
    try:
        resp = requests.post(
            "http://localhost:3000/api/v1/repos/teleo/teleo-codex/pulls",
            headers={"Authorization": f"token {token}"},
            json={
                "title": f"epimetheus: {count} decision records from proposal extraction",
                "body": (f"## Summary\n"
                         f"- {count} decision records extracted from archived proposal sources\n"
                         f"- Full verbatim proposal text + LLM-generated summary/significance\n"
                         f"- Both decision markets and fundraises\n\n"
                         f"## Source\n"
                         f"Extracted by `extract-decisions.py` from `event_type: proposal` sources in archive/"),
                "head": branch_name,
                "base": "main",
            },
            timeout=30,
        )
        if resp.status_code in (200, 201):
            pr_url = resp.json().get("html_url", "")
            print(f"PR created: {pr_url}")
        else:
            print(f"WARNING: PR creation failed ({resp.status_code}): {resp.text[:200]}")
    except requests.RequestException as e:
        print(f"WARNING: PR creation request failed: {e}")
|
||||||
|
|
||||||
|
|
||||||
# Script entry point: run the extractor only when executed directly,
# never on import.
if __name__ == "__main__":
    main()
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue