#!/usr/bin/env python3
"""Extract decision records from proposal sources.

Reads event_type: proposal sources from archive, produces decision records in
decisions/{domain}/ with full verbatim proposal text + LLM-generated summary,
significance, and KB connections.

Usage: python3 extract-decisions.py [--dry-run] [--limit N] [--source FILE]

Pentagon-Agent: Epimetheus <3D35839A-7722-4740-B93D-51157F7D5E70>
"""

import argparse
import csv
import json
import os
import re
import sys
from datetime import date
from pathlib import Path

import requests
import yaml

# ─── Constants ──────────────────────────────────────────────────────────────

OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
MODEL = "anthropic/claude-sonnet-4.5"
USAGE_CSV = "/opt/teleo-eval/logs/openrouter-usage.csv"

MAIN_REPO = Path("/opt/teleo-eval/workspaces/main")
REPO_DIR = Path("/opt/teleo-eval/workspaces/extract")
ARCHIVE_DIR = MAIN_REPO / "inbox" / "archive"  # Read sources from main (canonical)
DECISIONS_DIR = REPO_DIR / "decisions"  # Write records to extract worktree

# ─── LLM Call ───────────────────────────────────────────────────────────────


def call_llm(prompt: str, max_tokens: int = 4096) -> str | None:
    """Call the OpenRouter chat-completions API with a single user message.

    Returns the assistant message content, or None on any failure (missing
    key, network error, non-200 status, malformed response body). Appends a
    usage row to USAGE_CSV on success, best-effort.
    """
    api_key = os.environ.get("OPENROUTER_API_KEY", "")
    if not api_key:
        # Try reading from file (same location as openrouter-extract-v2.py)
        key_file = Path("/opt/teleo-eval/secrets/openrouter-key")
        if key_file.exists():
            api_key = key_file.read_text().strip()
    if not api_key:
        print("ERROR: No OPENROUTER_API_KEY", file=sys.stderr)
        return None

    try:
        resp = requests.post(
            OPENROUTER_URL,
            headers={"Authorization": f"Bearer {api_key}"},
            json={
                "model": MODEL,
                "messages": [{"role": "user", "content": prompt}],
                "max_tokens": max_tokens,
                "temperature": 0.3,
            },
            timeout=120,
        )
    except requests.RequestException as e:
        # Network failure / timeout: report and signal failure to the caller
        # instead of letting the exception escape (the --source path in main()
        # has no surrounding try/except).
        print(f"ERROR: OpenRouter request failed: {e}", file=sys.stderr)
        return None

    if resp.status_code != 200:
        print(f"ERROR: OpenRouter {resp.status_code}: {resp.text[:200]}", file=sys.stderr)
        return None

    try:
        data = resp.json()
    except ValueError:
        print("ERROR: OpenRouter returned non-JSON body", file=sys.stderr)
        return None

    # Log usage (best-effort: never let accounting break the extraction run).
    usage = data.get("usage", {})
    try:
        # newline="" per the csv module's file-opening requirement.
        with open(USAGE_CSV, "a", newline="") as f:
            writer = csv.writer(f)
            writer.writerow([
                date.today().isoformat(),
                "extract-decisions",
                MODEL,
                usage.get("prompt_tokens", 0),
                usage.get("completion_tokens", 0),
                "",
            ])
    except Exception:
        pass

    try:
        return data["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError):
        # Defensive: an error payload with HTTP 200 would otherwise crash here.
        print("ERROR: Unexpected OpenRouter response shape", file=sys.stderr)
        return None


# ─── Frontmatter Parsing ────────────────────────────────────────────────────


def parse_frontmatter(path: Path) -> tuple[dict | None, str]:
    """Parse YAML frontmatter and body.

    Returns (frontmatter_dict, body). On any parse failure — no leading
    '---', unterminated block, invalid YAML, or a non-mapping document —
    returns (None, full_text) so callers can still see the raw content.
    """
    text = path.read_text(errors="replace")
    if not text.startswith("---"):
        return None, text
    end = text.find("\n---", 3)
    if end == -1:
        return None, text
    try:
        fm = yaml.safe_load(text[3:end])
        if not isinstance(fm, dict):
            return None, text
        body = text[end + 4:].strip()
        return fm, body
    except Exception:
        return None, text


# ─── Find Unprocessed Proposal Sources ──────────────────────────────────────


def find_proposal_sources() -> list[Path]:
    """Find all unprocessed proposal sources in archive.

    A source qualifies if its frontmatter has event_type: proposal and its
    status is 'unprocessed' or missing entirely.
    """
    sources = []
    for md_file in sorted(ARCHIVE_DIR.rglob("*.md")):
        try:
            fm, _ = parse_frontmatter(md_file)
        except Exception:
            # e.g. unreadable file; skip rather than abort the scan
            continue
        if not fm:
            continue
        if fm.get("event_type") == "proposal" and fm.get("status") in ("unprocessed", None):
            sources.append(md_file)
    return sources


# ─── Check if Decision Record Exists ────────────────────────────────────────


def decision_exists(slug: str, domain: str = "internet-finance") -> bool:
    """Check if a decision record already exists in main OR extract worktree.

    Matches either an exact '{slug}.md' filename or any filename containing
    the first 40 chars of the slug (catches records created under a slightly
    different slug for the same proposal).
    """
    for repo in [MAIN_REPO, REPO_DIR]:
        target_dir = repo / "decisions" / domain
        if not target_dir.exists():
            continue
        if (target_dir / f"{slug}.md").exists():
            return True
        for f in target_dir.iterdir():
            if slug[:40] in f.name:
                return True
    return False


def slugify(text: str) -> str:
    """Convert text to a filename slug (lowercase, hyphen-separated, <=80 chars)."""
    text = text.lower()
    text = re.sub(r'[^a-z0-9\s-]', '', text)
    text = re.sub(r'\s+', '-', text.strip())
    text = re.sub(r'-+', '-', text)
    # rstrip so the 80-char truncation can't leave a dangling hyphen
    return text[:80].rstrip('-')


# ─── Build Decision Record ──────────────────────────────────────────────────
ANALYSIS_PROMPT = """You are analyzing a futarchy/governance proposal to create a structured decision record for a knowledge base.

Given this proposal source, produce a JSON object with these fields:

- "name": The full proposal name (e.g., "MetaDAO: Hire Robin Hanson as Advisor")
- "status": "passed" or "failed" or "active" (from the source data)
- "proposer": Who proposed it (name or handle)
- "proposal_date": ISO date when created
- "resolution_date": ISO date when resolved (null if active)
- "record_type": One of: "decision_market" (governance proposals voted on via futarchy) or "fundraise" (ICO/launch raising capital through MetaDAO or Futardio)
- "category": One of: treasury, hiring, product, governance, fundraise, incentives, migration, other
- "summary": 1-2 sentence summary of what this proposal does and why it matters. Be specific — include dollar amounts, key parameters, and outcomes.
- "significance": 2-3 paragraphs analyzing why this proposal matters for the futarchy ecosystem. What does it prove or test? What precedent does it set? How does it relate to broader governance patterns?
- "related_claims": List of 2-5 wiki-link titles from the Teleo knowledge base that this proposal is evidence for or against. Use full prose-as-title format like "futarchy-governed DAOs converge on traditional corporate governance scaffolding for treasury operations because market mechanisms alone cannot provide operational security and legal compliance"

IMPORTANT: Only output valid JSON. No markdown, no commentary.

Here is the proposal source:

{source_text}
"""


def _strip_code_fences(response: str) -> str:
    """Remove a surrounding markdown code fence (``` or ```json) from LLM output."""
    # Original only handled ```json; models also emit bare ``` fences.
    cleaned = re.sub(r'^```(?:json)?\s*', '', response.strip())
    return re.sub(r'\s*```$', '', cleaned)


def _extract_market_lines(body: str) -> list[str]:
    """Pull bullet/bold lines from the source body that look like market data."""
    keywords = ["status:", "total volume", "pass", "fail", "spot", "outcome",
                "autocrat", "proposal account", "dao account", "proposer:"]
    market_lines = []
    for line in body.split("\n"):
        line_stripped = line.strip()
        if any(kw in line_stripped.lower() for kw in keywords):
            # Only keep structured lines (list items or bold labels), not prose.
            if line_stripped.startswith("- ") or line_stripped.startswith("**"):
                market_lines.append(line_stripped)
    return market_lines


def build_decision_record(source_path: Path, dry_run: bool = False) -> Path | None:
    """Build a decision record from a proposal source.

    Parses the source's frontmatter, asks the LLM for a structured analysis,
    writes decisions/{domain}/{slug}.md in the extract worktree, and marks
    the source file as processed. Returns the written path, or None when the
    source is skipped (no frontmatter, duplicate, dry run) or on error.
    """
    fm, body = parse_frontmatter(source_path)
    if not fm:
        print(f"  SKIP: No frontmatter in {source_path.name}")
        return None

    title = fm.get("title", "")
    domain = fm.get("domain", "internet-finance")
    url = fm.get("url", "")
    source_date = fm.get("date", "")
    tags = fm.get("tags", []) or []

    # Extract project name from body
    project_match = re.search(r'Project:\s*(.+)', body)
    project = project_match.group(1).strip() if project_match else "Unknown"

    # Build slug from title (drop the platform prefix first)
    slug = slugify(title.replace("Futardio: ", "").replace("futardio: ", ""))
    if not slug:
        slug = slugify(source_path.stem)

    # Check if already exists
    if decision_exists(slug, domain):
        print(f"  SKIP: Decision record already exists for {slug}")
        return None

    # Full source text for LLM (truncate at 8K to fit in context)
    source_text = f"Title: {title}\nURL: {url}\nDate: {source_date}\n\n{body}"
    if len(source_text) > 8000:
        source_text = source_text[:8000] + "\n\n[... truncated for analysis ...]"

    if dry_run:
        print(f"  DRY RUN: Would create {slug}.md from {source_path.name}")
        return None

    # Call LLM for analysis
    prompt = ANALYSIS_PROMPT.format(source_text=source_text)
    response = call_llm(prompt)
    if not response:
        print(f"  ERROR: LLM call failed for {source_path.name}")
        return None

    # Parse LLM response
    try:
        analysis = json.loads(_strip_code_fences(response))
    except json.JSONDecodeError as e:
        print(f"  ERROR: Invalid JSON from LLM for {source_path.name}: {e}")
        print(f"  Response: {response[:200]}")
        return None

    # Extract market data from body if present
    market_lines = _extract_market_lines(body)

    # Build frontmatter
    record_type = analysis.get("record_type", "decision_market")
    record_fm = {
        "type": "decision",
        "entity_type": record_type,
        "name": analysis.get("name", title),
        "domain": domain,
        "status": analysis.get("status", "unknown"),
        "tracked_by": "rio",
        "created": str(date.today()),
        "last_updated": str(date.today()),
        "parent_entity": f"[[{project.lower()}]]" if project != "Unknown" else "",
        "platform": "metadao",
        "proposer": analysis.get("proposer", ""),
        "proposal_url": url,
        "proposal_date": analysis.get("proposal_date", str(source_date)),
        "resolution_date": analysis.get("resolution_date", ""),
        "category": analysis.get("category", "other"),
        "summary": analysis.get("summary", ""),
        "tags": tags + [project.lower()] if project != "Unknown" else tags,
    }

    # Build body
    name = analysis.get("name", title)
    summary = analysis.get("summary", "")
    significance = analysis.get("significance", "")
    related = analysis.get("related_claims", [])

    body_parts = [f"# {name}\n"]
    body_parts.append(f"## Summary\n\n{summary}\n")
    if market_lines:
        body_parts.append("## Market Data\n")
        for ml in market_lines:
            body_parts.append(ml)
        body_parts.append("")
    body_parts.append(f"## Significance\n\n{significance}\n")

    # Full proposal text — verbatim
    body_parts.append("## Full Proposal Text\n")
    body_parts.append(body)
    body_parts.append("")

    # KB relationships
    if related:
        body_parts.append("## Relationship to KB\n")
        for claim_title in related:
            slug_link = claim_title.replace(" ", "-").lower()
            body_parts.append(f"- [[{slug_link}]]")
        body_parts.append("")

    body_parts.append("---\n")
    body_parts.append("Relevant Entities:")
    if project != "Unknown":
        body_parts.append(f"- [[{project.lower()}]] — parent organization")
    body_parts.append("\nTopics:\n- [[internet finance and decision markets]]")

    # Write file
    target_dir = DECISIONS_DIR / domain
    target_dir.mkdir(parents=True, exist_ok=True)
    target_path = target_dir / f"{slug}.md"

    # Serialize frontmatter
    fm_str = yaml.dump(record_fm, default_flow_style=False, allow_unicode=True, sort_keys=False)
    content = f"---\n{fm_str}---\n\n" + "\n".join(body_parts)
    target_path.write_text(content)
    print(f"  CREATED: {target_path.name} ({len(content)} chars)")

    # Mark source as processed
    source_text_full = source_path.read_text()
    updated = source_text_full.replace("status: unprocessed", "status: processed")
    if updated != source_text_full:
        source_path.write_text(updated)
    else:
        # find_proposal_sources() also selects sources with NO status line;
        # those can't be marked via replace, so warn instead of silently
        # leaving the source eligible for re-processing on the next run.
        print(f"  WARNING: no 'status: unprocessed' line in {source_path.name}; "
              f"source not marked as processed")

    return target_path


# ─── Main ───────────────────────────────────────────────────────────────────


def main():
    parser = argparse.ArgumentParser(description="Extract decision records from proposal sources")
    parser.add_argument("--dry-run", action="store_true", help="Show what would be created without writing")
    parser.add_argument("--limit", type=int, default=0, help="Max proposals to process (0 = all)")
    parser.add_argument("--source", type=str, help="Process a single source file")
    # NOTE: accepted for CLI compatibility but currently unused — dedup is
    # always performed by decision_exists() inside build_decision_record().
    parser.add_argument("--skip-existing", action="store_true", default=True,
                        help="Skip sources that already have decision records")
    args = parser.parse_args()

    # Single-source mode: no branch management, just build one record.
    if args.source:
        source_path = Path(args.source)
        if not source_path.exists():
            print(f"ERROR: Source not found: {source_path}")
            sys.exit(1)
        result = build_decision_record(source_path, dry_run=args.dry_run)
        if result:
            print(f"Done: {result}")
        return

    # Find all unprocessed proposals
    sources = find_proposal_sources()
    print(f"Found {len(sources)} unprocessed proposal sources")

    if args.dry_run:
        for s in sources[:args.limit or len(sources)]:
            fm, _ = parse_frontmatter(s)
            title = fm.get("title", s.stem) if fm else s.stem
            print(f"  {title}")
        return

    # Prepare extract worktree: sync to main, create branch
    branch_name = f"epimetheus/decisions-{date.today().isoformat()}"
    if not _prepare_branch(branch_name):
        print("ERROR: Failed to prepare extract worktree branch")
        sys.exit(1)

    processed = 0
    created = 0
    skipped = 0
    errors = 0
    limit = args.limit or len(sources)
    for source_path in sources[:limit]:
        fm, _ = parse_frontmatter(source_path)
        title = fm.get("title", source_path.stem) if fm else source_path.stem
        print(f"\nProcessing: {title}")
        try:
            result = build_decision_record(source_path, dry_run=False)
            if result:
                created += 1
            else:
                skipped += 1
        except Exception as e:
            # Keep the batch going; count and report the failure.
            print(f"  ERROR: {e}")
            errors += 1
        processed += 1

    print(f"\nDone: {processed} processed, {created} created, {skipped} skipped, {errors} errors")

    # Commit and push for PR review
    if created > 0:
        _commit_and_push(branch_name, created)


def _prepare_branch(branch_name: str) -> bool:
    """Sync extract worktree to main and create a new branch.

    Returns True on success, False if any git step fails.
    """
    import subprocess
    cwd = str(REPO_DIR)
    try:
        subprocess.run(["git", "fetch", "origin", "main"], cwd=cwd, check=True, capture_output=True)
        subprocess.run(["git", "checkout", "main"], cwd=cwd, check=True, capture_output=True)
        subprocess.run(["git", "reset", "--hard", "origin/main"], cwd=cwd, check=True, capture_output=True)
        # Delete branch if it already exists (from a failed previous run);
        # deliberately unchecked — absence of the branch is the normal case.
        subprocess.run(["git", "branch", "-D", branch_name], cwd=cwd, capture_output=True)
        subprocess.run(["git", "checkout", "-b", branch_name], cwd=cwd, check=True, capture_output=True)
        print(f"Branch created: {branch_name}")
        return True
    except subprocess.CalledProcessError as e:
        print(f"ERROR preparing branch: {e.stderr.decode()[:200] if e.stderr else e}")
        return False


def _commit_and_push(branch_name: str, count: int):
    """Commit decision records and push branch for PR.

    Commits everything under decisions/, pushes the branch, and (when a
    Forgejo token is available) opens a PR against main. PR creation is
    best-effort: a failure there must not mask the successful push.
    """
    import subprocess
    cwd = str(REPO_DIR)

    token_file = Path("/opt/teleo-eval/secrets/forgejo-leo-token")
    token = token_file.read_text().strip() if token_file.exists() else ""

    try:
        subprocess.run(["git", "add", "decisions/"], cwd=cwd, check=True, capture_output=True)
        result = subprocess.run(["git", "status", "--porcelain"], cwd=cwd, capture_output=True, text=True)
        if not result.stdout.strip():
            print("No changes to commit")
            return

        msg = (f"epimetheus: {count} decision records from proposal extraction\n\n"
               f"Batch extraction of event_type: proposal sources into structured\n"
               f"decision records with full verbatim text + LLM analysis.\n\n"
               f"Pentagon-Agent: Epimetheus <3D35839A-7722-4740-B93D-51157F7D5E70>")
        subprocess.run(["git", "commit", "-m", msg], cwd=cwd, check=True, capture_output=True)
        subprocess.run(["git", "push", "-u", "origin", branch_name], cwd=cwd, check=True, capture_output=True)
        print(f"Pushed branch: {branch_name}")

        # Create PR via Forgejo API
        if token:
            try:
                resp = requests.post(
                    "http://localhost:3000/api/v1/repos/teleo/teleo-codex/pulls",
                    headers={"Authorization": f"token {token}"},
                    json={
                        "title": f"epimetheus: {count} decision records from proposal extraction",
                        "body": (f"## Summary\n"
                                 f"- {count} decision records extracted from archived proposal sources\n"
                                 f"- Full verbatim proposal text + LLM-generated summary/significance\n"
                                 f"- Both decision markets and fundraises\n\n"
                                 f"## Source\n"
                                 f"Extracted by `extract-decisions.py` from `event_type: proposal` sources in archive/"),
                        "head": branch_name,
                        "base": "main",
                    },
                    timeout=30,
                )
            except requests.RequestException as e:
                # The push already succeeded; a PR can be opened manually.
                print(f"WARNING: PR creation request failed: {e}")
                return
            if resp.status_code in (200, 201):
                pr_url = resp.json().get("html_url", "")
                print(f"PR created: {pr_url}")
            else:
                print(f"WARNING: PR creation failed ({resp.status_code}): {resp.text[:200]}")
    except subprocess.CalledProcessError as e:
        print(f"ERROR committing: {e.stderr.decode()[:200] if e.stderr else e}")


if __name__ == "__main__":
    main()