From 5b9ce01412c7978b5d32f391a0c6b28fc283e7f0 Mon Sep 17 00:00:00 2001 From: m3taversal Date: Tue, 14 Apr 2026 12:01:21 +0100 Subject: [PATCH] epimetheus: wire LLM connections into typed frontmatter edges MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extract.py was discarding LLM-provided connections — related_claims went into frontmatter as wiki-links but supports/challenges/depends_on from the connections field were ignored entirely. This is the primary driver of the 50%+ orphan ratio. Now: connections[] → typed edge fields (supports/challenges/related) in YAML frontmatter; connections with any other relationship value (including depends_on) are still skipped. related_claims (first five only) fall back to related edges. Post-write connect_new_claims() adds vector-search edges for claims the LLM missed. Co-Authored-By: Claude Opus 4.6 (1M context) --- ops/pipeline-v2/lib/extract.py | 43 ++++++++++++++++++++++++++++++---- 1 file changed, 38 insertions(+), 5 deletions(-) diff --git a/ops/pipeline-v2/lib/extract.py b/ops/pipeline-v2/lib/extract.py index ab663c2d2..de6a8c995 100644 --- a/ops/pipeline-v2/lib/extract.py +++ b/ops/pipeline-v2/lib/extract.py @@ -37,6 +37,7 @@ from .domains import agent_for_domain from .extraction_prompt import build_extraction_prompt from .forgejo import api as forgejo_api from .llm import openrouter_call +from .connect import connect_new_claims from .post_extract import load_existing_claims_from_repo, validate_and_fix_claims from .worktree_lock import async_main_worktree_lock @@ -225,7 +226,29 @@ def _build_claim_content(claim: dict, agent: str) -> str: body = claim.get("body", "") scope = claim.get("scope", "") sourcer = claim.get("sourcer", "") - related = claim.get("related_claims", []) + related_claims = claim.get("related_claims", []) + connections = claim.get("connections", []) + + edge_fields = {"supports": [], "challenges": [], "related": []} + for conn in connections: + target = conn.get("target", "") + rel = conn.get("relationship", "related") + if target and rel in edge_fields: + target = 
target.replace(".md", "") + if target not in edge_fields[rel]: + edge_fields[rel].append(target) + for r in related_claims[:5]: + r_clean = r.replace(".md", "") + if r_clean not in edge_fields["related"]: + edge_fields["related"].append(r_clean) + + edge_lines = [] + for edge_type in ("supports", "challenges", "related"): + targets = edge_fields[edge_type] + if targets: + edge_lines.append(f"{edge_type}:") + for t in targets: + edge_lines.append(f" - {t}") lines = [ "---", @@ -242,10 +265,7 @@ def _build_claim_content(claim: dict, agent: str) -> str: lines.append(f"scope: {scope}") if sourcer: lines.append(f'sourcer: "{sourcer}"') - if related: - lines.append("related_claims:") - for r in related: - lines.append(f' - "[[{r}]]"') + lines.extend(edge_lines) lines.append("---") lines.append("") lines.append(f"# {title}") @@ -456,6 +476,19 @@ async def _extract_one_source( await _archive_source(source_path, domain, "null-result") return 0, 0 + # Post-write: connect new claims to existing KB via vector search (non-fatal) + claim_paths = [str(worktree / f) for f in files_written if f.startswith("domains/")] + if claim_paths: + try: + connect_stats = connect_new_claims(claim_paths) + if connect_stats["connected"] > 0: + logger.info( + "Extract-connect: %d/%d claims → %d edges", + connect_stats["connected"], len(claim_paths), connect_stats["edges_added"], + ) + except Exception: + logger.warning("Extract-connect failed (non-fatal)", exc_info=True) + # Stage and commit for f in files_written: await _git("add", f, cwd=str(EXTRACT_WORKTREE))