From 5b9ce01412c7978b5d32f391a0c6b28fc283e7f0 Mon Sep 17 00:00:00 2001
From: m3taversal <m3taversal@gmail.com>
Date: Tue, 14 Apr 2026 12:01:21 +0100
Subject: [PATCH] epimetheus: wire LLM connections into typed frontmatter edges
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Extract.py was discarding LLM-provided connections — related_claims went into
frontmatter as wiki-links but supports/challenges/depends_on from the
connections field were ignored entirely. This is the primary driver of 50%+
orphan ratio.

Now: connections[] → typed edge fields (supports/challenges/related) in YAML
frontmatter. related_claims fall back to related edges. Post-write
connect_new_claims() adds vector-search edges for claims the LLM missed.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 ops/pipeline-v2/lib/extract.py | 43 ++++++++++++++++++++++++++++++----
 1 file changed, 38 insertions(+), 5 deletions(-)

diff --git a/ops/pipeline-v2/lib/extract.py b/ops/pipeline-v2/lib/extract.py
index ab663c2d2..de6a8c995 100644
--- a/ops/pipeline-v2/lib/extract.py
+++ b/ops/pipeline-v2/lib/extract.py
@@ -37,6 +37,7 @@ from .domains import agent_for_domain
 from .extraction_prompt import build_extraction_prompt
 from .forgejo import api as forgejo_api
 from .llm import openrouter_call
+from .connect import connect_new_claims
 from .post_extract import load_existing_claims_from_repo, validate_and_fix_claims
 from .worktree_lock import async_main_worktree_lock
 
@@ -225,7 +226,29 @@ def _build_claim_content(claim: dict, agent: str) -> str:
     body = claim.get("body", "")
     scope = claim.get("scope", "")
     sourcer = claim.get("sourcer", "")
-    related = claim.get("related_claims", [])
+    related_claims = claim.get("related_claims", [])
+    connections = claim.get("connections", [])
+
+    edge_fields = {"supports": [], "challenges": [], "related": []}
+    for conn in connections:
+        target = conn.get("target", "")
+        rel = conn.get("relationship", "related")
+        if target and rel in edge_fields:
+            target = target.replace(".md", "")
+            if target not in edge_fields[rel]:
+                edge_fields[rel].append(target)
+    for r in related_claims[:5]:
+        r_clean = r.replace(".md", "")
+        if r_clean not in edge_fields["related"]:
+            edge_fields["related"].append(r_clean)
+
+    edge_lines = []
+    for edge_type in ("supports", "challenges", "related"):
+        targets = edge_fields[edge_type]
+        if targets:
+            edge_lines.append(f"{edge_type}:")
+            for t in targets:
+                edge_lines.append(f"  - {t}")
 
     lines = [
         "---",
@@ -242,10 +265,7 @@ def _build_claim_content(claim: dict, agent: str) -> str:
         lines.append(f"scope: {scope}")
     if sourcer:
         lines.append(f'sourcer: "{sourcer}"')
-    if related:
-        lines.append("related_claims:")
-        for r in related:
-            lines.append(f'  - "[[{r}]]"')
+    lines.extend(edge_lines)
     lines.append("---")
     lines.append("")
     lines.append(f"# {title}")
@@ -456,6 +476,19 @@ async def _extract_one_source(
         await _archive_source(source_path, domain, "null-result")
         return 0, 0
 
+    # Post-write: connect new claims to existing KB via vector search (non-fatal)
+    claim_paths = [str(worktree / f) for f in files_written if f.startswith("domains/")]
+    if claim_paths:
+        try:
+            connect_stats = connect_new_claims(claim_paths)
+            if connect_stats["connected"] > 0:
+                logger.info(
+                    "Extract-connect: %d/%d claims → %d edges",
+                    connect_stats["connected"], len(claim_paths), connect_stats["edges_added"],
+                )
+        except Exception:
+            logger.warning("Extract-connect failed (non-fatal)", exc_info=True)
+
     # Stage and commit
     for f in files_written:
         await _git("add", f, cwd=str(EXTRACT_WORKTREE))