From 1e0c1cd788e088e0e776a3e3c6d302f3afdfed30 Mon Sep 17 00:00:00 2001 From: m3taversal Date: Thu, 16 Apr 2026 12:12:15 +0100 Subject: [PATCH] Write enrichments as file modifications; strengthen correction extraction Two changes: 1. extract.py: Enrichments now modify existing claim files by appending evidence sections. Previously enrichment-only extractions were discarded as null-result even when they contained valuable challenges. 2. extraction_prompt.py: Corrections should produce BOTH a claim (the corrected knowledge) AND an enrichment (linking to what it corrects). Co-Authored-By: Claude Opus 4.6 (1M context) --- lib/extract.py | 33 +++++++++++++++++++++++++++++++-- lib/extraction_prompt.py | 8 ++++++-- 2 files changed, 37 insertions(+), 4 deletions(-) diff --git a/lib/extract.py b/lib/extract.py index cb68682..ae32e63 100644 --- a/lib/extract.py +++ b/lib/extract.py @@ -432,8 +432,8 @@ async def _extract_one_source( ) claim_files = kept_claims - if not claim_files and not entity_files: - logger.info("No valid claims/entities after validation for %s — archiving as null-result", source_file) + if not claim_files and not entity_files and not enrichments: + logger.info("No valid claims/entities/enrichments after validation for %s — archiving as null-result", source_file) await _archive_source(source_path, domain, "null-result") return 0, 0 @@ -471,6 +471,35 @@ async def _extract_one_source( fpath.write_text(ef["content"], encoding="utf-8") files_written.append(f"entities/{domain}/{ef['filename']}") + # Write enrichments as modifications to existing claim files + for enr in enrichments: + target = enr.get("target_file", "") + evidence = enr.get("evidence", "") + enr_type = enr.get("type", "extend") # confirm|challenge|extend + source_ref = enr.get("source_ref", source_file) + if not target or not evidence: + continue + # Find the target claim file in the worktree (search domains/) + target_stem = target.replace(".md", "") + found = None + for domain_dir in 
(worktree / "domains").iterdir(): + candidate = domain_dir / f"{target_stem}.md" + if candidate.exists(): + found = candidate + break + if not found: + logger.debug("Enrichment target %s not found in worktree", target) + continue + # Append enrichment evidence to the claim file + existing = found.read_text(encoding="utf-8") + label = {"confirm": "Supporting", "challenge": "Challenging", "extend": "Extending"}.get(enr_type, "Additional") + enrichment_block = f"\n\n## {label} Evidence\n\n**Source:** {source_ref}\n\n{evidence}\n" + found.write_text(existing + enrichment_block, encoding="utf-8") + rel_path = str(found.relative_to(worktree)) + if rel_path not in files_written: + files_written.append(rel_path) + logger.info("Enrichment applied to %s (%s)", target, enr_type) + if not files_written: logger.info("No files written for %s — cleaning up", source_file) await _git("checkout", "main", cwd=str(EXTRACT_WORKTREE)) diff --git a/lib/extraction_prompt.py b/lib/extraction_prompt.py index 797f4d8..ae633f9 100644 --- a/lib/extraction_prompt.py +++ b/lib/extraction_prompt.py @@ -156,12 +156,16 @@ When the human says "that's wrong", "not true", "you're wrong", "out of date", o 1. **Extract the correction as a claim or enrichment.** The human is correcting the KB's understanding. This is precisely what the KB needs. 2. **The correction itself IS the claim.** "Curated launches had significantly more committed - capital than permissionless launches" is a testable, disagreeable proposition — extract it. + capital than permissionless launches" is a testable, disagreeable proposition — extract it + AS A CLAIM, not just an enrichment. If the correction states something specific enough to + disagree with, it's a claim. Extract it even if it's only one sentence. 3. **Short corrections are HIGH value, not low value.** A 15-word correction that fixes a factual error is worth more than a 500-word article that confirms what we already know. 
NEVER null-result a conversation just because the human's message is short. 4. **Map corrections to existing claims.** Search the KB index for claims that the correction - challenges. Output as an ENRICHMENT with `type: "challenge"` if the target claim exists. + challenges. Output BOTH a new claim (the corrected understanding) AND an enrichment + (`type: "challenge"`) targeting the existing claim, if one exists; if none does, output only the new claim. The enrichment links the correction + to what it corrects; the claim captures the corrected knowledge as a standalone proposition. ### Bot LEARNING lines are extraction hints