Write enrichments as file modifications; strengthen correction extraction
Some checks are pending
CI / lint-and-test (push) Waiting to run
Some checks are pending
CI / lint-and-test (push) Waiting to run
Two changes:

1. extract.py: Enrichments now modify existing claim files by appending evidence sections. Previously, enrichment-only extractions were discarded as null-result even when they contained valuable challenges.
2. extraction_prompt.py: Corrections should produce BOTH a claim (the corrected knowledge) AND an enrichment (linking to what it corrects).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
1f5eb324f3
commit
1e0c1cd788
2 changed files with 37 additions and 4 deletions
|
|
@ -432,8 +432,8 @@ async def _extract_one_source(
|
||||||
)
|
)
|
||||||
claim_files = kept_claims
|
claim_files = kept_claims
|
||||||
|
|
||||||
if not claim_files and not entity_files:
|
if not claim_files and not entity_files and not enrichments:
|
||||||
logger.info("No valid claims/entities after validation for %s — archiving as null-result", source_file)
|
logger.info("No valid claims/entities/enrichments after validation for %s — archiving as null-result", source_file)
|
||||||
await _archive_source(source_path, domain, "null-result")
|
await _archive_source(source_path, domain, "null-result")
|
||||||
return 0, 0
|
return 0, 0
|
||||||
|
|
||||||
|
|
@ -471,6 +471,35 @@ async def _extract_one_source(
|
||||||
fpath.write_text(ef["content"], encoding="utf-8")
|
fpath.write_text(ef["content"], encoding="utf-8")
|
||||||
files_written.append(f"entities/{domain}/{ef['filename']}")
|
files_written.append(f"entities/{domain}/{ef['filename']}")
|
||||||
|
|
||||||
|
# Write enrichments as modifications to existing claim files
|
||||||
|
for enr in enrichments:
|
||||||
|
target = enr.get("target_file", "")
|
||||||
|
evidence = enr.get("evidence", "")
|
||||||
|
enr_type = enr.get("type", "extend") # confirm|challenge|extend
|
||||||
|
source_ref = enr.get("source_ref", source_file)
|
||||||
|
if not target or not evidence:
|
||||||
|
continue
|
||||||
|
# Find the target claim file in the worktree (search domains/)
|
||||||
|
target_stem = target.replace(".md", "")
|
||||||
|
found = None
|
||||||
|
for domain_dir in (worktree / "domains").iterdir():
|
||||||
|
candidate = domain_dir / f"{target_stem}.md"
|
||||||
|
if candidate.exists():
|
||||||
|
found = candidate
|
||||||
|
break
|
||||||
|
if not found:
|
||||||
|
logger.debug("Enrichment target %s not found in worktree", target)
|
||||||
|
continue
|
||||||
|
# Append enrichment evidence to the claim file
|
||||||
|
existing = found.read_text(encoding="utf-8")
|
||||||
|
label = {"confirm": "Supporting", "challenge": "Challenging", "extend": "Extending"}.get(enr_type, "Additional")
|
||||||
|
enrichment_block = f"\n\n## {label} Evidence\n\n**Source:** {source_ref}\n\n{evidence}\n"
|
||||||
|
found.write_text(existing + enrichment_block, encoding="utf-8")
|
||||||
|
rel_path = str(found.relative_to(worktree))
|
||||||
|
if rel_path not in files_written:
|
||||||
|
files_written.append(rel_path)
|
||||||
|
logger.info("Enrichment applied to %s (%s)", target, enr_type)
|
||||||
|
|
||||||
if not files_written:
|
if not files_written:
|
||||||
logger.info("No files written for %s — cleaning up", source_file)
|
logger.info("No files written for %s — cleaning up", source_file)
|
||||||
await _git("checkout", "main", cwd=str(EXTRACT_WORKTREE))
|
await _git("checkout", "main", cwd=str(EXTRACT_WORKTREE))
|
||||||
|
|
|
||||||
|
|
@ -156,12 +156,16 @@ When the human says "that's wrong", "not true", "you're wrong", "out of date", o
|
||||||
1. **Extract the correction as a claim or enrichment.** The human is correcting the KB's
|
1. **Extract the correction as a claim or enrichment.** The human is correcting the KB's
|
||||||
understanding. This is precisely what the KB needs.
|
understanding. This is precisely what the KB needs.
|
||||||
2. **The correction itself IS the claim.** "Curated launches had significantly more committed
|
2. **The correction itself IS the claim.** "Curated launches had significantly more committed
|
||||||
capital than permissionless launches" is a testable, disagreeable proposition — extract it.
|
capital than permissionless launches" is a testable, disagreeable proposition — extract it
|
||||||
|
AS A CLAIM, not just an enrichment. If the correction states something specific enough to
|
||||||
|
disagree with, it's a claim. Extract it even if it's only one sentence.
|
||||||
3. **Short corrections are HIGH value, not low value.** A 15-word correction that fixes a
|
3. **Short corrections are HIGH value, not low value.** A 15-word correction that fixes a
|
||||||
factual error is worth more than a 500-word article that confirms what we already know.
|
factual error is worth more than a 500-word article that confirms what we already know.
|
||||||
NEVER null-result a conversation just because the human's message is short.
|
NEVER null-result a conversation just because the human's message is short.
|
||||||
4. **Map corrections to existing claims.** Search the KB index for claims that the correction
|
4. **Map corrections to existing claims.** Search the KB index for claims that the correction
|
||||||
challenges. Output as an ENRICHMENT with `type: "challenge"` if the target claim exists.
|
challenges. Output BOTH a new claim (the corrected understanding) AND an enrichment
|
||||||
|
(type: "challenge") targeting the existing claim. The enrichment links the correction
|
||||||
|
to what it corrects; the claim captures the corrected knowledge as a standalone proposition.
|
||||||
|
|
||||||
### Bot LEARNING lines are extraction hints
|
### Bot LEARNING lines are extraction hints
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue