fix: wire fix_wiki_links into extraction pipeline
fix_wiki_links was imported but never called, so LLM-generated [[_map]] dead links and slug-formatted wiki links passed through to claim files unchanged. It now runs after the claim files are written and before atomic connect.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
b766176259
commit
6e8aab2dd9
1 changed file with 17 additions and 0 deletions
|
|
@@ -36,6 +36,7 @@ sys.path.insert(0, str(Path(__file__).parent))
|
|||
|
||||
from lib.extraction_prompt import build_extraction_prompt
|
||||
from lib.post_extract import (
|
||||
fix_wiki_links,
|
||||
load_existing_claims_from_repo,
|
||||
validate_and_fix_claims,
|
||||
validate_and_fix_entities,
|
||||
|
|
@@ -456,6 +457,22 @@ def main():
|
|||
written.append(filename)
|
||||
print(f" Wrote: {claim_path}")
|
||||
|
||||
# ── Fix wiki links: resolve slugs→spaces, strip dead links ──
|
||||
if written:
|
||||
existing_stems = {Path(c).stem for c in existing_claims}
|
||||
wiki_fix_count = 0
|
||||
for filename in written:
|
||||
claim_path = os.path.join(domain_dir, filename)
|
||||
with open(claim_path) as f:
|
||||
content = f.read()
|
||||
fixed_content, fixes = fix_wiki_links(content, existing_stems)
|
||||
if fixes:
|
||||
with open(claim_path, "w") as f:
|
||||
f.write(fixed_content)
|
||||
wiki_fix_count += len(fixes)
|
||||
if wiki_fix_count:
|
||||
print(f" Wiki links: {wiki_fix_count} fixed/stripped")
|
||||
|
||||
# ── Atomic connect: wire new claims to existing KB via vector search ──
|
||||
connect_stats = {"connected": 0, "edges_added": 0}
|
||||
if written:
|
||||
|
|
|
|||
Loading…
Reference in a new issue