diff --git a/openrouter-extract-v2.py b/openrouter-extract-v2.py index 6e50f24..cedd9d0 100644 --- a/openrouter-extract-v2.py +++ b/openrouter-extract-v2.py @@ -36,6 +36,7 @@ sys.path.insert(0, str(Path(__file__).parent)) from lib.extraction_prompt import build_extraction_prompt from lib.post_extract import ( + fix_wiki_links, load_existing_claims_from_repo, validate_and_fix_claims, validate_and_fix_entities, @@ -456,6 +457,22 @@ def main(): written.append(filename) print(f" Wrote: {claim_path}") + # ── Fix wiki links: resolve slugs→spaces, strip dead links ── + if written: + existing_stems = {Path(c).stem for c in existing_claims} + wiki_fix_count = 0 + for filename in written: + claim_path = os.path.join(domain_dir, filename) + with open(claim_path) as f: + content = f.read() + fixed_content, fixes = fix_wiki_links(content, existing_stems) + if fixes: + with open(claim_path, "w") as f: + f.write(fixed_content) + wiki_fix_count += len(fixes) + if wiki_fix_count: + print(f" Wiki links: {wiki_fix_count} fixed/stripped") + # ── Atomic connect: wire new claims to existing KB via vector search ── connect_stats = {"connected": 0, "edges_added": 0} if written: