"""Tests for reweave.py — orphan detection, entity filtering, same-source detection, frontmatter editing.""" import sys import tempfile from pathlib import Path import pytest sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) from reweave import ( _is_entity, _same_source, _parse_frontmatter, _get_edge_targets, _claim_name_variants, find_all_claims, build_reverse_link_index, find_orphans, write_edge, _count_reweave_edges, CLASSIFY_PROMPT, ) @pytest.fixture def kb_dir(tmp_path): """Create a minimal KB structure for testing.""" domains = tmp_path / "domains" / "ai-alignment" domains.mkdir(parents=True) entities = tmp_path / "entities" / "ai-alignment" entities.mkdir(parents=True) return tmp_path def _write_claim(path: Path, name: str, type_: str = "claim", **extra_fm): fm_lines = [f"name: {name}", f"type: {type_}"] for k, v in extra_fm.items(): if isinstance(v, list): fm_lines.append(f"{k}:") for item in v: fm_lines.append(f" - {item}") else: fm_lines.append(f"{k}: {v}") fm = "\n".join(fm_lines) path.write_text(f"---\n{fm}\n---\n\nBody of {name}.\n") # ─── Entity Detection ────────────────────────────────────────────────────── class TestEntityDetection: def test_entity_detected(self, kb_dir): p = kb_dir / "entities" / "ai-alignment" / "anthropic.md" _write_claim(p, "Anthropic", type_="entity") assert _is_entity(p) is True def test_claim_not_entity(self, kb_dir): p = kb_dir / "domains" / "ai-alignment" / "rlhf-works.md" _write_claim(p, "RLHF works", type_="claim") assert _is_entity(p) is False def test_no_frontmatter(self, tmp_path): p = tmp_path / "bare.md" p.write_text("No frontmatter here.") assert _is_entity(p) is False # ─── Same Source Detection ────────────────────────────────────────────────── class TestSameSourceDetection: def test_same_source_field(self, kb_dir): d = kb_dir / "domains" / "ai-alignment" a = d / "claim-a.md" b = d / "claim-b.md" _write_claim(a, "Claim A", source="paper-xyz.md") _write_claim(b, "Claim B", source="paper-xyz.md") assert _same_source(a, b) is True def test_different_source(self, kb_dir): d = kb_dir / "domains" / "ai-alignment" a = d / "claim-a.md" b = d / "claim-b.md" _write_claim(a, "Claim A", source="paper-xyz.md") _write_claim(b, "Claim B", source="paper-abc.md") assert _same_source(a, b) is False def test_same_source_file_field(self, kb_dir): d = kb_dir / "domains" / "ai-alignment" a = d / "claim-a.md" b = d / "claim-b.md" _write_claim(a, "Claim A", source_file="sources/arxiv/1234.md") _write_claim(b, "Claim B", source_file="sources/arxiv/1234.md") assert _same_source(a, b) is True def test_no_source_field(self, kb_dir): d = kb_dir / "domains" / "ai-alignment" a = d / "claim-a.md" b = d / "claim-b.md" _write_claim(a, "Claim A") _write_claim(b, "Claim B") assert _same_source(a, b) is False # ─── Orphan Detection ────────────────────────────────────────────────────── class TestOrphanDetection: def test_orphan_found(self, kb_dir): d = kb_dir / "domains" / "ai-alignment" a = d / "connected-claim.md" b = d / "orphan-claim.md" _write_claim(a, "Connected Claim", related=["orphan-claim"]) _write_claim(b, "Orphan Claim") claims = find_all_claims(kb_dir) incoming = build_reverse_link_index(claims) orphans = find_orphans(claims, incoming, kb_dir) orphan_names = [p.stem for p in orphans] assert "connected-claim" not in orphan_names or "orphan-claim" not in orphan_names # connected-claim has no incoming either (only outgoing), so both may be orphans # but the key point: orphan detection runs without error def test_no_orphans_when_connected(self, kb_dir): d = kb_dir / "domains" / "ai-alignment" a = d / "claim-a.md" b = d / "claim-b.md" _write_claim(a, "Claim A", related=["claim-b"]) _write_claim(b, "Claim B", related=["claim-a"]) claims = find_all_claims(kb_dir) incoming = build_reverse_link_index(claims) orphans = find_orphans(claims, incoming, kb_dir) assert len(orphans) == 0 # ─── Frontmatter Editing ─────────────────────────────────────────────────── class TestWriteEdge: def test_write_edge_adds_field(self, kb_dir): d = kb_dir / "domains" / "ai-alignment" p = d / "neighbor.md" _write_claim(p, "Neighbor Claim") ok = write_edge(p, "Orphan Title", "related", "2026-03-31") assert ok is True text = p.read_text() assert "Orphan Title" in text assert "reweave_edges" in text def test_no_duplicate_edges(self, kb_dir): d = kb_dir / "domains" / "ai-alignment" p = d / "neighbor.md" _write_claim(p, "Neighbor Claim", related=["Orphan Title"]) ok = write_edge(p, "Orphan Title", "related", "2026-03-31") assert ok is False # duplicate detected def test_per_file_cap(self, kb_dir): d = kb_dir / "domains" / "ai-alignment" p = d / "neighbor.md" # Create a file with 10 reweave_edges already rw = [f"edge-{i}|related|2026-03-31" for i in range(10)] _write_claim(p, "Neighbor Claim", reweave_edges=rw) ok = write_edge(p, "New Orphan", "related", "2026-03-31") assert ok is False # cap reached def test_no_blank_lines_in_frontmatter(self, kb_dir): d = kb_dir / "domains" / "ai-alignment" p = d / "neighbor.md" _write_claim(p, "Neighbor Claim", supports=["existing-claim"]) write_edge(p, "New Orphan", "related", "2026-03-31") text = p.read_text() # Find frontmatter section start = text.index("---") + 3 end = text.index("---", start) fm_section = text[start:end] # No blank lines in frontmatter for line in fm_section.strip().split("\n"): if line.strip() == "": pytest.fail(f"Blank line found in frontmatter: {repr(fm_section)}") # ─── Prompt Content ───────────────────────────────────────────────────────── class TestClassifyPrompt: def test_challenges_guidance_present(self): assert "challenges" in CLASSIFY_PROMPT assert "underused" in CLASSIFY_PROMPT.lower() def test_related_is_weakest(self): assert "WEAKEST" in CLASSIFY_PROMPT # ─── Name Variants ────────────────────────────────────────────────────────── class TestNameVariants: def test_stem_variants(self, kb_dir): p = kb_dir / "domains" / "ai-alignment" / "rlhf-reward-hacking.md" _write_claim(p, "RLHF Reward Hacking") variants = _claim_name_variants(p, kb_dir) assert "rlhf-reward-hacking" in variants assert "rlhf reward hacking" in variants