"""Tests for enrichment idempotency — dedup at insertion and post-rebase."""

import os
import tempfile

import pytest

# ─── Unit tests for dedup_evidence_blocks ────────────────────────────────

from lib.dedup import dedup_evidence_blocks


class TestDedupEvidenceBlocks:
    """Test the post-rebase evidence block deduplication."""

    def test_no_blocks_unchanged(self):
        """Content without any evidence block is returned as-is."""
        content = "---\ntype: claim\n---\n\nSome claim body.\n"
        assert dedup_evidence_blocks(content) == content

    def test_single_block_unchanged(self):
        """A lone evidence block has nothing to dedup against."""
        content = (
            "---\ntype: claim\n---\n\nClaim body.\n\n"
            "### Additional Evidence (extend)\n"
            "*Source: [[some-source-2026-03-19]] | Added: 2026-03-19*\n\n"
            "Evidence text here.\n"
        )
        assert dedup_evidence_blocks(content) == content

    def test_duplicate_blocks_removed(self):
        """Two evidence blocks from the same source — second is removed."""
        block = (
            "\n\n### Additional Evidence (extend)\n"
            "*Source: [[interlune-he3-quantum-demand]] | Added: 2026-03-19*\n\n"
            "Some evidence text.\n"
        )
        content = f"---\ntype: claim\n---\n\nClaim body.{block}{block}\nRelevant Notes:\n"
        result = dedup_evidence_blocks(content)
        # Should contain exactly one occurrence
        assert result.count("[[interlune-he3-quantum-demand]]") == 1
        # Text following the duplicated blocks must survive the dedup.
        assert "Relevant Notes:" in result

    def test_different_sources_kept(self):
        """Two evidence blocks from different sources — both kept."""
        block1 = (
            "\n\n### Additional Evidence (extend)\n"
            "*Source: [[source-a]] | Added: 2026-03-19*\n\n"
            "Evidence A.\n"
        )
        block2 = (
            "\n\n### Additional Evidence (challenge)\n"
            "*Source: [[source-b]] | Added: 2026-03-20*\n\n"
            "Evidence B.\n"
        )
        content = f"---\ntype: claim\n---\n\nClaim body.{block1}{block2}"
        result = dedup_evidence_blocks(content)
        assert "[[source-a]]" in result
        assert "[[source-b]]" in result

    def test_auto_enrichment_dedup(self):
        """Duplicate auto-enrichment blocks from substantive fixer."""
        block = (
            "\n\n### Auto-enrichment (near-duplicate conversion, similarity=0.92)\n"
            "*Source: PR #1234 — \"Some duplicate claim\"*\n\n"
            "Converted evidence.\n"
        )
        content = f"---\ntype: claim\n---\n\nBody.{block}{block}"
        result = dedup_evidence_blocks(content)
        assert result.count("PR #1234") == 1

    def test_mixed_types_dedup(self):
        """Same source, differing body text — only the first block is kept.

        NOTE: despite the test name, both fixtures below are
        ``Additional Evidence (extend)`` blocks; what varies is the body
        text, which must not prevent the second block from being treated
        as a duplicate of the first.
        """
        block1 = (
            "\n\n### Additional Evidence (extend)\n"
            "*Source: [[my-source]] | Added: 2026-03-19*\n\n"
            "First version of evidence.\n"
        )
        block2 = (
            "\n\n### Additional Evidence (extend)\n"
            "*Source: [[my-source]] | Added: 2026-03-19*\n\n"
            "Second version of evidence (rebase duplicate).\n"
        )
        content = f"---\ntype: claim\n---\n\nBody.{block1}{block2}"
        result = dedup_evidence_blocks(content)
        assert result.count("[[my-source]]") == 1
        # First occurrence kept
        assert "First version" in result
        assert "Second version" not in result

    def test_real_pr1751_pattern(self):
        """Reproduce the actual PR #1751 duplicate pattern from space-development."""
        content = (
            "---\ntype: claim\ndomain: space-development\n---\n\n"
            "Claim about SpaceX vertical integration.\n\n"
            "### Additional Evidence (extend)\n"
            "*Source: [[2026-03-00-commercial-stations-haven1-slip-orbital-reef-delays]] | Added: 2026-03-19*\n\n"
            "Orbital Reef multi-party structure experiencing delays.\n\n"
            "### Additional Evidence (extend)\n"
            "*Source: [[2026-03-00-commercial-stations-haven1-slip-orbital-reef-delays]] | Added: 2026-03-19*\n\n"
            "Orbital Reef multi-party structure experiencing delays (duplicate from rebase).\n\n"
            "---\n\n"
            "### Additional Evidence (extend)\n"
            "*Source: [[2026-03-19-space-com-starship-v3-first-static-fire]] | Added: 2026-03-24*\n\n"
            "V3 Starship static fire completed.\n"
        )
        result = dedup_evidence_blocks(content)
        assert result.count("[[2026-03-00-commercial-stations-haven1-slip-orbital-reef-delays]]") == 1
        assert result.count("[[2026-03-19-space-com-starship-v3-first-static-fire]]") == 1


# ─── Insertion-time dedup tests ──────────────────────────────────────────


def _write_temp_claim(text: str) -> str:
    """Write *text* to a new temporary ``.md`` file and return its path.

    The file is created with ``delete=False`` so it outlives the handle;
    the caller is responsible for removing it (``os.unlink``).
    """
    # Explicit UTF-8: the fixtures contain a non-ASCII em dash, so relying on
    # the locale encoding would make the bytes on disk platform-dependent.
    # NOTE(review): assumes the code under test reads claim files as UTF-8 — confirm.
    with tempfile.NamedTemporaryFile(
        mode="w", suffix=".md", delete=False, encoding="utf-8"
    ) as handle:
        handle.write(text)
    return handle.name


class TestInsertionDedup:
    """Test that enrichment insertion skips already-enriched claims."""

    def test_entity_batch_dedup(self):
        """_apply_claim_enrichment skips if PR already enriched the claim."""
        from lib.entity_batch import _apply_claim_enrichment

        path = _write_temp_claim(
            "---\ntype: claim\n---\n\nClaim body.\n\n"
            "### Auto-enrichment (near-duplicate conversion, similarity=0.90)\n"
            "*Source: PR #100 — \"Existing enrichment\"*\n\n"
            "Already enriched evidence.\n"
        )
        try:
            ok, msg = _apply_claim_enrichment(path, "New evidence", 100, "Duplicate", 0.91)
            assert not ok
            assert "already enriched" in msg
            # Different PR should succeed
            ok2, msg2 = _apply_claim_enrichment(path, "New evidence", 200, "Different PR", 0.88)
            # Surface the returned message on failure to ease debugging.
            assert ok2, msg2
        finally:
            os.unlink(path)

    def test_entity_batch_first_enrichment_succeeds(self):
        """First enrichment of a claim by a PR should succeed."""
        from lib.entity_batch import _apply_claim_enrichment

        path = _write_temp_claim(
            "---\ntype: claim\n---\n\nClaim body with no enrichments yet.\n"
        )
        try:
            ok, msg = _apply_claim_enrichment(path, "New evidence", 500, "First enrichment", 0.92)
            # Surface the returned message on failure to ease debugging.
            assert ok, msg
            with open(path, encoding="utf-8") as rf:
                content = rf.read()
            assert "PR #500" in content
        finally:
            os.unlink(path)