"""Tests for frontmatter helpers — frontmatter union, order-preserving dedup, string-level splicing. These test the pure functions in lib/frontmatter.py (extracted from merge.py Phase 6). """ import sys import os import pytest sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "lib")) from frontmatter import ( REWEAVE_EDGE_FIELDS, parse_yaml_frontmatter, union_edge_lists, serialize_edge_fields, serialize_frontmatter, ) class TestParseYamlFrontmatter: def test_basic(self): text = "---\ntitle: Test Claim\nsupports:\n- claim-a\n---\nBody text here." fm, raw, body = parse_yaml_frontmatter(text) assert fm is not None assert fm["title"] == "Test Claim" assert fm["supports"] == ["claim-a"] assert body.startswith("\n---") assert "title: Test Claim" in raw def test_no_frontmatter(self): text = "Just plain text" fm, raw, body = parse_yaml_frontmatter(text) assert fm is None assert raw == "" assert body == text def test_malformed_yaml(self): text = "---\n: invalid: yaml: {{{\n---\nBody" fm, raw, body = parse_yaml_frontmatter(text) assert fm is None class TestUnionEdgeLists: def test_no_overlap(self): main = ["claim-a", "claim-b"] branch = ["claim-c", "claim-d"] result = union_edge_lists(main, branch) assert result == ["claim-a", "claim-b", "claim-c", "claim-d"] def test_overlap_preserves_main_order(self): main = ["claim-b", "claim-a"] branch = ["claim-a", "claim-c"] result = union_edge_lists(main, branch) assert result == ["claim-b", "claim-a", "claim-c"] def test_case_insensitive_dedup(self): main = ["Claim A"] branch = ["claim a", "Claim B"] result = union_edge_lists(main, branch) assert len(result) == 2 assert result[0] == "Claim A" assert result[1] == "Claim B" def test_empty_main(self): result = union_edge_lists([], ["claim-a", "claim-b"]) assert result == ["claim-a", "claim-b"] def test_empty_branch(self): result = union_edge_lists(["claim-a"], []) assert result == ["claim-a"] def test_both_empty(self): assert union_edge_lists([], []) == [] def test_duplicates_within_branch(self): main = ["claim-a"] branch = ["claim-b", "claim-b"] result = union_edge_lists(main, branch) assert result == ["claim-a", "claim-b"] class TestSerializeEdgeFields: def test_replaces_existing_field(self): raw = "title: Test\nsupports:\n- old-claim" merged = {"supports": ["old-claim", "new-claim"]} result = serialize_edge_fields(raw, merged) assert "- old-claim" in result assert "- new-claim" in result assert "title: Test" in result def test_preserves_non_edge_fields_exactly(self): raw = "title: 'Quoted Title'\nconfidence: 0.85\ntype: claim" merged = {"related": ["new-claim"]} result = serialize_edge_fields(raw, merged) assert "title: 'Quoted Title'" in result assert "confidence: 0.85" in result assert "type: claim" in result assert "related:" in result assert "- new-claim" in result def test_appends_new_field(self): raw = "title: Test\ntype: claim" merged = {"supports": ["claim-a"]} result = serialize_edge_fields(raw, merged) assert "title: Test" in result assert "supports:" in result assert "- claim-a" in result def test_empty_edges_removes_field(self): raw = "title: Test\nsupports:\n- old-claim\ntype: claim" merged = {} # no edges to write result = serialize_edge_fields(raw, merged) assert "supports:" not in result assert "title: Test" in result assert "type: claim" in result def test_multiple_edge_fields(self): raw = "title: Test\nsupports:\n- a\nchallenges:\n- b" merged = {"supports": ["a", "c"], "challenges": ["b", "d"]} result = serialize_edge_fields(raw, merged) lines = result.split("\n") # supports and challenges both present with merged values assert "- a" in result assert "- c" in result assert "- b" in result assert "- d" in result class TestSerializeFrontmatter: def test_roundtrip_preserves_formatting(self): original = "---\ntitle: 'Quoted Title'\nconfidence: 0.85\nsupports:\n- claim-a\n---\nBody text here." fm, raw, body = parse_yaml_frontmatter(original) merged_edges = {"supports": ["claim-a", "claim-b"]} result = serialize_frontmatter(raw, merged_edges, body) # Non-edge fields preserved exactly assert "title: 'Quoted Title'" in result assert "confidence: 0.85" in result # Edge fields updated assert "- claim-a" in result assert "- claim-b" in result # Structure preserved assert result.startswith("---\n") assert "\n---\n" in result assert result.endswith("Body text here.") def test_no_blank_line_before_closing_delimiter(self): """Ganymede critical: no extra blank line compounds on repeat reweaves.""" original = "---\ntitle: Test\nsupports:\n- a\n---\nBody." fm, raw, body = parse_yaml_frontmatter(original) merged_edges = {"supports": ["a", "b"]} result = serialize_frontmatter(raw, merged_edges, body) # Should NOT have \n\n--- (double newline before closing) assert "\n\n---" not in result def test_repeated_serialize_no_drift(self): """Repeated serialization should be idempotent — no accumulating blank lines.""" text = "---\ntitle: Test\nsupports:\n- a\n---\nBody." merged_edges = {"supports": ["a", "b"]} for _ in range(5): fm, raw, body = parse_yaml_frontmatter(text) text = serialize_frontmatter(raw, merged_edges, body) assert text.count("\n\n") == 0 # no double newlines anywhere class TestSupersetDetection: def test_branch_is_superset(self): main_edges = {"claim-a", "claim-b"} branch_edges = {"claim-a", "claim-b", "claim-c"} assert len(main_edges - branch_edges) == 0 def test_branch_missing_edge(self): main_edges = {"claim-a", "claim-b"} branch_edges = {"claim-a", "claim-c"} assert "claim-b" in (main_edges - branch_edges) def test_equal_sets(self): main_edges = {"claim-a", "claim-b"} branch_edges = {"claim-a", "claim-b"} assert len(main_edges - branch_edges) == 0 class TestEdgeFieldsCoverage: def test_standard_fields_present(self): assert "supports" in REWEAVE_EDGE_FIELDS assert "challenges" in REWEAVE_EDGE_FIELDS assert "related" in REWEAVE_EDGE_FIELDS assert "reweave_edges" in REWEAVE_EDGE_FIELDS assert "depends_on" in REWEAVE_EDGE_FIELDS class TestFullUnionWorkflow: def test_main_evolved_branch_stale(self): """Main got new edges after branch was created. Union includes both.""" main_text = ( "---\ntitle: Test Claim\nconfidence: 0.8\n" "supports:\n- claim-a\n- claim-b\n" "related:\n- claim-x\n" "---\nBody text." ) branch_text = ( "---\ntitle: Test Claim\nconfidence: 0.8\n" "supports:\n- claim-a\n" "related:\n- claim-x\n- claim-y\n" "reweave_edges:\n- \"claim-y|related|2026-04-04\"\n" "---\nBody text." ) main_fm, main_raw, main_body = parse_yaml_frontmatter(main_text) branch_fm, _, _ = parse_yaml_frontmatter(branch_text) merged_edges = {} for field in REWEAVE_EDGE_FIELDS: main_list = main_fm.get(field, []) branch_list = branch_fm.get(field, []) if not isinstance(main_list, list): main_list = [main_list] if main_list else [] if not isinstance(branch_list, list): branch_list = [branch_list] if branch_list else [] if main_list or branch_list: merged_edges[field] = union_edge_lists(main_list, branch_list) assert merged_edges["supports"] == ["claim-a", "claim-b"] assert "claim-x" in merged_edges["related"] assert "claim-y" in merged_edges["related"] assert len(merged_edges.get("reweave_edges", [])) == 1 # Verify non-edge fields preserved in serialization result = serialize_frontmatter(main_raw, merged_edges, main_body) assert "confidence: 0.8" in result assert "title: Test Claim" in result def test_no_edge_fields_untouched(self): """Non-edge fields (title, confidence, type) come from main unchanged.""" main_text = "---\ntitle: Original\nconfidence: 0.9\ntype: claim\n---\nBody." branch_text = "---\ntitle: Original\nconfidence: 0.9\ntype: claim\nrelated:\n- new-claim\n---\nBody." main_fm, main_raw, main_body = parse_yaml_frontmatter(main_text) branch_fm, _, _ = parse_yaml_frontmatter(branch_text) merged_edges = {} for field in REWEAVE_EDGE_FIELDS: main_list = main_fm.get(field, []) branch_list = branch_fm.get(field, []) if not isinstance(main_list, list): main_list = [main_list] if main_list else [] if not isinstance(branch_list, list): branch_list = [branch_list] if branch_list else [] if main_list or branch_list: merged_edges[field] = union_edge_lists(main_list, branch_list) result = serialize_frontmatter(main_raw, merged_edges, main_body) assert "title: Original" in result assert "confidence: 0.9" in result assert "type: claim" in result assert "- new-claim" in result def test_scalar_edge_field_converted_to_list(self): """Edge fields stored as scalars (not lists) are handled gracefully.""" main_fm = {"supports": "single-claim"} branch_fm = {"supports": ["single-claim", "new-claim"]} main_list = main_fm.get("supports", []) branch_list = branch_fm.get("supports", []) if not isinstance(main_list, list): main_list = [main_list] if main_list else [] if not isinstance(branch_list, list): branch_list = [branch_list] if branch_list else [] result = union_edge_lists(main_list, branch_list) assert result == ["single-claim", "new-claim"] def test_yaml_formatting_preserved_across_reweave(self): """The key test: non-edge YAML formatting stays byte-identical.""" # Use unusual but valid YAML formatting main_text = "---\ntitle: 'A \"Quoted\" Title'\nconfidence: 0.85\nsome_custom_field: [1, 2, 3]\nsupports:\n- old-claim\n---\nBody." fm, raw, body = parse_yaml_frontmatter(main_text) merged_edges = {"supports": ["old-claim", "new-claim"]} result = serialize_frontmatter(raw, merged_edges, body) # These non-edge fields must be byte-identical to source assert "title: 'A \"Quoted\" Title'" in result assert "confidence: 0.85" in result assert "some_custom_field: [1, 2, 3]" in result