"""Pure YAML frontmatter parsing and serialization for claim/entity files. Shared by merge (reweave merge, reciprocal edges) and reweave scripts. All functions are pure — zero I/O, zero async, zero DB. Extracted from merge.py Phase 6 of decomposition (Ganymede-approved plan). """ import yaml # Edge field names recognized in claim frontmatter. # Order matters: serialize_edge_fields writes them in this order when appending new fields. REWEAVE_EDGE_FIELDS = ("supports", "challenges", "challenged_by", "depends_on", "related", "reweave_edges") # Reciprocal edge mapping: when A has edge_type → B, B gets reciprocal → A. # When A supports B, B also supports A (approximately symmetric). # When A challenges B, B is challenged_by A (NOT symmetric — direction matters). RECIPROCAL_EDGE_MAP = { "supports": "supports", "challenges": "challenged_by", "related": "related", "depends_on": "related", # A depends_on B → B is related to A (not symmetric) } def parse_yaml_frontmatter(text: str) -> tuple[dict | None, str, str]: """Parse YAML frontmatter from markdown text. Returns (frontmatter_dict, raw_fm_text, body_text_including_closing_delimiter). Returns (None, "", text) if no valid frontmatter found. raw_fm_text is the text between the --- delimiters (no delimiters, no leading newline). """ if not text.startswith("---"): return None, "", text end = text.find("\n---", 3) if end == -1: return None, "", text try: raw_fm_text = text[4:end] # skip "---\n", stop before "\n---" fm = yaml.safe_load(raw_fm_text) body = text[end:] # includes closing \n--- and body return (fm if isinstance(fm, dict) else None), raw_fm_text, body except Exception: return None, "", text def union_edge_lists(main_edges: list, branch_edges: list) -> list: """Union two edge lists, preserving order from main (append new at end). Deduplicates by lowercase slug. Main's order is preserved; branch-only edges are appended in their original order. """ seen = set() result = [] for edge in main_edges: key = str(edge).strip().lower() if key not in seen: seen.add(key) result.append(edge) for edge in branch_edges: key = str(edge).strip().lower() if key not in seen: seen.add(key) result.append(edge) return result def serialize_edge_fields(raw_fm_text: str, merged_edges: dict[str, list]) -> str: """Splice merged edge fields into raw frontmatter text, preserving all other fields byte-identical. Only modifies REWEAVE_EDGE_FIELDS lines. All other frontmatter (title, confidence, type, etc.) stays exactly as it was in the source text — no yaml.dump reformatting. Args: raw_fm_text: The raw YAML text between the --- delimiters (no delimiters included). merged_edges: {field_name: [edge_values]} for each edge field that should be present. """ lines = raw_fm_text.split("\n") result_lines = [] i = 0 fields_written = set() while i < len(lines): line = lines[i] # Check if this line starts an edge field matched_field = None for field in REWEAVE_EDGE_FIELDS: if line.startswith(f"{field}:"): matched_field = field break if matched_field: fields_written.add(matched_field) # Skip the old field and its list items (may be indented with spaces) i += 1 while i < len(lines) and lines[i] and (lines[i][0] in (' ', '-')): i += 1 # Write the merged version edges = merged_edges.get(matched_field, []) if edges: result_lines.append(f"{matched_field}:") for edge in edges: result_lines.append(f"- {edge}") # Don't increment i — it's already past the old field continue else: result_lines.append(line) i += 1 # Append any new edge fields that didn't exist in the original for field in REWEAVE_EDGE_FIELDS: if field not in fields_written: edges = merged_edges.get(field, []) if edges: result_lines.append(f"{field}:") for edge in edges: result_lines.append(f"- {edge}") return "\n".join(result_lines) def serialize_frontmatter(raw_fm_text: str, merged_edges: dict[str, list], body: str) -> str: """Rebuild markdown file: splice merged edges into raw frontmatter, append body. Uses string-level surgery — only edge fields are modified. All other frontmatter stays byte-identical to the source. No yaml.dump reformatting. """ spliced = serialize_edge_fields(raw_fm_text, merged_edges) # body starts with \n--- (closing delimiter + body text) if body.startswith("\n"): return f"---\n{spliced}{body}" return f"---\n{spliced}\n{body}"