"""Tests for lib/eval_parse.py — pure parsing functions extracted from evaluate.py.""" import sys import os from unittest.mock import MagicMock import pytest # Mock heavy dependencies before importing sys.modules.setdefault("aiohttp", MagicMock()) sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) from lib.eval_parse import ( VALID_ISSUE_TAGS, classify_issues, deterministic_tier, diff_contains_claim_type, extract_changed_files, filter_diff, infer_issues_from_prose, is_musings_only, normalize_tag, parse_batch_response, parse_issues, parse_verdict, validate_batch_fanout, ) # --------------------------------------------------------------------------- # filter_diff # --------------------------------------------------------------------------- class TestFilterDiff: def test_strips_inbox_files(self): diff = ( "diff --git a/inbox/archive/foo.md b/inbox/archive/foo.md\n" "+some content\n" "diff --git a/domains/finance/claim.md b/domains/finance/claim.md\n" "+real content\n" ) review_diff, entity_diff = filter_diff(diff) assert "inbox" not in review_diff assert "claim.md" in review_diff def test_separates_entity_diffs(self): diff = ( "diff --git a/entities/people/alice.md b/entities/people/alice.md\n" "+entity content\n" "diff --git a/domains/finance/claim.md b/domains/finance/claim.md\n" "+claim content\n" ) review_diff, entity_diff = filter_diff(diff) assert "alice.md" in entity_diff assert "claim.md" in review_diff def test_core_domain_entities_stay_in_review(self): diff = ( "diff --git a/entities/living-agents/agent.md b/entities/living-agents/agent.md\n" "+core entity\n" ) review_diff, entity_diff = filter_diff(diff) assert "agent.md" in review_diff assert entity_diff == "" # --------------------------------------------------------------------------- # extract_changed_files # --------------------------------------------------------------------------- class TestExtractChangedFiles: def test_extracts_paths(self): diff = ( "diff --git a/domains/foo.md b/domains/foo.md\n" "+content\n" "diff --git a/entities/bar.md b/entities/bar.md\n" "+more\n" ) result = extract_changed_files(diff) assert "domains/foo.md" in result assert "entities/bar.md" in result def test_empty_diff(self): assert extract_changed_files("") == "" # --------------------------------------------------------------------------- # is_musings_only # --------------------------------------------------------------------------- class TestIsMusingsOnly: def test_musings_only(self): diff = "diff --git a/agents/rio/musings/thought.md b/agents/rio/musings/thought.md\n+musing\n" assert is_musings_only(diff) is True def test_mixed_files(self): diff = ( "diff --git a/agents/rio/musings/thought.md b/agents/rio/musings/thought.md\n+musing\n" "diff --git a/domains/finance/claim.md b/domains/finance/claim.md\n+claim\n" ) assert is_musings_only(diff) is False def test_no_musings(self): diff = "diff --git a/domains/finance/claim.md b/domains/finance/claim.md\n+claim\n" assert is_musings_only(diff) is False # --------------------------------------------------------------------------- # diff_contains_claim_type # --------------------------------------------------------------------------- class TestDiffContainsClaimType: def test_detects_claim_type(self): diff = "+type: claim\n+confidence: 0.8\n" assert diff_contains_claim_type(diff) is True def test_ignores_non_claim(self): diff = "+type: entity\n" assert diff_contains_claim_type(diff) is False def test_ignores_file_header(self): diff = "+++ b/type: claim\n" assert diff_contains_claim_type(diff) is False # --------------------------------------------------------------------------- # deterministic_tier # --------------------------------------------------------------------------- class TestDeterministicTier: def test_entities_only_is_light(self): diff = "diff --git a/entities/people/alice.md b/entities/people/alice.md\n+data\n" assert deterministic_tier(diff) == "LIGHT" def test_inbox_only_is_light(self): diff = "diff --git a/inbox/archive/src.md b/inbox/archive/src.md\n+data\n" assert deterministic_tier(diff) == "LIGHT" def test_core_is_deep(self): diff = "diff --git a/core/schema.md b/core/schema.md\n+structural change\n" assert deterministic_tier(diff) == "DEEP" def test_challenged_by_is_deep(self): diff = "diff --git a/domains/x/claim.md b/domains/x/claim.md\n+challenged_by: some-claim\n" assert deterministic_tier(diff) == "DEEP" def test_domain_claim_returns_none(self): diff = "diff --git a/domains/finance/new-claim.md b/domains/finance/new-claim.md\n--- /dev/null\n+++ b/domains/finance/new-claim.md\n+type: claim\n" assert deterministic_tier(diff) is None def test_empty_diff_returns_none(self): assert deterministic_tier("") is None # --------------------------------------------------------------------------- # parse_verdict # --------------------------------------------------------------------------- class TestParseVerdict: def test_approve(self): assert parse_verdict("VERDICT:LEO:APPROVE", "LEO") == "approve" def test_request_changes(self): assert parse_verdict("VERDICT:LEO:REQUEST_CHANGES", "LEO") == "request_changes" def test_no_verdict_defaults_to_request_changes(self): assert parse_verdict("some review text", "LEO") == "request_changes" def test_case_insensitive_reviewer(self): assert parse_verdict("VERDICT:LEO:APPROVE", "leo") == "approve" # --------------------------------------------------------------------------- # normalize_tag # --------------------------------------------------------------------------- class TestNormalizeTag: def test_valid_tag_passes_through(self): assert normalize_tag("broken_wiki_links") == "broken_wiki_links" def test_alias_normalizes(self): assert normalize_tag("schema_violation") == "frontmatter_schema" assert normalize_tag("duplicate") == "near_duplicate" assert normalize_tag("factual_error") == "factual_discrepancy" def test_hyphen_to_underscore(self): assert normalize_tag("unverified-wiki-links") == "broken_wiki_links" def test_unknown_returns_none(self): assert normalize_tag("totally_made_up") is None def test_fuzzy_substring_match(self): assert normalize_tag("wiki_links") == "broken_wiki_links" # --------------------------------------------------------------------------- # parse_issues # --------------------------------------------------------------------------- class TestParseIssues: def test_structured_comment(self): text = "Review text. " result = parse_issues(text) assert "frontmatter_schema" in result assert "near_duplicate" in result def test_fallback_to_prose(self): text = "The frontmatter is missing required fields." result = parse_issues(text) assert "frontmatter_schema" in result def test_empty_structured_falls_back(self): text = "Review. " # All tags unrecognizable, falls through to prose result = parse_issues(text) # With no prose keywords either, should return empty assert isinstance(result, list) # --------------------------------------------------------------------------- # infer_issues_from_prose # --------------------------------------------------------------------------- class TestInferIssuesFromProse: def test_detects_frontmatter_issues(self): result = infer_issues_from_prose("The YAML frontmatter is invalid.") assert "frontmatter_schema" in result def test_detects_wiki_link_issues(self): result = infer_issues_from_prose("Found broken wiki links in the claim.") assert "broken_wiki_links" in result def test_detects_near_duplicate(self): result = infer_issues_from_prose("This is a near-duplicate of an existing claim.") assert "near_duplicate" in result def test_no_matches(self): result = infer_issues_from_prose("This claim is well-structured and accurate.") assert result == [] # --------------------------------------------------------------------------- # classify_issues # --------------------------------------------------------------------------- class TestClassifyIssues: def test_mechanical_only(self): assert classify_issues(["frontmatter_schema", "near_duplicate"]) == "mechanical" def test_substantive_only(self): assert classify_issues(["factual_discrepancy"]) == "substantive" def test_mixed(self): assert classify_issues(["frontmatter_schema", "factual_discrepancy"]) == "mixed" def test_empty(self): assert classify_issues([]) == "unknown" # --------------------------------------------------------------------------- # parse_batch_response # --------------------------------------------------------------------------- class TestParseBatchResponse: def test_parses_multiple_prs(self): response = ( "=== PR #100\nReview for 100\n\n" "=== PR #200\nReview for 200\n\n" ) result = parse_batch_response(response, [100, 200], "agent") assert 100 in result assert 200 in result assert "APPROVE" in result[100] assert "REQUEST_CHANGES" in result[200] def test_missing_pr(self): response = "=== PR #100\nReview\n\n" result = parse_batch_response(response, [100, 200], "agent") assert 100 in result assert 200 not in result def test_empty_response(self): result = parse_batch_response("no verdicts here", [100], "agent") assert result == {} # --------------------------------------------------------------------------- # validate_batch_fanout # --------------------------------------------------------------------------- class TestValidateBatchFanout: def test_missing_pr_falls_back(self): parsed = {100: "review for 100"} pr_diffs = [ {"number": 100, "diff": "diff --git a/domains/long-enough-path.md b/domains/long-enough-path.md\n"}, {"number": 200, "diff": "diff --git a/domains/other-long-path.md b/domains/other-long-path.md\n"}, ] valid, fallback = validate_batch_fanout(parsed, pr_diffs, "agent") assert 200 in fallback assert 200 not in valid def test_all_present_passes(self): parsed = { 100: "review mentioning domains/long-enough-path.md", 200: "review mentioning domains/other-long-path.md", } pr_diffs = [ {"number": 100, "diff": "diff --git a/domains/long-enough-path.md b/domains/long-enough-path.md\n"}, {"number": 200, "diff": "diff --git a/domains/other-long-path.md b/domains/other-long-path.md\n"}, ] valid, fallback = validate_batch_fanout(parsed, pr_diffs, "agent") assert 100 in valid assert 200 in valid assert fallback == []