teleo-infrastructure/tests/test_eval_parse.py
m3taversal 376b77999f
Some checks are pending
CI / lint-and-test (push) Waiting to run
refactor: Phase 3 — fix close_pr ghost bug, wire stale_pr, extract eval_parse
Critical bug fix: close_pr now checks forgejo_api return value and
skips DB update on Forgejo failure, preventing ghost PRs (DB closed,
Forgejo open). Returns bool so callers can handle failures.

_terminate_pr checks return value — skips source requeue on failure.
stale_pr.py migrated from raw Forgejo+DB to close_pr (last raw close
transition eliminated).

eval_parse.py: 15 pure parsing functions extracted from evaluate.py
(~370 lines removed). Zero I/O, zero async, independently testable.
evaluate.py drops from ~1510 to ~1140 lines.

Tests: 295 passed (42 new eval_parse + 2 new close_pr), zero regressions.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-16 12:40:23 +01:00

315 lines
12 KiB
Python

"""Tests for lib/eval_parse.py — pure parsing functions extracted from evaluate.py."""
import sys
import os
from unittest.mock import MagicMock
import pytest
# Mock heavy dependencies before importing
sys.modules.setdefault("aiohttp", MagicMock())
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
from lib.eval_parse import (
VALID_ISSUE_TAGS,
_classify_issues,
_deterministic_tier,
_diff_contains_claim_type,
_extract_changed_files,
_filter_diff,
_infer_issues_from_prose,
_is_musings_only,
_normalize_tag,
_parse_batch_response,
_parse_issues,
_parse_verdict,
_validate_batch_fanout,
)
# ---------------------------------------------------------------------------
# _filter_diff
# ---------------------------------------------------------------------------
class TestFilterDiff:
    """Exercise _filter_diff, which returns a (review_diff, entity_diff) pair."""

    def test_strips_inbox_files(self):
        """The inbox/ section must not appear in the review diff."""
        patch = (
            "diff --git a/inbox/archive/foo.md b/inbox/archive/foo.md\n"
            "+some content\n"
            "diff --git a/domains/finance/claim.md b/domains/finance/claim.md\n"
            "+real content\n"
        )
        # Only the review half is inspected here; the entity half is ignored.
        for_review, _for_entities = _filter_diff(patch)
        assert "inbox" not in for_review
        assert "claim.md" in for_review

    def test_separates_entity_diffs(self):
        """An entities/people/ file lands in the entity diff, the claim in review."""
        patch = (
            "diff --git a/entities/people/alice.md b/entities/people/alice.md\n"
            "+entity content\n"
            "diff --git a/domains/finance/claim.md b/domains/finance/claim.md\n"
            "+claim content\n"
        )
        for_review, for_entities = _filter_diff(patch)
        assert "alice.md" in for_entities
        assert "claim.md" in for_review

    def test_core_domain_entities_stay_in_review(self):
        """An entities/living-agents/ file stays in review; entity diff is empty."""
        patch = (
            "diff --git a/entities/living-agents/agent.md b/entities/living-agents/agent.md\n"
            "+core entity\n"
        )
        for_review, for_entities = _filter_diff(patch)
        assert "agent.md" in for_review
        assert for_entities == ""
# ---------------------------------------------------------------------------
# _extract_changed_files
# ---------------------------------------------------------------------------
class TestExtractChangedFiles:
    """Exercise _extract_changed_files on populated and empty diffs."""

    def test_extracts_paths(self):
        """Both file paths named in the diff headers appear in the result."""
        patch = (
            "diff --git a/domains/foo.md b/domains/foo.md\n"
            "+content\n"
            "diff --git a/entities/bar.md b/entities/bar.md\n"
            "+more\n"
        )
        changed = _extract_changed_files(patch)
        for expected_path in ("domains/foo.md", "entities/bar.md"):
            assert expected_path in changed

    def test_empty_diff(self):
        """An empty diff yields an empty string."""
        changed = _extract_changed_files("")
        assert changed == ""
# ---------------------------------------------------------------------------
# _is_musings_only
# ---------------------------------------------------------------------------
class TestIsMusingsOnly:
    """Exercise _is_musings_only against musings-only, mixed and non-musings diffs."""

    def test_musings_only(self):
        """A diff touching only a musings file is reported as True."""
        patch = (
            "diff --git a/agents/rio/musings/thought.md b/agents/rio/musings/thought.md\n"
            "+musing\n"
        )
        outcome = _is_musings_only(patch)
        assert outcome is True

    def test_mixed_files(self):
        """A musings file alongside a domain claim is reported as False."""
        patch = (
            "diff --git a/agents/rio/musings/thought.md b/agents/rio/musings/thought.md\n"
            "+musing\n"
            "diff --git a/domains/finance/claim.md b/domains/finance/claim.md\n"
            "+claim\n"
        )
        outcome = _is_musings_only(patch)
        assert outcome is False

    def test_no_musings(self):
        """A diff with no musings file at all is reported as False."""
        patch = (
            "diff --git a/domains/finance/claim.md b/domains/finance/claim.md\n"
            "+claim\n"
        )
        outcome = _is_musings_only(patch)
        assert outcome is False
# ---------------------------------------------------------------------------
# _diff_contains_claim_type
# ---------------------------------------------------------------------------
class TestDiffContainsClaimType:
    """Exercise _diff_contains_claim_type on added lines and file headers."""

    def test_detects_claim_type(self):
        """An added 'type: claim' line is detected."""
        added_lines = "+type: claim\n+confidence: 0.8\n"
        detected = _diff_contains_claim_type(added_lines)
        assert detected is True

    def test_ignores_non_claim(self):
        """An added 'type: entity' line is not treated as a claim."""
        added_lines = "+type: entity\n"
        detected = _diff_contains_claim_type(added_lines)
        assert detected is False

    def test_ignores_file_header(self):
        """A '+++ b/...' header line containing 'type: claim' is not a match."""
        header_only = "+++ b/type: claim\n"
        detected = _diff_contains_claim_type(header_only)
        assert detected is False
# ---------------------------------------------------------------------------
# _deterministic_tier
# ---------------------------------------------------------------------------
class TestDeterministicTier:
    """Exercise _deterministic_tier: expected 'LIGHT', 'DEEP' or None per diff."""

    def test_entities_only_is_light(self):
        """A diff touching only entities/ is tiered LIGHT."""
        patch = (
            "diff --git a/entities/people/alice.md b/entities/people/alice.md\n"
            "+data\n"
        )
        tier = _deterministic_tier(patch)
        assert tier == "LIGHT"

    def test_inbox_only_is_light(self):
        """A diff touching only inbox/ is tiered LIGHT."""
        patch = (
            "diff --git a/inbox/archive/src.md b/inbox/archive/src.md\n"
            "+data\n"
        )
        tier = _deterministic_tier(patch)
        assert tier == "LIGHT"

    def test_core_is_deep(self):
        """A diff touching core/ is tiered DEEP."""
        patch = (
            "diff --git a/core/schema.md b/core/schema.md\n"
            "+structural change\n"
        )
        tier = _deterministic_tier(patch)
        assert tier == "DEEP"

    def test_challenged_by_is_deep(self):
        """An added 'challenged_by:' line is tiered DEEP."""
        patch = (
            "diff --git a/domains/x/claim.md b/domains/x/claim.md\n"
            "+challenged_by: some-claim\n"
        )
        tier = _deterministic_tier(patch)
        assert tier == "DEEP"

    def test_domain_claim_returns_none(self):
        """A new domain claim file gets no deterministic tier (None)."""
        patch = (
            "diff --git a/domains/finance/new-claim.md b/domains/finance/new-claim.md\n"
            "--- /dev/null\n"
            "+++ b/domains/finance/new-claim.md\n"
            "+type: claim\n"
        )
        tier = _deterministic_tier(patch)
        assert tier is None

    def test_empty_diff_returns_none(self):
        """An empty diff gets no deterministic tier (None)."""
        tier = _deterministic_tier("")
        assert tier is None
# ---------------------------------------------------------------------------
# _parse_verdict
# ---------------------------------------------------------------------------
class TestParseVerdict:
    """Exercise _parse_verdict(review_text, reviewer) on verdict markers."""

    def test_approve(self):
        """An APPROVE marker for the reviewer parses to 'approve'."""
        verdict = _parse_verdict("VERDICT:LEO:APPROVE", "LEO")
        assert verdict == "approve"

    def test_request_changes(self):
        """A REQUEST_CHANGES marker parses to 'request_changes'."""
        verdict = _parse_verdict("VERDICT:LEO:REQUEST_CHANGES", "LEO")
        assert verdict == "request_changes"

    def test_no_verdict_defaults_to_request_changes(self):
        """Text without any verdict marker falls back to 'request_changes'."""
        verdict = _parse_verdict("some review text", "LEO")
        assert verdict == "request_changes"

    def test_case_insensitive_reviewer(self):
        """A lower-case reviewer name still matches an upper-case marker."""
        verdict = _parse_verdict("VERDICT:LEO:APPROVE", "leo")
        assert verdict == "approve"
# ---------------------------------------------------------------------------
# _normalize_tag
# ---------------------------------------------------------------------------
class TestNormalizeTag:
    """Exercise _normalize_tag on canonical tags, aliases and unknown input."""

    def test_valid_tag_passes_through(self):
        """A tag that is already canonical is returned unchanged."""
        canonical = "broken_wiki_links"
        assert _normalize_tag(canonical) == canonical

    def test_alias_normalizes(self):
        """Each known alias maps to its canonical tag."""
        # Ordered pairs so failures surface in the same order as before.
        alias_to_canonical = (
            ("schema_violation", "frontmatter_schema"),
            ("duplicate", "near_duplicate"),
            ("factual_error", "factual_discrepancy"),
        )
        for alias, canonical in alias_to_canonical:
            assert _normalize_tag(alias) == canonical

    def test_hyphen_to_underscore(self):
        """A hyphenated alias resolves to the underscore-style canonical tag."""
        normalized = _normalize_tag("unverified-wiki-links")
        assert normalized == "broken_wiki_links"

    def test_unknown_returns_none(self):
        """An unrecognised tag yields None."""
        normalized = _normalize_tag("totally_made_up")
        assert normalized is None

    def test_fuzzy_substring_match(self):
        """A partial tag ('wiki_links') resolves to 'broken_wiki_links'."""
        normalized = _normalize_tag("wiki_links")
        assert normalized == "broken_wiki_links"
# ---------------------------------------------------------------------------
# _parse_issues
# ---------------------------------------------------------------------------
class TestParseIssues:
    """Exercise _parse_issues on structured ISSUES comments and plain prose."""

    def test_structured_comment(self):
        """Alias tags inside an ISSUES comment come back as canonical tags."""
        review = "Review text. <!-- ISSUES: schema_violation, duplicate -->"
        issues = _parse_issues(review)
        for expected_tag in ("frontmatter_schema", "near_duplicate"):
            assert expected_tag in issues

    def test_fallback_to_prose(self):
        """Without an ISSUES comment, issues are still found from the prose."""
        review = "The frontmatter is missing required fields."
        issues = _parse_issues(review)
        assert "frontmatter_schema" in issues

    def test_empty_structured_falls_back(self):
        """An ISSUES comment holding only an unknown tag still returns a list."""
        # All tags unrecognizable, falls through to prose
        review = "Review. <!-- ISSUES: totally_unknown_tag -->"
        issues = _parse_issues(review)
        # With no prose keywords either, should return empty
        # NOTE(review): only the return type is asserted here, not emptiness.
        assert isinstance(issues, list)
# ---------------------------------------------------------------------------
# _infer_issues_from_prose
# ---------------------------------------------------------------------------
class TestInferIssuesFromProse:
    """Exercise _infer_issues_from_prose on free-text review sentences."""

    def test_detects_frontmatter_issues(self):
        """A sentence about invalid YAML frontmatter yields 'frontmatter_schema'."""
        sentence = "The YAML frontmatter is invalid."
        inferred = _infer_issues_from_prose(sentence)
        assert "frontmatter_schema" in inferred

    def test_detects_wiki_link_issues(self):
        """A sentence about broken wiki links yields 'broken_wiki_links'."""
        sentence = "Found broken wiki links in the claim."
        inferred = _infer_issues_from_prose(sentence)
        assert "broken_wiki_links" in inferred

    def test_detects_near_duplicate(self):
        """A sentence calling the claim a near-duplicate yields 'near_duplicate'."""
        sentence = "This is a near-duplicate of an existing claim."
        inferred = _infer_issues_from_prose(sentence)
        assert "near_duplicate" in inferred

    def test_no_matches(self):
        """A purely positive sentence yields an empty list."""
        sentence = "This claim is well-structured and accurate."
        inferred = _infer_issues_from_prose(sentence)
        assert inferred == []
# ---------------------------------------------------------------------------
# _classify_issues
# ---------------------------------------------------------------------------
class TestClassifyIssues:
    """Exercise _classify_issues: mechanical, substantive, mixed or unknown."""

    def test_mechanical_only(self):
        """Schema and near-duplicate tags together classify as 'mechanical'."""
        tags = ["frontmatter_schema", "near_duplicate"]
        category = _classify_issues(tags)
        assert category == "mechanical"

    def test_substantive_only(self):
        """A lone factual-discrepancy tag classifies as 'substantive'."""
        tags = ["factual_discrepancy"]
        category = _classify_issues(tags)
        assert category == "substantive"

    def test_mixed(self):
        """A schema tag plus a factual tag classifies as 'mixed'."""
        tags = ["frontmatter_schema", "factual_discrepancy"]
        category = _classify_issues(tags)
        assert category == "mixed"

    def test_empty(self):
        """An empty tag list classifies as 'unknown'."""
        category = _classify_issues([])
        assert category == "unknown"
# ---------------------------------------------------------------------------
# _parse_batch_response
# ---------------------------------------------------------------------------
class TestParseBatchResponse:
    """Exercise _parse_batch_response(text, pr_numbers, agent) on batch replies."""

    def test_parses_multiple_prs(self):
        """Each PR section is keyed by its number and keeps its verdict text."""
        batch_reply = (
            "=== PR #100\nReview for 100\n<!-- PR:100 VERDICT:AGENT:APPROVE -->\n"
            "=== PR #200\nReview for 200\n<!-- PR:200 VERDICT:AGENT:REQUEST_CHANGES -->\n"
        )
        by_pr = _parse_batch_response(batch_reply, [100, 200], "agent")
        assert 100 in by_pr
        assert 200 in by_pr
        assert "APPROVE" in by_pr[100]
        assert "REQUEST_CHANGES" in by_pr[200]

    def test_missing_pr(self):
        """A requested PR with no section in the reply is absent from the result."""
        batch_reply = "=== PR #100\nReview\n<!-- PR:100 VERDICT:AGENT:APPROVE -->\n"
        by_pr = _parse_batch_response(batch_reply, [100, 200], "agent")
        assert 100 in by_pr
        assert 200 not in by_pr

    def test_empty_response(self):
        """A reply containing no PR sections parses to an empty dict."""
        by_pr = _parse_batch_response("no verdicts here", [100], "agent")
        assert by_pr == {}
# ---------------------------------------------------------------------------
# _validate_batch_fanout
# ---------------------------------------------------------------------------
class TestValidateBatchFanout:
    """Exercise _validate_batch_fanout, which returns a (valid, fallback) pair."""

    def test_missing_pr_falls_back(self):
        """A PR with a diff but no parsed review goes to fallback, not valid."""
        reviews = {100: "review for 100"}
        batch = [
            {"number": 100, "diff": "diff --git a/domains/long-enough-path.md b/domains/long-enough-path.md\n"},
            {"number": 200, "diff": "diff --git a/domains/other-long-path.md b/domains/other-long-path.md\n"},
        ]
        accepted, needs_retry = _validate_batch_fanout(reviews, batch, "agent")
        assert 200 in needs_retry
        assert 200 not in accepted

    def test_all_present_passes(self):
        """Reviews that each mention their own PR's path are all accepted."""
        reviews = {
            100: "review mentioning domains/long-enough-path.md",
            200: "review mentioning domains/other-long-path.md",
        }
        batch = [
            {"number": 100, "diff": "diff --git a/domains/long-enough-path.md b/domains/long-enough-path.md\n"},
            {"number": 200, "diff": "diff --git a/domains/other-long-path.md b/domains/other-long-path.md\n"},
        ]
        accepted, needs_retry = _validate_batch_fanout(reviews, batch, "agent")
        for pr_number in (100, 200):
            assert pr_number in accepted
        assert needs_retry == []