Some checks are pending
CI / lint-and-test (push) Waiting to run
Critical bug fix: close_pr now checks forgejo_api return value and skips DB update on Forgejo failure, preventing ghost PRs (DB closed, Forgejo open). Returns bool so callers can handle failures. _terminate_pr checks return value — skips source requeue on failure. stale_pr.py migrated from raw Forgejo+DB to close_pr (last raw close transition eliminated). eval_parse.py: 15 pure parsing functions extracted from evaluate.py (~370 lines removed). Zero I/O, zero async, independently testable. evaluate.py drops from ~1510 to ~1140 lines. Tests: 295 passed (42 new eval_parse + 2 new close_pr), zero regressions. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
315 lines
12 KiB
Python
315 lines
12 KiB
Python
"""Tests for lib/eval_parse.py — pure parsing functions extracted from evaluate.py."""
|
|
|
|
import sys
|
|
import os
|
|
from unittest.mock import MagicMock
|
|
|
|
import pytest
|
|
|
|
# Mock heavy dependencies before importing
|
|
sys.modules.setdefault("aiohttp", MagicMock())
|
|
|
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
|
|
|
from lib.eval_parse import (
|
|
VALID_ISSUE_TAGS,
|
|
_classify_issues,
|
|
_deterministic_tier,
|
|
_diff_contains_claim_type,
|
|
_extract_changed_files,
|
|
_filter_diff,
|
|
_infer_issues_from_prose,
|
|
_is_musings_only,
|
|
_normalize_tag,
|
|
_parse_batch_response,
|
|
_parse_issues,
|
|
_parse_verdict,
|
|
_validate_batch_fanout,
|
|
)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# _filter_diff
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestFilterDiff:
|
|
def test_strips_inbox_files(self):
|
|
diff = (
|
|
"diff --git a/inbox/archive/foo.md b/inbox/archive/foo.md\n"
|
|
"+some content\n"
|
|
"diff --git a/domains/finance/claim.md b/domains/finance/claim.md\n"
|
|
"+real content\n"
|
|
)
|
|
review_diff, entity_diff = _filter_diff(diff)
|
|
assert "inbox" not in review_diff
|
|
assert "claim.md" in review_diff
|
|
|
|
def test_separates_entity_diffs(self):
|
|
diff = (
|
|
"diff --git a/entities/people/alice.md b/entities/people/alice.md\n"
|
|
"+entity content\n"
|
|
"diff --git a/domains/finance/claim.md b/domains/finance/claim.md\n"
|
|
"+claim content\n"
|
|
)
|
|
review_diff, entity_diff = _filter_diff(diff)
|
|
assert "alice.md" in entity_diff
|
|
assert "claim.md" in review_diff
|
|
|
|
def test_core_domain_entities_stay_in_review(self):
|
|
diff = (
|
|
"diff --git a/entities/living-agents/agent.md b/entities/living-agents/agent.md\n"
|
|
"+core entity\n"
|
|
)
|
|
review_diff, entity_diff = _filter_diff(diff)
|
|
assert "agent.md" in review_diff
|
|
assert entity_diff == ""
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# _extract_changed_files
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestExtractChangedFiles:
|
|
def test_extracts_paths(self):
|
|
diff = (
|
|
"diff --git a/domains/foo.md b/domains/foo.md\n"
|
|
"+content\n"
|
|
"diff --git a/entities/bar.md b/entities/bar.md\n"
|
|
"+more\n"
|
|
)
|
|
result = _extract_changed_files(diff)
|
|
assert "domains/foo.md" in result
|
|
assert "entities/bar.md" in result
|
|
|
|
def test_empty_diff(self):
|
|
assert _extract_changed_files("") == ""
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# _is_musings_only
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestIsMusingsOnly:
|
|
def test_musings_only(self):
|
|
diff = "diff --git a/agents/rio/musings/thought.md b/agents/rio/musings/thought.md\n+musing\n"
|
|
assert _is_musings_only(diff) is True
|
|
|
|
def test_mixed_files(self):
|
|
diff = (
|
|
"diff --git a/agents/rio/musings/thought.md b/agents/rio/musings/thought.md\n+musing\n"
|
|
"diff --git a/domains/finance/claim.md b/domains/finance/claim.md\n+claim\n"
|
|
)
|
|
assert _is_musings_only(diff) is False
|
|
|
|
def test_no_musings(self):
|
|
diff = "diff --git a/domains/finance/claim.md b/domains/finance/claim.md\n+claim\n"
|
|
assert _is_musings_only(diff) is False
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# _diff_contains_claim_type
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestDiffContainsClaimType:
|
|
def test_detects_claim_type(self):
|
|
diff = "+type: claim\n+confidence: 0.8\n"
|
|
assert _diff_contains_claim_type(diff) is True
|
|
|
|
def test_ignores_non_claim(self):
|
|
diff = "+type: entity\n"
|
|
assert _diff_contains_claim_type(diff) is False
|
|
|
|
def test_ignores_file_header(self):
|
|
diff = "+++ b/type: claim\n"
|
|
assert _diff_contains_claim_type(diff) is False
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# _deterministic_tier
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestDeterministicTier:
|
|
def test_entities_only_is_light(self):
|
|
diff = "diff --git a/entities/people/alice.md b/entities/people/alice.md\n+data\n"
|
|
assert _deterministic_tier(diff) == "LIGHT"
|
|
|
|
def test_inbox_only_is_light(self):
|
|
diff = "diff --git a/inbox/archive/src.md b/inbox/archive/src.md\n+data\n"
|
|
assert _deterministic_tier(diff) == "LIGHT"
|
|
|
|
def test_core_is_deep(self):
|
|
diff = "diff --git a/core/schema.md b/core/schema.md\n+structural change\n"
|
|
assert _deterministic_tier(diff) == "DEEP"
|
|
|
|
def test_challenged_by_is_deep(self):
|
|
diff = "diff --git a/domains/x/claim.md b/domains/x/claim.md\n+challenged_by: some-claim\n"
|
|
assert _deterministic_tier(diff) == "DEEP"
|
|
|
|
def test_domain_claim_returns_none(self):
|
|
diff = "diff --git a/domains/finance/new-claim.md b/domains/finance/new-claim.md\n--- /dev/null\n+++ b/domains/finance/new-claim.md\n+type: claim\n"
|
|
assert _deterministic_tier(diff) is None
|
|
|
|
def test_empty_diff_returns_none(self):
|
|
assert _deterministic_tier("") is None
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# _parse_verdict
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestParseVerdict:
|
|
def test_approve(self):
|
|
assert _parse_verdict("VERDICT:LEO:APPROVE", "LEO") == "approve"
|
|
|
|
def test_request_changes(self):
|
|
assert _parse_verdict("VERDICT:LEO:REQUEST_CHANGES", "LEO") == "request_changes"
|
|
|
|
def test_no_verdict_defaults_to_request_changes(self):
|
|
assert _parse_verdict("some review text", "LEO") == "request_changes"
|
|
|
|
def test_case_insensitive_reviewer(self):
|
|
assert _parse_verdict("VERDICT:LEO:APPROVE", "leo") == "approve"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# _normalize_tag
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestNormalizeTag:
|
|
def test_valid_tag_passes_through(self):
|
|
assert _normalize_tag("broken_wiki_links") == "broken_wiki_links"
|
|
|
|
def test_alias_normalizes(self):
|
|
assert _normalize_tag("schema_violation") == "frontmatter_schema"
|
|
assert _normalize_tag("duplicate") == "near_duplicate"
|
|
assert _normalize_tag("factual_error") == "factual_discrepancy"
|
|
|
|
def test_hyphen_to_underscore(self):
|
|
assert _normalize_tag("unverified-wiki-links") == "broken_wiki_links"
|
|
|
|
def test_unknown_returns_none(self):
|
|
assert _normalize_tag("totally_made_up") is None
|
|
|
|
def test_fuzzy_substring_match(self):
|
|
assert _normalize_tag("wiki_links") == "broken_wiki_links"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# _parse_issues
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestParseIssues:
|
|
def test_structured_comment(self):
|
|
text = "Review text. <!-- ISSUES: schema_violation, duplicate -->"
|
|
result = _parse_issues(text)
|
|
assert "frontmatter_schema" in result
|
|
assert "near_duplicate" in result
|
|
|
|
def test_fallback_to_prose(self):
|
|
text = "The frontmatter is missing required fields."
|
|
result = _parse_issues(text)
|
|
assert "frontmatter_schema" in result
|
|
|
|
def test_empty_structured_falls_back(self):
|
|
text = "Review. <!-- ISSUES: totally_unknown_tag -->"
|
|
# All tags unrecognizable, falls through to prose
|
|
result = _parse_issues(text)
|
|
# With no prose keywords either, should return empty
|
|
assert isinstance(result, list)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# _infer_issues_from_prose
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestInferIssuesFromProse:
|
|
def test_detects_frontmatter_issues(self):
|
|
result = _infer_issues_from_prose("The YAML frontmatter is invalid.")
|
|
assert "frontmatter_schema" in result
|
|
|
|
def test_detects_wiki_link_issues(self):
|
|
result = _infer_issues_from_prose("Found broken wiki links in the claim.")
|
|
assert "broken_wiki_links" in result
|
|
|
|
def test_detects_near_duplicate(self):
|
|
result = _infer_issues_from_prose("This is a near-duplicate of an existing claim.")
|
|
assert "near_duplicate" in result
|
|
|
|
def test_no_matches(self):
|
|
result = _infer_issues_from_prose("This claim is well-structured and accurate.")
|
|
assert result == []
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# _classify_issues
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestClassifyIssues:
|
|
def test_mechanical_only(self):
|
|
assert _classify_issues(["frontmatter_schema", "near_duplicate"]) == "mechanical"
|
|
|
|
def test_substantive_only(self):
|
|
assert _classify_issues(["factual_discrepancy"]) == "substantive"
|
|
|
|
def test_mixed(self):
|
|
assert _classify_issues(["frontmatter_schema", "factual_discrepancy"]) == "mixed"
|
|
|
|
def test_empty(self):
|
|
assert _classify_issues([]) == "unknown"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# _parse_batch_response
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestParseBatchResponse:
|
|
def test_parses_multiple_prs(self):
|
|
response = (
|
|
"=== PR #100\nReview for 100\n<!-- PR:100 VERDICT:AGENT:APPROVE -->\n"
|
|
"=== PR #200\nReview for 200\n<!-- PR:200 VERDICT:AGENT:REQUEST_CHANGES -->\n"
|
|
)
|
|
result = _parse_batch_response(response, [100, 200], "agent")
|
|
assert 100 in result
|
|
assert 200 in result
|
|
assert "APPROVE" in result[100]
|
|
assert "REQUEST_CHANGES" in result[200]
|
|
|
|
def test_missing_pr(self):
|
|
response = "=== PR #100\nReview\n<!-- PR:100 VERDICT:AGENT:APPROVE -->\n"
|
|
result = _parse_batch_response(response, [100, 200], "agent")
|
|
assert 100 in result
|
|
assert 200 not in result
|
|
|
|
def test_empty_response(self):
|
|
result = _parse_batch_response("no verdicts here", [100], "agent")
|
|
assert result == {}
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# _validate_batch_fanout
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestValidateBatchFanout:
|
|
def test_missing_pr_falls_back(self):
|
|
parsed = {100: "review for 100"}
|
|
pr_diffs = [
|
|
{"number": 100, "diff": "diff --git a/domains/long-enough-path.md b/domains/long-enough-path.md\n"},
|
|
{"number": 200, "diff": "diff --git a/domains/other-long-path.md b/domains/other-long-path.md\n"},
|
|
]
|
|
valid, fallback = _validate_batch_fanout(parsed, pr_diffs, "agent")
|
|
assert 200 in fallback
|
|
assert 200 not in valid
|
|
|
|
def test_all_present_passes(self):
|
|
parsed = {
|
|
100: "review mentioning domains/long-enough-path.md",
|
|
200: "review mentioning domains/other-long-path.md",
|
|
}
|
|
pr_diffs = [
|
|
{"number": 100, "diff": "diff --git a/domains/long-enough-path.md b/domains/long-enough-path.md\n"},
|
|
{"number": 200, "diff": "diff --git a/domains/other-long-path.md b/domains/other-long-path.md\n"},
|
|
]
|
|
valid, fallback = _validate_batch_fanout(parsed, pr_diffs, "agent")
|
|
assert 100 in valid
|
|
assert 200 in valid
|
|
assert fallback == []
|