Some checks are pending
CI / lint-and-test (push) Waiting to run
Three changes:

1. Drop underscore prefixes in eval_parse.py — functions are now the public API of the module (filter_diff, parse_verdict, classify_issues, etc.). All 12 functions renamed; imports updated in evaluate.py and tests.
2. Extract eval_actions.py from evaluate.py — 3 async PR disposition functions:
   - post_formal_approvals: submit Forgejo reviews from 2 agents
   - terminate_pr: close PR, post rejection comment, requeue source
   - dispose_rejected_pr: disposition logic for rejected PRs on attempt 2+
   evaluate.py drops from ~1140 to 911 lines.
3. 14 new tests in test_eval_actions.py covering all three functions.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
290 lines
11 KiB
Python
290 lines
11 KiB
Python
"""Tests for lib/eval_actions.py — async PR disposition actions."""
|
|
|
|
import asyncio
|
|
import json
|
|
import sqlite3
|
|
import sys
|
|
import os
|
|
from unittest.mock import AsyncMock, MagicMock, patch
|
|
|
|
import pytest
|
|
|
|
# Mock heavy dependencies before importing
|
|
sys.modules.setdefault("aiohttp", MagicMock())
|
|
|
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
|
|
|
from lib.eval_actions import dispose_rejected_pr, post_formal_approvals, terminate_pr
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Fixtures
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _make_db():
    """Build an in-memory SQLite database mirroring the pipeline schema.

    Creates three tables: ``prs`` (PR tracking with eval/merge bookkeeping),
    ``sources`` (extraction queue entries), and ``audit_log`` (event trail).
    Rows come back as :class:`sqlite3.Row` so tests can index by column name.
    """
    db = sqlite3.connect(":memory:")
    db.row_factory = sqlite3.Row
    # DDL kept as a tuple so the schema reads as one unit and new tables
    # only need one more entry here.
    ddl_statements = (
        """
        CREATE TABLE prs (
            number INTEGER PRIMARY KEY,
            source_path TEXT,
            branch TEXT,
            status TEXT NOT NULL DEFAULT 'open',
            domain TEXT,
            agent TEXT,
            auto_merge INTEGER DEFAULT 0,
            leo_verdict TEXT DEFAULT 'pending',
            domain_verdict TEXT DEFAULT 'pending',
            eval_attempts INTEGER DEFAULT 0,
            eval_issues TEXT DEFAULT '[]',
            merge_cycled INTEGER DEFAULT 0,
            merge_failures INTEGER DEFAULT 0,
            conflict_rebase_attempts INTEGER DEFAULT 0,
            last_error TEXT,
            merged_at TEXT,
            last_attempt TEXT,
            cost_usd REAL DEFAULT 0,
            created_at TEXT DEFAULT (datetime('now'))
        )
        """,
        """
        CREATE TABLE sources (
            path TEXT PRIMARY KEY,
            status TEXT DEFAULT 'extracted',
            feedback TEXT,
            updated_at TEXT
        )
        """,
        """
        CREATE TABLE audit_log (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            stage TEXT,
            event TEXT,
            detail TEXT,
            ts TEXT DEFAULT (datetime('now'))
        )
        """,
    )
    for ddl in ddl_statements:
        db.execute(ddl)
    return db
|
|
|
|
|
|
def _insert_pr(conn, number=100, status="open", source_path=None, eval_issues=None, **kwargs):
    """Insert a test PR row.

    Only ``number`` and ``status`` are always written; ``source_path`` and
    ``eval_issues`` are included when given, and any extra keyword arguments
    become additional column/value pairs verbatim.
    """
    # A dict keeps column names and values paired; insertion order is
    # preserved, so the generated SQL matches the argument order.
    row = {"number": number, "status": status}
    if source_path is not None:
        row["source_path"] = source_path
    if eval_issues is not None:
        row["eval_issues"] = eval_issues
    row.update(kwargs)
    column_list = ", ".join(row)
    marks = ", ".join("?" for _ in row)
    conn.execute(f"INSERT INTO prs ({column_list}) VALUES ({marks})", list(row.values()))
    conn.commit()
|
|
|
|
|
|
def _run(coro):
    """Drive *coro* to completion on a fresh event loop and return its result.

    Lets plain (non-async) test methods exercise the async actions under test.
    """
    result = asyncio.run(coro)
    return result
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# post_formal_approvals
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestPostFormalApprovals:
    """Tests for post_formal_approvals(pr_number, author).

    Decorators apply bottom-up, so the innermost patch (get_agent_token)
    is the first injected mock argument, then forgejo_api.
    """

    @patch("lib.eval_actions.forgejo_api", new_callable=AsyncMock)
    @patch("lib.eval_actions.get_agent_token")
    def test_posts_two_approvals(self, mock_token, mock_api):
        """Happy path: exactly two review submissions reach the API."""
        mock_token.return_value = "fake-token"
        mock_api.return_value = {"id": 1}
        _run(post_formal_approvals(100, "epimetheus"))
        # Should have called forgejo_api exactly 2 times (first 2 agents that aren't author)
        assert mock_api.call_count == 2

    @patch("lib.eval_actions.forgejo_api", new_callable=AsyncMock)
    @patch("lib.eval_actions.get_agent_token")
    def test_skips_pr_author(self, mock_token, mock_api):
        """The PR author must never approve their own PR."""
        mock_token.return_value = "fake-token"
        mock_api.return_value = {"id": 1}
        _run(post_formal_approvals(100, "leo"))
        # Leo is first in the list, should be skipped, next 2 agents used
        calls = mock_api.call_args_list
        for call in calls:
            # NOTE(review): this assertion looks vacuous — "fake-token" appears in
            # the call repr, so the right-hand disjunct ("token" in str(call)) is
            # likely always true regardless of whether leo was skipped. Consider
            # asserting on the token/agent argument directly instead.
            assert "leo" not in str(call).lower() or "token" in str(call)

    @patch("lib.eval_actions.forgejo_api", new_callable=AsyncMock)
    @patch("lib.eval_actions.get_agent_token")
    def test_handles_no_tokens(self, mock_token, mock_api):
        """With no agent tokens configured, the function makes no API calls."""
        mock_token.return_value = None
        _run(post_formal_approvals(100, "someone"))
        # No tokens available — no API calls
        mock_api.assert_not_called()

    @patch("lib.eval_actions.forgejo_api", new_callable=AsyncMock)
    @patch("lib.eval_actions.get_agent_token")
    def test_continues_on_api_failure(self, mock_token, mock_api):
        """A failed submission (None response) is retried with further agents."""
        mock_token.return_value = "fake-token"
        # First call fails (returns None), subsequent succeed
        mock_api.side_effect = [None, {"id": 1}, {"id": 2}, {"id": 3}]
        _run(post_formal_approvals(100, "nobody"))
        # Should keep trying until 2 successes
        assert mock_api.call_count >= 3
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# terminate_pr
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestTerminatePr:
    """Tests for terminate_pr(conn, pr_number, reason).

    Expected behavior (per these tests): post a rejection comment, close the
    PR via close_pr, and on successful close requeue the source row as
    'needs_reextraction' and write a 'pr_terminated' audit_log entry.
    Mock argument order: forgejo_api (inner patch) first, then close_pr.
    """

    @patch("lib.eval_actions.close_pr", new_callable=AsyncMock)
    @patch("lib.eval_actions.forgejo_api", new_callable=AsyncMock)
    def test_closes_pr_and_requeues_source(self, mock_api, mock_close):
        """Successful close → source requeued + audit entry written."""
        mock_api.return_value = {"id": 1}
        mock_close.return_value = True
        conn = _make_db()
        _insert_pr(conn, 100, source_path="inbox/queue/test.md",
                   eval_issues='["factual_discrepancy"]')
        conn.execute("INSERT INTO sources (path, status) VALUES (?, ?)",
                     ("inbox/queue/test.md", "extracted"))
        conn.commit()

        _run(terminate_pr(conn, 100, "test reason"))

        # close_pr called
        mock_close.assert_called_once()
        # Source requeued
        src = conn.execute("SELECT status FROM sources WHERE path = ?",
                           ("inbox/queue/test.md",)).fetchone()
        assert src["status"] == "needs_reextraction"
        # Audit logged
        audit = conn.execute("SELECT * FROM audit_log WHERE event = 'pr_terminated'").fetchone()
        assert audit is not None

    @patch("lib.eval_actions.close_pr", new_callable=AsyncMock)
    @patch("lib.eval_actions.forgejo_api", new_callable=AsyncMock)
    def test_skips_requeue_on_close_failure(self, mock_api, mock_close):
        """If close_pr fails, neither requeue nor audit side effects happen."""
        mock_api.return_value = {"id": 1}
        mock_close.return_value = False
        conn = _make_db()
        _insert_pr(conn, 100, source_path="inbox/queue/test.md")
        conn.execute("INSERT INTO sources (path, status) VALUES (?, ?)",
                     ("inbox/queue/test.md", "extracted"))
        conn.commit()

        _run(terminate_pr(conn, 100, "test reason"))

        # Source NOT requeued
        src = conn.execute("SELECT status FROM sources WHERE path = ?",
                           ("inbox/queue/test.md",)).fetchone()
        assert src["status"] == "extracted"
        # No audit for pr_terminated
        audit = conn.execute("SELECT * FROM audit_log WHERE event = 'pr_terminated'").fetchone()
        assert audit is None

    @patch("lib.eval_actions.close_pr", new_callable=AsyncMock)
    @patch("lib.eval_actions.forgejo_api", new_callable=AsyncMock)
    def test_posts_structured_feedback_with_issues(self, mock_api, mock_close):
        """The rejection comment includes the pipeline-closure marker text."""
        mock_api.return_value = {"id": 1}
        mock_close.return_value = True
        conn = _make_db()
        _insert_pr(conn, 100, eval_issues='["frontmatter_schema", "near_duplicate"]')

        _run(terminate_pr(conn, 100, "budget exhausted"))

        # Comment posted with structured feedback
        comment_call = mock_api.call_args_list[0]
        # Assumes forgejo_api's third positional argument is the JSON payload
        # dict carrying the comment "body" — TODO confirm against eval_actions.
        body = comment_call[0][2]["body"]
        assert "Closed by eval pipeline" in body

    @patch("lib.eval_actions.close_pr", new_callable=AsyncMock)
    @patch("lib.eval_actions.forgejo_api", new_callable=AsyncMock)
    def test_handles_no_source_path(self, mock_api, mock_close):
        """A PR without a source_path still terminates cleanly and audits."""
        mock_api.return_value = {"id": 1}
        mock_close.return_value = True
        conn = _make_db()
        _insert_pr(conn, 100)  # No source_path

        _run(terminate_pr(conn, 100, "no source"))

        # Should not crash, audit should still be logged
        audit = conn.execute("SELECT * FROM audit_log WHERE event = 'pr_terminated'").fetchone()
        assert audit is not None
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# dispose_rejected_pr
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestDisposeRejectedPr:
    """Tests for dispose_rejected_pr(conn, pr_number, eval_attempts=, all_issues=).

    Expected policy (per these tests):
      attempt 1 — post feedback comment only (no action if no issues);
      attempt 2 — purely mechanical issues get one more try ('mechanical_retry'
                  audit event), any substantive issue terminates;
      attempt 3+ — always terminate ("eval budget exhausted").
    terminate_pr is patched in the termination tests, so the reason string is
    inspected via mock_terminate.call_args positional argument index 2.
    """

    @patch("lib.eval_actions.forgejo_api", new_callable=AsyncMock)
    def test_attempt_1_posts_feedback_only(self, mock_api):
        """First rejection: feedback comment, PR stays open."""
        mock_api.return_value = {"id": 1}
        conn = _make_db()
        _insert_pr(conn, 100)

        _run(dispose_rejected_pr(conn, 100, eval_attempts=1, all_issues=["factual_discrepancy"]))

        # Should post feedback comment but NOT close
        assert mock_api.call_count == 1  # Just the comment
        pr = conn.execute("SELECT status FROM prs WHERE number = 100").fetchone()
        assert pr["status"] == "open"  # Still open

    @patch("lib.eval_actions.forgejo_api", new_callable=AsyncMock)
    def test_attempt_1_no_issues_no_action(self, mock_api):
        """First rejection with an empty issue list makes no API calls."""
        conn = _make_db()
        _insert_pr(conn, 100)

        _run(dispose_rejected_pr(conn, 100, eval_attempts=1, all_issues=[]))

        mock_api.assert_not_called()

    @patch("lib.eval_actions.terminate_pr", new_callable=AsyncMock)
    @patch("lib.eval_actions.forgejo_api", new_callable=AsyncMock)
    def test_attempt_max_terminates(self, mock_api, mock_terminate):
        """Attempt 3 exhausts the eval budget and terminates the PR."""
        conn = _make_db()
        _insert_pr(conn, 100)

        _run(dispose_rejected_pr(conn, 100, eval_attempts=3, all_issues=["factual_discrepancy"]))

        mock_terminate.assert_called_once()
        # Third positional arg to terminate_pr is the human-readable reason.
        assert "eval budget exhausted" in mock_terminate.call_args[0][2]

    @patch("lib.eval_actions.terminate_pr", new_callable=AsyncMock)
    @patch("lib.eval_actions.forgejo_api", new_callable=AsyncMock)
    def test_attempt_2_mechanical_keeps_open(self, mock_api, mock_terminate):
        """Attempt 2 with only mechanical issues: retry, don't terminate."""
        conn = _make_db()
        _insert_pr(conn, 100)

        _run(dispose_rejected_pr(conn, 100, eval_attempts=2, all_issues=["frontmatter_schema"]))

        # Should NOT terminate — mechanical issues get one more try
        mock_terminate.assert_not_called()
        # Should log audit
        audit = conn.execute("SELECT * FROM audit_log WHERE event = 'mechanical_retry'").fetchone()
        assert audit is not None

    @patch("lib.eval_actions.terminate_pr", new_callable=AsyncMock)
    @patch("lib.eval_actions.forgejo_api", new_callable=AsyncMock)
    def test_attempt_2_substantive_terminates(self, mock_api, mock_terminate):
        """Attempt 2 with a substantive issue terminates immediately."""
        conn = _make_db()
        _insert_pr(conn, 100)

        _run(dispose_rejected_pr(conn, 100, eval_attempts=2, all_issues=["factual_discrepancy"]))

        mock_terminate.assert_called_once()
        assert "substantive issues" in mock_terminate.call_args[0][2]

    @patch("lib.eval_actions.terminate_pr", new_callable=AsyncMock)
    @patch("lib.eval_actions.forgejo_api", new_callable=AsyncMock)
    def test_attempt_2_mixed_terminates(self, mock_api, mock_terminate):
        """Attempt 2 with mixed mechanical + substantive issues terminates."""
        conn = _make_db()
        _insert_pr(conn, 100)

        _run(dispose_rejected_pr(conn, 100, eval_attempts=2,
                                 all_issues=["frontmatter_schema", "factual_discrepancy"]))

        mock_terminate.assert_called_once()