"""Tests for lib/eval_actions.py — async PR disposition actions.""" import asyncio import json import sqlite3 import sys import os from unittest.mock import AsyncMock, MagicMock, patch import pytest # Mock heavy dependencies before importing sys.modules.setdefault("aiohttp", MagicMock()) sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) from lib.eval_actions import dispose_rejected_pr, post_formal_approvals, terminate_pr # --------------------------------------------------------------------------- # Fixtures # --------------------------------------------------------------------------- def _make_db(): """Create a minimal in-memory DB with prs + sources tables.""" conn = sqlite3.connect(":memory:") conn.row_factory = sqlite3.Row conn.execute(""" CREATE TABLE prs ( number INTEGER PRIMARY KEY, source_path TEXT, branch TEXT, status TEXT NOT NULL DEFAULT 'open', domain TEXT, agent TEXT, auto_merge INTEGER DEFAULT 0, leo_verdict TEXT DEFAULT 'pending', domain_verdict TEXT DEFAULT 'pending', eval_attempts INTEGER DEFAULT 0, eval_issues TEXT DEFAULT '[]', merge_cycled INTEGER DEFAULT 0, merge_failures INTEGER DEFAULT 0, conflict_rebase_attempts INTEGER DEFAULT 0, last_error TEXT, merged_at TEXT, last_attempt TEXT, cost_usd REAL DEFAULT 0, created_at TEXT DEFAULT (datetime('now')) ) """) conn.execute(""" CREATE TABLE sources ( path TEXT PRIMARY KEY, status TEXT DEFAULT 'extracted', feedback TEXT, updated_at TEXT ) """) conn.execute(""" CREATE TABLE audit_log ( id INTEGER PRIMARY KEY AUTOINCREMENT, stage TEXT, event TEXT, detail TEXT, ts TEXT DEFAULT (datetime('now')) ) """) return conn def _insert_pr(conn, number=100, status="open", source_path=None, eval_issues=None, **kwargs): """Insert a test PR row.""" cols = ["number", "status"] vals = [number, status] if source_path is not None: cols.append("source_path") vals.append(source_path) if eval_issues is not None: cols.append("eval_issues") vals.append(eval_issues) for k, v in kwargs.items(): cols.append(k) vals.append(v) placeholders = ", ".join("?" * len(cols)) conn.execute(f"INSERT INTO prs ({', '.join(cols)}) VALUES ({placeholders})", vals) conn.commit() def _run(coro): """Run async coroutine in sync test.""" return asyncio.run(coro) # --------------------------------------------------------------------------- # post_formal_approvals # --------------------------------------------------------------------------- class TestPostFormalApprovals: @patch("lib.eval_actions.forgejo_api", new_callable=AsyncMock) @patch("lib.eval_actions.get_agent_token") def test_posts_two_approvals(self, mock_token, mock_api): mock_token.return_value = "fake-token" mock_api.return_value = {"id": 1} _run(post_formal_approvals(100, "epimetheus")) # Should have called forgejo_api exactly 2 times (first 2 agents that aren't author) assert mock_api.call_count == 2 @patch("lib.eval_actions.forgejo_api", new_callable=AsyncMock) @patch("lib.eval_actions.get_agent_token") def test_skips_pr_author(self, mock_token, mock_api): mock_token.return_value = "fake-token" mock_api.return_value = {"id": 1} _run(post_formal_approvals(100, "leo")) # Leo is first in the list, should be skipped, next 2 agents used calls = mock_api.call_args_list for call in calls: assert "leo" not in str(call).lower() or "token" in str(call) @patch("lib.eval_actions.forgejo_api", new_callable=AsyncMock) @patch("lib.eval_actions.get_agent_token") def test_handles_no_tokens(self, mock_token, mock_api): mock_token.return_value = None _run(post_formal_approvals(100, "someone")) # No tokens available — no API calls mock_api.assert_not_called() @patch("lib.eval_actions.forgejo_api", new_callable=AsyncMock) @patch("lib.eval_actions.get_agent_token") def test_continues_on_api_failure(self, mock_token, mock_api): mock_token.return_value = "fake-token" # First call fails (returns None), subsequent succeed mock_api.side_effect = [None, {"id": 1}, {"id": 2}, {"id": 3}] _run(post_formal_approvals(100, "nobody")) # Should keep trying until 2 successes assert mock_api.call_count >= 3 # --------------------------------------------------------------------------- # terminate_pr # --------------------------------------------------------------------------- class TestTerminatePr: @patch("lib.eval_actions.close_pr", new_callable=AsyncMock) @patch("lib.eval_actions.forgejo_api", new_callable=AsyncMock) def test_closes_pr_and_requeues_source(self, mock_api, mock_close): mock_api.return_value = {"id": 1} mock_close.return_value = True conn = _make_db() _insert_pr(conn, 100, source_path="inbox/queue/test.md", eval_issues='["factual_discrepancy"]') conn.execute("INSERT INTO sources (path, status) VALUES (?, ?)", ("inbox/queue/test.md", "extracted")) conn.commit() _run(terminate_pr(conn, 100, "test reason")) # close_pr called mock_close.assert_called_once() # Source requeued src = conn.execute("SELECT status FROM sources WHERE path = ?", ("inbox/queue/test.md",)).fetchone() assert src["status"] == "needs_reextraction" # Audit logged audit = conn.execute("SELECT * FROM audit_log WHERE event = 'pr_terminated'").fetchone() assert audit is not None @patch("lib.eval_actions.close_pr", new_callable=AsyncMock) @patch("lib.eval_actions.forgejo_api", new_callable=AsyncMock) def test_skips_requeue_on_close_failure(self, mock_api, mock_close): mock_api.return_value = {"id": 1} mock_close.return_value = False conn = _make_db() _insert_pr(conn, 100, source_path="inbox/queue/test.md") conn.execute("INSERT INTO sources (path, status) VALUES (?, ?)", ("inbox/queue/test.md", "extracted")) conn.commit() _run(terminate_pr(conn, 100, "test reason")) # Source NOT requeued src = conn.execute("SELECT status FROM sources WHERE path = ?", ("inbox/queue/test.md",)).fetchone() assert src["status"] == "extracted" # No audit for pr_terminated audit = conn.execute("SELECT * FROM audit_log WHERE event = 'pr_terminated'").fetchone() assert audit is None @patch("lib.eval_actions.close_pr", new_callable=AsyncMock) @patch("lib.eval_actions.forgejo_api", new_callable=AsyncMock) def test_posts_structured_feedback_with_issues(self, mock_api, mock_close): mock_api.return_value = {"id": 1} mock_close.return_value = True conn = _make_db() _insert_pr(conn, 100, eval_issues='["frontmatter_schema", "near_duplicate"]') _run(terminate_pr(conn, 100, "budget exhausted")) # Comment posted with structured feedback comment_call = mock_api.call_args_list[0] body = comment_call[0][2]["body"] assert "Closed by eval pipeline" in body @patch("lib.eval_actions.close_pr", new_callable=AsyncMock) @patch("lib.eval_actions.forgejo_api", new_callable=AsyncMock) def test_handles_no_source_path(self, mock_api, mock_close): mock_api.return_value = {"id": 1} mock_close.return_value = True conn = _make_db() _insert_pr(conn, 100) # No source_path _run(terminate_pr(conn, 100, "no source")) # Should not crash, audit should still be logged audit = conn.execute("SELECT * FROM audit_log WHERE event = 'pr_terminated'").fetchone() assert audit is not None # --------------------------------------------------------------------------- # dispose_rejected_pr # --------------------------------------------------------------------------- class TestDisposeRejectedPr: @patch("lib.eval_actions.forgejo_api", new_callable=AsyncMock) def test_attempt_1_posts_feedback_only(self, mock_api): mock_api.return_value = {"id": 1} conn = _make_db() _insert_pr(conn, 100) _run(dispose_rejected_pr(conn, 100, eval_attempts=1, all_issues=["factual_discrepancy"])) # Should post feedback comment but NOT close assert mock_api.call_count == 1 # Just the comment pr = conn.execute("SELECT status FROM prs WHERE number = 100").fetchone() assert pr["status"] == "open" # Still open @patch("lib.eval_actions.forgejo_api", new_callable=AsyncMock) def test_attempt_1_no_issues_no_action(self, mock_api): conn = _make_db() _insert_pr(conn, 100) _run(dispose_rejected_pr(conn, 100, eval_attempts=1, all_issues=[])) mock_api.assert_not_called() @patch("lib.eval_actions.terminate_pr", new_callable=AsyncMock) @patch("lib.eval_actions.forgejo_api", new_callable=AsyncMock) def test_attempt_max_terminates(self, mock_api, mock_terminate): conn = _make_db() _insert_pr(conn, 100) _run(dispose_rejected_pr(conn, 100, eval_attempts=3, all_issues=["factual_discrepancy"])) mock_terminate.assert_called_once() assert "eval budget exhausted" in mock_terminate.call_args[0][2] @patch("lib.eval_actions.terminate_pr", new_callable=AsyncMock) @patch("lib.eval_actions.forgejo_api", new_callable=AsyncMock) def test_attempt_2_mechanical_keeps_open(self, mock_api, mock_terminate): conn = _make_db() _insert_pr(conn, 100) _run(dispose_rejected_pr(conn, 100, eval_attempts=2, all_issues=["frontmatter_schema"])) # Should NOT terminate — mechanical issues get one more try mock_terminate.assert_not_called() # Should log audit audit = conn.execute("SELECT * FROM audit_log WHERE event = 'mechanical_retry'").fetchone() assert audit is not None @patch("lib.eval_actions.terminate_pr", new_callable=AsyncMock) @patch("lib.eval_actions.forgejo_api", new_callable=AsyncMock) def test_attempt_2_substantive_terminates(self, mock_api, mock_terminate): conn = _make_db() _insert_pr(conn, 100) _run(dispose_rejected_pr(conn, 100, eval_attempts=2, all_issues=["factual_discrepancy"])) mock_terminate.assert_called_once() assert "substantive issues" in mock_terminate.call_args[0][2] @patch("lib.eval_actions.terminate_pr", new_callable=AsyncMock) @patch("lib.eval_actions.forgejo_api", new_callable=AsyncMock) def test_attempt_2_mixed_terminates(self, mock_api, mock_terminate): conn = _make_db() _insert_pr(conn, 100) _run(dispose_rejected_pr(conn, 100, eval_attempts=2, all_issues=["frontmatter_schema", "factual_discrepancy"])) mock_terminate.assert_called_once()