Pipeline reliability (8 fixes, reviewed by Ganymede+Rhea+Leo+Rio):
1. Merge API recovery — pre-flight approval check, transient/permanent distinction, jitter
2. Ghost PR detection — ls-remote branch check in reconciliation, network guard
3. Source status contract — directory IS status, no code change needed
4. Batch-state markers eliminated — two-gate skip (archive-check + batched branch-check)
5. Branch SHA tracking — batched ls-remote, auto-reset verdicts, dismiss stale reviews
6. Mirror pre-flight permissions — chown check in sync-mirror.sh
7. Telegram archive commit-after-write — git add/commit/push with rebase --abort fallback
8. Post-merge source archiving — queue/ → archive/{domain}/ after merge
Pipeline fixes:
- merge_cycled flag — eval attempts preserved during merge-failure cycling (Ganymede+Rhea)
- merge_failures diagnostic counter
- Startup recovery preserves eval_attempts (was incorrectly resetting to 0)
- No-diff PRs auto-closed by eval (root cause of 17 zombie PRs)
- GC threshold aligned with substantive fixer budget (was 2, now 4)
- Conflict retry with 3-attempt budget + permanent conflict handler
- Local ff-merge fallback for Forgejo 405 errors
Telegram bot:
- KB retrieval: 3-layer (entity resolution → claim search → agent context)
- Reply-to-bot handler (context.bot.id check)
- Tag regex: @teleo|@futairdbot
- Prompt rewrite for natural analyst voice
- Market data API integration (Ben's token price endpoint)
- Conversation windows (5-message unanswered counter, per-user-per-chat)
- Conversation history in prompt (last 5 exchanges)
- Worktree file lock for archive writes
Infrastructure:
- worktree_lock.py — file-based lock (flock) for main worktree coordination
- backfill-sources.py — source DB registration for Argus funnel
- batch-extract-50.sh v3 — two-gate skip, batched ls-remote, network guard
- sync-mirror.sh — auto-PR creation for mirrored GitHub branches, permission pre-flight
- Argus dashboard — conflicts + reviewing in backlog, queue count in funnel
- Enrichment-inside-frontmatter bug fix (regex anchor, not --- split)
Pentagon-Agent: Epimetheus <3D35839A-7722-4740-B93D-51157F7D5E70>
147 lines
6 KiB
Python
147 lines
6 KiB
Python
"""Tests for structured rejection feedback system."""
|
|
|
|
import json
|
|
|
|
import pytest
|
|
|
|
from lib.feedback import (
|
|
QUALITY_GATES,
|
|
format_rejection_comment,
|
|
get_agent_error_patterns,
|
|
parse_rejection_comment,
|
|
)
|
|
|
|
|
|
# ─── Quality gate coverage ─────────────────────────────────────────────────
|
|
|
|
|
|
class TestQualityGates:
    """Coverage and shape checks for the QUALITY_GATES mapping."""

    def test_all_eval_tags_have_gates(self):
        """Every issue tag used by evaluate.py should have a quality gate entry."""
        expected_tags = {
            "broken_wiki_links",
            "frontmatter_schema",
            "title_overclaims",
            "confidence_miscalibration",
            "date_errors",
            "factual_discrepancy",
            "near_duplicate",
            "scope_error",
        }
        for tag in expected_tags:
            assert tag in QUALITY_GATES, f"Missing quality gate for eval tag: {tag}"

    def test_post_extract_tags_have_gates(self):
        """Issue tags from post_extract.py should also have quality gate entries."""
        expected_tags = {
            "opsec_internal_deal_terms",
            "body_too_thin",
            "title_too_few_words",
            "title_not_proposition",
        }
        for tag in expected_tags:
            assert tag in QUALITY_GATES, f"Missing quality gate for post_extract tag: {tag}"

    def test_every_gate_has_required_fields(self):
        """Each gate entry carries all required keys and a recognised severity."""
        # Checked in this order so the first failure reported matches the
        # field that is actually missing.
        required_fields = ("gate", "description", "fix", "severity")
        allowed_severities = ("blocking", "warning")

        for tag, gate in QUALITY_GATES.items():
            for field in required_fields:
                assert field in gate, f"{tag} missing '{field}'"
            assert gate["severity"] in allowed_severities, f"{tag} invalid severity"
|
|
|
|
|
|
# ─── format_rejection_comment ──────────────────────────────────────────────
|
|
|
|
|
|
class TestFormatRejectionComment:
    """Checks on the text produced by format_rejection_comment."""

    def test_single_blocking_issue(self):
        """A single blocking tag yields the marker, a BLOCK label and fix guidance."""
        rendered = format_rejection_comment(["frontmatter_schema"])
        expected_fragments = (
            "<!-- REJECTION:",
            "BLOCK",
            "Schema compliance",
            "Fix:",
        )
        for fragment in expected_fragments:
            assert fragment in rendered

    def test_multiple_issues(self):
        """Mixed severities are counted and labelled separately."""
        issue_tags = [
            "frontmatter_schema",
            "confidence_miscalibration",
            "broken_wiki_links",
        ]
        rendered = format_rejection_comment(issue_tags)

        # frontmatter + confidence are the two blocking issues
        assert "2 blocking" in rendered
        assert "BLOCK" in rendered
        # wiki links is reported as a warning
        assert "WARN" in rendered

    def test_warning_only(self):
        """With only warning-level tags the comment is not worded as a rejection."""
        rendered = format_rejection_comment(["broken_wiki_links", "near_duplicate"])
        assert "Warnings" in rendered
        assert "Rejected" not in rendered

    def test_machine_readable_block(self):
        """The formatted comment round-trips through parse_rejection_comment."""
        rendered = format_rejection_comment(["scope_error"], source="tier0")
        parsed = parse_rejection_comment(rendered)

        assert parsed is not None
        assert parsed["issues"] == ["scope_error"]
        assert parsed["source"] == "tier0"
        assert "ts" in parsed

    def test_unknown_tag_handled(self):
        """An unrecognised tag is echoed in the output instead of raising."""
        rendered = format_rejection_comment(["unknown_tag"])
        assert "unknown_tag" in rendered  # doesn't crash
|
|
|
|
|
|
# ─── parse_rejection_comment ───────────────────────────────────────────────
|
|
|
|
|
|
class TestParseRejectionComment:
    """Checks on extracting the embedded JSON from a rejection comment."""

    def test_parse_valid(self):
        """A well-formed REJECTION marker is decoded into its payload."""
        comment_body = (
            '<!-- REJECTION: {"issues": ["scope_error"], "source": "eval"} -->'
            "\n\nSome text"
        )
        parsed = parse_rejection_comment(comment_body)
        assert parsed["issues"] == ["scope_error"]

    def test_parse_no_rejection(self):
        """A comment without the marker parses to None."""
        parsed = parse_rejection_comment("Just a normal comment")
        assert parsed is None

    def test_parse_malformed_json(self):
        """A marker wrapping invalid JSON parses to None."""
        parsed = parse_rejection_comment("<!-- REJECTION: {bad json} -->")
        assert parsed is None
|
|
|
|
|
|
# ─── get_agent_error_patterns ──────────────────────────────────────────────
|
|
|
|
|
|
class TestAgentErrorPatterns:
    """Checks on the per-agent summary returned by get_agent_error_patterns.

    NOTE(review): `conn` is presumably a pytest fixture supplying a database
    connection with a `prs` table; it is not defined in this file, so confirm
    against conftest.
    """

    def test_empty_agent(self, conn):
        """An agent with no PR rows reports zero PRs and a 'no_data' trend."""
        summary = get_agent_error_patterns(conn, "rio")
        assert summary["total_prs"] == 0
        assert summary["trend"] == "no_data"

    def test_agent_with_rejections(self, conn):
        """Two closed PRs with issues plus one merged PR give a 1/3 approval rate."""
        # Seed three PRs for 'rio': two closed with issues, one merged and clean.
        seed_statements = (
            """INSERT INTO prs (number, branch, status, agent, eval_issues, last_attempt, domain)
               VALUES (1, 'rio/test-1', 'closed', 'rio', '["frontmatter_schema", "confidence_miscalibration"]',
                       datetime('now'), 'internet-finance')""",
            """INSERT INTO prs (number, branch, status, agent, eval_issues, last_attempt, domain)
               VALUES (2, 'rio/test-2', 'merged', 'rio', '[]',
                       datetime('now'), 'internet-finance')""",
            """INSERT INTO prs (number, branch, status, agent, eval_issues, last_attempt, domain)
               VALUES (3, 'rio/test-3', 'closed', 'rio', '["frontmatter_schema"]',
                       datetime('now'), 'internet-finance')""",
        )
        for statement in seed_statements:
            conn.execute(statement)

        summary = get_agent_error_patterns(conn, "rio")
        assert summary["total_prs"] == 3
        assert summary["rejected_prs"] == 2
        assert summary["approval_rate"] == round(1 / 3, 3)

        # frontmatter_schema should be top issue (appears in 2 PRs)
        ranked_issues = summary["top_issues"]
        assert len(ranked_issues) > 0
        leading_issue = ranked_issues[0]
        assert leading_issue["tag"] == "frontmatter_schema"
        assert leading_issue["count"] == 2
        assert "fix" in leading_issue  # Guidance included

    def test_agent_with_all_approvals(self, conn):
        """A single merged PR with no issues gives a 1.0 approval rate."""
        conn.execute(
            """INSERT INTO prs (number, branch, status, agent, eval_issues, last_attempt, domain)
               VALUES (1, 'clay/test-1', 'merged', 'clay', '[]', datetime('now'), 'entertainment')"""
        )

        summary = get_agent_error_patterns(conn, "clay")
        assert summary["total_prs"] == 1
        assert summary["rejected_prs"] == 0
        assert summary["approval_rate"] == 1.0
|