Pipeline reliability (8 fixes, reviewed by Ganymede+Rhea+Leo+Rio):
1. Merge API recovery — pre-flight approval check, transient/permanent distinction, jitter
2. Ghost PR detection — ls-remote branch check in reconciliation, network guard
3. Source status contract — directory IS status, no code change needed
4. Batch-state markers eliminated — two-gate skip (archive-check + batched branch-check)
5. Branch SHA tracking — batched ls-remote, auto-reset verdicts, dismiss stale reviews
6. Mirror pre-flight permissions — chown check in sync-mirror.sh
7. Telegram archive commit-after-write — git add/commit/push with rebase --abort fallback
8. Post-merge source archiving — queue/ → archive/{domain}/ after merge
Pipeline fixes:
- merge_cycled flag — eval attempts preserved during merge-failure cycling (Ganymede+Rhea)
- merge_failures diagnostic counter
- Startup recovery preserves eval_attempts (was incorrectly resetting to 0)
- No-diff PRs auto-closed by eval (root cause of 17 zombie PRs)
- GC threshold aligned with substantive fixer budget (was 2, now 4)
- Conflict retry with 3-attempt budget + permanent conflict handler
- Local ff-merge fallback for Forgejo 405 errors
Telegram bot:
- KB retrieval: 3-layer (entity resolution → claim search → agent context)
- Reply-to-bot handler (context.bot.id check)
- Tag regex: @teleo|@futairdbot
- Prompt rewrite for natural analyst voice
- Market data API integration (Ben's token price endpoint)
- Conversation windows (5-message unanswered counter, per-user-per-chat)
- Conversation history in prompt (last 5 exchanges)
- Worktree file lock for archive writes
Infrastructure:
- worktree_lock.py — file-based lock (flock) for main worktree coordination
- backfill-sources.py — source DB registration for Argus funnel
- batch-extract-50.sh v3 — two-gate skip, batched ls-remote, network guard
- sync-mirror.sh — auto-PR creation for mirrored GitHub branches, permission pre-flight
- Argus dashboard — conflicts + reviewing in backlog, queue count in funnel
- Enrichment-inside-frontmatter bug fix (regex anchor, not --- split)
Pentagon-Agent: Epimetheus <3D35839A-7722-4740-B93D-51157F7D5E70>
273 lines
11 KiB
Python
273 lines
11 KiB
Python
"""Structured rejection feedback — closes the loop for proposer agents.

Maps issue tags to CLAUDE.md quality gates with actionable guidance.
Tracks per-agent error patterns. Provides agent-queryable rejection history.

Problem: Proposer agents (Rio, Clay, etc.) get generic PR comments when
claims are rejected. They can't tell what specifically failed, so they
repeat the same mistakes. Rio: "I have to read the full review comment
and infer what to fix."

Solution: Machine-readable rejection codes in PR comments + per-agent
error pattern tracking on /metrics + agent feedback endpoint.

Epimetheus owns this module. Leo reviews changes.
"""
|
|
|
|
import json
|
|
import logging
|
|
import re
|
|
from datetime import datetime, timezone
|
|
|
|
logger = logging.getLogger("pipeline.feedback")
|
|
|
|
# ─── Quality Gate Mapping ──────────────────────────────────────────────────
#
# One entry per issue tag. Every entry carries the same five fields:
#   gate          name of the CLAUDE.md checklist item the tag belongs to
#   description   what went wrong
#   fix           what the proposer agent should change
#   severity      "blocking" or "warning"
#   auto_fixable  whether the entry is flagged as auto-fixable

QUALITY_GATES: dict[str, dict] = {
    "frontmatter_schema": {
        "gate": "Schema compliance",
        "description": "Missing or invalid YAML frontmatter fields",
        "fix": (
            "Ensure all 6 required fields: type, domain, description, confidence, source, created. "
            "Use exact field names (not source_archive, not claim)."
        ),
        "severity": "blocking",
        "auto_fixable": True,
    },
    "broken_wiki_links": {
        "gate": "Wiki link validity",
        "description": "[[wiki links]] reference files that don't exist in the KB",
        "fix": (
            "Only link to files listed in the KB index. If a claim doesn't exist yet, "
            "omit the link or use <!-- claim pending: description -->."
        ),
        "severity": "warning",
        "auto_fixable": True,
    },
    "title_overclaims": {
        "gate": "Title precision",
        "description": "Title asserts more than the evidence supports",
        "fix": (
            "Scope the title to match the evidence strength. Single source = "
            "'X suggests Y' not 'X proves Y'. Name the specific mechanism."
        ),
        "severity": "blocking",
        "auto_fixable": False,
    },
    "confidence_miscalibration": {
        "gate": "Confidence calibration",
        "description": "Confidence level doesn't match evidence strength",
        "fix": (
            "Single source = experimental max. 3+ corroborating sources with data = likely. "
            "Pitch rhetoric or self-reported metrics = speculative. "
            "proven requires multiple independent confirmations."
        ),
        "severity": "blocking",
        "auto_fixable": False,
    },
    "date_errors": {
        "gate": "Date accuracy",
        "description": "Invalid or incorrect date format in created field",
        "fix": "created = extraction date (today), not source publication date. Format: YYYY-MM-DD.",
        "severity": "blocking",
        "auto_fixable": True,
    },
    "factual_discrepancy": {
        "gate": "Factual accuracy",
        "description": "Claim contains factual errors or misrepresents source material",
        "fix": (
            "Re-read the source. Verify specific numbers, names, dates. "
            "If source X quotes source Y, attribute to Y."
        ),
        "severity": "blocking",
        "auto_fixable": False,
    },
    "near_duplicate": {
        "gate": "Duplicate check",
        "description": "Substantially similar claim already exists in KB",
        "fix": (
            "Check KB index before extracting. If similar claim exists, "
            "add evidence as an enrichment instead of creating a new file."
        ),
        "severity": "warning",
        "auto_fixable": False,
    },
    "scope_error": {
        "gate": "Scope qualification",
        "description": "Claim uses unscoped universals or is too vague to disagree with",
        "fix": (
            "Specify: structural vs functional, micro vs macro, causal vs correlational. "
            "Replace 'always/never/the fundamental' with scoped language."
        ),
        "severity": "blocking",
        "auto_fixable": False,
    },
    "opsec_internal_deal_terms": {
        "gate": "OPSEC",
        "description": "Claim contains internal LivingIP/Teleo deal terms",
        "fix": (
            "Never extract specific dollar amounts, valuations, equity percentages, "
            "or deal terms for LivingIP/Teleo. General market data is fine."
        ),
        "severity": "blocking",
        "auto_fixable": False,
    },
    "body_too_thin": {
        "gate": "Evidence quality",
        "description": "Claim body lacks substantive argument or evidence",
        "fix": (
            "The body must explain WHY the claim is supported with specific data, "
            "quotes, or studies from the source. A body that restates the title is not enough."
        ),
        "severity": "blocking",
        "auto_fixable": False,
    },
    "title_too_few_words": {
        "gate": "Title precision",
        "description": "Title is too short to be a specific, disagreeable proposition",
        "fix": (
            "Minimum 4 words. Name the specific mechanism and outcome. "
            "Bad: 'futarchy works'. Good: 'futarchy is manipulation-resistant because "
            "attack attempts create profitable opportunities for defenders'."
        ),
        "severity": "blocking",
        "auto_fixable": False,
    },
    "title_not_proposition": {
        "gate": "Title precision",
        "description": "Title reads as a label, not an arguable proposition",
        "fix": (
            "The title must contain a verb and read as a complete sentence. "
            "Test: 'This note argues that [title]' must work grammatically."
        ),
        "severity": "blocking",
        "auto_fixable": False,
    },
}
|
|
|
|
|
|
# ─── Feedback Formatting ──────────────────────────────────────────────────
|
|
|
|
|
|
def format_rejection_comment(
    issues: list[str],
    source: str = "validator",
) -> str:
    """Build the PR comment body that reports rejection issues.

    The first line is an HTML comment holding a JSON payload (``issues``,
    ``source`` and a UTC ISO-8601 ``ts``) that agents can parse
    programmatically. The remaining lines are human-readable guidance looked
    up in ``QUALITY_GATES``.

    Args:
        issues: Issue tags to report. A tag missing from ``QUALITY_GATES`` is
            still listed: the tag itself stands in for gate name and
            description, it is labelled ``WARN``, and a generic fix hint is used.
        source: Label stored in the machine-readable payload.

    Returns:
        The comment text, lines joined with newlines.
    """
    payload = {
        "issues": issues,
        "source": source,
        "ts": datetime.now(timezone.utc).isoformat(),
    }
    # Machine-readable marker first, then a blank separator line.
    out = [f"<!-- REJECTION: {json.dumps(payload)} -->", ""]

    # Headline: blocking issues take precedence over warnings; tags unknown
    # to QUALITY_GATES count towards neither bucket.
    severities = [QUALITY_GATES.get(tag, {}).get("severity") for tag in issues]
    n_blocking = severities.count("blocking")
    n_warning = severities.count("warning")
    if n_blocking:
        plural = "s" if n_blocking > 1 else ""
        out.append(f"**Rejected** — {n_blocking} blocking issue{plural}\n")
    elif n_warning:
        plural = "s" if n_warning > 1 else ""
        out.append(f"**Warnings** — {n_warning} non-blocking issue{plural}\n")

    # One guidance stanza per tag, in the order the tags were given.
    for tag in issues:
        info = QUALITY_GATES.get(tag, {})
        label = "BLOCK" if info.get("severity", "unknown") == "blocking" else "WARN"
        suffix = " (auto-fixable)" if info.get("auto_fixable") else ""
        out.extend(
            (
                f"**[{label}] {info.get('gate', tag)}**: {info.get('description', tag)}{suffix}",
                f" - Fix: {info.get('fix', 'See CLAUDE.md quality gates.')}",
                "",
            )
        )

    return "\n".join(out)
|
|
|
|
|
|
def parse_rejection_comment(comment_body: str) -> dict | None:
|
|
"""Parse a structured rejection comment. Returns rejection data or None."""
|
|
match = re.search(r"<!-- REJECTION: ({.+?}) -->", comment_body)
|
|
if match:
|
|
try:
|
|
return json.loads(match.group(1))
|
|
except json.JSONDecodeError:
|
|
return None
|
|
return None
|
|
|
|
|
|
# ─── Per-Agent Error Tracking ──────────────────────────────────────────────
|
|
|
|
|
|
def get_agent_error_patterns(conn, agent: str, hours: int = 168) -> dict:
    """Summarise rejection patterns for one agent over the last ``hours`` hours.

    Args:
        conn: Database connection exposing ``execute(sql, params)``; rows must
            support lookup by column name (e.g. ``sqlite3.Row``). The query
            uses SQLite's ``datetime('now', ...)`` function.
        agent: Value matched against ``prs.agent``.
        hours: Look-back window applied to ``last_attempt``. Default 168
            hours = 7 days.

    Returns:
        A dict with keys ``agent``, ``period_hours``, ``total_prs``,
        ``rejected_prs``, ``approval_rate``, ``top_issues`` and
        ``issue_breakdown``.

        * ``rejected_prs`` counts rows whose status is ``closed`` or ``zombie``.
        * ``approval_rate`` is ``(total - rejected) / total`` rounded to three
          places, or ``None`` when the window holds no PRs.
        * ``top_issues`` lists up to five tags by frequency, each enriched
          with the matching ``QUALITY_GATES`` guidance.
        * ``issue_breakdown`` maps every seen tag to its occurrence count.

        When the window holds no PRs the result additionally carries
        ``"trend": "no_data"``.
    """
    rows = conn.execute(
        """SELECT number, status, eval_issues, domain_verdict, leo_verdict,
                  tier, created_at, last_attempt
           FROM prs
           WHERE agent = ?
             AND last_attempt > datetime('now', ? || ' hours')
           ORDER BY last_attempt DESC""",
        (agent, f"-{hours}"),
    ).fetchall()

    total = len(rows)
    if total == 0:
        # Same identifying keys as the populated result, so callers can read
        # "agent" / "period_hours" without special-casing an empty window.
        return {
            "agent": agent,
            "period_hours": hours,
            "total_prs": 0,
            "rejected_prs": 0,
            "approval_rate": None,
            "top_issues": [],
            "issue_breakdown": {},
            "trend": "no_data",
        }

    rejected = 0
    issue_counts: dict[str, int] = {}

    for row in rows:
        if row["status"] in ("closed", "zombie"):
            rejected += 1

        issues_raw = row["eval_issues"]
        if not issues_raw or issues_raw == "[]":
            continue
        try:
            tags = json.loads(issues_raw)
        except (json.JSONDecodeError, TypeError):
            # Best effort: one malformed row must not break the whole report.
            continue
        if not isinstance(tags, list):
            # A JSON string or object would otherwise be iterated character by
            # character / key by key and pollute the counts with bogus tags.
            continue
        for tag in tags:
            if isinstance(tag, str):
                issue_counts[tag] = issue_counts.get(tag, 0) + 1

    # total is non-zero here (the empty window returned above).
    approval_rate = round((total - rejected) / total, 3)
    top_issues = sorted(issue_counts.items(), key=lambda x: x[1], reverse=True)[:5]

    # Attach quality-gate guidance to the most frequent issues.
    top_with_guidance = []
    for tag, count in top_issues:
        gate = QUALITY_GATES.get(tag, {})
        top_with_guidance.append({
            "tag": tag,
            "count": count,
            "pct": round(count / total * 100, 1),
            "gate": gate.get("gate", tag),
            "fix": gate.get("fix", "See CLAUDE.md"),
            "auto_fixable": gate.get("auto_fixable", False),
        })

    return {
        "agent": agent,
        "period_hours": hours,
        "total_prs": total,
        "rejected_prs": rejected,
        "approval_rate": approval_rate,
        "top_issues": top_with_guidance,
        "issue_breakdown": issue_counts,
    }
|
|
|
|
|
|
def get_all_agent_patterns(conn, hours: int = 168) -> dict:
    """Collect rejection patterns for every agent seen in the window.

    Selects the distinct non-NULL ``prs.agent`` values whose ``last_attempt``
    falls within the last ``hours`` hours, then calls
    ``get_agent_error_patterns`` once per agent.

    Returns:
        A mapping ``{agent: patterns}``; empty when no agent qualifies.
    """
    active = conn.execute(
        """SELECT DISTINCT agent FROM prs
           WHERE agent IS NOT NULL
             AND last_attempt > datetime('now', ? || ' hours')""",
        (f"-{hours}",),
    ).fetchall()

    patterns = {}
    for record in active:
        name = record["agent"]
        patterns[name] = get_agent_error_patterns(conn, name, hours)
    return patterns
|