"""Structured rejection feedback — closes the loop for proposer agents. Maps issue tags to CLAUDE.md quality gates with actionable guidance. Tracks per-agent error patterns. Provides agent-queryable rejection history. Problem: Proposer agents (Rio, Clay, etc.) get generic PR comments when claims are rejected. They can't tell what specifically failed, so they repeat the same mistakes. Rio: "I have to read the full review comment and infer what to fix." Solution: Machine-readable rejection codes in PR comments + per-agent error pattern tracking on /metrics + agent feedback endpoint. Epimetheus owns this module. Leo reviews changes. """ import json import logging import re from datetime import datetime, timezone logger = logging.getLogger("pipeline.feedback") # ─── Quality Gate Mapping ────────────────────────────────────────────────── # # Maps each issue tag to its CLAUDE.md quality gate, with actionable guidance # for the proposer agent. The "gate" field references the specific checklist # item in CLAUDE.md. The "fix" field tells the agent exactly what to change. QUALITY_GATES: dict[str, dict] = { "frontmatter_schema": { "gate": "Schema compliance", "description": "Missing or invalid YAML frontmatter fields", "fix": "Ensure all 6 required fields: type, domain, description, confidence, source, created. " "Use exact field names (not source_archive, not claim).", "severity": "blocking", "auto_fixable": True, }, "broken_wiki_links": { "gate": "Wiki link validity", "description": "[[wiki links]] reference files that don't exist in the KB", "fix": "Only link to files listed in the KB index. If a claim doesn't exist yet, " "omit the link or use .", "severity": "warning", "auto_fixable": True, }, "title_overclaims": { "gate": "Title precision", "description": "Title asserts more than the evidence supports", "fix": "Scope the title to match the evidence strength. Single source = " "'X suggests Y' not 'X proves Y'. Name the specific mechanism.", "severity": "blocking", "auto_fixable": False, }, "confidence_miscalibration": { "gate": "Confidence calibration", "description": "Confidence level doesn't match evidence strength", "fix": "Single source = experimental max. 3+ corroborating sources with data = likely. " "Pitch rhetoric or self-reported metrics = speculative. " "proven requires multiple independent confirmations.", "severity": "blocking", "auto_fixable": False, }, "date_errors": { "gate": "Date accuracy", "description": "Invalid or incorrect date format in created field", "fix": "created = extraction date (today), not source publication date. Format: YYYY-MM-DD.", "severity": "blocking", "auto_fixable": True, }, "factual_discrepancy": { "gate": "Factual accuracy", "description": "Claim contains factual errors or misrepresents source material", "fix": "Re-read the source. Verify specific numbers, names, dates. " "If source X quotes source Y, attribute to Y.", "severity": "blocking", "auto_fixable": False, }, "near_duplicate": { "gate": "Duplicate check", "description": "Substantially similar claim already exists in KB", "fix": "Check KB index before extracting. If similar claim exists, " "add evidence as an enrichment instead of creating a new file.", "severity": "warning", "auto_fixable": False, }, "scope_error": { "gate": "Scope qualification", "description": "Claim uses unscoped universals or is too vague to disagree with", "fix": "Specify: structural vs functional, micro vs macro, causal vs correlational. " "Replace 'always/never/the fundamental' with scoped language.", "severity": "blocking", "auto_fixable": False, }, "opsec_internal_deal_terms": { "gate": "OPSEC", "description": "Claim contains internal LivingIP/Teleo deal terms", "fix": "Never extract specific dollar amounts, valuations, equity percentages, " "or deal terms for LivingIP/Teleo. General market data is fine.", "severity": "blocking", "auto_fixable": False, }, "body_too_thin": { "gate": "Evidence quality", "description": "Claim body lacks substantive argument or evidence", "fix": "The body must explain WHY the claim is supported with specific data, " "quotes, or studies from the source. A body that restates the title is not enough.", "severity": "blocking", "auto_fixable": False, }, "title_too_few_words": { "gate": "Title precision", "description": "Title is too short to be a specific, disagreeable proposition", "fix": "Minimum 4 words. Name the specific mechanism and outcome. " "Bad: 'futarchy works'. Good: 'futarchy is manipulation-resistant because " "attack attempts create profitable opportunities for defenders'.", "severity": "blocking", "auto_fixable": False, }, "title_not_proposition": { "gate": "Title precision", "description": "Title reads as a label, not an arguable proposition", "fix": "The title must contain a verb and read as a complete sentence. " "Test: 'This note argues that [title]' must work grammatically.", "severity": "blocking", "auto_fixable": False, }, } # ─── Feedback Formatting ────────────────────────────────────────────────── def format_rejection_comment( issues: list[str], source: str = "validator", ) -> str: """Format a structured rejection comment for a PR. Includes machine-readable tags AND human-readable guidance. Agents can parse the block programmatically. """ lines = [] # Machine-readable block (agents parse this) rejection_data = { "issues": issues, "source": source, "ts": datetime.now(timezone.utc).isoformat(), } lines.append(f"") lines.append("") # Human-readable summary blocking = [i for i in issues if QUALITY_GATES.get(i, {}).get("severity") == "blocking"] warnings = [i for i in issues if QUALITY_GATES.get(i, {}).get("severity") == "warning"] if blocking: lines.append(f"**Rejected** — {len(blocking)} blocking issue{'s' if len(blocking) > 1 else ''}\n") elif warnings: lines.append(f"**Warnings** — {len(warnings)} non-blocking issue{'s' if len(warnings) > 1 else ''}\n") # Per-issue guidance for tag in issues: gate = QUALITY_GATES.get(tag, {}) severity = gate.get("severity", "unknown") icon = "BLOCK" if severity == "blocking" else "WARN" gate_name = gate.get("gate", tag) description = gate.get("description", tag) fix = gate.get("fix", "See CLAUDE.md quality gates.") auto = " (auto-fixable)" if gate.get("auto_fixable") else "" lines.append(f"**[{icon}] {gate_name}**: {description}{auto}") lines.append(f" - Fix: {fix}") lines.append("") return "\n".join(lines) def parse_rejection_comment(comment_body: str) -> dict | None: """Parse a structured rejection comment. Returns rejection data or None.""" match = re.search(r"", comment_body) if match: try: return json.loads(match.group(1)) except json.JSONDecodeError: return None return None # ─── Per-Agent Error Tracking ────────────────────────────────────────────── def get_agent_error_patterns(conn, agent: str, hours: int = 168) -> dict: """Get rejection patterns for a specific agent over the last N hours. Returns {total_prs, rejected_prs, top_issues, issue_breakdown, trend}. Default 168 hours = 7 days. """ # Get PRs by this agent in the time window rows = conn.execute( """SELECT number, status, eval_issues, domain_verdict, leo_verdict, tier, created_at, last_attempt FROM prs WHERE agent = ? AND last_attempt > datetime('now', ? || ' hours') ORDER BY last_attempt DESC""", (agent, f"-{hours}"), ).fetchall() total = len(rows) if total == 0: return {"total_prs": 0, "rejected_prs": 0, "approval_rate": None, "top_issues": [], "issue_breakdown": {}, "trend": "no_data"} rejected = 0 issue_counts: dict[str, int] = {} for row in rows: status = row["status"] if status in ("closed", "zombie"): rejected += 1 issues_raw = row["eval_issues"] if issues_raw and issues_raw != "[]": try: tags = json.loads(issues_raw) for tag in tags: if isinstance(tag, str): issue_counts[tag] = issue_counts.get(tag, 0) + 1 except (json.JSONDecodeError, TypeError): pass approval_rate = round((total - rejected) / total, 3) if total > 0 else None top_issues = sorted(issue_counts.items(), key=lambda x: x[1], reverse=True)[:5] # Add guidance for top issues top_with_guidance = [] for tag, count in top_issues: gate = QUALITY_GATES.get(tag, {}) top_with_guidance.append({ "tag": tag, "count": count, "pct": round(count / total * 100, 1), "gate": gate.get("gate", tag), "fix": gate.get("fix", "See CLAUDE.md"), "auto_fixable": gate.get("auto_fixable", False), }) return { "agent": agent, "period_hours": hours, "total_prs": total, "rejected_prs": rejected, "approval_rate": approval_rate, "top_issues": top_with_guidance, "issue_breakdown": issue_counts, } def get_all_agent_patterns(conn, hours: int = 168) -> dict: """Get rejection patterns for all agents. Returns {agent: patterns}.""" agents = conn.execute( """SELECT DISTINCT agent FROM prs WHERE agent IS NOT NULL AND last_attempt > datetime('now', ? || ' hours')""", (f"-{hours}",), ).fetchall() return { row["agent"]: get_agent_error_patterns(conn, row["agent"], hours) for row in agents }