Harden local phase 1b review path
This commit is contained in:
parent
b9cb965591
commit
ca96f5f8e3
5 changed files with 63 additions and 26 deletions
|
|
@ -150,18 +150,17 @@ Pre-implementation repo truth:
|
|||
Estimated implementation progress on this branch:
|
||||
|
||||
- B1 classifier foundation: 100 percent locally, pending staging calibration.
|
||||
- B2 routing layer: 70 percent locally behind a default-off feature flag.
|
||||
- Cross-domain top-2 review: 70 percent locally through mocked eval proof.
|
||||
- Local proof suite: 80 percent for router/eval/parser scope.
|
||||
- B2 routing layer: 75 percent locally behind a default-off feature flag.
|
||||
- Cross-domain top-2 review: 75 percent locally through mocked eval proof.
|
||||
- Local proof suite: 85 percent for router/eval/parser scope.
|
||||
- Staging or VPS proof: 0 percent.
|
||||
|
||||
Remaining delta:
|
||||
|
||||
1. Decide whether the production Phase 1b transport stays Forgejo-first for cutover or switches direct to GitHub `decision-engine` before staging.
|
||||
2. Add stronger idempotency for duplicate comments on retry after partial multi-agent success.
|
||||
3. Update reporting/health compatibility beyond `review_records` if staging shows false readiness.
|
||||
4. Prove against staging before production.
|
||||
5. Deploy only an exact reviewed/tested SHA after Leo signoff.
|
||||
2. Update reporting/health compatibility beyond `review_records` if staging shows false readiness.
|
||||
3. Prove against staging before production.
|
||||
4. Deploy only an exact reviewed/tested SHA after Leo signoff.
|
||||
|
||||
## Closure, Endpoint, And Deployment Truth
|
||||
|
||||
|
|
|
|||
|
|
@ -71,14 +71,13 @@ Branch truth:
|
|||
|
||||
## Completion Percent And Remaining Delta
|
||||
|
||||
Current completion on this branch: 70 percent local implementation behind a default-off feature flag.
|
||||
Current completion on this branch: 75 percent local implementation behind a default-off feature flag.
|
||||
|
||||
Remaining delta:
|
||||
|
||||
1. Decide direct GitHub `decision-engine` comment transport versus Forgejo-first cutover compatibility.
|
||||
2. Add duplicate-comment idempotency for retry after partial multi-agent success.
|
||||
3. Prove with staging PRs and real daemon logs.
|
||||
4. Update contributor/dashboard assumptions only where staging or tests prove breakage.
|
||||
2. Prove with staging PRs and real daemon logs.
|
||||
3. Update contributor/dashboard assumptions only where staging or tests prove breakage.
|
||||
|
||||
## Closure, Endpoint, And Deployment Truth
|
||||
|
||||
|
|
@ -138,6 +137,7 @@ Minimal DB field use:
|
|||
- `domain_verdict`: keep aggregate non-Leo review verdict or aggregate verdict.
|
||||
- `leo_verdict`: set `skipped` unless Leo is a required agent; if Leo is required, store Leo verdict.
|
||||
- `review_records`: write one row per required reviewer attempt with reviewer agent, model, outcome, and notes.
|
||||
- review comments include a `PHASE1B_REVIEW` marker and the current local helper suppresses duplicate posts for the same PR and agent.
|
||||
- audit log: route and all per-agent verdicts.
|
||||
|
||||
This is a compatibility posture, not the ideal long-term schema.
|
||||
|
|
|
|||
|
|
@ -12,20 +12,14 @@ DOMAIN_AGENT_MAP: dict[str, str] = {
|
|||
"entertainment": "Clay",
|
||||
"health": "Vida",
|
||||
"ai-alignment": "Theseus",
|
||||
"ai-systems": "Theseus",
|
||||
"space-development": "Astra",
|
||||
"space": "Astra",
|
||||
"robotics": "Astra",
|
||||
"energy": "Astra",
|
||||
"manufacturing": "Astra",
|
||||
"advanced-manufacturing": "Astra",
|
||||
"mechanisms": "Rio",
|
||||
"living-capital": "Rio",
|
||||
"living-agents": "Theseus",
|
||||
"teleohumanity": "Leo",
|
||||
"grand-strategy": "Leo",
|
||||
"critical-systems": "Theseus",
|
||||
"collective-intelligence": "Leo",
|
||||
"collective-intelligence": "Theseus",
|
||||
"teleological-economics": "Rio",
|
||||
"cultural-dynamics": "Clay",
|
||||
}
|
||||
|
|
@ -37,7 +31,7 @@ VALID_DOMAINS: frozenset[str] = frozenset(DOMAIN_AGENT_MAP.keys())
|
|||
_AGENT_PRIMARY_DOMAIN: dict[str, str] = {
|
||||
"rio": "internet-finance",
|
||||
"clay": "entertainment",
|
||||
"theseus": "ai-systems",
|
||||
"theseus": "ai-alignment",
|
||||
"vida": "health",
|
||||
"astra": "space-development",
|
||||
"leo": "grand-strategy",
|
||||
|
|
|
|||
|
|
@ -80,6 +80,30 @@ def _phase1b_compat_verdicts(agent_verdicts: dict[str, str]) -> tuple[str, str]:
|
|||
return leo_verdict, domain_verdict
|
||||
|
||||
|
||||
def _phase1b_review_marker(pr_number: int, agent: str) -> str:
|
||||
return f"<!-- PHASE1B_REVIEW:PR={pr_number}:AGENT={agent.upper()} -->"
|
||||
|
||||
|
||||
async def _post_phase1b_review_comment(pr_number: int, agent: str, review_text: str) -> bool:
|
||||
"""Post a routed review comment once per PR/agent marker."""
|
||||
marker = _phase1b_review_marker(pr_number, agent)
|
||||
comments = await forgejo_api("GET", repo_path(f"issues/{pr_number}/comments"))
|
||||
if isinstance(comments, list):
|
||||
for comment in comments:
|
||||
body = comment.get("body", "") if isinstance(comment, dict) else ""
|
||||
if marker in body:
|
||||
logger.info("PR #%d: Phase 1b %s review comment already posted", pr_number, agent)
|
||||
return False
|
||||
|
||||
body = review_text if marker in review_text else f"{marker}\n{review_text}"
|
||||
result = await forgejo_api(
|
||||
"POST",
|
||||
repo_path(f"issues/{pr_number}/comments"),
|
||||
{"body": body},
|
||||
)
|
||||
return result is not None
|
||||
|
||||
|
||||
async def _evaluate_pr_phase1b(
|
||||
conn,
|
||||
pr_number: int,
|
||||
|
|
@ -133,18 +157,14 @@ async def _evaluate_pr_phase1b(
|
|||
agent_verdicts[agent] = verdict
|
||||
usage_by_agent[agent] = usage
|
||||
|
||||
await forgejo_api(
|
||||
"POST",
|
||||
repo_path(f"issues/{pr_number}/comments"),
|
||||
{"body": review_text},
|
||||
)
|
||||
await _post_phase1b_review_comment(pr_number, agent, review_text)
|
||||
|
||||
db.record_review(
|
||||
conn,
|
||||
pr_number,
|
||||
"approved" if verdict == "approve" else "rejected",
|
||||
domain=domain,
|
||||
agent=route.primary_agent,
|
||||
agent=agent,
|
||||
reviewer=agent,
|
||||
reviewer_model=_phase1b_review_model(agent, tier),
|
||||
rejection_reason=",".join(parse_issues(review_text)) if verdict == "request_changes" else None,
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ from unittest.mock import AsyncMock
|
|||
import pytest
|
||||
|
||||
from lib import config
|
||||
from lib.evaluate import _evaluate_pr_phase1b, evaluate_pr
|
||||
from lib.evaluate import _evaluate_pr_phase1b, _post_phase1b_review_comment, evaluate_pr
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
|
|
@ -116,6 +116,8 @@ async def _fake_agent_review_reject_vida(_diff, _files, agent, _route_context, t
|
|||
|
||||
|
||||
async def _fake_forgejo_api(method, path, body=None, token=None):
|
||||
if method == "GET" and "comments" in path:
|
||||
return []
|
||||
if method == "GET" and "pulls/" in path:
|
||||
return {"user": {"login": "contributor"}}
|
||||
return {"id": 1}
|
||||
|
|
@ -154,6 +156,10 @@ async def test_phase1b_cross_domain_approves_after_all_required_agents(phase1b_c
|
|||
assert row["domain_agent"] in {"Theseus", "Rio"}
|
||||
review_count = conn.execute("SELECT COUNT(*) AS n FROM review_records WHERE pr_number = 1").fetchone()["n"]
|
||||
assert review_count == 2
|
||||
reviewers = {
|
||||
row["agent"] for row in conn.execute("SELECT agent FROM review_records WHERE pr_number = 1").fetchall()
|
||||
}
|
||||
assert reviewers == {"Theseus", "Rio"}
|
||||
post_formal.assert_awaited_once()
|
||||
|
||||
|
||||
|
|
@ -212,3 +218,21 @@ async def test_evaluate_pr_flag_uses_phase1b_and_not_legacy_reviewers(phase1b_co
|
|||
assert result["agent_verdicts"] == {"Rio": "approve"}
|
||||
legacy_domain.assert_not_awaited()
|
||||
legacy_leo.assert_not_awaited()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_phase1b_review_comment_is_idempotent(monkeypatch):
|
||||
calls = []
|
||||
|
||||
async def fake_api(method, path, body=None, token=None):
|
||||
calls.append((method, path, body))
|
||||
if method == "GET":
|
||||
return [{"body": "<!-- PHASE1B_REVIEW:PR=7:AGENT=RIO -->\nold review"}]
|
||||
return {"id": 1}
|
||||
|
||||
monkeypatch.setattr("lib.evaluate.forgejo_api", fake_api)
|
||||
|
||||
posted = await _post_phase1b_review_comment(7, "Rio", "new review\n<!-- VERDICT:RIO:APPROVE -->")
|
||||
|
||||
assert posted is False
|
||||
assert [call[0] for call in calls] == ["GET"]
|
||||
|
|
|
|||
Loading…
Reference in a new issue