Phase 1 — Audit logging infrastructure: - review_records table (migration v12) capturing every eval verdict with outcome, rejection reason, disagreement type - Cascade automation: auto-flag dependent beliefs/positions when merged claims change - Merge frontmatter stamps: last_review metadata on merged claim files Phase 2 — Cross-domain and state tracking: - Cross-domain citation index: entity overlap detection across domains on every merge - Agent-state schema v1: file-backed state for VPS agents (memory, tasks, inbox, metrics) - Cascade completion tracking: process-cascade-inbox.py logs review outcomes - research-session.sh: state hooks + cascade processing integration All changes are live on VPS. This commit brings the code under version control for review. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
274 lines
9.8 KiB
Python
274 lines
9.8 KiB
Python
"""Cascade automation — auto-flag dependent beliefs/positions when claims change.
|
|
|
|
Hook point: called from merge.py after _embed_merged_claims, before _delete_remote_branch.
|
|
Uses the same main_sha/branch_sha diff to detect changed claim files, then scans
|
|
all agent beliefs and positions for depends_on references to those claims.
|
|
|
|
Notifications are written to /opt/teleo-eval/agent-state/{agent}/inbox/ using
|
|
the same atomic-write pattern as lib-state.sh.
|
|
"""
|
|
|
|
import asyncio
|
|
import hashlib
|
|
import json
|
|
import logging
|
|
import os
|
|
import re
|
|
import tempfile
|
|
from datetime import datetime, timezone
|
|
from pathlib import Path
|
|
|
|
logger = logging.getLogger("pipeline.cascade")
|
|
|
|
AGENT_STATE_DIR = Path("/opt/teleo-eval/agent-state")
|
|
CLAIM_DIRS = {"domains/", "core/", "foundations/", "decisions/"}
|
|
AGENT_NAMES = ["rio", "leo", "clay", "astra", "vida", "theseus"]
|
|
|
|
|
|
def _extract_claim_titles_from_diff(diff_files: list[str]) -> set[str]:
|
|
"""Extract claim titles from changed file paths."""
|
|
titles = set()
|
|
for fpath in diff_files:
|
|
if not fpath.endswith(".md"):
|
|
continue
|
|
if not any(fpath.startswith(d) for d in CLAIM_DIRS):
|
|
continue
|
|
basename = os.path.basename(fpath)
|
|
if basename.startswith("_") or basename == "directory.md":
|
|
continue
|
|
title = basename.removesuffix(".md")
|
|
titles.add(title)
|
|
return titles
|
|
|
|
|
|
def _normalize_for_match(text: str) -> str:
    """Canonicalize *text* for fuzzy comparison.

    Lowercases, treats hyphens as word separators, drops all remaining
    punctuation, and collapses whitespace runs to single spaces.
    """
    lowered = text.strip().lower().replace("-", " ")
    depunctuated = re.sub(r"[^\w\s]", "", lowered)
    return re.sub(r"\s+", " ", depunctuated)
|
|
|
|
|
|
def _slug_to_words(slug: str) -> str:
    """Turn a kebab-case slug into plain space-separated words."""
    return " ".join(slug.split("-"))
|
|
|
|
|
|
def _parse_depends_on(file_path: Path) -> tuple[str, list[str]]:
    """Parse a belief or position file's agent and depends_on entries.

    Scans the YAML frontmatter for ``agent:`` and ``depends_on:`` — either an
    inline ``[a, b]`` list or an indented ``- item`` block — then collects any
    ``[[wiki-links]]`` from the whole document as additional dependencies.

    Fixes over the previous version:
    - inline lists now accept *unquoted* items (the old quoted-only regex
      silently dropped them);
    - block-list items use ``removeprefix("- ")`` instead of ``lstrip("- ")``,
      which could eat leading ``-``/space characters of the dep itself.

    Returns:
        (agent_name, [dependency_titles]); ("", []) if the file is unreadable.
    """
    try:
        content = file_path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError):
        return ("", [])

    agent = ""
    deps: list[str] = []
    in_frontmatter = False
    in_depends = False

    def _strip_wiki(raw: str) -> str:
        # Remove [[wiki-link]] brackets from a single dependency entry.
        return raw.replace("[[", "").replace("]]", "")

    for line in content.split("\n"):
        if line.strip() == "---":
            if not in_frontmatter:
                in_frontmatter = True
                continue
            # Second delimiter closes the frontmatter; stop scanning it.
            break

        if not in_frontmatter:
            continue

        if line.startswith("agent:"):
            agent = line.split(":", 1)[1].strip().strip('"').strip("'")
        elif line.startswith("depends_on:"):
            in_depends = True
            rest = line.split(":", 1)[1].strip()
            if rest.startswith("["):
                # Inline list: split on commas so both quoted and unquoted
                # items are captured.
                inner = rest.removeprefix("[").removesuffix("]")
                for item in inner.split(","):
                    cleaned = item.strip().strip('"').strip("'")
                    cleaned = _strip_wiki(cleaned.strip("[]"))
                    if cleaned:
                        deps.append(cleaned)
                in_depends = False
        elif in_depends:
            if line.startswith(" - "):
                entry = line.strip().removeprefix("- ").strip('"').strip("'")
                deps.append(_strip_wiki(entry))
            elif line.strip() and not line.startswith(" "):
                # A new un-indented key ends the depends_on block.
                in_depends = False

    # Also scan the body for [[wiki-links]] as implicit dependencies.
    for link in re.findall(r"\[\[([^\]]+)\]\]", content):
        if link not in deps:
            deps.append(link)

    return (agent, deps)
|
|
|
|
|
|
def _write_inbox_message(agent: str, subject: str, body: str) -> bool:
    """Atomically write a cascade notification into *agent*'s inbox.

    Uses the tmp-file + same-directory rename pattern so readers never see a
    partially written message.

    Fixes over the previous version:
    - the subject is sanitized before being embedded in the filename (a ``/``
      would have escaped the inbox directory or broken the rename);
    - the temp file is removed if anything fails before the rename, instead
      of being leaked into the inbox directory.

    Returns:
        True on success, False when the inbox is missing or the write fails.
    """
    inbox_dir = AGENT_STATE_DIR / agent / "inbox"
    if not inbox_dir.exists():
        logger.warning("cascade: no inbox dir for agent %s, skipping", agent)
        return False

    ts = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S")
    file_hash = hashlib.md5(f"{agent}-{subject}-{body[:200]}".encode()).hexdigest()[:8]
    # Keep the filename filesystem-safe; word chars, dots and hyphens only.
    safe_subject = re.sub(r"[^\w.-]+", "-", subject)[:60]
    final_path = inbox_dir / f"cascade-{ts}-{safe_subject}-{file_hash}.md"

    tmp_path = None
    try:
        fd, tmp_path = tempfile.mkstemp(dir=str(inbox_dir), suffix=".tmp")
        with os.fdopen(fd, "w") as f:
            f.write("---\n")
            f.write("type: cascade\n")
            f.write("from: pipeline\n")
            f.write(f"to: {agent}\n")
            f.write(f"subject: \"{subject}\"\n")
            f.write(f"created: {datetime.now(timezone.utc).isoformat()}\n")
            f.write("status: unread\n")
            f.write("---\n\n")
            f.write(body)
        os.rename(tmp_path, str(final_path))
        return True
    except OSError:
        logger.exception("cascade: failed to write inbox message for %s", agent)
        # Best-effort cleanup so failed writes don't leave .tmp orphans.
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except OSError:
                pass
        return False
|
|
|
|
|
|
def _find_matches(deps: list[str], claim_lookup: dict[str, str]) -> list[str]:
    """Return original claim titles from *claim_lookup* that match *deps*.

    Tries an exact normalized match first, then falls back to substring
    containment — but only when BOTH strings of the candidate pair are at
    least 15 characters, to avoid false positives on short generic names.

    Bug fix: the old gate used the shortest key in the *entire* lookup, so a
    single short claim title (e.g. "foo") disabled fuzzy matching for every
    dependency. The length check is now applied per candidate pair.

    Args:
        deps: raw dependency strings from a belief/position file.
        claim_lookup: normalized claim title -> original title.
    """
    matched = []
    for dep in deps:
        norm = _normalize_for_match(dep)
        if norm in claim_lookup:
            matched.append(claim_lookup[norm])
            continue
        # Substring fallback, gated per pair for sufficiently specific names.
        for claim_norm, claim_orig in claim_lookup.items():
            if min(len(norm), len(claim_norm)) < 15:
                continue
            if claim_norm in norm or norm in claim_norm:
                matched.append(claim_orig)
                break
    return matched
|
|
|
|
|
|
def _format_cascade_body(
    file_name: str,
    file_type: str,
    matched_claims: list[str],
    pr_num: int,
) -> str:
    """Render the markdown body of a cascade notification message."""
    bullet_list = "\n".join(f"- {claim}" for claim in matched_claims)
    sections = [
        "# Cascade: upstream claims changed\n",
        f"Your {file_type} **{file_name}** depends on claims that were modified in PR #{pr_num}.\n",
        f"## Changed claims\n\n{bullet_list}\n",
        "## Action needed\n",
        (
            f"Review whether your {file_type}'s confidence, description, or grounding "
            "needs updating in light of these changes. If the evidence strengthened, "
            "consider increasing confidence. If it weakened or contradicted, flag for "
            "re-evaluation.\n"
        ),
    ]
    # Joining on "\n" reproduces the blank line between every section.
    return "\n".join(sections)
|
|
|
|
|
|
async def cascade_after_merge(
    main_sha: str,
    branch_sha: str,
    pr_num: int,
    main_worktree: Path,
    conn=None,
) -> int:
    """Scan for beliefs/positions affected by claims changed in this merge.

    Diffs ``main_sha..branch_sha``, maps changed claim files to normalized
    titles, then walks every known agent's beliefs/ and positions/ directories
    and sends an inbox notification for each file whose dependencies match a
    changed claim.

    Args:
        main_sha: left side of the git diff (pre-merge main commit).
        branch_sha: right side of the git diff (merged branch head).
        pr_num: PR number, used in notification bodies and log lines.
        main_worktree: checkout containing the claim files and agents/ tree.
        conn: optional DB connection; assumed to expose ``execute`` and an
            ``audit_log(stage, event, detail)`` table — TODO confirm schema
            against migration. The insert is best-effort: failures are
            logged, never raised.

    Returns:
        The number of cascade notifications sent; 0 on any early exit
        (diff timeout/failure, no claim changes, missing agents/ dir).
    """
    # 1. Get changed files. --diff-filter=ACMR means deletions are ignored:
    # only Added/Copied/Modified/Renamed claim files trigger cascades.
    proc = await asyncio.create_subprocess_exec(
        "git", "diff", "--name-only", "--diff-filter=ACMR",
        main_sha, branch_sha,
        cwd=str(main_worktree),
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    try:
        stdout, _ = await asyncio.wait_for(proc.communicate(), timeout=10)
    except asyncio.TimeoutError:
        # Never let a hung git call stall the merge pipeline; kill and bail.
        proc.kill()
        await proc.wait()
        logger.warning("cascade: git diff timed out")
        return 0

    if proc.returncode != 0:
        logger.warning("cascade: git diff failed (rc=%d)", proc.returncode)
        return 0

    # Drop empty strings left by the trailing newline in git's output.
    diff_files = [f for f in stdout.decode().strip().split("\n") if f]

    # 2. Extract claim titles from changed files.
    changed_claims = _extract_claim_titles_from_diff(diff_files)
    if not changed_claims:
        return 0

    logger.info("cascade: %d claims changed in PR #%d: %s",
                len(changed_claims), pr_num, list(changed_claims)[:5])

    # Build normalized lookup for fuzzy matching. Each claim is indexed both
    # as-is and with its slug expanded to words, so "my-claim" and "my claim"
    # both resolve to the same original title.
    claim_lookup = {}
    for claim in changed_claims:
        claim_lookup[_normalize_for_match(claim)] = claim
        claim_lookup[_normalize_for_match(_slug_to_words(claim))] = claim

    # 3. Scan all beliefs and positions.
    notifications = 0
    agents_dir = main_worktree / "agents"
    if not agents_dir.exists():
        logger.warning("cascade: no agents/ dir in worktree")
        return 0

    for agent_name in AGENT_NAMES:
        agent_dir = agents_dir / agent_name
        if not agent_dir.exists():
            continue

        for subdir, file_type in [("beliefs", "belief"), ("positions", "position")]:
            target_dir = agent_dir / subdir
            if not target_dir.exists():
                continue
            for md_file in target_dir.glob("*.md"):
                # Ownership comes from the directory being scanned; the
                # file's own `agent:` frontmatter value is discarded here.
                _, deps = _parse_depends_on(md_file)
                matched = _find_matches(deps, claim_lookup)
                if matched:
                    body = _format_cascade_body(md_file.name, file_type, matched, pr_num)
                    if _write_inbox_message(agent_name, f"claim-changed-affects-{file_type}", body):
                        notifications += 1
                        logger.info("cascade: notified %s — %s '%s' affected by %s",
                                    agent_name, file_type, md_file.stem, matched)

    if notifications:
        logger.info("cascade: sent %d notifications for PR #%d", notifications, pr_num)

    # Write structured audit_log entry for cascade tracking (Page 4 data).
    if conn is not None:
        try:
            conn.execute(
                "INSERT INTO audit_log (stage, event, detail) VALUES (?, ?, ?)",
                ("cascade", "cascade_triggered", json.dumps({
                    "pr": pr_num,
                    "claims_changed": list(changed_claims)[:20],
                    "notifications_sent": notifications,
                })),
            )
        except Exception:
            # Audit logging is deliberately non-fatal to the merge flow.
            logger.exception("cascade: audit_log write failed (non-fatal)")

    return notifications
|