vida: KB health assessment script for ops/ #203

Closed
leo wants to merge 2 commits from vida/kb-health-check into main
Showing only changes of commit d74b7e0a12 - Show all commits

562
ops/kb-health-check.py Normal file
View file

@ -0,0 +1,562 @@
#!/usr/bin/env python3
"""
Teleo Codex Knowledge Base Health Assessment
Computes Tier 1 (automated) and Tier 2 (semi-automated) health metrics for the
collective knowledge base. Outputs JSON snapshot + markdown report.
Usage:
REPO_ROOT=/path/to/teleo-codex python3 ops/kb-health-check.py
Optional env vars:
REPO_ROOT Path to repo checkout (default: current directory)
OUTPUT_DIR Where to write snapshots (default: stdout + agents/vida/musings/)
METRICS_DIR VPS metrics directory (default: none, for local runs)
Designed to run:
- Manually by any agent during a session
- Daily via VPS cron at /opt/teleo-eval/metrics/
- Re-runnable at any time: claim-index.json is a cache regenerated on every run
Infrastructure decisions (from collective design review):
- Script lives in ops/ (shared infrastructure, not any agent's territory)
- claim-index.json is a runtime cache, not git-tracked (derived artifact)
- Daily snapshots go to VPS filesystem, not main branch (repo is for knowledge, not telemetry)
- Weekly digests go IN repo via normal PR flow (agent-authored analysis = knowledge)
Design: Vida (domain health), Leo (cross-domain), Theseus (measurement theory), Ganymede (ops)
"""
import os
import re
import json
import sys
from collections import defaultdict
from datetime import datetime, date
from pathlib import Path
REPO_ROOT = os.environ.get("REPO_ROOT", ".")
CLAIM_DIRS = ["domains", "core", "foundations"]
AGENT_DIR = "agents"
TODAY = date.today().isoformat()
# ---------------------------------------------------------------------------
# Parsing
# ---------------------------------------------------------------------------
def parse_frontmatter(filepath):
    """Extract YAML frontmatter from a markdown file.

    Returns (frontmatter_dict, body). frontmatter_dict is None when the file
    cannot be read or has no leading ``---`` block. Parsing is deliberately
    naive: one ``key: value`` per line, surrounding quotes stripped — nested
    YAML is not supported.
    """
    try:
        with open(filepath, "r", encoding="utf-8") as f:
            content = f.read()
    except Exception:
        # Unreadable file (permissions, encoding, race) — treat as empty.
        return None, ""
    if not content.startswith("---"):
        return None, content
    # Anchor the closing delimiter to a line start: a bare find("---") would
    # match '---' occurring inside a frontmatter value and truncate the block.
    end = content.find("\n---", 3)
    if end == -1:
        return None, content
    fm_text = content[3:end].strip()
    fm = {}
    for line in fm_text.split("\n"):
        if ":" in line:
            key, val = line.split(":", 1)
            fm[key.strip()] = val.strip().strip('"').strip("'")
    body = content[end + 4 :]  # skip the '\n---' delimiter itself
    return fm, body
def extract_wiki_links(text):
    """Return every [[wiki link]] target found in *text*.

    Pipe aliases are dropped: ``[[Target|shown text]]`` yields ``"Target"``.
    """
    wiki_link = re.compile(r"\[\[([^\]|]+?)(?:\|[^\]]+?)?\]\]")
    return wiki_link.findall(text)
def extract_argumentative_links(body):
    """
    Split wiki links into argumentative (in prose paragraphs) vs structural
    (in 'Relevant Notes' / 'Topics' footer sections).
    Argumentative links carry more weight per Theseus's Goodhart mitigation.

    Returns (prose_links, footer_links). The footer begins at the last
    occurrence of the first matching marker; with no marker the whole body
    is prose and footer_links is empty.
    """
    # Split at common footer markers
    footer_markers = ["Relevant Notes:", "Topics:", "---"]
    split_at = len(body)  # no marker -> everything is prose
    for marker in footer_markers:
        idx = body.rfind(marker)
        if idx != -1:
            split_at = idx
            break
    prose_links = extract_wiki_links(body[:split_at])
    # Extract footer links from the footer section itself rather than by
    # membership-difference against prose links: previously a link appearing
    # in BOTH prose and footer was dropped from footer_links entirely.
    footer_links = extract_wiki_links(body[split_at:])
    return prose_links, footer_links
def get_domain_from_path(filepath):
    """Determine the domain bucket from a claim file's relative path.

    domains/<d>/...      -> <d>
    core/<sub>/...       -> <sub>  (files directly under core/ -> "core")
    foundations/<f>/...  -> <f>
    anything else        -> "unknown"
    """
    parts = Path(filepath).parts
    for i, p in enumerate(parts):
        # parts[i + 1] must be a directory, not the leaf .md file itself:
        # a claim sitting directly inside the bucket previously had its own
        # filename reported as its domain.
        has_subdir = i + 2 < len(parts)
        if p == "domains" and has_subdir:
            return parts[i + 1]
        if p == "core":
            # Sub-categorize core; fall back to plain "core" for direct files.
            return parts[i + 1] if has_subdir else "core"
        if p == "foundations" and has_subdir:
            return parts[i + 1]
    return "unknown"
# ---------------------------------------------------------------------------
# Claim index (runtime cache — the spine everything else computes from)
# ---------------------------------------------------------------------------
def build_claim_index(repo_root):
    """
    Build the claim index. Includes both outgoing and incoming links
    per Leo's feedback (incoming links = votes of relevance, PageRank intuition).

    Returns (claims, title_to_idx) where title_to_idx maps lowercased claim
    titles (filenames sans '.md') to positions in the claims list.
    """
    claims = []
    title_to_idx = {}
    # Pass 1: walk each claim directory, recording every claim's outgoing links.
    for base_dir in CLAIM_DIRS:
        base_path = os.path.join(repo_root, base_dir)
        if not os.path.exists(base_path):
            continue
        for dirpath, _subdirs, filenames in os.walk(base_path):
            for name in filenames:
                if not name.endswith(".md"):
                    continue
                if name.startswith("_") or name.startswith("."):
                    continue
                md_path = os.path.join(dirpath, name)
                fm, body = parse_frontmatter(md_path)
                # Only frontmatter explicitly typed as a claim is indexed.
                if not fm or fm.get("type") != "claim":
                    continue
                rel_path = os.path.relpath(md_path, repo_root)
                prose_links, footer_links = extract_argumentative_links(body)
                title = name[:-3]
                title_to_idx[title.lower()] = len(claims)
                claims.append({
                    "title": title,
                    "path": rel_path,
                    "domain": get_domain_from_path(rel_path),
                    "confidence": fm.get("confidence", "unknown"),
                    "source": fm.get("source", ""),
                    "created": fm.get("created", ""),
                    "outgoing_links": extract_wiki_links(body),
                    "prose_links": prose_links,
                    "footer_links": footer_links,
                    "incoming_links": [],  # filled in during pass 2
                    "body": body,
                })
    # Pass 2: invert outgoing links into incoming links.
    for claim in claims:
        for link in claim["outgoing_links"]:
            hit = title_to_idx.get(link.lower())
            if hit is not None:
                claims[hit]["incoming_links"].append(claim["title"])
    return claims, title_to_idx
# ---------------------------------------------------------------------------
# Belief parsing
# ---------------------------------------------------------------------------
def parse_beliefs(repo_root):
    """Parse all agent belief files for grounding depth analysis.

    Scans agents/<name>/beliefs.md, counting '### N.' belief headings and
    [[wiki link]] groundings. Returns a mapping
    {agent: {count, total_grounding_links, avg_grounding}}.
    """
    beliefs = {}
    agents_path = os.path.join(repo_root, AGENT_DIR)
    if not os.path.exists(agents_path):
        return beliefs
    # sorted() makes the output ordering deterministic across filesystems
    # (os.listdir order is unspecified), so snapshots diff cleanly.
    for agent_name in sorted(os.listdir(agents_path)):
        beliefs_file = os.path.join(agents_path, agent_name, "beliefs.md")
        if not os.path.exists(beliefs_file):
            continue
        # Explicit encoding for parity with parse_frontmatter; previously used
        # the platform default, which varies on Windows.
        with open(beliefs_file, "r", encoding="utf-8") as f:
            content = f.read()
        belief_headings = re.findall(r"### \d+\.", content)
        grounding_links = extract_wiki_links(content)
        count = len(belief_headings)
        beliefs[agent_name] = {
            "count": count,
            "total_grounding_links": len(grounding_links),
            # max(..., 1) guards against a beliefs file with no headings.
            "avg_grounding": round(len(grounding_links) / max(count, 1), 1),
        }
    return beliefs
# ---------------------------------------------------------------------------
# Metrics
# ---------------------------------------------------------------------------
def compute_metrics(claims, title_to_idx, beliefs):
    """Compute all Tier 1 and Tier 2 metrics.

    Args:
        claims: list of claim dicts from build_claim_index (must still carry
            "body" — challenge coverage scans it).
        title_to_idx: lowercased-title -> claims index, for link resolution.
        beliefs: per-agent grounding stats from parse_beliefs; passed through
            verbatim as metric 7.

    Returns a JSON-serializable dict with one key per metric section below.
    """
    total = len(claims)
    results = {
        "generated": datetime.now().isoformat(),
        "date": TODAY,
    }
    # --- 1. Claim counts ---
    by_domain = defaultdict(int)
    for c in claims:
        by_domain[c["domain"]] += 1
    results["claims"] = {
        "total": total,
        # Sorted descending by count so the report table reads largest-first.
        "by_domain": dict(sorted(by_domain.items(), key=lambda x: -x[1])),
    }
    # --- 2. Confidence distribution ---
    conf_dist = defaultdict(int)
    conf_by_domain = defaultdict(lambda: defaultdict(int))
    for c in claims:
        conf_dist[c["confidence"]] += 1
        conf_by_domain[c["domain"]][c["confidence"]] += 1
    results["confidence_distribution"] = {
        "overall": dict(conf_dist),
        "by_domain": {d: dict(v) for d, v in conf_by_domain.items()},
    }
    # --- 3. Orphan ratio ---
    # An orphan is a claim nothing else links to (zero incoming links).
    orphans = []
    for c in claims:
        if len(c["incoming_links"]) == 0:
            orphans.append({
                "title": c["title"][:100],
                "domain": c["domain"],
                "outgoing_links": len(c["outgoing_links"]),
            })
    # max(total, 1) keeps an empty KB from dividing by zero.
    orphan_ratio = len(orphans) / max(total, 1)
    results["orphan_ratio"] = {
        "total_claims": total,
        "orphans": len(orphans),
        "ratio": round(orphan_ratio, 3),
        "status": (
            "healthy" if orphan_ratio < 0.10
            else "warning" if orphan_ratio < 0.20
            else "critical"
        ),
        "target": 0.10,
        "sample_orphans": orphans[:10],
    }
    # --- 4. Cross-domain linkage density ---
    total_links = 0
    cross_domain_links = 0
    unresolved_links = 0
    cross_by_domain = defaultdict(lambda: {"total": 0, "cross": 0})
    # Track reciprocal links (higher quality per Theseus)
    reciprocal_count = 0
    for c in claims:
        for link in c["outgoing_links"]:
            total_links += 1
            cross_by_domain[c["domain"]]["total"] += 1
            target_idx = title_to_idx.get(link.lower())
            if target_idx is None:
                # Link text resolves to no known claim title.
                unresolved_links += 1
            else:
                target = claims[target_idx]
                if target["domain"] != c["domain"]:
                    cross_domain_links += 1
                    cross_by_domain[c["domain"]]["cross"] += 1
                    # Check reciprocity (only tested for cross-domain links)
                    if c["title"].lower() in [
                        l.lower() for l in target["outgoing_links"]
                    ]:
                        reciprocal_count += 1
    cross_ratio = cross_domain_links / max(total_links, 1)
    results["cross_domain_linkage"] = {
        "total_links": total_links,
        "cross_domain": cross_domain_links,
        "ratio": round(cross_ratio, 3),
        "reciprocal_links": reciprocal_count // 2,  # each pair counted twice
        "unresolved_links": unresolved_links,
        "status": "healthy" if cross_ratio >= 0.35 else "warning" if cross_ratio >= 0.15 else "critical",
        "target": 0.35,
        "by_domain": {
            d: {
                "total": v["total"],
                "cross": v["cross"],
                "ratio": round(v["cross"] / max(v["total"], 1), 3),
            }
            for d, v in cross_by_domain.items()
        },
    }
    # --- 5. Source diversity (Tier 1 per Leo) ---
    # Sources truncated to 100 chars so trivially-different citations collapse.
    source_by_domain = defaultdict(set)
    for c in claims:
        if c["source"]:
            source_by_domain[c["domain"]].add(c["source"][:100].strip())
    source_diversity = {}
    for domain in by_domain:
        n_sources = len(source_by_domain.get(domain, set()))
        n_claims = by_domain[domain]
        ratio = round(n_sources / max(n_claims, 1), 3)
        source_diversity[domain] = {
            "unique_sources": n_sources,
            "total_claims": n_claims,
            "ratio": ratio,
            "status": "healthy" if ratio >= 0.3 else "warning",
        }
    results["source_diversity"] = source_diversity
    # --- 6. Evidence freshness ---
    ages = []
    stale = []
    # Fast-moving domains go stale at 180 days; everything else at 365.
    fast_domains = {"health", "ai-alignment", "internet-finance", "entertainment"}
    for c in claims:
        if c["created"]:
            try:
                created = datetime.strptime(c["created"], "%Y-%m-%d").date()
                age = (date.today() - created).days
                ages.append(age)
                threshold = 180 if c["domain"] in fast_domains else 365
                if age > threshold:
                    stale.append({
                        "title": c["title"][:80],
                        "domain": c["domain"],
                        "age_days": age,
                    })
            except ValueError:
                # Unparseable 'created' date — skip rather than fail the run.
                pass
    results["evidence_freshness"] = {
        # NOTE: upper median for even-length lists (index n//2 of sorted ages).
        "median_age_days": sorted(ages)[len(ages) // 2] if ages else None,
        "mean_age_days": round(sum(ages) / len(ages), 1) if ages else None,
        "stale_count": len(stale),
        "total_with_dates": len(ages),
        "stale_claims": stale[:10],
    }
    # --- 7. Belief grounding depth ---
    results["belief_grounding"] = beliefs
    # --- 8. Challenge coverage ---
    # High-confidence claims should acknowledge counter-evidence somewhere
    # in their body text.
    likely_proven = [c for c in claims if c["confidence"] in ("likely", "proven")]
    has_challenge = 0
    for c in likely_proven:
        body_lower = c["body"].lower()
        if any(
            marker in body_lower
            for marker in ["challenged_by", "counter-evidence", "counter:", "challenges considered"]
        ):
            has_challenge += 1
    challenge_ratio = has_challenge / max(len(likely_proven), 1)
    results["challenge_coverage"] = {
        "likely_proven_claims": len(likely_proven),
        "with_challenges": has_challenge,
        "ratio": round(challenge_ratio, 3),
        "status": "healthy" if challenge_ratio >= 0.25 else "warning",
        "target": 0.25,
    }
    # --- 9. Most-linked claims (centrality, from incoming links) ---
    centrality = sorted(claims, key=lambda c: len(c["incoming_links"]), reverse=True)
    results["most_central_claims"] = [
        {
            "title": c["title"][:100],
            "domain": c["domain"],
            "incoming_links": len(c["incoming_links"]),
        }
        for c in centrality[:10]
    ]
    return results
# ---------------------------------------------------------------------------
# Report formatting
# ---------------------------------------------------------------------------
def format_report(results):
    """Format results as readable markdown.

    Args:
        results: the metrics dict produced by compute_metrics; every section
            key accessed below is expected to be present.

    Returns the full report as a single newline-joined string, one numbered
    section per metric.
    """
    lines = []
    lines.append("# Teleo Codex — Knowledge Base Health Assessment")
    lines.append(f"*Generated: {results['generated']}*")
    lines.append("")
    # Claims
    c = results["claims"]
    lines.append(f"## 1. Claim Inventory — {c['total']} total")
    lines.append("")
    lines.append("| Domain | Claims |")
    lines.append("|--------|--------|")
    for domain, count in c["by_domain"].items():
        lines.append(f"| {domain} | {count} |")
    lines.append("")
    # Confidence
    cd = results["confidence_distribution"]
    lines.append("## 2. Confidence Distribution")
    lines.append("")
    lines.append("| Domain | proven | likely | experimental | speculative |")
    lines.append("|--------|--------|--------|-------------|-------------|")
    for domain, dist in cd["by_domain"].items():
        lines.append(
            f"| {domain} | {dist.get('proven',0)} | {dist.get('likely',0)} "
            f"| {dist.get('experimental',0)} | {dist.get('speculative',0)} |"
        )
    lines.append("")
    # Orphans
    o = results["orphan_ratio"]
    lines.append(f"## 3. Orphan Ratio — {o['status'].upper()}")
    lines.append(
        f"**{o['orphans']}/{o['total_claims']} claims are orphans "
        f"({o['ratio']:.1%})** — target: <{o['target']:.0%}"
    )
    lines.append("")
    # Cross-domain
    cl = results["cross_domain_linkage"]
    lines.append(f"## 4. Cross-Domain Linkage — {cl['status'].upper()}")
    lines.append(
        f"**{cl['cross_domain']}/{cl['total_links']} links cross domain boundaries "
        f"({cl['ratio']:.1%})** — target: >{cl['target']:.0%}"
    )
    lines.append(f"Reciprocal link pairs: {cl['reciprocal_links']}")
    lines.append(f"Unresolved links: {cl['unresolved_links']}")
    lines.append("")
    lines.append("| Domain | Total links | Cross-domain | Ratio |")
    lines.append("|--------|------------|-------------|-------|")
    # Sorted descending by total link count.
    for domain, v in sorted(cl["by_domain"].items(), key=lambda x: -x[1]["total"]):
        lines.append(f"| {domain} | {v['total']} | {v['cross']} | {v['ratio']:.1%} |")
    lines.append("")
    # Source diversity
    sd = results["source_diversity"]
    lines.append("## 5. Source Diversity")
    lines.append("")
    lines.append("| Domain | Unique sources | Claims | Ratio | Status |")
    lines.append("|--------|---------------|--------|-------|--------|")
    # Ascending by ratio so the weakest domains appear first.
    for domain, v in sorted(sd.items(), key=lambda x: x[1]["ratio"]):
        lines.append(
            f"| {domain} | {v['unique_sources']} | {v['total_claims']} "
            f"| {v['ratio']:.2f} | {v['status']} |"
        )
    lines.append("")
    # Evidence freshness
    # NOTE(review): median/mean can be None when no claims carry dates — the
    # f-string would then render literal "None days"; confirm acceptable.
    ef = results["evidence_freshness"]
    lines.append("## 6. Evidence Freshness")
    lines.append(
        f"**Median claim age: {ef['median_age_days']} days "
        f"| Mean: {ef['mean_age_days']} days**"
    )
    lines.append(f"Stale claims: {ef['stale_count']}")
    lines.append("")
    # Belief grounding
    bg = results["belief_grounding"]
    lines.append("## 7. Belief Grounding Depth")
    lines.append("")
    lines.append("| Agent | Beliefs | Total grounding links | Avg per belief |")
    lines.append("|-------|---------|---------------------|----------------|")
    for agent, v in sorted(bg.items()):
        lines.append(
            f"| {agent} | {v['count']} | {v['total_grounding_links']} "
            f"| {v['avg_grounding']} |"
        )
    lines.append("")
    # Challenge coverage
    cc = results["challenge_coverage"]
    lines.append(f"## 8. Challenge Coverage — {cc['status'].upper()}")
    lines.append(
        f"**{cc['with_challenges']}/{cc['likely_proven_claims']} likely/proven claims "
        f"acknowledge counter-evidence ({cc['ratio']:.1%})** — target: >{cc['target']:.0%}"
    )
    lines.append("")
    # Most central
    mc = results["most_central_claims"]
    lines.append("## 9. Most Central Claims (by incoming links)")
    lines.append("")
    lines.append("| Claim | Domain | Incoming |")
    lines.append("|-------|--------|----------|")
    for item in mc:
        # NOTE(review): the '...' is appended even when the title is shorter
        # than 70 chars (not actually truncated) — cosmetic, confirm intended.
        lines.append(f"| {item['title'][:70]}... | {item['domain']} | {item['incoming_links']} |")
    lines.append("")
    # Automation note
    lines.append("---")
    lines.append("")
    lines.append("*Automate more of this over time: daily VPS cron, belief drift detection,")
    lines.append("reasoning chain depth, weekly digest template. See agents/vida/musings/kb-health-assessment-design.md.*")
    lines.append("")
    return "\n".join(lines)
# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    repo_root = os.environ.get("REPO_ROOT", ".")
    output_dir = os.environ.get("OUTPUT_DIR", os.path.join(repo_root, "agents", "vida", "musings"))
    metrics_dir = os.environ.get("METRICS_DIR", None)
    # Build index + compute
    claims, title_to_idx = build_claim_index(repo_root)
    beliefs = parse_beliefs(repo_root)
    results = compute_metrics(claims, title_to_idx, beliefs)
    # Strip body from claims before serializing (too large for JSON output);
    # prose/footer splits are derived and re-computed each run anyway.
    for c in claims:
        c.pop("body", None)
        c.pop("prose_links", None)
        c.pop("footer_links", None)
    # claim-index.json payload (runtime cache, regenerated each run).
    index_output = {
        "generated": results["generated"],
        "total_claims": len(claims),
        "claims": claims,
    }
    report_md = format_report(results)
    if metrics_dir:
        # VPS mode: snapshots live under <METRICS_DIR>/daily-evolution/,
        # cache at <METRICS_DIR>/claim-index.json.
        snapshot_dir = os.path.join(metrics_dir, "daily-evolution")
        index_path = os.path.join(metrics_dir, "claim-index.json")
    else:
        # Local mode: honor OUTPUT_DIR as documented. Previously output_dir
        # was computed but never used and index_output was never written, so
        # local runs silently dropped both JSON artifacts.
        snapshot_dir = output_dir
        index_path = os.path.join(output_dir, "claim-index.json")
    os.makedirs(snapshot_dir, exist_ok=True)
    snapshot_path = os.path.join(snapshot_dir, f"{TODAY}.json")
    with open(snapshot_path, "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2)
    with open(index_path, "w", encoding="utf-8") as f:
        json.dump(index_output, f, indent=2)
    # Status lines go to stderr so stdout stays a clean markdown report.
    print(f"Snapshot written to {snapshot_path}", file=sys.stderr)
    print(f"Index written to {index_path}", file=sys.stderr)
    # Always write markdown report to stdout
    print(report_md)