vida: KB health assessment script for ops/ #203

Closed
leo wants to merge 2 commits from vida/kb-health-check into main
Showing only changes of commit d74b7e0a12 - Show all commits

562
ops/kb-health-check.py Normal file
View file

@ -0,0 +1,562 @@
#!/usr/bin/env python3
"""
Teleo Codex Knowledge Base Health Assessment
Computes Tier 1 (automated) and Tier 2 (semi-automated) health metrics for the
collective knowledge base. Outputs JSON snapshot + markdown report.
Usage:
REPO_ROOT=/path/to/teleo-codex python3 ops/kb-health-check.py
Optional env vars:
REPO_ROOT Path to repo checkout (default: current directory)
OUTPUT_DIR Where to write snapshots (default: stdout + agents/vida/musings/)
METRICS_DIR VPS metrics directory (default: none, for local runs)
Designed to run:
- Manually by any agent during a session
- Daily via VPS cron at /opt/teleo-eval/metrics/
- Re-runnable at any time: claim-index.json is a cache regenerated on every run
Infrastructure decisions (from collective design review):
- Script lives in ops/ (shared infrastructure, not any agent's territory)
- claim-index.json is a runtime cache, not git-tracked (derived artifact)
- Daily snapshots go to VPS filesystem, not main branch (repo is for knowledge, not telemetry)
- Weekly digests go IN repo via normal PR flow (agent-authored analysis = knowledge)
Design: Vida (domain health), Leo (cross-domain), Theseus (measurement theory), Ganymede (ops)
"""
import os
import re
import json
import sys
from collections import defaultdict
from datetime import datetime, date
from pathlib import Path
REPO_ROOT = os.environ.get("REPO_ROOT", ".")
CLAIM_DIRS = ["domains", "core", "foundations"]
AGENT_DIR = "agents"
TODAY = date.today().isoformat()
# ---------------------------------------------------------------------------
# Parsing
# ---------------------------------------------------------------------------
def parse_frontmatter(filepath):
    """Extract YAML frontmatter from a markdown file.

    Returns (frontmatter_dict, body). frontmatter_dict is None when the file
    cannot be read or has no leading ``---`` block. Parsing is deliberately
    naive: one ``key: value`` per line, surrounding quotes stripped — nested
    YAML is not supported.
    """
    try:
        with open(filepath, "r", encoding="utf-8") as f:
            content = f.read()
    except Exception:
        # Unreadable file (permissions, encoding, race) — treat as empty.
        return None, ""
    if not content.startswith("---"):
        return None, content
    # Anchor the closing delimiter to a line start: a bare find("---") would
    # match '---' occurring inside a frontmatter value and truncate the block.
    end = content.find("\n---", 3)
    if end == -1:
        return None, content
    fm_text = content[3:end].strip()
    fm = {}
    for line in fm_text.split("\n"):
        if ":" in line:
            key, val = line.split(":", 1)
            fm[key.strip()] = val.strip().strip('"').strip("'")
    body = content[end + 4 :]  # skip the '\n---' delimiter itself
    return fm, body
def extract_wiki_links(text):
    """Return every [[wiki link]] target found in *text*.

    Pipe aliases are dropped: ``[[Target|shown text]]`` yields ``"Target"``.
    """
    wiki_link = re.compile(r"\[\[([^\]|]+?)(?:\|[^\]]+?)?\]\]")
    return wiki_link.findall(text)
def extract_argumentative_links(body):
    """
    Split wiki links into argumentative (in prose paragraphs) vs structural
    (in 'Relevant Notes' / 'Topics' footer sections).
    Argumentative links carry more weight per Theseus's Goodhart mitigation.

    Returns (prose_links, footer_links). The footer begins at the last
    occurrence of the first matching marker; with no marker the whole body
    is prose and footer_links is empty.
    """
    # Split at common footer markers
    footer_markers = ["Relevant Notes:", "Topics:", "---"]
    split_at = len(body)  # no marker -> everything is prose
    for marker in footer_markers:
        idx = body.rfind(marker)
        if idx != -1:
            split_at = idx
            break
    prose_links = extract_wiki_links(body[:split_at])
    # Extract footer links from the footer section itself rather than by
    # membership-difference against prose links: previously a link appearing
    # in BOTH prose and footer was dropped from footer_links entirely.
    footer_links = extract_wiki_links(body[split_at:])
    return prose_links, footer_links
def get_domain_from_path(filepath):
    """Determine the domain bucket from a claim file's relative path.

    domains/<d>/...      -> <d>
    core/<sub>/...       -> <sub>  (files directly under core/ -> "core")
    foundations/<f>/...  -> <f>
    anything else        -> "unknown"
    """
    parts = Path(filepath).parts
    for i, p in enumerate(parts):
        # parts[i + 1] must be a directory, not the leaf .md file itself:
        # a claim sitting directly inside the bucket previously had its own
        # filename reported as its domain.
        has_subdir = i + 2 < len(parts)
        if p == "domains" and has_subdir:
            return parts[i + 1]
        if p == "core":
            # Sub-categorize core; fall back to plain "core" for direct files.
            return parts[i + 1] if has_subdir else "core"
        if p == "foundations" and has_subdir:
            return parts[i + 1]
    return "unknown"
# ---------------------------------------------------------------------------
# Claim index (runtime cache — the spine everything else computes from)
# ---------------------------------------------------------------------------
def build_claim_index(repo_root):
    """
    Build the claim index. Includes both outgoing and incoming links
    per Leo's feedback (incoming links = votes of relevance, PageRank intuition).

    Returns (claims, title_to_idx) where title_to_idx maps lowercased claim
    titles (filenames sans '.md') to positions in the claims list.
    """
    claims = []
    title_to_idx = {}
    # Pass 1: walk each claim directory, recording every claim's outgoing links.
    for base_dir in CLAIM_DIRS:
        base_path = os.path.join(repo_root, base_dir)
        if not os.path.exists(base_path):
            continue
        for dirpath, _subdirs, filenames in os.walk(base_path):
            for name in filenames:
                if not name.endswith(".md"):
                    continue
                if name.startswith("_") or name.startswith("."):
                    continue
                md_path = os.path.join(dirpath, name)
                fm, body = parse_frontmatter(md_path)
                # Only frontmatter explicitly typed as a claim is indexed.
                if not fm or fm.get("type") != "claim":
                    continue
                rel_path = os.path.relpath(md_path, repo_root)
                prose_links, footer_links = extract_argumentative_links(body)
                title = name[:-3]
                title_to_idx[title.lower()] = len(claims)
                claims.append({
                    "title": title,
                    "path": rel_path,
                    "domain": get_domain_from_path(rel_path),
                    "confidence": fm.get("confidence", "unknown"),
                    "source": fm.get("source", ""),
                    "created": fm.get("created", ""),
                    "outgoing_links": extract_wiki_links(body),
                    "prose_links": prose_links,
                    "footer_links": footer_links,
                    "incoming_links": [],  # filled in during pass 2
                    "body": body,
                })
    # Pass 2: invert outgoing links into incoming links.
    for claim in claims:
        for link in claim["outgoing_links"]:
            hit = title_to_idx.get(link.lower())
            if hit is not None:
                claims[hit]["incoming_links"].append(claim["title"])
    return claims, title_to_idx
# ---------------------------------------------------------------------------
# Belief parsing
# ---------------------------------------------------------------------------
def parse_beliefs(repo_root):
    """Parse all agent belief files for grounding depth analysis.

    Scans agents/<name>/beliefs.md, counting '### N.' belief headings and
    [[wiki link]] groundings. Returns a mapping
    {agent: {count, total_grounding_links, avg_grounding}}.
    """
    beliefs = {}
    agents_path = os.path.join(repo_root, AGENT_DIR)
    if not os.path.exists(agents_path):
        return beliefs
    # sorted() makes the output ordering deterministic across filesystems
    # (os.listdir order is unspecified), so snapshots diff cleanly.
    for agent_name in sorted(os.listdir(agents_path)):
        beliefs_file = os.path.join(agents_path, agent_name, "beliefs.md")
        if not os.path.exists(beliefs_file):
            continue
        # Explicit encoding for parity with parse_frontmatter; previously used
        # the platform default, which varies on Windows.
        with open(beliefs_file, "r", encoding="utf-8") as f:
            content = f.read()
        belief_headings = re.findall(r"### \d+\.", content)
        grounding_links = extract_wiki_links(content)
        count = len(belief_headings)
        beliefs[agent_name] = {
            "count": count,
            "total_grounding_links": len(grounding_links),
            # max(..., 1) guards against a beliefs file with no headings.
            "avg_grounding": round(len(grounding_links) / max(count, 1), 1),
        }
    return beliefs
# ---------------------------------------------------------------------------
# Metrics
# ---------------------------------------------------------------------------
def compute_metrics(claims, title_to_idx, beliefs):
    """Compute all Tier 1 and Tier 2 metrics.

    Args:
        claims: list of claim dicts from build_claim_index (must still carry
            "body" — challenge coverage scans it).
        title_to_idx: lowercased-title -> claims index, for link resolution.
        beliefs: per-agent grounding stats from parse_beliefs; passed through
            verbatim as metric 7.

    Returns a JSON-serializable dict with one key per metric section below.
    """
    total = len(claims)
    results = {
        "generated": datetime.now().isoformat(),
        "date": TODAY,
    }
    # --- 1. Claim counts ---
    by_domain = defaultdict(int)
    for c in claims:
        by_domain[c["domain"]] += 1
    results["claims"] = {
        "total": total,
        # Sorted descending by count so the report table reads largest-first.
        "by_domain": dict(sorted(by_domain.items(), key=lambda x: -x[1])),
    }
    # --- 2. Confidence distribution ---
    conf_dist = defaultdict(int)
    conf_by_domain = defaultdict(lambda: defaultdict(int))
    for c in claims:
        conf_dist[c["confidence"]] += 1
        conf_by_domain[c["domain"]][c["confidence"]] += 1
    results["confidence_distribution"] = {
        "overall": dict(conf_dist),
        "by_domain": {d: dict(v) for d, v in conf_by_domain.items()},
    }
    # --- 3. Orphan ratio ---
    # An orphan is a claim nothing else links to (zero incoming links).
    orphans = []
    for c in claims:
        if len(c["incoming_links"]) == 0:
            orphans.append({
                "title": c["title"][:100],
                "domain": c["domain"],
                "outgoing_links": len(c["outgoing_links"]),
            })
    # max(total, 1) keeps an empty KB from dividing by zero.
    orphan_ratio = len(orphans) / max(total, 1)
    results["orphan_ratio"] = {
        "total_claims": total,
        "orphans": len(orphans),
        "ratio": round(orphan_ratio, 3),
        "status": (
            "healthy" if orphan_ratio < 0.10
            else "warning" if orphan_ratio < 0.20
            else "critical"
        ),
        "target": 0.10,
        "sample_orphans": orphans[:10],
    }
    # --- 4. Cross-domain linkage density ---
    total_links = 0
    cross_domain_links = 0
    unresolved_links = 0
    cross_by_domain = defaultdict(lambda: {"total": 0, "cross": 0})
    # Track reciprocal links (higher quality per Theseus)
    reciprocal_count = 0
    for c in claims:
        for link in c["outgoing_links"]:
            total_links += 1
            cross_by_domain[c["domain"]]["total"] += 1
            target_idx = title_to_idx.get(link.lower())
            if target_idx is None:
                # Link text resolves to no known claim title.
                unresolved_links += 1
            else:
                target = claims[target_idx]
                if target["domain"] != c["domain"]:
                    cross_domain_links += 1
                    cross_by_domain[c["domain"]]["cross"] += 1
                    # Check reciprocity (only tested for cross-domain links)
                    if c["title"].lower() in [
                        l.lower() for l in target["outgoing_links"]
                    ]:
                        reciprocal_count += 1
    cross_ratio = cross_domain_links / max(total_links, 1)
    results["cross_domain_linkage"] = {
        "total_links": total_links,
        "cross_domain": cross_domain_links,
        "ratio": round(cross_ratio, 3),
        "reciprocal_links": reciprocal_count // 2,  # each pair counted twice
        "unresolved_links": unresolved_links,
        "status": "healthy" if cross_ratio >= 0.35 else "warning" if cross_ratio >= 0.15 else "critical",
        "target": 0.35,
        "by_domain": {
            d: {
                "total": v["total"],
                "cross": v["cross"],
                "ratio": round(v["cross"] / max(v["total"], 1), 3),
            }
            for d, v in cross_by_domain.items()
        },
    }
    # --- 5. Source diversity (Tier 1 per Leo) ---
    # Sources truncated to 100 chars so trivially-different citations collapse.
    source_by_domain = defaultdict(set)
    for c in claims:
        if c["source"]:
            source_by_domain[c["domain"]].add(c["source"][:100].strip())
    source_diversity = {}
    for domain in by_domain:
        n_sources = len(source_by_domain.get(domain, set()))
        n_claims = by_domain[domain]
        ratio = round(n_sources / max(n_claims, 1), 3)
        source_diversity[domain] = {
            "unique_sources": n_sources,
            "total_claims": n_claims,
            "ratio": ratio,
            "status": "healthy" if ratio >= 0.3 else "warning",
        }
    results["source_diversity"] = source_diversity
    # --- 6. Evidence freshness ---
    ages = []
    stale = []
    # Fast-moving domains go stale at 180 days; everything else at 365.
    fast_domains = {"health", "ai-alignment", "internet-finance", "entertainment"}
    for c in claims:
        if c["created"]:
            try:
                created = datetime.strptime(c["created"], "%Y-%m-%d").date()
                age = (date.today() - created).days
                ages.append(age)
                threshold = 180 if c["domain"] in fast_domains else 365
                if age > threshold:
                    stale.append({
                        "title": c["title"][:80],
                        "domain": c["domain"],
                        "age_days": age,
                    })
            except ValueError:
                # Unparseable 'created' date — skip rather than fail the run.
                pass
    results["evidence_freshness"] = {
        # NOTE: upper median for even-length lists (index n//2 of sorted ages).
        "median_age_days": sorted(ages)[len(ages) // 2] if ages else None,
        "mean_age_days": round(sum(ages) / len(ages), 1) if ages else None,
        "stale_count": len(stale),
        "total_with_dates": len(ages),
        "stale_claims": stale[:10],
    }
    # --- 7. Belief grounding depth ---
    results["belief_grounding"] = beliefs
    # --- 8. Challenge coverage ---
    # High-confidence claims should acknowledge counter-evidence somewhere
    # in their body text.
    likely_proven = [c for c in claims if c["confidence"] in ("likely", "proven")]
    has_challenge = 0
    for c in likely_proven:
        body_lower = c["body"].lower()
        if any(
            marker in body_lower
            for marker in ["challenged_by", "counter-evidence", "counter:", "challenges considered"]
        ):
            has_challenge += 1
    challenge_ratio = has_challenge / max(len(likely_proven), 1)
    results["challenge_coverage"] = {
        "likely_proven_claims": len(likely_proven),
        "with_challenges": has_challenge,
        "ratio": round(challenge_ratio, 3),
        "status": "healthy" if challenge_ratio >= 0.25 else "warning",
        "target": 0.25,
    }
    # --- 9. Most-linked claims (centrality, from incoming links) ---
    centrality = sorted(claims, key=lambda c: len(c["incoming_links"]), reverse=True)
    results["most_central_claims"] = [
        {
            "title": c["title"][:100],
            "domain": c["domain"],
            "incoming_links": len(c["incoming_links"]),
        }
        for c in centrality[:10]
    ]
    return results
# ---------------------------------------------------------------------------
# Report formatting
# ---------------------------------------------------------------------------
def format_report(results):
    """Format results as readable markdown.

    Args:
        results: the metrics dict produced by compute_metrics; every section
            key accessed below is expected to be present.

    Returns the full report as a single newline-joined string, one numbered
    section per metric.
    """
    lines = []
    lines.append("# Teleo Codex — Knowledge Base Health Assessment")
    lines.append(f"*Generated: {results['generated']}*")
    lines.append("")
    # Claims
    c = results["claims"]
    lines.append(f"## 1. Claim Inventory — {c['total']} total")
    lines.append("")
    lines.append("| Domain | Claims |")
    lines.append("|--------|--------|")
    for domain, count in c["by_domain"].items():
        lines.append(f"| {domain} | {count} |")
    lines.append("")
    # Confidence
    cd = results["confidence_distribution"]
    lines.append("## 2. Confidence Distribution")
    lines.append("")
    lines.append("| Domain | proven | likely | experimental | speculative |")
    lines.append("|--------|--------|--------|-------------|-------------|")
    for domain, dist in cd["by_domain"].items():
        lines.append(
            f"| {domain} | {dist.get('proven',0)} | {dist.get('likely',0)} "
            f"| {dist.get('experimental',0)} | {dist.get('speculative',0)} |"
        )
    lines.append("")
    # Orphans
    o = results["orphan_ratio"]
    lines.append(f"## 3. Orphan Ratio — {o['status'].upper()}")
    lines.append(
        f"**{o['orphans']}/{o['total_claims']} claims are orphans "
        f"({o['ratio']:.1%})** — target: <{o['target']:.0%}"
    )
    lines.append("")
    # Cross-domain
    cl = results["cross_domain_linkage"]
    lines.append(f"## 4. Cross-Domain Linkage — {cl['status'].upper()}")
    lines.append(
        f"**{cl['cross_domain']}/{cl['total_links']} links cross domain boundaries "
        f"({cl['ratio']:.1%})** — target: >{cl['target']:.0%}"
    )
    lines.append(f"Reciprocal link pairs: {cl['reciprocal_links']}")
    lines.append(f"Unresolved links: {cl['unresolved_links']}")
    lines.append("")
    lines.append("| Domain | Total links | Cross-domain | Ratio |")
    lines.append("|--------|------------|-------------|-------|")
    # Sorted descending by total link count.
    for domain, v in sorted(cl["by_domain"].items(), key=lambda x: -x[1]["total"]):
        lines.append(f"| {domain} | {v['total']} | {v['cross']} | {v['ratio']:.1%} |")
    lines.append("")
    # Source diversity
    sd = results["source_diversity"]
    lines.append("## 5. Source Diversity")
    lines.append("")
    lines.append("| Domain | Unique sources | Claims | Ratio | Status |")
    lines.append("|--------|---------------|--------|-------|--------|")
    # Ascending by ratio so the weakest domains appear first.
    for domain, v in sorted(sd.items(), key=lambda x: x[1]["ratio"]):
        lines.append(
            f"| {domain} | {v['unique_sources']} | {v['total_claims']} "
            f"| {v['ratio']:.2f} | {v['status']} |"
        )
    lines.append("")
    # Evidence freshness
    # NOTE(review): median/mean can be None when no claims carry dates — the
    # f-string would then render literal "None days"; confirm acceptable.
    ef = results["evidence_freshness"]
    lines.append("## 6. Evidence Freshness")
    lines.append(
        f"**Median claim age: {ef['median_age_days']} days "
        f"| Mean: {ef['mean_age_days']} days**"
    )
    lines.append(f"Stale claims: {ef['stale_count']}")
    lines.append("")
    # Belief grounding
    bg = results["belief_grounding"]
    lines.append("## 7. Belief Grounding Depth")
    lines.append("")
    lines.append("| Agent | Beliefs | Total grounding links | Avg per belief |")
    lines.append("|-------|---------|---------------------|----------------|")
    for agent, v in sorted(bg.items()):
        lines.append(
            f"| {agent} | {v['count']} | {v['total_grounding_links']} "
            f"| {v['avg_grounding']} |"
        )
    lines.append("")
    # Challenge coverage
    cc = results["challenge_coverage"]
    lines.append(f"## 8. Challenge Coverage — {cc['status'].upper()}")
    lines.append(
        f"**{cc['with_challenges']}/{cc['likely_proven_claims']} likely/proven claims "
        f"acknowledge counter-evidence ({cc['ratio']:.1%})** — target: >{cc['target']:.0%}"
    )
    lines.append("")
    # Most central
    mc = results["most_central_claims"]
    lines.append("## 9. Most Central Claims (by incoming links)")
    lines.append("")
    lines.append("| Claim | Domain | Incoming |")
    lines.append("|-------|--------|----------|")
    for item in mc:
        # NOTE(review): the '...' is appended even when the title is shorter
        # than 70 chars (not actually truncated) — cosmetic, confirm intended.
        lines.append(f"| {item['title'][:70]}... | {item['domain']} | {item['incoming_links']} |")
    lines.append("")
    # Automation note
    lines.append("---")
    lines.append("")
    lines.append("*Automate more of this over time: daily VPS cron, belief drift detection,")
    lines.append("reasoning chain depth, weekly digest template. See agents/vida/musings/kb-health-assessment-design.md.*")
    lines.append("")
    return "\n".join(lines)
# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    repo_root = os.environ.get("REPO_ROOT", ".")
    output_dir = os.environ.get("OUTPUT_DIR", os.path.join(repo_root, "agents", "vida", "musings"))
    metrics_dir = os.environ.get("METRICS_DIR", None)
    # Build index + compute
    claims, title_to_idx = build_claim_index(repo_root)
    beliefs = parse_beliefs(repo_root)
    results = compute_metrics(claims, title_to_idx, beliefs)
    # Strip body from claims before serializing (too large for JSON output);
    # prose/footer splits are derived and re-computed each run anyway.
    for c in claims:
        c.pop("body", None)
        c.pop("prose_links", None)
        c.pop("footer_links", None)
    # claim-index.json payload (runtime cache, regenerated each run).
    index_output = {
        "generated": results["generated"],
        "total_claims": len(claims),
        "claims": claims,
    }
    report_md = format_report(results)
    if metrics_dir:
        # VPS mode: snapshots live under <METRICS_DIR>/daily-evolution/,
        # cache at <METRICS_DIR>/claim-index.json.
        snapshot_dir = os.path.join(metrics_dir, "daily-evolution")
        index_path = os.path.join(metrics_dir, "claim-index.json")
    else:
        # Local mode: honor OUTPUT_DIR as documented. Previously output_dir
        # was computed but never used and index_output was never written, so
        # local runs silently dropped both JSON artifacts.
        snapshot_dir = output_dir
        index_path = os.path.join(output_dir, "claim-index.json")
    os.makedirs(snapshot_dir, exist_ok=True)
    snapshot_path = os.path.join(snapshot_dir, f"{TODAY}.json")
    with open(snapshot_path, "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2)
    with open(index_path, "w", encoding="utf-8") as f:
        json.dump(index_output, f, indent=2)
    # Status lines go to stderr so stdout stays a clean markdown report.
    print(f"Snapshot written to {snapshot_path}", file=sys.stderr)
    print(f"Index written to {index_path}", file=sys.stderr)
    # Always write markdown report to stdout
    print(report_md)