feat(claims): /api/claims/{slug} canonical detail endpoint

Implements Ship's claim detail contract — one round-trip, all data
resolved server-side. Replaces the thin, domain-only stub with a full tree
walk (domains/ + foundations/ + core/), DB joins for PRs and reviews, and
server-side wikilink resolution to eliminate the frontend N+1 cascade.

Response shape (Ship brief 2026-04-29):
  slug, title, domain, secondary_domains, confidence, description,
  created, last_review, body (raw markdown), sourced_from, reviews,
  prs, edges {supports,challenges,related,depends_on}, wikilinks
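
Illustrative payload (hypothetical slug and invented values; the exact
shape is defined by handle_claim_detail in the diff below):

  {
    "slug": "example-claim", "title": "Example claim", "domain": "learning",
    "secondary_domains": [], "confidence": "medium", "description": "",
    "created": "2026-02-14",
    "last_review": {"outcome": "accept", "reviewer": "Argus", "date": "2026-05-01"},
    "body": "...raw markdown...",
    "sourced_from": [{"path": "inbox/archive/example.md", "title": "Example source", "url": null}],
    "reviews": [], "prs": [],
    "edges": {"supports": [], "challenges": [], "related": [], "depends_on": []},
    "wikilinks": {"Another Claim": "another-claim", "maps/learning": null}
  }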

Wikilink resolution:
- Builds a title→stem index from the frontmatter title field, falling back
  to the filename stem; both are normalized via _normalize_for_match
- Returns flat {link_text: slug_or_null} map; unresolved → null so
  frontend can render plain text
- Inline normalization (lowercase, hyphen↔space, collapse whitespace,
  strip punctuation); sketched below. Note: lib/attribution.py exposes only
  normalize_handle today, not the title normalizer Ship referenced. If a
  canonical helper lands later, switch to it.
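
Matching sketch (same rules as _normalize_for_match in the diff; the
example strings are invented):

  import re

  def normalize(s):
      s = str(s or "").lower().strip()
      s = s.replace("-", " ").replace("_", " ")   # hyphens/underscores -> spaces
      s = re.sub(r"\s+", " ", s)                  # collapse whitespace runs
      return re.sub(r"[^\w\s]", "", s).strip()    # drop punctuation filenames lose

  # A wikilink and its file stem normalize to the same key:
  assert normalize("Spaced-Repetition: Why It Works?") == "spaced repetition why it works"
  assert normalize("spaced-repetition-why-it-works") == "spaced repetition why it works"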

Caches:
- title→slug index: 60s TTL (warm cache <20ms p50 verified)
- list endpoint: 5min TTL (preserved from prior)
- Cold: ~3.3s for tree walk of 1,866 files; warm: 13-17ms
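
Both caches use the same module-level TTL pattern (sketch; mirrors the
index cache in the diff, with `build` standing in for the tree walk):

  import time

  _index_cache = {"by_title": None, "by_stem": None, "ts": 0}
  _INDEX_CACHE_TTL = 60

  def cached_indexes(build):
      now = time.time()
      if _index_cache["by_title"] is not None and now - _index_cache["ts"] < _INDEX_CACHE_TTL:
          return _index_cache["by_title"], _index_cache["by_stem"]   # warm path
      by_title, by_stem = build()                                    # cold: full tree walk
      _index_cache.update(by_title=by_title, by_stem=by_stem, ts=now)
      return by_title, by_stem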

Bug fixed in second pass:
- _resolve_sourced_from defaulted title to "", which leaked LIKE '%%' and
  matched every PR. Now requires a non-empty title and stem; the handler
  falls back to slug.replace("-"," ") when the frontmatter title is missing.
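
The guard, roughly (mirrors _load_pr_history / _resolve_sourced_from in
the diff; prs columns per pipeline.db):

  def _pr_rows_for(conn, title, stem):
      # Empty title or stem degenerates to LIKE '%%' and matches every row.
      if not (title or "").strip() or not (stem or "").strip():
          return []
      return conn.execute(
          "SELECT number, branch FROM prs WHERE description LIKE ? OR branch LIKE ?",
          (f"%{title}%", f"%{stem}%"),
      ).fetchall()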

Verified live on VPS:
- AI diagnostic triage claim (no fm.title): sourced_from=1, prs=0
  (correct — Feb claim, pre-description-tracking)
- Recent extract PR claim: sourced_from=1 with URL, prs=1, reviews=1,
  last_review populated, edges 3 supports + 7 related, wikilinks 0
- 404 on missing slug: correct
- Claim with [[maps/...]] wikilink: 5/6 resolved (correct null on map)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
m3taversal 2026-05-09 17:37:26 +01:00
parent fc002354d4
commit 0eb26327fc


@@ -1,29 +1,343 @@
"""Claims API — list endpoint + canonical claim detail page.

Owner: Argus

Routes:
    GET /api/claims          list/filter (frontmatter scan, lightweight)
    GET /api/claims/{slug}   full claim detail (Ship contract)
    GET /api/domains         domain rollups for sidebar

The detail endpoint is the canonical /claims/{slug} backend per Ship's
2026-04-29 brief. One round-trip, no N+1 cascade. Wikilinks resolved
server-side via a title→slug index built from a tree walk.
"""
import json
import re
import sqlite3
import time
from pathlib import Path

import yaml
from aiohttp import web

# Codex tree roots — claims live in three places (Sourcer Apr 26 fix scope)
CODEX_BASE = Path("/opt/teleo-eval/workspaces/main")
CLAIM_TREES = [CODEX_BASE / "domains", CODEX_BASE / "foundations", CODEX_BASE / "core"]

# pipeline.db for joins (review_records, prs, sources)
DB_PATH = "/opt/teleo-eval/pipeline/pipeline.db"

# In-process caches
_list_cache = {"data": None, "ts": 0}
_LIST_CACHE_TTL = 300  # 5 min — list view tolerates staleness
_index_cache = {"by_title": None, "by_stem": None, "ts": 0}
_INDEX_CACHE_TTL = 60  # 1 min — title→slug index for wikilink resolution

CORS_HEADERS = {"Access-Control-Allow-Origin": "*"}

# Wikilink pattern. [[text]] or [[text|alias]] — we keep the link text only.
_WIKILINK_RE = re.compile(r"\[\[([^\]|#]+?)(?:[#|][^\]]*)?\]\]")


# ─── Normalization ─────────────────────────────────────────────────────────

def _normalize_for_match(s):
    """Collapse a title or slug to a comparable form.

    Rules (from Ship's brief — match the link-fixer canonicalization):
    - lowercase
    - hyphen/space tolerant (both collapse to a single space)
    - collapse runs of whitespace
    - strip leading/trailing whitespace
    - drop trailing punctuation that gets stripped from filenames
      (`.`, `?`, `!`, `:`, `--`)

    NOTE: lib/attribution.py exposes only normalize_handle today, not the
    title normalizer Ship referenced. Implementing inline; if a canonical
    helper lands later we point at it.
    """
    if not s:
        return ""
    s = str(s).lower().strip()
    # Treat hyphens as spaces, then collapse whitespace runs
    s = s.replace("-", " ").replace("_", " ")
    s = re.sub(r"\s+", " ", s)
    # Strip ASCII punctuation that filenames drop
    s = re.sub(r"[^\w\s]", "", s)
    return s.strip()


# ─── Frontmatter parse ─────────────────────────────────────────────────────

def _split_frontmatter(text):
    """Return (frontmatter_dict, body_str) or (None, None) if not a claim file."""
    if not text.startswith("---"):
        return None, None
    try:
        end = text.index("\n---", 3)
    except ValueError:
        return None, None
    try:
        fm = yaml.safe_load(text[3:end])
    except Exception:
        return None, None
    if not isinstance(fm, dict):
        return None, None
    body = text[end + 4:].lstrip()
    return fm, body


def _read_claim_file(filepath):
    """Read a claim file from disk. Returns (frontmatter, body) or (None, None)."""
    try:
        text = filepath.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError):
        return None, None
    return _split_frontmatter(text)


# ─── Tree walk + indexing ──────────────────────────────────────────────────

def _walk_claim_files():
    """Yield Path objects for every .md claim file in domains/, foundations/, core/."""
    for root in CLAIM_TREES:
        if not root.exists():
            continue
        for f in root.rglob("*.md"):
            if f.name == "_map.md":
                continue
            yield f


def _build_indexes():
    """Build (title→stem, stem→relpath) indexes for wikilink resolution.

    Cached for _INDEX_CACHE_TTL. Pulls from claim-index endpoint when
    possible (already cached upstream) and falls back to filesystem walk.
    """
    now = time.time()
    if _index_cache["by_title"] is not None and now - _index_cache["ts"] < _INDEX_CACHE_TTL:
        return _index_cache["by_title"], _index_cache["by_stem"]
    by_title = {}
    by_stem = {}
    for f in _walk_claim_files():
        stem = f.stem
        rel = str(f.relative_to(CODEX_BASE))
        by_stem[stem] = rel
        # Index by stem-as-normalized too (covers wikilinks that use the slug)
        by_title[_normalize_for_match(stem)] = stem
        # Also try parsing the title from frontmatter for higher-fidelity matches
        fm, _ = _read_claim_file(f)
        if fm:
            title = fm.get("title")
            if title:
                key = _normalize_for_match(title)
                if key and key not in by_title:
                    by_title[key] = stem
    _index_cache["by_title"] = by_title
    _index_cache["by_stem"] = by_stem
    _index_cache["ts"] = now
    return by_title, by_stem


def _resolve_wikilinks(body, by_title):
    """Extract [[link]] occurrences from body, return {link_text: slug_or_null}."""
    out = {}
    for match in _WIKILINK_RE.finditer(body or ""):
        link_text = match.group(1).strip()
        if not link_text or link_text in out:
            continue
        norm = _normalize_for_match(link_text)
        out[link_text] = by_title.get(norm)
    return out


# ─── Edge extraction from frontmatter ──────────────────────────────────────

_EDGE_FIELDS = {
    "supports": "supports",
    "challenges": "challenges",
    "challenged_by": "challenges",  # canonical: store as challenges direction
    "related": "related",
    "related_claims": "related",
    "depends_on": "depends_on",
}


def _extract_edges(fm, by_title, by_stem):
    """Return edges dict shaped per Ship's contract.

    Each edge is {slug, title, exists}. Slug resolved through title index.
    """
    edges = {"supports": [], "challenges": [], "related": [], "depends_on": []}
    for fm_key, edge_kind in _EDGE_FIELDS.items():
        raw = fm.get(fm_key)
        if not raw:
            continue
        items = raw if isinstance(raw, list) else [raw]
        for item in items:
            if not isinstance(item, str):
                continue
            text = item.strip()
            # Strip wikilink wrapping if present
            text = re.sub(r"^\[\[|\]\]$", "", text)
            # Strip pipe annotations: "[[link|alias]]" style or "claim | edge_type | date"
            text = text.split("|")[0].strip()
            if not text:
                continue
            # Try title match first, fall back to stem match
            slug = by_title.get(_normalize_for_match(text))
            if not slug and text in by_stem:
                slug = text
            edges[edge_kind].append({
                "slug": slug,
                "title": text,
                "exists": slug is not None,
            })
    return edges


# ─── Source provenance ─────────────────────────────────────────────────────

def _resolve_sourced_from(conn, claim_filepath, fm, title, stem):
    """Build sourced_from list for the claim.

    Strategy: find PRs that produced this claim (via prs.description LIKE
    or branch slug match), follow prs.source_path to the inbox archive file,
    and parse that source's frontmatter for title/url. Falls back to the raw
    `source` string from the claim's own frontmatter.

    Both `title` and `stem` must be non-empty; the caller (handler) already
    falls back stem → title. Passing empty values would leak `LIKE '%%'`
    and match unrelated PRs.
    """
    out = []
    seen_paths = set()
    pr_rows = []
    if (title or "").strip() and (stem or "").strip():
        try:
            pr_rows = conn.execute(
                """SELECT DISTINCT source_path
                   FROM prs
                   WHERE source_path IS NOT NULL AND source_path != ''
                     AND (description LIKE ? OR branch LIKE ?)
                   LIMIT 10""",
                (f"%{title}%", f"%{stem}%"),
            ).fetchall()
        except sqlite3.OperationalError:
            pr_rows = []
    for row in pr_rows:
        path = row["source_path"]
        if not path or path in seen_paths:
            continue
        seen_paths.add(path)
        out.append(_resolve_source_file(path))
    # 2. Fallback: parse raw source frontmatter field if no PR match
    if not out:
        raw = fm.get("source")
        if isinstance(raw, str) and raw.strip():
            out.append({"path": None, "title": raw.strip()[:200], "url": None})
    return out


def _resolve_source_file(rel_path):
    """Given inbox/archive/... path, parse frontmatter for title+url. Best-effort."""
    full = CODEX_BASE / rel_path
    entry = {"path": rel_path, "title": None, "url": None}
    if full.exists():
        fm, _ = _read_claim_file(full)
        if fm:
            entry["title"] = fm.get("title") or fm.get("source") or rel_path
            entry["url"] = fm.get("url")
    if not entry["title"]:
        # Last resort: derive from filename
        entry["title"] = Path(rel_path).stem.replace("-", " ")
    return entry


# ─── Reviews + PRs ─────────────────────────────────────────────────────────

def _load_pr_history(conn, title, stem):
    """Find PRs that touched this claim and their reviews.

    Both title and stem must be non-empty strings; an empty value leaks
    `LIKE '%%'`, which matches every PR. Handler already populates a
    fallback so this is a defense-in-depth guard.
    """
    if not (title or "").strip() or not (stem or "").strip():
        return [], []
    try:
        pr_rows = conn.execute(
            """SELECT number, merged_at, commit_type, agent, branch, status
               FROM prs
               WHERE merged_at IS NOT NULL
                 AND (description LIKE ? OR branch LIKE ?)
               ORDER BY merged_at ASC
               LIMIT 50""",
            (f"%{title}%", f"%{stem}%"),
        ).fetchall()
    except sqlite3.OperationalError:
        return [], []
    prs = [
        {
            "number": r["number"],
            "merged_at": r["merged_at"],
            "kind": r["commit_type"] or "unknown",
            "agent": r["agent"],
            "branch": r["branch"],
        }
        for r in pr_rows
    ]
    pr_numbers = [p["number"] for p in prs]
    if not pr_numbers:
        return prs, []
    placeholders = ",".join("?" * len(pr_numbers))
    try:
        review_rows = conn.execute(
            f"""SELECT pr_number, reviewer, reviewer_model, outcome,
                       rejection_reason, notes, reviewed_at
                FROM review_records
                WHERE pr_number IN ({placeholders})
                ORDER BY reviewed_at ASC""",
            pr_numbers,
        ).fetchall()
    except sqlite3.OperationalError:
        review_rows = []
    reviews = [
        {
            "pr_number": r["pr_number"],
            "reviewer": r["reviewer"],
            "model": r["reviewer_model"],
            "outcome": r["outcome"],
            "rejection_reason": r["rejection_reason"],
            "notes": r["notes"],
            "reviewed_at": r["reviewed_at"],
        }
        for r in review_rows
    ]
    return prs, reviews


# ─── List view (preserved) ─────────────────────────────────────────────────

def _parse_list_entry(filepath):
    fm, body = _read_claim_file(filepath)
    if not fm or fm.get("type") != "claim":
        return None
    links = _WIKILINK_RE.findall(body or "")
    paragraphs = [p.strip() for p in (body or "").split("\n\n")
                  if p.strip() and not p.strip().startswith("#")]
    summary = paragraphs[0][:300] if paragraphs else ""
    return {
        "slug": filepath.stem,
@@ -40,40 +354,32 @@ def _parse_frontmatter(filepath):
        "challenged_by": fm.get("challenged_by"),
        "related_claims": fm.get("related_claims", []),
    }


def _load_all_claims_list():
    now = time.time()
    if _list_cache["data"] and now - _list_cache["ts"] < _LIST_CACHE_TTL:
        return _list_cache["data"]
    claims = []
    for f in _walk_claim_files():
        entry = _parse_list_entry(f)
        if entry:
            claims.append(entry)
    _list_cache["data"] = claims
    _list_cache["ts"] = now
    return claims


# ─── Handlers ──────────────────────────────────────────────────────────────

async def handle_claims(request):
    claims = _load_all_claims_list()

    domain = request.query.get("domain")
    search = request.query.get("q", "").lower()
    confidence = request.query.get("confidence")
    agent = request.query.get("agent")
    sort = request.query.get("sort", "recent")

    filtered = claims
    if domain:
@@ -83,9 +389,9 @@ async def handle_claims(request):
    if agent:
        filtered = [c for c in filtered if c["agent"] == agent]
    if search:
        filtered = [c for c in filtered
                    if search in c["title"].lower() or search in c["summary"].lower()]

    if sort == "recent":
        filtered.sort(key=lambda c: c["created"], reverse=True)
    elif sort == "alpha":
@@ -93,12 +399,10 @@ async def handle_claims(request):
    elif sort == "domain":
        filtered.sort(key=lambda c: (c["domain"], c["title"].lower()))

    limit = min(int(request.query.get("limit", "50")), 200)
    offset = int(request.query.get("offset", "0"))
    page = filtered[offset:offset + limit]

    domain_counts = {}
    for c in claims:
        domain_counts[c["domain"]] = domain_counts.get(c["domain"], 0) + 1
@@ -111,31 +415,78 @@ async def handle_claims(request):
        "domains": dict(sorted(domain_counts.items(), key=lambda x: -x[1])),
        "confidence_levels": sorted(set(c["confidence"] for c in claims)),
        "agents": sorted(set(c["agent"] for c in claims if c["agent"])),
    }, headers=CORS_HEADERS)


async def handle_claim_detail(request):
    """GET /api/claims/{slug} — canonical claim detail page (Ship contract).

    One round-trip, all data resolved server-side. Wikilinks pre-resolved.
    """
    slug = request.match_info["slug"]
    by_title, by_stem = _build_indexes()

    rel_path = by_stem.get(slug)
    if not rel_path:
        return web.json_response({"error": "claim not found", "slug": slug},
                                 status=404, headers=CORS_HEADERS)

    filepath = CODEX_BASE / rel_path
    fm, body = _read_claim_file(filepath)
    if not fm:
        return web.json_response({"error": "frontmatter parse failed", "slug": slug},
                                 status=500, headers=CORS_HEADERS)

    # Open read-only DB connection for this request
    conn = sqlite3.connect(f"file:{DB_PATH}?mode=ro", uri=True)
    conn.row_factory = sqlite3.Row
    try:
        title = fm.get("title") or slug.replace("-", " ")
        prs, reviews = _load_pr_history(conn, title, slug)
        sourced_from = _resolve_sourced_from(conn, filepath, fm, title, slug)
    finally:
        conn.close()

    last_review = None
    if reviews:
        latest = reviews[-1]
        last_review = {
            "outcome": latest["outcome"],
            "reviewer": latest["reviewer"],
            "date": (latest["reviewed_at"] or "")[:10],
        }

    # secondary_domains: explicit list, or empty
    secondary = fm.get("secondary_domains") or fm.get("cross_domain_links") or []
    if isinstance(secondary, str):
        secondary = [secondary]

    description = fm.get("description") or ""
    edges = _extract_edges(fm, by_title, by_stem)
    wikilinks = _resolve_wikilinks(body, by_title)

    response = {
        "slug": slug,
        "title": title,
        "domain": fm.get("domain", "unknown"),
        "secondary_domains": secondary,
        "confidence": fm.get("confidence", "unknown"),
        "description": description,
        "created": str(fm.get("created", "")),
        "last_review": last_review,
        "body": body or "",
        "sourced_from": sourced_from,
        "reviews": reviews,
        "prs": prs,
        "edges": edges,
        "wikilinks": wikilinks,
    }
    return web.json_response(response, headers=CORS_HEADERS)


async def handle_domains(request):
    claims = _load_all_claims_list()
    domains = {}
    for c in claims:
        d = c["domain"]
@@ -146,13 +497,11 @@ async def handle_domains(request):
        domains[d]["agents"].add(c["agent"])
        conf = c["confidence"]
        domains[d]["confidence_dist"][conf] = domains[d]["confidence_dist"].get(conf, 0) + 1

    result = []
    for d in sorted(domains.values(), key=lambda x: -x["count"]):
        d["agents"] = sorted(d["agents"])
        result.append(d)
    return web.json_response(result, headers=CORS_HEADERS)


def register_claims_routes(app):