feat(claims): /api/claims/{slug} canonical detail endpoint

Implements Ship's claim detail contract — one round-trip, all data
resolved server-side. Replaces the thin, domain-only stub with a full tree
walk (domains/ + foundations/ + core/), DB joins for PRs and reviews, and
server-side wikilink resolution to eliminate the frontend N+1 cascade.

Response shape (Ship brief 2026-04-29):
  slug, title, domain, secondary_domains, confidence, description,
  created, last_review, body (raw markdown), sourced_from, reviews,
  prs, edges {supports,challenges,related,depends_on}, wikilinks
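
Illustrative payload (hypothetical slug and invented values; the exact
shape is defined by handle_claim_detail in the diff below):

  {
    "slug": "example-claim", "title": "Example claim", "domain": "learning",
    "secondary_domains": [], "confidence": "medium", "description": "",
    "created": "2026-02-14",
    "last_review": {"outcome": "accept", "reviewer": "Argus", "date": "2026-05-01"},
    "body": "...raw markdown...",
    "sourced_from": [{"path": "inbox/archive/example.md", "title": "Example source", "url": null}],
    "reviews": [], "prs": [],
    "edges": {"supports": [], "challenges": [], "related": [], "depends_on": []},
    "wikilinks": {"Another Claim": "another-claim", "maps/learning": null}
  }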

Wikilink resolution:
- Builds a title→stem index from the frontmatter title field, falling back
  to the filename stem; both are normalized via _normalize_for_match
- Returns flat {link_text: slug_or_null} map; unresolved → null so
  frontend can render plain text
- Inline normalization (lowercase, hyphen↔space, collapse whitespace,
  strip punctuation); sketched below. Note: lib/attribution.py exposes only
  normalize_handle today, not the title normalizer Ship referenced. If a
  canonical helper lands later, switch to it.
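
Matching sketch (same rules as _normalize_for_match in the diff; the
example strings are invented):

  import re

  def normalize(s):
      s = str(s or "").lower().strip()
      s = s.replace("-", " ").replace("_", " ")   # hyphens/underscores -> spaces
      s = re.sub(r"\s+", " ", s)                  # collapse whitespace runs
      return re.sub(r"[^\w\s]", "", s).strip()    # drop punctuation filenames lose

  # A wikilink and its file stem normalize to the same key:
  assert normalize("Spaced-Repetition: Why It Works?") == "spaced repetition why it works"
  assert normalize("spaced-repetition-why-it-works") == "spaced repetition why it works"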

Caches:
- title→slug index: 60s TTL (warm cache <20ms p50 verified)
- list endpoint: 5min TTL (preserved from prior)
- Cold: ~3.3s for tree walk of 1,866 files; warm: 13-17ms
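
Both caches use the same module-level TTL pattern (sketch; mirrors the
index cache in the diff, with `build` standing in for the tree walk):

  import time

  _index_cache = {"by_title": None, "by_stem": None, "ts": 0}
  _INDEX_CACHE_TTL = 60

  def cached_indexes(build):
      now = time.time()
      if _index_cache["by_title"] is not None and now - _index_cache["ts"] < _INDEX_CACHE_TTL:
          return _index_cache["by_title"], _index_cache["by_stem"]   # warm path
      by_title, by_stem = build()                                    # cold: full tree walk
      _index_cache.update(by_title=by_title, by_stem=by_stem, ts=now)
      return by_title, by_stem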

Bug fixed in second pass:
- _resolve_sourced_from defaulted title to "", which leaked LIKE '%%' and
  matched every PR. Now requires a non-empty title and stem; the handler
  falls back to slug.replace("-"," ") when the frontmatter title is missing.
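
The guard, roughly (mirrors _load_pr_history / _resolve_sourced_from in
the diff; prs columns per pipeline.db):

  def _pr_rows_for(conn, title, stem):
      # Empty title or stem degenerates to LIKE '%%' and matches every row.
      if not (title or "").strip() or not (stem or "").strip():
          return []
      return conn.execute(
          "SELECT number, branch FROM prs WHERE description LIKE ? OR branch LIKE ?",
          (f"%{title}%", f"%{stem}%"),
      ).fetchall()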

Verified live on VPS:
- AI diagnostic triage claim (no fm.title): sourced_from=1, prs=0
  (correct — Feb claim, pre-description-tracking)
- Recent extract PR claim: sourced_from=1 with URL, prs=1, reviews=1,
  last_review populated, edges 3 supports + 7 related, wikilinks 0
- 404 on missing slug: correct
- Claim with [[maps/...]] wikilink: 5/6 resolved (correct null on map)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
m3taversal 2026-05-09 17:37:26 +01:00
parent fc002354d4
commit 0eb26327fc


@@ -1,29 +1,343 @@
"""Claims API — list endpoint + canonical claim detail page.

Owner: Argus

Routes:
    GET /api/claims          list/filter (frontmatter scan, lightweight)
    GET /api/claims/{slug}   full claim detail (Ship contract)
    GET /api/domains         domain rollups for sidebar

The detail endpoint is the canonical /claims/{slug} backend per Ship's
2026-04-29 brief. One round-trip, no N+1 cascade. Wikilinks resolved
server-side via a title→slug index built from a tree walk.
"""
import json
import re
import sqlite3
import time
from pathlib import Path

import yaml
from aiohttp import web

# Codex tree roots — claims live in three places (Sourcer Apr 26 fix scope)
CODEX_BASE = Path("/opt/teleo-eval/workspaces/main")
CLAIM_TREES = [CODEX_BASE / "domains", CODEX_BASE / "foundations", CODEX_BASE / "core"]

# pipeline.db for joins (review_records, prs, sources)
DB_PATH = "/opt/teleo-eval/pipeline/pipeline.db"

# In-process caches
_list_cache = {"data": None, "ts": 0}
_LIST_CACHE_TTL = 300  # 5 min — list view tolerates staleness
_index_cache = {"by_title": None, "by_stem": None, "ts": 0}
_INDEX_CACHE_TTL = 60  # 1 min — title→slug index for wikilink resolution

CORS_HEADERS = {"Access-Control-Allow-Origin": "*"}

# Wikilink pattern. [[text]] or [[text|alias]] — we keep the link text only.
_WIKILINK_RE = re.compile(r"\[\[([^\]|#]+?)(?:[#|][^\]]*)?\]\]")


# ─── Normalization ─────────────────────────────────────────────────────────

def _normalize_for_match(s):
    """Collapse a title or slug to a comparable form.

    Rules (from Ship's brief — match the link-fixer canonicalization):
    - lowercase
    - hyphen/space tolerant (both collapse to a single space)
    - collapse runs of whitespace
    - strip leading/trailing whitespace
    - drop trailing punctuation that gets stripped from filenames
      (`.`, `?`, `!`, `:`, `--`)

    NOTE: lib/attribution.py exposes only normalize_handle today, not the
    title normalizer Ship referenced. Implementing inline; if a canonical
    helper lands later we point at it.
    """
    if not s:
        return ""
    s = str(s).lower().strip()
    # Treat hyphens as spaces, then collapse whitespace runs
    s = s.replace("-", " ").replace("_", " ")
    s = re.sub(r"\s+", " ", s)
    # Strip ASCII punctuation that filenames drop
    s = re.sub(r"[^\w\s]", "", s)
    return s.strip()


# ─── Frontmatter parse ─────────────────────────────────────────────────────

def _split_frontmatter(text):
    """Return (frontmatter_dict, body_str) or (None, None) if not a claim file."""
    if not text.startswith("---"):
        return None, None
    try:
        end = text.index("\n---", 3)
    except ValueError:
        return None, None
    try:
        fm = yaml.safe_load(text[3:end])
    except Exception:
        return None, None
    if not isinstance(fm, dict):
        return None, None
    body = text[end + 4:].lstrip()
    return fm, body


def _read_claim_file(filepath):
    """Read a claim file from disk. Returns (frontmatter, body) or (None, None)."""
    try:
        text = filepath.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError):
        return None, None
    return _split_frontmatter(text)


# ─── Tree walk + indexing ──────────────────────────────────────────────────

def _walk_claim_files():
    """Yield Path objects for every .md claim file in domains/, foundations/, core/."""
    for root in CLAIM_TREES:
        if not root.exists():
            continue
        for f in root.rglob("*.md"):
            if f.name == "_map.md":
                continue
            yield f


def _build_indexes():
    """Build (title→stem, stem→relpath) indexes for wikilink resolution.

    Cached for _INDEX_CACHE_TTL. Pulls from claim-index endpoint when
    possible (already cached upstream) and falls back to filesystem walk.
    """
    now = time.time()
    if _index_cache["by_title"] is not None and now - _index_cache["ts"] < _INDEX_CACHE_TTL:
        return _index_cache["by_title"], _index_cache["by_stem"]
    by_title = {}
    by_stem = {}
    for f in _walk_claim_files():
        stem = f.stem
        rel = str(f.relative_to(CODEX_BASE))
        by_stem[stem] = rel
        # Index by stem-as-normalized too (covers wikilinks that use the slug)
        by_title[_normalize_for_match(stem)] = stem
        # Also try parsing the title from frontmatter for higher-fidelity matches
        fm, _ = _read_claim_file(f)
        if fm:
            title = fm.get("title")
            if title:
                key = _normalize_for_match(title)
                if key and key not in by_title:
                    by_title[key] = stem
    _index_cache["by_title"] = by_title
    _index_cache["by_stem"] = by_stem
    _index_cache["ts"] = now
    return by_title, by_stem


def _resolve_wikilinks(body, by_title):
    """Extract [[link]] occurrences from body, return {link_text: slug_or_null}."""
    out = {}
    for match in _WIKILINK_RE.finditer(body or ""):
        link_text = match.group(1).strip()
        if not link_text or link_text in out:
            continue
        norm = _normalize_for_match(link_text)
        out[link_text] = by_title.get(norm)
    return out


# ─── Edge extraction from frontmatter ──────────────────────────────────────

_EDGE_FIELDS = {
    "supports": "supports",
    "challenges": "challenges",
    "challenged_by": "challenges",  # canonical: store as challenges direction
    "related": "related",
    "related_claims": "related",
    "depends_on": "depends_on",
}


def _extract_edges(fm, by_title, by_stem):
    """Return edges dict shaped per Ship's contract.

    Each edge is {slug, title, exists}. Slug resolved through title index.
    """
    edges = {"supports": [], "challenges": [], "related": [], "depends_on": []}
    for fm_key, edge_kind in _EDGE_FIELDS.items():
        raw = fm.get(fm_key)
        if not raw:
            continue
        items = raw if isinstance(raw, list) else [raw]
        for item in items:
            if not isinstance(item, str):
                continue
            text = item.strip()
            # Strip wikilink wrapping if present
            text = re.sub(r"^\[\[|\]\]$", "", text)
            # Strip pipe annotations: "[[link|alias]]" style or "claim | edge_type | date"
            text = text.split("|")[0].strip()
            if not text:
                continue
            # Try title match first, fall back to stem match
            slug = by_title.get(_normalize_for_match(text))
            if not slug and text in by_stem:
                slug = text
            edges[edge_kind].append({
                "slug": slug,
                "title": text,
                "exists": slug is not None,
            })
    return edges


# ─── Source provenance ─────────────────────────────────────────────────────

def _resolve_sourced_from(conn, claim_filepath, fm, title, stem):
    """Build sourced_from list for the claim.

    Strategy: find PRs that produced this claim (via prs.description LIKE
    or branch slug match), follow prs.source_path to the inbox archive file,
    and parse that source's frontmatter for title/url. Falls back to the raw
    `source` string from the claim's own frontmatter.

    Both `title` and `stem` must be non-empty; the caller (handler) already
    falls back stem → title. Passing empty values would leak `LIKE '%%'`
    and match unrelated PRs.
    """
    out = []
    seen_paths = set()
    pr_rows = []
    if (title or "").strip() and (stem or "").strip():
        try:
            pr_rows = conn.execute(
                """SELECT DISTINCT source_path
                   FROM prs
                   WHERE source_path IS NOT NULL AND source_path != ''
                     AND (description LIKE ? OR branch LIKE ?)
                   LIMIT 10""",
                (f"%{title}%", f"%{stem}%"),
            ).fetchall()
        except sqlite3.OperationalError:
            pr_rows = []
    for row in pr_rows:
        path = row["source_path"]
        if not path or path in seen_paths:
            continue
        seen_paths.add(path)
        out.append(_resolve_source_file(path))
    # 2. Fallback: parse raw source frontmatter field if no PR match
    if not out:
        raw = fm.get("source")
        if isinstance(raw, str) and raw.strip():
            out.append({"path": None, "title": raw.strip()[:200], "url": None})
    return out


def _resolve_source_file(rel_path):
    """Given inbox/archive/... path, parse frontmatter for title+url. Best-effort."""
    full = CODEX_BASE / rel_path
    entry = {"path": rel_path, "title": None, "url": None}
    if full.exists():
        fm, _ = _read_claim_file(full)
        if fm:
            entry["title"] = fm.get("title") or fm.get("source") or rel_path
            entry["url"] = fm.get("url")
    if not entry["title"]:
        # Last resort: derive from filename
        entry["title"] = Path(rel_path).stem.replace("-", " ")
    return entry


# ─── Reviews + PRs ─────────────────────────────────────────────────────────

def _load_pr_history(conn, title, stem):
    """Find PRs that touched this claim and their reviews.

    Both title and stem must be non-empty strings; an empty value leaks
    `LIKE '%%'`, which matches every PR. Handler already populates a
    fallback so this is a defense-in-depth guard.
    """
    if not (title or "").strip() or not (stem or "").strip():
        return [], []
    try:
        pr_rows = conn.execute(
            """SELECT number, merged_at, commit_type, agent, branch, status
               FROM prs
               WHERE merged_at IS NOT NULL
                 AND (description LIKE ? OR branch LIKE ?)
               ORDER BY merged_at ASC
               LIMIT 50""",
            (f"%{title}%", f"%{stem}%"),
        ).fetchall()
    except sqlite3.OperationalError:
        return [], []
    prs = [
        {
            "number": r["number"],
            "merged_at": r["merged_at"],
            "kind": r["commit_type"] or "unknown",
            "agent": r["agent"],
            "branch": r["branch"],
        }
        for r in pr_rows
    ]
    pr_numbers = [p["number"] for p in prs]
    if not pr_numbers:
        return prs, []
    placeholders = ",".join("?" * len(pr_numbers))
    try:
        review_rows = conn.execute(
            f"""SELECT pr_number, reviewer, reviewer_model, outcome,
                       rejection_reason, notes, reviewed_at
                FROM review_records
                WHERE pr_number IN ({placeholders})
                ORDER BY reviewed_at ASC""",
            pr_numbers,
        ).fetchall()
    except sqlite3.OperationalError:
        review_rows = []
    reviews = [
        {
            "pr_number": r["pr_number"],
            "reviewer": r["reviewer"],
            "model": r["reviewer_model"],
            "outcome": r["outcome"],
            "rejection_reason": r["rejection_reason"],
            "notes": r["notes"],
            "reviewed_at": r["reviewed_at"],
        }
        for r in review_rows
    ]
    return prs, reviews


# ─── List view (preserved) ─────────────────────────────────────────────────

def _parse_list_entry(filepath):
    fm, body = _read_claim_file(filepath)
    if not fm or fm.get("type") != "claim":
        return None
    links = _WIKILINK_RE.findall(body or "")
    paragraphs = [p.strip() for p in (body or "").split("\n\n")
                  if p.strip() and not p.strip().startswith("#")]
    summary = paragraphs[0][:300] if paragraphs else ""
    return {
        "slug": filepath.stem,
@@ -40,40 +354,32 @@ def _parse_frontmatter(filepath):
        "challenged_by": fm.get("challenged_by"),
        "related_claims": fm.get("related_claims", []),
    }


def _load_all_claims_list():
    now = time.time()
    if _list_cache["data"] and now - _list_cache["ts"] < _LIST_CACHE_TTL:
        return _list_cache["data"]
    claims = []
    for f in _walk_claim_files():
        entry = _parse_list_entry(f)
        if entry:
            claims.append(entry)
    _list_cache["data"] = claims
    _list_cache["ts"] = now
    return claims


# ─── Handlers ──────────────────────────────────────────────────────────────

async def handle_claims(request):
    claims = _load_all_claims_list()

    domain = request.query.get("domain")
    search = request.query.get("q", "").lower()
    confidence = request.query.get("confidence")
    agent = request.query.get("agent")
    sort = request.query.get("sort", "recent")

    filtered = claims
    if domain:
@@ -83,9 +389,9 @@ async def handle_claims(request):
    if agent:
        filtered = [c for c in filtered if c["agent"] == agent]
    if search:
        filtered = [c for c in filtered
                    if search in c["title"].lower() or search in c["summary"].lower()]

    if sort == "recent":
        filtered.sort(key=lambda c: c["created"], reverse=True)
    elif sort == "alpha":
@@ -93,12 +399,10 @@ async def handle_claims(request):
    elif sort == "domain":
        filtered.sort(key=lambda c: (c["domain"], c["title"].lower()))

    limit = min(int(request.query.get("limit", "50")), 200)
    offset = int(request.query.get("offset", "0"))
    page = filtered[offset:offset + limit]

    domain_counts = {}
    for c in claims:
        domain_counts[c["domain"]] = domain_counts.get(c["domain"], 0) + 1
@@ -111,31 +415,78 @@ async def handle_claims(request):
        "domains": dict(sorted(domain_counts.items(), key=lambda x: -x[1])),
        "confidence_levels": sorted(set(c["confidence"] for c in claims)),
        "agents": sorted(set(c["agent"] for c in claims if c["agent"])),
    }, headers=CORS_HEADERS)


async def handle_claim_detail(request):
    """GET /api/claims/{slug} — canonical claim detail page (Ship contract).

    One round-trip, all data resolved server-side. Wikilinks pre-resolved.
    """
    slug = request.match_info["slug"]
    by_title, by_stem = _build_indexes()

    rel_path = by_stem.get(slug)
    if not rel_path:
        return web.json_response({"error": "claim not found", "slug": slug},
                                 status=404, headers=CORS_HEADERS)

    filepath = CODEX_BASE / rel_path
    fm, body = _read_claim_file(filepath)
    if not fm:
        return web.json_response({"error": "frontmatter parse failed", "slug": slug},
                                 status=500, headers=CORS_HEADERS)

    # Open read-only DB connection for this request
    conn = sqlite3.connect(f"file:{DB_PATH}?mode=ro", uri=True)
    conn.row_factory = sqlite3.Row
    try:
        title = fm.get("title") or slug.replace("-", " ")
        prs, reviews = _load_pr_history(conn, title, slug)
        sourced_from = _resolve_sourced_from(conn, filepath, fm, title, slug)
    finally:
        conn.close()

    last_review = None
    if reviews:
        latest = reviews[-1]
        last_review = {
            "outcome": latest["outcome"],
            "reviewer": latest["reviewer"],
            "date": (latest["reviewed_at"] or "")[:10],
        }

    # secondary_domains: explicit list, or empty
    secondary = fm.get("secondary_domains") or fm.get("cross_domain_links") or []
    if isinstance(secondary, str):
        secondary = [secondary]

    description = fm.get("description") or ""
    edges = _extract_edges(fm, by_title, by_stem)
    wikilinks = _resolve_wikilinks(body, by_title)

    response = {
        "slug": slug,
        "title": title,
        "domain": fm.get("domain", "unknown"),
        "secondary_domains": secondary,
        "confidence": fm.get("confidence", "unknown"),
        "description": description,
        "created": str(fm.get("created", "")),
        "last_review": last_review,
        "body": body or "",
        "sourced_from": sourced_from,
        "reviews": reviews,
        "prs": prs,
        "edges": edges,
        "wikilinks": wikilinks,
    }
    return web.json_response(response, headers=CORS_HEADERS)


async def handle_domains(request):
    claims = _load_all_claims_list()
    domains = {}
    for c in claims:
        d = c["domain"]
@@ -146,13 +497,11 @@ async def handle_domains(request):
        domains[d]["agents"].add(c["agent"])
        conf = c["confidence"]
        domains[d]["confidence_dist"][conf] = domains[d]["confidence_dist"].get(conf, 0) + 1

    result = []
    for d in sorted(domains.values(), key=lambda x: -x["count"]):
        d["agents"] = sorted(d["agents"])
        result.append(d)
    return web.json_response(result, headers=CORS_HEADERS)


def register_claims_routes(app):