From ed4893e837f0bbd1c521a2651def8a9e4664908e Mon Sep 17 00:00:00 2001 From: m3taversal Date: Mon, 11 May 2026 12:02:54 +0100 Subject: [PATCH] fix(claims): unwrap ```markdown code fences + 404 for fragments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two issues Ship hit on the Montreal Protocol claim: 1. 500 on canonical stem lookup. The file starts with a ```markdown wrapper instead of the bare --- frontmatter delimiter. _split_frontmatter checked startswith("---") and bailed with (None, None), so handle_claim_detail responded 500 "frontmatter parse failed". The same wrapper exists on 6 other claim files (audit grep). We now strip the wrapper before frontmatter detection. 2. 404 on long activity-feed slug. Same root cause — _build_indexes couldn't read the file's title from frontmatter, so by_title never indexed it, so title-fallback resolution had nothing to match against. Both bugs collapse once we unwrap. Also: switched "file exists but has no frontmatter" from 500 to 404 with reason=file_no_frontmatter. These are stray enrichment fragments living in domains/ that never got merged into a parent claim. From the API caller's perspective there's no claim at that slug — 500 implied "server bug, retry later", which isn't actionable. Verified: 3/3 wrapped claims (montreal, medicare, dod) now return 200 (warm cache, ~13ms). Long-slug repro (montreal) resolves via title fallback to canonical stem. Negative test (nonsense slug) still 404. 
--- diagnostics/claims_api.py | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/diagnostics/claims_api.py b/diagnostics/claims_api.py index 8808e45..712a46b 100644 --- a/diagnostics/claims_api.py +++ b/diagnostics/claims_api.py @@ -68,8 +68,22 @@ def _normalize_for_match(s): # ─── Frontmatter parse ───────────────────────────────────────────────────── +_CODE_FENCE_WRAPPER_RE = re.compile(r"^\s*```(?:markdown|md)?\s*\n(.*?)\n```\s*$", re.DOTALL) + + def _split_frontmatter(text): - """Return (frontmatter_dict, body_str) or (None, None) if not a claim file.""" + """Return (frontmatter_dict, body_str) or (None, None) if not a claim file. + + Tolerates files wrapped in a top-level ```markdown ... ``` code fence — + some agents have produced these (e.g. Montreal Protocol claim from Astra, + 2024-12-09). Unwrap once before frontmatter detection. + """ + if not text: + return None, None + m = _CODE_FENCE_WRAPPER_RE.match(text) + if m: + text = m.group(1) + text = text.lstrip() if not text.startswith("---"): return None, None try: @@ -465,8 +479,13 @@ async def handle_claim_detail(request): filepath = CODEX_BASE / rel_path fm, body = _read_claim_file(filepath) if not fm: - return web.json_response({"error": "frontmatter parse failed", "slug": slug}, - status=500, headers=CORS_HEADERS) + # File exists at this stem but has no parseable frontmatter — almost + # always a stray enrichment fragment that landed in domains/ without + # being merged into a parent claim. Surfacing as 404 (no claim here) + # not 500: the caller can't act on it differently anyway. + return web.json_response({"error": "claim not found", "slug": slug, + "reason": "file_no_frontmatter"}, + status=404, headers=CORS_HEADERS) # Open read-only DB connection for this request conn = sqlite3.connect(f"file:{DB_PATH}?mode=ro", uri=True)