Compare commits
1 commit
main
...
argus/clai
| Author | SHA1 | Date | |
|---|---|---|---|
| | 61007042bc | | |
1 changed files with 36 additions and 14 deletions
|
|
@@ -10,7 +10,9 @@ The detail endpoint is the canonical /claims/{slug} backend per Ship's
|
||||||
2026-04-29 brief. One round-trip, no N+1 cascade. Wikilinks resolved
|
2026-04-29 brief. One round-trip, no N+1 cascade. Wikilinks resolved
|
||||||
server-side via title→slug index built from a tree walk.
|
server-side via title→slug index built from a tree walk.
|
||||||
"""
|
"""
|
||||||
|
import asyncio
|
||||||
import json
|
import json
|
||||||
|
import logging
|
||||||
import re
|
import re
|
||||||
import sqlite3
|
import sqlite3
|
||||||
import time
|
import time
|
||||||
|
|
@@ -19,6 +21,8 @@ from pathlib import Path
|
||||||
import yaml
|
import yaml
|
||||||
from aiohttp import web
|
from aiohttp import web
|
||||||
|
|
||||||
|
logger = logging.getLogger("argus.claims")
|
||||||
|
|
||||||
# Codex tree roots — claims live in three places (Sourcer Apr 26 fix scope)
|
# Codex tree roots — claims live in three places (Sourcer Apr 26 fix scope)
|
||||||
CODEX_BASE = Path("/opt/teleo-eval/workspaces/main")
|
CODEX_BASE = Path("/opt/teleo-eval/workspaces/main")
|
||||||
CLAIM_TREES = [CODEX_BASE / "domains", CODEX_BASE / "foundations", CODEX_BASE / "core"]
|
CLAIM_TREES = [CODEX_BASE / "domains", CODEX_BASE / "foundations", CODEX_BASE / "core"]
|
||||||
|
|
@@ -31,7 +35,14 @@ _list_cache = {"data": None, "ts": 0}
|
||||||
_LIST_CACHE_TTL = 300 # 5 min — list view tolerates staleness
|
_LIST_CACHE_TTL = 300 # 5 min — list view tolerates staleness
|
||||||
|
|
||||||
_index_cache = {"by_title": None, "by_stem": None, "ts": 0}
|
_index_cache = {"by_title": None, "by_stem": None, "ts": 0}
|
||||||
_INDEX_CACHE_TTL = 60 # 1 min — title→slug index for wikilink resolution
|
_INDEX_CACHE_TTL = 300 # 5 min — title→slug index for wikilink resolution
|
||||||
|
|
||||||
|
# Minimum normalized-stem length for prefix-fallback resolution.
|
||||||
|
# Stems shorter than this are too generic to be unambiguous in the prefix
|
||||||
|
# space (e.g. a "rio" stem would match any request starting with "rio").
|
||||||
|
# Proper-prefix matching is much stronger than common-prefix at preventing
|
||||||
|
# spurious hits, so this can be lower than the original common-prefix anchor.
|
||||||
|
_PREFIX_ANCHOR_MIN = 16
|
||||||
|
|
||||||
CORS_HEADERS = {"Access-Control-Allow-Origin": "*"}
|
CORS_HEADERS = {"Access-Control-Allow-Origin": "*"}
|
||||||
|
|
||||||
|
|
@@ -71,12 +82,16 @@ def _normalize_for_match(s):
|
||||||
_CODE_FENCE_WRAPPER_RE = re.compile(r"^\s*```(?:markdown|md)?\s*\n(.*?)\n```\s*$", re.DOTALL)
|
_CODE_FENCE_WRAPPER_RE = re.compile(r"^\s*```(?:markdown|md)?\s*\n(.*?)\n```\s*$", re.DOTALL)
|
||||||
|
|
||||||
|
|
||||||
def _split_frontmatter(text):
|
def _split_frontmatter(text, filepath=None):
|
||||||
"""Return (frontmatter_dict, body_str) or (None, None) if not a claim file.
|
"""Return (frontmatter_dict, body_str) or (None, None) if not a claim file.
|
||||||
|
|
||||||
Tolerates files wrapped in a top-level ```markdown ... ``` code fence —
|
Tolerates files wrapped in a top-level ```markdown ... ``` code fence —
|
||||||
some agents have produced these (e.g. Montreal Protocol claim from Astra,
|
some agents have produced these (e.g. Montreal Protocol claim from Astra,
|
||||||
2024-12-09). Unwrap once before frontmatter detection.
|
2024-12-09). Unwrap once before frontmatter detection.
|
||||||
|
|
||||||
|
YAML parse failures are logged at WARNING with the file path (when
|
||||||
|
provided) so KB integrity drift surfaces in logs rather than silently
|
||||||
|
becoming 404s on the detail endpoint.
|
||||||
"""
|
"""
|
||||||
if not text:
|
if not text:
|
||||||
return None, None
|
return None, None
|
||||||
|
|
@@ -92,7 +107,8 @@ def _split_frontmatter(text):
|
||||||
return None, None
|
return None, None
|
||||||
try:
|
try:
|
||||||
fm = yaml.safe_load(text[3:end])
|
fm = yaml.safe_load(text[3:end])
|
||||||
except Exception:
|
except yaml.YAMLError as e:
|
||||||
|
logger.warning("YAML parse failed in %s: %s", filepath or "<unknown>", e)
|
||||||
return None, None
|
return None, None
|
||||||
if not isinstance(fm, dict):
|
if not isinstance(fm, dict):
|
||||||
return None, None
|
return None, None
|
||||||
|
|
@@ -106,7 +122,7 @@ def _read_claim_file(filepath):
|
||||||
text = filepath.read_text(encoding="utf-8")
|
text = filepath.read_text(encoding="utf-8")
|
||||||
except (OSError, UnicodeDecodeError):
|
except (OSError, UnicodeDecodeError):
|
||||||
return None, None
|
return None, None
|
||||||
return _split_frontmatter(text)
|
return _split_frontmatter(text, filepath)
|
||||||
|
|
||||||
|
|
||||||
# ─── Tree walk + indexing ──────────────────────────────────────────────────
|
# ─── Tree walk + indexing ──────────────────────────────────────────────────
|
||||||
|
|
@@ -438,7 +454,12 @@ async def handle_claim_detail(request):
|
||||||
One round-trip, all data resolved server-side. Wikilinks pre-resolved.
|
One round-trip, all data resolved server-side. Wikilinks pre-resolved.
|
||||||
"""
|
"""
|
||||||
requested_slug = request.match_info["slug"]
|
requested_slug = request.match_info["slug"]
|
||||||
by_title, by_stem = _build_indexes()
|
# Cold-cache rebuild walks ~1,900 files (~3.3s of sync I/O). Route through
|
||||||
|
# to_thread so the aiohttp event loop stays responsive while the index
|
||||||
|
# rebuilds — concurrent requests don't all stall behind one walk.
|
||||||
|
# Warm-cache cost is a dict access (microseconds), to_thread overhead
|
||||||
|
# negligible. Ganymede review 2026-05-11.
|
||||||
|
by_title, by_stem = await asyncio.to_thread(_build_indexes)
|
||||||
|
|
||||||
# Resolution order: exact stem → title-normalized (handles description-derived
|
# Resolution order: exact stem → title-normalized (handles description-derived
|
||||||
# slugs from /api/activity-feed that are longer than on-disk file stems) →
|
# slugs from /api/activity-feed that are longer than on-disk file stems) →
|
||||||
|
|
@@ -454,21 +475,22 @@ async def handle_claim_detail(request):
|
||||||
slug = resolved_stem
|
slug = resolved_stem
|
||||||
rel_path = by_stem.get(resolved_stem)
|
rel_path = by_stem.get(resolved_stem)
|
||||||
if not rel_path:
|
if not rel_path:
|
||||||
# Prefix fallback: walk stems sharing a common prefix with the request,
|
# Proper-prefix fallback: the requested slug should START WITH a known
|
||||||
# pick longest match. Anchored at 32 chars to avoid spurious hits.
|
# stem (covers activity-feed slugs longer than on-disk filenames). The
|
||||||
|
# earlier common-prefix variant of this was broken in two directions —
|
||||||
|
# served non-deterministic matches on same-prefix collisions, and
|
||||||
|
# missed legitimate matches under the 32-char anchor (e.g. stems
|
||||||
|
# shorter than 32 chars normalized). Ganymede review 2026-05-11.
|
||||||
norm_req = _normalize_for_match(requested_slug)
|
norm_req = _normalize_for_match(requested_slug)
|
||||||
best_stem = None
|
best_stem = None
|
||||||
best_len = 0
|
best_len = 0
|
||||||
for stem in by_stem:
|
for stem in by_stem:
|
||||||
norm_stem = _normalize_for_match(stem)
|
norm_stem = _normalize_for_match(stem)
|
||||||
common = 0
|
if len(norm_stem) < _PREFIX_ANCHOR_MIN:
|
||||||
for a, b in zip(norm_req, norm_stem):
|
continue # too generic in prefix space
|
||||||
if a != b:
|
if norm_req.startswith(norm_stem) and len(norm_stem) > best_len:
|
||||||
break
|
|
||||||
common += 1
|
|
||||||
if common >= 32 and common > best_len:
|
|
||||||
best_stem = stem
|
best_stem = stem
|
||||||
best_len = common
|
best_len = len(norm_stem)
|
||||||
if best_stem:
|
if best_stem:
|
||||||
slug = best_stem
|
slug = best_stem
|
||||||
rel_path = by_stem.get(best_stem)
|
rel_path = by_stem.get(best_stem)
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue