fix: index decisions/ as entities so decision records reach the bot prompt

Root cause: decision records have type: decision, but the entity indexer
only accepted type: entity and only scanned entities/. The claim indexer
scanned decisions/ but filtered out non-claim types. Result: decision
records fell through both indexes entirely — invisible to the bot.

Fix: add decisions/ to entity indexer scan paths, accept type: decision
alongside type: entity, include summary/proposer in search aliases.
Remove decisions/ from claim indexer (was silently dropping them anyway).

Pentagon-Agent: Epimetheus <3D35839A-7722-4740-B93D-51157F7D5E70>
This commit is contained in:
m3taversal 2026-03-23 17:28:30 +00:00
parent c7c71ec9d1
commit 425e7a1bac

View file

@@ -106,78 +106,100 @@ class KBIndex:
time.time() - start, len(self._entities), len(self._claims), len(self._positions))
def _index_entities(self):
    """Scan entities/ and decisions/ for entity and decision files.

    Decision records carry ``type: decision`` frontmatter and live under
    decisions/; they are routed through the same per-file indexer as
    entities so they become visible to alias lookup.
    """
    entity_dirs = [
        self.repo_dir / "entities",
        self.repo_dir / "decisions",
    ]
    for entities_dir in entity_dirs:
        if not entities_dir.exists():
            # A missing directory is not an error for this repo layout —
            # just move on to the next scan root.
            continue
        for md_file in entities_dir.rglob("*.md"):
            self._index_single_entity(md_file)
def _index_single_entity(self, md_file: Path):
    """Index a single entity or decision file into the entity index.

    Accepts frontmatter ``type: entity`` or ``type: decision``; files with
    any other type (or no parseable frontmatter) are skipped. Builds a set
    of lowercase search aliases from the name, file slug, handles, tags,
    mined ticker symbols, an optional ``aliases`` frontmatter field, and —
    for decision records — the proposer. Appends one record to
    ``self._entities`` and registers every alias in
    ``self._entity_alias_map``.

    Failures are logged and swallowed so one bad file cannot abort the
    whole scan.
    """
    try:
        fm, body = _parse_frontmatter(md_file)
        if not fm or fm.get("type") not in ("entity", "decision"):
            return

        name = fm.get("name", md_file.stem)
        handles = fm.get("handles", []) or []
        tags = fm.get("tags", []) or []
        entity_type = fm.get("entity_type", "unknown")
        domain = fm.get("domain", "unknown")
        # For decision records, also index summary and proposer as searchable text
        summary = fm.get("summary", "")
        proposer = fm.get("proposer", "")

        # Build aliases from multiple sources
        aliases = set()
        aliases.add(name.lower())
        aliases.add(md_file.stem.lower())  # slugified name
        for h in handles:
            aliases.add(h.lower().lstrip("@"))
        for t in tags:
            aliases.add(t.lower())
        # Add proposer name as alias for decision records
        if proposer:
            aliases.add(proposer.lower())

        # Mine body for ticker mentions ($XXXX and standalone ALL-CAPS tokens)
        dollar_tickers = re.findall(r"\$([A-Z]{2,10})", body[:2000])
        for ticker in dollar_tickers:
            aliases.add(ticker.lower())
            aliases.add(f"${ticker.lower()}")
        # Standalone all-caps tokens (likely tickers: OMFG, META, SOL)
        caps_tokens = re.findall(r"\b([A-Z]{2,10})\b", body[:2000])
        for token in caps_tokens:
            # Filter common English words / jargon that happen to be short caps
            if token not in ("THE", "AND", "FOR", "NOT", "BUT", "HAS", "ARE", "WAS",
                             "ITS", "ALL", "CAN", "HAD", "HER", "ONE", "OUR", "OUT",
                             "NEW", "NOW", "OLD", "SEE", "WAY", "MAY", "SAY", "SHE",
                             "TWO", "HOW", "BOY", "DID", "GET", "PUT", "KEY", "TVL",
                             "AMM", "CEO", "SDK", "API", "ICO", "APY", "FAQ", "IPO"):
                aliases.add(token.lower())
                aliases.add(f"${token.lower()}")

        # Also add aliases field if it exists (future schema)
        for a in (fm.get("aliases", []) or []):
            aliases.add(a.lower())

        # Extract wiki-linked claim references from body
        related_claims = re.findall(r"\[\[([^\]]+)\]\]", body)

        # Body excerpt — for decisions, lead with summary for better prompt context.
        # body_lines is the same in both branches, so compute it once.
        body_lines = [l for l in body.split("\n") if l.strip() and not l.startswith("#")]
        if summary:
            overview = f"{summary} "
            remaining = 500 - len(overview)
            if remaining > 0:
                overview += " ".join(body_lines[:10])[:remaining]
            # Fix: an over-long summary previously escaped the 500-char cap
            # (remaining <= 0 left the full summary in place). Keep the
            # excerpt bounded like the non-summary path.
            overview = overview[:500]
        else:
            overview = " ".join(body_lines[:10])[:500]

        idx = len(self._entities)
        self._entities.append({
            "name": name,
            "path": str(md_file),
            "type": entity_type,
            "domain": domain,
            "tags": tags,
            "handles": handles,
            "aliases": list(aliases),
            "overview": overview,
            "related_claims": related_claims,
        })
        # Register all aliases in lookup map
        for alias in aliases:
            self._entity_alias_map.setdefault(alias, []).append(idx)
    except Exception as e:
        logger.warning("Failed to index entity %s: %s", md_file, e)
def _index_claims(self):
"""Scan domains/, core/, and foundations/ for claim files."""
@@ -185,7 +207,6 @@ class KBIndex:
self.repo_dir / "domains",
self.repo_dir / "core",
self.repo_dir / "foundations",
self.repo_dir / "decisions",
]
for claim_dir in claim_dirs:
if not claim_dir.exists():
@@ -447,7 +468,7 @@ def _domain_from_path(path: Path, repo_dir: Path) -> str:
"""Infer domain from file path."""
rel = path.relative_to(repo_dir)
parts = rel.parts
if len(parts) >= 2 and parts[0] in ("domains", "entities"):
if len(parts) >= 2 and parts[0] in ("domains", "entities", "decisions"):
return parts[1]
if len(parts) >= 1 and parts[0] == "core":
return "core"