fix: index decisions/ as entities so decision records reach the bot prompt
Root cause: decision records have type: decision, but the entity indexer
only accepted type: entity and only scanned entities/. The claim indexer
scanned decisions/ but filtered out non-claim types. As a result, decision
records fell through both indexes entirely and were invisible to the bot.

Fix: add decisions/ to the entity indexer's scan paths, accept
type: decision alongside type: entity, and include summary/proposer in the
search aliases. Also remove decisions/ from the claim indexer, which was
silently dropping them anyway.

Pentagon-Agent: Epimetheus <3D35839A-7722-4740-B93D-51157F7D5E70>
parent c7c71ec9d1
commit 425e7a1bac

1 changed file with 84 additions and 63 deletions
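For context, here is a minimal sketch of the failure mode described above, using the parsed frontmatter of a hypothetical decision record (the field names — type, name, summary, proposer, tags — are the ones the indexer reads; the values are invented):

    # Parsed frontmatter of a hypothetical decision record:
    fm = {"type": "decision", "name": "Adopt ruleset v2",
          "summary": "Migrate governance votes to the v2 ruleset.",
          "proposer": "alice", "tags": ["governance"]}

    fm.get("type") != "entity"                     # old entity gate: True  -> skipped
    fm.get("type") not in ("entity", "decision")   # new entity gate: False -> indexed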
@@ -106,78 +106,100 @@ class KBIndex:
             time.time() - start, len(self._entities), len(self._claims), len(self._positions))

     def _index_entities(self):
-        """Scan entities/ for all entity files."""
-        entities_dir = self.repo_dir / "entities"
-        if not entities_dir.exists():
-            return
-        for md_file in entities_dir.rglob("*.md"):
-            try:
-                fm, body = _parse_frontmatter(md_file)
-                if not fm or fm.get("type") != "entity":
-                    continue
+        """Scan entities/ and decisions/ for entity and decision files."""
+        entity_dirs = [
+            self.repo_dir / "entities",
+            self.repo_dir / "decisions",
+        ]
+        for entities_dir in entity_dirs:
+            if not entities_dir.exists():
+                continue
+            for md_file in entities_dir.rglob("*.md"):
+                self._index_single_entity(md_file)

-                name = fm.get("name", md_file.stem)
-                handles = fm.get("handles", []) or []
-                tags = fm.get("tags", []) or []
-                entity_type = fm.get("entity_type", "unknown")
-                domain = fm.get("domain", "unknown")
+    def _index_single_entity(self, md_file: Path):
+        """Index a single entity or decision file."""
+        try:
+            fm, body = _parse_frontmatter(md_file)
+            if not fm or fm.get("type") not in ("entity", "decision"):
+                return

-                # Build aliases from multiple sources
-                aliases = set()
-                aliases.add(name.lower())
-                aliases.add(md_file.stem.lower())  # slugified name
-                for h in handles:
-                    aliases.add(h.lower().lstrip("@"))
-                for t in tags:
-                    aliases.add(t.lower())
+            name = fm.get("name", md_file.stem)
+            handles = fm.get("handles", []) or []
+            tags = fm.get("tags", []) or []
+            entity_type = fm.get("entity_type", "unknown")
+            domain = fm.get("domain", "unknown")

-                # Mine body for ticker mentions ($XXXX and standalone ALL-CAPS tokens)
-                dollar_tickers = re.findall(r"\$([A-Z]{2,10})", body[:2000])
-                for ticker in dollar_tickers:
-                    aliases.add(ticker.lower())
-                    aliases.add(f"${ticker.lower()}")
-                # Standalone all-caps tokens (likely tickers: OMFG, META, SOL)
-                caps_tokens = re.findall(r"\b([A-Z]{2,10})\b", body[:2000])
-                for token in caps_tokens:
-                    # Filter common English words that happen to be short caps
-                    if token not in ("THE", "AND", "FOR", "NOT", "BUT", "HAS", "ARE", "WAS",
-                                     "ITS", "ALL", "CAN", "HAD", "HER", "ONE", "OUR", "OUT",
-                                     "NEW", "NOW", "OLD", "SEE", "WAY", "MAY", "SAY", "SHE",
-                                     "TWO", "HOW", "BOY", "DID", "GET", "PUT", "KEY", "TVL",
-                                     "AMM", "CEO", "SDK", "API", "ICO", "APY", "FAQ", "IPO"):
-                        aliases.add(token.lower())
-                        aliases.add(f"${token.lower()}")
+            # For decision records, also index summary and proposer as searchable text
+            summary = fm.get("summary", "")
+            proposer = fm.get("proposer", "")

-                # Also add aliases field if it exists (future schema)
-                for a in (fm.get("aliases", []) or []):
-                    aliases.add(a.lower())
+            # Build aliases from multiple sources
+            aliases = set()
+            aliases.add(name.lower())
+            aliases.add(md_file.stem.lower())  # slugified name
+            for h in handles:
+                aliases.add(h.lower().lstrip("@"))
+            for t in tags:
+                aliases.add(t.lower())
+            # Add proposer name as alias for decision records
+            if proposer:
+                aliases.add(proposer.lower())

-                # Extract wiki-linked claim references from body
-                related_claims = re.findall(r"\[\[([^\]]+)\]\]", body)
+            # Mine body for ticker mentions ($XXXX and standalone ALL-CAPS tokens)
+            dollar_tickers = re.findall(r"\$([A-Z]{2,10})", body[:2000])
+            for ticker in dollar_tickers:
+                aliases.add(ticker.lower())
+                aliases.add(f"${ticker.lower()}")
+            # Standalone all-caps tokens (likely tickers: OMFG, META, SOL)
+            caps_tokens = re.findall(r"\b([A-Z]{2,10})\b", body[:2000])
+            for token in caps_tokens:
+                # Filter common English words that happen to be short caps
+                if token not in ("THE", "AND", "FOR", "NOT", "BUT", "HAS", "ARE", "WAS",
+                                 "ITS", "ALL", "CAN", "HAD", "HER", "ONE", "OUR", "OUT",
+                                 "NEW", "NOW", "OLD", "SEE", "WAY", "MAY", "SAY", "SHE",
+                                 "TWO", "HOW", "BOY", "DID", "GET", "PUT", "KEY", "TVL",
+                                 "AMM", "CEO", "SDK", "API", "ICO", "APY", "FAQ", "IPO"):
+                    aliases.add(token.lower())
+                    aliases.add(f"${token.lower()}")

-                # Body excerpt for context
-                body_lines = [l for l in body.split("\n") if l.strip() and not l.startswith("#")]
-                overview = " ".join(body_lines[:10])[:500]
+            # Also add aliases field if it exists (future schema)
+            for a in (fm.get("aliases", []) or []):
+                aliases.add(a.lower())
+
+            # Extract wiki-linked claim references from body
+            related_claims = re.findall(r"\[\[([^\]]+)\]\]", body)
+
+            # Body excerpt — for decisions, lead with summary for better prompt context
+            if summary:
+                overview = f"{summary} "
+                body_lines = [l for l in body.split("\n") if l.strip() and not l.startswith("#")]
+                remaining = 500 - len(overview)
+                if remaining > 0:
+                    overview += " ".join(body_lines[:10])[:remaining]
+            else:
+                body_lines = [l for l in body.split("\n") if l.strip() and not l.startswith("#")]
+                overview = " ".join(body_lines[:10])[:500]

-                idx = len(self._entities)
-                self._entities.append({
-                    "name": name,
-                    "path": str(md_file),
-                    "type": entity_type,
-                    "domain": domain,
-                    "tags": tags,
-                    "handles": handles,
-                    "aliases": list(aliases),
-                    "overview": overview,
-                    "related_claims": related_claims,
-                })
+            idx = len(self._entities)
+            self._entities.append({
+                "name": name,
+                "path": str(md_file),
+                "type": entity_type,
+                "domain": domain,
+                "tags": tags,
+                "handles": handles,
+                "aliases": list(aliases),
+                "overview": overview,
+                "related_claims": related_claims,
+            })

-                # Register all aliases in lookup map
-                for alias in aliases:
-                    self._entity_alias_map.setdefault(alias, []).append(idx)
+            # Register all aliases in lookup map
+            for alias in aliases:
+                self._entity_alias_map.setdefault(alias, []).append(idx)

-            except Exception as e:
-                logger.warning("Failed to index entity %s: %s", md_file, e)
+        except Exception as e:
+            logger.warning("Failed to index entity %s: %s", md_file, e)

     def _index_claims(self):
         """Scan domains/, core/, and foundations/ for claim files."""
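One consequence of the hunk above: every alias — name, file slug, handles, tags, mined tickers, and now the proposer of a decision — lands in _entity_alias_map keyed lowercase. A minimal sketch of a lookup against that map, where kb is a KBIndex instance and resolve_mention is a hypothetical helper, not code from this commit:

    def resolve_mention(kb, mention):
        """Map a raw mention like '@alice' or 'SOL' to indexed entity records."""
        key = mention.lower().lstrip("@")
        return [kb._entities[i] for i in kb._entity_alias_map.get(key, [])]

    # After this fix, a decision record proposed by "alice" is reachable
    # via its proposer, its name, its file slug, or any of its tags:
    records = resolve_mention(kb, "@alice")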
@@ -185,7 +207,6 @@ class KBIndex:
             self.repo_dir / "domains",
             self.repo_dir / "core",
             self.repo_dir / "foundations",
-            self.repo_dir / "decisions",
         ]
         for claim_dir in claim_dirs:
             if not claim_dir.exists():
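Dropping decisions/ from claim_dirs loses nothing: per the commit message, the claim indexer's type filter never admitted decision records. The gate inside _index_claims presumably looks roughly like this (a sketch; the accepted type name is an assumption):

    # Inside the per-file loop of _index_claims (illustrative only):
    fm, body = _parse_frontmatter(md_file)
    if not fm or fm.get("type") != "claim":   # assumed gate; files with
        continue                              # type: decision fell out here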
@@ -447,7 +468,7 @@ def _domain_from_path(path: Path, repo_dir: Path) -> str:
     """Infer domain from file path."""
     rel = path.relative_to(repo_dir)
     parts = rel.parts
-    if len(parts) >= 2 and parts[0] in ("domains", "entities"):
+    if len(parts) >= 2 and parts[0] in ("domains", "entities", "decisions"):
         return parts[1]
     if len(parts) >= 1 and parts[0] == "core":
         return "core"
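With the last hunk, domain inference covers decision paths as well. For instance, assuming repo_dir is the KB checkout root and a hypothetical file at decisions/governance/adopt-ruleset-v2.md:

    # The file path below is invented for illustration:
    _domain_from_path(repo_dir / "decisions" / "governance" / "adopt-ruleset-v2.md", repo_dir)
    # -> "governance" (previously "decisions" matched neither prefix branch,
    #    so records under decisions/ never resolved to their domain)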