From 425e7a1bac4e8d0e071d1f50463697cfcb0900b3 Mon Sep 17 00:00:00 2001 From: m3taversal Date: Mon, 23 Mar 2026 17:28:30 +0000 Subject: [PATCH] fix: index decisions/ as entities so decision records reach the bot prompt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Root cause: decision records have type: decision, but the entity indexer only accepted type: entity and only scanned entities/. The claim indexer scanned decisions/ but filtered out non-claim types. Result: decision records fell through both indexes entirely — invisible to the bot. Fix: add decisions/ to entity indexer scan paths, accept type: decision alongside type: entity, add proposer to search aliases, and lead the overview excerpt with summary. Remove decisions/ from claim indexer (was silently dropping them anyway). Pentagon-Agent: Epimetheus <3D35839A-7722-4740-B93D-51157F7D5E70> --- telegram/kb_retrieval.py | 147 ++++++++++++++++++++++----------------- 1 file changed, 84 insertions(+), 63 deletions(-) diff --git a/telegram/kb_retrieval.py b/telegram/kb_retrieval.py index b4149ae..737e1c6 100644 --- a/telegram/kb_retrieval.py +++ b/telegram/kb_retrieval.py @@ -106,78 +106,100 @@ class KBIndex: time.time() - start, len(self._entities), len(self._claims), len(self._positions)) def _index_entities(self): - """Scan entities/ for all entity files.""" - entities_dir = self.repo_dir / "entities" - if not entities_dir.exists(): - return - for md_file in entities_dir.rglob("*.md"): - try: - fm, body = _parse_frontmatter(md_file) - if not fm or fm.get("type") != "entity": - continue + """Scan entities/ and decisions/ for entity and decision files.""" + entity_dirs = [ + self.repo_dir / "entities", + self.repo_dir / "decisions", + ] + for entities_dir in entity_dirs: + if not entities_dir.exists(): + continue + for md_file in entities_dir.rglob("*.md"): + self._index_single_entity(md_file) - name = fm.get("name", md_file.stem) - handles = fm.get("handles", []) or [] - tags = fm.get("tags", []) or 
[] - entity_type = fm.get("entity_type", "unknown") - domain = fm.get("domain", "unknown") + def _index_single_entity(self, md_file: Path): + """Index a single entity or decision file.""" + try: + fm, body = _parse_frontmatter(md_file) + if not fm or fm.get("type") not in ("entity", "decision"): + return - # Build aliases from multiple sources - aliases = set() - aliases.add(name.lower()) - aliases.add(md_file.stem.lower()) # slugified name - for h in handles: - aliases.add(h.lower().lstrip("@")) - for t in tags: - aliases.add(t.lower()) + name = fm.get("name", md_file.stem) + handles = fm.get("handles", []) or [] + tags = fm.get("tags", []) or [] + entity_type = fm.get("entity_type", "unknown") + domain = fm.get("domain", "unknown") - # Mine body for ticker mentions ($XXXX and standalone ALL-CAPS tokens) - dollar_tickers = re.findall(r"\$([A-Z]{2,10})", body[:2000]) - for ticker in dollar_tickers: - aliases.add(ticker.lower()) - aliases.add(f"${ticker.lower()}") - # Standalone all-caps tokens (likely tickers: OMFG, META, SOL) - caps_tokens = re.findall(r"\b([A-Z]{2,10})\b", body[:2000]) - for token in caps_tokens: - # Filter common English words that happen to be short caps - if token not in ("THE", "AND", "FOR", "NOT", "BUT", "HAS", "ARE", "WAS", - "ITS", "ALL", "CAN", "HAD", "HER", "ONE", "OUR", "OUT", - "NEW", "NOW", "OLD", "SEE", "WAY", "MAY", "SAY", "SHE", - "TWO", "HOW", "BOY", "DID", "GET", "PUT", "KEY", "TVL", - "AMM", "CEO", "SDK", "API", "ICO", "APY", "FAQ", "IPO"): - aliases.add(token.lower()) - aliases.add(f"${token.lower()}") + # For decision records, also index summary and proposer as searchable text + summary = fm.get("summary", "") + proposer = fm.get("proposer", "") - # Also add aliases field if it exists (future schema) - for a in (fm.get("aliases", []) or []): - aliases.add(a.lower()) + # Build aliases from multiple sources + aliases = set() + aliases.add(name.lower()) + aliases.add(md_file.stem.lower()) # slugified name + for h in handles: + 
aliases.add(h.lower().lstrip("@")) + for t in tags: + aliases.add(t.lower()) + # Add proposer name as alias for decision records + if proposer: + aliases.add(proposer.lower()) - # Extract wiki-linked claim references from body - related_claims = re.findall(r"\[\[([^\]]+)\]\]", body) + # Mine body for ticker mentions ($XXXX and standalone ALL-CAPS tokens) + dollar_tickers = re.findall(r"\$([A-Z]{2,10})", body[:2000]) + for ticker in dollar_tickers: + aliases.add(ticker.lower()) + aliases.add(f"${ticker.lower()}") + # Standalone all-caps tokens (likely tickers: OMFG, META, SOL) + caps_tokens = re.findall(r"\b([A-Z]{2,10})\b", body[:2000]) + for token in caps_tokens: + # Filter common English words that happen to be short caps + if token not in ("THE", "AND", "FOR", "NOT", "BUT", "HAS", "ARE", "WAS", + "ITS", "ALL", "CAN", "HAD", "HER", "ONE", "OUR", "OUT", + "NEW", "NOW", "OLD", "SEE", "WAY", "MAY", "SAY", "SHE", + "TWO", "HOW", "BOY", "DID", "GET", "PUT", "KEY", "TVL", + "AMM", "CEO", "SDK", "API", "ICO", "APY", "FAQ", "IPO"): + aliases.add(token.lower()) + aliases.add(f"${token.lower()}") - # Body excerpt for context + # Also add aliases field if it exists (future schema) + for a in (fm.get("aliases", []) or []): + aliases.add(a.lower()) + + # Extract wiki-linked claim references from body + related_claims = re.findall(r"\[\[([^\]]+)\]\]", body) + + # Body excerpt — for decisions, lead with summary for better prompt context + if summary: + overview = f"{summary} " + body_lines = [l for l in body.split("\n") if l.strip() and not l.startswith("#")] + remaining = 500 - len(overview) + if remaining > 0: + overview += " ".join(body_lines[:10])[:remaining] + else: body_lines = [l for l in body.split("\n") if l.strip() and not l.startswith("#")] overview = " ".join(body_lines[:10])[:500] - idx = len(self._entities) - self._entities.append({ - "name": name, - "path": str(md_file), - "type": entity_type, - "domain": domain, - "tags": tags, - "handles": handles, - "aliases": 
list(aliases), - "overview": overview, - "related_claims": related_claims, - }) + idx = len(self._entities) + self._entities.append({ + "name": name, + "path": str(md_file), + "type": entity_type, + "domain": domain, + "tags": tags, + "handles": handles, + "aliases": list(aliases), + "overview": overview, + "related_claims": related_claims, + }) - # Register all aliases in lookup map - for alias in aliases: - self._entity_alias_map.setdefault(alias, []).append(idx) + # Register all aliases in lookup map + for alias in aliases: + self._entity_alias_map.setdefault(alias, []).append(idx) - except Exception as e: - logger.warning("Failed to index entity %s: %s", md_file, e) + except Exception as e: + logger.warning("Failed to index entity %s: %s", md_file, e) def _index_claims(self): """Scan domains/, core/, and foundations/ for claim files.""" @@ -185,7 +207,6 @@ class KBIndex: self.repo_dir / "domains", self.repo_dir / "core", self.repo_dir / "foundations", - self.repo_dir / "decisions", ] for claim_dir in claim_dirs: if not claim_dir.exists(): @@ -447,7 +468,7 @@ def _domain_from_path(path: Path, repo_dir: Path) -> str: """Infer domain from file path.""" rel = path.relative_to(repo_dir) parts = rel.parts - if len(parts) >= 2 and parts[0] in ("domains", "entities"): + if len(parts) >= 2 and parts[0] in ("domains", "entities", "decisions"): return parts[1] if len(parts) >= 1 and parts[0] == "core": return "core"