2026-03-30 11:03:35 +00:00
1 changed files with 84 additions and 63 deletions
--- a/telegram/kb_retrieval.py
+++ b/telegram/kb_retrieval.py
@ -106,78 +106,100 @@ class KBIndex:
                     time.time() - start, len(self._entities), len(self._claims), len(self._positions))

    def _index_entities(self):
-        """Scan entities/ for all entity files."""
-        entities_dir = self.repo_dir / "entities"
-        if not entities_dir.exists():
-            return
-        for md_file in entities_dir.rglob("*.md"):
-            try:
-                fm, body = _parse_frontmatter(md_file)
-                if not fm or fm.get("type") != "entity":
-                    continue
+        """Scan entities/ and decisions/ for entity and decision files."""
+        entity_dirs = [
+            self.repo_dir / "entities",
+            self.repo_dir / "decisions",
+        ]
+        for entities_dir in entity_dirs:
+            if not entities_dir.exists():
+                continue
+            for md_file in entities_dir.rglob("*.md"):
+                self._index_single_entity(md_file)

-                name = fm.get("name", md_file.stem)
-                handles = fm.get("handles", []) or []
-                tags = fm.get("tags", []) or []
-                entity_type = fm.get("entity_type", "unknown")
-                domain = fm.get("domain", "unknown")
+    def _index_single_entity(self, md_file: Path):
+        """Index a single entity or decision file."""
+        try:
+            fm, body = _parse_frontmatter(md_file)
+            if not fm or fm.get("type") not in ("entity", "decision"):
+                return

-                # Build aliases from multiple sources
-                aliases = set()
-                aliases.add(name.lower())
-                aliases.add(md_file.stem.lower())  # slugified name
-                for h in handles:
-                    aliases.add(h.lower().lstrip("@"))
-                for t in tags:
-                    aliases.add(t.lower())
+            name = fm.get("name", md_file.stem)
+            handles = fm.get("handles", []) or []
+            tags = fm.get("tags", []) or []
+            entity_type = fm.get("entity_type", "unknown")
+            domain = fm.get("domain", "unknown")

-                # Mine body for ticker mentions ($XXXX and standalone ALL-CAPS tokens)
-                dollar_tickers = re.findall(r"\$([A-Z]{2,10})", body[:2000])
-                for ticker in dollar_tickers:
-                    aliases.add(ticker.lower())
-                    aliases.add(f"${ticker.lower()}")
-                # Standalone all-caps tokens (likely tickers: OMFG, META, SOL)
-                caps_tokens = re.findall(r"\b([A-Z]{2,10})\b", body[:2000])
-                for token in caps_tokens:
-                    # Filter common English words that happen to be short caps
-                    if token not in ("THE", "AND", "FOR", "NOT", "BUT", "HAS", "ARE", "WAS",
-                                     "ITS", "ALL", "CAN", "HAD", "HER", "ONE", "OUR", "OUT",
-                                     "NEW", "NOW", "OLD", "SEE", "WAY", "MAY", "SAY", "SHE",
-                                     "TWO", "HOW", "BOY", "DID", "GET", "PUT", "KEY", "TVL",
-                                     "AMM", "CEO", "SDK", "API", "ICO", "APY", "FAQ", "IPO"):
-                        aliases.add(token.lower())
-                        aliases.add(f"${token.lower()}")
+            # For decision records, also read summary (leads the overview) and proposer (added as an alias)
+            summary = fm.get("summary", "")
+            proposer = fm.get("proposer", "")

-                # Also add aliases field if it exists (future schema)
-                for a in (fm.get("aliases", []) or []):
-                    aliases.add(a.lower())
+            # Build aliases from multiple sources
+            aliases = set()
+            aliases.add(name.lower())
+            aliases.add(md_file.stem.lower())  # slugified name
+            for h in handles:
+                aliases.add(h.lower().lstrip("@"))
+            for t in tags:
+                aliases.add(t.lower())
+            # Add proposer name as alias for decision records
+            if proposer:
+                aliases.add(proposer.lower())

-                # Extract wiki-linked claim references from body
-                related_claims = re.findall(r"\[\[([^\]]+)\]\]", body)
+            # Mine body for ticker mentions ($XXXX and standalone ALL-CAPS tokens)
+            dollar_tickers = re.findall(r"\$([A-Z]{2,10})", body[:2000])
+            for ticker in dollar_tickers:
+                aliases.add(ticker.lower())
+                aliases.add(f"${ticker.lower()}")
+            # Standalone all-caps tokens (likely tickers: OMFG, META, SOL)
+            caps_tokens = re.findall(r"\b([A-Z]{2,10})\b", body[:2000])
+            for token in caps_tokens:
+                # Filter common English words that happen to be short caps
+                if token not in ("THE", "AND", "FOR", "NOT", "BUT", "HAS", "ARE", "WAS",
+                                 "ITS", "ALL", "CAN", "HAD", "HER", "ONE", "OUR", "OUT",
+                                 "NEW", "NOW", "OLD", "SEE", "WAY", "MAY", "SAY", "SHE",
+                                 "TWO", "HOW", "BOY", "DID", "GET", "PUT", "KEY", "TVL",
+                                 "AMM", "CEO", "SDK", "API", "ICO", "APY", "FAQ", "IPO"):
+                    aliases.add(token.lower())
+                    aliases.add(f"${token.lower()}")

-                # Body excerpt for context
+            # Also add aliases field if it exists (future schema)
+            for a in (fm.get("aliases", []) or []):
+                aliases.add(a.lower())
+
+            # Extract wiki-linked claim references from body
+            related_claims = re.findall(r"\[\[([^\]]+)\]\]", body)
+
+            # Body excerpt — if a summary exists (decisions), lead with it; body text fills what is left of 500 chars
+            if summary:
+                overview = f"{summary} "
+                body_lines = [l for l in body.split("\n") if l.strip() and not l.startswith("#")]
+                remaining = 500 - len(overview)
+                if remaining > 0:
+                    overview += " ".join(body_lines[:10])[:remaining]
+            else:
                body_lines = [l for l in body.split("\n") if l.strip() and not l.startswith("#")]
                overview = " ".join(body_lines[:10])[:500]

-                idx = len(self._entities)
-                self._entities.append({
-                    "name": name,
-                    "path": str(md_file),
-                    "type": entity_type,
-                    "domain": domain,
-                    "tags": tags,
-                    "handles": handles,
-                    "aliases": list(aliases),
-                    "overview": overview,
-                    "related_claims": related_claims,
-                })
+            idx = len(self._entities)
+            self._entities.append({
+                "name": name,
+                "path": str(md_file),
+                "type": entity_type,
+                "domain": domain,
+                "tags": tags,
+                "handles": handles,
+                "aliases": list(aliases),
+                "overview": overview,
+                "related_claims": related_claims,
+            })

-                # Register all aliases in lookup map
-                for alias in aliases:
-                    self._entity_alias_map.setdefault(alias, []).append(idx)
+            # Register all aliases in lookup map
+            for alias in aliases:
+                self._entity_alias_map.setdefault(alias, []).append(idx)

-            except Exception as e:
-                logger.warning("Failed to index entity %s: %s", md_file, e)
+        except Exception as e:
+            logger.warning("Failed to index entity %s: %s", md_file, e)

    def _index_claims(self):
        """Scan domains/, core/, and foundations/ for claim files."""
@ -185,7 +207,6 @@ class KBIndex:
            self.repo_dir / "domains",
            self.repo_dir / "core",
            self.repo_dir / "foundations",
-            self.repo_dir / "decisions",
        ]
        for claim_dir in claim_dirs:
            if not claim_dir.exists():
@ -447,7 +468,7 @@ def _domain_from_path(path: Path, repo_dir: Path) -> str:
    """Infer domain from file path."""
    rel = path.relative_to(repo_dir)
    parts = rel.parts
-    if len(parts) >= 2 and parts[0] in ("domains", "entities"):
+    if len(parts) >= 2 and parts[0] in ("domains", "entities", "decisions"):
        return parts[1]
    if len(parts) >= 1 and parts[0] == "core":
        return "core"