From b7661762592cb3230ff7f866cb1272bd9767e0f5 Mon Sep 17 00:00:00 2001 From: m3taversal Date: Wed, 1 Apr 2026 15:27:13 +0100 Subject: [PATCH] =?UTF-8?q?Wire=20RRF=20merge=20into=20bot.py=20=E2=80=94?= =?UTF-8?q?=20replace=20keyword+vector=20concatenation?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit bot.py now calls orchestrate_retrieval() from retrieval.py instead of doing keyword retrieval and vector search separately then concatenating. Claims found by both systems get RRF-boosted to the top. Query decomposition (multi-part → sub-queries) is now active. Co-Authored-By: Claude Opus 4.6 (1M context) --- telegram/bot.py | 56 ++++++++++++++++++------------------------------- 1 file changed, 20 insertions(+), 36 deletions(-) diff --git a/telegram/bot.py b/telegram/bot.py index 521972b..79fa921 100644 --- a/telegram/bot.py +++ b/telegram/bot.py @@ -42,7 +42,8 @@ from telegram.ext import ( sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) import json as _json -from kb_retrieval import KBIndex, format_context_for_prompt, retrieve_context +from kb_retrieval import KBIndex, retrieve_context, retrieve_vector_context +from retrieval import orchestrate_retrieval from market_data import get_token_price, format_price_context from worktree_lock import main_worktree_lock from x_client import search_tweets, fetch_from_url, check_research_rate_limit, record_research_usage, get_research_remaining @@ -994,41 +995,24 @@ async def handle_tagged(update: Update, context: ContextTypes.DEFAULT_TYPE): logger.warning("Query reformulation failed: %s", e) # Fall through — use raw text - # Retrieve full KB context (entity resolution + claim search + agent positions) - t_kb = time.monotonic() - kb_ctx = retrieve_context(search_query_text, KB_READ_DIR, index=kb_index) - kb_context_text = format_context_for_prompt(kb_ctx) - kb_duration = int((time.monotonic() - t_kb) * 1000) - retrieval_layers = ["keyword"] if (kb_ctx and (kb_ctx.entities or kb_ctx.claims)) else [] - tool_calls.append({ - "tool": "retrieve_context", - "input": {"query": search_query_text[:200], "original_query": text[:200] if search_query_text != text else None}, - "output": {"entities": len(kb_ctx.entities) if kb_ctx else 0, - "claims": len(kb_ctx.claims) if kb_ctx else 0}, - "duration_ms": kb_duration, - }) - - # Layer 1+2: Qdrant vector search + graph expansion (semantic, complements keyword) - # Pass keyword-matched paths to exclude duplicates at Qdrant query level - # Normalize: KBIndex stores absolute paths, Qdrant stores repo-relative paths - keyword_paths = [] - if kb_ctx and kb_ctx.claims: - for c in kb_ctx.claims: - p = c.path - if KB_READ_DIR and p.startswith(KB_READ_DIR): - p = p[len(KB_READ_DIR):].lstrip("/") - keyword_paths.append(p) - from kb_retrieval import retrieve_vector_context - vector_context, vector_meta = retrieve_vector_context(search_query_text, keyword_paths=keyword_paths) - if vector_context: - kb_context_text = kb_context_text + "\n\n" + vector_context - retrieval_layers.extend(vector_meta.get("layers_hit", [])) - tool_calls.append({ - "tool": "retrieve_qdrant_context", "input": {"query": text[:200]}, - "output": {"direct_hits": len(vector_meta.get("direct_results", [])), - "expanded": len(vector_meta.get("expanded_results", []))}, - "duration_ms": vector_meta.get("duration_ms", 0), - }) + # Unified retrieval: keyword → decompose → vector → RRF merge + # Both systems search independently; RRF boosts claims found by both. + def _vector_fn(q): + return retrieve_vector_context(q) # no keyword_paths exclusion — RRF deduplicates + retrieval_result = await orchestrate_retrieval( + text=text, + search_query=search_query_text, + kb_read_dir=KB_READ_DIR, + kb_index=kb_index, + llm_fn=call_openrouter, + triage_model=TRIAGE_MODEL, + retrieve_context_fn=retrieve_context, + retrieve_vector_fn=_vector_fn, + ) + kb_context_text = retrieval_result["kb_context_text"] + kb_ctx = retrieval_result["kb_ctx"] + retrieval_layers = retrieval_result["retrieval_layers"] + tool_calls.extend(retrieval_result["tool_calls"]) stats = get_db_stats()