#!/usr/bin/env python3
"""Teleo Telegram Bot — Rio as analytical agent in community groups.

Architecture:
- Always-on ingestion: captures all messages, batch triage every N minutes
- Tag-based response: Opus-quality KB-grounded responses when @tagged
- Conversation-window triage: identifies coherent claims across message threads
- Full eval tracing: Rio's responses are logged as KB claims, accountable

Two paths (Ganymede architecture):
- Fast path (read): tag → KB query → Opus response → post to group
- Slow path (write): batch triage → archive to inbox/ → pipeline extracts

Separate systemd service: teleo-telegram.service
Does NOT integrate with pipeline daemon.

Epimetheus owns this module.
"""

import asyncio
import html
import json
import logging
import os
import re
import sqlite3
import sys
import time
from collections import defaultdict
from contextlib import closing
from datetime import datetime, timezone
from pathlib import Path

# Add pipeline lib to path for shared modules
sys.path.insert(0, "/opt/teleo-eval/pipeline")

from telegram import Update
from telegram.ext import (
    Application,
    CommandHandler,
    ContextTypes,
    MessageHandler,
    filters,
)

# ─── Config ─────────────────────────────────────────────────────────────

BOT_TOKEN_FILE = "/opt/teleo-eval/secrets/telegram-bot-token"
OPENROUTER_KEY_FILE = "/opt/teleo-eval/secrets/openrouter-key"
PIPELINE_DB = "/opt/teleo-eval/pipeline/pipeline.db"
CLAIM_INDEX_PATH = Path.home() / ".pentagon" / "workspace" / "collective" / "claim-index.json"
# Fallback location of the claim index on the VPS
CLAIM_INDEX_FALLBACK_PATH = Path("/opt/teleo-eval/pipeline/claim-index.json")
REPO_DIR = "/opt/teleo-eval/workspaces/extract"  # For archiving sources
LOG_FILE = "/opt/teleo-eval/logs/telegram-bot.log"

# Triage interval (seconds)
TRIAGE_INTERVAL = 900  # 15 minutes

# Models
RESPONSE_MODEL = "anthropic/claude-opus-4-6"  # Opus for tagged responses
TRIAGE_MODEL = "anthropic/claude-haiku-4.5"  # Haiku for batch triage

# Rate limits
MAX_RESPONSE_PER_USER_PER_HOUR = 6
MIN_MESSAGE_LENGTH = 20  # Skip very short messages

# Maximum number of messages in one triage window
MAX_WINDOW_SIZE = 10

# Telegram rejects sendMessage calls whose text exceeds this many characters
TELEGRAM_MAX_MESSAGE_LENGTH = 4096

# ─── Logging ────────────────────────────────────────────────────────────

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(name)s [%(levelname)s] %(message)s",
    handlers=[
        logging.FileHandler(LOG_FILE),
        logging.StreamHandler(),
    ],
)
logger = logging.getLogger("telegram-bot")

# ─── State ──────────────────────────────────────────────────────────────

# Message buffer for batch triage
message_buffer: list[dict] = []

# Rate limiting
user_response_times: dict[int, list[float]] = defaultdict(list)

# Allowed group IDs (set after first message received, or configure)
allowed_groups: set[int] = set()


# ─── Helpers ────────────────────────────────────────────────────────────

def load_claim_index() -> dict | None:
    """Load the claim-index.json for KB queries.

    Tries the workspace path first, then the VPS fallback path. Each
    candidate is attempted independently, so an unreadable or corrupt
    primary file no longer prevents the fallback from being used.

    Returns:
        The parsed index, or None when no candidate could be loaded.
    """
    for candidate in (CLAIM_INDEX_PATH, CLAIM_INDEX_FALLBACK_PATH):
        try:
            if candidate.exists():
                return json.loads(candidate.read_text())
        except (OSError, ValueError) as e:
            logger.error("Failed to load claim index from %s: %s", candidate, e)
    return None


def find_relevant_claims(query: str, index: dict, max_results: int = 5) -> list[dict]:
    """Find claims relevant to a query using keyword matching.

    A claim qualifies when its title shares at least two whitespace-separated,
    lower-cased words with the query. Simple for now — upgrade to semantic
    search later.

    Args:
        query: Free text to match against claim titles.
        index: Parsed claim index; claims are read from its "claims" list.
        max_results: Maximum number of claims to return.

    Returns:
        Up to max_results claims, highest word overlap first.
    """
    query_words = set(query.lower().split())
    scored = []
    for claim in index.get("claims", []):
        title_words = set(claim.get("title", "").lower().split())
        overlap = len(query_words & title_words)
        if overlap >= 2:
            scored.append((overlap, claim))
    # Key on the score only: claims are dicts and cannot be compared on ties.
    scored.sort(key=lambda pair: pair[0], reverse=True)
    return [claim for _, claim in scored[:max_results]]


def get_db_stats() -> dict:
    """Get basic KB stats from pipeline DB.

    Returns:
        A dict with "merged_claims" and "contributors". Both values are "?"
        when the database cannot be read.
    """
    try:
        # closing() guarantees the connection is released even if a query fails.
        with closing(sqlite3.connect(PIPELINE_DB, timeout=5)) as conn:
            conn.row_factory = sqlite3.Row
            conn.execute("PRAGMA query_only=ON")
            merged = conn.execute(
                "SELECT COUNT(*) as n FROM prs WHERE status='merged'"
            ).fetchone()["n"]
            contributors = conn.execute(
                "SELECT COUNT(*) as n FROM contributors"
            ).fetchone()["n"]
        return {"merged_claims": merged, "contributors": contributors}
    except sqlite3.Error as e:
        logger.warning("Failed to read KB stats from %s: %s", PIPELINE_DB, e)
        return {"merged_claims": "?", "contributors": "?"}


async def call_openrouter(model: str, prompt: str, max_tokens: int = 2048) -> str | None:
    """Call OpenRouter API.

    Args:
        model: OpenRouter model identifier.
        prompt: Sent as a single user message.
        max_tokens: Completion token cap.

    Returns:
        The content of the first choice, or None on any failure (unreadable
        key file, HTTP status >= 400, transport error, or no choices).
    """
    import aiohttp

    try:
        key = Path(OPENROUTER_KEY_FILE).read_text().strip()
    except OSError as e:
        # Keep the "returns None on failure" contract instead of raising
        # into the Telegram handler.
        logger.error("Cannot read OpenRouter key %s: %s", OPENROUTER_KEY_FILE, e)
        return None

    payload = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": max_tokens,
        "temperature": 0.3,
    }
    try:
        async with aiohttp.ClientSession() as session:
            async with session.post(
                "https://openrouter.ai/api/v1/chat/completions",
                headers={"Authorization": f"Bearer {key}", "Content-Type": "application/json"},
                json=payload,
                timeout=aiohttp.ClientTimeout(total=120),
            ) as resp:
                if resp.status >= 400:
                    logger.error("OpenRouter %s → %d", model, resp.status)
                    return None
                data = await resp.json()
        choices = data.get("choices") or []
        if not choices:
            logger.error("OpenRouter %s returned no choices", model)
            return None
        return (choices[0].get("message") or {}).get("content")
    except Exception as e:
        logger.error("OpenRouter error: %s", e)
        return None


def is_rate_limited(user_id: int) -> bool:
    """Check if a user has exceeded the response rate limit.

    Timestamps older than one hour are pruned from the user's history as a
    side effect.
    """
    now = time.time()
    times = user_response_times[user_id]
    # Prune old entries in place so the shared list object is preserved
    times[:] = [t for t in times if now - t < 3600]
    return len(times) >= MAX_RESPONSE_PER_USER_PER_HOUR


def sanitize_message(text: str) -> str:
    """Sanitize message content before sending to LLM. (Ganymede: security)

    Replaces fenced code blocks with a placeholder, removes role markers and
    <|...|> control tokens, and truncates the result to 2000 characters.
    """
    # Strip code blocks (potential prompt injection)
    text = re.sub(r"```.*?```", "[code block removed]", text, flags=re.DOTALL)
    # Strip anything that looks like system instructions
    text = re.sub(r"(system:|assistant:|human:|<\|.*?\|>)", "", text, flags=re.IGNORECASE)
    # Truncate
    return text[:2000]


def _yaml_quote(value: str) -> str:
    """Return value as a double-quoted scalar that is safe in YAML front matter.

    JSON string escaping is used, so embedded quotes, backslashes and
    newlines in user-supplied text cannot break the front matter.
    """
    return json.dumps(str(value), ensure_ascii=False)


def _unique_archive_path(directory: Path, stem: str) -> Path:
    """Return a not-yet-existing "<stem>.md" path in directory.

    Appends "-2", "-3", ... so two archives with the same date, user and
    slug do not overwrite each other.
    """
    candidate = directory / f"{stem}.md"
    counter = 2
    while candidate.exists():
        candidate = directory / f"{stem}-{counter}.md"
        counter += 1
    return candidate


def _split_for_telegram(text: str, limit: int = TELEGRAM_MAX_MESSAGE_LENGTH) -> list[str]:
    """Split text into consecutive chunks of at most limit characters."""
    return [text[start:start + limit] for start in range(0, len(text), limit)]


def _parse_timestamp(value) -> datetime | None:
    """Parse an ISO-8601 timestamp string; return None if it cannot be parsed.

    Naive timestamps are treated as UTC so that naive and aware values can
    be subtracted from each other.
    """
    try:
        parsed = datetime.fromisoformat(value)
    except (TypeError, ValueError):
        return None
    if parsed.tzinfo is None:
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed


# ─── Message Handlers ───────────────────────────────────────────────────

async def handle_message(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Handle ALL incoming group messages — buffer for triage.

    Messages shorter than MIN_MESSAGE_LENGTH are ignored. Buffered text is
    sanitized before storage.
    """
    if not update.message or not update.message.text:
        return

    msg = update.message
    text = msg.text.strip()

    # Skip very short messages
    if len(text) < MIN_MESSAGE_LENGTH:
        return

    # Buffer for batch triage
    message_buffer.append({
        "text": sanitize_message(text),
        "user_id": msg.from_user.id if msg.from_user else None,
        "username": msg.from_user.username if msg.from_user else None,
        "display_name": msg.from_user.full_name if msg.from_user else None,
        "chat_id": msg.chat_id,
        "message_id": msg.message_id,
        "timestamp": msg.date.isoformat() if msg.date else datetime.now(timezone.utc).isoformat(),
        "reply_to": msg.reply_to_message.message_id if msg.reply_to_message else None,
    })


async def handle_tagged(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Handle messages that tag the bot — Rio responds with Opus.

    Flow: rate-limit check → load KB index and stats → keyword-match claims →
    build prompt → call the response model → reply → fetch the first URL in
    the message (if any) → archive the exchange for the pipeline.
    """
    if not update.message or not update.message.text:
        return

    msg = update.message
    user = msg.from_user
    text = sanitize_message(msg.text)

    # Rate limit check
    if user and is_rate_limited(user.id):
        await msg.reply_text("I'm processing other requests — try again in a few minutes.")
        return

    logger.info("Tagged by @%s: %s", user.username if user else "unknown", text[:100])

    # Send typing indicator
    await msg.chat.send_action("typing")

    # Load KB
    index = load_claim_index()
    stats = get_db_stats()

    if not index:
        await msg.reply_text("KB index unavailable — try again shortly.")
        return

    # Find relevant claims
    relevant = find_relevant_claims(text, index, max_results=5)
    claims_context = ""
    if relevant:
        claim_lines = [
            f"- **{c['title']}** (confidence: {c.get('confidence', '?')}, "
            f"domain: {c.get('domain', '?')})\n"
            for c in relevant
        ]
        claims_context = "## Relevant KB Claims\n" + "".join(claim_lines)

    # Users without a Telegram username have username=None; avoid "@None".
    sender_handle = (user.username if user else None) or "unknown"
    sender_name = user.full_name if user else "unknown"

    # Build Opus prompt — Rio's voice
    prompt = f"""You are Rio, the internet finance domain expert for TeleoHumanity's collective knowledge base. You're responding to a message in the ownership community Telegram group.

## Your Voice
- Grounded in KB evidence — cite specific claims and their confidence levels
- State your position when you have one — analyst means grounded, not neutral
- Name uncertainty explicitly — "we don't have data on this yet" is honest
- Never shill — present evidence and risks alongside convictions
- If the message contains a genuine insight the KB doesn't have, say so: "That's something we haven't captured yet — it's worth investigating"

## KB State
- {stats['merged_claims']} merged claims across 14 domains
- {stats['contributors']} contributors tracked
- {index.get('total_claims', '?')} claims in index

{claims_context}

## The Message
From: @{sender_handle} ({sender_name})
Message: {text}

## Your Response
Respond substantively. If the message contains a claim or evidence:
1. Connect it to what the KB already knows
2. State where you agree and where the evidence is uncertain
3. If this challenges an existing claim, say so specifically

Keep it conversational — this is Telegram, not a paper. 2-4 paragraphs max.
Do NOT use markdown headers. Light formatting only (bold for claim titles, italics for emphasis)."""

    # Call Opus
    response = await call_openrouter(RESPONSE_MODEL, prompt, max_tokens=1024)

    if not response:
        await msg.reply_text("Processing error — I'll get back to you.")
        return

    # Post response, chunked so an unusually long completion is not rejected
    # by Telegram's per-message length limit.
    for chunk in _split_for_telegram(response):
        await msg.reply_text(chunk)

    # Record rate limit
    if user:
        user_response_times[user.id].append(time.time())

    # Log the exchange for audit trail
    logger.info("Rio responded to @%s (msg_id=%d)",
                user.username if user else "?", msg.message_id)

    # Detect and fetch URLs for pipeline ingestion (first URL only)
    urls = _extract_urls(text)
    url_content = None
    if urls:
        logger.info("Fetching URL: %s", urls[0])
        url_content = await _fetch_url_content(urls[0])
        if url_content:
            logger.info("Fetched %d chars from %s", len(url_content), urls[0])

    # Archive the exchange as a source for pipeline (slow path)
    _archive_exchange(text, response, user, msg, url_content=url_content, urls=urls)


async def _fetch_url_content(url: str) -> str | None:
    """Fetch article/page content from a URL for pipeline ingestion.

    Returns:
        Plain text extracted from the page, capped at 10,000 characters, or
        None on HTTP status >= 400 or any fetch error.
    """
    import aiohttp

    # NOTE(review): the URL comes from an untrusted group message and is
    # fetched from this host (SSRF risk). Consider rejecting private,
    # loopback and link-local targets before fetching.
    try:
        async with aiohttp.ClientSession() as session:
            async with session.get(url, timeout=aiohttp.ClientTimeout(total=30)) as resp:
                if resp.status >= 400:
                    return None
                raw_html = await resp.text()
        # Strip HTML tags for plain text (basic — upgrade to readability later).
        # Script and style elements are removed together with their bodies so
        # JS/CSS source does not end up in the archived text.
        text = re.sub(r"<script\b.*?</script\s*>", " ", raw_html,
                      flags=re.DOTALL | re.IGNORECASE)
        text = re.sub(r"<style\b.*?</style\s*>", " ", text,
                      flags=re.DOTALL | re.IGNORECASE)
        text = re.sub(r"<[^>]+>", " ", text)
        text = html.unescape(text)
        text = re.sub(r"\s+", " ", text).strip()
        return text[:10000]  # Cap at 10K chars
    except Exception as e:
        logger.warning("Failed to fetch URL %s: %s", url, e)
        return None


def _extract_urls(text: str) -> list[str]:
    """Extract http(s) URLs from message text, in order of appearance."""
    return re.findall(r"https?://[^\s<>\"']+", text)


def _archive_exchange(user_text: str, rio_response: str, user, msg,
                      url_content: str | None = None, urls: list[str] | None = None):
    """Archive a tagged exchange to inbox/queue/ for pipeline processing.

    Args:
        user_text: The (sanitized) message that tagged the bot.
        rio_response: The response that was posted.
        user: Telegram user who sent the message, or None.
        msg: The Telegram message object (currently unused here).
        url_content: Fetched page text for the first URL, if any.
        urls: URLs found in the message.

    Failures are logged and never raised: archiving is best-effort.
    """
    try:
        date_str = datetime.now(timezone.utc).strftime("%Y-%m-%d")
        # username is None for users who have not set one.
        username = (user.username if user else None) or "anonymous"
        slug = re.sub(r"[^a-z0-9]+", "-", user_text[:50].lower()).strip("-") or "message"
        queue_dir = Path(REPO_DIR) / "inbox" / "queue"
        queue_dir.mkdir(parents=True, exist_ok=True)
        archive_path = _unique_archive_path(queue_dir, f"{date_str}-telegram-{username}-{slug}")

        # Extract rationale (the user's text minus the @mention and URL)
        rationale = re.sub(r"@\w+", "", user_text).strip()
        for url in (urls or []):
            rationale = rationale.replace(url, "").strip()

        # Determine priority — directed contribution with rationale gets high priority
        has_rationale = bool(rationale) and len(rationale) > 20
        priority = "high" if has_rationale else "medium"
        intake_tier = "directed" if has_rationale else "undirected"

        url_section = ""
        if url_content:
            url_section = f"\n## Article Content (fetched)\n\n{url_content[:8000]}\n"

        # Values interpolated into the YAML front matter are quoted and escaped
        # because they contain user-controlled text.
        title_yaml = _yaml_quote(f"Telegram: @{username} — {slug}")
        author_yaml = _yaml_quote(f"@{username}")
        url_yaml = _yaml_quote(urls[0] if urls else "")
        rationale_yaml = _yaml_quote(rationale[:200])
        rationale_note = rationale if rationale else "No rationale provided (bare link or question)"
        tier_note = (
            "fast-tracked, contributor provided reasoning"
            if intake_tier == "directed"
            else "standard processing"
        )

        content = f"""---
type: source
source_type: telegram
title: {title_yaml}
author: {author_yaml}
url: {url_yaml}
date: {date_str}
domain: internet-finance
format: conversation
status: unprocessed
priority: {priority}
intake_tier: {intake_tier}
rationale: {rationale_yaml}
proposed_by: {author_yaml}
tags: [telegram, ownership-community]
---

## Conversation

**@{username}:**
{user_text}

**Rio (response):**
{rio_response}
{url_section}
## Agent Notes
**Why archived:** Tagged exchange in ownership community.
**Rationale from contributor:** {rationale_note}
**Intake tier:** {intake_tier} — {tier_note}
**Triage:** Conversation may contain [CLAIM], [ENTITY], or [EVIDENCE] for extraction.
"""
        archive_path.write_text(content, encoding="utf-8")
        logger.info("Archived exchange to %s (tier: %s, urls: %d)",
                    archive_path.name, intake_tier, len(urls or []))
    except Exception as e:
        logger.error("Failed to archive exchange: %s", e)


# ─── Batch Triage ───────────────────────────────────────────────────────

async def run_batch_triage(context: ContextTypes.DEFAULT_TYPE):
    """Batch triage of buffered messages every TRIAGE_INTERVAL seconds.

    Groups messages into conversation windows, sends to Haiku for
    classification, archives substantive findings.
    """
    global message_buffer

    if not message_buffer:
        return

    # Grab and clear buffer
    messages = message_buffer[:]
    message_buffer = []

    logger.info("Batch triage: %d messages to process", len(messages))

    # Group into conversation windows (messages within 5 min of each other)
    windows = _group_into_windows(messages, window_seconds=300)

    if not windows:
        return

    # Build triage prompt
    window_sections = []
    for i, window in enumerate(windows):
        window_msgs = "\n".join(
            f"  @{m.get('username') or '?'}: {m['text'][:200]}"
            for m in window
        )
        window_sections.append(
            f"\n--- Window {i+1} ({len(window)} messages) ---\n{window_msgs}\n"
        )
    windows_text = "".join(window_sections)

    prompt = f"""Classify each conversation window. For each, respond with ONE tag:

[CLAIM] — Contains a specific, disagreeable proposition about how something works
[ENTITY] — Contains factual data about a company, protocol, person, or market
[EVIDENCE] — Contains data or argument that supports or challenges an existing claim about internet finance, futarchy, prediction markets, or token governance
[SKIP] — Casual conversation, not relevant to the knowledge base

Be generous with EVIDENCE — even confirming evidence strengthens the KB.

{windows_text}

Respond with ONLY the window numbers and tags, one per line:
1: [TAG]
2: [TAG]
..."""

    result = await call_openrouter(TRIAGE_MODEL, prompt, max_tokens=500)

    if not result:
        logger.warning("Triage LLM call failed — buffered messages dropped")
        return

    # Parse triage results
    for line in result.strip().split("\n"):
        # strip() so an indented answer line is still recognised
        match = re.match(r"(\d+):\s*\[(\w+)\]", line.strip())
        if not match:
            continue
        idx = int(match.group(1)) - 1
        tag = match.group(2).upper()

        if idx < 0 or idx >= len(windows):
            continue

        if tag in ("CLAIM", "ENTITY", "EVIDENCE"):
            _archive_window(windows[idx], tag)

    logger.info("Triage complete: %d windows processed", len(windows))


def _group_into_windows(messages: list[dict], window_seconds: int = 300) -> list[list[dict]]:
    """Group messages into conversation windows by time proximity.

    Messages are bucketed per chat ("chat_id") and ordered by their
    "timestamp" string. Within a chat, a new window starts when the gap to
    the previous message exceeds window_seconds or when the current window
    already holds MAX_WINDOW_SIZE messages. Messages whose timestamp cannot
    be parsed stay in the current window.

    The input list is not modified.

    Args:
        messages: Buffered message dicts.
        window_seconds: Maximum gap between consecutive messages of a window.

    Returns:
        A list of windows, each a non-empty list of message dicts.
    """
    if not messages:
        return []

    # Sort a copy by timestamp, then bucket per chat so unrelated groups are
    # never merged into one "conversation".
    by_chat: dict = defaultdict(list)
    for msg in sorted(messages, key=lambda m: m.get("timestamp") or ""):
        by_chat[msg.get("chat_id")].append(msg)

    windows = []
    for chat_messages in by_chat.values():
        current_window: list[dict] = []
        previous_ts = None
        for msg in chat_messages:
            ts = _parse_timestamp(msg.get("timestamp"))
            gap_exceeded = (
                ts is not None
                and previous_ts is not None
                and (ts - previous_ts).total_seconds() > window_seconds
            )
            if current_window and (gap_exceeded or len(current_window) >= MAX_WINDOW_SIZE):
                windows.append(current_window)
                current_window = []
            current_window.append(msg)
            if ts is not None:
                previous_ts = ts
        if current_window:
            windows.append(current_window)

    return windows


def _archive_window(window: list[dict], tag: str):
    """Archive a triaged conversation window to inbox/queue/.

    Args:
        window: Non-empty list of buffered message dicts.
        tag: Triage tag ("CLAIM", "ENTITY" or "EVIDENCE").

    Failures are logged and never raised: archiving is best-effort.
    """
    try:
        date_str = datetime.now(timezone.utc).strftime("%Y-%m-%d")
        # Buffered messages carry username=None for users without one, so a
        # .get() default is not enough.
        first_user = window[0].get("username") or "group"
        slug = re.sub(r"[^a-z0-9]+", "-", window[0]["text"][:40].lower()).strip("-") or "conversation"
        queue_dir = Path(REPO_DIR) / "inbox" / "queue"
        queue_dir.mkdir(parents=True, exist_ok=True)
        archive_path = _unique_archive_path(queue_dir, f"{date_str}-telegram-{first_user}-{slug}")

        # Build conversation content
        speakers = [msg.get("username") or "anonymous" for msg in window]
        conversation = "".join(
            f"**@{speaker}:** {msg['text']}\n\n"
            for speaker, msg in zip(speakers, window)
        )
        # Unique participants in order of first appearance (deterministic output)
        contributors = list(dict.fromkeys(speakers))

        title_yaml = _yaml_quote(f"Telegram conversation: {slug}")
        author_yaml = _yaml_quote(", ".join(contributors))
        participants = ", ".join(f"@{name}" for name in contributors)

        content = f"""---
type: source
source_type: telegram
title: {title_yaml}
author: {author_yaml}
date: {date_str}
domain: internet-finance
format: conversation
status: unprocessed
priority: medium
triage_tag: {tag.lower()}
tags: [telegram, ownership-community]
---

## Conversation ({len(window)} messages, {len(contributors)} participants)

{conversation}

## Agent Notes
**Triage:** [{tag}] — classified by batch triage
**Participants:** {participants}
"""
        archive_path.write_text(content, encoding="utf-8")
        logger.info("Archived window [%s]: %s (%d msgs, %d participants)",
                    tag, archive_path.name, len(window), len(contributors))
    except Exception as e:
        logger.error("Failed to archive window: %s", e)


# ─── Bot Setup ──────────────────────────────────────────────────────────

async def start_command(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Handle /start command."""
    # effective_message also covers edited-message updates, where
    # update.message is None.
    message = update.effective_message
    if message is None:
        return
    await message.reply_text(
        "I'm Rio, the internet finance agent for TeleoHumanity's collective knowledge base. "
        "Tag me with @teleo to ask about futarchy, prediction markets, token governance, "
        "or anything in our domain. I'll ground my response in our KB's evidence."
    )


async def stats_command(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Handle /stats command — show KB stats."""
    message = update.effective_message
    if message is None:
        return

    index = load_claim_index()
    stats = get_db_stats()

    if not index:
        await message.reply_text("KB index unavailable.")
        return

    await message.reply_text(
        f"📊 KB Stats:\n"
        f"• {index.get('total_claims', '?')} claims across {len(index.get('domains', {}))} domains\n"
        f"• {stats['merged_claims']} PRs merged\n"
        f"• {stats['contributors']} contributors\n"
        f"• {index.get('orphan_count', '?')} orphan claims ({index.get('orphan_ratio', 0)*100:.0f}%)\n"
        f"• {index.get('cross_domain_links', '?')} cross-domain connections"
    )


def main():
    """Start the bot.

    Exits with status 1 when the bot token file is missing or when the
    job queue needed for batch triage is unavailable.
    """
    # Load token
    token_path = Path(BOT_TOKEN_FILE)
    if not token_path.exists():
        logger.error("Bot token not found at %s", BOT_TOKEN_FILE)
        sys.exit(1)
    token = token_path.read_text().strip()

    logger.info("Starting Teleo Telegram bot (Rio)...")

    # Build application
    app = Application.builder().token(token).build()

    # Command handlers
    app.add_handler(CommandHandler("start", start_command))
    app.add_handler(CommandHandler("stats", stats_command))

    # Tag handler — messages mentioning the bot
    # python-telegram-bot filters.Mention doesn't work for bot mentions in groups
    # Use a regex filter for the bot username
    app.add_handler(MessageHandler(
        filters.TEXT & filters.Regex(r"(?i)@teleo"),
        handle_tagged,
    ))

    # All other text messages — buffer for triage
    app.add_handler(MessageHandler(
        filters.TEXT & ~filters.COMMAND,
        handle_message,
    ))

    # Batch triage job. job_queue is None when python-telegram-bot is
    # installed without its job-queue extra; fail with a clear message.
    if app.job_queue is None:
        logger.error("JobQueue unavailable — install python-telegram-bot[job-queue]")
        sys.exit(1)
    app.job_queue.run_repeating(
        run_batch_triage,
        interval=TRIAGE_INTERVAL,
        first=TRIAGE_INTERVAL,
    )

    # Run
    logger.info("Bot running. Triage interval: %ds", TRIAGE_INTERVAL)
    app.run_polling(drop_pending_updates=True)


if __name__ == "__main__":
    main()