From 075965568854c989a67754698e2c4c1061bfdbb9 Mon Sep 17 00:00:00 2001 From: m3taversal Date: Wed, 25 Mar 2026 13:21:26 +0000 Subject: [PATCH] fix: process all URLs in a message, not just the first When a user shared two X links in one message (sjdedic + knimkar), only the first got a standalone source. Now processes up to 5 URLs per message, each getting its own standalone source file. Pentagon-Agent: Epimetheus <3D35839A-7722-4740-B93D-51157F7D5E70> --- telegram/bot.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/telegram/bot.py b/telegram/bot.py index de6fd8f..1ca8c20 100644 --- a/telegram/bot.py +++ b/telegram/bot.py @@ -820,17 +820,17 @@ IMPORTANT: Two special tags you can append at the end of your response (after yo # Log the exchange for audit trail logger.info("Rio responded to @%s (msg_id=%d)", user.username if user else "?", msg.message_id) - # Detect and fetch URLs for pipeline ingestion + # Detect and fetch URLs for pipeline ingestion (all URLs, not just first) urls = _extract_urls(text) url_content = None - if urls: - logger.info("Fetching URL: %s", urls[0]) - url_content = await _fetch_url_content(urls[0]) - if url_content: - logger.info("Fetched %d chars from %s", len(url_content), urls[0]) - # Create standalone source file for the article (separate from conversation) - # This enters the extraction pipeline as a proper source, attributed to contributor - _archive_standalone_source(urls[0], url_content, user) + for url in urls[:5]: # Cap at 5 URLs per message + logger.info("Fetching URL: %s", url) + content = await _fetch_url_content(url) + if content: + logger.info("Fetched %d chars from %s", len(content), url) + if url_content is None: + url_content = content # First successfully fetched URL's content for conversation archive + _archive_standalone_source(url, content, user) # Archive the exchange as a source for pipeline (slow path) _archive_exchange(text, response, user, msg, url_content=url_content, urls=urls)