From 075965568854c989a67754698e2c4c1061bfdbb9 Mon Sep 17 00:00:00 2001 From: m3taversal Date: Wed, 25 Mar 2026 13:21:26 +0000 Subject: [PATCH] fix: process all URLs in a message, not just the first When a user shared two X links in one message (sjdedic + knimkar), only the first got a standalone source. Now processes up to 5 URLs per message, each getting its own standalone source file. Pentagon-Agent: Epimetheus <3D35839A-7722-4740-B93D-51157F7D5E70> --- telegram/bot.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/telegram/bot.py b/telegram/bot.py index de6fd8f..1ca8c20 100644 --- a/telegram/bot.py +++ b/telegram/bot.py @@ -820,17 +820,17 @@ IMPORTANT: Two special tags you can append at the end of your response (after yo # Log the exchange for audit trail logger.info("Rio responded to @%s (msg_id=%d)", user.username if user else "?", msg.message_id) - # Detect and fetch URLs for pipeline ingestion + # Detect and fetch URLs for pipeline ingestion (all URLs, not just first) urls = _extract_urls(text) url_content = None - if urls: - logger.info("Fetching URL: %s", urls[0]) - url_content = await _fetch_url_content(urls[0]) - if url_content: - logger.info("Fetched %d chars from %s", len(url_content), urls[0]) - # Create standalone source file for the article (separate from conversation) - # This enters the extraction pipeline as a proper source, attributed to contributor - _archive_standalone_source(urls[0], url_content, user) + for url in urls[:5]: # Cap at 5 URLs per message + logger.info("Fetching URL: %s", url) + content = await _fetch_url_content(url) + if content: + logger.info("Fetched %d chars from %s", len(content), url) + if url_content is None: + url_content = content # First successfully fetched URL's content for conversation archive + _archive_standalone_source(url, content, user) # Archive the exchange as a source for pipeline (slow path) _archive_exchange(text, response, user, msg, url_content=url_content, urls=urls)