diff --git a/telegram/bot.py b/telegram/bot.py index de6fd8f..1ca8c20 100644 --- a/telegram/bot.py +++ b/telegram/bot.py @@ -820,17 +820,17 @@ IMPORTANT: Two special tags you can append at the end of your response (after yo # Log the exchange for audit trail logger.info("Rio responded to @%s (msg_id=%d)", user.username if user else "?", msg.message_id) - # Detect and fetch URLs for pipeline ingestion + # Detect and fetch URLs for pipeline ingestion (all URLs, not just first) urls = _extract_urls(text) url_content = None - if urls: - logger.info("Fetching URL: %s", urls[0]) - url_content = await _fetch_url_content(urls[0]) - if url_content: - logger.info("Fetched %d chars from %s", len(url_content), urls[0]) - # Create standalone source file for the article (separate from conversation) - # This enters the extraction pipeline as a proper source, attributed to contributor - _archive_standalone_source(urls[0], url_content, user) + for url in urls[:5]: # Cap at 5 URLs per message + logger.info("Fetching URL: %s", url) + content = await _fetch_url_content(url) + if content: + logger.info("Fetched %d chars from %s", len(content), url) + if url_content is None: + url_content = content # First URL's content for conversation archive + _archive_standalone_source(url, content, user) # Archive the exchange as a source for pipeline (slow path) _archive_exchange(text, response, user, msg, url_content=url_content, urls=urls)