fix: process all URLs in a message (up to 5), not just the first

When a user shared two X links in one message (sjdedic + knimkar),
only the first got a standalone source. Now processes up to 5 URLs
per message, each getting its own standalone source file.

Pentagon-Agent: Epimetheus <3D35839A-7722-4740-B93D-51157F7D5E70>
This commit is contained in:
m3taversal 2026-03-25 13:21:26 +00:00
parent 102d97859c
commit 0759655688

View file

@@ -820,17 +820,17 @@ IMPORTANT: Two special tags you can append at the end of your response (after yo
# Log the exchange for audit trail
logger.info("Rio responded to @%s (msg_id=%d)", user.username if user else "?", msg.message_id)
# Detect and fetch URLs for pipeline ingestion
# Detect and fetch URLs for pipeline ingestion (all URLs, not just first)
urls = _extract_urls(text)
url_content = None
if urls:
logger.info("Fetching URL: %s", urls[0])
url_content = await _fetch_url_content(urls[0])
if url_content:
logger.info("Fetched %d chars from %s", len(url_content), urls[0])
# Create standalone source file for the article (separate from conversation)
# This enters the extraction pipeline as a proper source, attributed to contributor
_archive_standalone_source(urls[0], url_content, user)
for url in urls[:5]: # Cap at 5 URLs per message
logger.info("Fetching URL: %s", url)
content = await _fetch_url_content(url)
if content:
logger.info("Fetched %d chars from %s", len(content), url)
if url_content is None:
url_content = content # First URL's content for conversation archive
_archive_standalone_source(url, content, user)
# Archive the exchange as a source for pipeline (slow path)
_archive_exchange(text, response, user, msg, url_content=url_content, urls=urls)