fix: process all URLs in a message, not just the first
When a user shared two X links in one message (sjdedic + knimkar), only the first got a standalone source. Now processes up to 5 URLs per message, each getting its own standalone source file. Pentagon-Agent: Epimetheus <3D35839A-7722-4740-B93D-51157F7D5E70>
This commit is contained in:
parent
102d97859c
commit
0759655688
1 changed file with 9 additions and 9 deletions
|
|
@@ -820,17 +820,17 @@ IMPORTANT: Two special tags you can append at the end of your response (after yo
|
|||
# Log the exchange for audit trail
|
||||
logger.info("Rio responded to @%s (msg_id=%d)", user.username if user else "?", msg.message_id)
|
||||
|
||||
# Detect and fetch URLs for pipeline ingestion
|
||||
# Detect and fetch URLs for pipeline ingestion (all URLs, not just first)
|
||||
urls = _extract_urls(text)
|
||||
url_content = None
|
||||
if urls:
|
||||
logger.info("Fetching URL: %s", urls[0])
|
||||
url_content = await _fetch_url_content(urls[0])
|
||||
if url_content:
|
||||
logger.info("Fetched %d chars from %s", len(url_content), urls[0])
|
||||
# Create standalone source file for the article (separate from conversation)
|
||||
# This enters the extraction pipeline as a proper source, attributed to contributor
|
||||
_archive_standalone_source(urls[0], url_content, user)
|
||||
for url in urls[:5]: # Cap at 5 URLs per message
|
||||
logger.info("Fetching URL: %s", url)
|
||||
content = await _fetch_url_content(url)
|
||||
if content:
|
||||
logger.info("Fetched %d chars from %s", len(content), url)
|
||||
if url_content is None:
|
||||
url_content = content # First URL's content for conversation archive
|
||||
_archive_standalone_source(url, content, user)
|
||||
|
||||
# Archive the exchange as a source for pipeline (slow path)
|
||||
_archive_exchange(text, response, user, msg, url_content=url_content, urls=urls)
|
||||
|
|
|
|||
Loading…
Reference in a new issue