diff --git a/telegram/bot.py b/telegram/bot.py index 25606c4..24babef 100644 --- a/telegram/bot.py +++ b/telegram/bot.py @@ -332,7 +332,9 @@ async def handle_research(msg, query: str, user, silent: bool = False): # Fetch full content for top tweets (not just search snippets) from x_client import fetch_from_url - for tweet in tweets[:5]: # Top 5 by engagement + for i, tweet in enumerate(tweets[:5]): # Top 5 by engagement + if i > 0: + await asyncio.sleep(0.5) # Ganymede: 500ms between calls, polite to Ben's API url = tweet.get("url", "") if url: try: @@ -766,7 +768,8 @@ def _archive_standalone_source(url: str, content: str, user): Separate from the conversation archive — this is the actual article/tweet entering the extraction pipeline as a proper source, attributed to the - contributor who shared it. + contributor who shared it. Ganymede: keep pure (no Rio analysis), two + source_types (x-tweet vs x-article). """ try: username = user.username if user else "anonymous" @@ -778,6 +781,11 @@ def _archive_standalone_source(url: str, content: str, user): if author_match: author = f"@{author_match.group(1)}" + # Distinguish tweet vs article (Ganymede: different extraction behavior) + is_article = "--- Article Content ---" in content and len(content) > 1000 + source_type = "x-article" if is_article else "x-tweet" + fmt = "article" if is_article else "social-media" + slug = re.sub(r"[^a-z0-9]+", "-", f"{author}-{url.split('/')[-1][:30]}".lower()).strip("-") filename = f"{date_str}-tg-shared-{slug}.md" source_path = Path(ARCHIVE_DIR) / filename @@ -788,20 +796,20 @@ def _archive_standalone_source(url: str, content: str, user): source_content = f"""--- type: source -source_type: x-article +source_type: {source_type} title: "{author} — shared via Telegram by @{username}" author: "{author}" url: "{url}" date: {date_str} domain: internet-finance -format: article +format: {fmt} status: unprocessed proposed_by: "@{username}" contribution_type: source-submission -tags: [telegram-shared, x-article] +tags: [telegram-shared, {source_type}] --- -# {author} — Article/Thread +# {author} — {'Article' if is_article else 'Tweet/Thread'} Shared by @{username} via Telegram. Source URL: {url}