diff --git a/telegram/x_client.py b/telegram/x_client.py index 6051c2a..f1c4cf2 100644 --- a/telegram/x_client.py +++ b/telegram/x_client.py @@ -151,15 +151,42 @@ async def get_article(tweet_id: str) -> Optional[dict]: article = data.get("article") if not article: return None + # Article body is in "contents" array (not "text" field) + contents = article.get("contents", []) + text_parts = [] + for block in contents: + block_text = block.get("text", "") + if not block_text: + continue + block_type = block.get("type", "unstyled") + if block_type.startswith("header"): + text_parts.append(f"\n## {block_text}\n") + elif block_type == "markdown": + text_parts.append(block_text) + elif block_type in ("unordered-list-item",): + text_parts.append(f"- {block_text}") + elif block_type in ("ordered-list-item",): + text_parts.append(f"* {block_text}") + elif block_type == "blockquote": + text_parts.append(f"> {block_text}") + else: + text_parts.append(block_text) + full_text = "\n".join(text_parts) + author_data = article.get("author", {}) + likes = article.get("likeCount", 0) or 0 + retweets = article.get("retweetCount", 0) or 0 return { - "text": article.get("text", article.get("content", "")), + "text": full_text, "title": article.get("title", ""), - "author": article.get("author", {}).get("userName", ""), - "author_name": article.get("author", {}).get("name", ""), - "author_followers": article.get("author", {}).get("followers", 0), + "author": author_data.get("userName", ""), + "author_name": author_data.get("name", ""), + "author_followers": author_data.get("followers", 0), "tweet_date": article.get("createdAt", ""), "is_article": True, - "engagement": 0, + "engagement": likes + retweets, + "likes": likes, + "retweets": retweets, + "views": article.get("viewCount", 0) or 0, } except Exception as e: logger.warning("get_article(%s) error: %s", tweet_id, e) @@ -260,19 +287,41 @@ async def fetch_from_url(url: str) -> Optional[dict]: tweet_id = match.group(2) # Try tweet first (most X URLs are tweets) - result = await get_tweet(tweet_id) - if result: - result["url"] = url - return result + tweet_result = await get_tweet(tweet_id) - # Try article (X long-form posts) - result = await get_article(tweet_id) - if result: - result["url"] = url - result["author"] = result.get("author") or username - return result + if tweet_result: + tweet_text = tweet_result.get("text", "").strip() + is_just_url = tweet_text.startswith("http") and len(tweet_text.split()) <= 2 - # Both failed — return placeholder so caller can surface the failure + if not is_just_url: + # Regular tweet with real content — return it + tweet_result["url"] = url + return tweet_result + + # Tweet was empty/URL-only, or tweet lookup failed — try article endpoint + article_result = await get_article(tweet_id) + if article_result: + article_result["url"] = url + article_result["author"] = article_result.get("author") or username + # Article endpoint may return title but not full text + if article_result.get("title") and not article_result.get("text"): + article_result["text"] = ( + f'This is an X Article titled "{article_result["title"]}" by @{username}. ' + f"The API returned the title but not the full content. " + f"Ask the user to paste the key points so you can analyze them." + ) + return article_result + + # If we got the tweet but it was just a URL, return with helpful context + if tweet_result: + tweet_result["url"] = url + tweet_result["text"] = ( + f"Tweet by @{username} links to content but contains no text. " + f"This may be an X Article. Ask the user to paste the key points." + ) + return tweet_result + + # Everything failed return { "text": f"[Could not fetch content from @{username}]", "url": url,