epimetheus: fix article content parsing — contents[] array, not text field

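The article endpoint returns the body in a "contents" array of typed blocks (unstyled, header-two, markdown, list-item, blockquote, etc.). The code was reading article.text, which is empty; it now parses all block types into readable text. It also extracts engagement stats (likes, retweets, views). In addition, fetch_from_url now falls back to the article endpoint when the tweet text is just a URL, and substitutes a placeholder prompt when no body text can be retrieved. Fixes: the "Claude + Obsidian" article returned a title but empty text. Pentagon-Agent: Epimetheus <3D35839A-7722-4740-B93D-51157F7D5E70>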
2026-03-23 15:30:59 +00:00 · 2026-03-23 15:30:59 +00:00 · c59db5812f
commit c59db5812f
parent bcbe54a0a3
1 changed files with 65 additions and 16 deletions
--- a/telegram/x_client.py
+++ b/telegram/x_client.py
@ -151,15 +151,42 @@ async def get_article(tweet_id: str) -> Optional[dict]:
                article = data.get("article")
                if not article:
                    return None
+                # Article body is in "contents" array (not "text" field)
+                contents = article.get("contents", [])
+                text_parts = []
+                for block in contents:
+                    block_text = block.get("text", "")
+                    if not block_text:
+                        continue
+                    block_type = block.get("type", "unstyled")
+                    if block_type.startswith("header"):
+                        text_parts.append(f"\n## {block_text}\n")
+                    elif block_type == "markdown":
+                        text_parts.append(block_text)
+                    elif block_type in ("unordered-list-item",):
+                        text_parts.append(f"- {block_text}")
+                    elif block_type in ("ordered-list-item",):
+                        text_parts.append(f"* {block_text}")
+                    elif block_type == "blockquote":
+                        text_parts.append(f"> {block_text}")
+                    else:
+                        text_parts.append(block_text)
+                full_text = "\n".join(text_parts)
+                author_data = article.get("author", {})
+                likes = article.get("likeCount", 0) or 0
+                retweets = article.get("retweetCount", 0) or 0
                return {
-                    "text": article.get("text", article.get("content", "")),
+                    "text": full_text,
                    "title": article.get("title", ""),
-                    "author": article.get("author", {}).get("userName", ""),
-                    "author_name": article.get("author", {}).get("name", ""),
-                    "author_followers": article.get("author", {}).get("followers", 0),
+                    "author": author_data.get("userName", ""),
+                    "author_name": author_data.get("name", ""),
+                    "author_followers": author_data.get("followers", 0),
                    "tweet_date": article.get("createdAt", ""),
                    "is_article": True,
-                    "engagement": 0,
+                    "engagement": likes + retweets,
+                    "likes": likes,
+                    "retweets": retweets,
+                    "views": article.get("viewCount", 0) or 0,
                }
    except Exception as e:
        logger.warning("get_article(%s) error: %s", tweet_id, e)
@ -260,19 +287,41 @@ async def fetch_from_url(url: str) -> Optional[dict]:
    tweet_id = match.group(2)

    # Try tweet first (most X URLs are tweets)
-    result = await get_tweet(tweet_id)
-    if result:
-        result["url"] = url
-        return result
+    tweet_result = await get_tweet(tweet_id)

-    # Try article (X long-form posts)
-    result = await get_article(tweet_id)
-    if result:
-        result["url"] = url
-        result["author"] = result.get("author") or username
-        return result
+    if tweet_result:
+        tweet_text = tweet_result.get("text", "").strip()
+        is_just_url = tweet_text.startswith("http") and len(tweet_text.split()) <= 2

-    # Both failed — return placeholder so caller can surface the failure
+        if not is_just_url:
+            # Regular tweet with real content — return it
+            tweet_result["url"] = url
+            return tweet_result
+
+    # Tweet was empty/URL-only, or tweet lookup failed — try article endpoint
+    article_result = await get_article(tweet_id)
+    if article_result:
+        article_result["url"] = url
+        article_result["author"] = article_result.get("author") or username
+        # Article endpoint may return title but not full text
+        if article_result.get("title") and not article_result.get("text"):
+            article_result["text"] = (
+                f'This is an X Article titled "{article_result["title"]}" by @{username}. '
+                f"The API returned the title but not the full content. "
+                f"Ask the user to paste the key points so you can analyze them."
+            )
+        return article_result
+
+    # If we got the tweet but it was just a URL, return with helpful context
+    if tweet_result:
+        tweet_result["url"] = url
+        tweet_result["text"] = (
+            f"Tweet by @{username} links to content but contains no text. "
+            f"This may be an X Article. Ask the user to paste the key points."
+        )
+        return tweet_result
+
+    # Everything failed
    return {
        "text": f"[Could not fetch content from @{username}]",
        "url": url,