epimetheus: fix article content parsing — contents[] array, not text field

The article endpoint returns the body in a "contents" array of typed blocks
(unstyled, header-*, markdown, unordered/ordered list items, blockquote, etc.).
The parser was reading article.text, which is empty. It now parses all block
types into readable text. Also extracts engagement stats (likes, retweets, views).

Fixes: "Claude + Obsidian" article returned title but empty text.

Pentagon-Agent: Epimetheus <3D35839A-7722-4740-B93D-51157F7D5E70>
This commit is contained in:
m3taversal 2026-03-23 15:30:59 +00:00
parent bcbe54a0a3
commit c59db5812f

View file

@ -151,15 +151,42 @@ async def get_article(tweet_id: str) -> Optional[dict]:
article = data.get("article")
if not article:
return None
# Article body is in "contents" array (not "text" field)
contents = article.get("contents", [])
text_parts = []
for block in contents:
block_text = block.get("text", "")
if not block_text:
continue
block_type = block.get("type", "unstyled")
if block_type.startswith("header"):
text_parts.append(f"\n## {block_text}\n")
elif block_type == "markdown":
text_parts.append(block_text)
elif block_type in ("unordered-list-item",):
text_parts.append(f"- {block_text}")
elif block_type in ("ordered-list-item",):
text_parts.append(f"* {block_text}")
elif block_type == "blockquote":
text_parts.append(f"> {block_text}")
else:
text_parts.append(block_text)
full_text = "\n".join(text_parts)
author_data = article.get("author", {})
likes = article.get("likeCount", 0) or 0
retweets = article.get("retweetCount", 0) or 0
return {
"text": article.get("text", article.get("content", "")),
"text": full_text,
"title": article.get("title", ""),
"author": article.get("author", {}).get("userName", ""),
"author_name": article.get("author", {}).get("name", ""),
"author_followers": article.get("author", {}).get("followers", 0),
"author": author_data.get("userName", ""),
"author_name": author_data.get("name", ""),
"author_followers": author_data.get("followers", 0),
"tweet_date": article.get("createdAt", ""),
"is_article": True,
"engagement": 0,
"engagement": likes + retweets,
"likes": likes,
"retweets": retweets,
"views": article.get("viewCount", 0) or 0,
}
except Exception as e:
logger.warning("get_article(%s) error: %s", tweet_id, e)
@ -260,19 +287,41 @@ async def fetch_from_url(url: str) -> Optional[dict]:
tweet_id = match.group(2)
# Try tweet first (most X URLs are tweets)
result = await get_tweet(tweet_id)
if result:
result["url"] = url
return result
tweet_result = await get_tweet(tweet_id)
# Try article (X long-form posts)
result = await get_article(tweet_id)
if result:
result["url"] = url
result["author"] = result.get("author") or username
return result
if tweet_result:
tweet_text = tweet_result.get("text", "").strip()
is_just_url = tweet_text.startswith("http") and len(tweet_text.split()) <= 2
# Both failed — return placeholder so caller can surface the failure
if not is_just_url:
# Regular tweet with real content — return it
tweet_result["url"] = url
return tweet_result
# Tweet was empty/URL-only, or tweet lookup failed — try article endpoint
article_result = await get_article(tweet_id)
if article_result:
article_result["url"] = url
article_result["author"] = article_result.get("author") or username
# Article endpoint may return title but not full text
if article_result.get("title") and not article_result.get("text"):
article_result["text"] = (
f'This is an X Article titled "{article_result["title"]}" by @{username}. '
f"The API returned the title but not the full content. "
f"Ask the user to paste the key points so you can analyze them."
)
return article_result
# If we got the tweet but it was just a URL, return with helpful context
if tweet_result:
tweet_result["url"] = url
tweet_result["text"] = (
f"Tweet by @{username} links to content but contains no text. "
f"This may be an X Article. Ask the user to paste the key points."
)
return tweet_result
# Everything failed
return {
"text": f"[Could not fetch content from @{username}]",
"url": url,