Pulled from /opt/teleo-eval/telegram/ on VPS. Includes:
- bot.py (92K), kb_retrieval.py, kb_tools.py (agentic retrieval)
- retrieval.py (RRF merge, query decomposition, entity traversal)
- response.py (system prompt builder, response parser)
- agent_config.py, agent_runner.py (multi-agent template unit support)
- approval_stages.py, approvals.py, digest.py (approval workflow)
- eval_checks.py, eval.py (response quality checks)
- output_gate.py, x_publisher.py, x_client.py, x_search.py (X pipeline)
- market_data.py, worktree_lock.py (utilities)
- rio.yaml, theseus.yaml (agent configs)

These files were deployed to VPS but never committed to the repo.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
366 lines
14 KiB
Python
366 lines
14 KiB
Python
#!/usr/bin/env python3
|
|
"""X (Twitter) API client for Teleo agents.
|
|
|
|
Consolidated interface to twitterapi.io. Used by:
- Telegram bot (research, tweet fetching, link analysis)
- Research sessions (network monitoring, source discovery)
- Any agent that needs X data

Epimetheus owns this module.

## Available Endpoints (twitterapi.io)

| Endpoint | What it does | When to use |
|----------|-------------|-------------|
| GET /tweets?tweet_ids={id} | Fetch specific tweet(s) by ID | User drops a link, need full content |
| GET /article?tweet_id={id} | Fetch X long-form article | User drops an article link |
| GET /tweet/advanced_search?query={q} | Search tweets by keyword | /research command, topic discovery |
| GET /user/last_tweets?userName={u} | Get user's recent tweets | Network monitoring, agent research |

## Cost

All endpoints use the X-API-Key header. Pricing is per-request via twitterapi.io.
Rate limits depend on plan tier. Key at /opt/teleo-eval/secrets/twitterapi-io-key.

## Rate Limiting

Research searches: 3 per user per day (explicit /research).
Haiku autonomous searches: uncapped (don't burn user budget).
Tweet fetches (URL lookups): uncapped (cheap, single tweet).
|
|
"""
|
|
|
|
import logging
|
|
import re
|
|
import time
|
|
from pathlib import Path
|
|
from typing import Optional
|
|
|
|
import aiohttp
|
|
|
|
logger = logging.getLogger("x-client")

# ─── Config ──────────────────────────────────────────────────────────────

# Root URL of the twitterapi.io API; endpoint paths are appended to it.
BASE_URL: str = "https://api.twitterapi.io/twitter"
# File the API key is read from (see _load_api_key).
API_KEY_FILE: str = "/opt/teleo-eval/secrets/twitterapi-io-key"
# Total timeout applied to each HTTP request, in seconds.
REQUEST_TIMEOUT: int = 15

# Rate limiting for user-triggered research.
# Maps a user id to the time.time() stamps of that user's recorded requests.
_research_usage: dict[int, list[float]] = {}
# Recorded requests allowed per user within a rolling 86400-second window.
MAX_RESEARCH_PER_DAY: int = 3
|
|
|
|
|
|
# ─── API Key ─────────────────────────────────────────────────────────────
|
|
|
|
def _load_api_key() -> Optional[str]:
    """Load the twitterapi.io API key from the secrets file.

    Reads ``API_KEY_FILE`` on every call and strips surrounding whitespace.

    Returns:
        The key, or ``None`` when the file is missing, unreadable, not valid
        text, or contains only whitespace. Failures are logged as warnings
        rather than raised.
    """
    try:
        key = Path(API_KEY_FILE).read_text(encoding="utf-8").strip()
    except FileNotFoundError:
        logger.warning("X API key not found at %s", API_KEY_FILE)
        return None
    except (OSError, UnicodeError) as exc:
        # Permission problems, a directory at the path, undecodable bytes, ...
        # Report the real cause instead of claiming the file is missing.
        logger.warning("X API key at %s could not be read: %s", API_KEY_FILE, exc)
        return None

    if not key:
        # An empty key would otherwise be returned as "" despite Optional[str].
        logger.warning("X API key file %s is empty", API_KEY_FILE)
        return None
    return key
|
|
|
|
|
|
def _headers() -> dict:
    """Return the HTTP headers for a twitterapi.io request.

    The result is ``{"X-API-Key": <key>}`` when ``_load_api_key()`` yields a
    non-empty key, and an empty dict otherwise.
    """
    api_key = _load_api_key()
    return {"X-API-Key": api_key} if api_key else {}
|
|
|
|
|
|
# ─── Rate Limiting ───────────────────────────────────────────────────────
|
|
|
|
def check_research_rate_limit(user_id: int) -> bool:
    """Check whether a user may run another research request.

    Timestamps older than 86400 seconds are pruned from the user's entry in
    ``_research_usage`` as a side effect.

    Args:
        user_id: Key under which the user's request timestamps are stored.

    Returns:
        True if fewer than ``MAX_RESEARCH_PER_DAY`` requests remain inside
        the 86400-second window, False otherwise.
    """
    now = time.time()
    recent = [t for t in _research_usage.get(user_id, ()) if now - t < 86400]
    if recent:
        _research_usage[user_id] = recent
    else:
        # Do not keep an empty list for users with no usage in the window;
        # otherwise the dict gains a permanent entry for every user checked.
        _research_usage.pop(user_id, None)
    return len(recent) < MAX_RESEARCH_PER_DAY
|
|
|
|
|
|
def record_research_usage(user_id: int):
    """Append the current time to the user's list in ``_research_usage``.

    Creates the list on first use. Called for explicit research requests so
    they count against the user's daily limit.
    """
    stamp = time.time()
    history = _research_usage.get(user_id)
    if history is None:
        history = _research_usage[user_id] = []
    history.append(stamp)
|
|
|
|
|
|
def get_research_remaining(user_id: int) -> int:
    """Return how many research requests the user still has available.

    Counts the user's timestamps from the last 86400 seconds and subtracts
    them from ``MAX_RESEARCH_PER_DAY``; the result never goes below zero.
    Read-only: ``_research_usage`` is not modified.
    """
    now = time.time()
    used = sum(1 for stamp in _research_usage.get(user_id, []) if now - stamp < 86400)
    remaining = MAX_RESEARCH_PER_DAY - used
    return remaining if remaining > 0 else 0
|
|
|
|
|
|
# ─── Core API Functions ──────────────────────────────────────────────────
|
|
|
|
async def get_tweet(tweet_id: str) -> Optional[dict]:
    """Fetch one tweet by ID and return it in normalized form.

    Endpoint: GET /tweets?tweet_ids={id}

    Returns the ``_normalize_tweet`` result for the first tweet in the
    response, or None when no API key is available, the status is not 200,
    the response holds no tweets, or any error occurs (errors are logged,
    never raised).
    """
    auth_headers = _headers()
    if not auth_headers:
        return None

    endpoint = f"{BASE_URL}/tweets"
    query = {"tweet_ids": tweet_id}
    try:
        timeout = aiohttp.ClientTimeout(total=REQUEST_TIMEOUT)
        async with aiohttp.ClientSession() as session:
            async with session.get(
                endpoint, params=query, headers=auth_headers, timeout=timeout
            ) as resp:
                if resp.status != 200:
                    logger.warning("get_tweet(%s) → %d", tweet_id, resp.status)
                    return None
                payload = await resp.json()
                found = payload.get("tweets", [])
                return _normalize_tweet(found[0]) if found else None
    except Exception as e:
        logger.warning("get_tweet(%s) error: %s", tweet_id, e)
        return None
|
|
|
|
|
|
def _render_article_blocks(contents) -> str:
    """Render an article's ``contents`` blocks as Markdown-flavoured text.

    Blocks without text (or that are not dicts) are skipped. Headers become
    ``## `` lines, unordered items ``- ``, blockquotes ``> ``, and
    consecutive ordered items are numbered ``1.``, ``2.``, ...; every other
    block type is emitted verbatim. ``None`` renders as an empty string.
    """
    lines = []
    ordinal = 0  # position within the current run of ordered-list items
    for item in contents or []:
        if not isinstance(item, dict):
            continue
        item_text = item.get("text", "")
        if not item_text:
            continue
        item_type = item.get("type") or "unstyled"

        if item_type == "ordered-list-item":
            # Number the items; a "* " bullet would hide that the list is ordered.
            ordinal += 1
            lines.append(f"{ordinal}. {item_text}")
            continue

        ordinal = 0  # any other rendered block ends the current numbered run
        if item_type.startswith("header"):
            lines.append(f"\n## {item_text}\n")
        elif item_type == "unordered-list-item":
            lines.append(f"- {item_text}")
        elif item_type == "blockquote":
            lines.append(f"> {item_text}")
        else:
            # "markdown", "unstyled" and unknown types pass through unchanged.
            lines.append(item_text)
    return "\n".join(lines)


async def get_article(tweet_id: str) -> Optional[dict]:
    """Fetch an X long-form article by tweet ID.

    Endpoint: GET /article?tweet_id={id}

    Returns:
        A dict with ``text`` (rendered from the article's ``contents``
        blocks), ``title``, author fields, ``tweet_date``, engagement
        counters and ``is_article=True``; or None when no API key is
        available, the status is not 200, the response has no article, or
        any error occurs (errors are logged, never raised).
    """
    headers = _headers()
    if not headers:
        return None

    try:
        async with aiohttp.ClientSession() as session:
            async with session.get(
                f"{BASE_URL}/article",
                params={"tweet_id": tweet_id},
                headers=headers,
                timeout=aiohttp.ClientTimeout(total=REQUEST_TIMEOUT),
            ) as resp:
                if resp.status != 200:
                    return None
                data = await resp.json()

        article = data.get("article")
        if not article:
            return None

        # Article body is in "contents" array (not "text" field)
        full_text = _render_article_blocks(article.get("contents"))

        # "or {}" / "or 0": an explicit null in the payload must not raise
        # and discard the article, nor leak None into numeric fields.
        author_data = article.get("author") or {}
        likes = article.get("likeCount", 0) or 0
        retweets = article.get("retweetCount", 0) or 0
        return {
            "text": full_text,
            "title": article.get("title", ""),
            "author": author_data.get("userName", ""),
            "author_name": author_data.get("name", ""),
            "author_followers": author_data.get("followers", 0) or 0,
            "tweet_date": article.get("createdAt", ""),
            "is_article": True,
            "engagement": likes + retweets,
            "likes": likes,
            "retweets": retweets,
            "views": article.get("viewCount", 0) or 0,
        }
    except Exception as e:
        logger.warning("get_article(%s) error: %s", tweet_id, e)
        return None
|
|
|
|
|
|
def _rank_search_results(raw_tweets, max_results: int, min_engagement: int) -> list[dict]:
    """Normalize, filter and rank raw search hits for ``search_tweets``.

    Looks at no more than ``max_results * 2`` raw entries, drops retweets
    (text starting with "RT @"), entries below ``min_engagement`` and entries
    that cannot be normalized, keeps at most ``max_results`` of them and
    returns those sorted by engagement, highest first.
    """
    if not isinstance(raw_tweets, list):
        # Covers a missing/null "tweets" field and unexpected payload shapes.
        return []

    results = []
    for tweet in raw_tweets[:max_results * 2]:
        try:
            normalized = _normalize_tweet(tweet)
        except (AttributeError, TypeError) as e:
            # One malformed entry must not abort the whole search.
            logger.warning("search_tweets: skipping malformed tweet: %s", e)
            continue
        if not normalized:
            continue
        if normalized["text"].startswith("RT @"):
            continue
        if normalized["engagement"] < min_engagement:
            continue
        results.append(normalized)
        if len(results) >= max_results:
            break

    results.sort(key=lambda t: t["engagement"], reverse=True)
    return results


async def search_tweets(query: str, max_results: int = 20, min_engagement: int = 0) -> list[dict]:
    """Search X for tweets matching a query. Returns most recent, sorted by engagement.

    Endpoint: GET /tweet/advanced_search?query={q}&queryType=Latest

    Use short queries (2-3 words). Long queries return nothing.

    Args:
        query: Search string sent to the API.
        max_results: Maximum number of tweets returned; values <= 0 yield
            an empty list without calling the API.
        min_engagement: Minimum ``engagement`` value a tweet needs to be kept.

    Returns:
        Normalized tweet dicts, highest engagement first. An empty list when
        no API key is available, the status is >= 400, or any error occurs
        (errors are logged, never raised).
    """
    if max_results <= 0:
        return []

    headers = _headers()
    if not headers:
        return []

    try:
        async with aiohttp.ClientSession() as session:
            async with session.get(
                f"{BASE_URL}/tweet/advanced_search",
                params={"query": query, "queryType": "Latest"},
                headers=headers,
                timeout=aiohttp.ClientTimeout(total=REQUEST_TIMEOUT),
            ) as resp:
                if resp.status >= 400:
                    logger.warning("search_tweets('%s') → %d", query, resp.status)
                    return []
                data = await resp.json()
                raw_tweets = data.get("tweets", [])
    except Exception as e:
        logger.warning("search_tweets('%s') error: %s", query, e)
        return []

    return _rank_search_results(raw_tweets, max_results, min_engagement)
|
|
|
|
|
|
async def get_user_tweets(username: str, max_results: int = 20) -> list[dict]:
    """Get a user's most recent tweets.

    Endpoint: GET /user/last_tweets?userName={username}

    Used by research sessions for network monitoring.

    Args:
        username: Value sent as the ``userName`` query parameter.
        max_results: Maximum number of raw tweets taken from the response;
            entries that cannot be normalized are dropped from that slice.

    Returns:
        Normalized tweet dicts in response order. An empty list when no API
        key is available, the status is >= 400, or any error occurs (errors
        are logged, never raised).
    """
    headers = _headers()
    if not headers:
        return []

    try:
        async with aiohttp.ClientSession() as session:
            async with session.get(
                f"{BASE_URL}/user/last_tweets",
                params={"userName": username},
                headers=headers,
                timeout=aiohttp.ClientTimeout(total=REQUEST_TIMEOUT),
            ) as resp:
                if resp.status >= 400:
                    logger.warning("get_user_tweets('%s') → %d", username, resp.status)
                    return []
                data = await resp.json()
                raw_tweets = data.get("tweets", [])
    except Exception as e:
        logger.warning("get_user_tweets('%s') error: %s", username, e)
        return []

    if not isinstance(raw_tweets, list):
        # Covers a null "tweets" field and unexpected payload shapes.
        return []

    tweets = []
    for raw in raw_tweets[:max(max_results, 0)]:
        try:
            # Normalize once per tweet and reuse the result.
            normalized = _normalize_tweet(raw)
        except (AttributeError, TypeError) as e:
            # One malformed entry must not abort the whole listing.
            logger.warning("get_user_tweets('%s'): skipping malformed tweet: %s", username, e)
            continue
        if normalized:
            tweets.append(normalized)
    return tweets
|
|
|
|
|
|
# ─── High-Level Functions ────────────────────────────────────────────────
|
|
|
|
async def fetch_from_url(url: str) -> Optional[dict]:
    """Fetch tweet or article content for an X / Twitter status URL.

    The tweet lookup runs first. The article endpoint is tried when that
    lookup fails or the tweet text is nothing but a link.

    Returns:
        None when ``url`` is not a ``twitter.com`` / ``x.com`` status URL.
        Otherwise always a dict: the tweet, the article, the tweet with an
        explanatory ``text`` when it only carried a link, or a placeholder
        whose ``text`` says the content could not be fetched, so the caller
        can tell the user instead of silently ignoring the link.
    """
    parsed = re.search(r'(?:twitter\.com|x\.com)/(\w+)/status/(\d+)', url)
    if parsed is None:
        return None
    username, tweet_id = parsed.groups()

    # Most X URLs are ordinary tweets, so that lookup goes first.
    tweet = await get_tweet(tweet_id)
    if tweet:
        body = tweet.get("text", "").strip()
        has_real_text = not body.startswith("http") or len(body.split()) > 2
        if has_real_text:
            tweet["url"] = url
            return tweet

    # No tweet, or a tweet that is just a link: it may be an article.
    article = await get_article(tweet_id)
    if article:
        article["url"] = url
        article["author"] = article.get("author") or username
        title = article.get("title")
        if title and not article.get("text"):
            # The article endpoint can return a title without any body text.
            article["text"] = "".join((
                f'This is an X Article titled "{title}" by @{username}. ',
                "The API returned the title but not the full content. ",
                "Ask the user to paste the key points so you can analyze them.",
            ))
        return article

    if tweet:
        # Link-only tweet and no article: keep the metadata, explain the gap.
        tweet["url"] = url
        tweet["text"] = "".join((
            f"Tweet by @{username} links to content but contains no text. ",
            "This may be an X Article. Ask the user to paste the key points.",
        ))
        return tweet

    # Neither lookup produced anything.
    placeholder = {
        "text": f"[Could not fetch content from @{username}]",
        "url": url,
        "author": username,
        "author_name": "",
        "author_followers": 0,
        "engagement": 0,
        "tweet_date": "",
        "is_article": False,
    }
    return placeholder
|
|
|
|
|
|
# ─── Internal ────────────────────────────────────────────────────────────
|
|
|
|
def _normalize_tweet(raw: dict) -> Optional[dict]:
    """Normalize a raw API tweet into a consistent structure.

    Args:
        raw: One tweet object as returned by the API.

    Returns:
        A dict with ``id``, ``text``, ``url``, author fields, engagement
        counters (``engagement`` is likes + retweets + replies),
        ``tweet_date``, ``is_reply`` and ``is_article=False``; or None when
        ``raw`` is not a dict or has no text.
    """
    if not isinstance(raw, dict):
        return None
    text = raw.get("text", "")
    if not text:
        return None

    # The payload may carry an explicit null author; treat it as "no data"
    # instead of raising AttributeError on .get().
    author = raw.get("author")
    if not isinstance(author, dict):
        author = {}

    # "or 0" also maps explicit nulls to zero.
    likes = raw.get("likeCount", 0) or 0
    retweets = raw.get("retweetCount", 0) or 0
    replies = raw.get("replyCount", 0) or 0
    views = raw.get("viewCount", 0) or 0

    return {
        "id": raw.get("id", ""),
        "text": text,
        # Fall back to "url" when "twitterUrl" is absent, null or empty.
        "url": raw.get("twitterUrl") or raw.get("url") or "",
        "author": author.get("userName") or "unknown",
        "author_name": author.get("name") or "",
        "author_followers": author.get("followers", 0) or 0,
        "engagement": likes + retweets + replies,
        "likes": likes,
        "retweets": retweets,
        "replies": replies,
        "views": views,
        "tweet_date": raw.get("createdAt", ""),
        "is_reply": bool(raw.get("inReplyToId")),
        "is_article": False,
    }
|