teleo-infrastructure/telegram/x-ingest.py
m3taversal 81afcd319f
Some checks are pending
CI / lint-and-test (push) Waiting to run
fix: sync all code from VPS — repo is now authoritative source of truth
24 files: 8 pipeline lib modules, 6 diagnostics updates, 4 new diagnostics
modules, telegram bot fix, 5 active operational scripts. Key changes:
- Security: SQL injection prevention (alerting.py), SSL verification
  (review_queue.py), path traversal guard (extract.py)
- Cost tracking: per-PR cost accumulation in evaluate.py
- Auto-recovery: watchdog tier0 reset with retry cap + cooldown
- Extraction: structured edge fields, post-write vector connection
- New modules: vitality, research_tracking, research_routes

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-15 13:18:01 +01:00

86 lines
2.8 KiB
Python

#!/usr/bin/env python3
"""Pull all tweets from specified X accounts and save as JSON archives."""
import json
import os
import sys
import time
import urllib.parse
import urllib.request
# --- Configuration ---------------------------------------------------------
# NOTE(review): this API key is a secret committed to the repository — it
# should be rotated and removed. The env var X_API_KEY now takes precedence;
# the literal is kept only as a backward-compatible fallback.
API_KEY = os.environ.get("X_API_KEY", "new1_280dafc879374475a86a64f6f388ac22")
# twitterapi.io endpoint returning a user's most recent tweets (paginated).
BASE = "https://api.twitterapi.io/twitter/user/last_tweets"
# Destination directory for per-account JSON archives.
OUT_DIR = "/opt/teleo-eval/x-archives"
# X/Twitter handles to archive, one JSON file each.
ACCOUNTS = [
    "m3taversal",
    "Living_IP",
    "teLEOhuman",
    "aiCLAYno",
    "futaRdIO_ai",
]
os.makedirs(OUT_DIR, exist_ok=True)
def fetch_page(username, cursor=None):
    """Fetch one page of tweets for *username* from the twitterapi.io API.

    Args:
        username: X/Twitter handle to query (without the leading '@').
        cursor: opaque pagination cursor from a previous page, or None for
            the first page.

    Returns:
        The decoded JSON response as a dict, or None on any failure.
        Errors are printed rather than raised — callers treat None as a
        signal to stop paginating.
    """
    # urlencode escapes reserved characters, so an unusual username or
    # cursor value cannot corrupt the query string (the original f-string
    # interpolated both values raw).
    params = {"userName": username}
    if cursor:
        params["cursor"] = cursor
    url = f"{BASE}?{urllib.parse.urlencode(params)}"
    req = urllib.request.Request(url, headers={"X-API-Key": API_KEY})
    try:
        with urllib.request.urlopen(req, timeout=30) as resp:
            return json.loads(resp.read())
    except Exception as e:  # deliberate best-effort: any failure -> None
        print(f" ERROR fetching {username}: {e}")
        return None
def pull_all_tweets(username):
    """Page through the API and return all deduplicated tweets for *username*.

    Pagination stops when the API reports an error, returns no next cursor,
    or a page contributes no previously-unseen tweets (guards against a
    cursor that loops back over the same data).

    Returns:
        list of tweet dicts, in the order first seen.
    """
    all_tweets = []
    # Maintain the seen-ID set incrementally instead of rebuilding it from
    # all_tweets on every page — the original was accidentally O(n^2).
    seen_ids = set()
    cursor = None
    page = 0
    while True:
        page += 1
        print(f" Page {page} (cursor: {'yes' if cursor else 'start'})...", end=" ")
        data = fetch_page(username, cursor)
        if not data or data.get("status") != "success":
            print(f"FAILED: {data}")
            break
        tweets = data.get("data", {}).get("tweets", [])
        next_cursor = data.get("data", {}).get("next_cursor")
        # Deduplicate against everything collected so far.
        new_tweets = [t for t in tweets if t["id"] not in seen_ids]
        seen_ids.update(t["id"] for t in new_tweets)
        all_tweets.extend(new_tweets)
        print(f"{len(new_tweets)} new tweets (total: {len(all_tweets)})")
        # No cursor means last page; no new tweets means we are looping.
        if not next_cursor or not new_tweets:
            break
        cursor = next_cursor
        time.sleep(1)  # Rate limit courtesy
    return all_tweets
# --- Main driver -----------------------------------------------------------
# Archive each configured account to OUT_DIR, then print quick per-account
# stats (original/reply/RT breakdown and top 5 originals by view count).
for account in ACCOUNTS:
    print(f"\n=== @{account} ===")
    tweets = pull_all_tweets(account)
    # Save raw payload plus a count for quick sanity checks.
    outfile = os.path.join(OUT_DIR, f"{account}-tweets.json")
    # Pin UTF-8: tweets routinely contain non-ASCII text, and the original
    # open() without encoding= would use the locale's default encoding.
    with open(outfile, "w", encoding="utf-8") as f:
        json.dump({"account": account, "tweet_count": len(tweets), "tweets": tweets}, f, indent=2)
    print(f" Saved {len(tweets)} tweets to {outfile}")
    # Quick stats. An RT is identified by its "RT @" text prefix; replies by
    # the API's isReply flag.
    originals = [t for t in tweets if not t.get("text", "").startswith("RT @") and not t.get("isReply")]
    replies = [t for t in tweets if t.get("isReply")]
    rts = [t for t in tweets if t.get("text", "").startswith("RT @")]
    print(f" Breakdown: {len(originals)} original, {len(replies)} replies, {len(rts)} RTs")
    if originals:
        # viewCount may be absent, None, or a numeric string — normalize to
        # int for a stable sort key.
        top = sorted(originals, key=lambda t: int(t.get("viewCount", 0) or 0), reverse=True)[:5]
        print(" Top 5 by views:")
        for t in top:
            text = t["text"][:80].replace("\n", " ")
            print(f" {t.get('viewCount', '?')} views | {t.get('likeCount', '?')} likes | {text}...")
print("\n=== DONE ===")