feat: atomic extract-and-connect + stale PR monitor + response audit #4

Merged
m3taversal merged 70 commits from epimetheus/atomic-connect-and-stale-monitor into main 2026-03-30 11:03:35 +00:00
Showing only changes of commit 2ec4c445b1 - Show all commits

View file

@ -703,7 +703,49 @@ IMPORTANT: Two special tags you can append at the end of your response (after yo
async def _fetch_url_content(url: str) -> str | None:
"""Fetch article/page content from a URL for pipeline ingestion."""
"""Fetch article/page content from a URL for pipeline ingestion.
For X/Twitter URLs, uses Ben's API (x_client.fetch_from_url) which returns
structured article content. For other URLs, falls back to raw HTTP fetch.
"""
# X/Twitter URLs → use x_client for structured content
if "x.com/" in url or "twitter.com/" in url:
try:
from x_client import fetch_from_url
data = await fetch_from_url(url)
if not data:
logger.warning("x_client returned no data for %s", url)
return None
# Format structured content
parts = []
# Tweet text
tweet_text = data.get("text", "")
if tweet_text:
parts.append(tweet_text)
# Article content (contents[] array with typed blocks)
contents = data.get("contents", [])
if contents:
parts.append("\n--- Article Content ---\n")
for block in contents:
block_type = block.get("type", "unstyled")
block_text = block.get("text", "")
if not block_text:
continue
if block_type in ("header-one", "header-two", "header-three"):
parts.append(f"\n## {block_text}\n")
elif block_type == "blockquote":
parts.append(f"> {block_text}")
elif block_type == "list-item":
parts.append(f"- {block_text}")
else:
parts.append(block_text)
result = "\n".join(parts)
return result[:10000] if result else None
except Exception as e:
logger.warning("x_client fetch failed for %s: %s", url, e)
return None
# Non-X URLs → raw HTTP fetch with HTML stripping
import aiohttp
try:
async with aiohttp.ClientSession() as session:
@ -711,12 +753,11 @@ async def _fetch_url_content(url: str) -> str | None:
if resp.status >= 400:
return None
html = await resp.text()
# Strip HTML tags for plain text (basic — upgrade to readability later)
text = re.sub(r"<script.*?</script>", "", html, flags=re.DOTALL)
text = re.sub(r"<style.*?</style>", "", text, flags=re.DOTALL)
text = re.sub(r"<[^>]+>", " ", text)
text = re.sub(r"\s+", " ", text).strip()
return text[:10000] # Cap at 10K chars
return text[:10000]
except Exception as e:
logger.warning("Failed to fetch URL %s: %s", url, e)
return None