# teleo-infrastructure/telegram/http_chat_proxy.py

"""HTTP chat proxy helpers for opt-in Telegram agent routing."""

from __future__ import annotations

import re
from typing import Any

# Slash-command prefixes used as the default argument of
# smart_research_command_names() and extract_smart_research_goal().
DEFAULT_SMART_RESEARCH_COMMAND_PREFIXES = ("/smart_research", "/paid_research")
# Accepts a command name made up solely of ASCII letters, digits, and
# underscores; smart_research_command_names() uses it to filter prefixes.
_TELEGRAM_COMMAND_NAME_RE = re.compile(r"^[A-Za-z0-9_]+$")
# Case-insensitive, whole-word keyword/phrase triggers. A single hit anywhere in
# the text is enough for extract_auto_smart_research_goal() to treat an ordinary
# chat message as a research request. The alternatives cover research/sourcing
# verbs, recency words, social-media terms, financial metrics, market/token
# vocabulary, buy/sell phrasing, and estimate/compare wording.
_AUTO_SMART_RESEARCH_RE = re.compile(
    r"\b("
    r"research|source|sources|citation|citations|evidence|"
    r"search|find|lookup|look\s+up|web|"
    r"latest|current|today|recent|as\s+of|this\s+week|this\s+month|"
    r"twitter|x\.com|tweet|tweets|social|social\s+media|trend|trends|"
    r"discussion|discussions|sentiment|narrative|narratives|"
    r"revenue|revenues|fees|tvl|volume|fdv|fully\s+diluted|"
    r"market\s+cap|mcap|valuation|funding|liquidity|price|chart|"
    r"token|coin|pair|pool|dex|dexscreener|birdeye|jupiter|"
    r"buy|sell|should\s+i|yes\s+or\s+no|"
    r"estimate|compare|benchmark"
    r")\b",
    re.IGNORECASE,
)
# Second trigger consulted by extract_auto_smart_research_goal(). It matches any
# one of four case-insensitive shapes:
#   1. an opinion/assessment opener ("thoughts on", "how did", "evaluate", ...)
#      followed later on the same line by a topic word ("handled", "incident",
#      "valuation", "peers", ...);
#   2. "who/what/why [was] [at] fault";
#   3. "position/stance on|about|toward(s)";
#   4. "compare"/"benchmark" followed later on the same line by a comparison
#      subject ("metrics", "growth", "fintech", "products", ...).
# "." does not match newlines here (no DOTALL flag).
_AUTO_CONTEXTUAL_RESEARCH_RE = re.compile(
    r"("
    r"\b(what\s+are\s+your\s+thoughts|thoughts\s+on|take\s+on|opinion\s+on|"
    r"how\s+did|how\s+has|how\s+is|assess|evaluate)\b"
    r".*\b(managed|manage|handled|handle|handling|responded|response|situation|incident|"
    r"controversy|fallout|fault|blame|position|stance|fair|valuation|valued|growth|metrics|peers?)\b"
    r"|"
    r"\b(who|what|why)\s+(was\s+)?(at\s+)?fault\b"
    r"|"
    r"\b(position|stance)\s+(on|about|towards?)\b"
    r"|"
    r"\b(compare|benchmark)\b.*\b(metrics|growth|valuation|peers?|fintech|web2|products?)\b"
    r")",
    re.IGNORECASE,
)
# Case-insensitive follow-up phrases ("check it yourself", "look it up",
# "do the research", ...). extract_auto_smart_research_followup_goal() requires
# one of them before it re-uses the previous user message as the goal.
_AUTO_SMART_RESEARCH_FOLLOWUP_RE = re.compile(
    r"\b("
    r"check\s+it\s+yourself|check\s+yourself|actually\s+check|"
    r"look\s+it\s+up|look\s+that\s+up|search\s+it|search\s+that|"
    r"use\s+(the\s+)?web|use\s+sources|find\s+sources|"
    r"do\s+the\s+research|go\s+research|verify\s+it"
    r")\b",
    re.IGNORECASE,
)
# Matches "sponsored_work_orders:<hex>" or "payment_receipts:<hex>" where <hex>
# is 16 to 64 hexadecimal characters. Group 1 is the whole identifier including
# its prefix. IGNORECASE means upper-case prefixes and hex digits also match.
_PAID_WORK_ORDER_ID_RE = re.compile(
    r"\b((?:sponsored_work_orders|payment_receipts):[a-f0-9]{16,64})\b",
    re.IGNORECASE,
)
# Case-insensitive market-data vocabulary (price, liquidity, token, dex names,
# buy/sell wording). should_attach_structured_market_context() returns True
# only when this matches and _SOCIAL_DISCUSSION_RE does not.
_MARKET_CONTEXT_RE = re.compile(
    r"\b("
    r"volume|fdv|fully\s+diluted|market\s+cap|mcap|liquidity|price|chart|"
    r"token|coin|pair|pool|dex|dexscreener|birdeye|jupiter|"
    r"buy|sell|should\s+i|yes\s+or\s+no"
    r")\b",
    re.IGNORECASE,
)
# Matches when a social-platform word (twitter, x.com, tweet, social, ...) and a
# discussion/narrative word (latest, trend, sentiment, blame, stance, ...)
# appear on the same line, in either order. A match vetoes market context in
# should_attach_structured_market_context().
_SOCIAL_DISCUSSION_RE = re.compile(
    r"\b(twitter|x\.com|x\/twitter|tweet|tweets|social)\b.*"
    r"\b(current|latest|recent|discussion|discussions|trend|trends|narrative|sentiment|fault|blame|position|stance)\b"
    r"|"
    r"\b(current|latest|recent|discussion|discussions|trend|trends|narrative|sentiment|fault|blame|position|stance)\b.*"
    r"\b(twitter|x\.com|x\/twitter|tweet|tweets|social)\b",
    re.IGNORECASE,
)


def smart_research_command_names(
    command_prefixes: tuple[str, ...] | list[str] = DEFAULT_SMART_RESEARCH_COMMAND_PREFIXES,
) -> list[str]:
    """Derive the Telegram command names to register for smart-research routing.

    Each prefix is stringified and trimmed. Entries that do not start with "/"
    are ignored. The leading slash and any "@botname" suffix are removed, and
    only names consisting of ASCII letters, digits, and underscores are kept.

    Args:
        command_prefixes: Slash-command prefixes such as "/smart_research".

    Returns:
        The distinct valid command names, sorted alphabetically.
    """
    trimmed = (str(raw).strip() for raw in command_prefixes)
    # "/name@bot" -> "name"; anything without a leading slash is not a command.
    bare_names = (
        candidate[1:].split("@", 1)[0].strip()
        for candidate in trimmed
        if candidate.startswith("/")
    )
    valid_names = {
        name
        for name in bare_names
        if name and _TELEGRAM_COMMAND_NAME_RE.match(name)
    }
    return sorted(valid_names)


def build_chat_proxy_payload(
    *,
    message: str,
    source: str,
    agent: str,
    chat_id: int | None = None,
    message_id: int | None = None,
    username: str | None = None,
) -> dict[str, Any]:
    """Assemble the payload forwarded from Telegram to an agent HTTP chat route.

    The payload carries only the message text and routing metadata; no
    credentials are included.

    Args:
        message: Text to deliver to the agent.
        source: Identifier of the originating channel.
        agent: Name of the agent the message is routed to.
        chat_id: Optional Telegram chat id.
        message_id: Optional Telegram message id.
        username: Optional Telegram username of the sender.

    Returns:
        A dict with "message" and "metadata". Metadata entries whose value is
        None are left out; other falsy values such as 0 or "" are kept.
    """
    candidate_fields = (
        ("source", source),
        ("agent", agent),
        ("chat_id", chat_id),
        ("message_id", message_id),
        ("username", username),
    )
    present_fields = {name: value for name, value in candidate_fields if value is not None}
    return {"message": message, "metadata": present_fields}


def extract_smart_research_goal(
    message: str,
    command_prefixes: tuple[str, ...] | list[str] = DEFAULT_SMART_RESEARCH_COMMAND_PREFIXES,
) -> str | None:
    """Return the research goal when a Telegram message opts into smart research.

    A message opts in when, after trimming, it consists of an optional leading
    "@mention", one of the slash commands (optionally suffixed with
    "@botname"), and then the goal text. Matching is case-insensitive.

    Args:
        message: Raw Telegram message text.
        command_prefixes: Slash-command prefixes that trigger smart research.
            Prefixes are stringified and trimmed; empty ones are ignored.

    Returns:
        The trimmed goal text (which may span several lines), or None when the
        message is not one of the commands or the command carries no goal.
    """
    text = message.strip()
    for prefix in command_prefixes:
        command = str(prefix).strip().lstrip("/")
        if not command:
            # An empty command would make the pattern match a bare "/".
            continue
        # DOTALL lets the goal run across line breaks; without it a multi-line
        # goal ("/cmd first line\nsecond line") fails the trailing "$" anchor
        # and the whole command is silently ignored.
        match = re.match(
            rf"^(?:@\w+\s+)?/{re.escape(command)}(?:@\w+)?(?:\s+(?P<goal>.+))?$",
            text,
            re.IGNORECASE | re.DOTALL,
        )
        if match:
            goal = (match.group("goal") or "").strip()
            return goal or None
    return None


def extract_auto_smart_research_goal(
    message: str,
    mention_aliases: tuple[str, ...] | list[str] = (),
) -> str | None:
    """Detect ordinary chat that clearly asks for sourced or current research.

    Bot mention aliases (optionally suffixed with "@name") are removed and runs
    of whitespace are collapsed to single spaces. Slash commands and empty
    messages are never treated as automatic research requests.

    Args:
        message: Raw Telegram message text.
        mention_aliases: Ways the bot may be addressed; blank aliases are skipped.

    Returns:
        The cleaned message text when it matches either research trigger
        pattern, otherwise None.
    """
    remaining = message.strip()
    for alias in mention_aliases:
        handle = str(alias).strip()
        if handle:
            # The alias must sit at the start or after whitespace to count as a mention.
            mention_pattern = rf"(^|\s){re.escape(handle)}(?:@\w+)?\b"
            remaining = re.sub(mention_pattern, " ", remaining, flags=re.IGNORECASE).strip()
    remaining = re.sub(r"\s+", " ", remaining).strip()

    if not remaining:
        return None
    if remaining.startswith("/"):
        return None

    keyword_hit = _AUTO_SMART_RESEARCH_RE.search(remaining)
    wants_research = keyword_hit or _AUTO_CONTEXTUAL_RESEARCH_RE.search(remaining)
    return remaining if wants_research else None


def extract_auto_smart_research_followup_goal(
    message: str,
    previous_user_message: str | None,
    mention_aliases: tuple[str, ...] | list[str] = (),
) -> str | None:
    """Expand a short follow-up such as "check it yourself" into a full research goal.

    The current message is cleaned of bot mentions and redundant whitespace and
    must contain one of the follow-up phrases. The previous user message must
    itself qualify as an automatic research goal; that goal is then combined
    with the follow-up instruction and fixed guidance text.

    Args:
        message: The follow-up message just received.
        previous_user_message: The user's preceding message, if any.
        mention_aliases: Ways the bot may be addressed; blank aliases are skipped.

    Returns:
        The combined goal text, or None when the message is empty, is a slash
        command, is not a follow-up phrase, or no earlier goal can be derived.
    """
    instruction = message.strip()
    for alias in mention_aliases:
        handle = str(alias).strip()
        if handle:
            mention_pattern = rf"(^|\s){re.escape(handle)}(?:@\w+)?\b"
            instruction = re.sub(mention_pattern, " ", instruction, flags=re.IGNORECASE).strip()
    instruction = re.sub(r"\s+", " ", instruction).strip()

    if not instruction or instruction.startswith("/"):
        return None
    if _AUTO_SMART_RESEARCH_FOLLOWUP_RE.search(instruction) is None:
        return None

    # The follow-up only makes sense if the earlier message was researchable.
    earlier_goal = extract_auto_smart_research_goal(previous_user_message or "", mention_aliases)
    if not earlier_goal:
        return None

    guidance = (
        f"Follow-up instruction: {instruction}. Use current public sources and cite assumptions. "
        "For buy/sell wording, do not provide personalized financial advice; provide market data, risks, "
        "and a concise non-advice thesis."
    )
    return f"{earlier_goal}\n\n{guidance}"


def extract_paid_work_order_id(message: str) -> str | None:
    """Find a paid x402 work-order or receipt id inside ordinary Telegram text.

    Args:
        message: Raw Telegram message text.

    Returns:
        The first identifier found, including its "sponsored_work_orders:" or
        "payment_receipts:" prefix exactly as written in the message, or None
        when the text contains no such identifier.
    """
    found = _PAID_WORK_ORDER_ID_RE.search(message.strip())
    return found.group(1) if found else None


def should_attach_structured_market_context(message: str) -> bool:
    """Decide whether structured market data should accompany a message.

    Market context is attached only for explicit market-data questions. Text
    that reads as social-media narrative research is excluded even when it
    also mentions market terms.

    Args:
        message: Raw Telegram message text.

    Returns:
        True when the trimmed text contains market vocabulary and is not a
        social-discussion request; False otherwise, including for blank text.
    """
    candidate = message.strip()
    # The social-discussion veto takes precedence over any market keyword.
    if not candidate or _SOCIAL_DISCUSSION_RE.search(candidate):
        return False
    return _MARKET_CONTEXT_RE.search(candidate) is not None


def build_smart_research_proxy_payload(
    *,
    research_goal: str,
    source: str,
    agent: str,
    chat_id: int | None = None,
    message_id: int | None = None,
    username: str | None = None,
    allow_paid_execution: bool = False,
    approval_ref: str | None = None,
    max_amount_usd: float | None = None,
    include_synthesis: bool = True,
    work_order_id: str | None = None,
    original_research_goal: str | None = None,
) -> dict[str, Any]:
    """Assemble the no-secret Telegram payload for a Leo smart-research request.

    The result extends the plain chat-proxy payload (with the research goal as
    the message) with research-specific fields.

    Args:
        research_goal: The goal text; sent as both "message" and "research_goal".
        source: Identifier of the originating channel.
        agent: Name of the agent the request is routed to.
        chat_id: Optional Telegram chat id.
        message_id: Optional Telegram message id.
        username: Optional Telegram username of the sender.
        allow_paid_execution: Whether paid execution is permitted; coerced to bool.
        approval_ref: Approval reference, included only when paid execution is
            allowed and the reference is non-empty.
        max_amount_usd: Spending cap, included whenever it is not None.
        include_synthesis: Whether a synthesis is requested; coerced to bool.
        work_order_id: Included only when non-empty.
        original_research_goal: Included only when non-empty.

    Returns:
        The payload dict ready to be posted to the agent route.
    """
    base_payload = build_chat_proxy_payload(
        message=research_goal,
        source=source,
        agent=agent,
        chat_id=chat_id,
        message_id=message_id,
        username=username,
    )
    payload = {
        **base_payload,
        "research_goal": research_goal,
        "allow_paid_execution": bool(allow_paid_execution),
        "include_synthesis": bool(include_synthesis),
    }

    # A cap of 0 is meaningful, so only None suppresses this field.
    if max_amount_usd is not None:
        payload["max_amount_usd"] = max_amount_usd

    # These fields are sent only when they carry a non-empty value; the
    # approval reference additionally requires paid execution to be allowed.
    truthy_only_fields = (
        ("approval_ref", approval_ref if allow_paid_execution else None),
        ("work_order_id", work_order_id),
        ("original_research_goal", original_research_goal),
    )
    for field_name, field_value in truthy_only_fields:
        if field_value:
            payload[field_name] = field_value
    return payload


def extract_chat_proxy_reply(payload: dict[str, Any]) -> str | None:
    """Pull the user-facing reply out of a supported Living IP Leo response.

    The reply is looked up at these locations, in order, and the first
    non-blank string wins:
    "reply", "decision.reply", "llm.reply", "llm.decision.reply",
    "synthesis.reply", "synthesis.decision.reply".

    Args:
        payload: Decoded response body; anything that is not a dict yields None.

    Returns:
        The reply with surrounding whitespace removed, or None when no
        location holds a non-blank string.
    """
    if not isinstance(payload, dict):
        return None

    reply_paths = (
        ("reply",),
        ("decision", "reply"),
        ("llm", "reply"),
        ("llm", "decision", "reply"),
        ("synthesis", "reply"),
        ("synthesis", "decision", "reply"),
    )
    for *containers, leaf in reply_paths:
        node = payload
        for key in containers:
            node = node.get(key)
            if not isinstance(node, dict):
                # This path does not exist in the payload; try the next one.
                break
        else:
            candidate = node.get(leaf)
            if isinstance(candidate, str) and candidate.strip():
                return candidate.strip()
    return None


async def post_chat_proxy(
    *,
    url: str,
    payload: dict[str, Any],
    timeout_seconds: int = 30,
) -> tuple[int, dict[str, Any], str | None]:
    """POST a payload to an agent HTTP chat route and report the outcome.

    A fresh aiohttp session is opened for the single request and closed
    afterwards. The response body is decoded as JSON regardless of the
    response's Content-Type header.

    Args:
        url: Address of the agent chat route.
        payload: JSON-serialisable request body.
        timeout_seconds: Total time allowed for the request.

    Returns:
        A tuple of the HTTP status code, the decoded JSON body, and the
        user-facing reply extracted from that body (None when absent).

    NOTE(review): errors from aiohttp (connection failures, timeouts) and from
    JSON decoding are not handled here and propagate to the caller.
    """
    # Imported lazily so the module can be loaded without aiohttp installed.
    import aiohttp

    request_headers = {
        "Accept": "application/json",
        "Content-Type": "application/json",
        "X-LivingIP-Source": "telegram-agent-proxy",
    }
    client_timeout = aiohttp.ClientTimeout(total=timeout_seconds)

    async with aiohttp.ClientSession(timeout=client_timeout) as session:
        async with session.post(url, json=payload, headers=request_headers) as response:
            # content_type=None skips aiohttp's Content-Type validation.
            body = await response.json(content_type=None)
            reply = extract_chat_proxy_reply(body)
            return response.status, body, reply