teleo-infrastructure/telegram/eval_checks.py
m3taversal 681afad506
Consolidate pipeline code from teleo-codex + VPS into single repo
Sources merged:
- teleo-codex/ops/pipeline-v2/ (11 newer lib files, 5 new lib modules)
- teleo-codex/ops/ (agent-state, diagnostics expansion, systemd units, ops scripts)
- VPS /opt/teleo-eval/telegram/ (10 new bot files, agent configs)
- VPS /opt/teleo-eval/pipeline/ops/ (vector-gc, backfill-descriptions)
- VPS /opt/teleo-eval/sync-mirror.sh (Bug 2 + Step 2.5 fixes)

Non-trivial merges:
- connect.py: kept codex threshold (0.65) + added infra domain parameter
- watchdog.py: kept infra version (stale_pr integration, superset of codex)
- deploy.sh: codex rsync version (interim, until VPS git clone migration)
- diagnostics/app.py: codex decomposed dashboard (14 new route modules)

81 files changed, +17105/-200 lines

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-07 16:52:26 +01:00


"""Eval pipeline — pure functions for response quality checks.
Extracted from bot.py so tests can import without telegram dependency.
No side effects, no I/O, no imports beyond stdlib.
Pentagon-Agent: Epimetheus <0144398e-4ed3-4fe2-95a3-3d72e1abf887>
"""
import re

# Per-model pricing (input $/M tokens, output $/M tokens) — from OpenRouter
MODEL_PRICING = {
    "anthropic/claude-opus-4-6": (15.0, 75.0),
    "anthropic/claude-sonnet-4-6": (3.0, 15.0),
    "anthropic/claude-haiku-4.5": (0.80, 4.0),
    "anthropic/claude-3.5-haiku": (0.80, 4.0),
    "openai/gpt-4o": (2.50, 10.0),
    "openai/gpt-4o-mini": (0.15, 0.60),
}
CONFIDENCE_FLOOR = 0.4
COST_ALERT_THRESHOLD = 0.22  # per-response alert threshold in USD

# URL fabrication regex — matches http:// and https:// URLs
_URL_RE = re.compile(r'https?://[^\s\)\]\"\'<>]+')


class _LLMResponse(str):
    """String subclass carrying token counts and cost from OpenRouter usage field."""

    prompt_tokens: int = 0
    completion_tokens: int = 0
    cost: float = 0.0
    model: str = ""

    def __new__(cls, text: str, prompt_tokens: int = 0, completion_tokens: int = 0,
                cost: float = 0.0, model: str = ""):
        obj = super().__new__(cls, text)
        obj.prompt_tokens = prompt_tokens
        obj.completion_tokens = completion_tokens
        obj.cost = cost
        obj.model = model
        return obj


def estimate_cost(model: str, prompt_tokens: int, completion_tokens: int) -> float:
    """Estimate cost in USD from token counts and model pricing."""
    input_rate, output_rate = MODEL_PRICING.get(model, (3.0, 15.0))  # default to Sonnet rates
    return (prompt_tokens * input_rate + completion_tokens * output_rate) / 1_000_000
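A usage sketch (token counts are illustrative; the pricing subset mirrors the table above, and unknown models fall back to the Sonnet default rates):

```python
# Subset of the pricing table above, restated so the snippet runs standalone
MODEL_PRICING = {"openai/gpt-4o-mini": (0.15, 0.60)}


def estimate_cost(model: str, prompt_tokens: int, completion_tokens: int) -> float:
    """Estimate cost in USD from token counts and model pricing."""
    input_rate, output_rate = MODEL_PRICING.get(model, (3.0, 15.0))  # default to Sonnet rates
    return (prompt_tokens * input_rate + completion_tokens * output_rate) / 1_000_000


# 1,000 prompt tokens at $0.15/M plus 500 completion tokens at $0.60/M:
cost = estimate_cost("openai/gpt-4o-mini", 1000, 500)   # → 0.00045 USD
fallback = estimate_cost("unknown/model", 1_000_000, 0)  # → 3.0 USD (default input rate)
```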


def check_url_fabrication(response_text: str, kb_context: str) -> tuple[str, list[str]]:
    """Check for fabricated URLs in response. Replace any not found in KB context.

    Returns (cleaned_text, list_of_fabricated_urls).
    """
    kb_urls = set(_URL_RE.findall(kb_context)) if kb_context else set()
    response_urls = _URL_RE.findall(response_text)
    fabricated = [url for url in response_urls if url not in kb_urls]
    result = response_text
    for url in fabricated:
        result = result.replace(url, "[URL removed — not verified]")
    return result, fabricated
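A self-contained example of the redaction behavior (the regex and function are restated so the snippet runs standalone; both URLs are made up). A URL present in the KB context passes through untouched, while one absent from it is replaced with the placeholder:

```python
import re

_URL_RE = re.compile(r'https?://[^\s\)\]\"\'<>]+')


def check_url_fabrication(response_text: str, kb_context: str) -> tuple[str, list[str]]:
    """Redact URLs in the response that do not appear in the KB context."""
    kb_urls = set(_URL_RE.findall(kb_context)) if kb_context else set()
    response_urls = _URL_RE.findall(response_text)
    fabricated = [url for url in response_urls if url not in kb_urls]
    result = response_text
    for url in fabricated:
        result = result.replace(url, "[URL removed — not verified]")
    return result, fabricated


kb = "Docs: https://example.com/docs"
reply = "See https://example.com/docs and https://made-up.example/page"
cleaned, bad = check_url_fabrication(reply, kb)
# cleaned → "See https://example.com/docs and [URL removed — not verified]"
# bad     → ["https://made-up.example/page"]
```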


def apply_confidence_floor(display_response: str, confidence_score: float | None) -> tuple[str, bool, str | None]:
    """Apply confidence floor check.

    Returns (possibly_modified_response, is_blocked, block_reason).
    """
    if confidence_score is not None and confidence_score < CONFIDENCE_FLOOR:
        modified = (
            "⚠️ Low confidence — I may not have reliable data on this topic.\n\n"
            + display_response
        )
        return modified, True, f"confidence {confidence_score:.2f} < floor {CONFIDENCE_FLOOR}"
    return display_response, False, None
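A standalone sketch of the floor check (the constant and function are restated so the snippet runs on its own; the scores are illustrative). A score below the floor prepends the warning banner and flags the response; a score at or above it, or a missing score, passes through unchanged:

```python
CONFIDENCE_FLOOR = 0.4


def apply_confidence_floor(display_response, confidence_score):
    """Prepend a warning and flag the response when confidence is below the floor."""
    if confidence_score is not None and confidence_score < CONFIDENCE_FLOOR:
        modified = ("⚠️ Low confidence — I may not have reliable data on this topic.\n\n"
                    + display_response)
        return modified, True, f"confidence {confidence_score:.2f} < floor {CONFIDENCE_FLOOR}"
    return display_response, False, None


text, blocked, reason = apply_confidence_floor("Answer text", 0.31)
# blocked → True, reason → "confidence 0.31 < floor 0.4", warning prepended
text2, blocked2, reason2 = apply_confidence_floor("Answer text", 0.9)
# blocked2 → False, response unchanged
```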