teleo-codex/ops/pipeline-v2/telegram/eval.py
m3taversal 7bfce6b706 commit telegram bot module from VPS — 20 files never previously in repo
Pulled from /opt/teleo-eval/telegram/ on VPS. Includes:
- bot.py (92K), kb_retrieval.py, kb_tools.py (agentic retrieval)
- retrieval.py (RRF merge, query decomposition, entity traversal)
- response.py (system prompt builder, response parser)
- agent_config.py, agent_runner.py (multi-agent template unit support)
- approval_stages.py, approvals.py, digest.py (approval workflow)
- eval_checks.py, eval.py (response quality checks)
- output_gate.py, x_publisher.py, x_client.py, x_search.py (X pipeline)
- market_data.py, worktree_lock.py (utilities)
- rio.yaml, theseus.yaml (agent configs)

These files were deployed to VPS but never committed to the repo.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-13 11:02:32 +02:00

52 lines
2.1 KiB
Python

"""Eval pipeline stub — provides imports for bot.py.
Full implementation pending Ganymede review."""
# Minimum acceptable confidence score: apply_confidence_floor() flags any
# response whose confidence is strictly below this value.
CONFIDENCE_FLOOR = 0.3
# Not used by the code visible in this module; presumably a cost ceiling read
# by importers such as bot.py. Units are not established here (TODO confirm).
COST_ALERT_THRESHOLD = 0.22
class _LLMResponse(str):
    """Text of a model reply that also remembers its usage metadata.

    The instance behaves like an ordinary ``str`` holding ``content`` and
    additionally exposes ``prompt_tokens``, ``completion_tokens``, ``cost``
    and ``model`` as plain attributes.
    """

    def __new__(cls, content, prompt_tokens=0, completion_tokens=0, cost=0.0, model=''):
        # str is immutable, so the text has to be fixed in __new__; the extra
        # metadata is attached to the freshly created instance afterwards.
        reply = str.__new__(cls, content)
        reply.prompt_tokens = prompt_tokens
        reply.completion_tokens = completion_tokens
        reply.cost = cost
        reply.model = model
        return reply
def estimate_cost(model: str, prompt_tokens: int, completion_tokens: int) -> float:
    """Estimate the cost of one completion from its token counts.

    Each table entry is an ``(input_rate, output_rate)`` pair expressed per
    one million tokens (the weighted sum is divided by 1_000_000). The
    currency is not stated in this module — presumably USD; confirm.

    Args:
        model: Model identifier. The first table key that occurs anywhere
            inside this string selects the rates.
        prompt_tokens: Number of input tokens.
        completion_tokens: Number of output tokens.

    Returns:
        The estimated cost. Models matching no table key are priced at
        (3.0, 15.0), the same pair as the ``anthropic/claude-sonnet-4`` entry.
    """
    per_million = {
        'anthropic/claude-opus-4': (15.0, 75.0),
        'anthropic/claude-sonnet-4': (3.0, 15.0),
        'anthropic/claude-haiku-4.5': (0.80, 4.0),
        'openai/gpt-4o': (2.50, 10.0),
    }
    fallback_rates = (3.0, 15.0)

    # Substring match in table order; the first hit wins.
    input_rate, output_rate = next(
        (pair for key, pair in per_million.items() if key in model),
        fallback_rates,
    )
    return (prompt_tokens * input_rate + completion_tokens * output_rate) / 1_000_000
def check_url_fabrication(response: str, kb_context: str) -> tuple[str, list[str]]:
    """Replace URLs in ``response`` that are not backed by ``kb_context``.

    A URL is treated as fabricated when it does not occur in ``kb_context``
    and does not start with ``https://t.me/``. Trailing sentence punctuation
    (for example the full stop in ``see https://example.com/a.``) is ignored
    on both sides of the comparison, so a known URL that merely ends a
    sentence is not flagged.

    Args:
        response: Text to check.
        kb_context: Reference text whose URLs are considered legitimate.
            When empty, no check is performed.

    Returns:
        ``(cleaned_response, fabricated_urls)``. Every fabricated occurrence
        is replaced by ``[URL removed]`` (punctuation that followed the URL is
        kept) and listed, in order of appearance, without trailing
        punctuation. When nothing is fabricated, ``response`` is returned
        unchanged together with an empty list.
    """
    import re

    url_pattern = re.compile(r'https?://[^\s\)"]+')
    # Characters the pattern swallows when a URL closes a clause or a
    # quoted/bracketed span; they are not treated as part of the URL.
    trailing_punct = '.,;:!?\'>]'

    if not kb_context:
        return response, []

    known_urls = {u.rstrip(trailing_punct) for u in url_pattern.findall(kb_context)}
    fabricated: list[str] = []

    def _scrub(match: re.Match) -> str:
        """Return the replacement text for one URL occurrence."""
        raw = match.group(0)
        url = raw.rstrip(trailing_punct)
        if url in known_urls or url.startswith('https://t.me/'):
            return raw
        fabricated.append(url)
        # Keep whatever punctuation followed the URL in the sentence.
        return '[URL removed]' + raw[len(url):]

    # Substituting per match (instead of str.replace over the whole text)
    # keeps a fabricated URL that is a prefix of a legitimate one from
    # corrupting the legitimate occurrence.
    cleaned = url_pattern.sub(_scrub, response)
    if not fabricated:
        # Hand back the caller's original object untouched.
        return response, []
    return cleaned, fabricated
def apply_confidence_floor(response: str, confidence: float | None) -> tuple[str, bool, str | None]:
    """Flag a response whose confidence falls below ``CONFIDENCE_FLOOR``.

    Args:
        response: The response text.
        confidence: Confidence score, or ``None`` when no score is available.

    Returns:
        ``(response, blocked, block_reason)``. When ``confidence`` is given
        and strictly below the floor, the response is returned with a
        low-confidence caveat prepended, ``blocked`` is ``True`` and
        ``block_reason`` describes the shortfall. Otherwise the response is
        returned unchanged with ``(False, None)``.
    """
    below_floor = confidence is not None and confidence < CONFIDENCE_FLOOR
    if not below_floor:
        return response, False, None

    reason = f'confidence {confidence:.2f} below floor {CONFIDENCE_FLOOR}'
    flagged = '⚠️ Low confidence response — treat with skepticism.\n\n' + response
    return flagged, True, reason