Some checks failed
CI / lint-and-test (push) Has been cancelled
Sources merged: - teleo-codex/ops/pipeline-v2/ (11 newer lib files, 5 new lib modules) - teleo-codex/ops/ (agent-state, diagnostics expansion, systemd units, ops scripts) - VPS /opt/teleo-eval/telegram/ (10 new bot files, agent configs) - VPS /opt/teleo-eval/pipeline/ops/ (vector-gc, backfill-descriptions) - VPS /opt/teleo-eval/sync-mirror.sh (Bug 2 + Step 2.5 fixes) Non-trivial merges: - connect.py: kept codex threshold (0.65) + added infra domain parameter - watchdog.py: kept infra version (stale_pr integration, superset of codex) - deploy.sh: codex rsync version (interim, until VPS git clone migration) - diagnostics/app.py: codex decomposed dashboard (14 new route modules) 81 files changed, +17105/-200 lines Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
52 lines
2.1 KiB
Python
52 lines
2.1 KiB
Python
"""Eval pipeline stub — provides imports for bot.py.

Full implementation pending Ganymede review."""
|
|
|
|
# Confidence values strictly below this floor make apply_confidence_floor()
# prepend a low-confidence caveat and report the response as blocked.
CONFIDENCE_FLOOR = 0.3

# NOTE(review): not referenced in the visible part of this module; presumably
# a cost level above which an alert is raised (units unconfirmed) — verify
# against the importing code.
COST_ALERT_THRESHOLD = 0.22
|
|
|
|
|
|
class _LLMResponse(str):
|
|
"""str subclass carrying token counts and cost."""
|
|
def __new__(cls, content, prompt_tokens=0, completion_tokens=0, cost=0.0, model=''):
|
|
obj = super().__new__(cls, content)
|
|
obj.prompt_tokens = prompt_tokens
|
|
obj.completion_tokens = completion_tokens
|
|
obj.cost = cost
|
|
obj.model = model
|
|
return obj
|
|
|
|
|
|
def estimate_cost(model: str, prompt_tokens: int, completion_tokens: int) -> float:
    """Estimate the cost of one call from its token counts.

    Each table entry maps a model identifier fragment to an
    ``(input_rate, output_rate)`` pair expressed per one million tokens
    (the total is divided by 1_000_000). The first entry, in table order,
    whose key occurs anywhere inside ``model`` is used; when none matches,
    the fallback pair ``(3.0, 15.0)`` applies.

    Args:
        model: Model identifier to look up (substring match against the keys).
        prompt_tokens: Number of input tokens.
        completion_tokens: Number of output tokens.

    Returns:
        The estimated cost as a float (currency is not stated in this module;
        presumably USD).
    """
    per_million = {
        'anthropic/claude-opus-4': (15.0, 75.0),
        'anthropic/claude-sonnet-4': (3.0, 15.0),
        'anthropic/claude-haiku-4.5': (0.80, 4.0),
        'openai/gpt-4o': (2.50, 10.0),
    }
    fallback = (3.0, 15.0)

    # Pick the first matching rate pair, or the fallback when nothing matches.
    input_rate, output_rate = next(
        (pair for key, pair in per_million.items() if key in model),
        fallback,
    )
    return (prompt_tokens * input_rate + completion_tokens * output_rate) / 1_000_000
|
|
|
|
|
|
def check_url_fabrication(response: str, kb_context: str) -> tuple[str, list[str]]:
    """Remove URLs from ``response`` that do not appear in ``kb_context``.

    A URL is anything matching ``https?://`` followed by characters other
    than whitespace, ``)`` or ``"``. Trailing sentence punctuation
    (``. , ; : ! ? ' > ]``) is not treated as part of the URL, on either
    side of the comparison, so ``"see https://a.b/c."`` matches a knowledge
    base entry ``https://a.b/c``. URLs starting with ``https://t.me/`` are
    always allowed.

    Args:
        response: Text to scan.
        kb_context: Knowledge-base text whose URLs are considered genuine.
            When empty, nothing can be verified and ``response`` is returned
            unchanged.

    Returns:
        ``(cleaned_response, fabricated_urls)``. Each fabricated URL is
        replaced by ``[URL removed]`` (any trailing punctuation is kept in
        place). ``fabricated_urls`` has one entry per removed occurrence, in
        order of appearance. When nothing is removed, the original
        ``response`` object is returned as-is.
    """
    import re

    if not kb_context:
        return response, []

    url_pattern = re.compile(r'https?://[^\s\)"]+')
    trailing_punctuation = '.,;:!?\'>]'

    def _split_trailing(raw: str) -> tuple[str, str]:
        # Separate the URL proper from punctuation the regex swallowed.
        url = raw.rstrip(trailing_punctuation)
        return url, raw[len(url):]

    kb_urls = {_split_trailing(raw)[0] for raw in url_pattern.findall(kb_context)}
    fabricated: list[str] = []

    def _scrub(match: re.Match) -> str:
        url, tail = _split_trailing(match.group(0))
        if url in kb_urls or url.startswith('https://t.me/'):
            return match.group(0)
        fabricated.append(url)
        return '[URL removed]' + tail

    # Substituting match-by-match (instead of str.replace on the whole text)
    # keeps a fabricated URL that is a prefix of a genuine one from
    # corrupting the genuine URL.
    cleaned = url_pattern.sub(_scrub, response)

    if not fabricated:
        # Hand back the caller's object untouched (it may be a str subclass
        # carrying extra attributes).
        return response, []
    return cleaned, fabricated
|
|
|
|
|
|
def apply_confidence_floor(response: str, confidence: float | None) -> tuple[str, bool, str | None]:
|
|
"""Apply confidence floor. Returns (response, blocked, block_reason)."""
|
|
if confidence is not None and confidence < CONFIDENCE_FLOOR:
|
|
caveat = '⚠️ Low confidence response — treat with skepticism.\n\n'
|
|
return caveat + response, True, f'confidence {confidence:.2f} below floor {CONFIDENCE_FLOOR}'
|
|
return response, False, None
|