Imports 67 files from VPS (/opt/teleo-eval/) into the repo as the single source of truth. Previously only 8 of the 67 files existed in the repo — the rest were deployed directly to the VPS via SCP, causing massive drift.

Includes:
- pipeline/lib/: 33 Python modules (daemon core, extraction, evaluation, merge, cascade, cross-domain, costs, attribution, etc.)
- pipeline/: main daemon (teleo-pipeline.py), reweave.py, batch-extract-50.sh
- diagnostics/: 19 files (4-page dashboard, alerting, daily digest, review queue, tier1 metrics)
- agent-state/: bootstrap, lib-state, cascade inbox processor, schema
- systemd/: service unit files for reference
- deploy.sh: rsync-based deploy with --dry-run, syntax checks, dirty-tree gate
- research-session.sh: updated with Step 8.5 digest + cascade inbox processing

No new code written — all files are exact copies from the VPS as of 2026-04-06. From this point forward: edit in the repo, commit, then run deploy.sh.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
110 lines
4.1 KiB
Python
110 lines
4.1 KiB
Python
"""Cost tracking — per-model per-day with budget enforcement."""
|
||
|
||
import logging
|
||
from datetime import date
|
||
|
||
from . import config
|
||
|
||
# Module-level logger. The name "pipeline.costs" is hard-coded rather than
# derived from __name__, so it stays the same regardless of import path.
logger = logging.getLogger("pipeline.costs")
|
||
|
||
|
||
def record_usage(
    conn,
    model: str,
    stage: str,
    input_tokens: int = 0,
    output_tokens: int = 0,
    backend: str = "api",
    duration_ms: int = 0,
    cache_read_tokens: int = 0,
    cache_write_tokens: int = 0,
    cost_estimate_usd: float = 0.0,
):
    """Add one model call to today's ``costs`` row and return its billed cost.

    The row is keyed on (date, model, stage). The first call of the day for a
    key inserts a row with ``calls = 1``; later calls increment ``calls`` and
    add to every counter through the ``ON CONFLICT`` clause. This function
    only issues the statement; it does not commit.

    Args:
        conn: Database connection exposing ``execute(sql, params)``.
        model: Model identifier, also the lookup key into
            ``config.MODEL_COSTS``.
        stage: Pipeline stage name. For any backend other than ``"api"`` the
            stored stage becomes ``"<stage>:<backend>"`` so the backends are
            tracked in separate rows.
        input_tokens: Input tokens consumed by the call.
        output_tokens: Output tokens produced by the call.
        backend: ``"max"`` (Claude Max subscription, free) or ``"api"``
            (paid). Claude Max calls are tracked for volume metrics but
            cost $0. (Ganymede)
        duration_ms: Duration of the call in milliseconds.
        cache_read_tokens: Tokens read from cache, priced at 0.1x the input
            rate.
        cache_write_tokens: Tokens written to cache, priced at 1.25x the
            input rate.
        cost_estimate_usd: Caller-supplied estimate. It is used only when the
            token-based estimate comes out as zero (e.g. the CLI reports its
            own figure).

    Returns:
        The amount added to ``cost_usd``: ``0.0`` for the ``"max"`` backend,
        otherwise the estimate (never negative).
    """
    pricing = config.MODEL_COSTS.get(model)

    # Token-based estimate. The weighted token sum is divided by 1000, i.e.
    # the configured rates are applied per 1000 tokens.
    token_estimate = 0.0
    if pricing and (input_tokens or output_tokens):
        in_rate = pricing["input"]
        token_estimate = (input_tokens * in_rate + output_tokens * pricing["output"]) / 1000
        # Cache reads are ~90% cheaper than regular input.
        if cache_read_tokens:
            token_estimate += (cache_read_tokens * in_rate * 0.1) / 1000
        if cache_write_tokens:
            token_estimate += (cache_write_tokens * in_rate * 1.25) / 1000

    # Nothing computable from tokens: fall back to the caller's own figure.
    if cost_estimate_usd > 0 and token_estimate == 0:
        token_estimate = cost_estimate_usd
    cost_estimate_usd = token_estimate

    # Subscription calls carry an estimate but no actual spend.
    if backend == "max":
        billed = 0.0
    elif token_estimate > 0:
        billed = token_estimate
    else:
        billed = 0.0

    # Include the backend in the stage key so max vs api are tracked separately.
    stage_key = stage if backend == "api" else f"{stage}:{backend}"

    params = (
        date.today().isoformat(),
        model,
        stage_key,
        input_tokens,
        output_tokens,
        billed,
        duration_ms,
        cache_read_tokens,
        cache_write_tokens,
        cost_estimate_usd,
    )
    conn.execute(
        """INSERT INTO costs (date, model, stage, calls, input_tokens, output_tokens, cost_usd,
                              duration_ms, cache_read_tokens, cache_write_tokens, cost_estimate_usd)
           VALUES (?, ?, ?, 1, ?, ?, ?, ?, ?, ?, ?)
           ON CONFLICT (date, model, stage) DO UPDATE SET
               calls = calls + 1,
               input_tokens = input_tokens + excluded.input_tokens,
               output_tokens = output_tokens + excluded.output_tokens,
               cost_usd = cost_usd + excluded.cost_usd,
               duration_ms = duration_ms + excluded.duration_ms,
               cache_read_tokens = cache_read_tokens + excluded.cache_read_tokens,
               cache_write_tokens = cache_write_tokens + excluded.cache_write_tokens,
               cost_estimate_usd = cost_estimate_usd + excluded.cost_estimate_usd""",
        params,
    )
    return billed
|
||
|
||
|
||
def get_daily_spend(conn, day: str = None) -> float:
    """Return the total recorded spend in USD for one day.

    Sums ``cost_usd`` over every row of the ``costs`` table for ``day``;
    the query does not filter by model, stage or backend.

    Args:
        conn: Database connection exposing ``execute(sql, params)``. Rows may
            be mapping-style (e.g. ``sqlite3.Row``) or plain tuples.
        day: ISO date string (``YYYY-MM-DD``). Defaults to today.

    Returns:
        The summed ``cost_usd`` as a float; ``0.0`` when the day has no rows.
    """
    if day is None:
        day = date.today().isoformat()
    row = conn.execute(
        "SELECT COALESCE(SUM(cost_usd), 0) as total FROM costs WHERE date = ?",
        (day,),
    ).fetchone()
    try:
        total = row["total"]
    except (TypeError, IndexError):
        # Plain tuple rows (no row factory configured) only support
        # positional access; the query has exactly one column.
        total = row[0]
    # COALESCE yields the integer 0 for an empty day; normalise so the
    # declared float return type always holds.
    return float(total)
|
||
|
||
|
||
def get_daily_breakdown(conn, day: str = None) -> list:
    """Return the per-model, per-stage ``costs`` rows for one day.

    Args:
        conn: Database connection whose ``execute(sql, params)`` returns a
            cursor. Rows may be mapping-style (e.g. ``sqlite3.Row``) or plain
            tuples.
        day: ISO date string (``YYYY-MM-DD``). Defaults to today.

    Returns:
        A list of dicts, one per row, ordered by ``cost_usd`` descending.
        Each dict has the keys ``model``, ``stage``, ``calls``,
        ``input_tokens``, ``output_tokens``, ``cost_usd``, ``duration_ms``,
        ``cache_read_tokens``, ``cache_write_tokens`` and
        ``cost_estimate_usd``. Empty list when the day has no rows.
    """
    if day is None:
        day = date.today().isoformat()
    cursor = conn.execute(
        """SELECT model, stage, calls, input_tokens, output_tokens, cost_usd,
                  duration_ms, cache_read_tokens, cache_write_tokens, cost_estimate_usd
           FROM costs WHERE date = ? ORDER BY cost_usd DESC""",
        (day,),
    )
    rows = cursor.fetchall()
    if not rows:
        return []
    if hasattr(rows[0], "keys"):
        # Mapping-style rows (sqlite3.Row, dict factories) convert directly.
        return [dict(r) for r in rows]
    # Plain tuple rows carry no column names; take them from the cursor.
    columns = [col[0] for col in cursor.description]
    return [dict(zip(columns, r)) for r in rows]
|
||
|
||
|
||
def check_budget(conn) -> dict:
    """Summarise today's spend against the configured daily budget.

    Returns a dict with the keys:
        ok: True while spend is below 100% of ``config.OPENROUTER_DAILY_BUDGET``.
        warn: True once the spent fraction reaches
            ``config.OPENROUTER_WARN_THRESHOLD``.
        spend: Today's spend, rounded to 4 decimal places.
        budget: The configured daily budget, unmodified.
        pct: Spend as a percentage of the budget, rounded to 1 decimal place.

    A budget that is not positive yields a fraction of 0, so ``ok`` is True
    and ``pct`` is 0 in that case.
    """
    spent_today = get_daily_spend(conn)
    daily_budget = config.OPENROUTER_DAILY_BUDGET

    # Guard the division: a zero or negative budget counts as 0% used.
    if daily_budget > 0:
        fraction = spent_today / daily_budget
    else:
        fraction = 0

    return dict(
        ok=fraction < 1.0,
        warn=fraction >= config.OPENROUTER_WARN_THRESHOLD,
        spend=round(spent_today, 4),
        budget=daily_budget,
        pct=round(fraction * 100, 1),
    )
|
||
|
||
|
||
def budget_allows(conn) -> bool:
    """Return the ``ok`` flag of ``check_budget``: is today's spend under budget?"""
    status = check_budget(conn)
    return status["ok"]
|