# Repo layout:
#   teleo-pipeline.py: async daemon with 4 stage loops (ingest/validate/evaluate/merge)
#   lib/: config, db, evaluate, validate, merge, breaker, costs, health, log modules
#   INFRASTRUCTURE.md: comprehensive deep-dive for onboarding
#   teleo-pipeline.service: systemd unit file
# Pentagon-Agent: Leo <294C3CA1-0205-4668-82FA-B984D54F48AD>
# File metadata: 116 lines, 4.5 KiB, Python
"""Pipeline v2 configuration — all constants and thresholds."""

import os
from pathlib import Path

# --- Paths ---
# PIPELINE_BASE overrides the install root (useful for dev checkouts and tests).
BASE_DIR = Path(os.environ.get("PIPELINE_BASE", "/opt/teleo-eval"))
REPO_DIR = BASE_DIR / "workspaces" / "teleo-codex.git"  # ".git" suffix suggests a bare repo — TODO confirm
MAIN_WORKTREE = BASE_DIR / "workspaces" / "main"
SECRETS_DIR = BASE_DIR / "secrets"
LOG_DIR = BASE_DIR / "logs"
DB_PATH = BASE_DIR / "pipeline" / "pipeline.db"
INBOX_ARCHIVE = "inbox/archive"  # relative path — NOTE(review): presumably resolved against the worktree; verify at call site
# --- Forgejo ---
# FORGEJO_URL is env-overridable; everything else is fixed for this deployment.
FORGEJO_URL = os.environ.get("FORGEJO_URL", "http://localhost:3000")
FORGEJO_OWNER = "teleo"
FORGEJO_REPO = "teleo-codex"
FORGEJO_TOKEN_FILE = SECRETS_DIR / "forgejo-admin-token"  # token read from secrets dir, never inlined
FORGEJO_PIPELINE_USER = "teleo"  # git user for pipeline commits
# --- Models ---
CLAUDE_CLI = os.environ.get("CLAUDE_CLI", "/home/teleo/.local/bin/claude")
OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"

# Model IDs.
# Bare aliases ("opus", "sonnet") appear to be Claude CLI model names, while the
# slash-qualified IDs are OpenRouter model identifiers — NOTE(review): confirm
# against the evaluate/costs modules.
MODEL_OPUS = "opus"
MODEL_SONNET = "sonnet"
MODEL_HAIKU = "anthropic/claude-3.5-haiku"
MODEL_GPT4O = "openai/gpt-4o"
# --- Model assignment per stage ---
# Principle: Opus is a scarce resource. Use it only where judgment quality matters.
# Sonnet handles volume. Haiku handles routing. Opus handles synthesis + critical eval.
#
# Pipeline eval ordering (domain-first, Leo-last):
#   1. Domain review → Sonnet (catches domain issues, evidence gaps — high-volume filter)
#   2. Leo review → Opus (cross-domain synthesis, confidence calibration — only pre-filtered PRs)
#   3. DEEP cross-family → GPT-4o (adversarial blind-spot check — paid, highest-value claims only)
EXTRACT_MODEL = MODEL_SONNET      # extraction: structured output, volume work
TRIAGE_MODEL = MODEL_HAIKU        # triage: routing decision, cheapest
EVAL_DOMAIN_MODEL = MODEL_SONNET  # domain review: high-volume filter
EVAL_LEO_MODEL = MODEL_OPUS       # Leo review: scarce, high-value
EVAL_DEEP_MODEL = MODEL_GPT4O     # DEEP cross-family: paid, adversarial
# --- Model backends ---
# Each model can run on Claude Max (subscription, base load) or API (overflow/spikes).
# Claude Max: free but rate-limited. API: paid but unlimited.
# When Claude Max is rate-limited, per-stage behavior:
#   "queue"    — wait for capacity (preferred for non-urgent work)
#   "overflow" — fall back to API (for time-sensitive work)
#   "skip"     — skip this cycle (for optional stages like sample audit)
OVERFLOW_POLICY = {
    "extract": "queue",         # extraction can wait
    "triage": "overflow",       # triage is cheap on API anyway
    "eval_domain": "overflow",  # domain review is the volume filter — don't let it bottleneck (Rhea)
    "eval_leo": "queue",        # Leo review is the bottleneck we protect
    "eval_deep": "overflow",    # DEEP is already on API
    "sample_audit": "skip",     # optional, skip if constrained
}
# OpenRouter cost rates in USD per 1K tokens (only applies when using API, not Claude Max).
# Keyed consistently via the MODEL_* constants (the original mixed literal keys
# "opus"/"sonnet" with constants) so a model-ID rename cannot silently orphan
# its cost entry. Values are unchanged.
MODEL_COSTS = {
    MODEL_OPUS: {"input": 0.015, "output": 0.075},
    MODEL_SONNET: {"input": 0.003, "output": 0.015},
    MODEL_HAIKU: {"input": 0.0008, "output": 0.004},
    MODEL_GPT4O: {"input": 0.0025, "output": 0.01},
}
# --- Concurrency ---
# Worker counts are env-overridable for tuning without a code change.
MAX_EXTRACT_WORKERS = int(os.environ.get("MAX_EXTRACT_WORKERS", "5"))
MAX_EVAL_WORKERS = int(os.environ.get("MAX_EVAL_WORKERS", "7"))
MAX_MERGE_WORKERS = 1  # domain-serialized, but one merge at a time per domain

# --- Timeouts (seconds) ---
EXTRACT_TIMEOUT = 600  # 10 min
EVAL_TIMEOUT = 300     # 5 min
MERGE_TIMEOUT = 300    # 5 min — force-reset to conflict if exceeded (Rhea)
CLAUDE_MAX_PROBE_TIMEOUT = 15  # short: probing Claude Max availability must not stall a cycle
# --- Backpressure ---
# Queue-depth thresholds that slow or pause extraction when downstream lags.
BACKPRESSURE_HIGH = 40             # pause extraction above this
BACKPRESSURE_LOW = 20              # throttle extraction above this
BACKPRESSURE_THROTTLE_WORKERS = 2  # extract workers allowed while throttled

# --- Retry budgets ---
TRANSIENT_RETRY_MAX = 5         # API timeouts, rate limits
SUBSTANTIVE_RETRY_STANDARD = 2  # reviewer request_changes
SUBSTANTIVE_RETRY_DEEP = 3      # NOTE(review): presumably the DEEP-tier substantive budget — confirm
# --- Circuit breakers ---
BREAKER_THRESHOLD = 5  # NOTE(review): presumably consecutive failures before opening — confirm in breaker module
BREAKER_COOLDOWN = 900  # 15 min

# --- Cost budgets ---
OPENROUTER_DAILY_BUDGET = 20.0   # USD
OPENROUTER_WARN_THRESHOLD = 0.8  # warn at 80% of daily budget

# --- Quality ---
SAMPLE_AUDIT_RATE = 0.10  # audit 10% of LIGHT merges
SAMPLE_AUDIT_DISAGREEMENT_THRESHOLD = 0.10  # 10% disagreement → tighten LIGHT criteria
# --- Polling intervals (seconds) ---
# How often each stage loop wakes to look for work.
INGEST_INTERVAL = 60
VALIDATE_INTERVAL = 30
EVAL_INTERVAL = 30
MERGE_INTERVAL = 30
HEALTH_CHECK_INTERVAL = 60

# --- Health API ---
HEALTH_PORT = 8080
# --- Logging ---
LOG_FILE = LOG_DIR / "pipeline.jsonl"  # .jsonl suffix: structured JSON-lines log
LOG_ROTATION_MAX_BYTES = 50 * 1024 * 1024  # 50MB per file
LOG_ROTATION_BACKUP_COUNT = 7  # keep 7 days