# Repo layout:
#   teleo-pipeline.py: async daemon with 4 stage loops (ingest/validate/evaluate/merge)
#   lib/: config, db, evaluate, validate, merge, breaker, costs, health, log modules
#   INFRASTRUCTURE.md: comprehensive deep-dive for onboarding
#   teleo-pipeline.service: systemd unit file
# Pentagon-Agent: Leo <294C3CA1-0205-4668-82FA-B984D54F48AD>
# File metadata: 116 lines, 4.5 KiB, Python
"""Pipeline v2 configuration — all constants and thresholds."""

import os
from pathlib import Path

# --- Paths ---
# PIPELINE_BASE overrides the install root (useful for dev checkouts and tests).
BASE_DIR = Path(os.environ.get("PIPELINE_BASE", "/opt/teleo-eval"))
REPO_DIR = BASE_DIR / "workspaces" / "teleo-codex.git"  # ".git" suffix suggests a bare repo — TODO confirm
MAIN_WORKTREE = BASE_DIR / "workspaces" / "main"
SECRETS_DIR = BASE_DIR / "secrets"
LOG_DIR = BASE_DIR / "logs"
DB_PATH = BASE_DIR / "pipeline" / "pipeline.db"
INBOX_ARCHIVE = "inbox/archive"  # relative path — NOTE(review): presumably resolved against the worktree; verify at call site
# --- Forgejo ---
# FORGEJO_URL is env-overridable; everything else is fixed for this deployment.
FORGEJO_URL = os.environ.get("FORGEJO_URL", "http://localhost:3000")
FORGEJO_OWNER = "teleo"
FORGEJO_REPO = "teleo-codex"
FORGEJO_TOKEN_FILE = SECRETS_DIR / "forgejo-admin-token"  # token read from secrets dir, never inlined
FORGEJO_PIPELINE_USER = "teleo"  # git user for pipeline commits
# --- Models ---
CLAUDE_CLI = os.environ.get("CLAUDE_CLI", "/home/teleo/.local/bin/claude")
OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"

# Model IDs.
# Bare aliases ("opus", "sonnet") appear to be Claude CLI model names, while the
# slash-qualified IDs are OpenRouter model identifiers — NOTE(review): confirm
# against the evaluate/costs modules.
MODEL_OPUS = "opus"
MODEL_SONNET = "sonnet"
MODEL_HAIKU = "anthropic/claude-3.5-haiku"
MODEL_GPT4O = "openai/gpt-4o"
# --- Model assignment per stage ---
# Principle: Opus is a scarce resource. Use it only where judgment quality matters.
# Sonnet handles volume. Haiku handles routing. Opus handles synthesis + critical eval.
#
# Pipeline eval ordering (domain-first, Leo-last):
#   1. Domain review → Sonnet (catches domain issues, evidence gaps — high-volume filter)
#   2. Leo review → Opus (cross-domain synthesis, confidence calibration — only pre-filtered PRs)
#   3. DEEP cross-family → GPT-4o (adversarial blind-spot check — paid, highest-value claims only)
EXTRACT_MODEL = MODEL_SONNET      # extraction: structured output, volume work
TRIAGE_MODEL = MODEL_HAIKU        # triage: routing decision, cheapest
EVAL_DOMAIN_MODEL = MODEL_SONNET  # domain review: high-volume filter
EVAL_LEO_MODEL = MODEL_OPUS       # Leo review: scarce, high-value
EVAL_DEEP_MODEL = MODEL_GPT4O     # DEEP cross-family: paid, adversarial
# --- Model backends ---
# Each model can run on Claude Max (subscription, base load) or API (overflow/spikes).
# Claude Max: free but rate-limited. API: paid but unlimited.
# When Claude Max is rate-limited, per-stage behavior:
#   "queue"    — wait for capacity (preferred for non-urgent work)
#   "overflow" — fall back to API (for time-sensitive work)
#   "skip"     — skip this cycle (for optional stages like sample audit)
OVERFLOW_POLICY = {
    "extract": "queue",         # extraction can wait
    "triage": "overflow",       # triage is cheap on API anyway
    "eval_domain": "overflow",  # domain review is the volume filter — don't let it bottleneck (Rhea)
    "eval_leo": "queue",        # Leo review is the bottleneck we protect
    "eval_deep": "overflow",    # DEEP is already on API
    "sample_audit": "skip",     # optional, skip if constrained
}
# OpenRouter cost rates in USD per 1K tokens (only applies when using API, not Claude Max).
# Keyed consistently via the MODEL_* constants (the original mixed literal keys
# "opus"/"sonnet" with constants) so a model-ID rename cannot silently orphan
# its cost entry. Values are unchanged.
MODEL_COSTS = {
    MODEL_OPUS: {"input": 0.015, "output": 0.075},
    MODEL_SONNET: {"input": 0.003, "output": 0.015},
    MODEL_HAIKU: {"input": 0.0008, "output": 0.004},
    MODEL_GPT4O: {"input": 0.0025, "output": 0.01},
}
# --- Concurrency ---
# Worker counts are env-overridable for tuning without a code change.
MAX_EXTRACT_WORKERS = int(os.environ.get("MAX_EXTRACT_WORKERS", "5"))
MAX_EVAL_WORKERS = int(os.environ.get("MAX_EVAL_WORKERS", "7"))
MAX_MERGE_WORKERS = 1  # domain-serialized, but one merge at a time per domain

# --- Timeouts (seconds) ---
EXTRACT_TIMEOUT = 600  # 10 min
EVAL_TIMEOUT = 300     # 5 min
MERGE_TIMEOUT = 300    # 5 min — force-reset to conflict if exceeded (Rhea)
CLAUDE_MAX_PROBE_TIMEOUT = 15  # short: probing Claude Max availability must not stall a cycle
# --- Backpressure ---
# Queue-depth thresholds that slow or pause extraction when downstream lags.
BACKPRESSURE_HIGH = 40             # pause extraction above this
BACKPRESSURE_LOW = 20              # throttle extraction above this
BACKPRESSURE_THROTTLE_WORKERS = 2  # extract workers allowed while throttled

# --- Retry budgets ---
TRANSIENT_RETRY_MAX = 5         # API timeouts, rate limits
SUBSTANTIVE_RETRY_STANDARD = 2  # reviewer request_changes
SUBSTANTIVE_RETRY_DEEP = 3      # NOTE(review): presumably the DEEP-tier substantive budget — confirm
# --- Circuit breakers ---
BREAKER_THRESHOLD = 5  # NOTE(review): presumably consecutive failures before opening — confirm in breaker module
BREAKER_COOLDOWN = 900  # 15 min

# --- Cost budgets ---
OPENROUTER_DAILY_BUDGET = 20.0   # USD
OPENROUTER_WARN_THRESHOLD = 0.8  # warn at 80% of daily budget

# --- Quality ---
SAMPLE_AUDIT_RATE = 0.10  # audit 10% of LIGHT merges
SAMPLE_AUDIT_DISAGREEMENT_THRESHOLD = 0.10  # 10% disagreement → tighten LIGHT criteria
# --- Polling intervals (seconds) ---
# How often each stage loop wakes to look for work.
INGEST_INTERVAL = 60
VALIDATE_INTERVAL = 30
EVAL_INTERVAL = 30
MERGE_INTERVAL = 30
HEALTH_CHECK_INTERVAL = 60

# --- Health API ---
HEALTH_PORT = 8080
# --- Logging ---
LOG_FILE = LOG_DIR / "pipeline.jsonl"  # .jsonl suffix: structured JSON-lines log
LOG_ROTATION_MAX_BYTES = 50 * 1024 * 1024  # 50MB per file
LOG_ROTATION_BACKUP_COUNT = 7  # keep 7 days