- Claim-shape detector: if YAML has type: claim, force STANDARD minimum (Theseus) - Random pre-merge promotion: 15% of LIGHT → STANDARD before eval (Rio) - LIGHT_SKIP_LLM config flag: skip domain+Leo review for LIGHT (Rhea: env var rollback) - Updated both_approve: domain_verdict=skipped is valid for LIGHT auto-approve - Cost recording: only charge for reviews that actually ran - SAMPLE_AUDIT_RATE bumped 0.10 → 0.15, audit model = Opus (Leo: different family from Haiku) Multi-agent design review: Rio (gaming vectors, model diversity), Theseus (correlated blindspots, claim-shape guard), Rhea (shadow mode, config flag, deployment), Leo (approval). Pentagon-Agent: Ganymede <F99EBFA6-547B-4096-BEEA-1D59C3E4028A>
135 lines · 5.8 KiB · Python
"""Pipeline v2 configuration — all constants and thresholds."""

import os
from pathlib import Path
|
|
|
|
# --- Paths ---
# BASE_DIR is overridable via the PIPELINE_BASE env var (dev/test deployments).
BASE_DIR = Path(os.environ.get("PIPELINE_BASE", "/opt/teleo-eval"))
REPO_DIR = BASE_DIR / "workspaces" / "teleo-codex.git"
MAIN_WORKTREE = BASE_DIR / "workspaces" / "main"
SECRETS_DIR = BASE_DIR / "secrets"
LOG_DIR = BASE_DIR / "logs"
DB_PATH = BASE_DIR / "pipeline" / "pipeline.db"
# NOTE(review): plain str, not Path — presumably a repo-relative path rather
# than a location under BASE_DIR; confirm against callers.
INBOX_ARCHIVE = "inbox/archive"
|
|
|
|
# --- Forgejo ---
# Connection settings for the Forgejo instance that hosts pipeline PRs.
FORGEJO_URL = os.environ.get("FORGEJO_URL", "http://localhost:3000")
FORGEJO_OWNER = "teleo"
FORGEJO_REPO = "teleo-codex"
# NOTE(review): presumably the file's contents are the admin API token — confirm.
FORGEJO_TOKEN_FILE = SECRETS_DIR / "forgejo-admin-token"
FORGEJO_PIPELINE_USER = "teleo"  # git user for pipeline commits
|
|
|
|
# --- Models ---
CLAUDE_CLI = os.environ.get("CLAUDE_CLI", "/home/teleo/.local/bin/claude")
OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"

# Model IDs.
# Bare aliases ("opus", "sonnet") go through the Claude CLI; the slash-qualified
# IDs are OpenRouter model identifiers.
MODEL_OPUS = "opus"
MODEL_SONNET = "sonnet"
MODEL_HAIKU = "anthropic/claude-3.5-haiku"
MODEL_GPT4O = "openai/gpt-4o"
MODEL_SONNET_OR = "anthropic/claude-sonnet-4.5"  # OpenRouter Sonnet (paid, not Claude Max)
|
|
|
|
# --- Model assignment per stage ---
# Principle: Opus is scarce (Claude Max). Reserve for DEEP eval + overnight research.
# Model diversity: domain (GPT-4o) + Leo (Sonnet) = two model families, no correlated blindspots.
# Both on OpenRouter = Claude Max rate limit untouched for Opus.
#
# Pipeline eval ordering (domain-first, Leo-last):
#   1. Domain review → GPT-4o (OpenRouter) — different family from Leo
#   2. Leo STANDARD  → Sonnet (OpenRouter) — different family from domain
#   3. Leo DEEP      → Opus (Claude Max)   — highest judgment, scarce
EXTRACT_MODEL = MODEL_SONNET               # extraction: structured output, volume work (Claude Max)
TRIAGE_MODEL = MODEL_HAIKU                 # triage: routing decision, cheapest (OpenRouter)
EVAL_DOMAIN_MODEL = MODEL_GPT4O            # domain review: OpenRouter GPT-4o
EVAL_LEO_MODEL = MODEL_OPUS                # Leo DEEP review: Claude Max Opus
EVAL_LEO_STANDARD_MODEL = MODEL_SONNET_OR  # Leo STANDARD review: OpenRouter Sonnet
EVAL_DEEP_MODEL = MODEL_GPT4O              # DEEP cross-family: paid, adversarial
|
|
|
|
# --- Model backends ---
# Each model can run on Claude Max (subscription, base load) or API (overflow/spikes).
# Claude Max: free but rate-limited. API: paid but unlimited.
# When Claude Max is rate-limited, behavior per stage:
#   "queue"    — wait for capacity (preferred for non-urgent work)
#   "overflow" — fall back to API (for time-sensitive work)
#   "skip"     — skip this cycle (for optional stages like sample audit)
OVERFLOW_POLICY = {
    "extract": "queue",         # extraction can wait
    "triage": "overflow",       # triage is cheap on API anyway
    "eval_domain": "overflow",  # domain review is the volume filter — don't let it bottleneck (Rhea)
    "eval_leo": "queue",        # Leo review is the bottleneck we protect
    "eval_deep": "overflow",    # DEEP is already on API
    "sample_audit": "skip",     # optional, skip if constrained
}
|
|
|
|
# OpenRouter cost rates per 1K tokens (only applies when using API, not Claude Max).
# Keys are the MODEL_* constants (previously "opus"/"sonnet" were repeated as
# string literals — same values, but now a rename of a model ID can't silently
# desynchronize the cost table).
MODEL_COSTS = {
    MODEL_OPUS: {"input": 0.015, "output": 0.075},
    MODEL_SONNET: {"input": 0.003, "output": 0.015},
    MODEL_HAIKU: {"input": 0.0008, "output": 0.004},
    MODEL_GPT4O: {"input": 0.0025, "output": 0.01},
    MODEL_SONNET_OR: {"input": 0.003, "output": 0.015},
}
|
|
|
|
# --- Concurrency ---
MAX_EXTRACT_WORKERS = int(os.environ.get("MAX_EXTRACT_WORKERS", "5"))
MAX_EVAL_WORKERS = int(os.environ.get("MAX_EVAL_WORKERS", "7"))
MAX_MERGE_WORKERS = 1  # domain-serialized, but one merge at a time per domain

# --- Timeouts (seconds) ---
EXTRACT_TIMEOUT = 600  # 10 min
EVAL_TIMEOUT = 300  # 5 min
MERGE_TIMEOUT = 300  # 5 min — force-reset to conflict if exceeded (Rhea)
# NOTE(review): presumably a quick availability probe for Claude Max — confirm.
CLAUDE_MAX_PROBE_TIMEOUT = 15

# --- Backpressure ---
BACKPRESSURE_HIGH = 40  # pause extraction above this
BACKPRESSURE_LOW = 20  # throttle extraction above this
BACKPRESSURE_THROTTLE_WORKERS = 2  # workers when throttled
|
|
|
|
# --- Retry budgets ---
TRANSIENT_RETRY_MAX = 5  # API timeouts, rate limits
SUBSTANTIVE_RETRY_STANDARD = 2  # reviewer request_changes
SUBSTANTIVE_RETRY_DEEP = 3
MAX_EVAL_ATTEMPTS = 3  # hard cap on eval cycles per PR before terminal

# Issue tags that can be fixed mechanically (Python fixer or Haiku).
MECHANICAL_ISSUE_TAGS = {"frontmatter_schema", "broken_wiki_links", "near_duplicate"}
# Issue tags that require re-extraction (substantive quality problems).
SUBSTANTIVE_ISSUE_TAGS = {"factual_discrepancy", "confidence_miscalibration", "scope_error", "title_overclaims"}

# --- Circuit breakers ---
BREAKER_THRESHOLD = 5  # consecutive failures before the breaker opens
BREAKER_COOLDOWN = 900  # 15 min

# --- Cost budgets ---
OPENROUTER_DAILY_BUDGET = 20.0  # USD
OPENROUTER_WARN_THRESHOLD = 0.8  # 80% of budget
|
|
|
|
# --- Quality ---
SAMPLE_AUDIT_RATE = 0.15  # 15% of LIGHT merges get pre-merge promotion to STANDARD (Rio)
SAMPLE_AUDIT_DISAGREEMENT_THRESHOLD = 0.10  # 10% disagreement → tighten LIGHT criteria
# NOTE(review): Opus and Haiku are both Anthropic models (see MODEL_HAIKU above),
# so "different family" here is questionable — confirm the diversity rationale.
SAMPLE_AUDIT_MODEL = MODEL_OPUS  # Opus for audit — different family from Haiku triage (Leo)
|
|
|
|
# --- Tier logic ---
# LIGHT_SKIP_LLM: when True, LIGHT PRs skip domain+Leo review entirely (auto-approve on Tier 0 pass).
# Set False for shadow mode (domain review runs but logs only). Flip True after 24h validation (Rhea).
# Env var gives an instant rollback path without a code change.
LIGHT_SKIP_LLM = os.environ.get("LIGHT_SKIP_LLM", "false").lower() == "true"

# Random pre-merge promotion: fraction of LIGHT PRs upgraded to STANDARD before eval (Rio).
# Makes gaming unpredictable — extraction agents can't know which LIGHT PRs get full review.
LIGHT_PROMOTION_RATE = float(os.environ.get("LIGHT_PROMOTION_RATE", "0.15"))
|
|
|
|
# --- Polling intervals (seconds) ---
INGEST_INTERVAL = 60
VALIDATE_INTERVAL = 30
EVAL_INTERVAL = 30
MERGE_INTERVAL = 30
HEALTH_CHECK_INTERVAL = 60

# --- Health API ---
HEALTH_PORT = 8080  # local HTTP port for the health endpoint
|
|
|
|
# --- Logging ---
LOG_FILE = LOG_DIR / "pipeline.jsonl"  # structured JSON-lines log
LOG_ROTATION_MAX_BYTES = 50 * 1024 * 1024  # 50MB per file
LOG_ROTATION_BACKUP_COUNT = 7  # keep 7 days
|