teleo-infrastructure/lib/config.py
m3taversal e7c902bac8 leo: implement retry budget — stop infinite eval loops
Schema migration v3: adds eval_attempts (INTEGER) and eval_issues (TEXT/JSON)
columns to prs table.

Retry budget logic (Ganymede-approved design):
- Increment eval_attempts on each evaluate_pr() call
- Hard cap: eval_attempts >= 3 → terminal (close PR, tag source needs_human)
- Attempt 1: normal — back to open, wait for fix
- Attempt 2: classify issues as mechanical/substantive
  - Mechanical only (schema, wiki links, dedup): keep open for one more try
  - Substantive (factual, confidence, scope, title): close PR, requeue source
- Issue tags parsed from reviewer comments, stored in eval_issues column
- SHA-based reset: new commits on PR branch → eval_attempts=0, verdicts reset
- Post-migration stagger: LIMIT 5 for first batch to avoid OpenRouter spike
- Cost recording updated: domain review → OpenRouter, Leo → tier-dependent

Stops the 32-PR infinite loop burning ~$0.03/cycle with no terminal state.

Pentagon-Agent: Leo <294C3CA1-0205-4668-82FA-B984D54F48AD>
2026-03-13 17:14:12 +00:00

126 lines
5.1 KiB
Python

"""Pipeline v2 configuration — all constants and thresholds."""
import os
from pathlib import Path
# --- Paths ---
# Every filesystem location derives from one base directory so the whole tree
# can be relocated by setting PIPELINE_BASE (default /opt/teleo-eval).
BASE_DIR = Path(os.environ.get("PIPELINE_BASE", "/opt/teleo-eval"))
REPO_DIR = BASE_DIR / "workspaces" / "teleo-codex.git"  # presumably a bare repo (".git" suffix) — confirm
MAIN_WORKTREE = BASE_DIR / "workspaces" / "main"  # checked-out worktree of the main branch
SECRETS_DIR = BASE_DIR / "secrets"  # credential files (e.g. FORGEJO_TOKEN_FILE below)
LOG_DIR = BASE_DIR / "logs"  # directory for LOG_FILE below
DB_PATH = BASE_DIR / "pipeline" / "pipeline.db"  # pipeline state database
# Repo-relative path (plain str, deliberately NOT anchored under BASE_DIR).
INBOX_ARCHIVE = "inbox/archive"
# --- Forgejo ---
# Connection details for the Forgejo instance that hosts the target repo.
FORGEJO_URL = os.environ.get("FORGEJO_URL", "http://localhost:3000")
FORGEJO_OWNER = "teleo"  # repository owner (org/user) on Forgejo
FORGEJO_REPO = "teleo-codex"  # repository name under FORGEJO_OWNER
FORGEJO_TOKEN_FILE = SECRETS_DIR / "forgejo-admin-token"  # file holding the admin API token
FORGEJO_PIPELINE_USER = "teleo" # git user for pipeline commits
# --- Models ---
CLAUDE_CLI = os.environ.get("CLAUDE_CLI", "/home/teleo/.local/bin/claude")  # Claude CLI binary (Claude Max backend)
OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
# Model IDs
# Bare aliases ("opus", "sonnet") appear to be Claude CLI model names, while
# slash-qualified IDs are OpenRouter identifiers — confirm against the callers.
MODEL_OPUS = "opus"
MODEL_SONNET = "sonnet"
MODEL_HAIKU = "anthropic/claude-3.5-haiku"
MODEL_GPT4O = "openai/gpt-4o"
MODEL_SONNET_OR = "anthropic/claude-sonnet-4.5" # OpenRouter Sonnet (paid, not Claude Max)
# --- Model assignment per stage ---
# Principle: Opus is scarce (Claude Max). Reserve for DEEP eval + overnight research.
# Model diversity: domain (GPT-4o) + Leo (Sonnet) = two model families, no correlated blindspots.
# Both on OpenRouter = Claude Max rate limit untouched for Opus.
#
# Pipeline eval ordering (domain-first, Leo-last):
# 1. Domain review → GPT-4o (OpenRouter) — different family from Leo
# 2. Leo STANDARD → Sonnet (OpenRouter) — different family from domain
# 3. Leo DEEP → Opus (Claude Max) — highest judgment, scarce
EXTRACT_MODEL = MODEL_SONNET # extraction: structured output, volume work (Claude Max)
TRIAGE_MODEL = MODEL_HAIKU # triage: routing decision, cheapest (OpenRouter)
EVAL_DOMAIN_MODEL = MODEL_GPT4O # domain review: OpenRouter GPT-4o
EVAL_LEO_MODEL = MODEL_OPUS # Leo DEEP review: Claude Max Opus
EVAL_LEO_STANDARD_MODEL = MODEL_SONNET_OR # Leo STANDARD review: OpenRouter Sonnet
# NOTE(review): both EVAL_LEO_MODEL and EVAL_DEEP_MODEL are described as "DEEP"
# work — the names are easy to confuse; verify which consumers read which.
EVAL_DEEP_MODEL = MODEL_GPT4O # DEEP cross-family: paid, adversarial
# --- Model backends ---
# Every model has two possible backends: the Claude Max subscription (no
# marginal cost, but rate-limited) and the metered API (paid, effectively
# unlimited). This table decides what each stage does when Claude Max has no
# spare capacity:
#   "queue"    — block until capacity frees up (preferred for non-urgent work)
#   "overflow" — spill over to the paid API (time-sensitive work)
#   "skip"     — drop this cycle entirely (optional stages only)
OVERFLOW_POLICY = {
    "extract": "queue",         # extraction tolerates waiting
    "triage": "overflow",       # already cheap on the API
    "eval_domain": "overflow",  # the volume filter — must never bottleneck (Rhea)
    "eval_leo": "queue",        # the scarce resource this policy protects
    "eval_deep": "overflow",    # runs on the API regardless
    "sample_audit": "skip",     # optional; dropped under pressure
}
# OpenRouter cost rates per 1K tokens (only applies when using API, not Claude Max)
# Keys are the MODEL_* constants defined above — previously "opus" and "sonnet"
# were hard-coded string literals, so renaming MODEL_OPUS/MODEL_SONNET would
# have silently orphaned their cost entries while the other three stayed keyed
# by constant. Values: USD per 1K input/output tokens.
MODEL_COSTS = {
    MODEL_OPUS: {"input": 0.015, "output": 0.075},
    MODEL_SONNET: {"input": 0.003, "output": 0.015},
    MODEL_HAIKU: {"input": 0.0008, "output": 0.004},
    MODEL_GPT4O: {"input": 0.0025, "output": 0.01},
    MODEL_SONNET_OR: {"input": 0.003, "output": 0.015},
}
# --- Concurrency ---
# Worker counts are env-overridable; defaults tuned for the eval host.
MAX_EXTRACT_WORKERS = int(os.environ.get("MAX_EXTRACT_WORKERS", "5"))  # parallel extraction jobs
MAX_EVAL_WORKERS = int(os.environ.get("MAX_EVAL_WORKERS", "7"))  # parallel eval jobs
MAX_MERGE_WORKERS = 1 # domain-serialized, but one merge at a time per domain
# --- Timeouts (seconds) ---
EXTRACT_TIMEOUT = 600 # 10 min
EVAL_TIMEOUT = 300 # 5 min
MERGE_TIMEOUT = 300 # 5 min — force-reset to conflict if exceeded (Rhea)
CLAUDE_MAX_PROBE_TIMEOUT = 15  # presumably the Claude Max capacity probe — confirm in consumer
# --- Backpressure ---
# Two-level extraction throttle (units presumably downstream queue depth —
# confirm against the consumer): between LOW and HIGH the extraction pool
# shrinks to BACKPRESSURE_THROTTLE_WORKERS; above HIGH extraction pauses.
BACKPRESSURE_HIGH = 40 # pause extraction above this
BACKPRESSURE_LOW = 20 # throttle extraction above this
BACKPRESSURE_THROTTLE_WORKERS = 2 # workers when throttled
# --- Retry budgets ---
TRANSIENT_RETRY_MAX = 5 # API timeouts, rate limits
SUBSTANTIVE_RETRY_STANDARD = 2 # reviewer request_changes
SUBSTANTIVE_RETRY_DEEP = 3  # extra retry headroom for DEEP-tier reviews
MAX_EVAL_ATTEMPTS = 3 # Hard cap on eval cycles per PR before terminal
# Issue tags that can be fixed mechanically (Python fixer or Haiku)
MECHANICAL_ISSUE_TAGS = {"frontmatter_schema", "broken_wiki_links", "near_duplicate"}
# Issue tags that require re-extraction (substantive quality problems)
SUBSTANTIVE_ISSUE_TAGS = {"factual_discrepancy", "confidence_miscalibration", "scope_error", "title_overclaims"}
# --- Circuit breakers ---
BREAKER_THRESHOLD = 5  # failures before a breaker trips (presumably consecutive — confirm in consumer)
BREAKER_COOLDOWN = 900 # 15 min
# --- Cost budgets ---
OPENROUTER_DAILY_BUDGET = 20.0 # USD
OPENROUTER_WARN_THRESHOLD = 0.8 # 80% of budget
# --- Quality ---
SAMPLE_AUDIT_RATE = 0.10 # 10% of LIGHT merges
SAMPLE_AUDIT_DISAGREEMENT_THRESHOLD = 0.10 # 10% disagreement → tighten LIGHT criteria
# --- Polling intervals (seconds) ---
# How often each stage polls for new work.
INGEST_INTERVAL = 60
VALIDATE_INTERVAL = 30
EVAL_INTERVAL = 30
MERGE_INTERVAL = 30
HEALTH_CHECK_INTERVAL = 60
# --- Health API ---
HEALTH_PORT = 8080  # local HTTP port for the health endpoint
# --- Logging ---
LOG_FILE = LOG_DIR / "pipeline.jsonl"  # structured JSON-lines log
LOG_ROTATION_MAX_BYTES = 50 * 1024 * 1024 # 50MB per file
LOG_ROTATION_BACKUP_COUNT = 7 # keep 7 days