teleo-infrastructure/lib/config.py
m3taversal 97b590acd6
Some checks are pending
CI / lint-and-test (push) Waiting to run
fix: close cooldown-dependence gaps in extract.py (Ganymede review)
Three targeted fixes from Ganymede's review of commit 469cb7f:

BUG #1 — Success path now updates sources.status='extracting' before PR
creation, so queue scan's DB-authoritative filter catches sources between
PR creation and merge. Previously the cooldown gate was load-bearing for
this window, not belt-and-suspenders as claimed.

BUG #2 — Second null-result path (line 573, triggered when enrichments
existed but all targets were missing in worktree) now updates DB. Without
this, that path created no PR, no DB mark, and would have re-entered the
runaway loop 4h later when the cooldown window expired.

NIT #6 — 4h cooldown moved to config.EXTRACTION_COOLDOWN_HOURS. Tunable
without code change. Log format now shows the configured hours.

Also backfilled 59 pre-existing zombie queue-path rows where the file
was already archived but DB status said 'unprocessed' — these would have
leaked past the DB filter once the 4h cooldown expired.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-22 11:33:10 +01:00

222 lines
9.6 KiB
Python

"""Pipeline v2 configuration — all constants and thresholds."""
import os
from pathlib import Path
# --- Paths ---
# Installation root; override via PIPELINE_BASE for tests/staging deploys.
BASE_DIR = Path(os.environ.get("PIPELINE_BASE", "/opt/teleo-eval"))
_WORKSPACES = BASE_DIR / "workspaces"  # internal: shared parent for repo + worktrees
REPO_DIR = _WORKSPACES / "teleo-codex.git"
MAIN_WORKTREE = _WORKSPACES / "main"
SECRETS_DIR = BASE_DIR / "secrets"
LOG_DIR = BASE_DIR / "logs"
DB_PATH = BASE_DIR / "pipeline" / "pipeline.db"
# Single file-based lock guarding the main worktree. Every process that writes
# there (pipeline daemon stages and the telegram bot) takes this same lock —
# one lock, one mechanism (Ganymede).
MAIN_WORKTREE_LOCKFILE = _WORKSPACES / ".main-worktree.lock"
# Inbox subdirectories (repo-relative string paths, not filesystem Paths).
INBOX_QUEUE = "inbox/queue"
INBOX_ARCHIVE = "inbox/archive"
INBOX_NULL_RESULT = "inbox/null-result"
# --- Forgejo ---
FORGEJO_URL = os.environ.get("FORGEJO_URL", "http://localhost:3000")
FORGEJO_OWNER = "teleo"
FORGEJO_REPO = "teleo-codex"
FORGEJO_TOKEN_FILE = SECRETS_DIR / "forgejo-admin-token"
FORGEJO_PIPELINE_USER = "teleo"  # git identity used for pipeline commits
# --- Models ---
CLAUDE_CLI = os.environ.get("CLAUDE_CLI", "/home/teleo/.local/bin/claude")
OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
# Model IDs. Bare aliases ("opus"/"sonnet") route through the Claude CLI
# (Claude Max); slash-qualified names are OpenRouter model IDs.
MODEL_OPUS = "opus"
MODEL_SONNET = "sonnet"
MODEL_HAIKU = "anthropic/claude-3.5-haiku"
MODEL_GPT4O = "openai/gpt-4o"  # legacy, kept for reference
MODEL_GEMINI_FLASH = "google/gemini-2.5-flash"  # was -preview, removed by OpenRouter
MODEL_SONNET_OR = "anthropic/claude-sonnet-4.5"  # OpenRouter Sonnet (paid, not Claude Max)
# --- Model assignment per stage ---
# Principle: Opus is scarce (Claude Max). Reserve for DEEP eval + overnight research.
# Model diversity: domain (Gemini Flash) + Leo (Sonnet) = two model families,
# no correlated blindspots. Both on OpenRouter = Claude Max rate limit untouched for Opus.
#
# Pipeline eval ordering (domain-first, Leo-last):
# 1. Domain review → Gemini Flash (OpenRouter) — different family from Leo
# 2. Leo STANDARD → Sonnet (OpenRouter) — different family from domain
# 3. Leo DEEP → Opus (Claude Max) — highest judgment, scarce
EXTRACT_MODEL = MODEL_SONNET  # extraction: structured output, volume work (Claude Max)
TRIAGE_MODEL = MODEL_HAIKU  # triage: routing decision, cheapest (OpenRouter)
EVAL_DOMAIN_MODEL = MODEL_GEMINI_FLASH  # domain review (was GPT-4o — 16x cheaper, different family from Sonnet)
EVAL_LEO_MODEL = MODEL_OPUS  # Leo DEEP review: Claude Max Opus
EVAL_LEO_STANDARD_MODEL = MODEL_SONNET_OR  # Leo STANDARD review: OpenRouter Sonnet
EVAL_DEEP_MODEL = MODEL_GEMINI_FLASH  # DEEP cross-family: paid, adversarial
# --- Model backends ---
# Each model can run on Claude Max (subscription, base load) or API (overflow/spikes).
# Claude Max: free but rate-limited. API: paid but unlimited.
# When Claude Max is rate-limited, behavior per stage:
# "queue" — wait for capacity (preferred for non-urgent work)
# "overflow" — fall back to API (for time-sensitive work)
# "skip" — skip this cycle (for optional stages like sample audit)
OVERFLOW_POLICY = {
    "extract": "queue",  # extraction can wait
    "triage": "overflow",  # triage is cheap on API anyway
    "eval_domain": "overflow",  # domain review is the volume filter — don't let it bottleneck (Rhea)
    "eval_leo": "queue",  # Leo review is the bottleneck we protect
    "eval_deep": "overflow",  # DEEP is already on API
    "sample_audit": "skip",  # optional, skip if constrained
}
# OpenRouter cost rates per 1K tokens (only applies when using API, not Claude Max).
# Keyed by the MODEL_* constants throughout — the Opus/Sonnet rows previously
# used bare "opus"/"sonnet" literals, which would silently miss the lookup if
# those constants ever changed.
MODEL_COSTS = {
    MODEL_OPUS: {"input": 0.015, "output": 0.075},
    MODEL_SONNET: {"input": 0.003, "output": 0.015},
    MODEL_HAIKU: {"input": 0.0008, "output": 0.004},
    MODEL_GPT4O: {"input": 0.0025, "output": 0.01},
    MODEL_GEMINI_FLASH: {"input": 0.00015, "output": 0.0006},
    MODEL_SONNET_OR: {"input": 0.003, "output": 0.015},
}
# --- Concurrency ---
MAX_EXTRACT_WORKERS = int(os.environ.get("MAX_EXTRACT_WORKERS", "5"))
MAX_EVAL_WORKERS = int(os.environ.get("MAX_EVAL_WORKERS", "7"))
# Merges are domain-serialized; a single worker merges at any one time per domain.
MAX_MERGE_WORKERS = 1
# --- Timeouts (seconds) ---
EXTRACT_TIMEOUT = 10 * 60  # extraction calls get 10 minutes
EVAL_TIMEOUT = 2 * 60  # routine Sonnet/Gemini Flash calls (was 600 — caused 10-min stalls)
EVAL_TIMEOUT_OPUS = 10 * 60  # Opus DEEP eval needs longer for complex reasoning
MERGE_TIMEOUT = 5 * 60  # past this, force-reset the PR to conflict (Rhea)
CLAUDE_MAX_PROBE_TIMEOUT = 15
# --- Backpressure ---
BACKPRESSURE_HIGH = 40  # queue depth above which extraction pauses
BACKPRESSURE_LOW = 20  # queue depth above which extraction throttles
BACKPRESSURE_THROTTLE_WORKERS = 2  # worker count while throttled
# --- Retry budgets ---
TRANSIENT_RETRY_MAX = 5  # API timeouts, rate limits
SUBSTANTIVE_RETRY_STANDARD = 2  # reviewer request_changes
SUBSTANTIVE_RETRY_DEEP = 3
MAX_EVAL_ATTEMPTS = 3  # hard cap on eval cycles per PR before terminal
MAX_FIX_ATTEMPTS = 2  # hard cap on auto-fix cycles per PR before giving up
MAX_FIX_PER_CYCLE = 15  # PRs fixed per cycle — raised from 5 to clear backlog (Cory, Mar 14)
# Issue tags fixable mechanically (Python fixer or Haiku).
# broken_wiki_links was removed: downgraded to a warning, not a gate. Links to
# claims in other open PRs resolve naturally as the dependency chain merges. (Cory, Mar 14)
MECHANICAL_ISSUE_TAGS = {"frontmatter_schema", "near_duplicate"}
# Issue tags that force re-extraction (substantive quality problems).
SUBSTANTIVE_ISSUE_TAGS = {
    "factual_discrepancy",
    "confidence_miscalibration",
    "scope_error",
    "title_overclaims",
}
# --- Content type schemas ---
# Registry of content types. validate.py branches on type to apply the right
# required fields, confidence rules, and title checks. Adding a new type is a
# dict entry here — no code changes in validate.py needed.
#
# Per-type keys:
#   required                — frontmatter fields that must be present
#   valid_confidence        — allowed confidence values; None = type has no confidence field
#   needs_proposition_title — True when the title must read as a proposition
#   valid_status            — (decision only) allowed lifecycle states
TYPE_SCHEMAS = {
# Claims carry a confidence level and a proposition-style title.
"claim": {
"required": ("type", "domain", "description", "confidence", "source", "created"),
"valid_confidence": ("proven", "likely", "experimental", "speculative"),
"needs_proposition_title": True,
},
# Frameworks: like claims but without a confidence field.
"framework": {
"required": ("type", "domain", "description", "source", "created"),
"valid_confidence": None,
"needs_proposition_title": True,
},
# Entities: minimal metadata, plain (non-proposition) titles.
"entity": {
"required": ("type", "domain", "description"),
"valid_confidence": None,
"needs_proposition_title": False,
},
# Decisions: tied to a parent entity and tracked through a status lifecycle.
"decision": {
"required": ("type", "domain", "description", "parent_entity", "status"),
"valid_confidence": None,
"needs_proposition_title": False,
"valid_status": ("active", "passed", "failed", "expired", "cancelled"),
},
}
# --- Content directories ---
# Centralized path templates so the layout isn't hardcoded across 5 files (Rhea).
ENTITY_DIR_TEMPLATE = "entities/{domain}"
DECISION_DIR_TEMPLATE = "decisions/{domain}"
# --- Contributor tiers ---
# Auto-promotion thresholds per tier. CI is computed on demand, never stored.
CONTRIBUTOR_TIER_RULES = {
    "contributor": {
        "claims_merged": 1,
    },
    "veteran": {
        "claims_merged": 10,
        "min_days_since_first": 30,
        "challenges_survived": 1,
    },
}
# Per-role weights for the CI computation — must stay in sync with
# core/contribution-architecture.md.
CONTRIBUTION_ROLE_WEIGHTS = {
    "challenger": 0.35,
    "synthesizer": 0.25,
    "reviewer": 0.20,
    "sourcer": 0.15,
    "extractor": 0.05,
}
# --- Circuit breakers ---
BREAKER_THRESHOLD = 5  # consecutive failures before the breaker opens
BREAKER_COOLDOWN = 15 * 60  # seconds the breaker stays open (15 min)
# --- Cost budgets ---
OPENROUTER_DAILY_BUDGET = 20.0  # USD per day
OPENROUTER_WARN_THRESHOLD = 0.8  # warn at 80% of the daily budget
# --- Quality ---
SAMPLE_AUDIT_RATE = 0.15  # fraction of LIGHT merges promoted pre-merge to STANDARD (Rio)
SAMPLE_AUDIT_DISAGREEMENT_THRESHOLD = 0.10  # above 10% disagreement, tighten LIGHT criteria
SAMPLE_AUDIT_MODEL = MODEL_OPUS  # audit on Opus — different family from Haiku triage (Leo)
# --- Batch eval ---
# Domain review batches STANDARD PRs by domain — one LLM call per batch.
# Leo review stays per-PR as the safety net against cross-contamination.
BATCH_EVAL_MAX_PRS = int(os.environ.get("BATCH_EVAL_MAX_PRS", "5"))
BATCH_EVAL_MAX_DIFF_BYTES = int(os.environ.get("BATCH_EVAL_MAX_DIFF_BYTES", "100000"))  # 100KB
# --- Tier logic ---
# LIGHT_SKIP_LLM: True → LIGHT PRs skip domain+Leo review entirely (auto-approve
# on Tier 0 pass). False → shadow mode: domain review runs but only logs.
# Flip True after 24h of shadow-mode validation (Rhea).
LIGHT_SKIP_LLM = os.environ.get("LIGHT_SKIP_LLM", "false").lower() == "true"
# Fraction of LIGHT PRs randomly upgraded to STANDARD before eval (Rio).
# Extraction agents can't predict which LIGHT PRs get the full review,
# which makes gaming unprofitable.
LIGHT_PROMOTION_RATE = float(os.environ.get("LIGHT_PROMOTION_RATE", "0.15"))
# --- Polling intervals (seconds) ---
INGEST_INTERVAL = 60
VALIDATE_INTERVAL = 30
EVAL_INTERVAL = 30
MERGE_INTERVAL = 30
FIX_INTERVAL = 60
HEALTH_CHECK_INTERVAL = 60
# --- Extraction gates ---
# Skip sources with any PR activity inside this window. Defense-in-depth
# behind the DB-status filter, which is the authoritative gate.
EXTRACTION_COOLDOWN_HOURS = 4
# --- Retrieval (Telegram bot) ---
RETRIEVAL_RRF_K = 20  # RRF smoothing constant, tuned for 5-10 results per source
RETRIEVAL_ENTITY_BOOST = 1.5  # RRF score multiplier for claims wiki-linked from matched entities
RETRIEVAL_MAX_RESULTS = 10  # cap on claims handed to the LLM after the RRF merge
RETRIEVAL_MIN_CLAIM_SCORE = 3.0  # keyword-score floor — drops single-stopword matches
# --- Health API ---
HEALTH_PORT = 8080  # port for the health/status HTTP endpoint
# --- Logging ---
LOG_FILE = LOG_DIR / "pipeline.jsonl"
LOG_ROTATION_MAX_BYTES = 50 * 2**20  # rotate each log file at 50 MiB
LOG_ROTATION_BACKUP_COUNT = 7  # keep one week of rotated files
# --- Versioning (tracked in metrics_snapshots for chart annotations) ---
PROMPT_VERSION = "v2-lean-directed"  # bump on every prompt change
PIPELINE_VERSION = "2.2"  # bump on every significant pipeline change