teleo-codex/ops/pipeline-v2/lib/config.py
m3taversal 05d74d5e32 sync: import all VPS pipeline + diagnostics code as baseline
Imports 67 files from VPS (/opt/teleo-eval/) into repo as the single source
of truth. Previously only 8 of 67 files existed in repo — the rest were
deployed directly to VPS via SCP, causing massive drift.

Includes:
- pipeline/lib/: 33 Python modules (daemon core, extraction, evaluation, merge, cascade, cross-domain, costs, attribution, etc.)
- pipeline/: main daemon (teleo-pipeline.py), reweave.py, batch-extract-50.sh
- diagnostics/: 19 files (4-page dashboard, alerting, daily digest, review queue, tier1 metrics)
- agent-state/: bootstrap, lib-state, cascade inbox processor, schema
- systemd/: service unit files for reference
- deploy.sh: rsync-based deploy with --dry-run, syntax checks, dirty-tree gate
- research-session.sh: updated with Step 8.5 digest + cascade inbox processing

No new code written — all files are exact copies from VPS as of 2026-04-06.
From this point forward: edit in repo, commit, then deploy.sh.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-07 00:00:00 +01:00

219 lines
9.5 KiB
Python

"""Pipeline v2 configuration — all constants and thresholds."""
import os
from pathlib import Path
# --- Paths ---
BASE_DIR = Path(os.environ.get("PIPELINE_BASE", "/opt/teleo-eval"))
REPO_DIR = BASE_DIR / "workspaces" / "teleo-codex.git"
MAIN_WORKTREE = BASE_DIR / "workspaces" / "main"
SECRETS_DIR = BASE_DIR / "secrets"
LOG_DIR = BASE_DIR / "logs"
DB_PATH = BASE_DIR / "pipeline" / "pipeline.db"
# File-based worktree lock path — used by all processes that write to main worktree
# (pipeline daemon stages + telegram bot). Ganymede: one lock, one mechanism.
MAIN_WORKTREE_LOCKFILE = BASE_DIR / "workspaces" / ".main-worktree.lock"
INBOX_QUEUE = "inbox/queue"
INBOX_ARCHIVE = "inbox/archive"
INBOX_NULL_RESULT = "inbox/null-result"
# --- Forgejo ---
# Self-hosted Forgejo instance; FORGEJO_URL env var overrides (defaults to local).
FORGEJO_URL = os.environ.get("FORGEJO_URL", "http://localhost:3000")
FORGEJO_OWNER = "teleo"  # repository owner on the Forgejo instance
FORGEJO_REPO = "teleo-codex"
FORGEJO_TOKEN_FILE = SECRETS_DIR / "forgejo-admin-token"  # admin API token, read from secrets dir
FORGEJO_PIPELINE_USER = "teleo" # git user for pipeline commits
# --- Models ---
# Binary for Claude Max (subscription) calls; CLAUDE_CLI env var overrides.
CLAUDE_CLI = os.environ.get("CLAUDE_CLI", "/home/teleo/.local/bin/claude")
# OpenRouter chat-completions endpoint — all paid API traffic goes here.
OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
# Model IDs
# Bare aliases ("opus"/"sonnet") are Claude CLI model names (Claude Max);
# slash-qualified IDs are OpenRouter model identifiers (paid API).
MODEL_OPUS = "opus"
MODEL_SONNET = "sonnet"
MODEL_HAIKU = "anthropic/claude-3.5-haiku"
MODEL_GPT4O = "openai/gpt-4o" # legacy, kept for reference
MODEL_GEMINI_FLASH = "google/gemini-2.5-flash" # was -preview, removed by OpenRouter
MODEL_SONNET_OR = "anthropic/claude-sonnet-4.5" # OpenRouter Sonnet (paid, not Claude Max)
# --- Model assignment per stage ---
# Principle: Opus is scarce (Claude Max). Reserve for DEEP eval + overnight research.
# Model diversity: domain (Gemini Flash) + Leo (Sonnet) = two model families, no correlated blindspots.
# Both on OpenRouter = Claude Max rate limit untouched for Opus.
#
# Pipeline eval ordering (domain-first, Leo-last):
#   1. Domain review → Gemini 2.5 Flash (OpenRouter) — different family from Leo
#   2. Leo STANDARD  → Sonnet (OpenRouter)           — different family from domain
#   3. Leo DEEP      → Opus (Claude Max)             — highest judgment, scarce
EXTRACT_MODEL = MODEL_SONNET # extraction: structured output, volume work (Claude Max)
TRIAGE_MODEL = MODEL_HAIKU # triage: routing decision, cheapest (OpenRouter)
EVAL_DOMAIN_MODEL = MODEL_GEMINI_FLASH # domain review: Gemini 2.5 Flash (was GPT-4o — 16x cheaper, different family from Sonnet)
EVAL_LEO_MODEL = MODEL_OPUS # Leo DEEP review: Claude Max Opus
EVAL_LEO_STANDARD_MODEL = MODEL_SONNET_OR # Leo STANDARD review: OpenRouter Sonnet
EVAL_DEEP_MODEL = MODEL_GEMINI_FLASH # DEEP cross-family: paid, adversarial
# --- Model backends ---
# Each model can run on Claude Max (subscription, base load) or API (overflow/spikes).
# Claude Max: free but rate-limited. API: paid but unlimited.
# When Claude Max is rate-limited, behavior per stage:
#   "queue"    — wait for capacity (preferred for non-urgent work)
#   "overflow" — fall back to API (for time-sensitive work)
#   "skip"     — skip this cycle (for optional stages like sample audit)
OVERFLOW_POLICY = {
    "extract": "queue",        # extraction can wait
    "triage": "overflow",      # triage is cheap on API anyway
    "eval_domain": "overflow", # domain review is the volume filter — don't let it bottleneck (Rhea)
    "eval_leo": "queue",       # Leo review is the bottleneck we protect
    "eval_deep": "overflow",   # DEEP is already on API
    "sample_audit": "skip",    # optional, skip if constrained
}
# OpenRouter cost rates per 1K tokens (only applies when using API, not Claude Max)
# Keyed by the MODEL_* constants above. (Previously "opus"/"sonnet" were repeated
# as bare string literals — same values, now one source of truth for the IDs.)
MODEL_COSTS = {
    MODEL_OPUS: {"input": 0.015, "output": 0.075},
    MODEL_SONNET: {"input": 0.003, "output": 0.015},
    MODEL_HAIKU: {"input": 0.0008, "output": 0.004},
    MODEL_GPT4O: {"input": 0.0025, "output": 0.01},
    MODEL_GEMINI_FLASH: {"input": 0.00015, "output": 0.0006},
    MODEL_SONNET_OR: {"input": 0.003, "output": 0.015},
}
# --- Concurrency ---
# Worker-pool sizes; env-overridable so they can be tuned without a deploy.
MAX_EXTRACT_WORKERS = int(os.environ.get("MAX_EXTRACT_WORKERS", "5"))
MAX_EVAL_WORKERS = int(os.environ.get("MAX_EVAL_WORKERS", "7"))
MAX_MERGE_WORKERS = 1 # domain-serialized, but one merge at a time per domain
# --- Timeouts (seconds) ---
EXTRACT_TIMEOUT = 600 # 10 min
EVAL_TIMEOUT = 120 # 2 min — routine Sonnet/Gemini Flash calls (was 600, caused 10-min stalls)
EVAL_TIMEOUT_OPUS = 600 # 10 min — Opus DEEP eval needs more time for complex reasoning
MERGE_TIMEOUT = 300 # 5 min — force-reset to conflict if exceeded (Rhea)
CLAUDE_MAX_PROBE_TIMEOUT = 15 # presumably a quick Claude Max availability probe — confirm in daemon
# --- Backpressure ---
# Queue-depth thresholds — assumed to be pending-item counts; confirm against daemon usage.
BACKPRESSURE_HIGH = 40 # pause extraction above this
BACKPRESSURE_LOW = 20 # throttle extraction above this
BACKPRESSURE_THROTTLE_WORKERS = 2 # workers when throttled
# --- Retry budgets ---
TRANSIENT_RETRY_MAX = 5 # API timeouts, rate limits
SUBSTANTIVE_RETRY_STANDARD = 2 # reviewer request_changes
SUBSTANTIVE_RETRY_DEEP = 3 # DEEP-tier counterpart of the above
MAX_EVAL_ATTEMPTS = 3 # Hard cap on eval cycles per PR before terminal
MAX_FIX_ATTEMPTS = 2 # Hard cap on auto-fix cycles per PR before giving up
MAX_FIX_PER_CYCLE = 15 # PRs to fix per cycle — bumped from 5 to clear backlog (Cory, Mar 14)
# Issue tags that can be fixed mechanically (Python fixer or Haiku)
# broken_wiki_links removed — downgraded to warning, not a gate. Links to claims
# in other open PRs resolve naturally as the dependency chain merges. (Cory, Mar 14)
MECHANICAL_ISSUE_TAGS = {"frontmatter_schema", "near_duplicate"}
# Issue tags that require re-extraction (substantive quality problems)
SUBSTANTIVE_ISSUE_TAGS = {"factual_discrepancy", "confidence_miscalibration", "scope_error", "title_overclaims"}
# --- Content type schemas ---
# Registry of content types. validate.py branches on type to apply the right
# required fields, confidence rules, and title checks. Adding a new type is a
# dict entry here — no code changes in validate.py needed.
# Per-type keys:
#   required                — field names that must be present
#   valid_confidence        — allowed confidence values, or None when not checked
#   needs_proposition_title — whether the title check applies
#   valid_status            — allowed status values (decision type only)
TYPE_SCHEMAS = {
    "claim": dict(
        required=("type", "domain", "description", "confidence", "source", "created"),
        valid_confidence=("proven", "likely", "experimental", "speculative"),
        needs_proposition_title=True,
    ),
    "framework": dict(
        required=("type", "domain", "description", "source", "created"),
        valid_confidence=None,
        needs_proposition_title=True,
    ),
    "entity": dict(
        required=("type", "domain", "description"),
        valid_confidence=None,
        needs_proposition_title=False,
    ),
    "decision": dict(
        required=("type", "domain", "description", "parent_entity", "status"),
        valid_confidence=None,
        needs_proposition_title=False,
        valid_status=("active", "passed", "failed", "expired", "cancelled"),
    ),
}
# --- Content directories ---
ENTITY_DIR_TEMPLATE = "entities/{domain}" # centralized path (Rhea: don't hardcode across 5 files)
DECISION_DIR_TEMPLATE = "decisions/{domain}"
# --- Contributor tiers ---
# Auto-promotion rules. CI is computed, not stored.
# Thresholds per tier; the promotion logic that evaluates them lives elsewhere.
CONTRIBUTOR_TIER_RULES = {
    "contributor": dict(claims_merged=1),
    "veteran": dict(
        claims_merged=10,
        min_days_since_first=30,
        challenges_survived=1,
    ),
}
# Role weights for CI computation (must match schemas/contribution-weights.yaml).
# Weights sum to 1.0.
CONTRIBUTION_ROLE_WEIGHTS = dict(
    sourcer=0.15,
    extractor=0.40,
    challenger=0.20,
    synthesizer=0.15,
    reviewer=0.10,
)
# --- Circuit breakers ---
BREAKER_THRESHOLD = 5 # failure count that trips a breaker — exact semantics in the breaker code; confirm
BREAKER_COOLDOWN = 900 # 15 min
# --- Cost budgets ---
OPENROUTER_DAILY_BUDGET = 20.0 # USD
OPENROUTER_WARN_THRESHOLD = 0.8 # 80% of budget
# --- Quality ---
SAMPLE_AUDIT_RATE = 0.15 # 15% of LIGHT merges get pre-merge promotion to STANDARD (Rio)
SAMPLE_AUDIT_DISAGREEMENT_THRESHOLD = 0.10 # 10% disagreement → tighten LIGHT criteria
SAMPLE_AUDIT_MODEL = MODEL_OPUS # Opus for audit — different family from Haiku triage (Leo)
# NOTE(review): SAMPLE_AUDIT_RATE and LIGHT_PROMOTION_RATE below both default to
# 0.15 and both describe LIGHT→STANDARD promotion — confirm which one the
# pipeline actually consumes, or whether they are two distinct mechanisms.
# --- Batch eval ---
# Batch domain review: group STANDARD PRs by domain, one LLM call per batch.
# Leo review stays individual (safety net for cross-contamination).
BATCH_EVAL_MAX_PRS = int(os.environ.get("BATCH_EVAL_MAX_PRS", "5"))
BATCH_EVAL_MAX_DIFF_BYTES = int(os.environ.get("BATCH_EVAL_MAX_DIFF_BYTES", "100000")) # 100KB
# --- Tier logic ---
# LIGHT_SKIP_LLM: when True, LIGHT PRs skip domain+Leo review entirely (auto-approve on Tier 0 pass).
# Set False for shadow mode (domain review runs but logs only). Flip True after 24h validation (Rhea).
# NOTE: only the exact string "true" (any case) enables this — "1"/"yes" parse as False.
LIGHT_SKIP_LLM = os.environ.get("LIGHT_SKIP_LLM", "false").lower() == "true"
# Random pre-merge promotion: fraction of LIGHT PRs upgraded to STANDARD before eval (Rio).
# Makes gaming unpredictable — extraction agents can't know which LIGHT PRs get full review.
LIGHT_PROMOTION_RATE = float(os.environ.get("LIGHT_PROMOTION_RATE", "0.15"))
# --- Polling intervals (seconds) ---
# Loop periods for the daemon's stages.
INGEST_INTERVAL = 60
VALIDATE_INTERVAL = 30
EVAL_INTERVAL = 30
MERGE_INTERVAL = 30
FIX_INTERVAL = 60
HEALTH_CHECK_INTERVAL = 60
# --- Retrieval (Telegram bot) ---
RETRIEVAL_RRF_K = 20 # RRF smoothing constant — tuned for 5-10 results per source
RETRIEVAL_ENTITY_BOOST = 1.5 # RRF score multiplier for claims wiki-linked from matched entities
RETRIEVAL_MAX_RESULTS = 10 # Max claims shown to LLM after RRF merge
RETRIEVAL_MIN_CLAIM_SCORE = 3.0 # Floor for keyword claim scoring — filters single-stopword matches
# --- Health API ---
HEALTH_PORT = 8080 # local port for the health endpoint
# --- Logging ---
LOG_FILE = LOG_DIR / "pipeline.jsonl" # structured JSON-lines log
LOG_ROTATION_MAX_BYTES = 50 * 1024 * 1024 # 50MB per file
LOG_ROTATION_BACKUP_COUNT = 7 # keep 7 days (rotation is by size, so ~1 file/day assumed)
# --- Versioning (tracked in metrics_snapshots for chart annotations) ---
PROMPT_VERSION = "v2-lean-directed" # bump on every prompt change
PIPELINE_VERSION = "2.2" # bump on every significant pipeline change