Imports 67 files from VPS (/opt/teleo-eval/) into repo as the single source of truth. Previously only 8 of 67 files existed in repo — the rest were deployed directly to VPS via SCP, causing massive drift. Includes: - pipeline/lib/: 33 Python modules (daemon core, extraction, evaluation, merge, cascade, cross-domain, costs, attribution, etc.) - pipeline/: main daemon (teleo-pipeline.py), reweave.py, batch-extract-50.sh - diagnostics/: 19 files (4-page dashboard, alerting, daily digest, review queue, tier1 metrics) - agent-state/: bootstrap, lib-state, cascade inbox processor, schema - systemd/: service unit files for reference - deploy.sh: rsync-based deploy with --dry-run, syntax checks, dirty-tree gate - research-session.sh: updated with Step 8.5 digest + cascade inbox processing No new code written — all files are exact copies from VPS as of 2026-04-06. From this point forward: edit in repo, commit, then deploy.sh. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
295 lines · 12 KiB · Python
"""Auto-fixer stage — mechanical fixes for known issue types.
|
|
|
|
Currently fixes:
|
|
- broken_wiki_links: strips [[ ]] brackets from links that don't resolve
|
|
|
|
Runs as a pipeline stage on FIX_INTERVAL. Only fixes mechanical issues
|
|
that don't require content understanding. Does NOT fix frontmatter_schema,
|
|
near_duplicate, or any substantive issues.
|
|
|
|
Key design decisions (Ganymede):
|
|
- Only fix files in the PR diff (not the whole worktree/repo)
|
|
- Add intra-PR file stems to valid set (avoids stripping cross-references
|
|
between new claims in the same PR)
|
|
- Atomic claim via status='fixing' (same pattern as eval's 'reviewing')
|
|
- fix_attempts cap prevents infinite fix loops
|
|
- Reset eval_attempts + tier0_pass on successful fix for re-evaluation
|
|
"""
|
|
|
|
import asyncio
|
|
import json
|
|
import logging
|
|
from pathlib import Path
|
|
|
|
from . import config, db
|
|
from .validate import WIKI_LINK_RE, load_existing_claims
|
|
|
|
logger = logging.getLogger("pipeline.fixer")
|
|
|
|
|
|
# ─── Git helper (async subprocess, same pattern as merge.py) ─────────────
|
|
|
|
|
|
async def _git(*args, cwd: str = None, timeout: int = 60) -> tuple[int, str]:
|
|
"""Run a git command async. Returns (returncode, combined output)."""
|
|
proc = await asyncio.create_subprocess_exec(
|
|
"git",
|
|
*args,
|
|
cwd=cwd or str(config.REPO_DIR),
|
|
stdout=asyncio.subprocess.PIPE,
|
|
stderr=asyncio.subprocess.PIPE,
|
|
)
|
|
try:
|
|
stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=timeout)
|
|
except asyncio.TimeoutError:
|
|
proc.kill()
|
|
await proc.wait()
|
|
return -1, f"git {args[0]} timed out after {timeout}s"
|
|
output = (stdout or b"").decode().strip()
|
|
if stderr:
|
|
output += "\n" + stderr.decode().strip()
|
|
return proc.returncode, output
|
|
|
|
|
|
# ─── Wiki link fixer ─────────────────────────────────────────────────────
|
|
|
|
|
|
async def _fix_wiki_links_in_pr(conn, pr_number: int) -> dict:
    """Fix broken wiki links in a single PR by stripping brackets.

    Only processes files in the PR diff (not the whole repo).
    Adds intra-PR file stems to the valid set so cross-references
    between new claims in the same PR are preserved.

    Args:
        conn: open database connection (rows support key access, e.g.
            row["branch"] — presumably sqlite3 with a Row factory; confirm
            against db module).
        pr_number: the PR to fix.

    Returns:
        {"pr": N, "fixed": True, "links_fixed": K} on success, or
        {"pr": N, "skipped": True, "reason": "<why>"} when nothing was done.
        Every skip path first resets status back to 'open' so the PR is
        not left stuck in 'fixing'.
    """
    # Atomic claim — prevent concurrent fixers and evaluators. The UPDATE
    # only matches status='open', so rowcount==0 means someone else holds it.
    cursor = conn.execute(
        "UPDATE prs SET status = 'fixing', last_attempt = datetime('now') WHERE number = ? AND status = 'open'",
        (pr_number,),
    )
    if cursor.rowcount == 0:
        return {"pr": pr_number, "skipped": True, "reason": "not_open"}

    # Increment fix_attempts unconditionally — even a failed attempt counts
    # toward the cap, which is what prevents infinite fix loops.
    conn.execute(
        "UPDATE prs SET fix_attempts = COALESCE(fix_attempts, 0) + 1 WHERE number = ?",
        (pr_number,),
    )

    # Get PR branch from DB first, fall back to Forgejo API
    row = conn.execute("SELECT branch FROM prs WHERE number = ?", (pr_number,)).fetchone()
    branch = row["branch"] if row and row["branch"] else None

    if not branch:
        # Lazy import: only hit the Forgejo client when the DB lacks the branch.
        from .forgejo import api as forgejo_api
        from .forgejo import repo_path

        pr_info = await forgejo_api("GET", repo_path(f"pulls/{pr_number}"))
        if pr_info:
            branch = pr_info.get("head", {}).get("ref")

    if not branch:
        conn.execute("UPDATE prs SET status = 'open' WHERE number = ?", (pr_number,))
        return {"pr": pr_number, "skipped": True, "reason": "no_branch"}

    # Fetch latest refs (best-effort: return code is not checked here; the
    # worktree add below will fail if the ref is truly unavailable)
    await _git("fetch", "origin", branch, timeout=30)

    # Create worktree — detached so the branch itself isn't locked yet
    worktree_path = str(config.BASE_DIR / "workspaces" / f"fix-{pr_number}")

    rc, out = await _git("worktree", "add", "--detach", worktree_path, f"origin/{branch}")
    if rc != 0:
        logger.error("PR #%d: worktree creation failed: %s", pr_number, out)
        conn.execute("UPDATE prs SET status = 'open' WHERE number = ?", (pr_number,))
        return {"pr": pr_number, "skipped": True, "reason": "worktree_failed"}

    try:
        # Checkout the actual branch (so we can push)
        rc, out = await _git("checkout", "-B", branch, f"origin/{branch}", cwd=worktree_path)
        if rc != 0:
            logger.error("PR #%d: checkout failed: %s", pr_number, out)
            conn.execute("UPDATE prs SET status = 'open' WHERE number = ?", (pr_number,))
            return {"pr": pr_number, "skipped": True, "reason": "checkout_failed"}

        # Get files changed in PR (only fix these, not the whole repo)
        rc, out = await _git("diff", "--name-only", "origin/main...HEAD", cwd=worktree_path)
        if rc != 0:
            conn.execute("UPDATE prs SET status = 'open' WHERE number = ?", (pr_number,))
            return {"pr": pr_number, "skipped": True, "reason": "diff_failed"}

        # Only markdown files can contain wiki links; ignore everything else.
        pr_files = [f for f in out.split("\n") if f.strip() and f.endswith(".md")]

        if not pr_files:
            conn.execute("UPDATE prs SET status = 'open' WHERE number = ?", (pr_number,))
            return {"pr": pr_number, "skipped": True, "reason": "no_md_files"}

        # Load existing claims from main + add intra-PR stems
        # (avoids stripping cross-references between new claims in same PR)
        existing_claims = load_existing_claims()
        for f in pr_files:
            existing_claims.add(Path(f).stem)

        # Fix broken links in each PR file
        total_fixed = 0

        for filepath in pr_files:
            full_path = Path(worktree_path) / filepath
            # Skip paths from the diff that don't exist in the worktree
            # (e.g. files deleted on this branch).
            if not full_path.is_file():
                continue

            content = full_path.read_text(encoding="utf-8")
            file_fixes = 0

            def replace_broken_link(match):
                # re.sub callback: group(1) is the link target inside [[ ]].
                nonlocal file_fixes
                link_text = match.group(1)
                if link_text.strip() not in existing_claims:
                    file_fixes += 1
                    return link_text  # Strip brackets, keep text
                return match.group(0)  # Keep valid link

            new_content = WIKI_LINK_RE.sub(replace_broken_link, content)
            if new_content != content:
                full_path.write_text(new_content, encoding="utf-8")
                total_fixed += file_fixes

        if total_fixed == 0:
            # No broken links found — issue might be something else
            conn.execute("UPDATE prs SET status = 'open' WHERE number = ?", (pr_number,))
            return {"pr": pr_number, "skipped": True, "reason": "no_broken_links"}

        # Commit and push
        rc, out = await _git("add", *pr_files, cwd=worktree_path)
        if rc != 0:
            conn.execute("UPDATE prs SET status = 'open' WHERE number = ?", (pr_number,))
            return {"pr": pr_number, "skipped": True, "reason": "git_add_failed"}

        commit_msg = (
            f"auto-fix: strip {total_fixed} broken wiki links\n\n"
            f"Pipeline auto-fixer: removed [[ ]] brackets from links\n"
            f"that don't resolve to existing claims in the knowledge base."
        )
        rc, out = await _git("commit", "-m", commit_msg, cwd=worktree_path)
        if rc != 0:
            conn.execute("UPDATE prs SET status = 'open' WHERE number = ?", (pr_number,))
            return {"pr": pr_number, "skipped": True, "reason": "commit_failed"}

        # Reset eval state BEFORE push — if daemon crashes between push and
        # reset, the PR would be permanently stuck at max eval_attempts.
        # Reset-first: worst case is one wasted eval cycle on old content.
        conn.execute(
            """UPDATE prs SET
                status = 'open',
                eval_attempts = 0,
                eval_issues = '[]',
                tier0_pass = NULL,
                domain_verdict = 'pending',
                leo_verdict = 'pending',
                last_error = NULL
            WHERE number = ?""",
            (pr_number,),
        )

        rc, out = await _git("push", "origin", branch, cwd=worktree_path, timeout=30)
        if rc != 0:
            logger.error("PR #%d: push failed: %s", pr_number, out)
            # Eval state already reset — PR will re-evaluate old content,
            # find same issues, and fixer will retry next cycle. No harm.
            return {"pr": pr_number, "skipped": True, "reason": "push_failed"}

        # Record the successful fix in the audit trail.
        db.audit(
            conn,
            "fixer",
            "wiki_links_fixed",
            json.dumps({"pr": pr_number, "links_fixed": total_fixed}),
        )
        logger.info("PR #%d: fixed %d broken wiki links, reset for re-evaluation", pr_number, total_fixed)

        return {"pr": pr_number, "fixed": True, "links_fixed": total_fixed}

    finally:
        # Always cleanup worktree — runs on every return path above,
        # including exceptions, so stale worktrees never accumulate.
        await _git("worktree", "remove", "--force", worktree_path)
|
# ─── Stage entry point ───────────────────────────────────────────────────
|
|
|
|
|
|
async def fix_cycle(conn, max_workers=None) -> tuple[int, int]:
    """Run one fix cycle. Returns (fixed, errors).

    Finds PRs with broken_wiki_links issues (from eval) that haven't
    exceeded the fix_attempts cap. Processes at most
    config.MAX_FIX_PER_CYCLE per cycle to avoid overlapping with eval.

    Args:
        conn: open database connection (rows support key access —
            presumably sqlite3 with a Row factory; confirm against db module).
        max_workers: optional smaller batch size; the effective limit is
            min(max_workers, config.MAX_FIX_PER_CYCLE) and never exceeds
            the configured cap.

    Returns:
        (fixed, errors): counts of PRs successfully fixed and PRs that
        raised during fixing. Skipped PRs count toward neither.
    """
    # Garbage collection: close PRs with exhausted fix budget that are stuck in open.
    # These were evaluated, rejected, fixer couldn't help, nobody closes them.
    # (Epimetheus session 2 — prevents zombie PR accumulation)
    # Bug fix: must also close on Forgejo + delete branch, not just DB update.
    # DB-only close caused Forgejo/DB state divergence — branches stayed alive,
    # blocking Gate 2 in batch-extract for 5 days. (Epimetheus session 4)
    gc_rows = conn.execute(
        """SELECT number, branch FROM prs
            WHERE status = 'open'
            AND fix_attempts >= ?
            AND (domain_verdict = 'request_changes' OR leo_verdict = 'request_changes')""",
        (config.MAX_FIX_ATTEMPTS + 2,),  # +2: grace margin beyond the normal cap
    ).fetchall()
    if gc_rows:
        from .forgejo import api as _gc_forgejo, repo_path as _gc_repo_path
        for row in gc_rows:
            pr_num, branch = row["number"], row["branch"]
            try:
                # Order matters: comment for the audit trail, close the PR,
                # then delete the branch so Gate 2 in batch-extract unblocks.
                await _gc_forgejo("POST", _gc_repo_path(f"issues/{pr_num}/comments"),
                                  {"body": "Auto-closed: fix budget exhausted. Source will be re-extracted."})
                await _gc_forgejo("PATCH", _gc_repo_path(f"pulls/{pr_num}"), {"state": "closed"})
                if branch:
                    await _gc_forgejo("DELETE", _gc_repo_path(f"branches/{branch}"))
            except Exception as e:
                # Best-effort: log and fall through — the DB close below
                # still happens so the pipeline stops retrying this PR.
                logger.warning("GC: failed to close PR #%d on Forgejo: %s", pr_num, e)
            conn.execute(
                "UPDATE prs SET status = 'closed', last_error = 'fix budget exhausted — auto-closed' WHERE number = ?",
                (pr_num,),
            )
        logger.info("GC: closed %d exhausted PRs (DB + Forgejo + branch cleanup)", len(gc_rows))

    # Clamp the batch: callers may shrink it, but never exceed the config cap.
    batch_limit = min(max_workers or config.MAX_FIX_PER_CYCLE, config.MAX_FIX_PER_CYCLE)

    # Only fix PRs that passed tier0 but have broken_wiki_links from eval.
    # Do NOT fix PRs with tier0_pass=0 where the only issue is wiki links —
    # wiki links are warnings, not gates. Fixing them creates an infinite
    # fixer→validate→fixer loop. (Epimetheus session 2 — root cause of overnight stall)
    # The 5-minute last_attempt cooldown avoids re-claiming a PR that was
    # just touched by another stage.
    rows = conn.execute(
        """SELECT number FROM prs
            WHERE status = 'open'
            AND tier0_pass = 1
            AND eval_issues LIKE '%broken_wiki_links%'
            AND COALESCE(fix_attempts, 0) < ?
            AND (last_attempt IS NULL OR last_attempt < datetime('now', '-5 minutes'))
            ORDER BY created_at ASC
            LIMIT ?""",
        (config.MAX_FIX_ATTEMPTS, batch_limit),
    ).fetchall()

    if not rows:
        return 0, 0

    fixed = 0
    errors = 0

    # Sequential, not concurrent — each fix holds a worktree and a DB claim.
    for row in rows:
        try:
            result = await _fix_wiki_links_in_pr(conn, row["number"])
            if result.get("fixed"):
                fixed += 1
            elif result.get("skipped"):
                logger.debug("PR #%d fix skipped: %s", row["number"], result.get("reason"))
        except Exception:
            logger.exception("Failed to fix PR #%d", row["number"])
            errors += 1
            # Release the 'fixing' claim so the PR isn't stuck after a crash.
            conn.execute("UPDATE prs SET status = 'open' WHERE number = ?", (row["number"],))

    if fixed or errors:
        logger.info("Fix cycle: %d fixed, %d errors", fixed, errors)

    return fixed, errors
|