From 9925576c132ac79eef4996c3364ec8425cca2b77 Mon Sep 17 00:00:00 2001
From: m3taversal
Date: Tue, 7 Apr 2026 12:54:06 +0100
Subject: [PATCH] ship: add contributor attribution tracing to PR lifecycle
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Migration v19: submitted_by column on prs + sources tables
- extract.py: propagates proposed_by from source frontmatter → PR record
- merge.py: sets submitted_by from Forgejo author for human PRs
- dashboard_prs.py: redesigned with Contributor column, improved claim visibility in expanded rows, cost estimates, evaluator chain display
- dashboard_routes.py: submitted_by + source_path in pr-lifecycle API
- backfill_submitted_by.py: one-time backfill (1525/1777 PRs matched)

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 ops/diagnostics/backfill_submitted_by.py | 158 ++++++++++++
 ops/diagnostics/dashboard_prs.py         | 279 ++++++++++++++---------
 ops/diagnostics/dashboard_routes.py      |   5 +-
 ops/pipeline-v2/lib/db.py                |  16 +-
 ops/pipeline-v2/lib/extract.py           |  43 ++++
 ops/pipeline-v2/lib/merge.py             |  10 +-
 6 files changed, 398 insertions(+), 113 deletions(-)
 create mode 100644 ops/diagnostics/backfill_submitted_by.py

diff --git a/ops/diagnostics/backfill_submitted_by.py b/ops/diagnostics/backfill_submitted_by.py
new file mode 100644
index 000000000..8c0933582
--- /dev/null
+++ b/ops/diagnostics/backfill_submitted_by.py
@@ -0,0 +1,158 @@
+#!/usr/bin/env python3
+"""One-time backfill: populate submitted_by on prs table from source archive files.
+
+Matches PRs to sources via branch name slug → source filename.
+Reads proposed_by and intake_tier from source frontmatter.
+
+Run: python3 backfill_submitted_by.py
+"""
+
+import os
+import re
+import sqlite3
+from contextlib import closing
+from pathlib import Path
+
+DB_PATH = os.environ.get("DB_PATH", "/opt/teleo-eval/pipeline/pipeline.db")
+ARCHIVE_DIR = Path(os.environ.get("ARCHIVE_DIR", "/opt/teleo-eval/workspaces/main/inbox/archive"))
+
+# Branch prefixes owned by a named agent.
+AGENT_PREFIXES = ("rio", "theseus", "vida", "clay", "astra", "leo")
+# Branch prefixes created by the automated pipeline rather than a named agent.
+PIPELINE_PREFIXES = ("extract", "ingestion", "reweave")
+# Trailing 4-char hex hash appended to branch names (e.g., -3e68, -a6af).
+_BRANCH_HASH_RE = re.compile(r"-[0-9a-f]{4}$")
+
+
+def parse_frontmatter(path: Path) -> dict:
+    """Parse YAML-like frontmatter from a markdown file.
+
+    Only flat ``key: value`` lines are read; empty and ``null`` values become None.
+    Returns {} when the file has no terminated leading ``---`` block.
+    """
+    text = path.read_text(encoding="utf-8", errors="replace")
+    if not text.startswith("---"):
+        return {}
+    # The closing delimiter must start a line: a bare find("---") would also
+    # match dashes inside a value and silently truncate the frontmatter.
+    end = text.find("\n---", 3)
+    if end == -1:
+        return {}
+    fm = {}
+    for line in text[3:end].splitlines():
+        key, sep, val = line.strip().partition(":")
+        if not sep:
+            continue
+        val = val.strip().strip('"').strip("'")
+        fm[key.strip()] = None if val == "" or val.lower() == "null" else val
+    return fm
+
+
+def slug_from_branch(branch: str) -> str:
+    """Extract source slug from branch name like 'extract/2026-04-06-slug-hash'."""
+    _, sep, rest = branch.partition("/")
+    return _BRANCH_HASH_RE.sub("", rest if sep else branch)
+
+
+def find_source(slug: str, source_index: dict):
+    """Return (stem, frontmatter) for the source file matching slug, or None.
+
+    An exact stem match wins. Otherwise fall back to substring matching in sorted
+    stem order so repeated runs pick the same file. Sources with empty
+    frontmatter are skipped because they carry no attribution.
+    """
+    if not slug:
+        # "" is a substring of every stem and would match an arbitrary source.
+        return None
+    fm = source_index.get(slug)
+    if fm:
+        return slug, fm
+    for stem in sorted(source_index):
+        fm = source_index[stem]
+        if fm and (slug in stem or stem in slug):
+            return stem, fm
+    return None
+
+
+def contributor_from_source(branch: str, fm: dict):
+    """Derive the contributor label from a matched source's frontmatter, or None."""
+    proposed_by = fm.get("proposed_by")
+    if proposed_by:
+        return proposed_by.strip().strip('"').strip("'") or None
+    intake_tier = fm.get("intake_tier")
+    if intake_tier == "research-task":
+        # Derive agent from branch prefix
+        prefix = branch.split("/", 1)[0] if "/" in branch else "unknown"
+        agent = "pipeline" if prefix in PIPELINE_PREFIXES else prefix
+        return f"{agent} (self-directed)"
+    if intake_tier == "directed":
+        return "directed (unknown)"
+    return None
+
+
+def contributor_from_branch(branch: str):
+    """Derive the contributor label from the branch prefix alone, or None."""
+    prefix, sep, _ = branch.partition("/")
+    if not sep:
+        return None
+    if prefix in ("extract", "ingestion"):
+        return "pipeline (self-directed)"
+    if prefix in AGENT_PREFIXES:
+        return f"{prefix} (self-directed)"
+    if prefix == "reweave":
+        return "pipeline (reweave)"
+    return None
+
+
+def main():
+    """Backfill prs.submitted_by (and source_path when a source file matches)."""
+    # Build source index: filename stem → frontmatter
+    source_index = {}
+    if ARCHIVE_DIR.exists():
+        source_index = {f.stem: parse_frontmatter(f) for f in ARCHIVE_DIR.glob("*.md")}
+    print(f"Indexed {len(source_index)} source files from {ARCHIVE_DIR}")
+
+    # closing() releases the connection even if a query raises; sqlite3's own
+    # context manager only scopes the transaction.
+    with closing(sqlite3.connect(DB_PATH, timeout=30)) as conn:
+        conn.row_factory = sqlite3.Row
+
+        # Get all PRs without submitted_by
+        prs = conn.execute(
+            "SELECT number, branch FROM prs WHERE submitted_by IS NULL AND branch IS NOT NULL"
+        ).fetchall()
+        print(f"Found {len(prs)} PRs without submitted_by")
+
+        updated = 0
+        for pr in prs:
+            branch = pr["branch"]
+            match = find_source(slug_from_branch(branch), source_index)
+            if match:
+                stem, fm = match
+                contributor = contributor_from_source(branch, fm)
+                # NOTE(review): a matched source with no usable attribution leaves
+                # the PR untouched (no branch-prefix fallback) — confirm intended.
+                if contributor:
+                    conn.execute(
+                        "UPDATE prs SET submitted_by = ?, source_path = ? WHERE number = ?",
+                        # Use the matched stem, not the slug: they differ on partial matches.
+                        (contributor, f"inbox/archive/{stem}.md", pr["number"]),
+                    )
+                    updated += 1
+                continue
+
+            contributor = contributor_from_branch(branch)
+            if contributor:
+                conn.execute(
+                    "UPDATE prs SET submitted_by = ? WHERE number = ?",
+                    (contributor, pr["number"]),
+                )
+                updated += 1
+
+        conn.commit()
+
+    print(f"Updated {updated}/{len(prs)} PRs with submitted_by")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/ops/diagnostics/dashboard_prs.py b/ops/diagnostics/dashboard_prs.py
index 121d9266e..0fd21c24f 100644
--- a/ops/diagnostics/dashboard_prs.py
+++ b/ops/diagnostics/dashboard_prs.py
@@ -1,8 +1,8 @@
 """PR Lifecycle dashboard — single-page view of every PR through the pipeline.
 
-Sortable table: PR#, summary, agent, domain, outcome, TTM, date.
-Click any row to expand the full trace (triage reasoning, review text, cascade).
-Hero cards: total PRs, merge rate, median TTM, median eval rounds.
+Sortable table: PR#, summary, claims, domain, contributor, outcome, evals, evaluator, cost, date.
+Click any row to expand: claim titles, eval chain, timeline, reviews, issues.
+Hero cards: total PRs, merge rate, total claims, est. cost.
 
 Data sources: prs table, audit_log (eval rounds), review_records.
Owner: Ship @@ -14,19 +14,23 @@ from shared_ui import render_page EXTRA_CSS = """ + .content-wrapper { max-width: 1600px !important; } .filters { display: flex; gap: 12px; flex-wrap: wrap; margin-bottom: 16px; } .filters select, .filters input { background: #161b22; color: #c9d1d9; border: 1px solid #30363d; border-radius: 6px; padding: 6px 10px; font-size: 12px; } .filters select:focus, .filters input:focus { border-color: #58a6ff; outline: none; } .pr-table { width: 100%; border-collapse: collapse; font-size: 13px; table-layout: fixed; } - .pr-table th:nth-child(1) { width: 60px; } /* PR# */ - .pr-table th:nth-child(2) { width: 38%; } /* Summary */ - .pr-table th:nth-child(3) { width: 10%; } /* Agent */ - .pr-table th:nth-child(4) { width: 14%; } /* Domain */ - .pr-table th:nth-child(5) { width: 10%; } /* Outcome */ - .pr-table th:nth-child(6) { width: 7%; } /* TTM */ - .pr-table th:nth-child(7) { width: 10%; } /* Date */ + .pr-table th:nth-child(1) { width: 50px; } /* PR# */ + .pr-table th:nth-child(2) { width: 28%; } /* Summary */ + .pr-table th:nth-child(3) { width: 50px; } /* Claims */ + .pr-table th:nth-child(4) { width: 11%; } /* Domain */ + .pr-table th:nth-child(5) { width: 10%; } /* Contributor */ + .pr-table th:nth-child(6) { width: 10%; } /* Outcome */ + .pr-table th:nth-child(7) { width: 44px; } /* Evals */ + .pr-table th:nth-child(8) { width: 12%; } /* Evaluator */ + .pr-table th:nth-child(9) { width: 60px; } /* Cost */ + .pr-table th:nth-child(10) { width: 80px; } /* Date */ .pr-table td { overflow: hidden; text-overflow: ellipsis; white-space: nowrap; padding: 8px 6px; } .pr-table td:nth-child(2) { white-space: normal; overflow: visible; line-height: 1.4; } .pr-table th { cursor: pointer; user-select: none; position: relative; padding: 8px 18px 8px 6px; } @@ -46,11 +50,23 @@ EXTRA_CSS = """ .pr-table td .summary-text { font-size: 12px; color: #c9d1d9; } .pr-table td .review-snippet { font-size: 11px; color: #f85149; margin-top: 2px; opacity: 0.8; } 
.pr-table td .model-tag { font-size: 10px; color: #6e7681; background: #161b22; border-radius: 3px; padding: 1px 4px; } + .pr-table td .contributor-tag { font-size: 11px; color: #d2a8ff; } + .pr-table td .contributor-self { font-size: 11px; color: #6e7681; font-style: italic; } .pr-table td .expand-chevron { display: inline-block; width: 12px; color: #484f58; font-size: 10px; transition: transform 0.2s; } .pr-table tr.expanded .expand-chevron { transform: rotate(90deg); color: #58a6ff; } .trace-panel { background: #0d1117; border: 1px solid #30363d; border-radius: 8px; padding: 16px; margin: 4px 0 8px 0; font-size: 12px; display: none; } .trace-panel.open { display: block; } + .trace-panel h4 { color: #58a6ff; font-size: 12px; margin: 12px 0 6px 0; } + .trace-panel h4:first-child { margin-top: 0; } + .claim-list { list-style: none; padding: 0; margin: 0; } + .claim-list li { padding: 4px 0 4px 16px; border-left: 2px solid #238636; color: #c9d1d9; font-size: 12px; line-height: 1.5; } + .claim-list li .claim-confidence { font-size: 10px; color: #8b949e; margin-left: 6px; } + .issues-box { background: #1c1210; border: 1px solid #f8514933; border-radius: 6px; + padding: 8px 12px; margin: 4px 0; font-size: 12px; color: #f85149; } + .eval-chain { background: #161b22; border-radius: 6px; padding: 8px 12px; margin: 4px 0; font-size: 12px; } + .eval-chain .chain-step { display: inline-block; margin-right: 6px; } + .eval-chain .chain-arrow { color: #484f58; margin: 0 4px; } .trace-timeline { list-style: none; padding: 0; } .trace-timeline li { padding: 4px 0; border-left: 2px solid #30363d; padding-left: 12px; margin-left: 8px; } .trace-timeline li .ts { color: #484f58; font-size: 11px; } @@ -66,9 +82,6 @@ EXTRA_CSS = """ .pagination button:hover { border-color: #58a6ff; } .pagination button:disabled { opacity: 0.4; cursor: default; } .pagination .page-info { color: #8b949e; font-size: 12px; } - .stat-row { display: flex; gap: 6px; flex-wrap: wrap; margin-top: 4px; } - 
.stat-row .mini-stat { font-size: 11px; color: #8b949e; } - .stat-row .mini-stat span { color: #c9d1d9; font-weight: 600; } """ @@ -80,15 +93,14 @@ def render_prs_page(now: datetime) -> str:
Total PRs
--
Merge Rate
--
-
Median Time-to-Merge
--
-
Median Eval Rounds
--
Total Claims
--
+
Est. Cost
--
- +