fix: wire commit_type into contributor role assignment
The contributor attribution always recorded "extractor" regardless of the PR's refined commit_type. Added COMMIT_TYPE_TO_ROLE mapping and applied it in all three attribution paths (Pentagon-Agent trailer, git author fallback, PR agent fallback). Backfill script resets and re-derives role counts from prs.commit_type. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
f463f49b46
commit
c29049924e
2 changed files with 135 additions and 3 deletions
|
|
@ -38,6 +38,22 @@ def is_knowledge_pr(diff: str) -> bool:
|
|||
return False
|
||||
|
||||
|
||||
# Maps a refined commit_type to the contributor role it counts toward.
# Types not listed here fall back to "extractor" (see commit_type_to_role).
COMMIT_TYPE_TO_ROLE = {
    "challenge": "challenger",
    "enrich": "synthesizer",
    "extract": "extractor",
    "research": "synthesizer",
    "entity": "extractor",
    "reweave": "synthesizer",
    "fix": "extractor",
}


def commit_type_to_role(commit_type: str) -> str:
    """Translate a refined commit_type into a contributor role name.

    Any commit_type missing from COMMIT_TYPE_TO_ROLE resolves to the
    default role "extractor".
    """
    try:
        return COMMIT_TYPE_TO_ROLE[commit_type]
    except KeyError:
        return "extractor"
|
||||
|
||||
|
||||
def refine_commit_type(diff: str, branch_commit_type: str) -> str:
|
||||
"""Refine commit_type from diff content when branch prefix is ambiguous.
|
||||
|
||||
|
|
@ -126,8 +142,9 @@ async def record_contributor_attribution(conn, pr_number: int, branch: str, git_
|
|||
for match in re.finditer(r"Pentagon-Agent:\s*(\S+)\s*<([^>]+)>", log_output):
|
||||
agent_name = match.group(1).lower()
|
||||
agent_uuid = match.group(2)
|
||||
role = commit_type_to_role(refined_type)
|
||||
upsert_contributor(
|
||||
conn, agent_name, agent_uuid, "extractor", today,
|
||||
conn, agent_name, agent_uuid, role, today,
|
||||
)
|
||||
agents_found.add(agent_name)
|
||||
|
||||
|
|
@ -167,13 +184,15 @@ async def record_contributor_attribution(conn, pr_number: int, branch: str, git_
|
|||
for author_line in author_output.strip().split("\n"):
|
||||
author_name = author_line.strip().lower()
|
||||
if author_name and author_name not in _BOT_AUTHORS:
|
||||
upsert_contributor(conn, author_name, None, "extractor", today)
|
||||
role = commit_type_to_role(refined_type)
|
||||
upsert_contributor(conn, author_name, None, role, today)
|
||||
agents_found.add(author_name)
|
||||
|
||||
if not agents_found:
|
||||
row = conn.execute("SELECT agent FROM prs WHERE number = ?", (pr_number,)).fetchone()
|
||||
if row and row["agent"] and row["agent"] != "external":
|
||||
upsert_contributor(conn, row["agent"].lower(), None, "extractor", today)
|
||||
role = commit_type_to_role(refined_type)
|
||||
upsert_contributor(conn, row["agent"].lower(), None, role, today)
|
||||
|
||||
|
||||
def upsert_contributor(
|
||||
|
|
|
|||
113
ops/backfill-contributor-roles.py
Normal file
113
ops/backfill-contributor-roles.py
Normal file
|
|
@ -0,0 +1,113 @@
|
|||
#!/usr/bin/env python3
|
||||
"""Backfill contributor role counts from prs.commit_type.
|
||||
|
||||
Resets all role counts to 0, then re-derives them from the prs table's
|
||||
commit_type column using the COMMIT_TYPE_TO_ROLE mapping. This corrects
|
||||
the bug where all contributors were recorded as 'extractor' regardless
|
||||
of their actual commit_type.
|
||||
|
||||
Usage:
|
||||
python3 ops/backfill-contributor-roles.py [--dry-run]
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import sqlite3
|
||||
import sys
|
||||
import os
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
from lib.contributor import COMMIT_TYPE_TO_ROLE, commit_type_to_role
|
||||
|
||||
DB_PATH = os.environ.get("PIPELINE_DB", "/opt/teleo-eval/pipeline/pipeline.db")
|
||||
|
||||
|
||||
def backfill(db_path: str, dry_run: bool = False):
    """Re-derive contributor role counts from prs.commit_type.

    Resets the extractor/challenger/synthesizer/sourcer counts to zero, then
    tallies one role per merged PR (via commit_type_to_role) and writes the
    totals back to the contributors table.

    Args:
        db_path: Path to the pipeline SQLite database file.
        dry_run: When True, print what would change without writing anything.
    """
    conn = sqlite3.connect(db_path)
    conn.row_factory = sqlite3.Row
    # try/finally guarantees the connection is closed even if a query raises;
    # the original version leaked it on any error.
    try:
        # All merged PRs that have an agent attributed.
        prs = conn.execute("""
            SELECT number, commit_type, agent, branch
            FROM prs
            WHERE status = 'merged' AND agent IS NOT NULL
            ORDER BY number
        """).fetchall()

        print(f"Processing {len(prs)} merged PRs...")

        # Reset only the counts that are derived from commit_type.
        # NOTE(review): reviewer_count is not reset here — presumably reviews
        # are not derived from commit_type; confirm against the schema.
        if not dry_run:
            conn.execute("""
                UPDATE contributors SET
                    extractor_count = 0,
                    challenger_count = 0,
                    synthesizer_count = 0,
                    sourcer_count = 0
            """)
            print("Reset all role counts to 0")

        # Tally one role per PR, keyed by the lowercased agent handle.
        role_counts: dict[str, dict[str, int]] = {}
        for pr in prs:
            agent = pr["agent"].lower() if pr["agent"] else None
            # Skip pseudo-agents that are not real contributors.
            if not agent or agent in ("external", "pipeline"):
                continue

            # A missing commit_type defaults to "extract" (extractor role).
            commit_type = pr["commit_type"] or "extract"
            role = commit_type_to_role(commit_type)

            if agent not in role_counts:
                role_counts[agent] = {
                    "extractor_count": 0, "challenger_count": 0,
                    "synthesizer_count": 0, "sourcer_count": 0,
                    "reviewer_count": 0,
                }
            role_col = f"{role}_count"
            # Guard: ignore any role without a matching *_count column.
            if role_col in role_counts[agent]:
                role_counts[agent][role_col] += 1

        # Apply the tallied counts per contributor.
        for handle, counts in sorted(role_counts.items()):
            non_zero = {k: v for k, v in counts.items() if v > 0}
            print(f" {handle}: {non_zero or '(no knowledge PRs)'}")
            if not dry_run and non_zero:
                # Column names come from the fixed dict above and the values
                # are ints we computed, so interpolating the SET clause is
                # safe; the handle is still bound as a parameter.
                set_clauses = ", ".join(f"{k} = {v}" for k, v in non_zero.items())
                conn.execute(
                    f"UPDATE contributors SET {set_clauses}, updated_at = datetime('now') WHERE handle = ?",
                    (handle,),
                )

        if not dry_run:
            conn.commit()
            print("\nBackfill committed.")
        else:
            print("\n[DRY RUN] No changes made.")

        # Post-run summary of role counts (only queried after a real run).
        print("\nRole distribution across all contributors:")
        if not dry_run:
            rows = conn.execute("""
                SELECT handle, extractor_count, challenger_count, synthesizer_count,
                       sourcer_count, reviewer_count
                FROM contributors
                ORDER BY (extractor_count + challenger_count + synthesizer_count) DESC
            """).fetchall()
            for r in rows:
                parts = []
                if r["extractor_count"]: parts.append(f"extract:{r['extractor_count']}")
                if r["challenger_count"]: parts.append(f"challenge:{r['challenger_count']}")
                if r["synthesizer_count"]: parts.append(f"synthesize:{r['synthesizer_count']}")
                if r["sourcer_count"]: parts.append(f"source:{r['sourcer_count']}")
                if r["reviewer_count"]: parts.append(f"review:{r['reviewer_count']}")
                if parts:
                    print(f" {r['handle']}: {', '.join(parts)}")
    finally:
        conn.close()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # CLI entry point: optional --dry-run flag and a --db path override
    # (defaults to the PIPELINE_DB-derived DB_PATH).
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("--dry-run", action="store_true")
    arg_parser.add_argument("--db", default=DB_PATH)
    opts = arg_parser.parse_args()
    backfill(opts.db, opts.dry_run)
|
||||
Loading…
Reference in a new issue