Phase 5: Extract contributor.py from merge.py (−234 lines)
Some checks are pending
CI / lint-and-test (push) Waiting to run
Some checks are pending
CI / lint-and-test (push) Waiting to run
5 functions extracted: is_knowledge_pr, refine_commit_type, record_contributor_attribution, upsert_contributor, recalculate_tier. git_fn parameter injection avoids circular import (merge→contributor, contributor needs _git from merge). Single call site passes _git. merge.py: 1912 → 1678 lines. 23 new tests, zero regressions. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
f46e14dfae
commit
53dc18afd5
3 changed files with 509 additions and 236 deletions
244
lib/contributor.py
Normal file
244
lib/contributor.py
Normal file
|
|
@ -0,0 +1,244 @@
|
||||||
|
"""Contributor attribution — tracks who contributed what and calculates tiers.
|
||||||
|
|
||||||
|
Extracted from merge.py (Phase 5 decomposition). Functions:
|
||||||
|
- is_knowledge_pr: diff classification (knowledge vs pipeline-only)
|
||||||
|
- refine_commit_type: extract → challenge/enrich refinement from diff content
|
||||||
|
- record_contributor_attribution: parse trailers + frontmatter, upsert contributors
|
||||||
|
- upsert_contributor: insert/update contributor record with role counts
|
||||||
|
- recalculate_tier: tier promotion based on config rules
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
|
||||||
|
from . import config, db
|
||||||
|
from .forgejo import get_pr_diff
|
||||||
|
|
||||||
|
logger = logging.getLogger("pipeline.contributor")
|
||||||
|
|
||||||
|
|
||||||
|
def is_knowledge_pr(diff: str) -> bool:
    """Check if a PR touches knowledge files (claims, decisions, core, foundations).

    Knowledge PRs get full CI attribution weight.
    Pipeline-only PRs (inbox, entities, agents, archive) get zero CI weight.

    Mixed PRs count as knowledge — if a PR adds a claim, it gets attribution
    even if it also moves source files. Knowledge takes priority. (Ganymede review)
    """
    knowledge_prefixes = ("domains/", "core/", "foundations/", "decisions/")

    def _diff_path(header: str) -> str:
        # "+++ b/domains/x.md" -> "domains/x.md"; drop the "+++ b" / "--- a" head.
        return header.split("/", 1)[1] if "/" in header else ""

    file_headers = (
        ln for ln in diff.split("\n")
        if ln.startswith(("+++ b/", "--- a/"))
    )
    return any(_diff_path(ln).startswith(knowledge_prefixes) for ln in file_headers)
|
||||||
|
|
||||||
|
|
||||||
|
def refine_commit_type(diff: str, branch_commit_type: str) -> str:
    """Refine commit_type from diff content when branch prefix is ambiguous.

    Branch prefix gives initial classification (extract, research, entity, etc.).
    For 'extract' branches, diff content can distinguish:
    - challenge: adds challenged_by edges to existing claims
    - enrich: modifies existing claim frontmatter without new files
    - extract: creates new claim files (default for extract branches)

    Only refines 'extract' type — other branch types (research, entity, reweave, fix)
    are already specific enough.

    Args:
        diff: unified diff text of the PR.
        branch_commit_type: initial classification from the branch prefix.

    Returns:
        'challenge', 'enrich', or 'extract' for extract branches; otherwise
        branch_commit_type unchanged.
    """
    if branch_commit_type != "extract":
        return branch_commit_type

    knowledge_prefixes = ("domains/", "core/", "foundations/")
    new_files = 0
    modified_files = 0
    has_challenge_edge = False

    # Tracks whether the file section currently being scanned was introduced
    # as a brand-new file ("new file" marker precedes its "+++ b/" header).
    # NOTE: the original also kept an `in_diff_header` flag that was assigned
    # but never read — dead code, removed here.
    current_is_new = False
    for line in diff.split("\n"):
        if line.startswith("diff --git"):
            current_is_new = False
        elif line.startswith("new file"):
            current_is_new = True
        elif line.startswith("+++ b/"):
            path = line[6:]
            if path.startswith(knowledge_prefixes):
                if current_is_new:
                    new_files += 1
                else:
                    modified_files += 1
        elif line.startswith("+") and not line.startswith("+++"):
            # Added content line: look for challenge-edge frontmatter keys.
            if "challenged_by:" in line or "challenges:" in line:
                has_challenge_edge = True

    if has_challenge_edge and new_files == 0:
        return "challenge"
    if modified_files > 0 and new_files == 0:
        return "enrich"
    return "extract"
|
||||||
|
|
||||||
|
|
||||||
|
async def record_contributor_attribution(conn, pr_number: int, branch: str, git_fn):
    """Record contributor attribution after a successful merge.

    Identifies contributors from two sources — Pentagon-Agent git trailers on
    the branch commits and attribution blocks in claim frontmatter added by
    the diff — and upserts each into the contributors table. Also refines the
    PR's commit_type from the diff content. Pipeline-only PRs (no knowledge
    files) are skipped.

    Args:
        conn: sqlite connection (rows addressable by column name).
        pr_number: number of the merged PR.
        branch: the PR's source branch (used for the git log range).
        git_fn: async callable matching _git signature (for git log parsing).
    """
    from datetime import date as _date

    today_iso = _date.today().isoformat()

    # The diff drives everything: classification, refinement, frontmatter scan.
    pr_diff = await get_pr_diff(pr_number)
    if not pr_diff:
        return

    # Pipeline-only PRs (inbox, entities, agents) don't count toward CI.
    if not is_knowledge_pr(pr_diff):
        logger.info("PR #%d: pipeline-only commit — skipping CI attribution", pr_number)
        return

    # Refine commit_type from diff content (branch prefix may be too broad).
    type_row = conn.execute("SELECT commit_type FROM prs WHERE number = ?", (pr_number,)).fetchone()
    original_type = type_row["commit_type"] if type_row and type_row["commit_type"] else "extract"
    refined = refine_commit_type(pr_diff, original_type)
    if refined != original_type:
        conn.execute("UPDATE prs SET commit_type = ? WHERE number = ?", (refined, pr_number))
        logger.info("PR #%d: commit_type refined %s → %s", pr_number, original_type, refined)

    # Parse Pentagon-Agent trailers from the branch's commit messages.
    agents_found: set[str] = set()
    rc, log_output = await git_fn(
        "log", f"origin/main..origin/{branch}", "--format=%b%n%N",
        timeout=10,
    )
    if rc == 0:
        for trailer in re.finditer(r"Pentagon-Agent:\s*(\S+)\s*<([^>]+)>", log_output):
            name = trailer.group(1).lower()
            upsert_contributor(conn, name, trailer.group(2), "extractor", today_iso)
            agents_found.add(name)

    # Walk added diff lines looking for attribution YAML in claim frontmatter.
    roles = ("sourcer", "extractor", "challenger", "synthesizer", "reviewer")
    current_role = None
    for raw in pr_diff.split("\n"):
        if not raw.startswith("+") or raw.startswith("+++"):
            continue
        content = raw[1:].strip()

        # A role heading opens a new section of the attribution block.
        heading = next((r for r in roles if content.startswith(f"{r}:")), None)
        if heading:
            current_role = heading

        # Handle entries are credited under the most recent role heading.
        entry = re.match(r'-\s*handle:\s*["\']?([^"\']+)["\']?', content)
        if entry and current_role:
            id_match = re.search(r'agent_id:\s*["\']?([^"\']+)', content)
            upsert_contributor(
                conn,
                entry.group(1).strip().lower(),
                id_match.group(1).strip() if id_match else None,
                current_role,
                today_iso,
            )

    # Fallback: no trailer found — credit the PR's recorded agent as extractor.
    if not agents_found:
        agent_row = conn.execute("SELECT agent FROM prs WHERE number = ?", (pr_number,)).fetchone()
        if agent_row and agent_row["agent"]:
            upsert_contributor(conn, agent_row["agent"].lower(), None, "extractor", today_iso)
|
||||||
|
|
||||||
|
|
||||||
|
def upsert_contributor(
    conn, handle: str, agent_id: str | None, role: str, date_str: str,
):
    """Insert or update a contributor record, bumping the given role's count.

    Extractor/sourcer contributions also increment claims_merged. Unknown
    roles are logged and ignored. The contributor's tier is recalculated
    after every successful write.

    Args:
        conn: sqlite connection.
        handle: contributor handle (callers pass it lower-cased).
        agent_id: optional stable agent identifier (insert only).
        role: one of sourcer/extractor/challenger/synthesizer/reviewer.
        date_str: ISO date of this contribution.
    """
    valid_roles = ("sourcer", "extractor", "challenger", "synthesizer", "reviewer")
    if role not in valid_roles:
        logger.warning("Unknown contributor role: %s", role)
        return
    # role is whitelist-validated above, so interpolating the derived column
    # name into SQL is safe (values still go through placeholders).
    role_col = f"{role}_count"

    already_known = conn.execute(
        "SELECT handle FROM contributors WHERE handle = ?", (handle,)
    ).fetchone() is not None

    if already_known:
        conn.execute(
            f"""UPDATE contributors SET
                {role_col} = {role_col} + 1,
                claims_merged = claims_merged + CASE WHEN ? IN ('extractor', 'sourcer') THEN 1 ELSE 0 END,
                last_contribution = ?,
                updated_at = datetime('now')
                WHERE handle = ?""",
            (role, date_str, handle),
        )
    else:
        conn.execute(
            f"""INSERT INTO contributors (handle, agent_id, first_contribution, last_contribution, {role_col}, claims_merged)
                VALUES (?, ?, ?, ?, 1, CASE WHEN ? IN ('extractor', 'sourcer') THEN 1 ELSE 0 END)""",
            (handle, agent_id, date_str, date_str, role),
        )

    # Counts changed — the contributor may have crossed a tier threshold.
    recalculate_tier(conn, handle)
|
||||||
|
|
||||||
|
|
||||||
|
def recalculate_tier(conn, handle: str):
    """Recalculate a contributor's tier from config rules and persist changes.

    Tier ladder: new -> contributor -> veteran. Veteran requires claim,
    tenure, and challenge thresholds; contributor requires only a claim
    count. A tier change is logged and written to the audit log.
    """
    from datetime import date as _date, datetime as _dt

    row = conn.execute(
        "SELECT claims_merged, challenges_survived, first_contribution, tier FROM contributors WHERE handle = ?",
        (handle,),
    ).fetchone()
    if not row:
        return

    old_tier = row["tier"]
    merged = row["claims_merged"] or 0
    survived = row["challenges_survived"] or 0

    # Tenure in days since first contribution; a malformed date counts as 0.
    tenure_days = 0
    if row["first_contribution"]:
        try:
            started = _dt.strptime(row["first_contribution"], "%Y-%m-%d").date()
        except ValueError:
            pass
        else:
            tenure_days = (_date.today() - started).days

    # Evaluate veteran (highest tier) first, then contributor, else new.
    vet_rules = config.CONTRIBUTOR_TIER_RULES["veteran"]
    is_veteran = (
        merged >= vet_rules["claims_merged"]
        and tenure_days >= vet_rules["min_days_since_first"]
        and survived >= vet_rules["challenges_survived"]
    )
    if is_veteran:
        new_tier = "veteran"
    elif merged >= config.CONTRIBUTOR_TIER_RULES["contributor"]["claims_merged"]:
        new_tier = "contributor"
    else:
        new_tier = "new"

    if new_tier == old_tier:
        return

    conn.execute(
        "UPDATE contributors SET tier = ?, updated_at = datetime('now') WHERE handle = ?",
        (new_tier, handle),
    )
    logger.info("Contributor %s: tier %s → %s", handle, old_tier, new_tier)
    db.audit(
        conn, "contributor", "tier_change",
        json.dumps({"handle": handle, "from": old_tier, "to": new_tier}),
    )
|
||||||
238
lib/merge.py
238
lib/merge.py
|
|
@ -21,6 +21,7 @@ from collections import defaultdict
|
||||||
|
|
||||||
from . import config, db
|
from . import config, db
|
||||||
from .db import classify_branch
|
from .db import classify_branch
|
||||||
|
from .contributor import record_contributor_attribution
|
||||||
from .dedup import dedup_evidence_blocks
|
from .dedup import dedup_evidence_blocks
|
||||||
from .domains import detect_domain_from_branch
|
from .domains import detect_domain_from_branch
|
||||||
from .forgejo import api as forgejo_api
|
from .forgejo import api as forgejo_api
|
||||||
|
|
@ -800,241 +801,6 @@ async def _delete_remote_branch(branch: str):
|
||||||
logger.warning("Failed to delete remote branch %s — cosmetic, continuing", branch)
|
logger.warning("Failed to delete remote branch %s — cosmetic, continuing", branch)
|
||||||
|
|
||||||
|
|
||||||
# --- Contributor attribution ---
|
|
||||||
|
|
||||||
|
|
||||||
def _is_knowledge_pr(diff: str) -> bool:
|
|
||||||
"""Check if a PR touches knowledge files (claims, decisions, core, foundations).
|
|
||||||
|
|
||||||
Knowledge PRs get full CI attribution weight.
|
|
||||||
Pipeline-only PRs (inbox, entities, agents, archive) get zero CI weight.
|
|
||||||
|
|
||||||
Mixed PRs count as knowledge — if a PR adds a claim, it gets attribution
|
|
||||||
even if it also moves source files. Knowledge takes priority. (Ganymede review)
|
|
||||||
"""
|
|
||||||
knowledge_prefixes = ("domains/", "core/", "foundations/", "decisions/")
|
|
||||||
|
|
||||||
for line in diff.split("\n"):
|
|
||||||
if line.startswith("+++ b/") or line.startswith("--- a/"):
|
|
||||||
path = line.split("/", 1)[1] if "/" in line else ""
|
|
||||||
if any(path.startswith(p) for p in knowledge_prefixes):
|
|
||||||
return True
|
|
||||||
|
|
||||||
return False
|
|
||||||
|
|
||||||
|
|
||||||
def _refine_commit_type(diff: str, branch_commit_type: str) -> str:
|
|
||||||
"""Refine commit_type from diff content when branch prefix is ambiguous.
|
|
||||||
|
|
||||||
Branch prefix gives initial classification (extract, research, entity, etc.).
|
|
||||||
For 'extract' branches, diff content can distinguish:
|
|
||||||
- challenge: adds challenged_by edges to existing claims
|
|
||||||
- enrich: modifies existing claim frontmatter without new files
|
|
||||||
- extract: creates new claim files (default for extract branches)
|
|
||||||
|
|
||||||
Only refines 'extract' type — other branch types (research, entity, reweave, fix)
|
|
||||||
are already specific enough.
|
|
||||||
"""
|
|
||||||
if branch_commit_type != "extract":
|
|
||||||
return branch_commit_type
|
|
||||||
|
|
||||||
new_files = 0
|
|
||||||
modified_files = 0
|
|
||||||
has_challenge_edge = False
|
|
||||||
|
|
||||||
in_diff_header = False
|
|
||||||
current_is_new = False
|
|
||||||
for line in diff.split("\n"):
|
|
||||||
if line.startswith("diff --git"):
|
|
||||||
in_diff_header = True
|
|
||||||
current_is_new = False
|
|
||||||
elif line.startswith("new file"):
|
|
||||||
current_is_new = True
|
|
||||||
elif line.startswith("+++ b/"):
|
|
||||||
path = line[6:]
|
|
||||||
if any(path.startswith(p) for p in ("domains/", "core/", "foundations/")):
|
|
||||||
if current_is_new:
|
|
||||||
new_files += 1
|
|
||||||
else:
|
|
||||||
modified_files += 1
|
|
||||||
in_diff_header = False
|
|
||||||
elif line.startswith("+") and not line.startswith("+++"):
|
|
||||||
if "challenged_by:" in line or "challenges:" in line:
|
|
||||||
has_challenge_edge = True
|
|
||||||
|
|
||||||
if has_challenge_edge and new_files == 0:
|
|
||||||
return "challenge"
|
|
||||||
if modified_files > 0 and new_files == 0:
|
|
||||||
return "enrich"
|
|
||||||
return "extract"
|
|
||||||
|
|
||||||
|
|
||||||
async def _record_contributor_attribution(conn, pr_number: int, branch: str):
|
|
||||||
"""Record contributor attribution after a successful merge.
|
|
||||||
|
|
||||||
Parses git trailers and claim frontmatter to identify contributors
|
|
||||||
and their roles. Upserts into contributors table. Refines commit_type
|
|
||||||
from diff content. Pipeline-only PRs (no knowledge files) are skipped.
|
|
||||||
"""
|
|
||||||
import re as _re
|
|
||||||
from datetime import date as _date, datetime as _dt
|
|
||||||
|
|
||||||
today = _date.today().isoformat()
|
|
||||||
|
|
||||||
# Get the PR diff to parse claim frontmatter for attribution blocks
|
|
||||||
diff = await get_pr_diff(pr_number)
|
|
||||||
if not diff:
|
|
||||||
return
|
|
||||||
|
|
||||||
# Pipeline-only PRs (inbox, entities, agents) don't count toward CI
|
|
||||||
if not _is_knowledge_pr(diff):
|
|
||||||
logger.info("PR #%d: pipeline-only commit — skipping CI attribution", pr_number)
|
|
||||||
return
|
|
||||||
|
|
||||||
# Refine commit_type from diff content (branch prefix may be too broad)
|
|
||||||
row = conn.execute("SELECT commit_type FROM prs WHERE number = ?", (pr_number,)).fetchone()
|
|
||||||
branch_type = row["commit_type"] if row and row["commit_type"] else "extract"
|
|
||||||
refined_type = _refine_commit_type(diff, branch_type)
|
|
||||||
if refined_type != branch_type:
|
|
||||||
conn.execute("UPDATE prs SET commit_type = ? WHERE number = ?", (refined_type, pr_number))
|
|
||||||
logger.info("PR #%d: commit_type refined %s → %s", pr_number, branch_type, refined_type)
|
|
||||||
|
|
||||||
# Parse Pentagon-Agent trailer from branch commit messages
|
|
||||||
agents_found: set[str] = set()
|
|
||||||
rc, log_output = await _git(
|
|
||||||
"log", f"origin/main..origin/{branch}", "--format=%b%n%N",
|
|
||||||
timeout=10,
|
|
||||||
)
|
|
||||||
if rc == 0:
|
|
||||||
for match in _re.finditer(r"Pentagon-Agent:\s*(\S+)\s*<([^>]+)>", log_output):
|
|
||||||
agent_name = match.group(1).lower()
|
|
||||||
agent_uuid = match.group(2)
|
|
||||||
_upsert_contributor(
|
|
||||||
conn, agent_name, agent_uuid, "extractor", today,
|
|
||||||
)
|
|
||||||
agents_found.add(agent_name)
|
|
||||||
|
|
||||||
# Parse attribution blocks from claim frontmatter in diff
|
|
||||||
# Look for added lines with attribution YAML
|
|
||||||
current_role = None
|
|
||||||
for line in diff.split("\n"):
|
|
||||||
if not line.startswith("+") or line.startswith("+++"):
|
|
||||||
continue
|
|
||||||
stripped = line[1:].strip()
|
|
||||||
|
|
||||||
# Detect role sections in attribution block
|
|
||||||
for role in ("sourcer", "extractor", "challenger", "synthesizer", "reviewer"):
|
|
||||||
if stripped.startswith(f"{role}:"):
|
|
||||||
current_role = role
|
|
||||||
break
|
|
||||||
|
|
||||||
# Extract handle from attribution entries
|
|
||||||
handle_match = _re.match(r'-\s*handle:\s*["\']?([^"\']+)["\']?', stripped)
|
|
||||||
if handle_match and current_role:
|
|
||||||
handle = handle_match.group(1).strip().lower()
|
|
||||||
agent_id_match = _re.search(r'agent_id:\s*["\']?([^"\']+)', stripped)
|
|
||||||
agent_id = agent_id_match.group(1).strip() if agent_id_match else None
|
|
||||||
_upsert_contributor(conn, handle, agent_id, current_role, today)
|
|
||||||
|
|
||||||
# Fallback: if no attribution block found, credit the branch agent as extractor
|
|
||||||
if not agents_found:
|
|
||||||
# Try to infer agent from branch name (e.g., "extract/2026-03-05-...")
|
|
||||||
# The PR's agent field in SQLite is also available
|
|
||||||
row = conn.execute("SELECT agent FROM prs WHERE number = ?", (pr_number,)).fetchone()
|
|
||||||
if row and row["agent"]:
|
|
||||||
_upsert_contributor(conn, row["agent"].lower(), None, "extractor", today)
|
|
||||||
|
|
||||||
# Increment claims_merged for all contributors on this PR
|
|
||||||
# (handled inside _upsert_contributor via the role counts)
|
|
||||||
|
|
||||||
|
|
||||||
def _upsert_contributor(
|
|
||||||
conn, handle: str, agent_id: str | None, role: str, date_str: str,
|
|
||||||
):
|
|
||||||
"""Upsert a contributor record, incrementing the appropriate role count."""
|
|
||||||
import json as _json
|
|
||||||
from datetime import datetime as _dt
|
|
||||||
|
|
||||||
role_col = f"{role}_count"
|
|
||||||
if role_col not in (
|
|
||||||
"sourcer_count", "extractor_count", "challenger_count",
|
|
||||||
"synthesizer_count", "reviewer_count",
|
|
||||||
):
|
|
||||||
logger.warning("Unknown contributor role: %s", role)
|
|
||||||
return
|
|
||||||
|
|
||||||
existing = conn.execute(
|
|
||||||
"SELECT handle FROM contributors WHERE handle = ?", (handle,)
|
|
||||||
).fetchone()
|
|
||||||
|
|
||||||
if existing:
|
|
||||||
conn.execute(
|
|
||||||
f"""UPDATE contributors SET
|
|
||||||
{role_col} = {role_col} + 1,
|
|
||||||
claims_merged = claims_merged + CASE WHEN ? IN ('extractor', 'sourcer') THEN 1 ELSE 0 END,
|
|
||||||
last_contribution = ?,
|
|
||||||
updated_at = datetime('now')
|
|
||||||
WHERE handle = ?""",
|
|
||||||
(role, date_str, handle),
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
conn.execute(
|
|
||||||
f"""INSERT INTO contributors (handle, agent_id, first_contribution, last_contribution, {role_col}, claims_merged)
|
|
||||||
VALUES (?, ?, ?, ?, 1, CASE WHEN ? IN ('extractor', 'sourcer') THEN 1 ELSE 0 END)""",
|
|
||||||
(handle, agent_id, date_str, date_str, role),
|
|
||||||
)
|
|
||||||
|
|
||||||
# Recalculate tier
|
|
||||||
_recalculate_tier(conn, handle)
|
|
||||||
|
|
||||||
|
|
||||||
def _recalculate_tier(conn, handle: str):
|
|
||||||
"""Recalculate contributor tier based on config rules."""
|
|
||||||
from datetime import date as _date, datetime as _dt
|
|
||||||
|
|
||||||
row = conn.execute(
|
|
||||||
"SELECT claims_merged, challenges_survived, first_contribution, tier FROM contributors WHERE handle = ?",
|
|
||||||
(handle,),
|
|
||||||
).fetchone()
|
|
||||||
if not row:
|
|
||||||
return
|
|
||||||
|
|
||||||
current_tier = row["tier"]
|
|
||||||
claims_merged = row["claims_merged"] or 0
|
|
||||||
challenges_survived = row["challenges_survived"] or 0
|
|
||||||
first_contribution = row["first_contribution"]
|
|
||||||
|
|
||||||
days_since_first = 0
|
|
||||||
if first_contribution:
|
|
||||||
try:
|
|
||||||
first_date = _dt.strptime(first_contribution, "%Y-%m-%d").date()
|
|
||||||
days_since_first = (_date.today() - first_date).days
|
|
||||||
except ValueError:
|
|
||||||
pass
|
|
||||||
|
|
||||||
# Check veteran first (higher tier)
|
|
||||||
vet_rules = config.CONTRIBUTOR_TIER_RULES["veteran"]
|
|
||||||
if (claims_merged >= vet_rules["claims_merged"]
|
|
||||||
and days_since_first >= vet_rules["min_days_since_first"]
|
|
||||||
and challenges_survived >= vet_rules["challenges_survived"]):
|
|
||||||
new_tier = "veteran"
|
|
||||||
elif claims_merged >= config.CONTRIBUTOR_TIER_RULES["contributor"]["claims_merged"]:
|
|
||||||
new_tier = "contributor"
|
|
||||||
else:
|
|
||||||
new_tier = "new"
|
|
||||||
|
|
||||||
if new_tier != current_tier:
|
|
||||||
conn.execute(
|
|
||||||
"UPDATE contributors SET tier = ?, updated_at = datetime('now') WHERE handle = ?",
|
|
||||||
(new_tier, handle),
|
|
||||||
)
|
|
||||||
logger.info("Contributor %s: tier %s → %s", handle, current_tier, new_tier)
|
|
||||||
db.audit(
|
|
||||||
conn, "contributor", "tier_change",
|
|
||||||
json.dumps({"handle": handle, "from": current_tier, "to": new_tier}),
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
# --- Source archiving after merge (Ganymede review: closes near-duplicate loop) ---
|
# --- Source archiving after merge (Ganymede review: closes near-duplicate loop) ---
|
||||||
|
|
||||||
# Accumulates source moves during a merge cycle, batch-committed at the end
|
# Accumulates source moves during a merge cycle, batch-committed at the end
|
||||||
|
|
@ -1532,7 +1298,7 @@ async def _merge_domain_queue(conn, domain: str) -> tuple[int, int]:
|
||||||
|
|
||||||
# Record contributor attribution
|
# Record contributor attribution
|
||||||
try:
|
try:
|
||||||
await _record_contributor_attribution(conn, pr_num, branch)
|
await record_contributor_attribution(conn, pr_num, branch, _git)
|
||||||
except Exception:
|
except Exception:
|
||||||
logger.exception("PR #%d: contributor attribution failed (non-fatal)", pr_num)
|
logger.exception("PR #%d: contributor attribution failed (non-fatal)", pr_num)
|
||||||
|
|
||||||
|
|
|
||||||
263
tests/test_contributor.py
Normal file
263
tests/test_contributor.py
Normal file
|
|
@ -0,0 +1,263 @@
|
||||||
|
"""Tests for lib/contributor.py — contributor attribution functions."""
|
||||||
|
|
||||||
|
import sqlite3
|
||||||
|
import asyncio
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
from unittest.mock import AsyncMock, MagicMock, patch
|
||||||
|
|
||||||
|
sys.modules.setdefault("aiohttp", MagicMock())
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
||||||
|
|
||||||
|
from lib.contributor import (
|
||||||
|
is_knowledge_pr,
|
||||||
|
refine_commit_type,
|
||||||
|
record_contributor_attribution,
|
||||||
|
upsert_contributor,
|
||||||
|
recalculate_tier,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# --- is_knowledge_pr ---
|
||||||
|
|
||||||
|
def test_knowledge_pr_domains():
|
||||||
|
diff = "+++ b/domains/crypto/some-claim.md\n"
|
||||||
|
assert is_knowledge_pr(diff) is True
|
||||||
|
|
||||||
|
def test_knowledge_pr_core():
|
||||||
|
diff = "+++ b/core/epistemology.md\n"
|
||||||
|
assert is_knowledge_pr(diff) is True
|
||||||
|
|
||||||
|
def test_knowledge_pr_foundations():
|
||||||
|
diff = "--- a/foundations/overview.md\n"
|
||||||
|
assert is_knowledge_pr(diff) is True
|
||||||
|
|
||||||
|
def test_knowledge_pr_decisions():
|
||||||
|
diff = "+++ b/decisions/some-decision.md\n"
|
||||||
|
assert is_knowledge_pr(diff) is True
|
||||||
|
|
||||||
|
def test_pipeline_only_pr():
|
||||||
|
diff = "+++ b/inbox/source.md\n+++ b/entities/metadao.md\n"
|
||||||
|
assert is_knowledge_pr(diff) is False
|
||||||
|
|
||||||
|
def test_mixed_pr_counts_as_knowledge():
|
||||||
|
diff = "+++ b/inbox/source.md\n+++ b/domains/crypto/claim.md\n"
|
||||||
|
assert is_knowledge_pr(diff) is True
|
||||||
|
|
||||||
|
def test_empty_diff():
|
||||||
|
assert is_knowledge_pr("") is False
|
||||||
|
|
||||||
|
|
||||||
|
# --- refine_commit_type ---
|
||||||
|
|
||||||
|
def test_refine_non_extract_unchanged():
|
||||||
|
assert refine_commit_type("anything", "research") == "research"
|
||||||
|
assert refine_commit_type("anything", "entity") == "entity"
|
||||||
|
|
||||||
|
def test_refine_extract_new_files():
|
||||||
|
diff = "diff --git a/x b/y\nnew file\n+++ b/domains/crypto/claim.md\n"
|
||||||
|
assert refine_commit_type(diff, "extract") == "extract"
|
||||||
|
|
||||||
|
def test_refine_extract_challenge():
|
||||||
|
diff = "diff --git a/x b/y\n+++ b/domains/crypto/claim.md\n+challenged_by: other\n"
|
||||||
|
assert refine_commit_type(diff, "extract") == "challenge"
|
||||||
|
|
||||||
|
def test_refine_extract_enrich():
|
||||||
|
diff = "diff --git a/x b/y\n+++ b/domains/crypto/claim.md\n+confidence: 0.8\n"
|
||||||
|
assert refine_commit_type(diff, "extract") == "enrich"
|
||||||
|
|
||||||
|
def test_refine_extract_mixed_new_and_modified():
|
||||||
|
diff = (
|
||||||
|
"diff --git a/x b/y\nnew file\n+++ b/domains/crypto/new.md\n"
|
||||||
|
"diff --git a/x b/z\n+++ b/domains/crypto/existing.md\n+foo\n"
|
||||||
|
)
|
||||||
|
assert refine_commit_type(diff, "extract") == "extract"
|
||||||
|
|
||||||
|
|
||||||
|
# --- upsert_contributor + recalculate_tier ---
|
||||||
|
|
||||||
|
def _make_db():
|
||||||
|
conn = sqlite3.connect(":memory:")
|
||||||
|
conn.row_factory = sqlite3.Row
|
||||||
|
conn.execute("""CREATE TABLE contributors (
|
||||||
|
handle TEXT PRIMARY KEY,
|
||||||
|
agent_id TEXT,
|
||||||
|
tier TEXT DEFAULT 'new',
|
||||||
|
first_contribution TEXT,
|
||||||
|
last_contribution TEXT,
|
||||||
|
claims_merged INTEGER DEFAULT 0,
|
||||||
|
challenges_survived INTEGER DEFAULT 0,
|
||||||
|
sourcer_count INTEGER DEFAULT 0,
|
||||||
|
extractor_count INTEGER DEFAULT 0,
|
||||||
|
challenger_count INTEGER DEFAULT 0,
|
||||||
|
synthesizer_count INTEGER DEFAULT 0,
|
||||||
|
reviewer_count INTEGER DEFAULT 0,
|
||||||
|
updated_at TEXT
|
||||||
|
)""")
|
||||||
|
conn.execute("""CREATE TABLE audit_log (
|
||||||
|
id INTEGER PRIMARY KEY,
|
||||||
|
ts TEXT DEFAULT (datetime('now')),
|
||||||
|
stage TEXT,
|
||||||
|
event TEXT,
|
||||||
|
detail TEXT
|
||||||
|
)""")
|
||||||
|
return conn
|
||||||
|
|
||||||
|
def test_upsert_new_contributor():
|
||||||
|
conn = _make_db()
|
||||||
|
with patch("lib.contributor.config") as mock_config:
|
||||||
|
mock_config.CONTRIBUTOR_TIER_RULES = {
|
||||||
|
"veteran": {"claims_merged": 50, "min_days_since_first": 90, "challenges_survived": 5},
|
||||||
|
"contributor": {"claims_merged": 10},
|
||||||
|
}
|
||||||
|
upsert_contributor(conn, "rio", "uuid-123", "extractor", "2026-04-16")
|
||||||
|
row = conn.execute("SELECT * FROM contributors WHERE handle = 'rio'").fetchone()
|
||||||
|
assert row["extractor_count"] == 1
|
||||||
|
assert row["claims_merged"] == 1
|
||||||
|
assert row["tier"] == "new"
|
||||||
|
|
||||||
|
def test_upsert_increment():
|
||||||
|
conn = _make_db()
|
||||||
|
upsert_contributor(conn, "rio", "uuid-123", "extractor", "2026-04-16")
|
||||||
|
upsert_contributor(conn, "rio", "uuid-123", "extractor", "2026-04-17")
|
||||||
|
row = conn.execute("SELECT * FROM contributors WHERE handle = 'rio'").fetchone()
|
||||||
|
assert row["extractor_count"] == 2
|
||||||
|
assert row["claims_merged"] == 2
|
||||||
|
|
||||||
|
def test_upsert_reviewer_no_claim_increment():
|
||||||
|
conn = _make_db()
|
||||||
|
upsert_contributor(conn, "leo", None, "reviewer", "2026-04-16")
|
||||||
|
row = conn.execute("SELECT * FROM contributors WHERE handle = 'leo'").fetchone()
|
||||||
|
assert row["reviewer_count"] == 1
|
||||||
|
assert row["claims_merged"] == 0
|
||||||
|
|
||||||
|
def test_upsert_unknown_role():
|
||||||
|
conn = _make_db()
|
||||||
|
upsert_contributor(conn, "rio", None, "wizard", "2026-04-16")
|
||||||
|
row = conn.execute("SELECT * FROM contributors WHERE handle = 'rio'").fetchone()
|
||||||
|
assert row is None # Should not insert
|
||||||
|
|
||||||
|
def test_recalculate_tier_contributor():
|
||||||
|
conn = _make_db()
|
||||||
|
conn.execute(
|
||||||
|
"""INSERT INTO contributors (handle, claims_merged, challenges_survived, first_contribution, tier)
|
||||||
|
VALUES ('rio', 15, 0, '2026-01-01', 'new')"""
|
||||||
|
)
|
||||||
|
with patch("lib.contributor.config") as mock_config:
|
||||||
|
mock_config.CONTRIBUTOR_TIER_RULES = {
|
||||||
|
"veteran": {"claims_merged": 50, "min_days_since_first": 90, "challenges_survived": 5},
|
||||||
|
"contributor": {"claims_merged": 10},
|
||||||
|
}
|
||||||
|
recalculate_tier(conn, "rio")
|
||||||
|
row = conn.execute("SELECT tier FROM contributors WHERE handle = 'rio'").fetchone()
|
||||||
|
assert row["tier"] == "contributor"
|
||||||
|
|
||||||
|
def test_recalculate_tier_veteran():
|
||||||
|
conn = _make_db()
|
||||||
|
conn.execute(
|
||||||
|
"""INSERT INTO contributors (handle, claims_merged, challenges_survived, first_contribution, tier)
|
||||||
|
VALUES ('rio', 60, 10, '2025-01-01', 'contributor')"""
|
||||||
|
)
|
||||||
|
with patch("lib.contributor.config") as mock_config:
|
||||||
|
mock_config.CONTRIBUTOR_TIER_RULES = {
|
||||||
|
"veteran": {"claims_merged": 50, "min_days_since_first": 90, "challenges_survived": 5},
|
||||||
|
"contributor": {"claims_merged": 10},
|
||||||
|
}
|
||||||
|
recalculate_tier(conn, "rio")
|
||||||
|
row = conn.execute("SELECT tier FROM contributors WHERE handle = 'rio'").fetchone()
|
||||||
|
assert row["tier"] == "veteran"
|
||||||
|
|
||||||
|
|
||||||
|
# --- record_contributor_attribution ---
|
||||||
|
|
||||||
|
def _make_attribution_db():
    """Extend the base schema with a prs table and one seeded 'extract' PR (#100)."""
    conn = _make_db()
    conn.execute("""CREATE TABLE prs (
    number INTEGER PRIMARY KEY,
    commit_type TEXT,
    agent TEXT
    )""")
    conn.execute("INSERT INTO prs VALUES (100, 'extract', 'rio')")
    return conn
|
||||||
|
|
||||||
|
def test_record_skips_pipeline_only():
|
||||||
|
conn = _make_attribution_db()
|
||||||
|
mock_diff = "+++ b/inbox/source.md\n"
|
||||||
|
|
||||||
|
async def run():
|
||||||
|
with patch("lib.contributor.get_pr_diff", new_callable=AsyncMock, return_value=mock_diff):
|
||||||
|
git_fn = AsyncMock(return_value=(0, ""))
|
||||||
|
await record_contributor_attribution(conn, 100, "extract/test", git_fn)
|
||||||
|
|
||||||
|
asyncio.run(run())
|
||||||
|
row = conn.execute("SELECT * FROM contributors").fetchone()
|
||||||
|
assert row is None # No attribution for pipeline-only
|
||||||
|
|
||||||
|
def test_record_fallback_to_pr_agent():
|
||||||
|
conn = _make_attribution_db()
|
||||||
|
mock_diff = "+++ b/domains/crypto/claim.md\n+some content\n"
|
||||||
|
|
||||||
|
async def run():
|
||||||
|
with patch("lib.contributor.get_pr_diff", new_callable=AsyncMock, return_value=mock_diff):
|
||||||
|
git_fn = AsyncMock(return_value=(0, "no trailers here"))
|
||||||
|
with patch("lib.contributor.config") as mock_config:
|
||||||
|
mock_config.CONTRIBUTOR_TIER_RULES = {
|
||||||
|
"veteran": {"claims_merged": 50, "min_days_since_first": 90, "challenges_survived": 5},
|
||||||
|
"contributor": {"claims_merged": 10},
|
||||||
|
}
|
||||||
|
await record_contributor_attribution(conn, 100, "extract/test", git_fn)
|
||||||
|
|
||||||
|
asyncio.run(run())
|
||||||
|
row = conn.execute("SELECT * FROM contributors WHERE handle = 'rio'").fetchone()
|
||||||
|
assert row is not None
|
||||||
|
assert row["extractor_count"] == 1
|
||||||
|
|
||||||
|
def test_record_parses_pentagon_trailer():
|
||||||
|
conn = _make_attribution_db()
|
||||||
|
mock_diff = "+++ b/domains/crypto/claim.md\n+new file content\n"
|
||||||
|
trailer = "Pentagon-Agent: Theseus <uuid-456>"
|
||||||
|
|
||||||
|
async def run():
|
||||||
|
with patch("lib.contributor.get_pr_diff", new_callable=AsyncMock, return_value=mock_diff):
|
||||||
|
git_fn = AsyncMock(return_value=(0, trailer))
|
||||||
|
with patch("lib.contributor.config") as mock_config:
|
||||||
|
mock_config.CONTRIBUTOR_TIER_RULES = {
|
||||||
|
"veteran": {"claims_merged": 50, "min_days_since_first": 90, "challenges_survived": 5},
|
||||||
|
"contributor": {"claims_merged": 10},
|
||||||
|
}
|
||||||
|
await record_contributor_attribution(conn, 100, "extract/test", git_fn)
|
||||||
|
|
||||||
|
asyncio.run(run())
|
||||||
|
row = conn.execute("SELECT * FROM contributors WHERE handle = 'theseus'").fetchone()
|
||||||
|
assert row is not None
|
||||||
|
assert row["agent_id"] == "uuid-456"
|
||||||
|
|
||||||
|
def test_record_refines_commit_type():
|
||||||
|
conn = _make_attribution_db()
|
||||||
|
mock_diff = "diff --git a/x b/y\n+++ b/domains/crypto/claim.md\n+challenged_by: foo\n"
|
||||||
|
|
||||||
|
async def run():
|
||||||
|
with patch("lib.contributor.get_pr_diff", new_callable=AsyncMock, return_value=mock_diff):
|
||||||
|
git_fn = AsyncMock(return_value=(0, ""))
|
||||||
|
with patch("lib.contributor.config") as mock_config:
|
||||||
|
mock_config.CONTRIBUTOR_TIER_RULES = {
|
||||||
|
"veteran": {"claims_merged": 50, "min_days_since_first": 90, "challenges_survived": 5},
|
||||||
|
"contributor": {"claims_merged": 10},
|
||||||
|
}
|
||||||
|
await record_contributor_attribution(conn, 100, "extract/test", git_fn)
|
||||||
|
|
||||||
|
asyncio.run(run())
|
||||||
|
row = conn.execute("SELECT commit_type FROM prs WHERE number = 100").fetchone()
|
||||||
|
assert row["commit_type"] == "challenge"
|
||||||
|
|
||||||
|
def test_record_no_diff_returns_early():
|
||||||
|
conn = _make_attribution_db()
|
||||||
|
|
||||||
|
async def run():
|
||||||
|
with patch("lib.contributor.get_pr_diff", new_callable=AsyncMock, return_value=None):
|
||||||
|
git_fn = AsyncMock()
|
||||||
|
await record_contributor_attribution(conn, 100, "extract/test", git_fn)
|
||||||
|
git_fn.assert_not_called()
|
||||||
|
|
||||||
|
asyncio.run(run())
|
||||||
Loading…
Reference in a new issue