From 53dc18afd5cadaad5d6027865cb92c7a58ea79e9 Mon Sep 17 00:00:00 2001 From: m3taversal Date: Thu, 16 Apr 2026 13:08:26 +0100 Subject: [PATCH] =?UTF-8?q?Phase=205:=20Extract=20contributor.py=20from=20?= =?UTF-8?q?merge.py=20(=E2=88=92234=20lines)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 5 functions extracted: is_knowledge_pr, refine_commit_type, record_contributor_attribution, upsert_contributor, recalculate_tier. git_fn parameter injection avoids circular import (merge→contributor, contributor needs _git from merge). Single call site passes _git. merge.py: 1912 → 1678 lines. 23 new tests, zero regressions. Co-Authored-By: Claude Opus 4.6 (1M context) --- lib/contributor.py | 244 +++++++++++++++++++++++++++++++++++ lib/merge.py | 238 +--------------------------------- tests/test_contributor.py | 263 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 509 insertions(+), 236 deletions(-) create mode 100644 lib/contributor.py create mode 100644 tests/test_contributor.py diff --git a/lib/contributor.py b/lib/contributor.py new file mode 100644 index 0000000..5080af2 --- /dev/null +++ b/lib/contributor.py @@ -0,0 +1,244 @@ +"""Contributor attribution — tracks who contributed what and calculates tiers. + +Extracted from merge.py (Phase 5 decomposition). Functions: +- is_knowledge_pr: diff classification (knowledge vs pipeline-only) +- refine_commit_type: extract → challenge/enrich refinement from diff content +- record_contributor_attribution: parse trailers + frontmatter, upsert contributors +- upsert_contributor: insert/update contributor record with role counts +- recalculate_tier: tier promotion based on config rules +""" + +import json +import logging +import re + +from . import config, db +from .forgejo import get_pr_diff + +logger = logging.getLogger("pipeline.contributor") + + +def is_knowledge_pr(diff: str) -> bool: + """Check if a PR touches knowledge files (claims, decisions, core, foundations). + + Knowledge PRs get full CI attribution weight. + Pipeline-only PRs (inbox, entities, agents, archive) get zero CI weight. + + Mixed PRs count as knowledge — if a PR adds a claim, it gets attribution + even if it also moves source files. Knowledge takes priority. (Ganymede review) + """ + knowledge_prefixes = ("domains/", "core/", "foundations/", "decisions/") + + for line in diff.split("\n"): + if line.startswith("+++ b/") or line.startswith("--- a/"): + path = line.split("/", 1)[1] if "/" in line else "" + if any(path.startswith(p) for p in knowledge_prefixes): + return True + + return False + + +def refine_commit_type(diff: str, branch_commit_type: str) -> str: + """Refine commit_type from diff content when branch prefix is ambiguous. + + Branch prefix gives initial classification (extract, research, entity, etc.). + For 'extract' branches, diff content can distinguish: + - challenge: adds challenged_by edges to existing claims + - enrich: modifies existing claim frontmatter without new files + - extract: creates new claim files (default for extract branches) + + Only refines 'extract' type — other branch types (research, entity, reweave, fix) + are already specific enough. + """ + if branch_commit_type != "extract": + return branch_commit_type + + new_files = 0 + modified_files = 0 + has_challenge_edge = False + + in_diff_header = False + current_is_new = False + for line in diff.split("\n"): + if line.startswith("diff --git"): + in_diff_header = True + current_is_new = False + elif line.startswith("new file"): + current_is_new = True + elif line.startswith("+++ b/"): + path = line[6:] + if any(path.startswith(p) for p in ("domains/", "core/", "foundations/")): + if current_is_new: + new_files += 1 + else: + modified_files += 1 + in_diff_header = False + elif line.startswith("+") and not line.startswith("+++"): + if "challenged_by:" in line or "challenges:" in line: + has_challenge_edge = True + + if has_challenge_edge and new_files == 0: + return "challenge" + if modified_files > 0 and new_files == 0: + return "enrich" + return "extract" + + +async def record_contributor_attribution(conn, pr_number: int, branch: str, git_fn): + """Record contributor attribution after a successful merge. + + Parses git trailers and claim frontmatter to identify contributors + and their roles. Upserts into contributors table. Refines commit_type + from diff content. Pipeline-only PRs (no knowledge files) are skipped. + + Args: + git_fn: async callable matching _git signature (for git log parsing). + """ + from datetime import date as _date + + today = _date.today().isoformat() + + # Get the PR diff to parse claim frontmatter for attribution blocks + diff = await get_pr_diff(pr_number) + if not diff: + return + + # Pipeline-only PRs (inbox, entities, agents) don't count toward CI + if not is_knowledge_pr(diff): + logger.info("PR #%d: pipeline-only commit — skipping CI attribution", pr_number) + return + + # Refine commit_type from diff content (branch prefix may be too broad) + row = conn.execute("SELECT commit_type FROM prs WHERE number = ?", (pr_number,)).fetchone() + branch_type = row["commit_type"] if row and row["commit_type"] else "extract" + refined_type = refine_commit_type(diff, branch_type) + if refined_type != branch_type: + conn.execute("UPDATE prs SET commit_type = ? WHERE number = ?", (refined_type, pr_number)) + logger.info("PR #%d: commit_type refined %s → %s", pr_number, branch_type, refined_type) + + # Parse Pentagon-Agent trailer from branch commit messages + agents_found: set[str] = set() + rc, log_output = await git_fn( + "log", f"origin/main..origin/{branch}", "--format=%b%n%N", + timeout=10, + ) + if rc == 0: + for match in re.finditer(r"Pentagon-Agent:\s*(\S+)\s*<([^>]+)>", log_output): + agent_name = match.group(1).lower() + agent_uuid = match.group(2) + upsert_contributor( + conn, agent_name, agent_uuid, "extractor", today, + ) + agents_found.add(agent_name) + + # Parse attribution blocks from claim frontmatter in diff + # Look for added lines with attribution YAML + current_role = None + for line in diff.split("\n"): + if not line.startswith("+") or line.startswith("+++"): + continue + stripped = line[1:].strip() + + # Detect role sections in attribution block + for role in ("sourcer", "extractor", "challenger", "synthesizer", "reviewer"): + if stripped.startswith(f"{role}:"): + current_role = role + break + + # Extract handle from attribution entries + handle_match = re.match(r'-\s*handle:\s*["\']?([^"\']+)["\']?', stripped) + if handle_match and current_role: + handle = handle_match.group(1).strip().lower() + agent_id_match = re.search(r'agent_id:\s*["\']?([^"\']+)', stripped) + agent_id = agent_id_match.group(1).strip() if agent_id_match else None + upsert_contributor(conn, handle, agent_id, current_role, today) + + # Fallback: if no attribution block found, credit the branch agent as extractor + if not agents_found: + row = conn.execute("SELECT agent FROM prs WHERE number = ?", (pr_number,)).fetchone() + if row and row["agent"]: + upsert_contributor(conn, row["agent"].lower(), None, "extractor", today) + + +def upsert_contributor( + conn, handle: str, agent_id: str | None, role: str, date_str: str, +): + """Upsert a contributor record, incrementing the appropriate role count.""" + role_col = f"{role}_count" + if role_col not in ( + "sourcer_count", "extractor_count", "challenger_count", + "synthesizer_count", "reviewer_count", + ): + logger.warning("Unknown contributor role: %s", role) + return + + existing = conn.execute( + "SELECT handle FROM contributors WHERE handle = ?", (handle,) + ).fetchone() + + if existing: + conn.execute( + f"""UPDATE contributors SET + {role_col} = {role_col} + 1, + claims_merged = claims_merged + CASE WHEN ? IN ('extractor', 'sourcer') THEN 1 ELSE 0 END, + last_contribution = ?, + updated_at = datetime('now') + WHERE handle = ?""", + (role, date_str, handle), + ) + else: + conn.execute( + f"""INSERT INTO contributors (handle, agent_id, first_contribution, last_contribution, {role_col}, claims_merged) + VALUES (?, ?, ?, ?, 1, CASE WHEN ? IN ('extractor', 'sourcer') THEN 1 ELSE 0 END)""", + (handle, agent_id, date_str, date_str, role), + ) + + # Recalculate tier + recalculate_tier(conn, handle) + + +def recalculate_tier(conn, handle: str): + """Recalculate contributor tier based on config rules.""" + from datetime import date as _date, datetime as _dt + + row = conn.execute( + "SELECT claims_merged, challenges_survived, first_contribution, tier FROM contributors WHERE handle = ?", + (handle,), + ).fetchone() + if not row: + return + + current_tier = row["tier"] + claims_merged = row["claims_merged"] or 0 + challenges_survived = row["challenges_survived"] or 0 + first_contribution = row["first_contribution"] + + days_since_first = 0 + if first_contribution: + try: + first_date = _dt.strptime(first_contribution, "%Y-%m-%d").date() + days_since_first = (_date.today() - first_date).days + except ValueError: + pass + + # Check veteran first (higher tier) + vet_rules = config.CONTRIBUTOR_TIER_RULES["veteran"] + if (claims_merged >= vet_rules["claims_merged"] + and days_since_first >= vet_rules["min_days_since_first"] + and challenges_survived >= vet_rules["challenges_survived"]): + new_tier = "veteran" + elif claims_merged >= config.CONTRIBUTOR_TIER_RULES["contributor"]["claims_merged"]: + new_tier = "contributor" + else: + new_tier = "new" + + if new_tier != current_tier: + conn.execute( + "UPDATE contributors SET tier = ?, updated_at = datetime('now') WHERE handle = ?", + (new_tier, handle), + ) + logger.info("Contributor %s: tier %s → %s", handle, current_tier, new_tier) + db.audit( + conn, "contributor", "tier_change", + json.dumps({"handle": handle, "from": current_tier, "to": new_tier}), + ) diff --git a/lib/merge.py b/lib/merge.py index 6c92ff1..bbfbdcc 100644 --- a/lib/merge.py +++ b/lib/merge.py @@ -21,6 +21,7 @@ from collections import defaultdict from . import config, db from .db import classify_branch +from .contributor import record_contributor_attribution from .dedup import dedup_evidence_blocks from .domains import detect_domain_from_branch from .forgejo import api as forgejo_api @@ -800,241 +801,6 @@ async def _delete_remote_branch(branch: str): logger.warning("Failed to delete remote branch %s — cosmetic, continuing", branch) -# --- Contributor attribution --- - - -def _is_knowledge_pr(diff: str) -> bool: - """Check if a PR touches knowledge files (claims, decisions, core, foundations). - - Knowledge PRs get full CI attribution weight. - Pipeline-only PRs (inbox, entities, agents, archive) get zero CI weight. - - Mixed PRs count as knowledge — if a PR adds a claim, it gets attribution - even if it also moves source files. Knowledge takes priority. (Ganymede review) - """ - knowledge_prefixes = ("domains/", "core/", "foundations/", "decisions/") - - for line in diff.split("\n"): - if line.startswith("+++ b/") or line.startswith("--- a/"): - path = line.split("/", 1)[1] if "/" in line else "" - if any(path.startswith(p) for p in knowledge_prefixes): - return True - - return False - - -def _refine_commit_type(diff: str, branch_commit_type: str) -> str: - """Refine commit_type from diff content when branch prefix is ambiguous. - - Branch prefix gives initial classification (extract, research, entity, etc.). - For 'extract' branches, diff content can distinguish: - - challenge: adds challenged_by edges to existing claims - - enrich: modifies existing claim frontmatter without new files - - extract: creates new claim files (default for extract branches) - - Only refines 'extract' type — other branch types (research, entity, reweave, fix) - are already specific enough. - """ - if branch_commit_type != "extract": - return branch_commit_type - - new_files = 0 - modified_files = 0 - has_challenge_edge = False - - in_diff_header = False - current_is_new = False - for line in diff.split("\n"): - if line.startswith("diff --git"): - in_diff_header = True - current_is_new = False - elif line.startswith("new file"): - current_is_new = True - elif line.startswith("+++ b/"): - path = line[6:] - if any(path.startswith(p) for p in ("domains/", "core/", "foundations/")): - if current_is_new: - new_files += 1 - else: - modified_files += 1 - in_diff_header = False - elif line.startswith("+") and not line.startswith("+++"): - if "challenged_by:" in line or "challenges:" in line: - has_challenge_edge = True - - if has_challenge_edge and new_files == 0: - return "challenge" - if modified_files > 0 and new_files == 0: - return "enrich" - return "extract" - - -async def _record_contributor_attribution(conn, pr_number: int, branch: str): - """Record contributor attribution after a successful merge. - - Parses git trailers and claim frontmatter to identify contributors - and their roles. Upserts into contributors table. Refines commit_type - from diff content. Pipeline-only PRs (no knowledge files) are skipped. - """ - import re as _re - from datetime import date as _date, datetime as _dt - - today = _date.today().isoformat() - - # Get the PR diff to parse claim frontmatter for attribution blocks - diff = await get_pr_diff(pr_number) - if not diff: - return - - # Pipeline-only PRs (inbox, entities, agents) don't count toward CI - if not _is_knowledge_pr(diff): - logger.info("PR #%d: pipeline-only commit — skipping CI attribution", pr_number) - return - - # Refine commit_type from diff content (branch prefix may be too broad) - row = conn.execute("SELECT commit_type FROM prs WHERE number = ?", (pr_number,)).fetchone() - branch_type = row["commit_type"] if row and row["commit_type"] else "extract" - refined_type = _refine_commit_type(diff, branch_type) - if refined_type != branch_type: - conn.execute("UPDATE prs SET commit_type = ? WHERE number = ?", (refined_type, pr_number)) - logger.info("PR #%d: commit_type refined %s → %s", pr_number, branch_type, refined_type) - - # Parse Pentagon-Agent trailer from branch commit messages - agents_found: set[str] = set() - rc, log_output = await _git( - "log", f"origin/main..origin/{branch}", "--format=%b%n%N", - timeout=10, - ) - if rc == 0: - for match in _re.finditer(r"Pentagon-Agent:\s*(\S+)\s*<([^>]+)>", log_output): - agent_name = match.group(1).lower() - agent_uuid = match.group(2) - _upsert_contributor( - conn, agent_name, agent_uuid, "extractor", today, - ) - agents_found.add(agent_name) - - # Parse attribution blocks from claim frontmatter in diff - # Look for added lines with attribution YAML - current_role = None - for line in diff.split("\n"): - if not line.startswith("+") or line.startswith("+++"): - continue - stripped = line[1:].strip() - - # Detect role sections in attribution block - for role in ("sourcer", "extractor", "challenger", "synthesizer", "reviewer"): - if stripped.startswith(f"{role}:"): - current_role = role - break - - # Extract handle from attribution entries - handle_match = _re.match(r'-\s*handle:\s*["\']?([^"\']+)["\']?', stripped) - if handle_match and current_role: - handle = handle_match.group(1).strip().lower() - agent_id_match = _re.search(r'agent_id:\s*["\']?([^"\']+)', stripped) - agent_id = agent_id_match.group(1).strip() if agent_id_match else None - _upsert_contributor(conn, handle, agent_id, current_role, today) - - # Fallback: if no attribution block found, credit the branch agent as extractor - if not agents_found: - # Try to infer agent from branch name (e.g., "extract/2026-03-05-...") - # The PR's agent field in SQLite is also available - row = conn.execute("SELECT agent FROM prs WHERE number = ?", (pr_number,)).fetchone() - if row and row["agent"]: - _upsert_contributor(conn, row["agent"].lower(), None, "extractor", today) - - # Increment claims_merged for all contributors on this PR - # (handled inside _upsert_contributor via the role counts) - - -def _upsert_contributor( - conn, handle: str, agent_id: str | None, role: str, date_str: str, -): - """Upsert a contributor record, incrementing the appropriate role count.""" - import json as _json - from datetime import datetime as _dt - - role_col = f"{role}_count" - if role_col not in ( - "sourcer_count", "extractor_count", "challenger_count", - "synthesizer_count", "reviewer_count", - ): - logger.warning("Unknown contributor role: %s", role) - return - - existing = conn.execute( - "SELECT handle FROM contributors WHERE handle = ?", (handle,) - ).fetchone() - - if existing: - conn.execute( - f"""UPDATE contributors SET - {role_col} = {role_col} + 1, - claims_merged = claims_merged + CASE WHEN ? IN ('extractor', 'sourcer') THEN 1 ELSE 0 END, - last_contribution = ?, - updated_at = datetime('now') - WHERE handle = ?""", - (role, date_str, handle), - ) - else: - conn.execute( - f"""INSERT INTO contributors (handle, agent_id, first_contribution, last_contribution, {role_col}, claims_merged) - VALUES (?, ?, ?, ?, 1, CASE WHEN ? IN ('extractor', 'sourcer') THEN 1 ELSE 0 END)""", - (handle, agent_id, date_str, date_str, role), - ) - - # Recalculate tier - _recalculate_tier(conn, handle) - - -def _recalculate_tier(conn, handle: str): - """Recalculate contributor tier based on config rules.""" - from datetime import date as _date, datetime as _dt - - row = conn.execute( - "SELECT claims_merged, challenges_survived, first_contribution, tier FROM contributors WHERE handle = ?", - (handle,), - ).fetchone() - if not row: - return - - current_tier = row["tier"] - claims_merged = row["claims_merged"] or 0 - challenges_survived = row["challenges_survived"] or 0 - first_contribution = row["first_contribution"] - - days_since_first = 0 - if first_contribution: - try: - first_date = _dt.strptime(first_contribution, "%Y-%m-%d").date() - days_since_first = (_date.today() - first_date).days - except ValueError: - pass - - # Check veteran first (higher tier) - vet_rules = config.CONTRIBUTOR_TIER_RULES["veteran"] - if (claims_merged >= vet_rules["claims_merged"] - and days_since_first >= vet_rules["min_days_since_first"] - and challenges_survived >= vet_rules["challenges_survived"]): - new_tier = "veteran" - elif claims_merged >= config.CONTRIBUTOR_TIER_RULES["contributor"]["claims_merged"]: - new_tier = "contributor" - else: - new_tier = "new" - - if new_tier != current_tier: - conn.execute( - "UPDATE contributors SET tier = ?, updated_at = datetime('now') WHERE handle = ?", - (new_tier, handle), - ) - logger.info("Contributor %s: tier %s → %s", handle, current_tier, new_tier) - db.audit( - conn, "contributor", "tier_change", - json.dumps({"handle": handle, "from": current_tier, "to": new_tier}), - ) - - # --- Source archiving after merge (Ganymede review: closes near-duplicate loop) --- # Accumulates source moves during a merge cycle, batch-committed at the end @@ -1532,7 +1298,7 @@ async def _merge_domain_queue(conn, domain: str) -> tuple[int, int]: # Record contributor attribution try: - await _record_contributor_attribution(conn, pr_num, branch) + await record_contributor_attribution(conn, pr_num, branch, _git) except Exception: logger.exception("PR #%d: contributor attribution failed (non-fatal)", pr_num) diff --git a/tests/test_contributor.py b/tests/test_contributor.py new file mode 100644 index 0000000..7de0463 --- /dev/null +++ b/tests/test_contributor.py @@ -0,0 +1,263 @@ +"""Tests for lib/contributor.py — contributor attribution functions.""" + +import sqlite3 +import asyncio +import sys +import os +from unittest.mock import AsyncMock, MagicMock, patch + +sys.modules.setdefault("aiohttp", MagicMock()) + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) + +from lib.contributor import ( + is_knowledge_pr, + refine_commit_type, + record_contributor_attribution, + upsert_contributor, + recalculate_tier, +) + + +# --- is_knowledge_pr --- + +def test_knowledge_pr_domains(): + diff = "+++ b/domains/crypto/some-claim.md\n" + assert is_knowledge_pr(diff) is True + +def test_knowledge_pr_core(): + diff = "+++ b/core/epistemology.md\n" + assert is_knowledge_pr(diff) is True + +def test_knowledge_pr_foundations(): + diff = "--- a/foundations/overview.md\n" + assert is_knowledge_pr(diff) is True + +def test_knowledge_pr_decisions(): + diff = "+++ b/decisions/some-decision.md\n" + assert is_knowledge_pr(diff) is True + +def test_pipeline_only_pr(): + diff = "+++ b/inbox/source.md\n+++ b/entities/metadao.md\n" + assert is_knowledge_pr(diff) is False + +def test_mixed_pr_counts_as_knowledge(): + diff = "+++ b/inbox/source.md\n+++ b/domains/crypto/claim.md\n" + assert is_knowledge_pr(diff) is True + +def test_empty_diff(): + assert is_knowledge_pr("") is False + + +# --- refine_commit_type --- + +def test_refine_non_extract_unchanged(): + assert refine_commit_type("anything", "research") == "research" + assert refine_commit_type("anything", "entity") == "entity" + +def test_refine_extract_new_files(): + diff = "diff --git a/x b/y\nnew file\n+++ b/domains/crypto/claim.md\n" + assert refine_commit_type(diff, "extract") == "extract" + +def test_refine_extract_challenge(): + diff = "diff --git a/x b/y\n+++ b/domains/crypto/claim.md\n+challenged_by: other\n" + assert refine_commit_type(diff, "extract") == "challenge" + +def test_refine_extract_enrich(): + diff = "diff --git a/x b/y\n+++ b/domains/crypto/claim.md\n+confidence: 0.8\n" + assert refine_commit_type(diff, "extract") == "enrich" + +def test_refine_extract_mixed_new_and_modified(): + diff = ( + "diff --git a/x b/y\nnew file\n+++ b/domains/crypto/new.md\n" + "diff --git a/x b/z\n+++ b/domains/crypto/existing.md\n+foo\n" + ) + assert refine_commit_type(diff, "extract") == "extract" + + +# --- upsert_contributor + recalculate_tier --- + +def _make_db(): + conn = sqlite3.connect(":memory:") + conn.row_factory = sqlite3.Row + conn.execute("""CREATE TABLE contributors ( + handle TEXT PRIMARY KEY, + agent_id TEXT, + tier TEXT DEFAULT 'new', + first_contribution TEXT, + last_contribution TEXT, + claims_merged INTEGER DEFAULT 0, + challenges_survived INTEGER DEFAULT 0, + sourcer_count INTEGER DEFAULT 0, + extractor_count INTEGER DEFAULT 0, + challenger_count INTEGER DEFAULT 0, + synthesizer_count INTEGER DEFAULT 0, + reviewer_count INTEGER DEFAULT 0, + updated_at TEXT + )""") + conn.execute("""CREATE TABLE audit_log ( + id INTEGER PRIMARY KEY, + ts TEXT DEFAULT (datetime('now')), + stage TEXT, + event TEXT, + detail TEXT + )""") + return conn + +def test_upsert_new_contributor(): + conn = _make_db() + with patch("lib.contributor.config") as mock_config: + mock_config.CONTRIBUTOR_TIER_RULES = { + "veteran": {"claims_merged": 50, "min_days_since_first": 90, "challenges_survived": 5}, + "contributor": {"claims_merged": 10}, + } + upsert_contributor(conn, "rio", "uuid-123", "extractor", "2026-04-16") + row = conn.execute("SELECT * FROM contributors WHERE handle = 'rio'").fetchone() + assert row["extractor_count"] == 1 + assert row["claims_merged"] == 1 + assert row["tier"] == "new" + +def test_upsert_increment(): + conn = _make_db() + upsert_contributor(conn, "rio", "uuid-123", "extractor", "2026-04-16") + upsert_contributor(conn, "rio", "uuid-123", "extractor", "2026-04-17") + row = conn.execute("SELECT * FROM contributors WHERE handle = 'rio'").fetchone() + assert row["extractor_count"] == 2 + assert row["claims_merged"] == 2 + +def test_upsert_reviewer_no_claim_increment(): + conn = _make_db() + upsert_contributor(conn, "leo", None, "reviewer", "2026-04-16") + row = conn.execute("SELECT * FROM contributors WHERE handle = 'leo'").fetchone() + assert row["reviewer_count"] == 1 + assert row["claims_merged"] == 0 + +def test_upsert_unknown_role(): + conn = _make_db() + upsert_contributor(conn, "rio", None, "wizard", "2026-04-16") + row = conn.execute("SELECT * FROM contributors WHERE handle = 'rio'").fetchone() + assert row is None # Should not insert + +def test_recalculate_tier_contributor(): + conn = _make_db() + conn.execute( + """INSERT INTO contributors (handle, claims_merged, challenges_survived, first_contribution, tier) + VALUES ('rio', 15, 0, '2026-01-01', 'new')""" + ) + with patch("lib.contributor.config") as mock_config: + mock_config.CONTRIBUTOR_TIER_RULES = { + "veteran": {"claims_merged": 50, "min_days_since_first": 90, "challenges_survived": 5}, + "contributor": {"claims_merged": 10}, + } + recalculate_tier(conn, "rio") + row = conn.execute("SELECT tier FROM contributors WHERE handle = 'rio'").fetchone() + assert row["tier"] == "contributor" + +def test_recalculate_tier_veteran(): + conn = _make_db() + conn.execute( + """INSERT INTO contributors (handle, claims_merged, challenges_survived, first_contribution, tier) + VALUES ('rio', 60, 10, '2025-01-01', 'contributor')""" + ) + with patch("lib.contributor.config") as mock_config: + mock_config.CONTRIBUTOR_TIER_RULES = { + "veteran": {"claims_merged": 50, "min_days_since_first": 90, "challenges_survived": 5}, + "contributor": {"claims_merged": 10}, + } + recalculate_tier(conn, "rio") + row = conn.execute("SELECT tier FROM contributors WHERE handle = 'rio'").fetchone() + assert row["tier"] == "veteran" + + +# --- record_contributor_attribution --- + +def _make_attribution_db(): + conn = _make_db() + conn.execute("""CREATE TABLE prs ( + number INTEGER PRIMARY KEY, + commit_type TEXT, + agent TEXT + )""") + conn.execute("INSERT INTO prs VALUES (100, 'extract', 'rio')") + return conn + +def test_record_skips_pipeline_only(): + conn = _make_attribution_db() + mock_diff = "+++ b/inbox/source.md\n" + + async def run(): + with patch("lib.contributor.get_pr_diff", new_callable=AsyncMock, return_value=mock_diff): + git_fn = AsyncMock(return_value=(0, "")) + await record_contributor_attribution(conn, 100, "extract/test", git_fn) + + asyncio.run(run()) + row = conn.execute("SELECT * FROM contributors").fetchone() + assert row is None # No attribution for pipeline-only + +def test_record_fallback_to_pr_agent(): + conn = _make_attribution_db() + mock_diff = "+++ b/domains/crypto/claim.md\n+some content\n" + + async def run(): + with patch("lib.contributor.get_pr_diff", new_callable=AsyncMock, return_value=mock_diff): + git_fn = AsyncMock(return_value=(0, "no trailers here")) + with patch("lib.contributor.config") as mock_config: + mock_config.CONTRIBUTOR_TIER_RULES = { + "veteran": {"claims_merged": 50, "min_days_since_first": 90, "challenges_survived": 5}, + "contributor": {"claims_merged": 10}, + } + await record_contributor_attribution(conn, 100, "extract/test", git_fn) + + asyncio.run(run()) + row = conn.execute("SELECT * FROM contributors WHERE handle = 'rio'").fetchone() + assert row is not None + assert row["extractor_count"] == 1 + +def test_record_parses_pentagon_trailer(): + conn = _make_attribution_db() + mock_diff = "+++ b/domains/crypto/claim.md\n+new file content\n" + trailer = "Pentagon-Agent: Theseus " + + async def run(): + with patch("lib.contributor.get_pr_diff", new_callable=AsyncMock, return_value=mock_diff): + git_fn = AsyncMock(return_value=(0, trailer)) + with patch("lib.contributor.config") as mock_config: + mock_config.CONTRIBUTOR_TIER_RULES = { + "veteran": {"claims_merged": 50, "min_days_since_first": 90, "challenges_survived": 5}, + "contributor": {"claims_merged": 10}, + } + await record_contributor_attribution(conn, 100, "extract/test", git_fn) + + asyncio.run(run()) + row = conn.execute("SELECT * FROM contributors WHERE handle = 'theseus'").fetchone() + assert row is not None + assert row["agent_id"] == "uuid-456" + +def test_record_refines_commit_type(): + conn = _make_attribution_db() + mock_diff = "diff --git a/x b/y\n+++ b/domains/crypto/claim.md\n+challenged_by: foo\n" + + async def run(): + with patch("lib.contributor.get_pr_diff", new_callable=AsyncMock, return_value=mock_diff): + git_fn = AsyncMock(return_value=(0, "")) + with patch("lib.contributor.config") as mock_config: + mock_config.CONTRIBUTOR_TIER_RULES = { + "veteran": {"claims_merged": 50, "min_days_since_first": 90, "challenges_survived": 5}, + "contributor": {"claims_merged": 10}, + } + await record_contributor_attribution(conn, 100, "extract/test", git_fn) + + asyncio.run(run()) + row = conn.execute("SELECT commit_type FROM prs WHERE number = 100").fetchone() + assert row["commit_type"] == "challenge" + +def test_record_no_diff_returns_early(): + conn = _make_attribution_db() + + async def run(): + with patch("lib.contributor.get_pr_diff", new_callable=AsyncMock, return_value=None): + git_fn = AsyncMock() + await record_contributor_attribution(conn, 100, "extract/test", git_fn) + git_fn.assert_not_called() + + asyncio.run(run())