From 53dc18afd5cadaad5d6027865cb92c7a58ea79e9 Mon Sep 17 00:00:00 2001
From: m3taversal <m3taversal@gmail.com>
Date: Thu, 16 Apr 2026 13:08:26 +0100
Subject: [PATCH] =?UTF-8?q?Phase=205:=20Extract=20contributor.py=20from=20?=
 =?UTF-8?q?merge.py=20(=E2=88=92234=20lines)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

5 functions extracted: is_knowledge_pr, refine_commit_type,
record_contributor_attribution, upsert_contributor, recalculate_tier.

Injecting git_fn as a parameter avoids a circular import: merge imports
contributor, which would otherwise need _git from merge. The single call site passes _git.

merge.py: 1912 → 1678 lines. 23 new tests, zero regressions.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 lib/contributor.py        | 244 +++++++++++++++++++++++++++++++++++
 lib/merge.py              | 238 +---------------------------------
 tests/test_contributor.py | 263 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 509 insertions(+), 236 deletions(-)
 create mode 100644 lib/contributor.py
 create mode 100644 tests/test_contributor.py

diff --git a/lib/contributor.py b/lib/contributor.py
new file mode 100644
index 0000000..5080af2
--- /dev/null
+++ b/lib/contributor.py
@@ -0,0 +1,244 @@
+"""Contributor attribution — tracks who contributed what and calculates tiers.
+
+Extracted from merge.py (Phase 5 decomposition). Functions:
+- is_knowledge_pr: diff classification (knowledge vs pipeline-only)
+- refine_commit_type: extract → challenge/enrich refinement from diff content
+- record_contributor_attribution: parse trailers + frontmatter, upsert contributors
+- upsert_contributor: insert/update contributor record with role counts
+- recalculate_tier: tier promotion based on config rules
+"""
+
+import json
+import logging
+import re
+
+from . import config, db
+from .forgejo import get_pr_diff
+
+logger = logging.getLogger("pipeline.contributor")
+
+
+def is_knowledge_pr(diff: str) -> bool:
+    """Check if a PR touches knowledge files (claims, decisions, core, foundations).
+
+    Knowledge PRs get full CI attribution weight.
+    Pipeline-only PRs (inbox, entities, agents, archive) get zero CI weight.
+
+    Mixed PRs count as knowledge — if a PR adds a claim, it gets attribution
+    even if it also moves source files. Knowledge takes priority. (Ganymede review)
+    """
+    knowledge_prefixes = ("domains/", "core/", "foundations/", "decisions/")
+
+    for line in diff.split("\n"):
+        if line.startswith("+++ b/") or line.startswith("--- a/"):
+            path = line.split("/", 1)[1] if "/" in line else ""
+            if any(path.startswith(p) for p in knowledge_prefixes):
+                return True
+
+    return False
+
+
+def refine_commit_type(diff: str, branch_commit_type: str) -> str:
+    """Refine commit_type from diff content when branch prefix is ambiguous.
+
+    Branch prefix gives initial classification (extract, research, entity, etc.).
+    For 'extract' branches, diff content can distinguish:
+    - challenge: adds a challenged_by:/challenges: line and creates no new claim files
+    - enrich: modifies existing claim files without creating new ones
+    - extract: creates new claim files (default for extract branches)
+
+    Only refines 'extract' type — other branch types (research, entity, reweave, fix)
+    are already specific enough.
+    """
+    if branch_commit_type != "extract":
+        return branch_commit_type
+
+    new_files = 0
+    modified_files = 0
+    has_challenge_edge = False
+
+    in_diff_header = False
+    current_is_new = False
+    for line in diff.split("\n"):
+        if line.startswith("diff --git"):
+            in_diff_header = True
+            current_is_new = False
+        elif line.startswith("new file"):
+            current_is_new = True
+        elif line.startswith("+++ b/"):
+            path = line[6:]
+            if any(path.startswith(p) for p in ("domains/", "core/", "foundations/")):
+                if current_is_new:
+                    new_files += 1
+                else:
+                    modified_files += 1
+            in_diff_header = False
+        elif line.startswith("+") and not line.startswith("+++"):
+            if "challenged_by:" in line or "challenges:" in line:
+                has_challenge_edge = True
+
+    if has_challenge_edge and new_files == 0:
+        return "challenge"
+    if modified_files > 0 and new_files == 0:
+        return "enrich"
+    return "extract"
+
+
+async def record_contributor_attribution(conn, pr_number: int, branch: str, git_fn):
+    """Record contributor attribution after a successful merge.
+
+    Parses git trailers and claim frontmatter to identify contributors
+    and their roles. Upserts into contributors table. Refines commit_type
+    from diff content. Pipeline-only PRs (no knowledge files) are skipped.
+
+    Args:
+        git_fn: async callable like merge._git, awaited as git_fn(*args, timeout=...) -> (rc, output).
+    """
+    from datetime import date as _date
+
+    today = _date.today().isoformat()
+
+    # Get the PR diff to parse claim frontmatter for attribution blocks
+    diff = await get_pr_diff(pr_number)
+    if not diff:
+        return
+
+    # Pipeline-only PRs (inbox, entities, agents) don't count toward CI
+    if not is_knowledge_pr(diff):
+        logger.info("PR #%d: pipeline-only commit — skipping CI attribution", pr_number)
+        return
+
+    # Refine commit_type from diff content (branch prefix may be too broad)
+    row = conn.execute("SELECT commit_type FROM prs WHERE number = ?", (pr_number,)).fetchone()
+    branch_type = row["commit_type"] if row and row["commit_type"] else "extract"
+    refined_type = refine_commit_type(diff, branch_type)
+    if refined_type != branch_type:
+        conn.execute("UPDATE prs SET commit_type = ? WHERE number = ?", (refined_type, pr_number))
+        logger.info("PR #%d: commit_type refined %s → %s", pr_number, branch_type, refined_type)
+
+    # Parse Pentagon-Agent trailer from branch commit messages
+    agents_found: set[str] = set()
+    rc, log_output = await git_fn(
+        "log", f"origin/main..origin/{branch}", "--format=%b%n%N",
+        timeout=10,
+    )
+    if rc == 0:
+        for match in re.finditer(r"Pentagon-Agent:\s*(\S+)\s*<([^>]+)>", log_output):
+            agent_name = match.group(1).lower()
+            agent_uuid = match.group(2)
+            upsert_contributor(
+                conn, agent_name, agent_uuid, "extractor", today,
+            )
+            agents_found.add(agent_name)
+
+    # Parse attribution blocks from claim frontmatter in diff
+    # Look for added lines with attribution YAML
+    current_role = None
+    for line in diff.split("\n"):
+        if not line.startswith("+") or line.startswith("+++"):
+            continue
+        stripped = line[1:].strip()
+
+        # Detect role sections in attribution block
+        for role in ("sourcer", "extractor", "challenger", "synthesizer", "reviewer"):
+            if stripped.startswith(f"{role}:"):
+                current_role = role
+                break
+
+        # Extract handle from attribution entries
+        handle_match = re.match(r'-\s*handle:\s*["\']?([^"\']+)["\']?', stripped)
+        if handle_match and current_role:
+            handle = handle_match.group(1).strip().lower()
+            agent_id_match = re.search(r'agent_id:\s*["\']?([^"\']+)', stripped)
+            agent_id = agent_id_match.group(1).strip() if agent_id_match else None
+            upsert_contributor(conn, handle, agent_id, current_role, today)
+
+    # Fallback: if no Pentagon-Agent trailer matched (frontmatter handles are not tracked), credit prs.agent as extractor
+    if not agents_found:
+        row = conn.execute("SELECT agent FROM prs WHERE number = ?", (pr_number,)).fetchone()
+        if row and row["agent"]:
+            upsert_contributor(conn, row["agent"].lower(), None, "extractor", today)
+
+
+def upsert_contributor(
+    conn, handle: str, agent_id: str | None, role: str, date_str: str,
+):
+    """Upsert a contributor: bump the role count (plus claims_merged for extractor/sourcer), then recalculate tier."""
+    role_col = f"{role}_count"
+    if role_col not in (
+        "sourcer_count", "extractor_count", "challenger_count",
+        "synthesizer_count", "reviewer_count",
+    ):
+        logger.warning("Unknown contributor role: %s", role)
+        return
+
+    existing = conn.execute(
+        "SELECT handle FROM contributors WHERE handle = ?", (handle,)
+    ).fetchone()
+
+    if existing:
+        conn.execute(
+            f"""UPDATE contributors SET
+                {role_col} = {role_col} + 1,
+                claims_merged = claims_merged + CASE WHEN ? IN ('extractor', 'sourcer') THEN 1 ELSE 0 END,
+                last_contribution = ?,
+                updated_at = datetime('now')
+            WHERE handle = ?""",
+            (role, date_str, handle),
+        )
+    else:
+        conn.execute(
+            f"""INSERT INTO contributors (handle, agent_id, first_contribution, last_contribution, {role_col}, claims_merged)
+            VALUES (?, ?, ?, ?, 1, CASE WHEN ? IN ('extractor', 'sourcer') THEN 1 ELSE 0 END)""",
+            (handle, agent_id, date_str, date_str, role),
+        )
+
+    # Recalculate tier
+    recalculate_tier(conn, handle)
+
+
+def recalculate_tier(conn, handle: str):
+    """Recalculate contributor tier based on config rules."""
+    from datetime import date as _date, datetime as _dt
+
+    row = conn.execute(
+        "SELECT claims_merged, challenges_survived, first_contribution, tier FROM contributors WHERE handle = ?",
+        (handle,),
+    ).fetchone()
+    if not row:
+        return
+
+    current_tier = row["tier"]
+    claims_merged = row["claims_merged"] or 0
+    challenges_survived = row["challenges_survived"] or 0
+    first_contribution = row["first_contribution"]
+
+    days_since_first = 0
+    if first_contribution:
+        try:
+            first_date = _dt.strptime(first_contribution, "%Y-%m-%d").date()
+            days_since_first = (_date.today() - first_date).days
+        except ValueError:
+            pass
+
+    # Check veteran first (higher tier)
+    vet_rules = config.CONTRIBUTOR_TIER_RULES["veteran"]
+    if (claims_merged >= vet_rules["claims_merged"]
+            and days_since_first >= vet_rules["min_days_since_first"]
+            and challenges_survived >= vet_rules["challenges_survived"]):
+        new_tier = "veteran"
+    elif claims_merged >= config.CONTRIBUTOR_TIER_RULES["contributor"]["claims_merged"]:
+        new_tier = "contributor"
+    else:
+        new_tier = "new"
+
+    if new_tier != current_tier:
+        conn.execute(
+            "UPDATE contributors SET tier = ?, updated_at = datetime('now') WHERE handle = ?",
+            (new_tier, handle),
+        )
+        logger.info("Contributor %s: tier %s → %s", handle, current_tier, new_tier)
+        db.audit(
+            conn, "contributor", "tier_change",
+            json.dumps({"handle": handle, "from": current_tier, "to": new_tier}),
+        )
diff --git a/lib/merge.py b/lib/merge.py
index 6c92ff1..bbfbdcc 100644
--- a/lib/merge.py
+++ b/lib/merge.py
@@ -21,6 +21,7 @@ from collections import defaultdict
 
 from . import config, db
 from .db import classify_branch
+from .contributor import record_contributor_attribution
 from .dedup import dedup_evidence_blocks
 from .domains import detect_domain_from_branch
 from .forgejo import api as forgejo_api
@@ -800,241 +801,6 @@ async def _delete_remote_branch(branch: str):
         logger.warning("Failed to delete remote branch %s — cosmetic, continuing", branch)
 
 
-# --- Contributor attribution ---
-
-
-def _is_knowledge_pr(diff: str) -> bool:
-    """Check if a PR touches knowledge files (claims, decisions, core, foundations).
-
-    Knowledge PRs get full CI attribution weight.
-    Pipeline-only PRs (inbox, entities, agents, archive) get zero CI weight.
-
-    Mixed PRs count as knowledge — if a PR adds a claim, it gets attribution
-    even if it also moves source files. Knowledge takes priority. (Ganymede review)
-    """
-    knowledge_prefixes = ("domains/", "core/", "foundations/", "decisions/")
-
-    for line in diff.split("\n"):
-        if line.startswith("+++ b/") or line.startswith("--- a/"):
-            path = line.split("/", 1)[1] if "/" in line else ""
-            if any(path.startswith(p) for p in knowledge_prefixes):
-                return True
-
-    return False
-
-
-def _refine_commit_type(diff: str, branch_commit_type: str) -> str:
-    """Refine commit_type from diff content when branch prefix is ambiguous.
-
-    Branch prefix gives initial classification (extract, research, entity, etc.).
-    For 'extract' branches, diff content can distinguish:
-    - challenge: adds challenged_by edges to existing claims
-    - enrich: modifies existing claim frontmatter without new files
-    - extract: creates new claim files (default for extract branches)
-
-    Only refines 'extract' type — other branch types (research, entity, reweave, fix)
-    are already specific enough.
-    """
-    if branch_commit_type != "extract":
-        return branch_commit_type
-
-    new_files = 0
-    modified_files = 0
-    has_challenge_edge = False
-
-    in_diff_header = False
-    current_is_new = False
-    for line in diff.split("\n"):
-        if line.startswith("diff --git"):
-            in_diff_header = True
-            current_is_new = False
-        elif line.startswith("new file"):
-            current_is_new = True
-        elif line.startswith("+++ b/"):
-            path = line[6:]
-            if any(path.startswith(p) for p in ("domains/", "core/", "foundations/")):
-                if current_is_new:
-                    new_files += 1
-                else:
-                    modified_files += 1
-            in_diff_header = False
-        elif line.startswith("+") and not line.startswith("+++"):
-            if "challenged_by:" in line or "challenges:" in line:
-                has_challenge_edge = True
-
-    if has_challenge_edge and new_files == 0:
-        return "challenge"
-    if modified_files > 0 and new_files == 0:
-        return "enrich"
-    return "extract"
-
-
-async def _record_contributor_attribution(conn, pr_number: int, branch: str):
-    """Record contributor attribution after a successful merge.
-
-    Parses git trailers and claim frontmatter to identify contributors
-    and their roles. Upserts into contributors table. Refines commit_type
-    from diff content. Pipeline-only PRs (no knowledge files) are skipped.
-    """
-    import re as _re
-    from datetime import date as _date, datetime as _dt
-
-    today = _date.today().isoformat()
-
-    # Get the PR diff to parse claim frontmatter for attribution blocks
-    diff = await get_pr_diff(pr_number)
-    if not diff:
-        return
-
-    # Pipeline-only PRs (inbox, entities, agents) don't count toward CI
-    if not _is_knowledge_pr(diff):
-        logger.info("PR #%d: pipeline-only commit — skipping CI attribution", pr_number)
-        return
-
-    # Refine commit_type from diff content (branch prefix may be too broad)
-    row = conn.execute("SELECT commit_type FROM prs WHERE number = ?", (pr_number,)).fetchone()
-    branch_type = row["commit_type"] if row and row["commit_type"] else "extract"
-    refined_type = _refine_commit_type(diff, branch_type)
-    if refined_type != branch_type:
-        conn.execute("UPDATE prs SET commit_type = ? WHERE number = ?", (refined_type, pr_number))
-        logger.info("PR #%d: commit_type refined %s → %s", pr_number, branch_type, refined_type)
-
-    # Parse Pentagon-Agent trailer from branch commit messages
-    agents_found: set[str] = set()
-    rc, log_output = await _git(
-        "log", f"origin/main..origin/{branch}", "--format=%b%n%N",
-        timeout=10,
-    )
-    if rc == 0:
-        for match in _re.finditer(r"Pentagon-Agent:\s*(\S+)\s*<([^>]+)>", log_output):
-            agent_name = match.group(1).lower()
-            agent_uuid = match.group(2)
-            _upsert_contributor(
-                conn, agent_name, agent_uuid, "extractor", today,
-            )
-            agents_found.add(agent_name)
-
-    # Parse attribution blocks from claim frontmatter in diff
-    # Look for added lines with attribution YAML
-    current_role = None
-    for line in diff.split("\n"):
-        if not line.startswith("+") or line.startswith("+++"):
-            continue
-        stripped = line[1:].strip()
-
-        # Detect role sections in attribution block
-        for role in ("sourcer", "extractor", "challenger", "synthesizer", "reviewer"):
-            if stripped.startswith(f"{role}:"):
-                current_role = role
-                break
-
-        # Extract handle from attribution entries
-        handle_match = _re.match(r'-\s*handle:\s*["\']?([^"\']+)["\']?', stripped)
-        if handle_match and current_role:
-            handle = handle_match.group(1).strip().lower()
-            agent_id_match = _re.search(r'agent_id:\s*["\']?([^"\']+)', stripped)
-            agent_id = agent_id_match.group(1).strip() if agent_id_match else None
-            _upsert_contributor(conn, handle, agent_id, current_role, today)
-
-    # Fallback: if no attribution block found, credit the branch agent as extractor
-    if not agents_found:
-        # Try to infer agent from branch name (e.g., "extract/2026-03-05-...")
-        # The PR's agent field in SQLite is also available
-        row = conn.execute("SELECT agent FROM prs WHERE number = ?", (pr_number,)).fetchone()
-        if row and row["agent"]:
-            _upsert_contributor(conn, row["agent"].lower(), None, "extractor", today)
-
-    # Increment claims_merged for all contributors on this PR
-    # (handled inside _upsert_contributor via the role counts)
-
-
-def _upsert_contributor(
-    conn, handle: str, agent_id: str | None, role: str, date_str: str,
-):
-    """Upsert a contributor record, incrementing the appropriate role count."""
-    import json as _json
-    from datetime import datetime as _dt
-
-    role_col = f"{role}_count"
-    if role_col not in (
-        "sourcer_count", "extractor_count", "challenger_count",
-        "synthesizer_count", "reviewer_count",
-    ):
-        logger.warning("Unknown contributor role: %s", role)
-        return
-
-    existing = conn.execute(
-        "SELECT handle FROM contributors WHERE handle = ?", (handle,)
-    ).fetchone()
-
-    if existing:
-        conn.execute(
-            f"""UPDATE contributors SET
-                {role_col} = {role_col} + 1,
-                claims_merged = claims_merged + CASE WHEN ? IN ('extractor', 'sourcer') THEN 1 ELSE 0 END,
-                last_contribution = ?,
-                updated_at = datetime('now')
-            WHERE handle = ?""",
-            (role, date_str, handle),
-        )
-    else:
-        conn.execute(
-            f"""INSERT INTO contributors (handle, agent_id, first_contribution, last_contribution, {role_col}, claims_merged)
-            VALUES (?, ?, ?, ?, 1, CASE WHEN ? IN ('extractor', 'sourcer') THEN 1 ELSE 0 END)""",
-            (handle, agent_id, date_str, date_str, role),
-        )
-
-    # Recalculate tier
-    _recalculate_tier(conn, handle)
-
-
-def _recalculate_tier(conn, handle: str):
-    """Recalculate contributor tier based on config rules."""
-    from datetime import date as _date, datetime as _dt
-
-    row = conn.execute(
-        "SELECT claims_merged, challenges_survived, first_contribution, tier FROM contributors WHERE handle = ?",
-        (handle,),
-    ).fetchone()
-    if not row:
-        return
-
-    current_tier = row["tier"]
-    claims_merged = row["claims_merged"] or 0
-    challenges_survived = row["challenges_survived"] or 0
-    first_contribution = row["first_contribution"]
-
-    days_since_first = 0
-    if first_contribution:
-        try:
-            first_date = _dt.strptime(first_contribution, "%Y-%m-%d").date()
-            days_since_first = (_date.today() - first_date).days
-        except ValueError:
-            pass
-
-    # Check veteran first (higher tier)
-    vet_rules = config.CONTRIBUTOR_TIER_RULES["veteran"]
-    if (claims_merged >= vet_rules["claims_merged"]
-            and days_since_first >= vet_rules["min_days_since_first"]
-            and challenges_survived >= vet_rules["challenges_survived"]):
-        new_tier = "veteran"
-    elif claims_merged >= config.CONTRIBUTOR_TIER_RULES["contributor"]["claims_merged"]:
-        new_tier = "contributor"
-    else:
-        new_tier = "new"
-
-    if new_tier != current_tier:
-        conn.execute(
-            "UPDATE contributors SET tier = ?, updated_at = datetime('now') WHERE handle = ?",
-            (new_tier, handle),
-        )
-        logger.info("Contributor %s: tier %s → %s", handle, current_tier, new_tier)
-        db.audit(
-            conn, "contributor", "tier_change",
-            json.dumps({"handle": handle, "from": current_tier, "to": new_tier}),
-        )
-
-
 # --- Source archiving after merge (Ganymede review: closes near-duplicate loop) ---
 
 # Accumulates source moves during a merge cycle, batch-committed at the end
@@ -1532,7 +1298,7 @@ async def _merge_domain_queue(conn, domain: str) -> tuple[int, int]:
 
             # Record contributor attribution
             try:
-                await _record_contributor_attribution(conn, pr_num, branch)
+                await record_contributor_attribution(conn, pr_num, branch, _git)
             except Exception:
                 logger.exception("PR #%d: contributor attribution failed (non-fatal)", pr_num)
 
diff --git a/tests/test_contributor.py b/tests/test_contributor.py
new file mode 100644
index 0000000..7de0463
--- /dev/null
+++ b/tests/test_contributor.py
@@ -0,0 +1,263 @@
+"""Tests for lib/contributor.py — contributor attribution functions."""
+
+import sqlite3
+import asyncio
+import sys
+import os
+from unittest.mock import AsyncMock, MagicMock, patch
+
+sys.modules.setdefault("aiohttp", MagicMock())
+
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
+
+from lib.contributor import (
+    is_knowledge_pr,
+    refine_commit_type,
+    record_contributor_attribution,
+    upsert_contributor,
+    recalculate_tier,
+)
+
+
+# --- is_knowledge_pr ---
+
+def test_knowledge_pr_domains():
+    diff = "+++ b/domains/crypto/some-claim.md\n"
+    assert is_knowledge_pr(diff) is True
+
+def test_knowledge_pr_core():
+    diff = "+++ b/core/epistemology.md\n"
+    assert is_knowledge_pr(diff) is True
+
+def test_knowledge_pr_foundations():
+    diff = "--- a/foundations/overview.md\n"
+    assert is_knowledge_pr(diff) is True
+
+def test_knowledge_pr_decisions():
+    diff = "+++ b/decisions/some-decision.md\n"
+    assert is_knowledge_pr(diff) is True
+
+def test_pipeline_only_pr():
+    diff = "+++ b/inbox/source.md\n+++ b/entities/metadao.md\n"
+    assert is_knowledge_pr(diff) is False
+
+def test_mixed_pr_counts_as_knowledge():
+    diff = "+++ b/inbox/source.md\n+++ b/domains/crypto/claim.md\n"
+    assert is_knowledge_pr(diff) is True
+
+def test_empty_diff():
+    assert is_knowledge_pr("") is False
+
+
+# --- refine_commit_type ---
+
+def test_refine_non_extract_unchanged():
+    assert refine_commit_type("anything", "research") == "research"
+    assert refine_commit_type("anything", "entity") == "entity"
+
+def test_refine_extract_new_files():
+    diff = "diff --git a/x b/y\nnew file\n+++ b/domains/crypto/claim.md\n"
+    assert refine_commit_type(diff, "extract") == "extract"
+
+def test_refine_extract_challenge():
+    diff = "diff --git a/x b/y\n+++ b/domains/crypto/claim.md\n+challenged_by: other\n"
+    assert refine_commit_type(diff, "extract") == "challenge"
+
+def test_refine_extract_enrich():
+    diff = "diff --git a/x b/y\n+++ b/domains/crypto/claim.md\n+confidence: 0.8\n"
+    assert refine_commit_type(diff, "extract") == "enrich"
+
+def test_refine_extract_mixed_new_and_modified():
+    diff = (
+        "diff --git a/x b/y\nnew file\n+++ b/domains/crypto/new.md\n"
+        "diff --git a/x b/z\n+++ b/domains/crypto/existing.md\n+foo\n"
+    )
+    assert refine_commit_type(diff, "extract") == "extract"
+
+
+# --- upsert_contributor + recalculate_tier ---
+
+def _make_db():
+    conn = sqlite3.connect(":memory:")
+    conn.row_factory = sqlite3.Row
+    conn.execute("""CREATE TABLE contributors (
+        handle TEXT PRIMARY KEY,
+        agent_id TEXT,
+        tier TEXT DEFAULT 'new',
+        first_contribution TEXT,
+        last_contribution TEXT,
+        claims_merged INTEGER DEFAULT 0,
+        challenges_survived INTEGER DEFAULT 0,
+        sourcer_count INTEGER DEFAULT 0,
+        extractor_count INTEGER DEFAULT 0,
+        challenger_count INTEGER DEFAULT 0,
+        synthesizer_count INTEGER DEFAULT 0,
+        reviewer_count INTEGER DEFAULT 0,
+        updated_at TEXT
+    )""")
+    conn.execute("""CREATE TABLE audit_log (
+        id INTEGER PRIMARY KEY,
+        ts TEXT DEFAULT (datetime('now')),
+        stage TEXT,
+        event TEXT,
+        detail TEXT
+    )""")
+    return conn
+
+def test_upsert_new_contributor():
+    conn = _make_db()
+    with patch("lib.contributor.config") as mock_config:
+        mock_config.CONTRIBUTOR_TIER_RULES = {
+            "veteran": {"claims_merged": 50, "min_days_since_first": 90, "challenges_survived": 5},
+            "contributor": {"claims_merged": 10},
+        }
+        upsert_contributor(conn, "rio", "uuid-123", "extractor", "2026-04-16")
+    row = conn.execute("SELECT * FROM contributors WHERE handle = 'rio'").fetchone()
+    assert row["extractor_count"] == 1
+    assert row["claims_merged"] == 1
+    assert row["tier"] == "new"
+
+def test_upsert_increment():
+    conn = _make_db()
+    upsert_contributor(conn, "rio", "uuid-123", "extractor", "2026-04-16")
+    upsert_contributor(conn, "rio", "uuid-123", "extractor", "2026-04-17")
+    row = conn.execute("SELECT * FROM contributors WHERE handle = 'rio'").fetchone()
+    assert row["extractor_count"] == 2
+    assert row["claims_merged"] == 2
+
+def test_upsert_reviewer_no_claim_increment():
+    conn = _make_db()
+    upsert_contributor(conn, "leo", None, "reviewer", "2026-04-16")
+    row = conn.execute("SELECT * FROM contributors WHERE handle = 'leo'").fetchone()
+    assert row["reviewer_count"] == 1
+    assert row["claims_merged"] == 0
+
+def test_upsert_unknown_role():
+    conn = _make_db()
+    upsert_contributor(conn, "rio", None, "wizard", "2026-04-16")
+    row = conn.execute("SELECT * FROM contributors WHERE handle = 'rio'").fetchone()
+    assert row is None  # Should not insert
+
+def test_recalculate_tier_contributor():
+    conn = _make_db()
+    conn.execute(
+        """INSERT INTO contributors (handle, claims_merged, challenges_survived, first_contribution, tier)
+        VALUES ('rio', 15, 0, '2026-01-01', 'new')"""
+    )
+    with patch("lib.contributor.config") as mock_config:
+        mock_config.CONTRIBUTOR_TIER_RULES = {
+            "veteran": {"claims_merged": 50, "min_days_since_first": 90, "challenges_survived": 5},
+            "contributor": {"claims_merged": 10},
+        }
+        recalculate_tier(conn, "rio")
+    row = conn.execute("SELECT tier FROM contributors WHERE handle = 'rio'").fetchone()
+    assert row["tier"] == "contributor"
+
+def test_recalculate_tier_veteran():
+    conn = _make_db()
+    conn.execute(
+        """INSERT INTO contributors (handle, claims_merged, challenges_survived, first_contribution, tier)
+        VALUES ('rio', 60, 10, '2025-01-01', 'contributor')"""
+    )
+    with patch("lib.contributor.config") as mock_config:
+        mock_config.CONTRIBUTOR_TIER_RULES = {
+            "veteran": {"claims_merged": 50, "min_days_since_first": 90, "challenges_survived": 5},
+            "contributor": {"claims_merged": 10},
+        }
+        recalculate_tier(conn, "rio")
+    row = conn.execute("SELECT tier FROM contributors WHERE handle = 'rio'").fetchone()
+    assert row["tier"] == "veteran"
+
+
+# --- record_contributor_attribution ---
+
+def _make_attribution_db():
+    conn = _make_db()
+    conn.execute("""CREATE TABLE prs (
+        number INTEGER PRIMARY KEY,
+        commit_type TEXT,
+        agent TEXT
+    )""")
+    conn.execute("INSERT INTO prs VALUES (100, 'extract', 'rio')")
+    return conn
+
+def test_record_skips_pipeline_only():
+    conn = _make_attribution_db()
+    mock_diff = "+++ b/inbox/source.md\n"
+
+    async def run():
+        with patch("lib.contributor.get_pr_diff", new_callable=AsyncMock, return_value=mock_diff):
+            git_fn = AsyncMock(return_value=(0, ""))
+            await record_contributor_attribution(conn, 100, "extract/test", git_fn)
+
+    asyncio.run(run())
+    row = conn.execute("SELECT * FROM contributors").fetchone()
+    assert row is None  # No attribution for pipeline-only
+
+def test_record_fallback_to_pr_agent():
+    conn = _make_attribution_db()
+    mock_diff = "+++ b/domains/crypto/claim.md\n+some content\n"
+
+    async def run():
+        with patch("lib.contributor.get_pr_diff", new_callable=AsyncMock, return_value=mock_diff):
+            git_fn = AsyncMock(return_value=(0, "no trailers here"))
+            with patch("lib.contributor.config") as mock_config:
+                mock_config.CONTRIBUTOR_TIER_RULES = {
+                    "veteran": {"claims_merged": 50, "min_days_since_first": 90, "challenges_survived": 5},
+                    "contributor": {"claims_merged": 10},
+                }
+                await record_contributor_attribution(conn, 100, "extract/test", git_fn)
+
+    asyncio.run(run())
+    row = conn.execute("SELECT * FROM contributors WHERE handle = 'rio'").fetchone()
+    assert row is not None
+    assert row["extractor_count"] == 1
+
+def test_record_parses_pentagon_trailer():
+    conn = _make_attribution_db()
+    mock_diff = "+++ b/domains/crypto/claim.md\n+new file content\n"
+    trailer = "Pentagon-Agent: Theseus <uuid-456>"
+
+    async def run():
+        with patch("lib.contributor.get_pr_diff", new_callable=AsyncMock, return_value=mock_diff):
+            git_fn = AsyncMock(return_value=(0, trailer))
+            with patch("lib.contributor.config") as mock_config:
+                mock_config.CONTRIBUTOR_TIER_RULES = {
+                    "veteran": {"claims_merged": 50, "min_days_since_first": 90, "challenges_survived": 5},
+                    "contributor": {"claims_merged": 10},
+                }
+                await record_contributor_attribution(conn, 100, "extract/test", git_fn)
+
+    asyncio.run(run())
+    row = conn.execute("SELECT * FROM contributors WHERE handle = 'theseus'").fetchone()
+    assert row is not None
+    assert row["agent_id"] == "uuid-456"
+
+def test_record_refines_commit_type():
+    conn = _make_attribution_db()
+    mock_diff = "diff --git a/x b/y\n+++ b/domains/crypto/claim.md\n+challenged_by: foo\n"
+
+    async def run():
+        with patch("lib.contributor.get_pr_diff", new_callable=AsyncMock, return_value=mock_diff):
+            git_fn = AsyncMock(return_value=(0, ""))
+            with patch("lib.contributor.config") as mock_config:
+                mock_config.CONTRIBUTOR_TIER_RULES = {
+                    "veteran": {"claims_merged": 50, "min_days_since_first": 90, "challenges_survived": 5},
+                    "contributor": {"claims_merged": 10},
+                }
+                await record_contributor_attribution(conn, 100, "extract/test", git_fn)
+
+    asyncio.run(run())
+    row = conn.execute("SELECT commit_type FROM prs WHERE number = 100").fetchone()
+    assert row["commit_type"] == "challenge"
+
+def test_record_no_diff_returns_early():
+    conn = _make_attribution_db()
+
+    async def run():
+        with patch("lib.contributor.get_pr_diff", new_callable=AsyncMock, return_value=None):
+            git_fn = AsyncMock()
+            await record_contributor_attribution(conn, 100, "extract/test", git_fn)
+            git_fn.assert_not_called()
+
+    asyncio.run(run())