Some checks are pending
CI / lint-and-test (push) Waiting to run
Ganymede review findings on epimetheus/contributor-attribution-fix branch:
1. BUG: record_contributor_attribution used `git diff --name-only`, which lists
all modified files, not just newly added ones. As a result, enrich/challenge PRs
re-credited the sourcer on every subsequent modification. Fixed: --diff-filter=A
restricts the diff to newly added files only.
The synthesizer/challenger/reviewer roles for enrich PRs are still credited
via the Pentagon-Agent trailer path, so this doesn't lose any correct credit.
2. WARNING: Legacy `source`-field heuristic fabricated garbage handles from
descriptive strings ("sec-interpretive-release-s7-2026-09-(march-17",
"governance---meritocratic-voting-+-futarchy"). Removed outright + added
regex handle sanity filter (`^[a-z0-9][a-z0-9_-]{0,38}$`). Applied before
every return path in parse_attribution (the nested-block early return was
previously bypassing the filter).
Dry-run impact: unique handles 83→70 (13 garbage filtered), NEW contributors
49→48, EXISTING drift rows 34→22. The filter drops rows where the literal
garbage string lives in frontmatter (Slotkin case: attribution.sourcer.handle
was written as "senator-elissa-slotkin-/-the-hill" by the buggy legacy path).
3. NIT: Aligned knowledge_prefixes in the file walker to match is_knowledge_pr
(removed entities/, convictions/). Widening the prefix list to include those
directories again requires Cory sign-off, since is_knowledge_pr currently gates
entity-only PRs out of CI.
Tests: 17 pass (added test_bad_handles_filtered, test_valid_handle_with_hyphen_passes,
updated test_legacy_source_fallback → test_legacy_source_fallback_removed).
Ganymede review — 3-message protocol msg 3 pending.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
157 lines
5.8 KiB
Python
157 lines
5.8 KiB
Python
"""Tests for attribution module."""
|
|
|
|
import pytest
|
|
|
|
from lib.attribution import (
|
|
build_attribution_block,
|
|
parse_attribution,
|
|
role_counts_from_attribution,
|
|
validate_attribution,
|
|
)
|
|
|
|
|
|
class TestParseAttribution:
    """Tests for `parse_attribution` across the supported frontmatter shapes."""

    def test_nested_format(self):
        """A nested `attribution` block of dict entries is parsed per role.

        A leading '@' on a handle is stripped.
        """
        fm = {
            "type": "claim",
            "attribution": {
                "extractor": [{"handle": "rio", "agent_id": "760F7FE7"}],
                "sourcer": [{"handle": "@theiaresearch", "context": "annual letter"}],
            },
        }
        result = parse_attribution(fm)
        assert len(result["extractor"]) == 1
        assert result["extractor"][0]["handle"] == "rio"
        assert result["sourcer"][0]["handle"] == "theiaresearch"  # @ stripped

    def test_flat_format(self):
        """Flat `attribution_<role>` keys are parsed the same way as the nested block."""
        fm = {
            "type": "claim",
            "attribution_extractor": "rio",
            "attribution_sourcer": "@theiaresearch",
        }
        result = parse_attribution(fm)
        assert result["extractor"][0]["handle"] == "rio"
        assert result["sourcer"][0]["handle"] == "theiaresearch"

    def test_legacy_source_fallback_removed(self):
        """Legacy `source` heuristic removed (Ganymede review, Apr 24).

        It fabricated handles from descriptive strings (garbage like
        'sec-interpretive-release-s7-2026-09-(march-17'). Claims without
        explicit attribution now return empty — better to surface as data
        hygiene than invent contributors.
        """
        fm = {
            "type": "claim",
            "source": "@pineanalytics, Q4 2025 report",
        }
        result = parse_attribution(fm)
        assert all(len(v) == 0 for v in result.values())

    def test_bad_handles_filtered(self):
        """Handles containing characters outside the handle pattern are dropped.

        Each sample is a garbage handle containing a character ('+', '(' or
        '/') that a real handle cannot contain.
        NOTE(review): assumes the handle filter allows only lowercase
        alphanumerics, '_' and '-' — confirm against lib.attribution.
        """
        garbage_handles = (
            "governance---meritocratic-voting-+-futarchy",
            "sec-interpretive-release-s7-2026-09-(march-17",
            "senator-elissa-slotkin-/-the-hill",
        )
        # Check every sample individually so a failure names the offending handle.
        for handle in garbage_handles:
            result = parse_attribution({"sourcer": handle})
            assert len(result["sourcer"]) == 0, f"garbage handle not filtered: {handle!r}"

    def test_valid_handle_with_hyphen_passes(self):
        """Legitimate handles like 'cameron-s1' survive the filter."""
        fm = {"sourcer": "cameron-s1"}
        result = parse_attribution(fm)
        assert result["sourcer"][0]["handle"] == "cameron-s1"

    def test_empty_attribution(self):
        """Frontmatter with no attribution data yields an empty list for every role."""
        fm = {"type": "claim"}
        result = parse_attribution(fm)
        assert all(len(v) == 0 for v in result.values())

    def test_string_entries(self):
        """Bare strings in the nested block (in a list or as a scalar) become handles."""
        fm = {
            "attribution": {
                "extractor": ["rio"],
                "sourcer": "theiaresearch",
            },
        }
        result = parse_attribution(fm)
        assert result["extractor"][0]["handle"] == "rio"
        assert result["sourcer"][0]["handle"] == "theiaresearch"
|
|
|
|
|
|
class TestValidateAttribution:
    """Tests for `validate_attribution` issue reporting and extractor auto-fix."""

    @staticmethod
    def _sourcer_only():
        """Return fresh frontmatter whose attribution block has a sourcer but no extractor.

        A new dict is built on every call because the auto-fix test checks
        that its input is mutated in place.
        """
        return {"attribution": {"sourcer": [{"handle": "someone"}]}}

    def test_valid_attribution(self):
        """An attribution block that names an extractor reports no issues."""
        frontmatter = {
            "attribution": {
                "extractor": [{"handle": "rio"}],
            },
        }
        problems = validate_attribution(frontmatter)
        assert len(problems) == 0

    def test_missing_extractor(self):
        """A block without an extractor reports `missing_attribution_extractor`."""
        problems = validate_attribution(self._sourcer_only())
        assert "missing_attribution_extractor" in problems

    def test_no_attribution_block_passes(self):
        """Legacy claims without attribution block should NOT be blocked."""
        frontmatter = {"type": "claim", "source": "some source"}
        problems = validate_attribution(frontmatter)
        # No attribution block = legacy, not an error
        assert len(problems) == 0

    def test_attribution_block_missing_extractor(self):
        """Claims WITH attribution block but missing extractor SHOULD be blocked."""
        frontmatter = {"type": "claim", **self._sourcer_only()}
        problems = validate_attribution(frontmatter)
        assert "missing_attribution_extractor" in problems

    def test_missing_extractor_auto_fix_with_agent(self):
        """When agent is provided, auto-fix missing extractor instead of blocking."""
        frontmatter = self._sourcer_only()
        problems = validate_attribution(frontmatter, agent="leo")
        assert "fixed_missing_extractor" in problems
        assert "missing_attribution_extractor" not in problems
        # Verify the fix was applied in-place
        assert frontmatter["attribution"]["extractor"] == [{"handle": "leo"}]

    def test_missing_extractor_no_agent_still_blocks(self):
        """Without agent context, missing extractor is still a hard failure."""
        problems = validate_attribution(self._sourcer_only(), agent=None)
        assert "missing_attribution_extractor" in problems
|
|
|
|
|
|
class TestBuildAttributionBlock:
    """Tests for `build_attribution_block` output structure."""

    def test_basic_build(self):
        """The agent and its id land in the first extractor entry."""
        block = build_attribution_block("rio", agent_id="760F7FE7")
        extractor = block["extractor"][0]
        assert extractor["handle"] == "rio"
        assert extractor["agent_id"] == "760F7FE7"

    def test_with_sourcer(self):
        """The source handle is stored without '@' and lowercased, with its context."""
        block = build_attribution_block(
            "rio",
            source_handle="@PineAnalytics",
            source_context="Q4 report",
        )
        sourcer = block["sourcer"][0]
        assert sourcer["handle"] == "pineanalytics"
        assert sourcer["context"] == "Q4 report"

    def test_empty_roles(self):
        """Roles that were not supplied are present as empty lists."""
        block = build_attribution_block("rio")
        for role in ("challenger", "synthesizer", "reviewer"):
            assert block[role] == []
|
|
|
|
|
|
class TestRoleCounts:
    """Tests for `role_counts_from_attribution`."""

    def test_basic_counts(self):
        """Each populated role maps to its list of handles; empty roles are omitted."""
        parsed = {
            "extractor": [{"handle": "rio"}],
            "sourcer": [{"handle": "theia"}, {"handle": "pine"}],
            "challenger": [],
            "synthesizer": [],
            "reviewer": [{"handle": "leo"}],
        }
        by_role = role_counts_from_attribution(parsed)
        assert by_role["extractor"] == ["rio"]
        assert by_role["sourcer"] == ["theia", "pine"]
        assert "challenger" not in by_role
        assert by_role["reviewer"] == ["leo"]
|