fix(attribution): credit research-session sources to agents, not m3taversal #7

Merged
m3taversal merged 5 commits from ship/research-attribution-fix into main 2026-04-27 11:59:55 +00:00
2 changed files with 13 additions and 2 deletions
Showing only changes of commit dea1b02aa6 - Show all commits

View file

@ -15,6 +15,7 @@ Epimetheus owns this module. Leo reviews changes.
import logging
import re
import sqlite3
from pathlib import Path
logger = logging.getLogger("pipeline.attribution")
@ -130,8 +131,11 @@ def is_publisher_handle(handle: str, conn) -> int | None:
).fetchone()
if row:
return row["id"] if hasattr(row, "keys") else row[0]
except Exception:
logger.debug("is_publisher_handle: lookup failed for %r", h, exc_info=True)
except sqlite3.OperationalError:
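# Pre-v26 DB: publishers table doesn't exist yet. Fall through to None
# so writer behaves as before. Other exception classes are real signal
# (programming error, corruption) — let them propagate.
# NOTE(review): lock contention ("database is locked") is also raised as
# sqlite3.OperationalError, so it is caught here too rather than propagating.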
logger.debug("is_publisher_handle: publishers table not present (pre-v26?)", exc_info=True)
return None

View file

@ -428,6 +428,13 @@ def upsert_contributor(
# Schema v26 gate: orgs/citations live in publishers table, not contributors.
# Skip without writing so the v26 classifier cleanup isn't undone by every
# merge that has `sourcer: cnbc` (or similar) in claim frontmatter.
#
# Note: bare normalization (strip + lower + lstrip @), no alias resolution. This is
# consistent with the existing `SELECT handle FROM contributors WHERE handle = ?`
# below — both look up by canonical-form-as-stored. Today's classifier produces
# one publisher row per canonical handle, so bare lookup hits. Branch 3 will
# normalize alias→canonical at writer entry points (extract.py, post_extract);
# at that point this gate auto-tightens because callers pass canonical handles.
canonical_handle = handle.strip().lower().lstrip("@") if handle else ""
if canonical_handle and is_publisher_handle(canonical_handle, conn) is not None:
logger.debug("upsert_contributor: %r is a publisher — skipping contributor row", canonical_handle)