fix(attribution): credit research-session sources to agents, not m3taversal #7
2 changed files with 13 additions and 2 deletions
|
|
@ -15,6 +15,7 @@ Epimetheus owns this module. Leo reviews changes.
|
|||
|
||||
import logging
|
||||
import re
|
||||
import sqlite3
|
||||
from pathlib import Path
|
||||
|
||||
logger = logging.getLogger("pipeline.attribution")
|
||||
|
|
@ -130,8 +131,11 @@ def is_publisher_handle(handle: str, conn) -> int | None:
|
|||
).fetchone()
|
||||
if row:
|
||||
return row["id"] if hasattr(row, "keys") else row[0]
|
||||
except Exception:
|
||||
logger.debug("is_publisher_handle: lookup failed for %r", h, exc_info=True)
|
||||
except sqlite3.OperationalError:
|
||||
# Pre-v26 DB: publishers table doesn't exist yet. Fall through to None
|
||||
# so writer behaves as before. Any other exception class is real signal
|
||||
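# (programming error, corruption) — let it propagate. Caveat: lock contention
# ("database is locked") is also raised as sqlite3.OperationalError, so it is
# caught here too and treated like a missing table rather than propagated.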
|
||||
logger.debug("is_publisher_handle: publishers table not present (pre-v26?)", exc_info=True)
|
||||
return None
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -428,6 +428,13 @@ def upsert_contributor(
|
|||
# Schema v26 gate: orgs/citations live in publishers table, not contributors.
|
||||
# Skip without writing so the v26 classifier cleanup isn't undone by every
|
||||
# merge that has `sourcer: cnbc` (or similar) in claim frontmatter.
|
||||
#
|
||||
# Note: bare normalization (lower + lstrip @), no alias resolution. This is
|
||||
# consistent with the existing `SELECT handle FROM contributors WHERE handle = ?`
|
||||
# below — both look up by canonical-form-as-stored. Today's classifier produces
|
||||
# one publisher row per canonical handle, so bare lookup hits. Branch 3 will
|
||||
# normalize alias→canonical at writer entry points (extract.py, post_extract);
|
||||
# at that point this gate auto-tightens because callers pass canonical handles.
|
||||
canonical_handle = handle.strip().lower().lstrip("@") if handle else ""
|
||||
if canonical_handle and is_publisher_handle(canonical_handle, conn) is not None:
|
||||
logger.debug("upsert_contributor: %r is a publisher — skipping contributor row", canonical_handle)
|
||||
|
|
|
|||
Loading…
Reference in a new issue