diff --git a/lib/attribution.py b/lib/attribution.py index 68e69eb..8b571b9 100644 --- a/lib/attribution.py +++ b/lib/attribution.py @@ -15,6 +15,7 @@ Epimetheus owns this module. Leo reviews changes. import logging import re +import sqlite3 from pathlib import Path logger = logging.getLogger("pipeline.attribution") @@ -130,8 +131,11 @@ def is_publisher_handle(handle: str, conn) -> int | None: ).fetchone() if row: return row["id"] if hasattr(row, "keys") else row[0] - except Exception: - logger.debug("is_publisher_handle: lookup failed for %r", h, exc_info=True) + except sqlite3.OperationalError: + # Pre-v26 DB: publishers table doesn't exist yet. Fall through to None so + # the writer behaves as before. Other exception classes propagate. NOTE: + # "database is locked" is also OperationalError, so lock contention is swallowed here too. + logger.debug("is_publisher_handle: publishers table not present (pre-v26?)", exc_info=True) return None diff --git a/lib/contributor.py b/lib/contributor.py index b2cc11d..983fe6b 100644 --- a/lib/contributor.py +++ b/lib/contributor.py @@ -428,6 +428,13 @@ def upsert_contributor( # Schema v26 gate: orgs/citations live in publishers table, not contributors. # Skip without writing so the v26 classifier cleanup isn't undone by every # merge that has `sourcer: cnbc` (or similar) in claim frontmatter. + # + # Note: bare normalization (lower + lstrip @), no alias resolution. This is + # consistent with the existing `SELECT handle FROM contributors WHERE handle = ?` + # below — both look up by canonical-form-as-stored. Today's classifier produces + # one publisher row per canonical handle, so bare lookup hits. Branch 3 will + # normalize alias→canonical at writer entry points (extract.py, post_extract); + # at that point this gate auto-tightens because callers pass canonical handles. 
canonical_handle = handle.strip().lower().lstrip("@") if handle else "" if canonical_handle and is_publisher_handle(canonical_handle, conn) is not None: logger.debug("upsert_contributor: %r is a publisher — skipping contributor row", canonical_handle)