From dea1b02aa6d930876e3b5cce02cd1aca923ef7e5 Mon Sep 17 00:00:00 2001 From: m3taversal Date: Sun, 26 Apr 2026 14:25:24 +0100 Subject: [PATCH] fix(attribution): narrow exception + document gate asymmetry (Ganymede review) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two follow-up fixes from Ganymede's review of d0fb4c9: 1. is_publisher_handle: narrow `except Exception` to sqlite3.OperationalError. Pre-v26 DB fallback only needs to catch the "table doesn't exist" case; broader exceptions (programming errors, locks, corruption) should propagate. 2. upsert_contributor gate: add comment documenting the alias-resolution asymmetry between insert_contribution_event (alias-resolved via normalize_handle) and upsert_contributor (bare lower+lstrip-@). Today this is fine because the v26 classifier produced one publisher row per canonical handle. Branch 3 will normalize alias→canonical at writer entry points, tightening this gate transparently. Unit tests for the gates (positive + negative + alias resolution) deferred to Branch 3 alongside the auto-create flow tests. Smoke-tested: - pre-v26 fallback (no publishers table) → None (correct) - case-insensitive match (CNBC → id=1) → correct - @ prefix strip (@cnbc → id=1) → correct - non-publisher handle (alexastrum) → None (correct) Co-Authored-By: Claude Opus 4.7 (1M context) --- lib/attribution.py | 8 ++++++-- lib/contributor.py | 7 +++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/lib/attribution.py b/lib/attribution.py index 68e69eb..8b571b9 100644 --- a/lib/attribution.py +++ b/lib/attribution.py @@ -15,6 +15,7 @@ Epimetheus owns this module. Leo reviews changes. import logging import re +import sqlite3 from pathlib import Path logger = logging.getLogger("pipeline.attribution") @@ -130,8 +131,11 @@ def is_publisher_handle(handle: str, conn) -> int | None: ).fetchone() if row: return row["id"] if hasattr(row, "keys") else row[0] - except Exception: - logger.debug("is_publisher_handle: lookup failed for %r", h, exc_info=True) + except sqlite3.OperationalError: + # Pre-v26 DB: publishers table doesn't exist yet. Fall through to None + # so writer behaves as before. Any other exception class is real signal + # (programming error, lock contention, corruption) — let it propagate. + logger.debug("is_publisher_handle: publishers table not present (pre-v26?)", exc_info=True) return None diff --git a/lib/contributor.py b/lib/contributor.py index b2cc11d..983fe6b 100644 --- a/lib/contributor.py +++ b/lib/contributor.py @@ -428,6 +428,13 @@ def upsert_contributor( # Schema v26 gate: orgs/citations live in publishers table, not contributors. # Skip without writing so the v26 classifier cleanup isn't undone by every # merge that has `sourcer: cnbc` (or similar) in claim frontmatter. + # + # Note: bare normalization (lower + lstrip @), no alias resolution. This is + # consistent with the existing `SELECT handle FROM contributors WHERE handle = ?` + # below — both look up by canonical-form-as-stored. Today's classifier produces + # one publisher row per canonical handle, so bare lookup hits. Branch 3 will + # normalize alias→canonical at writer entry points (extract.py, post_extract); + # at that point this gate auto-tightens because callers pass canonical handles. canonical_handle = handle.strip().lower().lstrip("@") if handle else "" if canonical_handle and is_publisher_handle(canonical_handle, conn) is not None: logger.debug("upsert_contributor: %r is a publisher — skipping contributor row", canonical_handle)