diff --git a/diagnostics/activity_feed_api.py b/diagnostics/activity_feed_api.py
index acc5ec9..c901ee7 100644
--- a/diagnostics/activity_feed_api.py
+++ b/diagnostics/activity_feed_api.py
@@ -129,12 +129,33 @@ def _github_pr_url(github_pr):
     return f"https://github.com/living-ip/teleo-codex/pull/{github_pr}"
 
 
+# Canonicalize contributor labels so frontend links resolve to real
+# /contributors/{handle} pages. Pipeline writers (extract.py, manual edits,
+# the old backfill_submitted_by.py) historically wrote mixed-case agent
+# names with a trailing decorator into prs.submitted_by — e.g.
+# "Vida (self-directed)", "pipeline (reweave)", or "@m3taversal".
+# These decorated strings do not exist as contributors and 404 the profile
+# page. Strip the trailing parenthetical wholesale: valid handles match
+# ^[a-z0-9][a-z0-9_-]{0,38}$ (see pipeline/lib/attribution._HANDLE_RE) and
+# cannot contain parens, so this is lossless.
+_TRAILING_PAREN_RE = re.compile(r"\s*\([^)]*\)\s*$")
+
+
+def _canonicalize(raw):
+    if not raw:
+        return ""
+    h = raw.strip().lower().lstrip("@")
+    h = _TRAILING_PAREN_RE.sub("", h).strip()
+    return h
+
+
 def _normalize_contributor(submitted_by, agent):
-    if submitted_by and submitted_by.strip():
-        name = submitted_by.strip().lstrip("@")
+    name = _canonicalize(submitted_by)
+    if name:
+        return name
+    name = _canonicalize(agent)
+    if name and name != "pipeline":
         return name
-    if agent and agent.strip() and agent != "pipeline":
-        return agent.strip()
     return "pipeline"
 
 