feat(activity-feed): add kind + target_url, fix research-session pseudo-slugs
The /api/activity-feed event shape didn't give the frontend a reliable
clickability signal. Two failure modes:
1. Source-archive events (extract/* PRs that filed a paper into
inbox/archive/ but didn't extract a claim) returned claim_slug="".
Frontend rendered <Link href="/claims/"> which Next normalized to
/claims and redirected to /knowledge-base. Wrong page.
2. Research/entity session commits (e.g. astra/research-2026-05-11)
with empty descriptions fell through to "create" classification with
a pseudo-slug like research-2026-05-11. Frontend rendered
/claims/research-2026-05-11 -> 404.
Fix:
- Add `kind` enum (canonical): claim_merged | claim_enriched |
claim_challenged | source_archived | session_digest. Replaces the
internal `type` for downstream consumers; `type` kept populated for
in-flight callers during migration.
- Add `target_url`: explicit clickability signal. Frontend renders
  <Link> when non-null, <span> when null. No special-casing needed
  (example rows are sketched after this list).
  * claim_* events -> /claims/{slug}
  * source_archived -> Forgejo blob URL at inbox/archive/{domain}/{slug}.md
  * session_digest -> null (no clickthrough surface yet)
- Detect research/entity commits with empty descriptions as
session_digest in _classify_event, instead of synthesizing a phantom
create event with a date-shaped pseudo-slug.
- The type filter accepts both legacy `type` and new `kind` values so
  callers can migrate at their own pace.
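
For illustration, hypothetical feed rows under the new shape (field values
are invented and other fields elided; only the kind/type/target_url/claim_slug
semantics come from this change):

# Hypothetical rows -- illustrative values only.
claim_row = {
    "kind": "claim_merged", "type": "create",
    "target_url": "/claims/some-claim-slug",  # frontend renders <Link>
    "claim_slug": "some-claim-slug",
}
source_row = {
    "kind": "source_archived", "type": "source",
    "target_url": "https://git.livingip.xyz/.../inbox/archive/some-domain/2026-05-11-some-paper.md",
    "claim_slug": "",  # no claim was written; link goes to Forgejo
}
digest_row = {
    "kind": "session_digest", "type": "session_digest",
    "target_url": None,  # frontend renders <span>
    "claim_slug": "",
}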
Verified live: source events resolve to inbox/archive/{domain}/...
Forgejo URLs, session-digest rows return target_url=null,
claim_merged events keep /claims/{slug} unchanged.
commit c3f2010a42 (parent ed4893e837)
1 changed file with 102 additions and 17 deletions
@@ -32,13 +32,12 @@ def _is_source_slug(slug):

 def _classify_event(branch, description, commit_type, candidate_slug=None):
-    """Return one of: create | enrich | challenge | source | None.
+    """Return one of: create | enrich | challenge | source | session_digest | None.

     Source-archive PRs are extract/* branches that filed a source into
-    inbox/archive/ but didn't produce a claim. Two signals classify them
-    as 'source' (defense in depth):
-      1. extract/* branch with empty description (no claim title produced)
-      2. candidate_slug matches YYYY-MM-DD-...-HASH4 (inbox filename pattern)
+    inbox/archive/ but didn't produce a claim. Session-digest PRs are
+    agent research/entity commits with no per-claim description — they
+    represent session-level rollups, not specific knowledge artifacts.
     """
     commit_type_l = (commit_type or "").lower()
     branch = branch or ""
@@ -60,6 +59,12 @@ def _classify_event(branch, description, commit_type, candidate_slug=None):
             or branch.startswith("reweave/")):
         return "enrich"

+    # Research and entity commits with no description are session-level
+    # rollups (e.g. astra/research-2026-05-11). They have no claim to
+    # link to — surface as session_digest, not as a phantom create.
+    if commit_type_l in ("research", "entity") and not has_desc:
+        return "session_digest"
+
     # Source-only: extract/* with no claim description means inbox archive
     # landed but no domain claim was written.
     if branch.startswith("extract/") and not has_desc:
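For illustration, a rough sketch of the classifier after this hunk
(hypothetical calls; this assumes has_desc is the truthiness of the stripped
description, which the hunk does not show being derived):

# Illustrative only — branch names are invented; returns follow the code above.
_classify_event("astra/research-2026-05-11", "", "research")
# -> "session_digest" (research commit, empty description)
_classify_event("extract/2026-05-11-some-paper-ab12", "", "extract")
# -> "source" (extract/* branch, no claim description)
_classify_event("astra/research-2026-05-11", "Some claim title", "research")
# -> not a session digest; proceeds to the normal create/enrich rules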
@@ -76,6 +81,48 @@ def _classify_event(branch, description, commit_type, candidate_slug=None):
     return "create"


+# Internal classifier value -> canonical `kind` enum returned to frontend.
+_KIND_MAP = {
+    "create": "claim_merged",
+    "enrich": "claim_enriched",
+    "challenge": "claim_challenged",
+    "source": "source_archived",
+    "session_digest": "session_digest",
+}
+
+
+def _archive_slug_from_branch(branch):
+    """For extract/YYYY-MM-DD-...-HASH4, return YYYY-MM-DD-... (keep date,
+    drop the 4-hex hash suffix). Matches inbox/archive filename convention.
+    """
+    if not branch or "/" not in branch:
+        return ""
+    slug = branch.split("/", 1)[1]
+    return re.sub(r"-[a-f0-9]{4}$", "", slug)
+
+
+def _source_target_url(domain, archive_slug):
+    """Forgejo blob URL for an archived source file. Falls back to the
+    repo-wide inbox/archive directory when domain is unknown so the link
+    still resolves to something useful instead of a 404.
+    """
+    if not archive_slug:
+        return None
+    domain = (domain or "").strip()
+    if not domain or domain == "unknown":
+        return "https://git.livingip.xyz/teleo/teleo-codex/src/branch/main/inbox/archive"
+    return (
+        "https://git.livingip.xyz/teleo/teleo-codex/src/branch/main/inbox/archive/"
+        f"{domain}/{archive_slug}.md"
+    )
+
+
+def _claim_target_url(claim_slug):
+    if not claim_slug:
+        return None
+    return f"/claims/{claim_slug}"
+
+
 def _normalize_contributor(submitted_by, agent):
     if submitted_by and submitted_by.strip():
         name = submitted_by.strip().lstrip("@")
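To make the helper contracts concrete, a hedged sketch of expected values
(inputs invented; the URL is abbreviated):

# Illustrative only — branch and domain are invented.
_archive_slug_from_branch("extract/2026-05-11-some-paper-ab12")
# -> "2026-05-11-some-paper" ("ab12" is 4 hex chars, so it is stripped)
_archive_slug_from_branch("extract/2026-05-11-notes")
# -> "2026-05-11-notes" (no hash suffix to strip)
_source_target_url("some-domain", "2026-05-11-some-paper")
# -> ".../inbox/archive/some-domain/2026-05-11-some-paper.md"
_source_target_url("unknown", "2026-05-11-some-paper")
# -> repo-wide inbox/archive directory (fallback, still resolves)
_claim_target_url("")  # -> None: frontend renders <span>, not <Link>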
@@ -152,23 +199,50 @@ def _build_events():

         contributor = _normalize_contributor(row["submitted_by"], row["agent"])
         merged_at = row["merged_at"] or ""
+        domain = row["domain"] or "unknown"
+        kind = _KIND_MAP.get(event_type, event_type)

-        ci_map = {"create": 0.35, "enrich": 0.25, "challenge": 0.40, "source": 0.15}
+        ci_map = {
+            "create": 0.35, "enrich": 0.25, "challenge": 0.40,
+            "source": 0.15, "session_digest": 0.05,
+        }
         ci_earned = ci_map.get(event_type, 0)

-        # Source events never carry a claim_slug — no claim was written —
-        # so the frontend can't produce a 404-ing claim link.
+        # Source events never carry a claim_slug — no claim was written.
+        # target_url points at the archived file on Forgejo instead.
         if event_type == "source":
+            archive_slug = _archive_slug_from_branch(row["branch"])
             summary_text = _summary_from_branch(row["branch"])
-            source_slug = (
-                _summary_from_branch(row["branch"]).lower().replace(" ", "-")
-                or row["branch"]
+            source_display_slug = (
+                summary_text.lower().replace(" ", "-") or row["branch"]
             )
             events.append({
+                "kind": kind,
                 "type": "source",
+                "target_url": _source_target_url(domain, archive_slug),
                 "claim_slug": "",
-                "source_slug": source_slug,
-                "domain": row["domain"] or "unknown",
+                "source_slug": source_display_slug,
+                "domain": domain,
                 "contributor": contributor,
                 "timestamp": merged_at,
                 "ci_earned": round(ci_earned, 2),
                 "summary": summary_text,
                 "pr_number": row["number"],
                 "source_channel": row["source_channel"] or "unknown",
             })
             continue

+        # Session digests have no clickthrough surface yet (per-agent
+        # session pages not built). target_url=null so frontend renders
+        # plain text instead of a broken /claims/research-... link.
+        if event_type == "session_digest":
+            summary_text = _summary_from_branch(row["branch"]) or "Research session"
+            events.append({
+                "kind": kind,
+                "type": "session_digest",
+                "target_url": None,
+                "claim_slug": "",
+                "domain": domain,
+                "contributor": contributor,
+                "timestamp": merged_at,
+                "ci_earned": round(ci_earned, 2),
@@ -202,9 +276,11 @@ def _build_events():

         for slug in (slugs[:1] if slugs else [""]):
             events.append({
+                "kind": kind,
                 "type": event_type,
+                "target_url": _claim_target_url(slug),
                 "claim_slug": slug,
-                "domain": row["domain"] or "unknown",
+                "domain": domain,
                 "contributor": contributor,
                 "timestamp": merged_at,
                 "ci_earned": round(ci_earned, 2),
@@ -234,8 +310,11 @@ def _sort_events(events, claim_activity, sort_mode, now_ts):
             return _hot_score(ca["challenges"], ca["enriches"], ca["signals"], hours)
         events.sort(key=hot_key, reverse=True)
     elif sort_mode == "important":
-        type_rank = {"challenge": 0, "enrich": 1, "create": 2, "source": 3}
-        events.sort(key=lambda e: (type_rank.get(e["type"], 4), -len(e["summary"])))
+        type_rank = {
+            "challenge": 0, "enrich": 1, "create": 2,
+            "source": 3, "session_digest": 4,
+        }
+        events.sort(key=lambda e: (type_rank.get(e["type"], 5), -len(e["summary"])))
     return events
@@ -269,7 +348,13 @@ async def handle_activity_feed(request):
     if contributor:
         filtered = [e for e in filtered if e["contributor"] == contributor]
     if type_filter:
-        filtered = [e for e in filtered if e["type"] in type_filter]
+        # Accept both legacy `type` values (create/enrich/challenge/source/
+        # session_digest) and canonical `kind` values (claim_merged/etc.) so
+        # callers can migrate at their own pace.
+        filtered = [
+            e for e in filtered
+            if e["type"] in type_filter or e.get("kind") in type_filter
+        ]

     sorted_events = _sort_events(list(filtered), claim_activity, sort_mode, now)
     total = len(sorted_events)
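A minimal sketch of the dual-acceptance filter above (stand-in dicts, not
real feed rows):

# Stand-in events — only type/kind matter here.
events = [
    {"type": "create", "kind": "claim_merged"},
    {"type": "source", "kind": "source_archived"},
]
type_filter = {"claim_merged"}  # caller already migrated to `kind` values
matched = [
    e for e in events
    if e["type"] in type_filter or e.get("kind") in type_filter
]
# matched == [{"type": "create", "kind": "claim_merged"}]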