diff --git a/diagnostics/activity_feed_api.py b/diagnostics/activity_feed_api.py index 6acb2b8..584835c 100644 --- a/diagnostics/activity_feed_api.py +++ b/diagnostics/activity_feed_api.py @@ -32,13 +32,12 @@ def _is_source_slug(slug): def _classify_event(branch, description, commit_type, candidate_slug=None): - """Return one of: create | enrich | challenge | source | None. + """Return one of: create | enrich | challenge | source | session_digest | None. Source-archive PRs are extract/* branches that filed a source into - inbox/archive/ but didn't produce a claim. Two signals classify them - as 'source' (defense in depth): - 1. extract/* branch with empty description (no claim title produced) - 2. candidate_slug matches YYYY-MM-DD-...-HASH4 (inbox filename pattern) + inbox/archive/ but didn't produce a claim. Session-digest PRs are + agent research/entity commits with no per-claim description — they + represent session-level rollups, not specific knowledge artifacts. """ commit_type_l = (commit_type or "").lower() branch = branch or "" @@ -60,6 +59,12 @@ def _classify_event(branch, description, commit_type, candidate_slug=None): or branch.startswith("reweave/")): return "enrich" + # Research and entity commits with no description are session-level + # rollups (e.g. astra/research-2026-05-11). They have no claim to + # link to — surface as session_digest, not as a phantom create. + if commit_type_l in ("research", "entity") and not has_desc: + return "session_digest" + # Source-only: extract/* with no claim description means inbox archive # landed but no domain claim was written. if branch.startswith("extract/") and not has_desc: @@ -76,6 +81,48 @@ def _classify_event(branch, description, commit_type, candidate_slug=None): return "create" +# Internal classifier value -> canonical `kind` enum returned to frontend. 
_KIND_MAP = {
    "create": "claim_merged",
    "enrich": "claim_enriched",
    "challenge": "claim_challenged",
    "source": "source_archived",
    # session_digest has no legacy/canonical split: both names are the same.
    "session_digest": "session_digest",
}

# Forgejo tree holding archived inbox sources; also the fallback link target
# when the domain of a source is not known.
_ARCHIVE_BASE_URL = (
    "https://git.livingip.xyz/teleo/teleo-codex/src/branch/main/inbox/archive"
)

# Trailing "-xxxx" (four lowercase hex chars) appended to extract/* branches.
_BRANCH_HASH_SUFFIX_RE = re.compile(r"-[a-f0-9]{4}$")


def _archive_slug_from_branch(branch):
    """Return the archive slug encoded in an ``extract/...`` style branch.

    Everything after the first ``/`` is taken as the slug and a trailing
    ``-HASH4`` suffix (four lowercase hex characters) is dropped, e.g.
    ``extract/2026-05-11-foo-a1b2`` -> ``2026-05-11-foo``.

    Returns ``""`` when *branch* is empty/None or contains no ``/``.
    """
    if not branch or "/" not in branch:
        return ""
    _prefix, _sep, slug = branch.partition("/")
    return _BRANCH_HASH_SUFFIX_RE.sub("", slug)


def _source_target_url(domain, archive_slug):
    """Build the Forgejo URL for an archived source file.

    Args:
        domain: Domain directory under ``inbox/archive``; may be None,
            blank, or the literal ``"unknown"``.
        archive_slug: File stem of the archived source (no ``.md``).

    Returns:
        ``None`` when *archive_slug* is empty; the repo-wide archive
        directory URL when the domain is missing/unknown (so the link
        still resolves instead of 404-ing); otherwise the URL of
        ``<domain>/<archive_slug>.md``.
    """
    # Function-scope import: the module's import block is not part of this
    # change, so the dependency is kept local to the helper.
    from urllib.parse import quote

    if not archive_slug:
        return None
    domain = (domain or "").strip()
    if not domain or domain == "unknown":
        return _ARCHIVE_BASE_URL
    # Percent-encode both path components so values containing spaces, '#',
    # '?' or '%' cannot truncate or redirect the link. '/' is left intact
    # (quote's default) so nested paths keep working.
    return f"{_ARCHIVE_BASE_URL}/{quote(domain)}/{quote(archive_slug)}.md"


def _claim_target_url(claim_slug):
    """Return the frontend path for a claim, or ``None`` for an empty slug.

    The slug is percent-encoded so the result is always a valid URL path.
    """
    from urllib.parse import quote

    if not claim_slug:
        return None
    return f"/claims/{quote(claim_slug)}"
+ # target_url points at the archived file on Forgejo instead. if event_type == "source": + archive_slug = _archive_slug_from_branch(row["branch"]) summary_text = _summary_from_branch(row["branch"]) - source_slug = ( - _summary_from_branch(row["branch"]).lower().replace(" ", "-") - or row["branch"] + source_display_slug = ( + summary_text.lower().replace(" ", "-") or row["branch"] ) events.append({ + "kind": kind, "type": "source", + "target_url": _source_target_url(domain, archive_slug), "claim_slug": "", - "source_slug": source_slug, - "domain": row["domain"] or "unknown", + "source_slug": source_display_slug, + "domain": domain, + "contributor": contributor, + "timestamp": merged_at, + "ci_earned": round(ci_earned, 2), + "summary": summary_text, + "pr_number": row["number"], + "source_channel": row["source_channel"] or "unknown", + }) + continue + + # Session digests have no clickthrough surface yet (per-agent + # session pages not built). target_url=null so frontend renders + # plain text instead of a broken /claims/research-... link. 
+ if event_type == "session_digest": + summary_text = _summary_from_branch(row["branch"]) or "Research session" + events.append({ + "kind": kind, + "type": "session_digest", + "target_url": None, + "claim_slug": "", + "domain": domain, "contributor": contributor, "timestamp": merged_at, "ci_earned": round(ci_earned, 2), @@ -202,9 +276,11 @@ def _build_events(): for slug in (slugs[:1] if slugs else [""]): events.append({ + "kind": kind, "type": event_type, + "target_url": _claim_target_url(slug), "claim_slug": slug, - "domain": row["domain"] or "unknown", + "domain": domain, "contributor": contributor, "timestamp": merged_at, "ci_earned": round(ci_earned, 2), @@ -234,8 +310,11 @@ def _sort_events(events, claim_activity, sort_mode, now_ts): return _hot_score(ca["challenges"], ca["enriches"], ca["signals"], hours) events.sort(key=hot_key, reverse=True) elif sort_mode == "important": - type_rank = {"challenge": 0, "enrich": 1, "create": 2, "source": 3} - events.sort(key=lambda e: (type_rank.get(e["type"], 4), -len(e["summary"]))) + type_rank = { + "challenge": 0, "enrich": 1, "create": 2, + "source": 3, "session_digest": 4, + } + events.sort(key=lambda e: (type_rank.get(e["type"], 5), -len(e["summary"]))) return events @@ -269,7 +348,13 @@ async def handle_activity_feed(request): if contributor: filtered = [e for e in filtered if e["contributor"] == contributor] if type_filter: - filtered = [e for e in filtered if e["type"] in type_filter] + # Accept both legacy `type` values (create/enrich/challenge/source/ + # session_digest) and canonical `kind` values (claim_merged/etc.) so + # callers can migrate at their own pace. + filtered = [ + e for e in filtered + if e["type"] in type_filter or e.get("kind") in type_filter + ] sorted_events = _sort_events(list(filtered), claim_activity, sort_mode, now) total = len(sorted_events)