feat: atomic extract-and-connect + stale PR monitor + response audit #4

Merged
m3taversal merged 70 commits from epimetheus/atomic-connect-and-stale-monitor into main 2026-03-30 11:03:35 +00:00
Showing only changes of commit 1dfc6dcc5c - Show all commits

View file

@@ -142,17 +142,43 @@ _DOMAIN_KEYWORDS = {
}
def _classify_content(text: str) -> tuple[str, list[str]]:
"""Classify content into domain + sub-tags based on keywords.
# Author handle → domain map (Ganymede: counts as 1 keyword match)
_AUTHOR_DOMAIN_MAP = {
"metadaoproject": "internet-finance",
"metadaofi": "internet-finance",
"futardio": "internet-finance",
"p2pdotme": "internet-finance",
"oxranga": "internet-finance",
"metanallok": "internet-finance",
"proph3t_": "internet-finance",
"01resolved": "internet-finance",
"anthropicai": "ai-alignment",
"openai": "ai-alignment",
"daborai": "ai-alignment",
"deepmind": "ai-alignment",
"spacex": "space-development",
"blaborig": "space-development",
"nasa": "space-development",
}
def _classify_content(text: str, author: str = "") -> tuple[str, list[str]]:
"""Classify content into domain + sub-tags based on keywords + author.
Returns (domain, [sub-tags]). Default: internet-finance with no sub-tags.
"""
text_lower = text.lower()
author_lower = author.lower().lstrip("@")
# Author handle gives 1 keyword match toward domain threshold
author_domain = _AUTHOR_DOMAIN_MAP.get(author_lower, "")
# Check non-IF domains first
for domain, keywords in _DOMAIN_KEYWORDS.items():
matches = sum(1 for kw in keywords if kw in text_lower)
if matches >= 2: # Need 2+ keyword matches to override default domain
if author_domain == domain:
matches += 1 # Author signal counts as 1 match
if matches >= 2:
return domain, []
# Default to internet-finance, classify sub-topics
@@ -1234,15 +1260,19 @@ def _extract_urls(text: str) -> list[str]:
def _archive_exchange(user_text: str, rio_response: str, user, msg,
url_content: str | None = None, urls: list[str] | None = None):
"""Archive a tagged exchange to inbox/queue/ for pipeline processing."""
"""Archive a tagged exchange. Conversations go to telegram-archives/conversations/
(not queue skips extraction). Sources with URLs already have standalone files."""
try:
date_str = datetime.now(timezone.utc).strftime("%Y-%m-%d")
username = user.username if user else "anonymous"
slug = re.sub(r"[^a-z0-9]+", "-", user_text[:50].lower()).strip("-")
filename = f"{date_str}-telegram-{username}-{slug}.md"
archive_path = Path(ARCHIVE_DIR) / filename
archive_path.parent.mkdir(parents=True, exist_ok=True)
# Conversations go to conversations/ subdir (Ganymede: skip extraction at source).
# The cron only moves top-level ARCHIVE_DIR/*.md to queue — subdirs are untouched.
conv_dir = Path(ARCHIVE_DIR) / "conversations"
conv_dir.mkdir(parents=True, exist_ok=True)
archive_path = conv_dir / filename
# Extract rationale (the user's text minus the @mention and URL)
rationale = re.sub(r"@\w+", "", user_text).strip()