# ─── GET /api/contributor-growth ─────────────────────────────────────────

# Worktree of the codex repo whose git history is mined for growth stats.
CODEX_WORKTREE = Path(os.environ.get("MAIN_WORKTREE", "/opt/teleo-eval/workspaces/main"))
# Contributors whose first commit is on/before this date get a "founding" badge.
FOUNDING_CUTOFF = "2026-03-15"
# Bot/service accounts excluded from contributor counts.
CONTRIBUTOR_EXCLUDE = {"Teleo Agents", "Teleo Pipeline"}
# Project start, used for the days_active summary figure.
CODEX_START = datetime(2026, 3, 5, tzinfo=timezone.utc)

# Module-level 5-minute cache: git log over the full history is expensive.
_growth_cache: dict | None = None
_growth_cache_ts: float = 0
GROWTH_CACHE_TTL = 300  # seconds


async def _run_git_log(cwd: str, *args: str) -> tuple[int, str, str]:
    """Run ``git log <args>`` in *cwd*; return (returncode, stdout, stderr) as text."""
    proc = await asyncio.create_subprocess_exec(
        "git", "log", *args,
        cwd=cwd,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    stdout, stderr = await proc.communicate()
    return proc.returncode, stdout.decode(errors="replace"), stderr.decode(errors="replace")


async def handle_contributor_growth(request):
    """Cumulative unique contributors and claims over time from git log.

    Returns time-series data for Chart.js line charts.
    Cached for 5 minutes since git log is expensive.
    """
    global _growth_cache, _growth_cache_ts
    now = time.monotonic()
    if _growth_cache is not None and (now - _growth_cache_ts) < GROWTH_CACHE_TTL:
        return web.json_response(_growth_cache)

    codex_path = str(CODEX_WORKTREE)
    if not CODEX_WORKTREE.exists():
        return web.json_response(
            {"error": "codex worktree not found", "path": codex_path}, status=404
        )

    # Pass 1: every commit's date and author, across all refs.
    rc, out, err = await _run_git_log(
        codex_path, "--format=%ad|%an", "--date=format:%Y-%m-%d", "--all"
    )
    if rc != 0:
        return web.json_response(
            {"error": "git log failed", "detail": err[:500]}, status=500
        )

    # First commit date per non-bot author. ISO dates compare correctly as strings.
    first_seen: dict[str, str] = {}
    for line in out.strip().split("\n"):
        if "|" not in line:
            continue
        date, author = line.split("|", 1)
        if author in CONTRIBUTOR_EXCLUDE:
            continue
        if author not in first_seen or date < first_seen[author]:
            first_seen[author] = date

    by_date: dict[str, list[str]] = defaultdict(list)
    for author, date in first_seen.items():
        by_date[date].append(author)

    contributors_timeline = []
    seen: set[str] = set()
    for date in sorted(by_date):
        new_authors = by_date[date]
        seen.update(new_authors)
        contributors_timeline.append({
            "date": date,
            "cumulative": len(seen),
            "new": [{"name": a, "founding": date <= FOUNDING_CUTOFF} for a in sorted(new_authors)],
        })

    # Pass 2: dates of claim-file additions (one domains/*.md added per claim).
    rc2, out2, err2 = await _run_git_log(
        codex_path, "--format=%ad", "--date=format:%Y-%m-%d",
        "--all", "--diff-filter=A", "--", "domains/*.md",
    )
    if rc2 != 0:
        # Previously this failure was silently ignored, yielding empty claims data.
        return web.json_response(
            {"error": "git log failed", "detail": err2[:500]}, status=500
        )

    claim_counts: dict[str, int] = defaultdict(int)
    for line in out2.strip().split("\n"):
        line = line.strip()
        if line:
            claim_counts[line] += 1

    claims_timeline = []
    total_claims = 0
    for date in sorted(claim_counts):
        total_claims += claim_counts[date]
        claims_timeline.append(
            {"date": date, "cumulative": total_claims, "added": claim_counts[date]}
        )

    founding = sorted(a for a, d in first_seen.items() if d <= FOUNDING_CUTOFF)

    result = {
        "generated_at": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
        "summary": {
            "total_contributors": len(first_seen),
            "founding_contributors": founding,
            "total_claims": total_claims,
            "days_active": (datetime.now(timezone.utc) - CODEX_START).days,
        },
        "cumulative_contributors": contributors_timeline,
        "cumulative_claims": claims_timeline,
    }

    _growth_cache = result
    _growth_cache_ts = now
    return web.json_response(result)


def register_dashboard_routes_growth(app):
    """Register the contributor-growth route (add to register_dashboard_routes):

        app.router.add_get("/api/contributor-growth", handle_contributor_growth)
    """
    app.router.add_get("/api/contributor-growth", handle_contributor_growth)
+Output: JSON to stdout or file, suitable for Chart.js line charts. + +Usage: + python3 cumulative-growth.py --codex-path /path/to/teleo-codex [--output /path/to/output.json] + python3 cumulative-growth.py --codex-path /path/to/teleo-codex --format csv +""" + +import argparse +import json +import subprocess +import sys +from collections import defaultdict +from datetime import datetime, timedelta + +# Map bot/service accounts to their human principal or exclude them. +# "Teleo Agents" and "Teleo Pipeline" are bot accounts — attribute to system. +CONTRIBUTOR_ALIASES = { + "Teleo Agents": None, # system automation, not a contributor + "Teleo Pipeline": None, # pipeline bot +} + +# Founding contributors get a badge — anyone who contributed before this date. +FOUNDING_CUTOFF = "2026-03-15" + + +def git_log_contributors(codex_path: str) -> list[dict]: + """Extract per-commit author and date from git log.""" + result = subprocess.run( + ["git", "log", "--format=%ad|%an", "--date=format:%Y-%m-%d", "--all"], + capture_output=True, text=True, cwd=codex_path + ) + if result.returncode != 0: + print(f"git log failed: {result.stderr}", file=sys.stderr) + sys.exit(1) + + entries = [] + for line in result.stdout.strip().split("\n"): + if "|" not in line: + continue + date, author = line.split("|", 1) + canonical = CONTRIBUTOR_ALIASES.get(author, author) + if canonical is None: + continue + entries.append({"date": date, "author": canonical}) + return entries + + +def git_log_claims(codex_path: str) -> list[dict]: + """Extract claim file additions over time from git log.""" + result = subprocess.run( + ["git", "log", "--format=%ad", "--date=format:%Y-%m-%d", + "--all", "--diff-filter=A", "--", "domains/*.md"], + capture_output=True, text=True, cwd=codex_path + ) + if result.returncode != 0: + print(f"git log failed: {result.stderr}", file=sys.stderr) + sys.exit(1) + + counts = defaultdict(int) + for line in result.stdout.strip().split("\n"): + line = line.strip() + if line: + 
counts[line] += 1 + return [{"date": d, "count": c} for d, c in sorted(counts.items())] + + +def github_stars(repo: str = "living-ip/teleo-codex") -> int | None: + """Fetch current star count from GitHub API. Returns None on failure.""" + try: + result = subprocess.run( + ["gh", "api", f"repos/{repo}", "--jq", ".stargazers_count"], + capture_output=True, text=True, timeout=10 + ) + if result.returncode == 0: + return int(result.stdout.strip()) + except (subprocess.TimeoutExpired, ValueError): + pass + return None + + +def build_cumulative_contributors(entries: list[dict]) -> list[dict]: + """Build cumulative unique contributor count by date.""" + first_seen = {} + for e in entries: + author, date = e["author"], e["date"] + if author not in first_seen or date < first_seen[author]: + first_seen[author] = date + + by_date = defaultdict(list) + for author, date in first_seen.items(): + by_date[date].append(author) + + timeline = [] + seen = set() + for date in sorted(by_date.keys()): + new_authors = by_date[date] + seen.update(new_authors) + is_founding = date <= FOUNDING_CUTOFF + timeline.append({ + "date": date, + "cumulative": len(seen), + "new": [ + {"name": a, "founding": is_founding} + for a in sorted(new_authors) + ], + }) + return timeline + + +def build_cumulative_claims(claim_entries: list[dict]) -> list[dict]: + """Build cumulative claim count by date.""" + timeline = [] + cumulative = 0 + for entry in claim_entries: + cumulative += entry["count"] + timeline.append({ + "date": entry["date"], + "cumulative": cumulative, + "added": entry["count"], + }) + return timeline + + +def build_daily_commits(entries: list[dict]) -> list[dict]: + """Build daily commit volume by contributor.""" + daily = defaultdict(lambda: defaultdict(int)) + for e in entries: + daily[e["date"]][e["author"]] += 1 + + timeline = [] + for date in sorted(daily.keys()): + authors = daily[date] + timeline.append({ + "date": date, + "total": sum(authors.values()), + "by_contributor": 
dict(sorted(authors.items())), + }) + return timeline + + +def generate_report(codex_path: str) -> dict: + entries = git_log_contributors(codex_path) + claim_entries = git_log_claims(codex_path) + stars = github_stars() + + contributors_timeline = build_cumulative_contributors(entries) + claims_timeline = build_cumulative_claims(claim_entries) + commits_timeline = build_daily_commits(entries) + + all_contributors = set(e["author"] for e in entries) + founding = [ + a for a in all_contributors + if any( + e["date"] <= FOUNDING_CUTOFF and e["author"] == a + for e in entries + ) + ] + + return { + "generated_at": datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"), + "summary": { + "total_contributors": len(all_contributors), + "founding_contributors": sorted(founding), + "total_claims": claims_timeline[-1]["cumulative"] if claims_timeline else 0, + "github_stars": stars, + "codex_start_date": "2026-03-05", + "days_active": (datetime.utcnow() - datetime(2026, 3, 5)).days, + }, + "cumulative_contributors": contributors_timeline, + "cumulative_claims": claims_timeline, + "daily_activity": commits_timeline, + } + + +def format_csv(report: dict) -> str: + lines = ["date,cumulative_contributors,cumulative_claims"] + contrib_map = {e["date"]: e["cumulative"] for e in report["cumulative_contributors"]} + claims_map = {e["date"]: e["cumulative"] for e in report["cumulative_claims"]} + + all_dates = sorted(set(list(contrib_map.keys()) + list(claims_map.keys()))) + + last_contrib = 0 + last_claims = 0 + for d in all_dates: + last_contrib = contrib_map.get(d, last_contrib) + last_claims = claims_map.get(d, last_claims) + lines.append(f"{d},{last_contrib},{last_claims}") + return "\n".join(lines) + + +def main(): + parser = argparse.ArgumentParser(description="Generate cumulative growth data") + parser.add_argument("--codex-path", required=True, help="Path to teleo-codex repo") + parser.add_argument("--output", help="Output file path (default: stdout)") + 
parser.add_argument("--format", choices=["json", "csv"], default="json") + args = parser.parse_args() + + report = generate_report(args.codex_path) + + if args.format == "csv": + output = format_csv(report) + else: + output = json.dumps(report, indent=2) + + if args.output: + with open(args.output, "w") as f: + f.write(output) + print(f"Written to {args.output}", file=sys.stderr) + else: + print(output) + + +if __name__ == "__main__": + main()