feat: add /api/contributor-growth endpoint + cumulative growth script
Adds an async, git-log-based endpoint for cumulative contributor and claim tracking: 5-minute cache, excludes bot accounts, tags founding contributors. A standalone CLI script is also included for ad-hoc data generation.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

Parent: f0cf772182
Commit: 9505e5b40a
2 changed files with 333 additions and 0 deletions
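A quick way to smoke-test the new endpoint once the dashboard is running is a plain stdlib client. A minimal sketch, assuming the server listens on http://localhost:8080 (host and port are assumptions, not part of this commit):

    # Minimal smoke test (hypothetical base URL; adjust to your deployment).
    import json
    import urllib.request

    with urllib.request.urlopen("http://localhost:8080/api/contributor-growth") as resp:
        data = json.load(resp)

    summary = data["summary"]
    print(f"{summary['total_contributors']} contributors, {summary['total_claims']} claims")
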
@@ -10,6 +10,7 @@ Endpoints:
Owner: Argus
"""

import asyncio
import json
import logging
import os
@@ -17,6 +18,7 @@ import sqlite3
import statistics
import time
import urllib.request
from collections import defaultdict
from datetime import datetime, timezone
from pathlib import Path

@@ -1182,6 +1184,113 @@ async def handle_telegram_extractions(request):
    conn.close()


# ─── GET /api/contributor-growth ─────────────────────────────────────────

CODEX_WORKTREE = Path(os.environ.get("MAIN_WORKTREE", "/opt/teleo-eval/workspaces/main"))
FOUNDING_CUTOFF = "2026-03-15"
CONTRIBUTOR_EXCLUDE = {"Teleo Agents", "Teleo Pipeline"}

_growth_cache: dict | None = None
_growth_cache_ts: float = 0
GROWTH_CACHE_TTL = 300


async def handle_contributor_growth(request):
    """Cumulative unique contributors and claims over time from git log.

    Returns time-series data for Chart.js line charts.
    Cached for 5 minutes since git log is expensive.
    """
    global _growth_cache, _growth_cache_ts
    now = time.monotonic()
    if _growth_cache is not None and (now - _growth_cache_ts) < GROWTH_CACHE_TTL:
        return web.json_response(_growth_cache)

    codex_path = str(CODEX_WORKTREE)
    if not CODEX_WORKTREE.exists():
        return web.json_response(
            {"error": "codex worktree not found", "path": codex_path}, status=404
        )

    proc = await asyncio.create_subprocess_exec(
        "git", "log", "--format=%ad|%an", "--date=format:%Y-%m-%d", "--all",
        cwd=codex_path,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    stdout, stderr = await proc.communicate()
    if proc.returncode != 0:
        return web.json_response(
            {"error": "git log failed", "detail": stderr.decode()[:500]}, status=500
        )

    first_seen: dict[str, str] = {}
    daily_commits: dict[str, dict[str, int]] = defaultdict(lambda: defaultdict(int))
    for line in stdout.decode().strip().split("\n"):
        if "|" not in line:
            continue
        date, author = line.split("|", 1)
        if author in CONTRIBUTOR_EXCLUDE:
            continue
        daily_commits[date][author] += 1
        if author not in first_seen or date < first_seen[author]:
            first_seen[author] = date

    by_date: dict[str, list[str]] = defaultdict(list)
    for author, date in first_seen.items():
        by_date[date].append(author)

    contributors_timeline = []
    seen: set[str] = set()
    for date in sorted(by_date.keys()):
        new_authors = by_date[date]
        seen.update(new_authors)
        contributors_timeline.append({
            "date": date,
            "cumulative": len(seen),
            "new": [{"name": a, "founding": date <= FOUNDING_CUTOFF} for a in sorted(new_authors)],
        })

    proc2 = await asyncio.create_subprocess_exec(
        "git", "log", "--format=%ad", "--date=format:%Y-%m-%d",
        "--all", "--diff-filter=A", "--", "domains/*.md",
        cwd=codex_path,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    stdout2, _ = await proc2.communicate()
    claim_counts: dict[str, int] = defaultdict(int)
    for line in stdout2.decode().strip().split("\n"):
        line = line.strip()
        if line:
            claim_counts[line] += 1

    claims_timeline = []
    cumulative = 0
    for date in sorted(claim_counts.keys()):
        cumulative += claim_counts[date]
        claims_timeline.append({"date": date, "cumulative": cumulative, "added": claim_counts[date]})

    all_contributors = set(first_seen.keys())
    founding = sorted(a for a in all_contributors if first_seen[a] <= FOUNDING_CUTOFF)

    result = {
        "generated_at": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
        "summary": {
            "total_contributors": len(all_contributors),
            "founding_contributors": founding,
            "total_claims": cumulative,
            "days_active": (datetime.now(timezone.utc) - datetime(2026, 3, 5, tzinfo=timezone.utc)).days,
        },
        "cumulative_contributors": contributors_timeline,
        "cumulative_claims": claims_timeline,
    }

    _growth_cache = result
    _growth_cache_ts = now
    return web.json_response(result)


# ─── Registration ──────────────────────────────────────────────────────────

def register_dashboard_routes(app: web.Application, get_conn):
@@ -1199,3 +1308,4 @@ def register_dashboard_routes(app: web.Application, get_conn):
    app.router.add_get("/api/growth", handle_growth)
    app.router.add_get("/api/pr-lifecycle", handle_pr_lifecycle)
    app.router.add_get("/api/telegram-extractions", handle_telegram_extractions)
    app.router.add_get("/api/contributor-growth", handle_contributor_growth)
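For orientation, the `result` dict built by the handler above serializes to JSON with the shape below. This is an illustrative sketch: every name and number is a placeholder, not captured output.

    # Illustrative only: all values below are placeholders.
    {
        "generated_at": "2026-04-01T12:00:00Z",
        "summary": {
            "total_contributors": 12,
            "founding_contributors": ["alice", "bob"],
            "total_claims": 87,
            "days_active": 27,
        },
        "cumulative_contributors": [
            {"date": "2026-03-05", "cumulative": 2,
             "new": [{"name": "alice", "founding": True},
                     {"name": "bob", "founding": True}]},
        ],
        "cumulative_claims": [
            {"date": "2026-03-05", "cumulative": 3, "added": 3},
        ],
    }
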
scripts/cumulative-growth.py (new file, 223 lines)
@@ -0,0 +1,223 @@
#!/usr/bin/env python3
"""Generate cumulative growth time-series data for public dashboard.

Produces JSON with three series:
- cumulative_contributors: unique git authors over time
- cumulative_claims: domain claim files added over time
- daily_activity: daily commit volume by contributor

Data sources: git log (codex repo), GitHub API (current star count).
Output: JSON to stdout or file, suitable for Chart.js line charts.

Usage:
    python3 cumulative-growth.py --codex-path /path/to/teleo-codex [--output /path/to/output.json]
    python3 cumulative-growth.py --codex-path /path/to/teleo-codex --format csv
"""

import argparse
import json
import subprocess
import sys
from collections import defaultdict
from datetime import datetime, timezone

# Map bot/service accounts to their human principal or exclude them.
# "Teleo Agents" and "Teleo Pipeline" are bot accounts — attribute to system.
CONTRIBUTOR_ALIASES = {
    "Teleo Agents": None,  # system automation, not a contributor
    "Teleo Pipeline": None,  # pipeline bot
}

# Founding contributors get a badge — anyone who contributed before this date.
FOUNDING_CUTOFF = "2026-03-15"


def git_log_contributors(codex_path: str) -> list[dict]:
    """Extract per-commit author and date from git log."""
    result = subprocess.run(
        ["git", "log", "--format=%ad|%an", "--date=format:%Y-%m-%d", "--all"],
        capture_output=True, text=True, cwd=codex_path
    )
    if result.returncode != 0:
        print(f"git log failed: {result.stderr}", file=sys.stderr)
        sys.exit(1)

    entries = []
    for line in result.stdout.strip().split("\n"):
        if "|" not in line:
            continue
        date, author = line.split("|", 1)
        canonical = CONTRIBUTOR_ALIASES.get(author, author)
        if canonical is None:
            continue
        entries.append({"date": date, "author": canonical})
    return entries


def git_log_claims(codex_path: str) -> list[dict]:
    """Extract claim file additions over time from git log."""
    result = subprocess.run(
        ["git", "log", "--format=%ad", "--date=format:%Y-%m-%d",
         "--all", "--diff-filter=A", "--", "domains/*.md"],
        capture_output=True, text=True, cwd=codex_path
    )
    if result.returncode != 0:
        print(f"git log failed: {result.stderr}", file=sys.stderr)
        sys.exit(1)

    counts = defaultdict(int)
    for line in result.stdout.strip().split("\n"):
        line = line.strip()
        if line:
            counts[line] += 1
    return [{"date": d, "count": c} for d, c in sorted(counts.items())]


def github_stars(repo: str = "living-ip/teleo-codex") -> int | None:
    """Fetch current star count from GitHub API. Returns None on failure."""
    try:
        result = subprocess.run(
            ["gh", "api", f"repos/{repo}", "--jq", ".stargazers_count"],
            capture_output=True, text=True, timeout=10
        )
        if result.returncode == 0:
            return int(result.stdout.strip())
    except (subprocess.TimeoutExpired, ValueError, FileNotFoundError):
        pass
    return None


def build_cumulative_contributors(entries: list[dict]) -> list[dict]:
    """Build cumulative unique contributor count by date."""
    first_seen = {}
    for e in entries:
        author, date = e["author"], e["date"]
        if author not in first_seen or date < first_seen[author]:
            first_seen[author] = date

    by_date = defaultdict(list)
    for author, date in first_seen.items():
        by_date[date].append(author)

    timeline = []
    seen = set()
    for date in sorted(by_date.keys()):
        new_authors = by_date[date]
        seen.update(new_authors)
        is_founding = date <= FOUNDING_CUTOFF
        timeline.append({
            "date": date,
            "cumulative": len(seen),
            "new": [
                {"name": a, "founding": is_founding}
                for a in sorted(new_authors)
            ],
        })
    return timeline


def build_cumulative_claims(claim_entries: list[dict]) -> list[dict]:
    """Build cumulative claim count by date."""
    timeline = []
    cumulative = 0
    for entry in claim_entries:
        cumulative += entry["count"]
        timeline.append({
            "date": entry["date"],
            "cumulative": cumulative,
            "added": entry["count"],
        })
    return timeline


def build_daily_commits(entries: list[dict]) -> list[dict]:
    """Build daily commit volume by contributor."""
    daily = defaultdict(lambda: defaultdict(int))
    for e in entries:
        daily[e["date"]][e["author"]] += 1

    timeline = []
    for date in sorted(daily.keys()):
        authors = daily[date]
        timeline.append({
            "date": date,
            "total": sum(authors.values()),
            "by_contributor": dict(sorted(authors.items())),
        })
    return timeline


def generate_report(codex_path: str) -> dict:
    entries = git_log_contributors(codex_path)
    claim_entries = git_log_claims(codex_path)
    stars = github_stars()

    contributors_timeline = build_cumulative_contributors(entries)
    claims_timeline = build_cumulative_claims(claim_entries)
    commits_timeline = build_daily_commits(entries)

    all_contributors = set(e["author"] for e in entries)
    founding = [
        a for a in all_contributors
        if any(
            e["date"] <= FOUNDING_CUTOFF and e["author"] == a
            for e in entries
        )
    ]

    return {
        "generated_at": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
        "summary": {
            "total_contributors": len(all_contributors),
            "founding_contributors": sorted(founding),
            "total_claims": claims_timeline[-1]["cumulative"] if claims_timeline else 0,
            "github_stars": stars,
            "codex_start_date": "2026-03-05",
            "days_active": (datetime.now(timezone.utc) - datetime(2026, 3, 5, tzinfo=timezone.utc)).days,
        },
        "cumulative_contributors": contributors_timeline,
        "cumulative_claims": claims_timeline,
        "daily_activity": commits_timeline,
    }


def format_csv(report: dict) -> str:
    lines = ["date,cumulative_contributors,cumulative_claims"]
    contrib_map = {e["date"]: e["cumulative"] for e in report["cumulative_contributors"]}
    claims_map = {e["date"]: e["cumulative"] for e in report["cumulative_claims"]}

    all_dates = sorted(set(list(contrib_map.keys()) + list(claims_map.keys())))

    # Forward-fill: a date present in only one series carries the other's last value.
    last_contrib = 0
    last_claims = 0
    for d in all_dates:
        last_contrib = contrib_map.get(d, last_contrib)
        last_claims = claims_map.get(d, last_claims)
        lines.append(f"{d},{last_contrib},{last_claims}")
    return "\n".join(lines)


def main():
    parser = argparse.ArgumentParser(description="Generate cumulative growth data")
    parser.add_argument("--codex-path", required=True, help="Path to teleo-codex repo")
    parser.add_argument("--output", help="Output file path (default: stdout)")
    parser.add_argument("--format", choices=["json", "csv"], default="json")
    args = parser.parse_args()

    report = generate_report(args.codex_path)

    if args.format == "csv":
        output = format_csv(report)
    else:
        output = json.dumps(report, indent=2)

    if args.output:
        with open(args.output, "w") as f:
            f.write(output)
        print(f"Written to {args.output}", file=sys.stderr)
    else:
        print(output)


if __name__ == "__main__":
    main()
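An illustrative invocation of the CSV path follows; the path and all numbers are made up, shown only to demonstrate the forward-fill behavior (a date present in only one series still gets both columns, carrying the last seen value):

    $ python3 scripts/cumulative-growth.py --codex-path ~/src/teleo-codex --format csv
    date,cumulative_contributors,cumulative_claims
    2026-03-05,2,3
    2026-03-07,2,9
    2026-03-09,5,9

With `--output`, the same report is written to a file instead of stdout.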