feat: add /api/contributor-growth endpoint + cumulative growth script

Adds async git-log-based endpoint for cumulative contributor and claim
tracking. 5-minute cache, excludes bot accounts, tags founding contributors.
Standalone CLI script also included for ad-hoc data generation.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
m3taversal 2026-04-20 22:19:42 +01:00
parent f0cf772182
commit 9505e5b40a
2 changed files with 333 additions and 0 deletions

View file

@ -10,6 +10,7 @@ Endpoints:
Owner: Argus Owner: Argus
""" """
import asyncio
import json import json
import logging import logging
import os import os
@ -17,6 +18,7 @@ import sqlite3
import statistics import statistics
import time import time
import urllib.request import urllib.request
from collections import defaultdict
from datetime import datetime, timezone from datetime import datetime, timezone
from pathlib import Path from pathlib import Path
@ -1182,6 +1184,113 @@ async def handle_telegram_extractions(request):
conn.close() conn.close()
# ─── GET /api/contributor-growth ─────────────────────────────────────────
# Path to the main codex worktree scanned by git log; overridable via the
# MAIN_WORKTREE env var for tests and alternate deployments.
CODEX_WORKTREE = Path(os.environ.get("MAIN_WORKTREE", "/opt/teleo-eval/workspaces/main"))
# Authors whose first commit date is on/before this ISO date (string compare
# works because dates are zero-padded YYYY-MM-DD) get the "founding" badge.
FOUNDING_CUTOFF = "2026-03-15"
# Bot/service git author names excluded from contributor stats.
CONTRIBUTOR_EXCLUDE = {"Teleo Agents", "Teleo Pipeline"}
# Module-level response cache for the expensive git-log scan.
# NOTE(review): assumes a single asyncio event loop, so no locking — confirm.
_growth_cache: dict | None = None
_growth_cache_ts: float = 0
# Cache TTL in seconds (5 minutes).
GROWTH_CACHE_TTL = 300
async def handle_contributor_growth(request):
    """Cumulative unique contributors and claims over time from git log.

    Returns time-series data for Chart.js line charts:
    - ``cumulative_contributors``: per-date cumulative unique author count,
      plus each date's newly-seen authors (flagged ``founding`` when their
      first commit is on/before FOUNDING_CUTOFF).
    - ``cumulative_claims``: per-date cumulative count of ``domains/*.md``
      files added (``--diff-filter=A``).

    Cached for GROWTH_CACHE_TTL seconds since git log is expensive.
    Returns 404 if the worktree is missing, 500 if either git log fails.
    """
    global _growth_cache, _growth_cache_ts
    now = time.monotonic()
    if _growth_cache is not None and (now - _growth_cache_ts) < GROWTH_CACHE_TTL:
        return web.json_response(_growth_cache)

    codex_path = str(CODEX_WORKTREE)
    if not CODEX_WORKTREE.exists():
        return web.json_response(
            {"error": "codex worktree not found", "path": codex_path}, status=404
        )

    # Pass 1: one "date|author" line per commit across all refs.
    proc = await asyncio.create_subprocess_exec(
        "git", "log", "--format=%ad|%an", "--date=format:%Y-%m-%d", "--all",
        cwd=codex_path,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    stdout, stderr = await proc.communicate()
    if proc.returncode != 0:
        return web.json_response(
            {"error": "git log failed", "detail": stderr.decode()[:500]}, status=500
        )

    # Earliest commit date per author, excluding bot accounts.
    first_seen: dict[str, str] = {}
    for line in stdout.decode().strip().split("\n"):
        if "|" not in line:
            continue
        date, author = line.split("|", 1)
        if author in CONTRIBUTOR_EXCLUDE:
            continue
        if author not in first_seen or date < first_seen[author]:
            first_seen[author] = date

    # Group authors by debut date, then walk dates in order to build the
    # cumulative series.
    by_date: dict[str, list[str]] = defaultdict(list)
    for author, date in first_seen.items():
        by_date[date].append(author)
    contributors_timeline = []
    seen: set[str] = set()
    for date in sorted(by_date.keys()):
        new_authors = by_date[date]
        seen.update(new_authors)
        contributors_timeline.append({
            "date": date,
            "cumulative": len(seen),
            "new": [{"name": a, "founding": date <= FOUNDING_CUTOFF} for a in sorted(new_authors)],
        })

    # Pass 2: dates on which claim files (domains/*.md) were added.
    proc2 = await asyncio.create_subprocess_exec(
        "git", "log", "--format=%ad", "--date=format:%Y-%m-%d",
        "--all", "--diff-filter=A", "--", "domains/*.md",
        cwd=codex_path,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    stdout2, stderr2 = await proc2.communicate()
    # BUGFIX: the second git log's exit status was previously ignored, so a
    # failure silently produced a "zero claims" response. Surface it instead.
    if proc2.returncode != 0:
        return web.json_response(
            {"error": "git log failed", "detail": stderr2.decode()[:500]}, status=500
        )
    claim_counts: dict[str, int] = defaultdict(int)
    for line in stdout2.decode().strip().split("\n"):
        line = line.strip()
        if line:
            claim_counts[line] += 1
    claims_timeline = []
    cumulative = 0
    for date in sorted(claim_counts.keys()):
        cumulative += claim_counts[date]
        claims_timeline.append({"date": date, "cumulative": cumulative, "added": claim_counts[date]})

    # NOTE: a per-day commit-volume table was previously built here but never
    # included in the response; the dead accumulation has been removed.
    all_contributors = set(first_seen.keys())
    founding = sorted(a for a in all_contributors if first_seen[a] <= FOUNDING_CUTOFF)
    result = {
        "generated_at": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
        "summary": {
            "total_contributors": len(all_contributors),
            "founding_contributors": founding,
            "total_claims": cumulative,
            "days_active": (datetime.now(timezone.utc) - datetime(2026, 3, 5, tzinfo=timezone.utc)).days,
        },
        "cumulative_contributors": contributors_timeline,
        "cumulative_claims": claims_timeline,
    }
    _growth_cache = result
    _growth_cache_ts = now
    return web.json_response(result)
# ─── Registration ────────────────────────────────────────────────────────── # ─── Registration ──────────────────────────────────────────────────────────
def register_dashboard_routes(app: web.Application, get_conn): def register_dashboard_routes(app: web.Application, get_conn):
@ -1199,3 +1308,4 @@ def register_dashboard_routes(app: web.Application, get_conn):
app.router.add_get("/api/growth", handle_growth) app.router.add_get("/api/growth", handle_growth)
app.router.add_get("/api/pr-lifecycle", handle_pr_lifecycle) app.router.add_get("/api/pr-lifecycle", handle_pr_lifecycle)
app.router.add_get("/api/telegram-extractions", handle_telegram_extractions) app.router.add_get("/api/telegram-extractions", handle_telegram_extractions)
app.router.add_get("/api/contributor-growth", handle_contributor_growth)

View file

@ -0,0 +1,223 @@
#!/usr/bin/env python3
"""Generate cumulative growth time-series data for public dashboard.
Produces JSON with three series:
- cumulative_contributors: unique git authors over time
- cumulative_claims: domain claim files added over time
- github_stars: star count snapshots (requires GitHub API)
Data sources: git log (codex repo), GitHub API.
Output: JSON to stdout or file, suitable for Chart.js line charts.
Usage:
python3 cumulative-growth.py --codex-path /path/to/teleo-codex [--output /path/to/output.json]
python3 cumulative-growth.py --codex-path /path/to/teleo-codex --format csv
"""
import argparse
import json
import subprocess
import sys
from collections import defaultdict
from datetime import datetime, timedelta, timezone
# Map bot/service accounts to their human principal or exclude them.
# "Teleo Agents" and "Teleo Pipeline" are bot accounts — attribute to system.
# A value of None means "drop this author entirely from the stats".
CONTRIBUTOR_ALIASES: dict[str, str | None] = {
    "Teleo Agents": None,  # system automation, not a contributor
    "Teleo Pipeline": None,  # pipeline bot
}
# Founding contributors get a badge — anyone who contributed before this date.
# Plain string comparison is safe because dates are zero-padded YYYY-MM-DD.
FOUNDING_CUTOFF = "2026-03-15"
def git_log_contributors(codex_path: str) -> list[dict]:
    """Extract per-commit author and date from git log.

    Authors aliased to None in CONTRIBUTOR_ALIASES (bot accounts) are
    dropped; all other aliases are resolved to their canonical name.
    Exits the process with status 1 if git log fails.
    """
    proc = subprocess.run(
        ["git", "log", "--format=%ad|%an", "--date=format:%Y-%m-%d", "--all"],
        capture_output=True, text=True, cwd=codex_path
    )
    if proc.returncode != 0:
        print(f"git log failed: {proc.stderr}", file=sys.stderr)
        sys.exit(1)

    records: list[dict] = []
    for raw in proc.stdout.strip().split("\n"):
        if "|" not in raw:
            continue
        day, who = raw.split("|", 1)
        resolved = CONTRIBUTOR_ALIASES.get(who, who)
        if resolved is not None:
            records.append({"date": day, "author": resolved})
    return records
def git_log_claims(codex_path: str) -> list[dict]:
    """Extract claim file additions over time from git log.

    Counts ``domains/*.md`` file additions (``--diff-filter=A``) per date
    and returns date-sorted rows of {"date", "count"}. Exits the process
    with status 1 if git log fails.
    """
    proc = subprocess.run(
        ["git", "log", "--format=%ad", "--date=format:%Y-%m-%d",
         "--all", "--diff-filter=A", "--", "domains/*.md"],
        capture_output=True, text=True, cwd=codex_path
    )
    if proc.returncode != 0:
        print(f"git log failed: {proc.stderr}", file=sys.stderr)
        sys.exit(1)

    per_day: dict[str, int] = defaultdict(int)
    for raw in proc.stdout.strip().split("\n"):
        day = raw.strip()
        if day:
            per_day[day] += 1
    return [{"date": d, "count": n} for d, n in sorted(per_day.items())]
def github_stars(repo: str = "living-ip/teleo-codex") -> int | None:
"""Fetch current star count from GitHub API. Returns None on failure."""
try:
result = subprocess.run(
["gh", "api", f"repos/{repo}", "--jq", ".stargazers_count"],
capture_output=True, text=True, timeout=10
)
if result.returncode == 0:
return int(result.stdout.strip())
except (subprocess.TimeoutExpired, ValueError):
pass
return None
def build_cumulative_contributors(entries: list[dict]) -> list[dict]:
    """Build cumulative unique contributor count by date.

    Each timeline row carries the running total plus that date's debut
    authors (sorted), flagged ``founding`` when the debut date is on or
    before FOUNDING_CUTOFF.
    """
    # Earliest commit date per author.
    debut: dict[str, str] = {}
    for rec in entries:
        name, day = rec["author"], rec["date"]
        prior = debut.get(name)
        if prior is None or day < prior:
            debut[name] = day

    # Invert: which authors first appeared on each date.
    arrivals: dict[str, list[str]] = defaultdict(list)
    for name, day in debut.items():
        arrivals[day].append(name)

    series: list[dict] = []
    running = 0
    for day in sorted(arrivals):
        cohort = arrivals[day]
        running += len(cohort)
        series.append({
            "date": day,
            "cumulative": running,
            "new": [
                {"name": n, "founding": day <= FOUNDING_CUTOFF}
                for n in sorted(cohort)
            ],
        })
    return series
def build_cumulative_claims(claim_entries: list[dict]) -> list[dict]:
    """Build cumulative claim count by date.

    Input rows ({"date", "count"}) are consumed in the given order —
    git_log_claims emits them date-sorted — carrying a running total.
    """
    running = 0
    series: list[dict] = []
    for row in claim_entries:
        running += row["count"]
        series.append({
            "date": row["date"],
            "cumulative": running,
            "added": row["count"],
        })
    return series
def build_daily_commits(entries: list[dict]) -> list[dict]:
    """Build daily commit volume by contributor.

    Returns date-sorted rows with the day's total commit count and a
    per-author breakdown (keys sorted by author name).
    """
    per_day: dict[str, dict[str, int]] = defaultdict(lambda: defaultdict(int))
    for rec in entries:
        per_day[rec["date"]][rec["author"]] += 1
    return [
        {
            "date": day,
            "total": sum(counts.values()),
            "by_contributor": dict(sorted(counts.items())),
        }
        for day, counts in sorted(per_day.items())
    ]
def generate_report(codex_path: str) -> dict:
    """Assemble the full growth report from git history and the GitHub API.

    Combines contributor, claim, and daily-activity timelines with a
    summary block. ``github_stars`` may be None when the API is
    unreachable.

    Fixes over the original: uses timezone-aware ``datetime.now(timezone.utc)``
    instead of the deprecated, naive ``datetime.utcnow()`` (matching the
    server endpoint), and computes the founding set in a single pass rather
    than an any()-scan of all entries per author (was O(authors * commits)).
    """
    entries = git_log_contributors(codex_path)
    claim_entries = git_log_claims(codex_path)
    stars = github_stars()
    contributors_timeline = build_cumulative_contributors(entries)
    claims_timeline = build_cumulative_claims(claim_entries)
    commits_timeline = build_daily_commits(entries)
    all_contributors = {e["author"] for e in entries}
    # An author is "founding" if any commit of theirs predates the cutoff
    # (equivalent to checking their first commit).
    founding = {e["author"] for e in entries if e["date"] <= FOUNDING_CUTOFF}
    now = datetime.now(timezone.utc)
    return {
        "generated_at": now.strftime("%Y-%m-%dT%H:%M:%SZ"),
        "summary": {
            "total_contributors": len(all_contributors),
            "founding_contributors": sorted(founding),
            "total_claims": claims_timeline[-1]["cumulative"] if claims_timeline else 0,
            "github_stars": stars,
            "codex_start_date": "2026-03-05",
            "days_active": (now - datetime(2026, 3, 5, tzinfo=timezone.utc)).days,
        },
        "cumulative_contributors": contributors_timeline,
        "cumulative_claims": claims_timeline,
        "daily_activity": commits_timeline,
    }
def format_csv(report: dict) -> str:
    """Render the two cumulative series as a merged CSV string.

    Dates from both series are unioned; a missing value on a date carries
    the previous value forward (step-function semantics for charting).
    """
    contrib = {row["date"]: row["cumulative"] for row in report["cumulative_contributors"]}
    claims = {row["date"]: row["cumulative"] for row in report["cumulative_claims"]}
    out = ["date,cumulative_contributors,cumulative_claims"]
    c_val = 0
    k_val = 0
    for day in sorted(contrib.keys() | claims.keys()):
        c_val = contrib.get(day, c_val)
        k_val = claims.get(day, k_val)
        out.append(f"{day},{c_val},{k_val}")
    return "\n".join(out)
def main():
    """CLI entry point: parse arguments, build the report, emit JSON or CSV."""
    parser = argparse.ArgumentParser(description="Generate cumulative growth data")
    parser.add_argument("--codex-path", required=True, help="Path to teleo-codex repo")
    parser.add_argument("--output", help="Output file path (default: stdout)")
    parser.add_argument("--format", choices=["json", "csv"], default="json")
    args = parser.parse_args()

    report = generate_report(args.codex_path)
    rendered = format_csv(report) if args.format == "csv" else json.dumps(report, indent=2)

    if args.output:
        with open(args.output, "w") as fh:
            fh.write(rendered)
        print(f"Written to {args.output}", file=sys.stderr)
    else:
        print(rendered)


if __name__ == "__main__":
    main()