feat: add /api/contributor-growth endpoint + cumulative growth script
Adds an async, git-log-based endpoint for cumulative contributor and claim tracking: 5-minute cache, excludes bot accounts, tags founding contributors. A standalone CLI script is also included for ad-hoc data generation.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

Parent: f0cf772182
Commit: 9505e5b40a
2 changed files with 333 additions and 0 deletions
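A quick way to smoke-test the new endpoint once the dashboard is running is a plain stdlib client. A minimal sketch, assuming the server listens on http://localhost:8080 (host and port are assumptions, not part of this commit):

    # Minimal smoke test (hypothetical base URL; adjust to your deployment).
    import json
    import urllib.request

    with urllib.request.urlopen("http://localhost:8080/api/contributor-growth") as resp:
        data = json.load(resp)

    summary = data["summary"]
    print(f"{summary['total_contributors']} contributors, {summary['total_claims']} claims")
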
@@ -10,6 +10,7 @@ Endpoints:
Owner: Argus
"""

import asyncio
import json
import logging
import os
@@ -17,6 +18,7 @@ import sqlite3
import statistics
import time
import urllib.request
from collections import defaultdict
from datetime import datetime, timezone
from pathlib import Path

@@ -1182,6 +1184,113 @@ async def handle_telegram_extractions(request):
    conn.close()


# ─── GET /api/contributor-growth ─────────────────────────────────────────

CODEX_WORKTREE = Path(os.environ.get("MAIN_WORKTREE", "/opt/teleo-eval/workspaces/main"))
FOUNDING_CUTOFF = "2026-03-15"
CONTRIBUTOR_EXCLUDE = {"Teleo Agents", "Teleo Pipeline"}

_growth_cache: dict | None = None
_growth_cache_ts: float = 0
GROWTH_CACHE_TTL = 300


async def handle_contributor_growth(request):
    """Cumulative unique contributors and claims over time from git log.

    Returns time-series data for Chart.js line charts.
    Cached for 5 minutes since git log is expensive.
    """
    global _growth_cache, _growth_cache_ts
    now = time.monotonic()
    if _growth_cache is not None and (now - _growth_cache_ts) < GROWTH_CACHE_TTL:
        return web.json_response(_growth_cache)

    codex_path = str(CODEX_WORKTREE)
    if not CODEX_WORKTREE.exists():
        return web.json_response(
            {"error": "codex worktree not found", "path": codex_path}, status=404
        )

    proc = await asyncio.create_subprocess_exec(
        "git", "log", "--format=%ad|%an", "--date=format:%Y-%m-%d", "--all",
        cwd=codex_path,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    stdout, stderr = await proc.communicate()
    if proc.returncode != 0:
        return web.json_response(
            {"error": "git log failed", "detail": stderr.decode()[:500]}, status=500
        )

    first_seen: dict[str, str] = {}
    daily_commits: dict[str, dict[str, int]] = defaultdict(lambda: defaultdict(int))
    for line in stdout.decode().strip().split("\n"):
        if "|" not in line:
            continue
        date, author = line.split("|", 1)
        if author in CONTRIBUTOR_EXCLUDE:
            continue
        daily_commits[date][author] += 1
        if author not in first_seen or date < first_seen[author]:
            first_seen[author] = date

    by_date: dict[str, list[str]] = defaultdict(list)
    for author, date in first_seen.items():
        by_date[date].append(author)

    contributors_timeline = []
    seen: set[str] = set()
    for date in sorted(by_date.keys()):
        new_authors = by_date[date]
        seen.update(new_authors)
        contributors_timeline.append({
            "date": date,
            "cumulative": len(seen),
            "new": [{"name": a, "founding": date <= FOUNDING_CUTOFF} for a in sorted(new_authors)],
        })

    proc2 = await asyncio.create_subprocess_exec(
        "git", "log", "--format=%ad", "--date=format:%Y-%m-%d",
        "--all", "--diff-filter=A", "--", "domains/*.md",
        cwd=codex_path,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    stdout2, _ = await proc2.communicate()
    claim_counts: dict[str, int] = defaultdict(int)
    for line in stdout2.decode().strip().split("\n"):
        line = line.strip()
        if line:
            claim_counts[line] += 1

    claims_timeline = []
    cumulative = 0
    for date in sorted(claim_counts.keys()):
        cumulative += claim_counts[date]
        claims_timeline.append({"date": date, "cumulative": cumulative, "added": claim_counts[date]})

    all_contributors = set(first_seen.keys())
    founding = sorted(a for a in all_contributors if first_seen[a] <= FOUNDING_CUTOFF)

    result = {
        "generated_at": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
        "summary": {
            "total_contributors": len(all_contributors),
            "founding_contributors": founding,
            "total_claims": cumulative,
            "days_active": (datetime.now(timezone.utc) - datetime(2026, 3, 5, tzinfo=timezone.utc)).days,
        },
        "cumulative_contributors": contributors_timeline,
        "cumulative_claims": claims_timeline,
    }

    _growth_cache = result
    _growth_cache_ts = now
    return web.json_response(result)


# ─── Registration ──────────────────────────────────────────────────────────

def register_dashboard_routes(app: web.Application, get_conn):
@@ -1199,3 +1308,4 @@ def register_dashboard_routes(app: web.Application, get_conn):
    app.router.add_get("/api/growth", handle_growth)
    app.router.add_get("/api/pr-lifecycle", handle_pr_lifecycle)
    app.router.add_get("/api/telegram-extractions", handle_telegram_extractions)
    app.router.add_get("/api/contributor-growth", handle_contributor_growth)
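For orientation, the `result` dict built by the handler above serializes to JSON with the shape below. This is an illustrative sketch: every name and number is a placeholder, not captured output.

    # Illustrative only: all values below are placeholders.
    {
        "generated_at": "2026-04-01T12:00:00Z",
        "summary": {
            "total_contributors": 12,
            "founding_contributors": ["alice", "bob"],
            "total_claims": 87,
            "days_active": 27,
        },
        "cumulative_contributors": [
            {"date": "2026-03-05", "cumulative": 2,
             "new": [{"name": "alice", "founding": True},
                     {"name": "bob", "founding": True}]},
        ],
        "cumulative_claims": [
            {"date": "2026-03-05", "cumulative": 3, "added": 3},
        ],
    }
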
scripts/cumulative-growth.py (new file, 223 lines)
@@ -0,0 +1,223 @@
#!/usr/bin/env python3
"""Generate cumulative growth time-series data for public dashboard.

Produces JSON with three series:
- cumulative_contributors: unique git authors over time
- cumulative_claims: domain claim files added over time
- daily_activity: daily commit volume by contributor

Data sources: git log (codex repo), GitHub API (current star count).
Output: JSON to stdout or file, suitable for Chart.js line charts.

Usage:
    python3 cumulative-growth.py --codex-path /path/to/teleo-codex [--output /path/to/output.json]
    python3 cumulative-growth.py --codex-path /path/to/teleo-codex --format csv
"""

import argparse
import json
import subprocess
import sys
from collections import defaultdict
from datetime import datetime, timezone

# Map bot/service accounts to their human principal or exclude them.
# "Teleo Agents" and "Teleo Pipeline" are bot accounts — attribute to system.
CONTRIBUTOR_ALIASES = {
    "Teleo Agents": None,  # system automation, not a contributor
    "Teleo Pipeline": None,  # pipeline bot
}

# Founding contributors get a badge — anyone who contributed before this date.
FOUNDING_CUTOFF = "2026-03-15"


def git_log_contributors(codex_path: str) -> list[dict]:
    """Extract per-commit author and date from git log."""
    result = subprocess.run(
        ["git", "log", "--format=%ad|%an", "--date=format:%Y-%m-%d", "--all"],
        capture_output=True, text=True, cwd=codex_path
    )
    if result.returncode != 0:
        print(f"git log failed: {result.stderr}", file=sys.stderr)
        sys.exit(1)

    entries = []
    for line in result.stdout.strip().split("\n"):
        if "|" not in line:
            continue
        date, author = line.split("|", 1)
        canonical = CONTRIBUTOR_ALIASES.get(author, author)
        if canonical is None:
            continue
        entries.append({"date": date, "author": canonical})
    return entries


def git_log_claims(codex_path: str) -> list[dict]:
    """Extract claim file additions over time from git log."""
    result = subprocess.run(
        ["git", "log", "--format=%ad", "--date=format:%Y-%m-%d",
         "--all", "--diff-filter=A", "--", "domains/*.md"],
        capture_output=True, text=True, cwd=codex_path
    )
    if result.returncode != 0:
        print(f"git log failed: {result.stderr}", file=sys.stderr)
        sys.exit(1)

    counts = defaultdict(int)
    for line in result.stdout.strip().split("\n"):
        line = line.strip()
        if line:
            counts[line] += 1
    return [{"date": d, "count": c} for d, c in sorted(counts.items())]


def github_stars(repo: str = "living-ip/teleo-codex") -> int | None:
    """Fetch current star count from GitHub API. Returns None on failure."""
    try:
        result = subprocess.run(
            ["gh", "api", f"repos/{repo}", "--jq", ".stargazers_count"],
            capture_output=True, text=True, timeout=10
        )
        if result.returncode == 0:
            return int(result.stdout.strip())
    except (subprocess.TimeoutExpired, ValueError, FileNotFoundError):
        pass
    return None


def build_cumulative_contributors(entries: list[dict]) -> list[dict]:
    """Build cumulative unique contributor count by date."""
    first_seen = {}
    for e in entries:
        author, date = e["author"], e["date"]
        if author not in first_seen or date < first_seen[author]:
            first_seen[author] = date

    by_date = defaultdict(list)
    for author, date in first_seen.items():
        by_date[date].append(author)

    timeline = []
    seen = set()
    for date in sorted(by_date.keys()):
        new_authors = by_date[date]
        seen.update(new_authors)
        is_founding = date <= FOUNDING_CUTOFF
        timeline.append({
            "date": date,
            "cumulative": len(seen),
            "new": [
                {"name": a, "founding": is_founding}
                for a in sorted(new_authors)
            ],
        })
    return timeline


def build_cumulative_claims(claim_entries: list[dict]) -> list[dict]:
    """Build cumulative claim count by date."""
    timeline = []
    cumulative = 0
    for entry in claim_entries:
        cumulative += entry["count"]
        timeline.append({
            "date": entry["date"],
            "cumulative": cumulative,
            "added": entry["count"],
        })
    return timeline


def build_daily_commits(entries: list[dict]) -> list[dict]:
    """Build daily commit volume by contributor."""
    daily = defaultdict(lambda: defaultdict(int))
    for e in entries:
        daily[e["date"]][e["author"]] += 1

    timeline = []
    for date in sorted(daily.keys()):
        authors = daily[date]
        timeline.append({
            "date": date,
            "total": sum(authors.values()),
            "by_contributor": dict(sorted(authors.items())),
        })
    return timeline


def generate_report(codex_path: str) -> dict:
    entries = git_log_contributors(codex_path)
    claim_entries = git_log_claims(codex_path)
    stars = github_stars()

    contributors_timeline = build_cumulative_contributors(entries)
    claims_timeline = build_cumulative_claims(claim_entries)
    commits_timeline = build_daily_commits(entries)

    all_contributors = set(e["author"] for e in entries)
    founding = [
        a for a in all_contributors
        if any(
            e["date"] <= FOUNDING_CUTOFF and e["author"] == a
            for e in entries
        )
    ]

    return {
        "generated_at": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
        "summary": {
            "total_contributors": len(all_contributors),
            "founding_contributors": sorted(founding),
            "total_claims": claims_timeline[-1]["cumulative"] if claims_timeline else 0,
            "github_stars": stars,
            "codex_start_date": "2026-03-05",
            "days_active": (datetime.now(timezone.utc) - datetime(2026, 3, 5, tzinfo=timezone.utc)).days,
        },
        "cumulative_contributors": contributors_timeline,
        "cumulative_claims": claims_timeline,
        "daily_activity": commits_timeline,
    }


def format_csv(report: dict) -> str:
    lines = ["date,cumulative_contributors,cumulative_claims"]
    contrib_map = {e["date"]: e["cumulative"] for e in report["cumulative_contributors"]}
    claims_map = {e["date"]: e["cumulative"] for e in report["cumulative_claims"]}

    all_dates = sorted(set(list(contrib_map.keys()) + list(claims_map.keys())))

    # Forward-fill: a date present in only one series carries the other's last value.
    last_contrib = 0
    last_claims = 0
    for d in all_dates:
        last_contrib = contrib_map.get(d, last_contrib)
        last_claims = claims_map.get(d, last_claims)
        lines.append(f"{d},{last_contrib},{last_claims}")
    return "\n".join(lines)


def main():
    parser = argparse.ArgumentParser(description="Generate cumulative growth data")
    parser.add_argument("--codex-path", required=True, help="Path to teleo-codex repo")
    parser.add_argument("--output", help="Output file path (default: stdout)")
    parser.add_argument("--format", choices=["json", "csv"], default="json")
    args = parser.parse_args()

    report = generate_report(args.codex_path)

    if args.format == "csv":
        output = format_csv(report)
    else:
        output = json.dumps(report, indent=2)

    if args.output:
        with open(args.output, "w") as f:
            f.write(output)
        print(f"Written to {args.output}", file=sys.stderr)
    else:
        print(output)


if __name__ == "__main__":
    main()
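An illustrative invocation of the CSV path follows; the path and all numbers are made up, shown only to demonstrate the forward-fill behavior (a date present in only one series still gets both columns, carrying the last seen value):

    $ python3 scripts/cumulative-growth.py --codex-path ~/src/teleo-codex --format csv
    date,cumulative_contributors,cumulative_claims
    2026-03-05,2,3
    2026-03-07,2,9
    2026-03-09,5,9

With `--output`, the same report is written to a file instead of stdout.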