teleo-infrastructure/scripts/cumulative-growth.py
m3taversal 9505e5b40a feat: add /api/contributor-growth endpoint + cumulative growth script
Adds async git-log-based endpoint for cumulative contributor and claim
tracking. 5-minute cache, excludes bot accounts, tags founding contributors.
Standalone CLI script also included for ad-hoc data generation.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-20 22:19:42 +01:00

223 lines
7.3 KiB
Python

#!/usr/bin/env python3
"""Generate cumulative growth time-series data for public dashboard.
Produces JSON with three series:
- cumulative_contributors: unique git authors over time
- cumulative_claims: domain claim files added over time
- github_stars: star count snapshots (requires GitHub API)
Data sources: git log (codex repo), GitHub API.
Output: JSON to stdout or file, suitable for Chart.js line charts.
Usage:
python3 cumulative-growth.py --codex-path /path/to/teleo-codex [--output /path/to/output.json]
python3 cumulative-growth.py --codex-path /path/to/teleo-codex --format csv
"""
import argparse
import json
import subprocess
import sys
from collections import defaultdict
from datetime import datetime, timedelta, timezone
# Map bot/service accounts to their human principal, or exclude them entirely.
# A value of None means "drop this author from all contributor statistics";
# a string value would remap commits to that canonical name instead.
CONTRIBUTOR_ALIASES = {
    "Teleo Agents": None,  # system automation, not a contributor
    "Teleo Pipeline": None,  # pipeline bot
}
# Founding-contributor badge cutoff: anyone whose first contribution lands
# on or before this ISO date (comparison is `<=`, so the date is inclusive).
FOUNDING_CUTOFF = "2026-03-15"
def git_log_contributors(codex_path: str) -> list[dict]:
    """Return one {"date", "author"} record per commit across all refs.

    Author names are normalised through CONTRIBUTOR_ALIASES; authors that
    map to None (bot/system accounts) are dropped. On git failure the
    error is printed to stderr and the process exits with status 1.
    """
    proc = subprocess.run(
        ["git", "log", "--format=%ad|%an", "--date=format:%Y-%m-%d", "--all"],
        capture_output=True, text=True, cwd=codex_path
    )
    if proc.returncode != 0:
        print(f"git log failed: {proc.stderr}", file=sys.stderr)
        sys.exit(1)
    records = []
    for raw in proc.stdout.strip().split("\n"):
        # Lines without the delimiter (e.g. empty output) carry no data.
        if "|" not in raw:
            continue
        commit_date, commit_author = raw.split("|", 1)
        resolved = CONTRIBUTOR_ALIASES.get(commit_author, commit_author)
        if resolved is not None:
            records.append({"date": commit_date, "author": resolved})
    return records
def git_log_claims(codex_path: str) -> list[dict]:
    """Count claim-file additions (domains/*.md) per day from git history.

    Uses --diff-filter=A so each commit date contributes once per commit
    that added matching files. Returns [{"date", "count"}] sorted by date;
    exits the process with status 1 if git fails.
    """
    proc = subprocess.run(
        ["git", "log", "--format=%ad", "--date=format:%Y-%m-%d",
         "--all", "--diff-filter=A", "--", "domains/*.md"],
        capture_output=True, text=True, cwd=codex_path
    )
    if proc.returncode != 0:
        print(f"git log failed: {proc.stderr}", file=sys.stderr)
        sys.exit(1)
    per_day = defaultdict(int)
    for raw in proc.stdout.strip().split("\n"):
        day = raw.strip()
        if day:
            per_day[day] += 1
    return [{"date": day, "count": n} for day, n in sorted(per_day.items())]
def github_stars(repo: str = "living-ip/teleo-codex") -> int | None:
    """Fetch the current star count for *repo* via the `gh` CLI.

    Returns None on any failure: `gh` binary missing, request timeout,
    non-zero exit status, or unparseable output — so callers can embed
    the result directly in a best-effort report.
    """
    try:
        result = subprocess.run(
            ["gh", "api", f"repos/{repo}", "--jq", ".stargazers_count"],
            capture_output=True, text=True, timeout=10
        )
        if result.returncode == 0:
            return int(result.stdout.strip())
    # OSError covers FileNotFoundError when `gh` is not installed; the
    # previous tuple (TimeoutExpired, ValueError) let that crash through,
    # violating the documented "returns None on failure" contract.
    except (subprocess.TimeoutExpired, ValueError, OSError):
        pass
    return None
def build_cumulative_contributors(entries: list[dict]) -> list[dict]:
"""Build cumulative unique contributor count by date."""
first_seen = {}
for e in entries:
author, date = e["author"], e["date"]
if author not in first_seen or date < first_seen[author]:
first_seen[author] = date
by_date = defaultdict(list)
for author, date in first_seen.items():
by_date[date].append(author)
timeline = []
seen = set()
for date in sorted(by_date.keys()):
new_authors = by_date[date]
seen.update(new_authors)
is_founding = date <= FOUNDING_CUTOFF
timeline.append({
"date": date,
"cumulative": len(seen),
"new": [
{"name": a, "founding": is_founding}
for a in sorted(new_authors)
],
})
return timeline
def build_cumulative_claims(claim_entries: list[dict]) -> list[dict]:
    """Turn per-day claim counts into a running cumulative series.

    Input order is preserved; each output row carries the running total
    ("cumulative") alongside that day's increment ("added").
    """
    result = []
    running = 0
    for item in claim_entries:
        running += item["count"]
        result.append(
            {"date": item["date"], "cumulative": running, "added": item["count"]}
        )
    return result
def build_daily_commits(entries: list[dict]) -> list[dict]:
    """Aggregate commit volume per day, broken down by contributor.

    Returns [{"date", "total", "by_contributor"}] sorted by date, where
    "by_contributor" maps author name -> commit count (keys sorted).
    """
    per_day = defaultdict(lambda: defaultdict(int))
    for rec in entries:
        per_day[rec["date"]][rec["author"]] += 1
    return [
        {
            "date": day,
            "total": sum(counts.values()),
            "by_contributor": dict(sorted(counts.items())),
        }
        for day, counts in sorted(per_day.items())
    ]
def generate_report(codex_path: str) -> dict:
    """Assemble the full growth report for the public dashboard.

    Pulls contributor and claim history from the git repo at *codex_path*,
    a star-count snapshot from the GitHub API (best effort, may be None),
    and returns a JSON-serialisable dict: a "summary" block plus three
    time series for Chart.js consumption.
    """
    entries = git_log_contributors(codex_path)
    claim_entries = git_log_claims(codex_path)
    stars = github_stars()
    contributors_timeline = build_cumulative_contributors(entries)
    claims_timeline = build_cumulative_claims(claim_entries)
    commits_timeline = build_daily_commits(entries)
    all_contributors = {e["author"] for e in entries}
    # Single pass over entries instead of the quadratic any()-per-author scan.
    founding = {e["author"] for e in entries if e["date"] <= FOUNDING_CUTOFF}
    # Timezone-aware "now": datetime.utcnow() is deprecated since 3.12 and
    # returns a naive datetime. The strftime output is unchanged.
    now = datetime.now(timezone.utc)
    codex_start = datetime(2026, 3, 5, tzinfo=timezone.utc)
    return {
        "generated_at": now.strftime("%Y-%m-%dT%H:%M:%SZ"),
        "summary": {
            "total_contributors": len(all_contributors),
            "founding_contributors": sorted(founding),
            "total_claims": claims_timeline[-1]["cumulative"] if claims_timeline else 0,
            "github_stars": stars,
            "codex_start_date": "2026-03-05",
            "days_active": (now - codex_start).days,
        },
        "cumulative_contributors": contributors_timeline,
        "cumulative_claims": claims_timeline,
        "daily_activity": commits_timeline,
    }
def format_csv(report: dict) -> str:
    """Render the two cumulative series as CSV, forward-filling gaps.

    The union of both series' dates forms the rows; a date missing from
    one series repeats that series' last seen value (starting from 0).
    """
    contrib = {row["date"]: row["cumulative"] for row in report["cumulative_contributors"]}
    claims = {row["date"]: row["cumulative"] for row in report["cumulative_claims"]}
    rows = ["date,cumulative_contributors,cumulative_claims"]
    prev_contrib, prev_claims = 0, 0
    for day in sorted(contrib.keys() | claims.keys()):
        prev_contrib = contrib.get(day, prev_contrib)
        prev_claims = claims.get(day, prev_claims)
        rows.append(f"{day},{prev_contrib},{prev_claims}")
    return "\n".join(rows)
def main():
    """CLI entry point: parse args, build the report, emit JSON or CSV."""
    parser = argparse.ArgumentParser(description="Generate cumulative growth data")
    parser.add_argument("--codex-path", required=True, help="Path to teleo-codex repo")
    parser.add_argument("--output", help="Output file path (default: stdout)")
    parser.add_argument("--format", choices=["json", "csv"], default="json")
    opts = parser.parse_args()
    report = generate_report(opts.codex_path)
    rendered = (
        format_csv(report) if opts.format == "csv" else json.dumps(report, indent=2)
    )
    if not opts.output:
        print(rendered)
        return
    with open(opts.output, "w") as fh:
        fh.write(rendered)
    print(f"Written to {opts.output}", file=sys.stderr)


if __name__ == "__main__":
    main()