From 153b3b3204ca9a5f7ff57d555615535f420c8a37 Mon Sep 17 00:00:00 2001 From: m3taversal Date: Wed, 11 Mar 2026 19:00:58 +0000 Subject: [PATCH] Auto: 2 files | 2 files changed, 72 insertions(+), 2 deletions(-) --- schemas/attribution.md | 46 +++++++++++++++++++++++++++++-- schemas/contribution-weights.yaml | 28 +++++++++++++++++++ 2 files changed, 72 insertions(+), 2 deletions(-) create mode 100644 schemas/contribution-weights.yaml diff --git a/schemas/attribution.md b/schemas/attribution.md index 059f9c34..33964c30 100644 --- a/schemas/attribution.md +++ b/schemas/attribution.md @@ -134,8 +134,50 @@ attribution_handle: "@theiaresearch" - Disputes about attribution are resolved through the normal PR process - Removing attribution requires justification (e.g., the sourcer was misidentified) +## Contribution Weights + +Role weights determine how much each contribution type counts toward a contributor's weighted score. Weights are **global policy**, not per-claim data — they live in `schemas/contribution-weights.yaml`, not in claim frontmatter. + +Why weights are global, not per-claim: +1. Weights are policy (how much we value each role), not data (who did what) +2. Weights evolve as bottlenecks shift — updating one config file beats migrating 400+ claims +3. Per-claim weights create gaming incentive to inflate role on high-value claims + +The build pipeline reads `contribution-weights.yaml` and multiplies role counts × weights to produce weighted scores. The frontend displays both raw counts (by role) and the weighted score. + +See `schemas/contribution-weights.yaml` for current weights and rationale. 
+ + ## Build Artifacts + +The website build pipeline (extract-graph-data.py) produces a `contributors.json` artifact alongside graph-data.json and claims-context.json: + +```json +{ + "contributors": [ + { + "handle": "naval", + "roles": {"sourcer": 12, "extractor": 0, "challenger": 3, "synthesizer": 1, "reviewer": 0}, + "weighted_score": 2.55, + "domains": {"internet-finance": 8, "grand-strategy": 5, "ai-alignment": 3}, + "first_contribution": "2026-02-15", + "latest_contribution": "2026-03-11", + "claim_count": 16, + "timeline": [ + {"date": "2026-02", "count": 3, "domains": ["internet-finance"]}, + {"date": "2026-03", "count": 13, "domains": ["internet-finance", "grand-strategy", "ai-alignment"]} + ] + } + ] +} +``` + +This is a static file rebuilt on every merge to main (~15 minute staleness). The frontend reads it at page load — no API or runtime queries needed. + +**Timeline**: Monthly granularity. Used by the frontend for a contribution heatmap or sparkline graphic (Cory requirement). + ## Implementation Priority 1. **Now**: Add `attribution` block to new claims going forward. No backfill required. -2. **Soon**: Rhea builds contributor profile aggregation for the website. -3. **Later**: Automated attribution from the extraction pipeline (MiniMax → Haiku → agent). +2. **Soon**: Rhea adds an attribution aggregation pass to extract-graph-data.py, producing contributors.json. +3. **Soon**: Frontend contributor profile pages — handle + sparkline + domain pie + top claims by role. +4. **Later**: Automated attribution from the extraction pipeline (MiniMax → Haiku → agent). diff --git a/schemas/contribution-weights.yaml b/schemas/contribution-weights.yaml new file mode 100644 index 00000000..1105861c --- /dev/null +++ b/schemas/contribution-weights.yaml @@ -0,0 +1,28 @@ +# Contribution Weights +# +# Global policy for how much each contributor role counts toward weighted scores. +# Used by the build pipeline (extract-graph-data.py) to compute weighted_score +# in contributors.json. 
Updated via PR — changes here affect all contributor profiles. +# +# Weights sum to 1.0. The build pipeline multiplies each contributor's role count +# by the corresponding weight, then sums across roles. +# +# Current rationale (2026-03-11): +# - Extraction is the current bottleneck and requires the most skill (reading sources, +# separating signal from noise, writing prose-as-title). Highest weight. +# - Challenge is the quality mechanism — adversarial review catches errors that +# self-review cannot. Second highest. +# - Sourcing discovers new information but is lower effort per instance. +# - Synthesis connects claims across domains — high value but rare. +# - Review is essential but is partially automated via the eval pipeline. +# +# These weights WILL change as the collective matures. When challenges become +# the bottleneck (more claims than challengers), challenger weight should increase. +# When synthesis becomes the primary value-add, synthesizer weight increases. + +role_weights: + sourcer: 0.15 + extractor: 0.40 + challenger: 0.20 + synthesizer: 0.15 + reviewer: 0.10