From 153b3b3204ca9a5f7ff57d555615535f420c8a37 Mon Sep 17 00:00:00 2001
From: m3taversal <m3taversal@gmail.com>
Date: Wed, 11 Mar 2026 19:00:58 +0000
Subject: [PATCH] Auto: 2 files |  2 files changed, 72 insertions(+), 2
 deletions(-)

---
 schemas/attribution.md            | 46 +++++++++++++++++++++++++++++--
 schemas/contribution-weights.yaml | 28 +++++++++++++++++++
 2 files changed, 72 insertions(+), 2 deletions(-)
 create mode 100644 schemas/contribution-weights.yaml

diff --git a/schemas/attribution.md b/schemas/attribution.md
index 059f9c34..33964c30 100644
--- a/schemas/attribution.md
+++ b/schemas/attribution.md
@@ -134,8 +134,50 @@ attribution_handle: "@theiaresearch"
 - Disputes about attribution are resolved through the normal PR process
 - Removing attribution requires justification (e.g., the sourcer was misidentified)
 
+## Contribution Weights
+
+Role weights determine how much each contribution type counts toward a contributor's weighted score. Weights are **global policy**, not per-claim data — they live in `schemas/contribution-weights.yaml`, not in claim frontmatter.
+
+Why weights are global, not per-claim:
+1. Weights are policy (how much we value each role), not data (who did what)
+2. Weights evolve as bottlenecks shift — updating one config file beats migrating 400+ claims
+3. Per-claim weights create an incentive to game the system by inflating one's role on high-value claims
+
+The build pipeline reads `contribution-weights.yaml`, multiplies each contributor's per-role counts by the corresponding weights, and sums across roles to produce a weighted score. The frontend displays both raw counts (by role) and the weighted score.
+
+See `schemas/contribution-weights.yaml` for current weights and rationale.
+
+## Build Artifacts
+
+The website build pipeline (`extract-graph-data.py`) produces a `contributors.json` artifact alongside `graph-data.json` and `claims-context.json`:
+
+```json
+{
+  "contributors": [
+    {
+      "handle": "naval",
+      "roles": {"sourcer": 12, "extractor": 0, "challenger": 3, "synthesizer": 1, "reviewer": 0},
+      "weighted_score": 2.55,
+      "domains": {"internet-finance": 8, "grand-strategy": 5, "ai-alignment": 3},
+      "first_contribution": "2026-02-15",
+      "latest_contribution": "2026-03-11",
+      "claim_count": 16,
+      "timeline": [
+        {"date": "2026-02", "count": 3, "domains": ["internet-finance"]},
+        {"date": "2026-03", "count": 13, "domains": ["internet-finance", "grand-strategy", "ai-alignment"]}
+      ]
+    }
+  ]
+}
+```
+
+This is a static file rebuilt on every merge to main (~15 minute staleness). The frontend reads it at page load — no API or runtime queries needed.
+
+**Timeline**: Monthly granularity (`date` is `YYYY-MM`). Used by the frontend for a contribution heatmap or sparkline graphic (Cory requirement).
+
 ## Implementation Priority
 
 1. **Now**: Add `attribution` block to new claims going forward. No backfill required.
-2. **Soon**: Rhea builds contributor profile aggregation for the website.
-3. **Later**: Automated attribution from the extraction pipeline (MiniMax → Haiku → agent).
+2. **Soon**: Rhea adds an attribution aggregation pass to `extract-graph-data.py`, producing `contributors.json`.
+3. **Soon**: Frontend contributor profile pages — handle + sparkline + domain pie + top claims by role.
+4. **Later**: Automated attribution from the extraction pipeline (MiniMax → Haiku → agent).
diff --git a/schemas/contribution-weights.yaml b/schemas/contribution-weights.yaml
new file mode 100644
index 00000000..1105861c
--- /dev/null
+++ b/schemas/contribution-weights.yaml
@@ -0,0 +1,28 @@
+# Contribution Weights
+#
+# Global policy for how much each contributor role counts toward weighted scores.
+# Used by the build pipeline (extract-graph-data.py) to compute weighted_score
+# in contributors.json. Updated via PR — changes here affect all contributor profiles.
+#
+# Weights sum to 1.0. The build pipeline multiplies each contributor's role count
+# by the corresponding weight, then sums across roles.
+#
+# Current rationale (2026-03-11):
+# - Extraction is the current bottleneck and requires the most skill (reading sources,
+#   separating signal from noise, writing prose-as-title). Highest weight.
+# - Challenge is the quality mechanism — adversarial review catches errors that
+#   self-review cannot. Second highest.
+# - Sourcing discovers new information but is lower effort per instance.
+# - Synthesis connects claims across domains — high value but rare.
+# - Review is essential but is partially automated via the eval pipeline.
+#
+# These weights WILL change as the collective matures. When challenges become
+# the bottleneck (more claims than challengers can vet), challenger weight should increase.
+# When synthesis becomes the primary value-add, synthesizer weight increases.
+
+role_weights:
+  sourcer: 0.15
+  extractor: 0.40
+  challenger: 0.20
+  synthesizer: 0.15
+  reviewer: 0.10