From 751432360867755dc81514c9b246a7c6c7aa7c7a Mon Sep 17 00:00:00 2001
From: Teleo Agents <agents@livingip.xyz>
Date: Sun, 15 Mar 2026 19:29:03 +0000
Subject: [PATCH] extract:
 2025-06-00-li-scaling-human-judgment-community-notes-llms

Pentagon-Agent: Ganymede <F99EBFA6-547B-4096-BEEA-1D59C3E4028A>
---
 ...ocial-choice-without-normative-scrutiny.md |  6 ++
 ...g-human-judgment-community-notes-llms.json | 58 +++++++++++++++++++
 ...ing-human-judgment-community-notes-llms.md | 14 ++++-
 3 files changed, 77 insertions(+), 1 deletion(-)
 create mode 100644 inbox/archive/.extraction-debug/2025-06-00-li-scaling-human-judgment-community-notes-llms.json

diff --git a/domains/ai-alignment/rlhf-is-implicit-social-choice-without-normative-scrutiny.md b/domains/ai-alignment/rlhf-is-implicit-social-choice-without-normative-scrutiny.md
index d8d679b8..dc59e956 100644
--- a/domains/ai-alignment/rlhf-is-implicit-social-choice-without-normative-scrutiny.md
+++ b/domains/ai-alignment/rlhf-is-implicit-social-choice-without-normative-scrutiny.md
@@ -27,6 +27,12 @@ This claim directly addresses the mechanism gap identified in [[RLHF and DPO bot
 
 The paper's proposed solution—RLCHF with explicit social welfare functions—connects to [[collective intelligence requires diversity as a structural precondition not a moral preference]] by formalizing how diverse evaluator input should be preserved rather than collapsed.
 
+
+### Additional Evidence (extend)
+*Source: [[2025-06-00-li-scaling-human-judgment-community-notes-llms]] | Added: 2026-03-15*
+
+RLCF makes the social choice mechanism explicit through the bridging algorithm (matrix factorization with intercept scores). Unlike standard RLHF, which aggregates preferences opaquely through reward model training, RLCF uses the intercepts as its training signal—a deliberate choice to optimize for cross-partisan agreement, which amounts to a specific social welfare function.
+
 ---
 
 Relevant Notes:
diff --git a/inbox/archive/.extraction-debug/2025-06-00-li-scaling-human-judgment-community-notes-llms.json b/inbox/archive/.extraction-debug/2025-06-00-li-scaling-human-judgment-community-notes-llms.json
new file mode 100644
index 00000000..38bd99dc
--- /dev/null
+++ b/inbox/archive/.extraction-debug/2025-06-00-li-scaling-human-judgment-community-notes-llms.json
@@ -0,0 +1,58 @@
+{
+  "rejected_claims": [
+    {
+      "filename": "rlcf-architecture-separates-ai-generation-from-human-evaluation-with-bridging-algorithm-selection.md",
+      "issues": [
+        "missing_attribution_extractor"
+      ]
+    },
+    {
+      "filename": "bridging-based-consensus-mechanisms-risk-homogenization-toward-optimally-inoffensive-content.md",
+      "issues": [
+        "no_frontmatter"
+      ]
+    },
+    {
+      "filename": "human-rating-authority-in-ai-systems-preserves-alignment-by-keeping-value-judgment-in-human-hands.md",
+      "issues": [
+        "missing_attribution_extractor"
+      ]
+    },
+    {
+      "filename": "stylistic-novelty-rewards-in-rlcf-balance-optimization-pressure-with-diversity-preservation.md",
+      "issues": [
+        "missing_attribution_extractor"
+      ]
+    }
+  ],
+  "validation_stats": {
+    "total": 4,
+    "kept": 0,
+    "fixed": 14,
+    "rejected": 4,
+    "fixes_applied": [
+      "rlcf-architecture-separates-ai-generation-from-human-evaluation-with-bridging-algorithm-selection.md:set_created:2026-03-15",
+      "rlcf-architecture-separates-ai-generation-from-human-evaluation-with-bridging-algorithm-selection.md:stripped_wiki_link:democratic-alignment-assemblies-produce-constitutions-as-eff",
+      "rlcf-architecture-separates-ai-generation-from-human-evaluation-with-bridging-algorithm-selection.md:stripped_wiki_link:community-centred-norm-elicitation-surfaces-alignment-target",
+      "rlcf-architecture-separates-ai-generation-from-human-evaluation-with-bridging-algorithm-selection.md:stripped_wiki_link:rlhf-is-implicit-social-choice-without-normative-scrutiny.md",
+      "bridging-based-consensus-mechanisms-risk-homogenization-toward-optimally-inoffensive-content.md:set_created:2026-03-15",
+      "bridging-based-consensus-mechanisms-risk-homogenization-toward-optimally-inoffensive-content.md:stripped_wiki_link:universal-alignment-is-mathematically-impossible-because-Arr",
+      "bridging-based-consensus-mechanisms-risk-homogenization-toward-optimally-inoffensive-content.md:stripped_wiki_link:pluralistic-alignment-must-accommodate-irreducibly-diverse-v",
+      "bridging-based-consensus-mechanisms-risk-homogenization-toward-optimally-inoffensive-content.md:stripped_wiki_link:some-disagreements-are-permanently-irreducible-because-they-",
+      "human-rating-authority-in-ai-systems-preserves-alignment-by-keeping-value-judgment-in-human-hands.md:set_created:2026-03-15",
+      "human-rating-authority-in-ai-systems-preserves-alignment-by-keeping-value-judgment-in-human-hands.md:stripped_wiki_link:coding-agents-cannot-take-accountability-for-mistakes-which-",
+      "human-rating-authority-in-ai-systems-preserves-alignment-by-keeping-value-judgment-in-human-hands.md:stripped_wiki_link:human-in-the-loop-at-the-architectural-level-means-humans-se",
+      "stylistic-novelty-rewards-in-rlcf-balance-optimization-pressure-with-diversity-preservation.md:set_created:2026-03-15",
+      "stylistic-novelty-rewards-in-rlcf-balance-optimization-pressure-with-diversity-preservation.md:stripped_wiki_link:pluralistic-ai-alignment-through-multiple-systems-preserves-",
+      "stylistic-novelty-rewards-in-rlcf-balance-optimization-pressure-with-diversity-preservation.md:stripped_wiki_link:high-AI-exposure-increases-collective-idea-diversity-without"
+    ],
+    "rejections": [
+      "rlcf-architecture-separates-ai-generation-from-human-evaluation-with-bridging-algorithm-selection.md:missing_attribution_extractor",
+      "bridging-based-consensus-mechanisms-risk-homogenization-toward-optimally-inoffensive-content.md:no_frontmatter",
+      "human-rating-authority-in-ai-systems-preserves-alignment-by-keeping-value-judgment-in-human-hands.md:missing_attribution_extractor",
+      "stylistic-novelty-rewards-in-rlcf-balance-optimization-pressure-with-diversity-preservation.md:missing_attribution_extractor"
+    ]
+  },
+  "model": "anthropic/claude-sonnet-4.5",
+  "date": "2026-03-15"
+}
\ No newline at end of file
diff --git a/inbox/archive/2025-06-00-li-scaling-human-judgment-community-notes-llms.md b/inbox/archive/2025-06-00-li-scaling-human-judgment-community-notes-llms.md
index 095a911b..2d14049b 100644
--- a/inbox/archive/2025-06-00-li-scaling-human-judgment-community-notes-llms.md
+++ b/inbox/archive/2025-06-00-li-scaling-human-judgment-community-notes-llms.md
@@ -7,9 +7,13 @@ date: 2025-06-30
 domain: ai-alignment
 secondary_domains: [collective-intelligence]
 format: paper
-status: unprocessed
+status: enrichment
 priority: high
 tags: [RLCF, community-notes, bridging-algorithm, pluralistic-alignment, human-AI-collaboration, LLM-alignment]
+processed_by: theseus
+processed_date: 2026-03-15
+enrichments_applied: ["rlhf-is-implicit-social-choice-without-normative-scrutiny.md"]
+extraction_model: "anthropic/claude-sonnet-4.5"
 ---
 
 ## Content
@@ -51,3 +55,11 @@ Proposes a hybrid model for Community Notes where both humans and LLMs write not
 PRIMARY CONNECTION: democratic alignment assemblies produce constitutions as effective as expert-designed ones while better representing diverse populations
 WHY ARCHIVED: First concrete specification of RLCF — transitions from design principle to implementable mechanism
 EXTRACTION HINT: Focus on the architecture (who generates, who rates, what selects) and the homogenization risk — the "optimally inoffensive" failure mode is a key tension with our bridging-based alignment thesis
+
+
+## Key Facts
+- The paper proposes a hybrid model for Community Notes in which both humans and LLMs write notes, but humans alone rate them
+- The bridging algorithm uses matrix factorization: `y_ij = w_i * x_j + b_i + c_j`, where the intercept `c_j` is the bridging score
+- Notes must receive support from raters with diverse viewpoints to surface
+- The paper was published in the Journal of Online Trust and Safety in June 2025
+- Key risks identified: helpfulness hacking, declining human engagement, homogenization, and overwhelmed rater capacity