From 2ee6c405e4bbc417288ca9f0d6469ca8a1b47c60 Mon Sep 17 00:00:00 2001 From: Teleo Agents Date: Thu, 19 Mar 2026 13:36:50 +0000 Subject: [PATCH 1/2] extract: 2026-01-00-kim-third-party-ai-assurance-framework Pentagon-Agent: Epimetheus <968B2991-E2DF-4006-B962-F5B0A0CC8ACA> --- ...ield converging on problems that require it.md | 6 ++++++ ...-governance-built-on-unreliable-foundations.md | 6 ++++++ ...00-kim-third-party-ai-assurance-framework.json | 12 ++++++++---- ...1-00-kim-third-party-ai-assurance-framework.md | 15 ++++++++++++++- 4 files changed, 34 insertions(+), 5 deletions(-) diff --git a/domains/ai-alignment/no research group is building alignment through collective intelligence infrastructure despite the field converging on problems that require it.md b/domains/ai-alignment/no research group is building alignment through collective intelligence infrastructure despite the field converging on problems that require it.md index 64547a0c8..5469b5046 100644 --- a/domains/ai-alignment/no research group is building alignment through collective intelligence infrastructure despite the field converging on problems that require it.md +++ b/domains/ai-alignment/no research group is building alignment through collective intelligence infrastructure despite the field converging on problems that require it.md @@ -29,6 +29,12 @@ The UK AI for Collective Intelligence Research Network represents a national-sca CMU researchers have built and validated a third-party AI assurance framework with four operational components (Responsibility Assignment Matrix, Interview Protocol, Maturity Matrix, Assurance Report Template), tested on two real deployment cases. This represents concrete infrastructure-building work, though at small scale and not yet applicable to frontier AI. + +### Additional Evidence (challenge) +*Source: [[2026-01-00-kim-third-party-ai-assurance-framework]] | Added: 2026-03-19* + +CMU researchers published a comprehensive third-party AI assurance framework in January 2026 with four operational components (Responsibility Assignment Matrix, Interview Protocol, Maturity Matrix, Assurance Report Template) and validated it on two real deployment cases. This represents early-stage infrastructure building for independent evaluation, though at small scale rather than frontier AI. + --- Relevant Notes: diff --git a/domains/ai-alignment/pre-deployment-AI-evaluations-do-not-predict-real-world-risk-creating-institutional-governance-built-on-unreliable-foundations.md b/domains/ai-alignment/pre-deployment-AI-evaluations-do-not-predict-real-world-risk-creating-institutional-governance-built-on-unreliable-foundations.md index d8235603b..c78525945 100644 --- a/domains/ai-alignment/pre-deployment-AI-evaluations-do-not-predict-real-world-risk-creating-institutional-governance-built-on-unreliable-foundations.md +++ b/domains/ai-alignment/pre-deployment-AI-evaluations-do-not-predict-real-world-risk-creating-institutional-governance-built-on-unreliable-foundations.md @@ -38,6 +38,12 @@ The problem compounds the alignment challenge: even if safety research produces The voluntary-collaborative model adds a selection bias dimension to evaluation unreliability: evaluations only happen when labs consent, meaning the sample of evaluated models is systematically biased toward labs confident in their safety measures. Labs with weaker safety practices can avoid evaluation entirely. + +### Additional Evidence (extend) +*Source: [[2026-01-00-kim-third-party-ai-assurance-framework]] | Added: 2026-03-19* + +The CMU assurance framework explicitly addresses 'both the process of designing, developing, and deploying an AI system and the outcomes it produces' as a design goal, identifying that few existing evaluation resources cover this full lifecycle. This suggests the field is aware that process-only or outcome-only evaluation is insufficient, though the framework has only been validated on small deployment-scale tools. + --- Relevant Notes: diff --git a/inbox/queue/.extraction-debug/2026-01-00-kim-third-party-ai-assurance-framework.json b/inbox/queue/.extraction-debug/2026-01-00-kim-third-party-ai-assurance-framework.json index 4cde43069..eb1f9a80e 100644 --- a/inbox/queue/.extraction-debug/2026-01-00-kim-third-party-ai-assurance-framework.json +++ b/inbox/queue/.extraction-debug/2026-01-00-kim-third-party-ai-assurance-framework.json @@ -7,7 +7,7 @@ ] }, { - "filename": "ai-assurance-explicitly-distinguishes-itself-from-audit-to-prevent-conflict-of-interest-and-ensure-credibility-which-acknowledges-current-evaluation-has-a-structural-independence-problem.md", + "filename": "ai-assurance-explicitly-distinguishes-from-audit-to-prevent-conflict-of-interest-acknowledging-current-evaluation-has-structural-independence-problems.md", "issues": [ "missing_attribution_extractor" ] @@ -16,15 +16,19 @@ "validation_stats": { "total": 2, "kept": 0, - "fixed": 2, + "fixed": 6, "rejected": 2, "fixes_applied": [ "third-party-ai-assurance-methodology-is-at-proof-of-concept-stage-validated-in-small-deployment-contexts-but-not-yet-applicable-to-frontier-ai-at-scale.md:set_created:2026-03-19", - "ai-assurance-explicitly-distinguishes-itself-from-audit-to-prevent-conflict-of-interest-and-ensure-credibility-which-acknowledges-current-evaluation-has-a-structural-independence-problem.md:set_created:2026-03-19" + "third-party-ai-assurance-methodology-is-at-proof-of-concept-stage-validated-in-small-deployment-contexts-but-not-yet-applicable-to-frontier-ai-at-scale.md:stripped_wiki_link:no research group is building alignment through collective i", + "third-party-ai-assurance-methodology-is-at-proof-of-concept-stage-validated-in-small-deployment-contexts-but-not-yet-applicable-to-frontier-ai-at-scale.md:stripped_wiki_link:pre-deployment-AI-evaluations-do-not-predict-real-world-risk", + "ai-assurance-explicitly-distinguishes-from-audit-to-prevent-conflict-of-interest-acknowledging-current-evaluation-has-structural-independence-problems.md:set_created:2026-03-19", + "ai-assurance-explicitly-distinguishes-from-audit-to-prevent-conflict-of-interest-acknowledging-current-evaluation-has-structural-independence-problems.md:stripped_wiki_link:Anthropics RSP rollback under commercial pressure is the fir", + "ai-assurance-explicitly-distinguishes-from-audit-to-prevent-conflict-of-interest-acknowledging-current-evaluation-has-structural-independence-problems.md:stripped_wiki_link:only binding regulation with enforcement teeth changes front" ], "rejections": [ "third-party-ai-assurance-methodology-is-at-proof-of-concept-stage-validated-in-small-deployment-contexts-but-not-yet-applicable-to-frontier-ai-at-scale.md:missing_attribution_extractor", - "ai-assurance-explicitly-distinguishes-itself-from-audit-to-prevent-conflict-of-interest-and-ensure-credibility-which-acknowledges-current-evaluation-has-a-structural-independence-problem.md:missing_attribution_extractor" + "ai-assurance-explicitly-distinguishes-from-audit-to-prevent-conflict-of-interest-acknowledging-current-evaluation-has-structural-independence-problems.md:missing_attribution_extractor" ] }, "model": "anthropic/claude-sonnet-4.5", diff --git a/inbox/queue/2026-01-00-kim-third-party-ai-assurance-framework.md b/inbox/queue/2026-01-00-kim-third-party-ai-assurance-framework.md index 08b60a445..38ee16a16 100644 --- a/inbox/queue/2026-01-00-kim-third-party-ai-assurance-framework.md +++ b/inbox/queue/2026-01-00-kim-third-party-ai-assurance-framework.md @@ -7,13 +7,17 @@ date: 2026-01-30 domain: ai-alignment secondary_domains: [] format: paper -status: unprocessed +status: enrichment priority: high tags: [evaluation-infrastructure, third-party-assurance, conflict-of-interest, lifecycle-assessment, CMU] processed_by: theseus processed_date: 2026-03-19 enrichments_applied: ["no research group is building alignment through collective intelligence infrastructure despite the field converging on problems that require it.md"] extraction_model: "anthropic/claude-sonnet-4.5" +processed_by: theseus +processed_date: 2026-03-19 +enrichments_applied: ["no research group is building alignment through collective intelligence infrastructure despite the field converging on problems that require it.md", "pre-deployment-AI-evaluations-do-not-predict-real-world-risk-creating-institutional-governance-built-on-unreliable-foundations.md"] +extraction_model: "anthropic/claude-sonnet-4.5" --- ## Content @@ -62,3 +66,12 @@ EXTRACTION HINT: The "assurance vs audit" distinction to prevent conflict of int - The framework was tested on a business document tagging tool and a housing resource allocation tool - The paper identifies that few existing evaluation resources 'address both the process of designing, developing, and deploying an AI system and the outcomes it produces' - Few existing approaches are 'end-to-end and operational, give actionable guidance, or present evidence of usability' according to the gap analysis + + +## Key Facts +- CMU researchers published 'Toward Third-Party Assurance of AI Systems' in January 2026 +- The framework includes four components: Responsibility Assignment Matrix, Interview Protocol, Maturity Matrix, and Assurance Report Template +- The framework was tested on a business document tagging tool and a housing resource allocation tool +- The paper found the framework 'sound and comprehensive, usable across different organizational contexts, and effective at identifying bespoke issues' +- The paper identifies that few existing evaluation resources 'address both the process of designing, developing, and deploying an AI system and the outcomes it produces' +- Few existing approaches are 'end-to-end and operational, give actionable guidance, or present evidence of usability' according to the gap analysis -- 2.45.2 From cb858567d000d38d764a9a450d78813ddd5baecf Mon Sep 17 00:00:00 2001 From: Teleo Agents Date: Thu, 19 Mar 2026 13:37:30 +0000 Subject: [PATCH 2/2] auto-fix: strip 6 broken wiki links Pipeline auto-fixer: removed [[ ]] brackets from links that don't resolve to existing claims in the knowledge base. --- ...pite the field converging on problems that require it.md | 4 ++-- ...titutional-governance-built-on-unreliable-foundations.md | 6 +++--- .../2026-01-00-kim-third-party-ai-assurance-framework.md | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/domains/ai-alignment/no research group is building alignment through collective intelligence infrastructure despite the field converging on problems that require it.md b/domains/ai-alignment/no research group is building alignment through collective intelligence infrastructure despite the field converging on problems that require it.md index 5469b5046..cbf3840cd 100644 --- a/domains/ai-alignment/no research group is building alignment through collective intelligence infrastructure despite the field converging on problems that require it.md +++ b/domains/ai-alignment/no research group is building alignment through collective intelligence infrastructure despite the field converging on problems that require it.md @@ -19,7 +19,7 @@ The alignment field has converged on a problem they cannot solve with their curr ### Additional Evidence (challenge) -*Source: [[2024-11-00-ai4ci-national-scale-collective-intelligence]] | Added: 2026-03-15 | Extractor: anthropic/claude-sonnet-4.5* +*Source: 2024-11-00-ai4ci-national-scale-collective-intelligence | Added: 2026-03-15 | Extractor: anthropic/claude-sonnet-4.5* The UK AI for Collective Intelligence Research Network represents a national-scale institutional commitment to building CI infrastructure with explicit alignment goals. Funded by UKRI/EPSRC, the network proposes the 'AI4CI Loop' (Gathering Intelligence → Informing Behaviour) as a framework for multi-level decision making. The research strategy includes seven trust properties (human agency, security, privacy, transparency, fairness, value alignment, accountability) and specifies technical requirements including federated learning architectures, secure data repositories, and foundation models adapted for collective intelligence contexts. This is not purely academic—it's a government-backed infrastructure program with institutional resources. However, the strategy is prospective (published 2024-11) and describes a research agenda rather than deployed systems, so it represents institutional intent rather than operational infrastructure. @@ -49,4 +49,4 @@ Relevant Notes: Topics: - [[livingip overview]] - [[coordination mechanisms]] -- [[domains/ai-alignment/_map]] \ No newline at end of file +- domains/ai-alignment/_map \ No newline at end of file diff --git a/domains/ai-alignment/pre-deployment-AI-evaluations-do-not-predict-real-world-risk-creating-institutional-governance-built-on-unreliable-foundations.md b/domains/ai-alignment/pre-deployment-AI-evaluations-do-not-predict-real-world-risk-creating-institutional-governance-built-on-unreliable-foundations.md index c78525945..56f891c29 100644 --- a/domains/ai-alignment/pre-deployment-AI-evaluations-do-not-predict-real-world-risk-creating-institutional-governance-built-on-unreliable-foundations.md +++ b/domains/ai-alignment/pre-deployment-AI-evaluations-do-not-predict-real-world-risk-creating-institutional-governance-built-on-unreliable-foundations.md @@ -34,7 +34,7 @@ The problem compounds the alignment challenge: even if safety research produces ### Additional Evidence (extend) -*Source: [[2026-03-00-metr-aisi-pre-deployment-evaluation-practice]] | Added: 2026-03-19* +*Source: 2026-03-00-metr-aisi-pre-deployment-evaluation-practice | Added: 2026-03-19* The voluntary-collaborative model adds a selection bias dimension to evaluation unreliability: evaluations only happen when labs consent, meaning the sample of evaluated models is systematically biased toward labs confident in their safety measures. Labs with weaker safety practices can avoid evaluation entirely. @@ -52,5 +52,5 @@ Relevant Notes: - [[the gap between theoretical AI capability and observed deployment is massive across all occupations because adoption lag not capability limits determines real-world impact]] Topics: -- [[domains/ai-alignment/_map]] -- [[core/grand-strategy/_map]] +- domains/ai-alignment/_map +- core/grand-strategy/_map diff --git a/inbox/queue/2026-01-00-kim-third-party-ai-assurance-framework.md b/inbox/queue/2026-01-00-kim-third-party-ai-assurance-framework.md index 38ee16a16..1391c29f4 100644 --- a/inbox/queue/2026-01-00-kim-third-party-ai-assurance-framework.md +++ b/inbox/queue/2026-01-00-kim-third-party-ai-assurance-framework.md @@ -44,7 +44,7 @@ CMU researchers propose a comprehensive third-party AI assurance framework with **KB connections:** - Directly relevant to the "missing correction mechanism" identified in Session 2026-03-18b — third-party performance measurement that is genuinely independent, not collaborative -- [[no research group is building alignment through collective intelligence infrastructure]] — this paper is one of the first to try to build the assurance infrastructure, but at a small scale +- no research group is building alignment through collective intelligence infrastructure — this paper is one of the first to try to build the assurance infrastructure, but at a small scale **Extraction hints:** - Could support a claim about the early stage of AI assurance methodology: "third-party AI assurance methodology is at the proof-of-concept stage, validated in small deployment contexts but not yet applicable to frontier AI at scale" -- 2.45.2