diff --git a/inbox/queue/.extraction-debug/2026-03-20-anthropic-rsp-v3-conditional-thresholds.json b/inbox/queue/.extraction-debug/2026-03-20-anthropic-rsp-v3-conditional-thresholds.json new file mode 100644 index 00000000..5bcb7fff --- /dev/null +++ b/inbox/queue/.extraction-debug/2026-03-20-anthropic-rsp-v3-conditional-thresholds.json @@ -0,0 +1,29 @@ +{ + "rejected_claims": [ + { + "filename": "anthropic-rsp-v3-replaces-unconditional-safety-thresholds-with-dual-condition-escape-clauses.md", + "issues": [ + "missing_attribution_extractor", + "opsec_internal_deal_terms" + ] + } + ], + "validation_stats": { + "total": 1, + "kept": 0, + "fixed": 4, + "rejected": 1, + "fixes_applied": [ + "anthropic-rsp-v3-replaces-unconditional-safety-thresholds-with-dual-condition-escape-clauses.md:set_created:2026-03-20", + "anthropic-rsp-v3-replaces-unconditional-safety-thresholds-with-dual-condition-escape-clauses.md:stripped_wiki_link:voluntary-safety-pledges-cannot-survive-competitive-pressure", + "anthropic-rsp-v3-replaces-unconditional-safety-thresholds-with-dual-condition-escape-clauses.md:stripped_wiki_link:Anthropics-RSP-rollback-under-commercial-pressure-is-the-fir", + "anthropic-rsp-v3-replaces-unconditional-safety-thresholds-with-dual-condition-escape-clauses.md:stripped_wiki_link:only-binding-regulation-with-enforcement-teeth-changes-front" + ], + "rejections": [ + "anthropic-rsp-v3-replaces-unconditional-safety-thresholds-with-dual-condition-escape-clauses.md:missing_attribution_extractor", + "anthropic-rsp-v3-replaces-unconditional-safety-thresholds-with-dual-condition-escape-clauses.md:opsec_internal_deal_terms" + ] + }, + "model": "anthropic/claude-sonnet-4.5", + "date": "2026-03-20" +} \ No newline at end of file diff --git a/inbox/queue/2026-03-20-anthropic-rsp-v3-conditional-thresholds.md b/inbox/queue/2026-03-20-anthropic-rsp-v3-conditional-thresholds.md index 6fc8d5cd..36688a46 100644 --- a/inbox/queue/2026-03-20-anthropic-rsp-v3-conditional-thresholds.md +++ 
b/inbox/queue/2026-03-20-anthropic-rsp-v3-conditional-thresholds.md @@ -7,9 +7,12 @@ date: 2026-02-24 domain: ai-alignment secondary_domains: [] format: policy-document -status: unprocessed +status: enrichment priority: high tags: [RSP, Anthropic, voluntary-safety, conditional-commitment, METR, frog-boiling, competitive-pressure, alignment-tax, B1-confirmation] +processed_by: theseus +processed_date: 2026-03-20 +extraction_model: "anthropic/claude-sonnet-4.5" --- ## Content @@ -52,3 +55,12 @@ Anthropic released **Responsible Scaling Policy v3.0** on February 24, 2026 — PRIMARY CONNECTION: [[voluntary safety pledges cannot survive competitive pressure because unilateral commitments are structurally punished when competitors advance without equivalent constraints]] WHY ARCHIVED: Provides the most current and specific evidence of the voluntary-commitment collapse mechanism — not hypothetical but documented with RSP v1→v3 structural change and Kaplan quotes EXTRACTION HINT: The structural change (unconditional → dual-condition) is the key extractable claim; the frog-boiling quote from METR is supporting evidence; the $30B context explains the financial incentive driving the change + + +## Key Facts +- Anthropic released RSP v3.0 on February 24, 2026 +- RSP v3.0 introduces Frontier Safety Roadmaps and Risk Reports +- RSP v3.0 requires capability assessments at 6-month intervals +- Jared Kaplan stated 'We felt that it wouldn't actually help anyone for us to stop training AI models' in a TIME interview on March 6, 2026 +- Anthropic raised $30B at an approximately $380B valuation with 10x annual revenue growth (context for RSP v3.0 release) +- METR (Anthropic's evaluation partner) warned of a 'frog-boiling effect' from RSP v3.0 changes