diff --git a/inbox/queue/.extraction-debug/2026-03-25-aisi-self-replication-roundup-no-end-to-end-evaluation.json b/inbox/queue/.extraction-debug/2026-03-25-aisi-self-replication-roundup-no-end-to-end-evaluation.json new file mode 100644 index 00000000..8b2fe7b6 --- /dev/null +++ b/inbox/queue/.extraction-debug/2026-03-25-aisi-self-replication-roundup-no-end-to-end-evaluation.json @@ -0,0 +1,36 @@ +{ + "rejected_claims": [ + { + "filename": "ai-self-replication-component-success-does-not-predict-end-to-end-capability-under-realistic-security.md", + "issues": [ + "missing_attribution_extractor" + ] + }, + { + "filename": "google-deepmind-end-to-end-self-replication-evaluation-shows-proximity-without-success.md", + "issues": [ + "missing_attribution_extractor" + ] + } + ], + "validation_stats": { + "total": 2, + "kept": 0, + "fixed": 6, + "rejected": 2, + "fixes_applied": [ + "ai-self-replication-component-success-does-not-predict-end-to-end-capability-under-realistic-security.md:set_created:2026-03-25", + "ai-self-replication-component-success-does-not-predict-end-to-end-capability-under-realistic-security.md:stripped_wiki_link:three conditions gate AI takeover risk autonomy robotics and", + "ai-self-replication-component-success-does-not-predict-end-to-end-capability-under-realistic-security.md:stripped_wiki_link:instrumental convergence risks may be less imminent than ori", + "google-deepmind-end-to-end-self-replication-evaluation-shows-proximity-without-success.md:set_created:2026-03-25", + "google-deepmind-end-to-end-self-replication-evaluation-shows-proximity-without-success.md:stripped_wiki_link:three conditions gate AI takeover risk autonomy robotics and", + "google-deepmind-end-to-end-self-replication-evaluation-shows-proximity-without-success.md:stripped_wiki_link:AI capability and reliability are independent dimensions bec" + ], + "rejections": [ + "ai-self-replication-component-success-does-not-predict-end-to-end-capability-under-realistic-security.md:missing_attribution_extractor", + "google-deepmind-end-to-end-self-replication-evaluation-shows-proximity-without-success.md:missing_attribution_extractor" + ] + }, + "model": "anthropic/claude-sonnet-4.5", + "date": "2026-03-25" +} \ No newline at end of file diff --git a/inbox/queue/2026-03-25-aisi-self-replication-roundup-no-end-to-end-evaluation.md b/inbox/queue/2026-03-25-aisi-self-replication-roundup-no-end-to-end-evaluation.md index 940e91a2..3227661c 100644 --- a/inbox/queue/2026-03-25-aisi-self-replication-roundup-no-end-to-end-evaluation.md +++ b/inbox/queue/2026-03-25-aisi-self-replication-roundup-no-end-to-end-evaluation.md @@ -7,9 +7,13 @@ date: 2025-10-01 domain: ai-alignment secondary_domains: [] format: blog-post -status: unprocessed +status: null-result priority: medium tags: [self-replication, replibench, end-to-end-evaluation, Pan-et-al, SOCK-benchmark, Google-DeepMind, security-conditions] +processed_by: theseus +processed_date: 2026-03-25 +extraction_model: "anthropic/claude-sonnet-4.5" +extraction_notes: "LLM returned 2 claims, 2 rejected by validator" --- ## Content @@ -62,3 +66,12 @@ A comprehensive roundup of all published self-replication evaluations for fronti PRIMARY CONNECTION: [[three conditions gate AI takeover risk autonomy robotics and production chain control and current AI satisfies none of them]] — this roundup provides updated evidence that the autonomy condition (self-replication) remains unmet operationally despite high component benchmark scores WHY ARCHIVED: Closes the loop on the self-replication benchmark-reality gap; confirms that the absence of end-to-end evaluations is comprehensive, not accidental EXTRACTION HINT: The extractor should check the existing [[three conditions gate AI takeover risk]] claim — it may need updating with the Google DeepMind end-to-end failure data. Also check [[instrumental convergence risks may be less imminent than originally argued]] — this roundup is additional evidence for that claim's experimental confidence rating. + + +## Key Facts +- RepliBench released by UK AISI in spring 2025 +- Claude 3.7 achieved >50% probability on 15/20 RepliBench task families +- SOCK benchmark released September 2025 +- Google DeepMind conducted 11-task end-to-end self-replication evaluation in 2025 +- Pan et al. published open-weights self-replication claims in 2024/2025 +- Bradford Saad published comprehensive self-replication roundup October 1, 2025