diff --git a/inbox/archive/.extraction-debug/2025-07-10-metr-ai-developer-productivity-rct.json b/inbox/archive/.extraction-debug/2025-07-10-metr-ai-developer-productivity-rct.json new file mode 100644 index 00000000..f11f18de --- /dev/null +++ b/inbox/archive/.extraction-debug/2025-07-10-metr-ai-developer-productivity-rct.json @@ -0,0 +1,37 @@ +{ + "rejected_claims": [ + { + "filename": "experienced-developers-slower-with-ai-tools-while-believing-faster-revealing-systematic-perception-gap.md", + "issues": [ + "missing_attribution_extractor" + ] + }, + { + "filename": "practitioner-self-reports-systematically-overestimate-ai-productivity-creating-adoption-signal-distortion.md", + "issues": [ + "missing_attribution_extractor" + ] + } + ], + "validation_stats": { + "total": 2, + "kept": 0, + "fixed": 7, + "rejected": 2, + "fixes_applied": [ + "experienced-developers-slower-with-ai-tools-while-believing-faster-revealing-systematic-perception-gap.md:set_created:2026-03-18", + "experienced-developers-slower-with-ai-tools-while-believing-faster-revealing-systematic-perception-gap.md:stripped_wiki_link:deep-technical-expertise-is-a-greater-force-multiplier-when-", + "experienced-developers-slower-with-ai-tools-while-believing-faster-revealing-systematic-perception-gap.md:stripped_wiki_link:agent-generated-code-creates-cognitive-debt", + "experienced-developers-slower-with-ai-tools-while-believing-faster-revealing-systematic-perception-gap.md:stripped_wiki_link:AI-capability-and-reliability-are-independent-dimensions", + "practitioner-self-reports-systematically-overestimate-ai-productivity-creating-adoption-signal-distortion.md:set_created:2026-03-18", + "practitioner-self-reports-systematically-overestimate-ai-productivity-creating-adoption-signal-distortion.md:stripped_wiki_link:AI-optimization-of-industry-subsystems-induces-demand-for-mo", + 
"practitioner-self-reports-systematically-overestimate-ai-productivity-creating-adoption-signal-distortion.md:stripped_wiki_link:economic-forces-push-humans-out-of-every-cognitive-loop-wher" + ], + "rejections": [ + "experienced-developers-slower-with-ai-tools-while-believing-faster-revealing-systematic-perception-gap.md:missing_attribution_extractor", + "practitioner-self-reports-systematically-overestimate-ai-productivity-creating-adoption-signal-distortion.md:missing_attribution_extractor" + ] + }, + "model": "anthropic/claude-sonnet-4.5", + "date": "2026-03-18" +} \ No newline at end of file diff --git a/inbox/archive/2025-07-10-metr-ai-developer-productivity-rct.md b/inbox/archive/2025-07-10-metr-ai-developer-productivity-rct.md index 689b20fb..2e8ddea5 100644 --- a/inbox/archive/2025-07-10-metr-ai-developer-productivity-rct.md +++ b/inbox/archive/2025-07-10-metr-ai-developer-productivity-rct.md @@ -7,10 +7,14 @@ date: 2025-07-10 domain: ai-alignment secondary_domains: [collective-intelligence] format: paper -status: unprocessed +status: null-result priority: high triage_tag: claim tags: [developer-productivity, rct, ai-tools, over-reliance, perception-gap, automation-overshoot] +processed_by: theseus +processed_date: 2026-03-18 +extraction_model: "anthropic/claude-sonnet-4.5" +extraction_notes: "LLM returned 2 claims, 2 rejected by validator" --- ## Content @@ -47,3 +51,15 @@ Randomized controlled trial: 16 experienced open-source developers, 246 tasks, m ## Curator Notes PRIMARY CONNECTION: deep technical expertise is a greater force multiplier when combined with AI agents WHY ARCHIVED: RCT evidence that challenges the expertise-multiplier claim for expert-on-familiar-codebase context. The 39-point perception gap is a novel finding that explains HOW automation overshoot occurs — practitioners' self-reports systematically mislead adoption decisions. 
+ + +## Key Facts +- METR conducted an RCT with 16 experienced open-source developers on 246 tasks +- Codebases averaged 22k+ GitHub stars and 1M+ lines of code; developers averaged 5+ years of experience +- Primary tool was Cursor Pro with Claude 3.5/3.7 Sonnet +- Developers had ~50 hours of AI coding tool experience +- Measured productivity: 19% slower with AI tools +- Predicted productivity (before): 24% faster +- Estimated productivity (after): 20% faster +- AI suggestion acceptance rate: less than 44% +- Study published 2025-07-10 by METR (@METR_Evals)