Merge pull request 'extract: 2025-07-10-metr-ai-developer-productivity-rct' (#1206) from extract/2025-07-10-metr-ai-developer-productivity-rct into main
This commit is contained in:
commit
b5e24aef7c
2 changed files with 54 additions and 1 deletions
|
|
@ -0,0 +1,37 @@
|
||||||
|
{
|
||||||
|
"rejected_claims": [
|
||||||
|
{
|
||||||
|
"filename": "experienced-developers-slower-with-ai-tools-while-believing-faster-revealing-systematic-perception-gap.md",
|
||||||
|
"issues": [
|
||||||
|
"missing_attribution_extractor"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"filename": "practitioner-self-reports-systematically-overestimate-ai-productivity-creating-adoption-signal-distortion.md",
|
||||||
|
"issues": [
|
||||||
|
"missing_attribution_extractor"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"validation_stats": {
|
||||||
|
"total": 2,
|
||||||
|
"kept": 0,
|
||||||
|
"fixed": 7,
|
||||||
|
"rejected": 2,
|
||||||
|
"fixes_applied": [
|
||||||
|
"experienced-developers-slower-with-ai-tools-while-believing-faster-revealing-systematic-perception-gap.md:set_created:2026-03-18",
|
||||||
|
"experienced-developers-slower-with-ai-tools-while-believing-faster-revealing-systematic-perception-gap.md:stripped_wiki_link:deep-technical-expertise-is-a-greater-force-multiplier-when-",
|
||||||
|
"experienced-developers-slower-with-ai-tools-while-believing-faster-revealing-systematic-perception-gap.md:stripped_wiki_link:agent-generated-code-creates-cognitive-debt",
|
||||||
|
"experienced-developers-slower-with-ai-tools-while-believing-faster-revealing-systematic-perception-gap.md:stripped_wiki_link:AI-capability-and-reliability-are-independent-dimensions",
|
||||||
|
"practitioner-self-reports-systematically-overestimate-ai-productivity-creating-adoption-signal-distortion.md:set_created:2026-03-18",
|
||||||
|
"practitioner-self-reports-systematically-overestimate-ai-productivity-creating-adoption-signal-distortion.md:stripped_wiki_link:AI-optimization-of-industry-subsystems-induces-demand-for-mo",
|
||||||
|
"practitioner-self-reports-systematically-overestimate-ai-productivity-creating-adoption-signal-distortion.md:stripped_wiki_link:economic-forces-push-humans-out-of-every-cognitive-loop-wher"
|
||||||
|
],
|
||||||
|
"rejections": [
|
||||||
|
"experienced-developers-slower-with-ai-tools-while-believing-faster-revealing-systematic-perception-gap.md:missing_attribution_extractor",
|
||||||
|
"practitioner-self-reports-systematically-overestimate-ai-productivity-creating-adoption-signal-distortion.md:missing_attribution_extractor"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"model": "anthropic/claude-sonnet-4.5",
|
||||||
|
"date": "2026-03-18"
|
||||||
|
}
|
||||||
|
|
@ -7,10 +7,14 @@ date: 2025-07-10
|
||||||
domain: ai-alignment
|
domain: ai-alignment
|
||||||
secondary_domains: [collective-intelligence]
|
secondary_domains: [collective-intelligence]
|
||||||
format: paper
|
format: paper
|
||||||
status: unprocessed
|
status: null-result
|
||||||
priority: high
|
priority: high
|
||||||
triage_tag: claim
|
triage_tag: claim
|
||||||
tags: [developer-productivity, rct, ai-tools, over-reliance, perception-gap, automation-overshoot]
|
tags: [developer-productivity, rct, ai-tools, over-reliance, perception-gap, automation-overshoot]
|
||||||
|
processed_by: theseus
|
||||||
|
processed_date: 2026-03-18
|
||||||
|
extraction_model: "anthropic/claude-sonnet-4.5"
|
||||||
|
extraction_notes: "LLM returned 2 claims, 2 rejected by validator"
|
||||||
---
|
---
|
||||||
|
|
||||||
## Content
|
## Content
|
||||||
|
|
@ -47,3 +51,15 @@ Randomized controlled trial: 16 experienced open-source developers, 246 tasks, m
|
||||||
## Curator Notes
|
## Curator Notes
|
||||||
PRIMARY CONNECTION: deep technical expertise is a greater force multiplier when combined with AI agents
|
PRIMARY CONNECTION: deep technical expertise is a greater force multiplier when combined with AI agents
|
||||||
WHY ARCHIVED: RCT evidence that challenges the expertise-multiplier claim for expert-on-familiar-codebase context. The 39-point perception gap is a novel finding that explains HOW automation overshoot occurs — practitioners' self-reports systematically mislead adoption decisions.
|
WHY ARCHIVED: RCT evidence that challenges the expertise-multiplier claim for expert-on-familiar-codebase context. The 39-point perception gap is a novel finding that explains HOW automation overshoot occurs — practitioners' self-reports systematically mislead adoption decisions.
|
||||||
|
|
||||||
|
|
||||||
|
## Key Facts
|
||||||
|
- METR conducted RCT with 16 experienced open-source developers on 246 tasks
|
||||||
|
- Codebases averaged 22k+ GitHub stars and 1M+ lines of code; developers had 5+ years of experience
|
||||||
|
- Primary tool was Cursor Pro with Claude 3.5/3.7 Sonnet
|
||||||
|
- Developers had ~50 hours of AI coding tool experience
|
||||||
|
- Measured productivity: 19% slower with AI tools
|
||||||
|
- Developers' predicted productivity (before tasks): 24% faster
|
||||||
|
- Developers' self-estimated productivity (after tasks): 20% faster
|
||||||
|
- AI suggestion acceptance rate: less than 44%
|
||||||
|
- Study published 2025-07-10 by METR (@METR_Evals)
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue