2026-03-18 09:26:52 +00:00
2 changed files with 54 additions and 1 deletions
--- a/inbox/archive/.extraction-debug/2025-07-10-metr-ai-developer-productivity-rct.json
+++ b/inbox/archive/.extraction-debug/2025-07-10-metr-ai-developer-productivity-rct.json
@ -0,0 +1,37 @@
+{
+  "rejected_claims": [
+    {
+      "filename": "experienced-developers-slower-with-ai-tools-while-believing-faster-revealing-systematic-perception-gap.md",
+      "issues": [
+        "missing_attribution_extractor"
+      ]
+    },
+    {
+      "filename": "practitioner-self-reports-systematically-overestimate-ai-productivity-creating-adoption-signal-distortion.md",
+      "issues": [
+        "missing_attribution_extractor"
+      ]
+    }
+  ],
+  "validation_stats": {
+    "total": 2,
+    "kept": 0,
+    "fixed": 7,
+    "rejected": 2,
+    "fixes_applied": [
+      "experienced-developers-slower-with-ai-tools-while-believing-faster-revealing-systematic-perception-gap.md:set_created:2026-03-18",
+      "experienced-developers-slower-with-ai-tools-while-believing-faster-revealing-systematic-perception-gap.md:stripped_wiki_link:deep-technical-expertise-is-a-greater-force-multiplier-when-",
+      "experienced-developers-slower-with-ai-tools-while-believing-faster-revealing-systematic-perception-gap.md:stripped_wiki_link:agent-generated-code-creates-cognitive-debt",
+      "experienced-developers-slower-with-ai-tools-while-believing-faster-revealing-systematic-perception-gap.md:stripped_wiki_link:AI-capability-and-reliability-are-independent-dimensions",
+      "practitioner-self-reports-systematically-overestimate-ai-productivity-creating-adoption-signal-distortion.md:set_created:2026-03-18",
+      "practitioner-self-reports-systematically-overestimate-ai-productivity-creating-adoption-signal-distortion.md:stripped_wiki_link:AI-optimization-of-industry-subsystems-induces-demand-for-mo",
+      "practitioner-self-reports-systematically-overestimate-ai-productivity-creating-adoption-signal-distortion.md:stripped_wiki_link:economic-forces-push-humans-out-of-every-cognitive-loop-wher"
+    ],
+    "rejections": [
+      "experienced-developers-slower-with-ai-tools-while-believing-faster-revealing-systematic-perception-gap.md:missing_attribution_extractor",
+      "practitioner-self-reports-systematically-overestimate-ai-productivity-creating-adoption-signal-distortion.md:missing_attribution_extractor"
+    ]
+  },
+  "model": "anthropic/claude-sonnet-4.5",
+  "date": "2026-03-18"
+}
--- a/inbox/archive/2025-07-10-metr-ai-developer-productivity-rct.md
+++ b/inbox/archive/2025-07-10-metr-ai-developer-productivity-rct.md
@ -7,10 +7,14 @@ date: 2025-07-10
 domain: ai-alignment
 secondary_domains: [collective-intelligence]
 format: paper
-status: unprocessed
+status: null-result
 priority: high
 triage_tag: claim
 tags: [developer-productivity, rct, ai-tools, over-reliance, perception-gap, automation-overshoot]
+processed_by: theseus
+processed_date: 2026-03-18
+extraction_model: "anthropic/claude-sonnet-4.5"
+extraction_notes: "LLM returned 2 claims, 2 rejected by validator"
 ---

 ## Content
@ -47,3 +51,15 @@ Randomized controlled trial: 16 experienced open-source developers, 246 tasks, m
 ## Curator Notes
 PRIMARY CONNECTION: deep technical expertise is a greater force multiplier when combined with AI agents
 WHY ARCHIVED: RCT evidence that challenges the expertise-multiplier claim for expert-on-familiar-codebase context. The 39-point perception gap is a novel finding that explains HOW automation overshoot occurs — practitioners' self-reports systematically mislead adoption decisions.
+
+
+## Key Facts
+- METR conducted RCT with 16 experienced open-source developers on 246 tasks
+- Codebases averaged 22k+ GitHub stars and 1M+ lines of code; participating developers averaged 5+ years of experience
+- Primary tool was Cursor Pro with Claude 3.5/3.7 Sonnet
+- Developers had ~50 hours of AI coding tool experience
+- Measured productivity: 19% slower with AI tools
+- Developers' predicted productivity (before tasks): 24% faster with AI tools
+- Developers' self-estimated productivity (after tasks): 20% faster with AI tools
+- AI suggestion acceptance rate: less than 44%
+- Study published 2025-07-10 by METR (@METR_Evals)