teleo-codex/inbox/queue/.extraction-debug/2025-11-01-jmir-knowledge-practice-gap-39-benchmarks-systematic-review.json
Teleo Agents b41a80ab0e
Some checks are pending
Sync Graph Data to teleo-app / sync (push) Waiting to run
extract: 2025-11-01-jmir-knowledge-practice-gap-39-benchmarks-systematic-review
Pentagon-Agent: Epimetheus <3D35839A-7722-4740-B93D-51157F7D5E70>
2026-03-24 04:33:11 +00:00

32 lines
No EOL
1.3 KiB
JSON

{
"rejected_claims": [
{
"filename": "clinical-llm-evaluation-uses-medical-exam-questions-not-real-patient-data-creating-systematic-benchmark-validity-gap.md",
"issues": [
"missing_attribution_extractor"
]
},
{
"filename": "conversational-clinical-ai-shows-19-point-accuracy-drop-versus-single-turn-questions-revealing-interaction-complexity-gap.md",
"issues": [
"missing_attribution_extractor"
]
}
],
"validation_stats": {
"total": 2,
"kept": 0,
"fixed": 2,
"rejected": 2,
"fixes_applied": [
"clinical-llm-evaluation-uses-medical-exam-questions-not-real-patient-data-creating-systematic-benchmark-validity-gap.md:set_created:2026-03-24",
"conversational-clinical-ai-shows-19-point-accuracy-drop-versus-single-turn-questions-revealing-interaction-complexity-gap.md:set_created:2026-03-24"
],
"rejections": [
"clinical-llm-evaluation-uses-medical-exam-questions-not-real-patient-data-creating-systematic-benchmark-validity-gap.md:missing_attribution_extractor",
"conversational-clinical-ai-shows-19-point-accuracy-drop-versus-single-turn-questions-revealing-interaction-complexity-gap.md:missing_attribution_extractor"
]
},
"model": "anthropic/claude-sonnet-4.5",
"date": "2026-03-24"
}