Some checks are pending
Sync Graph Data to teleo-app / sync (push) Waiting to run
Pentagon-Agent: Epimetheus <3D35839A-7722-4740-B93D-51157F7D5E70>
34 lines
No EOL
2.3 KiB
JSON
34 lines
No EOL
2.3 KiB
JSON
{
  "rejected_claims": [
    {
      "filename": "algorithmic-benchmark-scoring-overstates-ai-capability-by-2-3x-versus-holistic-human-review-because-automated-metrics-measure-core-implementation-while-missing-documentation-testing-and-code-quality.md",
      "issues": [
        "missing_attribution_extractor"
      ]
    },
    {
      "filename": "capability-benchmark-version-instability-creates-governance-discontinuity-because-HCAST-time-horizon-estimates-shifted-50-percent-between-annual-versions-making-safety-thresholds-a-moving-target.md",
      "issues": [
        "missing_attribution_extractor"
      ]
    }
  ],
  "validation_stats": {
    "total": 2,
    "kept": 0,
    "fixed": 4,
    "rejected": 2,
    "fixes_applied": [
      "algorithmic-benchmark-scoring-overstates-ai-capability-by-2-3x-versus-holistic-human-review-because-automated-metrics-measure-core-implementation-while-missing-documentation-testing-and-code-quality.md:set_created:2026-03-26",
      "algorithmic-benchmark-scoring-overstates-ai-capability-by-2-3x-versus-holistic-human-review-because-automated-metrics-measure-core-implementation-while-missing-documentation-testing-and-code-quality.md:stripped_wiki_link:AI-capability-and-reliability-are-independent-dimensions-bec",
      "capability-benchmark-version-instability-creates-governance-discontinuity-because-HCAST-time-horizon-estimates-shifted-50-percent-between-annual-versions-making-safety-thresholds-a-moving-target.md:set_created:2026-03-26",
      "capability-benchmark-version-instability-creates-governance-discontinuity-because-HCAST-time-horizon-estimates-shifted-50-percent-between-annual-versions-making-safety-thresholds-a-moving-target.md:stripped_wiki_link:Anthropics-RSP-rollback-under-commercial-pressure-is-the-fir"
    ],
    "rejections": [
      "algorithmic-benchmark-scoring-overstates-ai-capability-by-2-3x-versus-holistic-human-review-because-automated-metrics-measure-core-implementation-while-missing-documentation-testing-and-code-quality.md:missing_attribution_extractor",
      "capability-benchmark-version-instability-creates-governance-discontinuity-because-HCAST-time-horizon-estimates-shifted-50-percent-between-annual-versions-making-safety-thresholds-a-moving-target.md:missing_attribution_extractor"
    ]
  },
  "model": "anthropic/claude-sonnet-4.5",
  "date": "2026-03-26"
}