teleo-infrastructure/tests/test_reweave.py
m3taversal 5e0cdfc63a feat: consolidate eval pipeline, reweave fixes, enrichment dedup, cherry-pick merge, TG batching
Merges all work from epimetheus/enrichment-dedup-fix and epimetheus/eval-and-reweave-fixes:

- Eval pipeline: _LLMResponse in call_openrouter, URL fabrication check, confidence floor, cost alerts
- Reweave fixes: _is_entity gate, _same_source filter, temp 0.3, blank line sanitization
- Enrichment dedup: three-layer fix (source-slug, PR-number, post-rebase scan)
- Cherry-pick merge: replaces rebase-retry, --ours entity conflict resolution
- TG batching: group by chat_id + time proximity, force-split on unparseable timestamps
- Schema migration v10: response_audit columns for cost/confidence/blocking

67 tests pass.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-31 13:21:59 +01:00

203 lines
7.6 KiB
Python

"""Tests for reweave.py — orphan detection, entity filtering, same-source detection, frontmatter editing."""
import sys
import tempfile
from pathlib import Path
import pytest
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
from reweave import (
_is_entity,
_same_source,
_parse_frontmatter,
_get_edge_targets,
_claim_name_variants,
find_all_claims,
build_reverse_link_index,
find_orphans,
write_edge,
_count_reweave_edges,
CLASSIFY_PROMPT,
)
@pytest.fixture
def kb_dir(tmp_path):
    """Build a bare-bones knowledge-base tree under pytest's ``tmp_path``.

    Creates ``domains/ai-alignment`` and ``entities/ai-alignment`` and
    returns the root directory so tests can drop files into either one.
    """
    for section in ("domains", "entities"):
        (tmp_path / section / "ai-alignment").mkdir(parents=True)
    return tmp_path
def _write_claim(path: Path, name: str, type_: str = "claim", **extra_fm):
    """Write a small markdown file with a frontmatter header to *path*.

    The file consists of a ``---`` delimited frontmatter section followed by
    a blank line and a single ``Body of <name>.`` line.

    Args:
        path: Destination file; its parent directory must already exist.
        name: Value of the ``name`` field, also echoed in the body line.
        type_: Value of the ``type`` field.
        **extra_fm: Additional frontmatter fields. A list value is emitted as
            a ``key:`` line followed by one `` - item`` line per element; any
            other value is emitted inline as ``key: value``.
    """
    fm_lines = [f"name: {name}", f"type: {type_}"]
    for key, value in extra_fm.items():
        if isinstance(value, list):
            fm_lines.append(f"{key}:")
            fm_lines.extend(f" - {item}" for item in value)
        else:
            fm_lines.append(f"{key}: {value}")
    frontmatter = "\n".join(fm_lines)
    # Pin the encoding so the bytes on disk do not depend on the platform's
    # locale default (which is not UTF-8 everywhere).
    path.write_text(
        f"---\n{frontmatter}\n---\n\nBody of {name}.\n",
        encoding="utf-8",
    )
# ─── Entity Detection ──────────────────────────────────────────────────────
class TestEntityDetection:
    """Exercise ``_is_entity`` on an entity file, a claim file, and a bare file."""

    def test_entity_detected(self, kb_dir):
        """A file written with ``type: entity`` must be reported as an entity."""
        entity_file = kb_dir.joinpath("entities", "ai-alignment", "anthropic.md")
        _write_claim(entity_file, "Anthropic", type_="entity")
        assert _is_entity(entity_file) is True

    def test_claim_not_entity(self, kb_dir):
        """A file written with ``type: claim`` must not be reported as an entity."""
        claim_file = kb_dir.joinpath("domains", "ai-alignment", "rlhf-works.md")
        _write_claim(claim_file, "RLHF works", type_="claim")
        assert _is_entity(claim_file) is False

    def test_no_frontmatter(self, tmp_path):
        """A file without any frontmatter block must not be reported as an entity."""
        bare_file = tmp_path.joinpath("bare.md")
        bare_file.write_text("No frontmatter here.")
        assert _is_entity(bare_file) is False
# ─── Same Source Detection ──────────────────────────────────────────────────
class TestSameSourceDetection:
    """Exercise ``_same_source`` on pairs of claim files in one domain folder."""

    @staticmethod
    def _claim_paths(kb_dir):
        """Return the paths of the two claim files every test here writes."""
        domain = kb_dir / "domains" / "ai-alignment"
        return domain / "claim-a.md", domain / "claim-b.md"

    def test_same_source_field(self, kb_dir):
        """Identical ``source`` values count as the same source."""
        first, second = self._claim_paths(kb_dir)
        _write_claim(first, "Claim A", source="paper-xyz.md")
        _write_claim(second, "Claim B", source="paper-xyz.md")
        assert _same_source(first, second) is True

    def test_different_source(self, kb_dir):
        """Differing ``source`` values do not count as the same source."""
        first, second = self._claim_paths(kb_dir)
        _write_claim(first, "Claim A", source="paper-xyz.md")
        _write_claim(second, "Claim B", source="paper-abc.md")
        assert _same_source(first, second) is False

    def test_same_source_file_field(self, kb_dir):
        """Identical ``source_file`` values count as the same source."""
        first, second = self._claim_paths(kb_dir)
        _write_claim(first, "Claim A", source_file="sources/arxiv/1234.md")
        _write_claim(second, "Claim B", source_file="sources/arxiv/1234.md")
        assert _same_source(first, second) is True

    def test_no_source_field(self, kb_dir):
        """Two claims that carry no source information are not matched."""
        first, second = self._claim_paths(kb_dir)
        _write_claim(first, "Claim A")
        _write_claim(second, "Claim B")
        assert _same_source(first, second) is False
# ─── Orphan Detection ──────────────────────────────────────────────────────
class TestOrphanDetection:
    """Run the claim scan -> reverse index -> orphan search pipeline on tiny KBs."""

    def test_orphan_found(self, kb_dir):
        """One claim links to the other; at most one of them may be an orphan."""
        domain = kb_dir / "domains" / "ai-alignment"
        _write_claim(
            domain / "connected-claim.md",
            "Connected Claim",
            related=["orphan-claim"],
        )
        _write_claim(domain / "orphan-claim.md", "Orphan Claim")

        all_claims = find_all_claims(kb_dir)
        reverse_index = build_reverse_link_index(all_claims)
        reported = [p.stem for p in find_orphans(all_claims, reverse_index, kb_dir)]

        # Passes as long as the two files are not BOTH reported as orphans;
        # it does not pin which one (if any) is reported.
        # NOTE(review): "orphan-claim" is the file that receives the `related`
        # link, so the fixture names may be swapped relative to the intent —
        # confirm against find_orphans and tighten this assertion.
        assert not ("connected-claim" in reported and "orphan-claim" in reported)

    def test_no_orphans_when_connected(self, kb_dir):
        """Two claims that reference each other must yield no orphans."""
        domain = kb_dir / "domains" / "ai-alignment"
        _write_claim(domain / "claim-a.md", "Claim A", related=["claim-b"])
        _write_claim(domain / "claim-b.md", "Claim B", related=["claim-a"])

        all_claims = find_all_claims(kb_dir)
        reverse_index = build_reverse_link_index(all_claims)
        reported = find_orphans(all_claims, reverse_index, kb_dir)
        assert len(reported) == 0
# ─── Frontmatter Editing ───────────────────────────────────────────────────
class TestWriteEdge:
    """Exercise ``write_edge`` against a single neighbor file."""

    def test_write_edge_adds_field(self, kb_dir):
        """A successful write returns True and leaves the edge in the file."""
        neighbor = kb_dir / "domains" / "ai-alignment" / "neighbor.md"
        _write_claim(neighbor, "Neighbor Claim")
        written = write_edge(neighbor, "Orphan Title", "related", "2026-03-31")
        assert written is True
        contents = neighbor.read_text()
        assert "Orphan Title" in contents
        assert "reweave_edges" in contents

    def test_no_duplicate_edges(self, kb_dir):
        """An edge already listed under ``related`` is rejected as a duplicate."""
        neighbor = kb_dir / "domains" / "ai-alignment" / "neighbor.md"
        _write_claim(neighbor, "Neighbor Claim", related=["Orphan Title"])
        written = write_edge(neighbor, "Orphan Title", "related", "2026-03-31")
        assert written is False  # duplicate detected

    def test_per_file_cap(self, kb_dir):
        """A file that already carries ten reweave edges accepts no more."""
        neighbor = kb_dir / "domains" / "ai-alignment" / "neighbor.md"
        # Seed the file with 10 pre-existing reweave_edges entries.
        existing_edges = [f"edge-{i}|related|2026-03-31" for i in range(10)]
        _write_claim(neighbor, "Neighbor Claim", reweave_edges=existing_edges)
        written = write_edge(neighbor, "New Orphan", "related", "2026-03-31")
        assert written is False  # cap reached

    def test_no_blank_lines_in_frontmatter(self, kb_dir):
        """After ``write_edge`` runs, the frontmatter has no blank lines."""
        neighbor = kb_dir / "domains" / "ai-alignment" / "neighbor.md"
        _write_claim(neighbor, "Neighbor Claim", supports=["existing-claim"])
        write_edge(neighbor, "New Orphan", "related", "2026-03-31")
        text = neighbor.read_text()
        # Slice out everything between the first two `---` delimiters.
        opening = text.index("---") + 3
        closing = text.index("---", opening)
        fm_section = text[opening:closing]
        # Leading/trailing newlines next to the delimiters are stripped first,
        # so only blank lines inside the section are flagged.
        if any(line.strip() == "" for line in fm_section.strip().split("\n")):
            pytest.fail(f"Blank line found in frontmatter: {repr(fm_section)}")
# ─── Prompt Content ─────────────────────────────────────────────────────────
class TestClassifyPrompt:
    """Guard the wording of ``CLASSIFY_PROMPT`` against accidental edits."""

    def test_challenges_guidance_present(self):
        """The prompt mentions "challenges" and, case-insensitively, "underused"."""
        prompt_lower = CLASSIFY_PROMPT.lower()
        assert "challenges" in CLASSIFY_PROMPT
        assert "underused" in prompt_lower

    def test_related_is_weakest(self):
        """The prompt contains the upper-case marker "WEAKEST"."""
        assert "WEAKEST" in CLASSIFY_PROMPT
# ─── Name Variants ──────────────────────────────────────────────────────────
class TestNameVariants:
    """Exercise ``_claim_name_variants`` on a hyphenated file name."""

    def test_stem_variants(self, kb_dir):
        """Both the hyphenated stem and its space-separated form are produced."""
        claim_file = kb_dir.joinpath(
            "domains", "ai-alignment", "rlhf-reward-hacking.md"
        )
        _write_claim(claim_file, "RLHF Reward Hacking")
        variants = _claim_name_variants(claim_file, kb_dir)
        for expected in ("rlhf-reward-hacking", "rlhf reward hacking"):
            assert expected in variants