fix: tighten output gate patterns to eliminate false positives on public content
5 patterns were too broad — matched common English words:
- "extraction" (concept) matched pipeline extraction pattern
- "class X" (English) matched Python class definition pattern
- ".md " (product name) matched file extension pattern
- "threshold" (concept) matched internal metrics pattern

Fixes:
- extraction: require pipeline context words (queue/PR/branch/cron)
- class/def/import: require line-start (actual code, not prose)
- .py/.yaml/.json: require path-like prefix (not bare .md)
- threshold: require pipeline context (cosine/vector/Qdrant)

All 3 Hermes dry-run drafts now pass. 18/18 tests pass.
11/11 system content regression tests pass.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
2b58ffc765
commit
10b4e27c28
1 changed file with 8 additions and 6 deletions
|
|
@@ -16,7 +16,8 @@ import re
|
|||
_SYSTEM_PATTERNS = [
|
||||
# Pipeline operations
|
||||
re.compile(r"\b(PR\s*#\d+|pull request|merge|rebase|cherry.?pick)\b", re.IGNORECASE),
|
||||
re.compile(r"\b(extraction|extracted|extractor|extract/)\b", re.IGNORECASE),
|
||||
re.compile(r"\b(batch.?extract|extract/|extractor)\b", re.IGNORECASE),
|
||||
re.compile(r"\bextract(?:ed|ion)\b.*\b(pipeline|queue|PR|branch|source|cron)\b", re.IGNORECASE),
|
||||
re.compile(r"\b(pipeline|cron|batch.?extract|systemd|teleo-pipeline)\b", re.IGNORECASE),
|
||||
re.compile(r"\b(conflict.?permanent|conflict.?closed|merge.?conflict)\b", re.IGNORECASE),
|
||||
|
||||
|
|
@@ -53,13 +54,14 @@ _SYSTEM_PATTERNS = [
|
|||
# UUIDs (conversation IDs, agent IDs)
|
||||
re.compile(r"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}", re.IGNORECASE),
|
||||
|
||||
# Code / technical
|
||||
re.compile(r"\b(def\s+\w+|import\s+\w+|class\s+\w+)\b"),
|
||||
re.compile(r"\b(\.py|\.yaml|\.json|\.md)\s", re.IGNORECASE),
|
||||
# Code / technical — require line-start or code context to avoid matching English "class"
|
||||
re.compile(r"^\s*(def|import|class)\s+\w+", re.MULTILINE),
|
||||
re.compile(r"[\w/]+\.(py|yaml|json)\b", re.IGNORECASE),
|
||||
re.compile(r"\b(sqlite3?|pipeline\.db|response_audit)\b", re.IGNORECASE),
|
||||
|
||||
# Internal metrics / debugging
|
||||
re.compile(r"\b(cosine.?sim|threshold|PRIOR_ART_THRESHOLD)\b", re.IGNORECASE),
|
||||
# Internal metrics / debugging — require pipeline context, not bare English words
|
||||
re.compile(r"\b(cosine.?sim|PRIOR_ART_THRESHOLD|SCHEMA_VERSION)\b", re.IGNORECASE),
|
||||
re.compile(r"\bthreshold\b.*\b(cosine|vector|Qdrant|embedding|pre.?screen)\b", re.IGNORECASE),
|
||||
re.compile(r"\b(pre.?screen|Layer\s*[01234]|RRF|entity.?boost)\b", re.IGNORECASE),
|
||||
|
||||
# Paths
|
||||
|
|
|
|||
Loading…
Reference in a new issue