Pulled from /opt/teleo-eval/telegram/ on VPS. Includes: - bot.py (92K), kb_retrieval.py, kb_tools.py (agentic retrieval) - retrieval.py (RRF merge, query decomposition, entity traversal) - response.py (system prompt builder, response parser) - agent_config.py, agent_runner.py (multi-agent template unit support) - approval_stages.py, approvals.py, digest.py (approval workflow) - eval_checks.py, eval.py (response quality checks) - output_gate.py, x_publisher.py, x_client.py, x_search.py (X pipeline) - market_data.py, worktree_lock.py (utilities) - rio.yaml, theseus.yaml (agent configs) These files were deployed to VPS but never committed to the repo. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
719 lines · 26 KiB · Python
#!/usr/bin/env python3
|
|
"""KB tools for LLM function-calling — source tracing + entity/claim lookup.
|
|
|
|
These tools let the agent trace claims back to their original sources,
|
|
find all claims from a specific piece of research, and read source documents.
|
|
|
|
Epimetheus owns this module.
|
|
"""
|
|
|
|
import logging
|
|
import os
|
|
import re
|
|
from pathlib import Path
|
|
|
|
import yaml
|
|
|
|
logger = logging.getLogger("tg.kb_tools")
|
|
|
|
|
|
# ─── Tool definitions (OpenAI function-calling format) ───────────────
|
|
|
|
def _tool_def(name: str, description: str, properties: dict, required: list) -> dict:
    """Build one tool spec in the OpenAI function-calling format.

    Every tool exposed here takes a flat object of named parameters, so the
    schema shape is identical across tools — only the name, description, and
    property set vary.
    """
    return {
        "type": "function",
        "function": {
            "name": name,
            "description": description,
            "parameters": {
                "type": "object",
                "properties": properties,
                "required": required,
            },
        },
    }


# The tool surface offered to the LLM: source tracing, entity/claim lookup,
# graph traversal, source-archive search, and pipeline-DB utilities.
# Names here must match the dispatch table in execute_tool().
TOOL_DEFINITIONS = [
    _tool_def(
        "find_by_source",
        "Find all claims extracted from a specific source (article, paper, thread). "
        "Search by author name, source title, or keywords. Returns all claims from "
        "matching sources with their frontmatter.",
        {
            "query": {
                "type": "string",
                "description": "Author name, source title, or keywords to match against claim source fields",
            },
        },
        ["query"],
    ),
    _tool_def(
        "read_source",
        "Read the original source document (article, thread, paper) that claims were "
        "extracted from. Use when you need the full context behind a claim, not just "
        "the extracted summary.",
        {
            "source_title": {
                "type": "string",
                "description": "Title or slug of the source document to read",
            },
        },
        ["source_title"],
    ),
    _tool_def(
        "read_entity",
        "Read the full profile of a KB entity (project, person, protocol).",
        {
            "name": {
                "type": "string",
                "description": "Entity name or slug",
            },
        },
        ["name"],
    ),
    _tool_def(
        "list_entity_links",
        "List all entities and claims linked from an entity's wiki-links.",
        {
            "name": {
                "type": "string",
                "description": "Entity name or slug",
            },
        },
        ["name"],
    ),
    _tool_def(
        "read_claim",
        "Read the full content of a specific claim file.",
        {
            "title": {
                "type": "string",
                "description": "Claim title or slug",
            },
        },
        ["title"],
    ),
    _tool_def(
        "search_kb",
        "Search the KB for claims matching a query. Uses keyword matching.",
        {
            "query": {
                "type": "string",
                "description": "Search query",
            },
            "max_results": {
                "type": "integer",
                "description": "Max results to return (default 5)",
            },
        },
        ["query"],
    ),
    _tool_def(
        "explore_graph",
        "Follow knowledge graph edges from a claim to find connected claims. "
        "Returns all claims linked via supports, challenges, depends_on, and related edges. "
        "Use this to discover the full argument structure around a claim — what supports it, "
        "what challenges it, and what it depends on.",
        {
            "claim_title": {
                "type": "string",
                "description": "Title or slug of the claim to explore edges from",
            },
        },
        ["claim_title"],
    ),
    _tool_def(
        "search_sources",
        "Search the source archive for original documents by topic, author, or title. "
        "Returns matching source files with their titles and first few lines. "
        "Use this when you want to find the original research/article/thread, not just extracted claims.",
        {
            "query": {
                "type": "string",
                "description": "Topic, author name, or keywords to search source documents",
            },
            "max_results": {
                "type": "integer",
                "description": "Max results to return (default 5)",
            },
        },
        ["query"],
    ),
    _tool_def(
        "pr_status",
        "Check the status of a pipeline PR by number. Returns eval verdicts, "
        "merge status, time in queue, rejection reasons, and retry counts.",
        {
            "pr_number": {
                "type": "integer",
                "description": "PR number to look up",
            },
        },
        ["pr_number"],
    ),
    _tool_def(
        "check_duplicate",
        "Check if a claim is a near-duplicate of existing KB content. "
        "Returns top-3 closest matches with similarity scores. "
        ">=0.85 = likely duplicate, 0.70-0.85 = check manually, <0.70 = novel.",
        {
            "text": {
                "type": "string",
                "description": "The claim text to check for duplicates",
            },
        },
        ["text"],
    ),
]
|
|
|
|
|
|
# ─── Tool implementations ────────────────────────────────────────────
|
|
|
|
|
|
def find_by_source(query: str, kb_dir: str) -> str:
    """Find all claims extracted from sources matching the query.

    Searches claim frontmatter `source:` fields for author names, titles, keywords.
    Returns a structured list of all claims from matching sources, grouped by
    source, capped at 5 sources x 10 claims and 4000 characters of output.
    """
    query_lower = query.lower()
    query_tokens = [t for t in re.findall(r'\w+', query_lower) if len(t) >= 3]

    # Fix: bail out early instead of scanning the whole tree when no token can
    # ever match (consistent with search_sources()'s short-query message).
    if not query_tokens:
        return "Query too short — provide at least one keyword with 3+ characters."

    kb_root = Path(kb_dir)
    claim_dirs = [kb_root / "domains", kb_root / "core", kb_root / "foundations"]

    # Scan all claim files for matching source fields
    matches: list[dict] = []
    for claim_dir in claim_dirs:
        if not claim_dir.exists():
            continue
        for md_file in claim_dir.rglob("*.md"):
            if md_file.name.startswith("_"):
                continue  # underscore-prefixed files are templates/partials
            try:
                fm, body = _parse_frontmatter(md_file)
                if not fm:
                    continue
                source = fm.get("source", "")
                source_file = fm.get("source_file", "")
                searchable = f"{source} {source_file}".lower()

                # Score: how many query tokens appear in the source field.
                # A file matches when at least half the tokens (min 1) hit.
                score = sum(1 for t in query_tokens if t in searchable)
                if score >= max(1, len(query_tokens) // 2):
                    matches.append({
                        "title": md_file.stem.replace("-", " "),
                        "path": str(md_file.relative_to(kb_dir)),
                        "source": source,
                        "source_file": source_file,
                        "domain": fm.get("domain", "unknown"),
                        "confidence": fm.get("confidence", "unknown"),
                        "description": fm.get("description", ""),
                        "score": score,
                    })
            except Exception:
                continue  # unreadable/malformed file — best-effort scan

    if not matches:
        return f"No claims found from sources matching '{query}'."

    # Sort by score desc, then group by source for readable output
    matches.sort(key=lambda m: m["score"], reverse=True)
    by_source: dict[str, list[dict]] = {}
    for m in matches:
        by_source.setdefault(m["source"] or "unknown", []).append(m)

    lines = [f"Found {len(matches)} claims from {len(by_source)} matching sources:\n"]
    for source_name, claims in list(by_source.items())[:5]:  # Cap at 5 sources
        lines.append(f"## Source: {source_name}")
        if claims[0].get("source_file"):
            lines.append(f"File: {claims[0]['source_file']}")
        for c in claims[:10]:  # Cap at 10 claims per source
            lines.append(f"- **{c['title']}** ({c['confidence']}, {c['domain']})")
            if c["description"]:
                lines.append(f" {c['description'][:200]}")
        lines.append("")

    return "\n".join(lines)[:4000]
|
|
|
|
|
|
def read_source(source_title: str, kb_dir: str) -> str:
    """Read the original source document from the archive.

    Searches inbox/archive/, sources/, and inbox/queue/ for the best fuzzy
    filename match and returns up to 4000 characters of its content.
    """
    wanted = source_title.lower()
    wanted_slug = re.sub(r'[^a-z0-9]+', '-', wanted).strip('-')
    wanted_tokens = [t for t in re.findall(r'\w+', wanted) if len(t) >= 3]

    root = Path(kb_dir)
    candidate_dirs = (
        root / "inbox" / "archive",
        root / "sources",
        root / "inbox" / "queue",
    )

    winner = None
    winner_score = 0
    for folder in candidate_dirs:
        if not folder.exists():
            continue
        for doc in folder.rglob("*.md"):
            stem = doc.stem.lower()
            # Token overlap with the filename, plus a big bonus when the full
            # slug appears verbatim in the stem.
            points = sum(1 for tok in wanted_tokens if tok in stem)
            if wanted_slug in stem:
                points += 5
            if points > winner_score:
                winner_score, winner = points, doc

    if not winner:
        return f"Source document '{source_title}' not found in archive."

    try:
        body = winner.read_text(errors="replace")
    except Exception as e:
        return f"Error reading source: {e}"

    # Truncate to 4K for prompt safety
    if len(body) > 4000:
        body = body[:4000] + "\n\n[... truncated, full document is longer ...]"
    return f"## Source: {winner.name}\n\n{body}"
|
|
|
|
|
|
def read_entity(name: str, kb_dir: str) -> str:
    """Return up to 4000 chars of a KB entity profile (entities/ or decisions/)."""
    root = Path(kb_dir)
    target = _find_file(name, [root / "entities", root / "decisions"])
    if target is None:
        return f"Entity '{name}' not found."
    try:
        # Cap at 4K chars for prompt safety.
        return target.read_text(errors="replace")[:4000]
    except Exception as e:
        return f"Error reading entity: {e}"
|
|
|
|
|
|
def list_entity_links(name: str, kb_dir: str) -> str:
    """List every wiki-link ([[...]]) in an entity file, case-insensitively deduped."""
    root = Path(kb_dir)
    entity_file = _find_file(name, [root / "entities", root / "decisions"])
    if entity_file is None:
        return f"Entity '{name}' not found."

    try:
        text = entity_file.read_text(errors="replace")
    except Exception as e:
        return f"Error reading entity links: {e}"

    # Dedup case-insensitively, keeping first-seen order and original casing.
    first_seen: dict[str, str] = {}
    for link in re.findall(r"\[\[([^\]]+)\]\]", text):
        first_seen.setdefault(link.lower(), link)

    if not first_seen:
        return f"Entity '{name}' has no wiki-links."
    listing = "\n".join(f"- [[{link}]]" for link in first_seen.values())
    return f"Entity '{name}' links to {len(first_seen)} items:\n" + listing
|
|
|
|
|
|
def read_claim(title: str, kb_dir: str) -> str:
    """Return up to 4000 chars of a claim file (domains/, core/, or foundations/)."""
    root = Path(kb_dir)
    claim_file = _find_file(
        title,
        [root / "domains", root / "core", root / "foundations"],
    )
    if claim_file is None:
        return f"Claim '{title}' not found."
    try:
        # Cap at 4K chars for prompt safety.
        return claim_file.read_text(errors="replace")[:4000]
    except Exception as e:
        return f"Error reading claim: {e}"
|
|
|
|
|
|
def search_kb(query: str, kb_dir: str, max_results: int = 5) -> str:
    """Keyword-search KB claims via kb_retrieval and format the top hits."""
    # Imported lazily so this module loads even where kb_retrieval is absent.
    from kb_retrieval import KBIndex, retrieve_context

    index = KBIndex(kb_dir)
    index.ensure_fresh()
    ctx = retrieve_context(query, kb_dir, index=index, max_claims=max_results)
    if not ctx.claims:
        return f"No claims found for '{query}'."

    out = [f"Found {len(ctx.claims)} claims:"]
    for claim in ctx.claims:
        out.append(f"- **{claim.title}** ({claim.confidence}, {claim.domain}, score: {claim.score:.1f})")
        if claim.description:
            out.append(f" {claim.description[:200]}")
    return "\n".join(out)
|
|
|
|
|
|
def explore_graph(claim_title: str, kb_dir: str) -> str:
    """Follow knowledge graph edges from a claim to find connected claims.

    Uses lib/search.py graph_expand() for 1-hop traversal of supports/challenges/
    depends_on/related edges in frontmatter.

    Returns a markdown summary grouped by edge type, capped at 4000 chars,
    or a human-readable "not found" / "no edges" message. Never raises for
    the normal lookup paths — this string is fed straight back to the LLM.
    """
    # Find the claim file first
    claim_file = _find_file(claim_title, [
        Path(kb_dir) / "domains",
        Path(kb_dir) / "core",
        Path(kb_dir) / "foundations",
    ])
    if not claim_file:
        return f"Claim '{claim_title}' not found. Try a different title or use search_kb to find it first."

    # graph_expand takes repo-relative paths; fall back to the absolute path
    # if the file somehow sits outside kb_dir.
    try:
        rel_path = str(claim_file.relative_to(kb_dir))
    except ValueError:
        rel_path = str(claim_file)

    # Use the existing graph_expand from lib/search.py
    # NOTE(review): assumes graph_expand returns dicts with "claim_title",
    # "edge_type", and "edge_weight" keys, matching the fallback shape built
    # below — confirm against lib/search.py.
    try:
        from lib.search import graph_expand
        expanded = graph_expand([rel_path], repo_root=Path(kb_dir), max_expanded=20)
    except ImportError:
        # Fallback: parse edges directly from the file
        expanded = []
        fm, body = _parse_frontmatter(claim_file)
        if fm:
            for edge_type in ("supports", "challenges", "challenged_by", "depends_on", "related"):
                targets = fm.get(edge_type, [])
                # Frontmatter may hold a single string or a list; normalize.
                if isinstance(targets, str):
                    targets = [targets]
                if isinstance(targets, list):
                    for t in targets:
                        expanded.append({"claim_title": t, "edge_type": edge_type, "edge_weight": 1.0})

    if not expanded:
        return f"Claim '{claim_title}' has no graph edges (no supports, challenges, or related claims)."

    # Group by edge type for readability
    by_type: dict[str, list[dict]] = {}
    for e in expanded:
        by_type.setdefault(e["edge_type"], []).append(e)

    lines = [f"Graph edges from '{claim_title}' ({len(expanded)} connected claims):\n"]
    # Human-readable headings per edge type; unknown types fall back to the
    # raw edge-type string below. "wiki_links" only appears via graph_expand.
    type_labels = {
        "supports": "Supports (this claim backs these up)",
        "challenges": "Challenges (this claim argues against these)",
        "challenged_by": "Challenged by (these argue against this claim)",
        "depends_on": "Depends on (prerequisites for this claim)",
        "related": "Related (connected by topic)",
        "wiki_links": "Wiki-linked (mentioned in body text)",
    }
    for edge_type, items in by_type.items():
        label = type_labels.get(edge_type, edge_type)
        lines.append(f"### {label}")
        for item in items:
            title = item.get("claim_title", "unknown")
            weight = item.get("edge_weight", 1.0)
            # Only show the weight when it carries information (non-default).
            lines.append(f"- {title}" + (f" (weight: {weight})" if weight != 1.0 else ""))
        lines.append("")

    # Cap at 4K chars for prompt safety.
    return "\n".join(lines)[:4000]
|
|
|
|
|
|
def search_sources(query: str, kb_dir: str, max_results: int = 5) -> str:
    """Search the source archive for original documents by topic/author/title.

    Scans inbox/archive/, sources/, and inbox/queue/, scoring each file by
    token overlap with its filename and — when the filename gives nothing —
    with the first 500 chars of content at half weight per token.
    """
    tokens = [t for t in re.findall(r'\w+', query.lower()) if len(t) >= 3]
    if not tokens:
        return "Query too short — provide at least one keyword with 3+ characters."

    root = Path(kb_dir)
    scan_dirs = (
        root / "inbox" / "archive",
        root / "sources",
        root / "inbox" / "queue",
    )

    hits: list[dict] = []
    for folder in scan_dirs:
        if not folder.exists():
            continue
        for doc in folder.rglob("*.md"):
            if doc.name.startswith("_"):
                continue  # skip templates/partials
            stem_words = doc.stem.lower().replace("-", " ")
            score = sum(1 for t in tokens if t in stem_words)
            if score == 0:
                # Filename gave nothing — check the head of the file for
                # author/topic mentions, at half weight.
                try:
                    head = doc.read_text(errors="replace")[:500].lower()
                except Exception:
                    continue
                score = sum(0.5 for t in tokens if t in head)
            if score >= max(1, len(tokens) // 3):
                try:
                    preview = doc.read_text(errors="replace")[:300].strip()
                except Exception:
                    preview = "(could not read)"
                hits.append({
                    "title": doc.stem.replace("-", " "),
                    "path": str(doc.relative_to(kb_dir)),
                    "score": score,
                    "preview": preview,
                })

    if not hits:
        return f"No source documents found matching '{query}'. Try different keywords or check find_by_source for claims from that source."

    hits.sort(key=lambda h: h["score"], reverse=True)
    top = hits[:max_results]

    out = [f"Found {len(top)} source documents:\n"]
    for h in top:
        out.append(f"### {h['title']}")
        out.append(f"Path: {h['path']}")
        out.append(f"{h['preview'][:200]}")
        out.append("")

    return "\n".join(out)[:4000]
|
|
|
|
|
|
# ─── Tool dispatcher ─────────────────────────────────────────────────
|
|
|
|
|
|
def execute_tool(tool_name: str, args: dict, kb_dir: str) -> str:
    """Dispatch a tool call by name. Returns the tool's string result."""
    # Table of thunks: nothing is evaluated until the chosen handler runs,
    # and unknown names fall through without touching any tool.
    handlers = {
        "find_by_source": lambda: find_by_source(args.get("query", ""), kb_dir),
        "read_source": lambda: read_source(args.get("source_title", ""), kb_dir),
        "read_entity": lambda: read_entity(args.get("name", ""), kb_dir),
        "list_entity_links": lambda: list_entity_links(args.get("name", ""), kb_dir),
        "read_claim": lambda: read_claim(args.get("title", ""), kb_dir),
        "search_kb": lambda: search_kb(args.get("query", ""), kb_dir, args.get("max_results", 5)),
        "explore_graph": lambda: explore_graph(args.get("claim_title", ""), kb_dir),
        "search_sources": lambda: search_sources(args.get("query", ""), kb_dir, args.get("max_results", 5)),
        "pr_status": lambda: _tool_pr_status(args.get("pr_number", 0)),
        "check_duplicate": lambda: _tool_check_duplicate(args.get("text", "")),
    }
    handler = handlers.get(tool_name)
    if handler is None:
        return f"Unknown tool: {tool_name}"
    return handler()
|
|
|
|
|
|
# ─── Helpers ─────────────────────────────────────────────────────────
|
|
|
|
|
|
def _parse_frontmatter(path: Path) -> tuple[dict | None, str]:
    """Split a markdown file into (frontmatter dict, body).

    Returns (None, full_text) when the file has no parseable ``---``-delimited
    YAML frontmatter, and (None, "") when the file cannot be read at all.
    """
    try:
        text = path.read_text(errors="replace")
    except Exception:
        return None, ""

    # Frontmatter must open the file with "---" and close with a "\n---" line.
    if not text.startswith("---"):
        return None, text
    closing = text.find("\n---", 3)
    if closing == -1:
        return None, text

    try:
        meta = yaml.safe_load(text[3:closing])
        if not isinstance(meta, dict):
            # Valid YAML but not a mapping (e.g. a bare string) — treat as no
            # frontmatter at all.
            return None, text
        return meta, text[closing + 4:].strip()
    except yaml.YAMLError:
        return None, text
|
|
|
|
|
|
def _find_file(name: str, search_dirs: list[Path]) -> Path | None:
|
|
"""Find a markdown file by name/slug across search directories."""
|
|
slug = re.sub(r'[^a-z0-9]+', '-', name.lower()).strip('-')
|
|
name_lower = name.lower()
|
|
|
|
for search_dir in search_dirs:
|
|
if not search_dir.exists():
|
|
continue
|
|
for md_file in search_dir.rglob("*.md"):
|
|
if md_file.name.startswith("_"):
|
|
continue
|
|
stem_lower = md_file.stem.lower()
|
|
# Exact slug match
|
|
if stem_lower == slug:
|
|
return md_file
|
|
# Normalized match (spaces vs hyphens)
|
|
if stem_lower.replace("-", " ") == name_lower.replace("-", " "):
|
|
return md_file
|
|
# Substring match for long titles
|
|
if len(slug) >= 8 and slug in stem_lower:
|
|
return md_file
|
|
|
|
return None
|
|
|
|
|
|
# ─── Pipeline DB tools ──────────────────────────────────────────────
|
|
|
|
|
|
def _tool_pr_status(pr_number: int) -> str:
|
|
"""Wrapper for pr_status() — connects to pipeline DB, returns formatted string."""
|
|
import json
|
|
import sqlite3
|
|
|
|
db_path = os.environ.get("PIPELINE_DB", "/opt/teleo-eval/pipeline/pipeline.db")
|
|
try:
|
|
conn = sqlite3.connect(db_path)
|
|
conn.row_factory = sqlite3.Row
|
|
|
|
row = conn.execute(
|
|
"""SELECT number, branch, source_path, status, domain, agent,
|
|
commit_type, tier, leo_verdict, domain_verdict,
|
|
domain_agent, eval_issues, priority, origin,
|
|
cost_usd, created_at, merged_at, last_attempt, last_error,
|
|
transient_retries, substantive_retries, description
|
|
FROM prs WHERE number = ?""",
|
|
(pr_number,),
|
|
).fetchone()
|
|
conn.close()
|
|
|
|
if not row:
|
|
return f"PR #{pr_number} not found."
|
|
|
|
issues = []
|
|
try:
|
|
issues = json.loads(row["eval_issues"] or "[]")
|
|
except (json.JSONDecodeError, TypeError):
|
|
pass
|
|
|
|
lines = [
|
|
f"PR #{row['number']} — {row['status'].upper()}",
|
|
f"Branch: {row['branch']}",
|
|
f"Domain: {row['domain'] or 'unknown'} | Agent: {row['agent'] or 'pipeline'}",
|
|
f"Type: {row['commit_type'] or 'unknown'} | Tier: {row['tier'] or 'unknown'}",
|
|
f"Leo verdict: {row['leo_verdict']} | Domain verdict: {row['domain_verdict']}",
|
|
]
|
|
if row["description"]:
|
|
lines.append(f"Description: {row['description']}")
|
|
if issues:
|
|
lines.append(f"Eval issues: {', '.join(str(i) for i in issues)}")
|
|
if row["last_error"]:
|
|
lines.append(f"Last error: {row['last_error'][:200]}")
|
|
lines.append(f"Retries: {row['transient_retries']} transient, {row['substantive_retries']} substantive")
|
|
lines.append(f"Created: {row['created_at']} | Last attempt: {row['last_attempt']}")
|
|
if row["merged_at"]:
|
|
lines.append(f"Merged: {row['merged_at']}")
|
|
if row["cost_usd"]:
|
|
lines.append(f"Eval cost: ${row['cost_usd']:.4f}")
|
|
|
|
return "\n".join(lines)
|
|
except Exception as e:
|
|
return f"Error querying PR #{pr_number}: {e}"
|
|
|
|
|
|
def _tool_check_duplicate(text: str) -> str:
|
|
"""Wrapper for check_duplicate() — calls Qdrant, returns formatted string."""
|
|
import sys
|
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
|
from lib.search import check_duplicate as _check_dup
|
|
|
|
if not text:
|
|
return "Error: text is required."
|
|
|
|
result = _check_dup(text)
|
|
|
|
if result.get("error"):
|
|
return f"Error: {result['error']}"
|
|
|
|
lines = [f"Verdict: {result['verdict'].upper()} (highest score: {result['highest_score']:.4f})"]
|
|
|
|
for i, m in enumerate(result["matches"], 1):
|
|
lines.append(
|
|
f" {i}. [{m['score']:.4f}] {m['claim_title'][:80]}"
|
|
f"\n Path: {m['claim_path']}"
|
|
)
|
|
|
|
if not result["matches"]:
|
|
lines.append(" No matches found above minimum threshold.")
|
|
|
|
return "\n".join(lines)
|