feat: embed-on-merge — auto-index new claims into Qdrant after PR merge
After a PR merges successfully, _embed_merged_claims() diffs the merged SHA against its parent to find new/changed .md files in knowledge directories (domains/, core/, foundations/, decisions/, entities/). Each file is embedded via embed-claims.py --file (OpenRouter, text-embedding-3-small). Non-fatal: embedding failure logs a warning but does not block the merge pipeline. This keeps vector search current without requiring manual re-embeds. Pentagon-Agent: Epimetheus <3D35839A-7722-4740-B93D-51157F7D5E70>
This commit is contained in:
parent
f5b27ccd73
commit
89692fda2d
1 changed files with 54 additions and 0 deletions
54
lib/merge.py
54
lib/merge.py
|
|
@ -612,6 +612,57 @@ def _update_source_frontmatter_status(path: str, new_status: str):
|
|||
logger.warning("Failed to update source status in %s: %s", path, e)
|
||||
|
||||
|
||||
async def _embed_merged_claims(branch_sha: str):
    """Embed new/changed claim files from a merged PR into Qdrant.

    Lists the .md files added, copied, modified or renamed between
    ``branch_sha~1`` and ``branch_sha`` (run inside ``config.MAIN_WORKTREE``),
    keeps those that live under a knowledge directory and whose file name
    does not start with ``_``, then runs ``embed-claims.py --file`` once per
    file.

    Non-fatal by design: every failure is logged and swallowed so that an
    embedding problem never blocks the merge pipeline. A failure on one file
    (non-zero exit, missing "OK" marker, or timeout) does not prevent the
    remaining files from being attempted.

    Args:
        branch_sha: Commit SHA whose changes relative to its first parent
            should be embedded.
    """
    # NOTE(review): only the single step branch_sha~1..branch_sha is diffed.
    # If a merged PR can contribute more than one commit, earlier commits'
    # files would not be picked up — confirm against the caller.
    try:
        rc, diff_out = await _git(
            "diff", "--name-only", "--diff-filter=ACMR",
            f"{branch_sha}~1", branch_sha,
            cwd=str(config.MAIN_WORKTREE),
            timeout=10,
        )
        if rc != 0:
            logger.warning("embed: diff failed (rc=%d), skipping", rc)
            return

        # Tuple form lets str.startswith test every prefix in one call.
        embed_dirs = ("domains/", "core/", "foundations/", "decisions/", "entities/")
        md_files = [
            f for f in diff_out.strip().split("\n")
            if f.endswith(".md")
            and f.startswith(embed_dirs)
            and not f.rsplit("/", 1)[-1].startswith("_")
        ]
        if not md_files:
            return

        embedded = 0
        for fpath in md_files:
            full_path = config.MAIN_WORKTREE / fpath
            if not full_path.exists():
                continue

            proc = await asyncio.create_subprocess_exec(
                "python3", "/opt/teleo-eval/embed-claims.py", "--file", str(full_path),
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
            )
            try:
                stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=30)
            except asyncio.TimeoutError:
                # wait_for only cancels communicate(); the child keeps running
                # unless it is killed and reaped explicitly.
                try:
                    proc.kill()
                except ProcessLookupError:
                    pass  # child exited between the timeout and the kill
                await proc.wait()
                logger.warning("embed: timed out for %s", fpath)
                continue

            if proc.returncode == 0 and b"OK" in stdout:
                embedded += 1
            else:
                # errors="replace" so undecodable stderr bytes cannot raise
                # and abort the remaining files.
                logger.warning(
                    "embed: failed for %s: %s",
                    fpath,
                    stderr.decode(errors="replace")[:200],
                )

        if embedded:
            logger.info("embed: %d/%d files embedded into Qdrant", embedded, len(md_files))
    except Exception:
        # Top-level boundary: embedding is best-effort and must never
        # propagate into the merge pipeline.
        logger.exception("embed: post-merge embedding failed (non-fatal)")
|
||||
|
||||
|
||||
def _archive_source_for_pr(branch: str, domain: str, merged: bool = True):
|
||||
"""Move source from queue/ to archive/{domain}/ after PR merge or close.
|
||||
|
||||
|
|
@ -830,6 +881,9 @@ async def _merge_domain_queue(conn, domain: str) -> tuple[int, int]:
|
|||
# Archive source file (closes near-duplicate loop — Ganymede review)
|
||||
_archive_source_for_pr(branch, domain)
|
||||
|
||||
# Embed new/changed claims into Qdrant (non-fatal)
|
||||
await _embed_merged_claims(branch_sha)
|
||||
|
||||
# Delete remote branch immediately (Ganymede Q4)
|
||||
await _delete_remote_branch(branch)
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue