feat: embed-on-merge — auto-index new claims into Qdrant after PR merge

After a PR merges successfully, _embed_merged_claims() diffs the merged SHA
against its parent to find new/changed .md files in knowledge directories
(domains/, core/, foundations/, decisions/, entities/). Each file is embedded
via embed-claims.py --file (OpenRouter, text-embedding-3-small).

Non-fatal: embedding failure logs a warning but does not block the merge
pipeline. This keeps vector search current without requiring manual re-embeds.

Pentagon-Agent: Epimetheus <3D35839A-7722-4740-B93D-51157F7D5E70>
This commit is contained in:
m3taversal 2026-03-26 17:53:18 +00:00
parent f5b27ccd73
commit 89692fda2d

View file

@ -612,6 +612,57 @@ def _update_source_frontmatter_status(path: str, new_status: str):
logger.warning("Failed to update source status in %s: %s", path, e)
async def _embed_merged_claims(branch_sha: str):
    """Embed new/changed claim files from a merged PR into Qdrant.

    Diffs ``branch_sha~1`` against ``branch_sha`` in the main worktree, keeps
    the added/copied/modified/renamed ``.md`` files that live under one of the
    knowledge directories (skipping files whose basename starts with ``_``),
    and runs ``embed-claims.py --file`` once per remaining file.

    Non-fatal: every failure is logged and swallowed so that embedding can
    never block the merge pipeline. A failure or timeout on one file does not
    prevent the remaining files from being embedded.

    Args:
        branch_sha: SHA of the merged commit whose changes should be embedded.
    """
    # A tuple so it can be handed straight to str.startswith().
    embed_dirs = ("domains/", "core/", "foundations/", "decisions/", "entities/")
    try:
        # NOTE(review): only the diff against the first parent of branch_sha is
        # inspected. If a merge can land several commits via fast-forward,
        # earlier commits' files would be missed; confirm the merge strategy.
        rc, diff_out = await _git(
            "diff", "--name-only", "--diff-filter=ACMR",
            f"{branch_sha}~1", branch_sha,
            cwd=str(config.MAIN_WORKTREE),
            timeout=10,
        )
        if rc != 0:
            logger.warning("embed: diff failed (rc=%d), skipping", rc)
            return

        md_files = [
            f for f in diff_out.strip().split("\n")
            if f.endswith(".md")
            and f.startswith(embed_dirs)
            and not f.split("/")[-1].startswith("_")
        ]
        if not md_files:
            return

        embedded = 0
        for fpath in md_files:
            full_path = config.MAIN_WORKTREE / fpath
            if not full_path.exists():
                continue
            proc = await asyncio.create_subprocess_exec(
                "python3", "/opt/teleo-eval/embed-claims.py", "--file", str(full_path),
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
            )
            try:
                stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=30)
            except asyncio.TimeoutError:
                # Kill and reap the child so a hung embed run does not linger,
                # then move on to the next file instead of aborting the batch.
                try:
                    proc.kill()
                except ProcessLookupError:
                    pass  # the child exited between the timeout and the kill
                await proc.wait()
                logger.warning("embed: timed out for %s, skipping", fpath)
                continue
            if proc.returncode == 0 and b"OK" in stdout:
                embedded += 1
            else:
                # errors="replace": undecodable stderr must not abort the loop.
                logger.warning(
                    "embed: failed for %s: %s",
                    fpath, stderr.decode(errors="replace")[:200],
                )
        if embedded:
            logger.info("embed: %d/%d files embedded into Qdrant", embedded, len(md_files))
    except Exception:
        # Top-level boundary: embedding is best-effort and must never propagate.
        logger.exception("embed: post-merge embedding failed (non-fatal)")
def _archive_source_for_pr(branch: str, domain: str, merged: bool = True):
"""Move source from queue/ to archive/{domain}/ after PR merge or close.
@ -830,6 +881,9 @@ async def _merge_domain_queue(conn, domain: str) -> tuple[int, int]:
# Archive source file (closes near-duplicate loop — Ganymede review)
_archive_source_for_pr(branch, domain)
# Embed new/changed claims into Qdrant (non-fatal)
await _embed_merged_claims(branch_sha)
# Delete remote branch immediately (Ganymede Q4)
await _delete_remote_branch(branch)