From 89692fda2de48688995b1fdcfcadc1060f4dd353 Mon Sep 17 00:00:00 2001
From: m3taversal
Date: Thu, 26 Mar 2026 17:53:18 +0000
Subject: [PATCH] =?UTF-8?q?feat:=20embed-on-merge=20=E2=80=94=20auto-index?=
 =?UTF-8?q?=20new=20claims=20into=20Qdrant=20after=20PR=20merge?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

After a PR merges successfully, _embed_merged_claims() diffs the merged
SHA against its parent to find new/changed .md files in knowledge
directories (domains/, core/, foundations/, decisions/, entities/).

Each file is embedded via embed-claims.py --file (OpenRouter,
text-embedding-3-small). Non-fatal: embedding failure logs a warning
but does not block the merge pipeline. An embed subprocess that times
out is killed, and the remaining files are still processed.

This keeps vector search current without requiring manual re-embeds.

Pentagon-Agent: Epimetheus <3D35839A-7722-4740-B93D-51157F7D5E70>
---
 lib/merge.py | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 75 insertions(+)

diff --git a/lib/merge.py b/lib/merge.py
index 0e4205e..00ab1f3 100644
--- a/lib/merge.py
+++ b/lib/merge.py
@@ -612,6 +612,78 @@ def _update_source_frontmatter_status(path: str, new_status: str):
         logger.warning("Failed to update source status in %s: %s", path, e)
 
 
+async def _embed_merged_claims(branch_sha: str):
+    """Embed new/changed claim files from a merged PR into Qdrant.
+
+    Diffs ``branch_sha~1`` against ``branch_sha`` in the main worktree to find
+    added/copied/modified/renamed .md files under the knowledge directories,
+    then runs embed-claims.py --file once per file. Files whose basename
+    starts with "_" and files missing from the worktree are skipped.
+
+    Non-fatal — failures are logged and swallowed so embedding never blocks
+    the merge pipeline; a timeout on one file does not abort the others.
+
+    NOTE(review): only the diff of ``branch_sha`` against its first parent is
+    considered — confirm this covers multi-commit PRs.
+    """
+    try:
+        rc, diff_out = await _git(
+            "diff", "--name-only", "--diff-filter=ACMR",
+            f"{branch_sha}~1", branch_sha,
+            cwd=str(config.MAIN_WORKTREE),
+            timeout=10,
+        )
+        if rc != 0:
+            logger.warning("embed: diff failed (rc=%d), skipping", rc)
+            return
+
+        embed_dirs = ("domains/", "core/", "foundations/", "decisions/", "entities/")
+        md_files = [
+            f for f in diff_out.strip().split("\n")
+            if f.endswith(".md")
+            and f.startswith(embed_dirs)
+            and not f.split("/")[-1].startswith("_")
+        ]
+
+        if not md_files:
+            return
+
+        embedded = 0
+        for fpath in md_files:
+            full_path = config.MAIN_WORKTREE / fpath
+            if not full_path.exists():
+                continue
+            proc = await asyncio.create_subprocess_exec(
+                "python3", "/opt/teleo-eval/embed-claims.py", "--file", str(full_path),
+                stdout=asyncio.subprocess.PIPE,
+                stderr=asyncio.subprocess.PIPE,
+            )
+            try:
+                stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=30)
+            except asyncio.TimeoutError:
+                # wait_for() only cancels communicate(); the child keeps running
+                # until it is killed and reaped explicitly.
+                try:
+                    proc.kill()
+                except ProcessLookupError:
+                    pass  # child exited on its own right after the timeout
+                await proc.wait()
+                logger.warning("embed: timed out for %s, skipping", fpath)
+                continue
+            if proc.returncode == 0 and b"OK" in stdout:
+                embedded += 1
+            else:
+                logger.warning(
+                    "embed: failed for %s: %s",
+                    fpath, stderr.decode(errors="replace")[:200],
+                )
+
+        if embedded:
+            logger.info("embed: %d/%d files embedded into Qdrant", embedded, len(md_files))
+    except Exception:
+        logger.exception("embed: post-merge embedding failed (non-fatal)")
+
+
 def _archive_source_for_pr(branch: str, domain: str, merged: bool = True):
     """Move source from queue/ to archive/{domain}/ after PR merge or close.
 
@@ -830,6 +902,9 @@ async def _merge_domain_queue(conn, domain: str) -> tuple[int, int]:
             # Archive source file (closes near-duplicate loop — Ganymede review)
             _archive_source_for_pr(branch, domain)
 
+            # Embed new/changed claims into Qdrant (non-fatal)
+            await _embed_merged_claims(branch_sha)
+
             # Delete remote branch immediately (Ganymede Q4)
             await _delete_remote_branch(branch)
 