feat: embed-on-merge — auto-index new claims into Qdrant after PR merge
After a PR merges successfully, _embed_merged_claims() diffs the merged SHA against its parent to find new/changed .md files in the knowledge directories (domains/, core/, foundations/, decisions/, entities/). Each file is embedded via embed-claims.py --file (OpenRouter, text-embedding-3-small).

Non-fatal: embedding failure logs a warning but does not block the merge pipeline. This keeps vector search current without requiring manual re-embeds.

Pentagon-Agent: Epimetheus <3D35839A-7722-4740-B93D-51157F7D5E70>
This commit is contained in:
parent
f5b27ccd73
commit
89692fda2d
1 changed files with 54 additions and 0 deletions
54
lib/merge.py
54
lib/merge.py
|
|
@ -612,6 +612,57 @@ def _update_source_frontmatter_status(path: str, new_status: str):
|
||||||
logger.warning("Failed to update source status in %s: %s", path, e)
|
logger.warning("Failed to update source status in %s: %s", path, e)
|
||||||
|
|
||||||
|
|
||||||
|
async def _embed_merged_claims(branch_sha: str):
    """Embed new/changed claim files from a merged PR into Qdrant.

    Diffs ``branch_sha~1`` against ``branch_sha`` in the main worktree and
    keeps the added/copied/modified/renamed ``.md`` files that live under one
    of the knowledge directories and whose file name does not start with
    ``_``. Each surviving file is passed to ``embed-claims.py --file`` in a
    subprocess; a file counts as embedded only when that process exits 0 and
    its stdout contains ``OK``.

    Non-fatal: every failure is logged and swallowed so the caller's merge
    pipeline is never blocked. A timeout or failure on one file does not stop
    the remaining files from being attempted.

    Args:
        branch_sha: Commit SHA that was merged; it is diffed against its
            first parent (``<sha>~1``).
    """
    # Tuple so it can be handed straight to str.startswith().
    embed_dirs = ("domains/", "core/", "foundations/", "decisions/", "entities/")
    embed_timeout = 30  # seconds allowed per embed-claims.py invocation

    try:
        rc, diff_out = await _git(
            "diff", "--name-only", "--diff-filter=ACMR",
            f"{branch_sha}~1", branch_sha,
            cwd=str(config.MAIN_WORKTREE),
            timeout=10,
        )
        if rc != 0:
            logger.warning("embed: diff failed (rc=%d), skipping", rc)
            return

        md_files = [
            f for f in diff_out.strip().split("\n")
            if f.endswith(".md")
            and f.startswith(embed_dirs)
            and not f.split("/")[-1].startswith("_")
        ]
        if not md_files:
            return

        embedded = 0
        for fpath in md_files:
            full_path = config.MAIN_WORKTREE / fpath
            if not full_path.exists():
                continue

            proc = await asyncio.create_subprocess_exec(
                "python3", "/opt/teleo-eval/embed-claims.py", "--file", str(full_path),
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
            )
            try:
                stdout, stderr = await asyncio.wait_for(
                    proc.communicate(), timeout=embed_timeout
                )
            except asyncio.TimeoutError:
                # Handle the timeout per file: letting it reach the outer
                # handler would abandon every remaining file.
                logger.warning(
                    "embed: timed out after %ds for %s", embed_timeout, fpath
                )
                continue
            finally:
                # wait_for() only cancels communicate(); the child keeps
                # running unless it is killed and reaped explicitly.
                if proc.returncode is None:
                    try:
                        proc.kill()
                    except ProcessLookupError:
                        pass  # exited between the check and the kill
                    await proc.wait()

            if proc.returncode == 0 and b"OK" in stdout:
                embedded += 1
            else:
                # errors="replace": undecodable stderr bytes must not raise
                # and abort the loop.
                logger.warning(
                    "embed: failed for %s: %s",
                    fpath,
                    stderr.decode(errors="replace")[:200],
                )

        if embedded:
            logger.info("embed: %d/%d files embedded into Qdrant", embedded, len(md_files))
    except Exception:
        # Top-level boundary: embedding is best-effort and must never
        # propagate into the merge pipeline.
        logger.exception("embed: post-merge embedding failed (non-fatal)")
|
||||||
|
|
||||||
|
|
||||||
def _archive_source_for_pr(branch: str, domain: str, merged: bool = True):
|
def _archive_source_for_pr(branch: str, domain: str, merged: bool = True):
|
||||||
"""Move source from queue/ to archive/{domain}/ after PR merge or close.
|
"""Move source from queue/ to archive/{domain}/ after PR merge or close.
|
||||||
|
|
||||||
|
|
@ -830,6 +881,9 @@ async def _merge_domain_queue(conn, domain: str) -> tuple[int, int]:
|
||||||
# Archive source file (closes near-duplicate loop — Ganymede review)
|
# Archive source file (closes near-duplicate loop — Ganymede review)
|
||||||
_archive_source_for_pr(branch, domain)
|
_archive_source_for_pr(branch, domain)
|
||||||
|
|
||||||
|
# Embed new/changed claims into Qdrant (non-fatal)
|
||||||
|
await _embed_merged_claims(branch_sha)
|
||||||
|
|
||||||
# Delete remote branch immediately (Ganymede Q4)
|
# Delete remote branch immediately (Ganymede Q4)
|
||||||
await _delete_remote_branch(branch)
|
await _delete_remote_branch(branch)
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue