fix: vector-gc ID normalization for Qdrant UUID format

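Qdrant auto-formats 32-char MD5 hex IDs as UUIDs with dashes. Normalize the IDs returned by Qdrant (strip the dashes) before comparing them with the expected filesystem-derived IDs, and use the original dashed IDs when purging orphans. Also read the orphan's path from the payload's claim_path field, falling back to path.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>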
2026-03-30 01:23:10 +01:00 · 2026-03-30 01:23:10 +01:00 · d70788a91c
commit d70788a91c
parent 6fb3f2258f
1 changed files with 20 additions and 8 deletions
--- a/ops/vector-gc.py
+++ b/ops/vector-gc.py
@ -26,7 +26,11 @@ EMBED_DIRS = ["domains", "core", "foundations", "decisions", "entities"]
 def make_point_id(path: str) -> str:
-    """Deterministic UUID from file path (must match embed-claims.py)."""
+    """Deterministic UUID from file path (must match embed-claims.py).
    Qdrant auto-formats 32-char hex as UUID with dashes, so we normalize
    by stripping dashes for comparison.
    """
    return hashlib.md5(path.encode()).hexdigest()
@ -99,7 +103,12 @@ def main():
    expected = get_expected_ids()
    print(f"  Filesystem files: {len(expected)}")
-    qdrant_ids = {p["id"] for p in points}
+    # Normalize IDs: Qdrant formats 32-char hex as UUID with dashes
    def normalize_id(pid: str) -> str:
        return pid.replace("-", "")
    qdrant_map = {normalize_id(p["id"]): p for p in points}
    qdrant_ids = set(qdrant_map.keys())
    expected_ids = set(expected.keys())
    orphan_ids = qdrant_ids - expected_ids
@ -107,9 +116,10 @@ def main():
    # Categorize orphans by their payload path
    orphan_details = []
-    for p in points:
+    for nid in orphan_ids:
-        if p["id"] in orphan_ids:
+        p = qdrant_map[nid]
-            path = p.get("payload", {}).get("path", "unknown")
+        payload = p.get("payload", {})
        path = payload.get("claim_path") or payload.get("path", "unknown")
        orphan_details.append({"id": p["id"], "path": path})
    print(f"\n=== Vector GC Report ===")
@ -130,8 +140,10 @@ def main():
                print(f"  {mid[:12]}  {expected[mid].relative_to(REPO_DIR)}")
    if args.purge and orphan_ids:
-        print(f"\nPurging {len(orphan_ids)} orphan vectors...")
+        # Use original Qdrant IDs (with dashes) for deletion
-        result = delete_points(list(orphan_ids))
+        original_orphan_ids = [qdrant_map[nid]["id"] for nid in orphan_ids]
        print(f"\nPurging {len(original_orphan_ids)} orphan vectors...")
        result = delete_points(original_orphan_ids)
        print(f"  Done: {result}")
    elif orphan_ids and not args.purge:
        print(f"\nRun with --purge to delete orphan vectors.")