#!/usr/bin/env python3
r"""Verbatim copy of the IACE Qdrant knowledge-base collections to another Qdrant.

There is no RAG/embedding service on prod, so the normal ingest_iace_kb.sh has
no target there. Instead we copy the already-embedded points (id + vector +
payload) 1:1 from the source Qdrant (macmini) to the destination (prod). No
re-embedding, no re-chunking → every copied point is identical on the
destination and /sdk/v1/rag/search reads it the same way.

Idempotent: same point ids → upsert overwrites, no duplicates. Points that
exist only on the destination are never deleted by this script.

Environment:
  SRC_QDRANT      source base URL (default: http://localhost:6333)
  DST_QDRANT      destination base URL (required)
  DST_QDRANT_KEY  destination API key (must be set; an empty value sends no
                  api-key header)
  COLLECTIONS     comma-separated collection names (default: the three
                  bp_iace_* collections listed below)

Usage (run on macmini; reads local Qdrant, writes prod Qdrant):
  SRC_QDRANT=http://localhost:6333 \
  DST_QDRANT=https://qdrant-dev.breakpilot.ai \
  DST_QDRANT_KEY=YOUR_API_KEY \
  python3 copy_iace_collections_to_prod.py
"""
import json
import os
import urllib.error
import urllib.request

_DEFAULT_COLLECTIONS = "bp_iace_accident_stats,bp_iace_safety_kb,bp_iace_failure_kb"

SRC = os.environ.get("SRC_QDRANT", "http://localhost:6333").rstrip("/")
# DST/KEY are read leniently here and validated in main(), so importing this
# module without prod credentials in the environment does not crash.
DST = os.environ.get("DST_QDRANT", "").rstrip("/")
KEY = os.environ.get("DST_QDRANT_KEY")
# Strip whitespace and drop empty entries so "a, b," does not yield a request
# for a collection with an empty name.
COLLECTIONS = [
    name.strip()
    for name in os.environ.get("COLLECTIONS", _DEFAULT_COLLECTIONS).split(",")
    if name.strip()
]
BATCH = 128  # points per scroll page / upsert request


def _req(method, url, body=None, key=None):
    """Send one JSON request and return the decoded JSON response.

    Args:
        method: HTTP method, e.g. "GET", "POST", "PUT".
        url: Full request URL.
        body: JSON-serialisable request body, or None for no body.
        key: Value for the ``api-key`` header; the header is omitted when
            this is empty or None.

    Raises:
        urllib.error.HTTPError: on a non-2xx response (raised by urlopen).
    """
    data = json.dumps(body).encode() if body is not None else None
    r = urllib.request.Request(url, data=data, method=method)
    r.add_header("Content-Type", "application/json")
    if key:
        r.add_header("api-key", key)
    with urllib.request.urlopen(r, timeout=120) as resp:
        return json.loads(resp.read())


def _exists(base, col, key=None) -> bool:
    """Return True if collection *col* exists on the Qdrant at *base*.

    A 404 means "does not exist"; any other HTTP error is re-raised.
    """
    try:
        _req("GET", f"{base}/collections/{col}", key=key)
    except urllib.error.HTTPError as e:
        if e.code == 404:
            return False
        raise
    return True


def _vector_params(src_vectors: dict) -> tuple:
    """Reduce a source ``vectors`` config to what is sent on collection create.

    Handles both a single unnamed vector (``{"size": ..., "distance": ...}``)
    and named vectors (``{name: {"size": ..., "distance": ...}, ...}``). Only
    size and distance are carried over; other source settings are dropped.

    Returns:
        ``(params, label)`` where *params* is the ``vectors`` value for the
        create request and *label* is a short human-readable summary.
    """
    if isinstance(src_vectors.get("size"), int):
        size, dist = src_vectors["size"], src_vectors["distance"]
        return {"size": size, "distance": dist}, f"{size}d {dist}"
    params = {
        name: {"size": cfg["size"], "distance": cfg["distance"]}
        for name, cfg in src_vectors.items()
    }
    label = ", ".join(
        f"{name}: {cfg['size']}d {cfg['distance']}" for name, cfg in params.items()
    )
    return params, label


def copy_collection(col: str) -> None:
    """Copy every point of collection *col* from SRC to DST.

    Creates the destination collection from the source's vector size and
    distance if it does not exist yet, then scrolls the source in pages of
    BATCH points and upserts each page (id, vector, payload) into the
    destination. Finally prints exact point counts for both sides.
    """
    src_vectors = _req("GET", f"{SRC}/collections/{col}")["result"]["config"]["params"]["vectors"]

    if _exists(DST, col, KEY):
        print(f" {col}: dst exists — upserting into it")
    else:
        params, label = _vector_params(src_vectors)
        _req("PUT", f"{DST}/collections/{col}", {"vectors": params}, KEY)
        print(f" {col}: created on dst ({label})")

    offset, total = None, 0
    while True:
        body = {"limit": BATCH, "with_vector": True, "with_payload": True}
        if offset is not None:
            body["offset"] = offset
        res = _req("POST", f"{SRC}/collections/{col}/points/scroll", body)["result"]
        pts = res.get("points", [])
        if not pts:
            break
        upsert = [
            {"id": p["id"], "vector": p["vector"], "payload": p.get("payload", {})}
            for p in pts
        ]
        # wait=true: the request returns only after the write has been applied,
        # so the count check below sees every upserted point.
        _req("PUT", f"{DST}/collections/{col}/points?wait=true", {"points": upsert}, KEY)
        total += len(pts)
        offset = res.get("next_page_offset")
        if offset is None:
            break

    src_n = _req("POST", f"{SRC}/collections/{col}/points/count", {"exact": True})["result"]["count"]
    dst_n = _req("POST", f"{DST}/collections/{col}/points/count", {"exact": True}, KEY)["result"]["count"]
    # ">=" rather than "==": upserting never deletes, so a pre-existing
    # destination may legitimately hold more points than the source.
    flag = "OK" if dst_n >= src_n else "MISMATCH"
    print(f" {col}: copied {total} | src={src_n} dst={dst_n} [{flag}]")


def main() -> None:
    """Validate the environment, then copy each configured collection."""
    if not DST:
        raise SystemExit("DST_QDRANT must be set to the destination Qdrant base URL")
    if KEY is None:
        raise SystemExit(
            "DST_QDRANT_KEY must be set (use an empty value for a destination without auth)"
        )
    print(f"Copy IACE collections {SRC} -> {DST}")
    for col in COLLECTIONS:
        copy_collection(col)
    print("Done.")


if __name__ == "__main__":
    main()