""" Qdrant Vector Database Service — QdrantService class for NiBiS Ingestion Pipeline. """ from typing import List, Dict, Optional from qdrant_client import QdrantClient from qdrant_client.models import VectorParams, Distance, PointStruct, Filter, FieldCondition, MatchValue from qdrant_core import QDRANT_URL, VECTOR_SIZE class QdrantService: """ Class-based Qdrant service for flexible collection management. Used by nibis_ingestion.py for bulk indexing. """ def __init__(self, url: str = None): self.url = url or QDRANT_URL self._client = None @property def client(self) -> QdrantClient: if self._client is None: self._client = QdrantClient(url=self.url) return self._client async def ensure_collection(self, collection_name: str, vector_size: int = VECTOR_SIZE) -> bool: """ Ensure collection exists, create if needed. Args: collection_name: Name of the collection vector_size: Dimension of vectors Returns: True if collection exists/created """ try: collections = self.client.get_collections().collections collection_names = [c.name for c in collections] if collection_name not in collection_names: self.client.create_collection( collection_name=collection_name, vectors_config=VectorParams( size=vector_size, distance=Distance.COSINE ) ) print(f"Created collection: {collection_name}") return True except Exception as e: print(f"Error ensuring collection: {e}") return False async def upsert_points(self, collection_name: str, points: List[Dict]) -> int: """ Upsert points into collection. Args: collection_name: Target collection points: List of {id, vector, payload} Returns: Number of upserted points """ import uuid if not points: return 0 qdrant_points = [] for p in points: # Convert string ID to UUID for Qdrant compatibility point_id = p["id"] if isinstance(point_id, str): # Use uuid5 with DNS namespace for deterministic UUID from string point_id = str(uuid.uuid5(uuid.NAMESPACE_DNS, point_id)) qdrant_points.append( PointStruct( id=point_id, vector=p["vector"], payload={**p.get("payload", {}), "original_id": p["id"]} # Keep original ID in payload ) ) self.client.upsert(collection_name=collection_name, points=qdrant_points) return len(qdrant_points) async def search( self, collection_name: str, query_vector: List[float], filter_conditions: Optional[Dict] = None, limit: int = 10 ) -> List[Dict]: """ Semantic search in collection. Args: collection_name: Collection to search query_vector: Query embedding filter_conditions: Optional filters (key: value pairs) limit: Max results Returns: List of matching points with scores """ query_filter = None if filter_conditions: must_conditions = [ FieldCondition(key=k, match=MatchValue(value=v)) for k, v in filter_conditions.items() ] query_filter = Filter(must=must_conditions) results = self.client.search( collection_name=collection_name, query_vector=query_vector, query_filter=query_filter, limit=limit ) return [ { "id": str(r.id), "score": r.score, "payload": r.payload } for r in results ] async def get_stats(self, collection_name: str) -> Dict: """Get collection statistics.""" try: info = self.client.get_collection(collection_name) return { "name": collection_name, "vectors_count": info.vectors_count, "points_count": info.points_count, "status": info.status.value } except Exception as e: return {"error": str(e), "name": collection_name}