"""
Qdrant Vector Database Service — core client and BYOEH functions.
"""

import os
import uuid
from typing import List, Dict, Optional

from qdrant_client import QdrantClient
from qdrant_client.http import models
from qdrant_client.models import VectorParams, Distance, PointStruct, Filter, FieldCondition, MatchValue

QDRANT_URL = os.getenv("QDRANT_URL", "http://localhost:6333")
COLLECTION_NAME = "bp_eh"
VECTOR_SIZE = 1536  # OpenAI text-embedding-3-small

# Number of points requested per scroll page when enumerating points.
_SCROLL_PAGE_SIZE = 1000

# Fixed namespace for deriving deterministic point IDs (see _chunk_point_id).
# Changing it changes every derived ID, so previously indexed points would no
# longer be overwritten on re-index.
_POINT_ID_NAMESPACE = uuid.NAMESPACE_URL

_client: Optional[QdrantClient] = None


def get_qdrant_client() -> QdrantClient:
    """Get or create Qdrant client singleton.

    The client is created lazily on first use, connecting to ``QDRANT_URL``,
    and cached in the module-level ``_client`` for all later calls.
    """
    global _client
    if _client is None:
        _client = QdrantClient(url=QDRANT_URL)
    return _client


def _chunk_point_id(eh_id: str, chunk_index: int) -> str:
    """Return a deterministic UUID string for one chunk of an EH.

    Qdrant only accepts unsigned integers or UUIDs as point IDs, so the
    human-readable key ``"<eh_id>_<chunk_index>"`` is hashed into a UUIDv5.
    The same (eh_id, chunk_index) pair always yields the same ID, which keeps
    re-indexing an EH an overwrite rather than a duplication.
    """
    return str(uuid.uuid5(_POINT_ID_NAMESPACE, f"{eh_id}_{chunk_index}"))


async def init_qdrant_collection() -> bool:
    """Initialize Qdrant collection for BYOEH if not exists.

    NOTE: although declared ``async``, this uses the synchronous client and
    therefore blocks while the requests are in flight.

    Returns:
        True if the collection exists or was created, False if any error
        occurred (the error is printed, not raised).
    """
    try:
        client = get_qdrant_client()

        # Check if collection exists
        collections = client.get_collections().collections
        collection_names = [c.name for c in collections]

        if COLLECTION_NAME not in collection_names:
            client.create_collection(
                collection_name=COLLECTION_NAME,
                vectors_config=VectorParams(
                    size=VECTOR_SIZE,
                    distance=Distance.COSINE
                )
            )
            print(f"Created Qdrant collection: {COLLECTION_NAME}")
        else:
            print(f"Qdrant collection {COLLECTION_NAME} already exists")

        return True
    except Exception as e:
        # Best-effort initialisation: report the failure to the caller via
        # the return value instead of propagating.
        print(f"Failed to initialize Qdrant: {e}")
        return False


async def index_eh_chunks(
    eh_id: str,
    tenant_id: str,
    subject: str,
    chunks: List[Dict]
) -> int:
    """
    Index EH chunks in Qdrant.

    Each chunk becomes one point whose ID is derived deterministically from
    ``eh_id`` and the chunk's position, so indexing the same EH again
    overwrites its existing points.

    Args:
        eh_id: Erwartungshorizont ID
        tenant_id: Tenant/School ID for isolation
        subject: Subject (deutsch, englisch, etc.)
        chunks: List of {text, embedding, encrypted_content}

    Returns:
        Number of indexed chunks

    Raises:
        KeyError: If a chunk has no ``"embedding"`` key.
    """
    client = get_qdrant_client()

    points = [
        PointStruct(
            # Qdrant rejects arbitrary strings as IDs; use a stable UUID.
            id=_chunk_point_id(eh_id, i),
            vector=chunk["embedding"],
            payload={
                "tenant_id": tenant_id,
                "eh_id": eh_id,
                "chunk_index": i,
                "subject": subject,
                "encrypted_content": chunk.get("encrypted_content", ""),
                "training_allowed": False  # ALWAYS FALSE - critical for compliance
            }
        )
        for i, chunk in enumerate(chunks)
    ]

    # Skip the request entirely when there is nothing to index.
    if points:
        client.upsert(collection_name=COLLECTION_NAME, points=points)

    return len(points)


async def search_eh(
    query_embedding: List[float],
    tenant_id: str,
    subject: Optional[str] = None,
    limit: int = 5
) -> List[Dict]:
    """
    Semantic search in tenant's Erwartungshorizonte.

    Args:
        query_embedding: Query vector (1536 dimensions)
        tenant_id: Tenant ID for isolation
        subject: Optional subject filter
        limit: Max results

    Returns:
        List of matching chunks with scores. Each entry has the keys
        ``id``, ``score``, ``eh_id``, ``chunk_index``, ``encrypted_content``
        and ``subject``; payload-derived values are None when absent.
    """
    client = get_qdrant_client()

    # Build filter conditions; the tenant condition is always present so
    # results never cross tenant boundaries.
    must_conditions = [
        FieldCondition(key="tenant_id", match=MatchValue(value=tenant_id))
    ]

    if subject:
        must_conditions.append(
            FieldCondition(key="subject", match=MatchValue(value=subject))
        )

    query_filter = Filter(must=must_conditions)

    results = client.search(
        collection_name=COLLECTION_NAME,
        query_vector=query_embedding,
        query_filter=query_filter,
        limit=limit
    )

    matches = []
    for r in results:
        # A point may come back without a payload; treat that as empty
        # instead of failing on None.get(...).
        payload = r.payload or {}
        matches.append(
            {
                "id": str(r.id),
                "score": r.score,
                "eh_id": payload.get("eh_id"),
                "chunk_index": payload.get("chunk_index"),
                "encrypted_content": payload.get("encrypted_content"),
                "subject": payload.get("subject")
            }
        )
    return matches


async def delete_eh_vectors(eh_id: str) -> int:
    """
    Delete all vectors for a specific Erwartungshorizont.

    All matching point IDs are collected by paging through the collection,
    so EHs with more points than one scroll page are removed completely.

    Args:
        eh_id: Erwartungshorizont ID

    Returns:
        Number of deleted points
    """
    client = get_qdrant_client()

    eh_filter = Filter(
        must=[FieldCondition(key="eh_id", match=MatchValue(value=eh_id))]
    )

    # Collect the IDs of every point belonging to this EH. scroll() returns
    # (points, next_page_offset); a None offset marks the last page.
    point_ids = []
    offset = None
    while True:
        page, offset = client.scroll(
            collection_name=COLLECTION_NAME,
            scroll_filter=eh_filter,
            limit=_SCROLL_PAGE_SIZE,
            offset=offset,
            # Only the IDs are needed; skip payload and vector transfer.
            with_payload=False,
            with_vectors=False
        )
        # Keep IDs exactly as returned: converting integer IDs to strings
        # would turn them into invalid point IDs.
        point_ids.extend(p.id for p in page)
        if offset is None:
            break

    if point_ids:
        client.delete(
            collection_name=COLLECTION_NAME,
            points_selector=models.PointIdsList(points=point_ids)
        )

    return len(point_ids)


async def get_collection_info() -> Dict:
    """Get collection statistics.

    Returns:
        A dict with ``name``, ``vectors_count``, ``points_count`` and
        ``status`` on success, or ``{"error": <message>}`` if the lookup
        fails for any reason.
    """
    try:
        client = get_qdrant_client()
        info = client.get_collection(COLLECTION_NAME)
        return {
            "name": COLLECTION_NAME,
            # NOTE(review): defensive in case the installed qdrant-client
            # does not expose `vectors_count` — confirm against the pinned
            # version. Falls back to None instead of failing the stats call.
            "vectors_count": getattr(info, "vectors_count", None),
            "points_count": info.points_count,
            "status": info.status.value
        }
    except Exception as e:
        return {"error": str(e)}