[split-required] Split 500-850 LOC files (batch 2)
backend-lehrer (10 files):
- game/database.py (785 → 5), correction_api.py (683 → 4)
- classroom_engine/antizipation.py (676 → 5)
- llm_gateway schools/edu_search already done in prior batch

klausur-service (12 files):
- orientation_crop_api.py (694 → 5), pdf_export.py (677 → 4)
- zeugnis_crawler.py (676 → 5), grid_editor_api.py (671 → 5)
- eh_templates.py (658 → 5), mail/api.py (651 → 5)
- qdrant_service.py (638 → 5), training_api.py (625 → 4)

website (6 pages):
- middleware (696 → 8), mail (733 → 6), consent (628 → 8)
- compliance/risks (622 → 5), export (502 → 5), brandbook (629 → 7)

studio-v2 (3 components):
- B2BMigrationWizard (848 → 3), CleanupPanel (765 → 2)
- dashboard-experimental (739 → 2)

admin-lehrer (4 files):
- uebersetzungen (769 → 4), manager (670 → 2)
- ChunkBrowserQA (675 → 6), dsfa/page (674 → 5)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
146
klausur-service/backend/qdrant_class.py
Normal file
146
klausur-service/backend/qdrant_class.py
Normal file
@@ -0,0 +1,146 @@
|
||||
"""
|
||||
Qdrant Vector Database Service — QdrantService class for NiBiS Ingestion Pipeline.
|
||||
"""
|
||||
|
||||
from typing import List, Dict, Optional
|
||||
from qdrant_client import QdrantClient
|
||||
from qdrant_client.models import VectorParams, Distance, PointStruct, Filter, FieldCondition, MatchValue
|
||||
|
||||
from qdrant_core import QDRANT_URL, VECTOR_SIZE
|
||||
|
||||
|
||||
class QdrantService:
    """
    Class-based Qdrant service for flexible collection management.

    Used by nibis_ingestion.py for bulk indexing.

    The underlying ``QdrantClient`` is created lazily on the first access of
    :attr:`client`. The public methods are declared as coroutines, but they
    call the client directly and never await anything themselves.
    """

    def __init__(self, url: Optional[str] = None):
        """
        Store the endpoint; no connection is opened here.

        Args:
            url: Qdrant endpoint. Falls back to ``QDRANT_URL`` when omitted
                or empty.
        """
        self.url = url or QDRANT_URL
        self._client: Optional[QdrantClient] = None

    @property
    def client(self) -> QdrantClient:
        """Return the client, creating it on first use and caching it."""
        if self._client is None:
            self._client = QdrantClient(url=self.url)
        return self._client

    async def ensure_collection(self, collection_name: str, vector_size: int = VECTOR_SIZE) -> bool:
        """
        Ensure collection exists, create if needed.

        Args:
            collection_name: Name of the collection
            vector_size: Dimension of vectors

        Returns:
            True if the collection exists or was created, False if any
            client call raised (the error is printed, not re-raised).
        """
        try:
            collections = self.client.get_collections().collections
            collection_names = [c.name for c in collections]

            if collection_name not in collection_names:
                # New collections always use cosine distance.
                self.client.create_collection(
                    collection_name=collection_name,
                    vectors_config=VectorParams(
                        size=vector_size,
                        distance=Distance.COSINE,
                    ),
                )
                print(f"Created collection: {collection_name}")
            return True
        except Exception as e:
            # Best-effort: callers get a boolean instead of an exception.
            print(f"Error ensuring collection: {e}")
            return False

    async def upsert_points(self, collection_name: str, points: List[Dict]) -> int:
        """
        Upsert points into collection.

        Args:
            collection_name: Target collection
            points: List of {id, vector, payload}; ``payload`` may be
                missing or ``None``.

        Returns:
            Number of upserted points (0 for an empty input, in which case
            the client is not called).
        """
        import uuid

        if not points:
            return 0

        qdrant_points = []
        for p in points:
            # Convert string ID to UUID for Qdrant compatibility
            point_id = p["id"]
            if isinstance(point_id, str):
                # Use uuid5 with DNS namespace for deterministic UUID from string
                point_id = str(uuid.uuid5(uuid.NAMESPACE_DNS, point_id))

            # `or {}` also covers an explicit `"payload": None`, which would
            # otherwise fail when unpacked with `**`.
            payload = p.get("payload") or {}

            qdrant_points.append(
                PointStruct(
                    id=point_id,
                    vector=p["vector"],
                    # Keep original ID in payload
                    payload={**payload, "original_id": p["id"]},
                )
            )

        self.client.upsert(collection_name=collection_name, points=qdrant_points)
        return len(qdrant_points)

    async def search(
        self,
        collection_name: str,
        query_vector: List[float],
        filter_conditions: Optional[Dict] = None,
        limit: int = 10
    ) -> List[Dict]:
        """
        Semantic search in collection.

        Args:
            collection_name: Collection to search
            query_vector: Query embedding
            filter_conditions: Optional filters (key: value pairs); every
                pair must match (they are combined under ``must``).
            limit: Max results

        Returns:
            List of dicts with ``id`` (stringified), ``score`` and ``payload``.
        """
        query_filter = None
        if filter_conditions:
            must_conditions = [
                FieldCondition(key=k, match=MatchValue(value=v))
                for k, v in filter_conditions.items()
            ]
            query_filter = Filter(must=must_conditions)

        # NOTE(review): newer qdrant-client releases steer users from
        # `search` to `query_points` — confirm against the pinned version.
        results = self.client.search(
            collection_name=collection_name,
            query_vector=query_vector,
            query_filter=query_filter,
            limit=limit,
        )

        return [
            {
                "id": str(r.id),
                "score": r.score,
                "payload": r.payload,
            }
            for r in results
        ]

    async def get_stats(self, collection_name: str) -> Dict:
        """
        Get collection statistics.

        Returns:
            A dict with ``name``, ``vectors_count``, ``points_count`` and
            ``status``; on any failure a dict with ``error`` and ``name``.
        """
        try:
            info = self.client.get_collection(collection_name)
            return {
                "name": collection_name,
                # Tolerate client models that do not expose `vectors_count`
                # (presumably dropped in recent releases — verify) instead of
                # failing the whole stats call.
                "vectors_count": getattr(info, "vectors_count", None),
                "points_count": info.points_count,
                "status": info.status.value,
            }
        except Exception as e:
            return {"error": str(e), "name": collection_name}
|
||||
Reference in New Issue
Block a user