[split-required] Split 500-850 LOC files (batch 2)
backend-lehrer (10 files):
- game/database.py (785 → 5), correction_api.py (683 → 4)
- classroom_engine/antizipation.py (676 → 5)
- llm_gateway schools/edu_search already done in prior batch

klausur-service (12 files):
- orientation_crop_api.py (694 → 5), pdf_export.py (677 → 4)
- zeugnis_crawler.py (676 → 5), grid_editor_api.py (671 → 5)
- eh_templates.py (658 → 5), mail/api.py (651 → 5)
- qdrant_service.py (638 → 5), training_api.py (625 → 4)

website (6 pages):
- middleware (696 → 8), mail (733 → 6), consent (628 → 8)
- compliance/risks (622 → 5), export (502 → 5), brandbook (629 → 7)

studio-v2 (3 components):
- B2BMigrationWizard (848 → 3), CleanupPanel (765 → 2)
- dashboard-experimental (739 → 2)

admin-lehrer (4 files):
- uebersetzungen (769 → 4), manager (670 → 2)
- ChunkBrowserQA (675 → 6), dsfa/page (674 → 5)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
193
klausur-service/backend/qdrant_core.py
Normal file
193
klausur-service/backend/qdrant_core.py
Normal file
@@ -0,0 +1,193 @@
|
||||
"""
|
||||
Qdrant Vector Database Service — core client and BYOEH functions.
|
||||
"""
|
||||
|
||||
import os
|
||||
from typing import List, Dict, Optional
|
||||
from qdrant_client import QdrantClient
|
||||
from qdrant_client.http import models
|
||||
from qdrant_client.models import VectorParams, Distance, PointStruct, Filter, FieldCondition, MatchValue
|
||||
|
||||
# Qdrant endpoint; taken from the QDRANT_URL environment variable when set,
# otherwise the local default below is used.
QDRANT_URL = os.environ.get("QDRANT_URL", "http://localhost:6333")

# Name of the collection used by the BYOEH functions in this module.
COLLECTION_NAME = "bp_eh"

# Vector dimensionality of the collection (OpenAI text-embedding-3-small).
VECTOR_SIZE = 1536

# Module-wide client instance; stays None until get_qdrant_client() builds it.
_client: Optional[QdrantClient] = None
|
||||
|
||||
|
||||
def get_qdrant_client() -> QdrantClient:
    """Return the module-wide Qdrant client, creating it on first use.

    The client is built from ``QDRANT_URL`` and cached in the module-level
    ``_client`` variable, so every later call returns the same object.
    """
    global _client
    if _client is not None:
        return _client
    _client = QdrantClient(url=QDRANT_URL)
    return _client
|
||||
|
||||
|
||||
async def init_qdrant_collection() -> bool:
    """Make sure the BYOEH collection exists in Qdrant.

    Lists the collections known to the server and creates
    ``COLLECTION_NAME`` (cosine distance, ``VECTOR_SIZE`` dimensions) when
    it is missing.

    Returns:
        True if the collection already existed or was created, False if any
        exception was raised on the way (the error is printed, not raised).
    """
    try:
        qdrant = get_qdrant_client()

        # Names of all collections currently present on the server.
        existing_names = {entry.name for entry in qdrant.get_collections().collections}

        if COLLECTION_NAME in existing_names:
            print(f"Qdrant collection {COLLECTION_NAME} already exists")
            return True

        qdrant.create_collection(
            collection_name=COLLECTION_NAME,
            vectors_config=VectorParams(size=VECTOR_SIZE, distance=Distance.COSINE),
        )
        print(f"Created Qdrant collection: {COLLECTION_NAME}")
        return True
    except Exception as e:
        print(f"Failed to initialize Qdrant: {e}")
        return False
|
||||
|
||||
|
||||
async def index_eh_chunks(
    eh_id: str,
    tenant_id: str,
    subject: str,
    chunks: List[Dict]
) -> int:
    """
    Index EH chunks in Qdrant.

    Each chunk becomes one point whose ID is a UUIDv5 derived from
    ``"{eh_id}_{chunk_index}"``. Qdrant only accepts unsigned integers or
    UUIDs as point IDs, so the raw ``"{eh_id}_{i}"`` string cannot be used
    directly. Deriving the UUID deterministically keeps re-indexing the same
    EH idempotent (the upsert overwrites the same points).

    Args:
        eh_id: Erwartungshorizont ID
        tenant_id: Tenant/School ID for isolation
        subject: Subject (deutsch, englisch, etc.)
        chunks: List of {text, embedding, encrypted_content}; ``embedding``
            is required, ``encrypted_content`` defaults to ``""``

    Returns:
        Number of indexed chunks

    Raises:
        KeyError: If a chunk has no ``"embedding"`` entry.
    """
    # Local import: keeps this function self-contained without touching the
    # file-level import block.
    import uuid

    client = get_qdrant_client()

    points = [
        PointStruct(
            # The namespace must never change, otherwise re-indexing would
            # create new points instead of overwriting the existing ones.
            id=str(uuid.uuid5(uuid.NAMESPACE_URL, f"{eh_id}_{i}")),
            vector=chunk["embedding"],
            payload={
                "tenant_id": tenant_id,
                "eh_id": eh_id,
                "chunk_index": i,
                "subject": subject,
                "encrypted_content": chunk.get("encrypted_content", ""),
                "training_allowed": False,  # ALWAYS FALSE - critical for compliance
            },
        )
        for i, chunk in enumerate(chunks)
    ]

    # Skip the upsert call entirely when there is nothing to index.
    if points:
        client.upsert(collection_name=COLLECTION_NAME, points=points)

    return len(points)
|
||||
|
||||
|
||||
async def search_eh(
    query_embedding: List[float],
    tenant_id: str,
    subject: Optional[str] = None,
    limit: int = 5
) -> List[Dict]:
    """
    Run a vector search restricted to one tenant's Erwartungshorizonte.

    Args:
        query_embedding: Query vector (1536 dimensions)
        tenant_id: Tenant ID; always applied as a filter for isolation
        subject: Optional subject filter, applied only when truthy
        limit: Maximum number of hits to return

    Returns:
        One dict per hit with the keys ``id``, ``score``, ``eh_id``,
        ``chunk_index``, ``encrypted_content`` and ``subject``.
    """
    qdrant = get_qdrant_client()

    # The tenant condition is mandatory; the subject condition is optional.
    conditions = [
        FieldCondition(key="tenant_id", match=MatchValue(value=tenant_id)),
    ]
    if subject:
        subject_condition = FieldCondition(key="subject", match=MatchValue(value=subject))
        conditions.append(subject_condition)

    hits = qdrant.search(
        collection_name=COLLECTION_NAME,
        query_vector=query_embedding,
        query_filter=Filter(must=conditions),
        limit=limit,
    )

    matches = []
    for hit in hits:
        payload = hit.payload
        matches.append(
            {
                "id": str(hit.id),
                "score": hit.score,
                "eh_id": payload.get("eh_id"),
                "chunk_index": payload.get("chunk_index"),
                "encrypted_content": payload.get("encrypted_content"),
                "subject": payload.get("subject"),
            }
        )
    return matches
|
||||
|
||||
|
||||
async def delete_eh_vectors(eh_id: str) -> int:
    """
    Delete all vectors for a specific Erwartungshorizont.

    Pages through every point whose ``eh_id`` payload matches, so EHs with
    more points than one scroll page (1000) are removed completely rather
    than only their first page.

    Args:
        eh_id: Erwartungshorizont ID

    Returns:
        Number of deleted points
    """
    client = get_qdrant_client()

    eh_filter = Filter(
        must=[FieldCondition(key="eh_id", match=MatchValue(value=eh_id))]
    )

    # Collect the IDs of all matching points, following the scroll cursor
    # until the server reports that there is no further page.
    point_ids = []
    offset = None
    while True:
        records, offset = client.scroll(
            collection_name=COLLECTION_NAME,
            scroll_filter=eh_filter,
            limit=1000,
            offset=offset,
            # Only the IDs are needed; skip payload and vector transfer.
            with_payload=False,
            with_vectors=False,
        )
        # Keep IDs in their native type: stringifying an integer ID would
        # turn it into a value the server no longer accepts as a point ID.
        point_ids.extend(record.id for record in records)
        if offset is None:
            break

    if point_ids:
        client.delete(
            collection_name=COLLECTION_NAME,
            points_selector=models.PointIdsList(points=point_ids)
        )

    return len(point_ids)
|
||||
|
||||
|
||||
async def get_collection_info() -> Dict:
    """Return basic statistics about the BYOEH collection.

    Returns:
        A dict with ``name``, ``vectors_count``, ``points_count`` and
        ``status`` on success, or ``{"error": <message>}`` if any exception
        was raised while querying Qdrant.
    """
    try:
        details = get_qdrant_client().get_collection(COLLECTION_NAME)
        stats = {"name": COLLECTION_NAME}
        stats["vectors_count"] = details.vectors_count
        stats["points_count"] = details.points_count
        stats["status"] = details.status.value
        return stats
    except Exception as e:
        return {"error": str(e)}
|
||||
Reference in New Issue
Block a user