[split-required] Split 500-850 LOC files (batch 2)

backend-lehrer (10 files): - game/database.py (785 → 5), correction_api.py (683 → 4) - classroom_engine/antizipation.py (676 → 5) - llm_gateway schools/edu_search already done in prior batch klausur-service (12 files): - orientation_crop_api.py (694 → 5), pdf_export.py (677 → 4) - zeugnis_crawler.py (676 → 5), grid_editor_api.py (671 → 5) - eh_templates.py (658 → 5), mail/api.py (651 → 5) - qdrant_service.py (638 → 5), training_api.py (625 → 4) website (6 pages): - middleware (696 → 8), mail (733 → 6), consent (628 → 8) - compliance/risks (622 → 5), export (502 → 5), brandbook (629 → 7) studio-v2 (3 components): - B2BMigrationWizard (848 → 3), CleanupPanel (765 → 2) - dashboard-experimental (739 → 2) admin-lehrer (4 files): - uebersetzungen (769 → 4), manager (670 → 2) - ChunkBrowserQA (675 → 6), dsfa/page (674 → 5) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-25 08:24:01 +02:00
parent 34da9f4cda
commit b4613e26f3
118 changed files with 15258 additions and 14680 deletions
--- a/klausur-service/backend/qdrant_core.py
+++ b/klausur-service/backend/qdrant_core.py
@@ -0,0 +1,193 @@
+"""
+Qdrant Vector Database Service — core client and BYOEH functions.
+"""
+
+import os
+from typing import List, Dict, Optional
+from qdrant_client import QdrantClient
+from qdrant_client.http import models
+from qdrant_client.models import VectorParams, Distance, PointStruct, Filter, FieldCondition, MatchValue
+
+# Qdrant endpoint; can be overridden through the QDRANT_URL environment variable.
+QDRANT_URL = os.getenv("QDRANT_URL", "http://localhost:6333")
+# Name of the collection the functions in this module read from and write to.
+COLLECTION_NAME = "bp_eh"
+# Vector dimensionality used when the collection is created.
+VECTOR_SIZE = 1536  # OpenAI text-embedding-3-small
+
+# Module-wide client instance; stays None until get_qdrant_client() creates it.
+_client: Optional[QdrantClient] = None
+
+
+def get_qdrant_client() -> QdrantClient:
+    """Get or create Qdrant client singleton."""
+    global _client
+    if _client is None:
+        _client = QdrantClient(url=QDRANT_URL)
+    return _client
+
+
+async def init_qdrant_collection() -> bool:
+    """Initialize Qdrant collection for BYOEH if not exists."""
+    try:
+        client = get_qdrant_client()
+
+        # Check if collection exists
+        collections = client.get_collections().collections
+        collection_names = [c.name for c in collections]
+
+        if COLLECTION_NAME not in collection_names:
+            client.create_collection(
+                collection_name=COLLECTION_NAME,
+                vectors_config=VectorParams(
+                    size=VECTOR_SIZE,
+                    distance=Distance.COSINE
+                )
+            )
+            print(f"Created Qdrant collection: {COLLECTION_NAME}")
+        else:
+            print(f"Qdrant collection {COLLECTION_NAME} already exists")
+
+        return True
+    except Exception as e:
+        print(f"Failed to initialize Qdrant: {e}")
+        return False
+
+
+async def index_eh_chunks(
+    eh_id: str,
+    tenant_id: str,
+    subject: str,
+    chunks: List[Dict]
+) -> int:
+    """
+    Index EH chunks in Qdrant.
+
+    Args:
+        eh_id: Erwartungshorizont ID
+        tenant_id: Tenant/School ID for isolation
+        subject: Subject (deutsch, englisch, etc.)
+        chunks: List of {text, embedding, encrypted_content}
+
+    Returns:
+        Number of indexed chunks
+    """
+    client = get_qdrant_client()
+
+    points = []
+    for i, chunk in enumerate(chunks):
+        point_id = f"{eh_id}_{i}"
+        points.append(
+            PointStruct(
+                id=point_id,
+                vector=chunk["embedding"],
+                payload={
+                    "tenant_id": tenant_id,
+                    "eh_id": eh_id,
+                    "chunk_index": i,
+                    "subject": subject,
+                    "encrypted_content": chunk.get("encrypted_content", ""),
+                    "training_allowed": False  # ALWAYS FALSE - critical for compliance
+                }
+            )
+        )
+
+    if points:
+        client.upsert(collection_name=COLLECTION_NAME, points=points)
+
+    return len(points)
+
+
+async def search_eh(
+    query_embedding: List[float],
+    tenant_id: str,
+    subject: Optional[str] = None,
+    limit: int = 5
+) -> List[Dict]:
+    """
+    Semantic search in tenant's Erwartungshorizonte.
+
+    Args:
+        query_embedding: Query vector (1536 dimensions)
+        tenant_id: Tenant ID for isolation
+        subject: Optional subject filter
+        limit: Max results
+
+    Returns:
+        List of matching chunks with scores
+    """
+    client = get_qdrant_client()
+
+    # Build filter conditions
+    must_conditions = [
+        FieldCondition(key="tenant_id", match=MatchValue(value=tenant_id))
+    ]
+
+    if subject:
+        must_conditions.append(
+            FieldCondition(key="subject", match=MatchValue(value=subject))
+        )
+
+    query_filter = Filter(must=must_conditions)
+
+    results = client.search(
+        collection_name=COLLECTION_NAME,
+        query_vector=query_embedding,
+        query_filter=query_filter,
+        limit=limit
+    )
+
+    return [
+        {
+            "id": str(r.id),
+            "score": r.score,
+            "eh_id": r.payload.get("eh_id"),
+            "chunk_index": r.payload.get("chunk_index"),
+            "encrypted_content": r.payload.get("encrypted_content"),
+            "subject": r.payload.get("subject")
+        }
+        for r in results
+    ]
+
+
+async def delete_eh_vectors(eh_id: str) -> int:
+    """
+    Delete all vectors for a specific Erwartungshorizont.
+
+    Args:
+        eh_id: Erwartungshorizont ID
+
+    Returns:
+        Number of deleted points
+    """
+    client = get_qdrant_client()
+
+    # Get all points for this EH first
+    scroll_result = client.scroll(
+        collection_name=COLLECTION_NAME,
+        scroll_filter=Filter(
+            must=[FieldCondition(key="eh_id", match=MatchValue(value=eh_id))]
+        ),
+        limit=1000
+    )
+
+    point_ids = [str(p.id) for p in scroll_result[0]]
+
+    if point_ids:
+        client.delete(
+            collection_name=COLLECTION_NAME,
+            points_selector=models.PointIdsList(points=point_ids)
+        )
+
+    return len(point_ids)
+
+
+async def get_collection_info() -> Dict:
+    """Get collection statistics."""
+    try:
+        client = get_qdrant_client()
+        info = client.get_collection(COLLECTION_NAME)
+        return {
+            "name": COLLECTION_NAME,
+            "vectors_count": info.vectors_count,
+            "points_count": info.points_count,
+            "status": info.status.value
+        }
+    except Exception as e:
+        return {"error": str(e)}