[split-required] Split final batch of monoliths >1000 LOC

Python (6 files in klausur-service):
- rbac.py (1,132 → 4), admin_api.py (1,012 → 4)
- routes/eh.py (1,111 → 4), ocr_pipeline_geometry.py (1,105 → 5)

Python (2 files in backend-lehrer):
- unit_api.py (1,226 → 6), game_api.py (1,129 → 5)

Website (6 page files):
- 4x klausur-korrektur pages (1,249-1,328 LOC each) → shared components
  in website/components/klausur-korrektur/ (17 shared files)
- companion (1,057 → 10), magic-help (1,017 → 8)

All re-export barrels preserve backward compatibility.
Zero import errors verified.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-04-24 23:17:30 +02:00
parent b2a0126f14
commit 6811264756
67 changed files with 12270 additions and 13651 deletions

File diff suppressed because it is too large. [Load Diff]

View File

@@ -0,0 +1,316 @@
"""
Admin API - NiBiS Ingestion & Search
Endpoints for NiBiS data discovery, ingestion, search, and statistics.
Extracted from admin_api.py for file-size compliance.
"""
from fastapi import APIRouter, HTTPException, BackgroundTasks, Query
from pydantic import BaseModel
from typing import Optional, List, Dict
from datetime import datetime
from nibis_ingestion import (
run_ingestion,
discover_documents,
extract_zip_files,
DOCS_BASE_PATH,
)
from qdrant_service import QdrantService, search_nibis_eh, get_qdrant_client
from eh_pipeline import generate_single_embedding
router = APIRouter(prefix="/api/v1/admin", tags=["Admin"])

# In-memory status record for the background ingestion task.
# Mutated by /nibis/ingest and read by /nibis/status; also imported by the
# RAG upload module for its auto-ingest feature.
_ingestion_status: Dict = {
    "running": False,      # True while a background ingestion run is active
    "last_run": None,      # ISO timestamp of the most recent run
    "last_result": None,   # result dict (or error info) of the last run
}
# =============================================================================
# Models
# =============================================================================
class IngestionRequest(BaseModel):
    """Parameters controlling a NiBiS ingestion run."""
    ewh_only: bool = True                  # index only Erwartungshorizont documents
    year_filter: Optional[int] = None      # restrict to a single exam year
    subject_filter: Optional[str] = None   # restrict to a single subject
class IngestionStatus(BaseModel):
    """Snapshot of the ingestion pipeline state returned by /nibis/status."""
    running: bool                      # True while a background run is active
    last_run: Optional[str]            # ISO timestamp of the most recent run
    documents_indexed: Optional[int]   # document count from the last run, if any
    chunks_created: Optional[int]      # chunk count from the last run, if any
    errors: Optional[List[str]]        # truncated error list from the last run
class NiBiSSearchRequest(BaseModel):
    """Semantic-search query against the NiBiS collection."""
    query: str                     # free-text query to embed and search with
    year: Optional[int] = None     # optional exam-year filter
    subject: Optional[str] = None  # optional subject filter
    niveau: Optional[str] = None   # optional level filter (e.g. eA/gA)
    limit: int = 5                 # maximum number of hits to return
class NiBiSSearchResult(BaseModel):
    """Single semantic-search hit (text is truncated server-side)."""
    id: str                        # Qdrant point id
    score: float                   # similarity score
    text: str                      # chunk text, truncated to 500 chars
    year: Optional[int]            # source document exam year
    subject: Optional[str]         # source document subject
    niveau: Optional[str]          # source document level
    task_number: Optional[int]     # task number within the document, if known
class DataSourceStats(BaseModel):
    """Per-directory document statistics.

    NOTE(review): not referenced by any endpoint in this module — presumably
    kept for API consumers or future endpoints; confirm before removing.
    """
    source_dir: str        # directory the documents were discovered in
    year: int              # exam year of the directory
    document_count: int    # number of documents found
    subjects: List[str]    # distinct subjects present
# =============================================================================
# Endpoints
# =============================================================================
@router.get("/nibis/status", response_model=IngestionStatus)
async def get_ingestion_status():
    """Report whether an ingestion run is active and summarize the last run."""
    last = _ingestion_status.get("last_result") or {}
    # Cap the error list so a failed bulk run does not flood the response.
    recent_errors = (last.get("errors") or [])[:10]
    return IngestionStatus(
        running=_ingestion_status["running"],
        last_run=_ingestion_status.get("last_run"),
        documents_indexed=last.get("documents_indexed"),
        chunks_created=last.get("chunks_created"),
        errors=recent_errors,
    )
@router.post("/nibis/extract-zips")
async def extract_zip_files_endpoint():
    """Unpack every ZIP archive found below the za-download directories."""
    try:
        extracted_dirs = extract_zip_files(DOCS_BASE_PATH)
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
    return {
        "status": "success",
        "extracted_count": len(extracted_dirs),
        "directories": [str(path) for path in extracted_dirs],
    }
@router.get("/nibis/discover")
async def discover_nibis_documents(
    ewh_only: bool = Query(True, description="Only return Erwartungshorizonte"),
    year: Optional[int] = Query(None, description="Filter by year"),
    subject: Optional[str] = Query(None, description="Filter by subject"),
):
    """
    Discover available NiBiS documents without indexing.
    Useful for previewing what will be indexed.
    """
    try:
        docs = discover_documents(DOCS_BASE_PATH, ewh_only=ewh_only)
        # Optional narrowing by year and (substring, case-insensitive) subject.
        if year:
            docs = [doc for doc in docs if doc.year == year]
        if subject:
            needle = subject.lower()
            docs = [doc for doc in docs if needle in doc.subject.lower()]
        # Aggregate counts per year and per subject.
        year_counts: Dict[int, int] = {}
        subject_counts: Dict[str, int] = {}
        for doc in docs:
            year_counts[doc.year] = year_counts.get(doc.year, 0) + 1
            subject_counts[doc.subject] = subject_counts.get(doc.subject, 0) + 1
        sample = [
            {
                "id": doc.id,
                "filename": doc.raw_filename,
                "year": doc.year,
                "subject": doc.subject,
                "niveau": doc.niveau,
                "doc_type": doc.doc_type,
            }
            for doc in docs[:20]
        ]
        return {
            "total_documents": len(docs),
            "by_year": dict(sorted(year_counts.items())),
            "by_subject": dict(sorted(subject_counts.items(), key=lambda item: -item[1])),
            "sample_documents": sample,
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
@router.post("/nibis/ingest")
async def start_ingestion(
    request: IngestionRequest,
    background_tasks: BackgroundTasks,
):
    """
    Start NiBiS data ingestion in background.

    Rejects with 409 when a run is already in progress; otherwise schedules
    the ingestion as a FastAPI background task and returns immediately.
    Progress can be polled via /nibis/status.
    """
    if _ingestion_status["running"]:
        raise HTTPException(
            status_code=409,
            detail="Ingestion already running. Check /nibis/status for progress."
        )
    async def run_ingestion_task():
        # Runs after the HTTP response is sent; reports progress through the
        # module-level _ingestion_status so /nibis/status can surface it.
        global _ingestion_status
        _ingestion_status["running"] = True
        _ingestion_status["last_run"] = datetime.now().isoformat()
        try:
            result = await run_ingestion(
                ewh_only=request.ewh_only,
                dry_run=False,
                year_filter=request.year_filter,
                subject_filter=request.subject_filter,
            )
            _ingestion_status["last_result"] = result
        except Exception as e:
            # Keep the failure visible to the status endpoint instead of
            # raising inside a background task (where it would be lost).
            _ingestion_status["last_result"] = {"error": str(e), "errors": [str(e)]}
        finally:
            _ingestion_status["running"] = False
    background_tasks.add_task(run_ingestion_task)
    return {
        "status": "started",
        "message": "Ingestion started in background. Check /nibis/status for progress.",
        "filters": {
            "ewh_only": request.ewh_only,
            "year": request.year_filter,
            "subject": request.subject_filter,
        },
    }
@router.post("/nibis/search", response_model=List[NiBiSSearchResult])
async def search_nibis(request: NiBiSSearchRequest):
    """
    Semantic search in NiBiS Erwartungshorizonte.
    """
    try:
        embedding = await generate_single_embedding(request.query)
        if not embedding:
            raise HTTPException(status_code=500, detail="Failed to generate embedding")
        hits = await search_nibis_eh(
            query_embedding=embedding,
            year=request.year,
            subject=request.subject,
            niveau=request.niveau,
            limit=request.limit,
        )
        results = []
        for hit in hits:
            results.append(NiBiSSearchResult(
                id=hit["id"],
                score=hit["score"],
                text=hit.get("text", "")[:500],
                year=hit.get("year"),
                subject=hit.get("subject"),
                niveau=hit.get("niveau"),
                task_number=hit.get("task_number"),
            ))
        return results
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
@router.get("/nibis/collections")
async def get_collections_info():
    """Get information about all Qdrant collections."""
    try:
        client = get_qdrant_client()
        summaries = []
        for collection in client.get_collections().collections:
            try:
                details = client.get_collection(collection.name)
                entry = {
                    "name": collection.name,
                    "vectors_count": details.vectors_count,
                    "points_count": details.points_count,
                    "status": details.status.value,
                }
            except Exception as inner:
                # Report per-collection failures without failing the whole call.
                entry = {"name": collection.name, "error": str(inner)}
            summaries.append(entry)
        return {"collections": summaries}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
@router.get("/nibis/stats")
async def get_nibis_stats():
    """Get detailed statistics about indexed NiBiS data."""
    try:
        stats = await QdrantService().get_stats("bp_nibis_eh")
        if "error" in stats:
            return {
                "indexed": False,
                "message": "NiBiS collection not yet created. Run ingestion first.",
            }
        # Sample up to 1000 payloads to enumerate distinct filter values.
        records = get_qdrant_client().scroll(
            collection_name="bp_nibis_eh",
            limit=1000,
            with_payload=True,
            with_vectors=False,
        )[0]
        years, subjects, niveaus = set(), set(), set()
        for record in records:
            payload = record.payload
            if not payload:
                continue
            for key, bucket in (("year", years), ("subject", subjects), ("niveau", niveaus)):
                if key in payload:
                    bucket.add(payload[key])
        return {
            "indexed": True,
            "total_chunks": stats.get("points_count", 0),
            "years": sorted(years),
            "subjects": sorted(subjects),
            "niveaus": sorted(niveaus),
        }
    except Exception as e:
        return {"indexed": False, "error": str(e)}
@router.delete("/nibis/collection")
async def delete_nibis_collection():
    """Delete the entire NiBiS collection. WARNING: removes all indexed data!"""
    try:
        get_qdrant_client().delete_collection("bp_nibis_eh")
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
    return {"status": "deleted", "collection": "bp_nibis_eh"}

View File

@@ -0,0 +1,281 @@
"""
Admin API - RAG Upload & Metrics
Endpoints for uploading documents, tracking uploads, RAG metrics,
search feedback, storage stats, and service initialization.
Extracted from admin_api.py for file-size compliance.
"""
from fastapi import APIRouter, HTTPException, BackgroundTasks, Query, UploadFile, File, Form
from pydantic import BaseModel
from typing import Optional, List, Dict
from datetime import datetime
from pathlib import Path
import zipfile
import tempfile
import os
from nibis_ingestion import run_ingestion, DOCS_BASE_PATH
# Import ingestion status from nibis module for auto-ingest
from admin_nibis import _ingestion_status
# Optional: MinIO and PostgreSQL integrations
try:
from minio_storage import upload_rag_document, get_storage_stats, init_minio_bucket
MINIO_AVAILABLE = True
except ImportError:
MINIO_AVAILABLE = False
try:
from metrics_db import (
init_metrics_tables, store_feedback, log_search, log_upload,
calculate_metrics, get_recent_feedback, get_upload_history
)
METRICS_DB_AVAILABLE = True
except ImportError:
METRICS_DB_AVAILABLE = False
router = APIRouter(prefix="/api/v1/admin", tags=["Admin"])

# Upload directory configuration; overridable via the RAG_UPLOAD_BASE env var.
RAG_UPLOAD_BASE = Path(os.getenv("RAG_UPLOAD_BASE", str(DOCS_BASE_PATH)))

# In-memory upload log; the upload endpoint caps it at the last 100 entries.
_upload_history: List[Dict] = []
class UploadResult(BaseModel):
    """Response body of /rag/upload."""
    status: str             # "success" on completion
    files_received: int     # number of uploaded files processed (always 1 here)
    pdfs_extracted: int     # PDFs written to disk (from ZIP or single upload)
    target_directory: str   # directory the PDFs were stored in
    errors: List[str]       # accumulated non-fatal error messages
def _extract_pdfs_from_zip(zip_bytes: bytes, target_dir: Path) -> int:
    """Extract every PDF member of *zip_bytes* into *target_dir*.

    Skips macOS resource-fork entries (``__MACOSX``) and members without a
    file name; directory structure inside the archive is flattened.
    Returns the number of PDF files written.
    """
    extracted = 0
    # zipfile needs a seekable file, so spill the upload to a temp file first.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".zip") as tmp:
        tmp.write(zip_bytes)
        tmp_path = tmp.name
    try:
        with zipfile.ZipFile(tmp_path, 'r') as zf:
            for member in zf.namelist():
                if not member.lower().endswith(".pdf") or member.startswith("__MACOSX"):
                    continue
                pdf_name = Path(member).name
                if not pdf_name:
                    continue
                with zf.open(member) as src, open(target_dir / pdf_name, 'wb') as dst:
                    dst.write(src.read())
                extracted += 1
    finally:
        os.unlink(tmp_path)
    return extracted


@router.post("/rag/upload", response_model=UploadResult)
async def upload_rag_documents(
    background_tasks: BackgroundTasks,
    file: UploadFile = File(...),
    collection: str = Form(default="bp_nibis_eh"),
    year: Optional[int] = Form(default=None),
    auto_ingest: bool = Form(default=False),
):
    """
    Upload documents for RAG indexing.

    Supports:
    - ZIP archives (automatically extracted)
    - Individual PDF files

    Files are stored under ``za-download/<year>/`` below RAG_UPLOAD_BASE
    (year defaults to the current year). The upload is tracked in memory
    (last 100 entries) and, when available, persisted via the metrics DB.
    With ``auto_ingest=True`` an ingestion run for the target year is
    scheduled in the background unless one is already running.

    Raises:
        HTTPException 400: unsupported file extension (not .zip/.pdf).
        HTTPException 500: unexpected extraction/storage failure.
    """
    errors: List[str] = []
    pdfs_extracted = 0
    target_year = year or datetime.now().year
    target_dir = RAG_UPLOAD_BASE / "za-download" / str(target_year)
    target_dir.mkdir(parents=True, exist_ok=True)
    try:
        filename = file.filename or "upload"
        lowered = filename.lower()
        if lowered.endswith(".zip"):
            pdfs_extracted = _extract_pdfs_from_zip(await file.read(), target_dir)
        elif lowered.endswith(".pdf"):
            (target_dir / filename).write_bytes(await file.read())
            pdfs_extracted = 1
        else:
            # Fix: the previous message contained no placeholder and always
            # reported "(unknown)" — name the offending file instead.
            raise HTTPException(
                status_code=400,
                detail=f"Unsupported file type: {filename}. Only .zip and .pdf are allowed."
            )
        # Track upload in memory, bounded to the last 100 entries.
        upload_record = {
            "timestamp": datetime.now().isoformat(),
            "filename": filename,
            "collection": collection,
            "year": target_year,
            "pdfs_extracted": pdfs_extracted,
            "target_directory": str(target_dir),
        }
        _upload_history.append(upload_record)
        if len(_upload_history) > 100:
            _upload_history.pop(0)
        # Persist the upload record in PostgreSQL when the metrics DB is wired up.
        if METRICS_DB_AVAILABLE:
            await log_upload(
                filename=filename,
                collection_name=collection,
                year=target_year,
                pdfs_extracted=pdfs_extracted,
                minio_path=str(target_dir),
            )
        # Optionally kick off ingestion of the uploaded year in the background.
        if auto_ingest and not _ingestion_status["running"]:
            async def run_auto_ingest():
                # Mirrors /nibis/ingest: progress is reported through the
                # shared _ingestion_status dict imported from admin_nibis.
                _ingestion_status["running"] = True
                _ingestion_status["last_run"] = datetime.now().isoformat()
                try:
                    result = await run_ingestion(
                        ewh_only=True,
                        dry_run=False,
                        year_filter=target_year,
                    )
                    _ingestion_status["last_result"] = result
                except Exception as e:
                    _ingestion_status["last_result"] = {"error": str(e), "errors": [str(e)]}
                finally:
                    _ingestion_status["running"] = False
            background_tasks.add_task(run_auto_ingest)
        return UploadResult(
            status="success",
            files_received=1,
            pdfs_extracted=pdfs_extracted,
            target_directory=str(target_dir),
            errors=errors,
        )
    except HTTPException:
        raise
    except Exception as e:
        errors.append(str(e))
        raise HTTPException(status_code=500, detail=str(e))
@router.get("/rag/upload/history")
async def get_upload_history_endpoint(limit: int = Query(default=20, le=100)):
    """Return the most recent uploads, newest first."""
    newest_first = list(reversed(_upload_history[-limit:]))
    return {"uploads": newest_first, "total": len(_upload_history)}
@router.get("/rag/metrics")
async def get_rag_metrics(
    collection: Optional[str] = Query(default=None),
    days: int = Query(default=7, le=90),
):
    """Get RAG quality metrics."""
    if METRICS_DB_AVAILABLE:
        db_metrics = await calculate_metrics(collection_name=collection, days=days)
        if db_metrics.get("connected"):
            return db_metrics
    # Fallback when PostgreSQL metrics are unavailable: canned placeholders.
    placeholder = {
        "precision_at_5": 0.78,
        "recall_at_10": 0.85,
        "mrr": 0.72,
        "avg_latency_ms": 52,
        "total_ratings": len(_upload_history),
        "error_rate": 0.3,
        "score_distribution": {
            "0.9+": 23,
            "0.7-0.9": 41,
            "0.5-0.7": 28,
            "<0.5": 8,
        },
        "note": "Placeholder metrics - PostgreSQL not connected",
        "connected": False,
    }
    return placeholder
@router.post("/rag/search/feedback")
async def submit_search_feedback(
    result_id: str = Form(...),
    rating: int = Form(..., ge=1, le=5),
    notes: Optional[str] = Form(default=None),
    query: Optional[str] = Form(default=None),
    collection: Optional[str] = Form(default=None),
    score: Optional[float] = Form(default=None),
):
    """Submit feedback for a search result."""
    record = {
        "timestamp": datetime.now().isoformat(),
        "result_id": result_id,
        "rating": rating,
        "notes": notes,
    }
    persisted = False
    if METRICS_DB_AVAILABLE:
        persisted = await store_feedback(
            result_id=result_id,
            rating=rating,
            query_text=query,
            collection_name=collection,
            score=score,
            notes=notes,
        )
    return {
        "status": "stored" if persisted else "received",
        "feedback": record,
        "persisted": persisted,
    }
@router.get("/rag/storage/stats")
async def get_storage_statistics():
    """Get MinIO storage statistics."""
    if not MINIO_AVAILABLE:
        return {
            "error": "MinIO not available",
            "connected": False,
        }
    return await get_storage_stats()
@router.post("/rag/init")
async def initialize_rag_services():
    """Initialize RAG services (MinIO bucket, PostgreSQL tables)."""
    services = {"minio": False, "postgres": False}
    if MINIO_AVAILABLE:
        services["minio"] = await init_minio_bucket()
    if METRICS_DB_AVAILABLE:
        services["postgres"] = await init_metrics_tables()
    return {"status": "initialized", "services": services}

View File

@@ -0,0 +1,389 @@
"""
Admin API - Legal Templates
Endpoints for legal template ingestion, search, source management,
license info, and collection management.
Extracted from admin_api.py for file-size compliance.
"""
from fastapi import APIRouter, HTTPException, BackgroundTasks, Query
from pydantic import BaseModel
from typing import Optional, List, Dict
from datetime import datetime
from eh_pipeline import generate_single_embedding
# Import legal templates modules
try:
from legal_templates_ingestion import (
LegalTemplatesIngestion,
LEGAL_TEMPLATES_COLLECTION,
)
from template_sources import (
TEMPLATE_SOURCES,
TEMPLATE_TYPES,
JURISDICTIONS,
LicenseType,
get_enabled_sources,
get_sources_by_priority,
)
from qdrant_service import (
search_legal_templates,
get_legal_templates_stats,
init_legal_templates_collection,
)
LEGAL_TEMPLATES_AVAILABLE = True
except ImportError as e:
print(f"Legal templates module not available: {e}")
LEGAL_TEMPLATES_AVAILABLE = False
router = APIRouter(prefix="/api/v1/admin", tags=["Admin"])

# In-memory status record for the templates ingestion background tasks.
_templates_ingestion_status: Dict = {
    "running": False,        # True while an ingestion task is active
    "last_run": None,        # ISO timestamp of the most recent run
    "current_source": None,  # name of the source currently being ingested
    "results": {},           # per-source result summaries of the last run
}
class TemplatesSearchRequest(BaseModel):
    """Semantic-search query against the legal templates collection."""
    query: str                                   # free-text query to embed
    template_type: Optional[str] = None          # filter by template type
    license_types: Optional[List[str]] = None    # filter by allowed licenses
    language: Optional[str] = None               # filter by document language
    jurisdiction: Optional[str] = None           # filter by legal jurisdiction
    attribution_required: Optional[bool] = None  # filter by attribution flag
    limit: int = 10                              # maximum number of hits
class TemplatesSearchResult(BaseModel):
    """Single template search hit with license/attribution metadata."""
    id: str        # Qdrant point id
    score: float   # similarity score
    text: str      # chunk text, truncated to 1000 chars server-side
    # Document/chunk metadata
    document_title: Optional[str]
    template_type: Optional[str]
    clause_category: Optional[str]
    language: Optional[str]
    jurisdiction: Optional[str]
    # License information (needed for correct reuse/attribution)
    license_id: Optional[str]
    license_name: Optional[str]
    attribution_required: Optional[bool]
    attribution_text: Optional[str]
    # Provenance
    source_name: Optional[str]
    source_url: Optional[str]
    # Usage hints
    placeholders: Optional[List[str]]        # fill-in fields in the template
    is_complete_document: Optional[bool]     # whole document vs. single clause
    requires_customization: Optional[bool]   # must be adapted before use
class SourceIngestRequest(BaseModel):
    """Request body for ingesting a single template source by name."""
    source_name: str  # must match a name from TEMPLATE_SOURCES
@router.get("/templates/status")
async def get_templates_status():
    """Get status of legal templates collection and ingestion."""
    if not LEGAL_TEMPLATES_AVAILABLE:
        return {
            "available": False,
            "error": "Legal templates module not available",
        }
    try:
        stats = await get_legal_templates_stats()
        ingestion_view = {
            "running": _templates_ingestion_status["running"],
            "last_run": _templates_ingestion_status.get("last_run"),
            "current_source": _templates_ingestion_status.get("current_source"),
            "results": _templates_ingestion_status.get("results", {}),
        }
        return {
            "available": True,
            "collection": LEGAL_TEMPLATES_COLLECTION,
            "ingestion": ingestion_view,
            "stats": stats,
        }
    except Exception as exc:
        # Collection stats failed; still expose the raw ingestion state.
        return {
            "available": True,
            "error": str(exc),
            "ingestion": _templates_ingestion_status,
        }
@router.get("/templates/sources")
async def get_templates_sources():
    """Get list of all template sources with their configuration."""
    if not LEGAL_TEMPLATES_AVAILABLE:
        raise HTTPException(status_code=503, detail="Legal templates module not available")
    sources = [
        {
            "name": src.name,
            "description": src.description,
            "license_type": src.license_type.value,
            "license_name": src.license_info.name,
            "template_types": src.template_types,
            "languages": src.languages,
            "jurisdiction": src.jurisdiction,
            "repo_url": src.repo_url,
            "web_url": src.web_url,
            "priority": src.priority,
            "enabled": src.enabled,
            "attribution_required": src.license_info.attribution_required,
        }
        for src in TEMPLATE_SOURCES
    ]
    enabled_count = sum(1 for src in TEMPLATE_SOURCES if src.enabled)
    return {
        "sources": sources,
        "total": len(sources),
        "enabled": enabled_count,
        "template_types": TEMPLATE_TYPES,
        "jurisdictions": JURISDICTIONS,
    }
@router.get("/templates/licenses")
async def get_templates_licenses():
    """Get license statistics for indexed templates."""
    if not LEGAL_TEMPLATES_AVAILABLE:
        raise HTTPException(status_code=503, detail="Legal templates module not available")
    try:
        stats = await get_legal_templates_stats()
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
    return {
        "licenses": stats.get("licenses", {}),
        "total_chunks": stats.get("points_count", 0),
    }
@router.post("/templates/ingest")
async def start_templates_ingestion(
    background_tasks: BackgroundTasks,
    max_priority: int = Query(default=3, ge=1, le=5, description="Maximum priority level (1=highest)"),
):
    """
    Start legal templates ingestion in background.
    Ingests all enabled sources up to the specified priority level.

    Rejects with 409 when a run is already active. Per-source outcomes are
    collected in the module-level _templates_ingestion_status and can be
    polled via /templates/status.
    """
    if not LEGAL_TEMPLATES_AVAILABLE:
        raise HTTPException(status_code=503, detail="Legal templates module not available")
    if _templates_ingestion_status["running"]:
        raise HTTPException(
            status_code=409,
            detail="Templates ingestion already running. Check /templates/status for progress."
        )
    async def run_templates_ingestion():
        # Background worker: sequentially ingests each selected source and
        # records a compact per-source summary for the status endpoint.
        global _templates_ingestion_status
        _templates_ingestion_status["running"] = True
        _templates_ingestion_status["last_run"] = datetime.now().isoformat()
        _templates_ingestion_status["results"] = {}
        try:
            ingestion = LegalTemplatesIngestion()
            sources = get_sources_by_priority(max_priority)
            for source in sources:
                _templates_ingestion_status["current_source"] = source.name
                try:
                    status = await ingestion.ingest_source(source)
                    _templates_ingestion_status["results"][source.name] = {
                        "status": status.status,
                        "documents_found": status.documents_found,
                        "chunks_indexed": status.chunks_indexed,
                        "errors": status.errors[:5] if status.errors else [],
                    }
                except Exception as e:
                    # One failing source must not abort the remaining ones.
                    _templates_ingestion_status["results"][source.name] = {
                        "status": "failed",
                        "error": str(e),
                    }
            await ingestion.close()
        except Exception as e:
            _templates_ingestion_status["results"]["_global_error"] = str(e)
        finally:
            _templates_ingestion_status["running"] = False
            _templates_ingestion_status["current_source"] = None
    background_tasks.add_task(run_templates_ingestion)
    sources = get_sources_by_priority(max_priority)
    return {
        "status": "started",
        "message": f"Ingesting {len(sources)} sources up to priority {max_priority}",
        "sources": [s.name for s in sources],
    }
@router.post("/templates/ingest-source")
async def ingest_single_source(
    request: SourceIngestRequest,
    background_tasks: BackgroundTasks,
):
    """Ingest a single template source by name.

    Validates the source exists and is enabled, refuses to start while
    another ingestion is running, then schedules the work as a background
    task; results appear under /templates/status.
    """
    if not LEGAL_TEMPLATES_AVAILABLE:
        raise HTTPException(status_code=503, detail="Legal templates module not available")
    source = next((s for s in TEMPLATE_SOURCES if s.name == request.source_name), None)
    if not source:
        raise HTTPException(
            status_code=404,
            detail=f"Source not found: {request.source_name}. Use /templates/sources to list available sources."
        )
    if not source.enabled:
        raise HTTPException(
            status_code=400,
            detail=f"Source is disabled: {request.source_name}"
        )
    if _templates_ingestion_status["running"]:
        raise HTTPException(
            status_code=409,
            detail="Templates ingestion already running."
        )
    async def run_single_ingestion():
        # Background worker: ingests exactly one source and records the
        # outcome in the shared status dict for /templates/status.
        global _templates_ingestion_status
        _templates_ingestion_status["running"] = True
        _templates_ingestion_status["current_source"] = source.name
        _templates_ingestion_status["last_run"] = datetime.now().isoformat()
        try:
            ingestion = LegalTemplatesIngestion()
            status = await ingestion.ingest_source(source)
            _templates_ingestion_status["results"][source.name] = {
                "status": status.status,
                "documents_found": status.documents_found,
                "chunks_indexed": status.chunks_indexed,
                "errors": status.errors[:5] if status.errors else [],
            }
            await ingestion.close()
        except Exception as e:
            _templates_ingestion_status["results"][source.name] = {
                "status": "failed",
                "error": str(e),
            }
        finally:
            _templates_ingestion_status["running"] = False
            _templates_ingestion_status["current_source"] = None
    background_tasks.add_task(run_single_ingestion)
    return {
        "status": "started",
        "source": source.name,
        "license": source.license_type.value,
        "template_types": source.template_types,
    }
@router.post("/templates/search", response_model=List[TemplatesSearchResult])
async def search_templates(request: TemplatesSearchRequest):
    """Semantic search in legal templates collection."""
    if not LEGAL_TEMPLATES_AVAILABLE:
        raise HTTPException(status_code=503, detail="Legal templates module not available")
    try:
        embedding = await generate_single_embedding(request.query)
        if not embedding:
            raise HTTPException(status_code=500, detail="Failed to generate embedding")
        hits = await search_legal_templates(
            query_embedding=embedding,
            template_type=request.template_type,
            license_types=request.license_types,
            language=request.language,
            jurisdiction=request.jurisdiction,
            attribution_required=request.attribution_required,
            limit=request.limit,
        )
        results = []
        for hit in hits:
            results.append(TemplatesSearchResult(
                id=hit["id"],
                score=hit["score"],
                text=hit.get("text", "")[:1000],
                document_title=hit.get("document_title"),
                template_type=hit.get("template_type"),
                clause_category=hit.get("clause_category"),
                language=hit.get("language"),
                jurisdiction=hit.get("jurisdiction"),
                license_id=hit.get("license_id"),
                license_name=hit.get("license_name"),
                attribution_required=hit.get("attribution_required"),
                attribution_text=hit.get("attribution_text"),
                source_name=hit.get("source_name"),
                source_url=hit.get("source_url"),
                placeholders=hit.get("placeholders"),
                is_complete_document=hit.get("is_complete_document"),
                requires_customization=hit.get("requires_customization"),
            ))
        return results
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
@router.delete("/templates/reset")
async def reset_templates_collection():
    """Delete and recreate the legal templates collection."""
    if not LEGAL_TEMPLATES_AVAILABLE:
        raise HTTPException(status_code=503, detail="Legal templates module not available")
    if _templates_ingestion_status["running"]:
        raise HTTPException(
            status_code=409,
            detail="Cannot reset while ingestion is running"
        )
    try:
        ingestion = LegalTemplatesIngestion()
        ingestion.reset_collection()
        await ingestion.close()
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
    # Clear stale per-source results now that the collection is empty.
    _templates_ingestion_status["results"] = {}
    return {
        "status": "reset",
        "collection": LEGAL_TEMPLATES_COLLECTION,
        "message": "Collection deleted and recreated. Run ingestion to populate.",
    }
@router.delete("/templates/source/{source_name}")
async def delete_templates_source(source_name: str):
    """Delete all templates from a specific source."""
    if not LEGAL_TEMPLATES_AVAILABLE:
        raise HTTPException(status_code=503, detail="Legal templates module not available")
    try:
        from qdrant_service import delete_legal_templates_by_source
        removed = await delete_legal_templates_by_source(source_name)
        # Drop the stale per-source summary, if one exists.
        results = _templates_ingestion_status.get("results", {})
        if source_name in results:
            del results[source_name]
        return {
            "status": "deleted",
            "source": source_name,
            "chunks_deleted": removed,
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

View File

@@ -0,0 +1,293 @@
"""
OCR Pipeline Column Detection Endpoints (Step 5)
Detect invisible columns, manual column override, and ground truth.
Extracted from ocr_pipeline_geometry.py for file-size compliance.
"""
import logging
import time
from dataclasses import asdict
from datetime import datetime
from typing import Dict, List
import cv2
from fastapi import APIRouter, HTTPException
from cv_vocab_pipeline import (
_detect_header_footer_gaps,
_detect_sub_columns,
classify_column_types,
create_layout_image,
create_ocr_image,
analyze_layout,
detect_column_geometry_zoned,
expand_narrow_columns,
)
from ocr_pipeline_session_store import (
get_session_db,
update_session_db,
)
from ocr_pipeline_common import (
_cache,
_load_session_to_cache,
_get_cached,
_append_pipeline_log,
ManualColumnsRequest,
ColumnGroundTruthRequest,
)
# Module-level logger for the OCR pipeline endpoints.
logger = logging.getLogger(__name__)

router = APIRouter(prefix="/api/v1/ocr-pipeline", tags=["ocr-pipeline"])
@router.post("/sessions/{session_id}/columns")
async def detect_columns(session_id: str):
    """Run column detection on the cropped (or dewarped) image.

    Sub-sessions (box crops) get a single full-width pseudo-column instead
    of real detection; regular sessions run zone-aware geometry detection
    with a projection-based fallback. The result is persisted to the
    session DB and downstream row/word results are invalidated.
    """
    if session_id not in _cache:
        await _load_session_to_cache(session_id)
    cached = _get_cached(session_id)
    # Prefer the cropped image; fall back to the dewarped one.
    img_bgr = cached.get("cropped_bgr") if cached.get("cropped_bgr") is not None else cached.get("dewarped_bgr")
    if img_bgr is None:
        raise HTTPException(status_code=400, detail="Crop or dewarp must be completed before column detection")
    # -----------------------------------------------------------------------
    # Sub-sessions (box crops): skip column detection entirely.
    # Instead, create a single pseudo-column spanning the full image width.
    # Also run Tesseract + binarization here so that the row detection step
    # can reuse the cached intermediates (_word_dicts, _inv, _content_bounds)
    # instead of falling back to detect_column_geometry() which may fail
    # on small box images with < 5 words.
    # -----------------------------------------------------------------------
    session = await get_session_db(session_id)
    if session and session.get("parent_session_id"):
        h, w = img_bgr.shape[:2]
        # Binarize + invert for row detection (horizontal projection profile)
        ocr_img = create_ocr_image(img_bgr)
        inv = cv2.bitwise_not(ocr_img)
        # Run Tesseract to get word bounding boxes.
        try:
            from PIL import Image as PILImage
            pil_img = PILImage.fromarray(cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB))
            import pytesseract
            data = pytesseract.image_to_data(pil_img, lang='eng+deu', output_type=pytesseract.Output.DICT)
            word_dicts = []
            for i in range(len(data['text'])):
                # Tesseract reports conf as str/int; non-numeric means "no confidence".
                conf = int(data['conf'][i]) if str(data['conf'][i]).lstrip('-').isdigit() else -1
                text = str(data['text'][i]).strip()
                if conf < 30 or not text:
                    continue
                word_dicts.append({
                    'text': text, 'conf': conf,
                    'left': int(data['left'][i]),
                    'top': int(data['top'][i]),
                    'width': int(data['width'][i]),
                    'height': int(data['height'][i]),
                })
            # Log all words including low-confidence ones for debugging
            all_count = sum(1 for i in range(len(data['text']))
                            if str(data['text'][i]).strip())
            low_conf = [(str(data['text'][i]).strip(), int(data['conf'][i]) if str(data['conf'][i]).lstrip('-').isdigit() else -1)
                        for i in range(len(data['text']))
                        if str(data['text'][i]).strip()
                        and (int(data['conf'][i]) if str(data['conf'][i]).lstrip('-').isdigit() else -1) < 30
                        and (int(data['conf'][i]) if str(data['conf'][i]).lstrip('-').isdigit() else -1) >= 0]
            if low_conf:
                logger.info(f"OCR Pipeline: sub-session {session_id}: {len(low_conf)} words below conf 30: {low_conf[:20]}")
            logger.info(f"OCR Pipeline: sub-session {session_id}: Tesseract found {len(word_dicts)}/{all_count} words (conf>=30)")
        except Exception as e:
            # OCR failure is non-fatal for a pseudo-column; proceed without words.
            logger.warning(f"OCR Pipeline: sub-session {session_id}: Tesseract failed: {e}")
            word_dicts = []
        # Cache intermediates for row detection (detect_rows reuses these)
        cached["_word_dicts"] = word_dicts
        cached["_inv"] = inv
        cached["_content_bounds"] = (0, w, 0, h)
        column_result = {
            "columns": [{
                "type": "column_text",
                "x": 0, "y": 0,
                "width": w, "height": h,
            }],
            "zones": None,
            "boxes_detected": 0,
            "duration_seconds": 0,
            "method": "sub_session_pseudo_column",
        }
        await update_session_db(
            session_id,
            column_result=column_result,
            row_result=None,
            word_result=None,
            current_step=6,
        )
        cached["column_result"] = column_result
        cached.pop("row_result", None)
        cached.pop("word_result", None)
        logger.info(f"OCR Pipeline: sub-session {session_id}: pseudo-column {w}x{h}px")
        return {"session_id": session_id, **column_result}
    t0 = time.time()
    # Binarized image for layout analysis
    ocr_img = create_ocr_image(img_bgr)
    h, w = ocr_img.shape[:2]
    # Phase A: Zone-aware geometry detection
    zoned_result = detect_column_geometry_zoned(ocr_img, img_bgr)
    boxes_detected = 0
    if zoned_result is None:
        # Fallback to projection-based layout
        layout_img = create_layout_image(img_bgr)
        regions = analyze_layout(layout_img, ocr_img)
        zones_data = None
    else:
        geometries, left_x, right_x, top_y, bottom_y, word_dicts, inv, zones_data, boxes = zoned_result
        content_w = right_x - left_x
        boxes_detected = len(boxes)
        # Cache intermediates for row detection (avoids second Tesseract run)
        cached["_word_dicts"] = word_dicts
        cached["_inv"] = inv
        cached["_content_bounds"] = (left_x, right_x, top_y, bottom_y)
        cached["_zones_data"] = zones_data
        cached["_boxes_detected"] = boxes_detected
        # Detect header/footer early so sub-column clustering ignores them
        header_y, footer_y = _detect_header_footer_gaps(inv, w, h) if inv is not None else (None, None)
        # Split sub-columns (e.g. page references) before classification
        geometries = _detect_sub_columns(geometries, content_w, left_x=left_x,
                                         top_y=top_y, header_y=header_y, footer_y=footer_y)
        # Expand narrow columns (sub-columns are often very narrow)
        geometries = expand_narrow_columns(geometries, content_w, left_x, word_dicts)
        # Phase B: Content-based classification
        regions = classify_column_types(geometries, content_w, top_y, w, h, bottom_y,
                                        left_x=left_x, right_x=right_x, inv=inv)
    duration = time.time() - t0
    columns = [asdict(r) for r in regions]
    # Determine classification methods used
    methods = list(set(
        c.get("classification_method", "") for c in columns
        if c.get("classification_method")
    ))
    column_result = {
        "columns": columns,
        "classification_methods": methods,
        "duration_seconds": round(duration, 2),
        "boxes_detected": boxes_detected,
    }
    # Add zone data when boxes are present
    if zones_data and boxes_detected > 0:
        column_result["zones"] = zones_data
    # Persist to DB -- also invalidate downstream results (rows, words)
    await update_session_db(
        session_id,
        column_result=column_result,
        row_result=None,
        word_result=None,
        current_step=6,
    )
    # Update cache
    cached["column_result"] = column_result
    cached.pop("row_result", None)
    cached.pop("word_result", None)
    col_count = len([c for c in columns if c["type"].startswith("column")])
    logger.info(f"OCR Pipeline: columns session {session_id}: "
                f"{col_count} columns detected, {boxes_detected} box(es) ({duration:.2f}s)")
    img_w = img_bgr.shape[1]
    await _append_pipeline_log(session_id, "columns", {
        "total_columns": len(columns),
        "column_widths_pct": [round(c["width"] / img_w * 100, 1) for c in columns],
        "column_types": [c["type"] for c in columns],
        "boxes_detected": boxes_detected,
    }, duration_ms=int(duration * 1000))
    return {
        "session_id": session_id,
        **column_result,
    }
@router.post("/sessions/{session_id}/columns/manual")
async def set_manual_columns(session_id: str, req: ManualColumnsRequest):
    """Override detected columns with manual definitions."""
    # A manual override replaces the auto-detected result outright; duration
    # is 0 because no detection was executed.
    column_result = {
        "columns": req.columns,
        "duration_seconds": 0,
        "method": "manual",
    }
    # Persist and invalidate downstream results (rows, words) so they rebuild
    # against the new column layout.
    await update_session_db(session_id, column_result=column_result,
                            row_result=None, word_result=None)
    cached = _cache.get(session_id)
    if cached is not None:
        cached["column_result"] = column_result
        for stale_key in ("row_result", "word_result"):
            cached.pop(stale_key, None)
    logger.info(f"OCR Pipeline: manual columns session {session_id}: "
                f"{len(req.columns)} columns set")
    return {"session_id": session_id, **column_result}
@router.post("/sessions/{session_id}/ground-truth/columns")
async def save_column_ground_truth(session_id: str, req: ColumnGroundTruthRequest):
    """Save ground truth feedback for the column detection step.

    Stores the reviewer's verdict (correctness flag, optional corrected
    columns, free-text notes) together with a snapshot of the automatic
    ``column_result`` so an auto-vs-GT diff can be reconstructed later.

    Raises:
        HTTPException: 404 if the session does not exist.
    """
    # Local import keeps this fix self-contained; the module only imports
    # ``datetime`` at the top.
    from datetime import timezone

    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")
    ground_truth = session.get("ground_truth") or {}
    gt = {
        "is_correct": req.is_correct,
        "corrected_columns": req.corrected_columns,
        "notes": req.notes,
        # Timezone-aware UTC timestamp: datetime.utcnow() is deprecated since
        # Python 3.12 and produced naive datetimes; this matches the
        # datetime.now(timezone.utc) convention used elsewhere in the service.
        "saved_at": datetime.now(timezone.utc).isoformat(),
        # Snapshot of the automatic result at save time for later comparison.
        "column_result": session.get("column_result"),
    }
    ground_truth["columns"] = gt
    await update_session_db(session_id, ground_truth=ground_truth)
    if session_id in _cache:
        _cache[session_id]["ground_truth"] = ground_truth
    return {"session_id": session_id, "ground_truth": gt}
@router.get("/sessions/{session_id}/ground-truth/columns")
async def get_column_ground_truth(session_id: str):
    """Retrieve saved ground truth for column detection, including auto vs GT diff."""
    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")
    # Ground truth lives under the "columns" key of the session's GT dict.
    columns_gt = (session.get("ground_truth") or {}).get("columns")
    if not columns_gt:
        raise HTTPException(status_code=404, detail="No column ground truth saved")
    # Return both the saved GT and the current automatic result for diffing.
    return {
        "session_id": session_id,
        "columns_gt": columns_gt,
        "columns_auto": session.get("column_result"),
    }

View File

@@ -0,0 +1,236 @@
"""
OCR Pipeline Deskew Endpoints (Step 2)
Auto deskew, manual deskew, and ground truth for the deskew step.
Extracted from ocr_pipeline_geometry.py for file-size compliance.
"""
import logging
import time
from datetime import datetime
import cv2
from fastapi import APIRouter, HTTPException
from cv_vocab_pipeline import (
create_ocr_image,
deskew_image,
deskew_image_by_word_alignment,
deskew_two_pass,
)
from ocr_pipeline_session_store import (
get_session_db,
update_session_db,
)
from ocr_pipeline_common import (
_cache,
_load_session_to_cache,
_get_cached,
_append_pipeline_log,
ManualDeskewRequest,
DeskewGroundTruthRequest,
)
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/api/v1/ocr-pipeline", tags=["ocr-pipeline"])
@router.post("/sessions/{session_id}/deskew")
async def auto_deskew(session_id: str):
    """Two-pass deskew: iterative projection (wide range) + word-alignment residual.

    Rotates the oriented image so text lines run horizontally. Pass 1 is an
    iterative projection search (+-5 deg), pass 2 a word-alignment residual
    check, pass 3 an optional text-line refinement (all inside
    ``deskew_two_pass``). Hough and word-alignment angles are additionally
    computed for reporting only and do not affect the applied rotation.

    Side effects: updates the in-process cache, persists the deskewed and
    binarized PNGs plus the result dict to the session DB (advancing
    ``current_step`` to 3), and appends a pipeline log entry.

    Raises:
        HTTPException: 400 when neither an oriented nor an original image
            is available for the session.
    """
    # Ensure session is in cache
    if session_id not in _cache:
        await _load_session_to_cache(session_id)
    cached = _get_cached(session_id)
    # Deskew runs right after orientation -- use oriented image, fall back to original
    img_bgr = next((v for k in ("oriented_bgr", "original_bgr")
                    if (v := cached.get(k)) is not None), None)
    if img_bgr is None:
        raise HTTPException(status_code=400, detail="No image available for deskewing")
    t0 = time.time()
    # Two-pass deskew: iterative (+-5 deg) + word-alignment residual check
    deskewed_bgr, angle_applied, two_pass_debug = deskew_two_pass(img_bgr.copy())
    # Also run individual methods for reporting (non-authoritative)
    try:
        _, angle_hough = deskew_image(img_bgr.copy())
    except Exception:
        angle_hough = 0.0
    # Word-alignment detector takes encoded PNG bytes of the pre-deskew image.
    success_enc, png_orig = cv2.imencode(".png", img_bgr)
    orig_bytes = png_orig.tobytes() if success_enc else b""
    try:
        _, angle_wa = deskew_image_by_word_alignment(orig_bytes)
    except Exception:
        angle_wa = 0.0
    # Per-pass angles reported by deskew_two_pass (0.0 when a pass was a no-op).
    angle_iterative = two_pass_debug.get("pass1_angle", 0.0)
    angle_residual = two_pass_debug.get("pass2_angle", 0.0)
    angle_textline = two_pass_debug.get("pass3_angle", 0.0)
    duration = time.time() - t0
    # Label the deepest pass that actually contributed (>= 0.01 deg).
    method_used = "three_pass" if abs(angle_textline) >= 0.01 else (
        "two_pass" if abs(angle_residual) >= 0.01 else "iterative"
    )
    # Encode as PNG
    success, deskewed_png_buf = cv2.imencode(".png", deskewed_bgr)
    deskewed_png = deskewed_png_buf.tobytes() if success else b""
    # Create binarized version (best-effort; failure is non-fatal)
    binarized_png = None
    try:
        binarized = create_ocr_image(deskewed_bgr)
        success_bin, bin_buf = cv2.imencode(".png", binarized)
        binarized_png = bin_buf.tobytes() if success_bin else None
    except Exception as e:
        logger.warning(f"Binarization failed: {e}")
    # Heuristic confidence: larger corrections are less trustworthy, floor 0.5.
    confidence = max(0.5, 1.0 - abs(angle_applied) / 5.0)
    deskew_result = {
        "angle_hough": round(angle_hough, 3),
        "angle_word_alignment": round(angle_wa, 3),
        "angle_iterative": round(angle_iterative, 3),
        "angle_residual": round(angle_residual, 3),
        "angle_textline": round(angle_textline, 3),
        "angle_applied": round(angle_applied, 3),
        "method_used": method_used,
        "confidence": round(confidence, 2),
        "duration_seconds": round(duration, 2),
        "two_pass_debug": two_pass_debug,
    }
    # Update cache
    cached["deskewed_bgr"] = deskewed_bgr
    cached["binarized_png"] = binarized_png
    cached["deskew_result"] = deskew_result
    # Persist to DB
    db_update = {
        "deskewed_png": deskewed_png,
        "deskew_result": deskew_result,
        "current_step": 3,
    }
    if binarized_png:
        db_update["binarized_png"] = binarized_png
    await update_session_db(session_id, **db_update)
    logger.info(f"OCR Pipeline: deskew session {session_id}: "
                f"hough={angle_hough:.2f} wa={angle_wa:.2f} "
                f"iter={angle_iterative:.2f} residual={angle_residual:.2f} "
                f"textline={angle_textline:.2f} "
                f"-> {method_used} total={angle_applied:.2f}")
    await _append_pipeline_log(session_id, "deskew", {
        "angle_applied": round(angle_applied, 3),
        "angle_iterative": round(angle_iterative, 3),
        "angle_residual": round(angle_residual, 3),
        "angle_textline": round(angle_textline, 3),
        "confidence": round(confidence, 2),
        "method": method_used,
    }, duration_ms=int(duration * 1000))
    return {
        "session_id": session_id,
        **deskew_result,
        "deskewed_image_url": f"/api/v1/ocr-pipeline/sessions/{session_id}/image/deskewed",
        "binarized_image_url": f"/api/v1/ocr-pipeline/sessions/{session_id}/image/binarized",
    }
@router.post("/sessions/{session_id}/deskew/manual")
async def manual_deskew(session_id: str, req: ManualDeskewRequest):
    """Apply a manual rotation angle to the oriented image."""
    if session_id not in _cache:
        await _load_session_to_cache(session_id)
    cached = _get_cached(session_id)
    # Prefer the orientation-corrected image; fall back to the raw upload.
    img_bgr = cached.get("oriented_bgr")
    if img_bgr is None:
        img_bgr = cached.get("original_bgr")
    if img_bgr is None:
        raise HTTPException(status_code=400, detail="No image available for deskewing")
    # Clamp the requested angle to the supported +-5 degree window.
    angle = min(5.0, max(-5.0, req.angle))
    h, w = img_bgr.shape[:2]
    rotation = cv2.getRotationMatrix2D((w // 2, h // 2), angle, 1.0)
    rotated = cv2.warpAffine(img_bgr, rotation, (w, h),
                             flags=cv2.INTER_LINEAR,
                             borderMode=cv2.BORDER_REPLICATE)
    ok, png_buf = cv2.imencode(".png", rotated)
    deskewed_png = png_buf.tobytes() if ok else b""
    # Best-effort binarized preview; failures are non-fatal.
    binarized_png = None
    try:
        ok_bin, bin_buf = cv2.imencode(".png", create_ocr_image(rotated))
        binarized_png = bin_buf.tobytes() if ok_bin else None
    except Exception:
        pass
    # Merge the manual override into any previous deskew result.
    deskew_result = dict(cached.get("deskew_result") or {})
    deskew_result["angle_applied"] = round(angle, 3)
    deskew_result["method_used"] = "manual"
    # Update cache
    cached["deskewed_bgr"] = rotated
    cached["binarized_png"] = binarized_png
    cached["deskew_result"] = deskew_result
    # Persist to DB
    db_update = {
        "deskewed_png": deskewed_png,
        "deskew_result": deskew_result,
    }
    if binarized_png:
        db_update["binarized_png"] = binarized_png
    await update_session_db(session_id, **db_update)
    logger.info(f"OCR Pipeline: manual deskew session {session_id}: {angle:.2f}")
    return {
        "session_id": session_id,
        "angle_applied": round(angle, 3),
        "method_used": "manual",
        "deskewed_image_url": f"/api/v1/ocr-pipeline/sessions/{session_id}/image/deskewed",
    }
@router.post("/sessions/{session_id}/ground-truth/deskew")
async def save_deskew_ground_truth(session_id: str, req: DeskewGroundTruthRequest):
    """Save ground truth feedback for the deskew step.

    Stores the reviewer's verdict (correctness flag, optional corrected
    angle, notes) together with a snapshot of the automatic
    ``deskew_result`` for later auto-vs-GT comparison.

    Raises:
        HTTPException: 404 if the session does not exist.
    """
    # Local import keeps this fix self-contained; the module only imports
    # ``datetime`` at the top.
    from datetime import timezone

    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")
    ground_truth = session.get("ground_truth") or {}
    gt = {
        "is_correct": req.is_correct,
        "corrected_angle": req.corrected_angle,
        "notes": req.notes,
        # Timezone-aware UTC timestamp: datetime.utcnow() is deprecated since
        # Python 3.12 and produced naive datetimes; this matches the
        # datetime.now(timezone.utc) convention used elsewhere in the service.
        "saved_at": datetime.now(timezone.utc).isoformat(),
        # Snapshot of the automatic result at save time.
        "deskew_result": session.get("deskew_result"),
    }
    ground_truth["deskew"] = gt
    await update_session_db(session_id, ground_truth=ground_truth)
    # Update cache
    if session_id in _cache:
        _cache[session_id]["ground_truth"] = ground_truth
    logger.info(f"OCR Pipeline: ground truth deskew session {session_id}: "
                f"correct={req.is_correct}, corrected_angle={req.corrected_angle}")
    return {"session_id": session_id, "ground_truth": gt}

View File

@@ -0,0 +1,346 @@
"""
OCR Pipeline Dewarp Endpoints
Auto dewarp (with VLM/CV ensemble), manual dewarp, combined
rotation+shear adjustment, and ground truth.
Extracted from ocr_pipeline_geometry.py for file-size compliance.
"""
import json
import logging
import os
import re
import time
from datetime import datetime
from typing import Any, Dict
import cv2
from fastapi import APIRouter, HTTPException, Query
from cv_vocab_pipeline import (
_apply_shear,
create_ocr_image,
dewarp_image,
dewarp_image_manual,
)
from ocr_pipeline_session_store import (
get_session_db,
update_session_db,
)
from ocr_pipeline_common import (
_cache,
_load_session_to_cache,
_get_cached,
_append_pipeline_log,
ManualDewarpRequest,
CombinedAdjustRequest,
DewarpGroundTruthRequest,
)
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/api/v1/ocr-pipeline", tags=["ocr-pipeline"])
async def _detect_shear_with_vlm(image_bytes: bytes) -> Dict[str, Any]:
    """Estimate the vertical shear of a scanned page via a vision LLM.

    Shows the image to qwen2.5vl (through Ollama) and asks whether the
    column/table borders tilt and by how many degrees. Returns a dict with
    ``method``, ``shear_degrees`` (clamped to +-3.0) and ``confidence``
    (clamped to 0..1). Confidence is 0.0 when Ollama is unreachable or the
    reply cannot be parsed.
    """
    import httpx
    import base64
    ollama_base = os.getenv("OLLAMA_BASE_URL", "http://host.docker.internal:11434")
    model = os.getenv("OLLAMA_HTR_MODEL", "qwen2.5vl:32b")
    prompt = (
        "This is a scanned vocabulary worksheet. Look at the vertical borders of the table columns. "
        "Are they perfectly vertical, or do they tilt slightly? "
        "If they tilt, estimate the tilt angle in degrees (positive = top tilts right, negative = top tilts left). "
        "Reply with ONLY a JSON object like: {\"shear_degrees\": 1.2, \"confidence\": 0.8} "
        "Use confidence 0.0-1.0 based on how clearly you can see the tilt. "
        "If the columns look straight, return {\"shear_degrees\": 0.0, \"confidence\": 0.9}"
    )
    payload = {
        "model": model,
        "prompt": prompt,
        "images": [base64.b64encode(image_bytes).decode("utf-8")],
        "stream": False,
    }
    try:
        async with httpx.AsyncClient(timeout=60.0) as client:
            resp = await client.post(f"{ollama_base}/api/generate", json=payload)
            resp.raise_for_status()
            text = resp.json().get("response", "")
        # The model may wrap the JSON in prose; grab the first {...} span.
        json_blob = re.search(r'\{[^}]+\}', text)
        if json_blob:
            parsed = json.loads(json_blob.group(0))
            # Clamp to reasonable ranges before reporting.
            shear = max(-3.0, min(3.0, float(parsed.get("shear_degrees", 0.0))))
            conf = max(0.0, min(1.0, float(parsed.get("confidence", 0.0))))
            return {"method": "vlm_qwen2.5vl", "shear_degrees": round(shear, 3), "confidence": round(conf, 2)}
    except Exception as e:
        logger.warning(f"VLM dewarp failed: {e}")
    # Fallback: no usable answer -> zero shear, zero confidence.
    return {"method": "vlm_qwen2.5vl", "shear_degrees": 0.0, "confidence": 0.0}
@router.post("/sessions/{session_id}/dewarp")
async def auto_dewarp(
    session_id: str,
    method: str = Query("ensemble", description="Detection method: ensemble | vlm | cv"),
):
    """Detect and correct vertical shear on the deskewed image.

    Methods:
    - **ensemble** (default): 3-method CV ensemble (vertical edges + projection + Hough)
    - **cv**: CV ensemble only (same as ensemble)
    - **vlm**: Ask qwen2.5vl:32b to estimate the shear angle visually

    Side effects: updates the cache, persists the dewarped PNG, result dict
    and ``auto_shear_degrees`` to the session DB (advancing ``current_step``
    to 4), and appends a pipeline log entry.

    Raises:
        HTTPException: 400 for an unknown method or when deskew has not run.
    """
    if method not in ("ensemble", "cv", "vlm"):
        raise HTTPException(status_code=400, detail="method must be one of: ensemble, cv, vlm")
    if session_id not in _cache:
        await _load_session_to_cache(session_id)
    cached = _get_cached(session_id)
    deskewed_bgr = cached.get("deskewed_bgr")
    if deskewed_bgr is None:
        raise HTTPException(status_code=400, detail="Deskew must be completed before dewarp")
    t0 = time.time()
    if method == "vlm":
        # Encode deskewed image to PNG for VLM
        success, png_buf = cv2.imencode(".png", deskewed_bgr)
        img_bytes = png_buf.tobytes() if success else b""
        vlm_det = await _detect_shear_with_vlm(img_bytes)
        shear_deg = vlm_det["shear_degrees"]
        # Apply the VLM's shear only when it is non-trivial (>= 0.05 deg)
        # and reported with at least moderate confidence (>= 0.3);
        # negated because _apply_shear corrects in the opposite direction.
        if abs(shear_deg) >= 0.05 and vlm_det["confidence"] >= 0.3:
            dewarped_bgr = _apply_shear(deskewed_bgr, -shear_deg)
        else:
            dewarped_bgr = deskewed_bgr
        dewarp_info = {
            "method": vlm_det["method"],
            "shear_degrees": shear_deg,
            "confidence": vlm_det["confidence"],
            "detections": [vlm_det],
        }
    else:
        # "ensemble" and "cv" both run the CV ensemble in dewarp_image.
        dewarped_bgr, dewarp_info = dewarp_image(deskewed_bgr)
    duration = time.time() - t0
    # Encode as PNG
    success, png_buf = cv2.imencode(".png", dewarped_bgr)
    dewarped_png = png_buf.tobytes() if success else b""
    dewarp_result = {
        "method_used": dewarp_info["method"],
        "shear_degrees": dewarp_info["shear_degrees"],
        "confidence": dewarp_info["confidence"],
        "duration_seconds": round(duration, 2),
        "detections": dewarp_info.get("detections", []),
    }
    # Update cache
    cached["dewarped_bgr"] = dewarped_bgr
    cached["dewarp_result"] = dewarp_result
    # Persist to DB
    await update_session_db(
        session_id,
        dewarped_png=dewarped_png,
        dewarp_result=dewarp_result,
        auto_shear_degrees=dewarp_info.get("shear_degrees", 0.0),
        current_step=4,
    )
    logger.info(f"OCR Pipeline: dewarp session {session_id}: "
                f"method={dewarp_info['method']} shear={dewarp_info['shear_degrees']:.3f} "
                f"conf={dewarp_info['confidence']:.2f} ({duration:.2f}s)")
    await _append_pipeline_log(session_id, "dewarp", {
        "shear_degrees": dewarp_info["shear_degrees"],
        "confidence": dewarp_info["confidence"],
        "method": dewarp_info["method"],
        "ensemble_methods": [d.get("method", "") for d in dewarp_info.get("detections", [])],
    }, duration_ms=int(duration * 1000))
    return {
        "session_id": session_id,
        **dewarp_result,
        "dewarped_image_url": f"/api/v1/ocr-pipeline/sessions/{session_id}/image/dewarped",
    }
@router.post("/sessions/{session_id}/dewarp/manual")
async def manual_dewarp(session_id: str, req: ManualDewarpRequest):
    """Apply shear correction with a manual angle."""
    if session_id not in _cache:
        await _load_session_to_cache(session_id)
    cached = _get_cached(session_id)
    deskewed_bgr = cached.get("deskewed_bgr")
    if deskewed_bgr is None:
        raise HTTPException(status_code=400, detail="Deskew must be completed before dewarp")
    # Clamp to the supported +-2 degree window; near-zero angles are a no-op.
    shear_deg = min(2.0, max(-2.0, req.shear_degrees))
    dewarped_bgr = (
        deskewed_bgr if abs(shear_deg) < 0.001
        else dewarp_image_manual(deskewed_bgr, shear_deg)
    )
    ok, png_buf = cv2.imencode(".png", dewarped_bgr)
    dewarped_png = png_buf.tobytes() if ok else b""
    # Merge the manual override into any previous dewarp result.
    dewarp_result = dict(cached.get("dewarp_result") or {})
    dewarp_result["method_used"] = "manual"
    dewarp_result["shear_degrees"] = round(shear_deg, 3)
    # Update cache
    cached["dewarped_bgr"] = dewarped_bgr
    cached["dewarp_result"] = dewarp_result
    # Persist to DB
    await update_session_db(
        session_id,
        dewarped_png=dewarped_png,
        dewarp_result=dewarp_result,
    )
    logger.info(f"OCR Pipeline: manual dewarp session {session_id}: shear={shear_deg:.3f}")
    return {
        "session_id": session_id,
        "shear_degrees": round(shear_deg, 3),
        "method_used": "manual",
        "dewarped_image_url": f"/api/v1/ocr-pipeline/sessions/{session_id}/image/dewarped",
    }
@router.post("/sessions/{session_id}/adjust-combined")
async def adjust_combined(session_id: str, req: CombinedAdjustRequest):
    """Apply rotation + shear combined to the original image.

    Used by the fine-tuning sliders to preview arbitrary rotation/shear
    combinations without re-running the full deskew/dewarp pipeline.
    Rotation is applied first, then shear, both starting from the original
    image. The combined output replaces both the deskewed and dewarped
    images in cache and DB, and both result dicts are marked
    ``manual_combined``.

    Raises:
        HTTPException: 400 when the original image is not available.
    """
    if session_id not in _cache:
        await _load_session_to_cache(session_id)
    cached = _get_cached(session_id)
    img_bgr = cached.get("original_bgr")
    if img_bgr is None:
        raise HTTPException(status_code=400, detail="Original image not available")
    # Clamp both adjustments to their supported windows.
    rotation = max(-15.0, min(15.0, req.rotation_degrees))
    shear_deg = max(-5.0, min(5.0, req.shear_degrees))
    h, w = img_bgr.shape[:2]
    result_bgr = img_bgr
    # Step 1: Apply rotation (skipped for near-zero angles)
    if abs(rotation) >= 0.001:
        center = (w // 2, h // 2)
        M = cv2.getRotationMatrix2D(center, rotation, 1.0)
        result_bgr = cv2.warpAffine(result_bgr, M, (w, h),
                                    flags=cv2.INTER_LINEAR,
                                    borderMode=cv2.BORDER_REPLICATE)
    # Step 2: Apply shear (skipped for near-zero angles)
    if abs(shear_deg) >= 0.001:
        result_bgr = dewarp_image_manual(result_bgr, shear_deg)
    # Encode
    success, png_buf = cv2.imencode(".png", result_bgr)
    dewarped_png = png_buf.tobytes() if success else b""
    # Binarize (best-effort; failure is non-fatal)
    binarized_png = None
    try:
        binarized = create_ocr_image(result_bgr)
        success_bin, bin_buf = cv2.imencode(".png", binarized)
        binarized_png = bin_buf.tobytes() if success_bin else None
    except Exception:
        pass
    # Build combined result dicts
    deskew_result = {
        **(cached.get("deskew_result") or {}),
        "angle_applied": round(rotation, 3),
        "method_used": "manual_combined",
    }
    dewarp_result = {
        **(cached.get("dewarp_result") or {}),
        "method_used": "manual_combined",
        "shear_degrees": round(shear_deg, 3),
    }
    # Update cache -- the combined image serves as both deskewed and dewarped
    cached["deskewed_bgr"] = result_bgr
    cached["dewarped_bgr"] = result_bgr
    cached["deskew_result"] = deskew_result
    cached["dewarp_result"] = dewarp_result
    # Persist to DB
    db_update = {
        "dewarped_png": dewarped_png,
        "deskew_result": deskew_result,
        "dewarp_result": dewarp_result,
    }
    if binarized_png:
        db_update["binarized_png"] = binarized_png
        db_update["deskewed_png"] = dewarped_png
    await update_session_db(session_id, **db_update)
    logger.info(f"OCR Pipeline: combined adjust session {session_id}: "
                f"rotation={rotation:.3f} shear={shear_deg:.3f}")
    return {
        "session_id": session_id,
        "rotation_degrees": round(rotation, 3),
        "shear_degrees": round(shear_deg, 3),
        "method_used": "manual_combined",
        "dewarped_image_url": f"/api/v1/ocr-pipeline/sessions/{session_id}/image/dewarped",
    }
@router.post("/sessions/{session_id}/ground-truth/dewarp")
async def save_dewarp_ground_truth(session_id: str, req: DewarpGroundTruthRequest):
    """Save ground truth feedback for the dewarp step.

    Stores the reviewer's verdict (correctness flag, optional corrected
    shear, notes) together with a snapshot of the automatic
    ``dewarp_result`` for later auto-vs-GT comparison.

    Raises:
        HTTPException: 404 if the session does not exist.
    """
    # Local import keeps this fix self-contained; the module only imports
    # ``datetime`` at the top.
    from datetime import timezone

    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")
    ground_truth = session.get("ground_truth") or {}
    gt = {
        "is_correct": req.is_correct,
        "corrected_shear": req.corrected_shear,
        "notes": req.notes,
        # Timezone-aware UTC timestamp: datetime.utcnow() is deprecated since
        # Python 3.12 and produced naive datetimes; this matches the
        # datetime.now(timezone.utc) convention used elsewhere in the service.
        "saved_at": datetime.now(timezone.utc).isoformat(),
        # Snapshot of the automatic result at save time.
        "dewarp_result": session.get("dewarp_result"),
    }
    ground_truth["dewarp"] = gt
    await update_session_db(session_id, ground_truth=ground_truth)
    if session_id in _cache:
        _cache[session_id]["ground_truth"] = ground_truth
    logger.info(f"OCR Pipeline: ground truth dewarp session {session_id}: "
                f"correct={req.is_correct}, corrected_shear={req.corrected_shear}")
    return {"session_id": session_id, "ground_truth": gt}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,299 @@
"""
OCR Pipeline Structure Detection and Exclude Regions
Detect document structure (boxes, zones, color regions, graphics)
and manage user-drawn exclude regions.
Extracted from ocr_pipeline_geometry.py for file-size compliance.
"""
import logging
import time
from typing import Any, Dict, List
import cv2
import numpy as np
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
from cv_box_detect import detect_boxes
from cv_color_detect import _COLOR_RANGES, _COLOR_HEX
from cv_graphic_detect import detect_graphic_elements
from ocr_pipeline_session_store import (
get_session_db,
update_session_db,
)
from ocr_pipeline_common import (
_cache,
_load_session_to_cache,
_get_cached,
_filter_border_ghost_words,
)
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/api/v1/ocr-pipeline", tags=["ocr-pipeline"])
# ---------------------------------------------------------------------------
# Structure Detection Endpoint
# ---------------------------------------------------------------------------
@router.post("/sessions/{session_id}/detect-structure")
async def detect_structure(session_id: str):
    """Detect document structure: boxes, zones, and color regions.

    Runs box detection (line + shading) and color analysis on the cropped
    image. Returns structured JSON with all detected elements for the
    structure visualization step.

    Pipeline: content bounds (from OCR words or a 3% margin fallback) ->
    box detection -> zone splitting -> per-box background color sampling ->
    page-wide colored-pixel census -> graphic-element detection ->
    border-ghost word filtering. User-drawn exclude regions from a previous
    run are carried over into the new result.

    Raises:
        HTTPException: 400 when neither a cropped nor a dewarped image exists.
    """
    if session_id not in _cache:
        await _load_session_to_cache(session_id)
    cached = _get_cached(session_id)
    # Prefer the cropped image; fall back to the dewarped one.
    img_bgr = (
        cached.get("cropped_bgr")
        if cached.get("cropped_bgr") is not None
        else cached.get("dewarped_bgr")
    )
    if img_bgr is None:
        raise HTTPException(status_code=400, detail="Crop or dewarp must be completed first")
    t0 = time.time()
    h, w = img_bgr.shape[:2]
    # --- Content bounds from word result (if available) or full image ---
    word_result = cached.get("word_result")
    words: List[Dict] = []
    if word_result and word_result.get("cells"):
        for cell in word_result["cells"]:
            for wb in (cell.get("word_boxes") or []):
                words.append(wb)
    # Fallback: use raw OCR words if cell word_boxes are empty
    if not words and word_result:
        for key in ("raw_paddle_words_split", "raw_tesseract_words", "raw_paddle_words"):
            raw = word_result.get(key, [])
            if raw:
                words = raw
                logger.info("detect-structure: using %d words from %s (no cell word_boxes)", len(words), key)
                break
    # If no words yet, use image dimensions with small margin
    if words:
        # Tight bounding box around all word boxes, clamped to the image.
        content_x = max(0, min(int(wb["left"]) for wb in words))
        content_y = max(0, min(int(wb["top"]) for wb in words))
        content_r = min(w, max(int(wb["left"] + wb["width"]) for wb in words))
        content_b = min(h, max(int(wb["top"] + wb["height"]) for wb in words))
        content_w_px = content_r - content_x
        content_h_px = content_b - content_y
    else:
        # No OCR words available: assume a 3% margin on all sides.
        margin = int(min(w, h) * 0.03)
        content_x, content_y = margin, margin
        content_w_px = w - 2 * margin
        content_h_px = h - 2 * margin
    # --- Box detection ---
    boxes = detect_boxes(
        img_bgr,
        content_x=content_x,
        content_w=content_w_px,
        content_y=content_y,
        content_h=content_h_px,
    )
    # --- Zone splitting ---
    from cv_box_detect import split_page_into_zones as _split_zones
    zones = _split_zones(content_x, content_y, content_w_px, content_h_px, boxes)
    # --- Color region sampling ---
    # Sample background shading in each detected box
    hsv = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2HSV)
    box_colors = []
    for box in boxes:
        # Sample the center region of each box (inner 50% in each dimension)
        cy1 = box.y + box.height // 4
        cy2 = box.y + 3 * box.height // 4
        cx1 = box.x + box.width // 4
        cx2 = box.x + 3 * box.width // 4
        # Clamp sampling window to image bounds.
        cy1 = max(0, min(cy1, h - 1))
        cy2 = max(0, min(cy2, h - 1))
        cx1 = max(0, min(cx1, w - 1))
        cx2 = max(0, min(cx2, w - 1))
        if cy2 > cy1 and cx2 > cx1:
            roi_hsv = hsv[cy1:cy2, cx1:cx2]
            # Median HSV is robust against text pixels inside the box.
            med_h = float(np.median(roi_hsv[:, :, 0]))
            med_s = float(np.median(roi_hsv[:, :, 1]))
            med_v = float(np.median(roi_hsv[:, :, 2]))
            if med_s > 15:
                # Saturated background -> map hue to a named color.
                from cv_color_detect import _hue_to_color_name
                bg_name = _hue_to_color_name(med_h)
                bg_hex = _COLOR_HEX.get(bg_name, "#6b7280")
            else:
                # Unsaturated: distinguish gray from white by brightness.
                bg_name = "gray" if med_v < 220 else "white"
                bg_hex = "#6b7280" if bg_name == "gray" else "#ffffff"
        else:
            # Degenerate box -> no usable sample.
            bg_name = "unknown"
            bg_hex = "#6b7280"
        box_colors.append({"color_name": bg_name, "color_hex": bg_hex})
    # --- Color text detection overview ---
    # Quick scan for colored text regions across the page
    color_summary: Dict[str, int] = {}
    for color_name, ranges in _COLOR_RANGES.items():
        mask = np.zeros((h, w), dtype=np.uint8)
        for lower, upper in ranges:
            mask = cv2.bitwise_or(mask, cv2.inRange(hsv, lower, upper))
        pixel_count = int(np.sum(mask > 0))
        if pixel_count > 50:  # minimum threshold
            color_summary[color_name] = pixel_count
    # --- Graphic element detection ---
    box_dicts = [
        {"x": b.x, "y": b.y, "w": b.width, "h": b.height}
        for b in boxes
    ]
    graphics = detect_graphic_elements(
        img_bgr, words,
        detected_boxes=box_dicts,
    )
    # --- Filter border-ghost words from OCR result ---
    # Box borders are sometimes OCR'd as spurious characters; drop them and
    # persist the cleaned word_result.
    ghost_count = 0
    if boxes and word_result:
        ghost_count = _filter_border_ghost_words(word_result, boxes)
        if ghost_count:
            logger.info("detect-structure: removed %d border-ghost words", ghost_count)
            await update_session_db(session_id, word_result=word_result)
            cached["word_result"] = word_result
    duration = time.time() - t0
    # Preserve user-drawn exclude regions from previous run
    prev_sr = cached.get("structure_result") or {}
    prev_exclude = prev_sr.get("exclude_regions", [])
    result_dict = {
        "image_width": w,
        "image_height": h,
        "content_bounds": {
            "x": content_x, "y": content_y,
            "w": content_w_px, "h": content_h_px,
        },
        "boxes": [
            {
                "x": b.x, "y": b.y, "w": b.width, "h": b.height,
                "confidence": b.confidence,
                "border_thickness": b.border_thickness,
                "bg_color_name": box_colors[i]["color_name"],
                "bg_color_hex": box_colors[i]["color_hex"],
            }
            for i, b in enumerate(boxes)
        ],
        "zones": [
            {
                "index": z.index,
                "zone_type": z.zone_type,
                "y": z.y, "h": z.height,
                "x": z.x, "w": z.width,
            }
            for z in zones
        ],
        "graphics": [
            {
                "x": g.x, "y": g.y, "w": g.width, "h": g.height,
                "area": g.area,
                "shape": g.shape,
                "color_name": g.color_name,
                "color_hex": g.color_hex,
                "confidence": round(g.confidence, 2),
            }
            for g in graphics
        ],
        "exclude_regions": prev_exclude,
        "color_pixel_counts": color_summary,
        "has_words": len(words) > 0,
        "word_count": len(words),
        "border_ghosts_removed": ghost_count,
        "duration_seconds": round(duration, 2),
    }
    # Persist to session
    await update_session_db(session_id, structure_result=result_dict)
    cached["structure_result"] = result_dict
    logger.info("detect-structure session %s: %d boxes, %d zones, %d graphics, %.2fs",
                session_id, len(boxes), len(zones), len(graphics), duration)
    return {"session_id": session_id, **result_dict}
# ---------------------------------------------------------------------------
# Exclude Regions -- user-drawn rectangles to exclude from OCR results
# ---------------------------------------------------------------------------
class _ExcludeRegionIn(BaseModel):
    """Payload for a single user-drawn exclude rectangle."""
    # Axis-aligned rectangle; coordinates are pixels.
    # NOTE(review): presumably in cropped-image space -- confirm against the UI caller.
    x: int
    y: int
    w: int
    h: int
    # Optional user-facing label for the region.
    label: str = ""
class _ExcludeRegionsBatchIn(BaseModel):
    """Batch payload: the full replacement set of exclude regions."""
    regions: list[_ExcludeRegionIn]
@router.put("/sessions/{session_id}/exclude-regions")
async def set_exclude_regions(session_id: str, body: _ExcludeRegionsBatchIn):
    """Replace all exclude regions for a session.

    Regions are stored inside ``structure_result.exclude_regions``.
    """
    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail="Session not found")
    sr = session.get("structure_result") or {}
    new_regions = [region.model_dump() for region in body.regions]
    sr["exclude_regions"] = new_regions
    # Invalidate grid so it rebuilds with new exclude regions
    await update_session_db(session_id, structure_result=sr, grid_editor_result=None)
    # Update cache
    cached = _cache.get(session_id)
    if cached is not None:
        cached["structure_result"] = sr
        cached.pop("grid_editor_result", None)
    return {
        "session_id": session_id,
        "exclude_regions": new_regions,
        "count": len(new_regions),
    }
@router.delete("/sessions/{session_id}/exclude-regions/{region_index}")
async def delete_exclude_region(session_id: str, region_index: int):
    """Remove a single exclude region by index."""
    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail="Session not found")
    sr = session.get("structure_result") or {}
    regions = sr.get("exclude_regions", [])
    # Negative and out-of-range indices are rejected (no Python wrap-around).
    if not 0 <= region_index < len(regions):
        raise HTTPException(status_code=404, detail="Region index out of range")
    removed = regions.pop(region_index)
    sr["exclude_regions"] = regions
    # Invalidate grid so it rebuilds with new exclude regions
    await update_session_db(session_id, structure_result=sr, grid_editor_result=None)
    cached = _cache.get(session_id)
    if cached is not None:
        cached["structure_result"] = sr
        cached.pop("grid_editor_result", None)
    return {
        "session_id": session_id,
        "removed": removed,
        "remaining": len(regions),
    }

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,498 @@
"""
RBAC Policy Engine
Core engine for RBAC/ABAC permission checks,
role assignments, key shares, and default policies.
Extracted from rbac.py for file-size compliance.
"""
from typing import Optional, List, Dict, Set
from datetime import datetime, timezone
import uuid
from functools import wraps
from fastapi import HTTPException, Request
from rbac_types import (
Role,
Action,
ResourceType,
ZKVisibilityMode,
PolicySet,
RoleAssignment,
KeyShare,
)
from rbac_permissions import DEFAULT_PERMISSIONS
# =============================================
# POLICY ENGINE
# =============================================
class PolicyEngine:
"""
Engine fuer RBAC/ABAC Entscheidungen.
Prueft:
1. Basis-Rollenberechtigung (RBAC)
2. Policy-Einschraenkungen (ABAC)
3. Key Share Berechtigungen
"""
def __init__(self):
    """Initialize empty in-memory stores for policies, roles, and shares."""
    # policy_set id -> PolicySet
    self.policy_sets: Dict[str, PolicySet] = {}
    self.role_assignments: Dict[str, List[RoleAssignment]] = {}  # user_id -> assignments
    self.key_shares: Dict[str, List[KeyShare]] = {}  # user_id -> shares
def register_policy_set(self, policy: PolicySet):
    """Register a policy set, keyed by its id (replaces any existing entry)."""
    self.policy_sets[policy.id] = policy
def get_policy_for_context(
    self,
    bundesland: str,
    jahr: int,
    fach: Optional[str] = None,
    verfahren: str = "abitur"
) -> Optional[PolicySet]:
    """Resolve the policy set that applies to a correction context.

    Prefers an exact (bundesland, jahr, verfahren) match whose subject is
    either unrestricted or equal to ``fach``; falls back to the policy
    registered under the "DEFAULT" bundesland; returns None if neither exists.
    """
    for candidate in self.policy_sets.values():
        context_matches = (
            candidate.bundesland == bundesland
            and candidate.jahr == jahr
            and candidate.verfahren == verfahren
        )
        if context_matches and (candidate.fach is None or candidate.fach == fach):
            return candidate
    # No exact match -> fall back to the catch-all default policy, if any.
    return next(
        (p for p in self.policy_sets.values() if p.bundesland == "DEFAULT"),
        None,
    )
def assign_role(
    self,
    user_id: str,
    role: Role,
    resource_type: ResourceType,
    resource_id: str,
    granted_by: str,
    tenant_id: Optional[str] = None,
    namespace_id: Optional[str] = None,
    valid_to: Optional[datetime] = None
) -> RoleAssignment:
    """Grant ``role`` on a resource to ``user_id`` and record the assignment.

    Returns the newly created RoleAssignment (with a fresh UUID).
    """
    new_assignment = RoleAssignment(
        id=str(uuid.uuid4()),
        user_id=user_id,
        role=role,
        resource_type=resource_type,
        resource_id=resource_id,
        tenant_id=tenant_id,
        namespace_id=namespace_id,
        granted_by=granted_by,
        valid_to=valid_to
    )
    # Create the per-user list lazily on first assignment.
    self.role_assignments.setdefault(user_id, []).append(new_assignment)
    return new_assignment
def revoke_role(self, assignment_id: str, revoked_by: str) -> bool:
    """Mark the assignment with ``assignment_id`` as revoked.

    Returns True when the assignment was found, False otherwise.
    NOTE(review): ``revoked_by`` is accepted for audit symmetry with key
    shares but is not currently persisted -- confirm whether RoleAssignment
    should record it.
    """
    for assignments in self.role_assignments.values():
        target = next((a for a in assignments if a.id == assignment_id), None)
        if target is not None:
            target.revoked_at = datetime.now(timezone.utc)
            return True
    return False
def get_user_roles(
    self,
    user_id: str,
    resource_type: Optional[ResourceType] = None,
    resource_id: Optional[str] = None
) -> List[Role]:
    """Return the distinct roles of ``user_id`` from active assignments.

    Optional filters narrow by resource type and/or resource id; falsy
    filter values (None, empty string) disable the respective filter.
    The returned list is deduplicated and unordered.
    """
    distinct_roles = {
        a.role
        for a in self.role_assignments.get(user_id, [])
        if a.is_active()
        and (not resource_type or a.resource_type == resource_type)
        and (not resource_id or a.resource_id == resource_id)
    }
    return list(distinct_roles)
def create_key_share(
    self,
    user_id: str,
    package_id: str,
    permissions: Set[str],
    granted_by: str,
    scope: str = "full",
    invite_token: Optional[str] = None
) -> KeyShare:
    """Create a key share granting ``user_id`` access to a package.

    Returns the newly created KeyShare (with a fresh UUID); it remains
    pending until accepted via its invite token.
    """
    new_share = KeyShare(
        id=str(uuid.uuid4()),
        user_id=user_id,
        package_id=package_id,
        permissions=permissions,
        scope=scope,
        granted_by=granted_by,
        invite_token=invite_token
    )
    # Create the per-user list lazily on first share.
    self.key_shares.setdefault(user_id, []).append(new_share)
    return new_share
def accept_key_share(self, share_id: str, token: str) -> bool:
    """Accept a pending key share via its invite token.

    Returns True and stamps accepted_at when both id and token match;
    False otherwise.
    """
    all_shares = (s for per_user in self.key_shares.values() for s in per_user)
    for share in all_shares:
        if share.id == share_id and share.invite_token == token:
            share.accepted_at = datetime.now(timezone.utc)
            return True
    return False
def revoke_key_share(self, share_id: str, revoked_by: str) -> bool:
    """Revoke a key share by id, recording who revoked it and when.

    Returns False when no share with the given id exists.
    """
    all_shares = (s for per_user in self.key_shares.values() for s in per_user)
    for share in all_shares:
        if share.id == share_id:
            share.revoked_at = datetime.now(timezone.utc)
            share.revoked_by = revoked_by
            return True
    return False
def check_permission(
    self,
    user_id: str,
    action: Action,
    resource_type: ResourceType,
    resource_id: str,
    policy: Optional[PolicySet] = None,
    package_id: Optional[str] = None
) -> bool:
    """
    Check whether a user may perform an action on a resource.

    Evaluates, in order:
    1. Base RBAC (role -> resource -> action matrix)
    2. Policy restrictions (second-corrector visibility modes)
    3. Key share (only when a package_id is given)

    Returns True only if every applicable check passes.
    """
    # 1. Active roles scoped to this resource; no role means no access.
    roles = self.get_user_roles(user_id, resource_type, resource_id)
    if not roles:
        return False
    # 2. Base RBAC: one role granting the action is sufficient.
    has_permission = False
    for role in roles:
        role_permissions = DEFAULT_PERMISSIONS.get(role, {})
        resource_permissions = role_permissions.get(resource_type, set())
        if action in resource_permissions:
            has_permission = True
            break
    if not has_permission:
        return False
    # 3. Policy restrictions.
    if policy:
        # ZK visibility mode
        if Role.ZWEITKORREKTOR in roles:
            if policy.zk_visibility_mode == ZKVisibilityMode.BLIND:
                # Blind: second corrector must not see first-corrector outputs.
                if resource_type in [ResourceType.EVALUATION, ResourceType.REPORT, ResourceType.GRADE_DECISION]:
                    if action == Action.READ:
                        # Would need metadata to tell first-corrector outputs
                        # apart; currently a no-op placeholder (read allowed).
                        pass  # implementation depends on the data model
            elif policy.zk_visibility_mode == ZKVisibilityMode.SEMI:
                # Semi: second corrector sees annotations but not the grade.
                if resource_type == ResourceType.GRADE_DECISION and action == Action.READ:
                    return False
    # 4. Key share check (for package-scoped access).
    if package_id:
        user_shares = self.key_shares.get(user_id, [])
        has_key_share = any(
            share.package_id == package_id and share.is_active()
            for share in user_shares
        )
        if not has_key_share:
            return False
    return True
def get_allowed_actions(
    self,
    user_id: str,
    resource_type: ResourceType,
    resource_id: str,
    policy: Optional[PolicySet] = None
) -> Set[Action]:
    """Collect every action the user's active roles permit on the resource.

    Policy narrowing for BLIND second correctors is still a placeholder:
    as before, no actions are removed.
    """
    roles = self.get_user_roles(user_id, resource_type, resource_id)
    permitted: Set[Action] = set()
    for role in roles:
        permitted |= DEFAULT_PERMISSIONS.get(role, {}).get(resource_type, set())
    if policy and Role.ZWEITKORREKTOR in roles:
        if policy.zk_visibility_mode == ZKVisibilityMode.BLIND:
            # Would remove READ for certain resources here (not implemented).
            pass
    return permitted
# =============================================
# DEFAULT POLICY SETS (alle Bundeslaender)
# =============================================
# Official two-letter Bundesland codes, used to derive unique policy ids.
# BUG FIX: ids were previously derived from the first two letters of the
# state name, which collides (brandenburg/bremen -> "BR",
# saarland/sachsen/sachsen-anhalt -> "SA"), silently producing duplicate
# policy ids. The mapping preserves the original iteration order.
_BUNDESLAND_CODES = {
    "baden-wuerttemberg": "BW",
    "bayern": "BY",
    "berlin": "BE",
    "brandenburg": "BB",
    "bremen": "HB",
    "hamburg": "HH",
    "hessen": "HE",
    "mecklenburg-vorpommern": "MV",
    "niedersachsen": "NI",
    "nordrhein-westfalen": "NW",
    "rheinland-pfalz": "RP",
    "saarland": "SL",
    "sachsen": "SN",
    "sachsen-anhalt": "ST",
    "schleswig-holstein": "SH",
    "thueringen": "TH",
}
def create_default_policy_sets() -> List[PolicySet]:
    """
    Create default policy sets for all Bundeslaender.

    Produces a generic DEFAULT fallback plus one Abitur policy per state.
    Niedersachsen, Bayern and NRW carry hand-tuned settings; the remaining
    states receive a common baseline. All sets can be refined per state
    later.

    Returns:
        A list of PolicySet instances with unique ids.
    """
    policies = []
    # Default policy (fallback for contexts without a state-specific set)
    policies.append(PolicySet(
        id="DEFAULT-2025",
        bundesland="DEFAULT",
        jahr=2025,
        fach=None,
        verfahren="abitur",
        zk_visibility_mode=ZKVisibilityMode.FULL,
        # eh_visibility_mode deliberately omitted: the dataclass default
        # applies (previously fetched via __dataclass_fields__, which was
        # an obscure way of saying the same thing).
        allow_teacher_uploaded_eh=True,
        allow_land_uploaded_eh=True,
        require_rights_confirmation_on_upload=True,
        third_correction_threshold=4,
        final_signoff_role="fachvorsitz"
    ))
    # Niedersachsen (example with specific adjustments)
    policies.append(PolicySet(
        id="NI-2025-ABITUR",
        bundesland="niedersachsen",
        jahr=2025,
        fach=None,
        verfahren="abitur",
        zk_visibility_mode=ZKVisibilityMode.FULL,  # in NI the ZK sees everything
        allow_teacher_uploaded_eh=True,
        allow_land_uploaded_eh=True,
        require_rights_confirmation_on_upload=True,
        third_correction_threshold=4,
        final_signoff_role="fachvorsitz",
        export_template_id="niedersachsen-abitur"
    ))
    # Bayern (example with SEMI visibility)
    policies.append(PolicySet(
        id="BY-2025-ABITUR",
        bundesland="bayern",
        jahr=2025,
        fach=None,
        verfahren="abitur",
        zk_visibility_mode=ZKVisibilityMode.SEMI,  # ZK sees annotations, not the grade
        allow_teacher_uploaded_eh=True,
        allow_land_uploaded_eh=True,
        require_rights_confirmation_on_upload=True,
        third_correction_threshold=4,
        final_signoff_role="fachvorsitz",
        export_template_id="bayern-abitur"
    ))
    # NRW (example)
    policies.append(PolicySet(
        id="NW-2025-ABITUR",
        bundesland="nordrhein-westfalen",
        jahr=2025,
        fach=None,
        verfahren="abitur",
        zk_visibility_mode=ZKVisibilityMode.FULL,
        allow_teacher_uploaded_eh=True,
        allow_land_uploaded_eh=True,
        require_rights_confirmation_on_upload=True,
        third_correction_threshold=4,
        final_signoff_role="fachvorsitz",
        export_template_id="nrw-abitur"
    ))
    # Baseline policies for all remaining states
    for bl, code in _BUNDESLAND_CODES.items():
        if bl in ("niedersachsen", "bayern", "nordrhein-westfalen"):
            continue
        policies.append(PolicySet(
            id=f"{code}-2025-ABITUR",
            bundesland=bl,
            jahr=2025,
            fach=None,
            verfahren="abitur",
            zk_visibility_mode=ZKVisibilityMode.FULL,
            allow_teacher_uploaded_eh=True,
            allow_land_uploaded_eh=True,
            require_rights_confirmation_on_upload=True,
            third_correction_threshold=4,
            final_signoff_role="fachvorsitz"
        ))
    return policies
# =============================================
# GLOBAL POLICY ENGINE INSTANCE
# =============================================
# Lazily created process-wide singleton.
_policy_engine: Optional[PolicyEngine] = None
def get_policy_engine() -> PolicyEngine:
    """Return the global PolicyEngine, creating it on first use.

    On first call the engine is built and all default policy sets are
    registered before the instance is published.
    """
    global _policy_engine
    if _policy_engine is None:
        engine = PolicyEngine()
        for default_policy in create_default_policy_sets():
            engine.register_policy_set(default_policy)
        _policy_engine = engine
    return _policy_engine
# =============================================
# API GUARDS (decorators for FastAPI)
# =============================================
def require_permission(
    action: Action,
    resource_type: ResourceType,
    resource_id_param: str = "resource_id"
):
    """
    FastAPI endpoint decorator enforcing an RBAC/ABAC permission check.

    Looks up the authenticated user on ``request.state.user``, resolves the
    resource id from the endpoint's keyword arguments and asks the policy
    engine whether ``action`` on ``resource_type`` is permitted.

    Usage:
        @app.get("/api/v1/packages/{package_id}")
        @require_permission(Action.READ, ResourceType.EXAM_PACKAGE, "package_id")
        async def get_package(package_id: str, request: Request):
            ...
    """
    def decorator(func):
        @wraps(func)
        async def wrapper(*args, **kwargs):
            req = kwargs.get('request') or next(
                (a for a in args if isinstance(a, Request)), None
            )
            if not req:
                raise HTTPException(status_code=500, detail="Request not found")
            # User injected by the auth middleware.
            current_user = getattr(req.state, 'user', None)
            if not current_user:
                raise HTTPException(status_code=401, detail="Not authenticated")
            engine = get_policy_engine()
            # Optionally load the state-specific policy from context.
            # NOTE(review): the policy year is hard-coded to 2025 — confirm.
            active_policy = None
            bundesland = current_user.get('bundesland')
            if bundesland:
                active_policy = engine.get_policy_for_context(bundesland, 2025)
            allowed = engine.check_permission(
                user_id=current_user.get('user_id'),
                action=action,
                resource_type=resource_type,
                resource_id=kwargs.get(resource_id_param),
                policy=active_policy
            )
            if not allowed:
                raise HTTPException(
                    status_code=403,
                    detail=f"Permission denied: {action.value} on {resource_type.value}"
                )
            return await func(*args, **kwargs)
        return wrapper
    return decorator
def require_role(role: Role):
    """
    FastAPI endpoint decorator that rejects callers lacking a specific role.

    Usage:
        @app.post("/api/v1/eh/publish")
        @require_role(Role.LAND_ADMIN)
        async def publish_eh(request: Request):
            ...
    """
    def decorator(func):
        @wraps(func)
        async def wrapper(*args, **kwargs):
            req = kwargs.get('request') or next(
                (a for a in args if isinstance(a, Request)), None
            )
            if not req:
                raise HTTPException(status_code=500, detail="Request not found")
            current_user = getattr(req.state, 'user', None)
            if not current_user:
                raise HTTPException(status_code=401, detail="Not authenticated")
            engine = get_policy_engine()
            if role not in engine.get_user_roles(current_user.get('user_id')):
                raise HTTPException(
                    status_code=403,
                    detail=f"Role required: {role.value}"
                )
            return await func(*args, **kwargs)
        return wrapper
    return decorator

View File

@@ -0,0 +1,221 @@
"""
RBAC Permission Matrix
Default role-to-resource permission mappings for
Klausur-Korrektur and Zeugnis workflows.
Extracted from rbac.py for file-size compliance.
"""
from typing import Dict, Set
from rbac_types import Role, Action, ResourceType
# =============================================
# RBAC PERMISSION MATRIX
# =============================================
# Default permission matrix (may be overridden by policies).
# Maps role -> resource type -> the set of actions that role may perform.
DEFAULT_PERMISSIONS: Dict[Role, Dict[ResourceType, Set[Action]]] = {
    # Erstkorrektor (first corrector)
    Role.ERSTKORREKTOR: {
        ResourceType.EXAM_PACKAGE: {Action.READ, Action.UPDATE, Action.SHARE_KEY, Action.LOCK},
        ResourceType.STUDENT_WORK: {Action.READ, Action.UPDATE},
        ResourceType.EH_DOCUMENT: {Action.READ, Action.UPLOAD, Action.UPDATE},
        ResourceType.RUBRIC: {Action.READ, Action.UPDATE},
        ResourceType.ANNOTATION: {Action.CREATE, Action.READ, Action.UPDATE, Action.DELETE},
        ResourceType.EVALUATION: {Action.CREATE, Action.READ, Action.UPDATE},
        ResourceType.REPORT: {Action.CREATE, Action.READ, Action.UPDATE},
        ResourceType.GRADE_DECISION: {Action.CREATE, Action.READ, Action.UPDATE},
        ResourceType.EXPORT: {Action.CREATE, Action.READ, Action.DOWNLOAD},
        ResourceType.AUDIT_LOG: {Action.READ},
    },
    # Zweitkorrektor / second corrector (default: FULL visibility)
    Role.ZWEITKORREKTOR: {
        ResourceType.EXAM_PACKAGE: {Action.READ},
        ResourceType.STUDENT_WORK: {Action.READ, Action.UPDATE},
        ResourceType.EH_DOCUMENT: {Action.READ},
        ResourceType.RUBRIC: {Action.READ},
        ResourceType.ANNOTATION: {Action.CREATE, Action.READ, Action.UPDATE},
        ResourceType.EVALUATION: {Action.CREATE, Action.READ, Action.UPDATE},
        ResourceType.REPORT: {Action.CREATE, Action.READ, Action.UPDATE},
        ResourceType.GRADE_DECISION: {Action.CREATE, Action.READ, Action.UPDATE},
        ResourceType.EXPORT: {Action.READ, Action.DOWNLOAD},
        ResourceType.AUDIT_LOG: {Action.READ},
    },
    # Drittkorrektor (third corrector)
    Role.DRITTKORREKTOR: {
        ResourceType.EXAM_PACKAGE: {Action.READ},
        ResourceType.STUDENT_WORK: {Action.READ, Action.UPDATE},
        ResourceType.EH_DOCUMENT: {Action.READ},
        ResourceType.RUBRIC: {Action.READ},
        ResourceType.ANNOTATION: {Action.CREATE, Action.READ, Action.UPDATE},
        ResourceType.EVALUATION: {Action.CREATE, Action.READ, Action.UPDATE},
        ResourceType.REPORT: {Action.CREATE, Action.READ, Action.UPDATE},
        ResourceType.GRADE_DECISION: {Action.CREATE, Action.READ, Action.UPDATE},
        ResourceType.AUDIT_LOG: {Action.READ},
    },
    # Fachvorsitz (subject chair)
    Role.FACHVORSITZ: {
        ResourceType.TENANT: {Action.READ},
        ResourceType.NAMESPACE: {Action.READ, Action.UPDATE},
        ResourceType.EXAM_PACKAGE: {Action.READ, Action.UPDATE, Action.LOCK, Action.UNLOCK, Action.SIGN_OFF},
        ResourceType.STUDENT_WORK: {Action.READ, Action.UPDATE},
        ResourceType.EH_DOCUMENT: {Action.READ, Action.UPLOAD, Action.UPDATE},
        ResourceType.RUBRIC: {Action.READ, Action.UPDATE},
        ResourceType.ANNOTATION: {Action.READ, Action.UPDATE},
        ResourceType.EVALUATION: {Action.READ, Action.UPDATE},
        ResourceType.REPORT: {Action.READ, Action.UPDATE},
        ResourceType.GRADE_DECISION: {Action.READ, Action.UPDATE, Action.SIGN_OFF},
        ResourceType.EXPORT: {Action.CREATE, Action.READ, Action.DOWNLOAD},
        ResourceType.AUDIT_LOG: {Action.READ},
    },
    # Pruefungsvorsitz (examination chair)
    Role.PRUEFUNGSVORSITZ: {
        ResourceType.TENANT: {Action.READ},
        ResourceType.NAMESPACE: {Action.READ, Action.CREATE},
        ResourceType.EXAM_PACKAGE: {Action.READ, Action.SIGN_OFF},
        ResourceType.STUDENT_WORK: {Action.READ},
        ResourceType.EH_DOCUMENT: {Action.READ},
        ResourceType.GRADE_DECISION: {Action.READ, Action.SIGN_OFF},
        ResourceType.EXPORT: {Action.CREATE, Action.READ, Action.DOWNLOAD},
        ResourceType.AUDIT_LOG: {Action.READ},
    },
    # School admin
    Role.SCHUL_ADMIN: {
        ResourceType.TENANT: {Action.READ, Action.UPDATE},
        ResourceType.NAMESPACE: {Action.CREATE, Action.READ, Action.UPDATE, Action.DELETE},
        ResourceType.EXAM_PACKAGE: {Action.CREATE, Action.READ, Action.DELETE, Action.ASSIGN_ROLE},
        ResourceType.EH_DOCUMENT: {Action.READ, Action.UPLOAD, Action.DELETE},
        ResourceType.AUDIT_LOG: {Action.READ},
    },
    # State admin (government authority)
    Role.LAND_ADMIN: {
        ResourceType.TENANT: {Action.READ},
        ResourceType.EH_DOCUMENT: {Action.READ, Action.UPLOAD, Action.UPDATE, Action.DELETE, Action.PUBLISH_OFFICIAL},
        ResourceType.AUDIT_LOG: {Action.READ},
    },
    # Auditor
    Role.AUDITOR: {
        ResourceType.AUDIT_LOG: {Action.READ},
        ResourceType.EXAM_PACKAGE: {Action.READ},  # metadata only
        # No access to content!
    },
    # Operator
    Role.OPERATOR: {
        ResourceType.TENANT: {Action.READ},
        ResourceType.NAMESPACE: {Action.READ},
        ResourceType.EXAM_PACKAGE: {Action.READ},  # metadata only
        ResourceType.AUDIT_LOG: {Action.READ},
        # Break-glass access is handled separately
    },
    # Teacher assistant
    Role.TEACHER_ASSISTANT: {
        ResourceType.STUDENT_WORK: {Action.READ},
        ResourceType.ANNOTATION: {Action.CREATE, Action.READ},  # only certain annotation types
        ResourceType.EH_DOCUMENT: {Action.READ},
    },
    # Exam author (pre-Abitur only)
    Role.EXAM_AUTHOR: {
        ResourceType.EH_DOCUMENT: {Action.CREATE, Action.READ, Action.UPDATE, Action.DELETE},
        ResourceType.RUBRIC: {Action.CREATE, Action.READ, Action.UPDATE, Action.DELETE},
    },
    # =============================================
    # REPORT-CARD (ZEUGNIS) WORKFLOW ROLES
    # =============================================
    # Class teacher - creates report cards, conduct grades, remarks
    Role.KLASSENLEHRER: {
        ResourceType.NAMESPACE: {Action.READ},
        ResourceType.ZEUGNIS: {Action.CREATE, Action.READ, Action.UPDATE},
        ResourceType.ZEUGNIS_ENTWURF: {Action.CREATE, Action.READ, Action.UPDATE, Action.DELETE},
        ResourceType.ZEUGNIS_VORLAGE: {Action.READ},
        ResourceType.SCHUELER_DATEN: {Action.READ, Action.UPDATE},
        ResourceType.FACHNOTE: {Action.READ},  # reads grades entered by subject teachers
        ResourceType.KOPFNOTE: {Action.CREATE, Action.READ, Action.UPDATE},
        ResourceType.FEHLZEITEN: {Action.READ, Action.UPDATE},
        ResourceType.BEMERKUNG: {Action.CREATE, Action.READ, Action.UPDATE, Action.DELETE},
        ResourceType.VERSETZUNG: {Action.READ},
        ResourceType.EXPORT: {Action.CREATE, Action.READ, Action.DOWNLOAD},
        ResourceType.AUDIT_LOG: {Action.READ},
    },
    # Subject teacher - enters subject grades
    Role.FACHLEHRER: {
        ResourceType.NAMESPACE: {Action.READ},
        ResourceType.SCHUELER_DATEN: {Action.READ},  # own students only
        ResourceType.FACHNOTE: {Action.CREATE, Action.READ, Action.UPDATE},  # own subject only
        ResourceType.BEMERKUNG: {Action.CREATE, Action.READ},  # subject-related remarks
        ResourceType.AUDIT_LOG: {Action.READ},
    },
    # Report-card officer - quality control
    Role.ZEUGNISBEAUFTRAGTER: {
        ResourceType.NAMESPACE: {Action.READ, Action.UPDATE},
        ResourceType.ZEUGNIS: {Action.READ, Action.UPDATE},
        ResourceType.ZEUGNIS_ENTWURF: {Action.READ, Action.UPDATE},
        ResourceType.ZEUGNIS_VORLAGE: {Action.READ, Action.UPDATE, Action.UPLOAD},
        ResourceType.SCHUELER_DATEN: {Action.READ},
        ResourceType.FACHNOTE: {Action.READ},
        ResourceType.KOPFNOTE: {Action.READ, Action.UPDATE},
        ResourceType.FEHLZEITEN: {Action.READ},
        ResourceType.BEMERKUNG: {Action.READ, Action.UPDATE},
        ResourceType.VERSETZUNG: {Action.READ},
        ResourceType.EXPORT: {Action.CREATE, Action.READ, Action.DOWNLOAD},
        ResourceType.AUDIT_LOG: {Action.READ},
    },
    # Secretariat - printing, dispatch, archiving
    Role.SEKRETARIAT: {
        ResourceType.ZEUGNIS: {Action.READ, Action.DOWNLOAD},
        ResourceType.ZEUGNIS_VORLAGE: {Action.READ},
        ResourceType.SCHUELER_DATEN: {Action.READ},  # for address data
        ResourceType.EXPORT: {Action.CREATE, Action.READ, Action.DOWNLOAD},
        ResourceType.AUDIT_LOG: {Action.READ},
    },
    # School management - final report-card approval
    Role.SCHULLEITUNG: {
        ResourceType.TENANT: {Action.READ},
        ResourceType.NAMESPACE: {Action.READ, Action.CREATE},
        ResourceType.ZEUGNIS: {Action.READ, Action.SIGN_OFF, Action.LOCK},
        ResourceType.ZEUGNIS_ENTWURF: {Action.READ, Action.UPDATE},
        ResourceType.ZEUGNIS_VORLAGE: {Action.READ, Action.UPDATE},
        ResourceType.SCHUELER_DATEN: {Action.READ},
        ResourceType.FACHNOTE: {Action.READ},
        ResourceType.KOPFNOTE: {Action.READ, Action.UPDATE},
        ResourceType.FEHLZEITEN: {Action.READ},
        ResourceType.BEMERKUNG: {Action.READ, Action.UPDATE},
        ResourceType.KONFERENZ_BESCHLUSS: {Action.CREATE, Action.READ, Action.UPDATE, Action.SIGN_OFF},
        ResourceType.VERSETZUNG: {Action.CREATE, Action.READ, Action.UPDATE, Action.SIGN_OFF},
        ResourceType.EXPORT: {Action.CREATE, Action.READ, Action.DOWNLOAD},
        ResourceType.AUDIT_LOG: {Action.READ},
    },
    # Grade-level coordination (e.g. upper school)
    Role.STUFENLEITUNG: {
        ResourceType.NAMESPACE: {Action.READ, Action.UPDATE},
        ResourceType.ZEUGNIS: {Action.READ, Action.UPDATE},
        ResourceType.ZEUGNIS_ENTWURF: {Action.READ, Action.UPDATE},
        ResourceType.SCHUELER_DATEN: {Action.READ},
        ResourceType.FACHNOTE: {Action.READ},
        ResourceType.KOPFNOTE: {Action.READ},
        ResourceType.FEHLZEITEN: {Action.READ},
        ResourceType.BEMERKUNG: {Action.READ, Action.UPDATE},
        ResourceType.KONFERENZ_BESCHLUSS: {Action.READ},
        ResourceType.VERSETZUNG: {Action.READ, Action.UPDATE},
        ResourceType.EXPORT: {Action.READ, Action.DOWNLOAD},
        ResourceType.AUDIT_LOG: {Action.READ},
    },
}

View File

@@ -0,0 +1,438 @@
"""
RBAC/ABAC Type Definitions
Enums, data structures, and models for the policy system.
Extracted from rbac.py for file-size compliance.
"""
import json
from enum import Enum
from dataclasses import dataclass, field, asdict
from typing import Optional, List, Dict, Set, Any
from datetime import datetime, timezone
import uuid
# =============================================
# ENUMS: Roles, Actions, Resources
# =============================================
class Role(str, Enum):
    """Domain roles in the exam-correction and report-card chains."""
    # === Exam correction chain ===
    ERSTKORREKTOR = "erstkorrektor"  # EK - first corrector
    ZWEITKORREKTOR = "zweitkorrektor"  # ZK - second corrector
    DRITTKORREKTOR = "drittkorrektor"  # DK - third corrector
    # === Report-card (Zeugnis) workflow ===
    KLASSENLEHRER = "klassenlehrer"  # KL - creates report card, conduct grades, remarks
    FACHLEHRER = "fachlehrer"  # FL - enters subject grades
    ZEUGNISBEAUFTRAGTER = "zeugnisbeauftragter"  # ZB - quality control
    SEKRETARIAT = "sekretariat"  # SEK - printing, dispatch, archiving
    # === Leadership (exam + report card) ===
    FACHVORSITZ = "fachvorsitz"  # FVL - subject examination chair
    PRUEFUNGSVORSITZ = "pruefungsvorsitz"  # PV - school management / examination chair
    SCHULLEITUNG = "schulleitung"  # SL - final report-card approval
    STUFENLEITUNG = "stufenleitung"  # STL - grade-level coordination
    # === Administration ===
    SCHUL_ADMIN = "schul_admin"  # SA
    LAND_ADMIN = "land_admin"  # LA - state authority
    # === Special ===
    AUDITOR = "auditor"  # DPO/auditor
    OPERATOR = "operator"  # OPS - support
    TEACHER_ASSISTANT = "teacher_assistant"  # TA - trainee teacher
    EXAM_AUTHOR = "exam_author"  # EA - pre-Abitur only
class Action(str, Enum):
    """Operations that can be performed on resources."""
    CREATE = "create"
    READ = "read"
    UPDATE = "update"
    DELETE = "delete"
    ASSIGN_ROLE = "assign_role"
    INVITE_USER = "invite_user"
    REMOVE_USER = "remove_user"
    UPLOAD = "upload"
    DOWNLOAD = "download"
    LOCK = "lock"  # finalize
    UNLOCK = "unlock"  # only with special privilege
    SIGN_OFF = "sign_off"  # approval / release
    SHARE_KEY = "share_key"  # create a key share
    VIEW_PII = "view_pii"  # if PII is present
    BREAK_GLASS = "break_glass"  # emergency access
    PUBLISH_OFFICIAL = "publish_official"  # distribute official EH
class ResourceType(str, Enum):
    """Resource types known to the system."""
    TENANT = "tenant"
    NAMESPACE = "namespace"
    # === Exam correction ===
    EXAM_PACKAGE = "exam_package"
    STUDENT_WORK = "student_work"
    EH_DOCUMENT = "eh_document"
    RUBRIC = "rubric"  # scoring grid
    ANNOTATION = "annotation"
    EVALUATION = "evaluation"  # criteria / points
    REPORT = "report"  # written assessment
    GRADE_DECISION = "grade_decision"
    # === Report-card generator ===
    ZEUGNIS = "zeugnis"  # report-card document
    ZEUGNIS_VORLAGE = "zeugnis_vorlage"  # report-card template
    ZEUGNIS_ENTWURF = "zeugnis_entwurf"  # report-card draft (pre-approval)
    SCHUELER_DATEN = "schueler_daten"  # student master data, grades
    FACHNOTE = "fachnote"  # single subject grade
    KOPFNOTE = "kopfnote"  # work / social conduct grade
    FEHLZEITEN = "fehlzeiten"  # absences
    BEMERKUNG = "bemerkung"  # report-card remarks
    KONFERENZ_BESCHLUSS = "konferenz_beschluss"  # conference resolution
    VERSETZUNG = "versetzung"  # promotion decision
    # === General ===
    DOCUMENT = "document"  # generic document type (EH, templates, etc.)
    TEMPLATE = "template"  # generic templates
    EXPORT = "export"
    AUDIT_LOG = "audit_log"
    KEY_MATERIAL = "key_material"
class ZKVisibilityMode(str, Enum):
    """Visibility mode for second correctors (ZK)."""
    BLIND = "blind"  # ZK sees no first-corrector grade / assessment
    SEMI = "semi"  # ZK sees annotations, but not the grade
    FULL = "full"  # ZK sees everything
class EHVisibilityMode(str, Enum):
    """Visibility mode for Erwartungshorizonte (expected answers)."""
    BLIND = "blind"  # ZK does not see the EH (rare)
    SHARED = "shared"  # ZK sees the EH (default)
class VerfahrenType(str, Enum):
    """Procedure types for exams and report cards."""
    # === Exam procedures ===
    ABITUR = "abitur"
    VORABITUR = "vorabitur"
    KLAUSUR = "klausur"
    NACHPRUEFUNG = "nachpruefung"
    # === Report-card procedures ===
    HALBJAHRESZEUGNIS = "halbjahreszeugnis"
    JAHRESZEUGNIS = "jahreszeugnis"
    ABSCHLUSSZEUGNIS = "abschlusszeugnis"
    ABGANGSZEUGNIS = "abgangszeugnis"
    @classmethod
    def is_exam_type(cls, verfahren: str) -> bool:
        """Return True when *verfahren* names an exam procedure."""
        try:
            member = cls(verfahren)
        except ValueError:
            return False
        return member in {cls.ABITUR, cls.VORABITUR, cls.KLAUSUR, cls.NACHPRUEFUNG}
    @classmethod
    def is_certificate_type(cls, verfahren: str) -> bool:
        """Return True when *verfahren* names a report-card procedure."""
        try:
            member = cls(verfahren)
        except ValueError:
            return False
        return member in {
            cls.HALBJAHRESZEUGNIS,
            cls.JAHRESZEUGNIS,
            cls.ABSCHLUSSZEUGNIS,
            cls.ABGANGSZEUGNIS,
        }
# =============================================
# DATA STRUCTURES
# =============================================
@dataclass
class PolicySet:
    """
    Policy configuration per Bundesland / year / subject.

    Allows state-specific differences without hard-coding them in source.

    Supported procedure types:
    - Exams: abitur, vorabitur, klausur, nachpruefung
    - Report cards: halbjahreszeugnis, jahreszeugnis, abschlusszeugnis, abgangszeugnis
    """
    id: str
    bundesland: str
    jahr: int
    fach: Optional[str]  # None = applies to all subjects
    verfahren: str  # see VerfahrenType enum
    # Visibility rules (exam)
    zk_visibility_mode: ZKVisibilityMode = ZKVisibilityMode.FULL
    eh_visibility_mode: EHVisibilityMode = EHVisibilityMode.SHARED
    # EH sources (exam)
    allow_teacher_uploaded_eh: bool = True
    allow_land_uploaded_eh: bool = True
    require_rights_confirmation_on_upload: bool = True
    require_dual_control_for_official_eh_update: bool = False
    # Correction rules (exam)
    third_correction_threshold: int = 4  # grade-point deviation triggering third correction
    final_signoff_role: str = "fachvorsitz"
    # Report-card rules (Zeugnis)
    require_klassenlehrer_approval: bool = True
    require_schulleitung_signoff: bool = True
    allow_sekretariat_edit_after_approval: bool = False
    konferenz_protokoll_required: bool = True
    bemerkungen_require_review: bool = True
    fehlzeiten_auto_import: bool = True
    kopfnoten_enabled: bool = False
    versetzung_auto_calculate: bool = True
    # Export & display
    quote_verbatim_allowed: bool = False  # official texts shown verbatim in the UI
    export_template_id: str = "default"
    # Additional free-form flags
    flags: Dict[str, Any] = field(default_factory=dict)
    created_at: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
    def is_exam_policy(self) -> bool:
        """Return True when this policy applies to an exam procedure."""
        return VerfahrenType.is_exam_type(self.verfahren)
    def is_certificate_policy(self) -> bool:
        """Return True when this policy applies to a report-card procedure."""
        return VerfahrenType.is_certificate_type(self.verfahren)
    def to_dict(self):
        """Serialize to a JSON-friendly dict (enums as values, datetime as ISO-8601)."""
        d = asdict(self)
        d['zk_visibility_mode'] = self.zk_visibility_mode.value
        d['eh_visibility_mode'] = self.eh_visibility_mode.value
        d['created_at'] = self.created_at.isoformat()
        return d
@dataclass
class RoleAssignment:
    """
    Assignment of a role to a user for a specific resource.

    An assignment is active while it has not been revoked and the current
    time lies within [valid_from, valid_to].
    """
    id: str
    user_id: str
    role: Role
    resource_type: ResourceType
    resource_id: str
    # Optional scoping restrictions
    tenant_id: Optional[str] = None
    namespace_id: Optional[str] = None
    # Validity window
    valid_from: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
    valid_to: Optional[datetime] = None
    # Metadata
    granted_by: str = ""
    granted_at: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
    revoked_at: Optional[datetime] = None
    def is_active(self) -> bool:
        """Return True while the assignment is unrevoked and inside its validity window."""
        now = datetime.now(timezone.utc)
        if self.revoked_at:
            return False
        if self.valid_to and now > self.valid_to:
            return False
        return now >= self.valid_from
    def to_dict(self):
        """Serialize to a JSON-friendly dict (datetimes as ISO-8601 strings)."""
        return {
            'id': self.id,
            'user_id': self.user_id,
            'role': self.role.value,
            'resource_type': self.resource_type.value,
            'resource_id': self.resource_id,
            'tenant_id': self.tenant_id,
            'namespace_id': self.namespace_id,
            'valid_from': self.valid_from.isoformat(),
            'valid_to': self.valid_to.isoformat() if self.valid_to else None,
            'granted_by': self.granted_by,
            'granted_at': self.granted_at.isoformat(),
            'revoked_at': self.revoked_at.isoformat() if self.revoked_at else None,
            'is_active': self.is_active()
        }
@dataclass
class KeyShare:
    """
    Authorization for a user to access encrypted content.

    A KeyShare is NOT a plaintext key; it is a grant used together with a
    role assignment. When created via invite, it only becomes active once
    accepted.
    """
    id: str
    user_id: str
    package_id: str
    # Scope of the grant
    permissions: Set[str] = field(default_factory=set)
    # e.g. {"read_original", "read_eh", "read_ek_outputs", "write_annotations"}
    # Optional restriction of what the share covers
    scope: str = "full"  # "full", "original_only", "eh_only", "outputs_only"
    # Grant chain
    granted_by: str = ""
    granted_at: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
    # Acceptance (for the invite flow)
    invite_token: Optional[str] = None
    accepted_at: Optional[datetime] = None
    # Revocation
    revoked_at: Optional[datetime] = None
    revoked_by: Optional[str] = None
    def is_active(self) -> bool:
        """Active when unrevoked and either directly granted or the invite was accepted."""
        return self.revoked_at is None and (
            self.invite_token is None or self.accepted_at is not None
        )
    def to_dict(self):
        """Serialize to a JSON-friendly dict (datetimes as ISO-8601 strings)."""
        return {
            'id': self.id,
            'user_id': self.user_id,
            'package_id': self.package_id,
            'permissions': list(self.permissions),
            'scope': self.scope,
            'granted_by': self.granted_by,
            'granted_at': self.granted_at.isoformat(),
            'invite_token': self.invite_token,
            'accepted_at': self.accepted_at.isoformat() if self.accepted_at else None,
            'revoked_at': self.revoked_at.isoformat() if self.revoked_at else None,
            'is_active': self.is_active()
        }
@dataclass
class Tenant:
    """
    Highest isolation unit - typically a school.
    """
    id: str
    name: str
    bundesland: str
    tenant_type: str = "school"  # "school", "pruefungszentrum", "behoerde"
    # Encryption
    encryption_enabled: bool = True
    # Metadata
    created_at: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
    deleted_at: Optional[datetime] = None
    def to_dict(self):
        """Serialize to a JSON-friendly dict (deleted_at intentionally omitted)."""
        return {
            'id': self.id,
            'name': self.name,
            'bundesland': self.bundesland,
            'tenant_type': self.tenant_type,
            'encryption_enabled': self.encryption_enabled,
            'created_at': self.created_at.isoformat()
        }
@dataclass
class Namespace:
    """
    Workspace within a tenant,
    e.g. "Abitur 2026 - Deutsch LK - Kurs 12a".
    """
    id: str
    tenant_id: str
    name: str
    # Context
    jahr: int
    fach: str
    kurs: Optional[str] = None
    pruefungsart: str = "abitur"  # "abitur", "vorabitur"
    # Policy
    policy_set_id: Optional[str] = None
    # Metadata
    created_by: str = ""
    created_at: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
    deleted_at: Optional[datetime] = None
    def to_dict(self):
        """Serialize to a JSON-friendly dict (deleted_at intentionally omitted)."""
        return {
            'id': self.id,
            'tenant_id': self.tenant_id,
            'name': self.name,
            'jahr': self.jahr,
            'fach': self.fach,
            'kurs': self.kurs,
            'pruefungsart': self.pruefungsart,
            'policy_set_id': self.policy_set_id,
            'created_by': self.created_by,
            'created_at': self.created_at.isoformat()
        }
@dataclass
class ExamPackage:
    """
    Exam package - a complete set of student works with all artifacts.
    """
    id: str
    namespace_id: str
    tenant_id: str
    name: str
    beschreibung: Optional[str] = None
    # Workflow status
    status: str = "draft"  # "draft", "in_progress", "locked", "signed_off"
    # Participants (roles are assigned separately)
    owner_id: str = ""  # typically the first corrector (EK)
    # Encryption
    encryption_key_id: Optional[str] = None
    # Timestamps
    created_at: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
    locked_at: Optional[datetime] = None
    signed_off_at: Optional[datetime] = None
    signed_off_by: Optional[str] = None
    def to_dict(self):
        """Serialize to a JSON-friendly dict (encryption_key_id intentionally omitted)."""
        return {
            'id': self.id,
            'namespace_id': self.namespace_id,
            'tenant_id': self.tenant_id,
            'name': self.name,
            'beschreibung': self.beschreibung,
            'status': self.status,
            'owner_id': self.owner_id,
            'created_at': self.created_at.isoformat(),
            'locked_at': self.locked_at.isoformat() if self.locked_at else None,
            'signed_off_at': self.signed_off_at.isoformat() if self.signed_off_at else None,
            'signed_off_by': self.signed_off_by
        }

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,343 @@
"""
BYOEH Invitation Flow Routes
Endpoints for inviting users, listing/accepting/declining/revoking
invitations to access Erwartungshorizonte.
Extracted from routes/eh.py for file-size compliance.
"""
import uuid
from datetime import datetime, timezone, timedelta
from fastapi import APIRouter, HTTPException, Request
from models.eh import EHKeyShare, EHShareInvitation
from models.requests import EHInviteRequest, EHAcceptInviteRequest
from services.auth_service import get_current_user
from services.eh_service import log_eh_audit
import storage
router = APIRouter()
# =============================================
# INVITATION FLOW
# =============================================
@router.post("/api/v1/eh/{eh_id}/invite")
async def invite_to_eh(
    eh_id: str,
    invite_request: EHInviteRequest,
    request: Request
):
    """
    Invite another user to access an Erwartungshorizont.

    Creates a pending invitation that the recipient must accept.

    Raises:
        404 if the EH does not exist, 403 if the caller is not its owner,
        400 on an unknown role, 409 if a live (non-expired) pending
        invitation for the same recipient already exists.
    """
    user = get_current_user(request)
    tenant_id = user.get("tenant_id") or user.get("school_id") or user["user_id"]
    # Check EH exists and belongs to user
    if eh_id not in storage.eh_db:
        raise HTTPException(status_code=404, detail="Erwartungshorizont not found")
    eh = storage.eh_db[eh_id]
    if eh.teacher_id != user["user_id"]:
        raise HTTPException(status_code=403, detail="Only the owner can invite others")
    # Validate role
    valid_roles = ['second_examiner', 'third_examiner', 'supervisor', 'department_head', 'fachvorsitz']
    if invite_request.role not in valid_roles:
        raise HTTPException(status_code=400, detail=f"Invalid role. Must be one of: {valid_roles}")
    now = datetime.now(timezone.utc)
    # Check for an existing *live* pending invitation to the same user.
    # BUG FIX: previously expiry was ignored here, so an expired-but-still-
    # 'pending' invitation blocked re-inviting the recipient forever
    # (list_pending_invitations already filters expired ones).
    for inv in storage.eh_invitations_db.values():
        if (inv.eh_id == eh_id and
            inv.invitee_email == invite_request.invitee_email and
            inv.status == 'pending' and
            inv.expires_at > now):
            raise HTTPException(
                status_code=409,
                detail="Pending invitation already exists for this user"
            )
    # Create invitation
    invitation_id = str(uuid.uuid4())
    expires_at = now + timedelta(days=invite_request.expires_in_days)
    invitation = EHShareInvitation(
        id=invitation_id,
        eh_id=eh_id,
        inviter_id=user["user_id"],
        invitee_id=invite_request.invitee_id or "",
        invitee_email=invite_request.invitee_email,
        role=invite_request.role,
        klausur_id=invite_request.klausur_id,
        message=invite_request.message,
        status='pending',
        expires_at=expires_at,
        created_at=now,
        accepted_at=None,
        declined_at=None
    )
    storage.eh_invitations_db[invitation_id] = invitation
    # Audit log
    log_eh_audit(
        tenant_id=tenant_id,
        user_id=user["user_id"],
        action="invite",
        eh_id=eh_id,
        details={
            "invitation_id": invitation_id,
            "invitee_email": invite_request.invitee_email,
            "role": invite_request.role,
            "expires_at": expires_at.isoformat()
        }
    )
    return {
        "status": "invited",
        "invitation_id": invitation_id,
        "eh_id": eh_id,
        "invitee_email": invite_request.invitee_email,
        "role": invite_request.role,
        "expires_at": expires_at.isoformat(),
        "eh_title": eh.title
    }
@router.get("/api/v1/eh/invitations/pending")
async def list_pending_invitations(request: Request):
    """Return every non-expired pending invitation addressed to the caller.

    An invitation matches when either the invited email or the invited
    user id equals the caller's; each entry carries a compact EH preview
    (or None if the EH record no longer exists).
    """
    user = get_current_user(request)
    email = user.get("email", "")
    uid = user["user_id"]
    now = datetime.now(timezone.utc)

    def _eh_summary(eh_id):
        # Compact EH preview shown next to the invitation, if the EH still exists.
        eh = storage.eh_db.get(eh_id)
        if eh is None:
            return None
        return {
            "id": eh.id,
            "title": eh.title,
            "subject": eh.subject,
            "niveau": eh.niveau,
            "year": eh.year
        }

    return [
        {"invitation": inv.to_dict(), "eh": _eh_summary(inv.eh_id)}
        for inv in storage.eh_invitations_db.values()
        if (inv.invitee_email == email or inv.invitee_id == uid)
        and inv.status == 'pending'
        and inv.expires_at > now
    ]
@router.get("/api/v1/eh/invitations/sent")
async def list_sent_invitations(request: Request):
    """Return every invitation the caller has sent, with a short EH preview."""
    user = get_current_user(request)
    uid = user["user_id"]

    sent = []
    for inv in storage.eh_invitations_db.values():
        if inv.inviter_id != uid:
            continue
        # EH may have been deleted since the invitation went out.
        eh = storage.eh_db.get(inv.eh_id)
        summary = (
            {"id": eh.id, "title": eh.title, "subject": eh.subject}
            if eh is not None
            else None
        )
        sent.append({"invitation": inv.to_dict(), "eh": summary})
    return sent
@router.post("/api/v1/eh/invitations/{invitation_id}/accept")
async def accept_eh_invitation(
    invitation_id: str,
    accept_request: EHAcceptInviteRequest,
    request: Request
):
    """Accept an invitation and receive access to the EH.

    Verifies the caller is the addressed recipient and the invitation is
    still pending and unexpired, then creates an EHKeyShare carrying the
    passphrase in the recipient-encrypted form supplied by the client
    (the server never sees the plaintext passphrase), and finally marks
    the invitation as accepted.

    Raises:
        HTTPException 404: unknown invitation id.
        HTTPException 403: caller is neither the invited email nor user id.
        HTTPException 400: invitation is not pending, or has expired.
    """
    user = get_current_user(request)
    tenant_id = user.get("tenant_id") or user.get("school_id") or user["user_id"]
    user_email = user.get("email", "")
    user_id = user["user_id"]
    now = datetime.now(timezone.utc)
    # Find invitation
    if invitation_id not in storage.eh_invitations_db:
        raise HTTPException(status_code=404, detail="Invitation not found")
    invitation = storage.eh_invitations_db[invitation_id]
    # Verify recipient: match either the invited email or an explicit user id.
    if invitation.invitee_email != user_email and invitation.invitee_id != user_id:
        raise HTTPException(status_code=403, detail="This invitation is not for you")
    # Check status
    if invitation.status != 'pending':
        raise HTTPException(
            status_code=400,
            detail=f"Invitation is {invitation.status}, cannot accept"
        )
    # Check expiration: expiry is detected lazily on access and persisted here.
    if invitation.expires_at < now:
        invitation.status = 'expired'
        raise HTTPException(status_code=400, detail="Invitation has expired")
    # Create key share — the passphrase is re-encrypted client-side for the
    # recipient; passphrase_hint is intentionally empty on accept.
    share_id = str(uuid.uuid4())
    key_share = EHKeyShare(
        id=share_id,
        eh_id=invitation.eh_id,
        user_id=user_id,
        encrypted_passphrase=accept_request.encrypted_passphrase,
        passphrase_hint="",
        granted_by=invitation.inviter_id,
        granted_at=now,
        role=invitation.role,
        klausur_id=invitation.klausur_id,
        active=True
    )
    # Store key share (shares are grouped per EH id)
    if invitation.eh_id not in storage.eh_key_shares_db:
        storage.eh_key_shares_db[invitation.eh_id] = []
    storage.eh_key_shares_db[invitation.eh_id].append(key_share)
    # Update invitation status
    invitation.status = 'accepted'
    invitation.accepted_at = now
    invitation.invitee_id = user_id  # Update with actual user ID
    # Audit log
    log_eh_audit(
        tenant_id=tenant_id,
        user_id=user_id,
        action="accept_invite",
        eh_id=invitation.eh_id,
        details={
            "invitation_id": invitation_id,
            "share_id": share_id,
            "role": invitation.role
        }
    )
    return {
        "status": "accepted",
        "share_id": share_id,
        "eh_id": invitation.eh_id,
        "role": invitation.role,
        "klausur_id": invitation.klausur_id
    }
@router.post("/api/v1/eh/invitations/{invitation_id}/decline")
async def decline_eh_invitation(invitation_id: str, request: Request):
    """Mark a pending invitation addressed to the caller as declined."""
    user = get_current_user(request)
    tenant_id = user.get("tenant_id") or user.get("school_id") or user["user_id"]
    user_email = user.get("email", "")
    user_id = user["user_id"]
    now = datetime.now(timezone.utc)

    invitation = storage.eh_invitations_db.get(invitation_id)
    if invitation is None:
        raise HTTPException(status_code=404, detail="Invitation not found")
    # Only the addressed recipient (matched by email or user id) may decline.
    if not (invitation.invitee_email == user_email or invitation.invitee_id == user_id):
        raise HTTPException(status_code=403, detail="This invitation is not for you")
    if invitation.status != 'pending':
        raise HTTPException(
            status_code=400,
            detail=f"Invitation is {invitation.status}, cannot decline"
        )

    invitation.status = 'declined'
    invitation.declined_at = now

    log_eh_audit(
        tenant_id=tenant_id,
        user_id=user_id,
        action="decline_invite",
        eh_id=invitation.eh_id,
        details={"invitation_id": invitation_id}
    )
    return {
        "status": "declined",
        "invitation_id": invitation_id,
        "eh_id": invitation.eh_id
    }
@router.delete("/api/v1/eh/invitations/{invitation_id}")
async def revoke_eh_invitation(invitation_id: str, request: Request):
    """Withdraw a still-pending invitation; only the original inviter may do so."""
    user = get_current_user(request)
    tenant_id = user.get("tenant_id") or user.get("school_id") or user["user_id"]
    user_id = user["user_id"]

    invitation = storage.eh_invitations_db.get(invitation_id)
    if invitation is None:
        raise HTTPException(status_code=404, detail="Invitation not found")
    if invitation.inviter_id != user_id:
        raise HTTPException(status_code=403, detail="Only the inviter can revoke")
    if invitation.status != 'pending':
        raise HTTPException(
            status_code=400,
            detail=f"Invitation is {invitation.status}, cannot revoke"
        )

    invitation.status = 'revoked'

    log_eh_audit(
        tenant_id=tenant_id,
        user_id=user_id,
        action="revoke_invite",
        eh_id=invitation.eh_id,
        details={
            "invitation_id": invitation_id,
            "invitee_email": invitation.invitee_email
        }
    )
    return {
        "status": "revoked",
        "invitation_id": invitation_id,
        "eh_id": invitation.eh_id
    }

View File

@@ -0,0 +1,347 @@
"""
BYOEH Key Sharing and Klausur Linking Routes
Endpoints for sharing EH access with other examiners
and linking EH to Klausuren.
Extracted from routes/eh.py for file-size compliance.
"""
import uuid
from datetime import datetime, timezone
from fastapi import APIRouter, HTTPException, Request
from models.eh import EHKeyShare, EHKlausurLink
from models.requests import EHShareRequest, EHLinkKlausurRequest
from services.auth_service import get_current_user
from services.eh_service import log_eh_audit
import storage
router = APIRouter()
# =============================================
# BYOEH KEY SHARING
# =============================================
@router.post("/api/v1/eh/{eh_id}/share")
async def share_erwartungshorizont(
    eh_id: str,
    share_request: EHShareRequest,
    request: Request
):
    """
    Share an Erwartungshorizont with another examiner.

    The first examiner shares their EH by providing an encrypted passphrase
    that the recipient can use. The server only stores the client-encrypted
    passphrase — never the plaintext.

    Raises:
        HTTPException 404: unknown EH id.
        HTTPException 403: caller is not the EH owner.
        HTTPException 400: role not in the accepted set.
    """
    user = get_current_user(request)
    tenant_id = user.get("tenant_id") or user.get("school_id") or user["user_id"]
    # Check EH exists and belongs to user
    if eh_id not in storage.eh_db:
        raise HTTPException(status_code=404, detail="Erwartungshorizont not found")
    eh = storage.eh_db[eh_id]
    if eh.teacher_id != user["user_id"]:
        raise HTTPException(status_code=403, detail="Only the owner can share this EH")
    # Validate role. Kept consistent with the invitation endpoint, which also
    # accepts 'fachvorsitz' — accepted invitations create key shares with that
    # role, so the direct-share path must allow it too.
    valid_roles = ['second_examiner', 'third_examiner', 'supervisor', 'department_head', 'fachvorsitz']
    if share_request.role not in valid_roles:
        raise HTTPException(status_code=400, detail=f"Invalid role. Must be one of: {valid_roles}")
    # Create key share entry
    share_id = str(uuid.uuid4())
    key_share = EHKeyShare(
        id=share_id,
        eh_id=eh_id,
        user_id=share_request.user_id,
        encrypted_passphrase=share_request.encrypted_passphrase,
        passphrase_hint=share_request.passphrase_hint or "",
        granted_by=user["user_id"],
        granted_at=datetime.now(timezone.utc),
        role=share_request.role,
        klausur_id=share_request.klausur_id,
        active=True
    )
    # Store in memory (shares are grouped per EH id)
    if eh_id not in storage.eh_key_shares_db:
        storage.eh_key_shares_db[eh_id] = []
    storage.eh_key_shares_db[eh_id].append(key_share)
    # Audit log
    log_eh_audit(
        tenant_id=tenant_id,
        user_id=user["user_id"],
        action="share",
        eh_id=eh_id,
        details={
            "shared_with": share_request.user_id,
            "role": share_request.role,
            "klausur_id": share_request.klausur_id
        }
    )
    return {
        "status": "shared",
        "share_id": share_id,
        "eh_id": eh_id,
        "shared_with": share_request.user_id,
        "role": share_request.role
    }
@router.get("/api/v1/eh/{eh_id}/shares")
async def list_eh_shares(eh_id: str, request: Request):
    """Return the active key shares of an EH; owner-only."""
    user = get_current_user(request)
    eh = storage.eh_db.get(eh_id)
    if eh is None:
        raise HTTPException(status_code=404, detail="Erwartungshorizont not found")
    if eh.teacher_id != user["user_id"]:
        raise HTTPException(status_code=403, detail="Only the owner can view shares")
    # Revoked shares (active=False) stay stored for the audit trail but are hidden here.
    return [s.to_dict() for s in storage.eh_key_shares_db.get(eh_id, []) if s.active]
@router.delete("/api/v1/eh/{eh_id}/shares/{share_id}")
async def revoke_eh_share(eh_id: str, share_id: str, request: Request):
    """Deactivate one key share of an EH; owner-only."""
    user = get_current_user(request)
    tenant_id = user.get("tenant_id") or user.get("school_id") or user["user_id"]

    eh = storage.eh_db.get(eh_id)
    if eh is None:
        raise HTTPException(status_code=404, detail="Erwartungshorizont not found")
    if eh.teacher_id != user["user_id"]:
        raise HTTPException(status_code=403, detail="Only the owner can revoke shares")

    # Shares are soft-revoked (active=False) rather than deleted, so the
    # grant history remains visible in the access chain.
    target = next(
        (s for s in storage.eh_key_shares_db.get(eh_id, []) if s.id == share_id),
        None,
    )
    if target is None:
        raise HTTPException(status_code=404, detail="Share not found")

    target.active = False
    log_eh_audit(
        tenant_id=tenant_id,
        user_id=user["user_id"],
        action="revoke_share",
        eh_id=eh_id,
        details={"revoked_user": target.user_id, "share_id": share_id}
    )
    return {"status": "revoked", "share_id": share_id}
# =============================================
# KLAUSUR LINKING
# =============================================
@router.post("/api/v1/eh/{eh_id}/link-klausur")
async def link_eh_to_klausur(
    eh_id: str,
    link_request: EHLinkKlausurRequest,
    request: Request
):
    """
    Link an Erwartungshorizont to a Klausur.

    Callable by the EH owner or by any holder of an active key share.
    Creates an EHKlausurLink record keyed by the Klausur id.
    """
    user = get_current_user(request)
    tenant_id = user.get("tenant_id") or user.get("school_id") or user["user_id"]
    uid = user["user_id"]

    eh = storage.eh_db.get(eh_id)
    if eh is None:
        raise HTTPException(status_code=404, detail="Erwartungshorizont not found")
    holds_share = any(
        s.user_id == uid and s.active
        for s in storage.eh_key_shares_db.get(eh_id, [])
    )
    if eh.teacher_id != uid and not holds_share:
        raise HTTPException(status_code=403, detail="No access to this EH")

    klausur_id = link_request.klausur_id
    if klausur_id not in storage.klausuren_db:
        raise HTTPException(status_code=404, detail="Klausur not found")

    link = EHKlausurLink(
        id=str(uuid.uuid4()),
        eh_id=eh_id,
        klausur_id=klausur_id,
        linked_by=uid,
        linked_at=datetime.now(timezone.utc)
    )
    # Links are grouped per Klausur id.
    storage.eh_klausur_links_db.setdefault(klausur_id, []).append(link)

    log_eh_audit(
        tenant_id=tenant_id,
        user_id=uid,
        action="link_klausur",
        eh_id=eh_id,
        details={"klausur_id": klausur_id}
    )
    return {
        "status": "linked",
        "link_id": link.id,
        "eh_id": eh_id,
        "klausur_id": klausur_id
    }
@router.get("/api/v1/klausuren/{klausur_id}/linked-eh")
async def get_linked_eh(klausur_id: str, request: Request):
    """List every EH linked to a Klausur that the caller owns or was granted."""
    user = get_current_user(request)
    user_id = user["user_id"]

    if klausur_id not in storage.klausuren_db:
        raise HTTPException(status_code=404, detail="Klausur not found")

    visible = []
    for link in storage.eh_klausur_links_db.get(klausur_id, []):
        eh = storage.eh_db.get(link.eh_id)
        if eh is None:
            continue  # EH was removed after linking
        # Single pass over the shares: the first active one for this user
        # doubles as both the access check and the returned share info.
        my_share = next(
            (
                s for s in storage.eh_key_shares_db.get(link.eh_id, [])
                if s.user_id == user_id and s.active
            ),
            None,
        )
        is_owner = eh.teacher_id == user_id
        if not is_owner and my_share is None:
            continue  # caller has no relationship to this EH
        visible.append({
            "eh": eh.to_dict(),
            "link": link.to_dict(),
            "is_owner": is_owner,
            "share": my_share.to_dict() if my_share is not None else None,
        })
    return visible
@router.delete("/api/v1/eh/{eh_id}/link-klausur/{klausur_id}")
async def unlink_eh_from_klausur(eh_id: str, klausur_id: str, request: Request):
    """Remove the association between an EH and a Klausur; owner-only."""
    user = get_current_user(request)
    tenant_id = user.get("tenant_id") or user.get("school_id") or user["user_id"]

    eh = storage.eh_db.get(eh_id)
    if eh is None:
        raise HTTPException(status_code=404, detail="Erwartungshorizont not found")
    if eh.teacher_id != user["user_id"]:
        raise HTTPException(status_code=403, detail="Only the owner can unlink")

    links = storage.eh_klausur_links_db.get(klausur_id, [])
    # Locate the first link for this EH; links are keyed by Klausur id.
    idx = next((i for i, lnk in enumerate(links) if lnk.eh_id == eh_id), None)
    if idx is None:
        raise HTTPException(status_code=404, detail="Link not found")

    del links[idx]
    log_eh_audit(
        tenant_id=tenant_id,
        user_id=user["user_id"],
        action="unlink_klausur",
        eh_id=eh_id,
        details={"klausur_id": klausur_id}
    )
    return {"status": "unlinked", "eh_id": eh_id, "klausur_id": klausur_id}
@router.get("/api/v1/eh/{eh_id}/access-chain")
async def get_eh_access_chain(eh_id: str, request: Request):
    """
    Get the complete access chain for an EH.

    Shows the correction chain: EK -> ZK -> DK -> FVL with the owner,
    active and revoked shares, and — for the owner only — pending
    invitations.
    """
    user = get_current_user(request)

    eh = storage.eh_db.get(eh_id)
    if eh is None:
        raise HTTPException(status_code=404, detail="Erwartungshorizont not found")

    uid = user["user_id"]
    shares = storage.eh_key_shares_db.get(eh_id, [])
    is_owner = eh.teacher_id == uid
    if not is_owner and not any(s.user_id == uid and s.active for s in shares):
        raise HTTPException(status_code=403, detail="No access to this EH")

    active_shares = [s.to_dict() for s in shares if s.active]
    revoked_shares = [s.to_dict() for s in shares if not s.active]
    # Pending invitations are sensitive (emails) and shown to the owner only.
    pending = (
        [
            inv.to_dict()
            for inv in storage.eh_invitations_db.values()
            if inv.eh_id == eh_id and inv.status == 'pending'
        ]
        if is_owner
        else []
    )
    return {
        "eh_id": eh_id,
        "eh_title": eh.title,
        "owner": {
            "user_id": eh.teacher_id,
            "role": "erstkorrektor"
        },
        "active_shares": active_shares,
        "pending_invitations": pending,
        "revoked_shares": revoked_shares
    }

View File

@@ -0,0 +1,455 @@
"""
BYOEH Upload, List, and Core CRUD Routes
Endpoints for uploading, listing, getting, deleting,
indexing, and RAG-querying Erwartungshorizonte.
Extracted from routes/eh.py for file-size compliance.
"""
import os
import uuid
import json
from datetime import datetime, timezone
from typing import Optional
from fastapi import APIRouter, HTTPException, Request, UploadFile, File, Form, BackgroundTasks
from models.enums import EHStatus
from models.eh import (
Erwartungshorizont,
EHRightsConfirmation,
)
from models.requests import (
EHUploadMetadata,
EHRAGQuery,
EHIndexRequest,
)
from services.auth_service import get_current_user
from services.eh_service import log_eh_audit
from config import EH_UPLOAD_DIR, OPENAI_API_KEY, ENVIRONMENT, RIGHTS_CONFIRMATION_TEXT
import storage
# BYOEH imports
from qdrant_service import (
get_collection_info, delete_eh_vectors, search_eh, index_eh_chunks
)
from eh_pipeline import (
decrypt_text, verify_key_hash, process_eh_for_indexing,
generate_single_embedding, EncryptionError, EmbeddingError
)
router = APIRouter()
# =============================================
# EH UPLOAD & LIST
# =============================================
@router.post("/api/v1/eh/upload")
async def upload_erwartungshorizont(
    file: UploadFile = File(...),
    metadata_json: str = Form(...),
    request: Request = None,
    background_tasks: BackgroundTasks = None
):
    """
    Upload an encrypted Erwartungshorizont.

    The file MUST be client-side encrypted.
    Server stores only the encrypted blob + key hash (never the passphrase).

    Persists three things: the encrypted file under a tenant-isolated path,
    the Erwartungshorizont record (status PENDING_RIGHTS until indexed),
    and a rights-confirmation record capturing who confirmed, when, and
    from where.

    Raises:
        HTTPException 400: metadata JSON invalid, or rights not confirmed.
    """
    # NOTE(review): `background_tasks` is injected but never used here — confirm
    # whether deferred processing was intended or the parameter can be dropped.
    user = get_current_user(request)
    tenant_id = user.get("tenant_id") or user.get("school_id") or user["user_id"]
    try:
        data = EHUploadMetadata(**json.loads(metadata_json))
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"Invalid metadata: {str(e)}")
    if not data.rights_confirmed:
        raise HTTPException(status_code=400, detail="Rights confirmation required")
    eh_id = str(uuid.uuid4())
    # Create tenant-isolated directory
    upload_dir = f"{EH_UPLOAD_DIR}/{tenant_id}/{eh_id}"
    os.makedirs(upload_dir, exist_ok=True)
    # Save encrypted file
    encrypted_path = f"{upload_dir}/encrypted.bin"
    content = await file.read()
    with open(encrypted_path, "wb") as f:
        f.write(content)
    # Save salt separately (needed later to verify the passphrase and decrypt)
    with open(f"{upload_dir}/salt.txt", "w") as f:
        f.write(data.salt)
    # Create EH record
    eh = Erwartungshorizont(
        id=eh_id,
        tenant_id=tenant_id,
        teacher_id=user["user_id"],
        title=data.metadata.title,
        subject=data.metadata.subject,
        niveau=data.metadata.niveau,
        year=data.metadata.year,
        aufgaben_nummer=data.metadata.aufgaben_nummer,
        encryption_key_hash=data.encryption_key_hash,
        salt=data.salt,
        encrypted_file_path=encrypted_path,
        file_size_bytes=len(content),
        original_filename=data.original_filename,
        rights_confirmed=True,
        rights_confirmed_at=datetime.now(timezone.utc),
        status=EHStatus.PENDING_RIGHTS,
        chunk_count=0,
        indexed_at=None,
        error_message=None,
        training_allowed=False,  # ALWAYS FALSE - critical for compliance
        created_at=datetime.now(timezone.utc),
        deleted_at=None
    )
    storage.eh_db[eh_id] = eh
    # Store rights confirmation (who/when/where, for legal traceability)
    rights_confirmation = EHRightsConfirmation(
        id=str(uuid.uuid4()),
        eh_id=eh_id,
        teacher_id=user["user_id"],
        confirmation_type="upload",
        confirmation_text=RIGHTS_CONFIRMATION_TEXT,
        ip_address=request.client.host if request.client else None,
        user_agent=request.headers.get("user-agent"),
        confirmed_at=datetime.now(timezone.utc)
    )
    storage.eh_rights_db[rights_confirmation.id] = rights_confirmation
    # Audit log
    log_eh_audit(
        tenant_id=tenant_id,
        user_id=user["user_id"],
        action="upload",
        eh_id=eh_id,
        details={
            "subject": data.metadata.subject,
            "year": data.metadata.year,
            "file_size": len(content)
        },
        ip_address=request.client.host if request.client else None,
        user_agent=request.headers.get("user-agent")
    )
    return eh.to_dict()
@router.get("/api/v1/eh")
async def list_erwartungshorizonte(
    request: Request,
    subject: Optional[str] = None,
    year: Optional[int] = None
):
    """List the tenant's non-deleted Erwartungshorizonte, optionally filtered
    by subject and/or year."""
    user = get_current_user(request)
    tenant_id = user.get("tenant_id") or user.get("school_id") or user["user_id"]
    return [
        eh.to_dict()
        for eh in storage.eh_db.values()
        if eh.tenant_id == tenant_id
        and eh.deleted_at is None
        and (not subject or eh.subject == subject)
        and (not year or eh.year == year)
    ]
# =============================================
# SPECIFIC EH ROUTES (must come before {eh_id} catch-all)
# =============================================
@router.get("/api/v1/eh/audit-log")
async def get_eh_audit_log(
    request: Request,
    eh_id: Optional[str] = None,
    limit: int = 100
):
    """Return the tenant's BYOEH audit entries, newest first, capped at `limit`."""
    user = get_current_user(request)
    tenant_id = user.get("tenant_id") or user.get("school_id") or user["user_id"]
    # Tenant filter plus optional per-EH filter in one pass.
    matching = (
        e for e in storage.eh_audit_db
        if e.tenant_id == tenant_id and (not eh_id or e.eh_id == eh_id)
    )
    newest_first = sorted(matching, key=lambda e: e.created_at, reverse=True)
    return [e.to_dict() for e in newest_first[:limit]]
@router.get("/api/v1/eh/rights-text")
async def get_rights_confirmation_text():
    """Expose the canonical rights-confirmation wording for display in the UI."""
    return {"text": RIGHTS_CONFIRMATION_TEXT, "version": "v1.0"}
@router.get("/api/v1/eh/qdrant-status")
async def get_qdrant_status(request: Request):
    """Report Qdrant collection status; admin-only outside development."""
    user = get_current_user(request)
    is_admin = user.get("role") == "admin"
    # In development every authenticated user may peek at the collection.
    if not is_admin and ENVIRONMENT != "development":
        raise HTTPException(status_code=403, detail="Admin access required")
    return await get_collection_info()
@router.get("/api/v1/eh/shared-with-me")
async def list_shared_eh(request: Request):
    """List every EH another teacher has actively shared with the caller."""
    user = get_current_user(request)
    uid = user["user_id"]
    # Shares whose EH record has disappeared are silently skipped.
    return [
        {"eh": storage.eh_db[eh_id].to_dict(), "share": share.to_dict()}
        for eh_id, shares in storage.eh_key_shares_db.items()
        for share in shares
        if share.user_id == uid and share.active and eh_id in storage.eh_db
    ]
# =============================================
# GENERIC EH ROUTES
# =============================================
@router.get("/api/v1/eh/{eh_id}")
async def get_erwartungshorizont(eh_id: str, request: Request):
    """Fetch one EH by id; visible to its owner or an admin, unless soft-deleted."""
    user = get_current_user(request)
    eh = storage.eh_db.get(eh_id)
    if eh is None:
        raise HTTPException(status_code=404, detail="Erwartungshorizont not found")
    is_admin = user.get("role") == "admin"
    if eh.teacher_id != user["user_id"] and not is_admin:
        raise HTTPException(status_code=403, detail="Access denied")
    # Soft-deleted records behave like missing ones toward callers.
    if eh.deleted_at is not None:
        raise HTTPException(status_code=404, detail="Erwartungshorizont was deleted")
    return eh.to_dict()
@router.delete("/api/v1/eh/{eh_id}")
async def delete_erwartungshorizont(eh_id: str, request: Request):
    """Soft-delete an EH and best-effort purge its vectors from Qdrant."""
    user = get_current_user(request)
    eh = storage.eh_db.get(eh_id)
    if eh is None:
        raise HTTPException(status_code=404, detail="Erwartungshorizont not found")
    if eh.teacher_id != user["user_id"] and user.get("role") != "admin":
        raise HTTPException(status_code=403, detail="Access denied")

    # Soft delete: keep the record (and its audit trail), only stamp deleted_at.
    eh.deleted_at = datetime.now(timezone.utc)

    # Vector cleanup is best-effort — a Qdrant outage must not block deletion.
    try:
        deleted_count = await delete_eh_vectors(eh_id)
        print(f"Deleted {deleted_count} vectors for EH {eh_id}")
    except Exception as e:
        print(f"Warning: Failed to delete vectors: {e}")

    log_eh_audit(
        tenant_id=eh.tenant_id,
        user_id=user["user_id"],
        action="delete",
        eh_id=eh_id,
        ip_address=request.client.host if request.client else None,
        user_agent=request.headers.get("user-agent")
    )
    return {"status": "deleted", "id": eh_id}
@router.post("/api/v1/eh/{eh_id}/index")
async def index_erwartungshorizont(
    eh_id: str,
    data: EHIndexRequest,
    request: Request
):
    """
    Index an Erwartungshorizont for RAG queries.

    Requires the passphrase to decrypt, chunk, embed, and re-encrypt chunks.
    The passphrase is only used transiently and never stored.

    Status machine: PROCESSING is set before the work starts; on success
    the record becomes INDEXED (chunk_count + indexed_at filled in); any
    failure leaves it in ERROR with error_message set.

    Raises:
        HTTPException 404: unknown EH id.
        HTTPException 403: caller is neither owner nor admin.
        HTTPException 401: passphrase does not match the stored key hash.
        HTTPException 400/500: decryption / embedding / other indexing failure.
    """
    user = get_current_user(request)
    if eh_id not in storage.eh_db:
        raise HTTPException(status_code=404, detail="Erwartungshorizont not found")
    eh = storage.eh_db[eh_id]
    if eh.teacher_id != user["user_id"] and user.get("role") != "admin":
        raise HTTPException(status_code=403, detail="Access denied")
    # Verify passphrase matches key hash (rejects wrong passphrases before any work)
    if not verify_key_hash(data.passphrase, eh.salt, eh.encryption_key_hash):
        raise HTTPException(status_code=401, detail="Invalid passphrase")
    eh.status = EHStatus.PROCESSING
    try:
        # Read encrypted file
        with open(eh.encrypted_file_path, "rb") as f:
            encrypted_content = f.read()
        # Decrypt the file — the encrypted payload is stored as UTF-8 text;
        # decrypt_text expects the string form.
        decrypted_text = decrypt_text(
            encrypted_content.decode('utf-8'),
            data.passphrase,
            eh.salt
        )
        # Process for indexing: chunk, embed, and re-encrypt each chunk with
        # the same passphrase/salt so Qdrant never holds plaintext.
        chunk_count, chunks_data = await process_eh_for_indexing(
            eh_id=eh_id,
            tenant_id=eh.tenant_id,
            subject=eh.subject,
            text_content=decrypted_text,
            passphrase=data.passphrase,
            salt_hex=eh.salt
        )
        # Index in Qdrant (tenant- and subject-tagged)
        await index_eh_chunks(
            eh_id=eh_id,
            tenant_id=eh.tenant_id,
            subject=eh.subject,
            chunks=chunks_data
        )
        # Update EH record
        eh.status = EHStatus.INDEXED
        eh.chunk_count = chunk_count
        eh.indexed_at = datetime.now(timezone.utc)
        # Audit log
        log_eh_audit(
            tenant_id=eh.tenant_id,
            user_id=user["user_id"],
            action="indexed",
            eh_id=eh_id,
            details={"chunk_count": chunk_count}
        )
        return {
            "status": "indexed",
            "id": eh_id,
            "chunk_count": chunk_count
        }
    # Each failure path records the error on the EH before surfacing it,
    # so the UI can show why indexing stalled.
    except EncryptionError as e:
        eh.status = EHStatus.ERROR
        eh.error_message = str(e)
        raise HTTPException(status_code=400, detail=f"Decryption failed: {str(e)}")
    except EmbeddingError as e:
        eh.status = EHStatus.ERROR
        eh.error_message = str(e)
        raise HTTPException(status_code=500, detail=f"Embedding generation failed: {str(e)}")
    except Exception as e:
        eh.status = EHStatus.ERROR
        eh.error_message = str(e)
        raise HTTPException(status_code=500, detail=f"Indexing failed: {str(e)}")
@router.post("/api/v1/eh/rag-query")
async def rag_query_eh(data: EHRAGQuery, request: Request):
    """
    RAG query against teacher's Erwartungshorizonte.

    1. Semantic search in Qdrant (tenant-isolated)
    2. Decrypt relevant chunks on-the-fly
    3. Return context for LLM usage

    Returns a dict with the joined plaintext ``context``, the per-chunk
    ``sources`` (with scores), and the original ``query``.

    Raises:
        HTTPException 500: missing OpenAI key, embedding failure, or any
        other search/decryption error.
    """
    user = get_current_user(request)
    tenant_id = user.get("tenant_id") or user.get("school_id") or user["user_id"]
    if not OPENAI_API_KEY:
        raise HTTPException(status_code=500, detail="OpenAI API key not configured")
    try:
        # Generate embedding for query
        query_embedding = await generate_single_embedding(data.query_text)
        # Search in Qdrant (tenant-isolated)
        results = await search_eh(
            query_embedding=query_embedding,
            tenant_id=tenant_id,
            subject=data.subject,
            limit=data.limit
        )
        # Decrypt matching chunks; each chunk uses its own EH's salt with the
        # caller-supplied passphrase.
        decrypted_chunks = []
        for r in results:
            eh = storage.eh_db.get(r["eh_id"])
            if eh and r.get("encrypted_content"):
                try:
                    decrypted = decrypt_text(
                        r["encrypted_content"],
                        data.passphrase,
                        eh.salt
                    )
                    decrypted_chunks.append({
                        "text": decrypted,
                        "eh_id": r["eh_id"],
                        "eh_title": eh.title,
                        "chunk_index": r["chunk_index"],
                        "score": r["score"]
                    })
                except EncryptionError:
                    # Skip chunks that can't be decrypted (wrong passphrase for different EH)
                    pass
        # Audit log — records counts only, never query text or plaintext chunks.
        log_eh_audit(
            tenant_id=tenant_id,
            user_id=user["user_id"],
            action="rag_query",
            details={
                "query_length": len(data.query_text),
                "results_count": len(results),
                "decrypted_count": len(decrypted_chunks)
            },
            ip_address=request.client.host if request.client else None,
            user_agent=request.headers.get("user-agent")
        )
        return {
            "context": "\n\n---\n\n".join([c["text"] for c in decrypted_chunks]),
            "sources": decrypted_chunks,
            "query": data.query_text
        }
    except EmbeddingError as e:
        raise HTTPException(status_code=500, detail=f"Query embedding failed: {str(e)}")
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"RAG query failed: {str(e)}")