Restructure: Move 52 files into 7 domain packages

korrektur/ zeugnis/ admin/ compliance/ worksheet/ training/ metrics/ 52 shims, relative imports, RAG untouched. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-25 22:10:48 +02:00
parent 0504d22b8e
commit 165c493d1e
111 changed files with 11859 additions and 11609 deletions
--- a/klausur-service/backend/worksheet/init.py
+++ b/klausur-service/backend/worksheet/init.py
@@ -0,0 +1,6 @@
+"""
+worksheet package — worksheet editor, NRU generator, cleanup.
+
+Backward-compatible re-exports: consumers can still use
+``from worksheet_editor_api import ...`` etc. via the shim files in backend/.
+"""
--- a/klausur-service/backend/worksheet/cleanup_api.py
+++ b/klausur-service/backend/worksheet/cleanup_api.py
@@ -0,0 +1,491 @@
+"""
+Worksheet Cleanup API - Handschrift-Entfernung und Layout-Rekonstruktion
+
+Endpoints:
+- POST /api/v1/worksheet/detect-handwriting - Erkennt Handschrift und gibt Maske zurueck
+- POST /api/v1/worksheet/remove-handwriting - Entfernt Handschrift aus Bild
+- POST /api/v1/worksheet/reconstruct - Rekonstruiert Layout als Fabric.js JSON
+- POST /api/v1/worksheet/cleanup-pipeline - Vollstaendige Pipeline (Erkennung + Entfernung + Layout)
+
+DATENSCHUTZ: Alle Verarbeitung erfolgt lokal auf dem Mac Mini.
+"""
+
+import io
+import base64
+import logging
+from typing import Optional
+
+from fastapi import APIRouter, HTTPException, UploadFile, File, Form
+from fastapi.responses import StreamingResponse, JSONResponse
+from pydantic import BaseModel
+
+from services.handwriting_detection import (
+    detect_handwriting,
+    detect_handwriting_regions,
+    mask_to_png
+)
+from services.inpainting_service import (
+    inpaint_image,
+    remove_handwriting,
+    InpaintingMethod,
+    check_lama_available
+)
+from services.layout_reconstruction_service import (
+    reconstruct_layout,
+    layout_to_fabric_json,
+    reconstruct_and_clean
+)
+
+# Module-level logger named after this module's import path.
+logger = logging.getLogger(__name__)
+
+# Every endpoint in this module is registered on this router under /api/v1/worksheet.
+router = APIRouter(prefix="/api/v1/worksheet", tags=["Worksheet Cleanup"])
+
+
+# =============================================================================
+# Pydantic Models
+# =============================================================================
+
+# Response body of POST /detect-handwriting.
+class DetectionResponse(BaseModel):
+    # True when the detector confidence and the handwriting ratio both pass the endpoint's thresholds
+    has_handwriting: bool
+    # Confidence value copied from the detection result
+    confidence: float
+    # Ratio copied from the detection result (presumably the share of handwriting pixels — TODO confirm)
+    handwriting_ratio: float
+    # Name of the detection method as reported by the detection result
+    detection_method: str
+    # Base64-encoded PNG mask; only filled when the caller asked for the mask
+    mask_base64: Optional[str] = None
+
+
+# Result model for an inpainting run.
+# NOTE(review): no endpoint in the visible part of this module returns this model — confirm it is still needed.
+class InpaintingResponse(BaseModel):
+    success: bool
+    # Presumably the name of the inpainting method that was actually applied — TODO confirm
+    method_used: str
+    processing_time_ms: float
+    # Optional base64-encoded result image
+    image_base64: Optional[str] = None
+    # Optional error description
+    error: Optional[str] = None
+
+
+# Response body of POST /reconstruct.
+class ReconstructionResponse(BaseModel):
+    success: bool
+    # Number of entries in the reconstructed layout's element list
+    element_count: int
+    # Page dimensions taken from the reconstructed layout
+    page_width: int
+    page_height: int
+    # Fabric.js canvas description taken from the reconstructed layout
+    fabric_json: dict
+    # Number of table regions found in the reconstructed layout
+    table_count: int = 0
+
+
+# Response body of POST /cleanup-pipeline.
+class PipelineResponse(BaseModel):
+    success: bool
+    # Outcome flags of the three pipeline steps (detect, remove, reconstruct)
+    handwriting_detected: bool
+    handwriting_removed: bool
+    layout_reconstructed: bool
+    # Base64 of the image after the removal step (the untouched upload if nothing was removed)
+    cleaned_image_base64: Optional[str] = None
+    # Fabric.js canvas description; None when reconstruction was not requested
+    fabric_json: Optional[dict] = None
+    # Per-step details; the pipeline endpoint fills the keys "detection", "inpainting" and "layout"
+    metadata: dict = {}
+
+
+# Response body of GET /capabilities: which optional backends can be used on this server.
+class CapabilitiesResponse(BaseModel):
+    # The capabilities endpoint always reports OpenCV as available
+    opencv_available: bool = True
+    # Filled from check_lama_available()
+    lama_available: bool = False
+    # True when the PaddleOCR accessor returned an instance
+    paddleocr_available: bool = False
+
+
+# =============================================================================
+# API Endpoints
+# =============================================================================
+
+@router.get("/capabilities")
+async def get_capabilities() -> CapabilitiesResponse:
+    """
+    Get available cleanup capabilities on this server.
+    """
+    # Check PaddleOCR
+    paddleocr_available = False
+    try:
+        from hybrid_vocab_extractor import get_paddle_ocr
+        ocr = get_paddle_ocr()
+        paddleocr_available = ocr is not None
+    except Exception:
+        pass
+
+    return CapabilitiesResponse(
+        opencv_available=True,
+        lama_available=check_lama_available(),
+        paddleocr_available=paddleocr_available
+    )
+
+
+@router.post("/detect-handwriting")
+async def detect_handwriting_endpoint(
+    image: UploadFile = File(...),
+    return_mask: bool = Form(default=True),
+    min_confidence: float = Form(default=0.3)
+) -> DetectionResponse:
+    """
+    Detect handwriting in an image.
+
+    Args:
+        image: Input image (PNG, JPG)
+        return_mask: Whether to return the binary mask as base64
+        min_confidence: Minimum confidence threshold
+
+    Returns:
+        DetectionResponse with detection results and optional mask
+    """
+    logger.info(f"Handwriting detection request: {image.filename}")
+
+    # Validate file type
+    content_type = image.content_type or ""
+    if not content_type.startswith("image/"):
+        raise HTTPException(
+            status_code=400,
+            detail="Only image files (PNG, JPG) are supported"
+        )
+
+    try:
+        image_bytes = await image.read()
+
+        # Detect handwriting
+        result = detect_handwriting(image_bytes)
+
+        has_handwriting = (
+            result.confidence >= min_confidence and
+            result.handwriting_ratio > 0.005
+        )
+
+        response = DetectionResponse(
+            has_handwriting=has_handwriting,
+            confidence=result.confidence,
+            handwriting_ratio=result.handwriting_ratio,
+            detection_method=result.detection_method
+        )
+
+        if return_mask:
+            mask_bytes = mask_to_png(result.mask)
+            response.mask_base64 = base64.b64encode(mask_bytes).decode('utf-8')
+
+        logger.info(f"Detection complete: handwriting={has_handwriting}, "
+                    f"confidence={result.confidence:.2f}")
+
+        return response
+
+    except Exception as e:
+        logger.error(f"Handwriting detection failed: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+@router.post("/detect-handwriting/mask")
+async def get_handwriting_mask(
+    image: UploadFile = File(...)
+) -> StreamingResponse:
+    """
+    Get handwriting detection mask as PNG image.
+
+    Returns binary mask where white (255) = handwriting.
+    """
+    content_type = image.content_type or ""
+    if not content_type.startswith("image/"):
+        raise HTTPException(
+            status_code=400,
+            detail="Only image files are supported"
+        )
+
+    try:
+        image_bytes = await image.read()
+        result = detect_handwriting(image_bytes)
+        mask_bytes = mask_to_png(result.mask)
+
+        return StreamingResponse(
+            io.BytesIO(mask_bytes),
+            media_type="image/png",
+            headers={
+                "Content-Disposition": "attachment; filename=handwriting_mask.png"
+            }
+        )
+
+    except Exception as e:
+        logger.error(f"Mask generation failed: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+@router.post("/remove-handwriting")
+async def remove_handwriting_endpoint(
+    image: UploadFile = File(...),
+    mask: Optional[UploadFile] = File(default=None),
+    method: str = Form(default="auto"),
+    return_base64: bool = Form(default=False)
+):
+    """
+    Remove handwriting from an image.
+
+    Args:
+        image: Input image with handwriting
+        mask: Optional pre-computed mask (if not provided, auto-detected)
+        method: Inpainting method (auto, opencv_telea, opencv_ns, lama)
+        return_base64: If True, return image as base64, else as file
+
+    Returns:
+        Cleaned image (as PNG file or base64 in JSON)
+    """
+    logger.info(f"Remove handwriting request: {image.filename}, method={method}")
+
+    content_type = image.content_type or ""
+    if not content_type.startswith("image/"):
+        raise HTTPException(
+            status_code=400,
+            detail="Only image files are supported"
+        )
+
+    try:
+        image_bytes = await image.read()
+
+        # Get mask if provided
+        mask_array = None
+        if mask is not None:
+            mask_bytes = await mask.read()
+            from PIL import Image
+            import numpy as np
+            mask_img = Image.open(io.BytesIO(mask_bytes))
+            mask_array = np.array(mask_img)
+
+        # Select inpainting method
+        inpainting_method = InpaintingMethod.AUTO
+        if method == "opencv_telea":
+            inpainting_method = InpaintingMethod.OPENCV_TELEA
+        elif method == "opencv_ns":
+            inpainting_method = InpaintingMethod.OPENCV_NS
+        elif method == "lama":
+            inpainting_method = InpaintingMethod.LAMA
+
+        # Remove handwriting
+        cleaned_bytes, metadata = remove_handwriting(
+            image_bytes,
+            mask=mask_array,
+            method=inpainting_method
+        )
+
+        if return_base64:
+            return JSONResponse({
+                "success": True,
+                "image_base64": base64.b64encode(cleaned_bytes).decode('utf-8'),
+                "metadata": metadata
+            })
+        else:
+            return StreamingResponse(
+                io.BytesIO(cleaned_bytes),
+                media_type="image/png",
+                headers={
+                    "Content-Disposition": "attachment; filename=cleaned.png",
+                    "X-Method-Used": metadata.get("method_used", "unknown"),
+                    "X-Processing-Time-Ms": str(metadata.get("processing_time_ms", 0))
+                }
+            )
+
+    except Exception as e:
+        logger.error(f"Handwriting removal failed: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+@router.post("/reconstruct")
+async def reconstruct_layout_endpoint(
+    image: UploadFile = File(...),
+    clean_handwriting: bool = Form(default=True),
+    detect_tables: bool = Form(default=True)
+) -> ReconstructionResponse:
+    """
+    Reconstruct worksheet layout and generate Fabric.js JSON.
+
+    Args:
+        image: Input image (can contain handwriting)
+        clean_handwriting: Whether to remove handwriting first
+        detect_tables: Whether to detect table structures
+
+    Returns:
+        ReconstructionResponse with Fabric.js JSON
+    """
+    logger.info(f"Layout reconstruction request: {image.filename}")
+
+    content_type = image.content_type or ""
+    if not content_type.startswith("image/"):
+        raise HTTPException(
+            status_code=400,
+            detail="Only image files are supported"
+        )
+
+    try:
+        image_bytes = await image.read()
+
+        # Run reconstruction pipeline
+        if clean_handwriting:
+            cleaned_bytes, layout = reconstruct_and_clean(image_bytes)
+        else:
+            layout = reconstruct_layout(image_bytes, detect_tables=detect_tables)
+
+        return ReconstructionResponse(
+            success=True,
+            element_count=len(layout.elements),
+            page_width=layout.page_width,
+            page_height=layout.page_height,
+            fabric_json=layout.fabric_json,
+            table_count=len(layout.table_regions)
+        )
+
+    except Exception as e:
+        logger.error(f"Layout reconstruction failed: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+@router.post("/cleanup-pipeline")
+async def full_cleanup_pipeline(
+    image: UploadFile = File(...),
+    remove_hw: bool = Form(default=True, alias="remove_handwriting"),
+    reconstruct: bool = Form(default=True),
+    inpainting_method: str = Form(default="auto")
+) -> PipelineResponse:
+    """
+    Full cleanup pipeline: detect, remove handwriting, reconstruct layout.
+
+    This is the recommended endpoint for processing filled worksheets.
+
+    Args:
+        image: Input image (scan/photo of filled worksheet)
+        remove_handwriting: Whether to remove detected handwriting
+        reconstruct: Whether to reconstruct layout as Fabric.js JSON
+        inpainting_method: Method for inpainting (auto, opencv_telea, opencv_ns, lama)
+
+    Returns:
+        PipelineResponse with cleaned image and Fabric.js JSON
+    """
+    logger.info(f"Full cleanup pipeline: {image.filename}")
+
+    content_type = image.content_type or ""
+    if not content_type.startswith("image/"):
+        raise HTTPException(
+            status_code=400,
+            detail="Only image files are supported"
+        )
+
+    try:
+        image_bytes = await image.read()
+        metadata = {}
+
+        # Step 1: Detect handwriting
+        detection = detect_handwriting(image_bytes)
+        handwriting_detected = (
+            detection.confidence >= 0.3 and
+            detection.handwriting_ratio > 0.005
+        )
+
+        metadata["detection"] = {
+            "confidence": detection.confidence,
+            "handwriting_ratio": detection.handwriting_ratio,
+            "method": detection.detection_method
+        }
+
+        # Step 2: Remove handwriting if requested and detected
+        cleaned_bytes = image_bytes
+        handwriting_removed = False
+
+        if remove_hw and handwriting_detected:
+            method = InpaintingMethod.AUTO
+            if inpainting_method == "opencv_telea":
+                method = InpaintingMethod.OPENCV_TELEA
+            elif inpainting_method == "opencv_ns":
+                method = InpaintingMethod.OPENCV_NS
+            elif inpainting_method == "lama":
+                method = InpaintingMethod.LAMA
+
+            cleaned_bytes, inpaint_metadata = remove_handwriting(
+                image_bytes,
+                mask=detection.mask,
+                method=method
+            )
+            handwriting_removed = inpaint_metadata.get("inpainting_performed", False)
+            metadata["inpainting"] = inpaint_metadata
+
+        # Step 3: Reconstruct layout if requested
+        fabric_json = None
+        layout_reconstructed = False
+
+        if reconstruct:
+            layout = reconstruct_layout(cleaned_bytes)
+            fabric_json = layout.fabric_json
+            layout_reconstructed = len(layout.elements) > 0
+            metadata["layout"] = {
+                "element_count": len(layout.elements),
+                "table_count": len(layout.table_regions),
+                "page_width": layout.page_width,
+                "page_height": layout.page_height
+            }
+
+        # Encode cleaned image as base64
+        cleaned_base64 = base64.b64encode(cleaned_bytes).decode('utf-8')
+
+        logger.info(f"Pipeline complete: detected={handwriting_detected}, "
+                    f"removed={handwriting_removed}, layout={layout_reconstructed}")
+
+        return PipelineResponse(
+            success=True,
+            handwriting_detected=handwriting_detected,
+            handwriting_removed=handwriting_removed,
+            layout_reconstructed=layout_reconstructed,
+            cleaned_image_base64=cleaned_base64,
+            fabric_json=fabric_json,
+            metadata=metadata
+        )
+
+    except Exception as e:
+        logger.error(f"Cleanup pipeline failed: {e}")
+        import traceback
+        logger.error(traceback.format_exc())
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+@router.post("/preview-cleanup")
+async def preview_cleanup(
+    image: UploadFile = File(...)
+) -> JSONResponse:
+    """
+    Quick preview of cleanup results without full processing.
+
+    Returns detection results and estimated processing time.
+    """
+    content_type = image.content_type or ""
+    if not content_type.startswith("image/"):
+        raise HTTPException(
+            status_code=400,
+            detail="Only image files are supported"
+        )
+
+    try:
+        image_bytes = await image.read()
+
+        # Quick detection only
+        result = detect_handwriting_regions(image_bytes)
+
+        # Estimate processing time based on image size
+        from PIL import Image
+        img = Image.open(io.BytesIO(image_bytes))
+        pixel_count = img.width * img.height
+
+        # Rough estimates
+        est_detection_ms = 100 + (pixel_count / 1000000) * 200
+        est_inpainting_ms = 500 + (pixel_count / 1000000) * 1000
+        est_reconstruction_ms = 200 + (pixel_count / 1000000) * 300
+
+        return JSONResponse({
+            "has_handwriting": result["has_handwriting"],
+            "confidence": result["confidence"],
+            "handwriting_ratio": result["handwriting_ratio"],
+            "image_width": img.width,
+            "image_height": img.height,
+            "estimated_times_ms": {
+                "detection": est_detection_ms,
+                "inpainting": est_inpainting_ms if result["has_handwriting"] else 0,
+                "reconstruction": est_reconstruction_ms,
+                "total": est_detection_ms + (est_inpainting_ms if result["has_handwriting"] else 0) + est_reconstruction_ms
+            },
+            "capabilities": {
+                "lama_available": check_lama_available()
+            }
+        })
+
+    except Exception as e:
+        logger.error(f"Preview failed: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
--- a/klausur-service/backend/worksheet/editor_ai.py
+++ b/klausur-service/backend/worksheet/editor_ai.py
@@ -0,0 +1,485 @@
+"""
+Worksheet Editor AI — AI image generation and AI worksheet modification.
+"""
+
+import io
+import json
+import base64
+import logging
+import re
+import time
+import random
+from typing import List, Dict
+
+import httpx
+
+from .editor_models import (
+    AIImageRequest,
+    AIImageResponse,
+    AIImageStyle,
+    AIModifyRequest,
+    AIModifyResponse,
+    OLLAMA_URL,
+    STYLE_PROMPTS,
+)
+
+# Module-level logger named after this module's import path.
+logger = logging.getLogger(__name__)
+
+
+# =============================================
+# AI IMAGE GENERATION
+# =============================================
+
+async def generate_ai_image_logic(request: AIImageRequest) -> AIImageResponse:
+    """
+    Generate an AI image using Ollama with a text-to-image model.
+
+    Falls back to a placeholder if Ollama is not available.
+    """
+    from fastapi import HTTPException
+
+    try:
+        # Build enhanced prompt with style
+        style_modifier = STYLE_PROMPTS.get(request.style, "")
+        enhanced_prompt = f"{request.prompt}, {style_modifier}"
+
+        logger.info(f"Generating AI image: {enhanced_prompt[:100]}...")
+
+        # Check if Ollama is available
+        async with httpx.AsyncClient(timeout=10.0) as check_client:
+            try:
+                health_response = await check_client.get(f"{OLLAMA_URL}/api/tags")
+                if health_response.status_code != 200:
+                    raise HTTPException(status_code=503, detail="Ollama service not available")
+            except httpx.ConnectError:
+                logger.warning("Ollama not reachable, returning placeholder")
+                return _generate_placeholder_image(request, enhanced_prompt)
+
+        try:
+            async with httpx.AsyncClient(timeout=300.0) as client:
+                tags_response = await client.get(f"{OLLAMA_URL}/api/tags")
+                available_models = [m.get("name", "") for m in tags_response.json().get("models", [])]
+
+                sd_model = None
+                for model in available_models:
+                    if "stable" in model.lower() or "sd" in model.lower() or "diffusion" in model.lower():
+                        sd_model = model
+                        break
+
+                if not sd_model:
+                    logger.warning("No Stable Diffusion model found in Ollama")
+                    return _generate_placeholder_image(request, enhanced_prompt)
+
+                logger.info(f"SD model found: {sd_model}, but image generation API not implemented")
+                return _generate_placeholder_image(request, enhanced_prompt)
+
+        except Exception as e:
+            logger.error(f"Image generation failed: {e}")
+            return _generate_placeholder_image(request, enhanced_prompt)
+
+    except HTTPException:
+        raise
+    except Exception as e:
+        logger.error(f"AI image generation error: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+def _generate_placeholder_image(request: AIImageRequest, prompt: str) -> AIImageResponse:
+    """
+    Generate a placeholder image when AI generation is not available.
+    Creates a simple SVG-based placeholder with the prompt text.
+    """
+    from PIL import Image, ImageDraw, ImageFont
+
+    width, height = request.width, request.height
+
+    style_colors = {
+        AIImageStyle.REALISTIC: ("#2563eb", "#dbeafe"),
+        AIImageStyle.CARTOON: ("#f97316", "#ffedd5"),
+        AIImageStyle.SKETCH: ("#6b7280", "#f3f4f6"),
+        AIImageStyle.CLIPART: ("#8b5cf6", "#ede9fe"),
+        AIImageStyle.EDUCATIONAL: ("#059669", "#d1fae5"),
+    }
+
+    fg_color, bg_color = style_colors.get(request.style, ("#6366f1", "#e0e7ff"))
+
+    img = Image.new('RGB', (width, height), bg_color)
+    draw = ImageDraw.Draw(img)
+
+    draw.rectangle([5, 5, width-6, height-6], outline=fg_color, width=3)
+
+    cx, cy = width // 2, height // 2 - 30
+    draw.ellipse([cx-40, cy-40, cx+40, cy+40], outline=fg_color, width=3)
+    draw.line([cx-20, cy-10, cx+20, cy-10], fill=fg_color, width=3)
+    draw.line([cx, cy-10, cx, cy+20], fill=fg_color, width=3)
+
+    try:
+        font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", 14)
+    except Exception:
+        font = ImageFont.load_default()
+
+    max_chars = 40
+    lines = []
+    words = prompt[:200].split()
+    current_line = ""
+    for word in words:
+        if len(current_line) + len(word) + 1 <= max_chars:
+            current_line += (" " + word if current_line else word)
+        else:
+            if current_line:
+                lines.append(current_line)
+            current_line = word
+    if current_line:
+        lines.append(current_line)
+
+    text_y = cy + 60
+    for line in lines[:4]:
+        bbox = draw.textbbox((0, 0), line, font=font)
+        text_width = bbox[2] - bbox[0]
+        draw.text((cx - text_width // 2, text_y), line, fill=fg_color, font=font)
+        text_y += 20
+
+    badge_text = "KI-Bild (Platzhalter)"
+    try:
+        badge_font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", 10)
+    except Exception:
+        badge_font = font
+    draw.rectangle([10, height-30, 150, height-10], fill=fg_color)
+    draw.text((15, height-27), badge_text, fill="white", font=badge_font)
+
+    buffer = io.BytesIO()
+    img.save(buffer, format='PNG')
+    buffer.seek(0)
+
+    image_base64 = f"data:image/png;base64,{base64.b64encode(buffer.getvalue()).decode('utf-8')}"
+
+    return AIImageResponse(
+        image_base64=image_base64,
+        prompt_used=prompt,
+        error="AI image generation not available. Using placeholder."
+    )
+
+
+# =============================================
+# AI WORKSHEET MODIFICATION
+# =============================================
+
+async def modify_worksheet_with_ai_logic(request: AIModifyRequest) -> AIModifyResponse:
+    """
+    Modify a worksheet using AI based on natural language prompt.
+    """
+    try:
+        logger.info(f"AI modify request: {request.prompt[:100]}...")
+
+        try:
+            canvas_data = json.loads(request.canvas_json)
+        except json.JSONDecodeError:
+            return AIModifyResponse(
+                message="Fehler beim Parsen des Canvas",
+                error="Invalid canvas JSON"
+            )
+
+        system_prompt = """Du bist ein Assistent fuer die Bearbeitung von Arbeitsblaettern.
+Du erhaeltst den aktuellen Zustand eines Canvas im JSON-Format und eine Anweisung des Nutzers.
+Deine Aufgabe ist es, die gewuenschten Aenderungen am Canvas vorzunehmen.
+
+Der Canvas verwendet Fabric.js. Hier sind die wichtigsten Objekttypen:
+- i-text: Interaktiver Text mit fontFamily, fontSize, fill, left, top
+- rect: Rechteck mit left, top, width, height, fill, stroke, strokeWidth
+- circle: Kreis mit left, top, radius, fill, stroke, strokeWidth
+- line: Linie mit x1, y1, x2, y2, stroke, strokeWidth
+
+Das Canvas ist 794x1123 Pixel (A4 bei 96 DPI).
+
+Antworte NUR mit einem JSON-Objekt in diesem Format:
+{
+  "action": "modify" oder "add" oder "delete" oder "info",
+  "objects": [...],  // Neue/modifizierte Objekte (bei modify/add)
+  "message": "Kurze Beschreibung der Aenderung"
+}
+
+Wenn du Objekte hinzufuegst, generiere eindeutige IDs im Format "obj_<timestamp>_<random>".
+"""
+
+        user_prompt = f"""Aktueller Canvas-Zustand:
+```json
+{json.dumps(canvas_data, indent=2)[:5000]}
+```
+
+Nutzer-Anweisung: {request.prompt}
+
+Fuehre die Aenderung durch und antworte mit dem JSON-Objekt."""
+
+        try:
+            async with httpx.AsyncClient(timeout=120.0) as client:
+                response = await client.post(
+                    f"{OLLAMA_URL}/api/generate",
+                    json={
+                        "model": request.model,
+                        "prompt": user_prompt,
+                        "system": system_prompt,
+                        "stream": False,
+                        "options": {
+                            "temperature": 0.3,
+                            "num_predict": 4096
+                        }
+                    }
+                )
+
+                if response.status_code != 200:
+                    logger.warning(f"Ollama error: {response.status_code}, trying local fallback")
+                    return _handle_simple_modification(request.prompt, canvas_data)
+
+                ai_response = response.json().get("response", "")
+
+        except httpx.ConnectError:
+            logger.warning("Ollama not reachable")
+            return _handle_simple_modification(request.prompt, canvas_data)
+        except httpx.TimeoutException:
+            logger.warning("Ollama timeout, trying local fallback")
+            return _handle_simple_modification(request.prompt, canvas_data)
+
+        try:
+            json_start = ai_response.find('{')
+            json_end = ai_response.rfind('}') + 1
+
+            if json_start == -1 or json_end <= json_start:
+                logger.warning(f"No JSON found in AI response: {ai_response[:200]}")
+                return AIModifyResponse(
+                    message="KI konnte die Anfrage nicht verarbeiten",
+                    error="No JSON in response"
+                )
+
+            ai_json = json.loads(ai_response[json_start:json_end])
+            action = ai_json.get("action", "info")
+            message = ai_json.get("message", "Aenderungen angewendet")
+            new_objects = ai_json.get("objects", [])
+
+            if action == "info":
+                return AIModifyResponse(message=message)
+
+            if action == "add" and new_objects:
+                existing_objects = canvas_data.get("objects", [])
+                existing_objects.extend(new_objects)
+                canvas_data["objects"] = existing_objects
+                return AIModifyResponse(
+                    modified_canvas_json=json.dumps(canvas_data),
+                    message=message
+                )
+
+            if action == "modify" and new_objects:
+                existing_objects = canvas_data.get("objects", [])
+                new_ids = {obj.get("id") for obj in new_objects if obj.get("id")}
+                kept_objects = [obj for obj in existing_objects if obj.get("id") not in new_ids]
+                kept_objects.extend(new_objects)
+                canvas_data["objects"] = kept_objects
+                return AIModifyResponse(
+                    modified_canvas_json=json.dumps(canvas_data),
+                    message=message
+                )
+
+            if action == "delete":
+                delete_ids = ai_json.get("delete_ids", [])
+                if delete_ids:
+                    existing_objects = canvas_data.get("objects", [])
+                    canvas_data["objects"] = [obj for obj in existing_objects if obj.get("id") not in delete_ids]
+                    return AIModifyResponse(
+                        modified_canvas_json=json.dumps(canvas_data),
+                        message=message
+                    )
+
+            return AIModifyResponse(message=message)
+
+        except json.JSONDecodeError as e:
+            logger.error(f"Failed to parse AI JSON: {e}")
+            return AIModifyResponse(
+                message="Fehler beim Verarbeiten der KI-Antwort",
+                error=str(e)
+            )
+
+    except Exception as e:
+        logger.error(f"AI modify error: {e}")
+        return AIModifyResponse(
+            message="Ein unerwarteter Fehler ist aufgetreten",
+            error=str(e)
+        )
+
+
+def _handle_simple_modification(prompt: str, canvas_data: dict) -> AIModifyResponse:
+    """
+    Handle simple modifications locally when Ollama is not available.
+    Supports basic commands like adding headings, lines, etc.
+    """
+    prompt_lower = prompt.lower()
+    objects = canvas_data.get("objects", [])
+
+    def generate_id():
+        return f"obj_{int(time.time()*1000)}_{random.randint(1000, 9999)}"
+
+    # Add heading
+    if "ueberschrift" in prompt_lower or "titel" in prompt_lower or "heading" in prompt_lower:
+        text_match = re.search(r'"([^"]+)"', prompt)
+        text = text_match.group(1) if text_match else "Ueberschrift"
+
+        new_text = {
+            "type": "i-text", "id": generate_id(), "text": text,
+            "left": 397, "top": 50, "originX": "center",
+            "fontFamily": "Arial", "fontSize": 28, "fontWeight": "bold", "fill": "#000000"
+        }
+        objects.append(new_text)
+        canvas_data["objects"] = objects
+        return AIModifyResponse(
+            modified_canvas_json=json.dumps(canvas_data),
+            message=f"Ueberschrift '{text}' hinzugefuegt"
+        )
+
+    # Add lines for writing
+    if "linie" in prompt_lower or "line" in prompt_lower or "schreib" in prompt_lower:
+        num_match = re.search(r'(\d+)', prompt)
+        num_lines = int(num_match.group(1)) if num_match else 5
+        num_lines = min(num_lines, 20)
+
+        start_y = 150
+        line_spacing = 40
+
+        for i in range(num_lines):
+            new_line = {
+                "type": "line", "id": generate_id(),
+                "x1": 60, "y1": start_y + i * line_spacing,
+                "x2": 734, "y2": start_y + i * line_spacing,
+                "stroke": "#cccccc", "strokeWidth": 1
+            }
+            objects.append(new_line)
+
+        canvas_data["objects"] = objects
+        return AIModifyResponse(
+            modified_canvas_json=json.dumps(canvas_data),
+            message=f"{num_lines} Schreiblinien hinzugefuegt"
+        )
+
+    # Make text bigger
+    if "groesser" in prompt_lower or "bigger" in prompt_lower or "larger" in prompt_lower:
+        modified = 0
+        for obj in objects:
+            if obj.get("type") in ["i-text", "text", "textbox"]:
+                current_size = obj.get("fontSize", 16)
+                obj["fontSize"] = int(current_size * 1.25)
+                modified += 1
+
+        canvas_data["objects"] = objects
+        if modified > 0:
+            return AIModifyResponse(
+                modified_canvas_json=json.dumps(canvas_data),
+                message=f"{modified} Texte vergroessert"
+            )
+
+    # Center elements
+    if "zentrier" in prompt_lower or "center" in prompt_lower or "mitte" in prompt_lower:
+        center_x = 397
+        for obj in objects:
+            if not obj.get("isGrid"):
+                obj["left"] = center_x
+                obj["originX"] = "center"
+
+        canvas_data["objects"] = objects
+        return AIModifyResponse(
+            modified_canvas_json=json.dumps(canvas_data),
+            message="Elemente zentriert"
+        )
+
+    # Add numbering
+    if "nummer" in prompt_lower or "nummerier" in prompt_lower or "1-10" in prompt_lower:
+        range_match = re.search(r'(\d+)\s*[-bis]+\s*(\d+)', prompt)
+        if range_match:
+            start, end = int(range_match.group(1)), int(range_match.group(2))
+        else:
+            start, end = 1, 10
+
+        y = 100
+        for i in range(start, min(end + 1, start + 20)):
+            new_text = {
+                "type": "i-text", "id": generate_id(), "text": f"{i}.",
+                "left": 40, "top": y, "fontFamily": "Arial", "fontSize": 14, "fill": "#000000"
+            }
+            objects.append(new_text)
+            y += 35
+
+        canvas_data["objects"] = objects
+        return AIModifyResponse(
+            modified_canvas_json=json.dumps(canvas_data),
+            message=f"Nummerierung {start}-{end} hinzugefuegt"
+        )
+
+    # Add rectangle/box
+    if "rechteck" in prompt_lower or "box" in prompt_lower or "kasten" in prompt_lower:
+        new_rect = {
+            "type": "rect", "id": generate_id(),
+            "left": 100, "top": 200, "width": 200, "height": 100,
+            "fill": "transparent", "stroke": "#000000", "strokeWidth": 2
+        }
+        objects.append(new_rect)
+        canvas_data["objects"] = objects
+        return AIModifyResponse(
+            modified_canvas_json=json.dumps(canvas_data),
+            message="Rechteck hinzugefuegt"
+        )
+
+    # Add grid/raster
+    if "raster" in prompt_lower or "grid" in prompt_lower or "tabelle" in prompt_lower:
+        dim_match = re.search(r'(\d+)\s*[x/\u00d7\*mal by]\s*(\d+)', prompt_lower)
+        if dim_match:
+            cols = int(dim_match.group(1))
+            rows = int(dim_match.group(2))
+        else:
+            nums = re.findall(r'(\d+)', prompt)
+            if len(nums) >= 2:
+                cols, rows = int(nums[0]), int(nums[1])
+            else:
+                cols, rows = 3, 4
+
+        cols = min(max(1, cols), 10)
+        rows = min(max(1, rows), 15)
+
+        canvas_width = 794
+        canvas_height = 1123
+        margin = 60
+        available_width = canvas_width - 2 * margin
+        available_height = canvas_height - 2 * margin - 80
+
+        cell_width = available_width / cols
+        cell_height = min(available_height / rows, 80)
+
+        start_x = margin
+        start_y = 120
+
+        grid_objects = []
+        for r in range(rows + 1):
+            y = start_y + r * cell_height
+            grid_objects.append({
+                "type": "line", "id": generate_id(),
+                "x1": start_x, "y1": y,
+                "x2": start_x + cols * cell_width, "y2": y,
+                "stroke": "#666666", "strokeWidth": 1, "isGrid": True
+            })
+
+        for c in range(cols + 1):
+            x = start_x + c * cell_width
+            grid_objects.append({
+                "type": "line", "id": generate_id(),
+                "x1": x, "y1": start_y,
+                "x2": x, "y2": start_y + rows * cell_height,
+                "stroke": "#666666", "strokeWidth": 1, "isGrid": True
+            })
+
+        objects.extend(grid_objects)
+        canvas_data["objects"] = objects
+        return AIModifyResponse(
+            modified_canvas_json=json.dumps(canvas_data),
+            message=f"{cols}x{rows} Raster hinzugefuegt ({cols} Spalten, {rows} Zeilen)"
+        )
+
+    # Default: Ollama needed
+    return AIModifyResponse(
+        message="Diese Aenderung erfordert den KI-Service. Bitte stellen Sie sicher, dass Ollama laeuft.",
+        error="Complex modification requires Ollama"
+    )
--- a/klausur-service/backend/worksheet/editor_api.py
+++ b/klausur-service/backend/worksheet/editor_api.py
@@ -0,0 +1,388 @@
+"""
+Worksheet Editor API - Backend Endpoints for Visual Worksheet Editor
+
+Provides endpoints for:
+- AI Image generation via Ollama/Stable Diffusion
+- Worksheet Save/Load
+- PDF Export
+
+Split modules (siblings in this package):
+- editor_models: Enums, Pydantic models, configuration
+- editor_ai: AI image generation and AI worksheet modification
+- editor_reconstruct: Document reconstruction from vocab sessions
+"""
+
+import os
+import io
+import json
+import logging
+from datetime import datetime, timezone
+import uuid
+
+from fastapi import APIRouter, HTTPException
+from fastapi.responses import StreamingResponse
+import httpx
+
+# Re-export everything from sub-modules for backward compatibility
+from .editor_models import (  # noqa: F401
+    AIImageStyle,
+    WorksheetStatus,
+    AIImageRequest,
+    AIImageResponse,
+    PageData,
+    PageFormat,
+    WorksheetSaveRequest,
+    WorksheetResponse,
+    AIModifyRequest,
+    AIModifyResponse,
+    ReconstructRequest,
+    ReconstructResponse,
+    worksheets_db,
+    OLLAMA_URL,
+    SD_MODEL,
+    WORKSHEET_STORAGE_DIR,
+    STYLE_PROMPTS,
+    REPORTLAB_AVAILABLE,
+)
+
+from .editor_ai import (  # noqa: F401
+    generate_ai_image_logic,
+    _generate_placeholder_image,
+    modify_worksheet_with_ai_logic,
+    _handle_simple_modification,
+)
+
+from .editor_reconstruct import (  # noqa: F401
+    reconstruct_document_logic,
+    _detect_image_regions,
+)
+
+logger = logging.getLogger(__name__)
+
+# =============================================
+# ROUTER
+# =============================================
+
+# Every endpoint in this module is registered on this router, so all paths
+# below are served under the /api/v1/worksheet prefix.
+router = APIRouter(prefix="/api/v1/worksheet", tags=["Worksheet Editor"])
+
+# =============================================
+# AI IMAGE GENERATION
+# =============================================
+
+@router.post("/ai-image", response_model=AIImageResponse)
+async def generate_ai_image(request: AIImageRequest):
+    """
+    Generate an AI image using Ollama with a text-to-image model.
+
+    Supported models:
+    - stable-diffusion (via Ollama)
+    - sd3.5-medium
+    - llava (for image understanding, not generation)
+
+    Falls back to a placeholder if Ollama is not available.
+    """
+    return await generate_ai_image_logic(request)
+
+
+# =============================================
+# WORKSHEET SAVE/LOAD
+# =============================================
+
+@router.post("/save", response_model=WorksheetResponse)
+async def save_worksheet(request: WorksheetSaveRequest):
+    """
+    Save a worksheet document.
+
+    - If id is provided, updates existing worksheet
+    - If id is not provided, creates new worksheet
+    """
+    try:
+        now = datetime.now(timezone.utc).isoformat()
+
+        worksheet_id = request.id or f"ws_{uuid.uuid4().hex[:12]}"
+
+        worksheet = {
+            "id": worksheet_id,
+            "title": request.title,
+            "description": request.description,
+            "pages": [p.dict() for p in request.pages],
+            "pageFormat": (request.pageFormat or PageFormat()).dict(),
+            "createdAt": worksheets_db.get(worksheet_id, {}).get("createdAt", now),
+            "updatedAt": now
+        }
+
+        worksheets_db[worksheet_id] = worksheet
+
+        filepath = os.path.join(WORKSHEET_STORAGE_DIR, f"{worksheet_id}.json")
+        with open(filepath, 'w', encoding='utf-8') as f:
+            json.dump(worksheet, f, ensure_ascii=False, indent=2)
+
+        logger.info(f"Saved worksheet: {worksheet_id}")
+
+        return WorksheetResponse(**worksheet)
+
+    except Exception as e:
+        logger.error(f"Failed to save worksheet: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to save: {str(e)}")
+
+
+@router.get("/{worksheet_id}", response_model=WorksheetResponse)
+async def get_worksheet(worksheet_id: str):
+    """Load a worksheet document by ID."""
+    try:
+        if worksheet_id in worksheets_db:
+            return WorksheetResponse(**worksheets_db[worksheet_id])
+
+        filepath = os.path.join(WORKSHEET_STORAGE_DIR, f"{worksheet_id}.json")
+        if os.path.exists(filepath):
+            with open(filepath, 'r', encoding='utf-8') as f:
+                worksheet = json.load(f)
+                worksheets_db[worksheet_id] = worksheet
+                return WorksheetResponse(**worksheet)
+
+        raise HTTPException(status_code=404, detail="Worksheet not found")
+
+    except HTTPException:
+        raise
+    except Exception as e:
+        logger.error(f"Failed to load worksheet {worksheet_id}: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to load: {str(e)}")
+
+
+@router.get("/list/all")
+async def list_worksheets():
+    """List all available worksheets."""
+    try:
+        worksheets = []
+
+        for filename in os.listdir(WORKSHEET_STORAGE_DIR):
+            if filename.endswith('.json'):
+                filepath = os.path.join(WORKSHEET_STORAGE_DIR, filename)
+                try:
+                    with open(filepath, 'r', encoding='utf-8') as f:
+                        worksheet = json.load(f)
+                        worksheets.append({
+                            "id": worksheet.get("id"),
+                            "title": worksheet.get("title"),
+                            "description": worksheet.get("description"),
+                            "pageCount": len(worksheet.get("pages", [])),
+                            "updatedAt": worksheet.get("updatedAt"),
+                            "createdAt": worksheet.get("createdAt")
+                        })
+                except Exception as e:
+                    logger.warning(f"Failed to load {filename}: {e}")
+
+        worksheets.sort(key=lambda x: x.get("updatedAt", ""), reverse=True)
+
+        return {"worksheets": worksheets, "total": len(worksheets)}
+
+    except Exception as e:
+        logger.error(f"Failed to list worksheets: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+@router.delete("/{worksheet_id}")
+async def delete_worksheet(worksheet_id: str):
+    """Delete a worksheet document."""
+    try:
+        if worksheet_id in worksheets_db:
+            del worksheets_db[worksheet_id]
+
+        filepath = os.path.join(WORKSHEET_STORAGE_DIR, f"{worksheet_id}.json")
+        if os.path.exists(filepath):
+            os.remove(filepath)
+            logger.info(f"Deleted worksheet: {worksheet_id}")
+            return {"status": "deleted", "id": worksheet_id}
+
+        raise HTTPException(status_code=404, detail="Worksheet not found")
+
+    except HTTPException:
+        raise
+    except Exception as e:
+        logger.error(f"Failed to delete worksheet {worksheet_id}: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+# =============================================
+# PDF EXPORT
+# =============================================
+
+@router.post("/{worksheet_id}/export-pdf")
+async def export_worksheet_pdf(worksheet_id: str):
+    """
+    Export worksheet as PDF.
+
+    Note: This creates a basic PDF. For full canvas rendering,
+    the frontend should use pdf-lib with canvas.toDataURL().
+    """
+    if not REPORTLAB_AVAILABLE:
+        raise HTTPException(status_code=501, detail="PDF export not available (reportlab not installed)")
+
+    try:
+        from reportlab.lib.pagesizes import A4
+        from reportlab.pdfgen import canvas
+
+        worksheet = worksheets_db.get(worksheet_id)
+        if not worksheet:
+            filepath = os.path.join(WORKSHEET_STORAGE_DIR, f"{worksheet_id}.json")
+            if os.path.exists(filepath):
+                with open(filepath, 'r', encoding='utf-8') as f:
+                    worksheet = json.load(f)
+            else:
+                raise HTTPException(status_code=404, detail="Worksheet not found")
+
+        buffer = io.BytesIO()
+        c = canvas.Canvas(buffer, pagesize=A4)
+
+        page_width, page_height = A4
+
+        for page_data in worksheet.get("pages", []):
+            if page_data.get("index", 0) == 0:
+                c.setFont("Helvetica-Bold", 18)
+                c.drawString(50, page_height - 50, worksheet.get("title", "Arbeitsblatt"))
+                c.setFont("Helvetica", 10)
+                c.drawString(50, page_height - 70, f"Erstellt: {worksheet.get('createdAt', '')[:10]}")
+
+            canvas_json_str = page_data.get("canvasJSON", "{}")
+            if canvas_json_str:
+                try:
+                    canvas_data = json.loads(canvas_json_str)
+                    objects = canvas_data.get("objects", [])
+
+                    for obj in objects:
+                        obj_type = obj.get("type", "")
+
+                        if obj_type in ["text", "i-text", "textbox"]:
+                            text = obj.get("text", "")
+                            left = obj.get("left", 50)
+                            top = obj.get("top", 100)
+                            font_size = obj.get("fontSize", 12)
+
+                            pdf_x = left * 0.75
+                            pdf_y = page_height - (top * 0.75)
+
+                            c.setFont("Helvetica", min(font_size, 24))
+                            c.drawString(pdf_x, pdf_y, text[:100])
+
+                        elif obj_type == "rect":
+                            left = obj.get("left", 0) * 0.75
+                            top = obj.get("top", 0) * 0.75
+                            width = obj.get("width", 50) * 0.75
+                            height = obj.get("height", 30) * 0.75
+                            c.rect(left, page_height - top - height, width, height)
+
+                        elif obj_type == "circle":
+                            left = obj.get("left", 0) * 0.75
+                            top = obj.get("top", 0) * 0.75
+                            radius = obj.get("radius", 25) * 0.75
+                            c.circle(left + radius, page_height - top - radius, radius)
+
+                except json.JSONDecodeError:
+                    pass
+
+            c.showPage()
+
+        c.save()
+        buffer.seek(0)
+
+        filename = f"{worksheet.get('title', 'worksheet').replace(' ', '_')}.pdf"
+
+        return StreamingResponse(
+            buffer,
+            media_type="application/pdf",
+            headers={"Content-Disposition": f"attachment; filename={filename}"}
+        )
+
+    except HTTPException:
+        raise
+    except Exception as e:
+        logger.error(f"PDF export failed: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+# =============================================
+# AI WORKSHEET MODIFICATION
+# =============================================
+
+@router.post("/ai-modify", response_model=AIModifyResponse)
+async def modify_worksheet_with_ai(request: AIModifyRequest):
+    """
+    Modify a worksheet using AI based on natural language prompt.
+
+    Uses Ollama with qwen2.5vl:32b to understand the canvas state
+    and generate modifications based on the user's request.
+    """
+    return await modify_worksheet_with_ai_logic(request)
+
+
+# =============================================
+# HEALTH CHECK
+# =============================================
+
+@router.get("/health/check")
+async def health_check():
+    """Check worksheet editor API health and dependencies."""
+    status = {
+        "status": "healthy",
+        "ollama": False,
+        "storage": os.path.exists(WORKSHEET_STORAGE_DIR),
+        "reportlab": REPORTLAB_AVAILABLE,
+        "worksheets_count": len(worksheets_db)
+    }
+
+    try:
+        async with httpx.AsyncClient(timeout=5.0) as client:
+            response = await client.get(f"{OLLAMA_URL}/api/tags")
+            status["ollama"] = response.status_code == 200
+    except Exception:
+        pass
+
+    return status
+
+
+# =============================================
+# DOCUMENT RECONSTRUCTION FROM VOCAB SESSION
+# =============================================
+
+@router.post("/reconstruct-from-session", response_model=ReconstructResponse)
+async def reconstruct_document_from_session(request: ReconstructRequest):
+    """
+    Reconstruct a document from a vocab session into Fabric.js canvas format.
+
+    Returns canvas JSON ready to load into the worksheet editor.
+    """
+    try:
+        return await reconstruct_document_logic(request)
+    except HTTPException:
+        raise
+    except Exception as e:
+        logger.error(f"Document reconstruction failed: {e}")
+        import traceback
+        logger.error(traceback.format_exc())
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+@router.get("/sessions/available")
+async def get_available_sessions():
+    """Get list of available vocab sessions that can be reconstructed."""
+    try:
+        from vocab_worksheet_api import _sessions
+
+        available = []
+        for session_id, session in _sessions.items():
+            if session.get("pdf_data"):
+                available.append({
+                    "id": session_id,
+                    "name": session.get("name", "Unnamed"),
+                    "description": session.get("description"),
+                    "vocabulary_count": len(session.get("vocabulary", [])),
+                    "page_count": session.get("pdf_page_count", 1),
+                    "status": session.get("status", "unknown"),
+                    "created_at": session.get("created_at", "").isoformat() if session.get("created_at") else None
+                })
+
+        return {"sessions": available, "total": len(available)}
+
+    except Exception as e:
+        logger.error(f"Failed to list sessions: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
--- a/klausur-service/backend/worksheet/editor_models.py
+++ b/klausur-service/backend/worksheet/editor_models.py
@@ -0,0 +1,133 @@
+"""
+Worksheet Editor Models — Enums, Pydantic models, and configuration.
+"""
+
+import os
+import logging
+from typing import Optional, List, Dict
+from enum import Enum
+
+from pydantic import BaseModel, Field
+
+logger = logging.getLogger(__name__)
+
+# =============================================
+# CONFIGURATION
+# =============================================
+
+# Base URL of the Ollama server; override with the OLLAMA_URL env var.
+OLLAMA_URL = os.getenv("OLLAMA_URL", "http://host.docker.internal:11434")
+# Name of the image model; override with the SD_MODEL env var.
+SD_MODEL = os.getenv("SD_MODEL", "stable-diffusion")  # or specific SD model
+# Directory holding one "<id>.json" file per saved worksheet; override with
+# the WORKSHEET_STORAGE_DIR env var.
+# NOTE(review): the default is "worksheet-storage" next to THIS module. If the
+# module previously lived one directory higher, the default location moved
+# with it and previously saved worksheets would not be found - confirm.
+WORKSHEET_STORAGE_DIR = os.getenv("WORKSHEET_STORAGE_DIR",
+    os.path.join(os.path.dirname(os.path.abspath(__file__)), "worksheet-storage"))
+
+# Ensure storage directory exists (import-time side effect)
+os.makedirs(WORKSHEET_STORAGE_DIR, exist_ok=True)
+
+# =============================================
+# ENUMS & MODELS
+# =============================================
+
+# Visual styles a client may request for a generated image. Each member is
+# also a plain string (str mixin) and has an entry in STYLE_PROMPTS.
+class AIImageStyle(str, Enum):
+    REALISTIC = "realistic"
+    CARTOON = "cartoon"
+    SKETCH = "sketch"
+    CLIPART = "clipart"
+    EDUCATIONAL = "educational"
+
+# Lifecycle states of a worksheet, usable as plain strings (str mixin).
+# NOTE(review): none of the models in this module has a status field -
+# confirm where this enum is consumed.
+class WorksheetStatus(str, Enum):
+    DRAFT = "draft"
+    PUBLISHED = "published"
+    ARCHIVED = "archived"
+
+# Style prompt modifiers: one English phrase per AIImageStyle member.
+# Presumably appended to the user's prompt by the image generation code -
+# verify against the consumer in the AI module.
+STYLE_PROMPTS = {
+    AIImageStyle.REALISTIC: "photorealistic, high detail, professional photography",
+    AIImageStyle.CARTOON: "cartoon style, colorful, child-friendly, simple shapes",
+    AIImageStyle.SKETCH: "pencil sketch, hand-drawn, black and white, artistic",
+    AIImageStyle.CLIPART: "clipart style, flat design, simple, vector-like",
+    AIImageStyle.EDUCATIONAL: "educational illustration, clear, informative, textbook style"
+}
+
+# =============================================
+# REQUEST/RESPONSE MODELS
+# =============================================
+
+# Request body for AI image generation.
+class AIImageRequest(BaseModel):
+    # Required description of the image, 3 to 500 characters.
+    prompt: str = Field(..., min_length=3, max_length=500)
+    # Requested style; defaults to EDUCATIONAL.
+    style: AIImageStyle = AIImageStyle.EDUCATIONAL
+    # Image size, each side limited to 256..1024 (default 512);
+    # presumably pixels - TODO confirm.
+    width: int = Field(512, ge=256, le=1024)
+    height: int = Field(512, ge=256, le=1024)
+
+# Response body for AI image generation.
+class AIImageResponse(BaseModel):
+    # Image payload as a base64 string (whether it carries a data-URL prefix
+    # is decided by the producer - verify there).
+    image_base64: str
+    # The prompt that was used for generation.
+    prompt_used: str
+    # Optional error description; None by default.
+    error: Optional[str] = None
+
+# One page of a worksheet.
+class PageData(BaseModel):
+    id: str
+    # Position of the page within the worksheet.
+    index: int
+    # The page's canvas state serialised as a JSON string (not a dict).
+    canvasJSON: str
+
+# Page geometry of a worksheet.
+class PageFormat(BaseModel):
+    # Defaults 210 x 297 - presumably A4 in millimetres; confirm the unit.
+    width: float = 210
+    height: float = 297
+    orientation: str = "portrait"
+    # Margins keyed by side, same unit as width/height (assumed).
+    margins: Dict[str, float] = {"top": 15, "right": 15, "bottom": 15, "left": 15}
+
+# Request body for saving a worksheet.
+class WorksheetSaveRequest(BaseModel):
+    # Optional worksheet id; None by default.
+    id: Optional[str] = None
+    title: str
+    description: Optional[str] = None
+    pages: List[PageData]
+    # Optional page format; None by default.
+    pageFormat: Optional[PageFormat] = None
+
+# A stored worksheet as returned to clients.
+class WorksheetResponse(BaseModel):
+    id: str
+    title: str
+    # Declared without a default; whether the key may be omitted depends on
+    # the installed pydantic major version.
+    description: Optional[str]
+    pages: List[PageData]
+    pageFormat: PageFormat
+    # Timestamps are carried as strings.
+    createdAt: str
+    updatedAt: str
+
+# Request body for AI-driven worksheet modification.
+class AIModifyRequest(BaseModel):
+    # Required natural-language instruction, 3 to 1000 characters.
+    prompt: str = Field(..., min_length=3, max_length=1000)
+    # Current canvas state serialised as a JSON string.
+    canvas_json: str
+    # Name of the model to use; defaults to "qwen2.5vl:32b".
+    model: str = "qwen2.5vl:32b"
+
+# Response body for AI-driven worksheet modification.
+class AIModifyResponse(BaseModel):
+    # Modified canvas as a JSON string; None by default (no canvas returned).
+    modified_canvas_json: Optional[str] = None
+    # Required human-readable outcome message.
+    message: str
+    # Optional error description; None by default.
+    error: Optional[str] = None
+
+# Request body for reconstructing a document page from a vocab session.
+class ReconstructRequest(BaseModel):
+    session_id: str
+    # 1-based page number (the reconstruction logic rejects values below 1).
+    page_number: int = 1
+    # When True, image regions are detected and embedded in the canvas.
+    include_images: bool = True
+    # NOTE(review): not read by reconstruct_document_logic in this package -
+    # confirm whether it is still needed.
+    regenerate_graphics: bool = False
+
+# Result of a document reconstruction.
+class ReconstructResponse(BaseModel):
+    # Fabric.js canvas serialised as a JSON string.
+    canvas_json: str
+    # Dimensions of the target canvas the elements were scaled to.
+    page_width: int
+    page_height: int
+    # Number of objects in the canvas.
+    elements_count: int
+    # Number of vocabulary matches found while building the canvas.
+    vocabulary_matched: int
+    message: str
+    # Optional error description; None by default.
+    error: Optional[str] = None
+
+# =============================================
+# IN-MEMORY STORAGE (Development)
+# =============================================
+
+# Process-local cache of worksheets keyed by worksheet id. It starts empty on
+# every process start and is not shared between processes.
+worksheets_db: Dict[str, Dict] = {}
+
+# PDF Generation availability: probe the reportlab modules once at import
+# time; the imported names themselves are not used here (hence noqa).
+try:
+    from reportlab.lib import colors  # noqa: F401
+    from reportlab.lib.pagesizes import A4  # noqa: F401
+    from reportlab.lib.units import mm  # noqa: F401
+    from reportlab.pdfgen import canvas  # noqa: F401
+    from reportlab.lib.styles import getSampleStyleSheet  # noqa: F401
+    REPORTLAB_AVAILABLE = True
+except ImportError:
+    # reportlab is optional; callers check this flag before exporting PDFs.
+    REPORTLAB_AVAILABLE = False
--- a/klausur-service/backend/worksheet/editor_reconstruct.py
+++ b/klausur-service/backend/worksheet/editor_reconstruct.py
@@ -0,0 +1,255 @@
+"""
+Worksheet Editor Reconstruct — Document reconstruction from vocab sessions.
+"""
+
+import io
+import uuid
+import base64
+import logging
+from typing import List, Dict
+
+import numpy as np
+
+from .editor_models import (
+    ReconstructRequest,
+    ReconstructResponse,
+)
+
+logger = logging.getLogger(__name__)
+
+
+async def reconstruct_document_logic(request: ReconstructRequest) -> ReconstructResponse:
+    """
+    Reconstruct a document from a vocab session into Fabric.js canvas format.
+
+    This function:
+    1. Loads the original PDF from the vocab session
+    2. Runs OCR with position tracking
+    3. Creates Fabric.js canvas JSON with positioned elements
+    4. Maps extracted vocabulary to their positions
+
+    Returns ReconstructResponse ready to send to the client.
+    """
+    from fastapi import HTTPException
+    from vocab_worksheet_api import _sessions, convert_pdf_page_to_image
+
+    # Check if session exists
+    if request.session_id not in _sessions:
+        raise HTTPException(status_code=404, detail=f"Session {request.session_id} not found")
+
+    session = _sessions[request.session_id]
+
+    if not session.get("pdf_data"):
+        raise HTTPException(status_code=400, detail="Session has no PDF data")
+
+    pdf_data = session["pdf_data"]
+    page_count = session.get("pdf_page_count", 1)
+
+    if request.page_number < 1 or request.page_number > page_count:
+        raise HTTPException(
+            status_code=400,
+            detail=f"Page {request.page_number} not found. PDF has {page_count} pages."
+        )
+
+    vocabulary = session.get("vocabulary", [])
+    page_vocab = [v for v in vocabulary if v.get("source_page") == request.page_number]
+
+    logger.info(f"Reconstructing page {request.page_number} from session {request.session_id}")
+    logger.info(f"Found {len(page_vocab)} vocabulary items for this page")
+
+    image_bytes = await convert_pdf_page_to_image(pdf_data, request.page_number)
+    if not image_bytes:
+        raise HTTPException(status_code=500, detail="Failed to convert PDF page to image")
+
+    from PIL import Image
+    img = Image.open(io.BytesIO(image_bytes))
+    img_width, img_height = img.size
+
+    from hybrid_vocab_extractor import run_paddle_ocr
+    ocr_regions, raw_text = run_paddle_ocr(image_bytes)
+
+    logger.info(f"OCR found {len(ocr_regions)} text regions")
+
+    A4_WIDTH = 794
+    A4_HEIGHT = 1123
+    scale_x = A4_WIDTH / img_width
+    scale_y = A4_HEIGHT / img_height
+
+    fabric_objects = []
+
+    # 1. Add white background
+    fabric_objects.append({
+        "type": "rect", "left": 0, "top": 0,
+        "width": A4_WIDTH, "height": A4_HEIGHT,
+        "fill": "#ffffff", "selectable": False,
+        "evented": False, "isBackground": True
+    })
+
+    # 2. Group OCR regions by Y-coordinate to detect rows
+    sorted_regions = sorted(ocr_regions, key=lambda r: (r.y1, r.x1))
+
+    # 3. Detect headers (larger text at top)
+    headers = []
+    for region in sorted_regions:
+        height = region.y2 - region.y1
+        if region.y1 < img_height * 0.15 and height > 30:
+            headers.append(region)
+
+    # 4. Create text objects for each region
+    vocab_matched = 0
+
+    for region in sorted_regions:
+        left = int(region.x1 * scale_x)
+        top = int(region.y1 * scale_y)
+
+        is_header = region in headers
+
+        region_height = region.y2 - region.y1
+        base_font_size = max(10, min(32, int(region_height * scale_y * 0.8)))
+
+        if is_header:
+            base_font_size = max(base_font_size, 24)
+
+        is_vocab = False
+        vocab_match = None
+        for v in page_vocab:
+            if v.get("english", "").lower() in region.text.lower() or \
+               v.get("german", "").lower() in region.text.lower():
+                is_vocab = True
+                vocab_match = v
+                vocab_matched += 1
+                break
+
+        text_obj = {
+            "type": "i-text",
+            "id": f"text_{uuid.uuid4().hex[:8]}",
+            "left": left, "top": top,
+            "text": region.text,
+            "fontFamily": "Arial",
+            "fontSize": base_font_size,
+            "fontWeight": "bold" if is_header else "normal",
+            "fill": "#000000",
+            "originX": "left", "originY": "top",
+        }
+
+        if is_vocab and vocab_match:
+            text_obj["isVocabulary"] = True
+            text_obj["vocabularyId"] = vocab_match.get("id")
+            text_obj["english"] = vocab_match.get("english")
+            text_obj["german"] = vocab_match.get("german")
+
+        fabric_objects.append(text_obj)
+
+    # 5. If include_images, detect and extract image regions
+    if request.include_images:
+        image_regions = await _detect_image_regions(image_bytes, ocr_regions, img_width, img_height)
+
+        for i, img_region in enumerate(image_regions):
+            img_x1 = int(img_region["x1"])
+            img_y1 = int(img_region["y1"])
+            img_x2 = int(img_region["x2"])
+            img_y2 = int(img_region["y2"])
+
+            cropped = img.crop((img_x1, img_y1, img_x2, img_y2))
+
+            buffer = io.BytesIO()
+            cropped.save(buffer, format='PNG')
+            buffer.seek(0)
+            img_base64 = f"data:image/png;base64,{base64.b64encode(buffer.getvalue()).decode('utf-8')}"
+
+            fabric_objects.append({
+                "type": "image",
+                "id": f"img_{uuid.uuid4().hex[:8]}",
+                "left": int(img_x1 * scale_x),
+                "top": int(img_y1 * scale_y),
+                "width": int((img_x2 - img_x1) * scale_x),
+                "height": int((img_y2 - img_y1) * scale_y),
+                "src": img_base64,
+                "scaleX": 1, "scaleY": 1,
+            })
+
+    import json
+    canvas_data = {
+        "version": "6.0.0",
+        "objects": fabric_objects,
+        "background": "#ffffff"
+    }
+
+    return ReconstructResponse(
+        canvas_json=json.dumps(canvas_data),
+        page_width=A4_WIDTH,
+        page_height=A4_HEIGHT,
+        elements_count=len(fabric_objects),
+        vocabulary_matched=vocab_matched,
+        message=f"Reconstructed page {request.page_number} with {len(fabric_objects)} elements, "
+                f"{vocab_matched} vocabulary items matched"
+    )
+
+
+async def _detect_image_regions(
+    image_bytes: bytes,
+    ocr_regions: list,
+    img_width: int,
+    img_height: int
+) -> List[Dict]:
+    """
+    Detect image/graphic regions in the document.
+
+    Uses a simple approach:
+    1. Find large gaps between text regions (potential image areas)
+    2. Use edge detection to find bounded regions
+    3. Filter out text areas
+    """
+    from PIL import Image
+    import cv2
+
+    try:
+        img = Image.open(io.BytesIO(image_bytes))
+        img_array = np.array(img.convert('L'))
+
+        text_mask = np.ones_like(img_array, dtype=bool)
+        for region in ocr_regions:
+            x1 = max(0, region.x1 - 5)
+            y1 = max(0, region.y1 - 5)
+            x2 = min(img_width, region.x2 + 5)
+            y2 = min(img_height, region.y2 + 5)
+            text_mask[y1:y2, x1:x2] = False
+
+        image_regions = []
+
+        edges = cv2.Canny(img_array, 50, 150)
+        edges[~text_mask] = 0
+
+        contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+
+        for contour in contours:
+            x, y, w, h = cv2.boundingRect(contour)
+
+            if w > 50 and h > 50:
+                if w < img_width * 0.9 and h < img_height * 0.9:
+                    region_content = img_array[y:y+h, x:x+w]
+                    variance = np.var(region_content)
+
+                    if variance > 500:
+                        image_regions.append({
+                            "x1": x, "y1": y,
+                            "x2": x + w, "y2": y + h
+                        })
+
+        filtered_regions = []
+        for region in sorted(image_regions, key=lambda r: (r["x2"]-r["x1"])*(r["y2"]-r["y1"]), reverse=True):
+            overlaps = False
+            for existing in filtered_regions:
+                if not (region["x2"] < existing["x1"] or region["x1"] > existing["x2"] or
+                        region["y2"] < existing["y1"] or region["y1"] > existing["y2"]):
+                    overlaps = True
+                    break
+            if not overlaps:
+                filtered_regions.append(region)
+
+        logger.info(f"Detected {len(filtered_regions)} image regions")
+        return filtered_regions[:10]
+
+    except Exception as e:
+        logger.warning(f"Image region detection failed: {e}")
+        return []
--- a/klausur-service/backend/worksheet/nru_generator.py
+++ b/klausur-service/backend/worksheet/nru_generator.py
@@ -0,0 +1,26 @@
+"""
+NRU Worksheet Generator — barrel re-export.
+
+All implementation split into (siblings in this package):
+  nru_models — data classes, entry separation
+  nru_html   — HTML generation
+  nru_pdf    — PDF generation
+
+Per scanned page, we generate 2 worksheet pages.
+"""
+
+# Models
+from .nru_models import (  # noqa: F401
+    VocabEntry,
+    SentenceEntry,
+    separate_vocab_and_sentences,
+)
+
+# HTML generation
+from .nru_html import (  # noqa: F401
+    generate_nru_html,
+    generate_nru_worksheet_html,
+)
+
+# PDF generation
+from .nru_pdf import generate_nru_pdf  # noqa: F401
--- a/klausur-service/backend/worksheet/nru_html.py
+++ b/klausur-service/backend/worksheet/nru_html.py
@@ -0,0 +1,466 @@
+"""
+NRU Worksheet HTML — HTML generation for vocabulary worksheets.
+
+Extracted from nru_worksheet_generator.py for modularity.
+"""
+
+import logging
+from typing import List, Dict
+
+from .nru_models import VocabEntry, SentenceEntry, separate_vocab_and_sentences
+
+logger = logging.getLogger(__name__)
+
+
+def generate_nru_html(
+    vocab_list: List[VocabEntry],
+    sentence_list: List[SentenceEntry],
+    page_number: int,
+    title: str = "Vokabeltest",
+    show_solutions: bool = False,
+    line_height_px: int = 28
+) -> str:
+    """
+    Generate a standalone HTML document for one source page (NRU format).
+
+    Emits up to two worksheet pages: a 3-column vocabulary table and a
+    full-width sentence practice page. Either one is skipped when the
+    source page has no matching entries.
+
+    Args:
+        vocab_list: Vocabulary entries; only those with
+            ``source_page == page_number`` are rendered.
+        sentence_list: Sentence entries, filtered the same way.
+        page_number: Source page to render; also shown in the headings.
+        title: Heading prefix for both worksheet pages.
+        show_solutions: Also print the German words / English sentences.
+        line_height_px: Row height of vocabulary cells; sentence lines
+            are 4px taller.
+
+    Returns:
+        The complete HTML document; title and entry text are HTML-escaped.
+    """
+    # Title and entry text are free text: escape so "&"/"<" stay literal.
+    from html import escape
+
+    # Filter by page
+    page_vocab = [v for v in vocab_list if v.source_page == page_number]
+    page_sentences = [s for s in sentence_list if s.source_page == page_number]
+    safe_title = escape(str(title))
+
+    parts = [f"""<!DOCTYPE html>
+<html>
+<head>
+    <meta charset="UTF-8">
+    <style>
+        @page {{
+            size: A4;
+            margin: 1.5cm 2cm;
+        }}
+        * {{
+            box-sizing: border-box;
+        }}
+        body {{
+            font-family: Arial, Helvetica, sans-serif;
+            font-size: 12pt;
+            line-height: 1.4;
+            margin: 0;
+            padding: 0;
+        }}
+        .page {{
+            page-break-after: always;
+            min-height: 100%;
+        }}
+        .page:last-child {{
+            page-break-after: avoid;
+        }}
+        h1 {{
+            font-size: 16pt;
+            margin: 0 0 8px 0;
+            text-align: center;
+        }}
+        .header {{
+            margin-bottom: 15px;
+        }}
+        .name-line {{
+            font-size: 11pt;
+            margin-bottom: 10px;
+        }}
+
+        /* Vocabulary Table - 3 columns */
+        .vocab-table {{
+            width: 100%;
+            border-collapse: collapse;
+            table-layout: fixed;
+        }}
+        .vocab-table th {{
+            background: #f0f0f0;
+            border: 1px solid #333;
+            padding: 6px 8px;
+            font-weight: bold;
+            font-size: 11pt;
+            text-align: left;
+        }}
+        .vocab-table td {{
+            border: 1px solid #333;
+            padding: 4px 8px;
+            height: {line_height_px}px;
+            vertical-align: middle;
+        }}
+        .vocab-table .col-english {{ width: 35%; }}
+        .vocab-table .col-german {{ width: 35%; }}
+        .vocab-table .col-correction {{ width: 30%; }}
+        .vocab-answer {{
+            color: #0066cc;
+            font-style: italic;
+        }}
+
+        /* Sentence Table - full width */
+        .sentence-table {{
+            width: 100%;
+            border-collapse: collapse;
+            margin-bottom: 15px;
+        }}
+        .sentence-table td {{
+            border: 1px solid #333;
+            padding: 6px 10px;
+        }}
+        .sentence-header {{
+            background: #f5f5f5;
+            font-weight: normal;
+            min-height: 30px;
+        }}
+        .sentence-line {{
+            height: {line_height_px + 4}px;
+        }}
+        .sentence-answer {{
+            color: #0066cc;
+            font-style: italic;
+            font-size: 11pt;
+        }}
+
+        .page-info {{
+            font-size: 9pt;
+            color: #666;
+            text-align: right;
+            margin-top: 10px;
+        }}
+    </style>
+</head>
+<body>
+"""]
+
+    # ========== PAGE 1: VOCABULARY TABLE ==========
+    if page_vocab:
+        parts.append(f"""
+    <div class="page">
+        <div class="header">
+            <h1>{safe_title} - Vokabeln (Seite {page_number})</h1>
+            <div class="name-line">Name: _________________________ Datum: _____________</div>
+        </div>
+
+        <table class="vocab-table">
+            <thead>
+                <tr>
+                    <th class="col-english">Englisch</th>
+                    <th class="col-german">Deutsch</th>
+                    <th class="col-correction">Korrektur</th>
+                </tr>
+            </thead>
+            <tbody>
+""")
+        for v in page_vocab:
+            german_cell = (
+                f'<td class="vocab-answer">{escape(str(v.german))}</td>'
+                if show_solutions else "<td></td>"
+            )
+            parts.append(f"""
+                <tr>
+                    <td>{escape(str(v.english))}</td>
+                    {german_cell}
+                    <td></td>
+                </tr>
+""")
+
+        parts.append("""
+            </tbody>
+        </table>
+        <div class="page-info">Vokabeln aus Unit</div>
+    </div>
+""")
+
+    # ========== PAGE 2: SENTENCE PRACTICE ==========
+    if page_sentences:
+        parts.append(f"""
+    <div class="page">
+        <div class="header">
+            <h1>{safe_title} - Lernsaetze (Seite {page_number})</h1>
+            <div class="name-line">Name: _________________________ Datum: _____________</div>
+        </div>
+""")
+        for s in page_sentences:
+            answer_cell = (
+                f'<td class="sentence-line sentence-answer">{escape(str(s.english))}</td>'
+                if show_solutions else '<td class="sentence-line"></td>'
+            )
+            parts.append(f"""
+        <table class="sentence-table">
+            <tr>
+                <td class="sentence-header">{escape(str(s.german))}</td>
+            </tr>
+
+            <tr>
+                {answer_cell}
+            </tr>
+            <tr>
+                <td class="sentence-line"></td>
+            </tr>
+
+        </table>
+""")
+
+        parts.append("""
+        <div class="page-info">Lernsaetze aus Unit</div>
+    </div>
+""")
+
+    parts.append("\n</body>\n</html>\n")
+    return "".join(parts)
+
+
+def generate_nru_worksheet_html(
+    entries: List[Dict],
+    title: str = "Vokabeltest",
+    show_solutions: bool = False,
+    specific_pages: List[int] = None
+) -> str:
+    """
+    Generate complete NRU worksheet HTML for all pages.
+
+    Entries are split into vocabulary and sentences. For every source page,
+    in ascending order, a vocabulary page and a sentence page are emitted,
+    each only when that source page has matching entries.
+
+    The stylesheet is static: vocabulary rows are 28px high and sentence
+    lines 32px.
+
+    Args:
+        entries: List of vocabulary entries with source_page
+        title: Worksheet title, shown in headings and page footers
+        show_solutions: Whether to show answers
+        specific_pages: Page numbers to include (1-indexed); ``None`` or an
+            empty list keeps every page that has entries
+
+    Returns:
+        Complete HTML document; title and entry text are HTML-escaped
+    """
+    # Title and entry text are free text: escape them so characters such as
+    # "&" or "<" are shown literally instead of breaking the markup.
+    from html import escape
+
+    # Separate into vocab and sentences
+    vocab_list, sentence_list = separate_vocab_and_sentences(entries)
+
+    # Get unique page numbers
+    all_pages = {e.source_page for e in (*vocab_list, *sentence_list)}
+
+    # Filter to specific pages if requested
+    if specific_pages:
+        all_pages &= set(specific_pages)
+
+    pages_sorted = sorted(all_pages)
+    safe_title = escape(str(title))
+
+    logger.info("Generating NRU worksheet for pages %s", pages_sorted)
+    logger.info("Total vocab: %d, Total sentences: %d", len(vocab_list), len(sentence_list))
+
+    # Generate HTML for each page
+    parts = ["""<!DOCTYPE html>
+<html>
+<head>
+    <meta charset="UTF-8">
+    <style>
+        @page {
+            size: A4;
+            margin: 1.5cm 2cm;
+        }
+        * {
+            box-sizing: border-box;
+        }
+        body {
+            font-family: Arial, Helvetica, sans-serif;
+            font-size: 12pt;
+            line-height: 1.4;
+            margin: 0;
+            padding: 0;
+        }
+        .page {
+            page-break-after: always;
+            min-height: 100%;
+        }
+        .page:last-child {
+            page-break-after: avoid;
+        }
+        h1 {
+            font-size: 16pt;
+            margin: 0 0 8px 0;
+            text-align: center;
+        }
+        .header {
+            margin-bottom: 15px;
+        }
+        .name-line {
+            font-size: 11pt;
+            margin-bottom: 10px;
+        }
+
+        /* Vocabulary Table - 3 columns */
+        .vocab-table {
+            width: 100%;
+            border-collapse: collapse;
+            table-layout: fixed;
+        }
+        .vocab-table th {
+            background: #f0f0f0;
+            border: 1px solid #333;
+            padding: 6px 8px;
+            font-weight: bold;
+            font-size: 11pt;
+            text-align: left;
+        }
+        .vocab-table td {
+            border: 1px solid #333;
+            padding: 4px 8px;
+            height: 28px;
+            vertical-align: middle;
+        }
+        .vocab-table .col-english { width: 35%; }
+        .vocab-table .col-german { width: 35%; }
+        .vocab-table .col-correction { width: 30%; }
+        .vocab-answer {
+            color: #0066cc;
+            font-style: italic;
+        }
+
+        /* Sentence Table - full width */
+        .sentence-table {
+            width: 100%;
+            border-collapse: collapse;
+            margin-bottom: 15px;
+        }
+        .sentence-table td {
+            border: 1px solid #333;
+            padding: 6px 10px;
+        }
+        .sentence-header {
+            background: #f5f5f5;
+            font-weight: normal;
+            min-height: 30px;
+        }
+        .sentence-line {
+            height: 32px;
+        }
+        .sentence-answer {
+            color: #0066cc;
+            font-style: italic;
+            font-size: 11pt;
+        }
+
+        .page-info {
+            font-size: 9pt;
+            color: #666;
+            text-align: right;
+            margin-top: 10px;
+        }
+    </style>
+</head>
+<body>
+"""]
+
+    # Each source page contributes up to two worksheet pages.
+    for page_num in pages_sorted:
+        page_vocab = [v for v in vocab_list if v.source_page == page_num]
+        page_sentences = [s for s in sentence_list if s.source_page == page_num]
+
+        # PAGE 1: VOCABULARY TABLE
+        if page_vocab:
+            parts.append(f"""
+    <div class="page">
+        <div class="header">
+            <h1>{safe_title} - Vokabeln (Seite {page_num})</h1>
+            <div class="name-line">Name: _________________________ Datum: _____________</div>
+        </div>
+
+        <table class="vocab-table">
+            <thead>
+                <tr>
+                    <th class="col-english">Englisch</th>
+                    <th class="col-german">Deutsch</th>
+                    <th class="col-correction">Korrektur</th>
+                </tr>
+            </thead>
+            <tbody>
+""")
+            for v in page_vocab:
+                # The German column stays empty unless solutions are shown.
+                german_cell = (
+                    f'<td class="vocab-answer">{escape(str(v.german))}</td>'
+                    if show_solutions else "<td></td>"
+                )
+                parts.append(f"""
+                <tr>
+                    <td>{escape(str(v.english))}</td>
+                    {german_cell}
+                    <td></td>
+                </tr>
+""")
+
+            parts.append(f"""
+            </tbody>
+        </table>
+        <div class="page-info">{safe_title} - Seite {page_num}</div>
+    </div>
+""")
+
+        # PAGE 2: SENTENCE PRACTICE
+        if page_sentences:
+            parts.append(f"""
+    <div class="page">
+        <div class="header">
+            <h1>{safe_title} - Lernsaetze (Seite {page_num})</h1>
+            <div class="name-line">Name: _________________________ Datum: _____________</div>
+        </div>
+""")
+            for s in page_sentences:
+                # Of the two writing lines per sentence, the first one carries
+                # the English answer when solutions are shown.
+                answer_cell = (
+                    f'<td class="sentence-line sentence-answer">{escape(str(s.english))}</td>'
+                    if show_solutions else '<td class="sentence-line"></td>'
+                )
+                parts.append(f"""
+        <table class="sentence-table">
+            <tr>
+                <td class="sentence-header">{escape(str(s.german))}</td>
+            </tr>
+
+            <tr>
+                {answer_cell}
+            </tr>
+            <tr>
+                <td class="sentence-line"></td>
+            </tr>
+
+        </table>
+""")
+
+            parts.append(f"""
+        <div class="page-info">{safe_title} - Seite {page_num}</div>
+    </div>
+""")
+
+    parts.append("""
+</body>
+</html>
+""")
+    return "".join(parts)
--- a/klausur-service/backend/worksheet/nru_models.py
+++ b/klausur-service/backend/worksheet/nru_models.py
@@ -0,0 +1,70 @@
+"""
+NRU Worksheet Models — data classes and entry separation logic.
+
+Extracted from nru_worksheet_generator.py for modularity.
+"""
+
+import logging
+from typing import List, Dict, Tuple
+from dataclasses import dataclass
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class VocabEntry:  # One word/phrase pair, as produced by separate_vocab_and_sentences
+    english: str  # English term
+    german: str  # German counterpart
+    source_page: int = 1  # Page the entry belongs to; defaults to 1
+
+
+@dataclass
+class SentenceEntry:  # One full-sentence pair, as produced by separate_vocab_and_sentences
+    german: str  # German sentence
+    english: str  # For solution sheet
+    source_page: int = 1  # Page the entry belongs to; defaults to 1
+
+
+def separate_vocab_and_sentences(entries: List[Dict]) -> Tuple[List[VocabEntry], List[SentenceEntry]]:
+    """
+    Separate vocabulary entries into single words/phrases and full sentences.
+
+    An entry is treated as a sentence when its English text:
+    - ends with punctuation (. ! ?), or
+    - is longer than 50 characters, or
+    - has more than 5 words and a capitalized word after the first one
+
+    Entries whose English or German text is missing or blank are skipped.
+    ``source_page`` defaults to 1 when the entry does not provide it.
+    Returns ``(vocab_list, sentence_list)``, each in input order.
+    """
+    vocab_list: List[VocabEntry] = []
+    sentence_list: List[SentenceEntry] = []
+
+    for entry in entries:
+        # ``or ""`` also covers keys that are present but set to None.
+        english = (entry.get("english") or "").strip()
+        german = (entry.get("german") or "").strip()
+        source_page = entry.get("source_page", 1)
+
+        if not english or not german:
+            continue
+
+        # Detect if this is a sentence
+        words = english.split()
+        is_sentence = (
+            english.endswith(('.', '!', '?')) or
+            len(english) > 50 or
+            (len(words) > 5 and any(w[0].isupper() for w in words[1:]))
+        )
+
+        if is_sentence:
+            sentence_list.append(
+                SentenceEntry(german=german, english=english, source_page=source_page)
+            )
+        else:
+            vocab_list.append(
+                VocabEntry(english=english, german=german, source_page=source_page)
+            )
+
+    return vocab_list, sentence_list
--- a/klausur-service/backend/worksheet/nru_pdf.py
+++ b/klausur-service/backend/worksheet/nru_pdf.py
@@ -0,0 +1,31 @@
+"""
+NRU Worksheet PDF — PDF generation using weasyprint.
+
+Extracted from nru_worksheet_generator.py for modularity.
+"""
+
+from typing import List, Dict, Optional, Tuple
+
+from .nru_html import generate_nru_worksheet_html
+
+
+async def generate_nru_pdf(entries: List[Dict], title: str = "Vokabeltest", include_solutions: bool = True) -> Tuple[bytes, Optional[bytes]]:
+    """
+    Render the NRU worksheet, and optionally its solution sheet, as PDF.
+
+    Args:
+        entries: Raw vocabulary entries, passed to generate_nru_worksheet_html.
+        title: Worksheet title.
+        include_solutions: Also render the variant with answers shown.
+
+    Returns:
+        Tuple of (worksheet_pdf_bytes, solution_pdf_bytes); the second item
+        is None when include_solutions is False.
+    """
+    from weasyprint import HTML
+
+    def _render(show_solutions: bool) -> bytes:
+        html = generate_nru_worksheet_html(entries, title, show_solutions=show_solutions)
+        return HTML(string=html).write_pdf()
+
+    return _render(False), (_render(True) if include_solutions else None)