Restructure: Move 52 files into 7 domain packages
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 28s
CI / test-go-edu-search (push) Successful in 28s
CI / test-python-klausur (push) Failing after 2m22s
CI / test-python-agent-core (push) Successful in 21s
CI / test-nodejs-website (push) Successful in 23s
New domain packages: korrektur/ zeugnis/ admin/ compliance/ worksheet/ training/ metrics/. 52 backward-compatible shims remain in backend/, the packages use relative imports, RAG untouched. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
6
klausur-service/backend/worksheet/__init__.py
Normal file
@@ -0,0 +1,6 @@
"""
worksheet package — worksheet editor, NRU generator, cleanup.

Backward-compatible re-exports: consumers can still use
``from worksheet_editor_api import ...`` etc. via the shim files in backend/.
"""
491
klausur-service/backend/worksheet/cleanup_api.py
Normal file
@@ -0,0 +1,491 @@
"""
Worksheet Cleanup API - handwriting removal and layout reconstruction

Endpoints:
- POST /api/v1/worksheet/detect-handwriting - detects handwriting and returns a mask
- POST /api/v1/worksheet/remove-handwriting - removes handwriting from an image
- POST /api/v1/worksheet/reconstruct - reconstructs the layout as Fabric.js JSON
- POST /api/v1/worksheet/cleanup-pipeline - full pipeline (detection + removal + layout)

PRIVACY: all processing happens locally on the Mac Mini.
"""

import io
import base64
import logging
from typing import Optional

from fastapi import APIRouter, HTTPException, UploadFile, File, Form
from fastapi.responses import StreamingResponse, JSONResponse
from pydantic import BaseModel

from services.handwriting_detection import (
    detect_handwriting,
    detect_handwriting_regions,
    mask_to_png
)
from services.inpainting_service import (
    inpaint_image,
    remove_handwriting,
    InpaintingMethod,
    check_lama_available
)
from services.layout_reconstruction_service import (
    reconstruct_layout,
    layout_to_fabric_json,
    reconstruct_and_clean
)

logger = logging.getLogger(__name__)

router = APIRouter(prefix="/api/v1/worksheet", tags=["Worksheet Cleanup"])


# =============================================================================
# Pydantic Models
# =============================================================================

class DetectionResponse(BaseModel):
    has_handwriting: bool
    confidence: float
    handwriting_ratio: float
    detection_method: str
    mask_base64: Optional[str] = None


class InpaintingResponse(BaseModel):
    success: bool
    method_used: str
    processing_time_ms: float
    image_base64: Optional[str] = None
    error: Optional[str] = None


class ReconstructionResponse(BaseModel):
    success: bool
    element_count: int
    page_width: int
    page_height: int
    fabric_json: dict
    table_count: int = 0


class PipelineResponse(BaseModel):
    success: bool
    handwriting_detected: bool
    handwriting_removed: bool
    layout_reconstructed: bool
    cleaned_image_base64: Optional[str] = None
    fabric_json: Optional[dict] = None
    metadata: dict = {}


class CapabilitiesResponse(BaseModel):
    opencv_available: bool = True
    lama_available: bool = False
    paddleocr_available: bool = False


# =============================================================================
# API Endpoints
# =============================================================================

@router.get("/capabilities")
async def get_capabilities() -> CapabilitiesResponse:
    """
    Get available cleanup capabilities on this server.
    """
    # Check PaddleOCR
    paddleocr_available = False
    try:
        from hybrid_vocab_extractor import get_paddle_ocr
        ocr = get_paddle_ocr()
        paddleocr_available = ocr is not None
    except Exception:
        pass

    return CapabilitiesResponse(
        opencv_available=True,
        lama_available=check_lama_available(),
        paddleocr_available=paddleocr_available
    )

@router.post("/detect-handwriting")
async def detect_handwriting_endpoint(
    image: UploadFile = File(...),
    return_mask: bool = Form(default=True),
    min_confidence: float = Form(default=0.3)
) -> DetectionResponse:
    """
    Detect handwriting in an image.

    Args:
        image: Input image (PNG, JPG)
        return_mask: Whether to return the binary mask as base64
        min_confidence: Minimum confidence threshold

    Returns:
        DetectionResponse with detection results and optional mask
    """
    logger.info(f"Handwriting detection request: {image.filename}")

    # Validate file type
    content_type = image.content_type or ""
    if not content_type.startswith("image/"):
        raise HTTPException(
            status_code=400,
            detail="Only image files (PNG, JPG) are supported"
        )

    try:
        image_bytes = await image.read()

        # Detect handwriting
        result = detect_handwriting(image_bytes)

        has_handwriting = (
            result.confidence >= min_confidence and
            result.handwriting_ratio > 0.005
        )

        response = DetectionResponse(
            has_handwriting=has_handwriting,
            confidence=result.confidence,
            handwriting_ratio=result.handwriting_ratio,
            detection_method=result.detection_method
        )

        if return_mask:
            mask_bytes = mask_to_png(result.mask)
            response.mask_base64 = base64.b64encode(mask_bytes).decode('utf-8')

        logger.info(f"Detection complete: handwriting={has_handwriting}, "
                    f"confidence={result.confidence:.2f}")

        return response

    except Exception as e:
        logger.error(f"Handwriting detection failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))

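A minimal client sketch for the detection endpoint above (illustrative, not part of this commit; the base URL and the file scan.png are assumptions):

import httpx

# Upload a scan and request the binary mask as base64 (assumed local deployment)
with open("scan.png", "rb") as f:
    resp = httpx.post(
        "http://localhost:8000/api/v1/worksheet/detect-handwriting",
        files={"image": ("scan.png", f, "image/png")},
        data={"return_mask": "true", "min_confidence": "0.3"},
    )
resp.raise_for_status()
result = resp.json()
print(result["has_handwriting"], round(result["confidence"], 2))
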
@router.post("/detect-handwriting/mask")
async def get_handwriting_mask(
    image: UploadFile = File(...)
) -> StreamingResponse:
    """
    Get handwriting detection mask as PNG image.

    Returns binary mask where white (255) = handwriting.
    """
    content_type = image.content_type or ""
    if not content_type.startswith("image/"):
        raise HTTPException(
            status_code=400,
            detail="Only image files are supported"
        )

    try:
        image_bytes = await image.read()
        result = detect_handwriting(image_bytes)
        mask_bytes = mask_to_png(result.mask)

        return StreamingResponse(
            io.BytesIO(mask_bytes),
            media_type="image/png",
            headers={
                "Content-Disposition": "attachment; filename=handwriting_mask.png"
            }
        )

    except Exception as e:
        logger.error(f"Mask generation failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))

@router.post("/remove-handwriting")
async def remove_handwriting_endpoint(
    image: UploadFile = File(...),
    mask: Optional[UploadFile] = File(default=None),
    method: str = Form(default="auto"),
    return_base64: bool = Form(default=False)
):
    """
    Remove handwriting from an image.

    Args:
        image: Input image with handwriting
        mask: Optional pre-computed mask (if not provided, auto-detected)
        method: Inpainting method (auto, opencv_telea, opencv_ns, lama)
        return_base64: If True, return image as base64, else as file

    Returns:
        Cleaned image (as PNG file or base64 in JSON)
    """
    logger.info(f"Remove handwriting request: {image.filename}, method={method}")

    content_type = image.content_type or ""
    if not content_type.startswith("image/"):
        raise HTTPException(
            status_code=400,
            detail="Only image files are supported"
        )

    try:
        image_bytes = await image.read()

        # Get mask if provided
        mask_array = None
        if mask is not None:
            mask_bytes = await mask.read()
            from PIL import Image
            import numpy as np
            mask_img = Image.open(io.BytesIO(mask_bytes))
            mask_array = np.array(mask_img)

        # Select inpainting method
        inpainting_method = InpaintingMethod.AUTO
        if method == "opencv_telea":
            inpainting_method = InpaintingMethod.OPENCV_TELEA
        elif method == "opencv_ns":
            inpainting_method = InpaintingMethod.OPENCV_NS
        elif method == "lama":
            inpainting_method = InpaintingMethod.LAMA

        # Remove handwriting
        cleaned_bytes, metadata = remove_handwriting(
            image_bytes,
            mask=mask_array,
            method=inpainting_method
        )

        if return_base64:
            return JSONResponse({
                "success": True,
                "image_base64": base64.b64encode(cleaned_bytes).decode('utf-8'),
                "metadata": metadata
            })
        else:
            return StreamingResponse(
                io.BytesIO(cleaned_bytes),
                media_type="image/png",
                headers={
                    "Content-Disposition": "attachment; filename=cleaned.png",
                    "X-Method-Used": metadata.get("method_used", "unknown"),
                    "X-Processing-Time-Ms": str(metadata.get("processing_time_ms", 0))
                }
            )

    except Exception as e:
        logger.error(f"Handwriting removal failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))

@router.post("/reconstruct")
async def reconstruct_layout_endpoint(
    image: UploadFile = File(...),
    clean_handwriting: bool = Form(default=True),
    detect_tables: bool = Form(default=True)
) -> ReconstructionResponse:
    """
    Reconstruct worksheet layout and generate Fabric.js JSON.

    Args:
        image: Input image (can contain handwriting)
        clean_handwriting: Whether to remove handwriting first
        detect_tables: Whether to detect table structures

    Returns:
        ReconstructionResponse with Fabric.js JSON
    """
    logger.info(f"Layout reconstruction request: {image.filename}")

    content_type = image.content_type or ""
    if not content_type.startswith("image/"):
        raise HTTPException(
            status_code=400,
            detail="Only image files are supported"
        )

    try:
        image_bytes = await image.read()

        # Run reconstruction pipeline
        if clean_handwriting:
            cleaned_bytes, layout = reconstruct_and_clean(image_bytes)
        else:
            layout = reconstruct_layout(image_bytes, detect_tables=detect_tables)

        return ReconstructionResponse(
            success=True,
            element_count=len(layout.elements),
            page_width=layout.page_width,
            page_height=layout.page_height,
            fabric_json=layout.fabric_json,
            table_count=len(layout.table_regions)
        )

    except Exception as e:
        logger.error(f"Layout reconstruction failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))

@router.post("/cleanup-pipeline")
async def full_cleanup_pipeline(
    image: UploadFile = File(...),
    remove_hw: bool = Form(default=True, alias="remove_handwriting"),
    reconstruct: bool = Form(default=True),
    inpainting_method: str = Form(default="auto")
) -> PipelineResponse:
    """
    Full cleanup pipeline: detect, remove handwriting, reconstruct layout.

    This is the recommended endpoint for processing filled worksheets.

    Args:
        image: Input image (scan/photo of filled worksheet)
        remove_handwriting: Whether to remove detected handwriting
        reconstruct: Whether to reconstruct layout as Fabric.js JSON
        inpainting_method: Method for inpainting (auto, opencv_telea, opencv_ns, lama)

    Returns:
        PipelineResponse with cleaned image and Fabric.js JSON
    """
    logger.info(f"Full cleanup pipeline: {image.filename}")

    content_type = image.content_type or ""
    if not content_type.startswith("image/"):
        raise HTTPException(
            status_code=400,
            detail="Only image files are supported"
        )

    try:
        image_bytes = await image.read()
        metadata = {}

        # Step 1: Detect handwriting
        detection = detect_handwriting(image_bytes)
        handwriting_detected = (
            detection.confidence >= 0.3 and
            detection.handwriting_ratio > 0.005
        )

        metadata["detection"] = {
            "confidence": detection.confidence,
            "handwriting_ratio": detection.handwriting_ratio,
            "method": detection.detection_method
        }

        # Step 2: Remove handwriting if requested and detected
        cleaned_bytes = image_bytes
        handwriting_removed = False

        if remove_hw and handwriting_detected:
            method = InpaintingMethod.AUTO
            if inpainting_method == "opencv_telea":
                method = InpaintingMethod.OPENCV_TELEA
            elif inpainting_method == "opencv_ns":
                method = InpaintingMethod.OPENCV_NS
            elif inpainting_method == "lama":
                method = InpaintingMethod.LAMA

            cleaned_bytes, inpaint_metadata = remove_handwriting(
                image_bytes,
                mask=detection.mask,
                method=method
            )
            handwriting_removed = inpaint_metadata.get("inpainting_performed", False)
            metadata["inpainting"] = inpaint_metadata

        # Step 3: Reconstruct layout if requested
        fabric_json = None
        layout_reconstructed = False

        if reconstruct:
            layout = reconstruct_layout(cleaned_bytes)
            fabric_json = layout.fabric_json
            layout_reconstructed = len(layout.elements) > 0
            metadata["layout"] = {
                "element_count": len(layout.elements),
                "table_count": len(layout.table_regions),
                "page_width": layout.page_width,
                "page_height": layout.page_height
            }

        # Encode cleaned image as base64
        cleaned_base64 = base64.b64encode(cleaned_bytes).decode('utf-8')

        logger.info(f"Pipeline complete: detected={handwriting_detected}, "
                    f"removed={handwriting_removed}, layout={layout_reconstructed}")

        return PipelineResponse(
            success=True,
            handwriting_detected=handwriting_detected,
            handwriting_removed=handwriting_removed,
            layout_reconstructed=layout_reconstructed,
            cleaned_image_base64=cleaned_base64,
            fabric_json=fabric_json,
            metadata=metadata
        )

    except Exception as e:
        logger.error(f"Cleanup pipeline failed: {e}")
        import traceback
        logger.error(traceback.format_exc())
        raise HTTPException(status_code=500, detail=str(e))

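A minimal end-to-end client sketch for the pipeline endpoint above (illustrative; the base URL and file names are assumptions). Note the form field is remove_handwriting, the alias declared on the remove_hw parameter:

import base64
import httpx

with open("filled_worksheet.png", "rb") as f:
    resp = httpx.post(
        "http://localhost:8000/api/v1/worksheet/cleanup-pipeline",
        files={"image": ("filled_worksheet.png", f, "image/png")},
        data={"remove_handwriting": "true", "reconstruct": "true",
              "inpainting_method": "auto"},
        timeout=120.0,
    )
resp.raise_for_status()
body = resp.json()

# Persist the cleaned image; fabric_json can be loaded straight into the editor
if body.get("cleaned_image_base64"):
    with open("cleaned.png", "wb") as out:
        out.write(base64.b64decode(body["cleaned_image_base64"]))
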
@router.post("/preview-cleanup")
async def preview_cleanup(
    image: UploadFile = File(...)
) -> JSONResponse:
    """
    Quick preview of cleanup results without full processing.

    Returns detection results and estimated processing time.
    """
    content_type = image.content_type or ""
    if not content_type.startswith("image/"):
        raise HTTPException(
            status_code=400,
            detail="Only image files are supported"
        )

    try:
        image_bytes = await image.read()

        # Quick detection only
        result = detect_handwriting_regions(image_bytes)

        # Estimate processing time based on image size
        from PIL import Image
        img = Image.open(io.BytesIO(image_bytes))
        pixel_count = img.width * img.height

        # Rough estimates
        est_detection_ms = 100 + (pixel_count / 1000000) * 200
        est_inpainting_ms = 500 + (pixel_count / 1000000) * 1000
        est_reconstruction_ms = 200 + (pixel_count / 1000000) * 300

        return JSONResponse({
            "has_handwriting": result["has_handwriting"],
            "confidence": result["confidence"],
            "handwriting_ratio": result["handwriting_ratio"],
            "image_width": img.width,
            "image_height": img.height,
            "estimated_times_ms": {
                "detection": est_detection_ms,
                "inpainting": est_inpainting_ms if result["has_handwriting"] else 0,
                "reconstruction": est_reconstruction_ms,
                "total": est_detection_ms + (est_inpainting_ms if result["has_handwriting"] else 0) + est_reconstruction_ms
            },
            "capabilities": {
                "lama_available": check_lama_available()
            }
        })

    except Exception as e:
        logger.error(f"Preview failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))
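The rough per-megapixel estimates in preview_cleanup work out as follows for a typical scan (a 300-DPI A4 page of 2480x3508 px is an assumed example, not a measured benchmark):

pixel_count = 2480 * 3508           # 8,699,840 px, about 8.7 MP
mp = pixel_count / 1_000_000
detection_ms = 100 + mp * 200       # ~1,840 ms
inpainting_ms = 500 + mp * 1000     # ~9,200 ms
reconstruction_ms = 200 + mp * 300  # ~2,810 ms
total_ms = detection_ms + inpainting_ms + reconstruction_ms  # ~13,850 ms
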
485
klausur-service/backend/worksheet/editor_ai.py
Normal file
@@ -0,0 +1,485 @@
"""
Worksheet Editor AI — AI image generation and AI worksheet modification.
"""

import io
import json
import base64
import logging
import re
import time
import random
from typing import List, Dict

import httpx

from .editor_models import (
    AIImageRequest,
    AIImageResponse,
    AIImageStyle,
    AIModifyRequest,
    AIModifyResponse,
    OLLAMA_URL,
    STYLE_PROMPTS,
)

logger = logging.getLogger(__name__)


# =============================================
# AI IMAGE GENERATION
# =============================================

async def generate_ai_image_logic(request: AIImageRequest) -> AIImageResponse:
    """
    Generate an AI image using Ollama with a text-to-image model.

    Falls back to a placeholder if Ollama is not available.
    """
    from fastapi import HTTPException

    try:
        # Build enhanced prompt with style
        style_modifier = STYLE_PROMPTS.get(request.style, "")
        enhanced_prompt = f"{request.prompt}, {style_modifier}"

        logger.info(f"Generating AI image: {enhanced_prompt[:100]}...")

        # Check if Ollama is available
        async with httpx.AsyncClient(timeout=10.0) as check_client:
            try:
                health_response = await check_client.get(f"{OLLAMA_URL}/api/tags")
                if health_response.status_code != 200:
                    raise HTTPException(status_code=503, detail="Ollama service not available")
            except httpx.ConnectError:
                logger.warning("Ollama not reachable, returning placeholder")
                return _generate_placeholder_image(request, enhanced_prompt)

        try:
            async with httpx.AsyncClient(timeout=300.0) as client:
                tags_response = await client.get(f"{OLLAMA_URL}/api/tags")
                available_models = [m.get("name", "") for m in tags_response.json().get("models", [])]

                sd_model = None
                for model in available_models:
                    if "stable" in model.lower() or "sd" in model.lower() or "diffusion" in model.lower():
                        sd_model = model
                        break

                if not sd_model:
                    logger.warning("No Stable Diffusion model found in Ollama")
                    return _generate_placeholder_image(request, enhanced_prompt)

                logger.info(f"SD model found: {sd_model}, but image generation API not implemented")
                return _generate_placeholder_image(request, enhanced_prompt)

        except Exception as e:
            logger.error(f"Image generation failed: {e}")
            return _generate_placeholder_image(request, enhanced_prompt)

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"AI image generation error: {e}")
        raise HTTPException(status_code=500, detail=str(e))


def _generate_placeholder_image(request: AIImageRequest, prompt: str) -> AIImageResponse:
    """
    Generate a placeholder image when AI generation is not available.
    Creates a simple PIL-drawn PNG placeholder with the prompt text.
    """
    from PIL import Image, ImageDraw, ImageFont

    width, height = request.width, request.height

    style_colors = {
        AIImageStyle.REALISTIC: ("#2563eb", "#dbeafe"),
        AIImageStyle.CARTOON: ("#f97316", "#ffedd5"),
        AIImageStyle.SKETCH: ("#6b7280", "#f3f4f6"),
        AIImageStyle.CLIPART: ("#8b5cf6", "#ede9fe"),
        AIImageStyle.EDUCATIONAL: ("#059669", "#d1fae5"),
    }

    fg_color, bg_color = style_colors.get(request.style, ("#6366f1", "#e0e7ff"))

    img = Image.new('RGB', (width, height), bg_color)
    draw = ImageDraw.Draw(img)

    draw.rectangle([5, 5, width-6, height-6], outline=fg_color, width=3)

    cx, cy = width // 2, height // 2 - 30
    draw.ellipse([cx-40, cy-40, cx+40, cy+40], outline=fg_color, width=3)
    draw.line([cx-20, cy-10, cx+20, cy-10], fill=fg_color, width=3)
    draw.line([cx, cy-10, cx, cy+20], fill=fg_color, width=3)

    try:
        font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", 14)
    except Exception:
        font = ImageFont.load_default()

    # Wrap the prompt text to at most 40 characters per line
    max_chars = 40
    lines = []
    words = prompt[:200].split()
    current_line = ""
    for word in words:
        if len(current_line) + len(word) + 1 <= max_chars:
            current_line += (" " + word if current_line else word)
        else:
            if current_line:
                lines.append(current_line)
            current_line = word
    if current_line:
        lines.append(current_line)

    text_y = cy + 60
    for line in lines[:4]:
        bbox = draw.textbbox((0, 0), line, font=font)
        text_width = bbox[2] - bbox[0]
        draw.text((cx - text_width // 2, text_y), line, fill=fg_color, font=font)
        text_y += 20

    badge_text = "KI-Bild (Platzhalter)"
    try:
        badge_font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", 10)
    except Exception:
        badge_font = font
    draw.rectangle([10, height-30, 150, height-10], fill=fg_color)
    draw.text((15, height-27), badge_text, fill="white", font=badge_font)

    buffer = io.BytesIO()
    img.save(buffer, format='PNG')
    buffer.seek(0)

    image_base64 = f"data:image/png;base64,{base64.b64encode(buffer.getvalue()).decode('utf-8')}"

    return AIImageResponse(
        image_base64=image_base64,
        prompt_used=prompt,
        error="AI image generation not available. Using placeholder."
    )

# =============================================
# AI WORKSHEET MODIFICATION
# =============================================

async def modify_worksheet_with_ai_logic(request: AIModifyRequest) -> AIModifyResponse:
    """
    Modify a worksheet using AI based on natural language prompt.
    """
    try:
        logger.info(f"AI modify request: {request.prompt[:100]}...")

        try:
            canvas_data = json.loads(request.canvas_json)
        except json.JSONDecodeError:
            return AIModifyResponse(
                message="Fehler beim Parsen des Canvas",
                error="Invalid canvas JSON"
            )

        system_prompt = """Du bist ein Assistent fuer die Bearbeitung von Arbeitsblaettern.
Du erhaeltst den aktuellen Zustand eines Canvas im JSON-Format und eine Anweisung des Nutzers.
Deine Aufgabe ist es, die gewuenschten Aenderungen am Canvas vorzunehmen.

Der Canvas verwendet Fabric.js. Hier sind die wichtigsten Objekttypen:
- i-text: Interaktiver Text mit fontFamily, fontSize, fill, left, top
- rect: Rechteck mit left, top, width, height, fill, stroke, strokeWidth
- circle: Kreis mit left, top, radius, fill, stroke, strokeWidth
- line: Linie mit x1, y1, x2, y2, stroke, strokeWidth

Das Canvas ist 794x1123 Pixel (A4 bei 96 DPI).

Antworte NUR mit einem JSON-Objekt in diesem Format:
{
    "action": "modify" oder "add" oder "delete" oder "info",
    "objects": [...],  // Neue/modifizierte Objekte (bei modify/add)
    "message": "Kurze Beschreibung der Aenderung"
}

Wenn du Objekte hinzufuegst, generiere eindeutige IDs im Format "obj_<timestamp>_<random>".
"""

        user_prompt = f"""Aktueller Canvas-Zustand:
```json
{json.dumps(canvas_data, indent=2)[:5000]}
```

Nutzer-Anweisung: {request.prompt}

Fuehre die Aenderung durch und antworte mit dem JSON-Objekt."""

        try:
            async with httpx.AsyncClient(timeout=120.0) as client:
                response = await client.post(
                    f"{OLLAMA_URL}/api/generate",
                    json={
                        "model": request.model,
                        "prompt": user_prompt,
                        "system": system_prompt,
                        "stream": False,
                        "options": {
                            "temperature": 0.3,
                            "num_predict": 4096
                        }
                    }
                )

                if response.status_code != 200:
                    logger.warning(f"Ollama error: {response.status_code}, trying local fallback")
                    return _handle_simple_modification(request.prompt, canvas_data)

                ai_response = response.json().get("response", "")

        except httpx.ConnectError:
            logger.warning("Ollama not reachable")
            return _handle_simple_modification(request.prompt, canvas_data)
        except httpx.TimeoutException:
            logger.warning("Ollama timeout, trying local fallback")
            return _handle_simple_modification(request.prompt, canvas_data)

        try:
            # Extract the first {...} span from the model output
            json_start = ai_response.find('{')
            json_end = ai_response.rfind('}') + 1

            if json_start == -1 or json_end <= json_start:
                logger.warning(f"No JSON found in AI response: {ai_response[:200]}")
                return AIModifyResponse(
                    message="KI konnte die Anfrage nicht verarbeiten",
                    error="No JSON in response"
                )

            ai_json = json.loads(ai_response[json_start:json_end])
            action = ai_json.get("action", "info")
            message = ai_json.get("message", "Aenderungen angewendet")
            new_objects = ai_json.get("objects", [])

            if action == "info":
                return AIModifyResponse(message=message)

            if action == "add" and new_objects:
                existing_objects = canvas_data.get("objects", [])
                existing_objects.extend(new_objects)
                canvas_data["objects"] = existing_objects
                return AIModifyResponse(
                    modified_canvas_json=json.dumps(canvas_data),
                    message=message
                )

            if action == "modify" and new_objects:
                existing_objects = canvas_data.get("objects", [])
                new_ids = {obj.get("id") for obj in new_objects if obj.get("id")}
                kept_objects = [obj for obj in existing_objects if obj.get("id") not in new_ids]
                kept_objects.extend(new_objects)
                canvas_data["objects"] = kept_objects
                return AIModifyResponse(
                    modified_canvas_json=json.dumps(canvas_data),
                    message=message
                )

            if action == "delete":
                delete_ids = ai_json.get("delete_ids", [])
                if delete_ids:
                    existing_objects = canvas_data.get("objects", [])
                    canvas_data["objects"] = [obj for obj in existing_objects if obj.get("id") not in delete_ids]
                return AIModifyResponse(
                    modified_canvas_json=json.dumps(canvas_data),
                    message=message
                )

            return AIModifyResponse(message=message)

        except json.JSONDecodeError as e:
            logger.error(f"Failed to parse AI JSON: {e}")
            return AIModifyResponse(
                message="Fehler beim Verarbeiten der KI-Antwort",
                error=str(e)
            )

    except Exception as e:
        logger.error(f"AI modify error: {e}")
        return AIModifyResponse(
            message="Ein unerwarteter Fehler ist aufgetreten",
            error=str(e)
        )

def _handle_simple_modification(prompt: str, canvas_data: dict) -> AIModifyResponse:
    """
    Handle simple modifications locally when Ollama is not available.
    Supports basic commands like adding headings, lines, etc.
    """
    prompt_lower = prompt.lower()
    objects = canvas_data.get("objects", [])

    def generate_id():
        return f"obj_{int(time.time()*1000)}_{random.randint(1000, 9999)}"

    # Add heading
    if "ueberschrift" in prompt_lower or "titel" in prompt_lower or "heading" in prompt_lower:
        text_match = re.search(r'"([^"]+)"', prompt)
        text = text_match.group(1) if text_match else "Ueberschrift"

        new_text = {
            "type": "i-text", "id": generate_id(), "text": text,
            "left": 397, "top": 50, "originX": "center",
            "fontFamily": "Arial", "fontSize": 28, "fontWeight": "bold", "fill": "#000000"
        }
        objects.append(new_text)
        canvas_data["objects"] = objects
        return AIModifyResponse(
            modified_canvas_json=json.dumps(canvas_data),
            message=f"Ueberschrift '{text}' hinzugefuegt"
        )

    # Add lines for writing
    if "linie" in prompt_lower or "line" in prompt_lower or "schreib" in prompt_lower:
        num_match = re.search(r'(\d+)', prompt)
        num_lines = int(num_match.group(1)) if num_match else 5
        num_lines = min(num_lines, 20)

        start_y = 150
        line_spacing = 40

        for i in range(num_lines):
            new_line = {
                "type": "line", "id": generate_id(),
                "x1": 60, "y1": start_y + i * line_spacing,
                "x2": 734, "y2": start_y + i * line_spacing,
                "stroke": "#cccccc", "strokeWidth": 1
            }
            objects.append(new_line)

        canvas_data["objects"] = objects
        return AIModifyResponse(
            modified_canvas_json=json.dumps(canvas_data),
            message=f"{num_lines} Schreiblinien hinzugefuegt"
        )

    # Make text bigger
    if "groesser" in prompt_lower or "bigger" in prompt_lower or "larger" in prompt_lower:
        modified = 0
        for obj in objects:
            if obj.get("type") in ["i-text", "text", "textbox"]:
                current_size = obj.get("fontSize", 16)
                obj["fontSize"] = int(current_size * 1.25)
                modified += 1

        canvas_data["objects"] = objects
        if modified > 0:
            return AIModifyResponse(
                modified_canvas_json=json.dumps(canvas_data),
                message=f"{modified} Texte vergroessert"
            )

    # Center elements
    if "zentrier" in prompt_lower or "center" in prompt_lower or "mitte" in prompt_lower:
        center_x = 397
        for obj in objects:
            if not obj.get("isGrid"):
                obj["left"] = center_x
                obj["originX"] = "center"

        canvas_data["objects"] = objects
        return AIModifyResponse(
            modified_canvas_json=json.dumps(canvas_data),
            message="Elemente zentriert"
        )

    # Add numbering
    if "nummer" in prompt_lower or "nummerier" in prompt_lower or "1-10" in prompt_lower:
        # Match ranges like "1-10" or "1 bis 10" (the original character class
        # [-bis]+ matched these too, but also stray letter runs)
        range_match = re.search(r'(\d+)\s*(?:-|bis)\s*(\d+)', prompt)
        if range_match:
            start, end = int(range_match.group(1)), int(range_match.group(2))
        else:
            start, end = 1, 10

        y = 100
        for i in range(start, min(end + 1, start + 20)):
            new_text = {
                "type": "i-text", "id": generate_id(), "text": f"{i}.",
                "left": 40, "top": y, "fontFamily": "Arial", "fontSize": 14, "fill": "#000000"
            }
            objects.append(new_text)
            y += 35

        canvas_data["objects"] = objects
        return AIModifyResponse(
            modified_canvas_json=json.dumps(canvas_data),
            message=f"Nummerierung {start}-{end} hinzugefuegt"
        )

    # Add rectangle/box
    if "rechteck" in prompt_lower or "box" in prompt_lower or "kasten" in prompt_lower:
        new_rect = {
            "type": "rect", "id": generate_id(),
            "left": 100, "top": 200, "width": 200, "height": 100,
            "fill": "transparent", "stroke": "#000000", "strokeWidth": 2
        }
        objects.append(new_rect)
        canvas_data["objects"] = objects
        return AIModifyResponse(
            modified_canvas_json=json.dumps(canvas_data),
            message="Rechteck hinzugefuegt"
        )

    # Add grid/raster
    if "raster" in prompt_lower or "grid" in prompt_lower or "tabelle" in prompt_lower:
        # Match dimensions like "3x4", "3*4", "3 mal 4", "3 by 4" (the original
        # character class [x/\u00d7\*mal by] could not match the multi-letter words)
        dim_match = re.search(r'(\d+)\s*(?:[x/\u00d7*]|mal|by)\s*(\d+)', prompt_lower)
        if dim_match:
            cols = int(dim_match.group(1))
            rows = int(dim_match.group(2))
        else:
            nums = re.findall(r'(\d+)', prompt)
            if len(nums) >= 2:
                cols, rows = int(nums[0]), int(nums[1])
            else:
                cols, rows = 3, 4

        cols = min(max(1, cols), 10)
        rows = min(max(1, rows), 15)

        canvas_width = 794
        canvas_height = 1123
        margin = 60
        available_width = canvas_width - 2 * margin
        available_height = canvas_height - 2 * margin - 80

        cell_width = available_width / cols
        cell_height = min(available_height / rows, 80)

        start_x = margin
        start_y = 120

        grid_objects = []
        for r in range(rows + 1):
            y = start_y + r * cell_height
            grid_objects.append({
                "type": "line", "id": generate_id(),
                "x1": start_x, "y1": y,
                "x2": start_x + cols * cell_width, "y2": y,
                "stroke": "#666666", "strokeWidth": 1, "isGrid": True
            })

        for c in range(cols + 1):
            x = start_x + c * cell_width
            grid_objects.append({
                "type": "line", "id": generate_id(),
                "x1": x, "y1": start_y,
                "x2": x, "y2": start_y + rows * cell_height,
                "stroke": "#666666", "strokeWidth": 1, "isGrid": True
            })

        objects.extend(grid_objects)
        canvas_data["objects"] = objects
        return AIModifyResponse(
            modified_canvas_json=json.dumps(canvas_data),
            message=f"{cols}x{rows} Raster hinzugefuegt ({cols} Spalten, {rows} Zeilen)"
        )

    # Default: Ollama needed
    return AIModifyResponse(
        message="Diese Aenderung erfordert den KI-Service. Bitte stellen Sie sicher, dass Ollama laeuft.",
        error="Complex modification requires Ollama"
    )
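For reference, a response satisfying the JSON contract that the system prompt in modify_worksheet_with_ai_logic asks the model for might look like this (an illustrative example, not output captured from the model; the id and coordinates are made up):

example_ai_response = {
    "action": "add",
    "objects": [{
        "type": "i-text", "id": "obj_1717000000000_4242",
        "text": "Aufgabe 1", "left": 60, "top": 140,
        "fontFamily": "Arial", "fontSize": 16, "fill": "#000000",
    }],
    "message": "Textfeld 'Aufgabe 1' hinzugefuegt",
}
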
388
klausur-service/backend/worksheet/editor_api.py
Normal file
@@ -0,0 +1,388 @@
"""
Worksheet Editor API - Backend Endpoints for Visual Worksheet Editor

Provides endpoints for:
- AI Image generation via Ollama/Stable Diffusion
- Worksheet Save/Load
- PDF Export

Split modules:
- editor_models: Enums, Pydantic models, configuration
- editor_ai: AI image generation and AI worksheet modification
- editor_reconstruct: Document reconstruction from vocab sessions
"""

import os
import io
import json
import logging
from datetime import datetime, timezone
import uuid

from fastapi import APIRouter, HTTPException
from fastapi.responses import StreamingResponse
import httpx

# Re-export everything from sub-modules for backward compatibility
from .editor_models import (  # noqa: F401
    AIImageStyle,
    WorksheetStatus,
    AIImageRequest,
    AIImageResponse,
    PageData,
    PageFormat,
    WorksheetSaveRequest,
    WorksheetResponse,
    AIModifyRequest,
    AIModifyResponse,
    ReconstructRequest,
    ReconstructResponse,
    worksheets_db,
    OLLAMA_URL,
    SD_MODEL,
    WORKSHEET_STORAGE_DIR,
    STYLE_PROMPTS,
    REPORTLAB_AVAILABLE,
)

from .editor_ai import (  # noqa: F401
    generate_ai_image_logic,
    _generate_placeholder_image,
    modify_worksheet_with_ai_logic,
    _handle_simple_modification,
)

from .editor_reconstruct import (  # noqa: F401
    reconstruct_document_logic,
    _detect_image_regions,
)

logger = logging.getLogger(__name__)

# =============================================
# ROUTER
# =============================================

router = APIRouter(prefix="/api/v1/worksheet", tags=["Worksheet Editor"])

# =============================================
# AI IMAGE GENERATION
# =============================================

@router.post("/ai-image", response_model=AIImageResponse)
async def generate_ai_image(request: AIImageRequest):
    """
    Generate an AI image using Ollama with a text-to-image model.

    Supported models:
    - stable-diffusion (via Ollama)
    - sd3.5-medium
    - llava (for image understanding, not generation)

    Falls back to a placeholder if Ollama is not available.
    """
    return await generate_ai_image_logic(request)


# =============================================
# WORKSHEET SAVE/LOAD
# =============================================

@router.post("/save", response_model=WorksheetResponse)
async def save_worksheet(request: WorksheetSaveRequest):
    """
    Save a worksheet document.

    - If id is provided, updates existing worksheet
    - If id is not provided, creates new worksheet
    """
    try:
        now = datetime.now(timezone.utc).isoformat()

        worksheet_id = request.id or f"ws_{uuid.uuid4().hex[:12]}"

        worksheet = {
            "id": worksheet_id,
            "title": request.title,
            "description": request.description,
            "pages": [p.dict() for p in request.pages],
            "pageFormat": (request.pageFormat or PageFormat()).dict(),
            "createdAt": worksheets_db.get(worksheet_id, {}).get("createdAt", now),
            "updatedAt": now
        }

        worksheets_db[worksheet_id] = worksheet

        filepath = os.path.join(WORKSHEET_STORAGE_DIR, f"{worksheet_id}.json")
        with open(filepath, 'w', encoding='utf-8') as f:
            json.dump(worksheet, f, ensure_ascii=False, indent=2)

        logger.info(f"Saved worksheet: {worksheet_id}")

        return WorksheetResponse(**worksheet)

    except Exception as e:
        logger.error(f"Failed to save worksheet: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to save: {str(e)}")


@router.get("/{worksheet_id}", response_model=WorksheetResponse)
async def get_worksheet(worksheet_id: str):
    """Load a worksheet document by ID."""
    try:
        if worksheet_id in worksheets_db:
            return WorksheetResponse(**worksheets_db[worksheet_id])

        filepath = os.path.join(WORKSHEET_STORAGE_DIR, f"{worksheet_id}.json")
        if os.path.exists(filepath):
            with open(filepath, 'r', encoding='utf-8') as f:
                worksheet = json.load(f)
            worksheets_db[worksheet_id] = worksheet
            return WorksheetResponse(**worksheet)

        raise HTTPException(status_code=404, detail="Worksheet not found")

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Failed to load worksheet {worksheet_id}: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to load: {str(e)}")


@router.get("/list/all")
async def list_worksheets():
    """List all available worksheets."""
    try:
        worksheets = []

        for filename in os.listdir(WORKSHEET_STORAGE_DIR):
            if filename.endswith('.json'):
                filepath = os.path.join(WORKSHEET_STORAGE_DIR, filename)
                try:
                    with open(filepath, 'r', encoding='utf-8') as f:
                        worksheet = json.load(f)
                    worksheets.append({
                        "id": worksheet.get("id"),
                        "title": worksheet.get("title"),
                        "description": worksheet.get("description"),
                        "pageCount": len(worksheet.get("pages", [])),
                        "updatedAt": worksheet.get("updatedAt"),
                        "createdAt": worksheet.get("createdAt")
                    })
                except Exception as e:
                    logger.warning(f"Failed to load {filename}: {e}")

        worksheets.sort(key=lambda x: x.get("updatedAt", ""), reverse=True)

        return {"worksheets": worksheets, "total": len(worksheets)}

    except Exception as e:
        logger.error(f"Failed to list worksheets: {e}")
        raise HTTPException(status_code=500, detail=str(e))


@router.delete("/{worksheet_id}")
async def delete_worksheet(worksheet_id: str):
    """Delete a worksheet document."""
    try:
        if worksheet_id in worksheets_db:
            del worksheets_db[worksheet_id]

        filepath = os.path.join(WORKSHEET_STORAGE_DIR, f"{worksheet_id}.json")
        if os.path.exists(filepath):
            os.remove(filepath)
            logger.info(f"Deleted worksheet: {worksheet_id}")
            return {"status": "deleted", "id": worksheet_id}

        raise HTTPException(status_code=404, detail="Worksheet not found")

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Failed to delete worksheet {worksheet_id}: {e}")
        raise HTTPException(status_code=500, detail=str(e))

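A minimal save-then-load round trip against the endpoints above (illustrative, not part of this commit; the base URL is an assumption):

import httpx

payload = {
    "title": "Vokabeltest Unit 3",
    "pages": [{"id": "page_1", "index": 0, "canvasJSON": "{\"objects\": []}"}],
}
created = httpx.post("http://localhost:8000/api/v1/worksheet/save", json=payload).json()
loaded = httpx.get(f"http://localhost:8000/api/v1/worksheet/{created['id']}").json()
assert loaded["title"] == payload["title"]
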
# =============================================
# PDF EXPORT
# =============================================

@router.post("/{worksheet_id}/export-pdf")
async def export_worksheet_pdf(worksheet_id: str):
    """
    Export worksheet as PDF.

    Note: This creates a basic PDF. For full canvas rendering,
    the frontend should use pdf-lib with canvas.toDataURL().
    """
    if not REPORTLAB_AVAILABLE:
        raise HTTPException(status_code=501, detail="PDF export not available (reportlab not installed)")

    try:
        from reportlab.lib.pagesizes import A4
        from reportlab.pdfgen import canvas

        worksheet = worksheets_db.get(worksheet_id)
        if not worksheet:
            filepath = os.path.join(WORKSHEET_STORAGE_DIR, f"{worksheet_id}.json")
            if os.path.exists(filepath):
                with open(filepath, 'r', encoding='utf-8') as f:
                    worksheet = json.load(f)
            else:
                raise HTTPException(status_code=404, detail="Worksheet not found")

        buffer = io.BytesIO()
        c = canvas.Canvas(buffer, pagesize=A4)

        page_width, page_height = A4

        for page_data in worksheet.get("pages", []):
            if page_data.get("index", 0) == 0:
                c.setFont("Helvetica-Bold", 18)
                c.drawString(50, page_height - 50, worksheet.get("title", "Arbeitsblatt"))
                c.setFont("Helvetica", 10)
                c.drawString(50, page_height - 70, f"Erstellt: {worksheet.get('createdAt', '')[:10]}")

            canvas_json_str = page_data.get("canvasJSON", "{}")
            if canvas_json_str:
                try:
                    canvas_data = json.loads(canvas_json_str)
                    objects = canvas_data.get("objects", [])

                    for obj in objects:
                        obj_type = obj.get("type", "")

                        if obj_type in ["text", "i-text", "textbox"]:
                            text = obj.get("text", "")
                            left = obj.get("left", 50)
                            top = obj.get("top", 100)
                            font_size = obj.get("fontSize", 12)

                            # 0.75 = 72/96: canvas pixels (96 DPI) to PDF points (72 per inch)
                            pdf_x = left * 0.75
                            pdf_y = page_height - (top * 0.75)

                            c.setFont("Helvetica", min(font_size, 24))
                            c.drawString(pdf_x, pdf_y, text[:100])

                        elif obj_type == "rect":
                            left = obj.get("left", 0) * 0.75
                            top = obj.get("top", 0) * 0.75
                            width = obj.get("width", 50) * 0.75
                            height = obj.get("height", 30) * 0.75
                            c.rect(left, page_height - top - height, width, height)

                        elif obj_type == "circle":
                            left = obj.get("left", 0) * 0.75
                            top = obj.get("top", 0) * 0.75
                            radius = obj.get("radius", 25) * 0.75
                            c.circle(left + radius, page_height - top - radius, radius)

                except json.JSONDecodeError:
                    pass

            c.showPage()

        c.save()
        buffer.seek(0)

        filename = f"{worksheet.get('title', 'worksheet').replace(' ', '_')}.pdf"

        return StreamingResponse(
            buffer,
            media_type="application/pdf",
            headers={"Content-Disposition": f"attachment; filename={filename}"}
        )

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"PDF export failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))


# =============================================
# AI WORKSHEET MODIFICATION
# =============================================

@router.post("/ai-modify", response_model=AIModifyResponse)
async def modify_worksheet_with_ai(request: AIModifyRequest):
    """
    Modify a worksheet using AI based on natural language prompt.

    Uses Ollama with qwen2.5vl:32b to understand the canvas state
    and generate modifications based on the user's request.
    """
    return await modify_worksheet_with_ai_logic(request)


# =============================================
# HEALTH CHECK
# =============================================

@router.get("/health/check")
async def health_check():
    """Check worksheet editor API health and dependencies."""
    status = {
        "status": "healthy",
        "ollama": False,
        "storage": os.path.exists(WORKSHEET_STORAGE_DIR),
        "reportlab": REPORTLAB_AVAILABLE,
        "worksheets_count": len(worksheets_db)
    }

    try:
        async with httpx.AsyncClient(timeout=5.0) as client:
            response = await client.get(f"{OLLAMA_URL}/api/tags")
            status["ollama"] = response.status_code == 200
    except Exception:
        pass

    return status


# =============================================
# DOCUMENT RECONSTRUCTION FROM VOCAB SESSION
# =============================================

@router.post("/reconstruct-from-session", response_model=ReconstructResponse)
async def reconstruct_document_from_session(request: ReconstructRequest):
    """
    Reconstruct a document from a vocab session into Fabric.js canvas format.

    Returns canvas JSON ready to load into the worksheet editor.
    """
    try:
        return await reconstruct_document_logic(request)
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Document reconstruction failed: {e}")
        import traceback
        logger.error(traceback.format_exc())
        raise HTTPException(status_code=500, detail=str(e))


@router.get("/sessions/available")
async def get_available_sessions():
    """Get list of available vocab sessions that can be reconstructed."""
    try:
        from vocab_worksheet_api import _sessions

        available = []
        for session_id, session in _sessions.items():
            if session.get("pdf_data"):
                available.append({
                    "id": session_id,
                    "name": session.get("name", "Unnamed"),
                    "description": session.get("description"),
                    "vocabulary_count": len(session.get("vocabulary", [])),
                    "page_count": session.get("pdf_page_count", 1),
                    "status": session.get("status", "unknown"),
                    "created_at": session.get("created_at", "").isoformat() if session.get("created_at") else None
                })

        return {"sessions": available, "total": len(available)}

    except Exception as e:
        logger.error(f"Failed to list sessions: {e}")
        raise HTTPException(status_code=500, detail=str(e))
133
klausur-service/backend/worksheet/editor_models.py
Normal file
@@ -0,0 +1,133 @@
"""
Worksheet Editor Models — Enums, Pydantic models, and configuration.
"""

import os
import logging
from typing import Optional, List, Dict
from enum import Enum

from pydantic import BaseModel, Field

logger = logging.getLogger(__name__)

# =============================================
# CONFIGURATION
# =============================================

OLLAMA_URL = os.getenv("OLLAMA_URL", "http://host.docker.internal:11434")
SD_MODEL = os.getenv("SD_MODEL", "stable-diffusion")  # or specific SD model
WORKSHEET_STORAGE_DIR = os.getenv("WORKSHEET_STORAGE_DIR",
    os.path.join(os.path.dirname(os.path.abspath(__file__)), "worksheet-storage"))

# Ensure storage directory exists
os.makedirs(WORKSHEET_STORAGE_DIR, exist_ok=True)

# =============================================
# ENUMS & MODELS
# =============================================

class AIImageStyle(str, Enum):
    REALISTIC = "realistic"
    CARTOON = "cartoon"
    SKETCH = "sketch"
    CLIPART = "clipart"
    EDUCATIONAL = "educational"

class WorksheetStatus(str, Enum):
    DRAFT = "draft"
    PUBLISHED = "published"
    ARCHIVED = "archived"

# Style prompt modifiers
STYLE_PROMPTS = {
    AIImageStyle.REALISTIC: "photorealistic, high detail, professional photography",
    AIImageStyle.CARTOON: "cartoon style, colorful, child-friendly, simple shapes",
    AIImageStyle.SKETCH: "pencil sketch, hand-drawn, black and white, artistic",
    AIImageStyle.CLIPART: "clipart style, flat design, simple, vector-like",
    AIImageStyle.EDUCATIONAL: "educational illustration, clear, informative, textbook style"
}

# =============================================
# REQUEST/RESPONSE MODELS
# =============================================

class AIImageRequest(BaseModel):
    prompt: str = Field(..., min_length=3, max_length=500)
    style: AIImageStyle = AIImageStyle.EDUCATIONAL
    width: int = Field(512, ge=256, le=1024)
    height: int = Field(512, ge=256, le=1024)

class AIImageResponse(BaseModel):
    image_base64: str
    prompt_used: str
    error: Optional[str] = None

class PageData(BaseModel):
    id: str
    index: int
    canvasJSON: str

class PageFormat(BaseModel):
    width: float = 210   # mm (A4 portrait)
    height: float = 297  # mm
    orientation: str = "portrait"
    margins: Dict[str, float] = {"top": 15, "right": 15, "bottom": 15, "left": 15}

class WorksheetSaveRequest(BaseModel):
    id: Optional[str] = None
    title: str
    description: Optional[str] = None
    pages: List[PageData]
    pageFormat: Optional[PageFormat] = None

class WorksheetResponse(BaseModel):
    id: str
    title: str
    description: Optional[str]
    pages: List[PageData]
    pageFormat: PageFormat
    createdAt: str
    updatedAt: str

class AIModifyRequest(BaseModel):
    prompt: str = Field(..., min_length=3, max_length=1000)
    canvas_json: str
    model: str = "qwen2.5vl:32b"

class AIModifyResponse(BaseModel):
    modified_canvas_json: Optional[str] = None
    message: str
    error: Optional[str] = None

class ReconstructRequest(BaseModel):
    session_id: str
    page_number: int = 1
    include_images: bool = True
    regenerate_graphics: bool = False

class ReconstructResponse(BaseModel):
    canvas_json: str
    page_width: int
    page_height: int
    elements_count: int
    vocabulary_matched: int
    message: str
    error: Optional[str] = None

# =============================================
# IN-MEMORY STORAGE (Development)
# =============================================

worksheets_db: Dict[str, Dict] = {}

# PDF Generation availability
try:
    from reportlab.lib import colors  # noqa: F401
    from reportlab.lib.pagesizes import A4  # noqa: F401
    from reportlab.lib.units import mm  # noqa: F401
    from reportlab.pdfgen import canvas  # noqa: F401
    from reportlab.lib.styles import getSampleStyleSheet  # noqa: F401
    REPORTLAB_AVAILABLE = True
except ImportError:
    REPORTLAB_AVAILABLE = False
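A quick sketch of how these models validate input (illustrative; assumes it runs where the worksheet package is importable):

from worksheet.editor_models import AIImageRequest, AIImageStyle
from pydantic import ValidationError

req = AIImageRequest(prompt="a friendly cartoon fox", style=AIImageStyle.CARTOON)
print(req.width, req.height)  # 512 512, the Field defaults

try:
    AIImageRequest(prompt="ab")  # shorter than min_length=3
except ValidationError as err:
    print("rejected:", err.errors()[0]["loc"])
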
255
klausur-service/backend/worksheet/editor_reconstruct.py
Normal file
@@ -0,0 +1,255 @@
"""
Worksheet Editor Reconstruct — Document reconstruction from vocab sessions.
"""

import io
import uuid
import base64
import logging
from typing import List, Dict

import numpy as np

from .editor_models import (
    ReconstructRequest,
    ReconstructResponse,
)

logger = logging.getLogger(__name__)


async def reconstruct_document_logic(request: ReconstructRequest) -> ReconstructResponse:
    """
    Reconstruct a document from a vocab session into Fabric.js canvas format.

    This function:
    1. Loads the original PDF from the vocab session
    2. Runs OCR with position tracking
    3. Creates Fabric.js canvas JSON with positioned elements
    4. Maps extracted vocabulary to their positions

    Returns ReconstructResponse ready to send to the client.
    """
    from fastapi import HTTPException
    from vocab_worksheet_api import _sessions, convert_pdf_page_to_image

    # Check if session exists
    if request.session_id not in _sessions:
        raise HTTPException(status_code=404, detail=f"Session {request.session_id} not found")

    session = _sessions[request.session_id]

    if not session.get("pdf_data"):
        raise HTTPException(status_code=400, detail="Session has no PDF data")

    pdf_data = session["pdf_data"]
    page_count = session.get("pdf_page_count", 1)

    if request.page_number < 1 or request.page_number > page_count:
        raise HTTPException(
            status_code=400,
            detail=f"Page {request.page_number} not found. PDF has {page_count} pages."
        )

    vocabulary = session.get("vocabulary", [])
    page_vocab = [v for v in vocabulary if v.get("source_page") == request.page_number]

    logger.info(f"Reconstructing page {request.page_number} from session {request.session_id}")
    logger.info(f"Found {len(page_vocab)} vocabulary items for this page")

    image_bytes = await convert_pdf_page_to_image(pdf_data, request.page_number)
    if not image_bytes:
        raise HTTPException(status_code=500, detail="Failed to convert PDF page to image")

    from PIL import Image
    img = Image.open(io.BytesIO(image_bytes))
    img_width, img_height = img.size

    from hybrid_vocab_extractor import run_paddle_ocr
    ocr_regions, raw_text = run_paddle_ocr(image_bytes)

    logger.info(f"OCR found {len(ocr_regions)} text regions")

    # Scale factors from source-image pixels to the 794x1123 (A4 at 96 DPI) canvas
    A4_WIDTH = 794
    A4_HEIGHT = 1123
    scale_x = A4_WIDTH / img_width
    scale_y = A4_HEIGHT / img_height

    fabric_objects = []

    # 1. Add white background
    fabric_objects.append({
        "type": "rect", "left": 0, "top": 0,
        "width": A4_WIDTH, "height": A4_HEIGHT,
        "fill": "#ffffff", "selectable": False,
        "evented": False, "isBackground": True
    })

    # 2. Group OCR regions by Y-coordinate to detect rows
    sorted_regions = sorted(ocr_regions, key=lambda r: (r.y1, r.x1))

    # 3. Detect headers (larger text at top)
    headers = []
    for region in sorted_regions:
        height = region.y2 - region.y1
        if region.y1 < img_height * 0.15 and height > 30:
            headers.append(region)

    # 4. Create text objects for each region
    vocab_matched = 0

    for region in sorted_regions:
        left = int(region.x1 * scale_x)
        top = int(region.y1 * scale_y)

        is_header = region in headers

        region_height = region.y2 - region.y1
        base_font_size = max(10, min(32, int(region_height * scale_y * 0.8)))

        if is_header:
            base_font_size = max(base_font_size, 24)

        is_vocab = False
        vocab_match = None
        for v in page_vocab:
            if v.get("english", "").lower() in region.text.lower() or \
               v.get("german", "").lower() in region.text.lower():
                is_vocab = True
                vocab_match = v
                vocab_matched += 1
                break

        text_obj = {
            "type": "i-text",
            "id": f"text_{uuid.uuid4().hex[:8]}",
            "left": left, "top": top,
            "text": region.text,
            "fontFamily": "Arial",
            "fontSize": base_font_size,
            "fontWeight": "bold" if is_header else "normal",
            "fill": "#000000",
            "originX": "left", "originY": "top",
        }

        if is_vocab and vocab_match:
            text_obj["isVocabulary"] = True
            text_obj["vocabularyId"] = vocab_match.get("id")
            text_obj["english"] = vocab_match.get("english")
            text_obj["german"] = vocab_match.get("german")

        fabric_objects.append(text_obj)

    # 5. If include_images, detect and extract image regions
    if request.include_images:
        image_regions = await _detect_image_regions(image_bytes, ocr_regions, img_width, img_height)

        for i, img_region in enumerate(image_regions):
            img_x1 = int(img_region["x1"])
            img_y1 = int(img_region["y1"])
            img_x2 = int(img_region["x2"])
            img_y2 = int(img_region["y2"])

            cropped = img.crop((img_x1, img_y1, img_x2, img_y2))

            buffer = io.BytesIO()
            cropped.save(buffer, format='PNG')
            buffer.seek(0)
            img_base64 = f"data:image/png;base64,{base64.b64encode(buffer.getvalue()).decode('utf-8')}"

            fabric_objects.append({
                "type": "image",
                "id": f"img_{uuid.uuid4().hex[:8]}",
                "left": int(img_x1 * scale_x),
                "top": int(img_y1 * scale_y),
                "width": int((img_x2 - img_x1) * scale_x),
                "height": int((img_y2 - img_y1) * scale_y),
                "src": img_base64,
                "scaleX": 1, "scaleY": 1,
            })

    import json
    canvas_data = {
        "version": "6.0.0",
        "objects": fabric_objects,
        "background": "#ffffff"
    }

    return ReconstructResponse(
        canvas_json=json.dumps(canvas_data),
        page_width=A4_WIDTH,
        page_height=A4_HEIGHT,
        elements_count=len(fabric_objects),
        vocabulary_matched=vocab_matched,
        message=f"Reconstructed page {request.page_number} with {len(fabric_objects)} elements, "
                f"{vocab_matched} vocabulary items matched"
    )

|
||||
async def _detect_image_regions(
|
||||
image_bytes: bytes,
|
||||
ocr_regions: list,
|
||||
img_width: int,
|
||||
img_height: int
|
||||
) -> List[Dict]:
|
||||
"""
|
||||
Detect image/graphic regions in the document.
|
||||
|
||||
Uses a simple approach:
|
||||
1. Find large gaps between text regions (potential image areas)
|
||||
2. Use edge detection to find bounded regions
|
||||
3. Filter out text areas
|
||||
"""
|
||||
from PIL import Image
|
||||
import cv2
|
||||
|
||||
try:
|
||||
img = Image.open(io.BytesIO(image_bytes))
|
||||
img_array = np.array(img.convert('L'))
|
||||
|
||||
text_mask = np.ones_like(img_array, dtype=bool)
|
||||
for region in ocr_regions:
|
||||
x1 = max(0, region.x1 - 5)
|
||||
y1 = max(0, region.y1 - 5)
|
||||
x2 = min(img_width, region.x2 + 5)
|
||||
y2 = min(img_height, region.y2 + 5)
|
||||
text_mask[y1:y2, x1:x2] = False
|
||||
|
||||
image_regions = []
|
||||
|
||||
edges = cv2.Canny(img_array, 50, 150)
|
||||
edges[~text_mask] = 0
|
||||
|
||||
contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
|
||||
|
||||
for contour in contours:
|
||||
x, y, w, h = cv2.boundingRect(contour)
|
||||
|
||||
if w > 50 and h > 50:
|
||||
if w < img_width * 0.9 and h < img_height * 0.9:
|
||||
region_content = img_array[y:y+h, x:x+w]
|
||||
variance = np.var(region_content)
|
||||
|
||||
if variance > 500:
|
||||
image_regions.append({
|
||||
"x1": x, "y1": y,
|
||||
"x2": x + w, "y2": y + h
|
||||
})
|
||||
|
||||
filtered_regions = []
|
||||
for region in sorted(image_regions, key=lambda r: (r["x2"]-r["x1"])*(r["y2"]-r["y1"]), reverse=True):
|
||||
overlaps = False
|
||||
for existing in filtered_regions:
|
||||
if not (region["x2"] < existing["x1"] or region["x1"] > existing["x2"] or
|
||||
region["y2"] < existing["y1"] or region["y1"] > existing["y2"]):
|
||||
overlaps = True
|
||||
break
|
||||
if not overlaps:
|
||||
filtered_regions.append(region)
|
||||
|
||||
logger.info(f"Detected {len(filtered_regions)} image regions")
|
||||
return filtered_regions[:10]
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"Image region detection failed: {e}")
|
||||
return []
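
For orientation, a hedged sketch of how reconstruct_document_logic would typically be mounted on a router; the path and router setup here are assumptions for illustration and are not taken from this diff:

from fastapi import APIRouter

from .editor_models import ReconstructRequest, ReconstructResponse
from .editor_reconstruct import reconstruct_document_logic

router = APIRouter(prefix="/api/v1/worksheet", tags=["Worksheet Editor"])  # hypothetical router

@router.post("/editor/reconstruct", response_model=ReconstructResponse)  # hypothetical path
async def reconstruct_editor(request: ReconstructRequest) -> ReconstructResponse:
    # Keep the route thin: validation and HTTPException handling live in the logic module.
    return await reconstruct_document_logic(request)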
26
klausur-service/backend/worksheet/nru_generator.py
Normal file
@@ -0,0 +1,26 @@
"""
NRU Worksheet Generator — barrel re-export.

All implementation split into:
    nru_models — data classes, entry separation
    nru_html   — HTML generation
    nru_pdf    — PDF generation

Per scanned page, we generate 2 worksheet pages.
"""

# Models
from .nru_models import (  # noqa: F401
    VocabEntry,
    SentenceEntry,
    separate_vocab_and_sentences,
)

# HTML generation
from .nru_html import (  # noqa: F401
    generate_nru_html,
    generate_nru_worksheet_html,
)

# PDF generation
from .nru_pdf import generate_nru_pdf  # noqa: F401
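
Because the barrel re-exports everything, call sites written against the old monolithic generator keep working unchanged. Assuming backend/ is on the import path, an old-style import still resolves through the barrel:

# Old-style import, still valid via the re-exports above:
from worksheet.nru_generator import generate_nru_pdf, separate_vocab_and_sentences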
466
klausur-service/backend/worksheet/nru_html.py
Normal file
@@ -0,0 +1,466 @@
"""
NRU Worksheet HTML — HTML generation for vocabulary worksheets.

Extracted from nru_worksheet_generator.py for modularity.
"""

import logging
from typing import List, Dict, Optional

from .nru_models import VocabEntry, SentenceEntry, separate_vocab_and_sentences

logger = logging.getLogger(__name__)


def generate_nru_html(
    vocab_list: List[VocabEntry],
    sentence_list: List[SentenceEntry],
    page_number: int,
    title: str = "Vokabeltest",
    show_solutions: bool = False,
    line_height_px: int = 28
) -> str:
    """
    Generate HTML for NRU-format worksheet.

    Returns HTML for 2 pages:
    - Page 1: Vocabulary table (3 columns)
    - Page 2: Sentence practice (full width)
    """

    # Filter by page
    page_vocab = [v for v in vocab_list if v.source_page == page_number]
    page_sentences = [s for s in sentence_list if s.source_page == page_number]

    html = f"""<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<style>
    @page {{
        size: A4;
        margin: 1.5cm 2cm;
    }}
    * {{
        box-sizing: border-box;
    }}
    body {{
        font-family: Arial, Helvetica, sans-serif;
        font-size: 12pt;
        line-height: 1.4;
        margin: 0;
        padding: 0;
    }}
    .page {{
        page-break-after: always;
        min-height: 100%;
    }}
    .page:last-child {{
        page-break-after: avoid;
    }}
    h1 {{
        font-size: 16pt;
        margin: 0 0 8px 0;
        text-align: center;
    }}
    .header {{
        margin-bottom: 15px;
    }}
    .name-line {{
        font-size: 11pt;
        margin-bottom: 10px;
    }}

    /* Vocabulary Table - 3 columns */
    .vocab-table {{
        width: 100%;
        border-collapse: collapse;
        table-layout: fixed;
    }}
    .vocab-table th {{
        background: #f0f0f0;
        border: 1px solid #333;
        padding: 6px 8px;
        font-weight: bold;
        font-size: 11pt;
        text-align: left;
    }}
    .vocab-table td {{
        border: 1px solid #333;
        padding: 4px 8px;
        height: {line_height_px}px;
        vertical-align: middle;
    }}
    .vocab-table .col-english {{ width: 35%; }}
    .vocab-table .col-german {{ width: 35%; }}
    .vocab-table .col-correction {{ width: 30%; }}
    .vocab-answer {{
        color: #0066cc;
        font-style: italic;
    }}

    /* Sentence Table - full width */
    .sentence-table {{
        width: 100%;
        border-collapse: collapse;
        margin-bottom: 15px;
    }}
    .sentence-table td {{
        border: 1px solid #333;
        padding: 6px 10px;
    }}
    .sentence-header {{
        background: #f5f5f5;
        font-weight: normal;
        min-height: 30px;
    }}
    .sentence-line {{
        height: {line_height_px + 4}px;
    }}
    .sentence-answer {{
        color: #0066cc;
        font-style: italic;
        font-size: 11pt;
    }}

    .page-info {{
        font-size: 9pt;
        color: #666;
        text-align: right;
        margin-top: 10px;
    }}
</style>
</head>
<body>
"""

    # ========== PAGE 1: VOCABULARY TABLE ==========
    if page_vocab:
        html += f"""
<div class="page">
    <div class="header">
        <h1>{title} - Vokabeln (Seite {page_number})</h1>
        <div class="name-line">Name: _________________________ Datum: _____________</div>
    </div>

    <table class="vocab-table">
        <thead>
            <tr>
                <th class="col-english">Englisch</th>
                <th class="col-german">Deutsch</th>
                <th class="col-correction">Korrektur</th>
            </tr>
        </thead>
        <tbody>
"""
        for v in page_vocab:
            if show_solutions:
                html += f"""
            <tr>
                <td>{v.english}</td>
                <td class="vocab-answer">{v.german}</td>
                <td></td>
            </tr>
"""
            else:
                html += f"""
            <tr>
                <td>{v.english}</td>
                <td></td>
                <td></td>
            </tr>
"""

        html += """
        </tbody>
    </table>
    <div class="page-info">Vokabeln aus Unit</div>
</div>
"""

    # ========== PAGE 2: SENTENCE PRACTICE ==========
    if page_sentences:
        html += f"""
<div class="page">
    <div class="header">
        <h1>{title} - Lernsaetze (Seite {page_number})</h1>
        <div class="name-line">Name: _________________________ Datum: _____________</div>
    </div>
"""
        for s in page_sentences:
            html += f"""
    <table class="sentence-table">
        <tr>
            <td class="sentence-header">{s.german}</td>
        </tr>
"""
            if show_solutions:
                html += f"""
        <tr>
            <td class="sentence-line sentence-answer">{s.english}</td>
        </tr>
        <tr>
            <td class="sentence-line"></td>
        </tr>
"""
            else:
                html += """
        <tr>
            <td class="sentence-line"></td>
        </tr>
        <tr>
            <td class="sentence-line"></td>
        </tr>
"""
            html += """
    </table>
"""

        html += """
    <div class="page-info">Lernsaetze aus Unit</div>
</div>
"""

    html += """
</body>
</html>
"""
    return html


def generate_nru_worksheet_html(
    entries: List[Dict],
    title: str = "Vokabeltest",
    show_solutions: bool = False,
    specific_pages: Optional[List[int]] = None
) -> str:
    """
    Generate complete NRU worksheet HTML for all pages.

    Args:
        entries: List of vocabulary entries with source_page
        title: Worksheet title
        show_solutions: Whether to show answers
        specific_pages: List of specific page numbers to include (1-indexed)

    Returns:
        Complete HTML document
    """
    # Separate into vocab and sentences
    vocab_list, sentence_list = separate_vocab_and_sentences(entries)

    # Get unique page numbers
    all_pages = set()
    for v in vocab_list:
        all_pages.add(v.source_page)
    for s in sentence_list:
        all_pages.add(s.source_page)

    # Filter to specific pages if requested
    if specific_pages:
        all_pages = all_pages.intersection(set(specific_pages))

    pages_sorted = sorted(all_pages)

    logger.info(f"Generating NRU worksheet for pages {pages_sorted}")
    logger.info(f"Total vocab: {len(vocab_list)}, Total sentences: {len(sentence_list)}")

    # Generate HTML for each page (static CSS here, so no f-string needed)
    combined_html = """<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<style>
    @page {
        size: A4;
        margin: 1.5cm 2cm;
    }
    * {
        box-sizing: border-box;
    }
    body {
        font-family: Arial, Helvetica, sans-serif;
        font-size: 12pt;
        line-height: 1.4;
        margin: 0;
        padding: 0;
    }
    .page {
        page-break-after: always;
        min-height: 100%;
    }
    .page:last-child {
        page-break-after: avoid;
    }
    h1 {
        font-size: 16pt;
        margin: 0 0 8px 0;
        text-align: center;
    }
    .header {
        margin-bottom: 15px;
    }
    .name-line {
        font-size: 11pt;
        margin-bottom: 10px;
    }

    /* Vocabulary Table - 3 columns */
    .vocab-table {
        width: 100%;
        border-collapse: collapse;
        table-layout: fixed;
    }
    .vocab-table th {
        background: #f0f0f0;
        border: 1px solid #333;
        padding: 6px 8px;
        font-weight: bold;
        font-size: 11pt;
        text-align: left;
    }
    .vocab-table td {
        border: 1px solid #333;
        padding: 4px 8px;
        height: 28px;
        vertical-align: middle;
    }
    .vocab-table .col-english { width: 35%; }
    .vocab-table .col-german { width: 35%; }
    .vocab-table .col-correction { width: 30%; }
    .vocab-answer {
        color: #0066cc;
        font-style: italic;
    }

    /* Sentence Table - full width */
    .sentence-table {
        width: 100%;
        border-collapse: collapse;
        margin-bottom: 15px;
    }
    .sentence-table td {
        border: 1px solid #333;
        padding: 6px 10px;
    }
    .sentence-header {
        background: #f5f5f5;
        font-weight: normal;
        min-height: 30px;
    }
    .sentence-line {
        height: 32px;
    }
    .sentence-answer {
        color: #0066cc;
        font-style: italic;
        font-size: 11pt;
    }

    .page-info {
        font-size: 9pt;
        color: #666;
        text-align: right;
        margin-top: 10px;
    }
</style>
</head>
<body>
"""

    for page_num in pages_sorted:
        page_vocab = [v for v in vocab_list if v.source_page == page_num]
        page_sentences = [s for s in sentence_list if s.source_page == page_num]

        # PAGE 1: VOCABULARY TABLE
        if page_vocab:
            combined_html += f"""
<div class="page">
    <div class="header">
        <h1>{title} - Vokabeln (Seite {page_num})</h1>
        <div class="name-line">Name: _________________________ Datum: _____________</div>
    </div>

    <table class="vocab-table">
        <thead>
            <tr>
                <th class="col-english">Englisch</th>
                <th class="col-german">Deutsch</th>
                <th class="col-correction">Korrektur</th>
            </tr>
        </thead>
        <tbody>
"""
            for v in page_vocab:
                if show_solutions:
                    combined_html += f"""
            <tr>
                <td>{v.english}</td>
                <td class="vocab-answer">{v.german}</td>
                <td></td>
            </tr>
"""
                else:
                    combined_html += f"""
            <tr>
                <td>{v.english}</td>
                <td></td>
                <td></td>
            </tr>
"""

            combined_html += f"""
        </tbody>
    </table>
    <div class="page-info">{title} - Seite {page_num}</div>
</div>
"""

        # PAGE 2: SENTENCE PRACTICE
        if page_sentences:
            combined_html += f"""
<div class="page">
    <div class="header">
        <h1>{title} - Lernsaetze (Seite {page_num})</h1>
        <div class="name-line">Name: _________________________ Datum: _____________</div>
    </div>
"""
            for s in page_sentences:
                combined_html += f"""
    <table class="sentence-table">
        <tr>
            <td class="sentence-header">{s.german}</td>
        </tr>
"""
                if show_solutions:
                    combined_html += f"""
        <tr>
            <td class="sentence-line sentence-answer">{s.english}</td>
        </tr>
        <tr>
            <td class="sentence-line"></td>
        </tr>
"""
                else:
                    combined_html += """
        <tr>
            <td class="sentence-line"></td>
        </tr>
        <tr>
            <td class="sentence-line"></td>
        </tr>
"""
                combined_html += """
    </table>
"""

            combined_html += f"""
    <div class="page-info">{title} - Seite {page_num}</div>
</div>
"""

    combined_html += """
</body>
</html>
"""
    return combined_html
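
A short usage sketch under stated assumptions: backend/ is on the import path, the entry dicts follow the english/german/source_page shape that separate_vocab_and_sentences consumes, and the output file name is arbitrary:

from worksheet.nru_html import generate_nru_worksheet_html

entries = [
    {"english": "the lighthouse", "german": "der Leuchtturm", "source_page": 1},
    {"english": "We visited the coast last summer.",
     "german": "Wir haben letzten Sommer die Kueste besucht.", "source_page": 1},
]

# Restrict output to page 1; solutions stay hidden on the student copy.
html = generate_nru_worksheet_html(entries, title="Unit 3", show_solutions=False, specific_pages=[1])
with open("worksheet.html", "w", encoding="utf-8") as f:
    f.write(html)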
70
klausur-service/backend/worksheet/nru_models.py
Normal file
@@ -0,0 +1,70 @@
"""
NRU Worksheet Models — data classes and entry separation logic.

Extracted from nru_worksheet_generator.py for modularity.
"""

import logging
from typing import List, Dict, Tuple
from dataclasses import dataclass

logger = logging.getLogger(__name__)


@dataclass
class VocabEntry:
    english: str
    german: str
    source_page: int = 1


@dataclass
class SentenceEntry:
    german: str
    english: str  # For solution sheet
    source_page: int = 1


def separate_vocab_and_sentences(entries: List[Dict]) -> Tuple[List[VocabEntry], List[SentenceEntry]]:
    """
    Separate vocabulary entries into single words/phrases and full sentences.

    Sentences are identified by:
    - Ending with punctuation (. ! ?)
    - Being longer than 50 characters
    - Containing multiple words with capital letters mid-sentence
    """
    vocab_list = []
    sentence_list = []

    for entry in entries:
        english = entry.get("english", "").strip()
        german = entry.get("german", "").strip()
        source_page = entry.get("source_page", 1)

        if not english or not german:
            continue

        # Detect if this is a sentence
        is_sentence = (
            english.endswith('.') or
            english.endswith('!') or
            english.endswith('?') or
            len(english) > 50 or
            (len(english.split()) > 5 and any(w[0].isupper() for w in english.split()[1:] if w))
        )

        if is_sentence:
            sentence_list.append(SentenceEntry(
                german=german,
                english=english,
                source_page=source_page
            ))
        else:
            vocab_list.append(VocabEntry(
                english=english,
                german=german,
                source_page=source_page
            ))

    return vocab_list, sentence_list
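
A quick illustration of the split heuristic; the sample values are invented to trip one branch each and are not from the commit:

from worksheet.nru_models import separate_vocab_and_sentences  # assuming backend/ on the import path

entries = [
    # Short pair, no terminal punctuation: classified as vocab
    {"english": "curious", "german": "neugierig", "source_page": 2},
    # Trailing '.' triggers the sentence branch
    {"english": "She has never been to London before.",
     "german": "Sie war noch nie zuvor in London.", "source_page": 2},
]

vocab, sentences = separate_vocab_and_sentences(entries)
assert len(vocab) == 1 and len(sentences) == 1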
31
klausur-service/backend/worksheet/nru_pdf.py
Normal file
@@ -0,0 +1,31 @@
"""
NRU Worksheet PDF — PDF generation using weasyprint.

Extracted from nru_worksheet_generator.py for modularity.
"""

from typing import List, Dict, Optional, Tuple

from .nru_html import generate_nru_worksheet_html


async def generate_nru_pdf(
    entries: List[Dict],
    title: str = "Vokabeltest",
    include_solutions: bool = True
) -> Tuple[bytes, Optional[bytes]]:
    """
    Generate NRU worksheet PDFs.

    Returns:
        Tuple of (worksheet_pdf_bytes, solution_pdf_bytes);
        solution_pdf_bytes is None when include_solutions is False.
    """
    from weasyprint import HTML

    # Generate worksheet HTML
    worksheet_html = generate_nru_worksheet_html(entries, title, show_solutions=False)
    worksheet_pdf = HTML(string=worksheet_html).write_pdf()

    # Generate solution HTML
    solution_pdf = None
    if include_solutions:
        solution_html = generate_nru_worksheet_html(entries, title, show_solutions=True)
        solution_pdf = HTML(string=solution_html).write_pdf()

    return worksheet_pdf, solution_pdf
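
Finally, a minimal driver for the PDF pair; weasyprint must be installed, backend/ is assumed to be on the import path, and the output file names are arbitrary:

import asyncio

from worksheet.nru_pdf import generate_nru_pdf

async def main():
    entries = [{"english": "the tide", "german": "die Gezeiten", "source_page": 1}]
    worksheet_pdf, solution_pdf = await generate_nru_pdf(entries, title="Unit 3")
    with open("worksheet.pdf", "wb") as f:
        f.write(worksheet_pdf)
    # solution_pdf is None when include_solutions=False
    if solution_pdf:
        with open("solutions.pdf", "wb") as f:
            f.write(solution_pdf)

asyncio.run(main())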