Files
breakpilot-lehrer/klausur-service/backend/worksheet_cleanup_api.py
Benjamin Boenisch 5a31f52310 Initial commit: breakpilot-lehrer - Lehrer KI Platform
Services: Admin-Lehrer, Backend-Lehrer, Studio v2, Website,
Klausur-Service, School-Service, Voice-Service, Geo-Service,
BreakPilot Drive, Agent-Core

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-11 23:47:26 +01:00

492 lines
16 KiB
Python

"""
Worksheet Cleanup API - Handschrift-Entfernung und Layout-Rekonstruktion
Endpoints:
- POST /api/v1/worksheet/detect-handwriting - Erkennt Handschrift und gibt Maske zurueck
- POST /api/v1/worksheet/remove-handwriting - Entfernt Handschrift aus Bild
- POST /api/v1/worksheet/reconstruct - Rekonstruiert Layout als Fabric.js JSON
- POST /api/v1/worksheet/cleanup-pipeline - Vollstaendige Pipeline (Erkennung + Entfernung + Layout)
DATENSCHUTZ: Alle Verarbeitung erfolgt lokal auf dem Mac Mini.
"""
import io
import base64
import logging
from typing import Optional
from fastapi import APIRouter, HTTPException, UploadFile, File, Form
from fastapi.responses import StreamingResponse, JSONResponse
from pydantic import BaseModel
from services.handwriting_detection import (
detect_handwriting,
detect_handwriting_regions,
mask_to_png
)
from services.inpainting_service import (
inpaint_image,
remove_handwriting,
InpaintingMethod,
check_lama_available
)
from services.layout_reconstruction_service import (
reconstruct_layout,
layout_to_fabric_json,
reconstruct_and_clean
)
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/api/v1/worksheet", tags=["Worksheet Cleanup"])
# =============================================================================
# Pydantic Models
# =============================================================================
class DetectionResponse(BaseModel):
    """Response body for POST /detect-handwriting."""
    has_handwriting: bool                    # thresholded verdict (confidence AND ratio must pass)
    confidence: float                        # detector confidence score from detect_handwriting()
    handwriting_ratio: float                 # fraction of the image classified as handwriting
    detection_method: str                    # which detection backend produced the result
    mask_base64: Optional[str] = None        # base64-encoded PNG mask, only when return_mask=True
class InpaintingResponse(BaseModel):
    """Response body describing the outcome of a handwriting-removal run."""
    success: bool                            # whether inpainting completed without error
    method_used: str                         # actual method applied (may differ from "auto" request)
    processing_time_ms: float                # wall-clock processing time in milliseconds
    image_base64: Optional[str] = None       # cleaned image as base64 PNG, when requested
    error: Optional[str] = None              # error description on failure
class ReconstructionResponse(BaseModel):
    """Response body for POST /reconstruct (layout as Fabric.js JSON)."""
    success: bool                            # reconstruction completed
    element_count: int                       # number of layout elements found
    page_width: int                          # page width in pixels
    page_height: int                         # page height in pixels
    fabric_json: dict                        # Fabric.js-compatible canvas JSON
    table_count: int = 0                     # number of detected table regions
class PipelineResponse(BaseModel):
    """Response body for POST /cleanup-pipeline (full detect/remove/reconstruct run)."""
    success: bool                            # pipeline finished without raising
    handwriting_detected: bool               # step-1 detection verdict
    handwriting_removed: bool                # step-2 actually performed inpainting
    layout_reconstructed: bool               # step-3 produced at least one layout element
    cleaned_image_base64: Optional[str] = None   # cleaned (or original) image as base64
    fabric_json: Optional[dict] = None       # Fabric.js JSON when reconstruction ran
    # Mutable default is safe here: pydantic copies field defaults per instance.
    metadata: dict = {}                      # per-step diagnostics (detection/inpainting/layout)
class CapabilitiesResponse(BaseModel):
    """Response body for GET /capabilities (which backends are usable)."""
    opencv_available: bool = True            # OpenCV inpainting is always assumed present
    lama_available: bool = False             # LaMa deep-learning inpainting model
    paddleocr_available: bool = False        # PaddleOCR text recognition
# =============================================================================
# API Endpoints
# =============================================================================
@router.get("/capabilities")
async def get_capabilities() -> CapabilitiesResponse:
    """Report which cleanup backends are usable on this server."""
    # PaddleOCR is optional; probe for it without letting a failure
    # bubble up to the client.
    try:
        from hybrid_vocab_extractor import get_paddle_ocr
        has_paddle = get_paddle_ocr() is not None
    except Exception:
        has_paddle = False
    return CapabilitiesResponse(
        opencv_available=True,  # OpenCV path is always assumed available
        lama_available=check_lama_available(),
        paddleocr_available=has_paddle,
    )
@router.post("/detect-handwriting")
async def detect_handwriting_endpoint(
    image: UploadFile = File(...),
    return_mask: bool = Form(default=True),
    min_confidence: float = Form(default=0.3)
) -> DetectionResponse:
    """
    Detect handwriting in an uploaded image.

    Args:
        image: Input image (PNG, JPG)
        return_mask: Whether to include the binary mask as base64
        min_confidence: Minimum confidence threshold for a positive verdict

    Returns:
        DetectionResponse with detection results and optional mask

    Raises:
        HTTPException: 400 for non-image uploads, 500 on processing errors.
    """
    logger.info(f"Handwriting detection request: {image.filename}")
    # Reject anything that is not declared as an image upfront.
    if not (image.content_type or "").startswith("image/"):
        raise HTTPException(
            status_code=400,
            detail="Only image files (PNG, JPG) are supported"
        )
    try:
        payload = await image.read()
        detection = detect_handwriting(payload)
        # Both thresholds must pass: confidence AND a minimal pixel ratio,
        # so a single stray mark does not count as handwriting.
        found = (
            detection.confidence >= min_confidence
            and detection.handwriting_ratio > 0.005
        )
        response = DetectionResponse(
            has_handwriting=found,
            confidence=detection.confidence,
            handwriting_ratio=detection.handwriting_ratio,
            detection_method=detection.detection_method
        )
        if return_mask:
            response.mask_base64 = base64.b64encode(
                mask_to_png(detection.mask)
            ).decode('utf-8')
        logger.info(f"Detection complete: handwriting={found}, "
                    f"confidence={detection.confidence:.2f}")
        return response
    except Exception as e:
        logger.error(f"Handwriting detection failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@router.post("/detect-handwriting/mask")
async def get_handwriting_mask(
    image: UploadFile = File(...)
) -> StreamingResponse:
    """
    Return the handwriting detection mask as a downloadable PNG.

    White pixels (255) in the mask mark detected handwriting.

    Raises:
        HTTPException: 400 for non-image uploads, 500 on processing errors.
    """
    if not (image.content_type or "").startswith("image/"):
        raise HTTPException(
            status_code=400,
            detail="Only image files are supported"
        )
    try:
        payload = await image.read()
        detection = detect_handwriting(payload)
        png_mask = mask_to_png(detection.mask)
        headers = {
            "Content-Disposition": "attachment; filename=handwriting_mask.png"
        }
        return StreamingResponse(
            io.BytesIO(png_mask),
            media_type="image/png",
            headers=headers,
        )
    except Exception as e:
        logger.error(f"Mask generation failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@router.post("/remove-handwriting")
async def remove_handwriting_endpoint(
    image: UploadFile = File(...),
    mask: Optional[UploadFile] = File(default=None),
    method: str = Form(default="auto"),
    return_base64: bool = Form(default=False)
):
    """
    Remove handwriting from an image.

    Args:
        image: Input image with handwriting
        mask: Optional pre-computed mask (if not provided, auto-detected)
        method: Inpainting method (auto, opencv_telea, opencv_ns, lama);
            unknown values fall back to "auto"
        return_base64: If True, return image as base64 JSON, else as PNG file

    Returns:
        Cleaned image (as PNG file or base64 in JSON)

    Raises:
        HTTPException: 400 for non-image uploads, 500 on processing errors.
    """
    logger.info(f"Remove handwriting request: {image.filename}, method={method}")
    content_type = image.content_type or ""
    if not content_type.startswith("image/"):
        raise HTTPException(
            status_code=400,
            detail="Only image files are supported"
        )
    try:
        image_bytes = await image.read()
        # Decode the optional caller-supplied mask into a numpy array.
        mask_array = None
        if mask is not None:
            mask_bytes = await mask.read()
            # Local imports keep PIL/numpy off the hot path for mask-less calls.
            from PIL import Image
            import numpy as np
            mask_img = Image.open(io.BytesIO(mask_bytes))
            mask_array = np.array(mask_img)
        # Map the form string to the enum; unrecognized strings mean AUTO
        # (same leniency as the original if/elif chain).
        inpainting_method = {
            "opencv_telea": InpaintingMethod.OPENCV_TELEA,
            "opencv_ns": InpaintingMethod.OPENCV_NS,
            "lama": InpaintingMethod.LAMA,
        }.get(method, InpaintingMethod.AUTO)
        cleaned_bytes, metadata = remove_handwriting(
            image_bytes,
            mask=mask_array,
            method=inpainting_method
        )
        if return_base64:
            return JSONResponse({
                "success": True,
                "image_base64": base64.b64encode(cleaned_bytes).decode('utf-8'),
                "metadata": metadata
            })
        else:
            return StreamingResponse(
                io.BytesIO(cleaned_bytes),
                media_type="image/png",
                headers={
                    "Content-Disposition": "attachment; filename=cleaned.png",
                    "X-Method-Used": metadata.get("method_used", "unknown"),
                    "X-Processing-Time-Ms": str(metadata.get("processing_time_ms", 0))
                }
            )
    except Exception as e:
        logger.error(f"Handwriting removal failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@router.post("/reconstruct")
async def reconstruct_layout_endpoint(
    image: UploadFile = File(...),
    clean_handwriting: bool = Form(default=True),
    detect_tables: bool = Form(default=True)
) -> ReconstructionResponse:
    """
    Reconstruct worksheet layout and generate Fabric.js JSON.

    Args:
        image: Input image (can contain handwriting)
        clean_handwriting: Whether to remove handwriting first
        detect_tables: Whether to detect table structures (only honored
            when clean_handwriting is False — see NOTE below)

    Returns:
        ReconstructionResponse with Fabric.js JSON

    Raises:
        HTTPException: 400 for non-image uploads, 500 on processing errors.
    """
    logger.info(f"Layout reconstruction request: {image.filename}")
    content_type = image.content_type or ""
    if not content_type.startswith("image/"):
        raise HTTPException(
            status_code=400,
            detail="Only image files are supported"
        )
    try:
        image_bytes = await image.read()
        if clean_handwriting:
            # NOTE(review): reconstruct_and_clean is not passed detect_tables,
            # so the flag is silently ignored on this path — confirm whether
            # the service enables table detection internally. The cleaned
            # image bytes are not part of this endpoint's response.
            _cleaned, layout = reconstruct_and_clean(image_bytes)
        else:
            layout = reconstruct_layout(image_bytes, detect_tables=detect_tables)
        return ReconstructionResponse(
            success=True,
            element_count=len(layout.elements),
            page_width=layout.page_width,
            page_height=layout.page_height,
            fabric_json=layout.fabric_json,
            table_count=len(layout.table_regions)
        )
    except Exception as e:
        logger.error(f"Layout reconstruction failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@router.post("/cleanup-pipeline")
async def full_cleanup_pipeline(
    image: UploadFile = File(...),
    remove_hw: bool = Form(default=True, alias="remove_handwriting"),
    reconstruct: bool = Form(default=True),
    inpainting_method: str = Form(default="auto")
) -> PipelineResponse:
    """
    Full cleanup pipeline: detect, remove handwriting, reconstruct layout.

    This is the recommended endpoint for processing filled worksheets.

    Args:
        image: Input image (scan/photo of filled worksheet)
        remove_hw: Whether to remove detected handwriting (exposed as the
            form field "remove_handwriting" via the alias)
        reconstruct: Whether to reconstruct layout as Fabric.js JSON
        inpainting_method: Method for inpainting (auto, opencv_telea,
            opencv_ns, lama); unknown values fall back to "auto"

    Returns:
        PipelineResponse with cleaned image and Fabric.js JSON

    Raises:
        HTTPException: 400 for non-image uploads, 500 on processing errors.
    """
    logger.info(f"Full cleanup pipeline: {image.filename}")
    content_type = image.content_type or ""
    if not content_type.startswith("image/"):
        raise HTTPException(
            status_code=400,
            detail="Only image files are supported"
        )
    try:
        image_bytes = await image.read()
        metadata = {}
        # Step 1: Detect handwriting (thresholds mirror the
        # /detect-handwriting defaults: confidence >= 0.3, ratio > 0.005).
        detection = detect_handwriting(image_bytes)
        handwriting_detected = (
            detection.confidence >= 0.3 and
            detection.handwriting_ratio > 0.005
        )
        metadata["detection"] = {
            "confidence": detection.confidence,
            "handwriting_ratio": detection.handwriting_ratio,
            "method": detection.detection_method
        }
        # Step 2: Remove handwriting if requested and detected;
        # otherwise pass the original bytes through unchanged.
        cleaned_bytes = image_bytes
        handwriting_removed = False
        if remove_hw and handwriting_detected:
            # Map the form string to the enum; unknown strings mean AUTO.
            method = {
                "opencv_telea": InpaintingMethod.OPENCV_TELEA,
                "opencv_ns": InpaintingMethod.OPENCV_NS,
                "lama": InpaintingMethod.LAMA,
            }.get(inpainting_method, InpaintingMethod.AUTO)
            cleaned_bytes, inpaint_metadata = remove_handwriting(
                image_bytes,
                mask=detection.mask,
                method=method
            )
            handwriting_removed = inpaint_metadata.get("inpainting_performed", False)
            metadata["inpainting"] = inpaint_metadata
        # Step 3: Reconstruct layout if requested (on the cleaned image,
        # so inpainting artifacts do not become layout elements).
        fabric_json = None
        layout_reconstructed = False
        if reconstruct:
            layout = reconstruct_layout(cleaned_bytes)
            fabric_json = layout.fabric_json
            layout_reconstructed = len(layout.elements) > 0
            metadata["layout"] = {
                "element_count": len(layout.elements),
                "table_count": len(layout.table_regions),
                "page_width": layout.page_width,
                "page_height": layout.page_height
            }
        cleaned_base64 = base64.b64encode(cleaned_bytes).decode('utf-8')
        logger.info(f"Pipeline complete: detected={handwriting_detected}, "
                    f"removed={handwriting_removed}, layout={layout_reconstructed}")
        return PipelineResponse(
            success=True,
            handwriting_detected=handwriting_detected,
            handwriting_removed=handwriting_removed,
            layout_reconstructed=layout_reconstructed,
            cleaned_image_base64=cleaned_base64,
            fabric_json=fabric_json,
            metadata=metadata
        )
    except Exception as e:
        # logger.exception records the full traceback — replaces the manual
        # "import traceback; logger.error(format_exc())" round-trip.
        logger.exception(f"Cleanup pipeline failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@router.post("/preview-cleanup")
async def preview_cleanup(
    image: UploadFile = File(...)
) -> JSONResponse:
    """
    Quick preview of cleanup results without full processing.

    Runs detection only and returns rough processing-time estimates
    scaled by image size (constants are empirical guesses, not measured).

    Raises:
        HTTPException: 400 for non-image uploads, 500 on processing errors.
    """
    content_type = image.content_type or ""
    if not content_type.startswith("image/"):
        raise HTTPException(
            status_code=400,
            detail="Only image files are supported"
        )
    try:
        image_bytes = await image.read()
        # Quick detection only — no inpainting, no reconstruction.
        result = detect_handwriting_regions(image_bytes)
        from PIL import Image
        img = Image.open(io.BytesIO(image_bytes))
        # Hoist the size factor: all estimates scale with megapixels.
        megapixels = (img.width * img.height) / 1000000
        est_detection_ms = 100 + megapixels * 200
        # Inpainting only happens when handwriting was found, so compute
        # the conditional once instead of repeating it per field.
        est_inpainting_ms = (500 + megapixels * 1000) if result["has_handwriting"] else 0
        est_reconstruction_ms = 200 + megapixels * 300
        return JSONResponse({
            "has_handwriting": result["has_handwriting"],
            "confidence": result["confidence"],
            "handwriting_ratio": result["handwriting_ratio"],
            "image_width": img.width,
            "image_height": img.height,
            "estimated_times_ms": {
                "detection": est_detection_ms,
                "inpainting": est_inpainting_ms,
                "reconstruction": est_reconstruction_ms,
                "total": est_detection_ms + est_inpainting_ms + est_reconstruction_ms
            },
            "capabilities": {
                "lama_available": check_lama_available()
            }
        })
    except Exception as e:
        logger.error(f"Preview failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))