""" Worksheet Cleanup API - Handschrift-Entfernung und Layout-Rekonstruktion Endpoints: - POST /api/v1/worksheet/detect-handwriting - Erkennt Handschrift und gibt Maske zurueck - POST /api/v1/worksheet/remove-handwriting - Entfernt Handschrift aus Bild - POST /api/v1/worksheet/reconstruct - Rekonstruiert Layout als Fabric.js JSON - POST /api/v1/worksheet/cleanup-pipeline - Vollstaendige Pipeline (Erkennung + Entfernung + Layout) DATENSCHUTZ: Alle Verarbeitung erfolgt lokal auf dem Mac Mini. """ import io import base64 import logging from typing import Optional from fastapi import APIRouter, HTTPException, UploadFile, File, Form from fastapi.responses import StreamingResponse, JSONResponse from pydantic import BaseModel from services.handwriting_detection import ( detect_handwriting, detect_handwriting_regions, mask_to_png ) from services.inpainting_service import ( inpaint_image, remove_handwriting, InpaintingMethod, check_lama_available ) from services.layout_reconstruction_service import ( reconstruct_layout, layout_to_fabric_json, reconstruct_and_clean ) logger = logging.getLogger(__name__) router = APIRouter(prefix="/api/v1/worksheet", tags=["Worksheet Cleanup"]) # ============================================================================= # Pydantic Models # ============================================================================= class DetectionResponse(BaseModel): has_handwriting: bool confidence: float handwriting_ratio: float detection_method: str mask_base64: Optional[str] = None class InpaintingResponse(BaseModel): success: bool method_used: str processing_time_ms: float image_base64: Optional[str] = None error: Optional[str] = None class ReconstructionResponse(BaseModel): success: bool element_count: int page_width: int page_height: int fabric_json: dict table_count: int = 0 class PipelineResponse(BaseModel): success: bool handwriting_detected: bool handwriting_removed: bool layout_reconstructed: bool cleaned_image_base64: Optional[str] = None fabric_json: Optional[dict] = None metadata: dict = {} class CapabilitiesResponse(BaseModel): opencv_available: bool = True lama_available: bool = False paddleocr_available: bool = False # ============================================================================= # API Endpoints # ============================================================================= @router.get("/capabilities") async def get_capabilities() -> CapabilitiesResponse: """ Get available cleanup capabilities on this server. """ # Check PaddleOCR paddleocr_available = False try: from hybrid_vocab_extractor import get_paddle_ocr ocr = get_paddle_ocr() paddleocr_available = ocr is not None except Exception: pass return CapabilitiesResponse( opencv_available=True, lama_available=check_lama_available(), paddleocr_available=paddleocr_available ) @router.post("/detect-handwriting") async def detect_handwriting_endpoint( image: UploadFile = File(...), return_mask: bool = Form(default=True), min_confidence: float = Form(default=0.3) ) -> DetectionResponse: """ Detect handwriting in an image. Args: image: Input image (PNG, JPG) return_mask: Whether to return the binary mask as base64 min_confidence: Minimum confidence threshold Returns: DetectionResponse with detection results and optional mask """ logger.info(f"Handwriting detection request: {image.filename}") # Validate file type content_type = image.content_type or "" if not content_type.startswith("image/"): raise HTTPException( status_code=400, detail="Only image files (PNG, JPG) are supported" ) try: image_bytes = await image.read() # Detect handwriting result = detect_handwriting(image_bytes) has_handwriting = ( result.confidence >= min_confidence and result.handwriting_ratio > 0.005 ) response = DetectionResponse( has_handwriting=has_handwriting, confidence=result.confidence, handwriting_ratio=result.handwriting_ratio, detection_method=result.detection_method ) if return_mask: mask_bytes = mask_to_png(result.mask) response.mask_base64 = base64.b64encode(mask_bytes).decode('utf-8') logger.info(f"Detection complete: handwriting={has_handwriting}, " f"confidence={result.confidence:.2f}") return response except Exception as e: logger.error(f"Handwriting detection failed: {e}") raise HTTPException(status_code=500, detail=str(e)) @router.post("/detect-handwriting/mask") async def get_handwriting_mask( image: UploadFile = File(...) ) -> StreamingResponse: """ Get handwriting detection mask as PNG image. Returns binary mask where white (255) = handwriting. """ content_type = image.content_type or "" if not content_type.startswith("image/"): raise HTTPException( status_code=400, detail="Only image files are supported" ) try: image_bytes = await image.read() result = detect_handwriting(image_bytes) mask_bytes = mask_to_png(result.mask) return StreamingResponse( io.BytesIO(mask_bytes), media_type="image/png", headers={ "Content-Disposition": "attachment; filename=handwriting_mask.png" } ) except Exception as e: logger.error(f"Mask generation failed: {e}") raise HTTPException(status_code=500, detail=str(e)) @router.post("/remove-handwriting") async def remove_handwriting_endpoint( image: UploadFile = File(...), mask: Optional[UploadFile] = File(default=None), method: str = Form(default="auto"), return_base64: bool = Form(default=False) ): """ Remove handwriting from an image. Args: image: Input image with handwriting mask: Optional pre-computed mask (if not provided, auto-detected) method: Inpainting method (auto, opencv_telea, opencv_ns, lama) return_base64: If True, return image as base64, else as file Returns: Cleaned image (as PNG file or base64 in JSON) """ logger.info(f"Remove handwriting request: {image.filename}, method={method}") content_type = image.content_type or "" if not content_type.startswith("image/"): raise HTTPException( status_code=400, detail="Only image files are supported" ) try: image_bytes = await image.read() # Get mask if provided mask_array = None if mask is not None: mask_bytes = await mask.read() from PIL import Image import numpy as np mask_img = Image.open(io.BytesIO(mask_bytes)) mask_array = np.array(mask_img) # Select inpainting method inpainting_method = InpaintingMethod.AUTO if method == "opencv_telea": inpainting_method = InpaintingMethod.OPENCV_TELEA elif method == "opencv_ns": inpainting_method = InpaintingMethod.OPENCV_NS elif method == "lama": inpainting_method = InpaintingMethod.LAMA # Remove handwriting cleaned_bytes, metadata = remove_handwriting( image_bytes, mask=mask_array, method=inpainting_method ) if return_base64: return JSONResponse({ "success": True, "image_base64": base64.b64encode(cleaned_bytes).decode('utf-8'), "metadata": metadata }) else: return StreamingResponse( io.BytesIO(cleaned_bytes), media_type="image/png", headers={ "Content-Disposition": "attachment; filename=cleaned.png", "X-Method-Used": metadata.get("method_used", "unknown"), "X-Processing-Time-Ms": str(metadata.get("processing_time_ms", 0)) } ) except Exception as e: logger.error(f"Handwriting removal failed: {e}") raise HTTPException(status_code=500, detail=str(e)) @router.post("/reconstruct") async def reconstruct_layout_endpoint( image: UploadFile = File(...), clean_handwriting: bool = Form(default=True), detect_tables: bool = Form(default=True) ) -> ReconstructionResponse: """ Reconstruct worksheet layout and generate Fabric.js JSON. Args: image: Input image (can contain handwriting) clean_handwriting: Whether to remove handwriting first detect_tables: Whether to detect table structures Returns: ReconstructionResponse with Fabric.js JSON """ logger.info(f"Layout reconstruction request: {image.filename}") content_type = image.content_type or "" if not content_type.startswith("image/"): raise HTTPException( status_code=400, detail="Only image files are supported" ) try: image_bytes = await image.read() # Run reconstruction pipeline if clean_handwriting: cleaned_bytes, layout = reconstruct_and_clean(image_bytes) else: layout = reconstruct_layout(image_bytes, detect_tables=detect_tables) return ReconstructionResponse( success=True, element_count=len(layout.elements), page_width=layout.page_width, page_height=layout.page_height, fabric_json=layout.fabric_json, table_count=len(layout.table_regions) ) except Exception as e: logger.error(f"Layout reconstruction failed: {e}") raise HTTPException(status_code=500, detail=str(e)) @router.post("/cleanup-pipeline") async def full_cleanup_pipeline( image: UploadFile = File(...), remove_hw: bool = Form(default=True, alias="remove_handwriting"), reconstruct: bool = Form(default=True), inpainting_method: str = Form(default="auto") ) -> PipelineResponse: """ Full cleanup pipeline: detect, remove handwriting, reconstruct layout. This is the recommended endpoint for processing filled worksheets. Args: image: Input image (scan/photo of filled worksheet) remove_handwriting: Whether to remove detected handwriting reconstruct: Whether to reconstruct layout as Fabric.js JSON inpainting_method: Method for inpainting (auto, opencv_telea, opencv_ns, lama) Returns: PipelineResponse with cleaned image and Fabric.js JSON """ logger.info(f"Full cleanup pipeline: {image.filename}") content_type = image.content_type or "" if not content_type.startswith("image/"): raise HTTPException( status_code=400, detail="Only image files are supported" ) try: image_bytes = await image.read() metadata = {} # Step 1: Detect handwriting detection = detect_handwriting(image_bytes) handwriting_detected = ( detection.confidence >= 0.3 and detection.handwriting_ratio > 0.005 ) metadata["detection"] = { "confidence": detection.confidence, "handwriting_ratio": detection.handwriting_ratio, "method": detection.detection_method } # Step 2: Remove handwriting if requested and detected cleaned_bytes = image_bytes handwriting_removed = False if remove_hw and handwriting_detected: method = InpaintingMethod.AUTO if inpainting_method == "opencv_telea": method = InpaintingMethod.OPENCV_TELEA elif inpainting_method == "opencv_ns": method = InpaintingMethod.OPENCV_NS elif inpainting_method == "lama": method = InpaintingMethod.LAMA cleaned_bytes, inpaint_metadata = remove_handwriting( image_bytes, mask=detection.mask, method=method ) handwriting_removed = inpaint_metadata.get("inpainting_performed", False) metadata["inpainting"] = inpaint_metadata # Step 3: Reconstruct layout if requested fabric_json = None layout_reconstructed = False if reconstruct: layout = reconstruct_layout(cleaned_bytes) fabric_json = layout.fabric_json layout_reconstructed = len(layout.elements) > 0 metadata["layout"] = { "element_count": len(layout.elements), "table_count": len(layout.table_regions), "page_width": layout.page_width, "page_height": layout.page_height } # Encode cleaned image as base64 cleaned_base64 = base64.b64encode(cleaned_bytes).decode('utf-8') logger.info(f"Pipeline complete: detected={handwriting_detected}, " f"removed={handwriting_removed}, layout={layout_reconstructed}") return PipelineResponse( success=True, handwriting_detected=handwriting_detected, handwriting_removed=handwriting_removed, layout_reconstructed=layout_reconstructed, cleaned_image_base64=cleaned_base64, fabric_json=fabric_json, metadata=metadata ) except Exception as e: logger.error(f"Cleanup pipeline failed: {e}") import traceback logger.error(traceback.format_exc()) raise HTTPException(status_code=500, detail=str(e)) @router.post("/preview-cleanup") async def preview_cleanup( image: UploadFile = File(...) ) -> JSONResponse: """ Quick preview of cleanup results without full processing. Returns detection results and estimated processing time. """ content_type = image.content_type or "" if not content_type.startswith("image/"): raise HTTPException( status_code=400, detail="Only image files are supported" ) try: image_bytes = await image.read() # Quick detection only result = detect_handwriting_regions(image_bytes) # Estimate processing time based on image size from PIL import Image img = Image.open(io.BytesIO(image_bytes)) pixel_count = img.width * img.height # Rough estimates est_detection_ms = 100 + (pixel_count / 1000000) * 200 est_inpainting_ms = 500 + (pixel_count / 1000000) * 1000 est_reconstruction_ms = 200 + (pixel_count / 1000000) * 300 return JSONResponse({ "has_handwriting": result["has_handwriting"], "confidence": result["confidence"], "handwriting_ratio": result["handwriting_ratio"], "image_width": img.width, "image_height": img.height, "estimated_times_ms": { "detection": est_detection_ms, "inpainting": est_inpainting_ms if result["has_handwriting"] else 0, "reconstruction": est_reconstruction_ms, "total": est_detection_ms + (est_inpainting_ms if result["has_handwriting"] else 0) + est_reconstruction_ms }, "capabilities": { "lama_available": check_lama_available() } }) except Exception as e: logger.error(f"Preview failed: {e}") raise HTTPException(status_code=500, detail=str(e))