""" Handwriting Detection Service for Worksheet Cleanup Detects handwritten content in scanned worksheets and returns binary masks. Uses multiple detection methods: 1. Color-based detection (blue/red ink) 2. Stroke analysis (thin irregular strokes) 3. Edge density variance 4. Pencil detection (gray ink) DATENSCHUTZ: All processing happens locally on Mac Mini. """ import numpy as np from PIL import Image import io import logging from typing import Tuple, Optional from dataclasses import dataclass # OpenCV is optional - only required for actual handwriting detection try: import cv2 CV2_AVAILABLE = True except ImportError: cv2 = None CV2_AVAILABLE = False logger = logging.getLogger(__name__) @dataclass class DetectionResult: """Result of handwriting detection.""" mask: np.ndarray # Binary mask (255 = handwriting, 0 = background/printed) confidence: float # Overall confidence score handwriting_ratio: float # Ratio of handwriting pixels to total detection_method: str # Which method was primarily used def detect_handwriting(image_bytes: bytes, target_ink: str = "all") -> DetectionResult: """ Detect handwriting in an image. Args: image_bytes: Image as bytes (PNG, JPG, etc.) target_ink: Which ink types to detect: - "all" → all methods combined (incl. pencil) - "colored" → only color-based (blue/red/green pen) - "pencil" → only pencil (gray ink) Returns: DetectionResult with binary mask where handwriting is white (255) Raises: ImportError: If OpenCV is not available """ if not CV2_AVAILABLE: raise ImportError( "OpenCV (cv2) is required for handwriting detection. " "Install with: pip install opencv-python-headless" ) # Load image img = Image.open(io.BytesIO(image_bytes)) img_array = np.array(img) # Convert to BGR if needed (OpenCV format) if len(img_array.shape) == 2: img_bgr = cv2.cvtColor(img_array, cv2.COLOR_GRAY2BGR) elif img_array.shape[2] == 4: img_bgr = cv2.cvtColor(img_array, cv2.COLOR_RGBA2BGR) elif img_array.shape[2] == 3: img_bgr = cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR) else: img_bgr = img_array # Select detection methods based on target_ink masks_and_weights = [] if target_ink in ("all", "colored"): color_mask, color_conf = _detect_by_color(img_bgr) masks_and_weights.append((color_mask, color_conf, "color")) if target_ink == "all": stroke_mask, stroke_conf = _detect_by_stroke_analysis(img_bgr) variance_mask, variance_conf = _detect_by_variance(img_bgr) masks_and_weights.append((stroke_mask, stroke_conf, "stroke")) masks_and_weights.append((variance_mask, variance_conf, "variance")) if target_ink in ("all", "pencil"): pencil_mask, pencil_conf = _detect_pencil(img_bgr) masks_and_weights.append((pencil_mask, pencil_conf, "pencil")) if not masks_and_weights: # Fallback: use all methods color_mask, color_conf = _detect_by_color(img_bgr) stroke_mask, stroke_conf = _detect_by_stroke_analysis(img_bgr) variance_mask, variance_conf = _detect_by_variance(img_bgr) pencil_mask, pencil_conf = _detect_pencil(img_bgr) masks_and_weights = [ (color_mask, color_conf, "color"), (stroke_mask, stroke_conf, "stroke"), (variance_mask, variance_conf, "variance"), (pencil_mask, pencil_conf, "pencil"), ] # Combine masks using weighted average total_weight = sum(w for _, w, _ in masks_and_weights) if total_weight > 0: combined_mask = sum( m.astype(np.float32) * w for m, w, _ in masks_and_weights ) / total_weight combined_mask = (combined_mask > 127).astype(np.uint8) * 255 else: combined_mask = np.zeros(img_bgr.shape[:2], dtype=np.uint8) # Post-processing: Remove small noise combined_mask = _clean_mask(combined_mask) # Calculate metrics total_pixels = combined_mask.size handwriting_pixels = np.sum(combined_mask > 0) handwriting_ratio = handwriting_pixels / total_pixels if total_pixels > 0 else 0 # Determine primary method (highest confidence) primary_method = max(masks_and_weights, key=lambda x: x[1])[2] if masks_and_weights else "combined" overall_confidence = total_weight / len(masks_and_weights) if masks_and_weights else 0.0 logger.info(f"Handwriting detection (target_ink={target_ink}): {handwriting_ratio:.2%} handwriting, " f"confidence={overall_confidence:.2f}, method={primary_method}") return DetectionResult( mask=combined_mask, confidence=overall_confidence, handwriting_ratio=handwriting_ratio, detection_method=primary_method ) def _detect_by_color(img_bgr: np.ndarray) -> Tuple[np.ndarray, float]: """ Detect handwriting by ink color (blue, red, black pen). Blue and red ink are common for corrections and handwriting. Black pen has different characteristics than printed black. """ # Convert to HSV for color detection hsv = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2HSV) # Blue ink detection (Hue: 100-130, Saturation: 50-255, Value: 30-200) blue_lower = np.array([100, 50, 30]) blue_upper = np.array([130, 255, 200]) blue_mask = cv2.inRange(hsv, blue_lower, blue_upper) # Red ink detection (Hue: 0-10 and 170-180) red_lower1 = np.array([0, 50, 50]) red_upper1 = np.array([10, 255, 255]) red_mask1 = cv2.inRange(hsv, red_lower1, red_upper1) red_lower2 = np.array([170, 50, 50]) red_upper2 = np.array([180, 255, 255]) red_mask2 = cv2.inRange(hsv, red_lower2, red_upper2) red_mask = cv2.bitwise_or(red_mask1, red_mask2) # Green ink (less common but sometimes used) green_lower = np.array([35, 50, 50]) green_upper = np.array([85, 255, 200]) green_mask = cv2.inRange(hsv, green_lower, green_upper) # Combine colored ink masks color_mask = cv2.bitwise_or(blue_mask, red_mask) color_mask = cv2.bitwise_or(color_mask, green_mask) # Dilate to connect nearby regions kernel = np.ones((3, 3), np.uint8) color_mask = cv2.dilate(color_mask, kernel, iterations=1) # Calculate confidence based on detected pixels total_pixels = color_mask.size colored_pixels = np.sum(color_mask > 0) ratio = colored_pixels / total_pixels if total_pixels > 0 else 0 # High confidence if we found significant colored ink (1-20% of image) if 0.005 < ratio < 0.3: confidence = 0.9 elif ratio > 0: confidence = 0.5 else: confidence = 0.1 return color_mask, confidence def _detect_pencil(img_bgr: np.ndarray) -> Tuple[np.ndarray, float]: """ Detect pencil marks (gray ink, ~140-220 on 255-scale). Paper is usually >230, dark ink <130. Pencil falls in the 140-220 gray range. """ gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY) pencil_mask = cv2.inRange(gray, 140, 220) # Remove small noise artifacts kernel = np.ones((2, 2), np.uint8) pencil_mask = cv2.morphologyEx(pencil_mask, cv2.MORPH_OPEN, kernel, iterations=1) ratio = np.sum(pencil_mask > 0) / pencil_mask.size # Good confidence if pencil pixels are in a plausible range confidence = 0.75 if 0.002 < ratio < 0.2 else 0.2 return pencil_mask, confidence def _detect_by_stroke_analysis(img_bgr: np.ndarray) -> Tuple[np.ndarray, float]: """ Detect handwriting by analyzing stroke characteristics. Handwriting typically has: - Thinner, more variable stroke widths - More curved lines - Connected components """ # Convert to grayscale gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY) # Adaptive thresholding to extract text binary = cv2.adaptiveThreshold( gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2 ) # Find edges (handwriting has more irregular edges) edges = cv2.Canny(gray, 50, 150) # Morphological gradient for stroke detection kernel = np.ones((2, 2), np.uint8) gradient = cv2.morphologyEx(binary, cv2.MORPH_GRADIENT, kernel) # Skeleton to analyze stroke width # Thin strokes (handwriting) will have more skeleton pixels relative to mass skeleton = _skeletonize(binary) # Detect thin strokes by comparing skeleton to original # Dilate skeleton and XOR with original to find thick regions (printed) dilated_skeleton = cv2.dilate(skeleton, np.ones((5, 5), np.uint8), iterations=1) thick_regions = cv2.bitwise_and(binary, cv2.bitwise_not(dilated_skeleton)) thin_regions = cv2.bitwise_and(binary, dilated_skeleton) # Handwriting tends to be in thin regions with irregular edges handwriting_mask = thin_regions # Calculate confidence total_ink = np.sum(binary > 0) thin_ink = np.sum(thin_regions > 0) if total_ink > 0: thin_ratio = thin_ink / total_ink confidence = min(thin_ratio * 1.5, 0.8) # Cap at 0.8 else: confidence = 0.1 return handwriting_mask, confidence def _detect_by_variance(img_bgr: np.ndarray) -> Tuple[np.ndarray, float]: """ Detect handwriting by local variance analysis. Handwriting has higher local variance in stroke direction and width compared to uniform printed text. """ gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY) # Calculate local variance using a sliding window kernel_size = 15 mean = cv2.blur(gray.astype(np.float32), (kernel_size, kernel_size)) sqr_mean = cv2.blur((gray.astype(np.float32))**2, (kernel_size, kernel_size)) variance = sqr_mean - mean**2 # Normalize variance variance = cv2.normalize(variance, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8) # High variance regions might be handwriting # But also edges of printed text, so we need to filter # Get text regions first binary = cv2.adaptiveThreshold( gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2 ) # High variance within text regions high_variance_mask = cv2.threshold(variance, 100, 255, cv2.THRESH_BINARY)[1] handwriting_mask = cv2.bitwise_and(high_variance_mask, binary) # Calculate confidence based on variance distribution text_pixels = np.sum(binary > 0) high_var_pixels = np.sum(handwriting_mask > 0) if text_pixels > 0: var_ratio = high_var_pixels / text_pixels # If 5-40% of text has high variance, likely handwriting present if 0.05 < var_ratio < 0.5: confidence = 0.7 else: confidence = 0.3 else: confidence = 0.1 return handwriting_mask, confidence def _skeletonize(binary: np.ndarray) -> np.ndarray: """ Morphological skeletonization. """ skeleton = np.zeros(binary.shape, np.uint8) element = cv2.getStructuringElement(cv2.MORPH_CROSS, (3, 3)) img = binary.copy() while True: eroded = cv2.erode(img, element) temp = cv2.dilate(eroded, element) temp = cv2.subtract(img, temp) skeleton = cv2.bitwise_or(skeleton, temp) img = eroded.copy() if cv2.countNonZero(img) == 0: break return skeleton def _clean_mask(mask: np.ndarray, min_area: int = 50) -> np.ndarray: """ Clean up the mask by removing small noise regions. """ # Find connected components num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats( mask, connectivity=8 ) # Create clean mask keeping only components above minimum area clean = np.zeros_like(mask) for i in range(1, num_labels): # Skip background (label 0) area = stats[i, cv2.CC_STAT_AREA] if area >= min_area: clean[labels == i] = 255 return clean def mask_to_png(mask: np.ndarray) -> bytes: """ Convert a mask to PNG bytes. """ img = Image.fromarray(mask) buffer = io.BytesIO() img.save(buffer, format='PNG') return buffer.getvalue() def detect_handwriting_regions( image_bytes: bytes, min_confidence: float = 0.3 ) -> dict: """ High-level function that returns structured detection results. Args: image_bytes: Input image min_confidence: Minimum confidence to report detection Returns: Dictionary with detection results """ result = detect_handwriting(image_bytes) has_handwriting = ( result.confidence >= min_confidence and result.handwriting_ratio > 0.005 # At least 0.5% handwriting ) return { "has_handwriting": has_handwriting, "confidence": result.confidence, "handwriting_ratio": result.handwriting_ratio, "detection_method": result.detection_method, "mask_shape": result.mask.shape, }