Files
breakpilot-lehrer/klausur-service/backend/services/handwriting_detection.py
Benjamin Admin 2e0f8632f8
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 26s
CI / test-go-edu-search (push) Successful in 26s
CI / test-python-klausur (push) Failing after 1m49s
CI / test-python-agent-core (push) Successful in 14s
CI / test-nodejs-website (push) Successful in 15s
feat(klausur): Handschrift entfernen + Klausur-HTR implementiert
Feature 1: Handschrift entfernen via OCR-Pipeline Session
- services/handwriting_detection.py: _detect_pencil() + target_ink Parameter
  ("all" | "colored" | "pencil") für gezielte Tinten-Erkennung
- ocr_pipeline_session_store.py: clean_png + handwriting_removal_meta Spalten
  (idempotentes ALTER TABLE in init_ocr_pipeline_tables)
- ocr_pipeline_api.py: POST /sessions/{id}/remove-handwriting Endpoint
  + "clean" zu valid_types für Image-Serving hinzugefügt

Feature 2: Klausur-HTR (Hochwertige Handschriftenerkennung)
- handwriting_htr_api.py: Neuer Router /api/v1/htr/recognize + /recognize-session
  Primary: qwen2.5vl:32b via Ollama, Fallback: trocr-large-handwritten
- services/trocr_service.py: size Parameter (base | large) für get_trocr_model()
  + run_trocr_ocr() - unterstützt jetzt trocr-large-handwritten
- main.py: HTR Router registriert

Config:
- docker-compose.yml: OLLAMA_HTR_MODEL, HTR_FALLBACK_MODEL
- .env.example: HTR Env-Vars dokumentiert

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-03 12:04:26 +01:00

394 lines
13 KiB
Python

"""
Handwriting Detection Service for Worksheet Cleanup
Detects handwritten content in scanned worksheets and returns binary masks.
Uses multiple detection methods:
1. Color-based detection (blue/red ink)
2. Stroke analysis (thin irregular strokes)
3. Edge density variance
4. Pencil detection (gray ink)
DATENSCHUTZ: All processing happens locally on Mac Mini.
"""
import numpy as np
from PIL import Image
import io
import logging
from typing import Tuple, Optional
from dataclasses import dataclass
# OpenCV is optional - only required for actual handwriting detection
try:
import cv2
CV2_AVAILABLE = True
except ImportError:
cv2 = None
CV2_AVAILABLE = False
logger = logging.getLogger(__name__)
@dataclass
class DetectionResult:
    """Outcome of one handwriting-detection pass over a scanned page.

    Attributes:
        mask: uint8 binary mask; 255 marks handwriting pixels, 0 marks
            background or printed content.
        confidence: Overall confidence score for the detection.
        handwriting_ratio: Fraction of all pixels classified as handwriting.
        detection_method: Name of the method that contributed most.
    """
    mask: np.ndarray
    confidence: float
    handwriting_ratio: float
    detection_method: str
def _load_image_bgr(image_bytes: bytes) -> np.ndarray:
    """Decode image bytes into a 3-channel BGR array (OpenCV layout).

    Handles grayscale, RGB and RGBA directly; any other channel layout
    (e.g. 2-channel "LA" images) is normalized through PIL first.
    """
    img = Image.open(io.BytesIO(image_bytes))
    img_array = np.array(img)
    if img_array.ndim == 2:
        return cv2.cvtColor(img_array, cv2.COLOR_GRAY2BGR)
    if img_array.shape[2] == 4:
        return cv2.cvtColor(img_array, cv2.COLOR_RGBA2BGR)
    if img_array.shape[2] == 3:
        return cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR)
    # FIX: exotic channel counts (e.g. "LA") previously passed through
    # unconverted and crashed later cv2 calls. Normalize via PIL instead.
    return cv2.cvtColor(np.array(img.convert("RGB")), cv2.COLOR_RGB2BGR)


def detect_handwriting(image_bytes: bytes, target_ink: str = "all") -> DetectionResult:
    """
    Detect handwriting in an image.

    Args:
        image_bytes: Image as bytes (PNG, JPG, etc.)
        target_ink: Which ink types to detect:
            - "all"     -> all methods combined (incl. pencil)
            - "colored" -> only color-based (blue/red/green pen)
            - "pencil"  -> only pencil (gray ink)

    Returns:
        DetectionResult with binary mask where handwriting is white (255).

    Raises:
        ImportError: If OpenCV is not available.
    """
    if not CV2_AVAILABLE:
        raise ImportError(
            "OpenCV (cv2) is required for handwriting detection. "
            "Install with: pip install opencv-python-headless"
        )

    img_bgr = _load_image_bgr(image_bytes)

    # Select detection methods based on target_ink; each entry is
    # (mask, confidence-weight, method-name).
    masks_and_weights = []
    if target_ink in ("all", "colored"):
        color_mask, color_conf = _detect_by_color(img_bgr)
        masks_and_weights.append((color_mask, color_conf, "color"))
    if target_ink == "all":
        stroke_mask, stroke_conf = _detect_by_stroke_analysis(img_bgr)
        variance_mask, variance_conf = _detect_by_variance(img_bgr)
        masks_and_weights.append((stroke_mask, stroke_conf, "stroke"))
        masks_and_weights.append((variance_mask, variance_conf, "variance"))
    if target_ink in ("all", "pencil"):
        pencil_mask, pencil_conf = _detect_pencil(img_bgr)
        masks_and_weights.append((pencil_mask, pencil_conf, "pencil"))

    if not masks_and_weights:
        # Unknown target_ink value: fall back to running every method.
        color_mask, color_conf = _detect_by_color(img_bgr)
        stroke_mask, stroke_conf = _detect_by_stroke_analysis(img_bgr)
        variance_mask, variance_conf = _detect_by_variance(img_bgr)
        pencil_mask, pencil_conf = _detect_pencil(img_bgr)
        masks_and_weights = [
            (color_mask, color_conf, "color"),
            (stroke_mask, stroke_conf, "stroke"),
            (variance_mask, variance_conf, "variance"),
            (pencil_mask, pencil_conf, "pencil"),
        ]

    # Combine masks using a confidence-weighted average, then re-binarize.
    total_weight = sum(w for _, w, _ in masks_and_weights)
    if total_weight > 0:
        combined_mask = sum(
            m.astype(np.float32) * w for m, w, _ in masks_and_weights
        ) / total_weight
        combined_mask = (combined_mask > 127).astype(np.uint8) * 255
    else:
        combined_mask = np.zeros(img_bgr.shape[:2], dtype=np.uint8)

    # Post-processing: remove small noise components.
    combined_mask = _clean_mask(combined_mask)

    # Metrics (coerced to plain Python floats for safe serialization).
    total_pixels = combined_mask.size
    handwriting_pixels = np.sum(combined_mask > 0)
    handwriting_ratio = float(handwriting_pixels / total_pixels) if total_pixels > 0 else 0.0

    # Primary method = highest individual confidence; overall confidence is
    # the mean of all method confidences.
    primary_method = max(masks_and_weights, key=lambda x: x[1])[2] if masks_and_weights else "combined"
    overall_confidence = float(total_weight / len(masks_and_weights)) if masks_and_weights else 0.0

    logger.info(f"Handwriting detection (target_ink={target_ink}): {handwriting_ratio:.2%} handwriting, "
                f"confidence={overall_confidence:.2f}, method={primary_method}")

    return DetectionResult(
        mask=combined_mask,
        confidence=overall_confidence,
        handwriting_ratio=handwriting_ratio,
        detection_method=primary_method
    )
def _detect_by_color(img_bgr: np.ndarray) -> Tuple[np.ndarray, float]:
    """Detect handwriting by ink color (blue, red, green pen).

    Colored inks are common for handwriting and corrections, while printed
    worksheet content is mostly black, so colored pixels are a strong signal.
    Returns (mask, confidence).
    """
    hsv = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2HSV)

    # (lower, upper) HSV bounds per ink; red needs both ends of the hue wheel.
    ink_ranges = (
        ((100, 50, 30), (130, 255, 200)),   # blue pen
        ((0, 50, 50), (10, 255, 255)),      # red pen, low hue end
        ((170, 50, 50), (180, 255, 255)),   # red pen, high hue end
        ((35, 50, 50), (85, 255, 200)),     # green pen (less common)
    )

    # Union of all per-color masks.
    color_mask = np.zeros(hsv.shape[:2], dtype=np.uint8)
    for lower, upper in ink_ranges:
        in_range = cv2.inRange(hsv, np.array(lower), np.array(upper))
        color_mask = cv2.bitwise_or(color_mask, in_range)

    # Dilate slightly to connect nearby stroke fragments.
    color_mask = cv2.dilate(color_mask, np.ones((3, 3), np.uint8), iterations=1)

    # Confidence from the fraction of colored pixels:
    # a plausible amount of colored ink (0.5%-30% of the page) is a strong hit.
    ratio = np.sum(color_mask > 0) / color_mask.size if color_mask.size > 0 else 0
    if 0.005 < ratio < 0.3:
        confidence = 0.9
    elif ratio > 0:
        confidence = 0.5
    else:
        confidence = 0.1
    return color_mask, confidence
def _detect_pencil(img_bgr: np.ndarray) -> Tuple[np.ndarray, float]:
    """Detect pencil marks via mid-gray intensity banding.

    On a 0-255 scale paper is usually brighter than 230 and dark pen ink
    below 130, so graphite strokes fall in the 140-220 band.
    Returns (mask, confidence).
    """
    luminance = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
    pencil_mask = cv2.inRange(luminance, 140, 220)

    # Morphological opening with a tiny kernel strips speckle noise.
    pencil_mask = cv2.morphologyEx(
        pencil_mask, cv2.MORPH_OPEN, np.ones((2, 2), np.uint8), iterations=1
    )

    # Confident only when pencil coverage lies in a plausible band.
    coverage = np.sum(pencil_mask > 0) / pencil_mask.size
    plausible = 0.002 < coverage < 0.2
    return pencil_mask, (0.75 if plausible else 0.2)
def _detect_by_stroke_analysis(img_bgr: np.ndarray) -> Tuple[np.ndarray, float]:
    """Detect handwriting by stroke-width analysis.

    Handwriting typically has thinner, more variable stroke widths than
    printed text. Ink is skeletonized; ink lying within a dilated skeleton
    is at most a few pixels wide ("thin") and therefore likely handwritten.

    Returns:
        (mask, confidence): binary mask of thin-stroke ink and a confidence
        derived from the thin-ink / total-ink ratio, capped at 0.8.

    Note: the previous version also computed Canny edges, a morphological
    gradient, and a "thick regions" mask that were never used; that dead
    work has been removed.
    """
    gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)

    # Adaptive thresholding extracts all ink (printed + handwritten) as white.
    binary = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV, 11, 2
    )

    # Skeletonize, then dilate; ink overlapping the dilated skeleton is thin.
    skeleton = _skeletonize(binary)
    dilated_skeleton = cv2.dilate(skeleton, np.ones((5, 5), np.uint8), iterations=1)
    thin_regions = cv2.bitwise_and(binary, dilated_skeleton)

    # Handwriting tends to live in the thin-stroke regions.
    handwriting_mask = thin_regions

    # Confidence from the share of ink that is thin.
    total_ink = np.sum(binary > 0)
    thin_ink = np.sum(thin_regions > 0)
    if total_ink > 0:
        thin_ratio = thin_ink / total_ink
        confidence = min(thin_ratio * 1.5, 0.8)  # cap at 0.8
    else:
        confidence = 0.1
    return handwriting_mask, confidence
def _detect_by_variance(img_bgr: np.ndarray) -> Tuple[np.ndarray, float]:
    """Detect handwriting via local intensity variance.

    Handwritten strokes vary more in direction and width than uniform
    printed glyphs, so text pixels sitting in high-variance neighborhoods
    are flagged as handwriting. Returns (mask, confidence).
    """
    gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)

    # Local variance over a 15x15 window: Var = E[X^2] - E[X]^2.
    window = (15, 15)
    gray_f = gray.astype(np.float32)
    local_mean = cv2.blur(gray_f, window)
    local_sq_mean = cv2.blur(gray_f ** 2, window)
    variance = local_sq_mean - local_mean ** 2
    variance = cv2.normalize(variance, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)

    # High variance also occurs at printed-text edges, so restrict the
    # high-variance mask to actual text (ink) regions.
    binary = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV, 11, 2
    )
    high_variance = cv2.threshold(variance, 100, 255, cv2.THRESH_BINARY)[1]
    handwriting_mask = cv2.bitwise_and(high_variance, binary)

    # Confidence: 5%-50% high-variance text suggests handwriting is present.
    text_pixels = np.sum(binary > 0)
    if text_pixels == 0:
        return handwriting_mask, 0.1
    var_ratio = np.sum(handwriting_mask > 0) / text_pixels
    confidence = 0.7 if 0.05 < var_ratio < 0.5 else 0.3
    return handwriting_mask, confidence
def _skeletonize(binary: np.ndarray) -> np.ndarray:
    """Compute a morphological skeleton by iterative erosion.

    Each pass accumulates the difference between the current image and its
    opening, then erodes; iteration stops once the image is fully eroded.
    """
    cross = cv2.getStructuringElement(cv2.MORPH_CROSS, (3, 3))
    skeleton = np.zeros_like(binary)
    work = binary.copy()
    while cv2.countNonZero(work) > 0:
        eroded = cv2.erode(work, cross)
        opened = cv2.dilate(eroded, cross)
        skeleton = cv2.bitwise_or(skeleton, cv2.subtract(work, opened))
        work = eroded.copy()
    return skeleton
def _clean_mask(mask: np.ndarray, min_area: int = 50) -> np.ndarray:
    """Remove small noise regions from a binary mask.

    Args:
        mask: Binary uint8 mask (0 / 255).
        min_area: Minimum connected-component area (in pixels) to keep.

    Returns:
        uint8 mask containing only components of at least ``min_area`` pixels.
    """
    num_labels, labels, stats, _centroids = cv2.connectedComponentsWithStats(
        mask, connectivity=8
    )
    # Vectorized filtering: build one boolean per label and index the label
    # map once. The previous per-component loop rescanned the whole image
    # for every label (O(labels * pixels)).
    keep = stats[:, cv2.CC_STAT_AREA] >= min_area
    keep[0] = False  # label 0 is the background
    return np.where(keep[labels], 255, 0).astype(np.uint8)
def mask_to_png(mask: np.ndarray) -> bytes:
    """Encode a mask array as a PNG image and return the raw bytes."""
    buffer = io.BytesIO()
    Image.fromarray(mask).save(buffer, format='PNG')
    return buffer.getvalue()
def detect_handwriting_regions(
    image_bytes: bytes,
    min_confidence: float = 0.3,
    target_ink: str = "all",
) -> dict:
    """
    High-level function that returns structured detection results.

    Args:
        image_bytes: Input image
        min_confidence: Minimum confidence to report detection
        target_ink: Which ink types to detect, passed through to
            detect_handwriting() ("all" | "colored" | "pencil").
            Defaults to "all" for backward compatibility.

    Returns:
        Dictionary with detection results
    """
    result = detect_handwriting(image_bytes, target_ink=target_ink)

    # Report handwriting only when the detection is confident AND a
    # non-trivial amount of ink was found (at least 0.5% of all pixels).
    has_handwriting = (
        result.confidence >= min_confidence and
        result.handwriting_ratio > 0.005
    )
    return {
        "has_handwriting": has_handwriting,
        "confidence": result.confidence,
        "handwriting_ratio": result.handwriting_ratio,
        "detection_method": result.detection_method,
        "mask_shape": result.mask.shape,
    }