Files
breakpilot-core/backend-core/services/image_processing.py
Benjamin Admin 92c86ec6ba [split-required] [guardrail-change] Enforce 500 LOC budget across all services
Install LOC guardrails (check-loc.sh, architecture.md, pre-commit hook)
and split all 44 files exceeding 500 LOC into domain-focused modules:

- consent-service (Go): models, handlers, services, database splits
- backend-core (Python): security_api, rbac_api, pdf_service, auth splits
- admin-core (TypeScript): 5 page.tsx + sidebar extractions
- pitch-deck (TypeScript): 6 slides, 3 UI components, engine.ts splits
- voice-service (Python): enhanced_task_orchestrator split

Result: 0 violations, 36 exempted (pipeline, tests, pure-data files).
Go build verified clean. No behavior changes — pure structural splits.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-27 00:09:30 +02:00

214 lines
6.1 KiB
Python

"""
Image Processing and OCR Service.
Handles:
- Image preprocessing for better OCR results (grayscale, denoising, binarization)
- PaddleOCR integration for text recognition
- Handwriting region extraction from scanned documents
Used by FileProcessor for image and PDF-to-image OCR workflows.
"""
import logging
from typing import Optional, List, Dict, Any, Tuple
import cv2
import numpy as np
from PIL import Image
from .file_processor_types import ProcessedRegion
logger = logging.getLogger(__name__)
class ImageProcessor:
"""
Image preprocessing and OCR for BreakPilot.
Supports:
- PaddleOCR for German handwriting and printed text
- OpenCV-based preprocessing (denoising, CLAHE, adaptive binarization)
- Handwriting region extraction for exam correction
"""
def __init__(self, ocr_lang: str = "de", use_gpu: bool = False):
self.ocr_lang = ocr_lang
self.use_gpu = use_gpu
self._ocr_engine = None
@property
def ocr_engine(self):
"""Lazy-Loading des OCR-Engines."""
if self._ocr_engine is None:
self._ocr_engine = self._init_ocr_engine()
return self._ocr_engine
def _init_ocr_engine(self):
"""Initialisiert PaddleOCR oder Fallback."""
try:
from paddleocr import PaddleOCR
return PaddleOCR(
use_angle_cls=True,
lang='german',
use_gpu=self.use_gpu,
show_log=False
)
except ImportError:
logger.warning("PaddleOCR nicht installiert - verwende Fallback")
return None
def preprocess_image(self, img: Image.Image) -> Image.Image:
"""
Vorverarbeitung des Bildes fuer bessere OCR-Ergebnisse.
- Konvertierung zu Graustufen
- Kontrastverstaerkung
- Rauschunterdrueckung
- Binarisierung
"""
# PIL zu OpenCV
cv_img = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
# Zu Graustufen konvertieren
gray = cv2.cvtColor(cv_img, cv2.COLOR_BGR2GRAY)
# Rauschunterdrueckung
denoised = cv2.fastNlMeansDenoising(gray, None, 10, 7, 21)
# Kontrastverstaerkung (CLAHE)
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
enhanced = clahe.apply(denoised)
# Adaptive Binarisierung
binary = cv2.adaptiveThreshold(
enhanced,
255,
cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
cv2.THRESH_BINARY,
11,
2
)
# Zurueck zu PIL
return Image.fromarray(binary)
def ocr_image(self, img: Image.Image) -> Dict[str, Any]:
"""
Fuehrt OCR auf einem Bild aus.
Returns:
Dict mit text, confidence und regions
"""
if self.ocr_engine is None:
return {
"text": "[OCR nicht verfuegbar - bitte PaddleOCR installieren]",
"confidence": 0.0,
"regions": []
}
# PIL zu numpy array
img_array = np.array(img)
# Wenn Graustufen, zu RGB konvertieren (PaddleOCR erwartet RGB)
if len(img_array.shape) == 2:
img_array = cv2.cvtColor(img_array, cv2.COLOR_GRAY2RGB)
# OCR ausfuehren
result = self.ocr_engine.ocr(img_array, cls=True)
if not result or not result[0]:
return {"text": "", "confidence": 0.0, "regions": []}
all_text = []
all_regions = []
total_confidence = 0.0
for line in result[0]:
bbox_points = line[0] # [[x1,y1], [x2,y2], [x3,y3], [x4,y4]]
text, confidence = line[1]
# Bounding Box zu x1, y1, x2, y2 konvertieren
x_coords = [p[0] for p in bbox_points]
y_coords = [p[1] for p in bbox_points]
bbox = (
int(min(x_coords)),
int(min(y_coords)),
int(max(x_coords)),
int(max(y_coords))
)
all_text.append(text)
all_regions.append(ProcessedRegion(
text=text,
confidence=confidence,
bbox=bbox
))
total_confidence += confidence
avg_confidence = total_confidence / len(all_regions) if all_regions else 0.0
return {
"text": "\n".join(all_text),
"confidence": avg_confidence,
"regions": all_regions
}
def extract_handwriting_regions(
self,
img: Image.Image,
min_area: int = 500
) -> List[Dict[str, Any]]:
"""
Erkennt und extrahiert handschriftliche Bereiche aus einem Bild.
Nuetzlich fuer Klausuren mit gedruckten Fragen und handschriftlichen Antworten.
Args:
img: Eingabebild
min_area: Minimale Flaeche fuer erkannte Regionen
Returns:
Liste von Regionen mit Koordinaten und erkanntem Text
"""
# Bildvorverarbeitung
cv_img = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
gray = cv2.cvtColor(cv_img, cv2.COLOR_BGR2GRAY)
# Kanten erkennen
edges = cv2.Canny(gray, 50, 150)
# Morphologische Operationen zum Verbinden
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (15, 5))
dilated = cv2.dilate(edges, kernel, iterations=2)
# Konturen finden
contours, _ = cv2.findContours(
dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
)
regions = []
for contour in contours:
area = cv2.contourArea(contour)
if area < min_area:
continue
x, y, w, h = cv2.boundingRect(contour)
# Region ausschneiden
region_img = img.crop((x, y, x + w, y + h))
# OCR auf Region anwenden
ocr_result = self.ocr_image(region_img)
regions.append({
"bbox": (x, y, x + w, y + h),
"area": area,
"text": ocr_result["text"],
"confidence": ocr_result["confidence"]
})
# Nach Y-Position sortieren (oben nach unten)
regions.sort(key=lambda r: r["bbox"][1])
return regions