Install LOC guardrails (check-loc.sh, architecture.md, pre-commit hook) and split all 44 files exceeding 500 LOC into domain-focused modules: - consent-service (Go): models, handlers, services, database splits - backend-core (Python): security_api, rbac_api, pdf_service, auth splits - admin-core (TypeScript): 5 page.tsx + sidebar extractions - pitch-deck (TypeScript): 6 slides, 3 UI components, engine.ts splits - voice-service (Python): enhanced_task_orchestrator split Result: 0 violations, 36 exempted (pipeline, tests, pure-data files). Go build verified clean. No behavior changes — pure structural splits. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
214 lines
6.1 KiB
Python
214 lines
6.1 KiB
Python
"""
|
|
Image Processing and OCR Service.
|
|
|
|
Handles:
|
|
- Image preprocessing for better OCR results (grayscale, denoising, binarization)
|
|
- PaddleOCR integration for text recognition
|
|
- Handwriting region extraction from scanned documents
|
|
|
|
Used by FileProcessor for image and PDF-to-image OCR workflows.
|
|
"""
|
|
|
|
import logging
|
|
from typing import Optional, List, Dict, Any, Tuple
|
|
|
|
import cv2
|
|
import numpy as np
|
|
from PIL import Image
|
|
|
|
from .file_processor_types import ProcessedRegion
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class ImageProcessor:
    """
    Image preprocessing and OCR for BreakPilot.

    Supports:
    - PaddleOCR for German handwriting and printed text
    - OpenCV-based preprocessing (denoising, CLAHE, adaptive binarization)
    - Handwriting region extraction for exam correction
    """

    # Maps common ISO 639-1 codes to PaddleOCR language identifiers.
    # Unknown codes are passed through unchanged, so callers may also
    # supply a PaddleOCR name (e.g. 'german', 'en') directly.
    _PADDLE_LANG_MAP = {"de": "german", "fr": "french", "ko": "korean", "ja": "japan"}

    def __init__(self, ocr_lang: str = "de", use_gpu: bool = False):
        """
        Args:
            ocr_lang: ISO 639-1 code or PaddleOCR language name.
                Defaults to "de" (German).
            use_gpu: Whether PaddleOCR should run on the GPU.
        """
        self.ocr_lang = ocr_lang
        self.use_gpu = use_gpu
        self._ocr_engine = None  # created lazily on first access

    @property
    def ocr_engine(self):
        """Lazily initialized OCR engine (PaddleOCR instance, or None if unavailable)."""
        if self._ocr_engine is None:
            self._ocr_engine = self._init_ocr_engine()
        return self._ocr_engine

    def _init_ocr_engine(self):
        """Initialize PaddleOCR, or return None if it is not installed."""
        try:
            from paddleocr import PaddleOCR
            # BUGFIX: previously the language was hard-coded to 'german',
            # silently ignoring the `ocr_lang` constructor argument. Honor
            # the configured language; the default "de" still maps to
            # 'german', so existing callers see identical behavior.
            return PaddleOCR(
                use_angle_cls=True,
                lang=self._PADDLE_LANG_MAP.get(self.ocr_lang, self.ocr_lang),
                use_gpu=self.use_gpu,
                show_log=False
            )
        except ImportError:
            logger.warning("PaddleOCR nicht installiert - verwende Fallback")
            return None

    def preprocess_image(self, img: Image.Image) -> Image.Image:
        """
        Preprocess an image for better OCR results.

        Pipeline:
        - Convert to grayscale
        - Noise reduction (non-local means)
        - Contrast enhancement (CLAHE)
        - Adaptive binarization

        Args:
            img: Input image (assumed RGB; PIL converts on load).

        Returns:
            Binarized single-channel PIL image.
        """
        # PIL -> OpenCV (BGR channel order)
        cv_img = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)

        # Convert to grayscale
        gray = cv2.cvtColor(cv_img, cv2.COLOR_BGR2GRAY)

        # Noise reduction
        denoised = cv2.fastNlMeansDenoising(gray, None, 10, 7, 21)

        # Contrast enhancement (CLAHE)
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        enhanced = clahe.apply(denoised)

        # Adaptive binarization (Gaussian-weighted local threshold)
        binary = cv2.adaptiveThreshold(
            enhanced,
            255,
            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY,
            11,
            2
        )

        # Back to PIL
        return Image.fromarray(binary)

    def ocr_image(self, img: Image.Image) -> Dict[str, Any]:
        """
        Run OCR on an image.

        Args:
            img: Input image (grayscale or RGB).

        Returns:
            Dict with keys:
            - "text": recognized lines joined with newlines
            - "confidence": mean per-line confidence (0.0 if nothing found)
            - "regions": list of ProcessedRegion with text, confidence, bbox
        """
        if self.ocr_engine is None:
            # PaddleOCR not installed — return a sentinel result instead of raising.
            return {
                "text": "[OCR nicht verfuegbar - bitte PaddleOCR installieren]",
                "confidence": 0.0,
                "regions": []
            }

        # PIL -> numpy array
        img_array = np.array(img)

        # If grayscale, convert to RGB (PaddleOCR expects RGB)
        if len(img_array.shape) == 2:
            img_array = cv2.cvtColor(img_array, cv2.COLOR_GRAY2RGB)

        # Run OCR
        result = self.ocr_engine.ocr(img_array, cls=True)

        if not result or not result[0]:
            return {"text": "", "confidence": 0.0, "regions": []}

        all_text = []
        all_regions = []
        total_confidence = 0.0

        for line in result[0]:
            bbox_points = line[0]  # quad: [[x1,y1], [x2,y2], [x3,y3], [x4,y4]]
            text, confidence = line[1]

            # Collapse the quadrilateral into an axis-aligned (x1, y1, x2, y2) box
            x_coords = [p[0] for p in bbox_points]
            y_coords = [p[1] for p in bbox_points]
            bbox = (
                int(min(x_coords)),
                int(min(y_coords)),
                int(max(x_coords)),
                int(max(y_coords))
            )

            all_text.append(text)
            all_regions.append(ProcessedRegion(
                text=text,
                confidence=confidence,
                bbox=bbox
            ))
            total_confidence += confidence

        avg_confidence = total_confidence / len(all_regions) if all_regions else 0.0

        return {
            "text": "\n".join(all_text),
            "confidence": avg_confidence,
            "regions": all_regions
        }

    def extract_handwriting_regions(
        self,
        img: Image.Image,
        min_area: int = 500
    ) -> List[Dict[str, Any]]:
        """
        Detect and extract handwritten regions from an image.

        Useful for exams with printed questions and handwritten answers.

        Args:
            img: Input image.
            min_area: Minimum contour area (in pixels) for a region to be kept.

        Returns:
            List of region dicts with "bbox", "area", "text", and "confidence",
            sorted top-to-bottom by the region's y position.
        """
        # Preprocess: PIL -> OpenCV BGR -> grayscale
        cv_img = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
        gray = cv2.cvtColor(cv_img, cv2.COLOR_BGR2GRAY)

        # Edge detection
        edges = cv2.Canny(gray, 50, 150)

        # Morphological dilation with a wide kernel to merge nearby strokes
        # into contiguous text regions
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (15, 5))
        dilated = cv2.dilate(edges, kernel, iterations=2)

        # Find outer contours of the merged regions
        contours, _ = cv2.findContours(
            dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
        )

        regions = []
        for contour in contours:
            area = cv2.contourArea(contour)
            if area < min_area:
                # Skip noise / tiny specks
                continue

            x, y, w, h = cv2.boundingRect(contour)

            # Crop the region out of the original (unprocessed) image
            region_img = img.crop((x, y, x + w, y + h))

            # Run OCR on the cropped region
            ocr_result = self.ocr_image(region_img)

            regions.append({
                "bbox": (x, y, x + w, y + h),
                "area": area,
                "text": ocr_result["text"],
                "confidence": ocr_result["confidence"]
            })

        # Sort by y position (top to bottom, reading order)
        regions.sort(key=lambda r: r["bbox"][1])

        return regions
|