[split-required] [guardrail-change] Enforce 500 LOC budget across all services
Install LOC guardrails (check-loc.sh, architecture.md, pre-commit hook) and split all 44 files exceeding 500 LOC into domain-focused modules:

- consent-service (Go): models, handlers, services, database splits
- backend-core (Python): security_api, rbac_api, pdf_service, auth splits
- admin-core (TypeScript): 5 page.tsx + sidebar extractions
- pitch-deck (TypeScript): 6 slides, 3 UI components, engine.ts splits
- voice-service (Python): enhanced_task_orchestrator split

Result: 0 violations, 36 exempted (pipeline, tests, pure-data files). Go build verified clean. No behavior changes: pure structural splits.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
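check-loc.sh itself is not included in this commit's diff. As a rough illustration of the guardrail it enforces, here is a minimal Python sketch; the 500-line budget matches the commit message, but the scanned extensions and exemption directories are assumptions:

#!/usr/bin/env python3
# Minimal sketch of a LOC-budget check. The real check-loc.sh is a shell
# script not shown here; extensions and exemptions below are illustrative.
import pathlib
import sys

BUDGET = 500  # max lines per source file, per the commit message
EXEMPT_DIRS = {"pipeline", "tests"}  # assumed exemption categories

violations = []
for path in pathlib.Path(".").rglob("*"):
    if path.suffix not in {".go", ".py", ".ts", ".tsx"}:
        continue
    if EXEMPT_DIRS & set(path.parts):
        continue
    loc = sum(1 for _ in path.open(encoding="utf-8", errors="ignore"))
    if loc > BUDGET:
        violations.append((path, loc))

for path, loc in violations:
    print(f"{path}: {loc} LOC exceeds budget of {BUDGET}")
sys.exit(1 if violations else 0)

Wired into a pre-commit hook, a non-zero exit blocks the commit until the offending file is split or exempted.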
backend-core/services/image_processing.py (new file, 213 lines)
@@ -0,0 +1,213 @@
"""
Image Processing and OCR Service.

Handles:
- Image preprocessing for better OCR results (grayscale, denoising, binarization)
- PaddleOCR integration for text recognition
- Handwriting region extraction from scanned documents

Used by FileProcessor for image and PDF-to-image OCR workflows.
"""

import logging
from typing import Optional, List, Dict, Any, Tuple

import cv2
import numpy as np
from PIL import Image

from .file_processor_types import ProcessedRegion

logger = logging.getLogger(__name__)


class ImageProcessor:
    """
    Image preprocessing and OCR for BreakPilot.

    Supports:
    - PaddleOCR for German handwriting and printed text
    - OpenCV-based preprocessing (denoising, CLAHE, adaptive binarization)
    - Handwriting region extraction for exam correction
    """

    def __init__(self, ocr_lang: str = "de", use_gpu: bool = False):
        self.ocr_lang = ocr_lang
        self.use_gpu = use_gpu
        self._ocr_engine = None

    @property
    def ocr_engine(self):
        """Lazy-loads the OCR engine."""
        if self._ocr_engine is None:
            self._ocr_engine = self._init_ocr_engine()
        return self._ocr_engine

    def _init_ocr_engine(self):
        """Initializes PaddleOCR, or returns None as a fallback."""
        try:
            from paddleocr import PaddleOCR
            return PaddleOCR(
                use_angle_cls=True,
                lang='german',
                use_gpu=self.use_gpu,
                show_log=False
            )
        except ImportError:
            logger.warning("PaddleOCR not installed - using fallback")
            return None

    def preprocess_image(self, img: Image.Image) -> Image.Image:
        """
        Preprocesses the image for better OCR results.

        - Conversion to grayscale
        - Contrast enhancement
        - Noise reduction
        - Binarization
        """
        # PIL to OpenCV
        cv_img = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)

        # Convert to grayscale
        gray = cv2.cvtColor(cv_img, cv2.COLOR_BGR2GRAY)

        # Noise reduction
        denoised = cv2.fastNlMeansDenoising(gray, None, 10, 7, 21)

        # Contrast enhancement (CLAHE)
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        enhanced = clahe.apply(denoised)

        # Adaptive binarization
        binary = cv2.adaptiveThreshold(
            enhanced,
            255,
            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY,
            11,
            2
        )

        # Back to PIL
        return Image.fromarray(binary)

    def ocr_image(self, img: Image.Image) -> Dict[str, Any]:
        """
        Runs OCR on an image.

        Returns:
            Dict with text, confidence, and regions
        """
        if self.ocr_engine is None:
            return {
                "text": "[OCR not available - please install PaddleOCR]",
                "confidence": 0.0,
                "regions": []
            }

        # PIL to numpy array
        img_array = np.array(img)

        # If grayscale, convert to RGB (PaddleOCR expects RGB)
        if len(img_array.shape) == 2:
            img_array = cv2.cvtColor(img_array, cv2.COLOR_GRAY2RGB)

        # Run OCR
        result = self.ocr_engine.ocr(img_array, cls=True)

        if not result or not result[0]:
            return {"text": "", "confidence": 0.0, "regions": []}

        all_text = []
        all_regions = []
        total_confidence = 0.0

        for line in result[0]:
            bbox_points = line[0]  # [[x1,y1], [x2,y2], [x3,y3], [x4,y4]]
            text, confidence = line[1]

            # Convert bounding box to x1, y1, x2, y2
            x_coords = [p[0] for p in bbox_points]
            y_coords = [p[1] for p in bbox_points]
            bbox = (
                int(min(x_coords)),
                int(min(y_coords)),
                int(max(x_coords)),
                int(max(y_coords))
            )

            all_text.append(text)
            all_regions.append(ProcessedRegion(
                text=text,
                confidence=confidence,
                bbox=bbox
            ))
            total_confidence += confidence

        avg_confidence = total_confidence / len(all_regions) if all_regions else 0.0

        return {
            "text": "\n".join(all_text),
            "confidence": avg_confidence,
            "regions": all_regions
        }

    def extract_handwriting_regions(
        self,
        img: Image.Image,
        min_area: int = 500
    ) -> List[Dict[str, Any]]:
        """
        Detects and extracts handwritten regions from an image.

        Useful for exams with printed questions and handwritten answers.

        Args:
            img: Input image
            min_area: Minimum area for detected regions

        Returns:
            List of regions with coordinates and recognized text
        """
        # Image preprocessing
        cv_img = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
        gray = cv2.cvtColor(cv_img, cv2.COLOR_BGR2GRAY)

        # Detect edges
        edges = cv2.Canny(gray, 50, 150)

        # Morphological operations to connect nearby strokes
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (15, 5))
        dilated = cv2.dilate(edges, kernel, iterations=2)

        # Find contours
        contours, _ = cv2.findContours(
            dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
        )

        regions = []
        for contour in contours:
            area = cv2.contourArea(contour)
            if area < min_area:
                continue

            x, y, w, h = cv2.boundingRect(contour)

            # Crop the region
            region_img = img.crop((x, y, x + w, y + h))

            # Run OCR on the region
            ocr_result = self.ocr_image(region_img)

            regions.append({
                "bbox": (x, y, x + w, y + h),
                "area": area,
                "text": ocr_result["text"],
                "confidence": ocr_result["confidence"]
            })

        # Sort by Y position (top to bottom)
        regions.sort(key=lambda r: r["bbox"][1])

        return regions
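A brief usage sketch, not part of the commit: the import path (the backend-core directory is not a valid Python package name as-is) and the sample filename are assumptions.

# Usage sketch; import path and filename are hypothetical.
from PIL import Image

from services.image_processing import ImageProcessor

processor = ImageProcessor(ocr_lang="de", use_gpu=False)
page = Image.open("scanned_exam_page.png").convert("RGB")

# Full-page OCR on the preprocessed image
clean = processor.preprocess_image(page)
result = processor.ocr_image(clean)
print(result["text"], result["confidence"])

# Or extract handwritten answer regions individually
for region in processor.extract_handwriting_regions(page):
    print(region["bbox"], region["text"])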