""" OCR Labeling - Helper Functions and OCR Wrappers Extracted from ocr_labeling_api.py to keep files under 500 LOC. DATENSCHUTZ/PRIVACY: - Alle Verarbeitung erfolgt lokal (Mac Mini mit Ollama) - Keine Daten werden an externe Server gesendet """ import os import hashlib from ocr_labeling_models import LOCAL_STORAGE_PATH # Try to import Vision OCR service try: import sys sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', '..', 'backend', 'klausur', 'services')) from vision_ocr_service import get_vision_ocr_service, VisionOCRService VISION_OCR_AVAILABLE = True except ImportError: VISION_OCR_AVAILABLE = False print("Warning: Vision OCR service not available") # Try to import PaddleOCR from hybrid_vocab_extractor try: from hybrid_vocab_extractor import run_paddle_ocr PADDLEOCR_AVAILABLE = True except ImportError: PADDLEOCR_AVAILABLE = False print("Warning: PaddleOCR not available") # Try to import TrOCR service try: from services.trocr_service import run_trocr_ocr TROCR_AVAILABLE = True except ImportError: TROCR_AVAILABLE = False print("Warning: TrOCR service not available") # Try to import Donut service try: from services.donut_ocr_service import run_donut_ocr DONUT_AVAILABLE = True except ImportError: DONUT_AVAILABLE = False print("Warning: Donut OCR service not available") # Try to import MinIO storage try: from minio_storage import upload_ocr_image, get_ocr_image, MINIO_BUCKET MINIO_AVAILABLE = True except ImportError: MINIO_AVAILABLE = False print("Warning: MinIO storage not available, using local storage") # Try to import Training Export Service try: from training_export_service import ( TrainingExportService, TrainingSample, get_training_export_service, ) TRAINING_EXPORT_AVAILABLE = True except ImportError: TRAINING_EXPORT_AVAILABLE = False print("Warning: Training export service not available") # ============================================================================= # Helper Functions # 
# =============================================================================


def compute_image_hash(image_data: bytes) -> str:
    """Return the SHA256 hex digest of the raw image bytes."""
    return hashlib.sha256(image_data).hexdigest()


async def run_ocr_on_image(image_data: bytes, filename: str, model: str = "llama3.2-vision:11b") -> tuple:
    """
    Run OCR on an image using the specified model.

    Models:
    - llama3.2-vision:11b: Vision LLM (default, best for handwriting)
    - trocr: Microsoft TrOCR (fast for printed text)
    - paddleocr: PaddleOCR + LLM hybrid (4x faster)
    - donut: Document Understanding Transformer (structured documents)

    Any model name other than "paddleocr", "donut" or "trocr" is routed to
    the Vision LLM wrapper.

    Args:
        image_data: Raw bytes of the image.
        filename: Name of the image, forwarded to the selected wrapper.
        model: Model identifier used for routing.

    Returns:
        Tuple of (ocr_text, confidence) as produced by the selected wrapper.
    """
    print(f"Running OCR with model: {model}")

    # Route to the appropriate OCR service based on the model name.
    if model == "paddleocr":
        return await run_paddleocr_wrapper(image_data, filename)
    if model == "donut":
        return await run_donut_wrapper(image_data, filename)
    if model == "trocr":
        return await run_trocr_wrapper(image_data, filename)

    # Default: Vision LLM (llama3.2-vision or similar)
    return await run_vision_ocr_wrapper(image_data, filename)


async def run_vision_ocr_wrapper(image_data: bytes, filename: str) -> tuple:
    """
    Run OCR through the Vision LLM service.

    Returns:
        Tuple of (text, confidence) taken from the service result, or
        (None, 0.0) when the service is not importable, reports itself as
        unavailable, or raises.
    """
    if not VISION_OCR_AVAILABLE:
        print("Vision OCR service not available")
        return None, 0.0

    try:
        service = get_vision_ocr_service()
        if not await service.is_available():
            print("Vision OCR service not available (is_available check failed)")
            return None, 0.0

        result = await service.extract_text(
            image_data,
            filename=filename,
            is_handwriting=True,
        )
        return result.text, result.confidence
    except Exception as e:
        # Deliberate best-effort: this wrapper is the last fallback for all
        # other backends, so failures are reported as "no result".
        print(f"Vision OCR failed: {e}")
        return None, 0.0


async def run_paddleocr_wrapper(image_data: bytes, filename: str) -> tuple:
    """
    Run OCR through PaddleOCR (via hybrid_vocab_extractor).

    Falls back to the Vision LLM wrapper when PaddleOCR is not importable or
    raises. An empty PaddleOCR result does NOT trigger the fallback.

    Returns:
        Tuple of (raw_text, average region confidence), (None, 0.0) when
        PaddleOCR returns empty text, or the Vision wrapper's result on
        fallback.
    """
    if not PADDLEOCR_AVAILABLE:
        print("PaddleOCR not available, falling back to Vision OCR")
        return await run_vision_ocr_wrapper(image_data, filename)

    try:
        # run_paddle_ocr returns (regions, raw_text)
        # NOTE(review): this call is not awaited, so it presumably runs
        # synchronously on the event-loop thread - confirm that is acceptable.
        regions, raw_text = run_paddle_ocr(image_data)

        if not raw_text:
            print("PaddleOCR returned empty text")
            return None, 0.0

        # Average the per-region confidences; use 0.5 when text was returned
        # without any regions to average over.
        if regions:
            avg_confidence = sum(r.confidence for r in regions) / len(regions)
        else:
            avg_confidence = 0.5

        return raw_text, avg_confidence
    except Exception as e:
        print(f"PaddleOCR failed: {e}, falling back to Vision OCR")
        return await run_vision_ocr_wrapper(image_data, filename)


async def run_trocr_wrapper(image_data: bytes, filename: str) -> tuple:
    """
    Run OCR through the TrOCR service.

    Falls back to the Vision LLM wrapper when TrOCR is not importable or
    raises.

    Returns:
        Tuple of (text, confidence).
    """
    if not TROCR_AVAILABLE:
        print("TrOCR not available, falling back to Vision OCR")
        return await run_vision_ocr_wrapper(image_data, filename)

    try:
        text, confidence = await run_trocr_ocr(image_data)
        return text, confidence
    except Exception as e:
        print(f"TrOCR failed: {e}, falling back to Vision OCR")
        return await run_vision_ocr_wrapper(image_data, filename)


async def run_donut_wrapper(image_data: bytes, filename: str) -> tuple:
    """
    Run OCR through the Donut service.

    Falls back to the Vision LLM wrapper when Donut is not importable or
    raises.

    Returns:
        Tuple of (text, confidence).
    """
    if not DONUT_AVAILABLE:
        print("Donut not available, falling back to Vision OCR")
        return await run_vision_ocr_wrapper(image_data, filename)

    try:
        text, confidence = await run_donut_ocr(image_data)
        return text, confidence
    except Exception as e:
        print(f"Donut OCR failed: {e}, falling back to Vision OCR")
        return await run_vision_ocr_wrapper(image_data, filename)


def save_image_locally(session_id: str, item_id: str, image_data: bytes, extension: str = "png") -> str:
    """
    Write image bytes to ``<LOCAL_STORAGE_PATH>/<session_id>/<item_id>.<extension>``.

    The session directory is created if it does not exist yet.

    Args:
        session_id: Name of the per-session subdirectory.
        item_id: Base name of the image file.
        image_data: Raw bytes to write.
        extension: File extension without the leading dot.

    Returns:
        The path of the written file.
    """
    # NOTE(review): session_id and item_id are joined into the path without
    # validation - confirm callers only pass trusted identifiers.
    session_dir = os.path.join(LOCAL_STORAGE_PATH, session_id)
    os.makedirs(session_dir, exist_ok=True)

    filename = f"{item_id}.{extension}"
    filepath = os.path.join(session_dir, filename)

    with open(filepath, 'wb') as f:
        f.write(image_data)

    return filepath


def get_image_url(image_path: str) -> str:
    """
    Map a stored image path to the URL (or key) handed to the frontend.

    Paths located under LOCAL_STORAGE_PATH become
    ``/api/v1/ocr-label/images/<path relative to the storage root>``.
    Everything else is returned unchanged.

    Args:
        image_path: Local file path, or a non-local path/URL/key.

    Returns:
        The API URL for local images, otherwise ``image_path`` as given.
    """
    root = LOCAL_STORAGE_PATH
    if image_path.startswith(root):
        remainder = image_path[len(root):]
        # A bare string-prefix match is not enough: "<root>-other/x.png"
        # starts with "<root>" but lives outside the storage directory.
        # Require the match to end on a path boundary.
        at_boundary = (
            not root
            or not remainder
            or root.endswith('/')
            or remainder.startswith('/')
        )
        if at_boundary:
            relative_path = remainder.lstrip('/')
            return f"/api/v1/ocr-label/images/{relative_path}"

    # For MinIO images, the path is already a URL or key
    return image_path