""" LightOnOCR-2-1B Service End-to-end VLM OCR fuer gedruckten und gemischten Text. 1B Parameter, Apple MPS-faehig (M-Serie). Modell: lightonai/LightOnOCR-2-1B Lizenz: Apache 2.0 Quelle: https://huggingface.co/lightonai/LightOnOCR-2-1B Unterstuetzte Dokumenttypen: - Buchseiten, Vokabelseiten - Arbeitsblaetter, Klausuren - Gemischt gedruckt/handschriftlich DATENSCHUTZ: Alle Verarbeitung erfolgt lokal. """ import io import logging import os from typing import Optional, Tuple logger = logging.getLogger(__name__) LIGHTON_MODEL_ID = os.getenv("LIGHTON_OCR_MODEL", "lightonai/LightOnOCR-2-1B") _lighton_model = None _lighton_processor = None _lighton_available: Optional[bool] = None def _check_lighton_available() -> bool: """Check if LightOnOCR dependencies (transformers, torch) are available.""" global _lighton_available if _lighton_available is not None: return _lighton_available try: from transformers import AutoModelForImageTextToText, AutoProcessor # noqa: F401 import torch # noqa: F401 _lighton_available = True except ImportError as e: logger.warning(f"LightOnOCR deps not available: {e}") _lighton_available = False return _lighton_available def get_lighton_model() -> Tuple: """ Lazy-load LightOnOCR-2-1B processor and model. Returns (processor, model) or (None, None) on failure. Device priority: MPS (Apple Silicon) > CUDA > CPU. """ global _lighton_model, _lighton_processor if _lighton_model is not None: return _lighton_processor, _lighton_model if not _check_lighton_available(): return None, None try: import torch from transformers import AutoModelForImageTextToText, AutoProcessor if torch.backends.mps.is_available(): device = "mps" elif torch.cuda.is_available(): device = "cuda" else: device = "cpu" dtype = torch.bfloat16 logger.info(f"Loading LightOnOCR-2-1B on {device} ({dtype}) from {LIGHTON_MODEL_ID} ...") _lighton_processor = AutoProcessor.from_pretrained(LIGHTON_MODEL_ID) _lighton_model = AutoModelForImageTextToText.from_pretrained( LIGHTON_MODEL_ID, torch_dtype=dtype ).to(device) _lighton_model.eval() logger.info("LightOnOCR-2-1B loaded successfully") except Exception as e: logger.error(f"Failed to load LightOnOCR-2-1B: {e}") _lighton_model = None _lighton_processor = None return _lighton_processor, _lighton_model def run_lighton_ocr_sync(image_bytes: bytes) -> Optional[str]: """ Run LightOnOCR on image bytes (synchronous). Returns extracted text or None on error. Generic — works for any document/page region. """ processor, model = get_lighton_model() if processor is None or model is None: return None try: import torch from PIL import Image as _PILImage pil_img = _PILImage.open(io.BytesIO(image_bytes)).convert("RGB") conversation = [{"role": "user", "content": [{"type": "image"}]}] inputs = processor.apply_chat_template( conversation, images=[pil_img], add_generation_prompt=True, return_tensors="pt" ).to(model.device) with torch.no_grad(): output_ids = model.generate(**inputs, max_new_tokens=1024) text = processor.decode(output_ids[0], skip_special_tokens=True) return text.strip() if text else None except Exception as e: logger.error(f"LightOnOCR inference failed: {e}") return None