Deleted pages: - /ai/model-management (mock data only, no real backend) - /ai/ocr-compare (old /vocab/ backend, replaced by ocr-kombi) - /ai/ocr-pipeline (minimal session browser, redundant) - /ai/ocr-overlay (legacy monolith, redundant) - /ai/gpu (vast.ai GPU management, no longer used) - /infrastructure/gpu (same) - /communication/video-chat (moved to core) - /communication/matrix (moved to core) Deleted backends: - backend-lehrer/infra/vast_client.py + vast_power.py - backend-lehrer/meetings_api.py + jitsi_api.py - website/app/api/admin/gpu/ - edu-search-service/scripts/vast_ai_extractor.py Total: ~7,800 LOC removed. All code preserved in git history. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
120 lines
3.6 KiB
Python
120 lines
3.6 KiB
Python
"""
|
|
LightOnOCR-2-1B Service
|
|
|
|
End-to-end VLM OCR fuer gedruckten und gemischten Text.
|
|
1B Parameter, Apple MPS-faehig (M-Serie).
|
|
|
|
Modell: lightonai/LightOnOCR-2-1B
|
|
Lizenz: Apache 2.0
|
|
Quelle: https://huggingface.co/lightonai/LightOnOCR-2-1B
|
|
|
|
Unterstuetzte Dokumenttypen:
|
|
- Buchseiten, Vokabelseiten
|
|
- Arbeitsblaetter, Klausuren
|
|
- Gemischt gedruckt/handschriftlich
|
|
|
|
DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
|
|
"""
|
|
import io
|
|
import logging
|
|
import os
|
|
from typing import Optional, Tuple
|
|
|
|
logger = logging.getLogger(__name__)

# Hugging Face model id; overridable via the LIGHTON_OCR_MODEL env var.
LIGHTON_MODEL_ID = os.getenv("LIGHTON_OCR_MODEL", "lightonai/LightOnOCR-2-1B")

# Lazily-initialized module-level singletons, populated on first use.
_lighton_model = None
_lighton_processor = None
# Tri-state probe cache: None = not yet checked, else the cached True/False.
_lighton_available: Optional[bool] = None


def _check_lighton_available() -> bool:
    """Check if LightOnOCR dependencies (transformers, torch) are available.

    The probe is attempted at most once per process; the outcome is cached
    in the module-level ``_lighton_available`` flag.
    """
    global _lighton_available
    if _lighton_available is None:
        try:
            from transformers import AutoModelForImageTextToText, AutoProcessor  # noqa: F401
            import torch  # noqa: F401
        except ImportError as e:
            logger.warning(f"LightOnOCR deps not available: {e}")
            _lighton_available = False
        else:
            _lighton_available = True
    return _lighton_available
|
|
|
|
|
|
def get_lighton_model() -> Tuple:
    """
    Lazy-load LightOnOCR-2-1B processor and model.

    Returns (processor, model) or (None, None) on failure.
    Device priority: MPS (Apple Silicon) > CUDA > CPU.
    """
    global _lighton_model, _lighton_processor

    # Fast path: already loaded earlier in this process.
    if _lighton_model is not None:
        return _lighton_processor, _lighton_model
    # Without transformers/torch there is nothing we can do.
    if not _check_lighton_available():
        return None, None

    try:
        import torch
        from transformers import AutoModelForImageTextToText, AutoProcessor

        # Best available accelerator, falling back to CPU.
        device = (
            "mps" if torch.backends.mps.is_available()
            else "cuda" if torch.cuda.is_available()
            else "cpu"
        )
        dtype = torch.bfloat16

        logger.info(f"Loading LightOnOCR-2-1B on {device} ({dtype}) from {LIGHTON_MODEL_ID} ...")
        _lighton_processor = AutoProcessor.from_pretrained(LIGHTON_MODEL_ID)
        # eval() returns the module itself, so the chain both moves the
        # weights to the target device and switches to inference mode.
        _lighton_model = (
            AutoModelForImageTextToText.from_pretrained(
                LIGHTON_MODEL_ID, torch_dtype=dtype
            )
            .to(device)
            .eval()
        )
        logger.info("LightOnOCR-2-1B loaded successfully")

    except Exception as e:
        # Reset both singletons so a later call may retry from scratch.
        logger.error(f"Failed to load LightOnOCR-2-1B: {e}")
        _lighton_model = None
        _lighton_processor = None

    return _lighton_processor, _lighton_model
|
|
|
|
|
|
def run_lighton_ocr_sync(image_bytes: bytes) -> Optional[str]:
    """
    Run LightOnOCR on image bytes (synchronous).

    Returns extracted text or None on error.
    Generic — works for any document/page region.
    """
    processor, model = get_lighton_model()
    # Bail out early when the model could not be loaded.
    if any(part is None for part in (processor, model)):
        return None

    try:
        import torch
        from PIL import Image as _PILImage

        image = _PILImage.open(io.BytesIO(image_bytes)).convert("RGB")
        # Single-turn chat with one image and no extra text prompt.
        messages = [{"role": "user", "content": [{"type": "image"}]}]

        inputs = processor.apply_chat_template(
            messages, images=[image],
            add_generation_prompt=True, return_tensors="pt"
        ).to(model.device)

        with torch.no_grad():
            generated = model.generate(**inputs, max_new_tokens=1024)

        # NOTE(review): generated[0] is decoded in full; generate() output
        # normally includes the prompt tokens — confirm the processor/model
        # strips or tolerates them for this checkpoint.
        decoded = processor.decode(generated[0], skip_special_tokens=True)
        if not decoded:
            return None
        return decoded.strip()

    except Exception as e:
        logger.error(f"LightOnOCR inference failed: {e}")
        return None
|