refactor: remove unused pages and backends (model-management, OCR legacy, GPU/vast.ai, video-chat, matrix)

Deleted pages:
- /ai/model-management (mock data only, no real backend)
- /ai/ocr-compare (old /vocab/ backend, replaced by ocr-kombi)
- /ai/ocr-pipeline (minimal session browser, redundant)
- /ai/ocr-overlay (legacy monolith, redundant)
- /ai/gpu (vast.ai GPU management, no longer used)
- /infrastructure/gpu (same)
- /communication/video-chat (moved to core)
- /communication/matrix (moved to core)

Deleted backends:
- backend-lehrer/infra/vast_client.py + vast_power.py
- backend-lehrer/meetings_api.py + jitsi_api.py
- website/app/api/admin/gpu/
- edu-search-service/scripts/vast_ai_extractor.py

Total: ~7,800 LOC removed. All code preserved in git history.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-04-23 13:14:12 +02:00
parent 5abdfa202e
commit f39cbe9283
30 changed files with 1089 additions and 9567 deletions

View File

@@ -0,0 +1,119 @@
"""
LightOnOCR-2-1B Service
End-to-end VLM OCR fuer gedruckten und gemischten Text.
1B Parameter, Apple MPS-faehig (M-Serie).
Modell: lightonai/LightOnOCR-2-1B
Lizenz: Apache 2.0
Quelle: https://huggingface.co/lightonai/LightOnOCR-2-1B
Unterstuetzte Dokumenttypen:
- Buchseiten, Vokabelseiten
- Arbeitsblaetter, Klausuren
- Gemischt gedruckt/handschriftlich
DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
"""
import io
import logging
import os
from typing import Optional, Tuple
logger = logging.getLogger(__name__)
LIGHTON_MODEL_ID = os.getenv("LIGHTON_OCR_MODEL", "lightonai/LightOnOCR-2-1B")
_lighton_model = None
_lighton_processor = None
_lighton_available: Optional[bool] = None
def _check_lighton_available() -> bool:
    """Check whether the LightOnOCR dependencies (transformers, torch) import.

    The result is cached in the module-level ``_lighton_available`` flag so the
    potentially slow import attempt runs at most once per process.

    Returns:
        True if both ``transformers`` and ``torch`` are importable,
        False otherwise.
    """
    global _lighton_available
    if _lighton_available is not None:
        # Cached from a previous call.
        return _lighton_available
    try:
        # Imported only to probe availability; the loader imports them again.
        from transformers import AutoModelForImageTextToText, AutoProcessor  # noqa: F401
        import torch  # noqa: F401
        _lighton_available = True
    except ImportError as e:
        # Lazy %-formatting: the message is only built if the warning is emitted.
        logger.warning("LightOnOCR deps not available: %s", e)
        _lighton_available = False
    return _lighton_available
def get_lighton_model() -> Tuple:
    """
    Lazy-load the LightOnOCR-2-1B processor and model (module singletons).

    Returns:
        (processor, model) on success, or (None, None) when the
        dependencies are missing or loading fails.

    Device priority: MPS (Apple Silicon) > CUDA > CPU.
    """
    global _lighton_model, _lighton_processor
    # Fast path: already loaded in this process.
    if _lighton_model is not None:
        return _lighton_processor, _lighton_model
    if not _check_lighton_available():
        return None, None
    try:
        import torch
        from transformers import AutoModelForImageTextToText, AutoProcessor

        # Pick the best available accelerator.
        device = (
            "mps"
            if torch.backends.mps.is_available()
            else "cuda" if torch.cuda.is_available() else "cpu"
        )
        dtype = torch.bfloat16
        logger.info(f"Loading LightOnOCR-2-1B on {device} ({dtype}) from {LIGHTON_MODEL_ID} ...")
        _lighton_processor = AutoProcessor.from_pretrained(LIGHTON_MODEL_ID)
        loaded = AutoModelForImageTextToText.from_pretrained(
            LIGHTON_MODEL_ID, torch_dtype=dtype
        ).to(device)
        loaded.eval()  # inference-only: disable dropout/batch-norm training mode
        _lighton_model = loaded
        logger.info("LightOnOCR-2-1B loaded successfully")
    except Exception as exc:
        # Reset both singletons so a later call can retry the load.
        logger.error(f"Failed to load LightOnOCR-2-1B: {exc}")
        _lighton_model = None
        _lighton_processor = None
    return _lighton_processor, _lighton_model
def run_lighton_ocr_sync(image_bytes: bytes) -> Optional[str]:
    """
    Run LightOnOCR on encoded image bytes (synchronous).

    Args:
        image_bytes: Raw encoded image data (any format PIL can open).

    Returns:
        The stripped extracted text, or None when the model is
        unavailable, the output is empty, or inference raises.

    Generic — works for any document/page region.
    """
    processor, model = get_lighton_model()
    if processor is None or model is None:
        # Model never loaded; caller is expected to handle None.
        return None
    try:
        import torch
        from PIL import Image as _PILImage

        image = _PILImage.open(io.BytesIO(image_bytes)).convert("RGB")
        chat = [{"role": "user", "content": [{"type": "image"}]}]
        model_inputs = processor.apply_chat_template(
            chat,
            images=[image],
            add_generation_prompt=True,
            return_tensors="pt",
        ).to(model.device)
        with torch.no_grad():
            generated_ids = model.generate(**model_inputs, max_new_tokens=1024)
        # NOTE(review): decodes the full sequence (prompt tokens included);
        # presumably the chat template yields no visible prompt text — confirm.
        decoded = processor.decode(generated_ids[0], skip_special_tokens=True)
        return decoded.strip() if decoded else None
    except Exception as exc:
        logger.error(f"LightOnOCR inference failed: {exc}")
        return None