feat(klausur): Handschrift entfernen + Klausur-HTR implementiert
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 26s
CI / test-go-edu-search (push) Successful in 26s
CI / test-python-klausur (push) Failing after 1m49s
CI / test-python-agent-core (push) Successful in 14s
CI / test-nodejs-website (push) Successful in 15s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 26s
CI / test-go-edu-search (push) Successful in 26s
CI / test-python-klausur (push) Failing after 1m49s
CI / test-python-agent-core (push) Successful in 14s
CI / test-nodejs-website (push) Successful in 15s
Feature 1: Handschrift entfernen via OCR-Pipeline Session
- services/handwriting_detection.py: _detect_pencil() + target_ink Parameter
("all" | "colored" | "pencil") für gezielte Tinten-Erkennung
- ocr_pipeline_session_store.py: clean_png + handwriting_removal_meta Spalten
(idempotentes ALTER TABLE in init_ocr_pipeline_tables)
- ocr_pipeline_api.py: POST /sessions/{id}/remove-handwriting Endpoint
+ "clean" zu valid_types für Image-Serving hinzugefügt
Feature 2: Klausur-HTR (Hochwertige Handschriftenerkennung)
- handwriting_htr_api.py: Neuer Router /api/v1/htr/recognize + /recognize-session
Primary: qwen2.5vl:32b via Ollama, Fallback: trocr-large-handwritten
- services/trocr_service.py: size Parameter (base | large) für get_trocr_model()
+ run_trocr_ocr() - unterstützt jetzt trocr-large-handwritten
- main.py: HTR Router registriert
Config:
- docker-compose.yml: OLLAMA_HTR_MODEL, HTR_FALLBACK_MODEL
- .env.example: HTR Env-Vars dokumentiert
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -31,8 +31,10 @@ from datetime import datetime, timedelta
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Lazy loading for heavy dependencies
|
||||
_trocr_processor = None
|
||||
_trocr_model = None
|
||||
# Cache keyed by model_name to support base and large variants simultaneously
|
||||
_trocr_models: dict = {} # {model_name: (processor, model)}
|
||||
_trocr_processor = None # backwards-compat alias → base-printed
|
||||
_trocr_model = None # backwards-compat alias → base-printed
|
||||
_trocr_available = None
|
||||
_model_loaded_at = None
|
||||
|
||||
@@ -124,12 +126,14 @@ def _check_trocr_available() -> bool:
|
||||
return _trocr_available
|
||||
|
||||
|
||||
def get_trocr_model(handwritten: bool = False):
|
||||
def get_trocr_model(handwritten: bool = False, size: str = "base"):
|
||||
"""
|
||||
Lazy load TrOCR model and processor.
|
||||
|
||||
Args:
|
||||
handwritten: Use handwritten model instead of printed model
|
||||
size: Model size — "base" (300 MB) or "large" (340 MB, higher accuracy
|
||||
for exam HTR). Only applies to handwritten variant.
|
||||
|
||||
Returns tuple of (processor, model) or (None, None) if unavailable.
|
||||
"""
|
||||
@@ -138,31 +142,42 @@ def get_trocr_model(handwritten: bool = False):
|
||||
if not _check_trocr_available():
|
||||
return None, None
|
||||
|
||||
if _trocr_processor is None or _trocr_model is None:
|
||||
try:
|
||||
import torch
|
||||
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
|
||||
# Select model name
|
||||
if size == "large" and handwritten:
|
||||
model_name = "microsoft/trocr-large-handwritten"
|
||||
elif handwritten:
|
||||
model_name = "microsoft/trocr-base-handwritten"
|
||||
else:
|
||||
model_name = "microsoft/trocr-base-printed"
|
||||
|
||||
# Choose model based on use case
|
||||
if handwritten:
|
||||
model_name = "microsoft/trocr-base-handwritten"
|
||||
else:
|
||||
model_name = "microsoft/trocr-base-printed"
|
||||
if model_name in _trocr_models:
|
||||
return _trocr_models[model_name]
|
||||
|
||||
logger.info(f"Loading TrOCR model: {model_name}")
|
||||
_trocr_processor = TrOCRProcessor.from_pretrained(model_name)
|
||||
_trocr_model = VisionEncoderDecoderModel.from_pretrained(model_name)
|
||||
try:
|
||||
import torch
|
||||
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
|
||||
|
||||
# Use GPU if available
|
||||
device = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
|
||||
_trocr_model.to(device)
|
||||
logger.info(f"TrOCR model loaded on device: {device}")
|
||||
logger.info(f"Loading TrOCR model: {model_name}")
|
||||
processor = TrOCRProcessor.from_pretrained(model_name)
|
||||
model = VisionEncoderDecoderModel.from_pretrained(model_name)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to load TrOCR model: {e}")
|
||||
return None, None
|
||||
# Use GPU if available
|
||||
device = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
|
||||
model.to(device)
|
||||
logger.info(f"TrOCR model loaded on device: {device}")
|
||||
|
||||
return _trocr_processor, _trocr_model
|
||||
_trocr_models[model_name] = (processor, model)
|
||||
|
||||
# Keep backwards-compat globals pointing at base-printed
|
||||
if model_name == "microsoft/trocr-base-printed":
|
||||
_trocr_processor = processor
|
||||
_trocr_model = model
|
||||
|
||||
return processor, model
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to load TrOCR model {model_name}: {e}")
|
||||
return None, None
|
||||
|
||||
|
||||
def preload_trocr_model(handwritten: bool = True) -> bool:
|
||||
@@ -209,7 +224,8 @@ def get_model_status() -> Dict[str, Any]:
|
||||
async def run_trocr_ocr(
|
||||
image_data: bytes,
|
||||
handwritten: bool = False,
|
||||
split_lines: bool = True
|
||||
split_lines: bool = True,
|
||||
size: str = "base",
|
||||
) -> Tuple[Optional[str], float]:
|
||||
"""
|
||||
Run TrOCR on an image.
|
||||
@@ -223,11 +239,12 @@ async def run_trocr_ocr(
|
||||
image_data: Raw image bytes
|
||||
handwritten: Use handwritten model (slower but better for handwriting)
|
||||
split_lines: Whether to split image into lines first
|
||||
size: "base" or "large" (only for handwritten variant)
|
||||
|
||||
Returns:
|
||||
Tuple of (extracted_text, confidence)
|
||||
"""
|
||||
processor, model = get_trocr_model(handwritten=handwritten)
|
||||
processor, model = get_trocr_model(handwritten=handwritten, size=size)
|
||||
|
||||
if processor is None or model is None:
|
||||
logger.error("TrOCR model not available")
|
||||
|
||||
Reference in New Issue
Block a user