backend-lehrer (11 files): - llm_gateway/routes/schools.py (867 → 5), recording_api.py (848 → 6) - messenger_api.py (840 → 5), print_generator.py (824 → 5) - unit_analytics_api.py (751 → 5), classroom/routes/context.py (726 → 4) - llm_gateway/routes/edu_search_seeds.py (710 → 4) klausur-service (12 files): - ocr_labeling_api.py (845 → 4), metrics_db.py (833 → 4) - legal_corpus_api.py (790 → 4), page_crop.py (758 → 3) - mail/ai_service.py (747 → 4), github_crawler.py (767 → 3) - trocr_service.py (730 → 4), full_compliance_pipeline.py (723 → 4) - dsfa_rag_api.py (715 → 4), ocr_pipeline_auto.py (705 → 4) website (6 pages): - audit-checklist (867 → 8), content (806 → 6) - screen-flow (790 → 4), scraper (789 → 5) - zeugnisse (776 → 5), modules (745 → 4) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
81 lines
1.8 KiB
Python
81 lines
1.8 KiB
Python
"""
|
|
TrOCR Service — Barrel Re-export
|
|
|
|
Microsoft's Transformer-based OCR for text recognition.
|
|
Split into submodules:
|
|
- trocr_models.py — Dataclasses, cache, model loading, line splitting
|
|
- trocr_ocr.py — Core OCR inference (PyTorch/ONNX routing, enhanced)
|
|
- trocr_batch.py — Batch processing and SSE streaming
|
|
|
|
PRIVACY (Datenschutz): All processing is performed locally.
|
|
"""
|
|
|
|
# Models, cache, and model loading
|
|
from .trocr_models import (
|
|
OCRResult,
|
|
BatchOCRResult,
|
|
_compute_image_hash,
|
|
_cache_get,
|
|
_cache_set,
|
|
get_cache_stats,
|
|
_check_trocr_available,
|
|
get_trocr_model,
|
|
preload_trocr_model,
|
|
get_model_status,
|
|
get_active_backend,
|
|
_split_into_lines,
|
|
)
|
|
|
|
# Core OCR execution
|
|
from .trocr_ocr import (
|
|
run_trocr_ocr,
|
|
run_trocr_ocr_enhanced,
|
|
_run_pytorch_ocr,
|
|
)
|
|
|
|
# Batch processing & streaming
|
|
from .trocr_batch import (
|
|
run_trocr_batch,
|
|
run_trocr_batch_stream,
|
|
test_trocr_ocr,
|
|
)
|
|
|
|
# Names re-exported by this barrel module, listed in submodule order.
# Underscore-prefixed helpers are included on purpose: listing them here makes
# a star-import of this module expose them as well.
__all__ = [
    # trocr_models: result dataclasses
    "OCRResult", "BatchOCRResult",
    # trocr_models: cache helpers
    "_compute_image_hash", "_cache_get", "_cache_set", "get_cache_stats",
    # trocr_models: model loading, status, and line splitting
    "_check_trocr_available", "get_trocr_model", "preload_trocr_model",
    "get_model_status", "get_active_backend", "_split_into_lines",
    # trocr_ocr: OCR execution
    "run_trocr_ocr", "run_trocr_ocr_enhanced", "_run_pytorch_ocr",
    # trocr_batch: batch processing and streaming
    "run_trocr_batch", "run_trocr_batch_stream", "test_trocr_ocr",
]
|
|
|
|
|
|
if __name__ == "__main__":
    # Command-line smoke test: run test_trocr_ocr on a single image.
    #
    #   python -m <package>.trocr_service <image_path> [--handwritten]
    #
    # This module uses relative imports, so it has to be started with
    # ``python -m`` from its package; running the file as a plain script
    # fails at import time, before this block is ever reached.
    import asyncio
    import sys

    # Anything starting with "--" is treated as a flag; the remaining
    # arguments are positional, and the first one is the image path.
    handwritten = "--handwritten" in sys.argv
    args = [a for a in sys.argv[1:] if not a.startswith("--")]

    if not args:
        # __spec__ carries the qualified module name when started via -m.
        module_name = (
            __spec__.name if __spec__ is not None else "<package>.trocr_service"
        )
        # sys.exit with a string writes it to stderr and exits with status 1,
        # so a missing argument is reported as a failure to the caller.
        sys.exit(f"Usage: python -m {module_name} <image_path> [--handwritten]")

    asyncio.run(test_trocr_ocr(args[0], handwritten=handwritten))
|