breakpilot-lehrer/klausur-service/backend/cv_vocab_pipeline.py

"""
CV-based Document Reconstruction Pipeline for Vocabulary Extraction.

Re-export facade — all logic lives in the sub-modules:

  cv_vocab_types      Dataklassen, Konstanten, IPA, Feature-Flags
  cv_preprocessing    Bild-I/O, Orientierung, Deskew, Dewarp
  cv_layout           Dokumenttyp, Spalten, Zeilen, Klassifikation
  cv_ocr_engines      OCR-Engines, Vocab-Postprocessing, Text-Cleaning
  cv_cell_grid        Cell-Grid (v2 + Legacy), Vocab-Konvertierung
  cv_review           LLM/Spell Review, Pipeline-Orchestrierung

Lizenz: Apache 2.0 (kommerziell nutzbar)
DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
"""

# Re-export each sub-module's public API. `import *` binds the names listed in
# the module's __all__ or, when __all__ is absent, every name that does not
# start with an underscore. If two modules export the same name, the later
# import wins, so the order of these lines is significant — do not sort them.
from cv_vocab_types import *       # noqa: F401,F403
from cv_preprocessing import *     # noqa: F401,F403
from cv_layout import *            # noqa: F401,F403
from cv_ocr_engines import *       # noqa: F401,F403
from cv_cell_grid import *         # noqa: F401,F403
from cv_box_detect import *         # noqa: F401,F403
from cv_review import *            # noqa: F401,F403

# Names used by consumers that the wildcard re-exports above do not provide:
# underscore-prefixed names (skipped by `import *` unless listed in __all__)
# and build_grid_from_words, whose module cv_words_first is not
# wildcard-imported.
from cv_preprocessing import _apply_shear  # noqa: F401
from cv_layout import (  # noqa: F401
    _detect_header_footer_gaps,
    _detect_sub_columns,
    _split_broad_columns,
)
from cv_ocr_engines import (  # noqa: F401
    _fix_character_confusion,
    _fix_phonetic_brackets,
)
from cv_cell_grid import _cells_to_vocab_entries  # noqa: F401
from cv_words_first import build_grid_from_words  # noqa: F401