Fix: Update all old-style imports inside packages to new paths
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 1m7s
CI / test-go-edu-search (push) Successful in 46s
CI / test-python-klausur (push) Failing after 2m32s
CI / test-python-agent-core (push) Successful in 33s
CI / test-nodejs-website (push) Successful in 34s

65 files in klausur-service packages + 3 in backend-lehrer packages
had stale imports referencing deleted shim modules.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-04-26 00:19:13 +02:00
parent 5f2ed44654
commit eecb5472dd
68 changed files with 132 additions and 132 deletions

View File

@@ -2,5 +2,5 @@
Vocab package — restructured from vocab_* flat modules.
Backward-compatible re-exports: consumers can still use
``from vocab_worksheet_api import ...`` etc. via the shim files in backend/.
``from vocab.worksheet.api import ...`` etc.; the old flat ``vocab_*`` shim files in backend/ are removed.
"""

View File

@@ -27,7 +27,7 @@ from .generation import convert_pdf_page_to_image
# Try to import Tesseract extractor
try:
from tesseract_vocab_extractor import (
from ocr.engines.tesseract_extractor import (
extract_bounding_boxes, TESSERACT_AVAILABLE,
)
except ImportError:
@@ -264,7 +264,7 @@ async def extract_with_boxes(session_id: str, page_number: int):
# Deskew image before OCR
deskew_angle = 0.0
try:
from cv_vocab_pipeline import deskew_image_by_word_alignment, CV2_AVAILABLE
from ocr.cv_pipeline import deskew_image_by_word_alignment, CV2_AVAILABLE
if CV2_AVAILABLE:
image_data, deskew_angle = deskew_image_by_word_alignment(image_data)
logger.info(f"Deskew: {deskew_angle:.2f}° for page {page_number}")

View File

@@ -23,7 +23,7 @@ from .generation import convert_pdf_page_to_image
# Try to import Tesseract extractor
try:
from tesseract_vocab_extractor import (
from ocr.engines.tesseract_extractor import (
run_tesseract_pipeline,
match_positions_to_vocab, TESSERACT_AVAILABLE,
)
@@ -32,7 +32,7 @@ except ImportError:
# Try to import CV Pipeline
try:
from cv_vocab_pipeline import run_cv_pipeline, CV_PIPELINE_AVAILABLE
from ocr.cv_pipeline import run_cv_pipeline, CV_PIPELINE_AVAILABLE
except ImportError:
CV_PIPELINE_AVAILABLE = False
@@ -328,7 +328,7 @@ async def analyze_grid(session_id: str, page_number: int, use_tesseract: bool =
# Run Tesseract if not already cached
if not tess_page_data:
logger.info("Running Tesseract for grid analysis (not cached)")
from tesseract_vocab_extractor import run_tesseract_pipeline as _run_tess
from ocr.engines.tesseract_extractor import run_tesseract_pipeline as _run_tess
tess_page_data = await _run_tess(image_data, lang="eng+deu")
session[f"tesseract_page_{page_number}"] = tess_page_data
session["tesseract_words"] = tess_page_data.get("words", [])

View File

@@ -39,7 +39,7 @@ except ImportError:
# CV pipeline helpers
try:
from cv_vocab_pipeline import (
from ocr.cv_pipeline import (
deskew_two_pass,
dewarp_image,
detect_and_fix_orientation,
@@ -54,7 +54,7 @@ except ImportError:
_fix_phonetic_brackets = None # type: ignore[assignment]
try:
from cv_cell_grid import (
from ocr.cell_grid.cell_grid import (
_merge_wrapped_rows,
_merge_phonetic_continuation_rows,
_merge_continuation_rows,
@@ -65,17 +65,17 @@ except ImportError:
_merge_continuation_rows = None # type: ignore[assignment]
try:
from cv_ocr_engines import ocr_region_rapid
from ocr.engines.engines import ocr_region_rapid
except ImportError:
ocr_region_rapid = None # type: ignore[assignment]
try:
from cv_vocab_types import PageRegion
from ocr.types import PageRegion
except ImportError:
PageRegion = None # type: ignore[assignment]
try:
from ocr_pipeline_ocr_merge import (
from ocr.pipeline.ocr_merge import (
_split_paddle_multi_words,
_merge_paddle_tesseract,
_deduplicate_words,
@@ -86,12 +86,12 @@ except ImportError:
_deduplicate_words = None # type: ignore[assignment]
try:
from cv_words_first import build_grid_from_words
from ocr.words_first import build_grid_from_words
except ImportError:
build_grid_from_words = None # type: ignore[assignment]
try:
from ocr_pipeline_session_store import (
from ocr.pipeline.session_store import (
create_session_db as create_pipeline_session_db,
update_session_db as update_pipeline_session_db,
)
@@ -173,7 +173,7 @@ async def _run_ocr_pipeline_for_page(
# 5. Content crop (removes scanner borders, gutter shadows)
t0 = _time.time()
try:
from page_crop import detect_and_crop_page
from ocr.pipeline.page_crop import detect_and_crop_page
cropped_bgr, crop_result = detect_and_crop_page(dewarped_bgr)
if crop_result.get("crop_applied"):
dewarped_bgr = cropped_bgr
@@ -186,7 +186,7 @@ async def _run_ocr_pipeline_for_page(
# 5b. Scan quality assessment
scan_quality_report = None
try:
from scan_quality import score_scan_quality
from ocr.pipeline.scan_quality import score_scan_quality
scan_quality_report = score_scan_quality(dewarped_bgr)
except Exception as e:
logger.warning(f" scan quality: failed ({e})")
@@ -200,7 +200,7 @@ async def _run_ocr_pipeline_for_page(
is_degraded = scan_quality_report.is_degraded if scan_quality_report else False
if is_degraded and enable_enhance:
try:
from ocr_image_enhance import enhance_for_ocr
from ocr.image_enhance import enhance_for_ocr
dewarped_bgr = enhance_for_ocr(dewarped_bgr, is_degraded=True)
logger.info(" enhancement: applied (degraded scan)")
except Exception as e:
@@ -212,8 +212,8 @@ async def _run_ocr_pipeline_for_page(
# RapidOCR (local ONNX)
try:
from cv_ocr_engines import ocr_region_rapid
from cv_vocab_types import PageRegion
from ocr.engines.engines import ocr_region_rapid
from ocr.types import PageRegion
full_region = PageRegion(type="full_page", x=0, y=0, width=img_w, height=img_h)
rapid_words = ocr_region_rapid(dewarped_bgr, full_region) or []
except Exception as e:
@@ -243,8 +243,8 @@ async def _run_ocr_pipeline_for_page(
})
# Merge dual-engine results
from ocr_pipeline_ocr_merge import _split_paddle_multi_words, _merge_paddle_tesseract, _deduplicate_words
from cv_words_first import build_grid_from_words
from ocr.pipeline.ocr_merge import _split_paddle_multi_words, _merge_paddle_tesseract, _deduplicate_words
from ocr.words_first import build_grid_from_words
rapid_split = _split_paddle_multi_words(rapid_words) if rapid_words else []
if rapid_split or tess_words:
@@ -300,7 +300,7 @@ async def _run_ocr_pipeline_for_page(
# 8. Run full grid-build (with pipe-autocorrect, word-gap merge, etc.)
t0 = _time.time()
try:
from grid_editor_api import _build_grid_core
from grid.editor.api import _build_grid_core
session_data = {
"word_result": word_result,
}

View File

@@ -36,7 +36,7 @@ LOCAL_STORAGE_PATH = os.getenv("VOCAB_STORAGE_PATH", "/app/vocab-worksheets")
try:
import numpy as np
from cv_preprocessing import render_pdf_high_res, detect_and_fix_orientation
from ocr.preprocessing.preprocessing import render_pdf_high_res, detect_and_fix_orientation
OCR_PIPELINE_AVAILABLE = True
except ImportError:
np = None # type: ignore[assignment]