Cleanup: Delete ALL 242 shims, update ALL consumer imports

klausur-service: 183 shims deleted, 26 test files + 8 source files updated.
backend-lehrer: 59 shims deleted, main.py + 8 source files updated.

All imports now use the new package paths directly.
Zero shims remaining in the entire codebase.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-26 00:11:33 +02:00
parent d093a4d388
commit 5f2ed44654
288 changed files with 214 additions and 1182 deletions
@@ -6,7 +6,7 @@ sys.path.insert(0, "/app/backend")
 import cv2
 import numpy as np
 import pytesseract
-from ocr_pipeline_session_store import get_session_db
+from ocr.pipeline.session_store import get_session_db

 SESSION_ID = "3dcb1897-09a6-4b80-91b5-7e4207980bf3"

@@ -74,7 +74,7 @@ async def main():

    # Also test the 4 dewarp methods directly
    print("\n--- Dewarp method results on deskewed image ---")
-    from cv_vocab_pipeline import (
+    from ocr.cv_pipeline import (
        _detect_shear_angle, _detect_shear_by_projection,
        _detect_shear_by_hough, _detect_shear_by_text_lines,
    )
@@ -257,7 +257,7 @@ class TestPDFExtraction:

    def test_pdf_extraction_config(self):
        """Test PDF extraction configuration."""
-        from pdf_extraction import PDF_BACKEND, get_pdf_extraction_info
+        from korrektur.pdf_extraction import PDF_BACKEND, get_pdf_extraction_info

        info = get_pdf_extraction_info()
        assert "configured_backend" in info
@@ -266,7 +266,7 @@ class TestPDFExtraction:

    def test_detect_available_backends(self):
        """Test backend detection."""
-        from pdf_extraction import _detect_available_backends
+        from korrektur.pdf_extraction import _detect_available_backends

        backends = _detect_available_backends()
        assert isinstance(backends, list)
@@ -280,7 +280,7 @@ class TestPDFExtraction:

    def test_pdf_extraction_result_class(self):
        """Test PDFExtractionResult data class."""
-        from pdf_extraction import PDFExtractionResult
+        from korrektur.pdf_extraction import PDFExtractionResult

        result = PDFExtractionResult(
            text="Extracted text",
@@ -305,7 +305,7 @@ class TestPDFExtraction:

    def test_pdf_extraction_error(self):
        """Test PDF extraction error handling."""
-        from pdf_extraction import PDFExtractionError
+        from korrektur.pdf_extraction import PDFExtractionError

        with pytest.raises(PDFExtractionError):
            raise PDFExtractionError("Test error")
@@ -313,7 +313,7 @@ class TestPDFExtraction:
    @pytest.mark.xfail(reason="_extract_with_pypdf is internal function not exposed in API")
    def test_pypdf_extraction(self):
        """Test pypdf extraction with a simple PDF (BSD-3-Clause licensed)."""
-        from pdf_extraction import _extract_with_pypdf, PDFExtractionError
+        from korrektur.pdf_extraction import _extract_with_pypdf, PDFExtractionError

        # Create a minimal valid PDF
        # This is a very simple PDF that PyPDF2 can parse
@@ -517,7 +517,7 @@ class TestModuleAvailability:

    def test_pdf_extraction_import(self):
        """Test PDF Extraction module import."""
-        from pdf_extraction import (
+        from korrektur.pdf_extraction import (
            extract_text_from_pdf,
            extract_text_from_pdf_enhanced,
            get_pdf_extraction_info,
@@ -551,7 +551,7 @@ class TestFeatureVerification:
        from hyde import get_hyde_info
        from hybrid_search import get_hybrid_search_info
        from rag_evaluation import get_evaluation_info
-        from pdf_extraction import get_pdf_extraction_info
+        from korrektur.pdf_extraction import get_pdf_extraction_info
        from self_rag import get_self_rag_info

        infos = [
@@ -598,7 +598,7 @@ class TestRAGAdminAPI:
    @pytest.mark.asyncio
    async def test_rag_documentation_markdown_format(self):
        """Test RAG documentation endpoint returns markdown."""
-        from admin_api import get_rag_documentation
+        from admin.api import get_rag_documentation

        result = await get_rag_documentation(format="markdown")

@@ -610,7 +610,7 @@ class TestRAGAdminAPI:
    @pytest.mark.asyncio
    async def test_rag_documentation_html_format(self):
        """Test RAG documentation endpoint returns HTML with tables."""
-        from admin_api import get_rag_documentation
+        from admin.api import get_rag_documentation

        result = await get_rag_documentation(format="html")

@@ -628,7 +628,7 @@ class TestRAGAdminAPI:
    @pytest.mark.asyncio
    async def test_rag_system_info_has_feature_status(self):
        """Test RAG system-info includes feature status."""
-        from admin_api import get_rag_system_info
+        from admin.api import get_rag_system_info

        result = await get_rag_system_info()

@@ -639,7 +639,7 @@ class TestRAGAdminAPI:
    @pytest.mark.asyncio
    async def test_rag_system_info_has_privacy_notes(self):
        """Test RAG system-info includes privacy notes."""
-        from admin_api import get_rag_system_info
+        from admin.api import get_rag_system_info

        result = await get_rag_system_info()

@@ -15,8 +15,8 @@ import os

 sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))

-from ocr_pipeline_api import _filter_border_ghost_words, _BORDER_GHOST_CHARS
-from cv_vocab_types import DetectedBox
+from ocr.pipeline.api import _filter_border_ghost_words, _BORDER_GHOST_CHARS
+from ocr.types import DetectedBox


 # ---------------------------------------------------------------------------
@@ -14,7 +14,7 @@ import os

 sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))

-from cv_words_first import build_grid_from_words, _cluster_columns
+from ocr.words_first import build_grid_from_words, _cluster_columns


 # ---------------------------------------------------------------------------
@@ -4,7 +4,7 @@ import pytest
 import sys, os
 sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))

-from cv_box_layout import classify_box_layout, build_box_zone_grid, _group_into_lines
+from ocr.detect.box_layout import classify_box_layout, build_box_zone_grid, _group_into_lines


 def _make_words(lines_data):
@@ -514,7 +514,7 @@ class TestEncryptionUtils:

    def test_hash_key(self):
        """Key hashing produces consistent results."""
-        from eh_pipeline import hash_key
+        from korrektur.eh_pipeline import hash_key
        import os

        passphrase = "test-secret-passphrase"
@@ -527,7 +527,7 @@ class TestEncryptionUtils:

    def test_verify_key_hash(self):
        """Key hash verification works correctly."""
-        from eh_pipeline import hash_key, verify_key_hash
+        from korrektur.eh_pipeline import hash_key, verify_key_hash
        import os

        passphrase = "test-secret-passphrase"
@@ -539,7 +539,7 @@ class TestEncryptionUtils:

    def test_chunk_text(self):
        """Text chunking produces correct overlap."""
-        from eh_pipeline import chunk_text
+        from korrektur.eh_pipeline import chunk_text

        text = "A" * 2000  # 2000 characters
        chunks = chunk_text(text, chunk_size=1000, overlap=200)
@@ -550,7 +550,7 @@ class TestEncryptionUtils:

    def test_encrypt_decrypt_text(self):
        """Text encryption and decryption round-trip."""
-        from eh_pipeline import encrypt_text, decrypt_text
+        from korrektur.eh_pipeline import encrypt_text, decrypt_text

        plaintext = "Dies ist ein geheimer Text."
        passphrase = "geheim123"
@@ -13,71 +13,71 @@ class TestInsertMissingIpa:

    def test_single_headword_gets_ipa(self):
        """Single English headword should get IPA inserted."""
-        from cv_ocr_engines import _insert_missing_ipa
+        from ocr.engines.engines import _insert_missing_ipa
        result = _insert_missing_ipa("badge", "british")
        assert "[" in result and "]" in result
        assert result.startswith("badge [")

    def test_short_phrase_first_word_gets_ipa(self):
        """First real word in short phrase gets IPA."""
-        from cv_ocr_engines import _insert_missing_ipa
+        from ocr.engines.engines import _insert_missing_ipa
        result = _insert_missing_ipa("film", "british")
        assert "[" in result

    def test_long_sentence_unchanged(self):
        """Sentences with >6 words should not get IPA."""
-        from cv_ocr_engines import _insert_missing_ipa
+        from ocr.engines.engines import _insert_missing_ipa
        text = "Can I borrow your CD player from you please"
        result = _insert_missing_ipa(text, "british")
        assert result == text

    def test_existing_brackets_unchanged(self):
        """Text with existing brackets should not get double IPA."""
-        from cv_ocr_engines import _insert_missing_ipa
+        from ocr.engines.engines import _insert_missing_ipa
        text = "dance [dˈɑːns]"
        result = _insert_missing_ipa(text, "british")
        assert result == text

    def test_empty_text_unchanged(self):
        """Empty text returns empty."""
-        from cv_ocr_engines import _insert_missing_ipa
+        from ocr.engines.engines import _insert_missing_ipa
        assert _insert_missing_ipa("", "british") == ""
        assert _insert_missing_ipa("  ", "british") == ""

    def test_grammar_words_skipped(self):
        """Grammar particles should not get IPA."""
-        from cv_ocr_engines import _insert_missing_ipa
+        from ocr.engines.engines import _insert_missing_ipa
        # "sth" is in _GRAMMAR_BRACKET_WORDS
        result = _insert_missing_ipa("sth", "british")
        assert "[" not in result

    def test_german_word_no_ipa(self):
        """German words (no IPA entry) stay unchanged."""
-        from cv_ocr_engines import _insert_missing_ipa
+        from ocr.engines.engines import _insert_missing_ipa
        result = _insert_missing_ipa("Anstecknadel", "british")
        assert result == "Anstecknadel"

    def test_compound_word_schoolbag_gets_ipa(self):
        """R07: Compound word 'schoolbag' should get decomposed IPA (school+bag)."""
-        from cv_ocr_engines import _insert_missing_ipa
+        from ocr.engines.engines import _insert_missing_ipa
        result = _insert_missing_ipa("schoolbag", "british")
        assert "[" in result and "]" in result
        assert result.startswith("schoolbag [")

    def test_compound_word_blackbird(self):
        """Compound word 'blackbird' should get decomposed IPA."""
-        from cv_ocr_engines import _insert_missing_ipa
+        from ocr.engines.engines import _insert_missing_ipa
        result = _insert_missing_ipa("blackbird", "british")
        assert "[" in result and "]" in result

    def test_compound_word_too_short(self):
        """Words shorter than 6 chars should not attempt compound decomposition."""
-        from cv_ocr_engines import _decompose_compound
+        from ocr.engines.engines import _decompose_compound
        assert _decompose_compound("bag", "british") is None

    def test_decompose_compound_direct(self):
        """Direct test of _decompose_compound for known compounds."""
-        from cv_ocr_engines import _decompose_compound
+        from ocr.engines.engines import _decompose_compound
        # schoolbag = school + bag — both should be in dictionary
        result = _decompose_compound("schoolbag", "british")
        assert result is not None
@@ -88,14 +88,14 @@ class TestStripPostBracketGarbled:

    def test_simple_trailing_garbled(self):
        """R21-simple: 'sea [sˈiː] si:' → trailing IPA marker removed."""
-        from cv_ocr_engines import _strip_post_bracket_garbled
+        from ocr.engines.engines import _strip_post_bracket_garbled
        result = _strip_post_bracket_garbled("sea [sˈiː] si:")
        assert "si:" not in result
        assert result.startswith("sea [sˈiː]")

    def test_multi_word_trailing_garbled(self):
        """R21: 'seat [sˈiːt] belt si:t belt' → keep 'belt', remove garbled."""
-        from cv_ocr_engines import _strip_post_bracket_garbled
+        from ocr.engines.engines import _strip_post_bracket_garbled
        result = _strip_post_bracket_garbled("seat [sˈiːt] belt si:t belt")
        assert "belt" in result  # real word kept
        assert "si:t" not in result  # garbled removed
@@ -104,13 +104,13 @@ class TestStripPostBracketGarbled:

    def test_delimiter_after_bracket_kept(self):
        """Delimiters after IPA bracket are kept."""
-        from cv_ocr_engines import _strip_post_bracket_garbled
+        from ocr.engines.engines import _strip_post_bracket_garbled
        result = _strip_post_bracket_garbled("dance [dˈɑːns] – tanzen")
        assert "– tanzen" in result

    def test_german_after_bracket_kept(self):
        """German words (uppercase) after IPA bracket are kept."""
-        from cv_ocr_engines import _strip_post_bracket_garbled
+        from ocr.engines.engines import _strip_post_bracket_garbled
        result = _strip_post_bracket_garbled("badge [bædʒ] Abzeichen")
        assert "Abzeichen" in result

@@ -120,7 +120,7 @@ class TestFixCellPhonetics:

    def test_english_column_cells_processed(self):
        """Cells with col_type column_en should be processed."""
-        from cv_ocr_engines import fix_cell_phonetics
+        from ocr.engines.engines import fix_cell_phonetics
        cells = [
            {"cell_id": "c1", "col_type": "column_en", "text": "badge"},
            {"cell_id": "c2", "col_type": "column_de", "text": "Anstecknadel"},
@@ -133,7 +133,7 @@ class TestFixCellPhonetics:

    def test_column_text_cells_processed(self):
        """Cells with col_type column_text should be processed."""
-        from cv_ocr_engines import fix_cell_phonetics
+        from ocr.engines.engines import fix_cell_phonetics
        cells = [
            {"cell_id": "c1", "col_type": "column_text", "text": "challenge"},
        ]
@@ -142,7 +142,7 @@ class TestFixCellPhonetics:

    def test_garbled_ipa_replaced(self):
        """Garbled IPA brackets should be replaced with correct IPA."""
-        from cv_ocr_engines import fix_cell_phonetics
+        from ocr.engines.engines import fix_cell_phonetics
        cells = [
            {"cell_id": "c1", "col_type": "column_en", "text": "dance {'tfatno]"},
        ]
@@ -154,7 +154,7 @@ class TestFixCellPhonetics:

    def test_empty_cells_unchanged(self):
        """Empty cells should not cause errors."""
-        from cv_ocr_engines import fix_cell_phonetics
+        from ocr.engines.engines import fix_cell_phonetics
        cells = [
            {"cell_id": "c1", "col_type": "column_en", "text": ""},
            {"cell_id": "c2", "col_type": "column_en", "text": None},
@@ -164,7 +164,7 @@ class TestFixCellPhonetics:

    def test_non_english_col_types_skipped(self):
        """Cells with column_de, column_example etc. should not be processed."""
-        from cv_ocr_engines import fix_cell_phonetics
+        from ocr.engines.engines import fix_cell_phonetics
        cells = [
            {"cell_id": "c1", "col_type": "column_de", "text": "Eis (gefrorenes Wasser)"},
            {"cell_id": "c2", "col_type": "column_example", "text": "(sich beschweren)"},
@@ -9,8 +9,8 @@ import pytest

 import cv2

-from cv_box_detect import detect_boxes, split_page_into_zones
-from cv_vocab_types import DetectedBox, PageZone
+from ocr.detect.box_detect import detect_boxes, split_page_into_zones
+from ocr.types import DetectedBox, PageZone


 # ---------------------------------------------------------------------------
@@ -9,7 +9,7 @@ import pytest

 import cv2

-from cv_graphic_detect import detect_graphic_elements, GraphicElement, _dominant_color
+from ocr.detect.graphic_detect import detect_graphic_elements, GraphicElement, _dominant_color


 # ---------------------------------------------------------------------------
@@ -23,7 +23,7 @@ from unittest.mock import AsyncMock, MagicMock, patch, PropertyMock
 from dataclasses import asdict

 # Import module under test
-from cv_vocab_pipeline import (
+from ocr.cv_pipeline import (
    ColumnGeometry,
    DocumentTypeResult,
    PageRegion,
@@ -1408,7 +1408,7 @@ class TestCellsToVocabEntriesPageRef:

    def test_page_ref_mapped_to_source_page(self):
        """Cell with col_type='page_ref' → source_page field populated."""
-        from cv_vocab_pipeline import _cells_to_vocab_entries
+        from ocr.cv_pipeline import _cells_to_vocab_entries

        cells = [
            {
@@ -1450,7 +1450,7 @@ class TestCellsToVocabEntriesPageRef:

    def test_no_page_ref_defaults_empty(self):
        """Without page_ref cell, source_page defaults to empty string."""
-        from cv_vocab_pipeline import _cells_to_vocab_entries
+        from ocr.cv_pipeline import _cells_to_vocab_entries

        cells = [
            {
@@ -1472,7 +1472,7 @@ class TestCellsToVocabEntriesPageRef:

    def test_marker_only_row_included(self):
        """Row with only a marker (no english/german/example) is kept."""
-        from cv_vocab_pipeline import _cells_to_vocab_entries
+        from ocr.cv_pipeline import _cells_to_vocab_entries

        cells = [
            # Row 0: has english + marker
@@ -1543,7 +1543,7 @@ class TestCellsToVocabEntriesPageRef:

    def test_page_ref_only_row_included(self):
        """Row with only source_page text is kept (no english/german/example)."""
-        from cv_vocab_pipeline import _cells_to_vocab_entries
+        from ocr.cv_pipeline import _cells_to_vocab_entries

        cells = [
            {
@@ -1,7 +1,7 @@
 """Tests for cv_words_first.py — Words-First Grid Builder."""

 import pytest
-from cv_words_first import (
+from ocr.words_first import (
    _assign_word_to_column,
    _assign_word_to_row,
    _build_cells,
@@ -10,8 +10,8 @@ import os
 # Add backend to path for imports
 sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))

-from cv_vocab_types import ColumnGeometry
-from cv_layout import _score_dictionary_signals, _classify_dictionary_columns, _score_language
+from ocr.types import ColumnGeometry
+from ocr.layout.layout import _score_dictionary_signals, _classify_dictionary_columns, _score_language


 def _make_words(texts, start_y=0, y_step=30, x=100, conf=80):
@@ -19,8 +19,8 @@ import importlib
 # ---------------------------------------------------------------------------

 def _fresh_import():
-    """Re-import cv_doclayout_detect with reset globals."""
-    import cv_doclayout_detect as mod
+    """Re-import ocr.detect.doclayout_detect with reset globals."""
+    import ocr.detect.doclayout_detect as mod
    # Reset module-level caching so each test starts clean
    mod._onnx_session = None
    mod._model_path = None
@@ -62,7 +62,7 @@ class TestIsDoclayoutAvailableNoModel:

 class TestLayoutRegionDataclass:
    def test_basic_creation(self):
-        from cv_doclayout_detect import LayoutRegion
+        from ocr.detect.doclayout_detect import LayoutRegion
        region = LayoutRegion(
            x=10, y=20, width=100, height=200,
            label="figure", confidence=0.95, label_index=1,
@@ -76,14 +76,14 @@ class TestLayoutRegionDataclass:
        assert region.label_index == 1

    def test_all_fields_present(self):
-        from cv_doclayout_detect import LayoutRegion
+        from ocr.detect.doclayout_detect import LayoutRegion
        import dataclasses
        field_names = {f.name for f in dataclasses.fields(LayoutRegion)}
        expected = {"x", "y", "width", "height", "label", "confidence", "label_index"}
        assert field_names == expected

    def test_different_labels(self):
-        from cv_doclayout_detect import LayoutRegion, DOCLAYOUT_CLASSES
+        from ocr.detect.doclayout_detect import LayoutRegion, DOCLAYOUT_CLASSES
        for idx, label in enumerate(DOCLAYOUT_CLASSES):
            region = LayoutRegion(
                x=0, y=0, width=50, height=50,
@@ -125,7 +125,7 @@ class TestDetectLayoutRegionsNoModel:

 class TestPreprocessingShapes:
    def test_square_image(self):
-        from cv_doclayout_detect import preprocess_image
+        from ocr.detect.doclayout_detect import preprocess_image
        img = np.random.randint(0, 255, (800, 800, 3), dtype=np.uint8)
        tensor, scale, pad_x, pad_y = preprocess_image(img)
        assert tensor.shape == (1, 3, 800, 800)
@@ -134,7 +134,7 @@ class TestPreprocessingShapes:
        assert tensor.max() <= 1.0

    def test_landscape_image(self):
-        from cv_doclayout_detect import preprocess_image
+        from ocr.detect.doclayout_detect import preprocess_image
        img = np.random.randint(0, 255, (600, 1200, 3), dtype=np.uint8)
        tensor, scale, pad_x, pad_y = preprocess_image(img)
        assert tensor.shape == (1, 3, 800, 800)
@@ -144,7 +144,7 @@ class TestPreprocessingShapes:
        assert pad_y > 0  # vertical padding expected

    def test_portrait_image(self):
-        from cv_doclayout_detect import preprocess_image
+        from ocr.detect.doclayout_detect import preprocess_image
        img = np.random.randint(0, 255, (1200, 600, 3), dtype=np.uint8)
        tensor, scale, pad_x, pad_y = preprocess_image(img)
        assert tensor.shape == (1, 3, 800, 800)
@@ -154,20 +154,20 @@ class TestPreprocessingShapes:
        assert pad_x > 0  # horizontal padding expected

    def test_small_image(self):
-        from cv_doclayout_detect import preprocess_image
+        from ocr.detect.doclayout_detect import preprocess_image
        img = np.random.randint(0, 255, (100, 200, 3), dtype=np.uint8)
        tensor, scale, pad_x, pad_y = preprocess_image(img)
        assert tensor.shape == (1, 3, 800, 800)

    def test_typical_scan_a4(self):
        """A4 scan at 300dpi: roughly 2480x3508 pixels."""
-        from cv_doclayout_detect import preprocess_image
+        from ocr.detect.doclayout_detect import preprocess_image
        img = np.random.randint(0, 255, (3508, 2480, 3), dtype=np.uint8)
        tensor, scale, pad_x, pad_y = preprocess_image(img)
        assert tensor.shape == (1, 3, 800, 800)

    def test_values_normalized(self):
-        from cv_doclayout_detect import preprocess_image
+        from ocr.detect.doclayout_detect import preprocess_image
        # All white image
        img = np.full((400, 400, 3), 255, dtype=np.uint8)
        tensor, _, _, _ = preprocess_image(img)
@@ -182,20 +182,20 @@ class TestPreprocessingShapes:

 class TestNmsLogic:
    def test_empty_input(self):
-        from cv_doclayout_detect import nms
+        from ocr.detect.doclayout_detect import nms
        boxes = np.array([]).reshape(0, 4)
        scores = np.array([])
        assert nms(boxes, scores) == []

    def test_single_box(self):
-        from cv_doclayout_detect import nms
+        from ocr.detect.doclayout_detect import nms
        boxes = np.array([[10, 10, 100, 100]], dtype=np.float32)
        scores = np.array([0.9])
        kept = nms(boxes, scores, iou_threshold=0.5)
        assert kept == [0]

    def test_non_overlapping_boxes(self):
-        from cv_doclayout_detect import nms
+        from ocr.detect.doclayout_detect import nms
        boxes = np.array([
            [0, 0, 50, 50],
            [200, 200, 300, 300],
@@ -207,7 +207,7 @@ class TestNmsLogic:
        assert set(kept) == {0, 1, 2}

    def test_overlapping_boxes_suppressed(self):
-        from cv_doclayout_detect import nms
+        from ocr.detect.doclayout_detect import nms
        # Two boxes that heavily overlap
        boxes = np.array([
            [10, 10, 110, 110],   # 100x100
@@ -219,7 +219,7 @@ class TestNmsLogic:
        assert kept == [0]

    def test_partially_overlapping_boxes_kept(self):
-        from cv_doclayout_detect import nms
+        from ocr.detect.doclayout_detect import nms
        # Two boxes that overlap ~25% (below 0.5 threshold)
        boxes = np.array([
            [0, 0, 100, 100],     # 100x100
@@ -231,7 +231,7 @@ class TestNmsLogic:
        assert len(kept) == 2

    def test_nms_respects_score_ordering(self):
-        from cv_doclayout_detect import nms
+        from ocr.detect.doclayout_detect import nms
        # Three overlapping boxes — highest confidence should be kept first
        boxes = np.array([
            [10, 10, 110, 110],
@@ -244,7 +244,7 @@ class TestNmsLogic:
        assert kept[0] == 1

    def test_iou_computation(self):
-        from cv_doclayout_detect import _compute_iou
+        from ocr.detect.doclayout_detect import _compute_iou
        box_a = np.array([0, 0, 100, 100], dtype=np.float32)
        box_b = np.array([0, 0, 100, 100], dtype=np.float32)
        assert abs(_compute_iou(box_a, box_b) - 1.0) < 1e-5
@@ -259,7 +259,7 @@ class TestNmsLogic:

 class TestDoclayoutClasses:
    def test_correct_class_list(self):
-        from cv_doclayout_detect import DOCLAYOUT_CLASSES
+        from ocr.detect.doclayout_detect import DOCLAYOUT_CLASSES
        expected = [
            "table", "figure", "title", "text", "list",
            "header", "footer", "equation", "reference", "abstract",
@@ -267,15 +267,15 @@ class TestDoclayoutClasses:
        assert DOCLAYOUT_CLASSES == expected

    def test_class_count(self):
-        from cv_doclayout_detect import DOCLAYOUT_CLASSES
+        from ocr.detect.doclayout_detect import DOCLAYOUT_CLASSES
        assert len(DOCLAYOUT_CLASSES) == 10

    def test_no_duplicates(self):
-        from cv_doclayout_detect import DOCLAYOUT_CLASSES
+        from ocr.detect.doclayout_detect import DOCLAYOUT_CLASSES
        assert len(DOCLAYOUT_CLASSES) == len(set(DOCLAYOUT_CLASSES))

    def test_all_lowercase(self):
-        from cv_doclayout_detect import DOCLAYOUT_CLASSES
+        from ocr.detect.doclayout_detect import DOCLAYOUT_CLASSES
        for cls in DOCLAYOUT_CLASSES:
            assert cls == cls.lower(), f"Class '{cls}' should be lowercase"

@@ -303,7 +303,7 @@ class TestGetDoclayoutStatus:
 class TestPostprocessing:
    def test_single_tensor_format_6cols(self):
        """Test parsing of (1, N, 6) output format: x1,y1,x2,y2,score,class."""
-        from cv_doclayout_detect import _postprocess
+        from ocr.detect.doclayout_detect import _postprocess

        # One detection: figure at (100,100)-(300,300) in 800x800 space
        raw = np.array([[[100, 100, 300, 300, 0.92, 1]]], dtype=np.float32)
@@ -320,7 +320,7 @@ class TestPostprocessing:

    def test_three_tensor_format(self):
        """Test parsing of 3-tensor output: boxes, scores, class_ids."""
-        from cv_doclayout_detect import _postprocess
+        from ocr.detect.doclayout_detect import _postprocess

        boxes = np.array([[50, 50, 200, 150]], dtype=np.float32)
        scores = np.array([0.88], dtype=np.float32)
@@ -338,7 +338,7 @@ class TestPostprocessing:

    def test_confidence_filtering(self):
        """Detections below threshold should be excluded."""
-        from cv_doclayout_detect import _postprocess
+        from ocr.detect.doclayout_detect import _postprocess

        raw = np.array([
            [100, 100, 200, 200, 0.9, 1],   # above threshold
@@ -357,7 +357,7 @@ class TestPostprocessing:

    def test_coordinate_scaling(self):
        """Verify coordinates are correctly scaled back to original image."""
-        from cv_doclayout_detect import _postprocess
+        from ocr.detect.doclayout_detect import _postprocess

        # Image was 1600x1200, scaled to fit 800x800 → scale=0.5, pad_y offset
        scale = 800 / 1600  # 0.5
@@ -382,7 +382,7 @@ class TestPostprocessing:
        assert r.y == 200

    def test_empty_output(self):
-        from cv_doclayout_detect import _postprocess
+        from ocr.detect.doclayout_detect import _postprocess
        raw = np.array([]).reshape(1, 0, 6).astype(np.float32)
        regions = _postprocess(
            outputs=[raw],
@@ -14,15 +14,15 @@ sys.path.insert(0, '/app')
 import cv2
 import numpy as np
 import pytest
-from cv_vocab_types import PageZone, DetectedBox
-from grid_editor_api import (
+from ocr.types import PageZone, DetectedBox
+from grid.editor.api import (
    _merge_content_zones_across_boxes,
    _filter_border_ghosts,
    _detect_header_rows,
    _detect_heading_rows_by_color,
    _detect_heading_rows_by_single_cell,
 )
-from cv_ocr_engines import _text_has_garbled_ipa, fix_ipa_continuation_cell
+from ocr.engines.engines import _text_has_garbled_ipa, fix_ipa_continuation_cell


 # ---------------------------------------------------------------------------
@@ -818,7 +818,7 @@ class TestSlashIpaConversion:
    def _run_step_5h(self, text: str) -> str:
        """Run the Step 5h regex logic on a single text string."""
        import re
-        from cv_ocr_engines import _lookup_ipa
+        from ocr.engines.engines import _lookup_ipa

        _SLASH_IPA_RE = re.compile(
            r'(\b[a-zA-Z]+[²³¹]?)\s*'
@@ -926,7 +926,7 @@ class TestRedFalsePositiveSuppression:
    def test_low_saturation_red_classified_as_black(self):
        """Black text with slight warm scanner tint (sat ~85) → black, not red."""
        import numpy as np
-        from cv_color_detect import detect_word_colors
+        from ocr.detect.color_detect import detect_word_colors

        # Create a 40x20 image with dark gray pixels (slight warm tint)
        # HSV: hue=5 (red range), sat=85 (above 55 threshold but below 90), val=40
@@ -941,7 +941,7 @@ class TestRedFalsePositiveSuppression:
    def test_high_saturation_red_classified_as_red(self):
        """Genuinely red text (sat=150) → red."""
        import numpy as np
-        from cv_color_detect import detect_word_colors
+        from ocr.detect.color_detect import detect_word_colors

        # White background with red text region
        # Background: white (H=0, S=0, V=255)
@@ -984,7 +984,7 @@ class TestBlueBulletFilter:
        zone = {"zone_index": 0, "cells": [cell], "rows": [], "columns": []}

        # Run the bullet filter logic inline
-        from grid_editor_api import _build_grid_core
+        from grid.editor.api import _build_grid_core
        # Instead, test the logic directly
        wbs = cell["word_boxes"]
        to_remove = set()
@@ -1057,7 +1057,7 @@ class TestWordBoxReadingOrder:

    def test_single_line_sorted_by_left(self):
        """Words on same Y line sorted by X (left) position."""
-        from cv_ocr_engines import _group_words_into_lines
+        from ocr.engines.engines import _group_words_into_lines
        wbs = [
            {"text": "up",        "left": 376, "top": 264, "width": 22, "height": 19},
            {"text": "tie",       "left": 284, "top": 264, "width": 23, "height": 14},
@@ -1069,7 +1069,7 @@ class TestWordBoxReadingOrder:

    def test_two_lines_preserves_line_order(self):
        """Words on two Y lines: first line first, then second line."""
-        from cv_ocr_engines import _group_words_into_lines
+        from ocr.engines.engines import _group_words_into_lines
        wbs = [
            {"text": "b)", "left": 100, "top": 290, "width": 20, "height": 15},
            {"text": "cat", "left": 50, "top": 264, "width": 30, "height": 15},
@@ -1082,7 +1082,7 @@ class TestWordBoxReadingOrder:

    def test_already_sorted_unchanged(self):
        """Already-sorted word_boxes stay in same order."""
-        from cv_ocr_engines import _group_words_into_lines
+        from ocr.engines.engines import _group_words_into_lines
        wbs = [
            {"text": "tie",    "left": 284, "top": 264, "width": 23, "height": 14},
            {"text": "sb/sth", "left": 309, "top": 264, "width": 57, "height": 20},
@@ -7,7 +7,7 @@ import os
 # Add parent directory to path so we can import the module
 sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))

-from cv_gutter_repair import (
+from ocr.gutter.repair import (
    _is_known,
    _try_hyphen_join,
    _try_spell_fix,
@@ -173,7 +173,7 @@ class TestMarkdownParser:

    def test_parse_simple_markdown(self):
        """Test parsing simple markdown content."""
-        from github_crawler import MarkdownParser
+        from crawler.github import MarkdownParser

        content = """# Test Title

@@ -191,21 +191,21 @@ More content here.

    def test_extract_title_from_heading(self):
        """Test extracting title from h1 heading."""
-        from github_crawler import MarkdownParser
+        from crawler.github import MarkdownParser

        title = MarkdownParser._extract_title("# My Document\n\nContent", "fallback.md")
        assert title == "My Document"

    def test_extract_title_fallback(self):
        """Test fallback to filename when no heading."""
-        from github_crawler import MarkdownParser
+        from crawler.github import MarkdownParser

        title = MarkdownParser._extract_title("No heading here", "my-document.md")
        assert title == "My Document"

    def test_detect_german_language(self):
        """Test German language detection."""
-        from github_crawler import MarkdownParser
+        from crawler.github import MarkdownParser

        german_text = "Dies ist eine Datenschutzerklaerung fuer die Verarbeitung personenbezogener Daten."
        lang = MarkdownParser._detect_language(german_text)
@@ -213,7 +213,7 @@ More content here.

    def test_detect_english_language(self):
        """Test English language detection."""
-        from github_crawler import MarkdownParser
+        from crawler.github import MarkdownParser

        english_text = "This is a privacy policy for processing personal data in our application."
        lang = MarkdownParser._detect_language(english_text)
@@ -221,7 +221,7 @@ More content here.

    def test_find_placeholders(self):
        """Test finding placeholder patterns."""
-        from github_crawler import MarkdownParser
+        from crawler.github import MarkdownParser

        content = "Company: [COMPANY_NAME], Contact: {email}, Address: __ADDRESS__"
        placeholders = MarkdownParser._find_placeholders(content)
@@ -236,7 +236,7 @@ class TestHTMLParser:

    def test_parse_simple_html(self):
        """Test parsing simple HTML content."""
-        from github_crawler import HTMLParser
+        from crawler.github import HTMLParser

        content = """<!DOCTYPE html>
 <html>
@@ -255,7 +255,7 @@ class TestHTMLParser:

    def test_html_to_text_removes_scripts(self):
        """Test that scripts are removed from HTML."""
-        from github_crawler import HTMLParser
+        from crawler.github import HTMLParser

        html = "<p>Text</p><script>alert('bad');</script><p>More</p>"
        text = HTMLParser._html_to_text(html)
@@ -270,7 +270,7 @@ class TestJSONParser:

    def test_parse_simple_json(self):
        """Test parsing simple JSON content."""
-        from github_crawler import JSONParser
+        from crawler.github import JSONParser

        content = json.dumps({
            "title": "Privacy Policy",
@@ -286,7 +286,7 @@ class TestJSONParser:

    def test_parse_nested_json(self):
        """Test parsing nested JSON structures."""
-        from github_crawler import JSONParser
+        from crawler.github import JSONParser

        content = json.dumps({
            "sections": {
@@ -305,7 +305,7 @@ class TestExtractedDocument:

    def test_extracted_document_hash(self):
        """Test that source hash is auto-generated."""
-        from github_crawler import ExtractedDocument
+        from crawler.github import ExtractedDocument

        doc = ExtractedDocument(
            text="Some content",
@@ -396,7 +396,7 @@ class TestLegalTemplatesIngestion:
    def test_infer_template_type_privacy(self):
        """Test inferring privacy policy type."""
        from legal_templates_ingestion import LegalTemplatesIngestion
-        from github_crawler import ExtractedDocument
+        from crawler.github import ExtractedDocument
        from template_sources import SourceConfig, LicenseType

        with patch('legal_templates_ingestion.QdrantClient'):
@@ -449,7 +449,7 @@ class TestTemplatesAdminAPI:

    def test_templates_status_structure(self):
        """Test the structure of templates status response."""
-        from admin_api import _templates_ingestion_status
+        from admin.api import _templates_ingestion_status

        # Reset status
        _templates_ingestion_status["running"] = False
@@ -462,7 +462,7 @@ class TestTemplatesAdminAPI:

    def test_templates_status_running(self):
        """Test status when ingestion is running."""
-        from admin_api import _templates_ingestion_status
+        from admin.api import _templates_ingestion_status

        _templates_ingestion_status["running"] = True
        _templates_ingestion_status["current_source"] = "github-site-policy"
@@ -473,7 +473,7 @@ class TestTemplatesAdminAPI:

    def test_templates_results_tracking(self):
        """Test that ingestion results are tracked correctly."""
-        from admin_api import _templates_ingestion_status
+        from admin.api import _templates_ingestion_status

        _templates_ingestion_status["results"] = {
            "github-site-policy": {
@@ -578,7 +578,7 @@ class TestTemplatesIntegration:
    def test_full_chunk_creation_pipeline(self, mock_all_services):
        """Test the full chunk creation pipeline."""
        from legal_templates_ingestion import LegalTemplatesIngestion
-        from github_crawler import ExtractedDocument
+        from crawler.github import ExtractedDocument
        from template_sources import SourceConfig, LicenseType

        ingestion = LegalTemplatesIngestion()
@@ -5,7 +5,7 @@ import sys
 import os

 sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
-from cv_cell_grid import _merge_wrapped_rows
+from ocr.cell_grid.cell_grid import _merge_wrapped_rows


 def _entry(row_index, english='', german='', example=''):
@@ -124,8 +124,8 @@ class TestSessionCreation:
    @pytest.mark.asyncio
    async def test_create_session_success(self, mock_db_pool):
        """Test successful session creation."""
-        from ocr_labeling_api import SessionCreate
-        from metrics_db import create_ocr_labeling_session
+        from ocr.labeling.api import SessionCreate
+        from metrics.db import create_ocr_labeling_session

        pool, conn = mock_db_pool
        conn.execute.return_value = None
@@ -144,7 +144,7 @@ class TestSessionCreation:

    def test_session_create_model_validation(self):
        """Test SessionCreate model validation."""
-        from ocr_labeling_api import SessionCreate
+        from ocr.labeling.api import SessionCreate

        # Valid session
        session = SessionCreate(
@@ -158,7 +158,7 @@ class TestSessionCreation:

    def test_session_create_with_custom_model(self):
        """Test SessionCreate with custom OCR model."""
-        from ocr_labeling_api import SessionCreate
+        from ocr.labeling.api import SessionCreate

        session = SessionCreate(
            name="TrOCR Session",
@@ -174,7 +174,7 @@ class TestSessionListing:
    @pytest.mark.asyncio
    async def test_get_sessions_empty(self):
        """Test getting sessions when none exist."""
-        from metrics_db import get_ocr_labeling_sessions
+        from metrics.db import get_ocr_labeling_sessions

        with patch('metrics_db.get_pool', new_callable=AsyncMock, return_value=None):
            sessions = await get_ocr_labeling_sessions()
@@ -183,7 +183,7 @@ class TestSessionListing:
    @pytest.mark.asyncio
    async def test_get_session_not_found(self):
        """Test getting a non-existent session."""
-        from metrics_db import get_ocr_labeling_session
+        from metrics.db import get_ocr_labeling_session

        with patch('metrics_db.get_pool', new_callable=AsyncMock, return_value=None):
            session = await get_ocr_labeling_session("non-existent-id")
@@ -199,7 +199,7 @@ class TestImageUpload:

    def test_compute_image_hash(self):
        """Test image hash computation."""
-        from ocr_labeling_api import compute_image_hash
+        from ocr.labeling.api import compute_image_hash

        image_data = b"\x89PNG fake image data"
        hash1 = compute_image_hash(image_data)
@@ -211,7 +211,7 @@ class TestImageUpload:

    def test_compute_image_hash_different_data(self):
        """Test that different images produce different hashes."""
-        from ocr_labeling_api import compute_image_hash
+        from ocr.labeling.api import compute_image_hash

        hash1 = compute_image_hash(b"image 1 data")
        hash2 = compute_image_hash(b"image 2 data")
@@ -220,11 +220,11 @@ class TestImageUpload:

    def test_save_image_locally(self, tmp_path):
        """Test local image saving."""
-        from ocr_labeling_api import save_image_locally, LOCAL_STORAGE_PATH
+        from ocr.labeling.api import save_image_locally, LOCAL_STORAGE_PATH

        # Temporarily override storage path
        with patch('ocr_labeling_api.LOCAL_STORAGE_PATH', str(tmp_path)):
-            from ocr_labeling_api import save_image_locally
+            from ocr.labeling.api import save_image_locally

            image_data = b"\x89PNG fake image data"
            filepath = save_image_locally(
@@ -241,7 +241,7 @@ class TestImageUpload:

    def test_get_image_url_local(self):
        """Test URL generation for local images."""
-        from ocr_labeling_api import get_image_url, LOCAL_STORAGE_PATH
+        from ocr.labeling.api import get_image_url, LOCAL_STORAGE_PATH

        local_path = f"{LOCAL_STORAGE_PATH}/session-123/item-456.png"
        url = get_image_url(local_path)
@@ -250,7 +250,7 @@ class TestImageUpload:

    def test_get_image_url_minio(self):
        """Test URL for MinIO images (passthrough)."""
-        from ocr_labeling_api import get_image_url
+        from ocr.labeling.api import get_image_url

        minio_path = "ocr-labeling/session-123/item-456.png"
        url = get_image_url(minio_path)
@@ -269,7 +269,7 @@ class TestConfirmLabel:
    @pytest.mark.asyncio
    async def test_confirm_label_success(self, mock_db_pool):
        """Test successful label confirmation."""
-        from metrics_db import confirm_ocr_label
+        from metrics.db import confirm_ocr_label

        pool, conn = mock_db_pool
        conn.fetchrow.return_value = {"ocr_text": "Test text"}
@@ -287,7 +287,7 @@ class TestConfirmLabel:

    def test_confirm_request_validation(self):
        """Test ConfirmRequest model validation."""
-        from ocr_labeling_api import ConfirmRequest
+        from ocr.labeling.api import ConfirmRequest

        request = ConfirmRequest(
            item_id="item-456",
@@ -303,7 +303,7 @@ class TestCorrectLabel:
    @pytest.mark.asyncio
    async def test_correct_label_success(self, mock_db_pool):
        """Test successful label correction."""
-        from metrics_db import correct_ocr_label
+        from metrics.db import correct_ocr_label

        pool, conn = mock_db_pool
        conn.execute.return_value = None
@@ -321,7 +321,7 @@ class TestCorrectLabel:

    def test_correct_request_validation(self):
        """Test CorrectRequest model validation."""
-        from ocr_labeling_api import CorrectRequest
+        from ocr.labeling.api import CorrectRequest

        request = CorrectRequest(
            item_id="item-456",
@@ -338,7 +338,7 @@ class TestSkipItem:
    @pytest.mark.asyncio
    async def test_skip_item_success(self, mock_db_pool):
        """Test successful item skip."""
-        from metrics_db import skip_ocr_item
+        from metrics.db import skip_ocr_item

        pool, conn = mock_db_pool
        conn.execute.return_value = None
@@ -363,7 +363,7 @@ class TestLabelingStats:
    @pytest.mark.asyncio
    async def test_get_stats_no_db(self):
        """Test stats when database is not available."""
-        from metrics_db import get_ocr_labeling_stats
+        from metrics.db import get_ocr_labeling_stats

        with patch('metrics_db.get_pool', new_callable=AsyncMock, return_value=None):
            stats = await get_ocr_labeling_stats()
@@ -371,7 +371,7 @@ class TestLabelingStats:

    def test_stats_response_model(self):
        """Test StatsResponse model structure."""
-        from ocr_labeling_api import StatsResponse
+        from ocr.labeling.api import StatsResponse

        stats = StatsResponse(
            total_items=100,
@@ -395,7 +395,7 @@ class TestTrainingExport:

    def test_export_request_validation(self):
        """Test ExportRequest model validation."""
-        from ocr_labeling_api import ExportRequest
+        from ocr.labeling.api import ExportRequest

        # Default format is generic
        request = ExportRequest()
@@ -412,7 +412,7 @@ class TestTrainingExport:
    @pytest.mark.asyncio
    async def test_export_training_samples(self, mock_db_pool):
        """Test training sample export from database."""
-        from metrics_db import export_training_samples
+        from metrics.db import export_training_samples

        pool, conn = mock_db_pool
        conn.fetch.return_value = [
@@ -495,7 +495,7 @@ class TestOCRProcessing:
    @pytest.mark.asyncio
    async def test_run_ocr_on_image_no_service(self):
        """Test OCR when service is not available."""
-        from ocr_labeling_api import run_ocr_on_image
+        from ocr.labeling.api import run_ocr_on_image

        with patch('ocr_labeling_api.VISION_OCR_AVAILABLE', False), \
             patch('ocr_labeling_api.PADDLEOCR_AVAILABLE', False), \
@@ -512,7 +512,7 @@ class TestOCRProcessing:
    @pytest.mark.asyncio
    async def test_run_ocr_on_image_success(self, mock_vision_ocr):
        """Test successful OCR processing."""
-        from ocr_labeling_api import run_ocr_on_image
+        from ocr.labeling.api import run_ocr_on_image

        text, confidence = await run_ocr_on_image(
            image_data=b"fake image",
@@ -533,7 +533,7 @@ class TestOCRModelDispatcher:
    @pytest.mark.asyncio
    async def test_dispatcher_vision_model_default(self, mock_vision_ocr):
        """Test dispatcher uses Vision OCR by default."""
-        from ocr_labeling_api import run_ocr_on_image
+        from ocr.labeling.api import run_ocr_on_image

        text, confidence = await run_ocr_on_image(
            image_data=b"fake image",
@@ -547,7 +547,7 @@ class TestOCRModelDispatcher:
    @pytest.mark.asyncio
    async def test_dispatcher_paddleocr_model(self):
        """Test dispatcher routes to PaddleOCR."""
-        from ocr_labeling_api import run_ocr_on_image
+        from ocr.labeling.api import run_ocr_on_image

        # Mock PaddleOCR
        mock_regions = []
@@ -567,7 +567,7 @@ class TestOCRModelDispatcher:
    @pytest.mark.asyncio
    async def test_dispatcher_paddleocr_fallback_to_vision(self, mock_vision_ocr):
        """Test PaddleOCR falls back to Vision OCR when unavailable."""
-        from ocr_labeling_api import run_ocr_on_image
+        from ocr.labeling.api import run_ocr_on_image

        with patch('ocr_labeling_api.PADDLEOCR_AVAILABLE', False):
            text, confidence = await run_ocr_on_image(
@@ -583,7 +583,7 @@ class TestOCRModelDispatcher:
    @pytest.mark.asyncio
    async def test_dispatcher_trocr_model(self):
        """Test dispatcher routes to TrOCR."""
-        from ocr_labeling_api import run_ocr_on_image
+        from ocr.labeling.api import run_ocr_on_image

        async def mock_trocr(image_data):
            return "TrOCR erkannter Text", 0.85
@@ -603,7 +603,7 @@ class TestOCRModelDispatcher:
    @pytest.mark.asyncio
    async def test_dispatcher_donut_model(self):
        """Test dispatcher routes to Donut."""
-        from ocr_labeling_api import run_ocr_on_image
+        from ocr.labeling.api import run_ocr_on_image

        async def mock_donut(image_data):
            return "Donut erkannter Text", 0.80
@@ -623,7 +623,7 @@ class TestOCRModelDispatcher:
    @pytest.mark.asyncio
    async def test_dispatcher_unknown_model_uses_vision(self, mock_vision_ocr):
        """Test dispatcher uses Vision OCR for unknown models."""
-        from ocr_labeling_api import run_ocr_on_image
+        from ocr.labeling.api import run_ocr_on_image

        text, confidence = await run_ocr_on_image(
            image_data=b"fake image",
@@ -641,7 +641,7 @@ class TestOCRModelTypes:

    def test_session_with_paddleocr_model(self):
        """Test session creation with PaddleOCR model."""
-        from ocr_labeling_api import SessionCreate
+        from ocr.labeling.api import SessionCreate

        session = SessionCreate(
            name="PaddleOCR Session",
@@ -653,7 +653,7 @@ class TestOCRModelTypes:

    def test_session_with_donut_model(self):
        """Test session creation with Donut model."""
-        from ocr_labeling_api import SessionCreate
+        from ocr.labeling.api import SessionCreate

        session = SessionCreate(
            name="Donut Session",
@@ -665,7 +665,7 @@ class TestOCRModelTypes:

    def test_session_with_trocr_model(self):
        """Test session creation with TrOCR model."""
-        from ocr_labeling_api import SessionCreate
+        from ocr.labeling.api import SessionCreate

        session = SessionCreate(
            name="TrOCR Session",
@@ -685,7 +685,7 @@ class TestResponseModels:

    def test_session_response_model(self):
        """Test SessionResponse model."""
-        from ocr_labeling_api import SessionResponse
+        from ocr.labeling.api import SessionResponse

        session = SessionResponse(
            id="session-123",
@@ -706,7 +706,7 @@ class TestResponseModels:

    def test_item_response_model(self):
        """Test ItemResponse model."""
-        from ocr_labeling_api import ItemResponse
+        from ocr.labeling.api import ItemResponse

        item = ItemResponse(
            id="item-456",
@@ -735,7 +735,7 @@ class TestDeduplication:

    def test_hash_based_deduplication(self):
        """Test that same images produce same hash for deduplication."""
-        from ocr_labeling_api import compute_image_hash
+        from ocr.labeling.api import compute_image_hash

        # Same content should be detected as duplicate
        image1 = b"\x89PNG\x0d\x0a\x1a\x0a test image content"
@@ -748,7 +748,7 @@ class TestDeduplication:

    def test_unique_images_different_hash(self):
        """Test that different images produce different hashes."""
-        from ocr_labeling_api import compute_image_hash
+        from ocr.labeling.api import compute_image_hash

        image1 = b"\x89PNG unique content 1"
        image2 = b"\x89PNG unique content 2"
@@ -13,7 +13,7 @@ import os

 sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))

-from ocr_pipeline_api import (
+from ocr.pipeline.api import (
    _split_paddle_multi_words,
    _group_words_into_rows,
    _merge_row_sequences,
@@ -13,7 +13,7 @@ Tests cover:
 import numpy as np
 import pytest

-from page_crop import (
+from ocr.pipeline.page_crop import (
    detect_and_crop_page,
    detect_page_splits,
    _detect_format,
@@ -56,7 +56,7 @@ class TestIngestionStatus:

    def test_status_not_running(self):
        """Test status when no ingestion is running."""
-        from admin_api import _ingestion_status
+        from admin.api import _ingestion_status

        # Reset status
        _ingestion_status["running"] = False
@@ -67,7 +67,7 @@ class TestIngestionStatus:

    def test_status_running(self):
        """Test status when ingestion is running."""
-        from admin_api import _ingestion_status
+        from admin.api import _ingestion_status

        _ingestion_status["running"] = True
        _ingestion_status["last_run"] = datetime.now().isoformat()
@@ -81,7 +81,7 @@ class TestUploadAPI:

    def test_upload_record_creation(self):
        """Test that upload records are created correctly."""
-        from admin_api import _upload_history
+        from admin.api import _upload_history

        # Clear history
        _upload_history.clear()
@@ -102,7 +102,7 @@ class TestUploadAPI:

    def test_upload_history_limit(self):
        """Test that upload history is limited to 100 entries."""
-        from admin_api import _upload_history
+        from admin.api import _upload_history

        _upload_history.clear()

@@ -187,7 +187,7 @@ class TestMetricsDB:
    @pytest.mark.asyncio
    async def test_store_feedback_no_pool(self):
        """Test feedback storage when DB is not available."""
-        from metrics_db import store_feedback
+        from metrics.db import store_feedback

        with patch('metrics_db.get_pool', new_callable=AsyncMock, return_value=None):
            result = await store_feedback(
@@ -199,7 +199,7 @@ class TestMetricsDB:
    @pytest.mark.asyncio
    async def test_calculate_metrics_no_pool(self):
        """Test metrics calculation when DB is not available."""
-        from metrics_db import calculate_metrics
+        from metrics.db import calculate_metrics

        with patch('metrics_db.get_pool', new_callable=AsyncMock, return_value=None):
            metrics = await calculate_metrics()
@@ -214,7 +214,7 @@ class TestMetricsDB:
        ]

        # Import from the metrics.db module to check that the table-init function exists
-        from metrics_db import init_metrics_tables
+        from metrics.db import init_metrics_tables

        # The function should create these tables
        assert callable(init_metrics_tables)
@@ -231,8 +231,8 @@ class TestRAGIntegration:
    @pytest.mark.asyncio
    async def test_nibis_search(self):
        """Test NiBiS semantic search."""
-        from admin_api import search_nibis
-        from admin_api import NiBiSSearchRequest
+        from admin.api import search_nibis
+        from admin.api import NiBiSSearchRequest

        request = NiBiSSearchRequest(
            query="Gedichtanalyse Expressionismus",
@@ -265,7 +265,7 @@ class TestRAGIntegration:
    @pytest.mark.asyncio
    async def test_metrics_storage(self):
        """Test metrics storage in PostgreSQL."""
-        from metrics_db import store_feedback, calculate_metrics
+        from metrics.db import store_feedback, calculate_metrics

        # This would require PostgreSQL running
        # stored = await store_feedback(
@@ -330,7 +330,7 @@ class TestEmbeddings:

    def test_vector_dimensions(self):
        """Test that vector dimensions are configured correctly."""
-        from eh_pipeline import get_vector_size, EMBEDDING_BACKEND
+        from korrektur.eh_pipeline import get_vector_size, EMBEDDING_BACKEND

        size = get_vector_size()

@@ -341,7 +341,7 @@ class TestEmbeddings:

    def test_chunking_config(self):
        """Test chunking configuration."""
-        from eh_pipeline import CHUNK_SIZE, CHUNK_OVERLAP
+        from korrektur.eh_pipeline import CHUNK_SIZE, CHUNK_OVERLAP

        assert CHUNK_SIZE > 0
        assert CHUNK_OVERLAP >= 0
@@ -30,7 +30,7 @@ from datetime import datetime, timezone, timedelta

 import sys
 sys.path.insert(0, '..')
-from rbac import (
+from compliance.rbac import (
    Role,
    Action,
    ResourceType,
@@ -4,7 +4,7 @@ import pytest
 import sys, os
 sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))

-from smart_spell import SmartSpellChecker, CorrectionResult
+from ocr.spell.smart_spell import SmartSpellChecker, CorrectionResult


@pytest.fixture
@@ -4,7 +4,7 @@ import pytest
 import sys, os
 sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))

-from unified_grid import (
+from grid.unified import (
    _compute_dominant_row_height,
    _classify_boxes,
    build_unified_grid,
@@ -37,7 +37,7 @@ from fastapi.testclient import TestClient
 # Import the main app and vocab-worksheet components
 sys.path.insert(0, '..')
 from main import app
-from vocab_worksheet_api import (
+from vocab.worksheet.api import (
    _sessions,
    _worksheets,
    SessionStatus,
@@ -7,7 +7,7 @@ uses dynamic programming + dictionary lookup to find valid splits.

 import pytest

-from cv_review import _try_split_merged_word, _spell_dict_knows, _SPELL_AVAILABLE
+from ocr.review.review import _try_split_merged_word, _spell_dict_knows, _SPELL_AVAILABLE

 pytestmark = pytest.mark.skipif(
    not _SPELL_AVAILABLE,
@@ -35,7 +35,7 @@ from fastapi.testclient import TestClient
 # Import the main app and worksheet-editor components
 sys.path.insert(0, '..')
 from main import app
-from worksheet_editor_api import (
+from worksheet.editor_api import (
    worksheets_db,
    AIImageStyle,
    WorksheetStatus,