Fix: Resolve all lint errors from ocr/ restructure

- Added ocr_region import to cell_grid/build.py and legacy.py
- Fixed circular import in engines.py via lazy import
- Auto-fixed 22 unused imports via ruff --fix

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-25 21:15:53 +02:00
parent cb1be59e46
commit 098a2ff092
11 changed files with 14 additions and 24 deletions
@@ -11,6 +11,7 @@ from typing import Any, Dict, List, Optional, Tuple
 import numpy as np

 from ..types import PageRegion, RowGeometry
+from ..review.pipeline import ocr_region  # noqa: F401 — Tesseract fallback
 from ..engines.engines import (
    RAPIDOCR_AVAILABLE,
    _assign_row_words_to_columns,
@@ -13,6 +13,7 @@ from typing import Any, Dict, List, Optional, Tuple
 import numpy as np

 from ..types import PageRegion, RowGeometry
+from ..review.pipeline import ocr_region  # noqa: F401 — Tesseract wrapper
 from ..engines.engines import (
    RAPIDOCR_AVAILABLE,
    _assign_row_words_to_columns,
@@ -10,7 +10,7 @@ Used by the Box-Grid-Review step to rebuild box zones with correct structure.
 import logging
 import re
 import statistics
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Any, Dict, List, Optional

 logger = logging.getLogger(__name__)

@@ -149,7 +149,7 @@ def build_box_zone_grid(

    Returns the same format as _build_zone_grid (columns, rows, cells, header_rows).
    """
-    from grid_editor_helpers import _build_zone_grid, _cluster_rows
+    from grid_editor_helpers import _build_zone_grid

    if not zone_words:
        return {
@@ -14,7 +14,7 @@ DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
 """

 import logging
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Dict, List, Tuple

 import cv2
 import numpy as np
@@ -9,7 +9,7 @@ DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.

 import logging
 import re
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Dict, List, Optional

 logger = logging.getLogger(__name__)

@@ -9,7 +9,7 @@ DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.

 import logging
 import re
-from typing import Any, Dict, List, Optional
+from typing import Dict, List, Optional

 import numpy as np

@@ -16,20 +16,14 @@ Lizenz: Apache 2.0 (kommerziell nutzbar)
 DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
 """

-import io
 import logging
 import os
-import re
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Any, Dict, List, Optional

 import numpy as np

 from ..types import (
-    IPA_AVAILABLE,
    PageRegion,
-    RowGeometry,
-    _britfone_dict,
-    _ipa_convert_american,
 )

 logger = logging.getLogger(__name__)
@@ -194,7 +188,7 @@ def ocr_region_trocr(img_bgr: np.ndarray, region: PageRegion, handwritten: bool
        if region.height > 0 and region.width > 0:
            ocr_img_crop = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY) if img_bgr is not None else None
            if ocr_img_crop is not None:
-                return ocr_region(ocr_img_crop, region, lang="eng+deu", psm=6)
+                from ..review.pipeline import ocr_region as _ocr_region; return _ocr_region(ocr_img_crop, region, lang="eng+deu", psm=6)
        return []

    crop = img_bgr[region.y:region.y + region.height, region.x:region.x + region.width]
@@ -209,7 +203,7 @@ def ocr_region_trocr(img_bgr: np.ndarray, region: PageRegion, handwritten: bool
        if processor is None or model is None:
            logger.warning("TrOCR model not loaded, falling back to Tesseract")
            ocr_img_crop = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
-            return ocr_region(ocr_img_crop, region, lang="eng+deu", psm=6)
+            from ..review.pipeline import ocr_region as _ocr_region; return _ocr_region(ocr_img_crop, region, lang="eng+deu", psm=6)

        pil_crop = _PILImage.fromarray(cv2.cvtColor(crop, cv2.COLOR_BGR2RGB))
        lines = _split_into_lines(pil_crop)
@@ -260,14 +254,13 @@ def ocr_region_lighton(img_bgr: np.ndarray, region: PageRegion) -> List[Dict[str
        if RAPIDOCR_AVAILABLE and img_bgr is not None:
            return ocr_region_rapid(img_bgr, region)
        ocr_img_crop = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY) if img_bgr is not None else None
-        return ocr_region(ocr_img_crop, region, lang="eng+deu", psm=6) if ocr_img_crop is not None else []
+        from ..review.pipeline import ocr_region as _ocr_region; return _ocr_region(ocr_img_crop, region, lang="eng+deu", psm=6) if ocr_img_crop is not None else []

    crop = img_bgr[region.y:region.y + region.height, region.x:region.x + region.width]
    if crop.size == 0:
        return []

    try:
-        import io
        import torch
        from PIL import Image as _PILImage

@@ -277,7 +270,7 @@ def ocr_region_lighton(img_bgr: np.ndarray, region: PageRegion) -> List[Dict[str
            if RAPIDOCR_AVAILABLE and img_bgr is not None:
                return ocr_region_rapid(img_bgr, region)
            ocr_img_crop = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
-            return ocr_region(ocr_img_crop, region, lang="eng+deu", psm=6)
+            from ..review.pipeline import ocr_region as _ocr_region; return _ocr_region(ocr_img_crop, region, lang="eng+deu", psm=6)

        pil_crop = _PILImage.fromarray(cv2.cvtColor(crop, cv2.COLOR_BGR2RGB))
        conversation = [{"role": "user", "content": [{"type": "image"}]}]
@@ -14,7 +14,7 @@ Contains:

 import logging
 import re
-from typing import Any, Dict, List, Optional
+from typing import List

 from ..types import IPA_AVAILABLE
 from .ipa_lookup import (
@@ -17,7 +17,7 @@ Sub-modules:
 """

 import logging
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Dict, List, Optional, Tuple

 import numpy as np

@@ -18,7 +18,6 @@ import numpy as np

 from ..types import (
    CV2_AVAILABLE,
-    TESSERACT_AVAILABLE,
 )

 logger = logging.getLogger(__name__)
@@ -17,10 +17,6 @@ from typing import Tuple

 import numpy as np

-from ..types import (
-    CV2_AVAILABLE,
-    TESSERACT_AVAILABLE,
-)

 logger = logging.getLogger(__name__)