From 098a2ff0929f899a9df1574fc2067da64a11c629 Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Sat, 25 Apr 2026 21:15:53 +0200 Subject: [PATCH] Fix: Resolve all lint errors from ocr/ restructure - Added ocr_region import to cell_grid/build.py and legacy.py - Fixed circular import in engines.py via lazy import - Auto-fixed 22 unused imports via ruff --fix Co-Authored-By: Claude Opus 4.6 (1M context) --- klausur-service/backend/ocr/cell_grid/build.py | 1 + klausur-service/backend/ocr/cell_grid/legacy.py | 1 + .../backend/ocr/detect/box_layout.py | 4 ++-- .../backend/ocr/detect/color_detect.py | 2 +- .../backend/ocr/detect/syllable/core.py | 2 +- .../backend/ocr/detect/syllable/merge.py | 2 +- klausur-service/backend/ocr/engines/engines.py | 17 +++++------------ .../backend/ocr/engines/ipa_repair.py | 2 +- klausur-service/backend/ocr/layout/layout.py | 2 +- .../backend/ocr/preprocessing/dewarp.py | 1 - .../backend/ocr/preprocessing/preprocessing.py | 4 ---- 11 files changed, 14 insertions(+), 24 deletions(-) diff --git a/klausur-service/backend/ocr/cell_grid/build.py b/klausur-service/backend/ocr/cell_grid/build.py index 352935c..52784d7 100644 --- a/klausur-service/backend/ocr/cell_grid/build.py +++ b/klausur-service/backend/ocr/cell_grid/build.py @@ -11,6 +11,7 @@ from typing import Any, Dict, List, Optional, Tuple import numpy as np from ..types import PageRegion, RowGeometry +from ..review.pipeline import ocr_region # noqa: F401 — Tesseract fallback from ..engines.engines import ( RAPIDOCR_AVAILABLE, _assign_row_words_to_columns, diff --git a/klausur-service/backend/ocr/cell_grid/legacy.py b/klausur-service/backend/ocr/cell_grid/legacy.py index 39babfb..ec55128 100644 --- a/klausur-service/backend/ocr/cell_grid/legacy.py +++ b/klausur-service/backend/ocr/cell_grid/legacy.py @@ -13,6 +13,7 @@ from typing import Any, Dict, List, Optional, Tuple import numpy as np from ..types import PageRegion, RowGeometry +from ..review.pipeline import ocr_region # noqa: F401 — Tesseract wrapper from ..engines.engines import ( RAPIDOCR_AVAILABLE, _assign_row_words_to_columns, diff --git a/klausur-service/backend/ocr/detect/box_layout.py b/klausur-service/backend/ocr/detect/box_layout.py index 49201c0..b9d492b 100644 --- a/klausur-service/backend/ocr/detect/box_layout.py +++ b/klausur-service/backend/ocr/detect/box_layout.py @@ -10,7 +10,7 @@ Used by the Box-Grid-Review step to rebuild box zones with correct structure. import logging import re import statistics -from typing import Any, Dict, List, Optional, Tuple +from typing import Any, Dict, List, Optional logger = logging.getLogger(__name__) @@ -149,7 +149,7 @@ def build_box_zone_grid( Returns the same format as _build_zone_grid (columns, rows, cells, header_rows). """ - from grid_editor_helpers import _build_zone_grid, _cluster_rows + from grid_editor_helpers import _build_zone_grid if not zone_words: return { diff --git a/klausur-service/backend/ocr/detect/color_detect.py b/klausur-service/backend/ocr/detect/color_detect.py index 998d604..b419c78 100644 --- a/klausur-service/backend/ocr/detect/color_detect.py +++ b/klausur-service/backend/ocr/detect/color_detect.py @@ -14,7 +14,7 @@ DATENSCHUTZ: Alle Verarbeitung erfolgt lokal. """ import logging -from typing import Any, Dict, List, Optional, Tuple +from typing import Dict, List, Tuple import cv2 import numpy as np diff --git a/klausur-service/backend/ocr/detect/syllable/core.py b/klausur-service/backend/ocr/detect/syllable/core.py index 4a4dca8..690c5d9 100644 --- a/klausur-service/backend/ocr/detect/syllable/core.py +++ b/klausur-service/backend/ocr/detect/syllable/core.py @@ -9,7 +9,7 @@ DATENSCHUTZ: Alle Verarbeitung erfolgt lokal. import logging import re -from typing import Any, Dict, List, Optional, Tuple +from typing import Dict, List, Optional logger = logging.getLogger(__name__) diff --git a/klausur-service/backend/ocr/detect/syllable/merge.py b/klausur-service/backend/ocr/detect/syllable/merge.py index 1d0675c..497ed99 100644 --- a/klausur-service/backend/ocr/detect/syllable/merge.py +++ b/klausur-service/backend/ocr/detect/syllable/merge.py @@ -9,7 +9,7 @@ DATENSCHUTZ: Alle Verarbeitung erfolgt lokal. import logging import re -from typing import Any, Dict, List, Optional +from typing import Dict, List, Optional import numpy as np diff --git a/klausur-service/backend/ocr/engines/engines.py b/klausur-service/backend/ocr/engines/engines.py index 9b57001..bf70dfe 100644 --- a/klausur-service/backend/ocr/engines/engines.py +++ b/klausur-service/backend/ocr/engines/engines.py @@ -16,20 +16,14 @@ Lizenz: Apache 2.0 (kommerziell nutzbar) DATENSCHUTZ: Alle Verarbeitung erfolgt lokal. """ -import io import logging import os -import re -from typing import Any, Dict, List, Optional, Tuple +from typing import Any, Dict, List, Optional import numpy as np from ..types import ( - IPA_AVAILABLE, PageRegion, - RowGeometry, - _britfone_dict, - _ipa_convert_american, ) logger = logging.getLogger(__name__) @@ -194,7 +188,7 @@ def ocr_region_trocr(img_bgr: np.ndarray, region: PageRegion, handwritten: bool if region.height > 0 and region.width > 0: ocr_img_crop = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY) if img_bgr is not None else None if ocr_img_crop is not None: - return ocr_region(ocr_img_crop, region, lang="eng+deu", psm=6) + from ..review.pipeline import ocr_region as _ocr_region; return _ocr_region(ocr_img_crop, region, lang="eng+deu", psm=6) return [] crop = img_bgr[region.y:region.y + region.height, region.x:region.x + region.width] @@ -209,7 +203,7 @@ def ocr_region_trocr(img_bgr: np.ndarray, region: PageRegion, handwritten: bool if processor is None or model is None: logger.warning("TrOCR model not loaded, falling back to Tesseract") ocr_img_crop = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY) - return ocr_region(ocr_img_crop, region, lang="eng+deu", psm=6) + from ..review.pipeline import ocr_region as _ocr_region; return _ocr_region(ocr_img_crop, region, lang="eng+deu", psm=6) pil_crop = _PILImage.fromarray(cv2.cvtColor(crop, cv2.COLOR_BGR2RGB)) lines = _split_into_lines(pil_crop) @@ -260,14 +254,13 @@ def ocr_region_lighton(img_bgr: np.ndarray, region: PageRegion) -> List[Dict[str if RAPIDOCR_AVAILABLE and img_bgr is not None: return ocr_region_rapid(img_bgr, region) ocr_img_crop = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY) if img_bgr is not None else None - return ocr_region(ocr_img_crop, region, lang="eng+deu", psm=6) if ocr_img_crop is not None else [] + from ..review.pipeline import ocr_region as _ocr_region; return _ocr_region(ocr_img_crop, region, lang="eng+deu", psm=6) if ocr_img_crop is not None else [] crop = img_bgr[region.y:region.y + region.height, region.x:region.x + region.width] if crop.size == 0: return [] try: - import io import torch from PIL import Image as _PILImage @@ -277,7 +270,7 @@ def ocr_region_lighton(img_bgr: np.ndarray, region: PageRegion) -> List[Dict[str if RAPIDOCR_AVAILABLE and img_bgr is not None: return ocr_region_rapid(img_bgr, region) ocr_img_crop = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY) - return ocr_region(ocr_img_crop, region, lang="eng+deu", psm=6) + from ..review.pipeline import ocr_region as _ocr_region; return _ocr_region(ocr_img_crop, region, lang="eng+deu", psm=6) pil_crop = _PILImage.fromarray(cv2.cvtColor(crop, cv2.COLOR_BGR2RGB)) conversation = [{"role": "user", "content": [{"type": "image"}]}] diff --git a/klausur-service/backend/ocr/engines/ipa_repair.py b/klausur-service/backend/ocr/engines/ipa_repair.py index 4cd4848..c1d6b30 100644 --- a/klausur-service/backend/ocr/engines/ipa_repair.py +++ b/klausur-service/backend/ocr/engines/ipa_repair.py @@ -14,7 +14,7 @@ Contains: import logging import re -from typing import Any, Dict, List, Optional +from typing import List from ..types import IPA_AVAILABLE from .ipa_lookup import ( diff --git a/klausur-service/backend/ocr/layout/layout.py b/klausur-service/backend/ocr/layout/layout.py index 37a7b84..bb4cb7c 100644 --- a/klausur-service/backend/ocr/layout/layout.py +++ b/klausur-service/backend/ocr/layout/layout.py @@ -17,7 +17,7 @@ Sub-modules: """ import logging -from typing import Any, Dict, List, Optional, Tuple +from typing import Dict, List, Optional, Tuple import numpy as np diff --git a/klausur-service/backend/ocr/preprocessing/dewarp.py b/klausur-service/backend/ocr/preprocessing/dewarp.py index 6639ac6..8c77bc1 100644 --- a/klausur-service/backend/ocr/preprocessing/dewarp.py +++ b/klausur-service/backend/ocr/preprocessing/dewarp.py @@ -18,7 +18,6 @@ import numpy as np from ..types import ( CV2_AVAILABLE, - TESSERACT_AVAILABLE, ) logger = logging.getLogger(__name__) diff --git a/klausur-service/backend/ocr/preprocessing/preprocessing.py b/klausur-service/backend/ocr/preprocessing/preprocessing.py index c80e6cd..2b7365a 100644 --- a/klausur-service/backend/ocr/preprocessing/preprocessing.py +++ b/klausur-service/backend/ocr/preprocessing/preprocessing.py @@ -17,10 +17,6 @@ from typing import Tuple import numpy as np -from ..types import ( - CV2_AVAILABLE, - TESSERACT_AVAILABLE, -) logger = logging.getLogger(__name__)