Fix: Resolve all lint errors from ocr/ restructure
- Added ocr_region import to cell_grid/build.py and legacy.py
- Fixed circular import in engines.py via lazy import
- Auto-fixed 22 unused imports via ruff --fix

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -11,6 +11,7 @@ from typing import Any, Dict, List, Optional, Tuple
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
from ..types import PageRegion, RowGeometry
|
from ..types import PageRegion, RowGeometry
|
||||||
|
from ..review.pipeline import ocr_region # noqa: F401 — Tesseract fallback
|
||||||
from ..engines.engines import (
|
from ..engines.engines import (
|
||||||
RAPIDOCR_AVAILABLE,
|
RAPIDOCR_AVAILABLE,
|
||||||
_assign_row_words_to_columns,
|
_assign_row_words_to_columns,
|
||||||
|
|||||||
@@ -13,6 +13,7 @@ from typing import Any, Dict, List, Optional, Tuple
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
from ..types import PageRegion, RowGeometry
|
from ..types import PageRegion, RowGeometry
|
||||||
|
from ..review.pipeline import ocr_region # noqa: F401 — Tesseract wrapper
|
||||||
from ..engines.engines import (
|
from ..engines.engines import (
|
||||||
RAPIDOCR_AVAILABLE,
|
RAPIDOCR_AVAILABLE,
|
||||||
_assign_row_words_to_columns,
|
_assign_row_words_to_columns,
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ Used by the Box-Grid-Review step to rebuild box zones with correct structure.
|
|||||||
import logging
|
import logging
|
||||||
import re
|
import re
|
||||||
import statistics
|
import statistics
|
||||||
from typing import Any, Dict, List, Optional, Tuple
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -149,7 +149,7 @@ def build_box_zone_grid(
|
|||||||
|
|
||||||
Returns the same format as _build_zone_grid (columns, rows, cells, header_rows).
|
Returns the same format as _build_zone_grid (columns, rows, cells, header_rows).
|
||||||
"""
|
"""
|
||||||
from grid_editor_helpers import _build_zone_grid, _cluster_rows
|
from grid_editor_helpers import _build_zone_grid
|
||||||
|
|
||||||
if not zone_words:
|
if not zone_words:
|
||||||
return {
|
return {
|
||||||
|
|||||||
@@ -14,7 +14,7 @@ DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
from typing import Any, Dict, List, Optional, Tuple
|
from typing import Dict, List, Tuple
|
||||||
|
|
||||||
import cv2
|
import cv2
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
|
|||||||
|
|
||||||
import logging
|
import logging
|
||||||
import re
|
import re
|
||||||
from typing import Any, Dict, List, Optional, Tuple
|
from typing import Dict, List, Optional
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
|
|||||||
|
|
||||||
import logging
|
import logging
|
||||||
import re
|
import re
|
||||||
from typing import Any, Dict, List, Optional
|
from typing import Dict, List, Optional
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
|
|||||||
@@ -16,20 +16,14 @@ Lizenz: Apache 2.0 (kommerziell nutzbar)
|
|||||||
DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
|
DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import io
|
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import re
|
from typing import Any, Dict, List, Optional
|
||||||
from typing import Any, Dict, List, Optional, Tuple
|
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
from ..types import (
|
from ..types import (
|
||||||
IPA_AVAILABLE,
|
|
||||||
PageRegion,
|
PageRegion,
|
||||||
RowGeometry,
|
|
||||||
_britfone_dict,
|
|
||||||
_ipa_convert_american,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
@@ -194,7 +188,7 @@ def ocr_region_trocr(img_bgr: np.ndarray, region: PageRegion, handwritten: bool
|
|||||||
if region.height > 0 and region.width > 0:
|
if region.height > 0 and region.width > 0:
|
||||||
ocr_img_crop = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY) if img_bgr is not None else None
|
ocr_img_crop = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY) if img_bgr is not None else None
|
||||||
if ocr_img_crop is not None:
|
if ocr_img_crop is not None:
|
||||||
return ocr_region(ocr_img_crop, region, lang="eng+deu", psm=6)
|
from ..review.pipeline import ocr_region as _ocr_region; return _ocr_region(ocr_img_crop, region, lang="eng+deu", psm=6)
|
||||||
return []
|
return []
|
||||||
|
|
||||||
crop = img_bgr[region.y:region.y + region.height, region.x:region.x + region.width]
|
crop = img_bgr[region.y:region.y + region.height, region.x:region.x + region.width]
|
||||||
@@ -209,7 +203,7 @@ def ocr_region_trocr(img_bgr: np.ndarray, region: PageRegion, handwritten: bool
|
|||||||
if processor is None or model is None:
|
if processor is None or model is None:
|
||||||
logger.warning("TrOCR model not loaded, falling back to Tesseract")
|
logger.warning("TrOCR model not loaded, falling back to Tesseract")
|
||||||
ocr_img_crop = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
|
ocr_img_crop = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
|
||||||
return ocr_region(ocr_img_crop, region, lang="eng+deu", psm=6)
|
from ..review.pipeline import ocr_region as _ocr_region; return _ocr_region(ocr_img_crop, region, lang="eng+deu", psm=6)
|
||||||
|
|
||||||
pil_crop = _PILImage.fromarray(cv2.cvtColor(crop, cv2.COLOR_BGR2RGB))
|
pil_crop = _PILImage.fromarray(cv2.cvtColor(crop, cv2.COLOR_BGR2RGB))
|
||||||
lines = _split_into_lines(pil_crop)
|
lines = _split_into_lines(pil_crop)
|
||||||
@@ -260,14 +254,13 @@ def ocr_region_lighton(img_bgr: np.ndarray, region: PageRegion) -> List[Dict[str
|
|||||||
if RAPIDOCR_AVAILABLE and img_bgr is not None:
|
if RAPIDOCR_AVAILABLE and img_bgr is not None:
|
||||||
return ocr_region_rapid(img_bgr, region)
|
return ocr_region_rapid(img_bgr, region)
|
||||||
ocr_img_crop = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY) if img_bgr is not None else None
|
ocr_img_crop = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY) if img_bgr is not None else None
|
||||||
return ocr_region(ocr_img_crop, region, lang="eng+deu", psm=6) if ocr_img_crop is not None else []
|
from ..review.pipeline import ocr_region as _ocr_region; return _ocr_region(ocr_img_crop, region, lang="eng+deu", psm=6) if ocr_img_crop is not None else []
|
||||||
|
|
||||||
crop = img_bgr[region.y:region.y + region.height, region.x:region.x + region.width]
|
crop = img_bgr[region.y:region.y + region.height, region.x:region.x + region.width]
|
||||||
if crop.size == 0:
|
if crop.size == 0:
|
||||||
return []
|
return []
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import io
|
|
||||||
import torch
|
import torch
|
||||||
from PIL import Image as _PILImage
|
from PIL import Image as _PILImage
|
||||||
|
|
||||||
@@ -277,7 +270,7 @@ def ocr_region_lighton(img_bgr: np.ndarray, region: PageRegion) -> List[Dict[str
|
|||||||
if RAPIDOCR_AVAILABLE and img_bgr is not None:
|
if RAPIDOCR_AVAILABLE and img_bgr is not None:
|
||||||
return ocr_region_rapid(img_bgr, region)
|
return ocr_region_rapid(img_bgr, region)
|
||||||
ocr_img_crop = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
|
ocr_img_crop = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
|
||||||
return ocr_region(ocr_img_crop, region, lang="eng+deu", psm=6)
|
from ..review.pipeline import ocr_region as _ocr_region; return _ocr_region(ocr_img_crop, region, lang="eng+deu", psm=6)
|
||||||
|
|
||||||
pil_crop = _PILImage.fromarray(cv2.cvtColor(crop, cv2.COLOR_BGR2RGB))
|
pil_crop = _PILImage.fromarray(cv2.cvtColor(crop, cv2.COLOR_BGR2RGB))
|
||||||
conversation = [{"role": "user", "content": [{"type": "image"}]}]
|
conversation = [{"role": "user", "content": [{"type": "image"}]}]
|
||||||
|
|||||||
@@ -14,7 +14,7 @@ Contains:
|
|||||||
|
|
||||||
import logging
|
import logging
|
||||||
import re
|
import re
|
||||||
from typing import Any, Dict, List, Optional
|
from typing import List
|
||||||
|
|
||||||
from ..types import IPA_AVAILABLE
|
from ..types import IPA_AVAILABLE
|
||||||
from .ipa_lookup import (
|
from .ipa_lookup import (
|
||||||
|
|||||||
@@ -17,7 +17,7 @@ Sub-modules:
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
from typing import Any, Dict, List, Optional, Tuple
|
from typing import Dict, List, Optional, Tuple
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
|
|||||||
@@ -18,7 +18,6 @@ import numpy as np
|
|||||||
|
|
||||||
from ..types import (
|
from ..types import (
|
||||||
CV2_AVAILABLE,
|
CV2_AVAILABLE,
|
||||||
TESSERACT_AVAILABLE,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|||||||
@@ -17,10 +17,6 @@ from typing import Tuple
|
|||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
from ..types import (
|
|
||||||
CV2_AVAILABLE,
|
|
||||||
TESSERACT_AVAILABLE,
|
|
||||||
)
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user