Restructure: Move 47 cv_* files into ocr/ package
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 39s
CI / test-go-edu-search (push) Successful in 29s
CI / test-python-klausur (push) Failing after 2m34s
CI / test-python-agent-core (push) Successful in 20s
CI / test-nodejs-website (push) Successful in 26s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 39s
CI / test-go-edu-search (push) Successful in 29s
CI / test-python-klausur (push) Failing after 2m34s
CI / test-python-agent-core (push) Successful in 20s
CI / test-nodejs-website (push) Successful in 26s
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2
klausur-service/backend/ocr/layout/__init__.py
Normal file
2
klausur-service/backend/ocr/layout/__init__.py
Normal file
@@ -0,0 +1,2 @@
|
||||
"""Layout analysis sub-package."""
|
||||
from .layout import * # noqa: F401,F403
|
||||
@@ -13,8 +13,8 @@ from typing import List
|
||||
|
||||
import numpy as np
|
||||
|
||||
from cv_vocab_types import PageRegion
|
||||
from cv_layout_detection import _find_content_bounds
|
||||
from ..types import PageRegion
|
||||
from .detection import _find_content_bounds
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -246,7 +246,7 @@ def analyze_layout(layout_img: np.ndarray, ocr_img: np.ndarray) -> List[PageRegi
|
||||
|
||||
# Add header/footer info (gap-based detection with fallback)
|
||||
# Lazy import to avoid a circular dependency with the package's layout module (formerly cv_layout.py)
|
||||
from cv_layout_detection import _add_header_footer
|
||||
from .detection import _add_header_footer
|
||||
_add_header_footer(regions, top_y, bottom_y, w, h, inv=inv)
|
||||
|
||||
top_region = next((r.type for r in regions if r.type in ('header', 'margin_top')), 'none')
|
||||
|
||||
@@ -11,16 +11,16 @@ from typing import Dict, List, Optional
|
||||
|
||||
import numpy as np
|
||||
|
||||
from cv_vocab_types import ColumnGeometry, PageRegion
|
||||
from ..types import ColumnGeometry, PageRegion
|
||||
|
||||
from cv_layout_scoring import (
|
||||
from .scoring import (
|
||||
_score_language,
|
||||
_score_role,
|
||||
_score_dictionary_signals,
|
||||
_classify_dictionary_columns,
|
||||
)
|
||||
|
||||
from cv_layout_classify_position import (
|
||||
from .classify_position import (
|
||||
_classify_by_position_enhanced,
|
||||
_classify_by_position_fallback,
|
||||
)
|
||||
@@ -211,7 +211,7 @@ def classify_column_types(geometries: List[ColumnGeometry],
|
||||
# _add_header_footer lives in the detection module (lazy import avoids a
|
||||
# circular import at module level). This also lets the module be tested
|
||||
# independently when the layout module (formerly cv_layout) hasn't been modified yet.
|
||||
from cv_layout_detection import _add_header_footer # noqa: E402
|
||||
from .detection import _add_header_footer # noqa: E402
|
||||
|
||||
content_h = bottom_y - top_y
|
||||
|
||||
|
||||
@@ -11,7 +11,7 @@ Extracted from cv_layout_classify.py during file-size split.
|
||||
import logging
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
from cv_vocab_types import ColumnGeometry, PageRegion
|
||||
from ..types import ColumnGeometry, PageRegion
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@@ -16,7 +16,7 @@ from typing import Dict, List, Optional, Tuple
|
||||
|
||||
import numpy as np
|
||||
|
||||
from cv_vocab_types import ColumnGeometry
|
||||
from ..types import ColumnGeometry
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@@ -19,8 +19,8 @@ from typing import Dict, List, Optional, Tuple
|
||||
|
||||
import numpy as np
|
||||
|
||||
from cv_vocab_types import ColumnGeometry
|
||||
from cv_layout_detection import _find_content_bounds
|
||||
from ..types import ColumnGeometry
|
||||
from .detection import _find_content_bounds
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@@ -13,7 +13,7 @@ from typing import List, Optional, Tuple
|
||||
|
||||
import numpy as np
|
||||
|
||||
from cv_vocab_types import (
|
||||
from ..types import (
|
||||
DocumentTypeResult,
|
||||
PageRegion,
|
||||
)
|
||||
|
||||
@@ -21,14 +21,14 @@ from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
import numpy as np
|
||||
|
||||
from cv_vocab_types import ColumnGeometry, DetectedBox, PageRegion
|
||||
from ..types import ColumnGeometry, DetectedBox, PageRegion
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ── Re-exports (backward compatibility) ───────────────────────────────────
|
||||
|
||||
from cv_layout_detection import ( # noqa: F401
|
||||
from .detection import ( # noqa: F401
|
||||
detect_document_type,
|
||||
create_ocr_image,
|
||||
create_layout_image,
|
||||
@@ -39,46 +39,46 @@ from cv_layout_detection import ( # noqa: F401
|
||||
_add_header_footer,
|
||||
)
|
||||
|
||||
from cv_layout_analyze import ( # noqa: F401
|
||||
from .analyze import ( # noqa: F401
|
||||
analyze_layout,
|
||||
)
|
||||
|
||||
from cv_layout_columns import ( # noqa: F401
|
||||
from .columns import ( # noqa: F401
|
||||
detect_column_geometry,
|
||||
_detect_columns_by_clustering,
|
||||
_build_geometries_from_starts,
|
||||
)
|
||||
|
||||
from cv_layout_column_refine import ( # noqa: F401
|
||||
from .column_refine import ( # noqa: F401
|
||||
_detect_sub_columns,
|
||||
_split_broad_columns,
|
||||
expand_narrow_columns,
|
||||
)
|
||||
|
||||
from cv_layout_rows import ( # noqa: F401
|
||||
from .rows import ( # noqa: F401
|
||||
detect_row_geometry,
|
||||
_build_rows_from_word_grouping,
|
||||
)
|
||||
|
||||
from cv_layout_row_regularize import ( # noqa: F401
|
||||
from .row_regularize import ( # noqa: F401
|
||||
_regularize_row_grid,
|
||||
)
|
||||
|
||||
from cv_layout_scoring import ( # noqa: F401
|
||||
from .scoring import ( # noqa: F401
|
||||
_score_language,
|
||||
_score_role,
|
||||
_score_dictionary_signals,
|
||||
_classify_dictionary_columns,
|
||||
)
|
||||
|
||||
from cv_layout_classify import ( # noqa: F401
|
||||
from .classify import ( # noqa: F401
|
||||
_build_margin_regions,
|
||||
positional_column_regions,
|
||||
classify_column_types,
|
||||
_classify_by_content,
|
||||
)
|
||||
|
||||
from cv_layout_classify_position import ( # noqa: F401
|
||||
from .classify_position import ( # noqa: F401
|
||||
_classify_by_position_enhanced,
|
||||
_classify_by_position_fallback,
|
||||
)
|
||||
@@ -143,7 +143,7 @@ def detect_column_geometry_zoned(
|
||||
per content zone on the corresponding sub-image.
|
||||
4. If no boxes: delegates entirely to detect_column_geometry().
|
||||
"""
|
||||
from cv_box_detect import detect_boxes, split_page_into_zones
|
||||
from ..detect.box_detect import detect_boxes, split_page_into_zones
|
||||
|
||||
geo_result = detect_column_geometry(ocr_img, dewarped_bgr)
|
||||
if geo_result is None:
|
||||
|
||||
@@ -13,7 +13,7 @@ from typing import Dict, List
|
||||
|
||||
import numpy as np
|
||||
|
||||
from cv_vocab_types import RowGeometry
|
||||
from ..types import RowGeometry
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@@ -20,9 +20,9 @@ try:
|
||||
except ImportError:
|
||||
cv2 = None # type: ignore[assignment]
|
||||
|
||||
from cv_vocab_types import RowGeometry
|
||||
from cv_ocr_word_assembly import _group_words_into_lines
|
||||
from cv_layout_row_regularize import _regularize_row_grid
|
||||
from ..types import RowGeometry
|
||||
from ..engines.word_assembly import _group_words_into_lines
|
||||
from .row_regularize import _regularize_row_grid
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@@ -11,7 +11,7 @@ import logging
|
||||
from collections import Counter
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from cv_vocab_types import (
|
||||
from ..types import (
|
||||
ColumnGeometry,
|
||||
ENGLISH_FUNCTION_WORDS,
|
||||
GERMAN_FUNCTION_WORDS,
|
||||
|
||||
Reference in New Issue
Block a user