Restructure: Move 47 cv_* files into ocr/ package
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 39s
CI / test-go-edu-search (push) Successful in 29s
CI / test-python-klausur (push) Failing after 2m34s
CI / test-python-agent-core (push) Successful in 20s
CI / test-nodejs-website (push) Successful in 26s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 39s
CI / test-go-edu-search (push) Successful in 29s
CI / test-python-klausur (push) Failing after 2m34s
CI / test-python-agent-core (push) Successful in 20s
CI / test-nodejs-website (push) Successful in 26s
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -27,6 +27,7 @@
|
||||
|
||||
# Algorithmic monolith — detect_column_geometry() allein 411 LOC, nicht weiter teilbar
|
||||
**/cv_layout_columns.py | owner=klausur | reason=detect_column_geometry ist eine einzelne 411-LOC Funktion (Whitespace-Gap-Analyse) | review=2026-10-01
|
||||
**/ocr/layout/columns.py | owner=klausur | reason=Same file moved to ocr/ package | review=2026-10-01
|
||||
|
||||
# Two indivisible route handlers (~230 LOC each) that cannot be split further
|
||||
**/vocab_worksheet_compare_api.py | owner=klausur | reason=compare_ocr_methods (234 LOC) + analyze_grid (255 LOC), each a single cohesive handler | review=2026-10-01
|
||||
|
||||
4
klausur-service/backend/cv_box_detect.py
Normal file
4
klausur-service/backend/cv_box_detect.py
Normal file
@@ -0,0 +1,4 @@
|
||||
# Backward-compat shim -- module moved to ocr/detect/box_detect.py
|
||||
import importlib as _importlib
|
||||
import sys as _sys
|
||||
_sys.modules[__name__] = _importlib.import_module("ocr.detect.box_detect")
|
||||
4
klausur-service/backend/cv_box_layout.py
Normal file
4
klausur-service/backend/cv_box_layout.py
Normal file
@@ -0,0 +1,4 @@
|
||||
# Backward-compat shim -- module moved to ocr/detect/box_layout.py
|
||||
import importlib as _importlib
|
||||
import sys as _sys
|
||||
_sys.modules[__name__] = _importlib.import_module("ocr.detect.box_layout")
|
||||
4
klausur-service/backend/cv_cell_grid.py
Normal file
4
klausur-service/backend/cv_cell_grid.py
Normal file
@@ -0,0 +1,4 @@
|
||||
# Backward-compat shim -- module moved to ocr/cell_grid/cell_grid.py
|
||||
import importlib as _importlib
|
||||
import sys as _sys
|
||||
_sys.modules[__name__] = _importlib.import_module("ocr.cell_grid.cell_grid")
|
||||
4
klausur-service/backend/cv_cell_grid_build.py
Normal file
4
klausur-service/backend/cv_cell_grid_build.py
Normal file
@@ -0,0 +1,4 @@
|
||||
# Backward-compat shim -- module moved to ocr/cell_grid/build.py
|
||||
import importlib as _importlib
|
||||
import sys as _sys
|
||||
_sys.modules[__name__] = _importlib.import_module("ocr.cell_grid.build")
|
||||
4
klausur-service/backend/cv_cell_grid_helpers.py
Normal file
4
klausur-service/backend/cv_cell_grid_helpers.py
Normal file
@@ -0,0 +1,4 @@
|
||||
# Backward-compat shim -- module moved to ocr/cell_grid/helpers.py
|
||||
import importlib as _importlib
|
||||
import sys as _sys
|
||||
_sys.modules[__name__] = _importlib.import_module("ocr.cell_grid.helpers")
|
||||
4
klausur-service/backend/cv_cell_grid_legacy.py
Normal file
4
klausur-service/backend/cv_cell_grid_legacy.py
Normal file
@@ -0,0 +1,4 @@
|
||||
# Backward-compat shim -- module moved to ocr/cell_grid/legacy.py
|
||||
import importlib as _importlib
|
||||
import sys as _sys
|
||||
_sys.modules[__name__] = _importlib.import_module("ocr.cell_grid.legacy")
|
||||
4
klausur-service/backend/cv_cell_grid_merge.py
Normal file
4
klausur-service/backend/cv_cell_grid_merge.py
Normal file
@@ -0,0 +1,4 @@
|
||||
# Backward-compat shim -- module moved to ocr/cell_grid/merge.py
|
||||
import importlib as _importlib
|
||||
import sys as _sys
|
||||
_sys.modules[__name__] = _importlib.import_module("ocr.cell_grid.merge")
|
||||
4
klausur-service/backend/cv_cell_grid_streaming.py
Normal file
4
klausur-service/backend/cv_cell_grid_streaming.py
Normal file
@@ -0,0 +1,4 @@
|
||||
# Backward-compat shim -- module moved to ocr/cell_grid/streaming.py
|
||||
import importlib as _importlib
|
||||
import sys as _sys
|
||||
_sys.modules[__name__] = _importlib.import_module("ocr.cell_grid.streaming")
|
||||
4
klausur-service/backend/cv_cell_grid_vocab.py
Normal file
4
klausur-service/backend/cv_cell_grid_vocab.py
Normal file
@@ -0,0 +1,4 @@
|
||||
# Backward-compat shim -- module moved to ocr/cell_grid/vocab.py
|
||||
import importlib as _importlib
|
||||
import sys as _sys
|
||||
_sys.modules[__name__] = _importlib.import_module("ocr.cell_grid.vocab")
|
||||
4
klausur-service/backend/cv_color_detect.py
Normal file
4
klausur-service/backend/cv_color_detect.py
Normal file
@@ -0,0 +1,4 @@
|
||||
# Backward-compat shim -- module moved to ocr/detect/color_detect.py
|
||||
import importlib as _importlib
|
||||
import sys as _sys
|
||||
_sys.modules[__name__] = _importlib.import_module("ocr.detect.color_detect")
|
||||
4
klausur-service/backend/cv_doclayout_detect.py
Normal file
4
klausur-service/backend/cv_doclayout_detect.py
Normal file
@@ -0,0 +1,4 @@
|
||||
# Backward-compat shim -- module moved to ocr/detect/doclayout_detect.py
|
||||
import importlib as _importlib
|
||||
import sys as _sys
|
||||
_sys.modules[__name__] = _importlib.import_module("ocr.detect.doclayout_detect")
|
||||
4
klausur-service/backend/cv_graphic_detect.py
Normal file
4
klausur-service/backend/cv_graphic_detect.py
Normal file
@@ -0,0 +1,4 @@
|
||||
# Backward-compat shim -- module moved to ocr/detect/graphic_detect.py
|
||||
import importlib as _importlib
|
||||
import sys as _sys
|
||||
_sys.modules[__name__] = _importlib.import_module("ocr.detect.graphic_detect")
|
||||
4
klausur-service/backend/cv_gutter_repair.py
Normal file
4
klausur-service/backend/cv_gutter_repair.py
Normal file
@@ -0,0 +1,4 @@
|
||||
# Backward-compat shim -- module moved to ocr/gutter/repair.py
|
||||
import importlib as _importlib
|
||||
import sys as _sys
|
||||
_sys.modules[__name__] = _importlib.import_module("ocr.gutter.repair")
|
||||
4
klausur-service/backend/cv_gutter_repair_core.py
Normal file
4
klausur-service/backend/cv_gutter_repair_core.py
Normal file
@@ -0,0 +1,4 @@
|
||||
# Backward-compat shim -- module moved to ocr/gutter/core.py
|
||||
import importlib as _importlib
|
||||
import sys as _sys
|
||||
_sys.modules[__name__] = _importlib.import_module("ocr.gutter.core")
|
||||
4
klausur-service/backend/cv_gutter_repair_grid.py
Normal file
4
klausur-service/backend/cv_gutter_repair_grid.py
Normal file
@@ -0,0 +1,4 @@
|
||||
# Backward-compat shim -- module moved to ocr/gutter/grid.py
|
||||
import importlib as _importlib
|
||||
import sys as _sys
|
||||
_sys.modules[__name__] = _importlib.import_module("ocr.gutter.grid")
|
||||
4
klausur-service/backend/cv_ipa_german.py
Normal file
4
klausur-service/backend/cv_ipa_german.py
Normal file
@@ -0,0 +1,4 @@
|
||||
# Backward-compat shim -- module moved to ocr/ipa_german.py
|
||||
import importlib as _importlib
|
||||
import sys as _sys
|
||||
_sys.modules[__name__] = _importlib.import_module("ocr.ipa_german")
|
||||
4
klausur-service/backend/cv_layout.py
Normal file
4
klausur-service/backend/cv_layout.py
Normal file
@@ -0,0 +1,4 @@
|
||||
# Backward-compat shim -- module moved to ocr/layout/layout.py
|
||||
import importlib as _importlib
|
||||
import sys as _sys
|
||||
_sys.modules[__name__] = _importlib.import_module("ocr.layout.layout")
|
||||
4
klausur-service/backend/cv_layout_analyze.py
Normal file
4
klausur-service/backend/cv_layout_analyze.py
Normal file
@@ -0,0 +1,4 @@
|
||||
# Backward-compat shim -- module moved to ocr/layout/analyze.py
|
||||
import importlib as _importlib
|
||||
import sys as _sys
|
||||
_sys.modules[__name__] = _importlib.import_module("ocr.layout.analyze")
|
||||
4
klausur-service/backend/cv_layout_classify.py
Normal file
4
klausur-service/backend/cv_layout_classify.py
Normal file
@@ -0,0 +1,4 @@
|
||||
# Backward-compat shim -- module moved to ocr/layout/classify.py
|
||||
import importlib as _importlib
|
||||
import sys as _sys
|
||||
_sys.modules[__name__] = _importlib.import_module("ocr.layout.classify")
|
||||
4
klausur-service/backend/cv_layout_classify_position.py
Normal file
4
klausur-service/backend/cv_layout_classify_position.py
Normal file
@@ -0,0 +1,4 @@
|
||||
# Backward-compat shim -- module moved to ocr/layout/classify_position.py
|
||||
import importlib as _importlib
|
||||
import sys as _sys
|
||||
_sys.modules[__name__] = _importlib.import_module("ocr.layout.classify_position")
|
||||
4
klausur-service/backend/cv_layout_column_refine.py
Normal file
4
klausur-service/backend/cv_layout_column_refine.py
Normal file
@@ -0,0 +1,4 @@
|
||||
# Backward-compat shim -- module moved to ocr/layout/column_refine.py
|
||||
import importlib as _importlib
|
||||
import sys as _sys
|
||||
_sys.modules[__name__] = _importlib.import_module("ocr.layout.column_refine")
|
||||
4
klausur-service/backend/cv_layout_columns.py
Normal file
4
klausur-service/backend/cv_layout_columns.py
Normal file
@@ -0,0 +1,4 @@
|
||||
# Backward-compat shim -- module moved to ocr/layout/columns.py
|
||||
import importlib as _importlib
|
||||
import sys as _sys
|
||||
_sys.modules[__name__] = _importlib.import_module("ocr.layout.columns")
|
||||
4
klausur-service/backend/cv_layout_detection.py
Normal file
4
klausur-service/backend/cv_layout_detection.py
Normal file
@@ -0,0 +1,4 @@
|
||||
# Backward-compat shim -- module moved to ocr/layout/detection.py
|
||||
import importlib as _importlib
|
||||
import sys as _sys
|
||||
_sys.modules[__name__] = _importlib.import_module("ocr.layout.detection")
|
||||
4
klausur-service/backend/cv_layout_row_regularize.py
Normal file
4
klausur-service/backend/cv_layout_row_regularize.py
Normal file
@@ -0,0 +1,4 @@
|
||||
# Backward-compat shim -- module moved to ocr/layout/row_regularize.py
|
||||
import importlib as _importlib
|
||||
import sys as _sys
|
||||
_sys.modules[__name__] = _importlib.import_module("ocr.layout.row_regularize")
|
||||
4
klausur-service/backend/cv_layout_rows.py
Normal file
4
klausur-service/backend/cv_layout_rows.py
Normal file
@@ -0,0 +1,4 @@
|
||||
# Backward-compat shim -- module moved to ocr/layout/rows.py
|
||||
import importlib as _importlib
|
||||
import sys as _sys
|
||||
_sys.modules[__name__] = _importlib.import_module("ocr.layout.rows")
|
||||
4
klausur-service/backend/cv_layout_scoring.py
Normal file
4
klausur-service/backend/cv_layout_scoring.py
Normal file
@@ -0,0 +1,4 @@
|
||||
# Backward-compat shim -- module moved to ocr/layout/scoring.py
|
||||
import importlib as _importlib
|
||||
import sys as _sys
|
||||
_sys.modules[__name__] = _importlib.import_module("ocr.layout.scoring")
|
||||
4
klausur-service/backend/cv_ocr_cell_filter.py
Normal file
4
klausur-service/backend/cv_ocr_cell_filter.py
Normal file
@@ -0,0 +1,4 @@
|
||||
# Backward-compat shim -- module moved to ocr/engines/cell_filter.py
|
||||
import importlib as _importlib
|
||||
import sys as _sys
|
||||
_sys.modules[__name__] = _importlib.import_module("ocr.engines.cell_filter")
|
||||
4
klausur-service/backend/cv_ocr_cell_phonetics.py
Normal file
4
klausur-service/backend/cv_ocr_cell_phonetics.py
Normal file
@@ -0,0 +1,4 @@
|
||||
# Backward-compat shim -- module moved to ocr/engines/cell_phonetics.py
|
||||
import importlib as _importlib
|
||||
import sys as _sys
|
||||
_sys.modules[__name__] = _importlib.import_module("ocr.engines.cell_phonetics")
|
||||
4
klausur-service/backend/cv_ocr_engines.py
Normal file
4
klausur-service/backend/cv_ocr_engines.py
Normal file
@@ -0,0 +1,4 @@
|
||||
# Backward-compat shim -- module moved to ocr/engines/engines.py
|
||||
import importlib as _importlib
|
||||
import sys as _sys
|
||||
_sys.modules[__name__] = _importlib.import_module("ocr.engines.engines")
|
||||
4
klausur-service/backend/cv_ocr_ipa_lookup.py
Normal file
4
klausur-service/backend/cv_ocr_ipa_lookup.py
Normal file
@@ -0,0 +1,4 @@
|
||||
# Backward-compat shim -- module moved to ocr/engines/ipa_lookup.py
|
||||
import importlib as _importlib
|
||||
import sys as _sys
|
||||
_sys.modules[__name__] = _importlib.import_module("ocr.engines.ipa_lookup")
|
||||
4
klausur-service/backend/cv_ocr_ipa_repair.py
Normal file
4
klausur-service/backend/cv_ocr_ipa_repair.py
Normal file
@@ -0,0 +1,4 @@
|
||||
# Backward-compat shim -- module moved to ocr/engines/ipa_repair.py
|
||||
import importlib as _importlib
|
||||
import sys as _sys
|
||||
_sys.modules[__name__] = _importlib.import_module("ocr.engines.ipa_repair")
|
||||
4
klausur-service/backend/cv_ocr_vocab_postprocess.py
Normal file
4
klausur-service/backend/cv_ocr_vocab_postprocess.py
Normal file
@@ -0,0 +1,4 @@
|
||||
# Backward-compat shim -- module moved to ocr/engines/vocab_postprocess.py
|
||||
import importlib as _importlib
|
||||
import sys as _sys
|
||||
_sys.modules[__name__] = _importlib.import_module("ocr.engines.vocab_postprocess")
|
||||
4
klausur-service/backend/cv_ocr_word_assembly.py
Normal file
4
klausur-service/backend/cv_ocr_word_assembly.py
Normal file
@@ -0,0 +1,4 @@
|
||||
# Backward-compat shim -- module moved to ocr/engines/word_assembly.py
|
||||
import importlib as _importlib
|
||||
import sys as _sys
|
||||
_sys.modules[__name__] = _importlib.import_module("ocr.engines.word_assembly")
|
||||
4
klausur-service/backend/cv_preprocessing.py
Normal file
4
klausur-service/backend/cv_preprocessing.py
Normal file
@@ -0,0 +1,4 @@
|
||||
# Backward-compat shim -- module moved to ocr/preprocessing/preprocessing.py
|
||||
import importlib as _importlib
|
||||
import sys as _sys
|
||||
_sys.modules[__name__] = _importlib.import_module("ocr.preprocessing.preprocessing")
|
||||
4
klausur-service/backend/cv_preprocessing_deskew.py
Normal file
4
klausur-service/backend/cv_preprocessing_deskew.py
Normal file
@@ -0,0 +1,4 @@
|
||||
# Backward-compat shim -- module moved to ocr/preprocessing/deskew.py
|
||||
import importlib as _importlib
|
||||
import sys as _sys
|
||||
_sys.modules[__name__] = _importlib.import_module("ocr.preprocessing.deskew")
|
||||
4
klausur-service/backend/cv_preprocessing_dewarp.py
Normal file
4
klausur-service/backend/cv_preprocessing_dewarp.py
Normal file
@@ -0,0 +1,4 @@
|
||||
# Backward-compat shim -- module moved to ocr/preprocessing/dewarp.py
|
||||
import importlib as _importlib
|
||||
import sys as _sys
|
||||
_sys.modules[__name__] = _importlib.import_module("ocr.preprocessing.dewarp")
|
||||
4
klausur-service/backend/cv_review.py
Normal file
4
klausur-service/backend/cv_review.py
Normal file
@@ -0,0 +1,4 @@
|
||||
# Backward-compat shim -- module moved to ocr/review/review.py
|
||||
import importlib as _importlib
|
||||
import sys as _sys
|
||||
_sys.modules[__name__] = _importlib.import_module("ocr.review.review")
|
||||
4
klausur-service/backend/cv_review_llm.py
Normal file
4
klausur-service/backend/cv_review_llm.py
Normal file
@@ -0,0 +1,4 @@
|
||||
# Backward-compat shim -- module moved to ocr/review/llm.py
|
||||
import importlib as _importlib
|
||||
import sys as _sys
|
||||
_sys.modules[__name__] = _importlib.import_module("ocr.review.llm")
|
||||
4
klausur-service/backend/cv_review_pipeline.py
Normal file
4
klausur-service/backend/cv_review_pipeline.py
Normal file
@@ -0,0 +1,4 @@
|
||||
# Backward-compat shim -- module moved to ocr/review/pipeline.py
|
||||
import importlib as _importlib
|
||||
import sys as _sys
|
||||
_sys.modules[__name__] = _importlib.import_module("ocr.review.pipeline")
|
||||
4
klausur-service/backend/cv_review_spell.py
Normal file
4
klausur-service/backend/cv_review_spell.py
Normal file
@@ -0,0 +1,4 @@
|
||||
# Backward-compat shim -- module moved to ocr/review/spell.py
|
||||
import importlib as _importlib
|
||||
import sys as _sys
|
||||
_sys.modules[__name__] = _importlib.import_module("ocr.review.spell")
|
||||
4
klausur-service/backend/cv_syllable_core.py
Normal file
4
klausur-service/backend/cv_syllable_core.py
Normal file
@@ -0,0 +1,4 @@
|
||||
# Backward-compat shim -- module moved to ocr/detect/syllable/core.py
|
||||
import importlib as _importlib
|
||||
import sys as _sys
|
||||
_sys.modules[__name__] = _importlib.import_module("ocr.detect.syllable.core")
|
||||
4
klausur-service/backend/cv_syllable_detect.py
Normal file
4
klausur-service/backend/cv_syllable_detect.py
Normal file
@@ -0,0 +1,4 @@
|
||||
# Backward-compat shim -- module moved to ocr/detect/syllable/detect.py
|
||||
import importlib as _importlib
|
||||
import sys as _sys
|
||||
_sys.modules[__name__] = _importlib.import_module("ocr.detect.syllable.detect")
|
||||
4
klausur-service/backend/cv_syllable_merge.py
Normal file
4
klausur-service/backend/cv_syllable_merge.py
Normal file
@@ -0,0 +1,4 @@
|
||||
# Backward-compat shim -- module moved to ocr/detect/syllable/merge.py
|
||||
import importlib as _importlib
|
||||
import sys as _sys
|
||||
_sys.modules[__name__] = _importlib.import_module("ocr.detect.syllable.merge")
|
||||
4
klausur-service/backend/cv_vocab_pipeline.py
Normal file
4
klausur-service/backend/cv_vocab_pipeline.py
Normal file
@@ -0,0 +1,4 @@
|
||||
# Backward-compat shim -- module moved to ocr/pipeline.py
|
||||
import importlib as _importlib
|
||||
import sys as _sys
|
||||
_sys.modules[__name__] = _importlib.import_module("ocr.pipeline")
|
||||
4
klausur-service/backend/cv_vocab_types.py
Normal file
4
klausur-service/backend/cv_vocab_types.py
Normal file
@@ -0,0 +1,4 @@
|
||||
# Backward-compat shim -- module moved to ocr/types.py
|
||||
import importlib as _importlib
|
||||
import sys as _sys
|
||||
_sys.modules[__name__] = _importlib.import_module("ocr.types")
|
||||
4
klausur-service/backend/cv_words_first.py
Normal file
4
klausur-service/backend/cv_words_first.py
Normal file
@@ -0,0 +1,4 @@
|
||||
# Backward-compat shim -- module moved to ocr/words_first.py
|
||||
import importlib as _importlib
|
||||
import sys as _sys
|
||||
_sys.modules[__name__] = _importlib.import_module("ocr.words_first")
|
||||
9
klausur-service/backend/ocr/__init__.py
Normal file
9
klausur-service/backend/ocr/__init__.py
Normal file
@@ -0,0 +1,9 @@
|
||||
"""
|
||||
OCR package — restructured from cv_* flat modules.
|
||||
|
||||
Backward-compatible re-exports: consumers can still use
|
||||
``from cv_layout import ...`` etc. via the shim files in backend/.
|
||||
"""
|
||||
|
||||
from .types import * # noqa: F401,F403
|
||||
from .pipeline import * # noqa: F401,F403
|
||||
2
klausur-service/backend/ocr/cell_grid/__init__.py
Normal file
2
klausur-service/backend/ocr/cell_grid/__init__.py
Normal file
@@ -0,0 +1,2 @@
|
||||
"""Cell-grid construction sub-package."""
|
||||
from .cell_grid import * # noqa: F401,F403
|
||||
@@ -10,8 +10,8 @@ from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
import numpy as np
|
||||
|
||||
from cv_vocab_types import PageRegion, RowGeometry
|
||||
from cv_ocr_engines import (
|
||||
from ..types import PageRegion, RowGeometry
|
||||
from ..engines.engines import (
|
||||
RAPIDOCR_AVAILABLE,
|
||||
_assign_row_words_to_columns,
|
||||
_clean_cell_text,
|
||||
@@ -22,7 +22,7 @@ from cv_ocr_engines import (
|
||||
ocr_region_rapid,
|
||||
ocr_region_trocr,
|
||||
)
|
||||
from cv_cell_grid_helpers import (
|
||||
from .helpers import (
|
||||
_MIN_WORD_CONF,
|
||||
_ensure_minimum_crop_size,
|
||||
_heal_row_gaps,
|
||||
|
||||
@@ -16,7 +16,7 @@ DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
|
||||
"""
|
||||
|
||||
# --- Helpers ---
|
||||
from cv_cell_grid_helpers import ( # noqa: F401
|
||||
from .helpers import ( # noqa: F401
|
||||
_MIN_WORD_CONF,
|
||||
_compute_cell_padding,
|
||||
_ensure_minimum_crop_size,
|
||||
@@ -26,26 +26,26 @@ from cv_cell_grid_helpers import ( # noqa: F401
|
||||
)
|
||||
|
||||
# --- v2 build (current default) ---
|
||||
from cv_cell_grid_build import ( # noqa: F401
|
||||
from .build import ( # noqa: F401
|
||||
_NARROW_COL_THRESHOLD_PCT,
|
||||
_ocr_cell_crop,
|
||||
build_cell_grid_v2,
|
||||
)
|
||||
|
||||
# --- Legacy build (DEPRECATED) ---
|
||||
from cv_cell_grid_legacy import ( # noqa: F401
|
||||
from .legacy import ( # noqa: F401
|
||||
_ocr_single_cell,
|
||||
build_cell_grid,
|
||||
)
|
||||
|
||||
# --- Streaming variants ---
|
||||
from cv_cell_grid_streaming import ( # noqa: F401
|
||||
from .streaming import ( # noqa: F401
|
||||
build_cell_grid_streaming,
|
||||
build_cell_grid_v2_streaming,
|
||||
)
|
||||
|
||||
# --- Row merging ---
|
||||
from cv_cell_grid_merge import ( # noqa: F401
|
||||
from .merge import ( # noqa: F401
|
||||
_PHONETIC_ONLY_RE,
|
||||
_is_phonetic_only_text,
|
||||
_merge_continuation_rows,
|
||||
@@ -54,7 +54,7 @@ from cv_cell_grid_merge import ( # noqa: F401
|
||||
)
|
||||
|
||||
# --- Vocab extraction ---
|
||||
from cv_cell_grid_vocab import ( # noqa: F401
|
||||
from .vocab import ( # noqa: F401
|
||||
_cells_to_vocab_entries,
|
||||
build_word_grid,
|
||||
)
|
||||
|
||||
@@ -13,7 +13,7 @@ from typing import List
|
||||
|
||||
import numpy as np
|
||||
|
||||
from cv_vocab_types import RowGeometry
|
||||
from ..types import RowGeometry
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@@ -12,8 +12,8 @@ from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
import numpy as np
|
||||
|
||||
from cv_vocab_types import PageRegion, RowGeometry
|
||||
from cv_ocr_engines import (
|
||||
from ..types import PageRegion, RowGeometry
|
||||
from ..engines.engines import (
|
||||
RAPIDOCR_AVAILABLE,
|
||||
_assign_row_words_to_columns,
|
||||
_clean_cell_text,
|
||||
@@ -22,7 +22,7 @@ from cv_ocr_engines import (
|
||||
ocr_region_rapid,
|
||||
ocr_region_trocr,
|
||||
)
|
||||
from cv_cell_grid_helpers import (
|
||||
from .helpers import (
|
||||
_MIN_WORD_CONF,
|
||||
_compute_cell_padding,
|
||||
_ensure_minimum_crop_size,
|
||||
|
||||
@@ -11,7 +11,7 @@ import logging
|
||||
import re
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from cv_ocr_engines import _RE_ALPHA
|
||||
from ..engines.engines import _RE_ALPHA
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@@ -13,17 +13,17 @@ from typing import Any, Dict, Generator, List, Optional, Tuple
|
||||
|
||||
import numpy as np
|
||||
|
||||
from cv_vocab_types import PageRegion, RowGeometry
|
||||
from cv_ocr_engines import (
|
||||
from ..types import PageRegion, RowGeometry
|
||||
from ..engines.engines import (
|
||||
RAPIDOCR_AVAILABLE,
|
||||
_assign_row_words_to_columns,
|
||||
)
|
||||
from cv_cell_grid_helpers import (
|
||||
from .helpers import (
|
||||
_heal_row_gaps,
|
||||
_is_artifact_row,
|
||||
)
|
||||
from cv_cell_grid_build import _ocr_cell_crop
|
||||
from cv_cell_grid_legacy import _ocr_single_cell
|
||||
from .build import _ocr_cell_crop
|
||||
from .legacy import _ocr_single_cell
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@@ -10,13 +10,13 @@ DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
|
||||
import logging
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from cv_ocr_engines import (
|
||||
from ..engines.engines import (
|
||||
_attach_example_sentences,
|
||||
_fix_phonetic_brackets,
|
||||
_split_comma_entries,
|
||||
)
|
||||
from cv_cell_grid_legacy import build_cell_grid
|
||||
from cv_cell_grid_merge import (
|
||||
from .legacy import build_cell_grid
|
||||
from .merge import (
|
||||
_merge_continuation_rows,
|
||||
_merge_phonetic_continuation_rows,
|
||||
_merge_wrapped_rows,
|
||||
|
||||
2
klausur-service/backend/ocr/detect/__init__.py
Normal file
2
klausur-service/backend/ocr/detect/__init__.py
Normal file
@@ -0,0 +1,2 @@
|
||||
"""Detection sub-package (boxes, graphics, colors, syllables, doclayout)."""
|
||||
from .box_detect import * # noqa: F401,F403
|
||||
@@ -21,7 +21,7 @@ from typing import List, Optional, Tuple
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from cv_vocab_types import DetectedBox, PageZone
|
||||
from ..types import DetectedBox, PageZone
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@@ -127,7 +127,7 @@ def detect_graphic_elements(
|
||||
backend = os.environ.get("GRAPHIC_DETECT_BACKEND", "auto")
|
||||
if backend in ("doclayout", "auto"):
|
||||
try:
|
||||
from cv_doclayout_detect import detect_layout_regions, is_doclayout_available
|
||||
from .doclayout_detect import detect_layout_regions, is_doclayout_available
|
||||
if is_doclayout_available():
|
||||
regions = detect_layout_regions(img_bgr)
|
||||
if regions:
|
||||
|
||||
2
klausur-service/backend/ocr/detect/syllable/__init__.py
Normal file
2
klausur-service/backend/ocr/detect/syllable/__init__.py
Normal file
@@ -0,0 +1,2 @@
|
||||
"""Syllable detection sub-package."""
|
||||
from .detect import * # noqa: F401,F403
|
||||
@@ -10,7 +10,7 @@ DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
|
||||
"""
|
||||
|
||||
# Core: init, validation, autocorrect
|
||||
from cv_syllable_core import ( # noqa: F401
|
||||
from .core import ( # noqa: F401
|
||||
_IPA_RE,
|
||||
_STOP_WORDS,
|
||||
_get_hyphenators,
|
||||
@@ -23,7 +23,7 @@ from cv_syllable_core import ( # noqa: F401
|
||||
)
|
||||
|
||||
# Merge: gap merging, syllabify, insert
|
||||
from cv_syllable_merge import ( # noqa: F401
|
||||
from .merge import ( # noqa: F401
|
||||
_try_merge_pipe_gaps,
|
||||
merge_word_gaps_in_zones,
|
||||
_try_merge_word_gaps,
|
||||
|
||||
@@ -13,7 +13,7 @@ from typing import Any, Dict, List, Optional
|
||||
|
||||
import numpy as np
|
||||
|
||||
from cv_syllable_core import (
|
||||
from .core import (
|
||||
_get_hyphenators,
|
||||
_hyphenate_word,
|
||||
_IPA_RE,
|
||||
|
||||
2
klausur-service/backend/ocr/engines/__init__.py
Normal file
2
klausur-service/backend/ocr/engines/__init__.py
Normal file
@@ -0,0 +1,2 @@
|
||||
"""OCR engines sub-package."""
|
||||
from .engines import * # noqa: F401,F403
|
||||
@@ -16,7 +16,7 @@ from typing import Any, Dict, List, Optional
|
||||
|
||||
import numpy as np
|
||||
|
||||
from cv_vocab_types import PageRegion, RowGeometry
|
||||
from ..types import PageRegion, RowGeometry
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@@ -11,14 +11,14 @@ import logging
|
||||
import re
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from cv_vocab_types import IPA_AVAILABLE
|
||||
from ..types import IPA_AVAILABLE
|
||||
|
||||
from cv_ocr_ipa_lookup import (
|
||||
from .ipa_lookup import (
|
||||
_insert_missing_ipa,
|
||||
_replace_phonetics_in_text,
|
||||
_text_has_garbled_ipa,
|
||||
)
|
||||
from cv_ocr_ipa_repair import (
|
||||
from .ipa_repair import (
|
||||
_has_non_dict_trailing,
|
||||
_insert_headword_ipa,
|
||||
_strip_post_bracket_garbled,
|
||||
|
||||
@@ -24,7 +24,7 @@ from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
import numpy as np
|
||||
|
||||
from cv_vocab_types import (
|
||||
from ..types import (
|
||||
IPA_AVAILABLE,
|
||||
PageRegion,
|
||||
RowGeometry,
|
||||
@@ -47,7 +47,7 @@ except ImportError:
|
||||
|
||||
# ── Re-exports from sub-modules (backward compatibility) ──────────────────
|
||||
|
||||
from cv_ocr_word_assembly import ( # noqa: F401
|
||||
from .word_assembly import ( # noqa: F401
|
||||
_group_words_into_lines,
|
||||
_words_to_reading_order_lines,
|
||||
_rejoin_hyphenated,
|
||||
@@ -55,7 +55,7 @@ from cv_ocr_word_assembly import ( # noqa: F401
|
||||
_words_to_spaced_text,
|
||||
)
|
||||
|
||||
from cv_ocr_vocab_postprocess import ( # noqa: F401
|
||||
from .vocab_postprocess import ( # noqa: F401
|
||||
_CHAR_CONFUSION_RULES,
|
||||
_DE_INDICATORS_FOR_EN_I,
|
||||
_fix_character_confusion,
|
||||
@@ -66,7 +66,7 @@ from cv_ocr_vocab_postprocess import ( # noqa: F401
|
||||
_attach_example_sentences,
|
||||
)
|
||||
|
||||
from cv_ocr_ipa_lookup import ( # noqa: F401
|
||||
from .ipa_lookup import ( # noqa: F401
|
||||
_PHONETIC_BRACKET_RE,
|
||||
_IPA_CHARS,
|
||||
_MIN_WORD_CONF,
|
||||
@@ -80,20 +80,20 @@ from cv_ocr_ipa_lookup import ( # noqa: F401
|
||||
_insert_missing_ipa,
|
||||
)
|
||||
|
||||
from cv_ocr_ipa_repair import ( # noqa: F401
|
||||
from .ipa_repair import ( # noqa: F401
|
||||
_has_non_dict_trailing,
|
||||
_strip_post_bracket_garbled,
|
||||
fix_ipa_continuation_cell,
|
||||
_insert_headword_ipa,
|
||||
)
|
||||
|
||||
from cv_ocr_cell_phonetics import ( # noqa: F401
|
||||
from .cell_phonetics import ( # noqa: F401
|
||||
fix_cell_phonetics,
|
||||
_has_ipa_gap,
|
||||
_sync_word_boxes_after_ipa_insert,
|
||||
)
|
||||
|
||||
from cv_ocr_cell_filter import ( # noqa: F401
|
||||
from .cell_filter import ( # noqa: F401
|
||||
_RE_REAL_WORD,
|
||||
_RE_ALPHA,
|
||||
_COMMON_SHORT_WORDS,
|
||||
|
||||
@@ -23,7 +23,7 @@ import logging
|
||||
import re
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from cv_vocab_types import (
|
||||
from ..types import (
|
||||
IPA_AVAILABLE,
|
||||
_britfone_dict,
|
||||
_ipa_convert_american,
|
||||
|
||||
@@ -16,8 +16,8 @@ import logging
|
||||
import re
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from cv_vocab_types import IPA_AVAILABLE
|
||||
from cv_ocr_ipa_lookup import (
|
||||
from ..types import IPA_AVAILABLE
|
||||
from .ipa_lookup import (
|
||||
_lookup_ipa,
|
||||
_GRAMMAR_BRACKET_WORDS,
|
||||
)
|
||||
|
||||
2
klausur-service/backend/ocr/gutter/__init__.py
Normal file
2
klausur-service/backend/ocr/gutter/__init__.py
Normal file
@@ -0,0 +1,2 @@
|
||||
"""Gutter repair sub-package."""
|
||||
from .repair import * # noqa: F401,F403
|
||||
@@ -11,7 +11,7 @@ import logging
|
||||
import time
|
||||
from typing import Any, Dict, List, Tuple
|
||||
|
||||
from cv_gutter_repair_core import (
|
||||
from .core import (
|
||||
_init_spellcheckers,
|
||||
_is_ipa_text,
|
||||
_is_known,
|
||||
|
||||
@@ -10,7 +10,7 @@ DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
|
||||
"""
|
||||
|
||||
# Core: spellchecker, data types, repair helpers
|
||||
from cv_gutter_repair_core import ( # noqa: F401
|
||||
from .core import ( # noqa: F401
|
||||
_init_spellcheckers,
|
||||
_is_known,
|
||||
_spell_candidates,
|
||||
@@ -29,7 +29,7 @@ from cv_gutter_repair_core import ( # noqa: F401
|
||||
)
|
||||
|
||||
# Grid: analysis and application
|
||||
from cv_gutter_repair_grid import ( # noqa: F401
|
||||
from .grid import ( # noqa: F401
|
||||
analyse_grid_for_gutter_repair,
|
||||
apply_gutter_suggestions,
|
||||
)
|
||||
|
||||
@@ -26,7 +26,7 @@ def _lookup_ipa_de(word: str) -> Optional[str]:
|
||||
|
||||
Returns IPA string or None if not found.
|
||||
"""
|
||||
from cv_vocab_types import _de_ipa_dict, _epitran_de, DE_IPA_AVAILABLE
|
||||
from .types import _de_ipa_dict, _epitran_de, DE_IPA_AVAILABLE
|
||||
|
||||
if not DE_IPA_AVAILABLE and _epitran_de is None:
|
||||
return None
|
||||
@@ -109,7 +109,7 @@ def insert_german_ipa(
|
||||
Returns:
|
||||
Number of cells modified.
|
||||
"""
|
||||
from cv_vocab_types import DE_IPA_AVAILABLE, _epitran_de
|
||||
from .types import DE_IPA_AVAILABLE, _epitran_de
|
||||
|
||||
if not DE_IPA_AVAILABLE and _epitran_de is None:
|
||||
logger.warning("German IPA not available — skipping")
|
||||
|
||||
2
klausur-service/backend/ocr/layout/__init__.py
Normal file
2
klausur-service/backend/ocr/layout/__init__.py
Normal file
@@ -0,0 +1,2 @@
|
||||
"""Layout analysis sub-package."""
|
||||
from .layout import * # noqa: F401,F403
|
||||
@@ -13,8 +13,8 @@ from typing import List
|
||||
|
||||
import numpy as np
|
||||
|
||||
from cv_vocab_types import PageRegion
|
||||
from cv_layout_detection import _find_content_bounds
|
||||
from ..types import PageRegion
|
||||
from .detection import _find_content_bounds
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -246,7 +246,7 @@ def analyze_layout(layout_img: np.ndarray, ocr_img: np.ndarray) -> List[PageRegi
|
||||
|
||||
# Add header/footer info (gap-based detection with fallback)
|
||||
# Lazy import to avoid circular dependency with layout.py (formerly cv_layout.py)
|
||||
from cv_layout_detection import _add_header_footer
|
||||
from .detection import _add_header_footer
|
||||
_add_header_footer(regions, top_y, bottom_y, w, h, inv=inv)
|
||||
|
||||
top_region = next((r.type for r in regions if r.type in ('header', 'margin_top')), 'none')
|
||||
|
||||
@@ -11,16 +11,16 @@ from typing import Dict, List, Optional
|
||||
|
||||
import numpy as np
|
||||
|
||||
from cv_vocab_types import ColumnGeometry, PageRegion
|
||||
from ..types import ColumnGeometry, PageRegion
|
||||
|
||||
from cv_layout_scoring import (
|
||||
from .scoring import (
|
||||
_score_language,
|
||||
_score_role,
|
||||
_score_dictionary_signals,
|
||||
_classify_dictionary_columns,
|
||||
)
|
||||
|
||||
from cv_layout_classify_position import (
|
||||
from .classify_position import (
|
||||
_classify_by_position_enhanced,
|
||||
_classify_by_position_fallback,
|
||||
)
|
||||
@@ -211,7 +211,7 @@ def classify_column_types(geometries: List[ColumnGeometry],
|
||||
# _add_header_footer lives in .detection (avoids circular import at module
|
||||
# level). Lazy-import here so the module can be tested independently when
|
||||
# cv_layout hasn't been modified yet.
|
||||
from cv_layout_detection import _add_header_footer # noqa: E402
|
||||
from .detection import _add_header_footer # noqa: E402
|
||||
|
||||
content_h = bottom_y - top_y
|
||||
|
||||
|
||||
@@ -11,7 +11,7 @@ Extracted from cv_layout_classify.py during file-size split.
|
||||
import logging
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
from cv_vocab_types import ColumnGeometry, PageRegion
|
||||
from ..types import ColumnGeometry, PageRegion
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@@ -16,7 +16,7 @@ from typing import Dict, List, Optional, Tuple
|
||||
|
||||
import numpy as np
|
||||
|
||||
from cv_vocab_types import ColumnGeometry
|
||||
from ..types import ColumnGeometry
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@@ -19,8 +19,8 @@ from typing import Dict, List, Optional, Tuple
|
||||
|
||||
import numpy as np
|
||||
|
||||
from cv_vocab_types import ColumnGeometry
|
||||
from cv_layout_detection import _find_content_bounds
|
||||
from ..types import ColumnGeometry
|
||||
from .detection import _find_content_bounds
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@@ -13,7 +13,7 @@ from typing import List, Optional, Tuple
|
||||
|
||||
import numpy as np
|
||||
|
||||
from cv_vocab_types import (
|
||||
from ..types import (
|
||||
DocumentTypeResult,
|
||||
PageRegion,
|
||||
)
|
||||
|
||||
@@ -21,14 +21,14 @@ from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
import numpy as np
|
||||
|
||||
from cv_vocab_types import ColumnGeometry, DetectedBox, PageRegion
|
||||
from ..types import ColumnGeometry, DetectedBox, PageRegion
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ── Re-exports (backward compatibility) ───────────────────────────────────
|
||||
|
||||
from cv_layout_detection import ( # noqa: F401
|
||||
from .detection import ( # noqa: F401
|
||||
detect_document_type,
|
||||
create_ocr_image,
|
||||
create_layout_image,
|
||||
@@ -39,46 +39,46 @@ from cv_layout_detection import ( # noqa: F401
|
||||
_add_header_footer,
|
||||
)
|
||||
|
||||
from cv_layout_analyze import ( # noqa: F401
|
||||
from .analyze import ( # noqa: F401
|
||||
analyze_layout,
|
||||
)
|
||||
|
||||
from cv_layout_columns import ( # noqa: F401
|
||||
from .columns import ( # noqa: F401
|
||||
detect_column_geometry,
|
||||
_detect_columns_by_clustering,
|
||||
_build_geometries_from_starts,
|
||||
)
|
||||
|
||||
from cv_layout_column_refine import ( # noqa: F401
|
||||
from .column_refine import ( # noqa: F401
|
||||
_detect_sub_columns,
|
||||
_split_broad_columns,
|
||||
expand_narrow_columns,
|
||||
)
|
||||
|
||||
from cv_layout_rows import ( # noqa: F401
|
||||
from .rows import ( # noqa: F401
|
||||
detect_row_geometry,
|
||||
_build_rows_from_word_grouping,
|
||||
)
|
||||
|
||||
from cv_layout_row_regularize import ( # noqa: F401
|
||||
from .row_regularize import ( # noqa: F401
|
||||
_regularize_row_grid,
|
||||
)
|
||||
|
||||
from cv_layout_scoring import ( # noqa: F401
|
||||
from .scoring import ( # noqa: F401
|
||||
_score_language,
|
||||
_score_role,
|
||||
_score_dictionary_signals,
|
||||
_classify_dictionary_columns,
|
||||
)
|
||||
|
||||
from cv_layout_classify import ( # noqa: F401
|
||||
from .classify import ( # noqa: F401
|
||||
_build_margin_regions,
|
||||
positional_column_regions,
|
||||
classify_column_types,
|
||||
_classify_by_content,
|
||||
)
|
||||
|
||||
from cv_layout_classify_position import ( # noqa: F401
|
||||
from .classify_position import ( # noqa: F401
|
||||
_classify_by_position_enhanced,
|
||||
_classify_by_position_fallback,
|
||||
)
|
||||
@@ -143,7 +143,7 @@ def detect_column_geometry_zoned(
|
||||
per content zone on the corresponding sub-image.
|
||||
4. If no boxes: delegates entirely to detect_column_geometry().
|
||||
"""
|
||||
from cv_box_detect import detect_boxes, split_page_into_zones
|
||||
from ..detect.box_detect import detect_boxes, split_page_into_zones
|
||||
|
||||
geo_result = detect_column_geometry(ocr_img, dewarped_bgr)
|
||||
if geo_result is None:
|
||||
|
||||
@@ -13,7 +13,7 @@ from typing import Dict, List
|
||||
|
||||
import numpy as np
|
||||
|
||||
from cv_vocab_types import RowGeometry
|
||||
from ..types import RowGeometry
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@@ -20,9 +20,9 @@ try:
|
||||
except ImportError:
|
||||
cv2 = None # type: ignore[assignment]
|
||||
|
||||
from cv_vocab_types import RowGeometry
|
||||
from cv_ocr_word_assembly import _group_words_into_lines
|
||||
from cv_layout_row_regularize import _regularize_row_grid
|
||||
from ..types import RowGeometry
|
||||
from ..engines.word_assembly import _group_words_into_lines
|
||||
from .row_regularize import _regularize_row_grid
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@@ -11,7 +11,7 @@ import logging
|
||||
from collections import Counter
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from cv_vocab_types import (
|
||||
from ..types import (
|
||||
ColumnGeometry,
|
||||
ENGLISH_FUNCTION_WORDS,
|
||||
GERMAN_FUNCTION_WORDS,
|
||||
|
||||
@@ -14,24 +14,24 @@ Lizenz: Apache 2.0 (kommerziell nutzbar)
|
||||
DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
|
||||
"""
|
||||
|
||||
from cv_vocab_types import * # noqa: F401,F403
|
||||
from cv_preprocessing import * # noqa: F401,F403
|
||||
from cv_layout import * # noqa: F401,F403
|
||||
from cv_ocr_engines import * # noqa: F401,F403
|
||||
from cv_cell_grid import * # noqa: F401,F403
|
||||
from cv_box_detect import * # noqa: F401,F403
|
||||
from cv_review import * # noqa: F401,F403
|
||||
from .types import * # noqa: F401,F403
|
||||
from .preprocessing.preprocessing import * # noqa: F401,F403
|
||||
from .layout.layout import * # noqa: F401,F403
|
||||
from .engines.engines import * # noqa: F401,F403
|
||||
from .cell_grid.cell_grid import * # noqa: F401,F403
|
||||
from .detect.box_detect import * # noqa: F401,F403
|
||||
from .review.review import * # noqa: F401,F403
|
||||
|
||||
# Private names used by consumers — not covered by wildcard re-exports.
|
||||
from cv_preprocessing import _apply_shear # noqa: F401
|
||||
from cv_layout import ( # noqa: F401
|
||||
from .preprocessing.preprocessing import _apply_shear # noqa: F401
|
||||
from .layout.layout import ( # noqa: F401
|
||||
_detect_header_footer_gaps,
|
||||
_detect_sub_columns,
|
||||
_split_broad_columns,
|
||||
)
|
||||
from cv_ocr_engines import ( # noqa: F401
|
||||
from .engines.engines import ( # noqa: F401
|
||||
_fix_character_confusion,
|
||||
_fix_phonetic_brackets,
|
||||
)
|
||||
from cv_cell_grid import _cells_to_vocab_entries # noqa: F401
|
||||
from cv_words_first import build_grid_from_words # noqa: F401
|
||||
from .cell_grid.cell_grid import _cells_to_vocab_entries # noqa: F401
|
||||
from .words_first import build_grid_from_words # noqa: F401
|
||||
|
||||
2
klausur-service/backend/ocr/preprocessing/__init__.py
Normal file
2
klausur-service/backend/ocr/preprocessing/__init__.py
Normal file
@@ -0,0 +1,2 @@
|
||||
"""Preprocessing sub-package (deskew, dewarp, image I/O)."""
|
||||
from .preprocessing import * # noqa: F401,F403
|
||||
@@ -11,7 +11,7 @@ from typing import Any, Dict, Tuple
|
||||
|
||||
import numpy as np
|
||||
|
||||
from cv_vocab_types import (
|
||||
from ..types import (
|
||||
CV2_AVAILABLE,
|
||||
TESSERACT_AVAILABLE,
|
||||
)
|
||||
|
||||
@@ -16,7 +16,7 @@ from typing import Any, Dict, List, Tuple
|
||||
|
||||
import numpy as np
|
||||
|
||||
from cv_vocab_types import (
|
||||
from ..types import (
|
||||
CV2_AVAILABLE,
|
||||
TESSERACT_AVAILABLE,
|
||||
)
|
||||
|
||||
@@ -17,7 +17,7 @@ from typing import Tuple
|
||||
|
||||
import numpy as np
|
||||
|
||||
from cv_vocab_types import (
|
||||
from ..types import (
|
||||
CV2_AVAILABLE,
|
||||
TESSERACT_AVAILABLE,
|
||||
)
|
||||
@@ -38,7 +38,7 @@ except ImportError:
|
||||
Image = None # type: ignore[assignment,misc]
|
||||
|
||||
# Re-export all deskew functions
|
||||
from cv_preprocessing_deskew import ( # noqa: F401
|
||||
from .deskew import ( # noqa: F401
|
||||
deskew_image,
|
||||
deskew_image_by_word_alignment,
|
||||
deskew_image_iterative,
|
||||
@@ -48,7 +48,7 @@ from cv_preprocessing_deskew import ( # noqa: F401
|
||||
)
|
||||
|
||||
# Re-export all dewarp functions
|
||||
from cv_preprocessing_dewarp import ( # noqa: F401
|
||||
from .dewarp import ( # noqa: F401
|
||||
_apply_shear,
|
||||
_detect_shear_angle,
|
||||
_detect_shear_by_hough,
|
||||
|
||||
2
klausur-service/backend/ocr/review/__init__.py
Normal file
2
klausur-service/backend/ocr/review/__init__.py
Normal file
@@ -0,0 +1,2 @@
|
||||
"""Review sub-package (spell, LLM, pipeline orchestration)."""
|
||||
from .review import * # noqa: F401,F403
|
||||
@@ -183,7 +183,7 @@ async def llm_review_entries(
|
||||
model: str = None,
|
||||
) -> Dict:
|
||||
"""OCR error correction. Uses spell-checker (REVIEW_ENGINE=spell) or LLM (REVIEW_ENGINE=llm)."""
|
||||
from cv_review_spell import spell_review_entries_sync, _SPELL_AVAILABLE
|
||||
from .spell import spell_review_entries_sync, _SPELL_AVAILABLE
|
||||
|
||||
if REVIEW_ENGINE == "spell" and _SPELL_AVAILABLE:
|
||||
return spell_review_entries_sync(entries)
|
||||
@@ -260,8 +260,8 @@ async def llm_review_entries_streaming(
|
||||
|
||||
Phase 0 (always): Run _fix_character_confusion and emit any changes.
|
||||
"""
|
||||
from cv_ocr_engines import _fix_character_confusion
|
||||
from cv_review_spell import spell_review_entries_streaming, _SPELL_AVAILABLE
|
||||
from ..engines.engines import _fix_character_confusion
|
||||
from .spell import spell_review_entries_streaming, _SPELL_AVAILABLE
|
||||
|
||||
_CONF_FIELDS = ('english', 'german', 'example')
|
||||
originals = [{f: e.get(f, '') for f in _CONF_FIELDS} for e in entries]
|
||||
|
||||
@@ -13,24 +13,24 @@ from typing import Any, Dict, List, Optional
|
||||
|
||||
import numpy as np
|
||||
|
||||
from cv_vocab_types import (
|
||||
from ..types import (
|
||||
CV_PIPELINE_AVAILABLE,
|
||||
PageRegion,
|
||||
PipelineResult,
|
||||
VocabRow,
|
||||
)
|
||||
from cv_preprocessing import (
|
||||
from ..preprocessing.preprocessing import (
|
||||
deskew_image,
|
||||
dewarp_image,
|
||||
render_image_high_res,
|
||||
render_pdf_high_res,
|
||||
)
|
||||
from cv_layout import (
|
||||
from ..layout.layout import (
|
||||
analyze_layout,
|
||||
create_layout_image,
|
||||
create_ocr_image,
|
||||
)
|
||||
from cv_ocr_engines import (
|
||||
from ..engines.engines import (
|
||||
_group_words_into_lines,
|
||||
)
|
||||
|
||||
|
||||
@@ -12,7 +12,7 @@ DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
|
||||
"""
|
||||
|
||||
# Re-export everything for backward compatibility
|
||||
from cv_review_pipeline import ( # noqa: F401
|
||||
from .pipeline import ( # noqa: F401
|
||||
ocr_region,
|
||||
run_multi_pass_ocr,
|
||||
match_lines_to_vocab,
|
||||
@@ -20,7 +20,7 @@ from cv_review_pipeline import ( # noqa: F401
|
||||
run_cv_pipeline,
|
||||
)
|
||||
|
||||
from cv_review_spell import ( # noqa: F401
|
||||
from .spell import ( # noqa: F401
|
||||
_SPELL_AVAILABLE,
|
||||
_spell_dict_knows,
|
||||
_spell_fix_field,
|
||||
@@ -31,7 +31,7 @@ from cv_review_spell import ( # noqa: F401
|
||||
spell_review_entries_streaming,
|
||||
)
|
||||
|
||||
from cv_review_llm import ( # noqa: F401
|
||||
from .llm import ( # noqa: F401
|
||||
OLLAMA_REVIEW_MODEL,
|
||||
REVIEW_ENGINE,
|
||||
_REVIEW_BATCH_SIZE,
|
||||
|
||||
@@ -210,7 +210,7 @@ def spell_review_entries_sync(entries: List[Dict]) -> Dict:
|
||||
Uses SmartSpellChecker for language-aware corrections with context-based
|
||||
disambiguation (a/I), multi-digit substitution, and cross-language guard.
|
||||
"""
|
||||
from cv_review_llm import _entry_needs_review
|
||||
from .llm import _entry_needs_review
|
||||
|
||||
t0 = time.time()
|
||||
changes: List[Dict] = []
|
||||
|
||||
@@ -19,7 +19,7 @@ import re
|
||||
import statistics
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from cv_ocr_engines import (
|
||||
from .engines.engines import (
|
||||
_group_words_into_lines,
|
||||
_words_to_reading_order_text,
|
||||
)
|
||||
|
||||
4
klausur-service/backend/ocr_image_enhance.py
Normal file
4
klausur-service/backend/ocr_image_enhance.py
Normal file
@@ -0,0 +1,4 @@
|
||||
# Backward-compat shim -- module moved to ocr/image_enhance.py
|
||||
import importlib as _importlib
|
||||
import sys as _sys
|
||||
_sys.modules[__name__] = _importlib.import_module("ocr.image_enhance")
|
||||
Reference in New Issue
Block a user