From cb1be59e468b69363af03c578fbeece2df007a09 Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Sat, 25 Apr 2026 21:03:54 +0200 Subject: [PATCH] Restructure: Move 47 cv_* files into ocr/ package Co-Authored-By: Claude Opus 4.6 (1M context) --- .claude/rules/loc-exceptions.txt | 1 + klausur-service/backend/cv_box_detect.py | 4 ++++ klausur-service/backend/cv_box_layout.py | 4 ++++ klausur-service/backend/cv_cell_grid.py | 4 ++++ klausur-service/backend/cv_cell_grid_build.py | 4 ++++ .../backend/cv_cell_grid_helpers.py | 4 ++++ .../backend/cv_cell_grid_legacy.py | 4 ++++ klausur-service/backend/cv_cell_grid_merge.py | 4 ++++ .../backend/cv_cell_grid_streaming.py | 4 ++++ klausur-service/backend/cv_cell_grid_vocab.py | 4 ++++ klausur-service/backend/cv_color_detect.py | 4 ++++ .../backend/cv_doclayout_detect.py | 4 ++++ klausur-service/backend/cv_graphic_detect.py | 4 ++++ klausur-service/backend/cv_gutter_repair.py | 4 ++++ .../backend/cv_gutter_repair_core.py | 4 ++++ .../backend/cv_gutter_repair_grid.py | 4 ++++ klausur-service/backend/cv_ipa_german.py | 4 ++++ klausur-service/backend/cv_layout.py | 4 ++++ klausur-service/backend/cv_layout_analyze.py | 4 ++++ klausur-service/backend/cv_layout_classify.py | 4 ++++ .../backend/cv_layout_classify_position.py | 4 ++++ .../backend/cv_layout_column_refine.py | 4 ++++ klausur-service/backend/cv_layout_columns.py | 4 ++++ .../backend/cv_layout_detection.py | 4 ++++ .../backend/cv_layout_row_regularize.py | 4 ++++ klausur-service/backend/cv_layout_rows.py | 4 ++++ klausur-service/backend/cv_layout_scoring.py | 4 ++++ klausur-service/backend/cv_ocr_cell_filter.py | 4 ++++ .../backend/cv_ocr_cell_phonetics.py | 4 ++++ klausur-service/backend/cv_ocr_engines.py | 4 ++++ klausur-service/backend/cv_ocr_ipa_lookup.py | 4 ++++ klausur-service/backend/cv_ocr_ipa_repair.py | 4 ++++ .../backend/cv_ocr_vocab_postprocess.py | 4 ++++ .../backend/cv_ocr_word_assembly.py | 4 ++++ klausur-service/backend/cv_preprocessing.py | 4 ++++ .../backend/cv_preprocessing_deskew.py | 4 ++++ .../backend/cv_preprocessing_dewarp.py | 4 ++++ klausur-service/backend/cv_review.py | 4 ++++ klausur-service/backend/cv_review_llm.py | 4 ++++ klausur-service/backend/cv_review_pipeline.py | 4 ++++ klausur-service/backend/cv_review_spell.py | 4 ++++ klausur-service/backend/cv_syllable_core.py | 4 ++++ klausur-service/backend/cv_syllable_detect.py | 4 ++++ klausur-service/backend/cv_syllable_merge.py | 4 ++++ klausur-service/backend/cv_vocab_pipeline.py | 4 ++++ klausur-service/backend/cv_vocab_types.py | 4 ++++ klausur-service/backend/cv_words_first.py | 4 ++++ klausur-service/backend/ocr/__init__.py | 9 +++++++ .../backend/ocr/cell_grid/__init__.py | 2 ++ .../backend/ocr/cell_grid/build.py | 6 ++--- .../backend/ocr/cell_grid/cell_grid.py | 12 +++++----- .../backend/ocr/cell_grid/helpers.py | 2 +- .../backend/ocr/cell_grid/legacy.py | 6 ++--- .../backend/ocr/cell_grid/merge.py | 2 +- .../backend/ocr/cell_grid/streaming.py | 10 ++++---- .../backend/ocr/cell_grid/vocab.py | 6 ++--- .../backend/ocr/detect/__init__.py | 2 ++ .../backend/ocr/detect/box_detect.py | 2 +- .../backend/ocr/detect/graphic_detect.py | 2 +- .../backend/ocr/detect/syllable/__init__.py | 2 ++ .../backend/ocr/detect/syllable/detect.py | 4 ++-- .../backend/ocr/detect/syllable/merge.py | 2 +- .../backend/ocr/engines/__init__.py | 2 ++ .../backend/ocr/engines/cell_filter.py | 2 +- .../backend/ocr/engines/cell_phonetics.py | 6 ++--- .../backend/ocr/engines/engines.py | 14 +++++------ .../backend/ocr/engines/ipa_lookup.py | 2 +- .../backend/ocr/engines/ipa_repair.py | 4 ++-- .../backend/ocr/gutter/__init__.py | 2 ++ klausur-service/backend/ocr/gutter/grid.py | 2 +- klausur-service/backend/ocr/gutter/repair.py | 4 ++-- klausur-service/backend/ocr/ipa_german.py | 4 ++-- .../backend/ocr/layout/__init__.py | 2 ++ klausur-service/backend/ocr/layout/analyze.py | 6 ++--- .../backend/ocr/layout/classify.py | 8 +++---- .../backend/ocr/layout/classify_position.py | 2 +- .../backend/ocr/layout/column_refine.py | 2 +- klausur-service/backend/ocr/layout/columns.py | 4 ++-- .../backend/ocr/layout/detection.py | 2 +- klausur-service/backend/ocr/layout/layout.py | 22 ++++++++--------- .../backend/ocr/layout/row_regularize.py | 2 +- klausur-service/backend/ocr/layout/rows.py | 6 ++--- klausur-service/backend/ocr/layout/scoring.py | 2 +- klausur-service/backend/ocr/pipeline.py | 24 +++++++++---------- .../backend/ocr/preprocessing/__init__.py | 2 ++ .../backend/ocr/preprocessing/deskew.py | 2 +- .../backend/ocr/preprocessing/dewarp.py | 2 +- .../ocr/preprocessing/preprocessing.py | 6 ++--- .../backend/ocr/review/__init__.py | 2 ++ klausur-service/backend/ocr/review/llm.py | 6 ++--- .../backend/ocr/review/pipeline.py | 8 +++---- klausur-service/backend/ocr/review/review.py | 6 ++--- klausur-service/backend/ocr/review/spell.py | 2 +- klausur-service/backend/ocr/words_first.py | 2 +- klausur-service/backend/ocr_image_enhance.py | 4 ++++ 95 files changed, 317 insertions(+), 103 deletions(-) create mode 100644 klausur-service/backend/cv_box_detect.py create mode 100644 klausur-service/backend/cv_box_layout.py create mode 100644 klausur-service/backend/cv_cell_grid.py create mode 100644 klausur-service/backend/cv_cell_grid_build.py create mode 100644 klausur-service/backend/cv_cell_grid_helpers.py create mode 100644 klausur-service/backend/cv_cell_grid_legacy.py create mode 100644 klausur-service/backend/cv_cell_grid_merge.py create mode 100644 klausur-service/backend/cv_cell_grid_streaming.py create mode 100644 klausur-service/backend/cv_cell_grid_vocab.py create mode 100644 klausur-service/backend/cv_color_detect.py create mode 100644 klausur-service/backend/cv_doclayout_detect.py create mode 100644 klausur-service/backend/cv_graphic_detect.py create mode 100644 klausur-service/backend/cv_gutter_repair.py create mode 100644 klausur-service/backend/cv_gutter_repair_core.py create mode 100644 klausur-service/backend/cv_gutter_repair_grid.py create mode 100644 klausur-service/backend/cv_ipa_german.py create mode 100644 klausur-service/backend/cv_layout.py create mode 100644 klausur-service/backend/cv_layout_analyze.py create mode 100644 klausur-service/backend/cv_layout_classify.py create mode 100644 klausur-service/backend/cv_layout_classify_position.py create mode 100644 klausur-service/backend/cv_layout_column_refine.py create mode 100644 klausur-service/backend/cv_layout_columns.py create mode 100644 klausur-service/backend/cv_layout_detection.py create mode 100644 klausur-service/backend/cv_layout_row_regularize.py create mode 100644 klausur-service/backend/cv_layout_rows.py create mode 100644 klausur-service/backend/cv_layout_scoring.py create mode 100644 klausur-service/backend/cv_ocr_cell_filter.py create mode 100644 klausur-service/backend/cv_ocr_cell_phonetics.py create mode 100644 klausur-service/backend/cv_ocr_engines.py create mode 100644 klausur-service/backend/cv_ocr_ipa_lookup.py create mode 100644 klausur-service/backend/cv_ocr_ipa_repair.py create mode 100644 klausur-service/backend/cv_ocr_vocab_postprocess.py create mode 100644 klausur-service/backend/cv_ocr_word_assembly.py create mode 100644 klausur-service/backend/cv_preprocessing.py create mode 100644 klausur-service/backend/cv_preprocessing_deskew.py create mode 100644 klausur-service/backend/cv_preprocessing_dewarp.py create mode 100644 klausur-service/backend/cv_review.py create mode 100644 klausur-service/backend/cv_review_llm.py create mode 100644 klausur-service/backend/cv_review_pipeline.py create mode 100644 klausur-service/backend/cv_review_spell.py create mode 100644 klausur-service/backend/cv_syllable_core.py create mode 100644 klausur-service/backend/cv_syllable_detect.py create mode 100644 klausur-service/backend/cv_syllable_merge.py create mode 100644 klausur-service/backend/cv_vocab_pipeline.py create mode 100644 klausur-service/backend/cv_vocab_types.py create mode 100644 klausur-service/backend/cv_words_first.py create mode 100644 klausur-service/backend/ocr/__init__.py create mode 100644 klausur-service/backend/ocr/cell_grid/__init__.py create mode 100644 klausur-service/backend/ocr/detect/__init__.py create mode 100644 klausur-service/backend/ocr/detect/syllable/__init__.py create mode 100644 klausur-service/backend/ocr/engines/__init__.py create mode 100644 klausur-service/backend/ocr/gutter/__init__.py create mode 100644 klausur-service/backend/ocr/layout/__init__.py create mode 100644 klausur-service/backend/ocr/preprocessing/__init__.py create mode 100644 klausur-service/backend/ocr/review/__init__.py create mode 100644 klausur-service/backend/ocr_image_enhance.py diff --git a/.claude/rules/loc-exceptions.txt b/.claude/rules/loc-exceptions.txt index 7afdc5c..78f0efc 100644 --- a/.claude/rules/loc-exceptions.txt +++ b/.claude/rules/loc-exceptions.txt @@ -27,6 +27,7 @@ # Algorithmic monolith — detect_column_geometry() allein 411 LOC, nicht weiter teilbar **/cv_layout_columns.py | owner=klausur | reason=detect_column_geometry ist eine einzelne 411-LOC Funktion (Whitespace-Gap-Analyse) | review=2026-10-01 +**/ocr/layout/columns.py | owner=klausur | reason=Same file moved to ocr/ package | review=2026-10-01 # Two indivisible route handlers (~230 LOC each) that cannot be split further **/vocab_worksheet_compare_api.py | owner=klausur | reason=compare_ocr_methods (234 LOC) + analyze_grid (255 LOC), each a single cohesive handler | review=2026-10-01 diff --git a/klausur-service/backend/cv_box_detect.py b/klausur-service/backend/cv_box_detect.py new file mode 100644 index 0000000..e4d5c86 --- /dev/null +++ b/klausur-service/backend/cv_box_detect.py @@ -0,0 +1,4 @@ +# Backward-compat shim -- module moved to ocr\/detect\/box_detect.py +import importlib as _importlib +import sys as _sys +_sys.modules[__name__] = _importlib.import_module("ocr.detect.box_detect") diff --git a/klausur-service/backend/cv_box_layout.py b/klausur-service/backend/cv_box_layout.py new file mode 100644 index 0000000..8cc7e13 --- /dev/null +++ b/klausur-service/backend/cv_box_layout.py @@ -0,0 +1,4 @@ +# Backward-compat shim -- module moved to ocr\/detect\/box_layout.py +import importlib as _importlib +import sys as _sys +_sys.modules[__name__] = _importlib.import_module("ocr.detect.box_layout") diff --git a/klausur-service/backend/cv_cell_grid.py b/klausur-service/backend/cv_cell_grid.py new file mode 100644 index 0000000..f82d614 --- /dev/null +++ b/klausur-service/backend/cv_cell_grid.py @@ -0,0 +1,4 @@ +# Backward-compat shim -- module moved to ocr\/cell_grid\/cell_grid.py +import importlib as _importlib +import sys as _sys +_sys.modules[__name__] = _importlib.import_module("ocr.cell_grid.cell_grid") diff --git a/klausur-service/backend/cv_cell_grid_build.py b/klausur-service/backend/cv_cell_grid_build.py new file mode 100644 index 0000000..29fe6b5 --- /dev/null +++ b/klausur-service/backend/cv_cell_grid_build.py @@ -0,0 +1,4 @@ +# Backward-compat shim -- module moved to ocr\/cell_grid\/build.py +import importlib as _importlib +import sys as _sys +_sys.modules[__name__] = _importlib.import_module("ocr.cell_grid.build") diff --git a/klausur-service/backend/cv_cell_grid_helpers.py b/klausur-service/backend/cv_cell_grid_helpers.py new file mode 100644 index 0000000..5cdcf18 --- /dev/null +++ b/klausur-service/backend/cv_cell_grid_helpers.py @@ -0,0 +1,4 @@ +# Backward-compat shim -- module moved to ocr\/cell_grid\/helpers.py +import importlib as _importlib +import sys as _sys +_sys.modules[__name__] = _importlib.import_module("ocr.cell_grid.helpers") diff --git a/klausur-service/backend/cv_cell_grid_legacy.py b/klausur-service/backend/cv_cell_grid_legacy.py new file mode 100644 index 0000000..17e1436 --- /dev/null +++ b/klausur-service/backend/cv_cell_grid_legacy.py @@ -0,0 +1,4 @@ +# Backward-compat shim -- module moved to ocr\/cell_grid\/legacy.py +import importlib as _importlib +import sys as _sys +_sys.modules[__name__] = _importlib.import_module("ocr.cell_grid.legacy") diff --git a/klausur-service/backend/cv_cell_grid_merge.py b/klausur-service/backend/cv_cell_grid_merge.py new file mode 100644 index 0000000..5bc616a --- /dev/null +++ b/klausur-service/backend/cv_cell_grid_merge.py @@ -0,0 +1,4 @@ +# Backward-compat shim -- module moved to ocr\/cell_grid\/merge.py +import importlib as _importlib +import sys as _sys +_sys.modules[__name__] = _importlib.import_module("ocr.cell_grid.merge") diff --git a/klausur-service/backend/cv_cell_grid_streaming.py b/klausur-service/backend/cv_cell_grid_streaming.py new file mode 100644 index 0000000..799bbbc --- /dev/null +++ b/klausur-service/backend/cv_cell_grid_streaming.py @@ -0,0 +1,4 @@ +# Backward-compat shim -- module moved to ocr\/cell_grid\/streaming.py +import importlib as _importlib +import sys as _sys +_sys.modules[__name__] = _importlib.import_module("ocr.cell_grid.streaming") diff --git a/klausur-service/backend/cv_cell_grid_vocab.py b/klausur-service/backend/cv_cell_grid_vocab.py new file mode 100644 index 0000000..f8c091f --- /dev/null +++ b/klausur-service/backend/cv_cell_grid_vocab.py @@ -0,0 +1,4 @@ +# Backward-compat shim -- module moved to ocr\/cell_grid\/vocab.py +import importlib as _importlib +import sys as _sys +_sys.modules[__name__] = _importlib.import_module("ocr.cell_grid.vocab") diff --git a/klausur-service/backend/cv_color_detect.py b/klausur-service/backend/cv_color_detect.py new file mode 100644 index 0000000..1f757b2 --- /dev/null +++ b/klausur-service/backend/cv_color_detect.py @@ -0,0 +1,4 @@ +# Backward-compat shim -- module moved to ocr\/detect\/color_detect.py +import importlib as _importlib +import sys as _sys +_sys.modules[__name__] = _importlib.import_module("ocr.detect.color_detect") diff --git a/klausur-service/backend/cv_doclayout_detect.py b/klausur-service/backend/cv_doclayout_detect.py new file mode 100644 index 0000000..be9447a --- /dev/null +++ b/klausur-service/backend/cv_doclayout_detect.py @@ -0,0 +1,4 @@ +# Backward-compat shim -- module moved to ocr\/detect\/doclayout_detect.py +import importlib as _importlib +import sys as _sys +_sys.modules[__name__] = _importlib.import_module("ocr.detect.doclayout_detect") diff --git a/klausur-service/backend/cv_graphic_detect.py b/klausur-service/backend/cv_graphic_detect.py new file mode 100644 index 0000000..987e7a0 --- /dev/null +++ b/klausur-service/backend/cv_graphic_detect.py @@ -0,0 +1,4 @@ +# Backward-compat shim -- module moved to ocr\/detect\/graphic_detect.py +import importlib as _importlib +import sys as _sys +_sys.modules[__name__] = _importlib.import_module("ocr.detect.graphic_detect") diff --git a/klausur-service/backend/cv_gutter_repair.py b/klausur-service/backend/cv_gutter_repair.py new file mode 100644 index 0000000..05eb58f --- /dev/null +++ b/klausur-service/backend/cv_gutter_repair.py @@ -0,0 +1,4 @@ +# Backward-compat shim -- module moved to ocr\/gutter\/repair.py +import importlib as _importlib +import sys as _sys +_sys.modules[__name__] = _importlib.import_module("ocr.gutter.repair") diff --git a/klausur-service/backend/cv_gutter_repair_core.py b/klausur-service/backend/cv_gutter_repair_core.py new file mode 100644 index 0000000..e83ec63 --- /dev/null +++ b/klausur-service/backend/cv_gutter_repair_core.py @@ -0,0 +1,4 @@ +# Backward-compat shim -- module moved to ocr\/gutter\/core.py +import importlib as _importlib +import sys as _sys +_sys.modules[__name__] = _importlib.import_module("ocr.gutter.core") diff --git a/klausur-service/backend/cv_gutter_repair_grid.py b/klausur-service/backend/cv_gutter_repair_grid.py new file mode 100644 index 0000000..0b91eef --- /dev/null +++ b/klausur-service/backend/cv_gutter_repair_grid.py @@ -0,0 +1,4 @@ +# Backward-compat shim -- module moved to ocr\/gutter\/grid.py +import importlib as _importlib +import sys as _sys +_sys.modules[__name__] = _importlib.import_module("ocr.gutter.grid") diff --git a/klausur-service/backend/cv_ipa_german.py b/klausur-service/backend/cv_ipa_german.py new file mode 100644 index 0000000..4f5664c --- /dev/null +++ b/klausur-service/backend/cv_ipa_german.py @@ -0,0 +1,4 @@ +# Backward-compat shim -- module moved to ocr\/ipa_german.py +import importlib as _importlib +import sys as _sys +_sys.modules[__name__] = _importlib.import_module("ocr.ipa_german") diff --git a/klausur-service/backend/cv_layout.py b/klausur-service/backend/cv_layout.py new file mode 100644 index 0000000..082cf52 --- /dev/null +++ b/klausur-service/backend/cv_layout.py @@ -0,0 +1,4 @@ +# Backward-compat shim -- module moved to ocr\/layout\/layout.py +import importlib as _importlib +import sys as _sys +_sys.modules[__name__] = _importlib.import_module("ocr.layout.layout") diff --git a/klausur-service/backend/cv_layout_analyze.py b/klausur-service/backend/cv_layout_analyze.py new file mode 100644 index 0000000..ef0a16b --- /dev/null +++ b/klausur-service/backend/cv_layout_analyze.py @@ -0,0 +1,4 @@ +# Backward-compat shim -- module moved to ocr\/layout\/analyze.py +import importlib as _importlib +import sys as _sys +_sys.modules[__name__] = _importlib.import_module("ocr.layout.analyze") diff --git a/klausur-service/backend/cv_layout_classify.py b/klausur-service/backend/cv_layout_classify.py new file mode 100644 index 0000000..4b07f54 --- /dev/null +++ b/klausur-service/backend/cv_layout_classify.py @@ -0,0 +1,4 @@ +# Backward-compat shim -- module moved to ocr\/layout\/classify.py +import importlib as _importlib +import sys as _sys +_sys.modules[__name__] = _importlib.import_module("ocr.layout.classify") diff --git a/klausur-service/backend/cv_layout_classify_position.py b/klausur-service/backend/cv_layout_classify_position.py new file mode 100644 index 0000000..90a1f64 --- /dev/null +++ b/klausur-service/backend/cv_layout_classify_position.py @@ -0,0 +1,4 @@ +# Backward-compat shim -- module moved to ocr\/layout\/classify_position.py +import importlib as _importlib +import sys as _sys +_sys.modules[__name__] = _importlib.import_module("ocr.layout.classify_position") diff --git a/klausur-service/backend/cv_layout_column_refine.py b/klausur-service/backend/cv_layout_column_refine.py new file mode 100644 index 0000000..d3abdea --- /dev/null +++ b/klausur-service/backend/cv_layout_column_refine.py @@ -0,0 +1,4 @@ +# Backward-compat shim -- module moved to ocr\/layout\/column_refine.py +import importlib as _importlib +import sys as _sys +_sys.modules[__name__] = _importlib.import_module("ocr.layout.column_refine") diff --git a/klausur-service/backend/cv_layout_columns.py b/klausur-service/backend/cv_layout_columns.py new file mode 100644 index 0000000..7b1c070 --- /dev/null +++ b/klausur-service/backend/cv_layout_columns.py @@ -0,0 +1,4 @@ +# Backward-compat shim -- module moved to ocr\/layout\/columns.py +import importlib as _importlib +import sys as _sys +_sys.modules[__name__] = _importlib.import_module("ocr.layout.columns") diff --git a/klausur-service/backend/cv_layout_detection.py b/klausur-service/backend/cv_layout_detection.py new file mode 100644 index 0000000..9bbadde --- /dev/null +++ b/klausur-service/backend/cv_layout_detection.py @@ -0,0 +1,4 @@ +# Backward-compat shim -- module moved to ocr\/layout\/detection.py +import importlib as _importlib +import sys as _sys +_sys.modules[__name__] = _importlib.import_module("ocr.layout.detection") diff --git a/klausur-service/backend/cv_layout_row_regularize.py b/klausur-service/backend/cv_layout_row_regularize.py new file mode 100644 index 0000000..8d5553c --- /dev/null +++ b/klausur-service/backend/cv_layout_row_regularize.py @@ -0,0 +1,4 @@ +# Backward-compat shim -- module moved to ocr\/layout\/row_regularize.py +import importlib as _importlib +import sys as _sys +_sys.modules[__name__] = _importlib.import_module("ocr.layout.row_regularize") diff --git a/klausur-service/backend/cv_layout_rows.py b/klausur-service/backend/cv_layout_rows.py new file mode 100644 index 0000000..37c3b83 --- /dev/null +++ b/klausur-service/backend/cv_layout_rows.py @@ -0,0 +1,4 @@ +# Backward-compat shim -- module moved to ocr\/layout\/rows.py +import importlib as _importlib +import sys as _sys +_sys.modules[__name__] = _importlib.import_module("ocr.layout.rows") diff --git a/klausur-service/backend/cv_layout_scoring.py b/klausur-service/backend/cv_layout_scoring.py new file mode 100644 index 0000000..6c9450a --- /dev/null +++ b/klausur-service/backend/cv_layout_scoring.py @@ -0,0 +1,4 @@ +# Backward-compat shim -- module moved to ocr\/layout\/scoring.py +import importlib as _importlib +import sys as _sys +_sys.modules[__name__] = _importlib.import_module("ocr.layout.scoring") diff --git a/klausur-service/backend/cv_ocr_cell_filter.py b/klausur-service/backend/cv_ocr_cell_filter.py new file mode 100644 index 0000000..8b77d49 --- /dev/null +++ b/klausur-service/backend/cv_ocr_cell_filter.py @@ -0,0 +1,4 @@ +# Backward-compat shim -- module moved to ocr\/engines\/cell_filter.py +import importlib as _importlib +import sys as _sys +_sys.modules[__name__] = _importlib.import_module("ocr.engines.cell_filter") diff --git a/klausur-service/backend/cv_ocr_cell_phonetics.py b/klausur-service/backend/cv_ocr_cell_phonetics.py new file mode 100644 index 0000000..cd6d5fe --- /dev/null +++ b/klausur-service/backend/cv_ocr_cell_phonetics.py @@ -0,0 +1,4 @@ +# Backward-compat shim -- module moved to ocr\/engines\/cell_phonetics.py +import importlib as _importlib +import sys as _sys +_sys.modules[__name__] = _importlib.import_module("ocr.engines.cell_phonetics") diff --git a/klausur-service/backend/cv_ocr_engines.py b/klausur-service/backend/cv_ocr_engines.py new file mode 100644 index 0000000..d272c58 --- /dev/null +++ b/klausur-service/backend/cv_ocr_engines.py @@ -0,0 +1,4 @@ +# Backward-compat shim -- module moved to ocr\/engines\/engines.py +import importlib as _importlib +import sys as _sys +_sys.modules[__name__] = _importlib.import_module("ocr.engines.engines") diff --git a/klausur-service/backend/cv_ocr_ipa_lookup.py b/klausur-service/backend/cv_ocr_ipa_lookup.py new file mode 100644 index 0000000..29c2729 --- /dev/null +++ b/klausur-service/backend/cv_ocr_ipa_lookup.py @@ -0,0 +1,4 @@ +# Backward-compat shim -- module moved to ocr\/engines\/ipa_lookup.py +import importlib as _importlib +import sys as _sys +_sys.modules[__name__] = _importlib.import_module("ocr.engines.ipa_lookup") diff --git a/klausur-service/backend/cv_ocr_ipa_repair.py b/klausur-service/backend/cv_ocr_ipa_repair.py new file mode 100644 index 0000000..d973356 --- /dev/null +++ b/klausur-service/backend/cv_ocr_ipa_repair.py @@ -0,0 +1,4 @@ +# Backward-compat shim -- module moved to ocr\/engines\/ipa_repair.py +import importlib as _importlib +import sys as _sys +_sys.modules[__name__] = _importlib.import_module("ocr.engines.ipa_repair") diff --git a/klausur-service/backend/cv_ocr_vocab_postprocess.py b/klausur-service/backend/cv_ocr_vocab_postprocess.py new file mode 100644 index 0000000..6103a1f --- /dev/null +++ b/klausur-service/backend/cv_ocr_vocab_postprocess.py @@ -0,0 +1,4 @@ +# Backward-compat shim -- module moved to ocr\/engines\/vocab_postprocess.py +import importlib as _importlib +import sys as _sys +_sys.modules[__name__] = _importlib.import_module("ocr.engines.vocab_postprocess") diff --git a/klausur-service/backend/cv_ocr_word_assembly.py b/klausur-service/backend/cv_ocr_word_assembly.py new file mode 100644 index 0000000..3dde57b --- /dev/null +++ b/klausur-service/backend/cv_ocr_word_assembly.py @@ -0,0 +1,4 @@ +# Backward-compat shim -- module moved to ocr\/engines\/word_assembly.py +import importlib as _importlib +import sys as _sys +_sys.modules[__name__] = _importlib.import_module("ocr.engines.word_assembly") diff --git a/klausur-service/backend/cv_preprocessing.py b/klausur-service/backend/cv_preprocessing.py new file mode 100644 index 0000000..220af51 --- /dev/null +++ b/klausur-service/backend/cv_preprocessing.py @@ -0,0 +1,4 @@ +# Backward-compat shim -- module moved to ocr\/preprocessing\/preprocessing.py +import importlib as _importlib +import sys as _sys +_sys.modules[__name__] = _importlib.import_module("ocr.preprocessing.preprocessing") diff --git a/klausur-service/backend/cv_preprocessing_deskew.py b/klausur-service/backend/cv_preprocessing_deskew.py new file mode 100644 index 0000000..3ab60db --- /dev/null +++ b/klausur-service/backend/cv_preprocessing_deskew.py @@ -0,0 +1,4 @@ +# Backward-compat shim -- module moved to ocr\/preprocessing\/deskew.py +import importlib as _importlib +import sys as _sys +_sys.modules[__name__] = _importlib.import_module("ocr.preprocessing.deskew") diff --git a/klausur-service/backend/cv_preprocessing_dewarp.py b/klausur-service/backend/cv_preprocessing_dewarp.py new file mode 100644 index 0000000..8291bcd --- /dev/null +++ b/klausur-service/backend/cv_preprocessing_dewarp.py @@ -0,0 +1,4 @@ +# Backward-compat shim -- module moved to ocr\/preprocessing\/dewarp.py +import importlib as _importlib +import sys as _sys +_sys.modules[__name__] = _importlib.import_module("ocr.preprocessing.dewarp") diff --git a/klausur-service/backend/cv_review.py b/klausur-service/backend/cv_review.py new file mode 100644 index 0000000..5d89f5b --- /dev/null +++ b/klausur-service/backend/cv_review.py @@ -0,0 +1,4 @@ +# Backward-compat shim -- module moved to ocr\/review\/review.py +import importlib as _importlib +import sys as _sys +_sys.modules[__name__] = _importlib.import_module("ocr.review.review") diff --git a/klausur-service/backend/cv_review_llm.py b/klausur-service/backend/cv_review_llm.py new file mode 100644 index 0000000..07e2768 --- /dev/null +++ b/klausur-service/backend/cv_review_llm.py @@ -0,0 +1,4 @@ +# Backward-compat shim -- module moved to ocr\/review\/llm.py +import importlib as _importlib +import sys as _sys +_sys.modules[__name__] = _importlib.import_module("ocr.review.llm") diff --git a/klausur-service/backend/cv_review_pipeline.py b/klausur-service/backend/cv_review_pipeline.py new file mode 100644 index 0000000..027d6fa --- /dev/null +++ b/klausur-service/backend/cv_review_pipeline.py @@ -0,0 +1,4 @@ +# Backward-compat shim -- module moved to ocr\/review\/pipeline.py +import importlib as _importlib +import sys as _sys +_sys.modules[__name__] = _importlib.import_module("ocr.review.pipeline") diff --git a/klausur-service/backend/cv_review_spell.py b/klausur-service/backend/cv_review_spell.py new file mode 100644 index 0000000..525dbb7 --- /dev/null +++ b/klausur-service/backend/cv_review_spell.py @@ -0,0 +1,4 @@ +# Backward-compat shim -- module moved to ocr\/review\/spell.py +import importlib as _importlib +import sys as _sys +_sys.modules[__name__] = _importlib.import_module("ocr.review.spell") diff --git a/klausur-service/backend/cv_syllable_core.py b/klausur-service/backend/cv_syllable_core.py new file mode 100644 index 0000000..8588b20 --- /dev/null +++ b/klausur-service/backend/cv_syllable_core.py @@ -0,0 +1,4 @@ +# Backward-compat shim -- module moved to ocr\/detect\/syllable\/core.py +import importlib as _importlib +import sys as _sys +_sys.modules[__name__] = _importlib.import_module("ocr.detect.syllable.core") diff --git a/klausur-service/backend/cv_syllable_detect.py b/klausur-service/backend/cv_syllable_detect.py new file mode 100644 index 0000000..ef088b7 --- /dev/null +++ b/klausur-service/backend/cv_syllable_detect.py @@ -0,0 +1,4 @@ +# Backward-compat shim -- module moved to ocr\/detect\/syllable\/detect.py +import importlib as _importlib +import sys as _sys +_sys.modules[__name__] = _importlib.import_module("ocr.detect.syllable.detect") diff --git a/klausur-service/backend/cv_syllable_merge.py b/klausur-service/backend/cv_syllable_merge.py new file mode 100644 index 0000000..4ce25ea --- /dev/null +++ b/klausur-service/backend/cv_syllable_merge.py @@ -0,0 +1,4 @@ +# Backward-compat shim -- module moved to ocr\/detect\/syllable\/merge.py +import importlib as _importlib +import sys as _sys +_sys.modules[__name__] = _importlib.import_module("ocr.detect.syllable.merge") diff --git a/klausur-service/backend/cv_vocab_pipeline.py b/klausur-service/backend/cv_vocab_pipeline.py new file mode 100644 index 0000000..01a0e6f --- /dev/null +++ b/klausur-service/backend/cv_vocab_pipeline.py @@ -0,0 +1,4 @@ +# Backward-compat shim -- module moved to ocr\/pipeline.py +import importlib as _importlib +import sys as _sys +_sys.modules[__name__] = _importlib.import_module("ocr.pipeline") diff --git a/klausur-service/backend/cv_vocab_types.py b/klausur-service/backend/cv_vocab_types.py new file mode 100644 index 0000000..60f7231 --- /dev/null +++ b/klausur-service/backend/cv_vocab_types.py @@ -0,0 +1,4 @@ +# Backward-compat shim -- module moved to ocr\/types.py +import importlib as _importlib +import sys as _sys +_sys.modules[__name__] = _importlib.import_module("ocr.types") diff --git a/klausur-service/backend/cv_words_first.py b/klausur-service/backend/cv_words_first.py new file mode 100644 index 0000000..3488924 --- /dev/null +++ b/klausur-service/backend/cv_words_first.py @@ -0,0 +1,4 @@ +# Backward-compat shim -- module moved to ocr\/words_first.py +import importlib as _importlib +import sys as _sys +_sys.modules[__name__] = _importlib.import_module("ocr.words_first") diff --git a/klausur-service/backend/ocr/__init__.py b/klausur-service/backend/ocr/__init__.py new file mode 100644 index 0000000..36e3203 --- /dev/null +++ b/klausur-service/backend/ocr/__init__.py @@ -0,0 +1,9 @@ +""" +OCR package — restructured from cv_* flat modules. + +Backward-compatible re-exports: consumers can still use +``from cv_layout import ...`` etc. via the shim files in backend/. +""" + +from .types import * # noqa: F401,F403 +from .pipeline import * # noqa: F401,F403 diff --git a/klausur-service/backend/ocr/cell_grid/__init__.py b/klausur-service/backend/ocr/cell_grid/__init__.py new file mode 100644 index 0000000..0f1cc7f --- /dev/null +++ b/klausur-service/backend/ocr/cell_grid/__init__.py @@ -0,0 +1,2 @@ +"""Cell-grid construction sub-package.""" +from .cell_grid import * # noqa: F401,F403 diff --git a/klausur-service/backend/ocr/cell_grid/build.py b/klausur-service/backend/ocr/cell_grid/build.py index 9ac0ac5..352935c 100644 --- a/klausur-service/backend/ocr/cell_grid/build.py +++ b/klausur-service/backend/ocr/cell_grid/build.py @@ -10,8 +10,8 @@ from typing import Any, Dict, List, Optional, Tuple import numpy as np -from cv_vocab_types import PageRegion, RowGeometry -from cv_ocr_engines import ( +from ..types import PageRegion, RowGeometry +from ..engines.engines import ( RAPIDOCR_AVAILABLE, _assign_row_words_to_columns, _clean_cell_text, @@ -22,7 +22,7 @@ from cv_ocr_engines import ( ocr_region_rapid, ocr_region_trocr, ) -from cv_cell_grid_helpers import ( +from .helpers import ( _MIN_WORD_CONF, _ensure_minimum_crop_size, _heal_row_gaps, diff --git a/klausur-service/backend/ocr/cell_grid/cell_grid.py b/klausur-service/backend/ocr/cell_grid/cell_grid.py index 466565e..aea1d45 100644 --- a/klausur-service/backend/ocr/cell_grid/cell_grid.py +++ b/klausur-service/backend/ocr/cell_grid/cell_grid.py @@ -16,7 +16,7 @@ DATENSCHUTZ: Alle Verarbeitung erfolgt lokal. """ # --- Helpers --- -from cv_cell_grid_helpers import ( # noqa: F401 +from .helpers import ( # noqa: F401 _MIN_WORD_CONF, _compute_cell_padding, _ensure_minimum_crop_size, @@ -26,26 +26,26 @@ from cv_cell_grid_helpers import ( # noqa: F401 ) # --- v2 build (current default) --- -from cv_cell_grid_build import ( # noqa: F401 +from .build import ( # noqa: F401 _NARROW_COL_THRESHOLD_PCT, _ocr_cell_crop, build_cell_grid_v2, ) # --- Legacy build (DEPRECATED) --- -from cv_cell_grid_legacy import ( # noqa: F401 +from .legacy import ( # noqa: F401 _ocr_single_cell, build_cell_grid, ) # --- Streaming variants --- -from cv_cell_grid_streaming import ( # noqa: F401 +from .streaming import ( # noqa: F401 build_cell_grid_streaming, build_cell_grid_v2_streaming, ) # --- Row merging --- -from cv_cell_grid_merge import ( # noqa: F401 +from .merge import ( # noqa: F401 _PHONETIC_ONLY_RE, _is_phonetic_only_text, _merge_continuation_rows, @@ -54,7 +54,7 @@ from cv_cell_grid_merge import ( # noqa: F401 ) # --- Vocab extraction --- -from cv_cell_grid_vocab import ( # noqa: F401 +from .vocab import ( # noqa: F401 _cells_to_vocab_entries, build_word_grid, ) diff --git a/klausur-service/backend/ocr/cell_grid/helpers.py b/klausur-service/backend/ocr/cell_grid/helpers.py index f5e41d3..95f7661 100644 --- a/klausur-service/backend/ocr/cell_grid/helpers.py +++ b/klausur-service/backend/ocr/cell_grid/helpers.py @@ -13,7 +13,7 @@ from typing import List import numpy as np -from cv_vocab_types import RowGeometry +from ..types import RowGeometry logger = logging.getLogger(__name__) diff --git a/klausur-service/backend/ocr/cell_grid/legacy.py b/klausur-service/backend/ocr/cell_grid/legacy.py index e00df7c..39babfb 100644 --- a/klausur-service/backend/ocr/cell_grid/legacy.py +++ b/klausur-service/backend/ocr/cell_grid/legacy.py @@ -12,8 +12,8 @@ from typing import Any, Dict, List, Optional, Tuple import numpy as np -from cv_vocab_types import PageRegion, RowGeometry -from cv_ocr_engines import ( +from ..types import PageRegion, RowGeometry +from ..engines.engines import ( RAPIDOCR_AVAILABLE, _assign_row_words_to_columns, _clean_cell_text, @@ -22,7 +22,7 @@ from cv_ocr_engines import ( ocr_region_rapid, ocr_region_trocr, ) -from cv_cell_grid_helpers import ( +from .helpers import ( _MIN_WORD_CONF, _compute_cell_padding, _ensure_minimum_crop_size, diff --git a/klausur-service/backend/ocr/cell_grid/merge.py b/klausur-service/backend/ocr/cell_grid/merge.py index a86770e..b39bf84 100644 --- a/klausur-service/backend/ocr/cell_grid/merge.py +++ b/klausur-service/backend/ocr/cell_grid/merge.py @@ -11,7 +11,7 @@ import logging import re from typing import Any, Dict, List -from cv_ocr_engines import _RE_ALPHA +from ..engines.engines import _RE_ALPHA logger = logging.getLogger(__name__) diff --git a/klausur-service/backend/ocr/cell_grid/streaming.py b/klausur-service/backend/ocr/cell_grid/streaming.py index 4db3268..0fe8852 100644 --- a/klausur-service/backend/ocr/cell_grid/streaming.py +++ b/klausur-service/backend/ocr/cell_grid/streaming.py @@ -13,17 +13,17 @@ from typing import Any, Dict, Generator, List, Optional, Tuple import numpy as np -from cv_vocab_types import PageRegion, RowGeometry -from cv_ocr_engines import ( +from ..types import PageRegion, RowGeometry +from ..engines.engines import ( RAPIDOCR_AVAILABLE, _assign_row_words_to_columns, ) -from cv_cell_grid_helpers import ( +from .helpers import ( _heal_row_gaps, _is_artifact_row, ) -from cv_cell_grid_build import _ocr_cell_crop -from cv_cell_grid_legacy import _ocr_single_cell +from .build import _ocr_cell_crop +from .legacy import _ocr_single_cell logger = logging.getLogger(__name__) diff --git a/klausur-service/backend/ocr/cell_grid/vocab.py b/klausur-service/backend/ocr/cell_grid/vocab.py index d475c33..33c4ce0 100644 --- a/klausur-service/backend/ocr/cell_grid/vocab.py +++ b/klausur-service/backend/ocr/cell_grid/vocab.py @@ -10,13 +10,13 @@ DATENSCHUTZ: Alle Verarbeitung erfolgt lokal. import logging from typing import Any, Dict, List -from cv_ocr_engines import ( +from ..engines.engines import ( _attach_example_sentences, _fix_phonetic_brackets, _split_comma_entries, ) -from cv_cell_grid_legacy import build_cell_grid -from cv_cell_grid_merge import ( +from .legacy import build_cell_grid +from .merge import ( _merge_continuation_rows, _merge_phonetic_continuation_rows, _merge_wrapped_rows, diff --git a/klausur-service/backend/ocr/detect/__init__.py b/klausur-service/backend/ocr/detect/__init__.py new file mode 100644 index 0000000..34a368a --- /dev/null +++ b/klausur-service/backend/ocr/detect/__init__.py @@ -0,0 +1,2 @@ +"""Detection sub-package (boxes, graphics, colors, syllables, doclayout).""" +from .box_detect import * # noqa: F401,F403 diff --git a/klausur-service/backend/ocr/detect/box_detect.py b/klausur-service/backend/ocr/detect/box_detect.py index 1859969..0caade9 100644 --- a/klausur-service/backend/ocr/detect/box_detect.py +++ b/klausur-service/backend/ocr/detect/box_detect.py @@ -21,7 +21,7 @@ from typing import List, Optional, Tuple import cv2 import numpy as np -from cv_vocab_types import DetectedBox, PageZone +from ..types import DetectedBox, PageZone logger = logging.getLogger(__name__) diff --git a/klausur-service/backend/ocr/detect/graphic_detect.py b/klausur-service/backend/ocr/detect/graphic_detect.py index 8fcaf16..92cf715 100644 --- a/klausur-service/backend/ocr/detect/graphic_detect.py +++ b/klausur-service/backend/ocr/detect/graphic_detect.py @@ -127,7 +127,7 @@ def detect_graphic_elements( backend = os.environ.get("GRAPHIC_DETECT_BACKEND", "auto") if backend in ("doclayout", "auto"): try: - from cv_doclayout_detect import detect_layout_regions, is_doclayout_available + from .doclayout_detect import detect_layout_regions, is_doclayout_available if is_doclayout_available(): regions = detect_layout_regions(img_bgr) if regions: diff --git a/klausur-service/backend/ocr/detect/syllable/__init__.py b/klausur-service/backend/ocr/detect/syllable/__init__.py new file mode 100644 index 0000000..dfdc401 --- /dev/null +++ b/klausur-service/backend/ocr/detect/syllable/__init__.py @@ -0,0 +1,2 @@ +"""Syllable detection sub-package.""" +from .detect import * # noqa: F401,F403 diff --git a/klausur-service/backend/ocr/detect/syllable/detect.py b/klausur-service/backend/ocr/detect/syllable/detect.py index fe2b003..963734a 100644 --- a/klausur-service/backend/ocr/detect/syllable/detect.py +++ b/klausur-service/backend/ocr/detect/syllable/detect.py @@ -10,7 +10,7 @@ DATENSCHUTZ: Alle Verarbeitung erfolgt lokal. """ # Core: init, validation, autocorrect -from cv_syllable_core import ( # noqa: F401 +from .core import ( # noqa: F401 _IPA_RE, _STOP_WORDS, _get_hyphenators, @@ -23,7 +23,7 @@ from cv_syllable_core import ( # noqa: F401 ) # Merge: gap merging, syllabify, insert -from cv_syllable_merge import ( # noqa: F401 +from .merge import ( # noqa: F401 _try_merge_pipe_gaps, merge_word_gaps_in_zones, _try_merge_word_gaps, diff --git a/klausur-service/backend/ocr/detect/syllable/merge.py b/klausur-service/backend/ocr/detect/syllable/merge.py index 3684210..1d0675c 100644 --- a/klausur-service/backend/ocr/detect/syllable/merge.py +++ b/klausur-service/backend/ocr/detect/syllable/merge.py @@ -13,7 +13,7 @@ from typing import Any, Dict, List, Optional import numpy as np -from cv_syllable_core import ( +from .core import ( _get_hyphenators, _hyphenate_word, _IPA_RE, diff --git a/klausur-service/backend/ocr/engines/__init__.py b/klausur-service/backend/ocr/engines/__init__.py new file mode 100644 index 0000000..a8381d6 --- /dev/null +++ b/klausur-service/backend/ocr/engines/__init__.py @@ -0,0 +1,2 @@ +"""OCR engines sub-package.""" +from .engines import * # noqa: F401,F403 diff --git a/klausur-service/backend/ocr/engines/cell_filter.py b/klausur-service/backend/ocr/engines/cell_filter.py index 643b06a..115ac1e 100644 --- a/klausur-service/backend/ocr/engines/cell_filter.py +++ b/klausur-service/backend/ocr/engines/cell_filter.py @@ -16,7 +16,7 @@ from typing import Any, Dict, List, Optional import numpy as np -from cv_vocab_types import PageRegion, RowGeometry +from ..types import PageRegion, RowGeometry logger = logging.getLogger(__name__) diff --git a/klausur-service/backend/ocr/engines/cell_phonetics.py b/klausur-service/backend/ocr/engines/cell_phonetics.py index e063451..3401d9b 100644 --- a/klausur-service/backend/ocr/engines/cell_phonetics.py +++ b/klausur-service/backend/ocr/engines/cell_phonetics.py @@ -11,14 +11,14 @@ import logging import re from typing import Any, Dict, List -from cv_vocab_types import IPA_AVAILABLE +from ..types import IPA_AVAILABLE -from cv_ocr_ipa_lookup import ( +from .ipa_lookup import ( _insert_missing_ipa, _replace_phonetics_in_text, _text_has_garbled_ipa, ) -from cv_ocr_ipa_repair import ( +from .ipa_repair import ( _has_non_dict_trailing, _insert_headword_ipa, _strip_post_bracket_garbled, diff --git a/klausur-service/backend/ocr/engines/engines.py b/klausur-service/backend/ocr/engines/engines.py index dc681f9..9b57001 100644 --- a/klausur-service/backend/ocr/engines/engines.py +++ b/klausur-service/backend/ocr/engines/engines.py @@ -24,7 +24,7 @@ from typing import Any, Dict, List, Optional, Tuple import numpy as np -from cv_vocab_types import ( +from ..types import ( IPA_AVAILABLE, PageRegion, RowGeometry, @@ -47,7 +47,7 @@ except ImportError: # ── Re-exports from sub-modules (backward compatibility) ────────────────── -from cv_ocr_word_assembly import ( # noqa: F401 +from .word_assembly import ( # noqa: F401 _group_words_into_lines, _words_to_reading_order_lines, _rejoin_hyphenated, @@ -55,7 +55,7 @@ from cv_ocr_word_assembly import ( # noqa: F401 _words_to_spaced_text, ) -from cv_ocr_vocab_postprocess import ( # noqa: F401 +from .vocab_postprocess import ( # noqa: F401 _CHAR_CONFUSION_RULES, _DE_INDICATORS_FOR_EN_I, _fix_character_confusion, @@ -66,7 +66,7 @@ from cv_ocr_vocab_postprocess import ( # noqa: F401 _attach_example_sentences, ) -from cv_ocr_ipa_lookup import ( # noqa: F401 +from .ipa_lookup import ( # noqa: F401 _PHONETIC_BRACKET_RE, _IPA_CHARS, _MIN_WORD_CONF, @@ -80,20 +80,20 @@ from cv_ocr_ipa_lookup import ( # noqa: F401 _insert_missing_ipa, ) -from cv_ocr_ipa_repair import ( # noqa: F401 +from .ipa_repair import ( # noqa: F401 _has_non_dict_trailing, _strip_post_bracket_garbled, fix_ipa_continuation_cell, _insert_headword_ipa, ) -from cv_ocr_cell_phonetics import ( # noqa: F401 +from .cell_phonetics import ( # noqa: F401 fix_cell_phonetics, _has_ipa_gap, _sync_word_boxes_after_ipa_insert, ) -from cv_ocr_cell_filter import ( # noqa: F401 +from .cell_filter import ( # noqa: F401 _RE_REAL_WORD, _RE_ALPHA, _COMMON_SHORT_WORDS, diff --git a/klausur-service/backend/ocr/engines/ipa_lookup.py b/klausur-service/backend/ocr/engines/ipa_lookup.py index 079cfbd..73a3f7a 100644 --- a/klausur-service/backend/ocr/engines/ipa_lookup.py +++ b/klausur-service/backend/ocr/engines/ipa_lookup.py @@ -23,7 +23,7 @@ import logging import re from typing import Any, Dict, List, Optional -from cv_vocab_types import ( +from ..types import ( IPA_AVAILABLE, _britfone_dict, _ipa_convert_american, diff --git a/klausur-service/backend/ocr/engines/ipa_repair.py b/klausur-service/backend/ocr/engines/ipa_repair.py index 1d29eb2..4cd4848 100644 --- a/klausur-service/backend/ocr/engines/ipa_repair.py +++ b/klausur-service/backend/ocr/engines/ipa_repair.py @@ -16,8 +16,8 @@ import logging import re from typing import Any, Dict, List, Optional -from cv_vocab_types import IPA_AVAILABLE -from cv_ocr_ipa_lookup import ( +from ..types import IPA_AVAILABLE +from .ipa_lookup import ( _lookup_ipa, _GRAMMAR_BRACKET_WORDS, ) diff --git a/klausur-service/backend/ocr/gutter/__init__.py b/klausur-service/backend/ocr/gutter/__init__.py new file mode 100644 index 0000000..791c1a3 --- /dev/null +++ b/klausur-service/backend/ocr/gutter/__init__.py @@ -0,0 +1,2 @@ +"""Gutter repair sub-package.""" +from .repair import * # noqa: F401,F403 diff --git a/klausur-service/backend/ocr/gutter/grid.py b/klausur-service/backend/ocr/gutter/grid.py index caf7c0f..6f0f624 100644 --- a/klausur-service/backend/ocr/gutter/grid.py +++ b/klausur-service/backend/ocr/gutter/grid.py @@ -11,7 +11,7 @@ import logging import time from typing import Any, Dict, List, Tuple -from cv_gutter_repair_core import ( +from .core import ( _init_spellcheckers, _is_ipa_text, _is_known, diff --git a/klausur-service/backend/ocr/gutter/repair.py b/klausur-service/backend/ocr/gutter/repair.py index fc6fc6c..4111e91 100644 --- a/klausur-service/backend/ocr/gutter/repair.py +++ b/klausur-service/backend/ocr/gutter/repair.py @@ -10,7 +10,7 @@ DATENSCHUTZ: Alle Verarbeitung erfolgt lokal. """ # Core: spellchecker, data types, repair helpers -from cv_gutter_repair_core import ( # noqa: F401 +from .core import ( # noqa: F401 _init_spellcheckers, _is_known, _spell_candidates, @@ -29,7 +29,7 @@ from cv_gutter_repair_core import ( # noqa: F401 ) # Grid: analysis and application -from cv_gutter_repair_grid import ( # noqa: F401 +from .grid import ( # noqa: F401 analyse_grid_for_gutter_repair, apply_gutter_suggestions, ) diff --git a/klausur-service/backend/ocr/ipa_german.py b/klausur-service/backend/ocr/ipa_german.py index d834ea3..5fc7dca 100644 --- a/klausur-service/backend/ocr/ipa_german.py +++ b/klausur-service/backend/ocr/ipa_german.py @@ -26,7 +26,7 @@ def _lookup_ipa_de(word: str) -> Optional[str]: Returns IPA string or None if not found. """ - from cv_vocab_types import _de_ipa_dict, _epitran_de, DE_IPA_AVAILABLE + from .types import _de_ipa_dict, _epitran_de, DE_IPA_AVAILABLE if not DE_IPA_AVAILABLE and _epitran_de is None: return None @@ -109,7 +109,7 @@ def insert_german_ipa( Returns: Number of cells modified. """ - from cv_vocab_types import DE_IPA_AVAILABLE, _epitran_de + from .types import DE_IPA_AVAILABLE, _epitran_de if not DE_IPA_AVAILABLE and _epitran_de is None: logger.warning("German IPA not available — skipping") diff --git a/klausur-service/backend/ocr/layout/__init__.py b/klausur-service/backend/ocr/layout/__init__.py new file mode 100644 index 0000000..fef76cc --- /dev/null +++ b/klausur-service/backend/ocr/layout/__init__.py @@ -0,0 +1,2 @@ +"""Layout analysis sub-package.""" +from .layout import * # noqa: F401,F403 diff --git a/klausur-service/backend/ocr/layout/analyze.py b/klausur-service/backend/ocr/layout/analyze.py index cf4c194..6912d47 100644 --- a/klausur-service/backend/ocr/layout/analyze.py +++ b/klausur-service/backend/ocr/layout/analyze.py @@ -13,8 +13,8 @@ from typing import List import numpy as np -from cv_vocab_types import PageRegion -from cv_layout_detection import _find_content_bounds +from ..types import PageRegion +from .detection import _find_content_bounds logger = logging.getLogger(__name__) @@ -246,7 +246,7 @@ def analyze_layout(layout_img: np.ndarray, ocr_img: np.ndarray) -> List[PageRegi # Add header/footer info (gap-based detection with fallback) # Lazy import to avoid circular dependency with cv_layout.py - from cv_layout_detection import _add_header_footer + from .detection import _add_header_footer _add_header_footer(regions, top_y, bottom_y, w, h, inv=inv) top_region = next((r.type for r in regions if r.type in ('header', 'margin_top')), 'none') diff --git a/klausur-service/backend/ocr/layout/classify.py b/klausur-service/backend/ocr/layout/classify.py index 99803c9..42e6f46 100644 --- a/klausur-service/backend/ocr/layout/classify.py +++ b/klausur-service/backend/ocr/layout/classify.py @@ -11,16 +11,16 @@ from typing import Dict, List, Optional import numpy as np -from cv_vocab_types import ColumnGeometry, PageRegion +from ..types import ColumnGeometry, PageRegion -from cv_layout_scoring import ( +from .scoring import ( _score_language, _score_role, _score_dictionary_signals, _classify_dictionary_columns, ) -from cv_layout_classify_position import ( +from .classify_position import ( _classify_by_position_enhanced, _classify_by_position_fallback, ) @@ -211,7 +211,7 @@ def classify_column_types(geometries: List[ColumnGeometry], # _add_header_footer lives in cv_layout (avoids circular import at module # level). Lazy-import here so the module can be tested independently when # cv_layout hasn't been modified yet. - from cv_layout_detection import _add_header_footer # noqa: E402 + from .detection import _add_header_footer # noqa: E402 content_h = bottom_y - top_y diff --git a/klausur-service/backend/ocr/layout/classify_position.py b/klausur-service/backend/ocr/layout/classify_position.py index 39a4911..81197c9 100644 --- a/klausur-service/backend/ocr/layout/classify_position.py +++ b/klausur-service/backend/ocr/layout/classify_position.py @@ -11,7 +11,7 @@ Extracted from cv_layout_classify.py during file-size split. import logging from typing import Dict, List, Optional -from cv_vocab_types import ColumnGeometry, PageRegion +from ..types import ColumnGeometry, PageRegion logger = logging.getLogger(__name__) diff --git a/klausur-service/backend/ocr/layout/column_refine.py b/klausur-service/backend/ocr/layout/column_refine.py index 2b74925..a858c1b 100644 --- a/klausur-service/backend/ocr/layout/column_refine.py +++ b/klausur-service/backend/ocr/layout/column_refine.py @@ -16,7 +16,7 @@ from typing import Dict, List, Optional, Tuple import numpy as np -from cv_vocab_types import ColumnGeometry +from ..types import ColumnGeometry logger = logging.getLogger(__name__) diff --git a/klausur-service/backend/ocr/layout/columns.py b/klausur-service/backend/ocr/layout/columns.py index da66621..b7d773e 100644 --- a/klausur-service/backend/ocr/layout/columns.py +++ b/klausur-service/backend/ocr/layout/columns.py @@ -19,8 +19,8 @@ from typing import Dict, List, Optional, Tuple import numpy as np -from cv_vocab_types import ColumnGeometry -from cv_layout_detection import _find_content_bounds +from ..types import ColumnGeometry +from .detection import _find_content_bounds logger = logging.getLogger(__name__) diff --git a/klausur-service/backend/ocr/layout/detection.py b/klausur-service/backend/ocr/layout/detection.py index 77f9113..570cb30 100644 --- a/klausur-service/backend/ocr/layout/detection.py +++ b/klausur-service/backend/ocr/layout/detection.py @@ -13,7 +13,7 @@ from typing import List, Optional, Tuple import numpy as np -from cv_vocab_types import ( +from ..types import ( DocumentTypeResult, PageRegion, ) diff --git a/klausur-service/backend/ocr/layout/layout.py b/klausur-service/backend/ocr/layout/layout.py index ab5e061..37a7b84 100644 --- a/klausur-service/backend/ocr/layout/layout.py +++ b/klausur-service/backend/ocr/layout/layout.py @@ -21,14 +21,14 @@ from typing import Any, Dict, List, Optional, Tuple import numpy as np -from cv_vocab_types import ColumnGeometry, DetectedBox, PageRegion +from ..types import ColumnGeometry, DetectedBox, PageRegion logger = logging.getLogger(__name__) # ── Re-exports (backward compatibility) ─────────────────────────────────── -from cv_layout_detection import ( # noqa: F401 +from .detection import ( # noqa: F401 detect_document_type, create_ocr_image, create_layout_image, @@ -39,46 +39,46 @@ from cv_layout_detection import ( # noqa: F401 _add_header_footer, ) -from cv_layout_analyze import ( # noqa: F401 +from .analyze import ( # noqa: F401 analyze_layout, ) -from cv_layout_columns import ( # noqa: F401 +from .columns import ( # noqa: F401 detect_column_geometry, _detect_columns_by_clustering, _build_geometries_from_starts, ) -from cv_layout_column_refine import ( # noqa: F401 +from .column_refine import ( # noqa: F401 _detect_sub_columns, _split_broad_columns, expand_narrow_columns, ) -from cv_layout_rows import ( # noqa: F401 +from .rows import ( # noqa: F401 detect_row_geometry, _build_rows_from_word_grouping, ) -from cv_layout_row_regularize import ( # noqa: F401 +from .row_regularize import ( # noqa: F401 _regularize_row_grid, ) -from cv_layout_scoring import ( # noqa: F401 +from .scoring import ( # noqa: F401 _score_language, _score_role, _score_dictionary_signals, _classify_dictionary_columns, ) -from cv_layout_classify import ( # noqa: F401 +from .classify import ( # noqa: F401 _build_margin_regions, positional_column_regions, classify_column_types, _classify_by_content, ) -from cv_layout_classify_position import ( # noqa: F401 +from .classify_position import ( # noqa: F401 _classify_by_position_enhanced, _classify_by_position_fallback, ) @@ -143,7 +143,7 @@ def detect_column_geometry_zoned( per content zone on the corresponding sub-image. 4. If no boxes: delegates entirely to detect_column_geometry(). """ - from cv_box_detect import detect_boxes, split_page_into_zones + from ..detect.box_detect import detect_boxes, split_page_into_zones geo_result = detect_column_geometry(ocr_img, dewarped_bgr) if geo_result is None: diff --git a/klausur-service/backend/ocr/layout/row_regularize.py b/klausur-service/backend/ocr/layout/row_regularize.py index 4c09199..458997f 100644 --- a/klausur-service/backend/ocr/layout/row_regularize.py +++ b/klausur-service/backend/ocr/layout/row_regularize.py @@ -13,7 +13,7 @@ from typing import Dict, List import numpy as np -from cv_vocab_types import RowGeometry +from ..types import RowGeometry logger = logging.getLogger(__name__) diff --git a/klausur-service/backend/ocr/layout/rows.py b/klausur-service/backend/ocr/layout/rows.py index baa5b73..9f95e3b 100644 --- a/klausur-service/backend/ocr/layout/rows.py +++ b/klausur-service/backend/ocr/layout/rows.py @@ -20,9 +20,9 @@ try: except ImportError: cv2 = None # type: ignore[assignment] -from cv_vocab_types import RowGeometry -from cv_ocr_word_assembly import _group_words_into_lines -from cv_layout_row_regularize import _regularize_row_grid +from ..types import RowGeometry +from ..engines.word_assembly import _group_words_into_lines +from .row_regularize import _regularize_row_grid logger = logging.getLogger(__name__) diff --git a/klausur-service/backend/ocr/layout/scoring.py b/klausur-service/backend/ocr/layout/scoring.py index c72175d..d5acd99 100644 --- a/klausur-service/backend/ocr/layout/scoring.py +++ b/klausur-service/backend/ocr/layout/scoring.py @@ -11,7 +11,7 @@ import logging from collections import Counter from typing import Any, Dict, List, Optional -from cv_vocab_types import ( +from ..types import ( ColumnGeometry, ENGLISH_FUNCTION_WORDS, GERMAN_FUNCTION_WORDS, diff --git a/klausur-service/backend/ocr/pipeline.py b/klausur-service/backend/ocr/pipeline.py index a40b056..67aa530 100644 --- a/klausur-service/backend/ocr/pipeline.py +++ b/klausur-service/backend/ocr/pipeline.py @@ -14,24 +14,24 @@ Lizenz: Apache 2.0 (kommerziell nutzbar) DATENSCHUTZ: Alle Verarbeitung erfolgt lokal. """ -from cv_vocab_types import * # noqa: F401,F403 -from cv_preprocessing import * # noqa: F401,F403 -from cv_layout import * # noqa: F401,F403 -from cv_ocr_engines import * # noqa: F401,F403 -from cv_cell_grid import * # noqa: F401,F403 -from cv_box_detect import * # noqa: F401,F403 -from cv_review import * # noqa: F401,F403 +from .types import * # noqa: F401,F403 +from .preprocessing.preprocessing import * # noqa: F401,F403 +from .layout.layout import * # noqa: F401,F403 +from .engines.engines import * # noqa: F401,F403 +from .cell_grid.cell_grid import * # noqa: F401,F403 +from .detect.box_detect import * # noqa: F401,F403 +from .review.review import * # noqa: F401,F403 # Private names used by consumers — not covered by wildcard re-exports. -from cv_preprocessing import _apply_shear # noqa: F401 -from cv_layout import ( # noqa: F401 +from .preprocessing.preprocessing import _apply_shear # noqa: F401 +from .layout.layout import ( # noqa: F401 _detect_header_footer_gaps, _detect_sub_columns, _split_broad_columns, ) -from cv_ocr_engines import ( # noqa: F401 +from .engines.engines import ( # noqa: F401 _fix_character_confusion, _fix_phonetic_brackets, ) -from cv_cell_grid import _cells_to_vocab_entries # noqa: F401 -from cv_words_first import build_grid_from_words # noqa: F401 +from .cell_grid.cell_grid import _cells_to_vocab_entries # noqa: F401 +from .words_first import build_grid_from_words # noqa: F401 diff --git a/klausur-service/backend/ocr/preprocessing/__init__.py b/klausur-service/backend/ocr/preprocessing/__init__.py new file mode 100644 index 0000000..a902951 --- /dev/null +++ b/klausur-service/backend/ocr/preprocessing/__init__.py @@ -0,0 +1,2 @@ +"""Preprocessing sub-package (deskew, dewarp, image I/O).""" +from .preprocessing import * # noqa: F401,F403 diff --git a/klausur-service/backend/ocr/preprocessing/deskew.py b/klausur-service/backend/ocr/preprocessing/deskew.py index 1bdb27e..72d1b2e 100644 --- a/klausur-service/backend/ocr/preprocessing/deskew.py +++ b/klausur-service/backend/ocr/preprocessing/deskew.py @@ -11,7 +11,7 @@ from typing import Any, Dict, Tuple import numpy as np -from cv_vocab_types import ( +from ..types import ( CV2_AVAILABLE, TESSERACT_AVAILABLE, ) diff --git a/klausur-service/backend/ocr/preprocessing/dewarp.py b/klausur-service/backend/ocr/preprocessing/dewarp.py index 640c87c..6639ac6 100644 --- a/klausur-service/backend/ocr/preprocessing/dewarp.py +++ b/klausur-service/backend/ocr/preprocessing/dewarp.py @@ -16,7 +16,7 @@ from typing import Any, Dict, List, Tuple import numpy as np -from cv_vocab_types import ( +from ..types import ( CV2_AVAILABLE, TESSERACT_AVAILABLE, ) diff --git a/klausur-service/backend/ocr/preprocessing/preprocessing.py b/klausur-service/backend/ocr/preprocessing/preprocessing.py index 0cb2841..c80e6cd 100644 --- a/klausur-service/backend/ocr/preprocessing/preprocessing.py +++ b/klausur-service/backend/ocr/preprocessing/preprocessing.py @@ -17,7 +17,7 @@ from typing import Tuple import numpy as np -from cv_vocab_types import ( +from ..types import ( CV2_AVAILABLE, TESSERACT_AVAILABLE, ) @@ -38,7 +38,7 @@ except ImportError: Image = None # type: ignore[assignment,misc] # Re-export all deskew functions -from cv_preprocessing_deskew import ( # noqa: F401 +from .deskew import ( # noqa: F401 deskew_image, deskew_image_by_word_alignment, deskew_image_iterative, @@ -48,7 +48,7 @@ from cv_preprocessing_deskew import ( # noqa: F401 ) # Re-export all dewarp functions -from cv_preprocessing_dewarp import ( # noqa: F401 +from .dewarp import ( # noqa: F401 _apply_shear, _detect_shear_angle, _detect_shear_by_hough, diff --git a/klausur-service/backend/ocr/review/__init__.py b/klausur-service/backend/ocr/review/__init__.py new file mode 100644 index 0000000..7ff12c9 --- /dev/null +++ b/klausur-service/backend/ocr/review/__init__.py @@ -0,0 +1,2 @@ +"""Review sub-package (spell, LLM, pipeline orchestration).""" +from .review import * # noqa: F401,F403 diff --git a/klausur-service/backend/ocr/review/llm.py b/klausur-service/backend/ocr/review/llm.py index dc3b288..7369a4b 100644 --- a/klausur-service/backend/ocr/review/llm.py +++ b/klausur-service/backend/ocr/review/llm.py @@ -183,7 +183,7 @@ async def llm_review_entries( model: str = None, ) -> Dict: """OCR error correction. Uses spell-checker (REVIEW_ENGINE=spell) or LLM (REVIEW_ENGINE=llm).""" - from cv_review_spell import spell_review_entries_sync, _SPELL_AVAILABLE + from .spell import spell_review_entries_sync, _SPELL_AVAILABLE if REVIEW_ENGINE == "spell" and _SPELL_AVAILABLE: return spell_review_entries_sync(entries) @@ -260,8 +260,8 @@ async def llm_review_entries_streaming( Phase 0 (always): Run _fix_character_confusion and emit any changes. """ - from cv_ocr_engines import _fix_character_confusion - from cv_review_spell import spell_review_entries_streaming, _SPELL_AVAILABLE + from ..engines.engines import _fix_character_confusion + from .spell import spell_review_entries_streaming, _SPELL_AVAILABLE _CONF_FIELDS = ('english', 'german', 'example') originals = [{f: e.get(f, '') for f in _CONF_FIELDS} for e in entries] diff --git a/klausur-service/backend/ocr/review/pipeline.py b/klausur-service/backend/ocr/review/pipeline.py index 746b45c..8802a72 100644 --- a/klausur-service/backend/ocr/review/pipeline.py +++ b/klausur-service/backend/ocr/review/pipeline.py @@ -13,24 +13,24 @@ from typing import Any, Dict, List, Optional import numpy as np -from cv_vocab_types import ( +from ..types import ( CV_PIPELINE_AVAILABLE, PageRegion, PipelineResult, VocabRow, ) -from cv_preprocessing import ( +from ..preprocessing.preprocessing import ( deskew_image, dewarp_image, render_image_high_res, render_pdf_high_res, ) -from cv_layout import ( +from ..layout.layout import ( analyze_layout, create_layout_image, create_ocr_image, ) -from cv_ocr_engines import ( +from ..engines.engines import ( _group_words_into_lines, ) diff --git a/klausur-service/backend/ocr/review/review.py b/klausur-service/backend/ocr/review/review.py index 217e463..22d79c1 100644 --- a/klausur-service/backend/ocr/review/review.py +++ b/klausur-service/backend/ocr/review/review.py @@ -12,7 +12,7 @@ DATENSCHUTZ: Alle Verarbeitung erfolgt lokal. """ # Re-export everything for backward compatibility -from cv_review_pipeline import ( # noqa: F401 +from .pipeline import ( # noqa: F401 ocr_region, run_multi_pass_ocr, match_lines_to_vocab, @@ -20,7 +20,7 @@ from cv_review_pipeline import ( # noqa: F401 run_cv_pipeline, ) -from cv_review_spell import ( # noqa: F401 +from .spell import ( # noqa: F401 _SPELL_AVAILABLE, _spell_dict_knows, _spell_fix_field, @@ -31,7 +31,7 @@ from cv_review_spell import ( # noqa: F401 spell_review_entries_streaming, ) -from cv_review_llm import ( # noqa: F401 +from .llm import ( # noqa: F401 OLLAMA_REVIEW_MODEL, REVIEW_ENGINE, _REVIEW_BATCH_SIZE, diff --git a/klausur-service/backend/ocr/review/spell.py b/klausur-service/backend/ocr/review/spell.py index 5398a21..3166b26 100644 --- a/klausur-service/backend/ocr/review/spell.py +++ b/klausur-service/backend/ocr/review/spell.py @@ -210,7 +210,7 @@ def spell_review_entries_sync(entries: List[Dict]) -> Dict: Uses SmartSpellChecker for language-aware corrections with context-based disambiguation (a/I), multi-digit substitution, and cross-language guard. """ - from cv_review_llm import _entry_needs_review + from .llm import _entry_needs_review t0 = time.time() changes: List[Dict] = [] diff --git a/klausur-service/backend/ocr/words_first.py b/klausur-service/backend/ocr/words_first.py index a4756e6..6e787b0 100644 --- a/klausur-service/backend/ocr/words_first.py +++ b/klausur-service/backend/ocr/words_first.py @@ -19,7 +19,7 @@ import re import statistics from typing import Any, Dict, List, Optional, Tuple -from cv_ocr_engines import ( +from .engines.engines import ( _group_words_into_lines, _words_to_reading_order_text, ) diff --git a/klausur-service/backend/ocr_image_enhance.py b/klausur-service/backend/ocr_image_enhance.py new file mode 100644 index 0000000..d154675 --- /dev/null +++ b/klausur-service/backend/ocr_image_enhance.py @@ -0,0 +1,4 @@ +# Backward-compat shim -- module moved to ocr\/image_enhance.py +import importlib as _importlib +import sys as _sys +_sys.modules[__name__] = _importlib.import_module("ocr.image_enhance")