diff --git a/backend-lehrer/messenger/conversations.py b/backend-lehrer/messenger/conversations.py index b58e358..40c56c0 100644 --- a/backend-lehrer/messenger/conversations.py +++ b/backend-lehrer/messenger/conversations.py @@ -266,7 +266,7 @@ async def send_message(conversation_id: str, message: MessageBase): if contact and contact.get("email"): try: - from email_service import email_service + from services.email import email_service result = email_service.send_messenger_notification( to_email=contact["email"], diff --git a/backend-lehrer/units/learning_api.py b/backend-lehrer/units/learning_api.py index a5afc5a..15cf502 100644 --- a/backend-lehrer/units/learning_api.py +++ b/backend-lehrer/units/learning_api.py @@ -363,7 +363,7 @@ def api_generate_story(unit_id: str, payload: StoryGeneratePayload): raise HTTPException(status_code=404, detail="Lerneinheit nicht gefunden.") try: - from story_generator import generate_story + from services.story_generator import generate_story result = generate_story( vocabulary=payload.vocabulary, language=payload.language, diff --git a/backend-lehrer/vocabulary/api.py b/backend-lehrer/vocabulary/api.py index eb93f93..7045dd5 100644 --- a/backend-lehrer/vocabulary/api.py +++ b/backend-lehrer/vocabulary/api.py @@ -22,7 +22,7 @@ from .db import ( get_all_pos, VocabularyWord, ) -from learning_units import ( +from units.learning import ( LearningUnitCreate, create_learning_unit, get_learning_unit, @@ -343,7 +343,7 @@ async def api_translate_words(payload: TranslateRequest): Uses local LLM (Ollama) for translation. Results are cached in the vocabulary_words.translations JSONB field. """ - from translation_service import translate_and_store + from services.translation import translate_and_store if payload.target_language not in {"tr", "ar", "uk", "ru", "pl", "fr", "es"}: raise HTTPException(status_code=400, detail=f"Sprache '{payload.target_language}' nicht unterstuetzt") diff --git a/klausur-service/backend/admin/__init__.py b/klausur-service/backend/admin/__init__.py index d83fbe3..73bd990 100644 --- a/klausur-service/backend/admin/__init__.py +++ b/klausur-service/backend/admin/__init__.py @@ -2,5 +2,5 @@ admin package — admin APIs for NiBiS, RAG, templates. Backward-compatible re-exports: consumers can still use -``from admin_api import ...`` etc. via the shim files in backend/. +``from admin.api import ...`` etc. via the shim files in backend/. """ diff --git a/klausur-service/backend/admin/api.py b/klausur-service/backend/admin/api.py index 3bc3de9..6050199 100644 --- a/klausur-service/backend/admin/api.py +++ b/klausur-service/backend/admin/api.py @@ -7,7 +7,7 @@ This module was split into: - admin_templates.py (Legal templates ingestion, search) The `router` object is assembled here by including all sub-routers. -Importers that did `from admin_api import router` continue to work. +Importers that did `from admin.api import router` continue to work. """ from fastapi import APIRouter diff --git a/klausur-service/backend/admin/rag.py b/klausur-service/backend/admin/rag.py index 8d50b70..b21f7d3 100644 --- a/klausur-service/backend/admin/rag.py +++ b/klausur-service/backend/admin/rag.py @@ -28,7 +28,7 @@ except ImportError: MINIO_AVAILABLE = False try: - from metrics_db import ( + from metrics.db import ( init_metrics_tables, store_feedback, log_search, log_upload, calculate_metrics, get_recent_feedback, get_upload_history ) diff --git a/klausur-service/backend/compliance/__init__.py b/klausur-service/backend/compliance/__init__.py index 5cc742d..c49d05f 100644 --- a/klausur-service/backend/compliance/__init__.py +++ b/klausur-service/backend/compliance/__init__.py @@ -2,5 +2,5 @@ compliance package — compliance pipeline, RBAC/ABAC policy engine. Backward-compatible re-exports: consumers can still use -``from compliance_models import ...`` etc. via the shim files in backend/. +``from compliance.models import ...`` etc. via the shim files in backend/. """ diff --git a/klausur-service/backend/grid/__init__.py b/klausur-service/backend/grid/__init__.py index 51be601..958449c 100644 --- a/klausur-service/backend/grid/__init__.py +++ b/klausur-service/backend/grid/__init__.py @@ -2,7 +2,7 @@ Grid package — restructured from grid_* flat modules. Backward-compatible re-exports: consumers can still use -``from grid_build_core import ...`` etc. via the shim files in backend/. +``from grid.build.core import ...`` etc. via the shim files in backend/. Sub-packages: - grid.build — grid construction pipeline (_build_grid_core and phases) diff --git a/klausur-service/backend/grid/build/cell_ops.py b/klausur-service/backend/grid/build/cell_ops.py index 57bc721..f309519 100644 --- a/klausur-service/backend/grid/build/cell_ops.py +++ b/klausur-service/backend/grid/build/cell_ops.py @@ -9,7 +9,7 @@ import logging import re from typing import Any, Dict, List, Tuple -from cv_ocr_engines import ( +from ocr.engines.engines import ( _words_to_reading_order_text, _group_words_into_lines, _lookup_ipa, ) diff --git a/klausur-service/backend/grid/build/cleanup.py b/klausur-service/backend/grid/build/cleanup.py index 39a60d8..62f190d 100644 --- a/klausur-service/backend/grid/build/cleanup.py +++ b/klausur-service/backend/grid/build/cleanup.py @@ -10,7 +10,7 @@ import logging import re from typing import Any, Dict, List -from cv_ocr_engines import _words_to_reading_order_text +from ocr.engines.engines import _words_to_reading_order_text logger = logging.getLogger(__name__) diff --git a/klausur-service/backend/grid/build/finalize.py b/klausur-service/backend/grid/build/finalize.py index 857df5f..7f8b736 100644 --- a/klausur-service/backend/grid/build/finalize.py +++ b/klausur-service/backend/grid/build/finalize.py @@ -69,14 +69,14 @@ def _finalize_grid( # --- Word-gap merge --- try: - from cv_syllable_detect import merge_word_gaps_in_zones + from ocr.detect.syllable.detect import merge_word_gaps_in_zones merge_word_gaps_in_zones(zones_data, session_id) except Exception as e: logger.warning("Word-gap merge failed: %s", e) # --- Pipe auto-correction --- try: - from cv_syllable_detect import autocorrect_pipe_artifacts + from ocr.detect.syllable.detect import autocorrect_pipe_artifacts autocorrect_pipe_artifacts(zones_data, session_id) except Exception as e: logger.warning("Pipe autocorrect failed: %s", e) @@ -132,10 +132,10 @@ def _detect_dictionary( margin_strip_detected: bool, ) -> Dict[str, Any]: """Run dictionary detection on the assembled grid.""" - from cv_layout import _score_dictionary_signals + from ocr.layout.layout import _score_dictionary_signals dict_detection: Dict[str, Any] = {"is_dictionary": False, "confidence": 0.0} try: - from cv_vocab_types import ColumnGeometry + from ocr.types import ColumnGeometry for z in zones_data: zone_cells = z.get("cells", []) zone_cols = z.get("columns", []) @@ -222,7 +222,7 @@ def _insert_syllable_dividers( if _syllable_eligible: try: - from cv_syllable_detect import insert_syllable_dividers + from ocr.detect.syllable.detect import insert_syllable_dividers force_syllables = (syllable_mode in ("all", "de", "en")) syllable_insertions = insert_syllable_dividers( zones_data, img_bgr, session_id, @@ -241,7 +241,7 @@ def _split_merged_words( ) -> None: """Split merged words using dictionary lookup.""" try: - from cv_review import _try_split_merged_word, _SPELL_AVAILABLE + from ocr.review.review import _try_split_merged_word, _SPELL_AVAILABLE if not _SPELL_AVAILABLE: return split_count = 0 @@ -307,7 +307,7 @@ def _run_spell_checker( ) -> None: """Run SmartSpellChecker on all cells.""" try: - from smart_spell import SmartSpellChecker + from ocr.spell.smart_spell import SmartSpellChecker _ssc = SmartSpellChecker() spell_fix_count = 0 diff --git a/klausur-service/backend/grid/build/text_ops.py b/klausur-service/backend/grid/build/text_ops.py index dc0bf9c..1be481a 100644 --- a/klausur-service/backend/grid/build/text_ops.py +++ b/klausur-service/backend/grid/build/text_ops.py @@ -10,8 +10,8 @@ import logging import re from typing import Any, Dict, List, Optional, Set, Tuple -from cv_color_detect import detect_word_colors -from cv_ocr_engines import ( +from ocr.detect.color_detect import detect_word_colors +from ocr.engines.engines import ( fix_cell_phonetics, fix_ipa_continuation_cell, _text_has_garbled_ipa, _lookup_ipa, ) @@ -207,7 +207,7 @@ def _run_ipa_correction( # --- German IPA (wiki-pronunciation-dict + epitran) --- if de_ipa_target_cols: - from cv_ipa_german import insert_german_ipa + from ocr.ipa_german import insert_german_ipa insert_german_ipa(all_cells, de_ipa_target_cols) ipa_target_cols = en_ipa_target_cols | de_ipa_target_cols diff --git a/klausur-service/backend/grid/build/zones.py b/klausur-service/backend/grid/build/zones.py index 8616e3a..aaed497 100644 --- a/klausur-service/backend/grid/build/zones.py +++ b/klausur-service/backend/grid/build/zones.py @@ -11,11 +11,11 @@ from typing import Any, Dict, List, Optional import cv2 import numpy as np -from cv_box_detect import detect_boxes, split_page_into_zones -from cv_graphic_detect import detect_graphic_elements -from cv_color_detect import recover_colored_text -from cv_vocab_types import PageZone -from ocr_pipeline_session_store import get_session_image +from ocr.detect.box_detect import detect_boxes, split_page_into_zones +from ocr.detect.graphic_detect import detect_graphic_elements +from ocr.detect.color_detect import recover_colored_text +from ocr.types import PageZone +from ocr.pipeline.session_store import get_session_image from grid.editor.filters import ( _filter_border_strip_words, diff --git a/klausur-service/backend/grid/editor/api.py b/klausur-service/backend/grid/editor/api.py index aef3113..30cce87 100644 --- a/klausur-service/backend/grid/editor/api.py +++ b/klausur-service/backend/grid/editor/api.py @@ -8,7 +8,7 @@ The actual endpoints live in: - grid_editor_api_unified.py (build-unified-grid, unified-grid) This module re-exports the combined router and key symbols so that -existing `from grid_editor_api import router` / `from grid_editor_api import _build_grid_core` +existing `from grid.editor.api import router` / `from grid.editor.api import _build_grid_core` continue to work unchanged. """ @@ -20,7 +20,7 @@ from .api_box import router as _box_router from .api_unified import router as _unified_router # Re-export _build_grid_core so callers that do -# `from grid_editor_api import _build_grid_core` keep working. +# `from grid.editor.api import _build_grid_core` keep working. from grid.build.core import _build_grid_core # noqa: F401 # Merge all sub-routers into one combined router diff --git a/klausur-service/backend/grid/editor/api_box.py b/klausur-service/backend/grid/editor/api_box.py index 94a5d0a..179a0fe 100644 --- a/klausur-service/backend/grid/editor/api_box.py +++ b/klausur-service/backend/grid/editor/api_box.py @@ -7,7 +7,7 @@ import logging from fastapi import APIRouter, HTTPException, Request from .filters import _words_in_zone -from ocr_pipeline_session_store import ( +from ocr.pipeline.session_store import ( get_session_db, update_session_db, ) @@ -76,7 +76,7 @@ async def build_box_grids(session_id: str, request: Request): pass layout_overrides = body.get("overrides", {}) - from cv_box_layout import build_box_zone_grid + from ocr.detect.box_layout import build_box_zone_grid img_w = grid_data.get("image_width", 0) or word_result.get("image_width", 0) img_h = grid_data.get("image_height", 0) or word_result.get("image_height", 0) @@ -119,7 +119,7 @@ async def build_box_grids(session_id: str, request: Request): # Apply SmartSpellChecker to all box cells try: - from smart_spell import SmartSpellChecker + from ocr.spell.smart_spell import SmartSpellChecker ssc = SmartSpellChecker() for cell in box_grid.get("cells", []): text = cell.get("text", "") diff --git a/klausur-service/backend/grid/editor/api_grid.py b/klausur-service/backend/grid/editor/api_grid.py index 685d789..288b49b 100644 --- a/klausur-service/backend/grid/editor/api_grid.py +++ b/klausur-service/backend/grid/editor/api_grid.py @@ -7,11 +7,11 @@ import logging from fastapi import APIRouter, HTTPException, Query, Request from grid.build.core import _build_grid_core -from ocr_pipeline_session_store import ( +from ocr.pipeline.session_store import ( get_session_db, update_session_db, ) -from ocr_pipeline_common import ( +from ocr.pipeline.common import ( _cache, _load_session_to_cache, _get_cached, @@ -60,7 +60,7 @@ async def build_grid( # Save automatic grid snapshot for later comparison with manual corrections # Lazy import to avoid circular dependency with ocr_pipeline_regression - from ocr_pipeline_regression import _build_reference_snapshot + from ocr.pipeline.regression import _build_reference_snapshot wr = session.get("word_result") or {} engine = wr.get("ocr_engine", "") @@ -134,7 +134,7 @@ async def rerun_ocr_and_build_grid( # 2. Scan quality assessment scan_quality_info = {} try: - from scan_quality import score_scan_quality + from ocr.pipeline.scan_quality import score_scan_quality quality_report = score_scan_quality(ocr_input) scan_quality_info = quality_report.to_dict() actual_min_conf = min_conf if min_conf > 0 else quality_report.recommended_min_conf @@ -146,7 +146,7 @@ async def rerun_ocr_and_build_grid( is_degraded = scan_quality_info.get("is_degraded", False) if enhance and is_degraded: try: - from ocr_image_enhance import enhance_for_ocr + from ocr.image_enhance import enhance_for_ocr ocr_input = enhance_for_ocr(ocr_input, is_degraded=True) logger.info("rerun-ocr: CLAHE enhancement applied") except Exception as e: @@ -159,8 +159,8 @@ async def rerun_ocr_and_build_grid( # RapidOCR rapid_words = [] try: - from cv_ocr_engines import ocr_region_rapid - from cv_vocab_types import PageRegion + from ocr.engines.engines import ocr_region_rapid + from ocr.types import PageRegion full_region = PageRegion(type="full_page", x=0, y=0, width=img_w, height=img_h) rapid_words = ocr_region_rapid(ocr_input, full_region) or [] except Exception as e: @@ -182,7 +182,7 @@ async def rerun_ocr_and_build_grid( }) # 5. Merge OCR results - from ocr_pipeline_ocr_merge import _split_paddle_multi_words, _merge_paddle_tesseract, _deduplicate_words + from ocr.pipeline.ocr_merge import _split_paddle_multi_words, _merge_paddle_tesseract, _deduplicate_words rapid_split = _split_paddle_multi_words(rapid_words) if rapid_words else [] if rapid_split or tess_words: merged_words = _merge_paddle_tesseract(rapid_split, tess_words) @@ -207,7 +207,7 @@ async def rerun_ocr_and_build_grid( vision_applied = False if vision_fusion: try: - from vision_ocr_fusion import vision_fuse_ocr + from ocr.pipeline.vision_fusion import vision_fuse_ocr category = doc_category or session.get("document_category") or "vokabelseite" logger.info(f"rerun-ocr: running Vision-LLM fusion (category={category})") merged_words = await vision_fuse_ocr(ocr_input, merged_words, category) diff --git a/klausur-service/backend/grid/editor/api_gutter.py b/klausur-service/backend/grid/editor/api_gutter.py index 7dfbd9f..b85d6c9 100644 --- a/klausur-service/backend/grid/editor/api_gutter.py +++ b/klausur-service/backend/grid/editor/api_gutter.py @@ -6,7 +6,7 @@ import logging from fastapi import APIRouter, HTTPException, Request -from ocr_pipeline_session_store import ( +from ocr.pipeline.session_store import ( get_session_db, update_session_db, ) @@ -35,7 +35,7 @@ async def gutter_repair(session_id: str): detail="No grid data. Run build-grid first.", ) - from cv_gutter_repair import analyse_grid_for_gutter_repair + from ocr.gutter.repair import analyse_grid_for_gutter_repair image_width = grid_data.get("image_width", 0) result = analyse_grid_for_gutter_repair(grid_data, image_width=image_width) @@ -86,7 +86,7 @@ async def gutter_repair_apply(session_id: str, request: Request): # Allows the user to pick a different correction from the alternatives list text_overrides = body.get("text_overrides", {}) - from cv_gutter_repair import apply_gutter_suggestions + from ocr.gutter.repair import apply_gutter_suggestions suggestions = gutter_result.get("suggestions", []) diff --git a/klausur-service/backend/grid/editor/api_unified.py b/klausur-service/backend/grid/editor/api_unified.py index 9ee83b8..5d5137d 100644 --- a/klausur-service/backend/grid/editor/api_unified.py +++ b/klausur-service/backend/grid/editor/api_unified.py @@ -6,7 +6,7 @@ import logging from fastapi import APIRouter, HTTPException -from ocr_pipeline_session_store import ( +from ocr.pipeline.session_store import ( get_session_db, update_session_db, ) @@ -32,7 +32,7 @@ async def build_unified_grid_endpoint(session_id: str): if not grid_data: raise HTTPException(status_code=400, detail="No grid data. Run build-grid first.") - from unified_grid import build_unified_grid + from grid.unified import build_unified_grid result = build_unified_grid( zones=grid_data.get("zones", []), diff --git a/klausur-service/backend/grid/editor/headers.py b/klausur-service/backend/grid/editor/headers.py index 6d6cb43..67755a8 100644 --- a/klausur-service/backend/grid/editor/headers.py +++ b/klausur-service/backend/grid/editor/headers.py @@ -8,7 +8,7 @@ import logging import re from typing import Dict, List, Optional -from cv_ocr_engines import _text_has_garbled_ipa +from ocr.engines.engines import _text_has_garbled_ipa logger = logging.getLogger(__name__) @@ -394,7 +394,7 @@ def _detect_colspan_cells( if len(columns) < 2 or not zone_words or not rows: return cells - from cv_words_first import _assign_word_to_row + from ocr.words_first import _assign_word_to_row # Column boundaries (midpoints between adjacent columns) col_boundaries = [] diff --git a/klausur-service/backend/grid/editor/helpers.py b/klausur-service/backend/grid/editor/helpers.py index 209e7ff..47424d0 100644 --- a/klausur-service/backend/grid/editor/helpers.py +++ b/klausur-service/backend/grid/editor/helpers.py @@ -2,7 +2,7 @@ Grid Editor helper functions — barrel re-export module. This file re-exports all public symbols from the split sub-modules -so that existing ``from grid_editor_helpers import ...`` statements +so that existing ``from grid.editor.helpers import ...`` statements continue to work without changes. Sub-modules: @@ -55,4 +55,4 @@ from .zones import ( # noqa: F401 ) # --- Re-export from cv_words_first (used by cv_box_layout.py) --------------- -from cv_words_first import _cluster_rows # noqa: F401 +from ocr.words_first import _cluster_rows # noqa: F401 diff --git a/klausur-service/backend/grid/editor/zones.py b/klausur-service/backend/grid/editor/zones.py index e77a1c5..1f29dce 100644 --- a/klausur-service/backend/grid/editor/zones.py +++ b/klausur-service/backend/grid/editor/zones.py @@ -12,8 +12,8 @@ import logging import re from typing import Any, Dict, List, Optional -from cv_vocab_types import PageZone -from cv_words_first import _cluster_rows, _build_cells +from ocr.types import PageZone +from ocr.words_first import _cluster_rows, _build_cells from .columns import ( _cluster_columns_by_alignment, diff --git a/klausur-service/backend/korrektur/__init__.py b/klausur-service/backend/korrektur/__init__.py index ec2a482..4e1a78b 100644 --- a/klausur-service/backend/korrektur/__init__.py +++ b/klausur-service/backend/korrektur/__init__.py @@ -2,5 +2,5 @@ korrektur package — exam correction, EH templates, PDF export. Backward-compatible re-exports: consumers can still use -``from eh_pipeline import ...`` etc. via the shim files in backend/. +``from korrektur.eh_pipeline import ...`` etc. via the shim files in backend/. """ diff --git a/klausur-service/backend/metrics/__init__.py b/klausur-service/backend/metrics/__init__.py index 86bdf8c..7b3e1a3 100644 --- a/klausur-service/backend/metrics/__init__.py +++ b/klausur-service/backend/metrics/__init__.py @@ -2,5 +2,5 @@ metrics package — PostgreSQL metrics database operations. Backward-compatible re-exports: consumers can still use -``from metrics_db import ...`` etc. via the shim files in backend/. +``from metrics.db import ...`` etc. via the shim files in backend/. """ diff --git a/klausur-service/backend/ocr/__init__.py b/klausur-service/backend/ocr/__init__.py index 679c58a..4da194a 100644 --- a/klausur-service/backend/ocr/__init__.py +++ b/klausur-service/backend/ocr/__init__.py @@ -2,7 +2,7 @@ OCR package — restructured from cv_* flat modules. Backward-compatible re-exports: consumers can still use -``from cv_layout import ...`` etc. via the shim files in backend/. +``from ocr.layout.layout import ...`` etc. via the shim files in backend/. """ from .types import * # noqa: F401,F403 diff --git a/klausur-service/backend/ocr/detect/box_layout.py b/klausur-service/backend/ocr/detect/box_layout.py index b9d492b..3d04e13 100644 --- a/klausur-service/backend/ocr/detect/box_layout.py +++ b/klausur-service/backend/ocr/detect/box_layout.py @@ -149,7 +149,7 @@ def build_box_zone_grid( Returns the same format as _build_zone_grid (columns, rows, cells, header_rows). """ - from grid_editor_helpers import _build_zone_grid + from grid.editor.helpers import _build_zone_grid if not zone_words: return { diff --git a/klausur-service/backend/ocr/labeling/api.py b/klausur-service/backend/ocr/labeling/api.py index 30a1bc6..7f2e02b 100644 --- a/klausur-service/backend/ocr/labeling/api.py +++ b/klausur-service/backend/ocr/labeling/api.py @@ -48,7 +48,7 @@ except ImportError: pass try: - from training_export_service import ( # noqa: F401 + from training.export_service import ( # noqa: F401 TrainingExportService, TrainingSample, get_training_export_service, diff --git a/klausur-service/backend/ocr/labeling/helpers.py b/klausur-service/backend/ocr/labeling/helpers.py index a4af3f1..b21c84b 100644 --- a/klausur-service/backend/ocr/labeling/helpers.py +++ b/klausur-service/backend/ocr/labeling/helpers.py @@ -57,7 +57,7 @@ except ImportError: # Try to import Training Export Service try: - from training_export_service import ( + from training.export_service import ( TrainingExportService, TrainingSample, get_training_export_service, diff --git a/klausur-service/backend/ocr/labeling/routes.py b/klausur-service/backend/ocr/labeling/routes.py index 8674353..e2f2c95 100644 --- a/klausur-service/backend/ocr/labeling/routes.py +++ b/klausur-service/backend/ocr/labeling/routes.py @@ -20,7 +20,7 @@ from typing import Optional, List from datetime import datetime import uuid -from metrics_db import ( +from metrics.db import ( create_ocr_labeling_session, get_ocr_labeling_sessions, get_ocr_labeling_session, diff --git a/klausur-service/backend/ocr/labeling/upload_routes.py b/klausur-service/backend/ocr/labeling/upload_routes.py index bad8bc5..9153103 100644 --- a/klausur-service/backend/ocr/labeling/upload_routes.py +++ b/klausur-service/backend/ocr/labeling/upload_routes.py @@ -17,7 +17,7 @@ from typing import Optional, List import uuid import os -from metrics_db import ( +from metrics.db import ( get_ocr_labeling_session, add_ocr_labeling_item, get_ocr_labeling_item, @@ -42,7 +42,7 @@ except ImportError: pass try: - from training_export_service import TrainingSample, get_training_export_service + from training.export_service import TrainingSample, get_training_export_service except ImportError: pass @@ -271,7 +271,7 @@ async def run_ocr_for_item(item_id: str): if ocr_text is None: raise HTTPException(status_code=500, detail="OCR failed") - from metrics_db import get_pool + from metrics.db import get_pool pool = await get_pool() if pool: async with pool.acquire() as conn: diff --git a/klausur-service/backend/ocr/pipeline/auto.py b/klausur-service/backend/ocr/pipeline/auto.py index 7b28616..14aa0eb 100644 --- a/klausur-service/backend/ocr/pipeline/auto.py +++ b/klausur-service/backend/ocr/pipeline/auto.py @@ -15,7 +15,7 @@ from .reprocess import router as _reprocess_router from .auto_steps import router as _steps_router # Combine both sub-routers into a single router for backwards compatibility. -# The consumer imports `from ocr_pipeline_auto import router as _auto_router`. +# The consumer imports `from ocr.pipeline.auto import router as _auto_router`. router = APIRouter(prefix="/api/v1/ocr-pipeline", tags=["ocr-pipeline"]) router.include_router(_reprocess_router) router.include_router(_steps_router) diff --git a/klausur-service/backend/ocr/pipeline/auto_steps.py b/klausur-service/backend/ocr/pipeline/auto_steps.py index 86c7897..1e789c7 100644 --- a/klausur-service/backend/ocr/pipeline/auto_steps.py +++ b/klausur-service/backend/ocr/pipeline/auto_steps.py @@ -17,7 +17,7 @@ import numpy as np from fastapi import APIRouter, HTTPException, Request from fastapi.responses import StreamingResponse -from cv_vocab_pipeline import ( +from ocr.cv_pipeline import ( OLLAMA_REVIEW_MODEL, PageRegion, RowGeometry, diff --git a/klausur-service/backend/ocr/pipeline/columns.py b/klausur-service/backend/ocr/pipeline/columns.py index 6fa2672..5175adf 100644 --- a/klausur-service/backend/ocr/pipeline/columns.py +++ b/klausur-service/backend/ocr/pipeline/columns.py @@ -14,7 +14,7 @@ from typing import Dict, List import cv2 from fastapi import APIRouter, HTTPException -from cv_vocab_pipeline import ( +from ocr.cv_pipeline import ( _detect_header_footer_gaps, _detect_sub_columns, classify_column_types, diff --git a/klausur-service/backend/ocr/pipeline/deskew.py b/klausur-service/backend/ocr/pipeline/deskew.py index 1caeec2..8ebcc91 100644 --- a/klausur-service/backend/ocr/pipeline/deskew.py +++ b/klausur-service/backend/ocr/pipeline/deskew.py @@ -12,7 +12,7 @@ from datetime import datetime import cv2 from fastapi import APIRouter, HTTPException -from cv_vocab_pipeline import ( +from ocr.cv_pipeline import ( create_ocr_image, deskew_image, deskew_image_by_word_alignment, diff --git a/klausur-service/backend/ocr/pipeline/dewarp.py b/klausur-service/backend/ocr/pipeline/dewarp.py index 21fd7d0..d83d7bc 100644 --- a/klausur-service/backend/ocr/pipeline/dewarp.py +++ b/klausur-service/backend/ocr/pipeline/dewarp.py @@ -17,7 +17,7 @@ from typing import Any, Dict import cv2 from fastapi import APIRouter, HTTPException, Query -from cv_vocab_pipeline import ( +from ocr.cv_pipeline import ( _apply_shear, create_ocr_image, dewarp_image, diff --git a/klausur-service/backend/ocr/pipeline/geometry.py b/klausur-service/backend/ocr/pipeline/geometry.py index 2e77a75..3af7363 100644 --- a/klausur-service/backend/ocr/pipeline/geometry.py +++ b/klausur-service/backend/ocr/pipeline/geometry.py @@ -8,7 +8,7 @@ This module was split into: - ocr_pipeline_columns.py (Column detection + ground truth) The `router` object is assembled here by including all sub-routers. -Importers that did `from ocr_pipeline_geometry import router` continue to work. +Importers that did `from ocr.pipeline.geometry import router` continue to work. """ from fastapi import APIRouter diff --git a/klausur-service/backend/ocr/pipeline/htr_api.py b/klausur-service/backend/ocr/pipeline/htr_api.py index 2976069..3c4a62a 100644 --- a/klausur-service/backend/ocr/pipeline/htr_api.py +++ b/klausur-service/backend/ocr/pipeline/htr_api.py @@ -243,7 +243,7 @@ async def recognize_from_session(req: HTRSessionRequest): Set use_clean=true to prefer the clean image (after handwriting removal step). This is useful when you want to do HTR on isolated handwriting regions. """ - from ocr_pipeline_session_store import get_session_db, get_session_image + from ocr.pipeline.session_store import get_session_db, get_session_image session = await get_session_db(req.session_id) if not session: diff --git a/klausur-service/backend/ocr/pipeline/llm_review.py b/klausur-service/backend/ocr/pipeline/llm_review.py index f2dd054..9007685 100644 --- a/klausur-service/backend/ocr/pipeline/llm_review.py +++ b/klausur-service/backend/ocr/pipeline/llm_review.py @@ -15,7 +15,7 @@ from typing import Dict, List from fastapi import APIRouter, HTTPException, Request from fastapi.responses import StreamingResponse -from cv_vocab_pipeline import ( +from ocr.cv_pipeline import ( OLLAMA_REVIEW_MODEL, llm_review_entries, llm_review_entries_streaming, diff --git a/klausur-service/backend/ocr/pipeline/ocr_merge.py b/klausur-service/backend/ocr/pipeline/ocr_merge.py index 17f73a1..974c29b 100644 --- a/klausur-service/backend/ocr/pipeline/ocr_merge.py +++ b/klausur-service/backend/ocr/pipeline/ocr_merge.py @@ -15,7 +15,7 @@ import cv2 import numpy as np from fastapi import APIRouter, HTTPException -from cv_words_first import build_grid_from_words +from ocr.words_first import build_grid_from_words from .common import _cache, _append_pipeline_log from .session_store import get_session_image, update_session_db @@ -136,7 +136,7 @@ async def paddle_kombi(session_id: str): img_png, img_bgr = await _load_session_image(session_id) img_h, img_w = img_bgr.shape[:2] - from cv_ocr_engines import ocr_region_paddle + from ocr.engines.engines import ocr_region_paddle t0 = time.time() @@ -202,8 +202,8 @@ async def rapid_kombi(session_id: str): img_png, img_bgr = await _load_session_image(session_id) img_h, img_w = img_bgr.shape[:2] - from cv_ocr_engines import ocr_region_rapid - from cv_vocab_types import PageRegion + from ocr.engines.engines import ocr_region_rapid + from ocr.types import PageRegion t0 = time.time() diff --git a/klausur-service/backend/ocr/pipeline/orientation_api.py b/klausur-service/backend/ocr/pipeline/orientation_api.py index 254eb3c..cded86f 100644 --- a/klausur-service/backend/ocr/pipeline/orientation_api.py +++ b/klausur-service/backend/ocr/pipeline/orientation_api.py @@ -9,7 +9,7 @@ from typing import Any, Dict import cv2 from fastapi import APIRouter, HTTPException -from cv_vocab_pipeline import detect_and_fix_orientation +from ocr.cv_pipeline import detect_and_fix_orientation from .page_crop import detect_page_splits from .session_store import update_session_db diff --git a/klausur-service/backend/ocr/pipeline/overlay_structure.py b/klausur-service/backend/ocr/pipeline/overlay_structure.py index f621d10..136112c 100644 --- a/klausur-service/backend/ocr/pipeline/overlay_structure.py +++ b/klausur-service/backend/ocr/pipeline/overlay_structure.py @@ -17,8 +17,8 @@ from fastapi.responses import Response from .common import _get_base_image_png from .session_store import get_session_db -from cv_color_detect import _COLOR_HEX, _COLOR_RANGES -from cv_box_detect import detect_boxes, split_page_into_zones +from ocr.detect.color_detect import _COLOR_HEX, _COLOR_RANGES +from ocr.detect.box_detect import detect_boxes, split_page_into_zones logger = logging.getLogger(__name__) diff --git a/klausur-service/backend/ocr/pipeline/postprocess.py b/klausur-service/backend/ocr/pipeline/postprocess.py index 9c63206..4e2f711 100644 --- a/klausur-service/backend/ocr/pipeline/postprocess.py +++ b/klausur-service/backend/ocr/pipeline/postprocess.py @@ -19,7 +19,7 @@ from .reconstruction import router as _reconstruction_router from .validation import router as _validation_router # Composite router — drop-in replacement for the old monolithic router. -# ocr_pipeline_api.py imports ``from ocr_pipeline_postprocess import router``. +# ocr_pipeline_api.py imports ``from ocr.pipeline.postprocess import router``. router = APIRouter() router.include_router(_llm_review_router) router.include_router(_reconstruction_router) diff --git a/klausur-service/backend/ocr/pipeline/regression_endpoints.py b/klausur-service/backend/ocr/pipeline/regression_endpoints.py index d2d2c34..91d78e4 100644 --- a/klausur-service/backend/ocr/pipeline/regression_endpoints.py +++ b/klausur-service/backend/ocr/pipeline/regression_endpoints.py @@ -14,7 +14,7 @@ from typing import Any, Dict, Optional from fastapi import APIRouter, HTTPException, Query -from grid_editor_api import _build_grid_core +from grid.editor.api import _build_grid_core from .session_store import ( get_session_db, list_ground_truth_sessions_db, diff --git a/klausur-service/backend/ocr/pipeline/rows.py b/klausur-service/backend/ocr/pipeline/rows.py index 67179d7..fb4f3a8 100644 --- a/klausur-service/backend/ocr/pipeline/rows.py +++ b/klausur-service/backend/ocr/pipeline/rows.py @@ -17,7 +17,7 @@ import cv2 import numpy as np from fastapi import APIRouter, HTTPException -from cv_vocab_pipeline import ( +from ocr.cv_pipeline import ( create_ocr_image, detect_column_geometry, detect_row_geometry, @@ -120,7 +120,7 @@ async def detect_rows(session_id: str): # merges rows. Word-grouping directly clusters words by Y proximity, # which is more robust for these cases. if is_sub_session and word_dicts: - from cv_layout import _build_rows_from_word_grouping + from ocr.layout.layout import _build_rows_from_word_grouping rows = _build_rows_from_word_grouping( word_dicts, left_x, right_x, top_y, bottom_y, right_x - left_x, bottom_y - top_y, diff --git a/klausur-service/backend/ocr/pipeline/sessions_crud.py b/klausur-service/backend/ocr/pipeline/sessions_crud.py index 41bd7ad..3824031 100644 --- a/klausur-service/backend/ocr/pipeline/sessions_crud.py +++ b/klausur-service/backend/ocr/pipeline/sessions_crud.py @@ -15,7 +15,7 @@ import cv2 import numpy as np from fastapi import APIRouter, File, Form, HTTPException, Query, UploadFile -from cv_vocab_pipeline import render_image_high_res, render_pdf_high_res +from ocr.cv_pipeline import render_image_high_res, render_pdf_high_res from .common import ( VALID_DOCUMENT_CATEGORIES, UpdateSessionRequest, diff --git a/klausur-service/backend/ocr/pipeline/sessions_images.py b/klausur-service/backend/ocr/pipeline/sessions_images.py index 9be42da..9de795d 100644 --- a/klausur-service/backend/ocr/pipeline/sessions_images.py +++ b/klausur-service/backend/ocr/pipeline/sessions_images.py @@ -17,7 +17,7 @@ import numpy as np from fastapi import APIRouter, HTTPException, Query from fastapi.responses import Response -from cv_vocab_pipeline import create_ocr_image, detect_document_type +from ocr.cv_pipeline import create_ocr_image, detect_document_type from .common import ( VALID_DOCUMENT_CATEGORIES, _append_pipeline_log, diff --git a/klausur-service/backend/ocr/pipeline/structure.py b/klausur-service/backend/ocr/pipeline/structure.py index 6908cc4..99e7ce8 100644 --- a/klausur-service/backend/ocr/pipeline/structure.py +++ b/klausur-service/backend/ocr/pipeline/structure.py @@ -15,9 +15,9 @@ import numpy as np from fastapi import APIRouter, HTTPException from pydantic import BaseModel -from cv_box_detect import detect_boxes -from cv_color_detect import _COLOR_RANGES, _COLOR_HEX -from cv_graphic_detect import detect_graphic_elements +from ocr.detect.box_detect import detect_boxes +from ocr.detect.color_detect import _COLOR_RANGES, _COLOR_HEX +from ocr.detect.graphic_detect import detect_graphic_elements from .session_store import ( get_session_db, update_session_db, @@ -100,7 +100,7 @@ async def detect_structure(session_id: str): ) # --- Zone splitting --- - from cv_box_detect import split_page_into_zones as _split_zones + from ocr.detect.box_detect import split_page_into_zones as _split_zones zones = _split_zones(content_x, content_y, content_w_px, content_h_px, boxes) # --- Color region sampling --- @@ -123,7 +123,7 @@ async def detect_structure(session_id: str): med_s = float(np.median(roi_hsv[:, :, 1])) med_v = float(np.median(roi_hsv[:, :, 2])) if med_s > 15: - from cv_color_detect import _hue_to_color_name + from ocr.detect.color_detect import _hue_to_color_name bg_name = _hue_to_color_name(med_h) bg_hex = _COLOR_HEX.get(bg_name, "#6b7280") else: diff --git a/klausur-service/backend/ocr/pipeline/words.py b/klausur-service/backend/ocr/pipeline/words.py index dd43c56..b4a71fd 100644 --- a/klausur-service/backend/ocr/pipeline/words.py +++ b/klausur-service/backend/ocr/pipeline/words.py @@ -23,7 +23,7 @@ import numpy as np from fastapi import APIRouter, HTTPException from pydantic import BaseModel -from cv_words_first import build_grid_from_words +from ocr.words_first import build_grid_from_words from .session_store import ( get_session_db, get_session_image, @@ -72,7 +72,7 @@ async def paddle_direct(session_id: str): img_h, img_w = img_bgr.shape[:2] - from cv_ocr_engines import ocr_region_paddle + from ocr.engines.engines import ocr_region_paddle t0 = time.time() word_dicts = await ocr_region_paddle(img_bgr, region=None) diff --git a/klausur-service/backend/ocr/pipeline/words_detect.py b/klausur-service/backend/ocr/pipeline/words_detect.py index 2770d28..ffdf062 100644 --- a/klausur-service/backend/ocr/pipeline/words_detect.py +++ b/klausur-service/backend/ocr/pipeline/words_detect.py @@ -17,7 +17,7 @@ import numpy as np from fastapi import APIRouter, HTTPException, Request from fastapi.responses import StreamingResponse -from cv_vocab_pipeline import ( +from ocr.cv_pipeline import ( PageRegion, RowGeometry, _cells_to_vocab_entries, @@ -27,7 +27,7 @@ from cv_vocab_pipeline import ( create_ocr_image, detect_column_geometry, ) -from cv_words_first import build_grid_from_words +from ocr.words_first import build_grid_from_words from .session_store import ( get_session_db, update_session_db, @@ -223,7 +223,7 @@ async def _words_first_path( img_h, img_w = dewarped_bgr.shape[:2] if engine == "paddle": - from cv_ocr_engines import ocr_region_paddle + from ocr.engines.engines import ocr_region_paddle wf_word_dicts = await ocr_region_paddle(dewarped_bgr, region=None) cached["_paddle_word_dicts"] = wf_word_dicts else: diff --git a/klausur-service/backend/ocr/pipeline/words_stream.py b/klausur-service/backend/ocr/pipeline/words_stream.py index 9ff06d1..54f273d 100644 --- a/klausur-service/backend/ocr/pipeline/words_stream.py +++ b/klausur-service/backend/ocr/pipeline/words_stream.py @@ -15,7 +15,7 @@ from typing import Any, Dict, List import numpy as np from fastapi import Request -from cv_vocab_pipeline import ( +from ocr.cv_pipeline import ( PageRegion, RowGeometry, _cells_to_vocab_entries, diff --git a/klausur-service/backend/ocr/review/spell.py b/klausur-service/backend/ocr/review/spell.py index 3166b26..ad124ee 100644 --- a/klausur-service/backend/ocr/review/spell.py +++ b/klausur-service/backend/ocr/review/spell.py @@ -219,7 +219,7 @@ def spell_review_entries_sync(entries: List[Dict]) -> Dict: # Use SmartSpellChecker if available _smart = None try: - from smart_spell import SmartSpellChecker + from ocr.spell.smart_spell import SmartSpellChecker _smart = SmartSpellChecker() logger.debug("spell_review: using SmartSpellChecker") except Exception: diff --git a/klausur-service/backend/ocr/spell/core.py b/klausur-service/backend/ocr/spell/core.py index 9f2fa7d..090f2b4 100644 --- a/klausur-service/backend/ocr/spell/core.py +++ b/klausur-service/backend/ocr/spell/core.py @@ -159,7 +159,7 @@ class _SmartSpellCoreBase: return True # Also accept known abbreviations (sth, sb, adj, etc.) try: - from cv_ocr_engines import _KNOWN_ABBREVIATIONS + from ocr.engines.engines import _KNOWN_ABBREVIATIONS if w in _KNOWN_ABBREVIATIONS: return True except ImportError: diff --git a/klausur-service/backend/ocr/spell/text.py b/klausur-service/backend/ocr/spell/text.py index a5081f4..affda53 100644 --- a/klausur-service/backend/ocr/spell/text.py +++ b/klausur-service/backend/ocr/spell/text.py @@ -35,7 +35,7 @@ class SmartSpellChecker(_SmartSpellCoreBase): """ # Import known abbreviations for vocabulary context try: - from cv_ocr_engines import _KNOWN_ABBREVIATIONS + from ocr.engines.engines import _KNOWN_ABBREVIATIONS except ImportError: _KNOWN_ABBREVIATIONS = set() @@ -149,7 +149,7 @@ class SmartSpellChecker(_SmartSpellCoreBase): # --- Pass 1: Boundary repair between adjacent unknown words --- # Import abbreviations for the heuristic below try: - from cv_ocr_engines import _KNOWN_ABBREVIATIONS as _ABBREVS + from ocr.engines.engines import _KNOWN_ABBREVIATIONS as _ABBREVS except ImportError: _ABBREVS = set() diff --git a/klausur-service/backend/training/__init__.py b/klausur-service/backend/training/__init__.py index 454bcee..24175cf 100644 --- a/klausur-service/backend/training/__init__.py +++ b/klausur-service/backend/training/__init__.py @@ -2,5 +2,5 @@ training package — training API, simulation, export, TrOCR. Backward-compatible re-exports: consumers can still use -``from training_api import ...`` etc. via the shim files in backend/. +``from training.api import ...`` etc. via the shim files in backend/. """ diff --git a/klausur-service/backend/training/routes.py b/klausur-service/backend/training/routes.py index cc35759..79ac4bc 100644 --- a/klausur-service/backend/training/routes.py +++ b/klausur-service/backend/training/routes.py @@ -211,7 +211,7 @@ async def delete_model_version(version_id: str): @router.get("/dataset/stats", response_model=dict) async def get_dataset_stats(): """Get statistics about the training dataset.""" - from metrics_db import get_zeugnis_stats + from metrics.db import get_zeugnis_stats zeugnis_stats = await get_zeugnis_stats() diff --git a/klausur-service/backend/vocab/__init__.py b/klausur-service/backend/vocab/__init__.py index eabdd83..91a3e95 100644 --- a/klausur-service/backend/vocab/__init__.py +++ b/klausur-service/backend/vocab/__init__.py @@ -2,5 +2,5 @@ Vocab package — restructured from vocab_* flat modules. Backward-compatible re-exports: consumers can still use -``from vocab_worksheet_api import ...`` etc. via the shim files in backend/. +``from vocab.worksheet.api import ...`` etc. via the shim files in backend/. """ diff --git a/klausur-service/backend/vocab/worksheet/analysis_api.py b/klausur-service/backend/vocab/worksheet/analysis_api.py index dd74c5b..4dea005 100644 --- a/klausur-service/backend/vocab/worksheet/analysis_api.py +++ b/klausur-service/backend/vocab/worksheet/analysis_api.py @@ -27,7 +27,7 @@ from .generation import convert_pdf_page_to_image # Try to import Tesseract extractor try: - from tesseract_vocab_extractor import ( + from ocr.engines.tesseract_extractor import ( extract_bounding_boxes, TESSERACT_AVAILABLE, ) except ImportError: @@ -264,7 +264,7 @@ async def extract_with_boxes(session_id: str, page_number: int): # Deskew image before OCR deskew_angle = 0.0 try: - from cv_vocab_pipeline import deskew_image_by_word_alignment, CV2_AVAILABLE + from ocr.cv_pipeline import deskew_image_by_word_alignment, CV2_AVAILABLE if CV2_AVAILABLE: image_data, deskew_angle = deskew_image_by_word_alignment(image_data) logger.info(f"Deskew: {deskew_angle:.2f}° for page {page_number}") diff --git a/klausur-service/backend/vocab/worksheet/compare_api.py b/klausur-service/backend/vocab/worksheet/compare_api.py index 88d1df1..d96143e 100644 --- a/klausur-service/backend/vocab/worksheet/compare_api.py +++ b/klausur-service/backend/vocab/worksheet/compare_api.py @@ -23,7 +23,7 @@ from .generation import convert_pdf_page_to_image # Try to import Tesseract extractor try: - from tesseract_vocab_extractor import ( + from ocr.engines.tesseract_extractor import ( run_tesseract_pipeline, match_positions_to_vocab, TESSERACT_AVAILABLE, ) @@ -32,7 +32,7 @@ except ImportError: # Try to import CV Pipeline try: - from cv_vocab_pipeline import run_cv_pipeline, CV_PIPELINE_AVAILABLE + from ocr.cv_pipeline import run_cv_pipeline, CV_PIPELINE_AVAILABLE except ImportError: CV_PIPELINE_AVAILABLE = False @@ -328,7 +328,7 @@ async def analyze_grid(session_id: str, page_number: int, use_tesseract: bool = # Run Tesseract if not already cached if not tess_page_data: logger.info("Running Tesseract for grid analysis (not cached)") - from tesseract_vocab_extractor import run_tesseract_pipeline as _run_tess + from ocr.engines.tesseract_extractor import run_tesseract_pipeline as _run_tess tess_page_data = await _run_tess(image_data, lang="eng+deu") session[f"tesseract_page_{page_number}"] = tess_page_data session["tesseract_words"] = tess_page_data.get("words", []) diff --git a/klausur-service/backend/vocab/worksheet/ocr.py b/klausur-service/backend/vocab/worksheet/ocr.py index 6a22747..664940d 100644 --- a/klausur-service/backend/vocab/worksheet/ocr.py +++ b/klausur-service/backend/vocab/worksheet/ocr.py @@ -39,7 +39,7 @@ except ImportError: # CV pipeline helpers try: - from cv_vocab_pipeline import ( + from ocr.cv_pipeline import ( deskew_two_pass, dewarp_image, detect_and_fix_orientation, @@ -54,7 +54,7 @@ except ImportError: _fix_phonetic_brackets = None # type: ignore[assignment] try: - from cv_cell_grid import ( + from ocr.cell_grid.cell_grid import ( _merge_wrapped_rows, _merge_phonetic_continuation_rows, _merge_continuation_rows, @@ -65,17 +65,17 @@ except ImportError: _merge_continuation_rows = None # type: ignore[assignment] try: - from cv_ocr_engines import ocr_region_rapid + from ocr.engines.engines import ocr_region_rapid except ImportError: ocr_region_rapid = None # type: ignore[assignment] try: - from cv_vocab_types import PageRegion + from ocr.types import PageRegion except ImportError: PageRegion = None # type: ignore[assignment] try: - from ocr_pipeline_ocr_merge import ( + from ocr.pipeline.ocr_merge import ( _split_paddle_multi_words, _merge_paddle_tesseract, _deduplicate_words, @@ -86,12 +86,12 @@ except ImportError: _deduplicate_words = None # type: ignore[assignment] try: - from cv_words_first import build_grid_from_words + from ocr.words_first import build_grid_from_words except ImportError: build_grid_from_words = None # type: ignore[assignment] try: - from ocr_pipeline_session_store import ( + from ocr.pipeline.session_store import ( create_session_db as create_pipeline_session_db, update_session_db as update_pipeline_session_db, ) @@ -173,7 +173,7 @@ async def _run_ocr_pipeline_for_page( # 5. Content crop (removes scanner borders, gutter shadows) t0 = _time.time() try: - from page_crop import detect_and_crop_page + from ocr.pipeline.page_crop import detect_and_crop_page cropped_bgr, crop_result = detect_and_crop_page(dewarped_bgr) if crop_result.get("crop_applied"): dewarped_bgr = cropped_bgr @@ -186,7 +186,7 @@ async def _run_ocr_pipeline_for_page( # 5b. Scan quality assessment scan_quality_report = None try: - from scan_quality import score_scan_quality + from ocr.pipeline.scan_quality import score_scan_quality scan_quality_report = score_scan_quality(dewarped_bgr) except Exception as e: logger.warning(f" scan quality: failed ({e})") @@ -200,7 +200,7 @@ async def _run_ocr_pipeline_for_page( is_degraded = scan_quality_report.is_degraded if scan_quality_report else False if is_degraded and enable_enhance: try: - from ocr_image_enhance import enhance_for_ocr + from ocr.image_enhance import enhance_for_ocr dewarped_bgr = enhance_for_ocr(dewarped_bgr, is_degraded=True) logger.info(" enhancement: applied (degraded scan)") except Exception as e: @@ -212,8 +212,8 @@ async def _run_ocr_pipeline_for_page( # RapidOCR (local ONNX) try: - from cv_ocr_engines import ocr_region_rapid - from cv_vocab_types import PageRegion + from ocr.engines.engines import ocr_region_rapid + from ocr.types import PageRegion full_region = PageRegion(type="full_page", x=0, y=0, width=img_w, height=img_h) rapid_words = ocr_region_rapid(dewarped_bgr, full_region) or [] except Exception as e: @@ -243,8 +243,8 @@ async def _run_ocr_pipeline_for_page( }) # Merge dual-engine results - from ocr_pipeline_ocr_merge import _split_paddle_multi_words, _merge_paddle_tesseract, _deduplicate_words - from cv_words_first import build_grid_from_words + from ocr.pipeline.ocr_merge import _split_paddle_multi_words, _merge_paddle_tesseract, _deduplicate_words + from ocr.words_first import build_grid_from_words rapid_split = _split_paddle_multi_words(rapid_words) if rapid_words else [] if rapid_split or tess_words: @@ -300,7 +300,7 @@ async def _run_ocr_pipeline_for_page( # 8. Run full grid-build (with pipe-autocorrect, word-gap merge, etc.) t0 = _time.time() try: - from grid_editor_api import _build_grid_core + from grid.editor.api import _build_grid_core session_data = { "word_result": word_result, } diff --git a/klausur-service/backend/vocab/worksheet/upload_api.py b/klausur-service/backend/vocab/worksheet/upload_api.py index 274a708..73cec7c 100644 --- a/klausur-service/backend/vocab/worksheet/upload_api.py +++ b/klausur-service/backend/vocab/worksheet/upload_api.py @@ -36,7 +36,7 @@ LOCAL_STORAGE_PATH = os.getenv("VOCAB_STORAGE_PATH", "/app/vocab-worksheets") try: import numpy as np - from cv_preprocessing import render_pdf_high_res, detect_and_fix_orientation + from ocr.preprocessing.preprocessing import render_pdf_high_res, detect_and_fix_orientation OCR_PIPELINE_AVAILABLE = True except ImportError: np = None # type: ignore[assignment] diff --git a/klausur-service/backend/worksheet/__init__.py b/klausur-service/backend/worksheet/__init__.py index 8f277a5..7e41241 100644 --- a/klausur-service/backend/worksheet/__init__.py +++ b/klausur-service/backend/worksheet/__init__.py @@ -2,5 +2,5 @@ worksheet package — worksheet editor, NRU generator, cleanup. Backward-compatible re-exports: consumers can still use -``from worksheet_editor_api import ...`` etc. via the shim files in backend/. +``from worksheet.editor_api import ...`` etc. via the shim files in backend/. """ diff --git a/klausur-service/backend/worksheet/editor_api.py b/klausur-service/backend/worksheet/editor_api.py index ef0ea57..00a32f6 100644 --- a/klausur-service/backend/worksheet/editor_api.py +++ b/klausur-service/backend/worksheet/editor_api.py @@ -366,7 +366,7 @@ async def reconstruct_document_from_session(request: ReconstructRequest): async def get_available_sessions(): """Get list of available vocab sessions that can be reconstructed.""" try: - from vocab_worksheet_api import _sessions + from vocab.worksheet.api import _sessions available = [] for session_id, session in _sessions.items(): diff --git a/klausur-service/backend/worksheet/editor_reconstruct.py b/klausur-service/backend/worksheet/editor_reconstruct.py index e8db91b..8306111 100644 --- a/klausur-service/backend/worksheet/editor_reconstruct.py +++ b/klausur-service/backend/worksheet/editor_reconstruct.py @@ -31,7 +31,7 @@ async def reconstruct_document_logic(request: ReconstructRequest) -> Reconstruct Returns ReconstructResponse ready to send to the client. """ from fastapi import HTTPException - from vocab_worksheet_api import _sessions, convert_pdf_page_to_image + from vocab.worksheet.api import _sessions, convert_pdf_page_to_image # Check if session exists if request.session_id not in _sessions: diff --git a/klausur-service/backend/zeugnis/__init__.py b/klausur-service/backend/zeugnis/__init__.py index 3e24f29..2a8202b 100644 --- a/klausur-service/backend/zeugnis/__init__.py +++ b/klausur-service/backend/zeugnis/__init__.py @@ -2,5 +2,5 @@ zeugnis package — certificate crawler, models, storage. Backward-compatible re-exports: consumers can still use -``from zeugnis_api import ...`` etc. via the shim files in backend/. +``from zeugnis.api import ...`` etc. via the shim files in backend/. """ diff --git a/klausur-service/backend/zeugnis/api_docs.py b/klausur-service/backend/zeugnis/api_docs.py index 39c5c9f..e4f379b 100644 --- a/klausur-service/backend/zeugnis/api_docs.py +++ b/klausur-service/backend/zeugnis/api_docs.py @@ -16,7 +16,7 @@ from .models import ( from .crawler import ( start_crawler, stop_crawler, get_crawler_status, ) -from metrics_db import ( +from metrics.db import ( get_zeugnis_documents, get_zeugnis_stats, log_zeugnis_event, get_pool, ) diff --git a/klausur-service/backend/zeugnis/api_sources.py b/klausur-service/backend/zeugnis/api_sources.py index 021f283..bee108a 100644 --- a/klausur-service/backend/zeugnis/api_sources.py +++ b/klausur-service/backend/zeugnis/api_sources.py @@ -15,7 +15,7 @@ from .models import ( BUNDESLAENDER, generate_id, get_training_allowed, get_bundesland_name, get_license_for_bundesland, ) -from metrics_db import ( +from metrics.db import ( get_zeugnis_sources, upsert_zeugnis_source, get_pool, ) diff --git a/klausur-service/backend/zeugnis/control.py b/klausur-service/backend/zeugnis/control.py index c105c6f..43c95f6 100644 --- a/klausur-service/backend/zeugnis/control.py +++ b/klausur-service/backend/zeugnis/control.py @@ -31,7 +31,7 @@ async def start_crawler(bundesland: Optional[str] = None, source_id: Optional[st async def run_crawler(): try: - from metrics_db import get_pool + from metrics.db import get_pool pool = await get_pool() if pool: diff --git a/klausur-service/backend/zeugnis/seed_data.py b/klausur-service/backend/zeugnis/seed_data.py index 0d68107..e2ead7e 100644 --- a/klausur-service/backend/zeugnis/seed_data.py +++ b/klausur-service/backend/zeugnis/seed_data.py @@ -322,8 +322,8 @@ SEED_DATA: Dict[str, Dict[str, Any]] = { async def populate_seed_data(): """Populate database with seed data.""" - from metrics_db import get_pool, upsert_zeugnis_source - from zeugnis_models import generate_id + from metrics.db import get_pool, upsert_zeugnis_source + from zeugnis.models import generate_id pool = await get_pool() if not pool: @@ -412,4 +412,4 @@ if __name__ == "__main__": print("\n" + "=" * 60) print("To populate database, run:") - print(" python -c 'import asyncio; from zeugnis_seed_data import populate_seed_data; asyncio.run(populate_seed_data())'") + print(" python -c 'import asyncio; from zeugnis.seed_data import populate_seed_data; asyncio.run(populate_seed_data())'") diff --git a/klausur-service/backend/zeugnis/worker.py b/klausur-service/backend/zeugnis/worker.py index 7003d21..bc55ec8 100644 --- a/klausur-service/backend/zeugnis/worker.py +++ b/klausur-service/backend/zeugnis/worker.py @@ -83,7 +83,7 @@ class ZeugnisCrawler: # Initialize database connection try: - from metrics_db import get_pool + from metrics.db import get_pool self.db_pool = await get_pool() except Exception as e: print(f"Failed to get database pool: {e}")