Fix: Update all old-style imports inside packages to new paths
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 1m7s
CI / test-go-edu-search (push) Successful in 46s
CI / test-python-klausur (push) Failing after 2m32s
CI / test-python-agent-core (push) Successful in 33s
CI / test-nodejs-website (push) Successful in 34s

65 files in klausur-service packages + 3 in backend-lehrer packages
had stale imports referencing deleted shim modules.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-04-26 00:19:13 +02:00
parent 5f2ed44654
commit eecb5472dd
68 changed files with 132 additions and 132 deletions
+1 -1
View File
@@ -266,7 +266,7 @@ async def send_message(conversation_id: str, message: MessageBase):
if contact and contact.get("email"): if contact and contact.get("email"):
try: try:
from email_service import email_service from services.email import email_service
result = email_service.send_messenger_notification( result = email_service.send_messenger_notification(
to_email=contact["email"], to_email=contact["email"],
+1 -1
View File
@@ -363,7 +363,7 @@ def api_generate_story(unit_id: str, payload: StoryGeneratePayload):
raise HTTPException(status_code=404, detail="Lerneinheit nicht gefunden.") raise HTTPException(status_code=404, detail="Lerneinheit nicht gefunden.")
try: try:
from story_generator import generate_story from services.story_generator import generate_story
result = generate_story( result = generate_story(
vocabulary=payload.vocabulary, vocabulary=payload.vocabulary,
language=payload.language, language=payload.language,
+2 -2
View File
@@ -22,7 +22,7 @@ from .db import (
get_all_pos, get_all_pos,
VocabularyWord, VocabularyWord,
) )
from learning_units import ( from units.learning import (
LearningUnitCreate, LearningUnitCreate,
create_learning_unit, create_learning_unit,
get_learning_unit, get_learning_unit,
@@ -343,7 +343,7 @@ async def api_translate_words(payload: TranslateRequest):
Uses local LLM (Ollama) for translation. Results are cached in the Uses local LLM (Ollama) for translation. Results are cached in the
vocabulary_words.translations JSONB field. vocabulary_words.translations JSONB field.
""" """
from translation_service import translate_and_store from services.translation import translate_and_store
if payload.target_language not in {"tr", "ar", "uk", "ru", "pl", "fr", "es"}: if payload.target_language not in {"tr", "ar", "uk", "ru", "pl", "fr", "es"}:
raise HTTPException(status_code=400, detail=f"Sprache '{payload.target_language}' nicht unterstuetzt") raise HTTPException(status_code=400, detail=f"Sprache '{payload.target_language}' nicht unterstuetzt")
+1 -1
View File
@@ -2,5 +2,5 @@
admin package — admin APIs for NiBiS, RAG, templates. admin package — admin APIs for NiBiS, RAG, templates.
Backward-compatible re-exports: consumers can still use Backward-compatible re-exports: consumers can still use
``from admin_api import ...`` etc. via the shim files in backend/. ``from admin.api import ...`` etc. via the shim files in backend/.
""" """
+1 -1
View File
@@ -7,7 +7,7 @@ This module was split into:
- admin_templates.py (Legal templates ingestion, search) - admin_templates.py (Legal templates ingestion, search)
The `router` object is assembled here by including all sub-routers. The `router` object is assembled here by including all sub-routers.
Importers that did `from admin_api import router` continue to work. Importers that did `from admin.api import router` continue to work.
""" """
from fastapi import APIRouter from fastapi import APIRouter
+1 -1
View File
@@ -28,7 +28,7 @@ except ImportError:
MINIO_AVAILABLE = False MINIO_AVAILABLE = False
try: try:
from metrics_db import ( from metrics.db import (
init_metrics_tables, store_feedback, log_search, log_upload, init_metrics_tables, store_feedback, log_search, log_upload,
calculate_metrics, get_recent_feedback, get_upload_history calculate_metrics, get_recent_feedback, get_upload_history
) )
@@ -2,5 +2,5 @@
compliance package — compliance pipeline, RBAC/ABAC policy engine. compliance package — compliance pipeline, RBAC/ABAC policy engine.
Backward-compatible re-exports: consumers can still use Backward-compatible re-exports: consumers can still use
``from compliance_models import ...`` etc. via the shim files in backend/. ``from compliance.models import ...`` etc. via the shim files in backend/.
""" """
+1 -1
View File
@@ -2,7 +2,7 @@
Grid package — restructured from grid_* flat modules. Grid package — restructured from grid_* flat modules.
Backward-compatible re-exports: consumers can still use Backward-compatible re-exports: consumers can still use
``from grid_build_core import ...`` etc. via the shim files in backend/. ``from grid.build.core import ...`` etc. via the shim files in backend/.
Sub-packages: Sub-packages:
- grid.build — grid construction pipeline (_build_grid_core and phases) - grid.build — grid construction pipeline (_build_grid_core and phases)
@@ -9,7 +9,7 @@ import logging
import re import re
from typing import Any, Dict, List, Tuple from typing import Any, Dict, List, Tuple
from cv_ocr_engines import ( from ocr.engines.engines import (
_words_to_reading_order_text, _group_words_into_lines, _lookup_ipa, _words_to_reading_order_text, _group_words_into_lines, _lookup_ipa,
) )
@@ -10,7 +10,7 @@ import logging
import re import re
from typing import Any, Dict, List from typing import Any, Dict, List
from cv_ocr_engines import _words_to_reading_order_text from ocr.engines.engines import _words_to_reading_order_text
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -69,14 +69,14 @@ def _finalize_grid(
# --- Word-gap merge --- # --- Word-gap merge ---
try: try:
from cv_syllable_detect import merge_word_gaps_in_zones from ocr.detect.syllable.detect import merge_word_gaps_in_zones
merge_word_gaps_in_zones(zones_data, session_id) merge_word_gaps_in_zones(zones_data, session_id)
except Exception as e: except Exception as e:
logger.warning("Word-gap merge failed: %s", e) logger.warning("Word-gap merge failed: %s", e)
# --- Pipe auto-correction --- # --- Pipe auto-correction ---
try: try:
from cv_syllable_detect import autocorrect_pipe_artifacts from ocr.detect.syllable.detect import autocorrect_pipe_artifacts
autocorrect_pipe_artifacts(zones_data, session_id) autocorrect_pipe_artifacts(zones_data, session_id)
except Exception as e: except Exception as e:
logger.warning("Pipe autocorrect failed: %s", e) logger.warning("Pipe autocorrect failed: %s", e)
@@ -132,10 +132,10 @@ def _detect_dictionary(
margin_strip_detected: bool, margin_strip_detected: bool,
) -> Dict[str, Any]: ) -> Dict[str, Any]:
"""Run dictionary detection on the assembled grid.""" """Run dictionary detection on the assembled grid."""
from cv_layout import _score_dictionary_signals from ocr.layout.layout import _score_dictionary_signals
dict_detection: Dict[str, Any] = {"is_dictionary": False, "confidence": 0.0} dict_detection: Dict[str, Any] = {"is_dictionary": False, "confidence": 0.0}
try: try:
from cv_vocab_types import ColumnGeometry from ocr.types import ColumnGeometry
for z in zones_data: for z in zones_data:
zone_cells = z.get("cells", []) zone_cells = z.get("cells", [])
zone_cols = z.get("columns", []) zone_cols = z.get("columns", [])
@@ -222,7 +222,7 @@ def _insert_syllable_dividers(
if _syllable_eligible: if _syllable_eligible:
try: try:
from cv_syllable_detect import insert_syllable_dividers from ocr.detect.syllable.detect import insert_syllable_dividers
force_syllables = (syllable_mode in ("all", "de", "en")) force_syllables = (syllable_mode in ("all", "de", "en"))
syllable_insertions = insert_syllable_dividers( syllable_insertions = insert_syllable_dividers(
zones_data, img_bgr, session_id, zones_data, img_bgr, session_id,
@@ -241,7 +241,7 @@ def _split_merged_words(
) -> None: ) -> None:
"""Split merged words using dictionary lookup.""" """Split merged words using dictionary lookup."""
try: try:
from cv_review import _try_split_merged_word, _SPELL_AVAILABLE from ocr.review.review import _try_split_merged_word, _SPELL_AVAILABLE
if not _SPELL_AVAILABLE: if not _SPELL_AVAILABLE:
return return
split_count = 0 split_count = 0
@@ -307,7 +307,7 @@ def _run_spell_checker(
) -> None: ) -> None:
"""Run SmartSpellChecker on all cells.""" """Run SmartSpellChecker on all cells."""
try: try:
from smart_spell import SmartSpellChecker from ocr.spell.smart_spell import SmartSpellChecker
_ssc = SmartSpellChecker() _ssc = SmartSpellChecker()
spell_fix_count = 0 spell_fix_count = 0
@@ -10,8 +10,8 @@ import logging
import re import re
from typing import Any, Dict, List, Optional, Set, Tuple from typing import Any, Dict, List, Optional, Set, Tuple
from cv_color_detect import detect_word_colors from ocr.detect.color_detect import detect_word_colors
from cv_ocr_engines import ( from ocr.engines.engines import (
fix_cell_phonetics, fix_ipa_continuation_cell, _text_has_garbled_ipa, fix_cell_phonetics, fix_ipa_continuation_cell, _text_has_garbled_ipa,
_lookup_ipa, _lookup_ipa,
) )
@@ -207,7 +207,7 @@ def _run_ipa_correction(
# --- German IPA (wiki-pronunciation-dict + epitran) --- # --- German IPA (wiki-pronunciation-dict + epitran) ---
if de_ipa_target_cols: if de_ipa_target_cols:
from cv_ipa_german import insert_german_ipa from ocr.ipa_german import insert_german_ipa
insert_german_ipa(all_cells, de_ipa_target_cols) insert_german_ipa(all_cells, de_ipa_target_cols)
ipa_target_cols = en_ipa_target_cols | de_ipa_target_cols ipa_target_cols = en_ipa_target_cols | de_ipa_target_cols
+5 -5
View File
@@ -11,11 +11,11 @@ from typing import Any, Dict, List, Optional
import cv2 import cv2
import numpy as np import numpy as np
from cv_box_detect import detect_boxes, split_page_into_zones from ocr.detect.box_detect import detect_boxes, split_page_into_zones
from cv_graphic_detect import detect_graphic_elements from ocr.detect.graphic_detect import detect_graphic_elements
from cv_color_detect import recover_colored_text from ocr.detect.color_detect import recover_colored_text
from cv_vocab_types import PageZone from ocr.types import PageZone
from ocr_pipeline_session_store import get_session_image from ocr.pipeline.session_store import get_session_image
from grid.editor.filters import ( from grid.editor.filters import (
_filter_border_strip_words, _filter_border_strip_words,
+2 -2
View File
@@ -8,7 +8,7 @@ The actual endpoints live in:
- grid_editor_api_unified.py (build-unified-grid, unified-grid) - grid_editor_api_unified.py (build-unified-grid, unified-grid)
This module re-exports the combined router and key symbols so that This module re-exports the combined router and key symbols so that
existing `from grid_editor_api import router` / `from grid_editor_api import _build_grid_core` existing `from grid.editor.api import router` / `from grid.editor.api import _build_grid_core`
continue to work unchanged. continue to work unchanged.
""" """
@@ -20,7 +20,7 @@ from .api_box import router as _box_router
from .api_unified import router as _unified_router from .api_unified import router as _unified_router
# Re-export _build_grid_core so callers that do # Re-export _build_grid_core so callers that do
# `from grid_editor_api import _build_grid_core` keep working. # `from grid.editor.api import _build_grid_core` keep working.
from grid.build.core import _build_grid_core # noqa: F401 from grid.build.core import _build_grid_core # noqa: F401
# Merge all sub-routers into one combined router # Merge all sub-routers into one combined router
@@ -7,7 +7,7 @@ import logging
from fastapi import APIRouter, HTTPException, Request from fastapi import APIRouter, HTTPException, Request
from .filters import _words_in_zone from .filters import _words_in_zone
from ocr_pipeline_session_store import ( from ocr.pipeline.session_store import (
get_session_db, get_session_db,
update_session_db, update_session_db,
) )
@@ -76,7 +76,7 @@ async def build_box_grids(session_id: str, request: Request):
pass pass
layout_overrides = body.get("overrides", {}) layout_overrides = body.get("overrides", {})
from cv_box_layout import build_box_zone_grid from ocr.detect.box_layout import build_box_zone_grid
img_w = grid_data.get("image_width", 0) or word_result.get("image_width", 0) img_w = grid_data.get("image_width", 0) or word_result.get("image_width", 0)
img_h = grid_data.get("image_height", 0) or word_result.get("image_height", 0) img_h = grid_data.get("image_height", 0) or word_result.get("image_height", 0)
@@ -119,7 +119,7 @@ async def build_box_grids(session_id: str, request: Request):
# Apply SmartSpellChecker to all box cells # Apply SmartSpellChecker to all box cells
try: try:
from smart_spell import SmartSpellChecker from ocr.spell.smart_spell import SmartSpellChecker
ssc = SmartSpellChecker() ssc = SmartSpellChecker()
for cell in box_grid.get("cells", []): for cell in box_grid.get("cells", []):
text = cell.get("text", "") text = cell.get("text", "")
@@ -7,11 +7,11 @@ import logging
from fastapi import APIRouter, HTTPException, Query, Request from fastapi import APIRouter, HTTPException, Query, Request
from grid.build.core import _build_grid_core from grid.build.core import _build_grid_core
from ocr_pipeline_session_store import ( from ocr.pipeline.session_store import (
get_session_db, get_session_db,
update_session_db, update_session_db,
) )
from ocr_pipeline_common import ( from ocr.pipeline.common import (
_cache, _cache,
_load_session_to_cache, _load_session_to_cache,
_get_cached, _get_cached,
@@ -60,7 +60,7 @@ async def build_grid(
# Save automatic grid snapshot for later comparison with manual corrections # Save automatic grid snapshot for later comparison with manual corrections
# Lazy import to avoid circular dependency with ocr_pipeline_regression # Lazy import to avoid circular dependency with ocr_pipeline_regression
from ocr_pipeline_regression import _build_reference_snapshot from ocr.pipeline.regression import _build_reference_snapshot
wr = session.get("word_result") or {} wr = session.get("word_result") or {}
engine = wr.get("ocr_engine", "") engine = wr.get("ocr_engine", "")
@@ -134,7 +134,7 @@ async def rerun_ocr_and_build_grid(
# 2. Scan quality assessment # 2. Scan quality assessment
scan_quality_info = {} scan_quality_info = {}
try: try:
from scan_quality import score_scan_quality from ocr.pipeline.scan_quality import score_scan_quality
quality_report = score_scan_quality(ocr_input) quality_report = score_scan_quality(ocr_input)
scan_quality_info = quality_report.to_dict() scan_quality_info = quality_report.to_dict()
actual_min_conf = min_conf if min_conf > 0 else quality_report.recommended_min_conf actual_min_conf = min_conf if min_conf > 0 else quality_report.recommended_min_conf
@@ -146,7 +146,7 @@ async def rerun_ocr_and_build_grid(
is_degraded = scan_quality_info.get("is_degraded", False) is_degraded = scan_quality_info.get("is_degraded", False)
if enhance and is_degraded: if enhance and is_degraded:
try: try:
from ocr_image_enhance import enhance_for_ocr from ocr.image_enhance import enhance_for_ocr
ocr_input = enhance_for_ocr(ocr_input, is_degraded=True) ocr_input = enhance_for_ocr(ocr_input, is_degraded=True)
logger.info("rerun-ocr: CLAHE enhancement applied") logger.info("rerun-ocr: CLAHE enhancement applied")
except Exception as e: except Exception as e:
@@ -159,8 +159,8 @@ async def rerun_ocr_and_build_grid(
# RapidOCR # RapidOCR
rapid_words = [] rapid_words = []
try: try:
from cv_ocr_engines import ocr_region_rapid from ocr.engines.engines import ocr_region_rapid
from cv_vocab_types import PageRegion from ocr.types import PageRegion
full_region = PageRegion(type="full_page", x=0, y=0, width=img_w, height=img_h) full_region = PageRegion(type="full_page", x=0, y=0, width=img_w, height=img_h)
rapid_words = ocr_region_rapid(ocr_input, full_region) or [] rapid_words = ocr_region_rapid(ocr_input, full_region) or []
except Exception as e: except Exception as e:
@@ -182,7 +182,7 @@ async def rerun_ocr_and_build_grid(
}) })
# 5. Merge OCR results # 5. Merge OCR results
from ocr_pipeline_ocr_merge import _split_paddle_multi_words, _merge_paddle_tesseract, _deduplicate_words from ocr.pipeline.ocr_merge import _split_paddle_multi_words, _merge_paddle_tesseract, _deduplicate_words
rapid_split = _split_paddle_multi_words(rapid_words) if rapid_words else [] rapid_split = _split_paddle_multi_words(rapid_words) if rapid_words else []
if rapid_split or tess_words: if rapid_split or tess_words:
merged_words = _merge_paddle_tesseract(rapid_split, tess_words) merged_words = _merge_paddle_tesseract(rapid_split, tess_words)
@@ -207,7 +207,7 @@ async def rerun_ocr_and_build_grid(
vision_applied = False vision_applied = False
if vision_fusion: if vision_fusion:
try: try:
from vision_ocr_fusion import vision_fuse_ocr from ocr.pipeline.vision_fusion import vision_fuse_ocr
category = doc_category or session.get("document_category") or "vokabelseite" category = doc_category or session.get("document_category") or "vokabelseite"
logger.info(f"rerun-ocr: running Vision-LLM fusion (category={category})") logger.info(f"rerun-ocr: running Vision-LLM fusion (category={category})")
merged_words = await vision_fuse_ocr(ocr_input, merged_words, category) merged_words = await vision_fuse_ocr(ocr_input, merged_words, category)
@@ -6,7 +6,7 @@ import logging
from fastapi import APIRouter, HTTPException, Request from fastapi import APIRouter, HTTPException, Request
from ocr_pipeline_session_store import ( from ocr.pipeline.session_store import (
get_session_db, get_session_db,
update_session_db, update_session_db,
) )
@@ -35,7 +35,7 @@ async def gutter_repair(session_id: str):
detail="No grid data. Run build-grid first.", detail="No grid data. Run build-grid first.",
) )
from cv_gutter_repair import analyse_grid_for_gutter_repair from ocr.gutter.repair import analyse_grid_for_gutter_repair
image_width = grid_data.get("image_width", 0) image_width = grid_data.get("image_width", 0)
result = analyse_grid_for_gutter_repair(grid_data, image_width=image_width) result = analyse_grid_for_gutter_repair(grid_data, image_width=image_width)
@@ -86,7 +86,7 @@ async def gutter_repair_apply(session_id: str, request: Request):
# Allows the user to pick a different correction from the alternatives list # Allows the user to pick a different correction from the alternatives list
text_overrides = body.get("text_overrides", {}) text_overrides = body.get("text_overrides", {})
from cv_gutter_repair import apply_gutter_suggestions from ocr.gutter.repair import apply_gutter_suggestions
suggestions = gutter_result.get("suggestions", []) suggestions = gutter_result.get("suggestions", [])
@@ -6,7 +6,7 @@ import logging
from fastapi import APIRouter, HTTPException from fastapi import APIRouter, HTTPException
from ocr_pipeline_session_store import ( from ocr.pipeline.session_store import (
get_session_db, get_session_db,
update_session_db, update_session_db,
) )
@@ -32,7 +32,7 @@ async def build_unified_grid_endpoint(session_id: str):
if not grid_data: if not grid_data:
raise HTTPException(status_code=400, detail="No grid data. Run build-grid first.") raise HTTPException(status_code=400, detail="No grid data. Run build-grid first.")
from unified_grid import build_unified_grid from grid.unified import build_unified_grid
result = build_unified_grid( result = build_unified_grid(
zones=grid_data.get("zones", []), zones=grid_data.get("zones", []),
@@ -8,7 +8,7 @@ import logging
import re import re
from typing import Dict, List, Optional from typing import Dict, List, Optional
from cv_ocr_engines import _text_has_garbled_ipa from ocr.engines.engines import _text_has_garbled_ipa
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -394,7 +394,7 @@ def _detect_colspan_cells(
if len(columns) < 2 or not zone_words or not rows: if len(columns) < 2 or not zone_words or not rows:
return cells return cells
from cv_words_first import _assign_word_to_row from ocr.words_first import _assign_word_to_row
# Column boundaries (midpoints between adjacent columns) # Column boundaries (midpoints between adjacent columns)
col_boundaries = [] col_boundaries = []
@@ -2,7 +2,7 @@
Grid Editor helper functions — barrel re-export module. Grid Editor helper functions — barrel re-export module.
This file re-exports all public symbols from the split sub-modules This file re-exports all public symbols from the split sub-modules
so that existing ``from grid_editor_helpers import ...`` statements so that existing ``from grid.editor.helpers import ...`` statements
continue to work without changes. continue to work without changes.
Sub-modules: Sub-modules:
@@ -55,4 +55,4 @@ from .zones import ( # noqa: F401
) )
# --- Re-export from cv_words_first (used by cv_box_layout.py) --------------- # --- Re-export from cv_words_first (used by cv_box_layout.py) ---------------
from cv_words_first import _cluster_rows # noqa: F401 from ocr.words_first import _cluster_rows # noqa: F401
+2 -2
View File
@@ -12,8 +12,8 @@ import logging
import re import re
from typing import Any, Dict, List, Optional from typing import Any, Dict, List, Optional
from cv_vocab_types import PageZone from ocr.types import PageZone
from cv_words_first import _cluster_rows, _build_cells from ocr.words_first import _cluster_rows, _build_cells
from .columns import ( from .columns import (
_cluster_columns_by_alignment, _cluster_columns_by_alignment,
@@ -2,5 +2,5 @@
korrektur package — exam correction, EH templates, PDF export. korrektur package — exam correction, EH templates, PDF export.
Backward-compatible re-exports: consumers can still use Backward-compatible re-exports: consumers can still use
``from eh_pipeline import ...`` etc. via the shim files in backend/. ``from korrektur.eh_pipeline import ...`` etc. via the shim files in backend/.
""" """
+1 -1
View File
@@ -2,5 +2,5 @@
metrics package — PostgreSQL metrics database operations. metrics package — PostgreSQL metrics database operations.
Backward-compatible re-exports: consumers can still use Backward-compatible re-exports: consumers can still use
``from metrics_db import ...`` etc. via the shim files in backend/. ``from metrics.db import ...`` etc. via the shim files in backend/.
""" """
+1 -1
View File
@@ -2,7 +2,7 @@
OCR package — restructured from cv_* flat modules. OCR package — restructured from cv_* flat modules.
Backward-compatible re-exports: consumers can still use Backward-compatible re-exports: consumers can still use
``from cv_layout import ...`` etc. via the shim files in backend/. ``from ocr.layout.layout import ...`` etc. via the shim files in backend/.
""" """
from .types import * # noqa: F401,F403 from .types import * # noqa: F401,F403
@@ -149,7 +149,7 @@ def build_box_zone_grid(
Returns the same format as _build_zone_grid (columns, rows, cells, header_rows). Returns the same format as _build_zone_grid (columns, rows, cells, header_rows).
""" """
from grid_editor_helpers import _build_zone_grid from grid.editor.helpers import _build_zone_grid
if not zone_words: if not zone_words:
return { return {
+1 -1
View File
@@ -48,7 +48,7 @@ except ImportError:
pass pass
try: try:
from training_export_service import ( # noqa: F401 from training.export_service import ( # noqa: F401
TrainingExportService, TrainingExportService,
TrainingSample, TrainingSample,
get_training_export_service, get_training_export_service,
@@ -57,7 +57,7 @@ except ImportError:
# Try to import Training Export Service # Try to import Training Export Service
try: try:
from training_export_service import ( from training.export_service import (
TrainingExportService, TrainingExportService,
TrainingSample, TrainingSample,
get_training_export_service, get_training_export_service,
@@ -20,7 +20,7 @@ from typing import Optional, List
from datetime import datetime from datetime import datetime
import uuid import uuid
from metrics_db import ( from metrics.db import (
create_ocr_labeling_session, create_ocr_labeling_session,
get_ocr_labeling_sessions, get_ocr_labeling_sessions,
get_ocr_labeling_session, get_ocr_labeling_session,
@@ -17,7 +17,7 @@ from typing import Optional, List
import uuid import uuid
import os import os
from metrics_db import ( from metrics.db import (
get_ocr_labeling_session, get_ocr_labeling_session,
add_ocr_labeling_item, add_ocr_labeling_item,
get_ocr_labeling_item, get_ocr_labeling_item,
@@ -42,7 +42,7 @@ except ImportError:
pass pass
try: try:
from training_export_service import TrainingSample, get_training_export_service from training.export_service import TrainingSample, get_training_export_service
except ImportError: except ImportError:
pass pass
@@ -271,7 +271,7 @@ async def run_ocr_for_item(item_id: str):
if ocr_text is None: if ocr_text is None:
raise HTTPException(status_code=500, detail="OCR failed") raise HTTPException(status_code=500, detail="OCR failed")
from metrics_db import get_pool from metrics.db import get_pool
pool = await get_pool() pool = await get_pool()
if pool: if pool:
async with pool.acquire() as conn: async with pool.acquire() as conn:
+1 -1
View File
@@ -15,7 +15,7 @@ from .reprocess import router as _reprocess_router
from .auto_steps import router as _steps_router from .auto_steps import router as _steps_router
# Combine both sub-routers into a single router for backwards compatibility. # Combine both sub-routers into a single router for backwards compatibility.
# The consumer imports `from ocr_pipeline_auto import router as _auto_router`. # The consumer imports `from ocr.pipeline.auto import router as _auto_router`.
router = APIRouter(prefix="/api/v1/ocr-pipeline", tags=["ocr-pipeline"]) router = APIRouter(prefix="/api/v1/ocr-pipeline", tags=["ocr-pipeline"])
router.include_router(_reprocess_router) router.include_router(_reprocess_router)
router.include_router(_steps_router) router.include_router(_steps_router)
@@ -17,7 +17,7 @@ import numpy as np
from fastapi import APIRouter, HTTPException, Request from fastapi import APIRouter, HTTPException, Request
from fastapi.responses import StreamingResponse from fastapi.responses import StreamingResponse
from cv_vocab_pipeline import ( from ocr.cv_pipeline import (
OLLAMA_REVIEW_MODEL, OLLAMA_REVIEW_MODEL,
PageRegion, PageRegion,
RowGeometry, RowGeometry,
@@ -14,7 +14,7 @@ from typing import Dict, List
import cv2 import cv2
from fastapi import APIRouter, HTTPException from fastapi import APIRouter, HTTPException
from cv_vocab_pipeline import ( from ocr.cv_pipeline import (
_detect_header_footer_gaps, _detect_header_footer_gaps,
_detect_sub_columns, _detect_sub_columns,
classify_column_types, classify_column_types,
@@ -12,7 +12,7 @@ from datetime import datetime
import cv2 import cv2
from fastapi import APIRouter, HTTPException from fastapi import APIRouter, HTTPException
from cv_vocab_pipeline import ( from ocr.cv_pipeline import (
create_ocr_image, create_ocr_image,
deskew_image, deskew_image,
deskew_image_by_word_alignment, deskew_image_by_word_alignment,
@@ -17,7 +17,7 @@ from typing import Any, Dict
import cv2 import cv2
from fastapi import APIRouter, HTTPException, Query from fastapi import APIRouter, HTTPException, Query
from cv_vocab_pipeline import ( from ocr.cv_pipeline import (
_apply_shear, _apply_shear,
create_ocr_image, create_ocr_image,
dewarp_image, dewarp_image,
@@ -8,7 +8,7 @@ This module was split into:
- ocr_pipeline_columns.py (Column detection + ground truth) - ocr_pipeline_columns.py (Column detection + ground truth)
The `router` object is assembled here by including all sub-routers. The `router` object is assembled here by including all sub-routers.
Importers that did `from ocr_pipeline_geometry import router` continue to work. Importers that did `from ocr.pipeline.geometry import router` continue to work.
""" """
from fastapi import APIRouter from fastapi import APIRouter
@@ -243,7 +243,7 @@ async def recognize_from_session(req: HTRSessionRequest):
Set use_clean=true to prefer the clean image (after handwriting removal step). Set use_clean=true to prefer the clean image (after handwriting removal step).
This is useful when you want to do HTR on isolated handwriting regions. This is useful when you want to do HTR on isolated handwriting regions.
""" """
from ocr_pipeline_session_store import get_session_db, get_session_image from ocr.pipeline.session_store import get_session_db, get_session_image
session = await get_session_db(req.session_id) session = await get_session_db(req.session_id)
if not session: if not session:
@@ -15,7 +15,7 @@ from typing import Dict, List
from fastapi import APIRouter, HTTPException, Request from fastapi import APIRouter, HTTPException, Request
from fastapi.responses import StreamingResponse from fastapi.responses import StreamingResponse
from cv_vocab_pipeline import ( from ocr.cv_pipeline import (
OLLAMA_REVIEW_MODEL, OLLAMA_REVIEW_MODEL,
llm_review_entries, llm_review_entries,
llm_review_entries_streaming, llm_review_entries_streaming,
@@ -15,7 +15,7 @@ import cv2
import numpy as np import numpy as np
from fastapi import APIRouter, HTTPException from fastapi import APIRouter, HTTPException
from cv_words_first import build_grid_from_words from ocr.words_first import build_grid_from_words
from .common import _cache, _append_pipeline_log from .common import _cache, _append_pipeline_log
from .session_store import get_session_image, update_session_db from .session_store import get_session_image, update_session_db
@@ -136,7 +136,7 @@ async def paddle_kombi(session_id: str):
img_png, img_bgr = await _load_session_image(session_id) img_png, img_bgr = await _load_session_image(session_id)
img_h, img_w = img_bgr.shape[:2] img_h, img_w = img_bgr.shape[:2]
from cv_ocr_engines import ocr_region_paddle from ocr.engines.engines import ocr_region_paddle
t0 = time.time() t0 = time.time()
@@ -202,8 +202,8 @@ async def rapid_kombi(session_id: str):
img_png, img_bgr = await _load_session_image(session_id) img_png, img_bgr = await _load_session_image(session_id)
img_h, img_w = img_bgr.shape[:2] img_h, img_w = img_bgr.shape[:2]
from cv_ocr_engines import ocr_region_rapid from ocr.engines.engines import ocr_region_rapid
from cv_vocab_types import PageRegion from ocr.types import PageRegion
t0 = time.time() t0 = time.time()
@@ -9,7 +9,7 @@ from typing import Any, Dict
import cv2 import cv2
from fastapi import APIRouter, HTTPException from fastapi import APIRouter, HTTPException
from cv_vocab_pipeline import detect_and_fix_orientation from ocr.cv_pipeline import detect_and_fix_orientation
from .page_crop import detect_page_splits from .page_crop import detect_page_splits
from .session_store import update_session_db from .session_store import update_session_db
@@ -17,8 +17,8 @@ from fastapi.responses import Response
from .common import _get_base_image_png from .common import _get_base_image_png
from .session_store import get_session_db from .session_store import get_session_db
from cv_color_detect import _COLOR_HEX, _COLOR_RANGES from ocr.detect.color_detect import _COLOR_HEX, _COLOR_RANGES
from cv_box_detect import detect_boxes, split_page_into_zones from ocr.detect.box_detect import detect_boxes, split_page_into_zones
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -19,7 +19,7 @@ from .reconstruction import router as _reconstruction_router
from .validation import router as _validation_router from .validation import router as _validation_router
# Composite router — drop-in replacement for the old monolithic router. # Composite router — drop-in replacement for the old monolithic router.
# ocr_pipeline_api.py imports ``from ocr_pipeline_postprocess import router``. # ocr_pipeline_api.py imports ``from ocr.pipeline.postprocess import router``.
router = APIRouter() router = APIRouter()
router.include_router(_llm_review_router) router.include_router(_llm_review_router)
router.include_router(_reconstruction_router) router.include_router(_reconstruction_router)
@@ -14,7 +14,7 @@ from typing import Any, Dict, Optional
from fastapi import APIRouter, HTTPException, Query from fastapi import APIRouter, HTTPException, Query
from grid_editor_api import _build_grid_core from grid.editor.api import _build_grid_core
from .session_store import ( from .session_store import (
get_session_db, get_session_db,
list_ground_truth_sessions_db, list_ground_truth_sessions_db,
+2 -2
View File
@@ -17,7 +17,7 @@ import cv2
import numpy as np import numpy as np
from fastapi import APIRouter, HTTPException from fastapi import APIRouter, HTTPException
from cv_vocab_pipeline import ( from ocr.cv_pipeline import (
create_ocr_image, create_ocr_image,
detect_column_geometry, detect_column_geometry,
detect_row_geometry, detect_row_geometry,
@@ -120,7 +120,7 @@ async def detect_rows(session_id: str):
# merges rows. Word-grouping directly clusters words by Y proximity, # merges rows. Word-grouping directly clusters words by Y proximity,
# which is more robust for these cases. # which is more robust for these cases.
if is_sub_session and word_dicts: if is_sub_session and word_dicts:
from cv_layout import _build_rows_from_word_grouping from ocr.layout.layout import _build_rows_from_word_grouping
rows = _build_rows_from_word_grouping( rows = _build_rows_from_word_grouping(
word_dicts, left_x, right_x, top_y, bottom_y, word_dicts, left_x, right_x, top_y, bottom_y,
right_x - left_x, bottom_y - top_y, right_x - left_x, bottom_y - top_y,
@@ -15,7 +15,7 @@ import cv2
import numpy as np import numpy as np
from fastapi import APIRouter, File, Form, HTTPException, Query, UploadFile from fastapi import APIRouter, File, Form, HTTPException, Query, UploadFile
from cv_vocab_pipeline import render_image_high_res, render_pdf_high_res from ocr.cv_pipeline import render_image_high_res, render_pdf_high_res
from .common import ( from .common import (
VALID_DOCUMENT_CATEGORIES, VALID_DOCUMENT_CATEGORIES,
UpdateSessionRequest, UpdateSessionRequest,
@@ -17,7 +17,7 @@ import numpy as np
from fastapi import APIRouter, HTTPException, Query from fastapi import APIRouter, HTTPException, Query
from fastapi.responses import Response from fastapi.responses import Response
from cv_vocab_pipeline import create_ocr_image, detect_document_type from ocr.cv_pipeline import create_ocr_image, detect_document_type
from .common import ( from .common import (
VALID_DOCUMENT_CATEGORIES, VALID_DOCUMENT_CATEGORIES,
_append_pipeline_log, _append_pipeline_log,
@@ -15,9 +15,9 @@ import numpy as np
from fastapi import APIRouter, HTTPException from fastapi import APIRouter, HTTPException
from pydantic import BaseModel from pydantic import BaseModel
from cv_box_detect import detect_boxes from ocr.detect.box_detect import detect_boxes
from cv_color_detect import _COLOR_RANGES, _COLOR_HEX from ocr.detect.color_detect import _COLOR_RANGES, _COLOR_HEX
from cv_graphic_detect import detect_graphic_elements from ocr.detect.graphic_detect import detect_graphic_elements
from .session_store import ( from .session_store import (
get_session_db, get_session_db,
update_session_db, update_session_db,
@@ -100,7 +100,7 @@ async def detect_structure(session_id: str):
) )
# --- Zone splitting --- # --- Zone splitting ---
from cv_box_detect import split_page_into_zones as _split_zones from ocr.detect.box_detect import split_page_into_zones as _split_zones
zones = _split_zones(content_x, content_y, content_w_px, content_h_px, boxes) zones = _split_zones(content_x, content_y, content_w_px, content_h_px, boxes)
# --- Color region sampling --- # --- Color region sampling ---
@@ -123,7 +123,7 @@ async def detect_structure(session_id: str):
med_s = float(np.median(roi_hsv[:, :, 1])) med_s = float(np.median(roi_hsv[:, :, 1]))
med_v = float(np.median(roi_hsv[:, :, 2])) med_v = float(np.median(roi_hsv[:, :, 2]))
if med_s > 15: if med_s > 15:
from cv_color_detect import _hue_to_color_name from ocr.detect.color_detect import _hue_to_color_name
bg_name = _hue_to_color_name(med_h) bg_name = _hue_to_color_name(med_h)
bg_hex = _COLOR_HEX.get(bg_name, "#6b7280") bg_hex = _COLOR_HEX.get(bg_name, "#6b7280")
else: else:
@@ -23,7 +23,7 @@ import numpy as np
from fastapi import APIRouter, HTTPException from fastapi import APIRouter, HTTPException
from pydantic import BaseModel from pydantic import BaseModel
from cv_words_first import build_grid_from_words from ocr.words_first import build_grid_from_words
from .session_store import ( from .session_store import (
get_session_db, get_session_db,
get_session_image, get_session_image,
@@ -72,7 +72,7 @@ async def paddle_direct(session_id: str):
img_h, img_w = img_bgr.shape[:2] img_h, img_w = img_bgr.shape[:2]
from cv_ocr_engines import ocr_region_paddle from ocr.engines.engines import ocr_region_paddle
t0 = time.time() t0 = time.time()
word_dicts = await ocr_region_paddle(img_bgr, region=None) word_dicts = await ocr_region_paddle(img_bgr, region=None)
@@ -17,7 +17,7 @@ import numpy as np
from fastapi import APIRouter, HTTPException, Request from fastapi import APIRouter, HTTPException, Request
from fastapi.responses import StreamingResponse from fastapi.responses import StreamingResponse
from cv_vocab_pipeline import ( from ocr.cv_pipeline import (
PageRegion, PageRegion,
RowGeometry, RowGeometry,
_cells_to_vocab_entries, _cells_to_vocab_entries,
@@ -27,7 +27,7 @@ from cv_vocab_pipeline import (
create_ocr_image, create_ocr_image,
detect_column_geometry, detect_column_geometry,
) )
from cv_words_first import build_grid_from_words from ocr.words_first import build_grid_from_words
from .session_store import ( from .session_store import (
get_session_db, get_session_db,
update_session_db, update_session_db,
@@ -223,7 +223,7 @@ async def _words_first_path(
img_h, img_w = dewarped_bgr.shape[:2] img_h, img_w = dewarped_bgr.shape[:2]
if engine == "paddle": if engine == "paddle":
from cv_ocr_engines import ocr_region_paddle from ocr.engines.engines import ocr_region_paddle
wf_word_dicts = await ocr_region_paddle(dewarped_bgr, region=None) wf_word_dicts = await ocr_region_paddle(dewarped_bgr, region=None)
cached["_paddle_word_dicts"] = wf_word_dicts cached["_paddle_word_dicts"] = wf_word_dicts
else: else:
@@ -15,7 +15,7 @@ from typing import Any, Dict, List
import numpy as np import numpy as np
from fastapi import Request from fastapi import Request
from cv_vocab_pipeline import ( from ocr.cv_pipeline import (
PageRegion, PageRegion,
RowGeometry, RowGeometry,
_cells_to_vocab_entries, _cells_to_vocab_entries,
+1 -1
View File
@@ -219,7 +219,7 @@ def spell_review_entries_sync(entries: List[Dict]) -> Dict:
# Use SmartSpellChecker if available # Use SmartSpellChecker if available
_smart = None _smart = None
try: try:
from smart_spell import SmartSpellChecker from ocr.spell.smart_spell import SmartSpellChecker
_smart = SmartSpellChecker() _smart = SmartSpellChecker()
logger.debug("spell_review: using SmartSpellChecker") logger.debug("spell_review: using SmartSpellChecker")
except Exception: except Exception:
+1 -1
View File
@@ -159,7 +159,7 @@ class _SmartSpellCoreBase:
return True return True
# Also accept known abbreviations (sth, sb, adj, etc.) # Also accept known abbreviations (sth, sb, adj, etc.)
try: try:
from cv_ocr_engines import _KNOWN_ABBREVIATIONS from ocr.engines.engines import _KNOWN_ABBREVIATIONS
if w in _KNOWN_ABBREVIATIONS: if w in _KNOWN_ABBREVIATIONS:
return True return True
except ImportError: except ImportError:
+2 -2
View File
@@ -35,7 +35,7 @@ class SmartSpellChecker(_SmartSpellCoreBase):
""" """
# Import known abbreviations for vocabulary context # Import known abbreviations for vocabulary context
try: try:
from cv_ocr_engines import _KNOWN_ABBREVIATIONS from ocr.engines.engines import _KNOWN_ABBREVIATIONS
except ImportError: except ImportError:
_KNOWN_ABBREVIATIONS = set() _KNOWN_ABBREVIATIONS = set()
@@ -149,7 +149,7 @@ class SmartSpellChecker(_SmartSpellCoreBase):
# --- Pass 1: Boundary repair between adjacent unknown words --- # --- Pass 1: Boundary repair between adjacent unknown words ---
# Import abbreviations for the heuristic below # Import abbreviations for the heuristic below
try: try:
from cv_ocr_engines import _KNOWN_ABBREVIATIONS as _ABBREVS from ocr.engines.engines import _KNOWN_ABBREVIATIONS as _ABBREVS
except ImportError: except ImportError:
_ABBREVS = set() _ABBREVS = set()
+1 -1
View File
@@ -2,5 +2,5 @@
training package — training API, simulation, export, TrOCR. training package — training API, simulation, export, TrOCR.
Backward-compatible re-exports: consumers can still use Backward-compatible re-exports: consumers can still use
``from training_api import ...`` etc. via the shim files in backend/. ``from training.api import ...`` etc. via the shim files in backend/.
""" """
+1 -1
View File
@@ -211,7 +211,7 @@ async def delete_model_version(version_id: str):
@router.get("/dataset/stats", response_model=dict) @router.get("/dataset/stats", response_model=dict)
async def get_dataset_stats(): async def get_dataset_stats():
"""Get statistics about the training dataset.""" """Get statistics about the training dataset."""
from metrics_db import get_zeugnis_stats from metrics.db import get_zeugnis_stats
zeugnis_stats = await get_zeugnis_stats() zeugnis_stats = await get_zeugnis_stats()
+1 -1
View File
@@ -2,5 +2,5 @@
Vocab package restructured from vocab_* flat modules. Vocab package restructured from vocab_* flat modules.
Backward-compatible re-exports: consumers can still use Backward-compatible re-exports: consumers can still use
``from vocab_worksheet_api import ...`` etc. via the shim files in backend/. ``from vocab.worksheet.api import ...`` etc. via the shim files in backend/.
""" """
@@ -27,7 +27,7 @@ from .generation import convert_pdf_page_to_image
# Try to import Tesseract extractor # Try to import Tesseract extractor
try: try:
from tesseract_vocab_extractor import ( from ocr.engines.tesseract_extractor import (
extract_bounding_boxes, TESSERACT_AVAILABLE, extract_bounding_boxes, TESSERACT_AVAILABLE,
) )
except ImportError: except ImportError:
@@ -264,7 +264,7 @@ async def extract_with_boxes(session_id: str, page_number: int):
# Deskew image before OCR # Deskew image before OCR
deskew_angle = 0.0 deskew_angle = 0.0
try: try:
from cv_vocab_pipeline import deskew_image_by_word_alignment, CV2_AVAILABLE from ocr.cv_pipeline import deskew_image_by_word_alignment, CV2_AVAILABLE
if CV2_AVAILABLE: if CV2_AVAILABLE:
image_data, deskew_angle = deskew_image_by_word_alignment(image_data) image_data, deskew_angle = deskew_image_by_word_alignment(image_data)
logger.info(f"Deskew: {deskew_angle:.2f}° for page {page_number}") logger.info(f"Deskew: {deskew_angle:.2f}° for page {page_number}")
@@ -23,7 +23,7 @@ from .generation import convert_pdf_page_to_image
# Try to import Tesseract extractor # Try to import Tesseract extractor
try: try:
from tesseract_vocab_extractor import ( from ocr.engines.tesseract_extractor import (
run_tesseract_pipeline, run_tesseract_pipeline,
match_positions_to_vocab, TESSERACT_AVAILABLE, match_positions_to_vocab, TESSERACT_AVAILABLE,
) )
@@ -32,7 +32,7 @@ except ImportError:
# Try to import CV Pipeline # Try to import CV Pipeline
try: try:
from cv_vocab_pipeline import run_cv_pipeline, CV_PIPELINE_AVAILABLE from ocr.cv_pipeline import run_cv_pipeline, CV_PIPELINE_AVAILABLE
except ImportError: except ImportError:
CV_PIPELINE_AVAILABLE = False CV_PIPELINE_AVAILABLE = False
@@ -328,7 +328,7 @@ async def analyze_grid(session_id: str, page_number: int, use_tesseract: bool =
# Run Tesseract if not already cached # Run Tesseract if not already cached
if not tess_page_data: if not tess_page_data:
logger.info("Running Tesseract for grid analysis (not cached)") logger.info("Running Tesseract for grid analysis (not cached)")
from tesseract_vocab_extractor import run_tesseract_pipeline as _run_tess from ocr.engines.tesseract_extractor import run_tesseract_pipeline as _run_tess
tess_page_data = await _run_tess(image_data, lang="eng+deu") tess_page_data = await _run_tess(image_data, lang="eng+deu")
session[f"tesseract_page_{page_number}"] = tess_page_data session[f"tesseract_page_{page_number}"] = tess_page_data
session["tesseract_words"] = tess_page_data.get("words", []) session["tesseract_words"] = tess_page_data.get("words", [])
+15 -15
View File
@@ -39,7 +39,7 @@ except ImportError:
# CV pipeline helpers # CV pipeline helpers
try: try:
from cv_vocab_pipeline import ( from ocr.cv_pipeline import (
deskew_two_pass, deskew_two_pass,
dewarp_image, dewarp_image,
detect_and_fix_orientation, detect_and_fix_orientation,
@@ -54,7 +54,7 @@ except ImportError:
_fix_phonetic_brackets = None # type: ignore[assignment] _fix_phonetic_brackets = None # type: ignore[assignment]
try: try:
from cv_cell_grid import ( from ocr.cell_grid.cell_grid import (
_merge_wrapped_rows, _merge_wrapped_rows,
_merge_phonetic_continuation_rows, _merge_phonetic_continuation_rows,
_merge_continuation_rows, _merge_continuation_rows,
@@ -65,17 +65,17 @@ except ImportError:
_merge_continuation_rows = None # type: ignore[assignment] _merge_continuation_rows = None # type: ignore[assignment]
try: try:
from cv_ocr_engines import ocr_region_rapid from ocr.engines.engines import ocr_region_rapid
except ImportError: except ImportError:
ocr_region_rapid = None # type: ignore[assignment] ocr_region_rapid = None # type: ignore[assignment]
try: try:
from cv_vocab_types import PageRegion from ocr.types import PageRegion
except ImportError: except ImportError:
PageRegion = None # type: ignore[assignment] PageRegion = None # type: ignore[assignment]
try: try:
from ocr_pipeline_ocr_merge import ( from ocr.pipeline.ocr_merge import (
_split_paddle_multi_words, _split_paddle_multi_words,
_merge_paddle_tesseract, _merge_paddle_tesseract,
_deduplicate_words, _deduplicate_words,
@@ -86,12 +86,12 @@ except ImportError:
_deduplicate_words = None # type: ignore[assignment] _deduplicate_words = None # type: ignore[assignment]
try: try:
from cv_words_first import build_grid_from_words from ocr.words_first import build_grid_from_words
except ImportError: except ImportError:
build_grid_from_words = None # type: ignore[assignment] build_grid_from_words = None # type: ignore[assignment]
try: try:
from ocr_pipeline_session_store import ( from ocr.pipeline.session_store import (
create_session_db as create_pipeline_session_db, create_session_db as create_pipeline_session_db,
update_session_db as update_pipeline_session_db, update_session_db as update_pipeline_session_db,
) )
@@ -173,7 +173,7 @@ async def _run_ocr_pipeline_for_page(
# 5. Content crop (removes scanner borders, gutter shadows) # 5. Content crop (removes scanner borders, gutter shadows)
t0 = _time.time() t0 = _time.time()
try: try:
from page_crop import detect_and_crop_page from ocr.pipeline.page_crop import detect_and_crop_page
cropped_bgr, crop_result = detect_and_crop_page(dewarped_bgr) cropped_bgr, crop_result = detect_and_crop_page(dewarped_bgr)
if crop_result.get("crop_applied"): if crop_result.get("crop_applied"):
dewarped_bgr = cropped_bgr dewarped_bgr = cropped_bgr
@@ -186,7 +186,7 @@ async def _run_ocr_pipeline_for_page(
# 5b. Scan quality assessment # 5b. Scan quality assessment
scan_quality_report = None scan_quality_report = None
try: try:
from scan_quality import score_scan_quality from ocr.pipeline.scan_quality import score_scan_quality
scan_quality_report = score_scan_quality(dewarped_bgr) scan_quality_report = score_scan_quality(dewarped_bgr)
except Exception as e: except Exception as e:
logger.warning(f" scan quality: failed ({e})") logger.warning(f" scan quality: failed ({e})")
@@ -200,7 +200,7 @@ async def _run_ocr_pipeline_for_page(
is_degraded = scan_quality_report.is_degraded if scan_quality_report else False is_degraded = scan_quality_report.is_degraded if scan_quality_report else False
if is_degraded and enable_enhance: if is_degraded and enable_enhance:
try: try:
from ocr_image_enhance import enhance_for_ocr from ocr.image_enhance import enhance_for_ocr
dewarped_bgr = enhance_for_ocr(dewarped_bgr, is_degraded=True) dewarped_bgr = enhance_for_ocr(dewarped_bgr, is_degraded=True)
logger.info(" enhancement: applied (degraded scan)") logger.info(" enhancement: applied (degraded scan)")
except Exception as e: except Exception as e:
@@ -212,8 +212,8 @@ async def _run_ocr_pipeline_for_page(
# RapidOCR (local ONNX) # RapidOCR (local ONNX)
try: try:
from cv_ocr_engines import ocr_region_rapid from ocr.engines.engines import ocr_region_rapid
from cv_vocab_types import PageRegion from ocr.types import PageRegion
full_region = PageRegion(type="full_page", x=0, y=0, width=img_w, height=img_h) full_region = PageRegion(type="full_page", x=0, y=0, width=img_w, height=img_h)
rapid_words = ocr_region_rapid(dewarped_bgr, full_region) or [] rapid_words = ocr_region_rapid(dewarped_bgr, full_region) or []
except Exception as e: except Exception as e:
@@ -243,8 +243,8 @@ async def _run_ocr_pipeline_for_page(
}) })
# Merge dual-engine results # Merge dual-engine results
from ocr_pipeline_ocr_merge import _split_paddle_multi_words, _merge_paddle_tesseract, _deduplicate_words from ocr.pipeline.ocr_merge import _split_paddle_multi_words, _merge_paddle_tesseract, _deduplicate_words
from cv_words_first import build_grid_from_words from ocr.words_first import build_grid_from_words
rapid_split = _split_paddle_multi_words(rapid_words) if rapid_words else [] rapid_split = _split_paddle_multi_words(rapid_words) if rapid_words else []
if rapid_split or tess_words: if rapid_split or tess_words:
@@ -300,7 +300,7 @@ async def _run_ocr_pipeline_for_page(
# 8. Run full grid-build (with pipe-autocorrect, word-gap merge, etc.) # 8. Run full grid-build (with pipe-autocorrect, word-gap merge, etc.)
t0 = _time.time() t0 = _time.time()
try: try:
from grid_editor_api import _build_grid_core from grid.editor.api import _build_grid_core
session_data = { session_data = {
"word_result": word_result, "word_result": word_result,
} }
@@ -36,7 +36,7 @@ LOCAL_STORAGE_PATH = os.getenv("VOCAB_STORAGE_PATH", "/app/vocab-worksheets")
try: try:
import numpy as np import numpy as np
from cv_preprocessing import render_pdf_high_res, detect_and_fix_orientation from ocr.preprocessing.preprocessing import render_pdf_high_res, detect_and_fix_orientation
OCR_PIPELINE_AVAILABLE = True OCR_PIPELINE_AVAILABLE = True
except ImportError: except ImportError:
np = None # type: ignore[assignment] np = None # type: ignore[assignment]
@@ -2,5 +2,5 @@
worksheet package — worksheet editor, NRU generator, cleanup. worksheet package — worksheet editor, NRU generator, cleanup.
Backward-compatible re-exports: consumers can still use Backward-compatible re-exports: consumers can still use
``from worksheet_editor_api import ...`` etc. via the shim files in backend/. ``from worksheet.editor_api import ...`` etc. via the shim files in backend/.
""" """
@@ -366,7 +366,7 @@ async def reconstruct_document_from_session(request: ReconstructRequest):
async def get_available_sessions(): async def get_available_sessions():
"""Get list of available vocab sessions that can be reconstructed.""" """Get list of available vocab sessions that can be reconstructed."""
try: try:
from vocab_worksheet_api import _sessions from vocab.worksheet.api import _sessions
available = [] available = []
for session_id, session in _sessions.items(): for session_id, session in _sessions.items():
@@ -31,7 +31,7 @@ async def reconstruct_document_logic(request: ReconstructRequest) -> Reconstruct
Returns ReconstructResponse ready to send to the client. Returns ReconstructResponse ready to send to the client.
""" """
from fastapi import HTTPException from fastapi import HTTPException
from vocab_worksheet_api import _sessions, convert_pdf_page_to_image from vocab.worksheet.api import _sessions, convert_pdf_page_to_image
# Check if session exists # Check if session exists
if request.session_id not in _sessions: if request.session_id not in _sessions:
+1 -1
View File
@@ -2,5 +2,5 @@
zeugnis package — certificate crawler, models, storage. zeugnis package — certificate crawler, models, storage.
Backward-compatible re-exports: consumers can still use Backward-compatible re-exports: consumers can still use
``from zeugnis_api import ...`` etc. via the shim files in backend/. ``from zeugnis.api import ...`` etc. via the shim files in backend/.
""" """
+1 -1
View File
@@ -16,7 +16,7 @@ from .models import (
from .crawler import ( from .crawler import (
start_crawler, stop_crawler, get_crawler_status, start_crawler, stop_crawler, get_crawler_status,
) )
from metrics_db import ( from metrics.db import (
get_zeugnis_documents, get_zeugnis_stats, get_zeugnis_documents, get_zeugnis_stats,
log_zeugnis_event, get_pool, log_zeugnis_event, get_pool,
) )
@@ -15,7 +15,7 @@ from .models import (
BUNDESLAENDER, BUNDESLAENDER,
generate_id, get_training_allowed, get_bundesland_name, get_license_for_bundesland, generate_id, get_training_allowed, get_bundesland_name, get_license_for_bundesland,
) )
from metrics_db import ( from metrics.db import (
get_zeugnis_sources, upsert_zeugnis_source, get_pool, get_zeugnis_sources, upsert_zeugnis_source, get_pool,
) )
+1 -1
View File
@@ -31,7 +31,7 @@ async def start_crawler(bundesland: Optional[str] = None, source_id: Optional[st
async def run_crawler(): async def run_crawler():
try: try:
from metrics_db import get_pool from metrics.db import get_pool
pool = await get_pool() pool = await get_pool()
if pool: if pool:
+3 -3
View File
@@ -322,8 +322,8 @@ SEED_DATA: Dict[str, Dict[str, Any]] = {
async def populate_seed_data(): async def populate_seed_data():
"""Populate database with seed data.""" """Populate database with seed data."""
from metrics_db import get_pool, upsert_zeugnis_source from metrics.db import get_pool, upsert_zeugnis_source
from zeugnis_models import generate_id from zeugnis.models import generate_id
pool = await get_pool() pool = await get_pool()
if not pool: if not pool:
@@ -412,4 +412,4 @@ if __name__ == "__main__":
print("\n" + "=" * 60) print("\n" + "=" * 60)
print("To populate database, run:") print("To populate database, run:")
print(" python -c 'import asyncio; from zeugnis_seed_data import populate_seed_data; asyncio.run(populate_seed_data())'") print(" python -c 'import asyncio; from zeugnis.seed_data import populate_seed_data; asyncio.run(populate_seed_data())'")
+1 -1
View File
@@ -83,7 +83,7 @@ class ZeugnisCrawler:
# Initialize database connection # Initialize database connection
try: try:
from metrics_db import get_pool from metrics.db import get_pool
self.db_pool = await get_pool() self.db_pool = await get_pool()
except Exception as e: except Exception as e:
print(f"Failed to get database pool: {e}") print(f"Failed to get database pool: {e}")