Fix: Update all old-style imports inside packages to new paths
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 1m7s
CI / test-go-edu-search (push) Successful in 46s
CI / test-python-klausur (push) Failing after 2m32s
CI / test-python-agent-core (push) Successful in 33s
CI / test-nodejs-website (push) Successful in 34s

65 files in klausur-service packages + 3 in backend-lehrer packages
had stale imports referencing deleted shim modules.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-04-26 00:19:13 +02:00
parent 5f2ed44654
commit eecb5472dd
68 changed files with 132 additions and 132 deletions

View File

@@ -266,7 +266,7 @@ async def send_message(conversation_id: str, message: MessageBase):
if contact and contact.get("email"):
try:
from email_service import email_service
from services.email import email_service
result = email_service.send_messenger_notification(
to_email=contact["email"],

View File

@@ -363,7 +363,7 @@ def api_generate_story(unit_id: str, payload: StoryGeneratePayload):
raise HTTPException(status_code=404, detail="Lerneinheit nicht gefunden.")
try:
from story_generator import generate_story
from services.story_generator import generate_story
result = generate_story(
vocabulary=payload.vocabulary,
language=payload.language,

View File

@@ -22,7 +22,7 @@ from .db import (
get_all_pos,
VocabularyWord,
)
from learning_units import (
from units.learning import (
LearningUnitCreate,
create_learning_unit,
get_learning_unit,
@@ -343,7 +343,7 @@ async def api_translate_words(payload: TranslateRequest):
Uses local LLM (Ollama) for translation. Results are cached in the
vocabulary_words.translations JSONB field.
"""
from translation_service import translate_and_store
from services.translation import translate_and_store
if payload.target_language not in {"tr", "ar", "uk", "ru", "pl", "fr", "es"}:
raise HTTPException(status_code=400, detail=f"Sprache '{payload.target_language}' nicht unterstuetzt")

View File

@@ -2,5 +2,5 @@
admin package — admin APIs for NiBiS, RAG, templates.
Backward-compatible re-exports: consumers can still use
``from admin_api import ...`` etc. via the shim files in backend/.
``from admin.api import ...`` etc. via the shim files in backend/.
"""

View File

@@ -7,7 +7,7 @@ This module was split into:
- admin_templates.py (Legal templates ingestion, search)
The `router` object is assembled here by including all sub-routers.
Importers that did `from admin_api import router` continue to work.
Importers that did `from admin.api import router` continue to work.
"""
from fastapi import APIRouter

View File

@@ -28,7 +28,7 @@ except ImportError:
MINIO_AVAILABLE = False
try:
from metrics_db import (
from metrics.db import (
init_metrics_tables, store_feedback, log_search, log_upload,
calculate_metrics, get_recent_feedback, get_upload_history
)

View File

@@ -2,5 +2,5 @@
compliance package — compliance pipeline, RBAC/ABAC policy engine.
Backward-compatible re-exports: consumers can still use
``from compliance_models import ...`` etc. via the shim files in backend/.
``from compliance.models import ...`` etc. via the shim files in backend/.
"""

View File

@@ -2,7 +2,7 @@
Grid package — restructured from grid_* flat modules.
Backward-compatible re-exports: consumers can still use
``from grid_build_core import ...`` etc. via the shim files in backend/.
``from grid.build.core import ...`` etc. via the shim files in backend/.
Sub-packages:
- grid.build — grid construction pipeline (_build_grid_core and phases)

View File

@@ -9,7 +9,7 @@ import logging
import re
from typing import Any, Dict, List, Tuple
from cv_ocr_engines import (
from ocr.engines.engines import (
_words_to_reading_order_text, _group_words_into_lines, _lookup_ipa,
)

View File

@@ -10,7 +10,7 @@ import logging
import re
from typing import Any, Dict, List
from cv_ocr_engines import _words_to_reading_order_text
from ocr.engines.engines import _words_to_reading_order_text
logger = logging.getLogger(__name__)

View File

@@ -69,14 +69,14 @@ def _finalize_grid(
# --- Word-gap merge ---
try:
from cv_syllable_detect import merge_word_gaps_in_zones
from ocr.detect.syllable.detect import merge_word_gaps_in_zones
merge_word_gaps_in_zones(zones_data, session_id)
except Exception as e:
logger.warning("Word-gap merge failed: %s", e)
# --- Pipe auto-correction ---
try:
from cv_syllable_detect import autocorrect_pipe_artifacts
from ocr.detect.syllable.detect import autocorrect_pipe_artifacts
autocorrect_pipe_artifacts(zones_data, session_id)
except Exception as e:
logger.warning("Pipe autocorrect failed: %s", e)
@@ -132,10 +132,10 @@ def _detect_dictionary(
margin_strip_detected: bool,
) -> Dict[str, Any]:
"""Run dictionary detection on the assembled grid."""
from cv_layout import _score_dictionary_signals
from ocr.layout.layout import _score_dictionary_signals
dict_detection: Dict[str, Any] = {"is_dictionary": False, "confidence": 0.0}
try:
from cv_vocab_types import ColumnGeometry
from ocr.types import ColumnGeometry
for z in zones_data:
zone_cells = z.get("cells", [])
zone_cols = z.get("columns", [])
@@ -222,7 +222,7 @@ def _insert_syllable_dividers(
if _syllable_eligible:
try:
from cv_syllable_detect import insert_syllable_dividers
from ocr.detect.syllable.detect import insert_syllable_dividers
force_syllables = (syllable_mode in ("all", "de", "en"))
syllable_insertions = insert_syllable_dividers(
zones_data, img_bgr, session_id,
@@ -241,7 +241,7 @@ def _split_merged_words(
) -> None:
"""Split merged words using dictionary lookup."""
try:
from cv_review import _try_split_merged_word, _SPELL_AVAILABLE
from ocr.review.review import _try_split_merged_word, _SPELL_AVAILABLE
if not _SPELL_AVAILABLE:
return
split_count = 0
@@ -307,7 +307,7 @@ def _run_spell_checker(
) -> None:
"""Run SmartSpellChecker on all cells."""
try:
from smart_spell import SmartSpellChecker
from ocr.spell.smart_spell import SmartSpellChecker
_ssc = SmartSpellChecker()
spell_fix_count = 0

View File

@@ -10,8 +10,8 @@ import logging
import re
from typing import Any, Dict, List, Optional, Set, Tuple
from cv_color_detect import detect_word_colors
from cv_ocr_engines import (
from ocr.detect.color_detect import detect_word_colors
from ocr.engines.engines import (
fix_cell_phonetics, fix_ipa_continuation_cell, _text_has_garbled_ipa,
_lookup_ipa,
)
@@ -207,7 +207,7 @@ def _run_ipa_correction(
# --- German IPA (wiki-pronunciation-dict + epitran) ---
if de_ipa_target_cols:
from cv_ipa_german import insert_german_ipa
from ocr.ipa_german import insert_german_ipa
insert_german_ipa(all_cells, de_ipa_target_cols)
ipa_target_cols = en_ipa_target_cols | de_ipa_target_cols

View File

@@ -11,11 +11,11 @@ from typing import Any, Dict, List, Optional
import cv2
import numpy as np
from cv_box_detect import detect_boxes, split_page_into_zones
from cv_graphic_detect import detect_graphic_elements
from cv_color_detect import recover_colored_text
from cv_vocab_types import PageZone
from ocr_pipeline_session_store import get_session_image
from ocr.detect.box_detect import detect_boxes, split_page_into_zones
from ocr.detect.graphic_detect import detect_graphic_elements
from ocr.detect.color_detect import recover_colored_text
from ocr.types import PageZone
from ocr.pipeline.session_store import get_session_image
from grid.editor.filters import (
_filter_border_strip_words,

View File

@@ -8,7 +8,7 @@ The actual endpoints live in:
- grid_editor_api_unified.py (build-unified-grid, unified-grid)
This module re-exports the combined router and key symbols so that
existing `from grid_editor_api import router` / `from grid_editor_api import _build_grid_core`
existing `from grid.editor.api import router` / `from grid.editor.api import _build_grid_core`
continue to work unchanged.
"""
@@ -20,7 +20,7 @@ from .api_box import router as _box_router
from .api_unified import router as _unified_router
# Re-export _build_grid_core so callers that do
# `from grid_editor_api import _build_grid_core` keep working.
# `from grid.editor.api import _build_grid_core` keep working.
from grid.build.core import _build_grid_core # noqa: F401
# Merge all sub-routers into one combined router

View File

@@ -7,7 +7,7 @@ import logging
from fastapi import APIRouter, HTTPException, Request
from .filters import _words_in_zone
from ocr_pipeline_session_store import (
from ocr.pipeline.session_store import (
get_session_db,
update_session_db,
)
@@ -76,7 +76,7 @@ async def build_box_grids(session_id: str, request: Request):
pass
layout_overrides = body.get("overrides", {})
from cv_box_layout import build_box_zone_grid
from ocr.detect.box_layout import build_box_zone_grid
img_w = grid_data.get("image_width", 0) or word_result.get("image_width", 0)
img_h = grid_data.get("image_height", 0) or word_result.get("image_height", 0)
@@ -119,7 +119,7 @@ async def build_box_grids(session_id: str, request: Request):
# Apply SmartSpellChecker to all box cells
try:
from smart_spell import SmartSpellChecker
from ocr.spell.smart_spell import SmartSpellChecker
ssc = SmartSpellChecker()
for cell in box_grid.get("cells", []):
text = cell.get("text", "")

View File

@@ -7,11 +7,11 @@ import logging
from fastapi import APIRouter, HTTPException, Query, Request
from grid.build.core import _build_grid_core
from ocr_pipeline_session_store import (
from ocr.pipeline.session_store import (
get_session_db,
update_session_db,
)
from ocr_pipeline_common import (
from ocr.pipeline.common import (
_cache,
_load_session_to_cache,
_get_cached,
@@ -60,7 +60,7 @@ async def build_grid(
# Save automatic grid snapshot for later comparison with manual corrections
# Lazy import to avoid circular dependency with ocr_pipeline_regression
from ocr_pipeline_regression import _build_reference_snapshot
from ocr.pipeline.regression import _build_reference_snapshot
wr = session.get("word_result") or {}
engine = wr.get("ocr_engine", "")
@@ -134,7 +134,7 @@ async def rerun_ocr_and_build_grid(
# 2. Scan quality assessment
scan_quality_info = {}
try:
from scan_quality import score_scan_quality
from ocr.pipeline.scan_quality import score_scan_quality
quality_report = score_scan_quality(ocr_input)
scan_quality_info = quality_report.to_dict()
actual_min_conf = min_conf if min_conf > 0 else quality_report.recommended_min_conf
@@ -146,7 +146,7 @@ async def rerun_ocr_and_build_grid(
is_degraded = scan_quality_info.get("is_degraded", False)
if enhance and is_degraded:
try:
from ocr_image_enhance import enhance_for_ocr
from ocr.image_enhance import enhance_for_ocr
ocr_input = enhance_for_ocr(ocr_input, is_degraded=True)
logger.info("rerun-ocr: CLAHE enhancement applied")
except Exception as e:
@@ -159,8 +159,8 @@ async def rerun_ocr_and_build_grid(
# RapidOCR
rapid_words = []
try:
from cv_ocr_engines import ocr_region_rapid
from cv_vocab_types import PageRegion
from ocr.engines.engines import ocr_region_rapid
from ocr.types import PageRegion
full_region = PageRegion(type="full_page", x=0, y=0, width=img_w, height=img_h)
rapid_words = ocr_region_rapid(ocr_input, full_region) or []
except Exception as e:
@@ -182,7 +182,7 @@ async def rerun_ocr_and_build_grid(
})
# 5. Merge OCR results
from ocr_pipeline_ocr_merge import _split_paddle_multi_words, _merge_paddle_tesseract, _deduplicate_words
from ocr.pipeline.ocr_merge import _split_paddle_multi_words, _merge_paddle_tesseract, _deduplicate_words
rapid_split = _split_paddle_multi_words(rapid_words) if rapid_words else []
if rapid_split or tess_words:
merged_words = _merge_paddle_tesseract(rapid_split, tess_words)
@@ -207,7 +207,7 @@ async def rerun_ocr_and_build_grid(
vision_applied = False
if vision_fusion:
try:
from vision_ocr_fusion import vision_fuse_ocr
from ocr.pipeline.vision_fusion import vision_fuse_ocr
category = doc_category or session.get("document_category") or "vokabelseite"
logger.info(f"rerun-ocr: running Vision-LLM fusion (category={category})")
merged_words = await vision_fuse_ocr(ocr_input, merged_words, category)

View File

@@ -6,7 +6,7 @@ import logging
from fastapi import APIRouter, HTTPException, Request
from ocr_pipeline_session_store import (
from ocr.pipeline.session_store import (
get_session_db,
update_session_db,
)
@@ -35,7 +35,7 @@ async def gutter_repair(session_id: str):
detail="No grid data. Run build-grid first.",
)
from cv_gutter_repair import analyse_grid_for_gutter_repair
from ocr.gutter.repair import analyse_grid_for_gutter_repair
image_width = grid_data.get("image_width", 0)
result = analyse_grid_for_gutter_repair(grid_data, image_width=image_width)
@@ -86,7 +86,7 @@ async def gutter_repair_apply(session_id: str, request: Request):
# Allows the user to pick a different correction from the alternatives list
text_overrides = body.get("text_overrides", {})
from cv_gutter_repair import apply_gutter_suggestions
from ocr.gutter.repair import apply_gutter_suggestions
suggestions = gutter_result.get("suggestions", [])

View File

@@ -6,7 +6,7 @@ import logging
from fastapi import APIRouter, HTTPException
from ocr_pipeline_session_store import (
from ocr.pipeline.session_store import (
get_session_db,
update_session_db,
)
@@ -32,7 +32,7 @@ async def build_unified_grid_endpoint(session_id: str):
if not grid_data:
raise HTTPException(status_code=400, detail="No grid data. Run build-grid first.")
from unified_grid import build_unified_grid
from grid.unified import build_unified_grid
result = build_unified_grid(
zones=grid_data.get("zones", []),

View File

@@ -8,7 +8,7 @@ import logging
import re
from typing import Dict, List, Optional
from cv_ocr_engines import _text_has_garbled_ipa
from ocr.engines.engines import _text_has_garbled_ipa
logger = logging.getLogger(__name__)
@@ -394,7 +394,7 @@ def _detect_colspan_cells(
if len(columns) < 2 or not zone_words or not rows:
return cells
from cv_words_first import _assign_word_to_row
from ocr.words_first import _assign_word_to_row
# Column boundaries (midpoints between adjacent columns)
col_boundaries = []

View File

@@ -2,7 +2,7 @@
Grid Editor helper functions — barrel re-export module.
This file re-exports all public symbols from the split sub-modules
so that existing ``from grid_editor_helpers import ...`` statements
so that existing ``from grid.editor.helpers import ...`` statements
continue to work without changes.
Sub-modules:
@@ -55,4 +55,4 @@ from .zones import ( # noqa: F401
)
# --- Re-export from cv_words_first (used by cv_box_layout.py) ---------------
from cv_words_first import _cluster_rows # noqa: F401
from ocr.words_first import _cluster_rows # noqa: F401

View File

@@ -12,8 +12,8 @@ import logging
import re
from typing import Any, Dict, List, Optional
from cv_vocab_types import PageZone
from cv_words_first import _cluster_rows, _build_cells
from ocr.types import PageZone
from ocr.words_first import _cluster_rows, _build_cells
from .columns import (
_cluster_columns_by_alignment,

View File

@@ -2,5 +2,5 @@
korrektur package — exam correction, EH templates, PDF export.
Backward-compatible re-exports: consumers can still use
``from eh_pipeline import ...`` etc. via the shim files in backend/.
``from korrektur.eh_pipeline import ...`` etc. via the shim files in backend/.
"""

View File

@@ -2,5 +2,5 @@
metrics package — PostgreSQL metrics database operations.
Backward-compatible re-exports: consumers can still use
``from metrics_db import ...`` etc. via the shim files in backend/.
``from metrics.db import ...`` etc. via the shim files in backend/.
"""

View File

@@ -2,7 +2,7 @@
OCR package — restructured from cv_* flat modules.
Backward-compatible re-exports: consumers can still use
``from cv_layout import ...`` etc. via the shim files in backend/.
``from ocr.layout.layout import ...`` etc. via the shim files in backend/.
"""
from .types import * # noqa: F401,F403

View File

@@ -149,7 +149,7 @@ def build_box_zone_grid(
Returns the same format as _build_zone_grid (columns, rows, cells, header_rows).
"""
from grid_editor_helpers import _build_zone_grid
from grid.editor.helpers import _build_zone_grid
if not zone_words:
return {

View File

@@ -48,7 +48,7 @@ except ImportError:
pass
try:
from training_export_service import ( # noqa: F401
from training.export_service import ( # noqa: F401
TrainingExportService,
TrainingSample,
get_training_export_service,

View File

@@ -57,7 +57,7 @@ except ImportError:
# Try to import Training Export Service
try:
from training_export_service import (
from training.export_service import (
TrainingExportService,
TrainingSample,
get_training_export_service,

View File

@@ -20,7 +20,7 @@ from typing import Optional, List
from datetime import datetime
import uuid
from metrics_db import (
from metrics.db import (
create_ocr_labeling_session,
get_ocr_labeling_sessions,
get_ocr_labeling_session,

View File

@@ -17,7 +17,7 @@ from typing import Optional, List
import uuid
import os
from metrics_db import (
from metrics.db import (
get_ocr_labeling_session,
add_ocr_labeling_item,
get_ocr_labeling_item,
@@ -42,7 +42,7 @@ except ImportError:
pass
try:
from training_export_service import TrainingSample, get_training_export_service
from training.export_service import TrainingSample, get_training_export_service
except ImportError:
pass
@@ -271,7 +271,7 @@ async def run_ocr_for_item(item_id: str):
if ocr_text is None:
raise HTTPException(status_code=500, detail="OCR failed")
from metrics_db import get_pool
from metrics.db import get_pool
pool = await get_pool()
if pool:
async with pool.acquire() as conn:

View File

@@ -15,7 +15,7 @@ from .reprocess import router as _reprocess_router
from .auto_steps import router as _steps_router
# Combine both sub-routers into a single router for backwards compatibility.
# The consumer imports `from ocr_pipeline_auto import router as _auto_router`.
# The consumer imports `from ocr.pipeline.auto import router as _auto_router`.
router = APIRouter(prefix="/api/v1/ocr-pipeline", tags=["ocr-pipeline"])
router.include_router(_reprocess_router)
router.include_router(_steps_router)

View File

@@ -17,7 +17,7 @@ import numpy as np
from fastapi import APIRouter, HTTPException, Request
from fastapi.responses import StreamingResponse
from cv_vocab_pipeline import (
from ocr.cv_pipeline import (
OLLAMA_REVIEW_MODEL,
PageRegion,
RowGeometry,

View File

@@ -14,7 +14,7 @@ from typing import Dict, List
import cv2
from fastapi import APIRouter, HTTPException
from cv_vocab_pipeline import (
from ocr.cv_pipeline import (
_detect_header_footer_gaps,
_detect_sub_columns,
classify_column_types,

View File

@@ -12,7 +12,7 @@ from datetime import datetime
import cv2
from fastapi import APIRouter, HTTPException
from cv_vocab_pipeline import (
from ocr.cv_pipeline import (
create_ocr_image,
deskew_image,
deskew_image_by_word_alignment,

View File

@@ -17,7 +17,7 @@ from typing import Any, Dict
import cv2
from fastapi import APIRouter, HTTPException, Query
from cv_vocab_pipeline import (
from ocr.cv_pipeline import (
_apply_shear,
create_ocr_image,
dewarp_image,

View File

@@ -8,7 +8,7 @@ This module was split into:
- ocr_pipeline_columns.py (Column detection + ground truth)
The `router` object is assembled here by including all sub-routers.
Importers that did `from ocr_pipeline_geometry import router` continue to work.
Importers that did `from ocr.pipeline.geometry import router` continue to work.
"""
from fastapi import APIRouter

View File

@@ -243,7 +243,7 @@ async def recognize_from_session(req: HTRSessionRequest):
Set use_clean=true to prefer the clean image (after handwriting removal step).
This is useful when you want to do HTR on isolated handwriting regions.
"""
from ocr_pipeline_session_store import get_session_db, get_session_image
from ocr.pipeline.session_store import get_session_db, get_session_image
session = await get_session_db(req.session_id)
if not session:

View File

@@ -15,7 +15,7 @@ from typing import Dict, List
from fastapi import APIRouter, HTTPException, Request
from fastapi.responses import StreamingResponse
from cv_vocab_pipeline import (
from ocr.cv_pipeline import (
OLLAMA_REVIEW_MODEL,
llm_review_entries,
llm_review_entries_streaming,

View File

@@ -15,7 +15,7 @@ import cv2
import numpy as np
from fastapi import APIRouter, HTTPException
from cv_words_first import build_grid_from_words
from ocr.words_first import build_grid_from_words
from .common import _cache, _append_pipeline_log
from .session_store import get_session_image, update_session_db
@@ -136,7 +136,7 @@ async def paddle_kombi(session_id: str):
img_png, img_bgr = await _load_session_image(session_id)
img_h, img_w = img_bgr.shape[:2]
from cv_ocr_engines import ocr_region_paddle
from ocr.engines.engines import ocr_region_paddle
t0 = time.time()
@@ -202,8 +202,8 @@ async def rapid_kombi(session_id: str):
img_png, img_bgr = await _load_session_image(session_id)
img_h, img_w = img_bgr.shape[:2]
from cv_ocr_engines import ocr_region_rapid
from cv_vocab_types import PageRegion
from ocr.engines.engines import ocr_region_rapid
from ocr.types import PageRegion
t0 = time.time()

View File

@@ -9,7 +9,7 @@ from typing import Any, Dict
import cv2
from fastapi import APIRouter, HTTPException
from cv_vocab_pipeline import detect_and_fix_orientation
from ocr.cv_pipeline import detect_and_fix_orientation
from .page_crop import detect_page_splits
from .session_store import update_session_db

View File

@@ -17,8 +17,8 @@ from fastapi.responses import Response
from .common import _get_base_image_png
from .session_store import get_session_db
from cv_color_detect import _COLOR_HEX, _COLOR_RANGES
from cv_box_detect import detect_boxes, split_page_into_zones
from ocr.detect.color_detect import _COLOR_HEX, _COLOR_RANGES
from ocr.detect.box_detect import detect_boxes, split_page_into_zones
logger = logging.getLogger(__name__)

View File

@@ -19,7 +19,7 @@ from .reconstruction import router as _reconstruction_router
from .validation import router as _validation_router
# Composite router — drop-in replacement for the old monolithic router.
# ocr_pipeline_api.py imports ``from ocr_pipeline_postprocess import router``.
# ocr_pipeline_api.py imports ``from ocr.pipeline.postprocess import router``.
router = APIRouter()
router.include_router(_llm_review_router)
router.include_router(_reconstruction_router)

View File

@@ -14,7 +14,7 @@ from typing import Any, Dict, Optional
from fastapi import APIRouter, HTTPException, Query
from grid_editor_api import _build_grid_core
from grid.editor.api import _build_grid_core
from .session_store import (
get_session_db,
list_ground_truth_sessions_db,

View File

@@ -17,7 +17,7 @@ import cv2
import numpy as np
from fastapi import APIRouter, HTTPException
from cv_vocab_pipeline import (
from ocr.cv_pipeline import (
create_ocr_image,
detect_column_geometry,
detect_row_geometry,
@@ -120,7 +120,7 @@ async def detect_rows(session_id: str):
# merges rows. Word-grouping directly clusters words by Y proximity,
# which is more robust for these cases.
if is_sub_session and word_dicts:
from cv_layout import _build_rows_from_word_grouping
from ocr.layout.layout import _build_rows_from_word_grouping
rows = _build_rows_from_word_grouping(
word_dicts, left_x, right_x, top_y, bottom_y,
right_x - left_x, bottom_y - top_y,

View File

@@ -15,7 +15,7 @@ import cv2
import numpy as np
from fastapi import APIRouter, File, Form, HTTPException, Query, UploadFile
from cv_vocab_pipeline import render_image_high_res, render_pdf_high_res
from ocr.cv_pipeline import render_image_high_res, render_pdf_high_res
from .common import (
VALID_DOCUMENT_CATEGORIES,
UpdateSessionRequest,

View File

@@ -17,7 +17,7 @@ import numpy as np
from fastapi import APIRouter, HTTPException, Query
from fastapi.responses import Response
from cv_vocab_pipeline import create_ocr_image, detect_document_type
from ocr.cv_pipeline import create_ocr_image, detect_document_type
from .common import (
VALID_DOCUMENT_CATEGORIES,
_append_pipeline_log,

View File

@@ -15,9 +15,9 @@ import numpy as np
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
from cv_box_detect import detect_boxes
from cv_color_detect import _COLOR_RANGES, _COLOR_HEX
from cv_graphic_detect import detect_graphic_elements
from ocr.detect.box_detect import detect_boxes
from ocr.detect.color_detect import _COLOR_RANGES, _COLOR_HEX
from ocr.detect.graphic_detect import detect_graphic_elements
from .session_store import (
get_session_db,
update_session_db,
@@ -100,7 +100,7 @@ async def detect_structure(session_id: str):
)
# --- Zone splitting ---
from cv_box_detect import split_page_into_zones as _split_zones
from ocr.detect.box_detect import split_page_into_zones as _split_zones
zones = _split_zones(content_x, content_y, content_w_px, content_h_px, boxes)
# --- Color region sampling ---
@@ -123,7 +123,7 @@ async def detect_structure(session_id: str):
med_s = float(np.median(roi_hsv[:, :, 1]))
med_v = float(np.median(roi_hsv[:, :, 2]))
if med_s > 15:
from cv_color_detect import _hue_to_color_name
from ocr.detect.color_detect import _hue_to_color_name
bg_name = _hue_to_color_name(med_h)
bg_hex = _COLOR_HEX.get(bg_name, "#6b7280")
else:

View File

@@ -23,7 +23,7 @@ import numpy as np
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
from cv_words_first import build_grid_from_words
from ocr.words_first import build_grid_from_words
from .session_store import (
get_session_db,
get_session_image,
@@ -72,7 +72,7 @@ async def paddle_direct(session_id: str):
img_h, img_w = img_bgr.shape[:2]
from cv_ocr_engines import ocr_region_paddle
from ocr.engines.engines import ocr_region_paddle
t0 = time.time()
word_dicts = await ocr_region_paddle(img_bgr, region=None)

View File

@@ -17,7 +17,7 @@ import numpy as np
from fastapi import APIRouter, HTTPException, Request
from fastapi.responses import StreamingResponse
from cv_vocab_pipeline import (
from ocr.cv_pipeline import (
PageRegion,
RowGeometry,
_cells_to_vocab_entries,
@@ -27,7 +27,7 @@ from cv_vocab_pipeline import (
create_ocr_image,
detect_column_geometry,
)
from cv_words_first import build_grid_from_words
from ocr.words_first import build_grid_from_words
from .session_store import (
get_session_db,
update_session_db,
@@ -223,7 +223,7 @@ async def _words_first_path(
img_h, img_w = dewarped_bgr.shape[:2]
if engine == "paddle":
from cv_ocr_engines import ocr_region_paddle
from ocr.engines.engines import ocr_region_paddle
wf_word_dicts = await ocr_region_paddle(dewarped_bgr, region=None)
cached["_paddle_word_dicts"] = wf_word_dicts
else:

View File

@@ -15,7 +15,7 @@ from typing import Any, Dict, List
import numpy as np
from fastapi import Request
from cv_vocab_pipeline import (
from ocr.cv_pipeline import (
PageRegion,
RowGeometry,
_cells_to_vocab_entries,

View File

@@ -219,7 +219,7 @@ def spell_review_entries_sync(entries: List[Dict]) -> Dict:
# Use SmartSpellChecker if available
_smart = None
try:
from smart_spell import SmartSpellChecker
from ocr.spell.smart_spell import SmartSpellChecker
_smart = SmartSpellChecker()
logger.debug("spell_review: using SmartSpellChecker")
except Exception:

View File

@@ -159,7 +159,7 @@ class _SmartSpellCoreBase:
return True
# Also accept known abbreviations (sth, sb, adj, etc.)
try:
from cv_ocr_engines import _KNOWN_ABBREVIATIONS
from ocr.engines.engines import _KNOWN_ABBREVIATIONS
if w in _KNOWN_ABBREVIATIONS:
return True
except ImportError:

View File

@@ -35,7 +35,7 @@ class SmartSpellChecker(_SmartSpellCoreBase):
"""
# Import known abbreviations for vocabulary context
try:
from cv_ocr_engines import _KNOWN_ABBREVIATIONS
from ocr.engines.engines import _KNOWN_ABBREVIATIONS
except ImportError:
_KNOWN_ABBREVIATIONS = set()
@@ -149,7 +149,7 @@ class SmartSpellChecker(_SmartSpellCoreBase):
# --- Pass 1: Boundary repair between adjacent unknown words ---
# Import abbreviations for the heuristic below
try:
from cv_ocr_engines import _KNOWN_ABBREVIATIONS as _ABBREVS
from ocr.engines.engines import _KNOWN_ABBREVIATIONS as _ABBREVS
except ImportError:
_ABBREVS = set()

View File

@@ -2,5 +2,5 @@
training package — training API, simulation, export, TrOCR.
Backward-compatible re-exports: consumers can still use
``from training_api import ...`` etc. via the shim files in backend/.
``from training.api import ...`` etc. via the shim files in backend/.
"""

View File

@@ -211,7 +211,7 @@ async def delete_model_version(version_id: str):
@router.get("/dataset/stats", response_model=dict)
async def get_dataset_stats():
"""Get statistics about the training dataset."""
from metrics_db import get_zeugnis_stats
from metrics.db import get_zeugnis_stats
zeugnis_stats = await get_zeugnis_stats()

View File

@@ -2,5 +2,5 @@
Vocab package — restructured from vocab_* flat modules.
Backward-compatible re-exports: consumers can still use
``from vocab_worksheet_api import ...`` etc. via the shim files in backend/.
``from vocab.worksheet.api import ...`` etc. via the shim files in backend/.
"""

View File

@@ -27,7 +27,7 @@ from .generation import convert_pdf_page_to_image
# Try to import Tesseract extractor
try:
from tesseract_vocab_extractor import (
from ocr.engines.tesseract_extractor import (
extract_bounding_boxes, TESSERACT_AVAILABLE,
)
except ImportError:
@@ -264,7 +264,7 @@ async def extract_with_boxes(session_id: str, page_number: int):
# Deskew image before OCR
deskew_angle = 0.0
try:
from cv_vocab_pipeline import deskew_image_by_word_alignment, CV2_AVAILABLE
from ocr.cv_pipeline import deskew_image_by_word_alignment, CV2_AVAILABLE
if CV2_AVAILABLE:
image_data, deskew_angle = deskew_image_by_word_alignment(image_data)
logger.info(f"Deskew: {deskew_angle:.2f}° for page {page_number}")

View File

@@ -23,7 +23,7 @@ from .generation import convert_pdf_page_to_image
# Try to import Tesseract extractor
try:
from tesseract_vocab_extractor import (
from ocr.engines.tesseract_extractor import (
run_tesseract_pipeline,
match_positions_to_vocab, TESSERACT_AVAILABLE,
)
@@ -32,7 +32,7 @@ except ImportError:
# Try to import CV Pipeline
try:
from cv_vocab_pipeline import run_cv_pipeline, CV_PIPELINE_AVAILABLE
from ocr.cv_pipeline import run_cv_pipeline, CV_PIPELINE_AVAILABLE
except ImportError:
CV_PIPELINE_AVAILABLE = False
@@ -328,7 +328,7 @@ async def analyze_grid(session_id: str, page_number: int, use_tesseract: bool =
# Run Tesseract if not already cached
if not tess_page_data:
logger.info("Running Tesseract for grid analysis (not cached)")
from tesseract_vocab_extractor import run_tesseract_pipeline as _run_tess
from ocr.engines.tesseract_extractor import run_tesseract_pipeline as _run_tess
tess_page_data = await _run_tess(image_data, lang="eng+deu")
session[f"tesseract_page_{page_number}"] = tess_page_data
session["tesseract_words"] = tess_page_data.get("words", [])

View File

@@ -39,7 +39,7 @@ except ImportError:
# CV pipeline helpers
try:
from cv_vocab_pipeline import (
from ocr.cv_pipeline import (
deskew_two_pass,
dewarp_image,
detect_and_fix_orientation,
@@ -54,7 +54,7 @@ except ImportError:
_fix_phonetic_brackets = None # type: ignore[assignment]
try:
from cv_cell_grid import (
from ocr.cell_grid.cell_grid import (
_merge_wrapped_rows,
_merge_phonetic_continuation_rows,
_merge_continuation_rows,
@@ -65,17 +65,17 @@ except ImportError:
_merge_continuation_rows = None # type: ignore[assignment]
try:
from cv_ocr_engines import ocr_region_rapid
from ocr.engines.engines import ocr_region_rapid
except ImportError:
ocr_region_rapid = None # type: ignore[assignment]
try:
from cv_vocab_types import PageRegion
from ocr.types import PageRegion
except ImportError:
PageRegion = None # type: ignore[assignment]
try:
from ocr_pipeline_ocr_merge import (
from ocr.pipeline.ocr_merge import (
_split_paddle_multi_words,
_merge_paddle_tesseract,
_deduplicate_words,
@@ -86,12 +86,12 @@ except ImportError:
_deduplicate_words = None # type: ignore[assignment]
try:
from cv_words_first import build_grid_from_words
from ocr.words_first import build_grid_from_words
except ImportError:
build_grid_from_words = None # type: ignore[assignment]
try:
from ocr_pipeline_session_store import (
from ocr.pipeline.session_store import (
create_session_db as create_pipeline_session_db,
update_session_db as update_pipeline_session_db,
)
@@ -173,7 +173,7 @@ async def _run_ocr_pipeline_for_page(
# 5. Content crop (removes scanner borders, gutter shadows)
t0 = _time.time()
try:
from page_crop import detect_and_crop_page
from ocr.pipeline.page_crop import detect_and_crop_page
cropped_bgr, crop_result = detect_and_crop_page(dewarped_bgr)
if crop_result.get("crop_applied"):
dewarped_bgr = cropped_bgr
@@ -186,7 +186,7 @@ async def _run_ocr_pipeline_for_page(
# 5b. Scan quality assessment
scan_quality_report = None
try:
from scan_quality import score_scan_quality
from ocr.pipeline.scan_quality import score_scan_quality
scan_quality_report = score_scan_quality(dewarped_bgr)
except Exception as e:
logger.warning(f" scan quality: failed ({e})")
@@ -200,7 +200,7 @@ async def _run_ocr_pipeline_for_page(
is_degraded = scan_quality_report.is_degraded if scan_quality_report else False
if is_degraded and enable_enhance:
try:
from ocr_image_enhance import enhance_for_ocr
from ocr.image_enhance import enhance_for_ocr
dewarped_bgr = enhance_for_ocr(dewarped_bgr, is_degraded=True)
logger.info(" enhancement: applied (degraded scan)")
except Exception as e:
@@ -212,8 +212,8 @@ async def _run_ocr_pipeline_for_page(
# RapidOCR (local ONNX)
try:
from cv_ocr_engines import ocr_region_rapid
from cv_vocab_types import PageRegion
from ocr.engines.engines import ocr_region_rapid
from ocr.types import PageRegion
full_region = PageRegion(type="full_page", x=0, y=0, width=img_w, height=img_h)
rapid_words = ocr_region_rapid(dewarped_bgr, full_region) or []
except Exception as e:
@@ -243,8 +243,8 @@ async def _run_ocr_pipeline_for_page(
})
# Merge dual-engine results
from ocr_pipeline_ocr_merge import _split_paddle_multi_words, _merge_paddle_tesseract, _deduplicate_words
from cv_words_first import build_grid_from_words
from ocr.pipeline.ocr_merge import _split_paddle_multi_words, _merge_paddle_tesseract, _deduplicate_words
from ocr.words_first import build_grid_from_words
rapid_split = _split_paddle_multi_words(rapid_words) if rapid_words else []
if rapid_split or tess_words:
@@ -300,7 +300,7 @@ async def _run_ocr_pipeline_for_page(
# 8. Run full grid-build (with pipe-autocorrect, word-gap merge, etc.)
t0 = _time.time()
try:
from grid_editor_api import _build_grid_core
from grid.editor.api import _build_grid_core
session_data = {
"word_result": word_result,
}

View File

@@ -36,7 +36,7 @@ LOCAL_STORAGE_PATH = os.getenv("VOCAB_STORAGE_PATH", "/app/vocab-worksheets")
try:
import numpy as np
from cv_preprocessing import render_pdf_high_res, detect_and_fix_orientation
from ocr.preprocessing.preprocessing import render_pdf_high_res, detect_and_fix_orientation
OCR_PIPELINE_AVAILABLE = True
except ImportError:
np = None # type: ignore[assignment]

View File

@@ -2,5 +2,5 @@
worksheet package — worksheet editor, NRU generator, cleanup.
Backward-compatible re-exports: consumers can still use
``from worksheet_editor_api import ...`` etc. via the shim files in backend/.
``from worksheet.editor_api import ...`` etc. via the package's re-exports (the old backend/ shim files were removed).
"""

View File

@@ -366,7 +366,7 @@ async def reconstruct_document_from_session(request: ReconstructRequest):
async def get_available_sessions():
"""Get list of available vocab sessions that can be reconstructed."""
try:
from vocab_worksheet_api import _sessions
from vocab.worksheet.api import _sessions
available = []
for session_id, session in _sessions.items():

View File

@@ -31,7 +31,7 @@ async def reconstruct_document_logic(request: ReconstructRequest) -> Reconstruct
Returns ReconstructResponse ready to send to the client.
"""
from fastapi import HTTPException
from vocab_worksheet_api import _sessions, convert_pdf_page_to_image
from vocab.worksheet.api import _sessions, convert_pdf_page_to_image
# Check if session exists
if request.session_id not in _sessions:

View File

@@ -2,5 +2,5 @@
zeugnis package — certificate crawler, models, storage.
Backward-compatible re-exports: consumers can still use
``from zeugnis_api import ...`` etc. via the shim files in backend/.
``from zeugnis.api import ...`` etc. via the package's re-exports (the old backend/ shim files were removed).
"""

View File

@@ -16,7 +16,7 @@ from .models import (
from .crawler import (
start_crawler, stop_crawler, get_crawler_status,
)
from metrics_db import (
from metrics.db import (
get_zeugnis_documents, get_zeugnis_stats,
log_zeugnis_event, get_pool,
)

View File

@@ -15,7 +15,7 @@ from .models import (
BUNDESLAENDER,
generate_id, get_training_allowed, get_bundesland_name, get_license_for_bundesland,
)
from metrics_db import (
from metrics.db import (
get_zeugnis_sources, upsert_zeugnis_source, get_pool,
)

View File

@@ -31,7 +31,7 @@ async def start_crawler(bundesland: Optional[str] = None, source_id: Optional[st
async def run_crawler():
try:
from metrics_db import get_pool
from metrics.db import get_pool
pool = await get_pool()
if pool:

View File

@@ -322,8 +322,8 @@ SEED_DATA: Dict[str, Dict[str, Any]] = {
async def populate_seed_data():
"""Populate database with seed data."""
from metrics_db import get_pool, upsert_zeugnis_source
from zeugnis_models import generate_id
from metrics.db import get_pool, upsert_zeugnis_source
from zeugnis.models import generate_id
pool = await get_pool()
if not pool:
@@ -412,4 +412,4 @@ if __name__ == "__main__":
print("\n" + "=" * 60)
print("To populate database, run:")
print(" python -c 'import asyncio; from zeugnis_seed_data import populate_seed_data; asyncio.run(populate_seed_data())'")
print(" python -c 'import asyncio; from zeugnis.seed_data import populate_seed_data; asyncio.run(populate_seed_data())'")

View File

@@ -83,7 +83,7 @@ class ZeugnisCrawler:
# Initialize database connection
try:
from metrics_db import get_pool
from metrics.db import get_pool
self.db_pool = await get_pool()
except Exception as e:
print(f"Failed to get database pool: {e}")