Fix: Update all old-style imports inside packages to new paths
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 1m7s
CI / test-go-edu-search (push) Successful in 46s
CI / test-python-klausur (push) Failing after 2m32s
CI / test-python-agent-core (push) Successful in 33s
CI / test-nodejs-website (push) Successful in 34s

65 files in klausur-service packages + 3 in backend-lehrer packages
had stale imports referencing deleted shim modules.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-04-26 00:19:13 +02:00
parent 5f2ed44654
commit eecb5472dd
68 changed files with 132 additions and 132 deletions

View File

@@ -2,5 +2,5 @@
Vocab package — restructured from vocab_* flat modules.
Backward-compatible re-exports: consumers can still use
``from vocab_worksheet_api import ...`` etc. via the shim files in backend/.
``from vocab.worksheet.api import ...`` etc.; the old flat ``vocab_*`` shim files in backend/ are removed.
"""

View File

@@ -27,7 +27,7 @@ from .generation import convert_pdf_page_to_image
# Try to import Tesseract extractor
try:
from tesseract_vocab_extractor import (
from ocr.engines.tesseract_extractor import (
extract_bounding_boxes, TESSERACT_AVAILABLE,
)
except ImportError:
@@ -264,7 +264,7 @@ async def extract_with_boxes(session_id: str, page_number: int):
# Deskew image before OCR
deskew_angle = 0.0
try:
from cv_vocab_pipeline import deskew_image_by_word_alignment, CV2_AVAILABLE
from ocr.cv_pipeline import deskew_image_by_word_alignment, CV2_AVAILABLE
if CV2_AVAILABLE:
image_data, deskew_angle = deskew_image_by_word_alignment(image_data)
logger.info(f"Deskew: {deskew_angle:.2f}° for page {page_number}")

View File

@@ -23,7 +23,7 @@ from .generation import convert_pdf_page_to_image
# Try to import Tesseract extractor
try:
from tesseract_vocab_extractor import (
from ocr.engines.tesseract_extractor import (
run_tesseract_pipeline,
match_positions_to_vocab, TESSERACT_AVAILABLE,
)
@@ -32,7 +32,7 @@ except ImportError:
# Try to import CV Pipeline
try:
from cv_vocab_pipeline import run_cv_pipeline, CV_PIPELINE_AVAILABLE
from ocr.cv_pipeline import run_cv_pipeline, CV_PIPELINE_AVAILABLE
except ImportError:
CV_PIPELINE_AVAILABLE = False
@@ -328,7 +328,7 @@ async def analyze_grid(session_id: str, page_number: int, use_tesseract: bool =
# Run Tesseract if not already cached
if not tess_page_data:
logger.info("Running Tesseract for grid analysis (not cached)")
from tesseract_vocab_extractor import run_tesseract_pipeline as _run_tess
from ocr.engines.tesseract_extractor import run_tesseract_pipeline as _run_tess
tess_page_data = await _run_tess(image_data, lang="eng+deu")
session[f"tesseract_page_{page_number}"] = tess_page_data
session["tesseract_words"] = tess_page_data.get("words", [])

View File

@@ -39,7 +39,7 @@ except ImportError:
# CV pipeline helpers
try:
from cv_vocab_pipeline import (
from ocr.cv_pipeline import (
deskew_two_pass,
dewarp_image,
detect_and_fix_orientation,
@@ -54,7 +54,7 @@ except ImportError:
_fix_phonetic_brackets = None # type: ignore[assignment]
try:
from cv_cell_grid import (
from ocr.cell_grid.cell_grid import (
_merge_wrapped_rows,
_merge_phonetic_continuation_rows,
_merge_continuation_rows,
@@ -65,17 +65,17 @@ except ImportError:
_merge_continuation_rows = None # type: ignore[assignment]
try:
from cv_ocr_engines import ocr_region_rapid
from ocr.engines.engines import ocr_region_rapid
except ImportError:
ocr_region_rapid = None # type: ignore[assignment]
try:
from cv_vocab_types import PageRegion
from ocr.types import PageRegion
except ImportError:
PageRegion = None # type: ignore[assignment]
try:
from ocr_pipeline_ocr_merge import (
from ocr.pipeline.ocr_merge import (
_split_paddle_multi_words,
_merge_paddle_tesseract,
_deduplicate_words,
@@ -86,12 +86,12 @@ except ImportError:
_deduplicate_words = None # type: ignore[assignment]
try:
from cv_words_first import build_grid_from_words
from ocr.words_first import build_grid_from_words
except ImportError:
build_grid_from_words = None # type: ignore[assignment]
try:
from ocr_pipeline_session_store import (
from ocr.pipeline.session_store import (
create_session_db as create_pipeline_session_db,
update_session_db as update_pipeline_session_db,
)
@@ -173,7 +173,7 @@ async def _run_ocr_pipeline_for_page(
# 5. Content crop (removes scanner borders, gutter shadows)
t0 = _time.time()
try:
from page_crop import detect_and_crop_page
from ocr.pipeline.page_crop import detect_and_crop_page
cropped_bgr, crop_result = detect_and_crop_page(dewarped_bgr)
if crop_result.get("crop_applied"):
dewarped_bgr = cropped_bgr
@@ -186,7 +186,7 @@ async def _run_ocr_pipeline_for_page(
# 5b. Scan quality assessment
scan_quality_report = None
try:
from scan_quality import score_scan_quality
from ocr.pipeline.scan_quality import score_scan_quality
scan_quality_report = score_scan_quality(dewarped_bgr)
except Exception as e:
logger.warning(f" scan quality: failed ({e})")
@@ -200,7 +200,7 @@ async def _run_ocr_pipeline_for_page(
is_degraded = scan_quality_report.is_degraded if scan_quality_report else False
if is_degraded and enable_enhance:
try:
from ocr_image_enhance import enhance_for_ocr
from ocr.image_enhance import enhance_for_ocr
dewarped_bgr = enhance_for_ocr(dewarped_bgr, is_degraded=True)
logger.info(" enhancement: applied (degraded scan)")
except Exception as e:
@@ -212,8 +212,8 @@ async def _run_ocr_pipeline_for_page(
# RapidOCR (local ONNX)
try:
from cv_ocr_engines import ocr_region_rapid
from cv_vocab_types import PageRegion
from ocr.engines.engines import ocr_region_rapid
from ocr.types import PageRegion
full_region = PageRegion(type="full_page", x=0, y=0, width=img_w, height=img_h)
rapid_words = ocr_region_rapid(dewarped_bgr, full_region) or []
except Exception as e:
@@ -243,8 +243,8 @@ async def _run_ocr_pipeline_for_page(
})
# Merge dual-engine results
from ocr_pipeline_ocr_merge import _split_paddle_multi_words, _merge_paddle_tesseract, _deduplicate_words
from cv_words_first import build_grid_from_words
from ocr.pipeline.ocr_merge import _split_paddle_multi_words, _merge_paddle_tesseract, _deduplicate_words
from ocr.words_first import build_grid_from_words
rapid_split = _split_paddle_multi_words(rapid_words) if rapid_words else []
if rapid_split or tess_words:
@@ -300,7 +300,7 @@ async def _run_ocr_pipeline_for_page(
# 8. Run full grid-build (with pipe-autocorrect, word-gap merge, etc.)
t0 = _time.time()
try:
from grid_editor_api import _build_grid_core
from grid.editor.api import _build_grid_core
session_data = {
"word_result": word_result,
}

View File

@@ -36,7 +36,7 @@ LOCAL_STORAGE_PATH = os.getenv("VOCAB_STORAGE_PATH", "/app/vocab-worksheets")
try:
import numpy as np
from cv_preprocessing import render_pdf_high_res, detect_and_fix_orientation
from ocr.preprocessing.preprocessing import render_pdf_high_res, detect_and_fix_orientation
OCR_PIPELINE_AVAILABLE = True
except ImportError:
np = None # type: ignore[assignment]