Cleanup: Delete ALL 242 shims, update ALL consumer imports
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 41s
CI / test-go-edu-search (push) Successful in 32s
CI / test-python-klausur (push) Failing after 2m41s
CI / test-python-agent-core (push) Successful in 34s
CI / test-nodejs-website (push) Successful in 39s

klausur-service: 183 shims deleted, 26 test files + 8 source files updated
backend-lehrer: 59 shims deleted, main.py + 8 source files updated

All imports now use the new package paths directly.
Zero shims remaining in the entire codebase.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-04-26 00:11:33 +02:00
parent d093a4d388
commit 5f2ed44654
288 changed files with 214 additions and 1182 deletions
+2 -2
View File
@@ -6,7 +6,7 @@ sys.path.insert(0, "/app/backend")
import cv2
import numpy as np
import pytesseract
from ocr_pipeline_session_store import get_session_db
from ocr.pipeline.session_store import get_session_db
SESSION_ID = "3dcb1897-09a6-4b80-91b5-7e4207980bf3"
@@ -74,7 +74,7 @@ async def main():
# Also test the 4 dewarp methods directly
print("\n--- Dewarp method results on deskewed image ---")
from cv_vocab_pipeline import (
from ocr.cv_pipeline import (
_detect_shear_angle, _detect_shear_by_projection,
_detect_shear_by_hough, _detect_shear_by_text_lines,
)
@@ -257,7 +257,7 @@ class TestPDFExtraction:
def test_pdf_extraction_config(self):
"""Test PDF extraction configuration."""
from pdf_extraction import PDF_BACKEND, get_pdf_extraction_info
from korrektur.pdf_extraction import PDF_BACKEND, get_pdf_extraction_info
info = get_pdf_extraction_info()
assert "configured_backend" in info
@@ -266,7 +266,7 @@ class TestPDFExtraction:
def test_detect_available_backends(self):
"""Test backend detection."""
from pdf_extraction import _detect_available_backends
from korrektur.pdf_extraction import _detect_available_backends
backends = _detect_available_backends()
assert isinstance(backends, list)
@@ -280,7 +280,7 @@ class TestPDFExtraction:
def test_pdf_extraction_result_class(self):
"""Test PDFExtractionResult data class."""
from pdf_extraction import PDFExtractionResult
from korrektur.pdf_extraction import PDFExtractionResult
result = PDFExtractionResult(
text="Extracted text",
@@ -305,7 +305,7 @@ class TestPDFExtraction:
def test_pdf_extraction_error(self):
"""Test PDF extraction error handling."""
from pdf_extraction import PDFExtractionError
from korrektur.pdf_extraction import PDFExtractionError
with pytest.raises(PDFExtractionError):
raise PDFExtractionError("Test error")
@@ -313,7 +313,7 @@ class TestPDFExtraction:
@pytest.mark.xfail(reason="_extract_with_pypdf is internal function not exposed in API")
def test_pypdf_extraction(self):
"""Test pypdf extraction with a simple PDF (BSD-3-Clause licensed)."""
from pdf_extraction import _extract_with_pypdf, PDFExtractionError
from korrektur.pdf_extraction import _extract_with_pypdf, PDFExtractionError
# Create a minimal valid PDF
# This is a very simple PDF that PyPDF2 can parse
@@ -517,7 +517,7 @@ class TestModuleAvailability:
def test_pdf_extraction_import(self):
"""Test PDF Extraction module import."""
from pdf_extraction import (
from korrektur.pdf_extraction import (
extract_text_from_pdf,
extract_text_from_pdf_enhanced,
get_pdf_extraction_info,
@@ -551,7 +551,7 @@ class TestFeatureVerification:
from hyde import get_hyde_info
from hybrid_search import get_hybrid_search_info
from rag_evaluation import get_evaluation_info
from pdf_extraction import get_pdf_extraction_info
from korrektur.pdf_extraction import get_pdf_extraction_info
from self_rag import get_self_rag_info
infos = [
@@ -598,7 +598,7 @@ class TestRAGAdminAPI:
@pytest.mark.asyncio
async def test_rag_documentation_markdown_format(self):
"""Test RAG documentation endpoint returns markdown."""
from admin_api import get_rag_documentation
from admin.api import get_rag_documentation
result = await get_rag_documentation(format="markdown")
@@ -610,7 +610,7 @@ class TestRAGAdminAPI:
@pytest.mark.asyncio
async def test_rag_documentation_html_format(self):
"""Test RAG documentation endpoint returns HTML with tables."""
from admin_api import get_rag_documentation
from admin.api import get_rag_documentation
result = await get_rag_documentation(format="html")
@@ -628,7 +628,7 @@ class TestRAGAdminAPI:
@pytest.mark.asyncio
async def test_rag_system_info_has_feature_status(self):
"""Test RAG system-info includes feature status."""
from admin_api import get_rag_system_info
from admin.api import get_rag_system_info
result = await get_rag_system_info()
@@ -639,7 +639,7 @@ class TestRAGAdminAPI:
@pytest.mark.asyncio
async def test_rag_system_info_has_privacy_notes(self):
"""Test RAG system-info includes privacy notes."""
from admin_api import get_rag_system_info
from admin.api import get_rag_system_info
result = await get_rag_system_info()
@@ -15,8 +15,8 @@ import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
from ocr_pipeline_api import _filter_border_ghost_words, _BORDER_GHOST_CHARS
from cv_vocab_types import DetectedBox
from ocr.pipeline.api import _filter_border_ghost_words, _BORDER_GHOST_CHARS
from ocr.types import DetectedBox
# ---------------------------------------------------------------------------
@@ -14,7 +14,7 @@ import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
from cv_words_first import build_grid_from_words, _cluster_columns
from ocr.words_first import build_grid_from_words, _cluster_columns
# ---------------------------------------------------------------------------
@@ -4,7 +4,7 @@ import pytest
import sys, os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
from cv_box_layout import classify_box_layout, build_box_zone_grid, _group_into_lines
from ocr.detect.box_layout import classify_box_layout, build_box_zone_grid, _group_into_lines
def _make_words(lines_data):
+4 -4
View File
@@ -514,7 +514,7 @@ class TestEncryptionUtils:
def test_hash_key(self):
"""Key hashing produces consistent results."""
from eh_pipeline import hash_key
from korrektur.eh_pipeline import hash_key
import os
passphrase = "test-secret-passphrase"
@@ -527,7 +527,7 @@ class TestEncryptionUtils:
def test_verify_key_hash(self):
"""Key hash verification works correctly."""
from eh_pipeline import hash_key, verify_key_hash
from korrektur.eh_pipeline import hash_key, verify_key_hash
import os
passphrase = "test-secret-passphrase"
@@ -539,7 +539,7 @@ class TestEncryptionUtils:
def test_chunk_text(self):
"""Text chunking produces correct overlap."""
from eh_pipeline import chunk_text
from korrektur.eh_pipeline import chunk_text
text = "A" * 2000 # 2000 characters
chunks = chunk_text(text, chunk_size=1000, overlap=200)
@@ -550,7 +550,7 @@ class TestEncryptionUtils:
def test_encrypt_decrypt_text(self):
"""Text encryption and decryption round-trip."""
from eh_pipeline import encrypt_text, decrypt_text
from korrektur.eh_pipeline import encrypt_text, decrypt_text
plaintext = "Dies ist ein geheimer Text."
passphrase = "geheim123"
@@ -13,71 +13,71 @@ class TestInsertMissingIpa:
def test_single_headword_gets_ipa(self):
"""Single English headword should get IPA inserted."""
from cv_ocr_engines import _insert_missing_ipa
from ocr.engines.engines import _insert_missing_ipa
result = _insert_missing_ipa("badge", "british")
assert "[" in result and "]" in result
assert result.startswith("badge [")
def test_short_phrase_first_word_gets_ipa(self):
"""First real word in short phrase gets IPA."""
from cv_ocr_engines import _insert_missing_ipa
from ocr.engines.engines import _insert_missing_ipa
result = _insert_missing_ipa("film", "british")
assert "[" in result
def test_long_sentence_unchanged(self):
"""Sentences with >6 words should not get IPA."""
from cv_ocr_engines import _insert_missing_ipa
from ocr.engines.engines import _insert_missing_ipa
text = "Can I borrow your CD player from you please"
result = _insert_missing_ipa(text, "british")
assert result == text
def test_existing_brackets_unchanged(self):
"""Text with existing brackets should not get double IPA."""
from cv_ocr_engines import _insert_missing_ipa
from ocr.engines.engines import _insert_missing_ipa
text = "dance [dˈɑːns]"
result = _insert_missing_ipa(text, "british")
assert result == text
def test_empty_text_unchanged(self):
"""Empty text returns empty."""
from cv_ocr_engines import _insert_missing_ipa
from ocr.engines.engines import _insert_missing_ipa
assert _insert_missing_ipa("", "british") == ""
assert _insert_missing_ipa(" ", "british") == ""
def test_grammar_words_skipped(self):
"""Grammar particles should not get IPA."""
from cv_ocr_engines import _insert_missing_ipa
from ocr.engines.engines import _insert_missing_ipa
# "sth" is in _GRAMMAR_BRACKET_WORDS
result = _insert_missing_ipa("sth", "british")
assert "[" not in result
def test_german_word_no_ipa(self):
"""German words (no IPA entry) stay unchanged."""
from cv_ocr_engines import _insert_missing_ipa
from ocr.engines.engines import _insert_missing_ipa
result = _insert_missing_ipa("Anstecknadel", "british")
assert result == "Anstecknadel"
def test_compound_word_schoolbag_gets_ipa(self):
"""R07: Compound word 'schoolbag' should get decomposed IPA (school+bag)."""
from cv_ocr_engines import _insert_missing_ipa
from ocr.engines.engines import _insert_missing_ipa
result = _insert_missing_ipa("schoolbag", "british")
assert "[" in result and "]" in result
assert result.startswith("schoolbag [")
def test_compound_word_blackbird(self):
"""Compound word 'blackbird' should get decomposed IPA."""
from cv_ocr_engines import _insert_missing_ipa
from ocr.engines.engines import _insert_missing_ipa
result = _insert_missing_ipa("blackbird", "british")
assert "[" in result and "]" in result
def test_compound_word_too_short(self):
"""Words shorter than 6 chars should not attempt compound decomposition."""
from cv_ocr_engines import _decompose_compound
from ocr.engines.engines import _decompose_compound
assert _decompose_compound("bag", "british") is None
def test_decompose_compound_direct(self):
"""Direct test of _decompose_compound for known compounds."""
from cv_ocr_engines import _decompose_compound
from ocr.engines.engines import _decompose_compound
# schoolbag = school + bag — both should be in dictionary
result = _decompose_compound("schoolbag", "british")
assert result is not None
@@ -88,14 +88,14 @@ class TestStripPostBracketGarbled:
def test_simple_trailing_garbled(self):
"""R21-simple: 'sea [sˈiː] si:' → trailing IPA marker removed."""
from cv_ocr_engines import _strip_post_bracket_garbled
from ocr.engines.engines import _strip_post_bracket_garbled
result = _strip_post_bracket_garbled("sea [sˈiː] si:")
assert "si:" not in result
assert result.startswith("sea [sˈiː]")
def test_multi_word_trailing_garbled(self):
"""R21: 'seat [sˈiːt] belt si:t belt' → keep 'belt', remove garbled."""
from cv_ocr_engines import _strip_post_bracket_garbled
from ocr.engines.engines import _strip_post_bracket_garbled
result = _strip_post_bracket_garbled("seat [sˈiːt] belt si:t belt")
assert "belt" in result # real word kept
assert "si:t" not in result # garbled removed
@@ -104,13 +104,13 @@ class TestStripPostBracketGarbled:
def test_delimiter_after_bracket_kept(self):
"""Delimiters after IPA bracket are kept."""
from cv_ocr_engines import _strip_post_bracket_garbled
from ocr.engines.engines import _strip_post_bracket_garbled
result = _strip_post_bracket_garbled("dance [dˈɑːns] tanzen")
assert " tanzen" in result
def test_german_after_bracket_kept(self):
"""German words (uppercase) after IPA bracket are kept."""
from cv_ocr_engines import _strip_post_bracket_garbled
from ocr.engines.engines import _strip_post_bracket_garbled
result = _strip_post_bracket_garbled("badge [bædʒ] Abzeichen")
assert "Abzeichen" in result
@@ -120,7 +120,7 @@ class TestFixCellPhonetics:
def test_english_column_cells_processed(self):
"""Cells with col_type column_en should be processed."""
from cv_ocr_engines import fix_cell_phonetics
from ocr.engines.engines import fix_cell_phonetics
cells = [
{"cell_id": "c1", "col_type": "column_en", "text": "badge"},
{"cell_id": "c2", "col_type": "column_de", "text": "Anstecknadel"},
@@ -133,7 +133,7 @@ class TestFixCellPhonetics:
def test_column_text_cells_processed(self):
"""Cells with col_type column_text should be processed."""
from cv_ocr_engines import fix_cell_phonetics
from ocr.engines.engines import fix_cell_phonetics
cells = [
{"cell_id": "c1", "col_type": "column_text", "text": "challenge"},
]
@@ -142,7 +142,7 @@ class TestFixCellPhonetics:
def test_garbled_ipa_replaced(self):
"""Garbled IPA brackets should be replaced with correct IPA."""
from cv_ocr_engines import fix_cell_phonetics
from ocr.engines.engines import fix_cell_phonetics
cells = [
{"cell_id": "c1", "col_type": "column_en", "text": "dance {'tfatno]"},
]
@@ -154,7 +154,7 @@ class TestFixCellPhonetics:
def test_empty_cells_unchanged(self):
"""Empty cells should not cause errors."""
from cv_ocr_engines import fix_cell_phonetics
from ocr.engines.engines import fix_cell_phonetics
cells = [
{"cell_id": "c1", "col_type": "column_en", "text": ""},
{"cell_id": "c2", "col_type": "column_en", "text": None},
@@ -164,7 +164,7 @@ class TestFixCellPhonetics:
def test_non_english_col_types_skipped(self):
"""Cells with column_de, column_example etc. should not be processed."""
from cv_ocr_engines import fix_cell_phonetics
from ocr.engines.engines import fix_cell_phonetics
cells = [
{"cell_id": "c1", "col_type": "column_de", "text": "Eis (gefrorenes Wasser)"},
{"cell_id": "c2", "col_type": "column_example", "text": "(sich beschweren)"},
@@ -9,8 +9,8 @@ import pytest
import cv2
from cv_box_detect import detect_boxes, split_page_into_zones
from cv_vocab_types import DetectedBox, PageZone
from ocr.detect.box_detect import detect_boxes, split_page_into_zones
from ocr.types import DetectedBox, PageZone
# ---------------------------------------------------------------------------
@@ -9,7 +9,7 @@ import pytest
import cv2
from cv_graphic_detect import detect_graphic_elements, GraphicElement, _dominant_color
from ocr.detect.graphic_detect import detect_graphic_elements, GraphicElement, _dominant_color
# ---------------------------------------------------------------------------
@@ -23,7 +23,7 @@ from unittest.mock import AsyncMock, MagicMock, patch, PropertyMock
from dataclasses import asdict
# Import module under test
from cv_vocab_pipeline import (
from ocr.cv_pipeline import (
ColumnGeometry,
DocumentTypeResult,
PageRegion,
@@ -1408,7 +1408,7 @@ class TestCellsToVocabEntriesPageRef:
def test_page_ref_mapped_to_source_page(self):
"""Cell with col_type='page_ref' → source_page field populated."""
from cv_vocab_pipeline import _cells_to_vocab_entries
from ocr.cv_pipeline import _cells_to_vocab_entries
cells = [
{
@@ -1450,7 +1450,7 @@ class TestCellsToVocabEntriesPageRef:
def test_no_page_ref_defaults_empty(self):
"""Without page_ref cell, source_page defaults to empty string."""
from cv_vocab_pipeline import _cells_to_vocab_entries
from ocr.cv_pipeline import _cells_to_vocab_entries
cells = [
{
@@ -1472,7 +1472,7 @@ class TestCellsToVocabEntriesPageRef:
def test_marker_only_row_included(self):
"""Row with only a marker (no english/german/example) is kept."""
from cv_vocab_pipeline import _cells_to_vocab_entries
from ocr.cv_pipeline import _cells_to_vocab_entries
cells = [
# Row 0: has english + marker
@@ -1543,7 +1543,7 @@ class TestCellsToVocabEntriesPageRef:
def test_page_ref_only_row_included(self):
"""Row with only source_page text is kept (no english/german/example)."""
from cv_vocab_pipeline import _cells_to_vocab_entries
from ocr.cv_pipeline import _cells_to_vocab_entries
cells = [
{
@@ -1,7 +1,7 @@
"""Tests for ocr.words_first (formerly cv_words_first.py) — Words-First Grid Builder."""
import pytest
from cv_words_first import (
from ocr.words_first import (
_assign_word_to_column,
_assign_word_to_row,
_build_cells,
@@ -10,8 +10,8 @@ import os
# Add backend to path for imports
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
from cv_vocab_types import ColumnGeometry
from cv_layout import _score_dictionary_signals, _classify_dictionary_columns, _score_language
from ocr.types import ColumnGeometry
from ocr.layout.layout import _score_dictionary_signals, _classify_dictionary_columns, _score_language
def _make_words(texts, start_y=0, y_step=30, x=100, conf=80):
@@ -19,8 +19,8 @@ import importlib
# ---------------------------------------------------------------------------
def _fresh_import():
"""Re-import cv_doclayout_detect with reset globals."""
import cv_doclayout_detect as mod
"""Re-import ocr.detect.doclayout_detect with reset globals."""
import ocr.detect.doclayout_detect as mod
# Reset module-level caching so each test starts clean
mod._onnx_session = None
mod._model_path = None
@@ -62,7 +62,7 @@ class TestIsDoclayoutAvailableNoModel:
class TestLayoutRegionDataclass:
def test_basic_creation(self):
from cv_doclayout_detect import LayoutRegion
from ocr.detect.doclayout_detect import LayoutRegion
region = LayoutRegion(
x=10, y=20, width=100, height=200,
label="figure", confidence=0.95, label_index=1,
@@ -76,14 +76,14 @@ class TestLayoutRegionDataclass:
assert region.label_index == 1
def test_all_fields_present(self):
from cv_doclayout_detect import LayoutRegion
from ocr.detect.doclayout_detect import LayoutRegion
import dataclasses
field_names = {f.name for f in dataclasses.fields(LayoutRegion)}
expected = {"x", "y", "width", "height", "label", "confidence", "label_index"}
assert field_names == expected
def test_different_labels(self):
from cv_doclayout_detect import LayoutRegion, DOCLAYOUT_CLASSES
from ocr.detect.doclayout_detect import LayoutRegion, DOCLAYOUT_CLASSES
for idx, label in enumerate(DOCLAYOUT_CLASSES):
region = LayoutRegion(
x=0, y=0, width=50, height=50,
@@ -125,7 +125,7 @@ class TestDetectLayoutRegionsNoModel:
class TestPreprocessingShapes:
def test_square_image(self):
from cv_doclayout_detect import preprocess_image
from ocr.detect.doclayout_detect import preprocess_image
img = np.random.randint(0, 255, (800, 800, 3), dtype=np.uint8)
tensor, scale, pad_x, pad_y = preprocess_image(img)
assert tensor.shape == (1, 3, 800, 800)
@@ -134,7 +134,7 @@ class TestPreprocessingShapes:
assert tensor.max() <= 1.0
def test_landscape_image(self):
from cv_doclayout_detect import preprocess_image
from ocr.detect.doclayout_detect import preprocess_image
img = np.random.randint(0, 255, (600, 1200, 3), dtype=np.uint8)
tensor, scale, pad_x, pad_y = preprocess_image(img)
assert tensor.shape == (1, 3, 800, 800)
@@ -144,7 +144,7 @@ class TestPreprocessingShapes:
assert pad_y > 0 # vertical padding expected
def test_portrait_image(self):
from cv_doclayout_detect import preprocess_image
from ocr.detect.doclayout_detect import preprocess_image
img = np.random.randint(0, 255, (1200, 600, 3), dtype=np.uint8)
tensor, scale, pad_x, pad_y = preprocess_image(img)
assert tensor.shape == (1, 3, 800, 800)
@@ -154,20 +154,20 @@ class TestPreprocessingShapes:
assert pad_x > 0 # horizontal padding expected
def test_small_image(self):
from cv_doclayout_detect import preprocess_image
from ocr.detect.doclayout_detect import preprocess_image
img = np.random.randint(0, 255, (100, 200, 3), dtype=np.uint8)
tensor, scale, pad_x, pad_y = preprocess_image(img)
assert tensor.shape == (1, 3, 800, 800)
def test_typical_scan_a4(self):
"""A4 scan at 300dpi: roughly 2480x3508 pixels."""
from cv_doclayout_detect import preprocess_image
from ocr.detect.doclayout_detect import preprocess_image
img = np.random.randint(0, 255, (3508, 2480, 3), dtype=np.uint8)
tensor, scale, pad_x, pad_y = preprocess_image(img)
assert tensor.shape == (1, 3, 800, 800)
def test_values_normalized(self):
from cv_doclayout_detect import preprocess_image
from ocr.detect.doclayout_detect import preprocess_image
# All white image
img = np.full((400, 400, 3), 255, dtype=np.uint8)
tensor, _, _, _ = preprocess_image(img)
@@ -182,20 +182,20 @@ class TestPreprocessingShapes:
class TestNmsLogic:
def test_empty_input(self):
from cv_doclayout_detect import nms
from ocr.detect.doclayout_detect import nms
boxes = np.array([]).reshape(0, 4)
scores = np.array([])
assert nms(boxes, scores) == []
def test_single_box(self):
from cv_doclayout_detect import nms
from ocr.detect.doclayout_detect import nms
boxes = np.array([[10, 10, 100, 100]], dtype=np.float32)
scores = np.array([0.9])
kept = nms(boxes, scores, iou_threshold=0.5)
assert kept == [0]
def test_non_overlapping_boxes(self):
from cv_doclayout_detect import nms
from ocr.detect.doclayout_detect import nms
boxes = np.array([
[0, 0, 50, 50],
[200, 200, 300, 300],
@@ -207,7 +207,7 @@ class TestNmsLogic:
assert set(kept) == {0, 1, 2}
def test_overlapping_boxes_suppressed(self):
from cv_doclayout_detect import nms
from ocr.detect.doclayout_detect import nms
# Two boxes that heavily overlap
boxes = np.array([
[10, 10, 110, 110], # 100x100
@@ -219,7 +219,7 @@ class TestNmsLogic:
assert kept == [0]
def test_partially_overlapping_boxes_kept(self):
from cv_doclayout_detect import nms
from ocr.detect.doclayout_detect import nms
# Two boxes that overlap ~25% (below 0.5 threshold)
boxes = np.array([
[0, 0, 100, 100], # 100x100
@@ -231,7 +231,7 @@ class TestNmsLogic:
assert len(kept) == 2
def test_nms_respects_score_ordering(self):
from cv_doclayout_detect import nms
from ocr.detect.doclayout_detect import nms
# Three overlapping boxes — highest confidence should be kept first
boxes = np.array([
[10, 10, 110, 110],
@@ -244,7 +244,7 @@ class TestNmsLogic:
assert kept[0] == 1
def test_iou_computation(self):
from cv_doclayout_detect import _compute_iou
from ocr.detect.doclayout_detect import _compute_iou
box_a = np.array([0, 0, 100, 100], dtype=np.float32)
box_b = np.array([0, 0, 100, 100], dtype=np.float32)
assert abs(_compute_iou(box_a, box_b) - 1.0) < 1e-5
@@ -259,7 +259,7 @@ class TestNmsLogic:
class TestDoclayoutClasses:
def test_correct_class_list(self):
from cv_doclayout_detect import DOCLAYOUT_CLASSES
from ocr.detect.doclayout_detect import DOCLAYOUT_CLASSES
expected = [
"table", "figure", "title", "text", "list",
"header", "footer", "equation", "reference", "abstract",
@@ -267,15 +267,15 @@ class TestDoclayoutClasses:
assert DOCLAYOUT_CLASSES == expected
def test_class_count(self):
from cv_doclayout_detect import DOCLAYOUT_CLASSES
from ocr.detect.doclayout_detect import DOCLAYOUT_CLASSES
assert len(DOCLAYOUT_CLASSES) == 10
def test_no_duplicates(self):
from cv_doclayout_detect import DOCLAYOUT_CLASSES
from ocr.detect.doclayout_detect import DOCLAYOUT_CLASSES
assert len(DOCLAYOUT_CLASSES) == len(set(DOCLAYOUT_CLASSES))
def test_all_lowercase(self):
from cv_doclayout_detect import DOCLAYOUT_CLASSES
from ocr.detect.doclayout_detect import DOCLAYOUT_CLASSES
for cls in DOCLAYOUT_CLASSES:
assert cls == cls.lower(), f"Class '{cls}' should be lowercase"
@@ -303,7 +303,7 @@ class TestGetDoclayoutStatus:
class TestPostprocessing:
def test_single_tensor_format_6cols(self):
"""Test parsing of (1, N, 6) output format: x1,y1,x2,y2,score,class."""
from cv_doclayout_detect import _postprocess
from ocr.detect.doclayout_detect import _postprocess
# One detection: figure at (100,100)-(300,300) in 800x800 space
raw = np.array([[[100, 100, 300, 300, 0.92, 1]]], dtype=np.float32)
@@ -320,7 +320,7 @@ class TestPostprocessing:
def test_three_tensor_format(self):
"""Test parsing of 3-tensor output: boxes, scores, class_ids."""
from cv_doclayout_detect import _postprocess
from ocr.detect.doclayout_detect import _postprocess
boxes = np.array([[50, 50, 200, 150]], dtype=np.float32)
scores = np.array([0.88], dtype=np.float32)
@@ -338,7 +338,7 @@ class TestPostprocessing:
def test_confidence_filtering(self):
"""Detections below threshold should be excluded."""
from cv_doclayout_detect import _postprocess
from ocr.detect.doclayout_detect import _postprocess
raw = np.array([
[100, 100, 200, 200, 0.9, 1], # above threshold
@@ -357,7 +357,7 @@ class TestPostprocessing:
def test_coordinate_scaling(self):
"""Verify coordinates are correctly scaled back to original image."""
from cv_doclayout_detect import _postprocess
from ocr.detect.doclayout_detect import _postprocess
# Image was 1600x1200, scaled to fit 800x800 → scale=0.5, pad_y offset
scale = 800 / 1600 # 0.5
@@ -382,7 +382,7 @@ class TestPostprocessing:
assert r.y == 200
def test_empty_output(self):
from cv_doclayout_detect import _postprocess
from ocr.detect.doclayout_detect import _postprocess
raw = np.array([]).reshape(1, 0, 6).astype(np.float32)
regions = _postprocess(
outputs=[raw],
@@ -14,15 +14,15 @@ sys.path.insert(0, '/app')
import cv2
import numpy as np
import pytest
from cv_vocab_types import PageZone, DetectedBox
from grid_editor_api import (
from ocr.types import PageZone, DetectedBox
from grid.editor.api import (
_merge_content_zones_across_boxes,
_filter_border_ghosts,
_detect_header_rows,
_detect_heading_rows_by_color,
_detect_heading_rows_by_single_cell,
)
from cv_ocr_engines import _text_has_garbled_ipa, fix_ipa_continuation_cell
from ocr.engines.engines import _text_has_garbled_ipa, fix_ipa_continuation_cell
# ---------------------------------------------------------------------------
@@ -818,7 +818,7 @@ class TestSlashIpaConversion:
def _run_step_5h(self, text: str) -> str:
"""Run the Step 5h regex logic on a single text string."""
import re
from cv_ocr_engines import _lookup_ipa
from ocr.engines.engines import _lookup_ipa
_SLASH_IPA_RE = re.compile(
r'(\b[a-zA-Z]+[²³¹]?)\s*'
@@ -926,7 +926,7 @@ class TestRedFalsePositiveSuppression:
def test_low_saturation_red_classified_as_black(self):
"""Black text with slight warm scanner tint (sat ~85) → black, not red."""
import numpy as np
from cv_color_detect import detect_word_colors
from ocr.detect.color_detect import detect_word_colors
# Create a 40x20 image with dark gray pixels (slight warm tint)
# HSV: hue=5 (red range), sat=85 (above 55 threshold but below 90), val=40
@@ -941,7 +941,7 @@ class TestRedFalsePositiveSuppression:
def test_high_saturation_red_classified_as_red(self):
"""Genuinely red text (sat=150) → red."""
import numpy as np
from cv_color_detect import detect_word_colors
from ocr.detect.color_detect import detect_word_colors
# White background with red text region
# Background: white (H=0, S=0, V=255)
@@ -984,7 +984,7 @@ class TestBlueBulletFilter:
zone = {"zone_index": 0, "cells": [cell], "rows": [], "columns": []}
# Run the bullet filter logic inline
from grid_editor_api import _build_grid_core
from grid.editor.api import _build_grid_core
# Instead, test the logic directly
wbs = cell["word_boxes"]
to_remove = set()
@@ -1057,7 +1057,7 @@ class TestWordBoxReadingOrder:
def test_single_line_sorted_by_left(self):
"""Words on same Y line sorted by X (left) position."""
from cv_ocr_engines import _group_words_into_lines
from ocr.engines.engines import _group_words_into_lines
wbs = [
{"text": "up", "left": 376, "top": 264, "width": 22, "height": 19},
{"text": "tie", "left": 284, "top": 264, "width": 23, "height": 14},
@@ -1069,7 +1069,7 @@ class TestWordBoxReadingOrder:
def test_two_lines_preserves_line_order(self):
"""Words on two Y lines: first line first, then second line."""
from cv_ocr_engines import _group_words_into_lines
from ocr.engines.engines import _group_words_into_lines
wbs = [
{"text": "b)", "left": 100, "top": 290, "width": 20, "height": 15},
{"text": "cat", "left": 50, "top": 264, "width": 30, "height": 15},
@@ -1082,7 +1082,7 @@ class TestWordBoxReadingOrder:
def test_already_sorted_unchanged(self):
"""Already-sorted word_boxes stay in same order."""
from cv_ocr_engines import _group_words_into_lines
from ocr.engines.engines import _group_words_into_lines
wbs = [
{"text": "tie", "left": 284, "top": 264, "width": 23, "height": 14},
{"text": "sb/sth", "left": 309, "top": 264, "width": 57, "height": 20},
@@ -7,7 +7,7 @@ import os
# Add parent directory to path so we can import the module
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
from cv_gutter_repair import (
from ocr.gutter.repair import (
_is_known,
_try_hyphen_join,
_try_spell_fix,
@@ -173,7 +173,7 @@ class TestMarkdownParser:
def test_parse_simple_markdown(self):
"""Test parsing simple markdown content."""
from github_crawler import MarkdownParser
from crawler.github import MarkdownParser
content = """# Test Title
@@ -191,21 +191,21 @@ More content here.
def test_extract_title_from_heading(self):
"""Test extracting title from h1 heading."""
from github_crawler import MarkdownParser
from crawler.github import MarkdownParser
title = MarkdownParser._extract_title("# My Document\n\nContent", "fallback.md")
assert title == "My Document"
def test_extract_title_fallback(self):
"""Test fallback to filename when no heading."""
from github_crawler import MarkdownParser
from crawler.github import MarkdownParser
title = MarkdownParser._extract_title("No heading here", "my-document.md")
assert title == "My Document"
def test_detect_german_language(self):
"""Test German language detection."""
from github_crawler import MarkdownParser
from crawler.github import MarkdownParser
german_text = "Dies ist eine Datenschutzerklaerung fuer die Verarbeitung personenbezogener Daten."
lang = MarkdownParser._detect_language(german_text)
@@ -213,7 +213,7 @@ More content here.
def test_detect_english_language(self):
"""Test English language detection."""
from github_crawler import MarkdownParser
from crawler.github import MarkdownParser
english_text = "This is a privacy policy for processing personal data in our application."
lang = MarkdownParser._detect_language(english_text)
@@ -221,7 +221,7 @@ More content here.
def test_find_placeholders(self):
"""Test finding placeholder patterns."""
from github_crawler import MarkdownParser
from crawler.github import MarkdownParser
content = "Company: [COMPANY_NAME], Contact: {email}, Address: __ADDRESS__"
placeholders = MarkdownParser._find_placeholders(content)
@@ -236,7 +236,7 @@ class TestHTMLParser:
def test_parse_simple_html(self):
"""Test parsing simple HTML content."""
from github_crawler import HTMLParser
from crawler.github import HTMLParser
content = """<!DOCTYPE html>
<html>
@@ -255,7 +255,7 @@ class TestHTMLParser:
def test_html_to_text_removes_scripts(self):
"""Test that scripts are removed from HTML."""
from github_crawler import HTMLParser
from crawler.github import HTMLParser
html = "<p>Text</p><script>alert('bad');</script><p>More</p>"
text = HTMLParser._html_to_text(html)
@@ -270,7 +270,7 @@ class TestJSONParser:
def test_parse_simple_json(self):
"""Test parsing simple JSON content."""
from github_crawler import JSONParser
from crawler.github import JSONParser
content = json.dumps({
"title": "Privacy Policy",
@@ -286,7 +286,7 @@ class TestJSONParser:
def test_parse_nested_json(self):
"""Test parsing nested JSON structures."""
from github_crawler import JSONParser
from crawler.github import JSONParser
content = json.dumps({
"sections": {
@@ -305,7 +305,7 @@ class TestExtractedDocument:
def test_extracted_document_hash(self):
"""Test that source hash is auto-generated."""
from github_crawler import ExtractedDocument
from crawler.github import ExtractedDocument
doc = ExtractedDocument(
text="Some content",
@@ -396,7 +396,7 @@ class TestLegalTemplatesIngestion:
def test_infer_template_type_privacy(self):
"""Test inferring privacy policy type."""
from legal_templates_ingestion import LegalTemplatesIngestion
from github_crawler import ExtractedDocument
from crawler.github import ExtractedDocument
from template_sources import SourceConfig, LicenseType
with patch('legal_templates_ingestion.QdrantClient'):
@@ -449,7 +449,7 @@ class TestTemplatesAdminAPI:
def test_templates_status_structure(self):
"""Test the structure of templates status response."""
from admin_api import _templates_ingestion_status
from admin.api import _templates_ingestion_status
# Reset status
_templates_ingestion_status["running"] = False
@@ -462,7 +462,7 @@ class TestTemplatesAdminAPI:
def test_templates_status_running(self):
"""Test status when ingestion is running."""
from admin_api import _templates_ingestion_status
from admin.api import _templates_ingestion_status
_templates_ingestion_status["running"] = True
_templates_ingestion_status["current_source"] = "github-site-policy"
@@ -473,7 +473,7 @@ class TestTemplatesAdminAPI:
def test_templates_results_tracking(self):
"""Test that ingestion results are tracked correctly."""
from admin_api import _templates_ingestion_status
from admin.api import _templates_ingestion_status
_templates_ingestion_status["results"] = {
"github-site-policy": {
@@ -578,7 +578,7 @@ class TestTemplatesIntegration:
def test_full_chunk_creation_pipeline(self, mock_all_services):
"""Test the full chunk creation pipeline."""
from legal_templates_ingestion import LegalTemplatesIngestion
from github_crawler import ExtractedDocument
from crawler.github import ExtractedDocument
from template_sources import SourceConfig, LicenseType
ingestion = LegalTemplatesIngestion()
@@ -5,7 +5,7 @@ import sys
import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
from cv_cell_grid import _merge_wrapped_rows
from ocr.cell_grid.cell_grid import _merge_wrapped_rows
def _entry(row_index, english='', german='', example=''):
@@ -124,8 +124,8 @@ class TestSessionCreation:
@pytest.mark.asyncio
async def test_create_session_success(self, mock_db_pool):
"""Test successful session creation."""
from ocr_labeling_api import SessionCreate
from metrics_db import create_ocr_labeling_session
from ocr.labeling.api import SessionCreate
from metrics.db import create_ocr_labeling_session
pool, conn = mock_db_pool
conn.execute.return_value = None
@@ -144,7 +144,7 @@ class TestSessionCreation:
def test_session_create_model_validation(self):
"""Test SessionCreate model validation."""
from ocr_labeling_api import SessionCreate
from ocr.labeling.api import SessionCreate
# Valid session
session = SessionCreate(
@@ -158,7 +158,7 @@ class TestSessionCreation:
def test_session_create_with_custom_model(self):
"""Test SessionCreate with custom OCR model."""
from ocr_labeling_api import SessionCreate
from ocr.labeling.api import SessionCreate
session = SessionCreate(
name="TrOCR Session",
@@ -174,7 +174,7 @@ class TestSessionListing:
@pytest.mark.asyncio
async def test_get_sessions_empty(self):
"""Test getting sessions when none exist."""
from metrics_db import get_ocr_labeling_sessions
from metrics.db import get_ocr_labeling_sessions
with patch('metrics_db.get_pool', new_callable=AsyncMock, return_value=None):
sessions = await get_ocr_labeling_sessions()
@@ -183,7 +183,7 @@ class TestSessionListing:
@pytest.mark.asyncio
async def test_get_session_not_found(self):
"""Test getting a non-existent session."""
from metrics_db import get_ocr_labeling_session
from metrics.db import get_ocr_labeling_session
with patch('metrics_db.get_pool', new_callable=AsyncMock, return_value=None):
session = await get_ocr_labeling_session("non-existent-id")
@@ -199,7 +199,7 @@ class TestImageUpload:
def test_compute_image_hash(self):
"""Test image hash computation."""
from ocr_labeling_api import compute_image_hash
from ocr.labeling.api import compute_image_hash
image_data = b"\x89PNG fake image data"
hash1 = compute_image_hash(image_data)
@@ -211,7 +211,7 @@ class TestImageUpload:
def test_compute_image_hash_different_data(self):
"""Test that different images produce different hashes."""
from ocr_labeling_api import compute_image_hash
from ocr.labeling.api import compute_image_hash
hash1 = compute_image_hash(b"image 1 data")
hash2 = compute_image_hash(b"image 2 data")
@@ -220,11 +220,11 @@ class TestImageUpload:
def test_save_image_locally(self, tmp_path):
"""Test local image saving."""
from ocr_labeling_api import save_image_locally, LOCAL_STORAGE_PATH
from ocr.labeling.api import save_image_locally, LOCAL_STORAGE_PATH
# Temporarily override storage path
with patch('ocr_labeling_api.LOCAL_STORAGE_PATH', str(tmp_path)):
from ocr_labeling_api import save_image_locally
from ocr.labeling.api import save_image_locally
image_data = b"\x89PNG fake image data"
filepath = save_image_locally(
@@ -241,7 +241,7 @@ class TestImageUpload:
def test_get_image_url_local(self):
"""Test URL generation for local images."""
from ocr_labeling_api import get_image_url, LOCAL_STORAGE_PATH
from ocr.labeling.api import get_image_url, LOCAL_STORAGE_PATH
local_path = f"{LOCAL_STORAGE_PATH}/session-123/item-456.png"
url = get_image_url(local_path)
@@ -250,7 +250,7 @@ class TestImageUpload:
def test_get_image_url_minio(self):
"""Test URL for MinIO images (passthrough)."""
from ocr_labeling_api import get_image_url
from ocr.labeling.api import get_image_url
minio_path = "ocr-labeling/session-123/item-456.png"
url = get_image_url(minio_path)
@@ -269,7 +269,7 @@ class TestConfirmLabel:
@pytest.mark.asyncio
async def test_confirm_label_success(self, mock_db_pool):
"""Test successful label confirmation."""
from metrics_db import confirm_ocr_label
from metrics.db import confirm_ocr_label
pool, conn = mock_db_pool
conn.fetchrow.return_value = {"ocr_text": "Test text"}
@@ -287,7 +287,7 @@ class TestConfirmLabel:
def test_confirm_request_validation(self):
"""Test ConfirmRequest model validation."""
from ocr_labeling_api import ConfirmRequest
from ocr.labeling.api import ConfirmRequest
request = ConfirmRequest(
item_id="item-456",
@@ -303,7 +303,7 @@ class TestCorrectLabel:
@pytest.mark.asyncio
async def test_correct_label_success(self, mock_db_pool):
"""Test successful label correction."""
from metrics_db import correct_ocr_label
from metrics.db import correct_ocr_label
pool, conn = mock_db_pool
conn.execute.return_value = None
@@ -321,7 +321,7 @@ class TestCorrectLabel:
def test_correct_request_validation(self):
"""Test CorrectRequest model validation."""
from ocr_labeling_api import CorrectRequest
from ocr.labeling.api import CorrectRequest
request = CorrectRequest(
item_id="item-456",
@@ -338,7 +338,7 @@ class TestSkipItem:
@pytest.mark.asyncio
async def test_skip_item_success(self, mock_db_pool):
"""Test successful item skip."""
from metrics_db import skip_ocr_item
from metrics.db import skip_ocr_item
pool, conn = mock_db_pool
conn.execute.return_value = None
@@ -363,7 +363,7 @@ class TestLabelingStats:
@pytest.mark.asyncio
async def test_get_stats_no_db(self):
"""Test stats when database is not available."""
from metrics_db import get_ocr_labeling_stats
from metrics.db import get_ocr_labeling_stats
with patch('metrics_db.get_pool', new_callable=AsyncMock, return_value=None):
stats = await get_ocr_labeling_stats()
@@ -371,7 +371,7 @@ class TestLabelingStats:
def test_stats_response_model(self):
"""Test StatsResponse model structure."""
from ocr_labeling_api import StatsResponse
from ocr.labeling.api import StatsResponse
stats = StatsResponse(
total_items=100,
@@ -395,7 +395,7 @@ class TestTrainingExport:
def test_export_request_validation(self):
"""Test ExportRequest model validation."""
from ocr_labeling_api import ExportRequest
from ocr.labeling.api import ExportRequest
# Default format is generic
request = ExportRequest()
@@ -412,7 +412,7 @@ class TestTrainingExport:
@pytest.mark.asyncio
async def test_export_training_samples(self, mock_db_pool):
"""Test training sample export from database."""
from metrics_db import export_training_samples
from metrics.db import export_training_samples
pool, conn = mock_db_pool
conn.fetch.return_value = [
@@ -495,7 +495,7 @@ class TestOCRProcessing:
@pytest.mark.asyncio
async def test_run_ocr_on_image_no_service(self):
"""Test OCR when service is not available."""
from ocr_labeling_api import run_ocr_on_image
from ocr.labeling.api import run_ocr_on_image
with patch('ocr_labeling_api.VISION_OCR_AVAILABLE', False), \
patch('ocr_labeling_api.PADDLEOCR_AVAILABLE', False), \
@@ -512,7 +512,7 @@ class TestOCRProcessing:
@pytest.mark.asyncio
async def test_run_ocr_on_image_success(self, mock_vision_ocr):
"""Test successful OCR processing."""
from ocr_labeling_api import run_ocr_on_image
from ocr.labeling.api import run_ocr_on_image
text, confidence = await run_ocr_on_image(
image_data=b"fake image",
@@ -533,7 +533,7 @@ class TestOCRModelDispatcher:
@pytest.mark.asyncio
async def test_dispatcher_vision_model_default(self, mock_vision_ocr):
"""Test dispatcher uses Vision OCR by default."""
from ocr_labeling_api import run_ocr_on_image
from ocr.labeling.api import run_ocr_on_image
text, confidence = await run_ocr_on_image(
image_data=b"fake image",
@@ -547,7 +547,7 @@ class TestOCRModelDispatcher:
@pytest.mark.asyncio
async def test_dispatcher_paddleocr_model(self):
"""Test dispatcher routes to PaddleOCR."""
from ocr_labeling_api import run_ocr_on_image
from ocr.labeling.api import run_ocr_on_image
# Mock PaddleOCR
mock_regions = []
@@ -567,7 +567,7 @@ class TestOCRModelDispatcher:
@pytest.mark.asyncio
async def test_dispatcher_paddleocr_fallback_to_vision(self, mock_vision_ocr):
"""Test PaddleOCR falls back to Vision OCR when unavailable."""
from ocr_labeling_api import run_ocr_on_image
from ocr.labeling.api import run_ocr_on_image
with patch('ocr_labeling_api.PADDLEOCR_AVAILABLE', False):
text, confidence = await run_ocr_on_image(
@@ -583,7 +583,7 @@ class TestOCRModelDispatcher:
@pytest.mark.asyncio
async def test_dispatcher_trocr_model(self):
"""Test dispatcher routes to TrOCR."""
from ocr_labeling_api import run_ocr_on_image
from ocr.labeling.api import run_ocr_on_image
async def mock_trocr(image_data):
return "TrOCR erkannter Text", 0.85
@@ -603,7 +603,7 @@ class TestOCRModelDispatcher:
@pytest.mark.asyncio
async def test_dispatcher_donut_model(self):
"""Test dispatcher routes to Donut."""
from ocr_labeling_api import run_ocr_on_image
from ocr.labeling.api import run_ocr_on_image
async def mock_donut(image_data):
return "Donut erkannter Text", 0.80
@@ -623,7 +623,7 @@ class TestOCRModelDispatcher:
@pytest.mark.asyncio
async def test_dispatcher_unknown_model_uses_vision(self, mock_vision_ocr):
"""Test dispatcher uses Vision OCR for unknown models."""
from ocr_labeling_api import run_ocr_on_image
from ocr.labeling.api import run_ocr_on_image
text, confidence = await run_ocr_on_image(
image_data=b"fake image",
@@ -641,7 +641,7 @@ class TestOCRModelTypes:
def test_session_with_paddleocr_model(self):
"""Test session creation with PaddleOCR model."""
from ocr_labeling_api import SessionCreate
from ocr.labeling.api import SessionCreate
session = SessionCreate(
name="PaddleOCR Session",
@@ -653,7 +653,7 @@ class TestOCRModelTypes:
def test_session_with_donut_model(self):
"""Test session creation with Donut model."""
from ocr_labeling_api import SessionCreate
from ocr.labeling.api import SessionCreate
session = SessionCreate(
name="Donut Session",
@@ -665,7 +665,7 @@ class TestOCRModelTypes:
def test_session_with_trocr_model(self):
"""Test session creation with TrOCR model."""
from ocr_labeling_api import SessionCreate
from ocr.labeling.api import SessionCreate
session = SessionCreate(
name="TrOCR Session",
@@ -685,7 +685,7 @@ class TestResponseModels:
def test_session_response_model(self):
"""Test SessionResponse model."""
from ocr_labeling_api import SessionResponse
from ocr.labeling.api import SessionResponse
session = SessionResponse(
id="session-123",
@@ -706,7 +706,7 @@ class TestResponseModels:
def test_item_response_model(self):
"""Test ItemResponse model."""
from ocr_labeling_api import ItemResponse
from ocr.labeling.api import ItemResponse
item = ItemResponse(
id="item-456",
@@ -735,7 +735,7 @@ class TestDeduplication:
def test_hash_based_deduplication(self):
"""Test that same images produce same hash for deduplication."""
from ocr_labeling_api import compute_image_hash
from ocr.labeling.api import compute_image_hash
# Same content should be detected as duplicate
image1 = b"\x89PNG\x0d\x0a\x1a\x0a test image content"
@@ -748,7 +748,7 @@ class TestDeduplication:
def test_unique_images_different_hash(self):
"""Test that different images produce different hashes."""
from ocr_labeling_api import compute_image_hash
from ocr.labeling.api import compute_image_hash
image1 = b"\x89PNG unique content 1"
image2 = b"\x89PNG unique content 2"
@@ -13,7 +13,7 @@ import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
from ocr_pipeline_api import (
from ocr.pipeline.api import (
_split_paddle_multi_words,
_group_words_into_rows,
_merge_row_sequences,
@@ -13,7 +13,7 @@ Tests cover:
import numpy as np
import pytest
from page_crop import (
from ocr.pipeline.page_crop import (
detect_and_crop_page,
detect_page_splits,
_detect_format,
+12 -12
View File
@@ -56,7 +56,7 @@ class TestIngestionStatus:
def test_status_not_running(self):
"""Test status when no ingestion is running."""
from admin_api import _ingestion_status
from admin.api import _ingestion_status
# Reset status
_ingestion_status["running"] = False
@@ -67,7 +67,7 @@ class TestIngestionStatus:
def test_status_running(self):
"""Test status when ingestion is running."""
from admin_api import _ingestion_status
from admin.api import _ingestion_status
_ingestion_status["running"] = True
_ingestion_status["last_run"] = datetime.now().isoformat()
@@ -81,7 +81,7 @@ class TestUploadAPI:
def test_upload_record_creation(self):
"""Test that upload records are created correctly."""
from admin_api import _upload_history
from admin.api import _upload_history
# Clear history
_upload_history.clear()
@@ -102,7 +102,7 @@ class TestUploadAPI:
def test_upload_history_limit(self):
"""Test that upload history is limited to 100 entries."""
from admin_api import _upload_history
from admin.api import _upload_history
_upload_history.clear()
@@ -187,7 +187,7 @@ class TestMetricsDB:
@pytest.mark.asyncio
async def test_store_feedback_no_pool(self):
"""Test feedback storage when DB is not available."""
from metrics_db import store_feedback
from metrics.db import store_feedback
with patch('metrics_db.get_pool', new_callable=AsyncMock, return_value=None):
result = await store_feedback(
@@ -199,7 +199,7 @@ class TestMetricsDB:
@pytest.mark.asyncio
async def test_calculate_metrics_no_pool(self):
"""Test metrics calculation when DB is not available."""
from metrics_db import calculate_metrics
from metrics.db import calculate_metrics
with patch('metrics_db.get_pool', new_callable=AsyncMock, return_value=None):
metrics = await calculate_metrics()
@@ -214,7 +214,7 @@ class TestMetricsDB:
]
# Read the metrics_db module to check table names
from metrics_db import init_metrics_tables
from metrics.db import init_metrics_tables
# The function should create these tables
assert callable(init_metrics_tables)
@@ -231,8 +231,8 @@ class TestRAGIntegration:
@pytest.mark.asyncio
async def test_nibis_search(self):
"""Test NiBiS semantic search."""
from admin_api import search_nibis
from admin_api import NiBiSSearchRequest
from admin.api import search_nibis
from admin.api import NiBiSSearchRequest
request = NiBiSSearchRequest(
query="Gedichtanalyse Expressionismus",
@@ -265,7 +265,7 @@ class TestRAGIntegration:
@pytest.mark.asyncio
async def test_metrics_storage(self):
"""Test metrics storage in PostgreSQL."""
from metrics_db import store_feedback, calculate_metrics
from metrics.db import store_feedback, calculate_metrics
# This would require PostgreSQL running
# stored = await store_feedback(
@@ -330,7 +330,7 @@ class TestEmbeddings:
def test_vector_dimensions(self):
"""Test that vector dimensions are configured correctly."""
from eh_pipeline import get_vector_size, EMBEDDING_BACKEND
from korrektur.eh_pipeline import get_vector_size, EMBEDDING_BACKEND
size = get_vector_size()
@@ -341,7 +341,7 @@ class TestEmbeddings:
def test_chunking_config(self):
"""Test chunking configuration."""
from eh_pipeline import CHUNK_SIZE, CHUNK_OVERLAP
from korrektur.eh_pipeline import CHUNK_SIZE, CHUNK_OVERLAP
assert CHUNK_SIZE > 0
assert CHUNK_OVERLAP >= 0
+1 -1
View File
@@ -30,7 +30,7 @@ from datetime import datetime, timezone, timedelta
import sys
sys.path.insert(0, '..')
from rbac import (
from compliance.rbac import (
Role,
Action,
ResourceType,
@@ -4,7 +4,7 @@ import pytest
import sys, os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
from smart_spell import SmartSpellChecker, CorrectionResult
from ocr.spell.smart_spell import SmartSpellChecker, CorrectionResult
@pytest.fixture
@@ -4,7 +4,7 @@ import pytest
import sys, os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
from unified_grid import (
from grid.unified import (
_compute_dominant_row_height,
_classify_boxes,
build_unified_grid,
@@ -37,7 +37,7 @@ from fastapi.testclient import TestClient
# Import the main app and vocab-worksheet components
sys.path.insert(0, '..')
from main import app
from vocab_worksheet_api import (
from vocab.worksheet.api import (
_sessions,
_worksheets,
SessionStatus,
@@ -7,7 +7,7 @@ uses dynamic programming + dictionary lookup to find valid splits.
import pytest
from cv_review import _try_split_merged_word, _spell_dict_knows, _SPELL_AVAILABLE
from ocr.review.review import _try_split_merged_word, _spell_dict_knows, _SPELL_AVAILABLE
pytestmark = pytest.mark.skipif(
not _SPELL_AVAILABLE,
@@ -35,7 +35,7 @@ from fastapi.testclient import TestClient
# Import the main app and worksheet-editor components
sys.path.insert(0, '..')
from main import app
from worksheet_editor_api import (
from worksheet.editor_api import (
worksheets_db,
AIImageStyle,
WorksheetStatus,