Restructure: Move 47 cv_* files into ocr/ package

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-25 21:03:54 +02:00
parent 45287b3541
commit cb1be59e46
95 changed files with 317 additions and 103 deletions
@@ -0,0 +1,2 @@
+"""Review sub-package (spell, LLM, pipeline orchestration)."""
+from .review import *  # noqa: F401,F403
@@ -183,7 +183,7 @@ async def llm_review_entries(
    model: str = None,
 ) -> Dict:
    """OCR error correction. Uses spell-checker (REVIEW_ENGINE=spell) or LLM (REVIEW_ENGINE=llm)."""
-    from cv_review_spell import spell_review_entries_sync, _SPELL_AVAILABLE
+    from .spell import spell_review_entries_sync, _SPELL_AVAILABLE

    if REVIEW_ENGINE == "spell" and _SPELL_AVAILABLE:
        return spell_review_entries_sync(entries)
@@ -260,8 +260,8 @@ async def llm_review_entries_streaming(

    Phase 0 (always): Run _fix_character_confusion and emit any changes.
    """
-    from cv_ocr_engines import _fix_character_confusion
-    from cv_review_spell import spell_review_entries_streaming, _SPELL_AVAILABLE
+    from ..engines.engines import _fix_character_confusion
+    from .spell import spell_review_entries_streaming, _SPELL_AVAILABLE

    _CONF_FIELDS = ('english', 'german', 'example')
    originals = [{f: e.get(f, '') for f in _CONF_FIELDS} for e in entries]
@@ -13,24 +13,24 @@ from typing import Any, Dict, List, Optional

 import numpy as np

-from cv_vocab_types import (
+from ..types import (
    CV_PIPELINE_AVAILABLE,
    PageRegion,
    PipelineResult,
    VocabRow,
 )
-from cv_preprocessing import (
+from ..preprocessing.preprocessing import (
    deskew_image,
    dewarp_image,
    render_image_high_res,
    render_pdf_high_res,
 )
-from cv_layout import (
+from ..layout.layout import (
    analyze_layout,
    create_layout_image,
    create_ocr_image,
 )
-from cv_ocr_engines import (
+from ..engines.engines import (
    _group_words_into_lines,
 )

@@ -12,7 +12,7 @@ DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
 """

 # Re-export everything for backward compatibility
-from cv_review_pipeline import (  # noqa: F401
+from .pipeline import (  # noqa: F401
    ocr_region,
    run_multi_pass_ocr,
    match_lines_to_vocab,
@@ -20,7 +20,7 @@ from cv_review_pipeline import (  # noqa: F401
    run_cv_pipeline,
 )

-from cv_review_spell import (  # noqa: F401
+from .spell import (  # noqa: F401
    _SPELL_AVAILABLE,
    _spell_dict_knows,
    _spell_fix_field,
@@ -31,7 +31,7 @@ from cv_review_spell import (  # noqa: F401
    spell_review_entries_streaming,
 )

-from cv_review_llm import (  # noqa: F401
+from .llm import (  # noqa: F401
    OLLAMA_REVIEW_MODEL,
    REVIEW_ENGINE,
    _REVIEW_BATCH_SIZE,
@@ -210,7 +210,7 @@ def spell_review_entries_sync(entries: List[Dict]) -> Dict:
    Uses SmartSpellChecker for language-aware corrections with context-based
    disambiguation (a/I), multi-digit substitution, and cross-language guard.
    """
-    from cv_review_llm import _entry_needs_review
+    from .llm import _entry_needs_review

    t0 = time.time()
    changes: List[Dict] = []