Restructure: Move 47 cv_* files into ocr/ package
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 39s
CI / test-go-edu-search (push) Successful in 29s
CI / test-python-klausur (push) Failing after 2m34s
CI / test-python-agent-core (push) Successful in 20s
CI / test-nodejs-website (push) Successful in 26s

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-04-25 21:03:54 +02:00
parent 45287b3541
commit cb1be59e46
95 changed files with 317 additions and 103 deletions

View File

@@ -0,0 +1,2 @@
+"""Review sub-package (spell, LLM, pipeline orchestration)."""
+from .review import * # noqa: F401,F403

View File

@@ -183,7 +183,7 @@ async def llm_review_entries(
model: str = None,
) -> Dict:
"""OCR error correction. Uses spell-checker (REVIEW_ENGINE=spell) or LLM (REVIEW_ENGINE=llm)."""
-from cv_review_spell import spell_review_entries_sync, _SPELL_AVAILABLE
+from .spell import spell_review_entries_sync, _SPELL_AVAILABLE
if REVIEW_ENGINE == "spell" and _SPELL_AVAILABLE:
return spell_review_entries_sync(entries)
@@ -260,8 +260,8 @@ async def llm_review_entries_streaming(
Phase 0 (always): Run _fix_character_confusion and emit any changes.
"""
-from cv_ocr_engines import _fix_character_confusion
-from cv_review_spell import spell_review_entries_streaming, _SPELL_AVAILABLE
+from ..engines.engines import _fix_character_confusion
+from .spell import spell_review_entries_streaming, _SPELL_AVAILABLE
_CONF_FIELDS = ('english', 'german', 'example')
originals = [{f: e.get(f, '') for f in _CONF_FIELDS} for e in entries]

View File

@@ -13,24 +13,24 @@ from typing import Any, Dict, List, Optional
import numpy as np
-from cv_vocab_types import (
+from ..types import (
CV_PIPELINE_AVAILABLE,
PageRegion,
PipelineResult,
VocabRow,
)
-from cv_preprocessing import (
+from ..preprocessing.preprocessing import (
deskew_image,
dewarp_image,
render_image_high_res,
render_pdf_high_res,
)
-from cv_layout import (
+from ..layout.layout import (
analyze_layout,
create_layout_image,
create_ocr_image,
)
-from cv_ocr_engines import (
+from ..engines.engines import (
_group_words_into_lines,
)

View File

@@ -12,7 +12,7 @@ DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
"""
# Re-export everything for backward compatibility
-from cv_review_pipeline import ( # noqa: F401
+from .pipeline import ( # noqa: F401
ocr_region,
run_multi_pass_ocr,
match_lines_to_vocab,
@@ -20,7 +20,7 @@ from cv_review_pipeline import ( # noqa: F401
run_cv_pipeline,
)
-from cv_review_spell import ( # noqa: F401
+from .spell import ( # noqa: F401
_SPELL_AVAILABLE,
_spell_dict_knows,
_spell_fix_field,
@@ -31,7 +31,7 @@ from cv_review_spell import ( # noqa: F401
spell_review_entries_streaming,
)
-from cv_review_llm import ( # noqa: F401
+from .llm import ( # noqa: F401
OLLAMA_REVIEW_MODEL,
REVIEW_ENGINE,
_REVIEW_BATCH_SIZE,

View File

@@ -210,7 +210,7 @@ def spell_review_entries_sync(entries: List[Dict]) -> Dict:
Uses SmartSpellChecker for language-aware corrections with context-based
disambiguation (a/I), multi-digit substitution, and cross-language guard.
"""
-from cv_review_llm import _entry_needs_review
+from .llm import _entry_needs_review
t0 = time.time()
changes: List[Dict] = []