klausur-service (7 monoliths): - grid_editor_helpers.py (1,737 → 5 files: columns, filters, headers, zones) - cv_cell_grid.py (1,675 → 7 files: build, legacy, streaming, merge, vocab) - worksheet_editor_api.py (1,305 → 4 files: models, AI, reconstruct, routes) - legal_corpus_ingestion.py (1,280 → 3 files: registry, chunking, ingestion) - cv_review.py (1,248 → 4 files: pipeline, spell, LLM, barrel) - cv_preprocessing.py (1,166 → 3 files: deskew, dewarp, barrel) - rbac.py, admin_api.py, routes/eh.py remain (next batch) backend-lehrer (1 monolith): - classroom_engine/repository.py (1,705 → 7 files by domain) All re-export barrels preserve backward compatibility. Zero import errors verified. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
61 lines
1.8 KiB
Python
61 lines
1.8 KiB
Python
"""
|
|
Cell-grid construction (v2 + legacy), vocab conversion, and word-grid OCR.
|
|
|
|
Re-export hub — all public and private names remain importable from here
|
|
for backward compatibility. The actual implementations live in:
|
|
|
|
cv_cell_grid_helpers.py — shared helpers (_heal_row_gaps, _is_artifact_row, ...)
|
|
cv_cell_grid_build.py — v2 hybrid grid (build_cell_grid_v2, _ocr_cell_crop)
|
|
cv_cell_grid_legacy.py — deprecated v1 grid (build_cell_grid, _ocr_single_cell)
|
|
cv_cell_grid_streaming.py — streaming variants (build_cell_grid_v2_streaming, ...)
|
|
cv_cell_grid_merge.py — row-merging logic (_merge_wrapped_rows, ...)
|
|
cv_cell_grid_vocab.py — vocab extraction (_cells_to_vocab_entries, build_word_grid)
|
|
|
|
License: Apache 2.0 (commercial use permitted)
|
|
PRIVACY: All processing is performed locally.
|
|
"""
|
|
|
|
# --- Helpers ---
|
|
from cv_cell_grid_helpers import ( # noqa: F401
|
|
_MIN_WORD_CONF,
|
|
_compute_cell_padding,
|
|
_ensure_minimum_crop_size,
|
|
_heal_row_gaps,
|
|
_is_artifact_row,
|
|
_select_psm_for_column,
|
|
)
|
|
|
|
# --- v2 build (current default) ---
|
|
from cv_cell_grid_build import ( # noqa: F401
|
|
_NARROW_COL_THRESHOLD_PCT,
|
|
_ocr_cell_crop,
|
|
build_cell_grid_v2,
|
|
)
|
|
|
|
# --- Legacy build (DEPRECATED) ---
|
|
from cv_cell_grid_legacy import ( # noqa: F401
|
|
_ocr_single_cell,
|
|
build_cell_grid,
|
|
)
|
|
|
|
# --- Streaming variants ---
|
|
from cv_cell_grid_streaming import ( # noqa: F401
|
|
build_cell_grid_streaming,
|
|
build_cell_grid_v2_streaming,
|
|
)
|
|
|
|
# --- Row merging ---
|
|
from cv_cell_grid_merge import ( # noqa: F401
|
|
_PHONETIC_ONLY_RE,
|
|
_is_phonetic_only_text,
|
|
_merge_continuation_rows,
|
|
_merge_phonetic_continuation_rows,
|
|
_merge_wrapped_rows,
|
|
)
|
|
|
|
# --- Vocab extraction ---
|
|
from cv_cell_grid_vocab import ( # noqa: F401
|
|
_cells_to_vocab_entries,
|
|
build_word_grid,
|
|
)
|