fix: _group_words_into_lines nach cv_ocr_engines.py verschieben

Funktion war nur in cv_review.py definiert, wurde aber auch in cv_ocr_engines.py und cv_layout.py benutzt — NameError zur Laufzeit.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-09 15:24:56 +01:00
parent 60c4138660
commit cf9dde9876
3 changed files with 28 additions and 26 deletions
@@ -21,6 +21,7 @@ from cv_vocab_types import (
    PageZone,
    RowGeometry,
 )
 from cv_ocr_engines import _group_words_into_lines  # noqa: E402
 logger = logging.getLogger(__name__)
@@ -37,6 +37,32 @@ except ImportError:
 # Pipeline Step 5: Word Grid from Columns × Rows
 # =============================================================================
 def _group_words_into_lines(words: List[Dict], y_tolerance_px: int = 20) -> List[List[Dict]]:
    """Cluster word boxes into visual text lines.

    Words are visited top-to-bottom (ties broken left-to-right). A word joins
    the open line while its ``top`` lies within ``y_tolerance_px`` of the
    ``top`` of the word that opened that line; otherwise it opens a new line.

    Args:
        words: Word dicts; each must provide ``'top'`` and ``'left'``.
        y_tolerance_px: Maximum vertical distance from the line's first word.

    Returns:
        Lines in top-to-bottom order, each sorted by ``'left'``. A falsy
        ``words`` yields ``[]``.
    """
    if not words:
        return []

    def by_left(entry):
        return entry['left']

    ordered = sorted(words, key=lambda entry: (entry['top'], entry['left']))
    grouped: List[List[Dict]] = []
    # ``top`` of the word that opened the most recent line.
    anchor_top = None
    for entry in ordered:
        if grouped and abs(entry['top'] - anchor_top) <= y_tolerance_px:
            grouped[-1].append(entry)
        else:
            grouped.append([entry])
            anchor_top = entry['top']
    return [sorted(row, key=by_left) for row in grouped]
 def _words_to_reading_order_lines(words: List[Dict], y_tolerance_px: int = 15) -> List[str]:
    """Group OCR words into visual lines in reading order.
@@ -33,6 +33,7 @@ from cv_layout import (
 )
 from cv_ocr_engines import (
    _fix_character_confusion,
    _group_words_into_lines,
 )
 logger = logging.getLogger(__name__)
@@ -227,32 +228,6 @@ def run_multi_pass_ocr(ocr_img: np.ndarray,
 # Stage 7: Line Alignment → Vocabulary Entries
 # =============================================================================
 def _group_words_into_lines(words: List[Dict], y_tolerance_px: int = 20) -> List[List[Dict]]:
    """Partition words into text lines using their vertical offsets.

    The words are ordered by ``(top, left)`` and cut into consecutive runs.
    A run ends as soon as a word's ``top`` differs from the ``top`` of the
    run's first word by more than ``y_tolerance_px``.

    Args:
        words: Word dicts carrying at least ``'top'`` and ``'left'``.
        y_tolerance_px: Allowed vertical offset relative to a run's first word.

    Returns:
        One list per run, top-to-bottom, each ordered by ``'left'``.
        A falsy ``words`` yields ``[]``.
    """
    if not words:
        return []
    ordered = sorted(words, key=lambda item: (item['top'], item['left']))
    # Indices in ``ordered`` at which a new line begins.
    line_starts = [0]
    for pos in range(1, len(ordered)):
        anchor_top = ordered[line_starts[-1]]['top']
        if not (abs(ordered[pos]['top'] - anchor_top) <= y_tolerance_px):
            line_starts.append(pos)
    line_ends = line_starts[1:] + [len(ordered)]
    return [
        sorted(ordered[begin:end], key=lambda item: item['left'])
        for begin, end in zip(line_starts, line_ends)
    ]
 def match_lines_to_vocab(ocr_results: Dict[str, List[Dict]],
                          regions: List[PageRegion],
                          y_tolerance_px: int = 25) -> List[VocabRow]: