Fix: Sidebar scrollable + add Eltern-Portal nav link

overflow-hidden → overflow-y-auto so all nav items are reachable.
Added /parent (Eltern-Portal) link with people icon.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-04-25 20:49:44 +02:00
parent d87645ffce
commit 45287b3541
48 changed files with 6 additions and 1 deletion

View File

@@ -0,0 +1,471 @@
"""
Embedded box detection and page zone splitting for the CV vocabulary pipeline.
Detects boxes (grammar tips, exercises, etc.) that span the page width and
interrupt the normal column layout. Splits the page into vertical zones so
that column detection can run independently per zone.
Two-stage algorithm (both run, results merged):
1. Morphological line detection — finds bordered boxes via horizontal lines.
2. Background shading detection — finds shaded/colored boxes via median-blur
background analysis. Works for colored (blue, green) and grayscale
(gray shading on B/W scans) boxes.
Lizenz: Apache 2.0 (kommerziell nutzbar)
DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
"""
import logging
from typing import List, Optional, Tuple
import cv2
import numpy as np
from cv_vocab_types import DetectedBox, PageZone
logger = logging.getLogger(__name__)
# Public API of this module; all other names are internal helpers.
__all__ = [
    "detect_boxes",
    "split_page_into_zones",
]
# ---------------------------------------------------------------------------
# Stage 1: Morphological line detection
# ---------------------------------------------------------------------------
def _detect_boxes_by_lines(
    gray: np.ndarray,
    content_x: int,
    content_w: int,
    content_y: int,
    content_h: int,
) -> List[DetectedBox]:
    """Detect boxes whose top and bottom edges are long horizontal rules.

    Args:
        gray: Grayscale image (full page).
        content_x, content_w: Horizontal content bounds.
        content_y, content_h: Vertical content bounds.

    Returns:
        One DetectedBox per plausible top-line/bottom-line pairing.
    """
    page_h = gray.shape[0]

    # Ink → white on black so morphology can isolate long strokes.
    _, ink = cv2.threshold(gray, 180, 255, cv2.THRESH_BINARY_INV)

    # Opening with a wide, 1px-tall kernel keeps only long horizontal runs
    # (at least 50% of the content width).
    open_w = max(50, content_w // 2)
    horiz = cv2.morphologyEx(
        ink, cv2.MORPH_OPEN,
        cv2.getStructuringElement(cv2.MORPH_RECT, (open_w, 1)),
    )

    # Row-wise count of line pixels inside the content band.
    strip = horiz[:, content_x:content_x + content_w]
    row_counts = np.sum(strip > 0, axis=1)
    needed = content_w * 0.30

    # Collapse consecutive qualifying rows into (start, end) segments.
    segments: List[Tuple[int, int]] = []
    start: Optional[int] = None
    for row in range(page_h):
        hit = row_counts[row] >= needed
        if hit and start is None:
            start = row
        elif not hit and start is not None:
            segments.append((start, row))
            start = None
    if start is not None:
        segments.append((start, page_h))

    if len(segments) < 2:
        return []

    # Pair a top line with the nearest bottom line that yields a plausible
    # box height: at least 30px, at most 70% of the content height.
    min_box_h = 30
    max_box_h = int(content_h * 0.70)
    detected: List[DetectedBox] = []
    consumed = set()
    for i, (top_a, top_b) in enumerate(segments):
        if i in consumed:
            continue
        for j in range(i + 1, len(segments)):
            if j in consumed:
                continue
            bot_a, bot_b = segments[j]
            height = bot_b - top_a
            if not (min_box_h <= height <= max_box_h):
                continue
            detected.append(DetectedBox(
                x=content_x,
                y=top_a,
                width=content_w,
                height=height,
                confidence=0.8,
                # Border thickness estimated from the taller of the two rules.
                border_thickness=max(top_b - top_a, bot_b - bot_a),
            ))
            consumed.add(i)
            consumed.add(j)
            break  # this top line is paired; move to the next candidate
    return detected
# ---------------------------------------------------------------------------
# Stage 2: Background shading detection (color + grayscale)
# ---------------------------------------------------------------------------
def _detect_boxes_by_shading(
    img_bgr: np.ndarray,
    content_x: int,
    content_w: int,
    content_y: int,
    content_h: int,
) -> List[DetectedBox]:
    """Find boxes with shaded/colored background (no visible border lines).

    Uses heavy median blur to remove text and reveal the underlying background.
    Then detects rectangular regions where the background differs from white.
    Works for both colored boxes (blue, green) and grayscale shading (gray on
    B/W scans).

    Args:
        img_bgr: BGR color image (full page).
        content_x, content_w: Horizontal content bounds.
        content_y, content_h: Vertical content bounds.

    Returns:
        List of DetectedBox for each detected shaded box.
    """
    h, w = img_bgr.shape[:2]
    # --- Heavy median blur removes text strokes, keeps background ---
    blur_size = 31  # large kernel to wipe out text
    blurred = cv2.medianBlur(img_bgr, blur_size)
    blur_gray = cv2.cvtColor(blurred, cv2.COLOR_BGR2GRAY)
    blur_hsv = cv2.cvtColor(blurred, cv2.COLOR_BGR2HSV)
    # Estimate page background from top-left / top-right corners
    # (assumes corners are plain page — TODO confirm for full-bleed layouts)
    corner_size = max(20, min(h // 10, w // 10))
    corners = np.concatenate([
        blur_gray[:corner_size, :corner_size].ravel(),
        blur_gray[:corner_size, -corner_size:].ravel(),
    ])
    page_bg = float(np.median(corners))
    # Two masks: grayscale shading + color saturation
    # Grayscale: regions noticeably darker than the page background
    shade_thresh = max(page_bg - 30, 150)
    gray_mask = (blur_gray < shade_thresh).astype(np.uint8) * 255
    # Color: regions with noticeable saturation (blue/green/etc. boxes)
    sat_mask = (blur_hsv[:, :, 1] > 20).astype(np.uint8) * 255
    combined = cv2.bitwise_or(gray_mask, sat_mask)
    # Morphological cleanup: close gaps, remove small noise
    kernel_close = cv2.getStructuringElement(cv2.MORPH_RECT, (25, 10))
    combined = cv2.morphologyEx(combined, cv2.MORPH_CLOSE, kernel_close)
    kernel_open = cv2.getStructuringElement(cv2.MORPH_RECT, (10, 5))
    combined = cv2.morphologyEx(combined, cv2.MORPH_OPEN, kernel_open)
    contours, _ = cv2.findContours(combined, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # Size thresholds: smaller boxes allowed (e.g. "German leihen" ~30% width)
    min_area = content_w * 30  # at least 30px tall at full width
    min_box_h = 25
    max_box_h = int(content_h * 0.70)
    min_width_ratio = 0.25  # boxes can be ~25% of content width
    boxes: List[DetectedBox] = []
    for cnt in contours:
        area = cv2.contourArea(cnt)
        if area < min_area:
            continue
        bx, by, bw, bh = cv2.boundingRect(cnt)
        # Width filter
        if bw < content_w * min_width_ratio:
            continue
        # Height filter
        if bh < min_box_h or bh > max_box_h:
            continue
        # Rectangularity check: contour area must cover at least 50% of its
        # bounding rect, otherwise the region is too ragged to be a box.
        rect_area = bw * bh
        if rect_area > 0 and area / rect_area < 0.5:
            continue
        # Verify that the background inside this region is actually shaded
        roi_gray = blur_gray[by:by + bh, bx:bx + bw]
        roi_hsv = blur_hsv[by:by + bh, bx:bx + bw]
        if roi_gray.size == 0:
            continue
        median_val = float(np.median(roi_gray))
        median_sat = float(np.median(roi_hsv[:, :, 1]))
        # Must be noticeably different from page background
        is_shaded = median_val < (page_bg - 15)
        is_colored = median_sat > 15
        if not is_shaded and not is_colored:
            continue
        # Color is a stronger signal than mere gray shading.
        conf = 0.7 if is_colored else 0.6
        boxes.append(DetectedBox(
            x=bx,
            y=by,
            width=bw,
            height=bh,
            confidence=conf,
            border_thickness=0,  # shaded boxes have no border line
        ))
    return boxes
# ---------------------------------------------------------------------------
# Validation
# ---------------------------------------------------------------------------
def _validate_box(
    box: DetectedBox,
    gray: np.ndarray,
    content_w: int,
    content_h: int,
    median_row_gap: int,
) -> bool:
    """Accept only boxes that are plausibly real content boxes.

    Rejects detections that are too narrow, too small/large, look like a
    table-row separator, or contain almost no ink (i.e. no text).
    """
    # Must span more than 25% of the content width
    # (lowered from 60% to allow smaller boxes).
    if box.width < content_w * 0.25:
        return False

    # Height window: 25px up to 70% of the content height.
    if not (25 <= box.height <= content_h * 0.70):
        return False

    # Real boxes are at least 3x the median row gap; anything thinner is
    # probably a table-row separator.
    if median_row_gap > 0 and box.height < median_row_gap * 3:
        return False

    # Ink-density check: the box must actually contain some text.
    img_h, img_w = gray.shape[:2]
    top = max(0, box.y)
    bottom = min(img_h, box.y + box.height)
    left = max(0, box.x)
    right = min(img_w, box.x + box.width)
    region = gray[top:bottom, left:right]
    if region.size == 0:
        return False
    # Nearly-empty regions (< 0.2% dark pixels) are not real content boxes.
    return np.sum(region < 128) / region.size >= 0.002
# ---------------------------------------------------------------------------
# Public API: detect_boxes
# ---------------------------------------------------------------------------
def _merge_overlapping_boxes(boxes: List[DetectedBox]) -> List[DetectedBox]:
    """Deduplicate detections that overlap heavily.

    Two boxes are merged when their intersection covers more than 50% of
    the smaller box (which also covers full containment). The box with the
    higher confidence survives; at equal confidence the larger one wins,
    because boxes are processed in descending area order.
    """
    if len(boxes) <= 1:
        return boxes
    # Sort by area descending so larger boxes are processed first
    boxes = sorted(boxes, key=lambda b: b.width * b.height, reverse=True)
    keep = [True] * len(boxes)
    for i in range(len(boxes)):
        if not keep[i]:
            continue
        bi = boxes[i]
        for j in range(i + 1, len(boxes)):
            if not keep[j]:
                continue
            bj = boxes[j]
            # Compute the intersection rectangle
            x1 = max(bi.x, bj.x)
            y1 = max(bi.y, bj.y)
            x2 = min(bi.x + bi.width, bj.x + bj.width)
            y2 = min(bi.y + bi.height, bj.y + bj.height)
            if x2 <= x1 or y2 <= y1:
                continue  # no overlap
            inter = (x2 - x1) * (y2 - y1)
            area_i = bi.width * bi.height
            area_j = bj.width * bj.height
            smaller_area = min(area_i, area_j)
            # If overlap covers > 50% of the smaller box, merge (drop the weaker)
            if smaller_area > 0 and inter / smaller_area > 0.50:
                # Keep the one with higher confidence; if equal, keep larger
                if bj.confidence > bi.confidence:
                    keep[i] = False
                    break
                else:
                    keep[j] = False
    return [b for b, k in zip(boxes, keep) if k]
def detect_boxes(
    img_bgr: np.ndarray,
    content_x: int,
    content_w: int,
    content_y: int,
    content_h: int,
    median_row_gap: int = 0,
) -> List[DetectedBox]:
    """Detect embedded boxes on a page image.

    Runs BOTH the line-based and the shading-based detector, merges and
    deduplicates the candidates, then validates each survivor.

    Args:
        img_bgr: BGR color image (full page or cropped).
        content_x, content_w: Horizontal content bounds.
        content_y, content_h: Vertical content bounds.
        median_row_gap: Median row gap height (for filtering out table separators).

    Returns:
        List of validated DetectedBox instances, sorted by y position.
    """
    gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)

    # Stage 1 finds bordered boxes; stage 2 finds shaded/colored ones.
    from_lines = _detect_boxes_by_lines(gray, content_x, content_w, content_y, content_h)
    from_shade = _detect_boxes_by_shading(img_bgr, content_x, content_w, content_y, content_h)
    logger.debug("BoxDetect: %d line-based, %d shading-based candidates",
                 len(from_lines), len(from_shade))

    # Merge both candidate sets, drop duplicates, validate, order top→bottom.
    deduped = _merge_overlapping_boxes(from_lines + from_shade)
    final = sorted(
        (b for b in deduped
         if _validate_box(b, gray, content_w, content_h, median_row_gap)),
        key=lambda b: b.y,
    )

    if final:
        logger.info("BoxDetect: %d box(es) detected (line=%d, shade=%d, merged=%d)",
                    len(final), len(from_lines), len(from_shade), len(deduped))
    else:
        logger.debug("BoxDetect: no boxes detected")
    return final
# ---------------------------------------------------------------------------
# Zone Splitting
# ---------------------------------------------------------------------------
def split_page_into_zones(
    content_x: int,
    content_y: int,
    content_w: int,
    content_h: int,
    boxes: List[DetectedBox],
    min_zone_height: int = 40,
) -> List[PageZone]:
    """Split a page into vertical zones based on detected boxes.

    Regions above, between, and below boxes become 'content' zones;
    box regions become 'box' zones.

    Args:
        content_x, content_y, content_w, content_h: Content area bounds.
        boxes: Detected boxes, assumed sorted by y position (top to bottom).
        min_zone_height: Minimum height for a content zone to be kept.

    Returns:
        List of PageZone, ordered top to bottom.
    """
    if not boxes:
        # Single zone: entire content area
        return [PageZone(
            index=0,
            zone_type='content',
            y=content_y,
            height=content_h,
            x=content_x,
            width=content_w,
        )]
    zones: List[PageZone] = []
    zone_idx = 0
    cursor_y = content_y
    content_bottom = content_y + content_h
    for box in boxes:
        # Content zone above this box (skipped when the gap is too thin)
        gap_above = box.y - cursor_y
        if gap_above >= min_zone_height:
            zones.append(PageZone(
                index=zone_idx,
                zone_type='content',
                y=cursor_y,
                height=gap_above,
                x=content_x,
                width=content_w,
            ))
            zone_idx += 1
        # Box zone
        zones.append(PageZone(
            index=zone_idx,
            zone_type='box',
            y=box.y,
            height=box.height,
            x=box.x,
            width=box.width,
            box=box,
        ))
        zone_idx += 1
        cursor_y = box.y + box.height
    # Content zone below the last box
    remaining = content_bottom - cursor_y
    if remaining >= min_zone_height:
        zones.append(PageZone(
            index=zone_idx,
            zone_type='content',
            y=cursor_y,
            height=remaining,
            x=content_x,
            width=content_w,
        ))
    # Lazy %-formatting: consistent with the module's other log calls and
    # avoids building the message when INFO logging is disabled.
    logger.info("ZoneSplit: %d zones from %d box(es): %s",
                len(zones), len(boxes), [z.zone_type for z in zones])
    return zones

View File

@@ -0,0 +1,339 @@
"""
Box layout classifier — detects internal layout type of embedded boxes.
Classifies each box as: flowing | columnar | bullet_list | header_only
and provides layout-appropriate grid building.
Used by the Box-Grid-Review step to rebuild box zones with correct structure.
"""
import logging
import re
import statistics
from typing import Any, Dict, List, Optional, Tuple
logger = logging.getLogger(__name__)
# Bullet / list-item patterns at the start of a line.
# Matches either a bullet/dash glyph, a 1-2 digit number, or a lowercase
# letter, each followed by "." or ")" where applicable and whitespace.
_BULLET_RE = re.compile(
    r'^[\-\u2022\u2013\u2014\u25CF\u25CB\u25AA\u25A0•·]\s'  # dash / bullet glyph
    r'|^\d{1,2}[.)]\s'  # numbered: "1) " or "1. "
    r'|^[a-z][.)]\s'  # lettered: "a) " or "a. "
)
def classify_box_layout(
    words: List[Dict],
    box_w: int,
    box_h: int,
) -> str:
    """Classify the internal layout of a detected box.

    Args:
        words: OCR word dicts within the box (with top, left, width, height, text)
        box_w: Box width in pixels
        box_h: Box height in pixels

    Returns:
        'header_only' | 'bullet_list' | 'columnar' | 'flowing'
    """
    if not words:
        return "header_only"
    # Group words into lines by y-proximity
    lines = _group_into_lines(words)
    # Header only: very few words or a single line
    total_words = sum(len(line) for line in lines)
    if total_words <= 5 or len(lines) <= 1:
        return "header_only"
    # Bullet list: check if the majority of lines start with bullet patterns
    bullet_count = 0
    for line in lines:
        first_text = line[0].get("text", "") if line else ""
        if _BULLET_RE.match(first_text):
            bullet_count += 1
        # Also check if the first word IS a lone bullet glyph.
        # BUG FIX: the original tuple contained several empty strings
        # (mojibake-stripped bullet glyphs), so any blank/whitespace-only
        # first word was wrongly counted as a bullet.
        elif first_text.strip() in ("-", "•", "–", "—", "·", "▪", "●"):
            bullet_count += 1
    if bullet_count >= len(lines) * 0.4 and bullet_count >= 2:
        return "bullet_list"
    # Columnar: multiple distinct x-clusters across at least 3 lines
    if len(lines) >= 3 and _has_column_structure(words, box_w):
        return "columnar"
    # Default: flowing text
    return "flowing"
def _group_into_lines(words: List[Dict]) -> List[List[Dict]]:
"""Group words into lines by y-proximity."""
if not words:
return []
sorted_words = sorted(words, key=lambda w: (w["top"], w["left"]))
heights = [w["height"] for w in sorted_words if w.get("height", 0) > 0]
median_h = statistics.median(heights) if heights else 20
y_tolerance = max(median_h * 0.5, 5)
lines: List[List[Dict]] = []
current_line: List[Dict] = [sorted_words[0]]
current_y = sorted_words[0]["top"]
for w in sorted_words[1:]:
if abs(w["top"] - current_y) <= y_tolerance:
current_line.append(w)
else:
lines.append(sorted(current_line, key=lambda ww: ww["left"]))
current_line = [w]
current_y = w["top"]
if current_line:
lines.append(sorted(current_line, key=lambda ww: ww["left"]))
return lines
def _has_column_structure(words: List[Dict], box_w: int) -> bool:
"""Check if words have multiple distinct left-edge clusters (columns)."""
if box_w <= 0:
return False
lines = _group_into_lines(words)
if len(lines) < 3:
return False
# Collect left-edges of non-first words in each line
# (first word of each line often aligns regardless of columns)
left_edges = []
for line in lines:
for w in line[1:]: # skip first word
left_edges.append(w["left"])
if len(left_edges) < 4:
return False
# Check if left edges cluster into 2+ distinct groups
left_edges.sort()
gaps = [left_edges[i + 1] - left_edges[i] for i in range(len(left_edges) - 1)]
if not gaps:
return False
median_gap = statistics.median(gaps)
# A column gap is typically > 15% of box width
column_gap_threshold = box_w * 0.15
large_gaps = [g for g in gaps if g > column_gap_threshold]
return len(large_gaps) >= 1
def build_box_zone_grid(
    zone_words: List[Dict],
    box_x: int,
    box_y: int,
    box_w: int,
    box_h: int,
    zone_index: int,
    img_w: int,
    img_h: int,
    layout_type: Optional[str] = None,
) -> Dict[str, Any]:
    """Build a grid for a box zone with layout-aware processing.

    If layout_type is None, auto-detects it via classify_box_layout.
    For 'flowing' and 'bullet_list', forces single-column layout.
    For 'columnar', uses the standard multi-column detection.
    For 'header_only', creates a single cell.

    Args:
        zone_words: OCR word dicts inside the box (top/left/width/height/text).
        box_x, box_y, box_w, box_h: Box bounds in pixels.
        zone_index: Zone index, embedded in cell ids ("Z{n}_R{r}C{c}").
        img_w, img_h: Full-image dimensions for percent coordinates.
        layout_type: Optional pre-classified layout; auto-detected if None.

    Returns:
        Same format as _build_zone_grid (columns, rows, cells, header_rows)
        plus 'box_layout_type' and 'box_grid_reviewed' keys.
    """
    if not zone_words:
        return {
            "columns": [],
            "rows": [],
            "cells": [],
            "header_rows": [],
            "box_layout_type": layout_type or "header_only",
            "box_grid_reviewed": False,
        }
    # Auto-detect layout if not specified
    if not layout_type:
        layout_type = classify_box_layout(zone_words, box_w, box_h)
    logger.info(
        "Box zone %d: layout_type=%s, %d words, %dx%d",
        zone_index, layout_type, len(zone_words), box_w, box_h,
    )
    if layout_type == "header_only":
        # Single cell with all text concatenated in reading order
        all_text = " ".join(
            w.get("text", "") for w in sorted(zone_words, key=lambda ww: (ww["top"], ww["left"]))
        ).strip()
        return {
            "columns": [{"col_index": 0, "index": 0, "label": "column_text", "col_type": "column_1",
                         "x_min_px": box_x, "x_max_px": box_x + box_w,
                         "x_min_pct": round(box_x / img_w * 100, 2) if img_w else 0,
                         "x_max_pct": round((box_x + box_w) / img_w * 100, 2) if img_w else 0,
                         "bold": False}],
            "rows": [{"index": 0, "row_index": 0,
                      "y_min": box_y, "y_max": box_y + box_h, "y_center": box_y + box_h / 2,
                      "y_min_px": box_y, "y_max_px": box_y + box_h,
                      "y_min_pct": round(box_y / img_h * 100, 2) if img_h else 0,
                      "y_max_pct": round((box_y + box_h) / img_h * 100, 2) if img_h else 0,
                      "is_header": True}],
            "cells": [{
                "cell_id": f"Z{zone_index}_R0C0",
                "row_index": 0,
                "col_index": 0,
                "col_type": "column_1",
                "text": all_text,
                "word_boxes": zone_words,
            }],
            "header_rows": [0],
            "box_layout_type": layout_type,
            "box_grid_reviewed": False,
        }
    if layout_type in ("flowing", "bullet_list"):
        # Force single column — each logical item becomes one row with one
        # cell. Bullet structure is detected from indentation so that
        # continuation lines merge into the bullet they belong to.
        lines = _group_into_lines(zone_words)
        column = {
            "col_index": 0, "index": 0, "label": "column_text", "col_type": "column_1",
            "x_min_px": box_x, "x_max_px": box_x + box_w,
            "x_min_pct": round(box_x / img_w * 100, 2) if img_w else 0,
            "x_max_pct": round((box_x + box_w) / img_w * 100, 2) if img_w else 0,
            "bold": False,
        }
        # --- Detect indentation levels (px from the box's left edge) ---
        line_indents = []
        for line_words in lines:
            if not line_words:
                line_indents.append(0)
                continue
            min_left = min(w["left"] for w in line_words)
            line_indents.append(min_left - box_x)
        # Find the minimum indent (= bullet/main level)
        valid_indents = [ind for ind in line_indents if ind >= 0]
        min_indent = min(valid_indents) if valid_indents else 0
        # Indentation threshold: lines indented > 15px more than minimum
        # are continuation lines belonging to the previous bullet
        INDENT_THRESHOLD = 15
        # --- Group lines into logical items (bullet + continuations) ---
        # Each item is a list of line indices
        items: List[List[int]] = []
        for li, indent in enumerate(line_indents):
            is_continuation = (indent > min_indent + INDENT_THRESHOLD) and len(items) > 0
            if is_continuation:
                items[-1].append(li)
            else:
                items.append([li])
        logger.info(
            "Box zone %d flowing: %d lines → %d items (indents=%s, min=%d, threshold=%d)",
            zone_index, len(lines), len(items),
            [int(i) for i in line_indents], int(min_indent), INDENT_THRESHOLD,
        )
        # --- Build rows and cells from grouped items ---
        rows = []
        cells = []
        header_rows = []
        for row_idx, item_line_indices in enumerate(items):
            # Collect all words from all lines in this item
            item_words = []
            item_texts = []
            for li in item_line_indices:
                if li < len(lines):
                    item_words.extend(lines[li])
                    line_text = " ".join(w.get("text", "") for w in lines[li]).strip()
                    if line_text:
                        item_texts.append(line_text)
            if not item_words:
                continue
            y_min = min(w["top"] for w in item_words)
            y_max = max(w["top"] + w["height"] for w in item_words)
            y_center = (y_min + y_max) / 2
            row = {
                "index": row_idx,
                "row_index": row_idx,
                "y_min": y_min,
                "y_max": y_max,
                "y_center": y_center,
                "y_min_px": y_min,
                "y_max_px": y_max,
                "y_min_pct": round(y_min / img_h * 100, 2) if img_h else 0,
                "y_max_pct": round(y_max / img_h * 100, 2) if img_h else 0,
                "is_header": False,
            }
            rows.append(row)
            # Join multi-line text with newline for display
            merged_text = "\n".join(item_texts)
            # Add a bullet marker if this is a bullet item without one
            first_text = item_texts[0] if item_texts else ""
            is_bullet = len(item_line_indices) > 1 or _BULLET_RE.match(first_text)
            if is_bullet and not _BULLET_RE.match(first_text) and row_idx > 0:
                # BUG FIX: the original prepended "" (a mojibake-stripped
                # bullet glyph), which was a no-op. Restore a visible bullet.
                merged_text = "• " + merged_text
            cell = {
                "cell_id": f"Z{zone_index}_R{row_idx}C0",
                "row_index": row_idx,
                "col_index": 0,
                "col_type": "column_1",
                "text": merged_text,
                "word_boxes": item_words,
            }
            cells.append(cell)
        # Detect header: first item if it has no continuation lines and is short
        if len(items) >= 2:
            first_item_texts = []
            for li in items[0]:
                if li < len(lines):
                    first_item_texts.append(" ".join(w.get("text", "") for w in lines[li]).strip())
            first_text = " ".join(first_item_texts)
            if (len(first_text) < 40
                    or first_text.isupper()
                    or first_text.rstrip().endswith(':')):
                header_rows = [0]
        return {
            "columns": [column],
            "rows": rows,
            "cells": cells,
            "header_rows": header_rows,
            "box_layout_type": layout_type,
            "box_grid_reviewed": False,
        }
    # Columnar: use the standard grid builder with independent column
    # detection. Import is deferred to this branch only — the other layout
    # types never need grid_editor_helpers (the unused _cluster_rows import
    # was removed).
    from grid_editor_helpers import _build_zone_grid
    result = _build_zone_grid(
        zone_words, box_x, box_y, box_w, box_h,
        zone_index, img_w, img_h,
        global_columns=None,  # detect columns independently
    )
    # Colspan detection is handled generically by _detect_colspan_cells
    # in grid_editor_helpers.py (called inside _build_zone_grid).
    result["box_layout_type"] = layout_type
    result["box_grid_reviewed"] = False
    return result

View File

@@ -0,0 +1,312 @@
"""
Color detection for OCR word boxes.
Detects the text color of existing OCR words and recovers colored text
regions (e.g. red markers, blue headings) that standard OCR may have missed.
Standard OCR (Tesseract, PaddleOCR) binarises images before processing,
destroying all color information. This module adds it back by sampling
HSV pixel values at word-box positions and finding colored regions that
no word-box covers.
Lizenz: Apache 2.0 (kommerziell nutzbar)
DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
"""
import logging
from typing import Any, Dict, List, Optional, Tuple
import cv2
import numpy as np
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# HSV color ranges (OpenCV: H 0-180, S 0-255, V 0-255)
# ---------------------------------------------------------------------------
# Each entry maps a color name to one or more (lower, upper) HSV bounds
# for cv2.inRange. Red wraps around hue 0, hence two ranges.
_COLOR_RANGES: Dict[str, List[Tuple[np.ndarray, np.ndarray]]] = {
    "red": [
        (np.array([0, 70, 50]), np.array([10, 255, 255])),
        (np.array([170, 70, 50]), np.array([180, 255, 255])),
    ],
    "orange": [
        (np.array([10, 70, 50]), np.array([25, 255, 255])),
    ],
    "yellow": [
        (np.array([25, 70, 50]), np.array([35, 255, 255])),
    ],
    "green": [
        (np.array([35, 70, 50]), np.array([85, 255, 255])),
    ],
    "blue": [
        (np.array([100, 70, 50]), np.array([130, 255, 255])),
    ],
    "purple": [
        (np.array([130, 70, 50]), np.array([170, 255, 255])),
    ],
}

# Display hex value emitted for each detected color name.
_COLOR_HEX: Dict[str, str] = {
    "black": "#000000",
    "gray": "#6b7280",
    "red": "#dc2626",
    "orange": "#ea580c",
    "yellow": "#ca8a04",
    "green": "#16a34a",
    "blue": "#2563eb",
    "purple": "#9333ea",
}
def _hue_to_color_name(hue: float) -> str:
"""Map OpenCV hue (0-180) to a color name."""
if hue < 10 or hue > 170:
return "red"
if hue < 25:
return "orange"
if hue < 35:
return "yellow"
if hue < 85:
return "green"
if hue < 130:
return "blue"
return "purple"
# ---------------------------------------------------------------------------
# 1. Color annotation for existing word boxes
# ---------------------------------------------------------------------------
def detect_word_colors(
    img_bgr: np.ndarray,
    word_boxes: List[Dict],
    sat_threshold: int = 55,
    min_sat_ratio: float = 0.25,
) -> None:
    """Annotate each word_box in-place with its detected text color.

    Adds ``color`` (hex string) and ``color_name`` (e.g. 'red', 'black')
    keys to each dict.

    Args:
        img_bgr: BGR page image; None is a no-op.
        word_boxes: OCR word dicts (left/top/width/height), mutated in place.
        sat_threshold: HSV saturation above which a pixel counts as colored.
        min_sat_ratio: Minimum fraction of saturated text pixels required
            to classify the word as colored at all.

    Algorithm per word:
        1. Crop the word region from the image.
        2. Otsu-threshold for text/background separation.
        3. Sample background color from border pixels of the crop.
        4. Remove text pixels that match the background (avoids colored
           backgrounds like blue boxes leaking into the result).
        5. Use **median** hue (robust to outliers) and require a minimum
           ratio of saturated pixels before classifying as colored.
    """
    if img_bgr is None or not word_boxes:
        return
    img_hsv = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2HSV)
    img_h, img_w = img_bgr.shape[:2]
    colored_count = 0
    for wb in word_boxes:
        # Clamp the word rect to the image bounds.
        x1 = max(0, int(wb["left"]))
        y1 = max(0, int(wb["top"]))
        x2 = min(img_w, int(wb["left"] + wb["width"]))
        y2 = min(img_h, int(wb["top"] + wb["height"]))
        if x2 <= x1 or y2 <= y1:
            # Degenerate rect — default to black.
            wb["color"] = _COLOR_HEX["black"]
            wb["color_name"] = "black"
            continue
        crop_hsv = img_hsv[y1:y2, x1:x2]
        crop_bgr = img_bgr[y1:y2, x1:x2]
        crop_gray = cv2.cvtColor(crop_bgr, cv2.COLOR_BGR2GRAY)
        ch, cw = crop_hsv.shape[:2]
        # --- Text mask: Otsu (adaptive) + high-saturation pixels ---
        _, dark_mask = cv2.threshold(
            crop_gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU,
        )
        sat_mask = (crop_hsv[:, :, 1] > sat_threshold).astype(np.uint8) * 255
        text_mask = cv2.bitwise_or(dark_mask, sat_mask)
        text_pixels = crop_hsv[text_mask > 0]
        if len(text_pixels) < 3:
            # Too few text pixels for a reliable estimate — default to black.
            wb["color"] = _COLOR_HEX["black"]
            wb["color_name"] = "black"
            continue
        # --- Background subtraction via border pixels ---
        # Sample background from the 2px border ring of the crop
        if ch > 6 and cw > 6:
            border = 2
            bg_top = crop_hsv[:border, :].reshape(-1, 3)
            bg_bot = crop_hsv[-border:, :].reshape(-1, 3)
            bg_lft = crop_hsv[border:-border, :border].reshape(-1, 3)
            bg_rgt = crop_hsv[border:-border, -border:].reshape(-1, 3)
            bg_pixels = np.vstack([bg_top, bg_bot, bg_lft, bg_rgt])
            bg_med_h = float(np.median(bg_pixels[:, 0]))
            bg_med_s = float(np.median(bg_pixels[:, 1]))
            # If background is tinted (S > 15), remove text pixels
            # with similar hue to avoid false colored detections
            if bg_med_s > 15:
                # Circular hue distance on the 0-180 OpenCV scale.
                hue_diff = np.minimum(
                    np.abs(text_pixels[:, 0].astype(float) - bg_med_h),
                    180.0 - np.abs(text_pixels[:, 0].astype(float) - bg_med_h),
                )
                keep = hue_diff > 20
                if np.any(keep):
                    text_pixels = text_pixels[keep]
                if len(text_pixels) < 3:
                    wb["color"] = _COLOR_HEX["black"]
                    wb["color_name"] = "black"
                    continue
        # --- Classification using MEDIAN (robust to outliers) ---
        median_sat = float(np.median(text_pixels[:, 1]))
        sat_count = int(np.sum(text_pixels[:, 1] > sat_threshold))
        sat_ratio = sat_count / len(text_pixels)
        if median_sat < sat_threshold or sat_ratio < min_sat_ratio:
            wb["color"] = _COLOR_HEX["black"]
            wb["color_name"] = "black"
        else:
            # Use median hue of saturated pixels only for a cleaner signal
            sat_pixels = text_pixels[text_pixels[:, 1] > sat_threshold]
            median_hue = float(np.median(sat_pixels[:, 0]))
            name = _hue_to_color_name(median_hue)
            # Red requires higher saturation — scanner artifacts on black
            # text often produce a slight warm tint (hue ~0) with low
            # saturation that would otherwise be misclassified as red.
            if name == "red" and median_sat < 90:
                wb["color"] = _COLOR_HEX["black"]
                wb["color_name"] = "black"
                continue
            wb["color"] = _COLOR_HEX.get(name, _COLOR_HEX["black"])
            wb["color_name"] = name
            colored_count += 1
    if colored_count:
        logger.info("color annotation: %d / %d words are colored",
                    colored_count, len(word_boxes))
# ---------------------------------------------------------------------------
# 2. Recover colored text that OCR missed
# ---------------------------------------------------------------------------
def recover_colored_text(
    img_bgr: np.ndarray,
    existing_words: List[Dict],
    min_area: int = 40,
    max_regions: int = 60,
) -> List[Dict]:
    """Find colored text regions not covered by any existing word box.

    Returns a list of recovered word dicts with ``color``, ``color_name``,
    and ``recovered=True`` fields. The ``text`` is set via a lightweight
    shape heuristic (e.g. ``!`` for tall narrow shapes) or ``?``.

    Args:
        img_bgr: BGR page image; None yields an empty list.
        existing_words: OCR word dicts already found on the page.
        min_area: Minimum contour area (px²) for a recovered region.
        max_regions: Cap on recovered regions per color.
    """
    if img_bgr is None:
        return []
    img_hsv = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2HSV)
    ih, iw = img_bgr.shape[:2]
    # Upper area bound: 0.5% of the page — anything larger is not a character.
    max_area = int(ih * iw * 0.005)
    # --- Build occupancy mask from existing words (adaptive padding) ---
    # Pad word boxes generously to prevent colored-pixel artifacts in
    # narrow inter-word gaps from being recovered as false characters.
    heights = [wb["height"] for wb in existing_words if wb.get("height", 0) > 0]
    median_h = int(np.median(heights)) if heights else 20
    pad = max(8, int(median_h * 0.35))
    occupied = np.zeros((ih, iw), dtype=np.uint8)
    for wb in existing_words:
        x1 = max(0, int(wb["left"]) - pad)
        y1 = max(0, int(wb["top"]) - pad)
        x2 = min(iw, int(wb["left"] + wb["width"]) + pad)
        y2 = min(ih, int(wb["top"] + wb["height"]) + pad)
        occupied[y1:y2, x1:x2] = 255
    recovered: List[Dict] = []
    for color_name, ranges in _COLOR_RANGES.items():
        # Create mask for this color (red needs two wrapped hue ranges)
        mask = np.zeros((ih, iw), dtype=np.uint8)
        for lower, upper in ranges:
            mask = cv2.bitwise_or(mask, cv2.inRange(img_hsv, lower, upper))
        # Remove pixels already covered by existing OCR words
        mask = cv2.bitwise_and(mask, cv2.bitwise_not(occupied))
        # Morphological cleanup:
        # - Close with tall kernel to merge ! stroke + dot
        # - Open to remove noise specks
        kernel_close = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 8))
        mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel_close)
        kernel_open = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
        mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel_open)
        contours, _ = cv2.findContours(
            mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE,
        )
        candidates = []
        for cnt in contours:
            area = cv2.contourArea(cnt)
            if area < min_area or area > max_area:
                continue
            bx, by, bw, bh = cv2.boundingRect(cnt)
            if bh < 6:
                continue  # too short to be a character
            # Reject regions too wide to be single characters
            # (width scaled against the page's median word HEIGHT)
            if bw > median_h * 4:
                continue
            candidates.append((area, bx, by, bw, bh))
        # Keep largest first, limited count
        candidates.sort(key=lambda c: c[0], reverse=True)
        for area, bx, by, bw, bh in candidates[:max_regions]:
            text = _identify_shape(bw, bh)
            recovered.append({
                "text": text,
                "left": bx,
                "top": by,
                "width": bw,
                "height": bh,
                "conf": 45,  # low OCR-style confidence: shape-only guess
                "color": _COLOR_HEX.get(color_name, "#000000"),
                "color_name": color_name,
                "recovered": True,
            })
    if recovered:
        logger.info(
            "color recovery: %d colored regions found (%s)",
            len(recovered),
            ", ".join(
                f"{c}: {sum(1 for r in recovered if r['color_name'] == c)}"
                for c in sorted({r["color_name"] for r in recovered})
            ),
        )
    return recovered
def _identify_shape(w: int, h: int) -> str:
"""Simple shape heuristic for common single-character text markers."""
aspect = w / h if h > 0 else 1.0
if aspect < 0.55 and h > 10:
# Tall, narrow — likely exclamation mark
return "!"
if 0.6 < aspect < 1.5 and max(w, h) < 25:
# Small, roughly square — bullet or dot
return ""
return "?"

View File

@@ -0,0 +1,413 @@
"""
PP-DocLayout ONNX Document Layout Detection.
Uses PP-DocLayout ONNX model to detect document structure regions:
table, figure, title, text, list, header, footer, equation, reference, abstract
Fallback: If ONNX model not available, returns empty list (caller should
fall back to OpenCV-based detection in cv_graphic_detect.py).
DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
"""
import logging
import os
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, List, Optional
import numpy as np
logger = logging.getLogger(__name__)
__all__ = [
"detect_layout_regions",
"is_doclayout_available",
"get_doclayout_status",
"LayoutRegion",
"DOCLAYOUT_CLASSES",
]
# ---------------------------------------------------------------------------
# Class labels (PP-DocLayout default order)
# ---------------------------------------------------------------------------
# Class labels indexed by model class id (used as DOCLAYOUT_CLASSES[cls_idx]
# in _postprocess). NOTE(review): assumed to match the exported PP-DocLayout
# model's class-id order — confirm against the model's config.
DOCLAYOUT_CLASSES = [
    "table", "figure", "title", "text", "list",
    "header", "footer", "equation", "reference", "abstract",
]
# ---------------------------------------------------------------------------
# Data types
# ---------------------------------------------------------------------------
@dataclass
class LayoutRegion:
    """A detected document layout region.

    Coordinates are pixels on the original (un-letterboxed) input image,
    as mapped back by _postprocess.
    """
    x: int  # left edge
    y: int  # top edge
    width: int
    height: int
    label: str  # table, figure, title, text, list, etc.
    confidence: float  # detection score, rounded to 4 decimals
    label_index: int  # raw class index
# ---------------------------------------------------------------------------
# ONNX model loading
# ---------------------------------------------------------------------------
_MODEL_SEARCH_PATHS = [
# 1. Explicit environment variable
os.environ.get("DOCLAYOUT_ONNX_PATH", ""),
# 2. Docker default cache path
"/root/.cache/huggingface/onnx/pp-doclayout/model.onnx",
# 3. Local dev relative to working directory
"models/onnx/pp-doclayout/model.onnx",
]
_onnx_session: Optional[object] = None
_model_path: Optional[str] = None
_load_attempted: bool = False
_load_error: Optional[str] = None
def _find_model_path() -> Optional[str]:
"""Search for the ONNX model file in known locations."""
for p in _MODEL_SEARCH_PATHS:
if p and Path(p).is_file():
return str(Path(p).resolve())
return None
def _load_onnx_session():
    """Lazy-load the ONNX runtime session (once).

    Caches both success and failure in module globals so loading is
    attempted at most once per process; subsequent calls return the
    cached session (or None). Failure reasons land in _load_error.
    """
    global _onnx_session, _model_path, _load_attempted, _load_error
    if _load_attempted:
        # Already tried (successfully or not) — never retry.
        return _onnx_session
    _load_attempted = True
    path = _find_model_path()
    if path is None:
        _load_error = "ONNX model not found in any search path"
        logger.info("PP-DocLayout: %s", _load_error)
        return None
    try:
        import onnxruntime as ort  # type: ignore[import-untyped]
        sess_options = ort.SessionOptions()
        sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
        # Preferring CPU keeps the GPU free for OCR / LLM work.
        providers = ["CPUExecutionProvider"]
        _onnx_session = ort.InferenceSession(path, sess_options, providers=providers)
        _model_path = path
        logger.info("PP-DocLayout: model loaded from %s", path)
    except ImportError:
        _load_error = "onnxruntime not installed"
        logger.info("PP-DocLayout: %s", _load_error)
    except Exception as exc:
        _load_error = str(exc)
        logger.warning("PP-DocLayout: failed to load model from %s: %s", path, exc)
    return _onnx_session
# ---------------------------------------------------------------------------
# Public helpers
# ---------------------------------------------------------------------------
def is_doclayout_available() -> bool:
    """Return True if the ONNX model can be loaded successfully."""
    session = _load_onnx_session()
    return session is not None
def get_doclayout_status() -> Dict:
    """Return diagnostic information about the DocLayout backend."""
    _load_onnx_session()  # make sure a load was attempted at least once
    status = {
        "available": _onnx_session is not None,
        "model_path": _model_path,
        "load_error": _load_error,
        "classes": DOCLAYOUT_CLASSES,
        "class_count": len(DOCLAYOUT_CLASSES),
    }
    return status
# ---------------------------------------------------------------------------
# Pre-processing
# ---------------------------------------------------------------------------
_INPUT_SIZE = 800  # PP-DocLayout expects 800x800


def preprocess_image(img_bgr: np.ndarray) -> tuple:
    """Letterbox-resize and normalize an image for PP-DocLayout ONNX input.

    The image is scaled (aspect ratio preserved) into an 800x800 gray
    canvas, converted to float32 in [0, 1] and laid out as NCHW.

    Returns:
        (input_tensor, scale, pad_x, pad_y) — scale and paddings allow
        mapping detected boxes back to original image coordinates.
    """
    import cv2  # local import — cv2 is always available in this service

    src_h, src_w = img_bgr.shape[:2]
    # Uniform scale so the longer side fits exactly into the target square.
    ratio = min(_INPUT_SIZE / src_w, _INPUT_SIZE / src_h)
    dst_w = int(src_w * ratio)
    dst_h = int(src_h * ratio)
    scaled = cv2.resize(img_bgr, (dst_w, dst_h), interpolation=cv2.INTER_LINEAR)

    # Center the scaled image on a gray (114) canvas.
    pad_x = (_INPUT_SIZE - dst_w) // 2
    pad_y = (_INPUT_SIZE - dst_h) // 2
    canvas = np.full((_INPUT_SIZE, _INPUT_SIZE, 3), 114, dtype=np.uint8)
    canvas[pad_y:pad_y + dst_h, pad_x:pad_x + dst_w] = scaled

    # [0, 1] float32, HWC → CHW, batch dim → (1, 3, 800, 800).
    chw = (canvas.astype(np.float32) / 255.0).transpose(2, 0, 1)
    tensor = np.expand_dims(chw, axis=0)
    return tensor, ratio, pad_x, pad_y
# ---------------------------------------------------------------------------
# Non-Maximum Suppression (NMS)
# ---------------------------------------------------------------------------
def _compute_iou(box_a: np.ndarray, box_b: np.ndarray) -> float:
"""Compute IoU between two boxes [x1, y1, x2, y2]."""
ix1 = max(box_a[0], box_b[0])
iy1 = max(box_a[1], box_b[1])
ix2 = min(box_a[2], box_b[2])
iy2 = min(box_a[3], box_b[3])
inter = max(0.0, ix2 - ix1) * max(0.0, iy2 - iy1)
if inter == 0:
return 0.0
area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
union = area_a + area_b - inter
return inter / union if union > 0 else 0.0
def nms(boxes: np.ndarray, scores: np.ndarray, iou_threshold: float = 0.5) -> List[int]:
    """Apply greedy Non-Maximum Suppression.

    Args:
        boxes: (N, 4) array of [x1, y1, x2, y2].
        scores: (N,) confidence scores.
        iou_threshold: Overlap threshold for suppression.

    Returns:
        List of kept indices, best-scoring first.
    """
    if len(boxes) == 0:
        return []
    keep: List[int] = []
    # Candidate indices ordered by descending confidence.
    pending = np.argsort(scores)[::-1].tolist()
    while pending:
        best = pending.pop(0)
        keep.append(best)
        bb = boxes[best]
        survivors = []
        for idx in pending:
            ob = boxes[idx]
            # Inlined IoU of bb vs. ob.
            iw = max(0.0, min(bb[2], ob[2]) - max(bb[0], ob[0]))
            ih = max(0.0, min(bb[3], ob[3]) - max(bb[1], ob[1]))
            inter = iw * ih
            if inter == 0:
                survivors.append(idx)
                continue
            union = ((bb[2] - bb[0]) * (bb[3] - bb[1])
                     + (ob[2] - ob[0]) * (ob[3] - ob[1]) - inter)
            iou = inter / union if union > 0 else 0.0
            if iou < iou_threshold:
                survivors.append(idx)
        pending = survivors
    return keep
# ---------------------------------------------------------------------------
# Post-processing
# ---------------------------------------------------------------------------
def _postprocess(
    outputs: list,
    scale: float,
    pad_x: int,
    pad_y: int,
    orig_w: int,
    orig_h: int,
    confidence_threshold: float,
    max_regions: int,
) -> List[LayoutRegion]:
    """Parse ONNX output tensors into LayoutRegion list.

    PP-DocLayout ONNX typically outputs one tensor of shape
    (1, N, 6) or three tensors (boxes, scores, class_ids).
    We handle both common formats.

    Args:
        outputs: Raw output tensors from the ONNX session.
        scale: Letterbox scale factor used during preprocessing.
        pad_x: Horizontal letterbox padding (model-space pixels).
        pad_y: Vertical letterbox padding (model-space pixels).
        orig_w: Original image width, used for clamping.
        orig_h: Original image height, used for clamping.
        confidence_threshold: Minimum score to keep a detection.
        max_regions: Maximum number of regions to return.

    Returns:
        LayoutRegions in original-image coordinates, sorted by
        confidence descending, truncated to max_regions.
    """
    regions: List[LayoutRegion] = []
    # --- Determine output format ---
    if len(outputs) == 1:
        # Single tensor: (1, N, 4+1+1) = (batch, detections, [x1,y1,x2,y2,score,class])
        raw = np.squeeze(outputs[0])  # (N, 6) or (N, 5+num_classes)
        if raw.ndim == 1:
            # A single detection squeezes down to 1-D; restore 2-D shape.
            raw = raw.reshape(1, -1)
        if raw.shape[0] == 0:
            return []
        if raw.shape[1] == 6:
            # Format: x1, y1, x2, y2, score, class_id
            all_boxes = raw[:, :4]
            all_scores = raw[:, 4]
            all_classes = raw[:, 5].astype(int)
        elif raw.shape[1] > 6:
            # Format: x1, y1, x2, y2, obj_conf, cls0_conf, cls1_conf, ...
            all_boxes = raw[:, :4]
            cls_scores = raw[:, 5:]
            all_classes = np.argmax(cls_scores, axis=1)
            # Combined score = objectness * best per-class confidence.
            all_scores = raw[:, 4] * np.max(cls_scores, axis=1)
        else:
            logger.warning("PP-DocLayout: unexpected output shape %s", raw.shape)
            return []
    elif len(outputs) == 3:
        # Three tensors: boxes (N,4), scores (N,), class_ids (N,)
        all_boxes = np.squeeze(outputs[0])
        all_scores = np.squeeze(outputs[1])
        all_classes = np.squeeze(outputs[2]).astype(int)
        if all_boxes.ndim == 1:
            # Single detection: restore array dimensions.
            all_boxes = all_boxes.reshape(1, 4)
            all_scores = np.array([all_scores])
            all_classes = np.array([all_classes])
    else:
        logger.warning("PP-DocLayout: unexpected %d output tensors", len(outputs))
        return []
    # --- Confidence filter ---
    mask = all_scores >= confidence_threshold
    boxes = all_boxes[mask]
    scores = all_scores[mask]
    classes = all_classes[mask]
    if len(boxes) == 0:
        return []
    # --- NMS ---
    keep_idxs = nms(boxes, scores, iou_threshold=0.5)
    boxes = boxes[keep_idxs]
    scores = scores[keep_idxs]
    classes = classes[keep_idxs]
    # --- Scale boxes back to original image coordinates ---
    # NOTE(review): boxes are assumed to be in the padded model input space
    # at this point (hence un-pad then un-scale) — confirm with model export.
    for i in range(len(boxes)):
        x1, y1, x2, y2 = boxes[i]
        # Remove padding offset
        x1 = (x1 - pad_x) / scale
        y1 = (y1 - pad_y) / scale
        x2 = (x2 - pad_x) / scale
        y2 = (y2 - pad_y) / scale
        # Clamp to original dimensions
        x1 = max(0, min(x1, orig_w))
        y1 = max(0, min(y1, orig_h))
        x2 = max(0, min(x2, orig_w))
        y2 = max(0, min(y2, orig_h))
        w = int(round(x2 - x1))
        h = int(round(y2 - y1))
        if w < 5 or h < 5:
            # Degenerate box after clamping — discard.
            continue
        cls_idx = int(classes[i])
        label = DOCLAYOUT_CLASSES[cls_idx] if 0 <= cls_idx < len(DOCLAYOUT_CLASSES) else f"class_{cls_idx}"
        regions.append(LayoutRegion(
            x=int(round(x1)),
            y=int(round(y1)),
            width=w,
            height=h,
            label=label,
            confidence=round(float(scores[i]), 4),
            label_index=cls_idx,
        ))
    # Sort by confidence descending, limit
    regions.sort(key=lambda r: r.confidence, reverse=True)
    return regions[:max_regions]
# ---------------------------------------------------------------------------
# Main detection function
# ---------------------------------------------------------------------------
def detect_layout_regions(
    img_bgr: np.ndarray,
    confidence_threshold: float = 0.5,
    max_regions: int = 50,
) -> List[LayoutRegion]:
    """Detect document layout regions using PP-DocLayout ONNX model.

    Args:
        img_bgr: BGR color image (OpenCV format).
        confidence_threshold: Minimum confidence to keep a detection.
        max_regions: Maximum number of regions to return.

    Returns:
        List of LayoutRegion sorted by confidence descending.
        Returns empty list if model is not available.
    """
    session = _load_onnx_session()
    if session is None:
        # Model or onnxruntime unavailable — caller falls back to OpenCV.
        return []
    if img_bgr is None or img_bgr.size == 0:
        return []
    orig_h, orig_w = img_bgr.shape[:2]
    # Pre-process (letterbox into the fixed model input size)
    input_tensor, scale, pad_x, pad_y = preprocess_image(img_bgr)
    # Run inference
    try:
        input_name = session.get_inputs()[0].name
        outputs = session.run(None, {input_name: input_tensor})
    except Exception as exc:
        logger.warning("PP-DocLayout inference failed: %s", exc)
        return []
    # Post-process (format parsing, NMS, coordinate mapping)
    regions = _postprocess(
        outputs,
        scale=scale,
        pad_x=pad_x,
        pad_y=pad_y,
        orig_w=orig_w,
        orig_h=orig_h,
        confidence_threshold=confidence_threshold,
        max_regions=max_regions,
    )
    if regions:
        # Summarize per-label counts for the log line.
        label_counts: Dict[str, int] = {}
        for r in regions:
            label_counts[r.label] = label_counts.get(r.label, 0) + 1
        logger.info(
            "PP-DocLayout: %d regions (%s)",
            len(regions),
            ", ".join(f"{k}: {v}" for k, v in sorted(label_counts.items())),
        )
    else:
        logger.debug("PP-DocLayout: no regions above threshold %.2f", confidence_threshold)
    return regions

View File

@@ -0,0 +1,422 @@
"""
Graphical element detection for OCR pages.
Region-based approach:
1. Build a color mask (saturation channel — black text is invisible).
2. Dilate heavily to merge nearby colored pixels into regions.
3. For each region, check overlap with OCR word boxes:
- High word overlap → colored text (skip)
- Low word overlap → colored graphic / image (keep)
4. Separately detect large black-ink illustrations via ink mask.
Boxes and text colors are handled by cv_box_detect / cv_color_detect.
Lizenz: Apache 2.0 (kommerziell nutzbar)
DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
"""
import logging
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional
import cv2
import numpy as np
logger = logging.getLogger(__name__)
__all__ = ["detect_graphic_elements", "GraphicElement"]
@dataclass
class GraphicElement:
    """A detected non-text graphical element.

    Coordinates are pixels on the analyzed page image.
    """
    x: int
    y: int
    width: int
    height: int
    area: int  # colored-pixel count (PASS 1) or contour area (PASS 2)
    shape: str  # image, illustration
    color_name: str  # dominant color or 'black'
    color_hex: str
    confidence: float  # heuristic score in [0, 1]
    contour: Any = field(default=None, repr=False)  # raw OpenCV contour, if any
# ---------------------------------------------------------------------------
# Color helpers
# ---------------------------------------------------------------------------
# Display palette: hex value per coarse color name, used to fill
# GraphicElement.color_hex for detected elements.
_COLOR_HEX = {
    "black": "#000000",
    "gray": "#6b7280",
    "red": "#dc2626",
    "orange": "#ea580c",
    "yellow": "#ca8a04",
    "green": "#16a34a",
    "blue": "#2563eb",
    "purple": "#9333ea",
}
def _dominant_color(hsv_roi: np.ndarray, sat_threshold: int = 40) -> tuple:
"""Return (color_name, color_hex) for an HSV region."""
if hsv_roi.size == 0:
return "black", _COLOR_HEX["black"]
pixels = hsv_roi.reshape(-1, 3)
sat = pixels[:, 1]
sat_mask = sat > sat_threshold
sat_ratio = np.sum(sat_mask) / len(pixels) if len(pixels) > 0 else 0
if sat_ratio < 0.15:
return "black", _COLOR_HEX["black"]
sat_pixels = pixels[sat_mask]
if len(sat_pixels) < 3:
return "black", _COLOR_HEX["black"]
med_hue = float(np.median(sat_pixels[:, 0]))
if med_hue < 10 or med_hue > 170:
name = "red"
elif med_hue < 25:
name = "orange"
elif med_hue < 35:
name = "yellow"
elif med_hue < 85:
name = "green"
elif med_hue < 130:
name = "blue"
else:
name = "purple"
return name, _COLOR_HEX.get(name, _COLOR_HEX["black"])
# ---------------------------------------------------------------------------
# Main detection
# ---------------------------------------------------------------------------
def detect_graphic_elements(
    img_bgr: np.ndarray,
    word_boxes: List[Dict],
    detected_boxes: Optional[List[Dict]] = None,
    max_elements: int = 50,
) -> List[GraphicElement]:
    """Find non-text graphical regions on the page.

    Region-based: dilate color mask to form regions, then check word
    overlap to distinguish colored text from colored graphics.

    Args:
        img_bgr: BGR color image.
        word_boxes: List of OCR word dicts with left/top/width/height.
        detected_boxes: Optional list of detected box dicts (x/y/w/h).
        max_elements: Maximum number of elements to return.

    Returns:
        List of GraphicElement, sorted by area descending.
    """
    # Fix: also reject empty arrays — cv2.cvtColor would raise on them.
    # Mirrors the input guard in detect_layout_regions.
    if img_bgr is None or img_bgr.size == 0:
        return []
    # ------------------------------------------------------------------
    # Try PP-DocLayout ONNX first if available
    # ------------------------------------------------------------------
    import os
    backend = os.environ.get("GRAPHIC_DETECT_BACKEND", "auto")
    if backend in ("doclayout", "auto"):
        try:
            from cv_doclayout_detect import detect_layout_regions, is_doclayout_available
            if is_doclayout_available():
                regions = detect_layout_regions(img_bgr)
                if regions:
                    # Map model labels onto (shape, color_name, color_hex).
                    _LABEL_TO_COLOR = {
                        "figure": ("image", "green", _COLOR_HEX.get("green", "#16a34a")),
                        "table": ("image", "blue", _COLOR_HEX.get("blue", "#2563eb")),
                    }
                    converted: List[GraphicElement] = []
                    for r in regions:
                        shape, color_name, color_hex = _LABEL_TO_COLOR.get(
                            r.label,
                            (r.label, "gray", _COLOR_HEX.get("gray", "#6b7280")),
                        )
                        converted.append(GraphicElement(
                            x=r.x,
                            y=r.y,
                            width=r.width,
                            height=r.height,
                            area=r.width * r.height,
                            shape=shape,
                            color_name=color_name,
                            color_hex=color_hex,
                            confidence=r.confidence,
                            contour=None,
                        ))
                    converted.sort(key=lambda g: g.area, reverse=True)
                    result = converted[:max_elements]
                    if result:
                        shape_counts: Dict[str, int] = {}
                        for g in result:
                            shape_counts[g.shape] = shape_counts.get(g.shape, 0) + 1
                        logger.info(
                            "GraphicDetect (PP-DocLayout): %d elements (%s)",
                            len(result),
                            ", ".join(f"{s}: {c}" for s, c in sorted(shape_counts.items())),
                        )
                    return result
        except Exception as e:
            logger.warning("PP-DocLayout failed, falling back to OpenCV: %s", e)
    # ------------------------------------------------------------------
    # OpenCV fallback (original logic)
    # ------------------------------------------------------------------
    h, w = img_bgr.shape[:2]
    logger.debug("GraphicDetect: image %dx%d, %d word_boxes, %d detected_boxes",
                 w, h, len(word_boxes), len(detected_boxes or []))
    hsv = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2HSV)
    candidates: List[GraphicElement] = []
    # --- Build word mask (for overlap checking) ---
    word_mask = np.zeros((h, w), dtype=np.uint8)
    for wb in word_boxes:
        x1 = max(0, int(wb.get("left", 0)))
        y1 = max(0, int(wb.get("top", 0)))
        x2 = min(w, int(wb.get("left", 0) + wb.get("width", 0)))
        y2 = min(h, int(wb.get("top", 0) + wb.get("height", 0)))
        word_mask[y1:y2, x1:x2] = 255
    # =====================================================================
    # PASS 1 — COLORED IMAGE REGIONS
    # =====================================================================
    # Color mask: saturated pixels (black text has sat ≈ 0 → invisible)
    sat_mask = (hsv[:, :, 1] > 40).astype(np.uint8) * 255
    val_mask = (hsv[:, :, 2] < 240).astype(np.uint8) * 255
    color_pixels = cv2.bitwise_and(sat_mask, val_mask)
    # Remove tiny speckle
    kernel_open = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
    color_pixels = cv2.morphologyEx(color_pixels, cv2.MORPH_OPEN, kernel_open)
    # Count raw colored pixels before dilation (for density check later)
    color_pixel_raw = color_pixels.copy()
    # Heavy dilation to merge nearby colored elements into regions.
    # A 25x25 kernel merges elements within ~12px of each other.
    kernel_dilate = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (25, 25))
    region_mask = cv2.dilate(color_pixels, kernel_dilate, iterations=1)
    contours_regions, _ = cv2.findContours(
        region_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE,
    )
    logger.debug("GraphicDetect PASS1: %d color regions after dilation", len(contours_regions))
    for cnt in contours_regions:
        bx, by, bw, bh = cv2.boundingRect(cnt)
        # Skip tiny regions
        if bw < 15 or bh < 15:
            continue
        # Skip page-spanning regions
        if bw > w * 0.6 or bh > h * 0.6:
            logger.debug("GraphicDetect PASS1 skip page-spanning (%d,%d) %dx%d", bx, by, bw, bh)
            continue
        bbox_area = bw * bh
        # Check: how much of this region's bounding box overlaps with words?
        roi_words = word_mask[by:by + bh, bx:bx + bw]
        word_pixel_count = int(np.sum(roi_words > 0))
        word_overlap = word_pixel_count / bbox_area if bbox_area > 0 else 0
        # Check: how many OCR word centroids fall inside this region?
        # Colored text that OCR detected will have multiple centroids inside.
        # Actual images may have 0-1 spurious OCR artifacts.
        word_centroid_count = sum(
            1 for wb in word_boxes
            if (bx <= int(wb.get("left", 0) + wb.get("width", 0) / 2) <= bx + bw
                and by <= int(wb.get("top", 0) + wb.get("height", 0) / 2) <= by + bh)
        )
        # Check: how many actual colored pixels are in this region?
        roi_color = color_pixel_raw[by:by + bh, bx:bx + bw]
        color_pixel_count = int(np.sum(roi_color > 0))
        # Color pixel density (before any skip checks so we can log it)
        density = color_pixel_count / bbox_area if bbox_area > 0 else 0
        # --- Skip heuristics for colored TEXT (not images) ---
        # (a) High word-box pixel overlap → clearly text
        if word_overlap > 0.40:
            logger.info(
                "GraphicDetect PASS1 skip text-overlap (%d,%d) %dx%d "
                "overlap=%.0f%% centroids=%d",
                bx, by, bw, bh, word_overlap * 100, word_centroid_count,
            )
            continue
        # (b) Multiple OCR words detected inside → colored text
        # (images rarely produce 2+ confident word detections)
        if word_centroid_count >= 2:
            logger.info(
                "GraphicDetect PASS1 skip multi-word (%d,%d) %dx%d "
                "centroids=%d overlap=%.0f%% density=%.0f%%",
                bx, by, bw, bh, word_centroid_count,
                word_overlap * 100, density * 100,
            )
            continue
        # (c) Even 1 word + some pixel overlap → likely text
        if word_centroid_count >= 1 and word_overlap > 0.10:
            logger.info(
                "GraphicDetect PASS1 skip word+overlap (%d,%d) %dx%d "
                "centroids=%d overlap=%.0f%%",
                bx, by, bw, bh, word_centroid_count, word_overlap * 100,
            )
            continue
        # Need a minimum number of colored pixels (not just dilated area)
        if color_pixel_count < 200:
            continue
        # (d) Very low density → thin strokes, almost certainly text.
        # Large regions (photos/illustrations) can have low color density
        # because most pixels are grayscale ink. Use a lower threshold
        # for regions bigger than 100×80 px.
        _min_density = 0.05 if (bw > 100 and bh > 80) else 0.20
        if density < _min_density:
            logger.info(
                "GraphicDetect PASS1 skip low-density (%d,%d) %dx%d "
                "density=%.0f%% (min=%.0f%%, likely colored text)",
                bx, by, bw, bh, density * 100, _min_density * 100,
            )
            continue
        # (e) Moderate density + small height → colored text line
        if density < 0.35 and bh < h * 0.05:
            logger.info(
                "GraphicDetect PASS1 skip text-height (%d,%d) %dx%d "
                "density=%.0f%% height=%.1f%%",
                bx, by, bw, bh, density * 100, 100.0 * bh / h,
            )
            continue
        # Determine dominant color from the actual colored pixels
        roi_hsv = hsv[by:by + bh, bx:bx + bw]
        color_px_mask = roi_color > 0
        if np.sum(color_px_mask) > 0:
            masked_hsv = roi_hsv[color_px_mask]
            color_name, color_hex = _dominant_color(masked_hsv)
        else:
            color_name, color_hex = "black", _COLOR_HEX["black"]
        # Confidence based on color density and low word overlap
        conf = min(0.95, 0.5 + density * 0.5)
        logger.debug("GraphicDetect PASS1 accept (%d,%d) %dx%d px=%d density=%.0f%% overlap=%.0f%% %s",
                     bx, by, bw, bh, color_pixel_count, density * 100, word_overlap * 100, color_name)
        candidates.append(GraphicElement(
            x=bx, y=by, width=bw, height=bh,
            area=color_pixel_count,
            shape="image",
            color_name=color_name, color_hex=color_hex,
            confidence=round(conf, 2), contour=cnt,
        ))
    # =====================================================================
    # PASS 2 — LARGE BLACK-INK ILLUSTRATIONS
    # =====================================================================
    gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
    _, dark_mask = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    # Exclude words and colored regions already found
    exclusion = np.zeros((h, w), dtype=np.uint8)
    word_pad = 5
    for wb in word_boxes:
        x1 = max(0, int(wb.get("left", 0)) - word_pad)
        y1 = max(0, int(wb.get("top", 0)) - word_pad)
        x2 = min(w, int(wb.get("left", 0) + wb.get("width", 0)) + word_pad)
        y2 = min(h, int(wb.get("top", 0) + wb.get("height", 0)) + word_pad)
        exclusion[y1:y2, x1:x2] = 255
    if detected_boxes:
        for box in detected_boxes:
            bbx = int(box.get("x", 0))
            bby = int(box.get("y", 0))
            bbw = int(box.get("w", box.get("width", 0)))
            bbh = int(box.get("h", box.get("height", 0)))
            # Inset so the box BORDER strokes stay visible to PASS 2
            # while the box interior is excluded.
            inset = 8
            x1 = max(0, bbx + inset)
            y1 = max(0, bby + inset)
            x2 = min(w, bbx + bbw - inset)
            y2 = min(h, bby + bbh - inset)
            if x2 > x1 and y2 > y1:
                exclusion[y1:y2, x1:x2] = 255
    ink_only = cv2.bitwise_and(dark_mask, cv2.bitwise_not(exclusion))
    ink_only = cv2.bitwise_and(ink_only, cv2.bitwise_not(color_pixels))
    contours_ink, _ = cv2.findContours(
        ink_only, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE,
    )
    logger.debug("GraphicDetect PASS2 ink: %d contours", len(contours_ink))
    for cnt in contours_ink:
        area = cv2.contourArea(cnt)
        bx, by, bw, bh = cv2.boundingRect(cnt)
        if area < 5000 or min(bw, bh) < 40:
            continue
        if bw > w * 0.8 or bh > h * 0.8:
            continue
        logger.debug("GraphicDetect PASS2 accept (%d,%d) %dx%d area=%d",
                     bx, by, bw, bh, int(area))
        candidates.append(GraphicElement(
            x=bx, y=by, width=bw, height=bh,
            area=int(area), shape="illustration",
            color_name="black", color_hex="#000000",
            confidence=0.5, contour=cnt,
        ))
    # =====================================================================
    # Deduplicate and return
    # =====================================================================
    candidates.sort(key=lambda g: g.area, reverse=True)
    final: List[GraphicElement] = []
    for c in candidates:
        # Drop a candidate when >50% of the smaller box overlaps an
        # already-kept (larger-area) element.
        overlap = False
        for f in final:
            ix1 = max(c.x, f.x)
            iy1 = max(c.y, f.y)
            ix2 = min(c.x + c.width, f.x + f.width)
            iy2 = min(c.y + c.height, f.y + f.height)
            if ix2 > ix1 and iy2 > iy1:
                inter = (ix2 - ix1) * (iy2 - iy1)
                smaller = min(c.width * c.height, f.width * f.height)
                if smaller > 0 and inter / smaller > 0.5:
                    overlap = True
                    break
        if not overlap:
            final.append(c)
    result = final[:max_elements]
    if result:
        shape_counts: Dict[str, int] = {}
        for g in result:
            shape_counts[g.shape] = shape_counts.get(g.shape, 0) + 1
        logger.info(
            "GraphicDetect: %d elements found (%s)",
            len(result),
            ", ".join(f"{s}: {c}" for s, c in sorted(shape_counts.items())),
        )
    else:
        logger.info("GraphicDetect: no graphic elements found")
    return result

View File

@@ -0,0 +1,231 @@
"""
Syllable Core — hyphenator init, word validation, pipe autocorrect.
Extracted from cv_syllable_detect.py for modularity.
Lizenz: Apache 2.0 (kommerziell nutzbar)
DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
"""
import logging
import re
from typing import Any, Dict, List, Optional, Tuple
logger = logging.getLogger(__name__)
# IPA/phonetic characters -- skip cells containing these
_IPA_RE = re.compile(r'[\[\]\u02c8\u02cc\u02d0\u0283\u0292\u03b8\u00f0\u014b\u0251\u0252\u00e6\u0254\u0259\u025b\u025c\u026a\u028a\u028c]')
# Common German words that should NOT be merged with adjacent tokens.
_STOP_WORDS = frozenset([
# Articles
'der', 'die', 'das', 'dem', 'den', 'des',
'ein', 'eine', 'einem', 'einen', 'einer',
# Pronouns
'du', 'er', 'es', 'sie', 'wir', 'ihr', 'ich', 'man', 'sich',
'dich', 'dir', 'mich', 'mir', 'uns', 'euch', 'ihm', 'ihn',
# Prepositions
'mit', 'von', 'zu', 'f\u00fcr', 'auf', 'in', 'an', 'um', 'am', 'im',
'aus', 'bei', 'nach', 'vor', 'bis', 'durch', '\u00fcber', 'unter',
'zwischen', 'ohne', 'gegen',
# Conjunctions
'und', 'oder', 'als', 'wie', 'wenn', 'dass', 'weil', 'aber',
# Adverbs
'auch', 'noch', 'nur', 'schon', 'sehr', 'nicht',
# Verbs
'ist', 'hat', 'wird', 'kann', 'soll', 'muss', 'darf',
'sein', 'haben',
# Other
'kein', 'keine', 'keinem', 'keinen', 'keiner',
])
# Cached hyphenators
_hyph_de = None
_hyph_en = None
# Cached spellchecker (for autocorrect_pipe_artifacts)
_spell_de = None
def _get_hyphenators():
"""Lazy-load pyphen hyphenators (cached across calls)."""
global _hyph_de, _hyph_en
if _hyph_de is not None:
return _hyph_de, _hyph_en
try:
import pyphen
except ImportError:
return None, None
_hyph_de = pyphen.Pyphen(lang='de_DE')
_hyph_en = pyphen.Pyphen(lang='en_US')
return _hyph_de, _hyph_en
def _get_spellchecker():
"""Lazy-load German spellchecker (cached across calls)."""
global _spell_de
if _spell_de is not None:
return _spell_de
try:
from spellchecker import SpellChecker
except ImportError:
return None
_spell_de = SpellChecker(language='de')
return _spell_de
def _is_known_word(word: str, hyph_de, hyph_en) -> bool:
"""Check whether pyphen recognises a word (DE or EN)."""
if len(word) < 2:
return False
return ('|' in hyph_de.inserted(word, hyphen='|')
or '|' in hyph_en.inserted(word, hyphen='|'))
def _is_real_word(word: str) -> bool:
    """Check whether spellchecker knows this word (case-insensitive)."""
    checker = _get_spellchecker()
    if checker is None:
        # No spellchecker installed — cannot validate anything.
        return False
    return word.lower() in checker
def _hyphenate_word(word: str, hyph_de, hyph_en) -> Optional[str]:
"""Try to hyphenate a word using DE then EN dictionary.
Returns word with | separators, or None if not recognized.
"""
hyph = hyph_de.inserted(word, hyphen='|')
if '|' in hyph:
return hyph
hyph = hyph_en.inserted(word, hyphen='|')
if '|' in hyph:
return hyph
return None
def _autocorrect_piped_word(word_with_pipes: str) -> Optional[str]:
    """Try to correct a word that has OCR pipe artifacts.

    Printed syllable divider lines on dictionary pages confuse OCR:
    the vertical stroke is often read as an extra character (commonly
    ``l``, ``I``, ``1``, ``i``) adjacent to where the pipe appears.
    Uses ``spellchecker`` (frequency-based word list) for validation.

    Strategy:
    1. Strip ``|`` -- if spellchecker knows the result, done.
    2. Try deleting each pipe-like character (l, I, 1, i, t).
    3. Fall back to spellchecker's own ``correction()`` method.
    4. Preserve the original casing of the first letter.

    Returns the corrected word, or None when no fix was found.
    """
    core = word_with_pipes.replace('|', '')
    if not core or len(core) < 3:
        return core  # too short to validate
    # Step 1: the stripped word may already be valid.
    if _is_real_word(core):
        return core
    # Step 2: delete one pipe-like character at a time and re-validate.
    pipe_like = frozenset('lI1it')
    for pos, ch in enumerate(core):
        if ch not in pipe_like:
            continue
        trimmed = core[:pos] + core[pos + 1:]
        if len(trimmed) >= 3 and _is_real_word(trimmed):
            return trimmed
    # Step 3: let the spellchecker propose a correction.
    spell = _get_spellchecker()
    if spell is not None:
        fixed = spell.correction(core.lower())
        if fixed and fixed != core.lower():
            # Step 4: preserve original first-letter case.
            if core[0].isupper():
                fixed = fixed[0].upper() + fixed[1:]
            return fixed
    return None  # could not fix
def autocorrect_pipe_artifacts(
    zones_data: List[Dict], session_id: str,
) -> int:
    """Strip OCR pipe artifacts and correct garbled words in-place.

    Printed syllable divider lines on dictionary scans are read by OCR
    as ``|`` characters embedded in words (e.g. ``Zel|le``, ``Ze|plpe|lin``).
    This function:
    1. Strips ``|`` from every word in content cells.
    2. Validates with spellchecker (real dictionary lookup).
    3. If not recognised, tries deleting pipe-like characters or uses
       spellchecker's correction (e.g. ``Zeplpelin`` -> ``Zeppelin``).
    4. Updates both word-box texts and cell text.

    Args:
        zones_data: Zone dicts whose "cells" are mutated in place.
        session_id: Session identifier, used only for logging.

    Returns:
        The number of cells modified.
    """
    spell = _get_spellchecker()
    if spell is None:
        logger.warning("spellchecker not available -- pipe autocorrect limited")
        # Fall back: still strip pipes even without spellchecker
        pass
    modified = 0
    for z in zones_data:
        for cell in z.get("cells", []):
            ct = cell.get("col_type", "")
            # Only vocabulary content columns are corrected.
            if not ct.startswith("column_"):
                continue
            cell_changed = False
            # --- Fix word boxes ---
            for wb in cell.get("word_boxes", []):
                wb_text = wb.get("text", "")
                if "|" not in wb_text:
                    continue
                # Separate leading/trailing punctuation from the word core
                # so the correction only sees letters and pipes.
                m = re.match(
                    r'^([^a-zA-Z\u00e4\u00f6\u00fc\u00c4\u00d6\u00dc\u00df\u1e9e]*)'
                    r'(.*?)'
                    r'([^a-zA-Z\u00e4\u00f6\u00fc\u00c4\u00d6\u00dc\u00df\u1e9e]*)$',
                    wb_text,
                )
                if not m:
                    continue
                lead, core, trail = m.group(1), m.group(2), m.group(3)
                if "|" not in core:
                    continue
                corrected = _autocorrect_piped_word(core)
                if corrected is not None and corrected != core:
                    wb["text"] = lead + corrected + trail
                    cell_changed = True
            # --- Rebuild cell text from word boxes ---
            if cell_changed:
                wbs = cell.get("word_boxes", [])
                if wbs:
                    cell["text"] = " ".join(
                        (wb.get("text") or "") for wb in wbs
                    )
                modified += 1
            # --- Fallback: strip residual | from cell text ---
            text = cell.get("text", "")
            if "|" in text:
                clean = text.replace("|", "")
                if clean != text:
                    cell["text"] = clean
                    # Guard against double-counting a cell already counted
                    # via the word-box path above.
                    if not cell_changed:
                        modified += 1
    if modified:
        logger.info(
            "build-grid session %s: autocorrected pipe artifacts in %d cells",
            session_id, modified,
        )
    return modified

View File

@@ -0,0 +1,32 @@
"""
Syllable divider insertion for dictionary pages — barrel re-export.
All implementation split into:
cv_syllable_core — hyphenator init, word validation, pipe autocorrect
cv_syllable_merge — word gap merging, syllabification, divider insertion
Lizenz: Apache 2.0 (kommerziell nutzbar)
DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
"""
# Core: init, validation, autocorrect
from cv_syllable_core import ( # noqa: F401
_IPA_RE,
_STOP_WORDS,
_get_hyphenators,
_get_spellchecker,
_is_known_word,
_is_real_word,
_hyphenate_word,
_autocorrect_piped_word,
autocorrect_pipe_artifacts,
)
# Merge: gap merging, syllabify, insert
from cv_syllable_merge import ( # noqa: F401
_try_merge_pipe_gaps,
merge_word_gaps_in_zones,
_try_merge_word_gaps,
_syllabify_text,
insert_syllable_dividers,
)

View File

@@ -0,0 +1,300 @@
"""
Syllable Merge — word gap merging, syllabification, divider insertion.
Extracted from cv_syllable_detect.py for modularity.
Lizenz: Apache 2.0 (kommerziell nutzbar)
DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
"""
import logging
import re
from typing import Any, Dict, List, Optional
import numpy as np
from cv_syllable_core import (
_get_hyphenators,
_hyphenate_word,
_IPA_RE,
_STOP_WORDS,
)
logger = logging.getLogger(__name__)
def _try_merge_pipe_gaps(text: str, hyph_de, *, max_short: int = 3,
                         min_total: int = 4) -> str:
    """Merge fragments separated by single spaces where OCR split at a pipe.

    Example: "Kaf fee" -> "Kaffee" (pyphen recognizes the merged word).
    Multi-step: "Ka bel jau" -> "Kabel jau" -> "Kabeljau".

    Guards against false merges:
    - The FIRST token must be pure alpha (word start -- no attached punctuation)
    - The second token may have trailing punctuation (comma, period) which
      stays attached to the merged word: "Ka" + "fer," -> "Kafer,"
    - Common German function words (der, die, das, ...) are never merged
    - At least one fragment must be very short (<= ``max_short`` alpha chars)

    Args:
        text: Cell text whose space-separated tokens may be OCR fragments.
        hyph_de: pyphen German hyphenator used to validate candidate merges.
        max_short: Upper bound on the shorter fragment's alpha length for a
            merge attempt (default 3 -- the pipe-gap signal).
        min_total: Minimum combined alpha length of both fragments.

    Returns:
        The text with validated fragment pairs collapsed into single words.
    """
    parts = text.split(' ')
    if len(parts) < 2:
        return text
    # Everything that is NOT a German letter (incl. umlauts, sharp s,
    # capital sharp s \u1e9e) -- used to extract the alpha-only core.
    non_alpha = re.compile(
        r'[^a-zA-Z\u00e4\u00f6\u00fc\u00c4\u00d6\u00dc\u00df\u1e9e]')
    result = [parts[0]]
    i = 1
    while i < len(parts):
        prev = result[-1]
        curr = parts[i]
        prev_alpha = non_alpha.sub('', prev)
        curr_alpha = non_alpha.sub('', curr)
        # Guard 1: first token must be pure alpha (word-start fragment);
        #          second token may carry trailing punctuation.
        # Guard 2: neither alpha core is a common German function word.
        # Guard 3: the shorter fragment must be <= max_short chars.
        # Guard 4: combined length must be >= min_total.
        should_try = (
            prev == prev_alpha
            and prev_alpha and curr_alpha
            and prev_alpha.lower() not in _STOP_WORDS
            and curr_alpha.lower() not in _STOP_WORDS
            and min(len(prev_alpha), len(curr_alpha)) <= max_short
            and len(prev_alpha) + len(curr_alpha) >= min_total
        )
        if should_try:
            merged_alpha = prev_alpha + curr_alpha
            # pyphen only inserts '-' into words it recognizes as divisible;
            # that recognition is the validation signal for the merge.
            hyph = hyph_de.inserted(merged_alpha, hyphen='-')
            if '-' in hyph:
                result[-1] = prev + curr
                i += 1
                continue
        result.append(curr)
        i += 1
    return ' '.join(result)
def merge_word_gaps_in_zones(zones_data: List[Dict], session_id: str) -> int:
    """Merge OCR word-gap fragments in cell texts using pyphen validation.

    OCR often splits words at syllable boundaries into separate word_boxes,
    producing text like "zerknit tert" instead of "zerknittert". Every
    content-column cell is run through ``_try_merge_word_gaps``, which is
    more permissive than ``_try_merge_pipe_gaps`` (threshold 5 instead of 3)
    but still guarded by pyphen dictionary lookup and stop-word exclusion.

    Args:
        zones_data: Zone dicts with "cells" lists (mutated in place).
        session_id: Session identifier, used for logging only.

    Returns:
        The number of cells modified.
    """
    hyph_de, _ = _get_hyphenators()
    if hyph_de is None:
        return 0
    changed = 0
    content_cells = (
        cell
        for zone in zones_data
        for cell in zone.get("cells", [])
        if zone and cell.get("col_type", "").startswith("column_")
    )
    for cell in content_cells:
        original = cell.get("text", "")
        if not original or " " not in original:
            continue
        # Never touch IPA transcription cells (checked outside [...] spans).
        if _IPA_RE.search(re.sub(r'\[[^\]]*\]', '', original)):
            continue
        merged = _try_merge_word_gaps(original, hyph_de)
        if merged != original:
            cell["text"] = merged
            changed += 1
    if changed:
        logger.info(
            "build-grid session %s: merged word gaps in %d cells",
            session_id, changed,
        )
    return changed
def _try_merge_word_gaps(text: str, hyph_de, *, max_short: int = 5) -> str:
    """Merge OCR word fragments with a relaxed length threshold.

    Similar to ``_try_merge_pipe_gaps`` but allows slightly longer fragments
    (default ``max_short=5`` instead of 3). Still requires pyphen to
    recognize the merged word before a space is collapsed, and never merges
    German stop words.

    Args:
        text: Cell text whose space-separated tokens may be OCR fragments.
        hyph_de: pyphen German hyphenator used to validate candidate merges.
        max_short: Upper bound on the shorter fragment's alpha length.

    Returns:
        The text with validated fragment pairs collapsed into single words.
    """
    parts = text.split(' ')
    if len(parts) < 2:
        return text
    # Everything that is NOT a German letter (incl. umlauts, sharp s,
    # capital sharp s \u1e9e) -- used to extract the alpha-only core.
    non_alpha = re.compile(
        r'[^a-zA-Z\u00e4\u00f6\u00fc\u00c4\u00d6\u00dc\u00df\u1e9e]')
    result = [parts[0]]
    for curr in parts[1:]:
        prev = result[-1]
        prev_alpha = non_alpha.sub('', prev)
        curr_alpha = non_alpha.sub('', curr)
        # Same guard set as _try_merge_pipe_gaps, with the relaxed threshold:
        # word-start fragment is pure alpha, no stop words, shorter fragment
        # <= max_short, combined length >= 4.
        candidate = (
            prev == prev_alpha
            and prev_alpha and curr_alpha
            and prev_alpha.lower() not in _STOP_WORDS
            and curr_alpha.lower() not in _STOP_WORDS
            and min(len(prev_alpha), len(curr_alpha)) <= max_short
            and len(prev_alpha) + len(curr_alpha) >= 4
        )
        if candidate and '-' in hyph_de.inserted(prev_alpha + curr_alpha,
                                                 hyphen='-'):
            # pyphen recognizes the merged word -- collapse the space.
            result[-1] = prev + curr
        else:
            result.append(curr)
    return ' '.join(result)
def _syllabify_text(text: str, hyph_de, hyph_en) -> str:
    """Syllabify all significant words in a text string.

    1. Strip existing | dividers
    2. Merge pipe-gap spaces where possible
    3. Apply pyphen to each word >= 3 alphabetic chars
    4. Words pyphen doesn't recognize stay as-is (no bad guesses)

    Args:
        text: Cell text to normalize.
        hyph_de: German pyphen hyphenator (primary).
        hyph_en: English pyphen hyphenator (fallback), passed through to
            ``_hyphenate_word``.

    Returns:
        Text with syllable dividers re-inserted, or the input unchanged
        when the cell looks like IPA transcription.
    """
    if not text:
        return text
    # Skip cells that contain IPA transcription characters outside brackets.
    text_no_brackets = re.sub(r'\[[^\]]*\]', '', text)
    if _IPA_RE.search(text_no_brackets):
        return text
    # Phase 1: strip existing pipe dividers for clean normalization
    clean = text.replace('|', '')
    # Phase 2: merge pipe-gap spaces (OCR fragments from pipe splitting)
    clean = _try_merge_pipe_gaps(clean, hyph_de)
    # Phase 3: tokenize and syllabify each word.
    # Split on whitespace and comma/semicolon sequences; the capturing group
    # keeps the separators so the text can be rebuilt verbatim.
    tokens = re.split(r'(\s+|[,;:]+\s*)', clean)
    result = []
    for tok in tokens:
        if not tok or re.match(r'^[\s,;:]+$', tok):
            result.append(tok)
            continue
        # Strip leading/trailing punctuation for the pyphen lookup.
        # \u1e9e is the capital sharp s.
        m = re.match(
            r'^([^a-zA-Z\u00e4\u00f6\u00fc\u00c4\u00d6\u00dc\u00df\u1e9e]*)'
            r'(.*?)'
            r'([^a-zA-Z\u00e4\u00f6\u00fc\u00c4\u00d6\u00dc\u00df\u1e9e]*)$',
            tok,
        )
        if not m:
            result.append(tok)
            continue
        lead, word, trail = m.group(1), m.group(2), m.group(3)
        # BUGFIX: include \u1e9e in the letter check so words containing
        # the capital sharp s are handled consistently with the other
        # character classes in this module.
        if len(word) < 3 or not re.search(
            r'[a-zA-Z\u00e4\u00f6\u00fc\u00c4\u00d6\u00dc\u00df\u1e9e]', word
        ):
            result.append(tok)
            continue
        hyph = _hyphenate_word(word, hyph_de, hyph_en)
        if hyph:
            result.append(lead + hyph + trail)
        else:
            result.append(tok)
    return ''.join(result)
def insert_syllable_dividers(
    zones_data: List[Dict],
    img_bgr: np.ndarray,
    session_id: str,
    *,
    force: bool = False,
    col_filter: Optional[set] = None,
) -> int:
    """Insert pipe syllable dividers into dictionary cells.

    For dictionary pages: process all content column cells, strip existing
    pipes, merge pipe-gap spaces, and re-syllabify using pyphen. In auto
    mode a pre-check requires at least 1% of content cells to already
    contain ``|`` from OCR, guarding against pages with zero pipes.

    Args:
        zones_data: Zone dicts with "cells" lists (mutated in place).
        img_bgr: Page image; not read here, kept for interface parity.
        session_id: Session identifier, used for logging only.
        force: If True, skip the pipe-ratio pre-check and syllabify all
            content words regardless of whether the original has pipe
            dividers.
        col_filter: If set, only process cells whose col_type is in this
            set. None means process all content columns.

    Returns:
        The number of cells modified.
    """
    hyph_de, hyph_en = _get_hyphenators()
    if hyph_de is None:
        logger.warning("pyphen not installed -- skipping syllable insertion")
        return 0

    def content_cells():
        # All cells belonging to content columns, across every zone.
        for zone in zones_data:
            for c in zone.get("cells", []):
                if c.get("col_type", "").startswith("column_"):
                    yield c

    if not force:
        # Pre-check: OCR must already have produced pipes somewhere on the
        # page before normalization is worthwhile.
        total = 0
        piped = 0
        for c in content_cells():
            total += 1
            if "|" in c.get("text", ""):
                piped += 1
        if total > 0:
            ratio = piped / total
            if ratio < 0.01:
                logger.info(
                    "build-grid session %s: skipping syllable insertion -- "
                    "only %.1f%% of cells have existing pipes (need >=1%%)",
                    session_id, ratio * 100,
                )
                return 0

    changed = 0
    for cell in content_cells():
        if col_filter is not None and cell.get("col_type", "") not in col_filter:
            continue
        old_text = cell.get("text", "")
        if not old_text:
            continue
        # Auto mode (force=False) only normalizes cells that already carry
        # | from OCR (printed syllable dividers on the original scan);
        # it never adds new syllable marks to other words.
        if not force and "|" not in old_text:
            continue
        new_text = _syllabify_text(old_text, hyph_de, hyph_en)
        if new_text != old_text:
            cell["text"] = new_text
            changed += 1
    if changed:
        logger.info(
            "build-grid session %s: syllable dividers inserted/normalized "
            "in %d cells (pyphen)",
            session_id, changed,
        )
    return changed