Fix: Sidebar scrollable + add Eltern-Portal nav link
overflow-hidden → overflow-y-auto so all nav items are reachable. Added /parent (Eltern-Portal) link with people icon. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
422
klausur-service/backend/ocr/detect/graphic_detect.py
Normal file
422
klausur-service/backend/ocr/detect/graphic_detect.py
Normal file
@@ -0,0 +1,422 @@
|
||||
"""
|
||||
Graphical element detection for OCR pages.
|
||||
|
||||
Region-based approach:
|
||||
1. Build a color mask (saturation channel — black text is invisible).
|
||||
2. Dilate heavily to merge nearby colored pixels into regions.
|
||||
3. For each region, check overlap with OCR word boxes:
|
||||
- High word overlap → colored text (skip)
|
||||
- Low word overlap → colored graphic / image (keep)
|
||||
4. Separately detect large black-ink illustrations via ink mask.
|
||||
|
||||
Boxes and text colors are handled by cv_box_detect / cv_color_detect.
|
||||
|
||||
Lizenz: Apache 2.0 (kommerziell nutzbar)
|
||||
DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
__all__ = ["detect_graphic_elements", "GraphicElement"]
|
||||
|
||||
|
||||
@dataclass
class GraphicElement:
    """A detected non-text graphical element (one region on the page)."""
    x: int           # bounding-box left edge, pixels
    y: int           # bounding-box top edge, pixels
    width: int       # bounding-box width, pixels
    height: int      # bounding-box height, pixels
    area: int        # ranking measure; colored-pixel count, contour area, or w*h depending on the detection pass
    shape: str       # e.g. "image", "illustration", or a layout-model label
    color_name: str  # dominant color or 'black'
    color_hex: str   # display hex value matching color_name
    confidence: float  # heuristic detection confidence (higher = more certain)
    contour: Any = field(default=None, repr=False)  # raw OpenCV contour if available; excluded from repr (large)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Color helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_COLOR_HEX = {
|
||||
"black": "#000000",
|
||||
"gray": "#6b7280",
|
||||
"red": "#dc2626",
|
||||
"orange": "#ea580c",
|
||||
"yellow": "#ca8a04",
|
||||
"green": "#16a34a",
|
||||
"blue": "#2563eb",
|
||||
"purple": "#9333ea",
|
||||
}
|
||||
|
||||
|
||||
def _dominant_color(hsv_roi: np.ndarray, sat_threshold: int = 40) -> tuple:
|
||||
"""Return (color_name, color_hex) for an HSV region."""
|
||||
if hsv_roi.size == 0:
|
||||
return "black", _COLOR_HEX["black"]
|
||||
|
||||
pixels = hsv_roi.reshape(-1, 3)
|
||||
sat = pixels[:, 1]
|
||||
sat_mask = sat > sat_threshold
|
||||
sat_ratio = np.sum(sat_mask) / len(pixels) if len(pixels) > 0 else 0
|
||||
|
||||
if sat_ratio < 0.15:
|
||||
return "black", _COLOR_HEX["black"]
|
||||
|
||||
sat_pixels = pixels[sat_mask]
|
||||
if len(sat_pixels) < 3:
|
||||
return "black", _COLOR_HEX["black"]
|
||||
|
||||
med_hue = float(np.median(sat_pixels[:, 0]))
|
||||
|
||||
if med_hue < 10 or med_hue > 170:
|
||||
name = "red"
|
||||
elif med_hue < 25:
|
||||
name = "orange"
|
||||
elif med_hue < 35:
|
||||
name = "yellow"
|
||||
elif med_hue < 85:
|
||||
name = "green"
|
||||
elif med_hue < 130:
|
||||
name = "blue"
|
||||
else:
|
||||
name = "purple"
|
||||
|
||||
return name, _COLOR_HEX.get(name, _COLOR_HEX["black"])
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Main detection
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def detect_graphic_elements(
    img_bgr: np.ndarray,
    word_boxes: List[Dict],
    detected_boxes: Optional[List[Dict]] = None,
    max_elements: int = 50,
) -> List[GraphicElement]:
    """Find non-text graphical regions on the page.

    Region-based: dilate color mask to form regions, then check word
    overlap to distinguish colored text from colored graphics.

    The PP-DocLayout ONNX backend is tried first when the
    ``GRAPHIC_DETECT_BACKEND`` environment variable is "doclayout" or
    "auto" (the default); any failure or unavailability falls back to
    the OpenCV heuristics below (PASS 1 colored regions, PASS 2 dark
    ink illustrations, then overlap-based deduplication).

    Args:
        img_bgr: BGR color image.
        word_boxes: List of OCR word dicts with left/top/width/height.
        detected_boxes: Optional list of detected box dicts (x/y/w/h).
        max_elements: Maximum number of elements to return.

    Returns:
        List of GraphicElement, sorted by area descending.
    """
    if img_bgr is None:
        return []

    # ------------------------------------------------------------------
    # Try PP-DocLayout ONNX first if available
    # ------------------------------------------------------------------
    import os  # local import; only needed to read the backend switch
    # Any value other than "doclayout"/"auto" skips the model entirely.
    backend = os.environ.get("GRAPHIC_DETECT_BACKEND", "auto")
    if backend in ("doclayout", "auto"):
        try:
            # Project-local module; may be absent, hence the broad try/except.
            from cv_doclayout_detect import detect_layout_regions, is_doclayout_available
            if is_doclayout_available():
                regions = detect_layout_regions(img_bgr)
                if regions:
                    # Map model labels to (shape, color_name, color_hex);
                    # unknown labels keep their label as the shape, in gray.
                    _LABEL_TO_COLOR = {
                        "figure": ("image", "green", _COLOR_HEX.get("green", "#16a34a")),
                        "table": ("image", "blue", _COLOR_HEX.get("blue", "#2563eb")),
                    }
                    converted: List[GraphicElement] = []
                    for r in regions:
                        shape, color_name, color_hex = _LABEL_TO_COLOR.get(
                            r.label,
                            (r.label, "gray", _COLOR_HEX.get("gray", "#6b7280")),
                        )
                        converted.append(GraphicElement(
                            x=r.x,
                            y=r.y,
                            width=r.width,
                            height=r.height,
                            area=r.width * r.height,
                            shape=shape,
                            color_name=color_name,
                            color_hex=color_hex,
                            # Taken straight from the layout model —
                            # presumably 0..1; confirm in cv_doclayout_detect.
                            confidence=r.confidence,
                            contour=None,
                        ))
                    converted.sort(key=lambda g: g.area, reverse=True)
                    result = converted[:max_elements]
                    if result:
                        shape_counts: Dict[str, int] = {}
                        for g in result:
                            shape_counts[g.shape] = shape_counts.get(g.shape, 0) + 1
                        logger.info(
                            "GraphicDetect (PP-DocLayout): %d elements (%s)",
                            len(result),
                            ", ".join(f"{s}: {c}" for s, c in sorted(shape_counts.items())),
                        )
                        return result
        except Exception as e:
            logger.warning("PP-DocLayout failed, falling back to OpenCV: %s", e)
    # ------------------------------------------------------------------
    # OpenCV fallback (original logic)
    # ------------------------------------------------------------------

    h, w = img_bgr.shape[:2]

    logger.debug("GraphicDetect: image %dx%d, %d word_boxes, %d detected_boxes",
                 w, h, len(word_boxes), len(detected_boxes or []))

    hsv = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2HSV)
    candidates: List[GraphicElement] = []

    # --- Build word mask (for overlap checking) ---
    # 255 wherever any OCR word box covers the page; clipped to the image.
    word_mask = np.zeros((h, w), dtype=np.uint8)
    for wb in word_boxes:
        x1 = max(0, int(wb.get("left", 0)))
        y1 = max(0, int(wb.get("top", 0)))
        x2 = min(w, int(wb.get("left", 0) + wb.get("width", 0)))
        y2 = min(h, int(wb.get("top", 0) + wb.get("height", 0)))
        word_mask[y1:y2, x1:x2] = 255

    # =====================================================================
    # PASS 1 — COLORED IMAGE REGIONS
    # =====================================================================
    # Color mask: saturated pixels (black text has sat ≈ 0 → invisible)
    sat_mask = (hsv[:, :, 1] > 40).astype(np.uint8) * 255
    # Exclude near-white pixels (very high value) such as paper background.
    val_mask = (hsv[:, :, 2] < 240).astype(np.uint8) * 255
    color_pixels = cv2.bitwise_and(sat_mask, val_mask)

    # Remove tiny speckle
    kernel_open = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
    color_pixels = cv2.morphologyEx(color_pixels, cv2.MORPH_OPEN, kernel_open)

    # Count raw colored pixels before dilation (for density check later)
    color_pixel_raw = color_pixels.copy()

    # Heavy dilation to merge nearby colored elements into regions.
    # A 25x25 kernel merges elements within ~12px of each other.
    kernel_dilate = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (25, 25))
    region_mask = cv2.dilate(color_pixels, kernel_dilate, iterations=1)

    contours_regions, _ = cv2.findContours(
        region_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE,
    )
    logger.debug("GraphicDetect PASS1: %d color regions after dilation", len(contours_regions))

    for cnt in contours_regions:
        bx, by, bw, bh = cv2.boundingRect(cnt)

        # Skip tiny regions
        if bw < 15 or bh < 15:
            continue

        # Skip page-spanning regions
        if bw > w * 0.6 or bh > h * 0.6:
            logger.debug("GraphicDetect PASS1 skip page-spanning (%d,%d) %dx%d", bx, by, bw, bh)
            continue

        bbox_area = bw * bh

        # Check: how much of this region's bounding box overlaps with words?
        roi_words = word_mask[by:by + bh, bx:bx + bw]
        word_pixel_count = int(np.sum(roi_words > 0))
        word_overlap = word_pixel_count / bbox_area if bbox_area > 0 else 0

        # Check: how many OCR word centroids fall inside this region?
        # Colored text that OCR detected will have multiple centroids inside.
        # Actual images may have 0-1 spurious OCR artifacts.
        word_centroid_count = sum(
            1 for wb in word_boxes
            if (bx <= int(wb.get("left", 0) + wb.get("width", 0) / 2) <= bx + bw
                and by <= int(wb.get("top", 0) + wb.get("height", 0) / 2) <= by + bh)
        )

        # Check: how many actual colored pixels are in this region?
        # (uses the pre-dilation mask, so dilation padding doesn't count)
        roi_color = color_pixel_raw[by:by + bh, bx:bx + bw]
        color_pixel_count = int(np.sum(roi_color > 0))

        # Color pixel density (before any skip checks so we can log it)
        density = color_pixel_count / bbox_area if bbox_area > 0 else 0

        # --- Skip heuristics for colored TEXT (not images) ---
        # Ordered cheapest/most-decisive first; each `continue` drops the region.

        # (a) High word-box pixel overlap → clearly text
        if word_overlap > 0.40:
            logger.info(
                "GraphicDetect PASS1 skip text-overlap (%d,%d) %dx%d "
                "overlap=%.0f%% centroids=%d",
                bx, by, bw, bh, word_overlap * 100, word_centroid_count,
            )
            continue

        # (b) Multiple OCR words detected inside → colored text
        # (images rarely produce 2+ confident word detections)
        if word_centroid_count >= 2:
            logger.info(
                "GraphicDetect PASS1 skip multi-word (%d,%d) %dx%d "
                "centroids=%d overlap=%.0f%% density=%.0f%%",
                bx, by, bw, bh, word_centroid_count,
                word_overlap * 100, density * 100,
            )
            continue

        # (c) Even 1 word + some pixel overlap → likely text
        if word_centroid_count >= 1 and word_overlap > 0.10:
            logger.info(
                "GraphicDetect PASS1 skip word+overlap (%d,%d) %dx%d "
                "centroids=%d overlap=%.0f%%",
                bx, by, bw, bh, word_centroid_count, word_overlap * 100,
            )
            continue

        # Need a minimum number of colored pixels (not just dilated area)
        if color_pixel_count < 200:
            continue

        # (d) Very low density → thin strokes, almost certainly text.
        # Large regions (photos/illustrations) can have low color density
        # because most pixels are grayscale ink. Use a lower threshold
        # for regions bigger than 100×80 px.
        _min_density = 0.05 if (bw > 100 and bh > 80) else 0.20
        if density < _min_density:
            logger.info(
                "GraphicDetect PASS1 skip low-density (%d,%d) %dx%d "
                "density=%.0f%% (min=%.0f%%, likely colored text)",
                bx, by, bw, bh, density * 100, _min_density * 100,
            )
            continue

        # (e) Moderate density + small height → colored text line
        if density < 0.35 and bh < h * 0.05:
            logger.info(
                "GraphicDetect PASS1 skip text-height (%d,%d) %dx%d "
                "density=%.0f%% height=%.1f%%",
                bx, by, bw, bh, density * 100, 100.0 * bh / h,
            )
            continue

        # Determine dominant color from the actual colored pixels
        roi_hsv = hsv[by:by + bh, bx:bx + bw]
        color_px_mask = roi_color > 0
        if np.sum(color_px_mask) > 0:
            masked_hsv = roi_hsv[color_px_mask]
            color_name, color_hex = _dominant_color(masked_hsv)
        else:
            color_name, color_hex = "black", _COLOR_HEX["black"]

        # Confidence based on color density and low word overlap
        # (linear in density, capped at 0.95; floor is 0.5)
        conf = min(0.95, 0.5 + density * 0.5)

        logger.debug("GraphicDetect PASS1 accept (%d,%d) %dx%d px=%d density=%.0f%% overlap=%.0f%% %s",
                     bx, by, bw, bh, color_pixel_count, density * 100, word_overlap * 100, color_name)
        candidates.append(GraphicElement(
            x=bx, y=by, width=bw, height=bh,
            # NOTE: area here is the colored-pixel count, not bw*bh —
            # it ranks dense regions above sparsely-colored ones.
            area=color_pixel_count,
            shape="image",
            color_name=color_name, color_hex=color_hex,
            confidence=round(conf, 2), contour=cnt,
        ))

    # =====================================================================
    # PASS 2 — LARGE BLACK-INK ILLUSTRATIONS
    # =====================================================================
    # Otsu threshold (inverted) marks dark ink as foreground.
    gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
    _, dark_mask = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

    # Exclude words and colored regions already found
    exclusion = np.zeros((h, w), dtype=np.uint8)
    word_pad = 5  # grow word boxes slightly so glyph edges don't leak through
    for wb in word_boxes:
        x1 = max(0, int(wb.get("left", 0)) - word_pad)
        y1 = max(0, int(wb.get("top", 0)) - word_pad)
        x2 = min(w, int(wb.get("left", 0) + wb.get("width", 0)) + word_pad)
        y2 = min(h, int(wb.get("top", 0) + wb.get("height", 0)) + word_pad)
        exclusion[y1:y2, x1:x2] = 255

    if detected_boxes:
        for box in detected_boxes:
            bbx = int(box.get("x", 0))
            bby = int(box.get("y", 0))
            bbw = int(box.get("w", box.get("width", 0)))
            bbh = int(box.get("h", box.get("height", 0)))
            # Shrink the box by 8px so its own border strokes stay in the
            # ink mask while the interior is excluded.
            inset = 8
            x1 = max(0, bbx + inset)
            y1 = max(0, bby + inset)
            x2 = min(w, bbx + bbw - inset)
            y2 = min(h, bby + bbh - inset)
            if x2 > x1 and y2 > y1:
                exclusion[y1:y2, x1:x2] = 255

    # Ink = dark pixels minus excluded areas minus colored pixels (PASS 1's).
    ink_only = cv2.bitwise_and(dark_mask, cv2.bitwise_not(exclusion))
    ink_only = cv2.bitwise_and(ink_only, cv2.bitwise_not(color_pixels))

    contours_ink, _ = cv2.findContours(
        ink_only, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE,
    )
    logger.debug("GraphicDetect PASS2 ink: %d contours", len(contours_ink))

    for cnt in contours_ink:
        area = cv2.contourArea(cnt)
        bx, by, bw, bh = cv2.boundingRect(cnt)

        # Keep only substantial, roughly block-shaped ink regions.
        if area < 5000 or min(bw, bh) < 40:
            continue
        # Skip near-page-sized regions (scan borders, shadows).
        if bw > w * 0.8 or bh > h * 0.8:
            continue

        logger.debug("GraphicDetect PASS2 accept (%d,%d) %dx%d area=%d",
                     bx, by, bw, bh, int(area))
        candidates.append(GraphicElement(
            x=bx, y=by, width=bw, height=bh,
            area=int(area), shape="illustration",
            color_name="black", color_hex="#000000",
            confidence=0.5, contour=cnt,
        ))

    # =====================================================================
    # Deduplicate and return
    # =====================================================================
    # Greedy dedup: biggest-area candidates win; a candidate is dropped when
    # its intersection with an already-kept element covers > 50% of the
    # smaller of the two bounding boxes.
    candidates.sort(key=lambda g: g.area, reverse=True)

    final: List[GraphicElement] = []
    for c in candidates:
        overlap = False
        for f in final:
            ix1 = max(c.x, f.x)
            iy1 = max(c.y, f.y)
            ix2 = min(c.x + c.width, f.x + f.width)
            iy2 = min(c.y + c.height, f.y + f.height)
            if ix2 > ix1 and iy2 > iy1:
                inter = (ix2 - ix1) * (iy2 - iy1)
                smaller = min(c.width * c.height, f.width * f.height)
                if smaller > 0 and inter / smaller > 0.5:
                    overlap = True
                    break
        if not overlap:
            final.append(c)

    result = final[:max_elements]

    if result:
        shape_counts: Dict[str, int] = {}
        for g in result:
            shape_counts[g.shape] = shape_counts.get(g.shape, 0) + 1
        logger.info(
            "GraphicDetect: %d elements found (%s)",
            len(result),
            ", ".join(f"{s}: {c}" for s, c in sorted(shape_counts.items())),
        )
    else:
        logger.info("GraphicDetect: no graphic elements found")

    return result
|
||||
Reference in New Issue
Block a user