Fix: Sidebar scrollable + add Eltern-Portal nav link
overflow-hidden → overflow-y-auto so all nav items are reachable. Added /parent (Eltern-Portal) link with people icon. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,471 +0,0 @@
|
||||
"""
|
||||
Embedded box detection and page zone splitting for the CV vocabulary pipeline.
|
||||
|
||||
Detects boxes (grammar tips, exercises, etc.) that span the page width and
|
||||
interrupt the normal column layout. Splits the page into vertical zones so
|
||||
that column detection can run independently per zone.
|
||||
|
||||
Two-stage algorithm (both run, results merged):
|
||||
1. Morphological line detection — finds bordered boxes via horizontal lines.
|
||||
2. Background shading detection — finds shaded/colored boxes via median-blur
|
||||
background analysis. Works for colored (blue, green) and grayscale
|
||||
(gray shading on B/W scans) boxes.
|
||||
|
||||
Lizenz: Apache 2.0 (kommerziell nutzbar)
|
||||
DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import List, Optional, Tuple
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from cv_vocab_types import DetectedBox, PageZone
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
__all__ = [
|
||||
"detect_boxes",
|
||||
"split_page_into_zones",
|
||||
]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Stage 1: Morphological line detection
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _detect_boxes_by_lines(
    gray: np.ndarray,
    content_x: int,
    content_w: int,
    content_y: int,
    content_h: int,
) -> List[DetectedBox]:
    """Find boxes defined by pairs of long horizontal border lines.

    Binarizes the page, extracts long horizontal strokes with a morphological
    opening, projects them row-wise into line segments, then pairs consecutive
    segments into top/bottom box borders.

    Args:
        gray: Grayscale image (full page).
        content_x, content_w: Horizontal content bounds.
        content_y, content_h: Vertical content bounds. NOTE(review):
            content_y is currently unused — line segments are searched over
            the full image height; kept for signature symmetry with the
            shading detector.

    Returns:
        List of DetectedBox for each detected bordered box.
    """
    h, w = gray.shape[:2]

    # Binarize: dark pixels → white on black background
    _, binary = cv2.threshold(gray, 180, 255, cv2.THRESH_BINARY_INV)

    # Horizontal morphology kernel — at least 50% of content width, so only
    # genuine border lines (not text strokes) survive the opening.
    kernel_w = max(50, content_w // 2)
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (kernel_w, 1))
    lines_img = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel)

    # Horizontal projection: count line pixels per row (within content width)
    h_proj = np.sum(lines_img[:, content_x:content_x + content_w] > 0, axis=1)
    line_threshold = content_w * 0.30

    # Group consecutive rows with enough line pixels into line segments
    line_segments: List[Tuple[int, int]] = []  # (y_start, y_end)
    seg_start: Optional[int] = None
    for y in range(h):
        if h_proj[y] >= line_threshold:
            if seg_start is None:
                seg_start = y
        else:
            if seg_start is not None:
                line_segments.append((seg_start, y))
                seg_start = None
    if seg_start is not None:
        line_segments.append((seg_start, h))

    if len(line_segments) < 2:
        return []

    # Pair lines into boxes: top-line + bottom-line
    # Minimum box height: 30px. Maximum: 70% of content height.
    min_box_h = 30
    max_box_h = int(content_h * 0.70)

    boxes: List[DetectedBox] = []
    used = set()
    for i, (top_start, top_end) in enumerate(line_segments):
        if i in used:
            continue
        for j in range(i + 1, len(line_segments)):
            if j in used:
                continue
            bot_start, bot_end = line_segments[j]
            box_y = top_start
            box_h = bot_end - top_start
            if box_h < min_box_h:
                # Lines too close together — try the next bottom candidate.
                continue
            if box_h > max_box_h:
                # FIX: segments are ordered top-to-bottom, so box_h only
                # grows with j — no later candidate can fit either. Was
                # `continue`, which uselessly scanned the rest of the list.
                break

            # Estimate border thickness from line segment heights
            border_top = top_end - top_start
            border_bot = bot_end - bot_start

            box = DetectedBox(
                x=content_x,
                y=box_y,
                width=content_w,
                height=box_h,
                confidence=0.8,
                border_thickness=max(border_top, border_bot),
            )
            boxes.append(box)
            used.add(i)
            used.add(j)
            break  # move to next top-line candidate

    return boxes
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Stage 2: Background shading detection (color + grayscale)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _detect_boxes_by_shading(
    img_bgr: np.ndarray,
    content_x: int,
    content_w: int,
    content_y: int,
    content_h: int,
) -> List[DetectedBox]:
    """Find boxes whose background is shaded or colored (borderless boxes).

    A heavy median blur wipes out text strokes, leaving only the page and
    box backgrounds. Rectangular regions whose blurred background deviates
    from the estimated page background — darker (gray shading) or saturated
    (blue/green fills) — are reported as boxes. Handles both color pages
    and grayscale B/W scans.

    Args:
        img_bgr: BGR color image (full page).
        content_x, content_w: Horizontal content bounds.
        content_y, content_h: Vertical content bounds.

    Returns:
        List of DetectedBox for each detected shaded box.
    """
    page_h, page_w = img_bgr.shape[:2]

    # --- Remove text: a large (31px) median filter keeps only background ---
    smoothed = cv2.medianBlur(img_bgr, 31)
    bg_gray = cv2.cvtColor(smoothed, cv2.COLOR_BGR2GRAY)
    bg_hsv = cv2.cvtColor(smoothed, cv2.COLOR_BGR2HSV)

    # Estimate plain-page brightness from the two top corners.
    corner = max(20, min(page_h // 10, page_w // 10))
    corner_pixels = np.concatenate([
        bg_gray[:corner, :corner].ravel(),
        bg_gray[:corner, -corner:].ravel(),
    ])
    page_bg = float(np.median(corner_pixels))

    # Candidate mask = noticeably-darker-than-page OR saturated regions.
    shade_cutoff = max(page_bg - 30, 150)
    darker = (bg_gray < shade_cutoff).astype(np.uint8) * 255
    saturated = (bg_hsv[:, :, 1] > 20).astype(np.uint8) * 255
    mask = cv2.bitwise_or(darker, saturated)

    # Close small gaps, then drop speckle noise.
    mask = cv2.morphologyEx(
        mask, cv2.MORPH_CLOSE,
        cv2.getStructuringElement(cv2.MORPH_RECT, (25, 10)))
    mask = cv2.morphologyEx(
        mask, cv2.MORPH_OPEN,
        cv2.getStructuringElement(cv2.MORPH_RECT, (10, 5)))

    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Size limits — smaller boxes (~25% of content width) are allowed.
    min_area = content_w * 30  # roughly: at least 30px tall at full width
    min_box_h = 25
    max_box_h = int(content_h * 0.70)
    min_width_ratio = 0.25

    detected: List[DetectedBox] = []
    for contour in contours:
        area = cv2.contourArea(contour)
        if area < min_area:
            continue

        rx, ry, rw, rh = cv2.boundingRect(contour)

        # Width / height filters
        if rw < content_w * min_width_ratio:
            continue
        if rh < min_box_h or rh > max_box_h:
            continue

        # Rectangularity: contour must fill at least half its bounding rect.
        bound_area = rw * rh
        if bound_area > 0 and area / bound_area < 0.5:
            continue

        # Confirm the region really is shaded/colored (not a stray contour).
        region_gray = bg_gray[ry:ry + rh, rx:rx + rw]
        region_hsv = bg_hsv[ry:ry + rh, rx:rx + rw]
        if region_gray.size == 0:
            continue

        is_shaded = float(np.median(region_gray)) < (page_bg - 15)
        is_colored = float(np.median(region_hsv[:, :, 1])) > 15

        if not (is_shaded or is_colored):
            continue

        detected.append(DetectedBox(
            x=rx,
            y=ry,
            width=rw,
            height=rh,
            confidence=0.7 if is_colored else 0.6,
            border_thickness=0,
        ))

    return detected
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Validation
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _validate_box(
|
||||
box: DetectedBox,
|
||||
gray: np.ndarray,
|
||||
content_w: int,
|
||||
content_h: int,
|
||||
median_row_gap: int,
|
||||
) -> bool:
|
||||
"""Validate that a detected box is genuine (not a table-row separator etc.)."""
|
||||
# Must span > 25% of content width (lowered from 60% to allow smaller boxes)
|
||||
if box.width < content_w * 0.25:
|
||||
return False
|
||||
|
||||
# Height constraints
|
||||
if box.height < 25 or box.height > content_h * 0.70:
|
||||
return False
|
||||
|
||||
# Must not be confused with a table-row separator:
|
||||
# real boxes are at least 3x the median row gap
|
||||
if median_row_gap > 0 and box.height < median_row_gap * 3:
|
||||
return False
|
||||
|
||||
# Must contain some text (ink density check)
|
||||
h, w = gray.shape[:2]
|
||||
y1 = max(0, box.y)
|
||||
y2 = min(h, box.y + box.height)
|
||||
x1 = max(0, box.x)
|
||||
x2 = min(w, box.x + box.width)
|
||||
roi = gray[y1:y2, x1:x2]
|
||||
if roi.size == 0:
|
||||
return False
|
||||
ink_ratio = np.sum(roi < 128) / roi.size
|
||||
if ink_ratio < 0.002: # nearly empty → not a real content box
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public API: detect_boxes
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _merge_overlapping_boxes(boxes: List[DetectedBox]) -> List[DetectedBox]:
|
||||
"""Merge boxes that overlap significantly (IoU > 0.3 or one contains the other).
|
||||
|
||||
When two boxes overlap, keep the one with higher confidence (or the larger
|
||||
one if confidences are equal).
|
||||
"""
|
||||
if len(boxes) <= 1:
|
||||
return boxes
|
||||
|
||||
# Sort by area descending so larger boxes are processed first
|
||||
boxes = sorted(boxes, key=lambda b: b.width * b.height, reverse=True)
|
||||
keep = [True] * len(boxes)
|
||||
|
||||
for i in range(len(boxes)):
|
||||
if not keep[i]:
|
||||
continue
|
||||
bi = boxes[i]
|
||||
for j in range(i + 1, len(boxes)):
|
||||
if not keep[j]:
|
||||
continue
|
||||
bj = boxes[j]
|
||||
|
||||
# Compute overlap
|
||||
x1 = max(bi.x, bj.x)
|
||||
y1 = max(bi.y, bj.y)
|
||||
x2 = min(bi.x + bi.width, bj.x + bj.width)
|
||||
y2 = min(bi.y + bi.height, bj.y + bj.height)
|
||||
|
||||
if x2 <= x1 or y2 <= y1:
|
||||
continue # no overlap
|
||||
|
||||
inter = (x2 - x1) * (y2 - y1)
|
||||
area_i = bi.width * bi.height
|
||||
area_j = bj.width * bj.height
|
||||
smaller_area = min(area_i, area_j)
|
||||
|
||||
# If overlap covers > 50% of the smaller box, merge (drop the weaker)
|
||||
if smaller_area > 0 and inter / smaller_area > 0.50:
|
||||
# Keep the one with higher confidence; if equal, keep larger
|
||||
if bj.confidence > bi.confidence:
|
||||
keep[i] = False
|
||||
break
|
||||
else:
|
||||
keep[j] = False
|
||||
|
||||
return [b for b, k in zip(boxes, keep) if k]
|
||||
|
||||
|
||||
def detect_boxes(
    img_bgr: np.ndarray,
    content_x: int,
    content_w: int,
    content_y: int,
    content_h: int,
    median_row_gap: int = 0,
) -> List[DetectedBox]:
    """Detect embedded boxes on a page image.

    Both detectors always run — line-based (bordered boxes) and
    shading-based (colored/gray backgrounds) — and their candidates are
    merged, deduplicated, and validated.

    Args:
        img_bgr: BGR color image (full page or cropped).
        content_x, content_w: Horizontal content bounds.
        content_y, content_h: Vertical content bounds.
        median_row_gap: Median row gap height (for filtering out table separators).

    Returns:
        List of validated DetectedBox instances, sorted by y position.
    """
    gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)

    # Stage 1 — bordered boxes; Stage 2 — shaded/colored boxes.
    bordered = _detect_boxes_by_lines(gray, content_x, content_w, content_y, content_h)
    shaded = _detect_boxes_by_shading(img_bgr, content_x, content_w, content_y, content_h)

    logger.debug("BoxDetect: %d line-based, %d shading-based candidates",
                 len(bordered), len(shaded))

    # Combine both stages, drop duplicates, validate, sort top-to-bottom.
    deduped = _merge_overlapping_boxes(bordered + shaded)
    result = sorted(
        (b for b in deduped
         if _validate_box(b, gray, content_w, content_h, median_row_gap)),
        key=lambda b: b.y,
    )

    if result:
        logger.info("BoxDetect: %d box(es) detected (line=%d, shade=%d, merged=%d)",
                    len(result), len(bordered), len(shaded), len(deduped))
    else:
        logger.debug("BoxDetect: no boxes detected")

    return result
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Zone Splitting
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def split_page_into_zones(
    content_x: int,
    content_y: int,
    content_w: int,
    content_h: int,
    boxes: List[DetectedBox],
    min_zone_height: int = 40,
) -> List[PageZone]:
    """Split a page into vertical zones based on detected boxes.

    Regions above, between, and below boxes become 'content' zones; box
    regions become 'box' zones. Content gaps shorter than min_zone_height
    are dropped as too small to hold useful column content.

    Args:
        content_x, content_y, content_w, content_h: Content area bounds.
        boxes: Detected boxes, sorted by y position (top to bottom).
        min_zone_height: Minimum height for a content zone to be kept.

    Returns:
        List of PageZone, ordered top to bottom.
    """
    if not boxes:
        # No boxes: the entire content area is one content zone.
        return [PageZone(
            index=0,
            zone_type='content',
            y=content_y,
            height=content_h,
            x=content_x,
            width=content_w,
        )]

    zones: List[PageZone] = []
    zone_idx = 0
    cursor_y = content_y
    content_bottom = content_y + content_h

    for box in boxes:
        # Content zone above this box (skipped when the gap is too small).
        gap_above = box.y - cursor_y
        if gap_above >= min_zone_height:
            zones.append(PageZone(
                index=zone_idx,
                zone_type='content',
                y=cursor_y,
                height=gap_above,
                x=content_x,
                width=content_w,
            ))
            zone_idx += 1

        # The box itself becomes a dedicated 'box' zone.
        zones.append(PageZone(
            index=zone_idx,
            zone_type='box',
            y=box.y,
            height=box.height,
            x=box.x,
            width=box.width,
            box=box,
        ))
        zone_idx += 1

        cursor_y = box.y + box.height

    # Content zone below the last box.
    remaining = content_bottom - cursor_y
    if remaining >= min_zone_height:
        zones.append(PageZone(
            index=zone_idx,
            zone_type='content',
            y=cursor_y,
            height=remaining,
            x=content_x,
            width=content_w,
        ))

    # FIX: lazy %-style logging args (was an f-string) — defers formatting
    # and matches the logging style used everywhere else in this module.
    logger.info("ZoneSplit: %d zones from %d box(es): %s",
                len(zones), len(boxes), [z.zone_type for z in zones])

    return zones
|
||||
Reference in New Issue
Block a user