Fix: Sidebar scrollable + add Eltern-Portal nav link

overflow-hidden → overflow-y-auto so all nav items are reachable. Added /parent (Eltern-Portal) link with people icon. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-25 20:49:44 +02:00
parent d87645ffce
commit 45287b3541
48 changed files with 6 additions and 1 deletions
--- a/klausur-service/backend/ocr/preprocessing/preprocessing.py
+++ b/klausur-service/backend/ocr/preprocessing/preprocessing.py
@@ -0,0 +1,157 @@
+"""
+Image I/O, orientation detection, deskew, and dewarp for the CV vocabulary pipeline.
+
+Re-export facade -- the deskew and dewarp logic lives in the sub-modules:
+
+  cv_preprocessing_deskew   Rotation correction (Hough, word-alignment, iterative, two-pass)
+  cv_preprocessing_dewarp   Vertical shear detection and correction (4 methods + ensemble)
+
+This file contains the image I/O and orientation detection functions.
+
+License: Apache 2.0 (commercial use permitted)
+PRIVACY: All processing happens locally.
+"""
+
+import logging
+from typing import Tuple
+
+import numpy as np
+
+from cv_vocab_types import (
+    CV2_AVAILABLE,
+    TESSERACT_AVAILABLE,
+)
+
+logger = logging.getLogger(__name__)
+
+# Guarded imports
+try:
+    import cv2
+except ImportError:
+    cv2 = None  # type: ignore[assignment]
+
+try:
+    import pytesseract
+    from PIL import Image
+except ImportError:
+    pytesseract = None  # type: ignore[assignment]
+    Image = None  # type: ignore[assignment,misc]
+
+# Re-export all deskew functions
+from cv_preprocessing_deskew import (  # noqa: F401
+    deskew_image,
+    deskew_image_by_word_alignment,
+    deskew_image_iterative,
+    deskew_two_pass,
+    _projection_gradient_score,
+    _measure_textline_slope,
+)
+
+# Re-export all dewarp functions
+from cv_preprocessing_dewarp import (  # noqa: F401
+    _apply_shear,
+    _detect_shear_angle,
+    _detect_shear_by_hough,
+    _detect_shear_by_projection,
+    _detect_shear_by_text_lines,
+    _dewarp_quality_check,
+    _ensemble_shear,
+    dewarp_image,
+    dewarp_image_manual,
+)
+
+
+# =============================================================================
+# Image I/O
+# =============================================================================
+
+def render_pdf_high_res(pdf_data: bytes, page_number: int = 0, zoom: float = 3.0) -> np.ndarray:
+    """Render a PDF page to a high-resolution numpy array (BGR).
+
+    Args:
+        pdf_data: Raw PDF bytes.
+        page_number: 0-indexed page number.
+        zoom: Zoom factor (3.0 = 432 DPI).
+
+    Returns:
+        numpy array in BGR format.
+    """
+    import fitz  # PyMuPDF
+
+    pdf_doc = fitz.open(stream=pdf_data, filetype="pdf")
+    if page_number >= pdf_doc.page_count:
+        raise ValueError(f"Page {page_number} does not exist (PDF has {pdf_doc.page_count} pages)")
+
+    page = pdf_doc[page_number]
+    mat = fitz.Matrix(zoom, zoom)
+    pix = page.get_pixmap(matrix=mat)
+
+    img_data = np.frombuffer(pix.samples, dtype=np.uint8).reshape(pix.h, pix.w, pix.n)
+    if pix.n == 4:  # RGBA
+        img_bgr = cv2.cvtColor(img_data, cv2.COLOR_RGBA2BGR)
+    elif pix.n == 3:  # RGB
+        img_bgr = cv2.cvtColor(img_data, cv2.COLOR_RGB2BGR)
+    else:  # Grayscale
+        img_bgr = cv2.cvtColor(img_data, cv2.COLOR_GRAY2BGR)
+
+    pdf_doc.close()
+    return img_bgr
+
+
+def render_image_high_res(image_data: bytes) -> np.ndarray:
+    """Load an image (PNG/JPEG) into a numpy array (BGR).
+
+    Args:
+        image_data: Raw image bytes.
+
+    Returns:
+        numpy array in BGR format.
+    """
+    img_array = np.frombuffer(image_data, dtype=np.uint8)
+    img_bgr = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
+    if img_bgr is None:
+        raise ValueError("Could not decode image data")
+    return img_bgr
+
+
+# =============================================================================
+# Orientation Detection (0/90/180/270)
+# =============================================================================
+
+def detect_and_fix_orientation(img_bgr: np.ndarray) -> Tuple[np.ndarray, int]:
+    """Detect page orientation via Tesseract OSD and rotate if needed.
+
+    Returns:
+        (corrected_image, rotation_degrees) -- rotation is 0, 90, 180, or 270.
+    """
+    if pytesseract is None:
+        return img_bgr, 0
+
+    try:
+        gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
+        pil_img = Image.fromarray(gray)
+
+        osd = pytesseract.image_to_osd(pil_img, output_type=pytesseract.Output.DICT)
+        rotate = osd.get("rotate", 0)
+        confidence = osd.get("orientation_conf", 0.0)
+
+        logger.info(f"OSD: orientation={rotate}\u00b0 confidence={confidence:.1f}")
+
+        if rotate == 0 or confidence < 1.0:
+            return img_bgr, 0
+
+        if rotate == 180:
+            corrected = cv2.rotate(img_bgr, cv2.ROTATE_180)
+        elif rotate == 90:
+            corrected = cv2.rotate(img_bgr, cv2.ROTATE_90_CLOCKWISE)
+        elif rotate == 270:
+            corrected = cv2.rotate(img_bgr, cv2.ROTATE_90_COUNTERCLOCKWISE)
+        else:
+            return img_bgr, 0
+
+        logger.info(f"OSD: rotated {rotate}\u00b0 to fix orientation")
+        return corrected, rotate
+
+    except Exception as e:
+        logger.warning(f"OSD orientation detection failed: {e}")
+        return img_bgr, 0