# breakpilot-lehrer/klausur-service/backend/ocr_pipeline_overlay_structure.py

"""
Overlay rendering for structure detection (boxes, zones, colors, graphics).

Extracted from ocr_pipeline_overlays.py for modularity.

Lizenz: Apache 2.0
DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
"""

import logging
from typing import Any, Dict, List

import cv2
import numpy as np
from fastapi import HTTPException
from fastapi.responses import Response

from ocr_pipeline_common import _get_base_image_png
from ocr_pipeline_session_store import get_session_db
from cv_color_detect import _COLOR_HEX, _COLOR_RANGES
from cv_box_detect import detect_boxes, split_page_into_zones

logger = logging.getLogger(__name__)


# BGR fallback (the gray of "#6b7280") for hex codes missing from _HEX_TO_BGR.
_DEFAULT_BGR = (128, 114, 107)

# Hex color code -> BGR tuple, shared by box fills/borders and graphic outlines.
_HEX_TO_BGR = {
    "#dc2626": (38, 38, 220),    # red
    "#2563eb": (235, 99, 37),    # blue
    "#16a34a": (74, 163, 22),    # green
    "#ea580c": (12, 88, 234),    # orange
    "#9333ea": (234, 51, 147),   # purple
    "#ca8a04": (4, 138, 202),    # yellow
    "#6b7280": _DEFAULT_BGR,     # gray
}

# Zone type -> BGR color of the dashed zone boundary and its label.
_ZONE_COLORS_BGR = {
    "content": (200, 200, 200),  # light gray
    "box": (255, 180, 0),        # blue-ish
}

# Color name -> BGR contour color for the HSV-detected color regions.
_REGION_COLORS_BGR = {
    "red": (0, 0, 255),
    "orange": (0, 140, 255),
    "yellow": (0, 200, 255),
    "green": (0, 200, 0),
    "blue": (255, 150, 0),
    "purple": (200, 0, 200),
}

# Label text for known graphic shapes; any other shape is labelled with its
# upper-cased name truncated to five characters.
_SHAPE_LABELS = {
    "image": "IMAGE",
    "illustration": "ILLUST",
}


def _rect(item: Dict[str, Any]) -> tuple:
    """Return the ``x``/``y``/``w``/``h`` entries of *item* as plain ints.

    ``range()`` and the cv2 drawing calls below need integer coordinates, so
    the values are coerced here once instead of trusting the stored types.
    """
    return int(item["x"]), int(item["y"]), int(item["w"]), int(item["h"])


def _draw_dashed_rect(
    img: np.ndarray,
    x: int,
    y: int,
    w: int,
    h: int,
    color: tuple,
    *,
    dash: int,
    thickness: int,
    sides: bool = True,
) -> None:
    """Draw a dashed rectangle outline onto *img* in place.

    Dashes are ``dash`` pixels long and separated by gaps of the same length.
    With ``sides=False`` only the top and bottom edges are drawn.
    """
    right, bottom = x + w, y + h
    for seg_x in range(x, right, dash * 2):
        end_x = min(seg_x + dash, right)
        cv2.line(img, (seg_x, y), (end_x, y), color, thickness)
        cv2.line(img, (seg_x, bottom), (end_x, bottom), color, thickness)
    if not sides:
        return
    for seg_y in range(y, bottom, dash * 2):
        end_y = min(seg_y + dash, bottom)
        cv2.line(img, (x, seg_y), (x, end_y), color, thickness)
        cv2.line(img, (right, seg_y), (right, end_y), color, thickness)


def _detect_structure(img: np.ndarray) -> Dict[str, Any]:
    """Run box and zone detection on *img* and return it as a structure dict.

    The content area is the page minus a margin of 3% of the shorter side on
    every edge. The result has the ``boxes`` and ``zones`` keys that the
    drawing helpers read; it has no ``graphics`` key.
    """
    h, w = img.shape[:2]
    margin = int(min(w, h) * 0.03)
    content_w = w - 2 * margin
    content_h = h - 2 * margin
    # NOTE: detect_boxes takes (x, w, y, h) while split_page_into_zones takes
    # (x, y, w, h) -- the argument orders really do differ.
    boxes = detect_boxes(img, margin, content_w, margin, content_h)
    zones = split_page_into_zones(margin, margin, content_w, content_h, boxes)
    return {
        "boxes": [
            {
                "x": b.x,
                "y": b.y,
                "w": b.width,
                "h": b.height,
                "confidence": b.confidence,
                "border_thickness": b.border_thickness,
            }
            for b in boxes
        ],
        "zones": [
            {
                "index": z.index,
                "zone_type": z.zone_type,
                "y": z.y,
                "h": z.height,
                "x": z.x,
                "w": z.width,
            }
            for z in zones
        ],
    }


def _draw_zones(img: np.ndarray, zones: List[Dict[str, Any]]) -> None:
    """Draw dashed top/bottom boundaries and a text label for every zone."""
    for zone in zones:
        zx, zy, zw, zh = _rect(zone)
        color = _ZONE_COLORS_BGR.get(zone["zone_type"], (200, 200, 200))
        _draw_dashed_rect(
            img, zx, zy, zw, zh, color, dash=12, thickness=1, sides=False
        )
        zone_label = f"Zone {zone['index']} ({zone['zone_type']})"
        cv2.putText(img, zone_label, (zx + 5, zy + 15),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.45, color, 1)


def _draw_boxes(
    img: np.ndarray, overlay: np.ndarray, boxes: List[Dict[str, Any]]
) -> None:
    """Draw detected boxes: solid fill on *overlay*, border and label on *img*.

    The fill goes onto the separate *overlay* image so that the caller can
    blend it in semi-transparently afterwards.
    """
    for box in boxes:
        bx, by, bw, bh = _rect(box)
        # ``or 0`` also covers an explicit None stored for these fields.
        conf = box.get("confidence") or 0
        thickness = box.get("border_thickness") or 0
        bg_name = box.get("bg_color_name", "")
        color = _HEX_TO_BGR.get(box.get("bg_color_hex", "#6b7280"), _DEFAULT_BGR)

        top_left, bottom_right = (bx, by), (bx + bw, by + bh)
        cv2.rectangle(overlay, top_left, bottom_right, color, -1)
        cv2.rectangle(img, top_left, bottom_right, color, 3)

        label = "BOX"
        if bg_name and bg_name not in ("unknown", "white"):
            label += f" ({bg_name})"
        if thickness > 0:
            label += f" border={thickness}px"
        label += f" {int(conf * 100)}%"

        # Thick white text first, then thin colored text on top, which gives
        # the label a white outline for readability.
        origin = (bx + 8, by + 22)
        cv2.putText(img, label, origin,
                    cv2.FONT_HERSHEY_SIMPLEX, 0.55, (255, 255, 255), 2)
        cv2.putText(img, label, origin,
                    cv2.FONT_HERSHEY_SIMPLEX, 0.55, color, 1)


def _draw_color_regions(img: np.ndarray, hsv: np.ndarray) -> None:
    """Outline colored regions of *hsv* on *img*, one pass per named color.

    For each entry of ``_COLOR_RANGES`` the (lower, upper) bounds are OR-ed
    into one mask. Colors with fewer than 100 matching pixels are skipped, as
    are individual contours with an area below 20.
    """
    h, w = hsv.shape[:2]
    for color_name, ranges in _COLOR_RANGES.items():
        mask = np.zeros((h, w), dtype=np.uint8)
        for lower, upper in ranges:
            mask = cv2.bitwise_or(mask, cv2.inRange(hsv, lower, upper))
        if cv2.countNonZero(mask) < 100:
            continue
        contours, _ = cv2.findContours(
            mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
        )
        draw_color = _REGION_COLORS_BGR.get(color_name, (200, 200, 200))
        for cnt in contours:
            if cv2.contourArea(cnt) < 20:
                continue
            cv2.drawContours(img, [cnt], -1, draw_color, 2)


def _draw_graphics(img: np.ndarray, graphics: List[Dict[str, Any]]) -> None:
    """Draw a dashed bounding box and a labelled tag for every graphic element."""
    for gfx in graphics:
        gx, gy, gw, gh = _rect(gfx)
        shape = gfx.get("shape", "icon")
        conf = gfx.get("confidence") or 0
        color = _HEX_TO_BGR.get(gfx.get("color_hex", "#6b7280"), _DEFAULT_BGR)

        _draw_dashed_rect(img, gx, gy, gw, gh, color, dash=6, thickness=2)

        icon = _SHAPE_LABELS.get(shape, shape.upper()[:5])
        label = f"{icon} {int(conf * 100)}%"
        (tw, th), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.4, 1)
        lx = gx + 2
        # Place the label above the box, clamped so it stays inside the image
        # when the box touches the top edge.
        ly = max(gy - 4, th + 4)
        # White background patch behind the text for readability.
        cv2.rectangle(img, (lx - 1, ly - th - 2), (lx + tw + 2, ly + 3),
                      (255, 255, 255), -1)
        cv2.putText(img, label, (lx, ly),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.4, color, 1)


async def _get_structure_overlay(session_id: str) -> Response:
    """Generate overlay image showing detected boxes, zones, and color regions.

    The session's base image is annotated, in this order, with zone
    boundaries, detected boxes (tinted fill, border, label), contours of
    colored regions and graphic elements. The structure data comes from the
    session's ``structure_result``; if that is missing or empty, box and zone
    detection is run on the fly.

    Args:
        session_id: Identifier of the OCR pipeline session.

    Returns:
        A ``Response`` with the annotated image as PNG (``image/png``).

    Raises:
        HTTPException: 404 if the session has no base image; 500 if the base
            image cannot be decoded or the result cannot be encoded.
    """
    base_png = await _get_base_image_png(session_id)
    if not base_png:
        raise HTTPException(status_code=404, detail="No base image available")

    img = cv2.imdecode(np.frombuffer(base_png, dtype=np.uint8), cv2.IMREAD_COLOR)
    if img is None:
        raise HTTPException(status_code=500, detail="Failed to decode image")

    # Color regions must be detected on the pristine pixels, so convert to HSV
    # before anything is drawn onto ``img`` (this avoids decoding the PNG twice).
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

    # Get structure result (run detection if not cached)
    session = await get_session_db(session_id)
    structure = (session or {}).get("structure_result")
    if not structure:
        logger.debug("No cached structure_result; running detection on the fly")
        structure = _detect_structure(img)

    # Box fills go onto this copy so they can be blended in translucently.
    overlay = img.copy()

    _draw_zones(img, structure.get("zones") or [])
    _draw_boxes(img, overlay, structure.get("boxes") or [])

    # Blend overlay at 15% opacity
    cv2.addWeighted(overlay, 0.15, img, 0.85, 0, img)

    _draw_color_regions(img, hsv)
    _draw_graphics(img, structure.get("graphics") or [])

    # Encode result
    ok, png_buf = cv2.imencode(".png", img)
    if not ok:
        raise HTTPException(status_code=500, detail="Failed to encode overlay image")
    return Response(content=png_buf.tobytes(), media_type="image/png")