""" Overlay rendering for structure detection (boxes, zones, colors, graphics). Extracted from ocr_pipeline_overlays.py for modularity. Lizenz: Apache 2.0 DATENSCHUTZ: Alle Verarbeitung erfolgt lokal. """ import logging from typing import Any, Dict, List import cv2 import numpy as np from fastapi import HTTPException from fastapi.responses import Response from ocr_pipeline_common import _get_base_image_png from ocr_pipeline_session_store import get_session_db from cv_color_detect import _COLOR_HEX, _COLOR_RANGES from cv_box_detect import detect_boxes, split_page_into_zones logger = logging.getLogger(__name__) async def _get_structure_overlay(session_id: str) -> Response: """Generate overlay image showing detected boxes, zones, and color regions.""" base_png = await _get_base_image_png(session_id) if not base_png: raise HTTPException(status_code=404, detail="No base image available") arr = np.frombuffer(base_png, dtype=np.uint8) img = cv2.imdecode(arr, cv2.IMREAD_COLOR) if img is None: raise HTTPException(status_code=500, detail="Failed to decode image") h, w = img.shape[:2] # Get structure result (run detection if not cached) session = await get_session_db(session_id) structure = (session or {}).get("structure_result") if not structure: # Run detection on-the-fly margin = int(min(w, h) * 0.03) content_x, content_y = margin, margin content_w_px = w - 2 * margin content_h_px = h - 2 * margin boxes = detect_boxes(img, content_x, content_w_px, content_y, content_h_px) zones = split_page_into_zones(content_x, content_y, content_w_px, content_h_px, boxes) structure = { "boxes": [ {"x": b.x, "y": b.y, "w": b.width, "h": b.height, "confidence": b.confidence, "border_thickness": b.border_thickness} for b in boxes ], "zones": [ {"index": z.index, "zone_type": z.zone_type, "y": z.y, "h": z.height, "x": z.x, "w": z.width} for z in zones ], } overlay = img.copy() # --- Draw zone boundaries --- zone_colors = { "content": (200, 200, 200), # light gray "box": (255, 180, 0), # blue-ish (BGR) } for zone in structure.get("zones", []): zx = zone["x"] zy = zone["y"] zw = zone["w"] zh = zone["h"] color = zone_colors.get(zone["zone_type"], (200, 200, 200)) # Draw zone boundary as dashed line dash_len = 12 for edge_x in range(zx, zx + zw, dash_len * 2): end_x = min(edge_x + dash_len, zx + zw) cv2.line(img, (edge_x, zy), (end_x, zy), color, 1) cv2.line(img, (edge_x, zy + zh), (end_x, zy + zh), color, 1) # Zone label zone_label = f"Zone {zone['index']} ({zone['zone_type']})" cv2.putText(img, zone_label, (zx + 5, zy + 15), cv2.FONT_HERSHEY_SIMPLEX, 0.45, color, 1) # --- Draw detected boxes --- # Color map for box backgrounds (BGR) bg_hex_to_bgr = { "#dc2626": (38, 38, 220), # red "#2563eb": (235, 99, 37), # blue "#16a34a": (74, 163, 22), # green "#ea580c": (12, 88, 234), # orange "#9333ea": (234, 51, 147), # purple "#ca8a04": (4, 138, 202), # yellow "#6b7280": (128, 114, 107), # gray } for box_data in structure.get("boxes", []): bx = box_data["x"] by = box_data["y"] bw = box_data["w"] bh = box_data["h"] conf = box_data.get("confidence", 0) thickness = box_data.get("border_thickness", 0) bg_hex = box_data.get("bg_color_hex", "#6b7280") bg_name = box_data.get("bg_color_name", "") # Box fill color fill_bgr = bg_hex_to_bgr.get(bg_hex, (128, 114, 107)) # Semi-transparent fill cv2.rectangle(overlay, (bx, by), (bx + bw, by + bh), fill_bgr, -1) # Solid border border_color = fill_bgr cv2.rectangle(img, (bx, by), (bx + bw, by + bh), border_color, 3) # Label label = f"BOX" if bg_name and bg_name not in ("unknown", "white"): label += f" ({bg_name})" if thickness > 0: label += f" border={thickness}px" label += f" {int(conf * 100)}%" cv2.putText(img, label, (bx + 8, by + 22), cv2.FONT_HERSHEY_SIMPLEX, 0.55, (255, 255, 255), 2) cv2.putText(img, label, (bx + 8, by + 22), cv2.FONT_HERSHEY_SIMPLEX, 0.55, border_color, 1) # Blend overlay at 15% opacity cv2.addWeighted(overlay, 0.15, img, 0.85, 0, img) # --- Draw color regions (HSV masks) --- hsv = cv2.cvtColor( cv2.imdecode(np.frombuffer(base_png, dtype=np.uint8), cv2.IMREAD_COLOR), cv2.COLOR_BGR2HSV, ) color_bgr_map = { "red": (0, 0, 255), "orange": (0, 140, 255), "yellow": (0, 200, 255), "green": (0, 200, 0), "blue": (255, 150, 0), "purple": (200, 0, 200), } for color_name, ranges in _COLOR_RANGES.items(): mask = np.zeros((h, w), dtype=np.uint8) for lower, upper in ranges: mask = cv2.bitwise_or(mask, cv2.inRange(hsv, lower, upper)) # Only draw if there are significant colored pixels if np.sum(mask > 0) < 100: continue # Draw colored contours contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) draw_color = color_bgr_map.get(color_name, (200, 200, 200)) for cnt in contours: area = cv2.contourArea(cnt) if area < 20: continue cv2.drawContours(img, [cnt], -1, draw_color, 2) # --- Draw graphic elements --- graphics_data = structure.get("graphics", []) shape_icons = { "image": "IMAGE", "illustration": "ILLUST", } for gfx in graphics_data: gx, gy = gfx["x"], gfx["y"] gw, gh = gfx["w"], gfx["h"] shape = gfx.get("shape", "icon") color_hex = gfx.get("color_hex", "#6b7280") conf = gfx.get("confidence", 0) # Pick draw color based on element color (BGR) gfx_bgr = bg_hex_to_bgr.get(color_hex, (128, 114, 107)) # Draw bounding box (dashed style via short segments) dash = 6 for seg_x in range(gx, gx + gw, dash * 2): end_x = min(seg_x + dash, gx + gw) cv2.line(img, (seg_x, gy), (end_x, gy), gfx_bgr, 2) cv2.line(img, (seg_x, gy + gh), (end_x, gy + gh), gfx_bgr, 2) for seg_y in range(gy, gy + gh, dash * 2): end_y = min(seg_y + dash, gy + gh) cv2.line(img, (gx, seg_y), (gx, end_y), gfx_bgr, 2) cv2.line(img, (gx + gw, seg_y), (gx + gw, end_y), gfx_bgr, 2) # Label icon = shape_icons.get(shape, shape.upper()[:5]) label = f"{icon} {int(conf * 100)}%" # White background for readability (tw, th), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.4, 1) lx = gx + 2 ly = max(gy - 4, th + 4) cv2.rectangle(img, (lx - 1, ly - th - 2), (lx + tw + 2, ly + 3), (255, 255, 255), -1) cv2.putText(img, label, (lx, ly), cv2.FONT_HERSHEY_SIMPLEX, 0.4, gfx_bgr, 1) # Encode result _, png_buf = cv2.imencode(".png", img) return Response(content=png_buf.tobytes(), media_type="image/png")