"""
Overlay rendering for columns, rows, and words (grid-based overlays).

Extracted from ocr_pipeline_overlays.py for modularity.

License: Apache 2.0
PRIVACY: All processing happens locally.
"""

import logging
from typing import Any, Dict, List, Tuple

import cv2
import numpy as np
from fastapi import HTTPException
from fastapi.responses import Response

from ocr_pipeline_common import _get_base_image_png
from ocr_pipeline_session_store import get_session_db
from ocr_pipeline_rows import _draw_box_exclusion_overlay

logger = logging.getLogger(__name__)

# Colour triple in OpenCV channel order (blue, green, red).
_Color = Tuple[int, int, int]

# Used whenever a region/row type has no dedicated colour.
_FALLBACK_COLOR: _Color = (200, 200, 200)

# Dashed outlines / separators for "box" zones.
_ZONE_LINE_COLOR: _Color = (0, 200, 255)  # Yellow (BGR)

# Colour map for column region types (BGR).
_COLUMN_COLORS: Dict[str, _Color] = {
    "column_en": (255, 180, 0),       # Blue
    "column_de": (0, 200, 0),         # Green
    "column_example": (0, 140, 255),  # Orange
    "column_text": (200, 200, 0),     # Cyan/Turquoise
    "page_ref": (200, 0, 200),        # Purple
    "column_marker": (0, 0, 220),     # Red
    "column_ignore": (180, 180, 180),  # Light Gray
    "header": (128, 128, 128),        # Gray
    "footer": (128, 128, 128),        # Gray
    "margin_top": (100, 100, 100),    # Dark Gray
    "margin_bottom": (100, 100, 100),  # Dark Gray
}

# Colour map for row types (BGR).
_ROW_COLORS: Dict[str, _Color] = {
    "content": (255, 180, 0),         # Blue
    "header": (128, 128, 128),        # Gray
    "footer": (128, 128, 128),        # Gray
    "margin_top": (100, 100, 100),    # Dark Gray
    "margin_bottom": (100, 100, 100),  # Dark Gray
}

# Palette for the cell-based word overlay, indexed by column index (BGR).
_CELL_PALETTE: List[_Color] = [
    (255, 180, 0),    # Blue
    (0, 200, 0),      # Green
    (0, 140, 255),    # Orange
    (200, 100, 200),  # Purple
    (200, 200, 0),    # Cyan
    (100, 200, 200),  # Yellow-ish
]

# The legacy (entry-based) word overlay deliberately knows fewer column types
# than _COLUMN_COLORS; every other "column_*" type falls back to light gray.
_LEGACY_COLUMN_COLORS: Dict[str, _Color] = {
    "column_en": (255, 180, 0),
    "column_de": (0, 200, 0),
    "column_example": (0, 140, 255),
}

# Column type -> key of the legacy entry dict that holds that column's text.
_LEGACY_ENTRY_FIELDS: Dict[str, str] = {
    "column_en": "english",
    "column_de": "german",
    "column_example": "example",
}


async def _require_session(session_id: str) -> Any:
    """Return the stored session for *session_id*.

    Raises:
        HTTPException: 404 if the session store returns nothing for the id.
    """
    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")
    return session


async def _load_base_image(session_id: str) -> np.ndarray:
    """Fetch the session's base PNG and decode it into a colour image.

    Which image counts as "base" (cropped > dewarped > original) is decided
    by ``_get_base_image_png``.

    Raises:
        HTTPException: 404 if no base image is available, 500 if the bytes
            cannot be decoded.
    """
    base_png = await _get_base_image_png(session_id)
    if not base_png:
        raise HTTPException(status_code=404, detail="No base image available")

    arr = np.frombuffer(base_png, dtype=np.uint8)
    img = cv2.imdecode(arr, cv2.IMREAD_COLOR)
    if img is None:
        raise HTTPException(status_code=500, detail="Failed to decode image")
    return img


def _png_response(img: np.ndarray) -> Response:
    """Encode *img* as PNG and wrap it in an ``image/png`` response.

    Raises:
        HTTPException: 500 if OpenCV fails to encode the image.
    """
    success, result_png = cv2.imencode(".png", img)
    if not success:
        raise HTTPException(status_code=500, detail="Failed to encode overlay image")
    return Response(content=result_png.tobytes(), media_type="image/png")


def _session_zones(session: Any) -> List[Any]:
    """Return the zones stored under the session's column result, or ``[]``."""
    column_result = session.get("column_result") or {}
    return column_result.get("zones") or []


def _confidence_color(conf: Any) -> _Color:
    """Map a confidence value to a text colour (BGR).

    ``>= 70`` is green, ``>= 50`` is yellow-orange, anything lower is red.
    A missing (``None``) confidence is treated as 0 instead of raising.
    """
    if conf is None:
        conf = 0
    if conf >= 70:
        return (0, 180, 0)
    if conf >= 50:
        return (0, 180, 220)
    return (0, 0, 220)


def _draw_dashed_hline(img: np.ndarray, x_start: int, x_stop: int, y: int,
                       color: _Color, dash_len: int, thickness: int = 2) -> None:
    """Draw a horizontal dashed line on *img* from *x_start* to *x_stop*.

    Dashes and gaps are both *dash_len* pixels long; the last dash is
    clipped at *x_stop*.
    """
    for sx in range(x_start, x_stop, dash_len * 2):
        ex = min(sx + dash_len, x_stop)
        cv2.line(img, (sx, y), (ex, y), color, thickness)


def _draw_dashed_vline(img: np.ndarray, x: int, y_start: int, y_stop: int,
                       color: _Color, dash_len: int, thickness: int = 2) -> None:
    """Draw a vertical dashed line on *img* from *y_start* to *y_stop*.

    Dashes and gaps are both *dash_len* pixels long; the last dash is
    clipped at *y_stop*.
    """
    for sy in range(y_start, y_stop, dash_len * 2):
        ey = min(sy + dash_len, y_stop)
        cv2.line(img, (x, sy), (x, ey), color, thickness)


async def _get_columns_overlay(session_id: str) -> Response:
    """Generate cropped (or dewarped) image with column borders drawn on it.

    Each column gets a solid border, a label (type plus confidence when it is
    below 100%) and a 20% opacity fill. Zones of type "box" are outlined with
    a dashed rectangle and passed to ``_draw_box_exclusion_overlay``.

    Raises:
        HTTPException: 404 if the session, its column data or the base image
            is missing; 500 if the image cannot be decoded or encoded.
    """
    session = await _require_session(session_id)

    column_result = session.get("column_result")
    if not column_result or not column_result.get("columns"):
        raise HTTPException(status_code=404, detail="No column data available")

    img = await _load_base_image(session_id)

    # Fills go on a separate copy so they can be alpha-blended afterwards.
    overlay = img.copy()
    for col in column_result["columns"]:
        x, y = col["x"], col["y"]
        w, h = col["width"], col["height"]
        color = _COLUMN_COLORS.get(col.get("type", ""), _FALLBACK_COLOR)

        # Semi-transparent fill
        cv2.rectangle(overlay, (x, y), (x + w, y + h), color, -1)
        # Solid border
        cv2.rectangle(img, (x, y), (x + w, y + h), color, 3)

        # Label with confidence
        label = col.get("type", "unknown").replace("column_", "").upper()
        conf = col.get("classification_confidence")
        if conf is not None and conf < 1.0:
            label = f"{label} {int(conf * 100)}%"
        cv2.putText(img, label, (x + 10, y + 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2)

    # Blend overlay at 20% opacity
    cv2.addWeighted(overlay, 0.2, img, 0.8, 0, img)

    # Draw detected box boundaries as dashed rectangles
    zones = column_result.get("zones") or []
    dash_len = 15
    for zone in zones:
        if zone.get("zone_type") != "box" or not zone.get("box"):
            continue
        box = zone["box"]
        bx, by = box["x"], box["y"]
        bw, bh = box["width"], box["height"]

        _draw_dashed_hline(img, bx, bx + bw, by, _ZONE_LINE_COLOR, dash_len)
        _draw_dashed_hline(img, bx, bx + bw, by + bh, _ZONE_LINE_COLOR, dash_len)
        _draw_dashed_vline(img, bx, by, by + bh, _ZONE_LINE_COLOR, dash_len)
        _draw_dashed_vline(img, bx + bw, by, by + bh, _ZONE_LINE_COLOR, dash_len)
        cv2.putText(img, "BOX", (bx + 10, by + bh - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, _ZONE_LINE_COLOR, 2)

    # Red semi-transparent overlay for box zones
    _draw_box_exclusion_overlay(img, zones)

    return _png_response(img)


async def _get_rows_overlay(session_id: str) -> Response:
    """Generate cropped (or dewarped) image with row bands drawn on it.

    Each row gets a border, a label ("R<index> <TYPE>" plus word count when
    non-zero) and a 15% opacity fill. For every "box" zone, dashed separator
    lines are drawn across the full image width at its top and bottom edge.

    Raises:
        HTTPException: 404 if the session, its row data or the base image is
            missing; 500 if the image cannot be decoded or encoded.
    """
    session = await _require_session(session_id)

    row_result = session.get("row_result")
    if not row_result or not row_result.get("rows"):
        raise HTTPException(status_code=404, detail="No row data available")

    img = await _load_base_image(session_id)

    # Fills go on a separate copy so they can be alpha-blended afterwards.
    overlay = img.copy()
    for row in row_result["rows"]:
        x, y = row["x"], row["y"]
        w, h = row["width"], row["height"]
        row_type = row.get("row_type", "content")
        color = _ROW_COLORS.get(row_type, _FALLBACK_COLOR)

        # Semi-transparent fill
        cv2.rectangle(overlay, (x, y), (x + w, y + h), color, -1)
        # Solid border
        cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)

        # Label
        idx = row.get("index", 0)
        label = f"R{idx} {row_type.upper()}"
        wc = row.get("word_count", 0)
        if wc:
            label = f"{label} ({wc}w)"
        cv2.putText(img, label, (x + 5, y + 18),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)

    # Blend overlay at 15% opacity
    cv2.addWeighted(overlay, 0.15, img, 0.85, 0, img)

    # Draw zone separator lines if zones exist
    zones = _session_zones(session)
    if zones:
        img_w_px = img.shape[1]
        dash_len = 20
        for zone in zones:
            if zone.get("zone_type") != "box":
                continue
            zy = zone["y"]
            zh = zone["height"]
            for line_y in (zy, zy + zh):
                _draw_dashed_hline(img, 0, img_w_px, line_y, _ZONE_LINE_COLOR, dash_len)

    # Red semi-transparent overlay for box zones
    _draw_box_exclusion_overlay(img, zones)

    return _png_response(img)


def _draw_cell_grid(img: np.ndarray, overlay: np.ndarray, cells: List[Any]) -> None:
    """Draw the cell-based word overlay.

    Borders and labels are drawn on *img*, fills on *overlay*. Cells are
    coloured by column index; cells without a positive width and height are
    skipped.
    """
    for cell in cells:
        # `or {}` also covers a stored explicit None, not just a missing key.
        bbox = cell.get("bbox_px") or {}
        cx = bbox.get("x", 0)
        cy = bbox.get("y", 0)
        cw = bbox.get("w", 0)
        ch = bbox.get("h", 0)
        if cw <= 0 or ch <= 0:
            continue

        col_idx = cell.get("col_index", 0)
        color = _CELL_PALETTE[col_idx % len(_CELL_PALETTE)]

        # Cell rectangle border
        cv2.rectangle(img, (cx, cy), (cx + cw, cy + ch), color, 1)
        # Semi-transparent fill
        cv2.rectangle(overlay, (cx, cy), (cx + cw, cy + ch), color, -1)

        # Cell-ID label (top-left corner)
        cell_id = cell.get("cell_id", "")
        cv2.putText(img, cell_id, (cx + 2, cy + 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.28, color, 1)

        # Text label (bottom of cell), coloured by confidence
        text = cell.get("text", "")
        if text:
            text_color = _confidence_color(cell.get("confidence", 0))
            label = text.replace('\n', ' ')[:30]
            cv2.putText(img, label, (cx + 3, cy + ch - 4),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.35, text_color, 1)


def _draw_legacy_entries(img: np.ndarray, overlay: np.ndarray,
                         session: Any, word_result: Any) -> None:
    """Draw the legacy entry-based word overlay (for old sessions).

    The grid is the cross product of the session's "column_*" columns and its
    "content" rows. Entry texts are looked up by ``row_index`` and written at
    the bottom of the matching grid cell. Borders and labels are drawn on
    *img*, fills on *overlay*.
    """
    column_result = session.get("column_result")
    row_result = session.get("row_result")

    columns: List[Any] = []
    if column_result and column_result.get("columns"):
        columns = [c for c in column_result["columns"]
                   if c.get("type", "").startswith("column_")]

    content_rows: List[Any] = []
    if row_result and row_result.get("rows"):
        content_rows = [r for r in row_result["rows"]
                        if r.get("row_type") == "content"]

    # Grid cells: every column crossed with every content row.
    for col in columns:
        color = _LEGACY_COLUMN_COLORS.get(col.get("type", ""), _FALLBACK_COLOR)
        cx, cw = col["x"], col["width"]
        for row in content_rows:
            ry, rh = row["y"], row["height"]
            cv2.rectangle(img, (cx, ry), (cx + cw, ry + rh), color, 1)
            cv2.rectangle(overlay, (cx, ry), (cx + cw, ry + rh), color, -1)

    # When several entries share a row_index, the last one wins.
    entry_by_row: Dict[int, Dict] = {
        entry.get("row_index", -1): entry for entry in word_result["entries"]
    }

    for row_idx, row in enumerate(content_rows):
        entry = entry_by_row.get(row_idx)
        if not entry:
            continue

        text_color = _confidence_color(entry.get("confidence", 0))
        ry, rh = row["y"], row["height"]

        for col in columns:
            field = _LEGACY_ENTRY_FIELDS.get(col.get("type", ""), "")
            text = entry.get(field, "") if field else ""
            if text:
                label = text.replace('\n', ' ')[:30]
                cv2.putText(img, label, (col["x"] + 3, ry + rh - 4),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.35, text_color, 1)


async def _get_words_overlay(session_id: str) -> Response:
    """Generate cropped (or dewarped) image with cell grid drawn on it.

    Supports both the new cell-based word result (``cells``) and the legacy
    entry-based one (``entries``); the cell format takes precedence when both
    are present. Fills are blended at 10% opacity.

    Raises:
        HTTPException: 404 if the session, its word data or the base image is
            missing; 500 if the image cannot be decoded or encoded.
    """
    session = await _require_session(session_id)

    word_result = session.get("word_result")
    if not word_result:
        raise HTTPException(status_code=404, detail="No word data available")

    # Support both new cell-based and legacy entry-based formats
    cells = word_result.get("cells")
    if not cells and not word_result.get("entries"):
        raise HTTPException(status_code=404, detail="No word data available")

    img = await _load_base_image(session_id)

    # Fills go on a separate copy so they can be alpha-blended afterwards.
    overlay = img.copy()
    if cells:
        _draw_cell_grid(img, overlay, cells)
    else:
        _draw_legacy_entries(img, overlay, session, word_result)

    # Blend overlay at 10% opacity
    cv2.addWeighted(overlay, 0.1, img, 0.9, 0, img)

    # Red semi-transparent overlay for box zones
    _draw_box_exclusion_overlay(img, _session_zones(session))

    return _png_response(img)