klausur-service (11 files): - cv_gutter_repair, ocr_pipeline_regression, upload_api - ocr_pipeline_sessions, smart_spell, nru_worksheet_generator - ocr_pipeline_overlays, mail/aggregator, zeugnis_api - cv_syllable_detect, self_rag backend-lehrer (17 files): - classroom_engine/suggestions, generators/quiz_generator - worksheets_api, llm_gateway/comparison, state_engine_api - classroom/models (→ 4 submodules), services/file_processor - alerts_agent/api/wizard+digests+routes, content_generators/pdf - classroom/routes/sessions, llm_gateway/inference - classroom_engine/analytics, auth/keycloak_auth - alerts_agent/processing/rule_engine, ai_processor/print_versions agent-core (5 files): - brain/memory_store, brain/knowledge_graph, brain/context_manager - orchestrator/supervisor, sessions/session_manager admin-lehrer (5 components): - GridOverlay, StepGridReview, DevOpsPipelineSidebar - DataFlowDiagram, sbom/wizard/page website (2 files): - DependencyMap, lehrer/abitur-archiv Other: nibis_ingestion, grid_detection_service, export-doclayout-onnx Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
206 lines
7.4 KiB
Python
206 lines
7.4 KiB
Python
"""
|
|
Overlay rendering for structure detection (boxes, zones, colors, graphics).
|
|
|
|
Extracted from ocr_pipeline_overlays.py for modularity.
|
|
|
|
Lizenz: Apache 2.0
|
|
DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
|
|
"""
|
|
|
|
import logging
|
|
from typing import Any, Dict, List
|
|
|
|
import cv2
|
|
import numpy as np
|
|
from fastapi import HTTPException
|
|
from fastapi.responses import Response
|
|
|
|
from ocr_pipeline_common import _get_base_image_png
|
|
from ocr_pipeline_session_store import get_session_db
|
|
from cv_color_detect import _COLOR_HEX, _COLOR_RANGES
|
|
from cv_box_detect import detect_boxes, split_page_into_zones
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
# BGR equivalents of the hex colours stored for box backgrounds and graphics.
_OVERLAY_HEX_TO_BGR = {
    "#dc2626": (38, 38, 220),     # red
    "#2563eb": (235, 99, 37),     # blue
    "#16a34a": (74, 163, 22),     # green
    "#ea580c": (12, 88, 234),     # orange
    "#9333ea": (234, 51, 147),    # purple
    "#ca8a04": (4, 138, 202),     # yellow
    "#6b7280": (128, 114, 107),   # gray
}
# Used whenever a hex colour is missing or not in the table above.
_OVERLAY_DEFAULT_HEX = "#6b7280"
_OVERLAY_DEFAULT_BGR = (128, 114, 107)


def _draw_dashed_edges(img: np.ndarray, x: int, y: int, w: int, h: int,
                       color: tuple, dash: int, thickness: int,
                       with_sides: bool) -> None:
    """Draw dashed top/bottom edges (and optionally left/right) of a rectangle."""
    for seg_x in range(x, x + w, dash * 2):
        end_x = min(seg_x + dash, x + w)
        cv2.line(img, (seg_x, y), (end_x, y), color, thickness)
        cv2.line(img, (seg_x, y + h), (end_x, y + h), color, thickness)
    if not with_sides:
        return
    for seg_y in range(y, y + h, dash * 2):
        end_y = min(seg_y + dash, y + h)
        cv2.line(img, (x, seg_y), (x, end_y), color, thickness)
        cv2.line(img, (x + w, seg_y), (x + w, end_y), color, thickness)


def _detect_structure_on_the_fly(img: np.ndarray) -> Dict[str, Any]:
    """Run box and zone detection for a session without a cached structure result."""
    h, w = img.shape[:2]

    # Content area: the page minus a margin of 3% of the shorter side.
    margin = int(min(w, h) * 0.03)
    content_x, content_y = margin, margin
    content_w_px = w - 2 * margin
    content_h_px = h - 2 * margin

    # NOTE: the two helpers are called with different argument orders
    # (x, w, y, h) vs. (x, y, w, h); kept exactly as the original call sites.
    boxes = detect_boxes(img, content_x, content_w_px, content_y, content_h_px)
    zones = split_page_into_zones(content_x, content_y, content_w_px, content_h_px, boxes)

    return {
        "boxes": [
            {"x": b.x, "y": b.y, "w": b.width, "h": b.height,
             "confidence": b.confidence, "border_thickness": b.border_thickness}
            for b in boxes
        ],
        "zones": [
            {"index": z.index, "zone_type": z.zone_type,
             "y": z.y, "h": z.height, "x": z.x, "w": z.width}
            for z in zones
        ],
    }


def _draw_zone_boundaries(img: np.ndarray, zones: List[Dict[str, Any]]) -> None:
    """Draw dashed top/bottom boundaries and a text label for every zone."""
    zone_colors = {
        "content": (200, 200, 200),  # light gray
        "box": (255, 180, 0),        # blue-ish (BGR)
    }
    for zone in zones:
        zx, zy = zone["x"], zone["y"]
        zw, zh = zone["w"], zone["h"]
        color = zone_colors.get(zone["zone_type"], (200, 200, 200))

        # Only the horizontal edges are drawn for zones.
        _draw_dashed_edges(img, zx, zy, zw, zh, color, 12, 1, False)

        zone_label = f"Zone {zone['index']} ({zone['zone_type']})"
        cv2.putText(img, zone_label, (zx + 5, zy + 15),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.45, color, 1)


def _draw_boxes(img: np.ndarray, overlay: np.ndarray,
                boxes: List[Dict[str, Any]]) -> None:
    """Draw box borders and labels on ``img`` and filled rectangles on ``overlay``.

    The caller blends ``overlay`` into ``img`` afterwards to make the fills
    semi-transparent.
    """
    for box_data in boxes:
        bx, by = box_data["x"], box_data["y"]
        bw, bh = box_data["w"], box_data["h"]
        conf = box_data.get("confidence", 0)
        thickness = box_data.get("border_thickness", 0)
        bg_hex = box_data.get("bg_color_hex", _OVERLAY_DEFAULT_HEX)
        bg_name = box_data.get("bg_color_name", "")

        fill_bgr = _OVERLAY_HEX_TO_BGR.get(bg_hex, _OVERLAY_DEFAULT_BGR)
        top_left = (bx, by)
        bottom_right = (bx + bw, by + bh)

        # Fill goes on the overlay (blended later), solid border on the image.
        cv2.rectangle(overlay, top_left, bottom_right, fill_bgr, -1)
        cv2.rectangle(img, top_left, bottom_right, fill_bgr, 3)

        label = "BOX"
        if bg_name and bg_name not in ("unknown", "white"):
            label += f" ({bg_name})"
        if thickness > 0:
            label += f" border={thickness}px"
        label += f" {int(conf * 100)}%"

        # White, thicker text first acts as an outline behind the coloured text.
        label_origin = (bx + 8, by + 22)
        cv2.putText(img, label, label_origin,
                    cv2.FONT_HERSHEY_SIMPLEX, 0.55, (255, 255, 255), 2)
        cv2.putText(img, label, label_origin,
                    cv2.FONT_HERSHEY_SIMPLEX, 0.55, fill_bgr, 1)


def _draw_color_regions(img: np.ndarray, hsv: np.ndarray) -> None:
    """Outline regions of ``hsv`` that fall into the ranges of ``_COLOR_RANGES``."""
    color_bgr_map = {
        "red": (0, 0, 255),
        "orange": (0, 140, 255),
        "yellow": (0, 200, 255),
        "green": (0, 200, 0),
        "blue": (255, 150, 0),
        "purple": (200, 0, 200),
    }
    h, w = hsv.shape[:2]
    for color_name, ranges in _COLOR_RANGES.items():
        mask = np.zeros((h, w), dtype=np.uint8)
        for lower, upper in ranges:
            mask = cv2.bitwise_or(mask, cv2.inRange(hsv, lower, upper))

        # Skip colours with fewer than 100 matching pixels.
        if np.count_nonzero(mask) < 100:
            continue

        contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        draw_color = color_bgr_map.get(color_name, (200, 200, 200))
        for cnt in contours:
            # Ignore tiny contours (area below 20).
            if cv2.contourArea(cnt) < 20:
                continue
            cv2.drawContours(img, [cnt], -1, draw_color, 2)


def _draw_graphics(img: np.ndarray, graphics: List[Dict[str, Any]]) -> None:
    """Draw a dashed bounding box and a labelled tag for every graphic element."""
    shape_icons = {
        "image": "IMAGE",
        "illustration": "ILLUST",
    }
    for gfx in graphics:
        gx, gy = gfx["x"], gfx["y"]
        gw, gh = gfx["w"], gfx["h"]
        shape = gfx.get("shape", "icon")
        color_hex = gfx.get("color_hex", _OVERLAY_DEFAULT_HEX)
        conf = gfx.get("confidence", 0)

        gfx_bgr = _OVERLAY_HEX_TO_BGR.get(color_hex, _OVERLAY_DEFAULT_BGR)
        _draw_dashed_edges(img, gx, gy, gw, gh, gfx_bgr, 6, 2, True)

        # Unknown shapes are labelled with their upper-cased name, cut to 5 chars.
        icon = shape_icons.get(shape, shape.upper()[:5])
        label = f"{icon} {int(conf * 100)}%"

        (tw, th), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.4, 1)
        lx = gx + 2
        # Place the label above the box, but never above the top image edge.
        ly = max(gy - 4, th + 4)
        # White background for readability.
        cv2.rectangle(img, (lx - 1, ly - th - 2), (lx + tw + 2, ly + 3), (255, 255, 255), -1)
        cv2.putText(img, label, (lx, ly), cv2.FONT_HERSHEY_SIMPLEX, 0.4, gfx_bgr, 1)


async def _get_structure_overlay(session_id: str) -> Response:
    """Generate an overlay image showing detected boxes, zones, and color regions.

    The session's base image is decoded, the cached ``structure_result`` of the
    session is used if present (otherwise boxes and zones are detected on the
    fly), and zones, boxes, colour regions and graphic elements are drawn on
    top of the image.

    Args:
        session_id: Identifier of the OCR pipeline session.

    Returns:
        A ``Response`` with the rendered overlay as ``image/png``.

    Raises:
        HTTPException: 404 if the session has no base image; 500 if the base
            image cannot be decoded or the overlay cannot be encoded as PNG.
    """
    base_png = await _get_base_image_png(session_id)
    if not base_png:
        raise HTTPException(status_code=404, detail="No base image available")

    img = cv2.imdecode(np.frombuffer(base_png, dtype=np.uint8), cv2.IMREAD_COLOR)
    if img is None:
        raise HTTPException(status_code=500, detail="Failed to decode image")

    # Colour masks must be computed from the undrawn image, so take the HSV
    # conversion now instead of decoding the PNG a second time later.
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

    # Get structure result (run detection if not cached).
    session = await get_session_db(session_id)
    structure = (session or {}).get("structure_result")
    if not structure:
        structure = _detect_structure_on_the_fly(img)

    # Copy of the undrawn image; box fills are painted onto it and blended in.
    overlay = img.copy()

    _draw_zone_boundaries(img, structure.get("zones", []))
    _draw_boxes(img, overlay, structure.get("boxes", []))

    # Blend overlay at 15% opacity (result is written back into img).
    cv2.addWeighted(overlay, 0.15, img, 0.85, 0, img)

    _draw_color_regions(img, hsv)
    _draw_graphics(img, structure.get("graphics", []))

    # Encode result; surface an encoder failure as an explicit server error.
    ok, png_buf = cv2.imencode(".png", img)
    if not ok:
        raise HTTPException(status_code=500, detail="Failed to encode overlay image")
    return Response(content=png_buf.tobytes(), media_type="image/png")
|