klausur-service (11 files): - cv_gutter_repair, ocr_pipeline_regression, upload_api - ocr_pipeline_sessions, smart_spell, nru_worksheet_generator - ocr_pipeline_overlays, mail/aggregator, zeugnis_api - cv_syllable_detect, self_rag backend-lehrer (17 files): - classroom_engine/suggestions, generators/quiz_generator - worksheets_api, llm_gateway/comparison, state_engine_api - classroom/models (→ 4 submodules), services/file_processor - alerts_agent/api/wizard+digests+routes, content_generators/pdf - classroom/routes/sessions, llm_gateway/inference - classroom_engine/analytics, auth/keycloak_auth - alerts_agent/processing/rule_engine, ai_processor/print_versions agent-core (5 files): - brain/memory_store, brain/knowledge_graph, brain/context_manager - orchestrator/supervisor, sessions/session_manager admin-lehrer (5 components): - GridOverlay, StepGridReview, DevOpsPipelineSidebar - DataFlowDiagram, sbom/wizard/page website (2 files): - DependencyMap, lehrer/abitur-archiv Other: nibis_ingestion, grid_detection_service, export-doclayout-onnx Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
334 lines
13 KiB
Python
334 lines
13 KiB
Python
"""
Overlay rendering for columns, rows, and words (grid-based overlays).

Extracted from ocr_pipeline_overlays.py for modularity.

License: Apache 2.0
PRIVACY: All processing is performed locally.
"""
|
|
|
|
import logging
|
|
from typing import Any, Dict, List
|
|
|
|
import cv2
|
|
import numpy as np
|
|
from fastapi import HTTPException
|
|
from fastapi.responses import Response
|
|
|
|
from ocr_pipeline_common import _get_base_image_png
|
|
from ocr_pipeline_session_store import get_session_db
|
|
from ocr_pipeline_rows import _draw_box_exclusion_overlay
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
async def _get_columns_overlay(session_id: str) -> Response:
    """Return the session's base image as a PNG with detected columns drawn on it.

    Every entry in ``column_result["columns"]`` gets a tinted fill (blended in
    at 20% opacity), a solid 3 px outline and a label derived from its type.
    The classification confidence is appended to the label when it is present
    and below 1.0. Zones of type ``"box"`` are outlined with dashed lines and
    labelled "BOX" before ``_draw_box_exclusion_overlay`` is applied.

    Args:
        session_id: Identifier used to look up the session and its base image.

    Returns:
        A ``Response`` carrying the PNG-encoded overlay (``image/png``).

    Raises:
        HTTPException: 404 if the session, its column data, or a base image is
            missing; 500 if the image cannot be decoded or re-encoded.
    """
    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    column_result = session.get("column_result")
    if not (column_result and column_result.get("columns")):
        raise HTTPException(status_code=404, detail="No column data available")

    # The helper is expected to pick the best available image
    # (cropped > dewarped > original).
    png_bytes = await _get_base_image_png(session_id)
    if not png_bytes:
        raise HTTPException(status_code=404, detail="No base image available")

    canvas = cv2.imdecode(np.frombuffer(png_bytes, dtype=np.uint8), cv2.IMREAD_COLOR)
    if canvas is None:
        raise HTTPException(status_code=500, detail="Failed to decode image")

    # Region type -> colour. OpenCV uses BGR channel order.
    gray = (128, 128, 128)
    dark_gray = (100, 100, 100)
    type_colors = {
        "column_en": (255, 180, 0),        # Blue
        "column_de": (0, 200, 0),          # Green
        "column_example": (0, 140, 255),   # Orange
        "column_text": (200, 200, 0),      # Cyan/Turquoise
        "page_ref": (200, 0, 200),         # Purple
        "column_marker": (0, 0, 220),      # Red
        "column_ignore": (180, 180, 180),  # Light gray
        "header": gray,
        "footer": gray,
        "margin_top": dark_gray,
        "margin_bottom": dark_gray,
    }
    fallback_color = (200, 200, 200)

    # Fills go on a separate copy so they can be blended in translucently,
    # while outlines and labels are drawn directly on the canvas.
    fill_layer = canvas.copy()
    for column in column_result["columns"]:
        left, top = column["x"], column["y"]
        width, height = column["width"], column["height"]
        top_left = (left, top)
        bottom_right = (left + width, top + height)
        color = type_colors.get(column.get("type", ""), fallback_color)

        cv2.rectangle(fill_layer, top_left, bottom_right, color, -1)
        cv2.rectangle(canvas, top_left, bottom_right, color, 3)

        # Label: column type without the "column_" prefix, plus confidence
        # as a percentage when it is known and below 100%.
        caption = column.get("type", "unknown").replace("column_", "").upper()
        confidence = column.get("classification_confidence")
        if confidence is not None and confidence < 1.0:
            caption = f"{caption} {int(confidence * 100)}%"
        cv2.putText(
            canvas, caption, (left + 10, top + 30),
            cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2,
        )

    # Blend the fills in at 20% opacity (result written back into canvas).
    cv2.addWeighted(fill_layer, 0.2, canvas, 0.8, 0, canvas)

    # Outline detected boxes with dashed rectangles built from short segments.
    # range() below requires the box coordinates to be integers.
    zones = column_result.get("zones") or []
    dash = 15
    stride = dash * 2
    box_color = (0, 200, 255)  # Yellow (BGR)
    for zone in zones:
        if zone.get("zone_type") != "box" or not zone.get("box"):
            continue

        box = zone["box"]
        left, top = box["x"], box["y"]
        width, height = box["width"], box["height"]
        right, bottom = left + width, top + height

        # Top and bottom edges.
        for start in range(left, right, stride):
            stop = min(start + dash, right)
            cv2.line(canvas, (start, top), (stop, top), box_color, 2)
            cv2.line(canvas, (start, bottom), (stop, bottom), box_color, 2)

        # Left and right edges.
        for start in range(top, bottom, stride):
            stop = min(start + dash, bottom)
            cv2.line(canvas, (left, start), (left, stop), box_color, 2)
            cv2.line(canvas, (right, start), (right, stop), box_color, 2)

        cv2.putText(
            canvas, "BOX", (left + 10, bottom - 10),
            cv2.FONT_HERSHEY_SIMPLEX, 0.7, box_color, 2,
        )

    # Shade box zones (described upstream as a red semi-transparent overlay).
    _draw_box_exclusion_overlay(canvas, zones)

    encoded_ok, encoded = cv2.imencode(".png", canvas)
    if not encoded_ok:
        raise HTTPException(status_code=500, detail="Failed to encode overlay image")

    return Response(content=encoded.tobytes(), media_type="image/png")
|
|
|
|
|
|
async def _get_rows_overlay(session_id: str) -> Response:
    """Return the session's base image as a PNG with detected row bands drawn on it.

    Every entry in ``row_result["rows"]`` gets a tinted fill (blended in at 15%
    opacity), a solid 2 px outline and a label of the form ``R<index> <TYPE>``,
    followed by ``(<n>w)`` when the row reports a non-zero word count. For each
    zone of type ``"box"`` two dashed horizontal lines spanning the full image
    width mark its top and bottom, then ``_draw_box_exclusion_overlay`` is
    applied.

    Args:
        session_id: Identifier used to look up the session and its base image.

    Returns:
        A ``Response`` carrying the PNG-encoded overlay (``image/png``).

    Raises:
        HTTPException: 404 if the session, its row data, or a base image is
            missing; 500 if the image cannot be decoded or re-encoded.
    """
    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    row_result = session.get("row_result")
    if not (row_result and row_result.get("rows")):
        raise HTTPException(status_code=404, detail="No row data available")

    # The helper is expected to pick the best available image
    # (cropped > dewarped > original).
    png_bytes = await _get_base_image_png(session_id)
    if not png_bytes:
        raise HTTPException(status_code=404, detail="No base image available")

    canvas = cv2.imdecode(np.frombuffer(png_bytes, dtype=np.uint8), cv2.IMREAD_COLOR)
    if canvas is None:
        raise HTTPException(status_code=500, detail="Failed to decode image")

    # Row type -> colour. OpenCV uses BGR channel order.
    gray = (128, 128, 128)
    dark_gray = (100, 100, 100)
    type_colors = {
        "content": (255, 180, 0),  # Blue
        "header": gray,
        "footer": gray,
        "margin_top": dark_gray,
        "margin_bottom": dark_gray,
    }
    fallback_color = (200, 200, 200)

    # Fills go on a separate copy so they can be blended in translucently,
    # while outlines and labels are drawn directly on the canvas.
    fill_layer = canvas.copy()
    for band in row_result["rows"]:
        left, top = band["x"], band["y"]
        width, height = band["width"], band["height"]
        top_left = (left, top)
        bottom_right = (left + width, top + height)
        kind = band.get("row_type", "content")
        color = type_colors.get(kind, fallback_color)

        cv2.rectangle(fill_layer, top_left, bottom_right, color, -1)
        cv2.rectangle(canvas, top_left, bottom_right, color, 2)

        number = band.get("index", 0)
        caption = f"R{number} {kind.upper()}"
        words = band.get("word_count", 0)
        if words:
            caption = f"{caption} ({words}w)"
        cv2.putText(
            canvas, caption, (left + 5, top + 18),
            cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1,
        )

    # Blend the fills in at 15% opacity (result written back into canvas).
    cv2.addWeighted(fill_layer, 0.15, canvas, 0.85, 0, canvas)

    # Zones are stored on the column result; draw dashed separator lines
    # across the whole image at the top and bottom of each box zone.
    zones = (session.get("column_result") or {}).get("zones") or []
    separator_color = (0, 200, 255)  # Yellow (BGR)
    dash = 20
    stride = dash * 2
    for zone in zones:
        if zone.get("zone_type") != "box":
            continue
        zone_top = zone["y"]
        zone_height = zone["height"]
        image_width = canvas.shape[1]
        for y_pos in (zone_top, zone_top + zone_height):
            for start in range(0, image_width, stride):
                stop = min(start + dash, image_width)
                cv2.line(canvas, (start, y_pos), (stop, y_pos), separator_color, 2)

    # Shade box zones (described upstream as a red semi-transparent overlay).
    _draw_box_exclusion_overlay(canvas, zones)

    encoded_ok, encoded = cv2.imencode(".png", canvas)
    if not encoded_ok:
        raise HTTPException(status_code=500, detail="Failed to encode overlay image")

    return Response(content=encoded.tobytes(), media_type="image/png")
|
|
|
|
|
|
async def _get_words_overlay(session_id: str) -> Response:
    """Return the session's base image as a PNG with the word/cell grid drawn on it.

    Two data formats are supported. When ``word_result["cells"]`` is non-empty
    each cell is drawn from its own pixel bounding box. Otherwise the legacy
    ``word_result["entries"]`` format is used and the grid is reconstructed
    from the session's column and row results. Fills are blended in at 10%
    opacity and ``_draw_box_exclusion_overlay`` is applied last.

    Args:
        session_id: Identifier used to look up the session and its base image.

    Returns:
        A ``Response`` carrying the PNG-encoded overlay (``image/png``).

    Raises:
        HTTPException: 404 if the session, its word data, or a base image is
            missing; 500 if the image cannot be decoded or re-encoded.
    """
    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    word_result = session.get("word_result")
    if not word_result:
        raise HTTPException(status_code=404, detail="No word data available")

    # Support both the new cell-based and the legacy entry-based formats.
    cells = word_result.get("cells")
    if not cells and not word_result.get("entries"):
        raise HTTPException(status_code=404, detail="No word data available")

    # The helper is expected to pick the best available image
    # (cropped > dewarped > original).
    base_png = await _get_base_image_png(session_id)
    if not base_png:
        raise HTTPException(status_code=404, detail="No base image available")

    arr = np.frombuffer(base_png, dtype=np.uint8)
    img = cv2.imdecode(arr, cv2.IMREAD_COLOR)
    if img is None:
        raise HTTPException(status_code=500, detail="Failed to decode image")

    # Fills go on a separate copy so they can be blended in translucently,
    # while borders and text are drawn directly on img.
    overlay = img.copy()

    if cells:
        _draw_word_cells(img, overlay, cells)
    else:
        # The guard above guarantees "entries" is present and non-empty here.
        _draw_legacy_word_entries(img, overlay, session, word_result["entries"])

    # Blend the fills in at 10% opacity (result written back into img).
    cv2.addWeighted(overlay, 0.1, img, 0.9, 0, img)

    # Shade box zones (described upstream as a red semi-transparent overlay).
    column_result = session.get("column_result") or {}
    zones = column_result.get("zones") or []
    _draw_box_exclusion_overlay(img, zones)

    success, result_png = cv2.imencode(".png", img)
    if not success:
        raise HTTPException(status_code=500, detail="Failed to encode overlay image")

    return Response(content=result_png.tobytes(), media_type="image/png")


# Palette for the cell-based overlay, cycled by column index (BGR).
_WORD_CELL_PALETTE = (
    (255, 180, 0),    # Blue
    (0, 200, 0),      # Green
    (0, 140, 255),    # Orange
    (200, 100, 200),  # Purple
    (200, 200, 0),    # Cyan
    (100, 200, 200),  # Yellow-ish
)

# Legacy overlay: column type -> grid colour (BGR).
_LEGACY_WORD_COLUMN_COLORS = {
    "column_en": (255, 180, 0),
    "column_de": (0, 200, 0),
    "column_example": (0, 140, 255),
}

# Legacy overlay: column type -> key of the entry field shown in that column.
_LEGACY_WORD_COLUMN_FIELDS = {
    "column_en": "english",
    "column_de": "german",
    "column_example": "example",
}


def _word_confidence_color(confidence) -> tuple:
    """Map a confidence value to a BGR text colour.

    Thresholds are 70 and 50, so the value is presumably on a 0-100 scale
    (TODO confirm against the OCR stage): >= 70 green, >= 50 yellow, else red.
    """
    if confidence >= 70:
        return (0, 180, 0)
    if confidence >= 50:
        return (0, 180, 220)
    return (0, 0, 220)


def _draw_word_cells(img: np.ndarray, overlay: np.ndarray, cells: List[Dict[str, Any]]) -> None:
    """Draw the cell-based grid: borders and labels on img, fills on overlay."""
    for cell in cells:
        bbox = cell.get("bbox_px", {})
        cx = bbox.get("x", 0)
        cy = bbox.get("y", 0)
        cw = bbox.get("w", 0)
        ch = bbox.get("h", 0)
        # Skip cells without a usable bounding box.
        if cw <= 0 or ch <= 0:
            continue

        col_idx = cell.get("col_index", 0)
        color = _WORD_CELL_PALETTE[col_idx % len(_WORD_CELL_PALETTE)]

        # Cell border on the image, translucent fill on the overlay.
        cv2.rectangle(img, (cx, cy), (cx + cw, cy + ch), color, 1)
        cv2.rectangle(overlay, (cx, cy), (cx + cw, cy + ch), color, -1)

        # Cell-ID label in the top-left corner.
        cell_id = cell.get("cell_id", "")
        cv2.putText(img, cell_id, (cx + 2, cy + 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.28, color, 1)

        # Recognised text along the bottom edge, coloured by confidence and
        # flattened/truncated to 30 characters so it stays on one line.
        text = cell.get("text", "")
        if text:
            text_color = _word_confidence_color(cell.get("confidence", 0))
            label = text.replace('\n', ' ')[:30]
            cv2.putText(img, label, (cx + 3, cy + ch - 4),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.35, text_color, 1)


def _draw_legacy_word_entries(
    img: np.ndarray,
    overlay: np.ndarray,
    session: Dict[str, Any],
    entries: List[Dict[str, Any]],
) -> None:
    """Draw the legacy entry-based grid (old sessions without cell data).

    The grid is the cross product of the session's ``column_*`` columns and
    its ``content`` rows; entry text is placed by matching ``row_index``
    against the position of the row within the content rows.
    """
    column_result = session.get("column_result")
    row_result = session.get("row_result")

    columns = []
    if column_result and column_result.get("columns"):
        columns = [c for c in column_result["columns"]
                   if c.get("type", "").startswith("column_")]

    content_rows_data = []
    if row_result and row_result.get("rows"):
        content_rows_data = [r for r in row_result["rows"]
                             if r.get("row_type") == "content"]

    # Grid rectangles: border on the image, translucent fill on the overlay.
    for col in columns:
        color = _LEGACY_WORD_COLUMN_COLORS.get(col.get("type", ""), (200, 200, 200))
        cx, cw = col["x"], col["width"]
        for row in content_rows_data:
            ry, rh = row["y"], row["height"]
            cv2.rectangle(img, (cx, ry), (cx + cw, ry + rh), color, 1)
            cv2.rectangle(overlay, (cx, ry), (cx + cw, ry + rh), color, -1)

    # Index entries by row; a later entry with the same row_index wins.
    entry_by_row: Dict[int, Dict] = {
        entry.get("row_index", -1): entry for entry in entries
    }

    for row_idx, row in enumerate(content_rows_data):
        entry = entry_by_row.get(row_idx)
        if not entry:
            continue
        text_color = _word_confidence_color(entry.get("confidence", 0))
        ry, rh = row["y"], row["height"]
        for col in columns:
            field = _LEGACY_WORD_COLUMN_FIELDS.get(col.get("type", ""), "")
            cx = col["x"]
            text = entry.get(field, "") if field else ""
            if text:
                label = text.replace('\n', ' ')[:30]
                cv2.putText(img, label, (cx + 3, ry + rh - 4),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.35, text_color, 1)
|