From f9d71d50d16251b63ec032d97e0a16cec7ffd15b Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Thu, 19 Mar 2026 09:08:30 +0100 Subject: [PATCH] Add exclude region marking in Structure step Users can now draw rectangles on the document image in the Structure Detection step to mark areas (e.g. header graphics, alphabet strips) that should be excluded from OCR results during grid building. - Backend: PUT/DELETE endpoints for exclude regions stored in structure_result - Backend: _build_grid_core() filters all words inside user-defined exclude regions - Frontend: Interactive rectangle drawing with visual overlay and delete buttons - Preserve exclude regions when re-running structure detection Co-Authored-By: Claude Opus 4.6 --- .../app/(admin)/ai/ocr-pipeline/types.ts | 9 + .../ocr-pipeline/StepStructureDetection.tsx | 307 +++++++++++++++++- klausur-service/backend/grid_editor_api.py | 35 +- .../backend/ocr_pipeline_geometry.py | 76 +++++ 4 files changed, 417 insertions(+), 10 deletions(-) diff --git a/admin-lehrer/app/(admin)/ai/ocr-pipeline/types.ts b/admin-lehrer/app/(admin)/ai/ocr-pipeline/types.ts index ea0ba01..5a0ba7c 100644 --- a/admin-lehrer/app/(admin)/ai/ocr-pipeline/types.ts +++ b/admin-lehrer/app/(admin)/ai/ocr-pipeline/types.ts @@ -225,6 +225,14 @@ export interface StructureGraphic { confidence: number } +export interface ExcludeRegion { + x: number + y: number + w: number + h: number + label?: string +} + export interface StructureResult { image_width: number image_height: number @@ -232,6 +240,7 @@ export interface StructureResult { boxes: StructureBox[] zones: StructureZone[] graphics: StructureGraphic[] + exclude_regions?: ExcludeRegion[] color_pixel_counts: Record has_words: boolean word_count: number diff --git a/admin-lehrer/components/ocr-pipeline/StepStructureDetection.tsx b/admin-lehrer/components/ocr-pipeline/StepStructureDetection.tsx index 56d261d..88cef5a 100644 --- a/admin-lehrer/components/ocr-pipeline/StepStructureDetection.tsx +++ b/admin-lehrer/components/ocr-pipeline/StepStructureDetection.tsx @@ -1,7 +1,7 @@ 'use client' -import { useEffect, useState } from 'react' -import type { StructureResult } from '@/app/(admin)/ai/ocr-pipeline/types' +import { useCallback, useEffect, useRef, useState } from 'react' +import type { ExcludeRegion, StructureResult } from '@/app/(admin)/ai/ocr-pipeline/types' const KLAUSUR_API = '/klausur-api' @@ -19,6 +19,77 @@ const COLOR_HEX: Record = { purple: '#9333ea', } +/** + * Convert a mouse event on the image container to image-pixel coordinates. + * The image uses object-contain inside an A4-ratio container, so we need + * to account for letterboxing. + */ +function mouseToImageCoords( + e: React.MouseEvent, + containerEl: HTMLElement, + imgWidth: number, + imgHeight: number, +): { x: number; y: number } | null { + const rect = containerEl.getBoundingClientRect() + const containerW = rect.width + const containerH = rect.height + + // object-contain: image is scaled to fit, centered + const scaleX = containerW / imgWidth + const scaleY = containerH / imgHeight + const scale = Math.min(scaleX, scaleY) + + const renderedW = imgWidth * scale + const renderedH = imgHeight * scale + const offsetX = (containerW - renderedW) / 2 + const offsetY = (containerH - renderedH) / 2 + + const relX = e.clientX - rect.left - offsetX + const relY = e.clientY - rect.top - offsetY + + if (relX < 0 || relY < 0 || relX > renderedW || relY > renderedH) { + return null + } + + return { + x: Math.round(relX / scale), + y: Math.round(relY / scale), + } +} + +/** + * Convert image-pixel coordinates to container-relative percentages + * for overlay positioning. + */ +function imageToOverlayPct( + region: { x: number; y: number; w: number; h: number }, + containerW: number, + containerH: number, + imgWidth: number, + imgHeight: number, +): { left: string; top: string; width: string; height: string } { + const scaleX = containerW / imgWidth + const scaleY = containerH / imgHeight + const scale = Math.min(scaleX, scaleY) + + const renderedW = imgWidth * scale + const renderedH = imgHeight * scale + const offsetX = (containerW - renderedW) / 2 + const offsetY = (containerH - renderedH) / 2 + + const left = offsetX + region.x * scale + const top = offsetY + region.y * scale + const width = region.w * scale + const height = region.h * scale + + return { + left: `${(left / containerW) * 100}%`, + top: `${(top / containerH) * 100}%`, + width: `${(width / containerW) * 100}%`, + height: `${(height / containerH) * 100}%`, + } +} + export function StepStructureDetection({ sessionId, onNext }: StepStructureDetectionProps) { const [result, setResult] = useState(null) const [detecting, setDetecting] = useState(false) @@ -26,6 +97,30 @@ export function StepStructureDetection({ sessionId, onNext }: StepStructureDetec const [hasRun, setHasRun] = useState(false) const [overlayTs, setOverlayTs] = useState(0) + // Exclude region drawing state + const [excludeRegions, setExcludeRegions] = useState([]) + const [drawing, setDrawing] = useState(false) + const [drawStart, setDrawStart] = useState<{ x: number; y: number } | null>(null) + const [drawCurrent, setDrawCurrent] = useState<{ x: number; y: number } | null>(null) + const [saving, setSaving] = useState(false) + const [drawMode, setDrawMode] = useState(false) + + const containerRef = useRef(null) + const [containerSize, setContainerSize] = useState({ w: 0, h: 0 }) + + // Track container size for overlay positioning + useEffect(() => { + const el = containerRef.current + if (!el) return + const obs = new ResizeObserver((entries) => { + for (const entry of entries) { + setContainerSize({ w: entry.contentRect.width, h: entry.contentRect.height }) + } + }) + obs.observe(el) + return () => obs.disconnect() + }, []) + // Auto-trigger detection on mount useEffect(() => { if (!sessionId || hasRun) return @@ -36,7 +131,6 @@ export function StepStructureDetection({ sessionId, onNext }: StepStructureDetec setError(null) try { - // Always re-run detection to pick up latest word_result from OCR step const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/detect-structure`, { method: 'POST', }) @@ -47,6 +141,7 @@ export function StepStructureDetection({ sessionId, onNext }: StepStructureDetec const data = await res.json() setResult(data) + setExcludeRegions(data.exclude_regions || []) setOverlayTs(Date.now()) } catch (e) { setError(e instanceof Error ? e.message : 'Unbekannter Fehler') @@ -69,6 +164,7 @@ export function StepStructureDetection({ sessionId, onNext }: StepStructureDetec if (!res.ok) throw new Error('Erneute Erkennung fehlgeschlagen') const data = await res.json() setResult(data) + setExcludeRegions(data.exclude_regions || []) setOverlayTs(Date.now()) } catch (e) { setError(e instanceof Error ? e.message : 'Unbekannter Fehler') @@ -77,6 +173,84 @@ export function StepStructureDetection({ sessionId, onNext }: StepStructureDetec } } + // Save exclude regions to backend + const saveExcludeRegions = useCallback(async (regions: ExcludeRegion[]) => { + if (!sessionId) return + setSaving(true) + try { + const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/exclude-regions`, { + method: 'PUT', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ regions }), + }) + if (!res.ok) throw new Error('Speichern fehlgeschlagen') + } catch (e) { + setError(e instanceof Error ? e.message : 'Speichern fehlgeschlagen') + } finally { + setSaving(false) + } + }, [sessionId]) + + // Mouse handlers for drawing exclude rectangles + const handleMouseDown = useCallback((e: React.MouseEvent) => { + if (!drawMode || !containerRef.current || !result) return + const coords = mouseToImageCoords(e, containerRef.current, result.image_width, result.image_height) + if (coords) { + setDrawing(true) + setDrawStart(coords) + setDrawCurrent(coords) + } + }, [drawMode, result]) + + const handleMouseMove = useCallback((e: React.MouseEvent) => { + if (!drawing || !containerRef.current || !result) return + const coords = mouseToImageCoords(e, containerRef.current, result.image_width, result.image_height) + if (coords) { + setDrawCurrent(coords) + } + }, [drawing, result]) + + const handleMouseUp = useCallback(() => { + if (!drawing || !drawStart || !drawCurrent) { + setDrawing(false) + return + } + + const x = Math.min(drawStart.x, drawCurrent.x) + const y = Math.min(drawStart.y, drawCurrent.y) + const w = Math.abs(drawCurrent.x - drawStart.x) + const h = Math.abs(drawCurrent.y - drawStart.y) + + // Minimum size to avoid accidental clicks + if (w > 10 && h > 10) { + const newRegion: ExcludeRegion = { x, y, w, h, label: `Bereich ${excludeRegions.length + 1}` } + const updated = [...excludeRegions, newRegion] + setExcludeRegions(updated) + saveExcludeRegions(updated) + } + + setDrawing(false) + setDrawStart(null) + setDrawCurrent(null) + }, [drawing, drawStart, drawCurrent, excludeRegions, saveExcludeRegions]) + + const handleDeleteRegion = useCallback(async (index: number) => { + if (!sessionId) return + setSaving(true) + try { + const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/exclude-regions/${index}`, { + method: 'DELETE', + }) + if (!res.ok) throw new Error('Loeschen fehlgeschlagen') + const updated = excludeRegions.filter((_, i) => i !== index) + setExcludeRegions(updated) + } catch (e) { + setError(e instanceof Error ? e.message : 'Loeschen fehlgeschlagen') + } finally { + setSaving(false) + } + }, [sessionId, excludeRegions]) + if (!sessionId) { return
Keine Session ausgewaehlt.
} @@ -84,6 +258,16 @@ export function StepStructureDetection({ sessionId, onNext }: StepStructureDetec const croppedUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/cropped` const overlayUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/structure-overlay${overlayTs ? `?t=${overlayTs}` : ''}` + // Current drag rectangle in image coords + const dragRect = drawing && drawStart && drawCurrent + ? { + x: Math.min(drawStart.x, drawCurrent.x), + y: Math.min(drawStart.y, drawCurrent.y), + w: Math.abs(drawCurrent.x - drawStart.x), + h: Math.abs(drawCurrent.y - drawStart.y), + } + : null + return (
{/* Loading indicator */} @@ -94,23 +278,97 @@ export function StepStructureDetection({ sessionId, onNext }: StepStructureDetec
)} + {/* Draw mode toggle */} + {result && ( +
+ + {drawMode && ( + + Rechteck auf dem Bild zeichnen um Bereiche von der OCR-Erkennung auszuschliessen + + )} + {saving && ( + Speichern... + )} +
+ )} + {/* Two-column image comparison */}
- {/* Left: Original document */} + {/* Left: Original document with exclude region drawing */}
- Original + Original {excludeRegions.length > 0 && `(${excludeRegions.length} Ausschlussbereich${excludeRegions.length !== 1 ? 'e' : ''})`}
-
+
{ + if (drawing) { + handleMouseUp() + } + }} + > {/* eslint-disable-next-line @next/next/no-img-element */} Originaldokument { (e.target as HTMLImageElement).style.display = 'none' }} /> + + {/* Saved exclude regions overlay */} + {result && containerSize.w > 0 && excludeRegions.map((region, i) => { + const pos = imageToOverlayPct(region, containerSize.w, containerSize.h, result.image_width, result.image_height) + return ( +
+
+ + {region.label || `Bereich ${i + 1}`} + + +
+
+ ) + })} + + {/* Current drag rectangle */} + {dragRect && result && containerSize.w > 0 && (() => { + const pos = imageToOverlayPct(dragRect, containerSize.w, containerSize.h, result.image_width, result.image_height) + return ( +
+ ) + })()}
@@ -133,6 +391,34 @@ export function StepStructureDetection({ sessionId, onNext }: StepStructureDetec
+ {/* Exclude regions list */} + {excludeRegions.length > 0 && ( +
+

+ Ausschlussbereiche ({excludeRegions.length}) — Woerter in diesen Bereichen werden nicht erkannt +

+
+ {excludeRegions.map((region, i) => ( +
+ + + {region.label || `Bereich ${i + 1}`} + + + {region.w}x{region.h}px @ ({region.x}, {region.y}) + + +
+ ))} +
+
+ )} + {/* Result info */} {result && (
@@ -154,6 +440,11 @@ export function StepStructureDetection({ sessionId, onNext }: StepStructureDetec {result.word_count} Woerter )} + {excludeRegions.length > 0 && ( + + {excludeRegions.length} Ausschluss + + )} {(result.border_ghosts_removed ?? 0) > 0 && ( {result.border_ghosts_removed} Rahmenlinien entfernt @@ -244,7 +535,7 @@ export function StepStructureDetection({ sessionId, onNext }: StepStructureDetec className="inline-flex items-center gap-1 px-2 py-1 rounded text-[11px] bg-purple-50 dark:bg-purple-900/20 text-purple-700 dark:text-purple-300 border border-purple-200 dark:border-purple-800" > {shape === 'arrow' ? '→' : shape === 'circle' ? '●' : shape === 'line' ? '─' : shape === 'exclamation' ? '❗' : shape === 'dot' ? '•' : shape === 'illustration' ? '🖼' : '◆'} - {' '}{shape} ×{count} + {' '}{shape} x{count} ))}
diff --git a/klausur-service/backend/grid_editor_api.py b/klausur-service/backend/grid_editor_api.py index aba9c91..06a85e8 100644 --- a/klausur-service/backend/grid_editor_api.py +++ b/klausur-service/backend/grid_editor_api.py @@ -792,13 +792,44 @@ async def _build_grid_core(session_id: str, session: dict) -> dict: # page number ("64", "S. 12") and not real content. _filter_footer_words(all_words, img_h, logger, session_id) - # 2d. Filter words inside detected graphic/image regions + # 2d. Filter words inside user-defined exclude regions (from Structure step). + # These are explicitly marked by the user, so ALL words inside are removed + # regardless of confidence. + structure_result = session.get("structure_result") + exclude_rects = [] + if structure_result: + for er in structure_result.get("exclude_regions", []): + exclude_rects.append({ + "x": er["x"], "y": er["y"], + "w": er["w"], "h": er["h"], + }) + if exclude_rects: + before = len(all_words) + filtered = [] + for w in all_words: + w_cx = w["left"] + w.get("width", 0) / 2 + w_cy = w["top"] + w.get("height", 0) / 2 + inside = any( + er["x"] <= w_cx <= er["x"] + er["w"] + and er["y"] <= w_cy <= er["y"] + er["h"] + for er in exclude_rects + ) + if not inside: + filtered.append(w) + removed = before - len(filtered) + if removed: + all_words = filtered + logger.info( + "build-grid session %s: removed %d words inside %d user exclude region(s)", + session_id, removed, len(exclude_rects), + ) + + # 2e. Filter words inside detected graphic/image regions # Only remove LOW-CONFIDENCE words (likely OCR artifacts from images). # High-confidence words are real text even if they overlap a detected # graphic region (e.g. colored text that graphic detection couldn't # fully distinguish from an image). _GRAPHIC_CONF_THRESHOLD = 50 # keep words with conf >= 50 - structure_result = session.get("structure_result") graphic_rects = [] if structure_result: for g in structure_result.get("graphics", []): diff --git a/klausur-service/backend/ocr_pipeline_geometry.py b/klausur-service/backend/ocr_pipeline_geometry.py index 6e81936..1c48adc 100644 --- a/klausur-service/backend/ocr_pipeline_geometry.py +++ b/klausur-service/backend/ocr_pipeline_geometry.py @@ -22,6 +22,7 @@ from typing import Any, Dict, List, Optional import cv2 import numpy as np from fastapi import APIRouter, HTTPException, Query +from pydantic import BaseModel from cv_vocab_pipeline import ( _apply_shear, @@ -712,6 +713,10 @@ async def detect_structure(session_id: str): duration = time.time() - t0 + # Preserve user-drawn exclude regions from previous run + prev_sr = cached.get("structure_result") or {} + prev_exclude = prev_sr.get("exclude_regions", []) + result_dict = { "image_width": w, "image_height": h, @@ -749,6 +754,7 @@ async def detect_structure(session_id: str): } for g in graphics ], + "exclude_regions": prev_exclude, "color_pixel_counts": color_summary, "has_words": len(words) > 0, "word_count": len(words), @@ -766,6 +772,76 @@ async def detect_structure(session_id: str): return {"session_id": session_id, **result_dict} +# --------------------------------------------------------------------------- +# Exclude Regions — user-drawn rectangles to exclude from OCR results +# --------------------------------------------------------------------------- + +class _ExcludeRegionIn(BaseModel): + x: int + y: int + w: int + h: int + label: str = "" + + +class _ExcludeRegionsBatchIn(BaseModel): + regions: list[_ExcludeRegionIn] + + +@router.put("/sessions/{session_id}/exclude-regions") +async def set_exclude_regions(session_id: str, body: _ExcludeRegionsBatchIn): + """Replace all exclude regions for a session. + + Regions are stored inside ``structure_result.exclude_regions``. + """ + session = await get_session_db(session_id) + if not session: + raise HTTPException(status_code=404, detail="Session not found") + + sr = session.get("structure_result") or {} + sr["exclude_regions"] = [r.model_dump() for r in body.regions] + + await update_session_db(session_id, structure_result=sr) + + # Update cache + if session_id in _cache: + _cache[session_id]["structure_result"] = sr + + return { + "session_id": session_id, + "exclude_regions": sr["exclude_regions"], + "count": len(sr["exclude_regions"]), + } + + +@router.delete("/sessions/{session_id}/exclude-regions/{region_index}") +async def delete_exclude_region(session_id: str, region_index: int): + """Remove a single exclude region by index.""" + session = await get_session_db(session_id) + if not session: + raise HTTPException(status_code=404, detail="Session not found") + + sr = session.get("structure_result") or {} + regions = sr.get("exclude_regions", []) + + if region_index < 0 or region_index >= len(regions): + raise HTTPException(status_code=404, detail="Region index out of range") + + removed = regions.pop(region_index) + sr["exclude_regions"] = regions + + await update_session_db(session_id, structure_result=sr) + + if session_id in _cache: + _cache[session_id]["structure_result"] = sr + + return { + "session_id": session_id, + "removed": removed, + "remaining": len(regions), + } + + # --------------------------------------------------------------------------- # Column Detection Endpoints (Step 3) # ---------------------------------------------------------------------------