diff --git a/admin-lehrer/app/(admin)/ai/ocr-pipeline/types.ts b/admin-lehrer/app/(admin)/ai/ocr-pipeline/types.ts index ea0ba01..5a0ba7c 100644 --- a/admin-lehrer/app/(admin)/ai/ocr-pipeline/types.ts +++ b/admin-lehrer/app/(admin)/ai/ocr-pipeline/types.ts @@ -225,6 +225,14 @@ export interface StructureGraphic { confidence: number } +export interface ExcludeRegion { + x: number + y: number + w: number + h: number + label?: string +} + export interface StructureResult { image_width: number image_height: number @@ -232,6 +240,7 @@ export interface StructureResult { boxes: StructureBox[] zones: StructureZone[] graphics: StructureGraphic[] + exclude_regions?: ExcludeRegion[] color_pixel_counts: Record has_words: boolean word_count: number diff --git a/admin-lehrer/components/ocr-pipeline/StepStructureDetection.tsx b/admin-lehrer/components/ocr-pipeline/StepStructureDetection.tsx index 56d261d..88cef5a 100644 --- a/admin-lehrer/components/ocr-pipeline/StepStructureDetection.tsx +++ b/admin-lehrer/components/ocr-pipeline/StepStructureDetection.tsx @@ -1,7 +1,7 @@ 'use client' -import { useEffect, useState } from 'react' -import type { StructureResult } from '@/app/(admin)/ai/ocr-pipeline/types' +import { useCallback, useEffect, useRef, useState } from 'react' +import type { ExcludeRegion, StructureResult } from '@/app/(admin)/ai/ocr-pipeline/types' const KLAUSUR_API = '/klausur-api' @@ -19,6 +19,77 @@ const COLOR_HEX: Record = { purple: '#9333ea', } +/** + * Convert a mouse event on the image container to image-pixel coordinates. + * The image uses object-contain inside an A4-ratio container, so we need + * to account for letterboxing. + */ +function mouseToImageCoords( + e: React.MouseEvent, + containerEl: HTMLElement, + imgWidth: number, + imgHeight: number, +): { x: number; y: number } | null { + const rect = containerEl.getBoundingClientRect() + const containerW = rect.width + const containerH = rect.height + + // object-contain: image is scaled to fit, centered + const scaleX = containerW / imgWidth + const scaleY = containerH / imgHeight + const scale = Math.min(scaleX, scaleY) + + const renderedW = imgWidth * scale + const renderedH = imgHeight * scale + const offsetX = (containerW - renderedW) / 2 + const offsetY = (containerH - renderedH) / 2 + + const relX = e.clientX - rect.left - offsetX + const relY = e.clientY - rect.top - offsetY + + if (relX < 0 || relY < 0 || relX > renderedW || relY > renderedH) { + return null + } + + return { + x: Math.round(relX / scale), + y: Math.round(relY / scale), + } +} + +/** + * Convert image-pixel coordinates to container-relative percentages + * for overlay positioning. + */ +function imageToOverlayPct( + region: { x: number; y: number; w: number; h: number }, + containerW: number, + containerH: number, + imgWidth: number, + imgHeight: number, +): { left: string; top: string; width: string; height: string } { + const scaleX = containerW / imgWidth + const scaleY = containerH / imgHeight + const scale = Math.min(scaleX, scaleY) + + const renderedW = imgWidth * scale + const renderedH = imgHeight * scale + const offsetX = (containerW - renderedW) / 2 + const offsetY = (containerH - renderedH) / 2 + + const left = offsetX + region.x * scale + const top = offsetY + region.y * scale + const width = region.w * scale + const height = region.h * scale + + return { + left: `${(left / containerW) * 100}%`, + top: `${(top / containerH) * 100}%`, + width: `${(width / containerW) * 100}%`, + height: `${(height / containerH) * 100}%`, + } +} + export function StepStructureDetection({ sessionId, onNext }: StepStructureDetectionProps) { const [result, setResult] = useState(null) const [detecting, setDetecting] = useState(false) @@ -26,6 +97,30 @@ export function StepStructureDetection({ sessionId, onNext }: StepStructureDetec const [hasRun, setHasRun] = useState(false) const [overlayTs, setOverlayTs] = useState(0) + // Exclude region drawing state + const [excludeRegions, setExcludeRegions] = useState([]) + const [drawing, setDrawing] = useState(false) + const [drawStart, setDrawStart] = useState<{ x: number; y: number } | null>(null) + const [drawCurrent, setDrawCurrent] = useState<{ x: number; y: number } | null>(null) + const [saving, setSaving] = useState(false) + const [drawMode, setDrawMode] = useState(false) + + const containerRef = useRef(null) + const [containerSize, setContainerSize] = useState({ w: 0, h: 0 }) + + // Track container size for overlay positioning + useEffect(() => { + const el = containerRef.current + if (!el) return + const obs = new ResizeObserver((entries) => { + for (const entry of entries) { + setContainerSize({ w: entry.contentRect.width, h: entry.contentRect.height }) + } + }) + obs.observe(el) + return () => obs.disconnect() + }, []) + // Auto-trigger detection on mount useEffect(() => { if (!sessionId || hasRun) return @@ -36,7 +131,6 @@ export function StepStructureDetection({ sessionId, onNext }: StepStructureDetec setError(null) try { - // Always re-run detection to pick up latest word_result from OCR step const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/detect-structure`, { method: 'POST', }) @@ -47,6 +141,7 @@ export function StepStructureDetection({ sessionId, onNext }: StepStructureDetec const data = await res.json() setResult(data) + setExcludeRegions(data.exclude_regions || []) setOverlayTs(Date.now()) } catch (e) { setError(e instanceof Error ? e.message : 'Unbekannter Fehler') @@ -69,6 +164,7 @@ export function StepStructureDetection({ sessionId, onNext }: StepStructureDetec if (!res.ok) throw new Error('Erneute Erkennung fehlgeschlagen') const data = await res.json() setResult(data) + setExcludeRegions(data.exclude_regions || []) setOverlayTs(Date.now()) } catch (e) { setError(e instanceof Error ? e.message : 'Unbekannter Fehler') @@ -77,6 +173,84 @@ export function StepStructureDetection({ sessionId, onNext }: StepStructureDetec } } + // Save exclude regions to backend + const saveExcludeRegions = useCallback(async (regions: ExcludeRegion[]) => { + if (!sessionId) return + setSaving(true) + try { + const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/exclude-regions`, { + method: 'PUT', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ regions }), + }) + if (!res.ok) throw new Error('Speichern fehlgeschlagen') + } catch (e) { + setError(e instanceof Error ? e.message : 'Speichern fehlgeschlagen') + } finally { + setSaving(false) + } + }, [sessionId]) + + // Mouse handlers for drawing exclude rectangles + const handleMouseDown = useCallback((e: React.MouseEvent) => { + if (!drawMode || !containerRef.current || !result) return + const coords = mouseToImageCoords(e, containerRef.current, result.image_width, result.image_height) + if (coords) { + setDrawing(true) + setDrawStart(coords) + setDrawCurrent(coords) + } + }, [drawMode, result]) + + const handleMouseMove = useCallback((e: React.MouseEvent) => { + if (!drawing || !containerRef.current || !result) return + const coords = mouseToImageCoords(e, containerRef.current, result.image_width, result.image_height) + if (coords) { + setDrawCurrent(coords) + } + }, [drawing, result]) + + const handleMouseUp = useCallback(() => { + if (!drawing || !drawStart || !drawCurrent) { + setDrawing(false) + return + } + + const x = Math.min(drawStart.x, drawCurrent.x) + const y = Math.min(drawStart.y, drawCurrent.y) + const w = Math.abs(drawCurrent.x - drawStart.x) + const h = Math.abs(drawCurrent.y - drawStart.y) + + // Minimum size to avoid accidental clicks + if (w > 10 && h > 10) { + const newRegion: ExcludeRegion = { x, y, w, h, label: `Bereich ${excludeRegions.length + 1}` } + const updated = [...excludeRegions, newRegion] + setExcludeRegions(updated) + saveExcludeRegions(updated) + } + + setDrawing(false) + setDrawStart(null) + setDrawCurrent(null) + }, [drawing, drawStart, drawCurrent, excludeRegions, saveExcludeRegions]) + + const handleDeleteRegion = useCallback(async (index: number) => { + if (!sessionId) return + setSaving(true) + try { + const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/exclude-regions/${index}`, { + method: 'DELETE', + }) + if (!res.ok) throw new Error('Loeschen fehlgeschlagen') + const updated = excludeRegions.filter((_, i) => i !== index) + setExcludeRegions(updated) + } catch (e) { + setError(e instanceof Error ? e.message : 'Loeschen fehlgeschlagen') + } finally { + setSaving(false) + } + }, [sessionId, excludeRegions]) + if (!sessionId) { return
Keine Session ausgewaehlt.
} @@ -84,6 +258,16 @@ export function StepStructureDetection({ sessionId, onNext }: StepStructureDetec const croppedUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/cropped` const overlayUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/structure-overlay${overlayTs ? `?t=${overlayTs}` : ''}` + // Current drag rectangle in image coords + const dragRect = drawing && drawStart && drawCurrent + ? { + x: Math.min(drawStart.x, drawCurrent.x), + y: Math.min(drawStart.y, drawCurrent.y), + w: Math.abs(drawCurrent.x - drawStart.x), + h: Math.abs(drawCurrent.y - drawStart.y), + } + : null + return (
{/* Loading indicator */} @@ -94,23 +278,97 @@ export function StepStructureDetection({ sessionId, onNext }: StepStructureDetec
)} + {/* Draw mode toggle */} + {result && ( +
+ + {drawMode && ( + + Rechteck auf dem Bild zeichnen um Bereiche von der OCR-Erkennung auszuschliessen + + )} + {saving && ( + Speichern... + )} +
+ )} + {/* Two-column image comparison */}
- {/* Left: Original document */} + {/* Left: Original document with exclude region drawing */}
- Original + Original {excludeRegions.length > 0 && `(${excludeRegions.length} Ausschlussbereich${excludeRegions.length !== 1 ? 'e' : ''})`}
-
+
{ + if (drawing) { + handleMouseUp() + } + }} + > {/* eslint-disable-next-line @next/next/no-img-element */} Originaldokument { (e.target as HTMLImageElement).style.display = 'none' }} /> + + {/* Saved exclude regions overlay */} + {result && containerSize.w > 0 && excludeRegions.map((region, i) => { + const pos = imageToOverlayPct(region, containerSize.w, containerSize.h, result.image_width, result.image_height) + return ( +
+
+ + {region.label || `Bereich ${i + 1}`} + + +
+
+ ) + })} + + {/* Current drag rectangle */} + {dragRect && result && containerSize.w > 0 && (() => { + const pos = imageToOverlayPct(dragRect, containerSize.w, containerSize.h, result.image_width, result.image_height) + return ( +
+ ) + })()}
@@ -133,6 +391,34 @@ export function StepStructureDetection({ sessionId, onNext }: StepStructureDetec
+ {/* Exclude regions list */} + {excludeRegions.length > 0 && ( +
+

+ Ausschlussbereiche ({excludeRegions.length}) — Woerter in diesen Bereichen werden nicht erkannt +

+
+ {excludeRegions.map((region, i) => ( +
+ + + {region.label || `Bereich ${i + 1}`} + + + {region.w}x{region.h}px @ ({region.x}, {region.y}) + + +
+ ))} +
+
+ )} + {/* Result info */} {result && (
@@ -154,6 +440,11 @@ export function StepStructureDetection({ sessionId, onNext }: StepStructureDetec {result.word_count} Woerter )} + {excludeRegions.length > 0 && ( + + {excludeRegions.length} Ausschluss + + )} {(result.border_ghosts_removed ?? 0) > 0 && ( {result.border_ghosts_removed} Rahmenlinien entfernt @@ -244,7 +535,7 @@ export function StepStructureDetection({ sessionId, onNext }: StepStructureDetec className="inline-flex items-center gap-1 px-2 py-1 rounded text-[11px] bg-purple-50 dark:bg-purple-900/20 text-purple-700 dark:text-purple-300 border border-purple-200 dark:border-purple-800" > {shape === 'arrow' ? '→' : shape === 'circle' ? '●' : shape === 'line' ? '─' : shape === 'exclamation' ? '❗' : shape === 'dot' ? '•' : shape === 'illustration' ? '🖼' : '◆'} - {' '}{shape} ×{count} + {' '}{shape} x{count} ))}
diff --git a/klausur-service/backend/grid_editor_api.py b/klausur-service/backend/grid_editor_api.py index aba9c91..06a85e8 100644 --- a/klausur-service/backend/grid_editor_api.py +++ b/klausur-service/backend/grid_editor_api.py @@ -792,13 +792,44 @@ async def _build_grid_core(session_id: str, session: dict) -> dict: # page number ("64", "S. 12") and not real content. _filter_footer_words(all_words, img_h, logger, session_id) - # 2d. Filter words inside detected graphic/image regions + # 2d. Filter words inside user-defined exclude regions (from Structure step). + # These are explicitly marked by the user, so ALL words inside are removed + # regardless of confidence. + structure_result = session.get("structure_result") + exclude_rects = [] + if structure_result: + for er in structure_result.get("exclude_regions", []): + exclude_rects.append({ + "x": er["x"], "y": er["y"], + "w": er["w"], "h": er["h"], + }) + if exclude_rects: + before = len(all_words) + filtered = [] + for w in all_words: + w_cx = w["left"] + w.get("width", 0) / 2 + w_cy = w["top"] + w.get("height", 0) / 2 + inside = any( + er["x"] <= w_cx <= er["x"] + er["w"] + and er["y"] <= w_cy <= er["y"] + er["h"] + for er in exclude_rects + ) + if not inside: + filtered.append(w) + removed = before - len(filtered) + if removed: + all_words = filtered + logger.info( + "build-grid session %s: removed %d words inside %d user exclude region(s)", + session_id, removed, len(exclude_rects), + ) + + # 2e. Filter words inside detected graphic/image regions # Only remove LOW-CONFIDENCE words (likely OCR artifacts from images). # High-confidence words are real text even if they overlap a detected # graphic region (e.g. colored text that graphic detection couldn't # fully distinguish from an image). _GRAPHIC_CONF_THRESHOLD = 50 # keep words with conf >= 50 - structure_result = session.get("structure_result") graphic_rects = [] if structure_result: for g in structure_result.get("graphics", []): diff --git a/klausur-service/backend/ocr_pipeline_geometry.py b/klausur-service/backend/ocr_pipeline_geometry.py index 6e81936..1c48adc 100644 --- a/klausur-service/backend/ocr_pipeline_geometry.py +++ b/klausur-service/backend/ocr_pipeline_geometry.py @@ -22,6 +22,7 @@ from typing import Any, Dict, List, Optional import cv2 import numpy as np from fastapi import APIRouter, HTTPException, Query +from pydantic import BaseModel from cv_vocab_pipeline import ( _apply_shear, @@ -712,6 +713,10 @@ async def detect_structure(session_id: str): duration = time.time() - t0 + # Preserve user-drawn exclude regions from previous run + prev_sr = cached.get("structure_result") or {} + prev_exclude = prev_sr.get("exclude_regions", []) + result_dict = { "image_width": w, "image_height": h, @@ -749,6 +754,7 @@ async def detect_structure(session_id: str): } for g in graphics ], + "exclude_regions": prev_exclude, "color_pixel_counts": color_summary, "has_words": len(words) > 0, "word_count": len(words), @@ -766,6 +772,76 @@ async def detect_structure(session_id: str): return {"session_id": session_id, **result_dict} +# --------------------------------------------------------------------------- +# Exclude Regions — user-drawn rectangles to exclude from OCR results +# --------------------------------------------------------------------------- + +class _ExcludeRegionIn(BaseModel): + x: int + y: int + w: int + h: int + label: str = "" + + +class _ExcludeRegionsBatchIn(BaseModel): + regions: list[_ExcludeRegionIn] + + +@router.put("/sessions/{session_id}/exclude-regions") +async def set_exclude_regions(session_id: str, body: _ExcludeRegionsBatchIn): + """Replace all exclude regions for a session. + + Regions are stored inside ``structure_result.exclude_regions``. + """ + session = await get_session_db(session_id) + if not session: + raise HTTPException(status_code=404, detail="Session not found") + + sr = session.get("structure_result") or {} + sr["exclude_regions"] = [r.model_dump() for r in body.regions] + + await update_session_db(session_id, structure_result=sr) + + # Update cache + if session_id in _cache: + _cache[session_id]["structure_result"] = sr + + return { + "session_id": session_id, + "exclude_regions": sr["exclude_regions"], + "count": len(sr["exclude_regions"]), + } + + +@router.delete("/sessions/{session_id}/exclude-regions/{region_index}") +async def delete_exclude_region(session_id: str, region_index: int): + """Remove a single exclude region by index.""" + session = await get_session_db(session_id) + if not session: + raise HTTPException(status_code=404, detail="Session not found") + + sr = session.get("structure_result") or {} + regions = sr.get("exclude_regions", []) + + if region_index < 0 or region_index >= len(regions): + raise HTTPException(status_code=404, detail="Region index out of range") + + removed = regions.pop(region_index) + sr["exclude_regions"] = regions + + await update_session_db(session_id, structure_result=sr) + + if session_id in _cache: + _cache[session_id]["structure_result"] = sr + + return { + "session_id": session_id, + "removed": removed, + "remaining": len(regions), + } + + # --------------------------------------------------------------------------- # Column Detection Endpoints (Step 3) # ---------------------------------------------------------------------------