From 1cc69d6b5ec34b73f1ff99f40a07b826eecb45a2 Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Thu, 5 Mar 2026 10:40:37 +0100 Subject: [PATCH] =?UTF-8?q?feat:=20OCR=20pipeline=20step=208=20=E2=80=94?= =?UTF-8?q?=20validation=20view=20with=20image=20detection=20&=20generatio?= =?UTF-8?q?n?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces the stub StepGroundTruth with a full side-by-side Original vs Reconstruction view. Adds VLM-based image region detection (qwen2.5vl), mflux image generation proxy, sync scroll/zoom, manual region drawing, and score/notes persistence. New backend endpoints: detect-images, generate-image, validate, get validation. New standalone mflux-service (scripts/mflux-service.py) for Metal GPU generation. Dockerfile.base: adds fonts-liberation (Apache-2.0). Co-Authored-By: Claude Opus 4.6 --- .../app/(admin)/ai/ocr-pipeline/page.tsx | 2 +- .../app/(admin)/ai/ocr-pipeline/types.ts | 18 + .../ocr-pipeline/StepGroundTruth.tsx | 588 +++++++++++++++++- .../services/klausur-service/OCR-Pipeline.md | 358 +++++++++-- klausur-service/Dockerfile.base | 1 + klausur-service/backend/ocr_pipeline_api.py | 265 ++++++++ scripts/mflux-service.py | 121 ++++ 7 files changed, 1284 insertions(+), 69 deletions(-) create mode 100644 scripts/mflux-service.py diff --git a/admin-lehrer/app/(admin)/ai/ocr-pipeline/page.tsx b/admin-lehrer/app/(admin)/ai/ocr-pipeline/page.tsx index c94ad4f..d33bfde 100644 --- a/admin-lehrer/app/(admin)/ai/ocr-pipeline/page.tsx +++ b/admin-lehrer/app/(admin)/ai/ocr-pipeline/page.tsx @@ -313,7 +313,7 @@ export default function OcrPipelinePage() { case 6: return case 7: - return + return default: return null } diff --git a/admin-lehrer/app/(admin)/ai/ocr-pipeline/types.ts b/admin-lehrer/app/(admin)/ai/ocr-pipeline/types.ts index 8734715..849d589 100644 --- a/admin-lehrer/app/(admin)/ai/ocr-pipeline/types.ts +++ b/admin-lehrer/app/(admin)/ai/ocr-pipeline/types.ts @@ -264,6 +264,24 @@ export 
interface WordGroundTruth { notes?: string } +export interface ImageRegion { + bbox_pct: { x: number; y: number; w: number; h: number } + prompt: string + description: string + image_b64: string | null + style: 'educational' | 'cartoon' | 'sketch' | 'clipart' | 'realistic' +} + +export type ImageStyle = ImageRegion['style'] + +export const IMAGE_STYLES: { value: ImageStyle; label: string }[] = [ + { value: 'educational', label: 'Lehrbuch' }, + { value: 'cartoon', label: 'Cartoon' }, + { value: 'sketch', label: 'Skizze' }, + { value: 'clipart', label: 'Clipart' }, + { value: 'realistic', label: 'Realistisch' }, +] + export const PIPELINE_STEPS: PipelineStep[] = [ { id: 'deskew', name: 'Begradigung', icon: 'πŸ“', status: 'pending' }, { id: 'dewarp', name: 'Entzerrung', icon: 'πŸ”§', status: 'pending' }, diff --git a/admin-lehrer/components/ocr-pipeline/StepGroundTruth.tsx b/admin-lehrer/components/ocr-pipeline/StepGroundTruth.tsx index da5cf3d..1bb1bae 100644 --- a/admin-lehrer/components/ocr-pipeline/StepGroundTruth.tsx +++ b/admin-lehrer/components/ocr-pipeline/StepGroundTruth.tsx @@ -1,18 +1,582 @@ 'use client' -export function StepGroundTruth() { +import { useCallback, useEffect, useRef, useState } from 'react' +import type { + GridCell, ColumnMeta, ImageRegion, ImageStyle, +} from '@/app/(admin)/ai/ocr-pipeline/types' +import { IMAGE_STYLES as STYLES } from '@/app/(admin)/ai/ocr-pipeline/types' + +const KLAUSUR_API = '/klausur-api' + +const COL_TYPE_COLORS: Record = { + column_en: '#3b82f6', + column_de: '#22c55e', + column_example: '#f97316', + column_text: '#a855f7', + page_ref: '#06b6d4', + column_marker: '#6b7280', +} + +interface StepGroundTruthProps { + sessionId: string | null + onNext: () => void +} + +interface SessionData { + cells: GridCell[] + columnsUsed: ColumnMeta[] + imageWidth: number + imageHeight: number + originalImageUrl: string +} + +export function StepGroundTruth({ sessionId, onNext }: StepGroundTruthProps) { + const [status, setStatus] 
= useState<'loading' | 'ready' | 'saving' | 'saved' | 'error'>('loading') + const [error, setError] = useState('') + const [session, setSession] = useState(null) + const [imageRegions, setImageRegions] = useState<(ImageRegion & { generating?: boolean })[]>([]) + const [detecting, setDetecting] = useState(false) + const [zoom, setZoom] = useState(100) + const [syncScroll, setSyncScroll] = useState(true) + const [notes, setNotes] = useState('') + const [score, setScore] = useState(null) + const [drawingRegion, setDrawingRegion] = useState(false) + const [dragStart, setDragStart] = useState<{ x: number; y: number } | null>(null) + const [dragEnd, setDragEnd] = useState<{ x: number; y: number } | null>(null) + + const leftPanelRef = useRef(null) + const rightPanelRef = useRef(null) + + // Load session data + useEffect(() => { + if (!sessionId) return + loadSessionData() + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [sessionId]) + + const loadSessionData = async () => { + if (!sessionId) return + setStatus('loading') + try { + const resp = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}`) + if (!resp.ok) throw new Error(`Failed to load session: ${resp.status}`) + const data = await resp.json() + + const wordResult = data.word_result || {} + setSession({ + cells: wordResult.cells || [], + columnsUsed: wordResult.columns_used || [], + imageWidth: wordResult.image_width || data.image_width || 800, + imageHeight: wordResult.image_height || data.image_height || 600, + originalImageUrl: data.original_image_url || `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/original`, + }) + + // Load existing validation data + const valResp = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/reconstruction/validation`) + if (valResp.ok) { + const valData = await valResp.json() + const validation = valData.validation + if (validation) { + setImageRegions(validation.image_regions || []) + setNotes(validation.notes || 
'') + setScore(validation.score ?? null) + } + } + + setStatus('ready') + } catch (e) { + setError(e instanceof Error ? e.message : String(e)) + setStatus('error') + } + } + + // Sync scroll between panels + const handleScroll = useCallback((source: 'left' | 'right') => { + if (!syncScroll) return + const from = source === 'left' ? leftPanelRef.current : rightPanelRef.current + const to = source === 'left' ? rightPanelRef.current : leftPanelRef.current + if (from && to) { + to.scrollTop = from.scrollTop + to.scrollLeft = from.scrollLeft + } + }, [syncScroll]) + + // Detect images via VLM + const handleDetectImages = async () => { + if (!sessionId) return + setDetecting(true) + try { + const resp = await fetch( + `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/reconstruction/detect-images`, + { method: 'POST' } + ) + if (!resp.ok) throw new Error(`Detection failed: ${resp.status}`) + const data = await resp.json() + setImageRegions(data.regions || []) + } catch (e) { + setError(e instanceof Error ? e.message : String(e)) + } finally { + setDetecting(false) + } + } + + // Generate image for a region + const handleGenerateImage = async (index: number) => { + if (!sessionId) return + const region = imageRegions[index] + if (!region) return + + setImageRegions(prev => prev.map((r, i) => i === index ? { ...r, generating: true } : r)) + + try { + const resp = await fetch( + `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/reconstruction/generate-image`, + { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + region_index: index, + prompt: region.prompt, + style: region.style, + }), + } + ) + if (!resp.ok) throw new Error(`Generation failed: ${resp.status}`) + const data = await resp.json() + + setImageRegions(prev => prev.map((r, i) => + i === index ? { ...r, image_b64: data.image_b64, generating: false } : r + )) + } catch (e) { + setImageRegions(prev => prev.map((r, i) => i === index ? 
{ ...r, generating: false } : r)) + setError(e instanceof Error ? e.message : String(e)) + } + } + + // Save validation + const handleSave = async () => { + if (!sessionId) return + setStatus('saving') + try { + const resp = await fetch( + `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/reconstruction/validate`, + { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ notes, score }), + } + ) + if (!resp.ok) throw new Error(`Save failed: ${resp.status}`) + setStatus('saved') + } catch (e) { + setError(e instanceof Error ? e.message : String(e)) + setStatus('error') + } + } + + // Handle manual region drawing on reconstruction + const handleReconMouseDown = (e: React.MouseEvent) => { + if (!drawingRegion) return + const rect = e.currentTarget.getBoundingClientRect() + const x = ((e.clientX - rect.left) / rect.width) * 100 + const y = ((e.clientY - rect.top) / rect.height) * 100 + setDragStart({ x, y }) + setDragEnd({ x, y }) + } + + const handleReconMouseMove = (e: React.MouseEvent) => { + if (!dragStart) return + const rect = e.currentTarget.getBoundingClientRect() + const x = ((e.clientX - rect.left) / rect.width) * 100 + const y = ((e.clientY - rect.top) / rect.height) * 100 + setDragEnd({ x, y }) + } + + const handleReconMouseUp = () => { + if (!dragStart || !dragEnd) return + const x = Math.min(dragStart.x, dragEnd.x) + const y = Math.min(dragStart.y, dragEnd.y) + const w = Math.abs(dragEnd.x - dragStart.x) + const h = Math.abs(dragEnd.y - dragStart.y) + + if (w > 2 && h > 2) { + setImageRegions(prev => [...prev, { + bbox_pct: { x, y, w, h }, + prompt: '', + description: 'Manually selected region', + image_b64: null, + style: 'educational' as ImageStyle, + }]) + } + + setDragStart(null) + setDragEnd(null) + setDrawingRegion(false) + } + + const handleRemoveRegion = (index: number) => { + setImageRegions(prev => prev.filter((_, i) => i !== index)) + } + + if (status === 'loading') { + return ( +
+
+ Session wird geladen... +
+ ) + } + + if (status === 'error' && !session) { + return ( +
+

{error}

+ +
+ ) + } + + if (!session) return null + + const aspect = session.imageHeight / session.imageWidth + return ( -
-
✅
-

- Schritt 7: Ground Truth Validierung -

-

- Gesamtpruefung der rekonstruierten Seite gegen das Original. - Dieser Schritt wird in einer zukuenftigen Version implementiert. -

-
- Kommt bald +
+ {/* Header / Controls */} +
+

+ Validierung — Original vs. Rekonstruktion

+
+ + +
+ + {zoom}% + +
+
+
+ + {error && ( +
+ {error} + +
+ )} + + {/* Side-by-side panels */} +
+ {/* Left: Original */} +
+
+ Original +
+
handleScroll('left')} + > +
+ Original +
+
+
+ + {/* Right: Reconstruction */} +
+
+ Rekonstruktion + +
+
handleScroll('right')} + > +
+ {/* Reconstruction container */} +
+ {/* Column background stripes */} + {session.columnsUsed.map((col, i) => { + const color = COL_TYPE_COLORS[col.type] || '#9ca3af' + return ( +
+ ) + })} + + {/* Row separator lines — derive from cells */} + {(() => { + const rowYs = new Set() + for (const cell of session.cells) { + if (cell.col_index === 0 && cell.bbox_pct) { + rowYs.add(cell.bbox_pct.y) + } + } + return Array.from(rowYs).map((y, i) => ( +
+ )) + })()} + + {/* Cell texts */} + {session.cells.map(cell => { + if (!cell.bbox_pct || !cell.text) return null + const color = COL_TYPE_COLORS[cell.col_type] || '#374151' + return ( + + {cell.text} + + ) + })} + + {/* Generated images at region positions */} + {imageRegions.map((region, i) => ( +
+ {region.image_b64 ? ( + {region.description} + ) : ( +
+ {region.generating ? '...' : `Bild ${i + 1}`} +
+ )} +
+ ))} + + {/* Drawing rectangle */} + {dragStart && dragEnd && ( +
+ )} +
+
+
+
+
+ + {/* Image regions panel */} + {imageRegions.length > 0 && ( +
+

+ Bildbereiche ({imageRegions.length} gefunden) +

+
+ {imageRegions.map((region, i) => ( +
+ {/* Preview thumbnail */} +
+ {region.image_b64 ? ( + + ) : ( +
+ {Math.round(region.bbox_pct.w)}x{Math.round(region.bbox_pct.h)}% +
+ )} +
+ + {/* Prompt + controls */} +
+
+ + Bereich {i + 1}: + + { + setImageRegions(prev => prev.map((r, j) => + j === i ? { ...r, prompt: e.target.value } : r + )) + }} + placeholder="Beschreibung / Prompt..." + className="flex-1 text-sm px-2 py-1 border rounded dark:border-gray-600 dark:bg-gray-700 dark:text-white" + /> +
+
+ + + +
+ {region.description && region.description !== region.prompt && ( +

{region.description}

+ )} +
+
+ ))} +
+
+ )} + + {/* Notes and score */} +
+
+ + setScore(e.target.value ? parseInt(e.target.value) : null)} + className="w-20 text-sm px-2 py-1 border rounded dark:border-gray-600 dark:bg-gray-700 dark:text-white" + /> +
+ {[1, 2, 3, 4, 5, 6, 7, 8, 9, 10].map(v => ( + + ))} +
+
+
+ +