From c743a38eafe058105c7f771feae95b2c3ed0d97a Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Thu, 12 Mar 2026 16:56:18 +0100 Subject: [PATCH] fix: Paddle Direct keeps preprocessing (orient/deskew/dewarp/crop) Uses the cropped/dewarped image instead of the original so the overlay shows the correctly oriented page. 5 steps instead of 2. Co-Authored-By: Claude Opus 4.6 --- .../app/(admin)/ai/ocr-overlay/page.tsx | 12 +++++++--- .../app/(admin)/ai/ocr-overlay/types.ts | 9 ++++--- klausur-service/backend/ocr_pipeline_api.py | 24 ++++++++++++------- 3 files changed, 30 insertions(+), 15 deletions(-) diff --git a/admin-lehrer/app/(admin)/ai/ocr-overlay/page.tsx b/admin-lehrer/app/(admin)/ai/ocr-overlay/page.tsx index 5cc77fe..6078447 100644 --- a/admin-lehrer/app/(admin)/ai/ocr-overlay/page.tsx +++ b/admin-lehrer/app/(admin)/ai/ocr-overlay/page.tsx @@ -71,10 +71,10 @@ export default function OcrOverlayPage() { setSteps( PADDLE_DIRECT_STEPS.map((s, i) => ({ ...s, - status: i < 1 ? 'completed' : i === 1 ? 'active' : 'pending', + status: i < 4 ? 'completed' : i === 4 ? 'active' : 'pending', })), ) - setCurrentStep(1) + setCurrentStep(4) } else { setMode('pipeline') // Map DB step to overlay UI step @@ -231,6 +231,12 @@ export default function OcrOverlayPage() { case 0: return case 1: + return + case 2: + return + case 3: + return + case 4: return default: return null @@ -472,7 +478,7 @@ export default function OcrOverlayPage() { : 'text-gray-500 dark:text-gray-400 hover:text-gray-700 dark:hover:text-gray-300' }`} > - Paddle Direct (2 Schritte) + Paddle Direct (5 Schritte) diff --git a/admin-lehrer/app/(admin)/ai/ocr-overlay/types.ts b/admin-lehrer/app/(admin)/ai/ocr-overlay/types.ts index 034b808..ba578a4 100644 --- a/admin-lehrer/app/(admin)/ai/ocr-overlay/types.ts +++ b/admin-lehrer/app/(admin)/ai/ocr-overlay/types.ts @@ -49,11 +49,14 @@ export const OVERLAY_UI_TO_DB: Record = { } /** - * 2-step pipeline for Paddle Direct mode. - * Upload → PaddleOCR+Overlay (skips deskew/dewarp/crop/rows) + * 5-step pipeline for Paddle Direct mode. + * Same preprocessing (orient/deskew/dewarp/crop), then PaddleOCR replaces rows+words+overlay. */ export const PADDLE_DIRECT_STEPS: PipelineStep[] = [ - { id: 'orientation', name: 'Upload', icon: '📤', status: 'pending' }, + { id: 'orientation', name: 'Orientierung', icon: '🔄', status: 'pending' }, + { id: 'deskew', name: 'Begradigung', icon: '📐', status: 'pending' }, + { id: 'dewarp', name: 'Entzerrung', icon: '🔧', status: 'pending' }, + { id: 'crop', name: 'Zuschneiden', icon: '✂️', status: 'pending' }, { id: 'paddle-direct', name: 'PaddleOCR + Overlay', icon: '⚡', status: 'pending' }, ] diff --git a/klausur-service/backend/ocr_pipeline_api.py b/klausur-service/backend/ocr_pipeline_api.py index 6b05a87..aa6434d 100644 --- a/klausur-service/backend/ocr_pipeline_api.py +++ b/klausur-service/backend/ocr_pipeline_api.py @@ -2511,17 +2511,23 @@ async def _word_stream_generator( @router.post("/sessions/{session_id}/paddle-direct") async def paddle_direct(session_id: str): - """Run PaddleOCR on the original image and build a word grid directly. + """Run PaddleOCR on the preprocessed image and build a word grid directly. - Skips deskew/dewarp/crop/rows — just Upload → PaddleOCR → Overlay. - The original image is stored as cropped_png so OverlayReconstruction + Expects orientation/deskew/dewarp/crop to be done already. + Uses the cropped image (falls back to dewarped, then original). + The used image is stored as cropped_png so OverlayReconstruction can display it as the background. """ - original_png = await get_session_image(session_id, "original") - if not original_png: - raise HTTPException(status_code=404, detail="No original image found for this session") + # Try preprocessed images first (crop > dewarp > original) + img_png = await get_session_image(session_id, "cropped") + if not img_png: + img_png = await get_session_image(session_id, "dewarped") + if not img_png: + img_png = await get_session_image(session_id, "original") + if not img_png: + raise HTTPException(status_code=404, detail="No image found for this session") - img_arr = np.frombuffer(original_png, dtype=np.uint8) + img_arr = np.frombuffer(img_png, dtype=np.uint8) img_bgr = cv2.imdecode(img_arr, cv2.IMREAD_COLOR) if img_bgr is None: raise HTTPException(status_code=400, detail="Failed to decode original image") @@ -2562,11 +2568,11 @@ async def paddle_direct(session_id: str): }, } - # Store original image as cropped_png so OverlayReconstruction shows it + # Store preprocessed image as cropped_png so OverlayReconstruction shows it await update_session_db( session_id, word_result=word_result, - cropped_png=original_png, + cropped_png=img_png, current_step=8, )