From f30e526917bad8d0788fd28228ccffae124e8449 Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Tue, 17 Mar 2026 16:44:32 +0100 Subject: [PATCH] fix: merge nearby spine gaps + handle multi-page crop in frontend MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Backend: merge gaps within 5% of image width — the spine area may have thin ink strips splitting one physical gap into multiple detected gaps. Only use gaps >= 2% width as split points. Frontend: StepCrop now handles multi_page crop responses without crashing on missing original_size/cropped_size fields. Co-Authored-By: Claude Opus 4.6 --- .../components/ocr-pipeline/StepCrop.tsx | 29 ++++++++++++++----- klausur-service/backend/page_crop.py | 27 +++++++++++++++-- 2 files changed, 46 insertions(+), 10 deletions(-) diff --git a/admin-lehrer/components/ocr-pipeline/StepCrop.tsx b/admin-lehrer/components/ocr-pipeline/StepCrop.tsx index 7363fb4..e578e3c 100644 --- a/admin-lehrer/components/ocr-pipeline/StepCrop.tsx +++ b/admin-lehrer/components/ocr-pipeline/StepCrop.tsx @@ -108,10 +108,21 @@ export function StepCrop({ sessionId, onNext }: StepCropProps) { {cropResult && (
- {cropResult.crop_applied ? ( + {(cropResult as Record).multi_page ? ( + <> + + Mehrseitig: {(cropResult as Record).page_count as number} Seiten erkannt + + {((cropResult as Record).sub_sessions as Array<{id: string; name: string; page_index: number}> | undefined)?.map((sub) => ( + + Seite {sub.page_index + 1} + + ))} + + ) : cropResult.crop_applied ? ( <> - ✂️ Zugeschnitten + Zugeschnitten {cropResult.detected_format && ( <> @@ -126,10 +137,14 @@ export function StepCrop({ sessionId, onNext }: StepCropProps) { )} -
- - {cropResult.original_size.width}x{cropResult.original_size.height} → {cropResult.cropped_size.width}x{cropResult.cropped_size.height} - + {cropResult.original_size && cropResult.cropped_size && ( + <> +
+ + {cropResult.original_size.width}x{cropResult.original_size.height} → {cropResult.cropped_size.width}x{cropResult.cropped_size.height} + + + )} {cropResult.border_fractions && ( <>
@@ -141,7 +156,7 @@ export function StepCrop({ sessionId, onNext }: StepCropProps) { ) : ( - ✓ Kein Zuschnitt noetig + Kein Zuschnitt noetig )} {cropResult.duration_seconds != null && ( diff --git a/klausur-service/backend/page_crop.py b/klausur-service/backend/page_crop.py index ea51714..8ac8e3c 100644 --- a/klausur-service/backend/page_crop.py +++ b/klausur-service/backend/page_crop.py @@ -100,12 +100,33 @@ def detect_page_splits( if not gaps: return [] + # Merge nearby gaps (< 5% of width apart) — the spine area may have + # thin ink strips between multiple gap segments + merge_dist = max(20, int(w * 0.05)) + merged: list = [gaps[0]] + for g in gaps[1:]: + prev = merged[-1] + prev_end = prev["x"] + prev["width"] + if g["x"] - prev_end < merge_dist: + # Merge: extend previous gap to cover both + new_end = g["x"] + g["width"] + prev["width"] = new_end - prev["x"] + prev["center"] = prev["x"] + prev["width"] // 2 + else: + merged.append(g) + gaps = merged + # Sort gaps by width (largest = most likely spine) gaps.sort(key=lambda g: g["width"], reverse=True) - # Use the widest gap(s) as split points - # For now: support up to N-1 gaps → N pages - split_points = sorted(g["center"] for g in gaps[:3]) # max 4 pages + # Use only gaps that are significant (>= 2% of image width) + significant_gaps = [g for g in gaps if g["width"] >= w * 0.02] + if not significant_gaps: + # Fall back to widest gap + significant_gaps = [gaps[0]] + + # Use the significant gap(s) as split points + split_points = sorted(g["center"] for g in significant_gaps[:3]) # Build page rectangles pages: list = []