From 1a2efbf0750851c49012e3bf8120be65188f5436 Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Thu, 5 Mar 2026 13:02:16 +0100 Subject: [PATCH] fix: relative bold detection (page median), fix save/finish buttons MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bold detection: - Replace absolute threshold with page-level relative comparison - Measure stroke width for all cells, then mark cells >1.4× median as bold - Adapts automatically to font, DPI and scan quality Save buttons: - Fix status stuck on 'error' preventing re-click - Better error messages with response body - Fallback score to 0 when null Co-Authored-By: Claude Opus 4.6 --- .../ocr-pipeline/StepGroundTruth.tsx | 15 ++- klausur-service/backend/cv_vocab_pipeline.py | 96 +++++++++++-------- 2 files changed, 68 insertions(+), 43 deletions(-) diff --git a/admin-lehrer/components/ocr-pipeline/StepGroundTruth.tsx b/admin-lehrer/components/ocr-pipeline/StepGroundTruth.tsx index 6ec6ebe..f471d59 100644 --- a/admin-lehrer/components/ocr-pipeline/StepGroundTruth.tsx +++ b/admin-lehrer/components/ocr-pipeline/StepGroundTruth.tsx @@ -170,22 +170,29 @@ export function StepGroundTruth({ sessionId, onNext }: StepGroundTruthProps) { // Save validation const handleSave = async () => { - if (!sessionId) return + if (!sessionId) { + setError('Keine Session-ID vorhanden') + return + } setStatus('saving') + setError('') try { const resp = await fetch( `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/reconstruction/validate`, { method: 'POST', headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ notes, score }), + body: JSON.stringify({ notes, score: score ?? 0 }), } ) - if (!resp.ok) throw new Error(`Save failed: ${resp.status}`) + if (!resp.ok) { + const body = await resp.text().catch(() => '') + throw new Error(`Speichern fehlgeschlagen (${resp.status}): ${body}`) + } setStatus('saved') } catch (e) { setError(e instanceof Error ? e.message : String(e)) - setStatus('error') + setStatus('ready') } } diff --git a/klausur-service/backend/cv_vocab_pipeline.py b/klausur-service/backend/cv_vocab_pipeline.py index 6ac2650..966f47e 100644 --- a/klausur-service/backend/cv_vocab_pipeline.py +++ b/klausur-service/backend/cv_vocab_pipeline.py @@ -4768,35 +4768,30 @@ def _clean_cell_text_lite(text: str) -> str: # --------------------------------------------------------------------------- -# Bold detection via stroke-width analysis +# Bold detection via stroke-width analysis (relative / page-level) # --------------------------------------------------------------------------- -def _detect_bold(gray_crop: np.ndarray) -> bool: - """Detect bold text by measuring mean stroke width in a binarised cell crop. +def _measure_stroke_width(gray_crop: np.ndarray) -> float: + """Measure mean stroke width in a binarised cell crop. - Bold text has thicker strokes. We binarise (Otsu), skeletonise to get - single-pixel strokes, then compute mean distance-transform value on the - skeleton — that approximates half the stroke width. A value above the - threshold indicates bold. - - Returns True if the crop likely contains bold text. + Returns a DPI-normalised value (mean stroke width as % of crop height), + or 0.0 if measurement is not possible. """ if gray_crop is None or gray_crop.size == 0: - return False + return 0.0 h, w = gray_crop.shape[:2] if h < 10 or w < 10: - return False + return 0.0 # Binarise: text = white (255), background = black (0) _, bw = cv2.threshold(gray_crop, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU) if cv2.countNonZero(bw) < 20: - return False + return 0.0 # Distance transform: value at each white pixel = distance to nearest black dist = cv2.distanceTransform(bw, cv2.DIST_L2, 3) - # Skeleton via morphological thinning (approximate with erode-based approach) - # Use thin iterations of erosion to approximate the medial axis + # Skeleton via morphological thinning kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (3, 3)) thin = bw.copy() for _ in range(max(1, min(h, w) // 6)): @@ -4805,16 +4800,52 @@ def _detect_bold(gray_crop: np.ndarray) -> bool: break thin = eroded - # Mean distance-transform value on the skeleton points skeleton_pts = thin > 0 if not np.any(skeleton_pts): - return False + return 0.0 mean_stroke = float(np.mean(dist[skeleton_pts])) + return mean_stroke / max(h, 1) * 100 # normalised: % of cell height - # Threshold: empirically, normal text ≈ 1.0–1.8, bold ≈ 2.0+ - # Scale by crop height to be DPI-independent - normalised = mean_stroke / max(h, 1) * 100 # % of cell height - return normalised > 3.5 + +def _classify_bold_cells(cells: List[Dict[str, Any]], ocr_img: Optional[np.ndarray], + img_w: int, img_h: int) -> None: + """Two-pass bold detection: measure all cells, then compare against median. + + Cells with stroke width > 1.4× the page median are marked as bold. + This adapts automatically to font, DPI and scan quality. + Modifies cells in-place (sets 'is_bold' key). + """ + if ocr_img is None: + return + + # Pass 1: measure stroke width for every cell with text + metrics: List[float] = [] + cell_strokes: List[float] = [] + for cell in cells: + sw = 0.0 + if cell.get('text', '').strip(): + bp = cell['bbox_px'] + y1 = max(0, bp['y']) + y2 = min(img_h, bp['y'] + bp['h']) + x1 = max(0, bp['x']) + x2 = min(img_w, bp['x'] + bp['w']) + if y2 > y1 and x2 > x1: + sw = _measure_stroke_width(ocr_img[y1:y2, x1:x2]) + cell_strokes.append(sw) + if sw > 0: + metrics.append(sw) + + if len(metrics) < 3: + # Too few cells to compare — leave all as non-bold + return + + median_sw = float(np.median(metrics)) + if median_sw <= 0: + return + + # Pass 2: cells significantly above median → bold + for cell, sw in zip(cells, cell_strokes): + cell['is_bold'] = sw > 0 and (sw / median_sw) > 1.4 # --------------------------------------------------------------------------- @@ -5006,17 +5037,10 @@ def _ocr_cell_crop( row_idx, col_idx, pre_filter) avg_conf = 0.0 - # --- Bold detection via stroke-width analysis --- - is_bold = False - if text.strip() and ocr_img is not None: - gray_cell = ocr_img[cy:cy + ch, cx:cx + cw] - is_bold = _detect_bold(gray_cell) - result = dict(empty_cell) result['text'] = text result['confidence'] = avg_conf result['ocr_engine'] = used_engine - result['is_bold'] = is_bold return result @@ -5163,16 +5187,6 @@ def build_cell_grid_v2( # Apply noise filter text = _clean_cell_text(text) - # Bold detection for broad columns - is_bold = False - if text.strip() and ocr_img is not None: - bc_y = max(0, row.y) - bc_h = min(img_h, row.y + row.height) - bc_y - bc_x = max(0, col.x) - bc_w = min(img_w, col.x + col.width) - bc_x - if bc_h > 0 and bc_w > 0: - is_bold = _detect_bold(ocr_img[bc_y:bc_y + bc_h, bc_x:bc_x + bc_w]) - cell = { 'cell_id': f"R{row_idx:02d}_C{col_idx}", 'row_index': row_idx, @@ -5191,7 +5205,7 @@ def build_cell_grid_v2( 'h': round(row.height / img_h * 100, 2) if img_h else 0, }, 'ocr_engine': 'word_lookup', - 'is_bold': is_bold, + 'is_bold': False, } cells.append(cell) @@ -5236,9 +5250,13 @@ def build_cell_grid_v2( if empty_rows_removed > 0: logger.info(f"build_cell_grid_v2: removed {empty_rows_removed} all-empty rows") + # --- Page-level bold detection: compare stroke widths across all cells --- + _classify_bold_cells(cells, ocr_img, img_w, img_h) + bold_count = sum(1 for c in cells if c.get('is_bold')) + logger.info(f"build_cell_grid_v2: {len(cells)} cells from " f"{len(content_rows)} rows × {len(relevant_cols)} columns, " - f"engine={engine_name} (hybrid)") + f"engine={engine_name} (hybrid), {bold_count} bold") return cells, columns_meta