@dataclass
class RowGeometry:
    """Geometrically detected text row with header/footer classification."""
    index: int                 # 0-based, top to bottom
    x: int                     # absolute left edge (= content left_x)
    y: int                     # absolute y of the row's top edge
    width: int                 # content width in px
    height: int                # row height in px
    word_count: int            # number of entries in `words`
    words: List[Dict]          # word dicts whose vertical centre lies in the row
    row_type: str = 'content'  # 'content' | 'header' | 'footer'
    gap_before: int = 0        # height in px of the gap directly above this row


# =============================================================================
# Row Geometry Detection (horizontal whitespace-gap analysis)
# =============================================================================

def _collect_gap_runs(in_gap: np.ndarray, min_height: int) -> List[Tuple[int, int]]:
    """Return (start, end) of every run of True in *in_gap* at least *min_height* long.

    *end* is exclusive. Indices are plain Python ints so that everything
    derived from them stays JSON-serialisable.
    """
    if in_gap.size == 0:
        return []
    # Pad with False on both sides so runs touching either border still
    # produce a rising and a falling edge.
    padded = np.concatenate(([False], in_gap, [False])).astype(np.int8)
    steps = np.diff(padded)
    starts = np.flatnonzero(steps == 1)
    ends = np.flatnonzero(steps == -1)
    return [(int(s), int(e)) for s, e in zip(starts, ends) if e - s >= min_height]


def _fit_gap_between_words(
    gap: Tuple[int, int],
    word_dicts: List[Dict],
    min_height: int,
) -> Optional[Tuple[int, int]]:
    """Shrink *gap* so it no longer overlaps any word box.

    Returns the gap unchanged when no word overlaps it, the part above the
    topmost overlapping word or the part below the lowest one when that part
    is at least *min_height* tall, and None when neither part is tall enough.
    """
    gap_start, gap_end = gap
    overlapping = [
        (wd['top'], wd['top'] + wd['height'])
        for wd in word_dicts
        if wd['top'] < gap_end and wd['top'] + wd['height'] > gap_start
    ]
    if not overlapping:
        return gap

    first_word_top = min(top for top, _ in overlapping)
    last_word_bottom = max(bottom for _, bottom in overlapping)

    # Prefer the part of the gap above the words, then the part below them.
    if first_word_top - gap_start >= min_height:
        return (gap_start, first_word_top)
    if gap_end - last_word_bottom >= min_height:
        return (last_word_bottom, gap_end)
    return None


def _largest_gap_in_zone(
    gaps: List[Tuple[int, int]],
    min_size: float,
    zone_lo: float,
    zone_hi: float,
) -> Optional[Tuple[int, int]]:
    """Return the tallest gap taller than *min_size* whose midpoint lies strictly
    between *zone_lo* and *zone_hi*, or None. Ties go to the topmost gap."""
    candidates = [
        (gs, ge) for gs, ge in gaps
        if ge - gs > min_size and zone_lo < (gs + ge) / 2 < zone_hi
    ]
    return max(candidates, key=lambda g: g[1] - g[0], default=None)


def detect_row_geometry(
    inv: np.ndarray,
    word_dicts: List[Dict],
    left_x: int, right_x: int,
    top_y: int, bottom_y: int,
) -> List['RowGeometry']:
    """Detect row geometry using horizontal whitespace-gap analysis.

    Mirrors the vertical gap approach used for columns, but operates on
    horizontal projection profiles to find gaps between text lines.
    Also classifies header/footer rows based on gap size.

    `inv` and `word_dicts` are the intermediates that detect_column_geometry()
    returns as the last two elements of its result tuple
    (geometries, left_x, right_x, top_y, bottom_y, word_dicts, inv).

    Args:
        inv: Inverted binarized image (white text on black bg, full page).
        word_dicts: Word bounding boxes from Tesseract (relative to content ROI);
            each dict must provide 'top' and 'height'.
        left_x, right_x: Absolute X bounds of the content area.
        top_y, bottom_y: Absolute Y bounds of the content area.

    Returns:
        List of RowGeometry objects sorted top to bottom. Empty when the
        content area is too small. Falls back to grouping words by Y
        position when fewer than two gaps survive validation.
    """
    content_w = right_x - left_x
    content_h = bottom_y - top_y

    if content_h < 10 or content_w < 10:
        logger.warning("detect_row_geometry: content area too small")
        return []

    # --- Step 1: Horizontal projection profile ---
    content_strip = inv[top_y:bottom_y, left_x:right_x]
    if content_strip.size == 0:
        # Bounds lie outside the image; np.convolve would raise on empty input.
        logger.warning("detect_row_geometry: content area too small")
        return []
    h_proj = np.sum(content_strip, axis=1).astype(float)
    # Fraction of "ink" pixels per pixel row (content_w >= 10 is guaranteed above).
    h_proj_norm = h_proj / (content_w * 255)

    # --- Step 2: Smoothing + threshold ---
    kernel_size = max(3, content_h // 200)
    if kernel_size % 2 == 0:
        kernel_size += 1  # odd kernel keeps the box filter centred
    h_smooth = np.convolve(h_proj_norm, np.ones(kernel_size) / kernel_size, mode='same')

    inked = h_smooth[h_smooth > 0]
    median_density = float(np.median(inked)) if inked.size else 0.01
    gap_threshold = max(median_density * 0.15, 0.003)

    in_gap = h_smooth < gap_threshold
    min_gap_height = max(3, content_h // 500)

    # --- Step 3: Collect contiguous gap regions (relative to content ROI) ---
    raw_gaps = _collect_gap_runs(in_gap, min_gap_height)

    logger.info(f"RowGeometry: {len(raw_gaps)} raw gaps found (threshold={gap_threshold:.4f}, "
                f"min_height={min_gap_height}px)")

    # --- Step 4: Validate gaps against word bounding boxes ---
    validated_gaps = []
    for gap in raw_gaps:
        fitted = _fit_gap_between_words(gap, word_dicts, min_gap_height)
        if fitted is None:
            logger.debug(f"RowGeometry: gap [{gap[0]}..{gap[1]}] "
                         f"discarded (word overlap, no room to shift)")
        else:
            validated_gaps.append(fitted)

    logger.info(f"RowGeometry: {len(validated_gaps)} gaps after word validation")

    # --- Fallback if too few gaps ---
    if len(validated_gaps) < 2:
        logger.info("RowGeometry: < 2 gaps found, falling back to word grouping")
        return _build_rows_from_word_grouping(
            word_dicts, left_x, right_x, top_y, bottom_y, content_w, content_h,
        )

    validated_gaps.sort(key=lambda g: g[0])

    # --- Step 5: Header/footer detection via gap size ---
    # A gap in the top/bottom 15% of the content that is more than twice the
    # median gap separates the header/footer from the body.
    HEADER_FOOTER_ZONE = 0.15
    GAP_MULTIPLIER = 2.0

    median_gap = float(np.median([ge - gs for gs, ge in validated_gaps]))
    large_gap_threshold = median_gap * GAP_MULTIPLIER

    header_zone_limit = int(content_h * HEADER_FOOTER_ZONE)
    footer_zone_start = int(content_h * (1.0 - HEADER_FOOTER_ZONE))

    header_boundary_rel = None  # rows centred above this y are header rows
    footer_boundary_rel = None  # rows centred below this y are footer rows

    best_header_gap = _largest_gap_in_zone(
        validated_gaps, large_gap_threshold, float('-inf'), header_zone_limit)
    if best_header_gap is not None:
        header_boundary_rel = best_header_gap[1]
        logger.info(f"RowGeometry: header boundary at y_rel={header_boundary_rel} "
                    f"(gap={best_header_gap[1] - best_header_gap[0]}px, "
                    f"median_gap={median_gap:.0f}px)")

    best_footer_gap = _largest_gap_in_zone(
        validated_gaps, large_gap_threshold, footer_zone_start, float('inf'))
    if best_footer_gap is not None:
        footer_boundary_rel = best_footer_gap[0]
        logger.info(f"RowGeometry: footer boundary at y_rel={footer_boundary_rel} "
                    f"(gap={best_footer_gap[1] - best_footer_gap[0]}px)")

    # --- Step 6: Build RowGeometry objects from gaps ---
    # Rows are the spans between gaps.
    row_boundaries = []  # (start_y_rel, end_y_rel)

    # Top of content to first gap
    if validated_gaps[0][0] > min_gap_height:
        row_boundaries.append((0, validated_gaps[0][0]))

    # Between consecutive gaps
    for (_, prev_end), (next_start, _) in zip(validated_gaps, validated_gaps[1:]):
        if next_start - prev_end > 0:
            row_boundaries.append((prev_end, next_start))

    # Last gap to bottom of content
    if validated_gaps[-1][1] < content_h - min_gap_height:
        row_boundaries.append((validated_gaps[-1][1], content_h))

    # A row's gap_before is the height of the gap that ends exactly where the
    # row starts. A row starting at the content top has no gap above it.
    gap_height_by_end = {ge: ge - gs for gs, ge in validated_gaps}
    word_centres = [(w['top'] + w['height'] / 2, w) for w in word_dicts]

    rows = []
    for idx, (row_start_rel, row_end_rel) in enumerate(row_boundaries):
        row_mid = (row_start_rel + row_end_rel) / 2
        if header_boundary_rel is not None and row_mid < header_boundary_rel:
            row_type = 'header'
        elif footer_boundary_rel is not None and row_mid > footer_boundary_rel:
            row_type = 'footer'
        else:
            row_type = 'content'

        # A word belongs to the row that contains its vertical centre.
        row_words = [w for centre, w in word_centres
                     if row_start_rel <= centre < row_end_rel]

        rows.append(RowGeometry(
            index=idx,
            x=left_x,
            y=top_y + row_start_rel,
            width=content_w,
            height=row_end_rel - row_start_rel,
            word_count=len(row_words),
            words=row_words,
            row_type=row_type,
            gap_before=gap_height_by_end.get(row_start_rel, 0),
        ))

    type_counts = {}
    for r in rows:
        type_counts[r.row_type] = type_counts.get(r.row_type, 0) + 1
    logger.info(f"RowGeometry: {len(rows)} rows detected: {type_counts}")

    return rows


def _build_rows_from_word_grouping(
    word_dicts: List[Dict],
    left_x: int, right_x: int,
    top_y: int, bottom_y: int,
    content_w: int, content_h: int,
) -> List['RowGeometry']:
    """Fallback: build rows by grouping words by Y position.

    Uses _group_words_into_lines() with a generous tolerance
    (at least 20 px, otherwise 1% of the content height).
    No header/footer detection in fallback mode: every row is 'content'
    with gap_before 0. `right_x` and `bottom_y` are accepted for signature
    symmetry with detect_row_geometry() but are not used.
    """
    if not word_dicts:
        return []

    y_tolerance = max(20, content_h // 100)
    lines = _group_words_into_lines(word_dicts, y_tolerance_px=y_tolerance)

    rows = []
    for line_words in lines:
        if not line_words:
            continue
        min_top = min(w['top'] for w in line_words)
        max_bottom = max(w['top'] + w['height'] for w in line_words)

        rows.append(RowGeometry(
            index=len(rows),  # stays contiguous even when an empty line is skipped
            x=left_x,
            y=top_y + min_top,
            width=content_w,
            height=max_bottom - min_top,
            word_count=len(line_words),
            words=line_words,
            row_type='content',
            gap_before=0,
        ))

    logger.info(f"RowGeometry (fallback): {len(rows)} rows from word grouping")
    return rows
Content-based classification diff --git a/klausur-service/backend/migrations/003_add_row_result.sql b/klausur-service/backend/migrations/003_add_row_result.sql new file mode 100644 index 0000000..4ef1eea --- /dev/null +++ b/klausur-service/backend/migrations/003_add_row_result.sql @@ -0,0 +1,4 @@ +-- Migration 003: Add row_result column for row geometry detection +-- Stores detected row geometries including header/footer classification + +ALTER TABLE ocr_pipeline_sessions ADD COLUMN IF NOT EXISTS row_result JSONB; diff --git a/klausur-service/backend/ocr_pipeline_api.py b/klausur-service/backend/ocr_pipeline_api.py index 3a0e657..ecea61d 100644 --- a/klausur-service/backend/ocr_pipeline_api.py +++ b/klausur-service/backend/ocr_pipeline_api.py @@ -1,14 +1,15 @@ """ OCR Pipeline API - Schrittweise Seitenrekonstruktion. -Zerlegt den OCR-Prozess in 7 einzelne Schritte: +Zerlegt den OCR-Prozess in 8 einzelne Schritte: 1. Deskewing - Scan begradigen 2. Dewarping - Buchwoelbung entzerren 3. Spaltenerkennung - Unsichtbare Spalten finden -4. Worterkennung - OCR mit Bounding Boxes -5. Koordinatenzuweisung - Exakte Positionen -6. Seitenrekonstruktion - Seite nachbauen -7. Ground Truth Validierung - Gesamtpruefung +4. Zeilenerkennung - Horizontale Zeilen + Kopf-/Fusszeilen +5. Worterkennung - OCR mit Bounding Boxes +6. Koordinatenzuweisung - Exakte Positionen +7. Seitenrekonstruktion - Seite nachbauen +8. Ground Truth Validierung - Gesamtpruefung Lizenz: Apache 2.0 DATENSCHUTZ: Alle Verarbeitung erfolgt lokal. 
@@ -30,9 +31,13 @@ from pydantic import BaseModel from cv_vocab_pipeline import ( analyze_layout, analyze_layout_by_words, + classify_column_types, + create_layout_image, create_ocr_image, deskew_image, deskew_image_by_word_alignment, + detect_column_geometry, + detect_row_geometry, dewarp_image, dewarp_image_manual, render_image_high_res, @@ -139,6 +144,16 @@ class ColumnGroundTruthRequest(BaseModel): notes: Optional[str] = None +class ManualRowsRequest(BaseModel): + rows: List[Dict[str, Any]] + + +class RowGroundTruthRequest(BaseModel): + is_correct: bool + corrected_rows: Optional[List[Dict[str, Any]]] = None + notes: Optional[str] = None + + # --------------------------------------------------------------------------- # Session Management Endpoints # --------------------------------------------------------------------------- @@ -275,14 +290,17 @@ async def delete_session(session_id: str): @router.get("/sessions/{session_id}/image/{image_type}") async def get_image(session_id: str, image_type: str): - """Serve session images: original, deskewed, dewarped, binarized, or columns-overlay.""" - valid_types = {"original", "deskewed", "dewarped", "binarized", "columns-overlay"} + """Serve session images: original, deskewed, dewarped, binarized, columns-overlay, or rows-overlay.""" + valid_types = {"original", "deskewed", "dewarped", "binarized", "columns-overlay", "rows-overlay"} if image_type not in valid_types: raise HTTPException(status_code=400, detail=f"Unknown image type: {image_type}") if image_type == "columns-overlay": return await _get_columns_overlay(session_id) + if image_type == "rows-overlay": + return await _get_rows_overlay(session_id) + # Try cache first for fast serving cached = _cache.get(session_id) if cached: @@ -643,9 +661,27 @@ async def detect_columns(session_id: str): # Binarized image for layout analysis ocr_img = create_ocr_image(dewarped_bgr) + h, w = ocr_img.shape[:2] + + # Phase A: Geometry detection (returns word_dicts + inv for reuse) + 
# ---------------------------------------------------------------------------
# Row Detection Endpoints
# ---------------------------------------------------------------------------

@router.post("/sessions/{session_id}/rows")
async def detect_rows(session_id: str):
    """Run row detection on the dewarped image using horizontal gap analysis.

    Reuses the `_word_dicts`, `_inv` and `_content_bounds` cache entries
    when they are present. Otherwise runs detect_column_geometry() once to
    produce them and stores them in the cache. The result is persisted as
    `row_result` and the session is moved to step 4.

    Raises:
        HTTPException 400: dewarp has not been completed, or column
            geometry detection failed so no intermediates are available.
    """
    if session_id not in _cache:
        await _load_session_to_cache(session_id)
    cached = _get_cached(session_id)

    dewarped_bgr = cached.get("dewarped_bgr")
    if dewarped_bgr is None:
        raise HTTPException(status_code=400, detail="Dewarp must be completed before row detection")

    t0 = time.time()

    # Try to reuse cached word_dicts and inv from column detection
    word_dicts = cached.get("_word_dicts")
    inv = cached.get("_inv")
    content_bounds = cached.get("_content_bounds")

    if word_dicts is None or inv is None or content_bounds is None:
        # Not cached: run column geometry to get the intermediates
        ocr_img = create_ocr_image(dewarped_bgr)
        geo_result = detect_column_geometry(ocr_img, dewarped_bgr)
        if geo_result is None:
            raise HTTPException(status_code=400, detail="Column geometry detection failed — cannot detect rows")
        _geoms, left_x, right_x, top_y, bottom_y, word_dicts, inv = geo_result
        cached["_word_dicts"] = word_dicts
        cached["_inv"] = inv
        cached["_content_bounds"] = (left_x, right_x, top_y, bottom_y)
    else:
        left_x, right_x, top_y, bottom_y = content_bounds

    rows = detect_row_geometry(inv, word_dicts, left_x, right_x, top_y, bottom_y)
    duration = time.time() - t0

    # Serializable result. The per-row word lists are left out to keep the
    # payload small. Values are coerced to built-in int so the payload stays
    # JSON-serialisable even if geometry values arrive as NumPy integers.
    rows_data = [
        {
            "index": int(r.index),
            "x": int(r.x),
            "y": int(r.y),
            "width": int(r.width),
            "height": int(r.height),
            "word_count": int(r.word_count),
            "row_type": r.row_type,
            "gap_before": int(r.gap_before),
        }
        for r in rows
    ]

    type_counts = {}
    for r in rows:
        type_counts[r.row_type] = type_counts.get(r.row_type, 0) + 1

    row_result = {
        "rows": rows_data,
        "summary": type_counts,
        "total_rows": len(rows),
        "duration_seconds": round(duration, 2),
    }

    # Persist to DB
    await update_session_db(
        session_id,
        row_result=row_result,
        current_step=4,
    )

    cached["row_result"] = row_result

    logger.info(f"OCR Pipeline: rows session {session_id}: "
                f"{len(rows)} rows detected ({duration:.2f}s): {type_counts}")

    return {
        "session_id": session_id,
        **row_result,
    }


@router.post("/sessions/{session_id}/rows/manual")
async def set_manual_rows(session_id: str, req: ManualRowsRequest):
    """Override detected rows with manual definitions.

    Raises:
        HTTPException 404: the session does not exist.
    """
    # Same existence check as the ground-truth endpoints, so an unknown
    # session is reported instead of silently "succeeding".
    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    row_result = {
        "rows": req.rows,
        "total_rows": len(req.rows),
        "duration_seconds": 0,
        "method": "manual",
    }

    await update_session_db(session_id, row_result=row_result)

    if session_id in _cache:
        _cache[session_id]["row_result"] = row_result

    logger.info(f"OCR Pipeline: manual rows session {session_id}: "
                f"{len(req.rows)} rows set")

    return {"session_id": session_id, **row_result}


@router.post("/sessions/{session_id}/ground-truth/rows")
async def save_row_ground_truth(session_id: str, req: RowGroundTruthRequest):
    """Save ground truth feedback for the row detection step.

    The feedback is stored under the "rows" key of the session's
    `ground_truth` document, together with a snapshot of the `row_result`
    it refers to.

    Raises:
        HTTPException 404: the session does not exist.
    """
    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    ground_truth = session.get("ground_truth") or {}
    gt = {
        "is_correct": req.is_correct,
        "corrected_rows": req.corrected_rows,
        "notes": req.notes,
        "saved_at": datetime.utcnow().isoformat(),  # naive UTC timestamp
        "row_result": session.get("row_result"),
    }
    ground_truth["rows"] = gt

    await update_session_db(session_id, ground_truth=ground_truth)

    if session_id in _cache:
        _cache[session_id]["ground_truth"] = ground_truth

    return {"session_id": session_id, "ground_truth": gt}


@router.get("/sessions/{session_id}/ground-truth/rows")
async def get_row_ground_truth(session_id: str):
    """Retrieve saved ground truth for row detection.

    Raises:
        HTTPException 404: the session does not exist or has no row
            ground truth yet.
    """
    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    ground_truth = session.get("ground_truth") or {}
    rows_gt = ground_truth.get("rows")
    if not rows_gt:
        raise HTTPException(status_code=404, detail="No row ground truth saved")

    return {
        "session_id": session_id,
        "rows_gt": rows_gt,
        "rows_auto": session.get("row_result"),
    }


async def _get_rows_overlay(session_id: str) -> Response:
    """Generate dewarped image with row bands drawn on it.

    Each row gets a solid border, a label ("R<index> <TYPE> (<n>w)") and a
    fill blended at 15% opacity. Rows without usable integer geometry are
    skipped, because manually supplied rows are stored unvalidated.

    Raises:
        HTTPException 404: session, row data or dewarped image missing.
        HTTPException 500: the image could not be decoded or encoded.
    """
    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    row_result = session.get("row_result")
    if not row_result or not row_result.get("rows"):
        raise HTTPException(status_code=404, detail="No row data available")

    # Load dewarped image
    dewarped_png = await get_session_image(session_id, "dewarped")
    if not dewarped_png:
        raise HTTPException(status_code=404, detail="Dewarped image not available")

    arr = np.frombuffer(dewarped_png, dtype=np.uint8)
    img = cv2.imdecode(arr, cv2.IMREAD_COLOR)
    if img is None:
        raise HTTPException(status_code=500, detail="Failed to decode image")

    # Color map for row types (BGR)
    row_colors = {
        "content": (255, 180, 0),    # Blue
        "header": (128, 128, 128),   # Gray
        "footer": (128, 128, 128),   # Gray
    }

    overlay = img.copy()
    for row in row_result["rows"]:
        try:
            # OpenCV needs integer pixel coordinates; manual rows may carry
            # floats, numeric strings, or lack a key entirely.
            x, y = int(row["x"]), int(row["y"])
            w, h = int(row["width"]), int(row["height"])
        except (KeyError, TypeError, ValueError, OverflowError):
            logger.warning(f"OCR Pipeline: rows overlay session {session_id}: "
                           f"skipping row without usable geometry")
            continue

        row_type = str(row.get("row_type") or "content")
        color = row_colors.get(row_type, (200, 200, 200))

        # Fill goes on the copy (blended in later), border on the image itself
        cv2.rectangle(overlay, (x, y), (x + w, y + h), color, -1)
        cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)

        # Label
        idx = row.get("index", 0)
        label = f"R{idx} {row_type.upper()}"
        wc = row.get("word_count", 0)
        if wc:
            label = f"{label} ({wc}w)"
        cv2.putText(img, label, (x + 5, y + 18),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)

    # Blend overlay at 15% opacity
    cv2.addWeighted(overlay, 0.15, img, 0.85, 0, img)

    success, result_png = cv2.imencode(".png", img)
    if not success:
        raise HTTPException(status_code=500, detail="Failed to encode overlay image")

    return Response(content=result_png.tobytes(), media_type="image/png")
column_result, + deskew_result, dewarp_result, column_result, row_result, ground_truth, auto_shear_degrees, created_at, updated_at """, uuid.UUID(session_id), name, filename, original_png) @@ -93,7 +93,7 @@ async def get_session_db(session_id: str) -> Optional[Dict[str, Any]]: async with pool.acquire() as conn: row = await conn.fetchrow(""" SELECT id, name, filename, status, current_step, - deskew_result, dewarp_result, column_result, + deskew_result, dewarp_result, column_result, row_result, ground_truth, auto_shear_degrees, created_at, updated_at FROM ocr_pipeline_sessions WHERE id = $1 @@ -135,11 +135,11 @@ async def update_session_db(session_id: str, **kwargs) -> Optional[Dict[str, Any allowed_fields = { 'name', 'filename', 'status', 'current_step', 'original_png', 'deskewed_png', 'binarized_png', 'dewarped_png', - 'deskew_result', 'dewarp_result', 'column_result', + 'deskew_result', 'dewarp_result', 'column_result', 'row_result', 'ground_truth', 'auto_shear_degrees', } - jsonb_fields = {'deskew_result', 'dewarp_result', 'column_result', 'ground_truth'} + jsonb_fields = {'deskew_result', 'dewarp_result', 'column_result', 'row_result', 'ground_truth'} for key, value in kwargs.items(): if key in allowed_fields: @@ -163,7 +163,7 @@ async def update_session_db(session_id: str, **kwargs) -> Optional[Dict[str, Any SET {', '.join(fields)} WHERE id = ${param_idx} RETURNING id, name, filename, status, current_step, - deskew_result, dewarp_result, column_result, + deskew_result, dewarp_result, column_result, row_result, ground_truth, auto_shear_degrees, created_at, updated_at """, *values) @@ -220,7 +220,7 @@ def _row_to_dict(row: asyncpg.Record) -> Dict[str, Any]: result[key] = result[key].isoformat() # JSONB → parsed (asyncpg returns str for JSONB) - for key in ['deskew_result', 'dewarp_result', 'column_result', 'ground_truth']: + for key in ['deskew_result', 'dewarp_result', 'column_result', 'row_result', 'ground_truth']: if key in result and result[key] is not None: 
if isinstance(result[key], str): result[key] = json.loads(result[key])