diff --git a/klausur-service/backend/ocr_pipeline_api.py b/klausur-service/backend/ocr_pipeline_api.py index aa6434d..39cb199 100644 --- a/klausur-service/backend/ocr_pipeline_api.py +++ b/klausur-service/backend/ocr_pipeline_api.py @@ -2597,10 +2597,10 @@ def _paddle_words_to_grid_cells( ) -> tuple: """Convert PaddleOCR word dicts into GridCell dicts + columns_meta. - 1. Sort words by (top, left). - 2. Cluster into rows by Y-proximity (threshold = 50% of median word height). - 3. Within each row, sort left→right and assign col_index. - 4. Each word → 1 GridCell with word_boxes and bbox_pct. + Groups words into rows (Y-proximity), then builds ONE cell per row + with all words as word_boxes — matching the format of _build_cells() + in cv_words_first.py. This gives OverlayReconstruction a row-spanning + bbox_pct for correct font sizing and per-word positions for placement. Returns (cells, columns_meta) in the same format as build_grid_from_words. """ @@ -2632,69 +2632,82 @@ def _paddle_words_to_grid_cells( if current_row: rows.append(current_row) - # Sort each row left→right and build cells + # Build ONE cell per row (all words in reading order, word_boxes for positioning) cells: List[Dict[str, Any]] = [] - max_col = 0 for row_idx, row_words in enumerate(rows): row_words.sort(key=lambda w: w["left"]) - for col_idx, w in enumerate(row_words): - left = w["left"] - top = w["top"] - width = w["width"] - height = w["height"] - conf = w.get("confidence", 0) - if isinstance(conf, float) and conf <= 1.0: - conf = conf * 100 # normalize to 0-100 - cell = { - "cell_id": f"PD_R{row_idx:02d}_W{col_idx:02d}", - "x": left, - "y": top, - "width": width, - "height": height, - "text": w.get("text", ""), - "confidence": round(conf, 1), - "column_index": col_idx, - "row_index": row_idx, - "zone_index": 0, - "ocr_engine": "paddle_direct", - "word_boxes": [{ - "text": w.get("text", ""), - "left": left, - "top": top, - "width": width, - "height": height, - "confidence": round(conf, 1), - }], - "bbox_pct": { - "x": round(left / img_w * 100, 3), - "y": round(top / img_h * 100, 3), - "w": round(width / img_w * 100, 3), - "h": round(height / img_h * 100, 3), - }, - } - cells.append(cell) - if col_idx > max_col: - max_col = col_idx + # Tight bbox spanning all words in this row + x_min = min(w["left"] for w in row_words) + y_min = min(w["top"] for w in row_words) + x_max = max(w["left"] + w["width"] for w in row_words) + y_max = max(w["top"] + w["height"] for w in row_words) + bw = x_max - x_min + bh = y_max - y_min - # Build columns_meta — one pseudo-column per column index - columns_meta = [] - for ci in range(max_col + 1): - col_cells = [c for c in cells if c["column_index"] == ci] - if col_cells: - min_x = min(c["x"] for c in col_cells) - max_right = max(c["x"] + c["width"] for c in col_cells) - columns_meta.append({ - "type": "column_text", - "x": min_x, - "y": 0, - "width": max_right - min_x, - "height": img_h, - "classification_confidence": 1.0, - "classification_method": "paddle_direct", + # Text: all words joined by space + text = " ".join(w.get("text", "").strip() for w in row_words if w.get("text", "").strip()) + + # Average confidence + confs = [] + for w in row_words: + c = w.get("confidence", 0) + if isinstance(c, float) and c <= 1.0: + c = c * 100 + confs.append(c) + avg_conf = sum(confs) / len(confs) if confs else 0.0 + + # Per-word boxes with absolute pixel coordinates + word_boxes = [] + for w in row_words: + raw_text = w.get("text", "").strip() + if not raw_text: + continue + c = w.get("confidence", 0) + if isinstance(c, float) and c <= 1.0: + c = c * 100 + word_boxes.append({ + "text": raw_text, + "left": w["left"], + "top": w["top"], + "width": w["width"], + "height": w["height"], + "conf": round(c, 1), }) + cell = { + "cell_id": f"PD_R{row_idx:02d}_C0", + "row_index": row_idx, + "col_index": 0, + "col_type": "column_text", + "text": text, + "confidence": round(avg_conf, 1), + "zone_index": 0, + "ocr_engine": "paddle_direct", + "is_bold": False, + "word_boxes": word_boxes, + "bbox_px": {"x": x_min, "y": y_min, "w": bw, "h": bh}, + "bbox_pct": { + "x": round(x_min / img_w * 100, 2) if img_w else 0, + "y": round(y_min / img_h * 100, 2) if img_h else 0, + "w": round(bw / img_w * 100, 2) if img_w else 0, + "h": round(bh / img_h * 100, 2) if img_h else 0, + }, + } + cells.append(cell) + + # Single full-page pseudo-column (all rows belong to column 0) + columns_meta = [{ + "type": "column_text", + "x": 0, + "y": 0, + "width": img_w, + "height": img_h, + "classification_confidence": 1.0, + "classification_method": "paddle_direct", + }] + return cells, columns_meta