fix: paddle_direct reuses build_grid_from_words for correct overlay
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 37s
CI / test-go-edu-search (push) Successful in 35s
CI / test-python-klausur (push) Failing after 2m22s
CI / test-python-agent-core (push) Successful in 18s
CI / test-nodejs-website (push) Successful in 23s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 37s
CI / test-go-edu-search (push) Successful in 35s
CI / test-python-klausur (push) Failing after 2m22s
CI / test-python-agent-core (push) Successful in 18s
CI / test-nodejs-website (push) Successful in 23s
Replaces the custom _paddle_words_to_grid_cells with the proven build_grid_from_words from cv_words_first.py — the same function the regular pipeline uses with PaddleOCR. It handles phrase splitting and column clustering, and produces cells with the word_boxes that the slide/cluster positioning hooks expect.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -2541,11 +2541,20 @@ async def paddle_direct(session_id: str):
|
|||||||
if not word_dicts:
|
if not word_dicts:
|
||||||
raise HTTPException(status_code=400, detail="PaddleOCR returned no words")
|
raise HTTPException(status_code=400, detail="PaddleOCR returned no words")
|
||||||
|
|
||||||
cells, columns_meta = _paddle_words_to_grid_cells(word_dicts, img_w, img_h)
|
# Reuse build_grid_from_words — same function that works in the regular
|
||||||
|
# pipeline with PaddleOCR (engine=paddle, grid_method=words_first).
|
||||||
|
# Handles phrase splitting, column clustering, and reading order.
|
||||||
|
cells, columns_meta = build_grid_from_words(word_dicts, img_w, img_h)
|
||||||
duration = time.time() - t0
|
duration = time.time() - t0
|
||||||
|
|
||||||
|
# Tag cells as paddle_direct
|
||||||
|
for cell in cells:
|
||||||
|
cell["ocr_engine"] = "paddle_direct"
|
||||||
|
|
||||||
n_rows = len(set(c["row_index"] for c in cells)) if cells else 0
|
n_rows = len(set(c["row_index"] for c in cells)) if cells else 0
|
||||||
n_cols = len(columns_meta)
|
n_cols = len(columns_meta)
|
||||||
|
col_types = {c.get("type") for c in columns_meta}
|
||||||
|
is_vocab = bool(col_types & {"column_en", "column_de"})
|
||||||
|
|
||||||
word_result = {
|
word_result = {
|
||||||
"cells": cells,
|
"cells": cells,
|
||||||
@@ -2555,7 +2564,7 @@ async def paddle_direct(session_id: str):
|
|||||||
"total_cells": len(cells),
|
"total_cells": len(cells),
|
||||||
},
|
},
|
||||||
"columns_used": columns_meta,
|
"columns_used": columns_meta,
|
||||||
"layout": "generic",
|
"layout": "vocab" if is_vocab else "generic",
|
||||||
"image_width": img_w,
|
"image_width": img_w,
|
||||||
"image_height": img_h,
|
"image_height": img_h,
|
||||||
"duration_seconds": round(duration, 2),
|
"duration_seconds": round(duration, 2),
|
||||||
@@ -2590,127 +2599,6 @@ async def paddle_direct(session_id: str):
|
|||||||
return {"session_id": session_id, **word_result}
|
return {"session_id": session_id, **word_result}
|
||||||
|
|
||||||
|
|
||||||
def _paddle_words_to_grid_cells(
|
|
||||||
word_dicts: List[Dict[str, Any]],
|
|
||||||
img_w: int,
|
|
||||||
img_h: int,
|
|
||||||
) -> tuple:
|
|
||||||
"""Convert PaddleOCR word dicts into GridCell dicts + columns_meta.
|
|
||||||
|
|
||||||
Groups words into rows (Y-proximity), then builds ONE cell per row
|
|
||||||
with all words as word_boxes — matching the format of _build_cells()
|
|
||||||
in cv_words_first.py. This gives OverlayReconstruction a row-spanning
|
|
||||||
bbox_pct for correct font sizing and per-word positions for placement.
|
|
||||||
|
|
||||||
Returns (cells, columns_meta) in the same format as build_grid_from_words.
|
|
||||||
"""
|
|
||||||
if not word_dicts:
|
|
||||||
return [], []
|
|
||||||
|
|
||||||
# Sort by top then left
|
|
||||||
sorted_words = sorted(word_dicts, key=lambda w: (w["top"], w["left"]))
|
|
||||||
|
|
||||||
# Compute median word height for row clustering threshold
|
|
||||||
heights = [w["height"] for w in sorted_words if w.get("height", 0) > 0]
|
|
||||||
median_h = sorted(heights)[len(heights) // 2] if heights else 30
|
|
||||||
row_threshold = max(median_h * 0.5, 8)
|
|
||||||
|
|
||||||
# Cluster into rows
|
|
||||||
rows: List[List[Dict]] = []
|
|
||||||
current_row: List[Dict] = []
|
|
||||||
current_y = -9999.0
|
|
||||||
|
|
||||||
for w in sorted_words:
|
|
||||||
center_y = w["top"] + w["height"] / 2
|
|
||||||
if current_row and abs(center_y - current_y) > row_threshold:
|
|
||||||
rows.append(current_row)
|
|
||||||
current_row = []
|
|
||||||
current_row.append(w)
|
|
||||||
# Running average Y center for the row
|
|
||||||
current_y = sum(ww["top"] + ww["height"] / 2 for ww in current_row) / len(current_row)
|
|
||||||
|
|
||||||
if current_row:
|
|
||||||
rows.append(current_row)
|
|
||||||
|
|
||||||
# Build ONE cell per row (all words in reading order, word_boxes for positioning)
|
|
||||||
cells: List[Dict[str, Any]] = []
|
|
||||||
|
|
||||||
for row_idx, row_words in enumerate(rows):
|
|
||||||
row_words.sort(key=lambda w: w["left"])
|
|
||||||
|
|
||||||
# Tight bbox spanning all words in this row
|
|
||||||
x_min = min(w["left"] for w in row_words)
|
|
||||||
y_min = min(w["top"] for w in row_words)
|
|
||||||
x_max = max(w["left"] + w["width"] for w in row_words)
|
|
||||||
y_max = max(w["top"] + w["height"] for w in row_words)
|
|
||||||
bw = x_max - x_min
|
|
||||||
bh = y_max - y_min
|
|
||||||
|
|
||||||
# Text: all words joined by space
|
|
||||||
text = " ".join(w.get("text", "").strip() for w in row_words if w.get("text", "").strip())
|
|
||||||
|
|
||||||
# Average confidence
|
|
||||||
confs = []
|
|
||||||
for w in row_words:
|
|
||||||
c = w.get("confidence", 0)
|
|
||||||
if isinstance(c, float) and c <= 1.0:
|
|
||||||
c = c * 100
|
|
||||||
confs.append(c)
|
|
||||||
avg_conf = sum(confs) / len(confs) if confs else 0.0
|
|
||||||
|
|
||||||
# Per-word boxes with absolute pixel coordinates
|
|
||||||
word_boxes = []
|
|
||||||
for w in row_words:
|
|
||||||
raw_text = w.get("text", "").strip()
|
|
||||||
if not raw_text:
|
|
||||||
continue
|
|
||||||
c = w.get("confidence", 0)
|
|
||||||
if isinstance(c, float) and c <= 1.0:
|
|
||||||
c = c * 100
|
|
||||||
word_boxes.append({
|
|
||||||
"text": raw_text,
|
|
||||||
"left": w["left"],
|
|
||||||
"top": w["top"],
|
|
||||||
"width": w["width"],
|
|
||||||
"height": w["height"],
|
|
||||||
"conf": round(c, 1),
|
|
||||||
})
|
|
||||||
|
|
||||||
cell = {
|
|
||||||
"cell_id": f"PD_R{row_idx:02d}_C0",
|
|
||||||
"row_index": row_idx,
|
|
||||||
"col_index": 0,
|
|
||||||
"col_type": "column_text",
|
|
||||||
"text": text,
|
|
||||||
"confidence": round(avg_conf, 1),
|
|
||||||
"zone_index": 0,
|
|
||||||
"ocr_engine": "paddle_direct",
|
|
||||||
"is_bold": False,
|
|
||||||
"word_boxes": word_boxes,
|
|
||||||
"bbox_px": {"x": x_min, "y": y_min, "w": bw, "h": bh},
|
|
||||||
"bbox_pct": {
|
|
||||||
"x": round(x_min / img_w * 100, 2) if img_w else 0,
|
|
||||||
"y": round(y_min / img_h * 100, 2) if img_h else 0,
|
|
||||||
"w": round(bw / img_w * 100, 2) if img_w else 0,
|
|
||||||
"h": round(bh / img_h * 100, 2) if img_h else 0,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
cells.append(cell)
|
|
||||||
|
|
||||||
# Single full-page pseudo-column (all rows belong to column 0)
|
|
||||||
columns_meta = [{
|
|
||||||
"type": "column_text",
|
|
||||||
"x": 0,
|
|
||||||
"y": 0,
|
|
||||||
"width": img_w,
|
|
||||||
"height": img_h,
|
|
||||||
"classification_confidence": 1.0,
|
|
||||||
"classification_method": "paddle_direct",
|
|
||||||
}]
|
|
||||||
|
|
||||||
return cells, columns_meta
|
|
||||||
|
|
||||||
|
|
||||||
class WordGroundTruthRequest(BaseModel):
|
class WordGroundTruthRequest(BaseModel):
|
||||||
is_correct: bool
|
is_correct: bool
|
||||||
corrected_entries: Optional[List[Dict[str, Any]]] = None
|
corrected_entries: Optional[List[Dict[str, Any]]] = None
|
||||||
|
|||||||
Reference in New Issue
Block a user