fix: paddle_direct groups words per row (matching _build_cells format)
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 34s
CI / test-go-edu-search (push) Successful in 34s
CI / test-python-klausur (push) Failing after 2m11s
CI / test-python-agent-core (push) Successful in 20s
CI / test-nodejs-website (push) Successful in 24s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 34s
CI / test-go-edu-search (push) Successful in 34s
CI / test-python-klausur (push) Failing after 2m11s
CI / test-python-agent-core (push) Successful in 20s
CI / test-nodejs-website (push) Successful in 24s
Emit one cell per row, with all of the row's words as word_boxes, instead of one cell per word. This gives OverlayReconstruction a row-spanning bbox_pct for correct font sizing and per-word positions for slide/cluster placement.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -2597,10 +2597,10 @@ def _paddle_words_to_grid_cells(
|
|||||||
) -> tuple:
|
) -> tuple:
|
||||||
"""Convert PaddleOCR word dicts into GridCell dicts + columns_meta.
|
"""Convert PaddleOCR word dicts into GridCell dicts + columns_meta.
|
||||||
|
|
||||||
1. Sort words by (top, left).
|
Groups words into rows (Y-proximity), then builds ONE cell per row
|
||||||
2. Cluster into rows by Y-proximity (threshold = 50% of median word height).
|
with all words as word_boxes — matching the format of _build_cells()
|
||||||
3. Within each row, sort left→right and assign col_index.
|
in cv_words_first.py. This gives OverlayReconstruction a row-spanning
|
||||||
4. Each word → 1 GridCell with word_boxes and bbox_pct.
|
bbox_pct for correct font sizing and per-word positions for placement.
|
||||||
|
|
||||||
Returns (cells, columns_meta) in the same format as build_grid_from_words.
|
Returns (cells, columns_meta) in the same format as build_grid_from_words.
|
||||||
"""
|
"""
|
||||||
@@ -2632,69 +2632,82 @@ def _paddle_words_to_grid_cells(
|
|||||||
if current_row:
|
if current_row:
|
||||||
rows.append(current_row)
|
rows.append(current_row)
|
||||||
|
|
||||||
# Sort each row left→right and build cells
|
# Build ONE cell per row (all words in reading order, word_boxes for positioning)
|
||||||
cells: List[Dict[str, Any]] = []
|
cells: List[Dict[str, Any]] = []
|
||||||
max_col = 0
|
|
||||||
|
|
||||||
for row_idx, row_words in enumerate(rows):
|
for row_idx, row_words in enumerate(rows):
|
||||||
row_words.sort(key=lambda w: w["left"])
|
row_words.sort(key=lambda w: w["left"])
|
||||||
for col_idx, w in enumerate(row_words):
|
|
||||||
left = w["left"]
|
|
||||||
top = w["top"]
|
|
||||||
width = w["width"]
|
|
||||||
height = w["height"]
|
|
||||||
conf = w.get("confidence", 0)
|
|
||||||
if isinstance(conf, float) and conf <= 1.0:
|
|
||||||
conf = conf * 100 # normalize to 0-100
|
|
||||||
|
|
||||||
cell = {
|
# Tight bbox spanning all words in this row
|
||||||
"cell_id": f"PD_R{row_idx:02d}_W{col_idx:02d}",
|
x_min = min(w["left"] for w in row_words)
|
||||||
"x": left,
|
y_min = min(w["top"] for w in row_words)
|
||||||
"y": top,
|
x_max = max(w["left"] + w["width"] for w in row_words)
|
||||||
"width": width,
|
y_max = max(w["top"] + w["height"] for w in row_words)
|
||||||
"height": height,
|
bw = x_max - x_min
|
||||||
"text": w.get("text", ""),
|
bh = y_max - y_min
|
||||||
"confidence": round(conf, 1),
|
|
||||||
"column_index": col_idx,
|
|
||||||
"row_index": row_idx,
|
|
||||||
"zone_index": 0,
|
|
||||||
"ocr_engine": "paddle_direct",
|
|
||||||
"word_boxes": [{
|
|
||||||
"text": w.get("text", ""),
|
|
||||||
"left": left,
|
|
||||||
"top": top,
|
|
||||||
"width": width,
|
|
||||||
"height": height,
|
|
||||||
"confidence": round(conf, 1),
|
|
||||||
}],
|
|
||||||
"bbox_pct": {
|
|
||||||
"x": round(left / img_w * 100, 3),
|
|
||||||
"y": round(top / img_h * 100, 3),
|
|
||||||
"w": round(width / img_w * 100, 3),
|
|
||||||
"h": round(height / img_h * 100, 3),
|
|
||||||
},
|
|
||||||
}
|
|
||||||
cells.append(cell)
|
|
||||||
if col_idx > max_col:
|
|
||||||
max_col = col_idx
|
|
||||||
|
|
||||||
# Build columns_meta — one pseudo-column per column index
|
# Text: all words joined by space
|
||||||
columns_meta = []
|
text = " ".join(w.get("text", "").strip() for w in row_words if w.get("text", "").strip())
|
||||||
for ci in range(max_col + 1):
|
|
||||||
col_cells = [c for c in cells if c["column_index"] == ci]
|
# Average confidence
|
||||||
if col_cells:
|
confs = []
|
||||||
min_x = min(c["x"] for c in col_cells)
|
for w in row_words:
|
||||||
max_right = max(c["x"] + c["width"] for c in col_cells)
|
c = w.get("confidence", 0)
|
||||||
columns_meta.append({
|
if isinstance(c, float) and c <= 1.0:
|
||||||
"type": "column_text",
|
c = c * 100
|
||||||
"x": min_x,
|
confs.append(c)
|
||||||
"y": 0,
|
avg_conf = sum(confs) / len(confs) if confs else 0.0
|
||||||
"width": max_right - min_x,
|
|
||||||
"height": img_h,
|
# Per-word boxes with absolute pixel coordinates
|
||||||
"classification_confidence": 1.0,
|
word_boxes = []
|
||||||
"classification_method": "paddle_direct",
|
for w in row_words:
|
||||||
|
raw_text = w.get("text", "").strip()
|
||||||
|
if not raw_text:
|
||||||
|
continue
|
||||||
|
c = w.get("confidence", 0)
|
||||||
|
if isinstance(c, float) and c <= 1.0:
|
||||||
|
c = c * 100
|
||||||
|
word_boxes.append({
|
||||||
|
"text": raw_text,
|
||||||
|
"left": w["left"],
|
||||||
|
"top": w["top"],
|
||||||
|
"width": w["width"],
|
||||||
|
"height": w["height"],
|
||||||
|
"conf": round(c, 1),
|
||||||
})
|
})
|
||||||
|
|
||||||
|
cell = {
|
||||||
|
"cell_id": f"PD_R{row_idx:02d}_C0",
|
||||||
|
"row_index": row_idx,
|
||||||
|
"col_index": 0,
|
||||||
|
"col_type": "column_text",
|
||||||
|
"text": text,
|
||||||
|
"confidence": round(avg_conf, 1),
|
||||||
|
"zone_index": 0,
|
||||||
|
"ocr_engine": "paddle_direct",
|
||||||
|
"is_bold": False,
|
||||||
|
"word_boxes": word_boxes,
|
||||||
|
"bbox_px": {"x": x_min, "y": y_min, "w": bw, "h": bh},
|
||||||
|
"bbox_pct": {
|
||||||
|
"x": round(x_min / img_w * 100, 2) if img_w else 0,
|
||||||
|
"y": round(y_min / img_h * 100, 2) if img_h else 0,
|
||||||
|
"w": round(bw / img_w * 100, 2) if img_w else 0,
|
||||||
|
"h": round(bh / img_h * 100, 2) if img_h else 0,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
cells.append(cell)
|
||||||
|
|
||||||
|
# Single full-page pseudo-column (all rows belong to column 0)
|
||||||
|
columns_meta = [{
|
||||||
|
"type": "column_text",
|
||||||
|
"x": 0,
|
||||||
|
"y": 0,
|
||||||
|
"width": img_w,
|
||||||
|
"height": img_h,
|
||||||
|
"classification_confidence": 1.0,
|
||||||
|
"classification_method": "paddle_direct",
|
||||||
|
}]
|
||||||
|
|
||||||
return cells, columns_meta
|
return cells, columns_meta
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user