Fix build-box-grids: use structure_result boxes + raw OCR words
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 48s
CI / test-go-edu-search (push) Successful in 44s
CI / test-python-klausur (push) Failing after 2m47s
CI / test-python-agent-core (push) Successful in 33s
CI / test-nodejs-website (push) Successful in 36s

- Source boxes from structure_result (Step 7) instead of grid zones
- Use raw_paddle_words, falling back to raw_tesseract_words (both keyed by top/left/width/height), instead of grid cells
- Drop existing box zones and recreate them from all detected boxes (not just boxes that already had a zone); boxes with an empty bbox or no words inside are skipped
- Sort zones by y-position for correct reading order
- Include box background color metadata

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-04-12 21:50:28 +02:00
parent 5da9a550bf
commit 058eadb0e4

View File

@@ -2189,16 +2189,14 @@ async def gutter_repair_apply(session_id: str, request: Request):
@router.post("/sessions/{session_id}/build-box-grids") @router.post("/sessions/{session_id}/build-box-grids")
async def build_box_grids(session_id: str, request: Request): async def build_box_grids(session_id: str, request: Request):
"""Rebuild grid structure for all box zones with layout-aware detection. """Rebuild grid structure for all detected boxes with layout-aware detection.
For each zone with zone_type='box': Uses structure_result.boxes (from Step 7) as the source of box coordinates,
1. Auto-detect layout type (flowing / columnar / bullet_list / header_only) and raw_paddle_words as OCR word source. Creates or updates box zones in
2. Build grid with layout-appropriate parameters the grid_editor_result.
3. Apply SmartSpellChecker corrections
4. Store results back in grid_editor_result.zones[]
Optional body: { "overrides": { "2": "bullet_list" } } Optional body: { "overrides": { "0": "bullet_list" } }
Maps zone_index → forced layout_type. Maps box_index → forced layout_type.
""" """
session = await get_session_db(session_id) session = await get_session_db(session_id)
if not session: if not session:
@@ -2208,8 +2206,20 @@ async def build_box_grids(session_id: str, request: Request):
if not grid_data: if not grid_data:
raise HTTPException(status_code=400, detail="No grid data. Run build-grid first.") raise HTTPException(status_code=400, detail="No grid data. Run build-grid first.")
# Get raw OCR words (with top/left/width/height keys)
word_result = session.get("word_result") or {} word_result = session.get("word_result") or {}
all_words = word_result.get("cells") or word_result.get("words") or [] all_words = word_result.get("raw_paddle_words") or word_result.get("raw_tesseract_words") or []
if not all_words:
raise HTTPException(status_code=400, detail="No raw OCR words available.")
# Get detected boxes from structure_result
structure_result = session.get("structure_result") or {}
gt = session.get("ground_truth") or {}
if not structure_result:
structure_result = gt.get("structure_result") or {}
detected_boxes = structure_result.get("boxes") or []
if not detected_boxes:
return {"session_id": session_id, "box_zones_rebuilt": 0, "spell_fixes": 0, "message": "No boxes detected"}
body = {} body = {}
try: try:
@@ -2218,37 +2228,40 @@ async def build_box_grids(session_id: str, request: Request):
pass pass
layout_overrides = body.get("overrides", {}) layout_overrides = body.get("overrides", {})
from cv_box_layout import classify_box_layout, build_box_zone_grid, _group_into_lines from cv_box_layout import build_box_zone_grid
from grid_editor_helpers import _words_in_zone from grid_editor_helpers import _words_in_zone
img_w = grid_data.get("image_width", 0) img_w = grid_data.get("image_width", 0) or word_result.get("image_width", 0)
img_h = grid_data.get("image_height", 0) img_h = grid_data.get("image_height", 0) or word_result.get("image_height", 0)
zones = grid_data.get("zones", []) zones = grid_data.get("zones", [])
# Find highest existing zone_index
max_zone_idx = max((z.get("zone_index", 0) for z in zones), default=-1)
# Remove old box zones (we'll rebuild them)
zones = [z for z in zones if z.get("zone_type") != "box"]
box_count = 0 box_count = 0
spell_fixes = 0 spell_fixes = 0
for z in zones: for box_idx, box in enumerate(detected_boxes):
if z.get("zone_type") != "box": bx = box.get("x", 0)
continue by = box.get("y", 0)
bw = box.get("w", 0)
bbox = z.get("bbox_px", {}) bh = box.get("h", 0)
bx, by = bbox.get("x", 0), bbox.get("y", 0)
bw, bh = bbox.get("w", 0), bbox.get("h", 0)
if bw <= 0 or bh <= 0: if bw <= 0 or bh <= 0:
continue continue
zone_idx = z.get("zone_index", 0) # Filter raw OCR words inside this box
# Filter words inside this box
zone_words = _words_in_zone(all_words, by, bh, bx, bw) zone_words = _words_in_zone(all_words, by, bh, bx, bw)
if not zone_words: if not zone_words:
logger.info("Box zone %d: no words found in bbox", zone_idx) logger.info("Box %d: no words found in bbox (%d,%d,%d,%d)", box_idx, bx, by, bw, bh)
continue continue
# Get layout override or auto-detect zone_idx = max_zone_idx + 1 + box_idx
forced_layout = layout_overrides.get(str(zone_idx)) forced_layout = layout_overrides.get(str(box_idx))
# Build box grid # Build box grid
box_grid = build_box_zone_grid( box_grid = build_box_zone_grid(
@@ -2272,26 +2285,46 @@ async def build_box_grids(session_id: str, request: Request):
except ImportError: except ImportError:
pass pass
# Update zone data with new grid # Build zone entry
z["columns"] = box_grid["columns"] zone_entry = {
z["rows"] = box_grid["rows"] "zone_index": zone_idx,
z["cells"] = box_grid["cells"] "zone_type": "box",
z["header_rows"] = box_grid.get("header_rows", []) "bbox_px": {"x": bx, "y": by, "w": bw, "h": bh},
z["box_layout_type"] = box_grid.get("box_layout_type", "flowing") "bbox_pct": {
z["box_grid_reviewed"] = False "x": round(bx / img_w * 100, 2) if img_w else 0,
"y": round(by / img_h * 100, 2) if img_h else 0,
"w": round(bw / img_w * 100, 2) if img_w else 0,
"h": round(bh / img_h * 100, 2) if img_h else 0,
},
"border": None,
"word_count": len(zone_words),
"columns": box_grid["columns"],
"rows": box_grid["rows"],
"cells": box_grid["cells"],
"header_rows": box_grid.get("header_rows", []),
"box_layout_type": box_grid.get("box_layout_type", "flowing"),
"box_grid_reviewed": False,
"box_bg_color": box.get("bg_color_name", ""),
"box_bg_hex": box.get("bg_color_hex", ""),
}
zones.append(zone_entry)
box_count += 1 box_count += 1
# Save updated grid back # Sort zones by y-position for correct reading order
zones.sort(key=lambda z: z.get("bbox_px", {}).get("y", 0))
grid_data["zones"] = zones
await update_session_db(session_id, grid_editor_result=grid_data) await update_session_db(session_id, grid_editor_result=grid_data)
logger.info( logger.info(
"build-box-grids session %s: %d box zones rebuilt, %d spell fixes", "build-box-grids session %s: %d boxes processed (%d words spell-fixed) from %d detected",
session_id, box_count, spell_fixes, session_id, box_count, spell_fixes, len(detected_boxes),
) )
return { return {
"session_id": session_id, "session_id": session_id,
"box_zones_rebuilt": box_count, "box_zones_rebuilt": box_count,
"total_detected_boxes": len(detected_boxes),
"spell_fixes": spell_fixes, "spell_fixes": spell_fixes,
"zones": zones, "zones": zones,
} }