""" Grid Editor API — box-grid-review endpoints. """ import logging from fastapi import APIRouter, HTTPException, Request from grid_editor_helpers import _words_in_zone from ocr_pipeline_session_store import ( get_session_db, update_session_db, ) logger = logging.getLogger(__name__) router = APIRouter(prefix="/api/v1/ocr-pipeline", tags=["grid-editor"]) @router.post("/sessions/{session_id}/build-box-grids") async def build_box_grids(session_id: str, request: Request): """Rebuild grid structure for all detected boxes with layout-aware detection. Uses structure_result.boxes (from Step 7) as the source of box coordinates, and raw_paddle_words as OCR word source. Creates or updates box zones in the grid_editor_result. Optional body: { "overrides": { "0": "bullet_list" } } Maps box_index -> forced layout_type. """ session = await get_session_db(session_id) if not session: raise HTTPException(status_code=404, detail=f"Session {session_id} not found") grid_data = session.get("grid_editor_result") if not grid_data: raise HTTPException(status_code=400, detail="No grid data. Run build-grid first.") # Get raw OCR words (with top/left/width/height keys) word_result = session.get("word_result") or {} all_words = word_result.get("raw_paddle_words") or word_result.get("raw_tesseract_words") or [] if not all_words: raise HTTPException(status_code=400, detail="No raw OCR words available.") # Get detected boxes from structure_result structure_result = session.get("structure_result") or {} gt = session.get("ground_truth") or {} if not structure_result: structure_result = gt.get("structure_result") or {} detected_boxes = structure_result.get("boxes") or [] if not detected_boxes: return {"session_id": session_id, "box_zones_rebuilt": 0, "spell_fixes": 0, "message": "No boxes detected"} # Filter out false-positive boxes in header/footer margins. img_h_for_filter = grid_data.get("image_height", 0) or word_result.get("image_height", 0) if img_h_for_filter > 0: margin_frac = 0.07 # 7% of image height margin_top = img_h_for_filter * margin_frac margin_bottom = img_h_for_filter * (1 - margin_frac) filtered = [] for box in detected_boxes: by = box.get("y", 0) bh = box.get("h", 0) box_center_y = by + bh / 2 if box_center_y < margin_top or box_center_y > margin_bottom: logger.info("build-box-grids: skipping header/footer box at y=%d h=%d (center=%.0f, margins=%.0f/%.0f)", by, bh, box_center_y, margin_top, margin_bottom) continue filtered.append(box) detected_boxes = filtered body = {} try: body = await request.json() except Exception: pass layout_overrides = body.get("overrides", {}) from cv_box_layout import build_box_zone_grid img_w = grid_data.get("image_width", 0) or word_result.get("image_width", 0) img_h = grid_data.get("image_height", 0) or word_result.get("image_height", 0) zones = grid_data.get("zones", []) # Find highest existing zone_index max_zone_idx = max((z.get("zone_index", 0) for z in zones), default=-1) # Remove old box zones (we'll rebuild them) zones = [z for z in zones if z.get("zone_type") != "box"] box_count = 0 spell_fixes = 0 for box_idx, box in enumerate(detected_boxes): bx = box.get("x", 0) by = box.get("y", 0) bw = box.get("w", 0) bh = box.get("h", 0) if bw <= 0 or bh <= 0: continue # Filter raw OCR words inside this box zone_words = _words_in_zone(all_words, by, bh, bx, bw) if not zone_words: logger.info("Box %d: no words found in bbox (%d,%d,%d,%d)", box_idx, bx, by, bw, bh) continue zone_idx = max_zone_idx + 1 + box_idx forced_layout = layout_overrides.get(str(box_idx)) # Build box grid box_grid = build_box_zone_grid( zone_words, bx, by, bw, bh, zone_idx, img_w, img_h, layout_type=forced_layout, ) # Apply SmartSpellChecker to all box cells try: from smart_spell import SmartSpellChecker ssc = SmartSpellChecker() for cell in box_grid.get("cells", []): text = cell.get("text", "") if not text: continue result = ssc.correct_text(text, lang="auto") if result.changed: cell["text"] = result.corrected spell_fixes += 1 except ImportError: pass # Build zone entry zone_entry = { "zone_index": zone_idx, "zone_type": "box", "bbox_px": {"x": bx, "y": by, "w": bw, "h": bh}, "bbox_pct": { "x": round(bx / img_w * 100, 2) if img_w else 0, "y": round(by / img_h * 100, 2) if img_h else 0, "w": round(bw / img_w * 100, 2) if img_w else 0, "h": round(bh / img_h * 100, 2) if img_h else 0, }, "border": None, "word_count": len(zone_words), "columns": box_grid["columns"], "rows": box_grid["rows"], "cells": box_grid["cells"], "header_rows": box_grid.get("header_rows", []), "box_layout_type": box_grid.get("box_layout_type", "flowing"), "box_grid_reviewed": False, "box_bg_color": box.get("bg_color_name", ""), "box_bg_hex": box.get("bg_color_hex", ""), } zones.append(zone_entry) box_count += 1 # Sort zones by y-position for correct reading order zones.sort(key=lambda z: z.get("bbox_px", {}).get("y", 0)) grid_data["zones"] = zones await update_session_db(session_id, grid_editor_result=grid_data) logger.info( "build-box-grids session %s: %d boxes processed (%d words spell-fixed) from %d detected", session_id, box_count, spell_fixes, len(detected_boxes), ) return { "session_id": session_id, "box_zones_rebuilt": box_count, "total_detected_boxes": len(detected_boxes), "spell_fixes": spell_fixes, "zones": zones, }