"""
Grid Detection Models v4

Data classes for OCR grid detection results.
Coordinates use percentage (0-100) and mm (A4 format).
"""

from dataclasses import dataclass, field
from enum import Enum
from typing import Any, Dict, List

# A4 page dimensions in millimetres.
A4_WIDTH_MM = 210.0
A4_HEIGHT_MM = 297.0

# Column margin (1 mm), also expressed as a percentage of the page width.
COLUMN_MARGIN_MM = 1.0
COLUMN_MARGIN_PCT = (COLUMN_MARGIN_MM / A4_WIDTH_MM) * 100


def _pct_to_mm(pct: float, extent_mm: float, ndigits: int = 1) -> float:
    """Convert a page percentage (0-100) to millimetres.

    Args:
        pct: Position or size as a percentage of the page extent.
        extent_mm: Length of the page along the relevant axis, in mm.
        ndigits: Number of decimal places kept in the result.

    Returns:
        The value in millimetres, rounded to ``ndigits`` decimals.
    """
    return round(pct / 100 * extent_mm, ndigits)


class CellStatus(str, Enum):
    """Status values a grid cell can carry."""

    EMPTY = "empty"
    RECOGNIZED = "recognized"
    PROBLEMATIC = "problematic"
    MANUAL = "manual"


class ColumnType(str, Enum):
    """Kinds of column a grid cell can belong to."""

    ENGLISH = "english"
    GERMAN = "german"
    EXAMPLE = "example"
    UNKNOWN = "unknown"


@dataclass
class OCRRegion:
    """A word/phrase detected by OCR with bounding box coordinates in percentage (0-100)."""

    text: str
    confidence: float
    x: float  # X position as percentage of page width
    y: float  # Y position as percentage of page height
    width: float  # Width as percentage of page width
    height: float  # Height as percentage of page height

    @property
    def x_mm(self) -> float:
        """X position in mm on an A4 page, rounded to 1 decimal."""
        return _pct_to_mm(self.x, A4_WIDTH_MM)

    @property
    def y_mm(self) -> float:
        """Y position in mm on an A4 page, rounded to 1 decimal."""
        return _pct_to_mm(self.y, A4_HEIGHT_MM)

    @property
    def width_mm(self) -> float:
        """Width in mm on an A4 page, rounded to 1 decimal."""
        return _pct_to_mm(self.width, A4_WIDTH_MM)

    @property
    def height_mm(self) -> float:
        """Height in mm on an A4 page, rounded to 2 decimals (unlike the other mm values)."""
        return _pct_to_mm(self.height, A4_HEIGHT_MM, 2)

    @property
    def center_x(self) -> float:
        """Horizontal centre of the box, in percent."""
        return self.x + self.width / 2

    @property
    def center_y(self) -> float:
        """Vertical centre of the box, in percent."""
        return self.y + self.height / 2

    @property
    def right(self) -> float:
        """Right edge of the box, in percent."""
        return self.x + self.width

    @property
    def bottom(self) -> float:
        """Bottom edge of the box, in percent."""
        return self.y + self.height


@dataclass
class GridCell:
    """A cell in the detected grid with coordinates in percentage (0-100)."""

    row: int
    col: int
    x: float
    y: float
    width: float
    height: float
    text: str = ""
    confidence: float = 0.0
    status: CellStatus = CellStatus.EMPTY
    column_type: ColumnType = ColumnType.UNKNOWN
    logical_row: int = 0
    logical_col: int = 0
    is_continuation: bool = False

    @property
    def x_mm(self) -> float:
        """X position in mm on an A4 page, rounded to 1 decimal."""
        return _pct_to_mm(self.x, A4_WIDTH_MM)

    @property
    def y_mm(self) -> float:
        """Y position in mm on an A4 page, rounded to 1 decimal."""
        return _pct_to_mm(self.y, A4_HEIGHT_MM)

    @property
    def width_mm(self) -> float:
        """Width in mm on an A4 page, rounded to 1 decimal."""
        return _pct_to_mm(self.width, A4_WIDTH_MM)

    @property
    def height_mm(self) -> float:
        """Height in mm on an A4 page, rounded to 2 decimals (unlike the other mm values)."""
        return _pct_to_mm(self.height, A4_HEIGHT_MM, 2)

    def to_dict(self) -> dict:
        """Return a plain-dict representation of the cell.

        Percentage coordinates are rounded to 2 decimals, the mm values come
        from the corresponding properties, and the enum fields are emitted as
        their string values.
        """
        return {
            "row": self.row,
            "col": self.col,
            "x": round(self.x, 2),
            "y": round(self.y, 2),
            "width": round(self.width, 2),
            "height": round(self.height, 2),
            "x_mm": self.x_mm,
            "y_mm": self.y_mm,
            "width_mm": self.width_mm,
            "height_mm": self.height_mm,
            "text": self.text,
            "confidence": self.confidence,
            "status": self.status.value,
            "column_type": self.column_type.value,
            "logical_row": self.logical_row,
            "logical_col": self.logical_col,
            "is_continuation": self.is_continuation,
        }


@dataclass
class GridResult:
    """Result of grid detection."""

    rows: int = 0
    columns: int = 0
    cells: List[List[GridCell]] = field(default_factory=list)
    column_types: List[str] = field(default_factory=list)
    column_boundaries: List[float] = field(default_factory=list)
    row_boundaries: List[float] = field(default_factory=list)
    deskew_angle: float = 0.0
    stats: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict:
        """Return a plain-dict representation of the whole grid.

        Cells are serialized row by row via ``GridCell.to_dict``; boundaries
        and the deskew angle are rounded to 2 decimals. The fixed A4 page
        dimensions are included under ``"page_dimensions"``.

        ``column_types`` and ``stats`` are shallow-copied so that mutating the
        returned payload does not alter this result object. Values nested
        inside ``stats`` are still shared.
        """
        return {
            "rows": self.rows,
            "columns": self.columns,
            "cells": [
                [cell.to_dict() for cell in row_cells]
                for row_cells in self.cells
            ],
            "column_types": list(self.column_types),
            "column_boundaries": [round(b, 2) for b in self.column_boundaries],
            "row_boundaries": [round(b, 2) for b in self.row_boundaries],
            "deskew_angle": round(self.deskew_angle, 2),
            "stats": dict(self.stats),
            "page_dimensions": {
                "width_mm": A4_WIDTH_MM,
                "height_mm": A4_HEIGHT_MM,
                "format": "A4",
            },
        }