feat: add Structure Detection step to OCR pipeline

New pipeline step between Crop and Columns that visualizes the detected document structure: boxes (line-based + shading), page zones, and color regions. Shows the original image on the left and the annotated overlay on the right.

Backend: POST /detect-structure endpoint + /image/structure-overlay
Frontend: StepStructureDetection component with zone/box/color details

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-16 12:31:09 +01:00
parent fbbec6cf5e
commit 5b5213c2b9
5 changed files with 633 additions and 23 deletions
--- a/klausur-service/backend/ocr_pipeline_session_store.py
+++ b/klausur-service/backend/ocr_pipeline_session_store.py
@@ -75,7 +75,8 @@ async def init_ocr_pipeline_tables():
            ADD COLUMN IF NOT EXISTS crop_result JSONB,
            ADD COLUMN IF NOT EXISTS parent_session_id UUID REFERENCES ocr_pipeline_sessions(id) ON DELETE CASCADE,
            ADD COLUMN IF NOT EXISTS box_index INT,
-            ADD COLUMN IF NOT EXISTS grid_editor_result JSONB
+            ADD COLUMN IF NOT EXISTS grid_editor_result JSONB,
+            ADD COLUMN IF NOT EXISTS structure_result JSONB
        """)


@@ -111,7 +112,7 @@ async def create_session_db(
                      word_result, ground_truth, auto_shear_degrees,
                      doc_type, doc_type_result,
                      document_category, pipeline_log,
-                      grid_editor_result,
+                      grid_editor_result, structure_result,
                      parent_session_id, box_index,
                      created_at, updated_at
        """, uuid.UUID(session_id), name, filename, original_png,
@@ -131,7 +132,7 @@ async def get_session_db(session_id: str) -> Optional[Dict[str, Any]]:
                   word_result, ground_truth, auto_shear_degrees,
                   doc_type, doc_type_result,
                   document_category, pipeline_log,
-                   grid_editor_result,
+                   grid_editor_result, structure_result,
                   parent_session_id, box_index,
                   created_at, updated_at
            FROM ocr_pipeline_sessions WHERE id = $1
@@ -183,11 +184,11 @@ async def update_session_db(session_id: str, **kwargs) -> Optional[Dict[str, Any
        'word_result', 'ground_truth', 'auto_shear_degrees',
        'doc_type', 'doc_type_result',
        'document_category', 'pipeline_log',
-        'grid_editor_result',
+        'grid_editor_result', 'structure_result',
        'parent_session_id', 'box_index',
    }

-    jsonb_fields = {'orientation_result', 'crop_result', 'deskew_result', 'dewarp_result', 'column_result', 'row_result', 'word_result', 'ground_truth', 'handwriting_removal_meta', 'doc_type_result', 'pipeline_log', 'grid_editor_result'}
+    jsonb_fields = {'orientation_result', 'crop_result', 'deskew_result', 'dewarp_result', 'column_result', 'row_result', 'word_result', 'ground_truth', 'handwriting_removal_meta', 'doc_type_result', 'pipeline_log', 'grid_editor_result', 'structure_result'}

    for key, value in kwargs.items():
        if key in allowed_fields:
@@ -313,7 +314,7 @@ def _row_to_dict(row: asyncpg.Record) -> Dict[str, Any]:
            result[key] = result[key].isoformat()

    # JSONB → parsed (asyncpg returns str for JSONB)
-    for key in ['orientation_result', 'crop_result', 'deskew_result', 'dewarp_result', 'column_result', 'row_result', 'word_result', 'ground_truth', 'doc_type_result', 'pipeline_log', 'grid_editor_result']:
+    for key in ['orientation_result', 'crop_result', 'deskew_result', 'dewarp_result', 'column_result', 'row_result', 'word_result', 'ground_truth', 'doc_type_result', 'pipeline_log', 'grid_editor_result', 'structure_result']:
        if key in result and result[key] is not None:
            if isinstance(result[key], str):
                result[key] = json.loads(result[key])