Add Ground Truth regression test system for OCR pipeline

Extract _build_grid_core() from the build_grid() endpoint for reuse. Add ocr_pipeline_regression.py with endpoints to mark sessions as ground truth, list them, and run regression comparisons after code changes. Add a frontend button in StepGroundTruth.tsx to mark/update the ground truth (GT).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-18 13:46:48 +01:00
parent c894a0feeb
commit f655db30e4
5 changed files with 482 additions and 22 deletions
--- a/klausur-service/backend/grid_editor_api.py
+++ b/klausur-service/backend/grid_editor_api.py
@@ -745,42 +745,38 @@ def _filter_footer_words(


 # ---------------------------------------------------------------------------
-# Endpoints
+# Core computation (used by build-grid endpoint and regression tests)
 # ---------------------------------------------------------------------------

-@router.post("/sessions/{session_id}/build-grid")
-async def build_grid(session_id: str):
-    """Build a structured, zone-aware grid from existing Kombi word results.
+async def _build_grid_core(session_id: str, session: dict) -> dict:
+    """Core grid building logic — pure computation, no HTTP or DB side effects.

-    Requires that paddle-kombi or rapid-kombi has already been run on the session.
-    Uses the image for box detection and the word positions for grid structuring.
+    Args:
+        session_id: Session identifier (for logging and image loading).
+        session: Full session dict from get_session_db().

-    Returns a StructuredGrid with zones, each containing their own
-    columns, rows, and cells — ready for the frontend Excel-like editor.
+    Returns:
+        StructuredGrid result dict.
+
+    Raises:
+        ValueError: If session data is incomplete.
    """
    t0 = time.time()

-    # 1. Load session and word results
-    session = await get_session_db(session_id)
-    if not session:
-        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")
-
+    # 1. Validate and load word results
    word_result = session.get("word_result")
    if not word_result or not word_result.get("cells"):
-        raise HTTPException(
-            status_code=400,
-            detail="No word results found. Run paddle-kombi or rapid-kombi first.",
-        )
+        raise ValueError("No word results found. Run paddle-kombi or rapid-kombi first.")

    img_w = word_result.get("image_width", 0)
    img_h = word_result.get("image_height", 0)
    if not img_w or not img_h:
-        raise HTTPException(status_code=400, detail="Missing image dimensions in word_result")
+        raise ValueError("Missing image dimensions in word_result")

    # 2. Flatten all word boxes from cells
    all_words = _flatten_word_boxes(word_result["cells"])
    if not all_words:
-        raise HTTPException(status_code=400, detail="No word boxes found in cells")
+        raise ValueError("No word boxes found in cells")

    logger.info("build-grid session %s: %d words from %d cells",
                session_id, len(all_words), len(word_result["cells"]))
@@ -1313,14 +1309,45 @@ async def build_grid(session_id: str):
        "duration_seconds": round(duration, 2),
    }

-    # 7. Persist to DB
+    return result
+
+
+# ---------------------------------------------------------------------------
+# Endpoints
+# ---------------------------------------------------------------------------
+
+@router.post("/sessions/{session_id}/build-grid")
+async def build_grid(session_id: str):
+    """Build a structured, zone-aware grid from existing Kombi word results.
+
+    Requires that paddle-kombi or rapid-kombi has already been run on the session.
+    Uses the image for box detection and the word positions for grid structuring.
+
+    Returns a StructuredGrid with zones, each containing their own
+    columns, rows, and cells — ready for the frontend Excel-like editor.
+    """
+    session = await get_session_db(session_id)
+    if not session:
+        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")
+
+    try:
+        result = await _build_grid_core(session_id, session)
+    except ValueError as e:
+        raise HTTPException(status_code=400, detail=str(e))
+
+    # Persist to DB
    await update_session_db(session_id, grid_editor_result=result)

    logger.info(
        "build-grid session %s: %d zones, %d cols, %d rows, %d cells, "
        "%d boxes in %.2fs",
-        session_id, len(zones_data), total_columns, total_rows,
-        total_cells, boxes_detected, duration,
+        session_id,
+        len(result.get("zones", [])),
+        result.get("summary", {}).get("total_columns", 0),
+        result.get("summary", {}).get("total_rows", 0),
+        result.get("summary", {}).get("total_cells", 0),
+        result.get("boxes_detected", 0),
+        result.get("duration_seconds", 0),
    )

    return result