From 410d36f3def271b0f7e9949771ae53e0dec8629b Mon Sep 17 00:00:00 2001
From: Benjamin Admin
 <benjaminadmin@37bf1d39-1dc6-4c68-807f-54c9737f55e1.fritz.box>
Date: Tue, 24 Mar 2026 13:16:44 +0100
Subject: [PATCH] feat: save automatic grid snapshot before manual edits for GT
 comparison

- build-grid now saves the automatic OCR result as ground_truth.auto_grid_snapshot
- mark-ground-truth includes a correction_diff comparing auto vs corrected
- New endpoint GET /correction-diff returns detailed diff with per-col_type
  accuracy breakdown (english, german, ipa, etc.)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 klausur-service/backend/grid_editor_api.py    | 17 ++++-
 .../backend/ocr_pipeline_regression.py        | 75 ++++++++++++++++++-
 2 files changed, 89 insertions(+), 3 deletions(-)

diff --git a/klausur-service/backend/grid_editor_api.py b/klausur-service/backend/grid_editor_api.py
index 122fe50..8b1baf3 100644
--- a/klausur-service/backend/grid_editor_api.py
+++ b/klausur-service/backend/grid_editor_api.py
@@ -31,6 +31,7 @@ from ocr_pipeline_session_store import (
     get_session_image,
     update_session_db,
 )
+from ocr_pipeline_regression import _build_reference_snapshot
 
 logger = logging.getLogger(__name__)
 
@@ -2814,8 +2815,22 @@ async def build_grid(session_id: str):
     except ValueError as e:
         raise HTTPException(status_code=400, detail=str(e))
 
+    # Save automatic grid snapshot for later comparison with manual corrections
+    wr = session.get("word_result") or {}
+    engine = wr.get("ocr_engine", "")
+    if engine in ("kombi", "rapid_kombi"):
+        auto_pipeline = "kombi"
+    elif engine == "paddle_direct":
+        auto_pipeline = "paddle-direct"
+    else:
+        auto_pipeline = "pipeline"
+    auto_snapshot = _build_reference_snapshot(result, pipeline=auto_pipeline)
+
+    gt = session.get("ground_truth") or {}
+    gt["auto_grid_snapshot"] = auto_snapshot
+
     # Persist to DB and advance current_step to 11 (reconstruction complete)
-    await update_session_db(session_id, grid_editor_result=result, current_step=11)
+    await update_session_db(session_id, grid_editor_result=result, ground_truth=gt, current_step=11)
 
     logger.info(
         "build-grid session %s: %d zones, %d cols, %d rows, %d cells, "
diff --git a/klausur-service/backend/ocr_pipeline_regression.py b/klausur-service/backend/ocr_pipeline_regression.py
index 1bec71b..b6e09a0 100644
--- a/klausur-service/backend/ocr_pipeline_regression.py
+++ b/klausur-service/backend/ocr_pipeline_regression.py
@@ -258,9 +258,17 @@ async def mark_ground_truth(
     gt["build_grid_reference"] = reference
     await update_session_db(session_id, ground_truth=gt, current_step=11)
 
+    # Compare with auto-snapshot if available (shows what the user corrected)
+    auto_snapshot = gt.get("auto_grid_snapshot")
+    correction_diff = None
+    if auto_snapshot:
+        correction_diff = compare_grids(auto_snapshot, reference)
+
     logger.info(
-        "Ground truth marked for session %s: %d cells",
-        session_id, len(reference["cells"]),
+        "Ground truth marked for session %s: %d cells (corrections: %s)",
+        session_id,
+        len(reference["cells"]),
+        correction_diff["summary"] if correction_diff else "no auto-snapshot",
     )
 
     return {
@@ -268,6 +276,7 @@ async def mark_ground_truth(
         "session_id": session_id,
         "cells_saved": len(reference["cells"]),
         "summary": reference["summary"],
+        "correction_diff": correction_diff,
     }
 
 
@@ -289,6 +298,68 @@ async def unmark_ground_truth(session_id: str):
     return {"status": "ok", "session_id": session_id}
 
 
+@router.get("/sessions/{session_id}/correction-diff")
+async def get_correction_diff(session_id: str):
+    """Compare automatic OCR grid with manually corrected ground truth.
+
+    Returns the compare_grids() diff enriched with "col_type_breakdown":
+    per col_type (english, german, ipa, etc.) the total reference cells,
+    the cells with a "text_change" diff, and the resulting accuracy_pct.
+    Raises HTTPException 404 if session, snapshot or reference is missing.
+    """
+    session = await get_session_db(session_id)
+    if not session:
+        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")
+
+    gt = session.get("ground_truth") or {}
+    auto_snapshot = gt.get("auto_grid_snapshot")
+    reference = gt.get("build_grid_reference")
+
+    if not auto_snapshot:
+        raise HTTPException(
+            status_code=404,
+            detail="No auto_grid_snapshot found. Re-run build-grid to create one.",
+        )
+    if not reference:
+        raise HTTPException(
+            status_code=404,
+            detail="No ground truth reference found. Mark as ground truth first.",
+        )
+
+    diff = compare_grids(auto_snapshot, reference)
+    ref_cells = reference.get("cells") or []
+
+    # Index col_type by cell_id once instead of rescanning the reference
+    # cells per diff entry; setdefault keeps the first cell for each id.
+    col_type_by_id: Dict[str, str] = {}
+    for cell in ref_cells:
+        col_type_by_id.setdefault(cell.get("cell_id"), cell.get("col_type", "unknown"))
+
+    # Count corrected cells per col_type; only text changes are corrections.
+    col_type_stats: Dict[str, Dict[str, float]] = {}
+    for cell_diff in diff.get("cell_diffs", []):
+        if cell_diff["type"] != "text_change":
+            continue
+        # Diffs whose cell_id is absent from the reference count as "unknown".
+        ct = col_type_by_id.get(cell_diff["cell_id"], "unknown")
+        col_type_stats.setdefault(ct, {"total": 0, "corrected": 0})["corrected"] += 1
+
+    # Count total cells per col_type from reference
+    for cell in ref_cells:
+        ct = cell.get("col_type", "unknown")
+        col_type_stats.setdefault(ct, {"total": 0, "corrected": 0})["total"] += 1
+
+    # Accuracy is the share of reference cells without a text correction;
+    # a col_type with no reference cells is reported as 100.0.
+    for stats in col_type_stats.values():
+        total, corrected = stats["total"], stats["corrected"]
+        stats["accuracy_pct"] = round((total - corrected) / total * 100, 1) if total > 0 else 100.0
+
+    diff["col_type_breakdown"] = col_type_stats
+
+    return diff
+
+
 @router.get("/ground-truth-sessions")
 async def list_ground_truth_sessions():
     """List all sessions that have a ground-truth reference."""