From 8a60f4bf30b19471f1c2464a6d0b2e3fa14c98cb Mon Sep 17 00:00:00 2001
From: Benjamin Admin <benjaminadmin@MacBookPro.fritz.box>
Date: Wed, 11 Mar 2026 08:59:50 +0100
Subject: [PATCH] fix: Overlay-Zellen ohne _heal_row_gaps positionieren
 (skip_heal_gaps)

_heal_row_gaps verschiebt Zell-Positionen nach Entfernung von Artefakt-Zeilen,
was im Overlay zu sichtbarem Versatz fuehrt (z.B. 23px bei "badge").
Neuer skip_heal_gaps Parameter in build_cell_grid_v2 und words-Endpoint
behaelt die exakten Zeilen-Positionen bei.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 admin-lehrer/app/(admin)/ai/ocr-overlay/page.tsx          | 2 +-
 .../components/ocr-pipeline/StepWordRecognition.tsx       | 6 ++++--
 klausur-service/backend/cv_cell_grid.py                   | 8 +++++++-
 klausur-service/backend/ocr_pipeline_api.py               | 5 +++++
 4 files changed, 17 insertions(+), 4 deletions(-)
diff --git a/admin-lehrer/app/(admin)/ai/ocr-overlay/page.tsx b/admin-lehrer/app/(admin)/ai/ocr-overlay/page.tsx
index b9dd5b0..19e8011 100644
--- a/admin-lehrer/app/(admin)/ai/ocr-overlay/page.tsx
+++ b/admin-lehrer/app/(admin)/ai/ocr-overlay/page.tsx
@@ -218,7 +218,7 @@ export default function OcrOverlayPage() {
       case 4:
         return <StepRowDetection sessionId={sessionId} onNext={handleNext} />
       case 5:
-        return <StepWordRecognition sessionId={sessionId} onNext={handleNext} goToStep={goToStep} />
+        return <StepWordRecognition sessionId={sessionId} onNext={handleNext} goToStep={goToStep} skipHealGaps />
       case 6:
         return <OverlayReconstruction sessionId={sessionId} onNext={handleNext} />
       default:
diff --git a/admin-lehrer/components/ocr-pipeline/StepWordRecognition.tsx b/admin-lehrer/components/ocr-pipeline/StepWordRecognition.tsx
index ed98818..d213074 100644
--- a/admin-lehrer/components/ocr-pipeline/StepWordRecognition.tsx
+++ b/admin-lehrer/components/ocr-pipeline/StepWordRecognition.tsx
@@ -44,9 +44,11 @@ interface StepWordRecognitionProps {
   sessionId: string | null
   onNext: () => void
   goToStep: (step: number) => void
+  /** Skip _heal_row_gaps in cell grid (better overlay positioning) */
+  skipHealGaps?: boolean
 }
 
-export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRecognitionProps) {
+export function StepWordRecognition({ sessionId, onNext, goToStep, skipHealGaps = false }: StepWordRecognitionProps) {
   const [gridResult, setGridResult] = useState<GridResult | null>(null)
   const [detecting, setDetecting] = useState(false)
   const [error, setError] = useState<string | null>(null)
@@ -110,7 +112,7 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
       let res: Response | null = null
       for (let attempt = 0; attempt < 2; attempt++) {
         res = await fetch(
-          `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/words?stream=true&engine=${eng}&pronunciation=${pronunciation}`,
+          `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/words?stream=true&engine=${eng}&pronunciation=${pronunciation}${skipHealGaps ? '&skip_heal_gaps=true' : ''}`,
           { method: 'POST' },
         )
         if (res.ok) break
diff --git a/klausur-service/backend/cv_cell_grid.py b/klausur-service/backend/cv_cell_grid.py
index e5cf895..748c746 100644
--- a/klausur-service/backend/cv_cell_grid.py
+++ b/klausur-service/backend/cv_cell_grid.py
@@ -264,6 +264,7 @@ def build_cell_grid_v2(
     lang: str = "eng+deu",
     ocr_engine: str = "auto",
     img_bgr: Optional[np.ndarray] = None,
+    skip_heal_gaps: bool = False,
 ) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
     """Hybrid Grid: full-page OCR for broad columns, cell-crop for narrow ones.
 
@@ -330,7 +331,12 @@ def build_cell_grid_v2(
     else:
         bottom_bound = content_rows[-1].y + content_rows[-1].height
 
-    _heal_row_gaps(content_rows, top_bound=top_bound, bottom_bound=bottom_bound)
+    # skip_heal_gaps: when True, cells keep their exact row geometry instead
+    # of being expanded to fill the gaps left by removed rows.  Useful for
+    # overlay rendering, where pixel-precise positioning matters more than
+    # full-coverage OCR crops.
+    if not skip_heal_gaps:
+        _heal_row_gaps(content_rows, top_bound=top_bound, bottom_bound=bottom_bound)
 
     relevant_cols.sort(key=lambda c: c.x)
 
diff --git a/klausur-service/backend/ocr_pipeline_api.py b/klausur-service/backend/ocr_pipeline_api.py
index 998870a..7f76846 100644
--- a/klausur-service/backend/ocr_pipeline_api.py
+++ b/klausur-service/backend/ocr_pipeline_api.py
@@ -1857,6 +1857,7 @@ async def detect_words(
     engine: str = "auto",
     pronunciation: str = "british",
     stream: bool = False,
+    skip_heal_gaps: bool = False,
 ):
     """Build word grid from columns × rows, OCR each cell.
 
@@ -1864,6 +1865,8 @@ async def detect_words(
         engine: 'auto' (default), 'tesseract', or 'rapid'
         pronunciation: 'british' (default) or 'american' — for IPA dictionary lookup
         stream: false (default) for JSON response, true for SSE streaming
+        skip_heal_gaps: false (default). When true, cells keep exact row geometry
+            positions without gap-healing expansion. Better for overlay rendering.
     """
     if session_id not in _cache:
         logger.info("detect_words: session %s not in cache, loading from DB", session_id)
@@ -2007,6 +2010,7 @@ async def detect_words(
     cells, columns_meta = build_cell_grid_v2(
         ocr_img, col_regions, row_geoms, img_w, img_h,
         ocr_engine=engine, img_bgr=dewarped_bgr,
+        skip_heal_gaps=skip_heal_gaps,
     )
     duration = time.time() - t0
 
@@ -2136,6 +2140,7 @@ async def _word_batch_stream_generator(
         lambda: build_cell_grid_v2(
             ocr_img, col_regions, row_geoms, img_w, img_h,
             ocr_engine=engine, img_bgr=dewarped_bgr,
+            skip_heal_gaps=skip_heal_gaps,
         ),
     )