diff --git a/admin-lehrer/app/(admin)/ai/ocr-overlay/page.tsx b/admin-lehrer/app/(admin)/ai/ocr-overlay/page.tsx
index b9dd5b0..19e8011 100644
--- a/admin-lehrer/app/(admin)/ai/ocr-overlay/page.tsx
+++ b/admin-lehrer/app/(admin)/ai/ocr-overlay/page.tsx
@@ -218,7 +218,7 @@ export default function OcrOverlayPage() {
case 4:
return
case 5:
- return
+ return
case 6:
return
default:
diff --git a/admin-lehrer/components/ocr-pipeline/StepWordRecognition.tsx b/admin-lehrer/components/ocr-pipeline/StepWordRecognition.tsx
index ed98818..d213074 100644
--- a/admin-lehrer/components/ocr-pipeline/StepWordRecognition.tsx
+++ b/admin-lehrer/components/ocr-pipeline/StepWordRecognition.tsx
@@ -44,9 +44,11 @@ interface StepWordRecognitionProps {
sessionId: string | null
onNext: () => void
goToStep: (step: number) => void
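+ /** When true, request the word grid with `skip_heal_gaps=true` so the backend skips `_heal_row_gaps` and cells keep their exact row geometry (better overlay positioning). Defaults to false. */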
+ skipHealGaps?: boolean
}
-export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRecognitionProps) {
+export function StepWordRecognition({ sessionId, onNext, goToStep, skipHealGaps = false }: StepWordRecognitionProps) {
const [gridResult, setGridResult] = useState(null)
const [detecting, setDetecting] = useState(false)
const [error, setError] = useState(null)
@@ -110,7 +112,7 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
let res: Response | null = null
for (let attempt = 0; attempt < 2; attempt++) {
res = await fetch(
- `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/words?stream=true&engine=${eng}&pronunciation=${pronunciation}`,
+ `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/words?stream=true&engine=${eng}&pronunciation=${pronunciation}${skipHealGaps ? '&skip_heal_gaps=true' : ''}`,
{ method: 'POST' },
)
if (res.ok) break
diff --git a/klausur-service/backend/cv_cell_grid.py b/klausur-service/backend/cv_cell_grid.py
index e5cf895..748c746 100644
--- a/klausur-service/backend/cv_cell_grid.py
+++ b/klausur-service/backend/cv_cell_grid.py
@@ -264,6 +264,7 @@ def build_cell_grid_v2(
lang: str = "eng+deu",
ocr_engine: str = "auto",
img_bgr: Optional[np.ndarray] = None,
+ skip_heal_gaps: bool = False,
) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
"""Hybrid Grid: full-page OCR for broad columns, cell-crop for narrow ones.
@@ -330,7 +331,12 @@ def build_cell_grid_v2(
else:
bottom_bound = content_rows[-1].y + content_rows[-1].height
- _heal_row_gaps(content_rows, top_bound=top_bound, bottom_bound=bottom_bound)
+ # skip_heal_gaps: When True, keep cell positions at their exact row geometry
+ # positions without expanding to fill gaps from removed rows. Useful for
+ # overlay rendering where pixel-precise positioning matters more than
+ # full-coverage OCR crops.
+ if not skip_heal_gaps:
+ _heal_row_gaps(content_rows, top_bound=top_bound, bottom_bound=bottom_bound)
relevant_cols.sort(key=lambda c: c.x)
diff --git a/klausur-service/backend/ocr_pipeline_api.py b/klausur-service/backend/ocr_pipeline_api.py
index 998870a..7f76846 100644
--- a/klausur-service/backend/ocr_pipeline_api.py
+++ b/klausur-service/backend/ocr_pipeline_api.py
@@ -1857,6 +1857,7 @@ async def detect_words(
engine: str = "auto",
pronunciation: str = "british",
stream: bool = False,
+ skip_heal_gaps: bool = False,
):
"""Build word grid from columns × rows, OCR each cell.
@@ -1864,6 +1865,8 @@ async def detect_words(
engine: 'auto' (default), 'tesseract', or 'rapid'
pronunciation: 'british' (default) or 'american' — for IPA dictionary lookup
stream: false (default) for JSON response, true for SSE streaming
+ skip_heal_gaps: false (default). When true, cells keep exact row geometry
+ positions without gap-healing expansion. Better for overlay rendering.
"""
if session_id not in _cache:
logger.info("detect_words: session %s not in cache, loading from DB", session_id)
@@ -2007,6 +2010,7 @@ async def detect_words(
cells, columns_meta = build_cell_grid_v2(
ocr_img, col_regions, row_geoms, img_w, img_h,
ocr_engine=engine, img_bgr=dewarped_bgr,
+ skip_heal_gaps=skip_heal_gaps,
)
duration = time.time() - t0
@@ -2136,6 +2140,7 @@ async def _word_batch_stream_generator(
lambda: build_cell_grid_v2(
ocr_img, col_regions, row_geoms, img_w, img_h,
ocr_engine=engine, img_bgr=dewarped_bgr,
+ skip_heal_gaps=skip_heal_gaps,
),
)