diff --git a/klausur-service/backend/ocr_pipeline_api.py b/klausur-service/backend/ocr_pipeline_api.py index 11a88f2..a079ee1 100644 --- a/klausur-service/backend/ocr_pipeline_api.py +++ b/klausur-service/backend/ocr_pipeline_api.py @@ -1377,6 +1377,14 @@ async def detect_structure(session_id: str): for cell in word_result["cells"]: for wb in (cell.get("word_boxes") or []): words.append(wb) + # Fallback: use raw OCR words if cell word_boxes are empty + if not words and word_result: + for key in ("raw_paddle_words_split", "raw_tesseract_words", "raw_paddle_words"): + raw = word_result.get(key, []) + if raw: + words = raw + logger.info("detect-structure: using %d words from %s (no cell word_boxes)", len(words), key) + break # If no words yet, use image dimensions with small margin if words: content_x = max(0, min(int(wb["left"]) for wb in words)) @@ -3529,6 +3537,7 @@ async def paddle_kombi(session_id: str): cropped_png=img_png, current_step=8, ) + cached["word_result"] = word_result logger.info( "paddle_kombi session %s: %d cells (%d rows, %d cols) in %.2fs " @@ -3665,6 +3674,7 @@ async def rapid_kombi(session_id: str): cropped_png=img_png, current_step=8, ) + cached["word_result"] = word_result logger.info( "rapid_kombi session %s: %d cells (%d rows, %d cols) in %.2fs "