diff --git a/klausur-service/backend/cv_ocr_engines.py b/klausur-service/backend/cv_ocr_engines.py index 7260584..a16ed5d 100644 --- a/klausur-service/backend/cv_ocr_engines.py +++ b/klausur-service/backend/cv_ocr_engines.py @@ -412,20 +412,35 @@ async def ocr_region_paddle( if crop.size == 0: return [] - # Encode as PNG - success, png_buf = cv2.imencode(".png", crop) + # Downscale large images to fit within Traefik's 60s timeout. + # PaddleOCR works well at ~1500px max dimension. + h, w = crop.shape[:2] + scale = 1.0 + _MAX_DIM = 1500 + if max(h, w) > _MAX_DIM: + scale = _MAX_DIM / max(h, w) + new_w, new_h = int(w * scale), int(h * scale) + crop = cv2.resize(crop, (new_w, new_h), interpolation=cv2.INTER_AREA) + logger.info("ocr_region_paddle: downscaled %dx%d → %dx%d (scale=%.2f)", + w, h, new_w, new_h, scale) + + # Encode as JPEG (smaller than PNG, faster upload) + success, jpg_buf = cv2.imencode(".jpg", crop, [cv2.IMWRITE_JPEG_QUALITY, 90]) if not success: logger.error("ocr_region_paddle: cv2.imencode failed") return [] - words, _w, _h = await ocr_remote_paddle(png_buf.tobytes()) + words, _w, _h = await ocr_remote_paddle(jpg_buf.tobytes(), filename="scan.jpg") - # Shift coordinates to absolute image space - for w in words: - w["left"] += offset_x - w["top"] += offset_y + # Scale coordinates back to original size and shift to absolute image space + inv_scale = 1.0 / scale if scale != 1.0 else 1.0 + for wd in words: + wd["left"] = int(wd["left"] * inv_scale) + offset_x + wd["top"] = int(wd["top"] * inv_scale) + offset_y + wd["width"] = int(wd["width"] * inv_scale) + wd["height"] = int(wd["height"] * inv_scale) if region is not None: - w["region_type"] = region.type + wd["region_type"] = region.type return words