From 34c649c8bed5ff68e9448d0734d364fffbd30b16 Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Wed, 4 Mar 2026 22:21:14 +0100 Subject: [PATCH] fix: send SSE keepalive events every 5s during batch OCR Batch OCR takes 30-60s with 3x upscaling. Without keepalive events, proxy servers (Nginx) drop the SSE connection after their read timeout. Now sends keepalive events every 5s to prevent timeout, with elapsed time for debugging. Also checks for client disconnect between keepalives. Co-Authored-By: Claude Opus 4.6 --- klausur-service/backend/ocr_pipeline_api.py | 25 +++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/klausur-service/backend/ocr_pipeline_api.py b/klausur-service/backend/ocr_pipeline_api.py index 82e745b..7265acb 100644 --- a/klausur-service/backend/ocr_pipeline_api.py +++ b/klausur-service/backend/ocr_pipeline_api.py @@ -1416,9 +1416,11 @@ async def _word_batch_stream_generator( # 2. Send preparing event (keepalive for proxy) yield f"data: {json.dumps({'type': 'preparing', 'message': 'Cell-First OCR laeuft parallel...'})}\n\n" - # 3. Run batch OCR in thread pool (CPU-bound, don't block event loop) + # 3. Run batch OCR in thread pool with periodic keepalive events. + # The OCR takes 30-60s and proxy servers (Nginx) may drop idle SSE + # connections after 30-60s. Send keepalive every 5s to prevent this. loop = asyncio.get_event_loop() - cells, columns_meta = await loop.run_in_executor( + ocr_future = loop.run_in_executor( None, lambda: build_cell_grid_v2( ocr_img, col_regions, row_geoms, img_w, img_h, @@ -1426,6 +1428,25 @@ async def _word_batch_stream_generator( ), ) + # Send keepalive events every 5 seconds while OCR runs + keepalive_count = 0 + while not ocr_future.done(): + try: + cells, columns_meta = await asyncio.wait_for( + asyncio.shield(ocr_future), timeout=5.0, + ) + break # OCR finished + except asyncio.TimeoutError: + keepalive_count += 1 + elapsed = int(time.time() - t0) + yield f"data: {json.dumps({'type': 'keepalive', 'elapsed': elapsed, 'message': f'OCR laeuft... ({elapsed}s)'})}\n\n" + if await request.is_disconnected(): + logger.info(f"SSE batch: client disconnected during OCR for {session_id}") + ocr_future.cancel() + return + else: + cells, columns_meta = ocr_future.result() + if await request.is_disconnected(): logger.info(f"SSE batch: client disconnected after OCR for {session_id}") return