diff --git a/klausur-service/backend/ocr_pipeline_api.py b/klausur-service/backend/ocr_pipeline_api.py index 82e745b..7265acb 100644 --- a/klausur-service/backend/ocr_pipeline_api.py +++ b/klausur-service/backend/ocr_pipeline_api.py @@ -1416,9 +1416,11 @@ async def _word_batch_stream_generator( # 2. Send preparing event (keepalive for proxy) yield f"data: {json.dumps({'type': 'preparing', 'message': 'Cell-First OCR laeuft parallel...'})}\n\n" - # 3. Run batch OCR in thread pool (CPU-bound, don't block event loop) + # 3. Run batch OCR in thread pool with periodic keepalive events. + # The OCR takes 30-60s and proxy servers (Nginx) may drop idle SSE + # connections after 30-60s. Send keepalive every 5s to prevent this. loop = asyncio.get_event_loop() - cells, columns_meta = await loop.run_in_executor( + ocr_future = loop.run_in_executor( None, lambda: build_cell_grid_v2( ocr_img, col_regions, row_geoms, img_w, img_h, @@ -1426,6 +1428,25 @@ async def _word_batch_stream_generator( ), ) + # Send keepalive events every 5 seconds while OCR runs + keepalive_count = 0 + while not ocr_future.done(): + try: + cells, columns_meta = await asyncio.wait_for( + asyncio.shield(ocr_future), timeout=5.0, + ) + break # OCR finished + except asyncio.TimeoutError: + keepalive_count += 1 + elapsed = int(time.time() - t0) + yield f"data: {json.dumps({'type': 'keepalive', 'elapsed': elapsed, 'message': f'OCR laeuft... ({elapsed}s)'})}\n\n" + if await request.is_disconnected(): + logger.info(f"SSE batch: client disconnected during OCR for {session_id}") + ocr_future.cancel() + return + else: + cells, columns_meta = ocr_future.result() + if await request.is_disconnected(): logger.info(f"SSE batch: client disconnected after OCR for {session_id}") return