fix: send SSE keepalive events every 5s during batch OCR
Batch OCR takes 30-60s with 3x upscaling. Without keepalive events, proxy servers (Nginx) drop the idle SSE connection once their read timeout expires. The stream generator now sends a keepalive event every 5s while OCR is running, including the elapsed time for debugging, and checks for a client disconnect after each keepalive.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1416,9 +1416,11 @@ async def _word_batch_stream_generator(
|
||||
# 2. Send preparing event (keepalive for proxy)
|
||||
yield f"data: {json.dumps({'type': 'preparing', 'message': 'Cell-First OCR laeuft parallel...'})}\n\n"
|
||||
|
||||
# 3. Run batch OCR in thread pool (CPU-bound, don't block event loop)
|
||||
# 3. Run batch OCR in thread pool with periodic keepalive events.
|
||||
# The OCR takes 30-60s and proxy servers (Nginx) may drop idle SSE
|
||||
# connections after 30-60s. Send keepalive every 5s to prevent this.
|
||||
loop = asyncio.get_event_loop()
|
||||
cells, columns_meta = await loop.run_in_executor(
|
||||
ocr_future = loop.run_in_executor(
|
||||
None,
|
||||
lambda: build_cell_grid_v2(
|
||||
ocr_img, col_regions, row_geoms, img_w, img_h,
|
||||
@@ -1426,6 +1428,25 @@ async def _word_batch_stream_generator(
|
||||
),
|
||||
)
|
||||
|
||||
# Send keepalive events every 5 seconds while OCR runs
|
||||
keepalive_count = 0
|
||||
while not ocr_future.done():
|
||||
try:
|
||||
cells, columns_meta = await asyncio.wait_for(
|
||||
asyncio.shield(ocr_future), timeout=5.0,
|
||||
)
|
||||
break # OCR finished
|
||||
except asyncio.TimeoutError:
|
||||
keepalive_count += 1
|
||||
elapsed = int(time.time() - t0)
|
||||
yield f"data: {json.dumps({'type': 'keepalive', 'elapsed': elapsed, 'message': f'OCR laeuft... ({elapsed}s)'})}\n\n"
|
||||
if await request.is_disconnected():
|
||||
logger.info(f"SSE batch: client disconnected during OCR for {session_id}")
|
||||
ocr_future.cancel()
|
||||
return
|
||||
else:
|
||||
cells, columns_meta = ocr_future.result()
|
||||
|
||||
if await request.is_disconnected():
|
||||
logger.info(f"SSE batch: client disconnected after OCR for {session_id}")
|
||||
return
|
||||
|
||||
Reference in New Issue
Block a user