From 4970ca903e7e4b3d82ff0e931f409b314374054f Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Sun, 1 Mar 2026 12:24:44 +0100 Subject: [PATCH] fix(ocr-pipeline): invalidate downstream results when steps are re-run When columns change (Step 3), invalidate row_result and word_result. When rows change (Step 4), invalidate word_result. This ensures Step 5 always uses the latest row boundaries instead of showing stale cached word_result from a previous run. Applies to both auto-detection and manual override endpoints. Co-Authored-By: Claude Opus 4.6 --- klausur-service/backend/ocr_pipeline_api.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/klausur-service/backend/ocr_pipeline_api.py b/klausur-service/backend/ocr_pipeline_api.py index 0542209..e0bcf5c 100644 --- a/klausur-service/backend/ocr_pipeline_api.py +++ b/klausur-service/backend/ocr_pipeline_api.py @@ -708,15 +708,19 @@ async def detect_columns(session_id: str): "duration_seconds": round(duration, 2), } - # Persist to DB + # Persist to DB — also invalidate downstream results (rows, words) await update_session_db( session_id, column_result=column_result, + row_result=None, + word_result=None, current_step=3, ) # Update cache cached["column_result"] = column_result + cached.pop("row_result", None) + cached.pop("word_result", None) col_count = len([c for c in columns if c["type"].startswith("column")]) logger.info(f"OCR Pipeline: columns session {session_id}: " @@ -737,10 +741,13 @@ async def set_manual_columns(session_id: str, req: ManualColumnsRequest): "method": "manual", } - await update_session_db(session_id, column_result=column_result) + await update_session_db(session_id, column_result=column_result, + row_result=None, word_result=None) if session_id in _cache: _cache[session_id]["column_result"] = column_result + _cache[session_id].pop("row_result", None) + _cache[session_id].pop("word_result", None) logger.info(f"OCR Pipeline: manual columns session {session_id}: " f"{len(req.columns)} columns set") @@ -919,14 +926,16 @@ async def detect_rows(session_id: str): "duration_seconds": round(duration, 2), } - # Persist to DB + # Persist to DB — also invalidate word_result since rows changed await update_session_db( session_id, row_result=row_result, + word_result=None, current_step=4, ) cached["row_result"] = row_result + cached.pop("word_result", None) logger.info(f"OCR Pipeline: rows session {session_id}: " f"{len(rows)} rows detected ({duration:.2f}s): {type_counts}") @@ -947,10 +956,11 @@ async def set_manual_rows(session_id: str, req: ManualRowsRequest): "method": "manual", } - await update_session_db(session_id, row_result=row_result) + await update_session_db(session_id, row_result=row_result, word_result=None) if session_id in _cache: _cache[session_id]["row_result"] = row_result + _cache[session_id].pop("word_result", None) logger.info(f"OCR Pipeline: manual rows session {session_id}: " f"{len(req.rows)} rows set")