fix(ocr-pipeline): invalidate downstream results when steps are re-run
When columns change (Step 3), invalidate row_result and word_result. When rows change (Step 4), invalidate word_result. This ensures that Step 5 always uses the latest row boundaries instead of showing a stale cached word_result from a previous run. This applies to both the auto-detection and the manual override endpoints.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -708,15 +708,19 @@ async def detect_columns(session_id: str):
|
|||||||
"duration_seconds": round(duration, 2),
|
"duration_seconds": round(duration, 2),
|
||||||
}
|
}
|
||||||
|
|
||||||
# Persist to DB
|
# Persist to DB — also invalidate downstream results (rows, words)
|
||||||
await update_session_db(
|
await update_session_db(
|
||||||
session_id,
|
session_id,
|
||||||
column_result=column_result,
|
column_result=column_result,
|
||||||
|
row_result=None,
|
||||||
|
word_result=None,
|
||||||
current_step=3,
|
current_step=3,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Update cache
|
# Update cache
|
||||||
cached["column_result"] = column_result
|
cached["column_result"] = column_result
|
||||||
|
cached.pop("row_result", None)
|
||||||
|
cached.pop("word_result", None)
|
||||||
|
|
||||||
col_count = len([c for c in columns if c["type"].startswith("column")])
|
col_count = len([c for c in columns if c["type"].startswith("column")])
|
||||||
logger.info(f"OCR Pipeline: columns session {session_id}: "
|
logger.info(f"OCR Pipeline: columns session {session_id}: "
|
||||||
@@ -737,10 +741,13 @@ async def set_manual_columns(session_id: str, req: ManualColumnsRequest):
|
|||||||
"method": "manual",
|
"method": "manual",
|
||||||
}
|
}
|
||||||
|
|
||||||
await update_session_db(session_id, column_result=column_result)
|
await update_session_db(session_id, column_result=column_result,
|
||||||
|
row_result=None, word_result=None)
|
||||||
|
|
||||||
if session_id in _cache:
|
if session_id in _cache:
|
||||||
_cache[session_id]["column_result"] = column_result
|
_cache[session_id]["column_result"] = column_result
|
||||||
|
_cache[session_id].pop("row_result", None)
|
||||||
|
_cache[session_id].pop("word_result", None)
|
||||||
|
|
||||||
logger.info(f"OCR Pipeline: manual columns session {session_id}: "
|
logger.info(f"OCR Pipeline: manual columns session {session_id}: "
|
||||||
f"{len(req.columns)} columns set")
|
f"{len(req.columns)} columns set")
|
||||||
@@ -919,14 +926,16 @@ async def detect_rows(session_id: str):
|
|||||||
"duration_seconds": round(duration, 2),
|
"duration_seconds": round(duration, 2),
|
||||||
}
|
}
|
||||||
|
|
||||||
# Persist to DB
|
# Persist to DB — also invalidate word_result since rows changed
|
||||||
await update_session_db(
|
await update_session_db(
|
||||||
session_id,
|
session_id,
|
||||||
row_result=row_result,
|
row_result=row_result,
|
||||||
|
word_result=None,
|
||||||
current_step=4,
|
current_step=4,
|
||||||
)
|
)
|
||||||
|
|
||||||
cached["row_result"] = row_result
|
cached["row_result"] = row_result
|
||||||
|
cached.pop("word_result", None)
|
||||||
|
|
||||||
logger.info(f"OCR Pipeline: rows session {session_id}: "
|
logger.info(f"OCR Pipeline: rows session {session_id}: "
|
||||||
f"{len(rows)} rows detected ({duration:.2f}s): {type_counts}")
|
f"{len(rows)} rows detected ({duration:.2f}s): {type_counts}")
|
||||||
@@ -947,10 +956,11 @@ async def set_manual_rows(session_id: str, req: ManualRowsRequest):
|
|||||||
"method": "manual",
|
"method": "manual",
|
||||||
}
|
}
|
||||||
|
|
||||||
await update_session_db(session_id, row_result=row_result)
|
await update_session_db(session_id, row_result=row_result, word_result=None)
|
||||||
|
|
||||||
if session_id in _cache:
|
if session_id in _cache:
|
||||||
_cache[session_id]["row_result"] = row_result
|
_cache[session_id]["row_result"] = row_result
|
||||||
|
_cache[session_id].pop("word_result", None)
|
||||||
|
|
||||||
logger.info(f"OCR Pipeline: manual rows session {session_id}: "
|
logger.info(f"OCR Pipeline: manual rows session {session_id}: "
|
||||||
f"{len(req.rows)} rows set")
|
f"{len(req.rows)} rows set")
|
||||||
|
|||||||
Reference in New Issue
Block a user