feat: save automatic grid snapshot before manual edits for GT comparison
- build-grid now saves the automatic OCR result as ground_truth.auto_grid_snapshot
- mark-ground-truth includes a correction_diff comparing the automatic vs. corrected grid
- New endpoint GET /correction-diff returns a detailed diff with a per-col_type accuracy breakdown (english, german, ipa, etc.)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -31,6 +31,7 @@ from ocr_pipeline_session_store import (
|
||||
get_session_image,
|
||||
update_session_db,
|
||||
)
|
||||
from ocr_pipeline_regression import _build_reference_snapshot
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -2814,8 +2815,22 @@ async def build_grid(session_id: str):
|
||||
except ValueError as e:
|
||||
raise HTTPException(status_code=400, detail=str(e))
|
||||
|
||||
# Save automatic grid snapshot for later comparison with manual corrections
|
||||
wr = session.get("word_result") or {}
|
||||
engine = wr.get("ocr_engine", "")
|
||||
if engine in ("kombi", "rapid_kombi"):
|
||||
auto_pipeline = "kombi"
|
||||
elif engine == "paddle_direct":
|
||||
auto_pipeline = "paddle-direct"
|
||||
else:
|
||||
auto_pipeline = "pipeline"
|
||||
auto_snapshot = _build_reference_snapshot(result, pipeline=auto_pipeline)
|
||||
|
||||
gt = session.get("ground_truth") or {}
|
||||
gt["auto_grid_snapshot"] = auto_snapshot
|
||||
|
||||
# Persist to DB and advance current_step to 11 (reconstruction complete)
|
||||
await update_session_db(session_id, grid_editor_result=result, current_step=11)
|
||||
await update_session_db(session_id, grid_editor_result=result, ground_truth=gt, current_step=11)
|
||||
|
||||
logger.info(
|
||||
"build-grid session %s: %d zones, %d cols, %d rows, %d cells, "
|
||||
|
||||
Reference in New Issue
Block a user