feat(ocr-pipeline): improve LLM review UI + add reconstruction step
StepLlmReview: Show full vocab table with image overlay, row-level
status tracking (pending/active/reviewed/corrected/skipped), and
auto-scroll during SSE streaming. Load previous results on mount.
StepReconstruction: New step 7 with editable text fields at original
bbox positions over dewarped image. Zoom controls, tab navigation,
color-coded columns, save to backend.
Backend: Add POST /sessions/{id}/reconstruction endpoint.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1559,6 +1559,70 @@ async def apply_llm_corrections(session_id: str, request: Request):
|
||||
}
|
||||
|
||||
|
||||
@router.post("/sessions/{session_id}/reconstruction")
async def save_reconstruction(session_id: str, request: Request):
    """Save edited cell texts from the reconstruction step.

    The request body is expected to be a JSON object of the form
    ``{"cells": [{"cell_id": ..., "text": ...}, ...]}``. Every stored cell
    whose ``cell_id`` appears in the request gets its ``text`` replaced and
    its ``status`` set to ``"edited"``. If the word result also carries
    vocab entries, the matching ``english`` / ``german`` / ``example``
    fields are updated as well. The session is then persisted with
    ``current_step=7`` and the in-memory cache entry (if any) is refreshed.

    Returns:
        A dict with the ``session_id`` and the number of ``updated`` cells.

    Raises:
        HTTPException: 404 if the session does not exist; 400 if the session
            has no word result or the request body is malformed.
    """
    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    word_result = session.get("word_result")
    if not word_result:
        raise HTTPException(status_code=400, detail="No word result found")

    body = await request.json()
    # Reject non-object bodies explicitly instead of failing with an
    # AttributeError (HTTP 500) on body.get below.
    if not isinstance(body, dict):
        raise HTTPException(status_code=400, detail="Request body must be a JSON object")

    cell_updates = body.get("cells", [])

    if not cell_updates:
        # Nothing to change, but the step is still marked as reached.
        await update_session_db(session_id, current_step=7)
        return {"session_id": session_id, "updated": 0}

    # Validate the payload shape up front so malformed input yields a 400
    # rather than a KeyError/TypeError while building the update map.
    if not isinstance(cell_updates, list) or not all(
        isinstance(c, dict) and "cell_id" in c and "text" in c for c in cell_updates
    ):
        raise HTTPException(
            status_code=400,
            detail="'cells' must be a list of objects with 'cell_id' and 'text'",
        )

    # Build update map: cell_id -> new text
    update_map = {c["cell_id"]: c["text"] for c in cell_updates}

    # Update cells
    cells = word_result.get("cells", [])
    updated_count = 0
    for cell in cells:
        if cell["cell_id"] in update_map:
            cell["text"] = update_map[cell["cell_id"]]
            cell["status"] = "edited"
            updated_count += 1

    word_result["cells"] = cells

    # Also update vocab_entries if present
    entries = word_result.get("vocab_entries") or word_result.get("entries") or []
    if entries:
        # Map cell_id pattern "R{row}_C{col}" to entry fields
        for entry in entries:
            row_idx = entry.get("row_index", -1)
            # Check each field's cell
            for col_idx, field_name in enumerate(["english", "german", "example"]):
                # Try the zero-padded id first, then the unpadded variant.
                candidates = (f"R{row_idx:02d}_C{col_idx}", f"R{row_idx}_C{col_idx}")
                for candidate in candidates:
                    new_text = update_map.get(candidate)
                    # Compare against None (not truthiness) so that an edit
                    # which clears a cell to "" still reaches the entry.
                    if new_text is not None:
                        entry[field_name] = new_text
                        break

        word_result["vocab_entries"] = entries
        if "entries" in word_result:
            word_result["entries"] = entries

    await update_session_db(session_id, word_result=word_result, current_step=7)

    # Keep the in-memory copy consistent with what was just persisted.
    if session_id in _cache:
        _cache[session_id]["word_result"] = word_result

    logger.info(
        "Reconstruction saved for session %s: %s cells updated",
        session_id,
        updated_count,
    )

    return {
        "session_id": session_id,
        "updated": updated_count,
    }
|
||||
|
||||
|
||||
async def _get_rows_overlay(session_id: str) -> Response:
|
||||
"""Generate dewarped image with row bands drawn on it."""
|
||||
session = await get_session_db(session_id)
|
||||
|
||||
Reference in New Issue
Block a user