Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 28s
CI / test-go-edu-search (push) Successful in 29s
CI / test-python-klausur (push) Failing after 2m31s
CI / test-python-agent-core (push) Successful in 20s
CI / test-nodejs-website (push) Successful in 23s
grid/ package (16 files):
  grid/build/ — core, zones, cleanup, text_ops, cell_ops, finalize
  grid/editor/ — api, helpers, columns, filters, headers, zones
vocab/ package (10 files):
  vocab/worksheet/ — api, models, extraction, generation, ocr, upload, analysis, compare
  vocab/ — session_store, learn_bridge
26 backward-compat shims. Internal imports relative. RAG untouched.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
111 lines
3.4 KiB
Python
111 lines
3.4 KiB
Python
"""
|
|
Grid Editor API — gutter repair endpoints.
|
|
"""
|
|
|
|
import logging
|
|
|
|
from fastapi import APIRouter, HTTPException, Request
|
|
|
|
from ocr_pipeline_session_store import (
|
|
get_session_db,
|
|
update_session_db,
|
|
)
|
|
|
|
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
|
|
# Router for the gutter-repair endpoints; routes registered on it are served
# under the /api/v1/ocr-pipeline prefix and tagged "grid-editor".
router = APIRouter(prefix="/api/v1/ocr-pipeline", tags=["grid-editor"])
|
|
|
|
|
|
@router.post("/sessions/{session_id}/gutter-repair")
async def gutter_repair(session_id: str):
    """Scan a session's grid for gutter-edge OCR errors and suggest repairs.

    Suggestion kinds named by the analysis:
    - spell_fix: words truncated/blurred at the book binding
    - hyphen_join: words split across rows with missing hyphen chars

    The analysis result is stored under ``ground_truth["gutter_repair"]`` of
    the session and is also returned to the caller.

    Raises:
        HTTPException: 404 if the session cannot be loaded; 400 if the
            session has no ``grid_editor_result``.
    """
    session_row = await get_session_db(session_id)
    if not session_row:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    grid = session_row.get("grid_editor_result")
    if not grid:
        raise HTTPException(status_code=400, detail="No grid data. Run build-grid first.")

    # Imported lazily, only after the request has passed the checks above.
    from cv_gutter_repair import analyse_grid_for_gutter_repair

    analysis = analyse_grid_for_gutter_repair(
        grid,
        image_width=grid.get("image_width", 0),
    )

    # Keep the suggestions inside the existing ground_truth payload so that
    # no DB migration is needed.
    ground_truth = session_row.get("ground_truth") or {}
    ground_truth["gutter_repair"] = analysis
    await update_session_db(session_id, ground_truth=ground_truth)

    stats = analysis.get("stats", {})
    logger.info(
        "gutter-repair session %s: %d suggestions in %.2fs",
        session_id,
        stats.get("suggestions_found", 0),
        analysis.get("duration_seconds", 0),
    )
    return analysis
|
|
|
|
|
|
@router.post("/sessions/{session_id}/gutter-repair/apply")
async def gutter_repair_apply(session_id: str, request: Request):
    """Apply accepted gutter repair suggestions to the grid.

    Body:
        {
          "accepted": ["suggestion_id_1", "suggestion_id_2", ...],
          "text_overrides": {"suggestion_id_1": "alternative_text", ...}
        }

    ``text_overrides`` is optional; it lets the user pick a different
    correction from a suggestion's alternatives list. A non-empty override
    replaces that suggestion's ``suggested_text`` before the suggestions are
    applied.

    Returns:
        The result of ``apply_gutter_suggestions``, or
        ``{"applied_count": 0, "changes": []}`` when no ids were accepted.

    Raises:
        HTTPException: 404 if the session cannot be loaded; 400 if the
            session has no grid data or no stored gutter-repair result, or
            if the request body is not a JSON object of the expected shape.
    """
    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    grid_data = session.get("grid_editor_result")
    if not grid_data:
        raise HTTPException(status_code=400, detail="No grid data.")

    gt = session.get("ground_truth") or {}
    gutter_result = gt.get("gutter_repair")
    if not gutter_result:
        raise HTTPException(
            status_code=400,
            detail="No gutter repair data. Run gutter-repair first.",
        )

    # An empty or malformed body makes request.json() raise a ValueError
    # subclass (json.JSONDecodeError / UnicodeDecodeError); report that as a
    # client error instead of letting it surface as a 500.
    try:
        body = await request.json()
    except ValueError as exc:
        raise HTTPException(
            status_code=400,
            detail="Request body must be valid JSON.",
        ) from exc
    if not isinstance(body, dict):
        raise HTTPException(
            status_code=400,
            detail="Request body must be a JSON object.",
        )

    # `or []` also covers an explicit JSON null.
    accepted_ids = body.get("accepted") or []
    if not isinstance(accepted_ids, list):
        raise HTTPException(
            status_code=400,
            detail="'accepted' must be a list of suggestion ids.",
        )
    if not accepted_ids:
        return {"applied_count": 0, "changes": []}

    # text_overrides: { suggestion_id: "alternative_text" }
    # `or {}` covers an explicit JSON null, which would otherwise break the
    # lookup below.
    text_overrides = body.get("text_overrides") or {}
    if not isinstance(text_overrides, dict):
        raise HTTPException(
            status_code=400,
            detail="'text_overrides' must be an object mapping suggestion ids to text.",
        )

    # Imported lazily, only after the request has passed validation.
    from cv_gutter_repair import apply_gutter_suggestions

    suggestions = gutter_result.get("suggestions", [])

    # Apply user-selected alternatives before passing the suggestions on.
    for suggestion in suggestions:
        override = text_overrides.get(suggestion.get("id", ""))
        if override:
            suggestion["suggested_text"] = override

    result = apply_gutter_suggestions(grid_data, accepted_ids, suggestions)

    # Save the grid back to the session (passed to apply_gutter_suggestions above).
    await update_session_db(session_id, grid_editor_result=grid_data)

    logger.info(
        "gutter-repair/apply session %s: %d changes applied",
        session_id,
        result.get("applied_count", 0),
    )

    return result
|