Add Ground Truth regression test system for OCR pipeline
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 35s
CI / test-go-edu-search (push) Successful in 26s
CI / test-python-klausur (push) Failing after 1m47s
CI / test-python-agent-core (push) Successful in 15s
CI / test-nodejs-website (push) Successful in 22s

Extract _build_grid_core() from the build_grid() endpoint so that the
endpoint and the regression tests can share it.
Add ocr_pipeline_regression.py with endpoints to mark sessions as
ground truth (GT), list them, and run regression comparisons after code
changes. Add a frontend button in StepGroundTruth.tsx to mark or update
a session's GT.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-03-18 13:46:48 +01:00
parent c894a0feeb
commit f655db30e4
5 changed files with 482 additions and 22 deletions

View File

@@ -745,42 +745,38 @@ def _filter_footer_words(
# ---------------------------------------------------------------------------
# Endpoints
# Core computation (used by build-grid endpoint and regression tests)
# ---------------------------------------------------------------------------
@router.post("/sessions/{session_id}/build-grid")
async def build_grid(session_id: str):
"""Build a structured, zone-aware grid from existing Kombi word results.
async def _build_grid_core(session_id: str, session: dict) -> dict:
"""Core grid building logic — pure computation, no HTTP or DB side effects.
Requires that paddle-kombi or rapid-kombi has already been run on the session.
Uses the image for box detection and the word positions for grid structuring.
Args:
session_id: Session identifier (for logging and image loading).
session: Full session dict from get_session_db().
Returns a StructuredGrid with zones, each containing their own
columns, rows, and cells — ready for the frontend Excel-like editor.
Returns:
StructuredGrid result dict.
Raises:
ValueError: If session data is incomplete.
"""
t0 = time.time()
# 1. Load session and word results
session = await get_session_db(session_id)
if not session:
raise HTTPException(status_code=404, detail=f"Session {session_id} not found")
# 1. Validate and load word results
word_result = session.get("word_result")
if not word_result or not word_result.get("cells"):
raise HTTPException(
status_code=400,
detail="No word results found. Run paddle-kombi or rapid-kombi first.",
)
raise ValueError("No word results found. Run paddle-kombi or rapid-kombi first.")
img_w = word_result.get("image_width", 0)
img_h = word_result.get("image_height", 0)
if not img_w or not img_h:
raise HTTPException(status_code=400, detail="Missing image dimensions in word_result")
raise ValueError("Missing image dimensions in word_result")
# 2. Flatten all word boxes from cells
all_words = _flatten_word_boxes(word_result["cells"])
if not all_words:
raise HTTPException(status_code=400, detail="No word boxes found in cells")
raise ValueError("No word boxes found in cells")
logger.info("build-grid session %s: %d words from %d cells",
session_id, len(all_words), len(word_result["cells"]))
@@ -1313,14 +1309,45 @@ async def build_grid(session_id: str):
"duration_seconds": round(duration, 2),
}
# 7. Persist to DB
return result
# ---------------------------------------------------------------------------
# Endpoints
# ---------------------------------------------------------------------------
@router.post("/sessions/{session_id}/build-grid")
async def build_grid(session_id: str):
"""Build a structured, zone-aware grid from existing Kombi word results.
Requires that paddle-kombi or rapid-kombi has already been run on the session.
Uses the image for box detection and the word positions for grid structuring.
Returns a StructuredGrid with zones, each containing their own
columns, rows, and cells — ready for the frontend Excel-like editor.
"""
session = await get_session_db(session_id)
if not session:
raise HTTPException(status_code=404, detail=f"Session {session_id} not found")
try:
result = await _build_grid_core(session_id, session)
except ValueError as e:
raise HTTPException(status_code=400, detail=str(e))
# Persist to DB
await update_session_db(session_id, grid_editor_result=result)
logger.info(
"build-grid session %s: %d zones, %d cols, %d rows, %d cells, "
"%d boxes in %.2fs",
session_id, len(zones_data), total_columns, total_rows,
total_cells, boxes_detected, duration,
session_id,
len(result.get("zones", [])),
result.get("summary", {}).get("total_columns", 0),
result.get("summary", {}).get("total_rows", 0),
result.get("summary", {}).get("total_cells", 0),
result.get("boxes_detected", 0),
result.get("duration_seconds", 0),
)
return result