Add GT button to OCR overlay, prominent category picker, track pipeline
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 27s
CI / test-go-edu-search (push) Successful in 26s
CI / test-python-klausur (push) Failing after 1m51s
CI / test-python-agent-core (push) Successful in 15s
CI / test-nodejs-website (push) Successful in 18s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 27s
CI / test-go-edu-search (push) Successful in 26s
CI / test-python-klausur (push) Failing after 1m51s
CI / test-python-agent-core (push) Successful in 15s
CI / test-nodejs-website (push) Successful in 18s
- Ground Truth button on last step of Pipeline/Kombi modes in ocr-overlay
- Prominent category picker in active session info bar (pulses when unset)
- GT badge shown when session has ground truth reference
- Backend: auto-detect pipeline from ocr_engine, store in GT snapshot
- Pipeline info shown in GT session list and regression reports
- Also pass pipeline param from ocr-pipeline StepGroundTruth

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -12,7 +12,7 @@ import logging
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from fastapi import APIRouter, HTTPException
|
||||
from fastapi import APIRouter, HTTPException, Query
|
||||
|
||||
from grid_editor_api import _build_grid_core
|
||||
from ocr_pipeline_session_store import (
|
||||
@@ -49,7 +49,10 @@ def _extract_cells_for_comparison(grid_result: dict) -> List[Dict[str, Any]]:
|
||||
return cells
|
||||
|
||||
|
||||
def _build_reference_snapshot(grid_result: dict) -> dict:
|
||||
def _build_reference_snapshot(
|
||||
grid_result: dict,
|
||||
pipeline: Optional[str] = None,
|
||||
) -> dict:
|
||||
"""Build a ground-truth reference snapshot from a grid_editor_result."""
|
||||
cells = _extract_cells_for_comparison(grid_result)
|
||||
|
||||
@@ -57,9 +60,10 @@ def _build_reference_snapshot(grid_result: dict) -> dict:
|
||||
total_columns = sum(len(z.get("columns", [])) for z in grid_result.get("zones", []))
|
||||
total_rows = sum(len(z.get("rows", [])) for z in grid_result.get("zones", []))
|
||||
|
||||
return {
|
||||
snapshot = {
|
||||
"saved_at": datetime.now(timezone.utc).isoformat(),
|
||||
"version": 1,
|
||||
"pipeline": pipeline,
|
||||
"summary": {
|
||||
"total_zones": total_zones,
|
||||
"total_columns": total_columns,
|
||||
@@ -68,6 +72,7 @@ def _build_reference_snapshot(grid_result: dict) -> dict:
|
||||
},
|
||||
"cells": cells,
|
||||
}
|
||||
return snapshot
|
||||
|
||||
|
||||
def compare_grids(reference: dict, current: dict) -> dict:
|
||||
@@ -160,7 +165,10 @@ def compare_grids(reference: dict, current: dict) -> dict:
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@router.post("/sessions/{session_id}/mark-ground-truth")
|
||||
async def mark_ground_truth(session_id: str):
|
||||
async def mark_ground_truth(
|
||||
session_id: str,
|
||||
pipeline: Optional[str] = Query(None, description="Pipeline used: kombi, pipeline, paddle-direct"),
|
||||
):
|
||||
"""Save the current build-grid result as ground-truth reference."""
|
||||
session = await get_session_db(session_id)
|
||||
if not session:
|
||||
@@ -173,7 +181,18 @@ async def mark_ground_truth(session_id: str):
|
||||
detail="No grid_editor_result found. Run build-grid first.",
|
||||
)
|
||||
|
||||
reference = _build_reference_snapshot(grid_result)
|
||||
# Auto-detect pipeline from word_result if not provided
|
||||
if not pipeline:
|
||||
wr = session.get("word_result") or {}
|
||||
engine = wr.get("ocr_engine", "")
|
||||
if engine in ("kombi", "rapid_kombi"):
|
||||
pipeline = "kombi"
|
||||
elif engine == "paddle_direct":
|
||||
pipeline = "paddle-direct"
|
||||
else:
|
||||
pipeline = "pipeline"
|
||||
|
||||
reference = _build_reference_snapshot(grid_result, pipeline=pipeline)
|
||||
|
||||
# Merge into existing ground_truth JSONB
|
||||
gt = session.get("ground_truth") or {}
|
||||
@@ -224,6 +243,8 @@ async def list_ground_truth_sessions():
|
||||
"session_id": s["id"],
|
||||
"name": s.get("name", ""),
|
||||
"filename": s.get("filename", ""),
|
||||
"document_category": s.get("document_category"),
|
||||
"pipeline": ref.get("pipeline"),
|
||||
"saved_at": ref.get("saved_at"),
|
||||
"summary": ref.get("summary", {}),
|
||||
})
|
||||
|
||||
Reference in New Issue
Block a user