|
|
|
@@ -14,20 +14,21 @@ Lizenz: Apache 2.0
|
|
|
|
|
DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
import io
|
|
|
|
|
import logging
|
|
|
|
|
import time
|
|
|
|
|
import uuid
|
|
|
|
|
from datetime import datetime, timedelta
|
|
|
|
|
from typing import Any, Dict, Optional
|
|
|
|
|
from dataclasses import asdict
|
|
|
|
|
from datetime import datetime
|
|
|
|
|
from typing import Any, Dict, List, Optional
|
|
|
|
|
|
|
|
|
|
import cv2
|
|
|
|
|
import numpy as np
|
|
|
|
|
from fastapi import APIRouter, File, HTTPException, UploadFile
|
|
|
|
|
from fastapi import APIRouter, File, Form, HTTPException, UploadFile
|
|
|
|
|
from fastapi.responses import Response
|
|
|
|
|
from pydantic import BaseModel
|
|
|
|
|
|
|
|
|
|
from cv_vocab_pipeline import (
|
|
|
|
|
analyze_layout,
|
|
|
|
|
create_ocr_image,
|
|
|
|
|
deskew_image,
|
|
|
|
|
deskew_image_by_word_alignment,
|
|
|
|
@@ -36,34 +37,67 @@ from cv_vocab_pipeline import (
|
|
|
|
|
render_image_high_res,
|
|
|
|
|
render_pdf_high_res,
|
|
|
|
|
)
|
|
|
|
|
from ocr_pipeline_session_store import (
|
|
|
|
|
create_session_db,
|
|
|
|
|
delete_session_db,
|
|
|
|
|
get_session_db,
|
|
|
|
|
get_session_image,
|
|
|
|
|
init_ocr_pipeline_tables,
|
|
|
|
|
list_sessions_db,
|
|
|
|
|
update_session_db,
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
|
router = APIRouter(prefix="/api/v1/ocr-pipeline", tags=["ocr-pipeline"])
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
# In-memory session store (24h TTL)
|
|
|
|
|
# In-memory cache for active sessions (BGR numpy arrays for processing)
|
|
|
|
|
# DB is source of truth, cache holds BGR arrays during active processing.
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
_sessions: Dict[str, Dict[str, Any]] = {}
|
|
|
|
|
SESSION_TTL_HOURS = 24
|
|
|
|
|
_cache: Dict[str, Dict[str, Any]] = {}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _cleanup_expired():
|
|
|
|
|
"""Remove sessions older than TTL."""
|
|
|
|
|
cutoff = datetime.utcnow() - timedelta(hours=SESSION_TTL_HOURS)
|
|
|
|
|
expired = [sid for sid, s in _sessions.items() if s.get("created_at", datetime.utcnow()) < cutoff]
|
|
|
|
|
for sid in expired:
|
|
|
|
|
del _sessions[sid]
|
|
|
|
|
logger.info(f"OCR Pipeline: expired session {sid}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _get_session(session_id: str) -> Dict[str, Any]:
|
|
|
|
|
"""Get session or raise 404."""
|
|
|
|
|
session = _sessions.get(session_id)
|
|
|
|
|
async def _load_session_to_cache(session_id: str) -> Dict[str, Any]:
|
|
|
|
|
"""Load session from DB into cache, decoding PNGs to BGR arrays."""
|
|
|
|
|
session = await get_session_db(session_id)
|
|
|
|
|
if not session:
|
|
|
|
|
raise HTTPException(status_code=404, detail=f"Session {session_id} not found")
|
|
|
|
|
return session
|
|
|
|
|
|
|
|
|
|
if session_id in _cache:
|
|
|
|
|
return _cache[session_id]
|
|
|
|
|
|
|
|
|
|
cache_entry: Dict[str, Any] = {
|
|
|
|
|
"id": session_id,
|
|
|
|
|
**session,
|
|
|
|
|
"original_bgr": None,
|
|
|
|
|
"deskewed_bgr": None,
|
|
|
|
|
"dewarped_bgr": None,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
# Decode images from DB into BGR numpy arrays
|
|
|
|
|
for img_type, bgr_key in [
|
|
|
|
|
("original", "original_bgr"),
|
|
|
|
|
("deskewed", "deskewed_bgr"),
|
|
|
|
|
("dewarped", "dewarped_bgr"),
|
|
|
|
|
]:
|
|
|
|
|
png_data = await get_session_image(session_id, img_type)
|
|
|
|
|
if png_data:
|
|
|
|
|
arr = np.frombuffer(png_data, dtype=np.uint8)
|
|
|
|
|
bgr = cv2.imdecode(arr, cv2.IMREAD_COLOR)
|
|
|
|
|
cache_entry[bgr_key] = bgr
|
|
|
|
|
|
|
|
|
|
_cache[session_id] = cache_entry
|
|
|
|
|
return cache_entry
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _get_cached(session_id: str) -> Dict[str, Any]:
|
|
|
|
|
"""Get from cache or raise 404."""
|
|
|
|
|
entry = _cache.get(session_id)
|
|
|
|
|
if not entry:
|
|
|
|
|
raise HTTPException(status_code=404, detail=f"Session {session_id} not in cache — reload first")
|
|
|
|
|
return entry
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
@@ -90,15 +124,36 @@ class DewarpGroundTruthRequest(BaseModel):
|
|
|
|
|
notes: Optional[str] = None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class RenameSessionRequest(BaseModel):
|
|
|
|
|
name: str
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class ManualColumnsRequest(BaseModel):
|
|
|
|
|
columns: List[Dict[str, Any]]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class ColumnGroundTruthRequest(BaseModel):
|
|
|
|
|
is_correct: bool
|
|
|
|
|
notes: Optional[str] = None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
# Endpoints
|
|
|
|
|
# Session Management Endpoints
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
@router.get("/sessions")
|
|
|
|
|
async def list_sessions():
|
|
|
|
|
"""List all OCR pipeline sessions."""
|
|
|
|
|
sessions = await list_sessions_db()
|
|
|
|
|
return {"sessions": sessions}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@router.post("/sessions")
|
|
|
|
|
async def create_session(file: UploadFile = File(...)):
|
|
|
|
|
async def create_session(
|
|
|
|
|
file: UploadFile = File(...),
|
|
|
|
|
name: Optional[str] = Form(None),
|
|
|
|
|
):
|
|
|
|
|
"""Upload a PDF or image file and create a pipeline session."""
|
|
|
|
|
_cleanup_expired()
|
|
|
|
|
|
|
|
|
|
file_data = await file.read()
|
|
|
|
|
filename = file.filename or "upload"
|
|
|
|
|
content_type = file.content_type or ""
|
|
|
|
@@ -114,25 +169,32 @@ async def create_session(file: UploadFile = File(...)):
|
|
|
|
|
except Exception as e:
|
|
|
|
|
raise HTTPException(status_code=400, detail=f"Could not process file: {e}")
|
|
|
|
|
|
|
|
|
|
# Encode original as PNG bytes for serving
|
|
|
|
|
# Encode original as PNG bytes
|
|
|
|
|
success, png_buf = cv2.imencode(".png", img_bgr)
|
|
|
|
|
if not success:
|
|
|
|
|
raise HTTPException(status_code=500, detail="Failed to encode image")
|
|
|
|
|
|
|
|
|
|
_sessions[session_id] = {
|
|
|
|
|
original_png = png_buf.tobytes()
|
|
|
|
|
session_name = name or filename
|
|
|
|
|
|
|
|
|
|
# Persist to DB
|
|
|
|
|
await create_session_db(
|
|
|
|
|
session_id=session_id,
|
|
|
|
|
name=session_name,
|
|
|
|
|
filename=filename,
|
|
|
|
|
original_png=original_png,
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
# Cache BGR array for immediate processing
|
|
|
|
|
_cache[session_id] = {
|
|
|
|
|
"id": session_id,
|
|
|
|
|
"filename": filename,
|
|
|
|
|
"created_at": datetime.utcnow(),
|
|
|
|
|
"name": session_name,
|
|
|
|
|
"original_bgr": img_bgr,
|
|
|
|
|
"original_png": png_buf.tobytes(),
|
|
|
|
|
"deskewed_bgr": None,
|
|
|
|
|
"deskewed_png": None,
|
|
|
|
|
"binarized_png": None,
|
|
|
|
|
"deskew_result": None,
|
|
|
|
|
"dewarped_bgr": None,
|
|
|
|
|
"dewarped_png": None,
|
|
|
|
|
"deskew_result": None,
|
|
|
|
|
"dewarp_result": None,
|
|
|
|
|
"auto_shear_degrees": None,
|
|
|
|
|
"ground_truth": {},
|
|
|
|
|
"current_step": 1,
|
|
|
|
|
}
|
|
|
|
@@ -143,6 +205,7 @@ async def create_session(file: UploadFile = File(...)):
|
|
|
|
|
return {
|
|
|
|
|
"session_id": session_id,
|
|
|
|
|
"filename": filename,
|
|
|
|
|
"name": session_name,
|
|
|
|
|
"image_width": img_bgr.shape[1],
|
|
|
|
|
"image_height": img_bgr.shape[0],
|
|
|
|
|
"original_image_url": f"/api/v1/ocr-pipeline/sessions/{session_id}/image/original",
|
|
|
|
@@ -151,35 +214,106 @@ async def create_session(file: UploadFile = File(...)):
|
|
|
|
|
|
|
|
|
|
@router.get("/sessions/{session_id}")
|
|
|
|
|
async def get_session_info(session_id: str):
|
|
|
|
|
"""Get session info including deskew/dewarp results for step navigation."""
|
|
|
|
|
session = _get_session(session_id)
|
|
|
|
|
img_bgr = session["original_bgr"]
|
|
|
|
|
"""Get session info including deskew/dewarp/column results for step navigation."""
|
|
|
|
|
session = await get_session_db(session_id)
|
|
|
|
|
if not session:
|
|
|
|
|
raise HTTPException(status_code=404, detail=f"Session {session_id} not found")
|
|
|
|
|
|
|
|
|
|
# Get image dimensions from original PNG
|
|
|
|
|
original_png = await get_session_image(session_id, "original")
|
|
|
|
|
if original_png:
|
|
|
|
|
arr = np.frombuffer(original_png, dtype=np.uint8)
|
|
|
|
|
img = cv2.imdecode(arr, cv2.IMREAD_COLOR)
|
|
|
|
|
img_w, img_h = img.shape[1], img.shape[0] if img is not None else (0, 0)
|
|
|
|
|
else:
|
|
|
|
|
img_w, img_h = 0, 0
|
|
|
|
|
|
|
|
|
|
result = {
|
|
|
|
|
"session_id": session["id"],
|
|
|
|
|
"filename": session["filename"],
|
|
|
|
|
"image_width": img_bgr.shape[1],
|
|
|
|
|
"image_height": img_bgr.shape[0],
|
|
|
|
|
"filename": session.get("filename", ""),
|
|
|
|
|
"name": session.get("name", ""),
|
|
|
|
|
"image_width": img_w,
|
|
|
|
|
"image_height": img_h,
|
|
|
|
|
"original_image_url": f"/api/v1/ocr-pipeline/sessions/{session_id}/image/original",
|
|
|
|
|
"current_step": session.get("current_step", 1),
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
# Include deskew result if available
|
|
|
|
|
if session.get("deskew_result"):
|
|
|
|
|
result["deskew_result"] = session["deskew_result"]
|
|
|
|
|
|
|
|
|
|
# Include dewarp result if available
|
|
|
|
|
if session.get("dewarp_result"):
|
|
|
|
|
result["dewarp_result"] = session["dewarp_result"]
|
|
|
|
|
if session.get("column_result"):
|
|
|
|
|
result["column_result"] = session["column_result"]
|
|
|
|
|
|
|
|
|
|
return result
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@router.put("/sessions/{session_id}")
|
|
|
|
|
async def rename_session(session_id: str, req: RenameSessionRequest):
|
|
|
|
|
"""Rename a session."""
|
|
|
|
|
updated = await update_session_db(session_id, name=req.name)
|
|
|
|
|
if not updated:
|
|
|
|
|
raise HTTPException(status_code=404, detail=f"Session {session_id} not found")
|
|
|
|
|
return {"session_id": session_id, "name": req.name}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@router.delete("/sessions/{session_id}")
|
|
|
|
|
async def delete_session(session_id: str):
|
|
|
|
|
"""Delete a session."""
|
|
|
|
|
_cache.pop(session_id, None)
|
|
|
|
|
deleted = await delete_session_db(session_id)
|
|
|
|
|
if not deleted:
|
|
|
|
|
raise HTTPException(status_code=404, detail=f"Session {session_id} not found")
|
|
|
|
|
return {"session_id": session_id, "deleted": True}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
# Image Endpoints
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
@router.get("/sessions/{session_id}/image/{image_type}")
|
|
|
|
|
async def get_image(session_id: str, image_type: str):
|
|
|
|
|
"""Serve session images: original, deskewed, dewarped, binarized, or columns-overlay."""
|
|
|
|
|
valid_types = {"original", "deskewed", "dewarped", "binarized", "columns-overlay"}
|
|
|
|
|
if image_type not in valid_types:
|
|
|
|
|
raise HTTPException(status_code=400, detail=f"Unknown image type: {image_type}")
|
|
|
|
|
|
|
|
|
|
if image_type == "columns-overlay":
|
|
|
|
|
return await _get_columns_overlay(session_id)
|
|
|
|
|
|
|
|
|
|
# Try cache first for fast serving
|
|
|
|
|
cached = _cache.get(session_id)
|
|
|
|
|
if cached:
|
|
|
|
|
png_key = f"{image_type}_png" if image_type != "original" else None
|
|
|
|
|
bgr_key = f"{image_type}_bgr" if image_type != "binarized" else None
|
|
|
|
|
|
|
|
|
|
# For binarized, check if we have it cached as PNG
|
|
|
|
|
if image_type == "binarized" and cached.get("binarized_png"):
|
|
|
|
|
return Response(content=cached["binarized_png"], media_type="image/png")
|
|
|
|
|
|
|
|
|
|
# Load from DB
|
|
|
|
|
data = await get_session_image(session_id, image_type)
|
|
|
|
|
if not data:
|
|
|
|
|
raise HTTPException(status_code=404, detail=f"Image '{image_type}' not available yet")
|
|
|
|
|
|
|
|
|
|
return Response(content=data, media_type="image/png")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
# Deskew Endpoints
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
@router.post("/sessions/{session_id}/deskew")
|
|
|
|
|
async def auto_deskew(session_id: str):
|
|
|
|
|
"""Run both deskew methods and pick the best one."""
|
|
|
|
|
session = _get_session(session_id)
|
|
|
|
|
img_bgr = session["original_bgr"]
|
|
|
|
|
# Ensure session is in cache
|
|
|
|
|
if session_id not in _cache:
|
|
|
|
|
await _load_session_to_cache(session_id)
|
|
|
|
|
cached = _get_cached(session_id)
|
|
|
|
|
|
|
|
|
|
img_bgr = cached.get("original_bgr")
|
|
|
|
|
if img_bgr is None:
|
|
|
|
|
raise HTTPException(status_code=400, detail="Original image not available")
|
|
|
|
|
|
|
|
|
|
t0 = time.time()
|
|
|
|
|
|
|
|
|
@@ -202,12 +336,10 @@ async def auto_deskew(session_id: str):
|
|
|
|
|
|
|
|
|
|
duration = time.time() - t0
|
|
|
|
|
|
|
|
|
|
# Pick method with larger detected angle (more correction needed = more skew found)
|
|
|
|
|
# If both are ~0, prefer word alignment as it's more robust
|
|
|
|
|
# Pick best method
|
|
|
|
|
if abs(angle_wa) >= abs(angle_hough) or abs(angle_hough) < 0.1:
|
|
|
|
|
method_used = "word_alignment"
|
|
|
|
|
angle_applied = angle_wa
|
|
|
|
|
# Decode word alignment result to BGR
|
|
|
|
|
wa_array = np.frombuffer(deskewed_wa_bytes, dtype=np.uint8)
|
|
|
|
|
deskewed_bgr = cv2.imdecode(wa_array, cv2.IMREAD_COLOR)
|
|
|
|
|
if deskewed_bgr is None:
|
|
|
|
@@ -219,20 +351,19 @@ async def auto_deskew(session_id: str):
|
|
|
|
|
angle_applied = angle_hough
|
|
|
|
|
deskewed_bgr = deskewed_hough
|
|
|
|
|
|
|
|
|
|
# Encode deskewed as PNG
|
|
|
|
|
# Encode as PNG
|
|
|
|
|
success, deskewed_png_buf = cv2.imencode(".png", deskewed_bgr)
|
|
|
|
|
deskewed_png = deskewed_png_buf.tobytes() if success else session["original_png"]
|
|
|
|
|
deskewed_png = deskewed_png_buf.tobytes() if success else b""
|
|
|
|
|
|
|
|
|
|
# Create binarized version
|
|
|
|
|
binarized_png = None
|
|
|
|
|
try:
|
|
|
|
|
binarized = create_ocr_image(deskewed_bgr)
|
|
|
|
|
success_bin, bin_buf = cv2.imencode(".png", binarized)
|
|
|
|
|
binarized_png = bin_buf.tobytes() if success_bin else None
|
|
|
|
|
except Exception as e:
|
|
|
|
|
logger.warning(f"Binarization failed: {e}")
|
|
|
|
|
binarized_png = None
|
|
|
|
|
|
|
|
|
|
# Confidence: higher angle = lower confidence that we got it right
|
|
|
|
|
confidence = max(0.5, 1.0 - abs(angle_applied) / 5.0)
|
|
|
|
|
|
|
|
|
|
deskew_result = {
|
|
|
|
@@ -244,13 +375,23 @@ async def auto_deskew(session_id: str):
|
|
|
|
|
"duration_seconds": round(duration, 2),
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
session["deskewed_bgr"] = deskewed_bgr
|
|
|
|
|
session["deskewed_png"] = deskewed_png
|
|
|
|
|
session["binarized_png"] = binarized_png
|
|
|
|
|
session["deskew_result"] = deskew_result
|
|
|
|
|
# Update cache
|
|
|
|
|
cached["deskewed_bgr"] = deskewed_bgr
|
|
|
|
|
cached["binarized_png"] = binarized_png
|
|
|
|
|
cached["deskew_result"] = deskew_result
|
|
|
|
|
|
|
|
|
|
# Persist to DB
|
|
|
|
|
db_update = {
|
|
|
|
|
"deskewed_png": deskewed_png,
|
|
|
|
|
"deskew_result": deskew_result,
|
|
|
|
|
"current_step": 2,
|
|
|
|
|
}
|
|
|
|
|
if binarized_png:
|
|
|
|
|
db_update["binarized_png"] = binarized_png
|
|
|
|
|
await update_session_db(session_id, **db_update)
|
|
|
|
|
|
|
|
|
|
logger.info(f"OCR Pipeline: deskew session {session_id}: "
|
|
|
|
|
f"hough={angle_hough:.2f}° wa={angle_wa:.2f}° → {method_used} {angle_applied:.2f}°")
|
|
|
|
|
f"hough={angle_hough:.2f} wa={angle_wa:.2f} -> {method_used} {angle_applied:.2f}")
|
|
|
|
|
|
|
|
|
|
return {
|
|
|
|
|
"session_id": session_id,
|
|
|
|
@@ -263,8 +404,14 @@ async def auto_deskew(session_id: str):
|
|
|
|
|
@router.post("/sessions/{session_id}/deskew/manual")
|
|
|
|
|
async def manual_deskew(session_id: str, req: ManualDeskewRequest):
|
|
|
|
|
"""Apply a manual rotation angle to the original image."""
|
|
|
|
|
session = _get_session(session_id)
|
|
|
|
|
img_bgr = session["original_bgr"]
|
|
|
|
|
if session_id not in _cache:
|
|
|
|
|
await _load_session_to_cache(session_id)
|
|
|
|
|
cached = _get_cached(session_id)
|
|
|
|
|
|
|
|
|
|
img_bgr = cached.get("original_bgr")
|
|
|
|
|
if img_bgr is None:
|
|
|
|
|
raise HTTPException(status_code=400, detail="Original image not available")
|
|
|
|
|
|
|
|
|
|
angle = max(-5.0, min(5.0, req.angle))
|
|
|
|
|
|
|
|
|
|
h, w = img_bgr.shape[:2]
|
|
|
|
@@ -275,26 +422,38 @@ async def manual_deskew(session_id: str, req: ManualDeskewRequest):
|
|
|
|
|
borderMode=cv2.BORDER_REPLICATE)
|
|
|
|
|
|
|
|
|
|
success, png_buf = cv2.imencode(".png", rotated)
|
|
|
|
|
deskewed_png = png_buf.tobytes() if success else session["original_png"]
|
|
|
|
|
deskewed_png = png_buf.tobytes() if success else b""
|
|
|
|
|
|
|
|
|
|
# Binarize
|
|
|
|
|
binarized_png = None
|
|
|
|
|
try:
|
|
|
|
|
binarized = create_ocr_image(rotated)
|
|
|
|
|
success_bin, bin_buf = cv2.imencode(".png", binarized)
|
|
|
|
|
binarized_png = bin_buf.tobytes() if success_bin else None
|
|
|
|
|
except Exception:
|
|
|
|
|
binarized_png = None
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
session["deskewed_bgr"] = rotated
|
|
|
|
|
session["deskewed_png"] = deskewed_png
|
|
|
|
|
session["binarized_png"] = binarized_png
|
|
|
|
|
session["deskew_result"] = {
|
|
|
|
|
**(session.get("deskew_result") or {}),
|
|
|
|
|
deskew_result = {
|
|
|
|
|
**(cached.get("deskew_result") or {}),
|
|
|
|
|
"angle_applied": round(angle, 3),
|
|
|
|
|
"method_used": "manual",
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
logger.info(f"OCR Pipeline: manual deskew session {session_id}: {angle:.2f}°")
|
|
|
|
|
# Update cache
|
|
|
|
|
cached["deskewed_bgr"] = rotated
|
|
|
|
|
cached["binarized_png"] = binarized_png
|
|
|
|
|
cached["deskew_result"] = deskew_result
|
|
|
|
|
|
|
|
|
|
# Persist to DB
|
|
|
|
|
db_update = {
|
|
|
|
|
"deskewed_png": deskewed_png,
|
|
|
|
|
"deskew_result": deskew_result,
|
|
|
|
|
}
|
|
|
|
|
if binarized_png:
|
|
|
|
|
db_update["binarized_png"] = binarized_png
|
|
|
|
|
await update_session_db(session_id, **db_update)
|
|
|
|
|
|
|
|
|
|
logger.info(f"OCR Pipeline: manual deskew session {session_id}: {angle:.2f}")
|
|
|
|
|
|
|
|
|
|
return {
|
|
|
|
|
"session_id": session_id,
|
|
|
|
@@ -304,33 +463,14 @@ async def manual_deskew(session_id: str, req: ManualDeskewRequest):
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@router.get("/sessions/{session_id}/image/{image_type}")
|
|
|
|
|
async def get_image(session_id: str, image_type: str):
|
|
|
|
|
"""Serve session images: original, deskewed, dewarped, or binarized."""
|
|
|
|
|
session = _get_session(session_id)
|
|
|
|
|
|
|
|
|
|
if image_type == "original":
|
|
|
|
|
data = session.get("original_png")
|
|
|
|
|
elif image_type == "deskewed":
|
|
|
|
|
data = session.get("deskewed_png")
|
|
|
|
|
elif image_type == "dewarped":
|
|
|
|
|
data = session.get("dewarped_png")
|
|
|
|
|
elif image_type == "binarized":
|
|
|
|
|
data = session.get("binarized_png")
|
|
|
|
|
else:
|
|
|
|
|
raise HTTPException(status_code=400, detail=f"Unknown image type: {image_type}")
|
|
|
|
|
|
|
|
|
|
if not data:
|
|
|
|
|
raise HTTPException(status_code=404, detail=f"Image '{image_type}' not available yet")
|
|
|
|
|
|
|
|
|
|
return Response(content=data, media_type="image/png")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@router.post("/sessions/{session_id}/ground-truth/deskew")
|
|
|
|
|
async def save_deskew_ground_truth(session_id: str, req: DeskewGroundTruthRequest):
|
|
|
|
|
"""Save ground truth feedback for the deskew step."""
|
|
|
|
|
session = _get_session(session_id)
|
|
|
|
|
session = await get_session_db(session_id)
|
|
|
|
|
if not session:
|
|
|
|
|
raise HTTPException(status_code=404, detail=f"Session {session_id} not found")
|
|
|
|
|
|
|
|
|
|
ground_truth = session.get("ground_truth") or {}
|
|
|
|
|
gt = {
|
|
|
|
|
"is_correct": req.is_correct,
|
|
|
|
|
"corrected_angle": req.corrected_angle,
|
|
|
|
@@ -338,7 +478,13 @@ async def save_deskew_ground_truth(session_id: str, req: DeskewGroundTruthReques
|
|
|
|
|
"saved_at": datetime.utcnow().isoformat(),
|
|
|
|
|
"deskew_result": session.get("deskew_result"),
|
|
|
|
|
}
|
|
|
|
|
session["ground_truth"]["deskew"] = gt
|
|
|
|
|
ground_truth["deskew"] = gt
|
|
|
|
|
|
|
|
|
|
await update_session_db(session_id, ground_truth=ground_truth)
|
|
|
|
|
|
|
|
|
|
# Update cache
|
|
|
|
|
if session_id in _cache:
|
|
|
|
|
_cache[session_id]["ground_truth"] = ground_truth
|
|
|
|
|
|
|
|
|
|
logger.info(f"OCR Pipeline: ground truth deskew session {session_id}: "
|
|
|
|
|
f"correct={req.is_correct}, corrected_angle={req.corrected_angle}")
|
|
|
|
@@ -353,8 +499,11 @@ async def save_deskew_ground_truth(session_id: str, req: DeskewGroundTruthReques
|
|
|
|
|
@router.post("/sessions/{session_id}/dewarp")
|
|
|
|
|
async def auto_dewarp(session_id: str):
|
|
|
|
|
"""Detect and correct vertical shear on the deskewed image."""
|
|
|
|
|
session = _get_session(session_id)
|
|
|
|
|
deskewed_bgr = session.get("deskewed_bgr")
|
|
|
|
|
if session_id not in _cache:
|
|
|
|
|
await _load_session_to_cache(session_id)
|
|
|
|
|
cached = _get_cached(session_id)
|
|
|
|
|
|
|
|
|
|
deskewed_bgr = cached.get("deskewed_bgr")
|
|
|
|
|
if deskewed_bgr is None:
|
|
|
|
|
raise HTTPException(status_code=400, detail="Deskew must be completed before dewarp")
|
|
|
|
|
|
|
|
|
@@ -362,30 +511,37 @@ async def auto_dewarp(session_id: str):
|
|
|
|
|
dewarped_bgr, dewarp_info = dewarp_image(deskewed_bgr)
|
|
|
|
|
duration = time.time() - t0
|
|
|
|
|
|
|
|
|
|
# Encode dewarped as PNG
|
|
|
|
|
# Encode as PNG
|
|
|
|
|
success, png_buf = cv2.imencode(".png", dewarped_bgr)
|
|
|
|
|
dewarped_png = png_buf.tobytes() if success else session["deskewed_png"]
|
|
|
|
|
dewarped_png = png_buf.tobytes() if success else b""
|
|
|
|
|
|
|
|
|
|
session["dewarped_bgr"] = dewarped_bgr
|
|
|
|
|
session["dewarped_png"] = dewarped_png
|
|
|
|
|
session["auto_shear_degrees"] = dewarp_info.get("shear_degrees", 0.0)
|
|
|
|
|
session["dewarp_result"] = {
|
|
|
|
|
dewarp_result = {
|
|
|
|
|
"method_used": dewarp_info["method"],
|
|
|
|
|
"shear_degrees": dewarp_info["shear_degrees"],
|
|
|
|
|
"confidence": dewarp_info["confidence"],
|
|
|
|
|
"duration_seconds": round(duration, 2),
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
# Update cache
|
|
|
|
|
cached["dewarped_bgr"] = dewarped_bgr
|
|
|
|
|
cached["dewarp_result"] = dewarp_result
|
|
|
|
|
|
|
|
|
|
# Persist to DB
|
|
|
|
|
await update_session_db(
|
|
|
|
|
session_id,
|
|
|
|
|
dewarped_png=dewarped_png,
|
|
|
|
|
dewarp_result=dewarp_result,
|
|
|
|
|
auto_shear_degrees=dewarp_info.get("shear_degrees", 0.0),
|
|
|
|
|
current_step=3,
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
logger.info(f"OCR Pipeline: dewarp session {session_id}: "
|
|
|
|
|
f"method={dewarp_info['method']} shear={dewarp_info['shear_degrees']:.3f}° "
|
|
|
|
|
f"method={dewarp_info['method']} shear={dewarp_info['shear_degrees']:.3f} "
|
|
|
|
|
f"conf={dewarp_info['confidence']:.2f} ({duration:.2f}s)")
|
|
|
|
|
|
|
|
|
|
return {
|
|
|
|
|
"session_id": session_id,
|
|
|
|
|
"method_used": dewarp_info["method"],
|
|
|
|
|
"shear_degrees": dewarp_info["shear_degrees"],
|
|
|
|
|
"confidence": dewarp_info["confidence"],
|
|
|
|
|
"duration_seconds": round(duration, 2),
|
|
|
|
|
**dewarp_result,
|
|
|
|
|
"dewarped_image_url": f"/api/v1/ocr-pipeline/sessions/{session_id}/image/dewarped",
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
@@ -393,9 +549,11 @@ async def auto_dewarp(session_id: str):
|
|
|
|
|
@router.post("/sessions/{session_id}/dewarp/manual")
|
|
|
|
|
async def manual_dewarp(session_id: str, req: ManualDewarpRequest):
|
|
|
|
|
"""Apply shear correction with a manual angle."""
|
|
|
|
|
session = _get_session(session_id)
|
|
|
|
|
deskewed_bgr = session.get("deskewed_bgr")
|
|
|
|
|
if session_id not in _cache:
|
|
|
|
|
await _load_session_to_cache(session_id)
|
|
|
|
|
cached = _get_cached(session_id)
|
|
|
|
|
|
|
|
|
|
deskewed_bgr = cached.get("deskewed_bgr")
|
|
|
|
|
if deskewed_bgr is None:
|
|
|
|
|
raise HTTPException(status_code=400, detail="Deskew must be completed before dewarp")
|
|
|
|
|
|
|
|
|
@@ -407,17 +565,26 @@ async def manual_dewarp(session_id: str, req: ManualDewarpRequest):
|
|
|
|
|
dewarped_bgr = dewarp_image_manual(deskewed_bgr, shear_deg)
|
|
|
|
|
|
|
|
|
|
success, png_buf = cv2.imencode(".png", dewarped_bgr)
|
|
|
|
|
dewarped_png = png_buf.tobytes() if success else session.get("deskewed_png")
|
|
|
|
|
dewarped_png = png_buf.tobytes() if success else b""
|
|
|
|
|
|
|
|
|
|
session["dewarped_bgr"] = dewarped_bgr
|
|
|
|
|
session["dewarped_png"] = dewarped_png
|
|
|
|
|
session["dewarp_result"] = {
|
|
|
|
|
**(session.get("dewarp_result") or {}),
|
|
|
|
|
dewarp_result = {
|
|
|
|
|
**(cached.get("dewarp_result") or {}),
|
|
|
|
|
"method_used": "manual",
|
|
|
|
|
"shear_degrees": round(shear_deg, 3),
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
logger.info(f"OCR Pipeline: manual dewarp session {session_id}: shear={shear_deg:.3f}°")
|
|
|
|
|
# Update cache
|
|
|
|
|
cached["dewarped_bgr"] = dewarped_bgr
|
|
|
|
|
cached["dewarp_result"] = dewarp_result
|
|
|
|
|
|
|
|
|
|
# Persist to DB
|
|
|
|
|
await update_session_db(
|
|
|
|
|
session_id,
|
|
|
|
|
dewarped_png=dewarped_png,
|
|
|
|
|
dewarp_result=dewarp_result,
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
logger.info(f"OCR Pipeline: manual dewarp session {session_id}: shear={shear_deg:.3f}")
|
|
|
|
|
|
|
|
|
|
return {
|
|
|
|
|
"session_id": session_id,
|
|
|
|
@@ -430,8 +597,11 @@ async def manual_dewarp(session_id: str, req: ManualDewarpRequest):
|
|
|
|
|
@router.post("/sessions/{session_id}/ground-truth/dewarp")
|
|
|
|
|
async def save_dewarp_ground_truth(session_id: str, req: DewarpGroundTruthRequest):
|
|
|
|
|
"""Save ground truth feedback for the dewarp step."""
|
|
|
|
|
session = _get_session(session_id)
|
|
|
|
|
session = await get_session_db(session_id)
|
|
|
|
|
if not session:
|
|
|
|
|
raise HTTPException(status_code=404, detail=f"Session {session_id} not found")
|
|
|
|
|
|
|
|
|
|
ground_truth = session.get("ground_truth") or {}
|
|
|
|
|
gt = {
|
|
|
|
|
"is_correct": req.is_correct,
|
|
|
|
|
"corrected_shear": req.corrected_shear,
|
|
|
|
@@ -439,9 +609,168 @@ async def save_dewarp_ground_truth(session_id: str, req: DewarpGroundTruthReques
|
|
|
|
|
"saved_at": datetime.utcnow().isoformat(),
|
|
|
|
|
"dewarp_result": session.get("dewarp_result"),
|
|
|
|
|
}
|
|
|
|
|
session["ground_truth"]["dewarp"] = gt
|
|
|
|
|
ground_truth["dewarp"] = gt
|
|
|
|
|
|
|
|
|
|
await update_session_db(session_id, ground_truth=ground_truth)
|
|
|
|
|
|
|
|
|
|
if session_id in _cache:
|
|
|
|
|
_cache[session_id]["ground_truth"] = ground_truth
|
|
|
|
|
|
|
|
|
|
logger.info(f"OCR Pipeline: ground truth dewarp session {session_id}: "
|
|
|
|
|
f"correct={req.is_correct}, corrected_shear={req.corrected_shear}")
|
|
|
|
|
|
|
|
|
|
return {"session_id": session_id, "ground_truth": gt}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
# Column Detection Endpoints (Step 3)
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
@router.post("/sessions/{session_id}/columns")
|
|
|
|
|
async def detect_columns(session_id: str):
|
|
|
|
|
"""Run column detection on the dewarped image."""
|
|
|
|
|
if session_id not in _cache:
|
|
|
|
|
await _load_session_to_cache(session_id)
|
|
|
|
|
cached = _get_cached(session_id)
|
|
|
|
|
|
|
|
|
|
dewarped_bgr = cached.get("dewarped_bgr")
|
|
|
|
|
if dewarped_bgr is None:
|
|
|
|
|
raise HTTPException(status_code=400, detail="Dewarp must be completed before column detection")
|
|
|
|
|
|
|
|
|
|
t0 = time.time()
|
|
|
|
|
|
|
|
|
|
# Prepare images for analyze_layout
|
|
|
|
|
gray = cv2.cvtColor(dewarped_bgr, cv2.COLOR_BGR2GRAY)
|
|
|
|
|
# CLAHE-enhanced for layout analysis
|
|
|
|
|
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
|
|
|
|
|
layout_img = clahe.apply(gray)
|
|
|
|
|
# Binarized for text density
|
|
|
|
|
ocr_img = create_ocr_image(dewarped_bgr)
|
|
|
|
|
|
|
|
|
|
regions = analyze_layout(layout_img, ocr_img)
|
|
|
|
|
duration = time.time() - t0
|
|
|
|
|
|
|
|
|
|
columns = [asdict(r) for r in regions]
|
|
|
|
|
column_result = {
|
|
|
|
|
"columns": columns,
|
|
|
|
|
"duration_seconds": round(duration, 2),
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
# Persist to DB
|
|
|
|
|
await update_session_db(
|
|
|
|
|
session_id,
|
|
|
|
|
column_result=column_result,
|
|
|
|
|
current_step=3,
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
# Update cache
|
|
|
|
|
cached["column_result"] = column_result
|
|
|
|
|
|
|
|
|
|
col_count = len([c for c in columns if c["type"].startswith("column")])
|
|
|
|
|
logger.info(f"OCR Pipeline: columns session {session_id}: "
|
|
|
|
|
f"{col_count} columns detected ({duration:.2f}s)")
|
|
|
|
|
|
|
|
|
|
return {
|
|
|
|
|
"session_id": session_id,
|
|
|
|
|
**column_result,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@router.post("/sessions/{session_id}/columns/manual")
|
|
|
|
|
async def set_manual_columns(session_id: str, req: ManualColumnsRequest):
|
|
|
|
|
"""Override detected columns with manual definitions."""
|
|
|
|
|
column_result = {
|
|
|
|
|
"columns": req.columns,
|
|
|
|
|
"duration_seconds": 0,
|
|
|
|
|
"method": "manual",
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
await update_session_db(session_id, column_result=column_result)
|
|
|
|
|
|
|
|
|
|
if session_id in _cache:
|
|
|
|
|
_cache[session_id]["column_result"] = column_result
|
|
|
|
|
|
|
|
|
|
logger.info(f"OCR Pipeline: manual columns session {session_id}: "
|
|
|
|
|
f"{len(req.columns)} columns set")
|
|
|
|
|
|
|
|
|
|
return {"session_id": session_id, **column_result}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@router.post("/sessions/{session_id}/ground-truth/columns")
|
|
|
|
|
async def save_column_ground_truth(session_id: str, req: ColumnGroundTruthRequest):
|
|
|
|
|
"""Save ground truth feedback for the column detection step."""
|
|
|
|
|
session = await get_session_db(session_id)
|
|
|
|
|
if not session:
|
|
|
|
|
raise HTTPException(status_code=404, detail=f"Session {session_id} not found")
|
|
|
|
|
|
|
|
|
|
ground_truth = session.get("ground_truth") or {}
|
|
|
|
|
gt = {
|
|
|
|
|
"is_correct": req.is_correct,
|
|
|
|
|
"notes": req.notes,
|
|
|
|
|
"saved_at": datetime.utcnow().isoformat(),
|
|
|
|
|
"column_result": session.get("column_result"),
|
|
|
|
|
}
|
|
|
|
|
ground_truth["columns"] = gt
|
|
|
|
|
|
|
|
|
|
await update_session_db(session_id, ground_truth=ground_truth)
|
|
|
|
|
|
|
|
|
|
if session_id in _cache:
|
|
|
|
|
_cache[session_id]["ground_truth"] = ground_truth
|
|
|
|
|
|
|
|
|
|
return {"session_id": session_id, "ground_truth": gt}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def _get_columns_overlay(session_id: str) -> Response:
|
|
|
|
|
"""Generate dewarped image with column borders drawn on it."""
|
|
|
|
|
session = await get_session_db(session_id)
|
|
|
|
|
if not session:
|
|
|
|
|
raise HTTPException(status_code=404, detail=f"Session {session_id} not found")
|
|
|
|
|
|
|
|
|
|
column_result = session.get("column_result")
|
|
|
|
|
if not column_result or not column_result.get("columns"):
|
|
|
|
|
raise HTTPException(status_code=404, detail="No column data available")
|
|
|
|
|
|
|
|
|
|
# Load dewarped image
|
|
|
|
|
dewarped_png = await get_session_image(session_id, "dewarped")
|
|
|
|
|
if not dewarped_png:
|
|
|
|
|
raise HTTPException(status_code=404, detail="Dewarped image not available")
|
|
|
|
|
|
|
|
|
|
arr = np.frombuffer(dewarped_png, dtype=np.uint8)
|
|
|
|
|
img = cv2.imdecode(arr, cv2.IMREAD_COLOR)
|
|
|
|
|
if img is None:
|
|
|
|
|
raise HTTPException(status_code=500, detail="Failed to decode image")
|
|
|
|
|
|
|
|
|
|
# Color map for region types
|
|
|
|
|
colors = {
|
|
|
|
|
"column_en": (255, 180, 0), # Blue (BGR)
|
|
|
|
|
"column_de": (0, 200, 0), # Green
|
|
|
|
|
"column_example": (0, 140, 255), # Orange
|
|
|
|
|
"header": (128, 128, 128), # Gray
|
|
|
|
|
"footer": (128, 128, 128), # Gray
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
overlay = img.copy()
|
|
|
|
|
for col in column_result["columns"]:
|
|
|
|
|
x, y = col["x"], col["y"]
|
|
|
|
|
w, h = col["width"], col["height"]
|
|
|
|
|
color = colors.get(col.get("type", ""), (200, 200, 200))
|
|
|
|
|
|
|
|
|
|
# Semi-transparent fill
|
|
|
|
|
cv2.rectangle(overlay, (x, y), (x + w, y + h), color, -1)
|
|
|
|
|
|
|
|
|
|
# Solid border
|
|
|
|
|
cv2.rectangle(img, (x, y), (x + w, y + h), color, 3)
|
|
|
|
|
|
|
|
|
|
# Label
|
|
|
|
|
label = col.get("type", "unknown").replace("column_", "").upper()
|
|
|
|
|
cv2.putText(img, label, (x + 10, y + 30),
|
|
|
|
|
cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2)
|
|
|
|
|
|
|
|
|
|
# Blend overlay at 20% opacity
|
|
|
|
|
cv2.addWeighted(overlay, 0.2, img, 0.8, 0, img)
|
|
|
|
|
|
|
|
|
|
success, result_png = cv2.imencode(".png", img)
|
|
|
|
|
if not success:
|
|
|
|
|
raise HTTPException(status_code=500, detail="Failed to encode overlay image")
|
|
|
|
|
|
|
|
|
|
return Response(content=result_png.tobytes(), media_type="image/png")
|
|
|
|
|