feat: Dewarp-Korrektur als Schritt 2 in OCR Pipeline (7 Schritte)

Implementiert Buchwoelbungs-Entzerrung mit zwei Methoden:
- Methode A: Vertikale-Kanten-Analyse (Sobel + Polynom 2. Grades)
- Methode B: Textzeilen-Baseline (Tesseract + Baseline-Kruemmung)
Beste Methode wird automatisch gewaehlt, manueller Slider (-3 bis +3).

Backend: 3 neue Endpoints (auto/manual dewarp, ground truth)
Frontend: StepDewarp + DewarpControls, Pipeline von 6 auf 7 Schritte

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-02-26 16:46:41 +01:00
parent d552fd8b6b
commit 589d2f811a
13 changed files with 858 additions and 28 deletions

View File

@@ -1,13 +1,14 @@
"""
OCR Pipeline API - Schrittweise Seitenrekonstruktion.
Zerlegt den OCR-Prozess in 6 einzelne Schritte:
Zerlegt den OCR-Prozess in 7 einzelne Schritte:
1. Deskewing - Scan begradigen
2. Spaltenerkennung - Unsichtbare Spalten finden
3. Worterkennung - OCR mit Bounding Boxes
4. Koordinatenzuweisung - Exakte Positionen
5. Seitenrekonstruktion - Seite nachbauen
6. Ground Truth Validierung - Gesamtpruefung
2. Dewarping - Buchwoelbung entzerren
3. Spaltenerkennung - Unsichtbare Spalten finden
4. Worterkennung - OCR mit Bounding Boxes
5. Koordinatenzuweisung - Exakte Positionen
6. Seitenrekonstruktion - Seite nachbauen
7. Ground Truth Validierung - Gesamtpruefung
Lizenz: Apache 2.0
DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
@@ -30,6 +31,8 @@ from cv_vocab_pipeline import (
create_ocr_image,
deskew_image,
deskew_image_by_word_alignment,
dewarp_image,
dewarp_image_manual,
render_image_high_res,
render_pdf_high_res,
)
@@ -77,6 +80,16 @@ class DeskewGroundTruthRequest(BaseModel):
notes: Optional[str] = None
class ManualDewarpRequest(BaseModel):
    """Request body for the manual dewarp endpoint (`POST .../dewarp/manual`)."""

    # Multiplier handed to dewarp_image_manual together with the stored
    # displacement map. The endpoint clamps it to [-3.0, 3.0] and treats
    # |scale| < 0.01 as "no dewarp" (the deskewed image is used unchanged).
    scale: float
class DewarpGroundTruthRequest(BaseModel):
    """Request body for saving ground-truth feedback on the dewarp step."""

    # Reviewer verdict on the dewarp result; stored verbatim in the session.
    is_correct: bool
    # Optional replacement scale; presumably the slider value the reviewer
    # considers right when is_correct is False (TODO confirm with frontend).
    corrected_scale: Optional[float] = None
    # Optional free-text remark; stored verbatim in the session.
    notes: Optional[str] = None
# ---------------------------------------------------------------------------
# Endpoints
# ---------------------------------------------------------------------------
@@ -116,6 +129,10 @@ async def create_session(file: UploadFile = File(...)):
"deskewed_png": None,
"binarized_png": None,
"deskew_result": None,
"dewarped_bgr": None,
"dewarped_png": None,
"dewarp_result": None,
"displacement_map": None,
"ground_truth": {},
"current_step": 1,
}
@@ -263,13 +280,15 @@ async def manual_deskew(session_id: str, req: ManualDeskewRequest):
@router.get("/sessions/{session_id}/image/{image_type}")
async def get_image(session_id: str, image_type: str):
"""Serve session images: original, deskewed, or binarized."""
"""Serve session images: original, deskewed, dewarped, or binarized."""
session = _get_session(session_id)
if image_type == "original":
data = session.get("original_png")
elif image_type == "deskewed":
data = session.get("deskewed_png")
elif image_type == "dewarped":
data = session.get("dewarped_png")
elif image_type == "binarized":
data = session.get("binarized_png")
else:
@@ -299,3 +318,106 @@ async def save_deskew_ground_truth(session_id: str, req: DeskewGroundTruthReques
f"correct={req.is_correct}, corrected_angle={req.corrected_angle}")
return {"session_id": session_id, "ground_truth": gt}
# ---------------------------------------------------------------------------
# Dewarp Endpoints
# ---------------------------------------------------------------------------
@router.post("/sessions/{session_id}/dewarp")
async def auto_dewarp(session_id: str):
    """Dewarp the session's deskewed image and cache the outcome in the session.

    Calls ``dewarp_image`` on the stored deskewed BGR image; method selection
    happens inside that helper, this endpoint only reports what it returned.

    Stores ``dewarped_bgr``, ``dewarped_png``, ``dewarp_result`` and
    ``displacement_map`` on the session.

    Returns:
        A dict with the session id, the reported method, curvature,
        confidence, the processing time in seconds (rounded to 2 decimals)
        and the URL under which the dewarped image is served.

    Raises:
        HTTPException: 400 if the session has no deskewed image yet.
    """
    session = _get_session(session_id)

    source_bgr = session.get("deskewed_bgr")
    if source_bgr is None:
        raise HTTPException(status_code=400, detail="Deskew must be completed before dewarp")

    started_at = time.time()
    dewarped_bgr, dewarp_info = dewarp_image(source_bgr)
    duration = time.time() - started_at

    # If PNG encoding fails, keep serving the deskewed PNG so the image
    # endpoint still has something to return for "dewarped".
    encoded_ok, encoded = cv2.imencode(".png", dewarped_bgr)
    if encoded_ok:
        dewarped_png = encoded.tobytes()
    else:
        dewarped_png = session["deskewed_png"]

    session["dewarped_bgr"] = dewarped_bgr
    session["dewarped_png"] = dewarped_png

    summary = {
        "method_used": dewarp_info["method"],
        "curvature_px": dewarp_info["curvature_px"],
        "confidence": dewarp_info["confidence"],
        "duration_seconds": round(duration, 2),
    }
    session["dewarp_result"] = summary
    # Kept so the manual endpoint can re-apply the same map with another scale.
    session["displacement_map"] = dewarp_info.get("displacement_map")

    logger.info(f"OCR Pipeline: dewarp session {session_id}: "
                f"method={dewarp_info['method']} curvature={dewarp_info['curvature_px']:.1f}px "
                f"conf={dewarp_info['confidence']:.2f} ({duration:.2f}s)")

    # Fresh dict for the response so the stored summary is not aliased.
    return {
        "session_id": session_id,
        **summary,
        "dewarped_image_url": f"/api/v1/ocr-pipeline/sessions/{session_id}/image/dewarped",
    }
@router.post("/sessions/{session_id}/dewarp/manual")
async def manual_dewarp(session_id: str, req: ManualDewarpRequest):
    """Re-apply the stored displacement map with a caller-chosen scale.

    The requested scale is clamped to [-3.0, 3.0]. If the session has no
    displacement map, or the clamped scale is effectively zero
    (|scale| < 0.01), the deskewed image is used unchanged.

    Stores ``dewarped_bgr`` and ``dewarped_png`` on the session and merges
    ``method_used="manual"`` and ``scale_applied`` into ``dewarp_result``.

    Returns:
        A dict with the session id, the applied scale (rounded to 2
        decimals), ``method_used="manual"`` and the dewarped image URL.

    Raises:
        HTTPException: 400 if the session has no deskewed image yet, or if
            ``req.scale`` is NaN.
    """
    # Function-scope import: only part of the file's import block is visible.
    import math

    session = _get_session(session_id)
    deskewed_bgr = session.get("deskewed_bgr")
    displacement_map = session.get("displacement_map")

    if deskewed_bgr is None:
        raise HTTPException(status_code=400, detail="Deskew must be completed before dewarp")

    # NaN compares false against everything, so the min/max clamp below would
    # silently turn it into +3.0 (the strongest warp). Reject it explicitly.
    # +/-inf is still clamped to the slider limits.
    if math.isnan(req.scale):
        raise HTTPException(status_code=400, detail="scale must be a number, not NaN")

    scale = max(-3.0, min(3.0, req.scale))

    if displacement_map is None or abs(scale) < 0.01:
        # No displacement map or zero scale — use deskewed as-is
        dewarped_bgr = deskewed_bgr
    else:
        dewarped_bgr = dewarp_image_manual(deskewed_bgr, displacement_map, scale)

    # If PNG encoding fails, fall back to the deskewed PNG.
    success, png_buf = cv2.imencode(".png", dewarped_bgr)
    dewarped_png = png_buf.tobytes() if success else session.get("deskewed_png")

    session["dewarped_bgr"] = dewarped_bgr
    session["dewarped_png"] = dewarped_png
    # Keep any earlier auto-dewarp metrics, but mark the result as manual.
    session["dewarp_result"] = {
        **(session.get("dewarp_result") or {}),
        "method_used": "manual",
        "scale_applied": round(scale, 2),
    }

    logger.info(f"OCR Pipeline: manual dewarp session {session_id}: scale={scale:.2f}")

    return {
        "session_id": session_id,
        "scale_applied": round(scale, 2),
        "method_used": "manual",
        "dewarped_image_url": f"/api/v1/ocr-pipeline/sessions/{session_id}/image/dewarped",
    }
@router.post("/sessions/{session_id}/ground-truth/dewarp")
async def save_dewarp_ground_truth(session_id: str, req: DewarpGroundTruthRequest):
    """Record reviewer feedback for the dewarp step on the session.

    The feedback is stored under ``session["ground_truth"]["dewarp"]``
    together with a UTC timestamp (naive ISO-8601 string) and the session's
    current ``dewarp_result``.

    Returns:
        A dict with the session id and the stored feedback entry.
    """
    session = _get_session(session_id)

    feedback = dict(
        is_correct=req.is_correct,
        corrected_scale=req.corrected_scale,
        notes=req.notes,
        saved_at=datetime.utcnow().isoformat(),
        # Snapshot of what was being judged (may be None if dewarp never ran).
        dewarp_result=session.get("dewarp_result"),
    )
    session["ground_truth"]["dewarp"] = feedback

    logger.info(f"OCR Pipeline: ground truth dewarp session {session_id}: "
                f"correct={req.is_correct}, corrected_scale={req.corrected_scale}")

    return {"session_id": session_id, "ground_truth": feedback}