Python (6 files in klausur-service): - rbac.py (1,132 → 4), admin_api.py (1,012 → 4) - routes/eh.py (1,111 → 4), ocr_pipeline_geometry.py (1,105 → 5) Python (2 files in backend-lehrer): - unit_api.py (1,226 → 6), game_api.py (1,129 → 5) Website (6 page files): - 4x klausur-korrektur pages (1,249-1,328 LOC each) → shared components in website/components/klausur-korrektur/ (17 shared files) - companion (1,057 → 10), magic-help (1,017 → 8) All re-export barrels preserve backward compatibility. Zero import errors verified. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
237 lines
7.8 KiB
Python
237 lines
7.8 KiB
Python
"""
|
|
OCR Pipeline Deskew Endpoints (Step 2)
|
|
|
|
Auto deskew, manual deskew, and ground truth for the deskew step.
|
|
Extracted from ocr_pipeline_geometry.py for file-size compliance.
|
|
"""
|
|
|
|
import logging
|
|
import time
|
|
from datetime import datetime
|
|
|
|
import cv2
|
|
from fastapi import APIRouter, HTTPException
|
|
|
|
from cv_vocab_pipeline import (
|
|
create_ocr_image,
|
|
deskew_image,
|
|
deskew_image_by_word_alignment,
|
|
deskew_two_pass,
|
|
)
|
|
from ocr_pipeline_session_store import (
|
|
get_session_db,
|
|
update_session_db,
|
|
)
|
|
from ocr_pipeline_common import (
|
|
_cache,
|
|
_load_session_to_cache,
|
|
_get_cached,
|
|
_append_pipeline_log,
|
|
ManualDeskewRequest,
|
|
DeskewGroundTruthRequest,
|
|
)
|
|
|
|
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
|
|
# Router for the deskew endpoints below; every route registered on it is
# served under the "/api/v1/ocr-pipeline" prefix and tagged "ocr-pipeline".
router = APIRouter(prefix="/api/v1/ocr-pipeline", tags=["ocr-pipeline"])
|
|
|
|
|
|
@router.post("/sessions/{session_id}/deskew")
async def auto_deskew(session_id: str):
    """Run automatic deskew for a session and persist the result.

    The oriented image is used when the session cache holds one, otherwise
    the original image. ``deskew_two_pass`` supplies the rotation that is
    actually applied; ``deskew_image`` and ``deskew_image_by_word_alignment``
    are run in addition purely so that their angles can be reported.

    Args:
        session_id: Identifier of the OCR pipeline session.

    Returns:
        A dict with the session id, every key of the stored
        ``deskew_result`` and the URLs of the deskewed and binarized images.

    Raises:
        HTTPException: 400 if the cached session holds neither an oriented
            nor an original image.
    """
    # Ensure session is in cache
    if session_id not in _cache:
        await _load_session_to_cache(session_id)
    cached = _get_cached(session_id)

    # Deskew runs right after orientation -- use oriented image, fall back to original
    img_bgr = next((v for k in ("oriented_bgr", "original_bgr")
                    if (v := cached.get(k)) is not None), None)
    if img_bgr is None:
        raise HTTPException(status_code=400, detail="No image available for deskewing")

    # perf_counter is monotonic, so the measured duration cannot be skewed
    # by wall-clock adjustments while the deskew is running.
    t0 = time.perf_counter()

    # Two-pass deskew: iterative (+-5 deg) + word-alignment residual check
    deskewed_bgr, angle_applied, two_pass_debug = deskew_two_pass(img_bgr.copy())

    # Also run individual methods for reporting (non-authoritative).
    # Failures only zero the reported angle, but are logged so that they
    # do not go unnoticed.
    try:
        _, angle_hough = deskew_image(img_bgr.copy())
    except Exception as e:
        logger.warning(f"deskew_image (reporting only) failed: {e}")
        angle_hough = 0.0

    success_enc, png_orig = cv2.imencode(".png", img_bgr)
    orig_bytes = png_orig.tobytes() if success_enc else b""
    try:
        _, angle_wa = deskew_image_by_word_alignment(orig_bytes)
    except Exception as e:
        logger.warning(f"deskew_image_by_word_alignment (reporting only) failed: {e}")
        angle_wa = 0.0

    angle_iterative = two_pass_debug.get("pass1_angle", 0.0)
    angle_residual = two_pass_debug.get("pass2_angle", 0.0)
    angle_textline = two_pass_debug.get("pass3_angle", 0.0)

    duration = time.perf_counter() - t0

    # Label the result after the last pass that reported a non-negligible angle.
    method_used = "three_pass" if abs(angle_textline) >= 0.01 else (
        "two_pass" if abs(angle_residual) >= 0.01 else "iterative"
    )

    # Encode as PNG
    success, deskewed_png_buf = cv2.imencode(".png", deskewed_bgr)
    if not success:
        logger.error(f"OCR Pipeline: PNG encoding of deskewed image failed "
                     f"for session {session_id}")
    deskewed_png = deskewed_png_buf.tobytes() if success else b""

    # Create binarized version (best effort: a failure leaves it as None)
    binarized_png = None
    try:
        binarized = create_ocr_image(deskewed_bgr)
        success_bin, bin_buf = cv2.imencode(".png", binarized)
        binarized_png = bin_buf.tobytes() if success_bin else None
    except Exception as e:
        logger.warning(f"Binarization failed: {e}")

    # Confidence falls linearly with the applied angle and is floored at 0.5.
    confidence = max(0.5, 1.0 - abs(angle_applied) / 5.0)

    deskew_result = {
        "angle_hough": round(angle_hough, 3),
        "angle_word_alignment": round(angle_wa, 3),
        "angle_iterative": round(angle_iterative, 3),
        "angle_residual": round(angle_residual, 3),
        "angle_textline": round(angle_textline, 3),
        "angle_applied": round(angle_applied, 3),
        "method_used": method_used,
        "confidence": round(confidence, 2),
        "duration_seconds": round(duration, 2),
        "two_pass_debug": two_pass_debug,
    }

    # Update cache
    cached["deskewed_bgr"] = deskewed_bgr
    cached["binarized_png"] = binarized_png
    cached["deskew_result"] = deskew_result

    # Persist to DB; the binarized image is only written when one was produced.
    db_update = {
        "deskewed_png": deskewed_png,
        "deskew_result": deskew_result,
        "current_step": 3,
    }
    if binarized_png:
        db_update["binarized_png"] = binarized_png
    await update_session_db(session_id, **db_update)

    logger.info(f"OCR Pipeline: deskew session {session_id}: "
                f"hough={angle_hough:.2f} wa={angle_wa:.2f} "
                f"iter={angle_iterative:.2f} residual={angle_residual:.2f} "
                f"textline={angle_textline:.2f} "
                f"-> {method_used} total={angle_applied:.2f}")

    await _append_pipeline_log(session_id, "deskew", {
        "angle_applied": round(angle_applied, 3),
        "angle_iterative": round(angle_iterative, 3),
        "angle_residual": round(angle_residual, 3),
        "angle_textline": round(angle_textline, 3),
        "confidence": round(confidence, 2),
        "method": method_used,
    }, duration_ms=int(duration * 1000))

    return {
        "session_id": session_id,
        **deskew_result,
        "deskewed_image_url": f"/api/v1/ocr-pipeline/sessions/{session_id}/image/deskewed",
        "binarized_image_url": f"/api/v1/ocr-pipeline/sessions/{session_id}/image/binarized",
    }
|
|
|
|
|
|
@router.post("/sessions/{session_id}/deskew/manual")
async def manual_deskew(session_id: str, req: ManualDeskewRequest):
    """Apply a manual rotation angle to the oriented image.

    The oriented image is used when the session cache holds one, otherwise
    the original image. The requested angle is clamped to [-5.0, 5.0] and
    the image is rotated about its (integer) centre onto a canvas of the
    same width and height, replicating border pixels.

    The stored ``deskew_result`` keeps any keys from a previous result and
    overrides ``angle_applied`` and ``method_used``.

    Args:
        session_id: Identifier of the OCR pipeline session.
        req: Request body; its ``angle`` attribute is the requested rotation.

    Returns:
        A dict with the session id, the applied (clamped) angle, the method
        name ``"manual"`` and the URL of the deskewed image.

    Raises:
        HTTPException: 400 if the cached session holds neither an oriented
            nor an original image.
    """
    if session_id not in _cache:
        await _load_session_to_cache(session_id)
    cached = _get_cached(session_id)

    img_bgr = next((v for k in ("oriented_bgr", "original_bgr")
                    if (v := cached.get(k)) is not None), None)
    if img_bgr is None:
        raise HTTPException(status_code=400, detail="No image available for deskewing")

    # Clamp the requested angle to the supported +-5 degree range.
    angle = max(-5.0, min(5.0, req.angle))

    h, w = img_bgr.shape[:2]
    center = (w // 2, h // 2)
    M = cv2.getRotationMatrix2D(center, angle, 1.0)
    rotated = cv2.warpAffine(img_bgr, M, (w, h),
                             flags=cv2.INTER_LINEAR,
                             borderMode=cv2.BORDER_REPLICATE)

    success, png_buf = cv2.imencode(".png", rotated)
    if not success:
        logger.error(f"OCR Pipeline: PNG encoding of manually deskewed image "
                     f"failed for session {session_id}")
    deskewed_png = png_buf.tobytes() if success else b""

    # Binarize (best effort: a failure leaves binarized_png as None, but is
    # logged instead of being silently discarded).
    binarized_png = None
    try:
        binarized = create_ocr_image(rotated)
        success_bin, bin_buf = cv2.imencode(".png", binarized)
        binarized_png = bin_buf.tobytes() if success_bin else None
    except Exception as e:
        logger.warning(f"Binarization failed: {e}")

    deskew_result = {
        **(cached.get("deskew_result") or {}),
        "angle_applied": round(angle, 3),
        "method_used": "manual",
    }

    # Update cache
    cached["deskewed_bgr"] = rotated
    cached["binarized_png"] = binarized_png
    cached["deskew_result"] = deskew_result

    # Persist to DB; the binarized image is only written when one was produced.
    db_update = {
        "deskewed_png": deskewed_png,
        "deskew_result": deskew_result,
    }
    if binarized_png:
        db_update["binarized_png"] = binarized_png
    await update_session_db(session_id, **db_update)

    logger.info(f"OCR Pipeline: manual deskew session {session_id}: {angle:.2f}")

    return {
        "session_id": session_id,
        "angle_applied": round(angle, 3),
        "method_used": "manual",
        "deskewed_image_url": f"/api/v1/ocr-pipeline/sessions/{session_id}/image/deskewed",
    }
|
|
|
|
|
|
@router.post("/sessions/{session_id}/ground-truth/deskew")
async def save_deskew_ground_truth(session_id: str, req: DeskewGroundTruthRequest):
    """Save ground truth feedback for the deskew step.

    The feedback is stored under the ``"deskew"`` key of the session's
    ``ground_truth`` mapping, together with a snapshot of the session's
    current ``deskew_result``. The cached session is updated as well when
    the session is present in the cache.

    Args:
        session_id: Identifier of the OCR pipeline session.
        req: Feedback payload; ``is_correct``, ``corrected_angle`` and
            ``notes`` are read from it.

    Returns:
        A dict with the session id and the ground truth entry just saved.

    Raises:
        HTTPException: 404 if no session with this id is found.
    """
    # Imported locally so this function is self-contained.
    from datetime import timezone

    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC
    # timestamp and drop the tzinfo so the stored string keeps the same
    # naive ISO-8601 format (no "+00:00" suffix) as before.
    saved_at = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

    ground_truth = session.get("ground_truth") or {}
    gt = {
        "is_correct": req.is_correct,
        "corrected_angle": req.corrected_angle,
        "notes": req.notes,
        "saved_at": saved_at,
        "deskew_result": session.get("deskew_result"),
    }
    ground_truth["deskew"] = gt

    await update_session_db(session_id, ground_truth=ground_truth)

    # Update cache
    if session_id in _cache:
        _cache[session_id]["ground_truth"] = ground_truth

    logger.info(f"OCR Pipeline: ground truth deskew session {session_id}: "
                f"correct={req.is_correct}, corrected_angle={req.corrected_angle}")

    return {"session_id": session_id, "ground_truth": gt}
|