breakpilot-lehrer/klausur-service/backend/ocr_pipeline_sessions_images.py

"""
OCR Pipeline Sessions Images — image serving, thumbnails, pipeline log,
categories, and document type detection.

Extracted from ocr_pipeline_sessions.py for modularity.

Lizenz: Apache 2.0
DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
"""

import logging
import time
from typing import Any, Dict

import cv2
import numpy as np
from fastapi import APIRouter, HTTPException, Query
from fastapi.responses import Response

from cv_vocab_pipeline import create_ocr_image, detect_document_type
from ocr_pipeline_common import (
    VALID_DOCUMENT_CATEGORIES,
    _append_pipeline_log,
    _cache,
    _get_base_image_png,
    _get_cached,
    _load_session_to_cache,
)
from ocr_pipeline_overlays import render_overlay
from ocr_pipeline_session_store import (
    get_session_db,
    get_session_image,
    update_session_db,
)

logger = logging.getLogger(__name__)

router = APIRouter(prefix="/api/v1/ocr-pipeline", tags=["ocr-pipeline"])


# ---------------------------------------------------------------------------
# Thumbnail & Log Endpoints
# ---------------------------------------------------------------------------

@router.get("/sessions/{session_id}/thumbnail")
async def get_session_thumbnail(session_id: str, size: int = Query(default=80, ge=16, le=400)):
    """Return a PNG thumbnail of the session's original image.

    The image is scaled so that its longest side equals ``size`` pixels while
    keeping the aspect ratio. Images smaller than ``size`` are scaled up.

    Args:
        session_id: Identifier of the OCR pipeline session.
        size: Target length of the longest side in pixels (16-400).

    Returns:
        A PNG ``Response`` with a one-hour public cache header.

    Raises:
        HTTPException: 404 if no original image is stored for the session,
            500 if the stored bytes cannot be decoded or the thumbnail
            cannot be encoded.
    """
    original_png = await get_session_image(session_id, "original")
    if not original_png:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found or no image")

    img = cv2.imdecode(np.frombuffer(original_png, dtype=np.uint8), cv2.IMREAD_COLOR)
    if img is None:
        raise HTTPException(status_code=500, detail="Failed to decode image")

    h, w = img.shape[:2]
    scale = size / max(h, w)
    # Clamp to 1 px: for very elongated images the short side would otherwise
    # truncate to 0, which cv2.resize rejects.
    new_w = max(1, int(w * scale))
    new_h = max(1, int(h * scale))
    thumb = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_AREA)

    ok, png_bytes = cv2.imencode(".png", thumb)
    if not ok:
        raise HTTPException(status_code=500, detail="Failed to encode thumbnail")

    return Response(content=png_bytes.tobytes(), media_type="image/png",
                    headers={"Cache-Control": "public, max-age=3600"})


@router.get("/sessions/{session_id}/pipeline-log")
async def get_pipeline_log(session_id: str):
    """Return the stored pipeline execution log of a session.

    Responds with ``{"session_id": ..., "pipeline_log": ...}``. When the
    session has no (or an empty) ``pipeline_log`` entry, an empty log of the
    form ``{"steps": []}`` is returned instead.

    Raises:
        HTTPException: 404 if the session lookup yields nothing.
    """
    record = await get_session_db(session_id)
    if record:
        log = record.get("pipeline_log")
        if not log:
            # Missing or empty log: hand back an empty step list.
            log = {"steps": []}
        return {"session_id": session_id, "pipeline_log": log}

    raise HTTPException(status_code=404, detail=f"Session {session_id} not found")


@router.get("/categories")
async def list_categories():
    """Return all valid document categories in sorted order.

    The response has the shape ``{"categories": [...]}``.
    """
    ordered = sorted(VALID_DOCUMENT_CATEGORIES)
    return {"categories": ordered}


# ---------------------------------------------------------------------------
# Image Endpoints
# ---------------------------------------------------------------------------

@router.get("/sessions/{session_id}/image/{image_type}")
async def get_image(session_id: str, image_type: str):
    """Serve a session image or a rendered overlay as PNG.

    Accepted ``image_type`` values:

    * stored images: ``original``, ``oriented``, ``cropped``, ``deskewed``,
      ``dewarped``, ``binarized``, ``clean``
    * overlays (delegated to ``render_overlay``): ``structure-overlay``,
      ``columns-overlay``, ``rows-overlay``, ``words-overlay``

    Args:
        session_id: Identifier of the OCR pipeline session.
        image_type: One of the values listed above.

    Returns:
        For overlays, whatever ``render_overlay`` returns; otherwise a PNG
        ``Response`` with the image bytes.

    Raises:
        HTTPException: 400 for an unknown ``image_type``, 404 if the
            requested image is not available.
    """
    # Maps the public overlay image type to the kind passed to render_overlay.
    overlay_kinds = {
        "structure-overlay": "structure",
        "columns-overlay": "columns",
        "rows-overlay": "rows",
        "words-overlay": "words",
    }
    stored_types = {"original", "oriented", "cropped", "deskewed", "dewarped", "binarized", "clean"}

    if image_type not in overlay_kinds and image_type not in stored_types:
        raise HTTPException(status_code=400, detail=f"Unknown image type: {image_type}")

    overlay_kind = overlay_kinds.get(image_type)
    if overlay_kind is not None:
        return await render_overlay(overlay_kind, session_id)

    # Fast path: only the binarized image is served from the in-memory cache.
    if image_type == "binarized":
        cached = _cache.get(session_id)
        if cached and cached.get("binarized_png"):
            return Response(content=cached["binarized_png"], media_type="image/png")

    # Load from DB — for cropped/dewarped, fall back through the chain
    # (the exact fallback order lives in _get_base_image_png).
    if image_type in ("cropped", "dewarped"):
        data = await _get_base_image_png(session_id)
    else:
        data = await get_session_image(session_id, image_type)
    if not data:
        raise HTTPException(status_code=404, detail=f"Image '{image_type}' not available yet")

    return Response(content=data, media_type="image/png")


# ---------------------------------------------------------------------------
# Document Type Detection (between Dewarp and Columns)
# ---------------------------------------------------------------------------

@router.post("/sessions/{session_id}/detect-type")
async def detect_type(session_id: str):
    """Detect document type (vocab_table, full_text, generic_table).

    Should be called after crop. Uses the cached ``cropped_bgr`` image and
    falls back to ``dewarped_bgr`` if no cropped image is cached. The result
    is persisted to the session DB, stored in the in-memory cache under
    ``doc_type_result``, and appended to the pipeline log so the frontend can
    decide the pipeline flow.

    Args:
        session_id: Identifier of the OCR pipeline session.

    Returns:
        A dict with ``session_id``, ``doc_type``, ``confidence``,
        ``pipeline``, ``skip_steps``, ``features`` and ``duration_seconds``.

    Raises:
        HTTPException: 400 if neither a cropped nor a dewarped image is
            cached for the session.
    """
    if session_id not in _cache:
        await _load_session_to_cache(session_id)
    # NOTE(review): _get_cached presumably raises if the session is still
    # missing after the load attempt — confirm in ocr_pipeline_common.
    cached = _get_cached(session_id)

    # Prefer the cropped image; explicit None checks because the values are
    # arrays, whose truthiness is ambiguous.
    img_bgr = cached.get("cropped_bgr")
    if img_bgr is None:
        img_bgr = cached.get("dewarped_bgr")
    if img_bgr is None:
        raise HTTPException(status_code=400, detail="Crop or dewarp must be completed first")

    # perf_counter is monotonic, so the measured duration cannot be skewed
    # (or become negative) by wall-clock adjustments.
    t0 = time.perf_counter()
    ocr_img = create_ocr_image(img_bgr)
    result = detect_document_type(ocr_img, img_bgr)
    duration = time.perf_counter() - t0

    result_dict = {
        "doc_type": result.doc_type,
        "confidence": result.confidence,
        "pipeline": result.pipeline,
        "skip_steps": result.skip_steps,
        "features": result.features,
        "duration_seconds": round(duration, 2),
    }

    # Persist to DB
    await update_session_db(
        session_id,
        doc_type=result.doc_type,
        doc_type_result=result_dict,
    )

    cached["doc_type_result"] = result_dict

    logger.info(f"OCR Pipeline: detect-type session {session_id}: "
                f"{result.doc_type} (confidence={result.confidence}, {duration:.2f}s)")

    # Only scalar feature values are copied into the pipeline log entry.
    scalar_features = {
        k: v
        for k, v in (result.features or {}).items()
        if isinstance(v, (int, float, str, bool))
    }
    await _append_pipeline_log(session_id, "detect_type", {
        "doc_type": result.doc_type,
        "pipeline": result.pipeline,
        "confidence": result.confidence,
        **scalar_features,
    }, duration_ms=int(duration * 1000))

    return {"session_id": session_id, **result_dict}