breakpilot-lehrer/klausur-service/backend/grid_editor_api_grid.py

"""
Grid Editor API — grid build, save, and retrieve endpoints.
"""

import logging
import time
from typing import Any, Dict

from fastapi import APIRouter, HTTPException, Query, Request

from grid_build_core import _build_grid_core
from ocr_pipeline_session_store import (
    get_session_db,
    update_session_db,
)
from ocr_pipeline_common import (
    _cache,
    _load_session_to_cache,
    _get_cached,
)

logger = logging.getLogger(__name__)

router = APIRouter(prefix="/api/v1/ocr-pipeline", tags=["grid-editor"])


@router.post("/sessions/{session_id}/build-grid")
async def build_grid(
    session_id: str,
    ipa_mode: str = Query("auto", pattern="^(auto|all|de|en|none)$"),
    syllable_mode: str = Query("auto", pattern="^(auto|all|de|en|none)$"),
    enhance: bool = Query(True, description="Step 3: CLAHE + denoise for degraded scans"),
    max_cols: int = Query(0, description="Step 2: Max column count (0=unlimited)"),
    min_conf: int = Query(0, description="Step 1: Min OCR confidence (0=auto)"),
):
    """Build the zone-aware grid for a session from its stored word results.

    The session must already hold Kombi word results (paddle-kombi or
    rapid-kombi run beforehand). The grid itself is produced by
    ``_build_grid_core``; this endpoint additionally stores a reference
    snapshot of the automatic result under ``ground_truth["auto_grid_snapshot"]``
    and persists everything with ``current_step=11``.

    Query params:
        ipa_mode: "auto" (only when English IPA detected), "all" (force),
            "none" (skip). "de" and "en" are also accepted by the pattern;
            their handling lives in ``_build_grid_core``.
        syllable_mode: "auto" (only when original has dividers), "all"
            (force), "none" (skip). "de"/"en" accepted as above.
        enhance: forwarded unchanged to ``_build_grid_core``.
        max_cols: column limit; values <= 0 are forwarded as ``None``.
        min_conf: confidence threshold; values <= 0 are forwarded as ``None``.

    Returns:
        The StructuredGrid dict (zones with their own columns, rows and
        cells) as returned by ``_build_grid_core``.

    Raises:
        HTTPException: 404 when the session does not exist, 400 when
            ``_build_grid_core`` raises ``ValueError``.
    """
    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    # A non-positive query value means "no explicit setting" for the core.
    column_limit = max_cols if max_cols > 0 else None
    confidence_floor = min_conf if min_conf > 0 else None

    try:
        result = await _build_grid_core(
            session_id,
            session,
            ipa_mode=ipa_mode,
            syllable_mode=syllable_mode,
            enhance=enhance,
            max_columns=column_limit,
            min_conf=confidence_floor,
        )
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))

    # Imported here rather than at module level to avoid a circular
    # dependency with ocr_pipeline_regression.
    from ocr_pipeline_regression import _build_reference_snapshot

    # Derive the pipeline label for the snapshot from the OCR engine that
    # produced the stored words.
    stored_words = session.get("word_result") or {}
    engine = stored_words.get("ocr_engine", "")
    if engine == "paddle_direct":
        auto_pipeline = "paddle-direct"
    elif engine == "kombi" or engine == "rapid_kombi":
        auto_pipeline = "kombi"
    else:
        auto_pipeline = "pipeline"

    # Keep the automatic result so it can later be compared with manual
    # corrections.
    ground_truth = session.get("ground_truth") or {}
    ground_truth["auto_grid_snapshot"] = _build_reference_snapshot(
        result, pipeline=auto_pipeline
    )

    # Persist grid + snapshot and advance current_step to 11
    # (reconstruction complete).
    await update_session_db(
        session_id,
        grid_editor_result=result,
        ground_truth=ground_truth,
        current_step=11,
    )

    summary = result.get("summary", {})
    logger.info(
        "build-grid session %s: %d zones, %d cols, %d rows, %d cells, "
        "%d boxes in %.2fs",
        session_id,
        len(result.get("zones", [])),
        summary.get("total_columns", 0),
        summary.get("total_rows", 0),
        summary.get("total_cells", 0),
        result.get("boxes_detected", 0),
        result.get("duration_seconds", 0),
    )

    return result


def _rerun_parse_tesseract_conf(raw: Any) -> int:
    """Return a Tesseract confidence as int, or -1 when it cannot be parsed.

    Accepts ints, floats and numeric strings (including float strings such
    as "96.5"), so a word is not discarded merely because the confidence
    was reported with decimals.
    """
    try:
        return int(float(raw))
    except (TypeError, ValueError, OverflowError):
        return -1


def _rerun_word_result_cells(words: list) -> list:
    """Build the single-cell ``word_result["cells"]`` payload from word dicts.

    Each word must provide "text", "left", "top", "width" and "height";
    "conf" defaults to 0 when missing.
    """
    word_boxes = [
        {
            "text": w["text"], "left": w["left"], "top": w["top"],
            "width": w["width"], "height": w["height"], "conf": w.get("conf", 0),
        }
        for w in words
    ]
    return [{"text": " ".join(w["text"] for w in words), "word_boxes": word_boxes}]


@router.post("/sessions/{session_id}/rerun-ocr-and-build-grid")
async def rerun_ocr_and_build_grid(
    session_id: str,
    ipa_mode: str = Query("auto", pattern="^(auto|all|de|en|none)$"),
    syllable_mode: str = Query("auto", pattern="^(auto|all|de|en|none)$"),
    enhance: bool = Query(True, description="Step 3: CLAHE + denoise for degraded scans"),
    max_cols: int = Query(0, description="Step 2: Max column count (0=unlimited)"),
    min_conf: int = Query(0, description="Step 1: Min OCR confidence (0=auto)"),
    vision_fusion: bool = Query(False, description="Step 4: Vision-LLM fusion for degraded scans"),
    doc_category: str = Query("", description="Document type for Vision-LLM prompt context"),
):
    """Re-run OCR with quality settings, then rebuild the grid.

    Unlike build-grid (which only rebuilds from existing words), this
    endpoint re-runs OCR on the cached cropped (or, failing that, dewarped)
    image, stores the new ``word_result`` in the session and then builds
    the grid from it.

    Steps executed: scan quality scoring -> optional image enhancement ->
    RapidOCR + Tesseract -> merge/deduplicate -> optional Vision-LLM fusion
    -> grid build.

    Query params:
        ipa_mode, syllable_mode, enhance, max_cols: forwarded to
            ``_build_grid_core`` (``max_cols`` <= 0 becomes ``None``).
        min_conf: Tesseract word confidence threshold. When <= 0 the value
            recommended by the scan quality report is used, or 40 if the
            quality scoring failed.
        vision_fusion: when true, the merged words are passed through
            ``vision_fuse_ocr``; a failure there is logged and the merged
            OCR words are kept unchanged.
        doc_category: category for the Vision-LLM step; falls back to the
            session's ``document_category`` and then to "vokabelseite".

    Returns:
        The grid dict from ``_build_grid_core`` extended with
        ``scan_quality`` and ``ocr_stats``.

    Raises:
        HTTPException: 404 when the session does not exist, 400 when no
            cropped/dewarped image is cached or when ``_build_grid_core``
            raises ``ValueError``.
    """
    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    t0 = time.time()

    # 1. Load the cropped/dewarped image from cache or session
    if session_id not in _cache:
        await _load_session_to_cache(session_id)
    cached = _get_cached(session_id)

    dewarped_bgr = cached.get("cropped_bgr")
    if dewarped_bgr is None:
        dewarped_bgr = cached.get("dewarped_bgr")
    if dewarped_bgr is None:
        raise HTTPException(status_code=400, detail="No cropped/dewarped image available. Run preprocessing steps first.")

    img_h, img_w = dewarped_bgr.shape[:2]
    # Work on a copy so the cached image is never modified by enhancement.
    ocr_input = dewarped_bgr.copy()

    # NOTE(review): the quality scoring, enhancement and OCR calls below are
    # synchronous and run inside this async handler; consider offloading
    # them to a worker thread if they turn out to stall other requests.

    # 2. Scan quality assessment (best effort; falls back to a fixed threshold)
    scan_quality_info = {}
    try:
        from scan_quality import score_scan_quality
        quality_report = score_scan_quality(ocr_input)
        scan_quality_info = quality_report.to_dict()
        actual_min_conf = min_conf if min_conf > 0 else quality_report.recommended_min_conf
    except Exception as e:
        logger.warning("rerun-ocr: scan quality failed: %s", e)
        actual_min_conf = min_conf if min_conf > 0 else 40

    # 3. Image enhancement (Step 3) — only for scans flagged as degraded
    is_degraded = scan_quality_info.get("is_degraded", False)
    enhance_applied = False
    if enhance and is_degraded:
        try:
            from ocr_image_enhance import enhance_for_ocr
            ocr_input = enhance_for_ocr(ocr_input, is_degraded=True)
            # Only report enhancement when it actually succeeded.
            enhance_applied = True
            logger.info("rerun-ocr: CLAHE enhancement applied")
        except Exception as e:
            logger.warning("rerun-ocr: enhancement failed: %s", e)

    # 4. Run dual-engine OCR
    from PIL import Image
    import pytesseract

    # RapidOCR (best effort; Tesseract results are still used on failure)
    rapid_words = []
    try:
        from cv_ocr_engines import ocr_region_rapid
        from cv_vocab_types import PageRegion
        full_region = PageRegion(type="full_page", x=0, y=0, width=img_w, height=img_h)
        rapid_words = ocr_region_rapid(ocr_input, full_region) or []
    except Exception as e:
        logger.warning("rerun-ocr: RapidOCR failed: %s", e)

    # Tesseract — the slice reverses the channel axis before handing the
    # array to PIL (presumably BGR -> RGB, going by the variable names).
    pil_img = Image.fromarray(ocr_input[:, :, ::-1])
    data = pytesseract.image_to_data(pil_img, lang='eng+deu', config='--psm 6 --oem 3', output_type=pytesseract.Output.DICT)
    tess_words = []
    for i, raw_text in enumerate(data["text"]):
        text = (raw_text or "").strip()
        conf = _rerun_parse_tesseract_conf(data["conf"][i])
        # Skip empty entries and words below the confidence threshold.
        if not text or conf < actual_min_conf:
            continue
        tess_words.append({
            "text": text, "left": data["left"][i], "top": data["top"][i],
            "width": data["width"][i], "height": data["height"][i], "conf": conf,
        })

    # 5. Merge OCR results
    from ocr_pipeline_ocr_merge import _split_paddle_multi_words, _merge_paddle_tesseract, _deduplicate_words
    rapid_split = _split_paddle_multi_words(rapid_words) if rapid_words else []
    if rapid_split or tess_words:
        merged_words = _merge_paddle_tesseract(rapid_split, tess_words)
        merged_words = _deduplicate_words(merged_words)
    else:
        merged_words = tess_words

    # 6. Store updated word_result in session
    word_result = {
        "cells": _rerun_word_result_cells(merged_words),
        "image_width": img_w,
        "image_height": img_h,
        "ocr_engine": "rapid_kombi",
        "word_count": len(merged_words),
        "raw_paddle_words": rapid_words,
    }

    # 6b. Vision-LLM Fusion (Step 4) — correct OCR using Vision model.
    # The fused result is prepared completely before anything is committed,
    # so a failure leaves merged_words and word_result untouched.
    vision_applied = False
    if vision_fusion:
        try:
            from vision_ocr_fusion import vision_fuse_ocr
            category = doc_category or session.get("document_category") or "vokabelseite"
            logger.info("rerun-ocr: running Vision-LLM fusion (category=%s)", category)
            fused_words = list(await vision_fuse_ocr(ocr_input, merged_words, category))
            fused_cells = _rerun_word_result_cells(fused_words)
        except Exception as e:
            logger.warning("rerun-ocr: Vision-LLM fusion failed: %s", e)
        else:
            merged_words = fused_words
            word_result["cells"] = fused_cells
            word_result["word_count"] = len(merged_words)
            word_result["ocr_engine"] = "vision_fusion"
            vision_applied = True

    await update_session_db(session_id, word_result=word_result)

    # Reload session with updated word_result
    session = await get_session_db(session_id)
    if not session:
        # The session disappeared between the update and the reload.
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    ocr_duration = time.time() - t0
    logger.info(
        "rerun-ocr session %s: %d words (rapid=%d, tess=%d, merged=%d) in %.1fs "
        "(enhance=%s, min_conf=%d, quality=%s)",
        session_id, len(merged_words), len(rapid_words), len(tess_words),
        len(merged_words), ocr_duration, enhance, actual_min_conf,
        scan_quality_info.get("quality_pct", "?"),
    )

    # 7. Build grid from new words
    try:
        result = await _build_grid_core(
            session_id, session,
            ipa_mode=ipa_mode, syllable_mode=syllable_mode,
            enhance=enhance,
            max_columns=max_cols if max_cols > 0 else None,
            min_conf=min_conf if min_conf > 0 else None,
        )
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e

    # Persist grid
    await update_session_db(session_id, grid_editor_result=result, current_step=11)

    # Add quality info to response (after persisting, so it is not stored)
    result["scan_quality"] = scan_quality_info
    result["ocr_stats"] = {
        "rapid_words": len(rapid_words),
        "tess_words": len(tess_words),
        "merged_words": len(merged_words),
        "min_conf_used": actual_min_conf,
        "enhance_applied": enhance_applied,
        "vision_fusion_applied": vision_applied,
        "document_category": doc_category or session.get("document_category", ""),
        "ocr_duration_seconds": round(ocr_duration, 1),
    }

    total_duration = time.time() - t0
    logger.info(
        "rerun-ocr+build-grid session %s: %d zones, %d cols, %d cells in %.1fs",
        session_id,
        len(result.get("zones", [])),
        result.get("summary", {}).get("total_columns", 0),
        result.get("summary", {}).get("total_cells", 0),
        total_duration,
    )

    return result


@router.post("/sessions/{session_id}/save-grid")
async def save_grid(session_id: str, request: Request):
    """Save edited grid data from the frontend Excel-like editor.

    Receives the full StructuredGrid with user edits (text changes,
    formatting changes like bold columns, header rows, etc.) and
    persists it to the session's grid_editor_result with ``edited=True``
    and ``current_step=11``.

    Fields missing from the request body (image size, boxes_detected,
    summary, formatting) are taken from the previously stored grid;
    ``duration_seconds`` always comes from the stored grid.

    Returns:
        ``{"session_id": ..., "saved": True}``.

    Raises:
        HTTPException: 404 when the session does not exist; 400 when the
            body is not valid JSON, is not a JSON object, lacks 'zones',
            or 'zones' is not a list.
    """
    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    # Malformed or empty JSON is a client error, not a server fault.
    try:
        body = await request.json()
    except ValueError as e:
        raise HTTPException(status_code=400, detail="Request body is not valid JSON") from e

    # Validate basic structure
    if not isinstance(body, dict) or "zones" not in body:
        raise HTTPException(status_code=400, detail="Missing 'zones' in request body")
    zones = body["zones"]
    if not isinstance(zones, list):
        raise HTTPException(status_code=400, detail="'zones' must be a list")

    # Preserve metadata from the original build
    existing = session.get("grid_editor_result") or {}
    result = {
        "session_id": session_id,
        "image_width": body.get("image_width", existing.get("image_width", 0)),
        "image_height": body.get("image_height", existing.get("image_height", 0)),
        "zones": zones,
        "boxes_detected": body.get("boxes_detected", existing.get("boxes_detected", 0)),
        "summary": body.get("summary", existing.get("summary", {})),
        "formatting": body.get("formatting", existing.get("formatting", {})),
        "duration_seconds": existing.get("duration_seconds", 0),
        "edited": True,
    }

    await update_session_db(session_id, grid_editor_result=result, current_step=11)

    logger.info("save-grid session %s: %d zones saved", session_id, len(zones))

    return {"session_id": session_id, "saved": True}


@router.get("/sessions/{session_id}/grid-editor")
async def get_grid(session_id: str):
    """Return the stored ``grid_editor_result`` of a session.

    Raises:
        HTTPException: 404 when the session does not exist or when it has
            no (or an empty) grid editor result yet.
    """
    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    stored_grid = session.get("grid_editor_result")
    if stored_grid:
        return stored_grid

    # Nothing has been built or saved for this session so far.
    raise HTTPException(
        status_code=404,
        detail="No grid editor data. Run build-grid first.",
    )