[split-required] Split final 43 files (500-668 LOC) to complete refactoring
klausur-service (11 files): - cv_gutter_repair, ocr_pipeline_regression, upload_api - ocr_pipeline_sessions, smart_spell, nru_worksheet_generator - ocr_pipeline_overlays, mail/aggregator, zeugnis_api - cv_syllable_detect, self_rag backend-lehrer (17 files): - classroom_engine/suggestions, generators/quiz_generator - worksheets_api, llm_gateway/comparison, state_engine_api - classroom/models (→ 4 submodules), services/file_processor - alerts_agent/api/wizard+digests+routes, content_generators/pdf - classroom/routes/sessions, llm_gateway/inference - classroom_engine/analytics, auth/keycloak_auth - alerts_agent/processing/rule_engine, ai_processor/print_versions agent-core (5 files): - brain/memory_store, brain/knowledge_graph, brain/context_manager - orchestrator/supervisor, sessions/session_manager admin-lehrer (5 components): - GridOverlay, StepGridReview, DevOpsPipelineSidebar - DataFlowDiagram, sbom/wizard/page website (2 files): - DependencyMap, lehrer/abitur-archiv Other: nibis_ingestion, grid_detection_service, export-doclayout-onnx Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
176
klausur-service/backend/ocr_pipeline_sessions_images.py
Normal file
176
klausur-service/backend/ocr_pipeline_sessions_images.py
Normal file
@@ -0,0 +1,176 @@
|
||||
"""
|
||||
OCR Pipeline Sessions Images — image serving, thumbnails, pipeline log,
|
||||
categories, and document type detection.
|
||||
|
||||
Extracted from ocr_pipeline_sessions.py for modularity.
|
||||
|
||||
Lizenz: Apache 2.0
|
||||
DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import time
|
||||
from typing import Any, Dict
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
from fastapi import APIRouter, HTTPException, Query
|
||||
from fastapi.responses import Response
|
||||
|
||||
from cv_vocab_pipeline import create_ocr_image, detect_document_type
|
||||
from ocr_pipeline_common import (
|
||||
VALID_DOCUMENT_CATEGORIES,
|
||||
_append_pipeline_log,
|
||||
_cache,
|
||||
_get_base_image_png,
|
||||
_get_cached,
|
||||
_load_session_to_cache,
|
||||
)
|
||||
from ocr_pipeline_overlays import render_overlay
|
||||
from ocr_pipeline_session_store import (
|
||||
get_session_db,
|
||||
get_session_image,
|
||||
update_session_db,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(prefix="/api/v1/ocr-pipeline", tags=["ocr-pipeline"])
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Thumbnail & Log Endpoints
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@router.get("/sessions/{session_id}/thumbnail")
async def get_session_thumbnail(session_id: str, size: int = Query(default=80, ge=16, le=400)):
    """Return a small PNG thumbnail of the session's original image.

    Args:
        session_id: OCR pipeline session identifier.
        size: Target length of the longer thumbnail edge in pixels (16-400).

    Raises:
        HTTPException: 404 if the session or its original image is missing,
            500 if the stored image cannot be decoded or re-encoded.
    """
    original_png = await get_session_image(session_id, "original")
    if not original_png:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found or no image")
    arr = np.frombuffer(original_png, dtype=np.uint8)
    img = cv2.imdecode(arr, cv2.IMREAD_COLOR)
    if img is None:
        raise HTTPException(status_code=500, detail="Failed to decode image")
    h, w = img.shape[:2]
    scale = size / max(h, w)
    # Clamp both dimensions to >= 1 px: an extreme aspect ratio would
    # otherwise truncate to 0 and make cv2.resize raise.
    new_w, new_h = max(1, int(w * scale)), max(1, int(h * scale))
    thumb = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_AREA)
    ok, png_bytes = cv2.imencode(".png", thumb)
    if not ok:
        # imencode signals failure via its first return value; don't serve garbage.
        raise HTTPException(status_code=500, detail="Failed to encode thumbnail")
    return Response(content=png_bytes.tobytes(), media_type="image/png",
                    headers={"Cache-Control": "public, max-age=3600"})
|
||||
|
||||
|
||||
@router.get("/sessions/{session_id}/pipeline-log")
async def get_pipeline_log(session_id: str):
    """Return the recorded pipeline execution log for a session."""
    record = await get_session_db(session_id)
    if not record:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")
    # An absent or empty log is normalized to an empty step list.
    log = record.get("pipeline_log")
    if not log:
        log = {"steps": []}
    return {"session_id": session_id, "pipeline_log": log}
|
||||
|
||||
|
||||
@router.get("/categories")
async def list_categories():
    """Return the valid document categories, sorted alphabetically."""
    categories = sorted(VALID_DOCUMENT_CATEGORIES)
    return {"categories": categories}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Image Endpoints
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@router.get("/sessions/{session_id}/image/{image_type}")
async def get_image(session_id: str, image_type: str):
    """Serve a session image as PNG.

    Supported types: original, oriented, cropped, deskewed, dewarped,
    binarized, clean, and the rendered overlays (structure-overlay,
    columns-overlay, rows-overlay, words-overlay).

    Raises:
        HTTPException: 400 for an unknown image type, 404 if the requested
            image has not been produced yet.
    """
    valid_types = {"original", "oriented", "cropped", "deskewed", "dewarped", "binarized", "structure-overlay", "columns-overlay", "rows-overlay", "words-overlay", "clean"}
    if image_type not in valid_types:
        raise HTTPException(status_code=400, detail=f"Unknown image type: {image_type}")

    # Overlays are rendered on demand rather than stored; the overlay kind
    # is the type name minus the "-overlay" suffix (structure/columns/rows/words).
    if image_type.endswith("-overlay"):
        overlay_kind = image_type[: -len("-overlay")]
        return await render_overlay(overlay_kind, session_id)

    # Fast path: serve the binarized PNG straight from the in-memory cache.
    # (The original code also computed png_key/bgr_key here but never used
    # them — that dead code has been removed.)
    cached = _cache.get(session_id)
    if cached and image_type == "binarized" and cached.get("binarized_png"):
        return Response(content=cached["binarized_png"], media_type="image/png")

    # Load from DB — for cropped/dewarped, fall back through the image chain.
    if image_type in ("cropped", "dewarped"):
        data = await _get_base_image_png(session_id)
    else:
        data = await get_session_image(session_id, image_type)
    if not data:
        raise HTTPException(status_code=404, detail=f"Image '{image_type}' not available yet")

    return Response(content=data, media_type="image/png")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Document Type Detection (between Dewarp and Columns)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@router.post("/sessions/{session_id}/detect-type")
async def detect_type(session_id: str):
    """Detect the document type (vocab_table, full_text, generic_table).

    Should be called after crop (clean image available); falls back to the
    dewarped image if crop was skipped. The result is persisted to the
    session DB and the in-memory cache so the frontend can decide the
    pipeline flow.

    Raises:
        HTTPException: 400 if neither a cropped nor a dewarped image exists.
    """
    # Rehydrate the cache if this worker has not seen the session yet.
    if session_id not in _cache:
        await _load_session_to_cache(session_id)
    cached = _get_cached(session_id)

    # Prefer the cropped image; fall back to dewarped when crop was skipped.
    img_bgr = cached.get("cropped_bgr") if cached.get("cropped_bgr") is not None else cached.get("dewarped_bgr")
    if img_bgr is None:
        raise HTTPException(status_code=400, detail="Crop or dewarp must be completed first")

    t0 = time.time()
    ocr_img = create_ocr_image(img_bgr)
    result = detect_document_type(ocr_img, img_bgr)
    duration = time.time() - t0

    result_dict = {
        "doc_type": result.doc_type,
        "confidence": result.confidence,
        "pipeline": result.pipeline,
        "skip_steps": result.skip_steps,
        "features": result.features,
        "duration_seconds": round(duration, 2),
    }

    # Persist to DB so the result survives cache eviction / restarts.
    await update_session_db(
        session_id,
        doc_type=result.doc_type,
        doc_type_result=result_dict,
    )

    cached["doc_type_result"] = result_dict

    # Lazy %-style args: no formatting cost when INFO logging is disabled.
    logger.info(
        "OCR Pipeline: detect-type session %s: %s (confidence=%s, %.2fs)",
        session_id, result.doc_type, result.confidence, duration,
    )

    # Only scalar feature values go into the pipeline log (keeps it JSON-safe).
    await _append_pipeline_log(session_id, "detect_type", {
        "doc_type": result.doc_type,
        "pipeline": result.pipeline,
        "confidence": result.confidence,
        **{k: v for k, v in (result.features or {}).items() if isinstance(v, (int, float, str, bool))},
    }, duration_ms=int(duration * 1000))

    return {"session_id": session_id, **result_dict}
|
||||
Reference in New Issue
Block a user