""" OCR Pipeline LLM Review — LLM-based correction endpoints. Extracted from ocr_pipeline_postprocess.py. Lizenz: Apache 2.0 DATENSCHUTZ: Alle Verarbeitung erfolgt lokal. """ import json import logging from datetime import datetime from typing import Dict, List from fastapi import APIRouter, HTTPException, Request from fastapi.responses import StreamingResponse from cv_vocab_pipeline import ( OLLAMA_REVIEW_MODEL, llm_review_entries, llm_review_entries_streaming, ) from ocr_pipeline_session_store import ( get_session_db, update_session_db, ) from ocr_pipeline_common import ( _cache, _append_pipeline_log, ) logger = logging.getLogger(__name__) router = APIRouter(prefix="/api/v1/ocr-pipeline", tags=["ocr-pipeline"]) # --------------------------------------------------------------------------- # Step 8: LLM Review # --------------------------------------------------------------------------- @router.post("/sessions/{session_id}/llm-review") async def run_llm_review(session_id: str, request: Request, stream: bool = False): """Run LLM-based correction on vocab entries from Step 5. Query params: stream: false (default) for JSON response, true for SSE streaming """ session = await get_session_db(session_id) if not session: raise HTTPException(status_code=404, detail=f"Session {session_id} not found") word_result = session.get("word_result") if not word_result: raise HTTPException(status_code=400, detail="No word result found — run Step 5 first") entries = word_result.get("vocab_entries") or word_result.get("entries") or [] if not entries: raise HTTPException(status_code=400, detail="No vocab entries found — run Step 5 first") # Optional model override from request body body = {} try: body = await request.json() except Exception: pass model = body.get("model") or OLLAMA_REVIEW_MODEL if stream: return StreamingResponse( _llm_review_stream_generator(session_id, entries, word_result, model, request), media_type="text/event-stream", headers={"Cache-Control": "no-cache", "Connection": "keep-alive", "X-Accel-Buffering": "no"}, ) # Non-streaming path try: result = await llm_review_entries(entries, model=model) except Exception as e: import traceback logger.error(f"LLM review failed for session {session_id}: {type(e).__name__}: {e}\n{traceback.format_exc()}") raise HTTPException(status_code=502, detail=f"LLM review failed ({type(e).__name__}): {e}") # Store result inside word_result as a sub-key word_result["llm_review"] = { "changes": result["changes"], "model_used": result["model_used"], "duration_ms": result["duration_ms"], "entries_corrected": result["entries_corrected"], } await update_session_db(session_id, word_result=word_result, current_step=9) if session_id in _cache: _cache[session_id]["word_result"] = word_result logger.info(f"LLM review session {session_id}: {len(result['changes'])} changes, " f"{result['duration_ms']}ms, model={result['model_used']}") await _append_pipeline_log(session_id, "correction", { "engine": "llm", "model": result["model_used"], "total_entries": len(entries), "corrections_proposed": len(result["changes"]), }, duration_ms=result["duration_ms"]) return { "session_id": session_id, "changes": result["changes"], "model_used": result["model_used"], "duration_ms": result["duration_ms"], "total_entries": len(entries), "corrections_found": len(result["changes"]), } async def _llm_review_stream_generator( session_id: str, entries: List[Dict], word_result: Dict, model: str, request: Request, ): """SSE generator that yields batch-by-batch LLM review progress.""" try: async for 
async def _llm_review_stream_generator(
    session_id: str,
    entries: List[Dict],
    word_result: Dict,
    model: str,
    request: Request,
):
    """SSE generator that yields batch-by-batch LLM review progress."""
    try:
        async for event in llm_review_entries_streaming(entries, model=model):
            if await request.is_disconnected():
                logger.info(f"SSE: client disconnected during LLM review for {session_id}")
                return
            yield f"data: {json.dumps(event, ensure_ascii=False)}\n\n"

            # On the final event: persist the review result to the DB.
            if event.get("type") == "complete":
                word_result["llm_review"] = {
                    "changes": event["changes"],
                    "model_used": event["model_used"],
                    "duration_ms": event["duration_ms"],
                    "entries_corrected": event["entries_corrected"],
                }
                await update_session_db(session_id, word_result=word_result, current_step=9)
                if session_id in _cache:
                    _cache[session_id]["word_result"] = word_result
                logger.info(
                    f"LLM review SSE session {session_id}: {event['corrections_found']} changes, "
                    f"{event['duration_ms']}ms, skipped={event['skipped']}, model={event['model_used']}"
                )
    except Exception as e:
        import traceback
        logger.error(
            f"LLM review SSE failed for {session_id}: "
            f"{type(e).__name__}: {e}\n{traceback.format_exc()}"
        )
        error_event = {"type": "error", "detail": f"{type(e).__name__}: {e}"}
        yield f"data: {json.dumps(error_event)}\n\n"


@router.post("/sessions/{session_id}/llm-review/apply")
async def apply_llm_corrections(session_id: str, request: Request):
    """Apply selected LLM corrections to vocab entries."""
    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    word_result = session.get("word_result")
    if not word_result:
        raise HTTPException(status_code=400, detail="No word result found")

    llm_review = word_result.get("llm_review")
    if not llm_review:
        raise HTTPException(status_code=400, detail="No LLM review found — run /llm-review first")

    body = await request.json()
    accepted_indices = set(body.get("accepted_indices", []))  # indices into changes[]

    changes = llm_review.get("changes", [])
    entries = word_result.get("vocab_entries") or word_result.get("entries") or []

    # Build a lookup: (row_index, field) -> new value, for accepted changes only.
    corrections = {}
    applied_count = 0
    for idx, change in enumerate(changes):
        if idx in accepted_indices:
            key = (change["row_index"], change["field"])
            corrections[key] = change["new"]
            applied_count += 1

    # Apply the corrections to the matching entries.
    for entry in entries:
        row_idx = entry.get("row_index", -1)
        for field_name in ("english", "german", "example"):
            key = (row_idx, field_name)
            if key in corrections:
                entry[field_name] = corrections[key]
                entry["llm_corrected"] = True

    # Update word_result (entries are mirrored under both keys, matching the reads above).
    word_result["vocab_entries"] = entries
    word_result["entries"] = entries
    word_result["llm_review"]["applied_count"] = applied_count
    word_result["llm_review"]["applied_at"] = datetime.utcnow().isoformat()

    await update_session_db(session_id, word_result=word_result)
    if session_id in _cache:
        _cache[session_id]["word_result"] = word_result

    logger.info(f"Applied {applied_count}/{len(changes)} LLM corrections for session {session_id}")

    return {
        "session_id": session_id,
        "applied_count": applied_count,
        "total_changes": len(changes),
    }
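

# ---------------------------------------------------------------------------
# Apply sketch (illustration only, not part of the router): accepting a
# subset of the proposed changes. The request body shape
# ({"accepted_indices": [...]}) matches apply_llm_corrections above; httpx
# and the base URL are assumptions.
# ---------------------------------------------------------------------------
#
#   import httpx
#
#   async def apply_corrections(base_url: str, session_id: str, indices: list[int]) -> dict:
#       url = f"{base_url}/api/v1/ocr-pipeline/sessions/{session_id}/llm-review/apply"
#       async with httpx.AsyncClient() as client:
#           # e.g. indices=[0, 2] accepts the first and third entries of changes[].
#           resp = await client.post(url, json={"accepted_indices": indices})
#           resp.raise_for_status()
#           return resp.json()  # {"session_id": ..., "applied_count": ..., "total_changes": ...}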