From 53b0d778530dcc76c3eb1f7888cf586fa6caf316 Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Sat, 11 Apr 2026 14:39:48 +0200 Subject: [PATCH] Multi-page PDF support: create one session per page When uploading a PDF with > 1 page to the OCR pipeline, each page now gets its own session (grouped by document_group_id). Previously only page 1 was processed. The response includes a pages array with all session IDs so the frontend can navigate between them. Single-page PDFs and images continue to work as before. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../backend/ocr_pipeline_sessions.py | 103 +++++++++++++++++- 1 file changed, 100 insertions(+), 3 deletions(-) diff --git a/klausur-service/backend/ocr_pipeline_sessions.py b/klausur-service/backend/ocr_pipeline_sessions.py index 99372eb..cd747b8 100644 --- a/klausur-service/backend/ocr_pipeline_sessions.py +++ b/klausur-service/backend/ocr_pipeline_sessions.py @@ -71,13 +71,36 @@ async def create_session( file: UploadFile = File(...), name: Optional[str] = Form(None), ): - """Upload a PDF or image file and create a pipeline session.""" + """Upload a PDF or image file and create a pipeline session. + + For multi-page PDFs (> 1 page), each page becomes its own session + grouped under a ``document_group_id``. The response includes a + ``pages`` array with one entry per page/session. 
async def _create_multi_page_sessions(
    pdf_data: bytes,
    filename: str,
    base_name: str,
    page_count: int,
) -> dict:
    """Create one session per PDF page, grouped by document_group_id.

    Each page is rendered at zoom 3.0, PNG-encoded, persisted through
    ``create_session_db`` and registered in the in-memory ``_cache``. All
    sessions created by one call share a freshly generated
    ``document_group_id``.

    Pages that cannot be rendered or encoded are logged and skipped, so the
    returned ``pages`` list may be shorter than ``page_count``.

    Args:
        pdf_data: Raw bytes of the uploaded PDF.
        filename: Original upload filename, stored on every page session.
        base_name: Display name of the document; each page session is named
            ``"<base_name> — Seite <n>"``.
        page_count: Number of pages in the PDF (pages are rendered by
            zero-based index ``0 .. page_count - 1``).

    Returns:
        A dict with ``document_group_id``, ``filename``, ``name``,
        ``page_count`` (the value passed in, not the number of sessions
        actually created) and ``pages`` (one entry per created session with
        its id, name, 1-based page number, image size and image URL).

    Raises:
        HTTPException: If not a single page could be turned into a session.
    """
    document_group_id = str(uuid.uuid4())
    pages = []

    for page_idx in range(page_count):
        page_number = page_idx + 1  # 1-based for names, DB and response
        session_id = str(uuid.uuid4())
        page_name = f"{base_name} — Seite {page_number}"

        try:
            img_bgr = render_pdf_high_res(pdf_data, page_number=page_idx, zoom=3.0)
        except Exception as e:
            # Best effort: one broken page must not abort the whole upload.
            logger.warning("Failed to render PDF page %d: %s", page_number, e)
            continue

        ok, png_buf = cv2.imencode(".png", img_bgr)
        if not ok:
            # Previously skipped silently; leave a trace so missing pages
            # can be explained from the logs.
            logger.warning("Failed to encode PDF page %d as PNG", page_number)
            continue
        page_png = png_buf.tobytes()

        await create_session_db(
            session_id=session_id,
            name=page_name,
            filename=filename,
            original_png=page_png,
            document_group_id=document_group_id,
            page_number=page_number,
        )

        _cache[session_id] = {
            "id": session_id,
            "filename": filename,
            "name": page_name,
            "original_bgr": img_bgr,
            "oriented_bgr": None,
            "cropped_bgr": None,
            "deskewed_bgr": None,
            "dewarped_bgr": None,
            "orientation_result": None,
            "crop_result": None,
            "deskew_result": None,
            "dewarp_result": None,
            "ground_truth": {},
            "current_step": 1,
        }

        h, w = img_bgr.shape[:2]
        pages.append({
            "session_id": session_id,
            "name": page_name,
            "page_number": page_number,
            "image_width": w,
            "image_height": h,
            "original_image_url": f"/api/v1/ocr-pipeline/sessions/{session_id}/image/original",
        })

        logger.info(
            "OCR Pipeline: created page session %s (page %d/%d) from %s (%dx%d)",
            session_id, page_number, page_count, filename, w, h,
        )

    if not pages:
        # Nothing was persisted; a success payload with an empty page list
        # would leave the client with no session to open.
        raise HTTPException(
            status_code=400,
            detail="Could not process file: no page of the PDF could be rendered",
        )

    return {
        "document_group_id": document_group_id,
        "filename": filename,
        "name": base_name,
        "page_count": page_count,
        "pages": pages,
    }