Multi-page PDF support: create one session per page
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Failing after 27s
CI / test-go-edu-search (push) Successful in 39s
CI / test-python-klausur (push) Failing after 2m36s
CI / test-python-agent-core (push) Successful in 24s
CI / test-nodejs-website (push) Successful in 35s

When uploading a PDF with > 1 page to the OCR pipeline, each page
now gets its own session (grouped by document_group_id). Previously
only page 1 was processed. The response includes a pages array with
all session IDs so the frontend can navigate between them.

Single-page PDFs and images continue to work as before.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-04-11 14:39:48 +02:00
parent aed0edbf6d
commit 53b0d77853

View File

@@ -71,13 +71,36 @@ async def create_session(
file: UploadFile = File(...),
name: Optional[str] = Form(None),
):
"""Upload a PDF or image file and create a pipeline session."""
"""Upload a PDF or image file and create a pipeline session.
For multi-page PDFs (> 1 page), each page becomes its own session
grouped under a ``document_group_id``. The response includes a
``pages`` array with one entry per page/session.
"""
file_data = await file.read()
filename = file.filename or "upload"
content_type = file.content_type or ""
session_id = str(uuid.uuid4())
is_pdf = content_type == "application/pdf" or filename.lower().endswith(".pdf")
session_name = name or filename
# --- Multi-page PDF handling ---
if is_pdf:
try:
import fitz # PyMuPDF
pdf_doc = fitz.open(stream=file_data, filetype="pdf")
page_count = pdf_doc.page_count
pdf_doc.close()
except Exception as e:
raise HTTPException(status_code=400, detail=f"Could not read PDF: {e}")
if page_count > 1:
return await _create_multi_page_sessions(
file_data, filename, session_name, page_count,
)
# --- Single page (image or 1-page PDF) ---
session_id = str(uuid.uuid4())
try:
if is_pdf:
@@ -93,7 +116,6 @@ async def create_session(
raise HTTPException(status_code=500, detail="Failed to encode image")
original_png = png_buf.tobytes()
session_name = name or filename
# Persist to DB
await create_session_db(
@@ -134,6 +156,81 @@ async def create_session(
}
async def _create_multi_page_sessions(
    pdf_data: bytes,
    filename: str,
    base_name: str,
    page_count: int,
) -> dict:
    """Create one session per PDF page, grouped by document_group_id.

    Every page is rendered with ``render_pdf_high_res`` (zoom 3.0), encoded
    as PNG, persisted through ``create_session_db`` and registered in the
    in-memory ``_cache``. A page that fails to render or to encode is logged
    and skipped, so ``pages`` in the result may contain fewer entries than
    ``page_count``.

    Args:
        pdf_data: Raw bytes of the uploaded PDF.
        filename: Upload filename; stored unchanged on every page session.
        base_name: Display name; each page is named
            ``"<base_name> — Seite <n>"`` with a 1-based page number.
        page_count: Number of pages to iterate over (0-based internally).

    Returns:
        A dict with ``document_group_id``, ``filename``, ``name``,
        ``page_count`` (the value passed in, not the number of sessions
        actually created) and ``pages`` (one entry per created session).
    """
    document_group_id = str(uuid.uuid4())
    pages = []

    for page_idx in range(page_count):
        page_number = page_idx + 1  # 1-based number used for names, DB and logs
        session_id = str(uuid.uuid4())
        page_name = f"{base_name} — Seite {page_number}"

        # Best effort per page: one broken page must not abort the whole upload.
        try:
            img_bgr = render_pdf_high_res(pdf_data, page_number=page_idx, zoom=3.0)
        except Exception as e:
            logger.warning("Failed to render PDF page %d: %s", page_number, e)
            continue

        ok, png_buf = cv2.imencode(".png", img_bgr)
        if not ok:
            # Previously skipped silently; log it so missing pages are traceable.
            logger.warning(
                "Failed to encode PDF page %d of %s as PNG", page_number, filename
            )
            continue
        page_png = png_buf.tobytes()

        await create_session_db(
            session_id=session_id,
            name=page_name,
            filename=filename,
            original_png=page_png,
            document_group_id=document_group_id,
            page_number=page_number,
        )

        # Seed the in-memory cache with the rendered image; the remaining
        # entries start empty and the session begins at step 1.
        _cache[session_id] = {
            "id": session_id,
            "filename": filename,
            "name": page_name,
            "original_bgr": img_bgr,
            "oriented_bgr": None,
            "cropped_bgr": None,
            "deskewed_bgr": None,
            "dewarped_bgr": None,
            "orientation_result": None,
            "crop_result": None,
            "deskew_result": None,
            "dewarp_result": None,
            "ground_truth": {},
            "current_step": 1,
        }

        h, w = img_bgr.shape[:2]
        pages.append({
            "session_id": session_id,
            "name": page_name,
            "page_number": page_number,
            "image_width": w,
            "image_height": h,
            "original_image_url": f"/api/v1/ocr-pipeline/sessions/{session_id}/image/original",
        })

        logger.info(
            "OCR Pipeline: created page session %s (page %d/%d) from %s (%dx%d)",
            session_id, page_number, page_count, filename, w, h,
        )

    if not pages:
        # Every page was skipped; the caller receives an empty ``pages`` list.
        logger.warning(
            "OCR Pipeline: no page sessions could be created from %s", filename
        )

    return {
        "document_group_id": document_group_id,
        "filename": filename,
        "name": base_name,
        "page_count": page_count,
        "pages": pages,
    }
@router.get("/sessions/{session_id}")
async def get_session_info(session_id: str):
"""Get session info including deskew/dewarp/column results for step navigation."""