Multi-page PDF support: create one session per page
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Failing after 27s
CI / test-go-edu-search (push) Successful in 39s
CI / test-python-klausur (push) Failing after 2m36s
CI / test-python-agent-core (push) Successful in 24s
CI / test-nodejs-website (push) Successful in 35s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Failing after 27s
CI / test-go-edu-search (push) Successful in 39s
CI / test-python-klausur (push) Failing after 2m36s
CI / test-python-agent-core (push) Successful in 24s
CI / test-nodejs-website (push) Successful in 35s
When uploading a PDF with > 1 page to the OCR pipeline, each page now gets its own session (grouped by document_group_id). Previously only page 1 was processed. The response includes a pages array with all session IDs so the frontend can navigate between them. Single-page PDFs and images continue to work as before.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -71,13 +71,36 @@ async def create_session(
|
||||
file: UploadFile = File(...),
|
||||
name: Optional[str] = Form(None),
|
||||
):
|
||||
"""Upload a PDF or image file and create a pipeline session."""
|
||||
"""Upload a PDF or image file and create a pipeline session.
|
||||
|
||||
For multi-page PDFs (> 1 page), each page becomes its own session
|
||||
grouped under a ``document_group_id``. The response includes a
|
||||
``pages`` array with one entry per page/session.
|
||||
"""
|
||||
file_data = await file.read()
|
||||
filename = file.filename or "upload"
|
||||
content_type = file.content_type or ""
|
||||
|
||||
session_id = str(uuid.uuid4())
|
||||
is_pdf = content_type == "application/pdf" or filename.lower().endswith(".pdf")
|
||||
session_name = name or filename
|
||||
|
||||
# --- Multi-page PDF handling ---
|
||||
if is_pdf:
|
||||
try:
|
||||
import fitz # PyMuPDF
|
||||
pdf_doc = fitz.open(stream=file_data, filetype="pdf")
|
||||
page_count = pdf_doc.page_count
|
||||
pdf_doc.close()
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=400, detail=f"Could not read PDF: {e}")
|
||||
|
||||
if page_count > 1:
|
||||
return await _create_multi_page_sessions(
|
||||
file_data, filename, session_name, page_count,
|
||||
)
|
||||
|
||||
# --- Single page (image or 1-page PDF) ---
|
||||
session_id = str(uuid.uuid4())
|
||||
|
||||
try:
|
||||
if is_pdf:
|
||||
@@ -93,7 +116,6 @@ async def create_session(
|
||||
raise HTTPException(status_code=500, detail="Failed to encode image")
|
||||
|
||||
original_png = png_buf.tobytes()
|
||||
session_name = name or filename
|
||||
|
||||
# Persist to DB
|
||||
await create_session_db(
|
||||
@@ -134,6 +156,81 @@ async def create_session(
|
||||
}
|
||||
|
||||
|
||||
async def _create_multi_page_sessions(
    pdf_data: bytes,
    filename: str,
    base_name: str,
    page_count: int,
) -> dict:
    """Create one session per PDF page, grouped by document_group_id.

    Each page is rendered with ``render_pdf_high_res``, PNG-encoded,
    persisted via ``create_session_db`` and registered in ``_cache``.
    A page that fails to render or encode is logged and skipped, so the
    ``pages`` list in the result may hold fewer entries than ``page_count``.

    Args:
        pdf_data: Raw bytes of the uploaded PDF.
        filename: Upload filename, stored on every page session.
        base_name: Display name of the document; each page session is
            named ``"<base_name> — Seite <n>"``.
        page_count: Number of pages to iterate over.

    Returns:
        A dict with ``document_group_id``, ``filename``, ``name``,
        ``page_count`` and ``pages`` (one entry per created session).
    """
    document_group_id = str(uuid.uuid4())
    pages = []

    for page_idx in range(page_count):
        # The renderer takes a 0-based index; everything user-facing is 1-based.
        page_number = page_idx + 1
        session_id = str(uuid.uuid4())
        page_name = f"{base_name} — Seite {page_number}"

        try:
            img_bgr = render_pdf_high_res(pdf_data, page_number=page_idx, zoom=3.0)
        except Exception as e:
            # Best effort: one broken page must not abort the whole upload.
            logger.warning(f"Failed to render PDF page {page_number}: {e}")
            continue

        ok, png_buf = cv2.imencode(".png", img_bgr)
        if not ok:
            # Previously skipped silently; log it like the render failure above
            # so a missing page can be traced.
            logger.warning(f"Failed to encode PDF page {page_number} as PNG")
            continue
        page_png = png_buf.tobytes()

        await create_session_db(
            session_id=session_id,
            name=page_name,
            filename=filename,
            original_png=page_png,
            document_group_id=document_group_id,
            page_number=page_number,
        )

        _cache[session_id] = {
            "id": session_id,
            "filename": filename,
            "name": page_name,
            "original_bgr": img_bgr,
            "oriented_bgr": None,
            "cropped_bgr": None,
            "deskewed_bgr": None,
            "dewarped_bgr": None,
            "orientation_result": None,
            "crop_result": None,
            "deskew_result": None,
            "dewarp_result": None,
            "ground_truth": {},
            "current_step": 1,
        }

        h, w = img_bgr.shape[:2]
        pages.append({
            "session_id": session_id,
            "name": page_name,
            "page_number": page_number,
            "image_width": w,
            "image_height": h,
            "original_image_url": f"/api/v1/ocr-pipeline/sessions/{session_id}/image/original",
        })

        logger.info(
            f"OCR Pipeline: created page session {session_id} "
            f"(page {page_number}/{page_count}) from {filename} ({w}x{h})"
        )

    return {
        "document_group_id": document_group_id,
        "filename": filename,
        "name": base_name,
        "page_count": page_count,
        "pages": pages,
    }
|
||||
|
||||
|
||||
@router.get("/sessions/{session_id}")
|
||||
async def get_session_info(session_id: str):
|
||||
"""Get session info including deskew/dewarp/column results for step navigation."""
|
||||
|
||||
Reference in New Issue
Block a user