diff --git a/klausur-service/backend/cv_vocab_pipeline.py b/klausur-service/backend/cv_vocab_pipeline.py index e145565..fc5e690 100644 --- a/klausur-service/backend/cv_vocab_pipeline.py +++ b/klausur-service/backend/cv_vocab_pipeline.py @@ -2094,6 +2094,9 @@ def _split_broad_columns( """ result: List[ColumnGeometry] = [] + logger.info(f"SplitBroadCols: input {len(geometries)} cols: " + f"{[(g.index, g.x, g.width, g.word_count, round(g.width_ratio, 3)) for g in geometries]}") + for geo in geometries: if geo.width_ratio <= _broad_threshold or len(geo.words) < 10: result.append(geo) diff --git a/klausur-service/backend/vocab_worksheet_api.py b/klausur-service/backend/vocab_worksheet_api.py index 20b70f0..5b8d45a 100644 --- a/klausur-service/backend/vocab_worksheet_api.py +++ b/klausur-service/backend/vocab_worksheet_api.py @@ -1172,10 +1172,25 @@ async def upload_pdf_get_info( session["pdf_page_count"] = page_count session["status"] = "pdf_uploaded" + # Detect orientation for each page so thumbnails are shown correctly + page_rotations: dict = {} + if OCR_PIPELINE_AVAILABLE: + for pg in range(page_count): + try: + img_bgr = render_pdf_high_res(content, pg, zoom=1.0) + _, rotation = detect_and_fix_orientation(img_bgr) + if rotation: + page_rotations[pg] = rotation + logger.info(f"Page {pg + 1}: orientation {rotation}°") + except Exception as e: + logger.warning(f"Orientation detection failed for page {pg + 1}: {e}") + session["page_rotations"] = page_rotations + return { "session_id": session_id, "page_count": page_count, "filename": file.filename, + "page_rotations": page_rotations, }