fix: Orientierungserkennung beim PDF-Upload statt erst bei OCR

Die Rotation wird jetzt bereits in upload_pdf_get_info() erkannt, damit Thumbnails bei der Seitenauswahl richtig herum angezeigt werden. Außerdem wurde Debug-Logging für _split_broad_columns hinzugefügt.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-07 19:11:45 +01:00
parent 02631dc4e0
commit e8ba5ec073
2 changed files with 18 additions and 0 deletions
@@ -2094,6 +2094,9 @@ def _split_broad_columns(
    """
    result: List[ColumnGeometry] = []
    logger.info(f"SplitBroadCols: input {len(geometries)} cols: "
                f"{[(g.index, g.x, g.width, g.word_count, round(g.width_ratio, 3)) for g in geometries]}")
    for geo in geometries:
        if geo.width_ratio <= _broad_threshold or len(geo.words) < 10:
            result.append(geo)
@@ -1172,10 +1172,25 @@ async def upload_pdf_get_info(
    session["pdf_page_count"] = page_count
    session["status"] = "pdf_uploaded"
    # Detect orientation for each page so thumbnails are shown correctly
    page_rotations: dict = {}
    if OCR_PIPELINE_AVAILABLE:
        for pg in range(page_count):
            try:
                img_bgr = render_pdf_high_res(content, pg, zoom=1.0)
                _, rotation = detect_and_fix_orientation(img_bgr)
                if rotation:
                    page_rotations[pg] = rotation
                    logger.info(f"Page {pg + 1}: orientation {rotation}°")
            except Exception as e:
                logger.warning(f"Orientation detection failed for page {pg + 1}: {e}")
    session["page_rotations"] = page_rotations
    return {
        "session_id": session_id,
        "page_count": page_count,
        "filename": file.filename,
        "page_rotations": page_rotations,
    }