fix: Orientierungserkennung beim PDF-Upload statt erst bei OCR
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 23s
CI / test-go-edu-search (push) Successful in 23s
CI / test-python-klausur (push) Failing after 1m47s
CI / test-python-agent-core (push) Successful in 17s
CI / test-nodejs-website (push) Successful in 17s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 23s
CI / test-go-edu-search (push) Successful in 23s
CI / test-python-klausur (push) Failing after 1m47s
CI / test-python-agent-core (push) Successful in 17s
CI / test-nodejs-website (push) Successful in 17s
Rotation wird jetzt in upload_pdf_get_info() erkannt, damit Thumbnails bei der Seitenauswahl bereits richtig herum angezeigt werden. Debug-Logging für _split_broad_columns hinzugefügt.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -2094,6 +2094,9 @@ def _split_broad_columns(
|
|||||||
"""
|
"""
|
||||||
result: List[ColumnGeometry] = []
|
result: List[ColumnGeometry] = []
|
||||||
|
|
||||||
|
logger.info(f"SplitBroadCols: input {len(geometries)} cols: "
|
||||||
|
f"{[(g.index, g.x, g.width, g.word_count, round(g.width_ratio, 3)) for g in geometries]}")
|
||||||
|
|
||||||
for geo in geometries:
|
for geo in geometries:
|
||||||
if geo.width_ratio <= _broad_threshold or len(geo.words) < 10:
|
if geo.width_ratio <= _broad_threshold or len(geo.words) < 10:
|
||||||
result.append(geo)
|
result.append(geo)
|
||||||
|
|||||||
@@ -1172,10 +1172,25 @@ async def upload_pdf_get_info(
|
|||||||
session["pdf_page_count"] = page_count
|
session["pdf_page_count"] = page_count
|
||||||
session["status"] = "pdf_uploaded"
|
session["status"] = "pdf_uploaded"
|
||||||
|
|
||||||
|
# Detect orientation for each page so thumbnails are shown correctly
|
||||||
|
page_rotations: dict = {}
|
||||||
|
if OCR_PIPELINE_AVAILABLE:
|
||||||
|
for pg in range(page_count):
|
||||||
|
try:
|
||||||
|
img_bgr = render_pdf_high_res(content, pg, zoom=1.0)
|
||||||
|
_, rotation = detect_and_fix_orientation(img_bgr)
|
||||||
|
if rotation:
|
||||||
|
page_rotations[pg] = rotation
|
||||||
|
logger.info(f"Page {pg + 1}: orientation {rotation}°")
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Orientation detection failed for page {pg + 1}: {e}")
|
||||||
|
session["page_rotations"] = page_rotations
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"session_id": session_id,
|
"session_id": session_id,
|
||||||
"page_count": page_count,
|
"page_count": page_count,
|
||||||
"filename": file.filename,
|
"filename": file.filename,
|
||||||
|
"page_rotations": page_rotations,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user