Revert "fix: Zeilen-Regularisierung im Overlay ueberspringen (generisch fuer gemischte Inhalte)"
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 29s
CI / test-go-edu-search (push) Successful in 31s
CI / test-python-klausur (push) Failing after 2m2s
CI / test-python-agent-core (push) Successful in 18s
CI / test-nodejs-website (push) Successful in 24s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 29s
CI / test-go-edu-search (push) Successful in 31s
CI / test-python-klausur (push) Failing after 2m2s
CI / test-python-agent-core (push) Successful in 18s
CI / test-nodejs-website (push) Successful in 24s
This reverts commit b91f799ccf.
This commit is contained in:
@@ -1525,7 +1525,6 @@ def detect_row_geometry(
 word_dicts: List[Dict],
 left_x: int, right_x: int,
 top_y: int, bottom_y: int,
-skip_regularize: bool = False,
 ) -> List['RowGeometry']:
 """Detect row geometry using horizontal whitespace-gap analysis.
 
@@ -1790,13 +1789,8 @@ def detect_row_geometry(
 # and evenly-spaced rows than the gap-based approach alone.
 # Also detects section breaks (headings, paragraphs) where the pitch
 # exceeds 1.8× the median, and handles each section independently.
-#
-# skip_regularize=True: Keep gap-based rows as-is. Useful for full-page
-# overlay rendering where mixed content (info boxes, different line
-# spacings) must preserve original geometry faithfully.
-if not skip_regularize:
-rows = _regularize_row_grid(rows, word_dicts, left_x, right_x, top_y,
-content_w, content_h, inv)
+rows = _regularize_row_grid(rows, word_dicts, left_x, right_x, top_y,
+content_w, content_h, inv)
 
 type_counts = {}
 for r in rows:
@@ -1577,7 +1577,7 @@ async def _get_columns_overlay(session_id: str) -> Response:
 # ---------------------------------------------------------------------------
 
 @router.post("/sessions/{session_id}/rows")
-async def detect_rows(session_id: str, skip_regularize: bool = False):
+async def detect_rows(session_id: str):
 """Run row detection on the cropped (or dewarped) image using horizontal gap analysis."""
 if session_id not in _cache:
 await _load_session_to_cache(session_id)
@@ -1686,7 +1686,6 @@ async def detect_rows(session_id: str, skip_regularize: bool = False):
 combined_h = combined_inv.shape[0]
 rows = detect_row_geometry(
 combined_inv, combined_words, left_x, right_x, 0, combined_h,
-skip_regularize=skip_regularize,
 )
 
 # Remap y-coordinates back to absolute page coords
@@ -1703,12 +1702,10 @@ async def detect_rows(session_id: str, skip_regularize: bool = False):
 r.y = abs_y
 r.height = abs_y_end - abs_y
 else:
-rows = detect_row_geometry(inv, word_dicts, left_x, right_x, top_y, bottom_y,
-skip_regularize=skip_regularize)
+rows = detect_row_geometry(inv, word_dicts, left_x, right_x, top_y, bottom_y)
 else:
 # No boxes — standard row detection
-rows = detect_row_geometry(inv, word_dicts, left_x, right_x, top_y, bottom_y,
-skip_regularize=skip_regularize)
+rows = detect_row_geometry(inv, word_dicts, left_x, right_x, top_y, bottom_y)
 
 duration = time.time() - t0
 
Reference in New Issue
Block a user