Revert "fix: Zeilen-Regularisierung im Overlay ueberspringen (generisch fuer gemischte Inhalte)"
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 29s
CI / test-go-edu-search (push) Successful in 31s
CI / test-python-klausur (push) Failing after 2m2s
CI / test-python-agent-core (push) Successful in 18s
CI / test-nodejs-website (push) Successful in 24s

This reverts commit b91f799ccf.
This commit is contained in:
Benjamin Admin
2026-03-11 08:44:07 +01:00
parent b91f799ccf
commit e3ee1de790
4 changed files with 9 additions and 23 deletions

View File

@@ -216,7 +216,7 @@ export default function OcrOverlayPage() {
case 3: case 3:
return <StepCrop sessionId={sessionId} onNext={handleNext} /> return <StepCrop sessionId={sessionId} onNext={handleNext} />
case 4: case 4:
return <StepRowDetection sessionId={sessionId} onNext={handleNext} skipRegularize /> return <StepRowDetection sessionId={sessionId} onNext={handleNext} />
case 5: case 5:
return <StepWordRecognition sessionId={sessionId} onNext={handleNext} goToStep={goToStep} /> return <StepWordRecognition sessionId={sessionId} onNext={handleNext} goToStep={goToStep} />
case 6: case 6:

View File

@@ -8,11 +8,9 @@ const KLAUSUR_API = '/klausur-api'
interface StepRowDetectionProps { interface StepRowDetectionProps {
sessionId: string | null sessionId: string | null
onNext: () => void onNext: () => void
/** Skip word-center regularization (better for mixed-content pages with boxes) */
skipRegularize?: boolean
} }
export function StepRowDetection({ sessionId, onNext, skipRegularize = false }: StepRowDetectionProps) { export function StepRowDetection({ sessionId, onNext }: StepRowDetectionProps) {
const [rowResult, setRowResult] = useState<RowResult | null>(null) const [rowResult, setRowResult] = useState<RowResult | null>(null)
const [detecting, setDetecting] = useState(false) const [detecting, setDetecting] = useState(false)
const [error, setError] = useState<string | null>(null) const [error, setError] = useState<string | null>(null)
@@ -48,10 +46,7 @@ export function StepRowDetection({ sessionId, onNext, skipRegularize = false }:
setDetecting(true) setDetecting(true)
setError(null) setError(null)
try { try {
const rowsUrl = skipRegularize const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/rows`, {
? `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/rows?skip_regularize=true`
: `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/rows`
const res = await fetch(rowsUrl, {
method: 'POST', method: 'POST',
}) })
if (!res.ok) { if (!res.ok) {
@@ -65,7 +60,7 @@ export function StepRowDetection({ sessionId, onNext, skipRegularize = false }:
} finally { } finally {
setDetecting(false) setDetecting(false)
} }
}, [sessionId, skipRegularize]) }, [sessionId])
const handleGroundTruth = useCallback(async (isCorrect: boolean) => { const handleGroundTruth = useCallback(async (isCorrect: boolean) => {
if (!sessionId) return if (!sessionId) return

View File

@@ -1525,7 +1525,6 @@ def detect_row_geometry(
word_dicts: List[Dict], word_dicts: List[Dict],
left_x: int, right_x: int, left_x: int, right_x: int,
top_y: int, bottom_y: int, top_y: int, bottom_y: int,
skip_regularize: bool = False,
) -> List['RowGeometry']: ) -> List['RowGeometry']:
"""Detect row geometry using horizontal whitespace-gap analysis. """Detect row geometry using horizontal whitespace-gap analysis.
@@ -1790,13 +1789,8 @@ def detect_row_geometry(
# and evenly-spaced rows than the gap-based approach alone. # and evenly-spaced rows than the gap-based approach alone.
# Also detects section breaks (headings, paragraphs) where the pitch # Also detects section breaks (headings, paragraphs) where the pitch
# exceeds 1.8× the median, and handles each section independently. # exceeds 1.8× the median, and handles each section independently.
# rows = _regularize_row_grid(rows, word_dicts, left_x, right_x, top_y,
# skip_regularize=True: Keep gap-based rows as-is. Useful for full-page content_w, content_h, inv)
# overlay rendering where mixed content (info boxes, different line
# spacings) must preserve original geometry faithfully.
if not skip_regularize:
rows = _regularize_row_grid(rows, word_dicts, left_x, right_x, top_y,
content_w, content_h, inv)
type_counts = {} type_counts = {}
for r in rows: for r in rows:

View File

@@ -1577,7 +1577,7 @@ async def _get_columns_overlay(session_id: str) -> Response:
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
@router.post("/sessions/{session_id}/rows") @router.post("/sessions/{session_id}/rows")
async def detect_rows(session_id: str, skip_regularize: bool = False): async def detect_rows(session_id: str):
"""Run row detection on the cropped (or dewarped) image using horizontal gap analysis.""" """Run row detection on the cropped (or dewarped) image using horizontal gap analysis."""
if session_id not in _cache: if session_id not in _cache:
await _load_session_to_cache(session_id) await _load_session_to_cache(session_id)
@@ -1686,7 +1686,6 @@ async def detect_rows(session_id: str, skip_regularize: bool = False):
combined_h = combined_inv.shape[0] combined_h = combined_inv.shape[0]
rows = detect_row_geometry( rows = detect_row_geometry(
combined_inv, combined_words, left_x, right_x, 0, combined_h, combined_inv, combined_words, left_x, right_x, 0, combined_h,
skip_regularize=skip_regularize,
) )
# Remap y-coordinates back to absolute page coords # Remap y-coordinates back to absolute page coords
@@ -1703,12 +1702,10 @@ async def detect_rows(session_id: str, skip_regularize: bool = False):
r.y = abs_y r.y = abs_y
r.height = abs_y_end - abs_y r.height = abs_y_end - abs_y
else: else:
rows = detect_row_geometry(inv, word_dicts, left_x, right_x, top_y, bottom_y, rows = detect_row_geometry(inv, word_dicts, left_x, right_x, top_y, bottom_y)
skip_regularize=skip_regularize)
else: else:
# No boxes — standard row detection # No boxes — standard row detection
rows = detect_row_geometry(inv, word_dicts, left_x, right_x, top_y, bottom_y, rows = detect_row_geometry(inv, word_dicts, left_x, right_x, top_y, bottom_y)
skip_regularize=skip_regularize)
duration = time.time() - t0 duration = time.time() - t0