feat: Full-Row OCR mit Spacing fuer Box-Sub-Sessions
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 40s
CI / test-go-edu-search (push) Successful in 28s
CI / test-python-klausur (push) Failing after 2m16s
CI / test-python-agent-core (push) Successful in 17s
CI / test-nodejs-website (push) Successful in 22s

Sub-Sessions ueberspringen Spaltenerkennung und nutzen stattdessen eine
Pseudo-Spalte ueber die volle Breite. Text wird mit proportionalem
Spacing aus Wort-Positionen rekonstruiert, um raeumliches Layout zu erhalten.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-03-10 08:28:29 +01:00
parent 34adb437d0
commit 23b7840ea7
4 changed files with 91 additions and 1 deletions

View File

@@ -45,6 +45,7 @@ export function StepColumnDetection({ sessionId, onNext, onBoxSessionsCreated }:
const [savedGtColumns, setSavedGtColumns] = useState<PageRegion[] | null>(null)
const [creatingBoxSessions, setCreatingBoxSessions] = useState(false)
const [existingSubSessions, setExistingSubSessions] = useState<SubSession[] | null>(null)
const [isSubSession, setIsSubSession] = useState(false)
// Fetch session info (image dimensions) + check for cached column result
useEffect(() => {
@@ -58,14 +59,31 @@ export function StepColumnDetection({ sessionId, onNext, onBoxSessionsCreated }:
if (info.image_width && info.image_height) {
setImageDimensions({ width: info.image_width, height: info.image_height })
}
const isSub = !!info.parent_session_id
setIsSubSession(isSub)
if (info.sub_sessions && info.sub_sessions.length > 0) {
setExistingSubSessions(info.sub_sessions)
onBoxSessionsCreated?.(info.sub_sessions)
}
if (info.column_result) {
setColumnResult(info.column_result)
// Sub-session with pseudo-column already set → auto-advance
if (isSub) {
onNext()
return
}
return
}
// Sub-session without columns → auto-detect (creates pseudo-column)
if (isSub) {
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/columns`, { method: 'POST' })
if (res.ok) {
const data: ColumnResult = await res.json()
setColumnResult(data)
onNext()
return
}
}
}
} catch (e) {
console.error('Failed to fetch session info:', e)

View File

@@ -24,6 +24,7 @@ from cv_ocr_engines import (
_fix_phonetic_brackets,
_split_comma_entries,
_words_to_reading_order_text,
_words_to_spaced_text,
ocr_region_lighton,
ocr_region_rapid,
ocr_region_trocr,
@@ -371,7 +372,16 @@ def build_cell_grid_v2(
if words:
y_tol = max(15, row.height)
text = _words_to_reading_order_text(words, y_tolerance_px=y_tol)
# Single full-width column (box sub-session): preserve spacing
is_single_full_column = (
len(relevant_cols) == 1
and img_w > 0
and relevant_cols[0].width / img_w > 0.9
)
if is_single_full_column:
text = _words_to_spaced_text(words, y_tolerance_px=y_tol)
else:
text = _words_to_reading_order_text(words, y_tolerance_px=y_tol)
avg_conf = round(sum(w['conf'] for w in words) / len(words), 1)
else:
text = ''

View File

@@ -124,6 +124,40 @@ def _words_to_reading_order_text(words: List[Dict], y_tolerance_px: int = 15) ->
return '\n'.join(lines)
def _words_to_spaced_text(words: List[Dict], y_tolerance_px: int = 15) -> str:
    """Join OCR words preserving proportional horizontal spacing.

    Instead of single spaces between words, inserts multiple spaces based on
    the pixel gap between words relative to the average character width of
    the line. Useful for box sub-sessions where spatial layout matters.

    Args:
        words: OCR word dicts. Each dict is read for 'left' and 'width'
            (pixel values) and 'text' (a missing 'text' is treated as '').
        y_tolerance_px: Passed through to ``_group_words_into_lines`` as the
            vertical tolerance used for grouping words into lines.

    Returns:
        One text line per non-empty word group, joined with newlines. Words
        within a line are ordered by 'left' and separated by at least one
        space.
    """
    # Fallback used when a line offers no usable width information.
    default_char_width = 10

    # NOTE(review): presumably returns a list of per-line word lists -
    # confirm against the helper's definition.
    lines = _group_words_into_lines(words, y_tolerance_px=y_tolerance_px)

    result_lines = []
    for line_words in lines:
        if not line_words:
            continue

        sorted_words = sorted(line_words, key=lambda w: w['left'])

        # Average character width is estimated from words that carry text.
        texted_words = [w for w in sorted_words if w.get('text')]
        total_chars = sum(len(w['text']) for w in texted_words)
        total_width = sum(w['width'] for w in texted_words)

        # Guard against a zero (or negative) average: zero-width boxes with
        # text would otherwise cause a ZeroDivisionError below.
        if total_chars > 0 and total_width > 0:
            avg_char_width = total_width / total_chars
        else:
            avg_char_width = default_char_width

        parts = []
        last_index = len(sorted_words) - 1
        for i, word in enumerate(sorted_words):
            parts.append(word.get('text', ''))
            if i < last_index:
                next_word = sorted_words[i + 1]
                gap_px = next_word['left'] - (word['left'] + word['width'])
                # Overlapping or touching boxes still get one separator.
                num_spaces = max(1, round(gap_px / avg_char_width))
                parts.append(' ' * num_spaces)

        result_lines.append(''.join(parts))

    return '\n'.join(result_lines)
# --- RapidOCR integration (PaddleOCR models on ONNX Runtime) ---
_rapid_engine = None

View File

@@ -1209,6 +1209,34 @@ async def detect_columns(session_id: str):
if img_bgr is None:
raise HTTPException(status_code=400, detail="Crop or dewarp must be completed before column detection")
# Sub-sessions: skip column detection, create single pseudo-column
session = await get_session_db(session_id)
if session and session.get("parent_session_id"):
h, w = img_bgr.shape[:2]
column_result = {
"columns": [{
"type": "column_text",
"x": 0, "y": 0,
"width": w, "height": h,
}],
"zones": None,
"boxes_detected": 0,
"duration_seconds": 0,
"method": "sub_session_pseudo_column",
}
await update_session_db(
session_id,
column_result=column_result,
row_result=None,
word_result=None,
current_step=6,
)
cached["column_result"] = column_result
cached.pop("row_result", None)
cached.pop("word_result", None)
logger.info(f"OCR Pipeline: sub-session {session_id}: pseudo-column {w}x{h}px")
return {"session_id": session_id, **column_result}
t0 = time.time()
# Binarized image for layout analysis