debug: Logging fuer Sub-Session Woertererkennung
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 31s
CI / test-go-edu-search (push) Successful in 29s
CI / test-python-agent-core (push) Has been cancelled
CI / test-nodejs-website (push) Has been cancelled
CI / test-python-klausur (push) Has been cancelled
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 31s
CI / test-go-edu-search (push) Successful in 29s
CI / test-python-agent-core (push) Has been cancelled
CI / test-nodejs-website (push) Has been cancelled
CI / test-python-klausur (push) Has been cancelled
Zeigt low-confidence Woerter (conf<30) und Zellinhalte pro Zeile, um fehlende Euro/Pfund-Betraege zu diagnostizieren.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -370,22 +370,28 @@ def build_cell_grid_v2(
|
||||
# Filter low-confidence words
|
||||
words = [w for w in words if w.get('conf', 0) >= _MIN_WORD_CONF]
|
||||
|
||||
# Single full-width column (box sub-session): preserve spacing
|
||||
is_single_full_column = (
|
||||
len(relevant_cols) == 1
|
||||
and img_w > 0
|
||||
and relevant_cols[0].width / img_w > 0.9
|
||||
)
|
||||
|
||||
if words:
|
||||
y_tol = max(15, row.height)
|
||||
# Single full-width column (box sub-session): preserve spacing
|
||||
is_single_full_column = (
|
||||
len(relevant_cols) == 1
|
||||
and img_w > 0
|
||||
and relevant_cols[0].width / img_w > 0.9
|
||||
)
|
||||
if is_single_full_column:
|
||||
text = _words_to_spaced_text(words, y_tolerance_px=y_tol)
|
||||
logger.debug(f"R{row_idx:02d}: {len(words)} words, "
|
||||
f"text={text!r:.100}")
|
||||
else:
|
||||
text = _words_to_reading_order_text(words, y_tolerance_px=y_tol)
|
||||
avg_conf = round(sum(w['conf'] for w in words) / len(words), 1)
|
||||
else:
|
||||
text = ''
|
||||
avg_conf = 0.0
|
||||
if is_single_full_column:
|
||||
logger.debug(f"R{row_idx:02d}: 0 words (row has "
|
||||
f"{row.word_count} total, y={row.y}..{row.y+row.height})")
|
||||
|
||||
# Apply noise filter
|
||||
text = _clean_cell_text(text)
|
||||
|
||||
Reference in New Issue
Block a user