From 964c916a816172decfc1569776e54a5f2be0b5f7 Mon Sep 17 00:00:00 2001
From: Benjamin Admin <benjaminadmin@MacBookPro.fritz.box>
Date: Tue, 10 Mar 2026 09:41:25 +0100
Subject: [PATCH] fix: _clean_cell_text entfernt Waehrungssymbole am Zeilenende
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

_is_noise_tail_token() stuft rein nicht-alphabetische Tokens wie
€0.50, £1, €2.50 als OCR-Noise ein, sodass _clean_cell_text sie am
Zeilenende entfernt. Zusaetzlich zerstoert das ' '.join(tokens) in
_clean_cell_text das proportionale Spacing aus _words_to_spaced_text.

Deshalb wird _clean_cell_text fuer Single-Column Sub-Sessions
(is_single_full_column) uebersprungen.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 klausur-service/backend/cv_cell_grid.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/klausur-service/backend/cv_cell_grid.py b/klausur-service/backend/cv_cell_grid.py
index 56ccc53..e20499d 100644
--- a/klausur-service/backend/cv_cell_grid.py
+++ b/klausur-service/backend/cv_cell_grid.py
@@ -393,8 +393,13 @@ def build_cell_grid_v2(
                         logger.info(f"R{row_idx:02d}: 0 words (row has "
                                     f"{row.word_count} total, y={row.y}..{row.y+row.height})")
 
-                # Apply noise filter
-                text = _clean_cell_text(text)
+                # Apply noise filter — but NOT for single-column sub-sessions:
+                # 1. _clean_cell_text strips trailing non-alpha tokens (e.g. €0.50,
+                #    £1, €2.50) which are valid content in box layouts.
+                # 2. _clean_cell_text joins tokens with a single space, destroying
+                #    the proportional spacing from _words_to_spaced_text.
+                if not is_single_full_column:
+                    text = _clean_cell_text(text)
 
                 cell = {
                     'cell_id': f"R{row_idx:02d}_C{col_idx}",