From b1f7fee2840fbf2e4409a5550780a7329d3e594d Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Tue, 3 Mar 2026 14:50:16 +0100 Subject: [PATCH] =?UTF-8?q?fix(ocr-review):=20add=20pipe=E2=86=921=20as=20?= =?UTF-8?q?valid=20OCR=20correction=20in=20=5Fis=5Fspurious=5Fchange?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extend _OCR_CHAR_MAP to treat '|' as a possible misread of digit '1' in addition to letters l/L/i/I, and likewise treat 'l' as a possible misread of digit '1'. Fixes cases like 'cross = |. Kreuz' → 'cross = 1. Kreuz' (numbered list prefix) being rejected. Co-Authored-By: Claude Sonnet 4.6 --- klausur-service/backend/cv_vocab_pipeline.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/klausur-service/backend/cv_vocab_pipeline.py b/klausur-service/backend/cv_vocab_pipeline.py index 0fd82f1..3f4578f 100644 --- a/klausur-service/backend/cv_vocab_pipeline.py +++ b/klausur-service/backend/cv_vocab_pipeline.py @@ -5487,8 +5487,8 @@ def _is_spurious_change(old_val: str, new_val: str) -> bool: '6': set('gG'), '8': set('bB'), # Non-letter symbols mistaken for letters - '|': set('lLiI'), # pipe → lowercase l or capital I - 'l': set('iI|'), # lowercase l → capital I (and reverse) + '|': set('lLiI1'), # pipe → lowercase l, capital I, or digit 1 + 'l': set('iI|1'), # lowercase l → capital I (and reverse), or digit 1 } has_valid_fix = False if len(old_val) == len(new_val):