diff --git a/klausur-service/backend/grid_editor_api.py b/klausur-service/backend/grid_editor_api.py index 3f39caa..b739f27 100644 --- a/klausur-service/backend/grid_editor_api.py +++ b/klausur-service/backend/grid_editor_api.py @@ -1198,10 +1198,22 @@ async def _build_grid_core( has_real_ipa = any(c in _REAL_IPA_CHARS_SET for c in text) # Comma-separated text is a content continuation, not a footer has_commas = ',' in text - # Long text (>20 chars) is unlikely a page number - is_short = len(text) <= 20 + # Written-out page numbers like "two hundred and nine" + _NUMBER_WORDS = { + "one", "two", "three", "four", "five", "six", "seven", + "eight", "nine", "ten", "eleven", "twelve", "thirteen", + "fourteen", "fifteen", "sixteen", "seventeen", "eighteen", + "nineteen", "twenty", "thirty", "forty", "fifty", "sixty", + "seventy", "eighty", "ninety", "hundred", "thousand", "and", + "einhundert", "zweihundert", "dreihundert", "vierhundert", + "und", "zehn", "zwanzig", "dreißig", "vierzig", "fünfzig", + } + text_words = set(text.lower().split()) + is_written_number = len(text_words) >= 2 and text_words.issubset(_NUMBER_WORDS) + # Short text or written-out number + is_page_number = len(text) <= 20 or is_written_number if (text and not has_real_ipa and not has_commas - and is_short + and is_page_number and last_cells[0].get("col_type") != "heading"): footer_rows.append({ "row_index": last_ri,