Files
breakpilot-lehrer/klausur-service/backend/tests/test_cell_phonetics.py
Benjamin Admin 2f51ac617f
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 34s
CI / test-go-edu-search (push) Successful in 31s
CI / test-python-klausur (push) Failing after 2m5s
CI / test-python-agent-core (push) Successful in 23s
CI / test-nodejs-website (push) Successful in 22s
feat: IPA-Lautschrift in Cell-Texte einfuegen (fuer Overlay-Modus)
fix_cell_phonetics() ersetzt fehlerhafte IPA-Klammern UND fuegt fehlende
Lautschrift fuer englische Woerter ein (z.B. badge, film, challenge, profit).
Wird auf alle Zellen mit col_type column_en/column_text angewandt.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-11 15:47:26 +01:00

118 lines
4.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Tests for fix_cell_phonetics and _insert_missing_ipa."""
import pytest
from unittest.mock import patch, MagicMock
import sys
import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
class TestInsertMissingIpa:
    """Checks for ``_insert_missing_ipa`` using the "british" pronunciation."""

    @staticmethod
    def _annotate(text):
        """Import ``_insert_missing_ipa`` at call time and apply it to *text*.

        The import stays inside the call so it runs while the test executes,
        not while the module is being collected.
        """
        from cv_ocr_engines import _insert_missing_ipa

        return _insert_missing_ipa(text, "british")

    def test_single_headword_gets_ipa(self):
        """A lone headword comes back followed by a bracketed transcription."""
        annotated = self._annotate("badge")
        assert "[" in annotated
        assert "]" in annotated
        assert annotated.startswith("badge [")

    def test_short_phrase_first_word_gets_ipa(self):
        """The word "film" receives an opening bracket.

        NOTE(review): the test name speaks of a short phrase, yet the input
        is a single word -- consider adding a real multi-word case.
        """
        annotated = self._annotate("film")
        assert "[" in annotated

    def test_long_sentence_unchanged(self):
        """A full sentence (more than 6 words) is returned untouched."""
        sentence = "Can I borrow your CD player from you please"
        assert self._annotate(sentence) == sentence

    def test_existing_brackets_unchanged(self):
        """Text that already carries brackets must not get a second IPA."""
        already_annotated = "dance [dˈɑːns]"
        assert self._annotate(already_annotated) == already_annotated

    def test_empty_text_unchanged(self):
        """Empty and blank input both yield the empty string."""
        assert self._annotate("") == ""
        assert self._annotate(" ") == ""

    def test_grammar_words_skipped(self):
        """Grammar particles are left without a transcription."""
        # Per the original author's note, "sth" is listed in
        # _GRAMMAR_BRACKET_WORDS.
        annotated = self._annotate("sth")
        assert "[" not in annotated

    def test_german_word_no_ipa(self):
        """A German word (expected to have no IPA entry) stays as it was."""
        annotated = self._annotate("Anstecknadel")
        assert annotated == "Anstecknadel"
class TestFixCellPhonetics:
    """Checks for ``fix_cell_phonetics``, which edits cell dicts in place."""

    @staticmethod
    def _fix(cells):
        """Import ``fix_cell_phonetics`` at call time and run it on *cells*.

        Returns the very list that was passed in, so a test can build the
        cells and inspect them afterwards in one expression.
        """
        from cv_ocr_engines import fix_cell_phonetics

        fix_cell_phonetics(cells, pronunciation="british")
        return cells

    def test_english_column_cells_processed(self):
        """A ``column_en`` cell gains IPA; a ``column_de`` cell does not change."""
        english, german = self._fix([
            {"cell_id": "c1", "col_type": "column_en", "text": "badge"},
            {"cell_id": "c2", "col_type": "column_de", "text": "Anstecknadel"},
        ])
        # The English cell must now contain a bracket ...
        assert "[" in english["text"]
        # ... while the German cell keeps its exact text.
        assert german["text"] == "Anstecknadel"

    def test_column_text_cells_processed(self):
        """A ``column_text`` cell is processed as well."""
        (only_cell,) = self._fix([
            {"cell_id": "c1", "col_type": "column_text", "text": "challenge"},
        ])
        assert "[" in only_cell["text"]

    def test_garbled_ipa_replaced(self):
        """A garbled bracket after the headword is swapped for a proper one."""
        (only_cell,) = self._fix([
            {"cell_id": "c1", "col_type": "column_en", "text": "dance {'tfatno]"},
        ])
        fixed_text = only_cell["text"]
        # The headword is now followed by a real "[" and the garbage is gone.
        assert "dance [" in fixed_text
        assert "{'tfatno]" not in fixed_text

    def test_empty_cells_unchanged(self):
        """Cells whose text is empty or ``None`` must not raise.

        Only the empty-string cell is asserted on; the ``None`` cell is there
        to prove the call completes without an error.
        """
        processed = self._fix([
            {"cell_id": "c1", "col_type": "column_en", "text": ""},
            {"cell_id": "c2", "col_type": "column_en", "text": None},
        ])
        assert processed[0]["text"] == ""

    def test_non_english_col_types_skipped(self):
        """``column_de`` and ``column_example`` cells keep their text verbatim."""
        german, example = self._fix([
            {"cell_id": "c1", "col_type": "column_de", "text": "Eis (gefrorenes Wasser)"},
            {"cell_id": "c2", "col_type": "column_example", "text": "(sich beschweren)"},
        ])
        assert german["text"] == "Eis (gefrorenes Wasser)"
        assert example["text"] == "(sich beschweren)"