"""Tests for HTML detection and stripping in document upload.""" from html_utils import ( decode_html_bytes, looks_like_html as _looks_like_html, strip_html as _strip_html, ) class TestLooksLikeHtml: def test_html_document(self): assert _looks_like_html("
Text
") def test_html_div(self): assert _looks_like_html('Absatz 1
Absatz 2
" result = _strip_html(html) assert "Absatz 1" in result assert "Absatz 2" in result # Paragraphs should be on separate lines lines = [ln.strip() for ln in result.split("\n") if ln.strip()] assert len(lines) >= 2 def test_preserves_section_headers(self): """§ signs must be at line starts after stripping.""" html = '§ 1 Text
' result = _strip_html(html) assert "color" not in result assert "alert" not in result assert "§ 1 Text" in result def test_br_becomes_newline(self): html = "Zeile 1