"""Tests fuer legal_metadata (rag_reingest_spec.md §3-Normalisierung).""" import sys from pathlib import Path sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) import legal_metadata as L def test_detect_citation_style(): assert L.detect_citation_style("§ 38") == "paragraph" assert L.detect_citation_style("Artikel 13") == "article" assert L.detect_citation_style("Art. 13") == "article" def test_normalize_article(): assert L.normalize_article("§ 38") == "38" assert L.normalize_article("Artikel 13") == "13" assert L.normalize_article("Art. 13a") == "13a" assert L.normalize_article("[Artikel 17]") == "17" assert L.normalize_article("") == "" # Strikt: Datum/Seite/Titel sind KEINE Fundstelle -> leer (keine falsche Zitierung) assert L.normalize_article("4.5.2016 DE Amtsblatt der Europaeischen Union L 119/9") == "" assert L.normalize_article("Begriffsbestimmungen") == "" def test_normalize_paragraph(): assert L.normalize_paragraph("(1)") == "1" assert L.normalize_paragraph("Abs. 2") == "2" assert L.normalize_paragraph("") == "" def test_extract_sub(): assert L.extract_sub("... lit. c ...") == "lit. c" assert L.extract_sub("Satz 2 des Absatzes") == "Satz 2" assert L.extract_sub("kein Sub hier") == "" def test_format_article_label_paragraph_style(): # Klarname (regulation_short) wird NICHT mehr uppercased -> camelCase bleibt erhalten. assert L.format_article_label("ProdHaftG", "paragraph", "1", "", "") == "ProdHaftG § 1" assert L.format_article_label("BDSG", "paragraph", "38", "1", "") == "BDSG § 38 Abs. 1" assert L.format_article_label("BDSG", "paragraph", "38", "", "") == "BDSG § 38" def test_format_article_label_article_style(): assert L.format_article_label("DSGVO", "article", "13", "1", "lit. c") == "Art. 13 Abs. 1 lit. c DSGVO" assert L.format_article_label("DSGVO", "article", "30", "", "") == "Art. 30 DSGVO" # CH-Recht: Klarname + Art. assert L.format_article_label("RevDSG", "article", "12", "", "") == "Art. 12 RevDSG" def test_compute_chunk_hash_stable_and_ws_insensitive(): a = L.compute_chunk_hash("Hallo Welt\n") b = L.compute_chunk_hash("Hallo Welt") assert a == b and len(a) == 64 def test_deterministic_point_id_stable(): i1 = L.deterministic_point_id("BDSG", "38", "1", 0, "2026-06-19") i2 = L.deterministic_point_id("BDSG", "38", "1", 0, "2026-06-19") i3 = L.deterministic_point_id("BDSG", "38", "1", 1, "2026-06-19") assert i1 == i2 and i1 != i3 and len(i1) == 36 # UUID def test_build_legal_fields_citation_style_override(): # DSGVO = EU-VO -> citation_style 'article' pro Regulierung, NICHT '§' aus dem Section-String out = L.build_legal_fields( {"section": "§ 4", "section_title": "Begriffsbestimmungen", "paragraph": ""}, "DSGVO", citation_style="article", ) assert out["citation_style"] == "article" assert out["article"] == "4" assert out["article_label"] == "Art. 4 DSGVO" def test_build_legal_fields_integration(): out = L.build_legal_fields( {"section": "§ 38", "section_title": "Datenschutzbeauftragte", "paragraph": "(1)"}, "bdsg", chunk_text="... in der Regel mindestens 20 Personen ...", display_name="BDSG", ) assert out["citation_style"] == "paragraph" assert out["article"] == "38" assert out["paragraph"] == "" # artikel-genau, kein geratener Absatz assert out["regulation_code"] == "BDSG" # Feldwert bleibt GROSS assert out["article_label"] == "BDSG § 38" assert out["section_header"] == "Datenschutzbeauftragte" assert out["is_recital"] is False def test_build_legal_fields_camelcase_display_name(): # camelCase-Klarname bleibt im Label erhalten, Feld regulation_code bleibt GROSS. out = L.build_legal_fields( {"section": "§ 1", "section_title": "Haftung"}, "PRODHAFTG", display_name="ProdHaftG", ) assert out["regulation_code"] == "PRODHAFTG" assert out["article_label"] == "ProdHaftG § 1" def test_build_legal_fields_ruling_uses_aktenzeichen_not_paragraph(): # Urteil: Querverweis "§ 87 BetrVG" im Text darf KEIN Label "AZ § 87" erzeugen. out = L.build_legal_fields( {"section": "§ 87", "section_title": "Mitbestimmung"}, "BAG_1_ABR_22_21", source_type="urteil", display_name="BAG, 1 ABR 22/21", ) assert out["article"] == "" assert out["article_label"] == "BAG, 1 ABR 22/21" assert out["regulation_code"] == "BAG_1_ABR_22_21"