"""Tests for the B9 multi-entity Impressum check.

GT anchor: Elli IMPRESSUM-001 — when an Impressum lists several legal
entities and the USt-IdNr (VAT ID) is present for only one of the two, the
finding has to be detected per entity.
"""

from compliance.services.impressum_multi_entity_check import (
    _clean_entity_name,
    _slice_entities,
    check_multi_entity_impressum,
)

# check_id of the per-entity USt-IdNr finding that the tests below filter for.
_UST_ID_CHECK = "IMPRESSUM-MULTI-UST_ID"

# Two entities sharing one Impressum; only the second one states a USt-IdNr.
# NOTE(review): the whitespace of this fixture is reproduced exactly as found
# (single spaces, no line breaks). If it was meant to carry one field per
# line, restore the line breaks — TODO confirm against the intended layout.
_ELLI_LIKE = (
    " Impressum "
    "Volkswagen Group Charging GmbH "
    "Karl-Liebknecht-Str. 32 "
    "10178 Berlin "
    "Amtsgericht Charlottenburg HRB 208967 B "
    "Geschäftsführer: Giovanni Palazzo, Mark Möller "
    "Telefon: 00800 3554 1111 "
    "E-Mail: info@elli.eco "
    "Elli Mobility GmbH "
    "Karl-Liebknecht-Str. 32 "
    "10178 Berlin "
    "Amtsgericht Charlottenburg HRB 274616 B "
    "USt-IdNr.: DE814424009 "
    "Geschäftsführer: Joschi Jennermann "
    "Telefon: 00800 00002030 "
    "E-Mail: ellimobility@elli.eco "
)

# Running-text variant with "eco"/"Die" noise in front of the entity names
# and the spelled-out "Umsatzsteueridentifikationsnummer".
# NOTE(review): whitespace reproduced exactly as found (see _ELLI_LIKE).
_ELLI_REAL_WORLD = (
    " eco "
    "Volkswagen Group Charging GmbH ist im Handelsregister des "
    "Amtsgerichtes Charlottenburg unter der Nummer HRB 208967 B "
    "eingetragen. "
    "Verantwortlich für den Inhalt nach § 55 Abs. 2 RStV: Giovanni Palazzo. "
    "eco "
    "Die Elli Mobility GmbH ist im Handelsregister des "
    "Amtsgerichtes Charlottenburg unter der Nummer HRB 274616 B "
    "eingetragen. "
    "Die Umsatzsteueridentifikationsnummer der Elli Mobility GmbH "
    "ist DE814424009. "
    "Postanschrift: Karl-Liebknecht-Str. 32. "
    "Geschäftsführer: Joschi Jennermann Sebastian Steffen. "
)


def _ust_id_findings(impressum_text):
    """Run the check on *impressum_text* and keep only USt-IdNr findings."""
    findings = check_multi_entity_impressum(
        {"doc_texts": {"impressum": impressum_text}}
    )
    return [f for f in findings if f["check_id"] == _UST_ID_CHECK]


def _entity_names(text):
    """Return the names from ``_slice_entities(text)`` in slice order."""
    return [name for name, _ in _slice_entities(text)]


class TestRealWorldElliPattern:
    """Regression: Elli's real-world, HTML-to-text extracted form.

    Covers leading-noise artifacts (eco/Die), the HRB boundary, and the
    long form of USt-IdNr instead of the abbreviation.
    """

    def test_slice_finds_two_clean_entities(self):
        assert _entity_names(_ELLI_REAL_WORLD) == [
            "Volkswagen Group Charging GmbH",
            "Elli Mobility GmbH",
        ]

    def test_ust_id_long_form_detected(self):
        # "Umsatzsteueridentifikationsnummer der ... ist DE..."
        ust = _ust_id_findings(_ELLI_REAL_WORLD)
        assert len(ust) == 1
        assert ust[0]["entities_missing"] == [
            "Volkswagen Group Charging GmbH",
        ]
        assert ust[0]["entities_present"] == ["Elli Mobility GmbH"]

    def test_blocklist_filters_false_positives(self):
        # "Programmierung der Webseite Elli Mobility GmbH" was an
        # over-match before the blocklist. With it, the only "entity"
        # candidates per HRB-block are the actual GmbHs.
        noisy = (
            "Acme GmbH HRB 1000 B Berlin.\n"
            "Foo Holding GmbH HRB 2000 B München.\n"
            "Programmierung der Webseite Acme GmbH.\n"
            "Umsatzsteueridentifikationsnummer der Foo Holding GmbH "
            "ist DE111111111."
        )
        # Both real entities, no false positives.
        assert sorted(_entity_names(noisy)) == [
            "Acme GmbH",
            "Foo Holding GmbH",
        ]


class TestCleanEntityName:
    """Unit tests for ``_clean_entity_name`` on raw name captures."""

    def test_strips_header_prefix(self):
        raw = "Impressum\n\nVolkswagen Group Charging GmbH"
        assert _clean_entity_name(raw) == "Volkswagen Group Charging GmbH"

    def test_strips_email_artifact(self):
        # mid-text matches may capture an email TLD before the next entity
        raw = "eco\n\nElli Mobility GmbH"
        assert _clean_entity_name(raw) == "Elli Mobility GmbH"

    def test_collapses_whitespace(self):
        # The input must contain runs of whitespace; with single spaces the
        # assertion would be indistinguishable from the pass-through test.
        raw = "Acme   Holding    GmbH"
        assert _clean_entity_name(raw) == "Acme Holding GmbH"

    def test_clean_name_passes_through(self):
        assert _clean_entity_name("Elli Mobility GmbH") == "Elli Mobility GmbH"


class TestSliceEntities:
    """Unit tests for ``_slice_entities``."""

    def test_no_entities_empty(self):
        assert _slice_entities("Random text without companies.") == []

    def test_single_entity_no_slice(self):
        # A single-entity Impressum is the normal case; slices are only
        # expected when there are multiple entities.
        assert _slice_entities("Acme GmbH\nMusterstr 1\n") == []

    def test_two_entities_clean_names(self):
        assert _entity_names(_ELLI_LIKE) == [
            "Volkswagen Group Charging GmbH",
            "Elli Mobility GmbH",
        ]


class TestCheck:
    """End-to-end tests for ``check_multi_entity_impressum``."""

    def test_elli_pattern_finds_missing_ust_id(self):
        ust_findings = _ust_id_findings(_ELLI_LIKE)
        assert len(ust_findings) == 1
        finding = ust_findings[0]
        assert finding["entities_missing"] == [
            "Volkswagen Group Charging GmbH",
        ]
        assert finding["entities_present"] == ["Elli Mobility GmbH"]
        assert finding["severity"] == "MEDIUM"
        assert "TMG" in finding["norm"]

    def test_no_impressum_no_findings(self):
        assert check_multi_entity_impressum({"doc_texts": {}}) == []

    def test_single_entity_no_findings(self):
        text = "Acme GmbH\nMusterstr 1\nUSt-IdNr DE123456789"
        assert check_multi_entity_impressum(
            {"doc_texts": {"impressum": text}}
        ) == []

    def test_both_entities_have_ust_id_no_finding(self):
        text = (
            "Acme GmbH\nUSt-IdNr DE111111111\n\n"
            "Foo Holding GmbH\nUSt-IdNr DE222222222\n"
        )
        assert _ust_id_findings(text) == []