"""Tests for B22 Cross-Domain-Legal-Doc-Detector.""" from compliance.services.cross_domain_doc_check import ( _site_origin_sld, _sld, check_cross_domain_docs, ) class TestSld: def test_simple(self): assert _sld("www.bmw.de") == "bmw" def test_compound_tld(self): assert _sld("docs.example.co.uk") == "example" def test_no_www(self): assert _sld("elli.eco") == "elli" class TestPrimaryDetection: def test_majority_wins(self): state = {"doc_entries": [ {"url": "https://elli.eco/de/impressum"}, {"url": "https://elli.eco/de/datenschutz"}, {"url": "https://docs.logpay.de/_docs/agb.pdf"}, ]} assert _site_origin_sld(state) == "elli" def test_auto_discovered_excluded(self): # discovery results don't influence primary detection state = {"doc_entries": [ {"url": "https://elli.eco/de/impressum", "auto_discovered": False}, {"url": "https://discovered.tld/foo", "auto_discovered": True}, ]} assert _site_origin_sld(state) == "elli" class TestCheck: def test_elli_logpay_pattern(self): state = {"doc_entries": [ {"doc_type": "dse", "url": "https://www.elli.eco/de/datenschutz"}, {"doc_type": "impressum", "url": "https://www.elli.eco/de/impressum"}, {"doc_type": "agb", "url": "https://docs.logpay.de/_docs/de/" "allgemeine_geschaeftsbedingungen_de_EM.pdf"}, ]} findings = check_cross_domain_docs(state) assert len(findings) == 1 f = findings[0] assert f["check_id"] == "CROSS-DOMAIN-DOC-001" assert f["severity"] == "HIGH" # AGB is HIGH assert f["doc_type"] == "agb" assert f["site_sld"] == "elli" assert f["host_sld"] == "logpay" def test_same_subdomain_no_finding(self): # docs.bmw.de is same SLD as www.bmw.de — no finding state = {"doc_entries": [ {"doc_type": "dse", "url": "https://www.bmw.de/de/datenschutz.html"}, {"doc_type": "agb", "url": "https://docs.bmw.de/agb.pdf"}, ]} findings = check_cross_domain_docs(state) assert findings == [] def test_no_primary_no_finding(self): # No URLs at all state = {"doc_entries": []} assert check_cross_domain_docs(state) == [] def test_severity_per_doc_type(self): state = {"doc_entries": [ {"doc_type": "agb", "url": "https://acme.de/x"}, {"doc_type": "dse", "url": "https://docs.thirdparty.com/agb"}, {"doc_type": "impressum", "url": "https://www.other.com/impressum"}, ]} findings = check_cross_domain_docs(state) sev_by_doc = {f["doc_type"]: f["severity"] for f in findings} # agb is on primary (acme.de) — no finding # dse on thirdparty.com → MEDIUM # impressum on other.com → INFO assert sev_by_doc.get("dse") == "MEDIUM" assert sev_by_doc.get("impressum") == "INFO"