"""Tests for B16 URL-Slug-Drift-Detector (GT URL-STRUCTURE-001).""" from unittest.mock import patch from compliance.services.url_slug_drift_check import ( _origin_and_prefix, _strip_path_slug, check_url_slug_drift, ) class TestStripPathSlug: def test_simple(self): assert _strip_path_slug("https://x.de/cookies") == "cookies" def test_with_lang_prefix(self): assert _strip_path_slug("https://x.de/de/cookies") == "cookies" def test_trailing_slash(self): assert _strip_path_slug("https://x.de/cookies/") == "cookies" def test_empty(self): assert _strip_path_slug("") == "" class TestOriginAndPrefix: def test_lang_prefix(self): assert _origin_and_prefix("https://www.elli.eco/de/cookies") == ( "https://www.elli.eco", "/de", ) def test_no_lang_prefix(self): assert _origin_and_prefix("https://x.de/cookies") == ( "https://x.de", "", ) def test_locale_prefix(self): assert _origin_and_prefix("https://x.de/de-de/cookies") == ( "https://x.de", "/de-de", ) class TestCheckURLSlugDrift: def test_no_docs_no_findings(self): assert check_url_slug_drift({"doc_entries": []}) == [] def test_disabled_via_env_no_findings(self, monkeypatch): monkeypatch.setenv("URL_SLUG_PROBE_DISABLED", "1") # Reload module-level _DISABLED flag import importlib from compliance.services import url_slug_drift_check importlib.reload(url_slug_drift_check) result = url_slug_drift_check.check_url_slug_drift({ "doc_entries": [{ "doc_type": "cookie", "url": "https://x.de/de/cookies", "text": "x" * 500, }] }) assert result == [] # Restore monkeypatch.delenv("URL_SLUG_PROBE_DISABLED") importlib.reload(url_slug_drift_check) def test_all_alternatives_200_no_finding(self): with patch( "compliance.services.url_slug_drift_check._head_status", return_value=200, ): result = check_url_slug_drift({ "doc_entries": [{ "doc_type": "cookie", "url": "https://x.de/de/cookies", "text": "x" * 500, }] }) assert result == [] def test_alternative_404_emits_finding(self): with patch( "compliance.services.url_slug_drift_check._head_status", return_value=404, ): result = check_url_slug_drift({ "doc_entries": [{ "doc_type": "cookie", "url": "https://x.de/de/cookies", "text": "x" * 500, }] }) assert len(result) == 1 f = result[0] assert f["check_id"] == "URL-SLUG-DRIFT-001" assert f["severity"] == "LOW" assert f["doc_type"] == "cookie" assert "cookie-richtlinie" in f["alt_slugs_404"] def test_short_text_skipped(self): # text < 400 chars → not counted as reachable doc with patch( "compliance.services.url_slug_drift_check._head_status", return_value=404, ): result = check_url_slug_drift({ "doc_entries": [{ "doc_type": "cookie", "url": "https://x.de/de/cookies", "text": "x" * 50, }] }) assert result == [] def test_elli_pattern_cookie_and_agb_both_emit(self): # Simulate Elli: cookie under /de/cookies, but cookie-richtlinie 404. # agb-doc resolves at /de/nutzungsbedingungen with /agb 404. # Note: nutzungsbedingungen is its own doc_type — Elli's "AGB" # label thus has no canonical doc on the site. state = { "doc_entries": [ {"doc_type": "cookie", "url": "https://x.de/de/cookies", "text": "x" * 500}, {"doc_type": "nutzungsbedingungen", "url": "https://x.de/de/nutzungsbedingungen", "text": "x" * 500}, ] } with patch( "compliance.services.url_slug_drift_check._head_status", return_value=404, ): result = check_url_slug_drift(state) # cookie has 2 canonical alts (cookie-richtlinie, cookie-policy); # nutzungsbedingungen has 2 alts (terms-of-use, terms-of-service). # Both should emit findings since all alts return 404. doc_types = {f["doc_type"] for f in result} assert "cookie" in doc_types assert "nutzungsbedingungen" in doc_types