"""Tests for B14 retention-conflict-Detector (GT TH-RETENTION-001).""" from compliance.services.retention_conflict_check import ( _cluster_values, check_retention_conflicts, ) class TestClusterValues: def test_empty(self): assert _cluster_values([]) == [] def test_single_value(self): assert _cluster_values([7]) == [[7]] def test_two_close_values_one_cluster(self): # 30 and 31 days within 20% tolerance assert _cluster_values([30, 31]) == [[30, 31]] def test_two_distant_values_two_clusters(self): # 7 and 30 days — well outside 20% tolerance clusters = _cluster_values([7, 30]) assert len(clusters) == 2 def test_equivalent_durations_collapse(self): # 30 Tage and 1 Monat (==30 Tage) → one cluster clusters = _cluster_values([30, 30]) assert clusters == [[30, 30]] class TestCheckRetentionConflicts: def test_no_doc_no_findings(self): assert check_retention_conflicts({}) == [] def test_logfile_7_vs_30_finding(self): text = ( "Server-Logfiles werden für 7 Tage gespeichert. " "Bei Sicherheitsvorfällen werden die Logfiles bis zu 30 Tage " "aufbewahrt." ) findings = check_retention_conflicts({"doc_texts": {"dse": text}}) assert len(findings) == 1 f = findings[0] assert f["check_id"] == "RETENTION-CONFLICT-001" assert f["category"] == "logfile" assert f["doc_type"] == "dse" assert 7.0 in f["values_days"] assert 30.0 in f["values_days"] def test_logfile_single_value_no_finding(self): text = "Logfiles werden 7 Tage aufbewahrt." assert check_retention_conflicts({"doc_texts": {"dse": text}}) == [] def test_logfile_close_values_no_finding(self): # 30 days vs ~1 Monat — same cluster text = ( "Logfiles werden 30 Tage gespeichert. " "Die Aufbewahrungsdauer beträgt 1 Monat." ) # NOTE: parse_duration_to_days('1 Monat') → 30 days; same cluster. findings = check_retention_conflicts({"doc_texts": {"dse": text}}) # Either no finding (preferred) or zero because clusters collapse. cf = [f for f in findings if f["category"] == "logfile"] assert cf == [] def test_only_categorisations_with_two_clusters_emit(self): # Logfile two values + contact_form single → only logfile fires. text = ( "Server-Logfiles werden 7 Tage gespeichert. " "Außerdem speichern wir Logfiles bis zu 90 Tage. " "Kontaktformular-Daten werden 6 Monate aufbewahrt." ) findings = check_retention_conflicts({"doc_texts": {"dse": text}}) cats = [f["category"] for f in findings] assert "logfile" in cats assert "contact_form" not in cats def test_dse_and_cookie_doc_separately(self): text_dse = "Logfiles werden 7 Tage gespeichert. Logfiles 30 Tage." text_cookie = "Session-Cookie läuft nach 1 Tag ab." findings = check_retention_conflicts({ "doc_texts": {"dse": text_dse, "cookie": text_cookie} }) # Only logfile conflict in dse, nothing in cookie. assert len(findings) == 1 assert findings[0]["doc_type"] == "dse"