"""Tests for B3 cross-doc retention comparator.""" from compliance.services.retention_comparator import ( RetentionClaim, build_retention_theme_summary, compare_retention, extract_retention_claims, max_age_to_days, parse_duration_to_days, ) class TestParseDurationToDays: def test_months(self): d, k = parse_duration_to_days("14 Monate") assert k == "days" assert d == 14 * 30 def test_jahre(self): d, k = parse_duration_to_days("2 Jahre") assert k == "days" assert d == 2 * 365 def test_hours_short(self): d, k = parse_duration_to_days("24h") assert k == "days" assert d == 1.0 def test_days(self): d, k = parse_duration_to_days("30 Tage") assert k == "days" assert d == 30 def test_minutes(self): d, k = parse_duration_to_days("1 Minute") assert k == "days" assert abs(d - 1 / 1440) < 1e-9 def test_session(self): d, k = parse_duration_to_days("Sitzungsdauer") assert k == "session" assert d is None def test_session_token(self): d, k = parse_duration_to_days("Session") assert k == "session" def test_persistent(self): d, k = parse_duration_to_days("unbegrenzt") assert k == "persistent" def test_empty(self): d, k = parse_duration_to_days("") assert k == "unknown" assert d is None def test_none(self): d, k = parse_duration_to_days(None) assert k == "unknown" assert d is None def test_decimal_comma(self): d, k = parse_duration_to_days("1,5 Jahre") assert k == "days" assert d == 1.5 * 365 class TestMaxAgeToDays: def test_one_year(self): assert abs(max_age_to_days(365 * 86400) - 365) < 1e-9 def test_session_none(self): assert max_age_to_days(None) is None def test_bad_input(self): assert max_age_to_days("bad") is None class TestExtractRetentionClaims: def test_finds_global_claim(self): dsi = ( "Wir verarbeiten Ihre Daten gemäß Art. 6 DSGVO. " "Die Speicherdauer der Daten beträgt grundsätzlich 6 Monate. " "Danach werden die Daten gelöscht." ) claims = extract_retention_claims(dsi) assert len(claims) == 1 assert claims[0].days == 6 * 30 def test_finds_cookie_specific(self): dsi = ( "Wir nutzen Google Analytics. " "Das Cookie _ga wird für 14 Monate gespeichert. " "Weitere Hinweise finden Sie unten." ) claims = extract_retention_claims( dsi, cookie_names=["_ga"], vendor_names=["Google Analytics"], ) assert len(claims) >= 1 ga_claim = next(c for c in claims if "_ga" in c.context_terms) assert ga_claim.days == 14 * 30 def test_ignores_non_retention_sentence(self): dsi = "Wir sind 14 Monate am Markt. Das ist keine Speicherdauer." # "14 Monate" present but no retention anchor — skip. assert extract_retention_claims(dsi) == [] def test_empty_text(self): assert extract_retention_claims("") == [] class TestCompareRetention: def test_match_all_three(self): dsi_claims = [RetentionClaim( sentence="Speicherdauer 14 Monate.", days=14 * 30, is_session=False, is_persistent=False, context_terms=[], )] out = compare_retention( cookie_name="_ga", table_duration="14 Monate", actual_max_age_seconds=14 * 30 * 86400, dsi_claims=dsi_claims, ) assert out["matches"] is True assert out["severity"] is None def test_dsi_under_actual_is_HIGH(self): # DSI claims 6 months, real cookie lives 14 months. dsi_claims = [RetentionClaim( sentence="Speicherdauer 6 Monate.", days=6 * 30, is_session=False, is_persistent=False, context_terms=[], )] out = compare_retention( cookie_name="_ga", table_duration="14 Monate", actual_max_age_seconds=14 * 30 * 86400, dsi_claims=dsi_claims, ) assert out["matches"] is False assert out["mismatch_type"] == "dsi_under_actual" assert out["severity_reason"] == "factually_wrong" assert out["severity"] == "HIGH" assert out["diff_days"] == 14 * 30 - 6 * 30 def test_table_under_actual_is_HIGH(self): # Table says 7 days, real cookie lives 365 days. out = compare_retention( cookie_name="_fbp", table_duration="7 Tage", actual_max_age_seconds=365 * 86400, ) assert out["matches"] is False assert out["mismatch_type"] == "table_under_actual" assert out["severity"] == "HIGH" def test_dsi_vs_table_is_MEDIUM(self): # DSI says 6 months, table says 14 months, no actual. dsi_claims = [RetentionClaim( sentence="Speicherdauer 6 Monate.", days=6 * 30, is_session=False, is_persistent=False, context_terms=[], )] out = compare_retention( cookie_name="_ga", table_duration="14 Monate", actual_max_age_seconds=None, dsi_claims=dsi_claims, ) assert out["matches"] is False assert out["mismatch_type"] == "dsi_vs_table" assert out["severity"] == "MEDIUM" def test_actual_under_table_is_LOW_safari_itp_hint(self): # Table says 2 years, real cookie lives 7 days (Safari ITP). out = compare_retention( cookie_name="_ga", table_duration="2 Jahre", actual_max_age_seconds=7 * 86400, ) assert out["matches"] is False assert out["mismatch_type"] == "actual_under_table" assert out["severity"] == "LOW" assert "possible_safari_itp_cap" in out["notes"] def test_only_one_source_is_incomplete(self): out = compare_retention( cookie_name="_ga", table_duration="14 Monate", actual_max_age_seconds=None, dsi_claims=[], ) assert out["severity_reason"] == "incomplete" assert out["severity"] == "LOW" def test_tolerance_5pct(self): # 14 Monate (420d) vs 410d — within 5% tolerance, match. out = compare_retention( cookie_name="_ga", table_duration="14 Monate", actual_max_age_seconds=410 * 86400, ) assert out["matches"] is True def test_cookie_specific_dsi_beats_generic(self): dsi_claims = [ RetentionClaim( sentence="Speicherdauer grundsätzlich 6 Monate.", days=6 * 30, is_session=False, is_persistent=False, context_terms=[], ), RetentionClaim( sentence="_ga: Speicherdauer 14 Monate.", days=14 * 30, is_session=False, is_persistent=False, context_terms=["_ga"], ), ] out = compare_retention( cookie_name="_ga", table_duration="14 Monate", actual_max_age_seconds=14 * 30 * 86400, dsi_claims=dsi_claims, ) # The cookie-specific claim should win → all three match. assert out["matches"] is True assert out["dsi_days"] == 14 * 30 class TestBuildRetentionThemeSummary: def _claim(self, sentence, days): return RetentionClaim( sentence=sentence, days=days, is_session=False, is_persistent=False, context_terms=[], ) def test_aggregate(self): findings = [ compare_retention( "_a", "14 Monate", 14 * 30 * 86400, [self._claim("14 Monate", 14 * 30)], ), compare_retention( "_b", "6 Monate", 14 * 30 * 86400, [self._claim("6 Monate", 6 * 30)], ), compare_retention( "_c", "14 Monate", None, [], ), ] s = build_retention_theme_summary(findings) assert s["theme_id"] == "TH-RETENTION" assert s["total"] == 3 assert s["passed"] == 1 assert s["incomplete"] == 1 assert s["failed"] == 1 assert s["by_severity"].get("HIGH") == 1 assert s["by_mismatch_type"].get("dsi_under_actual") == 1 assert len(s["top_fails"]) == 1