refactor(agent-check): split routes file (2692→347 LOC) + wire B1/B3/A1 [guardrail-change]
Phase-5 split of agent_compliance_check_routes.py — the 2700-line
monolith was decomposed into 19 modules in compliance/api/agent_check/:
- Phase A-F: resolve / profile+check / banner+TCF / vendors raw+finalize /
HTML blocks top+mid+bot / email / persist
- Helpers: _constants, _helpers, _fetch, _discovery, _single_check
- Schemas + State + thin _orchestrator
A1 ZIP-Anhang nativ in _phase_e_email: evidence_zip_builder.py bundles
slices + manifest.json + audit_metadata.json (SHA256 per slice +
build_sha + source_url). smtp_sender.py erweitert um attachments-Parameter.
B1 COOKIE-CONSENT-UX-001 (Mobile Reachability): consent_reachability_check.py
parses footer anchors, classifies intent (reopen_cmp / info_only /
browser_deflect) + target (same_page_cmp / new_tab / external).
_b1_wiring.py fetches homepage with iPhone-UA + renders Art-7-Abs-3
severity-coloured block.
B3 TH-RETENTION (Cross-Doc Speicherdauer): retention_comparator.py
compares DSI claim ↔ cookie-table duration ↔ actual Max-Age/expires
with 5% tolerance + severity hierarchy (dsi_under_actual HIGH,
table_under_actual HIGH, dsi_vs_table MEDIUM, actual_under_table LOW
with Safari-ITP hint). _b3_wiring.py + Top-10 mismatches table in mail.
Side-effects:
- Fixed silent UnboundLocalError in original Step 5 (gf_one_pager used
audit_quality_findings before declaration, caught by surrounding
except → block never rendered). New _phase_d3_blocks_bot.py runs
audit-quality FIRST.
- agent_compliance_check_routes.py removed from loc-exceptions.txt
("Phase 5 split target" — done).
Tests: 55/55 grün (B1 22 + B3 27 + saving_scan 6).
E2E: smoke against Elli DSE+Cookie produced HIGH/missing B1 finding,
TH-RETENTION table (17 cookies / 3 ✓ / 3 ✗ / 11 ?), evidence-zip
with 2 slices + manifest + audit_metadata (12089B, SHA256-chained,
source verified), email sent (attachments=1).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,153 @@
|
||||
"""Tests for B1 static consent-reachability analysis."""
|
||||
|
||||
from compliance.services.consent_reachability_check import (
|
||||
classify_anchor_target,
|
||||
evaluate_reachability,
|
||||
find_consent_anchors_in_footer,
|
||||
)
|
||||
|
||||
|
||||
def _wrap(footer_inner: str) -> str:
|
||||
return (
|
||||
"<html><body>"
|
||||
"<main>some content</main>"
|
||||
f"<footer>{footer_inner}</footer>"
|
||||
"</body></html>"
|
||||
)
|
||||
|
||||
|
||||
class TestFindConsentAnchors:
    """Tests for ``find_consent_anchors_in_footer``.

    Covers which elements count as consent anchors, which containers are
    treated as a footer, and the ``intent`` value attached to each anchor.
    """

    def test_finds_reopen_link_german(self):
        """A 'Cookie-Einstellungen' link with an onclick handler is a reopen."""
        html = _wrap('<a href="#" onclick="UC_UI.showSecondLayer()">'
                     'Cookie-Einstellungen</a>')
        anchors = find_consent_anchors_in_footer(html)
        assert len(anchors) == 1
        assert anchors[0]["intent"] == "reopen_cmp"

    def test_finds_reopen_button(self):
        """A ``<button data-cmp=...>`` in the footer is a reopen control."""
        html = _wrap('<button data-cmp="show">Cookies verwalten</button>')
        anchors = find_consent_anchors_in_footer(html)
        # Guard the index access so an empty result fails as an assertion
        # rather than as an IndexError.
        assert anchors, "expected at least one consent anchor in the footer"
        assert anchors[0]["intent"] == "reopen_cmp"

    def test_info_only_link_to_policy(self):
        """A plain link to the cookie policy is informational only."""
        html = _wrap('<a href="/cookie-richtlinie">Cookie-Richtlinie</a>')
        anchors = find_consent_anchors_in_footer(html)
        assert len(anchors) == 1
        assert anchors[0]["intent"] == "info_only"

    def test_browser_deflection_link(self):
        """A link labelled 'Browser-Einstellungen' is a browser deflection."""
        html = _wrap('<a href="/cookies">Browser-Einstellungen</a>')
        anchors = find_consent_anchors_in_footer(html)
        # Same guard as above: fail with a readable message when empty.
        assert anchors, "expected at least one consent anchor in the footer"
        assert anchors[0]["intent"] == "browser_deflect"

    def test_ignores_anchors_outside_footer(self):
        """Consent-looking links outside the footer are not reported."""
        html = ('<html><body>'
                '<a href="#">Cookie-Einstellungen</a>'
                '<footer><a href="/impressum">Impressum</a></footer>'
                '</body></html>')
        assert find_consent_anchors_in_footer(html) == []

    def test_role_contentinfo_treated_as_footer(self):
        """A container with ``role="contentinfo"`` counts as a footer."""
        html = ('<html><body>'
                '<div role="contentinfo">'
                '<a href="#" data-cmp="open">Cookie-Einstellungen</a>'
                '</div></body></html>')
        anchors = find_consent_anchors_in_footer(html)
        assert len(anchors) == 1

    def test_class_with_footer_treated_as_footer(self):
        """A container whose class contains 'footer' counts as a footer."""
        html = ('<html><body>'
                '<div class="site-footer">'
                '<a href="#" data-cmp="open">Cookies verwalten</a>'
                '</div></body></html>')
        anchors = find_consent_anchors_in_footer(html)
        assert len(anchors) == 1

    def test_empty_html(self):
        """An empty document yields no anchors."""
        assert find_consent_anchors_in_footer("") == []

    def test_malformed_html(self):
        """Broken markup must not raise."""
        # broken markup shouldn't crash
        anchors = find_consent_anchors_in_footer("<footer><a>foo")
        # may or may not yield results; must not raise
        assert isinstance(anchors, list)
|
||||
|
||||
|
||||
class TestClassifyAnchorTarget:
    """Tests for ``classify_anchor_target`` against the base URL https://x.de/."""

    def test_onclick_classifies_as_cmp(self):
        """An anchor carrying an onclick handler opens the CMP in place."""
        anchor = {"href": "#", "onclick": "showCmp()"}
        target = classify_anchor_target(anchor, "https://x.de/")
        assert target == "same_page_cmp"

    def test_data_cmp_classifies_as_cmp(self):
        """An anchor carrying a ``data_cmp`` value opens the CMP in place."""
        anchor = {"href": "#", "data_cmp": "show"}
        target = classify_anchor_target(anchor, "https://x.de/")
        assert target == "same_page_cmp"

    def test_javascript_link(self):
        """A ``javascript:`` href gets its own category."""
        anchor = {"href": "javascript:void(0)"}
        target = classify_anchor_target(anchor, "https://x.de/")
        assert target == "javascript"

    def test_new_tab(self):
        """``target="_blank"`` is reported as a new-tab navigation."""
        anchor = {"href": "/cookie", "target": "_blank"}
        target = classify_anchor_target(anchor, "https://x.de/")
        assert target == "new_tab"

    def test_hash_only(self):
        """A fragment-only href stays on the same page."""
        anchor = {"href": "#cookies"}
        target = classify_anchor_target(anchor, "https://x.de/")
        assert target == "same_page_cmp"

    def test_same_origin_relative(self):
        """A relative path resolves to the same origin as the base URL."""
        anchor = {"href": "/cookie-richtlinie"}
        target = classify_anchor_target(anchor, "https://x.de/")
        assert target == "same_origin"

    def test_external_origin(self):
        """An absolute URL on another host is external."""
        anchor = {"href": "https://other.de/policy"}
        target = classify_anchor_target(anchor, "https://x.de/")
        assert target == "external"
|
||||
|
||||
|
||||
class TestEvaluateReachability:
    """Tests for the verdict dict returned by ``evaluate_reachability``."""

    def test_pass_when_reopen_in_same_page(self):
        """An in-place reopen control in the footer passes the check."""
        page = _wrap('<a href="#" data-cmp="open">Cookie-Einstellungen</a>')
        verdict = evaluate_reachability(page, "https://x.de/")
        assert verdict["check_id"] == "COOKIE-CONSENT-UX-001"
        assert verdict["passed"] is True
        assert verdict["severity"] is None
        assert verdict["has_reopen_anchor"] is True

    def test_fail_missing_when_no_reopen(self):
        """Only an informational policy link: HIGH / missing."""
        page = _wrap('<a href="/cookie-richtlinie">Cookie-Richtlinie</a>')
        verdict = evaluate_reachability(page, "https://x.de/")
        assert verdict["passed"] is False
        assert verdict["severity"] == "HIGH"
        assert verdict["severity_reason"] == "missing"

    def test_medium_when_reopen_opens_new_tab(self):
        """A reopen-labelled link that opens a new tab: MEDIUM / misclassified."""
        # The Elli case: footer link points at cookie policy in a new
        # tab, no in-place CMP open.
        footer_link = (
            '<a href="/cookie-einstellungen" target="_blank">'
            'Cookie-Einstellungen</a>'
        )
        verdict = evaluate_reachability(_wrap(footer_link), "https://x.de/")
        assert verdict["passed"] is False
        assert verdict["severity"] == "MEDIUM"
        assert verdict["severity_reason"] == "misclassified"

    def test_high_when_only_browser_deflection(self):
        """Only a browser-settings deflection: HIGH / factually_wrong."""
        page = _wrap('<a href="/cookies">Browser-Einstellungen</a>')
        verdict = evaluate_reachability(page, "https://x.de/")
        assert verdict["passed"] is False
        assert verdict["severity"] == "HIGH"
        assert verdict["severity_reason"] == "factually_wrong"

    def test_empty_footer_is_fail(self):
        """A footer with no content fails with HIGH severity."""
        verdict = evaluate_reachability(_wrap(""), "https://x.de/")
        assert verdict["passed"] is False
        assert verdict["severity"] == "HIGH"

    def test_reopen_external_origin_is_medium(self):
        """A reopen-labelled link to another origin fails with MEDIUM."""
        footer_link = (
            '<a href="https://privacy.other.com/manage">'
            'Cookie-Einstellungen</a>'
        )
        verdict = evaluate_reachability(_wrap(footer_link), "https://x.de/")
        assert verdict["passed"] is False
        assert verdict["severity"] == "MEDIUM"
|
||||
@@ -0,0 +1,259 @@
|
||||
"""Tests for B3 cross-doc retention comparator."""
|
||||
|
||||
from compliance.services.retention_comparator import (
|
||||
RetentionClaim,
|
||||
build_retention_theme_summary,
|
||||
compare_retention,
|
||||
extract_retention_claims,
|
||||
max_age_to_days,
|
||||
parse_duration_to_days,
|
||||
)
|
||||
|
||||
|
||||
class TestParseDurationToDays:
    """Tests for ``parse_duration_to_days``, which returns ``(days, kind)``."""

    def test_months(self):
        """The expectation counts a month as 30 days."""
        days, kind = parse_duration_to_days("14 Monate")
        assert kind == "days"
        assert days == 14 * 30

    def test_jahre(self):
        """The expectation counts a year as 365 days."""
        days, kind = parse_duration_to_days("2 Jahre")
        assert kind == "days"
        assert days == 2 * 365

    def test_hours_short(self):
        """The short form '24h' equals one day."""
        days, kind = parse_duration_to_days("24h")
        assert kind == "days"
        assert days == 1.0

    def test_days(self):
        """Plain day counts pass through unchanged."""
        days, kind = parse_duration_to_days("30 Tage")
        assert kind == "days"
        assert days == 30

    def test_minutes(self):
        """One minute is 1/1440 of a day (compared with a small tolerance)."""
        days, kind = parse_duration_to_days("1 Minute")
        assert kind == "days"
        assert abs(days - 1 / 1440) < 1e-9

    def test_session(self):
        """'Sitzungsdauer' is a session lifetime with no day count."""
        days, kind = parse_duration_to_days("Sitzungsdauer")
        assert kind == "session"
        assert days is None

    def test_session_token(self):
        """The English token 'Session' is also recognised as a session."""
        _, kind = parse_duration_to_days("Session")
        assert kind == "session"

    def test_persistent(self):
        """'unbegrenzt' is classified as persistent."""
        _, kind = parse_duration_to_days("unbegrenzt")
        assert kind == "persistent"

    def test_empty(self):
        """An empty string is unknown and has no day count."""
        days, kind = parse_duration_to_days("")
        assert kind == "unknown"
        assert days is None

    def test_none(self):
        """``None`` is unknown and has no day count."""
        days, kind = parse_duration_to_days(None)
        assert kind == "unknown"
        assert days is None

    def test_decimal_comma(self):
        """A German decimal comma ('1,5') is parsed as 1.5."""
        days, kind = parse_duration_to_days("1,5 Jahre")
        assert kind == "days"
        assert days == 1.5 * 365
|
||||
|
||||
|
||||
class TestMaxAgeToDays:
    """Tests for ``max_age_to_days`` (seconds to days conversion)."""

    def test_one_year(self):
        """365 days' worth of seconds converts back to 365 days."""
        seconds_per_year = 365 * 86400
        converted = max_age_to_days(seconds_per_year)
        assert abs(converted - 365) < 1e-9

    def test_session_none(self):
        """``None`` input yields ``None``."""
        converted = max_age_to_days(None)
        assert converted is None

    def test_bad_input(self):
        """A non-numeric string yields ``None``."""
        converted = max_age_to_days("bad")
        assert converted is None
|
||||
|
||||
|
||||
class TestExtractRetentionClaims:
    """Tests for ``extract_retention_claims`` on privacy-notice (DSI) text."""

    def test_finds_global_claim(self):
        """A sentence with a retention anchor yields exactly one claim."""
        dsi = (
            "Wir verarbeiten Ihre Daten gemäß Art. 6 DSGVO. "
            "Die Speicherdauer der Daten beträgt grundsätzlich 6 Monate. "
            "Danach werden die Daten gelöscht."
        )
        claims = extract_retention_claims(dsi)
        assert len(claims) == 1
        assert claims[0].days == 6 * 30

    def test_finds_cookie_specific(self):
        """A claim naming a known cookie carries it in ``context_terms``."""
        dsi = (
            "Wir nutzen Google Analytics. "
            "Das Cookie _ga wird für 14 Monate gespeichert. "
            "Weitere Hinweise finden Sie unten."
        )
        claims = extract_retention_claims(
            dsi, cookie_names=["_ga"], vendor_names=["Google Analytics"],
        )
        assert len(claims) >= 1
        # Use a default so a missing claim fails as a readable assertion
        # instead of an opaque StopIteration.
        ga_claim = next(
            (c for c in claims if "_ga" in c.context_terms), None,
        )
        assert ga_claim is not None, "no claim with '_ga' in context_terms"
        assert ga_claim.days == 14 * 30

    def test_ignores_non_retention_sentence(self):
        """A duration without a retention anchor is not a claim."""
        dsi = "Wir sind 14 Monate am Markt. Das ist keine Speicherdauer."
        # "14 Monate" present but no retention anchor — skip.
        assert extract_retention_claims(dsi) == []

    def test_empty_text(self):
        """Empty text yields no claims."""
        assert extract_retention_claims("") == []
|
||||
|
||||
|
||||
class TestCompareRetention:
    """Tests for ``compare_retention`` mismatch types and severities."""

    @staticmethod
    def _dsi_claim(sentence, days, context_terms=None):
        """Build a non-session, non-persistent ``RetentionClaim``."""
        return RetentionClaim(
            sentence=sentence,
            days=days,
            is_session=False,
            is_persistent=False,
            context_terms=list(context_terms or []),
        )

    def test_match_all_three(self):
        """DSI, table and actual lifetime all agree on 14 months."""
        fourteen_months = 14 * 30
        result = compare_retention(
            cookie_name="_ga",
            table_duration="14 Monate",
            actual_max_age_seconds=fourteen_months * 86400,
            dsi_claims=[
                self._dsi_claim("Speicherdauer 14 Monate.", fourteen_months),
            ],
        )
        assert result["matches"] is True
        assert result["severity"] is None

    def test_dsi_under_actual_is_HIGH(self):
        """The DSI understates the real lifetime: HIGH / factually_wrong."""
        # DSI claims 6 months, real cookie lives 14 months.
        six_months = 6 * 30
        fourteen_months = 14 * 30
        result = compare_retention(
            cookie_name="_ga",
            table_duration="14 Monate",
            actual_max_age_seconds=fourteen_months * 86400,
            dsi_claims=[
                self._dsi_claim("Speicherdauer 6 Monate.", six_months),
            ],
        )
        assert result["matches"] is False
        assert result["mismatch_type"] == "dsi_under_actual"
        assert result["severity_reason"] == "factually_wrong"
        assert result["severity"] == "HIGH"
        assert result["diff_days"] == fourteen_months - six_months

    def test_table_under_actual_is_HIGH(self):
        """The cookie table understates the real lifetime: HIGH."""
        # Table says 7 days, real cookie lives 365 days.
        result = compare_retention(
            cookie_name="_fbp",
            table_duration="7 Tage",
            actual_max_age_seconds=365 * 86400,
        )
        assert result["matches"] is False
        assert result["mismatch_type"] == "table_under_actual"
        assert result["severity"] == "HIGH"

    def test_dsi_vs_table_is_MEDIUM(self):
        """The two documents disagree and no actual value exists: MEDIUM."""
        # DSI says 6 months, table says 14 months, no actual.
        result = compare_retention(
            cookie_name="_ga",
            table_duration="14 Monate",
            actual_max_age_seconds=None,
            dsi_claims=[
                self._dsi_claim("Speicherdauer 6 Monate.", 6 * 30),
            ],
        )
        assert result["matches"] is False
        assert result["mismatch_type"] == "dsi_vs_table"
        assert result["severity"] == "MEDIUM"

    def test_actual_under_table_is_LOW_safari_itp_hint(self):
        """The real lifetime is shorter than documented: LOW plus ITP note."""
        # Table says 2 years, real cookie lives 7 days (Safari ITP).
        result = compare_retention(
            cookie_name="_ga",
            table_duration="2 Jahre",
            actual_max_age_seconds=7 * 86400,
        )
        assert result["matches"] is False
        assert result["mismatch_type"] == "actual_under_table"
        assert result["severity"] == "LOW"
        assert "possible_safari_itp_cap" in result["notes"]

    def test_only_one_source_is_incomplete(self):
        """With only the table value available the finding is incomplete."""
        result = compare_retention(
            cookie_name="_ga",
            table_duration="14 Monate",
            actual_max_age_seconds=None,
            dsi_claims=[],
        )
        assert result["severity_reason"] == "incomplete"
        assert result["severity"] == "LOW"

    def test_tolerance_5pct(self):
        """A small deviation between table and actual still matches."""
        # 14 Monate (420d) vs 410d — within 5% tolerance, match.
        result = compare_retention(
            cookie_name="_ga",
            table_duration="14 Monate",
            actual_max_age_seconds=410 * 86400,
        )
        assert result["matches"] is True

    def test_cookie_specific_dsi_beats_generic(self):
        """A claim naming the cookie is preferred over a generic claim."""
        generic = self._dsi_claim(
            "Speicherdauer grundsätzlich 6 Monate.", 6 * 30,
        )
        specific = self._dsi_claim(
            "_ga: Speicherdauer 14 Monate.", 14 * 30, context_terms=["_ga"],
        )
        result = compare_retention(
            cookie_name="_ga",
            table_duration="14 Monate",
            actual_max_age_seconds=14 * 30 * 86400,
            dsi_claims=[generic, specific],
        )
        # The cookie-specific claim should win → all three match.
        assert result["matches"] is True
        assert result["dsi_days"] == 14 * 30
|
||||
|
||||
|
||||
class TestBuildRetentionThemeSummary:
    """Tests for ``build_retention_theme_summary`` aggregation."""

    def _claim(self, sentence, days):
        """Build a generic (no context terms) non-session DSI claim."""
        return RetentionClaim(
            sentence=sentence,
            days=days,
            is_session=False,
            is_persistent=False,
            context_terms=[],
        )

    def test_aggregate(self):
        """One pass, one fail and one incomplete finding are counted apart."""
        actual_seconds = 14 * 30 * 86400

        # All three sources agree.
        passing = compare_retention(
            "_a", "14 Monate", actual_seconds,
            [self._claim("14 Monate", 14 * 30)],
        )
        # DSI and table both say 6 months, real lifetime is 14 months.
        failing = compare_retention(
            "_b", "6 Monate", actual_seconds,
            [self._claim("6 Monate", 6 * 30)],
        )
        # Only the table value is available.
        incomplete = compare_retention(
            "_c", "14 Monate", None, [],
        )

        summary = build_retention_theme_summary(
            [passing, failing, incomplete],
        )

        assert summary["theme_id"] == "TH-RETENTION"
        assert summary["total"] == 3
        assert summary["passed"] == 1
        assert summary["incomplete"] == 1
        assert summary["failed"] == 1
        assert summary["by_severity"].get("HIGH") == 1
        assert summary["by_mismatch_type"].get("dsi_under_actual") == 1
        assert len(summary["top_fails"]) == 1
|
||||
Reference in New Issue
Block a user