Files
breakpilot-compliance/backend-compliance/tests/test_consent_reachability_check.py
T
Benjamin Admin c2c8783fee refactor(agent-check): split routes file (2692→347 LOC) + wire B1/B3/A1 [guardrail-change]
Phase-5 split of agent_compliance_check_routes.py — the 2700-line
monolith was decomposed into 19 modules in compliance/api/agent_check/:

  - Phase A-F: resolve / profile+check / banner+TCF / vendors raw+finalize /
    HTML blocks top+mid+bot / email / persist
  - Helpers: _constants, _helpers, _fetch, _discovery, _single_check
  - Schemas + State + thin _orchestrator

A1 native ZIP attachment in _phase_e_email: evidence_zip_builder.py bundles
slices + manifest.json + audit_metadata.json (SHA256 per slice +
build_sha + source_url). smtp_sender.py gains an attachments parameter.

B1 COOKIE-CONSENT-UX-001 (Mobile Reachability): consent_reachability_check.py
parses footer anchors, classifies intent (reopen_cmp / info_only /
browser_deflect) + target (same_page_cmp / new_tab / external).
_b1_wiring.py fetches homepage with iPhone-UA + renders Art-7-Abs-3
severity-coloured block.

B3 TH-RETENTION (Cross-Doc Speicherdauer): retention_comparator.py
compares DSI claim ↔ cookie-table duration ↔ actual Max-Age/expires
with 5% tolerance + severity hierarchy (dsi_under_actual HIGH,
table_under_actual HIGH, dsi_vs_table MEDIUM, actual_under_table LOW
Safari-ITP-Hint). _b3_wiring.py + Top-10 mismatches table in mail.

Side-effects:
- Fixed silent UnboundLocalError in original Step 5 (gf_one_pager used
  audit_quality_findings before declaration, caught by surrounding
  except → block never rendered). New _phase_d3_blocks_bot.py runs
  audit-quality FIRST.
- agent_compliance_check_routes.py removed from loc-exceptions.txt
  ("Phase 5 split target" — done).

Tests: 55/55 green (B1 22 + B3 27 + saving_scan 6).
E2E: smoke against Elli DSE+Cookie produced HIGH/missing B1 finding,
TH-RETENTION table (17 cookies / 3 ✓ / 3 ✗ / 11 ?), evidence-zip
with 2 slices + manifest + audit_metadata (12089B, SHA256-chained,
source verified), email sent (attachments=1).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-06-06 14:47:25 +02:00

154 lines
5.7 KiB
Python

"""Tests for B1 static consent-reachability analysis."""
from compliance.services.consent_reachability_check import (
classify_anchor_target,
evaluate_reachability,
find_consent_anchors_in_footer,
)
def _wrap(footer_inner: str) -> str:
return (
"<html><body>"
"<main>some content</main>"
f"<footer>{footer_inner}</footer>"
"</body></html>"
)
class TestFindConsentAnchors:
    """Tests for ``find_consent_anchors_in_footer``.

    Each test feeds an HTML string to the function and checks the returned
    list: how many anchors were found and which ``intent`` they carry.
    """

    def test_finds_reopen_link_german(self):
        """A German settings link with an ``onclick`` handler is ``reopen_cmp``."""
        html = _wrap('<a href="#" onclick="UC_UI.showSecondLayer()">'
                     'Cookie-Einstellungen</a>')
        anchors = find_consent_anchors_in_footer(html)
        assert len(anchors) == 1
        assert anchors[0]["intent"] == "reopen_cmp"

    def test_finds_reopen_button(self):
        """A ``<button data-cmp=...>`` is picked up, not only ``<a>`` tags."""
        html = _wrap('<button data-cmp="show">Cookies verwalten</button>')
        anchors = find_consent_anchors_in_footer(html)
        # Guard before indexing so a miss fails as an assertion, not IndexError.
        assert anchors, "expected at least one consent anchor in the footer"
        assert anchors[0]["intent"] == "reopen_cmp"

    def test_info_only_link_to_policy(self):
        """A plain link to the cookie policy is ``info_only``."""
        html = _wrap('<a href="/cookie-richtlinie">Cookie-Richtlinie</a>')
        anchors = find_consent_anchors_in_footer(html)
        assert len(anchors) == 1
        assert anchors[0]["intent"] == "info_only"

    def test_browser_deflection_link(self):
        """A link labelled "Browser-Einstellungen" is ``browser_deflect``."""
        html = _wrap('<a href="/cookies">Browser-Einstellungen</a>')
        anchors = find_consent_anchors_in_footer(html)
        # Guard before indexing so a miss fails as an assertion, not IndexError.
        assert anchors, "expected at least one consent anchor in the footer"
        assert anchors[0]["intent"] == "browser_deflect"

    def test_ignores_anchors_outside_footer(self):
        """A consent link in the body but outside the footer is not reported."""
        html = ('<html><body>'
                '<a href="#">Cookie-Einstellungen</a>'
                '<footer><a href="/impressum">Impressum</a></footer>'
                '</body></html>')
        assert find_consent_anchors_in_footer(html) == []

    def test_role_contentinfo_treated_as_footer(self):
        """A ``role="contentinfo"`` container counts as a footer."""
        html = ('<html><body>'
                '<div role="contentinfo">'
                '<a href="#" data-cmp="open">Cookie-Einstellungen</a>'
                '</div></body></html>')
        anchors = find_consent_anchors_in_footer(html)
        assert len(anchors) == 1

    def test_class_with_footer_treated_as_footer(self):
        """A container whose class name contains "footer" counts as a footer."""
        html = ('<html><body>'
                '<div class="site-footer">'
                '<a href="#" data-cmp="open">Cookies verwalten</a>'
                '</div></body></html>')
        anchors = find_consent_anchors_in_footer(html)
        assert len(anchors) == 1

    def test_empty_html(self):
        """An empty document yields an empty list."""
        assert find_consent_anchors_in_footer("") == []

    def test_malformed_html(self):
        """Unclosed tags must not make the function raise."""
        # broken markup shouldn't crash
        anchors = find_consent_anchors_in_footer("<footer><a>foo")
        # may or may not yield results; must not raise
        assert isinstance(anchors, list)
class TestClassifyAnchorTarget:
    """Expected ``classify_anchor_target`` labels for single anchor dicts."""

    # Second argument handed to classify_anchor_target in every test below.
    _PAGE_URL = "https://x.de/"

    def test_onclick_classifies_as_cmp(self):
        """An ``onclick`` handler on a ``#`` link is ``same_page_cmp``."""
        anchor = {"href": "#", "onclick": "showCmp()"}
        label = classify_anchor_target(anchor, self._PAGE_URL)
        assert label == "same_page_cmp"

    def test_data_cmp_classifies_as_cmp(self):
        """A ``data_cmp`` entry on a ``#`` link is ``same_page_cmp``."""
        anchor = {"href": "#", "data_cmp": "show"}
        label = classify_anchor_target(anchor, self._PAGE_URL)
        assert label == "same_page_cmp"

    def test_javascript_link(self):
        """A ``javascript:`` href is labelled ``javascript``."""
        anchor = {"href": "javascript:void(0)"}
        label = classify_anchor_target(anchor, self._PAGE_URL)
        assert label == "javascript"

    def test_new_tab(self):
        """``target="_blank"`` is labelled ``new_tab``."""
        anchor = {"href": "/cookie", "target": "_blank"}
        label = classify_anchor_target(anchor, self._PAGE_URL)
        assert label == "new_tab"

    def test_hash_only(self):
        """A fragment-only href is labelled ``same_page_cmp``."""
        anchor = {"href": "#cookies"}
        label = classify_anchor_target(anchor, self._PAGE_URL)
        assert label == "same_page_cmp"

    def test_same_origin_relative(self):
        """A relative path is labelled ``same_origin``."""
        anchor = {"href": "/cookie-richtlinie"}
        label = classify_anchor_target(anchor, self._PAGE_URL)
        assert label == "same_origin"

    def test_external_origin(self):
        """An absolute URL on another host is labelled ``external``."""
        anchor = {"href": "https://other.de/policy"}
        label = classify_anchor_target(anchor, self._PAGE_URL)
        assert label == "external"
class TestEvaluateReachability:
    """Verdicts returned by ``evaluate_reachability`` for sample footers."""

    def test_pass_when_reopen_in_same_page(self):
        """A ``data-cmp`` link on a ``#`` href passes with no severity."""
        page = _wrap('<a href="#" data-cmp="open">Cookie-Einstellungen</a>')
        result = evaluate_reachability(page, "https://x.de/")
        assert result["check_id"] == "COOKIE-CONSENT-UX-001"
        assert result["passed"] is True
        assert result["severity"] is None
        assert result["has_reopen_anchor"] is True

    def test_fail_missing_when_no_reopen(self):
        """Only a policy link in the footer: HIGH with reason ``missing``."""
        page = _wrap('<a href="/cookie-richtlinie">Cookie-Richtlinie</a>')
        result = evaluate_reachability(page, "https://x.de/")
        assert result["passed"] is False
        assert result["severity"] == "HIGH"
        assert result["severity_reason"] == "missing"

    def test_medium_when_reopen_opens_new_tab(self):
        """Settings link opening a new tab: MEDIUM, ``misclassified``."""
        # The Elli case: the footer link leads to the cookie policy in a
        # new tab instead of opening the CMP in place.
        footer_link = (
            '<a href="/cookie-einstellungen" target="_blank">'
            'Cookie-Einstellungen</a>'
        )
        result = evaluate_reachability(_wrap(footer_link), "https://x.de/")
        assert result["passed"] is False
        assert result["severity"] == "MEDIUM"
        assert result["severity_reason"] == "misclassified"

    def test_high_when_only_browser_deflection(self):
        """Only a browser-settings link: HIGH, ``factually_wrong``."""
        page = _wrap('<a href="/cookies">Browser-Einstellungen</a>')
        result = evaluate_reachability(page, "https://x.de/")
        assert result["passed"] is False
        assert result["severity"] == "HIGH"
        assert result["severity_reason"] == "factually_wrong"

    def test_empty_footer_is_fail(self):
        """An empty footer fails with HIGH severity."""
        result = evaluate_reachability(_wrap(""), "https://x.de/")
        assert result["passed"] is False
        assert result["severity"] == "HIGH"

    def test_reopen_external_origin_is_medium(self):
        """Settings link pointing at another host fails with MEDIUM."""
        footer_link = (
            '<a href="https://privacy.other.com/manage">'
            'Cookie-Einstellungen</a>'
        )
        result = evaluate_reachability(_wrap(footer_link), "https://x.de/")
        assert result["passed"] is False
        assert result["severity"] == "MEDIUM"