feat(compliance-check): integrate banner cross-check + extract to module
Build + Deploy / build-admin-compliance (push) Successful in 1m57s
Build + Deploy / build-backend-compliance (push) Successful in 3m20s
Build + Deploy / build-ai-sdk (push) Successful in 48s
Build + Deploy / build-developer-portal (push) Successful in 1m6s
Build + Deploy / build-tts (push) Successful in 1m43s
Build + Deploy / build-document-crawler (push) Successful in 44s
Build + Deploy / build-dsms-gateway (push) Successful in 31s
Build + Deploy / build-dsms-node (push) Successful in 18s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / loc-budget (push) Failing after 16s
CI / secret-scan (push) Has been skipped
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Successful in 2m40s
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / test-go (push) Successful in 47s
CI / test-python-backend (push) Successful in 38s
CI / test-python-document-crawler (push) Successful in 28s
CI / test-python-dsms-gateway (push) Successful in 20s
CI / validate-canonical-controls (push) Successful in 14s
Build + Deploy / trigger-orca (push) Successful in 3m26s
Build + Deploy / build-admin-compliance (push) Successful in 1m57s
Build + Deploy / build-backend-compliance (push) Successful in 3m20s
Build + Deploy / build-ai-sdk (push) Successful in 48s
Build + Deploy / build-developer-portal (push) Successful in 1m6s
Build + Deploy / build-tts (push) Successful in 1m43s
Build + Deploy / build-document-crawler (push) Successful in 44s
Build + Deploy / build-dsms-gateway (push) Successful in 31s
Build + Deploy / build-dsms-node (push) Successful in 18s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / loc-budget (push) Failing after 16s
CI / secret-scan (push) Has been skipped
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Successful in 2m40s
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / test-go (push) Successful in 47s
CI / test-python-backend (push) Successful in 38s
CI / test-python-document-crawler (push) Successful in 28s
CI / test-python-dsms-gateway (push) Successful in 20s
CI / validate-canonical-controls (push) Successful in 14s
Build + Deploy / trigger-orca (push) Successful in 3m26s
Add automatic banner check (Step 3b) and banner-vs-cookie cross-check (Step 3c) to unified compliance check. Extract cross-check logic to banner_cookie_cross_check.py to keep routes under 500 LOC. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -203,6 +203,44 @@ async def _run_compliance_check(check_id: str, req: ComplianceCheckRequest):
|
||||
results.append(result)
|
||||
total_findings += result.findings_count
|
||||
|
||||
# Step 3b: Banner-Check (automatic, uses first URL or homepage)
|
||||
banner_result = None
|
||||
banner_url = req.documents[0].url if req.documents and req.documents[0].url else ""
|
||||
# Use the homepage (strip path) for banner check
|
||||
if banner_url:
|
||||
from urllib.parse import urlparse
|
||||
parsed = urlparse(banner_url)
|
||||
banner_url = f"{parsed.scheme}://{parsed.netloc}"
|
||||
if banner_url:
|
||||
_update(check_id, "Cookie-Banner wird geprueft...")
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=120.0) as client:
|
||||
resp = await client.post(
|
||||
f"{CONSENT_TESTER_URL}/scan",
|
||||
json={"url": banner_url, "timeout_per_phase": 10},
|
||||
)
|
||||
if resp.status_code == 200:
|
||||
banner_result = resp.json()
|
||||
except Exception as e:
|
||||
logger.warning("Banner check failed: %s", e)
|
||||
|
||||
# Step 3c: Cross-check Banner vs Cookie-Richtlinie
|
||||
if banner_result and "cookie" in doc_texts:
|
||||
_update(check_id, "Banner vs. Cookie-Richtlinie abgleichen...")
|
||||
cross_findings = _cross_check_banner_vs_cookie(
|
||||
banner_result, doc_texts["cookie"],
|
||||
)
|
||||
if cross_findings:
|
||||
# Add cross-check findings to cookie results
|
||||
for r in results:
|
||||
if r.doc_type == "cookie":
|
||||
for cf in cross_findings:
|
||||
r.checks.append(CheckItem(**cf))
|
||||
# Recompute
|
||||
l2 = [c for c in r.checks if c.level == 2 and not c.skipped]
|
||||
l2p = sum(1 for c in l2 if c.passed)
|
||||
r.correctness_pct = round(l2p / len(l2) * 100) if l2 else 0
|
||||
|
||||
# Step 4: Build report
|
||||
_update(check_id, "Report wird erstellt...")
|
||||
report_html = build_html_report(results, None)
|
||||
@@ -223,6 +261,11 @@ async def _run_compliance_check(check_id: str, req: ComplianceCheckRequest):
|
||||
response = {
|
||||
"results": [_result_to_dict(r) for r in results],
|
||||
"business_profile": profile_dict,
|
||||
"banner_result": {
|
||||
"detected": banner_result.get("banner_detected", False) if banner_result else False,
|
||||
"provider": banner_result.get("banner_provider", "") if banner_result else "",
|
||||
"violations": len(banner_result.get("banner_checks", {}).get("violations", [])) if banner_result else 0,
|
||||
} if banner_result else None,
|
||||
"total_documents": len(results),
|
||||
"total_findings": total_findings,
|
||||
"email_status": email_result.get("status", "failed"),
|
||||
@@ -437,3 +480,7 @@ def _build_profile_html(profile) -> str:
|
||||
f'<td>{int(profile.confidence * 100)}%</td></tr>'
|
||||
'</table></div>'
|
||||
)
|
||||
|
||||
|
||||
# Cross-check extracted to compliance.services.banner_cookie_cross_check
|
||||
from compliance.services.banner_cookie_cross_check import cross_check_banner_vs_cookie as _cross_check_banner_vs_cookie
|
||||
|
||||
@@ -0,0 +1,145 @@
|
||||
"""
|
||||
Banner ↔ Cookie-Richtlinie Cross-Check.
|
||||
|
||||
Compares what the cookie banner actually does (Playwright scan)
|
||||
with what the cookie policy text promises. Finds mismatches:
|
||||
- Services used but not documented
|
||||
- Tracking before consent despite claiming consent-only
|
||||
- False "no tracking" claims contradicted by actual cookies
|
||||
"""
|
||||
|
||||
import logging
|
||||
import re
|
||||
|
||||
logger = logging.getLogger(__name__)

# Cookie-name prefixes treated as tracking cookies (e.g. "_ga_XXXX", "_gcl_au").
_TRACKING_COOKIE_PREFIXES = (
    "_ga", "_gid", "_fbp", "_fbc", "_gcl",
    "_pin", "_tt_", "li_sugr", "_hj",
)

# Names that are too short/generic to be used as prefixes: matching "fr" or
# "IDE" with startswith() would also flag unrelated cookies such as
# "frontend_lang" or "IDENTITY", so these must match the whole cookie name.
_TRACKING_COOKIE_NAMES = frozenset({"fr", "IDE"})

# Policy wording that states consent is required before tracking.
# "." does not match newlines, so both parts must appear on the same line.
_CONSENT_REQUIRED_RE = re.compile(
    r"(?:einwilligung|consent|zustimmung).*?"
    r"(?:erforderlich|notwendig|vorher|vorab)"
)


def _str_items(values) -> list[str]:
    """Return the non-blank string entries of *values*, preserving order.

    Anything that is not a list/tuple yields an empty list; non-string or
    whitespace-only entries are dropped so callers can safely lower-case,
    split and join the result.
    """
    if not isinstance(values, (list, tuple)):
        return []
    return [v for v in values if isinstance(v, str) and v.strip()]


def _phase(banner_result: dict, name: str) -> dict:
    """Return the scan phase *name* as a dict, or ``{}`` if missing/invalid."""
    phases = banner_result.get("phases")
    phase = phases.get(name) if isinstance(phases, dict) else None
    return phase if isinstance(phase, dict) else {}


def _cross_finding(
    finding_id: str,
    label: str,
    severity: str,
    hint: str,
    matched_text: str = "",
) -> dict:
    """Build one failed level-2 finding dict with the shared constant fields."""
    return {
        "id": finding_id,
        "label": label,
        "passed": False,
        "severity": severity,
        "level": 2,
        "parent": None,
        "skipped": False,
        "matched_text": matched_text,
        "hint": hint,
        "source": "cross_check",
    }


def cross_check_banner_vs_cookie(
    banner_result: dict,
    cookie_text: str,
) -> list[dict]:
    """Cross-check: does the banner behavior match the cookie policy text?

    Four independent checks are run, in this order:

    1. Every tracking service seen before consent or after accepting must be
       mentioned in the policy (full name, name without spaces, or first word).
    2. If services were seen before consent and the policy states that consent
       is required, that contradiction is reported.
    3. If tracking cookies were set after accepting and the policy claims
       there is no tracking, that contradiction is reported.
    4. If a banner was detected but the policy text is shorter than 100
       characters, the policy is treated as missing.

    Args:
        banner_result: Scan result dict. Reads ``phases.before_consent`` /
            ``phases.after_accept`` (``tracking_services``, ``cookies``) and
            ``banner_detected``. Missing or ``None`` sections are treated as
            empty; non-string list entries are ignored.
        cookie_text: Plain text of the cookie policy (``None`` counts as empty).

    Returns:
        List of CheckItem-compatible dicts, one per mismatch, in a
        deterministic order (check order, then first-seen service order).
    """
    findings: list[dict] = []
    cookie_lower = (cookie_text or "").lower()

    before = _phase(banner_result, "before_consent")
    after_accept = _phase(banner_result, "after_accept")

    # 1. Services found by banner but not mentioned in cookie policy.
    tracking_before = _str_items(before.get("tracking_services"))
    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # findings come out in the same order on every run (a set would not).
    all_tracking = list(dict.fromkeys(
        tracking_before + _str_items(after_accept.get("tracking_services"))
    ))

    for service in all_tracking:
        service_lower = service.lower()
        keywords = (
            service_lower,
            service_lower.replace(" ", ""),
            # Entries are non-blank here, so split() always has a first word.
            service_lower.split()[0] if " " in service_lower else service_lower,
        )
        if any(kw in cookie_lower for kw in keywords):
            continue
        findings.append(_cross_finding(
            f"cross-{service_lower.replace(' ', '_')[:20]}",
            f"Dienst '{service}' fehlt in Cookie-Richtlinie",
            "HIGH",
            (
                f"Der Banner-Scan hat '{service}' auf der Website erkannt, "
                f"aber die Cookie-Richtlinie erwaehnt diesen Dienst nicht. "
                f"Gemaess Art. 13(1)(e) DSGVO muessen alle Empfaenger/"
                f"Dienste in der DSI benannt werden."
            ),
        ))

    # 2. Tracking before consent but cookie policy says "nur mit Einwilligung".
    if tracking_before and _CONSENT_REQUIRED_RE.search(cookie_lower):
        sample = ", ".join(tracking_before[:3])
        findings.append(_cross_finding(
            "cross-tracking-before-consent",
            "Tracking vor Consent trotz Einwilligungspflicht",
            "CRITICAL",
            (
                f"Die Cookie-Richtlinie spricht von Einwilligungspflicht, "
                f"aber der Banner-Scan hat {len(tracking_before)} Tracking-"
                f"Dienst(e) VOR Consent erkannt: {sample}. "
                f"Dies ist ein Verstoss gegen §25 Abs. 1 TDDDG."
            ),
            matched_text=sample,
        ))

    # 3. Banner sets tracking cookies but policy claims "no tracking".
    tracking_cookies = [
        name for name in _str_items(after_accept.get("cookies"))
        if name in _TRACKING_COOKIE_NAMES
        or name.startswith(_TRACKING_COOKIE_PREFIXES)
    ]
    if tracking_cookies:
        no_tracking_claim = any(kw in cookie_lower for kw in (
            "keine tracking", "kein tracking", "keine marketing",
            "keine werbe", "no tracking",
        ))
        if no_tracking_claim:
            sample = ", ".join(tracking_cookies[:5])
            findings.append(_cross_finding(
                "cross-false-no-tracking",
                "Cookie-Richtlinie behauptet 'kein Tracking' — "
                "Banner widerspricht",
                "CRITICAL",
                (
                    f"Die Cookie-Richtlinie behauptet kein Tracking, aber "
                    f"nach Akzeptieren wurden Tracking-Cookies gesetzt: "
                    f"{sample}. "
                    f"Transparenzverstoss nach Art. 5(1)(a) DSGVO."
                ),
                matched_text=sample,
            ))

    # 4. Banner detected but no (meaningful) cookie policy provided.
    banner_detected = banner_result.get("banner_detected", False)
    if banner_detected and len(cookie_lower.strip()) < 100:
        findings.append(_cross_finding(
            "cross-banner-no-policy",
            "Cookie-Banner vorhanden aber keine Cookie-Richtlinie",
            "HIGH",
            (
                "Die Website zeigt ein Cookie-Banner, aber es wurde keine "
                "Cookie-Richtlinie eingereicht. Eine separate Cookie-Richtlinie "
                "oder ein Cookie-Abschnitt in der DSI ist nach §25 TDDDG "
                "erforderlich wenn nicht-essentielle Cookies verwendet werden."
            ),
        ))

    logger.info("Cross-check: %d findings (%d services, %d tracking before)",
                len(findings), len(all_tracking), len(tracking_before))
    return findings
|
||||
Reference in New Issue
Block a user