diff --git a/backend-compliance/compliance/api/agent_compliance_check_routes.py b/backend-compliance/compliance/api/agent_compliance_check_routes.py index c47f955..e484724 100644 --- a/backend-compliance/compliance/api/agent_compliance_check_routes.py +++ b/backend-compliance/compliance/api/agent_compliance_check_routes.py @@ -203,6 +203,44 @@ async def _run_compliance_check(check_id: str, req: ComplianceCheckRequest): results.append(result) total_findings += result.findings_count + # Step 3b: Banner-Check (automatic, uses first URL or homepage) + banner_result = None + banner_url = req.documents[0].url if req.documents and req.documents[0].url else "" + # Use the homepage (strip path) for banner check + if banner_url: + from urllib.parse import urlparse + parsed = urlparse(banner_url) + banner_url = f"{parsed.scheme}://{parsed.netloc}" + if banner_url: + _update(check_id, "Cookie-Banner wird geprueft...") + try: + async with httpx.AsyncClient(timeout=120.0) as client: + resp = await client.post( + f"{CONSENT_TESTER_URL}/scan", + json={"url": banner_url, "timeout_per_phase": 10}, + ) + if resp.status_code == 200: + banner_result = resp.json() + except Exception as e: + logger.warning("Banner check failed: %s", e) + + # Step 3c: Cross-check Banner vs Cookie-Richtlinie + if banner_result and "cookie" in doc_texts: + _update(check_id, "Banner vs. 
Cookie-Richtlinie abgleichen...") + cross_findings = _cross_check_banner_vs_cookie( + banner_result, doc_texts["cookie"], + ) + if cross_findings: + # Add cross-check findings to cookie results + for r in results: + if r.doc_type == "cookie": + for cf in cross_findings: + r.checks.append(CheckItem(**cf)) + # Recompute + l2 = [c for c in r.checks if c.level == 2 and not c.skipped] + l2p = sum(1 for c in l2 if c.passed) + r.correctness_pct = round(l2p / len(l2) * 100) if l2 else 0 + # Step 4: Build report _update(check_id, "Report wird erstellt...") report_html = build_html_report(results, None) @@ -223,6 +261,11 @@ async def _run_compliance_check(check_id: str, req: ComplianceCheckRequest): response = { "results": [_result_to_dict(r) for r in results], "business_profile": profile_dict, + "banner_result": { + "detected": banner_result.get("banner_detected", False) if banner_result else False, + "provider": banner_result.get("banner_provider", "") if banner_result else "", + "violations": len(banner_result.get("banner_checks", {}).get("violations", [])) if banner_result else 0, + } if banner_result else None, "total_documents": len(results), "total_findings": total_findings, "email_status": email_result.get("status", "failed"), @@ -437,3 +480,7 @@ def _build_profile_html(profile) -> str: f'{int(profile.confidence * 100)}%' '' ) + + +# Cross-check extracted to compliance.services.banner_cookie_cross_check +from compliance.services.banner_cookie_cross_check import cross_check_banner_vs_cookie as _cross_check_banner_vs_cookie diff --git a/backend-compliance/compliance/services/banner_cookie_cross_check.py b/backend-compliance/compliance/services/banner_cookie_cross_check.py new file mode 100644 index 0000000..30f45a1 --- /dev/null +++ b/backend-compliance/compliance/services/banner_cookie_cross_check.py @@ -0,0 +1,145 @@ +""" +Banner ↔ Cookie-Richtlinie Cross-Check. 
# Banner <-> cookie-policy cross-check.
#
# Compares what the cookie banner actually does (Playwright scan) with what
# the cookie policy text promises and reports mismatches:
# - services used but not documented
# - tracking before consent although the policy describes a consent requirement
# - "no tracking" claims contradicted by cookies set after acceptance

import logging
import re

logger = logging.getLogger(__name__)

# Policy wording that describes a consent requirement. "." does not match a
# newline, so both keyword groups must occur on the same line of the policy.
_CONSENT_REQUIRED_RE = re.compile(
    r"(?:einwilligung|consent|zustimmung).*?"
    r"(?:erforderlich|notwendig|vorher|vorab)"
)

# Cookie-name prefixes treated as tracking cookies.
_TRACKING_COOKIE_PREFIXES = (
    "_ga", "_gid", "_fbp", "_fbc", "_gcl",
    "_pin", "_tt_", "li_sugr", "_hj",
)

# Short names are compared exactly: used as prefixes they would also match
# unrelated cookies such as "frontend_lang" or "IDENTITY".
_TRACKING_COOKIE_NAMES = frozenset({"IDE", "fr"})

# Phrases in the (lower-cased) policy that claim the site does not track.
_NO_TRACKING_CLAIMS = (
    "keine tracking", "kein tracking", "keine marketing",
    "keine werbe", "no tracking",
)


def _as_dict(value) -> dict:
    """Return *value* if it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def _as_list(value) -> list:
    """Return *value* as a list if it is a list/tuple, otherwise an empty list."""
    return list(value) if isinstance(value, (list, tuple)) else []


def _service_names(values) -> list[str]:
    """Return the non-blank string entries of *values*, de-duplicated in order."""
    stripped = (v.strip() for v in _as_list(values) if isinstance(v, str))
    return list(dict.fromkeys(name for name in stripped if name))


def _cookie_name(entry) -> str:
    """Return the cookie name for a scan entry, or "" if none can be read."""
    if isinstance(entry, str):
        return entry
    # NOTE(review): dict entries are assumed to carry the cookie name under
    # "name" -- confirm against the consent-tester response schema.
    if isinstance(entry, dict) and isinstance(entry.get("name"), str):
        return entry["name"]
    return ""


def _is_tracking_cookie(name: str) -> bool:
    """Tell whether *name* is one of the known tracking cookies."""
    return (
        name in _TRACKING_COOKIE_NAMES
        or name.startswith(_TRACKING_COOKIE_PREFIXES)
    )


def _is_mentioned(service: str, cookie_lower: str) -> bool:
    """Tell whether the lower-cased policy text mentions *service*."""
    service_lower = service.lower()
    # NOTE: the first-word candidate is lenient -- a policy that mentions any
    # product sharing that first word counts as mentioning this service.
    candidates = (
        service_lower,
        service_lower.replace(" ", ""),
        service_lower.split()[0] if " " in service_lower else service_lower,
    )
    return any(candidate in cookie_lower for candidate in candidates)


def _finding(
    finding_id: str,
    label: str,
    severity: str,
    hint: str,
    matched_text: str = "",
) -> dict:
    """Build one failed level-2 finding dict with source "cross_check"."""
    return {
        "id": finding_id,
        "label": label,
        "passed": False,
        "severity": severity,
        "level": 2,
        "parent": None,
        "skipped": False,
        "matched_text": matched_text,
        "hint": hint,
        "source": "cross_check",
    }


def cross_check_banner_vs_cookie(
    banner_result: dict,
    cookie_text: str,
) -> list[dict]:
    """Cross-check the banner scan result against the cookie policy text.

    Four rules are evaluated, in this order:

    1. A tracking service seen before consent or after acceptance is not
       mentioned in the policy (HIGH, one finding per service).
    2. Tracking services were seen before consent although the policy
       contains consent-requirement wording (CRITICAL).
    3. Known tracking cookies were set after acceptance although the policy
       contains a "no tracking" claim (CRITICAL).
    4. A banner was detected but the policy text is shorter than 100
       characters after stripping (HIGH).

    Args:
        banner_result: Scan result. Reads ``phases.before_consent``,
            ``phases.after_accept`` (``tracking_services``, ``cookies``) and
            ``banner_detected``. Missing, ``None`` or wrongly typed parts are
            treated as empty.
        cookie_text: Cookie policy text; ``None`` is treated as empty.

    Returns:
        A list of finding dicts (keys ``id``, ``label``, ``passed``,
        ``severity``, ``level``, ``parent``, ``skipped``, ``matched_text``,
        ``hint``, ``source``). The order is deterministic: rule order, and
        within rule 1 the order in which services first appear in the scan.
    """
    findings: list[dict] = []
    cookie_lower = (cookie_text or "").lower()

    scan = _as_dict(banner_result)
    phases = _as_dict(scan.get("phases"))
    before = _as_dict(phases.get("before_consent"))
    after_accept = _as_dict(phases.get("after_accept"))

    # Non-string entries are ignored in both phases. An ordered de-dup
    # (instead of a set) keeps the finding order stable between runs.
    tracking_before = _service_names(before.get("tracking_services"))
    all_tracking = list(dict.fromkeys(
        tracking_before + _service_names(after_accept.get("tracking_services"))
    ))

    # 1. Services found by the scan but not mentioned in the cookie policy
    for service in all_tracking:
        if _is_mentioned(service, cookie_lower):
            continue
        findings.append(_finding(
            f"cross-{service.lower().replace(' ', '_')[:20]}",
            f"Dienst '{service}' fehlt in Cookie-Richtlinie",
            "HIGH",
            (
                f"Der Banner-Scan hat '{service}' auf der Website erkannt, "
                f"aber die Cookie-Richtlinie erwaehnt diesen Dienst nicht. "
                f"Gemaess Art. 13(1)(e) DSGVO muessen alle Empfaenger/"
                f"Dienste in der DSI benannt werden."
            ),
        ))

    # 2. Tracking before consent although the policy describes a consent
    #    requirement
    if tracking_before and _CONSENT_REQUIRED_RE.search(cookie_lower):
        first_services = ", ".join(tracking_before[:3])
        findings.append(_finding(
            "cross-tracking-before-consent",
            "Tracking vor Consent trotz Einwilligungspflicht",
            "CRITICAL",
            (
                f"Die Cookie-Richtlinie spricht von Einwilligungspflicht, "
                f"aber der Banner-Scan hat {len(tracking_before)} Tracking-"
                f"Dienst(e) VOR Consent erkannt: {first_services}. "
                f"Dies ist ein Verstoss gegen §25 Abs. 1 TDDDG."
            ),
            matched_text=first_services,
        ))

    # 3. Tracking cookies set after acceptance although the policy claims
    #    "no tracking"
    cookie_names = (
        _cookie_name(entry) for entry in _as_list(after_accept.get("cookies"))
    )
    tracking_cookies = [
        name for name in cookie_names if name and _is_tracking_cookie(name)
    ]
    if tracking_cookies and any(
        claim in cookie_lower for claim in _NO_TRACKING_CLAIMS
    ):
        first_cookies = ", ".join(tracking_cookies[:5])
        findings.append(_finding(
            "cross-false-no-tracking",
            "Cookie-Richtlinie behauptet 'kein Tracking' — "
            "Banner widerspricht",
            "CRITICAL",
            (
                f"Die Cookie-Richtlinie behauptet kein Tracking, aber "
                f"nach Akzeptieren wurden Tracking-Cookies gesetzt: "
                f"{first_cookies}. "
                f"Transparenzverstoss nach Art. 5(1)(a) DSGVO."
            ),
            matched_text=first_cookies,
        ))

    # 4. Banner detected but (practically) no cookie policy text provided
    if scan.get("banner_detected", False) and len(cookie_lower.strip()) < 100:
        findings.append(_finding(
            "cross-banner-no-policy",
            "Cookie-Banner vorhanden aber keine Cookie-Richtlinie",
            "HIGH",
            (
                "Die Website zeigt ein Cookie-Banner, aber es wurde keine "
                "Cookie-Richtlinie eingereicht. Eine separate Cookie-Richtlinie "
                "oder ein Cookie-Abschnitt in der DSI ist nach §25 TDDDG "
                "erforderlich wenn nicht-essentielle Cookies verwendet werden."
            ),
        ))

    logger.info("Cross-check: %d findings (%d services, %d tracking before)",
                len(findings), len(all_tracking), len(tracking_before))
    return findings