0f3ec9061e
1. Intake prompt: "BETREIBER verarbeitet" statt "Text erwaehnt".
IHK berichtet ueber Gesundheitsdaten → false. Vorher: true.
2. §312k Check: nur bei E-Commerce/Abo-Websites (Warenkorb, Shop, PayPal etc.)
IHK hat keine Vertraege → kein Kuendigungsbutton noetig.
3. docs-src/ restored from commit 9824304
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
149 lines
5.8 KiB
Python
149 lines
5.8 KiB
Python
"""
|
|
Website Compliance Checks — checks public website for consumer protection
|
|
compliance (§312k BGB, §5 TMG, Art. 13 DSGVO, Cookie-Banner).
|
|
|
|
Extracted from agent_analyze_routes.py to keep route files slim.
|
|
"""
|
|
|
|
import re
|
|
|
|
import httpx
|
|
|
|
|
|
class FollowUpQuestion:
    """A question to put to the user when a check cannot be decided from HTML alone.

    Instances are plain attribute holders; all five values are stored
    exactly as passed to the constructor.
    """

    def __init__(
        self,
        id: str,
        question: str,
        legal_basis: str,
        severity: str,
        finding_if_no: str,
    ):
        """Store the question and the finding that applies to a negative answer.

        Args:
            id: Identifier of the question (e.g. ``"cookie_consent"``).
            question: Text of the question shown to the user.
            legal_basis: Legal norm(s) the question relates to.
            severity: Severity label (callers in this module pass
                ``"high"`` or ``"medium"``).
            finding_if_no: Finding text associated with a "no" answer.
        """
        # NOTE: the parameter name `id` shadows the builtin, but it is part of
        # the constructor's keyword interface and therefore kept as is.
        self.id = id
        self.question = question
        self.legal_basis = legal_basis
        self.severity = severity
        self.finding_if_no = finding_if_no

    def __repr__(self) -> str:
        """Return a constructor-style representation for logs and debugging."""
        return (
            f"{type(self).__name__}("
            f"id={self.id!r}, "
            f"question={self.question!r}, "
            f"legal_basis={self.legal_basis!r}, "
            f"severity={self.severity!r}, "
            f"finding_if_no={self.finding_if_no!r})"
        )
|
|
|
|
|
|
async def check_website_compliance(
    client: httpx.AsyncClient, url: str, html: str,
) -> tuple[list[str], list[FollowUpQuestion]]:
    """Scan a public web page for consumer-protection compliance signals.

    Four heuristic checks are run against the lower-cased HTML:

    1. Cancellation button (§312k BGB) - only reported for pages that look
       like e-commerce/subscription sites. If the markup shows no cancel
       link, a few well-known cancellation paths on the same host are
       probed with HEAD requests.
    2. Imprint link (§5 TMG / §18 MStV).
    3. Privacy-policy link (Art. 13 DSGVO).
    4. Cookie-consent banner (§25 TDDDG) - only produces a follow-up
       question, never a direct finding.

    Args:
        client: HTTP client used for the HEAD probes.
        url: Address of the scanned page; only its host part is used.
        html: Markup of the scanned page.

    Returns:
        A ``(findings, follow_ups)`` tuple: finding texts that could be
        derived directly, and questions that need an answer from the user.
    """
    from urllib.parse import urlsplit

    findings: list[str] = []
    follow_ups: list[FollowUpQuestion] = []
    html_lower = html.lower()

    def matches_any(patterns: list[str]) -> bool:
        """Return True if at least one pattern occurs in the lower-cased HTML."""
        return any(re.search(pattern, html_lower) for pattern in patterns)

    # Host (incl. port) of the scanned page. urlsplit drops path, query and
    # fragment, so "https://host?x=1" no longer yields probe URLs such as
    # "https://host?x=1/kuendigen" that silently hit the homepage instead.
    try:
        stripped_url = url.strip()
        base_domain = urlsplit(stripped_url).netloc
        if not base_domain:
            # Scheme-less input such as "example.com/path".
            base_domain = urlsplit(f"//{stripped_url}").netloc
    except ValueError:
        # Malformed authority (e.g. broken IPv6 literal): skip probing.
        base_domain = ""

    # E-Commerce detection — §312k only applies to sites with online contracts
    # NOTE(review): the indicators are unanchored substrings, so e.g. "abo"
    # also matches inside "about" and "shop" inside "workshop".
    ecommerce_indicators = [
        r"warenkorb", r"cart", r"shop", r"bestell", r"order",
        r"checkout", r"kasse", r"kaufen", r"add.?to.?cart",
        r"stripe|paypal|klarna|mollie|adyen",
        r"abo", r"mitgliedschaft", r"subscription", r"premium",
    ]
    is_ecommerce = matches_any(ecommerce_indicators)

    # --- §312k BGB: cancellation button (ONLY for e-commerce/subscription sites) ---
    cancel_patterns = [
        r'href="[^"]*(?:kuendig|kündig|cancel|vertrag.?beenden|abo.?beenden|mitgliedschaft.?beenden)[^"]*"',
        r'(?:kündigen|kuendigen|vertrag beenden|abo beenden|mitgliedschaft kündigen)',
    ]
    has_cancel_link = matches_any(cancel_patterns)

    # The probe result only matters for e-commerce pages, so the network
    # round-trips are skipped for everything else.
    if is_ecommerce and not has_cancel_link and base_domain:
        cancel_paths = (
            "kuendigen",
            "cancel",
            "vertrag-kuendigen",
            "abo-kuendigen",
            "account/cancel",
        )
        for path in cancel_paths:
            probe_url = f"https://{base_domain}/{path}"
            try:
                probe = await client.head(probe_url, follow_redirects=True, timeout=5.0)
            except Exception:
                # Best-effort probe: an unreachable or failing URL simply
                # does not count as a cancellation page.
                continue
            # Any non-error status (after redirects) counts as "page exists".
            if probe.status_code < 400:
                has_cancel_link = True
                break

    if is_ecommerce and not has_cancel_link:
        findings.append(
            "[§312k BGB] Kein oeffentlich sichtbarer Kuendigungsbutton gefunden. "
            "Seit 01.07.2022 muessen online geschlossene Vertraege mit max. 2 Klicks kuendbar sein."
        )
        follow_ups.append(FollowUpQuestion(
            id="cancel_button_312k",
            question="Koennen Sie nach Login im Kundenbereich innerhalb von 2 Klicks Ihren Vertrag kuendigen?",
            legal_basis="§ 312k BGB (Kuendigungsbutton), Omnibus-Richtlinie (EU) 2019/2161",
            severity="high",
            finding_if_no=(
                "[§312k BGB] VERSTOSS: Kein funktionaler Kuendigungsbutton vorhanden. "
                "Der Anbieter ist verpflichtet, einen leicht auffindbaren Kuendigungsbutton "
                "bereitzustellen (max. 2 Klicks). Ein Zwang zur telefonischen Kuendigung "
                "oder Kuendigung per Brief ist rechtswidrig."
            ),
        ))

    # --- Imprint obligation (§5 TMG / §18 MStV) ---
    imprint_patterns = [
        r'href="[^"]*(?:impressum|imprint|legal.?notice|about.?us/legal)[^"]*"',
        r'>impressum<',
    ]
    if not matches_any(imprint_patterns):
        findings.append(
            "[§5 TMG] Kein Impressum-Link auf der Seite gefunden. "
            "Geschaeftsmaessige Online-Dienste muessen ein leicht erreichbares Impressum bereitstellen."
        )

    # --- Privacy policy linked? ---
    privacy_patterns = [
        r'href="[^"]*(?:datenschutz|privacy|dsgvo)[^"]*"',
        r'>datenschutz<',
    ]
    if not matches_any(privacy_patterns):
        findings.append(
            "[Art. 13 DSGVO] Kein Link zur Datenschutzerklaerung gefunden. "
            "Nutzer muessen ueber die Verarbeitung personenbezogener Daten informiert werden."
        )

    # --- Cookie consent banner ---
    # A missing marker in the static HTML is not reported as a finding;
    # the user is asked instead.
    cookie_patterns = [
        r'(?:cookie.?consent|cookie.?banner|consent.?manager|didomi|cookiebot|onetrust|usercentrics)',
        r'(?:gdpr|dsgvo).?(?:consent|einwilligung)',
    ]
    if not matches_any(cookie_patterns):
        follow_ups.append(FollowUpQuestion(
            id="cookie_consent",
            question="Wird beim ersten Besuch der Website ein Cookie-Consent-Banner angezeigt?",
            legal_basis="§ 25 TDDDG (ehem. TTDSG), Art. 5(3) ePrivacy-Richtlinie",
            severity="medium",
            finding_if_no=(
                "[§25 TDDDG] Kein Cookie-Consent-Banner erkannt. "
                "Vor dem Setzen nicht-essentieller Cookies ist eine Einwilligung erforderlich."
            ),
        ))

    return findings, follow_ups
|
|
|
|
|
|
def to_string_list(items: list) -> list[str]:
    """Render a list of dicts and/or arbitrary values as a list of strings.

    Dict entries become ``"[<code>] <description>"`` when a truthy
    ``code`` (or, failing that, ``id``) is present, otherwise just the
    description. The description falls back from ``description`` to
    ``name`` to ``code`` to the dict's own string form. Every other
    entry is passed through ``str``. A falsy ``items`` yields ``[]``.
    """

    def _render(entry) -> str:
        if not isinstance(entry, dict):
            return str(entry)
        label = entry.get("code", entry.get("id", ""))
        last_resort = entry.get("code", str(entry))
        text = entry.get("description", entry.get("name", last_resort))
        if label:
            return f"[{label}] {text}"
        return str(text)

    return [_render(entry) for entry in (items or [])]
|
|
|
|
|
|
def risk_to_escalation(risk_level: str) -> str:
    """Translate a UCCA risk level into its escalation level.

    The lookup is case-insensitive. Empty, ``None`` or unknown levels
    map to ``"E0"``.
    """
    if not risk_level:
        return "E0"

    escalation_by_risk = dict(
        MINIMAL="E0",
        LIMITED="E1",
        HIGH="E2",
        UNACCEPTABLE="E3",
    )
    normalized = risk_level.upper()
    if normalized in escalation_by_risk:
        return escalation_by_risk[normalized]
    return "E0"
|