Files
breakpilot-compliance/backend-compliance/compliance/services/doc_checks/impressum_checks.py
T
Benjamin Admin b363c28539 feat: Add 76 Level-2 regex checks for document correctness verification
Split dsi_document_checker.py (466 LOC) into doc_checks/ package (9 files).
Two-pass L1→L2 logic: L1 checks "Is it mentioned?", L2 checks "Is it correct?"
(e.g. controller has full address, specific Art. 6 lit., concrete time periods).

138 total checks (62 L1 + 76 L2) across 7 doc types:
- DSE Art. 13: 31, Impressum §5 TMG: 16, Cookie §25 TDDDG: 15
- Widerruf §355: 15, AGB §305ff: 21, Social Media Art. 26: 20, DSFA Art. 35: 18

Frontend: hierarchical L1→L2 display with dual progress bars
(green=completeness, blue=correctness).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-07 12:37:03 +02:00

186 lines
6.2 KiB
Python

"""
Impressum checks — §5 TMG / §18 MStV.
Level 1: Pflichtangabe erwaehnt?
Level 2: Pflichtangabe korrekt/vollstaendig?
"""
# Checklist of mandatory Impressum (legal notice) items.
#
# Every entry is a dict with the following keys:
#   id       - unique identifier of the check
#   label    - German label text of the check
#   level    - 1 ("is the item mentioned?") or 2 ("is the item correct/complete?")
#   parent   - None for level-1 entries; for level-2 entries the id of the
#              level-1 entry they refine
#   patterns - list of regular expressions for this check
#   severity - "HIGH", "MEDIUM" or "LOW"
#
# NOTE(review): the patterns are written in lower case, so the consumer
# presumably searches case-insensitively (re.IGNORECASE) and treats a hit on
# any single pattern as a pass - confirm against the checker that reads this
# list. Under IGNORECASE the [A-Z...] classes do not enforce capitalisation.
IMPRESSUM_CHECKLIST = [
    # ── L1: Provider name ─────────────────────────────────────────────
    {
        "id": "name",
        "label": "Name des Anbieters",
        "level": 1,
        "parent": None,
        "patterns": [
            # Legal-form tokens are anchored on word boundaries so that short
            # ones ("ag", "kg", "ug") no longer fire inside ordinary words
            # such as "Vertrag" or "Zug".
            r"\b(?:gmbh|ag|ohg|kg|gbr|ug|mbh|inc|ltd)\b",
            # "e.V." ends in a dot, so it cannot take a trailing \b (there is
            # no word boundary between "." and a following space); it gets its
            # own pattern and also accepts the spaced spelling "e. V.".
            r"\be\.\s?v\.",
            r"firma",
            r"unternehmen",
        ],
        "severity": "HIGH",
    },
    # ── L1: Postal address ────────────────────────────────────────────
    {
        "id": "address",
        "label": "Anschrift",
        "level": 1,
        "parent": None,
        "patterns": [
            # Accepts "strasse", "straße" and "str." followed by a digit; the
            # ß spelling is also accepted by the level-2 street check below.
            r"(?:str(?:a(?:ss|ß)e|\.)|weg|platz|allee)\s*\d",
            r"d-\d{5}",
            r"\d{5}\s+\w+",
        ],
        "severity": "HIGH",
    },
    {
        "id": "address_zip_city",
        "label": "PLZ + Ort vorhanden",
        "level": 2,
        "parent": "address",
        "patterns": [
            # Five-digit postal code followed by a place name.
            r"\d{5}\s+[A-Z\u00c0-\u017e]\w{2,}",
        ],
        "severity": "MEDIUM",
    },
    {
        "id": "address_street_number",
        "label": "Strasse + Hausnummer vorhanden",
        "level": 2,
        "parent": "address",
        "patterns": [
            # Compound street name ("Hauptstraße 5", "Hauptstr. 5").
            r"[A-Z\u00c0-\u017e]\w+(?:str|stra(?:ss|ß)e|weg|platz|allee|gasse|ring|damm|ufer)\s*\.?\s*\d+",
            # Street name written as separate words ("Berliner Straße 5").
            r"\w+\s+(?:str|stra(?:ss|ß)e|weg|platz|allee)\s*\.?\s*\d+",
        ],
        "severity": "MEDIUM",
    },
    # ── L1: Contact details ───────────────────────────────────────────
    {
        "id": "contact",
        "label": "Kontaktdaten (E-Mail + Telefon)",
        "level": 1,
        "parent": None,
        "patterns": [
            r"(?:e-?mail|mail).*@",
            r"telefon|phone|tel\.",
            r"\+?\d[\d\s/\-]{8,}",
        ],
        "severity": "HIGH",
    },
    {
        "id": "contact_email_format",
        "label": "E-Mail-Adresse im korrekten Format",
        "level": 2,
        "parent": "contact",
        "patterns": [
            r"[a-z0-9._%+\-]+@[a-z0-9.\-]+\.[a-z]{2,}",
        ],
        "severity": "MEDIUM",
    },
    {
        "id": "contact_phone_format",
        "label": "Telefonnummer vorhanden",
        "level": 2,
        "parent": "contact",
        "patterns": [
            # One or more "." / ":" separators are allowed after the keyword so
            # that the common form "Tel.: 0301234567" is recognised (a single
            # [.:] stopped at the dot and then failed on the colon).
            r"(?:tel(?:efon)?|phone|fon)\s*(?:[.:]\s*)+[\+\d][\d\s/\-]{6,}",
            r"\+49\s*[\d\s/\-]{8,}",
            r"0\d{2,4}\s*[/\-\s]\s*\d{4,}",
        ],
        "severity": "MEDIUM",
    },
    # ── L1: Commercial register ───────────────────────────────────────
    {
        "id": "register",
        "label": "Handelsregister / Registernummer",
        "level": 1,
        "parent": None,
        "patterns": [
            r"(?:handelsregister|hrb|hra|registergericht|amtsgericht)",
            r"register.*(?:nr|nummer)",
        ],
        "severity": "MEDIUM",
    },
    {
        "id": "register_court",
        "label": "Registergericht benannt (Amtsgericht X)",
        "level": 2,
        "parent": "register",
        "patterns": [
            r"(?:amtsgericht|registergericht)\s+[A-Z\u00c0-\u017e]\w+",
            # Abbreviation "AG <place>"; the word boundary keeps it from
            # matching the tail of words like "Vertrag".
            r"\bag\s+[A-Z\u00c0-\u017e]\w+",
        ],
        "severity": "LOW",
    },
    {
        "id": "register_number",
        "label": "Registernummer (HRB/HRA + Nummer)",
        "level": 2,
        "parent": "register",
        "patterns": [
            r"(?:hrb|hra)\s*\d+",
        ],
        "severity": "LOW",
    },
    # ── L1: VAT identification number ─────────────────────────────────
    {
        "id": "vat",
        "label": "USt-IdNr.",
        "level": 1,
        "parent": None,
        "patterns": [
            r"ust.*id",
            r"umsatzsteuer.*identifikation",
            r"vat.*id",
            # Bounded on both sides: "de" must start a word and the number must
            # be exactly nine digits (not a prefix of a longer number).
            r"\bde\s*\d{9}\b",
        ],
        "severity": "MEDIUM",
    },
    {
        "id": "vat_de_format",
        "label": "USt-IdNr. im Format DE + 9 Ziffern",
        "level": 2,
        "parent": "vat",
        "patterns": [
            # Same bounded form as in the level-1 "vat" check, so that e.g.
            # "Gemeinde 1234567890" is not mistaken for a VAT ID.
            r"\bde\s*\d{9}\b",
        ],
        "severity": "LOW",
    },
    # ── L1: Authorised representatives ────────────────────────────────
    {
        "id": "representative",
        "label": "Vertretungsberechtigte",
        "level": 1,
        "parent": None,
        "patterns": [
            r"vertretungsberechtigt",
            r"gesch(?:ae|ä)ftsf(?:ue|ü)hr",
            r"vorstand",
            r"inhaber",
        ],
        "severity": "MEDIUM",
    },
    {
        "id": "representative_person",
        "label": "Name der vertretungsberechtigten Person",
        "level": 2,
        "parent": "representative",
        "patterns": [
            # Role keyword, colon, then the first letter of a name.
            r"(?:gesch(?:ae|ä)ftsf(?:ue|ü)hr|vorstand|inhaber)\w*\s*:\s*[A-Z\u00c0-\u017e]",
            r"(?:vertreten\s+durch|repr(?:ae|ä)sentiert)\s*:?\s*[A-Z\u00c0-\u017e]",
        ],
        "severity": "LOW",
    },
    # ── New L1: Editorially responsible person ────────────────────────
    {
        "id": "editorial_visdp",
        "label": "V.i.S.d.P. / Redaktionell Verantwortlicher (§18 MStV)",
        "level": 1,
        "parent": None,
        "patterns": [
            r"v\.?\s*i\.?\s*s\.?\s*d\.?\s*p",
            r"(?:redaktionell|inhaltlich)\s+verantwortlich",
            # Matches "§ 18 MStV", "§ 18 Abs. 2 MStV" and the spelled-out
            # "§ 18 Medienstaatsvertrag". The optional "s" after "staat" is
            # required for the full word; without it the pattern could never
            # match "Medienstaatsvertrag".
            r"§\s*18\s+(?:abs\.?\s*\d+\s+)?m(?:edien)?st(?:aats?)?v",
        ],
        "severity": "LOW",
    },
    # ── New L1: Dispute resolution ────────────────────────────────────
    {
        "id": "dispute_resolution",
        "label": "Verbraucherstreitbeilegung / OS-Plattform",
        "level": 1,
        "parent": None,
        "patterns": [
            r"verbraucherstreitbeilegung|streitschlichtung",
            r"(?:os|odr)[\-\s]plattform",
            r"ec\.europa\.eu.*odr",
            r"vsbg|verbraucherstreitbeilegungsgesetz",
            r"alternative\s+streitbeilegung",
        ],
        "severity": "LOW",
    },
]