feat: Add 76 Level-2 regex checks for document correctness verification
Split dsi_document_checker.py (466 LOC) into doc_checks/ package (9 files).

Two-pass L1→L2 logic: L1 checks "Is it mentioned?", L2 checks "Is it correct?" (e.g. controller has full address, specific Art. 6 lit., concrete time periods).

138 total checks (62 L1 + 76 L2) across 7 doc types:
- DSE Art. 13: 31, Impressum §5 TMG: 16, Cookie §25 TDDDG: 15
- Widerruf §355: 15, AGB §305ff: 21, Social Media Art. 26: 20, DSFA Art. 35: 18

Frontend: hierarchical L1→L2 display with dual progress bars (green=completeness, blue=correctness).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,185 @@
|
||||
"""
|
||||
Impressum checks — §5 TMG / §18 MStV.
|
||||
|
||||
Level 1: Pflichtangabe erwaehnt?
|
||||
Level 2: Pflichtangabe korrekt/vollstaendig?
|
||||
"""
|
||||
|
||||
# Checklist for the Impressum (legal notice) document type.
#
# Each entry is a dict with the keys:
#   id       - unique identifier of the check
#   label    - human-readable (German) label; runtime text, do not translate
#   level    - 1 = "is the item mentioned?", 2 = "is it correct/complete?"
#   parent   - id of the level-1 entry a level-2 check refines, else None
#   patterns - list of regular-expression source strings
#   severity - "HIGH" | "MEDIUM" | "LOW"
#
# NOTE(review): keywords are written in lowercase while some patterns use
# [A-Z...] classes, so the consumer presumably matches case-insensitively or
# against lowercased text — confirm against the matcher before relying on it.
# Short abbreviations ("ag", "kg", "de", ...) are anchored with \b so that they
# do not match inside ordinary words such as "Vertrag" or "Kunde".
IMPRESSUM_CHECKLIST = [
    # ── L1: provider name ─────────────────────────────────────────────
    {
        "id": "name",
        "label": "Name des Anbieters",
        "level": 1, "parent": None,
        "patterns": [
            # Legal-form suffixes; word-bounded to avoid hits inside words
            # like "Montag" (ag) or "Zugang" (ug). "e.V." ends in a dot, so
            # it only gets the leading boundary.
            r"\b(?:(?:gmbh|ag|ohg|kg|gbr|ug|mbh|inc|ltd)\b|e\.v\.)",
            r"firma", r"unternehmen",
        ],
        "severity": "HIGH",
    },

    # ── L1: postal address ────────────────────────────────────────────
    {
        "id": "address",
        "label": "Anschrift",
        "level": 1, "parent": None,
        "patterns": [
            # Accepts "strasse", "straße" and "str." so that this parent
            # check is never stricter than its level-2 street check.
            r"(?:str(?:a(?:ss|ß)e|\.)|weg|platz|allee)\s*\d",
            r"d-\d{5}", r"\d{5}\s+\w+",
        ],
        "severity": "HIGH",
    },
    {
        "id": "address_zip_city",
        "label": "PLZ + Ort vorhanden",
        "level": 2, "parent": "address",
        "patterns": [
            r"\d{5}\s+[A-Z\u00c0-\u017e]\w{2,}",
        ],
        "severity": "MEDIUM",
    },
    {
        "id": "address_street_number",
        "label": "Strasse + Hausnummer vorhanden",
        "level": 2, "parent": "address",
        "patterns": [
            r"[A-Z\u00c0-\u017e]\w+(?:str|stra(?:ss|ß)e|weg|platz|allee|gasse|ring|damm|ufer)\s*\.?\s*\d+",
            r"\w+\s+(?:str|stra(?:ss|ß)e|weg|platz|allee)\s*\.?\s*\d+",
        ],
        "severity": "MEDIUM",
    },

    # ── L1: contact details ───────────────────────────────────────────
    {
        "id": "contact",
        "label": "Kontaktdaten (E-Mail + Telefon)",
        "level": 1, "parent": None,
        "patterns": [
            r"(?:e-?mail|mail).*@", r"telefon|phone|tel\.",
            r"\+?\d[\d\s/\-]{8,}",
        ],
        "severity": "HIGH",
    },
    {
        "id": "contact_email_format",
        "label": "E-Mail-Adresse im korrekten Format",
        "level": 2, "parent": "contact",
        "patterns": [
            r"[a-z0-9._%+\-]+@[a-z0-9.\-]+\.[a-z]{2,}",
        ],
        "severity": "MEDIUM",
    },
    {
        "id": "contact_phone_format",
        "label": "Telefonnummer vorhanden",
        "level": 2, "parent": "contact",
        "patterns": [
            r"(?:tel(?:efon)?|phone|fon)\s*[.:]\s*[\+\d][\d\s/\-]{6,}",
            r"\+49\s*[\d\s/\-]{8,}",
            r"0\d{2,4}\s*[/\-\s]\s*\d{4,}",
        ],
        "severity": "MEDIUM",
    },

    # ── L1: commercial register ───────────────────────────────────────
    {
        "id": "register",
        "label": "Handelsregister / Registernummer",
        "level": 1, "parent": None,
        "patterns": [
            r"(?:handelsregister|hrb|hra|registergericht|amtsgericht)",
            r"register.*(?:nr|nummer)",
        ],
        "severity": "MEDIUM",
    },
    {
        "id": "register_court",
        "label": "Registergericht benannt (Amtsgericht X)",
        "level": 2, "parent": "register",
        "patterns": [
            r"(?:amtsgericht|registergericht)\s+[A-Z\u00c0-\u017e]\w+",
            # "AG <city>" abbreviation; \b keeps "Vertrag Berlin" from
            # being read as a court name.
            r"\bag\s+[A-Z\u00c0-\u017e]\w+",
        ],
        "severity": "LOW",
    },
    {
        "id": "register_number",
        "label": "Registernummer (HRB/HRA + Nummer)",
        "level": 2, "parent": "register",
        "patterns": [
            r"(?:hrb|hra)\s*\d+",
        ],
        "severity": "LOW",
    },

    # ── L1: VAT identification number ─────────────────────────────────
    {
        "id": "vat",
        "label": "USt-IdNr.",
        "level": 1, "parent": None,
        "patterns": [
            r"ust.*id", r"umsatzsteuer.*identifikation",
            # Same bounded form as the level-2 format check below.
            r"vat.*id", r"\bde\s*\d{9}\b",
        ],
        "severity": "MEDIUM",
    },
    {
        "id": "vat_de_format",
        "label": "USt-IdNr. im Format DE + 9 Ziffern",
        "level": 2, "parent": "vat",
        "patterns": [
            # Exactly "DE" + 9 digits as a standalone token: rejects hits
            # inside words ("Kunde 123456789") and longer digit runs.
            r"\bde\s*\d{9}\b",
        ],
        "severity": "LOW",
    },

    # ── L1: authorised representatives ────────────────────────────────
    {
        "id": "representative",
        "label": "Vertretungsberechtigte",
        "level": 1, "parent": None,
        "patterns": [
            r"vertretungsberechtigt", r"gesch(?:ae|ä)ftsf(?:ue|ü)hr",
            r"vorstand", r"inhaber",
        ],
        "severity": "MEDIUM",
    },
    {
        "id": "representative_person",
        "label": "Name der vertretungsberechtigten Person",
        "level": 2, "parent": "representative",
        "patterns": [
            r"(?:gesch(?:ae|ä)ftsf(?:ue|ü)hr|vorstand|inhaber)\w*\s*:\s*[A-Z\u00c0-\u017e]",
            r"(?:vertreten\s+durch|repr(?:ae|ä)sentiert)\s*:?\s*[A-Z\u00c0-\u017e]",
        ],
        "severity": "LOW",
    },

    # ── New L1: person responsible for editorial content ──────────────
    {
        "id": "editorial_visdp",
        "label": "V.i.S.d.P. / Redaktionell Verantwortlicher (§18 MStV)",
        "level": 1, "parent": None,
        "patterns": [
            r"v\.?\s*i\.?\s*s\.?\s*d\.?\s*p",
            r"(?:redaktionell|inhaltlich)\s+verantwortlich",
            # "aats?" so the spelled-out "Medienstaatsvertrag" matches as
            # well as the abbreviation "MStV".
            r"§\s*18\s+m(?:edien)?st(?:aats?)?v",
        ],
        "severity": "LOW",
    },

    # ── New L1: consumer dispute resolution ───────────────────────────
    {
        "id": "dispute_resolution",
        "label": "Verbraucherstreitbeilegung / OS-Plattform",
        "level": 1, "parent": None,
        "patterns": [
            r"verbraucherstreitbeilegung|streitschlichtung",
            r"(?:os|odr)[\-\s]plattform",
            r"ec\.europa\.eu.*odr",
            r"vsbg|verbraucherstreitbeilegungsgesetz",
            r"alternative\s+streitbeilegung",
        ],
        "severity": "LOW",
    },
]
|
||||
Reference in New Issue
Block a user