feat: Add 76 Level-2 regex checks for document correctness verification
Split dsi_document_checker.py (466 LOC) into the doc_checks/ package (9 files).

Two-pass L1→L2 logic: L1 checks "Is it mentioned?", L2 checks "Is it correct?" (e.g. controller has full address, specific Art. 6 lit., concrete time periods).

138 total checks (62 L1 + 76 L2) across 7 doc types:
- DSE Art. 13: 31, Impressum §5 TMG: 16, Cookie §25 TDDDG: 15
- Widerruf §355: 15, AGB §305ff: 21, Social Media Art. 26: 20, DSFA Art. 35: 18

Frontend: hierarchical L1→L2 display with dual progress bars (green=completeness, blue=correctness).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -8,6 +8,9 @@ interface CheckItem {
|
||||
passed: boolean
|
||||
severity: string
|
||||
matched_text: string
|
||||
level?: number
|
||||
parent?: string | null
|
||||
skipped?: boolean
|
||||
}
|
||||
|
||||
interface DocResult {
|
||||
@@ -16,6 +19,7 @@ interface DocResult {
|
||||
doc_type: string
|
||||
word_count: number
|
||||
completeness_pct: number
|
||||
correctness_pct?: number
|
||||
checks: CheckItem[]
|
||||
findings_count: number
|
||||
error: string
|
||||
@@ -27,13 +31,69 @@ const DOC_TYPE_LABELS: Record<string, string> = {
|
||||
social_media: 'Social Media', dsfa: 'DSFA', joint_controller: 'Art. 26',
|
||||
}
|
||||
|
||||
/** One Level-1 check paired with the Level-2 checks that hang off it. */
interface GroupedCheck {
  /** The Level-1 check (an item whose `level` is 1 or missing). */
  check: CheckItem
  /** Level-2 items whose `parent` equals `check.id`; empty when there are none. */
  children: CheckItem[]
}
|
||||
|
||||
/**
 * Turns the flat check list into Level-1 checks with their Level-2 children.
 *
 * An item counts as Level 1 when `level` is 1 or missing. A Level-2 item is
 * attached to the Level-1 item whose `id` equals its `parent`. Level-2 items
 * without a matching Level-1 parent, and items of any other level, are left
 * out of the result. Input order is preserved for both parents and children.
 *
 * @param checks flat list of checks for one document
 * @returns one entry per Level-1 check, in input order
 */
function groupChecks(checks: CheckItem[]): GroupedCheck[] {
  // Index children by parent id in a single pass instead of re-filtering the
  // whole list once per Level-1 check.
  const childrenByParent = new Map<string, CheckItem[]>()
  const parents: CheckItem[] = []

  for (const item of checks) {
    const level = item.level ?? 1
    if (level === 1) {
      parents.push(item)
    } else if (level === 2 && item.parent != null) {
      const siblings = childrenByParent.get(item.parent)
      if (siblings) siblings.push(item)
      else childrenByParent.set(item.parent, [item])
    }
  }

  return parents.map(check => ({
    check,
    children: childrenByParent.get(check.id) ?? [],
  }))
}
|
||||
|
||||
/**
 * Status icon for a single check row.
 *
 * `skipped` wins over `passed`: a skipped check shows a gray dash, a passed
 * check a green tick, and anything else a red cross.
 */
function CheckIcon({ passed, skipped }: { passed: boolean; skipped?: boolean }) {
  // [svg class names, path data] for the state being rendered.
  const [iconClass, pathData] = skipped
    ? ['w-4 h-4 text-gray-300 mt-0.5 shrink-0', 'M20 12H4']
    : passed
      ? ['w-4 h-4 text-green-500 mt-0.5 shrink-0', 'M5 13l4 4L19 7']
      : ['w-4 h-4 text-red-500 mt-0.5 shrink-0', 'M6 18L18 6M6 6l12 12']

  return (
    <svg className={iconClass} fill="none" stroke="currentColor" viewBox="0 0 24 24">
      <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d={pathData} />
    </svg>
  )
}
|
||||
|
||||
/**
 * Compact "(passed/total)" counter for the Level-2 checks of one Level-1 row.
 *
 * `children` is the list of Level-2 check items, not React nodes. Skipped
 * items are excluded from both numbers; when nothing remains, nothing is
 * rendered.
 */
function L2Summary({ children }: { children: CheckItem[] }) {
  let counted = 0
  let ok = 0
  for (const item of children) {
    if (item.skipped) continue
    counted += 1
    if (item.passed) ok += 1
  }

  if (counted === 0) return null

  return (
    <span className="text-xs text-gray-400 ml-1">
      ({ok}/{counted})
    </span>
  )
}
|
||||
|
||||
export function ChecklistView({ results }: { results: DocResult[] }) {
|
||||
const [expanded, setExpanded] = useState<number | null>(null)
|
||||
const [expandedL1, setExpandedL1] = useState<Set<string>>(new Set())
|
||||
|
||||
if (!results || results.length === 0) return null
|
||||
|
||||
const totalOk = results.filter(r => r.completeness_pct === 100).length
|
||||
|
||||
const toggleL1 = (id: string) => {
|
||||
setExpandedL1(prev => {
|
||||
const next = new Set(prev)
|
||||
if (next.has(id)) next.delete(id)
|
||||
else next.add(id)
|
||||
return next
|
||||
})
|
||||
}
|
||||
|
||||
return (
|
||||
<div className="space-y-4">
|
||||
<div className="flex items-center justify-between">
|
||||
@@ -46,8 +106,15 @@ export function ChecklistView({ results }: { results: DocResult[] }) {
|
||||
{results.map((r, i) => {
|
||||
const isExp = expanded === i
|
||||
const pct = r.completeness_pct
|
||||
const cpct = r.correctness_pct ?? 0
|
||||
const barColor = pct === 100 ? 'bg-green-500' : pct >= 80 ? 'bg-green-400' : pct >= 50 ? 'bg-yellow-500' : 'bg-red-500'
|
||||
const cBarColor = cpct >= 80 ? 'bg-blue-400' : cpct >= 50 ? 'bg-blue-300' : 'bg-blue-200'
|
||||
const typeLabel = DOC_TYPE_LABELS[r.doc_type] || r.doc_type
|
||||
const grouped = groupChecks(r.checks)
|
||||
const l1Checks = r.checks.filter(c => (c.level ?? 1) === 1)
|
||||
const l2Active = r.checks.filter(c => (c.level ?? 1) === 2 && !c.skipped)
|
||||
const l1Passed = l1Checks.filter(c => c.passed).length
|
||||
const l2Passed = l2Active.filter(c => c.passed).length
|
||||
|
||||
return (
|
||||
<div key={i} className="border border-gray-200 rounded-lg overflow-hidden">
|
||||
@@ -66,8 +133,9 @@ export function ChecklistView({ results }: { results: DocResult[] }) {
|
||||
<div className="min-w-0 flex-1">
|
||||
<div className="text-sm font-medium text-gray-900 truncate">{r.label}</div>
|
||||
<div className="text-xs text-gray-500 truncate">
|
||||
{r.checks.length > 0
|
||||
? `${r.checks.filter(c => c.passed).length} von ${r.checks.length} Pruefpunkten bestanden`
|
||||
{l1Checks.length > 0
|
||||
? `${l1Passed}/${l1Checks.length} Pflichtangaben`
|
||||
+ (l2Active.length > 0 ? `, ${l2Passed}/${l2Active.length} Detailpruefungen` : '')
|
||||
: r.url}
|
||||
</div>
|
||||
</div>
|
||||
@@ -76,14 +144,24 @@ export function ChecklistView({ results }: { results: DocResult[] }) {
|
||||
{r.error ? (
|
||||
<span className="text-xs text-red-600 font-medium">Fehler</span>
|
||||
) : (
|
||||
<>
|
||||
<div className="w-16 h-2 bg-gray-200 rounded-full overflow-hidden">
|
||||
<div className="flex flex-col gap-1">
|
||||
<div className="flex items-center gap-2">
|
||||
<div className="w-16 h-1.5 bg-gray-200 rounded-full overflow-hidden">
|
||||
<div className={`h-full rounded-full ${barColor}`} style={{ width: `${pct}%` }} />
|
||||
</div>
|
||||
<span className={`text-xs font-medium w-10 text-right ${
|
||||
pct === 100 ? 'text-green-700' : pct >= 50 ? 'text-yellow-700' : 'text-red-700'
|
||||
}`}>{pct}%</span>
|
||||
</>
|
||||
</div>
|
||||
{l2Active.length > 0 && (
|
||||
<div className="flex items-center gap-2">
|
||||
<div className="w-16 h-1.5 bg-gray-200 rounded-full overflow-hidden">
|
||||
<div className={`h-full rounded-full ${cBarColor}`} style={{ width: `${cpct}%` }} />
|
||||
</div>
|
||||
<span className="text-xs font-medium w-10 text-right text-blue-600">{cpct}%</span>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</button>
|
||||
@@ -93,30 +171,65 @@ export function ChecklistView({ results }: { results: DocResult[] }) {
|
||||
{r.error ? (
|
||||
<p className="text-sm text-red-600">{r.error}</p>
|
||||
) : (
|
||||
<div className="space-y-1.5">
|
||||
{r.checks.map((check, ci) => (
|
||||
<div key={ci} className="flex items-start gap-2">
|
||||
{check.passed ? (
|
||||
<svg className="w-4 h-4 text-green-500 mt-0.5 shrink-0" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M5 13l4 4L19 7" />
|
||||
</svg>
|
||||
) : (
|
||||
<svg className="w-4 h-4 text-red-500 mt-0.5 shrink-0" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M6 18L18 6M6 6l12 12" />
|
||||
<div className="space-y-1">
|
||||
{grouped.map((g) => {
|
||||
const hasChildren = g.children.length > 0
|
||||
const isL1Exp = expandedL1.has(g.check.id)
|
||||
|
||||
return (
|
||||
<div key={g.check.id}>
|
||||
{/* L1 check */}
|
||||
<div
|
||||
className={`flex items-start gap-2 ${hasChildren ? 'cursor-pointer' : ''}`}
|
||||
onClick={hasChildren ? () => toggleL1(g.check.id) : undefined}
|
||||
>
|
||||
<CheckIcon passed={g.check.passed} />
|
||||
<div className="flex-1">
|
||||
<div className={`text-sm ${g.check.passed ? 'text-gray-700' : 'text-red-700 font-medium'}`}>
|
||||
{g.check.label}
|
||||
{hasChildren && <L2Summary>{g.children}</L2Summary>}
|
||||
{hasChildren && (
|
||||
<svg className={`w-3 h-3 inline ml-1 text-gray-400 transition-transform ${isL1Exp ? 'rotate-90' : ''}`}
|
||||
fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M9 5l7 7-7 7" />
|
||||
</svg>
|
||||
)}
|
||||
<div className="flex-1">
|
||||
<div className={`text-sm ${check.passed ? 'text-gray-700' : 'text-red-700 font-medium'}`}>
|
||||
{check.label}
|
||||
</div>
|
||||
{check.passed && check.matched_text && (
|
||||
{g.check.passed && g.check.matched_text && !hasChildren && (
|
||||
<div className="text-xs text-gray-400 mt-0.5 font-mono truncate">
|
||||
"...{check.matched_text}..."
|
||||
"...{g.check.matched_text}..."
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* L2 children */}
|
||||
{hasChildren && isL1Exp && (
|
||||
<div className="ml-6 mt-1 space-y-1 border-l-2 border-gray-200 pl-3">
|
||||
{g.children.map((ch) => (
|
||||
<div key={ch.id} className="flex items-start gap-2">
|
||||
<CheckIcon passed={ch.passed} skipped={ch.skipped} />
|
||||
<div className="flex-1">
|
||||
<div className={`text-xs ${
|
||||
ch.skipped ? 'text-gray-400 italic'
|
||||
: ch.passed ? 'text-gray-600' : 'text-red-600 font-medium'
|
||||
}`}>
|
||||
{ch.label}
|
||||
{ch.skipped && ' (uebersprungen)'}
|
||||
</div>
|
||||
{ch.passed && ch.matched_text && (
|
||||
<div className="text-xs text-gray-400 mt-0.5 font-mono truncate">
|
||||
"...{ch.matched_text}..."
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)
|
||||
})}
|
||||
{r.word_count > 0 && (
|
||||
<div className="text-xs text-gray-400 mt-2 pt-2 border-t border-gray-200">
|
||||
{r.word_count} Woerter analysiert
|
||||
|
||||
@@ -48,6 +48,9 @@ class CheckItem(BaseModel):
|
||||
passed: bool
|
||||
severity: str
|
||||
matched_text: str = ""
|
||||
level: int = 1
|
||||
parent: str | None = None
|
||||
skipped: bool = False
|
||||
|
||||
|
||||
class DocCheckResult(BaseModel):
|
||||
@@ -56,6 +59,7 @@ class DocCheckResult(BaseModel):
|
||||
doc_type: str
|
||||
word_count: int = 0
|
||||
completeness_pct: int = 0
|
||||
correctness_pct: int = 0
|
||||
checks: list[CheckItem] = []
|
||||
findings_count: int = 0
|
||||
error: str = ""
|
||||
@@ -229,27 +233,30 @@ async def _check_single_document(entry: DocCheckEntry) -> list[DocCheckResult]:
|
||||
|
||||
def _run_checklist(text: str, doc_type: str, label: str, url: str, word_count: int = 0) -> DocCheckResult:
    """Run the checklist against ``text`` and return a structured result.

    Findings whose ``code`` contains ``"SCORE"`` carry the per-check details
    (``all_checks``) and the two percentages; every other finding is only
    counted in ``findings_count``.

    Args:
        text: Document text to check.
        doc_type: Document type passed through to the checker.
        label: Human-readable document label.
        url: Source URL of the document.
        word_count: Pre-computed word count; when 0, the whitespace-separated
            token count of ``text`` is used instead.

    Returns:
        A ``DocCheckResult`` with L1/L2 check items and both percentages.
    """
    findings = check_document_completeness(text, doc_type, label, url)

    all_checks: list[CheckItem] = []
    completeness = 0
    correctness = 0
    for f in findings:
        if "SCORE" not in f.get("code", ""):
            continue
        all_checks.extend(
            CheckItem(
                id=c["id"], label=c["label"], passed=c["passed"],
                severity=c["severity"], matched_text=c.get("matched_text", ""),
                level=c.get("level", 1),
                parent=c.get("parent"),
                skipped=c.get("skipped", False),
            )
            for c in f.get("all_checks", [])
        )
        # The percentages are read from the structured fields of the SCORE
        # finding; the former regex parse of the finding text was always
        # overwritten by these reads and has been dropped.
        completeness = f.get("completeness_pct", 0)
        correctness = f.get("correctness_pct", 0)

    non_score = [f for f in findings if "SCORE" not in f.get("code", "")]
    return DocCheckResult(
        label=label, url=url, doc_type=doc_type,
        word_count=word_count or len(text.split()),
        completeness_pct=completeness,
        correctness_pct=correctness,
        checks=all_checks, findings_count=len(non_score),
    )
|
||||
|
||||
@@ -374,11 +381,15 @@ def _build_report(results: list[DocCheckResult], cookie_result: dict | None) ->
|
||||
status = "OK" if r.completeness_pct == 100 else "LUECKENHAFT" if r.completeness_pct >= 50 else "MANGELHAFT"
|
||||
if r.error:
|
||||
status = "FEHLER"
|
||||
parts.append(f"[{status}] {r.label} ({r.completeness_pct}%, {r.word_count} Woerter)")
|
||||
detail = f", Korrektheit {r.correctness_pct}%" if r.correctness_pct else ""
|
||||
parts.append(f"[{status}] {r.label} ({r.completeness_pct}%{detail}, {r.word_count} Woerter)")
|
||||
|
||||
for check in r.checks:
|
||||
if check.skipped:
|
||||
continue
|
||||
icon = "+" if check.passed else "!!"
|
||||
parts.append(f" [{icon}] {check.label}")
|
||||
indent = " " if check.level == 2 else " "
|
||||
parts.append(f"{indent}[{icon}] {check.label}")
|
||||
|
||||
if r.error:
|
||||
parts.append(f" FEHLER: {r.error}")
|
||||
|
||||
@@ -0,0 +1,32 @@
|
||||
"""
|
||||
doc_checks — Legal document compliance checkers.
|
||||
|
||||
Provides checklists and functions for verifying legal documents
|
||||
(DSI, AGB, Impressum, Cookie, Widerruf, Social Media, DSFA)
|
||||
against their mandatory content requirements.
|
||||
|
||||
Two check levels:
|
||||
L1 — "Is the mandatory field mentioned?"
|
||||
L2 — "Is it correct/complete?"
|
||||
"""
|
||||
|
||||
from .runner import check_document_completeness, classify_document_type
|
||||
from .dse_checks import ART13_CHECKLIST
|
||||
from .widerruf_checks import WIDERRUF_CHECKLIST
|
||||
from .agb_checks import AGB_CHECKLIST
|
||||
from .impressum_checks import IMPRESSUM_CHECKLIST
|
||||
from .cookie_checks import COOKIE_CHECKLIST
|
||||
from .social_media_checks import JOINT_CONTROLLER_CHECKLIST
|
||||
from .dsfa_checks import DSFA_CHECKLIST
|
||||
|
||||
# Public names of the doc_checks package: the two runner functions followed by
# one checklist constant per checker submodule imported above.
__all__ = [
    "check_document_completeness",
    "classify_document_type",
    "ART13_CHECKLIST",
    "WIDERRUF_CHECKLIST",
    "AGB_CHECKLIST",
    "IMPRESSUM_CHECKLIST",
    "COOKIE_CHECKLIST",
    "JOINT_CONTROLLER_CHECKLIST",
    "DSFA_CHECKLIST",
]
|
||||
@@ -0,0 +1,261 @@
|
||||
"""
|
||||
AGB checks — §305ff BGB.
|
||||
|
||||
Level 1: Pflichtangabe erwaehnt?
|
||||
Level 2: Pflichtangabe korrekt/vollstaendig?
|
||||
"""
|
||||
|
||||
# Checklist for general terms and conditions (AGB, §305ff BGB).
#
# Each entry is a dict with:
#   id        unique key of the check
#   label     German display text
#   level     1 = "is it mentioned?", 2 = "is it correct/complete?"
#   parent    None for level 1; for level 2 the id of the level-1 entry it refines
#   patterns  regex source strings for this check
#   severity  "HIGH" / "MEDIUM" / "LOW"
#
# NOTE(review): how the patterns are applied (search vs. match, flags such as
# IGNORECASE/DOTALL) is decided by the runner, which is not visible here —
# confirm before tightening any pattern.
AGB_CHECKLIST = [
    # ── L1: Scope of application ──────────────────────────────────────
    {
        "id": "scope",
        "label": "Geltungsbereich",
        "level": 1, "parent": None,
        "patterns": [
            # "geltung" is a prefix of "geltungsbereich", so the second
            # pattern already covers everything the first one matches.
            r"geltungsbereich", r"geltung", r"scope",
            r"diese\s+(?:agb|bedingungen)\s+gelten",
        ],
        "severity": "HIGH",
    },
    {
        "id": "incorporation_clause",
        "label": "Einbeziehungsklausel (§305 BGB)",
        "level": 2, "parent": "scope",
        "patterns": [
            r"einbezieh", r"bestandteil\s+des\s+vertrag",
            r"(?:mit|durch)\s+(?:der\s+)?bestellung.*(?:anerkennen|akzeptieren|zustimm)",
            r"§\s*305",
        ],
        "severity": "MEDIUM",
    },

    # ── L1: Conclusion of contract ────────────────────────────────────
    {
        "id": "contract",
        "label": "Vertragsschluss",
        "level": 1, "parent": None,
        "patterns": [
            r"vertragsschluss", r"zustandekommen",
            r"contract\s+formation", r"angebot\s+und\s+annahme",
        ],
        "severity": "HIGH",
    },

    # ── L1: Liability ─────────────────────────────────────────────────
    {
        "id": "liability",
        "label": "Haftung / Haftungsbeschraenkung",
        "level": 1, "parent": None,
        "patterns": [
            r"haftung", r"liability",
            r"schadensersatz", r"haftungsbeschr(?:ae|ä)nkung",
        ],
        "severity": "HIGH",
    },

    # ── L1: Place of jurisdiction / governing law ─────────────────────
    {
        "id": "jurisdiction",
        "label": "Gerichtsstand / Anwendbares Recht",
        "level": 1, "parent": None,
        "patterns": [
            r"gerichtsstand", r"anwendbares\s+recht",
            r"jurisdiction", r"governing\s+law",
        ],
        "severity": "MEDIUM",
    },
    {
        "id": "dispute_odr_link",
        "label": "ODR-Plattform-Link (EU-Verordnung 524/2013)",
        "level": 2, "parent": "jurisdiction",
        "patterns": [
            r"ec\.europa\.eu.*odr",
            r"(?:os|odr)[\-\s]plattform",
            r"(?:online[\-\s]?streitbeilegung|online\s+dispute\s+resolution)",
        ],
        "severity": "MEDIUM",
    },
    {
        "id": "choice_of_law_specific",
        "label": "Konkretes Recht benannt (z.B. deutsches Recht)",
        "level": 2, "parent": "jurisdiction",
        "patterns": [
            r"(?:deutsches|(?:oe|ö)sterreichisches|schweizerisches)\s+recht",
            r"recht\s+der\s+bundesrepublik",
            r"german\s+law|law\s+of\s+germany",
            r"un[\-\s]kaufrecht.*(?:ausgeschlossen|findet\s+keine\s+anwendung)",
        ],
        "severity": "LOW",
    },

    # ── L1: Payment terms ─────────────────────────────────────────────
    {
        "id": "payment",
        "label": "Zahlungsbedingungen",
        "level": 1, "parent": None,
        "patterns": [
            r"zahlungsbedingung", r"payment\s+terms",
            # NOTE(review): bare word alternation with no context; if applied
            # as an unanchored search, any mention of a price or cost passes.
            r"(?:preis|kosten|entgelt|verg(?:ue|ü)tung)",
            r"zahlungsweise", r"rechnungsstellung",
        ],
        "severity": "MEDIUM",
    },
    {
        "id": "payment_methods",
        "label": "Konkrete Zahlungsarten benannt",
        "level": 2, "parent": "payment",
        "patterns": [
            # NOTE(review): "rechnung" also occurs inside longer words such as
            # "rechnungsstellung"; verify this does not over-match.
            r"(?:vorkasse|nachnahme|lastschrift|sepa|(?:ue|ü)berweisung|kreditkarte|paypal|sofort(?:ue|ü)berweisung|klarna|rechnung|giropay|apple\s*pay|google\s*pay)",
        ],
        "severity": "LOW",
    },
    {
        "id": "payment_due_date",
        "label": "Faelligkeit/Zahlungsziel angegeben",
        "level": 2, "parent": "payment",
        "patterns": [
            r"(?:f(?:ae|ä)llig|zahlbar|zahlungsziel)\s+(?:sofort|innerhalb|nach|bei|mit)",
            r"(?:netto|brutto)\s*\d+\s+tage",
            r"zahlungsfrist",
        ],
        "severity": "LOW",
    },

    # ── L1: Delivery ──────────────────────────────────────────────────
    {
        "id": "delivery",
        "label": "Lieferung / Leistungserbringung",
        "level": 1, "parent": None,
        "patterns": [
            r"lieferung", r"leistungserbringung", r"delivery",
            r"lieferfrist", r"bereitstellung",
            r"(?:zugang|zugriff).*(?:dienst|leistung)",
        ],
        "severity": "MEDIUM",
    },
    {
        "id": "delivery_timeframe",
        "label": "Konkrete Lieferzeit/Frist angegeben",
        "level": 2, "parent": "delivery",
        "patterns": [
            r"(?:lieferzeit|lieferfrist|versanddauer)\s*(?:betr(?:ae|ä)gt|von|ca\.?|circa)",
            # A day count (optionally a range such as "3-5") before or after
            # a delivery/shipping term.
            r"\d+[\-\s]+(?:\d+\s+)?(?:werk)?tage.*(?:liefer|versand)",
            r"(?:liefer|versand).*\d+[\-\s]+(?:\d+\s+)?(?:werk)?tage",
        ],
        "severity": "LOW",
    },

    # ── L1: Warranty ──────────────────────────────────────────────────
    {
        "id": "warranty",
        "label": "Gewaehrleistung / Maengelrechte",
        "level": 1, "parent": None,
        "patterns": [
            r"gew(?:ae|ä)hrleistung", r"m(?:ae|ä)ngelrecht",
            r"warranty", r"sachm(?:ae|ä)ngel",
            r"gew(?:ae|ä)hrleistungsfrist",
        ],
        "severity": "MEDIUM",
    },
    {
        "id": "warranty_period",
        "label": "Gewaehrleistungsfrist angegeben",
        "level": 2, "parent": "warranty",
        "patterns": [
            r"(?:gew(?:ae|ä)hrleistungsfrist|verj(?:ae|ä)hrungsfrist)\s*(?:betr(?:ae|ä)gt|von|:)",
            # Only a two-year period ("2"/"zwei") is recognised by number;
            # other durations pass only through the first pattern.
            r"(?:2|zwei)\s+jahre.*(?:gew(?:ae|ä)hrleistung|m(?:ae|ä)ngel|verj(?:ae|ä)hrung)",
            r"(?:gew(?:ae|ä)hrleistung|m(?:ae|ä)ngel|verj(?:ae|ä)hrung).*(?:2|zwei)\s+jahre",
        ],
        "severity": "LOW",
    },

    # ── L1: Termination ───────────────────────────────────────────────
    {
        "id": "termination",
        "label": "Kuendigung / Vertragsbeendigung",
        "level": 1, "parent": None,
        "patterns": [
            r"k(?:ue|ü)ndigung", r"vertragsbeendigung",
            r"termination", r"laufzeit.*(?:vertrag|abo)",
            r"k(?:ue|ü)ndigungsfrist",
        ],
        "severity": "MEDIUM",
    },
    {
        "id": "termination_period",
        "label": "Kuendigungsfrist angegeben",
        "level": 2, "parent": "termination",
        "patterns": [
            r"k(?:ue|ü)ndigungsfrist\s+(?:von|betr(?:ae|ä)gt)",
            r"\d+\s+(?:tage?|wochen?|monate?).*(?:k(?:ue|ü)ndig|frist)",
            r"(?:k(?:ue|ü)ndig|frist).*\d+\s+(?:tage?|wochen?|monate?)",
        ],
        "severity": "LOW",
    },
    {
        "id": "termination_form",
        "label": "Formvorschrift fuer Kuendigung",
        "level": 2, "parent": "termination",
        "patterns": [
            r"k(?:ue|ü)ndigung.*(?:schriftlich|textform|per\s+(?:brief|e-?mail|fax))",
            r"(?:schriftlich|textform).*k(?:ue|ü)ndigung",
        ],
        "severity": "LOW",
    },

    # ── L1: Data-protection notice inside the terms ───────────────────
    {
        "id": "data_protection",
        "label": "Datenschutzhinweis in AGB",
        "level": 1, "parent": None,
        "patterns": [
            r"datenschutz.*(?:agb|bedingung)",
            r"(?:agb|bedingung).*datenschutz",
            r"personenbezogen.*daten.*(?:agb|vertrag)",
            r"dsgvo.*(?:agb|vertrag)",
        ],
        "severity": "LOW",
    },

    # ── New L1: Severability clause ───────────────────────────────────
    {
        "id": "salvatory_clause",
        "label": "Salvatorische Klausel",
        "level": 1, "parent": None,
        "patterns": [
            r"salvatorische",
            r"(?:unwirksamkeit|nichtigkeit)\s+(?:einer|einzelner)\s+(?:bestimmung|klausel|regelung)",
            r"(?:sollte|sofern).*(?:bestimmung|klausel).*(?:unwirksam|nichtig)",
            r"(?:uebrigen|übrigen)\s+bestimmungen.*(?:unberuehrt|unberührt|wirksam|bestehen)",
        ],
        "severity": "LOW",
    },

    # ── New L1: Amendment clause ──────────────────────────────────────
    {
        "id": "amendment_clause",
        "label": "Aenderungsklausel fuer AGB",
        "level": 1, "parent": None,
        "patterns": [
            r"(?:ae|ä)nderung.*(?:agb|bedingung)",
            r"(?:agb|bedingung).*(?:ae|ä)nder",
            r"(?:anpassung|aktualisierung).*(?:agb|bedingung|geschaeftsbedingung|geschäftsbedingung)",
            r"(?:neue\s+fassung|neufassung).*(?:agb|bedingung)",
        ],
        "severity": "LOW",
    },

    # ── New L1: Consumer rights (§309) ────────────────────────────────
    {
        "id": "consumer_rights",
        "label": "Verbraucherrechte nicht eingeschraenkt (§309 BGB)",
        "level": 1, "parent": None,
        "patterns": [
            r"§\s*309",
            r"verbraucherrecht",
            r"(?:gesetzlich|zwingende)\w*\s+recht\w*.*(?:unberuehrt|unberührt|bestehen\s+bleiben)",
            r"(?:verbrauch|konsument).*(?:recht|anspruch|schutz)",
        ],
        "severity": "LOW",
    },
]
|
||||
@@ -0,0 +1,179 @@
|
||||
"""
|
||||
Cookie-Richtlinie checks — §25 TDDDG / ePrivacy.
|
||||
|
||||
Level 1: Pflichtangabe erwaehnt?
|
||||
Level 2: Pflichtangabe korrekt/vollstaendig?
|
||||
"""
|
||||
|
||||
# Checklist for cookie policies (§25 TDDDG / ePrivacy).
#
# Each entry is a dict with:
#   id        unique key of the check
#   label     German display text
#   level     1 = "is it mentioned?", 2 = "is it correct/complete?"
#   parent    None for level 1; for level 2 the id of the level-1 entry it refines
#   patterns  regex source strings for this check
#   severity  "HIGH" / "MEDIUM" / "LOW"
#
# NOTE(review): how the patterns are applied (search vs. match, flags such as
# IGNORECASE/DOTALL) is decided by the runner, which is not visible here.
COOKIE_CHECKLIST = [
    # ── L1: Cookie categories ─────────────────────────────────────────
    {
        "id": "cookie_types",
        "label": "Arten der Cookies",
        "level": 1, "parent": None,
        "patterns": [
            r"(?:notwendig|essentiell|funktional|statistik|marketing|tracking)",
            r"cookie.*(?:art|typ|kategori)",
        ],
        "severity": "HIGH",
    },
    {
        "id": "cookie_names_listed",
        "label": "Konkrete Cookie-Namen aufgelistet",
        "level": 2, "parent": "cookie_types",
        "patterns": [
            r"(?:_ga|_gid|_gat|_fbp|_gcl|phpsessid|jsessionid|csrf|xsrf)",
            r"cookie[\-_]?name\s*[:\|]",
            r"name\s+des\s+cookie",
        ],
        "severity": "MEDIUM",
    },
    {
        "id": "cookie_essential_justified",
        "label": "Essentiell/Notwendig-Cookies begruendet",
        "level": 2, "parent": "cookie_types",
        "patterns": [
            r"(?:essentiell|notwendig|technisch\s+(?:erforderlich|notwendig)).*(?:funktion|betrieb|sicherheit|warenkorb|session|anmeldung)",
            r"(?:unbedingt|zwingend)\s+erforderlich",
        ],
        "severity": "LOW",
    },

    # ── L1: Purposes of the cookies ───────────────────────────────────
    {
        "id": "purposes",
        "label": "Zwecke der Cookies",
        "level": 1, "parent": None,
        "patterns": [
            r"zweck.*cookie", r"cookie.*zweck",
            r"(?:wofuer|wozu|warum).*cookie",
            r"cookies?\s+(?:ein|ver)?\s*,?\s*um\s+",
            r"(?:setzen|verwenden|nutzen)\s+.*cookies?\s+.*(?:um|fuer|für)",
            r"(?:analyse|marketing|tracking|funktional)\w*\s*cookies?\s*\.?\s*(?:um|damit|diese|sie)",
            r"cookies?\s+(?:dienen|helfen|erm(?:oe|ö)glichen)",
        ],
        "severity": "HIGH",
    },
    {
        "id": "cookie_providers_named",
        "label": "Konkrete Anbieter/Dienste benannt",
        "level": 2, "parent": "purposes",
        "patterns": [
            r"(?:google\s+(?:analytics|tag\s+manager|ads)|matomo|piwik|hotjar|hubspot|facebook\s+pixel|meta\s+pixel|linkedin\s+insight|microsoft\s+clarity)",
            # NOTE(review): "[A-Z]" only means "capitalised" when the runner
            # matches case-sensitively on original-case text — confirm.
            r"(?:anbieter|provider|dienst)\s*[:\|]\s*[A-Z]",
        ],
        "severity": "MEDIUM",
    },
    {
        "id": "cookie_analytics_named",
        "label": "Analytics-/Statistik-Tools konkret benannt",
        "level": 2, "parent": "purposes",
        "patterns": [
            r"google\s+analytics|matomo|piwik|plausible|fathom|adobe\s+analytics|microsoft\s+clarity|hotjar|etracker",
        ],
        "severity": "LOW",
    },
    {
        "id": "cookie_marketing_named",
        "label": "Marketing-/Tracking-Tools konkret benannt",
        "level": 2, "parent": "purposes",
        "patterns": [
            r"(?:facebook|meta)\s+pixel|google\s+ads|linkedin\s+insight|tiktok\s+pixel|pinterest\s+tag|criteo|adroll|taboola",
        ],
        "severity": "LOW",
    },

    # ── L1: Retention period ──────────────────────────────────────────
    {
        "id": "retention",
        "label": "Speicherdauer der Cookies",
        "level": 1, "parent": None,
        "patterns": [
            r"(?:speicherdauer|laufzeit|g(?:ue|ü)ltigk|ablauf).*cookie",
            r"cookie.*(?:\d+\s+(?:tag|monat|jahr)|session)",
        ],
        "severity": "MEDIUM",
    },
    {
        "id": "cookie_duration_values",
        "label": "Konkrete Speicherdauern pro Cookie",
        "level": 2, "parent": "retention",
        "patterns": [
            r"\d+\s+(?:tag|monat|jahr|minute|stunde|day|month|year)",
            r"session[\-\s]?cookie",
            r"(?:ablauf|expiry|laufzeit)\s*[:\|]\s*\d+",
        ],
        "severity": "LOW",
    },

    # ── L1: Third parties ─────────────────────────────────────────────
    {
        "id": "third_party",
        "label": "Drittanbieter-Cookies",
        "level": 1, "parent": None,
        "patterns": [
            r"drittanbieter", r"third.?party",
            r"(?:google|facebook|meta|microsoft).*cookie",
        ],
        "severity": "MEDIUM",
    },
    {
        "id": "cookie_legal_basis",
        "label": "Rechtsgrundlage fuer Cookies (§25 TDDDG / Art. 6(1)(a))",
        "level": 2, "parent": "third_party",
        "patterns": [
            r"§\s*25\s*(?:abs\.)?\s*(?:1|2)?\s*tdddg",
            r"art\.\s*6\s*(?:abs\.\s*)?1\s*(?:lit\.\s*)?a.*(?:cookie|einwilligung)",
            r"einwilligung.*(?:cookie|tracking|marketing)",
            # The lookahead keeps "§ 25" from matching the start of a longer
            # section number such as "§ 250".
            r"ttdsg|tdddg|§\s*25(?!\d)",
        ],
        "severity": "MEDIUM",
    },

    # ── L1: Objection / opt-out ───────────────────────────────────────
    {
        "id": "opt_out",
        "label": "Widerspruchsmoeglichkeit",
        "level": 1, "parent": None,
        "patterns": [
            r"(?:widerspruch|opt.?out|ablehnen|deaktivieren).*cookie",
            r"cookie.*(?:ablehnen|deaktivieren|l(?:oe|ö)schen)",
        ],
        "severity": "MEDIUM",
    },
    {
        "id": "cookie_consent_mechanism",
        "label": "Consent-Tool/Banner beschrieben",
        "level": 2, "parent": "opt_out",
        "patterns": [
            r"(?:cookie|consent)\s*[\-\s]?(?:banner|hinweis|tool|management|einstellung)",
            r"(?:cookiebot|usercentrics|onetrust|borlabs|complianz|klaro|tarteaucitron)",
            r"einwilligung\s+(?:jederzeit|widerrufen|zurueckziehen|zur(?:ue|ü)ckziehen)",
        ],
        "severity": "LOW",
    },
    {
        "id": "cookie_browser_settings",
        "label": "Browser-Einstellungen zum Cookie-Management",
        "level": 2, "parent": "opt_out",
        "patterns": [
            r"browser[\-\s]?einstellung",
            r"(?:in\s+ihrem|im)\s+browser.*(?:cookie|deaktivieren|l(?:oe|ö)schen|blockieren)",
            r"(?:chrome|firefox|safari|edge).*(?:cookie|einstellung)",
        ],
        "severity": "LOW",
    },

    # ── New L1: Cookie table ──────────────────────────────────────────
    {
        "id": "cookie_table",
        "label": "Strukturierte Cookie-Tabelle/Liste",
        "level": 1, "parent": None,
        "patterns": [
            # The "cookie" context is mandatory. With an optional prefix on
            # an unanchored pattern, any "liste"/"tabelle"/"uebersicht"
            # anywhere in the document satisfied this check.
            r"cookie[\-\s]?(?:tabelle|uebersicht|übersicht|liste|aufstellung)",
            # Same nouns followed within a few words by "cookie(s)", e.g.
            # "Übersicht der verwendeten Cookies".
            r"(?:tabelle|uebersicht|übersicht|liste|aufstellung)\s+(?:\w+\s+){0,4}cookies?",
            r"(?:name|bezeichnung)\s*[\|\t]\s*(?:anbieter|zweck|dauer|laufzeit)",
            r"(?:first[\-\s]?party|third[\-\s]?party)\s*[\|\t]",
        ],
        "severity": "LOW",
    },
]
|
||||
@@ -0,0 +1,359 @@
|
||||
"""
|
||||
DSE (Datenschutzinformation) checks — Art. 13/14 DSGVO.
|
||||
|
||||
Level 1: Pflichtangabe erwaehnt?
|
||||
Level 2: Pflichtangabe korrekt/vollstaendig?
|
||||
"""
|
||||
|
||||
ART13_CHECKLIST = [
|
||||
# ── L1: Verantwortlicher ──────────────────────────────────────────
|
||||
{
|
||||
"id": "controller",
|
||||
"label": "Verantwortlicher (Art. 13(1)(a))",
|
||||
"level": 1, "parent": None,
|
||||
"patterns": [
|
||||
r"verantwortlich\w*\s+(?:ist|im sinne|fuer|f(?:ue|ü)r)",
|
||||
r"kontaktdaten\s+des\s+verantwortlichen",
|
||||
r"name\s+(?:und|&)\s+kontaktdaten\s+des",
|
||||
r"controller", r"verantwortliche\s+stelle",
|
||||
r"responsible\s+(?:party|for)",
|
||||
],
|
||||
"severity": "HIGH",
|
||||
},
|
||||
{
|
||||
"id": "controller_address",
|
||||
"label": "Vollstaendige Anschrift (PLZ + Ort)",
|
||||
"level": 2, "parent": "controller",
|
||||
"patterns": [
|
||||
r"\d{5}\s+[A-Z\u00c0-\u017e]\w+",
|
||||
r"[A-Z\u00c0-\u017e]\w+(?:str|stra(?:ss|ß)e|weg|platz|allee|gasse)\s*\.?\s*\d",
|
||||
],
|
||||
"severity": "MEDIUM",
|
||||
},
|
||||
{
|
||||
"id": "controller_email",
|
||||
"label": "E-Mail-Adresse des Verantwortlichen",
|
||||
"level": 2, "parent": "controller",
|
||||
"patterns": [
|
||||
r"[a-z0-9._%+\-]+@[a-z0-9.\-]+\.[a-z]{2,}",
|
||||
],
|
||||
"severity": "MEDIUM",
|
||||
},
|
||||
{
|
||||
"id": "controller_phone",
|
||||
"label": "Telefonnummer des Verantwortlichen",
|
||||
"level": 2, "parent": "controller",
|
||||
"patterns": [
|
||||
r"(?:tel(?:efon)?|phone|fon)\s*[.:]\s*[\+\d][\d\s/\-]{6,}",
|
||||
r"\+49\s*[\d\s/\-]{8,}",
|
||||
],
|
||||
"severity": "MEDIUM",
|
||||
},
|
||||
|
||||
# ── L1: Datenschutzbeauftragter ───────────────────────────────────
|
||||
{
    # L1 check: is a data protection officer mentioned at all?
    "id": "dpo",
    "label": "Datenschutzbeauftragter (Art. 13(1)(b))",
    "level": 1, "parent": None,
    "patterns": [
        r"datenschutzbeauftragt", r"data\s+protection\s+officer",
        r"kontaktdaten\s+de[rs]\s+(?:beh(?:oe|ö)rdlichen\s+)?datenschutz",
        # Abbreviations are anchored with \b so they only match as standalone
        # tokens; a bare "dpo" would also hit words such as "endpoint".
        r"\bdsb\b", r"\bdpo\b",
    ],
    "severity": "MEDIUM",
},
|
||||
{
|
||||
"id": "dpo_contact",
|
||||
"label": "Kontaktdaten des DSB (E-Mail oder Telefon)",
|
||||
"level": 2, "parent": "dpo",
|
||||
"patterns": [
|
||||
r"datenschutz(?:beauftragter?|beauftragte).*?[a-z0-9._%+\-]+@",
|
||||
r"dsb.*?@|dpo.*?@",
|
||||
r"datenschutz@",
|
||||
],
|
||||
"severity": "MEDIUM",
|
||||
},
|
||||
|
||||
# ── L1: Zwecke der Verarbeitung ───────────────────────────────────
|
||||
{
|
||||
"id": "purposes",
|
||||
"label": "Zwecke der Verarbeitung (Art. 13(1)(c))",
|
||||
"level": 1, "parent": None,
|
||||
"patterns": [
|
||||
r"zweck\w*\s+(?:der|und|die)\s+(?:verarbeitung|datenerhebung|datenverarbeitung|rechtsgrundlage)",
|
||||
r"purpose\w*\s+(?:of|for)\s+(?:processing|data)",
|
||||
r"zu\s+welch\w+\s+zweck",
|
||||
r"welche\s+daten\s+werden.*verarbeitet",
|
||||
r"daten\s+werden\s+(?:zu|fuer|für)\s+(?:folgende|diese)",
|
||||
],
|
||||
"severity": "HIGH",
|
||||
},
|
||||
{
|
||||
"id": "purposes_specific",
|
||||
"label": "Konkrete Zwecke benannt (min. 2)",
|
||||
"level": 2, "parent": "purposes",
|
||||
"patterns": [
|
||||
r"(?:kontaktaufnahme|vertragserfuellung|vertragserf(?:ue|ü)llung|newsletter|analyse|statistik|werbung|marketing|bewerbung|bestellabwicklung|kundenkonto)",
|
||||
r"(?:bereitstellung|betrieb|sicherheit|optimierung)\s+(?:der|des|unserer|unseres)",
|
||||
],
|
||||
"severity": "LOW",
|
||||
},
|
||||
|
||||
# ── L1: Rechtsgrundlage ───────────────────────────────────────────
|
||||
{
|
||||
"id": "legal_basis",
|
||||
"label": "Rechtsgrundlage (Art. 13(1)(c))",
|
||||
"level": 1, "parent": None,
|
||||
"patterns": [
|
||||
r"rechtsgrundlage", r"art\.\s*6\s*(?:abs|absatz)?\s*\.?\s*1",
|
||||
r"legal\s+basis", r"berechtigtes\s+interesse",
|
||||
r"auf\s+grundlage\s+(?:von|des|der)\s+(?:art|§)",
|
||||
r"lit\.\s*[a-f][\s\)]",
|
||||
r"gem(?:ae|ä)(?:ss|ß)\s+art",
|
||||
r"§\s*\d+\s+(?:abs|ihkg|bdsg|ldsg|bbig|tdddg)",
|
||||
r"einwilligung\s+gem",
|
||||
],
|
||||
"severity": "HIGH",
|
||||
},
|
||||
{
|
||||
"id": "legal_basis_consent_6a",
|
||||
"label": "Art. 6(1)(a) — Einwilligung",
|
||||
"level": 2, "parent": "legal_basis",
|
||||
"patterns": [
|
||||
r"art\.\s*6\s*(?:abs\.\s*)?1\s*(?:s\.\s*1\s*)?(?:lit\.\s*)?a",
|
||||
r"einwilligung\s+(?:gem|nach|i\.?\s*s\.?\s*d\.?)",
|
||||
],
|
||||
"severity": "LOW",
|
||||
},
|
||||
{
    # L2 check under "legal_basis": performance of a contract, Art. 6(1)(b).
    "id": "legal_basis_contract_6b",
    # Label uses the ASCII transliteration of "Vertragserfüllung" ("ue" for "ü").
    "label": "Art. 6(1)(b) — Vertragserfuellung",
    "level": 2, "parent": "legal_basis",
    "patterns": [
        r"art\.\s*6\s*(?:abs\.\s*)?1\s*(?:s\.\s*1\s*)?(?:lit\.\s*)?b",
        r"vertragserf(?:ue|ü)llung",
        r"durchf(?:ue|ü)hrung\s+(?:eines|des|vorvertragliche)",
    ],
    "severity": "LOW",
},
|
||||
{
|
||||
"id": "legal_basis_interest_6f",
|
||||
"label": "Art. 6(1)(f) — Berechtigtes Interesse",
|
||||
"level": 2, "parent": "legal_basis",
|
||||
"patterns": [
|
||||
r"art\.\s*6\s*(?:abs\.\s*)?1\s*(?:s\.\s*1\s*)?(?:lit\.\s*)?f",
|
||||
r"berechtigte[sn]?\s+interesse",
|
||||
],
|
||||
"severity": "LOW",
|
||||
},
|
||||
{
|
||||
"id": "legal_basis_balancing",
|
||||
"label": "Interessenabwaegung dokumentiert",
|
||||
"level": 2, "parent": "legal_basis",
|
||||
"patterns": [
|
||||
r"interessenabw(?:ae|ä)gung",
|
||||
r"(?:ueberwiegen|überwiegen).*interesse",
|
||||
r"abw(?:ae|ä)gung.*(?:recht|interesse|freiheit)",
|
||||
],
|
||||
"severity": "LOW",
|
||||
},
|
||||
|
||||
# ── L1: Empfaenger ────────────────────────────────────────────────
|
||||
{
|
||||
"id": "recipients",
|
||||
"label": "Empfaenger (Art. 13(1)(e))",
|
||||
"level": 1, "parent": None,
|
||||
"patterns": [
|
||||
r"empf(?:ae|ä)nger", r"(?:ueber|über|weiter)mitt(?:el|l)ung",
|
||||
r"recipient", r"weitergabe\s+(?:an|von)\s+daten",
|
||||
r"dritte", r"third\s+part",
|
||||
r"auftragsverarbeit",
|
||||
],
|
||||
"severity": "MEDIUM",
|
||||
},
|
||||
{
|
||||
"id": "recipients_categories",
|
||||
"label": "Konkrete Empfaenger-Kategorien benannt",
|
||||
"level": 2, "parent": "recipients",
|
||||
"patterns": [
|
||||
r"(?:hosting|server|cloud|payment|zahlungs|versand|logistik|steuerberater|buchhalter|newsletter|crm|erp)",
|
||||
r"(?:dienstleister|auftragnehmer|subunternehmer).*(?:fuer|für|im bereich)",
|
||||
],
|
||||
"severity": "LOW",
|
||||
},
|
||||
{
|
||||
"id": "recipients_processor",
|
||||
"label": "Auftragsverarbeiter erwaehnt",
|
||||
"level": 2, "parent": "recipients",
|
||||
"patterns": [
|
||||
r"auftragsverarbeit(?:er|ung)",
|
||||
r"art\.\s*28",
|
||||
r"avv|av-vertrag|auftragsverarbeitungsvertrag",
|
||||
],
|
||||
"severity": "LOW",
|
||||
},
|
||||
|
||||
# ── L1: Drittlandtransfer ─────────────────────────────────────────
|
||||
{
|
||||
"id": "third_country",
|
||||
"label": "Drittlandtransfer (Art. 13(1)(f))",
|
||||
"level": 1, "parent": None,
|
||||
"patterns": [
|
||||
r"drittland", r"dritt\s*staat", r"drittl(?:ae|ä)nder",
|
||||
r"third\s+countr", r"angemessenheitsbeschluss",
|
||||
r"standard\s*vertragsklausel", r"scc",
|
||||
r"(?:ueber|über)mittlung.*(?:ausserhalb|außerhalb)",
|
||||
r"(?:europ(?:ae|ä)ischen\s+wirtschaftsraum|ewr|eea)",
|
||||
r"privacy\s+shield", r"data\s+privacy\s+framework",
|
||||
],
|
||||
"severity": "MEDIUM",
|
||||
},
|
||||
{
|
||||
"id": "third_country_mechanism",
|
||||
"label": "Transfermechanismus benannt (SCC/DPF/Angemessenheit)",
|
||||
"level": 2, "parent": "third_country",
|
||||
"patterns": [
|
||||
r"standard\s*vertragsklausel|scc|standard\s+contractual",
|
||||
r"data\s+privacy\s+framework|dpf",
|
||||
r"angemessenheitsbeschluss|adequacy\s+decision",
|
||||
r"art\.\s*4[5-9]",
|
||||
],
|
||||
"severity": "MEDIUM",
|
||||
},
|
||||
|
||||
# ── L1: Speicherdauer ─────────────────────────────────────────────
|
||||
{
|
||||
"id": "retention",
|
||||
"label": "Speicherdauer (Art. 13(2)(a))",
|
||||
"level": 1, "parent": None,
|
||||
"patterns": [
|
||||
r"speicherdauer", r"aufbewahrungsfrist",
|
||||
r"(?:wie\s+lange|dauer)\s+(?:der\s+)?(?:werden|gespeicher|speicherung)",
|
||||
r"retention\s+period", r"l(?:oe|ö)sch(?:ung|frist|konzept)",
|
||||
r"daten\s+werden\s+gel(?:oe|ö)scht",
|
||||
r"(?:\d+\s+(?:tage|monate|jahre)|nach\s+\d+\s+(?:tag|monat|jahr))",
|
||||
r"dauer\s+der\s+speicherung",
|
||||
r"aufbewahrung(?:sdauer|spflicht|szeit)",
|
||||
r"gesetzliche.*aufbewahrung",
|
||||
],
|
||||
"severity": "HIGH",
|
||||
},
|
||||
{
|
||||
"id": "retention_periods",
|
||||
"label": "Konkrete Zeitangaben (Tage/Monate/Jahre)",
|
||||
"level": 2, "parent": "retention",
|
||||
"patterns": [
|
||||
r"\d+\s+(?:tage?|monate?|jahre?|days?|months?|years?)",
|
||||
r"(?:6|10)\s+jahre.*(?:handels|steuer|hgb|ao)",
|
||||
r"(?:nach|innerhalb)\s+(?:von\s+)?\d+\s+(?:tag|monat|jahr)",
|
||||
],
|
||||
"severity": "MEDIUM",
|
||||
},
|
||||
{
|
||||
"id": "retention_deletion",
|
||||
"label": "Loeschkonzept/-prozess beschrieben",
|
||||
"level": 2, "parent": "retention",
|
||||
"patterns": [
|
||||
r"l(?:oe|ö)schkonzept", r"l(?:oe|ö)schfrist",
|
||||
r"(?:regel|routinem(?:ae|ä)(?:ss|ß)ig).*l(?:oe|ö)sch",
|
||||
r"nach\s+(?:ablauf|wegfall).*(?:gel(?:oe|ö)scht|l(?:oe|ö)sch)",
|
||||
],
|
||||
"severity": "LOW",
|
||||
},
|
||||
|
||||
# ── L1: Betroffenenrechte ─────────────────────────────────────────
|
||||
{
|
||||
"id": "rights",
|
||||
"label": "Betroffenenrechte (Art. 13(2)(b))",
|
||||
"level": 1, "parent": None,
|
||||
"patterns": [
|
||||
r"recht\s+auf\s+auskunft", r"recht\s+auf\s+l(?:oe|ö)schung",
|
||||
r"recht\s+auf\s+berichtigung", r"widerspruchsrecht",
|
||||
r"art\.\s*1[5-9]", r"art\.\s*2[0-2]",
|
||||
r"right\s+to\s+(?:access|erasure|rectification|object)",
|
||||
r"betroffenenrecht", r"rechte\s+(?:des|der)\s+betroffenen",
|
||||
r"ihnen\s+(?:stehen|steht)\s+(?:ein|folgende)\s+recht",
|
||||
],
|
||||
"severity": "HIGH",
|
||||
},
|
||||
{
|
||||
"id": "rights_art15",
|
||||
"label": "Recht auf Auskunft (Art. 15)",
|
||||
"level": 2, "parent": "rights",
|
||||
"patterns": [r"art\.\s*15", r"recht\s+auf\s+auskunft", r"right\s+(?:of|to)\s+access"],
|
||||
"severity": "LOW",
|
||||
},
|
||||
{
|
||||
"id": "rights_art16",
|
||||
"label": "Recht auf Berichtigung (Art. 16)",
|
||||
"level": 2, "parent": "rights",
|
||||
"patterns": [r"art\.\s*16", r"recht\s+auf\s+berichtigung", r"right\s+to\s+rectification"],
|
||||
"severity": "LOW",
|
||||
},
|
||||
{
|
||||
"id": "rights_art17",
|
||||
"label": "Recht auf Loeschung (Art. 17)",
|
||||
"level": 2, "parent": "rights",
|
||||
"patterns": [r"art\.\s*17", r"recht\s+auf\s+l(?:oe|ö)schung", r"right\s+to\s+erasure"],
|
||||
"severity": "LOW",
|
||||
},
|
||||
{
|
||||
"id": "rights_art18",
|
||||
"label": "Recht auf Einschraenkung (Art. 18)",
|
||||
"level": 2, "parent": "rights",
|
||||
"patterns": [r"art\.\s*18", r"einschr(?:ae|ä)nkung\s+der\s+verarbeitung", r"right\s+to\s+restriction"],
|
||||
"severity": "LOW",
|
||||
},
|
||||
{
|
||||
"id": "rights_art20",
|
||||
"label": "Recht auf Datenportabilitaet (Art. 20)",
|
||||
"level": 2, "parent": "rights",
|
||||
"patterns": [r"art\.\s*20", r"daten(?:ue|ü)bertragbarkeit|datenportabilit", r"right\s+to\s+data\s+portability"],
|
||||
"severity": "LOW",
|
||||
},
|
||||
{
|
||||
"id": "rights_art21",
|
||||
"label": "Widerspruchsrecht (Art. 21)",
|
||||
"level": 2, "parent": "rights",
|
||||
"patterns": [r"art\.\s*21", r"widerspruchsrecht", r"right\s+to\s+object"],
|
||||
"severity": "LOW",
|
||||
},
|
||||
{
|
||||
"id": "rights_art22_profiling",
|
||||
"label": "Automatisierte Entscheidungen / Profiling (Art. 22)",
|
||||
"level": 2, "parent": "rights",
|
||||
"patterns": [
|
||||
r"art\.\s*22", r"automatisierte\s+entscheidung",
|
||||
r"profiling", r"automated\s+(?:decision|individual)",
|
||||
],
|
||||
"severity": "LOW",
|
||||
},
|
||||
|
||||
# ── L1: Beschwerderecht ───────────────────────────────────────────
|
||||
{
|
||||
"id": "complaint",
|
||||
"label": "Beschwerderecht (Art. 13(2)(d))",
|
||||
"level": 1, "parent": None,
|
||||
"patterns": [
|
||||
r"beschwerderecht", r"aufsichtsbeh(?:oe|ö)rde",
|
||||
r"right\s+to\s+lodge\s+a\s+complaint",
|
||||
r"supervisory\s+authority", r"datenschutzbeh(?:oe|ö)rde",
|
||||
r"recht\s+auf\s+beschwerde", r"art\.\s*77",
|
||||
r"beschwerde.*(?:wenden|einlegen|erheben)",
|
||||
r"(?:zust(?:ae|ä)ndige|competent)\s+(?:beh(?:oe|ö)rde|authority)",
|
||||
],
|
||||
"severity": "MEDIUM",
|
||||
},
|
||||
{
|
||||
"id": "complaint_authority_named",
|
||||
"label": "Konkrete Aufsichtsbehoerde benannt",
|
||||
"level": 2, "parent": "complaint",
|
||||
"patterns": [
|
||||
r"(?:landes|l)(?:beauftragt|datenschutz).*(?:niedersachsen|bayern|nrw|nordrhein|hessen|baden|schleswig|brandenburg|sachsen|berlin|hamburg|bremen|thueringen|thüringen|saarland|rheinland|mecklenburg)",
|
||||
r"l(?:an)?fdi\s+\w+",
|
||||
r"bfdi",
|
||||
r"(?:bayerische|hessische|s(?:ae|ä)chsische|berliner)\s+(?:datenschutz|aufsicht)",
|
||||
],
|
||||
"severity": "LOW",
|
||||
},
|
||||
]
|
||||
@@ -0,0 +1,230 @@
|
||||
"""
|
||||
DSFA checks — Art. 35 DSGVO (Datenschutz-Folgenabschaetzung).
|
||||
|
||||
Level 1: Pflichtangabe erwaehnt?
|
||||
Level 2: Pflichtangabe korrekt/vollstaendig?
|
||||
"""
|
||||
|
||||
DSFA_CHECKLIST = [
|
||||
# ── L1: Schwellwertanalyse ────────────────────────────────────────
|
||||
{
|
||||
"id": "trigger",
|
||||
"label": "Schwellwertanalyse / Ausloesepruefung (Art. 35(1))",
|
||||
"level": 1, "parent": None,
|
||||
"patterns": [
|
||||
r"art\.\s*35\s*(?:abs|absatz)?\s*\.?\s*1",
|
||||
r"hohes\s+risiko",
|
||||
r"voraussichtlich.*risiko",
|
||||
r"schwellwert",
|
||||
r"folgen.*(?:verarbeitung|schutz).*personenbezogen",
|
||||
],
|
||||
"severity": "HIGH",
|
||||
},
|
||||
|
||||
# ── L1: Beschreibung der Verarbeitungsvorgaenge ───────────────────
|
||||
{
|
||||
"id": "description",
|
||||
"label": "Beschreibung der Verarbeitungsvorgaenge (Art. 35(7)(a))",
|
||||
"level": 1, "parent": None,
|
||||
"patterns": [
|
||||
r"beschreibung.*verarbeitung", r"verarbeitungsvorg(?:ae|ä)ng",
|
||||
r"systematische\s+beschreibung", r"gegenstand.*verarbeitung",
|
||||
r"social\s*media.*(?:angebot|nutzung|besteht\s+aus)",
|
||||
r"(?:kan(?:ae|ä)le|plattform).*(?:facebook|twitter|instagram|youtube|linkedin|xing)",
|
||||
],
|
||||
"severity": "HIGH",
|
||||
},
|
||||
{
|
||||
"id": "processing_named",
|
||||
"label": "Konkreter Verarbeitungsvorgang benannt",
|
||||
"level": 2, "parent": "description",
|
||||
"patterns": [
|
||||
r"(?:betrieb|nutzung|verwaltung|pflege)\s+(?:der|von|unserer|eines)\s+(?:seite|profil|kanal|account|fanpage|pr(?:ae|ä)senz)",
|
||||
r"(?:verarbeitung|erhebung|speicherung)\s+(?:von|der)\s+(?:nutzerdaten|personenbezogen|besucher|mitglieder)",
|
||||
],
|
||||
"severity": "LOW",
|
||||
},
|
||||
|
||||
# ── L1: Notwendigkeit / Verhaeltnismaessigkeit ────────────────────
|
||||
{
|
||||
"id": "necessity",
|
||||
"label": "Notwendigkeit und Verhaeltnismaessigkeit (Art. 35(7)(b))",
|
||||
"level": 1, "parent": None,
|
||||
"patterns": [
|
||||
r"notwendigkeit", r"verh(?:ae|ä)ltnism(?:ae|ä)(?:ss|ß)igkeit",
|
||||
r"erforderlichkeit", r"zweckbindung",
|
||||
r"geringen?\s+umfang",
|
||||
r"nur\s+(?:die|sehr).*daten.*(?:verarbeitet|erhoben)",
|
||||
r"freiwillig\s+angegeben",
|
||||
],
|
||||
"severity": "HIGH",
|
||||
},
|
||||
{
|
||||
"id": "legal_basis_dsfa",
|
||||
"label": "Rechtsgrundlage fuer die Verarbeitung",
|
||||
"level": 2, "parent": "necessity",
|
||||
"patterns": [
|
||||
r"rechtsgrundlage.*(?:art\.\s*6|berechtigte|einwilligung)",
|
||||
r"art\.\s*6\s*(?:abs\.\s*)?1\s*(?:lit\.\s*)?[a-f]",
|
||||
r"(?:einwilligung|vertrag|berechtigt).*(?:rechtsgrundlage|grundlage)",
|
||||
],
|
||||
"severity": "LOW",
|
||||
},
|
||||
|
||||
# ── L1: Risikobewertung ───────────────────────────────────────────
|
||||
{
|
||||
"id": "risks",
|
||||
"label": "Risikobewertung fuer Betroffene (Art. 35(7)(c))",
|
||||
"level": 1, "parent": None,
|
||||
"patterns": [
|
||||
r"risiko.*(?:bewertung|analyse|einsch(?:ae|ä)tzung|abw(?:ae|ä)gung)",
|
||||
r"risiken.*(?:rechte|freiheit)",
|
||||
r"eintrittswahrscheinlichkeit",
|
||||
r"schwere.*(?:risiko|auswirkung)",
|
||||
r"hohes\s+risiko.*(?:rechte|freiheit)",
|
||||
r"systematische\s+beobachtung",
|
||||
],
|
||||
"severity": "HIGH",
|
||||
},
|
||||
{
|
||||
"id": "risk_probability",
|
||||
"label": "Eintrittswahrscheinlichkeit bewertet",
|
||||
"level": 2, "parent": "risks",
|
||||
"patterns": [
|
||||
r"eintrittswahrscheinlichkeit",
|
||||
r"(?:wahrscheinlichkeit|likelihood)\s*[:\|]",
|
||||
r"(?:gering|mittel|hoch)\w*\s+(?:wahrscheinlichkeit|eintritt)",
|
||||
],
|
||||
"severity": "MEDIUM",
|
||||
},
|
||||
{
|
||||
"id": "risk_severity",
|
||||
"label": "Schwere/Auswirkung bewertet",
|
||||
"level": 2, "parent": "risks",
|
||||
"patterns": [
|
||||
r"(?:schwere|auswirkung|schadensh(?:oe|ö)he|schadenpotential|schadenpotenzial)\s*[:\|]",
|
||||
r"(?:gering|mittel|hoch|kritisch)\w*\s+(?:schwere|auswirkung|schaden)",
|
||||
r"(?:physisch|materiell|immateriell)\w*\s+(?:schaden|nachteil|beeintr(?:ae|ä)chtigung)",
|
||||
],
|
||||
"severity": "MEDIUM",
|
||||
},
|
||||
|
||||
# ── L1: Abhilfemassnahmen ─────────────────────────────────────────
|
||||
{
    # L1 check: are mitigation measures (Art. 35(7)(d)) mentioned at all?
    "id": "measures",
    "label": "Abhilfemassnahmen (Art. 35(7)(d))",
    "level": 1, "parent": None,
    "patterns": [
        r"abhilfe", r"(?:ma(?:ss|ß)nahm).*(?:risiko|schutz|minderung)",
        r"schutzma(?:ss|ß)nahm",
        r"(?:technisch|organisatorisch).*ma(?:ss|ß)nahm",
        # "TOM"/"TOMs" only as a standalone token: an unanchored "tom" also
        # matches inside words such as "automatisiert" or "customer".
        r"\btoms?\b",
        r"risiko.*(?:minim|reduz|begrenzen)",
    ],
    "severity": "HIGH",
},
|
||||
{
|
||||
"id": "tom_encryption",
|
||||
"label": "Verschluesselung als Massnahme",
|
||||
"level": 2, "parent": "measures",
|
||||
"patterns": [
|
||||
r"verschl(?:ue|ü)sselung|encryption|ssl|tls|https",
|
||||
r"(?:transport|ende[\-\s]zu[\-\s]ende)[\-\s]?verschl(?:ue|ü)sselung",
|
||||
],
|
||||
"severity": "LOW",
|
||||
},
|
||||
{
|
||||
"id": "tom_pseudonymization",
|
||||
"label": "Pseudonymisierung als Massnahme",
|
||||
"level": 2, "parent": "measures",
|
||||
"patterns": [
|
||||
r"pseudonymisierung|anonymisierung",
|
||||
r"(?:pseudonymisiert|anonymisiert).*(?:daten|verarbeit)",
|
||||
],
|
||||
"severity": "LOW",
|
||||
},
|
||||
{
|
||||
"id": "tom_access_control",
|
||||
"label": "Zugriffskontrolle als Massnahme",
|
||||
"level": 2, "parent": "measures",
|
||||
"patterns": [
|
||||
r"zugriffskontrolle|zugangskontrolle|zutrittskontrolle",
|
||||
r"(?:berechtigungs|rollen).*(?:konzept|management|vergabe)",
|
||||
r"(?:need[\-\s]to[\-\s]know|least\s+privilege|minimalprinzip)",
|
||||
],
|
||||
"severity": "LOW",
|
||||
},
|
||||
{
|
||||
"id": "tom_logging",
|
||||
"label": "Protokollierung/Logging als Massnahme",
|
||||
"level": 2, "parent": "measures",
|
||||
"patterns": [
|
||||
r"(?:protokollierung|logging|audit[\-\s]?trail|nachvollziehbarkeit)",
|
||||
r"(?:zugriff|(?:ae|ä)nderung).*(?:protokoll|logging|nachvollzieh)",
|
||||
],
|
||||
"severity": "LOW",
|
||||
},
|
||||
|
||||
# ── L1: Landesbehoerden ───────────────────────────────────────────
|
||||
{
|
||||
"id": "lfdi",
|
||||
"label": "Beruecksichtigung Landesbehoerden-Richtlinie",
|
||||
"level": 1, "parent": None,
|
||||
"patterns": [
|
||||
r"l(?:an)?fdi", r"landesbeauftragt.*datenschutz",
|
||||
r"landes.?datenschutz",
|
||||
r"richtlinie.*(?:land|lfdi|landes)",
|
||||
r"(?:aufsichtsbeh(?:oe|ö)rde|beh(?:oe|ö)rde).*(?:richtlinie|empfehlung|vorgabe)",
|
||||
],
|
||||
"severity": "MEDIUM",
|
||||
},
|
||||
|
||||
# ── L1: Einbeziehung DSB ──────────────────────────────────────────
|
||||
{
|
||||
"id": "stakeholders",
|
||||
"label": "Einbeziehung des DSB (Art. 35(2))",
|
||||
"level": 1, "parent": None,
|
||||
"patterns": [
|
||||
r"datenschutzbeauftragt.*(?:einbez|konsult|beteilig|rat)",
|
||||
r"dsb.*(?:konsult|einbez|rat)",
|
||||
r"stellungnahme.*dsb",
|
||||
r"(?:rat|empfehlung).*datenschutzbeauftragt",
|
||||
],
|
||||
"severity": "MEDIUM",
|
||||
},
|
||||
{
|
||||
"id": "dsb_opinion_documented",
|
||||
"label": "DSB-Stellungnahme dokumentiert",
|
||||
"level": 2, "parent": "stakeholders",
|
||||
"patterns": [
|
||||
r"stellungnahme.*(?:dsb|datenschutzbeauftragt)",
|
||||
r"(?:dsb|datenschutzbeauftragt).*(?:stellungnahme|empfiehlt|bewertet|best(?:ae|ä)tigt)",
|
||||
r"(?:empfehlung|beurteilung|einsch(?:ae|ä)tzung)\s+(?:des|der)\s+(?:dsb|datenschutzbeauftragt)",
|
||||
],
|
||||
"severity": "LOW",
|
||||
},
|
||||
|
||||
# ── L1: Dokumentation ─────────────────────────────────────────────
|
||||
{
|
||||
"id": "documentation",
|
||||
"label": "Dokumentation der Ergebnisse",
|
||||
"level": 1, "parent": None,
|
||||
"patterns": [
|
||||
r"(?:dokument|ergebnis|bericht).*(?:dsfa|folgenabsch(?:ae|ä)tzung)",
|
||||
r"(?:ergebnis|schlussfolgerung|bewertung).*(?:risiko|verarbeitung)",
|
||||
r"vorliegend.*(?:dsfa|analyse|bewertung|absch(?:ae|ä)tzung)",
|
||||
],
|
||||
"severity": "MEDIUM",
|
||||
},
|
||||
{
|
||||
"id": "review_cycle",
|
||||
"label": "Ueberpruefungszyklus/Review-Termin",
|
||||
"level": 2, "parent": "documentation",
|
||||
"patterns": [
|
||||
r"(?:ueberpr(?:ue|ü)f|überpr(?:ue|ü)f|review|aktualisierung).*(?:zyklus|turnus|j(?:ae|ä)hrlich|regelm(?:ae|ä)(?:ss|ß)ig|termin)",
|
||||
r"(?:regelm(?:ae|ä)(?:ss|ß)ig|j(?:ae|ä)hrlich|quartal|halbjahr).*(?:ueberpr(?:ue|ü)f|überpr(?:ue|ü)f|review|aktualisier)",
|
||||
r"n(?:ae|ä)chste\s+(?:ueberpr(?:ue|ü)fung|überprüfung|review)",
|
||||
],
|
||||
"severity": "LOW",
|
||||
},
|
||||
]
|
||||
@@ -0,0 +1,185 @@
|
||||
"""
|
||||
Impressum checks — §5 TMG / §18 MStV.
|
||||
|
||||
Level 1: Pflichtangabe erwaehnt?
|
||||
Level 2: Pflichtangabe korrekt/vollstaendig?
|
||||
"""
|
||||
|
||||
IMPRESSUM_CHECKLIST = [
|
||||
# ── L1: Name des Anbieters ────────────────────────────────────────
|
||||
{
    # L1 check: is the provider's name (company / legal form) mentioned?
    "id": "name",
    "label": "Name des Anbieters",
    "level": 1, "parent": None,
    "patterns": [
        # Legal-form abbreviations must be standalone tokens; without \b the
        # short ones ("ag", "kg", "ug", "inc") match inside ordinary words
        # such as "vertrag", "anfragen" or "genug".
        r"\b(?:gmbh|ag|ohg|kg|gbr|ug|mbh|inc|ltd)\b",
        # "e.V." handled separately because it ends in a non-word character
        # (a trailing \b would not match); also accepts the spaced "e. V.".
        r"\be\.\s*v\.",
        r"firma", r"unternehmen",
    ],
    "severity": "HIGH",
},
|
||||
|
||||
# ── L1: Anschrift ─────────────────────────────────────────────────
|
||||
{
|
||||
"id": "address",
|
||||
"label": "Anschrift",
|
||||
"level": 1, "parent": None,
|
||||
"patterns": [
|
||||
r"(?:str(?:asse|\.)|weg|platz|allee)\s*\d",
|
||||
r"d-\d{5}", r"\d{5}\s+\w+",
|
||||
],
|
||||
"severity": "HIGH",
|
||||
},
|
||||
{
|
||||
"id": "address_zip_city",
|
||||
"label": "PLZ + Ort vorhanden",
|
||||
"level": 2, "parent": "address",
|
||||
"patterns": [
|
||||
r"\d{5}\s+[A-Z\u00c0-\u017e]\w{2,}",
|
||||
],
|
||||
"severity": "MEDIUM",
|
||||
},
|
||||
{
|
||||
"id": "address_street_number",
|
||||
"label": "Strasse + Hausnummer vorhanden",
|
||||
"level": 2, "parent": "address",
|
||||
"patterns": [
|
||||
r"[A-Z\u00c0-\u017e]\w+(?:str|stra(?:ss|ß)e|weg|platz|allee|gasse|ring|damm|ufer)\s*\.?\s*\d+",
|
||||
r"\w+\s+(?:str|stra(?:ss|ß)e|weg|platz|allee)\s*\.?\s*\d+",
|
||||
],
|
||||
"severity": "MEDIUM",
|
||||
},
|
||||
|
||||
# ── L1: Kontaktdaten ──────────────────────────────────────────────
|
||||
{
|
||||
"id": "contact",
|
||||
"label": "Kontaktdaten (E-Mail + Telefon)",
|
||||
"level": 1, "parent": None,
|
||||
"patterns": [
|
||||
r"(?:e-?mail|mail).*@", r"telefon|phone|tel\.",
|
||||
r"\+?\d[\d\s/\-]{8,}",
|
||||
],
|
||||
"severity": "HIGH",
|
||||
},
|
||||
{
|
||||
"id": "contact_email_format",
|
||||
"label": "E-Mail-Adresse im korrekten Format",
|
||||
"level": 2, "parent": "contact",
|
||||
"patterns": [
|
||||
r"[a-z0-9._%+\-]+@[a-z0-9.\-]+\.[a-z]{2,}",
|
||||
],
|
||||
"severity": "MEDIUM",
|
||||
},
|
||||
{
|
||||
"id": "contact_phone_format",
|
||||
"label": "Telefonnummer vorhanden",
|
||||
"level": 2, "parent": "contact",
|
||||
"patterns": [
|
||||
r"(?:tel(?:efon)?|phone|fon)\s*[.:]\s*[\+\d][\d\s/\-]{6,}",
|
||||
r"\+49\s*[\d\s/\-]{8,}",
|
||||
r"0\d{2,4}\s*[/\-\s]\s*\d{4,}",
|
||||
],
|
||||
"severity": "MEDIUM",
|
||||
},
|
||||
|
||||
# ── L1: Handelsregister ───────────────────────────────────────────
|
||||
{
|
||||
"id": "register",
|
||||
"label": "Handelsregister / Registernummer",
|
||||
"level": 1, "parent": None,
|
||||
"patterns": [
|
||||
r"(?:handelsregister|hrb|hra|registergericht|amtsgericht)",
|
||||
r"register.*(?:nr|nummer)",
|
||||
],
|
||||
"severity": "MEDIUM",
|
||||
},
|
||||
{
    # L2 check under "register": is the register court named
    # ("Amtsgericht X" / "AG X")?
    "id": "register_court",
    "label": "Registergericht benannt (Amtsgericht X)",
    "level": 2, "parent": "register",
    "patterns": [
        r"(?:amtsgericht|registergericht)\s+[A-Z\u00c0-\u017e]\w+",
        # \b keeps the abbreviation "ag" from matching the tail of a longer
        # word such as "vertrag" followed by another word.
        r"\bag\s+[A-Z\u00c0-\u017e]\w+",
    ],
    "severity": "LOW",
},
|
||||
{
|
||||
"id": "register_number",
|
||||
"label": "Registernummer (HRB/HRA + Nummer)",
|
||||
"level": 2, "parent": "register",
|
||||
"patterns": [
|
||||
r"(?:hrb|hra)\s*\d+",
|
||||
],
|
||||
"severity": "LOW",
|
||||
},
|
||||
|
||||
# ── L1: USt-IdNr ──────────────────────────────────────────────────
|
||||
{
    # L1 check: is a VAT identification number mentioned?
    "id": "vat",
    "label": "USt-IdNr.",
    "level": 1, "parent": None,
    "patterns": [
        # Keyword and "id" must be adjacent (at most three separator
        # characters such as ".", "-" or spaces in between). The previous
        # "ust.*id" / "vat.*id" forms matched any line that contained e.g.
        # "muster" or "zustimmung" followed anywhere later by "id".
        r"\bust\W{0,3}id", r"umsatzsteuer\W{0,3}id",
        r"\bvat\W{0,3}id", r"de\s*\d{9}",
    ],
    "severity": "MEDIUM",
},
|
||||
{
|
||||
"id": "vat_de_format",
|
||||
"label": "USt-IdNr. im Format DE + 9 Ziffern",
|
||||
"level": 2, "parent": "vat",
|
||||
"patterns": [
|
||||
r"de\s*\d{9}",
|
||||
],
|
||||
"severity": "LOW",
|
||||
},
|
||||
|
||||
# ── L1: Vertretungsberechtigte ────────────────────────────────────
|
||||
{
|
||||
"id": "representative",
|
||||
"label": "Vertretungsberechtigte",
|
||||
"level": 1, "parent": None,
|
||||
"patterns": [
|
||||
r"vertretungsberechtigt", r"gesch(?:ae|ä)ftsf(?:ue|ü)hr",
|
||||
r"vorstand", r"inhaber",
|
||||
],
|
||||
"severity": "MEDIUM",
|
||||
},
|
||||
{
|
||||
"id": "representative_person",
|
||||
"label": "Name der vertretungsberechtigten Person",
|
||||
"level": 2, "parent": "representative",
|
||||
"patterns": [
|
||||
r"(?:gesch(?:ae|ä)ftsf(?:ue|ü)hr|vorstand|inhaber)\w*\s*:\s*[A-Z\u00c0-\u017e]",
|
||||
r"(?:vertreten\s+durch|repr(?:ae|ä)sentiert)\s*:?\s*[A-Z\u00c0-\u017e]",
|
||||
],
|
||||
"severity": "LOW",
|
||||
},
|
||||
|
||||
# ── Neue L1: Redaktionell Verantwortlicher ────────────────────────
|
||||
{
|
||||
"id": "editorial_visdp",
|
||||
"label": "V.i.S.d.P. / Redaktionell Verantwortlicher (§18 MStV)",
|
||||
"level": 1, "parent": None,
|
||||
"patterns": [
|
||||
r"v\.?\s*i\.?\s*s\.?\s*d\.?\s*p",
|
||||
r"(?:redaktionell|inhaltlich)\s+verantwortlich",
|
||||
r"§\s*18\s+m(?:edien)?st(?:aat)?v",
|
||||
],
|
||||
"severity": "LOW",
|
||||
},
|
||||
|
||||
# ── Neue L1: Streitbeilegung ──────────────────────────────────────
|
||||
{
|
||||
"id": "dispute_resolution",
|
||||
"label": "Verbraucherstreitbeilegung / OS-Plattform",
|
||||
"level": 1, "parent": None,
|
||||
"patterns": [
|
||||
r"verbraucherstreitbeilegung|streitschlichtung",
|
||||
r"(?:os|odr)[\-\s]plattform",
|
||||
r"ec\.europa\.eu.*odr",
|
||||
r"vsbg|verbraucherstreitbeilegungsgesetz",
|
||||
r"alternative\s+streitbeilegung",
|
||||
],
|
||||
"severity": "LOW",
|
||||
},
|
||||
]
|
||||
@@ -0,0 +1,227 @@
|
||||
"""
|
||||
Document check runner — two-pass L1/L2 logic.
|
||||
|
||||
Pass 1: Run all L1 checks ("Is it mentioned?")
|
||||
Pass 2: Run L2 checks only where their L1 parent passed ("Is it correct?")
|
||||
"""
|
||||
|
||||
import logging
|
||||
import re
|
||||
|
||||
from .dse_checks import ART13_CHECKLIST
|
||||
from .widerruf_checks import WIDERRUF_CHECKLIST
|
||||
from .agb_checks import AGB_CHECKLIST
|
||||
from .impressum_checks import IMPRESSUM_CHECKLIST
|
||||
from .cookie_checks import COOKIE_CHECKLIST
|
||||
from .social_media_checks import JOINT_CONTROLLER_CHECKLIST
|
||||
from .dsfa_checks import DSFA_CHECKLIST
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Lookup table: doc_type string -> (checklist, label).
# Each row lists every doc_type alias that shares one checklist and one label;
# the comprehension expands the rows into one dict entry per alias, in the
# order written here.
_CHECKLIST_MAP = {
    alias: (checklist, legal_ref)
    for aliases, checklist, legal_ref in (
        (("dse", "datenschutz", "privacy"), ART13_CHECKLIST, "Art. 13 DSGVO"),
        (("widerruf", "withdrawal", "cancellation"), WIDERRUF_CHECKLIST, "§355 BGB"),
        (("agb", "terms", "nutzungsbedingungen"), AGB_CHECKLIST, "§305ff BGB"),
        (("impressum", "imprint"), IMPRESSUM_CHECKLIST, "§5 TMG / §18 MStV"),
        (("cookie",), COOKIE_CHECKLIST, "§25 TDDDG"),
        (("social_media", "joint_controller"), JOINT_CONTROLLER_CHECKLIST, "Art. 26 DSGVO"),
        (("dsfa",), DSFA_CHECKLIST, "Art. 35 DSGVO"),
    )
    for alias in aliases
}
|
||||
|
||||
|
||||
def _match_patterns(patterns: list[str], text_lower: str):
|
||||
"""Try each regex pattern against text, return first Match or None."""
|
||||
for p in patterns:
|
||||
m = re.search(p, text_lower)
|
||||
if m:
|
||||
return m
|
||||
return None
|
||||
|
||||
|
||||
def _extract_context(text_lower: str, match) -> str:
|
||||
"""Extract ~30 chars around a match for evidence display."""
|
||||
if not match:
|
||||
return ""
|
||||
start = max(0, match.start() - 30)
|
||||
end = min(len(text_lower), match.end() + 30)
|
||||
return text_lower[start:end].strip()
|
||||
|
||||
|
||||
def check_document_completeness(
    text: str,
    doc_type: str,
    doc_title: str,
    doc_url: str,
) -> list[dict]:
    """Check a legal document against its type-specific requirements.

    Two-pass approach:
      L1 — Is the mandatory field mentioned at all?
      L2 — Is it correct/complete? (only checked if L1 parent passed)

    Args:
        text: Full document text. ``None``, empty, or fewer than 50 characters
            is reported as an empty document.
        doc_type: Key into ``_CHECKLIST_MAP``; unknown types fall back to the
            Art. 13 DSGVO checklist.
        doc_title: Title used in the finding messages.
        doc_url: URL copied into every finding.

    Returns:
        A list of finding dicts. For a checked document the first entry is the
        ``DSI-SCORE-*`` summary (carrying ``all_checks``, ``completeness_pct``
        and ``correctness_pct``), followed by one ``DSI-MISSING-*`` finding per
        failed L1 check and one ``DSI-DETAIL-*`` finding per failed L2 check.
        Empty documents and short "dse" notices (< 200 words) return a single
        finding and are not checked.
    """
    findings: list[dict] = []
    # Provenance fields shared by every finding.
    doc_ref = {"doc_title": doc_title, "doc_url": doc_url, "doc_type": doc_type}

    # Guard before touching ``text``: a missing (None) or empty document must
    # produce the EMPTY finding instead of raising on ``.lower()``.
    if not text or len(text) < 50:
        findings.append({
            "code": f"DSI-EMPTY-{doc_type.upper()}",
            "severity": "HIGH",
            "text": f"Dokument '{doc_title}' ist leer oder zu kurz fuer eine Pruefung.",
            **doc_ref,
        })
        return findings

    text_lower = text.lower()

    word_count = len(text.split())
    if word_count < 200 and doc_type == "dse":
        # Short privacy notes are reported but not run through the checklist.
        findings.append({
            "code": f"DSI-SCORE-{doc_type.upper()}",
            "severity": "LOW",
            "text": (
                f"'{doc_title}': Kurzhinweis ({word_count} Woerter) — zu kurz fuer "
                f"eine vollstaendige Art. 13 DSGVO Pruefung. Kein eigenstaendiges DSI-Dokument."
            ),
            **doc_ref,
            "all_checks": [],
        })
        return findings

    checklist, label = _CHECKLIST_MAP.get(doc_type, (ART13_CHECKLIST, "Art. 13 DSGVO"))

    # Entries without an explicit "level" count as L1.
    l1_checks = [c for c in checklist if c.get("level", 1) == 1]
    l2_checks = [c for c in checklist if c.get("level", 1) == 2]

    # ── Pass 1: L1 checks ────────────────────────────────────────────
    passed_l1_ids: set[str] = set()
    all_checks: list[dict] = []
    l1_present = 0

    for check in l1_checks:
        severity = check.get("severity", "MEDIUM")
        match = _match_patterns(check["patterns"], text_lower)
        passed = match is not None
        if passed:
            passed_l1_ids.add(check["id"])
            l1_present += 1
        else:
            findings.append({
                "code": f"DSI-MISSING-{check['id'].upper()}",
                "severity": severity,
                "text": (
                    f"'{doc_title}': Pflichtangabe '{check['label']}' nicht gefunden. "
                    f"Erforderlich nach {label}."
                ),
                **doc_ref,
                "check_id": check["id"],
            })
        all_checks.append({
            "id": check["id"], "label": check["label"],
            "passed": passed, "severity": severity,
            "matched_text": _extract_context(text_lower, match),
            "level": 1, "parent": None, "skipped": False,
        })

    # ── Pass 2: L2 checks (only if parent L1 passed) ─────────────────
    l2_total = 0
    l2_passed = 0

    for check in l2_checks:
        severity = check.get("severity", "MEDIUM")
        parent = check.get("parent")
        # A detail check is skipped (not failed) when its L1 parent is missing;
        # skipped checks do not count towards the correctness percentage.
        skipped = parent not in passed_l1_ids
        passed = False
        matched_text = ""

        if not skipped:
            l2_total += 1
            match = _match_patterns(check["patterns"], text_lower)
            passed = match is not None
            if passed:
                l2_passed += 1
                matched_text = _extract_context(text_lower, match)
            else:
                findings.append({
                    "code": f"DSI-DETAIL-{check['id'].upper()}",
                    "severity": severity,
                    "text": (
                        f"'{doc_title}': Detailpruefung '{check['label']}' "
                        f"nicht bestanden. Empfohlen nach {label}."
                    ),
                    **doc_ref,
                    "check_id": check["id"],
                })

        all_checks.append({
            "id": check["id"], "label": check["label"],
            "passed": passed, "severity": severity,
            "matched_text": matched_text,
            "level": 2, "parent": parent, "skipped": skipped,
        })

    # ── Summary ───────────────────────────────────────────────────────
    l1_total = len(l1_checks)
    completeness_pct = round(l1_present / l1_total * 100) if l1_total else 0
    correctness_pct = round(l2_passed / l2_total * 100) if l2_total else 0

    # Overall severity is driven by L1 completeness only.
    if completeness_pct == 100:
        summary_severity = "OK"
    elif completeness_pct >= 80:
        summary_severity = "LOW"
    elif completeness_pct >= 50:
        summary_severity = "MEDIUM"
    else:
        summary_severity = "HIGH"

    summary_text = (
        f"'{doc_title}': {l1_present}/{l1_total} Pflichtangaben vorhanden "
        f"({completeness_pct}%)."
    )
    if completeness_pct < 100:
        summary_text += f" Fehlend: {l1_total - l1_present} Angaben nach {label}."
    if l2_total > 0:
        summary_text += (
            f" Detailpruefung: {l2_passed}/{l2_total} bestanden "
            f"({correctness_pct}%)."
        )

    # The summary always leads the list so consumers can read findings[0].
    findings.insert(0, {
        "code": f"DSI-SCORE-{doc_type.upper()}",
        "severity": summary_severity,
        "text": summary_text,
        **doc_ref,
        "all_checks": all_checks,
        "completeness_pct": completeness_pct,
        "correctness_pct": correctness_pct,
    })

    return findings
|
||||
|
||||
|
||||
def classify_document_type(title: str, url: str) -> str:
    """Derive the legal document type from a document's title and URL.

    Title and URL are joined and lower-cased, then tested for keyword
    substrings in a fixed priority order; the first rule that hits wins.

    Returns:
        One of "dsfa", "social_media", "dse", "widerruf", "agb", "cookie",
        "impressum", or "other" when no keyword is found.
    """
    haystack = "{} {}".format(title, url).lower()

    def mentions(keywords) -> bool:
        for keyword in keywords:
            if keyword in haystack:
                return True
        return False

    if mentions(("datenschutzfolge", "dsfa", "risikoanalyse für nutzung")):
        return "dsfa"

    # A social-media page only counts when it is also a privacy notice;
    # otherwise the generic rules below decide.
    is_social = mentions(("social media", "facebook", "instagram", "linkedin", "fanpage"))
    if is_social and mentions(("datenschutzerkl", "datenschutz für", "datenschutzinformation")):
        return "social_media"

    # Remaining rules in priority order: (result, keywords).
    ordered_rules = (
        ("dse", ("datenschutz", "privacy", "dsgvo", "data protection", "données")),
        ("widerruf", ("widerruf", "withdrawal", "rétractation", "desistimiento")),
        ("agb", ("agb", "allgemeine geschäftsbedingungen", "terms",
                 "nutzungsbedingungen", "conditions")),
        ("cookie", ("cookie", "slapuk", "evästeet", "kakor")),
        ("impressum", ("impressum", "imprint", "legal notice", "mentions légales")),
    )
    for doc_type, keywords in ordered_rules:
        if mentions(keywords):
            return doc_type
    return "other"
|
||||
@@ -0,0 +1,253 @@
|
||||
"""
|
||||
Social Media DSE checks — Art. 26 DSGVO Joint Controller.
|
||||
|
||||
Level 1: Pflichtangabe erwaehnt?
|
||||
Level 2: Pflichtangabe korrekt/vollstaendig?
|
||||
"""
|
||||
|
||||
# Checklist for social-media privacy notices (Art. 26 DSGVO, joint controllers).
# Each entry: "id", user-facing "label", "level" (1 = mentioned at all,
# 2 = detail check), "parent" (id of the L1 entry an L2 entry refines, else
# None), "patterns" (regexes, any one of which satisfies the check) and
# "severity". The patterns are written in lower case.
JOINT_CONTROLLER_CHECKLIST = [
    # ── L1: joint controllers ────────────────────────────────────────
    {
        "id": "joint_parties",
        "label": "Gemeinsam Verantwortliche benannt (Art. 26(1))",
        "level": 1, "parent": None,
        "patterns": [
            r"gemeinsam.*verantwortlich", r"joint.*controller",
            r"gemeinsame\s+verantwortlichkeit",
            r"art\.\s*26", r"mitverantwortlich",
            r"wir.*(?:und|gemeinsam).*(?:betreiber|facebook|meta|google)",
            r"(?:betreiber|netzwerk).*verantwortlich",
        ],
        "severity": "HIGH",
    },
    {
        "id": "facebook_meta_named",
        "label": "Facebook/Meta konkret als Verantwortlicher benannt",
        "level": 2, "parent": "joint_parties",
        "patterns": [
            r"(?:facebook|meta)\s+(?:ireland|platforms|inc)",
            r"meta\s+platforms.*(?:verantwortlich|controller|betreiber)",
        ],
        "severity": "MEDIUM",
    },

    # ── L1: Art. 26 arrangement ──────────────────────────────────────
    {
        "id": "arrangement",
        "label": "Vereinbarung nach Art. 26 DSGVO",
        "level": 1, "parent": None,
        "patterns": [
            r"vereinbarung.*art\.\s*26", r"art\.\s*26.*vereinbarung",
            r"page\s*controller", r"fanpage", r"insights",
            r"gemeinsame.*verantwortung.*(?:vertrag|vereinbarung)",
            r"addendum|nachtrag|seiten.*insights",
        ],
        "severity": "HIGH",
    },
    {
        "id": "insights_referenced",
        "label": "Seiteninsights / Page Insights erwaehnt",
        "level": 2, "parent": "arrangement",
        "patterns": [
            r"(?:seiten[\-\s]?)?insights",
            r"page\s+insights",
            r"(?:statistik|nutzungsstatistik).*(?:facebook|meta|fanpage|seite)",
        ],
        "severity": "MEDIUM",
    },
    {
        "id": "page_controller_addendum",
        "label": "Page Controller Addendum / Seiten-Insights-Ergaenzung",
        "level": 2, "parent": "arrangement",
        "patterns": [
            r"page\s+controller\s+addendum",
            r"seiten[\-\s]?insights[\-\s]?erg(?:ae|ä)nzung",
            r"(?:addendum|nachtrag|erg(?:ae|ä)nzung).*(?:controller|verantwortlich)",
        ],
        "severity": "LOW",
    },

    # ── L1: contact point ────────────────────────────────────────────
    {
        "id": "contact_point",
        "label": "Anlaufstelle fuer Betroffene (Art. 26(1) S.3)",
        "level": 1, "parent": None,
        "patterns": [
            r"anlaufstelle", r"kontaktstelle",
            r"ansprechpartner.*betroffene",
            r"rechte.*(?:gegen(?:ue|ü)ber)\s+(?:uns|beiden)",
            r"rechte.*geltend\s+machen",
            r"wenden\s+sie\s+sich",
        ],
        "severity": "MEDIUM",
    },
    {
        "id": "contact_both_parties",
        "label": "Kontaktdaten beider Verantwortlicher",
        "level": 2, "parent": "contact_point",
        "patterns": [
            r"(?:sowohl|beide).*(?:kontakt|wenden|geltend)",
            r"(?:uns|bei\s+uns).*(?:als\s+auch|oder|und).*(?:facebook|meta|google|plattform)",
            r"(?:facebook|meta|google|plattform).*(?:als\s+auch|oder|und).*(?:uns|bei\s+uns)",
        ],
        "severity": "LOW",
    },

    # ── L1: split of processing (who does what) ──────────────────────
    {
        "id": "processing_split",
        "label": "Verarbeitungsaufteilung (wer macht was)",
        "level": 1, "parent": None,
        "patterns": [
            r"(?:wir|betreiber).*(?:verarbeiten|erheben|nutzen).*(?:daten|informationen)",
            r"(?:facebook|meta|google|youtube|instagram|linkedin|twitter|x\.com).*(?:verarbeit|erhebt|nutzt|speichert)",
            r"bei\s+besuch\s+(?:unserer|der)\s+(?:seite|fanpage|profil)",
            r"(?:nutzungsstatistik|statistik|insight).*(?:betreiber|netzwerk)",
        ],
        "severity": "HIGH",
    },

    # ── L1: data categories ──────────────────────────────────────────
    {
        "id": "social_data_types",
        "label": "Kategorien verarbeiteter Daten",
        "level": 1, "parent": None,
        "patterns": [
            r"(?:nutzungsstatistik|insight|reichweite|interaktion|klick|aufruf)",
            r"(?:ip.?adresse|standort|browser|ger(?:ae|ä)t|alter|geschlecht)",
            r"(?:personenbezogen|daten).*(?:social|netzwerk|plattform)",
            r"(?:nutzername|beitr(?:ae|ä)g|profil|like|kommentar)",
        ],
        "severity": "HIGH",
    },

    # ── L1: platforms ────────────────────────────────────────────────
    {
        "id": "platforms",
        "label": "Auflistung der genutzten Plattformen",
        "level": 1, "parent": None,
        "patterns": [
            r"(?:facebook|instagram|youtube|twitter|x\.com|linkedin|xing|tiktok)",
            r"(?:kan(?:ae|ä)le|plattform|netzwerk|profil|account|auftritte).*(?:social|medien)",
            r"social\s*media.*(?:angebot|pr(?:ae|ä)senz|auftritte)",
        ],
        "severity": "MEDIUM",
    },
    {
        "id": "platform_dse_links",
        "label": "Links zu Datenschutzerklaerungen der Plattformen",
        "level": 2, "parent": "platforms",
        "patterns": [
            r"(?:datenschutz|privacy).*(?:facebook|meta|google|youtube|instagram|linkedin|twitter)",
            r"(?:facebook|meta|google|youtube|instagram|linkedin|twitter).*(?:datenschutz|privacy)",
            r"(?:privacy\s+policy|datenschutzerkl(?:ae|ä)rung).*(?:finden\s+sie|abrufbar|unter)",
        ],
        "severity": "LOW",
    },

    # ── L1: third-country transfer ───────────────────────────────────
    {
        "id": "third_country",
        "label": "Drittlandtransfer (USA bei Social Media)",
        "level": 1, "parent": None,
        "patterns": [
            r"(?:usa|vereinigte\s+staaten|drittland|drittstaaten)",
            r"privacy\s+shield|data\s+privacy\s+framework|angemessenheitsbeschluss",
            r"standardvertragsklausel|standard.*contractual",
            r"(?:ue|ü)bermittlung.*(?:usa|drittland|au(?:ss|ß)erhalb)",
        ],
        "severity": "MEDIUM",
    },
    {
        "id": "usa_transfer_scc",
        "label": "Standardvertragsklauseln (SCC) fuer US-Transfer",
        "level": 2, "parent": "third_country",
        "patterns": [
            r"standard\s*vertragsklausel|scc",
            r"standard\s+contractual\s+clause",
        ],
        "severity": "MEDIUM",
    },
    {
        "id": "usa_transfer_dpf",
        "label": "Data Privacy Framework (DPF) fuer US-Transfer",
        "level": 2, "parent": "third_country",
        "patterns": [
            r"data\s+privacy\s+framework|dpf",
            # "usa"/"us" must be whole words: a bare "us" would match inside
            # everyday words such as "aus" or "muss".
            r"angemessenheitsbeschluss.*(?:\b(?:usa|us)\b|amerika)",
            r"adequacy\s+decision",
        ],
        "severity": "LOW",
    },

    # ── L1: legal basis ──────────────────────────────────────────────
    {
        "id": "legal_basis",
        "label": "Rechtsgrundlage (Art. 6 DSGVO)",
        "level": 1, "parent": None,
        "patterns": [
            r"rechtsgrundlage", r"art\.\s*6",
            r"berechtigtes\s+interesse",
            r"einwilligung.*art\.\s*6", r"lit\.\s*[a-f]",
        ],
        "severity": "MEDIUM",
    },
    {
        "id": "legal_basis_specific_lit",
        "label": "Konkretes Art. 6(1) lit. angegeben",
        "level": 2, "parent": "legal_basis",
        "patterns": [
            r"art\.\s*6\s*(?:abs\.\s*)?1\s*(?:s\.\s*1\s*)?(?:lit\.\s*)?[a-f]",
        ],
        "severity": "LOW",
    },

    # ── L1: data-subject rights ──────────────────────────────────────
    {
        "id": "rights",
        "label": "Betroffenenrechte (Art. 15-21)",
        "level": 1, "parent": None,
        "patterns": [
            r"recht\s+auf\s+auskunft", r"recht\s+auf\s+l(?:oe|ö)schung",
            # Art. 15-19 and Art. 20-21, matching the range named in the label.
            r"art\.\s*1[5-9]", r"art\.\s*2[01]", r"betroffenenrecht",
            r"ihre\s+rechte", r"rechte.*betroffene",
            r"widerspruchsrecht",
        ],
        "severity": "MEDIUM",
    },
    {
        "id": "opt_out_social",
        "label": "Opt-Out-Moeglichkeit fuer Social-Media-Tracking",
        "level": 2, "parent": "rights",
        "patterns": [
            r"(?:opt[\-\s]?out|widerspruch|deaktivieren).*(?:social|facebook|tracking|insight)",
            r"(?:social|facebook|tracking|insight).*(?:opt[\-\s]?out|widerspruch|deaktivieren)",
            r"(?:abmelden|abschalten).*(?:tracking|statistik|insight)",
        ],
        "severity": "LOW",
    },

    # ── L1: social bookmarks vs. plugins ─────────────────────────────
    {
        "id": "social_bookmarks",
        "label": "Hinweis auf Social Bookmarks vs. Plugins",
        "level": 1, "parent": None,
        "patterns": [
            r"social\s*(?:bookmark|plugin|button|widget)",
            r"(?:kein|keine).*(?:plugin|widget|button).*(?:gesetzt|eingebunden|geladen)",
            r"(?:link|verweis|weiterleitung).*(?:dienst|anbieter|netzwerk)",
        ],
        "severity": "MEDIUM",
    },
    {
        "id": "two_click_solution",
        "label": "2-Klick-Loesung oder vergleichbare Technik",
        "level": 2, "parent": "social_bookmarks",
        "patterns": [
            r"(?:zwei|2)[\-\s]?klick",
            r"(?:shariff|share[\-\s]?buttons?\s+ohne\s+tracking)",
            r"(?:erst|nur)\s+(?:bei|nach|durch)\s+(?:klick|aktivierung).*(?:daten|verbindung)",
        ],
        "severity": "LOW",
    },
]
|
||||
@@ -0,0 +1,184 @@
|
||||
"""
|
||||
Widerrufsbelehrung checks — §355 BGB, §312g BGB.
|
||||
|
||||
Level 1: Pflichtangabe erwaehnt?
|
||||
Level 2: Pflichtangabe korrekt/vollstaendig?
|
||||
"""
|
||||
|
||||
# Checklist for withdrawal notices (§355 / §312g BGB).
# Each entry: "id", user-facing "label", "level" (1 = mentioned at all,
# 2 = detail check), "parent" (id of the L1 entry an L2 entry refines, else
# None), "patterns" (regexes, any one of which satisfies the check) and
# "severity". Patterns are written for lower-cased text.
WIDERRUF_CHECKLIST = [
    # ── L1: notice of the right of withdrawal ────────────────────────
    {
        "id": "right_info",
        "label": "Belehrung ueber Widerrufsrecht",
        "level": 1, "parent": None,
        "patterns": [
            r"widerrufsrecht",
            r"right\s+of\s+withdrawal",
            r"recht\s+(?:zum|auf)\s+widerruf",
        ],
        "severity": "HIGH",
    },

    # ── L1: withdrawal period ────────────────────────────────────────
    {
        "id": "deadline",
        "label": "Widerrufsfrist (14 Tage)",
        "level": 1, "parent": None,
        "patterns": [
            r"14\s+tage", r"vierzehn\s+tage",
            r"14\s+days", r"fourteen\s+days",
        ],
        "severity": "HIGH",
    },
    {
        "id": "deadline_calendar_days",
        "label": "Kalendertage explizit angegeben",
        "level": 2, "parent": "deadline",
        "patterns": [
            r"14\s+kalendertage|vierzehn\s+kalendertage",
            r"14\s+calendar\s+days",
        ],
        "severity": "LOW",
    },
    {
        "id": "deadline_receipt_trigger",
        "label": "Fristbeginn bei Zugang/Erhalt definiert",
        "level": 2, "parent": "deadline",
        "patterns": [
            r"frist\s+beginnt.*(?:zugang|erhalt|empfang|tag\s+nach)",
            r"ab\s+(?:dem\s+)?(?:tag|zeitpunkt).*(?:zugang|erhalt|empfang|lieferung)",
            r"beginnt\s+(?:mit|ab)\s+(?:dem\s+)?(?:zugang|erhalt)",
        ],
        "severity": "MEDIUM",
    },

    # ── L1: form of the withdrawal ───────────────────────────────────
    {
        "id": "form",
        "label": "Form des Widerrufs",
        "level": 1, "parent": None,
        "patterns": [
            r"widerrufsformular", r"muster.?widerruf",
            r"withdrawal\s+form", r"formular",
        ],
        "severity": "MEDIUM",
    },
    {
        "id": "form_text_required",
        "label": "Textform-Anforderung (Brief, E-Mail, Fax)",
        "level": 2, "parent": "form",
        "patterns": [
            r"(?:textform|schriftlich|per\s+(?:brief|e-?mail|fax|post))",
            r"(?:mittels|durch)\s+(?:einer?\s+)?(?:eindeutige|klare)\w*\s+erkl(?:ae|ä)rung",
        ],
        "severity": "LOW",
    },
    {
        "id": "model_form",
        "label": "Muster-Widerrufsformular beigefuegt/verlinkt",
        "level": 2, "parent": "form",
        "patterns": [
            r"muster[\-\s]?widerrufsformular",
            r"(?:beigef(?:ue|ü)gt|anlage|anhang|formular).*widerruf",
            r"widerruf.*(?:beigef(?:ue|ü)gt|anlage|anhang|formular)",
        ],
        "severity": "LOW",
    },

    # ── L1: consequences of withdrawal ───────────────────────────────
    {
        "id": "consequences",
        "label": "Folgen des Widerrufs",
        "level": 1, "parent": None,
        "patterns": [
            r"folgen\s+des\s+widerrufs",
            r"consequences\s+of\s+withdrawal",
            r"r(?:ue|ü)ckerstattung",
        ],
        "severity": "MEDIUM",
    },
    {
        "id": "refund_timeline",
        "label": "Rueckerstattung innerhalb von 14 Tagen",
        "level": 2, "parent": "consequences",
        "patterns": [
            r"(?:r(?:ue|ü)ckerstattung|r(?:ue|ü)ckzahlung|erstatten).*14\s+tage",
            r"14\s+tage.*(?:r(?:ue|ü)ckerstatt|r(?:ue|ü)ckzahl|erstatt)",
            r"(?:unverz(?:ue|ü)glich|sp(?:ae|ä)testens).*(?:r(?:ue|ü)ck|erstatt)",
        ],
        "severity": "MEDIUM",
    },
    {
        "id": "return_costs",
        "label": "Ruecksendekosten-Regelung",
        "level": 2, "parent": "consequences",
        "patterns": [
            r"(?:r(?:ue|ü)cksende|versand|porto)kosten",
            r"kosten\s+(?:der|fuer|für)\s+r(?:ue|ü)cksendung",
            r"(?:tragen|uebernehmen|übernehmen)\s+(?:die\s+)?(?:kosten|r(?:ue|ü)cksende)",
        ],
        "severity": "LOW",
    },

    # ── L1: recipient of the withdrawal ──────────────────────────────
    {
        "id": "recipient",
        "label": "Empfaenger des Widerrufs (Name + Anschrift)",
        "level": 1, "parent": None,
        "patterns": [
            r"widerruf.*(?:richten|senden|erkl(?:ae|ä)ren)\s+(?:an|gegen(?:ue|ü)ber)",
            r"(?:name|firma|anschrift).*widerruf",
            r"widerruf.*(?:per|via|an)",
        ],
        "severity": "MEDIUM",
    },
    {
        "id": "recipient_full_address",
        "label": "Vollstaendige Adresse des Empfaengers",
        "level": 2, "parent": "recipient",
        "patterns": [
            # Five-digit postcode followed by a place name. The text is
            # lower-cased before matching, so the class must accept a-z; an
            # upper-case-only class would never match.
            r"widerruf.*\d{5}\s+[a-z\u00c0-\u017e]",
            r"\d{5}\s+[a-z\u00c0-\u017e]\w+.*widerruf",
        ],
        "severity": "LOW",
    },

    # ── L1: no reason required ───────────────────────────────────────
    {
        "id": "no_reason",
        "label": "Hinweis: kein Grund erforderlich",
        "level": 1, "parent": None,
        "patterns": [
            r"ohne\s+(?:angabe|nennung).*(?:grund|gr(?:ue|ü)nde)",
            r"(?:kein|keine).*(?:begr(?:ue|ü)ndung|grund).*(?:erforderlich|n(?:oe|ö)tig)",
        ],
        "severity": "LOW",
    },

    # ── L1: online cancellation button ───────────────────────────────
    {
        "id": "digital_button",
        "label": "Online-Kuendigungsbutton (§312k BGB)",
        "level": 1, "parent": None,
        "patterns": [
            r"k(?:ue|ü)ndigungsbutton", r"§\s*312k",
            r"online.*k(?:ue|ü)ndig",
            r"k(?:ue|ü)ndigung.*(?:button|link|formular|online)",
        ],
        "severity": "MEDIUM",
    },

    # ── L1: exception for digital content ────────────────────────────
    {
        "id": "digital_content_exception",
        "label": "Ausnahme fuer digitale Inhalte (§356 BGB)",
        "level": 1, "parent": None,
        "patterns": [
            r"§\s*356",
            r"digital\w*\s+(?:inhalte?|g(?:ue|ü)ter|dienstleistung)",
            # "verlust"/"verlier…" are the actual German spellings; the older
            # "verl(ue|ü)st" alternative is kept so nothing matched before is lost.
            r"(?:erlischt|verlust|verlier\w*|verl(?:ue|ü)st|kein\s+widerrufsrecht).*digital",
            r"(?:ausnahme|ausschluss).*widerruf.*digital",
        ],
        "severity": "LOW",
    },
]
|
||||
@@ -1,465 +1,18 @@
|
||||
"""
|
||||
DSI Document Checker — validates discovered legal documents against
|
||||
mandatory content requirements.
|
||||
DSI Document Checker — backward-compatible shim.
|
||||
|
||||
Checks each document type against its specific legal requirements:
|
||||
- Datenschutzinformation: Art. 13/14 DSGVO (9 Pflichtangaben)
|
||||
- AGB: §305ff BGB
|
||||
- Widerrufsbelehrung: §355, §312g BGB
|
||||
- Cookie-Richtlinie: §25 TDDDG
|
||||
- Impressum: §5 TMG / §18 MStV
|
||||
All logic moved to compliance.services.doc_checks package.
|
||||
This file re-exports the public API for existing consumers.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import re
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Art. 13 DSGVO mandatory fields for privacy policies.
# Each entry: "id", user-facing "label", "patterns" (regexes, any one of which
# counts as the field being present) and "severity". Patterns are written for
# lower-cased text and accept both umlauts and their ae/oe/ue transliteration.
ART13_CHECKLIST = [
    {
        "id": "controller",
        "label": "Verantwortlicher (Art. 13(1)(a))",
        "patterns": [
            r"verantwortlich\w*\s+(?:ist|im sinne|fuer|f(?:ue|ü)r)",
            r"kontaktdaten\s+des\s+verantwortlichen",
            r"name\s+(?:und|&)\s+kontaktdaten\s+des",
            r"controller", r"verantwortliche\s+stelle",
            r"responsible\s+(?:party|for)",
            r"ihk\s+\w+\s+bodensee",  # IHK-specific: org name as controller
        ],
        "severity": "HIGH",
    },
    {
        "id": "dpo",
        "label": "Datenschutzbeauftragter (Art. 13(1)(b))",
        "patterns": [
            r"datenschutzbeauftragt", r"data\s+protection\s+officer",
            r"kontaktdaten\s+de[rs]\s+(?:beh(?:oe|ö)rdlichen\s+)?datenschutz",
            r"dsb", r"dpo",
        ],
        "severity": "MEDIUM",
    },
    {
        "id": "purposes",
        "label": "Zwecke der Verarbeitung (Art. 13(1)(c))",
        "patterns": [
            r"zweck\w*\s+(?:der|und|die)\s+(?:verarbeitung|datenerhebung|datenverarbeitung|rechtsgrundlage)",
            r"purpose\w*\s+(?:of|for)\s+(?:processing|data)",
            r"zu\s+welch\w+\s+zweck",
            r"welche\s+daten\s+werden.*verarbeitet",
            r"daten\s+werden\s+(?:zu|fuer|für)\s+(?:folgende|diese)",
        ],
        "severity": "HIGH",
    },
    {
        "id": "legal_basis",
        "label": "Rechtsgrundlage (Art. 13(1)(c))",
        "patterns": [
            r"rechtsgrundlage", r"art\.\s*6\s*(?:abs|absatz)?\s*\.?\s*1",
            r"legal\s+basis", r"berechtigtes\s+interesse",
            r"auf\s+grundlage\s+(?:von|des|der)\s+(?:art|§)",
            r"lit\.\s*[a-f][\s\)]",
            r"auf\s+(?:der\s+)?grundlage\s+(?:von\s+)?art",
            r"gem(?:ae|ä)(?:ss|ß)\s+art",  # "gemäß Art." (pursuant to Art.)
            r"(?:verarbeitung|erhebung).*(?:auf\s+grundlage|gem)",
            r"§\s*\d+\s+(?:abs|ihkg|bdsg|ldsg|bbig|tdddg)",
            r"einwilligung\s+gem",
        ],
        "severity": "HIGH",
    },
    {
        "id": "recipients",
        "label": "Empfaenger (Art. 13(1)(e))",
        "patterns": [
            r"empf(?:ae|ä)nger", r"(?:ueber|über|weiter)mitt(?:el|l)ung",
            r"recipient", r"weitergabe\s+(?:an|von)\s+daten",
            r"dritte", r"third\s+part",
            r"welche\s+daten\s+werden\s+(?:ueber|über)mittelt",
            r"auftragsverarbeit",
        ],
        "severity": "MEDIUM",
    },
    {
        "id": "third_country",
        "label": "Drittlandtransfer (Art. 13(1)(f))",
        "patterns": [
            r"drittland", r"dritt\s*staat", r"drittl(?:ae|ä)nder",
            r"third\s+countr", r"angemessenheitsbeschluss",
            r"standard\s*vertragsklausel", r"scc",
            r"(?:ueber|über)mittlung.*(?:ausserhalb|außerhalb)",
            r"(?:europ(?:ae|ä)ischen\s+wirtschaftsraum|ewr|eea)",
            r"privacy\s+shield", r"data\s+privacy\s+framework",
        ],
        "severity": "MEDIUM",
    },
    {
        "id": "retention",
        "label": "Speicherdauer (Art. 13(2)(a))",
        "patterns": [
            r"speicherdauer", r"aufbewahrungsfrist",
            r"(?:wie\s+lange|dauer)\s+(?:der\s+)?(?:werden|gespeicher|speicherung)",
            r"retention\s+period", r"l(?:oe|ö)sch(?:ung|frist|konzept)",
            r"wie\s+lange\s+werden\s+die\s+daten\s+aufbewahrt",
            r"daten\s+werden\s+gel(?:oe|ö)scht",
            r"(?:\d+\s+(?:tage|monate|jahre)|nach\s+\d+\s+(?:tag|monat|jahr))",
            r"dauer\s+der\s+speicherung",
            r"aufbewahrung(?:sdauer|spflicht|szeit)",
            r"gesetzliche.*aufbewahrung",
        ],
        "severity": "HIGH",
    },
    {
        "id": "rights",
        "label": "Betroffenenrechte (Art. 13(2)(b))",
        "patterns": [
            r"recht\s+auf\s+auskunft", r"recht\s+auf\s+l(?:oe|ö)schung",
            r"recht\s+auf\s+berichtigung", r"widerspruchsrecht",
            r"art\.\s*1[5-9]", r"art\.\s*2[0-2]",
            r"right\s+to\s+(?:access|erasure|rectification|object)",
            r"betroffenenrecht", r"rechte\s+(?:des|der)\s+betroffenen",
            r"welche\s+rechte\s+ha(?:t|ben)\s+(?:der|die|sie)",
            r"ihnen\s+(?:stehen|steht)\s+(?:ein|folgende)\s+recht",
        ],
        "severity": "HIGH",
    },
    {
        "id": "complaint",
        "label": "Beschwerderecht (Art. 13(2)(d))",
        "patterns": [
            r"beschwerderecht", r"aufsichtsbeh(?:oe|ö)rde",
            r"right\s+to\s+lodge\s+a\s+complaint",
            r"supervisory\s+authority", r"datenschutzbeh(?:oe|ö)rde",
            r"recht\s+auf\s+beschwerde", r"art\.\s*77",
            r"beschwerde.*(?:wenden|einlegen|erheben)",
            r"(?:zust(?:ae|ä)ndige|competent)\s+(?:beh(?:oe|ö)rde|authority)",
        ],
        "severity": "MEDIUM",
    },
]
|
||||
|
||||
# Mandatory content of a withdrawal / cancellation notice (§355 BGB).
# One dict per requirement: "id", user-facing "label", and "patterns"
# (regexes, any one of which counts as the requirement being mentioned).
WIDERRUF_CHECKLIST = [
    {
        "id": "right_info",
        "label": "Belehrung ueber Widerrufsrecht",
        "patterns": [
            r"widerrufsrecht",
            r"right\s+of\s+withdrawal",
            r"recht\s+(?:zum|auf)\s+widerruf",
        ],
    },
    {
        "id": "deadline",
        "label": "Widerrufsfrist (14 Tage)",
        "patterns": [
            r"14\s+tage",
            r"vierzehn\s+tage",
            r"14\s+days",
            r"fourteen\s+days",
        ],
    },
    {
        "id": "form",
        "label": "Form des Widerrufs",
        "patterns": [
            r"widerrufsformular",
            r"muster.?widerruf",
            r"withdrawal\s+form",
            r"formular",
        ],
    },
    {
        "id": "consequences",
        "label": "Folgen des Widerrufs",
        "patterns": [
            r"folgen\s+des\s+widerrufs",
            r"consequences\s+of\s+withdrawal",
            r"r(?:ue|ü)ckerstattung",
        ],
    },
    {
        "id": "recipient",
        "label": "Empfaenger des Widerrufs (Name + Anschrift)",
        "patterns": [
            r"widerruf.*(?:richten|senden|erkl(?:ae|ä)ren)\s+(?:an|gegenueber|gegenüber)",
            r"(?:name|firma|anschrift).*widerruf",
            r"widerruf.*(?:per|via|an)",
        ],
    },
    {
        "id": "no_reason",
        "label": "Hinweis: kein Grund erforderlich",
        "patterns": [
            r"ohne\s+(?:angabe|nennung).*(?:grund|gr(?:ue|ü)nde)",
            r"(?:kein|keine).*(?:begruendung|begründung|grund).*(?:erforderlich|noetig|nötig)",
        ],
    },
    {
        "id": "digital_button",
        "label": "Online-Kuendigungsbutton (§312k BGB)",
        "patterns": [
            r"k(?:ue|ü)ndigungsbutton",
            r"§\s*312k",
            r"online.*k(?:ue|ü)ndig",
            r"k(?:ue|ü)ndigung.*(?:button|link|formular|online)",
        ],
    },
]
|
||||
|
||||
# Terms-and-conditions (AGB) requirements, §305ff BGB.
# One dict per requirement: "id", user-facing "label", and "patterns"
# (regexes, any one of which counts as the requirement being mentioned).
# Patterns accept both umlauts and their ae/oe/ue transliteration.
AGB_CHECKLIST = [
    {"id": "scope", "label": "Geltungsbereich",
     "patterns": [r"geltungsbereich", r"geltung", r"scope", r"diese\s+(?:agb|bedingungen)\s+gelten"]},
    {"id": "contract", "label": "Vertragsschluss",
     "patterns": [r"vertragsschluss", r"zustandekommen", r"contract\s+formation", r"angebot\s+und\s+annahme"]},
    {"id": "liability", "label": "Haftung / Haftungsbeschraenkung",
     "patterns": [r"haftung", r"liability", r"schadensersatz", r"haftungsbeschr(?:ae|ä)nkung"]},
    {"id": "jurisdiction", "label": "Gerichtsstand / Anwendbares Recht",
     "patterns": [r"gerichtsstand", r"anwendbares\s+recht", r"jurisdiction", r"governing\s+law"]},
    {"id": "payment", "label": "Zahlungsbedingungen",
     # "verguetung" is accepted alongside "vergütung", like every other umlaut here.
     "patterns": [r"zahlungsbedingung", r"payment\s+terms", r"(?:preis|kosten|entgelt|verg(?:ue|ü)tung)",
                  r"zahlungsweise", r"rechnungsstellung"]},
    {"id": "delivery", "label": "Lieferung / Leistungserbringung",
     "patterns": [r"lieferung", r"leistungserbringung", r"delivery", r"lieferfrist",
                  r"bereitstellung", r"(?:zugang|zugriff).*(?:dienst|leistung)"]},
    {"id": "warranty", "label": "Gewaehrleistung / Maengelrechte",
     "patterns": [r"gew(?:ae|ä)hrleistung", r"m(?:ae|ä)ngelrecht", r"warranty", r"sachm(?:ae|ä)ngel",
                  r"gew(?:ae|ä)hrleistungsfrist"]},
    {"id": "termination", "label": "Kuendigung / Vertragsbeendigung",
     "patterns": [r"k(?:ue|ü)ndigung", r"vertragsbeendigung", r"termination",
                  r"laufzeit.*(?:vertrag|abo)", r"k(?:ue|ü)ndigungsfrist"]},
    {"id": "data_protection", "label": "Datenschutzhinweis in AGB",
     "patterns": [r"datenschutz.*(?:agb|bedingung)", r"(?:agb|bedingung).*datenschutz",
                  r"personenbezogen.*daten.*(?:agb|vertrag)", r"dsgvo.*(?:agb|vertrag)"]},
]
|
||||
|
||||
# Imprint (Impressum) requirements, §5 TMG / §18 MStV.
# One dict per requirement: "id", user-facing "label", and "patterns"
# (regexes, any one of which counts as the requirement being mentioned).
IMPRESSUM_CHECKLIST = [
    {"id": "name", "label": "Name des Anbieters",
     # The two-letter legal forms must be whole words; a bare "ag", "kg" or
     # "ug" would match inside ordinary words such as "vertrag".
     "patterns": [r"(?:gmbh|\bag\b|e\.v\.|ohg|\bkg\b|gbr|\bug\b|mbh|inc|ltd)", r"firma", r"unternehmen"]},
    {"id": "address", "label": "Anschrift",
     "patterns": [r"(?:str(?:asse|\.)|weg|platz|allee)\s*\d", r"d-\d{5}", r"\d{5}\s+\w+"]},
    {"id": "contact", "label": "Kontaktdaten (E-Mail + Telefon)",
     "patterns": [r"(?:e-?mail|mail).*@", r"telefon|phone|tel\.", r"\+?\d[\d\s/\-]{8,}"]},
    {"id": "register", "label": "Handelsregister / Registernummer",
     "patterns": [r"(?:handelsregister|hrb|hra|registergericht|amtsgericht)", r"register.*(?:nr|nummer)"]},
    {"id": "vat", "label": "USt-IdNr.",
     "patterns": [r"ust.*id", r"umsatzsteuer.*identifikation", r"vat.*id", r"de\s*\d{9}"]},
    {"id": "representative", "label": "Vertretungsberechtigte",
     # Accept "geschaeftsfuehr…" as well as "geschäftsführ…".
     "patterns": [r"vertretungsberechtigt", r"gesch(?:ae|ä)ftsf(?:ue|ü)hr", r"vorstand", r"inhaber"]},
]
|
||||
|
||||
# Cookie policy requirements, §25 TDDDG.
# One dict per requirement: "id", user-facing "label", and "patterns"
# (regexes, any one of which counts as the requirement being mentioned).
# Patterns accept both umlauts and their ae/oe/ue transliteration.
COOKIE_CHECKLIST = [
    {"id": "cookie_types", "label": "Arten der Cookies",
     "patterns": [r"(?:notwendig|essentiell|funktional|statistik|marketing|tracking)", r"cookie.*(?:art|typ|kategori)"]},
    {"id": "purposes", "label": "Zwecke der Cookies",
     "patterns": [r"zweck.*cookie", r"cookie.*zweck", r"(?:wof(?:ue|ü)r|wozu|warum).*cookie",
                  r"cookies?\s+(?:ein|ver)?\s*,?\s*um\s+", r"(?:setzen|verwenden|nutzen)\s+.*cookies?\s+.*(?:um|fuer|für)",
                  r"(?:analyse|marketing|tracking|funktional)\w*\s*cookies?\s*\.?\s*(?:um|damit|diese|sie)",
                  r"cookies?\s+(?:dienen|helfen|ermöglichen|ermoeglichen)"]},
    {"id": "retention", "label": "Speicherdauer der Cookies",
     "patterns": [r"(?:speicherdauer|laufzeit|g(?:ue|ü)ltigk|ablauf).*cookie", r"cookie.*(?:\d+\s+(?:tag|monat|jahr)|session)"]},
    {"id": "third_party", "label": "Drittanbieter-Cookies",
     "patterns": [r"drittanbieter", r"third.?party", r"(?:google|facebook|meta|microsoft).*cookie"]},
    {"id": "opt_out", "label": "Widerspruchsmoeglichkeit",
     "patterns": [r"(?:widerspruch|opt.?out|ablehnen|deaktivieren).*cookie", r"cookie.*(?:ablehnen|deaktivieren|l(?:oe|ö)schen)"]},
]
|
||||
|
||||
# Art. 26 DSGVO Joint Controller (Social Media DSE).
# Each entry carries a stable "id", a German user-facing "label" and a list of
# regex "patterns"; the patterns are written for lower-cased text and a single
# hit is enough for the entry to count as present.
JOINT_CONTROLLER_CHECKLIST = [
    {"id": "joint_parties", "label": "Gemeinsam Verantwortliche benannt (Art. 26(1))",
     "patterns": [r"gemeinsam.*verantwortlich", r"joint.*controller", r"gemeinsame\s+verantwortlichkeit",
                  r"art\.\s*26", r"mitverantwortlich",
                  r"wir.*(?:und|gemeinsam).*(?:betreiber|facebook|meta|google)",
                  r"(?:betreiber|netzwerk).*verantwortlich"]},
    {"id": "arrangement", "label": "Vereinbarung nach Art. 26 DSGVO",
     "patterns": [r"vereinbarung.*art\.\s*26", r"art\.\s*26.*vereinbarung",
                  r"page\s*controller", r"fanpage", r"insights",
                  r"gemeinsame.*verantwortung.*(?:vertrag|vereinbarung)",
                  r"addendum|nachtrag|seiten.*insights"]},
    {"id": "contact_point", "label": "Anlaufstelle fuer Betroffene (Art. 26(1) S.3)",
     "patterns": [r"anlaufstelle", r"kontaktstelle", r"ansprechpartner.*betroffene",
                  r"rechte.*(?:gegenueber|gegenüber)\s+(?:uns|beiden)",
                  r"rechte.*(?:sowohl|grundsaetzlich|grundsätzlich).*(?:uns|als auch)",
                  r"rechte.*geltend\s+machen", r"wenden\s+sie\s+sich"]},
    {"id": "processing_split", "label": "Verarbeitungsaufteilung (wer macht was)",
     "patterns": [r"(?:wir|betreiber).*(?:verarbeiten|erheben|nutzen).*(?:daten|informationen)",
                  r"(?:facebook|meta|google|youtube|instagram|linkedin|twitter|x\.com).*(?:verarbeit|erhebt|nutzt|speichert)",
                  r"bei\s+besuch\s+(?:unserer|der)\s+(?:seite|fanpage|profil)",
                  r"(?:senden|ver(?:oe|ö)ffentlich|teilen).*(?:inhalte|beitr(?:ae|ä)ge)",
                  r"(?:nutzungsstatistik|statistik|insight).*(?:betreiber|netzwerk)"]},
    {"id": "social_data_types", "label": "Kategorien verarbeiteter Daten",
     "patterns": [r"(?:nutzungsstatistik|insight|reichweite|interaktion|klick|aufruf)",
                  r"(?:ip.?adresse|standort|browser|ger(?:ae|ä)t|alter|geschlecht)",
                  r"(?:personenbezogen|daten).*(?:social|netzwerk|plattform)",
                  r"(?:nutzername|beitr(?:ae|ä)g|profil|like|kommentar)",
                  r"(?:sensitive|besondere).*(?:daten|kategori)"]},
    {"id": "platforms", "label": "Auflistung der genutzten Plattformen",
     "patterns": [r"(?:facebook|instagram|youtube|twitter|x\.com|linkedin|xing|tiktok)",
                  r"(?:kan(?:ae|ä)le|plattform|netzwerk|profil|account|auftritte).*(?:social|medien)",
                  r"social\s*media.*(?:angebot|pr(?:ae|ä)senz|auftritte)"]},
    {"id": "third_country", "label": "Drittlandtransfer (USA bei Social Media)",
     "patterns": [r"(?:usa|vereinigte\s+staaten|drittland|drittstaaten)",
                  r"privacy\s+shield|data\s+privacy\s+framework|angemessenheitsbeschluss",
                  r"standardvertragsklausel|standard.*contractual",
                  r"(?:uebermittlung|übermittlung).*(?:usa|drittland|ausserhalb|außerhalb)"]},
    {"id": "legal_basis", "label": "Rechtsgrundlage (Art. 6 DSGVO)",
     "patterns": [r"rechtsgrundlage", r"art\.\s*6", r"berechtigtes\s+interesse",
                  r"einwilligung.*art\.\s*6", r"lit\.\s*[a-f]"]},
    {"id": "rights", "label": "Betroffenenrechte (Art. 15-21)",
     # The article pattern covers the full range named in the label (15-21);
     # the trailing look-ahead keeps it from matching a longer number.
     "patterns": [r"recht\s+auf\s+auskunft", r"recht\s+auf\s+l(?:oe|ö)schung",
                  r"art\.\s*(?:1[5-9]|2[01])(?!\d)", r"betroffenenrecht",
                  r"ihre\s+rechte", r"rechte.*betroffene", r"widerspruchsrecht"]},
    {"id": "social_bookmarks", "label": "Hinweis auf Social Bookmarks vs. Plugins",
     "patterns": [r"social\s*(?:bookmark|plugin|button|widget)",
                  r"(?:kein|keine).*(?:plugin|widget|button).*(?:gesetzt|eingebunden|geladen)",
                  r"(?:link|verweis|weiterleitung).*(?:dienst|anbieter|netzwerk)"]},
]
|
||||
|
||||
# DSFA checklist (Art. 35 DSGVO).
# Each entry carries a stable "id", a German user-facing "label" and a list of
# regex "patterns"; the patterns are written for lower-cased text and a single
# hit is enough for the entry to count as present.
DSFA_CHECKLIST = [
    {"id": "trigger", "label": "Schwellwertanalyse / Ausloesepruefung (Art. 35(1))",
     "patterns": [r"art\.\s*35\s*(?:abs|absatz)?\s*\.?\s*1", r"hohes\s+risiko",
                  r"voraussichtlich.*risiko", r"schwellwert",
                  r"folgen.*(?:verarbeitung|schutz).*personenbezogen"]},
    {"id": "description", "label": "Beschreibung der Verarbeitungsvorgaenge (Art. 35(7)(a))",
     "patterns": [r"beschreibung.*verarbeitung", r"verarbeitungsvorg(?:ae|ä)ng",
                  r"systematische\s+beschreibung", r"gegenstand.*verarbeitung",
                  r"social\s*media.*(?:angebot|nutzung|besteht\s+aus)",
                  r"(?:kan(?:ae|ä)le|plattform).*(?:facebook|twitter|instagram|youtube|linkedin|xing)"]},
    {"id": "necessity", "label": "Notwendigkeit und Verhaeltnismaessigkeit (Art. 35(7)(b))",
     # "Verhältnismäßigkeit" is correctly spelled with ß; accept ß as well as "ss".
     "patterns": [r"notwendigkeit", r"verh(?:ae|ä)ltnism(?:ae|ä)(?:ss|ß)igkeit",
                  r"erforderlichkeit", r"zweckbindung",
                  r"geringen?\s+umfang", r"nur\s+(?:die|sehr).*daten.*(?:verarbeitet|erhoben)",
                  r"freiwillig\s+angegeben"]},
    {"id": "risks", "label": "Risikobewertung fuer Betroffene (Art. 35(7)(c))",
     "patterns": [r"risiko.*(?:bewertung|analyse|einsch(?:ae|ä)tzung|abw(?:ae|ä)gung)",
                  r"risiken.*(?:rechte|freiheit)", r"eintrittswahrscheinlichkeit",
                  r"schwere.*(?:risiko|auswirkung)",
                  r"hohes\s+risiko.*(?:rechte|freiheit)",
                  r"systematische\s+beobachtung",
                  r"(?:sensitiv|politisch|sexuell|gesundheit).*(?:daten|offenbar)"]},
    {"id": "measures", "label": "Abhilfemassnahmen (Art. 35(7)(d))",
     # The abbreviation "TOM"/"TOMs" must be a whole word; as a bare substring
     # it also matches "automatisiert", "customer" and similar.
     "patterns": [r"abhilfe", r"(?:ma(?:ss|ß)nahm).*(?:risiko|schutz|minderung)",
                  r"schutzma(?:ss|ß)nahm", r"(?:technisch|organisatorisch).*ma(?:ss|ß)nahm",
                  r"\btoms?\b", r"risiko.*(?:minim|reduz|begrenzen)",
                  r"(?:einschr(?:ae|ä)nk|begrenz).*(?:verarbeitung|zugriff)"]},
    {"id": "lfdi", "label": "Beruecksichtigung Landesbehoerden-Richtlinie",
     "patterns": [r"l(?:an)?fdi", r"landesbeauftragt.*datenschutz",
                  r"landes.?datenschutz", r"richtlinie.*(?:land|lfdi|landes)",
                  r"(?:aufsichtsbeh(?:oe|ö)rde|beh(?:oe|ö)rde).*(?:richtlinie|empfehlung|vorgabe)"]},
    {"id": "stakeholders", "label": "Einbeziehung des DSB (Art. 35(2))",
     "patterns": [r"datenschutzbeauftragt.*(?:einbez|konsult|beteilig|rat)",
                  r"dsb.*(?:konsult|einbez|rat)", r"stellungnahme.*dsb",
                  r"(?:rat|empfehlung).*datenschutzbeauftragt"]},
    {"id": "documentation", "label": "Dokumentation der Ergebnisse",
     "patterns": [r"(?:dokument|ergebnis|bericht).*(?:dsfa|folgenabsch(?:ae|ä)tzung)",
                  r"(?:ergebnis|schlussfolgerung|bewertung).*(?:risiko|verarbeitung)",
                  r"vorliegend.*(?:dsfa|analyse|bewertung|absch(?:ae|ä)tzung)"]},
]
|
||||
|
||||
|
||||
def check_document_completeness(
    text: str,
    doc_type: str,
    doc_title: str,
    doc_url: str,
) -> list[dict]:
    """Check a legal document against its type-specific requirements.

    Args:
        text: Plain text of the document. ``None``, an empty string or fewer
            than 50 characters is reported as an empty document.
        doc_type: Document type key (e.g. "dse", "impressum", "cookie").
            Unknown types are checked against the Art. 13 checklist.
        doc_title: Title used in the finding texts.
        doc_url: Source URL copied into every finding.

    Returns:
        A list of finding dicts. For a checked document the first element is
        the ``DSI-SCORE-<TYPE>`` summary, whose ``all_checks`` list holds one
        pass/fail record per checklist entry; it is followed by one
        ``DSI-MISSING-<ID>`` finding per entry that was not found. Empty
        documents yield a single ``DSI-EMPTY-<TYPE>`` finding, and short
        privacy notices a single ``DSI-SCORE-<TYPE>`` finding with no checks.
    """
    findings: list[dict] = []

    # Guard before touching `text`: lower-casing first would raise on None
    # instead of producing the EMPTY finding this branch exists for.
    if not text or len(text) < 50:
        findings.append({
            "code": f"DSI-EMPTY-{doc_type.upper()}",
            "severity": "HIGH",
            "text": f"Dokument '{doc_title}' ist leer oder zu kurz fuer eine Pruefung.",
            "doc_title": doc_title,
            "doc_url": doc_url,
            "doc_type": doc_type,
        })
        return findings

    text_lower = text.lower()

    # Short documents (< 200 words) are likely navigation snippets or
    # introductory pages, not full Art. 13 documents — flag but don't check.
    # The same aliases that select the Art. 13 checklist below apply here.
    word_count = len(text.split())
    if word_count < 200 and doc_type in ("dse", "datenschutz", "privacy"):
        findings.append({
            "code": f"DSI-SCORE-{doc_type.upper()}",
            "severity": "LOW",
            "text": (
                f"'{doc_title}': Kurzhinweis ({word_count} Woerter) — zu kurz fuer "
                f"eine vollstaendige Art. 13 DSGVO Pruefung. Kein eigenstaendiges DSI-Dokument."
            ),
            "doc_title": doc_title,
            "doc_url": doc_url,
            "doc_type": doc_type,
            "all_checks": [],  # No checks run for short documents
        })
        return findings

    # Select checklist based on document type
    if doc_type in ("dse", "datenschutz", "privacy"):
        checklist = ART13_CHECKLIST
        label = "Art. 13 DSGVO"
    elif doc_type in ("widerruf", "withdrawal", "cancellation"):
        checklist = WIDERRUF_CHECKLIST
        label = "§355 BGB"
    elif doc_type in ("agb", "terms", "nutzungsbedingungen"):
        checklist = AGB_CHECKLIST
        label = "§305ff BGB"
    elif doc_type in ("impressum", "imprint"):
        checklist = IMPRESSUM_CHECKLIST
        label = "§5 TMG / §18 MStV"
    elif doc_type in ("cookie",):
        checklist = COOKIE_CHECKLIST
        label = "§25 TDDDG"
    elif doc_type in ("social_media", "joint_controller"):
        checklist = JOINT_CONTROLLER_CHECKLIST
        label = "Art. 26 DSGVO"
    elif doc_type in ("dsfa",):
        checklist = DSFA_CHECKLIST
        label = "Art. 35 DSGVO"
    else:
        checklist = ART13_CHECKLIST  # Default: check as DSE
        label = "Art. 13 DSGVO"

    present = 0
    total = len(checklist)
    all_checks: list[dict] = []

    for check in checklist:
        # The first matching pattern decides the check; later ones are skipped.
        match = None
        for pattern in check["patterns"]:
            match = re.search(pattern, text_lower)
            if match:
                break

        passed = match is not None
        matched_text = ""
        if match:
            # Keep up to 30 characters of context on each side of the hit.
            start = max(0, match.start() - 30)
            end = min(len(text_lower), match.end() + 30)
            matched_text = text_lower[start:end].strip()
            present += 1
        else:
            findings.append({
                "code": f"DSI-MISSING-{check['id'].upper()}",
                "severity": check.get("severity", "MEDIUM"),
                "text": (
                    f"'{doc_title}': Pflichtangabe '{check['label']}' nicht gefunden. "
                    f"Erforderlich nach {label}."
                ),
                "doc_title": doc_title,
                "doc_url": doc_url,
                "doc_type": doc_type,
                "check_id": check["id"],
            })

        all_checks.append({
            "id": check["id"],
            "label": check["label"],
            "passed": passed,
            "severity": check.get("severity", "MEDIUM"),
            "matched_text": matched_text,
        })

    # Always add summary finding (even at 100% — needed for completeness tracking)
    if total > 0:
        pct = round(present / total * 100)
        findings.insert(0, {
            "code": f"DSI-SCORE-{doc_type.upper()}",
            "severity": "OK" if pct == 100 else "LOW" if pct >= 80 else "MEDIUM" if pct >= 50 else "HIGH",
            "text": (
                f"'{doc_title}': {present}/{total} Pflichtangaben vorhanden ({pct}%)."
                + (f" Fehlend: {total - present} Angaben nach {label}." if pct < 100 else "")
            ),
            "doc_title": doc_title,
            "doc_url": doc_url,
            "doc_type": doc_type,
            "all_checks": all_checks,
        })

    return findings
|
||||
|
||||
|
||||
def classify_document_type(title: str, url: str) -> str:
    """Classify a document by its title/URL into a legal document type.

    Keywords are matched as plain substrings of the lower-cased title and URL.
    The order of the tests is the priority: the first group that matches wins.

    Args:
        title: Document or link title.
        url: Document URL.

    Returns:
        One of "dsfa", "social_media", "dse", "widerruf", "agb", "cookie",
        "impressum", or "other" when no keyword matches.
    """
    combined = f"{title} {url}".lower()
    # URL slugs join words with "-" or "_" ("legal-notice", "social-media").
    # No keyword contains either character, so turning them into spaces only
    # lets the multi-word keywords match slugs as well.
    combined = combined.replace("-", " ").replace("_", " ")

    def mentions(*keywords: str) -> bool:
        # True when any keyword occurs anywhere in the title/URL text.
        return any(kw in combined for kw in keywords)

    # Umlaut keywords are listed with their ASCII transliteration too, since
    # URLs usually spell "für" as "fuer" and "ä" as "ae".
    if mentions("datenschutzfolge", "dsfa",
                "risikoanalyse für nutzung", "risikoanalyse fuer nutzung"):
        return "dsfa"
    if mentions("social media", "facebook", "instagram", "linkedin", "fanpage"):
        # A platform name alone is not enough; it must also be a privacy notice.
        if mentions("datenschutzerkl", "datenschutz für", "datenschutz fuer",
                    "datenschutzinformation"):
            return "social_media"
    if mentions("datenschutz", "privacy", "dsgvo", "data protection", "données"):
        return "dse"
    if mentions("widerruf", "withdrawal", "rétractation", "desistimiento"):
        return "widerruf"
    if mentions("agb", "allgemeine geschäftsbedingungen", "allgemeine geschaeftsbedingungen",
                "terms", "nutzungsbedingungen", "conditions"):
        return "agb"
    if mentions("cookie", "slapuk", "evästeet", "kakor"):
        return "cookie"
    if mentions("impressum", "imprint", "legal notice", "mentions légales"):
        return "impressum"
    return "other"
|
||||
from compliance.services.doc_checks import ( # noqa: F401
|
||||
check_document_completeness,
|
||||
classify_document_type,
|
||||
ART13_CHECKLIST,
|
||||
WIDERRUF_CHECKLIST,
|
||||
AGB_CHECKLIST,
|
||||
IMPRESSUM_CHECKLIST,
|
||||
COOKIE_CHECKLIST,
|
||||
JOINT_CONTROLLER_CHECKLIST,
|
||||
DSFA_CHECKLIST,
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user