test: Lit-Mapping validation — Dict vs Control Library comparison
8 test cases, 6 of them with deliberately wrong legal basis assignments:
- Cookie tracking on lit. f (should be lit. a)
- Analytics on lit. b (should be lit. a)
- Newsletter on lit. f (should be lit. a)
- Remarketing on lit. f (should be lit. a)
- Klarna credit check without Art. 22 notice
- Session recording on lit. f (should be lit. a)
- 2 correct cases (should NOT trigger findings)

Runs both the hardcoded dict AND the Control Library query and compares the results. If the Control Library passes all cases → the dict can be removed.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,316 @@
|
||||
"""
|
||||
Lit-Mapping Validation Test — verifies that BOTH the hardcoded dict AND
|
||||
the Control Library detect the same legal basis errors.
|
||||
|
||||
If both produce the same results, we can safely delete the dict.
|
||||
|
||||
Test cases use deliberately WRONG legal basis assignments that are
|
||||
common mistakes on real websites.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
|
||||
# Add parent to path for imports
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════
|
||||
# TEST CASES — Deliberately wrong DSE text blocks
|
||||
# ═══════════════════════════════════════════════════════════════
|
||||
|
||||
# Shared fixtures for both checks in this file. Each entry is a dict with:
#   id               - short identifier printed in the result lines
#   description      - human-readable (German) summary printed next to the id
#   dse_text         - privacy-policy excerpt handed to the validator
#   expected_finding - True if the excerpt should be flagged, False otherwise
#   expected_purpose - purpose key used by test_control_library() to pick the
#                      relevant controls; None for the negative cases
#   correct_basis    - the legal basis the excerpt should have used
#   wrong_basis      - the legal basis the excerpt actually states (None when
#                      the excerpt is correct)
# correct_basis / wrong_basis are not read by the functions visible in this
# file; they document the intent of each case.
TEST_CASES = [
    # --- Positive cases: the stated legal basis is wrong or incomplete ---
    {
        "id": "cookie_tracking_wrong_litf",
        "description": "Cookie-Tracking auf lit. f statt lit. a",
        "dse_text": (
            "Wir setzen Cookies und aehnliche Tracking-Technologien ein, "
            "um die Nutzung unserer Website zu analysieren. Die Verarbeitung "
            "erfolgt auf Grundlage unseres berechtigten Interesses gemaess "
            "Art. 6 Abs. 1 lit. f DSGVO an der Optimierung unseres Angebots."
        ),
        "expected_finding": True,
        "expected_purpose": "cookie_tracking",
        "correct_basis": "lit. a (Einwilligung)",
        "wrong_basis": "lit. f",
    },
    {
        "id": "analytics_wrong_litb",
        "description": "Google Analytics auf lit. b (Vertragserfuellung) statt lit. a",
        "dse_text": (
            "Wir nutzen Google Analytics zur Webanalyse. Die Datenverarbeitung "
            "erfolgt auf Basis der Vertragserfuellung gemaess Art. 6 Abs. 1 lit. b DSGVO, "
            "da die Analyse fuer die Erbringung unserer Dienste erforderlich ist."
        ),
        "expected_finding": True,
        "expected_purpose": "web_analytics",
        "correct_basis": "lit. a (Einwilligung)",
        "wrong_basis": "lit. b",
    },
    {
        "id": "newsletter_wrong_litf",
        "description": "Newsletter auf lit. f statt lit. a",
        "dse_text": (
            "Wir versenden regelmaessig Newsletter mit Informationen zu unseren Produkten. "
            "Die Verarbeitung Ihrer E-Mail-Adresse erfolgt auf Grundlage unseres "
            "berechtigten Interesses gemaess Art. 6 Abs. 1 lit. f DSGVO an der "
            "Direktwerbung fuer eigene aehnliche Produkte."
        ),
        "expected_finding": True,
        "expected_purpose": "marketing_email",
        "correct_basis": "lit. a (Einwilligung)",
        "wrong_basis": "lit. f",
    },
    {
        "id": "remarketing_wrong_litf",
        "description": "Remarketing/Retargeting auf lit. f statt lit. a",
        "dse_text": (
            "Wir setzen Remarketing-Technologien ein, um Ihnen auf anderen Websites "
            "personalisierte Werbung anzuzeigen. Die Verarbeitung basiert auf unserem "
            "berechtigten Interesse an effektiver Werbung (Art. 6 Abs. 1 lit. f DSGVO)."
        ),
        "expected_finding": True,
        "expected_purpose": "remarketing",
        "correct_basis": "lit. a (Einwilligung)",
        "wrong_basis": "lit. f",
    },
    {
        # The stated basis (lit. b) is present; what is missing is the Art. 22 notice.
        "id": "klarna_missing_art22",
        "description": "Klarna Bonitaetspruefung ohne Art. 22 Hinweis",
        "dse_text": (
            "Bei Auswahl der Zahlungsart Rechnung ueber Klarna wird eine "
            "Bonitaetspruefung durchgefuehrt. Klarna AB, Stockholm, Schweden, "
            "uebermittelt Ihre Daten an Auskunfteien. Rechtsgrundlage ist "
            "Art. 6 Abs. 1 lit. b DSGVO (Vertragserfuellung)."
        ),
        "expected_finding": True,
        "expected_purpose": "credit_check",
        "correct_basis": "lit. b/f + Art. 22 DSGVO Hinweis",
        "wrong_basis": "(fehlt)",
    },
    {
        "id": "session_recording_wrong_litf",
        "description": "Session Recording (Hotjar) auf lit. f statt lit. a",
        "dse_text": (
            "Wir nutzen Hotjar zur Analyse des Nutzerverhaltens mittels Session Recording "
            "und Heatmaps. Die Aufzeichnung der Nutzersitzungen erfolgt auf Grundlage "
            "unseres berechtigten Interesses (Art. 6 Abs. 1 lit. f DSGVO)."
        ),
        "expected_finding": True,
        "expected_purpose": "session_recording",
        "correct_basis": "lit. a (Einwilligung)",
        "wrong_basis": "lit. f",
    },
    # --- Negative cases: the stated legal basis is correct, no finding expected ---
    {
        "id": "payment_correct_litb",
        "description": "Zahlung korrekt auf lit. b — sollte KEIN Finding sein",
        "dse_text": (
            "Die Verarbeitung Ihrer Zahlungsdaten durch unseren Zahlungsdienstleister "
            "Stripe erfolgt auf Grundlage der Vertragserfuellung gemaess "
            "Art. 6 Abs. 1 lit. b DSGVO."
        ),
        "expected_finding": False,
        "expected_purpose": None,
        "correct_basis": "lit. b (Vertragserfuellung)",
        "wrong_basis": None,
    },
    {
        "id": "analytics_correct_lita",
        "description": "Analytics korrekt auf lit. a — sollte KEIN Finding sein",
        "dse_text": (
            "Wir setzen Google Analytics nur mit Ihrer ausdruecklichen Einwilligung "
            "gemaess Art. 6 Abs. 1 lit. a DSGVO ein. Sie koennen Ihre Einwilligung "
            "jederzeit widerrufen."
        ),
        "expected_finding": False,
        "expected_purpose": None,
        "correct_basis": "lit. a (Einwilligung)",
        "wrong_basis": None,
    },
]
|
||||
|
||||
|
||||
def test_hardcoded_dict():
    """Run every entry of TEST_CASES through ``validate_legal_bases``.

    A case passes when "the validator returned at least one finding" equals
    the case's ``expected_finding`` flag. Only the presence of findings is
    checked, not which purpose a finding refers to.

    Returns:
        tuple: ``(passed, failed)`` counts over all test cases.
    """
    from compliance.services.legal_basis_validator import validate_legal_bases

    rule = "=" * 70
    print("\n" + rule)
    print("TEST 1: Hartkodiertes Dict (legal_basis_validator.py)")
    print(rule)

    tally = {"PASS": 0, "FAIL": 0}

    for case in TEST_CASES:
        hits = validate_legal_bases(case["dse_text"])
        flagged = len(hits) > 0

        verdict = "PASS" if flagged == case["expected_finding"] else "FAIL"
        tally[verdict] += 1

        print(f" [{verdict}] {case['id']}: {case['description']}")
        if flagged:
            # Show a shortened preview of every finding the validator produced.
            for hit in hits:
                print(f" → {hit.text[:80]}")
        elif case["expected_finding"]:
            print(f" → ERWARTET: Finding fuer {case['expected_purpose']}, aber KEINS gefunden")

    passed, failed = tally["PASS"], tally["FAIL"]
    print(f"\n Ergebnis: {passed} bestanden, {failed} fehlgeschlagen\n")
    return passed, failed
|
||||
|
||||
|
||||
# Wording in a control's requirements that shows it demands consent (lit. a).
_CONSENT_KEYWORDS = ("einwilligung", "consent", "lit. a")

# purpose -> (title keywords that make a control relevant for the purpose,
#             requirement keywords showing the control states the expected basis)
_PURPOSE_RULES = {
    "cookie_tracking": (("cookie", "tracking"), _CONSENT_KEYWORDS),
    "web_analytics": (("analytics", "tracking"), _CONSENT_KEYWORDS),
    "marketing_email": (("newsletter", "marketing"), _CONSENT_KEYWORDS),
    "remarketing": (("remarketing", "retargeting"), _CONSENT_KEYWORDS),
    # The credit-check case expects an Art. 22 notice rather than consent, so a
    # control that talks about Art. 22 / automated decisions must count as well.
    # The consent keywords are kept so every previously accepted control still matches.
    "credit_check": (
        ("art. 22", "bonitaet"),
        _CONSENT_KEYWORDS + ("art. 22", "automatisiert"),
    ),
    "session_recording": (("recording", "heatmap"), _CONSENT_KEYWORDS),
}


def _find_matching_control(tc, controls):
    """Return the first control that would catch test case ``tc``, or None.

    A control matches when its title contains one of the purpose's title
    keywords and its requirements contain one of the purpose's requirement
    keywords. Cases with ``expected_finding`` False, or with a purpose that
    has no rule, never match.
    """
    if not tc["expected_finding"]:
        return None
    rule = _PURPOSE_RULES.get(tc["expected_purpose"])
    if rule is None:
        return None

    title_keywords, requirement_keywords = rule
    for control in controls:
        # title / requirements may be NULL in the database.
        title = (control["title"] or "").lower()
        if not any(keyword in title for keyword in title_keywords):
            continue
        requirements = (control["requirements"] or "").lower()
        if any(keyword in requirements for keyword in requirement_keywords):
            return control
    return None


def test_control_library():
    """Check the Control Library against the shared TEST_CASES.

    Fetches published controls from ``compliance.canonical_controls`` whose
    title or requirements match a fixed set of lit-mapping patterns, then
    checks for each test case whether one of those controls is relevant for
    the case's ``expected_purpose`` and states the expected legal basis.

    The check only inspects the fetched controls; the ``dse_text`` of a test
    case is not analysed here. Negative cases (``expected_finding`` False)
    therefore pass by construction, because no control is ever matched to them.

    The run is skipped, returning ``(0, 0)``, when ``asyncpg`` is not
    installed, when neither ``COMPLIANCE_DATABASE_URL`` nor ``DATABASE_URL``
    is set, or when the query returns no controls.

    NOTE(review): the name starts with ``test_``, so pytest would presumably
    collect this function, and recent pytest versions complain about test
    functions that return a value. Confirm how this file is meant to be run.

    Returns:
        tuple: ``(passed, failed)`` counts, or ``(0, 0)`` when skipped.
    """
    try:
        import asyncpg
    except ImportError:
        print("\n SKIP: asyncpg nicht installiert — Control Library Test uebersprungen")
        return 0, 0

    # `or` instead of a nested default: an empty COMPLIANCE_DATABASE_URL must
    # not hide a DATABASE_URL that is set.
    db_url = os.environ.get("COMPLIANCE_DATABASE_URL") or os.environ.get("DATABASE_URL", "")
    if not db_url:
        print("\n SKIP: Keine DATABASE_URL — Control Library Test uebersprungen")
        return 0, 0

    print("\n" + "=" * 70)
    print("TEST 2: Control Library (canonical_controls)")
    print("=" * 70)

    async def _run():
        pool = await asyncpg.create_pool(db_url, min_size=1, max_size=2)
        passed = 0
        failed = 0

        try:
            async with pool.acquire() as conn:
                # Fetch lit-mapping relevant controls.
                # NOTE(review): there is no title pattern for newsletter,
                # marketing, remarketing, retargeting, recording or heatmap,
                # so controls for those purposes are only fetched if they
                # happen to match one of the patterns below — confirm that
                # this filter is intended.
                controls = await conn.fetch("""
                    SELECT control_id, title, objective, requirements
                    FROM compliance.canonical_controls
                    WHERE (
                        title ILIKE '%einwilligung%tracking%'
                        OR title ILIKE '%rechtsgrundlage%cookie%'
                        OR title ILIKE '%consent%cookie%'
                        OR title ILIKE '%einwilligung%cookie%'
                        OR title ILIKE '%art. 22%'
                        OR title ILIKE '%automatisierte%entscheidung%'
                        OR requirements ILIKE '%lit. a%tracking%'
                        OR requirements ILIKE '%einwilligung%analytics%'
                    )
                    AND release_state = 'published'
                    LIMIT 50
                """)

            print(f" Gefundene Lit-Mapping Controls: {len(controls)}")
            for c in controls[:10]:
                # Guard against NULL titles, as the matcher does.
                print(f" [{c['control_id']}] {(c['title'] or '')[:60]}")

            if not controls:
                print(" WARNUNG: Keine Lit-Mapping Controls in der DB!")
                return 0, 0

            # For each test case, check if a control would catch it.
            for tc in TEST_CASES:
                matched_control = _find_matching_control(tc, controls)
                has_match = matched_control is not None

                # Positive cases need a match; negative cases must have none.
                if has_match == bool(tc["expected_finding"]):
                    status = "PASS"
                    passed += 1
                else:
                    status = "FAIL"
                    failed += 1

                print(f" [{status}] {tc['id']}: {tc['description']}")
                if has_match:
                    title = (matched_control["title"] or "")[:60]
                    print(f" → Control: [{matched_control['control_id']}] {title}")
                elif tc["expected_finding"]:
                    print(" → KEIN passender Control gefunden!")
        finally:
            # Always release the pool, including on the early return above.
            await pool.close()

        print(f"\n Ergebnis: {passed} bestanden, {failed} fehlgeschlagen\n")
        return passed, failed

    return asyncio.run(_run())
|
||||
|
||||
|
||||
def test_comparison():
    """Run both checks and print whether the hardcoded dict can be removed.

    Calls ``test_hardcoded_dict()`` and ``test_control_library()``, prints
    both pass counts and then one of three verdicts:

    * the Control Library covers all cases: it evaluated at least one case,
      had no failure and passed at least as many cases as the dict;
    * the Control Library covers some cases: it passed at least one;
    * the Control Library covers nothing. This is also the verdict when the
      Control Library run was skipped and returned ``(0, 0)``.
    """
    rule = "=" * 70
    print("\n" + rule)
    print("VERGLEICH: Dict vs. Control Library")
    print(rule)

    dict_passed, dict_failed = test_hardcoded_dict()
    ctrl_passed, ctrl_failed = test_control_library()

    dict_total = dict_passed + dict_failed
    ctrl_total = ctrl_passed + ctrl_failed

    print("\n" + rule)
    print("ZUSAMMENFASSUNG")
    print(rule)
    print(f" Dict: {dict_passed}/{dict_total} bestanden")
    print(f" Control Library: {ctrl_passed}/{ctrl_total} bestanden")

    # A skipped run reports (0, 0). Without the ctrl_total check that would
    # count as "no failures" and, if the dict also passed 0 cases, wrongly
    # recommend deleting the dict.
    ctrl_covers_all = (
        ctrl_total > 0 and ctrl_failed == 0 and ctrl_passed >= dict_passed
    )

    if ctrl_covers_all:
        print("\n ✓ Control Library deckt alle Faelle ab → Dict kann entfernt werden")
    elif ctrl_passed > 0:
        print("\n ⚠ Control Library deckt teilweise ab → Dict als Fallback behalten")
    else:
        print("\n ✗ Control Library deckt nichts ab → Dict wird noch gebraucht")

    print(rule)
|
||||
|
||||
|
||||
# Script entry point: run the dict-vs-Control-Library comparison when this
# file is executed directly.
if __name__ == "__main__":
    test_comparison()
|
||||
Reference in New Issue
Block a user