test: Lit-Mapping validation — Dict vs Control Library comparison
8 test cases: 6 with deliberately wrong legal basis assignments and 2 correct controls:
- Cookie tracking on lit. f (should be lit. a)
- Analytics on lit. b (should be lit. a)
- Newsletter on lit. f (should be lit. a)
- Remarketing on lit. f (should be lit. a)
- Klarna credit check without Art. 22 notice
- Session recording on lit. f (should be lit. a)
- 2 correct cases (should NOT trigger findings)

Runs both the hardcoded dict AND the Control Library query and compares the results. If the Control Library passes all cases → the dict can be removed.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,316 @@
|
|||||||
|
"""
|
||||||
|
Lit-Mapping Validation Test — verifies that BOTH the hardcoded dict AND
|
||||||
|
the Control Library detect the same legal basis errors.
|
||||||
|
|
||||||
|
If both produce the same results, we can safely delete the dict.
|
||||||
|
|
||||||
|
Test cases use deliberately WRONG legal basis assignments that are
|
||||||
|
common mistakes on real websites.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# Add parent to path for imports
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
|
||||||
|
# ═══════════════════════════════════════════════════════════════
|
||||||
|
# TEST CASES — Deliberately wrong DSE text blocks
|
||||||
|
# ═══════════════════════════════════════════════════════════════
|
||||||
|
|
||||||
|
# Each entry is one privacy-policy (DSE) snippet plus the expectation for it:
#   id                - stable identifier printed in the report
#   description       - human-readable summary (German, printed verbatim)
#   dse_text          - the policy text handed to the validator
#   expected_finding  - True if the snippet should be flagged
#   expected_purpose  - processing purpose used to pick relevant controls
#                       (None for the negative cases)
#   correct_basis     - the legal basis the text should have cited
#   wrong_basis       - the basis actually cited (None when the text is fine)
TEST_CASES = [
    # --- Positive cases: wrong legal basis, a finding is expected ---------
    {
        "id": "cookie_tracking_wrong_litf",
        "description": "Cookie-Tracking auf lit. f statt lit. a",
        "dse_text": (
            "Wir setzen Cookies und aehnliche Tracking-Technologien ein, "
            "um die Nutzung unserer Website zu analysieren. Die Verarbeitung "
            "erfolgt auf Grundlage unseres berechtigten Interesses gemaess "
            "Art. 6 Abs. 1 lit. f DSGVO an der Optimierung unseres Angebots."
        ),
        "expected_finding": True,
        "expected_purpose": "cookie_tracking",
        "correct_basis": "lit. a (Einwilligung)",
        "wrong_basis": "lit. f",
    },
    {
        "id": "analytics_wrong_litb",
        "description": "Google Analytics auf lit. b (Vertragserfuellung) statt lit. a",
        "dse_text": (
            "Wir nutzen Google Analytics zur Webanalyse. Die Datenverarbeitung "
            "erfolgt auf Basis der Vertragserfuellung gemaess Art. 6 Abs. 1 lit. b DSGVO, "
            "da die Analyse fuer die Erbringung unserer Dienste erforderlich ist."
        ),
        "expected_finding": True,
        "expected_purpose": "web_analytics",
        "correct_basis": "lit. a (Einwilligung)",
        "wrong_basis": "lit. b",
    },
    {
        "id": "newsletter_wrong_litf",
        "description": "Newsletter auf lit. f statt lit. a",
        "dse_text": (
            "Wir versenden regelmaessig Newsletter mit Informationen zu unseren Produkten. "
            "Die Verarbeitung Ihrer E-Mail-Adresse erfolgt auf Grundlage unseres "
            "berechtigten Interesses gemaess Art. 6 Abs. 1 lit. f DSGVO an der "
            "Direktwerbung fuer eigene aehnliche Produkte."
        ),
        "expected_finding": True,
        "expected_purpose": "marketing_email",
        "correct_basis": "lit. a (Einwilligung)",
        "wrong_basis": "lit. f",
    },
    {
        "id": "remarketing_wrong_litf",
        "description": "Remarketing/Retargeting auf lit. f statt lit. a",
        "dse_text": (
            "Wir setzen Remarketing-Technologien ein, um Ihnen auf anderen Websites "
            "personalisierte Werbung anzuzeigen. Die Verarbeitung basiert auf unserem "
            "berechtigten Interesse an effektiver Werbung (Art. 6 Abs. 1 lit. f DSGVO)."
        ),
        "expected_finding": True,
        "expected_purpose": "remarketing",
        "correct_basis": "lit. a (Einwilligung)",
        "wrong_basis": "lit. f",
    },
    {
        "id": "klarna_missing_art22",
        "description": "Klarna Bonitaetspruefung ohne Art. 22 Hinweis",
        "dse_text": (
            "Bei Auswahl der Zahlungsart Rechnung ueber Klarna wird eine "
            "Bonitaetspruefung durchgefuehrt. Klarna AB, Stockholm, Schweden, "
            "uebermittelt Ihre Daten an Auskunfteien. Rechtsgrundlage ist "
            "Art. 6 Abs. 1 lit. b DSGVO (Vertragserfuellung)."
        ),
        "expected_finding": True,
        "expected_purpose": "credit_check",
        "correct_basis": "lit. b/f + Art. 22 DSGVO Hinweis",
        "wrong_basis": "(fehlt)",
    },
    {
        "id": "session_recording_wrong_litf",
        "description": "Session Recording (Hotjar) auf lit. f statt lit. a",
        "dse_text": (
            "Wir nutzen Hotjar zur Analyse des Nutzerverhaltens mittels Session Recording "
            "und Heatmaps. Die Aufzeichnung der Nutzersitzungen erfolgt auf Grundlage "
            "unseres berechtigten Interesses (Art. 6 Abs. 1 lit. f DSGVO)."
        ),
        "expected_finding": True,
        "expected_purpose": "session_recording",
        "correct_basis": "lit. a (Einwilligung)",
        "wrong_basis": "lit. f",
    },
    # --- Negative cases: correct legal basis, NO finding is expected ------
    {
        "id": "payment_correct_litb",
        "description": "Zahlung korrekt auf lit. b — sollte KEIN Finding sein",
        "dse_text": (
            "Die Verarbeitung Ihrer Zahlungsdaten durch unseren Zahlungsdienstleister "
            "Stripe erfolgt auf Grundlage der Vertragserfuellung gemaess "
            "Art. 6 Abs. 1 lit. b DSGVO."
        ),
        "expected_finding": False,
        "expected_purpose": None,
        "correct_basis": "lit. b (Vertragserfuellung)",
        "wrong_basis": None,
    },
    {
        "id": "analytics_correct_lita",
        "description": "Analytics korrekt auf lit. a — sollte KEIN Finding sein",
        "dse_text": (
            "Wir setzen Google Analytics nur mit Ihrer ausdruecklichen Einwilligung "
            "gemaess Art. 6 Abs. 1 lit. a DSGVO ein. Sie koennen Ihre Einwilligung "
            "jederzeit widerrufen."
        ),
        "expected_finding": False,
        "expected_purpose": None,
        "correct_basis": "lit. a (Einwilligung)",
        "wrong_basis": None,
    },
]
|
||||||
|
|
||||||
|
|
||||||
|
def test_hardcoded_dict():
    """Run every entry of TEST_CASES through the hardcoded validator.

    Each case's ``dse_text`` is passed to ``validate_legal_bases``; the case
    passes when "at least one finding was returned" equals the case's
    ``expected_finding`` flag. A per-case line and a summary are printed.

    Returns:
        A ``(passed, failed)`` tuple of case counts.
    """
    # Imported lazily so the module can be loaded without the compliance package.
    from compliance.services.legal_basis_validator import validate_legal_bases

    rule = "=" * 70
    print("\n" + rule)
    print("TEST 1: Hartkodiertes Dict (legal_basis_validator.py)")
    print(rule)

    tally = {"PASS": 0, "FAIL": 0}

    for case in TEST_CASES:
        hits = validate_legal_bases(case["dse_text"])
        flagged = len(hits) > 0

        verdict = "PASS" if flagged == case["expected_finding"] else "FAIL"
        tally[verdict] += 1

        print(f" [{verdict}] {case['id']}: {case['description']}")
        if flagged:
            # Show a short excerpt of every finding the validator produced.
            for hit in hits:
                print(f" → {hit.text[:80]}")
        elif case["expected_finding"]:
            # A finding was expected but the validator stayed silent.
            print(f" → ERWARTET: Finding fuer {case['expected_purpose']}, aber KEINS gefunden")

    passed, failed = tally["PASS"], tally["FAIL"]
    print(f"\n Ergebnis: {passed} bestanden, {failed} fehlgeschlagen\n")
    return passed, failed
|
||||||
|
|
||||||
|
|
||||||
|
# Title keywords that make a control relevant for a given test-case purpose.
_PURPOSE_TITLE_KEYWORDS = {
    "cookie_tracking": ("cookie", "tracking"),
    "web_analytics": ("analytics", "tracking"),
    "marketing_email": ("newsletter", "marketing"),
    "remarketing": ("remarketing", "retargeting"),
    "credit_check": ("art. 22", "bonitaet"),
    "session_recording": ("recording", "heatmap"),
}

# Phrases in a control's requirements that indicate it demands consent (lit. a).
_CONSENT_MARKERS = ("einwilligung", "consent", "lit. a")


def _find_matching_control(tc, controls):
    """Return the first control that covers test case *tc*, or None.

    A control covers a case when the case expects a finding, the control's
    title contains one of the keywords registered for the case's purpose,
    and the control's requirements mention consent.
    """
    if not tc["expected_finding"]:
        # Negative cases never match; "no match" is the correct outcome for them.
        return None

    keywords = _PURPOSE_TITLE_KEYWORDS.get(tc["expected_purpose"], ())
    for control in controls:
        # title/requirements may be NULL in the database.
        title = (control["title"] or "").lower()
        if not any(keyword in title for keyword in keywords):
            continue
        requirements = (control["requirements"] or "").lower()
        if any(marker in requirements for marker in _CONSENT_MARKERS):
            return control
    return None


def test_control_library():
    """Check whether the Control Library covers the cases in TEST_CASES.

    Fetches published lit-mapping controls from
    ``compliance.canonical_controls`` and, for every test case, looks for a
    control whose title matches the case's purpose and whose requirements
    mention consent. Matching uses the control metadata only; the case's
    ``dse_text`` is not evaluated here.

    The test is skipped (with a printed notice) when ``asyncpg`` is not
    installed or neither ``COMPLIANCE_DATABASE_URL`` nor ``DATABASE_URL``
    is set.

    Returns:
        A ``(passed, failed)`` tuple of case counts; ``(0, 0)`` when the
        test was skipped or no controls were found.
    """
    try:
        import asyncpg
    except ImportError:
        print("\n SKIP: asyncpg nicht installiert — Control Library Test uebersprungen")
        return 0, 0

    db_url = os.environ.get(
        "COMPLIANCE_DATABASE_URL",
        os.environ.get("DATABASE_URL", ""),
    )
    if not db_url:
        print("\n SKIP: Keine DATABASE_URL — Control Library Test uebersprungen")
        return 0, 0

    print("\n" + "=" * 70)
    print("TEST 2: Control Library (canonical_controls)")
    print("=" * 70)

    async def _run():
        pool = await asyncpg.create_pool(db_url, min_size=1, max_size=2)
        passed = 0
        failed = 0

        try:
            async with pool.acquire() as conn:
                # Fetch lit-mapping relevant controls. ORDER BY keeps the
                # LIMITed sample stable between runs.
                controls = await conn.fetch("""
                    SELECT control_id, title, objective, requirements
                    FROM compliance.canonical_controls
                    WHERE (
                        title ILIKE '%einwilligung%tracking%'
                        OR title ILIKE '%rechtsgrundlage%cookie%'
                        OR title ILIKE '%consent%cookie%'
                        OR title ILIKE '%einwilligung%cookie%'
                        OR title ILIKE '%art. 22%'
                        OR title ILIKE '%automatisierte%entscheidung%'
                        OR requirements ILIKE '%lit. a%tracking%'
                        OR requirements ILIKE '%einwilligung%analytics%'
                    )
                    AND release_state = 'published'
                    ORDER BY control_id
                    LIMIT 50
                """)

                print(f" Gefundene Lit-Mapping Controls: {len(controls)}")
                for c in controls[:10]:
                    # Guard against NULL titles so the listing cannot crash.
                    print(f" [{c['control_id']}] {(c['title'] or '')[:60]}")

                if not controls:
                    print(" WARNUNG: Keine Lit-Mapping Controls in der DB!")
                    return 0, 0

                # For each test case, check if a control would catch it.
                for tc in TEST_CASES:
                    matched_control = _find_matching_control(tc, controls)
                    has_match = matched_control is not None

                    # Positive cases need a match, negative cases must have none.
                    if has_match == bool(tc["expected_finding"]):
                        status = "PASS"
                        passed += 1
                    else:
                        status = "FAIL"
                        failed += 1

                    print(f" [{status}] {tc['id']}: {tc['description']}")
                    if has_match:
                        title = (matched_control["title"] or "")[:60]
                        print(f" → Control: [{matched_control['control_id']}] {title}")
                    elif tc["expected_finding"]:
                        print(" → KEIN passender Control gefunden!")
        finally:
            # Always release the pool, including on the early return above.
            await pool.close()

        print(f"\n Ergebnis: {passed} bestanden, {failed} fehlgeschlagen\n")
        return passed, failed

    return asyncio.run(_run())
|
||||||
|
|
||||||
|
|
||||||
|
def test_comparison():
    """Run both validators and print a side-by-side verdict.

    Calls ``test_hardcoded_dict`` and ``test_control_library``, prints the
    pass counts of each, and then one recommendation line:

    * Control Library test did not evaluate any case (it reports ``(0, 0)``
      when skipped or when no controls exist) → no conclusion, keep the dict.
    * Control Library passed every case and at least as many as the dict
      → the dict can be removed.
    * Control Library passed some cases → keep the dict as a fallback.
    * Control Library passed none → the dict is still needed.
    """
    rule = "=" * 70
    print("\n" + rule)
    print("VERGLEICH: Dict vs. Control Library")
    print(rule)

    dict_passed, dict_failed = test_hardcoded_dict()
    ctrl_passed, ctrl_failed = test_control_library()
    ctrl_total = ctrl_passed + ctrl_failed

    print("\n" + rule)
    print("ZUSAMMENFASSUNG")
    print(rule)
    print(f" Dict: {dict_passed}/{dict_passed + dict_failed} bestanden")
    print(f" Control Library: {ctrl_passed}/{ctrl_total} bestanden")

    if ctrl_total == 0:
        # Zero evaluated cases must never count as "covers everything" (or as
        # "covers nothing"): the Control Library simply was not tested.
        print("\n ⚠ Control Library Test nicht ausgefuehrt → keine Aussage moeglich, Dict behalten")
    elif ctrl_passed >= dict_passed and ctrl_failed == 0:
        print("\n ✓ Control Library deckt alle Faelle ab → Dict kann entfernt werden")
    elif ctrl_passed > 0:
        print("\n ⚠ Control Library deckt teilweise ab → Dict als Fallback behalten")
    else:
        print("\n ✗ Control Library deckt nichts ab → Dict wird noch gebraucht")

    print(rule)
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point: run the full Dict vs. Control Library comparison.
if __name__ == "__main__":
    test_comparison()
|
||||||
Reference in New Issue
Block a user