# Provenance (taken from the git patch this file was extracted from):
#   commit   0f3ba9c207fee2f87f07f31ccbc9c727e12948cc
#   author   Benjamin Admin
#   date     Wed, 29 Apr 2026 16:56:38 +0200
#   subject  test: Lit-Mapping validation — Dict vs Control Library comparison
#   path     backend-compliance/tests/test_lit_mapping_validation.py
#   Co-Authored-By: Claude Opus 4.6 (1M context)
#
# Commit message:
#   8 test cases with deliberately wrong legal basis assignments:
#   - Cookie tracking on lit. f (should be lit. a)
#   - Analytics on lit. b (should be lit. a)
#   - Newsletter on lit. f (should be lit. a)
#   - Klarna without Art. 22
#   - Session recording on lit. f
#   - 2 correct cases (should NOT trigger findings)
#   Runs both hardcoded dict AND Control Library query, compares results.
#   If Control Library passes all → dict can be removed.
"""
Lit-Mapping Validation Test — verifies that BOTH the hardcoded dict AND
the Control Library detect the same legal basis errors.

If both produce the same results, we can safely delete the dict.

Test cases use deliberately WRONG legal basis assignments that are
common mistakes on real websites.

NOTE(review): the ``test_*`` functions return ``(passed, failed)`` tuples
because ``test_comparison`` consumes them. pytest warns about (and newer
releases reject) test functions that return a non-None value — consider
splitting each into a runner plus an asserting test.
"""

import asyncio
import json
import os
import sys

# Add parent to path for imports
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

# Star-importing this module must not copy the test_* functions into the
# importing module, where pytest would collect them a second time.
__all__ = ["TEST_CASES", "find_matching_control", "comparison_verdict"]


# ═══════════════════════════════════════════════════════════════
# TEST CASES — Deliberately wrong DSE text blocks
# ═══════════════════════════════════════════════════════════════

TEST_CASES = [
    {
        "id": "cookie_tracking_wrong_litf",
        "description": "Cookie-Tracking auf lit. f statt lit. a",
        "dse_text": (
            "Wir setzen Cookies und aehnliche Tracking-Technologien ein, "
            "um die Nutzung unserer Website zu analysieren. Die Verarbeitung "
            "erfolgt auf Grundlage unseres berechtigten Interesses gemaess "
            "Art. 6 Abs. 1 lit. f DSGVO an der Optimierung unseres Angebots."
        ),
        "expected_finding": True,
        "expected_purpose": "cookie_tracking",
        "correct_basis": "lit. a (Einwilligung)",
        "wrong_basis": "lit. f",
    },
    {
        "id": "analytics_wrong_litb",
        "description": "Google Analytics auf lit. b (Vertragserfuellung) statt lit. a",
        "dse_text": (
            "Wir nutzen Google Analytics zur Webanalyse. Die Datenverarbeitung "
            "erfolgt auf Basis der Vertragserfuellung gemaess Art. 6 Abs. 1 lit. b DSGVO, "
            "da die Analyse fuer die Erbringung unserer Dienste erforderlich ist."
        ),
        "expected_finding": True,
        "expected_purpose": "web_analytics",
        "correct_basis": "lit. a (Einwilligung)",
        "wrong_basis": "lit. b",
    },
    {
        "id": "newsletter_wrong_litf",
        "description": "Newsletter auf lit. f statt lit. a",
        "dse_text": (
            "Wir versenden regelmaessig Newsletter mit Informationen zu unseren Produkten. "
            "Die Verarbeitung Ihrer E-Mail-Adresse erfolgt auf Grundlage unseres "
            "berechtigten Interesses gemaess Art. 6 Abs. 1 lit. f DSGVO an der "
            "Direktwerbung fuer eigene aehnliche Produkte."
        ),
        "expected_finding": True,
        "expected_purpose": "marketing_email",
        "correct_basis": "lit. a (Einwilligung)",
        "wrong_basis": "lit. f",
    },
    {
        "id": "remarketing_wrong_litf",
        "description": "Remarketing/Retargeting auf lit. f statt lit. a",
        "dse_text": (
            "Wir setzen Remarketing-Technologien ein, um Ihnen auf anderen Websites "
            "personalisierte Werbung anzuzeigen. Die Verarbeitung basiert auf unserem "
            "berechtigten Interesse an effektiver Werbung (Art. 6 Abs. 1 lit. f DSGVO)."
        ),
        "expected_finding": True,
        "expected_purpose": "remarketing",
        "correct_basis": "lit. a (Einwilligung)",
        "wrong_basis": "lit. f",
    },
    {
        "id": "klarna_missing_art22",
        "description": "Klarna Bonitaetspruefung ohne Art. 22 Hinweis",
        "dse_text": (
            "Bei Auswahl der Zahlungsart Rechnung ueber Klarna wird eine "
            "Bonitaetspruefung durchgefuehrt. Klarna AB, Stockholm, Schweden, "
            "uebermittelt Ihre Daten an Auskunfteien. Rechtsgrundlage ist "
            "Art. 6 Abs. 1 lit. b DSGVO (Vertragserfuellung)."
        ),
        "expected_finding": True,
        "expected_purpose": "credit_check",
        "correct_basis": "lit. b/f + Art. 22 DSGVO Hinweis",
        "wrong_basis": "(fehlt)",
    },
    {
        "id": "session_recording_wrong_litf",
        "description": "Session Recording (Hotjar) auf lit. f statt lit. a",
        "dse_text": (
            "Wir nutzen Hotjar zur Analyse des Nutzerverhaltens mittels Session Recording "
            "und Heatmaps. Die Aufzeichnung der Nutzersitzungen erfolgt auf Grundlage "
            "unseres berechtigten Interesses (Art. 6 Abs. 1 lit. f DSGVO)."
        ),
        "expected_finding": True,
        "expected_purpose": "session_recording",
        "correct_basis": "lit. a (Einwilligung)",
        "wrong_basis": "lit. f",
    },
    {
        "id": "payment_correct_litb",
        "description": "Zahlung korrekt auf lit. b — sollte KEIN Finding sein",
        "dse_text": (
            "Die Verarbeitung Ihrer Zahlungsdaten durch unseren Zahlungsdienstleister "
            "Stripe erfolgt auf Grundlage der Vertragserfuellung gemaess "
            "Art. 6 Abs. 1 lit. b DSGVO."
        ),
        "expected_finding": False,
        "expected_purpose": None,
        "correct_basis": "lit. b (Vertragserfuellung)",
        "wrong_basis": None,
    },
    {
        "id": "analytics_correct_lita",
        "description": "Analytics korrekt auf lit. a — sollte KEIN Finding sein",
        "dse_text": (
            "Wir setzen Google Analytics nur mit Ihrer ausdruecklichen Einwilligung "
            "gemaess Art. 6 Abs. 1 lit. a DSGVO ein. Sie koennen Ihre Einwilligung "
            "jederzeit widerrufen."
        ),
        "expected_finding": False,
        "expected_purpose": None,
        "correct_basis": "lit. a (Einwilligung)",
        "wrong_basis": None,
    },
]


# ═══════════════════════════════════════════════════════════════
# Control Library matching rules
# ═══════════════════════════════════════════════════════════════

# purpose -> (title keywords that make a control relevant,
#             whether the control's requirements must also demand consent)
# credit_check does not need consent wording: the expected finding for that
# case is a missing Art. 22 notice (see its "correct_basis"), not a missing
# consent, so a relevant Art. 22 control is sufficient.
_PURPOSE_RULES = {
    "cookie_tracking": (("cookie", "tracking"), True),
    "web_analytics": (("analytics", "tracking"), True),
    "marketing_email": (("newsletter", "marketing"), True),
    "remarketing": (("remarketing", "retargeting"), True),
    "credit_check": (("art. 22", "bonitaet"), False),
    "session_recording": (("recording", "heatmap"), True),
}

# Substrings in a control's requirements that indicate consent (lit. a).
_CONSENT_MARKERS = ("einwilligung", "consent", "lit. a")

# NOTE(review): this prefilter has no title patterns for newsletter,
# remarketing or session-recording controls, so such controls are only
# fetched when their requirements match one of the last two patterns —
# confirm this is intended.
_CONTROL_QUERY = """
    SELECT control_id, title, objective, requirements
    FROM compliance.canonical_controls
    WHERE (
        title ILIKE '%einwilligung%tracking%'
        OR title ILIKE '%rechtsgrundlage%cookie%'
        OR title ILIKE '%consent%cookie%'
        OR title ILIKE '%einwilligung%cookie%'
        OR title ILIKE '%art. 22%'
        OR title ILIKE '%automatisierte%entscheidung%'
        OR requirements ILIKE '%lit. a%tracking%'
        OR requirements ILIKE '%einwilligung%analytics%'
    )
    AND release_state = 'published'
    LIMIT 50
"""

_RULE = "=" * 70


def _status(ok):
    """Return the label printed for a single test case outcome."""
    return "PASS" if ok else "FAIL"


def find_matching_control(tc, controls):
    """Return the first control that would flag test case *tc*, or None.

    Args:
        tc: One entry of ``TEST_CASES``; ``expected_finding`` and
            ``expected_purpose`` are read.
        controls: Iterable of mappings with ``title`` and ``requirements``
            keys (either value may be None).

    A control matches when its title contains one of the purpose's keywords
    and, for consent-based purposes, its requirements mention consent.
    Cases that expect no finding (or have an unknown purpose) never match.

    NOTE(review): the DSE text itself is never inspected here, so negative
    cases pass trivially — this only checks that a suitable control exists.
    """
    if not tc["expected_finding"]:
        return None

    rule = _PURPOSE_RULES.get(tc["expected_purpose"])
    if rule is None:
        return None
    keywords, needs_consent = rule

    for control in controls:
        title = (control["title"] or "").lower()
        if not any(keyword in title for keyword in keywords):
            continue
        if not needs_consent:
            return control
        requirements = (control["requirements"] or "").lower()
        if any(marker in requirements for marker in _CONSENT_MARKERS):
            return control
    return None


def comparison_verdict(dict_passed, ctrl_passed, ctrl_failed):
    """Return the summary line comparing dict and Control Library results.

    Args:
        dict_passed: Number of cases the hardcoded dict handled correctly.
        ctrl_passed: Number of cases the Control Library handled correctly.
        ctrl_failed: Number of cases the Control Library got wrong.

    The "dict can be removed" verdict requires that the Control Library
    actually evaluated at least one case; a skipped or empty run (0/0) must
    not count as full coverage.
    """
    ctrl_total = ctrl_passed + ctrl_failed
    if ctrl_total > 0 and ctrl_failed == 0 and ctrl_passed >= dict_passed:
        return "\n ✓ Control Library deckt alle Faelle ab → Dict kann entfernt werden"
    if ctrl_passed > 0:
        return "\n ⚠ Control Library deckt teilweise ab → Dict als Fallback behalten"
    return "\n ✗ Control Library deckt nichts ab → Dict wird noch gebraucht"


def test_hardcoded_dict():
    """Run every test case through ``validate_legal_bases`` and print a report.

    Returns:
        ``(passed, failed)`` — a case passes when the presence of findings
        equals its ``expected_finding`` flag.
    """
    from compliance.services.legal_basis_validator import validate_legal_bases

    print("\n" + _RULE)
    print("TEST 1: Hartkodiertes Dict (legal_basis_validator.py)")
    print(_RULE)

    passed = 0
    failed = 0

    for tc in TEST_CASES:
        findings = validate_legal_bases(tc["dse_text"])
        has_finding = len(findings) > 0
        ok = has_finding == tc["expected_finding"]
        if ok:
            passed += 1
        else:
            failed += 1

        print(f" [{_status(ok)}] {tc['id']}: {tc['description']}")
        if has_finding:
            for finding in findings:
                print(f" → {finding.text[:80]}")
        elif tc["expected_finding"]:
            print(f" → ERWARTET: Finding fuer {tc['expected_purpose']}, aber KEINS gefunden")

    print(f"\n Ergebnis: {passed} bestanden, {failed} fehlgeschlagen\n")
    return passed, failed


def test_control_library():
    """Check the same test cases against controls from ``canonical_controls``.

    Skips (returning ``(0, 0)``) when ``asyncpg`` is not installed, when no
    database URL is configured, or when the query returns no controls.

    Returns:
        ``(passed, failed)`` — a positive case passes when a matching
        control exists, a negative case passes when none matches.
    """
    try:
        import asyncpg
    except ImportError:
        print("\n SKIP: asyncpg nicht installiert — Control Library Test uebersprungen")
        return 0, 0

    # `or` (not a nested default) so an empty COMPLIANCE_DATABASE_URL still
    # falls back to DATABASE_URL.
    db_url = os.environ.get("COMPLIANCE_DATABASE_URL") or os.environ.get("DATABASE_URL", "")
    if not db_url:
        print("\n SKIP: Keine DATABASE_URL — Control Library Test uebersprungen")
        return 0, 0

    print("\n" + _RULE)
    print("TEST 2: Control Library (canonical_controls)")
    print(_RULE)

    async def _fetch_controls():
        # The pool is closed even if connecting or querying fails.
        pool = await asyncpg.create_pool(db_url, min_size=1, max_size=2)
        try:
            async with pool.acquire() as conn:
                return await conn.fetch(_CONTROL_QUERY)
        finally:
            await pool.close()

    controls = asyncio.run(_fetch_controls())

    print(f" Gefundene Lit-Mapping Controls: {len(controls)}")
    for control in controls[:10]:
        # title may be NULL in the database
        print(f" [{control['control_id']}] {(control['title'] or '')[:60]}")

    if not controls:
        print(" WARNUNG: Keine Lit-Mapping Controls in der DB!")
        return 0, 0

    passed = 0
    failed = 0

    for tc in TEST_CASES:
        matched = find_matching_control(tc, controls)
        # Positive cases need a match; negative cases must have none.
        ok = (matched is not None) == tc["expected_finding"]
        if ok:
            passed += 1
        else:
            failed += 1

        print(f" [{_status(ok)}] {tc['id']}: {tc['description']}")
        if matched is not None:
            print(f" → Control: [{matched['control_id']}] {(matched['title'] or '')[:60]}")
        elif tc["expected_finding"]:
            print(" → KEIN passender Control gefunden!")

    print(f"\n Ergebnis: {passed} bestanden, {failed} fehlgeschlagen\n")
    return passed, failed


def test_comparison():
    """Run both checks and print a side-by-side summary with a verdict."""
    print("\n" + _RULE)
    print("VERGLEICH: Dict vs. Control Library")
    print(_RULE)

    dict_passed, dict_failed = test_hardcoded_dict()
    ctrl_passed, ctrl_failed = test_control_library()

    print("\n" + _RULE)
    print("ZUSAMMENFASSUNG")
    print(_RULE)
    print(f" Dict: {dict_passed}/{dict_passed + dict_failed} bestanden")
    print(f" Control Library: {ctrl_passed}/{ctrl_passed + ctrl_failed} bestanden")

    print(comparison_verdict(dict_passed, ctrl_passed, ctrl_failed))

    print(_RULE)


if __name__ == "__main__":
    test_comparison()