"""Adversarial test suite for the control ontology and dedup engine.

Thirty tricky cases that challenge the control ontology and dedup engine
with edge cases.

Test categories:
  A. Wrong legal basis (plausible but incorrect) — 8 cases
  B. Dark patterns (subtle UI manipulation) — 6 cases
  C. Almost-complete documents (missing 1 field) — 6 cases
  D. Semantically similar but different controls — 5 cases
  E. Homonyms (different meaning, same words) — 5 cases
"""

import os
import sys

import yaml
import pytest

# Put the parent directory of this file on sys.path so that the
# ``services`` package can be imported below.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))

from services.control_ontology import classify_obligation, classify_action

ADVERSARIAL_PATH = os.path.join(os.path.dirname(__file__), "adversarial_cases.yaml")

# Read the fixture explicitly as UTF-8: without ``encoding`` the platform
# default is used, which is not UTF-8 everywhere and can break or garble
# non-ASCII text in the YAML file.
with open(ADVERSARIAL_PATH, encoding="utf-8") as f:
    _ADV = yaml.safe_load(f)

# List of test-case mappings taken from the top-level "tests" key.
TESTS = _ADV["tests"]


def _tests_by_category(cat: str) -> list:
    """Return every entry of ``TESTS`` whose ``"category"`` equals *cat*."""
    return [t for t in TESTS if t["category"] == cat]


# ============================================================================
# D. Semantically similar but different — must NOT be deduped
# ============================================================================

class TestSimilarButDifferent:
    """Controls that sound alike but are different — dedup must keep both."""

    @pytest.mark.parametrize(
        "case",
        _tests_by_category("similar_but_different"),
        ids=lambda c: c["id"],
    )
    def test_not_duplicate(self, case):
        """Each case's expected outcome must be ``is_duplicate: False``."""
        assert case["expected"]["is_duplicate"] is False, (
            f"{case['id']}: These controls MUST NOT be marked as duplicates"
        )

    def test_admin_vs_user_mfa(self):
        """ADV-SEM-001: Admin-MFA and User-MFA are different controls."""
        case = next(t for t in TESTS if t["id"] == "ADV-SEM-001")
        a = classify_obligation(case["control_a"], "")
        b = classify_obligation(case["control_b"], "")
        # Both should be atomic (not filtered out)
        assert a["routing"] == "atomic"
        assert b["routing"] == "atomic"

    def test_encryption_at_rest_vs_in_transit(self):
        """ADV-SEM-004: at rest vs in transit are different controls."""
        a_action = classify_action("Verschluesselung at rest implementieren")
        b_action = classify_action("Verschluesselung in transit implementieren")
        # Both should classify as "encrypt" or "implement"
        assert a_action in ("encrypt", "implement")
        assert b_action in ("encrypt", "implement")


# ============================================================================
# E. Homonyms — same words, different domains
# ============================================================================

class TestHomonymDifferent:
    """Controls using same words but from different domains — must NOT merge."""

    @pytest.mark.parametrize(
        "case",
        _tests_by_category("homonym_different"),
        ids=lambda c: c["id"],
    )
    def test_not_duplicate(self, case):
        """Each case's expected outcome must be ``is_duplicate: False``."""
        assert case["expected"]["is_duplicate"] is False, (
            f"{case['id']}: Homonyms must NOT be treated as duplicates"
        )

    def test_dsgvo_audit_vs_hgb_audit(self):
        """ADV-HOM-003: Data protection audit vs financial audit."""
        a = classify_obligation("Audit der Datenschutz-Compliance durchfuehren", "")
        b = classify_obligation("Audit der Jahresabschlusspruefung durchfuehren", "")
        assert a["routing"] == "atomic"
        assert b["routing"] == "atomic"
        # "durchfuehren" maps to "implement" — key point is both are atomic, not filtered


# ============================================================================
# A.
# Wrong legal basis — structural tests
# ============================================================================

# Shared parametrization over every "wrong_legal_basis" case, labelled by id.
_wrong_legal_basis_cases = pytest.mark.parametrize(
    "case",
    _tests_by_category("wrong_legal_basis"),
    ids=lambda c: c["id"],
)


class TestWrongLegalBasis:
    """Verify that wrong legal basis cases have correct expected metadata."""

    @_wrong_legal_basis_cases
    def test_finding_expected(self, case):
        """Every wrong_legal_basis case has to expect a finding."""
        expected = case["expected"]
        assert expected["finding"] is True

    @_wrong_legal_basis_cases
    def test_has_correct_basis(self, case):
        """Every case has to name a non-empty correct legal basis."""
        expected = case["expected"]
        assert "correct_basis" in expected
        assert len(expected["correct_basis"]) > 0

    def test_analytics_requires_consent(self):
        """ADV-LIT-001: Analytics on lit. f is always wrong."""
        matching = filter(lambda t: t["id"] == "ADV-LIT-001", TESTS)
        expected = next(matching)["expected"]
        assert "lit. a" in expected["correct_basis"]
        assert "Planet49" in expected["reason"]


# ============================================================================
# B. Dark Patterns — structural tests
# ============================================================================

# Shared parametrization over every "dark_pattern" case, labelled by id.
_dark_pattern_cases = pytest.mark.parametrize(
    "case",
    _tests_by_category("dark_pattern"),
    ids=lambda c: c["id"],
)


class TestDarkPatterns:
    """Verify dark pattern test case structure."""

    @_dark_pattern_cases
    def test_finding_expected(self, case):
        """Every dark pattern case has to expect a finding."""
        expected = case["expected"]
        assert expected["finding"] is True

    @_dark_pattern_cases
    def test_has_finding_type(self, case):
        """Every case has to declare a ``dark_pattern_``-prefixed finding type."""
        expected = case["expected"]
        assert "finding_type" in expected
        assert expected["finding_type"].startswith("dark_pattern_")


# ============================================================================
# C.
# Incomplete documents — structural tests
# ============================================================================

class TestIncompleteDocuments:
    """Verify incomplete document test case structure."""

    @pytest.mark.parametrize(
        "case",
        _tests_by_category("incomplete_document"),
        ids=lambda c: c["id"],
    )
    def test_has_reason(self, case):
        """All cases must have a non-empty reason."""
        assert "reason" in case["expected"]
        assert len(case["expected"]["reason"]) > 0

    def test_agb_gerichtsstand_no_finding(self):
        """ADV-DOC-005: Missing Gerichtsstand in B2C AGB is NOT a finding."""
        case = next(t for t in TESTS if t["id"] == "ADV-DOC-005")
        assert case["expected"]["finding"] is False


# ============================================================================
# Meta tests — validate test suite integrity
# ============================================================================

class TestSuiteIntegrity:
    """Verify the adversarial test suite itself is complete and consistent."""

    def test_total_count(self):
        """The suite must contain exactly 30 cases."""
        assert len(TESTS) == 30, f"expected 30 test cases, found {len(TESTS)}"

    def test_unique_ids(self):
        """No two cases may share an id."""
        ids = [t["id"] for t in TESTS]
        assert len(ids) == len(set(ids)), "Duplicate test IDs found"

    def test_all_categories_present(self):
        """The set of categories in use must match the five known ones exactly."""
        categories = {t["category"] for t in TESTS}
        expected = {
            "wrong_legal_basis",
            "dark_pattern",
            "incomplete_document",
            "similar_but_different",
            "homonym_different",
        }
        assert categories == expected

    def test_category_counts(self):
        """Each category must contain its expected number of cases."""
        # Function-scope import keeps this block self-contained.
        from collections import Counter

        # A Counter yields 0 for an absent category, so a missing category
        # fails the assertion with a readable message instead of a KeyError.
        counts = Counter(t["category"] for t in TESTS)
        expected_counts = {
            "wrong_legal_basis": 8,
            "dark_pattern": 6,
            "incomplete_document": 6,
            "similar_but_different": 5,
            "homonym_different": 5,
        }
        for category, expected in expected_counts.items():
            assert counts[category] == expected, (
                f"{category}: expected {expected} cases, found {counts[category]}"
            )

    def test_all_have_difficulty(self):
        """Every case must carry a difficulty of easy, medium or hard."""
        for t in TESTS:
            assert "difficulty" in t, f"{t['id']} missing difficulty"
            assert t["difficulty"] in ("easy", "medium", "hard"), (
                f"{t['id']} has invalid difficulty {t['difficulty']!r}"
            )