# breakpilot-core/control-pipeline/tests/test_adversarial.py

"""
Adversarial Test Suite — 30 tricky cases that challenge the control ontology
and dedup engine with edge cases.

Test categories:
  A. Wrong legal basis (plausible but incorrect) — 8 cases
  B. Dark patterns (subtle UI manipulation) — 6 cases
  C. Almost-complete documents (missing 1 field) — 6 cases
  D. Semantically similar but different controls — 5 cases
  E. Homonyms (different meaning, same words) — 5 cases
"""

import os
import sys
from collections import Counter

import pytest
import yaml

sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))

from services.control_ontology import classify_obligation, classify_action

# Fixture file located in the same directory as this test module.
ADVERSARIAL_PATH = os.path.join(os.path.dirname(__file__), "adversarial_cases.yaml")

# Decode explicitly as UTF-8. Without an encoding argument open() falls back to
# the locale's preferred encoding, so any non-ASCII text in the fixtures could
# be mis-decoded (or fail to decode) on hosts whose locale is not UTF-8.
# NOTE(review): assumes the fixture file is stored as UTF-8 — confirm.
with open(ADVERSARIAL_PATH, encoding="utf-8") as f:
    _ADV = yaml.safe_load(f)

# The case mappings stored under the top-level "tests" key of the fixture file.
TESTS = _ADV["tests"]


def _tests_by_category(cat: str) -> list:
    """Return the cases from ``TESTS`` whose ``category`` field equals *cat*.

    The relative order of the cases in ``TESTS`` is preserved. A category
    that matches nothing yields an empty list.
    """
    selected = []
    for entry in TESTS:
        if entry["category"] == cat:
            selected.append(entry)
    return selected


# ============================================================================
# D. Semantically similar but different — must NOT be deduped
# ============================================================================

class TestSimilarButDifferent:
    """Look-alike controls with different scope — deduplication must keep both."""

    @pytest.mark.parametrize(
        "case",
        _tests_by_category("similar_but_different"),
        ids=lambda entry: entry["id"],
    )
    def test_not_duplicate(self, case):
        """Every fixture in this category must declare ``is_duplicate: false``.

        This validates the expectation recorded in the fixture; it does not
        call into the dedup engine itself.
        """
        expected = case["expected"]
        assert expected["is_duplicate"] is False, (
            f"{case['id']}: These controls MUST NOT be marked as duplicates"
        )

    def test_admin_vs_user_mfa(self):
        """ADV-SEM-001: the admin-MFA and user-MFA obligations are both routed as atomic."""
        fixture = next(entry for entry in TESTS if entry["id"] == "ADV-SEM-001")
        # Classify both control texts first, then check the routing of each.
        results = [
            classify_obligation(fixture[key], "")
            for key in ("control_a", "control_b")
        ]
        # "atomic" routing means neither control was filtered out.
        for result in results:
            assert result["routing"] == "atomic"

    def test_encryption_at_rest_vs_in_transit(self):
        """ADV-SEM-004: "at rest" and "in transit" encryption each classify to an accepted action."""
        accepted = ("encrypt", "implement")
        phrases = (
            "Verschluesselung at rest implementieren",
            "Verschluesselung in transit implementieren",
        )
        actions = [classify_action(phrase) for phrase in phrases]
        # Either action type is acceptable for both phrasings.
        for action in actions:
            assert action in accepted


# ============================================================================
# E. Homonyms — same words, different domains
# ============================================================================

class TestHomonymDifferent:
    """Identical wording from different domains — such controls must NOT be merged."""

    @pytest.mark.parametrize(
        "case",
        _tests_by_category("homonym_different"),
        ids=lambda entry: entry["id"],
    )
    def test_not_duplicate(self, case):
        """Every homonym fixture must declare ``is_duplicate: false``."""
        expected = case["expected"]
        assert expected["is_duplicate"] is False, (
            f"{case['id']}: Homonyms must NOT be treated as duplicates"
        )

    def test_dsgvo_audit_vs_hgb_audit(self):
        """ADV-HOM-003: a data-protection audit and a financial audit are both atomic."""
        obligations = (
            "Audit der Datenschutz-Compliance durchfuehren",
            "Audit der Jahresabschlusspruefung durchfuehren",
        )
        results = [classify_obligation(text, "") for text in obligations]
        # The verb "durchfuehren" is expected to map to "implement"; what this
        # test pins down is only that both obligations are atomic, i.e. not
        # filtered out.
        for result in results:
            assert result["routing"] == "atomic"


# ============================================================================
# A. Wrong legal basis — structural tests
# ============================================================================

class TestWrongLegalBasis:
    """Structural checks on the expected metadata of the wrong-legal-basis fixtures."""

    @pytest.mark.parametrize(
        "case",
        _tests_by_category("wrong_legal_basis"),
        ids=lambda entry: entry["id"],
    )
    def test_finding_expected(self, case):
        """Each wrong_legal_basis fixture must expect a finding."""
        expected = case["expected"]
        assert expected["finding"] is True

    @pytest.mark.parametrize(
        "case",
        _tests_by_category("wrong_legal_basis"),
        ids=lambda entry: entry["id"],
    )
    def test_has_correct_basis(self, case):
        """Each fixture must name a non-empty correct legal basis."""
        expected = case["expected"]
        assert "correct_basis" in expected
        assert len(expected["correct_basis"]) >= 1

    def test_analytics_requires_consent(self):
        """ADV-LIT-001: the fixture names "lit. a" as the basis and cites Planet49."""
        fixture = next(entry for entry in TESTS if entry["id"] == "ADV-LIT-001")
        expected = fixture["expected"]
        assert "lit. a" in expected["correct_basis"]
        assert "Planet49" in expected["reason"]


# ============================================================================
# B. Dark Patterns — structural tests
# ============================================================================

class TestDarkPatterns:
    """Structural checks on the dark-pattern fixtures."""

    @pytest.mark.parametrize(
        "case",
        _tests_by_category("dark_pattern"),
        ids=lambda entry: entry["id"],
    )
    def test_finding_expected(self, case):
        """Each dark-pattern fixture must expect a finding."""
        expected = case["expected"]
        assert expected["finding"] is True

    @pytest.mark.parametrize(
        "case",
        _tests_by_category("dark_pattern"),
        ids=lambda entry: entry["id"],
    )
    def test_has_finding_type(self, case):
        """Each fixture must carry a ``finding_type`` prefixed with ``dark_pattern_``."""
        expected = case["expected"]
        assert "finding_type" in expected
        assert expected["finding_type"].startswith("dark_pattern_")


# ============================================================================
# C. Incomplete documents — structural tests
# ============================================================================

class TestIncompleteDocuments:
    """Structural checks on the incomplete-document fixtures."""

    @pytest.mark.parametrize(
        "case",
        _tests_by_category("incomplete_document"),
        ids=lambda entry: entry["id"],
    )
    def test_has_reason(self, case):
        """Each fixture must supply a non-empty ``reason``."""
        expected = case["expected"]
        assert "reason" in expected
        assert len(expected["reason"]) >= 1

    def test_agb_gerichtsstand_no_finding(self):
        """ADV-DOC-005: a missing Gerichtsstand in B2C AGB is NOT a finding."""
        fixture = next(entry for entry in TESTS if entry["id"] == "ADV-DOC-005")
        expected = fixture["expected"]
        assert expected["finding"] is False


# ============================================================================
# Meta tests — validate test suite integrity
# ============================================================================

class TestSuiteIntegrity:
    """Meta checks that the loaded adversarial fixture set is complete and consistent."""

    def test_total_count(self):
        """The fixture set must contain exactly 30 cases."""
        assert len(TESTS) == 30

    def test_unique_ids(self):
        """No two cases may share an ``id``; the failure message names the offenders."""
        id_counts = Counter(t["id"] for t in TESTS)
        duplicates = [case_id for case_id, seen in id_counts.items() if seen > 1]
        assert not duplicates, f"Duplicate test IDs found: {duplicates}"

    def test_all_categories_present(self):
        """The categories in use must be exactly the five known ones."""
        categories = {t["category"] for t in TESTS}
        expected = {"wrong_legal_basis", "dark_pattern", "incomplete_document",
                    "similar_but_different", "homonym_different"}
        assert categories == expected

    def test_category_counts(self):
        """Each category must contain the expected number of cases."""
        expected = {
            "wrong_legal_basis": 8,
            "dark_pattern": 6,
            "incomplete_document": 6,
            "similar_but_different": 5,
            "homonym_different": 5,
        }
        # Counter yields 0 for an absent key, so a category that is missing
        # altogether fails the assertion below with a readable message instead
        # of aborting the test with a bare KeyError.
        counts = Counter(t["category"] for t in TESTS)
        for category, wanted in expected.items():
            assert counts[category] == wanted, (
                f"{category}: expected {wanted} cases, found {counts[category]}"
            )

    def test_all_have_difficulty(self):
        """Every case must declare a difficulty of easy, medium or hard."""
        for t in TESTS:
            assert "difficulty" in t, f"{t['id']} missing difficulty"
            assert t["difficulty"] in ("easy", "medium", "hard"), (
                f"{t['id']} has invalid difficulty {t['difficulty']!r}"
            )