# breakpilot-compliance/backend-compliance/tests/test_ai_legal_basis_check.py

"""Tests for B15 AI-Act Rechtsgrundlage-Check (GT AI-ACT-RISK-001)."""

from compliance.services.ai_legal_basis_check import (
    _find_llm_mention,
    _has_lit_a,
    _has_lit_f,
    check_ai_legal_basis,
)


class TestLitFDetection:
    """Exercise ``_has_lit_f`` with German and English sample sentences."""

    def test_berechtigtes_interesse(self):
        """The German phrase 'berechtigtes Interesse' must be detected."""
        sentence = "Wir verarbeiten auf Grundlage des berechtigten Interesses."
        assert _has_lit_f(sentence)

    def test_lit_f_short(self):
        """The abbreviated article citation ('lit. f') must be detected."""
        citation = "Rechtsgrundlage: Art. 6 Abs. 1 lit. f DSGVO"
        assert _has_lit_f(citation)

    def test_lit_f_english(self):
        """The English wording 'legitimate interest' must be detected."""
        sentence = "Legal basis: legitimate interest."
        assert _has_lit_f(sentence)

    def test_lit_a_not_lit_f(self):
        """A consent-only sentence must not be reported as lit. f."""
        consent_only = "Verarbeitung nur mit Einwilligung."
        assert not _has_lit_f(consent_only)


class TestLitADetection:
    """Exercise ``_has_lit_a`` with German and English sample sentences."""

    def test_einwilligung(self):
        """The German word 'Einwilligung' must be detected."""
        sentence = "Verarbeitung auf Grundlage einer Einwilligung."
        assert _has_lit_a(sentence)

    def test_lit_a_short(self):
        """The abbreviated article citation ('lit. a') must be detected."""
        citation = "Art. 6 Abs. 1 lit. a DSGVO"
        assert _has_lit_a(citation)

    def test_lit_a_english(self):
        """The English word 'consent' must be detected."""
        sentence = "Processing requires consent."
        assert _has_lit_a(sentence)


class TestLLMMention:
    """Exercise ``_find_llm_mention`` on sentences naming chat/AI vendors.

    For a hit, the tests only inspect the first element of the returned
    value, which is compared against the expected provider identifier.
    """

    def test_vertex_ai(self):
        """A Google Vertex AI mention yields the 'vertex_ai_chatbot' id."""
        mention = _find_llm_mention(
            "Wir nutzen Google Vertex AI für den Chatbot."
        )
        assert mention is not None
        assert mention[0] == "vertex_ai_chatbot"

    def test_openai(self):
        """A ChatGPT/OpenAI mention yields the 'openai_chatbot' id."""
        mention = _find_llm_mention(
            "Der Bot basiert auf ChatGPT von OpenAI."
        )
        assert mention is not None
        assert mention[0] == "openai_chatbot"

    def test_anthropic(self):
        """An Anthropic Claude mention yields the 'anthropic_claude' id."""
        mention = _find_llm_mention("Antworten via Anthropic Claude 3.")
        assert mention is not None
        assert mention[0] == "anthropic_claude"

    def test_no_llm_mention(self):
        """A live-chat vendor that is not an LLM vendor yields no mention."""
        # iAdvize is flagged ai_capable=true but is NOT an LLM vendor, so
        # the LLM-tightening hint list is expected to filter it out.
        mention = _find_llm_mention("Live-Chat mit iAdvize SAS.")
        assert mention is None


class TestCheckAILegalBasis:
    """End-to-end checks for ``check_ai_legal_basis``.

    Each test passes a dict with a ``"doc_texts"`` mapping of document type
    to document text and inspects the returned list of findings.
    """

    def test_vertex_ai_lit_f_finding(self):
        """An LLM provider justified by lit. f alone produces one finding."""
        privacy_text = (
            "Wir setzen für den AI Assistant Google Vertex AI ein. "
            "Rechtsgrundlage ist Art. 6 Abs. 1 lit. f DSGVO "
            "(berechtigtes Interesse)."
        )
        payload = {"doc_texts": {"dse": privacy_text}}

        findings = check_ai_legal_basis(payload)

        assert len(findings) == 1
        finding = findings[0]
        assert finding["check_id"] == "AI-LEGAL-BASIS-001"
        assert finding["severity"] == "MEDIUM"
        assert "Vertex" in finding["provider"]

    def test_vertex_ai_with_consent_no_finding(self):
        """Consent named alongside lit. f suppresses the finding."""
        # Consent is ALSO mentioned in the same paragraph, so nothing
        # should be reported.
        privacy_text = (
            "Vertex AI verarbeitet nur nach vorheriger Einwilligung "
            "(Art. 6 Abs. 1 lit. a). Optional auch berechtigtes Interesse "
            "(lit. f) für Server-Logs."
        )
        payload = {"doc_texts": {"dse": privacy_text}}
        assert check_ai_legal_basis(payload) == []

    def test_no_llm_no_finding(self):
        """Lit. f without any LLM provider mention produces no finding."""
        privacy_text = (
            "Wir nutzen iAdvize Live-Chat auf Grundlage des "
            "berechtigten Interesses."
        )
        payload = {"doc_texts": {"dse": privacy_text}}
        assert check_ai_legal_basis(payload) == []

    def test_llm_without_lit_f_no_finding(self):
        """An LLM provider based on consent only produces no finding."""
        privacy_text = "OpenAI / GPT verarbeitet nur mit Einwilligung."
        payload = {"doc_texts": {"dse": privacy_text}}
        assert check_ai_legal_basis(payload) == []

    def test_dedup_same_provider_per_doc(self):
        """The same provider named twice in one document is reported once."""
        privacy_text = (
            "Vertex AI auf berechtigtem Interesse.\n\n"
            "Vertex AI nochmals mit berechtigtem Interesse genannt."
        )
        payload = {"doc_texts": {"dse": privacy_text}}

        findings = check_ai_legal_basis(payload)

        assert len(findings) == 1

    def test_separate_doc_types_dedup_per_doc(self):
        """The same provider in two document types is reported per document."""
        privacy_text = "OpenAI / GPT auf berechtigtem Interesse."
        cookie_text = (
            "OpenAI ChatGPT auf Grundlage des berechtigten Interesses."
        )
        payload = {"doc_texts": {"dse": privacy_text, "cookie": cookie_text}}

        findings = check_ai_legal_basis(payload)

        # One finding is expected for each (document, provider) pair.
        assert len(findings) == 2
        seen_doc_types = {entry["doc_type"] for entry in findings}
        assert seen_doc_types == {"dse", "cookie"}