diff --git a/backend-compliance/compliance/api/agent_check/_b15_wiring.py b/backend-compliance/compliance/api/agent_check/_b15_wiring.py new file mode 100644 index 00000000..53db3ac2 --- /dev/null +++ b/backend-compliance/compliance/api/agent_check/_b15_wiring.py @@ -0,0 +1,64 @@ +"""B15 wiring — AI-Act Rechtsgrundlage-Check für LLM-Vendors. + +Hängt sich an `state["extra_findings"]` an und rendert einen V2-Block +(`ai_legal_basis_html`). +""" + +from __future__ import annotations + +import html +import logging + +from compliance.services.ai_legal_basis_check import check_ai_legal_basis + +logger = logging.getLogger(__name__) + + +def run_b15(state: dict) -> None: + new = check_ai_legal_basis(state) + if not new: + return + extras = state.get("extra_findings") or [] + extras.extend(new) + state["extra_findings"] = extras + state["ai_legal_basis_html"] = _render(new) + logger.info("B15 ai-legal-basis: %d finding(s)", len(new)) + + +def _render(findings: list[dict]) -> str: + cards = [] + for f in findings: + sev = (f.get("severity") or "").upper() + color = "#dc2626" if sev == "HIGH" else "#f59e0b" + meta = ( + "
" + f"Provider: {html.escape(f.get('provider') or '?')} · " + f"Doc: {html.escape(f.get('doc_type') or '?')}
" + ) + cards.append( + f"
" + f"
" + f"{sev} · {html.escape(f.get('check_id') or '')}
" + f"
" + f"{html.escape(f.get('title') or '')}
" + f"
" + f"{html.escape(f.get('norm') or '')}
" + f"{meta}" + f"
" + f"{html.escape(f.get('evidence') or '')}
" + f"
" + f"→ Empfehlung: " + f"{html.escape(f.get('action') or '')}
" + "
" + ) + return ( + "
" + "

" + "🤖 AI-Act Rechtsgrundlage (LLM-Vendor auf berechtigtem Interesse)" + "

" + + "".join(cards) + + "
" + ) diff --git a/backend-compliance/compliance/api/agent_check/_orchestrator.py b/backend-compliance/compliance/api/agent_check/_orchestrator.py index 126d33b8..de219e12 100644 --- a/backend-compliance/compliance/api/agent_check/_orchestrator.py +++ b/backend-compliance/compliance/api/agent_check/_orchestrator.py @@ -25,6 +25,7 @@ from ._b9b10_wiring import run_b9b10 from ._b12_wiring import run_b12 from ._b13_wiring import run_b13 from ._b14_wiring import run_b14 +from ._b15_wiring import run_b15 from ._constants import _compliance_check_jobs from ._phase_a_resolve import run_phase_a from ._phase_b_profile_check import run_phase_b @@ -74,6 +75,7 @@ async def run_compliance_check(check_id: str, req) -> None: run_b12(state) # Chatbot-Cookie-Klassifikation (B11 ist in B9B10) run_b13(state) # Widerrufsbelehrung-Reachability (B2C-Pflicht) run_b14(state) # Widersprüchliche Speicherdauer im selben Doc + run_b15(state) # AI-Act Rechtsgrundlage (LLM-Vendor auf lit. f) # Phase D-3 top/mid/bot: Step 5 HTML blocks await run_phase_d3_top(state) await run_phase_d3_mid(state) diff --git a/backend-compliance/compliance/services/ai_legal_basis_check.py b/backend-compliance/compliance/services/ai_legal_basis_check.py new file mode 100644 index 00000000..ecd7e4fe --- /dev/null +++ b/backend-compliance/compliance/services/ai_legal_basis_check.py @@ -0,0 +1,184 @@ +"""B15 — AI-Act Rechtsgrundlage-Check für LLM-Vendors. + +Erkennt: LLM/GPAI-System (Vertex AI, OpenAI/GPT, Claude) wird in +DSE/Cookie-Doc auf Art. 6 Abs. 1 lit. f (berechtigtes Interesse) +gestützt — statt auf lit. a (Einwilligung). + +Norm-Argument: + - LLMs verarbeiten Prompts + Outputs als personenbezogene Daten + - oft US-Transfer (Vertex / OpenAI / Anthropic) + - LLM-Logging hat Profiling-Ähnlichkeit + → DSK + EDPB-Linie: Einwilligung ist sauberere Rechtsgrundlage, + lit. f-Interessenabwägung ist bei diesen Faktoren fragwürdig. + +Norm: DSGVO Art. 6 Abs. 1 lit. a vs lit. f + AI Act Art. 50 + 51. 
import json
import logging
import os
import re

logger = logging.getLogger(__name__)

_KB_PATH = os.path.join(
    os.path.dirname(__file__),
    "specialist_agents", "_kb", "chat_providers.json",
)


def _load_llm_providers(path: "str | None" = None) -> list[dict]:
    """Return the KB providers that are ai_capable and look LLM-based.

    Not every chat platform flagged ``ai_capable`` is an LLM vendor in the
    AI-Act Art. 50 sense, so an entry is only kept when one of the hint
    substrings below occurs in its lower-cased ``type`` or ``company``
    field. Widgets that merely route to an AI backend are meant to be
    filtered out this way.

    This function runs at import time (see ``_LLM_PROVIDERS``), so it is
    strictly best-effort: an unreadable, unparsable or unexpectedly shaped
    KB is logged and yields an empty list instead of raising.

    Args:
        path: Optional path of the KB JSON file; defaults to ``_KB_PATH``.

    Returns:
        A list of ``{"id": <provider id>, "data": <KB entry>}`` dicts in KB
        order; empty when nothing qualifies or the KB cannot be used.
    """
    kb_path = path or _KB_PATH
    try:
        with open(kb_path, encoding="utf-8") as fh:
            kb = json.load(fh)
    except Exception as exc:
        # Deliberately broad: a broken KB must never break module import.
        logger.warning("AI-legal-basis KB load failed: %s", exc)
        return []

    if not isinstance(kb, dict):
        logger.warning(
            "AI-legal-basis KB ignored: top-level JSON is %s, expected object",
            type(kb).__name__,
        )
        return []
    providers = kb.get("providers") or {}
    if not isinstance(providers, dict):
        logger.warning(
            "AI-legal-basis KB ignored: 'providers' is %s, expected object",
            type(providers).__name__,
        )
        return []

    llm_type_hints = ("ai-chatbot", "conversational-ai",
                      "ai chatbot", "llm", "gpt", "claude", "vertex")
    out: list[dict] = []
    for pid, prov in providers.items():
        # Skip malformed entries instead of failing the whole load.
        if not isinstance(prov, dict) or not prov.get("ai_capable"):
            continue
        # str() guards against non-string field values in the KB.
        searchable = (
            str(prov.get("type") or "").lower(),
            str(prov.get("company") or "").lower(),
        )
        if any(hint in field for field in searchable
               for hint in llm_type_hints):
            out.append({"id": pid, "data": prov})
    return out


_LLM_PROVIDERS = _load_llm_providers()

# Aliases that appear in DSE-prose for each provider. Built from KB +
# common-knowledge synonyms (Google's "Vertex AI" is also referenced as
# "Google Cloud AI" / "PaLM" / "Gemini" / "Generative AI").
# Keys are looked up by the provider id taken from the KB entries.
_LLM_NAME_ALIASES: dict[str, list[str]] = {
    "vertex_ai_chatbot": [
        "vertex ai", "vertex-ai", "vertexai", "google cloud ai",
        "google generative ai", "google palm", "palm 2", "gemini",
    ],
    "openai_chatbot": [
        "openai", "open ai", "gpt-3", "gpt-4", "gpt 3", "gpt 4",
        "chatgpt", "chat gpt", "azure openai",
    ],
    "anthropic_claude": [
        "anthropic", "claude 3", "claude-3", "claude.ai", "claude ai",
    ],
}


def _art6_patterns(letter: str) -> tuple:
    """Compile the citation patterns for Art. 6(1) point *letter*.

    Two spellings are covered, each with an optional "lit."/"Buchst."
    prefix and a trailing word boundary so that e.g. "Art. 6 (1) für"
    is not read as point f:

    * German style: "Art. 6 Abs. 1 [S. 1] lit. f"
    * Parenthesised style: "Art. 6(1)(f)", "Art. 6 (1) lit. f"
    """
    point = (
        r"(?:(?:lit\.?|Buchst(?:abe|\.)?)\s*)?" + re.escape(letter) + r"\b"
    )
    article = r"Art(?:ikel|\.)?\s*6\s*"
    return (
        re.compile(
            article
            + r"Abs(?:atz|\.)?\s*1\s*"
            # Optional sentence / sub-paragraph number ("S. 1", "Satz 1").
            + r"(?:(?:S\.?|Satz|UAbs\.?)\s*1\s*)?"
            + point,
            re.IGNORECASE,
        ),
        re.compile(
            article
            + r"(?:Abs(?:atz|\.)?\s*1)?\s*\(\s*1\s*\)\s*\(?\s*"
            + point,
            re.IGNORECASE,
        ),
    )


_LIT_F_PATTERNS = (
    # Includes the genitive plural "berechtigter Interessen".
    re.compile(r"berechtigt(?:e[snmr]?)?\s+interess", re.IGNORECASE),
    *_art6_patterns("f"),
    re.compile(r"legitimate\s+interest", re.IGNORECASE),
)


_LIT_A_PATTERNS = (
    re.compile(r"einwilligung", re.IGNORECASE),
    *_art6_patterns("a"),
    re.compile(r"\bconsent\b", re.IGNORECASE),
)


def _paragraph_split(text: str) -> list[str]:
    """Split *text* on blank lines and return the non-empty, stripped parts."""
    return [p.strip() for p in re.split(r"\n\s*\n", text or "") if p.strip()]


def _has_lit_f(paragraph: str) -> bool:
    """Return True if *paragraph* names legitimate interest / Art. 6(1)(f)."""
    return any(p.search(paragraph) for p in _LIT_F_PATTERNS)


def _has_lit_a(paragraph: str) -> bool:
    """Return True if *paragraph* names consent / Art. 6(1)(a)."""
    return any(p.search(paragraph) for p in _LIT_A_PATTERNS)


def _find_llm_mentions(paragraph: str) -> list:
    """Return ``(provider_id, display_name)`` for every provider mentioned.

    A provider counts as mentioned when one of its aliases, or its KB
    company name, occurs as a case-insensitive substring of *paragraph*.
    The result follows the order of ``_LLM_PROVIDERS``; the display name
    is the KB company, falling back to the provider id.
    """
    p_lc = paragraph.lower()
    hits = []
    for prov in _LLM_PROVIDERS:
        company = prov["data"].get("company") or ""
        candidates = [
            *(_LLM_NAME_ALIASES.get(prov["id"]) or []),
            company.lower(),
        ]
        # Empty aliases are skipped: "" is a substring of everything.
        if any(alias and alias in p_lc for alias in candidates):
            hits.append((prov["id"], company or prov["id"]))
    return hits


def _find_llm_mention(paragraph: str) -> "tuple[str, str] | None":
    """Return the first provider mentioned in *paragraph*, or None."""
    hits = _find_llm_mentions(paragraph)
    return hits[0] if hits else None


def _build_finding(doc_type: str, prov_company: str) -> dict:
    """Assemble the AI-LEGAL-BASIS-001 finding for one (doc, provider)."""
    return {
        "check_id": "AI-LEGAL-BASIS-001",
        "severity": "MEDIUM",
        "severity_reason": "questionable_basis",
        "doc_type": doc_type,
        "provider": prov_company,
        "title": (
            f"LLM-System '{prov_company}' auf Art. 6 Abs. 1 lit. f "
            "gestützt statt auf Einwilligung"
        ),
        "norm": (
            "DSGVO Art. 6 Abs. 1 lit. a vs lit. f + "
            "AI Act Art. 50 + 51"
        ),
        "evidence": (
            "LLM-Provider in einem Absatz erwähnt, der berechtigtes "
            "Interesse / lit. f als Rechtsgrundlage angibt. Bei "
            "Prompt-/Output-Logging mit US-Transfer und Profiling-"
            "Ähnlichkeit ist die Interessenabwägung fragwürdig."
        ),
        "action": (
            f"Rechtsgrundlage für {prov_company} auf Art. 6 Abs. 1 "
            "lit. a (Einwilligung) umstellen. Pre-Interaction-"
            "Consent + AI-Act Art. 50 Disclosure am Chat-UI "
            "einrichten."
        ),
    }


def check_ai_legal_basis(state: dict) -> list[dict]:
    """Emit findings for LLM providers grounded on Art. 6(1)(f).

    The ``dse`` and ``cookie`` texts in ``state["doc_texts"]`` are split
    into paragraphs. A paragraph produces findings when it mentions at
    least one LLM provider, names legitimate interest / lit. f, and does
    NOT also name consent / lit. a (in that case the lit. f reference is
    likely about a side purpose, so it is skipped to avoid noise).

    Every provider mentioned in a qualifying paragraph is reported, at
    most once per ``(doc_type, provider)``.

    Args:
        state: Check state; only ``state["doc_texts"]`` is read.

    Returns:
        A list of finding dicts (possibly empty).
    """
    doc_texts = state.get("doc_texts") or {}
    if not isinstance(doc_texts, dict):
        return []
    findings: list[dict] = []
    seen: set[tuple[str, str]] = set()
    for doc_type in ("dse", "cookie"):
        text = doc_texts.get(doc_type)
        # Ignore missing or non-text documents instead of crashing.
        if not text or not isinstance(text, str):
            continue
        for para in _paragraph_split(text):
            mentions = _find_llm_mentions(para)
            if not mentions or not _has_lit_f(para) or _has_lit_a(para):
                continue
            for prov_id, prov_company in mentions:
                key = (doc_type, prov_id)
                if key in seen:
                    continue
                seen.add(key)
                findings.append(_build_finding(doc_type, prov_company))
    if findings:
        logger.info("B15 ai-legal-basis: %d finding(s)", len(findings))
    return findings
a DSGVO") + + def test_lit_a_english(self): + assert _has_lit_a("Processing requires consent.") + + +class TestLLMMention: + def test_vertex_ai(self): + m = _find_llm_mention("Wir nutzen Google Vertex AI für den Chatbot.") + assert m is not None + assert m[0] == "vertex_ai_chatbot" + + def test_openai(self): + m = _find_llm_mention("Der Bot basiert auf ChatGPT von OpenAI.") + assert m is not None + assert m[0] == "openai_chatbot" + + def test_anthropic(self): + m = _find_llm_mention("Antworten via Anthropic Claude 3.") + assert m is not None + assert m[0] == "anthropic_claude" + + def test_no_llm_mention(self): + # iAdvize is ai_capable=true but NOT an LLM-vendor — should be + # filtered out by the LLM-tightening hint list. + m = _find_llm_mention("Live-Chat mit iAdvize SAS.") + assert m is None + + +class TestCheckAILegalBasis: + def test_vertex_ai_lit_f_finding(self): + dse = ( + "Wir setzen für den AI Assistant Google Vertex AI ein. " + "Rechtsgrundlage ist Art. 6 Abs. 1 lit. f DSGVO " + "(berechtigtes Interesse)." + ) + findings = check_ai_legal_basis({"doc_texts": {"dse": dse}}) + assert len(findings) == 1 + f = findings[0] + assert f["check_id"] == "AI-LEGAL-BASIS-001" + assert f["severity"] == "MEDIUM" + assert "Vertex" in f["provider"] + + def test_vertex_ai_with_consent_no_finding(self): + # If consent is ALSO named in the paragraph, no finding. + dse = ( + "Vertex AI verarbeitet nur nach vorheriger Einwilligung " + "(Art. 6 Abs. 1 lit. a). Optional auch berechtigtes Interesse " + "(lit. f) für Server-Logs." + ) + assert check_ai_legal_basis({"doc_texts": {"dse": dse}}) == [] + + def test_no_llm_no_finding(self): + dse = "Wir nutzen iAdvize Live-Chat auf Grundlage des "\ + "berechtigten Interesses." + assert check_ai_legal_basis({"doc_texts": {"dse": dse}}) == [] + + def test_llm_without_lit_f_no_finding(self): + dse = "OpenAI / GPT verarbeitet nur mit Einwilligung." 
+ assert check_ai_legal_basis({"doc_texts": {"dse": dse}}) == [] + + def test_dedup_same_provider_per_doc(self): + dse = ( + "Vertex AI auf berechtigtem Interesse.\n\n" + "Vertex AI nochmals mit berechtigtem Interesse genannt." + ) + findings = check_ai_legal_basis({"doc_texts": {"dse": dse}}) + assert len(findings) == 1 + + def test_separate_doc_types_dedup_per_doc(self): + dse = "OpenAI / GPT auf berechtigtem Interesse." + cookie = "OpenAI ChatGPT auf Grundlage des berechtigten Interesses." + findings = check_ai_legal_basis({ + "doc_texts": {"dse": dse, "cookie": cookie} + }) + # one finding per (doc, provider) + assert len(findings) == 2 + doc_types = {f["doc_type"] for f in findings} + assert doc_types == {"dse", "cookie"}