feat(control-pipeline): 24 demo test cases for applicability engine

YAML-based test package with 4 categories (6 each):
- Standard sector cases (Telko, SaaS, Energie, Automotive, Health, Law)
- Scope-beats-sector (Bank+Battery, KI-Recruiting, White-Label, Payments)
- False friends (Stripe!=PSD2, Hotline!=TKG, Repo-signals!=regulation)
- Escalation (IoT-SIM, FinTech unclear, Treuhand, KI-Diagnose)

Enforces 5 acceptance rules: no false certainty, scope>sector,
repo signals insufficient, standard first, 40%+ negative tests.

Scoring framework: must_include + must_not_include + reasoning + escalation.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-04-23 17:42:38 +02:00
parent 1f8667c7da
commit e8ec50e0fc
2 changed files with 848 additions and 419 deletions

View File

@@ -0,0 +1,725 @@
# ============================================================================
# BreakPilot Compliance — Demo Test Cases
# ============================================================================
#
# 24 Use Cases in 4 Kategorien:
# A. Standard (6) — Branchen-Default korrekt?
# B. Scope schlaegt Branche (6) — Sonderfaelle additiv?
# C. Falsche Freunde / Negativ (6) — Keine Falschzuweisung?
# D. Eskalation (6) — Unsicherheit erkannt?
#
# Jeder Case erwartet bis zu 5 Outputs (Eskalationsfaelle in Kategorie D lassen 1-4 teilweise weg):
# 1. applicable_industries
# 2. scope_triggers
# 3. excluded_by_default
# 4. reasoning_summary
# 5. confidence + escalation
#
# Akzeptanzregeln:
# R1: Keine harte Falschsicherheit bei unklarem Sachverhalt
# R2: Scope schlaegt Branchen-Default (additiv)
# R3: Repo-Signale allein reichen nicht fuer harte Regulierung
# R4: Standardfall zuerst, Sonderfall additiv
# R5: Mindestens 40% Negativtests
#
# Scoring pro Case:
# must_include_match: 0..1
# must_not_include_match: 0..1
# reasoning_correct: 0..1
# escalation_correct: 0..1
# total_score: 0..4
# ============================================================================
# ============================================================================
# A. STANDARD-BRANCHENFAELLE (6)
# ============================================================================
- id: DEMO-APP-005
title: "Telekommunikationsanbieter"
category: standard
goal: "TKG/Telko-Pflichten im Standardfall"
company_profile:
sector: "Telekommunikation"
size: "medium"
country: "DE"
facts:
- "Erbringt Kommunikationsdienste"
- "Bietet Mobilfunkvertraege"
scope_answers:
provides_telecom_service: true
is_kritis_operator: true
expected:
applicable_industries: ["Telekommunikation"]
scope_triggers: ["provides_telecom_service", "is_kritis_operator"]
applicable_controls_should_include:
- "TKG"
- "TTDSG"
- "NIS2"
- "DSGVO"
applicable_controls_should_not_include:
- "PSD2"
- "Batterieverordnung"
- "MDR"
excluded_by_default: ["PSD2", "Batterieverordnung", "MDR"]
reasoning_summary: "Telko-Anbieter ist KRITIS-Betreiber, TKG und TTDSG direkt anwendbar."
confidence: 0.95
escalation_expected: false
- id: DEMO-APP-009
title: "Reines SaaS-Unternehmen"
category: standard
goal: "Batterie-/Produktregulierung darf nicht anspringen"
company_profile:
sector: "Technologie/IT"
size: "small"
country: "DE"
facts:
- "Nur Browser-Anwendung"
- "Keine Hardware"
scope_answers:
distributes_physical_products: false
contains_battery: false
uses_ai: false
expected:
applicable_industries: ["Technologie/IT"]
scope_triggers: []
applicable_controls_should_include:
- "DSGVO"
- "OWASP"
applicable_controls_should_not_include:
- "Batterieverordnung"
- "Maschinenverordnung"
- "MDR"
- "TKG"
- "PSD2"
excluded_by_default: ["Batterieverordnung", "Maschinenverordnung", "MDR", "TKG", "PSD2"]
reasoning_summary: "Reines SaaS ohne Hardware, ohne KI, ohne Finanzfunktion."
confidence: 0.95
escalation_expected: false
- id: DEMO-STD-003
title: "Mittelstaendischer Energieversorger (Stadtwerk)"
category: standard
goal: "KRITIS + NIS2 korrekt zugewiesen"
company_profile:
sector: "Energie"
size: "medium"
country: "DE"
facts:
- "Stadtwerk mit 200 Mitarbeitern"
- "Strom- und Gasversorgung"
- "Leitwarte mit Kameraueberwachung"
scope_answers:
is_kritis_operator: true
employee_monitoring: true
expected:
applicable_industries: ["Energie"]
scope_triggers: ["is_kritis_operator", "employee_monitoring"]
applicable_controls_should_include:
- "NIS2"
- "KRITIS"
- "BSI Grundschutz"
- "DSGVO"
- "BDSG"
applicable_controls_should_not_include:
- "PSD2"
- "AI Act"
- "MDR"
excluded_by_default: ["PSD2", "AI Act", "MDR", "TKG"]
reasoning_summary: "Stadtwerk ist KRITIS-Betreiber im Energiesektor, NIS2 ab medium."
confidence: 0.95
escalation_expected: false
- id: DEMO-STD-004
title: "Automobilzulieferer mit Prototypen"
category: standard
goal: "TISAX-Readiness korrekt vorbereitet"
company_profile:
sector: "Automobil"
size: "large"
country: "DE"
facts:
- "500 Mitarbeiter"
- "Prototypenfertigung fuer OEMs"
- "Internationale Lieferkette"
scope_answers:
handles_prototypes: true
supply_chain_automotive: true
third_country_transfer: true
expected:
applicable_industries: ["Automobil"]
scope_triggers: ["handles_prototypes", "supply_chain_automotive", "third_country_transfer"]
applicable_controls_should_include:
- "ISO 27001"
- "Prototypenschutz"
- "NIS2"
- "DSGVO"
- "CE"
applicable_controls_should_not_include:
- "PSD2"
- "TKG"
- "MDR"
excluded_by_default: ["PSD2", "TKG", "MDR"]
reasoning_summary: "Automobilzulieferer braucht TISAX-Readiness (ISO 27001 Basis), Prototypenschutz."
confidence: 0.90
escalation_expected: false
- id: DEMO-STD-005
title: "Gesundheits-App mit KI (DiGA)"
category: standard
goal: "KI + Gesundheitsdaten + MDR korrekt"
company_profile:
sector: "Gesundheitswesen"
size: "small"
country: "DE"
facts:
- "KI-basierte Gesundheits-App"
- "Verarbeitet Gesundheitsdaten"
- "Automatisierte Empfehlungen"
scope_answers:
uses_ai: true
processes_health_data: true
automated_decisions: true
expected:
applicable_industries: ["Gesundheitswesen"]
scope_triggers: ["uses_ai", "processes_health_data", "automated_decisions"]
applicable_controls_should_include:
- "DSGVO Art. 9"
- "DSGVO Art. 22"
- "DSGVO Art. 35"
- "AI Act"
- "MDR"
- "BSI TR-03161"
applicable_controls_should_not_include:
- "PSD2"
- "TKG"
- "Batterieverordnung"
excluded_by_default: ["PSD2", "TKG", "Batterieverordnung"]
reasoning_summary: "Gesundheits-App mit KI trifft DSGVO Art. 9/22/35, AI Act Hochrisiko, MDR."
confidence: 0.90
escalation_expected: false
- id: DEMO-STD-006
title: "Rechtsanwaltskanzlei mit KI und US-Cloud"
category: standard
goal: "Berufsrecht + KI + Drittland korrekt"
company_profile:
sector: "Recht/Kanzlei"
size: "small"
country: "DE"
facts:
- "30 Anwaelte"
- "KI fuer Dokumentenanalyse"
- "US-Cloud-Dienste"
scope_answers:
uses_ai: true
third_country_transfer: true
handles_legal_privilege: true
expected:
applicable_industries: ["Recht/Kanzlei"]
scope_triggers: ["uses_ai", "third_country_transfer", "handles_legal_privilege"]
applicable_controls_should_include:
- "DSGVO"
- "DSGVO Art. 46 (SCC)"
- "AI Act"
- "BRAO"
applicable_controls_should_not_include:
- "NIS2"
- "PSD2"
- "TKG"
excluded_by_default: ["NIS2", "PSD2", "TKG", "MDR"]
reasoning_summary: "Kanzlei mit KI und US-Cloud braucht DSGVO+SCC, AI Act, Berufsrecht."
confidence: 0.90
escalation_expected: false
# ============================================================================
# B. SCOPE SCHLAEGT BRANCHE (6)
# ============================================================================
- id: DEMO-APP-003
title: "Bank vertreibt TAN-Generator mit Batterie"
category: scope_beats_sector
goal: "Batterie-Controls trotz Bankensektor additiv"
company_profile:
sector: "Finanzdienstleistungen"
size: "large"
country: "DE"
facts:
- "Bank gibt TAN-Generatoren an Kunden aus"
- "Geraet enthaelt Batterie"
- "Physisches Produkt wird in Verkehr gebracht"
scope_answers:
distributes_physical_products: true
contains_battery: true
financial_institution: true
expected:
applicable_industries: ["Finanzdienstleistungen"]
scope_triggers: ["distributes_physical_products", "contains_battery"]
applicable_controls_should_include:
- "bankenspezifische Controls"
- "batteriebezogene Controls"
applicable_controls_should_not_include:
- "TKG"
excluded_by_default: ["TKG", "Maschinenverordnung"]
reasoning_summary: "Bank bringt physisches Produkt mit Batterie in Verkehr — Batterieverordnung additiv."
confidence: 0.85
escalation_expected: false
- id: DEMO-APP-008
title: "Chemieunternehmen mit akkubetriebenen Messgeraeten"
category: scope_beats_sector
goal: "Batteriepflichten im naheliegenden Fall"
company_profile:
sector: "Chemie"
size: "medium"
country: "DE"
facts:
- "Vertreibt Messgeraete mit Akku"
scope_answers:
distributes_physical_products: true
contains_battery: true
expected:
applicable_industries: ["Chemie"]
scope_triggers: ["distributes_physical_products", "contains_battery"]
applicable_controls_should_include:
- "Batterieverordnung"
- "CE"
applicable_controls_should_not_include:
- "PSD2"
- "TKG"
excluded_by_default: ["PSD2", "TKG"]
reasoning_summary: "Chemieunternehmen bringt akkubetriebene Geraete in Verkehr."
confidence: 0.90
escalation_expected: false
- id: DEMO-APP-011
title: "KI im Recruiting"
category: scope_beats_sector
goal: "KI/HR/AGG Controls unabhaengig von Branche"
company_profile:
sector: "Beliebig"
size: "medium"
country: "DE"
facts:
- "KI priorisiert Bewerbungen"
- "Automatisierte Absagen werden vorbereitet"
scope_answers:
uses_ai: true
automated_decisions: true
expected:
applicable_industries: ["all"]
scope_triggers: ["uses_ai", "automated_decisions"]
applicable_controls_should_include:
- "DSGVO Art. 22"
- "AGG"
- "AI Act"
excluded_by_default: []
reasoning_summary: "KI im Recruiting loest DSGVO Art. 22, AGG-Diskriminierungsschutz und AI Act aus."
confidence: 0.70
escalation_expected: true
escalation_reason: "KI-basierte HR-Entscheidungen sind AI Act Hochrisiko — vertiefte Pruefung"
- id: DEMO-APP-013
title: "Schulmessenger mit KI-Uebersetzung"
category: scope_beats_sector
goal: "KI + Kinderdaten + Drittland erkennen"
company_profile:
sector: "Bildung"
size: "medium"
country: "DE"
facts:
- "Zwei-Wege-Kommunikation Schule-Eltern"
- "Nachrichten werden automatisch uebersetzt"
- "Personenbezogene Daten von Eltern und Kindern"
scope_answers:
processes_minors_data: true
uses_ai: true
third_country_transfer: true # Uebersetzungs-API
expected:
applicable_industries: ["Bildung"]
scope_triggers: ["processes_minors_data", "uses_ai", "third_country_transfer"]
applicable_controls_should_include:
- "DSGVO Art. 8"
- "AI Act"
- "DSGVO Art. 46 (SCC)"
applicable_controls_should_not_include:
- "TKG"
- "PSD2"
excluded_by_default: ["TKG", "PSD2"]
reasoning_summary: "Schulkommunikation mit KI-Uebersetzung und Kinderdaten loest DSGVO Art. 8 + AI Act + SCC aus."
confidence: 0.70
escalation_expected: true
escalation_reason: "KI-Drittland-/Modellgovernance-Review fuer Uebersetzungs-API"
- id: DEMO-APP-016
title: "White-Label-Hardwarevertrieb"
category: scope_beats_sector
goal: "Inverkehrbringen unter eigener Marke = Produktpflichten"
company_profile:
sector: "E-Commerce/Handel"
size: "small"
country: "DE"
facts:
- "Vertreibt Geraete unter eigener Marke"
- "Produktion durch Dritten"
- "Geraet enthaelt Akku"
scope_answers:
places_product_on_market_under_own_brand: true
contains_battery: true
distributes_physical_products: true
expected:
applicable_industries: ["E-Commerce/Handel"]
scope_triggers: ["places_product_on_market_under_own_brand", "contains_battery"]
applicable_controls_should_include:
- "Batterieverordnung"
- "CE"
- "Produkthaftung"
excluded_by_default: ["PSD2", "TKG"]
reasoning_summary: "White-Label = Inverkehrbringer unter eigener Marke, traegt Produktpflichten."
confidence: 0.90
escalation_expected: false
- id: DEMO-APP-002
title: "Industrieplattform mit eigener Zahlungsabwicklung"
category: scope_beats_sector
goal: "Echte Finanzregulierung bei Geschaeftsmodell-Wechsel"
company_profile:
sector: "Produktion/Industrie"
size: "medium"
country: "DE"
facts:
- "Betreibt B2B-Marktplatz"
- "Haelt Kundengelder kurzzeitig zwischen"
- "Leitet Zahlungen an Haendler weiter"
scope_answers:
operates_payment_service: true
holds_client_funds: true
marketplace_model: true
expected:
applicable_industries: ["Produktion/Industrie"]
scope_triggers: ["operates_payment_service", "holds_client_funds"]
applicable_controls_should_include:
- "PSD2"
- "AML/KYC"
excluded_by_default: []
reasoning_summary: "Industrieplattform mit eigenem Payment = regulatorische Zahlungsdienstpflicht."
confidence: 0.60
escalation_expected: true
escalation_reason: "Regulatorische Einordnung erfordert vertiefte Pruefung (Erlaubnispflicht)"
# ============================================================================
# C. FALSCHE FREUNDE / NEGATIVTESTS (6)
# ============================================================================
- id: DEMO-APP-001
title: "Industrieunternehmen mit Webshop und Stripe Checkout"
category: false_friends
goal: "Stripe darf nicht PSD2 ausloesen"
company_profile:
sector: "Produktion/Industrie"
size: "medium"
country: "DE"
business_model: "B2B-Hersteller mit ergaenzendem Webshop"
facts:
- "Verkauft Ersatzteile ueber Webshop"
- "Nutzt Stripe Checkout als externen Zahlungsdienstleister"
- "Speichert keine vollstaendigen Kartendaten selbst"
- "Keine Zahlungsabwicklung im eigenen Namen"
scope_answers:
operates_payment_service: false
stores_card_data: false
sells_physical_products: true
repo_signals:
- "stripe checkout"
expected:
applicable_industries: ["Produktion/Industrie"]
scope_triggers: []
applicable_controls_should_include:
- "DSGVO Datenschutzhinweise"
- "DSGVO Empfaenger-/Dienstleistertransparenz"
- "VVT"
applicable_controls_should_not_include:
- "PSD2"
- "AML"
- "Batterieverordnung"
- "TKG"
excluded_by_default: ["PSD2", "AML", "Batterieverordnung", "TKG"]
reasoning_summary: "Stripe ist externer Zahlungsdienstleister; Haendler wird nicht reguliertes Zahlungsinstitut."
confidence: 0.92
escalation_expected: false
- id: DEMO-APP-004
title: "Direktbank ohne physische Produkte"
category: false_friends
goal: "Keine Batteriepflichten nur wegen Bank"
company_profile:
sector: "Finanzdienstleistungen"
size: "large"
country: "DE"
facts:
- "Nur Mobile App und Webbanking"
- "Keine Token, keine TAN-Geraete, keine Hardware"
scope_answers:
distributes_physical_products: false
contains_battery: false
expected:
applicable_industries: ["Finanzdienstleistungen"]
scope_triggers: []
applicable_controls_should_include:
- "PSD2"
- "DSGVO"
- "BaFin"
applicable_controls_should_not_include:
- "Batterieverordnung"
- "Maschinenverordnung"
- "CE"
excluded_by_default: ["Batterieverordnung", "Maschinenverordnung", "CE"]
reasoning_summary: "Reine Digitalbank ohne physische Produkte — keine Produktregulierung."
confidence: 0.95
escalation_expected: false
- id: DEMO-APP-006
title: "Maschinenbauer mit Kundenhotline"
category: false_friends
goal: "Hotline darf nicht TKG triggern"
company_profile:
sector: "Produktion/Industrie"
size: "medium"
country: "DE"
facts:
- "Hat Support-Hotline fuer Kunden"
- "Erbringt keinen oeffentlichen Telekommunikationsdienst"
scope_answers:
provides_telecom_service: false
operates_customer_hotline: true
expected:
applicable_industries: ["Produktion/Industrie"]
scope_triggers: []
applicable_controls_should_not_include:
- "TKG"
- "TTDSG Telko-Kernpflichten"
excluded_by_default: ["TKG"]
reasoning_summary: "Kundenhotline ist kein oeffentlicher Telekommunikationsdienst."
confidence: 0.92
escalation_expected: false
- id: DEMO-APP-012
title: "Schule mit Einweg-Elternkommunikation"
category: false_friends
goal: "Einweg-Nachrichten sind kein Telko-Dienst"
company_profile:
sector: "Bildung"
size: "medium"
country: "DE"
facts:
- "Eltern erhalten Einweg-Nachrichten"
- "Keine offene Chat-Funktion"
scope_answers:
provides_telecom_service: false
processes_minors_data: true
expected:
applicable_industries: ["Bildung"]
scope_triggers: ["processes_minors_data"]
applicable_controls_should_include:
- "DSGVO"
- "DSGVO Art. 8"
applicable_controls_should_not_include:
- "TKG"
excluded_by_default: ["TKG", "PSD2"]
reasoning_summary: "Einweg-Elterninfo ist kein oeffentlicher Kommunikationsdienst."
confidence: 0.90
escalation_expected: false
- id: DEMO-APP-014
title: "Repo enthaelt Stripe SDK — nur SaaS-Billing"
category: false_friends
goal: "Repo-Signale duerfen Scope nicht uebersteuern"
company_profile:
sector: "Technologie/IT"
size: "small"
country: "DE"
facts:
- "Repo enthaelt stripe dependency"
- "Nur Billing fuer eigenes SaaS-Abo"
- "Keine Zahlungsabwicklung fuer Dritte"
repo_signals:
- "stripe"
scope_answers:
operates_payment_service: false
expected:
applicable_industries: ["Technologie/IT"]
scope_triggers: []
applicable_controls_should_not_include:
- "PSD2"
applicable_controls_should_include:
- "DSGVO"
- "Vendor-/Security-Controls"
excluded_by_default: ["PSD2"]
reasoning_summary: "Stripe SDK im Repo = SaaS-Billing, nicht eigene Zahlungsabwicklung."
confidence: 0.90
escalation_expected: false
- id: DEMO-APP-017
title: "Interne Nutzung batteriebetriebener Geraete"
category: false_friends
goal: "Keine Batterie-Inverkehrbringungspflichten"
company_profile:
sector: "Beliebig"
size: "medium"
country: "DE"
facts:
- "Mitarbeiter nutzen Laptops und Scanner"
- "Kein Vertrieb eigener Batterieprodukte"
scope_answers:
distributes_physical_products: false
places_battery_products_on_market: false
expected:
applicable_industries: ["all"]
scope_triggers: []
applicable_controls_should_not_include:
- "Batterieverordnung (Inverkehrbringen)"
excluded_by_default: ["Batterieverordnung"]
reasoning_summary: "Interne Nutzung von Geraeten mit Batterien loest keine Inverkehrbringungspflichten aus."
confidence: 0.95
escalation_expected: false
# ============================================================================
# D. ESKALATIONSFAELLE (6)
# ============================================================================
- id: DEMO-APP-007
title: "IoT-Hersteller mit vernetztem Geraet (SIM + Funkmodul)"
category: escalation
goal: "Abgrenzung Hardware vs. Kommunikationsdienst"
company_profile:
sector: "Produktion/Industrie"
size: "medium"
country: "DE"
facts:
- "Verkauft Geraet mit Funkmodul"
- "Geraet kommuniziert ueber Mobilfunk"
- "SIM-Konnektivitaet wird mitgeliefert"
scope_answers:
sells_connected_device: true
provides_embedded_connectivity: true
expected:
applicable_industries: ["Produktion/Industrie"]
scope_triggers: ["sells_connected_device", "provides_embedded_connectivity"]
applicable_controls_should_include:
- "CE"
- "Cyber Resilience Act"
- "Funkgeraeterichtlinie (RED)"
escalation_expected: true
escalation_reason: "Abgrenzung Hardwareprodukt vs. Kommunikationsdienst vertieft pruefen"
confidence: 0.55
- id: DEMO-APP-010
title: "Plattform mit Verkaeufer-Onboarding und Transaktionsmonitoring"
category: escalation
goal: "AML/KYC Relevanz differenziert"
company_profile:
sector: "Technologie/IT"
size: "medium"
country: "DE"
facts:
- "Onboardet externe Verkaeufer"
- "Prueft Identitaet gewerblicher Anbieter"
- "Ueberwacht verdaechtige Zahlungsstroeme"
scope_answers:
marketplace_model: true
performs_kyc: true
monitors_transactions: true
expected:
applicable_industries: ["Technologie/IT"]
scope_triggers: ["marketplace_model", "performs_kyc", "monitors_transactions"]
applicable_controls_should_include:
- "AML/KYC Review-Controls"
escalation_expected: true
escalation_reason: "Klaerung ob GwG-/aufsichtsrechtliche Pflichten oder nur Fraud-/Plattformkontrollen"
confidence: 0.50
- id: DEMO-APP-015
title: "Repo zeigt Wallet-/Custody-Funktionen"
category: escalation
goal: "Technische Signale deuten auf Regulierung"
company_profile:
sector: "Technologie/IT"
size: "small"
country: "DE"
facts:
- "Produktbeschreibung unvollstaendig"
repo_signals:
- "wallet_service"
- "custody"
- "kyc_provider"
- "transaction_monitoring"
scope_answers: {}
expected:
applicable_controls_should_include:
- "Finanz-/AML-nahe Review-Controls"
escalation_expected: true
escalation_reason: "Technische Signale deuten auf regulierungsnahe Funktion hin"
confidence: 0.35
- id: DEMO-APP-018
title: "Unklare FinTech-Beschreibung"
category: escalation
goal: "Unsicherheit explizit erkennen"
company_profile:
sector: "Technologie/IT"
size: "small"
country: "DE"
facts:
- "App verwaltet Geldfluesse zwischen Nutzern"
- "Details zur Vertragsrolle unklar"
scope_answers: {}
expected:
escalation_expected: true
escalation_reason: "Geschaeftsmodell fuer finale regulatorische Einordnung zu unbestimmt"
confidence: 0.30
pass_criteria:
- "Keine harte Falschaussage"
- "Gezielte Nachfragen oder LLM-Review"
- id: DEMO-ESC-005
title: "Unternehmen bietet Treuhandkonto fuer Immobilienkauf"
category: escalation
goal: "Finanzregulierung bei Treuhandmodell"
company_profile:
sector: "Immobilien"
size: "medium"
country: "DE"
facts:
- "Bietet Treuhandkonto fuer Immobilientransaktionen"
- "Haelt Kundengelder temporaer"
- "Nicht als Finanzinstitut lizenziert"
scope_answers:
holds_client_funds: true
expected:
escalation_expected: true
escalation_reason: "Treuhandmodell kann Erlaubnispflicht nach ZAG ausloesen — juristische Pruefung noetig"
confidence: 0.40
- id: DEMO-ESC-006
title: "Startup nutzt KI fuer medizinische Diagnoseunterstuetzung"
category: escalation
goal: "MDR + AI Act Hochrisiko Abgrenzung"
company_profile:
sector: "Gesundheitswesen"
size: "micro"
country: "DE"
facts:
- "KI gibt Diagnosevorschlaege"
- "Aerzte treffen finale Entscheidung"
- "Unklar ob Medizinprodukt"
scope_answers:
uses_ai: true
processes_health_data: true
provides_diagnostic_support: true
expected:
applicable_controls_should_include:
- "AI Act Hochrisiko"
- "DSGVO Art. 9"
escalation_expected: true
escalation_reason: "Abgrenzung KI-Diagnoseunterstuetzung vs. Medizinprodukt (MDR) vertieft pruefen"
confidence: 0.45

View File

@@ -1,469 +1,173 @@
""" """
Applicability Use Case Tests — Real-world scenarios for control assignment. Applicability Use Case Tests — Demo test package for control assignment.
These test cases verify that our Applicability Engine correctly assigns Loads 24 use cases from demo_cases.yaml and validates structure,
and does NOT assign controls based on company profile + scope answers. consistency, and (when DB available) actual control assignment.
Each test case represents a real business scenario discussed during Categories:
product development. They serve as: A. Standard (6) — Branchen-Default korrekt?
1. Regression tests for the Applicability Engine B. Scope schlaegt Branche (6) — Sonderfaelle additiv?
2. Demo cases for the SDK C. Falsche Freunde / Negativ (6) — Keine Falschzuweisung?
3. Documentation of regulatory nuances D. Eskalation (6) — Unsicherheit erkannt?
Run: pytest tests/test_applicability_use_cases.py -v Run: pytest tests/test_applicability_use_cases.py -v
""" """
import os
import pytest import pytest
import yaml
# Load the demo cases once at import time so they can drive parametrization.
CASES_PATH = os.path.join(os.path.dirname(__file__), "demo_cases.yaml")
# Decode explicitly as UTF-8: the YAML contains non-ASCII characters (for
# example the em dash in several reasoning_summary values), which the platform
# default encoding would mis-decode on non-UTF-8 locales.
with open(CASES_PATH, encoding="utf-8") as f:
    DEMO_CASES = yaml.safe_load(f)
# One pytest id per case, taken from the case's own "id" field.
CASE_IDS = [c["id"] for c in DEMO_CASES]
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Test Case Data: Company Profiles + Expected Results # Structure Tests (always run)
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
USE_CASES = [ class TestDemoCaseStructure:
# =================================================================== """Verify demo cases are well-formed."""
# CASE 1: Bank mit TAN-Generator (Batterie im Produkt)
# ===================================================================
{
"id": "bank_tan_generator",
"name": "Bank gibt TAN-Generator mit Batterie an Kunden raus",
"company": {
"industry": "Finanzdienstleistungen",
"size": "large",
"scope_answers": {
"payment_services": True, # Bank ist Zahlungsdienstleister
"processes_health_data": False,
"uses_ai": False,
"third_country_transfer": True,
"manufactures_batteries": False, # Bank STELLT NICHT HER
},
},
"must_match": [
"PSD2", # Bank IST Zahlungsdienstleister
"DSGVO", # Immer
"AML", # Bank hat AML-Pflichten
],
"must_not_match": [
"Batterieverordnung", # Bank stellt TAN-Generator nicht her
"Maschinenverordnung", # Kein Maschinenbau
"MDR", # Keine Medizinprodukte
],
"rationale": (
"Die Bank beschafft den TAN-Generator von einem Hersteller. "
"Der Hersteller unterliegt der Batterieverordnung, nicht die Bank. "
"Die Bank ist aber selbst PSD2-reguliert als Zahlungsdienstleister."
),
},
# =================================================================== def test_case_count(self):
# CASE 2: Industrieunternehmen eroeffnet Webshop mit Stripe assert len(DEMO_CASES) == 24, f"Expected 24 cases, got {len(DEMO_CASES)}"
# ===================================================================
{
"id": "industrie_webshop_stripe",
"name": "Maschinenbau-Firma eroeffnet Webshop mit Stripe-Zahlung",
"company": {
"industry": "Maschinenbau",
"size": "medium",
"scope_answers": {
"payment_services": False, # Stripe ist der Zahlungsdienstleister!
"uses_ai": False,
"third_country_transfer": True, # Stripe ist US-Unternehmen
"processes_health_data": False,
"has_webshop": True,
},
},
"must_match": [
"DSGVO", # Immer
"DSGVO_AV_Vertrag", # Stripe als Auftragsverarbeiter
"DSGVO_Datenschutzinfo", # Stripe in Datenschutzerklaerung nennen
"Maschinenverordnung", # Kerngeschaeft
"CE", # Maschinenbau braucht CE
],
"must_not_match": [
"PSD2", # Stripe ist der Zahlungsdienstleister, NICHT die Firma
"AML", # Keine eigene Zahlungsabwicklung
"BaFin", # Kein Finanzinstitut
],
"rationale": (
"Stripe ist Zahlungsdienstleister in eigenem Auftrag. Der Webshop-Betreiber "
"wird nicht zum regulierten Zahlungsinstitut. Er muss nur Stripe als "
"Auftragsverarbeiter in der Datenschutzinformation korrekt benennen (DSGVO Art. 13/14). "
"Rechtsanwaltlich begleitete Stripe-Anbindung hat das bestaetigt."
),
},
# =================================================================== def test_category_distribution(self):
# CASE 3: Kleines SaaS-Startup (5 Personen) cats = [c.get("category", "unknown") for c in DEMO_CASES]
# =================================================================== assert cats.count("standard") == 6
{ assert cats.count("scope_beats_sector") == 6
"id": "saas_startup_klein", assert cats.count("false_friends") == 6
"name": "5-Personen SaaS-Startup (Cloud-Software, keine KI)", assert cats.count("escalation") == 6
"company": {
"industry": "Technologie/IT",
"size": "micro",
"scope_answers": {
"uses_ai": False,
"third_country_transfer": False, # EU-only Hosting
"processes_health_data": False,
"automated_decisions": False,
"payment_services": False,
"is_kritis_operator": False,
},
},
"must_match": [
"DSGVO", # Immer
"OWASP", # Software-Sicherheit
],
"must_not_match": [
"NIS2", # Zu klein (NIS2 ab medium/50 MA)
"AI_Act", # Keine KI
"Batterieverordnung", # Kein Hardware-Produkt
"TKG", # Kein Telko-Anbieter
"MDR", # Keine Medizinprodukte
"PSD2", # Kein Zahlungsdienstleister
"KRITIS", # Zu klein, kein kritischer Sektor
],
"rationale": (
"Ein Kleinstunternehmen ohne KI, ohne KRITIS-Zugehoerigkeit, ohne "
"Drittlandtransfer braucht nur DSGVO-Basics und Software-Sicherheit. "
"NIS2 greift erst ab 50 Mitarbeitern / 10 Mio Umsatz."
),
},
# =================================================================== @pytest.mark.parametrize("case", DEMO_CASES, ids=CASE_IDS)
# CASE 4: Mittelstaendischer Energieversorger def test_required_fields(self, case):
# =================================================================== assert case.get("id"), "Missing id"
{ assert case.get("title"), "Missing title"
"id": "energieversorger_mittelstand", assert case.get("category"), "Missing category"
"name": "Stadtwerk mit 200 Mitarbeitern (Strom + Gas)", assert case.get("goal"), "Missing goal"
"company": { assert case.get("company_profile"), "Missing company_profile"
"industry": "Energie", assert case.get("facts"), "Missing facts"
"size": "medium", assert case.get("expected"), "Missing expected"
"scope_answers": {
"is_kritis_operator": True,
"uses_ai": False,
"third_country_transfer": False,
"processes_health_data": False,
"employee_monitoring": True, # Leitwarte mit Kameras
},
},
"must_match": [
"DSGVO",
"NIS2", # Energie = KRITIS-Sektor + medium
"KRITIS", # Energieversorger
"BDSG", # Mitarbeiterueberwachung
"BSI_Grundschutz", # KRITIS-Betreiber
],
"must_not_match": [
"PSD2",
"AI_Act",
"MDR",
"TKG",
"Batterieverordnung",
],
"rationale": (
"Stadtwerk ist KRITIS-Betreiber im Energiesektor. NIS2 greift ab medium "
"(50 MA). BSI-Grundschutz ist de-facto Pflicht fuer KRITIS. "
"Mitarbeiterueberwachung (Leitwarte) erfordert BDSG-Compliance."
),
},
# =================================================================== @pytest.mark.parametrize("case", DEMO_CASES, ids=CASE_IDS)
# CASE 5: Gesundheits-App Startup mit KI def test_expected_has_escalation(self, case):
# =================================================================== expected = case["expected"]
{ assert "escalation_expected" in expected, f"{case['id']}: Missing escalation_expected"
"id": "health_app_ki",
"name": "Startup entwickelt KI-basierte Gesundheits-App (DiGA)",
"company": {
"industry": "Gesundheitswesen",
"size": "small",
"scope_answers": {
"uses_ai": True,
"processes_health_data": True,
"automated_decisions": True,
"third_country_transfer": False,
"is_kritis_operator": False,
},
},
"must_match": [
"DSGVO",
"DSGVO_Art9", # Gesundheitsdaten = besondere Kategorie
"DSGVO_Art22", # Automatisierte Entscheidungen
"DSGVO_Art35", # DSFA fuer Gesundheitsdaten + KI
"AI_Act", # KI-Einsatz
"MDR", # Gesundheits-App kann Medizinprodukt sein
"BSI_TR_03161", # Technische Richtlinie fuer mobile Gesundheits-Apps
"DiGAV", # Digitale Gesundheitsanwendung
],
"must_not_match": [
"PSD2",
"TKG",
"Batterieverordnung",
"Maschinenverordnung",
"NIS2", # Zu klein
],
"rationale": (
"Gesundheits-App mit KI trifft die schaerfsten Anforderungen: "
"DSGVO Art. 9 (Gesundheitsdaten), Art. 22 (automatisierte Entscheidungen), "
"Art. 35 (DSFA Pflicht), AI Act (Hochrisiko-KI im Gesundheitsbereich), "
"MDR (evtl. Medizinprodukt), BSI TR-03161 (Sicherheit mobiler Gesundheits-Apps)."
),
},
# =================================================================== @pytest.mark.parametrize("case", DEMO_CASES, ids=CASE_IDS)
# CASE 6: Automobilzulieferer (TISAX-relevant) def test_escalation_has_reason(self, case):
# =================================================================== expected = case["expected"]
{ if expected.get("escalation_expected"):
"id": "automotive_zulieferer", assert expected.get("escalation_reason"), \
"name": "Automobilzulieferer mit 500 MA, Prototypen-Fertigung", f"{case['id']}: escalation_expected=true but no escalation_reason"
"company": {
"industry": "Automobil",
"size": "large",
"scope_answers": {
"uses_ai": False,
"third_country_transfer": True, # Lieferkette international
"is_kritis_operator": False,
"handles_prototypes": True,
"supply_chain_automotive": True,
},
},
"must_match": [
"DSGVO",
"NIS2", # Large + Automotive (Lieferkette)
"ISO27001", # TISAX basiert auf ISO 27001
"Prototypenschutz", # OEM-Anforderung
"CE", # Produkte in EU
"Maschinenverordnung", # Produktion
],
"must_not_match": [
"PSD2",
"TKG",
"MDR",
"AI_Act",
],
"rationale": (
"Automobilzulieferer braucht TISAX-Readiness (basiert auf ISO 27001), "
"Prototypenschutz (OEM-Vorgabe), und NIS2 (Lieferkette, large). "
"TISAX selbst koennen wir nicht direkt zuweisen (VDA ISA proprietaer), "
"aber die zugrunde liegenden ISO/NIST Controls decken es ab."
),
},
# =================================================================== @pytest.mark.parametrize("case", DEMO_CASES, ids=CASE_IDS)
# CASE 7: Rechtsanwaltskanzlei def test_no_overlap_include_exclude(self, case):
# =================================================================== expected = case["expected"]
{ include = set(expected.get("applicable_controls_should_include", []))
"id": "rechtsanwaltskanzlei", exclude = set(expected.get("applicable_controls_should_not_include", []))
"name": "Wirtschaftskanzlei mit 30 Anwaelten", overlap = include & exclude
"company": { assert not overlap, f"{case['id']}: Overlap in include/exclude: {overlap}"
"industry": "Recht/Kanzlei",
"size": "small",
"scope_answers": {
"uses_ai": True, # KI fuer Dokumentenanalyse
"third_country_transfer": True, # US-Cloud-Dienste
"processes_health_data": False,
"automated_decisions": False,
"handles_legal_privilege": True,
},
},
"must_match": [
"DSGVO",
"DSGVO_Art46", # Drittlandtransfer (SCC)
"AI_Act", # KI-Einsatz
"BRAO", # Berufsordnung Rechtsanwaelte
"Mandantengeheimnis", # Berufsgeheimnis
],
"must_not_match": [
"NIS2", # Zu klein, kein KRITIS-Sektor
"PSD2",
"TKG",
"MDR",
"Batterieverordnung",
],
"rationale": (
"Kanzlei mit KI-Tools und US-Cloud braucht DSGVO + SCC (Drittland), "
"AI Act (KI-Einsatz), und berufsrechtliche Anforderungen (BRAO, "
"Mandantengeheimnis). NIS2 greift nicht (kein KRITIS-Sektor, zu klein)."
),
},
# =================================================================== @pytest.mark.parametrize("case", DEMO_CASES, ids=CASE_IDS)
# CASE 8: E-Commerce Haendler mit eigenem Zahlungssystem def test_confidence_range(self, case):
# =================================================================== conf = case["expected"].get("confidence")
{ if conf is not None:
"id": "ecommerce_eigene_zahlung", assert 0.0 <= conf <= 1.0, f"{case['id']}: confidence {conf} out of range"
"name": "Online-Haendler mit eigenem Payment-Processing (keine Stripe-Delegation)",
"company": {
"industry": "E-Commerce/Handel",
"size": "medium",
"scope_answers": {
"payment_services": True, # EIGENE Zahlungsabwicklung
"uses_ai": True, # KI-Empfehlungen
"third_country_transfer": True,
"processes_minors_data": True, # Spielzeug-Shop
},
},
"must_match": [
"DSGVO",
"DSGVO_Art8", # Kinderdaten
"PSD2", # EIGENER Payment-Service
"AI_Act", # KI-Empfehlungssystem
],
"must_not_match": [
"TKG",
"MDR",
"Maschinenverordnung",
],
"rationale": (
"Unterschied zu Case 2: Dieser Haendler betreibt EIGENES Payment-Processing, "
"ist also PSD2-reguliert. Dazu: Kinderdaten (Spielzeug-Shop) erfordern "
"DSGVO Art. 8 (Einwilligung Erziehungsberechtigter). KI-Empfehlungen "
"fallen unter AI Act."
),
},
# =================================================================== def test_negative_test_ratio(self):
# CASE 9: Bildungseinrichtung (Schule) """At least 40% of cases should test non-assignment (R5)."""
# =================================================================== cases_with_not_include = sum(
{ 1 for c in DEMO_CASES
"id": "schule", if c["expected"].get("applicable_controls_should_not_include")
"name": "Oeffentliche Schule mit 80 Lehrkraeften", )
"company": { ratio = cases_with_not_include / len(DEMO_CASES)
"industry": "Bildung", assert ratio >= 0.4, f"Only {ratio:.0%} cases have must_not_include (need >= 40%)"
"size": "medium",
"scope_answers": {
"processes_minors_data": True,
"uses_ai": True, # KI-Lernplattform
"video_surveillance": True, # Schulgelaende
"employee_monitoring": False,
"is_public_sector": True,
},
},
"must_match": [
"DSGVO",
"DSGVO_Art8", # Kinderdaten
"DSGVO_Art35", # DSFA (Kinderdaten + KI + Video)
"AI_Act", # KI-Lernplattform
"Schulrecht", # Landesschulgesetz
"BDSG", # Oeffentliche Stelle
],
"must_not_match": [
"PSD2",
"NIS2", # Bildung kein KRITIS-Sektor
"TKG",
"AML",
],
"rationale": (
"Schule verarbeitet Kinderdaten (DSGVO Art. 8), nutzt KI (AI Act), "
"hat Videoueberwachung (DSFA Pflicht). Als oeffentliche Stelle gilt BDSG. "
"NIS2 erfasst Bildung nicht als KRITIS-Sektor."
),
},
# =================================================================== def test_unique_ids(self):
# CASE 10: Telko-Unternehmen assert len(CASE_IDS) == len(set(CASE_IDS)), "Duplicate case IDs found"
# ===================================================================
{
"id": "telko_provider",
"name": "Regionaler Internetanbieter mit 150 MA",
"company": {
"industry": "Telekommunikation",
"size": "medium",
"scope_answers": {
"is_kritis_operator": True,
"uses_ai": False,
"third_country_transfer": False,
"processes_health_data": False,
},
},
"must_match": [
"DSGVO",
"TKG", # Telko-spezifisch
"TTDSG", # Telekommunikation-Telemedien-Datenschutz
"NIS2", # KRITIS + medium
"KRITIS",
"BSI_Grundschutz",
],
"must_not_match": [
"PSD2",
"AI_Act",
"MDR",
"Batterieverordnung",
"Maschinenverordnung",
],
"rationale": (
"Telko-Anbieter ist KRITIS-Betreiber, TKG und TTDSG sind direkt anwendbar. "
"NIS2 greift (KRITIS + medium). BSI-Grundschutz de-facto Pflicht."
),
},
]
# ---------------------------------------------------------------------------
# Test Functions
# ---------------------------------------------------------------------------
class TestApplicabilityUseCases:
    """Verify that the Applicability Engine assigns controls correctly.

    These tests only validate the structure of the ``USE_CASES`` fixtures;
    they do not call the engine itself.
    """

    @pytest.mark.parametrize("case", USE_CASES, ids=[c["id"] for c in USE_CASES])
    def test_use_case_documented(self, case):
        """Each use case has required fields (present and non-empty)."""
        assert case["id"]
        assert case["name"]
        assert case["company"]["industry"]
        assert case["company"]["size"]
        assert case["must_match"]
        assert case["must_not_match"]
        assert case["rationale"]

    @pytest.mark.parametrize("case", USE_CASES, ids=[c["id"] for c in USE_CASES])
    def test_must_match_not_overlap_must_not(self, case):
        """must_match and must_not_match should not overlap."""
        overlap = set(case["must_match"]) & set(case["must_not_match"])
        assert not overlap, f"Overlap in {case['id']}: {overlap}"

    @pytest.mark.parametrize("case", USE_CASES, ids=[c["id"] for c in USE_CASES])
    def test_scope_answers_are_booleans(self, case):
        """Scope answers should be boolean values."""
        for key, val in case["company"]["scope_answers"].items():
            assert isinstance(val, bool), f"{case['id']}: scope {key} is {type(val)}, expected bool"


# ---------------------------------------------------------------------------
# Acceptance Rule Tests (always run)
# ---------------------------------------------------------------------------
class TestAcceptanceRules:
    """Verify acceptance rules are encoded in the test data."""

    def test_r1_no_false_certainty(self):
        """R1: Escalation cases must not have high confidence."""
        for case in DEMO_CASES:
            if case["expected"].get("escalation_expected"):
                # A missing confidence defaults to 1.0 and therefore fails
                # the check below: escalation cases must state a confidence.
                conf = case["expected"].get("confidence", 1.0)
                assert conf < 0.80, \
                    f"{case['id']}: escalation_expected but confidence={conf} (should be < 0.80)"

    def test_r2_scope_beats_sector(self):
        """R2: scope_beats_sector cases must have scope_triggers."""
        for case in DEMO_CASES:
            if case.get("category") == "scope_beats_sector":
                triggers = case["expected"].get("scope_triggers", [])
                assert triggers, f"{case['id']}: scope_beats_sector but no scope_triggers"

    def test_r3_repo_signals_not_sufficient(self):
        """R3: Cases with repo_signals should not auto-assign heavy regulation."""
        for case in DEMO_CASES:
            if case.get("repo_signals") and not case["expected"].get("escalation_expected"):
                # If repo signals exist but no escalation, PSD2 etc. must NOT be included
                include = case["expected"].get("applicable_controls_should_include", [])
                for ctrl in include:
                    # Containment check per entry; presumably entries are
                    # strings, so this is a substring match — TODO confirm.
                    assert "PSD2" not in ctrl, \
                        f"{case['id']}: repo_signals + PSD2 included without escalation (R3)"

    def test_r4_standard_first(self):
        """R4: Standard cases should have applicable_industries set."""
        for case in DEMO_CASES:
            if case.get("category") == "standard":
                industries = case["expected"].get("applicable_industries")
                assert industries, f"{case['id']}: standard case without applicable_industries"
# ---------------------------------------------------------------------------
# Integration Tests (require DB + Applicability Engine)
# Placeholder — runs against real DB + Applicability Engine
# ---------------------------------------------------------------------------
@pytest.mark.skip(reason="Requires running DB + Applicability Engine — enable for SDK demo")
class TestApplicabilityIntegration:
    """Run use cases against the real Applicability Engine.

    Enable by removing @skip and ensuring DATABASE_URL is set.
    These tests query the actual canonical_controls table
    and verify that the correct controls are returned.

    Scoring per case:
        must_include_match: 0..1
        must_not_include_match: 0..1
        reasoning_correct: 0..1
        escalation_correct: 0..1
        total_score: 0..4

    Both test methods are placeholders: their bodies are sketches kept as
    comments until the engine and DB session are wired in.
    """

    @pytest.mark.parametrize("case", USE_CASES, ids=[c["id"] for c in USE_CASES])
    def test_applicability_engine(self, case):
        """Verify control assignment for each use case."""
        # TODO: Import ApplicabilityEngine, query DB, check results
        # from services.applicability_engine import get_applicable_controls
        # from db.session import SessionLocal
        #
        # db = SessionLocal()
        # result = get_applicable_controls(
        #     db=db,
        #     industry=case["company"]["industry"],
        #     company_size=case["company"]["size"],
        #     scope_signals=case["company"]["scope_answers"],
        # )
        # control_sources = {c.source_citation.get("source", "") for c in result["controls"]}
        #
        # for required in case["must_match"]:
        #     assert any(required.lower() in s.lower() for s in control_sources), \
        #         f"{case['id']}: Expected {required} in results"
        #
        # for forbidden in case["must_not_match"]:
        #     assert not any(forbidden.lower() in s.lower() for s in control_sources), \
        #         f"{case['id']}: {forbidden} should NOT be in results"
        pass

    @pytest.mark.parametrize("case", DEMO_CASES, ids=CASE_IDS)
    def test_applicability(self, case):
        """Verify control assignment for each demo case (placeholder)."""
        # TODO: Implement against real ApplicabilityEngine
        #
        # from services.applicability_engine import get_applicable_controls
        # from db.session import SessionLocal
        #
        # db = SessionLocal()
        # result = get_applicable_controls(
        #     db=db,
        #     industry=case["company_profile"]["sector"],
        #     company_size=case["company_profile"].get("size", "medium"),
        #     scope_signals=case.get("scope_answers", {}),
        # )
        #
        # # Score: must_include_match
        # for required in case["expected"].get("applicable_controls_should_include", []):
        #     assert any(required.lower() in str(c).lower() for c in result["controls"])
        #
        # # Score: must_not_include_match
        # for forbidden in case["expected"].get("applicable_controls_should_not_include", []):
        #     assert not any(forbidden.lower() in str(c).lower() for c in result["controls"])
        pass