diff --git a/control-pipeline/tests/demo_cases.yaml b/control-pipeline/tests/demo_cases.yaml new file mode 100644 index 0000000..8ec3687 --- /dev/null +++ b/control-pipeline/tests/demo_cases.yaml @@ -0,0 +1,725 @@ +# ============================================================================ +# BreakPilot Compliance — Demo Test Cases +# ============================================================================ +# +# 24 Use Cases in 4 Kategorien: +# A. Standard (6) — Branchen-Default korrekt? +# B. Scope schlaegt Branche (6) — Sonderfaelle additiv? +# C. Falsche Freunde / Negativ (6) — Keine Falschzuweisung? +# D. Eskalation (6) — Unsicherheit erkannt? +# +# Jeder Case erzwingt 5 Outputs: +# 1. applicable_industries +# 2. scope_triggers +# 3. excluded_by_default +# 4. reasoning_summary +# 5. confidence + escalation +# +# Akzeptanzregeln: +# R1: Keine harte Falschsicherheit bei unklarem Sachverhalt +# R2: Scope schlaegt Branchen-Default (additiv) +# R3: Repo-Signale allein reichen nicht fuer harte Regulierung +# R4: Standardfall zuerst, Sonderfall additiv +# R5: Mindestens 40% Negativtests +# +# Scoring pro Case: +# must_include_match: 0..1 +# must_not_include_match: 0..1 +# reasoning_correct: 0..1 +# escalation_correct: 0..1 +# total_score: 0..4 +# ============================================================================ + +# ============================================================================ +# A. 
STANDARD-BRANCHENFAELLE (6) +# ============================================================================ + +- id: DEMO-APP-005 + title: "Telekommunikationsanbieter" + category: standard + goal: "TKG/Telko-Pflichten im Standardfall" + company_profile: + sector: "Telekommunikation" + size: "medium" + country: "DE" + facts: + - "Erbringt Kommunikationsdienste" + - "Bietet Mobilfunkvertraege" + scope_answers: + provides_telecom_service: true + is_kritis_operator: true + expected: + applicable_industries: ["Telekommunikation"] + scope_triggers: ["provides_telecom_service", "is_kritis_operator"] + applicable_controls_should_include: + - "TKG" + - "TTDSG" + - "NIS2" + - "DSGVO" + applicable_controls_should_not_include: + - "PSD2" + - "Batterieverordnung" + - "MDR" + excluded_by_default: ["PSD2", "Batterieverordnung", "MDR"] + reasoning_summary: "Telko-Anbieter ist KRITIS-Betreiber, TKG und TTDSG direkt anwendbar." + confidence: 0.95 + escalation_expected: false + +- id: DEMO-APP-009 + title: "Reines SaaS-Unternehmen" + category: standard + goal: "Batterie-/Produktregulierung darf nicht anspringen" + company_profile: + sector: "Technologie/IT" + size: "small" + country: "DE" + facts: + - "Nur Browser-Anwendung" + - "Keine Hardware" + scope_answers: + distributes_physical_products: false + contains_battery: false + uses_ai: false + expected: + applicable_industries: ["Technologie/IT"] + scope_triggers: [] + applicable_controls_should_include: + - "DSGVO" + - "OWASP" + applicable_controls_should_not_include: + - "Batterieverordnung" + - "Maschinenverordnung" + - "MDR" + - "TKG" + - "PSD2" + excluded_by_default: ["Batterieverordnung", "Maschinenverordnung", "MDR", "TKG", "PSD2"] + reasoning_summary: "Reines SaaS ohne Hardware, ohne KI, ohne Finanzfunktion." 
+ confidence: 0.95 + escalation_expected: false + +- id: DEMO-STD-003 + title: "Mittelstaendischer Energieversorger (Stadtwerk)" + category: standard + goal: "KRITIS + NIS2 korrekt zugewiesen" + company_profile: + sector: "Energie" + size: "medium" + country: "DE" + facts: + - "Stadtwerk mit 200 Mitarbeitern" + - "Strom- und Gasversorgung" + - "Leitwarte mit Kameraueberwachung" + scope_answers: + is_kritis_operator: true + employee_monitoring: true + expected: + applicable_industries: ["Energie"] + scope_triggers: ["is_kritis_operator", "employee_monitoring"] + applicable_controls_should_include: + - "NIS2" + - "KRITIS" + - "BSI Grundschutz" + - "DSGVO" + - "BDSG" + applicable_controls_should_not_include: + - "PSD2" + - "AI Act" + - "MDR" + excluded_by_default: ["PSD2", "AI Act", "MDR", "TKG"] + reasoning_summary: "Stadtwerk ist KRITIS-Betreiber im Energiesektor, NIS2 ab medium." + confidence: 0.95 + escalation_expected: false + +- id: DEMO-STD-004 + title: "Automobilzulieferer mit Prototypen" + category: standard + goal: "TISAX-Readiness korrekt vorbereitet" + company_profile: + sector: "Automobil" + size: "large" + country: "DE" + facts: + - "500 Mitarbeiter" + - "Prototypenfertigung fuer OEMs" + - "Internationale Lieferkette" + scope_answers: + handles_prototypes: true + supply_chain_automotive: true + third_country_transfer: true + expected: + applicable_industries: ["Automobil"] + scope_triggers: ["handles_prototypes", "supply_chain_automotive", "third_country_transfer"] + applicable_controls_should_include: + - "ISO 27001" + - "Prototypenschutz" + - "NIS2" + - "DSGVO" + - "CE" + applicable_controls_should_not_include: + - "PSD2" + - "TKG" + - "MDR" + excluded_by_default: ["PSD2", "TKG", "MDR"] + reasoning_summary: "Automobilzulieferer braucht TISAX-Readiness (ISO 27001 Basis), Prototypenschutz." 
+ confidence: 0.90 + escalation_expected: false + +- id: DEMO-STD-005 + title: "Gesundheits-App mit KI (DiGA)" + category: standard + goal: "KI + Gesundheitsdaten + MDR korrekt" + company_profile: + sector: "Gesundheitswesen" + size: "small" + country: "DE" + facts: + - "KI-basierte Gesundheits-App" + - "Verarbeitet Gesundheitsdaten" + - "Automatisierte Empfehlungen" + scope_answers: + uses_ai: true + processes_health_data: true + automated_decisions: true + expected: + applicable_industries: ["Gesundheitswesen"] + scope_triggers: ["uses_ai", "processes_health_data", "automated_decisions"] + applicable_controls_should_include: + - "DSGVO Art. 9" + - "DSGVO Art. 22" + - "DSGVO Art. 35" + - "AI Act" + - "MDR" + - "BSI TR-03161" + applicable_controls_should_not_include: + - "PSD2" + - "TKG" + - "Batterieverordnung" + excluded_by_default: ["PSD2", "TKG", "Batterieverordnung"] + reasoning_summary: "Gesundheits-App mit KI trifft DSGVO Art. 9/22/35, AI Act Hochrisiko, MDR." + confidence: 0.90 + escalation_expected: false + +- id: DEMO-STD-006 + title: "Rechtsanwaltskanzlei mit KI und US-Cloud" + category: standard + goal: "Berufsrecht + KI + Drittland korrekt" + company_profile: + sector: "Recht/Kanzlei" + size: "small" + country: "DE" + facts: + - "30 Anwaelte" + - "KI fuer Dokumentenanalyse" + - "US-Cloud-Dienste" + scope_answers: + uses_ai: true + third_country_transfer: true + handles_legal_privilege: true + expected: + applicable_industries: ["Recht/Kanzlei"] + scope_triggers: ["uses_ai", "third_country_transfer", "handles_legal_privilege"] + applicable_controls_should_include: + - "DSGVO" + - "DSGVO Art. 46 (SCC)" + - "AI Act" + - "BRAO" + applicable_controls_should_not_include: + - "NIS2" + - "PSD2" + - "TKG" + excluded_by_default: ["NIS2", "PSD2", "TKG", "MDR"] + reasoning_summary: "Kanzlei mit KI und US-Cloud braucht DSGVO+SCC, AI Act, Berufsrecht." 
+ confidence: 0.90 + escalation_expected: false + +# ============================================================================ +# B. SCOPE SCHLAEGT BRANCHE (6) +# ============================================================================ + +- id: DEMO-APP-003 + title: "Bank vertreibt TAN-Generator mit Batterie" + category: scope_beats_sector + goal: "Batterie-Controls trotz Bankensektor additiv" + company_profile: + sector: "Finanzdienstleistungen" + size: "large" + country: "DE" + facts: + - "Bank gibt TAN-Generatoren an Kunden aus" + - "Geraet enthaelt Batterie" + - "Physisches Produkt wird in Verkehr gebracht" + scope_answers: + distributes_physical_products: true + contains_battery: true + financial_institution: true + expected: + applicable_industries: ["Finanzdienstleistungen"] + scope_triggers: ["distributes_physical_products", "contains_battery"] + applicable_controls_should_include: + - "bankenspezifische Controls" + - "batteriebezogene Controls" + applicable_controls_should_not_include: + - "TKG" + excluded_by_default: ["TKG", "Maschinenverordnung"] + reasoning_summary: "Bank bringt physisches Produkt mit Batterie in Verkehr — Batterieverordnung additiv." + confidence: 0.85 + escalation_expected: false + +- id: DEMO-APP-008 + title: "Chemieunternehmen mit akkubetriebenen Messgeraeten" + category: scope_beats_sector + goal: "Batteriepflichten im naheliegenden Fall" + company_profile: + sector: "Chemie" + size: "medium" + country: "DE" + facts: + - "Vertreibt Messgeraete mit Akku" + scope_answers: + distributes_physical_products: true + contains_battery: true + expected: + applicable_industries: ["Chemie"] + scope_triggers: ["distributes_physical_products", "contains_battery"] + applicable_controls_should_include: + - "Batterieverordnung" + - "CE" + applicable_controls_should_not_include: + - "PSD2" + - "TKG" + excluded_by_default: ["PSD2", "TKG"] + reasoning_summary: "Chemieunternehmen bringt akkubetriebene Geraete in Verkehr." 
+ confidence: 0.90 + escalation_expected: false + +- id: DEMO-APP-011 + title: "KI im Recruiting" + category: scope_beats_sector + goal: "KI/HR/AGG Controls unabhaengig von Branche" + company_profile: + sector: "Beliebig" + size: "medium" + country: "DE" + facts: + - "KI priorisiert Bewerbungen" + - "Automatisierte Absagen werden vorbereitet" + scope_answers: + uses_ai: true + automated_decisions: true + expected: + applicable_industries: ["all"] + scope_triggers: ["uses_ai", "automated_decisions"] + applicable_controls_should_include: + - "DSGVO Art. 22" + - "AGG" + - "AI Act" + excluded_by_default: [] + reasoning_summary: "KI im Recruiting loest DSGVO Art. 22, AGG-Diskriminierungsschutz und AI Act aus." + confidence: 0.70 + escalation_expected: true + escalation_reason: "KI-basierte HR-Entscheidungen sind AI Act Hochrisiko — vertiefte Pruefung" + +- id: DEMO-APP-013 + title: "Schulmessenger mit KI-Uebersetzung" + category: scope_beats_sector + goal: "KI + Kinderdaten + Drittland erkennen" + company_profile: + sector: "Bildung" + size: "medium" + country: "DE" + facts: + - "Zwei-Wege-Kommunikation Schule-Eltern" + - "Nachrichten werden automatisch uebersetzt" + - "Personenbezogene Daten von Eltern und Kindern" + scope_answers: + processes_minors_data: true + uses_ai: true + third_country_transfer: true # Uebersetzungs-API + expected: + applicable_industries: ["Bildung"] + scope_triggers: ["processes_minors_data", "uses_ai", "third_country_transfer"] + applicable_controls_should_include: + - "DSGVO Art. 8" + - "AI Act" + - "DSGVO Art. 46 (SCC)" + applicable_controls_should_not_include: + - "TKG" + - "PSD2" + excluded_by_default: ["TKG", "PSD2"] + reasoning_summary: "Schulkommunikation mit KI-Uebersetzung und Kinderdaten loest DSGVO Art. 8 + AI Act + SCC aus." 
+ confidence: 0.70 + escalation_expected: true + escalation_reason: "KI-Drittland-/Modellgovernance-Review fuer Uebersetzungs-API" + +- id: DEMO-APP-016 + title: "White-Label-Hardwarevertrieb" + category: scope_beats_sector + goal: "Inverkehrbringen unter eigener Marke = Produktpflichten" + company_profile: + sector: "E-Commerce/Handel" + size: "small" + country: "DE" + facts: + - "Vertreibt Geraete unter eigener Marke" + - "Produktion durch Dritten" + - "Geraet enthaelt Akku" + scope_answers: + places_product_on_market_under_own_brand: true + contains_battery: true + distributes_physical_products: true + expected: + applicable_industries: ["E-Commerce/Handel"] + scope_triggers: ["places_product_on_market_under_own_brand", "contains_battery"] + applicable_controls_should_include: + - "Batterieverordnung" + - "CE" + - "Produkthaftung" + excluded_by_default: ["PSD2", "TKG"] + reasoning_summary: "White-Label = Inverkehrbringer unter eigener Marke, traegt Produktpflichten." + confidence: 0.90 + escalation_expected: false + +- id: DEMO-APP-002 + title: "Industrieplattform mit eigener Zahlungsabwicklung" + category: scope_beats_sector + goal: "Echte Finanzregulierung bei Geschaeftsmodell-Wechsel" + company_profile: + sector: "Produktion/Industrie" + size: "medium" + country: "DE" + facts: + - "Betreibt B2B-Marktplatz" + - "Haelt Kundengelder kurzzeitig zwischen" + - "Leitet Zahlungen an Haendler weiter" + scope_answers: + operates_payment_service: true + holds_client_funds: true + marketplace_model: true + expected: + applicable_industries: ["Produktion/Industrie"] + scope_triggers: ["operates_payment_service", "holds_client_funds"] + applicable_controls_should_include: + - "PSD2" + - "AML/KYC" + excluded_by_default: [] + reasoning_summary: "Industrieplattform mit eigenem Payment = regulatorische Zahlungsdienstpflicht." 
+ confidence: 0.60 + escalation_expected: true + escalation_reason: "Regulatorische Einordnung erfordert vertiefte Pruefung (Erlaubnispflicht)" + +# ============================================================================ +# C. FALSCHE FREUNDE / NEGATIVTESTS (6) +# ============================================================================ + +- id: DEMO-APP-001 + title: "Industrieunternehmen mit Webshop und Stripe Checkout" + category: false_friends + goal: "Stripe darf nicht PSD2 ausloesen" + company_profile: + sector: "Produktion/Industrie" + size: "medium" + country: "DE" + business_model: "B2B-Hersteller mit ergaenzendem Webshop" + facts: + - "Verkauft Ersatzteile ueber Webshop" + - "Nutzt Stripe Checkout als externen Zahlungsdienstleister" + - "Speichert keine vollstaendigen Kartendaten selbst" + - "Keine Zahlungsabwicklung im eigenen Namen" + scope_answers: + operates_payment_service: false + stores_card_data: false + sells_physical_products: true + repo_signals: + - "stripe checkout" + expected: + applicable_industries: ["Produktion/Industrie"] + scope_triggers: [] + applicable_controls_should_include: + - "DSGVO Datenschutzhinweise" + - "DSGVO Empfaenger-/Dienstleistertransparenz" + - "VVT" + applicable_controls_should_not_include: + - "PSD2" + - "AML" + - "Batterieverordnung" + - "TKG" + excluded_by_default: ["PSD2", "AML", "Batterieverordnung", "TKG"] + reasoning_summary: "Stripe ist externer Zahlungsdienstleister; Haendler wird nicht reguliertes Zahlungsinstitut." 
+ confidence: 0.92 + escalation_expected: false + +- id: DEMO-APP-004 + title: "Direktbank ohne physische Produkte" + category: false_friends + goal: "Keine Batteriepflichten nur wegen Bank" + company_profile: + sector: "Finanzdienstleistungen" + size: "large" + country: "DE" + facts: + - "Nur Mobile App und Webbanking" + - "Keine Token, keine TAN-Geraete, keine Hardware" + scope_answers: + distributes_physical_products: false + contains_battery: false + expected: + applicable_industries: ["Finanzdienstleistungen"] + scope_triggers: [] + applicable_controls_should_include: + - "PSD2" + - "DSGVO" + - "BaFin" + applicable_controls_should_not_include: + - "Batterieverordnung" + - "Maschinenverordnung" + - "CE" + excluded_by_default: ["Batterieverordnung", "Maschinenverordnung", "CE"] + reasoning_summary: "Reine Digitalbank ohne physische Produkte — keine Produktregulierung." + confidence: 0.95 + escalation_expected: false + +- id: DEMO-APP-006 + title: "Maschinenbauer mit Kundenhotline" + category: false_friends + goal: "Hotline darf nicht TKG triggern" + company_profile: + sector: "Produktion/Industrie" + size: "medium" + country: "DE" + facts: + - "Hat Support-Hotline fuer Kunden" + - "Erbringt keinen oeffentlichen Telekommunikationsdienst" + scope_answers: + provides_telecom_service: false + operates_customer_hotline: true + expected: + applicable_industries: ["Produktion/Industrie"] + scope_triggers: [] + applicable_controls_should_not_include: + - "TKG" + - "TTDSG Telko-Kernpflichten" + excluded_by_default: ["TKG"] + reasoning_summary: "Kundenhotline ist kein oeffentlicher Telekommunikationsdienst." 
+ confidence: 0.92 + escalation_expected: false + +- id: DEMO-APP-012 + title: "Schule mit Einweg-Elternkommunikation" + category: false_friends + goal: "Einweg-Nachrichten sind kein Telko-Dienst" + company_profile: + sector: "Bildung" + size: "medium" + country: "DE" + facts: + - "Eltern erhalten Einweg-Nachrichten" + - "Keine offene Chat-Funktion" + scope_answers: + provides_telecom_service: false + processes_minors_data: true + expected: + applicable_industries: ["Bildung"] + scope_triggers: ["processes_minors_data"] + applicable_controls_should_include: + - "DSGVO" + - "DSGVO Art. 8" + applicable_controls_should_not_include: + - "TKG" + excluded_by_default: ["TKG", "PSD2"] + reasoning_summary: "Einweg-Elterninfo ist kein oeffentlicher Kommunikationsdienst." + confidence: 0.90 + escalation_expected: false + +- id: DEMO-APP-014 + title: "Repo enthaelt Stripe SDK — nur SaaS-Billing" + category: false_friends + goal: "Repo-Signale duerfen Scope nicht uebersteuern" + company_profile: + sector: "Technologie/IT" + size: "small" + country: "DE" + facts: + - "Repo enthaelt stripe dependency" + - "Nur Billing fuer eigenes SaaS-Abo" + - "Keine Zahlungsabwicklung fuer Dritte" + repo_signals: + - "stripe" + scope_answers: + operates_payment_service: false + expected: + applicable_industries: ["Technologie/IT"] + scope_triggers: [] + applicable_controls_should_not_include: + - "PSD2" + applicable_controls_should_include: + - "DSGVO" + - "Vendor-/Security-Controls" + excluded_by_default: ["PSD2"] + reasoning_summary: "Stripe SDK im Repo = SaaS-Billing, nicht eigene Zahlungsabwicklung." 
+ confidence: 0.90 + escalation_expected: false + +- id: DEMO-APP-017 + title: "Interne Nutzung batteriebetriebener Geraete" + category: false_friends + goal: "Keine Batterie-Inverkehrbringungspflichten" + company_profile: + sector: "Beliebig" + size: "medium" + country: "DE" + facts: + - "Mitarbeiter nutzen Laptops und Scanner" + - "Kein Vertrieb eigener Batterieprodukte" + scope_answers: + distributes_physical_products: false + places_battery_products_on_market: false + expected: + applicable_industries: ["all"] + scope_triggers: [] + applicable_controls_should_not_include: + - "Batterieverordnung (Inverkehrbringen)" + excluded_by_default: ["Batterieverordnung"] + reasoning_summary: "Interne Nutzung von Geraeten mit Batterien loest keine Inverkehrbringungspflichten aus." + confidence: 0.95 + escalation_expected: false + +# ============================================================================ +# D. ESKALATIONSFAELLE (6) +# ============================================================================ + +- id: DEMO-APP-007 + title: "IoT-Hersteller mit vernetztem Geraet (SIM + Funkmodul)" + category: escalation + goal: "Abgrenzung Hardware vs. Kommunikationsdienst" + company_profile: + sector: "Produktion/Industrie" + size: "medium" + country: "DE" + facts: + - "Verkauft Geraet mit Funkmodul" + - "Geraet kommuniziert ueber Mobilfunk" + - "SIM-Konnektivitaet wird mitgeliefert" + scope_answers: + sells_connected_device: true + provides_embedded_connectivity: true + expected: + applicable_industries: ["Produktion/Industrie"] + scope_triggers: ["sells_connected_device", "provides_embedded_connectivity"] + applicable_controls_should_include: + - "CE" + - "Cyber Resilience Act" + - "Funkgeraeterichtlinie (RED)" + escalation_expected: true + escalation_reason: "Abgrenzung Hardwareprodukt vs. 
Kommunikationsdienst vertieft pruefen" + confidence: 0.55 + +- id: DEMO-APP-010 + title: "Plattform mit Verkaeufer-Onboarding und Transaktionsmonitoring" + category: escalation + goal: "AML/KYC Relevanz differenziert" + company_profile: + sector: "Technologie/IT" + size: "medium" + country: "DE" + facts: + - "Onboardet externe Verkaeufer" + - "Prueft Identitaet gewerblicher Anbieter" + - "Ueberwacht verdaechtige Zahlungsstroeme" + scope_answers: + marketplace_model: true + performs_kyc: true + monitors_transactions: true + expected: + applicable_industries: ["Technologie/IT"] + scope_triggers: ["marketplace_model", "performs_kyc", "monitors_transactions"] + applicable_controls_should_include: + - "AML/KYC Review-Controls" + escalation_expected: true + escalation_reason: "Klaerung ob GwG-/aufsichtsrechtliche Pflichten oder nur Fraud-/Plattformkontrollen" + confidence: 0.50 + +- id: DEMO-APP-015 + title: "Repo zeigt Wallet-/Custody-Funktionen" + category: escalation + goal: "Technische Signale deuten auf Regulierung" + company_profile: + sector: "Technologie/IT" + size: "small" + country: "DE" + facts: + - "Produktbeschreibung unvollstaendig" + repo_signals: + - "wallet_service" + - "custody" + - "kyc_provider" + - "transaction_monitoring" + scope_answers: {} + expected: + applicable_controls_should_include: + - "Finanz-/AML-nahe Review-Controls" + escalation_expected: true + escalation_reason: "Technische Signale deuten auf regulierungsnahe Funktion hin" + confidence: 0.35 + +- id: DEMO-APP-018 + title: "Unklare FinTech-Beschreibung" + category: escalation + goal: "Unsicherheit explizit erkennen" + company_profile: + sector: "Technologie/IT" + size: "small" + country: "DE" + facts: + - "App verwaltet Geldfluesse zwischen Nutzern" + - "Details zur Vertragsrolle unklar" + scope_answers: {} + expected: + escalation_expected: true + escalation_reason: "Geschaeftsmodell fuer finale regulatorische Einordnung zu unbestimmt" + confidence: 0.30 + pass_criteria: + - "Keine 
harte Falschaussage" + - "Gezielte Nachfragen oder LLM-Review" + +- id: DEMO-ESC-005 + title: "Unternehmen bietet Treuhandkonto fuer Immobilienkauf" + category: escalation + goal: "Finanzregulierung bei Treuhandmodell" + company_profile: + sector: "Immobilien" + size: "medium" + country: "DE" + facts: + - "Bietet Treuhandkonto fuer Immobilientransaktionen" + - "Haelt Kundengelder temporaer" + - "Nicht als Finanzinstitut lizenziert" + scope_answers: + holds_client_funds: true + expected: + escalation_expected: true + escalation_reason: "Treuhandmodell kann Erlaubnispflicht nach ZAG ausloesen — juristische Pruefung noetig" + confidence: 0.40 + +- id: DEMO-ESC-006 + title: "Startup nutzt KI fuer medizinische Diagnoseunterstuetzung" + category: escalation + goal: "MDR + AI Act Hochrisiko Abgrenzung" + company_profile: + sector: "Gesundheitswesen" + size: "micro" + country: "DE" + facts: + - "KI gibt Diagnosevorschlaege" + - "Aerzte treffen finale Entscheidung" + - "Unklar ob Medizinprodukt" + scope_answers: + uses_ai: true + processes_health_data: true + provides_diagnostic_support: true + expected: + applicable_controls_should_include: + - "AI Act Hochrisiko" + - "DSGVO Art. 9" + escalation_expected: true + escalation_reason: "Abgrenzung KI-Diagnoseunterstuetzung vs. Medizinprodukt (MDR) vertieft pruefen" + confidence: 0.45 diff --git a/control-pipeline/tests/test_applicability_use_cases.py b/control-pipeline/tests/test_applicability_use_cases.py index b1e72f5..636b1d5 100644 --- a/control-pipeline/tests/test_applicability_use_cases.py +++ b/control-pipeline/tests/test_applicability_use_cases.py @@ -1,469 +1,173 @@ """ -Applicability Use Case Tests — Real-world scenarios for control assignment. +Applicability Use Case Tests — Demo test package for control assignment. -These test cases verify that our Applicability Engine correctly assigns -and does NOT assign controls based on company profile + scope answers. 
+Loads 24 use cases from demo_cases.yaml and validates structure, +consistency, and (when DB available) actual control assignment. -Each test case represents a real business scenario discussed during -product development. They serve as: -1. Regression tests for the Applicability Engine -2. Demo cases for the SDK -3. Documentation of regulatory nuances +Categories: + A. Standard (6) — Branchen-Default korrekt? + B. Scope schlaegt Branche (6) — Sonderfaelle additiv? + C. Falsche Freunde / Negativ (6) — Keine Falschzuweisung? + D. Eskalation (6) — Unsicherheit erkannt? Run: pytest tests/test_applicability_use_cases.py -v """ +import os import pytest +import yaml + +# Load demo cases from YAML (explicit UTF-8: the file contains non-ASCII text; an empty file yields [] so the structure tests fail with a message instead of a collection error) +CASES_PATH = os.path.join(os.path.dirname(__file__), "demo_cases.yaml") +with open(CASES_PATH, encoding="utf-8") as f: + DEMO_CASES = yaml.safe_load(f) or [] + +CASE_IDS = [c.get("id") or f"missing-id-{i}" for i, c in enumerate(DEMO_CASES)] # --------------------------------------------------------------------------- -# Test Case Data: Company Profiles + Expected Results +# Structure Tests (always run) # --------------------------------------------------------------------------- -USE_CASES = [ - # =================================================================== - # CASE 1: Bank mit TAN-Generator (Batterie im Produkt) - # =================================================================== - { - "id": "bank_tan_generator", - "name": "Bank gibt TAN-Generator mit Batterie an Kunden raus", - "company": { - "industry": "Finanzdienstleistungen", - "size": "large", - "scope_answers": { - "payment_services": True, # Bank ist Zahlungsdienstleister - "processes_health_data": False, - "uses_ai": False, - "third_country_transfer": True, - "manufactures_batteries": False, # Bank STELLT NICHT HER - }, - }, - "must_match": [ - "PSD2", # Bank IST Zahlungsdienstleister - "DSGVO", # Immer - "AML", # Bank hat AML-Pflichten - ], - "must_not_match": [ - "Batterieverordnung", # Bank stellt TAN-Generator nicht her - "Maschinenverordnung", # Kein Maschinenbau - 
"MDR", # Keine Medizinprodukte - ], - "rationale": ( - "Die Bank beschafft den TAN-Generator von einem Hersteller. " - "Der Hersteller unterliegt der Batterieverordnung, nicht die Bank. " - "Die Bank ist aber selbst PSD2-reguliert als Zahlungsdienstleister." - ), - }, +class TestDemoCaseStructure: + """Verify demo cases are well-formed.""" - # =================================================================== - # CASE 2: Industrieunternehmen eroeffnet Webshop mit Stripe - # =================================================================== - { - "id": "industrie_webshop_stripe", - "name": "Maschinenbau-Firma eroeffnet Webshop mit Stripe-Zahlung", - "company": { - "industry": "Maschinenbau", - "size": "medium", - "scope_answers": { - "payment_services": False, # Stripe ist der Zahlungsdienstleister! - "uses_ai": False, - "third_country_transfer": True, # Stripe ist US-Unternehmen - "processes_health_data": False, - "has_webshop": True, - }, - }, - "must_match": [ - "DSGVO", # Immer - "DSGVO_AV_Vertrag", # Stripe als Auftragsverarbeiter - "DSGVO_Datenschutzinfo", # Stripe in Datenschutzerklaerung nennen - "Maschinenverordnung", # Kerngeschaeft - "CE", # Maschinenbau braucht CE - ], - "must_not_match": [ - "PSD2", # Stripe ist der Zahlungsdienstleister, NICHT die Firma - "AML", # Keine eigene Zahlungsabwicklung - "BaFin", # Kein Finanzinstitut - ], - "rationale": ( - "Stripe ist Zahlungsdienstleister in eigenem Auftrag. Der Webshop-Betreiber " - "wird nicht zum regulierten Zahlungsinstitut. Er muss nur Stripe als " - "Auftragsverarbeiter in der Datenschutzinformation korrekt benennen (DSGVO Art. 13/14). " - "Rechtsanwaltlich begleitete Stripe-Anbindung hat das bestaetigt." 
- ), - }, + def test_case_count(self): + assert len(DEMO_CASES) == 24, f"Expected 24 cases, got {len(DEMO_CASES)}" - # =================================================================== - # CASE 3: Kleines SaaS-Startup (5 Personen) - # =================================================================== - { - "id": "saas_startup_klein", - "name": "5-Personen SaaS-Startup (Cloud-Software, keine KI)", - "company": { - "industry": "Technologie/IT", - "size": "micro", - "scope_answers": { - "uses_ai": False, - "third_country_transfer": False, # EU-only Hosting - "processes_health_data": False, - "automated_decisions": False, - "payment_services": False, - "is_kritis_operator": False, - }, - }, - "must_match": [ - "DSGVO", # Immer - "OWASP", # Software-Sicherheit - ], - "must_not_match": [ - "NIS2", # Zu klein (NIS2 ab medium/50 MA) - "AI_Act", # Keine KI - "Batterieverordnung", # Kein Hardware-Produkt - "TKG", # Kein Telko-Anbieter - "MDR", # Keine Medizinprodukte - "PSD2", # Kein Zahlungsdienstleister - "KRITIS", # Zu klein, kein kritischer Sektor - ], - "rationale": ( - "Ein Kleinstunternehmen ohne KI, ohne KRITIS-Zugehoerigkeit, ohne " - "Drittlandtransfer braucht nur DSGVO-Basics und Software-Sicherheit. " - "NIS2 greift erst ab 50 Mitarbeitern / 10 Mio Umsatz." 
- ), - }, + def test_category_distribution(self): + cats = [c.get("category", "unknown") for c in DEMO_CASES] + assert cats.count("standard") == 6 + assert cats.count("scope_beats_sector") == 6 + assert cats.count("false_friends") == 6 + assert cats.count("escalation") == 6 - # =================================================================== - # CASE 4: Mittelstaendischer Energieversorger - # =================================================================== - { - "id": "energieversorger_mittelstand", - "name": "Stadtwerk mit 200 Mitarbeitern (Strom + Gas)", - "company": { - "industry": "Energie", - "size": "medium", - "scope_answers": { - "is_kritis_operator": True, - "uses_ai": False, - "third_country_transfer": False, - "processes_health_data": False, - "employee_monitoring": True, # Leitwarte mit Kameras - }, - }, - "must_match": [ - "DSGVO", - "NIS2", # Energie = KRITIS-Sektor + medium - "KRITIS", # Energieversorger - "BDSG", # Mitarbeiterueberwachung - "BSI_Grundschutz", # KRITIS-Betreiber - ], - "must_not_match": [ - "PSD2", - "AI_Act", - "MDR", - "TKG", - "Batterieverordnung", - ], - "rationale": ( - "Stadtwerk ist KRITIS-Betreiber im Energiesektor. NIS2 greift ab medium " - "(50 MA). BSI-Grundschutz ist de-facto Pflicht fuer KRITIS. " - "Mitarbeiterueberwachung (Leitwarte) erfordert BDSG-Compliance." 
- ), - }, + @pytest.mark.parametrize("case", DEMO_CASES, ids=CASE_IDS) + def test_required_fields(self, case): + assert case.get("id"), "Missing id" + assert case.get("title"), "Missing title" + assert case.get("category"), "Missing category" + assert case.get("goal"), "Missing goal" + assert case.get("company_profile"), "Missing company_profile" + assert case.get("facts"), "Missing facts" + assert case.get("expected"), "Missing expected" - # =================================================================== - # CASE 5: Gesundheits-App Startup mit KI - # =================================================================== - { - "id": "health_app_ki", - "name": "Startup entwickelt KI-basierte Gesundheits-App (DiGA)", - "company": { - "industry": "Gesundheitswesen", - "size": "small", - "scope_answers": { - "uses_ai": True, - "processes_health_data": True, - "automated_decisions": True, - "third_country_transfer": False, - "is_kritis_operator": False, - }, - }, - "must_match": [ - "DSGVO", - "DSGVO_Art9", # Gesundheitsdaten = besondere Kategorie - "DSGVO_Art22", # Automatisierte Entscheidungen - "DSGVO_Art35", # DSFA fuer Gesundheitsdaten + KI - "AI_Act", # KI-Einsatz - "MDR", # Gesundheits-App kann Medizinprodukt sein - "BSI_TR_03161", # Technische Richtlinie fuer mobile Gesundheits-Apps - "DiGAV", # Digitale Gesundheitsanwendung - ], - "must_not_match": [ - "PSD2", - "TKG", - "Batterieverordnung", - "Maschinenverordnung", - "NIS2", # Zu klein - ], - "rationale": ( - "Gesundheits-App mit KI trifft die schaerfsten Anforderungen: " - "DSGVO Art. 9 (Gesundheitsdaten), Art. 22 (automatisierte Entscheidungen), " - "Art. 35 (DSFA Pflicht), AI Act (Hochrisiko-KI im Gesundheitsbereich), " - "MDR (evtl. Medizinprodukt), BSI TR-03161 (Sicherheit mobiler Gesundheits-Apps)." 
- ), - }, + @pytest.mark.parametrize("case", DEMO_CASES, ids=CASE_IDS) + def test_expected_has_escalation(self, case): + expected = case["expected"] + assert "escalation_expected" in expected, f"{case['id']}: Missing escalation_expected" - # =================================================================== - # CASE 6: Automobilzulieferer (TISAX-relevant) - # =================================================================== - { - "id": "automotive_zulieferer", - "name": "Automobilzulieferer mit 500 MA, Prototypen-Fertigung", - "company": { - "industry": "Automobil", - "size": "large", - "scope_answers": { - "uses_ai": False, - "third_country_transfer": True, # Lieferkette international - "is_kritis_operator": False, - "handles_prototypes": True, - "supply_chain_automotive": True, - }, - }, - "must_match": [ - "DSGVO", - "NIS2", # Large + Automotive (Lieferkette) - "ISO27001", # TISAX basiert auf ISO 27001 - "Prototypenschutz", # OEM-Anforderung - "CE", # Produkte in EU - "Maschinenverordnung", # Produktion - ], - "must_not_match": [ - "PSD2", - "TKG", - "MDR", - "AI_Act", - ], - "rationale": ( - "Automobilzulieferer braucht TISAX-Readiness (basiert auf ISO 27001), " - "Prototypenschutz (OEM-Vorgabe), und NIS2 (Lieferkette, large). " - "TISAX selbst koennen wir nicht direkt zuweisen (VDA ISA proprietaer), " - "aber die zugrunde liegenden ISO/NIST Controls decken es ab." 
- ), - }, + @pytest.mark.parametrize("case", DEMO_CASES, ids=CASE_IDS) + def test_escalation_has_reason(self, case): + expected = case["expected"] + if expected.get("escalation_expected"): + assert expected.get("escalation_reason"), \ + f"{case['id']}: escalation_expected=true but no escalation_reason" - # =================================================================== - # CASE 7: Rechtsanwaltskanzlei - # =================================================================== - { - "id": "rechtsanwaltskanzlei", - "name": "Wirtschaftskanzlei mit 30 Anwaelten", - "company": { - "industry": "Recht/Kanzlei", - "size": "small", - "scope_answers": { - "uses_ai": True, # KI fuer Dokumentenanalyse - "third_country_transfer": True, # US-Cloud-Dienste - "processes_health_data": False, - "automated_decisions": False, - "handles_legal_privilege": True, - }, - }, - "must_match": [ - "DSGVO", - "DSGVO_Art46", # Drittlandtransfer (SCC) - "AI_Act", # KI-Einsatz - "BRAO", # Berufsordnung Rechtsanwaelte - "Mandantengeheimnis", # Berufsgeheimnis - ], - "must_not_match": [ - "NIS2", # Zu klein, kein KRITIS-Sektor - "PSD2", - "TKG", - "MDR", - "Batterieverordnung", - ], - "rationale": ( - "Kanzlei mit KI-Tools und US-Cloud braucht DSGVO + SCC (Drittland), " - "AI Act (KI-Einsatz), und berufsrechtliche Anforderungen (BRAO, " - "Mandantengeheimnis). NIS2 greift nicht (kein KRITIS-Sektor, zu klein)." 
- ), - }, + @pytest.mark.parametrize("case", DEMO_CASES, ids=CASE_IDS) + def test_no_overlap_include_exclude(self, case): + expected = case["expected"] + include = set(expected.get("applicable_controls_should_include", [])) + exclude = set(expected.get("applicable_controls_should_not_include", [])) + overlap = include & exclude + assert not overlap, f"{case['id']}: Overlap in include/exclude: {overlap}" - # =================================================================== - # CASE 8: E-Commerce Haendler mit eigenem Zahlungssystem - # =================================================================== - { - "id": "ecommerce_eigene_zahlung", - "name": "Online-Haendler mit eigenem Payment-Processing (keine Stripe-Delegation)", - "company": { - "industry": "E-Commerce/Handel", - "size": "medium", - "scope_answers": { - "payment_services": True, # EIGENE Zahlungsabwicklung - "uses_ai": True, # KI-Empfehlungen - "third_country_transfer": True, - "processes_minors_data": True, # Spielzeug-Shop - }, - }, - "must_match": [ - "DSGVO", - "DSGVO_Art8", # Kinderdaten - "PSD2", # EIGENER Payment-Service - "AI_Act", # KI-Empfehlungssystem - ], - "must_not_match": [ - "TKG", - "MDR", - "Maschinenverordnung", - ], - "rationale": ( - "Unterschied zu Case 2: Dieser Haendler betreibt EIGENES Payment-Processing, " - "ist also PSD2-reguliert. Dazu: Kinderdaten (Spielzeug-Shop) erfordern " - "DSGVO Art. 8 (Einwilligung Erziehungsberechtigter). KI-Empfehlungen " - "fallen unter AI Act." 
- ), - }, + @pytest.mark.parametrize("case", DEMO_CASES, ids=CASE_IDS) + def test_confidence_range(self, case): + conf = case["expected"].get("confidence") + if conf is not None: + assert 0.0 <= conf <= 1.0, f"{case['id']}: confidence {conf} out of range" - # =================================================================== - # CASE 9: Bildungseinrichtung (Schule) - # =================================================================== - { - "id": "schule", - "name": "Oeffentliche Schule mit 80 Lehrkraeften", - "company": { - "industry": "Bildung", - "size": "medium", - "scope_answers": { - "processes_minors_data": True, - "uses_ai": True, # KI-Lernplattform - "video_surveillance": True, # Schulgelaende - "employee_monitoring": False, - "is_public_sector": True, - }, - }, - "must_match": [ - "DSGVO", - "DSGVO_Art8", # Kinderdaten - "DSGVO_Art35", # DSFA (Kinderdaten + KI + Video) - "AI_Act", # KI-Lernplattform - "Schulrecht", # Landesschulgesetz - "BDSG", # Oeffentliche Stelle - ], - "must_not_match": [ - "PSD2", - "NIS2", # Bildung kein KRITIS-Sektor - "TKG", - "AML", - ], - "rationale": ( - "Schule verarbeitet Kinderdaten (DSGVO Art. 8), nutzt KI (AI Act), " - "hat Videoueberwachung (DSFA Pflicht). Als oeffentliche Stelle gilt BDSG. " - "NIS2 erfasst Bildung nicht als KRITIS-Sektor." 
- ), - }, + def test_negative_test_ratio(self): + """At least 40% of cases should test non-assignment (R5).""" + cases_with_not_include = sum( + 1 for c in DEMO_CASES + if c["expected"].get("applicable_controls_should_not_include") + ) + ratio = cases_with_not_include / len(DEMO_CASES) + assert ratio >= 0.4, f"Only {ratio:.0%} cases have must_not_include (need >= 40%)" - # =================================================================== - # CASE 10: Telko-Unternehmen - # =================================================================== - { - "id": "telko_provider", - "name": "Regionaler Internetanbieter mit 150 MA", - "company": { - "industry": "Telekommunikation", - "size": "medium", - "scope_answers": { - "is_kritis_operator": True, - "uses_ai": False, - "third_country_transfer": False, - "processes_health_data": False, - }, - }, - "must_match": [ - "DSGVO", - "TKG", # Telko-spezifisch - "TTDSG", # Telekommunikation-Telemedien-Datenschutz - "NIS2", # KRITIS + medium - "KRITIS", - "BSI_Grundschutz", - ], - "must_not_match": [ - "PSD2", - "AI_Act", - "MDR", - "Batterieverordnung", - "Maschinenverordnung", - ], - "rationale": ( - "Telko-Anbieter ist KRITIS-Betreiber, TKG und TTDSG sind direkt anwendbar. " - "NIS2 greift (KRITIS + medium). BSI-Grundschutz de-facto Pflicht." 
- ), - }, -] + def test_unique_ids(self): + assert len(CASE_IDS) == len(set(CASE_IDS)), "Duplicate case IDs found" # --------------------------------------------------------------------------- -# Test Functions +# Acceptance Rule Tests (always run) # --------------------------------------------------------------------------- -class TestApplicabilityUseCases: - """Verify that the Applicability Engine assigns controls correctly.""" +class TestAcceptanceRules: + """Verify acceptance rules are encoded in the test data.""" - @pytest.mark.parametrize("case", USE_CASES, ids=[c["id"] for c in USE_CASES]) - def test_use_case_documented(self, case): - """Each use case has required fields.""" - assert case["id"] - assert case["name"] - assert case["company"]["industry"] - assert case["company"]["size"] - assert case["must_match"] - assert case["must_not_match"] - assert case["rationale"] + def test_r1_no_false_certainty(self): + """R1: Escalation cases must not have high confidence.""" + for case in DEMO_CASES: + if case["expected"].get("escalation_expected"): + conf = case["expected"].get("confidence", 1.0) + assert conf < 0.80, \ + f"{case['id']}: escalation_expected but confidence={conf} (should be < 0.80)" - @pytest.mark.parametrize("case", USE_CASES, ids=[c["id"] for c in USE_CASES]) - def test_must_match_not_overlap_must_not(self, case): - """must_match and must_not_match should not overlap.""" - overlap = set(case["must_match"]) & set(case["must_not_match"]) - assert not overlap, f"Overlap in {case['id']}: {overlap}" + def test_r2_scope_beats_sector(self): + """R2: scope_beats_sector cases must have scope_triggers.""" + for case in DEMO_CASES: + if case.get("category") == "scope_beats_sector": + triggers = case["expected"].get("scope_triggers", []) + assert triggers, f"{case['id']}: scope_beats_sector but no scope_triggers" - @pytest.mark.parametrize("case", USE_CASES, ids=[c["id"] for c in USE_CASES]) - def test_scope_answers_are_booleans(self, case): - """Scope 
answers should be boolean values.""" - for key, val in case["company"]["scope_answers"].items(): - assert isinstance(val, bool), f"{case['id']}: scope {key} is {type(val)}, expected bool" + def test_r3_repo_signals_not_sufficient(self): + """R3: Cases with repo_signals should not auto-assign heavy regulation.""" + for case in DEMO_CASES: + if case.get("repo_signals") and not case["expected"].get("escalation_expected"): + # If repo signals exist but no escalation, PSD2 etc. must NOT be included + include = case["expected"].get("applicable_controls_should_include", []) + for ctrl in include: + assert "PSD2" not in ctrl, \ + f"{case['id']}: repo_signals + PSD2 included without escalation (R3)" + + def test_r4_standard_first(self): + """R4: Standard cases should have applicable_industries set.""" + for case in DEMO_CASES: + if case.get("category") == "standard": + industries = case["expected"].get("applicable_industries") + assert industries, f"{case['id']}: standard case without applicable_industries" # --------------------------------------------------------------------------- -# Integration test placeholder — runs against real DB + Applicability Engine +# Integration Tests (require DB + Applicability Engine) # --------------------------------------------------------------------------- -@pytest.mark.skip(reason="Requires running DB + Applicability Engine") +@pytest.mark.skip(reason="Requires running DB + Applicability Engine — enable for SDK demo") class TestApplicabilityIntegration: """Run use cases against the real Applicability Engine. - Enable by removing @skip and setting DATABASE_URL. - These tests query the actual canonical_controls table - and verify that the correct controls are returned. + Enable by removing @skip and ensuring DATABASE_URL is set. 
+ + Scoring per case: + must_include_match: 0..1 + must_not_include_match: 0..1 + reasoning_correct: 0..1 + escalation_correct: 0..1 + total_score: 0..4 """ - @pytest.mark.parametrize("case", USE_CASES, ids=[c["id"] for c in USE_CASES]) - def test_applicability_engine(self, case): - """Verify control assignment for each use case.""" - # TODO: Import ApplicabilityEngine, query DB, check results + @pytest.mark.parametrize("case", DEMO_CASES, ids=CASE_IDS) + def test_applicability(self, case): + # TODO: Implement against real ApplicabilityEngine # from services.applicability_engine import get_applicable_controls # from db.session import SessionLocal # # db = SessionLocal() # result = get_applicable_controls( # db=db, - # industry=case["company"]["industry"], - # company_size=case["company"]["size"], - # scope_signals=case["company"]["scope_answers"], + # industry=case["company_profile"]["sector"], + # company_size=case["company_profile"].get("size", "medium"), + # scope_signals=case.get("scope_answers", {}), # ) - # control_sources = {c.source_citation.get("source", "") for c in result["controls"]} # - # for required in case["must_match"]: - # assert any(required.lower() in s.lower() for s in control_sources), \ - # f"{case['id']}: Expected {required} in results" + # # Score: must_include_match + # for required in case["expected"].get("applicable_controls_should_include", []): + # assert any(required.lower() in str(c).lower() for c in result["controls"]) # - # for forbidden in case["must_not_match"]: - # assert not any(forbidden.lower() in s.lower() for s in control_sources), \ - # f"{case['id']}: {forbidden} should NOT be in results" + # # Score: must_not_include_match + # for forbidden in case["expected"].get("applicable_controls_should_not_include", []): + # assert not any(forbidden.lower() in str(c).lower() for c in result["controls"]) pass