"""Use-case registry — single source of truth for use cases × verification methods.

Every master control is mapped to >=1 use case (n:m) and to exactly one
verification method. Use cases are NOT only document-based: >=50% are
source code / IT process (Code Security, Network Security, CRA, ISMS, TISAX).

A new use case = 1 entry in `_USE_CASES`. No DB schema change is required.

This module is the canonical source; the doc_type lists that are scattered
today (rag_document_checker._DOC_TYPE_MAP, legacy_url_discovery._SLUG_FAMILY,
doc_type_classifier, migration 145) are to be reduced to it later, test-gated.
"""

from __future__ import annotations

import hashlib
import json
from dataclasses import dataclass

# How is a master control verified? — canonical taxonomy of verification methods.
VERIFICATION_METHODS: tuple[str, ...] = (
    "document",     # read a published document (imprint, privacy policy, ...)
    "source_code",  # scan repo/code (SAST, secrets, dependencies, review)
    "network",      # scan network/infra (ports, TLS, headers, config)
    "it_process",   # process/evidence review (procedures, evidence)
    "hybrid",       # combination of several methods
    "manual",       # human attestation
)

USE_CASE_GROUPS: tuple[str, ...] = (
    "document",
    "security",
    "cross_cutting",
    "product",
)


@dataclass(frozen=True)
class UseCase:
    """One immutable registry entry describing a single use case.

    Only `key`, `label` and `group` are required; every tuple field
    defaults to empty and `enabled` defaults to True.
    """

    key: str  # stable, snake_case
    label: str
    group: str  # one of USE_CASE_GROUPS
    regulations: tuple[str, ...] = ()
    verification_methods: tuple[str, ...] = ()
    doc_types: tuple[str, ...] = ()  # operative doc_type aliases (document use cases)
    scope_tokens: tuple[str, ...] = ()  # canonical_controls.scope_doc_type
    categories: tuple[str, ...] = ()  # canonical_controls.category
    keyword_tokens: tuple[str, ...] = ()  # canonical_name/title keywords
    enabled: bool = True


_USE_CASES: tuple[UseCase, ...] = (
    # ── Document use cases ──────────────────────────────────────────
    UseCase(
        key="impressum",
        label="Impressum (§5 TMG/DDG)",
        group="document",
        regulations=("TMG", "DDG", "MStV"),
        verification_methods=("document",),
        doc_types=("impressum",),
        scope_tokens=("impressum",),
        categories=("compliance",),
        keyword_tokens=("impressum", "anbieterkennzeichnung"),
    ),
    UseCase(
        key="dse",
        label="Datenschutzerklärung",
        group="document",
        regulations=("DSGVO",),
        verification_methods=("document",),
        doc_types=("dse",),
        scope_tokens=("dse",),
        categories=("privacy", "data_protection"),
        keyword_tokens=("datenschutz", "privacy"),
    ),
    UseCase(
        key="agb",
        label="AGB",
        group="document",
        regulations=("BGB",),
        verification_methods=("document",),
        doc_types=("agb",),
        scope_tokens=("agb",),
        categories=("compliance",),
        keyword_tokens=("geschäftsbedingungen", "agb"),
    ),
    UseCase(
        key="cookie_banner",
        label="Cookie-Banner & -Richtlinie",
        group="document",
        regulations=("TDDDG", "ePrivacy", "DSGVO"),
        verification_methods=("document", "source_code"),
        doc_types=("cookie",),
        scope_tokens=("cookie_richtlinie", "banner_implementation", "cmp_audit"),
        categories=("privacy",),
        keyword_tokens=("cookie", "consent", "einwilligung"),
    ),
    UseCase(
        key="widerruf",
        label="Widerrufsbelehrung",
        group="document",
        regulations=("BGB",),
        verification_methods=("document",),
        doc_types=("widerruf",),
        scope_tokens=("widerruf",),
        categories=("compliance",),
        keyword_tokens=("widerruf", "widerrufsbelehrung"),
    ),
    UseCase(
        key="dsr",
        label="Betroffenenrechte (DSR)",
        group="document",
        regulations=("DSGVO",),
        verification_methods=("document", "it_process"),
        scope_tokens=("process",),
        categories=("privacy", "operations"),
        keyword_tokens=("betroffenenrecht", "auskunft", "löschung", "dsr"),
    ),
    UseCase(
        key="loeschkonzept",
        label="Löschkonzept",
        group="document",
        regulations=("DSGVO",),
        verification_methods=("document", "it_process"),
        doc_types=("loeschkonzept",),
        scope_tokens=("process", "accounting"),
        categories=("data_protection",),
        keyword_tokens=("löschung", "löschfrist", "aufbewahrung"),
    ),
    UseCase(
        key="avv",
        label="Auftragsverarbeitung (AVV)",
        group="document",
        regulations=("DSGVO",),
        verification_methods=("document",),
        doc_types=("avv",),
        scope_tokens=("avv", "jc"),
        categories=("compliance",),
        keyword_tokens=("auftragsverarbeitung", "avv"),
    ),
    UseCase(
        key="dsfa",
        label="Datenschutz-Folgenabschätzung",
        group="document",
        regulations=("DSGVO",),
        verification_methods=("document", "it_process"),
        doc_types=("dsfa",),
        scope_tokens=("tom", "process"),
        categories=("risk", "privacy"),
        keyword_tokens=("folgenabschätzung", "dsfa"),
    ),
    # ── Security / code use cases ───────────────────────────────────
    UseCase(
        key="code_security",
        label="Code Security",
        group="security",
        regulations=("CRA", "OWASP", "ISO 27001"),
        verification_methods=("source_code", "hybrid"),
        categories=("testing", "application", "encryption", "authentication", "identity"),
        keyword_tokens=("sast", "secret", "dependency", "vulnerability", "injection", "code"),
    ),
    UseCase(
        key="network_security",
        label="Network Security",
        group="security",
        regulations=("ISO 27001", "BSI", "NIS2"),
        verification_methods=("network", "hybrid"),
        categories=("network", "system", "operations"),
        keyword_tokens=("firewall", "tls", "port", "segmentation", "network", "header"),
    ),
    # ── Cross-cutting / multi-method ────────────────────────────────
    UseCase(
        key="cra",
        label="Cyber Resilience Act",
        group="cross_cutting",
        regulations=("CRA",),
        verification_methods=("document", "source_code", "network", "it_process"),
        categories=("security", "supply_chain", "testing", "incident"),
        keyword_tokens=("cra", "sbom", "konformität", "produktsicherheit"),
    ),
    UseCase(
        key="isms",
        label="ISMS (ISO 27001)",
        group="cross_cutting",
        regulations=("ISO 27001",),
        verification_methods=("it_process", "document", "hybrid"),
        categories=("governance", "security", "operations", "incident"),
        keyword_tokens=("isms", "risikomanagement", "soa"),
    ),
    UseCase(
        key="tisax",
        label="TISAX",
        group="cross_cutting",
        regulations=("VDA ISA", "TISAX"),
        verification_methods=("it_process", "document", "network", "hybrid"),
        categories=("security", "governance", "operations"),
        keyword_tokens=("tisax", "vda", "prototypenschutz"),
    ),
    UseCase(
        key="kritis",
        label="KRITIS / NIS2-Umsetzung",
        group="security",
        regulations=("KRITIS-Dachgesetz", "BSI-KritisV", "BSIG"),
        verification_methods=("it_process", "network", "document"),
    ),
    UseCase(
        key="dora",
        label="DORA (Digital Operational Resilience)",
        group="cross_cutting",
        regulations=("DORA",),
        verification_methods=("it_process", "document", "network"),
    ),
    # ── Product / sector use cases (one per source regulation) ──────
    UseCase(
        key="ai_act",
        label="KI-Verordnung (AI Act)",
        group="product",
        regulations=("KI-Verordnung", "AI Act", "NIST AI RMF"),
        verification_methods=("document", "it_process", "source_code"),
    ),
    UseCase(
        key="mica",
        label="Markets in Crypto-Assets (MiCA)",
        group="product",
        regulations=("MiCA",),
        verification_methods=("document", "it_process"),
    ),
    UseCase(
        key="mdr",
        label="Medizinprodukte (MDR)",
        group="product",
        regulations=("MDR",),
        verification_methods=("document", "source_code", "it_process"),
    ),
    UseCase(
        key="maschinen",
        label="Maschinenverordnung",
        group="product",
        regulations=("Maschinenverordnung",),
        verification_methods=("document", "source_code", "it_process"),
    ),
    UseCase(
        key="batterie",
        label="Batterieverordnung",
        group="product",
        regulations=("Batterieverordnung", "Batteriegesetz"),
        verification_methods=("document", "it_process"),
    ),
    UseCase(
        key="ehds",
        label="European Health Data Space",
        group="product",
        regulations=("EHDS",),
        verification_methods=("document", "it_process", "source_code"),
    ),
    UseCase(
        key="produktsicherheit",
        label="Produktsicherheit (GPSR)",
        group="product",
        regulations=("Produktsicherheitsverordnung", "EU Blue Guide"),
        verification_methods=("document", "it_process"),
    ),
    UseCase(
        key="dsa",
        label="Digital Services Act",
        group="product",
        regulations=("Digital Services Act",),
        verification_methods=("document", "it_process"),
    ),
    UseCase(
        key="dma",
        label="Digital Markets Act",
        group="product",
        regulations=("Digital Markets Act",),
        verification_methods=("document", "it_process"),
    ),
    UseCase(
        key="data_governance",
        label="Data Act / Data Governance Act",
        group="product",
        regulations=("Data Act", "Data Governance Act"),
        verification_methods=("document", "it_process"),
    ),
    UseCase(
        key="zahlungsdienste",
        label="Zahlungsdienste (PSD2)",
        group="product",
        regulations=("Zahlungsdiensterichtlinie",),
        verification_methods=("document", "it_process", "source_code"),
    ),
    UseCase(
        key="geldwaesche",
        label="Geldwäsche (AML/GwG)",
        group="product",
        regulations=("AML-Verordnung", "GwG"),
        verification_methods=("document", "it_process"),
    ),
    UseCase(
        key="lieferkette",
        label="Lieferkettensorgfalt (LkSG)",
        group="product",
        regulations=("LkSG",),
        verification_methods=("document", "it_process"),
    ),
    UseCase(
        key="whistleblowing",
        label="Hinweisgeberschutz (HinSchG)",
        group="product",
        regulations=("HinSchG",),
        verification_methods=("document", "it_process"),
    ),
    UseCase(
        key="barrierefreiheit",
        label="Barrierefreiheit (EAA)",
        group="product",
        regulations=("European Accessibility Act",),
        verification_methods=("document", "source_code"),
    ),
    # ── Further legal use cases (document) ──────────────────────────
    UseCase(
        key="verbraucherschutz",
        label="Verbraucherschutz",
        group="document",
        regulations=("Konsumentenschutzgesetz", "Digitale-Inhalte-Richtlinie"),
        verification_methods=("document",),
    ),
    UseCase(
        key="urheberrecht",
        label="Urheberrecht",
        group="document",
        regulations=("UrhG", "DSM-Urheberrechtsrichtlinie"),
        verification_methods=("document",),
    ),
    UseCase(
        key="wettbewerbsrecht",
        label="Wettbewerbsrecht (UWG)",
        group="document",
        regulations=("UWG",),
        verification_methods=("document",),
    ),
    UseCase(
        key="gleichbehandlung",
        label="Gleichbehandlung (AGG)",
        group="document",
        regulations=("AGG",),
        verification_methods=("document", "it_process"),
    ),
    UseCase(
        key="steuerrecht",
        label="Steuerrecht",
        group="document",
        regulations=("Abgabenordnung", "BAO"),
        verification_methods=("document", "it_process"),
    ),
    UseCase(
        key="handelsrecht",
        label="Handelsrecht",
        group="document",
        regulations=("HGB", "UGB", "ABGB"),
        verification_methods=("document", "it_process"),
    ),
)

# Deterministic source-regulation → use-case mapper.
# control_parent_links.source_regulation (117 distinct) → ~30 domains.
# Order = SPECIFIC first (substring, case-insensitive); the first
# match wins. The data-protection catch-alls (edpb/dsk/dsgvo) come last
# so that specific rules (e.g. 'DSK OH Telemedien') can match first.
_REGULATION_RULES: tuple[tuple[str, str], ...] = (
    # Security / code (security team)
    ("owasp", "code_security"),
    ("nist sp 800-218", "code_security"),
    ("ssdf", "code_security"),
    ("nist sp 800-63", "code_security"),
    ("nistir 8259", "code_security"),
    ("cisa", "code_security"),
    ("nist sp 800-207", "network_security"),
    ("zero trust", "network_security"),
    ("nis2", "network_security"),
    ("nis-2", "network_security"),
    ("enisa", "network_security"),
    ("bsi-gesetz", "network_security"),
    ("bsig", "network_security"),
    ("cybersecurity act", "network_security"),
    ("kritis", "kritis"),
    ("nist cybersecurity framework", "isms"),
    ("nist sp 800-53", "isms"),
    ("digital operational resilience", "dora"),
    ("dora", "dora"),
    # Product / sector
    ("cyber resilience act", "cra"),
    ("(cra)", "cra"),
    ("nist ai risk", "ai_act"),
    ("ki-verordnung", "ai_act"),
    ("ki-vo", "ai_act"),
    ("ai act", "ai_act"),
    ("oecd ki", "ai_act"),
    ("crypto-assets", "mica"),
    ("mica", "mica"),
    ("medizinprodukte", "mdr"),
    ("(mdr)", "mdr"),
    ("maschinenverordnung", "maschinen"),
    ("batterie", "batterie"),
    ("health data space", "ehds"),
    ("produktsicherheit", "produktsicherheit"),
    ("blue guide", "produktsicherheit"),
    ("digital services act", "dsa"),
    ("digital markets act", "dma"),
    ("data act", "data_governance"),
    ("data governance", "data_governance"),
    ("zahlungsdienste", "zahlungsdienste"),
    ("geldwaesche", "geldwaesche"),
    ("aml-verordnung", "geldwaesche"),
    ("lieferkettensorgfalt", "lieferkette"),
    ("lksg", "lieferkette"),
    ("hinweisgeberschutz", "whistleblowing"),
    ("hinschg", "whistleblowing"),
    ("accessibility act", "barrierefreiheit"),
    # Website / telemedia / law (user domain)
    ("tdddg", "cookie_banner"),
    ("eprivacy", "cookie_banner"),
    ("telemedien", "impressum"),
    ("telekommunikationsgesetz", "impressum"),
    ("tkg", "impressum"),
    ("tmg", "impressum"),
    ("mediengesetz", "impressum"),
    ("gewerbeordnung", "impressum"),
    ("e-commerce", "agb"),
    ("digitale-inhalte", "agb"),
    ("konsumentenschutz", "verbraucherschutz"),
    ("urheberrecht", "urheberrecht"),
    ("urhg", "urheberrecht"),
    ("uwg", "wettbewerbsrecht"),
    ("handelsgesetzbuch", "handelsrecht"),
    ("hgb", "handelsrecht"),
    ("ugb", "handelsrecht"),
    # "abgb" must stay ahead of "bgb": it contains it as a substring.
    ("abgb", "handelsrecht"),
    ("bgb", "agb"),
    ("gleichbehandlung", "gleichbehandlung"),
    ("(agg)", "gleichbehandlung"),
    ("abgabenordnung", "steuerrecht"),
    ("bao", "steuerrecht"),
    ("standardvertragsklauseln", "avv"),
    ("(scc)", "avv"),
    # Data-protection catch-alls (last)
    ("nist privacy framework", "dse"),
    ("dsgvo", "dse"),
    ("datenschutzgesetz", "dse"),
    ("bdsg", "dse"),
    ("edpb", "dse"),
    ("edps", "dse"),
    ("dsk ", "dse"),
    ("wp29", "dse"),
    ("bfdi", "dse"),
    ("data privacy framework", "dse"),
    ("datenschutz", "dse"),
)


def use_case_for_regulation(regulation: str | None) -> str | None:
    """Map a source regulation to its domain use-case key, deterministically.

    The lower-cased regulation is checked against `_REGULATION_RULES` in
    table order; the target of the first rule whose needle occurs as a
    substring is returned.

    Returns None for a falsy input or when no rule matches
    (→ fallback/review).
    """
    if not regulation:
        return None
    haystack = regulation.lower()
    # Generator + next() stops at the first hit, preserving "first rule wins".
    return next(
        (target for fragment, target in _REGULATION_RULES if fragment in haystack),
        None,
    )


# Key → UseCase lookup over every registry entry (enabled or not).
REGISTRY: dict[str, UseCase] = {entry.key: entry for entry in _USE_CASES}

# canonical_controls.evidence_type / .verification_method → our method
# (for the deterministic seed; the LLM pass refines it).
_EVIDENCE_TO_METHOD: dict[str, str] = {
    "document": "document",
    "code": "source_code",
    "code_review": "source_code",
    "process": "it_process",
    "tool": "network",
    "hybrid": "hybrid",
}


def _reverse(attr: str) -> dict[str, list[str]]:
    """Build a token → [use-case keys] index from one tuple attribute.

    `attr` names a tuple-valued `UseCase` field. Disabled use cases are
    skipped; keys are appended in `_USE_CASES` order.
    """
    index: dict[str, list[str]] = {}
    for case in _USE_CASES:
        if case.enabled:
            for token in getattr(case, attr):
                if token in index:
                    index[token].append(case.key)
                else:
                    index[token] = [case.key]
    return index


scope_token_to_use_cases: dict[str, list[str]] = _reverse("scope_tokens")
category_to_use_cases: dict[str, list[str]] = _reverse("categories")
doc_type_to_use_cases: dict[str, list[str]] = _reverse("doc_types")


def is_valid_use_case(key: str) -> bool:
    """Return True when `key` is registered and its use case is enabled."""
    entry = REGISTRY.get(key)
    return entry is not None and entry.enabled


def is_valid_verification_method(method: str) -> bool:
    """Return True when `method` is one of `VERIFICATION_METHODS`."""
    return method in VERIFICATION_METHODS


def evidence_to_verification_method(value: str | None) -> str | None:
    """Heuristic mapping for the seed (None when the value is unknown).

    The value is stripped and lower-cased before the lookup in
    `_EVIDENCE_TO_METHOD`; a falsy value yields None.
    """
    if value:
        return _EVIDENCE_TO_METHOD.get(value.strip().lower())
    return None


def enabled_use_cases() -> list[UseCase]:
    """Return all enabled use cases in registry order."""
    return list(filter(lambda case: case.enabled, _USE_CASES))


def primary_verification_method(use_case_key: str | None) -> str | None:
    """Return the representative (first declared) verification method.

    Deterministic fallback for when the members of a master control carry
    no evidence_type. Returns None for a falsy or unknown key, or when the
    use case declares no verification methods.
    """
    if not use_case_key:
        return None
    entry = REGISTRY.get(use_case_key)
    if entry is None or not entry.verification_methods:
        return None
    return entry.verification_methods[0]


def seed_classify(
    scopes=(),
    categories=(),
    vmethods=(),
    etypes=(),
) -> tuple[list[str], str | None]:
    """Deterministic seed (no LLM) from a master control's member signals.

    `scopes` (scope_doc_type) and `categories` are looked up in the
    reverse indexes and the hits are unioned into the use-case list;
    `vmethods` followed by `etypes` are run through
    `evidence_to_verification_method`, and the first mappable value
    becomes the method.

    Returns `(sorted use-case keys, method or None)`. Falsy arguments and
    falsy individual signals are ignored. Pure → testable.
    """
    matched: set[str] = set()
    lookups = (
        (scopes, scope_token_to_use_cases),
        (categories, category_to_use_cases),
    )
    for signals, index in lookups:
        for signal in signals or ():
            if signal:
                matched.update(index.get(signal, ()))

    # verification_method values take precedence over evidence_type values.
    candidates = list(vmethods or ()) + list(etypes or ())
    method: str | None = next(
        (hit for hit in map(evidence_to_verification_method, candidates) if hit),
        None,
    )
    return sorted(matched), method


def taxonomy_for_prompt() -> str:
    """Compact anchor block for the LLM classifier.

    One header line, one line per enabled use case, and a closing line
    listing the verification methods, joined with newlines.

    NOTE(review): the original note calls this block "cached", but this
    function rebuilds the string on every call — presumably the caching
    happens on the caller/prompt side; confirm.
    """
    header = "USE CASES (key — Label — Regulierungen — Methoden):"
    rows = [
        f" {case.key} — {case.label} — {', '.join(case.regulations) or '-'}"
        f" — {', '.join(case.verification_methods)}"
        for case in enabled_use_cases()
    ]
    footer = "VERIFIKATIONS-METHODEN: " + ", ".join(VERIFICATION_METHODS)
    return "\n".join([header, *rows, footer])


def frontend_list() -> list[dict]:
    """Slim list for the frontend dropdown (twin: use-case-registry.ts).

    Contains one dict per enabled use case with the keys `key`, `label`,
    `group` and `verification_methods` (as a list).
    """
    entries: list[dict] = []
    for case in enabled_use_cases():
        entries.append(
            {
                "key": case.key,
                "label": case.label,
                "group": case.group,
                "verification_methods": list(case.verification_methods),
            }
        )
    return entries


def registry_hash() -> str:
    """Stable hash → re-classification when the taxonomy changes.

    SHA-256 hex digest over the JSON serialization of key, group,
    regulations, verification_methods, doc_types, scope_tokens and
    categories of every entry in `_USE_CASES` (disabled ones included).

    NOTE(review): `label`, `keyword_tokens` and `enabled` are not part of
    the payload, so changing only those leaves the hash unchanged —
    confirm that this is intended.
    """
    rows = [
        [
            case.key,
            case.group,
            list(case.regulations),
            list(case.verification_methods),
            list(case.doc_types),
            list(case.scope_tokens),
            list(case.categories),
        ]
        for case in _USE_CASES
    ]
    serialized = json.dumps(rows, sort_keys=True, ensure_ascii=False)
    digest = hashlib.sha256(serialized.encode("utf-8"))
    return digest.hexdigest()