Files
breakpilot-compliance/backend-compliance/compliance/data/use_case_registry.py
T
Benjamin Admin 6ca4dcde3e
CI / detect-changes (push) Successful in 8s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / secret-scan (push) Has been skipped
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / build-sha-integrity (push) Failing after 4s
CI / validate-canonical-controls (push) Successful in 12s
CI / loc-budget (push) Successful in 14s
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Has been skipped
CI / test-go (push) Has been skipped
CI / iace-gt-coverage (push) Has been skipped
CI / test-python-backend (push) Successful in 31s
CI / test-python-document-crawler (push) Has been skipped
CI / test-python-dsms-gateway (push) Has been skipped
feat(use-cases): deterministisches source_regulation-Mapping + Primaerzweck [migration-approved]
Use-Case-Zuordnung jetzt DETERMINISTISCH aus der Quell-Regulierung (statt
LLM/scope-category): control_parent_links.source_regulation (79% der 13.588
MCs) -> Keyword-Mapper -> ~30 Domaenen-Use-Cases. 117/117 Regulierungen
gemappt (dse 44 Leitlinien, code_security 10, network_security 9, ...).

- use_case_registry.py: 37 Use Cases (Doku + Security + Produkt/Sektor:
  cra/ai_act/mica/mdr/maschinen/batterie/ehds/dsa/dma/psd2/aml/lksg/...) +
  use_case_for_regulation() Keyword-Mapper (117 Regulierungen abgedeckt).
- migration 150: is_primary auf mc_use_case_mappings + neue mc_regulations
  (MC->source_regulation, n:m, is_primary) als feine Filter-Dimension.
- classify_mc_use_cases.py: source_regulation-getriebener Seed; Primaerzweck =
  dominante Regulierung, Mehrfachzwecke = weitere. PYTHONPATH-Bootstrap.
- 18 Registry-Tests gruen (Mapper-Abdeckung + Konsistenz-Invariante).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-09 16:27:06 +02:00

426 lines
18 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Use-Case-Register — Single Source of Truth fuer Use Cases × Verifikations-
Methoden.
Jede Master Control wird auf >=1 Use Case (n:m) und genau eine Verifikations-
Methode gemappt. Use Cases sind NICHT nur dokumenten-basiert: >=50% sind
Source-Code / IT-Prozess (Code Security, Network Security, CRA, ISMS, TISAX).
Neuer Use Case = 1 Eintrag in `_USE_CASES`. Kein DB-Schema-Change noetig.
Dieses Modul ist die kanonische Quelle; die heute verstreuten doc_type-Listen
(rag_document_checker._DOC_TYPE_MAP, legacy_url_discovery._SLUG_FAMILY,
doc_type_classifier, Migration 145) werden spaeter test-gated darauf reduziert.
"""
from __future__ import annotations
import hashlib
import json
from dataclasses import dataclass
# How is a Master Control (MC) verified? Canonical taxonomy of verification
# methods. Membership in this tuple is what is_valid_verification_method()
# checks.
VERIFICATION_METHODS: tuple[str, ...] = (
    "document",  # read a published document (Impressum, DSE, ...)
    "source_code",  # scan the repo/code (SAST, secrets, dependencies, review)
    "network",  # scan network/infrastructure (ports, TLS, headers, config)
    "it_process",  # process / evidence review (procedures, evidence)
    "hybrid",  # combination of several methods
    "manual",  # human attestation
)
# Group names used as the third positional argument (`group`) of the UseCase
# entries in this module.
USE_CASE_GROUPS: tuple[str, ...] = (
    "document", "security", "cross_cutting", "product",
)
@dataclass(frozen=True)
class UseCase:
    """Immutable description of one use case in the registry.

    Attributes:
        key: Stable identifier in snake_case.
        label: Display label.
        group: Group name; one of USE_CASE_GROUPS.
        regulations: Names of the regulations associated with the use case.
        verification_methods: Verification methods that apply to the use case.
        doc_types: Operative doc_type aliases (document use cases).
        scope_tokens: Values of canonical_controls.scope_doc_type that point
            to this use case.
        categories: Values of canonical_controls.category that point to this
            use case.
        keyword_tokens: Keywords expected in canonical_name / title.
        enabled: Disabled entries are skipped by the reverse indexes and by
            enabled_use_cases().
    """

    # Identity and presentation.
    key: str
    label: str
    group: str

    # Classification metadata; every tuple defaults to empty.
    regulations: tuple[str, ...] = ()
    verification_methods: tuple[str, ...] = ()
    doc_types: tuple[str, ...] = ()
    scope_tokens: tuple[str, ...] = ()
    categories: tuple[str, ...] = ()
    keyword_tokens: tuple[str, ...] = ()

    # Switch for taking an entry out of use without deleting it.
    enabled: bool = True
# The registry itself: one UseCase entry per use case. Positional arguments
# are (key, label, group); everything else is passed by keyword. Entries that
# omit scope_tokens / categories / doc_types contribute nothing to the reverse
# indexes built from those fields.
_USE_CASES: tuple[UseCase, ...] = (
    # ── Document use cases ──────────────────────────────────────────
    UseCase("impressum", "Impressum (§5 TMG/DDG)", "document",
            regulations=("TMG", "DDG", "MStV"),
            verification_methods=("document",),
            doc_types=("impressum",), scope_tokens=("impressum",),
            categories=("compliance",),
            keyword_tokens=("impressum", "anbieterkennzeichnung")),
    UseCase("dse", "Datenschutzerklärung", "document",
            regulations=("DSGVO",),
            verification_methods=("document",),
            doc_types=("dse",), scope_tokens=("dse",),
            categories=("privacy", "data_protection"),
            keyword_tokens=("datenschutz", "privacy")),
    UseCase("agb", "AGB", "document",
            regulations=("BGB",),
            verification_methods=("document",),
            doc_types=("agb",), scope_tokens=("agb",),
            categories=("compliance",),
            keyword_tokens=("geschäftsbedingungen", "agb")),
    UseCase("cookie_banner", "Cookie-Banner & -Richtlinie", "document",
            regulations=("TDDDG", "ePrivacy", "DSGVO"),
            verification_methods=("document", "source_code"),
            doc_types=("cookie",),
            scope_tokens=("cookie_richtlinie", "banner_implementation",
                          "cmp_audit"),
            categories=("privacy",),
            keyword_tokens=("cookie", "consent", "einwilligung")),
    UseCase("widerruf", "Widerrufsbelehrung", "document",
            regulations=("BGB",),
            verification_methods=("document",),
            doc_types=("widerruf",), scope_tokens=("widerruf",),
            categories=("compliance",),
            keyword_tokens=("widerruf", "widerrufsbelehrung")),
    UseCase("dsr", "Betroffenenrechte (DSR)", "document",
            regulations=("DSGVO",),
            verification_methods=("document", "it_process"),
            scope_tokens=("process",),
            categories=("privacy", "operations"),
            keyword_tokens=("betroffenenrecht", "auskunft", "löschung", "dsr")),
    UseCase("loeschkonzept", "Löschkonzept", "document",
            regulations=("DSGVO",),
            verification_methods=("document", "it_process"),
            doc_types=("loeschkonzept",),
            scope_tokens=("process", "accounting"),
            categories=("data_protection",),
            keyword_tokens=("löschung", "löschfrist", "aufbewahrung")),
    UseCase("avv", "Auftragsverarbeitung (AVV)", "document",
            regulations=("DSGVO",),
            verification_methods=("document",),
            doc_types=("avv",), scope_tokens=("avv", "jc"),
            categories=("compliance",),
            keyword_tokens=("auftragsverarbeitung", "avv")),
    UseCase("dsfa", "Datenschutz-Folgenabschätzung", "document",
            regulations=("DSGVO",),
            verification_methods=("document", "it_process"),
            doc_types=("dsfa",), scope_tokens=("tom", "process"),
            categories=("risk", "privacy"),
            keyword_tokens=("folgenabschätzung", "dsfa")),
    # ── Security / code use cases ───────────────────────────────────
    UseCase("code_security", "Code Security", "security",
            regulations=("CRA", "OWASP", "ISO 27001"),
            verification_methods=("source_code", "hybrid"),
            categories=("testing", "application", "encryption",
                        "authentication", "identity"),
            keyword_tokens=("sast", "secret", "dependency", "vulnerability",
                            "injection", "code")),
    UseCase("network_security", "Network Security", "security",
            regulations=("ISO 27001", "BSI", "NIS2"),
            verification_methods=("network", "hybrid"),
            categories=("network", "system", "operations"),
            keyword_tokens=("firewall", "tls", "port", "segmentation",
                            "network", "header")),
    # ── Cross-cutting / multi-method ────────────────────────────────
    UseCase("cra", "Cyber Resilience Act", "cross_cutting",
            regulations=("CRA",),
            verification_methods=("document", "source_code", "network",
                                  "it_process"),
            categories=("security", "supply_chain", "testing", "incident"),
            keyword_tokens=("cra", "sbom", "konformität", "produktsicherheit")),
    UseCase("isms", "ISMS (ISO 27001)", "cross_cutting",
            regulations=("ISO 27001",),
            verification_methods=("it_process", "document", "hybrid"),
            categories=("governance", "security", "operations", "incident"),
            keyword_tokens=("isms", "risikomanagement", "soa")),
    UseCase("tisax", "TISAX", "cross_cutting",
            regulations=("VDA ISA", "TISAX"),
            verification_methods=("it_process", "document", "network",
                                  "hybrid"),
            categories=("security", "governance", "operations"),
            keyword_tokens=("tisax", "vda", "prototypenschutz")),
    # NOTE(review): "kritis" is listed in the cross-cutting section but its
    # group is "security" — confirm which one is intended.
    UseCase("kritis", "KRITIS / NIS2-Umsetzung", "security",
            regulations=("KRITIS-Dachgesetz", "BSI-KritisV", "BSIG"),
            verification_methods=("it_process", "network", "document")),
    UseCase("dora", "DORA (Digital Operational Resilience)",
            "cross_cutting", regulations=("DORA",),
            verification_methods=("it_process", "document", "network")),
    # ── Product / sector use cases (one per source regulation) ──────
    UseCase("ai_act", "KI-Verordnung (AI Act)", "product",
            regulations=("KI-Verordnung", "AI Act", "NIST AI RMF"),
            verification_methods=("document", "it_process", "source_code")),
    UseCase("mica", "Markets in Crypto-Assets (MiCA)", "product",
            regulations=("MiCA",),
            verification_methods=("document", "it_process")),
    UseCase("mdr", "Medizinprodukte (MDR)", "product",
            regulations=("MDR",),
            verification_methods=("document", "source_code", "it_process")),
    UseCase("maschinen", "Maschinenverordnung", "product",
            regulations=("Maschinenverordnung",),
            verification_methods=("document", "source_code", "it_process")),
    UseCase("batterie", "Batterieverordnung", "product",
            regulations=("Batterieverordnung", "Batteriegesetz"),
            verification_methods=("document", "it_process")),
    UseCase("ehds", "European Health Data Space", "product",
            regulations=("EHDS",),
            verification_methods=("document", "it_process", "source_code")),
    UseCase("produktsicherheit", "Produktsicherheit (GPSR)", "product",
            regulations=("Produktsicherheitsverordnung", "EU Blue Guide"),
            verification_methods=("document", "it_process")),
    UseCase("dsa", "Digital Services Act", "product",
            regulations=("Digital Services Act",),
            verification_methods=("document", "it_process")),
    UseCase("dma", "Digital Markets Act", "product",
            regulations=("Digital Markets Act",),
            verification_methods=("document", "it_process")),
    UseCase("data_governance", "Data Act / Data Governance Act", "product",
            regulations=("Data Act", "Data Governance Act"),
            verification_methods=("document", "it_process")),
    UseCase("zahlungsdienste", "Zahlungsdienste (PSD2)", "product",
            regulations=("Zahlungsdiensterichtlinie",),
            verification_methods=("document", "it_process", "source_code")),
    UseCase("geldwaesche", "Geldwäsche (AML/GwG)", "product",
            regulations=("AML-Verordnung", "GwG"),
            verification_methods=("document", "it_process")),
    UseCase("lieferkette", "Lieferkettensorgfalt (LkSG)", "product",
            regulations=("LkSG",),
            verification_methods=("document", "it_process")),
    UseCase("whistleblowing", "Hinweisgeberschutz (HinSchG)", "product",
            regulations=("HinSchG",),
            verification_methods=("document", "it_process")),
    UseCase("barrierefreiheit", "Barrierefreiheit (EAA)", "product",
            regulations=("European Accessibility Act",),
            verification_methods=("document", "source_code")),
    # ── Further legal use cases (document) ──────────────────────────
    UseCase("verbraucherschutz", "Verbraucherschutz", "document",
            regulations=("Konsumentenschutzgesetz",
                         "Digitale-Inhalte-Richtlinie"),
            verification_methods=("document",)),
    UseCase("urheberrecht", "Urheberrecht", "document",
            regulations=("UrhG", "DSM-Urheberrechtsrichtlinie"),
            verification_methods=("document",)),
    UseCase("wettbewerbsrecht", "Wettbewerbsrecht (UWG)", "document",
            regulations=("UWG",), verification_methods=("document",)),
    UseCase("gleichbehandlung", "Gleichbehandlung (AGG)", "document",
            regulations=("AGG",),
            verification_methods=("document", "it_process")),
    UseCase("steuerrecht", "Steuerrecht", "document",
            regulations=("Abgabenordnung", "BAO"),
            verification_methods=("document", "it_process")),
    UseCase("handelsrecht", "Handelsrecht", "document",
            regulations=("HGB", "UGB", "ABGB"),
            verification_methods=("document", "it_process")),
)
# Deterministic mapper: source regulation → use case.
# control_parent_links.source_regulation (117 distinct) → ~30 domains.
# Each rule is (needle, use_case_key). The needle is matched as a
# case-insensitive substring of the regulation name and the FIRST matching
# rule wins, so order is significant: SPECIFIC rules come first. The
# data-protection catch-alls (edpb/dsk/dsgvo) are placed at the very end so
# that specific rules (e.g. 'DSK OH Telemedien') can match before them.
# NOTE(review): short needles such as "mica", "bao" or "tkg" match anywhere
# inside a name (e.g. "mica" is contained in "chemicals"). Some needles are
# written with parentheses ("(cra)", "(mdr)"), presumably to avoid exactly
# that — confirm whether the short ones need the same treatment.
_REGULATION_RULES: tuple[tuple[str, str], ...] = (
    # Security / code (security team)
    ("owasp", "code_security"),
    ("nist sp 800-218", "code_security"),
    ("ssdf", "code_security"),
    ("nist sp 800-63", "code_security"),
    ("nistir 8259", "code_security"),
    ("cisa", "code_security"),
    ("nist sp 800-207", "network_security"),
    ("zero trust", "network_security"),
    ("nis2", "network_security"),
    ("nis-2", "network_security"),
    ("enisa", "network_security"),
    ("bsi-gesetz", "network_security"),
    ("bsig", "network_security"),
    ("cybersecurity act", "network_security"),
    ("kritis", "kritis"),
    ("nist cybersecurity framework", "isms"),
    ("nist sp 800-53", "isms"),
    ("digital operational resilience", "dora"),
    ("dora", "dora"),
    # Product / sector
    ("cyber resilience act", "cra"),
    ("(cra)", "cra"),
    ("nist ai risk", "ai_act"),
    ("ki-verordnung", "ai_act"),
    ("ki-vo", "ai_act"),
    ("ai act", "ai_act"),
    ("oecd ki", "ai_act"),
    ("crypto-assets", "mica"),
    ("mica", "mica"),
    ("medizinprodukte", "mdr"),
    ("(mdr)", "mdr"),
    ("maschinenverordnung", "maschinen"),
    ("batterie", "batterie"),
    ("health data space", "ehds"),
    ("produktsicherheit", "produktsicherheit"),
    ("blue guide", "produktsicherheit"),
    ("digital services act", "dsa"),
    ("digital markets act", "dma"),
    ("data act", "data_governance"),
    ("data governance", "data_governance"),
    ("zahlungsdienste", "zahlungsdienste"),
    # NOTE(review): ASCII transliteration only; a name spelled with "ä"
    # ("geldwäsche") would not match this needle — confirm the source data.
    ("geldwaesche", "geldwaesche"),
    ("aml-verordnung", "geldwaesche"),
    ("lieferkettensorgfalt", "lieferkette"),
    ("lksg", "lieferkette"),
    ("hinweisgeberschutz", "whistleblowing"),
    ("hinschg", "whistleblowing"),
    ("accessibility act", "barrierefreiheit"),
    # Website / telemedia / law (user domain)
    ("tdddg", "cookie_banner"),
    ("eprivacy", "cookie_banner"),
    ("telemedien", "impressum"),
    ("telekommunikationsgesetz", "impressum"),
    ("tkg", "impressum"),
    ("tmg", "impressum"),
    ("mediengesetz", "impressum"),
    ("gewerbeordnung", "impressum"),
    ("e-commerce", "agb"),
    ("digitale-inhalte", "agb"),
    ("konsumentenschutz", "verbraucherschutz"),
    ("urheberrecht", "urheberrecht"),
    ("urhg", "urheberrecht"),
    ("uwg", "wettbewerbsrecht"),
    ("handelsgesetzbuch", "handelsrecht"),
    ("hgb", "handelsrecht"),
    ("ugb", "handelsrecht"),
    # "abgb" must stay ahead of "bgb": "bgb" is a substring of "abgb".
    ("abgb", "handelsrecht"),
    ("bgb", "agb"),
    ("gleichbehandlung", "gleichbehandlung"),
    ("(agg)", "gleichbehandlung"),
    ("abgabenordnung", "steuerrecht"),
    ("bao", "steuerrecht"),
    ("standardvertragsklauseln", "avv"),
    ("(scc)", "avv"),
    # Data-protection catch-alls (last)
    ("nist privacy framework", "dse"),
    ("dsgvo", "dse"),
    ("datenschutzgesetz", "dse"),
    ("bdsg", "dse"),
    ("edpb", "dse"),
    ("edps", "dse"),
    ("dsk ", "dse"),
    ("wp29", "dse"),
    ("bfdi", "dse"),
    ("data privacy framework", "dse"),
    ("datenschutz", "dse"),
)
def use_case_for_regulation(regulation: str | None) -> str | None:
    """Map a source regulation name to a use-case key, deterministically.

    The name is lower-cased and the rules in ``_REGULATION_RULES`` are tried
    in order; the use case of the first rule whose needle occurs as a
    substring is returned.

    Args:
        regulation: Name of the source regulation; may be ``None`` or empty.

    Returns:
        The use-case key of the first matching rule, or ``None`` when the
        input is empty/``None`` or no rule matches (→ fallback / review).
    """
    if regulation:
        haystack = regulation.lower()
        # next() stops at the first hit, so rule order decides the winner.
        return next(
            (target for needle, target in _REGULATION_RULES
             if needle in haystack),
            None,
        )
    return None
# Lookup of every entry in _USE_CASES by its key. Disabled entries are
# included; if two entries shared a key, the later one would win.
REGISTRY: dict[str, UseCase] = {uc.key: uc for uc in _USE_CASES}
# canonical_controls.evidence_type / .verification_method → our verification
# method (used by the deterministic seed; the LLM pass refines it).
# Values not listed here resolve to None in
# evidence_to_verification_method().
_EVIDENCE_TO_METHOD: dict[str, str] = {
    "document": "document",
    "code": "source_code",
    "code_review": "source_code",
    "process": "it_process",
    "tool": "network",
    "hybrid": "hybrid",
}
def _reverse(attr: str) -> dict[str, list[str]]:
    """Build a reverse index token → use-case keys for one UseCase field.

    Args:
        attr: Name of a UseCase attribute holding an iterable of tokens
            (e.g. ``"scope_tokens"``).

    Returns:
        A dict mapping each token to the keys of all enabled use cases that
        list it, in ``_USE_CASES`` order.
    """
    index: dict[str, list[str]] = {}
    # Disabled use cases must not appear in any reverse index.
    active_cases = (case for case in _USE_CASES if case.enabled)
    for case in active_cases:
        for token in getattr(case, attr):
            if token not in index:
                index[token] = []
            index[token].append(case.key)
    return index
# Reverse indexes (token → keys of the enabled use cases listing it), built
# once when the module is imported.
scope_token_to_use_cases: dict[str, list[str]] = _reverse("scope_tokens")
category_to_use_cases: dict[str, list[str]] = _reverse("categories")
doc_type_to_use_cases: dict[str, list[str]] = _reverse("doc_types")
def is_valid_use_case(key: str) -> bool:
    """Return True if ``key`` names a registered use case that is enabled."""
    if key not in REGISTRY:
        return False
    return REGISTRY[key].enabled
def is_valid_verification_method(method: str) -> bool:
    """Return True if ``method`` is one of VERIFICATION_METHODS."""
    known_methods = VERIFICATION_METHODS
    return method in known_methods
def evidence_to_verification_method(value: str | None) -> str | None:
    """Heuristically map an evidence/verification value to our method.

    Used by the deterministic seed. The value is stripped and lower-cased
    before it is looked up in ``_EVIDENCE_TO_METHOD``.

    Args:
        value: Raw value; may be ``None`` or empty.

    Returns:
        The mapped verification method, or ``None`` when the value is
        empty/``None`` or unknown.
    """
    if value:
        normalized = value.strip().lower()
        return _EVIDENCE_TO_METHOD.get(normalized)
    return None
def enabled_use_cases() -> list[UseCase]:
    """Return all enabled use cases, in registry order."""
    active: list[UseCase] = []
    for case in _USE_CASES:
        if case.enabled:
            active.append(case)
    return active
def seed_classify(
    scopes=(), categories=(), vmethods=(), etypes=(),
) -> tuple[list[str], str | None]:
    """Deterministic seed classification of a Master Control (no LLM).

    Derives use cases from the aggregated member signals scope_doc_type and
    category, and a verification method from verification_method /
    evidence_type values. Reads only the module-level lookup tables.

    Args:
        scopes: scope_doc_type values; looked up in
            ``scope_token_to_use_cases``. Falsy items are skipped.
        categories: category values; looked up in ``category_to_use_cases``.
            Falsy items are skipped.
        vmethods: verification_method values, tried first for the method.
        etypes: evidence_type values, tried after ``vmethods``.

    Returns:
        ``(use_cases, verification_method)``: the sorted list of matched
        use-case keys, and the first value (vmethods before etypes) that
        ``evidence_to_verification_method`` can map, or ``None``.
    """
    matched: set[str] = set()
    for lookup, signals in (
        (scope_token_to_use_cases, scopes),
        (category_to_use_cases, categories),
    ):
        for signal in signals or ():
            if signal:
                matched.update(lookup.get(signal, ()))

    # vmethods take precedence over etypes; the first mappable value wins.
    candidates = [*(vmethods or ()), *(etypes or ())]
    method: str | None = next(
        (m for m in map(evidence_to_verification_method, candidates) if m),
        None,
    )
    return sorted(matched), method
def taxonomy_for_prompt() -> str:
    """Build the compact anchor block for the LLM classifier.

    The block consists of a header line, one line per enabled use case in
    the form ``key — label — regulations — methods`` (matching the header),
    and a final line listing all verification methods. Empty regulation or
    method lists are rendered as ``-``.

    Returns:
        The lines joined with newlines.
    """
    lines = ["USE CASES (key — Label — Regulierungen — Methoden):"]
    for uc in enabled_use_cases():
        regulations = ", ".join(uc.regulations) or "-"
        methods = ", ".join(uc.verification_methods) or "-"
        # Separate the fields as the header announces; without separators
        # key, label, regulations and methods run together into one word.
        lines.append(f" {uc.key} — {uc.label} — {regulations} — {methods}")
    lines.append("VERIFIKATIONS-METHODEN: " + ", ".join(VERIFICATION_METHODS))
    return "\n".join(lines)
def frontend_list() -> list[dict]:
    """Return a slim list for the frontend dropdown.

    The original docstring names ``use-case-registry.ts`` as the frontend
    twin of this list.

    Returns:
        One dict per enabled use case with the keys ``key``, ``label``,
        ``group`` and ``verification_methods`` (as a list).
    """
    entries: list[dict] = []
    for case in enabled_use_cases():
        entries.append({
            "key": case.key,
            "label": case.label,
            "group": case.group,
            "verification_methods": list(case.verification_methods),
        })
    return entries
def registry_hash() -> str:
    """Return a stable hash of the taxonomy, used to trigger re-classification
    when the taxonomy changes.

    The hash covers, for every entry of ``_USE_CASES`` (disabled ones
    included): key, group, regulations, verification_methods, doc_types,
    scope_tokens and categories.

    NOTE(review): label, keyword_tokens, enabled and ``_REGULATION_RULES``
    are not part of the payload, so changing them leaves the hash unchanged —
    confirm that this is intended.

    Returns:
        The SHA-256 hex digest of the JSON-serialised payload.
    """
    rows = []
    for case in _USE_CASES:
        rows.append([
            case.key,
            case.group,
            list(case.regulations),
            list(case.verification_methods),
            list(case.doc_types),
            list(case.scope_tokens),
            list(case.categories),
        ])
    serialized = json.dumps(rows, sort_keys=True, ensure_ascii=False)
    digest = hashlib.sha256()
    digest.update(serialized.encode("utf-8"))
    return digest.hexdigest()