feat(use-cases): deterministisches source_regulation-Mapping + Primaerzweck [migration-approved]
CI / detect-changes (push) Successful in 8s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / secret-scan (push) Has been skipped
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / build-sha-integrity (push) Failing after 4s
CI / validate-canonical-controls (push) Successful in 12s
CI / loc-budget (push) Successful in 14s
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Has been skipped
CI / test-go (push) Has been skipped
CI / iace-gt-coverage (push) Has been skipped
CI / test-python-backend (push) Successful in 31s
CI / test-python-document-crawler (push) Has been skipped
CI / test-python-dsms-gateway (push) Has been skipped

Use-Case-Zuordnung jetzt DETERMINISTISCH aus der Quell-Regulierung (statt
LLM/scope-category): control_parent_links.source_regulation (79% der 13.588
MCs) -> Keyword-Mapper -> ~30 Domaenen-Use-Cases. 117/117 Regulierungen
gemappt (dse 44 Leitlinien, code_security 10, network_security 9, ...).

- use_case_registry.py: 37 Use Cases (Doku + Security + Produkt/Sektor:
  cra/ai_act/mica/mdr/maschinen/batterie/ehds/dsa/dma/psd2/aml/lksg/...) +
  use_case_for_regulation() Keyword-Mapper (117 Regulierungen abgedeckt).
- migration 150: is_primary auf mc_use_case_mappings + neue mc_regulations
  (MC->source_regulation, n:m, is_primary) als feine Filter-Dimension.
- classify_mc_use_cases.py: source_regulation-getriebener Seed; Primaerzweck =
  dominante Regulierung, Mehrfachzwecke = weitere. PYTHONPATH-Bootstrap.
- 18 Registry-Tests gruen (Mapper-Abdeckung + Konsistenz-Invariante).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-06-09 16:27:06 +02:00
parent a48e919caa
commit 6ca4dcde3e
4 changed files with 354 additions and 47 deletions
@@ -28,7 +28,9 @@ VERIFICATION_METHODS: tuple[str, ...] = (
"manual", # menschliche Attestierung
)
# Closed set of groups a UseCase may belong to. "product" covers the
# product-/sector-specific use cases. Defined exactly once: a second annotated
# assignment to the same name is a dead store and is flagged by type checkers.
USE_CASE_GROUPS: tuple[str, ...] = (
    "document", "security", "cross_cutting", "product",
)
@dataclass(frozen=True)
@@ -136,9 +138,188 @@ _USE_CASES: tuple[UseCase, ...] = (
"hybrid"),
categories=("security", "governance", "operations"),
keyword_tokens=("tisax", "vda", "prototypenschutz")),
UseCase("kritis", "KRITIS / NIS2-Umsetzung", "security",
regulations=("KRITIS-Dachgesetz", "BSI-KritisV", "BSIG"),
verification_methods=("it_process", "network", "document")),
UseCase("dora", "DORA (Digital Operational Resilience)",
"cross_cutting", regulations=("DORA",),
verification_methods=("it_process", "document", "network")),
# ── Produkt-/Sektor-Use-Cases (je Quell-Regulierung) ────────────
UseCase("ai_act", "KI-Verordnung (AI Act)", "product",
regulations=("KI-Verordnung", "AI Act", "NIST AI RMF"),
verification_methods=("document", "it_process", "source_code")),
UseCase("mica", "Markets in Crypto-Assets (MiCA)", "product",
regulations=("MiCA",),
verification_methods=("document", "it_process")),
UseCase("mdr", "Medizinprodukte (MDR)", "product",
regulations=("MDR",),
verification_methods=("document", "source_code", "it_process")),
UseCase("maschinen", "Maschinenverordnung", "product",
regulations=("Maschinenverordnung",),
verification_methods=("document", "source_code", "it_process")),
UseCase("batterie", "Batterieverordnung", "product",
regulations=("Batterieverordnung", "Batteriegesetz"),
verification_methods=("document", "it_process")),
UseCase("ehds", "European Health Data Space", "product",
regulations=("EHDS",),
verification_methods=("document", "it_process", "source_code")),
UseCase("produktsicherheit", "Produktsicherheit (GPSR)", "product",
regulations=("Produktsicherheitsverordnung", "EU Blue Guide"),
verification_methods=("document", "it_process")),
UseCase("dsa", "Digital Services Act", "product",
regulations=("Digital Services Act",),
verification_methods=("document", "it_process")),
UseCase("dma", "Digital Markets Act", "product",
regulations=("Digital Markets Act",),
verification_methods=("document", "it_process")),
UseCase("data_governance", "Data Act / Data Governance Act", "product",
regulations=("Data Act", "Data Governance Act"),
verification_methods=("document", "it_process")),
UseCase("zahlungsdienste", "Zahlungsdienste (PSD2)", "product",
regulations=("Zahlungsdiensterichtlinie",),
verification_methods=("document", "it_process", "source_code")),
UseCase("geldwaesche", "Geldwäsche (AML/GwG)", "product",
regulations=("AML-Verordnung", "GwG"),
verification_methods=("document", "it_process")),
UseCase("lieferkette", "Lieferkettensorgfalt (LkSG)", "product",
regulations=("LkSG",),
verification_methods=("document", "it_process")),
UseCase("whistleblowing", "Hinweisgeberschutz (HinSchG)", "product",
regulations=("HinSchG",),
verification_methods=("document", "it_process")),
UseCase("barrierefreiheit", "Barrierefreiheit (EAA)", "product",
regulations=("European Accessibility Act",),
verification_methods=("document", "source_code")),
# ── Weitere Rechts-Use-Cases (Dokument) ─────────────────────────
UseCase("verbraucherschutz", "Verbraucherschutz", "document",
regulations=("Konsumentenschutzgesetz",
"Digitale-Inhalte-Richtlinie"),
verification_methods=("document",)),
UseCase("urheberrecht", "Urheberrecht", "document",
regulations=("UrhG", "DSM-Urheberrechtsrichtlinie"),
verification_methods=("document",)),
UseCase("wettbewerbsrecht", "Wettbewerbsrecht (UWG)", "document",
regulations=("UWG",), verification_methods=("document",)),
UseCase("gleichbehandlung", "Gleichbehandlung (AGG)", "document",
regulations=("AGG",),
verification_methods=("document", "it_process")),
UseCase("steuerrecht", "Steuerrecht", "document",
regulations=("Abgabenordnung", "BAO"),
verification_methods=("document", "it_process")),
UseCase("handelsrecht", "Handelsrecht", "document",
regulations=("HGB", "UGB", "ABGB"),
verification_methods=("document", "it_process")),
)
# Deterministischer Quell-Regulierung → Use-Case-Mapper.
# control_parent_links.source_regulation (117 distinct) → ~30 Domaenen.
# Reihenfolge = SPEZIFISCH zuerst (Substring, case-insensitive); der erste
# Treffer gewinnt. Die Datenschutz-Catch-alls (edpb/dsk/dsgvo) ganz hinten,
# damit spezifische Regeln (z.B. 'DSK OH Telemedien') zuerst greifen koennen.
_REGULATION_RULES: tuple[tuple[str, str], ...] = (
# Security / Code (Security-Team)
("owasp", "code_security"),
("nist sp 800-218", "code_security"),
("ssdf", "code_security"),
("nist sp 800-63", "code_security"),
("nistir 8259", "code_security"),
("cisa", "code_security"),
("nist sp 800-207", "network_security"),
("zero trust", "network_security"),
("nis2", "network_security"),
("nis-2", "network_security"),
("enisa", "network_security"),
("bsi-gesetz", "network_security"),
("bsig", "network_security"),
("cybersecurity act", "network_security"),
("kritis", "kritis"),
("nist cybersecurity framework", "isms"),
("nist sp 800-53", "isms"),
("digital operational resilience", "dora"),
("dora", "dora"),
# Produkt / Sektor
("cyber resilience act", "cra"),
("(cra)", "cra"),
("nist ai risk", "ai_act"),
("ki-verordnung", "ai_act"),
("ki-vo", "ai_act"),
("ai act", "ai_act"),
("oecd ki", "ai_act"),
("crypto-assets", "mica"),
("mica", "mica"),
("medizinprodukte", "mdr"),
("(mdr)", "mdr"),
("maschinenverordnung", "maschinen"),
("batterie", "batterie"),
("health data space", "ehds"),
("produktsicherheit", "produktsicherheit"),
("blue guide", "produktsicherheit"),
("digital services act", "dsa"),
("digital markets act", "dma"),
("data act", "data_governance"),
("data governance", "data_governance"),
("zahlungsdienste", "zahlungsdienste"),
("geldwaesche", "geldwaesche"),
("aml-verordnung", "geldwaesche"),
("lieferkettensorgfalt", "lieferkette"),
("lksg", "lieferkette"),
("hinweisgeberschutz", "whistleblowing"),
("hinschg", "whistleblowing"),
("accessibility act", "barrierefreiheit"),
# Website / Telemedien / Recht (User-Domaene)
("tdddg", "cookie_banner"),
("eprivacy", "cookie_banner"),
("telemedien", "impressum"),
("telekommunikationsgesetz", "impressum"),
("tkg", "impressum"),
("tmg", "impressum"),
("mediengesetz", "impressum"),
("gewerbeordnung", "impressum"),
("e-commerce", "agb"),
("digitale-inhalte", "agb"),
("konsumentenschutz", "verbraucherschutz"),
("urheberrecht", "urheberrecht"),
("urhg", "urheberrecht"),
("uwg", "wettbewerbsrecht"),
("handelsgesetzbuch", "handelsrecht"),
("hgb", "handelsrecht"),
("ugb", "handelsrecht"),
("abgb", "handelsrecht"),
("bgb", "agb"),
("gleichbehandlung", "gleichbehandlung"),
("(agg)", "gleichbehandlung"),
("abgabenordnung", "steuerrecht"),
("bao", "steuerrecht"),
("standardvertragsklauseln", "avv"),
("(scc)", "avv"),
# Datenschutz-Catch-alls (zuletzt)
("nist privacy framework", "dse"),
("dsgvo", "dse"),
("datenschutzgesetz", "dse"),
("bdsg", "dse"),
("edpb", "dse"),
("edps", "dse"),
("dsk ", "dse"),
("wp29", "dse"),
("bfdi", "dse"),
("data privacy framework", "dse"),
("datenschutz", "dse"),
)
def use_case_for_regulation(regulation: str | None) -> str | None:
"""Deterministisch: Quell-Regulierung → Domaenen-Use-Case (erster
Substring-Treffer). None wenn keine Regel passt (→ Fallback/Review)."""
if not regulation:
return None
low = regulation.lower()
for needle, uc in _REGULATION_RULES:
if needle in low:
return uc
return None
# Lookup table: use-case key -> UseCase, built from every entry in _USE_CASES.
REGISTRY: dict[str, UseCase] = {
    case.key: case for case in _USE_CASES
}