feat(use-cases): Fundament — Use-Case-Register + n:m-Mapping-Migration + Seed [migration-approved]
CI / detect-changes (push) Successful in 9s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / secret-scan (push) Has been skipped
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / build-sha-integrity (push) Failing after 4s
CI / validate-canonical-controls (push) Successful in 11s
CI / loc-budget (push) Successful in 14s
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Has been skipped
CI / test-go (push) Has been skipped
CI / iace-gt-coverage (push) Has been skipped
CI / test-python-backend (push) Successful in 30s
CI / test-python-document-crawler (push) Has been skipped
CI / test-python-dsms-gateway (push) Has been skipped

Layer 1+2 (Fundament) des Use-Case-Mapping-Systems (Plan genehmigt):
- compliance/data/use_case_registry.py: Single Source of Truth fuer 14 Use
  Cases x Verifikations-Methoden (Doku/Source-Code/Netzwerk/IT-Prozess).
  Erweiterbar (neuer UC = 1 Eintrag). code_security/network_security als
  Uebergabe-Punkte fuers Security-Team (SBOM/SAST/DAST/Pentest).
- migrations/149_mc_use_case_mappings.sql: add-only n:m mc_use_case_mappings
  + mc_verification (1/MC) + sync_state. use_case ohne SQL-CHECK (erweiterbar).
- scripts/classify_mc_use_cases.py: Seed-Stufe (deterministisch, kein LLM).
  LLM-Stufe (Phase 3) folgt.
- Tests: test_use_case_registry.py (14 gruen).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-06-09 15:30:34 +02:00
parent 0bf9c54d27
commit dca7740d8c
4 changed files with 518 additions and 0 deletions
@@ -0,0 +1,244 @@
"""Use-Case-Register — Single Source of Truth fuer Use Cases × Verifikations-
Methoden.
Jede Master Control wird auf >=1 Use Case (n:m) und genau eine Verifikations-
Methode gemappt. Use Cases sind NICHT nur dokumenten-basiert: >=50% sind
Source-Code / IT-Prozess (Code Security, Network Security, CRA, ISMS, TISAX).
Neuer Use Case = 1 Eintrag in `_USE_CASES`. Kein DB-Schema-Change noetig.
Dieses Modul ist die kanonische Quelle; die heute verstreuten doc_type-Listen
(rag_document_checker._DOC_TYPE_MAP, legacy_url_discovery._SLUG_FAMILY,
doc_type_classifier, Migration 145) werden spaeter test-gated darauf reduziert.
"""
from __future__ import annotations
import hashlib
import json
from dataclasses import dataclass
# How is a Master Control (MC) verified? Canonical taxonomy of verification
# methods; is_valid_verification_method() checks membership in this tuple.
VERIFICATION_METHODS: tuple[str, ...] = (
    "document",  # read a published document (Impressum, DSE, ...)
    "source_code",  # scan repo/code (SAST, secrets, dependencies, review)
    "network",  # scan network/infra (ports, TLS, headers, config)
    "it_process",  # process/evidence review (procedures, evidence)
    "hybrid",  # combination of several methods
    "manual",  # human attestation
)
# Group names a use case can be filed under (see the `group` field of UseCase).
USE_CASE_GROUPS: tuple[str, ...] = ("document", "security", "cross_cutting")
@dataclass(frozen=True)
class UseCase:
    """Immutable description of one use case in the registry.

    Fields (the first three are required and positional):
        key: Stable snake_case identifier.
        label: Human-readable name.
        group: One of ``USE_CASE_GROUPS``.
        regulations: Names of the regulations the use case relates to.
        verification_methods: Entries from ``VERIFICATION_METHODS`` that
            apply to this use case.
        doc_types: Operational doc_type aliases (document use cases).
        scope_tokens: Values of ``canonical_controls.scope_doc_type`` that
            point at this use case.
        categories: Values of ``canonical_controls.category`` that point at
            this use case.
        keyword_tokens: Keywords expected in canonical_name / title.
        enabled: Whether the use case is active; defaults to ``True``.
    """

    # Required identity fields.
    key: str
    label: str
    group: str

    # Optional classification signals; each defaults to an empty tuple.
    regulations: tuple[str, ...] = ()
    verification_methods: tuple[str, ...] = ()
    doc_types: tuple[str, ...] = ()
    scope_tokens: tuple[str, ...] = ()
    categories: tuple[str, ...] = ()
    keyword_tokens: tuple[str, ...] = ()

    # Activation switch.
    enabled: bool = True
# Registry table: one UseCase entry per use case. The three positional
# arguments of each entry are key, label and group; everything else is passed
# by keyword. REGISTRY and the reverse lookup maps are derived from this tuple.
_USE_CASES: tuple[UseCase, ...] = (
    # ── Document use cases ──────────────────────────────────────────
    UseCase("impressum", "Impressum (§5 TMG/DDG)", "document",
            regulations=("TMG", "DDG", "MStV"),
            verification_methods=("document",),
            doc_types=("impressum",), scope_tokens=("impressum",),
            categories=("compliance",),
            keyword_tokens=("impressum", "anbieterkennzeichnung")),
    UseCase("dse", "Datenschutzerklärung", "document",
            regulations=("DSGVO",),
            verification_methods=("document",),
            doc_types=("dse",), scope_tokens=("dse",),
            categories=("privacy", "data_protection"),
            keyword_tokens=("datenschutz", "privacy")),
    UseCase("agb", "AGB", "document",
            regulations=("BGB",),
            verification_methods=("document",),
            doc_types=("agb",), scope_tokens=("agb",),
            categories=("compliance",),
            keyword_tokens=("geschäftsbedingungen", "agb")),
    UseCase("cookie_banner", "Cookie-Banner & -Richtlinie", "document",
            regulations=("TDDDG", "ePrivacy", "DSGVO"),
            verification_methods=("document", "source_code"),
            doc_types=("cookie",),
            scope_tokens=("cookie_richtlinie", "banner_implementation",
                          "cmp_audit"),
            categories=("privacy",),
            keyword_tokens=("cookie", "consent", "einwilligung")),
    UseCase("widerruf", "Widerrufsbelehrung", "document",
            regulations=("BGB",),
            verification_methods=("document",),
            doc_types=("widerruf",), scope_tokens=("widerruf",),
            categories=("compliance",),
            keyword_tokens=("widerruf", "widerrufsbelehrung")),
    # Note: this entry declares no doc_types.
    UseCase("dsr", "Betroffenenrechte (DSR)", "document",
            regulations=("DSGVO",),
            verification_methods=("document", "it_process"),
            scope_tokens=("process",),
            categories=("privacy", "operations"),
            keyword_tokens=("betroffenenrecht", "auskunft", "löschung", "dsr")),
    UseCase("loeschkonzept", "Löschkonzept", "document",
            regulations=("DSGVO",),
            verification_methods=("document", "it_process"),
            doc_types=("loeschkonzept",),
            scope_tokens=("process", "accounting"),
            categories=("data_protection",),
            keyword_tokens=("löschung", "löschfrist", "aufbewahrung")),
    UseCase("avv", "Auftragsverarbeitung (AVV)", "document",
            regulations=("DSGVO",),
            verification_methods=("document",),
            doc_types=("avv",), scope_tokens=("avv", "jc"),
            categories=("compliance",),
            keyword_tokens=("auftragsverarbeitung", "avv")),
    UseCase("dsfa", "Datenschutz-Folgenabschätzung", "document",
            regulations=("DSGVO",),
            verification_methods=("document", "it_process"),
            doc_types=("dsfa",), scope_tokens=("tom", "process"),
            categories=("risk", "privacy"),
            keyword_tokens=("folgenabschätzung", "dsfa")),
    # ── Security / code use cases ───────────────────────────────────
    # These entries carry no doc_types or scope_tokens, so only their
    # categories feed the reverse lookup maps.
    UseCase("code_security", "Code Security", "security",
            regulations=("CRA", "OWASP", "ISO 27001"),
            verification_methods=("source_code", "hybrid"),
            categories=("testing", "application", "encryption",
                        "authentication", "identity"),
            keyword_tokens=("sast", "secret", "dependency", "vulnerability",
                            "injection", "code")),
    UseCase("network_security", "Network Security", "security",
            regulations=("ISO 27001", "BSI", "NIS2"),
            verification_methods=("network", "hybrid"),
            categories=("network", "system", "operations"),
            keyword_tokens=("firewall", "tls", "port", "segmentation",
                            "network", "header")),
    # ── Cross-cutting / multi-method ────────────────────────────────
    UseCase("cra", "Cyber Resilience Act", "cross_cutting",
            regulations=("CRA",),
            verification_methods=("document", "source_code", "network",
                                  "it_process"),
            categories=("security", "supply_chain", "testing", "incident"),
            keyword_tokens=("cra", "sbom", "konformität", "produktsicherheit")),
    UseCase("isms", "ISMS (ISO 27001)", "cross_cutting",
            regulations=("ISO 27001",),
            verification_methods=("it_process", "document", "hybrid"),
            categories=("governance", "security", "operations", "incident"),
            keyword_tokens=("isms", "risikomanagement", "soa")),
    UseCase("tisax", "TISAX", "cross_cutting",
            regulations=("VDA ISA", "TISAX"),
            verification_methods=("it_process", "document", "network",
                                  "hybrid"),
            categories=("security", "governance", "operations"),
            keyword_tokens=("tisax", "vda", "prototypenschutz")),
)
# Key → UseCase lookup over every entry of _USE_CASES (disabled entries are
# included; is_valid_use_case() applies the `enabled` check). A duplicated key
# would silently keep only the last entry.
REGISTRY: dict[str, UseCase] = {uc.key: uc for uc in _USE_CASES}
# canonical_controls.evidence_type / .verification_method → our verification
# method (used by the deterministic seed; the LLM pass refines it later).
# Keys are lower-case; evidence_to_verification_method() strips and
# lower-cases its input before looking it up here.
_EVIDENCE_TO_METHOD: dict[str, str] = {
    "document": "document",
    "code": "source_code",
    "code_review": "source_code",
    "process": "it_process",
    "tool": "network",
    "hybrid": "hybrid",
}
def _reverse(attr: str) -> dict[str, list[str]]:
    """Build a reverse index from the tokens of one UseCase field to use-case keys.

    Args:
        attr: Name of a tuple-valued ``UseCase`` attribute (for example
            ``"scope_tokens"``).

    Returns:
        A dict mapping each token found in that attribute to the keys of the
        enabled use cases that list it, in registry order. Disabled use cases
        contribute nothing.
    """
    index: dict[str, list[str]] = {}
    active = (use_case for use_case in _USE_CASES if use_case.enabled)
    for use_case in active:
        for token in getattr(use_case, attr):
            if token not in index:
                index[token] = []
            index[token].append(use_case.key)
    return index
# Reverse indexes computed once at import time from the enabled use cases:
# token → list of use-case keys that declare that token.
# seed_classify() reads the first two of these maps.
scope_token_to_use_cases: dict[str, list[str]] = _reverse("scope_tokens")
category_to_use_cases: dict[str, list[str]] = _reverse("categories")
doc_type_to_use_cases: dict[str, list[str]] = _reverse("doc_types")
def is_valid_use_case(key: str) -> bool:
    """Report whether ``key`` names a registered use case that is enabled.

    Unknown keys yield ``False``; known keys yield the entry's ``enabled`` flag.
    """
    if key not in REGISTRY:
        return False
    return REGISTRY[key].enabled
def is_valid_verification_method(method: str) -> bool:
    """Report whether ``method`` is one of the canonical ``VERIFICATION_METHODS``."""
    known_methods = VERIFICATION_METHODS
    return method in known_methods
def evidence_to_verification_method(value: str | None) -> str | None:
    """Translate an evidence/verification signal into a verification method.

    Heuristic mapping used by the seed classifier.

    Args:
        value: Raw signal string, or ``None``.

    Returns:
        The mapped method from ``_EVIDENCE_TO_METHOD`` after trimming
        whitespace and lower-casing ``value``; ``None`` when ``value`` is
        falsy or has no mapping.
    """
    if value:
        normalized = value.strip().lower()
        return _EVIDENCE_TO_METHOD.get(normalized)
    return None
def enabled_use_cases() -> list[UseCase]:
    """Return a new list of the use cases whose ``enabled`` flag is set, in registry order."""
    return list(filter(lambda use_case: use_case.enabled, _USE_CASES))
def seed_classify(
    scopes=(), categories=(), vmethods=(), etypes=(),
) -> tuple[list[str], str | None]:
    """Deterministically seed-classify one Master Control (no LLM involved).

    Args:
        scopes: Iterable of scope_doc_type tokens; looked up in
            ``scope_token_to_use_cases``. ``None`` is treated as empty.
        categories: Iterable of category tokens; looked up in
            ``category_to_use_cases``. ``None`` is treated as empty.
        vmethods: Iterable of verification_method signals.
        etypes: Iterable of evidence_type signals.

    Returns:
        ``(use_cases, verification_method)``: the sorted, de-duplicated
        use-case keys matched by any scope or category token, and the method
        for the first signal that ``evidence_to_verification_method``
        recognises (all ``vmethods`` are tried before ``etypes``), or ``None``
        if none is recognised.

    The function is pure and therefore easy to test.
    """
    matched: set[str] = set()
    # Falsy tokens (None, "") are skipped before any lookup happens.
    for token in filter(None, scopes or ()):
        matched.update(scope_token_to_use_cases.get(token, ()))
    for token in filter(None, categories or ()):
        matched.update(category_to_use_cases.get(token, ()))

    # Verification-method signals take precedence over evidence-type signals.
    signals = [*(vmethods or ()), *(etypes or ())]
    resolved = (evidence_to_verification_method(signal) for signal in signals)
    method: str | None = next((hit for hit in resolved if hit), None)

    return sorted(matched), method
def taxonomy_for_prompt() -> str:
    """Build the compact taxonomy anchor block for the LLM classifier.

    The block consists of a header line, one line per enabled use case in the
    form ``key — label — regulations — methods`` (matching the column order
    announced in the header), and a final line listing every verification
    method.

    Returns:
        The lines joined with ``"\\n"``.

    NOTE(review): the original docstring called this block "cached"; this
    function performs no caching itself, so presumably the caller (or the LLM
    prompt cache) is meant. Confirm.
    """
    lines = ["USE CASES (key — Label — Regulierungen — Methoden):"]
    for uc in enabled_use_cases():
        # "-" keeps the column present when a use case declares no entries.
        regulations = ", ".join(uc.regulations) or "-"
        methods = ", ".join(uc.verification_methods) or "-"
        # The fields need explicit separators; without them key, label,
        # regulations and methods run together into one unreadable token.
        lines.append(f" {uc.key} — {uc.label} — {regulations} — {methods}")
    lines.append("VERIFIKATIONS-METHODEN: " + ", ".join(VERIFICATION_METHODS))
    return "\n".join(lines)
def frontend_list() -> list[dict]:
    """Return a slim list of enabled use cases for the frontend dropdown.

    Each item is a dict with the keys ``key``, ``label``, ``group`` and
    ``verification_methods`` (the latter as a list). The original note names
    ``use-case-registry.ts`` as the frontend twin of this list.
    """
    entries: list[dict] = []
    for use_case in enabled_use_cases():
        entries.append({
            "key": use_case.key,
            "label": use_case.label,
            "group": use_case.group,
            "verification_methods": list(use_case.verification_methods),
        })
    return entries
def registry_hash() -> str:
    """Return a stable SHA-256 hex digest of the registry taxonomy.

    A changed digest signals that a re-classification is due. The digest
    covers, for every entry of ``_USE_CASES`` (enabled or not) in registry
    order: key, group, regulations, verification_methods, doc_types,
    scope_tokens and categories.

    NOTE(review): ``label``, ``keyword_tokens`` and ``enabled`` are not part
    of the hashed payload, so changing only those leaves the digest unchanged.
    """
    rows = []
    for uc in _USE_CASES:
        rows.append([
            uc.key,
            uc.group,
            list(uc.regulations),
            list(uc.verification_methods),
            list(uc.doc_types),
            list(uc.scope_tokens),
            list(uc.categories),
        ])
    # ensure_ascii=False keeps umlauts as-is; the text is then UTF-8 encoded.
    serialized = json.dumps(rows, sort_keys=True, ensure_ascii=False)
    digest = hashlib.sha256(serialized.encode("utf-8"))
    return digest.hexdigest()