ef746ea8f0
CI / detect-changes (push) Successful in 6s
CI / branch-name (push) Has been skipped
CI / secret-scan (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / iace-gt-coverage (push) Has been skipped
CI / test-python-backend (push) Successful in 30s
CI / test-python-document-crawler (push) Has been skipped
CI / test-python-dsms-gateway (push) Has been skipped
CI / dep-audit (push) Has been skipped
CI / build-sha-integrity (push) Failing after 4s
CI / validate-canonical-controls (push) Successful in 11s
CI / loc-budget (push) Failing after 15s
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go (push) Has been skipped
CI / nodejs-build (push) Has been skipped
Member-canonical_controls tragen meist kein evidence_type/verification_method (wie schon source_citation). primary_verification_method() leitet die Methode deterministisch aus dem Primaer-Use-Case ab (impressum->document, code_security->source_code, ...). Populiert mc_verification beim naechsten Seed. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
433 lines
19 KiB
Python
433 lines
19 KiB
Python
"""Use-Case-Register — Single Source of Truth fuer Use Cases × Verifikations-
|
||
Methoden.
|
||
|
||
Jede Master Control wird auf >=1 Use Case (n:m) und genau eine Verifikations-
|
||
Methode gemappt. Use Cases sind NICHT nur dokumenten-basiert: >=50% sind
|
||
Source-Code / IT-Prozess (Code Security, Network Security, CRA, ISMS, TISAX).
|
||
|
||
Neuer Use Case = 1 Eintrag in `_USE_CASES`. Kein DB-Schema-Change noetig.
|
||
Dieses Modul ist die kanonische Quelle; die heute verstreuten doc_type-Listen
|
||
(rag_document_checker._DOC_TYPE_MAP, legacy_url_discovery._SLUG_FAMILY,
|
||
doc_type_classifier, Migration 145) werden spaeter test-gated darauf reduziert.
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import hashlib
|
||
import json
|
||
from dataclasses import dataclass
|
||
|
||
|
||
# How is a master control (MC) verified? — canonical taxonomy of verification
# methods. Order is significant for readers of the tuple (e.g. prompt output).
VERIFICATION_METHODS: tuple[str, ...] = (
    "document",     # read a published document (Impressum, DSE, ...)
    "source_code",  # scan repo/code (SAST, secrets, dependencies, review)
    "network",      # scan network/infra (ports, TLS, headers, config)
    "it_process",   # process/evidence review (procedures, evidence)
    "hybrid",       # combination of several methods
    "manual",       # human attestation
)
|
||
|
||
# Allowed values for UseCase.group (used to group entries, e.g. in the
# frontend list).
USE_CASE_GROUPS: tuple[str, ...] = (
    "document",
    "security",
    "cross_cutting",
    "product",
)
|
||
|
||
|
||
@dataclass(frozen=True)
class UseCase:
    """Immutable description of one use case in the registry.

    The first three fields are required; every tuple field defaults to an
    empty tuple and ``enabled`` defaults to ``True``. No validation is
    performed here — ``group`` and ``verification_methods`` are plain strings.
    """

    # Stable identifier, snake_case.
    key: str
    # Human-readable label.
    label: str
    # One of USE_CASE_GROUPS.
    group: str
    # Names of the regulations this use case covers.
    regulations: tuple[str, ...] = ()
    # Applicable verification methods; the first one is treated as the
    # representative method by primary_verification_method().
    verification_methods: tuple[str, ...] = ()
    # Operational doc_type aliases (document use cases).
    doc_types: tuple[str, ...] = ()
    # Values of canonical_controls.scope_doc_type that point to this use case.
    scope_tokens: tuple[str, ...] = ()
    # Values of canonical_controls.category that point to this use case.
    categories: tuple[str, ...] = ()
    # Keywords expected in canonical_name / title.
    keyword_tokens: tuple[str, ...] = ()
    # Disabled entries are skipped by the reverse indexes and listings.
    enabled: bool = True
|
||
|
||
|
||
# The registry itself: one UseCase entry per use case. Adding a use case means
# adding one entry here.
_USE_CASES: tuple[UseCase, ...] = (
    # ── Document use cases ──────────────────────────────────────────
    UseCase("impressum", "Impressum (§5 TMG/DDG)", "document",
            regulations=("TMG", "DDG", "MStV"),
            verification_methods=("document",),
            doc_types=("impressum",), scope_tokens=("impressum",),
            categories=("compliance",),
            keyword_tokens=("impressum", "anbieterkennzeichnung")),
    UseCase("dse", "Datenschutzerklärung", "document",
            regulations=("DSGVO",),
            verification_methods=("document",),
            doc_types=("dse",), scope_tokens=("dse",),
            categories=("privacy", "data_protection"),
            keyword_tokens=("datenschutz", "privacy")),
    UseCase("agb", "AGB", "document",
            regulations=("BGB",),
            verification_methods=("document",),
            doc_types=("agb",), scope_tokens=("agb",),
            categories=("compliance",),
            keyword_tokens=("geschäftsbedingungen", "agb")),
    UseCase("cookie_banner", "Cookie-Banner & -Richtlinie", "document",
            regulations=("TDDDG", "ePrivacy", "DSGVO"),
            verification_methods=("document", "source_code"),
            doc_types=("cookie",),
            scope_tokens=("cookie_richtlinie", "banner_implementation",
                          "cmp_audit"),
            categories=("privacy",),
            keyword_tokens=("cookie", "consent", "einwilligung")),
    UseCase("widerruf", "Widerrufsbelehrung", "document",
            regulations=("BGB",),
            verification_methods=("document",),
            doc_types=("widerruf",), scope_tokens=("widerruf",),
            categories=("compliance",),
            keyword_tokens=("widerruf", "widerrufsbelehrung")),
    UseCase("dsr", "Betroffenenrechte (DSR)", "document",
            regulations=("DSGVO",),
            verification_methods=("document", "it_process"),
            scope_tokens=("process",),
            categories=("privacy", "operations"),
            keyword_tokens=("betroffenenrecht", "auskunft", "löschung", "dsr")),
    UseCase("loeschkonzept", "Löschkonzept", "document",
            regulations=("DSGVO",),
            verification_methods=("document", "it_process"),
            doc_types=("loeschkonzept",),
            scope_tokens=("process", "accounting"),
            categories=("data_protection",),
            keyword_tokens=("löschung", "löschfrist", "aufbewahrung")),
    UseCase("avv", "Auftragsverarbeitung (AVV)", "document",
            regulations=("DSGVO",),
            verification_methods=("document",),
            doc_types=("avv",), scope_tokens=("avv", "jc"),
            categories=("compliance",),
            keyword_tokens=("auftragsverarbeitung", "avv")),
    UseCase("dsfa", "Datenschutz-Folgenabschätzung", "document",
            regulations=("DSGVO",),
            verification_methods=("document", "it_process"),
            doc_types=("dsfa",), scope_tokens=("tom", "process"),
            categories=("risk", "privacy"),
            keyword_tokens=("folgenabschätzung", "dsfa")),
    # ── Security / code use cases ───────────────────────────────────
    UseCase("code_security", "Code Security", "security",
            regulations=("CRA", "OWASP", "ISO 27001"),
            verification_methods=("source_code", "hybrid"),
            categories=("testing", "application", "encryption",
                        "authentication", "identity"),
            keyword_tokens=("sast", "secret", "dependency", "vulnerability",
                            "injection", "code")),
    UseCase("network_security", "Network Security", "security",
            regulations=("ISO 27001", "BSI", "NIS2"),
            verification_methods=("network", "hybrid"),
            categories=("network", "system", "operations"),
            keyword_tokens=("firewall", "tls", "port", "segmentation",
                            "network", "header")),
    # ── Cross-cutting / multi-method ────────────────────────────────
    UseCase("cra", "Cyber Resilience Act", "cross_cutting",
            regulations=("CRA",),
            verification_methods=("document", "source_code", "network",
                                  "it_process"),
            categories=("security", "supply_chain", "testing", "incident"),
            keyword_tokens=("cra", "sbom", "konformität", "produktsicherheit")),
    UseCase("isms", "ISMS (ISO 27001)", "cross_cutting",
            regulations=("ISO 27001",),
            verification_methods=("it_process", "document", "hybrid"),
            categories=("governance", "security", "operations", "incident"),
            keyword_tokens=("isms", "risikomanagement", "soa")),
    UseCase("tisax", "TISAX", "cross_cutting",
            regulations=("VDA ISA", "TISAX"),
            verification_methods=("it_process", "document", "network",
                                  "hybrid"),
            categories=("security", "governance", "operations"),
            keyword_tokens=("tisax", "vda", "prototypenschutz")),
    UseCase("kritis", "KRITIS / NIS2-Umsetzung", "security",
            regulations=("KRITIS-Dachgesetz", "BSI-KritisV", "BSIG"),
            verification_methods=("it_process", "network", "document")),
    UseCase("dora", "DORA (Digital Operational Resilience)",
            "cross_cutting", regulations=("DORA",),
            verification_methods=("it_process", "document", "network")),
    # ── Product / sector use cases (one per source regulation) ──────
    UseCase("ai_act", "KI-Verordnung (AI Act)", "product",
            regulations=("KI-Verordnung", "AI Act", "NIST AI RMF"),
            verification_methods=("document", "it_process", "source_code")),
    UseCase("mica", "Markets in Crypto-Assets (MiCA)", "product",
            regulations=("MiCA",),
            verification_methods=("document", "it_process")),
    UseCase("mdr", "Medizinprodukte (MDR)", "product",
            regulations=("MDR",),
            verification_methods=("document", "source_code", "it_process")),
    UseCase("maschinen", "Maschinenverordnung", "product",
            regulations=("Maschinenverordnung",),
            verification_methods=("document", "source_code", "it_process")),
    UseCase("batterie", "Batterieverordnung", "product",
            regulations=("Batterieverordnung", "Batteriegesetz"),
            verification_methods=("document", "it_process")),
    UseCase("ehds", "European Health Data Space", "product",
            regulations=("EHDS",),
            verification_methods=("document", "it_process", "source_code")),
    UseCase("produktsicherheit", "Produktsicherheit (GPSR)", "product",
            regulations=("Produktsicherheitsverordnung", "EU Blue Guide"),
            verification_methods=("document", "it_process")),
    UseCase("dsa", "Digital Services Act", "product",
            regulations=("Digital Services Act",),
            verification_methods=("document", "it_process")),
    UseCase("dma", "Digital Markets Act", "product",
            regulations=("Digital Markets Act",),
            verification_methods=("document", "it_process")),
    UseCase("data_governance", "Data Act / Data Governance Act", "product",
            regulations=("Data Act", "Data Governance Act"),
            verification_methods=("document", "it_process")),
    UseCase("zahlungsdienste", "Zahlungsdienste (PSD2)", "product",
            regulations=("Zahlungsdiensterichtlinie",),
            verification_methods=("document", "it_process", "source_code")),
    UseCase("geldwaesche", "Geldwäsche (AML/GwG)", "product",
            regulations=("AML-Verordnung", "GwG"),
            verification_methods=("document", "it_process")),
    UseCase("lieferkette", "Lieferkettensorgfalt (LkSG)", "product",
            regulations=("LkSG",),
            verification_methods=("document", "it_process")),
    UseCase("whistleblowing", "Hinweisgeberschutz (HinSchG)", "product",
            regulations=("HinSchG",),
            verification_methods=("document", "it_process")),
    UseCase("barrierefreiheit", "Barrierefreiheit (EAA)", "product",
            regulations=("European Accessibility Act",),
            verification_methods=("document", "source_code")),
    # ── Further legal use cases (document) ──────────────────────────
    UseCase("verbraucherschutz", "Verbraucherschutz", "document",
            regulations=("Konsumentenschutzgesetz",
                         "Digitale-Inhalte-Richtlinie"),
            verification_methods=("document",)),
    UseCase("urheberrecht", "Urheberrecht", "document",
            regulations=("UrhG", "DSM-Urheberrechtsrichtlinie"),
            verification_methods=("document",)),
    UseCase("wettbewerbsrecht", "Wettbewerbsrecht (UWG)", "document",
            regulations=("UWG",), verification_methods=("document",)),
    UseCase("gleichbehandlung", "Gleichbehandlung (AGG)", "document",
            regulations=("AGG",),
            verification_methods=("document", "it_process")),
    UseCase("steuerrecht", "Steuerrecht", "document",
            regulations=("Abgabenordnung", "BAO"),
            verification_methods=("document", "it_process")),
    UseCase("handelsrecht", "Handelsrecht", "document",
            regulations=("HGB", "UGB", "ABGB"),
            verification_methods=("document", "it_process")),
)
|
||
|
||
|
||
# Deterministic source-regulation → use-case mapper.
# control_parent_links.source_regulation (117 distinct) → ~30 domains.
# Order = SPECIFIC first (substring, case-insensitive); the first hit wins.
# The data-protection catch-alls (edpb/dsk/dsgvo) come last so that specific
# rules (e.g. 'DSK OH Telemedien') get the chance to match first.
_REGULATION_RULES: tuple[tuple[str, str], ...] = (
    # Security / code (security team)
    ("owasp", "code_security"),
    ("nist sp 800-218", "code_security"),
    ("ssdf", "code_security"),
    ("nist sp 800-63", "code_security"),
    ("nistir 8259", "code_security"),
    ("cisa", "code_security"),
    ("nist sp 800-207", "network_security"),
    ("zero trust", "network_security"),
    ("nis2", "network_security"),
    ("nis-2", "network_security"),
    ("enisa", "network_security"),
    ("bsi-gesetz", "network_security"),
    ("bsig", "network_security"),
    ("cybersecurity act", "network_security"),
    ("kritis", "kritis"),
    ("nist cybersecurity framework", "isms"),
    ("nist sp 800-53", "isms"),
    ("digital operational resilience", "dora"),
    ("dora", "dora"),
    # Product / sector
    ("cyber resilience act", "cra"),
    ("(cra)", "cra"),
    ("nist ai risk", "ai_act"),
    ("ki-verordnung", "ai_act"),
    ("ki-vo", "ai_act"),
    ("ai act", "ai_act"),
    ("oecd ki", "ai_act"),
    ("crypto-assets", "mica"),
    ("mica", "mica"),
    ("medizinprodukte", "mdr"),
    ("(mdr)", "mdr"),
    ("maschinenverordnung", "maschinen"),
    ("batterie", "batterie"),
    ("health data space", "ehds"),
    ("produktsicherheit", "produktsicherheit"),
    ("blue guide", "produktsicherheit"),
    ("digital services act", "dsa"),
    ("digital markets act", "dma"),
    ("data act", "data_governance"),
    ("data governance", "data_governance"),
    ("zahlungsdienste", "zahlungsdienste"),
    ("geldwaesche", "geldwaesche"),
    ("aml-verordnung", "geldwaesche"),
    ("lieferkettensorgfalt", "lieferkette"),
    ("lksg", "lieferkette"),
    ("hinweisgeberschutz", "whistleblowing"),
    ("hinschg", "whistleblowing"),
    ("accessibility act", "barrierefreiheit"),
    # Website / telemedia / law (user domain)
    ("tdddg", "cookie_banner"),
    ("eprivacy", "cookie_banner"),
    ("telemedien", "impressum"),
    ("telekommunikationsgesetz", "impressum"),
    ("tkg", "impressum"),
    ("tmg", "impressum"),
    ("mediengesetz", "impressum"),
    ("gewerbeordnung", "impressum"),
    ("e-commerce", "agb"),
    ("digitale-inhalte", "agb"),
    ("konsumentenschutz", "verbraucherschutz"),
    ("urheberrecht", "urheberrecht"),
    ("urhg", "urheberrecht"),
    ("uwg", "wettbewerbsrecht"),
    ("handelsgesetzbuch", "handelsrecht"),
    ("hgb", "handelsrecht"),
    ("ugb", "handelsrecht"),
    ("abgb", "handelsrecht"),
    ("bgb", "agb"),
    ("gleichbehandlung", "gleichbehandlung"),
    ("(agg)", "gleichbehandlung"),
    ("abgabenordnung", "steuerrecht"),
    ("bao", "steuerrecht"),
    ("standardvertragsklauseln", "avv"),
    ("(scc)", "avv"),
    # Data-protection catch-alls (last)
    ("nist privacy framework", "dse"),
    ("dsgvo", "dse"),
    ("datenschutzgesetz", "dse"),
    ("bdsg", "dse"),
    ("edpb", "dse"),
    ("edps", "dse"),
    ("dsk ", "dse"),
    ("wp29", "dse"),
    ("bfdi", "dse"),
    ("data privacy framework", "dse"),
    ("datenschutz", "dse"),
)

# The needles above are written in ASCII (e.g. "geldwaesche"), while regulation
# names may be spelled with real umlauts ("Geldwäschegesetz"). Folding the
# lowercased input lets both spellings hit the same rule.
_UMLAUT_FOLD = str.maketrans({"ä": "ae", "ö": "oe", "ü": "ue", "ß": "ss"})


def use_case_for_regulation(regulation: str | None) -> str | None:
    """Map a source-regulation name to its domain use-case key.

    Matching is deterministic: the rules in ``_REGULATION_RULES`` are tried in
    order and the first needle that occurs as a substring wins. The comparison
    is case-insensitive and is done against both the lowercased name and an
    umlaut-folded copy of it (ä→ae, ö→oe, ü→ue, ß→ss), so ASCII needles also
    match names written with umlauts.

    Args:
        regulation: Name of the source regulation; ``None`` or an empty string
            is accepted.

    Returns:
        The use-case key of the first matching rule, or ``None`` if the input
        is empty or no rule matches (→ fallback / review).
    """
    if not regulation:
        return None
    low = regulation.lower()
    folded = low.translate(_UMLAUT_FOLD)
    for needle, uc in _REGULATION_RULES:
        # Checking both forms per rule keeps the "first rule wins" ordering.
        if needle in low or needle in folded:
            return uc
    return None
|
||
|
||
|
||
# Lookup table use-case key → UseCase, built from every entry of _USE_CASES
# (disabled entries included; a repeated key would keep the later entry).
REGISTRY: dict[str, UseCase] = dict((entry.key, entry) for entry in _USE_CASES)
|
||
|
||
|
||
# canonical_controls.evidence_type / .verification_method → our verification
# method (used by the deterministic seed; the LLM pass refines it later).
# Keys are expected in lowercase; values not listed here have no mapping.
_EVIDENCE_TO_METHOD: dict[str, str] = {
    "document": "document",
    "code": "source_code",
    "code_review": "source_code",
    "process": "it_process",
    "tool": "network",
    "hybrid": "hybrid",
}
|
||
|
||
|
||
def _reverse(attr: str) -> dict[str, list[str]]:
    """Build a reverse index from tokens to use-case keys.

    Args:
        attr: Name of an iterable ``UseCase`` field (for example
            ``"scope_tokens"``) whose items become the index keys.

    Returns:
        A dict mapping each token to the keys of all *enabled* use cases that
        list it, in the order the use cases appear in ``_USE_CASES``.
    """
    index: dict[str, list[str]] = {}
    for use_case in _USE_CASES:
        if not use_case.enabled:
            # Disabled use cases must not be reachable through any index.
            continue
        for token in getattr(use_case, attr):
            index.setdefault(token, []).append(use_case.key)
    return index
|
||
|
||
|
||
# Reverse indexes, computed once at import time from the enabled use cases:
# token → list of use-case keys that declare it.
scope_token_to_use_cases: dict[str, list[str]] = _reverse("scope_tokens")
category_to_use_cases: dict[str, list[str]] = _reverse("categories")
doc_type_to_use_cases: dict[str, list[str]] = _reverse("doc_types")
|
||
|
||
|
||
def is_valid_use_case(key: str) -> bool:
    """Return True if *key* is a registered use case that is enabled."""
    use_case = REGISTRY.get(key)
    return use_case is not None and use_case.enabled
|
||
|
||
|
||
def is_valid_verification_method(method: str) -> bool:
    """Return True if *method* is one of ``VERIFICATION_METHODS``.

    The comparison is exact (case-sensitive, no trimming).
    """
    return method in VERIFICATION_METHODS
|
||
|
||
|
||
def evidence_to_verification_method(value: str | None) -> str | None:
    """Heuristically map an evidence type / verification method for the seed.

    Args:
        value: Raw value as carried by a control; surrounding whitespace and
            letter case are ignored.

    Returns:
        The mapped verification method from ``_EVIDENCE_TO_METHOD``, or
        ``None`` if *value* is empty or has no mapping.
    """
    if not value:
        return None
    normalized = value.strip().lower()
    return _EVIDENCE_TO_METHOD.get(normalized)
|
||
|
||
|
||
def enabled_use_cases() -> list[UseCase]:
    """Return all use cases with ``enabled`` set, in registry order.

    A new list is built on every call.
    """
    return [uc for uc in _USE_CASES if uc.enabled]
|
||
|
||
|
||
def primary_verification_method(use_case_key: str | None) -> str | None:
    """Return the representative verification method of a use case.

    This is the first method the use case declares — a deterministic fallback
    for when the members of a master control carry no evidence_type.

    Args:
        use_case_key: Key of the use case; ``None`` or an empty string is
            accepted.

    Returns:
        The first entry of the use case's ``verification_methods``, or ``None``
        if the key is empty, unknown, or the use case declares no methods.
        The ``enabled`` flag is not consulted.
    """
    if not use_case_key:
        return None
    use_case = REGISTRY.get(use_case_key)
    if use_case is None or not use_case.verification_methods:
        return None
    return use_case.verification_methods[0]
|
||
|
||
|
||
def seed_classify(
    scopes=(), categories=(), vmethods=(), etypes=(),
) -> tuple[list[str], str | None]:
    """Deterministic seed (no LLM) for a master control.

    Derives ``(use_cases, verification_method)`` from the aggregated member
    signals: scope_doc_type + category → use cases; verification_method /
    evidence_type → method. Apart from reading the module-level reverse
    indexes the function has no side effects, which keeps it easy to test.

    Args:
        scopes: Iterable of scope tokens; falsy items are skipped.
        categories: Iterable of category names; falsy items are skipped.
        vmethods: Iterable of verification-method values, tried first.
        etypes: Iterable of evidence-type values, tried after *vmethods*.
            Any of the four arguments may be ``None`` or empty.

    Returns:
        A tuple of the sorted, de-duplicated use-case keys and the first
        value from *vmethods* followed by *etypes* that maps to a verification
        method (``None`` if none does).
    """
    use_cases: set[str] = set()
    for scope in scopes or ():
        if scope:
            use_cases.update(scope_token_to_use_cases.get(scope, ()))
    for category in categories or ():
        if category:
            use_cases.update(category_to_use_cases.get(category, ()))

    method: str | None = None
    # verification_method values take precedence over evidence_type values.
    for raw in list(vmethods or ()) + list(etypes or ()):
        mapped = evidence_to_verification_method(raw)
        if mapped:
            method = mapped
            break
    return sorted(use_cases), method
|
||
|
||
|
||
def taxonomy_for_prompt() -> str:
    """Build the compact anchor block for the LLM classifier.

    The text has a header line, one line per enabled use case
    (key — label — regulations — methods, with ``-`` when a use case lists no
    regulations) and a final line enumerating ``VERIFICATION_METHODS``.

    The string is rebuilt on every call; this function does no caching of its
    own, so any caching has to happen at the call site.
    """
    lines = ["USE CASES (key — Label — Regulierungen — Methoden):"]
    for uc in enabled_use_cases():
        regulations = ", ".join(uc.regulations) or "-"
        methods = ", ".join(uc.verification_methods)
        lines.append(f" {uc.key} — {uc.label} — {regulations} — {methods}")
    lines.append("VERIFIKATIONS-METHODEN: " + ", ".join(VERIFICATION_METHODS))
    return "\n".join(lines)
|
||
|
||
|
||
def frontend_list() -> list[dict]:
    """Return a slim list of enabled use cases for the frontend dropdown.

    Each item carries ``key``, ``label``, ``group`` and ``verification_methods``
    (as a list). The original note names ``use-case-registry.ts`` as the
    frontend twin of this structure.
    """
    return [
        {
            "key": uc.key,
            "label": uc.label,
            "group": uc.group,
            "verification_methods": list(uc.verification_methods),
        }
        for uc in enabled_use_cases()
    ]
|
||
|
||
|
||
def registry_hash() -> str:
    """Return a stable SHA-256 hex digest of the taxonomy.

    Intended to trigger re-classification when the taxonomy changes. The
    digest covers, for every entry of ``_USE_CASES`` (enabled or not) and in
    registry order: key, group, regulations, verification_methods, doc_types,
    scope_tokens and categories. ``label``, ``keyword_tokens`` and ``enabled``
    are NOT part of the payload, so changing only those leaves the hash
    unchanged.
    """
    # The serialization (field order, json.dumps options) must stay exactly as
    # is: any change here alters every previously computed hash.
    payload = json.dumps(
        [
            [
                uc.key,
                uc.group,
                list(uc.regulations),
                list(uc.verification_methods),
                list(uc.doc_types),
                list(uc.scope_tokens),
                list(uc.categories),
            ]
            for uc in _USE_CASES
        ],
        sort_keys=True,
        ensure_ascii=False,
    )
    return hashlib.sha256(payload.encode("utf-8")).hexdigest()
|