"""Use-Case-Register — Single Source of Truth fuer Use Cases × Verifikations-
Methoden.

Jede Master Control wird auf >=1 Use Case (n:m) und genau eine Verifikations-
Methode gemappt. Use Cases sind NICHT nur dokumenten-basiert: >=50% sind
Source-Code / IT-Prozess (Code Security, Network Security, CRA, ISMS, TISAX).

Neuer Use Case = 1 Eintrag in `_USE_CASES`. Kein DB-Schema-Change noetig.
Dieses Modul ist die kanonische Quelle; die heute verstreuten doc_type-Listen
(rag_document_checker._DOC_TYPE_MAP, legacy_url_discovery._SLUG_FAMILY,
doc_type_classifier, Migration 145) werden spaeter test-gated darauf reduziert.
"""

from __future__ import annotations

import hashlib
import json
from dataclasses import dataclass


# How is an MC (master control) verified? — canonical taxonomy of
# verification methods. is_valid_verification_method() checks membership in
# this tuple.
VERIFICATION_METHODS: tuple[str, ...] = (
    "document",     # read a published document (Impressum, DSE, ...)
    "source_code",  # scan repo/code (SAST, secrets, dependencies, review)
    "network",      # scan network/infra (ports, TLS, headers, config)
    "it_process",   # process/evidence review (procedures, evidence)
    "hybrid",       # combination of several methods
    "manual",       # human attestation
)

# Group names used for UseCase.group. Nothing in this module validates a
# UseCase against this tuple; it is a declarative list only.
USE_CASE_GROUPS: tuple[str, ...] = (
    "document", "security", "cross_cutting", "product",
)


@dataclass(frozen=True)
class UseCase:
    """Immutable description of one use case in the register.

    Only ``key``, ``label`` and ``group`` are required; every tuple field
    defaults to empty and ``enabled`` defaults to True. ``scope_tokens``,
    ``categories`` and ``doc_types`` feed the reverse lookup tables built by
    ``_reverse``; entries with ``enabled=False`` are skipped there.
    """
    key: str                                # stable, snake_case
    label: str
    group: str                              # one of USE_CASE_GROUPS
    regulations: tuple[str, ...] = ()
    verification_methods: tuple[str, ...] = ()
    doc_types: tuple[str, ...] = ()         # operative doc_type aliases (document UCs)
    scope_tokens: tuple[str, ...] = ()      # canonical_controls.scope_doc_type
    categories: tuple[str, ...] = ()        # canonical_controls.category
    keyword_tokens: tuple[str, ...] = ()    # canonical_name/title keywords
    enabled: bool = True


# The register itself: one UseCase entry per use case. REGISTRY, the reverse
# lookup tables and registry_hash() are all derived from this tuple.
_USE_CASES: tuple[UseCase, ...] = (
    # ── Document use cases ──────────────────────────────────────────
    UseCase("impressum", "Impressum (§5 TMG/DDG)", "document",
            regulations=("TMG", "DDG", "MStV"),
            verification_methods=("document",),
            doc_types=("impressum",), scope_tokens=("impressum",),
            categories=("compliance",),
            keyword_tokens=("impressum", "anbieterkennzeichnung")),
    UseCase("dse", "Datenschutzerklärung", "document",
            regulations=("DSGVO",),
            verification_methods=("document",),
            doc_types=("dse",), scope_tokens=("dse",),
            categories=("privacy", "data_protection"),
            keyword_tokens=("datenschutz", "privacy")),
    UseCase("agb", "AGB", "document",
            regulations=("BGB",),
            verification_methods=("document",),
            doc_types=("agb",), scope_tokens=("agb",),
            categories=("compliance",),
            keyword_tokens=("geschäftsbedingungen", "agb")),
    UseCase("cookie_banner", "Cookie-Banner & -Richtlinie", "document",
            regulations=("TDDDG", "ePrivacy", "DSGVO"),
            verification_methods=("document", "source_code"),
            doc_types=("cookie",),
            scope_tokens=("cookie_richtlinie", "banner_implementation",
                          "cmp_audit"),
            categories=("privacy",),
            keyword_tokens=("cookie", "consent", "einwilligung")),
    UseCase("widerruf", "Widerrufsbelehrung", "document",
            regulations=("BGB",),
            verification_methods=("document",),
            doc_types=("widerruf",), scope_tokens=("widerruf",),
            categories=("compliance",),
            keyword_tokens=("widerruf", "widerrufsbelehrung")),
    UseCase("dsr", "Betroffenenrechte (DSR)", "document",
            regulations=("DSGVO",),
            verification_methods=("document", "it_process"),
            scope_tokens=("process",),
            categories=("privacy", "operations"),
            keyword_tokens=("betroffenenrecht", "auskunft", "löschung", "dsr")),
    UseCase("loeschkonzept", "Löschkonzept", "document",
            regulations=("DSGVO",),
            verification_methods=("document", "it_process"),
            doc_types=("loeschkonzept",),
            scope_tokens=("process", "accounting"),
            categories=("data_protection",),
            keyword_tokens=("löschung", "löschfrist", "aufbewahrung")),
    UseCase("avv", "Auftragsverarbeitung (AVV)", "document",
            regulations=("DSGVO",),
            verification_methods=("document",),
            doc_types=("avv",), scope_tokens=("avv", "jc"),
            categories=("compliance",),
            keyword_tokens=("auftragsverarbeitung", "avv")),
    UseCase("dsfa", "Datenschutz-Folgenabschätzung", "document",
            regulations=("DSGVO",),
            verification_methods=("document", "it_process"),
            doc_types=("dsfa",), scope_tokens=("tom", "process"),
            categories=("risk", "privacy"),
            keyword_tokens=("folgenabschätzung", "dsfa")),
    # ── Security / code use cases ───────────────────────────────────
    UseCase("code_security", "Code Security", "security",
            regulations=("CRA", "OWASP", "ISO 27001"),
            verification_methods=("source_code", "hybrid"),
            categories=("testing", "application", "encryption",
                        "authentication", "identity"),
            keyword_tokens=("sast", "secret", "dependency", "vulnerability",
                            "injection", "code")),
    UseCase("network_security", "Network Security", "security",
            regulations=("ISO 27001", "BSI", "NIS2"),
            verification_methods=("network", "hybrid"),
            categories=("network", "system", "operations"),
            keyword_tokens=("firewall", "tls", "port", "segmentation",
                            "network", "header")),
    # ── Cross-cutting / multi-method ────────────────────────────────
    UseCase("cra", "Cyber Resilience Act", "cross_cutting",
            regulations=("CRA",),
            verification_methods=("document", "source_code", "network",
                                  "it_process"),
            categories=("security", "supply_chain", "testing", "incident"),
            keyword_tokens=("cra", "sbom", "konformität", "produktsicherheit")),
    UseCase("isms", "ISMS (ISO 27001)", "cross_cutting",
            regulations=("ISO 27001",),
            verification_methods=("it_process", "document", "hybrid"),
            categories=("governance", "security", "operations", "incident"),
            keyword_tokens=("isms", "risikomanagement", "soa")),
    UseCase("tisax", "TISAX", "cross_cutting",
            regulations=("VDA ISA", "TISAX"),
            verification_methods=("it_process", "document", "network",
                                  "hybrid"),
            categories=("security", "governance", "operations"),
            keyword_tokens=("tisax", "vda", "prototypenschutz")),
    UseCase("kritis", "KRITIS / NIS2-Umsetzung", "security",
            regulations=("KRITIS-Dachgesetz", "BSI-KritisV", "BSIG"),
            verification_methods=("it_process", "network", "document")),
    UseCase("dora", "DORA (Digital Operational Resilience)",
            "cross_cutting", regulations=("DORA",),
            verification_methods=("it_process", "document", "network")),
    # ── Product / sector use cases (one per source regulation) ──────
    UseCase("ai_act", "KI-Verordnung (AI Act)", "product",
            regulations=("KI-Verordnung", "AI Act", "NIST AI RMF"),
            verification_methods=("document", "it_process", "source_code")),
    UseCase("mica", "Markets in Crypto-Assets (MiCA)", "product",
            regulations=("MiCA",),
            verification_methods=("document", "it_process")),
    UseCase("mdr", "Medizinprodukte (MDR)", "product",
            regulations=("MDR",),
            verification_methods=("document", "source_code", "it_process")),
    UseCase("maschinen", "Maschinenverordnung", "product",
            regulations=("Maschinenverordnung",),
            verification_methods=("document", "source_code", "it_process")),
    UseCase("batterie", "Batterieverordnung", "product",
            regulations=("Batterieverordnung", "Batteriegesetz"),
            verification_methods=("document", "it_process")),
    UseCase("ehds", "European Health Data Space", "product",
            regulations=("EHDS",),
            verification_methods=("document", "it_process", "source_code")),
    UseCase("produktsicherheit", "Produktsicherheit (GPSR)", "product",
            regulations=("Produktsicherheitsverordnung", "EU Blue Guide"),
            verification_methods=("document", "it_process")),
    UseCase("dsa", "Digital Services Act", "product",
            regulations=("Digital Services Act",),
            verification_methods=("document", "it_process")),
    UseCase("dma", "Digital Markets Act", "product",
            regulations=("Digital Markets Act",),
            verification_methods=("document", "it_process")),
    UseCase("data_governance", "Data Act / Data Governance Act", "product",
            regulations=("Data Act", "Data Governance Act"),
            verification_methods=("document", "it_process")),
    UseCase("zahlungsdienste", "Zahlungsdienste (PSD2)", "product",
            regulations=("Zahlungsdiensterichtlinie",),
            verification_methods=("document", "it_process", "source_code")),
    UseCase("geldwaesche", "Geldwäsche (AML/GwG)", "product",
            regulations=("AML-Verordnung", "GwG"),
            verification_methods=("document", "it_process")),
    UseCase("lieferkette", "Lieferkettensorgfalt (LkSG)", "product",
            regulations=("LkSG",),
            verification_methods=("document", "it_process")),
    UseCase("whistleblowing", "Hinweisgeberschutz (HinSchG)", "product",
            regulations=("HinSchG",),
            verification_methods=("document", "it_process")),
    UseCase("barrierefreiheit", "Barrierefreiheit (EAA)", "product",
            regulations=("European Accessibility Act",),
            verification_methods=("document", "source_code")),
    # ── Further legal use cases (document) ──────────────────────────
    UseCase("verbraucherschutz", "Verbraucherschutz", "document",
            regulations=("Konsumentenschutzgesetz",
                         "Digitale-Inhalte-Richtlinie"),
            verification_methods=("document",)),
    UseCase("urheberrecht", "Urheberrecht", "document",
            regulations=("UrhG", "DSM-Urheberrechtsrichtlinie"),
            verification_methods=("document",)),
    UseCase("wettbewerbsrecht", "Wettbewerbsrecht (UWG)", "document",
            regulations=("UWG",), verification_methods=("document",)),
    UseCase("gleichbehandlung", "Gleichbehandlung (AGG)", "document",
            regulations=("AGG",),
            verification_methods=("document", "it_process")),
    UseCase("steuerrecht", "Steuerrecht", "document",
            regulations=("Abgabenordnung", "BAO"),
            verification_methods=("document", "it_process")),
    UseCase("handelsrecht", "Handelsrecht", "document",
            regulations=("HGB", "UGB", "ABGB"),
            verification_methods=("document", "it_process")),
)


# Deterministic source-regulation → use-case mapper.
# control_parent_links.source_regulation (117 distinct) → ~30 domains.
# Order = MOST SPECIFIC first (substring, case-insensitive); the first hit
# wins. The data-protection catch-alls (edpb/dsk/dsgvo) come last so that
# more specific rules (e.g. 'DSK OH Telemedien') can match first.
# All needles are lowercase ASCII; use_case_for_regulation() folds its input
# to the same form (lowercase, umlauts transliterated) before matching.
_REGULATION_RULES: tuple[tuple[str, str], ...] = (
    # Security / code (security team)
    ("owasp", "code_security"),
    ("nist sp 800-218", "code_security"),
    ("ssdf", "code_security"),
    ("nist sp 800-63", "code_security"),
    ("nistir 8259", "code_security"),
    ("cisa", "code_security"),
    ("nist sp 800-207", "network_security"),
    ("zero trust", "network_security"),
    ("nis2", "network_security"),
    ("nis-2", "network_security"),
    ("enisa", "network_security"),
    ("bsi-gesetz", "network_security"),
    ("bsig", "network_security"),
    ("cybersecurity act", "network_security"),
    ("kritis", "kritis"),
    ("nist cybersecurity framework", "isms"),
    ("nist sp 800-53", "isms"),
    ("digital operational resilience", "dora"),
    ("dora", "dora"),
    # Product / sector
    ("cyber resilience act", "cra"),
    ("(cra)", "cra"),
    ("nist ai risk", "ai_act"),
    ("ki-verordnung", "ai_act"),
    ("ki-vo", "ai_act"),
    ("ai act", "ai_act"),
    ("oecd ki", "ai_act"),
    ("crypto-assets", "mica"),
    ("mica", "mica"),
    ("medizinprodukte", "mdr"),
    ("(mdr)", "mdr"),
    ("maschinenverordnung", "maschinen"),
    ("batterie", "batterie"),
    ("health data space", "ehds"),
    ("produktsicherheit", "produktsicherheit"),
    ("blue guide", "produktsicherheit"),
    ("digital services act", "dsa"),
    ("digital markets act", "dma"),
    ("data act", "data_governance"),
    ("data governance", "data_governance"),
    ("zahlungsdienste", "zahlungsdienste"),
    ("geldwaesche", "geldwaesche"),
    ("aml-verordnung", "geldwaesche"),
    ("lieferkettensorgfalt", "lieferkette"),
    ("lksg", "lieferkette"),
    ("hinweisgeberschutz", "whistleblowing"),
    ("hinschg", "whistleblowing"),
    ("accessibility act", "barrierefreiheit"),
    # Website / telemedia / law (user domain)
    ("tdddg", "cookie_banner"),
    ("eprivacy", "cookie_banner"),
    ("telemedien", "impressum"),
    ("telekommunikationsgesetz", "impressum"),
    ("tkg", "impressum"),
    ("tmg", "impressum"),
    ("mediengesetz", "impressum"),
    ("gewerbeordnung", "impressum"),
    ("e-commerce", "agb"),
    ("digitale-inhalte", "agb"),
    ("konsumentenschutz", "verbraucherschutz"),
    ("urheberrecht", "urheberrecht"),
    ("urhg", "urheberrecht"),
    ("uwg", "wettbewerbsrecht"),
    ("handelsgesetzbuch", "handelsrecht"),
    ("hgb", "handelsrecht"),
    ("ugb", "handelsrecht"),
    ("abgb", "handelsrecht"),
    ("bgb", "agb"),
    ("gleichbehandlung", "gleichbehandlung"),
    ("(agg)", "gleichbehandlung"),
    ("abgabenordnung", "steuerrecht"),
    ("bao", "steuerrecht"),
    ("standardvertragsklauseln", "avv"),
    ("(scc)", "avv"),
    # Data-protection catch-alls (last)
    ("nist privacy framework", "dse"),
    ("dsgvo", "dse"),
    ("datenschutzgesetz", "dse"),
    ("bdsg", "dse"),
    ("edpb", "dse"),
    ("edps", "dse"),
    ("dsk ", "dse"),
    ("wp29", "dse"),
    ("bfdi", "dse"),
    ("data privacy framework", "dse"),
    ("datenschutz", "dse"),
)

# German umlauts / eszett → ASCII digraphs. The needles above are spelled in
# this transliterated form (e.g. "geldwaesche"), so the input has to be folded
# the same way or "Geldwäschegesetz" would never match.
_UMLAUT_TRANSLITERATION = str.maketrans(
    {"ä": "ae", "ö": "oe", "ü": "ue", "ß": "ss"}
)


def use_case_for_regulation(regulation: str | None) -> str | None:
    """Map a source-regulation name to its use-case key.

    The input is lower-cased and its umlauts are transliterated
    (ä→ae, ö→oe, ü→ue, ß→ss); then ``_REGULATION_RULES`` is scanned in order
    and the use case of the first needle contained in the folded input wins.

    Args:
        regulation: Regulation name; ``None`` and ``""`` are accepted.

    Returns:
        The use-case key of the first matching rule, or ``None`` when the
        input is empty or no rule matches (→ fallback / review).
    """
    if not regulation:
        return None
    # Lower-case first so that upper-case umlauts ("Ä") are folded as well.
    low = regulation.lower().translate(_UMLAUT_TRANSLITERATION)
    # NOTE(review): this is a plain substring test, so short needles such as
    # "mica" or "bao" also match inside longer words — confirm that is
    # acceptable for the actual source_regulation values.
    for needle, uc in _REGULATION_RULES:
        if needle in low:
            return uc
    return None


# Key → UseCase lookup over ALL entries of _USE_CASES (disabled ones are
# included; is_valid_use_case() applies the enabled check). If two entries
# shared a key, the later one would silently replace the earlier one.
REGISTRY: dict[str, UseCase] = {uc.key: uc for uc in _USE_CASES}


# canonical_controls.evidence_type / .verification_method → our method
# (for the deterministic seed; the LLM pass refines it).
# Keys are looked up after strip().lower() in
# evidence_to_verification_method(), so they must stay lowercase.
_EVIDENCE_TO_METHOD: dict[str, str] = {
    "document": "document",
    "code": "source_code",
    "code_review": "source_code",
    "process": "it_process",
    "tool": "network",
    "hybrid": "hybrid",
}


def _reverse(attr: str) -> dict[str, list[str]]:
    """Build an inverted index for one tuple-valued UseCase attribute.

    Args:
        attr: Name of the UseCase attribute to invert (e.g. "scope_tokens").

    Returns:
        Mapping token → keys of the enabled use cases that list the token,
        in ``_USE_CASES`` order. Disabled use cases are left out.
    """
    index: dict[str, list[str]] = {}
    for use_case in (entry for entry in _USE_CASES if entry.enabled):
        for token in getattr(use_case, attr):
            if token in index:
                index[token].append(use_case.key)
            else:
                index[token] = [use_case.key]
    return index


# Reverse lookup tables (token → use-case keys), computed once at import time
# from the enabled entries of _USE_CASES. seed_classify() reads the first two.
scope_token_to_use_cases: dict[str, list[str]] = _reverse("scope_tokens")
category_to_use_cases: dict[str, list[str]] = _reverse("categories")
doc_type_to_use_cases: dict[str, list[str]] = _reverse("doc_types")


def is_valid_use_case(key: str) -> bool:
    """Return True only if *key* is registered and its use case is enabled."""
    entry = REGISTRY.get(key)
    if entry is None:
        return False
    return entry.enabled


def is_valid_verification_method(method: str) -> bool:
    """Return True if *method* is one of the canonical VERIFICATION_METHODS.

    The comparison is exact (case-sensitive, no stripping).
    """
    is_known = method in VERIFICATION_METHODS
    return is_known


def evidence_to_verification_method(value: str | None) -> str | None:
    """Heuristically map an evidence/verification value to a method (seed).

    The value is stripped and lower-cased before it is looked up in
    ``_EVIDENCE_TO_METHOD``.

    Returns:
        The mapped verification method, or ``None`` for an empty or
        unknown value.
    """
    if value:
        normalized = value.strip().lower()
        return _EVIDENCE_TO_METHOD.get(normalized)
    return None


def enabled_use_cases() -> list[UseCase]:
    """Return the enabled use cases as a new list, in ``_USE_CASES`` order."""
    active: list[UseCase] = []
    for use_case in _USE_CASES:
        if use_case.enabled:
            active.append(use_case)
    return active


def primary_verification_method(use_case_key: str | None) -> str | None:
    """Return the representative (first declared) method of a use case.

    Deterministic fallback for the case where the members of a master
    control carry no evidence_type.

    Returns:
        The first entry of the use case's ``verification_methods``, or
        ``None`` if the key is empty, unknown, or the use case declares no
        methods. The ``enabled`` flag is not consulted here.
    """
    if not use_case_key:
        return None
    use_case = REGISTRY.get(use_case_key)
    if use_case is None or not use_case.verification_methods:
        return None
    return use_case.verification_methods[0]


def seed_classify(
    scopes=(), categories=(), vmethods=(), etypes=(),
) -> tuple[list[str], str | None]:
    """Deterministic seed classification (no LLM) for one master control.

    Derives ``(use_cases, verification_method)`` from the aggregated member
    signals: scope_doc_type values and categories select use cases through
    the reverse lookup tables; verification_method / evidence_type values
    select the method. Pure function, hence easy to test.

    Args:
        scopes: scope_doc_type values; falsy entries are ignored.
        categories: category values; falsy entries are ignored.
        vmethods: verification_method values, consulted before ``etypes``.
        etypes: evidence_type values.

    Returns:
        The sorted list of matching use-case keys, and the first value from
        ``vmethods`` then ``etypes`` that maps to a known method (``None``
        if none does).
    """
    matched: set[str] = set()
    # Scopes are processed before categories; both use exact-match lookups.
    for signals, index in (
        (scopes, scope_token_to_use_cases),
        (categories, category_to_use_cases),
    ):
        for signal in signals or ():
            if signal:
                matched.update(index.get(signal, ()))

    candidates = [*(vmethods or ()), *(etypes or ())]
    # Lazy map + next(): stops at the first candidate that maps to a method.
    method: str | None = next(
        (m for m in map(evidence_to_verification_method, candidates) if m),
        None,
    )
    return sorted(matched), method


def taxonomy_for_prompt() -> str:
    """Render the compact anchor block for the LLM classifier.

    One line per enabled use case (key, label, regulations, methods),
    followed by the list of verification methods. The text is rebuilt on
    every call; this function does no caching itself.
    """
    header = "USE CASES (key — Label — Regulierungen — Methoden):"
    rows: list[str] = []
    for uc in enabled_use_cases():
        # A use case without regulations is rendered with a "-" placeholder.
        regs = ", ".join(uc.regulations) or "-"
        methods = ", ".join(uc.verification_methods)
        rows.append(f"  {uc.key} — {uc.label} — {regs} — {methods}")
    footer = "VERIFIKATIONS-METHODEN: " + ", ".join(VERIFICATION_METHODS)
    return "\n".join([header, *rows, footer])


def frontend_list() -> list[dict]:
    """Return a slim list of the enabled use cases for the frontend dropdown.

    Each item carries ``key``, ``label``, ``group`` and
    ``verification_methods`` (as a list). The original note names
    use-case-registry.ts as the frontend twin of this list.
    """
    entries: list[dict] = []
    for uc in enabled_use_cases():
        entries.append({
            "key": uc.key,
            "label": uc.label,
            "group": uc.group,
            "verification_methods": list(uc.verification_methods),
        })
    return entries


def registry_hash() -> str:
    """Return a stable SHA-256 hex digest of the taxonomy.

    Intended to trigger re-classification when the taxonomy changes. The
    digest covers key, group, regulations, verification_methods, doc_types,
    scope_tokens and categories of every entry in ``_USE_CASES``
    (disabled entries included).
    """
    # NOTE(review): label, keyword_tokens and enabled are not hashed, so
    # changing only those fields leaves the digest unchanged — confirm that
    # this is intended.
    rows = []
    for uc in _USE_CASES:
        rows.append([
            uc.key,
            uc.group,
            list(uc.regulations),
            list(uc.verification_methods),
            list(uc.doc_types),
            list(uc.scope_tokens),
            list(uc.categories),
        ])
    serialized = json.dumps(rows, sort_keys=True, ensure_ascii=False)
    digest = hashlib.sha256(serialized.encode("utf-8"))
    return digest.hexdigest()