feat: Framework Decomposition Engine + Composite Detection for Pass 0b

Adds a routing layer between Pass 0a and Pass 0b that classifies obligations into atomic/compound/framework_container. Framework-container obligations (e.g. "CCM-Praktiken fuer AIS") are decomposed into concrete sub-obligations via an internal framework registry before Pass 0b composition. - New: framework_decomposition.py with routing, matching, decomposition - New: Framework registry (NIST SP 800-53, OWASP ASVS, CSA CCM) as JSON - New: Composite detection flags on atomic controls (is_composite, atomicity) - New: gen_meta fields: framework_ref, framework_domain, decomposition_source - Integration: _route_and_compose() in run_pass0b() deterministic path - 248 tests (198 decomposition + 50 framework), all passing Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-23 12:11:55 +01:00
parent 1a63f5857b
commit 48ca0a6bef
8 changed files with 2744 additions and 18 deletions
@@ -0,0 +1,714 @@
+"""Framework Decomposition Engine — decomposes framework-container obligations.
+
+Sits between Pass 0a (obligation extraction) and Pass 0b (atomic control
+composition).  Detects obligations that reference a framework domain (e.g.
+"CCM-Praktiken fuer AIS") and decomposes them into concrete sub-obligations
+using an internal framework registry.
+
+Three routing types:
+    atomic              → pass through to Pass 0b unchanged
+    compound            → split compound verbs, then Pass 0b
+    framework_container → decompose via registry, then Pass 0b
+
+The registry is a set of JSON files under compliance/data/frameworks/.
+"""
+
+import json
+import logging
+import os
+import re
+import uuid
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Optional
+
+logger = logging.getLogger(__name__)
+
+# ---------------------------------------------------------------------------
+# Registry loading
+# ---------------------------------------------------------------------------
+
+_REGISTRY_DIR = Path(__file__).resolve().parent.parent / "data" / "frameworks"  # "data/frameworks" under the directory one level above this module's directory
+_REGISTRY: dict[str, dict] = {}  # framework_id → framework dict; filled by get_registry() / reload_registry()
+
+
+def _load_registry() -> dict[str, dict]:
+    """Load all framework JSON files from the registry directory."""
+    registry: dict[str, dict] = {}
+    if not _REGISTRY_DIR.is_dir():
+        logger.warning("Framework registry dir not found: %s", _REGISTRY_DIR)
+        return registry
+
+    for fpath in sorted(_REGISTRY_DIR.glob("*.json")):
+        try:
+            with open(fpath, encoding="utf-8") as f:
+                fw = json.load(f)
+            fw_id = fw.get("framework_id", fpath.stem)
+            registry[fw_id] = fw
+            logger.info(
+                "Loaded framework: %s (%d domains)",
+                fw_id,
+                len(fw.get("domains", [])),
+            )
+        except Exception:
+            logger.exception("Failed to load framework file: %s", fpath)
+    return registry
+
+
+def get_registry() -> dict[str, dict]:
+    """Return the global framework registry (lazy-loaded)."""
+    global _REGISTRY
+    if not _REGISTRY:
+        _REGISTRY = _load_registry()
+    return _REGISTRY
+
+
+def reload_registry() -> dict[str, dict]:
+    """Force-reload the framework registry from disk."""
+    global _REGISTRY
+    _REGISTRY = _load_registry()
+    return _REGISTRY
+
+
+# ---------------------------------------------------------------------------
+# Framework alias index (built from registry)
+# ---------------------------------------------------------------------------
+
+def _build_alias_index(registry: dict[str, dict]) -> dict[str, str]:
+    """Build a lowercase alias → framework_id lookup."""
+    idx: dict[str, str] = {}
+    for fw_id, fw in registry.items():
+        # Framework-level aliases
+        idx[fw_id.lower()] = fw_id
+        name = fw.get("display_name", "")
+        if name:
+            idx[name.lower()] = fw_id
+        # Common short forms
+        for part in fw_id.lower().replace("_", " ").split():
+            if len(part) >= 3:
+                idx[part] = fw_id
+    return idx
+
+
+# ---------------------------------------------------------------------------
+# Routing — classify obligation type
+# ---------------------------------------------------------------------------
+
+# Indicator noun + preposition + reference text (e.g. "Praktiken fuer X"); group 1 captures the reference text lazily.
+# Per the original note this extends the simpler _COMPOSITE_RE in decomposition_pass.py (not visible here) by also capturing the name.
+_FRAMEWORK_PATTERN = re.compile(
+    r"(?:praktiken|kontrollen|ma(?:ss|ß)nahmen|anforderungen|vorgaben|controls|practices|measures|requirements)"
+    r"\s+(?:f(?:ue|ü)r|aus|gem(?:ae|ä)(?:ss|ß)|nach|from|of|for|per)\s+"
+    r"(.+?)(?:\s+(?:m(?:ue|ü)ssen|sollen|sind|werden|implementieren|umsetzen|einf(?:ue|ü)hren)|\.|,|$)",
+    re.IGNORECASE,
+)
+
+# Explicit framework names, matched case-insensitively; _detect_framework routes a hit as framework_container even without a registry entry.
+_DIRECT_FRAMEWORK_RE = re.compile(
+    r"\b(?:CSA\s*CCM|NIST\s*(?:SP\s*)?800-53|OWASP\s*(?:ASVS|SAMM|Top\s*10)"
+    r"|CIS\s*Controls|BSI\s*(?:IT-)?Grundschutz|ENISA|ISO\s*2700[12]"
+    r"|COBIT|SOX|PCI\s*DSS|HITRUST|SOC\s*2|KRITIS)\b",
+    re.IGNORECASE,
+)
+
+# German and English conjunctions; _is_compound_obligation requires one of these in the action before it considers a split.
+_COMPOUND_VERB_RE = re.compile(
+    r"\b(?:und|sowie|als\s+auch|or|and)\b",
+    re.IGNORECASE,
+)
+
+# Fixed verb pairs that read as one activity; an action containing any of them is never classified as compound.
+_NO_SPLIT_PHRASES = [
+    "pflegen und aufrechterhalten",
+    "dokumentieren und pflegen",
+    "definieren und dokumentieren",
+    "erstellen und freigeben",
+    "pruefen und genehmigen",
+    "identifizieren und bewerten",
+    "erkennen und melden",
+    "define and maintain",
+    "create and maintain",
+    "establish and maintain",
+    "monitor and review",
+    "detect and respond",
+]
+
+
+@dataclass
+class RoutingResult:
+    """Outcome of classifying one obligation for routing.
+
+    Built by classify_routing() and _detect_framework().  Only
+    ``routing_type`` is required; every other field has a default.
+    """
+    routing_type: str  # atomic | compound | framework_container | unknown_review (the last is not produced by the code visible in this module)
+    framework_ref: Optional[str] = None  # registry framework ID, or None when no registry entry was resolved
+    framework_domain: Optional[str] = None  # matched domain_id inside that framework, if any
+    domain_title: Optional[str] = None  # title of the matched domain, if any
+    confidence: float = 0.0  # heuristic score assigned by the classifier (0.0 to 0.95 in this module)
+    reason: str = ""  # short tag naming the rule that fired, e.g. "pattern_match:<ref text>"
+
+
+def classify_routing(
+    obligation_text: str,
+    action_raw: str,
+    object_raw: str,
+    condition_raw: Optional[str] = None,
+) -> RoutingResult:
+    """Classify an obligation into atomic / compound / framework_container."""
+    combined = f"{obligation_text} {object_raw}".lower()
+
+    # --- Step 1: Framework container detection ---
+    fw_result = _detect_framework(obligation_text, object_raw)
+    if fw_result.routing_type == "framework_container":
+        return fw_result
+
+    # --- Step 2: Compound verb detection ---
+    if _is_compound_obligation(action_raw, obligation_text):
+        return RoutingResult(
+            routing_type="compound",
+            confidence=0.7,
+            reason="multiple_main_verbs",
+        )
+
+    # --- Step 3: Default = atomic ---
+    return RoutingResult(
+        routing_type="atomic",
+        confidence=0.9,
+        reason="single_action_single_object",
+    )
+
+
+def _detect_framework(
+    obligation_text: str, object_raw: str,
+) -> RoutingResult:
+    """Detect if obligation references a framework domain."""
+    combined = f"{obligation_text} {object_raw}"
+    registry = get_registry()
+    alias_idx = _build_alias_index(registry)
+
+    # Strategy 1: direct framework name match
+    m = _DIRECT_FRAMEWORK_RE.search(combined)
+    if m:
+        fw_name = m.group(0).strip()
+        fw_id = _resolve_framework_id(fw_name, alias_idx, registry)
+        if fw_id:
+            domain_id, domain_title = _match_domain(
+                combined, registry[fw_id],
+            )
+            return RoutingResult(
+                routing_type="framework_container",
+                framework_ref=fw_id,
+                framework_domain=domain_id,
+                domain_title=domain_title,
+                confidence=0.95 if domain_id else 0.75,
+                reason=f"direct_framework_match:{fw_name}",
+            )
+        else:
+            # Framework name recognized but not in registry
+            return RoutingResult(
+                routing_type="framework_container",
+                framework_ref=None,
+                framework_domain=None,
+                confidence=0.6,
+                reason=f"direct_framework_match_no_registry:{fw_name}",
+            )
+
+    # Strategy 2: pattern match ("Praktiken fuer X")
+    m2 = _FRAMEWORK_PATTERN.search(combined)
+    if m2:
+        ref_text = m2.group(1).strip()
+        fw_id, domain_id, domain_title = _resolve_from_ref_text(
+            ref_text, registry, alias_idx,
+        )
+        if fw_id:
+            return RoutingResult(
+                routing_type="framework_container",
+                framework_ref=fw_id,
+                framework_domain=domain_id,
+                domain_title=domain_title,
+                confidence=0.85 if domain_id else 0.65,
+                reason=f"pattern_match:{ref_text}",
+            )
+
+    # Strategy 3: keyword-heavy object
+    if _has_framework_keywords(object_raw):
+        return RoutingResult(
+            routing_type="framework_container",
+            framework_ref=None,
+            framework_domain=None,
+            confidence=0.5,
+            reason="framework_keywords_in_object",
+        )
+
+    return RoutingResult(routing_type="atomic", confidence=0.0)
+
+
+def _resolve_framework_id(
+    name: str,
+    alias_idx: dict[str, str],
+    registry: dict[str, dict],
+) -> Optional[str]:
+    """Resolve a framework name to its registry ID."""
+    normalized = re.sub(r"\s+", " ", name.strip().lower())
+    # Direct alias match
+    if normalized in alias_idx:
+        return alias_idx[normalized]
+    # Try compact form (strip spaces, hyphens, underscores)
+    compact = re.sub(r"[\s_\-]+", "", normalized)
+    for alias, fw_id in alias_idx.items():
+        if re.sub(r"[\s_\-]+", "", alias) == compact:
+            return fw_id
+    # Substring match in display names
+    for fw_id, fw in registry.items():
+        display = fw.get("display_name", "").lower()
+        if normalized in display or display in normalized:
+            return fw_id
+    # Partial match: check if normalized contains any alias (for multi-word refs)
+    for alias, fw_id in alias_idx.items():
+        if len(alias) >= 4 and alias in normalized:
+            return fw_id
+    return None
+
+
+def _match_domain(
+    text: str, framework: dict,
+) -> tuple[Optional[str], Optional[str]]:
+    """Match a domain within a framework from text references."""
+    text_lower = text.lower()
+    best_id: Optional[str] = None
+    best_title: Optional[str] = None
+    best_score = 0
+
+    for domain in framework.get("domains", []):
+        score = 0
+        domain_id = domain["domain_id"]
+        title = domain.get("title", "")
+
+        # Exact domain ID match (e.g. "AIS")
+        if re.search(rf"\b{re.escape(domain_id)}\b", text, re.IGNORECASE):
+            score += 10
+
+        # Full title match
+        if title.lower() in text_lower:
+            score += 8
+
+        # Alias match
+        for alias in domain.get("aliases", []):
+            if alias.lower() in text_lower:
+                score += 6
+                break
+
+        # Keyword overlap
+        kw_hits = sum(
+            1 for kw in domain.get("keywords", [])
+            if kw.lower() in text_lower
+        )
+        score += kw_hits
+
+        if score > best_score:
+            best_score = score
+            best_id = domain_id
+            best_title = title
+
+    if best_score >= 3:
+        return best_id, best_title
+    return None, None
+
+
+def _resolve_from_ref_text(
+    ref_text: str,
+    registry: dict[str, dict],
+    alias_idx: dict[str, str],
+) -> tuple[Optional[str], Optional[str], Optional[str]]:
+    """Resolve framework + domain from a reference text like 'AIS' or 'Application Security'."""
+    ref_lower = ref_text.lower()
+
+    for fw_id, fw in registry.items():
+        for domain in fw.get("domains", []):
+            # Check domain ID
+            if domain["domain_id"].lower() in ref_lower:
+                return fw_id, domain["domain_id"], domain.get("title")
+            # Check title
+            if domain.get("title", "").lower() in ref_lower:
+                return fw_id, domain["domain_id"], domain.get("title")
+            # Check aliases
+            for alias in domain.get("aliases", []):
+                if alias.lower() in ref_lower or ref_lower in alias.lower():
+                    return fw_id, domain["domain_id"], domain.get("title")
+
+    return None, None, None
+
+
+_FRAMEWORK_KW_SET = {  # German and English indicator words; _has_framework_keywords needs two distinct hits
+    "praktiken", "kontrollen", "massnahmen", "maßnahmen",
+    "anforderungen", "vorgaben", "framework", "standard",
+    "baseline", "katalog", "domain", "family", "category",
+    "practices", "controls", "measures", "requirements",
+}
+
+
+def _has_framework_keywords(text: str) -> bool:
+    """Check if text contains framework-indicator keywords."""
+    words = set(re.findall(r"[a-zäöüß]+", text.lower()))
+    return len(words & _FRAMEWORK_KW_SET) >= 2
+
+
+def _is_compound_obligation(action_raw: str, obligation_text: str) -> bool:
+    """Detect if the obligation has multiple competing main verbs."""
+    if not action_raw:
+        return False
+
+    action_lower = action_raw.lower().strip()
+
+    # Check no-split phrases first
+    for phrase in _NO_SPLIT_PHRASES:
+        if phrase in action_lower:
+            return False
+
+    # Must have a conjunction
+    if not _COMPOUND_VERB_RE.search(action_lower):
+        return False
+
+    # Split by conjunctions and check if we get 2+ meaningful verbs
+    parts = re.split(r"\b(?:und|sowie|als\s+auch|or|and)\b", action_lower)
+    meaningful = [p.strip() for p in parts if len(p.strip()) >= 3]
+    return len(meaningful) >= 2
+
+
+# ---------------------------------------------------------------------------
+# Framework Decomposition
+# ---------------------------------------------------------------------------
+
+@dataclass
+class DecomposedObligation:
+    """A concrete obligation derived from one subcontrol of a framework container.
+
+    Instances are created by decompose_framework_container(), one per
+    selected subcontrol.
+    """
+    obligation_candidate_id: str  # "<source obligation id>-<subcontrol id>"
+    parent_control_id: str  # passed through unchanged from the caller
+    parent_framework_container_id: str  # "FWC-" + 8 hex chars, shared by all obligations of one decomposition
+    source_ref_law: str  # framework display name (falls back to the framework ID or "Unknown")
+    source_ref_article: str  # subcontrol ID
+    obligation_text: str  # the subcontrol's "statement"
+    actor: str  # passed through from the caller
+    action_raw: str  # the subcontrol's action_hint, or a verb inferred from the statement
+    object_raw: str  # the subcontrol's object_hint, or an object inferred from the statement
+    condition_raw: Optional[str] = None  # not set by the decomposition in this module
+    trigger_raw: Optional[str] = None  # not set by the decomposition in this module
+    routing_type: str = "atomic"
+    release_state: str = "decomposed"
+    subcontrol_id: str = ""
+    # Metadata copied from the registry subcontrol entry
+    action_hint: str = ""
+    object_hint: str = ""
+    object_class: str = ""
+    keywords: list[str] = field(default_factory=list)
+
+
+@dataclass
+class FrameworkDecompositionResult:
+    """Outcome of decomposing one framework-container obligation.
+
+    Returned by decompose_framework_container() for both successful and
+    unmatched decompositions.
+    """
+    framework_container_id: str  # "FWC-" + 8 hex chars generated per call
+    source_obligation_candidate_id: str  # ID of the obligation that was decomposed
+    framework_ref: Optional[str]  # resolved framework ID, or None
+    framework_domain: Optional[str]  # resolved domain ID, or the caller's value when no domain matched
+    domain_title: Optional[str]  # title of the matched domain, if any
+    matched_subcontrols: list[str]  # subcontrol IDs, parallel to decomposed_obligations
+    decomposition_confidence: float  # 0.0 for unmatched results, otherwise a heuristic score in [0, 1]
+    release_state: str  # decomposed | unmatched | error (this module sets only the first two)
+    decomposed_obligations: list[DecomposedObligation]
+    issues: list[str]  # messages prefixed with "WARN:" or "ERROR:"
+
+
+def decompose_framework_container(
+    obligation_candidate_id: str,
+    parent_control_id: str,
+    obligation_text: str,
+    framework_ref: Optional[str],
+    framework_domain: Optional[str],
+    actor: str = "organization",
+) -> FrameworkDecompositionResult:
+    """Decompose a framework-container obligation into concrete sub-obligations.
+
+    Steps:
+    1. Resolve framework from registry
+    2. Resolve domain within framework
+    3. Select relevant subcontrols (keyword filter or full domain)
+    4. Generate decomposed obligations
+    """
+    container_id = f"FWC-{uuid.uuid4().hex[:8]}"
+    registry = get_registry()
+    issues: list[str] = []
+
+    # Step 1: Resolve framework
+    fw = None
+    if framework_ref and framework_ref in registry:
+        fw = registry[framework_ref]
+    else:
+        # Try to find by name in text
+        fw, framework_ref = _find_framework_in_text(obligation_text, registry)
+
+    if not fw:
+        issues.append("ERROR: framework_not_matched")
+        return FrameworkDecompositionResult(
+            framework_container_id=container_id,
+            source_obligation_candidate_id=obligation_candidate_id,
+            framework_ref=framework_ref,
+            framework_domain=framework_domain,
+            domain_title=None,
+            matched_subcontrols=[],
+            decomposition_confidence=0.0,
+            release_state="unmatched",
+            decomposed_obligations=[],
+            issues=issues,
+        )
+
+    # Step 2: Resolve domain
+    domain_data = None
+    domain_title = None
+    if framework_domain:
+        for d in fw.get("domains", []):
+            if d["domain_id"].lower() == framework_domain.lower():
+                domain_data = d
+                domain_title = d.get("title")
+                break
+    if not domain_data:
+        # Try matching from text
+        domain_id, domain_title = _match_domain(obligation_text, fw)
+        if domain_id:
+            for d in fw.get("domains", []):
+                if d["domain_id"] == domain_id:
+                    domain_data = d
+                    framework_domain = domain_id
+                    break
+
+    if not domain_data:
+        issues.append("WARN: domain_not_matched — using all domains")
+        # Fall back to all subcontrols across all domains
+        all_subcontrols = []
+        for d in fw.get("domains", []):
+            for sc in d.get("subcontrols", []):
+                sc["_domain_id"] = d["domain_id"]
+                all_subcontrols.append(sc)
+        subcontrols = _select_subcontrols(obligation_text, all_subcontrols)
+        if not subcontrols:
+            issues.append("ERROR: no_subcontrols_matched")
+            return FrameworkDecompositionResult(
+                framework_container_id=container_id,
+                source_obligation_candidate_id=obligation_candidate_id,
+                framework_ref=framework_ref,
+                framework_domain=framework_domain,
+                domain_title=None,
+                matched_subcontrols=[],
+                decomposition_confidence=0.0,
+                release_state="unmatched",
+                decomposed_obligations=[],
+                issues=issues,
+            )
+    else:
+        # Step 3: Select subcontrols from domain
+        raw_subcontrols = domain_data.get("subcontrols", [])
+        subcontrols = _select_subcontrols(obligation_text, raw_subcontrols)
+        if not subcontrols:
+            # Full domain decomposition
+            subcontrols = raw_subcontrols
+
+    # Quality check: too many subcontrols
+    if len(subcontrols) > 25:
+        issues.append(f"WARN: {len(subcontrols)} subcontrols — may be too broad")
+
+    # Step 4: Generate decomposed obligations
+    display_name = fw.get("display_name", framework_ref or "Unknown")
+    decomposed: list[DecomposedObligation] = []
+    matched_ids: list[str] = []
+
+    for sc in subcontrols:
+        sc_id = sc.get("subcontrol_id", "")
+        matched_ids.append(sc_id)
+
+        action_hint = sc.get("action_hint", "")
+        object_hint = sc.get("object_hint", "")
+
+        # Quality warnings
+        if not action_hint:
+            issues.append(f"WARN: {sc_id} missing action_hint")
+        if not object_hint:
+            issues.append(f"WARN: {sc_id} missing object_hint")
+
+        obl_id = f"{obligation_candidate_id}-{sc_id}"
+
+        decomposed.append(DecomposedObligation(
+            obligation_candidate_id=obl_id,
+            parent_control_id=parent_control_id,
+            parent_framework_container_id=container_id,
+            source_ref_law=display_name,
+            source_ref_article=sc_id,
+            obligation_text=sc.get("statement", ""),
+            actor=actor,
+            action_raw=action_hint or _infer_action(sc.get("statement", "")),
+            object_raw=object_hint or _infer_object(sc.get("statement", "")),
+            routing_type="atomic",
+            release_state="decomposed",
+            subcontrol_id=sc_id,
+            action_hint=action_hint,
+            object_hint=object_hint,
+            object_class=sc.get("object_class", ""),
+            keywords=sc.get("keywords", []),
+        ))
+
+    # Check if decomposed are identical to container
+    for d in decomposed:
+        if d.obligation_text.strip() == obligation_text.strip():
+            issues.append(f"WARN: {d.subcontrol_id} identical to container text")
+
+    confidence = _compute_decomposition_confidence(
+        framework_ref, framework_domain, domain_data, len(subcontrols), issues,
+    )
+
+    return FrameworkDecompositionResult(
+        framework_container_id=container_id,
+        source_obligation_candidate_id=obligation_candidate_id,
+        framework_ref=framework_ref,
+        framework_domain=framework_domain,
+        domain_title=domain_title,
+        matched_subcontrols=matched_ids,
+        decomposition_confidence=confidence,
+        release_state="decomposed",
+        decomposed_obligations=decomposed,
+        issues=issues,
+    )
+
+
+def _find_framework_in_text(
+    text: str, registry: dict[str, dict],
+) -> tuple[Optional[dict], Optional[str]]:
+    """Try to find a framework by searching text for known names."""
+    alias_idx = _build_alias_index(registry)
+    m = _DIRECT_FRAMEWORK_RE.search(text)
+    if m:
+        fw_id = _resolve_framework_id(m.group(0), alias_idx, registry)
+        if fw_id and fw_id in registry:
+            return registry[fw_id], fw_id
+    return None, None
+
+
+def _select_subcontrols(
+    obligation_text: str, subcontrols: list[dict],
+) -> list[dict]:
+    """Select relevant subcontrols based on keyword matching.
+
+    Returns empty list if no targeted match found (caller falls back to
+    full domain).
+    """
+    text_lower = obligation_text.lower()
+    scored: list[tuple[int, dict]] = []
+
+    for sc in subcontrols:
+        score = 0
+        for kw in sc.get("keywords", []):
+            if kw.lower() in text_lower:
+                score += 1
+        # Title match
+        title = sc.get("title", "").lower()
+        if title and title in text_lower:
+            score += 3
+        # Object hint in text
+        obj = sc.get("object_hint", "").lower()
+        if obj and obj in text_lower:
+            score += 2
+
+        if score > 0:
+            scored.append((score, sc))
+
+    if not scored:
+        return []
+
+    # Only return those with meaningful overlap (score >= 2)
+    scored.sort(key=lambda x: x[0], reverse=True)
+    return [sc for score, sc in scored if score >= 2]
+
+
+def _infer_action(statement: str) -> str:
+    """Infer a basic action verb from a statement."""
+    s = statement.lower()
+    if any(w in s for w in ["definiert", "definieren", "define"]):
+        return "definieren"
+    if any(w in s for w in ["implementiert", "implementieren", "implement"]):
+        return "implementieren"
+    if any(w in s for w in ["dokumentiert", "dokumentieren", "document"]):
+        return "dokumentieren"
+    if any(w in s for w in ["ueberwacht", "ueberwachen", "monitor"]):
+        return "ueberwachen"
+    if any(w in s for w in ["getestet", "testen", "test"]):
+        return "testen"
+    if any(w in s for w in ["geschuetzt", "schuetzen", "protect"]):
+        return "implementieren"
+    if any(w in s for w in ["verwaltet", "verwalten", "manage"]):
+        return "pflegen"
+    if any(w in s for w in ["gemeldet", "melden", "report"]):
+        return "melden"
+    return "implementieren"
+
+
+def _infer_object(statement: str) -> str:
+    """Infer the primary object from a statement (first noun phrase)."""
+    # Simple heuristic: take the text after "muessen"/"muss" up to the verb
+    m = re.search(
+        r"(?:muessen|muss|m(?:ü|ue)ssen)\s+(.+?)(?:\s+werden|\s+sein|\.|,|$)",
+        statement,
+        re.IGNORECASE,
+    )
+    if m:
+        return m.group(1).strip()[:80]
+    # Fallback: first 80 chars
+    return statement[:80] if statement else ""
+
+
+def _compute_decomposition_confidence(
+    framework_ref: Optional[str],
+    domain: Optional[str],
+    domain_data: Optional[dict],
+    num_subcontrols: int,
+    issues: list[str],
+) -> float:
+    """Compute confidence score for the decomposition."""
+    score = 0.3
+    if framework_ref:
+        score += 0.25
+    if domain:
+        score += 0.20
+    if domain_data:
+        score += 0.10
+    if 1 <= num_subcontrols <= 15:
+        score += 0.10
+    elif num_subcontrols > 15:
+        score += 0.05  # less confident with too many
+
+    # Penalize errors
+    errors = sum(1 for i in issues if i.startswith("ERROR:"))
+    score -= errors * 0.15
+    return round(max(min(score, 1.0), 0.0), 2)
+
+
+# ---------------------------------------------------------------------------
+# Registry statistics (for admin/debugging)
+# ---------------------------------------------------------------------------
+
+def registry_stats() -> dict:
+    """Return summary statistics about the loaded registry."""
+    reg = get_registry()
+    stats = {
+        "frameworks": len(reg),
+        "details": [],
+    }
+    total_domains = 0
+    total_subcontrols = 0
+    for fw_id, fw in reg.items():
+        domains = fw.get("domains", [])
+        n_sc = sum(len(d.get("subcontrols", [])) for d in domains)
+        total_domains += len(domains)
+        total_subcontrols += n_sc
+        stats["details"].append({
+            "framework_id": fw_id,
+            "display_name": fw.get("display_name", ""),
+            "domains": len(domains),
+            "subcontrols": n_sc,
+        })
+    stats["total_domains"] = total_domains
+    stats["total_subcontrols"] = total_subcontrols
+    return stats