feat: Framework Decomposition Engine + Composite Detection for Pass 0b

Adds a routing layer between Pass 0a and Pass 0b that classifies obligations
into atomic/compound/framework_container. Framework-container obligations
(e.g. "CCM-Praktiken fuer AIS") are decomposed into concrete sub-obligations
via an internal framework registry before Pass 0b composition.

- New: framework_decomposition.py with routing, matching, decomposition
- New: Framework registry (NIST SP 800-53, OWASP ASVS, CSA CCM) as JSON
- New: Composite detection flags on atomic controls (is_composite, atomicity, requires_decomposition)
- New: gen_meta fields: framework_ref, framework_domain, framework_subcontrol_id, decomposition_source
- Integration: _route_and_compose() in run_pass0b() deterministic path
- 248 tests (198 decomposition + 50 framework), all passing

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-03-23 12:11:55 +01:00
parent 1a63f5857b
commit 48ca0a6bef
8 changed files with 2744 additions and 18 deletions

View File

@@ -1493,7 +1493,37 @@ def _normalize_object(object_raw: str) -> str:
return obj[:80] or "unknown"
# ── 7b. Output Validator (Negativregeln) ─────────────────────────────────
# ── 7b. Framework / Composite Detection ──────────────────────────────────
_FRAMEWORK_KEYWORDS: list[str] = [
"praktiken", "kontrollen gemäß", "maßnahmen gemäß", "anforderungen aus",
"anforderungen gemäß", "gemäß .+ umzusetzen", "framework", "standard",
"controls for", "practices for", "requirements from",
]
_COMPOSITE_OBJECT_KEYWORDS: list[str] = [
"ccm", "nist", "iso 27001", "iso 27002", "owasp", "bsi",
"cis controls", "cobit", "sox", "pci dss", "hitrust",
"soc 2", "soc2", "enisa", "kritis",
]
_COMPOSITE_RE = re.compile(
"|".join(_FRAMEWORK_KEYWORDS + _COMPOSITE_OBJECT_KEYWORDS),
re.IGNORECASE,
)
def _is_composite_obligation(obligation_text: str, object_: str) -> bool:
    """Report whether an obligation points at a framework instead of one requirement.

    The obligation text and its object are joined with a single space and
    scanned with ``_COMPOSITE_RE``. Any hit means the obligation references
    a framework domain, a standard, or a set of practices, and is therefore
    treated as composite (not atomic).
    """
    haystack = f"{obligation_text} {object_}"
    return _COMPOSITE_RE.search(haystack) is not None
# ── 7c. Output Validator (Negativregeln) ─────────────────────────────────
def _validate_atomic_control(
atomic: "AtomicControlCandidate",
@@ -1544,6 +1574,9 @@ def _validate_atomic_control(
if object_class == "general":
issues.append("WARN: object_class is 'general' (unclassified)")
if getattr(atomic, "_is_composite", False):
issues.append("WARN: composite/framework obligation — requires further decomposition")
for issue in issues:
if issue.startswith("ERROR:"):
logger.warning("Validation: %s — title=%s", issue, atomic.title[:60])
@@ -1703,6 +1736,12 @@ def _compose_deterministic(
atomic._deadline_hours = deadline_hours # type: ignore[attr-defined]
atomic._frequency = frequency # type: ignore[attr-defined]
# ── Composite / Framework detection ───────────────────────
is_composite = _is_composite_obligation(obligation_text, object_)
atomic._is_composite = is_composite # type: ignore[attr-defined]
atomic._atomicity = "composite" if is_composite else "atomic" # type: ignore[attr-defined]
atomic._requires_decomposition = is_composite # type: ignore[attr-defined]
# ── Validate (log issues, never reject) ───────────────────
validation_issues = _validate_atomic_control(atomic, action_type, object_class)
atomic._validation_issues = validation_issues # type: ignore[attr-defined]
@@ -2403,23 +2442,7 @@ class DecompositionPass:
else:
# Deterministic engine — no LLM required
for obl in batch:
sub_actions = _split_compound_action(obl["action"])
for sub_action in sub_actions:
atomic = _compose_deterministic(
obligation_text=obl["obligation_text"],
action=sub_action,
object_=obl["object"],
parent_title=obl["parent_title"],
parent_severity=obl["parent_severity"],
parent_category=obl["parent_category"],
is_test=obl["is_test"],
is_reporting=obl["is_reporting"],
trigger_type=obl.get("trigger_type"),
condition=obl.get("condition"),
)
await self._process_pass0b_control(
obl, {}, stats, atomic=atomic,
)
await self._route_and_compose(obl, stats)
# Commit after each successful sub-batch
self.db.commit()
@@ -2435,6 +2458,107 @@ class DecompositionPass:
logger.info("Pass 0b: %s", stats)
return stats
async def _route_and_compose(
    self, obl: dict, stats: dict,
) -> None:
    """Route one obligation through framework detection, then compose controls.

    ``classify_routing`` decides how the obligation is handled:

    - ``framework_container`` with a ``framework_ref``: the obligation is
      expanded via ``decompose_framework_container``. If the result is in
      state ``"decomposed"`` and has sub-obligations, each sub-obligation
      is composed and tagged with framework metadata, and the method
      returns. Otherwise a warning is logged and the obligation falls
      through to normal composition.
    - anything else (atomic, compound, or a framework container without
      a ``framework_ref``): normal composition of the obligation itself.

    In every path the action is split with ``_split_compound_action`` and
    one atomic control is composed and processed per resulting sub-action.

    Args:
        obl: Obligation record. Keys read here: ``obligation_text``,
            ``action``, ``object``, ``parent_title``, ``parent_severity``,
            ``parent_category``, ``is_test``, ``is_reporting``,
            ``candidate_id``, ``parent_control_id`` and, optionally,
            ``trigger_type`` and ``condition``.
        stats: Mutable counters. ``framework_decomposed`` and
            ``framework_sub_obligations`` are created on demand and
            incremented on successful framework decomposition.
    """
    from compliance.services.framework_decomposition import (
        classify_routing,
        decompose_framework_container,
    )

    async def _compose_each(
        text: str, action: str, object_: str, extra_attrs: dict,
    ) -> None:
        # Shared by both paths: split the action, compose one atomic control
        # per sub-action, attach optional metadata, hand it to processing.
        # Parent-level fields (title, severity, trigger, condition, ...) are
        # always taken from the original obligation, also for sub-obligations.
        for sub_action in _split_compound_action(action):
            atomic = _compose_deterministic(
                obligation_text=text,
                action=sub_action,
                object_=object_,
                parent_title=obl["parent_title"],
                parent_severity=obl["parent_severity"],
                parent_category=obl["parent_category"],
                is_test=obl["is_test"],
                is_reporting=obl["is_reporting"],
                trigger_type=obl.get("trigger_type"),
                condition=obl.get("condition"),
            )
            for attr_name, attr_value in extra_attrs.items():
                setattr(atomic, attr_name, attr_value)
            # NOTE(review): the parent ``obl`` (not the sub-obligation) is
            # passed on, exactly as before — confirm that is what
            # _process_pass0b_control expects for decomposed obligations.
            await self._process_pass0b_control(
                obl, {}, stats, atomic=atomic,
            )

    routing = classify_routing(
        obligation_text=obl["obligation_text"],
        action_raw=obl["action"],
        object_raw=obl["object"],
        condition_raw=obl.get("condition"),
    )

    if routing.routing_type == "framework_container" and routing.framework_ref:
        # Decompose framework container into sub-obligations
        result = decompose_framework_container(
            obligation_candidate_id=obl["candidate_id"],
            parent_control_id=obl["parent_control_id"],
            obligation_text=obl["obligation_text"],
            framework_ref=routing.framework_ref,
            framework_domain=routing.framework_domain,
        )
        stats.setdefault("framework_decomposed", 0)
        stats.setdefault("framework_sub_obligations", 0)

        if result.release_state == "decomposed" and result.decomposed_obligations:
            stats["framework_decomposed"] += 1
            stats["framework_sub_obligations"] += len(result.decomposed_obligations)
            logger.info(
                "Framework decomposition: %s -> %s/%s -> %d sub-obligations",
                obl["candidate_id"], routing.framework_ref,
                routing.framework_domain, len(result.decomposed_obligations),
            )
            # Compose each sub-obligation and enrich it with framework info
            # (surfaced later via getattr when gen_meta is built).
            for d_obl in result.decomposed_obligations:
                await _compose_each(
                    d_obl.obligation_text,
                    d_obl.action_raw,
                    d_obl.object_raw,
                    {
                        "_framework_ref": routing.framework_ref,
                        "_framework_domain": routing.framework_domain,
                        "_framework_subcontrol_id": d_obl.subcontrol_id,
                        "_decomposition_source": "framework_decomposition",
                    },
                )
            return

        # Unmatched framework — fall through to normal composition
        logger.warning(
            "Framework decomposition unmatched: %s -> %s",
            obl["candidate_id"], result.issues,
        )

    # Atomic or compound or unmatched framework: normal composition
    await _compose_each(
        obl["obligation_text"], obl["action"], obl["object"], {},
    )
async def _process_pass0b_control(
self, obl: dict, parsed: dict, stats: dict,
atomic: Optional[AtomicControlCandidate] = None,
@@ -2855,6 +2979,13 @@ class DecompositionPass:
"deadline_hours": getattr(atomic, "_deadline_hours", None),
"frequency": getattr(atomic, "_frequency", None),
"validation_issues": getattr(atomic, "_validation_issues", []),
"is_composite": getattr(atomic, "_is_composite", False),
"atomicity": getattr(atomic, "_atomicity", "atomic"),
"requires_decomposition": getattr(atomic, "_requires_decomposition", False),
"framework_ref": getattr(atomic, "_framework_ref", None),
"framework_domain": getattr(atomic, "_framework_domain", None),
"framework_subcontrol_id": getattr(atomic, "_framework_subcontrol_id", None),
"decomposition_source": getattr(atomic, "_decomposition_source", "direct"),
}),
"framework_id": "14b1bdd2-abc7-4a43-adae-14471ee5c7cf",
},