feat: Framework Decomposition Engine + Composite Detection for Pass 0b
Adds a routing layer between Pass 0a and Pass 0b that classifies obligations into atomic/compound/framework_container. Framework-container obligations (e.g. "CCM-Praktiken fuer AIS") are decomposed into concrete sub-obligations via an internal framework registry before Pass 0b composition.

- New: framework_decomposition.py with routing, matching, decomposition
- New: Framework registry (NIST SP 800-53, OWASP ASVS, CSA CCM) as JSON
- New: Composite detection flags on atomic controls (is_composite, atomicity)
- New: gen_meta fields: framework_ref, framework_domain, decomposition_source
- Integration: _route_and_compose() in run_pass0b() deterministic path
- 248 tests (198 decomposition + 50 framework), all passing

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1493,7 +1493,37 @@ def _normalize_object(object_raw: str) -> str:
|
||||
return obj[:80] or "unknown"
|
||||
|
||||
|
||||
# ── 7b. Output Validator (Negativregeln) ─────────────────────────────────
|
||||
# ── 7b. Framework / Composite Detection ──────────────────────────────────
|
||||
|
||||
_FRAMEWORK_KEYWORDS: list[str] = [
|
||||
"praktiken", "kontrollen gemäß", "maßnahmen gemäß", "anforderungen aus",
|
||||
"anforderungen gemäß", "gemäß .+ umzusetzen", "framework", "standard",
|
||||
"controls for", "practices for", "requirements from",
|
||||
]
|
||||
|
||||
_COMPOSITE_OBJECT_KEYWORDS: list[str] = [
|
||||
"ccm", "nist", "iso 27001", "iso 27002", "owasp", "bsi",
|
||||
"cis controls", "cobit", "sox", "pci dss", "hitrust",
|
||||
"soc 2", "soc2", "enisa", "kritis",
|
||||
]
|
||||
|
||||
_COMPOSITE_RE = re.compile(
|
||||
"|".join(_FRAMEWORK_KEYWORDS + _COMPOSITE_OBJECT_KEYWORDS),
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
|
||||
def _is_composite_obligation(obligation_text: str, object_: str) -> bool:
    """Tell whether an obligation is framework-level rather than atomic.

    The obligation text and its object are joined with a single space and
    scanned with ``_COMPOSITE_RE``.

    Args:
        obligation_text: Full text of the obligation.
        object_: Object the obligation applies to.

    Returns:
        True when the pattern matches anywhere in the joined string, i.e.
        the obligation mentions a framework, standard or set of practices;
        False otherwise.
    """
    haystack = "{} {}".format(obligation_text, object_)
    hit = _COMPOSITE_RE.search(haystack)
    return hit is not None
|
||||
|
||||
|
||||
# ── 7c. Output Validator (Negativregeln) ─────────────────────────────────
|
||||
|
||||
def _validate_atomic_control(
|
||||
atomic: "AtomicControlCandidate",
|
||||
@@ -1544,6 +1574,9 @@ def _validate_atomic_control(
|
||||
if object_class == "general":
|
||||
issues.append("WARN: object_class is 'general' (unclassified)")
|
||||
|
||||
if getattr(atomic, "_is_composite", False):
|
||||
issues.append("WARN: composite/framework obligation — requires further decomposition")
|
||||
|
||||
for issue in issues:
|
||||
if issue.startswith("ERROR:"):
|
||||
logger.warning("Validation: %s — title=%s", issue, atomic.title[:60])
|
||||
@@ -1703,6 +1736,12 @@ def _compose_deterministic(
|
||||
atomic._deadline_hours = deadline_hours # type: ignore[attr-defined]
|
||||
atomic._frequency = frequency # type: ignore[attr-defined]
|
||||
|
||||
# ── Composite / Framework detection ───────────────────────
|
||||
is_composite = _is_composite_obligation(obligation_text, object_)
|
||||
atomic._is_composite = is_composite # type: ignore[attr-defined]
|
||||
atomic._atomicity = "composite" if is_composite else "atomic" # type: ignore[attr-defined]
|
||||
atomic._requires_decomposition = is_composite # type: ignore[attr-defined]
|
||||
|
||||
# ── Validate (log issues, never reject) ───────────────────
|
||||
validation_issues = _validate_atomic_control(atomic, action_type, object_class)
|
||||
atomic._validation_issues = validation_issues # type: ignore[attr-defined]
|
||||
@@ -2403,23 +2442,7 @@ class DecompositionPass:
|
||||
else:
|
||||
# Deterministic engine — no LLM required
|
||||
for obl in batch:
|
||||
sub_actions = _split_compound_action(obl["action"])
|
||||
for sub_action in sub_actions:
|
||||
atomic = _compose_deterministic(
|
||||
obligation_text=obl["obligation_text"],
|
||||
action=sub_action,
|
||||
object_=obl["object"],
|
||||
parent_title=obl["parent_title"],
|
||||
parent_severity=obl["parent_severity"],
|
||||
parent_category=obl["parent_category"],
|
||||
is_test=obl["is_test"],
|
||||
is_reporting=obl["is_reporting"],
|
||||
trigger_type=obl.get("trigger_type"),
|
||||
condition=obl.get("condition"),
|
||||
)
|
||||
await self._process_pass0b_control(
|
||||
obl, {}, stats, atomic=atomic,
|
||||
)
|
||||
await self._route_and_compose(obl, stats)
|
||||
|
||||
# Commit after each successful sub-batch
|
||||
self.db.commit()
|
||||
@@ -2435,6 +2458,107 @@ class DecompositionPass:
|
||||
logger.info("Pass 0b: %s", stats)
|
||||
return stats
|
||||
|
||||
async def _route_and_compose(
    self, obl: dict, stats: dict,
) -> None:
    """Classify one obligation and emit atomic controls for it.

    The obligation is first classified by ``classify_routing``:

    - ``framework_container`` with a framework reference: the obligation is
      expanded via ``decompose_framework_container``; when that yields
      sub-obligations, each one is composed and tagged with framework
      metadata, and nothing else is emitted.
    - anything else (atomic, compound, or a framework container that could
      not be decomposed): the obligation itself is composed.

    In both cases the action is split with ``_split_compound_action`` and
    one control is composed per resulting sub-action.

    Args:
        obl: Obligation record; the keys read here are ``obligation_text``,
            ``action``, ``object``, ``candidate_id``, ``parent_control_id``,
            ``parent_title``, ``parent_severity``, ``parent_category``,
            ``is_test``, ``is_reporting`` and the optional ``trigger_type``
            and ``condition``.
        stats: Mutable counters; ``framework_decomposed`` and
            ``framework_sub_obligations`` are created and updated here.
    """
    # Imported at call time, as in the surrounding code — presumably to
    # avoid an import cycle; confirm before hoisting to module level.
    from compliance.services.framework_decomposition import (
        classify_routing,
        decompose_framework_container,
    )

    route = classify_routing(
        obligation_text=obl["obligation_text"],
        action_raw=obl["action"],
        object_raw=obl["object"],
        condition_raw=obl.get("condition"),
    )

    async def _emit(text, action_raw, object_raw, framework_item=None) -> None:
        # Split the action, compose one control per part, hand each on.
        for part in _split_compound_action(action_raw):
            candidate = _compose_deterministic(
                obligation_text=text,
                action=part,
                object_=object_raw,
                parent_title=obl["parent_title"],
                parent_severity=obl["parent_severity"],
                parent_category=obl["parent_category"],
                is_test=obl["is_test"],
                is_reporting=obl["is_reporting"],
                trigger_type=obl.get("trigger_type"),
                condition=obl.get("condition"),
            )
            if framework_item is not None:
                # Framework provenance, read back later via getattr().
                candidate._framework_ref = route.framework_ref  # type: ignore[attr-defined]
                candidate._framework_domain = route.framework_domain  # type: ignore[attr-defined]
                candidate._framework_subcontrol_id = framework_item.subcontrol_id  # type: ignore[attr-defined]
                candidate._decomposition_source = "framework_decomposition"  # type: ignore[attr-defined]
            # The parent obligation dict is always what gets passed on,
            # also for framework sub-obligations.
            await self._process_pass0b_control(
                obl, {}, stats, atomic=candidate,
            )

    if route.routing_type == "framework_container" and route.framework_ref:
        outcome = decompose_framework_container(
            obligation_candidate_id=obl["candidate_id"],
            parent_control_id=obl["parent_control_id"],
            obligation_text=obl["obligation_text"],
            framework_ref=route.framework_ref,
            framework_domain=route.framework_domain,
        )
        for counter in ("framework_decomposed", "framework_sub_obligations"):
            stats.setdefault(counter, 0)

        if outcome.release_state == "decomposed" and outcome.decomposed_obligations:
            stats["framework_decomposed"] += 1
            stats["framework_sub_obligations"] += len(outcome.decomposed_obligations)
            logger.info(
                "Framework decomposition: %s → %s/%s → %d sub-obligations",
                obl["candidate_id"], route.framework_ref,
                route.framework_domain, len(outcome.decomposed_obligations),
            )
            for item in outcome.decomposed_obligations:
                await _emit(
                    item.obligation_text, item.action_raw, item.object_raw,
                    framework_item=item,
                )
            return

        # Registry had no usable match: log and compose the obligation as-is.
        logger.warning(
            "Framework decomposition unmatched: %s — %s",
            obl["candidate_id"], outcome.issues,
        )

    # Atomic, compound, or unmatched framework container.
    await _emit(obl["obligation_text"], obl["action"], obl["object"])
|
||||
|
||||
async def _process_pass0b_control(
|
||||
self, obl: dict, parsed: dict, stats: dict,
|
||||
atomic: Optional[AtomicControlCandidate] = None,
|
||||
@@ -2855,6 +2979,13 @@ class DecompositionPass:
|
||||
"deadline_hours": getattr(atomic, "_deadline_hours", None),
|
||||
"frequency": getattr(atomic, "_frequency", None),
|
||||
"validation_issues": getattr(atomic, "_validation_issues", []),
|
||||
"is_composite": getattr(atomic, "_is_composite", False),
|
||||
"atomicity": getattr(atomic, "_atomicity", "atomic"),
|
||||
"requires_decomposition": getattr(atomic, "_requires_decomposition", False),
|
||||
"framework_ref": getattr(atomic, "_framework_ref", None),
|
||||
"framework_domain": getattr(atomic, "_framework_domain", None),
|
||||
"framework_subcontrol_id": getattr(atomic, "_framework_subcontrol_id", None),
|
||||
"decomposition_source": getattr(atomic, "_decomposition_source", "direct"),
|
||||
}),
|
||||
"framework_id": "14b1bdd2-abc7-4a43-adae-14471ee5c7cf",
|
||||
},
|
||||
|
||||
Reference in New Issue
Block a user