feat(pipeline): implement Control Dependency Engine (Block 9)
Core engine (dependency_engine.py):
- 5 dependency types: prerequisite, supersedes, compensating_control, conditional_requirement, scope_exclusion
- Generic condition evaluator (JSONB rules with AND/OR/NOT/field ops)
- Priority-based conflict resolution
- Cycle detection (DFS) + topological sort
- Full evaluation with MCP-compatible dependency_resolution trace
- 39 tests, all passing (incl. GHV scenario from user requirements)

Automatic generator (dependency_generator.py):
- Ontology-based: same normalized_object + phase sequence -> prerequisite
- Pattern-based: define->implement, implement->monitor, etc.
- Domain packs: YAML rules for GDPR, AI Act, CRA, Security, Labor Contracts
- 14 tests, all passing

API routes (dependency_routes.py):
- CRUD for dependencies
- POST /evaluate with dependency resolution
- POST /generate (auto-generation with dry_run)
- POST /validate (cycle detection)
- GET /graph (nodes + edges for visualization)

Prompt enhancement (decomposition_pass.py):
- Added dependency_hints + lifecycle_phase_order to Pass 0b prompt
- Stored in generation_metadata for post-processing

DB migration: control_dependencies + control_evaluation_results tables

126 tests total, all passing.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
control-pipeline/services/control_ontology.py

@@ -351,3 +351,33 @@ def build_canonical_key(
     if asset_scope:
         parts.append(asset_scope)
     return ":".join(parts)
+
+
+# ============================================================================
+# PHASE ORDERING (for dependency engine — lifecycle sequence)
+# ============================================================================
+
+PHASE_ORDER: dict[str, int] = {
+    "scope": 1,
+    "definition": 2,
+    "governance": 2,
+    "design": 3,
+    "implementation": 4,
+    "configuration": 5,
+    "operation": 6,
+    "training": 6,
+    "monitoring": 7,
+    "testing": 8,
+    "review": 9,
+    "assessment": 10,
+    "remediation": 10,
+    "validation": 11,
+    "reporting": 12,
+    "evidence": 13,
+}
+
+
+def get_phase_order(action_type: str) -> int:
+    """Get the lifecycle phase order for an action_type (1-13)."""
+    phase = get_phase(action_type)
+    return PHASE_ORDER.get(phase, 6)  # default: operation (middle)
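Usage sketch (editorial, not part of the commit): how the phase table orders two controls and how get_phase_order falls back. classify_action/get_phase live elsewhere in control_ontology.py and are not shown in this diff.

from services.control_ontology import PHASE_ORDER, get_phase_order

# "definition" (2) precedes "monitoring" (7): a policy-definition control
# sorts before the control that monitors the same object.
assert PHASE_ORDER["definition"] < PHASE_ORDER["monitoring"]

# get_phase_order() resolves an action_type to its phase first (via get_phase,
# defined elsewhere in this module) and falls back to 6 ("operation") for
# phases missing from PHASE_ORDER.
order = get_phase_order("monitor")  # hypothetical action_type; exact value depends on get_phase()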
control-pipeline/services/decomposition_pass.py

@@ -220,6 +220,9 @@ class AtomicControlCandidate:
     pass_criteria: list = field(default_factory=list)
     fail_criteria: list = field(default_factory=list)
     check_type: str = ""
+    # Dependency engine fields
+    dependency_hints: list = field(default_factory=list)
+    lifecycle_phase_order: int = 0
 
     def to_dict(self) -> dict:
         return {
@@ -238,6 +241,8 @@ class AtomicControlCandidate:
             "pass_criteria": self.pass_criteria,
             "fail_criteria": self.fail_criteria,
             "check_type": self.check_type,
+            "dependency_hints": self.dependency_hints,
+            "lifecycle_phase_order": self.lifecycle_phase_order,
         }
 
 
@@ -2133,7 +2138,9 @@ Antworte als JSON:
   "severity": "critical|high|medium|low",
   "category": "security|privacy|governance|operations|finance|reporting",
   "check_type": "technical_config_check|document_clause_check|code_pattern_check|evidence_check|interview_required",
-  "merge_key": "action_type:normalized_object:control_phase"
+  "merge_key": "action_type:normalized_object:control_phase",
+  "dependency_hints": ["dependency_type:action_type:normalized_object (Voraussetzungen, Ersetzungen, Kompensationen)"],
+  "lifecycle_phase_order": "1-13 (1=scope, 2=definition, 4=implementation, 7=monitoring, 8=testing, 12=reporting)"
 }}"""
 
 
@@ -2229,7 +2236,9 @@ Jedes Control hat dieses Format:
   "severity": "critical|high|medium|low",
   "category": "security|privacy|governance|operations|finance|reporting",
   "check_type": "technical_config_check|document_clause_check|code_pattern_check|evidence_check|interview_required",
-  "merge_key": "action_type:normalized_object:control_phase"
+  "merge_key": "action_type:normalized_object:control_phase",
+  "dependency_hints": ["dependency_type:action_type:normalized_object (Voraussetzungen, Ersetzungen, Kompensationen)"],
+  "lifecycle_phase_order": "1-13 (1=scope, 2=definition, 4=implementation, 7=monitoring, 8=testing, 12=reporting)"
 }}"""
 
 
@@ -2971,6 +2980,8 @@ class DecompositionPass:
                 pass_criteria=_ensure_list(parsed.get("pass_criteria", [])),
                 fail_criteria=_ensure_list(parsed.get("fail_criteria", [])),
                 check_type=parsed.get("check_type", ""),
+                dependency_hints=_ensure_list(parsed.get("dependency_hints", [])),
+                lifecycle_phase_order=int(parsed.get("lifecycle_phase_order", 0) or 0),
             )
             # Store merge_key from LLM output in metadata
             llm_merge_key = parsed.get("merge_key", "")
@@ -2980,6 +2991,12 @@ class DecompositionPass:
             atomic.parent_control_uuid = obl["parent_uuid"]
             atomic.obligation_candidate_id = obl["candidate_id"]
 
+            # Set lifecycle_phase_order deterministically if not set by LLM
+            if not atomic.lifecycle_phase_order:
+                from services.control_ontology import classify_action, get_phase_order
+                action_type = classify_action(obl.get("action", "") or atomic.title)
+                atomic.lifecycle_phase_order = get_phase_order(action_type)
+
             # Cap severity for implementation-specific obligations
             if obl.get("is_implementation_specific") and atomic.severity in (
                 "critical", "high"
@@ -3438,6 +3455,9 @@ class DecompositionPass:
                     "pass_criteria": atomic.pass_criteria or [],
                     "fail_criteria": atomic.fail_criteria or [],
                     "check_type": atomic.check_type or "",
+                    # Dependency engine fields
+                    "dependency_hints": atomic.dependency_hints or [],
+                    "lifecycle_phase_order": atomic.lifecycle_phase_order or 0,
                 }),
                 "framework_id": "14b1bdd2-abc7-4a43-adae-14471ee5c7cf",
             },
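For illustration, a hypothetical Pass 0b output fragment showing the two new fields next to the existing ones (invented values, not taken from the repository):

example_control = {
    "title": "Implement access logging for the HR system",
    "severity": "high",
    "check_type": "technical_config_check",
    "merge_key": "implement:access_logging:implementation",
    "dependency_hints": ["prerequisite:define:logging_policy"],
    "lifecycle_phase_order": 4,  # implementation
}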
control-pipeline/services/dependency_engine.py (new file, 599 lines)

@@ -0,0 +1,599 @@
"""
Control Dependency Engine — evaluates control statuses considering
inter-control dependencies.

Pure functions (no DB coupling) for:
- Generic condition evaluation (JSONB rules -> bool)
- Effect application (modifies target status)
- Cycle detection (DFS-based)
- Topological sort (evaluation order)
- Full evaluation resolution with priority-based conflict handling

DB interaction is in separate load/store functions at the bottom.
"""

from __future__ import annotations

import json
import logging
import uuid
from collections import defaultdict
from dataclasses import dataclass, field
from enum import Enum
from typing import Any, Optional

from sqlalchemy import text

logger = logging.getLogger(__name__)


# ============================================================================
# ENUMS
# ============================================================================

class DependencyType(str, Enum):
    PREREQUISITE = "prerequisite"
    CONDITIONAL_REQUIREMENT = "conditional_requirement"
    SUPERSEDES = "supersedes"
    COMPENSATING_CONTROL = "compensating_control"
    SCOPE_EXCLUSION = "scope_exclusion"


class EvaluationStatus(str, Enum):
    PASS = "pass"
    FAIL = "fail"
    NOT_APPLICABLE = "not_applicable"
    PARTIALLY_SATISFIED = "partially_satisfied"
    COMPENSATED_FAIL = "compensated_fail"
    REVIEW_REQUIRED = "review_required"


# Default priority per dependency type (lower = higher priority)
DEFAULT_PRIORITIES: dict[str, int] = {
    "supersedes": 10,
    "scope_exclusion": 20,
    "prerequisite": 50,
    "conditional_requirement": 70,
    "compensating_control": 80,
}


# ============================================================================
# DATA CLASSES
# ============================================================================

@dataclass
class Dependency:
    id: str = ""
    source_control_id: str = ""
    target_control_id: str = ""
    dependency_type: str = "prerequisite"
    condition: dict = field(default_factory=dict)
    effect: dict = field(default_factory=dict)
    priority: int = 100
    generation_method: str = "manual"
    is_active: bool = True


@dataclass
class ControlState:
    """In-memory representation of a control's evaluation state."""
    control_id: str = ""
    raw_status: str = "fail"
    resolved_status: str = ""
    context: dict = field(default_factory=dict)


@dataclass
class EvaluationResult:
    control_id: str = ""
    evaluation_run_id: str = ""
    raw_status: str = "fail"
    resolved_status: str = "fail"
    dependency_resolution: list = field(default_factory=list)
    confidence: float = 1.0
    reasoning: str = ""


# ============================================================================
# CONDITION EVALUATOR
# ============================================================================

def _resolve_field(field_path: str, context: dict) -> Any:
    """Resolve a dot-notation field path against a nested dict.

    Examples:
        _resolve_field("source.status", {"source": {"status": "pass"}}) -> "pass"
        _resolve_field("context.company_size", {"context": {"company_size": "large"}}) -> "large"
    """
    parts = field_path.split(".")
    current = context
    for part in parts:
        if isinstance(current, dict):
            current = current.get(part)
        else:
            return None
    return current


def _evaluate_single_clause(clause: dict, context: dict) -> bool:
    """Evaluate a single {field, op, value} clause."""
    field_path = clause.get("field", "")
    op = clause.get("op", "==")
    expected = clause.get("value")

    actual = _resolve_field(field_path, context)

    if op == "==":
        return actual == expected
    elif op == "!=":
        return actual != expected
    elif op == "in":
        if isinstance(expected, list):
            return actual in expected
        return False
    elif op == "not_in":
        if isinstance(expected, list):
            return actual not in expected
        return True
    elif op == ">":
        try:
            return float(actual) > float(expected)
        except (TypeError, ValueError):
            return False
    elif op == "<":
        try:
            return float(actual) < float(expected)
        except (TypeError, ValueError):
            return False
    elif op == ">=":
        try:
            return float(actual) >= float(expected)
        except (TypeError, ValueError):
            return False
    elif op == "<=":
        try:
            return float(actual) <= float(expected)
        except (TypeError, ValueError):
            return False
    elif op == "contains":
        if isinstance(actual, (list, set, tuple)):
            return expected in actual
        if isinstance(actual, str):
            return str(expected) in actual
        return False

    return False


def evaluate_condition(condition: dict, context: dict) -> bool:
    """Evaluate a generic condition against a context dict.

    Supports:
    - Empty condition -> True (always matches)
    - Simple clause: {"field": "source.status", "op": "==", "value": "pass"}
    - Compound AND: {"operator": "AND", "clauses": [...]}
    - Compound OR: {"operator": "OR", "clauses": [...]}
    - Negation: {"operator": "NOT", "clause": {...}}
    """
    if not condition:
        return True

    operator = condition.get("operator")

    if operator == "AND":
        clauses = condition.get("clauses", [])
        return all(evaluate_condition(c, context) for c in clauses)

    if operator == "OR":
        clauses = condition.get("clauses", [])
        return any(evaluate_condition(c, context) for c in clauses)

    if operator == "NOT":
        clause = condition.get("clause", {})
        return not evaluate_condition(clause, context)

    # Simple clause with field/op/value
    if "field" in condition:
        return _evaluate_single_clause(condition, context)

    return True
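Usage sketch for the condition DSL above (editorial example, not part of the committed file):

from services.dependency_engine import evaluate_condition

condition = {
    "operator": "AND",
    "clauses": [
        {"field": "source.status", "op": "==", "value": "pass"},
        {"field": "context.company_size", "op": "in", "value": ["micro", "small"]},
    ],
}
ctx = {
    "source": {"status": "pass"},
    "target": {"status": "fail"},
    "context": {"company_size": "small"},
}
assert evaluate_condition(condition, ctx) is True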

# ============================================================================
# EFFECT APPLIER
# ============================================================================

def apply_effect(effect: dict, current_status: str) -> str:
    """Apply a dependency effect to produce a new status.

    Effect schema:
        {"set_status": "not_applicable"}
        {"set_status": "compensated_fail"}
    """
    new_status = effect.get("set_status")
    if new_status and new_status in {s.value for s in EvaluationStatus}:
        return new_status
    return current_status


# ============================================================================
# CYCLE DETECTION
# ============================================================================

WHITE, GRAY, BLACK = 0, 1, 2


def detect_cycles(dependencies: list[Dependency]) -> list[list[str]]:
    """Detect cycles in the dependency graph using DFS.

    Returns list of cycles (each cycle = list of control IDs).
    Empty list = no cycles.
    """
    graph: dict[str, list[str]] = defaultdict(list)
    all_nodes: set[str] = set()

    for dep in dependencies:
        if dep.is_active:
            graph[dep.source_control_id].append(dep.target_control_id)
            all_nodes.add(dep.source_control_id)
            all_nodes.add(dep.target_control_id)

    color: dict[str, int] = {n: WHITE for n in all_nodes}
    parent: dict[str, Optional[str]] = {n: None for n in all_nodes}
    cycles: list[list[str]] = []

    def dfs(node: str) -> None:
        color[node] = GRAY
        for neighbor in graph.get(node, []):
            if color.get(neighbor, WHITE) == GRAY:
                # Found a cycle — trace back
                cycle = [neighbor, node]
                current = parent.get(node)
                while current and current != neighbor:
                    cycle.append(current)
                    current = parent.get(current)
                cycles.append(cycle)
            elif color.get(neighbor, WHITE) == WHITE:
                parent[neighbor] = node
                dfs(neighbor)
        color[node] = BLACK

    for node in all_nodes:
        if color[node] == WHITE:
            dfs(node)

    return cycles


def topological_sort(dependencies: list[Dependency]) -> list[str]:
    """Return control IDs in dependency-safe evaluation order.

    Sources (prerequisites) come before targets (dependents).
    Controls not involved in any dependency are omitted.
    """
    graph: dict[str, list[str]] = defaultdict(list)
    in_degree: dict[str, int] = defaultdict(int)
    all_nodes: set[str] = set()

    for dep in dependencies:
        if dep.is_active:
            # source -> target means: source should be evaluated first
            graph[dep.source_control_id].append(dep.target_control_id)
            in_degree.setdefault(dep.source_control_id, 0)
            in_degree[dep.target_control_id] = in_degree.get(dep.target_control_id, 0) + 1
            all_nodes.add(dep.source_control_id)
            all_nodes.add(dep.target_control_id)

    # Kahn's algorithm
    queue = [n for n in all_nodes if in_degree.get(n, 0) == 0]
    result: list[str] = []

    while queue:
        queue.sort()  # deterministic order
        node = queue.pop(0)
        result.append(node)
        for neighbor in graph.get(node, []):
            in_degree[neighbor] -= 1
            if in_degree[neighbor] == 0:
                queue.append(neighbor)

    return result
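Usage sketch for the two graph helpers (editorial example):

from services.dependency_engine import Dependency, detect_cycles, topological_sort

deps = [
    Dependency(id="d1", source_control_id="A", target_control_id="B"),
    Dependency(id="d2", source_control_id="B", target_control_id="C"),
]
assert detect_cycles(deps) == []                   # acyclic
assert topological_sort(deps) == ["A", "B", "C"]   # prerequisites first

deps.append(Dependency(id="d3", source_control_id="C", target_control_id="A"))
assert len(detect_cycles(deps)) == 1               # A -> B -> C -> A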

# ============================================================================
# MAIN EVALUATION ENGINE
# ============================================================================

def evaluate_controls(
    control_states: dict[str, ControlState],
    dependencies: list[Dependency],
    context: dict,
) -> dict[str, EvaluationResult]:
    """Evaluate all controls considering dependencies.

    Args:
        control_states: control_id -> ControlState (with raw_status)
        dependencies: all active dependencies
        context: company profile (industry, company_size, scope_signals, etc.)

    Returns:
        control_id -> EvaluationResult (with resolved_status + trace)

    Algorithm:
        1. Build adjacency (target -> dependencies)
        2. Detect cycles -> involved controls = review_required
        3. Topological sort for evaluation order
        4. For each control: evaluate conditions, apply highest-priority effect
        5. Record full dependency trace for MCP output
    """
    evaluation_run_id = str(uuid.uuid4())

    # 1. Build adjacency: target_control_id -> list of dependencies
    target_deps: dict[str, list[Dependency]] = defaultdict(list)
    for dep in dependencies:
        if dep.is_active:
            target_deps[dep.target_control_id].append(dep)

    # 2. Cycle detection
    cycles = detect_cycles(dependencies)
    cycle_controls: set[str] = set()
    for cycle in cycles:
        cycle_controls.update(cycle)

    # 3. Topological sort (excluding cycle controls)
    safe_deps = [
        d for d in dependencies
        if d.is_active
        and d.source_control_id not in cycle_controls
        and d.target_control_id not in cycle_controls
    ]
    eval_order = topological_sort(safe_deps)

    # Add remaining controls (those not in any dependency + cycle controls)
    all_ids = set(control_states.keys())
    remaining = all_ids - set(eval_order)
    eval_order.extend(sorted(remaining))

    # 4. Iterate and evaluate
    results: dict[str, EvaluationResult] = {}

    for control_id in eval_order:
        state = control_states.get(control_id)
        if not state:
            continue

        # Cycle controls -> review_required
        if control_id in cycle_controls:
            results[control_id] = EvaluationResult(
                control_id=control_id,
                evaluation_run_id=evaluation_run_id,
                raw_status=state.raw_status,
                resolved_status=EvaluationStatus.REVIEW_REQUIRED.value,
                dependency_resolution=[{"cycle_detected": True}],
                confidence=0.5,
                reasoning="Zyklische Abhaengigkeit erkannt — manuelle Pruefung erforderlich.",
            )
            continue

        # Collect dependencies targeting this control
        deps_for_target = target_deps.get(control_id, [])
        if not deps_for_target:
            results[control_id] = EvaluationResult(
                control_id=control_id,
                evaluation_run_id=evaluation_run_id,
                raw_status=state.raw_status,
                resolved_status=state.raw_status,
                confidence=1.0,
            )
            continue

        # Evaluate each dependency's condition
        matching_effects: list[tuple[int, dict, Dependency]] = []
        trace: list[dict] = []

        for dep in sorted(deps_for_target, key=lambda d: d.priority):
            source_state = control_states.get(dep.source_control_id)
            source_result = results.get(dep.source_control_id)

            source_status = "unknown"
            if source_result:
                source_status = source_result.resolved_status
            elif source_state:
                source_status = source_state.raw_status

            eval_ctx = {
                "source": {"status": source_status},
                "target": {"status": state.raw_status},
                "context": context,
            }

            condition_met = evaluate_condition(dep.condition, eval_ctx)

            trace_entry = {
                "dependency_id": dep.id,
                "dependency_type": dep.dependency_type,
                "source_control_id": dep.source_control_id,
                "source_status": source_status,
                "condition_met": condition_met,
                "effect_applied": dep.effect if condition_met else None,
                "priority": dep.priority,
            }
            trace.append(trace_entry)

            if condition_met:
                matching_effects.append((dep.priority, dep.effect, dep))

        # Apply highest-priority (lowest number) effect
        resolved = state.raw_status
        if matching_effects:
            matching_effects.sort(key=lambda x: x[0])
            _, best_effect, _ = matching_effects[0]
            resolved = apply_effect(best_effect, state.raw_status)

        results[control_id] = EvaluationResult(
            control_id=control_id,
            evaluation_run_id=evaluation_run_id,
            raw_status=state.raw_status,
            resolved_status=resolved,
            dependency_resolution=trace,
            confidence=_compute_confidence(trace),
        )

    return results


def _compute_confidence(trace: list[dict]) -> float:
    """Compute confidence based on dependency resolution trace."""
    if not trace:
        return 1.0

    met_count = sum(1 for t in trace if t.get("condition_met"))
    total = len(trace)

    if met_count == 0:
        return 1.0  # No dependencies fired -> raw status stands
    if met_count == 1:
        return 0.95  # Single dependency resolved
    # Multiple dependencies -> slightly lower confidence
    return max(0.7, 1.0 - (met_count - 1) * 0.1)
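End-to-end sketch (editorial example): a superseding control that passes flips a failing legacy control to not_applicable.

from services.dependency_engine import ControlState, Dependency, evaluate_controls

states = {
    "ctrl-old": ControlState(control_id="ctrl-old", raw_status="fail"),
    "ctrl-new": ControlState(control_id="ctrl-new", raw_status="pass"),
}
deps = [
    Dependency(
        id="d1",
        source_control_id="ctrl-new",
        target_control_id="ctrl-old",
        dependency_type="supersedes",
        condition={"field": "source.status", "op": "==", "value": "pass"},
        effect={"set_status": "not_applicable"},
        priority=10,
    ),
]
results = evaluate_controls(states, deps, context={"company_size": "large"})
assert results["ctrl-old"].resolved_status == "not_applicable"
assert results["ctrl-old"].dependency_resolution[0]["condition_met"] is True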

# ============================================================================
# DB INTERACTION (separate from pure logic)
# ============================================================================

def load_dependencies_for_controls(
    db, control_ids: list[str],
) -> list[Dependency]:
    """Load all active dependencies involving the given control IDs."""
    if not control_ids:
        return []

    rows = db.execute(
        text("""
            SELECT id, source_control_id, target_control_id,
                   dependency_type, condition, effect, priority,
                   generation_method, is_active
            FROM control_dependencies
            WHERE is_active = TRUE
              AND (source_control_id = ANY(CAST(:ids AS uuid[]))
                   OR target_control_id = ANY(CAST(:ids AS uuid[])))
        """),
        {"ids": control_ids},
    ).fetchall()

    return [
        Dependency(
            id=str(r[0]),
            source_control_id=str(r[1]),
            target_control_id=str(r[2]),
            dependency_type=r[3],
            condition=r[4] if isinstance(r[4], dict) else {},
            effect=r[5] if isinstance(r[5], dict) else {},
            priority=r[6],
            generation_method=r[7],
            is_active=r[8],
        )
        for r in rows
    ]


def load_all_active_dependencies(db) -> list[Dependency]:
    """Load all active dependencies."""
    rows = db.execute(
        text("""
            SELECT id, source_control_id, target_control_id,
                   dependency_type, condition, effect, priority,
                   generation_method, is_active
            FROM control_dependencies
            WHERE is_active = TRUE
            ORDER BY priority
        """),
    ).fetchall()

    return [
        Dependency(
            id=str(r[0]),
            source_control_id=str(r[1]),
            target_control_id=str(r[2]),
            dependency_type=r[3],
            condition=r[4] if isinstance(r[4], dict) else {},
            effect=r[5] if isinstance(r[5], dict) else {},
            priority=r[6],
            generation_method=r[7],
            is_active=r[8],
        )
        for r in rows
    ]


def store_dependency(db, dep: Dependency) -> str:
    """Insert a dependency, return its UUID."""
    row = db.execute(
        text("""
            INSERT INTO control_dependencies
                (source_control_id, target_control_id, dependency_type,
                 condition, effect, priority, generation_method, generation_metadata)
            VALUES
                (CAST(:src AS uuid), CAST(:tgt AS uuid), :dtype,
                 CAST(:cond AS jsonb), CAST(:eff AS jsonb), :prio, :gmethod, CAST(:gmeta AS jsonb))
            ON CONFLICT (source_control_id, target_control_id, dependency_type)
            DO UPDATE SET
                condition = EXCLUDED.condition,
                effect = EXCLUDED.effect,
                priority = EXCLUDED.priority,
                updated_at = NOW()
            RETURNING id::text
        """),
        {
            "src": dep.source_control_id,
            "tgt": dep.target_control_id,
            "dtype": dep.dependency_type,
            "cond": json.dumps(dep.condition),
            "eff": json.dumps(dep.effect),
            "prio": dep.priority,
            "gmethod": dep.generation_method,
            "gmeta": json.dumps({}),
        },
    ).fetchone()

    return row[0] if row else ""


def store_evaluation_results(
    db, results: dict[str, EvaluationResult], company_profile: dict,
) -> int:
    """Batch insert evaluation results. Returns row count."""
    count = 0
    for result in results.values():
        db.execute(
            text("""
                INSERT INTO control_evaluation_results
                    (control_id, evaluation_run_id, company_profile,
                     raw_status, resolved_status, dependency_resolution,
                     confidence, reasoning)
                VALUES
                    (CAST(:cid AS uuid), CAST(:rid AS uuid), CAST(:prof AS jsonb),
                     :raw, :resolved, CAST(:trace AS jsonb),
                     :conf, :reason)
                ON CONFLICT (control_id, evaluation_run_id)
                DO UPDATE SET
                    resolved_status = EXCLUDED.resolved_status,
                    dependency_resolution = EXCLUDED.dependency_resolution,
                    confidence = EXCLUDED.confidence
            """),
            {
                "cid": result.control_id,
                "rid": result.evaluation_run_id,
                "prof": json.dumps(company_profile),
                "raw": result.raw_status,
                "resolved": result.resolved_status,
                "trace": json.dumps(result.dependency_resolution),
                "conf": result.confidence,
                "reason": result.reasoning,
            },
        )
        count += 1

    return count
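Persistence sketch (editorial example; `db` is assumed to be a SQLAlchemy Session bound to the Postgres schema created by this commit's migration, and the two UUIDs must reference existing controls):

import uuid

from services.dependency_engine import Dependency, load_all_active_dependencies, store_dependency

dep = Dependency(
    source_control_id=str(uuid.uuid4()),  # placeholder; use real control UUIDs in practice
    target_control_id=str(uuid.uuid4()),
    dependency_type="prerequisite",
    effect={"set_status": "review_required"},
    priority=50,
    generation_method="manual",
)
dep_id = store_dependency(db, dep)        # upserts on (source, target, type)
db.commit()
active = load_all_active_dependencies(db)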
control-pipeline/services/dependency_generator.py (new file, 381 lines)

@@ -0,0 +1,381 @@
"""
Dependency Generator — automatic discovery of control dependencies.

Three strategies:
1. Ontology-based: same normalized_object + phase sequence -> prerequisite
2. Pattern-based: known patterns (define->implement, implement->monitor, etc.)
3. Domain packs: YAML-defined rules for specific regulatory domains
"""

from __future__ import annotations

import logging
import os
import re
from collections import defaultdict
from typing import Optional

import yaml

from services.dependency_engine import Dependency, DEFAULT_PRIORITIES

logger = logging.getLogger(__name__)


# ============================================================================
# PHASE ORDERING (imported from ontology)
# ============================================================================

from services.control_ontology import PHASE_ORDER


# ============================================================================
# PATTERN RULES
# ============================================================================

PATTERN_RULES: list[dict] = [
    {
        "name": "define_before_implement",
        "source_filter": {"action_type": "define"},
        "target_filter": {"action_type": "implement"},
        "match_on": "normalized_object",
        "dependency_type": "prerequisite",
        "condition": {},
        "effect": {"set_status": "review_required"},
        "priority": 50,
    },
    {
        "name": "implement_before_monitor",
        "source_filter": {"action_type_in": ["implement", "configure", "enforce"]},
        "target_filter": {"action_type_in": ["monitor", "review", "test"]},
        "match_on": "normalized_object",
        "dependency_type": "prerequisite",
        "condition": {},
        "effect": {"set_status": "review_required"},
        "priority": 50,
    },
    {
        "name": "define_before_enforce",
        "source_filter": {"action_type": "define"},
        "target_filter": {"action_type": "enforce"},
        "match_on": "normalized_object",
        "dependency_type": "prerequisite",
        "condition": {},
        "effect": {"set_status": "review_required"},
        "priority": 50,
    },
    {
        "name": "implement_before_validate",
        "source_filter": {"action_type_in": ["implement", "configure"]},
        "target_filter": {"action_type_in": ["validate", "verify"]},
        "match_on": "normalized_object",
        "dependency_type": "prerequisite",
        "condition": {},
        "effect": {"set_status": "review_required"},
        "priority": 50,
    },
    {
        "name": "train_before_review",
        "source_filter": {"action_type": "train"},
        "target_filter": {"action_type_in": ["review", "assess"]},
        "match_on": "normalized_object",
        "dependency_type": "prerequisite",
        "condition": {},
        "effect": {"set_status": "review_required"},
        "priority": 60,
    },
]


# ============================================================================
# HELPER: Parse merge_key into components
# ============================================================================

def _parse_merge_key(merge_key: str) -> dict:
    """Parse 'action_type:normalized_object:phase[:asset_scope]' into components."""
    parts = merge_key.split(":")
    result = {
        "action_type": parts[0] if len(parts) > 0 else "",
        "normalized_object": parts[1] if len(parts) > 1 else "",
        "phase": parts[2] if len(parts) > 2 else "",
        "asset_scope": parts[3] if len(parts) > 3 else "",
    }
    return result


def _get_control_merge_key(control: dict) -> str:
    """Extract merge_key from a control dict (from generation_metadata or top-level)."""
    mk = control.get("merge_key", "")
    if not mk:
        meta = control.get("generation_metadata", {})
        if isinstance(meta, str):
            try:
                import json
                meta = json.loads(meta)
            except (ValueError, TypeError):
                meta = {}
        mk = meta.get("merge_group_hint", "")
    return mk


# ============================================================================
# ONTOLOGY-BASED GENERATOR
# ============================================================================

def generate_ontology_dependencies(controls: list[dict]) -> list[Dependency]:
    """Generate prerequisite dependencies from lifecycle phase ordering.

    Rule: If two controls share the same normalized_object and control A's
    phase precedes control B's phase, then A is a prerequisite for B.

    Groups by normalized_object first (O(n) grouping, O(k^2) per group
    where k is typically 2-8).
    """
    # Group controls by normalized_object
    groups: dict[str, list[dict]] = defaultdict(list)

    for ctrl in controls:
        mk = _get_control_merge_key(ctrl)
        if not mk:
            continue
        parsed = _parse_merge_key(mk)
        obj = parsed["normalized_object"]
        if obj:
            ctrl["_parsed_mk"] = parsed
            ctrl["_phase_order"] = PHASE_ORDER.get(parsed["phase"], 6)
            groups[obj].append(ctrl)

    dependencies: list[Dependency] = []

    for obj, group in groups.items():
        if len(group) < 2:
            continue

        # Sort by phase order
        group.sort(key=lambda c: c["_phase_order"])

        # Create prerequisite edges between adjacent phases
        for i in range(len(group)):
            for j in range(i + 1, len(group)):
                a = group[i]
                b = group[j]
                if a["_phase_order"] < b["_phase_order"]:
                    dep = Dependency(
                        source_control_id=a.get("id", a.get("control_id", "")),
                        target_control_id=b.get("id", b.get("control_id", "")),
                        dependency_type="prerequisite",
                        condition={},
                        effect={"set_status": "review_required"},
                        priority=DEFAULT_PRIORITIES["prerequisite"],
                        generation_method="ontology",
                    )
                    dependencies.append(dep)

    return dependencies
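Sketch of the ontology strategy (editorial example): two controls over the same normalized_object in different phases yield one prerequisite edge.

from services.dependency_generator import generate_ontology_dependencies

controls = [
    {"id": "c-def", "merge_key": "define:retention_policy:definition"},
    {"id": "c-mon", "merge_key": "monitor:retention_policy:monitoring"},
]
deps = generate_ontology_dependencies(controls)
assert len(deps) == 1
assert (deps[0].source_control_id, deps[0].target_control_id) == ("c-def", "c-mon")
assert deps[0].generation_method == "ontology"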

# ============================================================================
# PATTERN-BASED GENERATOR
# ============================================================================

def _matches_filter(control: dict, filter_: dict) -> bool:
    """Check if a control matches a pattern filter."""
    parsed = control.get("_parsed_mk", {})
    action = parsed.get("action_type", "")

    if "action_type" in filter_:
        if action != filter_["action_type"]:
            return False

    if "action_type_in" in filter_:
        if action not in filter_["action_type_in"]:
            return False

    return True


def generate_pattern_dependencies(
    controls: list[dict],
    rules: Optional[list[dict]] = None,
) -> list[Dependency]:
    """Apply pattern rules to generate dependencies between controls."""
    if rules is None:
        rules = PATTERN_RULES

    # Pre-parse merge keys
    for ctrl in controls:
        if "_parsed_mk" not in ctrl:
            mk = _get_control_merge_key(ctrl)
            if mk:
                ctrl["_parsed_mk"] = _parse_merge_key(mk)
            else:
                ctrl["_parsed_mk"] = {}

    dependencies: list[Dependency] = []

    for rule in rules:
        sources = [c for c in controls if _matches_filter(c, rule["source_filter"])]
        targets = [c for c in controls if _matches_filter(c, rule["target_filter"])]

        match_on = rule.get("match_on")

        for src in sources:
            for tgt in targets:
                src_id = src.get("id", src.get("control_id", ""))
                tgt_id = tgt.get("id", tgt.get("control_id", ""))

                if src_id == tgt_id:
                    continue

                if match_on == "normalized_object":
                    src_obj = src.get("_parsed_mk", {}).get("normalized_object", "")
                    tgt_obj = tgt.get("_parsed_mk", {}).get("normalized_object", "")
                    if not src_obj or src_obj != tgt_obj:
                        continue

                dep = Dependency(
                    source_control_id=src_id,
                    target_control_id=tgt_id,
                    dependency_type=rule["dependency_type"],
                    condition=rule.get("condition", {}),
                    effect=rule.get("effect", {"set_status": "review_required"}),
                    priority=rule.get("priority", 100),
                    generation_method="pattern",
                )
                dependencies.append(dep)

    return dependencies
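Sketch of the pattern strategy (editorial example): the built-in define_before_implement rule links two controls that share a normalized_object.

from services.dependency_generator import generate_pattern_dependencies

controls = [
    {"id": "c-def", "merge_key": "define:access_policy:definition"},
    {"id": "c-impl", "merge_key": "implement:access_policy:implementation"},
]
deps = generate_pattern_dependencies(controls)
assert ("c-def", "c-impl") in {(d.source_control_id, d.target_control_id) for d in deps}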

# ============================================================================
# DOMAIN PACK GENERATOR
# ============================================================================

def load_domain_pack(path: str) -> dict:
    """Load a YAML domain pack."""
    with open(path, "r", encoding="utf-8") as f:
        return yaml.safe_load(f) or {}


def _title_matches(title: str, patterns: list[str]) -> bool:
    """Check if a title contains any of the given patterns (case-insensitive)."""
    title_lower = title.lower()
    return any(p.lower() in title_lower for p in patterns)


def generate_domain_dependencies(
    controls: list[dict],
    domain_pack_dir: str = "",
) -> list[Dependency]:
    """Apply all domain packs to generate domain-specific dependencies."""
    if not domain_pack_dir:
        domain_pack_dir = os.path.join(
            os.path.dirname(os.path.dirname(__file__)), "data", "domain_packs"
        )

    if not os.path.isdir(domain_pack_dir):
        return []

    dependencies: list[Dependency] = []

    for filename in sorted(os.listdir(domain_pack_dir)):
        if not filename.endswith((".yaml", ".yml")):
            continue

        pack = load_domain_pack(os.path.join(domain_pack_dir, filename))
        rules = pack.get("rules", [])

        for rule in rules:
            src_match = rule.get("source_match", {})
            tgt_match = rule.get("target_match", {})

            src_title_patterns = src_match.get("title_contains", [])
            tgt_title_patterns = tgt_match.get("title_contains", [])

            sources = [
                c for c in controls
                if src_title_patterns and _title_matches(c.get("title", ""), src_title_patterns)
            ]
            targets = [
                c for c in controls
                if tgt_title_patterns and _title_matches(c.get("title", ""), tgt_title_patterns)
            ]

            for src in sources:
                for tgt in targets:
                    src_id = src.get("id", src.get("control_id", ""))
                    tgt_id = tgt.get("id", tgt.get("control_id", ""))
                    if src_id == tgt_id:
                        continue

                    dep = Dependency(
                        source_control_id=src_id,
                        target_control_id=tgt_id,
                        dependency_type=rule.get("dependency_type", "prerequisite"),
                        condition=rule.get("condition", {
                            "field": "source.status", "op": "==", "value": "pass",
                        }),
                        effect=rule.get("effect", {"set_status": "not_applicable"}),
                        priority=rule.get("priority", DEFAULT_PRIORITIES.get(
                            rule.get("dependency_type", "prerequisite"), 100
                        )),
                        generation_method="domain_pack",
                    )
                    dependencies.append(dep)

    return dependencies
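For orientation, a hypothetical domain-pack rule as it would look after yaml.safe_load() (invented content; the shipped GDPR/AI Act/CRA/Security/Labor Contracts packs are not part of this diff):

example_pack = {
    "rules": [
        {
            "dependency_type": "compensating_control",
            "source_match": {"title_contains": ["encryption at rest"]},
            "target_match": {"title_contains": ["pseudonymisation"]},
            "condition": {"field": "source.status", "op": "==", "value": "pass"},
            "effect": {"set_status": "compensated_fail"},
            "priority": 80,
        }
    ]
}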

# ============================================================================
# TOP-LEVEL GENERATOR
# ============================================================================

def generate_all_dependencies(
    controls: list[dict],
    enable_ontology: bool = True,
    enable_patterns: bool = True,
    enable_domain_packs: bool = True,
    domain_pack_dir: str = "",
) -> tuple[list[Dependency], dict]:
    """Run all generators and return deduplicated dependencies + stats."""
    stats = {
        "ontology_generated": 0,
        "pattern_generated": 0,
        "domain_generated": 0,
        "total_before_dedup": 0,
        "total_unique": 0,
        "duplicates_removed": 0,
    }

    all_deps: list[Dependency] = []

    if enable_ontology:
        onto_deps = generate_ontology_dependencies(controls)
        stats["ontology_generated"] = len(onto_deps)
        all_deps.extend(onto_deps)

    if enable_patterns:
        pat_deps = generate_pattern_dependencies(controls)
        stats["pattern_generated"] = len(pat_deps)
        all_deps.extend(pat_deps)

    if enable_domain_packs:
        dom_deps = generate_domain_dependencies(controls, domain_pack_dir)
        stats["domain_generated"] = len(dom_deps)
        all_deps.extend(dom_deps)

    stats["total_before_dedup"] = len(all_deps)

    # Deduplicate by (source, target, type)
    seen: set[tuple[str, str, str]] = set()
    unique: list[Dependency] = []
    for dep in all_deps:
        key = (dep.source_control_id, dep.target_control_id, dep.dependency_type)
        if key not in seen:
            seen.add(key)
            unique.append(dep)

    stats["total_unique"] = len(unique)
    stats["duplicates_removed"] = stats["total_before_dedup"] - stats["total_unique"]

    return unique, stats
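Top-level sketch (editorial example), roughly what the POST /generate route's dry_run path might do before persisting anything:

from services.dependency_generator import generate_all_dependencies

controls = [
    {"id": "c-def", "merge_key": "define:retention_policy:definition"},
    {"id": "c-impl", "merge_key": "implement:retention_policy:implementation"},
    {"id": "c-mon", "merge_key": "monitor:retention_policy:monitoring"},
]
deps, stats = generate_all_dependencies(controls, enable_domain_packs=False)
# stats reports per-strategy counts plus how many duplicate (source, target, type)
# edges were removed across strategies.
print(stats["total_unique"], stats["duplicates_removed"])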