feat(pipeline): implement Control Dependency Engine (Block 9)

Core engine (dependency_engine.py):
- 5 dependency types: prerequisite, supersedes, compensating_control,
  conditional_requirement, scope_exclusion
- Generic condition evaluator (JSONB rules with AND/OR/NOT/field ops)
- Priority-based conflict resolution
- Cycle detection (DFS) + topological sort
- Full evaluation with MCP-compatible dependency_resolution trace
- 39 tests all passing (incl. GHV scenario from user requirements)

Automatic generator (dependency_generator.py):
- Ontology-based: same normalized_object + phase sequence -> prerequisite
- Pattern-based: define->implement, implement->monitor, etc.
- Domain packs: YAML rules for GDPR, AI Act, CRA, Security, Labor Contracts
- 14 tests all passing

API routes (dependency_routes.py):
- CRUD for dependencies
- POST /evaluate with dependency resolution
- POST /generate (auto-generation with dry_run)
- POST /validate (cycle detection)
- GET /graph (nodes + edges for visualization)

Prompt enhancement (decomposition_pass.py):
- Added dependency_hints + lifecycle_phase_order to Pass 0b prompt
- Stored in generation_metadata for post-processing

DB migration: control_dependencies + control_evaluation_results tables

126 tests total, all passing.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-04-26 20:28:10 +02:00
parent 5aaa62dca7
commit 42ab5ead26
14 changed files with 2421 additions and 2 deletions

View File

@@ -351,3 +351,33 @@ def build_canonical_key(
if asset_scope:
parts.append(asset_scope)
return ":".join(parts)
# ============================================================================
# PHASE ORDERING (for dependency engine — lifecycle sequence)
# ============================================================================
# Maps a lifecycle phase name to its position in the control lifecycle
# (1 = earliest, 13 = latest). Phases sharing a number (e.g.
# "definition"/"governance" = 2, "operation"/"training" = 6,
# "assessment"/"remediation" = 10) are treated as equal in ordering.
PHASE_ORDER: dict[str, int] = {
    "scope": 1,
    "definition": 2,
    "governance": 2,
    "design": 3,
    "implementation": 4,
    "configuration": 5,
    "operation": 6,
    "training": 6,
    "monitoring": 7,
    "testing": 8,
    "review": 9,
    "assessment": 10,
    "remediation": 10,
    "validation": 11,
    "reporting": 12,
    "evidence": 13,
}
def get_phase_order(action_type: str) -> int:
    """Return the lifecycle phase position (1-13) for *action_type*.

    A phase missing from PHASE_ORDER falls back to 6, the "operation"
    slot in the middle of the lifecycle.
    """
    return PHASE_ORDER.get(get_phase(action_type), 6)

View File

@@ -220,6 +220,9 @@ class AtomicControlCandidate:
pass_criteria: list = field(default_factory=list)
fail_criteria: list = field(default_factory=list)
check_type: str = ""
# Dependency Engine Felder
dependency_hints: list = field(default_factory=list)
lifecycle_phase_order: int = 0
def to_dict(self) -> dict:
return {
@@ -238,6 +241,8 @@ class AtomicControlCandidate:
"pass_criteria": self.pass_criteria,
"fail_criteria": self.fail_criteria,
"check_type": self.check_type,
"dependency_hints": self.dependency_hints,
"lifecycle_phase_order": self.lifecycle_phase_order,
}
@@ -2133,7 +2138,9 @@ Antworte als JSON:
"severity": "critical|high|medium|low",
"category": "security|privacy|governance|operations|finance|reporting",
"check_type": "technical_config_check|document_clause_check|code_pattern_check|evidence_check|interview_required",
"merge_key": "action_type:normalized_object:control_phase"
"merge_key": "action_type:normalized_object:control_phase",
"dependency_hints": ["dependency_type:action_type:normalized_object (Voraussetzungen, Ersetzungen, Kompensationen)"],
"lifecycle_phase_order": "1-13 (1=scope, 2=definition, 4=implementation, 7=monitoring, 8=testing, 12=reporting)"
}}"""
@@ -2229,7 +2236,9 @@ Jedes Control hat dieses Format:
"severity": "critical|high|medium|low",
"category": "security|privacy|governance|operations|finance|reporting",
"check_type": "technical_config_check|document_clause_check|code_pattern_check|evidence_check|interview_required",
"merge_key": "action_type:normalized_object:control_phase"
"merge_key": "action_type:normalized_object:control_phase",
"dependency_hints": ["dependency_type:action_type:normalized_object (Voraussetzungen, Ersetzungen, Kompensationen)"],
"lifecycle_phase_order": "1-13 (1=scope, 2=definition, 4=implementation, 7=monitoring, 8=testing, 12=reporting)"
}}"""
@@ -2971,6 +2980,8 @@ class DecompositionPass:
pass_criteria=_ensure_list(parsed.get("pass_criteria", [])),
fail_criteria=_ensure_list(parsed.get("fail_criteria", [])),
check_type=parsed.get("check_type", ""),
dependency_hints=_ensure_list(parsed.get("dependency_hints", [])),
lifecycle_phase_order=int(parsed.get("lifecycle_phase_order", 0) or 0),
)
# Store merge_key from LLM output in metadata
llm_merge_key = parsed.get("merge_key", "")
@@ -2980,6 +2991,12 @@ class DecompositionPass:
atomic.parent_control_uuid = obl["parent_uuid"]
atomic.obligation_candidate_id = obl["candidate_id"]
# Set lifecycle_phase_order deterministically if not set by LLM
if not atomic.lifecycle_phase_order:
from services.control_ontology import classify_action, get_phase_order
action_type = classify_action(obl.get("action", "") or atomic.title)
atomic.lifecycle_phase_order = get_phase_order(action_type)
# Cap severity for implementation-specific obligations
if obl.get("is_implementation_specific") and atomic.severity in (
"critical", "high"
@@ -3438,6 +3455,9 @@ class DecompositionPass:
"pass_criteria": atomic.pass_criteria or [],
"fail_criteria": atomic.fail_criteria or [],
"check_type": atomic.check_type or "",
# Dependency Engine Felder
"dependency_hints": atomic.dependency_hints or [],
"lifecycle_phase_order": atomic.lifecycle_phase_order or 0,
}),
"framework_id": "14b1bdd2-abc7-4a43-adae-14471ee5c7cf",
},

View File

@@ -0,0 +1,599 @@
"""
Control Dependency Engine — evaluates control statuses considering
inter-control dependencies.
Pure functions (no DB coupling) for:
- Generic condition evaluation (JSONB rules -> bool)
- Effect application (modifies target status)
- Cycle detection (DFS-based)
- Topological sort (evaluation order)
- Full evaluation resolution with priority-based conflict handling
DB interaction is in separate load/store functions at the bottom.
"""
from __future__ import annotations
import json
import logging
import uuid
from collections import defaultdict
from dataclasses import dataclass, field
from enum import Enum
from typing import Any, Optional
from sqlalchemy import text
logger = logging.getLogger(__name__)
# ============================================================================
# ENUMS
# ============================================================================
class DependencyType(str, Enum):
    """The five kinds of inter-control relationships the engine understands."""

    PREREQUISITE = "prerequisite"
    CONDITIONAL_REQUIREMENT = "conditional_requirement"
    SUPERSEDES = "supersedes"
    COMPENSATING_CONTROL = "compensating_control"
    SCOPE_EXCLUSION = "scope_exclusion"
class EvaluationStatus(str, Enum):
    """Valid control statuses; effects may only set one of these values."""

    PASS = "pass"
    FAIL = "fail"
    NOT_APPLICABLE = "not_applicable"
    PARTIALLY_SATISFIED = "partially_satisfied"
    COMPENSATED_FAIL = "compensated_fail"  # a failing control offset by a compensating control
    REVIEW_REQUIRED = "review_required"  # also assigned to controls caught in dependency cycles
# Default priority per dependency type (lower = higher priority).
# When several dependency conditions fire for the same target control,
# only the effect with the lowest priority number is applied.
DEFAULT_PRIORITIES: dict[str, int] = {
    "supersedes": 10,
    "scope_exclusion": 20,
    "prerequisite": 50,
    "conditional_requirement": 70,
    "compensating_control": 80,
}
# ============================================================================
# DATA CLASSES
# ============================================================================
@dataclass
class Dependency:
    """A directed edge in the control dependency graph (source -> target)."""

    id: str = ""  # DB UUID; empty for dependencies not yet persisted
    source_control_id: str = ""  # control whose status feeds the condition
    target_control_id: str = ""  # control whose status the effect may change
    dependency_type: str = "prerequisite"  # one of DependencyType values
    condition: dict = field(default_factory=dict)  # JSONB rule tree; {} = always fires
    effect: dict = field(default_factory=dict)  # e.g. {"set_status": "not_applicable"}
    priority: int = 100  # lower number wins on conflict (see DEFAULT_PRIORITIES)
    generation_method: str = "manual"  # manual | ontology | pattern | domain_pack
    is_active: bool = True  # inactive edges are ignored by the engine
@dataclass
class ControlState:
    """In-memory representation of a control's evaluation state."""

    control_id: str = ""
    raw_status: str = "fail"  # status before dependency resolution
    resolved_status: str = ""  # empty until resolution has run
    context: dict = field(default_factory=dict)  # per-control extra context
@dataclass
class EvaluationResult:
    """Outcome of evaluating a single control within one evaluation run."""

    control_id: str = ""
    evaluation_run_id: str = ""  # UUID shared by all results of one run
    raw_status: str = "fail"  # status as measured, before dependencies
    resolved_status: str = "fail"  # status after applying dependency effects
    dependency_resolution: list = field(default_factory=list)  # MCP-compatible trace entries
    confidence: float = 1.0  # lowered when dependencies fire or cycles are detected
    reasoning: str = ""  # human-readable explanation (set for cycle cases)
# ============================================================================
# CONDITION EVALUATOR
# ============================================================================
def _resolve_field(field_path: str, context: dict) -> Any:
"""Resolve a dot-notation field path against a nested dict.
Examples:
_resolve_field("source.status", {"source": {"status": "pass"}}) -> "pass"
_resolve_field("context.company_size", {"context": {"company_size": "large"}}) -> "large"
"""
parts = field_path.split(".")
current = context
for part in parts:
if isinstance(current, dict):
current = current.get(part)
else:
return None
return current
def _evaluate_single_clause(clause: dict, context: dict) -> bool:
"""Evaluate a single {field, op, value} clause."""
field_path = clause.get("field", "")
op = clause.get("op", "==")
expected = clause.get("value")
actual = _resolve_field(field_path, context)
if op == "==":
return actual == expected
elif op == "!=":
return actual != expected
elif op == "in":
if isinstance(expected, list):
return actual in expected
return False
elif op == "not_in":
if isinstance(expected, list):
return actual not in expected
return True
elif op == ">":
try:
return float(actual) > float(expected)
except (TypeError, ValueError):
return False
elif op == "<":
try:
return float(actual) < float(expected)
except (TypeError, ValueError):
return False
elif op == ">=":
try:
return float(actual) >= float(expected)
except (TypeError, ValueError):
return False
elif op == "<=":
try:
return float(actual) <= float(expected)
except (TypeError, ValueError):
return False
elif op == "contains":
if isinstance(actual, (list, set, tuple)):
return expected in actual
if isinstance(actual, str):
return str(expected) in actual
return False
return False
def evaluate_condition(condition: dict, context: dict) -> bool:
"""Evaluate a generic condition against a context dict.
Supports:
- Empty condition -> True (always matches)
- Simple clause: {"field": "source.status", "op": "==", "value": "pass"}
- Compound AND: {"operator": "AND", "clauses": [...]}
- Compound OR: {"operator": "OR", "clauses": [...]}
- Negation: {"operator": "NOT", "clause": {...}}
"""
if not condition:
return True
operator = condition.get("operator")
if operator == "AND":
clauses = condition.get("clauses", [])
return all(evaluate_condition(c, context) for c in clauses)
if operator == "OR":
clauses = condition.get("clauses", [])
return any(evaluate_condition(c, context) for c in clauses)
if operator == "NOT":
clause = condition.get("clause", {})
return not evaluate_condition(clause, context)
# Simple clause with field/op/value
if "field" in condition:
return _evaluate_single_clause(condition, context)
return True
# ============================================================================
# EFFECT APPLIER
# ============================================================================
def apply_effect(effect: dict, current_status: str) -> str:
    """Return the status an effect maps to, or *current_status* unchanged.

    Only {"set_status": "<valid EvaluationStatus value>"} is honoured;
    a missing or invalid set_status leaves the status as-is.
    """
    requested = effect.get("set_status")
    known = {member.value for member in EvaluationStatus}
    return requested if requested in known else current_status
# ============================================================================
# CYCLE DETECTION
# ============================================================================
# DFS colouring states: unvisited / on the current DFS stack / fully explored.
WHITE, GRAY, BLACK = 0, 1, 2


def detect_cycles(dependencies: list[Dependency]) -> list[list[str]]:
    """Find cycles among active dependency edges via depth-first search.

    Returns one list of control IDs per detected cycle; an empty result
    means the graph is acyclic. Inactive edges are ignored.
    """
    adjacency: dict[str, list[str]] = defaultdict(list)
    nodes: set[str] = set()
    for edge in dependencies:
        if not edge.is_active:
            continue
        adjacency[edge.source_control_id].append(edge.target_control_id)
        nodes.add(edge.source_control_id)
        nodes.add(edge.target_control_id)

    colour = dict.fromkeys(nodes, WHITE)
    parent: dict[str, Optional[str]] = dict.fromkeys(nodes)
    found: list[list[str]] = []

    def visit(node: str) -> None:
        colour[node] = GRAY
        for nxt in adjacency.get(node, []):
            state = colour.get(nxt, WHITE)
            if state == GRAY:
                # Back edge to an ancestor on the stack: walk the parent
                # chain to reconstruct the loop's membership.
                loop = [nxt, node]
                step = parent.get(node)
                while step and step != nxt:
                    loop.append(step)
                    step = parent.get(step)
                found.append(loop)
            elif state == WHITE:
                parent[nxt] = node
                visit(nxt)
        colour[node] = BLACK

    for start in nodes:
        if colour[start] == WHITE:
            visit(start)
    return found
def topological_sort(dependencies: list[Dependency]) -> list[str]:
    """Order control IDs so sources (prerequisites) precede their targets.

    Kahn's algorithm over active edges; ties are broken lexicographically
    so the output is deterministic. Controls that appear in no dependency
    are omitted; nodes stuck in cycles never reach in-degree zero and are
    likewise left out.
    """
    successors: dict[str, list[str]] = defaultdict(list)
    incoming: dict[str, int] = defaultdict(int)
    nodes: set[str] = set()
    for edge in dependencies:
        if not edge.is_active:
            continue
        # source -> target: the source must be evaluated first.
        successors[edge.source_control_id].append(edge.target_control_id)
        incoming.setdefault(edge.source_control_id, 0)
        incoming[edge.target_control_id] += 1
        nodes.add(edge.source_control_id)
        nodes.add(edge.target_control_id)

    ready = [n for n in nodes if incoming[n] == 0]
    order: list[str] = []
    while ready:
        node = min(ready)  # deterministic: smallest available ID first
        ready.remove(node)
        order.append(node)
        for nxt in successors.get(node, []):
            incoming[nxt] -= 1
            if incoming[nxt] == 0:
                ready.append(nxt)
    return order
# ============================================================================
# MAIN EVALUATION ENGINE
# ============================================================================
def evaluate_controls(
    control_states: dict[str, ControlState],
    dependencies: list[Dependency],
    context: dict,
) -> dict[str, EvaluationResult]:
    """Evaluate all controls considering dependencies.

    Args:
        control_states: control_id -> ControlState (with raw_status)
        dependencies: all active dependencies
        context: company profile (industry, company_size, scope_signals, etc.)

    Returns:
        control_id -> EvaluationResult (with resolved_status + trace)

    Algorithm:
        1. Build adjacency (target -> dependencies)
        2. Detect cycles -> involved controls = review_required
        3. Topological sort for evaluation order
        4. For each control: evaluate conditions, apply highest-priority effect
        5. Record full dependency trace for MCP output
    """
    # One run id groups every result produced by this call.
    evaluation_run_id = str(uuid.uuid4())

    # 1. Build adjacency: target_control_id -> list of dependencies
    target_deps: dict[str, list[Dependency]] = defaultdict(list)
    for dep in dependencies:
        if dep.is_active:
            target_deps[dep.target_control_id].append(dep)

    # 2. Cycle detection
    cycles = detect_cycles(dependencies)
    cycle_controls: set[str] = set()
    for cycle in cycles:
        cycle_controls.update(cycle)

    # 3. Topological sort (excluding cycle controls)
    safe_deps = [
        d for d in dependencies
        if d.is_active
        and d.source_control_id not in cycle_controls
        and d.target_control_id not in cycle_controls
    ]
    eval_order = topological_sort(safe_deps)
    # Add remaining controls (those not in any dependency + cycle controls)
    all_ids = set(control_states.keys())
    remaining = all_ids - set(eval_order)
    eval_order.extend(sorted(remaining))

    # 4. Iterate and evaluate
    results: dict[str, EvaluationResult] = {}
    for control_id in eval_order:
        state = control_states.get(control_id)
        if not state:
            # A dependency references a control we have no state for — skip.
            continue
        # Cycle controls -> review_required (cannot be resolved automatically)
        if control_id in cycle_controls:
            results[control_id] = EvaluationResult(
                control_id=control_id,
                evaluation_run_id=evaluation_run_id,
                raw_status=state.raw_status,
                resolved_status=EvaluationStatus.REVIEW_REQUIRED.value,
                dependency_resolution=[{"cycle_detected": True}],
                confidence=0.5,
                reasoning="Zyklische Abhaengigkeit erkannt — manuelle Pruefung erforderlich.",
            )
            continue
        # Collect dependencies targeting this control
        deps_for_target = target_deps.get(control_id, [])
        if not deps_for_target:
            # No incoming dependencies: the raw status stands unchanged.
            results[control_id] = EvaluationResult(
                control_id=control_id,
                evaluation_run_id=evaluation_run_id,
                raw_status=state.raw_status,
                resolved_status=state.raw_status,
                confidence=1.0,
            )
            continue
        # Evaluate each dependency's condition, highest priority first.
        matching_effects: list[tuple[int, dict, Dependency]] = []
        trace: list[dict] = []
        for dep in sorted(deps_for_target, key=lambda d: d.priority):
            source_state = control_states.get(dep.source_control_id)
            source_result = results.get(dep.source_control_id)
            # Prefer the source's already-resolved status; topological order
            # means sources are normally evaluated before their targets.
            source_status = "unknown"
            if source_result:
                source_status = source_result.resolved_status
            elif source_state:
                source_status = source_state.raw_status
            eval_ctx = {
                "source": {"status": source_status},
                "target": {"status": state.raw_status},
                "context": context,
            }
            condition_met = evaluate_condition(dep.condition, eval_ctx)
            trace_entry = {
                "dependency_id": dep.id,
                "dependency_type": dep.dependency_type,
                "source_control_id": dep.source_control_id,
                "source_status": source_status,
                "condition_met": condition_met,
                "effect_applied": dep.effect if condition_met else None,
                "priority": dep.priority,
            }
            trace.append(trace_entry)
            if condition_met:
                matching_effects.append((dep.priority, dep.effect, dep))
        # Apply highest-priority (lowest number) effect; others are recorded
        # in the trace but do not change the status.
        resolved = state.raw_status
        if matching_effects:
            matching_effects.sort(key=lambda x: x[0])
            _, best_effect, _ = matching_effects[0]
            resolved = apply_effect(best_effect, state.raw_status)
        results[control_id] = EvaluationResult(
            control_id=control_id,
            evaluation_run_id=evaluation_run_id,
            raw_status=state.raw_status,
            resolved_status=resolved,
            dependency_resolution=trace,
            confidence=_compute_confidence(trace),
        )
    return results
def _compute_confidence(trace: list[dict]) -> float:
"""Compute confidence based on dependency resolution trace."""
if not trace:
return 1.0
met_count = sum(1 for t in trace if t.get("condition_met"))
total = len(trace)
if met_count == 0:
return 1.0 # No dependencies fired -> raw status stands
if met_count == 1:
return 0.95 # Single dependency resolved
# Multiple dependencies -> slightly lower confidence
return max(0.7, 1.0 - (met_count - 1) * 0.1)
# ============================================================================
# DB INTERACTION (separate from pure logic)
# ============================================================================
def load_dependencies_for_controls(
    db, control_ids: list[str],
) -> list[Dependency]:
    """Load all active dependencies involving the given control IDs.

    Args:
        db: SQLAlchemy session/connection exposing ``execute``.
        control_ids: control UUIDs as strings; an empty list short-circuits.

    Returns:
        Dependency objects where a given control appears as source OR target.
    """
    if not control_ids:
        return []
    # CAST(:ids AS uuid[]) lets a plain Python list of strings be bound.
    rows = db.execute(
        text("""
            SELECT id, source_control_id, target_control_id,
                   dependency_type, condition, effect, priority,
                   generation_method, is_active
            FROM control_dependencies
            WHERE is_active = TRUE
              AND (source_control_id = ANY(CAST(:ids AS uuid[]))
                   OR target_control_id = ANY(CAST(:ids AS uuid[])))
        """),
        {"ids": control_ids},
    ).fetchall()
    return [
        Dependency(
            id=str(r[0]),
            source_control_id=str(r[1]),
            target_control_id=str(r[2]),
            dependency_type=r[3],
            # JSONB columns should deserialize to dicts; anything else -> {}.
            condition=r[4] if isinstance(r[4], dict) else {},
            effect=r[5] if isinstance(r[5], dict) else {},
            priority=r[6],
            generation_method=r[7],
            is_active=r[8],
        )
        for r in rows
    ]
def load_all_active_dependencies(db) -> list[Dependency]:
    """Load all active dependencies, ordered by ascending priority.

    Args:
        db: SQLAlchemy session/connection exposing ``execute``.
    """
    rows = db.execute(
        text("""
            SELECT id, source_control_id, target_control_id,
                   dependency_type, condition, effect, priority,
                   generation_method, is_active
            FROM control_dependencies
            WHERE is_active = TRUE
            ORDER BY priority
        """),
    ).fetchall()
    return [
        Dependency(
            id=str(r[0]),
            source_control_id=str(r[1]),
            target_control_id=str(r[2]),
            dependency_type=r[3],
            # JSONB columns should deserialize to dicts; anything else -> {}.
            condition=r[4] if isinstance(r[4], dict) else {},
            effect=r[5] if isinstance(r[5], dict) else {},
            priority=r[6],
            generation_method=r[7],
            is_active=r[8],
        )
        for r in rows
    ]
def store_dependency(db, dep: Dependency) -> str:
    """Insert a dependency, return its UUID.

    Upserts on (source_control_id, target_control_id, dependency_type):
    an existing edge gets its condition/effect/priority refreshed.
    Committing the transaction is the caller's responsibility.
    """
    row = db.execute(
        text("""
            INSERT INTO control_dependencies
                (source_control_id, target_control_id, dependency_type,
                 condition, effect, priority, generation_method, generation_metadata)
            VALUES
                (CAST(:src AS uuid), CAST(:tgt AS uuid), :dtype,
                 CAST(:cond AS jsonb), CAST(:eff AS jsonb), :prio, :gmethod, CAST(:gmeta AS jsonb))
            ON CONFLICT (source_control_id, target_control_id, dependency_type)
            DO UPDATE SET
                condition = EXCLUDED.condition,
                effect = EXCLUDED.effect,
                priority = EXCLUDED.priority,
                updated_at = NOW()
            RETURNING id::text
        """),
        {
            "src": dep.source_control_id,
            "tgt": dep.target_control_id,
            "dtype": dep.dependency_type,
            "cond": json.dumps(dep.condition),
            "eff": json.dumps(dep.effect),
            "prio": dep.priority,
            "gmethod": dep.generation_method,
            "gmeta": json.dumps({}),  # generation_metadata is currently always empty here
        },
    ).fetchone()
    return row[0] if row else ""
def store_evaluation_results(
    db, results: dict[str, EvaluationResult], company_profile: dict,
) -> int:
    """Batch insert evaluation results. Returns row count.

    Upserts on (control_id, evaluation_run_id); status, trace and
    confidence are refreshed on conflict, while company_profile and
    reasoning keep their originally inserted values. Committing the
    transaction is the caller's responsibility.
    """
    count = 0
    for result in results.values():
        db.execute(
            text("""
                INSERT INTO control_evaluation_results
                    (control_id, evaluation_run_id, company_profile,
                     raw_status, resolved_status, dependency_resolution,
                     confidence, reasoning)
                VALUES
                    (CAST(:cid AS uuid), CAST(:rid AS uuid), CAST(:prof AS jsonb),
                     :raw, :resolved, CAST(:trace AS jsonb),
                     :conf, :reason)
                ON CONFLICT (control_id, evaluation_run_id)
                DO UPDATE SET
                    resolved_status = EXCLUDED.resolved_status,
                    dependency_resolution = EXCLUDED.dependency_resolution,
                    confidence = EXCLUDED.confidence
            """),
            {
                "cid": result.control_id,
                "rid": result.evaluation_run_id,
                "prof": json.dumps(company_profile),
                "raw": result.raw_status,
                "resolved": result.resolved_status,
                "trace": json.dumps(result.dependency_resolution),
                "conf": result.confidence,
                "reason": result.reasoning,
            },
        )
        count += 1
    return count

View File

@@ -0,0 +1,381 @@
"""
Dependency Generator — automatic discovery of control dependencies.
Three strategies:
1. Ontology-based: same normalized_object + phase sequence -> prerequisite
2. Pattern-based: known patterns (define->implement, implement->monitor, etc.)
3. Domain packs: YAML-defined rules for specific regulatory domains
"""
from __future__ import annotations
import logging
import os
import re
from collections import defaultdict
from typing import Optional
import yaml
from services.dependency_engine import Dependency, DEFAULT_PRIORITIES
logger = logging.getLogger(__name__)
# ============================================================================
# PHASE ORDERING (imported from ontology)
# ============================================================================
from services.control_ontology import PHASE_ORDER
# ============================================================================
# PATTERN RULES
# ============================================================================
# Built-in pattern rules for generate_pattern_dependencies.
# Rule schema:
#   source_filter / target_filter: match controls by action_type
#     ("action_type" = exact match, "action_type_in" = membership)
#   match_on: "normalized_object" restricts pairs to the same object
#   dependency_type / condition / effect / priority: copied onto each
#     generated Dependency
PATTERN_RULES: list[dict] = [
    {
        "name": "define_before_implement",
        "source_filter": {"action_type": "define"},
        "target_filter": {"action_type": "implement"},
        "match_on": "normalized_object",
        "dependency_type": "prerequisite",
        "condition": {},
        "effect": {"set_status": "review_required"},
        "priority": 50,
    },
    {
        "name": "implement_before_monitor",
        "source_filter": {"action_type_in": ["implement", "configure", "enforce"]},
        "target_filter": {"action_type_in": ["monitor", "review", "test"]},
        "match_on": "normalized_object",
        "dependency_type": "prerequisite",
        "condition": {},
        "effect": {"set_status": "review_required"},
        "priority": 50,
    },
    {
        "name": "define_before_enforce",
        "source_filter": {"action_type": "define"},
        "target_filter": {"action_type": "enforce"},
        "match_on": "normalized_object",
        "dependency_type": "prerequisite",
        "condition": {},
        "effect": {"set_status": "review_required"},
        "priority": 50,
    },
    {
        "name": "implement_before_validate",
        "source_filter": {"action_type_in": ["implement", "configure"]},
        "target_filter": {"action_type_in": ["validate", "verify"]},
        "match_on": "normalized_object",
        "dependency_type": "prerequisite",
        "condition": {},
        "effect": {"set_status": "review_required"},
        "priority": 50,
    },
    {
        # Slightly lower priority: training is a softer prerequisite.
        "name": "train_before_review",
        "source_filter": {"action_type": "train"},
        "target_filter": {"action_type_in": ["review", "assess"]},
        "match_on": "normalized_object",
        "dependency_type": "prerequisite",
        "condition": {},
        "effect": {"set_status": "review_required"},
        "priority": 60,
    },
]
# ============================================================================
# HELPER: Parse merge_key into components
# ============================================================================
def _parse_merge_key(merge_key: str) -> dict:
"""Parse 'action_type:normalized_object:phase[:asset_scope]' into components."""
parts = merge_key.split(":")
result = {
"action_type": parts[0] if len(parts) > 0 else "",
"normalized_object": parts[1] if len(parts) > 1 else "",
"phase": parts[2] if len(parts) > 2 else "",
"asset_scope": parts[3] if len(parts) > 3 else "",
}
return result
def _get_control_merge_key(control: dict) -> str:
"""Extract merge_key from a control dict (from generation_metadata or top-level)."""
mk = control.get("merge_key", "")
if not mk:
meta = control.get("generation_metadata", {})
if isinstance(meta, str):
try:
import json
meta = json.loads(meta)
except (ValueError, TypeError):
meta = {}
mk = meta.get("merge_group_hint", "")
return mk
# ============================================================================
# ONTOLOGY-BASED GENERATOR
# ============================================================================
def generate_ontology_dependencies(controls: list[dict]) -> list[Dependency]:
    """Derive prerequisite edges from lifecycle phase ordering.

    Controls sharing the same normalized_object are ordered by lifecycle
    phase; every strictly-earlier control becomes a prerequisite of every
    strictly-later one (all ordered pairs, not only adjacent phases).
    Controls on the same phase level get no edge between them.

    Side effect: annotates each grouped control dict in place with
    ``_parsed_mk`` and ``_phase_order`` for reuse by the pattern generator.
    """
    by_object: dict[str, list[dict]] = defaultdict(list)
    for ctrl in controls:
        merge_key = _get_control_merge_key(ctrl)
        if not merge_key:
            continue
        parsed = _parse_merge_key(merge_key)
        if not parsed["normalized_object"]:
            continue
        ctrl["_parsed_mk"] = parsed
        ctrl["_phase_order"] = PHASE_ORDER.get(parsed["phase"], 6)
        by_object[parsed["normalized_object"]].append(ctrl)

    edges: list[Dependency] = []
    for members in by_object.values():
        if len(members) < 2:
            continue
        members.sort(key=lambda c: c["_phase_order"])
        for idx, earlier in enumerate(members):
            for later in members[idx + 1:]:
                if earlier["_phase_order"] >= later["_phase_order"]:
                    continue
                edges.append(
                    Dependency(
                        source_control_id=earlier.get("id", earlier.get("control_id", "")),
                        target_control_id=later.get("id", later.get("control_id", "")),
                        dependency_type="prerequisite",
                        condition={},
                        effect={"set_status": "review_required"},
                        priority=DEFAULT_PRIORITIES["prerequisite"],
                        generation_method="ontology",
                    )
                )
    return edges
# ============================================================================
# PATTERN-BASED GENERATOR
# ============================================================================
def _matches_filter(control: dict, filter_: dict) -> bool:
"""Check if a control matches a pattern filter."""
parsed = control.get("_parsed_mk", {})
action = parsed.get("action_type", "")
if "action_type" in filter_:
if action != filter_["action_type"]:
return False
if "action_type_in" in filter_:
if action not in filter_["action_type_in"]:
return False
return True
def generate_pattern_dependencies(
    controls: list[dict],
    rules: Optional[list[dict]] = None,
) -> list[Dependency]:
    """Create dependencies by applying pattern *rules* to control pairs.

    Defaults to the module-level PATTERN_RULES when *rules* is None.
    When a rule has ``match_on == "normalized_object"``, only pairs with
    the same non-empty normalized_object qualify. Self-edges are skipped.
    """
    active_rules = PATTERN_RULES if rules is None else rules

    # Make sure every control carries a parsed merge key for filtering.
    for ctrl in controls:
        if "_parsed_mk" not in ctrl:
            key = _get_control_merge_key(ctrl)
            ctrl["_parsed_mk"] = _parse_merge_key(key) if key else {}

    found: list[Dependency] = []
    for rule in active_rules:
        src_candidates = [c for c in controls if _matches_filter(c, rule["source_filter"])]
        tgt_candidates = [c for c in controls if _matches_filter(c, rule["target_filter"])]
        same_object = rule.get("match_on") == "normalized_object"
        for src in src_candidates:
            src_id = src.get("id", src.get("control_id", ""))
            src_obj = src.get("_parsed_mk", {}).get("normalized_object", "")
            for tgt in tgt_candidates:
                tgt_id = tgt.get("id", tgt.get("control_id", ""))
                if src_id == tgt_id:
                    continue
                if same_object:
                    tgt_obj = tgt.get("_parsed_mk", {}).get("normalized_object", "")
                    if not src_obj or src_obj != tgt_obj:
                        continue
                found.append(
                    Dependency(
                        source_control_id=src_id,
                        target_control_id=tgt_id,
                        dependency_type=rule["dependency_type"],
                        condition=rule.get("condition", {}),
                        effect=rule.get("effect", {"set_status": "review_required"}),
                        priority=rule.get("priority", 100),
                        generation_method="pattern",
                    )
                )
    return found
# ============================================================================
# DOMAIN PACK GENERATOR
# ============================================================================
def load_domain_pack(path: str) -> dict:
    """Load a YAML domain pack.

    Returns {} for an empty file. Uses yaml.safe_load, so a pack cannot
    execute arbitrary Python on load.
    """
    with open(path, "r", encoding="utf-8") as f:
        return yaml.safe_load(f) or {}
def _title_matches(title: str, patterns: list[str]) -> bool:
"""Check if a title contains any of the given patterns (case-insensitive)."""
title_lower = title.lower()
return any(p.lower() in title_lower for p in patterns)
def generate_domain_dependencies(
    controls: list[dict],
    domain_pack_dir: str = "",
) -> list[Dependency]:
    """Generate dependencies from every YAML domain pack in a directory.

    Defaults to <project>/data/domain_packs next to this package. Packs
    are processed in sorted filename order; a missing directory yields [].
    Controls are matched against each rule's source/target
    ``title_contains`` patterns (case-insensitive substrings).
    """
    pack_dir = domain_pack_dir or os.path.join(
        os.path.dirname(os.path.dirname(__file__)), "data", "domain_packs"
    )
    if not os.path.isdir(pack_dir):
        return []

    results: list[Dependency] = []
    pack_files = [f for f in sorted(os.listdir(pack_dir)) if f.endswith((".yaml", ".yml"))]
    for fname in pack_files:
        pack = load_domain_pack(os.path.join(pack_dir, fname))
        for rule in pack.get("rules", []):
            src_patterns = rule.get("source_match", {}).get("title_contains", [])
            tgt_patterns = rule.get("target_match", {}).get("title_contains", [])
            # Rules without patterns on either side match no controls.
            src_list = [
                c for c in controls
                if src_patterns and _title_matches(c.get("title", ""), src_patterns)
            ]
            tgt_list = [
                c for c in controls
                if tgt_patterns and _title_matches(c.get("title", ""), tgt_patterns)
            ]
            dep_type = rule.get("dependency_type", "prerequisite")
            for src in src_list:
                src_id = src.get("id", src.get("control_id", ""))
                for tgt in tgt_list:
                    tgt_id = tgt.get("id", tgt.get("control_id", ""))
                    if src_id == tgt_id:
                        continue
                    results.append(
                        Dependency(
                            source_control_id=src_id,
                            target_control_id=tgt_id,
                            dependency_type=dep_type,
                            condition=rule.get("condition", {
                                "field": "source.status", "op": "==", "value": "pass",
                            }),
                            effect=rule.get("effect", {"set_status": "not_applicable"}),
                            priority=rule.get("priority", DEFAULT_PRIORITIES.get(dep_type, 100)),
                            generation_method="domain_pack",
                        )
                    )
    return results
# ============================================================================
# TOP-LEVEL GENERATOR
# ============================================================================
def generate_all_dependencies(
    controls: list[dict],
    enable_ontology: bool = True,
    enable_patterns: bool = True,
    enable_domain_packs: bool = True,
    domain_pack_dir: str = "",
) -> tuple[list[Dependency], dict]:
    """Run the enabled generators and return (unique_dependencies, stats).

    Deduplication is by the (source, target, dependency_type) triple; the
    first occurrence wins, so ontology edges take precedence over pattern
    edges, which take precedence over domain-pack edges.
    """
    stats = {
        "ontology_generated": 0,
        "pattern_generated": 0,
        "domain_generated": 0,
        "total_before_dedup": 0,
        "total_unique": 0,
        "duplicates_removed": 0,
    }
    collected: list[Dependency] = []

    if enable_ontology:
        deps = generate_ontology_dependencies(controls)
        stats["ontology_generated"] = len(deps)
        collected.extend(deps)
    if enable_patterns:
        deps = generate_pattern_dependencies(controls)
        stats["pattern_generated"] = len(deps)
        collected.extend(deps)
    if enable_domain_packs:
        deps = generate_domain_dependencies(controls, domain_pack_dir)
        stats["domain_generated"] = len(deps)
        collected.extend(deps)

    stats["total_before_dedup"] = len(collected)

    unique: list[Dependency] = []
    seen: set[tuple[str, str, str]] = set()
    for dep in collected:
        triple = (dep.source_control_id, dep.target_control_id, dep.dependency_type)
        if triple in seen:
            continue
        seen.add(triple)
        unique.append(dep)

    stats["total_unique"] = len(unique)
    stats["duplicates_removed"] = stats["total_before_dedup"] - stats["total_unique"]
    return unique, stats