feat(pipeline): Pass 0b prompt v4 + Haiku backfill endpoint

Prompt v4 adds 6 new fields to Pass 0b output:
- applicability: condition rules (same format as dependency engine)
- check_type: expanded to 10 granular types
- scanner_hint: search_terms + negative_indicators for MCP
- manual_review_required_if: escalation conditions
- evidence_type: code/process/hybrid
- provides_context: context variables this control creates

New endpoint POST /generate/backfill-extended:
- Backfills existing 9k controls via Haiku Batch API (~$1.50)
- Adds all 6 new fields to generation_metadata
- Supports dry_run mode

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-26 23:14:59 +02:00
parent 96b8f25747
commit 5ef039a6bc
2 changed files with 285 additions and 4 deletions
--- a/control-pipeline/services/decomposition_pass.py
+++ b/control-pipeline/services/decomposition_pass.py
@@ -223,6 +223,12 @@ class AtomicControlCandidate:
    # Dependency Engine Felder
    dependency_hints: list = field(default_factory=list)
    lifecycle_phase_order: int = 0
+    # Erweiterte Felder (v4)
+    applicability: dict = field(default_factory=dict)
+    scanner_hint: dict = field(default_factory=dict)
+    manual_review_required_if: list = field(default_factory=list)
+    evidence_type: str = ""
+    provides_context: list = field(default_factory=list)

    def to_dict(self) -> dict:
        return {
@@ -243,6 +249,11 @@ class AtomicControlCandidate:
            "check_type": self.check_type,
            "dependency_hints": self.dependency_hints,
            "lifecycle_phase_order": self.lifecycle_phase_order,
+            "applicability": self.applicability,
+            "scanner_hint": self.scanner_hint,
+            "manual_review_required_if": self.manual_review_required_if,
+            "evidence_type": self.evidence_type,
+            "provides_context": self.provides_context,
        }


@@ -457,6 +468,23 @@ WICHTIGE REGELN:
   - "test:authentication_mechanism:testing"
   - "report:supervisory_authority:reporting"

+8. APPLICABILITY + SCANNER: Bestimme fuer jedes Control:
+   - applicability: Unter welchen Bedingungen gilt dieses Control?
+     Wenn universell anwendbar: leeres Objekt {}
+     Sonst: {"field": "context.SIGNAL", "op": "==", "value": true}
+     Zusammengesetzt: {"operator": "AND", "clauses": [{...}, {...}]}
+     Typische Felder: context.uses_oauth, context.has_public_api,
+     context.processes_personal_data, context.uses_ai_system,
+     context.has_employees, context.sells_online, context.uses_encryption,
+     context.has_third_party_components, context.is_critical_infrastructure
+   - check_type: Praeziser Prueftyp (EINEN der 10 Werte waehlen)
+   - scanner_hint: Technische Suchbegriffe fuer automatisierte Pruefung
+     + negative_indicators die auf Nicht-Einhaltung hindeuten
+   - manual_review_required_if: Wann manuelle Pruefung statt Scanner noetig
+   - evidence_type: code (technisch pruefbar), process (organisatorisch), hybrid
+   - provides_context: Welche Context-Variablen erzeugt dieses Control bei Pruefung?
+     Beispiel: Ein Control "OAuth-Clients klassifizieren" liefert context.oauth_client_types
+
 Das Control muss UMSETZBAR sein — keine Gesetzesparaphrase.
 Antworte NUR als JSON. Keine Erklaerungen."""

@@ -2137,10 +2165,15 @@ Antworte als JSON:
  "fail_criteria": ["Wann gilt dieses Control als nicht erfuellt?"],
  "severity": "critical|high|medium|low",
  "category": "security|privacy|governance|operations|finance|reporting",
-  "check_type": "technical_config_check|document_clause_check|code_pattern_check|evidence_check|interview_required",
+  "check_type": "technical_config_check|code_pattern_check|runtime_security_test|document_policy_check|document_classification_check|document_contract_check|evidence_artifact_check|process_verification|training_verification|interview_assessment",
  "merge_key": "action_type:normalized_object:control_phase",
  "dependency_hints": ["dependency_type:action_type:normalized_object (Voraussetzungen, Ersetzungen, Kompensationen)"],
-  "lifecycle_phase_order": "1-13 (1=scope, 2=definition, 4=implementation, 7=monitoring, 8=testing, 12=reporting)"
+  "lifecycle_phase_order": "1-13 (1=scope, 2=definition, 4=implementation, 7=monitoring, 8=testing, 12=reporting)",
+  "applicability": {{}},
+  "scanner_hint": {{"search_terms": ["technischer Suchbegriff"], "negative_indicators": ["Negativindikator"]}},
+  "manual_review_required_if": ["Bedingung fuer manuelle Pruefung"],
+  "evidence_type": "code|process|hybrid",
+  "provides_context": ["context.VARIABLE die dieses Control bei Pruefung erzeugt"]
 }}"""


@@ -2235,10 +2268,15 @@ Jedes Control hat dieses Format:
  "fail_criteria": ["Wann gilt dieses Control als nicht erfuellt?"],
  "severity": "critical|high|medium|low",
  "category": "security|privacy|governance|operations|finance|reporting",
-  "check_type": "technical_config_check|document_clause_check|code_pattern_check|evidence_check|interview_required",
+  "check_type": "technical_config_check|code_pattern_check|runtime_security_test|document_policy_check|document_classification_check|document_contract_check|evidence_artifact_check|process_verification|training_verification|interview_assessment",
  "merge_key": "action_type:normalized_object:control_phase",
  "dependency_hints": ["dependency_type:action_type:normalized_object (Voraussetzungen, Ersetzungen, Kompensationen)"],
-  "lifecycle_phase_order": "1-13 (1=scope, 2=definition, 4=implementation, 7=monitoring, 8=testing, 12=reporting)"
+  "lifecycle_phase_order": "1-13 (1=scope, 2=definition, 4=implementation, 7=monitoring, 8=testing, 12=reporting)",
+  "applicability": {{}},
+  "scanner_hint": {{"search_terms": ["technischer Suchbegriff"], "negative_indicators": ["Negativindikator"]}},
+  "manual_review_required_if": ["Bedingung fuer manuelle Pruefung"],
+  "evidence_type": "code|process|hybrid",
+  "provides_context": ["context.VARIABLE die dieses Control bei Pruefung erzeugt"]
 }}"""


@@ -2982,6 +3020,11 @@ class DecompositionPass:
                check_type=parsed.get("check_type", ""),
                dependency_hints=_ensure_list(parsed.get("dependency_hints", [])),
                lifecycle_phase_order=int(parsed.get("lifecycle_phase_order", 0) or 0),
+                applicability=parsed.get("applicability") or {},
+                scanner_hint=parsed.get("scanner_hint") or {},
+                manual_review_required_if=_ensure_list(parsed.get("manual_review_required_if", [])),
+                evidence_type=parsed.get("evidence_type", ""),
+                provides_context=_ensure_list(parsed.get("provides_context", [])),
            )
            # Store merge_key from LLM output in metadata
            llm_merge_key = parsed.get("merge_key", "")
@@ -3458,6 +3501,12 @@ class DecompositionPass:
                    # Dependency Engine Felder
                    "dependency_hints": atomic.dependency_hints or [],
                    "lifecycle_phase_order": atomic.lifecycle_phase_order or 0,
+                    # Erweiterte Felder (v4)
+                    "applicability": atomic.applicability or {},
+                    "scanner_hint": atomic.scanner_hint or {},
+                    "manual_review_required_if": atomic.manual_review_required_if or [],
+                    "evidence_type": atomic.evidence_type or "",
+                    "provides_context": atomic.provides_context or [],
                }),
                "framework_id": "14b1bdd2-abc7-4a43-adae-14471ee5c7cf",
            },