feat(multi-layer): complete Multi-Layer Control Architecture (Phases 1-8 + Pass 0)
Some checks failed
CI/CD / go-lint (push) Has been skipped
CI/CD / python-lint (push) Has been skipped
CI/CD / nodejs-lint (push) Has been skipped
CI/CD / test-go-ai-compliance (push) Failing after 47s
CI/CD / test-python-backend-compliance (push) Successful in 33s
CI/CD / test-python-document-crawler (push) Successful in 24s
CI/CD / test-python-dsms-gateway (push) Successful in 18s
CI/CD / validate-canonical-controls (push) Successful in 11s
CI/CD / Deploy (push) Has been skipped

Implements the full Multi-Layer Control Architecture for migrating ~25,000
Rich Controls into atomic, deduplicated Master Controls with full traceability.

Architecture: Legal Source → Obligation → Control Pattern → Master Control → Customer Instance

New services:
- ObligationExtractor: 3-tier extraction (exact → embedding → LLM)
- PatternMatcher: 2-tier matching (keyword + embedding, with a domain bonus)
- ControlComposer: Pattern + Obligation → Master Control
- PipelineAdapter: Pipeline integration + Migration Passes 1-5
- DecompositionPass: Pass 0a/0b — Rich Control → atomic Controls
- CrosswalkRoutes: 15 API endpoints under /v1/canonical/

New DB schema:
- Migration 060: obligation_extractions, control_patterns, crosswalk_matrix
- Migration 061: obligation_candidates, parent_control_uuid tracking

Pattern Library: 50 YAML patterns (30 core + 20 IT-security)
Go SDK: Pattern loader with YAML validation and indexing
Documentation: MkDocs updated with full architecture overview

500 Python tests passing across all components.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-03-17 09:00:37 +01:00
parent 4f6bc8f6f6
commit 825e070ed9
23 changed files with 13553 additions and 0 deletions

View File

@@ -0,0 +1,260 @@
package ucca
import (
"fmt"
"os"
"path/filepath"
"runtime"
"strings"
"gopkg.in/yaml.v3"
)
// ControlPattern represents a reusable control pattern template, decoded from
// one entry of a pattern file's "patterns" list. Each field maps to the
// YAML/JSON key named in its struct tag.
// Pattern ID format: CP-{DOMAIN}-{NNN} (e.g. CP-AUTH-001).
type ControlPattern struct {
// ID identifies the pattern; it is the key of ControlPatternIndex.ByID.
ID string `yaml:"id" json:"id"`
Name string `yaml:"name" json:"name"`
// NameDE is presumably the German display name — confirm against the YAML files.
NameDE string `yaml:"name_de" json:"name_de"`
// Domain and Category are the grouping keys of ControlPatternIndex.ByDomain
// and ControlPatternIndex.ByCategory.
Domain string `yaml:"domain" json:"domain"`
Category string `yaml:"category" json:"category"`
Description string `yaml:"description" json:"description"`
// Template fields. This file only decodes and stores them; how they are
// turned into a concrete control is not defined here.
ObjectiveTemplate string `yaml:"objective_template" json:"objective_template"`
RationaleTemplate string `yaml:"rationale_template" json:"rationale_template"`
RequirementsTemplate []string `yaml:"requirements_template" json:"requirements_template"`
TestProcedureTemplate []string `yaml:"test_procedure_template" json:"test_procedure_template"`
EvidenceTemplate []string `yaml:"evidence_template" json:"evidence_template"`
// SeverityDefault: the package tests accept "low", "medium", "high" and "critical".
SeverityDefault string `yaml:"severity_default" json:"severity_default"`
// Optional fields below marked omitempty are left out of the encoded
// output when empty.
ImplementationEffortDefault string `yaml:"implementation_effort_default,omitempty" json:"implementation_effort_default,omitempty"`
OpenAnchorRefs []AnchorRef `yaml:"open_anchor_refs,omitempty" json:"open_anchor_refs,omitempty"`
// ObligationMatchKeywords are lower-cased and indexed in
// ControlPatternIndex.ByKeyword; MatchByKeywords searches for them as
// substrings of the input text.
ObligationMatchKeywords []string `yaml:"obligation_match_keywords" json:"obligation_match_keywords"`
// Tags are the grouping keys of ControlPatternIndex.ByTag.
Tags []string `yaml:"tags" json:"tags"`
// ComposableWith lists IDs of other patterns; the package tests require
// every listed ID to exist and forbid a pattern referencing itself.
ComposableWith []string `yaml:"composable_with,omitempty" json:"composable_with,omitempty"`
}
// AnchorRef links a pattern to an open-source framework reference.
// It is the element type of ControlPattern.OpenAnchorRefs.
type AnchorRef struct {
// Framework names the referenced framework (YAML key "framework").
Framework string `yaml:"framework" json:"framework"`
// Ref is the reference within that framework (YAML key "ref"); its format
// is not constrained by this file.
Ref string `yaml:"ref" json:"ref"`
}
// patternFile is the top-level YAML structure of a single pattern file.
// LoadControlPatterns decodes each file into this type and keeps only Patterns;
// Version and Description are decoded but not used in this file.
type patternFile struct {
Version string `yaml:"version"`
Description string `yaml:"description"`
// Patterns holds the control patterns declared in the file.
Patterns []ControlPattern `yaml:"patterns"`
}
// ControlPatternIndex provides fast lookup of control patterns.
// It is populated by buildPatternIndex; every map value and every element of
// All points at the same underlying ControlPattern values.
type ControlPatternIndex struct {
// ByID maps a pattern ID to its pattern; IDs are unique within an index.
ByID map[string]*ControlPattern
// ByDomain, ByCategory and ByTag group patterns by the respective field.
ByDomain map[string][]*ControlPattern
ByCategory map[string][]*ControlPattern
ByTag map[string][]*ControlPattern
// ByKeyword is keyed by the lower-cased obligation match keyword.
ByKeyword map[string][]*ControlPattern // keyword -> patterns (for obligation matching)
// All lists every indexed pattern in the order it was passed to buildPatternIndex.
All []*ControlPattern
}
// LoadControlPatterns locates the control_patterns directory, decodes every
// pattern file in it and returns an index over all patterns found.
//
// Sub-directories, files whose name starts with "_" (schema and metadata
// files) and files without a .yaml or .yml suffix are ignored. An error is
// returned when the directory cannot be located or listed, when any pattern
// file cannot be read or parsed, when no pattern was loaded at all, or when
// building the index fails.
func LoadControlPatterns() (*ControlPatternIndex, error) {
	patternsDir, err := findPatternsDir()
	if err != nil {
		return nil, err
	}

	files, err := os.ReadDir(patternsDir)
	if err != nil {
		return nil, fmt.Errorf("failed to read patterns directory: %w", err)
	}

	var loaded []ControlPattern
	for _, f := range files {
		fileName := f.Name()
		isYAML := strings.HasSuffix(fileName, ".yaml") || strings.HasSuffix(fileName, ".yml")
		// Only regular, non-underscore YAML files carry patterns.
		if f.IsDir() || strings.HasPrefix(fileName, "_") || !isYAML {
			continue
		}

		raw, readErr := os.ReadFile(filepath.Join(patternsDir, fileName))
		if readErr != nil {
			return nil, fmt.Errorf("failed to read %s: %w", fileName, readErr)
		}

		var doc patternFile
		if parseErr := yaml.Unmarshal(raw, &doc); parseErr != nil {
			return nil, fmt.Errorf("failed to parse %s: %w", fileName, parseErr)
		}
		loaded = append(loaded, doc.Patterns...)
	}

	if len(loaded) == 0 {
		return nil, fmt.Errorf("no control patterns found in %s", patternsDir)
	}
	return buildPatternIndex(loaded)
}
// findPatternsDir returns the absolute path of the first candidate location
// that exists and is a directory.
//
// Candidates are tried in order: three paths relative to the current working
// directory, then — when the runtime can report this source file's location —
// a path relative to the directory containing this file. An error is returned
// when none of the candidates is a directory.
func findPatternsDir() (string, error) {
	searchPaths := []string{
		"policies/control_patterns",
		"../policies/control_patterns",
		"../../policies/control_patterns",
	}
	if _, thisFile, _, ok := runtime.Caller(0); ok {
		fromSource := filepath.Join(filepath.Dir(thisFile), "../../policies/control_patterns")
		searchPaths = append(searchPaths, fromSource)
	}

	for _, candidate := range searchPaths {
		absPath, err := filepath.Abs(candidate)
		if err != nil {
			continue
		}
		if st, statErr := os.Stat(absPath); statErr == nil && st.IsDir() {
			return absPath, nil
		}
	}
	return "", fmt.Errorf("control_patterns directory not found in any candidate path")
}
// buildPatternIndex builds the lookup index over patterns.
//
// Index keys are normalised the same way the lookup methods normalise their
// argument: IDs and domains are upper-cased, categories, tags and keywords are
// lower-cased. A pattern whose YAML uses different casing is therefore still
// found. Each pattern's ID field is rewritten to its upper-case form so that
// p.ID is always a valid ByID key.
//
// Keywords that are empty or whitespace-only are not indexed: an empty keyword
// is a substring of every text and would make the pattern match everything.
//
// The index keeps pointers into the patterns slice, so the caller must not
// modify or reuse the slice afterwards.
//
// An error is returned for a pattern with an empty ID and for two patterns
// whose IDs are equal ignoring case.
func buildPatternIndex(patterns []ControlPattern) (*ControlPatternIndex, error) {
	idx := &ControlPatternIndex{
		ByID:       make(map[string]*ControlPattern, len(patterns)),
		ByDomain:   make(map[string][]*ControlPattern),
		ByCategory: make(map[string][]*ControlPattern),
		ByTag:      make(map[string][]*ControlPattern),
		ByKeyword:  make(map[string][]*ControlPattern),
	}

	for i := range patterns {
		p := &patterns[i]

		if strings.TrimSpace(p.ID) == "" {
			return nil, fmt.Errorf("pattern at position %d has an empty ID", i)
		}
		// Canonicalise the ID in place so that code using p.ID as a ByID key
		// and code using GetPattern agree on the same spelling.
		p.ID = strings.ToUpper(p.ID)
		if _, exists := idx.ByID[p.ID]; exists {
			return nil, fmt.Errorf("duplicate pattern ID: %s", p.ID)
		}
		idx.ByID[p.ID] = p
		idx.All = append(idx.All, p)

		domain := strings.ToUpper(p.Domain)
		idx.ByDomain[domain] = append(idx.ByDomain[domain], p)

		category := strings.ToLower(p.Category)
		idx.ByCategory[category] = append(idx.ByCategory[category], p)

		for _, tag := range p.Tags {
			key := strings.ToLower(tag)
			idx.ByTag[key] = append(idx.ByTag[key], p)
		}

		for _, kw := range p.ObligationMatchKeywords {
			if strings.TrimSpace(kw) == "" {
				continue
			}
			lower := strings.ToLower(kw)
			idx.ByKeyword[lower] = append(idx.ByKeyword[lower], p)
		}
	}
	return idx, nil
}
// GetPattern looks up a pattern by ID (e.g. "CP-AUTH-001"). The ID is
// upper-cased before the lookup, so "cp-auth-001" is accepted as well. The
// boolean reports whether a pattern was found.
func (idx *ControlPatternIndex) GetPattern(id string) (*ControlPattern, bool) {
	key := strings.ToUpper(id)
	pattern, found := idx.ByID[key]
	return pattern, found
}
// GetPatternsByDomain returns the patterns indexed under a domain
// (e.g. "AUTH"). The domain is upper-cased before the lookup; an unknown
// domain yields a nil slice.
func (idx *ControlPatternIndex) GetPatternsByDomain(domain string) []*ControlPattern {
	key := strings.ToUpper(domain)
	return idx.ByDomain[key]
}
// GetPatternsByCategory returns the patterns indexed under a category
// (e.g. "authentication"). The category is lower-cased before the lookup; an
// unknown category yields a nil slice.
func (idx *ControlPatternIndex) GetPatternsByCategory(category string) []*ControlPattern {
	key := strings.ToLower(category)
	return idx.ByCategory[key]
}
// GetPatternsByTag returns the patterns indexed under a tag. The tag is
// lower-cased before the lookup; an unknown tag yields a nil slice.
func (idx *ControlPatternIndex) GetPatternsByTag(tag string) []*ControlPattern {
	key := strings.ToLower(tag)
	return idx.ByTag[key]
}
// MatchByKeywords returns the patterns that have at least one indexed
// obligation keyword occurring as a substring of text (compared in lower case).
//
// Results are ordered by KeywordHits, highest first. Patterns with the same
// number of hits are ordered by ascending pattern ID, so the result is the
// same on every call even though the index is a map. Note that the ordering
// uses the absolute hit count, not PatternMatch.Score.
//
// It returns nil when no keyword matches.
func (idx *ControlPatternIndex) MatchByKeywords(text string) []PatternMatch {
	textLower := strings.ToLower(text)

	// Count hits per pattern pointer; this avoids a second lookup through
	// ByID when assembling the result.
	hits := make(map[*ControlPattern]int)
	for kw, patterns := range idx.ByKeyword {
		// strings.Contains reports true for an empty substring, so an empty
		// key would match every text.
		if kw == "" || !strings.Contains(textLower, kw) {
			continue
		}
		for _, p := range patterns {
			hits[p]++
		}
	}
	if len(hits) == 0 {
		return nil
	}

	matches := make([]PatternMatch, 0, len(hits))
	for p, n := range hits {
		matches = append(matches, PatternMatch{
			Pattern:       p,
			KeywordHits:   n,
			TotalKeywords: len(p.ObligationMatchKeywords),
		})
	}

	// ranksBefore is a total order (hits descending, then ID ascending); the
	// tie-break is what makes the output independent of map iteration order.
	ranksBefore := func(a, b PatternMatch) bool {
		if a.KeywordHits != b.KeywordHits {
			return a.KeywordHits > b.KeywordHits
		}
		return a.Pattern.ID < b.Pattern.ID
	}

	// Insertion sort: the number of matches is small.
	for i := 1; i < len(matches); i++ {
		for j := i; j > 0 && ranksBefore(matches[j], matches[j-1]); j-- {
			matches[j], matches[j-1] = matches[j-1], matches[j]
		}
	}
	return matches
}
// PatternMatch represents a keyword-based match result, as produced by
// ControlPatternIndex.MatchByKeywords.
type PatternMatch struct {
// Pattern is the matched pattern.
Pattern *ControlPattern
// KeywordHits counts how many of the pattern's indexed keywords occurred in
// the matched text.
KeywordHits int
// TotalKeywords is the length of the pattern's ObligationMatchKeywords list.
TotalKeywords int
}
// Score returns KeywordHits divided by TotalKeywords. A match without any
// keywords scores 0 instead of dividing by zero.
func (m PatternMatch) Score() float64 {
	total := m.TotalKeywords
	if total != 0 {
		return float64(m.KeywordHits) / float64(total)
	}
	return 0
}
// ValidatePatternID reports whether the index contains a pattern with the
// given ID. The ID is upper-cased before the lookup.
func (idx *ControlPatternIndex) ValidatePatternID(id string) bool {
	_, known := idx.GetPattern(id)
	return known
}
// Domains returns every domain that has at least one indexed pattern. The
// result follows map iteration order and is therefore not sorted.
func (idx *ControlPatternIndex) Domains() []string {
	names := make([]string, len(idx.ByDomain))
	next := 0
	for name := range idx.ByDomain {
		names[next] = name
		next++
	}
	return names
}

View File

@@ -0,0 +1,384 @@
package ucca
import (
"strings"
"testing"
)
// TestLoadControlPatterns_ValidFiles loads the pattern library from disk and
// checks that loading succeeds and yields exactly 50 patterns.
func TestLoadControlPatterns_ValidFiles(t *testing.T) {
	const wantPatterns = 50

	index, loadErr := LoadControlPatterns()
	if loadErr != nil {
		t.Fatalf("Expected no error, got %v", loadErr)
	}
	if index == nil {
		t.Fatal("Expected non-nil index")
	}

	if got := len(index.All); got != wantPatterns {
		t.Errorf("Expected 50 patterns, got %d", got)
	}
}
// TestLoadControlPatterns_NoDuplicateIDs walks every loaded pattern and
// reports any ID that appears more than once.
func TestLoadControlPatterns_NoDuplicateIDs(t *testing.T) {
	index, loadErr := LoadControlPatterns()
	if loadErr != nil {
		t.Fatalf("Failed to load patterns: %v", loadErr)
	}

	visited := make(map[string]struct{}, len(index.All))
	for _, pattern := range index.All {
		if _, dup := visited[pattern.ID]; dup {
			t.Errorf("Duplicate pattern ID: %s", pattern.ID)
		}
		visited[pattern.ID] = struct{}{}
	}
}
// TestControlPatternIndex_GetPattern checks ID lookups for existing IDs, a
// lower-case spelling, an unknown ID and the empty string.
func TestControlPatternIndex_GetPattern(t *testing.T) {
	index, loadErr := LoadControlPatterns()
	if loadErr != nil {
		t.Fatalf("Failed to load patterns: %v", loadErr)
	}

	type lookupCase struct {
		name      string
		id        string
		wantFound bool
	}
	cases := []lookupCase{
		{name: "existing pattern CP-AUTH-001", id: "CP-AUTH-001", wantFound: true},
		{name: "existing pattern CP-CRYP-001", id: "CP-CRYP-001", wantFound: true},
		{name: "lowercase lookup", id: "cp-auth-001", wantFound: true},
		{name: "non-existing pattern", id: "CP-FAKE-999", wantFound: false},
		{name: "empty id", id: "", wantFound: false},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			pattern, found := index.GetPattern(tc.id)
			if found != tc.wantFound {
				t.Errorf("GetPattern(%q): expected found=%v, got found=%v", tc.id, tc.wantFound, found)
			}
			if found && pattern.ID == "" {
				t.Error("Pattern found but has empty ID")
			}
		})
	}
}
// TestControlPatternIndex_GetPatternsByDomain checks that each listed domain
// has at least a minimum number of patterns and that an unknown domain has
// none.
func TestControlPatternIndex_GetPatternsByDomain(t *testing.T) {
	index, loadErr := LoadControlPatterns()
	if loadErr != nil {
		t.Fatalf("Failed to load patterns: %v", loadErr)
	}

	type domainCase struct {
		domain   string
		minCount int
	}
	cases := []domainCase{
		{domain: "AUTH", minCount: 3},
		{domain: "CRYP", minCount: 3},
		{domain: "DATA", minCount: 5},
		{domain: "SEC", minCount: 3},
		{domain: "COMP", minCount: 5},
		{domain: "LOG", minCount: 2},
		{domain: "INC", minCount: 3},
		{domain: "AI", minCount: 2},
	}

	for _, tc := range cases {
		t.Run(tc.domain, func(t *testing.T) {
			got := len(index.GetPatternsByDomain(tc.domain))
			if got < tc.minCount {
				t.Errorf("Domain %s: expected at least %d patterns, got %d",
					tc.domain, tc.minCount, got)
			}
		})
	}

	if got := len(index.GetPatternsByDomain("NOPE")); got != 0 {
		t.Errorf("Expected 0 patterns for unknown domain, got %d", got)
	}
}
// TestControlPatternIndex_GetPatternsByCategory checks that the
// "authentication" and "encryption" categories each hold at least three
// patterns.
func TestControlPatternIndex_GetPatternsByCategory(t *testing.T) {
	index, loadErr := LoadControlPatterns()
	if loadErr != nil {
		t.Fatalf("Failed to load patterns: %v", loadErr)
	}

	if got := len(index.GetPatternsByCategory("authentication")); got < 3 {
		t.Errorf("Expected at least 3 authentication patterns, got %d", got)
	}

	if got := len(index.GetPatternsByCategory("encryption")); got < 3 {
		t.Errorf("Expected at least 3 encryption patterns, got %d", got)
	}
}
// TestControlPatternIndex_GetPatternsByTag checks tag lookups: at least three
// patterns must carry "data_protection" and at least one must carry
// "security".
func TestControlPatternIndex_GetPatternsByTag(t *testing.T) {
	idx, err := LoadControlPatterns()
	if err != nil {
		t.Fatalf("Failed to load patterns: %v", err)
	}

	dpPatterns := idx.GetPatternsByTag("data_protection")
	if len(dpPatterns) < 3 {
		t.Errorf("Expected at least 3 data_protection tagged patterns, got %d", len(dpPatterns))
	}

	// This used to be an empty "if len >= 1" branch that could never fail;
	// it now asserts the expectation the original comment described.
	secPatterns := idx.GetPatternsByTag("security")
	if len(secPatterns) < 1 {
		t.Errorf("Expected at least 1 security tagged pattern, got %d", len(secPatterns))
	}
}
// TestControlPatternIndex_MatchByKeywords feeds sample obligation texts to
// MatchByKeywords and expects a specific pattern among the three best matches.
func TestControlPatternIndex_MatchByKeywords(t *testing.T) {
	idx, err := LoadControlPatterns()
	if err != nil {
		t.Fatalf("Failed to load patterns: %v", err)
	}

	tests := []struct {
		name            string
		text            string
		expectPatternID string
	}{
		{
			"password related text",
			"Die Passwortrichtlinie muss sicherstellen, dass Anmeldedaten geschuetzt sind",
			"CP-AUTH-001",
		},
		{
			"encryption text",
			"Verschluesselung ruhender Daten muss mit AES-256 erfolgen",
			"CP-CRYP-001",
		},
		{
			"incident response text",
			"Ein Vorfall-Reaktionsplan muss fuer Sicherheitsvorfaelle bereitstehen",
			"CP-INC-001",
		},
		{
			"DSGVO consent text",
			"Die Einwilligung der betroffenen Person muss freiwillig erfolgen",
			"CP-DATA-004",
		},
		{
			"AI risk text",
			"KI-Systeme mit hohem Risiko muessen einer Konformitaetsbewertung unterzogen werden",
			"CP-AI-001",
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			matches := idx.MatchByKeywords(tt.text)
			if len(matches) == 0 {
				// Print the whole text quoted. Slicing it to a fixed byte
				// count panics on short inputs and can split a multi-byte
				// character.
				t.Fatalf("Expected at least 1 match for text: %q", tt.text)
			}

			// Only the three best matches are considered.
			top := matches
			if len(top) > 3 {
				top = top[:3]
			}

			found := false
			topIDs := make([]string, 0, len(top))
			for _, m := range top {
				topIDs = append(topIDs, m.Pattern.ID)
				if m.Pattern.ID == tt.expectPatternID {
					found = true
				}
			}
			if !found {
				t.Errorf("Expected %s in top 3, got %v", tt.expectPatternID, topIDs)
			}
		})
	}
}
// TestControlPatternIndex_MatchByKeywords_NoMatch checks that a text without
// any known keyword produces no matches.
func TestControlPatternIndex_MatchByKeywords_NoMatch(t *testing.T) {
	index, loadErr := LoadControlPatterns()
	if loadErr != nil {
		t.Fatalf("Failed to load patterns: %v", loadErr)
	}

	const unrelated = "xyzzy foobar baz completely unrelated text"
	if got := len(index.MatchByKeywords(unrelated)); got != 0 {
		t.Errorf("Expected 0 matches for unrelated text, got %d", got)
	}
}
// TestPatternMatch_Score checks the hit ratio for a 3-of-7 match and that a
// match without keywords scores zero.
func TestPatternMatch_Score(t *testing.T) {
	const tolerance = 0.01

	want := 3.0 / 7.0
	got := PatternMatch{KeywordHits: 3, TotalKeywords: 7}.Score()
	if got < want-tolerance || got > want+tolerance {
		t.Errorf("Expected score ~%.3f, got %.3f", want, got)
	}

	empty := PatternMatch{KeywordHits: 0, TotalKeywords: 0}
	if zero := empty.Score(); zero != 0 {
		t.Errorf("Expected 0 score for zero keywords, got %f", zero)
	}
}
// TestControlPatternIndex_ValidatePatternID checks that a known ID validates
// and an unknown one does not.
func TestControlPatternIndex_ValidatePatternID(t *testing.T) {
	index, loadErr := LoadControlPatterns()
	if loadErr != nil {
		t.Fatalf("Failed to load patterns: %v", loadErr)
	}

	checks := []struct {
		id      string
		want    bool
		failMsg string
	}{
		{id: "CP-AUTH-001", want: true, failMsg: "Expected CP-AUTH-001 to be valid"},
		{id: "CP-FAKE-999", want: false, failMsg: "Expected CP-FAKE-999 to be invalid"},
	}
	for _, c := range checks {
		if index.ValidatePatternID(c.id) != c.want {
			t.Error(c.failMsg)
		}
	}
}
// TestControlPatternIndex_Domains checks that at least five domains exist and
// that the critical ones are among them.
func TestControlPatternIndex_Domains(t *testing.T) {
	index, loadErr := LoadControlPatterns()
	if loadErr != nil {
		t.Fatalf("Failed to load patterns: %v", loadErr)
	}

	domains := index.Domains()
	if n := len(domains); n < 5 {
		t.Errorf("Expected at least 5 domains, got %d: %v", n, domains)
	}

	// Turn the list into a set for the membership checks below.
	present := make(map[string]struct{}, len(domains))
	for _, name := range domains {
		present[name] = struct{}{}
	}

	for _, required := range [...]string{"AUTH", "CRYP", "DATA", "SEC", "COMP"} {
		if _, ok := present[required]; !ok {
			t.Errorf("Expected domain %s to be present", required)
		}
	}
}
// TestControlPattern_FieldsNotEmpty runs one subtest per pattern and checks
// that mandatory text fields are set, that template and keyword lists meet
// their minimum sizes, and that the default severity is a known value.
func TestControlPattern_FieldsNotEmpty(t *testing.T) {
	index, loadErr := LoadControlPatterns()
	if loadErr != nil {
		t.Fatalf("Failed to load patterns: %v", loadErr)
	}

	knownSeverities := map[string]bool{"low": true, "medium": true, "high": true, "critical": true}

	for _, p := range index.All {
		t.Run(p.ID, func(t *testing.T) {
			// Plain "must not be empty" checks, reported in field order.
			mandatory := []struct {
				value   string
				failMsg string
			}{
				{p.ID, "Empty ID"},
				{p.Name, "Empty Name"},
				{p.NameDE, "Empty NameDE"},
				{p.Domain, "Empty Domain"},
				{p.Category, "Empty Category"},
			}
			for _, field := range mandatory {
				if field.value == "" {
					t.Error(field.failMsg)
				}
			}

			if n := len(p.Description); n < 20 {
				t.Errorf("Description too short: %d chars", n)
			}
			if n := len(p.ObjectiveTemplate); n < 20 {
				t.Errorf("ObjectiveTemplate too short: %d chars", n)
			}
			if n := len(p.RationaleTemplate); n < 20 {
				t.Errorf("RationaleTemplate too short: %d chars", n)
			}
			if n := len(p.RequirementsTemplate); n < 2 {
				t.Errorf("Not enough requirements: %d", n)
			}
			if n := len(p.TestProcedureTemplate); n < 1 {
				t.Errorf("Not enough test procedures: %d", n)
			}
			if n := len(p.EvidenceTemplate); n < 1 {
				t.Errorf("Not enough evidence items: %d", n)
			}
			if n := len(p.ObligationMatchKeywords); n < 3 {
				t.Errorf("Not enough keywords: %d", n)
			}
			if n := len(p.Tags); n < 1 {
				t.Errorf("Not enough tags: %d", n)
			}

			if !knownSeverities[p.SeverityDefault] {
				t.Errorf("Invalid severity: %s", p.SeverityDefault)
			}
		})
	}
}
// TestControlPattern_IDDomainConsistency checks that every pattern ID has
// three dash-separated parts and that the middle part equals the Domain field.
func TestControlPattern_IDDomainConsistency(t *testing.T) {
	index, loadErr := LoadControlPatterns()
	if loadErr != nil {
		t.Fatalf("Failed to load patterns: %v", loadErr)
	}

	for _, pattern := range index.All {
		segments := strings.Split(pattern.ID, "-")
		if n := len(segments); n != 3 {
			t.Errorf("Pattern %s: expected 3 parts in ID, got %d", pattern.ID, n)
			continue
		}

		if domainInID := segments[1]; domainInID != pattern.Domain {
			t.Errorf("Pattern %s: ID domain '%s' != field domain '%s'", pattern.ID, domainInID, pattern.Domain)
		}
	}
}
// TestControlPattern_ComposableWithValid checks that every composable_with
// entry names an existing pattern and that no pattern references itself.
func TestControlPattern_ComposableWithValid(t *testing.T) {
	index, loadErr := LoadControlPatterns()
	if loadErr != nil {
		t.Fatalf("Failed to load patterns: %v", loadErr)
	}

	for _, pattern := range index.All {
		for _, target := range pattern.ComposableWith {
			_, known := index.ByID[target]
			if !known {
				t.Errorf("Pattern %s: composable_with ref '%s' does not exist", pattern.ID, target)
			}
			if target == pattern.ID {
				t.Errorf("Pattern %s: composable_with contains self-reference", pattern.ID)
			}
		}
	}
}
// TestControlPattern_KeywordsLowercase checks that every obligation match
// keyword is already written in lower case in the pattern files.
func TestControlPattern_KeywordsLowercase(t *testing.T) {
	index, loadErr := LoadControlPatterns()
	if loadErr != nil {
		t.Fatalf("Failed to load patterns: %v", loadErr)
	}

	for _, pattern := range index.All {
		for _, keyword := range pattern.ObligationMatchKeywords {
			if lowered := strings.ToLower(keyword); lowered != keyword {
				t.Errorf("Pattern %s: keyword should be lowercase: '%s'", pattern.ID, keyword)
			}
		}
	}
}