Some checks failed
CI/CD / go-lint (push) Has been skipped
CI/CD / python-lint (push) Has been skipped
CI/CD / nodejs-lint (push) Has been skipped
CI/CD / test-go-ai-compliance (push) Failing after 47s
CI/CD / test-python-backend-compliance (push) Successful in 33s
CI/CD / test-python-document-crawler (push) Successful in 24s
CI/CD / test-python-dsms-gateway (push) Successful in 18s
CI/CD / validate-canonical-controls (push) Successful in 11s
CI/CD / Deploy (push) Has been skipped
Implements the full Multi-Layer Control Architecture for migrating ~25,000 Rich Controls into atomic, deduplicated Master Controls with full traceability. Architecture: Legal Source → Obligation → Control Pattern → Master Control → Customer Instance New services: - ObligationExtractor: 3-tier extraction (exact → embedding → LLM) - PatternMatcher: 2-tier matching (keyword + embedding + domain-bonus) - ControlComposer: Pattern + Obligation → Master Control - PipelineAdapter: Pipeline integration + Migration Passes 1-5 - DecompositionPass: Pass 0a/0b — Rich Control → atomic Controls - CrosswalkRoutes: 15 API endpoints under /v1/canonical/ New DB schema: - Migration 060: obligation_extractions, control_patterns, crosswalk_matrix - Migration 061: obligation_candidates, parent_control_uuid tracking Pattern Library: 50 YAML patterns (30 core + 20 IT-security) Go SDK: Pattern loader with YAML validation and indexing Documentation: MkDocs updated with full architecture overview 500 Python tests passing across all components. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
261 lines
7.9 KiB
Go
261 lines
7.9 KiB
Go
package ucca
|
|
|
|
import (
|
|
"fmt"
|
|
"os"
|
|
"path/filepath"
|
|
"runtime"
|
|
"strings"
|
|
|
|
"gopkg.in/yaml.v3"
|
|
)
|
|
|
|
// ControlPattern represents a reusable control pattern template.
|
|
// Pattern ID format: CP-{DOMAIN}-{NNN} (e.g. CP-AUTH-001).
|
|
type ControlPattern struct {
|
|
ID string `yaml:"id" json:"id"`
|
|
Name string `yaml:"name" json:"name"`
|
|
NameDE string `yaml:"name_de" json:"name_de"`
|
|
Domain string `yaml:"domain" json:"domain"`
|
|
Category string `yaml:"category" json:"category"`
|
|
Description string `yaml:"description" json:"description"`
|
|
ObjectiveTemplate string `yaml:"objective_template" json:"objective_template"`
|
|
RationaleTemplate string `yaml:"rationale_template" json:"rationale_template"`
|
|
RequirementsTemplate []string `yaml:"requirements_template" json:"requirements_template"`
|
|
TestProcedureTemplate []string `yaml:"test_procedure_template" json:"test_procedure_template"`
|
|
EvidenceTemplate []string `yaml:"evidence_template" json:"evidence_template"`
|
|
SeverityDefault string `yaml:"severity_default" json:"severity_default"`
|
|
ImplementationEffortDefault string `yaml:"implementation_effort_default,omitempty" json:"implementation_effort_default,omitempty"`
|
|
OpenAnchorRefs []AnchorRef `yaml:"open_anchor_refs,omitempty" json:"open_anchor_refs,omitempty"`
|
|
ObligationMatchKeywords []string `yaml:"obligation_match_keywords" json:"obligation_match_keywords"`
|
|
Tags []string `yaml:"tags" json:"tags"`
|
|
ComposableWith []string `yaml:"composable_with,omitempty" json:"composable_with,omitempty"`
|
|
}
|
|
|
|
// AnchorRef points from a pattern to an entry in an open-source framework.
type AnchorRef struct {
	// Framework names the referenced framework.
	Framework string `yaml:"framework" json:"framework"`
	// Ref is the reference within that framework.
	Ref string `yaml:"ref" json:"ref"`
}
|
|
|
|
// patternFile is the top-level YAML structure.
|
|
type patternFile struct {
|
|
Version string `yaml:"version"`
|
|
Description string `yaml:"description"`
|
|
Patterns []ControlPattern `yaml:"patterns"`
|
|
}
|
|
|
|
// ControlPatternIndex provides fast lookup of control patterns.
|
|
type ControlPatternIndex struct {
|
|
ByID map[string]*ControlPattern
|
|
ByDomain map[string][]*ControlPattern
|
|
ByCategory map[string][]*ControlPattern
|
|
ByTag map[string][]*ControlPattern
|
|
ByKeyword map[string][]*ControlPattern // keyword -> patterns (for obligation matching)
|
|
All []*ControlPattern
|
|
}
|
|
|
|
// LoadControlPatterns loads all YAML pattern files from the control_patterns directory.
|
|
func LoadControlPatterns() (*ControlPatternIndex, error) {
|
|
dir, err := findPatternsDir()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
entries, err := os.ReadDir(dir)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to read patterns directory: %w", err)
|
|
}
|
|
|
|
var allPatterns []ControlPattern
|
|
for _, entry := range entries {
|
|
if entry.IsDir() {
|
|
continue
|
|
}
|
|
name := entry.Name()
|
|
if strings.HasPrefix(name, "_") {
|
|
continue // skip schema and metadata files
|
|
}
|
|
if !strings.HasSuffix(name, ".yaml") && !strings.HasSuffix(name, ".yml") {
|
|
continue
|
|
}
|
|
|
|
data, err := os.ReadFile(filepath.Join(dir, name))
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to read %s: %w", name, err)
|
|
}
|
|
|
|
var pf patternFile
|
|
if err := yaml.Unmarshal(data, &pf); err != nil {
|
|
return nil, fmt.Errorf("failed to parse %s: %w", name, err)
|
|
}
|
|
|
|
allPatterns = append(allPatterns, pf.Patterns...)
|
|
}
|
|
|
|
if len(allPatterns) == 0 {
|
|
return nil, fmt.Errorf("no control patterns found in %s", dir)
|
|
}
|
|
|
|
idx, err := buildPatternIndex(allPatterns)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return idx, nil
|
|
}
|
|
|
|
// findPatternsDir returns the absolute path of the first existing
// policies/control_patterns directory among a fixed list of candidates.
//
// The candidates are tried in order: three paths relative to the working
// directory, then one path relative to the directory of this source file as
// reported by runtime.Caller. An error is returned when none of them is an
// existing directory.
func findPatternsDir() (string, error) {
	searchPaths := make([]string, 0, 4)
	searchPaths = append(searchPaths,
		"policies/control_patterns",
		"../policies/control_patterns",
		"../../policies/control_patterns",
	)

	// Also try a location relative to this source file, when it is known.
	if _, thisFile, _, known := runtime.Caller(0); known {
		searchPaths = append(searchPaths,
			filepath.Join(filepath.Dir(thisFile), "../../policies/control_patterns"),
		)
	}

	for _, candidate := range searchPaths {
		resolved, err := filepath.Abs(candidate)
		if err != nil {
			continue
		}
		if st, statErr := os.Stat(resolved); statErr == nil && st.IsDir() {
			return resolved, nil
		}
	}

	return "", fmt.Errorf("control_patterns directory not found in any candidate path")
}
|
|
|
|
func buildPatternIndex(patterns []ControlPattern) (*ControlPatternIndex, error) {
|
|
idx := &ControlPatternIndex{
|
|
ByID: make(map[string]*ControlPattern),
|
|
ByDomain: make(map[string][]*ControlPattern),
|
|
ByCategory: make(map[string][]*ControlPattern),
|
|
ByTag: make(map[string][]*ControlPattern),
|
|
ByKeyword: make(map[string][]*ControlPattern),
|
|
}
|
|
|
|
for i := range patterns {
|
|
p := &patterns[i]
|
|
|
|
// Validate ID uniqueness
|
|
if _, exists := idx.ByID[p.ID]; exists {
|
|
return nil, fmt.Errorf("duplicate pattern ID: %s", p.ID)
|
|
}
|
|
|
|
idx.ByID[p.ID] = p
|
|
idx.ByDomain[p.Domain] = append(idx.ByDomain[p.Domain], p)
|
|
idx.ByCategory[p.Category] = append(idx.ByCategory[p.Category], p)
|
|
idx.All = append(idx.All, p)
|
|
|
|
for _, tag := range p.Tags {
|
|
idx.ByTag[tag] = append(idx.ByTag[tag], p)
|
|
}
|
|
|
|
for _, kw := range p.ObligationMatchKeywords {
|
|
lower := strings.ToLower(kw)
|
|
idx.ByKeyword[lower] = append(idx.ByKeyword[lower], p)
|
|
}
|
|
}
|
|
|
|
return idx, nil
|
|
}
|
|
|
|
// GetPattern returns a pattern by its ID (e.g. "CP-AUTH-001").
|
|
func (idx *ControlPatternIndex) GetPattern(id string) (*ControlPattern, bool) {
|
|
p, ok := idx.ByID[strings.ToUpper(id)]
|
|
return p, ok
|
|
}
|
|
|
|
// GetPatternsByDomain returns all patterns for a domain (e.g. "AUTH").
|
|
func (idx *ControlPatternIndex) GetPatternsByDomain(domain string) []*ControlPattern {
|
|
return idx.ByDomain[strings.ToUpper(domain)]
|
|
}
|
|
|
|
// GetPatternsByCategory returns all patterns for a category (e.g. "authentication").
|
|
func (idx *ControlPatternIndex) GetPatternsByCategory(category string) []*ControlPattern {
|
|
return idx.ByCategory[strings.ToLower(category)]
|
|
}
|
|
|
|
// GetPatternsByTag returns all patterns with a given tag.
|
|
func (idx *ControlPatternIndex) GetPatternsByTag(tag string) []*ControlPattern {
|
|
return idx.ByTag[strings.ToLower(tag)]
|
|
}
|
|
|
|
// MatchByKeywords returns patterns whose obligation_match_keywords overlap with
|
|
// the given text. Returns matches sorted by score (number of keyword hits) descending.
|
|
func (idx *ControlPatternIndex) MatchByKeywords(text string) []PatternMatch {
|
|
textLower := strings.ToLower(text)
|
|
scores := make(map[string]int)
|
|
|
|
for kw, patterns := range idx.ByKeyword {
|
|
if strings.Contains(textLower, kw) {
|
|
for _, p := range patterns {
|
|
scores[p.ID]++
|
|
}
|
|
}
|
|
}
|
|
|
|
if len(scores) == 0 {
|
|
return nil
|
|
}
|
|
|
|
// Collect and sort by score descending
|
|
matches := make([]PatternMatch, 0, len(scores))
|
|
for id, score := range scores {
|
|
p := idx.ByID[id]
|
|
matches = append(matches, PatternMatch{
|
|
Pattern: p,
|
|
KeywordHits: score,
|
|
TotalKeywords: len(p.ObligationMatchKeywords),
|
|
})
|
|
}
|
|
|
|
// Simple insertion sort (small N)
|
|
for i := 1; i < len(matches); i++ {
|
|
for j := i; j > 0 && matches[j].KeywordHits > matches[j-1].KeywordHits; j-- {
|
|
matches[j], matches[j-1] = matches[j-1], matches[j]
|
|
}
|
|
}
|
|
|
|
return matches
|
|
}
|
|
|
|
// PatternMatch represents a keyword-based match result.
|
|
type PatternMatch struct {
|
|
Pattern *ControlPattern
|
|
KeywordHits int
|
|
TotalKeywords int
|
|
}
|
|
|
|
// Score returns the match score as a ratio of hits to total keywords.
|
|
func (m PatternMatch) Score() float64 {
|
|
if m.TotalKeywords == 0 {
|
|
return 0
|
|
}
|
|
return float64(m.KeywordHits) / float64(m.TotalKeywords)
|
|
}
|
|
|
|
// ValidatePatternID checks if a pattern ID exists in the index.
|
|
func (idx *ControlPatternIndex) ValidatePatternID(id string) bool {
|
|
_, ok := idx.ByID[strings.ToUpper(id)]
|
|
return ok
|
|
}
|
|
|
|
// Domains returns the list of unique domains that have patterns.
|
|
func (idx *ControlPatternIndex) Domains() []string {
|
|
domains := make([]string, 0, len(idx.ByDomain))
|
|
for d := range idx.ByDomain {
|
|
domains = append(domains, d)
|
|
}
|
|
return domains
|
|
}
|